Spaces:

ginipick
/

OpenSUNO

Building on L40S

App Files Files Community

ginipick committed on Jan 29

Commit

3c1a098

verified ·

1 Parent(s): bd2cd71

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -31

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import tempfile
 import torch
 import logging
 import numpy as np
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
@@ -19,16 +20,48 @@ logging.basicConfig(
     ]
 )
 # GPU 설정 최적화
 def optimize_gpu_settings():
     if torch.cuda.is_available():
-        # L40S에 최적화된 설정
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.benchmark = True
         torch.backends.cudnn.deterministic = False
         torch.backends.cudnn.enabled = True
-        # GPU 메모리 설정
         torch.cuda.empty_cache()
         torch.cuda.set_device(0)
@@ -37,7 +70,6 @@ def optimize_gpu_settings():
     else:
         logging.warning("GPU not available!")
-# flash-attn 설치 함수 개선
 def install_flash_attn():
     try:
         logging.info("Installing flash-attn...")
@@ -51,26 +83,22 @@ def install_flash_attn():
         logging.error(f"Failed to install flash-attn: {e}")
         raise
-# 초기화 함수
 def initialize_system():
     optimize_gpu_settings()
     install_flash_attn()
     from huggingface_hub import snapshot_download
-    # xcodec_mini_infer 폴더 생성
     folder_path = './inference/xcodec_mini_infer'
     os.makedirs(folder_path, exist_ok=True)
     logging.info(f"Created folder at: {folder_path}")
-    # 모델 다운로드
     snapshot_download(
         repo_id="m-a-p/xcodec_mini_infer",
         local_dir="./inference/xcodec_mini_infer",
         resume_download=True
     )
-    # inference 디렉토리로 이동
     try:
         os.chdir("./inference")
         logging.info(f"Working directory changed to: {os.getcwd()}")
@@ -78,7 +106,6 @@ def initialize_system():
         logging.error(f"Directory error: {e}")
         raise
-# 캐시를 활용한 파일 관리
 @lru_cache(maxsize=100)
 def get_cached_file_path(content_hash, prefix):
     """Return ``create_temp_file(content_hash, prefix)``, memoized per argument pair.

     The ``lru_cache`` decorator keeps the results for up to 100 distinct
     ``(content_hash, prefix)`` pairs; a repeated pair returns the cached
     value without calling ``create_temp_file`` again.

     NOTE(review): ``create_temp_file`` is defined outside this excerpt;
     presumably it returns a file path -- confirm. If that file can be
     removed while the entry is still cached, the cached value would go
     stale -- verify against the helper.
     """
     cached_path = create_temp_file(content_hash, prefix)
     return cached_path
@@ -111,9 +138,12 @@ def get_last_mp3_file(output_dir):
     mp3_files_with_path.sort(key=os.path.getmtime, reverse=True)
     return mp3_files_with_path[0]
-# L40S에 최적화된 추론 함수
 def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     try:
         # 임시 파일 생성
         genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
         lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
@@ -122,21 +152,22 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)
-        # L40S에 최적화된 명령어
         command = [
             "python", "infer.py",
-            "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
             "--stage2_model", "m-a-p/YuE-s2-1B-general",
             "--genre_txt", genre_txt_path,
             "--lyrics_txt", lyrics_txt_path,
             "--run_n_segments", str(num_segments),
-            "--stage2_batch_size", "8",  # L40S에 맞게 증가
             "--output_dir", output_dir,
             "--cuda_idx", "0",
-            "--max_new_tokens", str(max_new_tokens),
             "--disable_offload_model",
-            "--use_flash_attention_2",  # Flash Attention 2 활성화
-            "--bf16"  # BF16 정밀도 사용
         ]
         # CUDA 환경 변수 설정
@@ -177,7 +208,7 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
 # Gradio 인터페이스
 with gr.Blocks() as demo:
     with gr.Column():
-        gr.Markdown("# YuE: Open Music Foundation Models for Full-Song Generation (L40S Optimized)")
         gr.HTML("""
         <div style="display:flex;column-gap:4px;">
             <a href="https://github.com/multimodal-art-projection/YuE">
@@ -196,7 +227,7 @@ with gr.Blocks() as demo:
                     placeholder="Enter music genre and style descriptions..."
                 )
                 lyrics_txt = gr.Textbox(
-                    label="Lyrics",
                     placeholder="Enter song lyrics...",
                     lines=10
                 )
@@ -213,7 +244,7 @@ with gr.Blocks() as demo:
                 max_new_tokens = gr.Slider(
                     label="Max New Tokens",
                     minimum=500,
-                    maximum=32000,  # L40S의 큰 메모리를 활용
                     step=500,
                     value=4000,
                     interactive=True
@@ -221,8 +252,10 @@ with gr.Blocks() as demo:
                 submit_btn = gr.Button("Generate Music", variant="primary")
                 music_out = gr.Audio(label="Generated Audio")
         gr.Examples(
             examples=[
                 [
                     "female blues airy vocal bright vocal piano sad romantic guitar jazz",
                     """[verse]
@@ -238,23 +271,52 @@ Can't imagine life alone, don't want to let you go
 Stay with me forever, let our love just flow
                     """
                 ],
                 [
-                    "rap piano street tough piercing vocal hip-hop synthesizer clear vocal male",
                     """[verse]
-Woke up in the morning, sun is shining bright
-Chasing all my dreams, gotta get my mind right
-City lights are fading, but my vision's clear
-Got my team beside me, no room for fear
-Walking through the streets, beats inside my head
-Every step I take, closer to the bread
-People passing by, they don't understand
-Building up my future with my own two hands
 [chorus]
-This is my life, and I'm aiming for the top
-Never gonna quit, no, I'm never gonna stop
-Through the highs and lows, I'mma keep it real
-Living out my dreams with this mic and a deal
                     """
                 ]
             ],

 import torch
 import logging
 import numpy as np
+import re
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
     ]
 )
+# Language detection and stage-1 model selection
+def detect_and_select_model(text):
+    """Pick the stage-1 model id that matches the script used in *text*.
+
+    Args:
+        text: Lyrics (or any text) to inspect. Empty or ``None`` input is
+            treated as English.
+
+    Returns:
+        The model id string: the ``jp-kr`` model when Hangul or kana is
+        present, the ``zh`` model when only Han ideographs are present, and
+        the ``en`` model otherwise.
+    """
+    if not text:
+        return "m-a-p/YuE-s1-7B-anneal-en-cot"
+    if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text):  # Hangul
+        return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+    # Kana must be tested before Han ideographs: Japanese lyrics normally mix
+    # kanji (which fall in the Han range) with kana, so testing Han first
+    # would send Japanese text to the Chinese model.
+    if re.search(r'[\u3040-\u309F\u30A0-\u30FF]', text):  # Hiragana / Katakana
+        return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+    # Han ideographs with no kana or Hangul: treated as Chinese. Japanese text
+    # written purely in kanji cannot be told apart here and also lands on zh.
+    if re.search(r'[\u4e00-\u9fff]', text):  # Han ideographs
+        return "m-a-p/YuE-s1-7B-anneal-zh-cot"
+    # English / anything else
+    return "m-a-p/YuE-s1-7B-anneal-en-cot"
+def optimize_model_selection(lyrics, genre):
+    """Choose the stage-1 model for *lyrics* and return its generation settings.
+
+    Args:
+        lyrics: Lyrics text; passed to ``detect_and_select_model`` to choose
+            the model.
+        genre: Genre description. Accepted but not used by this function.
+
+    Returns:
+        A ``(model_path, config)`` tuple where ``config`` is a new dict with
+        the keys ``max_tokens``, ``temperature`` and ``batch_size``.
+
+    Raises:
+        KeyError: If the selected model has no entry in the settings table.
+    """
+    # (max_tokens, temperature, batch_size) for each supported model id
+    per_model_settings = {
+        "m-a-p/YuE-s1-7B-anneal-en-cot": (24000, 0.8, 8),
+        "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": (24000, 0.7, 8),
+        "m-a-p/YuE-s1-7B-anneal-zh-cot": (24000, 0.7, 8),
+    }
+    selected_model = detect_and_select_model(lyrics)
+    token_limit, sampling_temperature, stage2_batch = per_model_settings[selected_model]
+    settings = {
+        "max_tokens": token_limit,
+        "temperature": sampling_temperature,
+        "batch_size": stage2_batch,
+    }
+    return selected_model, settings
 # GPU 설정 최적화
 def optimize_gpu_settings():
     if torch.cuda.is_available():
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.benchmark = True
         torch.backends.cudnn.deterministic = False
         torch.backends.cudnn.enabled = True
         torch.cuda.empty_cache()
         torch.cuda.set_device(0)
     else:
         logging.warning("GPU not available!")
 def install_flash_attn():
     try:
         logging.info("Installing flash-attn...")
         logging.error(f"Failed to install flash-attn: {e}")
         raise
 def initialize_system():
     optimize_gpu_settings()
     install_flash_attn()
     from huggingface_hub import snapshot_download
     folder_path = './inference/xcodec_mini_infer'
     os.makedirs(folder_path, exist_ok=True)
     logging.info(f"Created folder at: {folder_path}")
     snapshot_download(
         repo_id="m-a-p/xcodec_mini_infer",
         local_dir="./inference/xcodec_mini_infer",
         resume_download=True
     )
     try:
         os.chdir("./inference")
         logging.info(f"Working directory changed to: {os.getcwd()}")
         logging.error(f"Directory error: {e}")
         raise
 @lru_cache(maxsize=100)
 def get_cached_file_path(content_hash, prefix):
     """Return ``create_temp_file(content_hash, prefix)``, memoized per argument pair.

     The ``lru_cache`` decorator keeps the results for up to 100 distinct
     ``(content_hash, prefix)`` pairs; a repeated pair returns the cached
     value without calling ``create_temp_file`` again.

     NOTE(review): ``create_temp_file`` is defined outside this excerpt;
     presumably it returns a file path -- confirm. If that file can be
     removed while the entry is still cached, the cached value would go
     stale -- verify against the helper.
     """
     cached_path = create_temp_file(content_hash, prefix)
     return cached_path
     mp3_files_with_path.sort(key=os.path.getmtime, reverse=True)
     return mp3_files_with_path[0]
 def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     try:
+        # 모델 선택 및 설정
+        model_path, config = optimize_model_selection(lyrics_txt_content, genre_txt_content)
+        logging.info(f"Selected model: {model_path}")
         # 임시 파일 생성
         genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
         lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)
+        # 명령어 구성
         command = [
             "python", "infer.py",
+            "--stage1_model", model_path,
             "--stage2_model", "m-a-p/YuE-s2-1B-general",
             "--genre_txt", genre_txt_path,
             "--lyrics_txt", lyrics_txt_path,
             "--run_n_segments", str(num_segments),
+            "--stage2_batch_size", str(config['batch_size']),
             "--output_dir", output_dir,
             "--cuda_idx", "0",
+            "--max_new_tokens", str(config['max_tokens']),
+            "--temperature", str(config['temperature']),
             "--disable_offload_model",
+            "--use_flash_attention_2",
+            "--bf16"
         ]
         # CUDA 환경 변수 설정
 # Gradio 인터페이스
 with gr.Blocks() as demo:
     with gr.Column():
+        gr.Markdown("# YuE: Open Music Foundation Models for Full-Song Generation (Multi-Language Support)")
         gr.HTML("""
         <div style="display:flex;column-gap:4px;">
             <a href="https://github.com/multimodal-art-projection/YuE">
                     placeholder="Enter music genre and style descriptions..."
                 )
                 lyrics_txt = gr.Textbox(
+                    label="Lyrics (Supports English, Korean, Japanese, Chinese)",
                     placeholder="Enter song lyrics...",
                     lines=10
                 )
                 max_new_tokens = gr.Slider(
                     label="Max New Tokens",
                     minimum=500,
+                    maximum=32000,
                     step=500,
                     value=4000,
                     interactive=True
                 submit_btn = gr.Button("Generate Music", variant="primary")
                 music_out = gr.Audio(label="Generated Audio")
+        # 다국어 예제 추가
         gr.Examples(
             examples=[
+                # 영어 예제
                 [
                     "female blues airy vocal bright vocal piano sad romantic guitar jazz",
                     """[verse]
 Stay with me forever, let our love just flow
                     """
                 ],
+                # 한국어 예제
+                [
+                    "K-pop bright energetic synth dance electronic",
+                    """[verse]
+빛나는 별들처럼 우리의 꿈이
+저 하늘을 수놓아 반짝이네
+함께라면 어디든 갈 수 있어
+우리의 이야기가 시작되네
+[chorus]
+달려가자 더 높이 더 멀리
+두려움은 없어 너와 함께라면
+영원히 계속될 우리의 노래
+이 순간을 기억해 forever
+                    """
+                ],
+                # 일본어 예제
+                [
+                    "J-pop melodic soft piano emotional",
+                    """[verse]
+春の風に乗って
+思い出が流れる
+あの日の約束を
+今でも覚えてる
+[chorus]
+君と見た空は
+今も変わらないよ
+どこまでも続く
+この道の先で
+                    """
+                ],
+                # 중국어 예제
                 [
+                    "Chinese pop traditional fusion modern",
                     """[verse]
+晨光照亮天际
+新的一天开始
+追逐着梦想前进
+不停歇的脚步
 [chorus]
+让希望照亮前方
+让勇气伴随身旁
+这一路有你相伴
+永远不会孤单
                     """
                 ]
             ],