ginipick committed on
Commit b790f96 · verified · 1 Parent(s): a1ecc8d

Create app-backup.py

Files changed (1)
  1. app-backup.py +548 -0
app-backup.py ADDED
@@ -0,0 +1,548 @@
+ import gradio as gr
+ import subprocess
+ import os
+ import shutil
+ import tempfile
+ import torch
+ import logging
+ import numpy as np
+ import re
+ from concurrent.futures import ThreadPoolExecutor
+ from functools import lru_cache
+
+ # Logging setup
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.FileHandler('yue_generation.log'),
+         logging.StreamHandler()
+     ]
+ )
+
+ def optimize_gpu_settings():
+     if torch.cuda.is_available():
+         # Optimize GPU memory management
+         torch.backends.cuda.matmul.allow_tf32 = True
+         torch.backends.cudnn.benchmark = True
+         torch.backends.cudnn.enabled = True
+         torch.backends.cudnn.deterministic = False
+
+         # Memory settings tuned for the L40S
+         torch.cuda.empty_cache()
+         torch.cuda.set_device(0)
+
+         # CUDA stream optimization
+         torch.cuda.Stream(0)
+
+         # Memory allocator optimization
+         os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
+
+         logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+         logging.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
+
+         # L40S-specific setting
+         if 'L40S' in torch.cuda.get_device_name(0):
+             torch.cuda.set_per_process_memory_fraction(0.95)
+
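+ # Note (added commentary, not in the original): 'max_split_size_mb:512' asks
+ # PyTorch's caching allocator not to split memory blocks larger than 512 MB,
+ # which tends to reduce fragmentation during long generation runs; it is a
+ # tuning hint, not a hard memory cap.
+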
+ def analyze_lyrics(lyrics, repeat_chorus=2):
+     lines = [line.strip() for line in lyrics.split('\n') if line.strip()]
+
+     sections = {
+         'verse': 0,
+         'chorus': 0,
+         'bridge': 0,
+         'total_lines': len(lines)
+     }
+
+     current_section = None
+     last_section_start = 0  # initialized defensively so it is never unbound
+     section_lines = {
+         'verse': [],
+         'chorus': [],
+         'bridge': []
+     }
+     last_section = None
+
+     # Find the last section tag
+     for i, line in enumerate(lines):
+         if '[verse]' in line.lower() or '[chorus]' in line.lower() or '[bridge]' in line.lower():
+             last_section = i
+
+     for i, line in enumerate(lines):
+         lower_line = line.lower()
+
+         # Handle section tags
+         if '[verse]' in lower_line:
+             if current_section:  # store the previous section's lines
+                 section_lines[current_section].extend(lines[last_section_start:i])
+             current_section = 'verse'
+             sections['verse'] += 1
+             last_section_start = i + 1
+             continue
+         elif '[chorus]' in lower_line:
+             if current_section:
+                 section_lines[current_section].extend(lines[last_section_start:i])
+             current_section = 'chorus'
+             sections['chorus'] += 1
+             last_section_start = i + 1
+             continue
+         elif '[bridge]' in lower_line:
+             if current_section:
+                 section_lines[current_section].extend(lines[last_section_start:i])
+             current_section = 'bridge'
+             sections['bridge'] += 1
+             last_section_start = i + 1
+             continue
+
+     # Append the final section's lines
+     if current_section and last_section_start < len(lines):
+         section_lines[current_section].extend(lines[last_section_start:])
+
+     # Repeat the chorus if requested
+     if sections['chorus'] > 0 and repeat_chorus > 1:
+         original_chorus = section_lines['chorus'][:]
+         for _ in range(repeat_chorus - 1):
+             section_lines['chorus'].extend(original_chorus)
+
+     # Log per-section line counts
+     logging.info(f"Section line counts - Verse: {len(section_lines['verse'])}, "
+                  f"Chorus: {len(section_lines['chorus'])}, "
+                  f"Bridge: {len(section_lines['bridge'])}")
+
+     return sections, (sections['verse'] + sections['chorus'] + sections['bridge']), len(lines), section_lines
+
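+ # Illustrative example (not executed): for lyrics
+ #   "[verse]\nline a\nline b\n[chorus]\nline c"
+ # analyze_lyrics returns sections {'verse': 1, 'chorus': 1, 'bridge': 0,
+ # 'total_lines': 5}, a section count of 2, a line count of 5, and
+ # section_lines {'verse': ['line a', 'line b'], 'chorus': ['line c', 'line c'],
+ # 'bridge': []} -- the default repeat_chorus=2 duplicates the chorus lines.
+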
+ def calculate_generation_params(lyrics):
+     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
+
+     # Base time per line (in seconds)
+     time_per_line = {
+         'verse': 4,   # 4 seconds per verse line
+         'chorus': 6,  # 6 seconds per chorus line
+         'bridge': 5   # 5 seconds per bridge line
+     }
+
+     # Estimated duration per section (final section included)
+     section_durations = {}
+     for section_type in ['verse', 'chorus', 'bridge']:
+         lines_count = len(section_lines[section_type])
+         section_durations[section_type] = lines_count * time_per_line[section_type]
+
+     # Total duration with headroom
+     total_duration = sum(duration for duration in section_durations.values())
+     total_duration = max(60, int(total_duration * 1.2))  # add 20% headroom
+
+     # Token budget (extra tokens reserved for the final section)
+     base_tokens = 3000
+     tokens_per_line = 200
+     extra_tokens = 1000  # extra tokens for the final section
+
+     total_tokens = base_tokens + (total_lines * tokens_per_line) + extra_tokens
+
+     # Segment count (an extra segment for the final section)
+     if sections['chorus'] > 0:
+         num_segments = 4  # 4 segments when a chorus is present
+     else:
+         num_segments = 3  # 3 segments otherwise
+
+     # Cap the token count (raised limit)
+     max_tokens = min(12000, total_tokens)  # increased maximum token count
+
+     return {
+         'max_tokens': max_tokens,
+         'num_segments': num_segments,
+         'sections': sections,
+         'section_lines': section_lines,
+         'estimated_duration': total_duration,
+         'section_durations': section_durations,
+         'has_chorus': sections['chorus'] > 0
+     }
+
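+ # Worked example of the heuristics above: two 4-line verses plus one 4-line
+ # chorus make 15 non-empty lines (tags included); the chorus doubles to
+ # 8 lines via repeat_chorus, giving 8*4s + 8*6s = 80s, then *1.2 -> 96s;
+ # tokens = 3000 + 15*200 + 1000 = 7000 (under the 12000 cap); a chorus is
+ # present, so num_segments = 4.
+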
+ def detect_and_select_model(text):
+     if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text):    # Hangul jamo/syllables
+         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+     elif re.search(r'[\u4e00-\u9fff]', text):               # CJK unified ideographs
+         return "m-a-p/YuE-s1-7B-anneal-zh-cot"
+     elif re.search(r'[\u3040-\u309F\u30A0-\u30FF]', text):  # Hiragana/Katakana
+         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+     else:
+         return "m-a-p/YuE-s1-7B-anneal-en-cot"
+
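+ # Spot checks (illustrative): Hangul is tested first, so Korean text routes
+ # to the jp-kr model even if it also contains CJK ideographs.
+ # detect_and_select_model("언젠가")      -> "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+ # detect_and_select_model("你好")        -> "m-a-p/YuE-s1-7B-anneal-zh-cot"
+ # detect_and_select_model("こんにちは")  -> "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+ # detect_and_select_model("hello")       -> "m-a-p/YuE-s1-7B-anneal-en-cot"
+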
+ def install_flash_attn():
+     try:
+         if not torch.cuda.is_available():
+             logging.warning("GPU not available, skipping flash-attn installation")
+             return False
+
+         cuda_version = torch.version.cuda
+         if cuda_version is None:
+             logging.warning("CUDA not available, skipping flash-attn installation")
+             return False
+
+         logging.info(f"Detected CUDA version: {cuda_version}")
+
+         try:
+             import flash_attn
+             logging.info("flash-attn already installed")
+             return True
+         except ImportError:
+             logging.info("Installing flash-attn...")
+             subprocess.run(
+                 ["pip", "install", "flash-attn", "--no-build-isolation"],
+                 check=True,
+                 capture_output=True
+             )
+             logging.info("flash-attn installed successfully!")
+             return True
+
+     except Exception as e:
+         logging.warning(f"Failed to install flash-attn: {e}")
+         return False
+
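+ # Note (added commentary): building flash-attn from source requires a CUDA
+ # toolkit matching torch.version.cuda; --no-build-isolation lets pip compile
+ # against the torch already installed in this environment rather than a
+ # fresh, isolated build environment.
+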
+ def initialize_system():
+     optimize_gpu_settings()
+
+     with ThreadPoolExecutor(max_workers=4) as executor:
+         futures = []
+
+         futures.append(executor.submit(install_flash_attn))
+
+         from huggingface_hub import snapshot_download
+
+         folder_path = './inference/xcodec_mini_infer'
+         os.makedirs(folder_path, exist_ok=True)
+         logging.info(f"Created folder at: {folder_path}")
+
+         futures.append(executor.submit(
+             snapshot_download,
+             repo_id="m-a-p/xcodec_mini_infer",
+             local_dir="./inference/xcodec_mini_infer",
+             resume_download=True
+         ))
+
+         for future in futures:
+             future.result()
+
+     try:
+         os.chdir("./inference")
+         logging.info(f"Working directory changed to: {os.getcwd()}")
+     except FileNotFoundError as e:
+         logging.error(f"Directory error: {e}")
+         raise
+
+ @lru_cache(maxsize=100)
+ def get_cached_file_path(content_hash, prefix):
+     return create_temp_file(content_hash, prefix)
+
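+ # Note (added commentary): lru_cache memoizes on (content_hash, prefix), so
+ # repeated requests for identical content reuse one temp-file path instead of
+ # writing a new file each time; create_temp_file is defined below, which is
+ # fine because the body only runs at call time.
+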
+ def empty_output_folder(output_dir):
+     try:
+         shutil.rmtree(output_dir)
+         os.makedirs(output_dir)
+         logging.info(f"Output folder cleaned: {output_dir}")
+     except Exception as e:
+         logging.error(f"Error cleaning output folder: {e}")
+         raise
+
+ def create_temp_file(content, prefix, suffix=".txt"):
+     temp_file = tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=prefix, suffix=suffix)
+     content = content.strip() + "\n\n"
+     content = content.replace("\r\n", "\n").replace("\r", "\n")
+     temp_file.write(content)
+     temp_file.close()
+     logging.debug(f"Temporary file created: {temp_file.name}")
+     return temp_file.name
+
+ def get_last_mp3_file(output_dir):
+     mp3_files = [f for f in os.listdir(output_dir) if f.endswith('.mp3')]
+     if not mp3_files:
+         logging.warning("No MP3 files found")
+         return None
+
+     mp3_files_with_path = [os.path.join(output_dir, f) for f in mp3_files]
+     mp3_files_with_path.sort(key=os.path.getmtime, reverse=True)
+     return mp3_files_with_path[0]
+
+ def get_audio_duration(file_path):
+     try:
+         import librosa
+         duration = librosa.get_duration(path=file_path)
+         return duration
+     except Exception as e:
+         logging.error(f"Failed to get audio duration: {e}")
+         return None
+
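+ # Note (added commentary): librosa.get_duration(path=...) assumes
+ # librosa >= 0.10; older releases took filename= instead, which the broad
+ # except above would also catch.
+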
+ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
+     genre_txt_path = None
+     lyrics_txt_path = None
+
+     try:
+         model_path, config, params = optimize_model_selection(lyrics_txt_content, genre_txt_content)
+         logging.info(f"Selected model: {model_path}")
+         logging.info(f"Lyrics analysis: {params}")
+
+         has_chorus = params['sections']['chorus'] > 0
+         estimated_duration = params.get('estimated_duration', 90)
+
+         # Segment and token budget
+         if has_chorus:
+             actual_max_tokens = min(12000, int(config['max_tokens'] * 1.3))  # 30% more tokens
+             actual_num_segments = min(5, params['num_segments'] + 2)         # extra segments
+         else:
+             actual_max_tokens = min(10000, int(config['max_tokens'] * 1.2))
+             actual_num_segments = min(4, params['num_segments'] + 1)
+
+         logging.info(f"Estimated duration: {estimated_duration} seconds")
+         logging.info(f"Has chorus sections: {has_chorus}")
+         logging.info(f"Using segments: {actual_num_segments}, tokens: {actual_max_tokens}")
+
+         genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
+         lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
+
+         output_dir = "./output"
+         os.makedirs(output_dir, exist_ok=True)
+         empty_output_folder(output_dir)
+
+         # Revised command: unsupported arguments removed
+         command = [
+             "python", "infer.py",
+             "--stage1_model", model_path,
+             "--stage2_model", "m-a-p/YuE-s2-1B-general",
+             "--genre_txt", genre_txt_path,
+             "--lyrics_txt", lyrics_txt_path,
+             "--run_n_segments", str(actual_num_segments),
+             "--stage2_batch_size", "16",
+             "--output_dir", output_dir,
+             "--cuda_idx", "0",
+             "--max_new_tokens", str(actual_max_tokens),
+             "--disable_offload_model"  # added for GPU memory optimization
+         ]
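+
+         # Illustrative rendering of the assembled command (temp-file names are
+         # placeholders; values follow the worked example above):
+         #   python infer.py --stage1_model m-a-p/YuE-s1-7B-anneal-en-cot
+         #     --stage2_model m-a-p/YuE-s2-1B-general --genre_txt /tmp/genre_xxxx.txt
+         #     --lyrics_txt /tmp/lyrics_xxxx.txt --run_n_segments 5
+         #     --stage2_batch_size 16 --output_dir ./output --cuda_idx 0
+         #     --max_new_tokens 12000 --disable_offload_model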
+
+         env = os.environ.copy()
+         if torch.cuda.is_available():
+             env.update({
+                 "CUDA_VISIBLE_DEVICES": "0",
+                 "CUDA_HOME": "/usr/local/cuda",
+                 "PATH": f"/usr/local/cuda/bin:{env.get('PATH', '')}",
+                 "LD_LIBRARY_PATH": f"/usr/local/cuda/lib64:{env.get('LD_LIBRARY_PATH', '')}",
+                 "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
+                 "CUDA_LAUNCH_BLOCKING": "0"
+             })
+
+         # Handle the transformers cache migration
+         try:
+             from transformers.utils import move_cache
+             move_cache()
+         except Exception as e:
+             logging.warning(f"Cache migration warning (non-critical): {e}")
+
+         process = subprocess.run(
+             command,
+             env=env,
+             check=False,
+             capture_output=True,
+             text=True
+         )
+
+         logging.info(f"Command output: {process.stdout}")
+         if process.stderr:
+             logging.error(f"Command error: {process.stderr}")
+
+         if process.returncode != 0:
+             logging.error(f"Command failed with return code: {process.returncode}")
+             logging.error(f"Command: {' '.join(command)}")
+             raise RuntimeError(f"Inference failed: {process.stderr}")
+
+         last_mp3 = get_last_mp3_file(output_dir)
+         if last_mp3:
+             try:
+                 duration = get_audio_duration(last_mp3)
+                 logging.info(f"Generated audio file: {last_mp3}")
+                 if duration:
+                     logging.info(f"Audio duration: {duration:.2f} seconds")
+                     logging.info(f"Expected duration: {estimated_duration} seconds")
+
+                     if duration < estimated_duration * 0.8:
+                         logging.warning(f"Generated audio is shorter than expected: {duration:.2f}s < {estimated_duration:.2f}s")
+             except Exception as e:
+                 logging.warning(f"Failed to get audio duration: {e}")
+             return last_mp3
+         else:
+             logging.warning("No output audio file generated")
+             return None
+
+     except Exception as e:
+         logging.error(f"Inference error: {e}")
+         raise
+     finally:
+         for path in [genre_txt_path, lyrics_txt_path]:
+             if path and os.path.exists(path):
+                 try:
+                     os.remove(path)
+                     logging.debug(f"Removed temporary file: {path}")
+                 except Exception as e:
+                     logging.warning(f"Failed to remove temporary file {path}: {e}")
+
+ def optimize_model_selection(lyrics, genre):
+     model_path = detect_and_select_model(lyrics)
+     params = calculate_generation_params(lyrics)
+
+     has_chorus = params['sections']['chorus'] > 0
+     tokens_per_segment = params['max_tokens'] // params['num_segments']
+
+     model_config = {
+         "m-a-p/YuE-s1-7B-anneal-en-cot": {
+             "max_tokens": params['max_tokens'],
+             "temperature": 0.8,
+             "batch_size": 16,
+             "num_segments": params['num_segments'],
+             "estimated_duration": params['estimated_duration']
+         },
+         "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": {
+             "max_tokens": params['max_tokens'],
+             "temperature": 0.7,
+             "batch_size": 16,
+             "num_segments": params['num_segments'],
+             "estimated_duration": params['estimated_duration']
+         },
+         "m-a-p/YuE-s1-7B-anneal-zh-cot": {
+             "max_tokens": params['max_tokens'],
+             "temperature": 0.7,
+             "batch_size": 16,
+             "num_segments": params['num_segments'],
+             "estimated_duration": params['estimated_duration']
+         }
+     }
+
+     if has_chorus:
+         for config in model_config.values():
+             config['max_tokens'] = int(config['max_tokens'] * 1.5)
+
+     return model_path, model_config[model_path], params
+
+ def main():
+     with gr.Blocks() as demo:
+         with gr.Column():
+             gr.Markdown("# Open SUNO: Full-Song Generation (Multi-Language Support)")
+
+             with gr.Row():
+                 with gr.Column():
+                     genre_txt = gr.Textbox(
+                         label="Genre",
+                         placeholder="Enter music genre and style descriptions..."
+                     )
+                     lyrics_txt = gr.Textbox(
+                         label="Lyrics (Supports English, Korean, Japanese, Chinese)",
+                         placeholder="Enter song lyrics with [verse], [chorus], [bridge] tags...",
+                         lines=10
+                     )
+
+                 with gr.Column():
+                     num_segments = gr.Number(
+                         label="Number of Song Segments (Auto-adjusted based on lyrics)",
+                         value=2,
+                         minimum=1,
+                         maximum=4,
+                         step=1,
+                         interactive=False
+                     )
+                     max_new_tokens = gr.Slider(
+                         label="Max New Tokens (Auto-adjusted based on lyrics)",
+                         minimum=500,
+                         maximum=32000,
+                         step=500,
+                         value=4000,
+                         interactive=False
+                     )
+                     with gr.Row():
+                         duration_info = gr.Label(label="Estimated Duration")
+                         sections_info = gr.Label(label="Section Information")
+                     submit_btn = gr.Button("Generate Music", variant="primary")
+                     music_out = gr.Audio(label="Generated Audio")
+
+             # The second example uses Korean lyrics ("Someday, in the eyes we
+             # met... / Tell me once again...") to exercise the jp-kr model path.
+             gr.Examples(
+                 examples=[
+                     [
+                         "female blues airy vocal bright vocal piano sad romantic guitar jazz",
+                         """[verse]
+ In the quiet of the evening, shadows start to fall
+ Whispers of the night wind echo through the hall
+ Lost within the silence, I hear your gentle voice
+ Guiding me back homeward, making my heart rejoice
+
+ [chorus]
+ Don't let this moment fade, hold me close tonight
+ With you here beside me, everything's alright
+ Can't imagine life alone, don't want to let you go
+ Stay with me forever, let our love just flow
+
+ [verse]
+ In the quiet of the evening, shadows start to fall
+ Whispers of the night wind echo through the hall
+ Lost within the silence, I hear your gentle voice
+ Guiding me back homeward, making my heart rejoice
+
+ [chorus]
+ Don't let this moment fade, hold me close tonight
+ With you here beside me, everything's alright
+ Can't imagine life alone, don't want to let you go
+ Stay with me forever, let our love just flow"""
+                     ],
+                     [
+                         "K-pop bright energetic synth dance electronic",
+                         """[verse]
+ 언젠가 마주한 눈빛 속에서
+
+ [chorus]
+ 다시 한 번 내게 말해줘
+
+ [verse]
+ 어두운 밤을 지날 때마다
+
+ [chorus]
+ 다시 한 번 내게 말해줘
+ """
+                     ]
+                 ],
+                 inputs=[genre_txt, lyrics_txt]
+             )
+
+         initialize_system()
+
+         def update_info(lyrics):
+             if not lyrics:
+                 return "No lyrics entered", "No sections detected"
+             params = calculate_generation_params(lyrics)
+             duration = params['estimated_duration']
+             sections = params['sections']
+             return (
+                 f"Estimated duration: {duration:.1f} seconds",
+                 f"Verses: {sections['verse']}, Chorus: {sections['chorus']} (Expected full length including chorus)"
+             )
+
+         lyrics_txt.change(
+             fn=update_info,
+             inputs=[lyrics_txt],
+             outputs=[duration_info, sections_info]
+         )
+
+         submit_btn.click(
+             fn=infer,
+             inputs=[genre_txt, lyrics_txt, num_segments, max_new_tokens],
+             outputs=[music_out]
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     demo = main()
+     demo.queue(max_size=20).launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=True,
+         show_api=True,
+         show_error=True,
+         max_threads=8
+     )
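+
+ # To try this Space locally (assuming infer.py and the downloaded
+ # xcodec_mini_infer assets live under ./inference): run `python app-backup.py`,
+ # then open http://localhost:7860; share=True also prints a temporary public
+ # gradio.live URL.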