ginipick committed
Commit 3414e90 · verified
Parent: fba7d92

Delete app-backup.py

Files changed (1)
  app-backup.py  +0 -548
app-backup.py DELETED
@@ -1,548 +0,0 @@
- import gradio as gr
- import subprocess
- import os
- import shutil
- import tempfile
- import torch
- import logging
- import numpy as np
- import re
- from concurrent.futures import ThreadPoolExecutor
- from functools import lru_cache
-
- # Logging configuration
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(levelname)s - %(message)s',
-     handlers=[
-         logging.FileHandler('yue_generation.log'),
-         logging.StreamHandler()
-     ]
- )
-
- def optimize_gpu_settings():
-     if torch.cuda.is_available():
-         # Optimize GPU memory management
-         torch.backends.cuda.matmul.allow_tf32 = True
-         torch.backends.cudnn.benchmark = True
-         torch.backends.cudnn.enabled = True
-         torch.backends.cudnn.deterministic = False
-
-         # Memory settings tuned for the L40S
-         torch.cuda.empty_cache()
-         torch.cuda.set_device(0)
-
-         # CUDA stream optimization
-         torch.cuda.Stream(0)
-
-         # Optimize memory allocation
-         os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
-
-         logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
-         logging.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
-
-         # L40S-specific settings
-         if 'L40S' in torch.cuda.get_device_name(0):
-             torch.cuda.set_per_process_memory_fraction(0.95)
-
- def analyze_lyrics(lyrics, repeat_chorus=2):
-     lines = [line.strip() for line in lyrics.split('\n') if line.strip()]
-
-     sections = {
-         'verse': 0,
-         'chorus': 0,
-         'bridge': 0,
-         'total_lines': len(lines)
-     }
-
-     current_section = None
-     section_lines = {
-         'verse': [],
-         'chorus': [],
-         'bridge': []
-     }
-     last_section = None
-     last_section_start = 0  # initialize so it is bound before the first tag is seen
-
-     # Find the last section tag
-     for i, line in enumerate(lines):
-         if '[verse]' in line.lower() or '[chorus]' in line.lower() or '[bridge]' in line.lower():
-             last_section = i
-
-     for i, line in enumerate(lines):
-         lower_line = line.lower()
-
-         # Handle section tags
-         if '[verse]' in lower_line:
-             if current_section:  # save the previous section's lines
-                 section_lines[current_section].extend(lines[last_section_start:i])
-             current_section = 'verse'
-             sections['verse'] += 1
-             last_section_start = i + 1
-             continue
-         elif '[chorus]' in lower_line:
-             if current_section:
-                 section_lines[current_section].extend(lines[last_section_start:i])
-             current_section = 'chorus'
-             sections['chorus'] += 1
-             last_section_start = i + 1
-             continue
-         elif '[bridge]' in lower_line:
-             if current_section:
-                 section_lines[current_section].extend(lines[last_section_start:i])
-             current_section = 'bridge'
-             sections['bridge'] += 1
-             last_section_start = i + 1
-             continue
-
-     # Append the final section's lines
-     if current_section and last_section_start < len(lines):
-         section_lines[current_section].extend(lines[last_section_start:])
-
-     # Repeat the chorus if requested
-     if sections['chorus'] > 0 and repeat_chorus > 1:
-         original_chorus = section_lines['chorus'][:]
-         for _ in range(repeat_chorus - 1):
-             section_lines['chorus'].extend(original_chorus)
-
-     # Log the line counts per section
-     logging.info(f"Section line counts - Verse: {len(section_lines['verse'])}, "
-                  f"Chorus: {len(section_lines['chorus'])}, "
-                  f"Bridge: {len(section_lines['bridge'])}")
-
-     return sections, (sections['verse'] + sections['chorus'] + sections['bridge']), len(lines), section_lines
-
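For orientation, here is how the parser above behaves on a small tagged input (a sketch using the function defined above; the default repeat_chorus=2 doubles the chorus lines):

    sample = "[verse]\nline one\nline two\n[chorus]\nhook line"
    sections, tag_count, n_lines, by_type = analyze_lyrics(sample)
    # sections  -> {'verse': 1, 'chorus': 1, 'bridge': 0, 'total_lines': 5}
    # tag_count -> 2, n_lines -> 5
    # by_type['verse']  -> ['line one', 'line two']
    # by_type['chorus'] -> ['hook line', 'hook line']  (duplicated by repeat_chorus)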
- def calculate_generation_params(lyrics):
-     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
-
-     # Base time per line (in seconds)
-     time_per_line = {
-         'verse': 4,   # 4 seconds per verse line
-         'chorus': 6,  # 6 seconds per chorus line
-         'bridge': 5   # 5 seconds per bridge line
-     }
-
-     # Estimated duration for each section (including the final one)
-     section_durations = {}
-     for section_type in ['verse', 'chorus', 'bridge']:
-         lines_count = len(section_lines[section_type])
-         section_durations[section_type] = lines_count * time_per_line[section_type]
-
-     # Total duration (with headroom)
-     total_duration = sum(duration for duration in section_durations.values())
-     total_duration = max(60, int(total_duration * 1.2))  # add 20% headroom
-
-     # Token budget (extra tokens for the final section)
-     base_tokens = 3000
-     tokens_per_line = 200
-     extra_tokens = 1000  # extra tokens for the final section
-
-     total_tokens = base_tokens + (total_lines * tokens_per_line) + extra_tokens
-
-     # Segment count (extra segment for the final section)
-     if sections['chorus'] > 0:
-         num_segments = 4  # 4 segments when a chorus is present
-     else:
-         num_segments = 3  # 3 segments when there is no chorus
-
-     # Cap the token count (raised ceiling)
-     max_tokens = min(12000, total_tokens)
-
-     return {
-         'max_tokens': max_tokens,
-         'num_segments': num_segments,
-         'sections': sections,
-         'section_lines': section_lines,
-         'estimated_duration': total_duration,
-         'section_durations': section_durations,
-         'has_chorus': sections['chorus'] > 0
-     }
-
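Working through the same five-line sample: the verse contributes 2 × 4 = 8 s, the doubled chorus 2 × 6 = 12 s, so the raw total is 20 s; after the 20% headroom, the max(60, ...) floor lifts the estimate to 60 s. The token budget is 3000 + 5 × 200 + 1000 = 5000, below the 12000 cap, and the presence of a chorus sets num_segments to 4.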
- def detect_and_select_model(text):
-     if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text):      # Korean jamo / Hangul syllables
-         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
-     elif re.search(r'[\u4e00-\u9fff]', text):                 # CJK unified ideographs
-         return "m-a-p/YuE-s1-7B-anneal-zh-cot"
-     elif re.search(r'[\u3040-\u309F\u30A0-\u30FF]', text):    # Japanese hiragana / katakana
-         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
-     else:
-         return "m-a-p/YuE-s1-7B-anneal-en-cot"
-
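A quick sanity check of the Unicode-range routing (return values follow directly from the branches above; note the Korean check runs first, so mixed Korean/English strings route to the jp-kr model):

    detect_and_select_model("Hello world")  # -> "m-a-p/YuE-s1-7B-anneal-en-cot"
    detect_and_select_model("다시 한 번")     # -> "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
    detect_and_select_model("你好")          # -> "m-a-p/YuE-s1-7B-anneal-zh-cot"
    detect_and_select_model("こんにちは")     # -> "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"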
- def install_flash_attn():
-     try:
-         if not torch.cuda.is_available():
-             logging.warning("GPU not available, skipping flash-attn installation")
-             return False
-
-         cuda_version = torch.version.cuda
-         if cuda_version is None:
-             logging.warning("CUDA not available, skipping flash-attn installation")
-             return False
-
-         logging.info(f"Detected CUDA version: {cuda_version}")
-
-         try:
-             import flash_attn
-             logging.info("flash-attn already installed")
-             return True
-         except ImportError:
-             logging.info("Installing flash-attn...")
-
-             subprocess.run(
-                 ["pip", "install", "flash-attn", "--no-build-isolation"],
-                 check=True,
-                 capture_output=True
-             )
-             logging.info("flash-attn installed successfully!")
-             return True
-
-     except Exception as e:
-         logging.warning(f"Failed to install flash-attn: {e}")
-         return False
-
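One caveat worth noting: invoking the bare pip binary can target a different interpreter than the one running the app. A slightly more robust variant (a sketch, not what the original ran) would go through the current interpreter:

    import sys
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
        check=True,
        capture_output=True
    )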
- def initialize_system():
-     optimize_gpu_settings()
-
-     with ThreadPoolExecutor(max_workers=4) as executor:
-         futures = []
-
-         futures.append(executor.submit(install_flash_attn))
-
-         from huggingface_hub import snapshot_download
-
-         folder_path = './inference/xcodec_mini_infer'
-         os.makedirs(folder_path, exist_ok=True)
-         logging.info(f"Created folder at: {folder_path}")
-
-         futures.append(executor.submit(
-             snapshot_download,
-             repo_id="m-a-p/xcodec_mini_infer",
-             local_dir="./inference/xcodec_mini_infer",
-             resume_download=True
-         ))
-
-         for future in futures:
-             future.result()
-
-     try:
-         os.chdir("./inference")
-         logging.info(f"Working directory changed to: {os.getcwd()}")
-     except FileNotFoundError as e:
-         logging.error(f"Directory error: {e}")
-         raise
-
- @lru_cache(maxsize=100)
- def get_cached_file_path(content_hash, prefix):
-     return create_temp_file(content_hash, prefix)
-
- def empty_output_folder(output_dir):
-     try:
-         shutil.rmtree(output_dir)
-         os.makedirs(output_dir)
-         logging.info(f"Output folder cleaned: {output_dir}")
-     except Exception as e:
-         logging.error(f"Error cleaning output folder: {e}")
-         raise
-
- def create_temp_file(content, prefix, suffix=".txt"):
-     temp_file = tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=prefix, suffix=suffix)
-     content = content.strip() + "\n\n"
-     content = content.replace("\r\n", "\n").replace("\r", "\n")
-     temp_file.write(content)
-     temp_file.close()
-     logging.debug(f"Temporary file created: {temp_file.name}")
-     return temp_file.name
-
- def get_last_mp3_file(output_dir):
-     mp3_files = [f for f in os.listdir(output_dir) if f.endswith('.mp3')]
-     if not mp3_files:
-         logging.warning("No MP3 files found")
-         return None
-
-     mp3_files_with_path = [os.path.join(output_dir, f) for f in mp3_files]
-     mp3_files_with_path.sort(key=os.path.getmtime, reverse=True)
-     return mp3_files_with_path[0]
-
- def get_audio_duration(file_path):
-     try:
-         import librosa
-         duration = librosa.get_duration(path=file_path)
-         return duration
-     except Exception as e:
-         logging.error(f"Failed to get audio duration: {e}")
-         return None
-
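Note that get_cached_file_path is defined but never called in this file, and lru_cache memoizes only the returned path: since infer() below deletes its temp files in a finally block, a cached path could point at a file that no longer exists. A defensive wrapper (a sketch; get_file_path is a hypothetical helper, not in the original) would verify the path before reuse:

    def get_file_path(content, prefix):
        path = get_cached_file_path(content, prefix)
        if not os.path.exists(path):            # cache entry went stale
            get_cached_file_path.cache_clear()  # drop all memoized paths
            path = get_cached_file_path(content, prefix)
        return path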
- def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
-     genre_txt_path = None
-     lyrics_txt_path = None
-
-     try:
-         model_path, config, params = optimize_model_selection(lyrics_txt_content, genre_txt_content)
-         logging.info(f"Selected model: {model_path}")
-         logging.info(f"Lyrics analysis: {params}")
-
-         has_chorus = params['sections']['chorus'] > 0
-         estimated_duration = params.get('estimated_duration', 90)
-
-         # Configure segment and token counts
-         if has_chorus:
-             actual_max_tokens = min(12000, int(config['max_tokens'] * 1.3))  # 30% more tokens
-             actual_num_segments = min(5, params['num_segments'] + 2)  # extra segments
-         else:
-             actual_max_tokens = min(10000, int(config['max_tokens'] * 1.2))
-             actual_num_segments = min(4, params['num_segments'] + 1)
-
-         logging.info(f"Estimated duration: {estimated_duration} seconds")
-         logging.info(f"Has chorus sections: {has_chorus}")
-         logging.info(f"Using segments: {actual_num_segments}, tokens: {actual_max_tokens}")
-
-         genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
-         lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
-
-         output_dir = "./output"
-         os.makedirs(output_dir, exist_ok=True)
-         empty_output_folder(output_dir)
-
-         # Revised command - unsupported arguments removed
-         command = [
-             "python", "infer.py",
-             "--stage1_model", model_path,
-             "--stage2_model", "m-a-p/YuE-s2-1B-general",
-             "--genre_txt", genre_txt_path,
-             "--lyrics_txt", lyrics_txt_path,
-             "--run_n_segments", str(actual_num_segments),
-             "--stage2_batch_size", "16",
-             "--output_dir", output_dir,
-             "--cuda_idx", "0",
-             "--max_new_tokens", str(actual_max_tokens),
-             "--disable_offload_model"  # added for GPU memory optimization
-         ]
-
-         env = os.environ.copy()
-         if torch.cuda.is_available():
-             env.update({
-                 "CUDA_VISIBLE_DEVICES": "0",
-                 "CUDA_HOME": "/usr/local/cuda",
-                 "PATH": f"/usr/local/cuda/bin:{env.get('PATH', '')}",
-                 "LD_LIBRARY_PATH": f"/usr/local/cuda/lib64:{env.get('LD_LIBRARY_PATH', '')}",
-                 "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
-                 "CUDA_LAUNCH_BLOCKING": "0"
-             })
-
-         # Handle transformers cache migration
-         try:
-             from transformers.utils import move_cache
-             move_cache()
-         except Exception as e:
-             logging.warning(f"Cache migration warning (non-critical): {e}")
-
-         process = subprocess.run(
-             command,
-             env=env,
-             check=False,
-             capture_output=True,
-             text=True
-         )
-
-         logging.info(f"Command output: {process.stdout}")
-         if process.stderr:
-             logging.error(f"Command error: {process.stderr}")
-
-         if process.returncode != 0:
-             logging.error(f"Command failed with return code: {process.returncode}")
-             logging.error(f"Command: {' '.join(command)}")
-             raise RuntimeError(f"Inference failed: {process.stderr}")
-
-         last_mp3 = get_last_mp3_file(output_dir)
-         if last_mp3:
-             try:
-                 duration = get_audio_duration(last_mp3)
-                 logging.info(f"Generated audio file: {last_mp3}")
-                 if duration:
-                     logging.info(f"Audio duration: {duration:.2f} seconds")
-                     logging.info(f"Expected duration: {estimated_duration} seconds")
-
-                     if duration < estimated_duration * 0.8:
-                         logging.warning(f"Generated audio is shorter than expected: {duration:.2f}s < {estimated_duration:.2f}s")
-             except Exception as e:
-                 logging.warning(f"Failed to get audio duration: {e}")
-             return last_mp3
-         else:
-             logging.warning("No output audio file generated")
-             return None
-
-     except Exception as e:
-         logging.error(f"Inference error: {e}")
-         raise
-     finally:
-         for path in [genre_txt_path, lyrics_txt_path]:
-             if path and os.path.exists(path):
-                 try:
-                     os.remove(path)
-                     logging.debug(f"Removed temporary file: {path}")
-                 except Exception as e:
-                     logging.warning(f"Failed to remove temporary file {path}: {e}")
-
- def optimize_model_selection(lyrics, genre):
-     model_path = detect_and_select_model(lyrics)
-     params = calculate_generation_params(lyrics)
-
-     has_chorus = params['sections']['chorus'] > 0
-     tokens_per_segment = params['max_tokens'] // params['num_segments']
-
-     model_config = {
-         "m-a-p/YuE-s1-7B-anneal-en-cot": {
-             "max_tokens": params['max_tokens'],
-             "temperature": 0.8,
-             "batch_size": 16,
-             "num_segments": params['num_segments'],
-             "estimated_duration": params['estimated_duration']
-         },
-         "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": {
-             "max_tokens": params['max_tokens'],
-             "temperature": 0.7,
-             "batch_size": 16,
-             "num_segments": params['num_segments'],
-             "estimated_duration": params['estimated_duration']
-         },
-         "m-a-p/YuE-s1-7B-anneal-zh-cot": {
-             "max_tokens": params['max_tokens'],
-             "temperature": 0.7,
-             "batch_size": 16,
-             "num_segments": params['num_segments'],
-             "estimated_duration": params['estimated_duration']
-         }
-     }
-
-     if has_chorus:
-         for config in model_config.values():
-             config['max_tokens'] = int(config['max_tokens'] * 1.5)
-
-     return model_path, model_config[model_path], params
-
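Tracing the token budget end to end for the earlier five-line sample: calculate_generation_params yields max_tokens = 5000; the chorus multiplies that by 1.5 here (7500); and infer() then takes min(12000, int(7500 * 1.3)) = 9750 tokens with min(5, 4 + 2) = 5 segments. The per-stage multipliers compound rather than replace one another.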
- def main():
-     with gr.Blocks() as demo:
-         with gr.Column():
-             gr.Markdown("# Open SUNO: Full-Song Generation (Multi-Language Support)")
-
-             with gr.Row():
-                 with gr.Column():
-                     genre_txt = gr.Textbox(
-                         label="Genre",
-                         placeholder="Enter music genre and style descriptions..."
-                     )
-                     lyrics_txt = gr.Textbox(
-                         label="Lyrics (Supports English, Korean, Japanese, Chinese)",
-                         placeholder="Enter song lyrics with [verse], [chorus], [bridge] tags...",
-                         lines=10
-                     )
-
-                 with gr.Column():
-                     num_segments = gr.Number(
-                         label="Number of Song Segments (Auto-adjusted based on lyrics)",
-                         value=2,
-                         minimum=1,
-                         maximum=4,
-                         step=1,
-                         interactive=False
-                     )
-                     max_new_tokens = gr.Slider(
-                         label="Max New Tokens (Auto-adjusted based on lyrics)",
-                         minimum=500,
-                         maximum=32000,
-                         step=500,
-                         value=4000,
-                         interactive=False
-                     )
-                     with gr.Row():
-                         duration_info = gr.Label(label="Estimated Duration")
-                         sections_info = gr.Label(label="Section Information")
-                     submit_btn = gr.Button("Generate Music", variant="primary")
-                     music_out = gr.Audio(label="Generated Audio")
-
-             gr.Examples(
-                 examples=[
-                     [
-                         "female blues airy vocal bright vocal piano sad romantic guitar jazz",
-                         """[verse]
- In the quiet of the evening, shadows start to fall
- Whispers of the night wind echo through the hall
- Lost within the silence, I hear your gentle voice
- Guiding me back homeward, making my heart rejoice
-
- [chorus]
- Don't let this moment fade, hold me close tonight
- With you here beside me, everything's alright
- Can't imagine life alone, don't want to let you go
- Stay with me forever, let our love just flow
-
- [verse]
- In the quiet of the evening, shadows start to fall
- Whispers of the night wind echo through the hall
- Lost within the silence, I hear your gentle voice
- Guiding me back homeward, making my heart rejoice
-
- [chorus]
- Don't let this moment fade, hold me close tonight
- With you here beside me, everything's alright
- Can't imagine life alone, don't want to let you go
- Stay with me forever, let our love just flow"""
-                     ],
-                     [
-                         "K-pop bright energetic synth dance electronic",
-                         """[verse]
- 언젠가 마주한 눈빛 속에서
-
- [chorus]
- 다시 한 번 내게 말해줘
-
- [verse]
- 어두운 밤을 지날 때마다
-
- [chorus]
- 다시 한 번 내게 말해줘
- """
-                     ]
-                 ],
-                 inputs=[genre_txt, lyrics_txt]
-             )
-
-         initialize_system()
-
-         def update_info(lyrics):
-             if not lyrics:
-                 return "No lyrics entered", "No sections detected"
-             params = calculate_generation_params(lyrics)
-             duration = params['estimated_duration']
-             sections = params['sections']
-             return (
-                 f"Estimated duration: {duration:.1f} seconds",
-                 f"Verses: {sections['verse']}, Chorus: {sections['chorus']} (Expected full length including chorus)"
-             )
-
-         lyrics_txt.change(
-             fn=update_info,
-             inputs=[lyrics_txt],
-             outputs=[duration_info, sections_info]
-         )
-
-         submit_btn.click(
-             fn=infer,
-             inputs=[genre_txt, lyrics_txt, num_segments, max_new_tokens],
-             outputs=[music_out]
-         )
-
-     return demo
-
- if __name__ == "__main__":
-     demo = main()
-     demo.queue(max_size=20).launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         share=True,
-         show_api=True,
-         show_error=True,
-         max_threads=8
-     )