Nishur committed on
Commit
45248e1
·
verified ·
1 Parent(s): af655e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -25
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
5
  from TTS.api import TTS
6
  from deep_translator import GoogleTranslator
7
  import pysrt
8
- import whisper # Free speech-to-text
9
  import webvtt
10
  import shutil
11
  import time
@@ -37,20 +37,38 @@ SUBTITLE_STYLES = {
37
  "Black Background": "background-color: black; padding: 5px;"
38
  }
39
 
40
- # Create output directory
41
  OUTPUT_DIR = "outputs"
42
  os.makedirs(OUTPUT_DIR, exist_ok=True)
43
 
44
- # Initialize TTS
45
  device = "cuda" if torch.cuda.is_available() else "cpu"
46
- tts_models = {
47
- "en": TTS("tts_models/en/ljspeech/tacotron2-DDC").to(device),
48
- "es": TTS("tts_models/es/css10/vits").to(device),
49
- "fr": TTS("tts_models/fr/css10/vits").to(device),
50
- "de": TTS("tts_models/de/thorsten/tacotron2-DDC").to(device),
51
- "ja": TTS("tts_models/ja/kokoro/tacotron2-DDC").to(device),
52
- "hi": TTS("tts_models/hi/kb/tacotron2-DDC").to(device)
53
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  # Initialize Whisper (load when needed)
56
  whisper_model = None
@@ -69,7 +87,7 @@ def extract_audio(video_path: str) -> str:
69
  '-acodec', 'pcm_s16le', '-ar', '16000',
70
  '-ac', '1', '-y', audio_path
71
  ]
72
- subprocess.run(cmd, check=True)
73
  return audio_path
74
 
75
  def transcribe_with_whisper(audio_path: str, language: str = None) -> str:
@@ -158,7 +176,10 @@ def generate_translated_audio(
158
 
159
  audio_files = []
160
  timings = []
161
- tts = tts_models.get(target_lang)
 
 
 
162
 
163
  for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} audio")):
164
  text = sub.text.strip()
@@ -185,7 +206,7 @@ def generate_translated_audio(
185
  subprocess.run([
186
  'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
187
  '-t', str(video_duration), '-y', silence_file
188
- ], check=True)
189
 
190
  # Mix audio
191
  filter_complex = "[0:a]" + "".join(
@@ -200,7 +221,7 @@ def generate_translated_audio(
200
  '-map', '[aout]',
201
  os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")]
202
 
203
- subprocess.run(' '.join(cmd), shell=True, check=True)
204
  shutil.rmtree(temp_dir)
205
  return os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
206
 
@@ -329,7 +350,7 @@ def process_video(
329
  'ffmpeg', '-i', base_video, '-i', translated_audio,
330
  '-map', '0:v', '-map', '1:a', '-c:v', 'copy', '-c:a', 'aac',
331
  '-y', output_video
332
- ], check=True)
333
 
334
  # 7. Create HTML player
335
  progress(0.9, "Creating HTML player...")
@@ -450,12 +471,10 @@ def create_interface():
450
  return demo
451
 
452
  if __name__ == "__main__":
453
- # Check requirements
454
- try:
455
- subprocess.run(["ffmpeg", "-version"], check=True)
456
- import torch, whisper
457
- demo = create_interface()
458
- demo.launch()
459
- except Exception as e:
460
- print(f"Error: {str(e)}")
461
- print("Please install all requirements: pip install -r requirements.txt")
 
5
  from TTS.api import TTS
6
  from deep_translator import GoogleTranslator
7
  import pysrt
8
+ import whisper
9
  import webvtt
10
  import shutil
11
  import time
 
37
  "Black Background": "background-color: black; padding: 5px;"
38
  }
39
 
40
+ # Create output directory (relative path for Spaces)
41
  OUTPUT_DIR = "outputs"
42
  os.makedirs(OUTPUT_DIR, exist_ok=True)
43
 
44
# TTS initialization — models are loaded lazily with error handling.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Cache of language code -> loaded TTS model (None marks a failed load).
tts_models = {}

def load_tts_model(model_name: str, lang_code: str):
    """Load a Coqui TTS model onto the active device.

    Returns the model instance, or None when loading fails — the error is
    logged instead of raised so one broken voice doesn't take down the app.
    ``lang_code`` is accepted for interface stability but not read here.
    """
    try:
        model = TTS(model_name).to(device)
        # Switch to the gruut phonemizer when the config exposes one
        # (espeak may be absent in the hosting environment).
        if hasattr(model.synthesizer, 'tts_config'):
            model.synthesizer.tts_config.phonemizer = "gruut"
        return model
    except Exception as e:
        # NOTE(review): `logger` is assumed to be configured earlier in the file.
        logger.error(f"Failed to load {model_name}: {str(e)}")
        return None
58
+
59
# Initialize models only when needed
def get_tts_model(lang_code: str):
    """Return the cached TTS model for *lang_code*, loading it on first use.

    Returns None when the language is unsupported or the model failed to
    load; callers already treat None as "model unavailable".

    Fix: the previous version indexed the map directly, so an unsupported
    language code raised an unhandled KeyError instead of the None path.
    """
    if lang_code not in tts_models:
        model_map = {
            "en": "tts_models/en/ljspeech/tacotron2-DDC",
            "es": "tts_models/es/css10/vits",
            "fr": "tts_models/fr/css10/vits",
            "de": "tts_models/de/thorsten/vits",  # Using VITS instead of tacotron2
            "ja": "tts_models/ja/kokoro/tacotron2-DDC",
            "hi": "tts_models/hi/kb/tacotron2-DDC"
        }
        model_name = model_map.get(lang_code)
        if model_name is None:
            # Unsupported language: cache the miss so we don't re-check,
            # and let the caller's existing None handling report it.
            tts_models[lang_code] = None
        else:
            tts_models[lang_code] = load_tts_model(model_name, lang_code)
    return tts_models[lang_code]
72
 
73
  # Initialize Whisper (load when needed)
74
  whisper_model = None
 
87
  '-acodec', 'pcm_s16le', '-ar', '16000',
88
  '-ac', '1', '-y', audio_path
89
  ]
90
+ subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
91
  return audio_path
92
 
93
  def transcribe_with_whisper(audio_path: str, language: str = None) -> str:
 
176
 
177
  audio_files = []
178
  timings = []
179
+ tts = get_tts_model(target_lang)
180
+
181
+ if tts is None:
182
+ raise Exception(f"TTS model for {target_lang} not available")
183
 
184
  for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} audio")):
185
  text = sub.text.strip()
 
206
  subprocess.run([
207
  'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
208
  '-t', str(video_duration), '-y', silence_file
209
+ ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
210
 
211
  # Mix audio
212
  filter_complex = "[0:a]" + "".join(
 
221
  '-map', '[aout]',
222
  os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")]
223
 
224
+ subprocess.run(' '.join(cmd), shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
225
  shutil.rmtree(temp_dir)
226
  return os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
227
 
 
350
  'ffmpeg', '-i', base_video, '-i', translated_audio,
351
  '-map', '0:v', '-map', '1:a', '-c:v', 'copy', '-c:a', 'aac',
352
  '-y', output_video
353
+ ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
354
 
355
  # 7. Create HTML player
356
  progress(0.9, "Creating HTML player...")
 
471
  return demo
472
 
473
if __name__ == "__main__":
    # Start every run with a fresh outputs directory so artifacts from a
    # previous session are never served again.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    demo = create_interface()
    # NOTE(review): original comment says share=True is required for Hugging
    # Face Spaces — Spaces typically ignores share links; confirm if needed.
    demo.launch(share=True)