Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import torch
|
|
5 |
from TTS.api import TTS
|
6 |
from deep_translator import GoogleTranslator
|
7 |
import pysrt
|
8 |
-
import whisper
|
9 |
import webvtt
|
10 |
import shutil
|
11 |
import time
|
@@ -37,20 +37,38 @@ SUBTITLE_STYLES = {
|
|
37 |
"Black Background": "background-color: black; padding: 5px;"
|
38 |
}
|
39 |
|
40 |
-
# Create output directory
|
41 |
OUTPUT_DIR = "outputs"
|
42 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
43 |
|
44 |
-
# Initialize TTS
|
45 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
46 |
-
tts_models = {
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# Initialize Whisper (load when needed)
|
56 |
whisper_model = None
|
@@ -69,7 +87,7 @@ def extract_audio(video_path: str) -> str:
|
|
69 |
'-acodec', 'pcm_s16le', '-ar', '16000',
|
70 |
'-ac', '1', '-y', audio_path
|
71 |
]
|
72 |
-
subprocess.run(cmd, check=True)
|
73 |
return audio_path
|
74 |
|
75 |
def transcribe_with_whisper(audio_path: str, language: str = None) -> str:
|
@@ -158,7 +176,10 @@ def generate_translated_audio(
|
|
158 |
|
159 |
audio_files = []
|
160 |
timings = []
|
161 |
-
tts =
|
|
|
|
|
|
|
162 |
|
163 |
for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} audio")):
|
164 |
text = sub.text.strip()
|
@@ -185,7 +206,7 @@ def generate_translated_audio(
|
|
185 |
subprocess.run([
|
186 |
'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
|
187 |
'-t', str(video_duration), '-y', silence_file
|
188 |
-
], check=True)
|
189 |
|
190 |
# Mix audio
|
191 |
filter_complex = "[0:a]" + "".join(
|
@@ -200,7 +221,7 @@ def generate_translated_audio(
|
|
200 |
'-map', '[aout]',
|
201 |
os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")]
|
202 |
|
203 |
-
subprocess.run(' '.join(cmd), shell=True, check=True)
|
204 |
shutil.rmtree(temp_dir)
|
205 |
return os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
206 |
|
@@ -329,7 +350,7 @@ def process_video(
|
|
329 |
'ffmpeg', '-i', base_video, '-i', translated_audio,
|
330 |
'-map', '0:v', '-map', '1:a', '-c:v', 'copy', '-c:a', 'aac',
|
331 |
'-y', output_video
|
332 |
-
], check=True)
|
333 |
|
334 |
# 7. Create HTML player
|
335 |
progress(0.9, "Creating HTML player...")
|
@@ -450,12 +471,10 @@ def create_interface():
|
|
450 |
return demo
|
451 |
|
452 |
if __name__ == "__main__":
|
453 |
-
#
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
print(f"Error: {str(e)}")
|
461 |
-
print("Please install all requirements: pip install -r requirements.txt")
|
|
|
5 |
from TTS.api import TTS
|
6 |
from deep_translator import GoogleTranslator
|
7 |
import pysrt
|
8 |
+
import whisper
|
9 |
import webvtt
|
10 |
import shutil
|
11 |
import time
|
|
|
37 |
"Black Background": "background-color: black; padding: 5px;"
|
38 |
}
|
39 |
|
40 |
+
# Create output directory (relative path for Spaces)
|
41 |
OUTPUT_DIR = "outputs"
|
42 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
43 |
|
44 |
+
# Initialize TTS with error handling
|
45 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
46 |
+
tts_models = {}
|
47 |
+
|
48 |
+
def load_tts_model(model_name: str, lang_code: str):
    """Build the TTS model ``model_name`` and move it to ``device``.

    Args:
        model_name: Model identifier handed to ``TTS(...)``.
        lang_code: Language code of the model. Not used by this function.

    Returns:
        The loaded model, or ``None`` when any exception is raised while
        loading; the failure is reported through ``logger.error``.
    """
    try:
        model = TTS(model_name).to(device)
        synthesizer = model.synthesizer
        # The phonemizer is switched to gruut whenever the synthesizer
        # exposes a ``tts_config``, not only after an espeak failure.
        # NOTE(review): presumably a workaround for espeak being unavailable;
        # confirm the override still takes effect once the model is built.
        if hasattr(synthesizer, 'tts_config'):
            synthesizer.tts_config.phonemizer = "gruut"
    except Exception as err:
        logger.error(f"Failed to load {model_name}: {str(err)}")
        return None
    return model
|
58 |
+
|
59 |
+
# Initialize models only when needed
|
60 |
+
def get_tts_model(lang_code: str):
    """Return the TTS model for ``lang_code``, loading it on first use.

    Successfully loaded models are kept in the module-level ``tts_models``
    dict, so each language is loaded at most once per process.

    Args:
        lang_code: Language key; supported values are the keys of
            ``model_map`` below.

    Returns:
        The model returned by ``load_tts_model``, or ``None`` when the
        language has no entry in ``model_map`` or the model failed to load.
    """
    model_map = {
        "en": "tts_models/en/ljspeech/tacotron2-DDC",
        "es": "tts_models/es/css10/vits",
        "fr": "tts_models/fr/css10/vits",
        "de": "tts_models/de/thorsten/vits",  # Using VITS instead of tacotron2
        "ja": "tts_models/ja/kokoro/tacotron2-DDC",
        "hi": "tts_models/hi/kb/tacotron2-DDC",
    }

    # An unsupported language used to raise KeyError from the table lookup.
    # Report it as None instead, the same value a failed load produces.
    model_name = model_map.get(lang_code)
    if model_name is None:
        return None

    tts = tts_models.get(lang_code)
    if tts is None:
        tts = load_tts_model(model_name, lang_code)
        # Cache only successful loads: storing a failure would disable the
        # language for the rest of the process after one transient error.
        if tts is not None:
            tts_models[lang_code] = tts
    return tts
|
72 |
|
73 |
# Initialize Whisper (load when needed)
|
74 |
whisper_model = None
|
|
|
87 |
'-acodec', 'pcm_s16le', '-ar', '16000',
|
88 |
'-ac', '1', '-y', audio_path
|
89 |
]
|
90 |
+
subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
91 |
return audio_path
|
92 |
|
93 |
def transcribe_with_whisper(audio_path: str, language: str = None) -> str:
|
|
|
176 |
|
177 |
audio_files = []
|
178 |
timings = []
|
179 |
+
tts = get_tts_model(target_lang)
|
180 |
+
|
181 |
+
if tts is None:
|
182 |
+
raise Exception(f"TTS model for {target_lang} not available")
|
183 |
|
184 |
for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} audio")):
|
185 |
text = sub.text.strip()
|
|
|
206 |
subprocess.run([
|
207 |
'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
|
208 |
'-t', str(video_duration), '-y', silence_file
|
209 |
+
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
210 |
|
211 |
# Mix audio
|
212 |
filter_complex = "[0:a]" + "".join(
|
|
|
221 |
'-map', '[aout]',
|
222 |
os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")]
|
223 |
|
224 |
+
subprocess.run(' '.join(cmd), shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
225 |
shutil.rmtree(temp_dir)
|
226 |
return os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
227 |
|
|
|
350 |
'ffmpeg', '-i', base_video, '-i', translated_audio,
|
351 |
'-map', '0:v', '-map', '1:a', '-c:v', 'copy', '-c:a', 'aac',
|
352 |
'-y', output_video
|
353 |
+
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
354 |
|
355 |
# 7. Create HTML player
|
356 |
progress(0.9, "Creating HTML player...")
|
|
|
471 |
return demo
|
472 |
|
473 |
if __name__ == "__main__":
    # Start every run with an empty output directory: remove whatever is
    # already there, then recreate the directory.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    demo = create_interface()
    # A Hugging Face Space serves the app itself, so no share link is
    # requested; Gradio reports share=True as unsupported on Spaces.
    demo.launch()
|
|
|
|