Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,19 +2,22 @@ import streamlit as st
|
|
2 |
import torch
|
3 |
import tempfile
|
4 |
import os
|
5 |
-
os.environ["NUMBA_DISABLE_CACHE"] = "1"
|
6 |
-
from TTS.api import TTS
|
7 |
-
import soundfile as sf
|
8 |
import glob
|
|
|
9 |
import numba
|
|
|
|
|
|
|
10 |
numba.config.THREADING_LAYER = "workqueue"
|
11 |
numba.config.DISABLE_JIT = True
|
12 |
|
13 |
-
# Load XTTS model
|
14 |
@st.cache_resource
|
15 |
def load_xtts_model():
|
|
|
16 |
return TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
|
17 |
|
|
|
18 |
tts = load_xtts_model()
|
19 |
|
20 |
# UI
|
@@ -26,8 +29,10 @@ demo_voice_dir = "./demo_voices"
|
|
26 |
demo_files = glob.glob(f"{demo_voice_dir}/*")
|
27 |
demo_names = [os.path.basename(f) for f in demo_files]
|
28 |
|
|
|
29 |
voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])
|
30 |
|
|
|
31 |
speaker_wav_path = None
|
32 |
|
33 |
if voice_source == "Use pre-recorded demo voice":
|
@@ -46,7 +51,6 @@ elif voice_source == "Upload your own voice":
|
|
46 |
speaker_wav_path = temp_audio.name
|
47 |
st.audio(speaker_wav_path, format="audio/wav")
|
48 |
|
49 |
-
|
50 |
# Hindi Predefined Texts
|
51 |
predefined_texts = {
|
52 |
"नमस्ते, यह मेरी क्लोन की गई आवाज़ है।": "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
|
@@ -54,31 +58,32 @@ predefined_texts = {
|
|
54 |
"मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।": "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
|
55 |
"यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?": "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
|
56 |
"This is not my real voice, but can you tell the difference":"This is not my real voice, but can you tell the difference",
|
57 |
-
# "हेलो! मैं टनु हूँ और मुझे AI से खेलना पसंद है।": "हेलो! मैं टनु हूँ और मुझे AI से खेलना पसंद है।",
|
58 |
"जीवन एक सुंदर यात्रा है, हर पल को जीओ।": "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
|
59 |
"Use custom text": "custom"
|
60 |
}
|
61 |
|
62 |
-
|
63 |
selected_text = st.selectbox("Choose or write text to synthesize:", list(predefined_texts.keys()))
|
64 |
if predefined_texts[selected_text] == "custom":
|
65 |
input_text = st.text_area("Enter custom text:", "Hello, how are you?")
|
66 |
else:
|
67 |
input_text = predefined_texts[selected_text]
|
68 |
|
69 |
-
# Clone & Synthesize
|
70 |
if speaker_wav_path and input_text.strip():
|
71 |
if st.button("🎧 Clone & Synthesize"):
|
72 |
with st.spinner("Cloning voice..."):
|
73 |
output_path = "xtts_output.wav"
|
74 |
|
|
|
75 |
tts.tts_to_file(
|
76 |
text=input_text,
|
77 |
speaker_wav=speaker_wav_path,
|
78 |
-
language="en",
|
79 |
file_path=output_path
|
80 |
)
|
81 |
|
|
|
82 |
st.success("Done! Here's your cloned voice:")
|
83 |
st.audio(output_path, format="audio/wav")
|
84 |
|
|
|
2 |
import torch
|
3 |
import tempfile
|
4 |
import os
|
|
|
|
|
|
|
5 |
import glob
|
6 |
+
from TTS.api import TTS
|
7 |
import numba
|
8 |
+
|
9 |
+
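# Numba workarounds: the settings below are meant to turn off numba's cache,
# select the "workqueue" threading layer, and disable JIT compilation entirely.
# NOTE(review): NUMBA_DISABLE_CACHE is set after `TTS` and `numba` are imported
# above; if numba only reads env vars at import time this has no effect — confirm.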
|
10 |
+
os.environ["NUMBA_DISABLE_CACHE"] = "1"
|
11 |
numba.config.THREADING_LAYER = "workqueue"
|
12 |
numba.config.DISABLE_JIT = True
|
13 |
|
14 |
+
# Load XTTS model (GPU supported if available)
@st.cache_resource
def load_xtts_model():
    """Create the XTTS v2 multilingual TTS model.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    model instead of constructing it again.

    Returns:
        A ``TTS`` instance for ``xtts_v2``, placed on the GPU when
        ``torch.cuda.is_available()`` is true and on the CPU otherwise.
    """
    use_gpu = torch.cuda.is_available()
    return TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=use_gpu,
    )
|
19 |
|
20 |
+
# Load model
|
21 |
tts = load_xtts_model()
|
22 |
|
23 |
# UI
|
|
|
29 |
demo_files = glob.glob(f"{demo_voice_dir}/*")
|
30 |
demo_names = [os.path.basename(f) for f in demo_files]
|
31 |
|
32 |
+
# Voice input selection
|
33 |
voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])
|
34 |
|
35 |
+
# Initialize speaker_wav_path
|
36 |
speaker_wav_path = None
|
37 |
|
38 |
if voice_source == "Use pre-recorded demo voice":
|
|
|
51 |
speaker_wav_path = temp_audio.name
|
52 |
st.audio(speaker_wav_path, format="audio/wav")
|
53 |
|
|
|
54 |
# Predefined texts (mostly Hindi, plus one English sentence and a custom-text option)
|
55 |
predefined_texts = {
|
56 |
"नमस्ते, यह मेरी क्लोन की गई आवाज़ है।": "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
|
|
|
58 |
"मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।": "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
|
59 |
"यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?": "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
|
60 |
"This is not my real voice, but can you tell the difference":"This is not my real voice, but can you tell the difference",
|
|
|
61 |
"जीवन एक सुंदर यात्रा है, हर पल को जीओ।": "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
|
62 |
"Use custom text": "custom"
|
63 |
}
|
64 |
|
65 |
+
# Text selection for synthesis
text_choices = list(predefined_texts.keys())
selected_text = st.selectbox("Choose or write text to synthesize:", text_choices)
preset_value = predefined_texts[selected_text]
# The "custom" sentinel marks the entry that lets the user type their own text;
# every other entry maps directly to the sentence to synthesize.
input_text = (
    st.text_area("Enter custom text:", "Hello, how are you?")
    if preset_value == "custom"
    else preset_value
)
|
71 |
|
72 |
+
# Clone & Synthesize functionality
# Runs only when a reference voice is available and the text is not blank.
if speaker_wav_path and input_text.strip():
    if st.button("🎧 Clone & Synthesize"):
        with st.spinner("Cloning voice..."):
            output_path = "xtts_output.wav"

            # Most predefined sentences are Hindi, so a hard-coded "en" makes
            # the model read Devanagari text as English. Pick the language from
            # the script instead: any Devanagari character (U+0900-U+097F)
            # selects "hi"; everything else keeps the previous "en" behaviour.
            has_devanagari = any("\u0900" <= ch <= "\u097f" for ch in input_text)
            language = "hi" if has_devanagari else "en"

            # Clone and synthesize the voice using the XTTS model
            tts.tts_to_file(
                text=input_text,
                speaker_wav=speaker_wav_path,
                language=language,
                file_path=output_path,
            )

            # Display the cloned audio
            st.success("Done! Here's your cloned voice:")
            st.audio(output_path, format="audio/wav")
|
89 |
|