|
import streamlit as st |
|
import torch |
|
import tempfile |
|
import os |
|
import glob |
|
from TTS.api import TTS |
|
import numba |
|
|
|
|
|
# Numba runtime settings, applied before the XTTS model is instantiated:
# JIT compilation is switched off, the threading layer is set to "workqueue",
# and NUMBA_DISABLE_CACHE=1 is exported to the process environment.
# NOTE(review): `numba` has already been imported when these lines run, so
# confirm that each setting still takes effect at this point.
numba.config.DISABLE_JIT = True
numba.config.THREADING_LAYER = "workqueue"
os.environ["NUMBA_DISABLE_CACHE"] = "1"
|
|
|
|
|
@st.cache_resource
def load_xtts_model():
    """Create the XTTS v2 model, cached by Streamlit via ``st.cache_resource``.

    Returns:
        A ``TTS`` instance for ``tts_models/multilingual/multi-dataset/xtts_v2``.
        It is created with ``gpu=True`` when ``torch.cuda.is_available()``
        reports a CUDA device, and with ``gpu=False`` otherwise.
    """
    model_id = "tts_models/multilingual/multi-dataset/xtts_v2"
    use_gpu = torch.cuda.is_available()
    return TTS(model_name=model_id, gpu=use_gpu)
|
|
|
|
|
# Model handle used by the synthesis step further down the script.
tts = load_xtts_model()

# Page header followed by the three-step usage hint.
st.title("XTTS Voice Cloning Demo")
st.markdown(
    "1. Select a demo voice OR upload your own\n"
    "2. Choose or write text\n"
    "3. Hear your cloned voice!"
)
|
|
|
|
|
# Discover the bundled demo voices.
# Only regular files are kept, because a sub-directory cannot serve as a
# speaker sample. The result is sorted because glob returns matches in
# arbitrary order, which would otherwise make the drop-down order unstable.
demo_voice_dir = "./demo_voices"
demo_files = sorted(
    path for path in glob.glob(f"{demo_voice_dir}/*") if os.path.isfile(path)
)
# File names (without the directory part) shown to the user.
demo_names = [os.path.basename(path) for path in demo_files]
|
|
|
|
|
# Let the user choose where the reference voice comes from.
voice_source = st.radio(
    "Choose voice input method:",
    ["Use pre-recorded demo voice", "Upload your own voice"],
)

# Path of the reference recording; stays None until a usable voice is chosen.
speaker_wav_path = None

if voice_source == "Use pre-recorded demo voice":
    if demo_files:
        selected_demo = st.selectbox("Choose a demo voice:", demo_names)
        speaker_wav_path = os.path.join(demo_voice_dir, selected_demo)
        st.audio(speaker_wav_path, format="audio/wav")
    else:
        st.warning("No demo voices found in 'demo_voices/' folder.")

elif voice_source == "Upload your own voice":
    uploaded_file = st.file_uploader(
        "Upload your voice sample (WAV, mono, 16k–48kHz):", type=["wav"]
    )
    if uploaded_file:
        # getvalue() returns the whole upload regardless of the current
        # stream position, unlike read().
        upload_bytes = uploaded_file.getvalue()
        upload_key = (uploaded_file.name, len(upload_bytes), hash(upload_bytes))

        # The script is re-executed on every widget interaction. Remember the
        # temp file for the current upload in the session so that a rerun
        # reuses it instead of writing (and leaking) a new copy each time.
        cached_upload = st.session_state.get("_uploaded_voice")
        if (
            cached_upload is not None
            and cached_upload["key"] == upload_key
            and os.path.exists(cached_upload["path"])
        ):
            speaker_wav_path = cached_upload["path"]
        else:
            if cached_upload is not None:
                # Best-effort removal of the copy belonging to a previous
                # upload; it may already have been deleted after synthesis.
                try:
                    os.remove(cached_upload["path"])
                except OSError:
                    pass
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                temp_audio.write(upload_bytes)
            speaker_wav_path = temp_audio.name
            st.session_state["_uploaded_voice"] = {
                "key": upload_key,
                "path": speaker_wav_path,
            }

        st.audio(speaker_wav_path, format="audio/wav")
|
|
|
|
|
# Ready-made sentences offered in the drop-down; each is synthesized verbatim.
_sample_sentences = (
    "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
    "Hello Everyone, This is my voice cloned using previously recorded voice sample",
    "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
    "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
    "This is not my real voice, but can you tell the difference",
    "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
)

# Maps the label shown in the drop-down to the text to synthesize. The last
# entry maps to the sentinel "custom", which switches to free-text input.
predefined_texts = {sentence: sentence for sentence in _sample_sentences}
predefined_texts["Use custom text"] = "custom"

selected_text = st.selectbox(
    "Choose or write text to synthesize:", list(predefined_texts)
)

# Resolve the text to speak: free text for the sentinel entry, otherwise the
# selected sentence itself.
_chosen_value = predefined_texts[selected_text]
input_text = (
    st.text_area("Enter custom text:", "Hello, how are you?")
    if _chosen_value == "custom"
    else _chosen_value
)
|
|
|
|
|
def _detect_language(text):
    """Pick the XTTS language code for *text*.

    Returns ``"hi"`` when the text contains at least one character from the
    Devanagari Unicode block (U+0900-U+097F), otherwise ``"en"``.
    """
    has_devanagari = any("\u0900" <= ch <= "\u097f" for ch in text)
    return "hi" if has_devanagari else "en"


# Synthesis is only offered once a reference voice and non-blank text exist.
if speaker_wav_path and input_text.strip():
    if st.button("🎧 Clone & Synthesize"):
        output_path = "xtts_output.wav"
        try:
            with st.spinner("Cloning voice..."):
                tts.tts_to_file(
                    text=input_text,
                    speaker_wav=speaker_wav_path,
                    # Several of the bundled sentences are Hindi; passing
                    # "en" for them would run Devanagari text through the
                    # English front end.
                    language=_detect_language(input_text),
                    file_path=output_path,
                )
            st.success("Done! Here's your cloned voice:")
            st.audio(output_path, format="audio/wav")
        finally:
            # Remove the temporary copy of an uploaded sample even when
            # synthesis fails; demo voices are project files and are kept.
            if voice_source == "Upload your own voice":
                try:
                    os.remove(speaker_wav_path)
                except FileNotFoundError:
                    pass
|
|