Spaces:
Running
Running
import streamlit as st | |
import torch | |
import tempfile | |
import os | |
import glob | |
from TTS.api import TTS | |
import numba | |
# Numba runtime settings: set the NUMBA_DISABLE_CACHE environment variable,
# select the "workqueue" threading layer and turn JIT compilation off.
# NOTE(review): these run after `from TTS.api import TTS`, so anything numba
# (or a library using it) already read at import time may be unaffected --
# confirm the settings take effect where intended.
os.environ.update(NUMBA_DISABLE_CACHE="1")
for _numba_option, _numba_value in (
    ("THREADING_LAYER", "workqueue"),
    ("DISABLE_JIT", True),
):
    setattr(numba.config, _numba_option, _numba_value)
# Load XTTS model (GPU when available, otherwise CPU)
@st.cache_resource
def load_xtts_model():
    """Return the XTTS v2 model, constructing it at most once per server process.

    Streamlit re-executes the whole script on every widget interaction; the
    ``st.cache_resource`` decorator keeps the constructed model alive between
    those reruns instead of rebuilding it each time.

    Returns:
        The ``TTS`` instance for ``tts_models/multilingual/multi-dataset/xtts_v2``,
        created with ``gpu=True`` when ``torch.cuda.is_available()`` reports a
        CUDA device and ``gpu=False`` otherwise.
    """
    return TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=torch.cuda.is_available(),
    )
# Instantiate the model before any widget is drawn.
tts = load_xtts_model()

# Page header and usage steps.
st.title("XTTS Voice Cloning Demo")
_usage_steps = [
    "1. Select a demo voice OR upload your own",
    "2. Choose or write text",
    "3. Hear your cloned voice!",
]
st.markdown("\n".join(_usage_steps))
# Pre-recorded demo voices: regular files only, in a stable (sorted) order so
# the select box lists the same options in the same order on every rerun.
demo_voice_dir = "./demo_voices"
demo_files = sorted(
    path for path in glob.glob(f"{demo_voice_dir}/*") if os.path.isfile(path)
)
demo_names = [os.path.basename(f) for f in demo_files]

# Voice input selection
voice_source = st.radio(
    "Choose voice input method:",
    ["Use pre-recorded demo voice", "Upload your own voice"],
)

# Path of the reference recording; stays None until a voice is available.
speaker_wav_path = None

if voice_source == "Use pre-recorded demo voice":
    if demo_files:
        selected_demo = st.selectbox("Choose a demo voice:", demo_names)
        speaker_wav_path = os.path.join(demo_voice_dir, selected_demo)
        st.audio(speaker_wav_path, format="audio/wav")
    else:
        st.warning("No demo voices found in 'demo_voices/' folder.")
elif voice_source == "Upload your own voice":
    uploaded_file = st.file_uploader(
        "Upload your voice sample (WAV, mono, 16k–48kHz):", type=["wav"]
    )
    if uploaded_file is not None:
        # getvalue() returns the full upload regardless of the stream
        # position, unlike read(), which depends on earlier reads.
        sample_bytes = uploaded_file.getvalue()
        upload_fingerprint = (uploaded_file.name, hash(sample_bytes))

        # The script reruns on every widget interaction. Reuse the temp file
        # written for this same upload instead of leaking a new one per rerun.
        cached_upload = st.session_state.get("_speaker_upload")
        if (
            cached_upload is not None
            and cached_upload["fingerprint"] == upload_fingerprint
            and os.path.exists(cached_upload["path"])
        ):
            speaker_wav_path = cached_upload["path"]
        else:
            if cached_upload is not None:
                # Best-effort removal of the previous upload's temp file; it
                # may already have been deleted after a synthesis run.
                try:
                    os.remove(cached_upload["path"])
                except OSError:
                    pass
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                temp_audio.write(sample_bytes)
            # The file is closed (and therefore flushed) before anything reads it.
            speaker_wav_path = temp_audio.name
            st.session_state["_speaker_upload"] = {
                "fingerprint": upload_fingerprint,
                "path": speaker_wav_path,
            }
        st.audio(speaker_wav_path, format="audio/wav")
# Predefined sample sentences (Hindi and English). Each one is both the label
# shown in the select box and the text that gets synthesized.
_sample_sentences = [
    "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
    "Hello Everyone, This is my voice cloned using previously recorded voice sample",
    "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
    "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
    "This is not my real voice, but can you tell the difference",
    "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
]
predefined_texts = {sentence: sentence for sentence in _sample_sentences}
# The last entry maps to the marker value "custom", which switches the UI to
# a free-text area instead of a fixed sentence.
predefined_texts["Use custom text"] = "custom"

# Text selection for synthesis
selected_text = st.selectbox(
    "Choose or write text to synthesize:", list(predefined_texts)
)
_chosen_value = predefined_texts[selected_text]
input_text = (
    st.text_area("Enter custom text:", "Hello, how are you?")
    if _chosen_value == "custom"
    else _chosen_value
)
# Clone & synthesize
def _guess_language(text):
    """Return the language code to pass to XTTS for *text*.

    Returns "hi" when the text contains at least one Devanagari character
    (U+0900-U+097F), otherwise "en", the language previously used for every
    input.
    """
    if any("\u0900" <= ch <= "\u097f" for ch in text):
        return "hi"
    return "en"


# The button is only offered once a reference voice and non-blank text exist.
if speaker_wav_path and input_text.strip():
    if st.button("🎧 Clone & Synthesize"):
        # Write each result to its own temp file so concurrent sessions do not
        # overwrite one shared output file in the working directory.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as _out_file:
            output_path = _out_file.name
        try:
            with st.spinner("Cloning voice..."):
                # Clone and synthesize the voice using the XTTS model
                tts.tts_to_file(
                    text=input_text,
                    speaker_wav=speaker_wav_path,
                    language=_guess_language(input_text),
                    file_path=output_path,
                )
            with open(output_path, "rb") as _synthesized:
                audio_bytes = _synthesized.read()
        finally:
            # Remove the output file, plus the uploaded sample's temp copy,
            # whether or not synthesis succeeded.
            _stale_paths = [output_path]
            if voice_source == "Upload your own voice":
                _stale_paths.append(speaker_wav_path)
            for _stale_path in _stale_paths:
                try:
                    os.remove(_stale_path)
                except OSError:
                    # Best-effort cleanup: the file may already be gone.
                    pass

        # Display the cloned audio
        st.success("Done! Here's your cloned voice:")
        st.audio(audio_bytes, format="audio/wav")