Itanutiwari527's picture
Upload 2 files
c53363c verified
raw
history blame contribute delete
4.27 kB
import streamlit as st
import torch
import tempfile
import os
import glob
from TTS.api import TTS
import numba
# Numba runtime configuration, applied once each time this script is executed.
# Sets the NUMBA_DISABLE_CACHE environment variable to "1" for this process.
# NOTE(review): this runs after `numba` and `TTS` were imported above — confirm
# that numba still honours settings changed at this point.
os.environ["NUMBA_DISABLE_CACHE"] = "1"
# Request the "workqueue" threading layer from numba.
numba.config.THREADING_LAYER = "workqueue"
# Switch numba's JIT off (presumably so decorated functions run as plain
# Python — verify this is effective for functions decorated before this line).
numba.config.DISABLE_JIT = True
# XTTS model loader; wrapped in st.cache_resource so the result is cached
@st.cache_resource
def load_xtts_model():
    """Create the multilingual XTTS v2 text-to-speech model.

    The model is asked to use the GPU when CUDA is available and falls back
    to the CPU otherwise.

    Returns:
        The ``TTS`` instance built for
        ``tts_models/multilingual/multi-dataset/xtts_v2``.
    """
    xtts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    use_gpu = torch.cuda.is_available()
    return TTS(model_name=xtts_model_name, gpu=use_gpu)
# Obtain the XTTS model through the cached loader
tts = load_xtts_model()

# Page header followed by the three-step usage instructions
st.title("XTTS Voice Cloning Demo")
usage_steps = (
    "1. Select a demo voice OR upload your own",
    "2. Choose or write text",
    "3. Hear your cloned voice!",
)
st.markdown("\n".join(usage_steps))
# Discover the pre-recorded demo voices shipped next to the app.
# Only regular files are kept (a sub-directory is not a playable voice) and
# the list is sorted so the dropdown order is stable across reruns and
# platforms; glob itself returns entries in arbitrary order.
demo_voice_dir = "./demo_voices"
demo_files = sorted(
    path for path in glob.glob(f"{demo_voice_dir}/*") if os.path.isfile(path)
)
# File names (without the directory) shown to the user in the dropdown
demo_names = [os.path.basename(path) for path in demo_files]
# Voice input selection: either a bundled demo recording or a user upload
voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])

# Path of the reference recording used for cloning; stays None until the
# user has picked a demo voice or uploaded a file.
speaker_wav_path = None

if voice_source == "Use pre-recorded demo voice":
    if demo_files:
        selected_demo = st.selectbox("Choose a demo voice:", demo_names)
        speaker_wav_path = os.path.join(demo_voice_dir, selected_demo)
        st.audio(speaker_wav_path, format="audio/wav")
    else:
        st.warning("No demo voices found in 'demo_voices/' folder.")
elif voice_source == "Upload your own voice":
    uploaded_file = st.file_uploader("Upload your voice sample (WAV, mono, 16k–48kHz):", type=["wav"])
    if uploaded_file:
        # Streamlit re-executes the whole script on every widget interaction.
        # Writing a fresh delete=False temp file on each run would leak one
        # file per rerun, so the temp copy is remembered in the session and
        # reused for as long as the same upload is active and still on disk.
        upload_key = (
            getattr(uploaded_file, "file_id", None),
            uploaded_file.name,
            uploaded_file.size,
        )
        cached_upload = st.session_state.get("_speaker_upload")
        if (
            cached_upload
            and cached_upload["key"] == upload_key
            and os.path.exists(cached_upload["path"])
        ):
            speaker_wav_path = cached_upload["path"]
        else:
            # A different file was uploaded: drop the stale temp copy first.
            if cached_upload and os.path.exists(cached_upload["path"]):
                os.remove(cached_upload["path"])
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                # getvalue() returns the full payload regardless of the
                # current stream position, unlike read().
                temp_audio.write(uploaded_file.getvalue())
            speaker_wav_path = temp_audio.name
            st.session_state["_speaker_upload"] = {
                "key": upload_key,
                "path": speaker_wav_path,
            }
        # The temp file is closed (and therefore flushed) before it is played.
        st.audio(speaker_wav_path, format="audio/wav")
# Ready-made sample sentences (Hindi and English) offered in the dropdown
_sample_sentences = (
    "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
    "Hello Everyone, This is my voice cloned using previously recorded voice sample",
    "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
    "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
    "This is not my real voice, but can you tell the difference",
    "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
)

# Label -> text to synthesize. Each sample maps to itself; the final entry
# maps to the marker "custom", which switches the UI to free-text input.
predefined_texts = {sentence: sentence for sentence in _sample_sentences}
predefined_texts["Use custom text"] = "custom"

# Let the user pick a sample sentence or opt into typing their own
selected_text = st.selectbox("Choose or write text to synthesize:", list(predefined_texts))
chosen_value = predefined_texts[selected_text]
if chosen_value != "custom":
    input_text = chosen_value
else:
    input_text = st.text_area("Enter custom text:", "Hello, how are you?")
def _detect_xtts_language(text):
    """Return the XTTS language code to use for ``text``.

    Text containing at least one Devanagari character (U+0900-U+097F) is
    treated as Hindi (``"hi"``); anything else keeps the previous default,
    English (``"en"``).
    """
    has_devanagari = any("\u0900" <= char <= "\u097f" for char in text)
    return "hi" if has_devanagari else "en"


# Clone & synthesize: only offered once a reference voice and non-blank text
# are both available.
if speaker_wav_path and input_text.strip():
    if st.button("🎧 Clone & Synthesize"):
        output_path = "xtts_output.wav"
        try:
            with st.spinner("Cloning voice..."):
                tts.tts_to_file(
                    text=input_text,
                    speaker_wav=speaker_wav_path,
                    # The sample sentences mix Hindi and English, so the
                    # language is chosen from the text instead of always
                    # passing "en".
                    language=_detect_xtts_language(input_text),
                    file_path=output_path,
                )
            # Display the cloned audio
            st.success("Done! Here's your cloned voice:")
            st.audio(output_path, format="audio/wav")
        finally:
            # Remove the temp copy of an uploaded sample even when synthesis
            # raises, so a failed run does not leave the file behind.
            if voice_source == "Upload your own voice" and os.path.exists(speaker_wav_path):
                os.remove(speaker_wav_path)