Spaces:

Itanutiwari527
/

xtts2_voice_clone

Running

App Files Files Community

xtts2_voice_clone / app.py

Itanutiwari527

Upload 2 files

c53363c verified 4 days ago

raw

history blame contribute delete

4.27 kB

	import streamlit as st
	import torch
	import tempfile
	import os
	import glob
	from TTS.api import TTS
	import numba

	# Numba runtime tweaks intended to improve compatibility in this deployment.
	# NOTE(review): these statements run after `from TTS.api import TTS` and
	# `import numba` above, so anything numba (or a library built on it) evaluated
	# at import time will not have seen them — TODO confirm they still take effect.
	# NOTE(review): presumably meant to turn off numba's on-disk cache; verify the
	# variable name against numba's documented environment variables.
	os.environ["NUMBA_DISABLE_CACHE"] = "1"
	# Select the "workqueue" threading layer via numba's config module.
	numba.config.THREADING_LAYER = "workqueue"
	# Ask numba to skip JIT compilation.
	numba.config.DISABLE_JIT = True

	# Load XTTS model (GPU supported if available)
	@st.cache_resource
	def load_xtts_model():
	    """Construct the XTTS v2 model, wrapped in Streamlit's resource cache.

	    The ``gpu`` flag passed to ``TTS`` mirrors ``torch.cuda.is_available()``,
	    so the CPU is used whenever CUDA is not present.
	    """
	    use_gpu = torch.cuda.is_available()
	    return TTS(
	        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
	        gpu=use_gpu,
	    )

	# Load model (cached by Streamlit, so reruns reuse the same instance)
	tts = load_xtts_model()

	# UI header
	st.title("XTTS Voice Cloning Demo")
	st.markdown("1. Select a demo voice OR upload your own\n2. Choose or write text\n3. Hear your cloned voice!")

	# Load pre-recorded demo voices.
	# glob returns entries in arbitrary order and also matches sub-directories, so
	# keep only regular files and sort them to give the select box a stable order.
	demo_voice_dir = "./demo_voices"
	demo_files = sorted(
	    path for path in glob.glob(f"{demo_voice_dir}/*") if os.path.isfile(path)
	)
	demo_names = [os.path.basename(path) for path in demo_files]

	# Voice input selection
	voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])

	# Path of the reference recording to clone; stays None until a voice is chosen
	speaker_wav_path = None

	# Resolve speaker_wav_path from either a bundled demo voice or a user upload.
	if voice_source == "Use pre-recorded demo voice":
	    if demo_files:
	        selected_demo = st.selectbox("Choose a demo voice:", demo_names)
	        speaker_wav_path = os.path.join(demo_voice_dir, selected_demo)
	        st.audio(speaker_wav_path, format="audio/wav")
	    else:
	        st.warning("No demo voices found in 'demo_voices/' folder.")

	elif voice_source == "Upload your own voice":
	    uploaded_file = st.file_uploader("Upload your voice sample (WAV, mono, 16k–48kHz):", type=["wav"])
	    if uploaded_file:
	        # getvalue() returns the whole buffer regardless of the stream position,
	        # whereas read() can come back empty once the stream has been consumed.
	        audio_bytes = uploaded_file.getvalue()
	        upload_key = (uploaded_file.name, len(audio_bytes), hash(audio_bytes))

	        # The script reruns on every widget interaction; reuse the temp file
	        # written for this exact upload instead of leaking a new one each time.
	        cached_upload = st.session_state.get("_speaker_upload")
	        if (
	            cached_upload
	            and cached_upload[0] == upload_key
	            and os.path.exists(cached_upload[1])
	        ):
	            speaker_wav_path = cached_upload[1]
	        else:
	            if cached_upload:
	                # A different file was uploaded: drop the previous temp copy.
	                try:
	                    os.remove(cached_upload[1])
	                except FileNotFoundError:
	                    pass  # already cleaned up elsewhere
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
	                temp_audio.write(audio_bytes)
	                speaker_wav_path = temp_audio.name
	            st.session_state["_speaker_upload"] = (upload_key, speaker_wav_path)

	        # Played only after the temp file is closed, so its content is flushed.
	        st.audio(speaker_wav_path, format="audio/wav")

	# Predefined sample sentences (Hindi and English). Each sentence is both the
	# label shown in the select box and the text that gets synthesized.
	_sample_sentences = (
	    "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
	    "Hello Everyone, This is my voice cloned using previously recorded voice sample",
	    "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
	    "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
	    "This is not my real voice, but can you tell the difference",
	    "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
	)
	predefined_texts = {sentence: sentence for sentence in _sample_sentences}
	# Final entry maps to the "custom" marker, which switches to free-form input.
	predefined_texts["Use custom text"] = "custom"

	# Text selection for synthesis
	selected_text = st.selectbox("Choose or write text to synthesize:", list(predefined_texts))
	_chosen_value = predefined_texts[selected_text]
	input_text = (
	    st.text_area("Enter custom text:", "Hello, how are you?")
	    if _chosen_value == "custom"
	    else _chosen_value
	)

	# Clone & Synthesize functionality
	def _detect_language(text):
	    """Return the XTTS language code for *text*.

	    Text containing any Devanagari character (U+0900-U+097F) is treated as
	    Hindi ("hi"); everything else keeps the previous default, English ("en").
	    """
	    has_devanagari = any("\u0900" <= ch <= "\u097f" for ch in text)
	    return "hi" if has_devanagari else "en"


	if speaker_wav_path and input_text.strip():
	    if st.button("🎧 Clone & Synthesize"):
	        # Write to a unique temp file rather than a fixed name in the working
	        # directory, so concurrent sessions cannot overwrite each other's output.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as _out_file:
	            output_path = _out_file.name

	        try:
	            with st.spinner("Cloning voice..."):
	                # Clone and synthesize the voice using the XTTS model. The
	                # language follows the script of the text, so the Hindi
	                # samples are no longer synthesized as English.
	                tts.tts_to_file(
	                    text=input_text,
	                    speaker_wav=speaker_wav_path,
	                    language=_detect_language(input_text),
	                    file_path=output_path,
	                )

	            # Read the result into memory so the temp file can be removed.
	            with open(output_path, "rb") as _result:
	                cloned_audio = _result.read()

	            # Display the cloned audio
	            st.success("Done! Here's your cloned voice:")
	            st.audio(cloned_audio, format="audio/wav")
	        finally:
	            # Always clean up, even if synthesis failed: the output file, plus
	            # the speaker sample when it was a temp copy of an upload.
	            _stale_paths = [output_path]
	            if voice_source == "Upload your own voice":
	                _stale_paths.append(speaker_wav_path)
	            for _path in _stale_paths:
	                try:
	                    os.remove(_path)
	                except FileNotFoundError:
	                    pass  # nothing left to delete