Spaces:

Itanutiwari527
/

Voice_clone_demo

Sleeping

App Files Files Community

Itanutiwari527 committed on Apr 26

Commit

c6fa95b

verified ·

1 Parent(s): cc5b068

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -9

app.py CHANGED Viewed

@@ -2,19 +2,22 @@ import streamlit as st
 import torch
 import tempfile
 import os
-os.environ["NUMBA_DISABLE_CACHE"] = "1"
-from TTS.api import TTS
-import soundfile as sf
 import glob
 import numba
 numba.config.THREADING_LAYER = "workqueue"
 numba.config.DISABLE_JIT = True
-# Load XTTS model
 @st.cache_resource
 def load_xtts_model():
     """Build the XTTS v2 model object that the rest of the script uses as ``tts``.

     The GPU is requested only when ``torch.cuda.is_available()`` is true;
     otherwise ``gpu=False`` is passed to the constructor.

     NOTE(review): the ``st.cache_resource`` decorator presumably makes
     Streamlit reuse one model object across script reruns instead of
     reloading it each time -- confirm against the Streamlit documentation.
     """
     return TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
 tts = load_xtts_model()
 # UI
@@ -26,8 +29,10 @@ demo_voice_dir = "./demo_voices"
 demo_files = glob.glob(f"{demo_voice_dir}/*")
 demo_names = [os.path.basename(f) for f in demo_files]
 voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])
 speaker_wav_path = None
 if voice_source == "Use pre-recorded demo voice":
@@ -46,7 +51,6 @@ elif voice_source == "Upload your own voice":
             speaker_wav_path = temp_audio.name
         st.audio(speaker_wav_path, format="audio/wav")
 # Hindi Predefined Texts
 predefined_texts = {
     "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।": "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
@@ -54,31 +58,32 @@ predefined_texts = {
     "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।": "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
     "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?": "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
     "This is not my real voice, but can you tell the difference":"This is not my real voice, but can you tell the difference",
-    # "हेलो! मैं टनु हूँ और मुझे AI से खेलना पसंद है।": "हेलो! मैं टनु हूँ और मुझे AI से खेलना पसंद है।",
     "जीवन एक सुंदर यात्रा है, हर पल को जीओ।": "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
     "Use custom text": "custom"
 }
 selected_text = st.selectbox("Choose or write text to synthesize:", list(predefined_texts.keys()))
 if predefined_texts[selected_text] == "custom":
     input_text = st.text_area("Enter custom text:", "Hello, how are you?")
 else:
     input_text = predefined_texts[selected_text]
-# Clone & Synthesize
 if speaker_wav_path and input_text.strip():
     if st.button("🎧 Clone & Synthesize"):
         with st.spinner("Cloning voice..."):
             output_path = "xtts_output.wav"
             tts.tts_to_file(
                 text=input_text,
                 speaker_wav=speaker_wav_path,
-                language="en",
                 file_path=output_path
             )
             st.success("Done! Here's your cloned voice:")
             st.audio(output_path, format="audio/wav")

 import torch
 import tempfile
 import os
 import glob
+from TTS.api import TTS
 import numba
+# Disable numba JIT cache for better compatibility
+os.environ["NUMBA_DISABLE_CACHE"] = "1"
 numba.config.THREADING_LAYER = "workqueue"
 numba.config.DISABLE_JIT = True
+# Load XTTS model (GPU supported if available)
 @st.cache_resource
 def load_xtts_model():
+    # Build the XTTS v2 model object; the GPU is requested only when
+    # torch.cuda.is_available() is true, otherwise gpu=False is passed.
     return TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
+# Load model
 tts = load_xtts_model()
 # UI
 demo_files = glob.glob(f"{demo_voice_dir}/*")
 demo_names = [os.path.basename(f) for f in demo_files]
+# Voice input selection
 voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])
+# Initialize speaker_wav_path
 speaker_wav_path = None
 if voice_source == "Use pre-recorded demo voice":
             speaker_wav_path = temp_audio.name
         st.audio(speaker_wav_path, format="audio/wav")
 # Hindi Predefined Texts
 predefined_texts = {
     "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।": "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
     "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।": "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
     "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?": "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
     "This is not my real voice, but can you tell the difference":"This is not my real voice, but can you tell the difference",
     "जीवन एक सुंदर यात्रा है, हर पल को जीओ।": "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
     "Use custom text": "custom"
 }
+# Text selection for synthesis
 selected_text = st.selectbox("Choose or write text to synthesize:", list(predefined_texts.keys()))
 if predefined_texts[selected_text] == "custom":
     input_text = st.text_area("Enter custom text:", "Hello, how are you?")
 else:
     input_text = predefined_texts[selected_text]
+# Clone & Synthesize functionality
 if speaker_wav_path and input_text.strip():
     if st.button("🎧 Clone & Synthesize"):
         with st.spinner("Cloning voice..."):
             output_path = "xtts_output.wav"
+            # Clone and synthesize the voice using XTTS model
             tts.tts_to_file(
                 text=input_text,
                 speaker_wav=speaker_wav_path,
+                language="en",  # Language set as 'en' for English (adjust as needed)
                 file_path=output_path
             )
+            # Display the cloned audio
             st.success("Done! Here's your cloned voice:")
             st.audio(output_path, format="audio/wav")