Itanutiwari527 committed on
Commit
76db824
·
verified ·
1 Parent(s): a4c2598

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +64 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py
"""Streamlit front end for voice cloning with the Coqui XTTS v2 model.

The page lets a user upload a short reference recording, pick or type a
sentence, and synthesizes that sentence in the uploaded voice. Any ``.wav``
files found in ``demo_samples/`` are listed at the bottom of the page.
"""

import os
import tempfile

import soundfile as sf
import streamlit as st
import torch
from TTS.api import TTS

MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Display label -> language code passed to the model. Hindi stays first so the
# default selection matches the previously hard-coded "hi".
# NOTE(review): XTTS v2 supports more languages; extend this map as needed.
LANGUAGES = {"Hindi": "hi", "English": "en"}

DEMO_FOLDER = "demo_samples"

# Must run before any other Streamlit command that renders output (the cached
# loader below shows a spinner), so it is the first st.* call in the script.
st.set_page_config(page_title="🧬 XTTSv2 Voice Cloner", layout="centered")


@st.cache_resource(show_spinner="Loading XTTSv2 model...")
def load_tts():
    """Build the XTTS v2 synthesizer once per server process.

    Streamlit re-executes this script on every interaction. The previous
    ``st.session_state.setdefault("tts", TTS(...))`` evaluated ``TTS(...)``
    eagerly on each rerun, rebuilding the model every time. Caching the
    loader makes the construction happen only once.

    Returns:
        The ``TTS`` instance, placed on the GPU when CUDA is available and on
        the CPU otherwise (a hard-coded ``gpu=True`` fails on CPU-only hosts).
    """
    # NOTE(review): the first download of this model may prompt for acceptance
    # of the Coqui model license; on a headless host that is presumably done
    # via the COQUI_TOS_AGREED environment variable -- confirm and set it in
    # the deployment config rather than in code.
    return TTS(model_name=MODEL_NAME, gpu=torch.cuda.is_available())


# Keep the model reachable under the same session key as before; the call is
# a cheap cache hit after the first run.
st.session_state.setdefault("tts", load_tts())

st.title("🎙️ Clone Your Voice with XTTSv2")
st.markdown("Upload your voice sample and generate cloned speech from text.")

# Step 1: Upload voice sample
st.subheader("🎀 Upload Your Voice Sample (.wav)")
speaker_wav = st.file_uploader(
    "Upload a clean voice sample (10–30 seconds)",
    type=["wav", "mp3"],
    key="uploader",
)

# Step 2: Choose or write text
st.subheader("✍️ Choose or Write Your Text")
predefined_texts = {
    "Greeting": "Hi there! I'm excited to talk to you today.",
    "Hindi-English": "Namaste! Mera naam Tanu hai aur main AI ke saath kaam karti hoon.",
    "Narration": "Once upon a time, in a quiet village, there lived a young explorer named Zoya.",
    "Funny": "Why did the neural net go to therapy? It couldn't stop overfitting!",
}
choice = st.selectbox(
    "Pick a predefined sentence:",
    list(predefined_texts.keys()),
    key="text_selector",
)
text = st.text_area(
    "Or write your own:",
    predefined_texts[choice],
    height=120,
    key="custom_text",
)
# Most presets are English, so the synthesis language is selectable instead of
# always being "hi".
language_label = st.selectbox(
    "Language of the text:",
    list(LANGUAGES),
    key="language_selector",
)

# Step 3: Generate and play audio
generate_clicked = st.button("🔊 Generate Voice", key="generate_button")

if generate_clicked and speaker_wav is not None and text.strip():
    with st.spinner("Cloning your voice and synthesizing..."):
        # Keep the real extension: the uploader also accepts .mp3, and writing
        # MP3 bytes into a file named *.wav can mislead the audio loader.
        suffix = os.path.splitext(speaker_wav.name)[1].lower() or ".wav"

        # A per-request temporary directory avoids concurrent sessions
        # overwriting each other's files and cleans up automatically.
        with tempfile.TemporaryDirectory() as workdir:
            speaker_path = os.path.join(workdir, f"uploaded_speaker{suffix}")
            output_path = os.path.join(workdir, "xtts_output.wav")

            with open(speaker_path, "wb") as f:
                # getvalue() returns the whole upload regardless of the
                # current read position, unlike read().
                f.write(speaker_wav.getvalue())

            st.session_state.tts.tts_to_file(
                text=text,
                speaker_wav=speaker_path,
                language=LANGUAGES[language_label],
                file_path=output_path,
            )

            # Read the result before the temporary directory is removed.
            with open(output_path, "rb") as f:
                audio_bytes = f.read()

    st.success("✅ Voice cloned successfully!")
    st.audio(audio_bytes, format="audio/wav")

elif generate_clicked:
    st.warning("⚠️ Please upload a voice sample and enter some text.")

# Optional demo samples
st.markdown("---")
st.subheader("🎧 Demo Samples (Optional)")

# isdir (not exists) so a stray file named demo_samples cannot make listdir
# raise; sorted so the listing order is stable across runs.
if os.path.isdir(DEMO_FOLDER):
    demo_files = sorted(f for f in os.listdir(DEMO_FOLDER) if f.endswith(".wav"))
else:
    demo_files = []

if demo_files:
    for demo in demo_files:
        st.markdown(f"**{demo}**")
        st.audio(os.path.join(DEMO_FOLDER, demo))
else:
    # Also shown when the folder exists but holds no .wav files.
    st.info("📁 No demo samples found. Add `.wav` files in the `demo_samples/` folder.")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
streamlit==1.33.0
TTS==0.22.0
# TTS 0.22.0 declares torch>=2.1, so the previous torch==1.10.0 /
# torchaudio==0.10.0 pins cannot be resolved together with it.
# NOTE(review): the <2.6 cap is there because newer torch reportedly changes
# the torch.load default (weights_only) in a way that breaks XTTS checkpoint
# loading in this TTS release -- confirm before relaxing it.
torch>=2.1,<2.6
torchaudio>=2.1,<2.6
soundfile==0.12.1
numpy>=1.21.0
scipy>=1.5.0
numba==0.58.1