Itanutiwari527 committed on
Commit
1339db1
·
verified ·
1 Parent(s): 22e9316

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ demo_voices/demo_eng_female.mp3 filter=lfs diff=lfs merge=lfs -text
37
+ demo_voices/demo_eng_male.mp3 filter=lfs diff=lfs merge=lfs -text
38
+ demo_voices/demo_female_hindi.wav filter=lfs diff=lfs merge=lfs -text
39
+ demo_voices/English_male_long.mp3 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the XTTS voice-cloning Streamlit app (app.py).
FROM python:3.10-slim

# Avoid prompts during build
ENV DEBIAN_FRONTEND=noninteractive

# Install required system packages; skip recommended extras to keep the
# image small and drop the apt lists in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
        libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# with a real home directory so the process has a writable HOME and a
# writable working directory at runtime.
RUN useradd --create-home --uid 1000 user
ENV HOME=/home/user

WORKDIR /app
RUN chown user:user /app

# Install numpy before other libraries to avoid conflicts
COPY pre-requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r pre-requirements.txt

# Now install the rest of the dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code, owned by the runtime user
COPY --chown=user:user . .

# Packages were installed system-wide as root above; drop privileges only
# for running the app.
USER user

# NOTE(review): the Coqui model downloader may ask for licence confirmation
# the first time XTTS is fetched; without a TTY that likely requires setting
# COQUI_TOS_AGREED=1 deliberately -- confirm before deploying.

EXPOSE 7860

# Start Streamlit app
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ from TTS.api import TTS
6
+ import soundfile as sf
7
+ import glob
8
+
# Load XTTS model
@st.cache_resource
def load_xtts_model():
    """Create the XTTS v2 synthesizer used by the app.

    The function is wrapped in ``st.cache_resource`` so the model object is
    built once and reused on later script reruns instead of being reloaded.

    Returns:
        A ``TTS`` instance for the multilingual XTTS v2 model, placed on the
        GPU when CUDA is available and on the CPU otherwise.
    """
    use_gpu = torch.cuda.is_available()
    model = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=use_gpu,
    )
    return model


tts = load_xtts_model()

# UI
st.title("XTTS Voice Cloning Demo")
st.markdown("1. Select a demo voice OR upload your own\n2. Choose or write text\n3. Hear your cloned voice!")

# MIME types for previewing a reference clip, keyed by lower-case extension.
# The bundled demo voices are a mix of .mp3 and .wav files, so the preview
# format cannot be hard-coded to WAV.
_AUDIO_MIME_TYPES = {
    ".wav": "audio/wav",
    ".mp3": "audio/mpeg",
    ".ogg": "audio/ogg",
    ".flac": "audio/flac",
}


def _audio_mime_type(path):
    """Return the playback MIME type for ``path`` based on its extension.

    Unknown extensions fall back to "audio/wav", the format the app used for
    every clip before.
    """
    extension = os.path.splitext(path)[1].lower()
    return _AUDIO_MIME_TYPES.get(extension, "audio/wav")


# Load pre-recorded demo voices (sorted so the select box order is stable;
# glob returns files in arbitrary order).
demo_voice_dir = "./demo_voices"
demo_files = sorted(glob.glob(f"{demo_voice_dir}/*"))
demo_names = [os.path.basename(f) for f in demo_files]

voice_source = st.radio("Choose voice input method:", ["Use pre-recorded demo voice", "Upload your own voice"])

# Path of the reference clip to clone; stays None until a voice is chosen.
speaker_wav_path = None

if voice_source == "Use pre-recorded demo voice":
    if demo_files:
        selected_demo = st.selectbox("Choose a demo voice:", demo_names)
        speaker_wav_path = os.path.join(demo_voice_dir, selected_demo)
        st.audio(speaker_wav_path, format=_audio_mime_type(speaker_wav_path))
    else:
        st.warning("No demo voices found in 'demo_voices/' folder.")

elif voice_source == "Upload your own voice":
    uploaded_file = st.file_uploader("Upload your voice sample (WAV, mono, 16k–48kHz):", type=["wav"])
    if uploaded_file:
        # Persist the upload to disk because the synthesizer is given a file
        # path. getvalue() returns the whole buffer regardless of the current
        # stream position.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(uploaded_file.getvalue())
            speaker_wav_path = temp_audio.name
        # Preview only after the file is closed so all bytes are flushed.
        st.audio(speaker_wav_path, format="audio/wav")

# Preset sentences (Hindi and English) offered in the text picker, in
# display order.
preset_sentences = (
    "नमस्ते, यह मेरी क्लोन की गई आवाज़ है।",
    "Hello Everyone, This is my voice cloned using previously recorded voice sample",
    "मैं आर्टिफिशियल इंटेलिजेंस की मदद से बोल रहा हूँ।",
    "यह आवाज़ असली नहीं है, लेकिन क्या आपने फर्क किया?",
    "This is not my real voice, but can you tell the difference",
    "जीवन एक सुंदर यात्रा है, हर पल को जीओ।",
)

# Each preset maps to itself; the final entry maps to the "custom" marker,
# which switches the UI to a free-text box.
predefined_texts = {sentence: sentence for sentence in preset_sentences}
predefined_texts["Use custom text"] = "custom"


selected_text = st.selectbox("Choose or write text to synthesize:", list(predefined_texts))
input_text = predefined_texts[selected_text]
if input_text == "custom":
    # The user asked to type their own sentence instead of using a preset.
    input_text = st.text_area("Enter custom text:", "Hello, how are you?")

# Clone & Synthesize
def _guess_language(text):
    """Pick the language code passed to the synthesizer for ``text``.

    Returns "hi" when the text contains any character from the Devanagari
    Unicode block (U+0900-U+097F), which covers the Hindi presets, and "en"
    otherwise.
    """
    has_devanagari = any("\u0900" <= ch <= "\u097f" for ch in text)
    return "hi" if has_devanagari else "en"


def _remove_quietly(path):
    """Delete ``path``; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


if speaker_wav_path and input_text.strip():
    if st.button("🎧 Clone & Synthesize"):
        # Write to a per-request temporary file rather than a fixed name in
        # the working directory, so concurrent sessions cannot overwrite each
        # other's output.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
            output_path = out_file.name

        try:
            with st.spinner("Cloning voice..."):
                tts.tts_to_file(
                    text=input_text,
                    speaker_wav=speaker_wav_path,
                    language=_guess_language(input_text),
                    file_path=output_path,
                )
            # Load the result into memory so the file can be deleted before
            # the audio widget is rendered.
            with open(output_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
        finally:
            # Runs on success and on failure, so temp files never pile up.
            _remove_quietly(output_path)
            # Clean up temp file if uploaded
            if voice_source == "Upload your own voice":
                _remove_quietly(speaker_wav_path)

        st.success("Done! Here's your cloned voice:")
        st.audio(audio_bytes, format="audio/wav")
demo_voices/English_male_long.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e25f7504473dc28cf288828a46bd1cd2974a8bbaf6fc81b4b261e7fae6429e3
3
+ size 1828181
demo_voices/demo_eng_female.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0f3710c54962749d652d06ee880430bbde4a1ced3af3210016b1d2ad04b9ce9
3
+ size 578228
demo_voices/demo_eng_male.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b693f7f888f358cf17ddd660af2f0835924482b7e8614153e763921282da1587
3
+ size 248810
demo_voices/demo_female_hindi.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a90e0875347854b9771b83a12c974166af9a0a844c683c318484985e28c02381
3
+ size 2986062
pre-requirements.txt ADDED
@@ -0,0 +1 @@
 
 
# Installed before requirements.txt; must match the numpy pin there
# (numpy==1.22.0) so the second install step does not replace it.
numpy==1.22.0
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Streamlit frontend
2
+ streamlit==1.33.0
3
+
4
+ # Voice Cloning - Coqui TTS
5
+ TTS==0.22.0
6
+
7
+ # Audio handling
8
+ soundfile
9
+ numpy==1.22.0
10
+ scipy==1.11.4
11
+
12
+ # PyTorch (CPU version is okay for Spaces unless GPU is enabled)
13
+ torch==2.0.1
14
+ torchaudio==2.0.2
15
+
16
+ # File handling
17
+ ffmpeg-python