Hugging Face Space — commit "Update app.py" (Space status: Build error).
Diff of the changed file app.py follows.
|
@@ -1,57 +1,37 @@
|
|
| 1 |
import os
|
| 2 |
import torch
|
| 3 |
import gradio as gr
|
| 4 |
-
from huggingface_hub import hf_hub_download
|
| 5 |
|
| 6 |
from openvoice import se_extractor
|
| 7 |
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
# Setup paths
|
| 11 |
-
# ---------------------------
|
| 12 |
CHECKPOINTS_DIR = "./checkpoints_v2"
|
| 13 |
os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
|
| 14 |
-
|
| 15 |
-
# Download OpenVoice V2 checkpoints from Hugging Face
|
| 16 |
-
hf_hub_download(repo_id="myshell-ai/OpenVoiceV2", local_dir=CHECKPOINTS_DIR, local_dir_use_symlinks=False)
|
| 17 |
-
|
| 18 |
-
# Converter + base checkpoints
|
| 19 |
ckpt_converter = os.path.join(CHECKPOINTS_DIR, "converter")
|
| 20 |
ckpt_base = os.path.join(CHECKPOINTS_DIR, "base_speakers")
|
| 21 |
-
|
| 22 |
-
# Output folder
|
| 23 |
OUTPUT_DIR = "./outputs"
|
| 24 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
|
|
|
| 29 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 30 |
base_tts = BaseSpeakerTTS(ckpt_base, device=device)
|
| 31 |
converter = ToneColorConverter(ckpt_converter, device=device)
|
| 32 |
|
| 33 |
-
# ---------------------------
|
| 34 |
-
# Default reference voice (from your repo)
|
| 35 |
-
# ---------------------------
|
| 36 |
-
DEFAULT_VOICE = os.path.join(os.path.dirname(__file__), "my_voice.wav")
|
| 37 |
-
|
| 38 |
-
# ---------------------------
|
| 39 |
-
# Voice generation function
|
| 40 |
-
# ---------------------------
|
| 41 |
def generate_voice(script, ref_audio):
|
| 42 |
-
if not script.strip():
|
| 43 |
return None
|
| 44 |
|
| 45 |
-
# Use uploaded voice or fallback to default
|
| 46 |
ref_audio = ref_audio or DEFAULT_VOICE
|
| 47 |
target_se, _ = se_extractor.get_se(ref_audio, converter, vad=True)
|
| 48 |
|
| 49 |
-
# Step 1: Generate base speech
|
| 50 |
src_path = os.path.join(OUTPUT_DIR, "tmp.wav")
|
| 51 |
base_tts.tts(script, src_path, speaker="EN", language="EN")
|
| 52 |
|
| 53 |
-
|
| 54 |
-
out_path = os.path.join(OUTPUT_DIR, "output.wav")
|
| 55 |
converter.convert(
|
| 56 |
audio_src_path=src_path,
|
| 57 |
src_se=None,
|
|
@@ -61,19 +41,14 @@ def generate_voice(script, ref_audio):
|
|
| 61 |
|
| 62 |
return out_path
|
| 63 |
|
| 64 |
-
# ---------------------------
|
| 65 |
-
# Gradio app
|
| 66 |
-
# ---------------------------
|
| 67 |
with gr.Blocks() as demo:
|
| 68 |
-
gr.Markdown("# 🎙️ OpenVoice V2 - Voice Cloning")
|
| 69 |
-
gr.Markdown("Type
|
| 70 |
-
|
| 71 |
script = gr.Textbox(label="Script", lines=4, placeholder="Enter text here...")
|
| 72 |
ref_audio = gr.Audio(label="Reference Voice (optional)", type="filepath")
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
generate_btn.click(fn=generate_voice, inputs=[script, ref_audio], outputs=output_audio)
|
| 77 |
|
| 78 |
if __name__ == "__main__":
|
| 79 |
-
demo.launch()
|
|
|
|
| 1 |
import os
import torch
import gradio as gr

from openvoice import se_extractor
from openvoice.api import BaseSpeakerTTS, ToneColorConverter

# ---------------------------------------------------------------------------
# Paths / checkpoints
# ---------------------------------------------------------------------------
CHECKPOINTS_DIR = "./checkpoints_v2"
os.makedirs(CHECKPOINTS_DIR, exist_ok=True)

# BaseSpeakerTTS / ToneColorConverter below load their weights from disk, so
# the checkpoints must actually be present.  The previous revision called
# `hf_hub_download(repo_id=..., local_dir=...)`, which raises a TypeError
# because the required `filename` argument is missing; `snapshot_download`
# is the API that mirrors a whole model repo.  Only fetch when the directory
# is still empty (first boot of the Space), so restarts stay fast.
if not os.listdir(CHECKPOINTS_DIR):
    from huggingface_hub import snapshot_download  # deferred: needed on first run only
    snapshot_download(repo_id="myshell-ai/OpenVoiceV2", local_dir=CHECKPOINTS_DIR)

ckpt_converter = os.path.join(CHECKPOINTS_DIR, "converter")
ckpt_base = os.path.join(CHECKPOINTS_DIR, "base_speakers")

OUTPUT_DIR = "./outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Default reference voice: `my_voice.wav` next to this file, used by
# generate_voice() when the user does not upload a reference recording.
DEFAULT_VOICE = os.path.join(os.path.dirname(__file__), "my_voice.wav")
|
| 18 |
+
|
| 19 |
+
# ---------------------------------------------------------------------------
# Model loading (once, at import time)
# ---------------------------------------------------------------------------
# Prefer the GPU when CUDA is available; otherwise everything runs on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Base text-to-speech model plus the tone-colour converter used for cloning.
base_tts = BaseSpeakerTTS(ckpt_base, device=device)
converter = ToneColorConverter(ckpt_converter, device=device)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def generate_voice(script, ref_audio):
|
| 25 |
+
if not script or not script.strip():
|
| 26 |
return None
|
| 27 |
|
|
|
|
| 28 |
ref_audio = ref_audio or DEFAULT_VOICE
|
| 29 |
target_se, _ = se_extractor.get_se(ref_audio, converter, vad=True)
|
| 30 |
|
|
|
|
| 31 |
src_path = os.path.join(OUTPUT_DIR, "tmp.wav")
|
| 32 |
base_tts.tts(script, src_path, speaker="EN", language="EN")
|
| 33 |
|
| 34 |
+
out_path = os.path.join(OUTPUT_DIR, f"output_{int(torch.randint(0,1e9,(1,)).item())}.wav")
|
|
|
|
| 35 |
converter.convert(
|
| 36 |
audio_src_path=src_path,
|
| 37 |
src_se=None,
|
|
|
|
| 41 |
|
| 42 |
return out_path
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ OpenVoice V2 - Voice Cloning (Space)")
    gr.Markdown("Type text and optionally upload a reference audio. If none, `my_voice.wav` in repo root is used.")

    # Inputs: the text to synthesise and an optional reference recording
    # (passed to generate_voice as a file path).
    script = gr.Textbox(label="Script", lines=4, placeholder="Enter text here...")
    ref_audio = gr.Audio(label="Reference Voice (optional)", type="filepath")

    # Trigger button and the slot that receives the generated audio file.
    generate_btn = gr.Button("Generate")
    output_audio = gr.Audio(label="Generated Audio", type="filepath")
    generate_btn.click(fn=generate_voice, inputs=[script, ref_audio], outputs=output_audio)

if __name__ == "__main__":
    demo.launch()
|