FarmanMansoori committed on
Commit
4a1af29
·
verified ·
1 Parent(s): 5f55ad8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -38
app.py CHANGED
@@ -1,57 +1,37 @@
1
  import os
2
  import torch
3
  import gradio as gr
4
- from huggingface_hub import hf_hub_download
5
 
6
  from openvoice import se_extractor
7
  from openvoice.api import BaseSpeakerTTS, ToneColorConverter
8
 
9
- # ---------------------------
10
- # Setup paths
11
- # ---------------------------
12
  CHECKPOINTS_DIR = "./checkpoints_v2"
13
  os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
14
-
15
- # Download OpenVoice V2 checkpoints from Hugging Face
16
- hf_hub_download(repo_id="myshell-ai/OpenVoiceV2", local_dir=CHECKPOINTS_DIR, local_dir_use_symlinks=False)
17
-
18
- # Converter + base checkpoints
19
  ckpt_converter = os.path.join(CHECKPOINTS_DIR, "converter")
20
  ckpt_base = os.path.join(CHECKPOINTS_DIR, "base_speakers")
21
-
22
- # Output folder
23
  OUTPUT_DIR = "./outputs"
24
  os.makedirs(OUTPUT_DIR, exist_ok=True)
25
 
26
- # ---------------------------
27
- # Load models
28
- # ---------------------------
 
29
  device = "cuda" if torch.cuda.is_available() else "cpu"
30
  base_tts = BaseSpeakerTTS(ckpt_base, device=device)
31
  converter = ToneColorConverter(ckpt_converter, device=device)
32
 
33
- # ---------------------------
34
- # Default reference voice (from your repo)
35
- # ---------------------------
36
- DEFAULT_VOICE = os.path.join(os.path.dirname(__file__), "my_voice.wav")
37
-
38
- # ---------------------------
39
- # Voice generation function
40
- # ---------------------------
41
  def generate_voice(script, ref_audio):
42
- if not script.strip():
43
  return None
44
 
45
- # Use uploaded voice or fallback to default
46
  ref_audio = ref_audio or DEFAULT_VOICE
47
  target_se, _ = se_extractor.get_se(ref_audio, converter, vad=True)
48
 
49
- # Step 1: Generate base speech
50
  src_path = os.path.join(OUTPUT_DIR, "tmp.wav")
51
  base_tts.tts(script, src_path, speaker="EN", language="EN")
52
 
53
- # Step 2: Convert to target tone
54
- out_path = os.path.join(OUTPUT_DIR, "output.wav")
55
  converter.convert(
56
  audio_src_path=src_path,
57
  src_se=None,
@@ -61,19 +41,14 @@ def generate_voice(script, ref_audio):
61
 
62
  return out_path
63
 
64
- # ---------------------------
65
- # Gradio app
66
- # ---------------------------
67
  with gr.Blocks() as demo:
68
- gr.Markdown("# 🎙️ OpenVoice V2 - Voice Cloning")
69
- gr.Markdown("Type a script and optionally upload a reference audio. If none is uploaded, your `my_voice.wav` file will be used.")
70
-
71
  script = gr.Textbox(label="Script", lines=4, placeholder="Enter text here...")
72
  ref_audio = gr.Audio(label="Reference Voice (optional)", type="filepath")
73
- generate_btn = gr.Button("Generate Speech")
74
- output_audio = gr.Audio(label="Generated Audio", type="filepath")
75
-
76
- generate_btn.click(fn=generate_voice, inputs=[script, ref_audio], outputs=output_audio)
77
 
78
  if __name__ == "__main__":
79
- demo.launch()
 
1
  import os
2
  import torch
3
  import gradio as gr
 
4
 
5
  from openvoice import se_extractor
6
  from openvoice.api import BaseSpeakerTTS, ToneColorConverter
7
 
8
+ # paths
 
 
# ---------------------------------------------------------------------------
# Paths and model setup
# ---------------------------------------------------------------------------

# Checkpoint locations.
# NOTE(review): nothing here downloads the checkpoints any more (the
# hf_hub_download call was removed) — ./checkpoints_v2 must already be
# populated at deploy time; confirm the deployment step.
CHECKPOINTS_DIR = "./checkpoints_v2"
os.makedirs(CHECKPOINTS_DIR, exist_ok=True)

ckpt_converter = os.path.join(CHECKPOINTS_DIR, "converter")
ckpt_base = os.path.join(CHECKPOINTS_DIR, "base_speakers")

# Where generated audio files are written.
OUTPUT_DIR = "./outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Fallback reference voice shipped next to this file, used when the
# user does not upload one.
DEFAULT_VOICE = os.path.join(os.path.dirname(__file__), "my_voice.wav")

# Load models on GPU when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
base_tts = BaseSpeakerTTS(ckpt_base, device=device)
converter = ToneColorConverter(ckpt_converter, device=device)
23
 
 
 
 
 
 
 
 
 
def generate_voice(script, ref_audio):
    """Synthesize `script` with the base TTS voice, then re-color it to match
    the reference recording.

    Args:
        script: Text to speak. Empty or whitespace-only input yields None.
        ref_audio: Optional path to a reference recording; falls back to
            DEFAULT_VOICE when not provided.

    Returns:
        Path to the generated wav file, or None for empty input.
    """
    # Guard: Gradio may pass None or an empty string for the textbox.
    if not script or not script.strip():
        return None

    # Use the uploaded recording if given, otherwise the bundled default.
    ref_audio = ref_audio or DEFAULT_VOICE
    target_se, _ = se_extractor.get_se(ref_audio, converter, vad=True)

    # Step 1: generate base speech in the stock "EN" speaker voice.
    src_path = os.path.join(OUTPUT_DIR, "tmp.wav")
    base_tts.tts(script, src_path, speaker="EN", language="EN")

    # Step 2: convert the base speech to the target tone color.
    # BUG FIX: torch.randint requires an integer upper bound; the previous
    # `1e9` (a float) raised TypeError on every call. `10**9` keeps the
    # same random-suffix behavior with an int bound.
    out_path = os.path.join(
        OUTPUT_DIR, f"output_{int(torch.randint(0, 10**9, (1,)).item())}.wav"
    )
    # NOTE(review): src_se=None relies on the converter tolerating a missing
    # source embedding — confirm against openvoice.api.ToneColorConverter.
    # The tgt_se/output_path keywords were elided in the reviewed diff and
    # are reconstructed per the public OpenVoice API — TODO confirm.
    converter.convert(
        audio_src_path=src_path,
        src_se=None,
        tgt_se=target_se,
        output_path=out_path,
    )

    return out_path
43
 
 
 
 
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ OpenVoice V2 - Voice Cloning (Space)")
    gr.Markdown(
        "Type text and optionally upload a reference audio. "
        "If none, `my_voice.wav` in repo root is used."
    )

    # Input widgets.
    text_input = gr.Textbox(label="Script", lines=4, placeholder="Enter text here...")
    voice_input = gr.Audio(label="Reference Voice (optional)", type="filepath")

    # Trigger and output.
    generate_button = gr.Button("Generate")
    result_audio = gr.Audio(label="Generated Audio", type="filepath")

    # Wire the button to the synthesis function.
    generate_button.click(
        generate_voice,
        inputs=[text_input, voice_input],
        outputs=result_audio,
    )

if __name__ == "__main__":
    demo.launch()