mrfakename committed on
Commit
fb9850e
·
verified ·
1 Parent(s): d7dc228

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -61
app.py CHANGED
@@ -1,68 +1,38 @@
1
import os

import gradio as gr
import spaces
from f5_tts.api import F5TTS
from huggingface_hub import snapshot_download
from tqdm import tqdm

# Banner shown at the top of the demo UI.
ABOUT = """
# OpenF5 TTS Demo

Model is not released yet, release planned once model has finished training.
"""

# Enable the accelerated hf_transfer download backend.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Fetch only the checkpoints and the vocab file; HF_TOKEN gates the repo.
checkpoints_path = snapshot_download(
    "mrfakename/openf5-v2",
    allow_patterns=["model_*.pt", "vocab.txt"],
    token=os.getenv("HF_TOKEN"),
)
models = {}
checkpoint_files = [name for name in os.listdir(checkpoints_path) if name.endswith(".pt")]
19
def get_step_number(filename):
    """Sort key for checkpoint filenames.

    ``model_last.pt`` sorts first, ``model_<step>.pt`` sorts by step,
    and anything unparseable sorts last.
    """
    stem = filename.replace(".pt", "")
    if stem == "model_last":
        # Guarantee model_last is the very first entry.
        return float("-inf")
    parts = stem.split("_")
    try:
        return int(parts[1])
    except (IndexError, ValueError):
        # Non-standard names go to the end of the list.
        return float("inf")
28
-
29
sorted_checkpoints = sorted(checkpoint_files, key=get_step_number)

# Instantiate one F5TTS per checkpoint, loading sequentially to keep
# peak memory low; all checkpoints share the same vocab file.
vocab_path = os.path.join(checkpoints_path, "vocab.txt")
for ckpt in tqdm(sorted_checkpoints, desc="Loading models"):
    models[ckpt.replace(".pt", "")] = F5TTS(
        ckpt_file=os.path.join(checkpoints_path, ckpt),
        vocab_file=vocab_path,
    )
38
 
 
 
39
 
40
-
41
-
42
@spaces.GPU
def generate_audio(model_name, ref_file, ref_text, gen_text, progress=gr.Progress()):
    """Synthesize speech with the selected checkpoint.

    Returns a ``(sample_rate, waveform)`` tuple for a gr.Audio output.
    Raises gr.Error when the reference audio or target text is missing.
    """
    if not (ref_file and gen_text):
        raise gr.Error("Please provide a reference audio and text to generate")
    tts = models[model_name]
    wav, sr, _ = tts.infer(
        ref_file=ref_file,
        ref_text=ref_text,
        gen_text=gen_text,
        seed=-1,  # -1 requests a random seed
        progress=progress,
    )
    return sr, wav
55
 
56
with gr.Blocks() as demo:
    gr.Markdown(ABOUT)
    # Model picker lists checkpoints in their sorted load order.
    model_name = gr.Radio(label="Model", choices=list(models.keys()))
    ref_file = gr.Audio(label="Reference Audio", type="filepath")
    gen_text = gr.Textbox(label="Text")
    btn_generate = gr.Button("Generate Audio", variant="primary")
    gen_audio = gr.Audio(label="Generated Audio")
    with gr.Accordion("Advanced Options", open=False):
        # Optional transcript of the reference audio.
        ref_text = gr.Textbox(label="Reference Text")

    btn_generate.click(
        fn=generate_audio,
        inputs=[model_name, ref_file, ref_text, gen_text],
        outputs=[gen_audio],
    )

demo.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
+ import soundfile as sf
3
+ import numpy as np
4
+ from dia.model import Dia
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Load the model once at startup so each request doesn't pay the load cost.
model = Dia.from_pretrained("nari-labs/Dia-1.6B")


def generate_dialogue(script):
    """Generate dialogue audio from a [S1]/[S2]-tagged script.

    Returns the path to a wav file for a gr.Audio(type="filepath") output.
    """
    import tempfile

    output = model.generate(script)
    # Fix: the original wrote every generation to the fixed CWD filename
    # "generated_dialogue.wav", so concurrent requests clobbered each
    # other's output. Write to a unique temp file instead.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        filename = tmp.name
    sf.write(filename, output, 44100)  # NOTE(review): assumes Dia emits 44.1 kHz audio — confirm
    return filename
 
 
 
 
 
 
 
 
 
 
14
 
15
with gr.Blocks() as demo:
    # Header and usage hint.
    gr.Markdown("## 🎙️ Dia - Text to Dialogue Demo")
    gr.Markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

    with gr.Row():
        script_input = gr.Textbox(
            label="Script",
            lines=6,
            value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
        )

    with gr.Row():
        generate_btn = gr.Button("Generate Audio")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Dialogue", type="filepath")

    # Wire the button to the synthesis function.
    generate_btn.click(generate_dialogue, inputs=script_input, outputs=audio_output)

demo.launch()