mrfakename committed on
Commit
fb9850e
·
verified ·
1 Parent(s): d7dc228

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -61
app.py CHANGED
@@ -1,68 +1,38 @@
1
import os

import gradio as gr
import spaces
from f5_tts.api import F5TTS
from huggingface_hub import snapshot_download
from tqdm import tqdm

# Banner shown at the top of the demo UI.
ABOUT = """
# OpenF5 TTS Demo

Model is not released yet, release planned once model has finished training.
"""

# Enable the accelerated hf_transfer download backend.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Fetch only the checkpoints and the vocab file; HF_TOKEN gates the repo.
checkpoints_path = snapshot_download(
    "mrfakename/openf5-v2",
    allow_patterns=["model_*.pt", "vocab.txt"],
    token=os.getenv("HF_TOKEN"),
)
models = {}
checkpoint_files = [name for name in os.listdir(checkpoints_path) if name.endswith(".pt")]
19
def get_step_number(filename):
    """Sort key for checkpoint filenames.

    ``model_last.pt`` sorts first, ``model_<step>.pt`` sorts by step,
    and anything unparseable sorts last.
    """
    stem = filename.replace(".pt", "")
    if stem == "model_last":
        # Guarantee model_last is the very first entry.
        return float("-inf")
    parts = stem.split("_")
    try:
        return int(parts[1])
    except (IndexError, ValueError):
        # Non-standard names go to the end of the list.
        return float("inf")
28
-
29
sorted_checkpoints = sorted(checkpoint_files, key=get_step_number)

# Instantiate one F5TTS per checkpoint, loading sequentially to keep
# peak memory low; all checkpoints share the same vocab file.
vocab_path = os.path.join(checkpoints_path, "vocab.txt")
for ckpt in tqdm(sorted_checkpoints, desc="Loading models"):
    models[ckpt.replace(".pt", "")] = F5TTS(
        ckpt_file=os.path.join(checkpoints_path, ckpt),
        vocab_file=vocab_path,
    )
38
 
 
 
39
 
40
-
41
-
42
@spaces.GPU
def generate_audio(model_name, ref_file, ref_text, gen_text, progress=gr.Progress()):
    """Synthesize speech with the selected checkpoint.

    Returns a ``(sample_rate, waveform)`` tuple for a gr.Audio output.
    Raises gr.Error when the reference audio or target text is missing.
    """
    if not (ref_file and gen_text):
        raise gr.Error("Please provide a reference audio and text to generate")
    tts = models[model_name]
    wav, sr, _ = tts.infer(
        ref_file=ref_file,
        ref_text=ref_text,
        gen_text=gen_text,
        seed=-1,  # -1 requests a random seed
        progress=progress,
    )
    return sr, wav
55
 
56
with gr.Blocks() as demo:
    gr.Markdown(ABOUT)
    # Model picker lists checkpoints in their sorted load order.
    model_name = gr.Radio(label="Model", choices=list(models.keys()))
    ref_file = gr.Audio(label="Reference Audio", type="filepath")
    gen_text = gr.Textbox(label="Text")
    btn_generate = gr.Button("Generate Audio", variant="primary")
    gen_audio = gr.Audio(label="Generated Audio")
    with gr.Accordion("Advanced Options", open=False):
        # Optional transcript of the reference audio.
        ref_text = gr.Textbox(label="Reference Text")

    btn_generate.click(
        fn=generate_audio,
        inputs=[model_name, ref_file, ref_text, gen_text],
        outputs=[gen_audio],
    )

demo.launch()
 
 
 
 
 
 
1
  import gradio as gr
2
+ import soundfile as sf
3
+ import numpy as np
4
+ from dia.model import Dia
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Load the model once at startup so each request doesn't pay the load cost.
model = Dia.from_pretrained("nari-labs/Dia-1.6B")


def generate_dialogue(script):
    """Generate dialogue audio from a [S1]/[S2]-tagged script.

    Returns the path to a wav file for a gr.Audio(type="filepath") output.
    """
    import tempfile

    output = model.generate(script)
    # Fix: the original wrote every generation to the fixed CWD filename
    # "generated_dialogue.wav", so concurrent requests clobbered each
    # other's output. Write to a unique temp file instead.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        filename = tmp.name
    sf.write(filename, output, 44100)  # NOTE(review): assumes Dia emits 44.1 kHz audio — confirm
    return filename
 
 
 
 
 
 
 
 
 
 
14
 
15
with gr.Blocks() as demo:
    # Header and usage hint.
    gr.Markdown("## 🎙️ Dia - Text to Dialogue Demo")
    gr.Markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

    with gr.Row():
        script_input = gr.Textbox(
            label="Script",
            lines=6,
            value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
        )

    with gr.Row():
        generate_btn = gr.Button("Generate Audio")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Dialogue", type="filepath")

    # Wire the button to the synthesis function.
    generate_btn.click(generate_dialogue, inputs=script_input, outputs=audio_output)

demo.launch()