Spaces:

akhaliq
/

note-taking

Running

App Files Files Community

akhaliq HF Staff committed about 16 hours ago

Commit

e7644c2

verified ·

1 Parent(s): c9c1116

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+"""
+Gradio note-taking app that:
+1. Records voice via microphone
+2. Transcribes to text with Whisper (openai/whisper-large-v3)
+3. Generates a diagram image from the text with FLUX
+4. Displays the note and the diagram side-by-side
+"""
+import os
+import tempfile
+import gradio as gr
+from huggingface_hub import InferenceClient
+# ------------------------------------------------------------------
+# Configuration
+# ------------------------------------------------------------------
+HF_TOKEN = os.getenv("HF_TOKEN")  # export HF_TOKEN=...
+if not HF_TOKEN:
+    raise RuntimeError("Set HF_TOKEN environment variable")
+client = InferenceClient(
+    provider="fal-ai",
+    api_key=HF_TOKEN,
+    bill_to="huggingface",
+)
+# ------------------------------------------------------------------
+# Core helpers
+# ------------------------------------------------------------------
+def transcribe(audio_path: str) -> str:
+    """Transcribe audio file to text using Whisper."""
+    transcription = client.automatic_speech_recognition(
+        audio_path,
+        model="openai/whisper-large-v3",
+    )
+    return transcription["text"]
+def generate_diagram(text: str) -> str:
+    """Generate a diagram image from text using FLUX, save to tmp file and return path."""
+    image = client.text_to_image(
+        prompt=f"Clean, simple diagram illustrating: {text}",
+        model="black-forest-labs/FLUX.1-schnell",
+        width=768,
+        height=512,
+    )
+    tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+    image.save(tmp.name)
+    return tmp.name
+# ------------------------------------------------------------------
+# Gradio UI
+# ------------------------------------------------------------------
+def process_voice(audio):
+    """Chain transcription + diagram generation."""
+    text = transcribe(audio)
+    img_path = generate_diagram(text)
+    return text, img_path
+with gr.Blocks(title="Voice-to-Diagram Note Taker") as demo:
+    gr.Markdown("# 🎤 Voice Note & Diagram Generator")
+    gr.Markdown("Speak into the microphone; your words become a note and an auto-generated diagram.")
+    with gr.Row():
+        mic = gr.Audio(sources="microphone", type="filepath", label="Record")
+    with gr.Row():
+        with gr.Column(scale=2):
+            note_text = gr.Textbox(label="Transcription", lines=5, interactive=True)
+        with gr.Column(scale=1):
+            diagram_img = gr.Image(label="Generated Diagram")
+    mic.change(fn=process_voice, inputs=mic, outputs=[note_text, diagram_img])
+demo.launch()