akhaliq HF Staff committed on
Commit
e7644c2
·
verified ·
1 Parent(s): c9c1116

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio note-taking app that:
3
+ 1. Records voice via microphone
4
+ 2. Transcribes to text with Whisper (openai/whisper-large-v3)
5
+ 3. Generates a diagram image from the text with FLUX
6
+ 4. Displays the note and the diagram side-by-side
7
+ """
8
+
9
+ import os
10
+ import tempfile
11
+ import gradio as gr
12
+ from huggingface_hub import InferenceClient
13
+
14
+ # ------------------------------------------------------------------
15
+ # Configuration
16
+ # ------------------------------------------------------------------
17
# The access token comes from the environment (e.g. `export HF_TOKEN=...`);
# refuse to start without it, since every helper below goes through `client`.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError("Set HF_TOKEN environment variable")

# Single shared inference client, configured with the "fal-ai" provider and
# the "huggingface" billing target.
client = InferenceClient(provider="fal-ai", api_key=HF_TOKEN, bill_to="huggingface")
26
+
27
+ # ------------------------------------------------------------------
28
+ # Core helpers
29
+ # ------------------------------------------------------------------
30
+
31
+
32
def transcribe(audio_path: str) -> str:
    """Return the text recognised in the audio file at ``audio_path``.

    The file is sent to the ``openai/whisper-large-v3`` model through the
    module-level inference ``client``; only the ``"text"`` field of the
    response is returned.
    """
    asr_result = client.automatic_speech_recognition(audio_path, model="openai/whisper-large-v3")
    return asr_result["text"]
39
+
40
+
41
def generate_diagram(text: str) -> str:
    """Generate a diagram image from ``text`` with FLUX and return its file path.

    Args:
        text: The note content to illustrate; it is embedded in the image prompt.

    Returns:
        Path of a newly created ``.png`` temporary file holding the image.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    image = client.text_to_image(
        prompt=f"Clean, simple diagram illustrating: {text}",
        model="black-forest-labs/FLUX.1-schnell",
        width=768,
        height=512,
    )

    # Reserve a unique path, then close the handle *before* writing to it:
    # leaving it open leaks one file descriptor per call, and on Windows a
    # file that is still open cannot be reopened by name for the save.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        out_path = tmp.name
    image.save(out_path)
    return out_path
53
+
54
+
55
+ # ------------------------------------------------------------------
56
+ # Gradio UI
57
+ # ------------------------------------------------------------------
58
+
59
def process_voice(audio):
    """Chain transcription + diagram generation for one recording.

    Args:
        audio: Path to the recorded audio file, or a falsy value (``None`` /
            empty string) when there is no recording to process.

    Returns:
        A ``(text, img_path)`` tuple: the transcription and the path of the
        generated diagram. ``img_path`` is ``None`` when no diagram was
        generated (no audio, or a blank transcription).
    """
    # This handler is bound to the audio component's `change` event, which can
    # also deliver an empty value (e.g. when the recording is cleared). Reset
    # both outputs instead of passing None to the transcription call.
    if not audio:
        return "", None

    text = transcribe(audio)

    # Nothing was recognised: skip the image request rather than prompting the
    # image model with an empty description.
    if not text or not text.strip():
        return text or "", None

    img_path = generate_diagram(text)
    return text, img_path
64
+
65
+
66
with gr.Blocks(title="Voice-to-Diagram Note Taker") as demo:
    # Page header and usage hint.
    gr.Markdown("# 🎤 Voice Note & Diagram Generator")
    gr.Markdown("Speak into the microphone; your words become a note and an auto-generated diagram.")

    # Input row: microphone recorder that hands the handler a file path.
    with gr.Row():
        mic = gr.Audio(label="Record", sources="microphone", type="filepath")

    # Output row: editable transcription (wider column) next to the diagram.
    with gr.Row():
        with gr.Column(scale=2):
            note_text = gr.Textbox(lines=5, label="Transcription", interactive=True)
        with gr.Column(scale=1):
            diagram_img = gr.Image(label="Generated Diagram")

    # Re-run the whole pipeline whenever the recorded audio value changes.
    mic.change(process_voice, inputs=mic, outputs=[note_text, diagram_img])

demo.launch()