Spaces:

akhaliq
/

note-taking

Running

App Files Files Community

note-taking / app.py

akhaliq HF Staff

Upload app.py with huggingface_hub

e7644c2 verified about 16 hours ago

raw

history blame contribute delete

2.53 kB

	"""
	Gradio note-taking app that:
	1. Records voice via microphone
	2. Transcribes to text with Whisper (openai/whisper-large-v3)
	3. Generates a diagram image from the text with FLUX
	4. Displays the note and the diagram side-by-side
	"""

	import os
	import tempfile
	import gradio as gr
	from huggingface_hub import InferenceClient

	# ------------------------------------------------------------------
	# Configuration
	# ------------------------------------------------------------------
	# Token used to authenticate the inference client; export HF_TOKEN=... before starting.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	if HF_TOKEN is None or HF_TOKEN == "":
	    # Stop at start-up instead of constructing a client with no credentials.
	    raise RuntimeError("Set HF_TOKEN environment variable")

	# Single shared inference client used by every helper in this module.
	client = InferenceClient(provider="fal-ai", api_key=HF_TOKEN, bill_to="huggingface")

	# ------------------------------------------------------------------
	# Core helpers
	# ------------------------------------------------------------------


	def transcribe(audio_path: str) -> str:
	    """Return the text recognised in the audio file at ``audio_path``.

	    The file is sent to the ``openai/whisper-large-v3`` model through the
	    module-level ``client``, and the ``"text"`` entry of the response is
	    returned.
	    """
	    asr_result = client.automatic_speech_recognition(audio_path, model="openai/whisper-large-v3")
	    return asr_result["text"]


	def generate_diagram(text: str) -> str:
	    """Generate a diagram image for ``text`` and return the path of the saved PNG.

	    The image is requested from ``black-forest-labs/FLUX.1-schnell`` at
	    768x512 through the module-level ``client`` and written to a newly
	    created temporary ``.png`` file. The file is created with
	    ``delete=False``, so this function never removes it.

	    Args:
	        text: Description that is embedded in the image prompt.

	    Returns:
	        Filesystem path of the saved image.
	    """
	    image = client.text_to_image(
	        prompt=f"Clean, simple diagram illustrating: {text}",
	        model="black-forest-labs/FLUX.1-schnell",
	        width=768,
	        height=512,
	    )

	    # Reserve a unique path, then close the handle before saving: leaving it
	    # open leaks one file descriptor per call, and reopening a path that is
	    # still held open fails on Windows.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	        image_path = tmp.name
	    image.save(image_path)
	    return image_path


	# ------------------------------------------------------------------
	# Gradio UI
	# ------------------------------------------------------------------

	def process_voice(audio):
	    """Chain transcription and diagram generation for one recording.

	    Args:
	        audio: Path of the recorded audio file, or ``None``/empty when there
	            is no recording to process (for example after the recording
	            has been cleared).

	    Returns:
	        A ``(text, image_path)`` tuple for the transcription box and the
	        diagram image. When there is no recording this is ``("", None)``,
	        which clears both outputs.
	    """
	    # The change event also fires when the recording is removed; skip the
	    # remote calls instead of failing on a missing file path.
	    if not audio:
	        return "", None

	    text = transcribe(audio)
	    img_path = generate_diagram(text)
	    return text, img_path


	with gr.Blocks(title="Voice-to-Diagram Note Taker") as demo:
	    # Page heading followed by a one-line usage hint.
	    gr.Markdown("# 🎤 Voice Note & Diagram Generator")
	    gr.Markdown("Speak into the microphone; your words become a note and an auto-generated diagram.")

	    # First row: the microphone recorder, delivered to the handler as a file path.
	    with gr.Row():
	        mic = gr.Audio(sources="microphone", type="filepath", label="Record")

	    # Second row: editable transcription (wider column) beside the diagram.
	    with gr.Row():
	        with gr.Column(scale=2):
	            note_text = gr.Textbox(label="Transcription", lines=5, interactive=True)
	        with gr.Column(scale=1):
	            diagram_img = gr.Image(label="Generated Diagram")

	    # Re-run the whole pipeline whenever the recording changes.
	    mic.change(
	        fn=process_voice,
	        inputs=mic,
	        outputs=[note_text, diagram_img],
	    )

	demo.launch()