Spaces:

freddyaboulton
/

really-fast-whisper

Running on CPU Upgrade

App Files Files Community

really-fast-whisper / app.py

mfuntowicz HF Staff

update link to endpoint

327eac0 verified 20 days ago

raw

history blame

2.76 kB

	import os
	from pathlib import Path
	from httpx import AsyncClient

	import gradio as gr
	import numpy as np
	from dotenv import load_dotenv
	from fastrtc import (
	AdditionalOutputs,
	ReplyOnPause,
	Stream,
	audio_to_bytes,
	get_turn_credentials_async,
	get_turn_credentials,
	)
	from gradio.utils import get_space
	from languages import LANGUAGES

	# Directory containing this file (not referenced elsewhere in the visible code).
	cur_dir = Path(__file__).parent

	# Load environment variables via python-dotenv; HF_TOKEN is read with
	# os.getenv() when a transcription request is built.
	load_dotenv()


	# Shared async HTTP client used for every transcription request (timeout=30).
	client = AsyncClient(timeout=30)


	async def transcribe_file(audio: tuple[int, np.ndarray], language: str):
	    """Send one audio clip to the hosted Whisper endpoint and return the transcript.

	    Args:
	        audio: ``(int, np.ndarray)`` pair (presumably sample rate and samples),
	            or ``None`` when the caller supplied no audio, e.g. an empty
	            upload form.
	        language: Value forwarded to the endpoint as the ``language`` form field.

	    Returns:
	        The response body as text, or ``""`` when ``audio`` is ``None``.

	    Raises:
	        httpx.HTTPStatusError: If the endpoint answers with a 4xx/5xx status.
	    """
	    # Nothing to transcribe: avoid crashing inside audio_to_bytes(None).
	    if audio is None:
	        return ""

	    response = await client.post(
	        url="https://cw18rfhfqf3db1m8.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
	        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
	        files={"file": audio_to_bytes(audio)},
	        data={"response_format": "text", "language": language},
	    )
	    # Surface HTTP failures instead of returning the error body as if it
	    # were a transcript.
	    response.raise_for_status()
	    return response.text


	async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
	    """Transcribe one audio segment and yield the extended running transcript.

	    Args:
	        audio: ``(int, np.ndarray)`` pair passed straight to ``transcribe_file``.
	        transcript: Text accumulated so far; may be empty or ``None``.
	        language: Language value passed straight to ``transcribe_file``.

	    Yields:
	        A single ``AdditionalOutputs`` wrapping the previous transcript and the
	        new text separated by one space.
	    """
	    text = await transcribe_file(audio, language)
	    # Join only the non-empty pieces so an empty/None transcript does not
	    # produce a leading space (or a TypeError on None + str).
	    combined = " ".join(piece for piece in (transcript, text) if piece)
	    yield AdditionalOutputs(combined)


	# Textbox holding the running transcript; it is both an extra input to the
	# handler (previous text) and the extra output (updated text).
	transcript = gr.Textbox(label="Transcript")

	# Send-only audio stream whose handler is `transcribe`, wrapped in ReplyOnPause.
	stream = Stream(
	    # 48 kHz is a standard audio rate; the former 48_100 is not, and looks
	    # like a typo (NOTE(review): confirm against the FastRTC default).
	    ReplyOnPause(transcribe, input_sample_rate=48_000),
	    modality="audio",
	    mode="send",
	    additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")],
	    additional_outputs=[transcript],
	    # Keeps the second argument and discards the first (presumably
	    # old value -> new value; verify against the FastRTC docs).
	    additional_outputs_handler=lambda a, b: b,
	    rtc_configuration=get_turn_credentials_async,
	    # 604_800 = 7 * 24 * 3600, i.e. seven days if ttl is expressed in seconds.
	    server_rtc_configuration=get_turn_credentials(ttl=604_800),
	    # Cap concurrency at 20 only when get_space() is truthy.
	    concurrency_limit=20 if get_space() else None,
	    time_limit=300,
	    ui_args={"title": ""},
	)

	# File-upload UI: one audio input plus a language selector, with the
	# transcript shown in a textbox. Components are created in the same order
	# as before (audio, dropdown, textbox) and then handed to gr.Interface.
	_upload_audio = gr.Audio(label="Upload Audio", sources=["upload", "microphone"])
	_upload_language = gr.Dropdown(choices=LANGUAGES, label="Language")
	_upload_transcript = gr.Textbox(label="Transcript")

	iface = gr.Interface(
	    fn=transcribe_file,
	    inputs=[_upload_audio, _upload_language],
	    outputs=_upload_transcript,
	)


	# Static HTML fragments for the page header, kept as constants so the
	# layout code below stays short.
	_TITLE_HTML = """
	<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
	<img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
	</h1>
	"""
	_CREDITS_HTML = """
	<h2 style='text-align: center'>
	Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
	</h2>
	"""
	_STREAMING_HINT = "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."

	# Page layout: header, credits, then one tab per input mode.
	with gr.Blocks() as demo:
	    gr.HTML(_TITLE_HTML)
	    gr.HTML(_CREDITS_HTML)
	    with gr.Tabs():
	        # Live microphone transcription rendered from the FastRTC stream UI.
	        with gr.Tab("Streaming"):
	            gr.Markdown(_STREAMING_HINT)
	            stream.ui.render()
	        # One-shot transcription of an uploaded or recorded clip.
	        with gr.Tab("File Upload"):
	            iface.render()

	# Launch only when run as a script; the logo file is allow-listed so the
	# <img> tag in the header can load it.
	if __name__ == "__main__":
	    demo.launch(allowed_paths=["AV_Huggy.png"])