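# Page header captured from the Hugging Face file viewer, kept as comments
# so that the module remains valid Python:
#   author:    mfuntowicz (HF Staff)
#   commit:    327eac0 (verified) - "update link to endpoint"
#   file size: 2.76 kB
#   viewer links: raw / history / blame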
import os
from pathlib import Path
from httpx import AsyncClient
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
audio_to_bytes,
get_turn_credentials_async,
get_turn_credentials,
)
from gradio.utils import get_space
from languages import LANGUAGES
# Load variables from a .env file (if any) into the environment first; the
# HF_TOKEN used for the endpoint request is read with os.getenv later on.
load_dotenv()

# Directory that contains this script.
cur_dir = Path(__file__).parent

# Single async HTTP client shared by all transcription requests
# (timeout=30; httpx interprets this value in seconds).
client = AsyncClient(timeout=30)
async def transcribe_file(audio: tuple[int, np.ndarray], language: str) -> str:
    """Send an audio clip to the hosted transcription endpoint and return its text.

    Args:
        audio: Audio clip as a ``(int, ndarray)`` pair, presumably
            ``(sample_rate, samples)`` as produced by Gradio/FastRTC; it is
            encoded with ``audio_to_bytes`` and uploaded as the ``file`` field.
            ``None`` (no clip provided in the upload form) yields an empty
            transcript instead of an encoding error.
        language: Value forwarded unchanged in the ``language`` form field.

    Returns:
        The body of the endpoint response as text (the request asks for
        ``response_format="text"``), or ``""`` when no audio was given.

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with a 4xx/5xx status.
    """
    # Nothing to transcribe, e.g. the upload form was submitted empty.
    if audio is None:
        return ""

    response = await client.post(
        url="https://cw18rfhfqf3db1m8.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        # The bearer token is taken from the HF_TOKEN environment variable.
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data={"response_format": "text", "language": language},
    )
    # Without this check an error payload (401, 503, ...) would be returned
    # to the caller as if it were the transcript.
    response.raise_for_status()
    return response.text
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    """Transcribe one audio segment and yield the extended running transcript.

    Args:
        audio: Audio segment passed straight through to ``transcribe_file``.
        transcript: Transcript text accumulated so far.
        language: Language value passed straight through to ``transcribe_file``.

    Yields:
        A single ``AdditionalOutputs`` wrapping the previous transcript and the
        newly transcribed text, separated by one space.
    """
    text = await transcribe_file(audio, language)
    # Join only the non-empty pieces: the very first utterance must not start
    # with a stray space, and an empty result must not append a trailing one.
    pieces = (transcript, text)
    yield AdditionalOutputs(" ".join(piece for piece in pieces if piece))
# Textbox holding the running transcript. It is wired into the stream both as
# an additional input (current text) and as an additional output (new text).
transcript = gr.Textbox(label="Transcript")

# Handler wrapper that calls `transcribe` for the streaming tab.
# NOTE(review): 48_100 is an unusual sample rate (48_000 and 44_100 are the
# common ones) - confirm that this value is intentional.
_pause_handler = ReplyOnPause(transcribe, input_sample_rate=48_100)

# Language selector shown next to the streaming widget.
_stream_language = gr.Dropdown(choices=LANGUAGES, label="Language")

stream = Stream(
    _pause_handler,
    modality="audio",
    mode="send",
    additional_inputs=[transcript, _stream_language],
    additional_outputs=[transcript],
    # Of the two values handed to this callback, keep the second one.
    additional_outputs_handler=lambda a, b: b,
    rtc_configuration=get_turn_credentials_async,
    # ttl=604_800 corresponds to 7 days if the unit is seconds - TODO confirm.
    server_rtc_configuration=get_turn_credentials(ttl=604_800),
    # Concurrency is capped only when get_space() returns a truthy value.
    concurrency_limit=20 if get_space() else None,
    time_limit=300,
    ui_args={"title": ""},
)
# Input components of the file-upload form: an audio clip (uploaded or
# recorded from the microphone) and the language selector.
_upload_audio = gr.Audio(label="Upload Audio", sources=["upload", "microphone"])
_upload_language = gr.Dropdown(choices=LANGUAGES, label="Language")

# Plain request/response interface around `transcribe_file`; the returned text
# is displayed in a "Transcript" textbox.
iface = gr.Interface(
    fn=transcribe_file,
    inputs=[_upload_audio, _upload_language],
    outputs=gr.Textbox(label="Transcript"),
)
# Page title: logo image followed by the application name.
_TITLE_HTML = """
<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
<img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
</h1>
"""

# Sub-heading crediting the services behind the demo.
_CREDITS_HTML = """
<h2 style='text-align: center'>
Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
</h2>
"""

# Top-level layout: the two headings, then one tab per way of using the app.
with gr.Blocks() as demo:
    gr.HTML(_TITLE_HTML)
    gr.HTML(_CREDITS_HTML)
    with gr.Tabs():
        # Live microphone transcription through the FastRTC stream UI.
        with gr.Tab("Streaming"):
            gr.Markdown(
                "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."
            )
            stream.ui.render()
        # One-shot transcription of an uploaded or recorded clip.
        with gr.Tab("File Upload"):
            iface.render()
if __name__ == "__main__":
    # The page header loads AV_Huggy.png through "/gradio_api/file=...", so
    # that path has to be listed in allowed_paths at launch.
    served_files = ["AV_Huggy.png"]
    demo.launch(allowed_paths=served_files)