# Spaces: Running on Zero
import os

import gradio as gr
from transformers import pipeline
# Hugging Face model identifier of the speech-to-text checkpoint.
model_id = "sbapan41/Quantum_STT"

# Build the speech-recognition pipeline once, at module load, so every
# request reuses the same loaded model. The tokenizer is taken from the same
# repository as the model.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    tokenizer=model_id,
    # Generation options applied to every call: English, transcription task.
    generate_kwargs={"language": "en", "task": "transcribe"},
)
# File extensions the app accepts, in the order they are listed to the user.
_SUPPORTED_EXTENSIONS = (
    ".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma",
    ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3",
)


def transcribe(audio) -> str:
    """Transcribe an uploaded audio file to text.

    Args:
        audio: Path of the uploaded audio file, or ``None`` (or an empty
            string) when nothing was uploaded.

    Returns:
        The text produced by the speech-recognition pipeline, or a
        user-facing message when no file was supplied or the file extension
        is not in the supported list.
    """
    if not audio:
        return "Please upload an audio file."

    # Case-insensitive extension pre-check so an obviously wrong upload gets
    # a readable message instead of being handed to the model.
    ext = os.path.splitext(audio)[1].lower()
    if ext not in _SUPPORTED_EXTENSIONS:
        # Build the "a, b, ... or z" list from the single source of truth so
        # the message can never drift from the actual check.
        *leading, last = _SUPPORTED_EXTENSIONS
        allowed = f"{', '.join(leading)} or {last}"
        return (
            f"❌ Unsupported file format: {ext}. "
            f"Please upload {allowed} files."
        )

    result = pipe(audio)
    return result["text"]
# Gradio UI: one audio-upload widget feeding `transcribe`, with the returned
# string shown in a text box.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        type="filepath",  # hand the callback a file path
        sources=["upload"],  # file upload only; microphone input is disabled
        label="🎵 Upload Audio File",
    ),
    outputs=gr.Textbox(label="📝 Transcription"),
    title="🎙️ Quantum Speech Recognizer",
    description=(
        "Upload an audio file (.caf, .au, .opus, .amr, .alac, .aiff, .wma, "
        ".m4a, .ogg, .aac, .flac, .wav, .mp3)<br>"
        "***to transcribe it using the Quantum_STT model***."
    ),
)

# Launch the interface.
interface.launch()