Spaces:

hriteshMaikap
/

marathi-asr-wav2vec2bert

Sleeping

App Files Files Community

hriteshMaikap committed on Apr 16

Commit

670f5ce

verified ·

1 Parent(s): 132669d

Create app.py

Browse files

Files changed (1) hide show

app.py +50 -0

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import gradio as gr
+import torch
+import torchaudio
+import numpy as np
+from transformers import Wav2Vec2BertProcessor, Wav2Vec2BertForCTC
+# Load model and processor
+repo_id = "hriteshMaikap/marathi-asr-model"
+processor = Wav2Vec2BertProcessor.from_pretrained(repo_id)
+model = Wav2Vec2BertForCTC.from_pretrained(repo_id)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+def transcribe(audio):
+    # Process audio
+    waveform, sample_rate = torchaudio.load(audio)
+    # Resample if needed
+    if sample_rate != 16000:
+        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+        waveform = resampler(waveform)
+    # Convert to mono if needed
+    if waveform.shape[0] > 1:
+        waveform = torch.mean(waveform, dim=0, keepdim=True)
+    # Convert to numpy
+    speech_array = waveform.squeeze().numpy()
+    # Process and run inference
+    with torch.no_grad():
+        inputs = processor(speech_array, sampling_rate=16000, return_tensors="pt").to(device)
+        logits = model(inputs.input_features).logits
+        predicted_ids = torch.argmax(logits, dim=-1)
+    # Decode the predicted IDs
+    transcription = processor.decode(predicted_ids[0])
+    return transcription
+# Create Gradio interface
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="filepath"),
+    outputs="text",
+    title="Marathi Speech Recognition",
+    description="Record your voice in Marathi and get a transcription."
+)
+iface.launch()