arshadchanna023 committed on
Commit b603b58 · verified · 1 Parent(s): f98aceb

Upload 2 files

Files changed (2)
  1. app.py +48 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,48 @@
+ import gradio as gr
+ import numpy as np
+ import torch
+ import librosa
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
+
+ MODEL = "steja/whisper-large-sindhi"  # Or whisper-small-sindhi if no GPU is available
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Load the model & processor once at startup
+ processor = WhisperProcessor.from_pretrained(MODEL)
+ model = WhisperForConditionalGeneration.from_pretrained(MODEL).to(device)
+
+ def transcribe(audio_file):
+     # audio_file is a (sample_rate, numpy array) tuple from gr.Audio(type="numpy")
+     sr, audio = audio_file
+
+     # Gradio delivers integer PCM; convert to float32 in [-1, 1] for librosa/Whisper
+     if np.issubdtype(audio.dtype, np.integer):
+         audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
+
+     # Downmix stereo to mono
+     if audio.ndim > 1:
+         audio = audio.mean(axis=1)
+
+     # Whisper expects 16 kHz audio
+     if sr != 16000:
+         audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
+         sr = 16000
+
+     inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
+     input_features = inputs.input_features.to(device)
+
+     # Generate prediction
+     with torch.no_grad():
+         pred_ids = model.generate(input_features)
+     text = processor.batch_decode(pred_ids, skip_special_tokens=True)[0]
+     return text
+
+ iface = gr.Interface(
+     fn=transcribe,
+     inputs=gr.Audio(sources=["upload", "microphone"], type="numpy"),
+     outputs="text",
+     title="Sindhi Speech-to-Text (Whisper)",
+     description="Upload or record Sindhi audio to get a transcription using steja/whisper-large-sindhi."
+ )
+
+ iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers
+ torch
+ torchaudio
+ librosa
+ gradio