kanyekuthi committed on
Commit
3814abf
·
verified ·
1 Parent(s): 5cc5855

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -10
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
3
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
4
  import torchaudio
5
 
6
- model_id = "kanyekuthi/dsn_afrispeech" # or your correct model repo ID
7
 
8
  processor = AutoProcessor.from_pretrained(model_id)
9
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
@@ -11,22 +11,19 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
11
  def transcribe(audio):
12
  waveform, sr = torchaudio.load(audio)
13
  if sr != 16000:
14
- resampler = torchaudio.transforms.Resample(sr, 16000)
15
- waveform = resampler(waveform)
16
-
17
  inputs = processor(waveform.squeeze(), sampling_rate=16000, return_tensors="pt")
18
-
19
  with torch.no_grad():
20
- generated_ids = model.generate(**inputs)
21
- transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
22
-
23
  return transcription
24
 
25
  iface = gr.Interface(
26
  fn=transcribe,
27
- inputs=gr.Audio(source="microphone", type="filepath"),
28
  outputs="text",
29
- title="Whisper-based ASR Demo"
30
  )
31
 
32
  iface.launch()
 
3
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
4
  import torchaudio
5
 
# Hugging Face Hub repository that provides both the processor and the
# speech-to-text model weights used by this app.
model_id = "kanyekuthi/dsn_afrispeech"

# Loaded once at import time so each transcription request reuses the same
# processor and model instances.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
 
def transcribe(audio):
    """Transcribe an audio file to text with the module-level ASR model.

    Args:
        audio: Path to an audio file readable by ``torchaudio.load`` (the
            interface below is configured with ``type="filepath"``). May be
            ``None`` if the form is submitted without any recording or
            upload.

    Returns:
        The decoded transcription string, or an empty string when no audio
        was supplied.
    """
    if audio is None:
        # Nothing was recorded or uploaded; avoid crashing in torchaudio.load.
        return ""

    waveform, sr = torchaudio.load(audio)

    # torchaudio.load returns a (channels, frames) tensor. Average the
    # channels so stereo input becomes a single mono track; squeeze() alone
    # only removes a size-1 channel axis and would hand a 2-D tensor to the
    # feature extractor for multi-channel files.
    waveform = waveform.mean(dim=0)

    if sr != 16000:
        waveform = torchaudio.functional.resample(
            waveform, orig_freq=sr, new_freq=16000
        )

    # The feature extractor documents numpy arrays / float lists as input,
    # so convert the 1-D tensor explicitly instead of relying on coercion.
    inputs = processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")

    with torch.no_grad():
        predicted_ids = model.generate(**inputs)

    # batch_decode returns one string per sequence; a single clip was sent.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
21
 
# Audio input widget; type="filepath" means transcribe() receives a path on
# disk rather than raw samples.
audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")

# Single-function UI: audio in, transcription text out.
iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Whisper ASR: DSN Afrispeech",
)

iface.launch()