black44 committed on
Commit
71579f2
·
verified ·
1 Parent(s): ec4f4ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -1,5 +1,5 @@
1
  from fastapi import FastAPI, HTTPException, UploadFile, File
2
- from fastapi.responses import StreamingResponse, JSONResponse
3
  from pydantic import BaseModel
4
  import torch
5
  from transformers import (
@@ -25,7 +25,7 @@ app = FastAPI(title="Kinyarwanda Engine", version="1.0")
25
  MODEL_PATH = "/app/models/suno-bark"
26
  SENTIMENT_MODEL_PATH = "/app/models/sentiment"
27
  SAMPLE_RATE = 24000
28
- ASR_MODEL_PATH = "jonatasgrosman/wav2vec2-large-xlsr-53-Kinyarwanda"
29
 
30
  # Ensure working directory for audio
31
  AUDIO_DIR = "/tmp/audio"
@@ -107,6 +107,12 @@ def speech_to_text(audio_file: UploadFile = File(...)):
107
  audio_bytes = audio_file.file.read()
108
  audio, sample_rate = sf.read(BytesIO(audio_bytes))
109
 
 
 
 
 
 
 
110
  inputs = asr_processor(audio, sampling_rate=sample_rate, return_tensors="pt", padding=True).input_values.to(device)
111
 
112
  with torch.no_grad():
 
1
  from fastapi import FastAPI, HTTPException, UploadFile, File
2
+ from fastapi.responses import StreamingResponse
3
  from pydantic import BaseModel
4
  import torch
5
  from transformers import (
 
25
  MODEL_PATH = "/app/models/suno-bark"
26
  SENTIMENT_MODEL_PATH = "/app/models/sentiment"
27
  SAMPLE_RATE = 24000
28
+ ASR_MODEL_PATH = "lucio/wav2vec2-large-xlsr-kinyarwanda"
29
 
30
  # Ensure working directory for audio
31
  AUDIO_DIR = "/tmp/audio"
 
107
  audio_bytes = audio_file.file.read()
108
  audio, sample_rate = sf.read(BytesIO(audio_bytes))
109
 
110
+ # Resample if necessary
111
+ if sample_rate != 16000:
112
+ import librosa
113
+ audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000)
114
+ sample_rate = 16000
115
+
116
  inputs = asr_processor(audio, sampling_rate=sample_rate, return_tensors="pt", padding=True).input_values.to(device)
117
 
118
  with torch.no_grad():