RajatMalviya committed
Commit 40b3e9c · verified · 1 parent: 58affcb

Update app.py

Files changed (1):
  1. app.py (+24 -11)
app.py CHANGED
@@ -1,16 +1,19 @@
 import streamlit as st
 import tempfile
 import os
-from transformers import pipeline
+import librosa  # For audio resampling
+import torch
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
-# Load the ASR model
+# Load the model and processor
 @st.cache_resource
 def load_model():
-    return pipeline("automatic-speech-recognition", model="ivrit-ai/whisper-large-v3-turbo")
+    processor = WhisperProcessor.from_pretrained("ivrit-ai/whisper-large-v3-turbo")
+    model = WhisperForConditionalGeneration.from_pretrained("ivrit-ai/whisper-large-v3-turbo")
+    return processor, model
 
-model = load_model()
+processor, model = load_model()
 
-# Streamlit UI
 st.title("Hebrew Speech-to-Text Transcription")
 
 # Upload audio file
@@ -22,14 +25,24 @@ if uploaded_file is not None:
         temp_audio.write(uploaded_file.read())
         temp_audio_path = temp_audio.name
 
-    # Transcribe the audio
-    st.write("Transcribing...")
     try:
-        result = model(temp_audio_path)
+        # Load and resample audio to 16kHz (required by Whisper)
+        speech_array, sampling_rate = librosa.load(temp_audio_path, sr=16000)
+
+        # Preprocess audio
+        inputs = processor(speech_array, sampling_rate=16000, return_tensors="pt")
+
+        # Generate transcription
+        with torch.no_grad():
+            predicted_ids = model.generate(inputs.input_features)
+
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
         st.subheader("Transcription:")
-        st.write(result["text"])
+        st.write(transcription)
+
     except Exception as e:
         st.error(f"Error: {str(e)}")
-
+
     # Clean up the temporary file
-    os.remove(temp_audio_path)
+    os.remove(temp_audio_path)
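
For reference, the transcription path this commit introduces can be exercised outside Streamlit. The following is a minimal sketch that mirrors the logic in the diff; "audio.mp3" is a placeholder path, and the model is assumed to stay on CPU (the app never moves it to a GPU).

import librosa
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Model id and preprocessing steps taken verbatim from the diff above
processor = WhisperProcessor.from_pretrained("ivrit-ai/whisper-large-v3-turbo")
model = WhisperForConditionalGeneration.from_pretrained("ivrit-ai/whisper-large-v3-turbo")

# Whisper expects 16 kHz mono input, so resample on load
speech_array, _ = librosa.load("audio.mp3", sr=16000)  # placeholder file

inputs = processor(speech_array, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    predicted_ids = model.generate(inputs.input_features)

print(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

Swapping the pipeline call for an explicit processor/model pair makes the 16 kHz resampling step visible in the app code instead of being handled inside the pipeline. It also makes librosa and torch direct runtime dependencies, so if the Space pins packages in a requirements.txt (not part of this commit), it would need matching entries alongside streamlit and transformers.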