Spaces:

revaza
/

speech-2-text-ka

Running

revaza committed on Oct 14, 2024

Commit

704f9ea

verified ·

1 Parent(s): d79b6ab

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,22 +25,35 @@ asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
 )
-def transcribe_audio(audio_file):
-    if audio_file:
-        # Convert the uploaded audio to mono
-        mono_audio = convert_to_mono(audio_file)
-        # Write the mono audio to a temporary file and close it before transcribing
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            temp_file.write(mono_audio.read())
-            temp_file_path = temp_file.name
-        # Transcribe the audio using the temporary file path
-        res = asr_model.transcribe([temp_file_path])
-        # Clean up the temporary file
-        os.remove(temp_file_path)
         # Return the transcription result
         return res[0][0]

 )
+# def transcribe_audio(audio_file):
+#     if audio_file:
+#         # Convert the uploaded audio to mono
+#         mono_audio = convert_to_mono(audio_file)
+#         # Write the mono audio to a temporary file and close it before transcribing
+#         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+#             temp_file.write(mono_audio.read())
+#             temp_file_path = temp_file.name
+#         # Transcribe the audio using the temporary file path
+#         res = asr_model.transcribe([temp_file_path])
+#         # Clean up the temporary file
+#         os.remove(temp_file_path)
+#         # Return the transcription result
+#         return res[0][0]
+def transcribe_audio(audio_file):
+    """Transcribe an uploaded audio file with the module-level ASR model.
+
+    Args:
+        audio_file: The uploaded audio, in whatever form ``convert_to_mono``
+            accepts. A falsy value (nothing uploaded) is treated as a no-op.
+
+    Returns:
+        ``res[0][0]`` of the ``asr_model.transcribe`` result (assumed to be
+        the best hypothesis for the single input -- TODO confirm against the
+        installed NeMo version), or ``None`` when ``audio_file`` is falsy.
+    """
+    # Local imports keep this function self-contained; the file's top-level
+    # import block is not visible from this hunk.
+    import os
+    import tempfile
+
+    if not audio_file:
+        return None
+
+    # Convert the uploaded audio to mono; the result is a readable
+    # file-like object.
+    mono_audio = convert_to_mono(audio_file)
+
+    # asr_model.transcribe() is documented to take audio file paths, not
+    # in-memory BytesIO objects, so hand the audio over via a temporary
+    # .wav file that is closed before the model opens it.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        temp_file.write(mono_audio.read())
+        temp_file_path = temp_file.name
+
+    try:
+        res = asr_model.transcribe([temp_file_path])
+    finally:
+        # Remove the temporary file even if transcription raises.
+        os.remove(temp_file_path)
+
+    # Return the transcription result
+    return res[0][0]