Spaces:

revaza
/

speech-2-text-ka

Running

revaza committed on Oct 14, 2024

Commit

6a0d521

verified ·

1 Parent(s): 704f9ea

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,37 +25,37 @@ asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
 )
-# def transcribe_audio(audio_file):
-#     if audio_file:
-#         # Convert the uploaded audio to mono
-#         mono_audio = convert_to_mono(audio_file)
-#         # Write the mono audio to a temporary file and close it before transcribing
-#         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-#             temp_file.write(mono_audio.read())
-#             temp_file_path = temp_file.name
-#         # Transcribe the audio using the temporary file path
-#         res = asr_model.transcribe([temp_file_path])
-#         # Clean up the temporary file
-#         os.remove(temp_file_path)
-#         # Return the transcription result
-#         return res[0][0]
 def transcribe_audio(audio_file):
     if audio_file:
         # Convert the uploaded audio to mono
         mono_audio = convert_to_mono(audio_file)
-        # Transcribe the audio using the BytesIO object directly
-        audio_data = mono_audio.read()
-        # Use the audio_data in the format expected by the ASR model
-        res = asr_model.transcribe([BytesIO(audio_data)])
-        # Return the transcription result
-        return res[0][0]
 # Create the Gradio interface

 )
 def transcribe_audio(audio_file):
     if audio_file:
         # Convert the uploaded audio to mono
         mono_audio = convert_to_mono(audio_file)
+        # Write the mono audio to a temporary file and close it before transcribing
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_file.write(mono_audio.read())
+            temp_file_path = temp_file.name
+        # Transcribe the audio using the temporary file path
+        res = asr_model.transcribe([temp_file_path])
+        # Clean up the temporary file
+        os.remove(temp_file_path)
+        # Return the transcription result
+        return res[0][0]
+# def transcribe_audio(audio_file):
+#     if audio_file:
+#         # Convert the uploaded audio to mono
+#         mono_audio = convert_to_mono(audio_file)
+#         # Transcribe the audio using the BytesIO object directly
+#         audio_data = mono_audio.read()
+#         # Use the audio_data in the format expected by the ASR model
+#         res = asr_model.transcribe([BytesIO(audio_data)])
+#         # Return the transcription result
+#         return res[0][0]
 # Create the Gradio interface