Spaces:

Lenylvt/Whisper-API

Sleeping

App Files Community

Lenylvt committed on Feb 17, 2024

Commit

3a81031

verified ·

1 Parent(s): af1960a

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -15

app.py CHANGED Viewed

@@ -7,34 +7,29 @@ logging.basicConfig()
 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
 # Initialize the Whisper model with your desired configuration
-model_size = "large-v3"  # Choose the model size
 device = "cpu"  # GPU : cuda  CPU : cpu
 compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
 model = WhisperModel(model_size, device=device, compute_type=compute_type)
 def transcribe(audio_file):
-    # Enable word-level timestamps
-    segments, _ = model.transcribe(audio_file, word_timestamps=True)
-    # Format and gather transcription with timestamps
-    transcription_with_timestamps = []
-    for segment in segments:
-        segment_text = f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}\n"
-        # If word-level detail is desired
-        word_details = "\n".join(
-            f"    [{word.start:.2f}s - {word.end:.2f}s] {word.word}" for word in segment.words
-        )
-        transcription_with_timestamps.append(segment_text + word_details)
     return "\n".join(transcription_with_timestamps)
 # Define the Gradio interface
 iface = gr.Interface(fn=transcribe,
-                     inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
                      outputs="text",
-                     title="Enhanced Whisper Transcription with Timestamps",
-                     description="Upload an audio file to get detailed transcription with timestamps using Faster Whisper.")
 # Launch the app
 if __name__ == "__main__":

 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
 # Initialize the Whisper model with your desired configuration
+model_size = "small"  # Choose the model size
 device = "cpu"  # GPU : cuda  CPU : cpu
 compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
 model = WhisperModel(model_size, device=device, compute_type=compute_type)
 def transcribe(audio_file):
+    # Transcribe the audio file without word-level timestamps
+    segments, _ = model.transcribe(audio_file)
+    # Format and gather transcription with segment timestamps
+    transcription_with_timestamps = [
+        f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}" for segment in segments
+    ]
     return "\n".join(transcription_with_timestamps)
 # Define the Gradio interface
 iface = gr.Interface(fn=transcribe,
+                     inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio"),
                      outputs="text",
+                     title="Whisper Transcription with Line-by-Line Timestamps",
+                     description="Upload an audio file to get transcription with line-by-line timestamps using Faster Whisper.")
 # Launch the app
 if __name__ == "__main__":