Lenylvt committed on
Commit
7752cd2
·
verified ·
1 Parent(s): 6cb9375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -11
app.py CHANGED
@@ -1,23 +1,42 @@
1
  import gradio as gr
2
- import whisper
 
3
 
4
# Load the "base" OpenAI Whisper model once at module import time.
model = whisper.load_model("base")
6
 
7
def transcribe(audio_file):
    """Transcribe *audio_file* with the module-level Whisper model.

    `audio_file` is a filesystem path (Whisper accepts the path directly);
    the plain transcription text is returned.
    """
    result = model.transcribe(audio_file)
    return result['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
# Build the upload→text Gradio UI for the basic Whisper transcriber.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
    outputs="text",
    title="Whisper Transcription",
    description="Upload an audio file to transcribe it using OpenAI's Whisper model.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
 
 
1
  import gradio as gr
2
+ from faster_whisper import WhisperModel
3
+ import logging
4
 
5
# Configure logging so faster_whisper emits debug output during development.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Model configuration.
model_size = "large-v3"  # Choose the model size (e.g. "base", "small", "large-v3")
device = "cpu"  # or "cuda" if a GPU is available
# FIX: CTranslate2 does not efficiently support float16 on CPU (it warns and
# falls back), so pick the compute type from the device instead of
# hard-coding "float16".
compute_type = "float16" if device == "cuda" else "float32"

# FIX: WhisperModel's first parameter is named `model_size_or_path`, so the
# keyword `model_size=` raises TypeError — pass the size positionally.
model = WhisperModel(model_size, device=device, compute_type=compute_type)
15
 
16
def transcribe(audio_file):
    """Transcribe *audio_file* and return text annotated with timestamps.

    Each segment becomes a line tagged with its start/end time, followed by
    one indented line per word carrying that word's start/end time.
    """
    # word_timestamps=True asks faster-whisper for per-word timing detail.
    segments, _ = model.transcribe(audio_file, word_timestamps=True)

    def _render(seg):
        # One header line for the segment, then one line per word.
        words = "\n".join(
            f" [{w.start:.2f}s - {w.end:.2f}s] {w.word}" for w in seg.words
        )
        return f"[{seg.start:.2f}s - {seg.end:.2f}s] {seg.text}\n" + words

    return "\n".join(_render(seg) for seg in segments)
31
 
32
# Wire the transcriber into a Gradio UI: audio-upload input, text output.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
    outputs="text",
    title="Enhanced Whisper Transcription with Timestamps",
    description="Upload an audio file to get detailed transcription with timestamps using Faster Whisper.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()