Lenylvt committed on
Commit
3a81031
·
verified ·
1 Parent(s): af1960a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -7,34 +7,29 @@ logging.basicConfig()
7
  logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
8
 
9
  # Initialize the Whisper model with your desired configuration
10
- model_size = "large-v3" # Choose the model size
11
  device = "cpu" # GPU : cuda CPU : cpu
12
  compute_type = "int8" # GPU : float16 or int8 - CPU : int8
13
 
14
  model = WhisperModel(model_size, device=device, compute_type=compute_type)
15
 
16
  def transcribe(audio_file):
17
- # Enable word-level timestamps
18
- segments, _ = model.transcribe(audio_file, word_timestamps=True)
19
 
20
- # Format and gather transcription with timestamps
21
- transcription_with_timestamps = []
22
- for segment in segments:
23
- segment_text = f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}\n"
24
- # If word-level detail is desired
25
- word_details = "\n".join(
26
- f" [{word.start:.2f}s - {word.end:.2f}s] {word.word}" for word in segment.words
27
- )
28
- transcription_with_timestamps.append(segment_text + word_details)
29
 
30
  return "\n".join(transcription_with_timestamps)
31
 
32
  # Define the Gradio interface
33
  iface = gr.Interface(fn=transcribe,
34
- inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
35
  outputs="text",
36
- title="Enhanced Whisper Transcription with Timestamps",
37
- description="Upload an audio file to get detailed transcription with timestamps using Faster Whisper.")
38
 
39
  # Launch the app
40
  if __name__ == "__main__":
 
7
  logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
8
 
9
  # Initialize the Whisper model with your desired configuration
10
+ model_size = "small" # Choose the model size
11
  device = "cpu" # GPU : cuda CPU : cpu
12
  compute_type = "int8" # GPU : float16 or int8 - CPU : int8
13
 
14
  model = WhisperModel(model_size, device=device, compute_type=compute_type)
15
 
16
  def transcribe(audio_file):
17
+ # Transcribe the audio file without word-level timestamps
18
+ segments, _ = model.transcribe(audio_file)
19
 
20
+ # Format and gather transcription with segment timestamps
21
+ transcription_with_timestamps = [
22
+ f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}" for segment in segments
23
+ ]
 
 
 
 
 
24
 
25
  return "\n".join(transcription_with_timestamps)
26
 
27
  # Define the Gradio interface
28
  iface = gr.Interface(fn=transcribe,
29
+ inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio"),
30
  outputs="text",
31
+ title="Whisper Transcription with Line-by-Line Timestamps",
32
+ description="Upload an audio file to get transcription with line-by-line timestamps using Faster Whisper.")
33
 
34
  # Launch the app
35
  if __name__ == "__main__":