yama committed
Commit 14e358d · 1 Parent(s): d40e32c

Update app.py

Files changed (1)
  1. app.py +38 -49
app.py CHANGED
@@ -27,6 +27,12 @@ import contextlib
 from transformers import pipeline
 import psutil
 
+import openai
+import os
+import tempfile
+from pydub import AudioSegment
+
+
 whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
 source_languages = {
     "en": "English",
@@ -254,39 +260,22 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
         raise RuntimeError("Error Running inference with local model", e)
 
 
-def create_meeting_summary(openai_key, prompt, uploaded_audio, max_transcribe_seconds):
-    openai.api_key = openai_key
-
-    # Open the audio file
-    audio = AudioSegment.from_file(uploaded_audio)
-
-    # Set an upper limit on the amount of audio to transcribe
-    if len(audio) > int(max_transcribe_seconds) * 1000:
-        audio = audio[:int(max_transcribe_seconds) * 1000]
-
-    # Compress the audio to reduce the file size
-    compressed_audio = audio.set_frame_rate(16000).set_channels(1)
+# def create_meeting_summary(openai_key, prompt):
+#     openai.api_key = openai_key
+#
+#     # Get the transcribed text
+#     system_template = prompt
+#
+#     completion = openai.ChatCompletion.create(
+#         model="gpt-3.5-turbo",
+#         messages=[
+#             {"role": "system", "content": system_template},
+#             {"role": "user", "content": transcript_text}
+#         ]
+#     )
+#     summary = completion.choices[0].message.content
+#     return summary
 
-    # Save the compressed audio to a temporary mp3 file
-    with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as tmp:
-        compressed_audio.export(tmp.name, format="mp3")
-
-        transcript = openai.Audio.transcribe("whisper-1", open(tmp.name, "rb"), response_format="verbose_json")
-    transcript_text = ""
-    for segment in transcript.segments:
-        transcript_text += f"{segment['text']}\n"
-
-    system_template = prompt
-
-    completion = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": system_template},
-            {"role": "user", "content": transcript_text}
-        ]
-    )
-    summary = completion.choices[0].message.content
-    return summary, transcript_text
 
 # ---- Gradio Layout -----
 # Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
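For reference, the function removed in this hunk handled the whole meeting-summary flow in one place: trim the recording with pydub, compress it, transcribe it through the Whisper API, then summarize the transcript with a chat model. The sketch below is a minimal standalone restatement of that flow, not the committed code; it assumes the pre-1.0 openai Python SDK (which exposes openai.Audio.transcribe and openai.ChatCompletion.create) and an ffmpeg binary on PATH for pydub, and the function name and the max_seconds default are illustrative.

import tempfile

import openai
from pydub import AudioSegment


def summarize_meeting(openai_key, prompt, audio_path, max_seconds=600):
    """Trim, compress, transcribe, and summarize a meeting recording (sketch)."""
    openai.api_key = openai_key

    # Load the recording and keep at most max_seconds of it (pydub slices in milliseconds).
    audio = AudioSegment.from_file(audio_path)
    audio = audio[:int(max_seconds) * 1000]

    # Downsample to 16 kHz mono so the upload stays small.
    compressed = audio.set_frame_rate(16000).set_channels(1)

    # Export to a temporary mp3 and send it to the Whisper endpoint.
    with tempfile.NamedTemporaryFile(suffix=".mp3") as tmp:
        compressed.export(tmp.name, format="mp3")
        with open(tmp.name, "rb") as f:
            transcript = openai.Audio.transcribe(
                "whisper-1", f, response_format="verbose_json"
            )

    # Stitch the segment texts back into one transcript string.
    transcript_text = "".join(f"{seg['text']}\n" for seg in transcript.segments)

    # Summarize the transcript, using the caller-supplied prompt as the system message.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": transcript_text},
        ],
    )
    return completion.choices[0].message.content, transcript_text

Called as summary, transcript = summarize_meeting(key, prompt, "meeting.mp4"), it returns both the Markdown summary and the raw transcript, mirroring the two values the deleted function returned.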
@@ -312,13 +301,13 @@ demo.encrypt = False
 
 with demo:
     with gr.Tab("Whisper speaker diarization"):
-        # gr.Markdown('''
-        #     <div>
-        #         <h1 style='text-align: center'>Whisper speaker diarization</h1>
-        #         This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a>, a fast inference engine for Transformer models, to recognize speech (4 times faster than the original OpenAI model with the same accuracy),
-        #         and the ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and classify speakers.
-        #     </div>
-        #     ''')
+        gr.Markdown('''
+            <div>
+                <h1 style='text-align: center'>Whisper speaker diarization</h1>
+                This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a>, a fast inference engine for Transformer models, to recognize speech (4 times faster than the original OpenAI model with the same accuracy),
+                and the ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and classify speakers.
+            </div>
+            ''')
 
         with gr.Row():
             gr.Markdown('''
@@ -377,16 +366,16 @@ with demo:
         # gr.Markdown(
         #     '''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
 
-        with gr.Row():
-            with gr.Column():
-                gr.Textbox(lines=1, label="openai_key", type="password")
-                gr.TextArea(label="prompt", value="""You will be given a transcript of a meeting.
-
-Please write a summary of this meeting in Markdown, using the following format:
-- Purpose of the meeting
-- Content of the meeting
-- Results of the meeting""")
-                gr.Textbox(label="transcription_summary")
+        # with gr.Row():
+        #     with gr.Column():
+        #         gr.Textbox(lines=1, label="openai_key", type="password")
+        #         gr.TextArea(label="prompt", value="""You will be given a transcript of a meeting.
+        #
+        # Please write a summary of this meeting in Markdown, using the following format:
+        # - Purpose of the meeting
+        # - Content of the meeting
+        # - Results of the meeting""")
+        #         gr.Textbox(label="transcription_summary")
 
 
 demo.launch(debug=True)
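The commented-out widgets in the last hunk were never wired to a callback anywhere in this diff, so re-enabling the feature would also require an event handler. The wiring below is purely hypothetical: the component variables, the Summarize button, and the summarize_meeting helper from the sketch above are assumptions, not part of the committed app.py.

import gradio as gr

# Hypothetical wiring for the commented-out summary UI; nothing here exists in app.py.
with gr.Blocks() as summary_demo:
    with gr.Row():
        with gr.Column():
            openai_key_in = gr.Textbox(lines=1, label="openai_key", type="password")
            prompt_in = gr.TextArea(
                label="prompt",
                value="You will be given a transcript of a meeting. "
                      "Write a Markdown summary covering its purpose, content, and results.",
            )
            audio_in = gr.Audio(label="meeting_audio", type="filepath")
            summarize_btn = gr.Button("Summarize")
            summary_out = gr.Textbox(label="transcription_summary")
            transcript_out = gr.Textbox(label="transcript")

    # summarize_meeting is the sketch shown earlier; it returns (summary, transcript_text).
    summarize_btn.click(
        fn=summarize_meeting,
        inputs=[openai_key_in, prompt_in, audio_in],
        outputs=[summary_out, transcript_out],
    )

summary_demo.launch()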
 