yama committed
Commit 14e358d · 1 Parent(s): d40e32c

Update app.py

Files changed (1)
  1. app.py +38 -49
app.py CHANGED
@@ -27,6 +27,12 @@ import contextlib
 from transformers import pipeline
 import psutil
 
+import openai
+import os
+import tempfile
+from pydub import AudioSegment
+
+
 whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
 source_languages = {
     "en": "English",
@@ -254,39 +260,22 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
         raise RuntimeError("Error Running inference with local model", e)
 
 
-def create_meeting_summary(openai_key, prompt, uploaded_audio, max_transcribe_seconds):
-    openai.api_key = openai_key
-
-    # Open the audio file
-    audio = AudioSegment.from_file(uploaded_audio)
-
-    # Set an upper limit on the amount of audio to transcribe
-    if len(audio) > int(max_transcribe_seconds) * 1000:
-        audio = audio[:int(max_transcribe_seconds) * 1000]
-
-    # Compress the audio to reduce the file size
-    compressed_audio = audio.set_frame_rate(16000).set_channels(1)
+# def create_meeting_summary(openai_key, prompt):
+#     openai.api_key = openai_key
+#
+#     # Get the transcribed text
+#     system_template = prompt
+#
+#     completion = openai.ChatCompletion.create(
+#         model="gpt-3.5-turbo",
+#         messages=[
+#             {"role": "system", "content": system_template},
+#             {"role": "user", "content": transcript_text}
+#         ]
+#     )
+#     summary = completion.choices[0].message.content
+#     return summary
 
-    # Save the compressed audio to a temporary mp3 file
-    with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as tmp:
-        compressed_audio.export(tmp.name, format="mp3")
-
-        transcript = openai.Audio.transcribe("whisper-1", open(tmp.name, "rb"), response_format="verbose_json")
-    transcript_text = ""
-    for segment in transcript.segments:
-        transcript_text += f"{segment['text']}\n"
-
-    system_template = prompt
-
-    completion = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": system_template},
-            {"role": "user", "content": transcript_text}
-        ]
-    )
-    summary = completion.choices[0].message.content
-    return summary, transcript_text
 
 # ---- Gradio Layout -----
 # Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
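For reference, the function removed in this hunk handled the whole meeting-summary flow in one place: trim the recording with pydub, compress it, transcribe it through the Whisper API, then summarize the transcript with a chat model. The sketch below is a minimal standalone restatement of that flow, not the committed code; it assumes the pre-1.0 openai Python SDK (which exposes openai.Audio.transcribe and openai.ChatCompletion.create) and an ffmpeg binary on PATH for pydub, and the function name and the max_seconds default are illustrative.

import tempfile

import openai
from pydub import AudioSegment


def summarize_meeting(openai_key, prompt, audio_path, max_seconds=600):
    """Trim, compress, transcribe, and summarize a meeting recording (sketch)."""
    openai.api_key = openai_key

    # Load the recording and keep at most max_seconds of it (pydub slices in milliseconds).
    audio = AudioSegment.from_file(audio_path)
    audio = audio[:int(max_seconds) * 1000]

    # Downsample to 16 kHz mono so the upload stays small.
    compressed = audio.set_frame_rate(16000).set_channels(1)

    # Export to a temporary mp3 and send it to the Whisper endpoint.
    with tempfile.NamedTemporaryFile(suffix=".mp3") as tmp:
        compressed.export(tmp.name, format="mp3")
        with open(tmp.name, "rb") as f:
            transcript = openai.Audio.transcribe(
                "whisper-1", f, response_format="verbose_json"
            )

    # Stitch the segment texts back into one transcript string.
    transcript_text = "".join(f"{seg['text']}\n" for seg in transcript.segments)

    # Summarize the transcript, using the caller-supplied prompt as the system message.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": transcript_text},
        ],
    )
    return completion.choices[0].message.content, transcript_text

Called as summary, transcript = summarize_meeting(key, prompt, "meeting.mp4"), it returns both the Markdown summary and the raw transcript, mirroring the two values the deleted function returned.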
@@ -312,13 +301,13 @@ demo.encrypt = False
 
 with demo:
     with gr.Tab("Whisper speaker diarization"):
-        # gr.Markdown('''
-        #     <div>
-        #         <h1 style='text-align: center'>Whisper speaker diarization</h1>
-        #         This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a>, a fast inference engine for Transformer models, to recognize speech (4 times faster than the original OpenAI model with the same accuracy),
-        #         and the ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and classify speakers.
-        #     </div>
-        #     ''')
+        gr.Markdown('''
+            <div>
+                <h1 style='text-align: center'>Whisper speaker diarization</h1>
+                This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a>, a fast inference engine for Transformer models, to recognize speech (4 times faster than the original OpenAI model with the same accuracy),
+                and the ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and classify speakers.
+            </div>
+            ''')
 
         with gr.Row():
             gr.Markdown('''
@@ -377,16 +366,16 @@ with demo:
         # gr.Markdown(
         #     '''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
 
-        with gr.Row():
-            with gr.Column():
-                gr.Textbox(lines=1, label="openai_key", type="password")
-                gr.TextArea(label="prompt", value="""You will be given a transcript of a meeting.
-
-Please write a summary of this meeting in Markdown, using the following format:
-- Purpose of the meeting
-- Content of the meeting
-- Results of the meeting""")
-                gr.Textbox(label="transcription_summary")
+        # with gr.Row():
+        #     with gr.Column():
+        #         gr.Textbox(lines=1, label="openai_key", type="password")
+        #         gr.TextArea(label="prompt", value="""You will be given a transcript of a meeting.
+        #
+        # Please write a summary of this meeting in Markdown, using the following format:
+        # - Purpose of the meeting
+        # - Content of the meeting
+        # - Results of the meeting""")
+        #         gr.Textbox(label="transcription_summary")
 
 
 demo.launch(debug=True)
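The commented-out widgets in the last hunk were never wired to a callback anywhere in this diff, so re-enabling the feature would also require an event handler. The wiring below is purely hypothetical: the component variables, the Summarize button, and the summarize_meeting helper from the sketch above are assumptions, not part of the committed app.py.

import gradio as gr

# Hypothetical wiring for the commented-out summary UI; nothing here exists in app.py.
with gr.Blocks() as summary_demo:
    with gr.Row():
        with gr.Column():
            openai_key_in = gr.Textbox(lines=1, label="openai_key", type="password")
            prompt_in = gr.TextArea(
                label="prompt",
                value="You will be given a transcript of a meeting. "
                      "Write a Markdown summary covering its purpose, content, and results.",
            )
            audio_in = gr.Audio(label="meeting_audio", type="filepath")
            summarize_btn = gr.Button("Summarize")
            summary_out = gr.Textbox(label="transcription_summary")
            transcript_out = gr.Textbox(label="transcript")

    # summarize_meeting is the sketch shown earlier; it returns (summary, transcript_text).
    summarize_btn.click(
        fn=summarize_meeting,
        inputs=[openai_key_in, prompt_in, audio_in],
        outputs=[summary_out, transcript_out],
    )

summary_demo.launch()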
 