Spaces: Build error
yama committed · Commit 14e358d · 1 Parent(s): d40e32c
Update app.py

app.py CHANGED
@@ -27,6 +27,12 @@ import contextlib
 from transformers import pipeline
 import psutil
 
+import openai
+import os
+import tempfile
+from pydub import AudioSegment
+
+
 whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
 source_languages = {
     "en": "English",
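The new pydub import existed to build the `compressed_audio` segment that the (now removed) summary helper exported to mp3 before uploading to the hosted Whisper endpoint, which limits upload size. A minimal sketch of that kind of compression step, assuming pydub with ffmpeg installed; the function name, file path, bitrate, and sample rate below are illustrative assumptions, not values from this commit:

# Illustrative sketch only: shrink an audio file with pydub before sending it
# to the OpenAI transcription endpoint. Names and parameters are assumptions.
import tempfile

from pydub import AudioSegment


def compress_for_upload(input_path: str) -> AudioSegment:
    """Load any file ffmpeg understands and downmix it for a small mp3 export."""
    audio = AudioSegment.from_file(input_path)
    # Mono at 16 kHz is enough for speech recognition and keeps the mp3 small.
    return audio.set_channels(1).set_frame_rate(16000)


# Usage, mirroring the temporary-file pattern in the removed helper below
# ("meeting.wav" is a placeholder path):
compressed_audio = compress_for_upload("meeting.wav")
with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as tmp:
    compressed_audio.export(tmp.name, format="mp3", bitrate="64k")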
@@ -254,39 +260,22 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
         raise RuntimeError("Error Running inference with local model", e)
 
 
-def create_meeting_summary(openai_key, prompt
-    # Save the compressed audio to a temporary file in mp3 format
-    with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as tmp:
-        compressed_audio.export(tmp.name, format="mp3")
-
-        transcript = openai.Audio.transcribe("whisper-1", open(tmp.name, "rb"), response_format="verbose_json")
-        transcript_text = ""
-        for segment in transcript.segments:
-            transcript_text += f"{segment['text']}\n"
-
-        system_template = prompt
-
-        completion = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": system_template},
-                {"role": "user", "content": transcript_text}
-            ]
-        )
-        summary = completion.choices[0].message.content
-        return summary, transcript_text
+# def create_meeting_summary(openai_key, prompt):
+#     openai.api_key = openai_key
+#
+#     # Get the transcribed text
+#     system_template = prompt
+#
+#     completion = openai.ChatCompletion.create(
+#         model="gpt-3.5-turbo",
+#         messages=[
+#             {"role": "system", "content": system_template},
+#             {"role": "user", "content": transcript_text}
+#         ]
+#     )
+#     summary = completion.choices[0].message.content
+#     return summary
+
 
 # ---- Gradio Layout -----
 # Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
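For reference, the removed helper chained two calls against the legacy openai-python 0.x SDK: Audio.transcribe against whisper-1 and ChatCompletion.create against gpt-3.5-turbo. A self-contained sketch of that flow, assuming the pre-1.0 SDK; the function name and the explicit audio_path parameter are assumptions, since the committed helper's full signature is truncated in this view:

# Sketch of the removed summarisation flow, assuming openai-python < 1.0.
# The name and audio_path parameter are assumptions for this example.
import openai


def summarize_meeting(openai_key: str, prompt: str, audio_path: str):
    openai.api_key = openai_key

    # Transcribe the audio with the hosted Whisper model.
    with open(audio_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe(
            "whisper-1", audio_file, response_format="verbose_json"
        )

    # Flatten the per-segment text into one transcript string.
    transcript_text = ""
    for segment in transcript.segments:
        transcript_text += f"{segment['text']}\n"

    # Summarise with gpt-3.5-turbo, using the caller's prompt as the system message.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": transcript_text},
        ],
    )
    summary = completion.choices[0].message.content
    return summary, transcript_text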
@@ -312,13 +301,13 @@ demo.encrypt = False
 
 with demo:
     with gr.Tab("Whisper speaker diarization"):
+        gr.Markdown('''
+            <div>
+            <h1 style='text-align: center'>Whisper speaker diarization</h1>
+            This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a> which is a fast inference engine for Transformer models to recognize the speech (4 times faster than original openai model with same accuracy)
+            and ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and clasify speakers
+            </div>
+        ''')
 
         with gr.Row():
             gr.Markdown('''
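The added header is raw HTML passed to gr.Markdown inside the existing Blocks/Tab layout. A minimal standalone sketch of that nesting; the labels and text here are placeholders, not the ones from app.py:

# Minimal Gradio Blocks layout sketch; labels and text are placeholders.
import gradio as gr

demo = gr.Blocks()

with demo:
    with gr.Tab("Whisper speaker diarization"):
        # gr.Markdown accepts inline HTML, which is how the header <div> above renders.
        gr.Markdown("<h1 style='text-align: center'>Demo header</h1>")
        with gr.Row():
            gr.Markdown("Left column text")
            gr.Markdown("Right column text")

demo.launch(debug=True)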
@@ -377,16 +366,16 @@ with demo:
     # gr.Markdown(
     #     '''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
 
-    with gr.Row():
+    # with gr.Row():
+    #     with gr.Column():
+    #         gr.Textbox(lines=1, label="openai_key", type="password")
+    #         gr.TextArea(label="prompt", value="""You will be given the transcript of a meeting.
+    #
+    #         Please create a summary of this meeting in Markdown format. Write the summary in the following format:
+    #         - Purpose of the meeting
+    #         - Contents of the meeting
+    #         - Results of the meeting""")
+    #         gr.Textbox(label="transcription_summary")
 
 
 demo.launch(debug=True)
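The summary UI stays commented out in this commit, so nothing is wired to the (also commented-out) create_meeting_summary helper. If it were re-enabled, a typical Gradio wiring would bind the components to the function through a button click. A hypothetical sketch; every component variable, the button, the audio upload field, and the stand-in function body are assumptions made for illustration, not part of app.py:

# Hypothetical wiring for the commented-out summary UI; all names are assumptions.
import gradio as gr


def create_meeting_summary(openai_key, prompt, audio_path):
    # Stand-in for the commented-out helper so this sketch runs on its own.
    return "summary would go here"


demo = gr.Blocks()

with demo:
    with gr.Row():
        with gr.Column():
            openai_key = gr.Textbox(lines=1, label="openai_key", type="password")
            prompt = gr.TextArea(label="prompt", value="Summarise the meeting transcript in Markdown.")
            audio_in = gr.Audio(label="meeting audio", type="filepath")
            summary_out = gr.Textbox(label="transcription_summary")
            run_btn = gr.Button("Create summary")

            # The commit leaves the components above commented out and unwired;
            # this click binding is the assumed missing piece.
            run_btn.click(
                fn=create_meeting_summary,
                inputs=[openai_key, prompt, audio_in],
                outputs=summary_out,
            )

demo.launch(debug=True)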