Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -27,18 +27,22 @@ translator_ppl = pipeline(
|
|
| 27 |
# model producing an image from text
|
| 28 |
image_ppl = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=YOUR_TOKEN)
|
| 29 |
|
| 30 |
-
def transcribe(microphone, file_upload):
|
|
|
|
| 31 |
warn_output = ""
|
| 32 |
-
if (microphone is not None) and (file_upload is not None):
|
|
|
|
| 33 |
warn_output = (
|
| 34 |
"WARNING: You've uploaded an audio file and used the microphone. "
|
| 35 |
"The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
|
| 36 |
)
|
| 37 |
|
| 38 |
-
elif (microphone is None) and (file_upload is None):
|
|
|
|
| 39 |
return "ERROR: You have to either use the microphone or upload an audio file"
|
| 40 |
|
| 41 |
-
file = microphone if microphone is not None else file_upload
|
|
|
|
| 42 |
|
| 43 |
text = speech_ppl(file)["text"]
|
| 44 |
print("Text: ", text)
|
|
@@ -48,7 +52,7 @@ def transcribe(microphone, file_upload):
|
|
| 48 |
print("Translate 2: ", translate)
|
| 49 |
print("Building image .....")
|
| 50 |
#image = image_ppl(translate).images[0]
|
| 51 |
-
image = image_ppl(translate)["sample"]
|
| 52 |
print("Image: ", image)
|
| 53 |
image.save("text-to-image.png")
|
| 54 |
|
|
@@ -86,14 +90,14 @@ mf_transcribe = gr.Interface(
|
|
| 86 |
fn=transcribe,
|
| 87 |
inputs=[
|
| 88 |
gr.inputs.Audio(source="microphone", type="filepath", optional=True),
|
| 89 |
-
gr.inputs.Audio(source="upload", type="filepath", optional=True),
|
| 90 |
],
|
| 91 |
outputs=[gr.Textbox(label="Transcribed text"),
|
| 92 |
gr.Textbox(label="Summarized text"),
|
| 93 |
gr.Image(type="pil", label="Output image")],
|
| 94 |
layout="horizontal",
|
| 95 |
theme="huggingface",
|
| 96 |
-
title="Whisper Demo: Transcribe Audio",
|
| 97 |
description=(
|
| 98 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
|
| 99 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
|
|
@@ -101,7 +105,7 @@ mf_transcribe = gr.Interface(
|
|
| 101 |
),
|
| 102 |
allow_flagging="never",
|
| 103 |
)
|
| 104 |
-
|
| 105 |
yt_transcribe = gr.Interface(
|
| 106 |
fn=yt_transcribe,
|
| 107 |
inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
|
|
@@ -116,8 +120,10 @@ yt_transcribe = gr.Interface(
|
|
| 116 |
),
|
| 117 |
allow_flagging="never",
|
| 118 |
)
|
|
|
|
| 119 |
|
| 120 |
with demo:
|
| 121 |
-
gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
|
|
|
| 122 |
|
| 123 |
demo.launch(enable_queue=True)
|
|
|
|
| 27 |
# model producing an image from text
|
| 28 |
image_ppl = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=YOUR_TOKEN)
|
| 29 |
|
| 30 |
+
#def transcribe(microphone, file_upload):
|
| 31 |
+
def transcribe(microphone):
|
| 32 |
warn_output = ""
|
| 33 |
+
# if (microphone is not None) and (file_upload is not None):
|
| 34 |
+
if (microphone is not None):
|
| 35 |
warn_output = (
|
| 36 |
"WARNING: You've uploaded an audio file and used the microphone. "
|
| 37 |
"The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
|
| 38 |
)
|
| 39 |
|
| 40 |
+
# elif (microphone is None) and (file_upload is None):
|
| 41 |
+
elif (microphone is None):
|
| 42 |
return "ERROR: You have to either use the microphone or upload an audio file"
|
| 43 |
|
| 44 |
+
# file = microphone if microphone is not None else file_upload
|
| 45 |
+
file = microphone
|
| 46 |
|
| 47 |
text = speech_ppl(file)["text"]
|
| 48 |
print("Text: ", text)
|
|
|
|
| 52 |
print("Translate 2: ", translate)
|
| 53 |
print("Building image .....")
|
| 54 |
#image = image_ppl(translate).images[0]
|
| 55 |
+
# The pipeline output's "sample" entry is a batch (list) of PIL images, one per
# prompt; take the first so the print/save calls below operate on a single image.
image = image_ppl(translate, num_inference_steps=15)["sample"][0]
|
| 56 |
print("Image: ", image)
|
| 57 |
image.save("text-to-image.png")
|
| 58 |
|
|
|
|
| 90 |
fn=transcribe,
|
| 91 |
inputs=[
|
| 92 |
gr.inputs.Audio(source="microphone", type="filepath", optional=True),
|
| 93 |
+
#gr.inputs.Audio(source="upload", type="filepath", optional=True),
|
| 94 |
],
|
| 95 |
outputs=[gr.Textbox(label="Transcribed text"),
|
| 96 |
gr.Textbox(label="Summarized text"),
|
| 97 |
gr.Image(type="pil", label="Output image")],
|
| 98 |
layout="horizontal",
|
| 99 |
theme="huggingface",
|
| 100 |
+
title="Whisper Demo: Transcribe Audio to Image",
|
| 101 |
description=(
|
| 102 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
|
| 103 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
|
|
|
|
| 105 |
),
|
| 106 |
allow_flagging="never",
|
| 107 |
)
|
| 108 |
+
'''
|
| 109 |
yt_transcribe = gr.Interface(
|
| 110 |
fn=yt_transcribe,
|
| 111 |
inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
|
|
|
|
| 120 |
),
|
| 121 |
allow_flagging="never",
|
| 122 |
)
|
| 123 |
+
'''
|
| 124 |
|
| 125 |
with demo:
|
| 126 |
+
#gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
| 127 |
+
# TabbedInterface expects a list of interfaces and a parallel list of tab names,
# even when there is only one tab.
gr.TabbedInterface([mf_transcribe], ["Transcribe Audio to Image"])
|
| 128 |
|
| 129 |
demo.launch(enable_queue=True)
|