Update app.py

app.py CHANGED

@@ -36,13 +36,22 @@ def get_model(model_name):
 @spaces.GPU(duration=120)
 def transcribe_and_score(audio):
     if audio is None:
-        return ""
+        return "", None
     model = get_model(MODEL_NAMES[0])
     predictions = model.transcribe([audio])
-
-
-
-
+    item = predictions[0] if isinstance(predictions, list) else predictions
+
+    # Extract plain text regardless of return type
+    if hasattr(item, "text"):
+        text = item.text
+    elif isinstance(item, dict) and "text" in item:
+        text = item["text"]
+    elif isinstance(item, str):
+        text = item
+    else:
+        text = str(item)
+
+    return text.strip(), audio
 
 @spaces.GPU(duration=120)
 def batch_transcribe(audio_files):
@@ -50,23 +59,37 @@ def batch_transcribe(audio_files):
         return []
     model = get_model(MODEL_NAMES[0])
     predictions = model.transcribe(audio_files)
+
+    texts = []
     if isinstance(predictions, list):
-
+        for p in predictions:
+            if hasattr(p, "text"):
+                t = p.text
+            elif isinstance(p, dict) and "text" in p:
+                t = p["text"]
+            elif isinstance(p, str):
+                t = p
+            else:
+                t = str(p)
+            texts.append(t)
     else:
-        texts
+        texts.append(str(predictions))
+
     return [[t.strip()] for t in texts]
 
 with gr.Blocks(title="EgypTalk-ASR-v2") as demo:
     gr.Markdown("""
     # EgypTalk-ASR-v2
-    Upload an audio file. This app transcribes audio using EgypTalk-ASR-v2.
+    Upload or record an audio file. This app transcribes audio using EgypTalk-ASR-v2.
     """)
     with gr.Tab("Single Test"):
         with gr.Row():
-            audio_input = gr.Audio(type="filepath", label="Audio")
+            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
             transcribe_btn = gr.Button("Transcribe")
-
-
+        with gr.Row():
+            pred_output = gr.Textbox(label="Transcription")
+            audio_playback = gr.Audio(type="filepath", label="Playback")
+        transcribe_btn.click(transcribe_and_score, inputs=[audio_input], outputs=[pred_output, audio_playback])
 
     with gr.Tab("Batch Test"):
         gr.Markdown("Upload multiple audio files. Batch size is limited by GPU/CPU memory.")
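
One observation on the change itself: transcribe_and_score and batch_transcribe now repeat the same attribute/dict/string fallback when pulling text out of a prediction item. A possible follow-up (not part of this commit) would be to factor that chain into a single helper, sketched below; both functions could then call it and keep their own return shapes.

```python
def extract_text(item):
    """Return plain text from one transcribe() result, whatever its shape."""
    if hasattr(item, "text"):                      # hypothesis-style objects exposing .text
        return item.text
    if isinstance(item, dict) and "text" in item:  # dict results
        return item["text"]
    if isinstance(item, str):                      # plain strings
        return item
    return str(item)                               # last-resort fallback
```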
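
For context, the hunk header references get_model() and MODEL_NAMES, neither of which appears in this diff. Below is a minimal sketch of what that loader might look like, assuming a NeMo-style ASR checkpoint (the .text handling above is consistent with NeMo's transcribe() output); the repo id and cache layout are assumptions, not taken from app.py.

```python
import nemo.collections.asr as nemo_asr

MODEL_NAMES = ["your-org/EgypTalk-ASR-v2"]  # placeholder repo id, not the real one
_CACHE = {}

def get_model(model_name):
    # Load the checkpoint once and reuse it across Gradio calls.
    if model_name not in _CACHE:
        _CACHE[model_name] = nemo_asr.models.ASRModel.from_pretrained(model_name)
    return _CACHE[model_name]
```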
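
The Batch Test tab's own wiring also falls outside this diff. The list of single-element rows that batch_transcribe returns maps naturally onto a one-column gr.Dataframe; the sketch below is one hypothetical layout (component names invented for illustration), with a stub standing in for the real model call.

```python
import gradio as gr

def batch_transcribe(audio_files):
    # Stand-in for the real function in app.py: one single-column row per file.
    if not audio_files:
        return []
    return [[f"transcript for {path}"] for path in audio_files]

with gr.Blocks() as demo:
    with gr.Tab("Batch Test"):
        gr.Markdown("Upload multiple audio files. Batch size is limited by GPU/CPU memory.")
        batch_input = gr.File(file_count="multiple", type="filepath", label="Audio files")
        batch_btn = gr.Button("Transcribe batch")
        batch_output = gr.Dataframe(headers=["Transcription"], label="Results")
        batch_btn.click(batch_transcribe, inputs=[batch_input], outputs=[batch_output])

demo.launch()
```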