Commit 763786a · verified · parent: a091506

Update app.py

Files changed (1)
  1. app.py +34 -11
app.py CHANGED
@@ -36,13 +36,22 @@ def get_model(model_name):
 @spaces.GPU(duration=120)
 def transcribe_and_score(audio):
     if audio is None:
-        return ""
+        return "", None
     model = get_model(MODEL_NAMES[0])
     predictions = model.transcribe([audio])
-    pred = predictions[0] if isinstance(predictions, list) else predictions
-    if not isinstance(pred, str):
-        pred = str(pred)
-    return pred.strip()
+    item = predictions[0] if isinstance(predictions, list) else predictions
+
+    # Extract plain text regardless of return type
+    if hasattr(item, "text"):
+        text = item.text
+    elif isinstance(item, dict) and "text" in item:
+        text = item["text"]
+    elif isinstance(item, str):
+        text = item
+    else:
+        text = str(item)
+
+    return text.strip(), audio
 
 @spaces.GPU(duration=120)
 def batch_transcribe(audio_files):
@@ -50,23 +59,37 @@ def batch_transcribe(audio_files):
         return []
     model = get_model(MODEL_NAMES[0])
     predictions = model.transcribe(audio_files)
+
+    texts = []
     if isinstance(predictions, list):
-        texts = [p if isinstance(p, str) else str(p) for p in predictions]
+        for p in predictions:
+            if hasattr(p, "text"):
+                t = p.text
+            elif isinstance(p, dict) and "text" in p:
+                t = p["text"]
+            elif isinstance(p, str):
+                t = p
+            else:
+                t = str(p)
+            texts.append(t)
     else:
-        texts = [str(predictions)]
+        texts.append(str(predictions))
+
     return [[t.strip()] for t in texts]
 
 with gr.Blocks(title="EgypTalk-ASR-v2") as demo:
     gr.Markdown("""
     # EgypTalk-ASR-v2
-    Upload an audio file. This app transcribes audio using EgypTalk-ASR-v2.
+    Upload or record an audio file. This app transcribes audio using EgypTalk-ASR-v2.
     """)
     with gr.Tab("Single Test"):
         with gr.Row():
-            audio_input = gr.Audio(type="filepath", label="Audio File")
+            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
             transcribe_btn = gr.Button("Transcribe")
-        pred_output = gr.Textbox(label="Transcription")
-        transcribe_btn.click(transcribe_and_score, inputs=[audio_input], outputs=[pred_output])
+        with gr.Row():
+            pred_output = gr.Textbox(label="Transcription")
+            audio_playback = gr.Audio(type="filepath", label="Playback")
+        transcribe_btn.click(transcribe_and_score, inputs=[audio_input], outputs=[pred_output, audio_playback])
 
     with gr.Tab("Batch Test"):
         gr.Markdown("Upload multiple audio files. Batch size is limited by GPU/CPU memory.")