littlebird13 committed on
Commit
b1e90aa
·
verified ·
1 Parent(s): d462144

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -64
app.py CHANGED
@@ -11,7 +11,7 @@ dashscope.base_http_api_url = "https://dashscope.aliyuncs.com/api/v1"
11
 
12
  def asr_inference(audio_file, context, language, enable_itn):
13
  if not audio_file:
14
- return "请上传音频文件"
15
 
16
  messages = [
17
  {
@@ -69,49 +69,49 @@ def asr_inference(audio_file, context, language, enable_itn):
69
  else:
70
  result_lang = None
71
  else:
72
- result_text = "未找到文本内容"
73
  result_lang = None
74
  else:
75
- result_text = "响应结构不完整"
76
  result_lang = None
77
  else:
78
- result_text = "响应中没有找到识别结果"
79
  result_lang = None
80
  else:
81
- status_code = getattr(response, 'status_code', '未知')
82
- error_msg = getattr(response, 'message', '未知错误')
83
- result_text = f"请求失败 (状态码: {status_code}): {error_msg}"
84
  result_lang = None
85
 
86
  except Exception as e:
87
- result_text = f"处理出错: {str(e)}"
88
  result_lang = None
89
 
90
- # 映射 result_lang 为中文/英文名称
91
  lang_display = {
92
- "auto": "自动识别 / Auto Detect",
93
- "zh": "中文 / Chinese",
94
- "en": "英文 / English",
95
- "ja": "日文 / Japanese",
96
- "ko": "韩文 / Korean",
97
- "es": "西班牙文 / Spanish",
98
- "fr": "法文 / French",
99
- "de": "德文 / German",
100
- "ar": "阿拉伯文 / Arabic",
101
- "it": "意大利文 / Italian",
102
- "ru": "俄文 / Russian",
103
- "pt": "葡萄牙文 / Portuguese"
104
  }
105
  if result_lang in lang_display:
106
  result_lang = lang_display[result_lang]
107
  elif result_lang is not None:
108
- result_lang = f"未知语种 / Unknown ({result_lang})"
109
 
110
  return result_text, result_lang
111
 
112
 
113
- with gr.Blocks(theme=gr.themes.Soft(), title="语音识别工具") as demo:
114
- # ========== LOGO 区域(居中 + 放大) ==========
115
  gr.Markdown("""
116
  <div style="width: 100%; display: flex; justify-content: center; margin: 30px 0;">
117
  <img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/00EE8C99-9C05-4236-A6D0-B58FF172D31B.png"
@@ -121,48 +121,47 @@ with gr.Blocks(theme=gr.themes.Soft(), title="语音识别工具") as demo:
121
  </div>
122
  """, sanitize_html=False)
123
 
124
- # ========== API 链接预留区域 ==========
125
  gr.Markdown("""
126
  <div style="text-align: center; margin: 10px 0; font-size: 14px; color: #555;">
127
  🌐 <a href="https://help.aliyun.com/zh/dashscope/developer-reference/"
128
  target="_blank"
129
  style="color: #0066cc; text-decoration: none;">
130
- 查看 DashScope API 文档
131
  </a>
132
  </div>
133
  """, sanitize_html=False)
134
 
135
- gr.Markdown("上传音频文件,获取语音转文字结果。\n支持指定任意格式的上下文信息以获取定制化的识别结果。支持语言识别和逆文本标准化。")
136
 
137
  with gr.Row():
138
  with gr.Column():
139
- audio_input = gr.Audio(label="🎤 上传音频", type="filepath")
140
- context_input = gr.Textbox(label="📝 上下文信息(可选)", value="", interactive=True)
141
  language = gr.Dropdown(
142
- label="🌍 语言设置",
143
  choices=[
144
- ("自动识别 / Auto Detect", "auto"),
145
- ("中文 / Chinese", "zh"),
146
- ("英文 / English", "en"),
147
- ("日文 / Japanese", "ja"),
148
- ("韩文 / Korean", "ko"),
149
- ("西班牙文 / Spanish", "es"),
150
- ("法文 / French", "fr"),
151
- ("德文 / German", "de"),
152
- ("阿拉伯文 / Arabic", "ar"),
153
- ("意大利文 / Italian", "it"),
154
- ("俄文 / Russian", "ru"),
155
- ("葡萄牙文 / Portuguese", "pt")
156
  ],
157
  value="auto"
158
  )
159
- # enable_lid = gr.Checkbox(label=" 启用语言识别(LID)", value=True)
160
- enable_itn = gr.Checkbox(label="🔄 启用逆文本标准化(ITN)", value=False)
161
- submit_btn = gr.Button("🚀 开始识别", variant="primary")
162
 
163
  with gr.Column():
164
- text_output = gr.Textbox(label="📝 识别结果", interactive=False, lines=6, max_lines=12)
165
- lang_output = gr.Textbox(label="📝 语种检测结果(仅在auto模式下返回)", interactive=False, lines=1, max_lines=12)
166
 
167
  submit_btn.click(
168
  fn=asr_inference,
@@ -171,42 +170,39 @@ with gr.Blocks(theme=gr.themes.Soft(), title="语音识别工具") as demo:
171
  )
172
 
173
  gr.Markdown("---")
174
- gr.Markdown("💡 **使用提示**:")
175
- gr.Markdown("- 支持 MP3WAV 等常见音频格式")
176
- gr.Markdown("- 启用 LID 可自动识别语音语言")
177
- gr.Markdown("- 以任意格式配置 context 信息可以获取定制化的文本结果,纠正命名实体名称等")
178
- gr.Markdown("- 启用 ITN 可将数字、日期等转换为标准文本格式")
179
-
180
- # 方法1:使用 HTML 手动创建示例按钮(推荐)
181
- gr.Markdown("### 💡 示例")
182
 
183
- # 定义示例数据
184
  examples_data = {
185
- "Example 1 - CSGO比赛": {
186
  "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR/csgo.wav",
187
  "context": "A csgo match between NAVI and FazeClan in Major Paris 2023. S1mple and B1t are in NAVI. Ropz, Rain, Karrigan and Twistzz are in Faze.",
188
- "description": "游戏解说示例(包含专业术语)"
189
  },
190
- "Example 2 - 噪音环境": {
191
  "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR/noise3.wav",
192
  "context": "",
193
- "description": "噪音环境下的语音识别"
194
  },
195
- "Example 3 - 复杂音频": {
196
  "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR/noise1.wav",
197
  "context": "",
198
- "description": "复杂背景音频处理"
199
  }
200
  }
201
 
202
- # 创建示例按钮
203
  with gr.Row():
204
  for title, data in examples_data.items():
205
  with gr.Column():
206
  example_btn = gr.Button(f"📎 {title}", variant="secondary", size="sm")
207
  gr.Markdown(f"*{data['description']}*", elem_classes=["example-desc"])
208
 
209
- # 为每个按钮创建点击事件
210
  example_btn.click(
211
  fn=lambda audio=data['audio'], context=data['context']: (audio, context),
212
  outputs=[audio_input, context_input]
 
11
 
12
  def asr_inference(audio_file, context, language, enable_itn):
13
  if not audio_file:
14
+ return "Please upload an audio file"
15
 
16
  messages = [
17
  {
 
69
  else:
70
  result_lang = None
71
  else:
72
+ result_text = "No text content found"
73
  result_lang = None
74
  else:
75
+ result_text = "Incomplete response structure"
76
  result_lang = None
77
  else:
78
+ result_text = "No recognition result found in response"
79
  result_lang = None
80
  else:
81
+ status_code = getattr(response, 'status_code', 'Unknown')
82
+ error_msg = getattr(response, 'message', 'Unknown error')
83
+ result_text = f"Request failed (Status: {status_code}): {error_msg}"
84
  result_lang = None
85
 
86
  except Exception as e:
87
+ result_text = f"Processing error: {str(e)}"
88
  result_lang = None
89
 
90
+ # Map result_lang to display name
91
  lang_display = {
92
+ "auto": "Auto Detect",
93
+ "zh": "Chinese",
94
+ "en": "English",
95
+ "ja": "Japanese",
96
+ "ko": "Korean",
97
+ "es": "Spanish",
98
+ "fr": "French",
99
+ "de": "German",
100
+ "ar": "Arabic",
101
+ "it": "Italian",
102
+ "ru": "Russian",
103
+ "pt": "Portuguese"
104
  }
105
  if result_lang in lang_display:
106
  result_lang = lang_display[result_lang]
107
  elif result_lang is not None:
108
+ result_lang = f"Unknown Language ({result_lang})"
109
 
110
  return result_text, result_lang
111
 
112
 
113
+ with gr.Blocks(theme=gr.themes.Soft(), title="Speech Recognition Tool") as demo:
114
+ # ========== LOGO Area (Centered + Enlarged) ==========
115
  gr.Markdown("""
116
  <div style="width: 100%; display: flex; justify-content: center; margin: 30px 0;">
117
  <img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/00EE8C99-9C05-4236-A6D0-B58FF172D31B.png"
 
121
  </div>
122
  """, sanitize_html=False)
123
 
124
+ # ========== API Documentation Link ==========
125
  gr.Markdown("""
126
  <div style="text-align: center; margin: 10px 0; font-size: 14px; color: #555;">
127
  🌐 <a href="https://help.aliyun.com/zh/dashscope/developer-reference/"
128
  target="_blank"
129
  style="color: #0066cc; text-decoration: none;">
130
+ View DashScope API Documentation
131
  </a>
132
  </div>
133
  """, sanitize_html=False)
134
 
135
+ gr.Markdown("Upload an audio file to get speech-to-text results.\nSupports custom context for tailored recognition. Supports language detection and inverse text normalization.")
136
 
137
  with gr.Row():
138
  with gr.Column():
139
+ audio_input = gr.Audio(label="🎤 Upload Audio", type="filepath")
140
+ context_input = gr.Textbox(label="📝 Context (Optional)", value="", interactive=True)
141
  language = gr.Dropdown(
142
+ label="🌍 Language Setting",
143
  choices=[
144
+ ("Auto Detect", "auto"),
145
+ ("Chinese", "zh"),
146
+ ("English", "en"),
147
+ ("Japanese", "ja"),
148
+ ("Korean", "ko"),
149
+ ("Spanish", "es"),
150
+ ("French", "fr"),
151
+ ("German", "de"),
152
+ ("Arabic", "ar"),
153
+ ("Italian", "it"),
154
+ ("Russian", "ru"),
155
+ ("Portuguese", "pt")
156
  ],
157
  value="auto"
158
  )
159
+ enable_itn = gr.Checkbox(label="🔄 Enable Inverse Text Normalization (ITN)", value=False)
160
+ submit_btn = gr.Button("🚀 Start Recognition", variant="primary")
 
161
 
162
  with gr.Column():
163
+ text_output = gr.Textbox(label="📝 Recognition Result", interactive=False, lines=6, max_lines=12)
164
+ lang_output = gr.Textbox(label="📝 Detected Language (only in auto mode)", interactive=False, lines=1, max_lines=12)
165
 
166
  submit_btn.click(
167
  fn=asr_inference,
 
170
  )
171
 
172
  gr.Markdown("---")
173
+ gr.Markdown("💡 **Tips**:")
174
+ gr.Markdown("- Supports common audio formats: MP3, WAV, etc.")
175
+ gr.Markdown("- Enable LID to auto-detect spoken language")
176
+ gr.Markdown("- Use context to customize output, e.g., correct named entities")
177
+ gr.Markdown("- Enable ITN to convert numbers/dates to standardized text")
178
+
179
+ # Example Section
180
+ gr.Markdown("### 💡 Examples")
181
 
 
182
  examples_data = {
183
+ "Example 1 - CSGO Match": {
184
  "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR/csgo.wav",
185
  "context": "A csgo match between NAVI and FazeClan in Major Paris 2023. S1mple and B1t are in NAVI. Ropz, Rain, Karrigan and Twistzz are in Faze.",
186
+ "description": "Game commentary (with professional terms)"
187
  },
188
+ "Example 2 - Noisy Environment": {
189
  "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR/noise3.wav",
190
  "context": "",
191
+ "description": "Speech recognition in noisy environment"
192
  },
193
+ "Example 3 - Complex Audio": {
194
  "audio": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR/noise1.wav",
195
  "context": "",
196
+ "description": "Processing audio with complex background"
197
  }
198
  }
199
 
 
200
  with gr.Row():
201
  for title, data in examples_data.items():
202
  with gr.Column():
203
  example_btn = gr.Button(f"📎 {title}", variant="secondary", size="sm")
204
  gr.Markdown(f"*{data['description']}*", elem_classes=["example-desc"])
205
 
 
206
  example_btn.click(
207
  fn=lambda audio=data['audio'], context=data['context']: (audio, context),
208
  outputs=[audio_input, context_input]