File size: 31,637 Bytes
95bd630 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 |
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import os
import datetime
import re
import time
import traceback
import math
from urllib.parse import urlparse
from urllib3 import encode_multipart_formdata
from wsgiref.handlers import format_date_time
from time import mktime
import hashlib
import base64
import hmac
from urllib.parse import urlencode
import json
import requests
import azure.cognitiveservices.speech as speechsdk
# 常量定义
LFASR_HOST = "http://upload-ost-api.xfyun.cn/file" # 文件上传Host
API_INIT = "/mpupload/init" # 初始化接口
API_UPLOAD = "/upload" # 上传接口
API_CUT = "/mpupload/upload" # 分片上传接口
API_CUT_COMPLETE = "/mpupload/complete" # 分片完成接口
API_CUT_CANCEL = "/mpupload/cancel" # 分片取消接口
FILE_PIECE_SIZE = 5242880 # 文件分片大小5M
PRO_CREATE_URI = "/v2/ost/pro_create"
QUERY_URI = "/v2/ost/query"
# 文件上传类
class FileUploader:
def __init__(self, app_id, api_key, api_secret, upload_file_path):
self.app_id = app_id
self.api_key = api_key
self.api_secret = api_secret
self.upload_file_path = upload_file_path
def get_request_id(self):
"""生成请求ID"""
return time.strftime("%Y%m%d%H%M")
def hashlib_256(self, data):
"""计算 SHA256 哈希"""
m = hashlib.sha256(bytes(data.encode(encoding="utf-8"))).digest()
digest = "SHA-256=" + base64.b64encode(m).decode(encoding="utf-8")
return digest
def assemble_auth_header(self, request_url, file_data_type, method="", body=""):
"""组装鉴权头部"""
u = urlparse(request_url)
host = u.hostname
path = u.path
now = datetime.datetime.now()
date = format_date_time(mktime(now.timetuple()))
digest = "SHA256=" + self.hashlib_256("")
signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1\ndigest: {}".format(
host, date, method, path, digest
)
signature_sha = hmac.new(
self.api_secret.encode("utf-8"),
signature_origin.encode("utf-8"),
digestmod=hashlib.sha256,
).digest()
signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
authorization = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % (
self.api_key,
"hmac-sha256",
"host date request-line digest",
signature_sha,
)
headers = {
"host": host,
"date": date,
"authorization": authorization,
"digest": digest,
"content-type": file_data_type,
}
return headers
def call_api(self, url, file_data, file_data_type):
"""调用POST API接口"""
headers = self.assemble_auth_header(
url, file_data_type, method="POST", body=file_data
)
try:
resp = requests.post(url, headers=headers, data=file_data, timeout=8)
print("上传状态:", resp.status_code, resp.text)
return resp.json()
except Exception as e:
print("上传失败!Exception :%s" % e)
return None
def upload_cut_complete(self, upload_id):
"""分块上传完成"""
body_dict = {
"app_id": self.app_id,
"request_id": self.get_request_id(),
"upload_id": upload_id,
}
file_data_type = "application/json"
url = LFASR_HOST + API_CUT_COMPLETE
response = self.call_api(url, json.dumps(body_dict), file_data_type)
if response and "data" in response and "url" in response["data"]:
file_url = response["data"]["url"]
print("任务上传结束")
return file_url
else:
print("分片上传完成失败", response)
return None
def upload_file(self):
"""上传文件,根据文件大小选择分片或普通上传"""
file_total_size = os.path.getsize(self.upload_file_path)
if file_total_size < 31457280: # 30MB
print("-----不使用分块上传-----")
return self.simple_upload()
else:
print("-----使用分块上传-----")
return self.multipart_upload()
def simple_upload(self):
"""简单上传文件"""
try:
with open(self.upload_file_path, mode="rb") as f:
file = {
"data": (self.upload_file_path, f.read()),
"app_id": self.app_id,
"request_id": self.get_request_id(),
}
encode_data = encode_multipart_formdata(file)
file_data = encode_data[0]
file_data_type = encode_data[1]
url = LFASR_HOST + API_UPLOAD
response = self.call_api(url, file_data, file_data_type)
if response and "data" in response and "url" in response["data"]:
return response["data"]["url"]
else:
print("简单上传失败", response)
return None
except FileNotFoundError:
print("文件未找到:", self.upload_file_path)
return None
def multipart_upload(self):
"""分片上传文件"""
upload_id = self.prepare_upload()
if not upload_id:
return None
if not self.do_upload(upload_id):
return None
file_url = self.upload_cut_complete(upload_id)
print("分片上传地址:", file_url)
return file_url
def prepare_upload(self):
"""预处理,获取upload_id"""
body_dict = {
"app_id": self.app_id,
"request_id": self.get_request_id(),
}
url = LFASR_HOST + API_INIT
file_data_type = "application/json"
response = self.call_api(url, json.dumps(body_dict), file_data_type)
if response and "data" in response and "upload_id" in response["data"]:
return response["data"]["upload_id"]
else:
print("预处理失败", response)
return None
def do_upload(self, upload_id):
"""执行分片上传"""
file_total_size = os.path.getsize(self.upload_file_path)
chunk_size = FILE_PIECE_SIZE
chunks = math.ceil(file_total_size / chunk_size)
request_id = self.get_request_id()
slice_id = 1
print(
"文件:",
self.upload_file_path,
" 文件大小:",
file_total_size,
" 分块大小:",
chunk_size,
" 分块数:",
chunks,
)
with open(self.upload_file_path, mode="rb") as content:
while slice_id <= chunks:
current_size = min(
chunk_size, file_total_size - (slice_id - 1) * chunk_size
)
file = {
"data": (self.upload_file_path, content.read(current_size)),
"app_id": self.app_id,
"request_id": request_id,
"upload_id": upload_id,
"slice_id": slice_id,
}
encode_data = encode_multipart_formdata(file)
file_data = encode_data[0]
file_data_type = encode_data[1]
url = LFASR_HOST + API_CUT
resp = self.call_api(url, file_data, file_data_type)
count = 0
while not resp and (count < 3):
print("上传重试")
resp = self.call_api(url, file_data, file_data_type)
count = count + 1
time.sleep(1)
if not resp:
print("分片上传失败")
return False
slice_id += 1
return True
class ResultExtractor:
def __init__(self, appid, apikey, apisecret):
# POST 请求相关参数
self.Host = "ost-api.xfyun.cn"
self.RequestUriCreate = PRO_CREATE_URI
self.RequestUriQuery = QUERY_URI
# 设置 URL
if re.match(r"^\d", self.Host):
self.urlCreate = "http://" + self.Host + self.RequestUriCreate
self.urlQuery = "http://" + self.Host + self.RequestUriQuery
else:
self.urlCreate = "https://" + self.Host + self.RequestUriCreate
self.urlQuery = "https://" + self.Host + self.RequestUriQuery
self.HttpMethod = "POST"
self.APPID = appid
self.Algorithm = "hmac-sha256"
self.HttpProto = "HTTP/1.1"
self.UserName = apikey
self.Secret = apisecret
# 设置当前时间
cur_time_utc = datetime.datetime.now(datetime.timezone.utc)
self.Date = self.httpdate(cur_time_utc)
# 设置测试音频文件参数
self.BusinessArgsCreate = {
"language": "zh_cn",
"accent": "mandarin",
"domain": "pro_ost_ed",
}
def img_read(self, path):
with open(path, "rb") as fo:
return fo.read()
def hashlib_256(self, res):
m = hashlib.sha256(bytes(res.encode(encoding="utf-8"))).digest()
result = "SHA-256=" + base64.b64encode(m).decode(encoding="utf-8")
return result
def httpdate(self, dt):
weekday = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()]
month = [
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec",
][dt.month - 1]
return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
weekday,
dt.day,
month,
dt.year,
dt.hour,
dt.minute,
dt.second,
)
def generateSignature(self, digest, uri):
signature_str = "host: " + self.Host + "\n"
signature_str += "date: " + self.Date + "\n"
signature_str += self.HttpMethod + " " + uri + " " + self.HttpProto + "\n"
signature_str += "digest: " + digest
signature = hmac.new(
bytes(self.Secret.encode("utf-8")),
bytes(signature_str.encode("utf-8")),
digestmod=hashlib.sha256,
).digest()
result = base64.b64encode(signature)
return result.decode(encoding="utf-8")
def init_header(self, data, uri):
digest = self.hashlib_256(data)
sign = self.generateSignature(digest, uri)
auth_header = (
'api_key="%s",algorithm="%s", '
'headers="host date request-line digest", '
'signature="%s"' % (self.UserName, self.Algorithm, sign)
)
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"Method": "POST",
"Host": self.Host,
"Date": self.Date,
"Digest": digest,
"Authorization": auth_header,
}
return headers
def get_create_body(self, fileurl):
post_data = {
"common": {"app_id": self.APPID},
"business": self.BusinessArgsCreate,
"data": {"audio_src": "http", "audio_url": fileurl, "encoding": "raw"},
}
body = json.dumps(post_data)
return body
def get_query_body(self, task_id):
post_data = {
"common": {"app_id": self.APPID},
"business": {
"task_id": task_id,
},
}
body = json.dumps(post_data)
return body
def call(self, url, body, headers):
try:
response = requests.post(url, data=body, headers=headers, timeout=8)
status_code = response.status_code
if status_code != 200:
info = response.content
return info
else:
try:
return json.loads(response.text)
except json.JSONDecodeError:
return response.text
except Exception as e:
print("Exception :%s" % e)
return None
def task_create(self, fileurl):
body = self.get_create_body(fileurl)
headers_create = self.init_header(body, self.RequestUriCreate)
return self.call(self.urlCreate, body, headers_create)
def task_query(self, task_id):
query_body = self.get_query_body(task_id)
headers_query = self.init_header(query_body, self.RequestUriQuery)
return self.call(self.urlQuery, query_body, headers_query)
def extract_text(self, result):
"""
从API响应中提取文本内容
支持多种结果格式,增强错误处理
"""
# 调试输出:打印原始结果类型
print(f"\n[DEBUG] extract_text 输入类型: {type(result)}")
# 如果是字符串,尝试解析为JSON
if isinstance(result, str):
print(f"[DEBUG] 字符串内容 (前200字符): {result[:200]}")
try:
result = json.loads(result)
print("[DEBUG] 成功解析字符串为JSON对象")
except json.JSONDecodeError:
print("[DEBUG] 无法解析为JSON,返回原始字符串")
return result
# 处理字典类型的结果
if isinstance(result, dict):
print("[DEBUG] 处理字典类型结果")
# 1. 检查错误信息
if "code" in result and result["code"] != 0:
error_msg = result.get("message", "未知错误")
print(
f"[ERROR] API返回错误: code={result['code']}, message={error_msg}"
)
return f"错误: {error_msg}"
# 2. 检查直接包含文本结果的情况
if "result" in result and isinstance(result["result"], str):
print("[DEBUG] 找到直接结果字段")
return result["result"]
# 3. 检查lattice结构(详细结果)
if "lattice" in result and isinstance(result["lattice"], list):
print("[DEBUG] 解析lattice结构")
text_parts = []
for lattice in result["lattice"]:
if not isinstance(lattice, dict):
continue
# 获取json_1best内容
json_1best = lattice.get("json_1best", {})
if not json_1best or not isinstance(json_1best, dict):
continue
# 处理st字段 - 修正:st可能是字典或列表
st_content = json_1best.get("st")
st_list = []
if isinstance(st_content, dict):
st_list = [st_content] # 转为列表统一处理
elif isinstance(st_content, list):
st_list = st_content
for st in st_list:
if isinstance(st, str):
# 直接是字符串结果
text_parts.append(st)
elif isinstance(st, dict):
# 处理字典结构的st
rt = st.get("rt", [])
if not isinstance(rt, list):
continue
for item in rt:
if isinstance(item, dict):
ws_list = item.get("ws", [])
if isinstance(ws_list, list):
for ws in ws_list:
if isinstance(ws, dict):
cw_list = ws.get("cw", [])
if isinstance(cw_list, list):
for cw in cw_list:
if isinstance(cw, dict):
w = cw.get("w", "")
if w:
text_parts.append(w)
return "".join(text_parts)
# 4. 检查简化结构(直接包含st)
if "st" in result and isinstance(result["st"], list):
print("[DEBUG] 解析st结构")
text_parts = []
for st in result["st"]:
if isinstance(st, str):
text_parts.append(st)
elif isinstance(st, dict):
rt = st.get("rt", [])
if isinstance(rt, list):
for item in rt:
if isinstance(item, dict):
ws_list = item.get("ws", [])
if isinstance(ws_list, list):
for ws in ws_list:
if isinstance(ws, dict):
cw_list = ws.get("cw", [])
if isinstance(cw_list, list):
for cw in cw_list:
if isinstance(cw, dict):
w = cw.get("w", "")
if w:
text_parts.append(w)
return "".join(text_parts)
# 5. 其他未知结构
print("[WARNING] 无法识别的结果结构")
return json.dumps(result, indent=2, ensure_ascii=False)
# 6. 非字典类型结果
print(f"[WARNING] 非字典类型结果: {type(result)}")
return str(result)
def audio_to_str(appid, apikey, apisecret, file_path):
"""
调用讯飞开放平台接口,获取音频文件的转写结果。
参数:
appid (str): 讯飞开放平台的appid。
apikey (str): 讯飞开放平台的apikey。
apisecret (str): 讯飞开放平台的apisecret。
file_path (str): 音频文件路径。
返回值:
str: 转写结果文本,如果发生错误则返回None。
"""
# 检查文件是否存在
if not os.path.exists(file_path):
print(f"错误:文件 {file_path} 不存在")
return None
try:
# 1. 文件上传
file_uploader = FileUploader(
app_id=appid,
api_key=apikey,
api_secret=apisecret,
upload_file_path=file_path,
)
fileurl = file_uploader.upload_file()
if not fileurl:
print("文件上传失败")
return None
print("文件上传成功,fileurl:", fileurl)
# 2. 创建任务并查询结果
result_extractor = ResultExtractor(appid, apikey, apisecret)
print("\n------ 创建任务 -------")
create_response = result_extractor.task_create(fileurl)
# 调试输出创建响应
print(
f"[DEBUG] 创建任务响应: {json.dumps(create_response, indent=2, ensure_ascii=False)}"
)
if not isinstance(create_response, dict) or "data" not in create_response:
print("创建任务失败:", create_response)
return None
task_id = create_response["data"]["task_id"]
print(f"任务ID: {task_id}")
# 查询任务
print("\n------ 查询任务 -------")
print("任务转写中······")
max_attempts = 30
attempt = 0
while attempt < max_attempts:
result = result_extractor.task_query(task_id)
# 调试输出查询响应
print(f"\n[QUERY {attempt + 1}] 响应类型: {type(result)}")
if isinstance(result, dict):
print(
f"[QUERY {attempt + 1}] 响应内容: {json.dumps(result, indent=2, ensure_ascii=False)}"
)
else:
print(
f"[QUERY {attempt + 1}] 响应内容 (前200字符): {str(result)[:200]}"
)
# 检查响应是否有效
if not isinstance(result, dict):
print(f"无效响应类型: {type(result)}")
return None
# 检查API错误码
if "code" in result and result["code"] != 0:
error_msg = result.get("message", "未知错误")
print(f"API错误: code={result['code']}, message={error_msg}")
return None
# 获取任务状态
task_data = result.get("data", {})
task_status = task_data.get("task_status")
if not task_status:
print("响应中缺少任务状态字段")
print("完整响应:", json.dumps(result, indent=2, ensure_ascii=False))
return None
# 处理不同状态
if task_status in ["3", "4"]: # 任务已完成或回调完成
print("转写完成···")
# 提取结果
result_content = task_data.get("result")
if result_content is not None:
try:
result_text = result_extractor.extract_text(result_content)
print("\n转写结果:\n", result_text)
return result_text
except Exception as e:
print(f"\n提取文本时出错: {str(e)}")
print(f"错误详情:\n{traceback.format_exc()}")
print(
"原始结果内容:",
json.dumps(result_content, indent=2, ensure_ascii=False),
)
return None
else:
print("\n响应中缺少结果字段")
print("完整响应:", json.dumps(result, indent=2, ensure_ascii=False))
return None
elif task_status in ["1", "2"]: # 任务待处理或处理中
print(
f"任务状态:{task_status},等待中... (尝试 {attempt + 1}/{max_attempts})"
)
time.sleep(5)
attempt += 1
else:
print(f"未知任务状态:{task_status}")
print("完整响应:", json.dumps(result, indent=2, ensure_ascii=False))
return None
else:
print(f"超过最大查询次数({max_attempts}),任务可能仍在处理中")
return None
except Exception as e:
print(f"发生异常: {str(e)}")
print(f"错误详情:\n{traceback.format_exc()}")
return None
"""
1、通用文字识别,图像数据base64编码后大小不得超过10M
2、appid、apiSecret、apiKey请到讯飞开放平台控制台获取并填写到此demo中
3、支持中英文,支持手写和印刷文字。
4、在倾斜文字上效果有提升,同时支持部分生僻字的识别
"""
# 图像识别接口地址
URL = "https://api.xf-yun.com/v1/private/sf8e6aca1"
class AssembleHeaderException(Exception):
def __init__(self, msg):
self.message = msg
class Url:
def __init__(self, host, path, schema):
self.host = host
self.path = path
self.schema = schema
pass
# calculate sha256 and encode to base64
def sha256base64(data):
sha256 = hashlib.sha256()
sha256.update(data)
digest = base64.b64encode(sha256.digest()).decode(encoding="utf-8")
return digest
def parse_url(requset_url):
stidx = requset_url.index("://")
host = requset_url[stidx + 3 :]
schema = requset_url[: stidx + 3]
edidx = host.index("/")
if edidx <= 0:
raise AssembleHeaderException("invalid request url:" + requset_url)
path = host[edidx:]
host = host[:edidx]
u = Url(host, path, schema)
return u
# build websocket auth request url
def assemble_ws_auth_url(requset_url, method="POST", api_key="", api_secret=""):
u = parse_url(requset_url)
host = u.host
path = u.path
now = datetime.datetime.now()
date = format_date_time(mktime(now.timetuple()))
# print(date) # 可选:打印Date值
signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
host, date, method, path
)
# print(signature_origin) # 可选:打印签名原文
signature_sha = hmac.new(
api_secret.encode("utf-8"),
signature_origin.encode("utf-8"),
digestmod=hashlib.sha256,
).digest()
signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
authorization_origin = (
'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
% (api_key, "hmac-sha256", "host date request-line", signature_sha)
)
authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
encoding="utf-8"
)
# print(authorization_origin) # 可选:打印鉴权原文
values = {"host": host, "date": date, "authorization": authorization}
return requset_url + "?" + urlencode(values)
def image_to_str(endpoint=None, key=None, unused_param=None, file_path=None):
"""
调用Azure Computer Vision API识别图片中的文字。
参数:
endpoint (str): Azure Computer Vision endpoint URL。
key (str): Azure Computer Vision API key。
unused_param (str): 未使用的参数,保持兼容性。
file_path (str): 图片文件路径。
返回值:
str: 图片中的文字识别结果,如果发生错误则返回None。
"""
# 默认配置
if endpoint is None:
endpoint = "https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/"
if key is None:
key = "45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ"
try:
# 读取图片文件
with open(file_path, "rb") as f:
image_data = f.read()
# 构造请求URL
analyze_url = endpoint.rstrip('/') + "/vision/v3.2/read/analyze"
# 设置请求头
headers = {
'Ocp-Apim-Subscription-Key': key,
'Content-Type': 'application/octet-stream'
}
# 发送POST请求开始分析
response = requests.post(analyze_url, headers=headers, data=image_data)
if response.status_code != 202:
print(f"分析请求失败: {response.status_code}, {response.text}")
return None
# 获取操作位置
operation_url = response.headers["Operation-Location"]
# 轮询结果
import time
while True:
result_response = requests.get(operation_url, headers={'Ocp-Apim-Subscription-Key': key})
result = result_response.json()
if result["status"] == "succeeded":
# 提取文字
text_results = []
if "analyzeResult" in result and "readResults" in result["analyzeResult"]:
for read_result in result["analyzeResult"]["readResults"]:
for line in read_result["lines"]:
text_results.append(line["text"])
return " ".join(text_results) if text_results else ""
elif result["status"] == "failed":
print(f"文字识别失败: {result}")
return None
# 等待1秒后重试
time.sleep(1)
except Exception as e:
print(f"发生异常: {e}")
return None
if __name__ == "__main__":
# 输入讯飞开放平台的 appid,secret、key 和文件路径
appid = "33c1b63d"
apikey = "40bf7cd82e31ace30a9cfb76309a43a3"
apisecret = "OTY1YzIyZWM3YTg0OWZiMGE2ZjA2ZmE4"
audio_path = r"audio_sample_little.wav" # 确保文件路径正确
image_path = r"1.png" # 确保文件路径正确
# 音频转文字
audio_text = audio_to_str(appid, apikey, apisecret, audio_path)
# 图片转文字
image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_path)
print("-"* 20)
print("\n音频转文字结果:", audio_text)
print("\n图片转文字结果:", image_text)
def azure_speech_to_text(speech_key, speech_region, audio_file_path):
"""
使用Azure Speech服务将音频文件转换为文本。
参数:
speech_key (str): Azure Speech服务的API密钥。
speech_region (str): Azure Speech服务的区域。
audio_file_path (str): 音频文件路径。
返回值:
str: 转换后的文本,如果发生错误则返回None。
"""
try:
# 设置语音配置
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
speech_config.speech_recognition_language = "zh-CN" # 设置为中文
# 设置音频配置
audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
# 创建语音识别器
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
# 执行语音识别
result = speech_recognizer.recognize_once()
# 检查识别结果
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
print(f"Azure Speech识别成功: {result.text}")
return result.text
elif result.reason == speechsdk.ResultReason.NoMatch:
print("Azure Speech未识别到语音")
return None
elif result.reason == speechsdk.ResultReason.Canceled:
cancellation_details = result.cancellation_details
print(f"Azure Speech识别被取消: {cancellation_details.reason}")
if cancellation_details.reason == speechsdk.CancellationReason.Error:
print(f"错误详情: {cancellation_details.error_details}")
return None
except Exception as e:
print(f"Azure Speech识别出错: {str(e)}")
return None
|