File size: 31,637 Bytes
95bd630
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import os
import datetime
import re
import time
import traceback
import math
from urllib.parse import urlparse
from urllib3 import encode_multipart_formdata
from wsgiref.handlers import format_date_time
from time import mktime
import hashlib
import base64
import hmac
from urllib.parse import urlencode
import json
import requests
import azure.cognitiveservices.speech as speechsdk

# Constant definitions
LFASR_HOST = "http://upload-ost-api.xfyun.cn/file"  # file-upload host (base URL for the paths below)
API_INIT = "/mpupload/init"  # initialisation endpoint (used by FileUploader.prepare_upload)
API_UPLOAD = "/upload"  # upload endpoint (used by FileUploader.simple_upload)
API_CUT = "/mpupload/upload"  # slice (multipart) upload endpoint
API_CUT_COMPLETE = "/mpupload/complete"  # slice-upload completion endpoint
API_CUT_CANCEL = "/mpupload/cancel"  # slice-upload cancel endpoint
FILE_PIECE_SIZE = 5242880  # size of one file slice: 5M (5 * 1024 * 1024 bytes)
PRO_CREATE_URI = "/v2/ost/pro_create"  # request URI ResultExtractor uses to create a task
QUERY_URI = "/v2/ost/query"  # request URI ResultExtractor uses to query a task


# File upload class
class FileUploader:
    """Upload one local file to the service at ``LFASR_HOST``.

    Files smaller than 30 MB go up in a single multipart/form-data request;
    larger files are sent slice by slice (``FILE_PIECE_SIZE`` bytes each).
    Every request carries an HMAC-SHA256 signed ``authorization`` header.
    """

    def __init__(self, app_id, api_key, api_secret, upload_file_path):
        self.upload_file_path = upload_file_path
        self.api_secret = api_secret
        self.api_key = api_key
        self.app_id = app_id

    def get_request_id(self):
        """Return a request id: the local time formatted as ``YYYYmmddHHMM``."""
        return time.strftime("%Y%m%d%H%M", time.localtime())

    def hashlib_256(self, data):
        """Return ``"SHA-256=" + base64(sha256(data))`` for the text *data*."""
        raw_hash = hashlib.sha256(data.encode("utf-8")).digest()
        return "SHA-256=" + base64.b64encode(raw_hash).decode("utf-8")

    def assemble_auth_header(self, request_url, file_data_type, method="", body=""):
        """Build the signed request headers for *request_url*.

        Args:
            request_url: full URL of the endpoint being called.
            file_data_type: value sent as the ``content-type`` header.
            method: HTTP method written into the signed request line.
            body: accepted for interface compatibility; not read here.

        Returns:
            dict with ``host``, ``date``, ``authorization``, ``digest`` and
            ``content-type`` entries.
        """
        parsed = urlparse(request_url)
        host, path = parsed.hostname, parsed.path
        date = format_date_time(mktime(datetime.datetime.now().timetuple()))
        # NOTE(review): the digest is always computed over the empty string and
        # ends up prefixed "SHA256=SHA-256=". That is what is sent today, so it
        # is kept as is - confirm against the service docs before changing it.
        digest = "SHA256=" + self.hashlib_256("")

        signature_origin = "\n".join(
            (
                "host: {}".format(host),
                "date: {}".format(date),
                "{} {} HTTP/1.1".format(method, path),
                "digest: {}".format(digest),
            )
        )
        mac = hmac.new(
            self.api_secret.encode("utf-8"),
            signature_origin.encode("utf-8"),
            digestmod=hashlib.sha256,
        )
        signature = base64.b64encode(mac.digest()).decode("utf-8")
        authorization = (
            'api_key="{}", algorithm="{}", headers="{}", signature="{}"'.format(
                self.api_key,
                "hmac-sha256",
                "host date request-line digest",
                signature,
            )
        )
        return {
            "host": host,
            "date": date,
            "authorization": authorization,
            "digest": digest,
            "content-type": file_data_type,
        }

    def call_api(self, url, file_data, file_data_type):
        """POST *file_data* to *url*; return the decoded JSON reply or None on any error."""
        signed_headers = self.assemble_auth_header(
            url, file_data_type, method="POST", body=file_data
        )
        try:
            reply = requests.post(
                url, headers=signed_headers, data=file_data, timeout=8
            )
            print("上传状态:", reply.status_code, reply.text)
            return reply.json()
        except Exception as err:
            print("上传失败!Exception :%s" % err)
            return None

    def upload_cut_complete(self, upload_id):
        """Tell the server that all slices of *upload_id* are sent; return the file URL or None."""
        payload = json.dumps(
            {
                "app_id": self.app_id,
                "request_id": self.get_request_id(),
                "upload_id": upload_id,
            }
        )
        response = self.call_api(
            LFASR_HOST + API_CUT_COMPLETE, payload, "application/json"
        )
        if not (response and "data" in response and "url" in response["data"]):
            print("分片上传完成失败", response)
            return None
        file_url = response["data"]["url"]
        print("任务上传结束")
        return file_url

    def upload_file(self):
        """Upload the file, choosing sliced or plain upload by size (threshold 30 MB)."""
        size_limit = 31457280  # 30MB
        if os.path.getsize(self.upload_file_path) >= size_limit:
            print("-----使用分块上传-----")
            return self.multipart_upload()
        print("-----不使用分块上传-----")
        return self.simple_upload()

    def simple_upload(self):
        """Upload the whole file in one request; return its URL or None."""
        try:
            with open(self.upload_file_path, mode="rb") as handle:
                fields = {
                    "data": (self.upload_file_path, handle.read()),
                    "app_id": self.app_id,
                    "request_id": self.get_request_id(),
                }
                file_data, file_data_type = encode_multipart_formdata(fields)
            response = self.call_api(
                LFASR_HOST + API_UPLOAD, file_data, file_data_type
            )
            if response and "data" in response and "url" in response["data"]:
                return response["data"]["url"]
            print("简单上传失败", response)
            return None
        except FileNotFoundError:
            print("文件未找到:", self.upload_file_path)
            return None

    def multipart_upload(self):
        """Run the sliced upload: init, send all slices, complete. Return the URL or None."""
        upload_id = self.prepare_upload()
        if upload_id and self.do_upload(upload_id):
            file_url = self.upload_cut_complete(upload_id)
            print("分片上传地址:", file_url)
            return file_url
        return None

    def prepare_upload(self):
        """Initialise a sliced upload and return the server-issued ``upload_id`` (or None)."""
        payload = json.dumps(
            {
                "app_id": self.app_id,
                "request_id": self.get_request_id(),
            }
        )
        response = self.call_api(LFASR_HOST + API_INIT, payload, "application/json")
        if not (response and "data" in response and "upload_id" in response["data"]):
            print("预处理失败", response)
            return None
        return response["data"]["upload_id"]

    def do_upload(self, upload_id):
        """Send the file slice by slice; return True when every slice was accepted.

        A slice whose request yields no usable reply is retried up to three
        more times before the whole upload is reported as failed (False).
        """
        file_total_size = os.path.getsize(self.upload_file_path)
        chunk_size = FILE_PIECE_SIZE
        chunks = math.ceil(file_total_size / chunk_size)
        request_id = self.get_request_id()

        print(
            "文件:",
            self.upload_file_path,
            " 文件大小:",
            file_total_size,
            " 分块大小:",
            chunk_size,
            " 分块数:",
            chunks,
        )

        slice_url = LFASR_HOST + API_CUT
        with open(self.upload_file_path, mode="rb") as content:
            # Slice ids are 1-based.
            for slice_id in range(1, chunks + 1):
                already_sent = (slice_id - 1) * chunk_size
                current_size = min(chunk_size, file_total_size - already_sent)
                fields = {
                    "data": (self.upload_file_path, content.read(current_size)),
                    "app_id": self.app_id,
                    "request_id": request_id,
                    "upload_id": upload_id,
                    "slice_id": slice_id,
                }
                file_data, file_data_type = encode_multipart_formdata(fields)

                resp = self.call_api(slice_url, file_data, file_data_type)
                retries = 0
                while not resp and retries < 3:
                    print("上传重试")
                    resp = self.call_api(slice_url, file_data, file_data_type)
                    retries += 1
                    time.sleep(1)
                if not resp:
                    print("分片上传失败")
                    return False

        return True


class ResultExtractor:
    """Create and query transcription tasks on ``ost-api.xfyun.cn`` and extract text.

    Requests are JSON POSTs signed with HMAC-SHA256 over the host, date,
    request line and body digest. ``extract_text`` turns the several result
    layouts the query endpoint may return into one plain string.
    """

    def __init__(self, appid, apikey, apisecret):
        # Parameters of the POST requests
        self.Host = "ost-api.xfyun.cn"
        self.RequestUriCreate = PRO_CREATE_URI
        self.RequestUriQuery = QUERY_URI
        # Build the URLs: a host that starts with a digit gets http://,
        # any other host gets https://.
        scheme = "http://" if re.match(r"^\d", self.Host) else "https://"
        self.urlCreate = scheme + self.Host + self.RequestUriCreate
        self.urlQuery = scheme + self.Host + self.RequestUriQuery
        self.HttpMethod = "POST"
        self.APPID = appid
        self.Algorithm = "hmac-sha256"
        self.HttpProto = "HTTP/1.1"
        self.UserName = apikey
        self.Secret = apisecret

        # Initial request date (UTC); init_header refreshes it for every request.
        cur_time_utc = datetime.datetime.now(datetime.timezone.utc)
        self.Date = self.httpdate(cur_time_utc)

        # Business arguments sent when creating a task
        self.BusinessArgsCreate = {
            "language": "zh_cn",
            "accent": "mandarin",
            "domain": "pro_ost_ed",
        }

    def img_read(self, path):
        """Return the raw bytes of the file at *path*."""
        with open(path, "rb") as fo:
            return fo.read()

    def hashlib_256(self, res):
        """Return ``"SHA-256=" + base64(sha256(res))`` for the text *res*."""
        m = hashlib.sha256(res.encode("utf-8")).digest()
        return "SHA-256=" + base64.b64encode(m).decode("utf-8")

    def httpdate(self, dt):
        """Format *dt* like ``Mon, 01 Jan 2024 00:00:00 GMT``.

        The fields of *dt* are written as they are and "GMT" is appended
        literally, so the caller must pass a datetime already in UTC.
        """
        weekday = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[dt.weekday()]
        month = (
            "Jan",
            "Feb",
            "Mar",
            "Apr",
            "May",
            "Jun",
            "Jul",
            "Aug",
            "Sep",
            "Oct",
            "Nov",
            "Dec",
        )[dt.month - 1]
        return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            weekday,
            dt.day,
            month,
            dt.year,
            dt.hour,
            dt.minute,
            dt.second,
        )

    def generateSignature(self, digest, uri):
        """Return the base64 HMAC-SHA256 signature for a request to *uri*.

        The signed text is the host line, the date line (``self.Date``), the
        request line and the digest line, joined by newlines.
        """
        signature_str = "host: " + self.Host + "\n"
        signature_str += "date: " + self.Date + "\n"
        signature_str += self.HttpMethod + " " + uri + " " + self.HttpProto + "\n"
        signature_str += "digest: " + digest
        signature = hmac.new(
            self.Secret.encode("utf-8"),
            signature_str.encode("utf-8"),
            digestmod=hashlib.sha256,
        ).digest()
        return base64.b64encode(signature).decode("utf-8")

    def init_header(self, data, uri):
        """Build the signed headers for POSTing the JSON text *data* to *uri*.

        ``self.Date`` is refreshed here so that each request is signed with
        the current time. Previously the date was fixed when the object was
        constructed, so an instance that polls for a long time kept signing
        with an increasingly stale timestamp.
        """
        self.Date = self.httpdate(datetime.datetime.now(datetime.timezone.utc))
        digest = self.hashlib_256(data)
        sign = self.generateSignature(digest, uri)
        auth_header = (
            'api_key="%s",algorithm="%s", '
            'headers="host date request-line digest", '
            'signature="%s"' % (self.UserName, self.Algorithm, sign)
        )
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Method": "POST",
            "Host": self.Host,
            "Date": self.Date,
            "Digest": digest,
            "Authorization": auth_header,
        }
        return headers

    def get_create_body(self, fileurl):
        """Return the JSON body that creates a task for the audio at *fileurl*."""
        post_data = {
            "common": {"app_id": self.APPID},
            "business": self.BusinessArgsCreate,
            "data": {"audio_src": "http", "audio_url": fileurl, "encoding": "raw"},
        }
        return json.dumps(post_data)

    def get_query_body(self, task_id):
        """Return the JSON body that queries the task *task_id*."""
        post_data = {
            "common": {"app_id": self.APPID},
            "business": {
                "task_id": task_id,
            },
        }
        return json.dumps(post_data)

    def call(self, url, body, headers):
        """POST *body* to *url*.

        Returns:
            The parsed JSON on HTTP 200 (or the raw text when it is not
            JSON), the raw response bytes for any other status, or None
            when the request itself raised.
        """
        try:
            response = requests.post(url, data=body, headers=headers, timeout=8)
            if response.status_code != 200:
                return response.content
            try:
                return json.loads(response.text)
            except json.JSONDecodeError:
                return response.text
        except Exception as e:
            print("Exception :%s" % e)
            return None

    def task_create(self, fileurl):
        """Create a transcription task for *fileurl* and return the reply of ``call``."""
        body = self.get_create_body(fileurl)
        headers_create = self.init_header(body, self.RequestUriCreate)
        return self.call(self.urlCreate, body, headers_create)

    def task_query(self, task_id):
        """Query the task *task_id* and return the reply of ``call``."""
        query_body = self.get_query_body(task_id)
        headers_query = self.init_header(query_body, self.RequestUriQuery)
        return self.call(self.urlQuery, query_body, headers_query)

    @staticmethod
    def _collect_st_text(st_entries):
        """Return the text fragments found in a list of ``st`` entries.

        A string entry is taken as is. A dict entry is walked along
        ``rt -> ws -> cw`` and every non-empty ``w`` value is collected.
        Anything with an unexpected type is skipped.
        """
        parts = []
        for st in st_entries:
            if isinstance(st, str):
                parts.append(st)
                continue
            if not isinstance(st, dict):
                continue
            rt = st.get("rt", [])
            if not isinstance(rt, list):
                continue
            for item in rt:
                if not isinstance(item, dict):
                    continue
                ws_list = item.get("ws", [])
                if not isinstance(ws_list, list):
                    continue
                for ws in ws_list:
                    if not isinstance(ws, dict):
                        continue
                    cw_list = ws.get("cw", [])
                    if not isinstance(cw_list, list):
                        continue
                    for cw in cw_list:
                        if isinstance(cw, dict):
                            w = cw.get("w", "")
                            if w:
                                parts.append(w)
        return parts

    def extract_text(self, result):
        """Extract the text content from an API result.

        Accepts a JSON string or an already-parsed object and handles, in
        this order: an error reply (non-zero ``code``), a plain string under
        ``result``, the detailed ``lattice`` layout, and the simplified
        top-level ``st`` layout.

        Returns:
            The extracted text. A string that is not JSON is returned as is,
            an error reply yields ``"错误: <message>"``, an unrecognised dict
            is returned pretty-printed, and any other type as ``str(result)``.
        """
        # Debug output: type of the raw input
        print(f"\n[DEBUG] extract_text 输入类型: {type(result)}")

        # A string is first tried as JSON
        if isinstance(result, str):
            print(f"[DEBUG] 字符串内容 (前200字符): {result[:200]}")
            try:
                result = json.loads(result)
                print("[DEBUG] 成功解析字符串为JSON对象")
            except json.JSONDecodeError:
                print("[DEBUG] 无法解析为JSON,返回原始字符串")
                return result

        if not isinstance(result, dict):
            # 6. Result that is not a dict
            print(f"[WARNING] 非字典类型结果: {type(result)}")
            return str(result)

        print("[DEBUG] 处理字典类型结果")

        # 1. Error reply
        if "code" in result and result["code"] != 0:
            error_msg = result.get("message", "未知错误")
            print(
                f"[ERROR] API返回错误: code={result['code']}, message={error_msg}"
            )
            return f"错误: {error_msg}"

        # 2. Text directly under "result"
        if "result" in result and isinstance(result["result"], str):
            print("[DEBUG] 找到直接结果字段")
            return result["result"]

        # 3. Detailed lattice layout
        if "lattice" in result and isinstance(result["lattice"], list):
            print("[DEBUG] 解析lattice结构")
            text_parts = []
            for lattice in result["lattice"]:
                if not isinstance(lattice, dict):
                    continue
                json_1best = lattice.get("json_1best", {})
                if not json_1best or not isinstance(json_1best, dict):
                    continue
                # "st" may be a single dict or a list; normalise to a list.
                st_content = json_1best.get("st")
                if isinstance(st_content, dict):
                    st_list = [st_content]
                elif isinstance(st_content, list):
                    st_list = st_content
                else:
                    st_list = []
                text_parts.extend(self._collect_st_text(st_list))
            return "".join(text_parts)

        # 4. Simplified layout with "st" at the top level
        if "st" in result and isinstance(result["st"], list):
            print("[DEBUG] 解析st结构")
            return "".join(self._collect_st_text(result["st"]))

        # 5. Any other, unknown layout
        print("[WARNING] 无法识别的结果结构")
        return json.dumps(result, indent=2, ensure_ascii=False)


def _format_for_log(value):
    """Pretty-print *value* as JSON for a log line, falling back to ``repr``.

    API replies can be ``bytes`` (non-200 responses), which ``json.dumps``
    rejects with ``TypeError``; the fallback keeps logging from raising.
    """
    try:
        return json.dumps(value, indent=2, ensure_ascii=False)
    except (TypeError, ValueError):
        return repr(value)


def audio_to_str(appid, apikey, apisecret, file_path):
    """Transcribe an audio file through the iFlytek Open Platform API.

    The file is uploaded, a transcription task is created for the returned
    URL, and the task is polled (at most 30 times, 5 seconds apart) until it
    reports completion.

    Args:
        appid (str): iFlytek Open Platform appid.
        apikey (str): iFlytek Open Platform apikey.
        apisecret (str): iFlytek Open Platform apisecret.
        file_path (str): path of the audio file.

    Returns:
        str: the transcribed text, or None if any step fails.
    """
    # Check that the file exists
    if not os.path.exists(file_path):
        print(f"错误:文件 {file_path} 不存在")
        return None

    try:
        # 1. Upload the file
        file_uploader = FileUploader(
            app_id=appid,
            api_key=apikey,
            api_secret=apisecret,
            upload_file_path=file_path,
        )
        fileurl = file_uploader.upload_file()
        if not fileurl:
            print("文件上传失败")
            return None
        print("文件上传成功,fileurl:", fileurl)

        # 2. Create the task
        result_extractor = ResultExtractor(appid, apikey, apisecret)
        print("\n------ 创建任务 -------")
        create_response = result_extractor.task_create(fileurl)

        # Debug output. The reply may be bytes (non-200) or None, so it is
        # formatted defensively instead of being passed straight to json.dumps,
        # which used to raise and hide the actual server error.
        print(f"[DEBUG] 创建任务响应: {_format_for_log(create_response)}")

        create_data = (
            create_response.get("data") if isinstance(create_response, dict) else None
        )
        task_id = create_data.get("task_id") if isinstance(create_data, dict) else None
        if not task_id:
            print("创建任务失败:", create_response)
            return None
        print(f"任务ID: {task_id}")

        # 3. Poll the task
        print("\n------ 查询任务 -------")
        print("任务转写中······")
        max_attempts = 30

        for attempt in range(max_attempts):
            result = result_extractor.task_query(task_id)

            # Debug output of the query reply
            print(f"\n[QUERY {attempt + 1}] 响应类型: {type(result)}")
            if isinstance(result, dict):
                print(f"[QUERY {attempt + 1}] 响应内容: {_format_for_log(result)}")
            else:
                print(
                    f"[QUERY {attempt + 1}] 响应内容 (前200字符): {str(result)[:200]}"
                )

            # Only a dict is a usable reply
            if not isinstance(result, dict):
                print(f"无效响应类型: {type(result)}")
                return None

            # API-level error code
            if "code" in result and result["code"] != 0:
                error_msg = result.get("message", "未知错误")
                print(f"API错误: code={result['code']}, message={error_msg}")
                return None

            # Task status; a missing or non-dict "data" counts as "no status"
            task_data = result.get("data")
            if not isinstance(task_data, dict):
                task_data = {}
            task_status = task_data.get("task_status")

            if not task_status:
                print("响应中缺少任务状态字段")
                print("完整响应:", _format_for_log(result))
                return None

            if task_status in ("1", "2"):  # pending or in progress
                print(
                    f"任务状态:{task_status},等待中... (尝试 {attempt + 1}/{max_attempts})"
                )
                time.sleep(5)
                continue

            if task_status not in ("3", "4"):
                print(f"未知任务状态:{task_status}")
                print("完整响应:", _format_for_log(result))
                return None

            # Status "3" or "4": finished (or callback finished)
            print("转写完成···")
            result_content = task_data.get("result")
            if result_content is None:
                print("\n响应中缺少结果字段")
                print("完整响应:", _format_for_log(result))
                return None
            try:
                result_text = result_extractor.extract_text(result_content)
                print("\n转写结果:\n", result_text)
                return result_text
            except Exception as e:
                print(f"\n提取文本时出错: {str(e)}")
                print(f"错误详情:\n{traceback.format_exc()}")
                print("原始结果内容:", _format_for_log(result_content))
                return None

        # Every attempt saw status "1"/"2"
        print(f"超过最大查询次数({max_attempts}),任务可能仍在处理中")
        return None

    except Exception as e:
        print(f"发生异常: {str(e)}")
        print(f"错误详情:\n{traceback.format_exc()}")
        return None


# The bare string below is a leftover note (in Chinese) about a general OCR
# demo. In English:
#   1. General text recognition; the base64-encoded image must not exceed 10M.
#   2. Obtain appid, apiSecret and apiKey from the iFlytek Open Platform
#      console and fill them into this demo.
#   3. Chinese and English are supported, handwritten and printed.
#   4. Improved results on slanted text; some rare characters are supported.
# NOTE(review): image_to_str in this file calls an Azure endpoint instead, and
# nothing visible here reads URL - this note and the constant look stale;
# confirm before relying on them.
"""

1、通用文字识别,图像数据base64编码后大小不得超过10M

2、appid、apiSecret、apiKey请到讯飞开放平台控制台获取并填写到此demo中

3、支持中英文,支持手写和印刷文字。

4、在倾斜文字上效果有提升,同时支持部分生僻字的识别

"""

# Image recognition endpoint URL
URL = "https://api.xf-yun.com/v1/private/sf8e6aca1"


class AssembleHeaderException(ValueError):
    """Raised when a request URL cannot be split into schema, host and path.

    It derives from ``ValueError`` so that callers which caught the
    ``ValueError`` that ``parse_url`` used to raise for some malformed URLs
    keep working. The text is also available as ``.message``.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.message = msg


class Url:
    """Plain holder for the three parts of a parsed request URL."""

    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema


# calculate sha256 and encode to base64
def sha256base64(data):
    """Return the base64 text of the SHA-256 digest of the bytes *data*."""
    return base64.b64encode(hashlib.sha256(data).digest()).decode(encoding="utf-8")


def parse_url(requset_url):
    """Split *requset_url* into schema, host and path.

    Args:
        requset_url: URL such as ``https://host/path``.

    Returns:
        Url: ``schema`` includes the trailing ``://`` and ``path`` starts
        with ``/``.

    Raises:
        AssembleHeaderException: if the URL has no ``://``, an empty host,
            or no ``/`` after the host. Previously the first and last case
            escaped as a bare ``ValueError`` from ``str.index``, which also
            made the ``edidx <= 0`` guard unable to catch a missing path.
    """
    stidx = requset_url.find("://")
    if stidx < 0:
        raise AssembleHeaderException("invalid request url:" + requset_url)
    schema = requset_url[: stidx + 3]
    remainder = requset_url[stidx + 3 :]
    edidx = remainder.find("/")
    # -1: no path at all; 0: empty host.
    if edidx <= 0:
        raise AssembleHeaderException("invalid request url:" + requset_url)
    return Url(remainder[:edidx], remainder[edidx:], schema)


# build websocket auth request url
def assemble_ws_auth_url(requset_url, method="POST", api_key="", api_secret=""):
    """Return *requset_url* with signed ``host``/``date``/``authorization`` query parameters.

    The host, the current date and the request line are signed with
    HMAC-SHA256 using *api_secret*; the authorization text is base64-encoded
    and the three values are appended as a URL-encoded query string.

    Args:
        requset_url: endpoint URL; it is split with ``parse_url``.
        method: HTTP method written into the signed request line.
        api_key: key placed into the authorization text.
        api_secret: secret used as the HMAC key.
    """
    parsed = parse_url(requset_url)
    date = format_date_time(mktime(datetime.datetime.now().timetuple()))

    # Text to sign: host line, date line, request line.
    signature_origin = "\n".join(
        (
            "host: {}".format(parsed.host),
            "date: {}".format(date),
            "{} {} HTTP/1.1".format(method, parsed.path),
        )
    )
    raw_signature = hmac.new(
        api_secret.encode("utf-8"),
        signature_origin.encode("utf-8"),
        digestmod=hashlib.sha256,
    ).digest()
    signature = base64.b64encode(raw_signature).decode(encoding="utf-8")

    authorization_origin = (
        'api_key="{}", algorithm="{}", headers="{}", signature="{}"'.format(
            api_key, "hmac-sha256", "host date request-line", signature
        )
    )
    authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
        encoding="utf-8"
    )

    query = urlencode(
        {"host": parsed.host, "date": date, "authorization": authorization}
    )
    return requset_url + "?" + query


def image_to_str(endpoint=None, key=None, unused_param=None, file_path=None):
    """Recognise the text in an image through the Azure Computer Vision Read API.

    The image bytes are POSTed to ``<endpoint>/vision/v3.2/read/analyze``;
    the URL from the ``Operation-Location`` response header is then polled
    once per second until the operation reports ``succeeded`` or ``failed``,
    for at most 60 polls.

    Args:
        endpoint (str): Azure Computer Vision endpoint URL.
        key (str): Azure Computer Vision API key.
        unused_param (str): unused; kept for call compatibility.
        file_path (str): path of the image file.

    Returns:
        str: the recognised lines joined by single spaces ("" when no text
        was found), or None on any error or when polling times out.
    """
    # Default configuration.
    # NOTE(security): a live-looking subscription key is hard-coded here. It is
    # kept only so existing callers that rely on the defaults keep working;
    # it should be rotated and supplied through configuration instead.
    if endpoint is None:
        endpoint = "https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/"
    if key is None:
        key = "45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ"

    max_polls = 60  # upper bound so a stuck operation cannot hang the caller
    poll_interval = 1  # seconds between polls
    request_timeout = 30  # seconds; without it requests waits indefinitely

    try:
        # Read the image file
        with open(file_path, "rb") as f:
            image_data = f.read()

        # Build the request URL
        analyze_url = endpoint.rstrip("/") + "/vision/v3.2/read/analyze"

        # Request headers
        headers = {
            "Ocp-Apim-Subscription-Key": key,
            "Content-Type": "application/octet-stream",
        }

        # Start the analysis
        response = requests.post(
            analyze_url, headers=headers, data=image_data, timeout=request_timeout
        )
        if response.status_code != 202:
            print(f"分析请求失败: {response.status_code}, {response.text}")
            return None

        # URL of the asynchronous operation
        operation_url = response.headers["Operation-Location"]
        poll_headers = {"Ocp-Apim-Subscription-Key": key}

        # Poll for the result
        for _ in range(max_polls):
            result_response = requests.get(
                operation_url, headers=poll_headers, timeout=request_timeout
            )
            result = result_response.json()
            status = result.get("status") if isinstance(result, dict) else None

            if status == "succeeded":
                # Collect the text of every recognised line
                read_results = result.get("analyzeResult", {}).get("readResults", [])
                text_results = [
                    line["text"]
                    for read_result in read_results
                    for line in read_result.get("lines", [])
                ]
                return " ".join(text_results) if text_results else ""

            if status == "failed" or status is None:
                # A reply without a status is treated as a failure instead of
                # surfacing as a KeyError.
                print(f"文字识别失败: {result}")
                return None

            # Still pending: wait before the next poll
            time.sleep(poll_interval)

        print(f"文字识别超时: 轮询 {max_polls} 次后仍未完成")
        return None

    except Exception as e:
        print(f"发生异常: {e}")
        return None


# Script entry point: transcribe one sample audio file and OCR one sample image.
# Note that azure_speech_to_text is defined after this guard, so it does not
# exist yet while this block runs as a script; it is not called here.
if __name__ == "__main__":
    # iFlytek Open Platform appid, secret and key, plus the file paths.
    # NOTE(security): credentials are hard-coded below (and in the image_to_str
    # call); they should be rotated and loaded from configuration instead.
    appid = "33c1b63d"
    apikey = "40bf7cd82e31ace30a9cfb76309a43a3"
    apisecret = "OTY1YzIyZWM3YTg0OWZiMGE2ZjA2ZmE4"
    audio_path = r"audio_sample_little.wav"  # make sure the file path is correct
    image_path = r"1.png"  # make sure the file path is correct

    # Audio to text
    audio_text = audio_to_str(appid, apikey, apisecret, audio_path)
    # Image to text
    image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_path)
    
    print("-"* 20)

    print("\n音频转文字结果:", audio_text)
    print("\n图片转文字结果:", image_text)


def azure_speech_to_text(speech_key, speech_region, audio_file_path):
    """Convert an audio file to text with the Azure Speech service.

    Recognition language is fixed to ``zh-CN`` and a single
    ``recognize_once`` call is made on the file.
    NOTE(review): ``recognize_once`` presumably stops after the first
    utterance - confirm that this is acceptable for long recordings.

    Args:
        speech_key (str): Azure Speech API key.
        speech_region (str): Azure Speech service region.
        audio_file_path (str): path of the audio file.

    Returns:
        str: the recognised text, or None when nothing was recognised, the
        recognition was cancelled, or an error occurred.
    """
    try:
        # Speech configuration (Chinese)
        config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
        config.speech_recognition_language = "zh-CN"

        # Recogniser reading from the audio file
        recognizer = speechsdk.SpeechRecognizer(
            speech_config=config,
            audio_config=speechsdk.audio.AudioConfig(filename=audio_file_path),
        )

        outcome = recognizer.recognize_once()
        reason = outcome.reason

        if reason == speechsdk.ResultReason.RecognizedSpeech:
            print(f"Azure Speech识别成功: {outcome.text}")
            return outcome.text

        if reason == speechsdk.ResultReason.NoMatch:
            print("Azure Speech未识别到语音")
        elif reason == speechsdk.ResultReason.Canceled:
            details = outcome.cancellation_details
            print(f"Azure Speech识别被取消: {details.reason}")
            if details.reason == speechsdk.CancellationReason.Error:
                print(f"错误详情: {details.error_details}")
        # Every outcome other than recognised speech yields None.
        return None
    except Exception as e:
        print(f"Azure Speech识别出错: {str(e)}")
        return None