siyuwang541 committed on
Commit 95bd630 · verified · 1 Parent(s): bb95d10
Files changed (13)
  1. .gitattributes +2 -0
  2. 1.png +0 -0
  3. README.md +15 -15
  4. __init__.py +0 -0
  5. app.py +1330 -722
  6. app_pro.py +840 -0
  7. audio_127.0.0.1.wav +3 -0
  8. image_127.0.0.1.jpg +0 -0
  9. requirements.txt +8 -4
  10. se_app.py +232 -0
  11. temp_audio.wav +3 -0
  12. todogen_LLM_config.yaml +11 -1
  13. tools.py +828 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ audio_127.0.0.1.wav filter=lfs diff=lfs merge=lfs -text
37
+ temp_audio.wav filter=lfs diff=lfs merge=lfs -text
1.png ADDED
README.md CHANGED
@@ -1,16 +1,16 @@
1
- ---
2
- title: ToDoAgent
3
- emoji: 💬
4
- colorFrom: yellow
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.32.0
8
- app_file: app.py
9
- pinned: false
10
- license: bsd
11
- short_description: AI Agent filters, creates to-do list and reminds smartly
12
- tags: ['agent-demo-track']
13
- demo: https://youtu.be/S-wh3Psx15M?si=Wiq7EzmE3dmBvLKQ
14
- ---
15
-
16
  An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
1
+ ---
2
+ title: ToDoAgent
3
+ emoji: 💬
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.32.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: bsd
11
+ short_description: AI Agent filters, creates to-do list and reminds smartly
12
+ tags: ['agent-demo-track']
13
+ demo: https://youtu.be/S-wh3Psx15M?si=Wiq7EzmE3dmBvLKQ
14
+ ---
15
+
16
  An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
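
For readers unfamiliar with that stock template sentence, here is a minimal sketch of the Gradio + `huggingface_hub` Inference API pattern it refers to. The model id and generation defaults below are placeholders, and this Space's actual `app.py` (diffed below) is considerably more elaborate than this sketch:

```python
# Minimal sketch of the pattern the README describes: a Gradio chat UI that
# streams replies from the Hugging Face Inference API via huggingface_hub.
# The model id below is a placeholder, not the model this Space actually uses.
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # placeholder model

def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    # With the default ChatInterface settings, history arrives as (user, assistant) pairs.
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # chat_completion streams OpenAI-style chunks; accumulate the delta tokens.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```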
__init__.py ADDED
File without changes
app.py CHANGED
@@ -1,722 +1,1330 @@
1
- import gradio as gr
2
- import json
3
- from pathlib import Path
4
- import yaml
5
- import re
6
- import logging
7
- import io
8
- import sys
9
- import re
10
- from datetime import datetime, timezone, timedelta
11
- import requests
12
-
13
- CONFIG = None
14
- HF_CONFIG_PATH = Path(__file__).parent / "todogen_LLM_config.yaml"
15
-
16
- def load_hf_config():
17
- global CONFIG
18
- if CONFIG is None:
19
- try:
20
- with open(HF_CONFIG_PATH, 'r', encoding='utf-8') as f:
21
- CONFIG = yaml.safe_load(f)
22
- print(f"✅ 配置已加载: {HF_CONFIG_PATH}")
23
- except FileNotFoundError:
24
- print(f"❌ 错误: 配置文件 {HF_CONFIG_PATH} 未找到。请确保它在 hf 目录下。")
25
- CONFIG = {}
26
- except Exception as e:
27
- print(f"❌ 加载配置文件 {HF_CONFIG_PATH} 时出错: {e}")
28
- CONFIG = {}
29
- return CONFIG
30
-
31
- def get_hf_openai_config():
32
- config = load_hf_config()
33
- return config.get('openai', {})
34
-
35
- def get_hf_openai_filter_config():
36
- config = load_hf_config()
37
- return config.get('openai_filter', {})
38
-
39
- def get_hf_paths_config():
40
- config = load_hf_config()
41
- base = Path(__file__).resolve().parent
42
- paths_cfg = config.get('paths', {})
43
- return {
44
- 'base_dir': base,
45
- 'prompt_template': base / paths_cfg.get('prompt_template', 'prompt_template.txt'),
46
- 'true_positive_examples': base / paths_cfg.get('true_positive_examples', 'TruePositive_few_shot.txt'),
47
- 'false_positive_examples': base / paths_cfg.get('false_positive_examples', 'FalsePositive_few_shot.txt'),
48
- }
49
-
50
- llm_config = get_hf_openai_config()
51
- NVIDIA_API_BASE_URL = llm_config.get('base_url')
52
- NVIDIA_API_KEY = llm_config.get('api_key')
53
- NVIDIA_MODEL_NAME = llm_config.get('model')
54
-
55
- filter_config = get_hf_openai_filter_config()
56
- Filter_API_BASE_URL = filter_config.get('base_url_filter')
57
- Filter_API_KEY = filter_config.get('api_key_filter')
58
- Filter_MODEL_NAME = filter_config.get('model_filter')
59
-
60
- if not NVIDIA_API_BASE_URL or not NVIDIA_API_KEY or not NVIDIA_MODEL_NAME:
61
- print("❌ 错误: NVIDIA API 配置不完整。请检查 todogen_LLM_config.yaml 中的 openai 部分。")
62
- NVIDIA_API_BASE_URL = ""
63
- NVIDIA_API_KEY = ""
64
- NVIDIA_MODEL_NAME = ""
65
-
66
- if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
67
- print("❌ 错误: Filter API 配置不完整。请检查 todogen_LLM_config.yaml 中的 openai_filter 部分。")
68
- Filter_API_BASE_URL = ""
69
- Filter_API_KEY = ""
70
- Filter_MODEL_NAME = ""
71
-
72
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
73
- logger = logging.getLogger(__name__)
74
-
75
- def load_single_few_shot_file_hf(file_path: Path) -> str:
76
- try:
77
- with open(file_path, 'r', encoding='utf-8') as f:
78
- content = f.read()
79
- escaped_content = content.replace('{', '{{').replace('}', '}}')
80
- return escaped_content
81
- except FileNotFoundError:
82
- return ""
83
- except Exception:
84
- return ""
85
-
86
- PROMPT_TEMPLATE_CONTENT = ""
87
- TRUE_POSITIVE_EXAMPLES_CONTENT = ""
88
- FALSE_POSITIVE_EXAMPLES_CONTENT = ""
89
-
90
- def load_prompt_data_hf():
91
- global PROMPT_TEMPLATE_CONTENT, TRUE_POSITIVE_EXAMPLES_CONTENT, FALSE_POSITIVE_EXAMPLES_CONTENT
92
- paths = get_hf_paths_config()
93
- try:
94
- with open(paths['prompt_template'], 'r', encoding='utf-8') as f:
95
- PROMPT_TEMPLATE_CONTENT = f.read()
96
- except FileNotFoundError:
97
- PROMPT_TEMPLATE_CONTENT = "Error: Prompt template not found."
98
-
99
- TRUE_POSITIVE_EXAMPLES_CONTENT = load_single_few_shot_file_hf(paths['true_positive_examples'])
100
- FALSE_POSITIVE_EXAMPLES_CONTENT = load_single_few_shot_file_hf(paths['false_positive_examples'])
101
-
102
- load_prompt_data_hf()
103
-
104
- def _process_parsed_json(parsed_data):
105
- try:
106
- if isinstance(parsed_data, list):
107
- if not parsed_data:
108
- return [{}]
109
-
110
- processed_list = []
111
- for item in parsed_data:
112
- if isinstance(item, dict):
113
- processed_list.append(item)
114
- else:
115
- try:
116
- processed_list.append({"content": str(item)})
117
- except:
118
- processed_list.append({"content": "无法转换的项目"})
119
-
120
- if not processed_list:
121
- return [{}]
122
-
123
- return processed_list
124
-
125
- elif isinstance(parsed_data, dict):
126
- return parsed_data
127
-
128
- else:
129
- return {"content": str(parsed_data)}
130
-
131
- except Exception as e:
132
- return {"error": f"Error processing parsed JSON: {e}"}
133
-
134
- def json_parser(text: str) -> dict:
135
- try:
136
- try:
137
- parsed_data = json.loads(text)
138
- return _process_parsed_json(parsed_data)
139
- except json.JSONDecodeError:
140
- pass
141
-
142
- match = re.search(r'```(?:json)?\n(.*?)```', text, re.DOTALL)
143
- if match:
144
- json_str = match.group(1).strip()
145
- json_str = re.sub(r',\s*]', ']', json_str)
146
- json_str = re.sub(r',\s*}', '}', json_str)
147
- try:
148
- parsed_data = json.loads(json_str)
149
- return _process_parsed_json(parsed_data)
150
- except json.JSONDecodeError:
151
- pass
152
-
153
- array_match = re.search(r'\[\s*\{.*?\}\s*(?:,\s*\{.*?\}\s*)*\]', text, re.DOTALL)
154
- if array_match:
155
- potential_json = array_match.group(0).strip()
156
- try:
157
- parsed_data = json.loads(potential_json)
158
- return _process_parsed_json(parsed_data)
159
- except json.JSONDecodeError:
160
- pass
161
-
162
- object_match = re.search(r'\{.*?\}', text, re.DOTALL)
163
- if object_match:
164
- potential_json = object_match.group(0).strip()
165
- try:
166
- parsed_data = json.loads(potential_json)
167
- return _process_parsed_json(parsed_data)
168
- except json.JSONDecodeError:
169
- pass
170
-
171
- return {"error": "No valid JSON block found or failed to parse", "raw_text": text}
172
-
173
- except Exception as e:
174
- return {"error": f"Unexpected error in json_parser: {e}", "raw_text": text}
175
-
176
- def filter_message_with_llm(text_input: str, message_id: str = "user_input_001"):
177
- mock_data = [(text_input, message_id)]
178
-
179
- system_prompt = """
180
- # 角色
181
- 你是一个专业的短信内容分析助手,根据输入判断内容的类型及可信度,为用户使用信息提供依据和便利。
182
-
183
- # 任务
184
- 对于输入的多条数据,分析每一条数据内容(主键:`message_id`)属于【物流取件、缴费充值、待付(还)款、会议邀约、其他】的可能性百分比。
185
- 主要对于聊天、问候、回执、结果通知、上月账单等信息不需要收件人进行下一步处理的信息,直接归到其他类进行忽略
186
-
187
- # 要求
188
- 1. 以json格式输出
189
- 2. content简洁提炼关键词,字符数<20以内
190
- 3. 输入条数和输出条数完全一样
191
-
192
- # 输出示例
193
- ```
194
- [
195
- {"message_id":"1111111","content":"账单805.57元待还","物流取件":0,"欠费缴纳":99,"待付(还)款":1,"会议邀约":0,"其他":0, "分类":"欠费缴纳"},
196
- {"message_id":"222222","content":"邀请你加入飞书视频会议","物流取件":0,"欠费缴纳":0,"待付(还)款":1,"会议邀约":100,"其他":0, "分类":"会议邀约"}
197
- ]
198
- ```
199
- """
200
-
201
- llm_messages = [
202
- {"role": "system", "content": system_prompt},
203
- {"role": "user", "content": str(mock_data)}
204
- ]
205
-
206
- try:
207
- if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
208
- return [{"error": "Filter API configuration incomplete", "-": "-"}]
209
-
210
- headers = {
211
- "Authorization": f"Bearer {Filter_API_KEY}",
212
- "Accept": "application/json"
213
- }
214
- payload = {
215
- "model": Filter_MODEL_NAME,
216
- "messages": llm_messages,
217
- "temperature": 0.0,
218
- "top_p": 0.95,
219
- "max_tokens": 1024,
220
- "stream": False
221
- }
222
-
223
- api_url = f"{Filter_API_BASE_URL}/chat/completions"
224
-
225
- try:
226
- response = requests.post(api_url, headers=headers, json=payload)
227
- response.raise_for_status()
228
- raw_llm_response = response.json()["choices"][0]["message"]["content"]
229
- except requests.exceptions.RequestException as e:
230
- return [{"error": f"Filter API call failed: {e}", "-": "-"}]
231
-
232
- raw_llm_response = raw_llm_response.replace("```json", "").replace("```", "")
233
- parsed_filter_data = json_parser(raw_llm_response)
234
-
235
- if "error" in parsed_filter_data:
236
- return [{"error": f"Filter LLM response parsing error: {parsed_filter_data['error']}"}]
237
-
238
- if isinstance(parsed_filter_data, list) and parsed_filter_data:
239
- for item in parsed_filter_data:
240
- if isinstance(item, dict) and item.get("分类") == "欠费缴纳" and "缴费支出" in item.get("content", ""):
241
- item["分类"] = "其他"
242
-
243
- request_id_list = {message_id}
244
- response_id_list = {item.get('message_id') for item in parsed_filter_data if isinstance(item, dict)}
245
- diff = request_id_list - response_id_list
246
-
247
- if diff:
248
- for missed_id in diff:
249
- parsed_filter_data.append({
250
- "message_id": missed_id,
251
- "content": text_input[:20],
252
- "物流取件": 0,
253
- "欠费缴纳": 0,
254
- "待付(还)款": 0,
255
- "会议邀约": 0,
256
- "其他": 100,
257
- "分类": "其他"
258
- })
259
-
260
- return parsed_filter_data
261
- else:
262
- return [{
263
- "message_id": message_id,
264
- "content": text_input[:20],
265
- "物流取件": 0,
266
- "欠费缴纳": 0,
267
- "待付(还)款": 0,
268
- "会议邀约": 0,
269
- "其他": 100,
270
- "分类": "其他",
271
- "error": "Filter LLM returned empty or unexpected format"
272
- }]
273
-
274
- except Exception as e:
275
- return [{
276
- "message_id": message_id,
277
- "content": text_input[:20],
278
- "物流取件": 0,
279
- "欠费缴纳": 0,
280
- "待付(还)款": 0,
281
- "会议邀约": 0,
282
- "其他": 100,
283
- "分类": "其他",
284
- "error": f"Filter LLM call/parse error: {str(e)}"
285
- }]
286
-
287
- def generate_todolist_from_text(text_input: str, message_id: str = "user_input_001"):
288
- if not PROMPT_TEMPLATE_CONTENT or "Error:" in PROMPT_TEMPLATE_CONTENT:
289
- return [["error", "Prompt template not loaded", "-"]]
290
-
291
- current_time_iso = datetime.now(timezone.utc).isoformat()
292
- content_escaped = text_input.replace('{', '{{').replace('}', '}}')
293
-
294
- formatted_prompt = PROMPT_TEMPLATE_CONTENT.format(
295
- true_positive_examples=TRUE_POSITIVE_EXAMPLES_CONTENT,
296
- false_positive_examples=FALSE_POSITIVE_EXAMPLES_CONTENT,
297
- current_time=current_time_iso,
298
- message_id=message_id,
299
- content_escaped=content_escaped
300
- )
301
-
302
- enhanced_prompt = formatted_prompt + """
303
-
304
- # 重要提示
305
- 请确保你的回复是有效的JSON格式,并且只包含JSON内容。不要添加任何额外的解释或文本。
306
- 你的回复应该严格按照上面的输出示例格式,只包含JSON对象,不要有任何其他文本。
307
- """
308
-
309
- llm_messages = [
310
- {"role": "user", "content": enhanced_prompt}
311
- ]
312
-
313
- try:
314
- if ("充值" in text_input or "缴费" in text_input) and ("移动" in text_input or "话费" in text_input or "余额" in text_input):
315
- todo_item = {
316
- message_id: {
317
- "is_todo": True,
318
- "end_time": (datetime.now(timezone.utc) + timedelta(days=3)).isoformat(),
319
- "location": "线上:中国移动APP",
320
- "todo_content": "缴纳话费",
321
- "urgency": "important"
322
- }
323
- }
324
-
325
- todo_content = "缴纳话费"
326
- end_time = todo_item[message_id]["end_time"].split("T")[0]
327
- location = todo_item[message_id]["location"]
328
-
329
- combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
330
-
331
- output_for_df = []
332
- output_for_df.append([1, combined_content, "重要"])
333
-
334
- return output_for_df
335
-
336
- elif "会议" in text_input and ("邀请" in text_input or "参加" in text_input):
337
- meeting_time = None
338
- meeting_pattern = r'(\d{1,2}[月/-]\d{1,2}[日号]?\s*\d{1,2}[点:]\d{0,2}|\d{4}[年/-]\d{1,2}[月/-]\d{1,2}[日号]?\s*\d{1,2}[点:]\d{0,2})'
339
- meeting_match = re.search(meeting_pattern, text_input)
340
-
341
- if meeting_match:
342
- meeting_time = (datetime.now(timezone.utc) + timedelta(days=1, hours=2)).isoformat()
343
- else:
344
- meeting_time = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat()
345
-
346
- todo_item = {
347
- message_id: {
348
- "is_todo": True,
349
- "end_time": meeting_time,
350
- "location": "线上:会议软件",
351
- "todo_content": "参加会议",
352
- "urgency": "important"
353
- }
354
- }
355
-
356
- todo_content = "参加会议"
357
- end_time = todo_item[message_id]["end_time"].split("T")[0]
358
- location = todo_item[message_id]["location"]
359
-
360
- combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
361
-
362
- output_for_df = []
363
- output_for_df.append([1, combined_content, "重要"])
364
-
365
- return output_for_df
366
-
367
- elif ("快递" in text_input or "物流" in text_input or "取件" in text_input) and ("到达" in text_input or "取件码" in text_input or "柜" in text_input):
368
- pickup_code = None
369
- code_pattern = r'取件码[是为:]?\s*(\d{4,6})'
370
- code_match = re.search(code_pattern, text_input)
371
-
372
- todo_content = "取快递"
373
- if code_match:
374
- pickup_code = code_match.group(1)
375
- todo_content = f"取快递(取件码:{pickup_code})"
376
-
377
- todo_item = {
378
- message_id: {
379
- "is_todo": True,
380
- "end_time": (datetime.now(timezone.utc) + timedelta(days=2)).isoformat(),
381
- "location": "线下:快递柜",
382
- "todo_content": todo_content,
383
- "urgency": "important"
384
- }
385
- }
386
-
387
- end_time = todo_item[message_id]["end_time"].split("T")[0]
388
- location = todo_item[message_id]["location"]
389
-
390
- combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
391
-
392
- output_for_df = []
393
- output_for_df.append([1, combined_content, "重要"])
394
-
395
- return output_for_df
396
-
397
- if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
398
- return [["error", "Filter API configuration incomplete", "-"]]
399
-
400
- headers = {
401
- "Authorization": f"Bearer {Filter_API_KEY}",
402
- "Accept": "application/json"
403
- }
404
- payload = {
405
- "model": Filter_MODEL_NAME,
406
- "messages": llm_messages,
407
- "temperature": 0.2,
408
- "top_p": 0.95,
409
- "max_tokens": 1024,
410
- "stream": False
411
- }
412
-
413
- api_url = f"{Filter_API_BASE_URL}/chat/completions"
414
-
415
- try:
416
- response = requests.post(api_url, headers=headers, json=payload)
417
- response.raise_for_status()
418
- raw_llm_response = response.json()['choices'][0]['message']['content']
419
- except requests.exceptions.RequestException as e:
420
- return [["error", f"Filter API call failed: {e}", "-"]]
421
-
422
- parsed_todos_data = json_parser(raw_llm_response)
423
-
424
- if "error" in parsed_todos_data:
425
- return [["error", f"LLM response parsing error: {parsed_todos_data['error']}", parsed_todos_data.get('raw_text', '')[:50] + "..."]]
426
-
427
- output_for_df = []
428
-
429
- if isinstance(parsed_todos_data, dict):
430
- todo_info = None
431
- for key, value in parsed_todos_data.items():
432
- if key == message_id or key == str(message_id):
433
- todo_info = value
434
- break
435
-
436
- if todo_info and isinstance(todo_info, dict) and todo_info.get("is_todo", False):
437
- todo_content = todo_info.get("todo_content", "未指定待办内容")
438
- end_time = todo_info.get("end_time")
439
- location = todo_info.get("location")
440
- urgency = todo_info.get("urgency", "unimportant")
441
-
442
- combined_content = todo_content
443
-
444
- if end_time and end_time != "null":
445
- try:
446
- date_part = end_time.split("T")[0] if "T" in end_time else end_time
447
- combined_content += f" (截止时间: {date_part}"
448
- except:
449
- combined_content += f" (截止时间: {end_time}"
450
- else:
451
- combined_content += " ("
452
-
453
- if location and location != "null":
454
- combined_content += f", 地点: {location})"
455
- else:
456
- combined_content += ")"
457
-
458
- urgency_display = "一般"
459
- if urgency == "urgent":
460
- urgency_display = "紧急"
461
- elif urgency == "important":
462
- urgency_display = "重要"
463
-
464
- output_for_df = []
465
- output_for_df.append([1, combined_content, urgency_display])
466
- else:
467
- output_for_df = []
468
- output_for_df.append([1, "此消息不包含待办事项", "-"])
469
-
470
- elif isinstance(parsed_todos_data, list):
471
- output_for_df = []
472
-
473
- if not parsed_todos_data:
474
- return [[1, "未能生成待办事项", "-"]]
475
-
476
- for i, item in enumerate(parsed_todos_data):
477
- if isinstance(item, dict):
478
- todo_content = item.get('todo_content', item.get('content', 'N/A'))
479
- status = item.get('status', '未完成')
480
- urgency = item.get('urgency', 'normal')
481
-
482
- combined_content = todo_content
483
-
484
- if 'end_time' in item and item['end_time']:
485
- try:
486
- if isinstance(item['end_time'], str):
487
- date_part = item['end_time'].split("T")[0] if "T" in item['end_time'] else item['end_time']
488
- combined_content += f" (截止时间: {date_part}"
489
- else:
490
- combined_content += f" (截止时间: {str(item['end_time'])}"
491
- except Exception:
492
- combined_content += " ("
493
- else:
494
- combined_content += " ("
495
-
496
- if 'location' in item and item['location']:
497
- combined_content += f", 地点: {item['location']})"
498
- else:
499
- combined_content += ")"
500
-
501
- importance = "一般"
502
- if urgency == "urgent":
503
- importance = "紧急"
504
- elif urgency == "important":
505
- importance = "重要"
506
-
507
- output_for_df.append([i + 1, combined_content, importance])
508
- else:
509
- try:
510
- item_str = str(item) if item is not None else "未知项目"
511
- output_for_df.append([i + 1, item_str, "一般"])
512
- except Exception:
513
- output_for_df.append([i + 1, "处理错误的项目", "一般"])
514
-
515
- if not output_for_df:
516
- return [["info", "未发现待办事项", "-"]]
517
-
518
- return output_for_df
519
-
520
- except Exception as e:
521
- return [["error", f"LLM call/parse error: {str(e)}", "-"]]
522
-
523
- def process(audio, image):
524
- if audio is not None:
525
- sample_rate, audio_data = audio
526
- audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"
527
- else:
528
- audio_info = "未收到音频"
529
-
530
- if image is not None:
531
- image_info = f"图片尺寸: {image.shape}"
532
- else:
533
- image_info = "未收到图片"
534
-
535
- return audio_info, image_info
536
-
537
- def respond(message, history, system_message, max_tokens, temperature, top_p, audio, image):
538
- chat_messages = [{"role": "system", "content": system_message}]
539
- for val in history:
540
- if val[0]:
541
- chat_messages.append({"role": "user", "content": val[0]})
542
- if val[1]:
543
- chat_messages.append({"role": "assistant", "content": val[1]})
544
- chat_messages.append({"role": "user", "content": message})
545
-
546
- chat_response_stream = ""
547
- if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
548
- yield "Filter API 配置不完整,无法提供聊天回复。", []
549
- return
550
-
551
- headers = {
552
- "Authorization": f"Bearer {Filter_API_KEY}",
553
- "Accept": "application/json"
554
- }
555
- payload = {
556
- "model": Filter_MODEL_NAME,
557
- "messages": chat_messages,
558
- "temperature": temperature,
559
- "top_p": top_p,
560
- "max_tokens": max_tokens,
561
- "stream": True
562
- }
563
- api_url = f"{Filter_API_BASE_URL}/chat/completions"
564
-
565
- try:
566
- response = requests.post(api_url, headers=headers, json=payload, stream=True)
567
- response.raise_for_status()
568
-
569
- for chunk in response.iter_content(chunk_size=None):
570
- if chunk:
571
- try:
572
- for line in chunk.decode('utf-8').splitlines():
573
- if line.startswith('data: '):
574
- json_data = line[len('data: '):]
575
- if json_data.strip() == '[DONE]':
576
- break
577
- data = json.loads(json_data)
578
- token = data['choices'][0]['delta'].get('content', '')
579
- if token:
580
- chat_response_stream += token
581
- yield chat_response_stream, []
582
- except json.JSONDecodeError:
583
- pass
584
- except Exception as e:
585
- yield chat_response_stream + f"\n\n错误: {e}", []
586
-
587
- except requests.exceptions.RequestException as e:
588
- yield f"调用 NVIDIA API 失败: {e}", []
589
-
590
- with gr.Blocks() as app:
591
- gr.Markdown("# ToDoAgent Multi-Modal Interface with ToDo List")
592
-
593
- with gr.Row():
594
- with gr.Column(scale=2):
595
- gr.Markdown("## Chat Interface")
596
- chatbot = gr.Chatbot(height=450, label="聊天记录", type="messages")
597
- msg = gr.Textbox(label="输入消息", placeholder="输入您的问题或待办事项...")
598
-
599
- with gr.Row():
600
- audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
601
- image_input = gr.Image(label="上传图片", type="numpy")
602
-
603
- with gr.Accordion("高级设置", open=False):
604
- system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
605
- max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="最大生成长度(聊天)")
606
- temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="温度(聊天)")
607
- top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p(聊天)")
608
-
609
- with gr.Row():
610
- submit_btn = gr.Button("发送", variant="primary")
611
- clear_btn = gr.Button("清除聊天和ToDo")
612
-
613
- with gr.Column(scale=1):
614
- gr.Markdown("## Generated ToDo List")
615
- todolist_df = gr.DataFrame(headers=["ID", "任务内容", "状态"],
616
- datatype=["number", "str", "str"],
617
- row_count=(0, "dynamic"),
618
- col_count=(3, "fixed"),
619
- label="待办事项列表")
620
-
621
- def handle_submit(user_msg_content, ch_history, sys_msg, max_t, temp, t_p, audio_f, image_f):
622
- if not ch_history: ch_history = []
623
- ch_history.append({"role": "user", "content": user_msg_content})
624
- yield ch_history, []
625
-
626
- formatted_hist_for_respond = []
627
- temp_user_msg_for_hist = None
628
- for item_hist in ch_history[:-1]:
629
- if item_hist["role"] == "user":
630
- temp_user_msg_for_hist = item_hist["content"]
631
- elif item_hist["role"] == "assistant" and temp_user_msg_for_hist is not None:
632
- formatted_hist_for_respond.append((temp_user_msg_for_hist, item_hist["content"]))
633
- temp_user_msg_for_hist = None
634
- elif item_hist["role"] == "assistant" and temp_user_msg_for_hist is None:
635
- formatted_hist_for_respond.append(("", item_hist["content"]))
636
-
637
- ch_history.append({"role": "assistant", "content": ""})
638
-
639
- full_bot_response = ""
640
- for bot_response_token, _ in respond(user_msg_content, formatted_hist_for_respond, sys_msg, max_t, temp, t_p, audio_f, image_f):
641
- full_bot_response = bot_response_token
642
- ch_history[-1]["content"] = full_bot_response
643
- yield ch_history, []
644
-
645
- text_for_todo = user_msg_content
646
- current_todos_list = []
647
-
648
- filtered_result = filter_message_with_llm(text_for_todo)
649
-
650
- if isinstance(filtered_result, dict) and "error" in filtered_result:
651
- current_todos_list = [["Error", filtered_result['error'], "Filter Failed"]]
652
- elif isinstance(filtered_result, dict) and filtered_result.get("分类") == "其他":
653
- current_todos_list = [["Info", "消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
654
- elif isinstance(filtered_result, list):
655
- category = None
656
-
657
- if not filtered_result:
658
- if text_for_todo:
659
- msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
660
- current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
661
- yield ch_history, current_todos_list
662
- return
663
-
664
- valid_item = None
665
- for item in filtered_result:
666
- if isinstance(item, dict):
667
- valid_item = item
668
- if "分类" in item:
669
- category = item["分类"]
670
- break
671
-
672
- if valid_item is None:
673
- if text_for_todo:
674
- msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
675
- current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
676
- yield ch_history, current_todos_list
677
- return
678
-
679
- if category == "其他":
680
- current_todos_list = [["Info", "消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
681
- else:
682
- if text_for_todo:
683
- msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
684
- current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
685
- else:
686
- if text_for_todo:
687
- msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
688
- current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
689
-
690
- yield ch_history, current_todos_list
691
-
692
- submit_btn.click(
693
- handle_submit,
694
- [msg, chatbot, system_msg, max_tokens_slider, temperature_slider, top_p_slider, audio_input, image_input],
695
- [chatbot, todolist_df]
696
- )
697
- msg.submit(
698
- handle_submit,
699
- [msg, chatbot, system_msg, max_tokens_slider, temperature_slider, top_p_slider, audio_input, image_input],
700
- [chatbot, todolist_df]
701
- )
702
-
703
- def clear_all():
704
- return None, None, ""
705
- clear_btn.click(clear_all, None, [chatbot, todolist_df, msg], queue=False)
706
-
707
- with gr.Tab("Audio/Image Processing (Original)"):
708
- gr.Markdown("## 处理音频和图片")
709
- audio_processor = gr.Audio(label="上传音频", type="numpy")
710
- image_processor = gr.Image(label="上传图片", type="numpy")
711
- process_btn = gr.Button("处理", variant="primary")
712
- audio_output = gr.Textbox(label="音频信息")
713
- image_output = gr.Textbox(label="图片信息")
714
-
715
- process_btn.click(
716
- process,
717
- inputs=[audio_processor, image_processor],
718
- outputs=[audio_output, image_output]
719
- )
720
-
721
- if __name__ == "__main__":
722
- app.launch(debug=False)
1
+ import gradio as gr
2
+ import json
3
+ from pathlib import Path
4
+ import yaml
5
+ import re
6
+ import logging
7
+ import io
8
+ import sys
9
+ import os
10
+ import re
11
+ from datetime import datetime, timezone, timedelta
12
+ import requests
13
+
14
+ from tools import FileUploader, ResultExtractor, audio_to_str, image_to_str, azure_speech_to_text #gege的多模态
15
+ import numpy as np
16
+ from scipy.io.wavfile import write as write_wav
17
+ from PIL import Image
18
+
19
+ # 指定保存文件的相对路径
20
+ SAVE_DIR = 'download' # 相对路径
21
+ os.makedirs(SAVE_DIR, exist_ok=True) # 确保目录存在
22
+
23
+ def save_audio(audio, filename):
24
+ """保存音频为.wav文件"""
25
+ sample_rate, audio_data = audio
26
+ write_wav(filename, sample_rate, audio_data)
27
+
28
+ def save_image(image, filename):
29
+ """保存图片为.jpg文件"""
30
+ img = Image.fromarray(image.astype('uint8'))
31
+ img.save(filename)
32
+
33
+ # --- IP获取功能 ( se_app.py 迁移) ---
34
+ def get_client_ip(request: gr.Request, debug_mode=False):
35
+ """获取客户端真实IP地址"""
36
+ if request:
37
+ # 从请求头中获取真实IP(考虑代理情况)
38
+ x_forwarded_for = request.headers.get("x-forwarded-for", "")
39
+ if x_forwarded_for:
40
+ client_ip = x_forwarded_for.split(",")[0]
41
+ else:
42
+ client_ip = request.client.host
43
+ if debug_mode:
44
+ print(f"Debug: Client IP detected as {client_ip}")
45
+ return client_ip
46
+ return "unknown"
47
+
48
+ # --- 配置加载 (从 config_loader.py 迁移并简化) ---
49
+ CONFIG = None
50
+ HF_CONFIG_PATH = Path(__file__).parent / "todogen_LLM_config.yaml"
51
+
52
+ def load_hf_config():
53
+ global CONFIG
54
+ if CONFIG is None:
55
+ try:
56
+ with open(HF_CONFIG_PATH, 'r', encoding='utf-8') as f:
57
+ CONFIG = yaml.safe_load(f)
58
+ print(f"✅ 配置已加载: {HF_CONFIG_PATH}")
59
+ except FileNotFoundError:
60
+ print(f"❌ 错误: 配置文件 {HF_CONFIG_PATH} 未找到。请确保它在 hf 目录下。")
61
+ CONFIG = {} # 提供一个空配置以避免后续错误
62
+ except Exception as e:
63
+ print(f"❌ 加载配置文件 {HF_CONFIG_PATH} 时出错: {e}")
64
+ CONFIG = {}
65
+ return CONFIG
66
+
67
+ def get_hf_openai_config():
68
+ config = load_hf_config()
69
+ return config.get('openai', {})
70
+
71
+ def get_hf_openai_filter_config():
72
+ config = load_hf_config()
73
+ return config.get('openai_filter', {})
74
+
75
+ def get_hf_xunfei_config():
76
+ config = load_hf_config()
77
+ return config.get('xunfei', {})
78
+
79
+ def get_hf_azure_speech_config():
80
+ config = load_hf_config()
81
+ return config.get('azure_speech', {})
82
+
83
+ def get_hf_paths_config():
84
+ config = load_hf_config()
85
+ # 在hf环境下,路径相对于hf目录
86
+ base = Path(__file__).resolve().parent
87
+ paths_cfg = config.get('paths', {})
88
+ return {
89
+ 'base_dir': base,
90
+ 'prompt_template': base / paths_cfg.get('prompt_template', 'prompt_template.txt'),
91
+ 'true_positive_examples': base / paths_cfg.get('true_positive_examples', 'TruePositive_few_shot.txt'),
92
+ 'false_positive_examples': base / paths_cfg.get('false_positive_examples', 'FalsePositive_few_shot.txt'),
93
+ # data_dir 和 logging_dir 在 app.py 中可能用途不大,除非需要保存 LLM 输出
94
+ }
95
+
96
+ # --- LLM Client 初始化 (使用 NVIDIA API) ---
97
+ # 从配置加载 NVIDIA API base_url, api_key 和 model
98
+ llm_config = get_hf_openai_config()
99
+ NVIDIA_API_BASE_URL = llm_config.get('base_url')
100
+ NVIDIA_API_KEY = llm_config.get('api_key')
101
+ NVIDIA_MODEL_NAME = llm_config.get('model')
102
+
103
+ # 从配置加载 Filter API 的 base_url, api_key 和 model
104
+ filter_config = get_hf_openai_filter_config()
105
+ Filter_API_BASE_URL = filter_config.get('base_url_filter')
106
+ Filter_API_KEY = filter_config.get('api_key_filter')
107
+ Filter_MODEL_NAME = filter_config.get('model_filter')
108
+
109
+
110
+ if not NVIDIA_API_BASE_URL or not NVIDIA_API_KEY or not NVIDIA_MODEL_NAME:
111
+ print("❌ 错误: NVIDIA API 配置不完整。请检查 todogen_LLM_config.yaml 中的 openai 部分。")
112
+ # 提供默认值或退出,以便程序可以继续运行,但LLM调用会失败
113
+ NVIDIA_API_BASE_URL = ""
114
+ NVIDIA_API_KEY = ""
115
+ NVIDIA_MODEL_NAME = ""
116
+
117
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
118
+ print(" 错误: Filter API 配置不完整。请检查 todogen_LLM_config.yaml 中的 openai_filter 部分。")
119
+ # 提供默认值或退出,以便程序可以继续运行,但Filter LLM调用会失败
120
+ Filter_API_BASE_URL = ""
121
+ Filter_API_KEY = ""
122
+ Filter_MODEL_NAME = ""
123
+
124
+ # --- 日志配置 (简化版) ---
125
+ # 修正后的标准流编码设置 (如果需要,但 Gradio 通常处理自己的输出)
126
+ # sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
127
+ # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', write_through=True)
128
+ # sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', write_through=True)
129
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
130
+ logger = logging.getLogger(__name__)
131
+
132
+ # --- Prompt Few-Shot 加载 (从 todogen_llm.py 迁移并适配) ---
133
+ def load_single_few_shot_file_hf(file_path: Path) -> str:
134
+ """加载单个 few-shot 文件并转义 { 和 }"""
135
+ try:
136
+ with open(file_path, 'r', encoding='utf-8') as f:
137
+ content = f.read()
138
+ escaped_content = content.replace('{', '{{').replace('}', '}}')
139
+ logger.info(f"✅ 成功加载并转义文件: {file_path}")
140
+ return escaped_content
141
+ except FileNotFoundError:
142
+ logger.warning(f"⚠️ 警告:找不到文件 {file_path}。")
143
+ return ""
144
+ except Exception as e:
145
+ logger.error(f"❌ 加载文件 {file_path} 时出错: {e}", exc_info=True)
146
+ return ""
147
+
148
+ PROMPT_TEMPLATE_CONTENT = ""
149
+ TRUE_POSITIVE_EXAMPLES_CONTENT = ""
150
+ FALSE_POSITIVE_EXAMPLES_CONTENT = ""
151
+
152
+ def load_prompt_data_hf():
153
+ global PROMPT_TEMPLATE_CONTENT, TRUE_POSITIVE_EXAMPLES_CONTENT, FALSE_POSITIVE_EXAMPLES_CONTENT
154
+ paths = get_hf_paths_config()
155
+ try:
156
+ with open(paths['prompt_template'], 'r', encoding='utf-8') as f:
157
+ PROMPT_TEMPLATE_CONTENT = f.read()
158
+ logger.info(f"✅ 成功加载 Prompt 模板文件: {paths['prompt_template']}")
159
+ except FileNotFoundError:
160
+ logger.error(f"❌ 错误:找不到 Prompt 模板文件:{paths['prompt_template']}")
161
+ PROMPT_TEMPLATE_CONTENT = "Error: Prompt template not found."
162
+
163
+ TRUE_POSITIVE_EXAMPLES_CONTENT = load_single_few_shot_file_hf(paths['true_positive_examples'])
164
+ FALSE_POSITIVE_EXAMPLES_CONTENT = load_single_few_shot_file_hf(paths['false_positive_examples'])
165
+
166
+ # 应用启动时加载 prompts
167
+ load_prompt_data_hf()
168
+
169
+ # --- JSON 解析器 (从 todogen_llm.py 迁移) ---
170
+ def json_parser(text: str) -> dict:
171
+ # 改进的JSON解析器,更健壮地处理各种格式
172
+ logger.info(f"Attempting to parse: {text[:200]}...")
173
+ try:
174
+ # 1. 尝试直接将整个文本作为JSON解析
175
+ try:
176
+ parsed_data = json.loads(text)
177
+ # 使用_process_parsed_json处理解析结果
178
+ return _process_parsed_json(parsed_data)
179
+ except json.JSONDecodeError:
180
+ pass # 如果直接解析失败,继续尝试提取代码块
181
+
182
+ # 2. 尝试从 ```json ... ``` 代码块中提取和解析
183
+ match = re.search(r'```(?:json)?\n(.*?)```', text, re.DOTALL)
184
+ if match:
185
+ json_str = match.group(1).strip()
186
+ # 修复常见的JSON格式问题
187
+ json_str = re.sub(r',\s*]', ']', json_str)
188
+ json_str = re.sub(r',\s*}', '}', json_str)
189
+ try:
190
+ parsed_data = json.loads(json_str)
191
+ # 使用_process_parsed_json处理解析结果
192
+ return _process_parsed_json(parsed_data)
193
+ except json.JSONDecodeError as e_block:
194
+ logger.warning(f"JSONDecodeError from code block: {e_block} while parsing: {json_str[:200]}")
195
+ # 如果从代码块解析也失败,则继续
196
+
197
+ # 3. 尝试查找最外层的 '{...}' 或 '[...]' 作为JSON
198
+ # 先尝试查找数组格式 [...]
199
+ array_match = re.search(r'\[\s*\{.*?\}\s*(?:,\s*\{.*?\}\s*)*\]', text, re.DOTALL)
200
+ if array_match:
201
+ potential_json = array_match.group(0).strip()
202
+ try:
203
+ parsed_data = json.loads(potential_json)
204
+ # 使用_process_parsed_json处理解析结果
205
+ return _process_parsed_json(parsed_data)
206
+ except json.JSONDecodeError:
207
+ logger.warning(f"Could not parse potential JSON array: {potential_json[:200]}")
208
+ pass
209
+
210
+ # 再尝试查找单个对象格式 {...}
211
+ object_match = re.search(r'\{.*?\}', text, re.DOTALL)
212
+ if object_match:
213
+ potential_json = object_match.group(0).strip()
214
+ try:
215
+ parsed_data = json.loads(potential_json)
216
+ # 使用_process_parsed_json处理解析结果
217
+ return _process_parsed_json(parsed_data)
218
+ except json.JSONDecodeError:
219
+ logger.warning(f"Could not parse potential JSON object: {potential_json[:200]}")
220
+ pass
221
+
222
+ # 4. 如果所有尝试都失败,返回错误信息
223
+ logger.error(f"Failed to find or parse JSON block in text: {text[:500]}") # 增加日志长度
224
+ return {"error": "No valid JSON block found or failed to parse", "raw_text": text}
225
+
226
+ except Exception as e: # 捕获所有其他意外错误
227
+ logger.error(f"Unexpected error in json_parser: {e} for text: {text[:200]}", exc_info=True)
228
+ return {"error": f"Unexpected error in json_parser: {e}", "raw_text": text}
229
+
230
+ def _process_parsed_json(parsed_data):
231
+ """处理解析后的JSON数据,确保返回有效的数据结构"""
232
+ try:
233
+ # 如果解析结果是空列表,返回包含空字典的列表
234
+ if isinstance(parsed_data, list):
235
+ if not parsed_data:
236
+ logger.warning("JSON解析结果为空列表,返回包含空字典的列表")
237
+ return [{}]
238
+
239
+ # 确保列表中的每个元素都是字典
240
+ processed_list = []
241
+ for item in parsed_data:
242
+ if isinstance(item, dict):
243
+ processed_list.append(item)
244
+ else:
245
+ # 如果不是字典,将其转换为字典
246
+ try:
247
+ processed_list.append({"content": str(item)})
248
+ except:
249
+ processed_list.append({"content": "无法转换的项目"})
250
+
251
+ # 如果处理后的列表为空,返回包含空字典的列表
252
+ if not processed_list:
253
+ logger.warning("处理后的JSON列表为空,返回包含空字典的列表")
254
+ return [{}]
255
+
256
+ return processed_list
257
+
258
+ # 如果是字典,直接返回
259
+ elif isinstance(parsed_data, dict):
260
+ return parsed_data
261
+
262
+ # 如果是其他类型,转换为字典
263
+ else:
264
+ logger.warning(f"JSON解析结果不是列表或字典,而是{type(parsed_data)},转换为字典")
265
+ return {"content": str(parsed_data)}
266
+
267
+ except Exception as e:
268
+ logger.error(f"处理解析后的JSON数据时出错: {e}")
269
+ return {"error": f"Error processing parsed JSON: {e}"}
270
+
271
+ # --- Filter 模块的 System Prompt (从 filter_message/libs.py 迁移) ---
272
+ FILTER_SYSTEM_PROMPT = """
273
+ # 角色
274
+ 你是一个专业的短信内容分析助手,根据输入判断内容的类型及可信度,为用户使用信息提供依据和便利。
275
+
276
+ # 任务
277
+ 对于输入的多条数据,分析每一条数据内容(主键:`message_id`)属于【物流取件、缴费充值、待付(还)款、会议邀约、其他】的可能性百分比。
278
+ 主要对于聊天、问候、回执、结果通知、上月账单等信息不需要收件人进行下一步处理的信息,直接归到其他类进行忽略
279
+
280
+ # 要求
281
+ 1. 以json格式输出
282
+ 2. content简洁提炼关键词,字符数<20以内
283
+ 3. 输入条数和输出条数完全一样
284
+
285
+ # 输出示例
286
+ ```
287
+ [
288
+ {"message_id":"1111111","content":"账单805.57元待还","物流取件":0,"欠费缴纳":99,"待付(还)款":1,"会议邀约":0,"其他":0, "分类":"欠费缴纳"},
289
+ {"message_id":"222222","content":"邀请你加入飞书视频会议","物流取件":0,"欠费缴纳":0,"待付(还)款":1,"会议邀约":100,"其他":0, "分类":"会议"}
290
+ ]
291
+
292
+ ```
293
+ """
294
+
295
+ # --- Filter 核心逻辑 (从ToDoAgent集成) ---
296
+ def filter_message_with_llm(text_input: str, message_id: str = "user_input_001"):
297
+ logger.info(f"调用 filter_message_with_llm 处理输入: {text_input} (msg_id: {message_id})")
298
+
299
+ # 构造发送给 LLM 的消息
300
+ # filter 模块的 send_llm_with_prompt 接收的是 tuple[tuple] 格式的数据
301
+ # 这里我们只有一个文本输入,需要模拟成那种格式
302
+ mock_data = [(text_input, message_id)]
303
+
304
+ # 使用与ToDoAgent相同的system prompt
305
+ system_prompt = """
306
+ # 角色
307
+ 你是一个专业的短信内容分析助手,根据输入判断内容的类型及可信度,为用户使用信息提供依据和便利。
308
+
309
+ # 任务
310
+ 对于输入的多条数据,分析每一条数据内容(主键:`message_id`)属于【物流取件、缴费充值、待付(还)款、会议邀约、其他】的可能性百分比。
311
+ 主要对于聊天、问候、回执、结果通知、上月账单等信息不需要收件人进行下一步处理的信息,直接归到其他类进行忽略
312
+
313
+ # 要求
314
+ 1. 以json格式输出
315
+ 2. content简洁提炼关键词,字符数<20以内
316
+ 3. 输入条数和输出条数完全一样
317
+
318
+ # 输出示例
319
+ ```
320
+ [
321
+ {"message_id":"1111111","content":"账单805.57元待还","物流取件":0,"欠费缴纳":99,"待付(还)款":1,"会议邀约":0,"其他":0, "分类":"欠费缴纳"},
322
+ {"message_id":"222222","content":"邀请你加入飞书视频会议","物流取件":0,"欠费缴纳":0,"待付(还)款":1,"会议邀约":100,"其他":0, "分类":"会议邀约"}
323
+ ]
324
+ ```
325
+ """
326
+
327
+ llm_messages = [
328
+ {"role": "system", "content": system_prompt},
329
+ {"role": "user", "content": str(mock_data)}
330
+ ]
331
+
332
+ try:
333
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
334
+ logger.error("Filter API 配置不完整,无法调用 Filter LLM。")
335
+ return [{"error": "Filter API configuration incomplete", "-": "-"}]
336
+
337
+ headers = {
338
+ "Authorization": f"Bearer {Filter_API_KEY}",
339
+ "Accept": "application/json"
340
+ }
341
+ payload = {
342
+ "model": Filter_MODEL_NAME,
343
+ "messages": llm_messages,
344
+ "temperature": 0.0, # 为提高准确率,温度为0(与ToDoAgent一致)
345
+ "top_p": 0.95,
346
+ "max_tokens": 1024,
347
+ "stream": False
348
+ }
349
+
350
+ api_url = f"{Filter_API_BASE_URL}/chat/completions"
351
+
352
+ try:
353
+ response = requests.post(api_url, headers=headers, json=payload)
354
+ response.raise_for_status() # 检查 HTTP 错误
355
+ raw_llm_response = response.json()["choices"][0]["message"]["content"]
356
+ logger.info(f"LLM 原始回复 (部分): {raw_llm_response[:200]}...")
357
+ except requests.exceptions.RequestException as e:
358
+ logger.error(f"调用 Filter API 失败: {e}")
359
+ return [{"error": f"Filter API call failed: {e}", "-": "-"}]
360
+ logger.info(f"Filter LLM 原始回复 (部分): {raw_llm_response[:200]}...")
361
+
362
+ # 解析 LLM 响应
363
+ # 移除可能的代码块标记
364
+ raw_llm_response = raw_llm_response.replace("```json", "").replace("```", "")
365
+ parsed_filter_data = json_parser(raw_llm_response)
366
+
367
+ if "error" in parsed_filter_data:
368
+ logger.error(f"解析 Filter LLM 响应失败: {parsed_filter_data['error']}")
369
+ return [{"error": f"Filter LLM response parsing error: {parsed_filter_data['error']}"}]
370
+
371
+ # 返回解析后的数据
372
+ if isinstance(parsed_filter_data, list) and parsed_filter_data:
373
+ # 应用规则:如果分类是欠费缴纳且内容包含"缴费支出",归类为"其他"
374
+ for item in parsed_filter_data:
375
+ if isinstance(item, dict) and item.get("分类") == "欠费缴纳" and "缴费支出" in item.get("content", ""):
376
+ item["分类"] = "其他"
377
+
378
+ # 检查是否有遗漏的消息ID(ToDoAgent的补充逻辑)
379
+ request_id_list = {message_id}
380
+ response_id_list = {item.get('message_id') for item in parsed_filter_data if isinstance(item, dict)}
381
+ diff = request_id_list - response_id_list
382
+
383
+ if diff:
384
+ logger.warning(f"Filter LLM 响应中有遗漏的消息ID: {diff}")
385
+ # 对于遗漏的消息,添加一个默认分类为"其他"的项
386
+ for missed_id in diff:
387
+ parsed_filter_data.append({
388
+ "message_id": missed_id,
389
+ "content": text_input[:20], # 截取前20个字符作为content
390
+ "物流取件": 0,
391
+ "欠费缴纳": 0,
392
+ "待付(还)款": 0,
393
+ "会议邀约": 0,
394
+ "其他": 100,
395
+ "分类": "其他"
396
+ })
397
+
398
+ return parsed_filter_data
399
+ else:
400
+ logger.warning(f"Filter LLM 返回空列表或非预期格式: {parsed_filter_data}")
401
+ # 返回默认分类为"其他"的项
402
+ return [{
403
+ "message_id": message_id,
404
+ "content": text_input[:20], # 截取前20个字符作为content
405
+ "物流取件": 0,
406
+ "欠费缴纳": 0,
407
+ "待付(还)款": 0,
408
+ "会议邀约": 0,
409
+ "其他": 100,
410
+ "分类": "其他",
411
+ "error": "Filter LLM returned empty or unexpected format"
412
+ }]
413
+
414
+ except Exception as e:
415
+ logger.exception(f"调用 Filter LLM 或解析时发生错误 (filter_message_with_llm)")
416
+ return [{
417
+ "message_id": message_id,
418
+ "content": text_input[:20], # 截取前20个字符作为content
419
+ "物流取件": 0,
420
+ "欠费缴纳": 0,
421
+ "待付(还)款": 0,
422
+ "会议邀约": 0,
423
+ "其他": 100,
424
+ "分类": "其他",
425
+ "error": f"Filter LLM call/parse error: {str(e)}"
426
+ }]
427
+
428
+ # --- ToDo List 生成核心逻辑 (使用迁移的代码) ---
429
+ def generate_todolist_from_text(text_input: str, message_id: str = "user_input_001"):
430
+ """根据输入文本生成 ToDoList (使用迁移的逻辑)"""
431
+ logger.info(f"调用 generate_todolist_from_text 处理输入: {text_input} (msg_id: {message_id})")
432
+
433
+ if not PROMPT_TEMPLATE_CONTENT or "Error:" in PROMPT_TEMPLATE_CONTENT:
434
+ logger.error("Prompt 模板未正确加载,无法生成 ToDoList。")
435
+ return [["error", "Prompt template not loaded", "-"]]
436
+
437
+ current_time_iso = datetime.now(timezone.utc).isoformat()
438
+ # 转义输入内容中的 { 和 }
439
+ content_escaped = text_input.replace('{', '{{').replace('}', '}}')
440
+
441
+ # 构造 prompt
442
+ formatted_prompt = PROMPT_TEMPLATE_CONTENT.format(
443
+ true_positive_examples=TRUE_POSITIVE_EXAMPLES_CONTENT,
444
+ false_positive_examples=FALSE_POSITIVE_EXAMPLES_CONTENT,
445
+ current_time=current_time_iso,
446
+ message_id=message_id,
447
+ content_escaped=content_escaped
448
+ )
449
+
450
+ # 添加明确的JSON输出指令
451
+ enhanced_prompt = formatted_prompt + """
452
+
453
+ # 重要提示
454
+ 请确保你的回复是有效的JSON格式,并且只包含JSON内容。不要添加任何额外的解释或文本。
455
+ 你的回复应该严格按照上面的输出示例格式,只包含JSON对象,不要有任何其他文本。
456
+ """
457
+
458
+ # 构造发送给 LLM 的消息
459
+ llm_messages = [
460
+ {"role": "user", "content": enhanced_prompt}
461
+ ]
462
+
463
+ logger.info(f"发送给 LLM 的消息 (部分): {str(llm_messages)[:300]}...")
464
+
465
+ try:
466
+ # 根据输入文本智能生成 ToDo List
467
+ # 如果是移动话费充值提醒类消息
468
+ if ("充值" in text_input or "缴费" in text_input) and ("移动" in text_input or "话费" in text_input or "余额" in text_input):
469
+ # 直接生成待办事项,不调用API
470
+ todo_item = {
471
+ message_id: {
472
+ "is_todo": True,
473
+ "end_time": (datetime.now(timezone.utc) + timedelta(days=3)).isoformat(),
474
+ "location": "线上:中国移动APP",
475
+ "todo_content": "缴纳话费",
476
+ "urgency": "important"
477
+ }
478
+ }
479
+
480
+ # 转换为表格显示格式 - 合并为一行
481
+ todo_content = "缴纳话费"
482
+ end_time = todo_item[message_id]["end_time"].split("T")[0]
483
+ location = todo_item[message_id]["location"]
484
+
485
+ # 合并所有信息到任务内容中
486
+ combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
487
+
488
+ output_for_df = []
489
+ output_for_df.append([1, combined_content, "重要"])
490
+
491
+ return output_for_df
492
+
493
+ # 如果是会议邀约类消息
494
+ elif "会议" in text_input and ("邀请" in text_input or "参加" in text_input):
495
+ # 提取可能的会议时间
496
+ meeting_time = None
497
+ meeting_pattern = r'(\d{1,2}[月/-]\d{1,2}[日号]?\s*\d{1,2}[点:]\d{0,2}|\d{4}[年/-]\d{1,2}[月/-]\d{1,2}[日号]?\s*\d{1,2}[点:]\d{0,2})'
498
+ meeting_match = re.search(meeting_pattern, text_input)
499
+
500
+ if meeting_match:
501
+ # 简单处理,实际应用中应该更精确地解析日期时间
502
+ meeting_time = (datetime.now(timezone.utc) + timedelta(days=1, hours=2)).isoformat()
503
+ else:
504
+ meeting_time = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat()
505
+
506
+ todo_item = {
507
+ message_id: {
508
+ "is_todo": True,
509
+ "end_time": meeting_time,
510
+ "location": "线上:会议软件",
511
+ "todo_content": "参加会议",
512
+ "urgency": "important"
513
+ }
514
+ }
515
+
516
+ # 转换为表格显示格式 - 合并为一行
517
+ todo_content = "参加会议"
518
+ end_time = todo_item[message_id]["end_time"].split("T")[0]
519
+ location = todo_item[message_id]["location"]
520
+
521
+ # 合并所有信息到任务内容中
522
+ combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
523
+
524
+ output_for_df = []
525
+ output_for_df.append([1, combined_content, "重要"])
526
+
527
+ return output_for_df
528
+
529
+ # 如果是物流取件类消息
530
+ elif ("快递" in text_input or "物流" in text_input or "取件" in text_input) and ("到达" in text_input or "取件码" in text_input or "柜" in text_input):
531
+ # 提取可能的取件码
532
+ pickup_code = None
533
+ code_pattern = r'取件码[是为:]?\s*(\d{4,6})'
534
+ code_match = re.search(code_pattern, text_input)
535
+
536
+ todo_content = "取快递"
537
+ if code_match:
538
+ pickup_code = code_match.group(1)
539
+ todo_content = f"取快递(取件码:{pickup_code})"
540
+
541
+ todo_item = {
542
+ message_id: {
543
+ "is_todo": True,
544
+ "end_time": (datetime.now(timezone.utc) + timedelta(days=2)).isoformat(),
545
+ "location": "线下:快递柜",
546
+ "todo_content": todo_content,
547
+ "urgency": "important"
548
+ }
549
+ }
550
+
551
+ # 转换为表格显示格式 - 合并为一行
552
+ end_time = todo_item[message_id]["end_time"].split("T")[0]
553
+ location = todo_item[message_id]["location"]
554
+
555
+ # 合并所有信息到任务内容中
556
+ combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
557
+
558
+ output_for_df = []
559
+ output_for_df.append([1, combined_content, "重要"])
560
+
561
+ return output_for_df
562
+
563
+ # 对于其他类型的消息,调用LLM API进行处理
564
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
565
+ logger.error("Filter API 配置不完整,无法调用 Filter LLM。")
566
+ return [["error", "Filter API configuration incomplete", "-"]]
567
+
568
+ headers = {
569
+ "Authorization": f"Bearer {Filter_API_KEY}",
570
+ "Accept": "application/json"
571
+ }
572
+ payload = {
573
+ "model": Filter_MODEL_NAME,
574
+ "messages": llm_messages,
575
+ "temperature": 0.2, # 降低温度以提高一致性
576
+ "top_p": 0.95,
577
+ "max_tokens": 1024,
578
+ "stream": False
579
+ }
580
+
581
+ api_url = f"{Filter_API_BASE_URL}/chat/completions"
582
+
583
+ try:
584
+ response = requests.post(api_url, headers=headers, json=payload)
585
+ response.raise_for_status() # 检查 HTTP 错误
586
+ raw_llm_response = response.json()['choices'][0]['message']['content']
587
+ logger.info(f"LLM 原始回复 (部分): {raw_llm_response[:200]}...")
588
+ except requests.exceptions.RequestException as e:
589
+ logger.error(f"调用 Filter API 失败: {e}")
590
+ return [["error", f"Filter API call failed: {e}", "-"]]
591
+
592
+ # 解析 LLM 响应
593
+ parsed_todos_data = json_parser(raw_llm_response)
594
+
595
+ if "error" in parsed_todos_data:
596
+ logger.error(f"解析 LLM 响应失败: {parsed_todos_data['error']}")
597
+ return [["error", f"LLM response parsing error: {parsed_todos_data['error']}", parsed_todos_data.get('raw_text', '')[:50] + "..."]]
598
+
599
+ # 处理解析后的数据
600
+ output_for_df = []
601
+
602
+ # 如果是字典格式(符合prompt模板输出格式)
603
+ if isinstance(parsed_todos_data, dict):
604
+ # 获取消息ID对应的待办信息
605
+ todo_info = None
606
+ for key, value in parsed_todos_data.items():
607
+ if key == message_id or key == str(message_id):
608
+ todo_info = value
609
+ break
610
+
611
+ if todo_info and isinstance(todo_info, dict) and todo_info.get("is_todo", False):
612
+ # 提取待办信息
613
+ todo_content = todo_info.get("todo_content", "未指定待办内容")
614
+ end_time = todo_info.get("end_time")
615
+ location = todo_info.get("location")
616
+ urgency = todo_info.get("urgency", "unimportant")
617
+
618
+ # 准备合并显示的内容
619
+ combined_content = todo_content
620
+
621
+ # 添加截止时间
622
+ if end_time and end_time != "null":
623
+ try:
624
+ date_part = end_time.split("T")[0] if "T" in end_time else end_time
625
+ combined_content += f" (截止时间: {date_part}"
626
+ except:
627
+ combined_content += f" (截止时间: {end_time}"
628
+ else:
629
+ combined_content += " ("
630
+
631
+ # 添加地点
632
+ if location and location != "null":
633
+ combined_content += f", 地点: {location})"
634
+ else:
635
+ combined_content += ")"
636
+
637
+ # 添加紧急程度
638
+ urgency_display = "一般"
639
+ if urgency == "urgent":
640
+ urgency_display = "紧急"
641
+ elif urgency == "important":
642
+ urgency_display = "重要"
643
+
644
+ # 创建单行输出
645
+ output_for_df = []
646
+ output_for_df.append([1, combined_content, urgency_display])
647
+ else:
648
+ # 不是待办事项
649
+ output_for_df = []
650
+ output_for_df.append([1, "此消息不包含待办事项", "-"])
651
+
652
+ # 如果是旧格式(列表格式)
653
+ elif isinstance(parsed_todos_data, list):
654
+ output_for_df = []
655
+
656
+ # 检查列表是否为空
657
+ if not parsed_todos_data:
658
+ logger.warning("LLM 返回了空列表,无法生成 ToDo 项目")
659
+ return [[1, "未能生成待办事项", "-"]]
660
+
661
+ for i, item in enumerate(parsed_todos_data):
662
+ if isinstance(item, dict):
663
+ todo_content = item.get('todo_content', item.get('content', 'N/A'))
664
+ status = item.get('status', '未完成')
665
+ urgency = item.get('urgency', 'normal')
666
+
667
+ # 合并所有信息到一行
668
+ combined_content = todo_content
669
+
670
+ # 添加截止时间
671
+ if 'end_time' in item and item['end_time']:
672
+ try:
673
+ if isinstance(item['end_time'], str):
674
+ date_part = item['end_time'].split("T")[0] if "T" in item['end_time'] else item['end_time']
675
+ combined_content += f" (截止时间: {date_part}"
676
+ else:
677
+ combined_content += f" (截止时间: {str(item['end_time'])}"
678
+ except Exception as e:
679
+ logger.warning(f"处理end_time时出错: {e}")
680
+ combined_content += " ("
681
+ else:
682
+ combined_content += " ("
683
+
684
+ # 添加地点
685
+ if 'location' in item and item['location']:
686
+ combined_content += f", 地点: {item['location']})"
687
+ else:
688
+ combined_content += ")"
689
+
690
+ # 设置重要等级
691
+ importance = "一般"
692
+ if urgency == "urgent":
693
+ importance = "紧急"
694
+ elif urgency == "important":
695
+ importance = "重要"
696
+
697
+ output_for_df.append([i + 1, combined_content, importance])
698
+ else:
699
+ # 如果不是字典,转换为字符串并添加到列表
700
+ try:
701
+ item_str = str(item) if item is not None else "未知项目"
702
+ output_for_df.append([i + 1, item_str, "一般"])
703
+ except Exception as e:
704
+ logger.warning(f"处理非字典项目时出错: {e}")
705
+ output_for_df.append([i + 1, "处理错误的项目", "一般"])
706
+
707
+ if not output_for_df:
708
+ logger.info("LLM 解析结果为空或无法转换为DataFrame格式。")
709
+ return [["info", "未发现待办事项", "-"]]
710
+
711
+ return output_for_df
712
+
713
+ except Exception as e:
714
+ logger.exception(f"调用 LLM 或解析时发生错误 (generate_todolist_from_text)")
715
+ return [["error", f"LLM call/parse error: {str(e)}", "-"]]
716
+
717
+ #gradio
718
+ def process(audio, image, request: gr.Request):
719
+ """处理语音和图片的示例函数"""
720
+ # 获取并记录客户端IP
721
+ client_ip = get_client_ip(request, True)
722
+ print(f"Processing audio/image request from IP: {client_ip}")
723
+
724
+ if audio is not None:
725
+ sample_rate, audio_data = audio
726
+ audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"
727
+ else:
728
+ audio_info = "未收到音频"
729
+
730
+ if image is not None:
731
+ image_info = f"图片尺寸: {image.shape}"
732
+ else:
733
+ image_info = "未收到图片"
734
+
735
+ return audio_info, image_info
736
+
737
+ def respond(
738
+ message,
739
+ history: list[tuple[str, str]],
740
+ system_message,
741
+ max_tokens,
742
+ temperature,
743
+ top_p,
744
+ audio, # 多模态输入:音频
745
+ image # 多模态输入:图片
746
+ ):
747
+ # ... (聊天回复逻辑基本保持不变, 但确保 client 使用的是配置好的 HF client)
748
+ # 1. 多模态处理接口 (其他人负责)
749
+ # processed_text_from_multimodal = multimodal_placeholder_function(audio, image)
750
+ # 多模态处理:调用讯飞API进行语音和图像识别
751
+ multimodal_content = ""
752
+
753
+ # 多模态处理配置已移至具体处理部分
754
+
755
+ if audio is not None:
756
+ try:
757
+ audio_sample_rate, audio_data = audio
758
+ multimodal_content += f"\n[音频信息: 采样率 {audio_sample_rate}Hz, 时长 {len(audio_data)/audio_sample_rate:.2f}秒]"
759
+
760
+ # 调用Azure Speech语音识别
761
+ azure_speech_config = get_hf_azure_speech_config()
762
+ azure_speech_key = azure_speech_config.get('key')
763
+ azure_speech_region = azure_speech_config.get('region')
764
+
765
+ if azure_speech_key and azure_speech_region:
766
+ import tempfile
767
+ import soundfile as sf
768
+ import os
769
+
770
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
771
+ sf.write(temp_audio.name, audio_data, audio_sample_rate)
772
+ temp_audio_path = temp_audio.name
773
+
774
+ audio_text = azure_speech_to_text(azure_speech_key, azure_speech_region, temp_audio_path)
775
+ if audio_text:
776
+ multimodal_content += f"\n[语音识别结果: {audio_text}]"
777
+ else:
778
+ multimodal_content += "\n[语音识别失败]"
779
+
780
+ os.unlink(temp_audio_path)
781
+ else:
782
+ multimodal_content += "\n[Azure Speech API配置不完整,无法进行语音识别]"
783
+
784
+ except Exception as e:
785
+ multimodal_content += f"\n[音频处理错误: {str(e)}]"
786
+
787
+ if image is not None:
788
+ try:
789
+ multimodal_content += f"\n[图片信息: 尺寸 {image.shape}]"
790
+
791
+ # 调用讯飞图像识别
792
+ if xunfei_appid and xunfei_apikey and xunfei_apisecret:
793
+ import tempfile
794
+ from PIL import Image
795
+ import os
796
+
797
+ with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_image:
798
+ if len(image.shape) == 3: # RGB图像
799
+ pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
800
+ else: # 灰度图像
801
+ pil_image = Image.fromarray(image.astype('uint8'), 'L')
802
+
803
+ pil_image.save(temp_image.name, 'JPEG')
804
+ temp_image_path = temp_image.name
805
+
806
+ image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=temp_image_path)
807
+ if image_text:
808
+ multimodal_content += f"\n[图像识别结果: {image_text}]"
809
+ else:
810
+ multimodal_content += "\n[图像识别失败]"
811
+
812
+ os.unlink(temp_image_path)
813
+ else:
814
+ multimodal_content += "\n[讯飞API配置不完整,无法进行图像识别]"
815
+
816
+ except Exception as e:
817
+ multimodal_content += f"\n[图像处理错误: {str(e)}]"
818
+
819
+ # 将多模态内容(或其处理结果)与用户文本消息结合
820
+ # combined_message = message
821
+ # if multimodal_content: # 如果有多模态内容,则附加
822
+ # combined_message += "\n" + multimodal_content
823
+ # 为了聊天模型的连贯性,聊天部分可能只使用原始 message
824
+ # 而 ToDoList 生成则使用 combined_message
825
+
826
+ # 聊天回复生成
827
+ chat_messages = [{"role": "system", "content": system_message}]
828
+ for val in history:
829
+ if val[0]:
830
+ chat_messages.append({"role": "user", "content": val[0]})
831
+ if val[1]:
832
+ chat_messages.append({"role": "assistant", "content": val[1]})
833
+ chat_messages.append({"role": "user", "content": message}) # 聊天机器人使用原始消息
834
+
835
+ chat_response_stream = ""
836
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
837
+ logger.error("Filter API 配置不完整,无法调用 LLM333。")
838
+ yield "Filter API 配置不完整,无法提供聊天回复。", []
839
+ return
840
+
841
+ headers = {
842
+ "Authorization": f"Bearer {Filter_API_KEY}",
843
+ "Accept": "application/json"
844
+ }
845
+ payload = {
846
+ "model": Filter_MODEL_NAME,
847
+ "messages": chat_messages,
848
+ "temperature": temperature,
849
+ "top_p": top_p,
850
+ "max_tokens": max_tokens,
851
+ "stream": True # 聊天通常需要流式输出
852
+ }
853
+ api_url = f"{Filter_API_BASE_URL}/chat/completions"
854
+
855
+ try:
856
+ response = requests.post(api_url, headers=headers, json=payload, stream=True)
857
+ response.raise_for_status() # 检查 HTTP 错误
858
+
859
+ for chunk in response.iter_content(chunk_size=None):
860
+ if chunk:
861
+ try:
862
+ # NVIDIA API 的流式输出是 SSE 格式,需要解析
863
+ # 每一行以 'data: ' 开头,后面是 JSON
864
+ for line in chunk.decode('utf-8').splitlines():
865
+ if line.startswith('data: '):
866
+ json_data = line[len('data: '):]
867
+ if json_data.strip() == '[DONE]':
868
+ break
869
+ data = json.loads(json_data)
870
+ # 检查 choices 列表是否存在且不为空
871
+ if 'choices' in data and len(data['choices']) > 0:
872
+ token = data['choices'][0]['delta'].get('content', '')
873
+ if token:
874
+ chat_response_stream += token
875
+ yield chat_response_stream, []
876
+ except json.JSONDecodeError:
877
+ logger.warning(f"无法解析流式响应块: {chunk.decode('utf-8')}")
878
+ except Exception as e:
879
+ logger.error(f"处理流式响应时发生错误: {e}")
880
+ yield chat_response_stream + f"\n\n错误: {e}", []
881
+
882
+ except requests.exceptions.RequestException as e:
883
+ logger.error(f"调用 NVIDIA API 失败: {e}")
884
+ yield f"调用 NVIDIA API 失败: {e}", []
885
+
886
+ # 全局变量存储所有待办事项
887
+ all_todos_global = []
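+ # 注意:此为模块级全局变量,会在所有用户会话之间共享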
888
+
889
+ # 创建自定义的聊天界面
890
+ with gr.Blocks() as app:
891
+ gr.Markdown("# ToDoAgent Multi-Modal Interface with ToDo List")
892
+
893
+ with gr.Row():
894
+ with gr.Column(scale=2):
895
+ gr.Markdown("## Chat Interface")
896
+ chatbot = gr.Chatbot(height=450, label="聊天记录", type="messages") # 推荐使用 type="messages"
897
+ msg = gr.Textbox(label="输入消息", placeholder="输入您的问题或待办事项...")
898
+
899
+ with gr.Row():
900
+ audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
901
+ image_input = gr.Image(label="上传图片", type="numpy")
902
+
903
+ with gr.Accordion("高级设置", open=False):
904
+ system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
905
+ max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="最大生成长度(聊天)") # 增加聊天模型参数范围
906
+ temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="温度(聊天)")
907
+ top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p(聊天)")
908
+
909
+ with gr.Row():
910
+ submit_btn = gr.Button("发送", variant="primary")
911
+ clear_btn = gr.Button("清除聊天和ToDo")
912
+
913
+ with gr.Column(scale=1):
914
+ gr.Markdown("## Generated ToDo List")
915
+ todolist_df = gr.DataFrame(headers=["ID", "任务内容", "状态"],
916
+ datatype=["number", "str", "str"],
917
+ row_count=(0, "dynamic"),
918
+ col_count=(3, "fixed"),
919
+ label="待办事项列表")
920
+
921
+ def user(user_message, chat_history):
922
+ # 将用户消息添加到聊天记录 (Gradio type="messages" 格式)
923
+ if not chat_history: chat_history = []
924
+ chat_history.append({"role": "user", "content": user_message})
925
+ return "", chat_history
926
+
927
+ def bot_interaction(chat_history, system_message, max_tokens, temperature, top_p, audio, image):
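+ # 说明:该函数在本文件中未绑定到任何 Gradio 事件,仅作为参考实现保留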
928
+ user_message_for_chat = ""
929
+ if chat_history and chat_history[-1]["role"] == "user":
930
+ user_message_for_chat = chat_history[-1]["content"]
931
+
932
+ # 准备用于 ToDoList 生成的输入文本 (多模态部分由其他人负责)
933
+ text_for_todolist = user_message_for_chat
934
+ # 可以在这里添加从 audio/image 提取文本的逻辑,并附加到 text_for_todolist
935
+ # multimodal_text = process_multimodal_inputs(audio, image) # 假设的函数
936
+ # if multimodal_text:
937
+ # text_for_todolist += "\n" + multimodal_text
938
+
939
+ # 1. 生成聊天回复 (流式)
940
+ # 转换 chat_history 从 [{'role':'user', 'content':'...'}, ...] 到 [('user_msg', 'bot_msg'), ...]
941
+ # respond 函数期望的是 history: list[tuple[str, str]]
942
+ # 但 Gradio type="messages" 的 chatbot.value 是 [{'role': ..., 'content': ...}, ...]
943
+ # 需要转换
944
+ formatted_history_for_respond = []
945
+ temp_user_msg = None
946
+ for item in chat_history[:-1]: #排除最后一条用户消息,因为它会作为当前message传入respond
947
+ if item["role"] == "user":
948
+ temp_user_msg = item["content"]
949
+ elif item["role"] == "assistant" and temp_user_msg is not None:
950
+ formatted_history_for_respond.append((temp_user_msg, item["content"]))
951
+ temp_user_msg = None
952
+ elif item["role"] == "assistant" and temp_user_msg is None: # Bot 先说话的情况
953
+ formatted_history_for_respond.append(("", item["content"]))
954
+
955
+ chat_stream_generator = respond(
956
+ user_message_for_chat,
957
+ formatted_history_for_respond, # 传递转换后的历史
958
+ system_message,
959
+ max_tokens,
960
+ temperature,
961
+ top_p,
962
+ audio,
963
+ image
964
+ )
965
+
966
+ full_chat_response = ""
967
+ current_todos = []
968
+
969
+ for chat_response_part, _ in chat_stream_generator:
970
+ full_chat_response = chat_response_part
971
+ # 更新 chat_history (Gradio type="messages" 格式)
972
+ if chat_history and chat_history[-1]["role"] == "user":
973
+ # 如果最后一条是用户消息,添加机器人回复
974
+ # 但由于是流式,我们可能需要先添加一个空的 assistant 消息,然后更新它
975
+ # 或者,等待流结束后一次性添加
976
+ # 为了简化,我们先假设 respond 返回的是完整回复,或者在循环外更新
977
+ pass # 流式更新 chatbot 在 submit_btn.click 中处理
978
+ yield chat_history + [[None, full_chat_response]], current_todos # 临时做法,需要适配Gradio的流式更新
979
+
980
+ # 流式结束后,更新 chat_history 中的最后一条 assistant 消息
981
+ if chat_history and full_chat_response:
982
+ # 查找最后一条用户消息,在其后添加或更新机器人回复
983
+ # 这种方式对于 type="messages" 更友好
984
+ # 实际上,Gradio 的 chatbot 更新应该在 .then() 中处理,这里先模拟
985
+ # chat_history.append({"role": "assistant", "content": full_chat_response})
986
+ # 这个 yield 应该在 submit_btn.click 的 .then() 中处理 chatbot 的更新
987
+ # 这里我们先专注于 ToDo 生成
988
+ pass # chatbot 更新由 Gradio 机制处理
989
+
990
+ # 2. 聊天回复完成后,生成/更新 ToDoList
991
+ if text_for_todolist:
992
+ # 使用一个唯一的 ID,例如基于时间戳或随机数,如果需要区分不同输入的 ToDo
993
+ message_id_for_todo = f"hf_app_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
994
+ new_todo_items = generate_todolist_from_text(text_for_todolist, message_id_for_todo)
995
+ current_todos = new_todo_items
996
+
997
+ # bot_interaction 应该返回 chatbot 的最终状态和 todolist_df 的数据
998
+ # chatbot 的最终状态是 chat_history + assistant 的回复
999
+ final_chat_history = list(chat_history) # 复制
1000
+ if full_chat_response:
1001
+ final_chat_history.append({"role": "assistant", "content": full_chat_response})
1002
+
1003
+ yield final_chat_history, current_todos
1004
+
1005
+ # 连接事件 (适配 type="messages")
1006
+ # Gradio 的流式更新通常是:
1007
+ # 1. user 函数准备输入,返回 (空输入框, 更新后的聊天记录)
1008
+ # 2. bot_interaction 函数是一个生成器,yield (部分聊天记录, 部分ToDo)
1009
+ # msg.submit 和 submit_btn.click 的 outputs 需要对应 bot_interaction 的 yield
1010
+
1011
+ # 简化版,非流式更新 chatbot,流式更新由 respond 内部的 yield 控制
1012
+ # 但 respond 的 yield 格式 (str, list) 与 bot_interaction (list, list) 不同
1013
+ # 需要调整 respond 的 yield 或 bot_interaction 的处理
1014
+
1015
+ # 调整后的事件处理,以更好地支持流式聊天和ToDo更新
1016
+ def process_filtered_result_for_todo(filtered_result, content, source_type):
1017
+ """处理过滤结果并生成todolist的辅助函数"""
1018
+ todos = []
1019
+
1020
+ if isinstance(filtered_result, dict) and "error" in filtered_result:
1021
+ logger.error(f"{source_type} Filter 模块处理失败: {filtered_result['error']}")
1022
+ todos = [["Error", f"{source_type}: {filtered_result['error']}", "Filter Failed"]]
1023
+ elif isinstance(filtered_result, dict) and filtered_result.get("分类") == "其他":
1024
+ logger.info(f"{source_type}消息被 Filter 模块归类为 '其他',不生成 ToDo List。")
1025
+ todos = [["Info", f"{source_type}: 消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
1026
+ elif isinstance(filtered_result, list):
1027
+ # 处理列表类型的过滤结果
1028
+ category = None
1029
+ if filtered_result:
1030
+ for item in filtered_result:
1031
+ if isinstance(item, dict) and "分类" in item:
1032
+ category = item["分类"]
1033
+ break
1034
+
1035
+ if category == "其他":
1036
+ logger.info(f"{source_type}消息被 Filter 模块归类为 '其他',不生成 ToDo List。")
1037
+ todos = [["Info", f"{source_type}: 消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
1038
+ else:
1039
+ logger.info(f"{source_type}消息被 Filter 模块归类为 '{category if category else '未知'}',继续生成 ToDo List。")
1040
+ if content:
1041
+ msg_id_todo = f"hf_app_todo_{source_type}_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
1042
+ todos = generate_todolist_from_text(content, msg_id_todo)
1043
+ # 为每个todo添加来源标识
1044
+ for todo in todos:
1045
+ if len(todo) > 1:
1046
+ todo[1] = f"[{source_type}] {todo[1]}"
1047
+ else:
1048
+ # 如果是字典但不是"其他"分类
1049
+ logger.info(f"{source_type}消息被 Filter 模块归类为 '{filtered_result.get('分类') if isinstance(filtered_result, dict) else '未知'}',继续生成 ToDo List。")
1050
+ if content:
1051
+ msg_id_todo = f"hf_app_todo_{source_type}_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
1052
+ todos = generate_todolist_from_text(content, msg_id_todo)
1053
+ # 为每个todo添加来源标识
1054
+ for todo in todos:
1055
+ if len(todo) > 1:
1056
+ todo[1] = f"[{source_type}] {todo[1]}"
1057
+
1058
+ return todos
1059
+
1060
+ def handle_submit(user_msg_content, ch_history, sys_msg, max_t, temp, t_p, audio_f, image_f, request: gr.Request):
1061
+ global all_todos_global
1062
+
1063
+ # 获取并记录客户端IP
1064
+ client_ip = get_client_ip(request, True)
1065
+ print(f"Processing request from IP: {client_ip}")
1066
+
1067
+ # 首先处理多模态输入,获取多模态内容
1068
+ multimodal_text_content = ""
1069
+ # 添加调试日志
1070
+ logger.info(f"开始多模态处理 - 音频: {audio_f is not None}, 图像: {image_f is not None}")
1071
+
1072
+ # 获取Azure Speech配置
1073
+ azure_speech_config = get_hf_azure_speech_config()
1074
+ azure_speech_key = azure_speech_config.get('key')
1075
+ azure_speech_region = azure_speech_config.get('region')
1076
+
1077
+ # 添加调试日志
1078
+ logger.info(f"Azure Speech配置状态 - key: {bool(azure_speech_key)}, region: {bool(azure_speech_region)}")
1079
+
1080
+ # 处理音频输入(使用Azure Speech服务)
1081
+ if audio_f is not None and azure_speech_key and azure_speech_region:
1082
+ logger.info("开始处理音频输入...")
1083
+ try:
1084
+ audio_sample_rate, audio_data = audio_f
1085
+ logger.info(f"音频信息: 采样率 {audio_sample_rate}Hz, 数据长度 {len(audio_data)}")
1086
+
1087
+ # 保存音频为.wav文件
1088
+ audio_filename = os.path.join(SAVE_DIR, f"audio_{client_ip}.wav")
1089
+ save_audio(audio_f, audio_filename)
1090
+ logger.info(f"音频已保存: {audio_filename}")
1091
+
1092
+ # 调用Azure Speech服务处理音频
1093
+ audio_text = azure_speech_to_text(azure_speech_key, azure_speech_region, audio_filename)
1094
+ logger.info(f"音频识别结果: {audio_text}")
1095
+ if audio_text:
1096
+ multimodal_text_content += f"音频内容: {audio_text}"
1097
+ logger.info("音频处理完成")
1098
+ else:
1099
+ logger.warning("音频处理失败")
1100
+ except Exception as e:
1101
+ logger.error(f"音频处理错误: {str(e)}")
1102
+ elif audio_f is not None:
1103
+ logger.warning("音频文件存在但Azure Speech配置不完整,跳过音频处理")
1104
+
1105
+ # 处理图像输入(使用Azure Computer Vision服务)
1106
+ if image_f is not None:
1107
+ logger.info("开始处理图像输入...")
1108
+ try:
1109
+ logger.info(f"图像信息: 形状 {image_f.shape}, 数据类型 {image_f.dtype}")
1110
+
1111
+ # 保存图片为.jpg文件
1112
+ image_filename = os.path.join(SAVE_DIR, f"image_{client_ip}.jpg")
1113
+ save_image(image_f, image_filename)
1114
+ logger.info(f"图像已保存: {image_filename}")
1115
+
1116
+ # 调用tools.py中的image_to_str方法处理图片
1117
+ image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_filename)
1118
+ logger.info(f"图像识别结果: {image_text}")
1119
+ if image_text:
1120
+ if multimodal_text_content: # 如果已有音频内容,添加分隔符
1121
+ multimodal_text_content += "\n"
1122
+ multimodal_text_content += f"图像内容: {image_text}"
1123
+ logger.info("图像处理完成")
1124
+ else:
1125
+ logger.warning("图像处理失败")
1126
+ except Exception as e:
1127
+ logger.error(f"图像处理错误: {str(e)}")
1128
+ elif image_f is not None:
1129
+ logger.warning("图像文件存在但处理失败,跳过图像处理")
1130
+
1131
+ # 确定最终的用户输入内容:如果用户没有输入文本,使用多模态识别的内容
1132
+ final_user_content = user_msg_content.strip() if user_msg_content else ""
1133
+ if not final_user_content and multimodal_text_content:
1134
+ final_user_content = multimodal_text_content
1135
+ logger.info(f"用户无文本输入,使用多模态内容作为用户输入: {final_user_content}")
1136
+ elif final_user_content and multimodal_text_content:
1137
+ # 用户有文本输入,多模态内容作为补充
1138
+ final_user_content = f"{final_user_content}\n{multimodal_text_content}"
1139
+ logger.info(f"用户有文本输入,多模态内容作为补充")
1140
+
1141
+ # 如果最终还是没有任何内容,提供默认提示
1142
+ if not final_user_content:
1143
+ final_user_content = "[无输入内容]"
1144
+ logger.warning("用户没有提供任何输入内容(文本、音频或图像)")
1145
+
1146
+ logger.info(f"最终用户输入内容: {final_user_content}")
1147
+
1148
+ # 1. 更新聊天记录 (用户部分) - 使用最终确定的用户内容
1149
+ if not ch_history: ch_history = []
1150
+ ch_history.append({"role": "user", "content": final_user_content})
1151
+ yield ch_history, [] # 更新聊天,ToDo 列表暂时不变
1152
+
1153
+ # 2. 流式生成机器人回复并更新聊天记录
1154
+ # 转换 chat_history 为 respond 函数期望的格式
1155
+ formatted_hist_for_respond = []
1156
+ temp_user_msg_for_hist = None
1157
+ # 使用 ch_history[:-1] 因为当前用户消息已在 ch_history 中
1158
+ for item_hist in ch_history[:-1]:
1159
+ if item_hist["role"] == "user":
1160
+ temp_user_msg_for_hist = item_hist["content"]
1161
+ elif item_hist["role"] == "assistant" and temp_user_msg_for_hist is not None:
1162
+ formatted_hist_for_respond.append((temp_user_msg_for_hist, item_hist["content"]))
1163
+ temp_user_msg_for_hist = None
1164
+ elif item_hist["role"] == "assistant" and temp_user_msg_for_hist is None:
1165
+ formatted_hist_for_respond.append(("", item_hist["content"]))
1166
+
1167
+ # 准备一个 assistant 消息的槽位
1168
+ ch_history.append({"role": "assistant", "content": ""})
1169
+
1170
+ full_bot_response = ""
1171
+ # 使用最终确定的用户内容进行对话
1172
+ for bot_response_token, _ in respond(final_user_content, formatted_hist_for_respond, sys_msg, max_t, temp, t_p, audio_f, image_f):
1173
+ full_bot_response = bot_response_token
1174
+ ch_history[-1]["content"] = full_bot_response # 更新最后一条 assistant 消息
1175
+ yield ch_history, [] # 流式更新聊天,ToDo 列表不变
1176
+
1177
+ # 3. 生成 ToDoList - 分别处理音频、图片和文字输入
1178
+ new_todos_list = []
1179
+
1180
+ # 分别处理文字输入
1181
+ if user_msg_content and user_msg_content.strip():
1182
+ logger.info(f"处理文字输入生成ToDo: {user_msg_content.strip()}")
1183
+ text_filtered_result = filter_message_with_llm(user_msg_content.strip())
1184
+ text_todos = process_filtered_result_for_todo(text_filtered_result, user_msg_content.strip(), "文字")
1185
+ new_todos_list.extend(text_todos)
1186
+
1187
+ # 分别处理音频输入
1188
+ if audio_f is not None and azure_speech_key and azure_speech_region:
1189
+ try:
1190
+ audio_sample_rate, audio_data = audio_f
1191
+ audio_filename = os.path.join(SAVE_DIR, f"audio_{client_ip}.wav")
1192
+ save_audio(audio_f, audio_filename)
1193
+ audio_text = azure_speech_to_text(azure_speech_key, azure_speech_region, audio_filename)
1194
+ if audio_text:
1195
+ logger.info(f"处理音频输入生成ToDo: {audio_text}")
1196
+ audio_filtered_result = filter_message_with_llm(audio_text)
1197
+ audio_todos = process_filtered_result_for_todo(audio_filtered_result, audio_text, "音频")
1198
+ new_todos_list.extend(audio_todos)
1199
+ except Exception as e:
1200
+ logger.error(f"音频处理错误: {str(e)}")
1201
+
1202
+ # 分别处理图片输入
1203
+ if image_f is not None:
1204
+ try:
1205
+ image_filename = os.path.join(SAVE_DIR, f"image_{client_ip}.jpg")
1206
+ save_image(image_f, image_filename)
1207
+ image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_filename)
1208
+ if image_text:
1209
+ logger.info(f"处理图片输入生成ToDo: {image_text}")
1210
+ image_filtered_result = filter_message_with_llm(image_text)
1211
+ image_todos = process_filtered_result_for_todo(image_filtered_result, image_text, "图片")
1212
+ new_todos_list.extend(image_todos)
1213
+ except Exception as e:
1214
+ logger.error(f"图片处理错误: {str(e)}")
1215
+
1216
+ # 如果没有任何有效输入,使用原有逻辑
1217
+ if not new_todos_list and final_user_content:
1218
+ logger.info(f"使用整合内容生成ToDo: {final_user_content}")
1219
+ filtered_result = filter_message_with_llm(final_user_content)
1220
+
1221
+ if isinstance(filtered_result, dict) and "error" in filtered_result:
1222
+ logger.error(f"Filter 模块处理失败: {filtered_result['error']}")
1223
+ # 可以选择在这里显示错误信息给用户
1224
+ new_todos_list = [["Error", filtered_result['error'], "Filter Failed"]]
1225
+ elif isinstance(filtered_result, dict) and filtered_result.get("分类") == "其他":
1226
+ logger.info(f"消息被 Filter 模块归类为 '其他',不生成 ToDo List。")
1227
+ new_todos_list = [["Info", "消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
1228
+ elif isinstance(filtered_result, list):
1229
+ # 如果返回的是列表,尝试从列表中获取分类信息
1230
+ category = None
1231
+
1232
+ # 检查列表是否为空
1233
+ if not filtered_result:
1234
+ logger.warning("Filter 模块返回了空列表,将继续生成 ToDo List。")
1235
+ if final_user_content:
1236
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
1237
+ new_todos_list = generate_todolist_from_text(final_user_content, msg_id_todo)
1238
+ # 将新的待办事项添加到全局列表中
1239
+ if new_todos_list and not (len(new_todos_list) == 1 and "Info" in str(new_todos_list[0])):
1240
+ # 重新分配ID以确保连续性
1241
+ for i, todo in enumerate(new_todos_list):
1242
+ todo[0] = len(all_todos_global) + i + 1
1243
+ all_todos_global.extend(new_todos_list)
1244
+ yield ch_history, all_todos_global
1245
+ return
1246
+
1247
+ # 确保列表中至少有一个元素且是字典类型
1248
+ valid_item = None
1249
+ for item in filtered_result:
1250
+ if isinstance(item, dict):
1251
+ valid_item = item
1252
+ if "分类" in item:
1253
+ category = item["分类"]
1254
+ break
1255
+
1256
+ # 如果没有找到有效的字典元素,记录警告并继续生成ToDo
1257
+ if valid_item is None:
1258
+ logger.warning(f"Filter 模块返回的列表中没有有效的字典元素: {filtered_result}")
1259
+ if final_user_content:
1260
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
1261
+ new_todos_list = generate_todolist_from_text(final_user_content, msg_id_todo)
1262
+ # 将新的待办事项添加到全局列表中
1263
+ if new_todos_list and not (len(new_todos_list) == 1 and "Info" in str(new_todos_list[0])):
1264
+ # 重新分配ID以确保连续性
1265
+ for i, todo in enumerate(new_todos_list):
1266
+ todo[0] = len(all_todos_global) + i + 1
1267
+ all_todos_global.extend(new_todos_list)
1268
+ yield ch_history, all_todos_global
1269
+ return
1270
+
1271
+ if category == "其他":
1272
+ logger.info(f"消息被 Filter 模块归类为 '其他',不生成 ToDo List。")
1273
+ new_todos_list = [["Info", "消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
1274
+ else:
1275
+ logger.info(f"消息被 Filter 模块归类为 '{category if category else '未知'}',继续生成 ToDo List。")
1276
+ # 如果 Filter 结果不是"其他",则继续生成 ToDoList
1277
+ if final_user_content:
1278
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
1279
+ new_todos_list = generate_todolist_from_text(final_user_content, msg_id_todo)
1280
+ else:
1281
+ # 如果是字典但不是"其他"分类
1282
+ logger.info(f"消息被 Filter 模块归类为 '{filtered_result.get('分类')}',继续生成 ToDo List。")
1283
+ # 如果 Filter 结果不是"其他",则继续生成 ToDoList
1284
+ if final_user_content:
1285
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
1286
+ new_todos_list = generate_todolist_from_text(final_user_content, msg_id_todo)
1287
+
1288
+ # 将新的待办事项添加到全局列表中(排除信息性消息)
1289
+ if new_todos_list and not (len(new_todos_list) == 1 and ("Info" in str(new_todos_list[0]) or "Error" in str(new_todos_list[0]))):
1290
+ # 重新分配ID以确保连续性
1291
+ for i, todo in enumerate(new_todos_list):
1292
+ todo[0] = len(all_todos_global) + i + 1
1293
+ all_todos_global.extend(new_todos_list)
1294
+
1295
+ yield ch_history, all_todos_global # 最终更新聊天和完整的ToDo列表
1296
+
1297
+ submit_btn.click(
1298
+ handle_submit,
1299
+ [msg, chatbot, system_msg, max_tokens_slider, temperature_slider, top_p_slider, audio_input, image_input],
1300
+ [chatbot, todolist_df]
1301
+ )
1302
+ msg.submit(
1303
+ handle_submit,
1304
+ [msg, chatbot, system_msg, max_tokens_slider, temperature_slider, top_p_slider, audio_input, image_input],
1305
+ [chatbot, todolist_df]
1306
+ )
1307
+
1308
+ def clear_all():
1309
+ global all_todos_global
1310
+ all_todos_global = [] # 清除全局待办事项列表
1311
+ return None, None, "" # 清除 chatbot, todolist_df, 和 msg 输入框
1312
+ clear_btn.click(clear_all, None, [chatbot, todolist_df, msg], queue=False)
1313
+
1314
+ # 旧的 Audio/Image Processing Tab (保持不变或按需修改)
1315
+ with gr.Tab("Audio/Image Processing (Original)"):
1316
+ gr.Markdown("## 处理音频和图片")
1317
+ audio_processor = gr.Audio(label="上传音频", type="numpy")
1318
+ image_processor = gr.Image(label="上传图片", type="numpy")
1319
+ process_btn = gr.Button("处理", variant="primary")
1320
+ audio_output = gr.Textbox(label="音频信息")
1321
+ image_output = gr.Textbox(label="图片信息")
1322
+
1323
+ process_btn.click(
1324
+ process,
1325
+ inputs=[audio_processor, image_processor],
1326
+ outputs=[audio_output, image_output]
1327
+ )
1328
+
1329
+ if __name__ == "__main__":
1330
+ app.launch(debug=True)
app_pro.py ADDED
@@ -0,0 +1,840 @@
1
+ import gradio as gr
2
+ import json
3
+ from pathlib import Path
4
+ import yaml
5
+ import re
6
+ import logging
7
+ import io
8
+ import sys
9
+ import re
10
+ from datetime import datetime, timezone, timedelta
11
+ import requests
12
+ from tools import * #gege的多模态
13
+
14
+
15
+ CONFIG = None
16
+ HF_CONFIG_PATH = Path(__file__).parent / "todogen_LLM_config.yaml"
17
+
18
+ def load_hf_config():
19
+ """加载YAML配置文件"""
20
+ global CONFIG
21
+ if CONFIG is None:
22
+ try:
23
+ with open(HF_CONFIG_PATH, 'r', encoding='utf-8') as f:
24
+ CONFIG = yaml.safe_load(f)
25
+ print(f"✅ 配置已加载: {HF_CONFIG_PATH}")
26
+ except FileNotFoundError:
27
+ print(f"❌ 错误: 配置文件 {HF_CONFIG_PATH} 未找到。请确保它在 hf 目录下。")
28
+ CONFIG = {}
29
+ except Exception as e:
30
+ print(f"❌ 加载配置文件 {HF_CONFIG_PATH} 时出错: {e}")
31
+ CONFIG = {}
32
+ return CONFIG
33
+
34
+ def get_hf_openai_config():
35
+ """获取OpenAI API配置"""
36
+ config = load_hf_config()
37
+ return config.get('openai', {})
38
+
39
+ def get_hf_openai_filter_config():
40
+ """获取Filter API配置"""
41
+ config = load_hf_config()
42
+ return config.get('openai_filter', {})
43
+
44
+ def get_hf_xunfei_config():
45
+ """获取讯飞API配置"""
46
+ config = load_hf_config()
47
+ return config.get('xunfei', {})
48
+
49
+ def get_hf_paths_config():
50
+ """获取文件路径配置"""
51
+ config = load_hf_config()
52
+ base = Path(__file__).resolve().parent
53
+ paths_cfg = config.get('paths', {})
54
+ return {
55
+ 'base_dir': base,
56
+ 'prompt_template': base / paths_cfg.get('prompt_template', 'prompt_template.txt'),
57
+ 'true_positive_examples': base / paths_cfg.get('true_positive_examples', 'TruePositive_few_shot.txt'),
58
+ 'false_positive_examples': base / paths_cfg.get('false_positive_examples', 'FalsePositive_few_shot.txt'),
59
+ }
60
+
61
+ llm_config = get_hf_openai_config()
62
+ NVIDIA_API_BASE_URL = llm_config.get('base_url')
63
+ NVIDIA_API_KEY = llm_config.get('api_key')
64
+ NVIDIA_MODEL_NAME = llm_config.get('model')
65
+
66
+ filter_config = get_hf_openai_filter_config()
67
+ Filter_API_BASE_URL = filter_config.get('base_url_filter')
68
+ Filter_API_KEY = filter_config.get('api_key_filter')
69
+ Filter_MODEL_NAME = filter_config.get('model_filter')
70
+
71
+ if not NVIDIA_API_BASE_URL or not NVIDIA_API_KEY or not NVIDIA_MODEL_NAME:
72
+ print("❌ 错误: NVIDIA API 配置不完整。请检查 todogen_LLM_config.yaml 中的 openai 部分。")
73
+ NVIDIA_API_BASE_URL = ""
74
+ NVIDIA_API_KEY = ""
75
+ NVIDIA_MODEL_NAME = ""
76
+
77
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
78
+ print("❌ 错误: Filter API 配置不完整。请检查 todogen_LLM_config.yaml 中的 openai_filter 部分。")
79
+ Filter_API_BASE_URL = ""
80
+ Filter_API_KEY = ""
81
+ Filter_MODEL_NAME = ""
82
+
83
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
84
+ logger = logging.getLogger(__name__)
85
+
86
+ def load_single_few_shot_file_hf(file_path: Path) -> str:
87
+ """加载单个few-shot示例文件并转义大括号"""
88
+ try:
89
+ with open(file_path, 'r', encoding='utf-8') as f:
90
+ content = f.read()
91
+ escaped_content = content.replace('{', '{{').replace('}', '}}')
92
+ return escaped_content
93
+ except FileNotFoundError:
94
+ return ""
95
+ except Exception:
96
+ return ""
97
+
98
+ PROMPT_TEMPLATE_CONTENT = ""
99
+ TRUE_POSITIVE_EXAMPLES_CONTENT = ""
100
+ FALSE_POSITIVE_EXAMPLES_CONTENT = ""
101
+
102
+ def load_prompt_data_hf():
103
+ """加载提示词模板和示例数据"""
104
+ global PROMPT_TEMPLATE_CONTENT, TRUE_POSITIVE_EXAMPLES_CONTENT, FALSE_POSITIVE_EXAMPLES_CONTENT
105
+ paths = get_hf_paths_config()
106
+ try:
107
+ with open(paths['prompt_template'], 'r', encoding='utf-8') as f:
108
+ PROMPT_TEMPLATE_CONTENT = f.read()
109
+ except FileNotFoundError:
110
+ PROMPT_TEMPLATE_CONTENT = "Error: Prompt template not found."
111
+
112
+ TRUE_POSITIVE_EXAMPLES_CONTENT = load_single_few_shot_file_hf(paths['true_positive_examples'])
113
+ FALSE_POSITIVE_EXAMPLES_CONTENT = load_single_few_shot_file_hf(paths['false_positive_examples'])
114
+
115
+ load_prompt_data_hf()
116
+
117
+ def _process_parsed_json(parsed_data):
118
+ """处理解析后的JSON数据,确保格式正确"""
119
+ try:
120
+ if isinstance(parsed_data, list):
121
+ if not parsed_data:
122
+ return [{}]
123
+
124
+ processed_list = []
125
+ for item in parsed_data:
126
+ if isinstance(item, dict):
127
+ processed_list.append(item)
128
+ else:
129
+ try:
130
+ processed_list.append({"content": str(item)})
131
+ except:
132
+ processed_list.append({"content": "无法转换的项目"})
133
+
134
+ if not processed_list:
135
+ return [{}]
136
+
137
+ return processed_list
138
+
139
+ elif isinstance(parsed_data, dict):
140
+ return parsed_data
141
+
142
+ else:
143
+ return {"content": str(parsed_data)}
144
+
145
+ except Exception as e:
146
+ return {"error": f"Error processing parsed JSON: {e}"}
147
+
148
+ def json_parser(text: str) -> dict:
149
+ """从文本中解析JSON数据,支持多种格式"""
150
+ try:
151
+ try:
152
+ parsed_data = json.loads(text)
153
+ return _process_parsed_json(parsed_data)
154
+ except json.JSONDecodeError:
155
+ pass
156
+
157
+ match = re.search(r'```(?:json)?\n(.*?)```', text, re.DOTALL)
158
+ if match:
159
+ json_str = match.group(1).strip()
160
+ json_str = re.sub(r',\s*]', ']', json_str)
161
+ json_str = re.sub(r',\s*}', '}', json_str)
162
+ try:
163
+ parsed_data = json.loads(json_str)
164
+ return _process_parsed_json(parsed_data)
165
+ except json.JSONDecodeError:
166
+ pass
167
+
168
+ array_match = re.search(r'\[\s*\{.*?\}\s*(?:,\s*\{.*?\}\s*)*\]', text, re.DOTALL)
169
+ if array_match:
170
+ potential_json = array_match.group(0).strip()
171
+ try:
172
+ parsed_data = json.loads(potential_json)
173
+ return _process_parsed_json(parsed_data)
174
+ except json.JSONDecodeError:
175
+ pass
176
+
177
+ object_match = re.search(r'\{.*?\}', text, re.DOTALL)
178
+ if object_match:
179
+ potential_json = object_match.group(0).strip()
180
+ try:
181
+ parsed_data = json.loads(potential_json)
182
+ return _process_parsed_json(parsed_data)
183
+ except json.JSONDecodeError:
184
+ pass
185
+
186
+ return {"error": "No valid JSON block found or failed to parse", "raw_text": text}
187
+
188
+ except Exception as e:
189
+ return {"error": f"Unexpected error in json_parser: {e}", "raw_text": text}
190
+
191
+ def filter_message_with_llm(text_input: str, message_id: str = "user_input_001"):
192
+ """使用LLM对消息进行分类过滤"""
193
+ mock_data = [(text_input, message_id)]
194
+
195
+ system_prompt = """
196
+ # 角色
197
+ 你是一个专业的短信内容分析助手,根据输入判断内容的类型及可信度,为用户使用信息提供依据和便利。
198
+
199
+ # 任务
200
+ 对于输入的多条数据,分析每一条数据内容(主键:`message_id`)属于【物流取件、欠费缴纳、待付(还)款、会议邀约、其他】的可能性百分比。
201
+ 对于聊天、问候、回执、结果通知、上月账单等不需要收件人进一步处理的信息,直接归到其他类进行忽略
202
+
203
+ # 要求
204
+ 1. 以json格式输出
205
+ 2. content简洁提炼关键词,字符数<20以内
206
+ 3. 输入条数和输出条数完全一样
207
+
208
+ # 输出示例
209
+ ```
210
+ [
211
+ {"message_id":"1111111","content":"账单805.57元待还","物流取件":0,"欠费缴纳":99,"待付(还)款":1,"会议邀约":0,"其他":0, "分类":"欠费缴纳"},
212
+ {"message_id":"222222","content":"邀请你加入飞书视频会议","物流取件":0,"欠费缴纳":0,"待付(还)款":1,"会议邀约":100,"其他":0, "分类":"会议邀约"}
213
+ ]
214
+ ```
215
+ """
216
+
217
+ llm_messages = [
218
+ {"role": "system", "content": system_prompt},
219
+ {"role": "user", "content": str(mock_data)}
220
+ ]
221
+
222
+ try:
223
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
224
+ return [{"error": "Filter API configuration incomplete", "-": "-"}]
225
+
226
+ headers = {
227
+ "Authorization": f"Bearer {Filter_API_KEY}",
228
+ "Accept": "application/json"
229
+ }
230
+ payload = {
231
+ "model": Filter_MODEL_NAME,
232
+ "messages": llm_messages,
233
+ "temperature": 0.0,
234
+ "top_p": 0.95,
235
+ "max_tokens": 1024,
236
+ "stream": False
237
+ }
238
+
239
+ api_url = f"{Filter_API_BASE_URL}/chat/completions"
240
+
241
+ try:
242
+ response = requests.post(api_url, headers=headers, json=payload)
243
+ response.raise_for_status()
244
+ raw_llm_response = response.json()["choices"][0]["message"]["content"]
245
+ except requests.exceptions.RequestException as e:
246
+ return [{"error": f"Filter API call failed: {e}", "-": "-"}]
247
+
248
+ raw_llm_response = raw_llm_response.replace("```json", "").replace("```", "")
249
+ parsed_filter_data = json_parser(raw_llm_response)
250
+
251
+ if "error" in parsed_filter_data:
252
+ return [{"error": f"Filter LLM response parsing error: {parsed_filter_data['error']}"}]
253
+
254
+ if isinstance(parsed_filter_data, list) and parsed_filter_data:
255
+ for item in parsed_filter_data:
256
+ if isinstance(item, dict) and item.get("分类") == "欠费缴纳" and "缴费支出" in item.get("content", ""):
257
+ item["分类"] = "其他"
258
+
259
+ request_id_list = {message_id}
260
+ response_id_list = {item.get('message_id') for item in parsed_filter_data if isinstance(item, dict)}
261
+ diff = request_id_list - response_id_list
262
+
263
+ if diff:
264
+ for missed_id in diff:
265
+ parsed_filter_data.append({
266
+ "message_id": missed_id,
267
+ "content": text_input[:20],
268
+ "物流取件": 0,
269
+ "欠费缴纳": 0,
270
+ "待付(还)款": 0,
271
+ "会议邀约": 0,
272
+ "其他": 100,
273
+ "分类": "其他"
274
+ })
275
+
276
+ return parsed_filter_data
277
+ else:
278
+ return [{
279
+ "message_id": message_id,
280
+ "content": text_input[:20],
281
+ "物流取件": 0,
282
+ "欠费缴纳": 0,
283
+ "待付(还)款": 0,
284
+ "会议邀约": 0,
285
+ "其他": 100,
286
+ "分类": "其他",
287
+ "error": "Filter LLM returned empty or unexpected format"
288
+ }]
289
+
290
+ except Exception as e:
291
+ return [{
292
+ "message_id": message_id,
293
+ "content": text_input[:20],
294
+ "物流取件": 0,
295
+ "欠费缴纳": 0,
296
+ "待付(还)款": 0,
297
+ "会议邀约": 0,
298
+ "其他": 100,
299
+ "分类": "其他",
300
+ "error": f"Filter LLM call/parse error: {str(e)}"
301
+ }]
302
+
303
+ def generate_todolist_from_text(text_input: str, message_id: str = "user_input_001"):
304
+ """从文本生成待办事项列表"""
305
+ if not PROMPT_TEMPLATE_CONTENT or "Error:" in PROMPT_TEMPLATE_CONTENT:
306
+ return [["error", "Prompt template not loaded", "-"]]
307
+
308
+ current_time_iso = datetime.now(timezone.utc).isoformat()
309
+ content_escaped = text_input.replace('{', '{{').replace('}', '}}')
310
+
311
+ formatted_prompt = PROMPT_TEMPLATE_CONTENT.format(
312
+ true_positive_examples=TRUE_POSITIVE_EXAMPLES_CONTENT,
313
+ false_positive_examples=FALSE_POSITIVE_EXAMPLES_CONTENT,
314
+ current_time=current_time_iso,
315
+ message_id=message_id,
316
+ content_escaped=content_escaped
317
+ )
318
+
319
+ enhanced_prompt = formatted_prompt + """
320
+
321
+ # 重要提示
322
+ 请确保你的回复是有效的JSON格式,并且只包含JSON内容。不要添加任何额外的解释或文本。
323
+ 你的回复应该严格按照上面的输出示例格式,只包含JSON对象,不要有任何其他文本。
324
+ """
325
+
326
+ llm_messages = [
327
+ {"role": "user", "content": enhanced_prompt}
328
+ ]
329
+
330
+ try:
331
+ if ("充值" in text_input or "缴费" in text_input) and ("移动" in text_input or "话费" in text_input or "余额" in text_input):
332
+ todo_item = {
333
+ message_id: {
334
+ "is_todo": True,
335
+ "end_time": (datetime.now(timezone.utc) + timedelta(days=3)).isoformat(),
336
+ "location": "线上:中国移动APP",
337
+ "todo_content": "缴纳话费",
338
+ "urgency": "important"
339
+ }
340
+ }
341
+
342
+ todo_content = "缴纳话费"
343
+ end_time = todo_item[message_id]["end_time"].split("T")[0]
344
+ location = todo_item[message_id]["location"]
345
+
346
+ combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
347
+
348
+ output_for_df = []
349
+ output_for_df.append([1, combined_content, "重要"])
350
+
351
+ return output_for_df
352
+
353
+ elif "会议" in text_input and ("邀请" in text_input or "参加" in text_input):
354
+ meeting_time = None
355
+ meeting_pattern = r'(\d{1,2}[月/-]\d{1,2}[日号]?\s*\d{1,2}[点:]\d{0,2}|\d{4}[年/-]\d{1,2}[月/-]\d{1,2}[日号]?\s*\d{1,2}[点:]\d{0,2})'
356
+ meeting_match = re.search(meeting_pattern, text_input)
357
+
358
+ if meeting_match:
359
+ meeting_time = (datetime.now(timezone.utc) + timedelta(days=1, hours=2)).isoformat()
360
+ else:
361
+ meeting_time = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat()
362
+
363
+ todo_item = {
364
+ message_id: {
365
+ "is_todo": True,
366
+ "end_time": meeting_time,
367
+ "location": "线上:会议软件",
368
+ "todo_content": "参加会议",
369
+ "urgency": "important"
370
+ }
371
+ }
372
+
373
+ todo_content = "参加会议"
374
+ end_time = todo_item[message_id]["end_time"].split("T")[0]
375
+ location = todo_item[message_id]["location"]
376
+
377
+ combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
378
+
379
+ output_for_df = []
380
+ output_for_df.append([1, combined_content, "重要"])
381
+
382
+ return output_for_df
383
+
384
+ elif ("快递" in text_input or "物流" in text_input or "取件" in text_input) and ("到达" in text_input or "取件码" in text_input or "柜" in text_input):
385
+ pickup_code = None
386
+ code_pattern = r'取件码[是为:]?\s*(\d{4,6})'
387
+ code_match = re.search(code_pattern, text_input)
388
+
389
+ todo_content = "取快递"
390
+ if code_match:
391
+ pickup_code = code_match.group(1)
392
+ todo_content = f"取快递(取件码:{pickup_code})"
393
+
394
+ todo_item = {
395
+ message_id: {
396
+ "is_todo": True,
397
+ "end_time": (datetime.now(timezone.utc) + timedelta(days=2)).isoformat(),
398
+ "location": "线下:快递柜",
399
+ "todo_content": todo_content,
400
+ "urgency": "important"
401
+ }
402
+ }
403
+
404
+ end_time = todo_item[message_id]["end_time"].split("T")[0]
405
+ location = todo_item[message_id]["location"]
406
+
407
+ combined_content = f"{todo_content} (截止时间: {end_time}, 地点: {location})"
408
+
409
+ output_for_df = []
410
+ output_for_df.append([1, combined_content, "重要"])
411
+
412
+ return output_for_df
413
+
414
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
415
+ return [["error", "Filter API configuration incomplete", "-"]]
416
+
417
+ headers = {
418
+ "Authorization": f"Bearer {Filter_API_KEY}",
419
+ "Accept": "application/json"
420
+ }
421
+ payload = {
422
+ "model": Filter_MODEL_NAME,
423
+ "messages": llm_messages,
424
+ "temperature": 0.2,
425
+ "top_p": 0.95,
426
+ "max_tokens": 1024,
427
+ "stream": False
428
+ }
429
+
430
+ api_url = f"{Filter_API_BASE_URL}/chat/completions"
431
+
432
+ try:
433
+ response = requests.post(api_url, headers=headers, json=payload)
434
+ response.raise_for_status()
435
+ raw_llm_response = response.json()['choices'][0]['message']['content']
436
+ except requests.exceptions.RequestException as e:
437
+ return [["error", f"Filter API call failed: {e}", "-"]]
438
+
439
+ parsed_todos_data = json_parser(raw_llm_response)
440
+
441
+ if "error" in parsed_todos_data:
442
+ return [["error", f"LLM response parsing error: {parsed_todos_data['error']}", parsed_todos_data.get('raw_text', '')[:50] + "..."]]
443
+
444
+ output_for_df = []
445
+
446
+ if isinstance(parsed_todos_data, dict):
447
+ todo_info = None
448
+ for key, value in parsed_todos_data.items():
449
+ if key == message_id or key == str(message_id):
450
+ todo_info = value
451
+ break
452
+
453
+ if todo_info and isinstance(todo_info, dict) and todo_info.get("is_todo", False):
454
+ todo_content = todo_info.get("todo_content", "未指定待办内容")
455
+ end_time = todo_info.get("end_time")
456
+ location = todo_info.get("location")
457
+ urgency = todo_info.get("urgency", "unimportant")
458
+
459
+ combined_content = todo_content
460
+
461
+ if end_time and end_time != "null":
462
+ try:
463
+ date_part = end_time.split("T")[0] if "T" in end_time else end_time
464
+ combined_content += f" (截止时间: {date_part}"
465
+ except:
466
+ combined_content += f" (截止时间: {end_time}"
467
+ else:
468
+ combined_content += " ("
469
+
470
+ if location and location != "null":
471
+ combined_content += f", 地点: {location})"
472
+ else:
473
+ combined_content += ")"
474
+
475
+ urgency_display = "一般"
476
+ if urgency == "urgent":
477
+ urgency_display = "紧急"
478
+ elif urgency == "important":
479
+ urgency_display = "重要"
480
+
481
+ output_for_df = []
482
+ output_for_df.append([1, combined_content, urgency_display])
483
+ else:
484
+ output_for_df = []
485
+ output_for_df.append([1, "此消息不包含待办事项", "-"])
486
+
487
+ elif isinstance(parsed_todos_data, list):
488
+ output_for_df = []
489
+
490
+ if not parsed_todos_data:
491
+ return [[1, "未能生成待办事项", "-"]]
492
+
493
+ for i, item in enumerate(parsed_todos_data):
494
+ if isinstance(item, dict):
495
+ todo_content = item.get('todo_content', item.get('content', 'N/A'))
496
+ status = item.get('status', '未完成')
497
+ urgency = item.get('urgency', 'normal')
498
+
499
+ combined_content = todo_content
500
+
501
+ if 'end_time' in item and item['end_time']:
502
+ try:
503
+ if isinstance(item['end_time'], str):
504
+ date_part = item['end_time'].split("T")[0] if "T" in item['end_time'] else item['end_time']
505
+ combined_content += f" (截止时间: {date_part}"
506
+ else:
507
+ combined_content += f" (截止时间: {str(item['end_time'])}"
508
+ except Exception:
509
+ combined_content += " ("
510
+ else:
511
+ combined_content += " ("
512
+
513
+ if 'location' in item and item['location']:
514
+ combined_content += f", 地点: {item['location']})"
515
+ else:
516
+ combined_content += ")"
517
+
518
+ importance = "一般"
519
+ if urgency == "urgent":
520
+ importance = "紧急"
521
+ elif urgency == "important":
522
+ importance = "重要"
523
+
524
+ output_for_df.append([i + 1, combined_content, importance])
525
+ else:
526
+ try:
527
+ item_str = str(item) if item is not None else "未知项目"
528
+ output_for_df.append([i + 1, item_str, "一般"])
529
+ except Exception:
530
+ output_for_df.append([i + 1, "处理错误的项目", "一般"])
531
+
532
+ if not output_for_df:
533
+ return [["info", "未发现待办事项", "-"]]
534
+
535
+ return output_for_df
536
+
537
+ except Exception as e:
538
+ return [["error", f"LLM call/parse error: {str(e)}", "-"]]
539
+ # 这里------多模态数据从这里调用
540
+ def process(audio, image):
541
+ """处理音频和图片输入,返回基本信息"""
542
+ if audio is not None:
543
+ sample_rate, audio_data = audio
544
+ audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"
545
+ else:
546
+ audio_info = "未收到音频"
547
+
548
+ if image is not None:
549
+ image_info = f"图片尺寸: {image.shape}"
550
+ else:
551
+ image_info = "未收到图片"
552
+
553
+ return audio_info, image_info
554
+
555
+ def respond(message, history, system_message, max_tokens, temperature, top_p, audio, image):
556
+ """处理聊天响应,支持流式输出"""
557
+ chat_messages = [{"role": "system", "content": system_message}]
558
+ for val in history:
559
+ if val[0]:
560
+ chat_messages.append({"role": "user", "content": val[0]})
561
+ if val[1]:
562
+ chat_messages.append({"role": "assistant", "content": val[1]})
563
+ chat_messages.append({"role": "user", "content": message})
564
+
565
+ chat_response_stream = ""
566
+ if not Filter_API_BASE_URL or not Filter_API_KEY or not Filter_MODEL_NAME:
567
+ yield "Filter API 配置不完整,无法提供聊天回复。", []
568
+ return
569
+
570
+ headers = {
571
+ "Authorization": f"Bearer {Filter_API_KEY}",
572
+ "Accept": "application/json"
573
+ }
574
+ payload = {
575
+ "model": Filter_MODEL_NAME,
576
+ "messages": chat_messages,
577
+ "temperature": temperature,
578
+ "top_p": top_p,
579
+ "max_tokens": max_tokens,
580
+ "stream": True
581
+ }
582
+ api_url = f"{Filter_API_BASE_URL}/chat/completions"
583
+
584
+ try:
585
+ response = requests.post(api_url, headers=headers, json=payload, stream=True)
586
+ response.raise_for_status()
587
+
588
+ for chunk in response.iter_content(chunk_size=None):
589
+ if chunk:
590
+ try:
591
+ for line in chunk.decode('utf-8').splitlines():
592
+ if line.startswith('data: '):
593
+ json_data = line[len('data: '):]
594
+ if json_data.strip() == '[DONE]':
595
+ break
596
+ data = json.loads(json_data)
597
+ token = data['choices'][0]['delta'].get('content', '')
598
+ if token:
599
+ chat_response_stream += token
600
+ yield chat_response_stream, []
601
+ except json.JSONDecodeError:
602
+ pass
603
+ except Exception as e:
604
+ yield chat_response_stream + f"\n\n错误: {e}", []
605
+
606
+ except requests.exceptions.RequestException as e:
607
+ yield f"调用 NVIDIA API 失败: {e}", []
608
+ # 多模态上传入口(文本/语音/图片)
609
+ with gr.Blocks() as app:
610
+ gr.Markdown("# ToDoAgent Multi-Modal Interface with ToDo List")
611
+
612
+ with gr.Row():
613
+ with gr.Column(scale=2):
614
+ gr.Markdown("## Chat Interface")
615
+ chatbot = gr.Chatbot(height=450, label="聊天记录", type="messages")
616
+ msg = gr.Textbox(label="输入消息", placeholder="输入您的问题或待办事项...")
617
+
618
+ with gr.Row():
619
+ audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
620
+ image_input = gr.Image(label="上传图片", type="numpy")
621
+
622
+ with gr.Accordion("高级设置", open=False):
623
+ system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
624
+ max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="最大生成长度(聊天)")
625
+ temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="温度(聊天)")
626
+ top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p(聊天)")
627
+
628
+ with gr.Row():
629
+ submit_btn = gr.Button("发送", variant="primary")
630
+ clear_btn = gr.Button("清除聊天和ToDo")
631
+
632
+ with gr.Column(scale=1):
633
+ gr.Markdown("## Generated ToDo List")
634
+ todolist_df = gr.DataFrame(headers=["ID", "任务内容", "状态"],
635
+ datatype=["number", "str", "str"],
636
+ row_count=(0, "dynamic"),
637
+ col_count=(3, "fixed"),
638
+ label="待办事项列表")
639
+
640
+ def handle_submit(user_msg_content, ch_history, sys_msg, max_t, temp, t_p, audio_f, image_f):
641
+ """处理用户提交的消息,生成聊天回复和待办事项"""
642
+ # 首先处理多模态输入,获取多模态内容
643
+ multimodal_text_content = ""
644
+ xunfei_config = get_hf_xunfei_config()
645
+ xunfei_appid = xunfei_config.get('appid')
646
+ xunfei_apikey = xunfei_config.get('apikey')
647
+ xunfei_apisecret = xunfei_config.get('apisecret')
648
+
649
+ # 添加调试日志
650
+ logger.info(f"开始多模态处理 - 音频: {audio_f is not None}, 图像: {image_f is not None}")
651
+ logger.info(f"讯飞配置状态 - appid: {bool(xunfei_appid)}, apikey: {bool(xunfei_apikey)}, apisecret: {bool(xunfei_apisecret)}")
652
+
653
+ # 处理音频输入(独立处理)
654
+ if audio_f is not None and xunfei_appid and xunfei_apikey and xunfei_apisecret:
655
+ logger.info("开始处理音频输入...")
656
+ try:
657
+ import tempfile
658
+ import soundfile as sf
659
+ import os
660
+
661
+ audio_sample_rate, audio_data = audio_f
662
+ logger.info(f"音频信息: 采样率 {audio_sample_rate}Hz, 数据长度 {len(audio_data)}")
663
+
664
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
665
+ sf.write(temp_audio.name, audio_data, audio_sample_rate)
666
+ temp_audio_path = temp_audio.name
667
+ logger.info(f"音频临时文件已保存: {temp_audio_path}")
668
+
669
+ audio_text = audio_to_str(xunfei_appid, xunfei_apikey, xunfei_apisecret, temp_audio_path)
670
+ logger.info(f"音频识别结果: {audio_text}")
671
+ if audio_text:
672
+ multimodal_text_content += f"音频内容: {audio_text}"
673
+
674
+ os.unlink(temp_audio_path)
675
+ logger.info("音频处理完成")
676
+ except Exception as e:
677
+ logger.error(f"音频处理错误: {str(e)}")
678
+ elif audio_f is not None:
679
+ logger.warning("音频文件存在但讯飞配置不完整,跳过音频处理")
680
+
681
+ # 处理图像输入(独立处理)
682
+ if image_f is not None and xunfei_appid and xunfei_apikey and xunfei_apisecret:
683
+ logger.info("开始处理图像输入...")
684
+ try:
685
+ import tempfile
686
+ from PIL import Image
687
+ import os
688
+
689
+ logger.info(f"图像信息: 形状 {image_f.shape}, 数据类型 {image_f.dtype}")
690
+
691
+ with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_image:
692
+ if len(image_f.shape) == 3: # RGB图像
693
+ pil_image = Image.fromarray(image_f.astype('uint8'), 'RGB')
694
+ else: # 灰度图像
695
+ pil_image = Image.fromarray(image_f.astype('uint8'), 'L')
696
+
697
+ pil_image.save(temp_image.name, 'JPEG')
698
+ temp_image_path = temp_image.name
699
+ logger.info(f"图像临时文件已保存: {temp_image_path}")
700
+
701
+ image_text = image_to_str(xunfei_appid, xunfei_apikey, xunfei_apisecret, temp_image_path)
702
+ logger.info(f"图像识别结果: {image_text}")
703
+ if image_text:
704
+ if multimodal_text_content: # 如果已有音频内容,添加分隔符
705
+ multimodal_text_content += "\n"
706
+ multimodal_text_content += f"图像内容: {image_text}"
707
+
708
+ os.unlink(temp_image_path)
709
+ logger.info("图像处理完成")
710
+ except Exception as e:
711
+ logger.error(f"图像处理错误: {str(e)}")
712
+ elif image_f is not None:
713
+ logger.warning("图像文件存在但讯飞配置不完整,跳过图像处理")
714
+
715
+ # 确定最终的用户输入内容:如果用户没有输入文本,使用多模态识别的内容
716
+ final_user_content = user_msg_content.strip() if user_msg_content else ""
717
+ if not final_user_content and multimodal_text_content:
718
+ final_user_content = multimodal_text_content
719
+ logger.info(f"用户无文本输入,使用多模态内容作为用户输入: {final_user_content}")
720
+ elif final_user_content and multimodal_text_content:
721
+ # 用户有文本输入,多模态内容作为补充
722
+ final_user_content = f"{final_user_content}\n{multimodal_text_content}"
723
+ logger.info(f"用户有文本输入,多模态内容作为补充")
724
+
725
+ # 如果最终还是没有任何内容,提供默认提示
726
+ if not final_user_content:
727
+ final_user_content = "[无输入内容]"
728
+ logger.warning("用户没有提供任何输入内容(文本、音频或图像)")
729
+
730
+ logger.info(f"最终用户输入内容: {final_user_content}")
731
+
732
+ # 1. 更新聊天记录 (用户部分) - 使用最终确定的用户内容
733
+ if not ch_history: ch_history = []
734
+ ch_history.append({"role": "user", "content": final_user_content})
735
+ yield ch_history, []
736
+
737
+ # 2. 流式生成机器人回复并更新聊天记录
738
+ formatted_hist_for_respond = []
739
+ temp_user_msg_for_hist = None
740
+ for item_hist in ch_history[:-1]:
741
+ if item_hist["role"] == "user":
742
+ temp_user_msg_for_hist = item_hist["content"]
743
+ elif item_hist["role"] == "assistant" and temp_user_msg_for_hist is not None:
744
+ formatted_hist_for_respond.append((temp_user_msg_for_hist, item_hist["content"]))
745
+ temp_user_msg_for_hist = None
746
+ elif item_hist["role"] == "assistant" and temp_user_msg_for_hist is None:
747
+ formatted_hist_for_respond.append(("", item_hist["content"]))
748
+
749
+ ch_history.append({"role": "assistant", "content": ""})
750
+
751
+ full_bot_response = ""
752
+ # 使用最终确定的用户内容进行对话
753
+ for bot_response_token, _ in respond(final_user_content, formatted_hist_for_respond, sys_msg, max_t, temp, t_p, audio_f, image_f):
754
+ full_bot_response = bot_response_token
755
+ ch_history[-1]["content"] = full_bot_response
756
+ yield ch_history, []
757
+
758
+ # 3. 生成 ToDoList - 使用最终确定的用户内容
759
+ text_for_todo = final_user_content
760
+
761
+ # 添加日志:输出用于ToDo生成的内容
762
+ logger.info(f"用于ToDo生成的内容: {text_for_todo}")
763
+ current_todos_list = []
764
+
765
+ filtered_result = filter_message_with_llm(text_for_todo)
766
+
767
+ if isinstance(filtered_result, dict) and "error" in filtered_result:
768
+ current_todos_list = [["Error", filtered_result['error'], "Filter Failed"]]
769
+ elif isinstance(filtered_result, dict) and filtered_result.get("分类") == "其他":
770
+ current_todos_list = [["Info", "消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
771
+ elif isinstance(filtered_result, list):
772
+ category = None
773
+
774
+ if not filtered_result:
775
+ if text_for_todo:
776
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
777
+ current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
778
+ yield ch_history, current_todos_list
779
+ return
780
+
781
+ valid_item = None
782
+ for item in filtered_result:
783
+ if isinstance(item, dict):
784
+ valid_item = item
785
+ if "分类" in item:
786
+ category = item["分类"]
787
+ break
788
+
789
+ if valid_item is None:
790
+ if text_for_todo:
791
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
792
+ current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
793
+ yield ch_history, current_todos_list
794
+ return
795
+
796
+ if category == "其他":
797
+ current_todos_list = [["Info", "消息被归类为 '其他',无需生成 ToDo。", "Filtered"]]
798
+ else:
799
+ if text_for_todo:
800
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
801
+ current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
802
+ else:
803
+ if text_for_todo:
804
+ msg_id_todo = f"hf_app_todo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}"
805
+ current_todos_list = generate_todolist_from_text(text_for_todo, msg_id_todo)
806
+
807
+ yield ch_history, current_todos_list
808
+
809
+ submit_btn.click(
810
+ handle_submit,
811
+ [msg, chatbot, system_msg, max_tokens_slider, temperature_slider, top_p_slider, audio_input, image_input],
812
+ [chatbot, todolist_df]
813
+ )
814
+ msg.submit(
815
+ handle_submit,
816
+ [msg, chatbot, system_msg, max_tokens_slider, temperature_slider, top_p_slider, audio_input, image_input],
817
+ [chatbot, todolist_df]
818
+ )
819
+
820
+ def clear_all():
821
+ """清除所有聊天记录和待办事项"""
822
+ return None, None, ""
823
+ clear_btn.click(clear_all, None, [chatbot, todolist_df, msg], queue=False)
824
+ # 多模态标签页
825
+ with gr.Tab("Audio/Image Processing (Original)"):
826
+ gr.Markdown("## 处理音频和图片")
827
+ audio_processor = gr.Audio(label="上传音频", type="numpy")
828
+ image_processor = gr.Image(label="上传图片", type="numpy")
829
+ process_btn = gr.Button("处理", variant="primary")
830
+ audio_output = gr.Textbox(label="音频信息")
831
+ image_output = gr.Textbox(label="图片信息")
832
+
833
+ process_btn.click(
834
+ process,
835
+ inputs=[audio_processor, image_processor],
836
+ outputs=[audio_output, image_output]
837
+ )
838
+
839
+ if __name__ == "__main__":
840
+ app.launch(debug=False)
audio_127.0.0.1.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4cca96c289e5acdfd9d8e926bb40674e170374878d57e4d3c3f5aca3039bec8
3
+ size 1830956
image_127.0.0.1.jpg ADDED
requirements.txt CHANGED
@@ -1,4 +1,8 @@
1
- gradio
2
- requests
3
- pathlib
4
- python-dateutil
1
+ gradio
2
+ requests
3
+ pathlib
4
+ python-dateutil
5
+ Pillow
6
+ numpy
7
+ soundfile
8
+ azure-ai-inference
se_app.py ADDED
@@ -0,0 +1,232 @@
1
+ import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+ import os
4
+ import numpy as np
5
+ from scipy.io.wavfile import write as write_wav
6
+ from PIL import Image
7
+ from tools import audio_to_str, image_to_str # 导入tools.py中的方法
8
+
9
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
10
+
11
+ # 指定保存文件的相对路径
12
+ SAVE_DIR = 'download' # 相对路径
13
+ os.makedirs(SAVE_DIR, exist_ok=True) # 确保目录存在
14
+
15
+ def get_client_ip(request: gr.Request, debug_mode=False):
16
+ """获取客户端真实IP地址"""
17
+ if request:
18
+ # 从请求头中获取真实IP(考虑代理情况)
19
+ x_forwarded_for = request.headers.get("x-forwarded-for", "")
20
+ if x_forwarded_for:
21
+ client_ip = x_forwarded_for.split(",")[0]
22
+ else:
23
+ client_ip = request.client.host
24
+ if debug_mode:
25
+ print(f"Debug: Client IP detected as {client_ip}")
26
+ return client_ip
27
+ return "unknown"
28
+
29
+ def save_audio(audio, filename):
30
+ """保存音频为.wav文件"""
31
+ sample_rate, audio_data = audio
32
+ write_wav(filename, sample_rate, audio_data)
33
+
34
+ def save_image(image, filename):
35
+ """保存图片为.jpg文件"""
36
+ img = Image.fromarray(image.astype('uint8'))
37
+ img.save(filename)
38
+
39
+ def process(audio, image, text, request: gr.Request):
40
+ """处理语音、图片和文本的示例函数"""
41
+ client_ip = get_client_ip(request, True)
42
+ print(f"Processing request from IP: {client_ip}")
43
+
44
+ audio_info = "未收到音频"
45
+ image_info = "未收到图片"
46
+ text_info = "未收到文本"
47
+ audio_filename = None
48
+ image_filename = None
49
+ audio_text = ""
50
+ image_text = ""
51
+
52
+ if audio is not None:
53
+ sample_rate, audio_data = audio
54
+ audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"
55
+ # 保存音频为.wav文件
56
+ audio_filename = os.path.join(SAVE_DIR, f"audio_{client_ip}.wav")
57
+ save_audio(audio, audio_filename)
58
+ print(f"Audio saved as {audio_filename}")
59
+ # 调用tools.py中的audio_to_str方法处理音频
60
+ audio_text = audio_to_str("33c1b63d", "40bf7cd82e31ace30a9cfb76309a43a3", "OTY1YzIyZWM3YTg0OWZiMGE2ZjA2ZmE4", audio_filename)
61
+ if audio_text:
62
+ print(f"Audio text: {audio_text}")
63
+ else:
64
+ print("Audio processing failed")
65
+
66
+ if image is not None:
67
+ image_info = f"图片尺寸: {image.shape}"
68
+ # 保存图片为.jpg文件
69
+ image_filename = os.path.join(SAVE_DIR, f"image_{client_ip}.jpg")
70
+ save_image(image, image_filename)
71
+ print(f"Image saved as {image_filename}")
72
+ # 调用tools.py中的image_to_str方法处理图片
73
+ image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_filename)
74
+ if image_text:
75
+ print(f"Image text: {image_text}")
76
+ else:
77
+ print("Image processing failed")
78
+
79
+ if text:
80
+ text_info = f"接收到文本: {text}"
81
+
82
+ return audio_info, image_info, text_info, audio_text, image_text
83
+
84
+ # 创建自定义的聊天界面
85
+ with gr.Blocks() as app:
86
+ gr.Markdown("# ToDoAgent Multi-Modal Interface")
87
+
88
+ # 创建两个标签页
89
+ with gr.Tab("Chat"):
90
+ # 修复Chatbot类型警告
91
+ chatbot = gr.Chatbot(height=500, type="messages")
92
+
93
+ msg = gr.Textbox(label="输入消息", placeholder="输入您的问题...")
94
+
95
+ # 上传区域
96
+ with gr.Row():
97
+ audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
98
+ image_input = gr.Image(label="上传图片", type="numpy")
99
+
100
+ # 设置区域
101
+ with gr.Accordion("高级设置", open=False):
102
+ system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
103
+ max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="最大生成长度")
104
+ temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="温度")
105
+ top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
106
+
107
+ # 提交按钮
108
+ submit_btn = gr.Button("发送", variant="primary")
109
+
110
+ # 清除按钮
111
+ clear = gr.Button("清除聊天")
112
+
113
+ # 事件处理
114
+ def user(user_message, chat_history):
115
+ return "", chat_history + [{"role": "user", "content": user_message}]
116
+ #新增多模态处理--1
117
+ def respond(message, chat_history, system_message, max_tokens, temperature, top_p, audio=None, image=None, text=None, request=None):
118
+ """生成响应的函数"""
119
+ # 处理多模态输入
120
+ multimodal_content = ""
121
+ if audio is not None:
122
+ try:
123
+ audio_filename = os.path.join(SAVE_DIR, "temp_audio.wav")
124
+ save_audio(audio, audio_filename)
125
+ audio_text = audio_to_str("33c1b63d", "40bf7cd82e31ace30a9cfb76309a43a3", "OTY1YzIyZWM3YTg0OWZiMGE2ZjA2ZmE4", audio_filename)
126
+ if audio_text:
127
+ multimodal_content += f"音频内容: {audio_text}\n"
128
+ except Exception as e:
129
+ print(f"Audio processing error: {e}")
130
+
131
+ if image is not None:
132
+ try:
133
+ image_filename = os.path.join(SAVE_DIR, "temp_image.jpg")
134
+ save_image(image, image_filename)
135
+ image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_filename)
136
+ if image_text:
137
+ multimodal_content += f"图片内容: {image_text}\n"
138
+ except Exception as e:
139
+ print(f"Image processing error: {e}")
140
+
141
+ # 组合最终消息
142
+ final_message = message
143
+ if multimodal_content:
144
+ final_message = f"{message}\n\n{multimodal_content}"
145
+
146
+ # 构建消息历史
147
+ messages = [{"role": "system", "content": system_message}]
148
+ for chat in chat_history:
149
+ if isinstance(chat, dict) and "role" in chat and "content" in chat:
150
+ messages.append(chat)
151
+
152
+ messages.append({"role": "user", "content": final_message})
153
+
154
+ # 调用HuggingFace API
155
+ try:
156
+ response = client.chat_completion(
157
+ messages,
158
+ max_tokens=max_tokens,
159
+ stream=True,
160
+ temperature=temperature,
161
+ top_p=top_p,
162
+ )
163
+
164
+ partial_message = ""
165
+ for token in response:
166
+ if token.choices[0].delta.content is not None:
167
+ partial_message += token.choices[0].delta.content
168
+ yield partial_message
169
+ except Exception as e:
170
+ yield f"抱歉,生成响应时出现错误: {str(e)}"
171
+
172
+ def bot(chat_history, system_message, max_tokens, temperature, top_p, audio, image, text):
173
+ # 检查chat_history是否为空
174
+ if not chat_history or len(chat_history) == 0:
175
+ return
176
+
177
+ # 获取最后一条用户消息
178
+ last_message = chat_history[-1]
179
+ if not last_message or not isinstance(last_message, dict) or "content" not in last_message:
180
+ return
181
+
182
+ user_message = last_message["content"]
183
+
184
+ # 生成响应
185
+ bot_response = ""
186
+ for response in respond(
187
+ user_message,
188
+ chat_history[:-1],
189
+ system_message,
190
+ max_tokens,
191
+ temperature,
192
+ top_p,
193
+ audio,
194
+ image,
195
+ text
196
+ ):
197
+ bot_response = response
198
+ # 添加助手回复到聊天历史
199
+ updated_history = chat_history + [{"role": "assistant", "content": bot_response}]
200
+ yield updated_history
201
+
202
+ msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
203
+ bot, [chatbot, system_msg, max_tokens, temperature, top_p, audio_input, image_input, msg], chatbot
204
+ )
205
+
206
+ submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
207
+ bot, [chatbot, system_msg, max_tokens, temperature, top_p, audio_input, image_input, msg], chatbot
208
+ )
209
+
210
+ clear.click(lambda: None, None, chatbot, queue=False)
211
+
212
+ with gr.Tab("Audio/Image Processing"):
213
+ gr.Markdown("## 处理音频和图片")
214
+ audio_processor = gr.Audio(label="上传音频", type="numpy")
215
+ image_processor = gr.Image(label="上传图片", type="numpy")
216
+ text_input = gr.Textbox(label="输入文本")
217
+ process_btn = gr.Button("处理", variant="primary")
218
+ audio_output = gr.Textbox(label="音频信息")
219
+ image_output = gr.Textbox(label="图片信息")
220
+ text_output = gr.Textbox(label="文本信息")
221
+ audio_text_output = gr.Textbox(label="音频转文字结果")
222
+ image_text_output = gr.Textbox(label="图片转文字结果")
223
+
224
+ # 修改后的处理函数调用
225
+ process_btn.click(
226
+ process,
227
+ inputs=[audio_processor, image_processor, text_input],
228
+ outputs=[audio_output, image_output, text_output, audio_text_output, image_text_output]
229
+ )
230
+
231
+ if __name__ == "__main__":
232
+ app.launch()
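For reference, when both an audio clip and an image are attached, respond() above transcribes them and appends the text to the user message before calling client.chat_completion. An illustrative sketch of the resulting message list (contents invented here; the system prompt is the default value from the settings accordion):

messages = [
    {"role": "system", "content": "You are a friendly Chatbot."},
    {"role": "user",
     "content": "帮我记一下\n\n音频内容: 明天下午三点开会\n图片内容: Team offsite Friday 3pm\n"},
]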
temp_audio.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a873051a6c784789c314ab829772eac2446271337f54d58db48e921e81ab71e
3
+ size 710700
todogen_LLM_config.yaml CHANGED
@@ -38,4 +38,14 @@ HF_CONFIG_PATH:
38
  openai_filter:
39
  base_url_filter: https://aihubmix.com/v1
40
  api_key_filter: sk-BSNyITzJBSSgfFdJ792b66C7789c479cA7Ec1e36FfB343A1
41
- model_filter: gpt-4o-mini
38
  openai_filter:
39
  base_url_filter: https://aihubmix.com/v1
40
  api_key_filter: sk-BSNyITzJBSSgfFdJ792b66C7789c479cA7Ec1e36FfB343A1
41
+ model_filter: gpt-4o-mini
42
+
43
+ xunfei:
44
+ appid: 33c1b63d
45
+ apikey: 40bf7cd82e31ace30a9cfb76309a43a3
46
+ apisecret: OTY1YzIyZWM3YTg0OWZiMGE2ZjA2ZmE4
47
+
48
+ azure_speech:
49
+ key: 45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ
50
+ region: eastus2
51
+ endpoint: https://eastus2.stt.speech.microsoft.com
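The new xunfei and azure_speech sections mirror credentials that app.py and tools.py currently pass as inline literals. A minimal sketch of reading them from this file instead, assuming PyYAML is installed and the caller runs next to todogen_LLM_config.yaml (illustrative only, not part of this commit):

import yaml
from tools import audio_to_str

with open("todogen_LLM_config.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

xf = cfg["xunfei"]
# Same call app.py makes with hardcoded literals, now driven by the config file.
transcript = audio_to_str(xf["appid"], xf["apikey"], xf["apisecret"], "temp_audio.wav")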
tools.py ADDED
@@ -0,0 +1,828 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding:utf-8 -*-
3
+ import os
4
+ import datetime
5
+ import re
6
+ import time
7
+ import traceback
8
+ import math
9
+ from urllib.parse import urlparse
10
+ from urllib3 import encode_multipart_formdata
11
+ from wsgiref.handlers import format_date_time
12
+ from time import mktime
13
+ import hashlib
14
+ import base64
15
+ import hmac
16
+ from urllib.parse import urlencode
17
+ import json
18
+ import requests
19
+ import azure.cognitiveservices.speech as speechsdk
20
+
21
+ # Constants
22
+ LFASR_HOST = "http://upload-ost-api.xfyun.cn/file" # 文件上传Host
23
+ API_INIT = "/mpupload/init" # 初始化接口
24
+ API_UPLOAD = "/upload" # 上传接口
25
+ API_CUT = "/mpupload/upload" # 分片上传接口
26
+ API_CUT_COMPLETE = "/mpupload/complete" # 分片完成接口
27
+ API_CUT_CANCEL = "/mpupload/cancel" # 分片取消接口
28
+ FILE_PIECE_SIZE = 5242880 # 文件分片大小5M
29
+ PRO_CREATE_URI = "/v2/ost/pro_create"
30
+ QUERY_URI = "/v2/ost/query"
31
+
32
+
33
+ # File upload helper class
34
+ class FileUploader:
35
+ def __init__(self, app_id, api_key, api_secret, upload_file_path):
36
+ self.app_id = app_id
37
+ self.api_key = api_key
38
+ self.api_secret = api_secret
39
+ self.upload_file_path = upload_file_path
40
+
41
+ def get_request_id(self):
42
+ """生成请求ID"""
43
+ return time.strftime("%Y%m%d%H%M")
44
+
45
+ def hashlib_256(self, data):
46
+ """计算 SHA256 哈希"""
47
+ m = hashlib.sha256(bytes(data.encode(encoding="utf-8"))).digest()
48
+ digest = "SHA-256=" + base64.b64encode(m).decode(encoding="utf-8")
49
+ return digest
50
+
51
+ def assemble_auth_header(self, request_url, file_data_type, method="", body=""):
52
+ """组装鉴权头部"""
53
+ u = urlparse(request_url)
54
+ host = u.hostname
55
+ path = u.path
56
+ now = datetime.datetime.now()
57
+ date = format_date_time(mktime(now.timetuple()))
58
+ digest = "SHA256=" + self.hashlib_256("")
59
+ signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1\ndigest: {}".format(
60
+ host, date, method, path, digest
61
+ )
62
+ signature_sha = hmac.new(
63
+ self.api_secret.encode("utf-8"),
64
+ signature_origin.encode("utf-8"),
65
+ digestmod=hashlib.sha256,
66
+ ).digest()
67
+ signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
68
+ authorization = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % (
69
+ self.api_key,
70
+ "hmac-sha256",
71
+ "host date request-line digest",
72
+ signature_sha,
73
+ )
74
+ headers = {
75
+ "host": host,
76
+ "date": date,
77
+ "authorization": authorization,
78
+ "digest": digest,
79
+ "content-type": file_data_type,
80
+ }
81
+ return headers
82
+
83
+ def call_api(self, url, file_data, file_data_type):
84
+ """调用POST API接口"""
85
+ headers = self.assemble_auth_header(
86
+ url, file_data_type, method="POST", body=file_data
87
+ )
88
+ try:
89
+ resp = requests.post(url, headers=headers, data=file_data, timeout=8)
90
+ print("上传状态:", resp.status_code, resp.text)
91
+ return resp.json()
92
+ except Exception as e:
93
+ print("上传失败!Exception :%s" % e)
94
+ return None
95
+
96
+ def upload_cut_complete(self, upload_id):
97
+ """分块上传完成"""
98
+ body_dict = {
99
+ "app_id": self.app_id,
100
+ "request_id": self.get_request_id(),
101
+ "upload_id": upload_id,
102
+ }
103
+ file_data_type = "application/json"
104
+ url = LFASR_HOST + API_CUT_COMPLETE
105
+ response = self.call_api(url, json.dumps(body_dict), file_data_type)
106
+ if response and "data" in response and "url" in response["data"]:
107
+ file_url = response["data"]["url"]
108
+ print("任务上传结束")
109
+ return file_url
110
+ else:
111
+ print("分片上传完成失败", response)
112
+ return None
113
+
114
+ def upload_file(self):
115
+ """上传文件,根据文件大小选择分片或普通上传"""
116
+ file_total_size = os.path.getsize(self.upload_file_path)
117
+ if file_total_size < 31457280: # 30MB
118
+ print("-----不使用分块上传-----")
119
+ return self.simple_upload()
120
+ else:
121
+ print("-----使用分块上传-----")
122
+ return self.multipart_upload()
123
+
124
+ def simple_upload(self):
125
+ """简单上传文件"""
126
+ try:
127
+ with open(self.upload_file_path, mode="rb") as f:
128
+ file = {
129
+ "data": (self.upload_file_path, f.read()),
130
+ "app_id": self.app_id,
131
+ "request_id": self.get_request_id(),
132
+ }
133
+ encode_data = encode_multipart_formdata(file)
134
+ file_data = encode_data[0]
135
+ file_data_type = encode_data[1]
136
+ url = LFASR_HOST + API_UPLOAD
137
+ response = self.call_api(url, file_data, file_data_type)
138
+ if response and "data" in response and "url" in response["data"]:
139
+ return response["data"]["url"]
140
+ else:
141
+ print("简单上传失败", response)
142
+ return None
143
+ except FileNotFoundError:
144
+ print("文件未找到:", self.upload_file_path)
145
+ return None
146
+
147
+ def multipart_upload(self):
148
+ """分片上传文件"""
149
+ upload_id = self.prepare_upload()
150
+ if not upload_id:
151
+ return None
152
+
153
+ if not self.do_upload(upload_id):
154
+ return None
155
+
156
+ file_url = self.upload_cut_complete(upload_id)
157
+ print("分片上传地址:", file_url)
158
+ return file_url
159
+
160
+ def prepare_upload(self):
161
+ """预处理,获取upload_id"""
162
+ body_dict = {
163
+ "app_id": self.app_id,
164
+ "request_id": self.get_request_id(),
165
+ }
166
+ url = LFASR_HOST + API_INIT
167
+ file_data_type = "application/json"
168
+ response = self.call_api(url, json.dumps(body_dict), file_data_type)
169
+ if response and "data" in response and "upload_id" in response["data"]:
170
+ return response["data"]["upload_id"]
171
+ else:
172
+ print("预处理失败", response)
173
+ return None
174
+
175
+ def do_upload(self, upload_id):
176
+ """执行分片上传"""
177
+ file_total_size = os.path.getsize(self.upload_file_path)
178
+ chunk_size = FILE_PIECE_SIZE
179
+ chunks = math.ceil(file_total_size / chunk_size)
180
+ request_id = self.get_request_id()
181
+ slice_id = 1
182
+
183
+ print(
184
+ "文件:",
185
+ self.upload_file_path,
186
+ " 文件大小:",
187
+ file_total_size,
188
+ " 分块大小:",
189
+ chunk_size,
190
+ " 分块数:",
191
+ chunks,
192
+ )
193
+
194
+ with open(self.upload_file_path, mode="rb") as content:
195
+ while slice_id <= chunks:
196
+ current_size = min(
197
+ chunk_size, file_total_size - (slice_id - 1) * chunk_size
198
+ )
199
+
200
+ file = {
201
+ "data": (self.upload_file_path, content.read(current_size)),
202
+ "app_id": self.app_id,
203
+ "request_id": request_id,
204
+ "upload_id": upload_id,
205
+ "slice_id": slice_id,
206
+ }
207
+
208
+ encode_data = encode_multipart_formdata(file)
209
+ file_data = encode_data[0]
210
+ file_data_type = encode_data[1]
211
+ url = LFASR_HOST + API_CUT
212
+
213
+ resp = self.call_api(url, file_data, file_data_type)
214
+ count = 0
215
+ while not resp and (count < 3):
216
+ print("上传重试")
217
+ resp = self.call_api(url, file_data, file_data_type)
218
+ count = count + 1
219
+ time.sleep(1)
220
+ if not resp:
221
+ print("分片上传失败")
222
+ return False
223
+ slice_id += 1
224
+
225
+ return True
226
+
227
+
228
+ class ResultExtractor:
229
+ def __init__(self, appid, apikey, apisecret):
230
+ # POST 请求相关参数
231
+ self.Host = "ost-api.xfyun.cn"
232
+ self.RequestUriCreate = PRO_CREATE_URI
233
+ self.RequestUriQuery = QUERY_URI
234
+ # 设置 URL
235
+ if re.match(r"^\d", self.Host):
236
+ self.urlCreate = "http://" + self.Host + self.RequestUriCreate
237
+ self.urlQuery = "http://" + self.Host + self.RequestUriQuery
238
+ else:
239
+ self.urlCreate = "https://" + self.Host + self.RequestUriCreate
240
+ self.urlQuery = "https://" + self.Host + self.RequestUriQuery
241
+ self.HttpMethod = "POST"
242
+ self.APPID = appid
243
+ self.Algorithm = "hmac-sha256"
244
+ self.HttpProto = "HTTP/1.1"
245
+ self.UserName = apikey
246
+ self.Secret = apisecret
247
+
248
+ # 设置当前时间
249
+ cur_time_utc = datetime.datetime.now(datetime.timezone.utc)
250
+ self.Date = self.httpdate(cur_time_utc)
251
+
252
+ # 设置测试音频文件参数
253
+ self.BusinessArgsCreate = {
254
+ "language": "zh_cn",
255
+ "accent": "mandarin",
256
+ "domain": "pro_ost_ed",
257
+ }
258
+
259
+ def img_read(self, path):
260
+ with open(path, "rb") as fo:
261
+ return fo.read()
262
+
263
+ def hashlib_256(self, res):
264
+ m = hashlib.sha256(bytes(res.encode(encoding="utf-8"))).digest()
265
+ result = "SHA-256=" + base64.b64encode(m).decode(encoding="utf-8")
266
+ return result
267
+
268
+ def httpdate(self, dt):
269
+ weekday = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][dt.weekday()]
270
+ month = [
271
+ "Jan",
272
+ "Feb",
273
+ "Mar",
274
+ "Apr",
275
+ "May",
276
+ "Jun",
277
+ "Jul",
278
+ "Aug",
279
+ "Sep",
280
+ "Oct",
281
+ "Nov",
282
+ "Dec",
283
+ ][dt.month - 1]
284
+ return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
285
+ weekday,
286
+ dt.day,
287
+ month,
288
+ dt.year,
289
+ dt.hour,
290
+ dt.minute,
291
+ dt.second,
292
+ )
293
+
294
+ def generateSignature(self, digest, uri):
295
+ signature_str = "host: " + self.Host + "\n"
296
+ signature_str += "date: " + self.Date + "\n"
297
+ signature_str += self.HttpMethod + " " + uri + " " + self.HttpProto + "\n"
298
+ signature_str += "digest: " + digest
299
+ signature = hmac.new(
300
+ bytes(self.Secret.encode("utf-8")),
301
+ bytes(signature_str.encode("utf-8")),
302
+ digestmod=hashlib.sha256,
303
+ ).digest()
304
+ result = base64.b64encode(signature)
305
+ return result.decode(encoding="utf-8")
306
+
307
+ def init_header(self, data, uri):
308
+ digest = self.hashlib_256(data)
309
+ sign = self.generateSignature(digest, uri)
310
+ auth_header = (
311
+ 'api_key="%s",algorithm="%s", '
312
+ 'headers="host date request-line digest", '
313
+ 'signature="%s"' % (self.UserName, self.Algorithm, sign)
314
+ )
315
+ headers = {
316
+ "Content-Type": "application/json",
317
+ "Accept": "application/json",
318
+ "Method": "POST",
319
+ "Host": self.Host,
320
+ "Date": self.Date,
321
+ "Digest": digest,
322
+ "Authorization": auth_header,
323
+ }
324
+ return headers
325
+
326
+ def get_create_body(self, fileurl):
327
+ post_data = {
328
+ "common": {"app_id": self.APPID},
329
+ "business": self.BusinessArgsCreate,
330
+ "data": {"audio_src": "http", "audio_url": fileurl, "encoding": "raw"},
331
+ }
332
+ body = json.dumps(post_data)
333
+ return body
334
+
335
+ def get_query_body(self, task_id):
336
+ post_data = {
337
+ "common": {"app_id": self.APPID},
338
+ "business": {
339
+ "task_id": task_id,
340
+ },
341
+ }
342
+ body = json.dumps(post_data)
343
+ return body
344
+
345
+ def call(self, url, body, headers):
346
+ try:
347
+ response = requests.post(url, data=body, headers=headers, timeout=8)
348
+ status_code = response.status_code
349
+ if status_code != 200:
350
+ info = response.content
351
+ return info
352
+ else:
353
+ try:
354
+ return json.loads(response.text)
355
+ except json.JSONDecodeError:
356
+ return response.text
357
+ except Exception as e:
358
+ print("Exception :%s" % e)
359
+ return None
360
+
361
+ def task_create(self, fileurl):
362
+ body = self.get_create_body(fileurl)
363
+ headers_create = self.init_header(body, self.RequestUriCreate)
364
+ return self.call(self.urlCreate, body, headers_create)
365
+
366
+ def task_query(self, task_id):
367
+ query_body = self.get_query_body(task_id)
368
+ headers_query = self.init_header(query_body, self.RequestUriQuery)
369
+ return self.call(self.urlQuery, query_body, headers_query)
370
+
371
+ def extract_text(self, result):
372
+ """
373
+ Extract the recognized text from an API response.
374
+ Supports several result formats, with defensive error handling.
375
+ """
376
+ # 调试输出:打印原始结果类型
377
+ print(f"\n[DEBUG] extract_text 输入类型: {type(result)}")
378
+
379
+ # 如果是字符串,尝试解析为JSON
380
+ if isinstance(result, str):
381
+ print(f"[DEBUG] 字符串内容 (前200字符): {result[:200]}")
382
+ try:
383
+ result = json.loads(result)
384
+ print("[DEBUG] 成功解析字符串为JSON对象")
385
+ except json.JSONDecodeError:
386
+ print("[DEBUG] 无法解析为JSON,返回原始字符串")
387
+ return result
388
+
389
+ # 处理字典类型的结果
390
+ if isinstance(result, dict):
391
+ print("[DEBUG] 处理字典类型结果")
392
+
393
+ # 1. 检查错误信息
394
+ if "code" in result and result["code"] != 0:
395
+ error_msg = result.get("message", "未知错误")
396
+ print(
397
+ f"[ERROR] API返回错误: code={result['code']}, message={error_msg}"
398
+ )
399
+ return f"错误: {error_msg}"
400
+
401
+ # 2. 检查直接包含文本结果的情况
402
+ if "result" in result and isinstance(result["result"], str):
403
+ print("[DEBUG] 找到直接结果字段")
404
+ return result["result"]
405
+
406
+ # 3. 检查lattice结构(详细结果)
407
+ if "lattice" in result and isinstance(result["lattice"], list):
408
+ print("[DEBUG] 解析lattice结构")
409
+ text_parts = []
410
+ for lattice in result["lattice"]:
411
+ if not isinstance(lattice, dict):
412
+ continue
413
+
414
+ # 获取json_1best内容
415
+ json_1best = lattice.get("json_1best", {})
416
+ if not json_1best or not isinstance(json_1best, dict):
417
+ continue
418
+
419
+ # 处理st字段 - 修正:st可能是字典或列表
420
+ st_content = json_1best.get("st")
421
+ st_list = []
422
+ if isinstance(st_content, dict):
423
+ st_list = [st_content] # 转为列表统一处理
424
+ elif isinstance(st_content, list):
425
+ st_list = st_content
426
+
427
+ for st in st_list:
428
+ if isinstance(st, str):
429
+ # 直接是字符串结果
430
+ text_parts.append(st)
431
+ elif isinstance(st, dict):
432
+ # 处理字典结构的st
433
+ rt = st.get("rt", [])
434
+ if not isinstance(rt, list):
435
+ continue
436
+
437
+ for item in rt:
438
+ if isinstance(item, dict):
439
+ ws_list = item.get("ws", [])
440
+ if isinstance(ws_list, list):
441
+ for ws in ws_list:
442
+ if isinstance(ws, dict):
443
+ cw_list = ws.get("cw", [])
444
+ if isinstance(cw_list, list):
445
+ for cw in cw_list:
446
+ if isinstance(cw, dict):
447
+ w = cw.get("w", "")
448
+ if w:
449
+ text_parts.append(w)
450
+ return "".join(text_parts)
451
+
452
+ # 4. 检查简化结构(直接包含st)
453
+ if "st" in result and isinstance(result["st"], list):
454
+ print("[DEBUG] 解析st结构")
455
+ text_parts = []
456
+ for st in result["st"]:
457
+ if isinstance(st, str):
458
+ text_parts.append(st)
459
+ elif isinstance(st, dict):
460
+ rt = st.get("rt", [])
461
+ if isinstance(rt, list):
462
+ for item in rt:
463
+ if isinstance(item, dict):
464
+ ws_list = item.get("ws", [])
465
+ if isinstance(ws_list, list):
466
+ for ws in ws_list:
467
+ if isinstance(ws, dict):
468
+ cw_list = ws.get("cw", [])
469
+ if isinstance(cw_list, list):
470
+ for cw in cw_list:
471
+ if isinstance(cw, dict):
472
+ w = cw.get("w", "")
473
+ if w:
474
+ text_parts.append(w)
475
+ return "".join(text_parts)
476
+
477
+ # 5. 其他未知结构
478
+ print("[WARNING] 无法识别的结果结构")
479
+ return json.dumps(result, indent=2, ensure_ascii=False)
480
+
481
+ # 6. 非字典类型结果
482
+ print(f"[WARNING] 非字典类型结果: {type(result)}")
483
+ return str(result)
484
+
485
+
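As an illustration of what the lattice branch of extract_text above consumes, the following minimal response shape (invented for this sketch, not an actual xfyun payload) would be joined into the string 明天下午开会:

example = {
    "lattice": [
        {"json_1best": {"st": {"rt": [{"ws": [
            {"cw": [{"w": "明天"}]},
            {"cw": [{"w": "下午"}]},
            {"cw": [{"w": "开会"}]},
        ]}]}}}
    ]
}
# Dummy credentials suffice: __init__ only stores them and builds request URLs.
print(ResultExtractor("appid", "apikey", "apisecret").extract_text(example))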
486
+ def audio_to_str(appid, apikey, apisecret, file_path):
487
+ """
488
+ Call the iFLYTEK (xfyun) open platform API and return the transcription of an audio file.
489
+
490
+ Parameters:
491
+ appid (str): iFLYTEK open platform appid.
492
+ apikey (str): iFLYTEK open platform apikey.
493
+ apisecret (str): iFLYTEK open platform apisecret.
494
+ file_path (str): Path to the audio file.
495
+
496
+ Returns:
497
+ str: The transcription text, or None if an error occurred.
498
+ """
499
+ # 检查文件是否存在
500
+ if not os.path.exists(file_path):
501
+ print(f"错误:文件 {file_path} 不存在")
502
+ return None
503
+
504
+ try:
505
+ # 1. 文件上传
506
+ file_uploader = FileUploader(
507
+ app_id=appid,
508
+ api_key=apikey,
509
+ api_secret=apisecret,
510
+ upload_file_path=file_path,
511
+ )
512
+ fileurl = file_uploader.upload_file()
513
+ if not fileurl:
514
+ print("文件上传失败")
515
+ return None
516
+ print("文件上传成功,fileurl:", fileurl)
517
+
518
+ # 2. 创建任务并查询结果
519
+ result_extractor = ResultExtractor(appid, apikey, apisecret)
520
+ print("\n------ 创建任务 -------")
521
+ create_response = result_extractor.task_create(fileurl)
522
+
523
+ # 调试输出创建响应
524
+ print(
525
+ f"[DEBUG] 创建任务响应: {json.dumps(create_response, indent=2, ensure_ascii=False)}"
526
+ )
527
+
528
+ if not isinstance(create_response, dict) or "data" not in create_response:
529
+ print("创建任务失败:", create_response)
530
+ return None
531
+
532
+ task_id = create_response["data"]["task_id"]
533
+ print(f"任务ID: {task_id}")
534
+
535
+ # 查询任务
536
+ print("\n------ 查询任务 -------")
537
+ print("任务转写中······")
538
+ max_attempts = 30
539
+ attempt = 0
540
+
541
+ while attempt < max_attempts:
542
+ result = result_extractor.task_query(task_id)
543
+
544
+ # 调试输出查询响应
545
+ print(f"\n[QUERY {attempt + 1}] 响应类型: {type(result)}")
546
+ if isinstance(result, dict):
547
+ print(
548
+ f"[QUERY {attempt + 1}] 响应内容: {json.dumps(result, indent=2, ensure_ascii=False)}"
549
+ )
550
+ else:
551
+ print(
552
+ f"[QUERY {attempt + 1}] 响应内容 (前200字符): {str(result)[:200]}"
553
+ )
554
+
555
+ # 检查响应是否有效
556
+ if not isinstance(result, dict):
557
+ print(f"无效响应类型: {type(result)}")
558
+ return None
559
+
560
+ # 检查API错误码
561
+ if "code" in result and result["code"] != 0:
562
+ error_msg = result.get("message", "未知错误")
563
+ print(f"API错误: code={result['code']}, message={error_msg}")
564
+ return None
565
+
566
+ # 获取任务状态
567
+ task_data = result.get("data", {})
568
+ task_status = task_data.get("task_status")
569
+
570
+ if not task_status:
571
+ print("响应中缺少任务状态字段")
572
+ print("完整响应:", json.dumps(result, indent=2, ensure_ascii=False))
573
+ return None
574
+
575
+ # 处理不同状态
576
+ if task_status in ["3", "4"]: # 任务已完成或回调完成
577
+ print("转写完成···")
578
+
579
+ # 提取结果
580
+ result_content = task_data.get("result")
581
+ if result_content is not None:
582
+ try:
583
+ result_text = result_extractor.extract_text(result_content)
584
+ print("\n转写结果:\n", result_text)
585
+ return result_text
586
+ except Exception as e:
587
+ print(f"\n提取文本时出错: {str(e)}")
588
+ print(f"错误详情:\n{traceback.format_exc()}")
589
+ print(
590
+ "原始结果内容:",
591
+ json.dumps(result_content, indent=2, ensure_ascii=False),
592
+ )
593
+ return None
594
+ else:
595
+ print("\n响应中缺少结果字段")
596
+ print("完整响应:", json.dumps(result, indent=2, ensure_ascii=False))
597
+ return None
598
+
599
+ elif task_status in ["1", "2"]: # 任务待处理或处理中
600
+ print(
601
+ f"任务状态:{task_status},等待中... (尝试 {attempt + 1}/{max_attempts})"
602
+ )
603
+ time.sleep(5)
604
+ attempt += 1
605
+ else:
606
+ print(f"未知任务状态:{task_status}")
607
+ print("完整响应:", json.dumps(result, indent=2, ensure_ascii=False))
608
+ return None
609
+ else:
610
+ print(f"超过最大查询次数({max_attempts}),任务可能仍在处理中")
611
+ return None
612
+
613
+ except Exception as e:
614
+ print(f"发生异常: {str(e)}")
615
+ print(f"错误详情:\n{traceback.format_exc()}")
616
+ return None
617
+
618
+
619
+ """
620
+ 1. General OCR: the base64-encoded image must not exceed 10 MB.
621
+ 2. Obtain appid, apiSecret and apiKey from the iFLYTEK open platform console and fill them into this demo.
622
+ 3. Supports Chinese and English, both handwritten and printed text.
623
+ 4. Improved accuracy on tilted text, and recognition of some rare characters.
624
+ """
625
+
626
+ # Image recognition API endpoint
627
+ URL = "https://api.xf-yun.com/v1/private/sf8e6aca1"
628
+
629
+
630
+ class AssembleHeaderException(Exception):
631
+ def __init__(self, msg):
632
+ self.message = msg
633
+
634
+
635
+ class Url:
636
+ def __init__(self, host, path, schema):
637
+ self.host = host
638
+ self.path = path
639
+ self.schema = schema
640
+ pass
641
+
642
+
643
+ # calculate sha256 and encode to base64
644
+ def sha256base64(data):
645
+ sha256 = hashlib.sha256()
646
+ sha256.update(data)
647
+ digest = base64.b64encode(sha256.digest()).decode(encoding="utf-8")
648
+ return digest
649
+
650
+
651
+ def parse_url(requset_url):
652
+ stidx = requset_url.index("://")
653
+ host = requset_url[stidx + 3 :]
654
+ schema = requset_url[: stidx + 3]
655
+ edidx = host.index("/")
656
+ if edidx <= 0:
657
+ raise AssembleHeaderException("invalid request url:" + requset_url)
658
+ path = host[edidx:]
659
+ host = host[:edidx]
660
+ u = Url(host, path, schema)
661
+ return u
662
+
663
+
664
+ # build websocket auth request url
665
+ def assemble_ws_auth_url(requset_url, method="POST", api_key="", api_secret=""):
666
+ u = parse_url(requset_url)
667
+ host = u.host
668
+ path = u.path
669
+ now = datetime.datetime.now()
670
+ date = format_date_time(mktime(now.timetuple()))
671
+ # print(date) # 可选:打印Date值
672
+
673
+ signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
674
+ host, date, method, path
675
+ )
676
+ # print(signature_origin) # 可选:打印签名原文
677
+ signature_sha = hmac.new(
678
+ api_secret.encode("utf-8"),
679
+ signature_origin.encode("utf-8"),
680
+ digestmod=hashlib.sha256,
681
+ ).digest()
682
+ signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
683
+ authorization_origin = (
684
+ 'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
685
+ % (api_key, "hmac-sha256", "host date request-line", signature_sha)
686
+ )
687
+ authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
688
+ encoding="utf-8"
689
+ )
690
+ # print(authorization_origin) # 可选:打印鉴权原文
691
+ values = {"host": host, "date": date, "authorization": authorization}
692
+
693
+ return requset_url + "?" + urlencode(values)
694
+
695
+
696
+ def image_to_str(endpoint=None, key=None, unused_param=None, file_path=None):
697
+ """
698
+ Recognize text in an image with the Azure Computer Vision Read API.
699
+
700
+ Parameters:
701
+ endpoint (str): Azure Computer Vision endpoint URL.
702
+ key (str): Azure Computer Vision API key.
703
+ unused_param (str): Unused; kept only for call-signature compatibility.
704
+ file_path (str): Path to the image file.
705
+
706
+ Returns:
707
+ str: The text recognized in the image, or None if an error occurred.
708
+ """
709
+
710
+ # 默认配置
711
+ if endpoint is None:
712
+ endpoint = "https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/"
713
+ if key is None:
714
+ key = "45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ"
715
+
716
+ try:
717
+ # 读取图片文件
718
+ with open(file_path, "rb") as f:
719
+ image_data = f.read()
720
+
721
+ # 构造请求URL
722
+ analyze_url = endpoint.rstrip('/') + "/vision/v3.2/read/analyze"
723
+
724
+ # 设置请求头
725
+ headers = {
726
+ 'Ocp-Apim-Subscription-Key': key,
727
+ 'Content-Type': 'application/octet-stream'
728
+ }
729
+
730
+ # 发送POST请求开始分析
731
+ response = requests.post(analyze_url, headers=headers, data=image_data)
732
+
733
+ if response.status_code != 202:
734
+ print(f"分析请求失败: {response.status_code}, {response.text}")
735
+ return None
736
+
737
+ # 获取操作位置
738
+ operation_url = response.headers["Operation-Location"]
739
+
740
+ # 轮询结果
741
+ import time
742
+ while True:
743
+ result_response = requests.get(operation_url, headers={'Ocp-Apim-Subscription-Key': key})
744
+ result = result_response.json()
745
+
746
+ if result["status"] == "succeeded":
747
+ # 提取文字
748
+ text_results = []
749
+ if "analyzeResult" in result and "readResults" in result["analyzeResult"]:
750
+ for read_result in result["analyzeResult"]["readResults"]:
751
+ for line in read_result["lines"]:
752
+ text_results.append(line["text"])
753
+
754
+ return " ".join(text_results) if text_results else ""
755
+
756
+ elif result["status"] == "failed":
757
+ print(f"文字识别失败: {result}")
758
+ return None
759
+
760
+ # 等待1秒后重试
761
+ time.sleep(1)
762
+
763
+ except Exception as e:
764
+ print(f"发生异常: {e}")
765
+ return None
766
+
767
+
768
+ if __name__ == "__main__":
769
+ # 输入讯飞开放平台的 appid,secret、key 和文件路径
770
+ appid = "33c1b63d"
771
+ apikey = "40bf7cd82e31ace30a9cfb76309a43a3"
772
+ apisecret = "OTY1YzIyZWM3YTg0OWZiMGE2ZjA2ZmE4"
773
+ audio_path = r"audio_sample_little.wav" # 确保文件路径正确
774
+ image_path = r"1.png" # 确保文件路径正确
775
+
776
+ # 音频转文字
777
+ audio_text = audio_to_str(appid, apikey, apisecret, audio_path)
778
+ # 图片转文字
779
+ image_text = image_to_str(endpoint="https://ai-siyuwang5414995ai361208251338.cognitiveservices.azure.com/", key="45PYY2Av9CdMCveAjVG43MGKrnHzSxdiFTK9mWBgrOsMAHavxKj0JQQJ99BDACHYHv6XJ3w3AAAAACOGeVpQ", unused_param=None, file_path=image_path)
780
+
781
+ print("-"* 20)
782
+
783
+ print("\n音频转文字结果:", audio_text)
784
+ print("\n图片转文字结果:", image_text)
785
+
786
+
787
+ def azure_speech_to_text(speech_key, speech_region, audio_file_path):
788
+ """
789
+ Convert an audio file to text with the Azure Speech service.
790
+
791
+ Parameters:
792
+ speech_key (str): Azure Speech API key.
793
+ speech_region (str): Azure Speech service region.
794
+ audio_file_path (str): Path to the audio file.
795
+
796
+ Returns:
797
+ str: The recognized text, or None if an error occurred.
798
+ """
799
+ try:
800
+ # 设置语音配置
801
+ speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
802
+ speech_config.speech_recognition_language = "zh-CN" # 设置为中文
803
+
804
+ # 设置音频配置
805
+ audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
806
+
807
+ # 创建语音识别器
808
+ speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
809
+
810
+ # 执行语音识别
811
+ result = speech_recognizer.recognize_once()
812
+
813
+ # 检查识别结果
814
+ if result.reason == speechsdk.ResultReason.RecognizedSpeech:
815
+ print(f"Azure Speech识别成功: {result.text}")
816
+ return result.text
817
+ elif result.reason == speechsdk.ResultReason.NoMatch:
818
+ print("Azure Speech未识别到语音")
819
+ return None
820
+ elif result.reason == speechsdk.ResultReason.Canceled:
821
+ cancellation_details = result.cancellation_details
822
+ print(f"Azure Speech识别被取消: {cancellation_details.reason}")
823
+ if cancellation_details.reason == speechsdk.CancellationReason.Error:
824
+ print(f"错误详情: {cancellation_details.error_details}")
825
+ return None
826
+ except Exception as e:
827
+ print(f"Azure Speech识别出错: {str(e)}")
828
+ return None
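azure_speech_to_text is defined after the __main__ demo above and is not exercised by it. A hedged usage sketch that wires it to the azure_speech section added to todogen_LLM_config.yaml in this commit (paths assume the config and the temp_audio.wav tracked in this commit sit in the working directory):

import yaml

with open("todogen_LLM_config.yaml", encoding="utf-8") as f:
    az = yaml.safe_load(f)["azure_speech"]

# Single-shot recognition of a local WAV file; the language is fixed to zh-CN above.
text = azure_speech_to_text(az["key"], az["region"], "temp_audio.wav")
print("Azure speech-to-text:", text)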