Siyu Wang committed on
Commit
2b2a944
·
1 Parent(s): 1f172f7

Moved from https://huggingface.co/spaces/siyuwang541/ToDoAgent for the hackathon

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. LICENSE +201 -0
  3. LLM/.DS_Store +0 -0
  4. LLM/Database/FalseNegative_samples.json +1 -0
  5. LLM/Database/FalsePositive_samples.json +0 -0
  6. LLM/Database/Manual_filtering_samples.json +190 -0
  7. LLM/Database/Messages.json +0 -0
  8. LLM/Database/README.md +8 -0
  9. LLM/Database/TrueNegative_samples.json +197 -0
  10. LLM/Database/TruePositive_samples.json +270 -0
  11. LLM/Database/classify_samples.py +222 -0
  12. LLM/Database/negative_samples.json +1 -0
  13. LLM/Database/positive_samples.json +0 -0
  14. LLM/Database/update_Messagejson.py +63 -0
  15. LLM/Notify/NotifyReadme.md +50 -0
  16. LLM/Notify/__pycache__/dataBaseConnecter.cpython-312.pyc +0 -0
  17. LLM/Notify/compareDb2txt.py +130 -0
  18. LLM/Notify/dataBaseConnecter.py +128 -0
  19. LLM/Notify/db2txt.py +88 -0
  20. LLM/Notify/notifyMain.py +88 -0
  21. LLM/Notify/usrSpareTime.py +148 -0
  22. LLM/filter_message/README.MD +18 -0
  23. LLM/filter_message/__pycache__/libs.cpython-312.pyc +0 -0
  24. LLM/filter_message/libs.py +217 -0
  25. LLM/filter_message/main.py +124 -0
  26. LLM/filter_message/prompt.md +96 -0
  27. LLM/filter_message/prompt.txt +90 -0
  28. LLM/filter_message/requirements.txt +4 -0
  29. LLM/orchestrator.py +95 -0
  30. LLM/requirements.txt +12 -0
  31. LLM/todogen_LLM/FalsePositive_few_shot.txt +31 -0
  32. LLM/todogen_LLM/TruePositive_few_shot.txt +59 -0
  33. LLM/todogen_LLM/__pycache__/compare_data.cpython-312.pyc +0 -0
  34. LLM/todogen_LLM/__pycache__/config_loader.cpython-312.pyc +0 -0
  35. LLM/todogen_LLM/__pycache__/database_of_messages.cpython-312.pyc +0 -0
  36. LLM/todogen_LLM/__pycache__/export_todolist.cpython-312.pyc +0 -0
  37. LLM/todogen_LLM/__pycache__/filter_message_list.cpython-312.pyc +0 -0
  38. LLM/todogen_LLM/__pycache__/filter_useful_data_to_dict.cpython-312.pyc +0 -0
  39. LLM/todogen_LLM/__pycache__/logger_config.cpython-312.pyc +0 -0
  40. LLM/todogen_LLM/__pycache__/path_validator.cpython-312.pyc +0 -0
  41. LLM/todogen_LLM/__pycache__/receiving_useful_messages.cpython-312.pyc +0 -0
  42. LLM/todogen_LLM/__pycache__/todogen_llm.cpython-312.pyc +0 -0
  43. LLM/todogen_LLM/compare_data.py +209 -0
  44. LLM/todogen_LLM/config_loader.py +34 -0
  45. LLM/todogen_LLM/database_of_messages.py +221 -0
  46. LLM/todogen_LLM/export_todolist.py +59 -0
  47. LLM/todogen_LLM/filter_message_list.py +49 -0
  48. LLM/todogen_LLM/filter_useful_data_to_dict.py +163 -0
  49. LLM/todogen_LLM/jiaoben.py +318 -0
  50. LLM/todogen_LLM/logger_config.py +69 -0
.DS_Store ADDED
Binary file (8.2 kB). View file
 
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
LLM/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LLM/Database/FalseNegative_samples.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
LLM/Database/FalsePositive_samples.json ADDED
File without changes
LLM/Database/Manual_filtering_samples.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "sender": "ASAP Sample",
4
+ "content": "王斯煜[表情]Vince 黑客松nv: @AlisaGG @LJK86 明天中午再对 WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://...",
5
+ "app_name": "com.tencent.mm",
6
+ "message_id": 331001504,
7
+ "user_id": 67619984,
8
+ "date": "2025-03-31T00:15:21",
9
+ "classification": "TruePositive"
10
+ },
11
+ {
12
+ "sender": "ASAP Sample",
13
+ "content": "[6条]王斯煜[表情]Vince 黑客松nv: Your Mint 3-Month, Unlimited plan expires in 2 days. Log into your account at my.mintmobile.com or via our app to make a payment and keep your Mint se...",
14
+ "app_name": "com.tencent.mm",
15
+ "message_id": 331002905,
16
+ "user_id": 67619984,
17
+ "date": "2025-03-31T00:29:45",
18
+ "classification": "TruePositive"
19
+ },
20
+ {
21
+ "sender": "ASAP Sample",
22
+ "content": "[4条]斯煜[表情]Vince: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba",
23
+ "app_name": "com.tencent.mm",
24
+ "message_id": 331002907,
25
+ "user_id": 74797059,
26
+ "date": "2025-03-31T00:29:45",
27
+ "classification": "TruePositive"
28
+ },
29
+ {
30
+ "sender": "ASAP Sample",
31
+ "content": "[4条]斯煜[表情]Vince: 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。",
32
+ "app_name": "com.tencent.mm",
33
+ "message_id": 331002908,
34
+ "user_id": 74797059,
35
+ "date": "2025-03-31T00:29:45",
36
+ "classification": "TruePositive"
37
+ },
38
+ {
39
+ "sender": "ASAP Sample",
40
+ "content": "[6条]斯煜[表情]Vince: 尊敬的*斯煜,您在我行办理的1笔个人贷款需于2024年12月08日17:00前还款,当期还款金额本息合计999999.84元,请您留意尾号0455的账户可用余额是否充足,避免因贷款逾期影响个人征信。具体贷款信息可通过工行网上银行、手机银行或致电贷款经办行查询。【工商银行】",
41
+ "app_name": "com.tencent.mm",
42
+ "message_id": 331002909,
43
+ "user_id": 74797059,
44
+ "date": "2025-03-31T00:29:45",
45
+ "classification": "TruePositive"
46
+ },
47
+ {
48
+ "sender": "ASAP Sample",
49
+ "content": "[6条]斯煜[表情]Vince: Your Mint 3-Month, Unlimited plan expires in 2 days. Log into your account at my.mintmobile.com or via our app to make a payment and keep your Mint se...",
50
+ "app_name": "com.tencent.mm",
51
+ "message_id": 331002910,
52
+ "user_id": 74797059,
53
+ "date": "2025-03-31T00:29:45",
54
+ "classification": "TruePositive"
55
+ },
56
+ {
57
+ "sender": "ASAP Sample",
58
+ "content": "王斯煜[表情]Vince 黑客松nv: WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://vc.feishu.cn/j/65644590...",
59
+ "app_name": "com.tencent.mm",
60
+ "message_id": 331115637,
61
+ "user_id": 67619984,
62
+ "date": "2025-03-31T11:56:36",
63
+ "classification": "TruePositive"
64
+ },
65
+ {
66
+ "sender": "ASAP Sample",
67
+ "content": "AlisaGG陈G老师1010: 【驿收发】您的邮政包裹已到凯丰花园2栋驿站,请23:00前凭5-5-6530来取,详询18320926368",
68
+ "app_name": "com.tencent.mm",
69
+ "message_id": 331133944,
70
+ "user_id": 67619984,
71
+ "date": "2025-03-31T13:39:49",
72
+ "classification": "TruePositive"
73
+ },
74
+ {
75
+ "sender": "ASAP Sample",
76
+ "content": "[2条]AlisaGG: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。",
77
+ "app_name": "com.tencent.mm",
78
+ "message_id": 331134011,
79
+ "user_id": 74797059,
80
+ "date": "2025-03-31T13:40:54"
81
+ },
82
+ {
83
+ "sender": "ASAP Sample",
84
+ "content": "[2条]AlisaGG陈G老师1010: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。",
85
+ "app_name": "com.tencent.mm",
86
+ "message_id": 331134045,
87
+ "user_id": 67619984,
88
+ "date": "2025-03-31T13:40:54",
89
+ "classification": "TruePositive"
90
+ },
91
+ {
92
+ "sender": "ASAP Sample",
93
+ "content": "[3条]AlisaGG陈G老师1010: 取件通知\n取件码: 11724837\n运单号: 464285154986072\n取件地址: 深圳新一代产业园P2出入口内侧4号丰巢柜\n配送人员: 18124519013\n计费规则: 查看详情",
94
+ "app_name": "com.tencent.mm",
95
+ "message_id": 331134346,
96
+ "user_id": 67619984,
97
+ "date": "2025-03-31T13:43:17",
98
+ "classification": "TruePositive"
99
+ },
100
+ {
101
+ "sender": "ASAP Sample",
102
+ "content": "[4条]AlisaGG陈G老师1010: 取件再次提醒\n取件码:: 27696201\n配送公司:: 申通快递\n运单号:: 777293635831671\n配送员手机:: 13392809673\n取件地址:: 深圳新一代产业园P2出入口内侧4号丰巢柜",
103
+ "app_name": "com.tencent.mm",
104
+ "message_id": 331134447,
105
+ "user_id": 67619984,
106
+ "date": "2025-03-31T13:44:02",
107
+ "classification": "TruePositive"
108
+ },
109
+ {
110
+ "sender": "ASAP Sample",
111
+ "content": "[5条]李JK老师-1228: 【菜鸟驿站】请凭140-3-1019到菜鸟驿站取件,查询详情u.cainiao.com/53h4bSr7zrh",
112
+ "app_name": "com.tencent.mm",
113
+ "message_id": 331135751,
114
+ "user_id": 67619984,
115
+ "date": "2025-03-31T13:57:28",
116
+ "classification": "TruePositive"
117
+ },
118
+ {
119
+ "sender": "ASAP Sample",
120
+ "content": "刻刻: 【停机前提醒】尊敬的移动客户,您好!您的账户余额不足被限制使用。现提醒您需充值缴费至少32.49元,以确保您继续享受畅通的通信服务。诚邀您一键办理自动充服务,自动充值缴费更轻松:https://dx.10086.cn/7WyRLA。 心级服务、让爱连接【中国移动】",
121
+ "app_name": "com.tencent.mm",
122
+ "message_id": 331162269,
123
+ "user_id": 67619984,
124
+ "date": "2025-03-31T16:22:21",
125
+ "classification": "TruePositive"
126
+ },
127
+ {
128
+ "sender": "ASAP Sample",
129
+ "content": "[3条]LJK86: 您的号卡已在配送途中,物流单号SF3147624215612,点击:http://t.hn.189.cn/EZv6bazD,查询物流进度。如已签收请在有效期内先实名激活。客服热线4008155555【中国电信】",
130
+ "app_name": "com.tencent.mm",
131
+ "message_id": 330225703,
132
+ "user_id": 74797059,
133
+ "date": "2025-03-30T22:57:51",
134
+ "classification": "TruePositive"
135
+ },
136
+ {
137
+ "sender": "106875230196298038",
138
+ "content": "【京东快递】取件码C8042,您的快件尾号8980已送达成都东软学院配送营业点,地址:四川成都市都江堰市青城山镇东软大道1号东软学院电信营业厅旁 ,联系电话13032837084",
139
+ "app_name": "SMS",
140
+ "message_id": 328233028,
141
+ "user_id": 33642157,
142
+ "date": "2025-03-28T23:30:29",
143
+ "classification": "TruePositive"
144
+ },
145
+ {
146
+ "sender": "1068837016151",
147
+ "content": "【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。",
148
+ "app_name": "SMS",
149
+ "message_id": 322223721,
150
+ "user_id": 67619984,
151
+ "date": "2025-03-22T22:37:50",
152
+ "classification": "TruePositive"
153
+ },
154
+ {
155
+ "sender": "ASAP Azure ToDoAgent",
156
+ "content": "陈格(alisagege.chen): WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://vc.feishu.cn/j/656445907\n\n手机拨号一键入会\n+862122504720,,656445907(中国大陆)\n4008208888,,656445907(中国大陆)\n\n根据所在地拨打号...",
157
+ "app_name": "com.ss.android.lark",
158
+ "message_id": 331150106,
159
+ "user_id": 33642157,
160
+ "date": "2025-03-31T15:01:36",
161
+ "classification": "TruePositive"
162
+ },
163
+ {
164
+ "sender": "106917190196",
165
+ "content": "【广州自来水】(水费通知)尊敬的0001914043用户(大沙头三马路9号大院8号604)水量6(202503行度83),应缴水费11.88元,污水费5.7元。了解更多用水资讯可关注“广州自来水96968”微信公众号。",
166
+ "app_name": "SMS",
167
+ "message_id": 319155403,
168
+ "user_id": 67619984,
169
+ "date": "2025-03-19T15:54:16",
170
+ "classification": "TruePositive"
171
+ },
172
+ {
173
+ "sender": "ASAP Sample",
174
+ "content": "AlisaGG陈G老师1010: 【招商银行】您本月办理分期还款可享限时2.3折优惠!打开掌上生活APP,搜“分期还款”立享折扣优惠!资格实时测评,拒收请回复R",
175
+ "app_name": "com.tencent.mm",
176
+ "message_id": 330231000,
177
+ "user_id": 67619984,
178
+ "date": "2025-03-30T23:10:59",
179
+ "classification": "TrueNegative"
180
+ },
181
+ {
182
+ "sender": "ASAP Sample",
183
+ "content": "[4条]AlisaGG: 【招商银行】您本月办理分期还款可享限时2.3折优惠!打开掌上生活APP,搜“分期还款”立享折扣优惠!资格实时测评,拒收请回复R",
184
+ "app_name": "com.tencent.mm",
185
+ "message_id": 330231005,
186
+ "user_id": 74797059,
187
+ "date": "2025-03-30T23:11:00",
188
+ "classification": "TrueNegative"
189
+ }
190
+ ]
LLM/Database/Messages.json ADDED
The diff for this file is too large to render. See raw diff
 
LLM/Database/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ## Messages.json
2
+ 原始数据
3
+ ## Manual_filtering_samples.json
4
+ 手工筛的
5
+ ## negative_samples.json
6
+ ai辅助筛的负样本
7
+ ## positive_samples.json
8
+ ai辅助筛的正样本
LLM/Database/TrueNegative_samples.json ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "sender": "ASAP Sample",
4
+ "content": "AlisaGG陈G老师1010: 【招商银行】您本月办理分期还款可享限时2.3折优惠!打开掌上生活APP,搜“分期还款”立享折扣优惠!资格实时测评,拒收请回复R",
5
+ "app_name": "com.tencent.mm",
6
+ "message_id": 330231000,
7
+ "user_id": 67619984,
8
+ "date": "2025-03-30T23:10:59",
9
+ "classification": "TrueNegative"
10
+ },
11
+ {
12
+ "sender": "ASAP Sample",
13
+ "content": "[4条]AlisaGG: 【招商银行】您本月办理分期还款可享限时2.3折优惠!打开掌上生活APP,搜“分期还款”立享折扣优惠!资格实时测评,拒收请回复R",
14
+ "app_name": "com.tencent.mm",
15
+ "message_id": 330231005,
16
+ "user_id": 74797059,
17
+ "date": "2025-03-30T23:11:00",
18
+ "classification": "TrueNegative"
19
+ },
20
+ {
21
+ "sender": "ASAP Azure ToDoAgent",
22
+ "content": "WANG Siyu: [图片]",
23
+ "app_name": "com.ss.android.lark",
24
+ "message_id": 401101707,
25
+ "user_id": 67619984,
26
+ "date": "2025-04-01T10:17:31"
27
+ },
28
+ {
29
+ "sender": "ASAP Azure ToDoAgent",
30
+ "content": "WANG Siyu: 我真的尝试给我们做一个公共邮箱 outlook🤮",
31
+ "app_name": "com.ss.android.lark",
32
+ "message_id": 401101708,
33
+ "user_id": 67619984,
34
+ "date": "2025-04-01T10:17:48"
35
+ },
36
+ {
37
+ "sender": "1065812627",
38
+ "content": "【流量领取提醒】尊敬的用户,您已获得10GB免费通用流量,将于2025年4月30日到期,点击:https://c.139.com/m/a/s?p=GDCM 或到“移动云盘App-我的-领奖专区-优惠卡券”领取,数量有限,领完即止。拒收请回复R【中国移动云盘】",
39
+ "app_name": "SMS",
40
+ "message_id": 401101418,
41
+ "user_id": 74797059,
42
+ "date": "2025-04-01T10:14:54"
43
+ },
44
+ {
45
+ "sender": "106575329994",
46
+ "content": "【霸王茶姬】好茶一口鲜,唤醒好状态,送你「整单8折+双杯78券」3ct.cc/e1mwQaSz 仅限广东区域部分门店核销 拒收请回复R",
47
+ "app_name": "SMS",
48
+ "message_id": 401100317,
49
+ "user_id": 74797059,
50
+ "date": "2025-04-01T10:03:40"
51
+ },
52
+ {
53
+ "sender": "10086",
54
+ "content": "尊敬的客户,根据您的充值历史记录赠送6.00元,您总共还有待赠送金额60.00元,其中下月将到帐6.00元,当前可用余额73.51元,感谢您对我公司的支持。登录“中国移动”APP足不出户查办业务,尊享充值优惠,立即前往: https://dx.10086.cn/A/kHXkEQ 。【中国移动】",
55
+ "app_name": "SMS",
56
+ "message_id": 401090015,
57
+ "user_id": 74797059,
58
+ "date": "2025-04-01T09:00:45"
59
+ },
60
+ {
61
+ "sender": "106980095533",
62
+ "content": "【建设银行】建行送福利啦!支付1分钱,有机会抽取2-188元微信立减金,100%中奖!手机银行APP首页搜索进入“建粤有礼”,点击“月月有礼”即可参与活动。如已参与,请忽略本条短信,详询广州建行各网点大堂经理。拒收请回复R",
63
+ "app_name": "SMS",
64
+ "message_id": 329111118,
65
+ "user_id": 67619984,
66
+ "date": "2025-03-29T11:11:51"
67
+ },
68
+ {
69
+ "sender": "1068518895595",
70
+ "content": "【光大银行】尊敬的光大信用卡受邀客户!欢迎参与4月积分福利大放送活动。2025年4月1日至30日累计消费满200元额外获赠1千积分,累计消费满800元再赠4千积分,累计消费满3千元叠加获赠1万积分!最高奖励1.5万积分,有效期1年。详情可咨询95595 。*拒收请回复R",
71
+ "app_name": "SMS",
72
+ "message_id": 328124936,
73
+ "user_id": 67619984,
74
+ "date": "2025-03-28T12:49:56"
75
+ },
76
+ {
77
+ "sender": "备忘录",
78
+ "content": "[4条]M丶D: 【开心麻花】开心麻花推出“降温补贴票”!《贼想得到你》新开场次买一送一,优惠限时到气温恢复15°C!抢票戳:kxmh.net/3Eo9。拒收请回复R",
79
+ "app_name": "com.tencent.mm",
80
+ "message_id": 328111725,
81
+ "user_id": 67619984,
82
+ "date": "2025-03-28T11:17:56"
83
+ },
84
+ {
85
+ "sender": "10086109",
86
+ "content": "春风送暖绿意浓,森林防火莫放松!清明祭祀倡新风,鲜花追思情更重;踏青旅游守规章,莫留火种在山中。严防森林火灾,永葆绿色家园。(市规划资源局)",
87
+ "app_name": "SMS",
88
+ "message_id": 328105821,
89
+ "user_id": 67619984,
90
+ "date": "2025-03-28T10:58:47"
91
+ },
92
+ {
93
+ "sender": "10680175780000628",
94
+ "content": "【养生课】派送中:通知您数次了,您的免费中医养生课程已经送到,请及时通过 b.i7o.cn/0B7YCY1 获取,拒收请回复R",
95
+ "app_name": "SMS",
96
+ "message_id": 328100301,
97
+ "user_id": 67619984,
98
+ "date": "2025-03-28T10:03:20"
99
+ },
100
+ {
101
+ "sender": "ASAP Azure ToDoAgent",
102
+ "content": "WANG Siyu: Message_id: 1, content:“ 【中国联通】出境漫游低至19.5元/天,订购天数越多越优惠,覆盖全球106个国家和地区,一次订购畅游全球!即日起至12月31日,订购国漫畅游流量3天流量包及以上天数产品,即可获赠权益六选一通兑券一张,打车、外卖、美食代金券等你来拿。订购方式点击https://u.10010.cn/uAbdw 或发送KTGM10至10010,根据回复提示进行订购,更多出境漫游流量产品请点击https://u.10010.cn...",
103
+ "app_name": "com.ss.android.lark",
104
+ "message_id": 327121407,
105
+ "user_id": 33642157,
106
+ "date": "2025-03-27T12:14:17"
107
+ },
108
+ {
109
+ "sender": "10086",
110
+ "content": "【防诈温馨提醒】尊敬的客户,惠州移动提醒您:“不明来电及时挂”、“可疑链接不点击”,请您对各类调研评分、评价有奖等陌生来电谨慎接听,谨防电信网络诈骗。您可回复KTFSR办理高频骚扰电话拦截服务,该服务无需收费,办理后如需取消请发送QXFSR到10086。点击 https://dx.10086.cn/A/tx04HA 进入“中国移动”APP服务大厅,可清晰查阅已办理业务,诚邀体验。心级服务、让爱连接,我们期待您的“10分”满意。【中国移动】",
111
+ "app_name": "SMS",
112
+ "message_id": 327111797,
113
+ "user_id": 74797059,
114
+ "date": "2025-03-27T11:17:29"
115
+ },
116
+ {
117
+ "sender": "🌳广州新地标!云山天地",
118
+ "content": "白云山南门旁,美食购物一站式,负一层还有儿童乐园,逛完白云山来歇歇脚吧!",
119
+ "app_name": "com.dianping.v1",
120
+ "message_id": 327085555,
121
+ "user_id": 67619984,
122
+ "date": "2025-03-27T08:55:56"
123
+ },
124
+ {
125
+ "sender": "四川一小区疑高空抛物砸死快递员",
126
+ "content": "点击查看详情>>>",
127
+ "app_name": "com.sina.weibo",
128
+ "message_id": 327083854,
129
+ "user_id": 67619984,
130
+ "date": "2025-03-27T08:38:49"
131
+ },
132
+ {
133
+ "sender": "男子为发明永动机出租屋连杀5人",
134
+ "content": "男子为发明永动机,想通过杀人锻炼胆量,在出租屋内连杀5人,被判处死刑,目前案件正在二审审理期间。",
135
+ "app_name": "com.ss.android.ugc.aweme",
136
+ "message_id": 327083851,
137
+ "user_id": 67619984,
138
+ "date": "2025-03-27T08:38:49"
139
+ },
140
+ {
141
+ "sender": "福利红包天天抢,最高666元",
142
+ "content": "机会难得,不要错过>>",
143
+ "app_name": "com.taobao.taobao",
144
+ "message_id": 327083853,
145
+ "user_id": 67619984,
146
+ "date": "2025-03-27T08:38:49"
147
+ },
148
+ {
149
+ "sender": "106980095533",
150
+ "content": "【建设银行】您账户8699于3月27日8时3分向微信支付-羊城通缴费支出人民币3.5元,可用余额5240.93元。",
151
+ "app_name": "SMS",
152
+ "message_id": 327080344,
153
+ "user_id": 67619984,
154
+ "date": "2025-03-27T08:03:48"
155
+ },
156
+ {
157
+ "sender": "恭喜,你被喜马选中做有声书副业!",
158
+ "content": "根据收听记录,你被认定适合喜马的声音副业,下班后用1-2个小时录制时薪书、分成书上传就可能获得收获!",
159
+ "app_name": "com.ximalaya.ting.android",
160
+ "message_id": 326202922,
161
+ "user_id": 67619984,
162
+ "date": "2025-03-26T20:29:01"
163
+ },
164
+ {
165
+ "sender": "备忘录",
166
+ "content": "[14条]M丶D: [文件] WEMEC METABOTS R60(1).pdf",
167
+ "app_name": "com.tencent.mm",
168
+ "message_id": 326183714,
169
+ "user_id": 67619984,
170
+ "date": "2025-03-26T18:37:48"
171
+ },
172
+ {
173
+ "sender": "你的兴趣商品",
174
+ "content": "JETSON ORIN NX Super 开发套件 AI 智能模组orin nx 主板限时优惠发放中!",
175
+ "app_name": "com.taobao.taobao",
176
+ "message_id": 326190015,
177
+ "user_id": 67619984,
178
+ "date": "2025-03-26T19:00:09"
179
+ },
180
+ {
181
+ "sender": "恭喜您被666元红包选中啦!",
182
+ "content": "好友***已成功领取,抓紧参与吧!",
183
+ "app_name": "com.taobao.taobao",
184
+ "message_id": 326190116,
185
+ "user_id": 67619984,
186
+ "date": "2025-03-26T19:01:14"
187
+ },
188
+ {
189
+ "sender": "106980095533",
190
+ "content": "【建设银行】您账户8699于3月26日19时8分向微信支付-羊城通缴费支出人民币1元,可用余额5263.83元。",
191
+ "app_name": "SMS",
192
+ "message_id": 326190917,
193
+ "user_id": 67619984,
194
+ "date": "2025-03-26T19:09:07"
195
+ }
196
+
197
+ ]
LLM/Database/TruePositive_samples.json ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "sender": "ASAP Sample",
4
+ "content": "王斯煜[表情]Vince 黑客松nv: @AlisaGG @LJK86 明天中午再对 WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://...",
5
+ "app_name": "com.tencent.mm",
6
+ "message_id": 331001504,
7
+ "user_id": 67619984,
8
+ "date": "2025-03-31T00:15:21",
9
+ "classification": "TruePositive"
10
+ },
11
+ {
12
+ "sender": "ASAP Sample",
13
+ "content": "[6条]王斯煜[表情]Vince 黑客松nv: Your Mint 3-Month, Unlimited plan expires in 2 days. Log into your account at my.mintmobile.com or via our app to make a payment and keep your Mint se...",
14
+ "app_name": "com.tencent.mm",
15
+ "message_id": 331002905,
16
+ "user_id": 67619984,
17
+ "date": "2025-03-31T00:29:45",
18
+ "classification": "TruePositive"
19
+ },
20
+ {
21
+ "sender": "ASAP Sample",
22
+ "content": "[4条]斯煜[表情]Vince: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba",
23
+ "app_name": "com.tencent.mm",
24
+ "message_id": 331002907,
25
+ "user_id": 74797059,
26
+ "date": "2025-03-31T00:29:45",
27
+ "classification": "TruePositive"
28
+ },
29
+ {
30
+ "sender": "ASAP Sample",
31
+ "content": "[4条]斯煜[表情]Vince: 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。",
32
+ "app_name": "com.tencent.mm",
33
+ "message_id": 331002908,
34
+ "user_id": 74797059,
35
+ "date": "2025-03-31T00:29:45",
36
+ "classification": "TruePositive"
37
+ },
38
+ {
39
+ "sender": "ASAP Sample",
40
+ "content": "[6条]斯煜[表情]Vince: 尊敬的*斯煜,您在我行办理的1笔个人贷款需于2024年12月08日17:00前还款,当期还款金额本息合计999999.84元,请您留意尾号0455的账户可用余额是否充足,避免因贷款逾期影响个人征信。具体贷款信息可通过工行网上银行、手机银行或致电贷款经办行查询。【工商银行】",
41
+ "app_name": "com.tencent.mm",
42
+ "message_id": 331002909,
43
+ "user_id": 74797059,
44
+ "date": "2025-03-31T00:29:45",
45
+ "classification": "TruePositive"
46
+ },
47
+ {
48
+ "sender": "ASAP Sample",
49
+ "content": "[6条]斯煜[表情]Vince: Your Mint 3-Month, Unlimited plan expires in 2 days. Log into your account at my.mintmobile.com or via our app to make a payment and keep your Mint se...",
50
+ "app_name": "com.tencent.mm",
51
+ "message_id": 331002910,
52
+ "user_id": 74797059,
53
+ "date": "2025-03-31T00:29:45",
54
+ "classification": "TruePositive"
55
+ },
56
+ {
57
+ "sender": "ASAP Sample",
58
+ "content": "王斯煜[表情]Vince 黑客松nv: WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://vc.feishu.cn/j/65644590...",
59
+ "app_name": "com.tencent.mm",
60
+ "message_id": 331115637,
61
+ "user_id": 67619984,
62
+ "date": "2025-03-31T11:56:36",
63
+ "classification": "TruePositive"
64
+ },
65
+ {
66
+ "sender": "ASAP Sample",
67
+ "content": "AlisaGG陈G老师1010: 【驿收发】您的邮政包裹已到凯丰花园2栋驿站,请23:00前凭5-5-6530来取,详询18320926368",
68
+ "app_name": "com.tencent.mm",
69
+ "message_id": 331133944,
70
+ "user_id": 67619984,
71
+ "date": "2025-03-31T13:39:49",
72
+ "classification": "TruePositive"
73
+ },
74
+ {
75
+ "sender": "ASAP Sample",
76
+ "content": "[2条]AlisaGG: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。",
77
+ "app_name": "com.tencent.mm",
78
+ "message_id": 331134011,
79
+ "user_id": 74797059,
80
+ "date": "2025-03-31T13:40:54"
81
+ },
82
+ {
83
+ "sender": "ASAP Sample",
84
+ "content": "[2条]AlisaGG陈G老师1010: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。",
85
+ "app_name": "com.tencent.mm",
86
+ "message_id": 331134045,
87
+ "user_id": 67619984,
88
+ "date": "2025-03-31T13:40:54",
89
+ "classification": "TruePositive"
90
+ },
91
+ {
92
+ "sender": "ASAP Sample",
93
+ "content": "[3条]AlisaGG陈G老师1010: 取件通知\n取件码: 11724837\n运单号: 464285154986072\n取件地址: 深圳新一代产业园P2出入口内侧4号丰巢柜\n配送人员: 18124519013\n计费规则: 查看详情",
94
+ "app_name": "com.tencent.mm",
95
+ "message_id": 331134346,
96
+ "user_id": 67619984,
97
+ "date": "2025-03-31T13:43:17",
98
+ "classification": "TruePositive"
99
+ },
100
+ {
101
+ "sender": "ASAP Sample",
102
+ "content": "[4条]AlisaGG陈G老师1010: 取件再次提醒\n取件码:: 27696201\n配送公司:: 申通快递\n运单号:: 777293635831671\n配送员手机:: 13392809673\n取件地址:: 深圳新一代产业园P2出入口内侧4号丰巢柜",
103
+ "app_name": "com.tencent.mm",
104
+ "message_id": 331134447,
105
+ "user_id": 67619984,
106
+ "date": "2025-03-31T13:44:02",
107
+ "classification": "TruePositive"
108
+ },
109
+ {
110
+ "sender": "ASAP Sample",
111
+ "content": "[5条]李JK老师-1228: 【菜鸟驿站】请凭140-3-1019到菜鸟驿站取件,查询详情u.cainiao.com/53h4bSr7zrh",
112
+ "app_name": "com.tencent.mm",
113
+ "message_id": 331135751,
114
+ "user_id": 67619984,
115
+ "date": "2025-03-31T13:57:28",
116
+ "classification": "TruePositive"
117
+ },
118
+ {
119
+ "sender": "ASAP Sample",
120
+ "content": "刻刻: 【停机前提醒】尊敬的移动客户,您好!您的账户余额不足被限制使用。现提醒您需充值缴费至少32.49元,以确保您继续享受畅通的通信服务。诚邀您一键办理自动充服务,自动充值缴费更轻松:https://dx.10086.cn/7WyRLA。 心级服务、让爱连接【中国移动】",
121
+ "app_name": "com.tencent.mm",
122
+ "message_id": 331162269,
123
+ "user_id": 67619984,
124
+ "date": "2025-03-31T16:22:21",
125
+ "classification": "TruePositive"
126
+ },
127
+ {
128
+ "sender": "ASAP Sample",
129
+ "content": "[3条]LJK86: 您的号卡已在配送途中,物流单号SF3147624215612,点击:http://t.hn.189.cn/EZv6bazD,查询物流进度。如已签收请在有效期内先实名激活。客服热线4008155555【中国电信】",
130
+ "app_name": "com.tencent.mm",
131
+ "message_id": 330225703,
132
+ "user_id": 74797059,
133
+ "date": "2025-03-30T22:57:51",
134
+ "classification": "TruePositive"
135
+ },
136
+ {
137
+ "sender": "106875230196298038",
138
+ "content": "【京东快递】取件码C8042,您的快件尾号8980已送达成都东软学院配送营业点,地址:四川成都市都江堰市青城山镇东软大道1号东软学院电信营业厅旁 ,联系电话13032837084",
139
+ "app_name": "SMS",
140
+ "message_id": 328233028,
141
+ "user_id": 33642157,
142
+ "date": "2025-03-28T23:30:29",
143
+ "classification": "TruePositive"
144
+ },
145
+ {
146
+ "sender": "1068837016151",
147
+ "content": "【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。",
148
+ "app_name": "SMS",
149
+ "message_id": 322223721,
150
+ "user_id": 67619984,
151
+ "date": "2025-03-22T22:37:50",
152
+ "classification": "TruePositive"
153
+ },
154
+ {
155
+ "sender": "ASAP Azure ToDoAgent",
156
+ "content": "陈格(alisagege.chen): WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://vc.feishu.cn/j/656445907\n\n手机拨号一键入会\n+862122504720,,656445907(中国大陆)\n4008208888,,656445907(中国大陆)\n\n根据所在地拨打号...",
157
+ "app_name": "com.ss.android.lark",
158
+ "message_id": 331150106,
159
+ "user_id": 33642157,
160
+ "date": "2025-03-31T15:01:36",
161
+ "classification": "TruePositive"
162
+ },
163
+ {
164
+ "sender": "106917190196",
165
+ "content": "【广州自来水】(水费通知)尊敬的0001914043用户(大沙头三马路9号大院8号604)水量6(202503行度83),应缴水费11.88元,污水费5.7元。了解更多用水资讯可关注“广州自来水96968”微信公众号。",
166
+ "app_name": "SMS",
167
+ "message_id": 319155403,
168
+ "user_id": 67619984,
169
+ "date": "2025-03-19T15:54:16",
170
+ "classification": "TruePositive"
171
+ },
172
+ {
173
+ "sender": "ASAP Sample",
174
+ "content": "AlisaGG: 【小象超市】您的商品已放置在门口,因有易碎等商品请尽快取回,如有疑问请联系15794935204 。祝您生活愉快!",
175
+ "app_name": "com.tencent.mm",
176
+ "message_id": 331232914,
177
+ "user_id": 74797059,
178
+ "date": "2025-03-31T23:29:47"
179
+ },
180
+ {
181
+ "sender": "ASAP Sample",
182
+ "content": "AlisaGG陈G老师1010: 【小象超市】您的商品已放置在门口,因有易碎等商品请尽快取回,如有疑问请联系15794935204 。祝您生活愉快!",
183
+ "app_name": "com.tencent.mm",
184
+ "message_id": 331232986,
185
+ "user_id": 67619984,
186
+ "date": "2025-03-31T23:29:46"
187
+ },
188
+ {
189
+ "sender": "ASAP Sample",
190
+ "content": "[3条]AlisaGG: 取件通知\n取件码: 11724837\n运单号: 464285154986072\n取件地址: 深圳新一代产业园P2出入口内侧4号丰巢柜\n配送人员: 18124519013\n计费规则: 查看详情",
191
+ "app_name": "com.tencent.mm",
192
+ "message_id": 331150113,
193
+ "user_id": 33642157,
194
+ "date": "2025-03-31T15:01:37"
195
+ },
196
+ {
197
+ "sender": "ASAP Sample",
198
+ "content": "[4条]AlisaGG: 取件再次提醒\n取件码:: 27696201\n配送公司:: 申通快递\n运单号:: 777293635831671\n配送员手机:: 13392809673\n取件地址:: 深圳新一代产业园P2出入口内侧4号丰巢柜",
199
+ "app_name": "com.tencent.mm",
200
+ "message_id": 331150114,
201
+ "user_id": 33642157,
202
+ "date": "2025-03-31T15:01:37"
203
+ },
204
+ {
205
+ "sender": "ASAP Sample",
206
+ "content": "AlisaGG: 【驿收发】您的邮政包裹已到凯丰花园2栋驿站,请23:00前凭5-5-6530来取,详询18320926368",
207
+ "app_name": "com.tencent.mm",
208
+ "message_id": 331150111,
209
+ "user_id": 33642157,
210
+ "date": "2025-03-31T15:01:36"
211
+ },
212
+ {
213
+ "sender": "ASAP Sample",
214
+ "content": "[2条]AlisaGG: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。",
215
+ "app_name": "com.tencent.mm",
216
+ "message_id": 331150112,
217
+ "user_id": 33642157,
218
+ "date": "2025-03-31T15:01:37"
219
+ },
220
+ {
221
+ "sender": "ASAP Sample",
222
+ "content": "斯煜[表情]Vince: WANG Siyu邀请你加入飞书视频会议\n会议主题:FilterLLM ToDoGenLLM PR Merge讨论\n会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)\n会议 ID:656 445 907\n会议链接:https://vc.feishu.cn/j/65644590...",
223
+ "app_name": "com.tencent.mm",
224
+ "message_id": 331150102,
225
+ "user_id": 33642157,
226
+ "date": "2025-03-31T15:01:37"
227
+ },
228
+ {
229
+ "sender": "#SoSIM",
230
+ "content": "[SoSIM提示] 未來30日自動續購服務收費 (04/2025)\r\n\r\n為確保你現有的服務不會中斷,根據系統紀錄,以下是你未來30日已開啟自動續購的服務收費總額,只供參考。如果此額多於「現有儲值額」,建議立即增值。\r\n\r\n\r\n儲值卡號碼:96334767\r\n\r\n未來30日續購服務總額 : $55.0\r\n\r\n現有儲值額 : $26.65\r\n\r\n注意:個別服務 (包括內地副號及附屬卡) 一旦到期而未有足夠餘額自動續購,有關號碼將即時失效而不可復原。\r\n\r\n請於賬戶主頁 > 設定 內瀏覽有關服務組合有效期及詳情。\r\n\r\n\r\n***如果你的「自動增值」金額不少於所需服務收費,請忽略此訊息***",
231
+ "app_name": "SMS",
232
+ "message_id": 401100763,
233
+ "user_id": 96989258,
234
+ "date": "2025-04-01T10:07:29"
235
+ },
236
+ {
237
+ "sender": "106875230196298038",
238
+ "content": "【京东快递】取件码C8042,您的快件尾号8980已送达成都东软学院配送营业点,地址:四川成都市都江堰市青城山镇东软大道1号东软学院电信营业厅旁 ,联系电话13032837084",
239
+ "app_name": "SMS",
240
+ "message_id": 328233028,
241
+ "user_id": 33642157,
242
+ "date": "2025-03-28T23:30:29"
243
+ },
244
+ {
245
+ "sender": "您购买的宝贝已送达自提柜",
246
+ "content": "包裹待签收,点击查看详情>>",
247
+ "app_name": "com.taobao.taobao",
248
+ "message_id": 327135963,
249
+ "user_id": 67619984,
250
+ "date": "2025-03-27T13:59:30"
251
+ },
252
+ {
253
+ "sender": "您购买的宝贝正在派送中",
254
+ "content": "【搓澡巾洗澡神器男女款】预计送往【智能柜】,手机号已加密,享号码保护服务,点击查看预计送达时间>>",
255
+ "app_name": "com.taobao.taobao",
256
+ "message_id": 327091156,
257
+ "user_id": 67619984,
258
+ "date": "2025-03-27T09:11:37"
259
+ },
260
+ {
261
+ "sender": "#SoSIM",
262
+ "content": "[SoSIM提示] 30日/無限影視數據組合 (由2025年1月7日起續購收費$55/30日) 用量提示:\r\n儲值卡號碼:96334767\r\n累積用量:30 GB \r\n公平使用數據用量上限:已用50% (截至:27/03/2025 06:53:27)\r\n如已達到 / 超過公平使用數據用量100%,數據傳輸速度將會即時被限制至高達128kbps,直至 25/04/2025 19:42:52 (香港時間)。\r\n立即於賬戶主頁 > 本地服務 https://sosimhk.com/SoSim/main/tc/topup 購買追加數據以保持流暢的上網速度。\r\n",
263
+ "app_name": "SMS",
264
+ "message_id": 327065312,
265
+ "user_id": 96989258,
266
+ "date": "2025-03-27T06:53:42"
267
+ },
268
+
269
+
270
+ ]
LLM/Database/classify_samples.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import csv
3
+ import os
4
+ import pandas as pd
5
+ import openai
6
+ import time
7
+ import requests
8
+ from dotenv import load_dotenv
9
+ from tqdm import tqdm
10
+
11
# Load environment variables (from a .env file, if one exists).
load_dotenv()

# SiliconFlow API configuration.
# SECURITY: the key must be supplied through the SILICONFLOW_API_KEY
# environment variable (or .env). A real key used to be hard-coded here as the
# fallback value; it is exposed in version-control history and should be
# revoked and rotated. With an empty fallback, the "key not configured" check
# in main() can actually trigger.
SILICONFLOW_API_KEY = os.getenv("SILICONFLOW_API_KEY", "")
SILICONFLOW_API_BASE = os.getenv("SILICONFLOW_API_BASE", "https://api.siliconflow.cn/v1")

# OpenAI API configuration, kept as a fallback option.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "your_openai_api_key_here")
openai.api_key = OPENAI_API_KEY

# Optional Azure OpenAI configuration; all three values must be non-blank
# for the Azure settings below to take effect.
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "")
AZURE_DEPLOYMENT_NAME = os.getenv("AZURE_DEPLOYMENT_NAME", "")

# Switch the openai module to Azure when a complete Azure configuration exists.
if AZURE_OPENAI_ENDPOINT.strip() and AZURE_OPENAI_API_KEY.strip() and AZURE_DEPLOYMENT_NAME.strip():
    openai.api_type = "azure"
    openai.api_base = AZURE_OPENAI_ENDPOINT
    openai.api_key = AZURE_OPENAI_API_KEY
    openai.api_version = "2023-05-15"  # may need adjusting for the actual deployment
38
+
39
# Criteria for a TruePositive sample (based on the three MVP case categories).
def define_positive_sample_criteria():
    """Return the prompt text that tells the model what counts as TruePositive.

    The project describes TruePositive only as "the three MVP case
    categories" without a precise definition, so the criteria below are a
    working interpretation and can be adjusted as requirements change.
    """
    criteria_prompt = """
    请判断以下消息是否属于TruePositive。TruePositive定义为与任务管理、待办事项、提醒、通知筛选相关的有用信息,具体包括:
    1. 包含明确的任务、待办事项或需要完成的工作
    2. 包含时间安排、截止日期或日程提醒
    3. 包含项目进展、状态更新或工作报告

    如果消息符合以上任一条件,则为TruePositive;否则为TrueNegative。
    请只回答"TruePositive"或"TrueNegative"。
    """
    return criteria_prompt
55
+
56
# Classify a message with the LLM API.
def classify_with_llm(message, criteria, max_retries=3, retry_delay=2):
    """Classify one message through the SiliconFlow chat-completions API.

    Args:
        message: Text of the message to classify.
        criteria: Classification criteria; placed before the message in the prompt.
        max_retries: Maximum number of request attempts.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        str: "TruePositive" when the model reply contains that label,
        "TrueNegative" for any other reply, or "分类失败" when no attempt
        succeeded (including when max_retries is zero or negative).
    """
    prompt = f"{criteria}\n\n消息内容: {message}"
    system_message = "你是一个专业的数据分类助手,根据给定标准判断消息是TruePositive还是TrueNegative。"

    # The request body is identical for every attempt, so build it once.
    payload = {
        "model": "deepseek-ai/DeepSeek-V3",
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt}
        ],
        "stream": False,
        "max_tokens": 512,
        "temperature": 0.1,
        "top_p": 0.7,
        "top_k": 50,
        "frequency_penalty": 0.5,
        "n": 1
    }

    for attempt in range(max_retries):
        # Reset per attempt so the error report below never shows the
        # response of an earlier attempt when this one failed before any
        # response arrived (e.g. a connection error).
        response = None
        try:
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {SILICONFLOW_API_KEY}"
            }

            # Without a timeout a stalled connection would block the whole
            # batch run indefinitely.
            response = requests.post(
                f"{SILICONFLOW_API_BASE}/chat/completions",
                headers=headers,
                json=payload,
                timeout=60
            )

            response.raise_for_status()
            response_data = response.json()
            result = response_data["choices"][0]["message"]["content"].strip()

            # Normalise: anything that is not explicitly TruePositive counts
            # as TrueNegative.
            return "TruePositive" if "TruePositive" in result else "TrueNegative"

        except Exception as e:
            # Deliberately broad: one bad message must not abort the batch.
            # `is not None` matters because a Response with an error status
            # is falsy.
            status = response.status_code if response is not None else 'N/A'
            body = response.text if response is not None else 'N/A'
            if attempt < max_retries - 1:
                print(f"API调用失败,{retry_delay}秒后重试: {e}\n响应状态码: {status}\n响应内容: {body}")
                time.sleep(retry_delay)
            else:
                print(f"API调用失败,达到最大重试次数: {e}\n最后响应状态码: {status}\n最后响应内容: {body}")

    # Every attempt failed, or no attempt was allowed.
    return "分类失败"
122
+
123
# Process messages in batches.
def batch_process_messages(messages, batch_size=10, delay=1):
    """Classify messages batch by batch, pausing between batches.

    Each message dict gets a "classification" key set in place. Messages
    with missing content, or content of five characters or fewer, are
    labelled "TrueNegative" without calling the API.

    Args:
        messages: List of message dicts.
        batch_size: Number of messages per batch.
        delay: Seconds to sleep between batches.

    Returns:
        list: The same message dicts, in their original order.
    """
    criteria = define_positive_sample_criteria()
    results = []

    for start in tqdm(range(0, len(messages), batch_size), desc="处理批次"):
        current_batch = messages[start:start + batch_size]

        for msg in tqdm(current_batch, desc="处理消息", leave=False):
            has_real_content = msg.get("content") and len(msg["content"]) > 5
            if has_real_content:
                msg["classification"] = classify_with_llm(msg["content"], criteria)
            else:
                # Very short or empty messages default to TrueNegative.
                msg["classification"] = "TrueNegative"
            results.append(msg)

        # Pause only when another batch follows.
        if start + batch_size < len(messages):
            time.sleep(delay)

    return results
159
+
160
# Entry point.
def main():
    """Classify messages from Messages.json and write the two sample files.

    Reads Messages.json from the working directory, asks on stdin how many
    messages to process, classifies them, and writes positive_samples.json
    and negative_samples.json. Messages whose classification failed are
    written to neither file; their count is reported.
    """
    # Abort early when no API key is configured.
    if not SILICONFLOW_API_KEY:
        print("警告: 未设置SiliconFlow API密钥。请设置环境变量SILICONFLOW_API_KEY或在代码中直接设置。")
        return

    input_file = "Messages.json"
    if not os.path.exists(input_file):
        print(f"错误: 找不到JSON输入文件 {input_file}")
        return

    print(f"使用输入文件: {input_file}")

    with open(input_file, "r", encoding="utf-8") as f:
        messages = json.load(f)

    print(f"读取了 {len(messages)} 条消息")

    # Ask whether to process everything or only a leading sample.
    sample_size = input("请输入要处理的消息数量(输入'all'处理所有消息,或输入一个数字如'100'处理部分消息): ")

    if sample_size.strip().lower() != "all":
        try:
            sample_size = int(sample_size)
        except ValueError:
            print("无效输入,将处理所有消息")
        else:
            if sample_size <= 0:
                # A negative count would slice from the wrong end and zero
                # would process nothing, so treat both as invalid input.
                print("无效输入,将处理所有消息")
            elif sample_size < len(messages):
                print(f"将处理 {sample_size} 条消息作为样本")
                messages = messages[:sample_size]
            else:
                print(f"样本大小大于等于总消息数,将处理所有 {len(messages)} 条消息")

    print("开始处理消息...")
    classified_messages = batch_process_messages(messages)

    # Split by label; anything else (e.g. "分类失败") is a failed classification.
    positive_samples = [msg for msg in classified_messages if msg.get("classification") == "TruePositive"]
    negative_samples = [msg for msg in classified_messages if msg.get("classification") == "TrueNegative"]
    failed_count = len(classified_messages) - len(positive_samples) - len(negative_samples)

    print(f"分类完成: TruePositive {len(positive_samples)} 条, TrueNegative {len(negative_samples)} 条")
    if failed_count:
        # Previously these messages vanished from the output without a trace.
        print(f"警告: {failed_count} 条消息分类失败,未写入任何输出文件")

    with open("positive_samples.json", "w", encoding="utf-8") as f:
        json.dump(positive_samples, f, ensure_ascii=False, indent=2)

    with open("negative_samples.json", "w", encoding="utf-8") as f:
        json.dump(negative_samples, f, ensure_ascii=False, indent=2)

    # Only JSON files are written; the old message also claimed CSV output.
    print("结果已保存到 positive_samples.json 和 negative_samples.json")


if __name__ == "__main__":
    main()
LLM/Database/negative_samples.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
LLM/Database/positive_samples.json ADDED
File without changes
LLM/Database/update_Messagejson.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mysql.connector
2
+ from datetime import datetime # 导入 datetime 模块
3
+ import os
4
+ from pathlib import Path
5
+ import json
6
+
7
+ #Azure MySQL数据库连接
8
+ current_dir = Path(__file__).parent.absolute()
9
+ ssl_ca_path = current_dir / "DigiCertGlobalRootCA.crt.pem"
10
+
11
# Serialise datetime values when writing JSON.
def datetime_serializer(obj):
    """``default`` hook for json.dump: render datetime objects as ISO 8601 text.

    Raises:
        TypeError: If obj is not a datetime instance.
    """
    if not isinstance(obj, datetime):
        raise TypeError("Type not serializable")
    return obj.isoformat()
16
+
17
# Export the whole Messages table to Messages.json.
#
# SECURITY: the database user and password used to be hard-coded here. They
# are exposed in version-control history and should be rotated. Credentials
# are now read from the environment:
#   TODOAGENT_DB_USER, TODOAGENT_DB_PASSWORD   (required)
#   TODOAGENT_DB_HOST, TODOAGENT_DB_PORT, TODOAGENT_DB_NAME   (optional)
cnx = None
cursor = None
try:
    db_user = os.getenv("TODOAGENT_DB_USER")
    db_password = os.getenv("TODOAGENT_DB_PASSWORD")
    if not db_user or not db_password:
        raise RuntimeError("未设置数据库凭据,请设置环境变量 TODOAGENT_DB_USER 和 TODOAGENT_DB_PASSWORD")

    # Open the database connection over SSL.
    cnx = mysql.connector.connect(
        user=db_user,
        password=db_password,
        host=os.getenv("TODOAGENT_DB_HOST", "todoagent-databases.mysql.database.azure.com"),
        port=int(os.getenv("TODOAGENT_DB_PORT", "3306")),
        database=os.getenv("TODOAGENT_DB_NAME", "todoagent"),
        ssl_ca=str(ssl_ca_path),
        ssl_disabled=False
    )

    print("数据库连接成功!")

    cursor = cnx.cursor()
    cursor.execute("SELECT * FROM Messages")
    # Column names, taken from the cursor metadata.
    columns = [desc[0] for desc in cursor.description]

    rows = cursor.fetchall()

    # One dict per row, keyed by column name.
    data = [dict(zip(columns, row)) for row in rows]

    print("表头:", columns)
    print("数据:")
    for row in data:
        print(row)

    # Serialise fully before opening the file, so a serialisation error
    # cannot leave a truncated Messages.json behind.
    serialized = json.dumps(data, ensure_ascii=False, indent=4, default=datetime_serializer)
    with open("Messages.json", "w", encoding="utf-8") as file:
        file.write(serialized)

except mysql.connector.Error as err:
    print(f"数据库错误: {err}")
except Exception as e:
    print(f"发生异常: {str(e)}")
finally:
    # Release the cursor and connection on the error paths as well.
    if cursor is not None:
        cursor.close()
    if cnx is not None:
        cnx.close()
        print("连接已正常关闭")
62
+
63
+
LLM/Notify/NotifyReadme.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!--
2
+ * @Description:
3
+ * @Author: Manda
4
+ * @Version:
5
+ * @Date: 2025-03-30 17:01:58
6
+ * @LastEditors: Manda
7
+ * @LastEditTime: 2025-04-03 13:39:35
8
+ -->
9
+ # 运行方式
10
+ 确保DigiCertGlobalRootCA.crt.pem与config.yaml在同一文件夹内
11
+ 运行方式 pip install -r requirements.txt 安装环境
12
+ 运行 python notifyMain.py
13
+
14
+ ## 20250403
15
+ ### 更新数据库连接办法
16
+ 启用Azure 与 GG老师的 yaml
17
+ 更新requirements.txt,增加yaml
18
+
19
+ ## 20250402
20
+ ### notifyMain.py 主函数
21
+ 主函数,每隔1小时调用下方三个py('db2txt.py', 'usrSpareTime.py', 'compareDb2txt.py')
22
+ 运行方式 pip install -r requirements.txt 安装环境
23
+ 运行 python notifyMain.py
24
+ -----------------------------
25
+ 得到的效果是:
26
+ 生成compare_output,output,time_analysis 文件夹
27
+ #### compare_output
28
+ 对比相同user_ID的UCtodolist表 & todolist表 生成结果
29
+ #### output
30
+ 拉取 todolist表 生成user_ID.txt
31
+ #### time_analysis
32
+ 对比相同user_ID的UCtodolist表不同更改时段,生成结果
33
+
34
+ ## 20250320
35
+ 一些做RAG的数据文档以user_id来命名
36
+
37
+ ### dataBaseConnecter
38
+ dataBaseConnecter.py实现连接服务器功能,并提供端口让其他py(如db2txt.py)将指定数据库内文本提取出来
39
+
40
+ ### db2txt (好像直接在数据库完成了对比,这个文件貌似没啥用了)
41
+ db2txt.py 将ToDoAgent数据库中的ToDoList表格内容下载到txt中,按ToDoList表格中的user_id来命名txt,也就是不同的user_id有不同的txt
42
+
43
+ ### usrSpareTime -->千人千面推送时间可以用到的RAG
44
+ usrSpareTime.py 将ToDoAgent数据库中的UCtodolist表格内容last_modified,数据获取出来分36个时段进行统计,统计出出现频率最高的6个时段, 将时段信息及出现次数下载到txt中,以相同“todo_id”为前提,查询ToDoList表格中的user_id来命名txt,也就是不同的user_id有不同的txt
45
+
46
+ ### compareDb2txt-->自动生成ToDoList可以用到的的RAG
47
+ compareDb2txt.py 将ToDoAgent数据库中的UCtodolist表格内容与ToDoList做对比,以相同“todo_id”为前提,对比“start_time”"end_time""location""todo_content",一旦发现有差异,则将差异内容下载到txt中,按ToDoList表格中的user_id来命名txt,也就是不同的user_id有不同的txt
48
+
49
+
50
+
LLM/Notify/__pycache__/dataBaseConnecter.cpython-312.pyc ADDED
Binary file (6.5 kB). View file
 
LLM/Notify/compareDb2txt.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # '''
2
+ # Description:
3
+ # Author: Manda
4
+ # Version:
5
+ # Date: 2025-03-30 16:28:58
6
+ # LastEditors: mdhuang555 [email protected]
7
+ # LastEditTime: 2025-03-30 16:39:18
8
+ # '''
9
+ from dataBaseConnecter import DatabaseConnector
10
+ import os
11
+ from datetime import datetime
12
+ import sys
13
+ import io
14
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
15
+
16
def get_table_data(db_connector: DatabaseConnector, table_name: str) -> dict:
    """Fetch all rows of a table and index them by todo_id.

    Args:
        db_connector: Connector used to open the connection and run the query.
        table_name: Name of the table to read.

    Returns:
        dict: Maps str(todo_id) to the row dict. Empty when the connection
        cannot be opened or any error occurs (the error is printed).
    """
    try:
        conn = db_connector.connect_db()
        if not conn:
            print("无法连接到数据库")
            return {}

        # extract_text opens and closes its own cursor, so none is created
        # here (the original opened one it never used).
        try:
            results = db_connector.extract_text(conn, table_name, '*')
            # Key by the string form of todo_id.
            return {str(row['todo_id']): row for row in results}
        finally:
            conn.close()
    except Exception as e:
        print(f"获取{table_name}数据错误: {e}")
        return {}
37
+
38
def compare_records(todolist_record: dict, uctodolist_record: dict) -> dict:
    """Report which of the tracked fields differ between two records.

    The fields start_time, end_time, location and todo_content are compared.
    datetime values are rendered as 'YYYY-MM-DD HH:MM:SS' text before the
    comparison.

    Returns:
        dict: Maps each differing field name to
        {'ToDoList': <value>, 'UCtodolist': <value>}; empty when all match.
    """
    def _comparable(value):
        # Render datetimes as text; leave every other value untouched.
        if isinstance(value, datetime):
            return value.strftime('%Y-%m-%d %H:%M:%S')
        return value

    changed = {}
    for name in ('start_time', 'end_time', 'location', 'todo_content'):
        list_value = _comparable(todolist_record.get(name))
        user_value = _comparable(uctodolist_record.get(name))
        if list_value != user_value:
            changed[name] = {'ToDoList': list_value, 'UCtodolist': user_value}

    return changed
60
+
61
def save_differences_to_file(differences: dict, output_dir: str = 'compare_output'):
    """Write the differences to one text file per user.

    Args:
        differences: Maps todo_id to a dict with the keys 'user_id' and
            'differences' (field name -> {'ToDoList': ..., 'UCtodolist': ...}).
        output_dir: Directory for the files; created when it does not exist.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Group the per-todo differences by user id.
    per_user = {}
    for todo_id, entry in differences.items():
        bucket = per_user.setdefault(entry['user_id'], {})
        bucket[todo_id] = entry['differences']

    # One file per user.
    for user_id, todo_diffs in per_user.items():
        filename = os.path.join(output_dir, f'user_{user_id}_differences.txt')
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(f"对比时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            out.write(f"用户ID: {user_id}\n")
            out.write("=" * 50 + "\n\n")

            for todo_id, field_diffs in todo_diffs.items():
                out.write(f"待办事项ID: {todo_id}\n")
                for field, values in field_diffs.items():
                    out.write(f" 字段: {field}\n")
                    out.write(f" ToDoList值: {values['ToDoList']}\n")
                    out.write(f" UCtodolist值: {values['UCtodolist']}\n")
                out.write("-" * 50 + "\n")

        print(f"已保存用户 {user_id} 的差异到文件: {filename}")
91
+
92
def main():
    """Compare ToDoList with UCtodolist and save the per-user differences.

    Only todo_ids present in both tables are compared. Any error during the
    run is printed rather than raised.
    """
    print("正在连接数据库...")

    try:
        db_connector = DatabaseConnector()

        # Load both tables, keyed by todo_id.
        print("正在获取表格数据...")
        todolist_data = get_table_data(db_connector, 'ToDoList')
        uctodolist_data = get_table_data(db_connector, 'UCtodolist')

        print("正在比较差异...")
        differences = {}
        shared_ids = set(todolist_data.keys()) & set(uctodolist_data.keys())
        for todo_id in shared_ids:
            source_record = todolist_data[todo_id]
            field_diffs = compare_records(source_record, uctodolist_data[todo_id])
            if not field_diffs:
                continue
            differences[todo_id] = {
                'user_id': source_record['user_id'],
                'differences': field_diffs
            }

        if not differences:
            print("未发现差异")
        else:
            print(f"发现 {len(differences)} 条记录有差异")
            save_differences_to_file(differences)
            print("差异已保存到文件中")

    except Exception as e:
        print(f"处理过程中出错: {e}")


if __name__ == "__main__":
    main()
LLM/Notify/dataBaseConnecter.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # '''
2
+ # Author: mdhuang555 [email protected]
3
+ # Date: 2025-03-30 15:57:22
4
+ # LastEditors: mdhuang555 [email protected]
5
+ # LastEditTime: 2025-04-03 11:32:30
6
+ # FilePath: \Notify\dataBaseConnecter.py
7
+ # Description: 数据库连接器,支持SSL连接
8
+ # '''
9
+ import socket
10
+ import json
11
+ import mysql.connector
12
+ from typing import Dict, Any, Optional
13
+ import yaml
14
+ from pathlib import Path
15
+
16
class DatabaseConnector:
    """MySQL access helper with an optional TCP server that answers table queries.

    The connection settings are read from config.yaml next to this file
    (section "mysql"). The server socket is created only when first needed,
    so scripts that merely query the database do not open a socket.
    """

    def __init__(self, host: str = '103.116.245.150', port: int = 3306):
        """Store the server bind address and load the configuration file."""
        self.host = host
        self.port = port
        # Created lazily by the server_socket property; the original opened a
        # socket for every instance and never closed it.
        self._server_socket = None
        self.config = self._load_config()

    @property
    def server_socket(self) -> socket.socket:
        """TCP socket used by start_server, created on first access."""
        if self._server_socket is None:
            self._server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        return self._server_socket

    @server_socket.setter
    def server_socket(self, value) -> None:
        # Kept assignable for callers that replaced the attribute directly.
        self._server_socket = value

    def _load_config(self) -> Dict[str, Any]:
        """Load config.yaml from this file's directory; return {} on any failure."""
        try:
            config_path = Path(__file__).parent / "config.yaml"
            with open(config_path, "r", encoding="utf-8") as f:
                # An empty file parses to None; normalise it to an empty dict.
                return yaml.safe_load(f) or {}
        except Exception as e:
            print(f"加载配置文件错误: {e}")
            return {}

    def connect_db(self) -> Optional["mysql.connector.MySQLConnection"]:
        """Open an SSL connection to MySQL; return None (after printing) on failure."""
        try:
            # The CA certificate is expected next to this file.
            current_dir = Path(__file__).parent.absolute()
            ssl_ca_path = current_dir / "DigiCertGlobalRootCA.crt.pem"

            if not ssl_ca_path.exists():
                raise FileNotFoundError(f"SSL证书文件未找到: {ssl_ca_path}")

            conn = mysql.connector.connect(
                host=self.config["mysql"]["host"],
                port=self.config["mysql"].get("port", 3306),
                user=self.config["mysql"]["user"],
                password=self.config["mysql"]["password"],
                database=self.config["mysql"]["database"],
                ssl_ca=str(ssl_ca_path),
                ssl_disabled=False,
                charset='utf8mb4',
                collation='utf8mb4_unicode_ci'
            )
            return conn
        except Exception as e:
            print(f"数据库连接错误: {e}")
            return None

    @staticmethod
    def _quote_identifier(name) -> str:
        """Return name backtick-quoted; raise ValueError unless it is a plain identifier."""
        cleaned = name.strip() if isinstance(name, str) else ""
        is_plain = (
            bool(cleaned)
            and cleaned.isascii()
            and all(ch.isalnum() or ch == "_" for ch in cleaned)
        )
        if not is_plain:
            raise ValueError(f"非法的表名或列名: {name!r}")
        return f"`{cleaned}`"

    def extract_text(self, conn: "mysql.connector.MySQLConnection", table: str, column: str) -> list:
        """Select one column, a comma-separated list of columns, or '*' from a table.

        Table and column names cannot be bound as query parameters, and
        start_server passes them straight from a network request, so each
        name is validated as a plain identifier and backtick-quoted before
        it is placed in the SQL text.

        Returns:
            list: Row dicts; an empty list (after printing the error) when a
            name is rejected or the query fails.
        """
        try:
            table_sql = self._quote_identifier(table)
            if column == '*':
                columns_sql = '*'
            else:
                parts = column.split(",") if isinstance(column, str) else [column]
                columns_sql = ", ".join(self._quote_identifier(part) for part in parts)
            query = f"SELECT {columns_sql} FROM {table_sql}"

            cursor = conn.cursor(dictionary=True)
            try:
                cursor.execute(query)
                return cursor.fetchall()
            finally:
                # Close the cursor on the failure path too.
                cursor.close()
        except Exception as e:
            print(f"提取文本错误: {e}")
            return []

    def start_server(self):
        """Listen on host:port and answer JSON requests of the form {"table", "column"}.

        The reply is a 4-byte big-endian length prefix followed by the UTF-8
        JSON payload. Runs until the process is stopped; an error while
        handling one client is printed and the loop continues.
        """
        self.server_socket.bind((self.host, self.port))
        self.server_socket.listen(5)
        print(f"服务器启动在 {self.host}:{self.port}")

        while True:
            try:
                client_socket, address = self.server_socket.accept()
                # The with-block closes the client socket on errors as well.
                with client_socket:
                    print(f"接受来自 {address} 的连接")

                    data = client_socket.recv(1024).decode('utf-8')
                    request = json.loads(data)

                    table = request.get('table')
                    column = request.get('column')

                    conn = self.connect_db()
                    if conn:
                        try:
                            results = self.extract_text(conn, table, column)
                            response = {'status': 'success', 'data': results}
                        except Exception as e:
                            response = {'status': 'error', 'message': str(e)}
                        finally:
                            conn.close()
                    else:
                        response = {'status': 'error', 'message': '数据库连接失败'}

                    # default=str is deliberate: rows may hold datetime or
                    # Decimal values that json cannot encode natively.
                    response_data = json.dumps(response, ensure_ascii=False, default=str)
                    response_bytes = response_data.encode('utf-8')
                    length_prefix = len(response_bytes).to_bytes(4, byteorder='big')

                    # sendall keeps writing until everything is sent; send()
                    # may transmit only part of a large payload.
                    client_socket.sendall(length_prefix + response_bytes)

            except Exception as e:
                print(f"处理请求错误: {e}")
                continue
125
+
126
+ if __name__ == "__main__":
127
+ server = DatabaseConnector()
128
+ server.start_server()
LLM/Notify/db2txt.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # '''
3
+ # Author: mdhuang555 [email protected]
4
+ # Date: 2025-03-30 16:09:29
5
+ # LastEditors: mdhuang555 [email protected]
6
+ # LastEditTime: 2025-04-03 11:02:35
7
+ # FilePath: \Notify\db2txt.py
8
+ # Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
9
+ # '''
10
+ from dataBaseConnecter import DatabaseConnector
11
+ import sys
12
+ import io
13
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
14
+ import os
15
+ from datetime import datetime
16
+
17
def get_database_text(table: str) -> list:
    """Return every row of the given table via DatabaseConnector.

    Returns:
        list: Row dicts from extract_text; an empty list when the connection
        fails or an error occurs (a message is printed in both cases).
    """
    try:
        connector = DatabaseConnector()
        connection = connector.connect_db()
        if connection:
            try:
                # '*' requests complete rows.
                return connector.extract_text(connection, table, '*')
            finally:
                connection.close()

        print("无法连接到数据库")
        return []

    except Exception as error:
        print(f"获取数据时发生错误: {error}")
        return []
39
+
40
def save_todos_by_user(todos: list, output_dir: str = 'output'):
    """Write the to-do rows into one '<user_id>.txt' file per user.

    Args:
        todos: Row dicts; each must contain 'user_id'.
        output_dir: Directory for the files; created when it does not exist,
            even if todos is empty.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if not todos:
        print("没有数据可以保存")
        return

    # Group the rows by the string form of user_id.
    grouped = {}
    for todo in todos:
        grouped.setdefault(str(todo['user_id']), []).append(todo)

    # One file per user; a failure for one user does not stop the others.
    for user_id, items in grouped.items():
        filename = os.path.join(output_dir, f'{user_id}.txt')
        try:
            with open(filename, 'w', encoding='utf-8') as out:
                out.write(f"导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                out.write(f"用户ID: {user_id}\n")
                out.write("=" * 50 + "\n\n")

                for item in items:
                    out.write("待办事项:\n")
                    for key, value in item.items():
                        if value is None:
                            continue  # skip empty fields
                        out.write(f" {key}: {value}\n")
                    out.write("-" * 50 + "\n")
            print(f"已保存用户 {user_id} 的待办事项到文件: {filename}")
        except Exception as e:
            print(f"保存用户 {user_id} 的数据时出错: {e}")
75
+
76
def main():
    """Export the ToDoList table into per-user text files."""
    print("正在连接数据库...")
    todos = get_database_text('ToDoList')

    if not todos:
        print("未能获取到数据")
        return

    print(f"成功获取 {len(todos)} 条记录")
    save_todos_by_user(todos)
    print("所有数据已保存完成")


if __name__ == "__main__":
    main()
LLM/Notify/notifyMain.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # '''
3
+ # Description: 定时运行数据库相关脚本的主程序
4
+ # Author: Manda
5
+ # Version: 1.0
6
+ # Date: 2024-03-30
7
+ # '''
8
+ import schedule
9
+ import time
10
+ import subprocess
11
+ import logging
12
+ from datetime import datetime
13
+ import os
14
+ import sys
15
+ import io
16
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
17
+
18
+ # 配置日志
19
+ logging.basicConfig(
20
+ level=logging.INFO,
21
+ format='%(asctime)s - %(levelname)s - %(message)s',
22
+ handlers=[
23
+ logging.FileHandler('notify_main.log', encoding='utf-8'),
24
+ logging.StreamHandler()
25
+ ]
26
+ )
27
+
28
+
29
def run_script(script_name: str):
    """Run a Python script in a child process and log the outcome.

    The child is started with ``sys.executable`` so it uses the same
    interpreter (and therefore the same installed packages) as this
    program, rather than whichever ``python`` is first on PATH.

    Args:
        script_name: Path of the script to execute.

    Nothing is returned and nothing is raised: success, the captured
    stdout, failures and the captured stderr are all written to the log.
    """
    try:
        logging.info(f"开始运行脚本: {script_name}")
        result = subprocess.run(
            [sys.executable, script_name],
            capture_output=True,
            text=True,
            encoding='utf-8',  # decode the child's output explicitly as UTF-8
            errors='replace',  # an undecodable byte must not abort the run
        )

        if result.returncode == 0:
            logging.info(f"脚本 {script_name} 运行成功")
            if result.stdout:
                logging.info(f"输出: {result.stdout}")
        else:
            logging.error(f"脚本 {script_name} 运行失败")
            if result.stderr:
                logging.error(f"错误: {result.stderr}")
    except Exception as e:
        logging.error(f"运行脚本 {script_name} 时发生错误: {str(e)}")
48
+
49
+
50
def run_all_scripts():
    """Run the notification scripts one after another.

    The scripts are looked up next to this file. Each existing script is
    run through ``run_script`` followed by a 5 second pause; a missing
    script is logged as an error and skipped.
    """
    logging.info("开始执行所有脚本")

    # Resolve scripts relative to this file, not the working directory.
    base_dir = os.path.dirname(os.path.abspath(__file__))

    for name in ('db2txt.py', 'usrSpareTime.py', 'compareDb2txt.py'):
        path = os.path.join(base_dir, name)
        if not os.path.exists(path):
            logging.error(f"脚本文件不存在: {path}")
            continue
        run_script(path)
        time.sleep(5)

    logging.info("所有脚本执行完成")
68
+
69
def main():
    """Run all scripts once, then keep running them hourly at minute 43.

    This function never returns: it polls the scheduler once a minute and
    keeps polling after an error has been logged.
    """
    logging.info("启动定时任务程序")

    # Register the hourly job (fires at minute 43 of every hour).
    schedule.every().hour.at(":43").do(run_all_scripts)

    # Do one run right away instead of waiting for the first slot.
    run_all_scripts()

    while True:
        try:
            schedule.run_pending()
        except Exception as e:
            logging.error(f"运行时发生错误: {str(e)}")
        # Wait one minute between polls, whether or not the poll failed.
        time.sleep(60)
86
+
87
+ if __name__ == "__main__":
88
+ main()
LLM/Notify/usrSpareTime.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # '''
2
+ # Description:
3
+ # Author: Manda
4
+ # Version:
5
+ # Date: 2025-03-30 16:42:47
6
+ # LastEditors: mdhuang555 [email protected]
7
+ # LastEditTime: 2025-03-30 16:59:19
8
+ # '''
9
+ import mysql.connector
10
+ import os
11
+ from datetime import datetime
12
+ from collections import defaultdict
13
+ from dataBaseConnecter import DatabaseConnector
14
+ import sys
15
+ import io
16
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
17
+
18
def connect_to_database(db_config: dict) -> mysql.connector.MySQLConnection:
    """Open a MySQL connection from a configuration mapping.

    Args:
        db_config: Must provide ``host``, ``user``, ``password`` and
            ``database``.

    Returns:
        The open connection, or ``None`` when connecting fails or a
        required key is missing (the error is printed).
    """
    try:
        return mysql.connector.connect(
            host=db_config['host'],
            user=db_config['user'],
            password=db_config['password'],
            database=db_config['database'],
            charset='utf8mb4',
        )
    except Exception as e:
        print(f"数据库连接错误: {e}")
        return None
32
+
33
def get_time_slot(hour: int, minute: int) -> str:
    """Map a time of day onto its 40-minute slot.

    The day is cut into consecutive 40-minute slots starting at 00:00.

    Args:
        hour: Hour of the day.
        minute: Minute of the hour.

    Returns:
        The slot as ``"HH:MM-HH:MM"``; the last slot of the day ends at
        ``24:00``.
    """
    # Round the minute-of-day down to the start of its 40-minute slot.
    slot_start = (hour * 60 + minute) // 40 * 40

    start_h, start_m = divmod(slot_start, 60)
    end_h, end_m = divmod(slot_start + 40, 60)

    return f"{start_h:02d}:{start_m:02d}-{end_h:02d}:{end_m:02d}"
50
+
51
def analyze_time_slots(db_connector: DatabaseConnector) -> dict:
    """Count, per user, how often to-dos were modified in each time slot.

    Rows of ``UCtodolist`` that have a ``last_modified`` value are joined
    with ``ToDoList`` to obtain the user id. Each ``datetime`` timestamp is
    mapped to a slot by ``get_time_slot``.

    Args:
        db_connector: Object whose ``connect_db()`` yields the connection.

    Returns:
        ``{user_id: {slot: count}}``, or an empty dict when the connection
        cannot be opened or an error occurs (the error is printed).
    """
    conn = None
    cursor = None
    try:
        conn = db_connector.connect_db()
        if not conn:
            print("无法连接到数据库")
            return {}

        cursor = conn.cursor(dictionary=True)

        # Fetch UCtodolist rows together with the owning ToDoList user id.
        cursor.execute("""
        SELECT uc.todo_id, uc.last_modified, t.user_id
        FROM UCtodolist uc
        JOIN ToDoList t ON uc.todo_id = t.todo_id
        WHERE uc.last_modified IS NOT NULL
        """)

        slot_counts = defaultdict(lambda: defaultdict(int))
        for record in cursor.fetchall():
            modified = record['last_modified']
            if not isinstance(modified, datetime):
                # Only real datetime values can be mapped to a slot.
                continue
            slot = get_time_slot(modified.hour, modified.minute)
            slot_counts[record['user_id']][slot] += 1

        return dict(slot_counts)

    except Exception as e:
        print(f"分析时间段时出错: {e}")
        return {}
    finally:
        if cursor is not None:
            cursor.close()
        if conn:
            conn.close()
92
+
93
def save_analysis_results(results: dict, output_dir: str = 'time_analysis'):
    """Write one time-slot report per user into ``output_dir``.

    Args:
        results: ``{user_id: {slot: count}}`` as produced by
            ``analyze_time_slots``.
        output_dir: Directory for the ``user_<id>_time_analysis.txt``
            files; created when missing.

    Each report lists the six most frequent slots with their count and
    share of all modifications, followed by the totals. A failure while
    writing one user's report is printed and does not stop the others.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    for user_id, time_slots in results.items():
        filename = os.path.join(output_dir, f'user_{user_id}_time_analysis.txt')

        try:
            # The total is loop-invariant: compute it once per user.
            total = sum(time_slots.values())
            # Most frequent slots first, keep the top six.
            top_slots = sorted(time_slots.items(), key=lambda x: x[1], reverse=True)[:6]

            with open(filename, 'w', encoding='utf-8') as f:
                f.write(f"分析时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"用户ID: {user_id}\n")
                f.write("=" * 50 + "\n\n")

                f.write("时间段使用频率统计(前6名):\n")
                for i, (slot, count) in enumerate(top_slots, 1):
                    f.write(f"第{i}名: {slot}\n")
                    f.write(f" 出现次数: {count}\n")
                    percentage = (count / total) * 100
                    f.write(f" 占比: {percentage:.2f}%\n")
                    f.write("-" * 30 + "\n")

                # Totals; a day holds 36 slots of 40 minutes.
                f.write(f"\n总修改次数: {total}\n")
                f.write(f"总时间段数: {len(time_slots)}/36\n")

            print(f"已保存用户 {user_id} 的时间分析到文件: {filename}")

        except Exception as e:
            print(f"保存用户 {user_id} 的分析结果时出错: {e}")
126
+
127
def main():
    """Analyse modification time slots and write the per-user reports."""
    print("正在连接数据库...")

    try:
        connector = DatabaseConnector()

        print("正在分析时间段分布...")
        results = analyze_time_slots(connector)

        if not results:
            print("未找到可分析的数据")
            return

        print(f"分析完成,共有 {len(results)} 个用户的数据")
        save_analysis_results(results)
        print("分析结果已保存到文件中")

    except Exception as e:
        print(f"处理过程中出错: {e}")
146
+
147
+ if __name__ == "__main__":
148
+ main()
LLM/filter_message/README.MD ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## 安装依赖
3
+
4
+
5
+ ```
6
+ pip install -r requirements.txt
7
+ ```
8
+
9
+ ## 运行示例
10
+
11
+ ```
12
+ python main.py
13
+ ```
14
+
15
+
16
+ ## 查看结果
17
+
18
+ data目录下的json文件(未清洗)
LLM/filter_message/__pycache__/libs.cpython-312.pyc ADDED
Binary file (8.75 kB). View file
 
LLM/filter_message/libs.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+
3
+ import yaml
4
+ import pymysql
5
+ from openai import OpenAI
6
+
7
def read_config(yaml_file):
    """Load a YAML configuration file.

    Args:
        yaml_file: Path of the YAML file, read as UTF-8.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.
    """
    with open(yaml_file, "r", encoding="utf-8") as stream:
        settings = yaml.safe_load(stream)
    return settings
11
+
12
+
13
+ import mysql.connector
14
+ import os
15
+ from pathlib import Path
16
+
17
+
18
def get_db_conn():
    """Open a MySQL connection (Azure MySQL) from ``CONFIG["mysql"]``.

    The connection uses SSL with the ``DigiCertGlobalRootCA.crt.pem``
    certificate located next to this file. The port defaults to 3306 when
    the configuration does not name one.

    Returns:
        The connection returned by ``mysql.connector.connect``.
    """
    mysql_cfg = CONFIG["mysql"]

    # The CA certificate is resolved relative to this module's directory.
    ca_file = Path(__file__).parent.absolute() / "DigiCertGlobalRootCA.crt.pem"

    return mysql.connector.connect(
        host=mysql_cfg["host"],
        port=mysql_cfg.get("port", 3306),
        user=mysql_cfg["user"],
        password=mysql_cfg["password"],
        database=mysql_cfg["database"],
        ssl_ca=str(ca_file),
        ssl_disabled=False,
    )
38
+
39
+
40
def execute_sql(sql):
    """Execute one SQL statement on the shared ``DB_CONN`` connection.

    Args:
        sql: The statement text.

    Returns:
        All fetched rows for a statement starting with ``SELECT``
        (case-insensitive, leading whitespace ignored); otherwise the
        cursor's ``rowcount``. The transaction is committed in both cases.
    """
    with DB_CONN.cursor() as cur:
        cur.execute(sql)

        is_select = sql.strip().upper().startswith("SELECT")
        outcome = cur.fetchall() if is_select else cur.rowcount

        # A commit is only required for writes; it is issued for reads
        # as well so every call ends its transaction.
        DB_CONN.commit()
        return outcome
54
+
55
+
56
def release():
    """Close the shared database connection and drop the module globals.

    After this call ``CONFIG`` and ``DB_CONN`` no longer exist at module
    level.
    """
    # Without this declaration the `del` statements below would make both
    # names local to the function, and `DB_CONN.close()` would raise
    # UnboundLocalError before anything was released.
    global CONFIG, DB_CONN

    DB_CONN.close()

    del CONFIG
    del DB_CONN
62
+
63
+
64
def get_llm():
    """Build an ``OpenAI`` client from ``CONFIG["openai"]``.

    Returns:
        A client created with the configured ``base_url`` and ``api_key``.
    """
    openai_cfg = CONFIG["openai"]
    return OpenAI(base_url=openai_cfg["base_url"], api_key=openai_cfg["api_key"])
69
+
70
+
71
def send_llm(messages: list[dict[str, str]], model: Optional[str] = None, resp_json=False):
    """Send chat messages to the LLM and return the reply text.

    Args:
        messages: Chat messages passed to ``chat.completions.create``.
        model: Model name; falls back to ``CONFIG["openai"]["model"]``
            when ``None``.
        resp_json: When true, request a ``json_object`` response format.

    Returns:
        The content of the first choice of the completion.

    The outgoing messages and the reply are echoed to stdout.
    """
    print(">>>>>>>>>>>>>>>>>",messages)
    settings = CONFIG["openai"]

    chosen_model = settings["model"] if model is None else model

    # Temperature 0 is used to improve accuracy.
    request = {
        "model": chosen_model,
        "messages": messages,
        "temperature": 0,
    }
    if resp_json:
        request["response_format"] = {"type": "json_object"}

    completion = LLM.chat.completions.create(**request)
    reply = completion.choices[0].message.content

    print("<<<<<", reply)
    return reply
95
+
96
+
97
def send_llm_with_query(query):
    """Send ``query`` as a single user message and return the LLM reply.

    Args:
        query: Text used as the content of the user message.

    Returns:
        The reply text returned by ``send_llm``.
    """
    # Build a real list: a trailing comma after the dict would create a
    # 1-tuple, which does not match the ``list[dict]`` that send_llm
    # declares for its messages parameter.
    messages = [
        {
            "role": "user",
            "content": query,
        }
    ]
    return send_llm(messages)
103
+
104
+
105
def send_llm_with_prompt(query):
    """Classify a batch of messages with the built-in system prompt.

    Args:
        query: The batch of message rows; it is converted with ``str()``
            and sent as the user message.

    Returns:
        The reply text returned by ``send_llm``.

    The system prompt asks for one JSON object per input row containing a
    short ``content`` summary, a percentage per category and the chosen
    category. Category names in the prompt are the keys that
    ``save_to_mysql`` maps to database columns, and the example output is
    valid JSON so the model has a well-formed pattern to imitate.
    """
    system = """
# 角色
你是一个专业的短信内容分析助手,根据输入判断内容的类型及可信度,为用户使用信息提供依据和便利。

# 任务
对于输入的多条数据,分析每一条数据内容(主键:`message_id`)属于【物流取件、欠费缴纳、待付(还)款、会议邀约、其他】的可能性百分比。
主要对于聊天、问候、回执、结果通知、上月账单等信息不需要收件人进行下一步处理的信息,直接归到其他类进行忽略

# 要求
1. 以json格式输出
2. content简洁提炼关键词,字符数<20以内
3. 输入条数和输出条数完全一样

# 输出示例
```
[
{"message_id":"1111111","content":"账单805.57元待还","物流取件":0,"欠费缴纳":99,"待付(还)款":1,"会议邀约":0,"其他":0,"分类":"欠费缴纳"},
{"message_id":"222222","content":"邀请你加入飞书视频会议","物流取件":0,"欠费缴纳":0,"待付(还)款":0,"会议邀约":100,"其他":0,"分类":"会议邀约"}
]
```
"""

    messages = [
        {
            "role": "system",
            "content": system,
        },
        {
            "role": "user",
            "content": str(query),
        },
    ]
    return send_llm(messages)
140
+
141
def save_to_mysql(data):
    """Persist classified messages to MySQL.

    Args:
        data: Dicts produced by the LLM, keyed by the names on the left of
            ``COLUMN_MAPPING``. Every item must have a ``"content"`` key.
            Items are updated in place (cleaned ``content``, possibly
            re-assigned ``分类``).

    Every item is upserted into ``message_stats``. Items whose category is
    not ``其他`` are additionally inserted (ignoring duplicates) into
    ``filter_message``. Rows are written in batches and each batch is
    committed on its own. A database error rolls back the current batch
    and is printed; the connection is always closed.
    """
    # LLM output key -> database column name.
    COLUMN_MAPPING = {
        "message_id": "message_id",
        "content": "content",
        "物流取件": "logistics_pickup",
        "欠费缴纳": "overdue_payment",
        "待付(还)款": "pending_payment",
        "会议邀约": "meeting_invitation",
        "其他": "other",
        "分类": "category"
    }

    BATCH_SIZE = 100  # insert 100 rows at a time to reduce lock contention
    conn = get_db_conn()

    try:
        with conn.cursor() as cursor:
            columns = list(COLUMN_MAPPING.values())
            updates = ', '.join(
                f"{col} = VALUES({col})" for col in columns if col != 'message_id'
            )
            sql = f"""
            INSERT INTO message_stats
            ({', '.join(columns)})
            VALUES ({', '.join(['%s'] * len(columns))})
            ON DUPLICATE KEY UPDATE
            {updates}
            """

            values = []
            for item in data:
                # Drop characters that cannot be encoded as UTF-8 (such as
                # lone surrogates). The error handler has to sit on the
                # encode step; on decode it would never have any effect.
                item["content"] = (
                    str(item["content"]).encode('utf-8', errors='ignore').decode('utf-8')
                )

                # Rule: an "overdue payment" whose summary mentions a
                # completed payment ("缴费支出") needs no action -> "其他".
                if item.get("分类") == "欠费缴纳" and "缴费支出" in item.get("content", ""):
                    item["分类"] = "其他"

                values.append([item.get(key, None) for key in COLUMN_MAPPING])

            # Batched upsert into message_stats.
            for i in range(0, len(values), BATCH_SIZE):
                batch = values[i: i + BATCH_SIZE]
                cursor.executemany(sql, batch)
                conn.commit()
                print(f"成功插入 {len(batch)} 条数据到 message_stats")

            # Only messages that are not classified as "其他" go into
            # filter_message.
            filter_sql = """
            INSERT IGNORE INTO filter_message (message_id, content)
            VALUES (%s, %s)
            """

            filter_values = [
                (item.get("message_id"), item.get("content"))
                for item in data
                if item.get("分类") != "其他"
            ]

            # Batched insert into filter_message.
            for i in range(0, len(filter_values), BATCH_SIZE):
                batch = filter_values[i: i + BATCH_SIZE]
                cursor.executemany(filter_sql, batch)
                conn.commit()
                print(f"成功插入 {len(batch)} 条数据到 filter_message")

    # The connection comes from mysql.connector, so its error type must be
    # caught for the rollback to run; pymysql's is kept for compatibility.
    except (mysql.connector.Error, pymysql.MySQLError) as e:
        conn.rollback()
        print(f"数据插入失败: {e}")

    finally:
        conn.close()
213
+ ###### init #####
214
+
215
+ CONFIG = read_config("filter_llm_config.yaml")
216
+ DB_CONN = get_db_conn()
217
+ LLM = get_llm()
LLM/filter_message/main.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+ import json
3
+ import time
4
+
5
+ from libs import save_to_mysql, execute_sql, send_llm_with_prompt
6
+
7
+
8
def get_message_with_page(page_num, page_size=10):
    """Fetch one page of messages that have not been classified yet.

    Messages are grouped by content; the query keeps only messages without
    a matching row in ``message_stats``.

    Args:
        page_num: Zero-based page index.
        page_size: Number of rows per page.

    Returns:
        The rows returned by ``execute_sql``, each row being
        ``(content, message_id, date)``.
    """
    skip = page_num * page_size
    query = f"""
    SELECT
    m.content,
    MAX(m.message_id) as message_id,
    MAX(m.date) as date
    FROM
    Messages m
    left join message_stats n on m.message_id=n.message_id
    WHERE
    m.app_name IN ('com.tencent.mm', 'SMS', 'com.ss.android.lark','com.ss.android.teams')
    AND DATE(m.date) >= '2025-04-26'
    AND LENGTH(m.content) > 50
    AND m.content NOT LIKE '%可用余额%'
    and n.message_id is null
    GROUP BY
    m.content
    LIMIT {page_size} OFFSET {skip};
    """
    print("======", query)
    return execute_sql(query)
31
+
32
+
33
def get_page_count(page_size=10):
    """Return how many pages of unclassified messages exist.

    The count uses the same filter as the page query in
    ``get_message_with_page`` (same app list and start date). With a
    different filter the number of pages can come out too small and the
    trailing pages are never processed.

    Args:
        page_size: Number of rows per page.

    Returns:
        The number of pages, rounded up; 0 when nothing matches.
    """
    # NOTE(review): the filter is duplicated from get_message_with_page;
    # keep the two queries in sync when either changes.
    sql = """
    SELECT
    COUNT(1) AS total_rows
    FROM
    (
    SELECT
    m.content
    FROM
    Messages m
    LEFT JOIN message_stats n ON m.message_id = n.message_id
    WHERE
    m.app_name IN ('com.tencent.mm', 'SMS', 'com.ss.android.lark','com.ss.android.teams')
    AND DATE(m.date) >= '2025-04-26'
    AND LENGTH(m.content) > 50
    AND m.content NOT LIKE '%可用余额%'
    AND n.message_id IS NULL
    GROUP BY
    m.content
    ) AS subquery;
    """
    rows = execute_sql(sql)
    count = rows[0][0]

    # Ceiling division: a partially filled last page still counts.
    page_count = (count + page_size - 1) // page_size

    print(f'表中数据共 {page_count} 页')
    return page_count
64
+
65
+
66
def data_to_todo(data: tuple[tuple], todo_list: list[dict], debug=False, max_retries: int = 3):
    """Classify rows with the LLM and append the results to ``todo_list``.

    The message ids sent are compared with the ids returned. Rows the LLM
    left out are sent again, at most ``max_retries`` times, so an LLM that
    keeps omitting a row cannot cause endless recursion.

    Args:
        data: Rows whose second element (index 1) is the message id.
        todo_list: Output list, extended in place with the parsed items.
            Items recovered by a retry are appended before the items of
            the current reply.
        debug: Unused; kept for backward compatibility.
        max_retries: How many additional rounds may be spent on rows that
            are missing from the reply.

    A reply that cannot be parsed is reported on stdout and contributes
    nothing to ``todo_list``.
    """
    if not data:
        # Nothing to classify; avoid a pointless LLM call.
        return

    request_id_list = {int(d[1]) for d in data}

    resp = send_llm_with_prompt(data)
    print(f'请求id: {len(request_id_list)} ->{request_id_list}')

    # The model may wrap its JSON in a Markdown code fence.
    resp = resp.replace("```json", "").replace("```", "")
    print(' ' + resp)
    print(' ' + '-' * 20)

    try:
        parsed_resp = json.loads(resp)
        response_id_list = {int(d['message_id']) for d in parsed_resp}
        print(f'响应id: {len(response_id_list)} -> {response_id_list}')

        diff = request_id_list - response_id_list
        if diff:
            print(f'本次处理有遗漏:{diff}')
            if max_retries > 0:
                # Compare as int: the ids in `diff` are ints while the raw
                # row value may be a string.
                diff_data = tuple(d for d in data if int(d[1]) in diff)
                data_to_todo(diff_data, todo_list, debug, max_retries - 1)
            else:
                print(f'重试次数已用尽,仍有遗漏:{diff}')

        todo_list.extend(parsed_resp)
        print(f'{len(todo_list)=}')
    except Exception as e:
        print(f"解析响应失败: {e}")
91
+
92
+
93
def main():
    """Classify all pending message pages and store the results.

    Pages are fetched one at a time (zero-based) and handed to
    ``data_to_todo``. An empty page ends the loop early. Everything that
    was collected is saved with a single ``save_to_mysql`` call.
    """
    collected = []

    total_pages = get_page_count()

    # Page indices start at 0.
    for page in range(total_pages):
        print(f'正在处理第 {page + 1} / {total_pages} 页数据')
        rows = get_message_with_page(page)
        if not rows:
            print('没有更多数据了')
            break

        data_to_todo(rows, collected)

    # Save to MySQL.
    if not collected:
        print("没有需要保存的数据")
        return

    save_to_mysql(collected)
    print(f"成功保存{len(collected)}条数据到数据库")
114
+
115
+
116
def main_loop():
    """Run ``main`` forever with a 30 second pause after each run.

    This lets newly arrived messages be picked up automatically.
    """
    pause_seconds = 30
    while True:
        main()
        time.sleep(pause_seconds)
120
+
121
+
122
+ if __name__ == '__main__':
123
+ main()
124
+ # main_loop()
LLM/filter_message/prompt.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 人工标注数据
2
+
3
+ | content | 是否正负样本 | 关键词 | 类型 |
4
+ | ------------------------------------------------------------ | ------------ | ------------------------ | -------- |
5
+ | 【建设银行】您账户8699于3月24日12时22分向微信支付-扫二维码付款支出人民币15元,可用余额5433.72元。 | FALSE | 可用余额 | |
6
+ | 【建设银行】您账户8699于3月24日12时11分向支付宝-天猫-深圳市升景科技有限公司支出人民币1889元,可用余额5465.6元。 | FALSE | 可用余额 | |
7
+ | 【建设银行】您账户8699于3月24日12时11分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额5448.72元。 | FALSE | 可用余额 | |
8
+ | 【驿收发】您的京东包裹已到惜福堂对面快递驿站,请21:00前凭2-0147来取,详询16675200600 | TRUE | 驿站、包裹、取件 | 包裹物流 |
9
+ | 【中国农业银行】为您特别准备了一份专属月度账单,登掌银搜“月度账单”或戳 go.abchina.com/k/CS7 查收。拒收请回复R | FALSE | 拒收 | |
10
+ | 【中国农业银行】充话费最高减20元,本月登掌银【城市专区-优惠立减】或点 go.abchina.com/k/C05 先到先得。拒收请回复R | FALSE | 拒收 | |
11
+ | 刻刻: 【火山引擎】亲爱的用户,您有千万DeepSeek模型额度待领取哦!每邀请1位新用户注册和使用,最高可获得130元代金券,多邀多得上不封顶!点击下载海报,分享到更多渠道:szacq.cn/ewyeb/ 拒收请回复R | FALSE | 拒收 | |
12
+ | M丶D: 【阿里云推广】尊敬的mandahuang,老友季钜惠!云服务器 99元/年,新购续费同享!戳>>https://t.aliyun.com/TIFqsCZY 立即抢购! 拒收请回复R | FALSE | 拒收 | |
13
+ | 王斯煜[表情]Vince 黑客松nv: 【AlipayHK】於3月19日 08:15需要通過Apple 服務(Apple services)待付款支付HKD8.00 | TRUE | 账单、待付款 | 支付 |
14
+ | [2条]李JK老师-1228: 【百度】亲爱的开发者您好,文心智能体年底送福利,即日起至12月30日,创建并提交您的智能体,即有机会获得现金卡福利,最高500元,多档奖励,中奖机会大!登陆天数越多,创建有创意,中奖率越高!福利倒计时!点击参与!https://agents.baidu.com/activity/detail/13 | FALSE | 送福利、有机会 | |
15
+ | [5条]LJK86: 【百度】亲爱的开发者您好,文心智能体年底送福利,即日起至12月30日,创建并提交您的智能体,即有机会获得现金卡福利,最高500元,多档奖励,中奖机会大!登陆天数越多,创建有创意,中奖率越高!福利倒计时!点击参与!https://agents.baidu.com/activity/detail/13 | FALSE | 送福利、有机会 | |
16
+ | [3条]M丶D: 【韵达快递】亲434466097408983超10小时未取出,如需帮助或有问题请致电15900077340 、020-89725127 | TRUE | 快递、取件 | 包裹物流 |
17
+ | [2条]LJK86: 【智谱AI】亲爱的开发者您好,感谢您参与智谱开放平台满意度调研,您的智谱清言月卡奖励正在发放,请在链接中输入您问卷中填写的手机号查询礼品码,前往PC/APP智谱清言会员充值页,选择“礼品码兑换”。查询链接:https://zhipu-ai.feishu.cn/share/base/query/shr... | FALSE | 满意度调研 | |
18
+ | [2条]M丶D: 【中国电信】流量满满,温暖相伴,我们特别为你准备了预存领10GB的流量大礼,无论是与家人视频通话,还是朋友间分享趣事,都能畅通无阻,让爱不断线。马上戳 https://vipxjzl.mini189.cn/BG/ 了解吧,具体以实际页面展示为准,如已办理请忽略,转发无效。拒收请回复R | FALSE | 拒收 | |
19
+ | [3条]王斯煜[表情]Vince 黑客松nv: 【中通快递】包裹已到深圳光明正大城商业街103号店,取件码3-5-1092。到店扫“取件二维码”,线上查询更方便!询19128399078 | TRUE | 快递、包裹、取件码 | 包裹物流 |
20
+ | 王斯煜[表情]Vince 黑客松nv: 尊敬的*斯煜,您在我行办理的1笔个人贷款需于2024年12月08日17:00前还款,当期还款金额本息合计999999.84元,请您留意尾号0455的账户可用余额是否充足,避免因贷款逾期影响个人征信。具体贷款信息可通过工行网上银行、手机银行或致电贷款经办行查询。【工商银行】 | TRUE | 银行、贷款、还款 | 支付 |
21
+ | [2条]李JK老师-1228: 【51CTO学堂】《DeepSeek训练营》火爆来袭!0元报名,学习AI核心技术,解锁职业新可能 zt60.cn/CMPE9 拒收请回复R | FALSE | 拒收 | |
22
+ | [3条]刻刻: 60GB本地數據及5000分鐘本地通話30日組合成功開啟,餘額已被扣除,有效期至10/02/2025 23:59。請立即登入MyLink App bit.ly/MySimMyLink 進行增值,若賬戶餘額充足,60GB本地數據及5000分鐘本地通話30日組合將每30日以$38自動續期,並於餘額內自動... | FALSE | | |
23
+ | [3条]李JK老师-1228: 【湖南通信】尊敬的用户:您订购的卡已配送暂未签收,为了更好的为您服务,烦请回复:未拿到卡且需要回复1,卡在站点未取件回复2,已取件回复3。如有疑问请拨打4008155555 | TRUE | 配送、未签收、未取件 | 包裹物流 |
24
+ | [2条]李JK老师-1228: 【深势科技】Bohrium用户您好,您有共计余额20.00元的体验卡将于2024-12-24 23:59:59到期,请及时使用。 | FALSE | 共计余额 | |
25
+ | [3条]李JK老师-1228: 【讯飞开放平台】到期预警!尊敬的会员用户:您的个人级乐享会员,将于2024-12-08 00:00:00正式到期,截至目前仅剩7天。如您要继续使用,自即日起,7天内完成会员体验问卷http://1024-2019.iflytek.com/h5/vip-ques?t=2024-12-15,即可免费续约... | TRUE | 到期 | 到期提醒 |
26
+ | M丶D: 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。 | TRUE | 快递、取件 | 包裹物流 |
27
+ | [33条]斯煜[表情]Vince: 【中通快递】包裹已到深圳光明正大城商业街103号店,取件码3-5-1092。到店扫“取件二维码”,线上查询更方便!询19128399078 | TRUE | 快递、取件、取件码 | 包裹物流 |
28
+ | 王斯煜[表情]Vince 黑客松nv: 【申通快递】快递尾号7117已在代收点1天未取,请尽快取件,详询13434525312,最新快递状态请访问t.sto.cn/18PBt4 | TRUE | 快递、取件 | 包裹物流 |
29
+ | [2条]王斯煜[表情]Vince 黑客松nv: 【申通快递】包裹已到深圳光明正大城商业街103号店,取件码5-5-7117。到店扫“取件二维码”,线上查询更方便!询19128399078 | TRUE | 快递、取件码 | 包裹物流 |
30
+ | [2条]王斯煜[表情]Vince 黑客松nv: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba | TRUE | 账单、还款 | 支付 |
31
+ | [32条]斯煜[表情]Vince: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba | TRUE | 账单、还款 | 支付 |
32
+ | [30条]斯煜[表情]Vince: 【美团月付】您的2月账单805.57元需要付款,查看详情点击 dpurl.cn/80JQdKHa | TRUE | 账单、付款 | 支付 |
33
+ | [26条]斯煜[表情]Vince: Mox: 你的Mox Credit月結單已準備就緒。你可登入Mox應用程式查看。 | TRUE | 账单 | 支付 |
34
+ | 【建设银行】您账户8699于3月22日9时51分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7414.6元。 | FALSE | 可用余额 | |
35
+ | [3条]M丶D: 明天九点开会 | TRUE | 时间、开会 | 会议 |
36
+ | 【建设银行】您账户8699于3月22日9时42分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7431.48元。 | FALSE | 可用余额 | |
37
+ | 【建设银行】您账户8699于3月22日9时32分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7448.36元。 | FALSE | 可用余额 | |
38
+ | 【建设银行】您账户8699于3月22日9时12分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7465.24元。 | FALSE | 可用余额 | |
39
+ | 【建设银行】您账户8699于3月22日8时53分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7482.12元。 | FALSE | 可用余额 | |
40
+ | 【建设银行】您账户8699于3月22日18时2分向微信支付-羊城通缴费支出人民币2元,可用余额7397.6元。 | FALSE | 可用余额 | |
41
+ | 【招商银行】您的风险评估已到期,为避免错过我行优质产品信息,请及时重评!点击 cmbt.cn/a/zhV 去评估。如已完成或销户请忽略 | FALSE | 请忽略 | |
42
+ | 【建设银行】您账户8699于3月22日23时45分向微信支付-西苑出版社支出人民币180元,可用余额7215.6元。 | FALSE | 可用余额 | |
43
+ | 【建设银行】您账户8699于3月22日22时47分向微信支付-羊城通缴费支出人民币2元,可用余额7395.6元。 | FALSE | 可用余额 | |
44
+ | 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。 | TRUE | 快递、丰巢、取件 | 包裹物流 |
45
+ | 【建设银行】您账户8699于3月22日13时16分向微信支付-扫二维码付款支出人民币15元,可用余额7399.6元。 | FALSE | 可用余额 | |
46
+ | 【建设银行】您账户8699于3月21日8时11分向微信支付-小霞包点(香雪店)支出人民币5.5元,可用余额7566.14元。 | FALSE | 可用余额 | |
47
+ | 【建设银行】您账户8699于3月21日8时7分向微信支付-羊城通缴费支出人民币3.5元,可用余额7571.64元。 | FALSE | 可用余额 | |
48
+ | 【建设银行】您账户8699于3月21日20时5分向微信支付-羊城通缴费支出人民币1元,可用余额7519.38元。 | FALSE | 可用余额 | |
49
+ | 【中国农业银行】百万立减金限时抢,最高10元立减金等您拿,本月戳 go.abchina.com/k/CfK 直达,先到先得。拒收请回复R | FALSE | 拒收 | |
50
+ | 【建设银行】您账户8699于3月21日15时28分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7520.38元。 | FALSE | 可用余额 | |
51
+ | 【建设银行】您账户8699于3月21日15时28分向支付宝-理财-天弘基金管理有限公司支出人民币16.88元,可用余额7537.26元。 | FALSE | 可用余额 | |
52
+ | M丶D: 【话费账单】尊敬的150****9503客户,您02月01日- 02月28日共消费8.00元。主要消费项目包括: | FALSE | 共消费 | |
53
+ | 菜鳥包裹HK00083544922到達香港仔置富道19號置富花園19座地下B舖,提件碼7-1-1002,3個工作日内取。 | TRUE | 菜鸟、包裹、到达、提件码 | 包裹物流 |
54
+ | 您的集運單SF3148634434778因其他异常已拒收退回賣家,建議您聯系賣家處理,如有必要可申請退款 | TRUE | 申请退款 | 包裹物流 |
55
+ | 菜鳥包裹HK00083525678到達香港仔置富道19號置富花園19座地下B舖,提件碼7-1-1004,3個工作日内取。 | TRUE | 菜鸟、包裹、到达、提件码 | 包裹物流 |
56
+ | [10条]王斯煜[表情]Vince 黑客松nv: 菜鳥包裹HK00083544922到達香港仔置富道19號置富花園19座地下B舖,提件碼7-1-1002,3個工作日内取。 | TRUE | 菜鸟、包裹、到达、提件码 | 包裹物流 |
57
+ | [11条]王斯煜[表情]Vince 黑客松nv: 您的集運單SF3148634434778因其他异常已拒收退回賣家,建議您聯系賣家處理,如有必要可申請退款 | TRUE | 申请退款 | 包裹物流 |
58
+ | 【建设银行】您账户8699于3月21日12时20分向微信支付-扫二维码付款支出人民币12元,可用余额7554.14元。 | FALSE | 可用余额 | |
59
+ | [2条]AlisaGG陈G老师1010: 预约一个会和你 15分钟。我也正经了[加油] | TRUE | 预约、开会、时间 | 会议 |
60
+ | 【建设银行】您账户8699于3月20日20时3分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7574.12元。 | FALSE | 可用余额 | |
61
+ | 【建设银行】您账户8699于3月20日19时51分向微信支付-羊城通缴费支出人民币3.5元,可用余额7591元。 | FALSE | 可用余额 | |
62
+ | 【建设银行】您账户8699于3月20日18时36分向微信支付-羊城通缴费支出人民币1元,可用余额7594.5元。 | FALSE | 可用余额 | |
63
+ | WANG Siyu: @AlisaGG 6:00PM后 今晚 什么时候有空 | TRUE | 有空、时间 | 会议 |
64
+ | 【建设银行】您账户8699于3月27日8时3分向微信支付-羊城通缴费支出人民币3.5元,可用余额5240.93元。 | FALSE | 可用余额 | |
65
+ | 【建设银行】您账户8699于3月27日8时28分向微信支付-百度平台商家支出人民币18.95元,可用余额5221.98元。 | FALSE | 可用余额 | |
66
+ | 【建设银行】您账户8699于3月27日9时12分向支付宝-理财-天弘基金管理有限公司支出人民币16.88元,可用余额5205.1元。 | FALSE | 可用余额 | |
67
+ | 【广州银行】您尾号5138的卡片转入人民币550.00元,本期账单已还清。绑定广州银行信用卡官微实时查账。 | FALSE | 账单、还清 | |
68
+ | 【建设银行】您账户8699于3月27日9时26分向支付宝-黄敏达还款支出人民币550元,可用余额4655.1元。 | FALSE | 可用余额 | |
69
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-广州市自来水有限公司支出人民币17.58元,可用余额4637.52元。 | FALSE | 可用余额 | |
70
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额4620.64元。 | FALSE | 可用余额 | |
71
+ | 【建设银行】您账户8699于3月27日9时48分向微信支付-壹伴助手支出人民币399元,可用余额4221.64元。 | FALSE | 可用余额 | |
72
+ | 【广州银行】黄敏达先生,您好。您的信用卡欠款未缴,请即日内还款,否则我行不排除根据客户协议委托第三方公司向您催缴。若已还款无需理会 | TRUE | 信用卡、还款 | 支付 |
73
+ | 【中国农业银行】尊敬的用户,您已超过1个月未登录账户,邀您点击 go.abchina.com/k/C7C 查看账户详情。拒收请回复R | FALSE | 拒收 | |
74
+ | 【建设银行】您账户8699于3月27日12时38分向微信支付-兰州拉面(佳大)支出人民币16元,可用余额4205.64元。 | FALSE | 可用余额 | |
75
+ | [23条]M丶D: 1000本链接: https://pan.baidu.com/s/13dV3m54iGE8oWgYtkARQPw?pwd=vwq3 提取码: vwq3 复制这段内容后打开百度网盘手机App,操作更方便哦 | FALSE | 百度网盘 | |
76
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-广州市自来水有限公司支出人民币17.58元,可用余额4637.52元。 | FALSE | 可用余额 | |
77
+ | 招商银行风险评估到期提醒 | FALSE | 到期提醒 | 提醒 |
78
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-广州市自来水有限公司支出人民币17.58元,可用余额4637.52元。 | FALSE | 可用余额 | |
79
+ | 重构工作完成时间 | FALSE | | 聊天 |
80
+ | 上课时间 | FALSE | | 提醒 |
81
+ | 明天上班 | FALSE | | 聊天 |
82
+ | 等待黄老师远程会议 | FALSE | 等待 | 聊天 |
83
+ | 确认代码与数据库兼容性 | FALSE | 确认 | |
84
+ | 补全项目说明书 | FALSE | | |
85
+ | 更新readme文档 | FALSE | | 待办 |
86
+ | 确认是否需要关防火墙 | FALSE | | |
87
+ | 使用企业邮箱 | FALSE | | |
88
+ | 处理手机验证问题 | FALSE | | |
89
+ | 分享达子的课程到飞书共享空间 | FALSE | | |
90
+ | 明天下午3点有会议 | TRUE | 会议 | 开会 |
91
+ | 确认会议时间 | FALSE | | 细节讨论 |
92
+ | 把代码上传到云上的自己的分支 | FALSE | | 讨论 |
93
+ | "大数据需求准入评审周会,时间:11:00 -12:30 参会人:所有 地点:2栋17楼-用户-配有飞书会议-优先5人以上预定(12)深圳新一个代2栋" | TRUE | 参会人、时间、地点、会议 | 开会 |
94
+ | [26条]斯煜[表情]Vince: Mox: 你的Mox Credit月結單已準備就緒。你可登入Mox應用程式查看。 | TRUE | 账单 | 缴费 |
95
+ | 飞书验证码、处理飞书邮箱问题 | FALSE | | 无效信息 |
96
+ | Python版本确认 | FALSE | | 无效信息 |
LLM/filter_message/prompt.txt ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 人工标注数据
2
+
3
+ | content | 是否正负样本 | 关键词 | 类型 |
4
+ | --- | --- | --- | --- |
5
+ | 【建设银行】您账户8699于3月26日8时6分向微信支付-羊城通缴费支出人民币3.5元,可用余额5285.33元。 | FALSE | 余额 | 提醒 |
6
+ | 【建设银行】您账户8699于3月26日8时11分向微信支付-小霞包点(香雪店)支出人民币5.5元,可用余额5279.83元。 | FALSE | 余额 | 提醒 |
7
+ | [3条]LJK86: 您的号卡已在配送途中,物流单号SF3147624215612,点击:http://t.hn.189.cn/EZv6bazD ,查询物流进度。如已签收请在有效期内先实名激活。客服热线4008155555【中国电信】 | FALSE | | |
8
+ | AlisaGG陈G老师1010: 【招商银行】您本月办理分期还款可享限时2.3折优惠!打开掌上生活APP,搜“分期还款”立享折扣优惠!资格实时测评,拒收请回复R | FALSE | | |
9
+ | "王斯煜[表情]Vince 黑客松nv: @AlisaGG @LJK86 明天中午再对 WANG Siyu邀请你加入飞书视频会议<br>会议主题:FilterLLM ToDoGenLLM PR Merge讨论<br>会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)<br>会议 ID:656 445 907<br>会议链接:https://..." | TRUE | 飞书视频会议 | 会议 |
10
+ | [6条]王斯煜[表情]Vince 黑客松nv: Your Mint 3-Month, Unlimited plan expires in 2 days. Log into your account at my.mintmobile.com or via our app to make a payment and keep your Mint se... | FALSE | | |
11
+ | [4条]斯煜[表情]Vince: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba | TRUE | 还款 | 待付(还)款 |
12
+ | [4条]斯煜[表情]Vince: 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。 | TRUE | 快递、取件 | 物流取件 |
13
+ | [6条]斯煜[表情]Vince: 尊敬的*斯煜,您在我行办理的1笔个人贷款需于2024年12月08日17:00前还款,当期还款金额本息合计999999.84元,请您留意尾号0455的账户可用余额是否充足,避免因贷款逾期影响个人征信。具体贷款信息可通过工行网上银行、手机银行或致电贷款经办行查询。【工商银行】 | TRUE | 还款 | 待付(还)款 |
14
+ | [6条]斯煜[表情]Vince: Your Mint 3-Month, Unlimited plan expires in 2 days. Log into your account at my.mintmobile.com or via our app to make a payment and keep your Mint se... | FALSE | | 提醒 |
15
+ | AlisaGG陈G老师1010: 【驿收发】您的邮政包裹已到凯丰花园2栋驿站,请23:00前凭5-5-6530来取,详询18320926368 | TRUE | | 物流取件 |
16
+ | [2条]AlisaGG: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。 | TRUE | 取件码 | 物流取件 |
17
+ | [2条]AlisaGG陈G老师1010: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。 | TRUE | 取件码 | 物流取件 |
18
+ | "[3条]AlisaGG陈G老师1010: 取件通知<br>取件码: 11724837<br>运单号: 464285154986072<br>取件地址: 深圳新一代产业园P2出入口内侧4号丰巢柜<br>配送人员: 18124519013<br>计费规则: 查看详情" | TRUE | 取件码、运单号 | 物流取件 |
19
+ | "[4条]AlisaGG陈G老师1010: 取件再次提醒<br>取件码:: 27696201<br>配送公司:: 申通快递<br>运单号:: 777293635831671<br>配送员手机:: 13392809673<br>取件地址:: 深圳新一代产业园P2出入口内侧4号丰巢柜" | TRUE | 取件码、运单号 | 物流取件 |
20
+ | [5条]李JK老师-1228: 【菜鸟驿站】请凭140-3-1019到菜鸟驿站取件,查询详情u.cainiao.com/53h4bSr7zrh | TRUE | 取件码、菜鸟驿站 | 物流取件 |
21
+ | "斯煜[表情]Vince: WANG Siyu邀请你加入飞书视频会议<br>会议主题:FilterLLM ToDoGenLLM PR Merge讨论<br>会议时间:3月31日 (今天) 12:00 - 12:30 (GMT+8)<br>会议 ID:656 445 907<br>会议链接:https://vc.feishu.cn/j/65644590 ..." | TRUE | 飞书视频会议 | 会议 |
22
+ | 刻刻: 【停机前提醒】尊敬的移动客户,您好!您的账户余额不足被限制使用。现提醒您需充值缴费至少32.49元,以确保您继续享受畅通的通信服务。诚邀您一键办理自动充服务,自动充值缴费更轻松:https://dx.10086.cn/7WyRLA 。 心级服务、让爱连接【中国移动】 | TRUE | 缴费、余额不足 | 待付(还)款 |
23
+ | AlisaGG: 【小象超市】您的商品已放置在门口,因有易碎等商品请尽快取回,如有疑问请联系15794935204 。祝您生活愉快! | FALSE | | 提醒 |
24
+ | 【美团月付】您4月账单8937.95元待还,最后还款日为本月8号,查账或立即还款点 dpurl.cn/kVln1cqa | TRUE | 还款 | 待付(还)款 |
25
+ | 【建设银行】您账户8699于3月24日12时22分向微信支付-扫二维码付款支出人民币15元,可用余额5433.72元。 | FALSE | 可用余额 | 提醒 |
26
+ | 【建设银行】您账户8699于3月24日12时11分向支付宝-天猫-深圳市升景科技有限公司支出人民币1889元,可用余额5465.6元。 | FALSE | 可用余额 | |
27
+ | 【建设银行】您账户8699于3月24日12时11分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额5448.72元。 | FALSE | 可用余额 | |
28
+ | 【驿收发】您的京东包裹已到惜福堂对面快递驿站,请21:00前凭2-0147来取,详询16675200600 | TRUE | 驿站、包裹、取件 | 物流取件 |
29
+ | 【中国农业银行】为您特别准备了一份专属月度账单,登掌银搜“月度账单”或戳 go.abchina.com/k/CS7 查收。拒收请回复R | FALSE | 拒收 | 提醒 |
30
+ | 刻刻: 【火山引擎】亲爱的用户,您有千万DeepSeek模型额度待领取哦!每邀请1位新用户注册和使用,最高可获得130元代金券,多邀多得上不封顶!点击下载海报,分享到更多渠道:szacq.cn/ewyeb/ 拒收请回复R | FALSE | 拒收 | 提醒 |
31
+ | M丶D: 【阿里云推广】尊敬的mandahuang,老友季钜惠!云服务器 99元/年,新购续费同享!戳>><url id="cvlp38lgsoaarffpph8g" type="url" status="parsed" title="阿里云权益中心" wc="3700">https://t.aliyun.com/TIFqsCZY </url> 立即抢购!拒收请回复R | FALSE | 拒收 | 提醒 |
32
+ | 【AlipayHK】於3月19日 08:15需要通過Apple 服務(Apple services)待付款支付HKD8.00 | TRUE | 账单、待付款 | 支付 |
33
+ | [2条]李JK老师-1228: 【百度】亲爱的开发者您好,文心智能体年底送福利,即日起至12月30日,创建并提交您的智能体,即有机会获得现金卡福利,最高500元,多档奖励,中奖机会大!登陆天数越多,创建有创意,中奖率越高!福利倒计时!点击参与!<url id="cvlp38lgsoaarffpph9g" type="url" status="failed" title="" wc="0">https://agents.baidu.com/activity/detail/13 </url> | FALSE | 送福利、有机会 | 无效信息 |
34
+ | [5条]LJK86: 【百度】亲爱的开发者您好,文心智能体年底送福利,即日起至12月30日,创建并提交您的智能体,即有机会获得现金卡福利,最高500元,多档奖励,中奖机会大!登陆天数越多,创建有创意,中奖率越高!福利倒计时!点击参与!<url id="cvlp38lgsoaarffpph9g" type="url" status="failed" title="" wc="0">https://agents.baidu.com/activity/detail/13 </url> | FALSE | 送福利、有机会 | 无效信息 |
35
+ | [3条]M丶D: 【韵达快递】亲434466097408983超10小时未取出,如需帮助或有问题请致电15900077340 、020-89725127 | TRUE | 取件 | 物流取件 |
36
+ | [2条]LJK86: 【智谱AI】亲爱的开发者您好,感谢您参与智谱开放平台满意度调研,您的智谱清言月卡奖励正在发放,请在链接中输入您问卷中填写的手机号查询礼品码,前往PC/APP智谱清言会员充值页,选择“礼品码兑换”。查询链接:<url id="cvlp38lgsoaarffppha0" type="url" status="parsed" title="Feishu - Log in" wc="339">https://zhipu-ai.feishu.cn/share/base/query/shr </url> ... | FALSE | 满意度调研 | 无效信息 |
37
+ | [2条]M丶D: 【中国电信】流量满满,温暖相伴,我们特别为你准备了预存领10GB的流量大礼,无论是与家人视频通话,还是朋友间分享趣事,都能畅通无阻,让爱不断线。马上戳 <url id="cvlp38lgsoaarffpphag" type="url" status="parsed" title="预存领流量" wc="3306">https://vipxjzl.mini189.cn/BG/ </url> 了解吧,具体以实际页面展示为准,如已办理请忽略,转发无效。拒收请回复R | FALSE | 拒收 | |
38
+ | [3条]王斯煜[表情]Vince 黑客松nv: 【中通快递】包裹已到深圳光明正大城商业街103号店,取件码3-5-1092。到店扫“取件二维码”,线上查询更方便!询19128399078 | TRUE | 快递、包裹、取件码 | 物流取件 |
39
+ | [2条]李JK老师-1228: 【51CTO学堂】《DeepSeek训练营》火爆来袭!0元报名,学习AI核心技术,解锁职业新可能 zt60.cn/CMPE9 拒收请回复R | FALSE | 拒收 | 无效信息 |
40
+ | [3条]刻刻: 60GB本地數據及5000分鐘本地通話30日組合成功開啟,餘額已被扣除,有效期至10/02/2025 23:59。請立即登入MyLink App bit.ly/MySimMyLink 進行增值,若賬戶餘額充足,60GB本地數據及5000分鐘本地通話30日組合將每30日以$38自動續期,並於餘額內自動... | FALSE | | 提醒 |
41
+ | [3条]李JK老师-1228: 【湖南通信】尊敬的用户:您订购的卡已配送暂未签收,为了更好的为您服务,烦请回复:未拿到卡且需要回复1,卡在站点未取件回复2,已取件回复3。如有疑问请拨打4008155555 | TRUE | 未签收 | 包裹物流 |
42
+ | [2条]李JK老师-1228: 【深势科技】Bohrium用户您好,您有共计余额20.00元的体验卡将于2024-12-24 23:59:59到期,请及时使用。 | FALSE | 共计余额 | 提醒 |
43
+ | [3条]李JK老师-1228: 【讯飞开放平台】到期预警!尊敬的会员用户:您的个人级乐享会员,将于2024-12-08 00:00:00正式到期,截至目前仅剩7天。如您要继续使用,自即日起,7天内完成会员体验问卷<url id="cvlp38lgsoaarffpphb0" type="url" status="parsed" title="乐享会员调查问卷" wc="1540">http://1024-2019.iflytek.com/h5/vip-ques?t=2024-12-15 </url> ,即可免费续约... | FALSE | 到期 | 提醒 |
44
+ | 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。 | TRUE | 丰巢、取件 | 物流取件 |
45
+ | [33条]斯煜[表情]Vince: 【中通快递】包裹已到深圳光明正大城商业街103号店,取件码3-5-1092。到店扫“取件二维码”,线上查询更方便!询19128399078 | TRUE | | 物流取件 |
46
+ | 王斯煜[表情]Vince 黑客松nv: 【申通快递】快递尾号7117已在代收点1天未取,请尽快取件,详询13434525312,最新快递状态请访问t.sto.cn/18PBt4 | TRUE | | 物流取件 |
47
+ | [2条]王斯煜[表情]Vince 黑客松nv: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba | TRUE | 账单、还款 | 待付(还)款 |
48
+ | [32条]斯煜[表情]Vince: 【美团月付】您2月账单805.57元待还,最后还款日为本月22号,查账或立即还款点 dpurl.cn/HXFUxEba | TRUE | 账单、还款 | 待付(还)款 |
49
+ | [26条]斯煜[表情]Vince: Mox: 你的Mox Credit月結單已準備就緒。你可登入Mox應用程式查看。 | TRUE | 账单 | 待付(还)款 |
50
+ | 【建设银行】您账户8699于3月22日9时51分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7414.6元。 | FALSE | 可用余额 | 提醒 |
51
+ | [3条]M丶D: 明天九点开会 | TRUE | 时间、开会 | 会议 |
52
+ | 【建设银行】您账户8699于3月22日9时42分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7431.48元。 | FALSE | 可用余额 | 提醒 |
53
+ | 【建设银行】您账户8699于3月22日18时2分向微信支付-羊城通缴费支出人民币2元,可用余额7397.6元。 | FALSE | 可用余额 | |
54
+ | 【招商银行】您的风险评估已到期,为避免错过我行优质产品信息,请及时重评!点击 cmbt.cn/a/zhV 去评估。如已完成或销户请忽略 | FALSE | 风险评估 | 提醒 |
55
+ | 【建设银行】您账户8699于3月22日23时45分向微信支付-西苑出版社支出人民币180元,可用余额7215.6元。 | FALSE | 可用余额 | 提醒 |
56
+ | 【建设银行】您账户8699于3月22日22时47分向微信支付-羊城通缴费支出人民币2元,可用余额7395.6元。 | FALSE | 可用余额 | 提醒 |
57
+ | 【中通快递】73547529665397放在丰巢的包裹请及时取件,如有问题致电15900077340、020-22504077。 | TRUE | 快递、丰巢、取件 | 包裹物流 |
58
+ | 【建设银行】您账户8699于3月22日13时16分向微信支付-扫二维码付款支出人民币15元,可用余额7399.6元。 | FALSE | 可用余额 | |
59
+ | 【建设银行】您账户8699于3月21日8时11分向微信支付-小霞包点(香雪店)支出人民币5.5元,可用余额7566.14元。 | FALSE | 可用余额 | |
60
+ | 【建设银行】您账户8699于3月21日8时7分向微信支付-羊城通缴费支出人民币3.5元,可用余额7571.64元。 | FALSE | 可用余额 | |
61
+ | 【建设银行】您账户8699于3月21日20时5分向微信支付-羊城通缴费支出人民币1元,可用余额7519.38元。 | FALSE | 可用余额 | |
62
+ | 【中国农业银行】百万立减金限时抢,最高10元立减金等您拿,本月戳 go.abchina.com/k/CfK 直达,先到先得。拒收请回复R | FALSE | 拒收 | 提醒 |
63
+ | 【建设银行】您账户8699于3月21日15时28分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额7520.38元。 | FALSE | 可用余额 | 提醒 |
64
+ | 【建设银行】您账户8699于3月21日15时28分向支付宝-理财-天弘基金管理有限公司支出人民币16.88元,可用余额7537.26元。 | FALSE | 可用余额 | 提醒 |
65
+ | M丶D: 【话费账单】尊敬的150****9503客户,您02月01日- 02月28日共消费8.00元。主要消费项目包括: | FALSE | 共消费 | 提醒 |
66
+ | 菜鳥包裹HK00083544922到達香港仔置富道19號置富花園19座地下B舖,提件碼7-1-1002,3個工作日内取。 | TRUE | | 物流取件 |
67
+ | 您的集運單SF3148634434778因其他异常已拒收退回賣家,建議您聯系賣家處理,如有必要可申請退款 | FALSE | | 提醒 |
68
+ | 菜鳥包裹HK00083525678到達香港仔置富道19號置富花園19座地下B舖,提件碼7-1-1004,3個工作日内取。 | TRUE | | 物流取件 |
69
+ | [10条]王斯煜[表情]Vince 黑客松nv: 菜鳥包裹HK00083544922到達香港仔置富道19號置富花園19座地下B舖,提件碼7-1-1002,3個工作日内取。 | TRUE | | 物流取件 |
70
+ | 【建设银行】您账户8699于3月21日12时20分向微信支付-扫二维码付款支出人民币12元,可用余额7554.14元。 | FALSE | 可用余额 | |
71
+ | [2条]AlisaGG陈G老师1010: 预约一个会和你 15分钟。我也正经了[加油] | TRUE | | 会议 |
72
+ | 【建设银行】您账户8699于3月27日8时3分向微信支付-羊城通缴费支出人民币3.5元,可用余额5240.93元。 | FALSE | 可用余额 | 提醒 |
73
+ | 【建设银行】您账户8699于3月27日8时28分向微信支付-百度平台商家支出人民币18.95元,可用余额5221.98元。 | FALSE | 可用余额 | 提醒 |
74
+ | 【建设银行】您账户8699于3月27日9时12分向支付宝-理财-天弘基金管理有限公司支出人民币16.88元,可用余额5205.1元。 | FALSE | 可用余额 | 提醒 |
75
+ | 【广州银行】您尾号5138的卡片转入人民币550.00元,本期账单已还清。绑定广州银行信用卡官微实时查账。 | FALSE | 账单、还清 | 提醒 |
76
+ | 【建设银行】您账户8699于3月27日9时26分向支付宝-黄敏达还款支出人民币550元,可用余额4655.1元。 | FALSE | 可用余额 | 提醒 |
77
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-广州市自来水有限公司支出人民币17.58元,可用余额4637.52元。 | FALSE | 可用余额 | 提醒 |
78
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-天弘基金管理有限公司支出人民币16.88元,可用余额4620.64元。 | FALSE | 可用余额 | 提醒 |
79
+ | 【建设银行】您账户8699于3月27日9时48分向微信支付-壹伴助手支出人民币399元,可用余额4221.64元。 | FALSE | 可用余额 | 提醒 |
80
+ | 【广州银行】黄敏达先生,您好。您的信用卡欠款未缴,请即日内还款,否则我行不排除根据客户协议委托第三方公司向您催缴。若已还款无需理会 | TRUE | 信用卡、还款 | 待付(还)款 |
81
+ | 【中国农业银行】尊敬的用户,您已超过1个月未登录账户,邀您点击 go.abchina.com/k/C7C 查看账户详情。拒收请回复R | FALSE | 拒收 | 提醒 |
82
+ | 【建设银行】您账户8699于3月27日12时38分向微信支付-兰州拉面(佳大)支出人民币16元,可用余额4205.64元。 | FALSE | 可用余额 | 提醒 |
83
+ | [23条]M丶D: 1000本链接: <url id="cvlp38lgsoaarffpphbg" type="url" status="failed" title="" wc="0">https://pan.baidu.com/s/13dV3m54iGE8oWgYtkARQPw?pwd=vwq3 </url> 提取码: vwq3 复制这段内容后打开百度网盘手机App,操作更方便哦 | FALSE | 百度网盘 | 提醒 |
84
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-广州市自来水有限公司支出人民币17.58元,可用余额4637.52元。 | FALSE | 可用余额 | 提醒 |
85
+ | 招商银行风险评估到期提醒 | FALSE | 到期提醒 | 提醒 |
86
+ | 【建设银行】您账户8699于3月27日9时32分向支付宝-广州市自来水有限公司支出人民币17.58元,可用余额4637.52元。 | FALSE | 可用余额 | 提醒 |
87
+ | 明天下午3点有会议 | TRUE | | 会议 |
88
+ | 确认会议时间 | FALSE | | 无效信息 |
89
+ | 把代码上传到云上的自己的分支 | FALSE | | 无效信息 |
90
+ | "大数据需求准入评审周会,时间:11:00 -12:30 参会人:所有 地点:2栋17楼-用户-配有飞书会议-优先5人以上预定(12)深圳新一个代2栋" | TRUE | 飞书会议 | 会议 |
LLM/filter_message/requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ pyyaml
3
+ pymysql
4
+ black
LLM/orchestrator.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import yaml
3
+ import threading
4
+ import importlib
5
+ from pathlib import Path
6
+ import sys
7
+ # import yaml # Removed yaml import
8
+ import mysql.connector as pymysql # Use mysql-connector-python alias
9
+
10
+ # Import config loader from todogen_LLM
11
+ from todogen_LLM.config_loader import get_mysql_config
12
+
13
+ # NOTE: load_config() and the CONFIG/MYSQL_CONFIG globals are still defined below; get_db_conn() reads its settings through get_mysql_config() instead.
14
+ # # 配置
15
def load_config():
    """Parse and return the todogen_LLM YAML configuration.

    The file is looked up at ``todogen_LLM/todogen_LLM_config.yaml`` relative
    to the directory that contains this module.
    """
    yaml_file = Path(__file__).parent.joinpath("todogen_LLM", "todogen_LLM_config.yaml")
    with open(yaml_file, 'r', encoding='utf-8') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
19
+
20
+ CONFIG = load_config()
21
+ MYSQL_CONFIG = CONFIG['mysql']
22
+
23
+ # 数据库
24
def get_db_conn():
    """Open a new MySQL connection from the settings of ``get_mysql_config()``.

    The connection is made over TLS using the configured CA file, with the
    ``utf8mb4`` charset and autocommit enabled. The port defaults to 3306
    when the configuration does not provide one.
    """
    settings = get_mysql_config()
    connect_kwargs = {
        'host': settings['host'],
        'port': settings.get('port', 3306),
        'user': settings['user'],
        'password': settings['password'],
        'database': settings['database'],
        'ssl_ca': settings['ssl_ca'],  # path to the CA certificate
        'ssl_disabled': False,         # keep TLS enabled
        'charset': 'utf8mb4',
        'autocommit': True,
    }
    return pymysql.connect(**connect_kwargs)
37
+
38
def get_latest_update_time(conn):
    """Return the newest ``date`` value stored in the ``Messages`` table.

    Args:
        conn: An open DB-API connection.

    Returns:
        The single column of ``SELECT MAX(date) FROM Messages``, or ``None``
        when the driver returns no row.
    """
    # Local import keeps this function self-contained.
    from contextlib import closing

    # closing() only needs cursor.close(), so this works whether or not the
    # driver's cursor implements the context-manager protocol.
    with closing(conn.cursor()) as cursor:
        cursor.execute("SELECT MAX(date) FROM Messages")
        row = cursor.fetchone()
    return row[0] if row else None
43
+
44
+ #filter_llm主入口
45
def run_filter_llm():
    """Import the filter stage's ``main`` module and call its ``main()``.

    Raises:
        RuntimeError: If the imported module has no ``main`` attribute.
    """
    # NOTE(review): confirm that a 'filter_llm' directory really exists next
    # to this file; if the filter code lives under another name this import
    # fails with ModuleNotFoundError.
    module_dir = str(Path(__file__).parent / 'filter_llm')
    # This function runs on every detected update, so only register the
    # directory once instead of growing sys.path on each call.
    if module_dir not in sys.path:
        sys.path.append(module_dir)
    main = importlib.import_module('main')
    if not hasattr(main, 'main'):
        raise RuntimeError('filter_llm.main.py未找到main函数')
    main.main()
52
+
53
+ #todogen_LLM主入口
54
def run_todogen_llm():
    """Import ``todogen_llm`` from the ``todogen_LLM`` directory and run it.

    The module's ``main()`` is preferred. When it is absent, the pair
    ``load_formatted_data()`` / ``process_data(data)`` is used instead.

    Raises:
        RuntimeError: If the module offers neither entry point.
    """
    module_dir = str(Path(__file__).parent / 'todogen_LLM')
    # This function runs on every detected update, so only register the
    # directory once instead of growing sys.path on each call.
    if module_dir not in sys.path:
        sys.path.append(module_dir)
    todogen = importlib.import_module('todogen_llm')
    if hasattr(todogen, 'main'):
        todogen.main()
    elif hasattr(todogen, 'load_formatted_data') and hasattr(todogen, 'process_data'):
        data = todogen.load_formatted_data()
        todogen.process_data(data)
    else:
        raise RuntimeError('todogen_llm.py未找到main或核心处理函数')
65
+
66
+ # 调用Notify主入口
67
+ #// def run_notify():
68
+ #// sys.path.append(str(Path(__file__).parent / 'Notify')) # Path needs update if kept
69
+ #// notify = importlib.import_module('notifyMain')
70
+ #// if hasattr(notify, 'main'):
71
+ #// notify.main()
72
+ #// else:
73
+ #// raise RuntimeError('Notify/notifyMain.py未找到main函数')
74
+
75
+ # 监听messages表并联动
76
def monitor_and_orchestrate(interval=5):
    """Poll the Messages table forever and run the pipeline when it changes.

    Every ``interval`` seconds the latest ``date`` in ``Messages`` is compared
    with the last value seen. On a change, ``run_filter_llm()`` and then
    ``run_todogen_llm()`` are executed. The remembered value is advanced only
    after both succeed, so a failed run is retried on the next tick.

    Args:
        interval: Seconds to sleep between polls.
    """
    conn = get_db_conn()
    last_update = get_latest_update_time(conn)
    print(f"初始messages表更新时间: {last_update}")
    while True:
        time.sleep(interval)
        try:
            # Re-open the connection if the previous tick discarded it.
            if conn is None:
                conn = get_db_conn()
            current_update = get_latest_update_time(conn)
            if current_update != last_update:
                print(f"检测到messages表有更新: {current_update}, 开始联动执行...")
                run_filter_llm()
                run_todogen_llm()
                last_update = current_update
            else:
                print("无更新,继续监听...")
        except Exception as e:
            print(f"监听或执行过程中发生错误: {e}")
            # The failure may be a dead connection. Drop it so the next tick
            # reconnects instead of failing on the same handle forever.
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    # Best effort: the handle is being discarded anyway.
                    pass
                conn = None
93
+
94
+ if __name__ == "__main__":
95
+ monitor_and_orchestrate(interval=5)
LLM/requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ black
2
+ python-dateutil
3
+ mysql-connector
4
+ mysql-connector-python>=8.0.0
5
+ schedule>=1.2.0
6
+ pyyaml>=6.0.1
7
+ python-dotenv
8
+ pymysql
9
+ PyYAML
10
+ openai
11
+ tqdm
12
+
LLM/todogen_LLM/FalsePositive_few_shot.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # False Positive Few-Shot Examples
2
+ # (Non-actionable messages or pure notifications)
3
+ # 请用实际的、有代表性的例子替换以下内容
4
+
5
+ ## Example 1 (纯粹通知)
6
+ Input Message:
7
+ {"123456789":"[通知]您的账户安全设置已更新。"}
8
+
9
+ Expected Output JSON:
10
+ ```json
11
+ {"123456789":{"is_todo": false, "end_time":"null","location":"null","todo_content":"账号安全更新", "urgency": "unimportant"}}
12
+ ```
13
+
14
+ ## Example 2 (垃圾/广告信息)
15
+ Input Message:
16
+ {"987654321":"【优惠促销】限时抢购!全场商品低至一折,点击链接查看详情:xxx.com"}
17
+
18
+ Expected Output JSON:
19
+ ```json
20
+ {"987654321":{"is_todo": false, "end_time":"null","location":"null","todo_content":"广告推销", "urgency": "unimportant"}}
21
+ ```
22
+
23
+ ## Example 3 (已完成/过期信息)
24
+ Input Message:
25
+ {"112233445":"[提醒]您昨天预约的会议已结束。"}
26
+
27
+ Expected Output JSON:
28
+ ```json
29
+ {"112233445":{"is_todo": false, "end_time":"null","location":"null","todo_content":"过期内容", "urgency": "unimportant"}}
30
+ ```
31
+
LLM/todogen_LLM/TruePositive_few_shot.txt ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # True Positive Few-Shot Examples
2
+ # (Actionable messages that should result in a to-do)
3
+
4
+ ## Example 1
5
+ Input Message:
6
+ {"323231519":"[2条]李JK老师-1228: 【深势科技】Bohrium用户您好,您有共计余额20.00元的体验卡将于2024-12-24 23:59:59到期,请及时使用。"}
7
+
8
+ Expected Output JSON:
9
+ ```json
10
+ {"323231519":{"is_todo": true, "end_time":"2024-12-24 23:59:59","location":"线上平台:【深势科技】Bohrium","todo_content":"请尽快使用20元体验卡", "urgency": "important"}}
11
+ ```
12
+
13
+ ## Example 2
14
+ Input Message:
15
+ {"331150112": "开始日期为2025-03-31T15:01:37,内容源于'ASAP Sample',[2条]AlisaGG: 【小象超市】您好,我是小象超市骑手,【美团智能外卖柜】您的外卖已送至新一代产业园2栋4号柜(面向马路侧),格口号:27,取件码:9310,存柜超过12小时将被清理,请及时取件。"}
16
+
17
+ Expected Output JSON:
18
+ ```json
19
+ {"331150112": {"is_todo": true, "end_time": "2025-04-01T03:01:37", "location": "线下:新一代产业园2栋4号柜", "todo_content": "取快递(格口号:27, 取件码:9310)", "urgency": "urgent"}}
20
+ ```
21
+
22
+ ## Example 3
23
+ Input Message:
24
+ {"331150111": "开始日期为2025-03-31T15:01:36,内容源于'ASAP Sample',AlisaGG: 【驿收发】您的邮政包裹已到凯丰花园2栋驿站,请23:00前凭5-5-6530来取,详询18320926368"}
25
+
26
+ Expected Output JSON:
27
+ ```json
28
+ {"331150111": {"is_todo": true, "end_time": "2025-03-31T23:00:00", "location": "线下:凯丰花园2栋驿站", "todo_content": "取快递(单号:5-5-6530)", "urgency": "important"}}
29
+ ```
30
+
31
+ ## Example 4
32
+ Input Message:
33
+ {"323231510":"[3条]王斯煜[表情]Vince 黑客松nv: 【中通快递】包裹已到深圳光明正大城商业街103号店,取件码3-5-1092。到店扫“取件二维码”,线上查询更方便!询19128399078"}
34
+
35
+ Expected Output JSON:
36
+ ```json
37
+ {"323231510":{"is_todo": true, "end_time":"null","location":"线下地点:深圳光明正大城商业街103号店","todo_content":"快递取件,取件码为3-5-1092", "urgency": "important"}}
38
+ ```
39
+
40
+ ## Example 5
41
+ Input Message:
42
+ {"323231172":"[24条]斯煜[表情]Vince: 【高德打车】您有车费尚未支付,为不影响乘车信用,请前往高德地图app处理,或点击l.amap.com/2i0mBifpr 支付"}
43
+
44
+ Expected Output JSON:
45
+ ```json
46
+ {"323231172":{"is_todo": true, "end_time":"null","location":"线上平台: 高德地图app","todo_content":"未支付车费,请点击l.amap.com/2i0mBifpr 支付", "urgency": "urgent"}}
47
+ ```
48
+
49
+ ## Example 6
50
+ Input Message:
51
+ {"405091409":"【圆通速递】快件尾号1014的包裹已送至(家门口)详询18038103314"}
52
+
53
+ Expected Output JSON:
54
+ ```json
55
+ {"405091409":{"is_todo": true, "end_time":"null","location":"线下:家门口","todo_content":"取圆通快递(尾号1014)", "urgency": "important"}}
56
+ ```
57
+
58
+
59
+
LLM/todogen_LLM/__pycache__/compare_data.cpython-312.pyc ADDED
Binary file (9.54 kB). View file
 
LLM/todogen_LLM/__pycache__/config_loader.cpython-312.pyc ADDED
Binary file (1.88 kB). View file
 
LLM/todogen_LLM/__pycache__/database_of_messages.cpython-312.pyc ADDED
Binary file (11.5 kB). View file
 
LLM/todogen_LLM/__pycache__/export_todolist.cpython-312.pyc ADDED
Binary file (3.36 kB). View file
 
LLM/todogen_LLM/__pycache__/filter_message_list.cpython-312.pyc ADDED
Binary file (2.15 kB). View file
 
LLM/todogen_LLM/__pycache__/filter_useful_data_to_dict.cpython-312.pyc ADDED
Binary file (8.05 kB). View file
 
LLM/todogen_LLM/__pycache__/logger_config.cpython-312.pyc ADDED
Binary file (2.33 kB). View file
 
LLM/todogen_LLM/__pycache__/path_validator.cpython-312.pyc ADDED
Binary file (814 Bytes). View file
 
LLM/todogen_LLM/__pycache__/receiving_useful_messages.cpython-312.pyc ADDED
Binary file (2.98 kB). View file
 
LLM/todogen_LLM/__pycache__/todogen_llm.cpython-312.pyc ADDED
Binary file (21.1 kB). View file
 
LLM/todogen_LLM/compare_data.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import sys
4
+ import copy
5
+ from config_loader import get_paths
6
+ from datetime import datetime
7
+
8
+ from export_todolist import export_todolist_to_json
9
+ from receiving_useful_messages import main
10
+
11
+ # --- 配置与辅助函数 ---
12
+ sys.stdout.reconfigure(encoding='utf-8')
13
+
14
def convert_datetime(obj):
    """``json`` ``default=`` hook that renders ``datetime`` values as ISO text.

    Raises:
        TypeError: For any value that is not a ``datetime``.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Type {type(obj)} not serializable")
    return obj.isoformat()
19
+
20
def load_json_data(file_path):
    """Read a JSON file and return its content when it is a list.

    Every failure (missing file, invalid JSON, non-list top level, any other
    error while reading) is reported on stdout and yields ``None``.
    """
    if not os.path.exists(file_path):
        print(f"[错误] 文件未找到: {file_path}")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
            if not isinstance(payload, list):
                print(f"[错误] 文件格式不正确,预期为列表: {file_path}")
                return None
            return payload
    except json.JSONDecodeError:
        print(f"[错误] JSON解码失败: {file_path}")
    except Exception as e:
        print(f"[错误] 加载文件时发生未知错误 {file_path}: {str(e)}")
    return None
39
+
40
def generate_unique_id(base_id, existing_ids_set):
    """Return the first free ID among ``<base>_upd``, ``<base>_upd_1``, ...

    ``existing_ids_set`` is only read; the caller is responsible for
    recording the returned ID.
    """
    stem = f"{str(base_id)}_upd"
    candidate = stem
    attempt = 0
    while candidate in existing_ids_set:
        attempt += 1
        candidate = f"{str(base_id)}_upd_{attempt}"
    return candidate
48
+
49
+ # --- 核心处理逻辑函数 ---
50
def process_record(item_r, existing_message_ids, existing_todo_contents, existing_message_id_to_record, all_known_message_ids, stats):
    """Decide whether one ``result1.json`` record belongs in ``compare.json``.

    Rules, comparing ``message_id`` and ``todo_content`` as strings:

    * new ID, new content: the record is returned and its ID is added to
      ``all_known_message_ids``;
    * new ID, content already known: printed only, returns ``None``;
    * known ID, different content: a deep copy of the record is returned.
      The ``message_id`` is left as is (the ID-rewriting step is disabled);
    * known ID, same content: printed only, returns ``None``.

    Records without ``message_id`` / ``todo_content``, or that are not
    subscriptable, are skipped. ``stats`` counters are updated in place.

    Returns:
        The record (dict) to append to ``compare.json``, or ``None``.
    """
    try:
        r_message_id_str = str(item_r['message_id'])
        r_todo_content_str = str(item_r['todo_content'])
    except KeyError as e:
        print(f"[警告] result1.json 中的记录缺少键 {e},已跳过: {item_r}")
        stats['skipped_missing_keys'] += 1
        return None
    except TypeError as e:
        print(f"[警告] result1.json 中的记录键值类型错误 {e},已跳过: {item_r}")
        stats['skipped_missing_keys'] += 1
        return None

    if r_message_id_str not in existing_message_ids:
        # --- Case 1: message_id not seen before ---
        if r_todo_content_str not in existing_todo_contents:
            # 1.1: content is new as well -> keep the record.
            all_known_message_ids.add(r_message_id_str)
            stats['saved_new_id_new_content'] += 1
            return item_r

        # 1.2: content already exists under another ID -> report only.
        print("-" * 30)
        print(f"打印 (新 message_id: {r_message_id_str}, 但 todo_content 已存在):")
        print(json.dumps(item_r, indent=2, ensure_ascii=False, default=convert_datetime))
        print("-" * 30)
        stats['printed_new_id_existing_content'] += 1
        return None

    # --- Case 2: message_id already exists ---
    record_e = existing_message_id_to_record.get(r_message_id_str)
    if record_e is None:
        # The ID set and the ID->record map disagree; should not happen.
        print(f"[警告] ID {r_message_id_str} 在集合中但在字典中找不到?跳过。")
        stats['skipped_internal_error'] = stats.get('skipped_internal_error', 0) + 1
        return None

    e_todo_content_str = str(record_e.get('todo_content', ''))

    if r_todo_content_str != e_todo_content_str:
        # 2.1: same ID, changed content -> keep an independent copy.
        # No replacement ID is generated here because nothing consumed it.
        stats['saved_modified_id_diff_content'] += 1
        return copy.deepcopy(item_r)

    # 2.2: exact duplicate -> report only.
    print("-" * 30)
    print(f"打印 (message_id: {r_message_id_str} 已存在, todo_content 相同):")
    print("来自 result1.json:")
    print(json.dumps(item_r, indent=2, ensure_ascii=False, default=convert_datetime))
    print("-" * 30)
    stats['printed_existing_id_same_content'] += 1
    return None
114
+
115
+ # --- 主函数 ---
116
def compare_and_generate_updates():
    """Export the todolist, build result1.json, diff them, write compare.json.

    Steps: export the ``todolist`` table to JSON, run the message-processing
    ``main()`` to obtain ``result1.json``, load both files, run every result
    record through ``process_record`` and write the kept records to
    ``compare.json`` in the data directory. A summary is printed at the end.

    Returns:
        The list of records written to ``compare.json``, or ``None`` when an
        earlier step fails.
    """
    target_dir = get_paths()['data_dir']
    compare_output_file = os.path.join(target_dir, "compare.json")

    # 1. Export the current todolist table.
    extracted_list_path = export_todolist_to_json()
    if not (extracted_list_path and os.path.exists(extracted_list_path)):
        print("[错误] 导出 todolist 数据失败,流程终止。")
        return

    # 2. Produce result1.json.
    result1_path = main()
    if not (result1_path and os.path.exists(result1_path)):
        print("[错误] 生成 result1.json 失败,流程终止。")
        return

    # 3. Load both files.
    result1_data = load_json_data(result1_path)
    extracted_data = load_json_data(extracted_list_path)
    if result1_data is None or extracted_data is None:
        print("[错误] 数据加载失败,流程终止。")
        return

    # Lookup structures over the exported todolist.
    existing_message_ids = set()
    existing_message_id_to_record = {}
    existing_todo_contents = set()
    try:
        for exported in extracted_data:
            if 'message_id' in exported:
                key = str(exported['message_id'])
                existing_message_ids.add(key)
                existing_message_id_to_record[key] = exported
        for exported in extracted_data:
            if 'todo_content' in exported:
                existing_todo_contents.add(str(exported['todo_content']))
    except (KeyError, TypeError) as e:
        print(f"[错误] extracted_list.json 文件处理失败: {e}。请检查文件内容和格式。")
        return

    records_for_compare_json = []
    all_known_message_ids = set(existing_message_ids)
    stats = dict.fromkeys(
        (
            'processed',
            'skipped_missing_keys',
            'saved_new_id_new_content',
            'printed_new_id_existing_content',
            'saved_modified_id_diff_content',
            'printed_existing_id_same_content',
            'skipped_internal_error',
        ),
        0,
    )

    print("[信息] 开始比较和处理数据...")
    for item_r in result1_data:
        stats['processed'] += 1
        kept = process_record(
            item_r,
            existing_message_ids,
            existing_todo_contents,
            existing_message_id_to_record,
            all_known_message_ids,
            stats,
        )
        if kept is not None:
            records_for_compare_json.append(kept)

    # A write failure is reported but does not stop the summary.
    try:
        with open(compare_output_file, 'w', encoding='utf-8') as f:
            json.dump(records_for_compare_json, f, indent=2, ensure_ascii=False, default=convert_datetime)
        print(f"✅ 成功生成 compare.json 文件,包含 {len(records_for_compare_json)} 条记录。")
    except Exception as e:
        print(f"[错误] 写入 compare.json 文件时发生错误: {str(e)}")

    summary = [
        "=" * 40,
        "处理统计:",
        f" 处理 result1.json 记录总数: {stats['processed']}",
        f" 跳过 (缺少关键键或类型错误): {stats['skipped_missing_keys']}",
    ]
    if stats['skipped_internal_error'] > 0:
        summary.append(f" 跳过 (内部逻辑错误): {stats['skipped_internal_error']}")
    summary += [
        "-" * 20,
        " 写入 compare.json:",
        f" - 新 message_id, 新 todo_content: {stats['saved_new_id_new_content']}",
        f" - 修改后 message_id (因冲突且 todo_content 不同): {stats['saved_modified_id_diff_content']}",
        "-" * 20,
        " 打印到控制台:",
        f" - 新 message_id, 但 todo_content 已存在: {stats['printed_new_id_existing_content']}",
        f" - message_id 已存在, todo_content 相同: {stats['printed_existing_id_same_content']}",
        "=" * 40,
    ]
    for line in summary:
        print(line)

    return records_for_compare_json
207
+
208
+ if __name__ == "__main__":
209
+ compare_and_generate_updates() # 触发整个流程
LLM/todogen_LLM/config_loader.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config_loader.py
2
+ import yaml
3
+ from pathlib import Path
4
+
5
def load_config():
    """Parse ``todogen_LLM_config.yaml`` (next to this module) and return it."""
    yaml_file = Path(__file__).parent.joinpath("todogen_LLM_config.yaml")
    with open(yaml_file, 'r', encoding='utf-8') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
9
+
10
+ CONFIG = load_config()
11
+
12
def get_mysql_config():
    """Return a copy of the ``mysql`` section with ``ssl_ca`` made absolute.

    ``ssl_ca`` is resolved relative to this module's directory and returned
    as a string. The cached ``CONFIG`` mapping is not modified.
    """
    mysql_section = CONFIG['mysql']
    settings = dict(mysql_section)
    settings['ssl_ca'] = str(Path(__file__).parent / mysql_section['ssl_ca'])
    return settings
17
+
18
def get_openai_config():
    """Return the ``openai`` section of the cached configuration."""
    openai_section = CONFIG['openai']
    return openai_section
20
+
21
def get_paths():
    """Return the project directories derived from the ``paths`` config.

    Returns:
        dict with ``base_dir`` (the resolved directory of this module),
        ``data_dir`` and ``logging_dir`` (the configured values joined onto
        ``base_dir``), all as ``Path`` objects.
    """
    # Use the configuration cached at import time, like the other getters,
    # instead of re-reading and re-parsing the YAML file on every call.
    path_settings = CONFIG['paths']
    base = Path(__file__).resolve().parent  # the todogen_LLM directory
    return {
        'base_dir': base,
        'data_dir': base / path_settings['data_dir'],
        'logging_dir': base / path_settings['logging_dir']
    }
29
+
30
def get_processing_config():
    """Return the ``processing`` section of the cached configuration."""
    processing_section = CONFIG['processing']
    return processing_section
32
+
33
def get_defaults_config():
    """Return the ``defaults`` section of the cached configuration."""
    defaults_section = CONFIG['defaults']
    return defaults_section
LLM/todogen_LLM/database_of_messages.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # database_of_messages.py
2
+ from config_loader import get_paths
3
+ import mysql.connector
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ import sys
7
+ from config_loader import get_mysql_config, get_defaults_config, get_processing_config
8
+ import concurrent.futures # 必须添加的模块导入
9
+ from concurrent.futures import ThreadPoolExecutor # 关键修复导入
10
+ from tqdm import tqdm
11
+ import logging # 导入 logging
12
+
13
+ # 获取 logger 实例
14
+ logger = logging.getLogger(__name__)
15
+
16
+ sys.stdout.reconfigure(encoding='utf-8')
17
+
18
def process_row(args):
    """Convert one DB row into ``(message_id, {column: text})``.

    ``args`` is a ``(columns, row)`` pair. ``datetime`` cells become ISO-8601
    strings and every other cell is passed through ``str()``. The returned
    ID is the converted ``message_id`` cell, or ``""`` when there is no such
    column.
    """
    columns, row = args
    row_dict = {
        col_name: (cell.isoformat() if isinstance(cell, datetime) else str(cell))
        for col_name, cell in zip(columns, row)
    }
    return (str(row_dict.get("message_id", "")), row_dict)
31
+
32
def async_main() -> dict:
    """Fetch every row of ``Messages`` and return them keyed by message_id.

    Despite its name this is an ordinary synchronous function. Rows are
    converted by ``process_row`` on a thread pool whose size comes from the
    ``db_fetch_workers`` processing setting (default 4), with a tqdm progress
    bar.

    Returns:
        ``{message_id: {column: text}}``. An empty dict is returned when the
        query or the connection fails; the error is logged.
    """
    paths = get_paths()
    # parents=True so a missing intermediate directory does not abort the
    # run (same call as the todolist exporter uses for this directory).
    paths['data_dir'].mkdir(parents=True, exist_ok=True)

    logger.info("开始执行 async_main 获取所有消息...")
    db_config = get_mysql_config()
    processing_config = get_processing_config()
    db_fetch_workers = processing_config.get('db_fetch_workers', 4)

    conn = None
    cursor = None
    try:
        logger.debug("尝试连接数据库 (async_main)...")
        conn = mysql.connector.connect(
            user=db_config['user'],
            password=db_config['password'],
            host=db_config['host'],
            port=db_config['port'],
            database=db_config['database'],
            ssl_ca=db_config['ssl_ca'],
            ssl_disabled=False
        )
        logger.info("✅ 数据库连接成功 (async_main)")
        cursor = conn.cursor()
        logger.info("开始执行查询: SELECT * FROM Messages")
        cursor.execute("SELECT * FROM Messages")
        result = cursor.fetchall()
        columns = [desc[0] for desc in cursor.description]
        logger.info(f"数据库查询完成,获取到 {len(result)} 条原始记录。")

        data = {}
        with ThreadPoolExecutor(max_workers=db_fetch_workers) as executor, \
                tqdm(total=len(result), desc="数据获取进度") as pbar:

            futures = [executor.submit(process_row, (columns, row)) for row in result]

            processed_count = 0
            for future in concurrent.futures.as_completed(futures):
                try:
                    message_id, row_dict = future.result()
                    data[message_id] = row_dict
                    processed_count += 1
                except Exception as exc:
                    # One bad row must not discard the rest.
                    logger.error(f"处理单行数据时出错: {exc}", exc_info=True)
                pbar.update(1)
        logger.info(f"数据行处理完成,成功处理 {processed_count}/{len(result)} 行。")
        return data

    except mysql.connector.Error as e:
        logger.error(f"❌ 数据库操作错误 (async_main): {e}", exc_info=True)
        return {}
    except Exception as e:
        # logger.exception records the traceback automatically.
        logger.exception(f"❌ async_main 中发生未知错误: {e}")
        return {}
    finally:
        if cursor:
            cursor.close()
        if conn and conn.is_connected():
            conn.close()
            logger.info("ℹ️ 数据库连接已关闭 (async_main)。")
100
+
101
+ """
102
+ 后面部分即为上传数据部分,切不可搞错
103
+
104
+ """
105
def _parse_optional_datetime(raw):
    """Return *raw* as a ``datetime``, or ``None`` for blank/'null' values."""
    if raw is None:
        return None
    if isinstance(raw, datetime):
        return raw
    text = str(raw).strip()
    # The LLM output uses the literal string "null" for "no deadline".
    if text.lower() in ("", "null", "none"):
        return None
    return datetime.fromisoformat(text)


def _build_todolist_row(item, default_todo_status, default_urgency_status):
    """Turn one to-do dict into the tuple expected by the INSERT statement."""
    user_id = int(item.get("user_id", 0))  # 0 when the item carries no user_id

    date_str = item.get("date")
    if not date_str:
        raise ValueError("缺少 'date' 字段")
    start_time = datetime.fromisoformat(date_str.replace("T", " "))

    todo_content = item["todo_content"]
    end_time = _parse_optional_datetime(item.get("end_time"))
    # A missing or None location becomes ""; long values are cut to 255 chars.
    location = (item.get("location") or "")[:255]
    todo_status = item.get("todo_statu", default_todo_status)
    urgency_status = item.get("urgency_statu", default_urgency_status)

    # Order must match the column list of the INSERT statement.
    return (
        user_id,
        start_time,
        end_time,
        location,
        todo_content,
        todo_status,
        urgency_status,
    )


def upload_to_todolist(data: dict):
    """Bulk-insert to-do items into the ``todolist`` table.

    Args:
        data: Mapping of key -> to-do dict. Each dict needs ``date`` (ISO
            text) and ``todo_content``; ``user_id``, ``end_time``,
            ``location``, ``todo_statu`` and ``urgency_statu`` are optional.
            An ``end_time`` of ``None``, ``""`` or the placeholder string
            ``"null"`` is stored as NULL.

    Items that cannot be converted are logged and skipped. All remaining
    rows are inserted with one ``executemany`` call inside a transaction
    that is rolled back on a database error. Nothing is returned.
    """
    logger.info(f"开始执行 upload_to_todolist,准备处理 {len(data)} 条输入数据...")
    db_config = get_mysql_config()

    rows_to_insert = []
    skipped_count = 0

    defaults_config = get_defaults_config()
    default_todo_status = defaults_config.get('todo_status', 'doing')
    default_urgency_status = defaults_config.get('urgency_status', 'unimportant')

    logger.debug("开始遍历和转换数据用于批量插入...")
    for item_key, item in data.items():
        try:
            rows_to_insert.append(
                _build_todolist_row(item, default_todo_status, default_urgency_status)
            )
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            # AttributeError covers items/fields of the wrong type (e.g. a
            # non-dict item), which would otherwise abort the whole upload.
            skipped_count += 1
            logger.warning(f"⚠️ 跳过无效数据 (来自 key {item_key}, 原因: {str(e)}) - 原始数据: {item}")

    logger.debug("数据转换完成。")

    if skipped_count > 0:
        logger.info(f"ℹ️ 共跳过 {skipped_count} 条无效数据。")

    if not rows_to_insert:
        logger.info("ℹ️ 没有有效数据需要插入到数据库。")
        return

    logger.info(f"准备将 {len(rows_to_insert)} 条有效数据批量插入数据库...")
    cnx = None
    cursor = None
    try:
        logger.debug("尝试连接数据库 (upload_to_todolist)...")
        cnx = mysql.connector.connect(**db_config)
        logger.info("✅ 数据库连接成功 (upload_to_todolist)")
        cursor = cnx.cursor()

        insert_query = """
        INSERT INTO todolist (
            user_id,
            start_time,
            end_time,
            location,
            todo_content,
            todo_statu,
            urgency_statu
        ) VALUES (%s, %s, %s, %s, %s, %s, %s)
        """

        logger.info("开始执行批量插入 (executemany)...")
        cursor.executemany(insert_query, rows_to_insert)
        rowcount = cursor.rowcount
        logger.info("批量插入执行完毕,尝试提交事务...")
        cnx.commit()
        logger.info(f"✅ 成功批量插入 {rowcount} 条记录到 todolist 表")

    except mysql.connector.Error as err:
        logger.error(f"❌ 数据库批量插入错误: {err}", exc_info=True)
        logger.warning("尝试回滚数据库事务...")
        try:
            if cnx and cnx.is_connected():
                cnx.rollback()
                logger.warning("数据库事务已回滚。")
        except Exception as rollback_err:
            logger.error(f"尝试回滚事务时出错: {rollback_err}", exc_info=True)
    except Exception as e:
        logger.exception(f"❌ upload_to_todolist 中发生未知错误: {e}")
    finally:
        if cursor:
            cursor.close()
        if cnx and cnx.is_connected():
            cnx.close()
            logger.info("ℹ️ 数据库连接已关闭 (upload_to_todolist)。")
LLM/todogen_LLM/export_todolist.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # export_todolist.py
2
+ import json
3
+ from pathlib import Path
4
+ import mysql.connector
5
+ from config_loader import get_mysql_config, get_paths
6
+ from datetime import datetime # 新增导入
7
+ import sys
8
+
9
+ sys.stdout.reconfigure(encoding='utf-8')
10
+
11
def convert_datetime(obj):
    """``json`` ``default=`` hook that renders ``datetime`` values as ISO text.

    Raises:
        TypeError: For any value that is not a ``datetime``.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Type {type(obj)} not serializable")
    return obj.isoformat()
16
+
17
def export_todolist_to_json():
    """Dump the whole ``todolist`` table to ``todolist_export.json``.

    The file is written into the configured data directory, which is created
    when missing. ``datetime`` values are serialised as ISO-8601 text.

    Returns:
        The path of the written file as ``str``, or ``None`` when a database
        or other error occurred (the error is printed).
    """
    # Bound up front so the cleanup below never touches an undefined name
    # when connect() or cursor() fails.
    conn = None
    cursor = None
    try:
        db_config = get_mysql_config()
        paths = get_paths()

        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor(dictionary=True)

        cursor.execute("SELECT * FROM todolist")
        results = cursor.fetchall()

        output_dir = Path(paths['data_dir'])
        output_dir.mkdir(parents=True, exist_ok=True)

        output_path = output_dir / "todolist_export.json"
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f,
                      indent=2,
                      ensure_ascii=False,
                      default=convert_datetime)

        print(f"✅ 成功导出 {len(results)} 条记录到 {output_path}")

        return str(output_path)

    except mysql.connector.Error as err:
        print(f"[错误] 数据库错误: {err}")
        return None
    except Exception as e:
        print(f"[错误] 发生异常: {str(e)}")
        return None
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()
57
+
58
+ if __name__ == "__main__":
59
+ export_todolist_to_json()
LLM/todogen_LLM/filter_message_list.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mysql.connector
2
+ from pathlib import Path
3
+ import sys
4
+ from config_loader import get_mysql_config
5
+
6
+ sys.stdout.reconfigure(encoding='utf-8')
7
+
8
def get_message_ids():
    """Return every ``message_id`` stored in the ``filter_message`` table.

    Returns:
        A list with the first column of each row. On any database or other
        error the problem is printed and an empty list is returned.
    """
    config = get_mysql_config()

    # Bound before the try block so the final return and the cleanup work
    # even when connecting or querying fails.
    message_ids = []
    cnx = None
    cursor = None
    try:
        cnx = mysql.connector.connect(
            user=config['user'],
            password=config['password'],
            host=config['host'],
            port=config['port'],
            database=config['database'],
            ssl_ca=config['ssl_ca'],
            ssl_disabled=False
        )
        cursor = cnx.cursor()

        cursor.execute("SELECT message_id FROM filter_message")
        results = cursor.fetchall()

        # Keep only the ID column of each row.
        message_ids = [row[0] for row in results]
        print(f"成功获取 {len(message_ids)} 条message_id")

    except mysql.connector.Error as err:
        print(f"数据库错误: {err}")
    except Exception as e:
        print(f"发生异常: {str(e)}")
    finally:
        # Release the handles on the error paths too.
        if cursor is not None:
            cursor.close()
        if cnx is not None and cnx.is_connected():
            cnx.close()

    return message_ids
45
+
46
+ if __name__ == '__main__':
47
+ id_list = get_message_ids()
48
+ print("\n提取结果:")
49
+ print(id_list)
LLM/todogen_LLM/filter_useful_data_to_dict.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # filter_useful_data_to_dict.py
2
+ from database_of_messages import async_main # 确保导入的是同步函数
3
+ from typing import List, Dict
4
+ import re
5
+ import json
6
+ import datetime
7
+ from filter_message_list import get_message_ids
8
+ from pathlib import Path # 确保导入 Path
9
+ import logging
10
+ from config_loader import get_paths # 确保导入 get_paths
11
+
12
+ # --- 在顶层获取 logger 实例 ---
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # 移除所有异步装饰器和await调用
16
def fetch_target_messages(target_ids: List[int]) -> Dict[str, Dict]:
    """Core step 1: load the raw messages and keep only the requested IDs.

    ``async_main()`` is called without arguments and its result is treated as
    a mapping. IDs are compared as strings, so each entry of ``target_ids``
    is converted with ``str`` before matching against the mapping's keys.

    Args:
        target_ids: Message IDs to keep.

    Returns:
        The subset of the loaded mapping whose keys match ``target_ids``.
    """
    logger.info("🛜 正在获取目标消息原始数据...")

    # Called directly and without arguments.
    every_message = async_main()

    wanted_keys = set(map(str, target_ids))
    selected: Dict[str, Dict] = {}
    for key, payload in every_message.items():
        if key in wanted_keys:
            selected[key] = payload

    logger.info(f"✅ 找到 {len(selected)}/{len(target_ids)} 条目标消息")
    return selected
28
+
29
def format_messages(raw_data: Dict[str, Dict]) -> Dict[str, str]:
    """Core step 2: render each raw message as one descriptive string.

    For every entry the ``date``, ``sender`` and ``content`` fields are read:

    * ``date``: a string is parsed with ``datetime.fromisoformat`` and
      re-emitted as ``YYYY-MM-DDTHH:MM:SS``; an unparsable string or ``None``
      becomes ``"null"``; a ``datetime`` object is formatted the same way.
    * ``sender``: missing/``None`` becomes ``"null"``; any other value is
      converted to ``str`` and stripped of surrounding quote characters.
    * ``content``: missing/``None`` becomes ``"null"``.

    A purely numeric sender is omitted from the output string.

    Args:
        raw_data: Mapping of message ID to a dict of message fields.

    Returns:
        Mapping of message ID to the formatted string.
    """
    logger.info("🔄 正在进行数据格式化...")

    formatted = {}
    for msg_id, details in raw_data.items():
        date = details.get("date", "null")
        if date is None:
            # An explicit None would otherwise be rendered as the text "None".
            date = "null"
        elif isinstance(date, datetime.datetime):
            date = date.strftime("%Y-%m-%dT%H:%M:%S")
        elif isinstance(date, str):
            try:
                # Round-trip through datetime to validate and normalise the format.
                parsed_date = datetime.datetime.fromisoformat(date)
                date = parsed_date.strftime("%Y-%m-%dT%H:%M:%S")
            except ValueError:
                date = "null"

        # Coerce to str so a None or numeric sender cannot abort the whole batch.
        sender = details.get("sender")
        sender = "null" if sender is None else str(sender)
        # NOTE(review): the strip set holds ' and ‘ but no closing ’ — confirm intent.
        sender = sender.strip("'‘'")

        content = details.get("content")
        if content is None:
            content = "null"

        # A purely numeric sender is left out of the output.
        if sender.isdigit():
            formatted_str = f"开始日期为{date},{content}"
        else:
            formatted_str = f"开始日期为{date},内容源于'{sender}',{content}"

        formatted[msg_id] = formatted_str

    logger.info("🎉 格式化完成")
    return formatted
59
+
60
def validate_format(formatted_data: Dict[str, str], target_ids: List[int]) -> bool:
    """Reject formatted entries whose quoted source is purely numeric.

    Args:
        formatted_data: Mapping of message ID to formatted string.
        target_ids: Accepted for interface compatibility; not used here.

    Returns:
        ``True`` when no entry contains a digits-only quoted source.

    Raises:
        ValueError: On the first entry that contains such a source.
    """
    numeric_source = re.compile(r"内容源于'(\d+)'")
    for key in formatted_data:
        text = formatted_data[key]
        if "内容源于" not in text:
            continue
        if numeric_source.search(text):
            raise ValueError(f"❌ 值 {key} 包含数字来源标识")
    return True
65
+
66
+ # --- 恢复 get_formatted_data 的标准保存逻辑 ---
67
+ # 移除 output_override_path 参数
68
def get_formatted_data(target_ids: List[int]) -> Dict[str, str]:
    """Synchronous entry point: fetch, format, validate and save messages.

    The formatted mapping is written as ``filter_data.json`` into the
    directory named by the ``data_dir`` path setting (default ``output``),
    resolved against this script's directory. Saving is best-effort: a failed
    validation or a failed save is logged and the formatted data is still
    returned.

    Args:
        target_ids: Message IDs to process.

    Returns:
        Mapping of message ID to formatted string.
    """
    logger.info("开始执行 get_formatted_data...")
    raw_data = fetch_target_messages(target_ids)
    formatted_data = format_messages(raw_data)
    try:
        validate_format(formatted_data, target_ids)
        logger.info("数据格式验证通过。")
    except ValueError as ve:
        logger.error(f"❌ 数据格式验证失败: {ve}")
        logger.warning("数据格式验证失败,但仍将尝试保存当前格式化数据。")

    # Save to the directory named in the configuration. The output path is
    # computed once, inside the guarded block, so configuration problems are
    # logged instead of aborting before any work is done.
    logger.info("准备将格式化数据保存到配置文件指定的目录...")
    output_filename = "filter_data.json"
    try:
        paths_config = get_paths()
        output_dir_rel = paths_config.get('data_dir', 'output')

        # Resolve against the directory that contains this script.
        script_dir = Path(__file__).resolve().parent
        output_dir_abs = script_dir / output_dir_rel
        output_file_path = output_dir_abs / output_filename

        # Make sure the target directory exists.
        output_dir_abs.mkdir(parents=True, exist_ok=True)

        with open(output_file_path, 'w', encoding='utf-8') as f:
            json.dump(formatted_data, f, ensure_ascii=False, indent=2)
        logger.info(f"✅ 格式化数据已成功保存至: {output_file_path}")
    except KeyError as e:
        logger.error(f"❌ config.yaml 中缺少路径配置项 'data_dir': {e}")
    except Exception as e:
        logger.error(f"❌ 保存 {output_filename} 时发生错误: {e}", exc_info=True)

    logger.info("get_formatted_data 执行完毕。")
    return formatted_data
120
+
121
+ # --- 恢复 main 函数的标准调用 ---
122
+ # 移除 output_override_path 参数
123
def main(target_ids: List[int]):
    """Run the formatting pipeline when the script is executed directly.

    Delegates to ``get_formatted_data`` and logs whether it returned any data.

    Args:
        target_ids: Message IDs to process.
    """
    logger.info("执行 main 函数 (用于直接运行脚本)...")
    result = get_formatted_data(target_ids)
    if not result:
        logger.warning("main 函数执行完成,但 get_formatted_data 未返回有效数据或保存失败。")
        return
    logger.info("main 函数执行完成,格式化数据已生成并尝试保存。")
132
+
133
def print_results(raw_data: Dict[str, Dict], formatted_data: Dict[str, str]):
    """Print the raw and the formatted data as indented JSON.

    Each section is preceded by a heading line; non-ASCII characters are
    printed as-is.

    Args:
        raw_data: Mapping of message ID to raw message fields.
        formatted_data: Mapping of message ID to formatted string.
    """
    sections = (
        ("\n=== 原始数据 ===", raw_data),
        ("\n=== 格式化数据 ===", formatted_data),
    )
    for heading, payload in sections:
        print(heading)
        print(json.dumps(payload, ensure_ascii=False, indent=2))
138
+
139
# Script entry point: set up logging, load the message IDs and run the pipeline.
if __name__ == '__main__':
    # === Make sure logging is initialised as well ===
    # Note: setup_logging() configures the root logger; the module-level
    # logger = logging.getLogger(__name__) inherits that configuration.
    try:
        from logger_config import setup_logging
        setup_logging()
    except ImportError:
        # logger_config could not be imported: fall back to a basic configuration.
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
        logger.warning("logger_config 未找到,使用基本日志配置。")
    except Exception as log_setup_err:
        logger.error(f"日志设置失败: {log_setup_err}", exc_info=True)
        # Still apply the basic configuration as a fallback.
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    # =============================================

    # The module-level logger is guaranteed to exist at this point.
    logger.info(f"脚本 {__file__} 作为主程序运行...")

    TARGET_IDS = get_message_ids()

    main(TARGET_IDS)
LLM/todogen_LLM/jiaoben.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ import sys
4
+ from datetime import timedelta
5
+ from dateutil.parser import parse
6
+ import mysql.connector
7
+ from mysql.connector import Error
8
+ from config_loader import get_mysql_config, get_paths
9
+ # 修改导入:导入比较函数,移除旧的合并函数
10
+ from compare_data import compare_and_generate_updates
11
+ import logging # 导入 logging
12
+ from logger_config import setup_logging # 导入 setup_logging
13
+ import traceback # 导入 traceback
14
+
15
+ # === 日志初始化 ===
16
+ setup_logging() # 在所有代码执行前调用
17
+ logger = logging.getLogger(__name__) # 获取 logger 实例
18
+ # =================
19
+
20
+ sys.stdout.reconfigure(encoding='utf-8')
21
+
22
def get_db_connection():
    """Open a MySQL connection using the settings from ``get_mysql_config()``.

    Returns:
        The connection object on success, or ``None`` when the connector
        raises ``Error`` (the failure is logged with its traceback).
    """
    logger.debug("尝试建立数据库连接...")
    try:
        settings = get_mysql_config()
        connect_kwargs = {
            key: settings[key]
            for key in ("user", "password", "host", "port", "database", "ssl_ca")
        }
        handle = mysql.connector.connect(ssl_disabled=False, **connect_kwargs)
    except Error as exc:
        # Log the failure together with the exception details.
        logger.error(f"❌ 数据库连接失败: {exc}", exc_info=True)
        return None

    logger.info("✅ 数据库连接成功")
    return handle
46
+
47
def process_end_time(item):
    """Fill in a missing ``end_time`` as ``date`` plus one hour.

    An ``end_time`` that is neither ``None`` nor ``"null"`` is left alone.
    Otherwise it becomes the ISO string of ``date`` + 1 hour, or ``"null"``
    when ``date`` is empty, ``"null"`` or cannot be parsed.

    Args:
        item: Dict with optional ``date`` and ``end_time`` keys; modified in place.

    Returns:
        The same ``item`` object.
    """
    current_end = item.get("end_time")
    if current_end is not None and current_end != "null":
        # An existing value is kept untouched.
        return item

    start = item.get("date")
    fallback = "null"
    if start and start != "null":
        try:
            fallback = (parse(start) + timedelta(hours=1)).isoformat()
        except (ValueError, TypeError):
            # Unparsable start date: leave the end time as "null".
            fallback = "null"

    item["end_time"] = fallback
    return item
69
+
70
+
71
def insert_to_database(data_list):
    """Insert processed to-do records into the ``todolist`` table.

    Item keys map to table columns as follows::

        message_id   -> message_id
        user_id      -> user_id
        date         -> start_time
        todo_content -> todo_content
        urgency      -> urgency_statu
        end_time     -> end_time
        location     -> location

    Items that cannot be converted (non-integer ``message_id``, unparsable
    timestamps, ``None`` for ``location`` or ``todo_content``) are skipped
    with a warning. The remaining rows are inserted in a single batch.

    Args:
        data_list: List of dicts produced by the processing step.

    Returns:
        bool: ``True`` when the batch was committed or there was nothing to
        insert; ``False`` when no connection could be opened or the insert
        failed (the transaction is rolled back in that case).
    """
    connection = get_db_connection()
    if not connection:
        # get_db_connection has already logged the failure.
        return False

    # Bound before the try block so the cleanup in ``finally`` can test it
    # even when ``connection.cursor()`` itself raises.
    cursor = None
    try:
        # Checked inside the try block so the connection opened above is
        # closed by ``finally`` on this early exit as well.
        if not data_list:
            logger.info("ℹ️ 没有数据需要插入数据库。")
            return True  # Nothing to insert still counts as success.

        logger.info(f"准备将 {len(data_list)} 条记录插入数据库...")
        cursor = connection.cursor()

        insert_query = """
        INSERT INTO todolist
        (message_id, user_id, start_time, todo_content, urgency_statu, end_time, location)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        """

        records_to_insert = []
        skipped_count = 0
        for item in data_list:
            try:
                # Convert timestamps to the 'YYYY-MM-DD HH:MM:SS' form; empty
                # or "null" values are stored as NULL.
                raw_start = item.get("date")
                raw_end = item.get("end_time")
                start_time = (
                    parse(raw_start).strftime('%Y-%m-%d %H:%M:%S')
                    if raw_start and raw_start != "null" else None
                )
                end_time = (
                    parse(raw_end).strftime('%Y-%m-%d %H:%M:%S')
                    if raw_end and raw_end != "null" else None
                )

                # message_id is converted to int here; an empty value is
                # stored as NULL, a non-numeric one causes the record to be skipped.
                message_id_str = item.get("message_id", "")
                message_id = None
                if message_id_str:
                    try:
                        message_id = int(message_id_str)
                    except ValueError:
                        logger.warning(f"⚠️ 跳过记录,message_id '{message_id_str}' 不是有效的整数: {item}")
                        skipped_count += 1
                        continue

                user_id = item.get("user_id")
                todo_content = item.get("todo_content")
                urgency = item.get("urgency", "unimportant")  # default urgency
                location = item.get("location", "")  # default location

                # NOTE(review): only location and todo_content are checked
                # here, although the message also mentions end_time.
                if location is None or todo_content is None:
                    logger.warning(f"⚠️ 跳过记录,缺少 location, end_time 或 todo_content: {item}")
                    skipped_count += 1
                    continue

                records_to_insert.append((
                    message_id,    # message_id
                    user_id,       # user_id
                    start_time,    # start_time (may be None)
                    todo_content,  # todo_content
                    urgency,       # urgency_statu
                    end_time,      # end_time (may be None)
                    location,      # location
                ))

            except (ValueError, TypeError, KeyError) as e:
                logger.warning(f"⚠️ 处理记录时出错,已跳过: {item}, 错误: {e}")
                skipped_count += 1
                continue

        if skipped_count > 0:
            logger.info(f"ℹ️ 在准备插入数据库时跳过了 {skipped_count} 条记录。")

        if not records_to_insert:
            logger.info("ℹ️ 没有有效记录可供插入数据库。")
            return True  # No valid rows also counts as a completed run.

        # Batch insert followed by a single commit.
        cursor.executemany(insert_query, records_to_insert)
        connection.commit()
        # The statement is a plain INSERT, so every prepared row was written.
        logger.info(f"✅ 成功插入 {len(records_to_insert)} 条记录到数据库")
        return True

    except Error as e:
        logger.error(f"❌ 数据库插入失败: {e}", exc_info=True)
        if connection.is_connected():
            connection.rollback()  # Undo the partial batch.
            logger.warning("数据库事务已回滚")
        return False
    finally:
        if connection and connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
            logger.info("ℹ️ 数据库连接已关闭。")
187
+
188
+ # 重命名函数并修改逻辑
189
def process_and_insert_updates() -> bool:
    """Fetch pending updates, normalise them and insert them into the database.

    Steps:
        1. Obtain the pending records from ``compare_and_generate_updates()``.
        2. Copy the relevant fields of each record, applying defaults.
        3. Fill a missing ``end_time`` via ``process_end_time``.
        4. Normalise ``date`` and ``end_time`` to ISO format; values that
           cannot be parsed are replaced by ``"null"``.
        5. Hand the result to ``insert_to_database``.

    Returns:
        bool: ``True`` when the run succeeded (including the case where there
        was nothing to process); ``False`` when fetching or inserting failed
        or an unexpected error occurred.
    """
    logger.info("🚀 开始执行数据更新与插入流程...")
    try:
        # 1. Fetch the list of records that need processing.
        logger.info("ℹ️ 开始从 compare_data 获取待处理数据...")
        data_to_process = compare_and_generate_updates()

        if data_to_process is None:
            logger.error("❌ 从 compare_data 获取数据失败。")
            return False

        if not data_to_process:
            # Nothing to do is still a successful run. No database call is
            # made here: it would only open a connection with no work to do.
            logger.info("ℹ️ compare_data 没有返回需要处理的数据。")
            return True

        logger.info(f"ℹ️ 从 compare_data 成功获取 {len(data_to_process)} 条待处理记录。")

        result_list = []
        for item in data_to_process:
            # 2. Copy the fields of interest, applying defaults.
            extracted = {
                "message_id": item.get("message_id", ""),
                "date": item.get("date", "null"),
                "location": item.get("location", ""),
                "end_time": item.get("end_time"),  # raw value, filled in below
                "todo_content": item.get("todo_content", ""),
                "user_id": item.get("user_id", ""),
                "urgency": item.get("urgency", "unimportant")
            }

            # 3. Fill in a missing end_time.
            extracted = process_end_time(extracted)

            # 4. Normalise the time fields to ISO format.
            for time_field in ["date", "end_time"]:
                current_val = extracted.get(time_field)
                if current_val and current_val != "null":
                    try:
                        time_obj = parse(current_val)
                        extracted[time_field] = time_obj.isoformat()
                    except (ValueError, TypeError):
                        # Unparsable values are marked as "null".
                        logger.warning(f"⚠️ 警告:无法解析字段 '{time_field}' 的值 '{current_val}',将设为 null。记录:{item}")
                        extracted[time_field] = "null"

            result_list.append(extracted)

        # 5. Insert the processed records.
        logger.info(f"ℹ️ 准备将处理后的 {len(result_list)} 条记录插入数据库...")
        if not insert_to_database(result_list):
            logger.error("❌ 数据插入数据库失败。")
            return False  # A failed insert fails the whole run.

        logger.info("✅ 数据处理和插入流程成功完成。")
        return True

    except Exception as e:
        # logger.exception records the traceback automatically.
        logger.exception(f"❌ 处理和插入过程中发生未预期错误: {str(e)}")
        return False
261
+
262
def compare_data() -> list:
    """Placeholder comparison step; currently always returns an empty list.

    NOTE(review): the loading of ``result1.json`` / ``filter_data.json`` and
    the comparison itself are elided in this function. As written,
    ``combined_data`` stays empty, so the export branch is never taken and
    no records are returned.

    Returns:
        list: The records to insert (always ``[]`` in the current form).
    """
    try:
        # Placeholder: the JSON loading code is not present here.
        logger.info("✅ 成功加载 result1.json 和 filter_data.json")
    except Exception as e:
        logger.error(f"❌ 加载 JSON 文件时出错: {e}", exc_info=True)
        return []

    # Both containers must exist before the export/return logic below.
    combined_data = {}
    updates_to_insert = []

    logger.info("🔍 开始比较数据并生成更新...")

    logger.info("📊 数据比较完成。")

    # Export the merged data as todolist_export.json into the configured directory.
    export_filename = "todolist_export.json"
    logger.info(f"准备导出合并后的数据到配置文件指定的目录...")
    if combined_data:
        try:
            paths_config = get_paths()
            output_dir_rel = paths_config.get('data_dir', 'output')
            script_dir = Path(__file__).resolve().parent
            output_dir_abs = script_dir / output_dir_rel
            export_file_path = output_dir_abs / export_filename

            # Diagnostic output of the resolved target locations.
            logger.critical(f"JIAOBEN SAVE EXPORT: Target directory: {output_dir_abs.resolve()}")
            logger.critical(f"JIAOBEN SAVE EXPORT: Target file: {export_file_path.resolve()}")

            output_dir_abs.mkdir(parents=True, exist_ok=True)

            with open(export_file_path, 'w', encoding='utf-8') as f:
                json.dump(combined_data, f, ensure_ascii=False, indent=2)
            logger.info(f"✅ 合并数据已成功导出至: {export_file_path}")
        except KeyError as e:
            logger.error(f"❌ config.yaml 中缺少路径配置项 'data_dir': {e}")
        except Exception as e:
            logger.error(f"❌ 保存 {export_filename} 时发生错误: {e}", exc_info=True)
    else:
        logger.warning(f"没有合并数据可以导出到 {export_filename}。")

    logger.info(f"compare_data 函数准备返回 {len(updates_to_insert)} 条待插入记录。")
    return updates_to_insert
311
+
312
# Script entry point: run the update-and-insert flow and log the outcome.
if __name__ == "__main__":
    logger.info("🚀 脚本入口:开始执行数据更新与插入流程...")
    # Run the processing and insertion flow.
    if process_and_insert_updates():
        logger.info("🎉 流程执行完毕。")
    else:
        logger.error("🔥 处理流程失败,请检查日志中的错误信息。")
LLM/todogen_LLM/logger_config.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # logger_config.py
2
+ import logging
3
+ from config_loader import get_paths
4
+ import os
5
+ import datetime
6
+ from logging.handlers import RotatingFileHandler # Use RotatingFileHandler for potential future size limits
7
+
8
+ LOG_FOLDER = "todogen_LLM/todogen_logging"
9
+ LOG_LEVEL = logging.INFO # Default level, can be changed (e.g., logging.DEBUG)
10
+
11
def setup_logging():
    """Configure the root logger with a daily log file and console output.

    The log directory is taken from ``get_paths()['logging_dir']`` and is
    created if missing; the file is named after today's date. Handlers are
    only attached when the root logger has none, so repeated calls do not
    duplicate output. If anything fails, a basic ERROR-level configuration is
    applied and the failure is logged.
    """
    try:
        log_dir = get_paths()['logging_dir']

        # Create the log directory on demand.
        log_dir.mkdir(parents=True, exist_ok=True)

        # One log file per calendar day.
        log_filename = log_dir / f"{datetime.date.today():%Y-%m-%d}.log"

        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

        # Configure the root logger; loggers in other modules inherit from it.
        root = logging.getLogger()
        root.setLevel(LOG_LEVEL)

        if root.handlers:
            # Handlers are already present: do not attach them a second time.
            return

        def attach(handler):
            handler.setLevel(LOG_LEVEL)
            handler.setFormatter(formatter)
            root.addHandler(handler)

        # File handler appends to the daily log file.
        attach(logging.FileHandler(log_filename, mode='a', encoding='utf-8'))
        # Console handler mirrors the log output to the terminal.
        attach(logging.StreamHandler())

        root.info("Logging setup complete. Logging to %s", log_filename)

    except Exception as e:
        # Fall back to basic logging if the setup fails.
        logging.basicConfig(level=logging.ERROR)
        logging.error("Failed to configure logging: %s", e, exc_info=True)
66
+
67
+ # Optional: Call setup immediately when this module is imported?
68
+ # Or better, call it explicitly from the main entry point script.
69
+ # setup_logging() # Avoid calling here, call from main script instead.