Minibase committed on
Commit
fb21b1a
·
verified ·
1 Parent(s): 2374a43

Upload benchmark_results.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmark_results.json +176 -0
benchmark_results.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "timestamp": "2025-09-25T12:35:05.897062",
4
+ "model": "Minibase-DeId-Small",
5
+ "dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
6
+ "sample_size": 100
7
+ },
8
+ "metrics": {
9
+ "pii_detection_rate": 0.20322907647907631,
10
+ "completeness_score": 0.64,
11
+ "semantic_preservation": 0.10867183143653728,
12
+ "average_latency_ms": 492.3843741416931,
13
+ "successful_requests": 100,
14
+ "total_requests": 100
15
+ },
16
+ "domain_performance": {
17
+ "medical": {
18
+ "sample_count": 33,
19
+ "pii_detection_rate": 0.21426713017622112,
20
+ "completeness_score": 0.6060606060606061,
21
+ "semantic_preservation": 0.10982099451350788
22
+ },
23
+ "legal": {
24
+ "sample_count": 6,
25
+ "pii_detection_rate": 0.11342592592592593,
26
+ "completeness_score": 0.5,
27
+ "semantic_preservation": 0.05610021786492375
28
+ },
29
+ "hr": {
30
+ "sample_count": 11,
31
+ "pii_detection_rate": 0.20202020202020202,
32
+ "completeness_score": 0.2727272727272727,
33
+ "semantic_preservation": 0.10847864256955164
34
+ },
35
+ "general": {
36
+ "sample_count": 40,
37
+ "pii_detection_rate": 0.21849476911976912,
38
+ "completeness_score": 0.75,
39
+ "semantic_preservation": 0.11955831545905074
40
+ },
41
+ "research": {
42
+ "sample_count": 4,
43
+ "pii_detection_rate": 0.19166666666666668,
44
+ "completeness_score": 0.5,
45
+ "semantic_preservation": 0.10833333333333334
46
+ },
47
+ "customer_service": {
48
+ "sample_count": 6,
49
+ "pii_detection_rate": 0.14047619047619048,
50
+ "completeness_score": 1.0,
51
+ "semantic_preservation": 0.08292633292633293
52
+ }
53
+ },
54
+ "examples": [
55
+ {
56
+ "input": "Patient Sarah Johnson, DOB 05/12/1980, visited Dr. Lee at St. Jude Hospital on 2023-10-26. Her contact is (555) 123-4567. She resides at 123 Maple Street, Anytown, CA 90210.",
57
+ "expected": "Patient [NAME_1], DOB [DOB_1], visited [NAME_2] at [HOSPITAL_1] on [DATE_1]. Her contact is [PHONE_1]. She resides at [ADDRESS_1].",
58
+ "predicted": "Patient [FIRSTNAME_1] [MIDDLENAME_1], DOB [DOB_1], visited Dr. [LASTNAME_1] at [CITY_1] Hospital on [DATE_1]. Her contact is [PHONENUMBER_1]. She resides at [BUILDINGNUMBER_1] [STREET_1], [CITY_2], [STATE_1], [STATE_2].",
59
+ "domain": "medical",
60
+ "metrics": {
61
+ "pii_detection": 0.2857142857142857,
62
+ "completeness": true,
63
+ "semantic_preservation": 0.14285714285714285,
64
+ "latency_ms": 465.7011032104492
65
+ }
66
+ },
67
+ {
68
+ "input": "Deponent Mr. Robert Davis, CEO of GlobalCorp Inc., stated under oath on December 1, 2022, that his attorney, Ms. Emily White from Legal Eagles LLP, advised him. Case number: LD-2022-007.",
69
+ "expected": "Deponent [NAME_1], CEO of [ORGANIZATION_1], stated under oath on [DATE_1], that his attorney, [NAME_2] from [ORGANIZATION_2], advised him. Case number: [CASE_ID_1].",
70
+ "predicted": "Deponent [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], CEO of [COMPANYNAME_1], stated under oath on [DATE_1], that his attorney, [PREFIX_2] [MIDDLENAME_1] [LASTNAME_2], advised him. Case number: LD-2022-007.",
71
+ "domain": "legal",
72
+ "metrics": {
73
+ "pii_detection": 0.16666666666666666,
74
+ "completeness": true,
75
+ "semantic_preservation": 0.1111111111111111,
76
+ "latency_ms": 379.5027732849121
77
+ }
78
+ },
79
+ {
80
+ "input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
81
+ "expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
82
+ "predicted": "Employee ID: EMP-[BUILDINGNUMBER_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
83
+ "domain": "hr",
84
+ "metrics": {
85
+ "pii_detection": 0.16666666666666666,
86
+ "completeness": false,
87
+ "semantic_preservation": 0.09090909090909091,
88
+ "latency_ms": 333.10723304748535
89
+ }
90
+ },
91
+ {
92
+ "input": "Sra. Elena Rodriguez llam\u00f3 preocupada por su hijo, Miguel Rodriguez, de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es +34 912 345 678. Viven en Calle Mayor 10, Madrid, Espa\u00f1a.",
93
+ "expected": "Sra. [NAME_1] llam\u00f3 preocupada por su hijo, [NAME_2], de [AGE_1] a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONE_1]. Viven en [ADDRESS_1].",
94
+ "predicted": "Sra. [FIRSTNAME_1] [MIDDLENAME_1] [LASTNAME_1] llam\u00f3 preocupada por su hijo, [FIRSTNAME_2] [LASTNAME_2], de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONENUMBER_1]. Viven en Calle [STREET_1] [BUILDINGNUMBER_1], [STATE_1], [STATE_2].",
95
+ "domain": "general",
96
+ "metrics": {
97
+ "pii_detection": 0.0,
98
+ "completeness": true,
99
+ "semantic_preservation": 0.0,
100
+ "latency_ms": 507.0638656616211
101
+ }
102
+ },
103
+ {
104
+ "input": "Claim filed by Mr. David Chen, Policy #INS-98765, on 15/03/2023, regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
105
+ "expected": "Claim filed by [NAME_1], Policy #[POLICY_NUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is [OCCUPATION_1] at [ORGANIZATION_1].",
106
+ "predicted": "Claim filed by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], Policy #INS-[BUILDINGNUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
107
+ "domain": "general",
108
+ "metrics": {
109
+ "pii_detection": 0.2,
110
+ "completeness": true,
111
+ "semantic_preservation": 0.1111111111111111,
112
+ "latency_ms": 322.0179080963135
113
+ }
114
+ },
115
+ {
116
+ "input": "Received feedback from Ms. Olivia Brown, born on 1995-11-20. She visited on January 10, 2024, and mentioned a previous appointment on 2023-12-05. Her email is [email protected].",
117
+ "expected": "Received feedback from [NAME_1], born on [DOB_1]. She visited on [DATE_1], and mentioned a previous appointment on [DATE_2]. Her email is [EMAIL_1].",
118
+ "predicted": "Received feedback from [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], born on [DATE_1]. She visited on [DATE_2], and mentioned a previous appointment on [DATE_3]. Her email is [EMAIL_1].",
119
+ "domain": "general",
120
+ "metrics": {
121
+ "pii_detection": 0.6,
122
+ "completeness": true,
123
+ "semantic_preservation": 0.2857142857142857,
124
+ "latency_ms": 331.7408561706543
125
+ }
126
+ },
127
+ {
128
+ "input": "Participant ID: RP-0042. Name: Dr. Anya Sharma. Ethnicity: South Asian. Occupation: Physician. Consent signed on 2023-09-01.",
129
+ "expected": "Participant ID: [PARTICIPANT_ID_1]. Name: [NAME_1]. Ethnicity: [ETHNICITY_1]. Occupation: [OCCUPATION_1]. Consent signed on [DATE_1].",
130
+ "predicted": "Participant ID: RP-[BUILDINGNUMBER_1]. Name: Dr. [MIDDLENAME_1]. Ethnicity: [EYECOLOR_1]. Occupation: Physician. Consent signed on [DATE_1].",
131
+ "domain": "research",
132
+ "metrics": {
133
+ "pii_detection": 0.2,
134
+ "completeness": true,
135
+ "semantic_preservation": 0.125,
136
+ "latency_ms": 280.93981742858887
137
+ }
138
+ },
139
+ {
140
+ "input": "Customer reported an issue with Order #CUST-ORD-5678. Name: Michael Green. Payment method: Visa. Last 4 digits of card: 1234. Contacted via phone (020 7946 0123) on 2024-01-15.",
141
+ "expected": "Customer reported an issue with Order #[ORDER_ID_1]. Name: [NAME_1]. Payment method: [PAYMENT_METHOD_1]. Last 4 digits of card: [CARD_LAST_4_1]. Contacted via phone [PHONE_1] on [DATE_1].",
142
+ "predicted": "Customer reported an issue with Order #CUST-ORD-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] [LASTNAME_1]. Payment method: Visa. Last 4 digits of card: [CREDITCARDCVV_2]. Contacted via phone ([PHONENUMBER_1]) on [DATE_1].",
143
+ "domain": "customer_service",
144
+ "metrics": {
145
+ "pii_detection": 0.2,
146
+ "completeness": true,
147
+ "semantic_preservation": 0.1111111111111111,
148
+ "latency_ms": 405.1778316497803
149
+ }
150
+ },
151
+ {
152
+ "input": "This work was conducted by Dr. Jun Li and Professor Maria Gomez, affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
153
+ "expected": "This work was conducted by [NAME_1] and [NAME_2], affiliated with [ORGANIZATION_1]. Their paper is titled 'AI in Healthcare Deidentification'.",
154
+ "predicted": "This work was conducted by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1] and Professor [FIRSTNAME_2] [LASTNAME_2], affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
155
+ "domain": "general",
156
+ "metrics": {
157
+ "pii_detection": 0.0,
158
+ "completeness": false,
159
+ "semantic_preservation": 0.0,
160
+ "latency_ms": 409.94930267333984
161
+ }
162
+ },
163
+ {
164
+ "input": "Hello, my name is Alex Kim. I live at Apt 3B, 456 Oak Avenue, Metropolis, NY 10001. My birthday is April 22, 1990. I'm interested in your services.",
165
+ "expected": "Hello, my name is [NAME_1]. I live at [ADDRESS_1]. My birthday is [DOB_1]. I'm interested in your services.",
166
+ "predicted": "Hello, my name is [FIRSTNAME_1] [MIDDLENAME_1]. I live at [SECONDARYADDRESS_1], [BUILDINGNUMBER_1] [STREET_1], [CITY_1], [STATE_1] [ZIPCODE_1]. My birthday is [DATE_1]. I'm interested in your services.",
167
+ "domain": "customer_service",
168
+ "metrics": {
169
+ "pii_detection": 0.0,
170
+ "completeness": true,
171
+ "semantic_preservation": 0.0,
172
+ "latency_ms": 415.47536849975586
173
+ }
174
+ }
175
+ ]
176
+ }