Upload benchmark_results.json with huggingface_hub
Browse files- benchmark_results.json +176 -0
benchmark_results.json
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"timestamp": "2025-09-25T12:35:05.897062",
|
4 |
+
"model": "Minibase-DeId-Small",
|
5 |
+
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
|
6 |
+
"sample_size": 100
|
7 |
+
},
|
8 |
+
"metrics": {
|
9 |
+
"pii_detection_rate": 0.20322907647907631,
|
10 |
+
"completeness_score": 0.64,
|
11 |
+
"semantic_preservation": 0.10867183143653728,
|
12 |
+
"average_latency_ms": 492.3843741416931,
|
13 |
+
"successful_requests": 100,
|
14 |
+
"total_requests": 100
|
15 |
+
},
|
16 |
+
"domain_performance": {
|
17 |
+
"medical": {
|
18 |
+
"sample_count": 33,
|
19 |
+
"pii_detection_rate": 0.21426713017622112,
|
20 |
+
"completeness_score": 0.6060606060606061,
|
21 |
+
"semantic_preservation": 0.10982099451350788
|
22 |
+
},
|
23 |
+
"legal": {
|
24 |
+
"sample_count": 6,
|
25 |
+
"pii_detection_rate": 0.11342592592592593,
|
26 |
+
"completeness_score": 0.5,
|
27 |
+
"semantic_preservation": 0.05610021786492375
|
28 |
+
},
|
29 |
+
"hr": {
|
30 |
+
"sample_count": 11,
|
31 |
+
"pii_detection_rate": 0.20202020202020202,
|
32 |
+
"completeness_score": 0.2727272727272727,
|
33 |
+
"semantic_preservation": 0.10847864256955164
|
34 |
+
},
|
35 |
+
"general": {
|
36 |
+
"sample_count": 40,
|
37 |
+
"pii_detection_rate": 0.21849476911976912,
|
38 |
+
"completeness_score": 0.75,
|
39 |
+
"semantic_preservation": 0.11955831545905074
|
40 |
+
},
|
41 |
+
"research": {
|
42 |
+
"sample_count": 4,
|
43 |
+
"pii_detection_rate": 0.19166666666666668,
|
44 |
+
"completeness_score": 0.5,
|
45 |
+
"semantic_preservation": 0.10833333333333334
|
46 |
+
},
|
47 |
+
"customer_service": {
|
48 |
+
"sample_count": 6,
|
49 |
+
"pii_detection_rate": 0.14047619047619048,
|
50 |
+
"completeness_score": 1.0,
|
51 |
+
"semantic_preservation": 0.08292633292633293
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"examples": [
|
55 |
+
{
|
56 |
+
"input": "Patient Sarah Johnson, DOB 05/12/1980, visited Dr. Lee at St. Jude Hospital on 2023-10-26. Her contact is (555) 123-4567. She resides at 123 Maple Street, Anytown, CA 90210.",
|
57 |
+
"expected": "Patient [NAME_1], DOB [DOB_1], visited [NAME_2] at [HOSPITAL_1] on [DATE_1]. Her contact is [PHONE_1]. She resides at [ADDRESS_1].",
|
58 |
+
"predicted": "Patient [FIRSTNAME_1] [MIDDLENAME_1], DOB [DOB_1], visited Dr. [LASTNAME_1] at [CITY_1] Hospital on [DATE_1]. Her contact is [PHONENUMBER_1]. She resides at [BUILDINGNUMBER_1] [STREET_1], [CITY_2], [STATE_1], [STATE_2].",
|
59 |
+
"domain": "medical",
|
60 |
+
"metrics": {
|
61 |
+
"pii_detection": 0.2857142857142857,
|
62 |
+
"completeness": true,
|
63 |
+
"semantic_preservation": 0.14285714285714285,
|
64 |
+
"latency_ms": 465.7011032104492
|
65 |
+
}
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"input": "Deponent Mr. Robert Davis, CEO of GlobalCorp Inc., stated under oath on December 1, 2022, that his attorney, Ms. Emily White from Legal Eagles LLP, advised him. Case number: LD-2022-007.",
|
69 |
+
"expected": "Deponent [NAME_1], CEO of [ORGANIZATION_1], stated under oath on [DATE_1], that his attorney, [NAME_2] from [ORGANIZATION_2], advised him. Case number: [CASE_ID_1].",
|
70 |
+
"predicted": "Deponent [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], CEO of [COMPANYNAME_1], stated under oath on [DATE_1], that his attorney, [PREFIX_2] [MIDDLENAME_1] [LASTNAME_2], advised him. Case number: LD-2022-007.",
|
71 |
+
"domain": "legal",
|
72 |
+
"metrics": {
|
73 |
+
"pii_detection": 0.16666666666666666,
|
74 |
+
"completeness": true,
|
75 |
+
"semantic_preservation": 0.1111111111111111,
|
76 |
+
"latency_ms": 379.5027732849121
|
77 |
+
}
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
|
81 |
+
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
|
82 |
+
"predicted": "Employee ID: EMP-[BUILDINGNUMBER_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
|
83 |
+
"domain": "hr",
|
84 |
+
"metrics": {
|
85 |
+
"pii_detection": 0.16666666666666666,
|
86 |
+
"completeness": false,
|
87 |
+
"semantic_preservation": 0.09090909090909091,
|
88 |
+
"latency_ms": 333.10723304748535
|
89 |
+
}
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"input": "Sra. Elena Rodriguez llam\u00f3 preocupada por su hijo, Miguel Rodriguez, de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es +34 912 345 678. Viven en Calle Mayor 10, Madrid, Espa\u00f1a.",
|
93 |
+
"expected": "Sra. [NAME_1] llam\u00f3 preocupada por su hijo, [NAME_2], de [AGE_1] a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONE_1]. Viven en [ADDRESS_1].",
|
94 |
+
"predicted": "Sra. [FIRSTNAME_1] [MIDDLENAME_1] [LASTNAME_1] llam\u00f3 preocupada por su hijo, [FIRSTNAME_2] [LASTNAME_2], de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONENUMBER_1]. Viven en Calle [STREET_1] [BUILDINGNUMBER_1], [STATE_1], [STATE_2].",
|
95 |
+
"domain": "general",
|
96 |
+
"metrics": {
|
97 |
+
"pii_detection": 0.0,
|
98 |
+
"completeness": true,
|
99 |
+
"semantic_preservation": 0.0,
|
100 |
+
"latency_ms": 507.0638656616211
|
101 |
+
}
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"input": "Claim filed by Mr. David Chen, Policy #INS-98765, on 15/03/2023, regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
|
105 |
+
"expected": "Claim filed by [NAME_1], Policy #[POLICY_NUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is [OCCUPATION_1] at [ORGANIZATION_1].",
|
106 |
+
"predicted": "Claim filed by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], Policy #INS-[BUILDINGNUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
|
107 |
+
"domain": "general",
|
108 |
+
"metrics": {
|
109 |
+
"pii_detection": 0.2,
|
110 |
+
"completeness": true,
|
111 |
+
"semantic_preservation": 0.1111111111111111,
|
112 |
+
"latency_ms": 322.0179080963135
|
113 |
+
}
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"input": "Received feedback from Ms. Olivia Brown, born on 1995-11-20. She visited on January 10, 2024, and mentioned a previous appointment on 2023-12-05. Her email is [email protected].",
|
117 |
+
"expected": "Received feedback from [NAME_1], born on [DOB_1]. She visited on [DATE_1], and mentioned a previous appointment on [DATE_2]. Her email is [EMAIL_1].",
|
118 |
+
"predicted": "Received feedback from [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], born on [DATE_1]. She visited on [DATE_2], and mentioned a previous appointment on [DATE_3]. Her email is [EMAIL_1].",
|
119 |
+
"domain": "general",
|
120 |
+
"metrics": {
|
121 |
+
"pii_detection": 0.6,
|
122 |
+
"completeness": true,
|
123 |
+
"semantic_preservation": 0.2857142857142857,
|
124 |
+
"latency_ms": 331.7408561706543
|
125 |
+
}
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"input": "Participant ID: RP-0042. Name: Dr. Anya Sharma. Ethnicity: South Asian. Occupation: Physician. Consent signed on 2023-09-01.",
|
129 |
+
"expected": "Participant ID: [PARTICIPANT_ID_1]. Name: [NAME_1]. Ethnicity: [ETHNICITY_1]. Occupation: [OCCUPATION_1]. Consent signed on [DATE_1].",
|
130 |
+
"predicted": "Participant ID: RP-[BUILDINGNUMBER_1]. Name: Dr. [MIDDLENAME_1]. Ethnicity: [EYECOLOR_1]. Occupation: Physician. Consent signed on [DATE_1].",
|
131 |
+
"domain": "research",
|
132 |
+
"metrics": {
|
133 |
+
"pii_detection": 0.2,
|
134 |
+
"completeness": true,
|
135 |
+
"semantic_preservation": 0.125,
|
136 |
+
"latency_ms": 280.93981742858887
|
137 |
+
}
|
138 |
+
},
|
139 |
+
{
|
140 |
+
"input": "Customer reported an issue with Order #CUST-ORD-5678. Name: Michael Green. Payment method: Visa. Last 4 digits of card: 1234. Contacted via phone (020 7946 0123) on 2024-01-15.",
|
141 |
+
"expected": "Customer reported an issue with Order #[ORDER_ID_1]. Name: [NAME_1]. Payment method: [PAYMENT_METHOD_1]. Last 4 digits of card: [CARD_LAST_4_1]. Contacted via phone [PHONE_1] on [DATE_1].",
|
142 |
+
"predicted": "Customer reported an issue with Order #CUST-ORD-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] [LASTNAME_1]. Payment method: Visa. Last 4 digits of card: [CREDITCARDCVV_2]. Contacted via phone ([PHONENUMBER_1]) on [DATE_1].",
|
143 |
+
"domain": "customer_service",
|
144 |
+
"metrics": {
|
145 |
+
"pii_detection": 0.2,
|
146 |
+
"completeness": true,
|
147 |
+
"semantic_preservation": 0.1111111111111111,
|
148 |
+
"latency_ms": 405.1778316497803
|
149 |
+
}
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"input": "This work was conducted by Dr. Jun Li and Professor Maria Gomez, affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
|
153 |
+
"expected": "This work was conducted by [NAME_1] and [NAME_2], affiliated with [ORGANIZATION_1]. Their paper is titled 'AI in Healthcare Deidentification'.",
|
154 |
+
"predicted": "This work was conducted by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1] and Professor [FIRSTNAME_2] [LASTNAME_2], affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
|
155 |
+
"domain": "general",
|
156 |
+
"metrics": {
|
157 |
+
"pii_detection": 0.0,
|
158 |
+
"completeness": false,
|
159 |
+
"semantic_preservation": 0.0,
|
160 |
+
"latency_ms": 409.94930267333984
|
161 |
+
}
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"input": "Hello, my name is Alex Kim. I live at Apt 3B, 456 Oak Avenue, Metropolis, NY 10001. My birthday is April 22, 1990. I'm interested in your services.",
|
165 |
+
"expected": "Hello, my name is [NAME_1]. I live at [ADDRESS_1]. My birthday is [DOB_1]. I'm interested in your services.",
|
166 |
+
"predicted": "Hello, my name is [FIRSTNAME_1] [MIDDLENAME_1]. I live at [SECONDARYADDRESS_1], [BUILDINGNUMBER_1] [STREET_1], [CITY_1], [STATE_1] [ZIPCODE_1]. My birthday is [DATE_1]. I'm interested in your services.",
|
167 |
+
"domain": "customer_service",
|
168 |
+
"metrics": {
|
169 |
+
"pii_detection": 0.0,
|
170 |
+
"completeness": true,
|
171 |
+
"semantic_preservation": 0.0,
|
172 |
+
"latency_ms": 415.47536849975586
|
173 |
+
}
|
174 |
+
}
|
175 |
+
]
|
176 |
+
}
|