|
{ |
|
"metadata": { |
|
"timestamp": "2025-09-25T12:48:06.242738", |
|
"model": "Minibase-DeId-Small", |
|
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl", |
|
"sample_size": 100 |
|
}, |
|
"metrics": { |
|
"pii_detection_rate": 1.0, |
|
"completeness_score": 0.65, |
|
"semantic_preservation": 0.8110479139122438, |
|
"average_latency_ms": 477.0322895050049, |
|
"successful_requests": 100, |
|
"total_requests": 100 |
|
}, |
|
"domain_performance": {}, |
|
"examples": [ |
|
{ |
|
"input": "Patient Sarah Johnson, DOB 05/12/1980, visited Dr. Lee at St. Jude Hospital on 2023-10-26. Her contact is (555) 123-4567. She resides at 123 Maple Street, Anytown, CA 90210.", |
|
"expected": "Patient [NAME_1], DOB [DOB_1], visited [NAME_2] at [HOSPITAL_1] on [DATE_1]. Her contact is [PHONE_1]. She resides at [ADDRESS_1].", |
|
"predicted": "Patient [FIRSTNAME_1] [MIDDLENAME_1], DOB [DOB_1], visited Dr. [LASTNAME_1] at [CITY_1] Hospital on [DATE_1]. Her contact is [PHONENUMBER_1]. She resides at [BUILDINGNUMBER_1] [STREET_1], [CITY_2], [STATE_1], [STATE_2].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.7382222222222222, |
|
"latency_ms": 499.6819496154785 |
|
} |
|
}, |
|
{ |
|
"input": "Deponent Mr. Robert Davis, CEO of GlobalCorp Inc., stated under oath on December 1, 2022, that his attorney, Ms. Emily White from Legal Eagles LLP, advised him. Case number: LD-2022-007.", |
|
"expected": "Deponent [NAME_1], CEO of [ORGANIZATION_1], stated under oath on [DATE_1], that his attorney, [NAME_2] from [ORGANIZATION_2], advised him. Case number: [CASE_ID_1].", |
|
"predicted": "Deponent [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], CEO of [COMPANYNAME_1], stated under oath on [DATE_1], that his attorney, [PREFIX_2] [MIDDLENAME_1] [LASTNAME_2], advised him. Case number: [CURRENCYCODE_1]-[BUILDINGNUMBER_1].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.7531262939958592, |
|
"latency_ms": 442.65270233154297 |
|
} |
|
}, |
|
{ |
|
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.", |
|
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].", |
|
"predicted": "Employee ID: EMP-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": false, |
|
"semantic_preservation": 0.8557142857142858, |
|
"latency_ms": 359.38310623168945 |
|
} |
|
}, |
|
{ |
|
"input": "Sra. Elena Rodriguez llam\u00f3 preocupada por su hijo, Miguel Rodriguez, de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es +34 912 345 678. Viven en Calle Mayor 10, Madrid, Espa\u00f1a.", |
|
"expected": "Sra. [NAME_1] llam\u00f3 preocupada por su hijo, [NAME_2], de [AGE_1] a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONE_1]. Viven en [ADDRESS_1].", |
|
"predicted": "Sra. [FIRSTNAME_1] [MIDDLENAME_1] [LASTNAME_1] llam\u00f3 preocupada por su hijo, [FIRSTNAME_2] [LASTNAME_2], de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONENUMBER_1]. Viven en Calle [STREET_1] [BUILDINGNUMBER_1], [STATE_1], [STATE_2].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.8737037037037036, |
|
"latency_ms": 530.4110050201416 |
|
} |
|
}, |
|
{ |
|
"input": "Claim filed by Mr. David Chen, Policy #INS-98765, on 15/03/2023, regarding a fractured tibia. His occupation is software engineer at TechSolutions.", |
|
"expected": "Claim filed by [NAME_1], Policy #[POLICY_NUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is [OCCUPATION_1] at [ORGANIZATION_1].", |
|
"predicted": "Claim filed by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], Policy #INS-[BUILDINGNUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is software engineer at TechSolutions.", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.9, |
|
"latency_ms": 294.41308975219727 |
|
} |
|
}, |
|
{ |
|
"input": "Received feedback from Ms. Olivia Brown, born on 1995-11-20. She visited on January 10, 2024, and mentioned a previous appointment on 2023-12-05. Her email is [email protected].", |
|
"expected": "Received feedback from [NAME_1], born on [DOB_1]. She visited on [DATE_1], and mentioned a previous appointment on [DATE_2]. Her email is [EMAIL_1].", |
|
"predicted": "Received feedback from [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], born on [DATE_1]. She visited on [DATE_2], and mentioned a previous appointment on [DATE_3]. Her email is [EMAIL_1].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.8818181818181818, |
|
"latency_ms": 301.6502857208252 |
|
} |
|
}, |
|
{ |
|
"input": "Participant ID: RP-0042. Name: Dr. Anya Sharma. Ethnicity: South Asian. Occupation: Physician. Consent signed on 2023-09-01.", |
|
"expected": "Participant ID: [PARTICIPANT_ID_1]. Name: [NAME_1]. Ethnicity: [ETHNICITY_1]. Occupation: [OCCUPATION_1]. Consent signed on [DATE_1].", |
|
"predicted": "Participant ID: RP-[BUILDINGNUMBER_1]. Name: Dr. [MIDDLENAME_1]. Ethnicity: [EYECOLOR_1]. Occupation: Physician. Consent signed on [DATE_1].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.9846153846153847, |
|
"latency_ms": 269.90580558776855 |
|
} |
|
}, |
|
{ |
|
"input": "Customer reported an issue with Order #CUST-ORD-5678. Name: Michael Green. Payment method: Visa. Last 4 digits of card: 1234. Contacted via phone (020 7946 0123) on 2024-01-15.", |
|
"expected": "Customer reported an issue with Order #[ORDER_ID_1]. Name: [NAME_1]. Payment method: [PAYMENT_METHOD_1]. Last 4 digits of card: [CARD_LAST_4_1]. Contacted via phone [PHONE_1] on [DATE_1].", |
|
"predicted": "Customer reported an issue with Order #CUST-ORD-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] [LASTNAME_1]. Payment method: Visa. Last 4 digits of card: [CREDITCARDCVV_2]. Contacted via phone ([PHONENUMBER_1]) on [DATE_1].", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.9916666666666667, |
|
"latency_ms": 399.30129051208496 |
|
} |
|
}, |
|
{ |
|
"input": "This work was conducted by Dr. Jun Li and Professor Maria Gomez, affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.", |
|
"expected": "This work was conducted by [NAME_1] and [NAME_2], affiliated with [ORGANIZATION_1]. Their paper is titled 'AI in Healthcare Deidentification'.", |
|
"predicted": "This work was conducted by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1] and Professor [FIRSTNAME_2] [LASTNAME_2], affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": false, |
|
"semantic_preservation": 0.8596491228070176, |
|
"latency_ms": 292.7100658416748 |
|
} |
|
}, |
|
{ |
|
"input": "Hello, my name is Alex Kim. I live at Apt 3B, 456 Oak Avenue, Metropolis, NY 10001. My birthday is April 22, 1990. I'm interested in your services.", |
|
"expected": "Hello, my name is [NAME_1]. I live at [ADDRESS_1]. My birthday is [DOB_1]. I'm interested in your services.", |
|
"predicted": "Hello, my name is [FIRSTNAME_1] [MIDDLENAME_1]. I live at [SECONDARYADDRESS_1], [BUILDINGNUMBER_1] [STREET_1], [CITY_1], [STATE_1] [ZIPCODE_1]. My birthday is [DOB_1]. I'm interested in your services.", |
|
"metrics": { |
|
"pii_detection": 1.0, |
|
"completeness": true, |
|
"semantic_preservation": 0.8733333333333333, |
|
"latency_ms": 410.9461307525635 |
|
} |
|
} |
|
] |
|
} |