Upload benchmark_results.json with huggingface_hub
Browse files- benchmark_results.json +25 -25
benchmark_results.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"timestamp": "2025-09-25T12:
|
| 4 |
"model": "Minibase-DeId-Small",
|
| 5 |
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
|
| 6 |
"sample_size": 100
|
| 7 |
},
|
| 8 |
"metrics": {
|
| 9 |
"pii_detection_rate": 1.0,
|
| 10 |
-
"completeness_score": 0.
|
| 11 |
-
"semantic_preservation": 0.
|
| 12 |
-
"average_latency_ms":
|
| 13 |
"successful_requests": 100,
|
| 14 |
"total_requests": 100
|
| 15 |
},
|
|
@@ -22,8 +22,8 @@
|
|
| 22 |
"metrics": {
|
| 23 |
"pii_detection": 1.0,
|
| 24 |
"completeness": true,
|
| 25 |
-
"semantic_preservation": 0.
|
| 26 |
-
"latency_ms":
|
| 27 |
}
|
| 28 |
},
|
| 29 |
{
|
|
@@ -33,19 +33,19 @@
|
|
| 33 |
"metrics": {
|
| 34 |
"pii_detection": 1.0,
|
| 35 |
"completeness": true,
|
| 36 |
-
"semantic_preservation": 0.
|
| 37 |
-
"latency_ms":
|
| 38 |
}
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
|
| 42 |
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
|
| 43 |
-
"predicted": "Employee ID: EMP-[
|
| 44 |
"metrics": {
|
| 45 |
"pii_detection": 1.0,
|
| 46 |
"completeness": false,
|
| 47 |
-
"semantic_preservation": 0.
|
| 48 |
-
"latency_ms":
|
| 49 |
}
|
| 50 |
},
|
| 51 |
{
|
|
@@ -55,8 +55,8 @@
|
|
| 55 |
"metrics": {
|
| 56 |
"pii_detection": 1.0,
|
| 57 |
"completeness": true,
|
| 58 |
-
"semantic_preservation": 0.
|
| 59 |
-
"latency_ms":
|
| 60 |
}
|
| 61 |
},
|
| 62 |
{
|
|
@@ -66,8 +66,8 @@
|
|
| 66 |
"metrics": {
|
| 67 |
"pii_detection": 1.0,
|
| 68 |
"completeness": true,
|
| 69 |
-
"semantic_preservation": 0.
|
| 70 |
-
"latency_ms":
|
| 71 |
}
|
| 72 |
},
|
| 73 |
{
|
|
@@ -77,8 +77,8 @@
|
|
| 77 |
"metrics": {
|
| 78 |
"pii_detection": 1.0,
|
| 79 |
"completeness": true,
|
| 80 |
-
"semantic_preservation": 0.
|
| 81 |
-
"latency_ms":
|
| 82 |
}
|
| 83 |
},
|
| 84 |
{
|
|
@@ -88,8 +88,8 @@
|
|
| 88 |
"metrics": {
|
| 89 |
"pii_detection": 1.0,
|
| 90 |
"completeness": true,
|
| 91 |
-
"semantic_preservation": 0.
|
| 92 |
-
"latency_ms":
|
| 93 |
}
|
| 94 |
},
|
| 95 |
{
|
|
@@ -99,8 +99,8 @@
|
|
| 99 |
"metrics": {
|
| 100 |
"pii_detection": 1.0,
|
| 101 |
"completeness": true,
|
| 102 |
-
"semantic_preservation": 0.
|
| 103 |
-
"latency_ms":
|
| 104 |
}
|
| 105 |
},
|
| 106 |
{
|
|
@@ -110,8 +110,8 @@
|
|
| 110 |
"metrics": {
|
| 111 |
"pii_detection": 1.0,
|
| 112 |
"completeness": false,
|
| 113 |
-
"semantic_preservation": 0.
|
| 114 |
-
"latency_ms":
|
| 115 |
}
|
| 116 |
},
|
| 117 |
{
|
|
@@ -121,8 +121,8 @@
|
|
| 121 |
"metrics": {
|
| 122 |
"pii_detection": 1.0,
|
| 123 |
"completeness": true,
|
| 124 |
-
"semantic_preservation": 0.
|
| 125 |
-
"latency_ms":
|
| 126 |
}
|
| 127 |
}
|
| 128 |
]
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"timestamp": "2025-09-25T12:48:06.242738",
|
| 4 |
"model": "Minibase-DeId-Small",
|
| 5 |
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
|
| 6 |
"sample_size": 100
|
| 7 |
},
|
| 8 |
"metrics": {
|
| 9 |
"pii_detection_rate": 1.0,
|
| 10 |
+
"completeness_score": 0.65,
|
| 11 |
+
"semantic_preservation": 0.8110479139122438,
|
| 12 |
+
"average_latency_ms": 477.0322895050049,
|
| 13 |
"successful_requests": 100,
|
| 14 |
"total_requests": 100
|
| 15 |
},
|
|
|
|
| 22 |
"metrics": {
|
| 23 |
"pii_detection": 1.0,
|
| 24 |
"completeness": true,
|
| 25 |
+
"semantic_preservation": 0.7382222222222222,
|
| 26 |
+
"latency_ms": 499.6819496154785
|
| 27 |
}
|
| 28 |
},
|
| 29 |
{
|
|
|
|
| 33 |
"metrics": {
|
| 34 |
"pii_detection": 1.0,
|
| 35 |
"completeness": true,
|
| 36 |
+
"semantic_preservation": 0.7531262939958592,
|
| 37 |
+
"latency_ms": 442.65270233154297
|
| 38 |
}
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
|
| 42 |
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
|
| 43 |
+
"predicted": "Employee ID: EMP-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
|
| 44 |
"metrics": {
|
| 45 |
"pii_detection": 1.0,
|
| 46 |
"completeness": false,
|
| 47 |
+
"semantic_preservation": 0.8557142857142858,
|
| 48 |
+
"latency_ms": 359.38310623168945
|
| 49 |
}
|
| 50 |
},
|
| 51 |
{
|
|
|
|
| 55 |
"metrics": {
|
| 56 |
"pii_detection": 1.0,
|
| 57 |
"completeness": true,
|
| 58 |
+
"semantic_preservation": 0.8737037037037036,
|
| 59 |
+
"latency_ms": 530.4110050201416
|
| 60 |
}
|
| 61 |
},
|
| 62 |
{
|
|
|
|
| 66 |
"metrics": {
|
| 67 |
"pii_detection": 1.0,
|
| 68 |
"completeness": true,
|
| 69 |
+
"semantic_preservation": 0.9,
|
| 70 |
+
"latency_ms": 294.41308975219727
|
| 71 |
}
|
| 72 |
},
|
| 73 |
{
|
|
|
|
| 77 |
"metrics": {
|
| 78 |
"pii_detection": 1.0,
|
| 79 |
"completeness": true,
|
| 80 |
+
"semantic_preservation": 0.8818181818181818,
|
| 81 |
+
"latency_ms": 301.6502857208252
|
| 82 |
}
|
| 83 |
},
|
| 84 |
{
|
|
|
|
| 88 |
"metrics": {
|
| 89 |
"pii_detection": 1.0,
|
| 90 |
"completeness": true,
|
| 91 |
+
"semantic_preservation": 0.9846153846153847,
|
| 92 |
+
"latency_ms": 269.90580558776855
|
| 93 |
}
|
| 94 |
},
|
| 95 |
{
|
|
|
|
| 99 |
"metrics": {
|
| 100 |
"pii_detection": 1.0,
|
| 101 |
"completeness": true,
|
| 102 |
+
"semantic_preservation": 0.9916666666666667,
|
| 103 |
+
"latency_ms": 399.30129051208496
|
| 104 |
}
|
| 105 |
},
|
| 106 |
{
|
|
|
|
| 110 |
"metrics": {
|
| 111 |
"pii_detection": 1.0,
|
| 112 |
"completeness": false,
|
| 113 |
+
"semantic_preservation": 0.8596491228070176,
|
| 114 |
+
"latency_ms": 292.7100658416748
|
| 115 |
}
|
| 116 |
},
|
| 117 |
{
|
|
|
|
| 121 |
"metrics": {
|
| 122 |
"pii_detection": 1.0,
|
| 123 |
"completeness": true,
|
| 124 |
+
"semantic_preservation": 0.8733333333333333,
|
| 125 |
+
"latency_ms": 410.9461307525635
|
| 126 |
}
|
| 127 |
}
|
| 128 |
]
|