Upload benchmark_results.json with huggingface_hub
Browse files- benchmark_results.json +25 -25
benchmark_results.json
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"timestamp": "2025-09-25T12:
|
4 |
"model": "Minibase-DeId-Small",
|
5 |
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
|
6 |
"sample_size": 100
|
7 |
},
|
8 |
"metrics": {
|
9 |
"pii_detection_rate": 1.0,
|
10 |
-
"completeness_score": 0.
|
11 |
-
"semantic_preservation": 0.
|
12 |
-
"average_latency_ms":
|
13 |
"successful_requests": 100,
|
14 |
"total_requests": 100
|
15 |
},
|
@@ -22,8 +22,8 @@
|
|
22 |
"metrics": {
|
23 |
"pii_detection": 1.0,
|
24 |
"completeness": true,
|
25 |
-
"semantic_preservation": 0.
|
26 |
-
"latency_ms":
|
27 |
}
|
28 |
},
|
29 |
{
|
@@ -33,19 +33,19 @@
|
|
33 |
"metrics": {
|
34 |
"pii_detection": 1.0,
|
35 |
"completeness": true,
|
36 |
-
"semantic_preservation": 0.
|
37 |
-
"latency_ms":
|
38 |
}
|
39 |
},
|
40 |
{
|
41 |
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
|
42 |
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
|
43 |
-
"predicted": "Employee ID: EMP-[
|
44 |
"metrics": {
|
45 |
"pii_detection": 1.0,
|
46 |
"completeness": false,
|
47 |
-
"semantic_preservation": 0.
|
48 |
-
"latency_ms":
|
49 |
}
|
50 |
},
|
51 |
{
|
@@ -55,8 +55,8 @@
|
|
55 |
"metrics": {
|
56 |
"pii_detection": 1.0,
|
57 |
"completeness": true,
|
58 |
-
"semantic_preservation": 0.
|
59 |
-
"latency_ms":
|
60 |
}
|
61 |
},
|
62 |
{
|
@@ -66,8 +66,8 @@
|
|
66 |
"metrics": {
|
67 |
"pii_detection": 1.0,
|
68 |
"completeness": true,
|
69 |
-
"semantic_preservation": 0.
|
70 |
-
"latency_ms":
|
71 |
}
|
72 |
},
|
73 |
{
|
@@ -77,8 +77,8 @@
|
|
77 |
"metrics": {
|
78 |
"pii_detection": 1.0,
|
79 |
"completeness": true,
|
80 |
-
"semantic_preservation": 0.
|
81 |
-
"latency_ms":
|
82 |
}
|
83 |
},
|
84 |
{
|
@@ -88,8 +88,8 @@
|
|
88 |
"metrics": {
|
89 |
"pii_detection": 1.0,
|
90 |
"completeness": true,
|
91 |
-
"semantic_preservation": 0.
|
92 |
-
"latency_ms":
|
93 |
}
|
94 |
},
|
95 |
{
|
@@ -99,8 +99,8 @@
|
|
99 |
"metrics": {
|
100 |
"pii_detection": 1.0,
|
101 |
"completeness": true,
|
102 |
-
"semantic_preservation": 0.
|
103 |
-
"latency_ms":
|
104 |
}
|
105 |
},
|
106 |
{
|
@@ -110,8 +110,8 @@
|
|
110 |
"metrics": {
|
111 |
"pii_detection": 1.0,
|
112 |
"completeness": false,
|
113 |
-
"semantic_preservation": 0.
|
114 |
-
"latency_ms":
|
115 |
}
|
116 |
},
|
117 |
{
|
@@ -121,8 +121,8 @@
|
|
121 |
"metrics": {
|
122 |
"pii_detection": 1.0,
|
123 |
"completeness": true,
|
124 |
-
"semantic_preservation": 0.
|
125 |
-
"latency_ms":
|
126 |
}
|
127 |
}
|
128 |
]
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"timestamp": "2025-09-25T12:48:06.242738",
|
4 |
"model": "Minibase-DeId-Small",
|
5 |
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
|
6 |
"sample_size": 100
|
7 |
},
|
8 |
"metrics": {
|
9 |
"pii_detection_rate": 1.0,
|
10 |
+
"completeness_score": 0.65,
|
11 |
+
"semantic_preservation": 0.8110479139122438,
|
12 |
+
"average_latency_ms": 477.0322895050049,
|
13 |
"successful_requests": 100,
|
14 |
"total_requests": 100
|
15 |
},
|
|
|
22 |
"metrics": {
|
23 |
"pii_detection": 1.0,
|
24 |
"completeness": true,
|
25 |
+
"semantic_preservation": 0.7382222222222222,
|
26 |
+
"latency_ms": 499.6819496154785
|
27 |
}
|
28 |
},
|
29 |
{
|
|
|
33 |
"metrics": {
|
34 |
"pii_detection": 1.0,
|
35 |
"completeness": true,
|
36 |
+
"semantic_preservation": 0.7531262939958592,
|
37 |
+
"latency_ms": 442.65270233154297
|
38 |
}
|
39 |
},
|
40 |
{
|
41 |
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
|
42 |
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
|
43 |
+
"predicted": "Employee ID: EMP-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
|
44 |
"metrics": {
|
45 |
"pii_detection": 1.0,
|
46 |
"completeness": false,
|
47 |
+
"semantic_preservation": 0.8557142857142858,
|
48 |
+
"latency_ms": 359.38310623168945
|
49 |
}
|
50 |
},
|
51 |
{
|
|
|
55 |
"metrics": {
|
56 |
"pii_detection": 1.0,
|
57 |
"completeness": true,
|
58 |
+
"semantic_preservation": 0.8737037037037036,
|
59 |
+
"latency_ms": 530.4110050201416
|
60 |
}
|
61 |
},
|
62 |
{
|
|
|
66 |
"metrics": {
|
67 |
"pii_detection": 1.0,
|
68 |
"completeness": true,
|
69 |
+
"semantic_preservation": 0.9,
|
70 |
+
"latency_ms": 294.41308975219727
|
71 |
}
|
72 |
},
|
73 |
{
|
|
|
77 |
"metrics": {
|
78 |
"pii_detection": 1.0,
|
79 |
"completeness": true,
|
80 |
+
"semantic_preservation": 0.8818181818181818,
|
81 |
+
"latency_ms": 301.6502857208252
|
82 |
}
|
83 |
},
|
84 |
{
|
|
|
88 |
"metrics": {
|
89 |
"pii_detection": 1.0,
|
90 |
"completeness": true,
|
91 |
+
"semantic_preservation": 0.9846153846153847,
|
92 |
+
"latency_ms": 269.90580558776855
|
93 |
}
|
94 |
},
|
95 |
{
|
|
|
99 |
"metrics": {
|
100 |
"pii_detection": 1.0,
|
101 |
"completeness": true,
|
102 |
+
"semantic_preservation": 0.9916666666666667,
|
103 |
+
"latency_ms": 399.30129051208496
|
104 |
}
|
105 |
},
|
106 |
{
|
|
|
110 |
"metrics": {
|
111 |
"pii_detection": 1.0,
|
112 |
"completeness": false,
|
113 |
+
"semantic_preservation": 0.8596491228070176,
|
114 |
+
"latency_ms": 292.7100658416748
|
115 |
}
|
116 |
},
|
117 |
{
|
|
|
121 |
"metrics": {
|
122 |
"pii_detection": 1.0,
|
123 |
"completeness": true,
|
124 |
+
"semantic_preservation": 0.8733333333333333,
|
125 |
+
"latency_ms": 410.9461307525635
|
126 |
}
|
127 |
}
|
128 |
]
|