Minibase committed on
Commit
4924d32
·
verified ·
1 Parent(s): 40919d8

Upload benchmark_results.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmark_results.json +25 -25
benchmark_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "metadata": {
3
- "timestamp": "2025-09-25T12:38:54.363196",
4
  "model": "Minibase-DeId-Small",
5
  "dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
6
  "sample_size": 100
7
  },
8
  "metrics": {
9
  "pii_detection_rate": 1.0,
10
- "completeness_score": 0.67,
11
- "semantic_preservation": 0.10869745772803042,
12
- "average_latency_ms": 483.6828875541687,
13
  "successful_requests": 100,
14
  "total_requests": 100
15
  },
@@ -22,8 +22,8 @@
22
  "metrics": {
23
  "pii_detection": 1.0,
24
  "completeness": true,
25
- "semantic_preservation": 0.14285714285714285,
26
- "latency_ms": 465.2400016784668
27
  }
28
  },
29
  {
@@ -33,19 +33,19 @@
33
  "metrics": {
34
  "pii_detection": 1.0,
35
  "completeness": true,
36
- "semantic_preservation": 0.09090909090909091,
37
- "latency_ms": 414.6158695220947
38
  }
39
  },
40
  {
41
  "input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
42
  "expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
43
- "predicted": "Employee ID: EMP-[BUILDINGNUMBER_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
44
  "metrics": {
45
  "pii_detection": 1.0,
46
  "completeness": false,
47
- "semantic_preservation": 0.09090909090909091,
48
- "latency_ms": 343.43409538269043
49
  }
50
  },
51
  {
@@ -55,8 +55,8 @@
55
  "metrics": {
56
  "pii_detection": 1.0,
57
  "completeness": true,
58
- "semantic_preservation": 0.0,
59
- "latency_ms": 543.3461666107178
60
  }
61
  },
62
  {
@@ -66,8 +66,8 @@
66
  "metrics": {
67
  "pii_detection": 1.0,
68
  "completeness": true,
69
- "semantic_preservation": 0.1111111111111111,
70
- "latency_ms": 286.21578216552734
71
  }
72
  },
73
  {
@@ -77,8 +77,8 @@
77
  "metrics": {
78
  "pii_detection": 1.0,
79
  "completeness": true,
80
- "semantic_preservation": 0.2857142857142857,
81
- "latency_ms": 293.6849594116211
82
  }
83
  },
84
  {
@@ -88,8 +88,8 @@
88
  "metrics": {
89
  "pii_detection": 1.0,
90
  "completeness": true,
91
- "semantic_preservation": 0.125,
92
- "latency_ms": 264.0669345855713
93
  }
94
  },
95
  {
@@ -99,8 +99,8 @@
99
  "metrics": {
100
  "pii_detection": 1.0,
101
  "completeness": true,
102
- "semantic_preservation": 0.1111111111111111,
103
- "latency_ms": 389.96291160583496
104
  }
105
  },
106
  {
@@ -110,8 +110,8 @@
110
  "metrics": {
111
  "pii_detection": 1.0,
112
  "completeness": false,
113
- "semantic_preservation": 0.0,
114
- "latency_ms": 286.5257263183594
115
  }
116
  },
117
  {
@@ -121,8 +121,8 @@
121
  "metrics": {
122
  "pii_detection": 1.0,
123
  "completeness": true,
124
- "semantic_preservation": 0.09090909090909091,
125
- "latency_ms": 388.6828422546387
126
  }
127
  }
128
  ]
 
1
  {
2
  "metadata": {
3
+ "timestamp": "2025-09-25T12:48:06.242738",
4
  "model": "Minibase-DeId-Small",
5
  "dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
6
  "sample_size": 100
7
  },
8
  "metrics": {
9
  "pii_detection_rate": 1.0,
10
+ "completeness_score": 0.65,
11
+ "semantic_preservation": 0.8110479139122438,
12
+ "average_latency_ms": 477.0322895050049,
13
  "successful_requests": 100,
14
  "total_requests": 100
15
  },
 
22
  "metrics": {
23
  "pii_detection": 1.0,
24
  "completeness": true,
25
+ "semantic_preservation": 0.7382222222222222,
26
+ "latency_ms": 499.6819496154785
27
  }
28
  },
29
  {
 
33
  "metrics": {
34
  "pii_detection": 1.0,
35
  "completeness": true,
36
+ "semantic_preservation": 0.7531262939958592,
37
+ "latency_ms": 442.65270233154297
38
  }
39
  },
40
  {
41
  "input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: [email protected]. Marital Status: Married. Nationality: Canadian.",
42
  "expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
43
+ "predicted": "Employee ID: EMP-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
44
  "metrics": {
45
  "pii_detection": 1.0,
46
  "completeness": false,
47
+ "semantic_preservation": 0.8557142857142858,
48
+ "latency_ms": 359.38310623168945
49
  }
50
  },
51
  {
 
55
  "metrics": {
56
  "pii_detection": 1.0,
57
  "completeness": true,
58
+ "semantic_preservation": 0.8737037037037036,
59
+ "latency_ms": 530.4110050201416
60
  }
61
  },
62
  {
 
66
  "metrics": {
67
  "pii_detection": 1.0,
68
  "completeness": true,
69
+ "semantic_preservation": 0.9,
70
+ "latency_ms": 294.41308975219727
71
  }
72
  },
73
  {
 
77
  "metrics": {
78
  "pii_detection": 1.0,
79
  "completeness": true,
80
+ "semantic_preservation": 0.8818181818181818,
81
+ "latency_ms": 301.6502857208252
82
  }
83
  },
84
  {
 
88
  "metrics": {
89
  "pii_detection": 1.0,
90
  "completeness": true,
91
+ "semantic_preservation": 0.9846153846153847,
92
+ "latency_ms": 269.90580558776855
93
  }
94
  },
95
  {
 
99
  "metrics": {
100
  "pii_detection": 1.0,
101
  "completeness": true,
102
+ "semantic_preservation": 0.9916666666666667,
103
+ "latency_ms": 399.30129051208496
104
  }
105
  },
106
  {
 
110
  "metrics": {
111
  "pii_detection": 1.0,
112
  "completeness": false,
113
+ "semantic_preservation": 0.8596491228070176,
114
+ "latency_ms": 292.7100658416748
115
  }
116
  },
117
  {
 
121
  "metrics": {
122
  "pii_detection": 1.0,
123
  "completeness": true,
124
+ "semantic_preservation": 0.8733333333333333,
125
+ "latency_ms": 410.9461307525635
126
  }
127
  }
128
  ]