gokulsrinivasagan committed on
Commit
a1c60a3
·
verified ·
1 Parent(s): 6175228

End of training

Browse files
README.md CHANGED
@@ -1,14 +1,32 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: gokulsrinivasagan/bert_tiny_lda_100_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  - f1
9
  model-index:
10
  - name: bert_tiny_lda_100_v1_qqp
11
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,12 +34,12 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # bert_tiny_lda_100_v1_qqp
18
 
19
- This model is a fine-tuned version of [gokulsrinivasagan/bert_tiny_lda_100_v1](https://huggingface.co/gokulsrinivasagan/bert_tiny_lda_100_v1) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.4678
22
- - Accuracy: 0.8567
23
- - F1: 0.8093
24
- - Combined Score: 0.8330
25
 
26
  ## Model description
27
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: gokulsrinivasagan/bert_tiny_lda_100_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - accuracy
12
  - f1
13
  model-index:
14
  - name: bert_tiny_lda_100_v1_qqp
15
+ results:
16
+ - task:
17
+ name: Text Classification
18
+ type: text-classification
19
+ dataset:
20
+ name: GLUE QQP
21
+ type: glue
22
+ args: qqp
23
+ metrics:
24
+ - name: Accuracy
25
+ type: accuracy
26
+ value: 0.8543408360128617
27
+ - name: F1
28
+ type: f1
29
+ value: 0.8063020096700984
30
  ---
31
 
32
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  # bert_tiny_lda_100_v1_qqp
36
 
37
+ This model is a fine-tuned version of [gokulsrinivasagan/bert_tiny_lda_100_v1](https://huggingface.co/gokulsrinivasagan/bert_tiny_lda_100_v1) on the GLUE QQP dataset.
38
  It achieves the following results on the evaluation set:
39
+ - Loss: 0.3551
40
+ - Accuracy: 0.8543
41
+ - F1: 0.8063
42
+ - Combined Score: 0.8303
43
 
44
  ## Model description
45
 
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.6318327974276527,
4
- "eval_combined_score": 0.3159163987138264,
5
- "eval_f1": 0.0,
6
- "eval_loss": 0.6569345593452454,
7
- "eval_runtime": 12.0041,
8
  "eval_samples": 40430,
9
- "eval_samples_per_second": 3368.005,
10
- "eval_steps_per_second": 13.162,
11
- "total_flos": 7.633075201391002e+16,
12
- "train_loss": 0.6589447268286167,
13
- "train_runtime": 1607.151,
14
  "train_samples": 363846,
15
- "train_samples_per_second": 11319.596,
16
- "train_steps_per_second": 44.24
17
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "eval_accuracy": 0.8543408360128617,
4
+ "eval_combined_score": 0.83032142284148,
5
+ "eval_f1": 0.8063020096700984,
6
+ "eval_loss": 0.3550606966018677,
7
+ "eval_runtime": 12.3435,
8
  "eval_samples": 40430,
9
+ "eval_samples_per_second": 3275.409,
10
+ "eval_steps_per_second": 12.8,
11
+ "total_flos": 1.0495478401912627e+17,
12
+ "train_loss": 0.2538350579257268,
13
+ "train_runtime": 2291.1306,
14
  "train_samples": 363846,
15
+ "train_samples_per_second": 7940.316,
16
+ "train_steps_per_second": 31.033
17
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.6318327974276527,
4
- "eval_combined_score": 0.3159163987138264,
5
- "eval_f1": 0.0,
6
- "eval_loss": 0.6569345593452454,
7
- "eval_runtime": 12.0041,
8
  "eval_samples": 40430,
9
- "eval_samples_per_second": 3368.005,
10
- "eval_steps_per_second": 13.162
11
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "eval_accuracy": 0.8543408360128617,
4
+ "eval_combined_score": 0.83032142284148,
5
+ "eval_f1": 0.8063020096700984,
6
+ "eval_loss": 0.3550606966018677,
7
+ "eval_runtime": 12.3435,
8
  "eval_samples": 40430,
9
+ "eval_samples_per_second": 3275.409,
10
+ "eval_steps_per_second": 12.8
11
  }
logs/events.out.tfevents.1733326990.ki-g0008.1208741.25 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b3d1378e9b7b74ea526dee11a4747441dbb7c6c6c633439dc7c495222de65e3
3
+ size 515
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 7.633075201391002e+16,
4
- "train_loss": 0.6589447268286167,
5
- "train_runtime": 1607.151,
6
  "train_samples": 363846,
7
- "train_samples_per_second": 11319.596,
8
- "train_steps_per_second": 44.24
9
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "total_flos": 1.0495478401912627e+17,
4
+ "train_loss": 0.2538350579257268,
5
+ "train_runtime": 2291.1306,
6
  "train_samples": 363846,
7
+ "train_samples_per_second": 7940.316,
8
+ "train_steps_per_second": 31.033
9
  }
trainer_state.json CHANGED
@@ -1,165 +1,219 @@
1
  {
2
- "best_metric": 0.6569345593452454,
3
- "best_model_checkpoint": "bert_tiny_lda_100_v1_qqp/checkpoint-4266",
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 11376,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.38018599152565,
14
- "learning_rate": 0.00098,
15
- "loss": 0.6613,
16
  "step": 1422
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.6318327974276527,
21
- "eval_combined_score": 0.3159163987138264,
22
- "eval_f1": 0.0,
23
- "eval_loss": 0.6579914689064026,
24
- "eval_runtime": 11.9864,
25
- "eval_samples_per_second": 3372.978,
26
- "eval_steps_per_second": 13.182,
27
  "step": 1422
28
  },
29
  {
30
  "epoch": 2.0,
31
- "grad_norm": 0.12200487405061722,
32
- "learning_rate": 0.00096,
33
- "loss": 0.6591,
34
  "step": 2844
35
  },
36
  {
37
  "epoch": 2.0,
38
- "eval_accuracy": 0.6318327974276527,
39
- "eval_combined_score": 0.3159163987138264,
40
- "eval_f1": 0.0,
41
- "eval_loss": 0.6583530902862549,
42
- "eval_runtime": 11.9444,
43
- "eval_samples_per_second": 3384.853,
44
- "eval_steps_per_second": 13.228,
45
  "step": 2844
46
  },
47
  {
48
  "epoch": 3.0,
49
- "grad_norm": 0.05550260841846466,
50
- "learning_rate": 0.00094,
51
- "loss": 0.6587,
52
  "step": 4266
53
  },
54
  {
55
  "epoch": 3.0,
56
- "eval_accuracy": 0.6318327974276527,
57
- "eval_combined_score": 0.3159163987138264,
58
- "eval_f1": 0.0,
59
- "eval_loss": 0.6569345593452454,
60
- "eval_runtime": 11.982,
61
- "eval_samples_per_second": 3374.235,
62
- "eval_steps_per_second": 13.186,
63
  "step": 4266
64
  },
65
  {
66
  "epoch": 4.0,
67
- "grad_norm": 0.08356369286775589,
68
- "learning_rate": 0.00092,
69
- "loss": 0.6585,
70
  "step": 5688
71
  },
72
  {
73
  "epoch": 4.0,
74
- "eval_accuracy": 0.6318327974276527,
75
- "eval_combined_score": 0.3159163987138264,
76
- "eval_f1": 0.0,
77
- "eval_loss": 0.6573521494865417,
78
- "eval_runtime": 11.9967,
79
- "eval_samples_per_second": 3370.092,
80
- "eval_steps_per_second": 13.17,
81
  "step": 5688
82
  },
83
  {
84
  "epoch": 5.0,
85
- "grad_norm": 0.12464858591556549,
86
- "learning_rate": 0.0009000000000000001,
87
- "loss": 0.6585,
88
  "step": 7110
89
  },
90
  {
91
  "epoch": 5.0,
92
- "eval_accuracy": 0.6318327974276527,
93
- "eval_combined_score": 0.3159163987138264,
94
- "eval_f1": 0.0,
95
- "eval_loss": 0.6573521494865417,
96
- "eval_runtime": 11.9603,
97
- "eval_samples_per_second": 3380.359,
98
- "eval_steps_per_second": 13.21,
99
  "step": 7110
100
  },
101
  {
102
  "epoch": 6.0,
103
- "grad_norm": 0.03694356605410576,
104
- "learning_rate": 0.00088,
105
- "loss": 0.6585,
106
  "step": 8532
107
  },
108
  {
109
  "epoch": 6.0,
110
- "eval_accuracy": 0.6318327974276527,
111
- "eval_combined_score": 0.3159163987138264,
112
- "eval_f1": 0.0,
113
- "eval_loss": 0.6579644680023193,
114
- "eval_runtime": 11.9528,
115
- "eval_samples_per_second": 3382.463,
116
- "eval_steps_per_second": 13.219,
117
  "step": 8532
118
  },
119
  {
120
  "epoch": 7.0,
121
- "grad_norm": 0.050063714385032654,
122
- "learning_rate": 0.00086,
123
- "loss": 0.6585,
124
  "step": 9954
125
  },
126
  {
127
  "epoch": 7.0,
128
- "eval_accuracy": 0.6318327974276527,
129
- "eval_combined_score": 0.3159163987138264,
130
- "eval_f1": 0.0,
131
- "eval_loss": 0.6573427319526672,
132
- "eval_runtime": 11.9023,
133
- "eval_samples_per_second": 3396.831,
134
- "eval_steps_per_second": 13.275,
135
  "step": 9954
136
  },
137
  {
138
  "epoch": 8.0,
139
- "grad_norm": 0.11186650395393372,
140
- "learning_rate": 0.00084,
141
- "loss": 0.6585,
142
  "step": 11376
143
  },
144
  {
145
  "epoch": 8.0,
146
- "eval_accuracy": 0.6318327974276527,
147
- "eval_combined_score": 0.3159163987138264,
148
- "eval_f1": 0.0,
149
- "eval_loss": 0.6573427319526672,
150
- "eval_runtime": 11.9741,
151
- "eval_samples_per_second": 3376.452,
152
- "eval_steps_per_second": 13.195,
153
  "step": 11376
154
  },
155
  {
156
- "epoch": 8.0,
157
- "step": 11376,
158
- "total_flos": 7.633075201391002e+16,
159
- "train_loss": 0.6589447268286167,
160
- "train_runtime": 1607.151,
161
- "train_samples_per_second": 11319.596,
162
- "train_steps_per_second": 44.24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
  ],
165
  "logging_steps": 1,
@@ -188,7 +242,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 7.633075201391002e+16,
192
  "train_batch_size": 256,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3550606966018677,
3
+ "best_model_checkpoint": "bert_tiny_lda_100_v1_qqp/checkpoint-8532",
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 15642,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 2.963968276977539,
14
+ "learning_rate": 4.9e-05,
15
+ "loss": 0.4874,
16
  "step": 1422
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.797996537224833,
21
+ "eval_combined_score": 0.7552586086455046,
22
+ "eval_f1": 0.7125206800661763,
23
+ "eval_loss": 0.4273848235607147,
24
+ "eval_runtime": 12.4751,
25
+ "eval_samples_per_second": 3240.852,
26
+ "eval_steps_per_second": 12.665,
27
  "step": 1422
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 1.7753087282180786,
32
+ "learning_rate": 4.8e-05,
33
+ "loss": 0.388,
34
  "step": 2844
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_accuracy": 0.822384368043532,
39
+ "eval_combined_score": 0.7974788031824944,
40
+ "eval_f1": 0.7725732383214569,
41
+ "eval_loss": 0.37860846519470215,
42
+ "eval_runtime": 12.4162,
43
+ "eval_samples_per_second": 3256.242,
44
+ "eval_steps_per_second": 12.725,
45
  "step": 2844
46
  },
47
  {
48
  "epoch": 3.0,
49
+ "grad_norm": 2.818178176879883,
50
+ "learning_rate": 4.7e-05,
51
+ "loss": 0.3354,
52
  "step": 4266
53
  },
54
  {
55
  "epoch": 3.0,
56
+ "eval_accuracy": 0.8372248330447687,
57
+ "eval_combined_score": 0.8135618147281756,
58
+ "eval_f1": 0.7898987964115826,
59
+ "eval_loss": 0.3613271117210388,
60
+ "eval_runtime": 12.3656,
61
+ "eval_samples_per_second": 3269.55,
62
+ "eval_steps_per_second": 12.777,
63
  "step": 4266
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "grad_norm": 3.026447296142578,
68
+ "learning_rate": 4.600000000000001e-05,
69
+ "loss": 0.2928,
70
  "step": 5688
71
  },
72
  {
73
  "epoch": 4.0,
74
+ "eval_accuracy": 0.8447440019787287,
75
+ "eval_combined_score": 0.8138597443078306,
76
+ "eval_f1": 0.7829754866369325,
77
+ "eval_loss": 0.3564006984233856,
78
+ "eval_runtime": 12.2406,
79
+ "eval_samples_per_second": 3302.952,
80
+ "eval_steps_per_second": 12.908,
81
  "step": 5688
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "grad_norm": 2.733372688293457,
86
+ "learning_rate": 4.5e-05,
87
+ "loss": 0.2583,
88
  "step": 7110
89
  },
90
  {
91
  "epoch": 5.0,
92
+ "eval_accuracy": 0.8509275290625773,
93
+ "eval_combined_score": 0.8253042271895796,
94
+ "eval_f1": 0.7996809253165819,
95
+ "eval_loss": 0.3613673448562622,
96
+ "eval_runtime": 12.2989,
97
+ "eval_samples_per_second": 3287.292,
98
+ "eval_steps_per_second": 12.847,
99
  "step": 7110
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "grad_norm": 2.535404682159424,
104
+ "learning_rate": 4.4000000000000006e-05,
105
+ "loss": 0.2277,
106
  "step": 8532
107
  },
108
  {
109
  "epoch": 6.0,
110
+ "eval_accuracy": 0.8543408360128617,
111
+ "eval_combined_score": 0.83032142284148,
112
+ "eval_f1": 0.8063020096700984,
113
+ "eval_loss": 0.3550606966018677,
114
+ "eval_runtime": 12.5469,
115
+ "eval_samples_per_second": 3222.316,
116
+ "eval_steps_per_second": 12.593,
117
  "step": 8532
118
  },
119
  {
120
  "epoch": 7.0,
121
+ "grad_norm": 2.6766912937164307,
122
+ "learning_rate": 4.3e-05,
123
+ "loss": 0.2014,
124
  "step": 9954
125
  },
126
  {
127
  "epoch": 7.0,
128
+ "eval_accuracy": 0.8551570615879297,
129
+ "eval_combined_score": 0.8322039379600886,
130
+ "eval_f1": 0.8092508143322475,
131
+ "eval_loss": 0.3854043185710907,
132
+ "eval_runtime": 12.1595,
133
+ "eval_samples_per_second": 3324.98,
134
+ "eval_steps_per_second": 12.994,
135
  "step": 9954
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "grad_norm": 2.472510576248169,
140
+ "learning_rate": 4.2e-05,
141
+ "loss": 0.1784,
142
  "step": 11376
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "eval_accuracy": 0.8544892406628741,
147
+ "eval_combined_score": 0.8304560974134679,
148
+ "eval_f1": 0.8064229541640617,
149
+ "eval_loss": 0.3979368805885315,
150
+ "eval_runtime": 12.3624,
151
+ "eval_samples_per_second": 3270.399,
152
+ "eval_steps_per_second": 12.781,
153
  "step": 11376
154
  },
155
  {
156
+ "epoch": 9.0,
157
+ "grad_norm": 4.070095062255859,
158
+ "learning_rate": 4.1e-05,
159
+ "loss": 0.1578,
160
+ "step": 12798
161
+ },
162
+ {
163
+ "epoch": 9.0,
164
+ "eval_accuracy": 0.8558496166213208,
165
+ "eval_combined_score": 0.8330247887705053,
166
+ "eval_f1": 0.8101999609196899,
167
+ "eval_loss": 0.4261317253112793,
168
+ "eval_runtime": 12.3748,
169
+ "eval_samples_per_second": 3267.134,
170
+ "eval_steps_per_second": 12.768,
171
+ "step": 12798
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "grad_norm": 2.72301983833313,
176
+ "learning_rate": 4e-05,
177
+ "loss": 0.1403,
178
+ "step": 14220
179
+ },
180
+ {
181
+ "epoch": 10.0,
182
+ "eval_accuracy": 0.8587929755132327,
183
+ "eval_combined_score": 0.8347857688543592,
184
+ "eval_f1": 0.8107785621954857,
185
+ "eval_loss": 0.4443197548389435,
186
+ "eval_runtime": 12.4932,
187
+ "eval_samples_per_second": 3236.159,
188
+ "eval_steps_per_second": 12.647,
189
+ "step": 14220
190
+ },
191
+ {
192
+ "epoch": 11.0,
193
+ "grad_norm": 4.5569281578063965,
194
+ "learning_rate": 3.9000000000000006e-05,
195
+ "loss": 0.1246,
196
+ "step": 15642
197
+ },
198
+ {
199
+ "epoch": 11.0,
200
+ "eval_accuracy": 0.8566658421963889,
201
+ "eval_combined_score": 0.8329733686874654,
202
+ "eval_f1": 0.8092808951785421,
203
+ "eval_loss": 0.46776074171066284,
204
+ "eval_runtime": 12.4239,
205
+ "eval_samples_per_second": 3254.203,
206
+ "eval_steps_per_second": 12.717,
207
+ "step": 15642
208
+ },
209
+ {
210
+ "epoch": 11.0,
211
+ "step": 15642,
212
+ "total_flos": 1.0495478401912627e+17,
213
+ "train_loss": 0.2538350579257268,
214
+ "train_runtime": 2291.1306,
215
+ "train_samples_per_second": 7940.316,
216
+ "train_steps_per_second": 31.033
217
  }
218
  ],
219
  "logging_steps": 1,
 
242
  "attributes": {}
243
  }
244
  },
245
+ "total_flos": 1.0495478401912627e+17,
246
  "train_batch_size": 256,
247
  "trial_name": null,
248
  "trial_params": null