Mauregato commited on
Commit
dd51472
·
1 Parent(s): 1b929f1

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 22.0,
3
+ "eval_accuracy": 0.6718301778542742,
4
+ "eval_f1": 0.6702822325183047,
5
+ "eval_loss": 0.971169114112854,
6
+ "eval_precision": 0.6698287004001044,
7
+ "eval_recall": 0.6718301778542742,
8
+ "eval_runtime": 24.527,
9
+ "eval_samples_per_second": 71.064,
10
+ "eval_steps_per_second": 4.444,
11
+ "total_flos": 6.932186220913164e+18,
12
+ "train_loss": 0.7904366488483819,
13
+ "train_runtime": 4221.7797,
14
+ "train_samples_per_second": 21.188,
15
+ "train_steps_per_second": 0.167
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 22.0,
3
+ "eval_accuracy": 0.6718301778542742,
4
+ "eval_f1": 0.6702822325183047,
5
+ "eval_loss": 0.971169114112854,
6
+ "eval_precision": 0.6698287004001044,
7
+ "eval_recall": 0.6718301778542742,
8
+ "eval_runtime": 24.527,
9
+ "eval_samples_per_second": 71.064,
10
+ "eval_steps_per_second": 4.444
11
+ }
runs/May11_16-29-23_3384b47cb3ff/events.out.tfevents.1683826962.3384b47cb3ff.2104.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a14fb9b48b0518238709c561a7af5be57340a54a78e759ad606f6a7d901e13
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 22.0,
3
+ "total_flos": 6.932186220913164e+18,
4
+ "train_loss": 0.7904366488483819,
5
+ "train_runtime": 4221.7797,
6
+ "train_samples_per_second": 21.188,
7
+ "train_steps_per_second": 0.167
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6718301778542742,
3
+ "best_model_checkpoint": "vit-base-patch16-224-best-finetuned-on-affectnet_short/checkpoint-672",
4
+ "epoch": 22.0,
5
+ "global_step": 704,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.31,
12
+ "learning_rate": 7.042253521126762e-06,
13
+ "loss": 2.1884,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.62,
18
+ "learning_rate": 1.4084507042253523e-05,
19
+ "loss": 2.0969,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.94,
24
+ "learning_rate": 2.112676056338028e-05,
25
+ "loss": 1.9968,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_accuracy": 0.27538726333907054,
31
+ "eval_f1": 0.22796602838082472,
32
+ "eval_loss": 1.9113303422927856,
33
+ "eval_precision": 0.2518389295969545,
34
+ "eval_recall": 0.27538726333907054,
35
+ "eval_runtime": 25.038,
36
+ "eval_samples_per_second": 69.614,
37
+ "eval_steps_per_second": 4.353,
38
+ "step": 32
39
+ },
40
+ {
41
+ "epoch": 1.25,
42
+ "learning_rate": 2.8169014084507046e-05,
43
+ "loss": 1.8758,
44
+ "step": 40
45
+ },
46
+ {
47
+ "epoch": 1.56,
48
+ "learning_rate": 3.5211267605633805e-05,
49
+ "loss": 1.6422,
50
+ "step": 50
51
+ },
52
+ {
53
+ "epoch": 1.88,
54
+ "learning_rate": 4.225352112676056e-05,
55
+ "loss": 1.4178,
56
+ "step": 60
57
+ },
58
+ {
59
+ "epoch": 2.0,
60
+ "eval_accuracy": 0.5048766494549627,
61
+ "eval_f1": 0.4900447126711682,
62
+ "eval_loss": 1.2704347372055054,
63
+ "eval_precision": 0.5149366289457634,
64
+ "eval_recall": 0.5048766494549627,
65
+ "eval_runtime": 24.2957,
66
+ "eval_samples_per_second": 71.741,
67
+ "eval_steps_per_second": 4.486,
68
+ "step": 64
69
+ },
70
+ {
71
+ "epoch": 2.19,
72
+ "learning_rate": 4.929577464788733e-05,
73
+ "loss": 1.2352,
74
+ "step": 70
75
+ },
76
+ {
77
+ "epoch": 2.5,
78
+ "learning_rate": 4.928909952606635e-05,
79
+ "loss": 1.1754,
80
+ "step": 80
81
+ },
82
+ {
83
+ "epoch": 2.81,
84
+ "learning_rate": 4.849921011058452e-05,
85
+ "loss": 1.1751,
86
+ "step": 90
87
+ },
88
+ {
89
+ "epoch": 3.0,
90
+ "eval_accuracy": 0.5840504876649455,
91
+ "eval_f1": 0.5787373156833001,
92
+ "eval_loss": 1.111608624458313,
93
+ "eval_precision": 0.5891362431175351,
94
+ "eval_recall": 0.5840504876649455,
95
+ "eval_runtime": 24.5236,
96
+ "eval_samples_per_second": 71.074,
97
+ "eval_steps_per_second": 4.445,
98
+ "step": 96
99
+ },
100
+ {
101
+ "epoch": 3.12,
102
+ "learning_rate": 4.770932069510269e-05,
103
+ "loss": 1.1688,
104
+ "step": 100
105
+ },
106
+ {
107
+ "epoch": 3.44,
108
+ "learning_rate": 4.691943127962086e-05,
109
+ "loss": 1.0893,
110
+ "step": 110
111
+ },
112
+ {
113
+ "epoch": 3.75,
114
+ "learning_rate": 4.6129541864139027e-05,
115
+ "loss": 1.0127,
116
+ "step": 120
117
+ },
118
+ {
119
+ "epoch": 4.0,
120
+ "eval_accuracy": 0.6161790017211703,
121
+ "eval_f1": 0.6141128202096976,
122
+ "eval_loss": 1.0236890316009521,
123
+ "eval_precision": 0.633451007227001,
124
+ "eval_recall": 0.6161790017211703,
125
+ "eval_runtime": 24.3337,
126
+ "eval_samples_per_second": 71.629,
127
+ "eval_steps_per_second": 4.479,
128
+ "step": 128
129
+ },
130
+ {
131
+ "epoch": 4.06,
132
+ "learning_rate": 4.533965244865719e-05,
133
+ "loss": 1.0259,
134
+ "step": 130
135
+ },
136
+ {
137
+ "epoch": 4.38,
138
+ "learning_rate": 4.454976303317536e-05,
139
+ "loss": 0.9659,
140
+ "step": 140
141
+ },
142
+ {
143
+ "epoch": 4.69,
144
+ "learning_rate": 4.3759873617693526e-05,
145
+ "loss": 0.9996,
146
+ "step": 150
147
+ },
148
+ {
149
+ "epoch": 5.0,
150
+ "learning_rate": 4.2969984202211694e-05,
151
+ "loss": 0.9969,
152
+ "step": 160
153
+ },
154
+ {
155
+ "epoch": 5.0,
156
+ "eval_accuracy": 0.6259323006310958,
157
+ "eval_f1": 0.6150016278628547,
158
+ "eval_loss": 0.989003598690033,
159
+ "eval_precision": 0.6294369746617599,
160
+ "eval_recall": 0.6259323006310958,
161
+ "eval_runtime": 24.5638,
162
+ "eval_samples_per_second": 70.958,
163
+ "eval_steps_per_second": 4.437,
164
+ "step": 160
165
+ },
166
+ {
167
+ "epoch": 5.31,
168
+ "learning_rate": 4.218009478672986e-05,
169
+ "loss": 0.9251,
170
+ "step": 170
171
+ },
172
+ {
173
+ "epoch": 5.62,
174
+ "learning_rate": 4.1390205371248025e-05,
175
+ "loss": 0.9081,
176
+ "step": 180
177
+ },
178
+ {
179
+ "epoch": 5.94,
180
+ "learning_rate": 4.060031595576619e-05,
181
+ "loss": 0.9376,
182
+ "step": 190
183
+ },
184
+ {
185
+ "epoch": 6.0,
186
+ "eval_accuracy": 0.6190476190476191,
187
+ "eval_f1": 0.6182633788848451,
188
+ "eval_loss": 0.9767513275146484,
189
+ "eval_precision": 0.6334597304884556,
190
+ "eval_recall": 0.6190476190476191,
191
+ "eval_runtime": 24.3243,
192
+ "eval_samples_per_second": 71.657,
193
+ "eval_steps_per_second": 4.481,
194
+ "step": 192
195
+ },
196
+ {
197
+ "epoch": 6.25,
198
+ "learning_rate": 3.981042654028436e-05,
199
+ "loss": 0.8888,
200
+ "step": 200
201
+ },
202
+ {
203
+ "epoch": 6.56,
204
+ "learning_rate": 3.902053712480253e-05,
205
+ "loss": 0.891,
206
+ "step": 210
207
+ },
208
+ {
209
+ "epoch": 6.88,
210
+ "learning_rate": 3.82306477093207e-05,
211
+ "loss": 0.8299,
212
+ "step": 220
213
+ },
214
+ {
215
+ "epoch": 7.0,
216
+ "eval_accuracy": 0.6356855995410212,
217
+ "eval_f1": 0.6282041804399863,
218
+ "eval_loss": 0.9579089283943176,
219
+ "eval_precision": 0.6339366993746611,
220
+ "eval_recall": 0.6356855995410212,
221
+ "eval_runtime": 24.4946,
222
+ "eval_samples_per_second": 71.159,
223
+ "eval_steps_per_second": 4.45,
224
+ "step": 224
225
+ },
226
+ {
227
+ "epoch": 7.19,
228
+ "learning_rate": 3.744075829383886e-05,
229
+ "loss": 0.8563,
230
+ "step": 230
231
+ },
232
+ {
233
+ "epoch": 7.5,
234
+ "learning_rate": 3.665086887835703e-05,
235
+ "loss": 0.8477,
236
+ "step": 240
237
+ },
238
+ {
239
+ "epoch": 7.81,
240
+ "learning_rate": 3.58609794628752e-05,
241
+ "loss": 0.7645,
242
+ "step": 250
243
+ },
244
+ {
245
+ "epoch": 8.0,
246
+ "eval_accuracy": 0.648881239242685,
247
+ "eval_f1": 0.647358676044992,
248
+ "eval_loss": 0.9366128444671631,
249
+ "eval_precision": 0.6559082514099844,
250
+ "eval_recall": 0.648881239242685,
251
+ "eval_runtime": 24.292,
252
+ "eval_samples_per_second": 71.752,
253
+ "eval_steps_per_second": 4.487,
254
+ "step": 256
255
+ },
256
+ {
257
+ "epoch": 8.12,
258
+ "learning_rate": 3.507109004739337e-05,
259
+ "loss": 0.8149,
260
+ "step": 260
261
+ },
262
+ {
263
+ "epoch": 8.44,
264
+ "learning_rate": 3.4281200631911535e-05,
265
+ "loss": 0.7755,
266
+ "step": 270
267
+ },
268
+ {
269
+ "epoch": 8.75,
270
+ "learning_rate": 3.34913112164297e-05,
271
+ "loss": 0.7944,
272
+ "step": 280
273
+ },
274
+ {
275
+ "epoch": 9.0,
276
+ "eval_accuracy": 0.6442914515203672,
277
+ "eval_f1": 0.6447425675895946,
278
+ "eval_loss": 0.9302862286567688,
279
+ "eval_precision": 0.6494435537086374,
280
+ "eval_recall": 0.6442914515203672,
281
+ "eval_runtime": 24.5079,
282
+ "eval_samples_per_second": 71.12,
283
+ "eval_steps_per_second": 4.448,
284
+ "step": 288
285
+ },
286
+ {
287
+ "epoch": 9.06,
288
+ "learning_rate": 3.2701421800947866e-05,
289
+ "loss": 0.74,
290
+ "step": 290
291
+ },
292
+ {
293
+ "epoch": 9.38,
294
+ "learning_rate": 3.1911532385466034e-05,
295
+ "loss": 0.7196,
296
+ "step": 300
297
+ },
298
+ {
299
+ "epoch": 9.69,
300
+ "learning_rate": 3.11216429699842e-05,
301
+ "loss": 0.7218,
302
+ "step": 310
303
+ },
304
+ {
305
+ "epoch": 10.0,
306
+ "learning_rate": 3.0331753554502375e-05,
307
+ "loss": 0.7334,
308
+ "step": 320
309
+ },
310
+ {
311
+ "epoch": 10.0,
312
+ "eval_accuracy": 0.6546184738955824,
313
+ "eval_f1": 0.6523112247053579,
314
+ "eval_loss": 0.9510300755500793,
315
+ "eval_precision": 0.6634128950611891,
316
+ "eval_recall": 0.6546184738955824,
317
+ "eval_runtime": 24.2151,
318
+ "eval_samples_per_second": 71.98,
319
+ "eval_steps_per_second": 4.501,
320
+ "step": 320
321
+ },
322
+ {
323
+ "epoch": 10.31,
324
+ "learning_rate": 2.9541864139020537e-05,
325
+ "loss": 0.7152,
326
+ "step": 330
327
+ },
328
+ {
329
+ "epoch": 10.62,
330
+ "learning_rate": 2.8751974723538705e-05,
331
+ "loss": 0.6944,
332
+ "step": 340
333
+ },
334
+ {
335
+ "epoch": 10.94,
336
+ "learning_rate": 2.7962085308056874e-05,
337
+ "loss": 0.6596,
338
+ "step": 350
339
+ },
340
+ {
341
+ "epoch": 11.0,
342
+ "eval_accuracy": 0.6448651749856569,
343
+ "eval_f1": 0.6428493928535022,
344
+ "eval_loss": 0.936934232711792,
345
+ "eval_precision": 0.6528079277533798,
346
+ "eval_recall": 0.6448651749856569,
347
+ "eval_runtime": 24.2671,
348
+ "eval_samples_per_second": 71.826,
349
+ "eval_steps_per_second": 4.492,
350
+ "step": 352
351
+ },
352
+ {
353
+ "epoch": 11.25,
354
+ "learning_rate": 2.7172195892575043e-05,
355
+ "loss": 0.6759,
356
+ "step": 360
357
+ },
358
+ {
359
+ "epoch": 11.56,
360
+ "learning_rate": 2.638230647709321e-05,
361
+ "loss": 0.6274,
362
+ "step": 370
363
+ },
364
+ {
365
+ "epoch": 11.88,
366
+ "learning_rate": 2.5592417061611373e-05,
367
+ "loss": 0.6781,
368
+ "step": 380
369
+ },
370
+ {
371
+ "epoch": 12.0,
372
+ "eval_accuracy": 0.6368330464716007,
373
+ "eval_f1": 0.6359635726412315,
374
+ "eval_loss": 0.9716911911964417,
375
+ "eval_precision": 0.6513038081890579,
376
+ "eval_recall": 0.6368330464716007,
377
+ "eval_runtime": 24.1568,
378
+ "eval_samples_per_second": 72.154,
379
+ "eval_steps_per_second": 4.512,
380
+ "step": 384
381
+ },
382
+ {
383
+ "epoch": 12.19,
384
+ "learning_rate": 2.480252764612954e-05,
385
+ "loss": 0.6889,
386
+ "step": 390
387
+ },
388
+ {
389
+ "epoch": 12.5,
390
+ "learning_rate": 2.401263823064771e-05,
391
+ "loss": 0.6189,
392
+ "step": 400
393
+ },
394
+ {
395
+ "epoch": 12.81,
396
+ "learning_rate": 2.322274881516588e-05,
397
+ "loss": 0.5688,
398
+ "step": 410
399
+ },
400
+ {
401
+ "epoch": 13.0,
402
+ "eval_accuracy": 0.6540447504302926,
403
+ "eval_f1": 0.6495260815424722,
404
+ "eval_loss": 0.95087730884552,
405
+ "eval_precision": 0.6531237727480816,
406
+ "eval_recall": 0.6540447504302926,
407
+ "eval_runtime": 24.3359,
408
+ "eval_samples_per_second": 71.623,
409
+ "eval_steps_per_second": 4.479,
410
+ "step": 416
411
+ },
412
+ {
413
+ "epoch": 13.12,
414
+ "learning_rate": 2.2432859399684044e-05,
415
+ "loss": 0.6399,
416
+ "step": 420
417
+ },
418
+ {
419
+ "epoch": 13.44,
420
+ "learning_rate": 2.1642969984202213e-05,
421
+ "loss": 0.5852,
422
+ "step": 430
423
+ },
424
+ {
425
+ "epoch": 13.75,
426
+ "learning_rate": 2.0853080568720378e-05,
427
+ "loss": 0.5766,
428
+ "step": 440
429
+ },
430
+ {
431
+ "epoch": 14.0,
432
+ "eval_accuracy": 0.6615031554790591,
433
+ "eval_f1": 0.6601098289619065,
434
+ "eval_loss": 0.9484531283378601,
435
+ "eval_precision": 0.6655227009747305,
436
+ "eval_recall": 0.6615031554790591,
437
+ "eval_runtime": 24.0407,
438
+ "eval_samples_per_second": 72.502,
439
+ "eval_steps_per_second": 4.534,
440
+ "step": 448
441
+ },
442
+ {
443
+ "epoch": 14.06,
444
+ "learning_rate": 2.0063191153238547e-05,
445
+ "loss": 0.5559,
446
+ "step": 450
447
+ },
448
+ {
449
+ "epoch": 14.38,
450
+ "learning_rate": 1.9273301737756715e-05,
451
+ "loss": 0.5602,
452
+ "step": 460
453
+ },
454
+ {
455
+ "epoch": 14.69,
456
+ "learning_rate": 1.848341232227488e-05,
457
+ "loss": 0.5529,
458
+ "step": 470
459
+ },
460
+ {
461
+ "epoch": 15.0,
462
+ "learning_rate": 1.769352290679305e-05,
463
+ "loss": 0.5529,
464
+ "step": 480
465
+ },
466
+ {
467
+ "epoch": 15.0,
468
+ "eval_accuracy": 0.6569133677567413,
469
+ "eval_f1": 0.6537964772465467,
470
+ "eval_loss": 0.9590263962745667,
471
+ "eval_precision": 0.6561006700640171,
472
+ "eval_recall": 0.6569133677567413,
473
+ "eval_runtime": 24.2565,
474
+ "eval_samples_per_second": 71.857,
475
+ "eval_steps_per_second": 4.494,
476
+ "step": 480
477
+ },
478
+ {
479
+ "epoch": 15.31,
480
+ "learning_rate": 1.6903633491311218e-05,
481
+ "loss": 0.5288,
482
+ "step": 490
483
+ },
484
+ {
485
+ "epoch": 15.62,
486
+ "learning_rate": 1.6113744075829386e-05,
487
+ "loss": 0.5355,
488
+ "step": 500
489
+ },
490
+ {
491
+ "epoch": 15.94,
492
+ "learning_rate": 1.532385466034755e-05,
493
+ "loss": 0.4998,
494
+ "step": 510
495
+ },
496
+ {
497
+ "epoch": 16.0,
498
+ "eval_accuracy": 0.6511761331038439,
499
+ "eval_f1": 0.6487852957891203,
500
+ "eval_loss": 0.9676968455314636,
501
+ "eval_precision": 0.6513647898316787,
502
+ "eval_recall": 0.6511761331038439,
503
+ "eval_runtime": 23.9784,
504
+ "eval_samples_per_second": 72.69,
505
+ "eval_steps_per_second": 4.546,
506
+ "step": 512
507
+ },
508
+ {
509
+ "epoch": 16.25,
510
+ "learning_rate": 1.4533965244865718e-05,
511
+ "loss": 0.4864,
512
+ "step": 520
513
+ },
514
+ {
515
+ "epoch": 16.56,
516
+ "learning_rate": 1.3744075829383887e-05,
517
+ "loss": 0.5188,
518
+ "step": 530
519
+ },
520
+ {
521
+ "epoch": 16.88,
522
+ "learning_rate": 1.2954186413902054e-05,
523
+ "loss": 0.4908,
524
+ "step": 540
525
+ },
526
+ {
527
+ "epoch": 17.0,
528
+ "eval_accuracy": 0.663798049340218,
529
+ "eval_f1": 0.6615693208028428,
530
+ "eval_loss": 0.9670336842536926,
531
+ "eval_precision": 0.6645442403159969,
532
+ "eval_recall": 0.663798049340218,
533
+ "eval_runtime": 24.1891,
534
+ "eval_samples_per_second": 72.057,
535
+ "eval_steps_per_second": 4.506,
536
+ "step": 544
537
+ },
538
+ {
539
+ "epoch": 17.19,
540
+ "learning_rate": 1.216429699842022e-05,
541
+ "loss": 0.4697,
542
+ "step": 550
543
+ },
544
+ {
545
+ "epoch": 17.5,
546
+ "learning_rate": 1.137440758293839e-05,
547
+ "loss": 0.4791,
548
+ "step": 560
549
+ },
550
+ {
551
+ "epoch": 17.81,
552
+ "learning_rate": 1.0584518167456558e-05,
553
+ "loss": 0.4682,
554
+ "step": 570
555
+ },
556
+ {
557
+ "epoch": 18.0,
558
+ "eval_accuracy": 0.6678141135972461,
559
+ "eval_f1": 0.668417951352874,
560
+ "eval_loss": 0.9634829163551331,
561
+ "eval_precision": 0.6707246062463791,
562
+ "eval_recall": 0.6678141135972461,
563
+ "eval_runtime": 24.0376,
564
+ "eval_samples_per_second": 72.511,
565
+ "eval_steps_per_second": 4.535,
566
+ "step": 576
567
+ },
568
+ {
569
+ "epoch": 18.12,
570
+ "learning_rate": 9.794628751974725e-06,
571
+ "loss": 0.5067,
572
+ "step": 580
573
+ },
574
+ {
575
+ "epoch": 18.44,
576
+ "learning_rate": 9.004739336492892e-06,
577
+ "loss": 0.4401,
578
+ "step": 590
579
+ },
580
+ {
581
+ "epoch": 18.75,
582
+ "learning_rate": 8.214849921011059e-06,
583
+ "loss": 0.4761,
584
+ "step": 600
585
+ },
586
+ {
587
+ "epoch": 19.0,
588
+ "eval_accuracy": 0.6666666666666666,
589
+ "eval_f1": 0.6658035749901843,
590
+ "eval_loss": 0.968035876750946,
591
+ "eval_precision": 0.6673820855481607,
592
+ "eval_recall": 0.6666666666666666,
593
+ "eval_runtime": 24.4115,
594
+ "eval_samples_per_second": 71.401,
595
+ "eval_steps_per_second": 4.465,
596
+ "step": 608
597
+ },
598
+ {
599
+ "epoch": 19.06,
600
+ "learning_rate": 7.424960505529226e-06,
601
+ "loss": 0.4684,
602
+ "step": 610
603
+ },
604
+ {
605
+ "epoch": 19.38,
606
+ "learning_rate": 6.635071090047394e-06,
607
+ "loss": 0.4466,
608
+ "step": 620
609
+ },
610
+ {
611
+ "epoch": 19.69,
612
+ "learning_rate": 5.845181674565561e-06,
613
+ "loss": 0.4507,
614
+ "step": 630
615
+ },
616
+ {
617
+ "epoch": 20.0,
618
+ "learning_rate": 5.055292259083728e-06,
619
+ "loss": 0.4161,
620
+ "step": 640
621
+ },
622
+ {
623
+ "epoch": 20.0,
624
+ "eval_accuracy": 0.6712564543889845,
625
+ "eval_f1": 0.6700650267836397,
626
+ "eval_loss": 0.9700806736946106,
627
+ "eval_precision": 0.6719394542069612,
628
+ "eval_recall": 0.6712564543889845,
629
+ "eval_runtime": 24.0864,
630
+ "eval_samples_per_second": 72.364,
631
+ "eval_steps_per_second": 4.525,
632
+ "step": 640
633
+ },
634
+ {
635
+ "epoch": 20.31,
636
+ "learning_rate": 4.265402843601897e-06,
637
+ "loss": 0.4724,
638
+ "step": 650
639
+ },
640
+ {
641
+ "epoch": 20.62,
642
+ "learning_rate": 3.4755134281200636e-06,
643
+ "loss": 0.4127,
644
+ "step": 660
645
+ },
646
+ {
647
+ "epoch": 20.94,
648
+ "learning_rate": 2.685624012638231e-06,
649
+ "loss": 0.4295,
650
+ "step": 670
651
+ },
652
+ {
653
+ "epoch": 21.0,
654
+ "eval_accuracy": 0.6718301778542742,
655
+ "eval_f1": 0.6702822325183047,
656
+ "eval_loss": 0.971169114112854,
657
+ "eval_precision": 0.6698287004001044,
658
+ "eval_recall": 0.6718301778542742,
659
+ "eval_runtime": 24.5456,
660
+ "eval_samples_per_second": 71.011,
661
+ "eval_steps_per_second": 4.441,
662
+ "step": 672
663
+ },
664
+ {
665
+ "epoch": 21.25,
666
+ "learning_rate": 1.8957345971563984e-06,
667
+ "loss": 0.4618,
668
+ "step": 680
669
+ },
670
+ {
671
+ "epoch": 21.56,
672
+ "learning_rate": 1.1058451816745656e-06,
673
+ "loss": 0.434,
674
+ "step": 690
675
+ },
676
+ {
677
+ "epoch": 21.88,
678
+ "learning_rate": 3.15955766192733e-07,
679
+ "loss": 0.434,
680
+ "step": 700
681
+ },
682
+ {
683
+ "epoch": 22.0,
684
+ "eval_accuracy": 0.6706827309236948,
685
+ "eval_f1": 0.6690376760558668,
686
+ "eval_loss": 0.9755037426948547,
687
+ "eval_precision": 0.6705174221778998,
688
+ "eval_recall": 0.6706827309236948,
689
+ "eval_runtime": 24.2573,
690
+ "eval_samples_per_second": 71.855,
691
+ "eval_steps_per_second": 4.493,
692
+ "step": 704
693
+ },
694
+ {
695
+ "epoch": 22.0,
696
+ "step": 704,
697
+ "total_flos": 6.932186220913164e+18,
698
+ "train_loss": 0.7904366488483819,
699
+ "train_runtime": 4221.7797,
700
+ "train_samples_per_second": 21.188,
701
+ "train_steps_per_second": 0.167
702
+ }
703
+ ],
704
+ "max_steps": 704,
705
+ "num_train_epochs": 22,
706
+ "total_flos": 6.932186220913164e+18,
707
+ "trial_name": null,
708
+ "trial_params": null
709
+ }