apwic commited on
Commit
b77b690
1 Parent(s): e1f15de

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,17 +1,21 @@
1
  {
 
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9047619047619048,
4
- "eval_f1": 0.8878351186601172,
5
- "eval_loss": 0.7535876035690308,
6
- "eval_precision": 0.879776516905975,
7
- "eval_recall": 0.8976177486815784,
8
- "eval_runtime": 4.7232,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 84.476,
11
- "eval_steps_per_second": 10.586,
12
- "train_loss": 0.06173487283655855,
13
- "train_runtime": 2892.3535,
 
 
 
14
  "train_samples": 3638,
15
- "train_samples_per_second": 25.156,
16
- "train_steps_per_second": 0.844
17
  }
 
1
  {
2
+ "accuracy": 0.8615232443125618,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8546365914786967,
5
+ "eval_f1": 0.8263655462184873,
6
+ "eval_loss": 0.35401326417922974,
7
+ "eval_precision": 0.8233396753671443,
8
+ "eval_recall": 0.8296508456082925,
9
+ "eval_runtime": 5.0488,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 79.029,
12
+ "eval_steps_per_second": 9.903,
13
+ "f1": 0.8344251555846709,
14
+ "precision": 0.8325509007667684,
15
+ "recall": 0.8363917467548971,
16
+ "train_loss": 0.35800845193081215,
17
+ "train_runtime": 2113.1391,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 34.432,
20
+ "train_steps_per_second": 1.155
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9047619047619048,
4
- "eval_f1": 0.8878351186601172,
5
- "eval_loss": 0.7535876035690308,
6
- "eval_precision": 0.879776516905975,
7
- "eval_recall": 0.8976177486815784,
8
- "eval_runtime": 4.7232,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 84.476,
11
- "eval_steps_per_second": 10.586
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8546365914786967,
4
+ "eval_f1": 0.8263655462184873,
5
+ "eval_loss": 0.35401326417922974,
6
+ "eval_precision": 0.8233396753671443,
7
+ "eval_recall": 0.8296508456082925,
8
+ "eval_runtime": 5.0488,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 79.029,
11
+ "eval_steps_per_second": 9.903
12
  }
predict_results.txt CHANGED
@@ -1,6 +1,6 @@
1
  index prediction
2
  0 1
3
- 1 1
4
  2 1
5
  3 1
6
  4 0
@@ -14,21 +14,21 @@ index prediction
14
  12 1
15
  13 1
16
  14 1
17
- 15 0
18
  16 1
19
  17 1
20
  18 1
21
  19 1
22
  20 1
23
  21 1
24
- 22 1
25
  23 1
26
- 24 0
27
  25 1
28
  26 1
29
  27 1
30
  28 1
31
- 29 1
32
  30 1
33
  31 1
34
  32 1
@@ -38,39 +38,39 @@ index prediction
38
  36 1
39
  37 1
40
  38 1
41
- 39 1
42
  40 1
43
- 41 1
44
- 42 1
45
- 43 1
46
- 44 1
47
- 45 1
48
  46 1
49
  47 1
50
  48 1
51
  49 0
52
  50 1
53
  51 1
54
- 52 1
55
  53 1
56
  54 1
57
  55 1
58
- 56 1
59
  57 0
60
  58 1
61
  59 1
62
- 60 1
63
  61 1
64
  62 1
65
  63 1
66
- 64 1
67
  65 1
68
  66 1
69
  67 1
70
  68 1
71
  69 1
72
  70 1
73
- 71 0
74
  72 1
75
  73 1
76
  74 1
@@ -79,31 +79,31 @@ index prediction
79
  77 0
80
  78 1
81
  79 1
82
- 80 1
83
- 81 0
84
  82 1
85
  83 1
86
  84 1
87
  85 0
88
- 86 1
89
  87 1
90
  88 1
91
  89 1
92
  90 1
93
- 91 1
94
- 92 1
95
- 93 1
96
  94 1
97
  95 1
98
  96 1
99
  97 0
100
- 98 1
101
  99 0
102
  100 0
103
  101 0
104
  102 1
105
  103 1
106
- 104 1
107
  105 1
108
  106 1
109
  107 1
@@ -112,7 +112,7 @@ index prediction
112
  110 1
113
  111 1
114
  112 1
115
- 113 0
116
  114 1
117
  115 1
118
  116 1
@@ -124,7 +124,7 @@ index prediction
124
  122 1
125
  123 1
126
  124 1
127
- 125 0
128
  126 1
129
  127 1
130
  128 1
@@ -134,7 +134,7 @@ index prediction
134
  132 1
135
  133 1
136
  134 1
137
- 135 1
138
  136 0
139
  137 1
140
  138 1
@@ -146,7 +146,7 @@ index prediction
146
  144 1
147
  145 1
148
  146 1
149
- 147 1
150
  148 1
151
  149 1
152
  150 1
@@ -163,24 +163,24 @@ index prediction
163
  161 1
164
  162 1
165
  163 1
166
- 164 1
167
  165 0
168
  166 1
169
  167 1
170
- 168 1
171
- 169 1
172
  170 1
173
  171 1
174
- 172 1
175
  173 0
176
- 174 1
177
  175 1
178
- 176 1
179
  177 0
180
  178 1
181
  179 1
182
  180 1
183
- 181 0
184
  182 1
185
  183 1
186
  184 1
@@ -189,7 +189,7 @@ index prediction
189
  187 1
190
  188 1
191
  189 1
192
- 190 1
193
  191 1
194
  192 1
195
  193 1
@@ -217,22 +217,22 @@ index prediction
217
  215 1
218
  216 0
219
  217 0
220
- 218 1
221
  219 1
222
  220 0
223
  221 1
224
  222 1
225
  223 1
226
- 224 0
227
  225 1
228
  226 0
229
  227 0
230
- 228 1
231
- 229 1
232
  230 1
233
  231 1
234
- 232 1
235
- 233 1
236
  234 1
237
  235 1
238
  236 1
@@ -248,7 +248,7 @@ index prediction
248
  246 0
249
  247 1
250
  248 1
251
- 249 0
252
  250 0
253
  251 1
254
  252 1
@@ -260,19 +260,19 @@ index prediction
260
  258 1
261
  259 1
262
  260 1
263
- 261 1
264
  262 1
265
  263 1
266
  264 1
267
- 265 1
268
  266 1
269
  267 1
270
  268 1
271
  269 1
272
  270 1
273
  271 1
274
- 272 0
275
- 273 1
276
  274 1
277
  275 1
278
  276 1
@@ -292,11 +292,11 @@ index prediction
292
  290 1
293
  291 1
294
  292 1
295
- 293 1
296
  294 1
297
  295 1
298
  296 1
299
- 297 0
300
  298 0
301
  299 0
302
  300 0
@@ -313,9 +313,9 @@ index prediction
313
  311 0
314
  312 0
315
  313 0
316
- 314 0
317
  315 0
318
- 316 0
319
  317 0
320
  318 1
321
  319 0
@@ -327,30 +327,30 @@ index prediction
327
  325 0
328
  326 0
329
  327 0
330
- 328 1
331
  329 0
332
- 330 1
333
- 331 1
334
  332 0
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
- 338 1
341
  339 0
342
  340 0
343
  341 0
344
  342 0
345
  343 0
346
- 344 0
347
  345 0
348
- 346 1
349
  347 0
350
  348 0
351
  349 0
352
  350 0
353
- 351 0
354
  352 0
355
  353 0
356
  354 0
@@ -375,7 +375,7 @@ index prediction
375
  373 0
376
  374 0
377
  375 0
378
- 376 0
379
  377 0
380
  378 0
381
  379 0
@@ -397,11 +397,11 @@ index prediction
397
  395 0
398
  396 0
399
  397 0
400
- 398 1
401
  399 0
402
  400 0
403
- 401 0
404
- 402 1
405
  403 0
406
  404 0
407
  405 0
@@ -419,12 +419,12 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 1
423
- 421 0
424
  422 0
425
  423 0
426
  424 0
427
- 425 0
428
  426 0
429
  427 0
430
  428 0
@@ -438,7 +438,7 @@ index prediction
438
  436 0
439
  437 0
440
  438 0
441
- 439 0
442
  440 0
443
  441 0
444
  442 0
@@ -446,7 +446,7 @@ index prediction
446
  444 0
447
  445 0
448
  446 0
449
- 447 1
450
  448 0
451
  449 0
452
  450 0
@@ -454,7 +454,7 @@ index prediction
454
  452 0
455
  453 0
456
  454 0
457
- 455 0
458
  456 0
459
  457 0
460
  458 0
@@ -465,7 +465,7 @@ index prediction
465
  463 0
466
  464 0
467
  465 0
468
- 466 0
469
  467 0
470
  468 0
471
  469 0
@@ -486,12 +486,12 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 0
490
  488 0
491
  489 0
492
  490 0
493
  491 0
494
- 492 0
495
  493 0
496
  494 0
497
  495 0
@@ -502,7 +502,7 @@ index prediction
502
  500 0
503
  501 0
504
  502 0
505
- 503 0
506
  504 0
507
  505 0
508
  506 0
@@ -510,7 +510,7 @@ index prediction
510
  508 0
511
  509 0
512
  510 0
513
- 511 1
514
  512 0
515
  513 0
516
  514 0
@@ -521,9 +521,9 @@ index prediction
521
  519 0
522
  520 0
523
  521 0
524
- 522 0
525
  523 0
526
- 524 0
527
  525 0
528
  526 0
529
  527 0
@@ -533,9 +533,9 @@ index prediction
533
  531 0
534
  532 0
535
  533 0
536
- 534 0
537
  535 0
538
- 536 1
539
  537 0
540
  538 0
541
  539 0
@@ -559,7 +559,7 @@ index prediction
559
  557 0
560
  558 0
561
  559 0
562
- 560 1
563
  561 0
564
  562 0
565
  563 0
@@ -587,20 +587,20 @@ index prediction
587
  585 0
588
  586 0
589
  587 0
590
- 588 0
591
  589 0
592
  590 0
593
  591 0
594
  592 0
595
  593 0
596
  594 0
597
- 595 0
598
- 596 0
599
  597 0
600
  598 0
601
  599 0
602
  600 0
603
- 601 0
604
  602 0
605
  603 0
606
  604 0
@@ -623,18 +623,18 @@ index prediction
623
  621 1
624
  622 0
625
  623 0
626
- 624 0
627
  625 0
628
  626 0
629
  627 0
630
- 628 0
631
  629 0
632
  630 0
633
  631 0
634
- 632 1
635
  633 1
636
  634 0
637
- 635 0
638
  636 0
639
  637 0
640
  638 0
@@ -653,7 +653,7 @@ index prediction
653
  651 0
654
  652 1
655
  653 0
656
- 654 0
657
  655 0
658
  656 0
659
  657 1
@@ -662,7 +662,7 @@ index prediction
662
  660 0
663
  661 0
664
  662 0
665
- 663 1
666
  664 0
667
  665 0
668
  666 0
@@ -702,7 +702,7 @@ index prediction
702
  700 0
703
  701 0
704
  702 0
705
- 703 1
706
  704 0
707
  705 0
708
  706 0
@@ -755,7 +755,7 @@ index prediction
755
  753 0
756
  754 0
757
  755 0
758
- 756 0
759
  757 0
760
  758 0
761
  759 0
@@ -764,14 +764,14 @@ index prediction
764
  762 0
765
  763 0
766
  764 0
767
- 765 0
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
  770 1
773
  771 0
774
- 772 0
775
  773 0
776
  774 0
777
  775 0
@@ -808,12 +808,12 @@ index prediction
808
  806 0
809
  807 0
810
  808 0
811
- 809 0
812
  810 0
813
  811 0
814
  812 0
815
  813 0
816
- 814 0
817
  815 0
818
  816 0
819
  817 0
@@ -831,7 +831,7 @@ index prediction
831
  829 0
832
  830 0
833
  831 0
834
- 832 0
835
  833 1
836
  834 0
837
  835 0
@@ -840,7 +840,7 @@ index prediction
840
  838 0
841
  839 0
842
  840 0
843
- 841 1
844
  842 0
845
  843 0
846
  844 0
@@ -867,7 +867,7 @@ index prediction
867
  865 0
868
  866 0
869
  867 0
870
- 868 1
871
  869 0
872
  870 0
873
  871 0
@@ -883,7 +883,7 @@ index prediction
883
  881 0
884
  882 0
885
  883 0
886
- 884 0
887
  885 0
888
  886 0
889
  887 0
@@ -893,10 +893,10 @@ index prediction
893
  891 1
894
  892 0
895
  893 0
896
- 894 0
897
  895 0
898
  896 0
899
- 897 0
900
  898 0
901
  899 0
902
  900 0
@@ -905,13 +905,13 @@ index prediction
905
  903 0
906
  904 0
907
  905 0
908
- 906 0
909
  907 1
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
- 912 0
915
  913 0
916
  914 0
917
  915 0
@@ -949,20 +949,20 @@ index prediction
949
  947 0
950
  948 0
951
  949 0
952
- 950 0
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
- 955 1
958
  956 0
959
- 957 0
960
  958 0
961
  959 0
962
  960 0
963
  961 0
964
  962 0
965
- 963 1
966
  964 0
967
  965 0
968
  966 0
@@ -991,18 +991,18 @@ index prediction
991
  989 0
992
  990 0
993
  991 1
994
- 992 0
995
  993 0
996
  994 0
997
- 995 0
998
  996 0
999
  997 0
1000
- 998 0
1001
  999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
1005
- 1003 0
1006
  1004 0
1007
  1005 0
1008
  1006 0
 
1
  index prediction
2
  0 1
3
+ 1 0
4
  2 1
5
  3 1
6
  4 0
 
14
  12 1
15
  13 1
16
  14 1
17
+ 15 1
18
  16 1
19
  17 1
20
  18 1
21
  19 1
22
  20 1
23
  21 1
24
+ 22 0
25
  23 1
26
+ 24 1
27
  25 1
28
  26 1
29
  27 1
30
  28 1
31
+ 29 0
32
  30 1
33
  31 1
34
  32 1
 
38
  36 1
39
  37 1
40
  38 1
41
+ 39 0
42
  40 1
43
+ 41 0
44
+ 42 0
45
+ 43 0
46
+ 44 0
47
+ 45 0
48
  46 1
49
  47 1
50
  48 1
51
  49 0
52
  50 1
53
  51 1
54
+ 52 0
55
  53 1
56
  54 1
57
  55 1
58
+ 56 0
59
  57 0
60
  58 1
61
  59 1
62
+ 60 0
63
  61 1
64
  62 1
65
  63 1
66
+ 64 0
67
  65 1
68
  66 1
69
  67 1
70
  68 1
71
  69 1
72
  70 1
73
+ 71 1
74
  72 1
75
  73 1
76
  74 1
 
79
  77 0
80
  78 1
81
  79 1
82
+ 80 0
83
+ 81 1
84
  82 1
85
  83 1
86
  84 1
87
  85 0
88
+ 86 0
89
  87 1
90
  88 1
91
  89 1
92
  90 1
93
+ 91 0
94
+ 92 0
95
+ 93 0
96
  94 1
97
  95 1
98
  96 1
99
  97 0
100
+ 98 0
101
  99 0
102
  100 0
103
  101 0
104
  102 1
105
  103 1
106
+ 104 0
107
  105 1
108
  106 1
109
  107 1
 
112
  110 1
113
  111 1
114
  112 1
115
+ 113 1
116
  114 1
117
  115 1
118
  116 1
 
124
  122 1
125
  123 1
126
  124 1
127
+ 125 1
128
  126 1
129
  127 1
130
  128 1
 
134
  132 1
135
  133 1
136
  134 1
137
+ 135 0
138
  136 0
139
  137 1
140
  138 1
 
146
  144 1
147
  145 1
148
  146 1
149
+ 147 0
150
  148 1
151
  149 1
152
  150 1
 
163
  161 1
164
  162 1
165
  163 1
166
+ 164 0
167
  165 0
168
  166 1
169
  167 1
170
+ 168 0
171
+ 169 0
172
  170 1
173
  171 1
174
+ 172 0
175
  173 0
176
+ 174 0
177
  175 1
178
+ 176 0
179
  177 0
180
  178 1
181
  179 1
182
  180 1
183
+ 181 1
184
  182 1
185
  183 1
186
  184 1
 
189
  187 1
190
  188 1
191
  189 1
192
+ 190 0
193
  191 1
194
  192 1
195
  193 1
 
217
  215 1
218
  216 0
219
  217 0
220
+ 218 0
221
  219 1
222
  220 0
223
  221 1
224
  222 1
225
  223 1
226
+ 224 1
227
  225 1
228
  226 0
229
  227 0
230
+ 228 0
231
+ 229 0
232
  230 1
233
  231 1
234
+ 232 0
235
+ 233 0
236
  234 1
237
  235 1
238
  236 1
 
248
  246 0
249
  247 1
250
  248 1
251
+ 249 1
252
  250 0
253
  251 1
254
  252 1
 
260
  258 1
261
  259 1
262
  260 1
263
+ 261 0
264
  262 1
265
  263 1
266
  264 1
267
+ 265 0
268
  266 1
269
  267 1
270
  268 1
271
  269 1
272
  270 1
273
  271 1
274
+ 272 1
275
+ 273 0
276
  274 1
277
  275 1
278
  276 1
 
292
  290 1
293
  291 1
294
  292 1
295
+ 293 0
296
  294 1
297
  295 1
298
  296 1
299
+ 297 1
300
  298 0
301
  299 0
302
  300 0
 
313
  311 0
314
  312 0
315
  313 0
316
+ 314 1
317
  315 0
318
+ 316 1
319
  317 0
320
  318 1
321
  319 0
 
327
  325 0
328
  326 0
329
  327 0
330
+ 328 0
331
  329 0
332
+ 330 0
333
+ 331 0
334
  332 0
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
+ 338 0
341
  339 0
342
  340 0
343
  341 0
344
  342 0
345
  343 0
346
+ 344 1
347
  345 0
348
+ 346 0
349
  347 0
350
  348 0
351
  349 0
352
  350 0
353
+ 351 1
354
  352 0
355
  353 0
356
  354 0
 
375
  373 0
376
  374 0
377
  375 0
378
+ 376 1
379
  377 0
380
  378 0
381
  379 0
 
397
  395 0
398
  396 0
399
  397 0
400
+ 398 0
401
  399 0
402
  400 0
403
+ 401 1
404
+ 402 0
405
  403 0
406
  404 0
407
  405 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 0
423
+ 421 1
424
  422 0
425
  423 0
426
  424 0
427
+ 425 1
428
  426 0
429
  427 0
430
  428 0
 
438
  436 0
439
  437 0
440
  438 0
441
+ 439 1
442
  440 0
443
  441 0
444
  442 0
 
446
  444 0
447
  445 0
448
  446 0
449
+ 447 0
450
  448 0
451
  449 0
452
  450 0
 
454
  452 0
455
  453 0
456
  454 0
457
+ 455 1
458
  456 0
459
  457 0
460
  458 0
 
465
  463 0
466
  464 0
467
  465 0
468
+ 466 1
469
  467 0
470
  468 0
471
  469 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 1
490
  488 0
491
  489 0
492
  490 0
493
  491 0
494
+ 492 1
495
  493 0
496
  494 0
497
  495 0
 
502
  500 0
503
  501 0
504
  502 0
505
+ 503 1
506
  504 0
507
  505 0
508
  506 0
 
510
  508 0
511
  509 0
512
  510 0
513
+ 511 0
514
  512 0
515
  513 0
516
  514 0
 
521
  519 0
522
  520 0
523
  521 0
524
+ 522 1
525
  523 0
526
+ 524 1
527
  525 0
528
  526 0
529
  527 0
 
533
  531 0
534
  532 0
535
  533 0
536
+ 534 1
537
  535 0
538
+ 536 0
539
  537 0
540
  538 0
541
  539 0
 
559
  557 0
560
  558 0
561
  559 0
562
+ 560 0
563
  561 0
564
  562 0
565
  563 0
 
587
  585 0
588
  586 0
589
  587 0
590
+ 588 1
591
  589 0
592
  590 0
593
  591 0
594
  592 0
595
  593 0
596
  594 0
597
+ 595 1
598
+ 596 1
599
  597 0
600
  598 0
601
  599 0
602
  600 0
603
+ 601 1
604
  602 0
605
  603 0
606
  604 0
 
623
  621 1
624
  622 0
625
  623 0
626
+ 624 1
627
  625 0
628
  626 0
629
  627 0
630
+ 628 1
631
  629 0
632
  630 0
633
  631 0
634
+ 632 0
635
  633 1
636
  634 0
637
+ 635 1
638
  636 0
639
  637 0
640
  638 0
 
653
  651 0
654
  652 1
655
  653 0
656
+ 654 1
657
  655 0
658
  656 0
659
  657 1
 
662
  660 0
663
  661 0
664
  662 0
665
+ 663 0
666
  664 0
667
  665 0
668
  666 0
 
702
  700 0
703
  701 0
704
  702 0
705
+ 703 0
706
  704 0
707
  705 0
708
  706 0
 
755
  753 0
756
  754 0
757
  755 0
758
+ 756 1
759
  757 0
760
  758 0
761
  759 0
 
764
  762 0
765
  763 0
766
  764 0
767
+ 765 1
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
  770 1
773
  771 0
774
+ 772 1
775
  773 0
776
  774 0
777
  775 0
 
808
  806 0
809
  807 0
810
  808 0
811
+ 809 1
812
  810 0
813
  811 0
814
  812 0
815
  813 0
816
+ 814 1
817
  815 0
818
  816 0
819
  817 0
 
831
  829 0
832
  830 0
833
  831 0
834
+ 832 1
835
  833 1
836
  834 0
837
  835 0
 
840
  838 0
841
  839 0
842
  840 0
843
+ 841 0
844
  842 0
845
  843 0
846
  844 0
 
867
  865 0
868
  866 0
869
  867 0
870
+ 868 0
871
  869 0
872
  870 0
873
  871 0
 
883
  881 0
884
  882 0
885
  883 0
886
+ 884 1
887
  885 0
888
  886 0
889
  887 0
 
893
  891 1
894
  892 0
895
  893 0
896
+ 894 1
897
  895 0
898
  896 0
899
+ 897 1
900
  898 0
901
  899 0
902
  900 0
 
905
  903 0
906
  904 0
907
  905 0
908
+ 906 1
909
  907 1
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
+ 912 1
915
  913 0
916
  914 0
917
  915 0
 
949
  947 0
950
  948 0
951
  949 0
952
+ 950 1
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
+ 955 0
958
  956 0
959
+ 957 1
960
  958 0
961
  959 0
962
  960 0
963
  961 0
964
  962 0
965
+ 963 0
966
  964 0
967
  965 0
968
  966 0
 
991
  989 0
992
  990 0
993
  991 1
994
+ 992 1
995
  993 0
996
  994 0
997
+ 995 1
998
  996 0
999
  997 0
1000
+ 998 1
1001
  999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
1005
+ 1003 1
1006
  1004 0
1007
  1005 0
1008
  1006 0
runs/May15_17-27-18_indolem-petl-vm/events.out.tfevents.1715796187.indolem-petl-vm.579186.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c10c37be125002ed98cbc0b129247fa6687b064aa0f13207c7aee099ebc758a
3
+ size 560
test_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "accuracy": 0.8615232443125618,
3
+ "f1": 0.8344251555846709,
4
+ "precision": 0.8325509007667684,
5
+ "recall": 0.8363917467548971
6
+ }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.06173487283655855,
4
- "train_runtime": 2892.3535,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 25.156,
7
- "train_steps_per_second": 0.844
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.35800845193081215,
4
+ "train_runtime": 2113.1391,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 34.432,
7
+ "train_steps_per_second": 1.155
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.053804397583008,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.4355,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8696741854636592,
21
- "eval_f1": 0.835906358747232,
22
- "eval_loss": 0.3243214786052704,
23
- "eval_precision": 0.853844109243139,
24
- "eval_recall": 0.8227859610838335,
25
- "eval_runtime": 4.6306,
26
- "eval_samples_per_second": 86.166,
27
- "eval_steps_per_second": 10.798,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 23.150257110595703,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.2295,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8897243107769424,
40
- "eval_f1": 0.8701248742380304,
41
- "eval_loss": 0.3046626150608063,
42
- "eval_precision": 0.8624507874015748,
43
- "eval_recall": 0.8794780869248955,
44
- "eval_runtime": 4.9587,
45
- "eval_samples_per_second": 80.464,
46
- "eval_steps_per_second": 10.083,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 0.44390636682510376,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.1337,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.899749373433584,
59
- "eval_f1": 0.879667048676036,
60
- "eval_loss": 0.3747338354587555,
61
- "eval_precision": 0.8778361344537815,
62
- "eval_recall": 0.8815693762502272,
63
- "eval_runtime": 4.996,
64
- "eval_samples_per_second": 79.865,
65
- "eval_steps_per_second": 10.008,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 15.949886322021484,
71
  "learning_rate": 4e-05,
72
- "loss": 0.1038,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8822055137844611,
78
- "eval_f1": 0.8651222336500356,
79
- "eval_loss": 0.41882890462875366,
80
- "eval_precision": 0.8518339768339769,
81
- "eval_recall": 0.8866612111292962,
82
- "eval_runtime": 5.018,
83
- "eval_samples_per_second": 79.514,
84
- "eval_steps_per_second": 9.964,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.7783217430114746,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.072,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8872180451127819,
97
- "eval_f1": 0.8622036668943447,
98
- "eval_loss": 0.6270534992218018,
99
- "eval_precision": 0.8671602787456446,
100
- "eval_recall": 0.8577014002545917,
101
- "eval_runtime": 4.9576,
102
- "eval_samples_per_second": 80.483,
103
- "eval_steps_per_second": 10.086,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 44.57243347167969,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0462,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8897243107769424,
116
- "eval_f1": 0.8695225637671682,
117
- "eval_loss": 0.6129250526428223,
118
- "eval_precision": 0.8631532846715328,
119
- "eval_recall": 0.8769776322967813,
120
- "eval_runtime": 4.9521,
121
- "eval_samples_per_second": 80.572,
122
- "eval_steps_per_second": 10.097,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.024074144661426544,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0459,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8897243107769424,
135
- "eval_f1": 0.8649122807017544,
136
- "eval_loss": 0.5890637636184692,
137
- "eval_precision": 0.8710116366366366,
138
- "eval_recall": 0.8594744498999818,
139
- "eval_runtime": 4.9483,
140
- "eval_samples_per_second": 80.633,
141
- "eval_steps_per_second": 10.104,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.022918157279491425,
147
  "learning_rate": 3e-05,
148
- "loss": 0.0391,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8872180451127819,
154
- "eval_f1": 0.8680720368560659,
155
- "eval_loss": 0.5972921252250671,
156
- "eval_precision": 0.8587217615098657,
157
- "eval_recall": 0.8802054919076197,
158
- "eval_runtime": 4.9878,
159
- "eval_samples_per_second": 79.995,
160
- "eval_steps_per_second": 10.024,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 106.23094177246094,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0307,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.87468671679198,
173
- "eval_f1": 0.8584865509022812,
174
- "eval_loss": 0.7086873054504395,
175
- "eval_precision": 0.8441043083900227,
176
- "eval_recall": 0.8863429714493545,
177
- "eval_runtime": 4.9636,
178
- "eval_samples_per_second": 80.386,
179
- "eval_steps_per_second": 10.073,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 0.024997469037771225,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0199,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8972431077694235,
192
- "eval_f1": 0.8717112228173498,
193
- "eval_loss": 0.7264124155044556,
194
- "eval_precision": 0.8869295958279009,
195
- "eval_recall": 0.8597926895799237,
196
- "eval_runtime": 4.965,
197
- "eval_samples_per_second": 80.363,
198
- "eval_steps_per_second": 10.07,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.004392046481370926,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.0105,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8972431077694235,
211
- "eval_f1": 0.8757339815412664,
212
- "eval_loss": 0.6738360524177551,
213
- "eval_precision": 0.8766906299500427,
214
- "eval_recall": 0.8747954173486088,
215
- "eval_runtime": 4.9486,
216
- "eval_samples_per_second": 80.628,
217
- "eval_steps_per_second": 10.104,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 0.004026818089187145,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0131,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.899749373433584,
230
- "eval_f1": 0.882467302933899,
231
- "eval_loss": 0.7488105297088623,
232
- "eval_precision": 0.8732988802756245,
233
- "eval_recall": 0.8940716493907983,
234
- "eval_runtime": 4.9589,
235
- "eval_samples_per_second": 80.462,
236
- "eval_steps_per_second": 10.083,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.004543425515294075,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0102,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8972431077694235,
249
- "eval_f1": 0.8792560061999484,
250
- "eval_loss": 0.7154756784439087,
251
- "eval_precision": 0.8707622232472325,
252
- "eval_recall": 0.889798145117294,
253
- "eval_runtime": 4.9704,
254
- "eval_samples_per_second": 80.275,
255
- "eval_steps_per_second": 10.06,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.0037931231781840324,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0061,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.9072681704260651,
268
- "eval_f1": 0.8894993300948346,
269
- "eval_loss": 0.7196279168128967,
270
- "eval_precision": 0.8850535598035154,
271
- "eval_recall": 0.8943898890707401,
272
- "eval_runtime": 4.9583,
273
- "eval_samples_per_second": 80.47,
274
- "eval_steps_per_second": 10.084,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.0027608012314885855,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.0138,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9022556390977443,
287
- "eval_f1": 0.884617951284618,
288
- "eval_loss": 0.7618029713630676,
289
- "eval_precision": 0.8772893772893773,
290
- "eval_recall": 0.8933442444080741,
291
- "eval_runtime": 5.0532,
292
- "eval_samples_per_second": 78.96,
293
- "eval_steps_per_second": 9.895,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.0022813216783106327,
299
  "learning_rate": 1e-05,
300
- "loss": 0.0075,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9047619047619048,
306
- "eval_f1": 0.8873149414352814,
307
- "eval_loss": 0.7252941727638245,
308
- "eval_precision": 0.8806277372262774,
309
- "eval_recall": 0.8951172940534643,
310
- "eval_runtime": 5.598,
311
- "eval_samples_per_second": 71.275,
312
- "eval_steps_per_second": 8.932,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 15.638340950012207,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.0063,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.9022556390977443,
325
- "eval_f1": 0.8840781602687784,
326
- "eval_loss": 0.7560042142868042,
327
- "eval_precision": 0.87816715542522,
328
- "eval_recall": 0.89084378977996,
329
- "eval_runtime": 4.9548,
330
- "eval_samples_per_second": 80.528,
331
- "eval_steps_per_second": 10.091,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 0.002121408935636282,
337
  "learning_rate": 5e-06,
338
- "loss": 0.0066,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9022556390977443,
344
- "eval_f1": 0.8856624319419237,
345
- "eval_loss": 0.748332142829895,
346
- "eval_precision": 0.8758364312267658,
347
- "eval_recall": 0.8983451536643026,
348
- "eval_runtime": 4.9482,
349
- "eval_samples_per_second": 80.635,
350
- "eval_steps_per_second": 10.105,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.004570267163217068,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.0023,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.9022556390977443,
363
- "eval_f1": 0.884617951284618,
364
- "eval_loss": 0.7535205483436584,
365
- "eval_precision": 0.8772893772893773,
366
- "eval_recall": 0.8933442444080741,
367
- "eval_runtime": 4.9538,
368
- "eval_samples_per_second": 80.543,
369
- "eval_steps_per_second": 10.093,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.0018295175395905972,
375
  "learning_rate": 0.0,
376
- "loss": 0.0021,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9047619047619048,
382
- "eval_f1": 0.8878351186601172,
383
- "eval_loss": 0.7535876035690308,
384
- "eval_precision": 0.879776516905975,
385
- "eval_recall": 0.8976177486815784,
386
- "eval_runtime": 4.9557,
387
- "eval_samples_per_second": 80.514,
388
- "eval_steps_per_second": 10.089,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
- "total_flos": 7584162436176000.0,
395
- "train_loss": 0.06173487283655855,
396
- "train_runtime": 2892.3535,
397
- "train_samples_per_second": 25.156,
398
- "train_steps_per_second": 0.844
399
  }
400
  ],
401
  "logging_steps": 500,
@@ -403,7 +403,7 @@
403
  "num_input_tokens_seen": 0,
404
  "num_train_epochs": 20,
405
  "save_steps": 500,
406
- "total_flos": 7584162436176000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 3.0131800174713135,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5623,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7167919799498746,
21
+ "eval_f1": 0.5794790005316321,
22
+ "eval_loss": 0.5053456425666809,
23
+ "eval_precision": 0.6409822866344606,
24
+ "eval_recall": 0.5796053827968721,
25
+ "eval_runtime": 5.6071,
26
+ "eval_samples_per_second": 71.159,
27
+ "eval_steps_per_second": 8.917,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 5.634490966796875,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.518,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7293233082706767,
40
+ "eval_f1": 0.599784530797236,
41
+ "eval_loss": 0.4860531687736511,
42
+ "eval_precision": 0.6673625792811839,
43
+ "eval_recall": 0.5959719949081652,
44
+ "eval_runtime": 5.7755,
45
+ "eval_samples_per_second": 69.085,
46
+ "eval_steps_per_second": 8.657,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 1.959808111190796,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.4835,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.7694235588972431,
59
+ "eval_f1": 0.7145034843205575,
60
+ "eval_loss": 0.45518842339515686,
61
+ "eval_precision": 0.7210824478299833,
62
+ "eval_recall": 0.7093562465902892,
63
+ "eval_runtime": 5.2584,
64
+ "eval_samples_per_second": 75.878,
65
+ "eval_steps_per_second": 9.509,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 4.635169506072998,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.4497,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.7944862155388471,
78
+ "eval_f1": 0.7520912893253319,
79
+ "eval_loss": 0.4223441481590271,
80
+ "eval_precision": 0.7520912893253319,
81
+ "eval_recall": 0.7520912893253319,
82
+ "eval_runtime": 5.0487,
83
+ "eval_samples_per_second": 79.03,
84
+ "eval_steps_per_second": 9.903,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 8.219679832458496,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.4266,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8170426065162907,
97
+ "eval_f1": 0.7740779522978476,
98
+ "eval_loss": 0.399569034576416,
99
+ "eval_precision": 0.7814051164566629,
100
+ "eval_recall": 0.7680487361338425,
101
+ "eval_runtime": 5.0767,
102
+ "eval_samples_per_second": 78.595,
103
+ "eval_steps_per_second": 9.849,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 4.150725841522217,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.3907,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8195488721804511,
116
+ "eval_f1": 0.784453781512605,
117
+ "eval_loss": 0.3830115497112274,
118
+ "eval_precision": 0.7818241274748796,
119
+ "eval_recall": 0.787324968176032,
120
+ "eval_runtime": 5.0718,
121
+ "eval_samples_per_second": 78.67,
122
+ "eval_steps_per_second": 9.858,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 3.297985076904297,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.3742,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8345864661654135,
135
+ "eval_f1": 0.798423147581139,
136
+ "eval_loss": 0.3684135675430298,
137
+ "eval_precision": 0.8016430472182685,
138
+ "eval_recall": 0.7954628114202582,
139
+ "eval_runtime": 5.0743,
140
+ "eval_samples_per_second": 78.632,
141
+ "eval_steps_per_second": 9.854,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 8.395323753356934,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.3616,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8270676691729323,
154
+ "eval_f1": 0.7967966933608887,
155
+ "eval_loss": 0.3719731867313385,
156
+ "eval_precision": 0.7902444649446494,
157
+ "eval_recall": 0.8051463902527732,
158
+ "eval_runtime": 5.0484,
159
+ "eval_samples_per_second": 79.035,
160
+ "eval_steps_per_second": 9.904,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 3.748974561691284,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.3294,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8370927318295739,
173
+ "eval_f1": 0.8076965854743632,
174
+ "eval_loss": 0.36888691782951355,
175
+ "eval_precision": 0.8018925518925519,
176
+ "eval_recall": 0.8147390434624477,
177
+ "eval_runtime": 5.0543,
178
+ "eval_samples_per_second": 78.943,
179
+ "eval_steps_per_second": 9.893,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 9.309541702270508,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.3207,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8395989974937343,
192
+ "eval_f1": 0.8110907261644079,
193
+ "eval_loss": 0.36315786838531494,
194
+ "eval_precision": 0.8046983557202408,
195
+ "eval_recall": 0.819012547735952,
196
+ "eval_runtime": 5.0709,
197
+ "eval_samples_per_second": 78.684,
198
+ "eval_steps_per_second": 9.86,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 1.2568168640136719,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.3214,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8370927318295739,
211
+ "eval_f1": 0.8085765951950401,
212
+ "eval_loss": 0.3577338457107544,
213
+ "eval_precision": 0.8017470018450185,
214
+ "eval_recall": 0.817239498090562,
215
+ "eval_runtime": 5.1071,
216
+ "eval_samples_per_second": 78.126,
217
+ "eval_steps_per_second": 9.79,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 1.915198802947998,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.3167,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8395989974937343,
230
+ "eval_f1": 0.8119476846942383,
231
+ "eval_loss": 0.36069995164871216,
232
+ "eval_precision": 0.8045650301464256,
233
+ "eval_recall": 0.8215130023640662,
234
+ "eval_runtime": 5.0598,
235
+ "eval_samples_per_second": 78.857,
236
+ "eval_steps_per_second": 9.882,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.9545631408691406,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.289,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8345864661654135,
249
+ "eval_f1": 0.8060710498409331,
250
+ "eval_loss": 0.3684280812740326,
251
+ "eval_precision": 0.7988372093023256,
252
+ "eval_recall": 0.8154664484451719,
253
+ "eval_runtime": 5.1019,
254
+ "eval_samples_per_second": 78.206,
255
+ "eval_steps_per_second": 9.8,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 5.748187065124512,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.2997,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.849624060150376,
268
+ "eval_f1": 0.8176861216035092,
269
+ "eval_loss": 0.3479882776737213,
270
+ "eval_precision": 0.8193355786895284,
271
+ "eval_recall": 0.8161029278050556,
272
+ "eval_runtime": 5.0557,
273
+ "eval_samples_per_second": 78.92,
274
+ "eval_steps_per_second": 9.89,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 4.010083198547363,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.2986,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.849624060150376,
287
+ "eval_f1": 0.821236559139785,
288
+ "eval_loss": 0.35758015513420105,
289
+ "eval_precision": 0.8169406150583245,
290
+ "eval_recall": 0.8261047463175123,
291
+ "eval_runtime": 5.0955,
292
+ "eval_samples_per_second": 78.304,
293
+ "eval_steps_per_second": 9.813,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.9220337271690369,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.2914,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.849624060150376,
306
+ "eval_f1": 0.8195005730140539,
307
+ "eval_loss": 0.34965991973876953,
308
+ "eval_precision": 0.8179621848739496,
309
+ "eval_recall": 0.8211038370612839,
310
+ "eval_runtime": 5.0617,
311
+ "eval_samples_per_second": 78.827,
312
+ "eval_steps_per_second": 9.878,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 1.7026562690734863,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.278,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8521303258145363,
325
+ "eval_f1": 0.8229427559286084,
326
+ "eval_loss": 0.3539772927761078,
327
+ "eval_precision": 0.8206541218637993,
328
+ "eval_recall": 0.8253773413347881,
329
+ "eval_runtime": 5.1199,
330
+ "eval_samples_per_second": 77.931,
331
+ "eval_steps_per_second": 9.766,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 5.839470863342285,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.2887,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8521303258145363,
344
+ "eval_f1": 0.8229427559286084,
345
+ "eval_loss": 0.35161107778549194,
346
+ "eval_precision": 0.8206541218637993,
347
+ "eval_recall": 0.8253773413347881,
348
+ "eval_runtime": 5.1154,
349
+ "eval_samples_per_second": 77.999,
350
+ "eval_steps_per_second": 9.774,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 7.782900810241699,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.2829,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8521303258145363,
363
+ "eval_f1": 0.8229427559286084,
364
+ "eval_loss": 0.35369938611984253,
365
+ "eval_precision": 0.8206541218637993,
366
+ "eval_recall": 0.8253773413347881,
367
+ "eval_runtime": 5.0565,
368
+ "eval_samples_per_second": 78.908,
369
+ "eval_steps_per_second": 9.888,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 5.059621334075928,
375
  "learning_rate": 0.0,
376
+ "loss": 0.2771,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8546365914786967,
382
+ "eval_f1": 0.8263655462184873,
383
+ "eval_loss": 0.35401326417922974,
384
+ "eval_precision": 0.8233396753671443,
385
+ "eval_recall": 0.8296508456082925,
386
+ "eval_runtime": 5.0854,
387
+ "eval_samples_per_second": 78.459,
388
+ "eval_steps_per_second": 9.832,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
+ "total_flos": 7597037114448000.0,
395
+ "train_loss": 0.35800845193081215,
396
+ "train_runtime": 2113.1391,
397
+ "train_samples_per_second": 34.432,
398
+ "train_steps_per_second": 1.155
399
  }
400
  ],
401
  "logging_steps": 500,
 
403
  "num_input_tokens_seen": 0,
404
  "num_train_epochs": 20,
405
  "save_steps": 500,
406
+ "total_flos": 7597037114448000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null