apwic commited on
Commit
f6a9a12
1 Parent(s): 981615a

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.8615232443125618,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8546365914786967,
5
- "eval_f1": 0.8263655462184873,
6
- "eval_loss": 0.35401326417922974,
7
- "eval_precision": 0.8233396753671443,
8
- "eval_recall": 0.8296508456082925,
9
- "eval_runtime": 5.0488,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.029,
12
- "eval_steps_per_second": 9.903,
13
- "f1": 0.8344251555846709,
14
- "precision": 0.8325509007667684,
15
- "recall": 0.8363917467548971,
16
- "train_loss": 0.35800845193081215,
17
- "train_runtime": 2113.1391,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 34.432,
20
- "train_steps_per_second": 1.155
21
  }
 
1
  {
2
+ "accuracy": 0.9109792284866469,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.9022556390977443,
5
+ "eval_f1": 0.8817957385392532,
6
+ "eval_loss": 0.8104944229125977,
7
+ "eval_precision": 0.8827677592299257,
8
+ "eval_recall": 0.8808419712675032,
9
+ "eval_runtime": 4.7231,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 84.478,
12
+ "eval_steps_per_second": 10.586,
13
+ "f1": 0.8920886346170267,
14
+ "precision": 0.8953297623033144,
15
+ "recall": 0.8890334817436486,
16
+ "train_loss": 0.05662053943168922,
17
+ "train_runtime": 2712.8409,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 26.821,
20
+ "train_steps_per_second": 0.899
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8546365914786967,
4
- "eval_f1": 0.8263655462184873,
5
- "eval_loss": 0.35401326417922974,
6
- "eval_precision": 0.8233396753671443,
7
- "eval_recall": 0.8296508456082925,
8
- "eval_runtime": 5.0488,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.029,
11
- "eval_steps_per_second": 9.903
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9022556390977443,
4
+ "eval_f1": 0.8817957385392532,
5
+ "eval_loss": 0.8104944229125977,
6
+ "eval_precision": 0.8827677592299257,
7
+ "eval_recall": 0.8808419712675032,
8
+ "eval_runtime": 4.7231,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 84.478,
11
+ "eval_steps_per_second": 10.586
12
  }
predict_results.txt CHANGED
@@ -1,6 +1,6 @@
1
  index prediction
2
  0 1
3
- 1 0
4
  2 1
5
  3 1
6
  4 0
@@ -8,62 +8,62 @@ index prediction
8
  6 1
9
  7 1
10
  8 0
11
- 9 1
12
  10 1
13
  11 1
14
  12 1
15
  13 1
16
  14 1
17
- 15 1
18
  16 1
19
  17 1
20
  18 1
21
  19 1
22
  20 1
23
  21 1
24
- 22 0
25
  23 1
26
- 24 1
27
- 25 1
28
  26 1
29
  27 1
30
  28 1
31
- 29 0
32
  30 1
33
  31 1
34
  32 1
35
  33 1
36
- 34 0
37
  35 1
38
  36 1
39
  37 1
40
  38 1
41
  39 0
42
  40 1
43
- 41 0
44
- 42 0
45
- 43 0
46
- 44 0
47
  45 0
48
  46 1
49
  47 1
50
  48 1
51
  49 0
52
- 50 1
53
  51 1
54
  52 0
55
  53 1
56
  54 1
57
  55 1
58
- 56 0
59
  57 0
60
- 58 1
61
- 59 1
62
- 60 0
63
  61 1
64
  62 1
65
  63 1
66
- 64 0
67
  65 1
68
  66 1
69
  67 1
@@ -79,31 +79,31 @@ index prediction
79
  77 0
80
  78 1
81
  79 1
82
- 80 0
83
  81 1
84
  82 1
85
  83 1
86
  84 1
87
  85 0
88
- 86 0
89
  87 1
90
  88 1
91
  89 1
92
  90 1
93
- 91 0
94
  92 0
95
- 93 0
96
  94 1
97
  95 1
98
  96 1
99
  97 0
100
- 98 0
101
- 99 0
102
- 100 0
103
  101 0
104
  102 1
105
  103 1
106
- 104 0
107
  105 1
108
  106 1
109
  107 1
@@ -112,12 +112,12 @@ index prediction
112
  110 1
113
  111 1
114
  112 1
115
- 113 1
116
  114 1
117
  115 1
118
  116 1
119
  117 1
120
- 118 1
121
  119 1
122
  120 1
123
  121 1
@@ -133,23 +133,23 @@ index prediction
133
  131 0
134
  132 1
135
  133 1
136
- 134 1
137
- 135 0
138
  136 0
139
  137 1
140
  138 1
141
  139 1
142
  140 1
143
  141 1
144
- 142 1
145
  143 1
146
  144 1
147
  145 1
148
  146 1
149
- 147 0
150
  148 1
151
  149 1
152
- 150 1
153
  151 1
154
  152 1
155
  153 1
@@ -163,24 +163,24 @@ index prediction
163
  161 1
164
  162 1
165
  163 1
166
- 164 0
167
  165 0
168
  166 1
169
  167 1
170
- 168 0
171
- 169 0
172
  170 1
173
  171 1
174
  172 0
175
  173 0
176
- 174 0
177
  175 1
178
- 176 0
179
  177 0
180
  178 1
181
  179 1
182
  180 1
183
- 181 1
184
  182 1
185
  183 1
186
  184 1
@@ -189,7 +189,7 @@ index prediction
189
  187 1
190
  188 1
191
  189 1
192
- 190 0
193
  191 1
194
  192 1
195
  193 1
@@ -206,7 +206,7 @@ index prediction
206
  204 1
207
  205 0
208
  206 1
209
- 207 1
210
  208 1
211
  209 1
212
  210 1
@@ -217,22 +217,22 @@ index prediction
217
  215 1
218
  216 0
219
  217 0
220
- 218 0
221
  219 1
222
  220 0
223
  221 1
224
  222 1
225
  223 1
226
- 224 1
227
  225 1
228
  226 0
229
  227 0
230
- 228 0
231
- 229 0
232
  230 1
233
  231 1
234
- 232 0
235
- 233 0
236
  234 1
237
  235 1
238
  236 1
@@ -260,7 +260,7 @@ index prediction
260
  258 1
261
  259 1
262
  260 1
263
- 261 0
264
  262 1
265
  263 1
266
  264 1
@@ -292,16 +292,16 @@ index prediction
292
  290 1
293
  291 1
294
  292 1
295
- 293 0
296
  294 1
297
  295 1
298
  296 1
299
- 297 1
300
  298 0
301
  299 0
302
  300 0
303
  301 0
304
- 302 1
305
  303 0
306
  304 0
307
  305 1
@@ -313,9 +313,9 @@ index prediction
313
  311 0
314
  312 0
315
  313 0
316
- 314 1
317
  315 0
318
- 316 1
319
  317 0
320
  318 1
321
  319 0
@@ -327,7 +327,7 @@ index prediction
327
  325 0
328
  326 0
329
  327 0
330
- 328 0
331
  329 0
332
  330 0
333
  331 0
@@ -343,14 +343,14 @@ index prediction
343
  341 0
344
  342 0
345
  343 0
346
- 344 1
347
  345 0
348
- 346 0
349
  347 0
350
  348 0
351
  349 0
352
  350 0
353
- 351 1
354
  352 0
355
  353 0
356
  354 0
@@ -363,9 +363,9 @@ index prediction
363
  361 0
364
  362 0
365
  363 0
366
- 364 1
367
  365 0
368
- 366 1
369
  367 0
370
  368 0
371
  369 0
@@ -375,7 +375,7 @@ index prediction
375
  373 0
376
  374 0
377
  375 0
378
- 376 1
379
  377 0
380
  378 0
381
  379 0
@@ -391,16 +391,16 @@ index prediction
391
  389 0
392
  390 0
393
  391 0
394
- 392 0
395
  393 0
396
  394 0
397
  395 0
398
  396 0
399
  397 0
400
- 398 0
401
  399 0
402
  400 0
403
- 401 1
404
  402 0
405
  403 0
406
  404 0
@@ -419,12 +419,12 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 0
423
  421 1
424
  422 0
425
  423 0
426
  424 0
427
- 425 1
428
  426 0
429
  427 0
430
  428 0
@@ -438,7 +438,7 @@ index prediction
438
  436 0
439
  437 0
440
  438 0
441
- 439 1
442
  440 0
443
  441 0
444
  442 0
@@ -446,17 +446,17 @@ index prediction
446
  444 0
447
  445 0
448
  446 0
449
- 447 0
450
  448 0
451
  449 0
452
  450 0
453
  451 0
454
- 452 0
455
  453 0
456
  454 0
457
- 455 1
458
  456 0
459
- 457 0
460
  458 0
461
  459 0
462
  460 0
@@ -465,12 +465,12 @@ index prediction
465
  463 0
466
  464 0
467
  465 0
468
- 466 1
469
  467 0
470
  468 0
471
  469 0
472
  470 0
473
- 471 0
474
  472 0
475
  473 0
476
  474 0
@@ -486,12 +486,12 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 1
490
  488 0
491
  489 0
492
  490 0
493
  491 0
494
- 492 1
495
  493 0
496
  494 0
497
  495 0
@@ -502,7 +502,7 @@ index prediction
502
  500 0
503
  501 0
504
  502 0
505
- 503 1
506
  504 0
507
  505 0
508
  506 0
@@ -510,7 +510,7 @@ index prediction
510
  508 0
511
  509 0
512
  510 0
513
- 511 0
514
  512 0
515
  513 0
516
  514 0
@@ -521,9 +521,9 @@ index prediction
521
  519 0
522
  520 0
523
  521 0
524
- 522 1
525
  523 0
526
- 524 1
527
  525 0
528
  526 0
529
  527 0
@@ -533,11 +533,11 @@ index prediction
533
  531 0
534
  532 0
535
  533 0
536
- 534 1
537
  535 0
538
  536 0
539
  537 0
540
- 538 0
541
  539 0
542
  540 0
543
  541 0
@@ -559,7 +559,7 @@ index prediction
559
  557 0
560
  558 0
561
  559 0
562
- 560 0
563
  561 0
564
  562 0
565
  563 0
@@ -587,27 +587,27 @@ index prediction
587
  585 0
588
  586 0
589
  587 0
590
- 588 1
591
  589 0
592
  590 0
593
  591 0
594
  592 0
595
  593 0
596
  594 0
597
- 595 1
598
- 596 1
599
  597 0
600
  598 0
601
  599 0
602
  600 0
603
- 601 1
604
  602 0
605
  603 0
606
  604 0
607
  605 0
608
  606 0
609
  607 0
610
- 608 1
611
  609 0
612
  610 1
613
  611 0
@@ -623,22 +623,22 @@ index prediction
623
  621 1
624
  622 0
625
  623 0
626
- 624 1
627
  625 0
628
  626 0
629
  627 0
630
- 628 1
631
  629 0
632
  630 0
633
  631 0
634
  632 0
635
- 633 1
636
  634 0
637
- 635 1
638
  636 0
639
  637 0
640
  638 0
641
- 639 1
642
  640 0
643
  641 0
644
  642 0
@@ -653,7 +653,7 @@ index prediction
653
  651 0
654
  652 1
655
  653 0
656
- 654 1
657
  655 0
658
  656 0
659
  657 1
@@ -668,7 +668,7 @@ index prediction
668
  666 0
669
  667 0
670
  668 0
671
- 669 1
672
  670 0
673
  671 0
674
  672 0
@@ -726,7 +726,7 @@ index prediction
726
  724 0
727
  725 0
728
  726 0
729
- 727 1
730
  728 1
731
  729 0
732
  730 0
@@ -755,7 +755,7 @@ index prediction
755
  753 0
756
  754 0
757
  755 0
758
- 756 1
759
  757 0
760
  758 0
761
  759 0
@@ -764,14 +764,14 @@ index prediction
764
  762 0
765
  763 0
766
  764 0
767
- 765 1
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
- 770 1
773
  771 0
774
- 772 1
775
  773 0
776
  774 0
777
  775 0
@@ -808,12 +808,12 @@ index prediction
808
  806 0
809
  807 0
810
  808 0
811
- 809 1
812
  810 0
813
  811 0
814
  812 0
815
  813 0
816
- 814 1
817
  815 0
818
  816 0
819
  817 0
@@ -837,10 +837,10 @@ index prediction
837
  835 0
838
  836 0
839
  837 0
840
- 838 0
841
  839 0
842
  840 0
843
- 841 0
844
  842 0
845
  843 0
846
  844 0
@@ -867,7 +867,7 @@ index prediction
867
  865 0
868
  866 0
869
  867 0
870
- 868 0
871
  869 0
872
  870 0
873
  871 0
@@ -883,7 +883,7 @@ index prediction
883
  881 0
884
  882 0
885
  883 0
886
- 884 1
887
  885 0
888
  886 0
889
  887 0
@@ -893,10 +893,10 @@ index prediction
893
  891 1
894
  892 0
895
  893 0
896
- 894 1
897
  895 0
898
  896 0
899
- 897 1
900
  898 0
901
  899 0
902
  900 0
@@ -905,13 +905,13 @@ index prediction
905
  903 0
906
  904 0
907
  905 0
908
- 906 1
909
  907 1
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
- 912 1
915
  913 0
916
  914 0
917
  915 0
@@ -933,7 +933,7 @@ index prediction
933
  931 0
934
  932 0
935
  933 0
936
- 934 0
937
  935 0
938
  936 0
939
  937 0
@@ -949,14 +949,14 @@ index prediction
949
  947 0
950
  948 0
951
  949 0
952
- 950 1
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
- 955 0
958
  956 0
959
- 957 1
960
  958 0
961
  959 0
962
  960 0
@@ -991,18 +991,18 @@ index prediction
991
  989 0
992
  990 0
993
  991 1
994
- 992 1
995
  993 0
996
  994 0
997
- 995 1
998
  996 0
999
  997 0
1000
- 998 1
1001
  999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
1005
- 1003 1
1006
  1004 0
1007
  1005 0
1008
  1006 0
 
1
  index prediction
2
  0 1
3
+ 1 1
4
  2 1
5
  3 1
6
  4 0
 
8
  6 1
9
  7 1
10
  8 0
11
+ 9 0
12
  10 1
13
  11 1
14
  12 1
15
  13 1
16
  14 1
17
+ 15 0
18
  16 1
19
  17 1
20
  18 1
21
  19 1
22
  20 1
23
  21 1
24
+ 22 1
25
  23 1
26
+ 24 0
27
+ 25 0
28
  26 1
29
  27 1
30
  28 1
31
+ 29 1
32
  30 1
33
  31 1
34
  32 1
35
  33 1
36
+ 34 1
37
  35 1
38
  36 1
39
  37 1
40
  38 1
41
  39 0
42
  40 1
43
+ 41 1
44
+ 42 1
45
+ 43 1
46
+ 44 1
47
  45 0
48
  46 1
49
  47 1
50
  48 1
51
  49 0
52
+ 50 0
53
  51 1
54
  52 0
55
  53 1
56
  54 1
57
  55 1
58
+ 56 1
59
  57 0
60
+ 58 0
61
+ 59 0
62
+ 60 1
63
  61 1
64
  62 1
65
  63 1
66
+ 64 1
67
  65 1
68
  66 1
69
  67 1
 
79
  77 0
80
  78 1
81
  79 1
82
+ 80 1
83
  81 1
84
  82 1
85
  83 1
86
  84 1
87
  85 0
88
+ 86 1
89
  87 1
90
  88 1
91
  89 1
92
  90 1
93
+ 91 1
94
  92 0
95
+ 93 1
96
  94 1
97
  95 1
98
  96 1
99
  97 0
100
+ 98 1
101
+ 99 1
102
+ 100 1
103
  101 0
104
  102 1
105
  103 1
106
+ 104 1
107
  105 1
108
  106 1
109
  107 1
 
112
  110 1
113
  111 1
114
  112 1
115
+ 113 0
116
  114 1
117
  115 1
118
  116 1
119
  117 1
120
+ 118 0
121
  119 1
122
  120 1
123
  121 1
 
133
  131 0
134
  132 1
135
  133 1
136
+ 134 0
137
+ 135 1
138
  136 0
139
  137 1
140
  138 1
141
  139 1
142
  140 1
143
  141 1
144
+ 142 0
145
  143 1
146
  144 1
147
  145 1
148
  146 1
149
+ 147 1
150
  148 1
151
  149 1
152
+ 150 0
153
  151 1
154
  152 1
155
  153 1
 
163
  161 1
164
  162 1
165
  163 1
166
+ 164 1
167
  165 0
168
  166 1
169
  167 1
170
+ 168 1
171
+ 169 1
172
  170 1
173
  171 1
174
  172 0
175
  173 0
176
+ 174 1
177
  175 1
178
+ 176 1
179
  177 0
180
  178 1
181
  179 1
182
  180 1
183
+ 181 0
184
  182 1
185
  183 1
186
  184 1
 
189
  187 1
190
  188 1
191
  189 1
192
+ 190 1
193
  191 1
194
  192 1
195
  193 1
 
206
  204 1
207
  205 0
208
  206 1
209
+ 207 0
210
  208 1
211
  209 1
212
  210 1
 
217
  215 1
218
  216 0
219
  217 0
220
+ 218 1
221
  219 1
222
  220 0
223
  221 1
224
  222 1
225
  223 1
226
+ 224 0
227
  225 1
228
  226 0
229
  227 0
230
+ 228 1
231
+ 229 1
232
  230 1
233
  231 1
234
+ 232 1
235
+ 233 1
236
  234 1
237
  235 1
238
  236 1
 
260
  258 1
261
  259 1
262
  260 1
263
+ 261 1
264
  262 1
265
  263 1
266
  264 1
 
292
  290 1
293
  291 1
294
  292 1
295
+ 293 1
296
  294 1
297
  295 1
298
  296 1
299
+ 297 0
300
  298 0
301
  299 0
302
  300 0
303
  301 0
304
+ 302 0
305
  303 0
306
  304 0
307
  305 1
 
313
  311 0
314
  312 0
315
  313 0
316
+ 314 0
317
  315 0
318
+ 316 0
319
  317 0
320
  318 1
321
  319 0
 
327
  325 0
328
  326 0
329
  327 0
330
+ 328 1
331
  329 0
332
  330 0
333
  331 0
 
343
  341 0
344
  342 0
345
  343 0
346
+ 344 0
347
  345 0
348
+ 346 1
349
  347 0
350
  348 0
351
  349 0
352
  350 0
353
+ 351 0
354
  352 0
355
  353 0
356
  354 0
 
363
  361 0
364
  362 0
365
  363 0
366
+ 364 0
367
  365 0
368
+ 366 0
369
  367 0
370
  368 0
371
  369 0
 
375
  373 0
376
  374 0
377
  375 0
378
+ 376 0
379
  377 0
380
  378 0
381
  379 0
 
391
  389 0
392
  390 0
393
  391 0
394
+ 392 1
395
  393 0
396
  394 0
397
  395 0
398
  396 0
399
  397 0
400
+ 398 1
401
  399 0
402
  400 0
403
+ 401 0
404
  402 0
405
  403 0
406
  404 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 1
423
  421 1
424
  422 0
425
  423 0
426
  424 0
427
+ 425 0
428
  426 0
429
  427 0
430
  428 0
 
438
  436 0
439
  437 0
440
  438 0
441
+ 439 0
442
  440 0
443
  441 0
444
  442 0
 
446
  444 0
447
  445 0
448
  446 0
449
+ 447 1
450
  448 0
451
  449 0
452
  450 0
453
  451 0
454
+ 452 1
455
  453 0
456
  454 0
457
+ 455 0
458
  456 0
459
+ 457 1
460
  458 0
461
  459 0
462
  460 0
 
465
  463 0
466
  464 0
467
  465 0
468
+ 466 0
469
  467 0
470
  468 0
471
  469 0
472
  470 0
473
+ 471 1
474
  472 0
475
  473 0
476
  474 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 0
490
  488 0
491
  489 0
492
  490 0
493
  491 0
494
+ 492 0
495
  493 0
496
  494 0
497
  495 0
 
502
  500 0
503
  501 0
504
  502 0
505
+ 503 0
506
  504 0
507
  505 0
508
  506 0
 
510
  508 0
511
  509 0
512
  510 0
513
+ 511 1
514
  512 0
515
  513 0
516
  514 0
 
521
  519 0
522
  520 0
523
  521 0
524
+ 522 0
525
  523 0
526
+ 524 0
527
  525 0
528
  526 0
529
  527 0
 
533
  531 0
534
  532 0
535
  533 0
536
+ 534 0
537
  535 0
538
  536 0
539
  537 0
540
+ 538 1
541
  539 0
542
  540 0
543
  541 0
 
559
  557 0
560
  558 0
561
  559 0
562
+ 560 1
563
  561 0
564
  562 0
565
  563 0
 
587
  585 0
588
  586 0
589
  587 0
590
+ 588 0
591
  589 0
592
  590 0
593
  591 0
594
  592 0
595
  593 0
596
  594 0
597
+ 595 0
598
+ 596 0
599
  597 0
600
  598 0
601
  599 0
602
  600 0
603
+ 601 0
604
  602 0
605
  603 0
606
  604 0
607
  605 0
608
  606 0
609
  607 0
610
+ 608 0
611
  609 0
612
  610 1
613
  611 0
 
623
  621 1
624
  622 0
625
  623 0
626
+ 624 0
627
  625 0
628
  626 0
629
  627 0
630
+ 628 0
631
  629 0
632
  630 0
633
  631 0
634
  632 0
635
+ 633 0
636
  634 0
637
+ 635 0
638
  636 0
639
  637 0
640
  638 0
641
+ 639 0
642
  640 0
643
  641 0
644
  642 0
 
653
  651 0
654
  652 1
655
  653 0
656
+ 654 0
657
  655 0
658
  656 0
659
  657 1
 
668
  666 0
669
  667 0
670
  668 0
671
+ 669 0
672
  670 0
673
  671 0
674
  672 0
 
726
  724 0
727
  725 0
728
  726 0
729
+ 727 0
730
  728 1
731
  729 0
732
  730 0
 
755
  753 0
756
  754 0
757
  755 0
758
+ 756 0
759
  757 0
760
  758 0
761
  759 0
 
764
  762 0
765
  763 0
766
  764 0
767
+ 765 0
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
+ 770 0
773
  771 0
774
+ 772 0
775
  773 0
776
  774 0
777
  775 0
 
808
  806 0
809
  807 0
810
  808 0
811
+ 809 0
812
  810 0
813
  811 0
814
  812 0
815
  813 0
816
+ 814 0
817
  815 0
818
  816 0
819
  817 0
 
837
  835 0
838
  836 0
839
  837 0
840
+ 838 1
841
  839 0
842
  840 0
843
+ 841 1
844
  842 0
845
  843 0
846
  844 0
 
867
  865 0
868
  866 0
869
  867 0
870
+ 868 1
871
  869 0
872
  870 0
873
  871 0
 
883
  881 0
884
  882 0
885
  883 0
886
+ 884 0
887
  885 0
888
  886 0
889
  887 0
 
893
  891 1
894
  892 0
895
  893 0
896
+ 894 0
897
  895 0
898
  896 0
899
+ 897 0
900
  898 0
901
  899 0
902
  900 0
 
905
  903 0
906
  904 0
907
  905 0
908
+ 906 0
909
  907 1
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
+ 912 0
915
  913 0
916
  914 0
917
  915 0
 
933
  931 0
934
  932 0
935
  933 0
936
+ 934 1
937
  935 0
938
  936 0
939
  937 0
 
949
  947 0
950
  948 0
951
  949 0
952
+ 950 0
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
+ 955 1
958
  956 0
959
+ 957 0
960
  958 0
961
  959 0
962
  960 0
 
991
  989 0
992
  990 0
993
  991 1
994
+ 992 0
995
  993 0
996
  994 0
997
+ 995 0
998
  996 0
999
  997 0
1000
+ 998 0
1001
  999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
1005
+ 1003 0
1006
  1004 0
1007
  1005 0
1008
  1006 0
runs/May16_05-22-17_indolem-petl-vm/events.out.tfevents.1715839711.indolem-petl-vm.755698.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edc15e198501063b589387ed23d37f34dcd5b85d6a545bd80fa0ff8c476b4cff
3
+ size 560
test_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.8615232443125618,
3
- "f1": 0.8344251555846709,
4
- "precision": 0.8325509007667684,
5
- "recall": 0.8363917467548971
6
  }
 
1
  {
2
+ "accuracy": 0.9109792284866469,
3
+ "f1": 0.8920886346170267,
4
+ "precision": 0.8953297623033144,
5
+ "recall": 0.8890334817436486
6
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.35800845193081215,
4
- "train_runtime": 2113.1391,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 34.432,
7
- "train_steps_per_second": 1.155
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.05662053943168922,
4
+ "train_runtime": 2712.8409,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 26.821,
7
+ "train_steps_per_second": 0.899
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 3.0131800174713135,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5623,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7167919799498746,
21
- "eval_f1": 0.5794790005316321,
22
- "eval_loss": 0.5053456425666809,
23
- "eval_precision": 0.6409822866344606,
24
- "eval_recall": 0.5796053827968721,
25
- "eval_runtime": 5.6071,
26
- "eval_samples_per_second": 71.159,
27
- "eval_steps_per_second": 8.917,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 5.634490966796875,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.518,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7293233082706767,
40
- "eval_f1": 0.599784530797236,
41
- "eval_loss": 0.4860531687736511,
42
- "eval_precision": 0.6673625792811839,
43
- "eval_recall": 0.5959719949081652,
44
- "eval_runtime": 5.7755,
45
- "eval_samples_per_second": 69.085,
46
- "eval_steps_per_second": 8.657,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 1.959808111190796,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.4835,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.7694235588972431,
59
- "eval_f1": 0.7145034843205575,
60
- "eval_loss": 0.45518842339515686,
61
- "eval_precision": 0.7210824478299833,
62
- "eval_recall": 0.7093562465902892,
63
- "eval_runtime": 5.2584,
64
- "eval_samples_per_second": 75.878,
65
- "eval_steps_per_second": 9.509,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 4.635169506072998,
71
  "learning_rate": 4e-05,
72
- "loss": 0.4497,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.7944862155388471,
78
- "eval_f1": 0.7520912893253319,
79
- "eval_loss": 0.4223441481590271,
80
- "eval_precision": 0.7520912893253319,
81
- "eval_recall": 0.7520912893253319,
82
- "eval_runtime": 5.0487,
83
- "eval_samples_per_second": 79.03,
84
- "eval_steps_per_second": 9.903,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 8.219679832458496,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.4266,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8170426065162907,
97
- "eval_f1": 0.7740779522978476,
98
- "eval_loss": 0.399569034576416,
99
- "eval_precision": 0.7814051164566629,
100
- "eval_recall": 0.7680487361338425,
101
- "eval_runtime": 5.0767,
102
- "eval_samples_per_second": 78.595,
103
- "eval_steps_per_second": 9.849,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 4.150725841522217,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.3907,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8195488721804511,
116
- "eval_f1": 0.784453781512605,
117
- "eval_loss": 0.3830115497112274,
118
- "eval_precision": 0.7818241274748796,
119
- "eval_recall": 0.787324968176032,
120
- "eval_runtime": 5.0718,
121
- "eval_samples_per_second": 78.67,
122
- "eval_steps_per_second": 9.858,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 3.297985076904297,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.3742,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8345864661654135,
135
- "eval_f1": 0.798423147581139,
136
- "eval_loss": 0.3684135675430298,
137
- "eval_precision": 0.8016430472182685,
138
- "eval_recall": 0.7954628114202582,
139
- "eval_runtime": 5.0743,
140
- "eval_samples_per_second": 78.632,
141
- "eval_steps_per_second": 9.854,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 8.395323753356934,
147
  "learning_rate": 3e-05,
148
- "loss": 0.3616,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8270676691729323,
154
- "eval_f1": 0.7967966933608887,
155
- "eval_loss": 0.3719731867313385,
156
- "eval_precision": 0.7902444649446494,
157
- "eval_recall": 0.8051463902527732,
158
- "eval_runtime": 5.0484,
159
- "eval_samples_per_second": 79.035,
160
- "eval_steps_per_second": 9.904,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 3.748974561691284,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.3294,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8370927318295739,
173
- "eval_f1": 0.8076965854743632,
174
- "eval_loss": 0.36888691782951355,
175
- "eval_precision": 0.8018925518925519,
176
- "eval_recall": 0.8147390434624477,
177
- "eval_runtime": 5.0543,
178
- "eval_samples_per_second": 78.943,
179
- "eval_steps_per_second": 9.893,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 9.309541702270508,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.3207,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8395989974937343,
192
- "eval_f1": 0.8110907261644079,
193
- "eval_loss": 0.36315786838531494,
194
- "eval_precision": 0.8046983557202408,
195
- "eval_recall": 0.819012547735952,
196
- "eval_runtime": 5.0709,
197
- "eval_samples_per_second": 78.684,
198
- "eval_steps_per_second": 9.86,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 1.2568168640136719,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.3214,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8370927318295739,
211
- "eval_f1": 0.8085765951950401,
212
- "eval_loss": 0.3577338457107544,
213
- "eval_precision": 0.8017470018450185,
214
- "eval_recall": 0.817239498090562,
215
- "eval_runtime": 5.1071,
216
- "eval_samples_per_second": 78.126,
217
- "eval_steps_per_second": 9.79,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 1.915198802947998,
223
  "learning_rate": 2e-05,
224
- "loss": 0.3167,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8395989974937343,
230
- "eval_f1": 0.8119476846942383,
231
- "eval_loss": 0.36069995164871216,
232
- "eval_precision": 0.8045650301464256,
233
- "eval_recall": 0.8215130023640662,
234
- "eval_runtime": 5.0598,
235
- "eval_samples_per_second": 78.857,
236
- "eval_steps_per_second": 9.882,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 1.9545631408691406,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.289,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8345864661654135,
249
- "eval_f1": 0.8060710498409331,
250
- "eval_loss": 0.3684280812740326,
251
- "eval_precision": 0.7988372093023256,
252
- "eval_recall": 0.8154664484451719,
253
- "eval_runtime": 5.1019,
254
- "eval_samples_per_second": 78.206,
255
- "eval_steps_per_second": 9.8,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 5.748187065124512,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.2997,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.849624060150376,
268
- "eval_f1": 0.8176861216035092,
269
- "eval_loss": 0.3479882776737213,
270
- "eval_precision": 0.8193355786895284,
271
- "eval_recall": 0.8161029278050556,
272
- "eval_runtime": 5.0557,
273
- "eval_samples_per_second": 78.92,
274
- "eval_steps_per_second": 9.89,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 4.010083198547363,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.2986,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.849624060150376,
287
- "eval_f1": 0.821236559139785,
288
- "eval_loss": 0.35758015513420105,
289
- "eval_precision": 0.8169406150583245,
290
- "eval_recall": 0.8261047463175123,
291
- "eval_runtime": 5.0955,
292
- "eval_samples_per_second": 78.304,
293
- "eval_steps_per_second": 9.813,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.9220337271690369,
299
  "learning_rate": 1e-05,
300
- "loss": 0.2914,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.849624060150376,
306
- "eval_f1": 0.8195005730140539,
307
- "eval_loss": 0.34965991973876953,
308
- "eval_precision": 0.8179621848739496,
309
- "eval_recall": 0.8211038370612839,
310
- "eval_runtime": 5.0617,
311
- "eval_samples_per_second": 78.827,
312
- "eval_steps_per_second": 9.878,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 1.7026562690734863,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.278,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8521303258145363,
325
- "eval_f1": 0.8229427559286084,
326
- "eval_loss": 0.3539772927761078,
327
- "eval_precision": 0.8206541218637993,
328
- "eval_recall": 0.8253773413347881,
329
- "eval_runtime": 5.1199,
330
- "eval_samples_per_second": 77.931,
331
- "eval_steps_per_second": 9.766,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 5.839470863342285,
337
  "learning_rate": 5e-06,
338
- "loss": 0.2887,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8521303258145363,
344
- "eval_f1": 0.8229427559286084,
345
- "eval_loss": 0.35161107778549194,
346
- "eval_precision": 0.8206541218637993,
347
- "eval_recall": 0.8253773413347881,
348
- "eval_runtime": 5.1154,
349
- "eval_samples_per_second": 77.999,
350
- "eval_steps_per_second": 9.774,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 7.782900810241699,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.2829,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8521303258145363,
363
- "eval_f1": 0.8229427559286084,
364
- "eval_loss": 0.35369938611984253,
365
- "eval_precision": 0.8206541218637993,
366
- "eval_recall": 0.8253773413347881,
367
- "eval_runtime": 5.0565,
368
- "eval_samples_per_second": 78.908,
369
- "eval_steps_per_second": 9.888,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 5.059621334075928,
375
  "learning_rate": 0.0,
376
- "loss": 0.2771,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8546365914786967,
382
- "eval_f1": 0.8263655462184873,
383
- "eval_loss": 0.35401326417922974,
384
- "eval_precision": 0.8233396753671443,
385
- "eval_recall": 0.8296508456082925,
386
- "eval_runtime": 5.0854,
387
- "eval_samples_per_second": 78.459,
388
- "eval_steps_per_second": 9.832,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
- "total_flos": 7597037114448000.0,
395
- "train_loss": 0.35800845193081215,
396
- "train_runtime": 2113.1391,
397
- "train_samples_per_second": 34.432,
398
- "train_steps_per_second": 1.155
399
  }
400
  ],
401
  "logging_steps": 500,
@@ -403,7 +403,7 @@
403
  "num_input_tokens_seen": 0,
404
  "num_train_epochs": 20,
405
  "save_steps": 500,
406
- "total_flos": 7597037114448000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 21.900392532348633,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.4267,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8796992481203008,
21
+ "eval_f1": 0.8409196624360422,
22
+ "eval_loss": 0.35864609479904175,
23
+ "eval_precision": 0.8892469089546646,
24
+ "eval_recall": 0.8148754318967084,
25
+ "eval_runtime": 4.9024,
26
+ "eval_samples_per_second": 81.389,
27
+ "eval_steps_per_second": 10.199,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 23.599449157714844,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.2234,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8696741854636592,
40
+ "eval_f1": 0.8539284708532808,
41
+ "eval_loss": 0.36683306097984314,
42
+ "eval_precision": 0.8394707327494125,
43
+ "eval_recall": 0.8852973267866885,
44
+ "eval_runtime": 4.9237,
45
+ "eval_samples_per_second": 81.037,
46
+ "eval_steps_per_second": 10.155,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 8.989884376525879,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.126,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8922305764411027,
59
+ "eval_f1": 0.8755702215614461,
60
+ "eval_loss": 0.4554330110549927,
61
+ "eval_precision": 0.8632157235517781,
62
+ "eval_recall": 0.8937534097108566,
63
+ "eval_runtime": 4.9271,
64
+ "eval_samples_per_second": 80.981,
65
+ "eval_steps_per_second": 10.148,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 15.863175392150879,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.0886,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.9072681704260651,
78
+ "eval_f1": 0.8854915648632926,
79
+ "eval_loss": 0.44412538409233093,
80
+ "eval_precision": 0.8956662848415425,
81
+ "eval_recall": 0.8768867066739408,
82
+ "eval_runtime": 4.9398,
83
+ "eval_samples_per_second": 80.773,
84
+ "eval_steps_per_second": 10.122,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 0.03718271106481552,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0611,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.9047619047619048,
97
+ "eval_f1": 0.8839406001224739,
98
+ "eval_loss": 0.4922772943973541,
99
+ "eval_precision": 0.8880654743486602,
100
+ "eval_recall": 0.880114566284779,
101
+ "eval_runtime": 4.9142,
102
+ "eval_samples_per_second": 81.193,
103
+ "eval_steps_per_second": 10.175,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 0.006236851681023836,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0366,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.899749373433584,
116
+ "eval_f1": 0.8813841488792438,
117
+ "eval_loss": 0.6796092391014099,
118
+ "eval_precision": 0.8748029197080291,
119
+ "eval_recall": 0.8890707401345699,
120
+ "eval_runtime": 4.9691,
121
+ "eval_samples_per_second": 80.296,
122
+ "eval_steps_per_second": 10.062,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.006574722938239574,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.0358,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.9047619047619048,
135
+ "eval_f1": 0.8820775261324042,
136
+ "eval_loss": 0.5746134519577026,
137
+ "eval_precision": 0.8934835488413775,
138
+ "eval_recall": 0.8726132024004365,
139
+ "eval_runtime": 4.9771,
140
+ "eval_samples_per_second": 80.167,
141
+ "eval_steps_per_second": 10.046,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 0.0049128723330795765,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.0272,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8947368421052632,
154
+ "eval_f1": 0.8736504011098378,
155
+ "eval_loss": 0.5952700972557068,
156
+ "eval_precision": 0.8718487394957983,
157
+ "eval_recall": 0.8755228223313329,
158
+ "eval_runtime": 4.9556,
159
+ "eval_samples_per_second": 80.515,
160
+ "eval_steps_per_second": 10.09,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 0.006197785492986441,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0231,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.899749373433584,
173
+ "eval_f1": 0.8751876876876876,
174
+ "eval_loss": 0.6506014466285706,
175
+ "eval_precision": 0.8891156462585035,
176
+ "eval_recall": 0.864066193853428,
177
+ "eval_runtime": 4.9505,
178
+ "eval_samples_per_second": 80.598,
179
+ "eval_steps_per_second": 10.1,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 0.009196682833135128,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0141,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9022556390977443,
192
+ "eval_f1": 0.8823853973772722,
193
+ "eval_loss": 0.6854431629180908,
194
+ "eval_precision": 0.8814464081066409,
195
+ "eval_recall": 0.8833424258956174,
196
+ "eval_runtime": 4.9291,
197
+ "eval_samples_per_second": 80.948,
198
+ "eval_steps_per_second": 10.144,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 0.01001653354614973,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.023,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.9022556390977443,
211
+ "eval_f1": 0.8823853973772722,
212
+ "eval_loss": 0.7218220829963684,
213
+ "eval_precision": 0.8814464081066409,
214
+ "eval_recall": 0.8833424258956174,
215
+ "eval_runtime": 4.9437,
216
+ "eval_samples_per_second": 80.709,
217
+ "eval_steps_per_second": 10.114,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 0.0031623237300664186,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0067,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.9022556390977443,
230
+ "eval_f1": 0.8823853973772722,
231
+ "eval_loss": 0.7694610357284546,
232
+ "eval_precision": 0.8814464081066409,
233
+ "eval_recall": 0.8833424258956174,
234
+ "eval_runtime": 4.9712,
235
+ "eval_samples_per_second": 80.263,
236
+ "eval_steps_per_second": 10.058,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.002341507002711296,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0064,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8796992481203008,
249
+ "eval_f1": 0.8602043795620438,
250
+ "eval_loss": 0.9004446268081665,
251
+ "eval_precision": 0.849624060150376,
252
+ "eval_recall": 0.8748863429714493,
253
+ "eval_runtime": 4.9523,
254
+ "eval_samples_per_second": 80.569,
255
+ "eval_steps_per_second": 10.096,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.0026841196231544018,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0103,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.9022556390977443,
268
+ "eval_f1": 0.8835263209107715,
269
+ "eval_loss": 0.7978267073631287,
270
+ "eval_precision": 0.8791501449961532,
271
+ "eval_recall": 0.8883433351518457,
272
+ "eval_runtime": 4.9622,
273
+ "eval_samples_per_second": 80.407,
274
+ "eval_steps_per_second": 10.076,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.0031805976759642363,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0072,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.899749373433584,
287
+ "eval_f1": 0.8790689216221131,
288
+ "eval_loss": 0.8251467943191528,
289
+ "eval_precision": 0.8790689216221131,
290
+ "eval_recall": 0.8790689216221131,
291
+ "eval_runtime": 4.9558,
292
+ "eval_samples_per_second": 80.512,
293
+ "eval_steps_per_second": 10.089,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.0014117677928879857,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.0054,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.9022556390977443,
306
+ "eval_f1": 0.8823853973772722,
307
+ "eval_loss": 0.7715300917625427,
308
+ "eval_precision": 0.8814464081066409,
309
+ "eval_recall": 0.8833424258956174,
310
+ "eval_runtime": 4.96,
311
+ "eval_samples_per_second": 80.444,
312
+ "eval_steps_per_second": 10.081,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.0014807094121351838,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0038,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.9072681704260651,
325
+ "eval_f1": 0.8867007927797945,
326
+ "eval_loss": 0.7821339964866638,
327
+ "eval_precision": 0.89198606271777,
328
+ "eval_recall": 0.8818876159301692,
329
+ "eval_runtime": 4.9491,
330
+ "eval_samples_per_second": 80.62,
331
+ "eval_steps_per_second": 10.103,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.0014356797328218818,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.0021,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8972431077694235,
344
+ "eval_f1": 0.8763538792940554,
345
+ "eval_loss": 0.8211472630500793,
346
+ "eval_precision": 0.8754297605404427,
347
+ "eval_recall": 0.877295871976723,
348
+ "eval_runtime": 4.9631,
349
+ "eval_samples_per_second": 80.393,
350
+ "eval_steps_per_second": 10.074,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.0011136590037494898,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.0022,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.899749373433584,
363
+ "eval_f1": 0.8790689216221131,
364
+ "eval_loss": 0.8161725401878357,
365
+ "eval_precision": 0.8790689216221131,
366
+ "eval_recall": 0.8790689216221131,
367
+ "eval_runtime": 4.9339,
368
+ "eval_samples_per_second": 80.869,
369
+ "eval_steps_per_second": 10.134,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.0013613783521577716,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0027,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.9022556390977443,
382
+ "eval_f1": 0.8817957385392532,
383
+ "eval_loss": 0.8104944229125977,
384
+ "eval_precision": 0.8827677592299257,
385
+ "eval_recall": 0.8808419712675032,
386
+ "eval_runtime": 4.9425,
387
+ "eval_samples_per_second": 80.729,
388
+ "eval_steps_per_second": 10.116,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
+ "total_flos": 7584162436176000.0,
395
+ "train_loss": 0.05662053943168922,
396
+ "train_runtime": 2712.8409,
397
+ "train_samples_per_second": 26.821,
398
+ "train_steps_per_second": 0.899
399
  }
400
  ],
401
  "logging_steps": 500,
 
403
  "num_input_tokens_seen": 0,
404
  "num_train_epochs": 20,
405
  "save_steps": 500,
406
+ "total_flos": 7584162436176000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null