apwic commited on
Commit
daff424
1 Parent(s): 5f9751f

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9109792284866469,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.9022556390977443,
5
- "eval_f1": 0.8817957385392532,
6
- "eval_loss": 0.8104944229125977,
7
- "eval_precision": 0.8827677592299257,
8
- "eval_recall": 0.8808419712675032,
9
- "eval_runtime": 4.7093,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 84.725,
12
- "eval_steps_per_second": 10.617,
13
- "f1": 0.8920886346170267,
14
- "precision": 0.8953297623033144,
15
- "recall": 0.8890334817436486,
16
- "train_loss": 0.05662053943168922,
17
- "train_runtime": 2686.8503,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 27.08,
20
- "train_steps_per_second": 0.908
21
  }
 
1
  {
2
+ "accuracy": 0.9060336300692384,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8972431077694235,
5
+ "eval_f1": 0.8792560061999484,
6
+ "eval_loss": 0.8335620164871216,
7
+ "eval_precision": 0.8707622232472325,
8
+ "eval_recall": 0.889798145117294,
9
+ "eval_runtime": 1.6549,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 241.101,
12
+ "eval_steps_per_second": 30.213,
13
+ "f1": 0.8885945244345052,
14
+ "precision": 0.8834872799509323,
15
+ "recall": 0.8943164810753316,
16
+ "train_loss": 0.05526667458356404,
17
+ "train_runtime": 862.9394,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 84.316,
20
+ "train_steps_per_second": 2.828
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9022556390977443,
4
- "eval_f1": 0.8817957385392532,
5
- "eval_loss": 0.8104944229125977,
6
- "eval_precision": 0.8827677592299257,
7
- "eval_recall": 0.8808419712675032,
8
- "eval_runtime": 4.7093,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 84.725,
11
- "eval_steps_per_second": 10.617
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8972431077694235,
4
+ "eval_f1": 0.8792560061999484,
5
+ "eval_loss": 0.8335620164871216,
6
+ "eval_precision": 0.8707622232472325,
7
+ "eval_recall": 0.889798145117294,
8
+ "eval_runtime": 1.6549,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 241.101,
11
+ "eval_steps_per_second": 30.213
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9109792284866469,
3
- "f1": 0.8920886346170267,
4
- "precision": 0.8953297623033144,
5
- "recall": 0.8890334817436486
6
  }
 
1
  {
2
+ "accuracy": 0.9060336300692384,
3
+ "f1": 0.8885945244345052,
4
+ "precision": 0.8834872799509323,
5
+ "recall": 0.8943164810753316
6
  }
predict_results.txt CHANGED
@@ -1,14 +1,14 @@
1
  index prediction
2
  0 1
3
- 1 1
4
  2 1
5
  3 1
6
- 4 0
7
  5 1
8
  6 1
9
  7 1
10
  8 0
11
- 9 0
12
  10 1
13
  11 1
14
  12 1
@@ -24,7 +24,7 @@ index prediction
24
  22 1
25
  23 1
26
  24 0
27
- 25 0
28
  26 1
29
  27 1
30
  28 1
@@ -33,23 +33,23 @@ index prediction
33
  31 1
34
  32 1
35
  33 1
36
- 34 1
37
  35 1
38
  36 1
39
  37 1
40
- 38 1
41
- 39 0
42
  40 1
43
  41 1
44
  42 1
45
  43 1
46
  44 1
47
- 45 0
48
  46 1
49
  47 1
50
  48 1
51
  49 0
52
- 50 0
53
  51 1
54
  52 0
55
  53 1
@@ -58,7 +58,7 @@ index prediction
58
  56 1
59
  57 0
60
  58 0
61
- 59 0
62
  60 1
63
  61 1
64
  62 1
@@ -76,10 +76,10 @@ index prediction
76
  74 1
77
  75 1
78
  76 1
79
- 77 0
80
  78 1
81
  79 1
82
- 80 1
83
  81 1
84
  82 1
85
  83 1
@@ -96,11 +96,11 @@ index prediction
96
  94 1
97
  95 1
98
  96 1
99
- 97 0
100
  98 1
101
- 99 1
102
  100 1
103
- 101 0
104
  102 1
105
  103 1
106
  104 1
@@ -112,12 +112,12 @@ index prediction
112
  110 1
113
  111 1
114
  112 1
115
- 113 0
116
  114 1
117
  115 1
118
  116 1
119
  117 1
120
- 118 0
121
  119 1
122
  120 1
123
  121 1
@@ -141,7 +141,7 @@ index prediction
141
  139 1
142
  140 1
143
  141 1
144
- 142 0
145
  143 1
146
  144 1
147
  145 1
@@ -149,7 +149,7 @@ index prediction
149
  147 1
150
  148 1
151
  149 1
152
- 150 0
153
  151 1
154
  152 1
155
  153 1
@@ -167,11 +167,11 @@ index prediction
167
  165 0
168
  166 1
169
  167 1
170
- 168 1
171
  169 1
172
- 170 1
173
  171 1
174
- 172 0
175
  173 0
176
  174 1
177
  175 1
@@ -204,7 +204,7 @@ index prediction
204
  202 1
205
  203 1
206
  204 1
207
- 205 0
208
  206 1
209
  207 0
210
  208 1
@@ -216,7 +216,7 @@ index prediction
216
  214 0
217
  215 1
218
  216 0
219
- 217 0
220
  218 1
221
  219 1
222
  220 0
@@ -228,8 +228,8 @@ index prediction
228
  226 0
229
  227 0
230
  228 1
231
- 229 1
232
- 230 1
233
  231 1
234
  232 1
235
  233 1
@@ -272,7 +272,7 @@ index prediction
272
  270 1
273
  271 1
274
  272 1
275
- 273 0
276
  274 1
277
  275 1
278
  276 1
@@ -365,7 +365,7 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 0
369
  367 0
370
  368 0
371
  369 0
@@ -391,7 +391,7 @@ index prediction
391
  389 0
392
  390 0
393
  391 0
394
- 392 1
395
  393 0
396
  394 0
397
  395 0
@@ -401,7 +401,7 @@ index prediction
401
  399 0
402
  400 0
403
  401 0
404
- 402 0
405
  403 0
406
  404 0
407
  405 0
@@ -420,14 +420,14 @@ index prediction
420
  418 0
421
  419 0
422
  420 1
423
- 421 1
424
  422 0
425
  423 0
426
  424 0
427
  425 0
428
  426 0
429
  427 0
430
- 428 0
431
  429 0
432
  430 0
433
  431 0
@@ -446,17 +446,17 @@ index prediction
446
  444 0
447
  445 0
448
  446 0
449
- 447 1
450
  448 0
451
  449 0
452
  450 0
453
  451 0
454
- 452 1
455
  453 0
456
  454 0
457
  455 0
458
  456 0
459
- 457 1
460
  458 0
461
  459 0
462
  460 0
@@ -510,7 +510,7 @@ index prediction
510
  508 0
511
  509 0
512
  510 0
513
- 511 1
514
  512 0
515
  513 0
516
  514 0
@@ -521,7 +521,7 @@ index prediction
521
  519 0
522
  520 0
523
  521 0
524
- 522 0
525
  523 0
526
  524 0
527
  525 0
@@ -535,7 +535,7 @@ index prediction
535
  533 0
536
  534 0
537
  535 0
538
- 536 0
539
  537 0
540
  538 1
541
  539 0
@@ -559,7 +559,7 @@ index prediction
559
  557 0
560
  558 0
561
  559 0
562
- 560 1
563
  561 0
564
  562 0
565
  563 0
@@ -595,7 +595,7 @@ index prediction
595
  593 0
596
  594 0
597
  595 0
598
- 596 0
599
  597 0
600
  598 0
601
  599 0
@@ -607,9 +607,9 @@ index prediction
607
  605 0
608
  606 0
609
  607 0
610
- 608 0
611
  609 0
612
- 610 1
613
  611 0
614
  612 0
615
  613 0
@@ -625,20 +625,20 @@ index prediction
625
  623 0
626
  624 0
627
  625 0
628
- 626 0
629
  627 0
630
- 628 0
631
  629 0
632
  630 0
633
  631 0
634
  632 0
635
- 633 0
636
  634 0
637
  635 0
638
  636 0
639
  637 0
640
  638 0
641
- 639 0
642
  640 0
643
  641 0
644
  642 0
@@ -649,7 +649,7 @@ index prediction
649
  647 0
650
  648 0
651
  649 0
652
- 650 0
653
  651 0
654
  652 1
655
  653 0
@@ -668,11 +668,11 @@ index prediction
668
  666 0
669
  667 0
670
  668 0
671
- 669 0
672
  670 0
673
  671 0
674
  672 0
675
- 673 0
676
  674 0
677
  675 0
678
  676 0
@@ -702,7 +702,7 @@ index prediction
702
  700 0
703
  701 0
704
  702 0
705
- 703 0
706
  704 0
707
  705 0
708
  706 0
@@ -726,7 +726,7 @@ index prediction
726
  724 0
727
  725 0
728
  726 0
729
- 727 0
730
  728 1
731
  729 0
732
  730 0
@@ -738,7 +738,7 @@ index prediction
738
  736 0
739
  737 0
740
  738 0
741
- 739 0
742
  740 0
743
  741 0
744
  742 0
@@ -769,7 +769,7 @@ index prediction
769
  767 0
770
  768 0
771
  769 0
772
- 770 0
773
  771 0
774
  772 0
775
  773 0
@@ -837,7 +837,7 @@ index prediction
837
  835 0
838
  836 0
839
  837 0
840
- 838 1
841
  839 0
842
  840 0
843
  841 1
@@ -861,7 +861,7 @@ index prediction
861
  859 0
862
  860 0
863
  861 0
864
- 862 0
865
  863 0
866
  864 0
867
  865 0
@@ -885,7 +885,7 @@ index prediction
885
  883 0
886
  884 0
887
  885 0
888
- 886 0
889
  887 0
890
  888 0
891
  889 0
@@ -933,13 +933,13 @@ index prediction
933
  931 0
934
  932 0
935
  933 0
936
- 934 1
937
  935 0
938
  936 0
939
  937 0
940
  938 0
941
  939 0
942
- 940 0
943
  941 0
944
  942 0
945
  943 1
@@ -949,11 +949,11 @@ index prediction
949
  947 0
950
  948 0
951
  949 0
952
- 950 0
953
  951 0
954
  952 0
955
  953 0
956
- 954 0
957
  955 1
958
  956 0
959
  957 0
@@ -968,7 +968,7 @@ index prediction
968
  966 0
969
  967 0
970
  968 0
971
- 969 0
972
  970 0
973
  971 0
974
  972 0
@@ -984,13 +984,13 @@ index prediction
984
  982 0
985
  983 0
986
  984 0
987
- 985 0
988
  986 1
989
  987 0
990
  988 0
991
  989 0
992
  990 0
993
- 991 1
994
  992 0
995
  993 0
996
  994 0
 
1
  index prediction
2
  0 1
3
+ 1 0
4
  2 1
5
  3 1
6
+ 4 1
7
  5 1
8
  6 1
9
  7 1
10
  8 0
11
+ 9 1
12
  10 1
13
  11 1
14
  12 1
 
24
  22 1
25
  23 1
26
  24 0
27
+ 25 1
28
  26 1
29
  27 1
30
  28 1
 
33
  31 1
34
  32 1
35
  33 1
36
+ 34 0
37
  35 1
38
  36 1
39
  37 1
40
+ 38 0
41
+ 39 1
42
  40 1
43
  41 1
44
  42 1
45
  43 1
46
  44 1
47
+ 45 1
48
  46 1
49
  47 1
50
  48 1
51
  49 0
52
+ 50 1
53
  51 1
54
  52 0
55
  53 1
 
58
  56 1
59
  57 0
60
  58 0
61
+ 59 1
62
  60 1
63
  61 1
64
  62 1
 
76
  74 1
77
  75 1
78
  76 1
79
+ 77 1
80
  78 1
81
  79 1
82
+ 80 0
83
  81 1
84
  82 1
85
  83 1
 
96
  94 1
97
  95 1
98
  96 1
99
+ 97 1
100
  98 1
101
+ 99 0
102
  100 1
103
+ 101 1
104
  102 1
105
  103 1
106
  104 1
 
112
  110 1
113
  111 1
114
  112 1
115
+ 113 1
116
  114 1
117
  115 1
118
  116 1
119
  117 1
120
+ 118 1
121
  119 1
122
  120 1
123
  121 1
 
141
  139 1
142
  140 1
143
  141 1
144
+ 142 1
145
  143 1
146
  144 1
147
  145 1
 
149
  147 1
150
  148 1
151
  149 1
152
+ 150 1
153
  151 1
154
  152 1
155
  153 1
 
167
  165 0
168
  166 1
169
  167 1
170
+ 168 0
171
  169 1
172
+ 170 0
173
  171 1
174
+ 172 1
175
  173 0
176
  174 1
177
  175 1
 
204
  202 1
205
  203 1
206
  204 1
207
+ 205 1
208
  206 1
209
  207 0
210
  208 1
 
216
  214 0
217
  215 1
218
  216 0
219
+ 217 1
220
  218 1
221
  219 1
222
  220 0
 
228
  226 0
229
  227 0
230
  228 1
231
+ 229 0
232
+ 230 0
233
  231 1
234
  232 1
235
  233 1
 
272
  270 1
273
  271 1
274
  272 1
275
+ 273 1
276
  274 1
277
  275 1
278
  276 1
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 1
369
  367 0
370
  368 0
371
  369 0
 
391
  389 0
392
  390 0
393
  391 0
394
+ 392 0
395
  393 0
396
  394 0
397
  395 0
 
401
  399 0
402
  400 0
403
  401 0
404
+ 402 1
405
  403 0
406
  404 0
407
  405 0
 
420
  418 0
421
  419 0
422
  420 1
423
+ 421 0
424
  422 0
425
  423 0
426
  424 0
427
  425 0
428
  426 0
429
  427 0
430
+ 428 1
431
  429 0
432
  430 0
433
  431 0
 
446
  444 0
447
  445 0
448
  446 0
449
+ 447 0
450
  448 0
451
  449 0
452
  450 0
453
  451 0
454
+ 452 0
455
  453 0
456
  454 0
457
  455 0
458
  456 0
459
+ 457 0
460
  458 0
461
  459 0
462
  460 0
 
510
  508 0
511
  509 0
512
  510 0
513
+ 511 0
514
  512 0
515
  513 0
516
  514 0
 
521
  519 0
522
  520 0
523
  521 0
524
+ 522 1
525
  523 0
526
  524 0
527
  525 0
 
535
  533 0
536
  534 0
537
  535 0
538
+ 536 1
539
  537 0
540
  538 1
541
  539 0
 
559
  557 0
560
  558 0
561
  559 0
562
+ 560 0
563
  561 0
564
  562 0
565
  563 0
 
595
  593 0
596
  594 0
597
  595 0
598
+ 596 1
599
  597 0
600
  598 0
601
  599 0
 
607
  605 0
608
  606 0
609
  607 0
610
+ 608 1
611
  609 0
612
+ 610 0
613
  611 0
614
  612 0
615
  613 0
 
625
  623 0
626
  624 0
627
  625 0
628
+ 626 1
629
  627 0
630
+ 628 1
631
  629 0
632
  630 0
633
  631 0
634
  632 0
635
+ 633 1
636
  634 0
637
  635 0
638
  636 0
639
  637 0
640
  638 0
641
+ 639 1
642
  640 0
643
  641 0
644
  642 0
 
649
  647 0
650
  648 0
651
  649 0
652
+ 650 1
653
  651 0
654
  652 1
655
  653 0
 
668
  666 0
669
  667 0
670
  668 0
671
+ 669 1
672
  670 0
673
  671 0
674
  672 0
675
+ 673 1
676
  674 0
677
  675 0
678
  676 0
 
702
  700 0
703
  701 0
704
  702 0
705
+ 703 1
706
  704 0
707
  705 0
708
  706 0
 
726
  724 0
727
  725 0
728
  726 0
729
+ 727 1
730
  728 1
731
  729 0
732
  730 0
 
738
  736 0
739
  737 0
740
  738 0
741
+ 739 1
742
  740 0
743
  741 0
744
  742 0
 
769
  767 0
770
  768 0
771
  769 0
772
+ 770 1
773
  771 0
774
  772 0
775
  773 0
 
837
  835 0
838
  836 0
839
  837 0
840
+ 838 0
841
  839 0
842
  840 0
843
  841 1
 
861
  859 0
862
  860 0
863
  861 0
864
+ 862 1
865
  863 0
866
  864 0
867
  865 0
 
885
  883 0
886
  884 0
887
  885 0
888
+ 886 1
889
  887 0
890
  888 0
891
  889 0
 
933
  931 0
934
  932 0
935
  933 0
936
+ 934 0
937
  935 0
938
  936 0
939
  937 0
940
  938 0
941
  939 0
942
+ 940 1
943
  941 0
944
  942 0
945
  943 1
 
949
  947 0
950
  948 0
951
  949 0
952
+ 950 1
953
  951 0
954
  952 0
955
  953 0
956
+ 954 1
957
  955 1
958
  956 0
959
  957 0
 
968
  966 0
969
  967 0
970
  968 0
971
+ 969 1
972
  970 0
973
  971 0
974
  972 0
 
984
  982 0
985
  983 0
986
  984 0
987
+ 985 1
988
  986 1
989
  987 0
990
  988 0
991
  989 0
992
  990 0
993
+ 991 0
994
  992 0
995
  993 0
996
  994 0
runs/Jun03_09-42-28_a358b85c7679/events.out.tfevents.1717408644.a358b85c7679.12601.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc2d609c87bd801d879bee42d0a8edaf96cff90b8084f8ad411431b44b674449
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.05662053943168922,
4
- "train_runtime": 2686.8503,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 27.08,
7
- "train_steps_per_second": 0.908
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.05526667458356404,
4
+ "train_runtime": 862.9394,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 84.316,
7
+ "train_steps_per_second": 2.828
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 21.900392532348633,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.4267,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_accuracy": 0.8796992481203008,
21
- "eval_f1": 0.8409196624360422,
22
- "eval_loss": 0.35864609479904175,
23
- "eval_precision": 0.8892469089546646,
24
- "eval_recall": 0.8148754318967084,
25
- "eval_runtime": 4.8207,
26
- "eval_samples_per_second": 82.769,
27
- "eval_steps_per_second": 10.372,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 23.599449157714844,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.2234,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8696741854636592,
40
- "eval_f1": 0.8539284708532808,
41
- "eval_loss": 0.36683306097984314,
42
- "eval_precision": 0.8394707327494125,
43
- "eval_recall": 0.8852973267866885,
44
- "eval_runtime": 4.9471,
45
- "eval_samples_per_second": 80.653,
46
- "eval_steps_per_second": 10.107,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 8.989884376525879,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.126,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8922305764411027,
59
- "eval_f1": 0.8755702215614461,
60
- "eval_loss": 0.4554330110549927,
61
- "eval_precision": 0.8632157235517781,
62
- "eval_recall": 0.8937534097108566,
63
- "eval_runtime": 4.9562,
64
- "eval_samples_per_second": 80.505,
65
- "eval_steps_per_second": 10.088,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 15.863175392150879,
71
  "learning_rate": 4e-05,
72
- "loss": 0.0886,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.9072681704260651,
78
- "eval_f1": 0.8854915648632926,
79
- "eval_loss": 0.44412538409233093,
80
- "eval_precision": 0.8956662848415425,
81
- "eval_recall": 0.8768867066739408,
82
- "eval_runtime": 4.9565,
83
- "eval_samples_per_second": 80.5,
84
- "eval_steps_per_second": 10.088,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.03718271106481552,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.0611,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.9047619047619048,
97
- "eval_f1": 0.8839406001224739,
98
- "eval_loss": 0.4922772943973541,
99
- "eval_precision": 0.8880654743486602,
100
- "eval_recall": 0.880114566284779,
101
- "eval_runtime": 4.9305,
102
- "eval_samples_per_second": 80.925,
103
- "eval_steps_per_second": 10.141,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 0.006236851681023836,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0366,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.899749373433584,
116
- "eval_f1": 0.8813841488792438,
117
- "eval_loss": 0.6796092391014099,
118
- "eval_precision": 0.8748029197080291,
119
- "eval_recall": 0.8890707401345699,
120
- "eval_runtime": 4.9528,
121
- "eval_samples_per_second": 80.56,
122
- "eval_steps_per_second": 10.095,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.006574722938239574,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0358,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.9047619047619048,
135
- "eval_f1": 0.8820775261324042,
136
- "eval_loss": 0.5746134519577026,
137
- "eval_precision": 0.8934835488413775,
138
- "eval_recall": 0.8726132024004365,
139
- "eval_runtime": 4.962,
140
- "eval_samples_per_second": 80.411,
141
- "eval_steps_per_second": 10.077,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.0049128723330795765,
147
  "learning_rate": 3e-05,
148
- "loss": 0.0272,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8947368421052632,
154
- "eval_f1": 0.8736504011098378,
155
- "eval_loss": 0.5952700972557068,
156
- "eval_precision": 0.8718487394957983,
157
- "eval_recall": 0.8755228223313329,
158
- "eval_runtime": 4.9497,
159
- "eval_samples_per_second": 80.612,
160
- "eval_steps_per_second": 10.102,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 0.006197785492986441,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0231,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.899749373433584,
173
- "eval_f1": 0.8751876876876876,
174
- "eval_loss": 0.6506014466285706,
175
- "eval_precision": 0.8891156462585035,
176
- "eval_recall": 0.864066193853428,
177
- "eval_runtime": 4.9626,
178
- "eval_samples_per_second": 80.401,
179
- "eval_steps_per_second": 10.075,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 0.009196682833135128,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0141,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9022556390977443,
192
- "eval_f1": 0.8823853973772722,
193
- "eval_loss": 0.6854431629180908,
194
- "eval_precision": 0.8814464081066409,
195
- "eval_recall": 0.8833424258956174,
196
- "eval_runtime": 4.945,
197
- "eval_samples_per_second": 80.688,
198
- "eval_steps_per_second": 10.111,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.01001653354614973,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.023,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.9022556390977443,
211
- "eval_f1": 0.8823853973772722,
212
- "eval_loss": 0.7218220829963684,
213
- "eval_precision": 0.8814464081066409,
214
- "eval_recall": 0.8833424258956174,
215
- "eval_runtime": 4.9612,
216
- "eval_samples_per_second": 80.424,
217
- "eval_steps_per_second": 10.078,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 0.0031623237300664186,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0067,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.9022556390977443,
230
- "eval_f1": 0.8823853973772722,
231
- "eval_loss": 0.7694610357284546,
232
- "eval_precision": 0.8814464081066409,
233
- "eval_recall": 0.8833424258956174,
234
- "eval_runtime": 4.9888,
235
- "eval_samples_per_second": 79.98,
236
- "eval_steps_per_second": 10.023,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.002341507002711296,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0064,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8796992481203008,
249
- "eval_f1": 0.8602043795620438,
250
- "eval_loss": 0.9004446268081665,
251
- "eval_precision": 0.849624060150376,
252
- "eval_recall": 0.8748863429714493,
253
- "eval_runtime": 4.9345,
254
- "eval_samples_per_second": 80.86,
255
- "eval_steps_per_second": 10.133,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.0026841196231544018,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0103,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.9022556390977443,
268
- "eval_f1": 0.8835263209107715,
269
- "eval_loss": 0.7978267073631287,
270
- "eval_precision": 0.8791501449961532,
271
- "eval_recall": 0.8883433351518457,
272
- "eval_runtime": 4.9323,
273
- "eval_samples_per_second": 80.895,
274
- "eval_steps_per_second": 10.137,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.0031805976759642363,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.0072,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
  "eval_accuracy": 0.899749373433584,
287
- "eval_f1": 0.8790689216221131,
288
- "eval_loss": 0.8251467943191528,
289
- "eval_precision": 0.8790689216221131,
290
- "eval_recall": 0.8790689216221131,
291
- "eval_runtime": 4.9785,
292
- "eval_samples_per_second": 80.144,
293
- "eval_steps_per_second": 10.043,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.0014117677928879857,
299
  "learning_rate": 1e-05,
300
- "loss": 0.0054,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9022556390977443,
306
- "eval_f1": 0.8823853973772722,
307
- "eval_loss": 0.7715300917625427,
308
- "eval_precision": 0.8814464081066409,
309
- "eval_recall": 0.8833424258956174,
310
- "eval_runtime": 4.9483,
311
- "eval_samples_per_second": 80.634,
312
- "eval_steps_per_second": 10.104,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.0014807094121351838,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.0038,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.9072681704260651,
325
- "eval_f1": 0.8867007927797945,
326
- "eval_loss": 0.7821339964866638,
327
- "eval_precision": 0.89198606271777,
328
- "eval_recall": 0.8818876159301692,
329
- "eval_runtime": 4.951,
330
- "eval_samples_per_second": 80.589,
331
- "eval_steps_per_second": 10.099,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 0.0014356797328218818,
337
  "learning_rate": 5e-06,
338
- "loss": 0.0021,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8972431077694235,
344
- "eval_f1": 0.8763538792940554,
345
- "eval_loss": 0.8211472630500793,
346
- "eval_precision": 0.8754297605404427,
347
- "eval_recall": 0.877295871976723,
348
- "eval_runtime": 4.9494,
349
- "eval_samples_per_second": 80.616,
350
- "eval_steps_per_second": 10.102,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.0011136590037494898,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.0022,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.899749373433584,
363
- "eval_f1": 0.8790689216221131,
364
- "eval_loss": 0.8161725401878357,
365
- "eval_precision": 0.8790689216221131,
366
- "eval_recall": 0.8790689216221131,
367
- "eval_runtime": 4.9488,
368
- "eval_samples_per_second": 80.626,
369
- "eval_steps_per_second": 10.104,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.0013613783521577716,
375
  "learning_rate": 0.0,
376
- "loss": 0.0027,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9022556390977443,
382
- "eval_f1": 0.8817957385392532,
383
- "eval_loss": 0.8104944229125977,
384
- "eval_precision": 0.8827677592299257,
385
- "eval_recall": 0.8808419712675032,
386
- "eval_runtime": 4.9834,
387
- "eval_samples_per_second": 80.065,
388
- "eval_steps_per_second": 10.033,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7584162436176000.0,
395
- "train_loss": 0.05662053943168922,
396
- "train_runtime": 2686.8503,
397
- "train_samples_per_second": 27.08,
398
- "train_steps_per_second": 0.908
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 78.255126953125,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.3942,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_accuracy": 0.8796992481203008,
21
+ "eval_f1": 0.8419489007724301,
22
+ "eval_loss": 0.3128369450569153,
23
+ "eval_precision": 0.8857758620689655,
24
+ "eval_recall": 0.8173758865248226,
25
+ "eval_runtime": 1.6299,
26
+ "eval_samples_per_second": 244.801,
27
+ "eval_steps_per_second": 30.677,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 42.82415771484375,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.2168,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8897243107769424,
40
+ "eval_f1": 0.8676337535436396,
41
+ "eval_loss": 0.3043781518936157,
42
+ "eval_precision": 0.8658613445378152,
43
+ "eval_recall": 0.8694762684124386,
44
+ "eval_runtime": 1.6375,
45
+ "eval_samples_per_second": 243.661,
46
+ "eval_steps_per_second": 30.534,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 0.2970781624317169,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.1372,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8897243107769424,
59
+ "eval_f1": 0.8595250288055307,
60
+ "eval_loss": 0.5317866802215576,
61
+ "eval_precision": 0.885164197446576,
62
+ "eval_recall": 0.8419712675031824,
63
+ "eval_runtime": 1.6412,
64
+ "eval_samples_per_second": 243.114,
65
+ "eval_steps_per_second": 30.465,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 0.16418644785881042,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.0957,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8947368421052632,
78
+ "eval_f1": 0.8765906680805938,
79
+ "eval_loss": 0.47654101252555847,
80
+ "eval_precision": 0.8675710594315245,
81
+ "eval_recall": 0.888025095471904,
82
+ "eval_runtime": 1.6551,
83
+ "eval_samples_per_second": 241.073,
84
+ "eval_steps_per_second": 30.21,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 0.4955180287361145,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0674,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8872180451127819,
97
+ "eval_f1": 0.8728804559453431,
98
+ "eval_loss": 0.552257239818573,
99
+ "eval_precision": 0.8576773985140519,
100
+ "eval_recall": 0.9027095835606473,
101
+ "eval_runtime": 1.6807,
102
+ "eval_samples_per_second": 237.402,
103
+ "eval_steps_per_second": 29.75,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 0.03946012258529663,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0535,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.9072681704260651,
116
+ "eval_f1": 0.8878574955372402,
117
+ "eval_loss": 0.5158531069755554,
118
+ "eval_precision": 0.8888448885098087,
119
+ "eval_recall": 0.8868885251863976,
120
+ "eval_runtime": 1.6465,
121
+ "eval_samples_per_second": 242.338,
122
+ "eval_steps_per_second": 30.368,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.008608223870396614,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.027,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8872180451127819,
135
+ "eval_f1": 0.8642908431276217,
136
+ "eval_loss": 0.5940884351730347,
137
+ "eval_precision": 0.8633964654080464,
138
+ "eval_recall": 0.8652027641389344,
139
+ "eval_runtime": 1.6485,
140
+ "eval_samples_per_second": 242.043,
141
+ "eval_steps_per_second": 30.331,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 0.010127891786396503,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.0223,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8796992481203008,
154
+ "eval_f1": 0.8548827059465357,
155
+ "eval_loss": 0.7166243195533752,
156
+ "eval_precision": 0.8548827059465357,
157
+ "eval_recall": 0.8548827059465357,
158
+ "eval_runtime": 1.6562,
159
+ "eval_samples_per_second": 240.913,
160
+ "eval_steps_per_second": 30.19,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 0.005933025386184454,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0145,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.9022556390977443,
173
+ "eval_f1": 0.8829621606985718,
174
+ "eval_loss": 0.7022837996482849,
175
+ "eval_precision": 0.8802419354838709,
176
+ "eval_recall": 0.8858428805237315,
177
+ "eval_runtime": 1.6595,
178
+ "eval_samples_per_second": 240.429,
179
+ "eval_steps_per_second": 30.129,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 0.02505210041999817,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0106,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9047619047619048,
192
+ "eval_f1": 0.8839406001224739,
193
+ "eval_loss": 0.699307918548584,
194
+ "eval_precision": 0.8880654743486602,
195
+ "eval_recall": 0.880114566284779,
196
+ "eval_runtime": 1.6551,
197
+ "eval_samples_per_second": 241.07,
198
+ "eval_steps_per_second": 30.209,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 0.002501419745385647,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.0093,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8947368421052632,
211
+ "eval_f1": 0.8703663593044124,
212
+ "eval_loss": 0.8273664712905884,
213
+ "eval_precision": 0.8789149003479912,
214
+ "eval_recall": 0.8630205491907619,
215
+ "eval_runtime": 1.6583,
216
+ "eval_samples_per_second": 240.615,
217
+ "eval_steps_per_second": 30.152,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 0.012166227214038372,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0086,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8972431077694235,
230
+ "eval_f1": 0.8744522298370696,
231
+ "eval_loss": 0.7971612215042114,
232
+ "eval_precision": 0.8795731707317074,
233
+ "eval_recall": 0.8697945080923805,
234
+ "eval_runtime": 1.6712,
235
+ "eval_samples_per_second": 238.744,
236
+ "eval_steps_per_second": 29.918,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.00197013420984149,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0106,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8972431077694235,
249
+ "eval_f1": 0.8787009231453675,
250
+ "eval_loss": 0.7591652870178223,
251
+ "eval_precision": 0.8714896214896215,
252
+ "eval_recall": 0.8872976904891798,
253
+ "eval_runtime": 1.6672,
254
+ "eval_samples_per_second": 239.329,
255
+ "eval_steps_per_second": 29.991,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.0050615849904716015,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0072,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.899749373433584,
268
+ "eval_f1": 0.8813841488792438,
269
+ "eval_loss": 0.7834069728851318,
270
+ "eval_precision": 0.8748029197080291,
271
+ "eval_recall": 0.8890707401345699,
272
+ "eval_runtime": 1.6555,
273
+ "eval_samples_per_second": 241.019,
274
+ "eval_steps_per_second": 30.203,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.002086851978674531,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0098,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
  "eval_accuracy": 0.899749373433584,
287
+ "eval_f1": 0.8802521008403361,
288
+ "eval_loss": 0.8048883676528931,
289
+ "eval_precision": 0.8767168083714847,
290
+ "eval_recall": 0.8840698308783415,
291
+ "eval_runtime": 1.6591,
292
+ "eval_samples_per_second": 240.488,
293
+ "eval_steps_per_second": 30.136,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.0012473827227950096,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.0058,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.899749373433584,
306
+ "eval_f1": 0.8802521008403361,
307
+ "eval_loss": 0.7670984268188477,
308
+ "eval_precision": 0.8767168083714847,
309
+ "eval_recall": 0.8840698308783415,
310
+ "eval_runtime": 1.659,
311
+ "eval_samples_per_second": 240.503,
312
+ "eval_steps_per_second": 30.138,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.00188881263602525,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0035,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.9022556390977443,
325
+ "eval_f1": 0.8856624319419237,
326
+ "eval_loss": 0.8084732294082642,
327
+ "eval_precision": 0.8758364312267658,
328
+ "eval_recall": 0.8983451536643026,
329
+ "eval_runtime": 1.6569,
330
+ "eval_samples_per_second": 240.816,
331
+ "eval_steps_per_second": 30.177,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.0014366944087669253,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.0052,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.899749373433584,
344
+ "eval_f1": 0.8808243727598566,
345
+ "eval_loss": 0.7721081972122192,
346
+ "eval_precision": 0.875706963591375,
347
+ "eval_recall": 0.8865702855064557,
348
+ "eval_runtime": 1.6546,
349
+ "eval_samples_per_second": 241.143,
350
+ "eval_steps_per_second": 30.218,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.0011094665387645364,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.0028,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8972431077694235,
363
+ "eval_f1": 0.8792560061999484,
364
+ "eval_loss": 0.8358559608459473,
365
+ "eval_precision": 0.8707622232472325,
366
+ "eval_recall": 0.889798145117294,
367
+ "eval_runtime": 1.6584,
368
+ "eval_samples_per_second": 240.592,
369
+ "eval_steps_per_second": 30.149,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.0015741140814498067,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0033,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8972431077694235,
382
+ "eval_f1": 0.8792560061999484,
383
+ "eval_loss": 0.8335620164871216,
384
+ "eval_precision": 0.8707622232472325,
385
+ "eval_recall": 0.889798145117294,
386
+ "eval_runtime": 1.6776,
387
+ "eval_samples_per_second": 237.834,
388
+ "eval_steps_per_second": 29.804,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7584162436176000.0,
395
+ "train_loss": 0.05526667458356404,
396
+ "train_runtime": 862.9394,
397
+ "train_samples_per_second": 84.316,
398
+ "train_steps_per_second": 2.828
399
  }
400
  ],
401
  "logging_steps": 500,