Ehsanl committed
Commit 2f9ecc8 · verified · 1 Parent(s): d967f60

Checkpoint 1019

Files changed (3)
  1. config.json +1 -1
  2. model.safetensors +2 -2
  3. trainer_state.json +178 -3
config.json CHANGED
@@ -6,7 +6,7 @@
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "cls_token_id": 1,
- "dtype": "bfloat16",
+ "dtype": "float32",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6716fa52c0e723b5cfc225046ec38b517e6b1f2e35410569578f6319b602d369
- size 690656
+ oid sha256:845ceb8734c02da9b708649efbf028c4c9e2dbba279d99bc0a3f8b60ff43dbc2
+ size 1338773320
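
What is tracked in git for model.safetensors is a Git LFS pointer; the actual weights blob is addressed by the oid (SHA-256) and size above. A minimal sketch for checking a downloaded copy against the new pointer (the local filename is an assumption):

    import hashlib
    import os

    path = "model.safetensors"
    expected_oid = "845ceb8734c02da9b708649efbf028c4c9e2dbba279d99bc0a3f8b60ff43dbc2"
    expected_size = 1338773320

    # Hash the file in 1 MiB chunks to avoid loading ~1.3 GB into memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    assert os.path.getsize(path) == expected_size, "size mismatch"
    assert digest.hexdigest() == expected_oid, "sha256 mismatch"
    print("model.safetensors matches the LFS pointer in this commit")
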
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7507360157016683,
+ "epoch": 1.0,
  "eval_steps": 500,
- "global_step": 765,
+ "global_step": 1019,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -540,6 +540,181 @@
  "learning_rate": 1e-05,
  "loss": 2.7141,
  "step": 760
+ },
+ {
+ "epoch": 0.7556427870461236,
+ "grad_norm": 3178157.25,
+ "learning_rate": 1e-05,
+ "loss": 0.9265,
+ "step": 770
+ },
+ {
+ "epoch": 0.7654563297350343,
+ "grad_norm": 3.6791253089904785,
+ "learning_rate": 1e-05,
+ "loss": 1.8104,
+ "step": 780
+ },
+ {
+ "epoch": 0.7752698724239451,
+ "grad_norm": 4302724.5,
+ "learning_rate": 1e-05,
+ "loss": 1.4787,
+ "step": 790
+ },
+ {
+ "epoch": 0.7850834151128557,
+ "grad_norm": 1720963.75,
+ "learning_rate": 1e-05,
+ "loss": 2.1176,
+ "step": 800
+ },
+ {
+ "epoch": 0.7948969578017664,
+ "grad_norm": 1612358.875,
+ "learning_rate": 1e-05,
+ "loss": 1.2736,
+ "step": 810
+ },
+ {
+ "epoch": 0.8047105004906772,
+ "grad_norm": 1152146.25,
+ "learning_rate": 1e-05,
+ "loss": 1.5657,
+ "step": 820
+ },
+ {
+ "epoch": 0.8145240431795878,
+ "grad_norm": 3.5905027389526367,
+ "learning_rate": 1e-05,
+ "loss": 2.6198,
+ "step": 830
+ },
+ {
+ "epoch": 0.8243375858684985,
+ "grad_norm": 736680.8125,
+ "learning_rate": 1e-05,
+ "loss": 0.9112,
+ "step": 840
+ },
+ {
+ "epoch": 0.8341511285574092,
+ "grad_norm": 2.9653732776641846,
+ "learning_rate": 1e-05,
+ "loss": 2.3842,
+ "step": 850
+ },
+ {
+ "epoch": 0.8439646712463199,
+ "grad_norm": 12.001425743103027,
+ "learning_rate": 1e-05,
+ "loss": 2.3966,
+ "step": 860
+ },
+ {
+ "epoch": 0.8537782139352306,
+ "grad_norm": 2124122.25,
+ "learning_rate": 1e-05,
+ "loss": 1.3734,
+ "step": 870
+ },
+ {
+ "epoch": 0.8635917566241413,
+ "grad_norm": 6534144.0,
+ "learning_rate": 1e-05,
+ "loss": 1.3486,
+ "step": 880
+ },
+ {
+ "epoch": 0.873405299313052,
+ "grad_norm": 3.6779091358184814,
+ "learning_rate": 1e-05,
+ "loss": 0.949,
+ "step": 890
+ },
+ {
+ "epoch": 0.8832188420019627,
+ "grad_norm": 1221940.0,
+ "learning_rate": 1e-05,
+ "loss": 2.6138,
+ "step": 900
+ },
+ {
+ "epoch": 0.8930323846908734,
+ "grad_norm": 1095478.5,
+ "learning_rate": 1e-05,
+ "loss": 1.4675,
+ "step": 910
+ },
+ {
+ "epoch": 0.9028459273797841,
+ "grad_norm": 548933.875,
+ "learning_rate": 1e-05,
+ "loss": 2.8343,
+ "step": 920
+ },
+ {
+ "epoch": 0.9126594700686947,
+ "grad_norm": 13.783559799194336,
+ "learning_rate": 1e-05,
+ "loss": 2.1122,
+ "step": 930
+ },
+ {
+ "epoch": 0.9224730127576055,
+ "grad_norm": 13.174997329711914,
+ "learning_rate": 1e-05,
+ "loss": 2.4962,
+ "step": 940
+ },
+ {
+ "epoch": 0.9322865554465162,
+ "grad_norm": 10.191123962402344,
+ "learning_rate": 1e-05,
+ "loss": 2.2086,
+ "step": 950
+ },
+ {
+ "epoch": 0.9421000981354269,
+ "grad_norm": 3.606752872467041,
+ "learning_rate": 1e-05,
+ "loss": 1.323,
+ "step": 960
+ },
+ {
+ "epoch": 0.9519136408243376,
+ "grad_norm": 2473294.0,
+ "learning_rate": 1e-05,
+ "loss": 1.0528,
+ "step": 970
+ },
+ {
+ "epoch": 0.9617271835132483,
+ "grad_norm": 2.848081588745117,
+ "learning_rate": 1e-05,
+ "loss": 1.5576,
+ "step": 980
+ },
+ {
+ "epoch": 0.971540726202159,
+ "grad_norm": 3.5542256832122803,
+ "learning_rate": 1e-05,
+ "loss": 1.8997,
+ "step": 990
+ },
+ {
+ "epoch": 0.9813542688910697,
+ "grad_norm": 1991637.375,
+ "learning_rate": 1e-05,
+ "loss": 2.5923,
+ "step": 1000
+ },
+ {
+ "epoch": 0.9911678115799804,
+ "grad_norm": 21.8354434967041,
+ "learning_rate": 1e-05,
+ "loss": 2.0656,
+ "step": 1010
  }
  ],
  "logging_steps": 10,
@@ -554,7 +729,7 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
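
The new entries extend the logged training history through step 1010 and mark the run as finished (epoch 1.0, global_step 1019, should_training_stop set to true). A minimal sketch for inspecting those entries from the saved state, assuming the standard trainer_state.json layout where the logged steps live under the log_history key (the key name itself sits outside the hunk context shown above; the local path is illustrative):

    import json

    with open("checkpoint-1019/trainer_state.json") as f:
        state = json.load(f)

    # Overall progress recorded by this checkpoint.
    print(state["epoch"], state["global_step"])  # 1.0 1019

    # Last few logged steps: step, loss, grad_norm.
    for entry in state["log_history"][-3:]:
        print(entry["step"], entry["loss"], entry["grad_norm"])
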