souging commited on
Commit
6f4a094
·
verified ·
1 Parent(s): 653bd4e

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28d15da9af301277310acc0b89ceba89e60b1da4d1490ae6c5de047dbc248f61
3
  size 413085368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e9056b95b69811556bbbab4909b13b9f29315b95936bfb64bc63ae28d402fb1
3
  size 413085368
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7280325c4227f3f70d9aa819988c5685def3e83e3d99fe2b4ece9c86024b4a1f
3
  size 348403672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015601a740b4cf20fc09fef3f06463c812e19a4a15baffd2a62d379b525e5448
3
  size 348403672
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d153cbd5816bad7e393766d8a9ab7370767a743794723f4d66f035c55bea3201
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631efc3abf5a36014450900fad93b8d3f2995f952dae98fd7425b6875e114a2a
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cf6320e61cad91d817b3175aa63c2bffb4c5c8db4e7b0ae3fd9b761374e3cfc
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3db7f30731a016df744ba5e943c5a72e4b1cf4337482ecadb31ab0a0a177fb91
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6baa6638ee60a43a92f69e130b69f4ccd62bff32f6bafab4da7f0364252faef6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f681f974a94e0a2c66c1abdd7f0ead89d21c1be7c5c0471b01ae966fdbe6e1b4
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0992e134d32ed3abd9b8e06cbc166e0100a68cdef8da84ec1b2e5e2e2413baeb
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abd6e1ce998d7be3d4c58694feb93e4792744bd560a2382ce56af0849f5ce5f7
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d452f5ed56546a6402f4038d92474259f8b52c0076c284980ad6fb42ea107eb4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1317e51f02727b9c9a875b269ddda1c544429c3b505d10d59cd59ee551f012
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd8be60fb86bc6e7cec48c48c53cb8dd0e2aec1e30c5cfcdb31232bc12cd246c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ba223d36a93c2c23c48d6c58884870776a88563e3a6d4158aeee1cf466ccc7
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc7bbac244760ea3c1579f834f91bb860d6b6348bcc2b73bd97b775d6e442387
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf879edc3373f3aa1fbfa5e39bed9c1d9f687cefa885aaa51983263621bfbad
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33df15a0d0c8b844eb7dfbbf49148670f816272f730064d145e3081bfa07317b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c842ac876e2d9e274e43bcbfb734aef5a3ad04915985ece79c51c1dec6267db2
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aba0f78a6ddd258c43ec093bf60cb133a45c0f2e060c6fded879ac028f3123e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58326b6331289c141bb2ed088a04a076bde1df3da5eec19b81a35fe023f7a792
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.0859394073486328,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.4917629702483403,
5
  "eval_steps": 250,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3531,6 +3531,1406 @@
3531
  "eval_samples_per_second": 33.548,
3532
  "eval_steps_per_second": 4.195,
3533
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3534
  }
3535
  ],
3536
  "logging_steps": 1,
@@ -3554,12 +4954,12 @@
3554
  "should_evaluate": false,
3555
  "should_log": false,
3556
  "should_save": true,
3557
- "should_training_stop": false
3558
  },
3559
  "attributes": {}
3560
  }
3561
  },
3562
- "total_flos": 6.025530644522598e+16,
3563
  "train_batch_size": 1,
3564
  "trial_name": null,
3565
  "trial_params": null
 
1
  {
2
  "best_metric": 1.0859394073486328,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.6884681583476764,
5
  "eval_steps": 250,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3531
  "eval_samples_per_second": 33.548,
3532
  "eval_steps_per_second": 4.195,
3533
  "step": 500
3534
+ },
3535
+ {
3536
+ "epoch": 0.492746496188837,
3537
+ "grad_norm": 1.7722065448760986,
3538
+ "learning_rate": 9.909447523699617e-05,
3539
+ "loss": 1.142,
3540
+ "step": 501
3541
+ },
3542
+ {
3543
+ "epoch": 0.49373002212933365,
3544
+ "grad_norm": 1.684760332107544,
3545
+ "learning_rate": 9.819171684992574e-05,
3546
+ "loss": 1.1404,
3547
+ "step": 502
3548
+ },
3549
+ {
3550
+ "epoch": 0.49471354806983037,
3551
+ "grad_norm": 1.6577117443084717,
3552
+ "learning_rate": 9.729174958836604e-05,
3553
+ "loss": 1.1942,
3554
+ "step": 503
3555
+ },
3556
+ {
3557
+ "epoch": 0.49569707401032703,
3558
+ "grad_norm": 1.6315124034881592,
3559
+ "learning_rate": 9.639459812537399e-05,
3560
+ "loss": 1.1396,
3561
+ "step": 504
3562
+ },
3563
+ {
3564
+ "epoch": 0.4966805999508237,
3565
+ "grad_norm": 1.611763834953308,
3566
+ "learning_rate": 9.550028705681025e-05,
3567
+ "loss": 1.0717,
3568
+ "step": 505
3569
+ },
3570
+ {
3571
+ "epoch": 0.49766412589132036,
3572
+ "grad_norm": 1.792077660560608,
3573
+ "learning_rate": 9.460884090066449e-05,
3574
+ "loss": 1.1422,
3575
+ "step": 506
3576
+ },
3577
+ {
3578
+ "epoch": 0.4986476518318171,
3579
+ "grad_norm": 1.6460882425308228,
3580
+ "learning_rate": 9.37202840963834e-05,
3581
+ "loss": 1.1549,
3582
+ "step": 507
3583
+ },
3584
+ {
3585
+ "epoch": 0.49963117777231375,
3586
+ "grad_norm": 1.6016443967819214,
3587
+ "learning_rate": 9.283464100420063e-05,
3588
+ "loss": 1.1433,
3589
+ "step": 508
3590
+ },
3591
+ {
3592
+ "epoch": 0.5006147037128105,
3593
+ "grad_norm": 1.642370581626892,
3594
+ "learning_rate": 9.1951935904469e-05,
3595
+ "loss": 1.0781,
3596
+ "step": 509
3597
+ },
3598
+ {
3599
+ "epoch": 0.5015982296533071,
3600
+ "grad_norm": 1.6989452838897705,
3601
+ "learning_rate": 9.107219299699459e-05,
3602
+ "loss": 1.1015,
3603
+ "step": 510
3604
+ },
3605
+ {
3606
+ "epoch": 0.5025817555938038,
3607
+ "grad_norm": 1.6444441080093384,
3608
+ "learning_rate": 9.019543640037363e-05,
3609
+ "loss": 1.1494,
3610
+ "step": 511
3611
+ },
3612
+ {
3613
+ "epoch": 0.5035652815343005,
3614
+ "grad_norm": 1.650043249130249,
3615
+ "learning_rate": 8.93216901513312e-05,
3616
+ "loss": 1.1497,
3617
+ "step": 512
3618
+ },
3619
+ {
3620
+ "epoch": 0.5045488074747971,
3621
+ "grad_norm": 1.5892958641052246,
3622
+ "learning_rate": 8.845097820406199e-05,
3623
+ "loss": 1.0955,
3624
+ "step": 513
3625
+ },
3626
+ {
3627
+ "epoch": 0.5055323334152938,
3628
+ "grad_norm": 1.606795072555542,
3629
+ "learning_rate": 8.758332442957394e-05,
3630
+ "loss": 1.096,
3631
+ "step": 514
3632
+ },
3633
+ {
3634
+ "epoch": 0.5065158593557905,
3635
+ "grad_norm": 1.6573283672332764,
3636
+ "learning_rate": 8.671875261503348e-05,
3637
+ "loss": 1.1306,
3638
+ "step": 515
3639
+ },
3640
+ {
3641
+ "epoch": 0.5074993852962872,
3642
+ "grad_norm": 1.6091228723526,
3643
+ "learning_rate": 8.585728646311369e-05,
3644
+ "loss": 1.0845,
3645
+ "step": 516
3646
+ },
3647
+ {
3648
+ "epoch": 0.5084829112367839,
3649
+ "grad_norm": 1.6882151365280151,
3650
+ "learning_rate": 8.499894959134436e-05,
3651
+ "loss": 1.1502,
3652
+ "step": 517
3653
+ },
3654
+ {
3655
+ "epoch": 0.5094664371772806,
3656
+ "grad_norm": 1.6060574054718018,
3657
+ "learning_rate": 8.414376553146428e-05,
3658
+ "loss": 1.1295,
3659
+ "step": 518
3660
+ },
3661
+ {
3662
+ "epoch": 0.5104499631177772,
3663
+ "grad_norm": 1.6418123245239258,
3664
+ "learning_rate": 8.329175772877653e-05,
3665
+ "loss": 1.1259,
3666
+ "step": 519
3667
+ },
3668
+ {
3669
+ "epoch": 0.5114334890582739,
3670
+ "grad_norm": 1.6071466207504272,
3671
+ "learning_rate": 8.24429495415054e-05,
3672
+ "loss": 1.1085,
3673
+ "step": 520
3674
+ },
3675
+ {
3676
+ "epoch": 0.5124170149987706,
3677
+ "grad_norm": 1.5179975032806396,
3678
+ "learning_rate": 8.159736424015609e-05,
3679
+ "loss": 1.0485,
3680
+ "step": 521
3681
+ },
3682
+ {
3683
+ "epoch": 0.5134005409392672,
3684
+ "grad_norm": 1.6379435062408447,
3685
+ "learning_rate": 8.075502500687681e-05,
3686
+ "loss": 1.1094,
3687
+ "step": 522
3688
+ },
3689
+ {
3690
+ "epoch": 0.5143840668797639,
3691
+ "grad_norm": 1.6749610900878906,
3692
+ "learning_rate": 7.991595493482323e-05,
3693
+ "loss": 1.0844,
3694
+ "step": 523
3695
+ },
3696
+ {
3697
+ "epoch": 0.5153675928202607,
3698
+ "grad_norm": 1.696667194366455,
3699
+ "learning_rate": 7.908017702752504e-05,
3700
+ "loss": 1.1329,
3701
+ "step": 524
3702
+ },
3703
+ {
3704
+ "epoch": 0.5163511187607573,
3705
+ "grad_norm": 1.619667649269104,
3706
+ "learning_rate": 7.824771419825587e-05,
3707
+ "loss": 1.1005,
3708
+ "step": 525
3709
+ },
3710
+ {
3711
+ "epoch": 0.517334644701254,
3712
+ "grad_norm": 1.891265630722046,
3713
+ "learning_rate": 7.741858926940475e-05,
3714
+ "loss": 1.1328,
3715
+ "step": 526
3716
+ },
3717
+ {
3718
+ "epoch": 0.5183181706417507,
3719
+ "grad_norm": 1.612331509590149,
3720
+ "learning_rate": 7.65928249718503e-05,
3721
+ "loss": 1.1873,
3722
+ "step": 527
3723
+ },
3724
+ {
3725
+ "epoch": 0.5193016965822473,
3726
+ "grad_norm": 1.7046937942504883,
3727
+ "learning_rate": 7.577044394433794e-05,
3728
+ "loss": 1.0657,
3729
+ "step": 528
3730
+ },
3731
+ {
3732
+ "epoch": 0.520285222522744,
3733
+ "grad_norm": 1.6257797479629517,
3734
+ "learning_rate": 7.495146873285903e-05,
3735
+ "loss": 1.1406,
3736
+ "step": 529
3737
+ },
3738
+ {
3739
+ "epoch": 0.5212687484632407,
3740
+ "grad_norm": 1.6082725524902344,
3741
+ "learning_rate": 7.413592179003255e-05,
3742
+ "loss": 1.092,
3743
+ "step": 530
3744
+ },
3745
+ {
3746
+ "epoch": 0.5222522744037374,
3747
+ "grad_norm": 1.638418436050415,
3748
+ "learning_rate": 7.332382547449e-05,
3749
+ "loss": 1.1757,
3750
+ "step": 531
3751
+ },
3752
+ {
3753
+ "epoch": 0.5232358003442341,
3754
+ "grad_norm": 1.6094216108322144,
3755
+ "learning_rate": 7.251520205026205e-05,
3756
+ "loss": 1.1619,
3757
+ "step": 532
3758
+ },
3759
+ {
3760
+ "epoch": 0.5242193262847308,
3761
+ "grad_norm": 1.666649580001831,
3762
+ "learning_rate": 7.171007368616842e-05,
3763
+ "loss": 1.0869,
3764
+ "step": 533
3765
+ },
3766
+ {
3767
+ "epoch": 0.5252028522252274,
3768
+ "grad_norm": 1.6826838254928589,
3769
+ "learning_rate": 7.090846245520986e-05,
3770
+ "loss": 1.1334,
3771
+ "step": 534
3772
+ },
3773
+ {
3774
+ "epoch": 0.5261863781657241,
3775
+ "grad_norm": 1.6268287897109985,
3776
+ "learning_rate": 7.011039033396329e-05,
3777
+ "loss": 1.1287,
3778
+ "step": 535
3779
+ },
3780
+ {
3781
+ "epoch": 0.5271699041062208,
3782
+ "grad_norm": 1.6804072856903076,
3783
+ "learning_rate": 6.93158792019789e-05,
3784
+ "loss": 1.1468,
3785
+ "step": 536
3786
+ },
3787
+ {
3788
+ "epoch": 0.5281534300467174,
3789
+ "grad_norm": 1.6158145666122437,
3790
+ "learning_rate": 6.852495084118083e-05,
3791
+ "loss": 1.0231,
3792
+ "step": 537
3793
+ },
3794
+ {
3795
+ "epoch": 0.5291369559872141,
3796
+ "grad_norm": 1.7181380987167358,
3797
+ "learning_rate": 6.773762693526965e-05,
3798
+ "loss": 1.1221,
3799
+ "step": 538
3800
+ },
3801
+ {
3802
+ "epoch": 0.5301204819277109,
3803
+ "grad_norm": 1.662822961807251,
3804
+ "learning_rate": 6.695392906912786e-05,
3805
+ "loss": 1.1764,
3806
+ "step": 539
3807
+ },
3808
+ {
3809
+ "epoch": 0.5311040078682076,
3810
+ "grad_norm": 1.5750726461410522,
3811
+ "learning_rate": 6.617387872822842e-05,
3812
+ "loss": 1.0695,
3813
+ "step": 540
3814
+ },
3815
+ {
3816
+ "epoch": 0.5320875338087042,
3817
+ "grad_norm": 1.6667808294296265,
3818
+ "learning_rate": 6.539749729804538e-05,
3819
+ "loss": 1.1717,
3820
+ "step": 541
3821
+ },
3822
+ {
3823
+ "epoch": 0.5330710597492009,
3824
+ "grad_norm": 1.6217957735061646,
3825
+ "learning_rate": 6.462480606346788e-05,
3826
+ "loss": 1.0892,
3827
+ "step": 542
3828
+ },
3829
+ {
3830
+ "epoch": 0.5340545856896975,
3831
+ "grad_norm": 1.5805596113204956,
3832
+ "learning_rate": 6.385582620821644e-05,
3833
+ "loss": 1.1495,
3834
+ "step": 543
3835
+ },
3836
+ {
3837
+ "epoch": 0.5350381116301942,
3838
+ "grad_norm": 1.619666576385498,
3839
+ "learning_rate": 6.309057881426226e-05,
3840
+ "loss": 1.1372,
3841
+ "step": 544
3842
+ },
3843
+ {
3844
+ "epoch": 0.5360216375706909,
3845
+ "grad_norm": 1.5543806552886963,
3846
+ "learning_rate": 6.232908486124918e-05,
3847
+ "loss": 1.0527,
3848
+ "step": 545
3849
+ },
3850
+ {
3851
+ "epoch": 0.5370051635111877,
3852
+ "grad_norm": 1.5906850099563599,
3853
+ "learning_rate": 6.157136522591866e-05,
3854
+ "loss": 1.0886,
3855
+ "step": 546
3856
+ },
3857
+ {
3858
+ "epoch": 0.5379886894516843,
3859
+ "grad_norm": 1.7546547651290894,
3860
+ "learning_rate": 6.0817440681537144e-05,
3861
+ "loss": 1.0984,
3862
+ "step": 547
3863
+ },
3864
+ {
3865
+ "epoch": 0.538972215392181,
3866
+ "grad_norm": 1.7061015367507935,
3867
+ "learning_rate": 6.0067331897326895e-05,
3868
+ "loss": 1.0988,
3869
+ "step": 548
3870
+ },
3871
+ {
3872
+ "epoch": 0.5399557413326777,
3873
+ "grad_norm": 1.6275510787963867,
3874
+ "learning_rate": 5.9321059437899254e-05,
3875
+ "loss": 1.0858,
3876
+ "step": 549
3877
+ },
3878
+ {
3879
+ "epoch": 0.5409392672731743,
3880
+ "grad_norm": 1.6035560369491577,
3881
+ "learning_rate": 5.857864376269051e-05,
3882
+ "loss": 1.1456,
3883
+ "step": 550
3884
+ },
3885
+ {
3886
+ "epoch": 0.541922793213671,
3887
+ "grad_norm": 1.6571654081344604,
3888
+ "learning_rate": 5.784010522540151e-05,
3889
+ "loss": 1.1019,
3890
+ "step": 551
3891
+ },
3892
+ {
3893
+ "epoch": 0.5429063191541676,
3894
+ "grad_norm": 1.6013000011444092,
3895
+ "learning_rate": 5.7105464073439375e-05,
3896
+ "loss": 1.1046,
3897
+ "step": 552
3898
+ },
3899
+ {
3900
+ "epoch": 0.5438898450946644,
3901
+ "grad_norm": 1.5961564779281616,
3902
+ "learning_rate": 5.6374740447362264e-05,
3903
+ "loss": 1.1063,
3904
+ "step": 553
3905
+ },
3906
+ {
3907
+ "epoch": 0.5448733710351611,
3908
+ "grad_norm": 1.6290183067321777,
3909
+ "learning_rate": 5.564795438032757e-05,
3910
+ "loss": 1.1154,
3911
+ "step": 554
3912
+ },
3913
+ {
3914
+ "epoch": 0.5458568969756578,
3915
+ "grad_norm": 1.731971263885498,
3916
+ "learning_rate": 5.492512579754252e-05,
3917
+ "loss": 1.1149,
3918
+ "step": 555
3919
+ },
3920
+ {
3921
+ "epoch": 0.5468404229161544,
3922
+ "grad_norm": 1.6082978248596191,
3923
+ "learning_rate": 5.4206274515717736e-05,
3924
+ "loss": 1.1585,
3925
+ "step": 556
3926
+ },
3927
+ {
3928
+ "epoch": 0.5478239488566511,
3929
+ "grad_norm": 1.618871808052063,
3930
+ "learning_rate": 5.349142024252427e-05,
3931
+ "loss": 1.048,
3932
+ "step": 557
3933
+ },
3934
+ {
3935
+ "epoch": 0.5488074747971478,
3936
+ "grad_norm": 1.626085638999939,
3937
+ "learning_rate": 5.278058257605314e-05,
3938
+ "loss": 1.1256,
3939
+ "step": 558
3940
+ },
3941
+ {
3942
+ "epoch": 0.5497910007376444,
3943
+ "grad_norm": 1.5993646383285522,
3944
+ "learning_rate": 5.207378100427804e-05,
3945
+ "loss": 1.0744,
3946
+ "step": 559
3947
+ },
3948
+ {
3949
+ "epoch": 0.5507745266781411,
3950
+ "grad_norm": 1.6538002490997314,
3951
+ "learning_rate": 5.1371034904521134e-05,
3952
+ "loss": 1.103,
3953
+ "step": 560
3954
+ },
3955
+ {
3956
+ "epoch": 0.5517580526186379,
3957
+ "grad_norm": 1.6243610382080078,
3958
+ "learning_rate": 5.067236354292175e-05,
3959
+ "loss": 1.0917,
3960
+ "step": 561
3961
+ },
3962
+ {
3963
+ "epoch": 0.5527415785591345,
3964
+ "grad_norm": 1.570988416671753,
3965
+ "learning_rate": 4.9977786073908086e-05,
3966
+ "loss": 1.1085,
3967
+ "step": 562
3968
+ },
3969
+ {
3970
+ "epoch": 0.5537251044996312,
3971
+ "grad_norm": 1.603846788406372,
3972
+ "learning_rate": 4.928732153967246e-05,
3973
+ "loss": 1.1432,
3974
+ "step": 563
3975
+ },
3976
+ {
3977
+ "epoch": 0.5547086304401279,
3978
+ "grad_norm": 1.584790825843811,
3979
+ "learning_rate": 4.8600988869648745e-05,
3980
+ "loss": 1.0537,
3981
+ "step": 564
3982
+ },
3983
+ {
3984
+ "epoch": 0.5556921563806245,
3985
+ "grad_norm": 1.5726522207260132,
3986
+ "learning_rate": 4.7918806879993814e-05,
3987
+ "loss": 1.0789,
3988
+ "step": 565
3989
+ },
3990
+ {
3991
+ "epoch": 0.5566756823211212,
3992
+ "grad_norm": 1.608412504196167,
3993
+ "learning_rate": 4.724079427307162e-05,
3994
+ "loss": 1.1146,
3995
+ "step": 566
3996
+ },
3997
+ {
3998
+ "epoch": 0.5576592082616179,
3999
+ "grad_norm": 1.5979876518249512,
4000
+ "learning_rate": 4.656696963694012e-05,
4001
+ "loss": 1.0771,
4002
+ "step": 567
4003
+ },
4004
+ {
4005
+ "epoch": 0.5586427342021146,
4006
+ "grad_norm": 1.5700702667236328,
4007
+ "learning_rate": 4.589735144484217e-05,
4008
+ "loss": 1.0369,
4009
+ "step": 568
4010
+ },
4011
+ {
4012
+ "epoch": 0.5596262601426113,
4013
+ "grad_norm": 1.6290395259857178,
4014
+ "learning_rate": 4.5231958054698774e-05,
4015
+ "loss": 1.1129,
4016
+ "step": 569
4017
+ },
4018
+ {
4019
+ "epoch": 0.560609786083108,
4020
+ "grad_norm": 1.664450764656067,
4021
+ "learning_rate": 4.4570807708605825e-05,
4022
+ "loss": 1.0261,
4023
+ "step": 570
4024
+ },
4025
+ {
4026
+ "epoch": 0.5615933120236046,
4027
+ "grad_norm": 1.5685371160507202,
4028
+ "learning_rate": 4.391391853233404e-05,
4029
+ "loss": 1.1199,
4030
+ "step": 571
4031
+ },
4032
+ {
4033
+ "epoch": 0.5625768379641013,
4034
+ "grad_norm": 1.6812089681625366,
4035
+ "learning_rate": 4.326130853483206e-05,
4036
+ "loss": 1.0344,
4037
+ "step": 572
4038
+ },
4039
+ {
4040
+ "epoch": 0.563560363904598,
4041
+ "grad_norm": 1.622582197189331,
4042
+ "learning_rate": 4.261299560773255e-05,
4043
+ "loss": 1.061,
4044
+ "step": 573
4045
+ },
4046
+ {
4047
+ "epoch": 0.5645438898450946,
4048
+ "grad_norm": 1.660653829574585,
4049
+ "learning_rate": 4.196899752486192e-05,
4050
+ "loss": 1.1085,
4051
+ "step": 574
4052
+ },
4053
+ {
4054
+ "epoch": 0.5655274157855913,
4055
+ "grad_norm": 1.6321308612823486,
4056
+ "learning_rate": 4.132933194175299e-05,
4057
+ "loss": 1.0944,
4058
+ "step": 575
4059
+ },
4060
+ {
4061
+ "epoch": 0.5665109417260881,
4062
+ "grad_norm": 1.6691020727157593,
4063
+ "learning_rate": 4.069401639516075e-05,
4064
+ "loss": 1.1395,
4065
+ "step": 576
4066
+ },
4067
+ {
4068
+ "epoch": 0.5674944676665847,
4069
+ "grad_norm": 1.5686017274856567,
4070
+ "learning_rate": 4.0063068302581885e-05,
4071
+ "loss": 1.0362,
4072
+ "step": 577
4073
+ },
4074
+ {
4075
+ "epoch": 0.5684779936070814,
4076
+ "grad_norm": 1.6180577278137207,
4077
+ "learning_rate": 3.943650496177713e-05,
4078
+ "loss": 1.0636,
4079
+ "step": 578
4080
+ },
4081
+ {
4082
+ "epoch": 0.5694615195475781,
4083
+ "grad_norm": 1.5809311866760254,
4084
+ "learning_rate": 3.881434355029687e-05,
4085
+ "loss": 1.0899,
4086
+ "step": 579
4087
+ },
4088
+ {
4089
+ "epoch": 0.5704450454880747,
4090
+ "grad_norm": 1.5602284669876099,
4091
+ "learning_rate": 3.819660112501053e-05,
4092
+ "loss": 1.0964,
4093
+ "step": 580
4094
+ },
4095
+ {
4096
+ "epoch": 0.5714285714285714,
4097
+ "grad_norm": 1.555755376815796,
4098
+ "learning_rate": 3.758329462163874e-05,
4099
+ "loss": 1.1217,
4100
+ "step": 581
4101
+ },
4102
+ {
4103
+ "epoch": 0.5724120973690681,
4104
+ "grad_norm": 1.6677347421646118,
4105
+ "learning_rate": 3.697444085428914e-05,
4106
+ "loss": 1.1332,
4107
+ "step": 582
4108
+ },
4109
+ {
4110
+ "epoch": 0.5733956233095648,
4111
+ "grad_norm": 1.6057909727096558,
4112
+ "learning_rate": 3.637005651499528e-05,
4113
+ "loss": 1.0566,
4114
+ "step": 583
4115
+ },
4116
+ {
4117
+ "epoch": 0.5743791492500615,
4118
+ "grad_norm": 1.5967357158660889,
4119
+ "learning_rate": 3.57701581732592e-05,
4120
+ "loss": 1.1161,
4121
+ "step": 584
4122
+ },
4123
+ {
4124
+ "epoch": 0.5753626751905582,
4125
+ "grad_norm": 1.6041682958602905,
4126
+ "learning_rate": 3.5174762275596864e-05,
4127
+ "loss": 1.1068,
4128
+ "step": 585
4129
+ },
4130
+ {
4131
+ "epoch": 0.5763462011310548,
4132
+ "grad_norm": 1.5802150964736938,
4133
+ "learning_rate": 3.4583885145087615e-05,
4134
+ "loss": 1.0469,
4135
+ "step": 586
4136
+ },
4137
+ {
4138
+ "epoch": 0.5773297270715515,
4139
+ "grad_norm": 1.57747483253479,
4140
+ "learning_rate": 3.3997542980926524e-05,
4141
+ "loss": 1.0733,
4142
+ "step": 587
4143
+ },
4144
+ {
4145
+ "epoch": 0.5783132530120482,
4146
+ "grad_norm": 1.5697864294052124,
4147
+ "learning_rate": 3.3415751857980115e-05,
4148
+ "loss": 1.0767,
4149
+ "step": 588
4150
+ },
4151
+ {
4152
+ "epoch": 0.5792967789525448,
4153
+ "grad_norm": 1.550041675567627,
4154
+ "learning_rate": 3.2838527726345993e-05,
4155
+ "loss": 1.063,
4156
+ "step": 589
4157
+ },
4158
+ {
4159
+ "epoch": 0.5802803048930415,
4160
+ "grad_norm": 1.5676511526107788,
4161
+ "learning_rate": 3.226588641091521e-05,
4162
+ "loss": 1.0693,
4163
+ "step": 590
4164
+ },
4165
+ {
4166
+ "epoch": 0.5812638308335383,
4167
+ "grad_norm": 1.578323245048523,
4168
+ "learning_rate": 3.1697843610938794e-05,
4169
+ "loss": 1.0403,
4170
+ "step": 591
4171
+ },
4172
+ {
4173
+ "epoch": 0.5822473567740349,
4174
+ "grad_norm": 1.607877492904663,
4175
+ "learning_rate": 3.113441489959703e-05,
4176
+ "loss": 1.0868,
4177
+ "step": 592
4178
+ },
4179
+ {
4180
+ "epoch": 0.5832308827145316,
4181
+ "grad_norm": 1.5824037790298462,
4182
+ "learning_rate": 3.057561572357262e-05,
4183
+ "loss": 1.1237,
4184
+ "step": 593
4185
+ },
4186
+ {
4187
+ "epoch": 0.5842144086550283,
4188
+ "grad_norm": 1.5974537134170532,
4189
+ "learning_rate": 3.002146140262725e-05,
4190
+ "loss": 1.1145,
4191
+ "step": 594
4192
+ },
4193
+ {
4194
+ "epoch": 0.5851979345955249,
4195
+ "grad_norm": 1.5872732400894165,
4196
+ "learning_rate": 2.9471967129181565e-05,
4197
+ "loss": 0.9714,
4198
+ "step": 595
4199
+ },
4200
+ {
4201
+ "epoch": 0.5861814605360216,
4202
+ "grad_norm": 1.597594976425171,
4203
+ "learning_rate": 2.8927147967898682e-05,
4204
+ "loss": 1.0821,
4205
+ "step": 596
4206
+ },
4207
+ {
4208
+ "epoch": 0.5871649864765183,
4209
+ "grad_norm": 1.6081088781356812,
4210
+ "learning_rate": 2.8387018855271085e-05,
4211
+ "loss": 1.0513,
4212
+ "step": 597
4213
+ },
4214
+ {
4215
+ "epoch": 0.588148512417015,
4216
+ "grad_norm": 1.5636123418807983,
4217
+ "learning_rate": 2.7851594599211293e-05,
4218
+ "loss": 1.0317,
4219
+ "step": 598
4220
+ },
4221
+ {
4222
+ "epoch": 0.5891320383575117,
4223
+ "grad_norm": 1.5617702007293701,
4224
+ "learning_rate": 2.732088987864567e-05,
4225
+ "loss": 1.0692,
4226
+ "step": 599
4227
+ },
4228
+ {
4229
+ "epoch": 0.5901155642980084,
4230
+ "grad_norm": 1.6476192474365234,
4231
+ "learning_rate": 2.679491924311226e-05,
4232
+ "loss": 1.1095,
4233
+ "step": 600
4234
+ },
4235
+ {
4236
+ "epoch": 0.591099090238505,
4237
+ "grad_norm": 1.5220457315444946,
4238
+ "learning_rate": 2.6273697112361784e-05,
4239
+ "loss": 1.083,
4240
+ "step": 601
4241
+ },
4242
+ {
4243
+ "epoch": 0.5920826161790017,
4244
+ "grad_norm": 1.5808863639831543,
4245
+ "learning_rate": 2.575723777596213e-05,
4246
+ "loss": 1.1458,
4247
+ "step": 602
4248
+ },
4249
+ {
4250
+ "epoch": 0.5930661421194984,
4251
+ "grad_norm": 1.5608521699905396,
4252
+ "learning_rate": 2.524555539290696e-05,
4253
+ "loss": 1.049,
4254
+ "step": 603
4255
+ },
4256
+ {
4257
+ "epoch": 0.594049668059995,
4258
+ "grad_norm": 1.6394426822662354,
4259
+ "learning_rate": 2.473866399122733e-05,
4260
+ "loss": 1.0653,
4261
+ "step": 604
4262
+ },
4263
+ {
4264
+ "epoch": 0.5950331940004918,
4265
+ "grad_norm": 1.7107138633728027,
4266
+ "learning_rate": 2.4236577467606946e-05,
4267
+ "loss": 1.1567,
4268
+ "step": 605
4269
+ },
4270
+ {
4271
+ "epoch": 0.5960167199409885,
4272
+ "grad_norm": 1.5471723079681396,
4273
+ "learning_rate": 2.3739309587001567e-05,
4274
+ "loss": 1.0468,
4275
+ "step": 606
4276
+ },
4277
+ {
4278
+ "epoch": 0.5970002458814851,
4279
+ "grad_norm": 1.5568169355392456,
4280
+ "learning_rate": 2.324687398226131e-05,
4281
+ "loss": 1.0912,
4282
+ "step": 607
4283
+ },
4284
+ {
4285
+ "epoch": 0.5979837718219818,
4286
+ "grad_norm": 1.602166771888733,
4287
+ "learning_rate": 2.2759284153757053e-05,
4288
+ "loss": 1.1232,
4289
+ "step": 608
4290
+ },
4291
+ {
4292
+ "epoch": 0.5989672977624785,
4293
+ "grad_norm": 1.6254791021347046,
4294
+ "learning_rate": 2.2276553469010208e-05,
4295
+ "loss": 1.0962,
4296
+ "step": 609
4297
+ },
4298
+ {
4299
+ "epoch": 0.5999508237029751,
4300
+ "grad_norm": 1.5922654867172241,
4301
+ "learning_rate": 2.1798695162326442e-05,
4302
+ "loss": 1.0786,
4303
+ "step": 610
4304
+ },
4305
+ {
4306
+ "epoch": 0.6009343496434718,
4307
+ "grad_norm": 1.5703258514404297,
4308
+ "learning_rate": 2.1325722334432485e-05,
4309
+ "loss": 1.0496,
4310
+ "step": 611
4311
+ },
4312
+ {
4313
+ "epoch": 0.6019178755839685,
4314
+ "grad_norm": 1.5971964597702026,
4315
+ "learning_rate": 2.085764795211742e-05,
4316
+ "loss": 1.0631,
4317
+ "step": 612
4318
+ },
4319
+ {
4320
+ "epoch": 0.6029014015244653,
4321
+ "grad_norm": 1.6196777820587158,
4322
+ "learning_rate": 2.0394484847876892e-05,
4323
+ "loss": 1.1323,
4324
+ "step": 613
4325
+ },
4326
+ {
4327
+ "epoch": 0.6038849274649619,
4328
+ "grad_norm": 1.5888458490371704,
4329
+ "learning_rate": 1.9936245719561296e-05,
4330
+ "loss": 1.0985,
4331
+ "step": 614
4332
+ },
4333
+ {
4334
+ "epoch": 0.6048684534054586,
4335
+ "grad_norm": 1.6777266263961792,
4336
+ "learning_rate": 1.948294313002792e-05,
4337
+ "loss": 1.1057,
4338
+ "step": 615
4339
+ },
4340
+ {
4341
+ "epoch": 0.6058519793459552,
4342
+ "grad_norm": 1.5191071033477783,
4343
+ "learning_rate": 1.903458950679613e-05,
4344
+ "loss": 1.0123,
4345
+ "step": 616
4346
+ },
4347
+ {
4348
+ "epoch": 0.6068355052864519,
4349
+ "grad_norm": 1.5552016496658325,
4350
+ "learning_rate": 1.8591197141707027e-05,
4351
+ "loss": 1.0797,
4352
+ "step": 617
4353
+ },
4354
+ {
4355
+ "epoch": 0.6078190312269486,
4356
+ "grad_norm": 1.5961596965789795,
4357
+ "learning_rate": 1.8152778190586296e-05,
4358
+ "loss": 1.1184,
4359
+ "step": 618
4360
+ },
4361
+ {
4362
+ "epoch": 0.6088025571674452,
4363
+ "grad_norm": 1.5839762687683105,
4364
+ "learning_rate": 1.771934467291094e-05,
4365
+ "loss": 1.1104,
4366
+ "step": 619
4367
+ },
4368
+ {
4369
+ "epoch": 0.609786083107942,
4370
+ "grad_norm": 1.589618444442749,
4371
+ "learning_rate": 1.7290908471479805e-05,
4372
+ "loss": 1.1358,
4373
+ "step": 620
4374
+ },
4375
+ {
4376
+ "epoch": 0.6107696090484387,
4377
+ "grad_norm": 1.6342886686325073,
4378
+ "learning_rate": 1.6867481332087797e-05,
4379
+ "loss": 1.0282,
4380
+ "step": 621
4381
+ },
4382
+ {
4383
+ "epoch": 0.6117531349889354,
4384
+ "grad_norm": 1.5776050090789795,
4385
+ "learning_rate": 1.6449074863203772e-05,
4386
+ "loss": 1.0728,
4387
+ "step": 622
4388
+ },
4389
+ {
4390
+ "epoch": 0.612736660929432,
4391
+ "grad_norm": 1.6011443138122559,
4392
+ "learning_rate": 1.6035700535652465e-05,
4393
+ "loss": 1.1404,
4394
+ "step": 623
4395
+ },
4396
+ {
4397
+ "epoch": 0.6137201868699287,
4398
+ "grad_norm": 1.5918643474578857,
4399
+ "learning_rate": 1.562736968229992e-05,
4400
+ "loss": 1.0628,
4401
+ "step": 624
4402
+ },
4403
+ {
4404
+ "epoch": 0.6147037128104254,
4405
+ "grad_norm": 1.5177937746047974,
4406
+ "learning_rate": 1.5224093497742653e-05,
4407
+ "loss": 1.0399,
4408
+ "step": 625
4409
+ },
4410
+ {
4411
+ "epoch": 0.615687238750922,
4412
+ "grad_norm": 1.622794508934021,
4413
+ "learning_rate": 1.4825883038001054e-05,
4414
+ "loss": 1.1013,
4415
+ "step": 626
4416
+ },
4417
+ {
4418
+ "epoch": 0.6166707646914187,
4419
+ "grad_norm": 1.5753284692764282,
4420
+ "learning_rate": 1.4432749220216024e-05,
4421
+ "loss": 1.071,
4422
+ "step": 627
4423
+ },
4424
+ {
4425
+ "epoch": 0.6176542906319155,
4426
+ "grad_norm": 1.6217107772827148,
4427
+ "learning_rate": 1.4044702822349731e-05,
4428
+ "loss": 1.0002,
4429
+ "step": 628
4430
+ },
4431
+ {
4432
+ "epoch": 0.6186378165724121,
4433
+ "grad_norm": 1.5546847581863403,
4434
+ "learning_rate": 1.3661754482890222e-05,
4435
+ "loss": 1.0251,
4436
+ "step": 629
4437
+ },
4438
+ {
4439
+ "epoch": 0.6196213425129088,
4440
+ "grad_norm": 1.6065832376480103,
4441
+ "learning_rate": 1.3283914700559675e-05,
4442
+ "loss": 1.1277,
4443
+ "step": 630
4444
+ },
4445
+ {
4446
+ "epoch": 0.6206048684534055,
4447
+ "grad_norm": 1.582098364830017,
4448
+ "learning_rate": 1.2911193834026546e-05,
4449
+ "loss": 1.0606,
4450
+ "step": 631
4451
+ },
4452
+ {
4453
+ "epoch": 0.6215883943939021,
4454
+ "grad_norm": 1.5727086067199707,
4455
+ "learning_rate": 1.2543602101621709e-05,
4456
+ "loss": 1.0722,
4457
+ "step": 632
4458
+ },
4459
+ {
4460
+ "epoch": 0.6225719203343988,
4461
+ "grad_norm": 1.5591822862625122,
4462
+ "learning_rate": 1.2181149581058181e-05,
4463
+ "loss": 1.0718,
4464
+ "step": 633
4465
+ },
4466
+ {
4467
+ "epoch": 0.6235554462748955,
4468
+ "grad_norm": 1.5548235177993774,
4469
+ "learning_rate": 1.182384620915491e-05,
4470
+ "loss": 1.0455,
4471
+ "step": 634
4472
+ },
4473
+ {
4474
+ "epoch": 0.6245389722153922,
4475
+ "grad_norm": 1.5538259744644165,
4476
+ "learning_rate": 1.1471701781564314e-05,
4477
+ "loss": 1.0569,
4478
+ "step": 635
4479
+ },
4480
+ {
4481
+ "epoch": 0.6255224981558889,
4482
+ "grad_norm": 1.5374336242675781,
4483
+ "learning_rate": 1.1124725952503801e-05,
4484
+ "loss": 1.0752,
4485
+ "step": 636
4486
+ },
4487
+ {
4488
+ "epoch": 0.6265060240963856,
4489
+ "grad_norm": 1.590369462966919,
4490
+ "learning_rate": 1.078292823449094e-05,
4491
+ "loss": 1.0142,
4492
+ "step": 637
4493
+ },
4494
+ {
4495
+ "epoch": 0.6274895500368822,
4496
+ "grad_norm": 1.5979132652282715,
4497
+ "learning_rate": 1.0446317998082888e-05,
4498
+ "loss": 1.072,
4499
+ "step": 638
4500
+ },
4501
+ {
4502
+ "epoch": 0.6284730759773789,
4503
+ "grad_norm": 1.5855075120925903,
4504
+ "learning_rate": 1.0114904471619247e-05,
4505
+ "loss": 1.0809,
4506
+ "step": 639
4507
+ },
4508
+ {
4509
+ "epoch": 0.6294566019178756,
4510
+ "grad_norm": 1.6333812475204468,
4511
+ "learning_rate": 9.788696740969295e-06,
4512
+ "loss": 1.1021,
4513
+ "step": 640
4514
+ },
4515
+ {
4516
+ "epoch": 0.6304401278583722,
4517
+ "grad_norm": 1.646070957183838,
4518
+ "learning_rate": 9.467703749282764e-06,
4519
+ "loss": 1.0921,
4520
+ "step": 641
4521
+ },
4522
+ {
4523
+ "epoch": 0.631423653798869,
4524
+ "grad_norm": 1.5639617443084717,
4525
+ "learning_rate": 9.151934296744635e-06,
4526
+ "loss": 1.052,
4527
+ "step": 642
4528
+ },
4529
+ {
4530
+ "epoch": 0.6324071797393657,
4531
+ "grad_norm": 1.6541653871536255,
4532
+ "learning_rate": 8.841397040333976e-06,
4533
+ "loss": 1.0609,
4534
+ "step": 643
4535
+ },
4536
+ {
4537
+ "epoch": 0.6333907056798623,
4538
+ "grad_norm": 1.5831325054168701,
4539
+ "learning_rate": 8.536100493586552e-06,
4540
+ "loss": 1.1326,
4541
+ "step": 644
4542
+ },
4543
+ {
4544
+ "epoch": 0.634374231620359,
4545
+ "grad_norm": 1.496453046798706,
4546
+ "learning_rate": 8.23605302636139e-06,
4547
+ "loss": 1.0269,
4548
+ "step": 645
4549
+ },
4550
+ {
4551
+ "epoch": 0.6353577575608557,
4552
+ "grad_norm": 1.6001390218734741,
4553
+ "learning_rate": 7.941262864611387e-06,
4554
+ "loss": 1.0708,
4555
+ "step": 646
4556
+ },
4557
+ {
4558
+ "epoch": 0.6363412835013523,
4559
+ "grad_norm": 1.6304258108139038,
4560
+ "learning_rate": 7.651738090157733e-06,
4561
+ "loss": 1.0645,
4562
+ "step": 647
4563
+ },
4564
+ {
4565
+ "epoch": 0.637324809441849,
4566
+ "grad_norm": 1.5697929859161377,
4567
+ "learning_rate": 7.36748664046838e-06,
4568
+ "loss": 1.0812,
4569
+ "step": 648
4570
+ },
4571
+ {
4572
+ "epoch": 0.6383083353823457,
4573
+ "grad_norm": 1.6335376501083374,
4574
+ "learning_rate": 7.0885163084403846e-06,
4575
+ "loss": 1.0871,
4576
+ "step": 649
4577
+ },
4578
+ {
4579
+ "epoch": 0.6392918613228424,
4580
+ "grad_norm": 1.6851428747177124,
4581
+ "learning_rate": 6.81483474218636e-06,
4582
+ "loss": 1.066,
4583
+ "step": 650
4584
+ },
4585
+ {
4586
+ "epoch": 0.6402753872633391,
4587
+ "grad_norm": 1.5803459882736206,
4588
+ "learning_rate": 6.546449444824654e-06,
4589
+ "loss": 1.0518,
4590
+ "step": 651
4591
+ },
4592
+ {
4593
+ "epoch": 0.6412589132038358,
4594
+ "grad_norm": 1.6332086324691772,
4595
+ "learning_rate": 6.283367774273785e-06,
4596
+ "loss": 1.0243,
4597
+ "step": 652
4598
+ },
4599
+ {
4600
+ "epoch": 0.6422424391443324,
4601
+ "grad_norm": 1.5875962972640991,
4602
+ "learning_rate": 6.025596943050648e-06,
4603
+ "loss": 1.0768,
4604
+ "step": 653
4605
+ },
4606
+ {
4607
+ "epoch": 0.6432259650848291,
4608
+ "grad_norm": 1.648381233215332,
4609
+ "learning_rate": 5.773144018072807e-06,
4610
+ "loss": 1.1158,
4611
+ "step": 654
4612
+ },
4613
+ {
4614
+ "epoch": 0.6442094910253258,
4615
+ "grad_norm": 1.6049330234527588,
4616
+ "learning_rate": 5.5260159204646885e-06,
4617
+ "loss": 1.1665,
4618
+ "step": 655
4619
+ },
4620
+ {
4621
+ "epoch": 0.6451930169658224,
4622
+ "grad_norm": 1.5462381839752197,
4623
+ "learning_rate": 5.2842194253679424e-06,
4624
+ "loss": 1.0788,
4625
+ "step": 656
4626
+ },
4627
+ {
4628
+ "epoch": 0.6461765429063192,
4629
+ "grad_norm": 1.664047122001648,
4630
+ "learning_rate": 5.0477611617556485e-06,
4631
+ "loss": 1.156,
4632
+ "step": 657
4633
+ },
4634
+ {
4635
+ "epoch": 0.6471600688468159,
4636
+ "grad_norm": 1.7467290163040161,
4637
+ "learning_rate": 4.8166476122505135e-06,
4638
+ "loss": 1.1192,
4639
+ "step": 658
4640
+ },
4641
+ {
4642
+ "epoch": 0.6481435947873125,
4643
+ "grad_norm": 1.7353417873382568,
4644
+ "learning_rate": 4.590885112947274e-06,
4645
+ "loss": 1.0867,
4646
+ "step": 659
4647
+ },
4648
+ {
4649
+ "epoch": 0.6491271207278092,
4650
+ "grad_norm": 1.587186336517334,
4651
+ "learning_rate": 4.370479853238863e-06,
4652
+ "loss": 1.055,
4653
+ "step": 660
4654
+ },
4655
+ {
4656
+ "epoch": 0.6501106466683059,
4657
+ "grad_norm": 1.553141474723816,
4658
+ "learning_rate": 4.155437875646829e-06,
4659
+ "loss": 1.0706,
4660
+ "step": 661
4661
+ },
4662
+ {
4663
+ "epoch": 0.6510941726088025,
4664
+ "grad_norm": 1.5985965728759766,
4665
+ "learning_rate": 3.945765075655649e-06,
4666
+ "loss": 1.0794,
4667
+ "step": 662
4668
+ },
4669
+ {
4670
+ "epoch": 0.6520776985492992,
4671
+ "grad_norm": 1.505215048789978,
4672
+ "learning_rate": 3.7414672015509746e-06,
4673
+ "loss": 1.0067,
4674
+ "step": 663
4675
+ },
4676
+ {
4677
+ "epoch": 0.6530612244897959,
4678
+ "grad_norm": 1.575181484222412,
4679
+ "learning_rate": 3.542549854262278e-06,
4680
+ "loss": 1.0166,
4681
+ "step": 664
4682
+ },
4683
+ {
4684
+ "epoch": 0.6540447504302926,
4685
+ "grad_norm": 1.6103475093841553,
4686
+ "learning_rate": 3.349018487209099e-06,
4687
+ "loss": 1.1203,
4688
+ "step": 665
4689
+ },
4690
+ {
4691
+ "epoch": 0.6550282763707893,
4692
+ "grad_norm": 1.6004595756530762,
4693
+ "learning_rate": 3.160878406151624e-06,
4694
+ "loss": 1.0892,
4695
+ "step": 666
4696
+ },
4697
+ {
4698
+ "epoch": 0.656011802311286,
4699
+ "grad_norm": 1.5086390972137451,
4700
+ "learning_rate": 2.9781347690452266e-06,
4701
+ "loss": 1.051,
4702
+ "step": 667
4703
+ },
4704
+ {
4705
+ "epoch": 0.6569953282517826,
4706
+ "grad_norm": 1.570785403251648,
4707
+ "learning_rate": 2.800792585899026e-06,
4708
+ "loss": 0.9648,
4709
+ "step": 668
4710
+ },
4711
+ {
4712
+ "epoch": 0.6579788541922793,
4713
+ "grad_norm": 1.54836905002594,
4714
+ "learning_rate": 2.6288567186385505e-06,
4715
+ "loss": 1.0549,
4716
+ "step": 669
4717
+ },
4718
+ {
4719
+ "epoch": 0.658962380132776,
4720
+ "grad_norm": 1.660688042640686,
4721
+ "learning_rate": 2.462331880972468e-06,
4722
+ "loss": 1.0269,
4723
+ "step": 670
4724
+ },
4725
+ {
4726
+ "epoch": 0.6599459060732726,
4727
+ "grad_norm": 1.5790220499038696,
4728
+ "learning_rate": 2.3012226382632894e-06,
4729
+ "loss": 1.0473,
4730
+ "step": 671
4731
+ },
4732
+ {
4733
+ "epoch": 0.6609294320137694,
4734
+ "grad_norm": 1.6120561361312866,
4735
+ "learning_rate": 2.1455334074023337e-06,
4736
+ "loss": 1.1179,
4737
+ "step": 672
4738
+ },
4739
+ {
4740
+ "epoch": 0.6619129579542661,
4741
+ "grad_norm": 1.5404844284057617,
4742
+ "learning_rate": 1.9952684566884926e-06,
4743
+ "loss": 1.0333,
4744
+ "step": 673
4745
+ },
4746
+ {
4747
+ "epoch": 0.6628964838947627,
4748
+ "grad_norm": 1.5952411890029907,
4749
+ "learning_rate": 1.8504319057112806e-06,
4750
+ "loss": 1.0023,
4751
+ "step": 674
4752
+ },
4753
+ {
4754
+ "epoch": 0.6638800098352594,
4755
+ "grad_norm": 1.549829363822937,
4756
+ "learning_rate": 1.7110277252379236e-06,
4757
+ "loss": 1.0127,
4758
+ "step": 675
4759
+ },
4760
+ {
4761
+ "epoch": 0.6648635357757561,
4762
+ "grad_norm": 1.5770440101623535,
4763
+ "learning_rate": 1.577059737104447e-06,
4764
+ "loss": 1.0096,
4765
+ "step": 676
4766
+ },
4767
+ {
4768
+ "epoch": 0.6658470617162527,
4769
+ "grad_norm": 1.625446081161499,
4770
+ "learning_rate": 1.4485316141108928e-06,
4771
+ "loss": 1.0445,
4772
+ "step": 677
4773
+ },
4774
+ {
4775
+ "epoch": 0.6668305876567494,
4776
+ "grad_norm": 1.6090573072433472,
4777
+ "learning_rate": 1.325446879920711e-06,
4778
+ "loss": 1.0989,
4779
+ "step": 678
4780
+ },
4781
+ {
4782
+ "epoch": 0.6678141135972461,
4783
+ "grad_norm": 1.5554827451705933,
4784
+ "learning_rate": 1.2078089089640809e-06,
4785
+ "loss": 1.022,
4786
+ "step": 679
4787
+ },
4788
+ {
4789
+ "epoch": 0.6687976395377428,
4790
+ "grad_norm": 1.5727711915969849,
4791
+ "learning_rate": 1.0956209263453421e-06,
4792
+ "loss": 1.0687,
4793
+ "step": 680
4794
+ },
4795
+ {
4796
+ "epoch": 0.6697811654782395,
4797
+ "grad_norm": 1.590895175933838,
4798
+ "learning_rate": 9.888860077547524e-07,
4799
+ "loss": 1.1218,
4800
+ "step": 681
4801
+ },
4802
+ {
4803
+ "epoch": 0.6707646914187362,
4804
+ "grad_norm": 1.6273709535598755,
4805
+ "learning_rate": 8.876070793840008e-07,
4806
+ "loss": 1.0526,
4807
+ "step": 682
4808
+ },
4809
+ {
4810
+ "epoch": 0.6717482173592328,
4811
+ "grad_norm": 1.5715453624725342,
4812
+ "learning_rate": 7.917869178460934e-07,
4813
+ "loss": 1.0828,
4814
+ "step": 683
4815
+ },
4816
+ {
4817
+ "epoch": 0.6727317432997295,
4818
+ "grad_norm": 1.5626118183135986,
4819
+ "learning_rate": 7.01428150099126e-07,
4820
+ "loss": 1.1077,
4821
+ "step": 684
4822
+ },
4823
+ {
4824
+ "epoch": 0.6737152692402262,
4825
+ "grad_norm": 1.5551321506500244,
4826
+ "learning_rate": 6.165332533744073e-07,
4827
+ "loss": 1.0527,
4828
+ "step": 685
4829
+ },
4830
+ {
4831
+ "epoch": 0.6746987951807228,
4832
+ "grad_norm": 1.6907010078430176,
4833
+ "learning_rate": 5.371045551083808e-07,
4834
+ "loss": 1.0607,
4835
+ "step": 686
4836
+ },
4837
+ {
4838
+ "epoch": 0.6756823211212196,
4839
+ "grad_norm": 1.5568333864212036,
4840
+ "learning_rate": 4.631442328789426e-07,
4841
+ "loss": 0.9964,
4842
+ "step": 687
4843
+ },
4844
+ {
4845
+ "epoch": 0.6766658470617163,
4846
+ "grad_norm": 1.6289827823638916,
4847
+ "learning_rate": 3.946543143456882e-07,
4848
+ "loss": 1.0931,
4849
+ "step": 688
4850
+ },
4851
+ {
4852
+ "epoch": 0.677649373002213,
4853
+ "grad_norm": 1.5730174779891968,
4854
+ "learning_rate": 3.3163667719433576e-07,
4855
+ "loss": 1.0916,
4856
+ "step": 689
4857
+ },
4858
+ {
4859
+ "epoch": 0.6786328989427096,
4860
+ "grad_norm": 1.5842548608779907,
4861
+ "learning_rate": 2.7409304908523336e-07,
4862
+ "loss": 1.051,
4863
+ "step": 690
4864
+ },
4865
+ {
4866
+ "epoch": 0.6796164248832063,
4867
+ "grad_norm": 1.617651343345642,
4868
+ "learning_rate": 2.220250076060193e-07,
4869
+ "loss": 1.0332,
4870
+ "step": 691
4871
+ },
4872
+ {
4873
+ "epoch": 0.680599950823703,
4874
+ "grad_norm": 1.6278129816055298,
4875
+ "learning_rate": 1.7543398022832336e-07,
4876
+ "loss": 1.1475,
4877
+ "step": 692
4878
+ },
4879
+ {
4880
+ "epoch": 0.6815834767641996,
4881
+ "grad_norm": 1.5694608688354492,
4882
+ "learning_rate": 1.343212442687536e-07,
4883
+ "loss": 1.0138,
4884
+ "step": 693
4885
+ },
4886
+ {
4887
+ "epoch": 0.6825670027046964,
4888
+ "grad_norm": 1.5981290340423584,
4889
+ "learning_rate": 9.868792685368e-08,
4890
+ "loss": 0.9832,
4891
+ "step": 694
4892
+ },
4893
+ {
4894
+ "epoch": 0.6835505286451931,
4895
+ "grad_norm": 1.5327750444412231,
4896
+ "learning_rate": 6.853500488854803e-08,
4897
+ "loss": 1.0314,
4898
+ "step": 695
4899
+ },
4900
+ {
4901
+ "epoch": 0.6845340545856897,
4902
+ "grad_norm": 1.5427277088165283,
4903
+ "learning_rate": 4.386330503090008e-08,
4904
+ "loss": 1.0459,
4905
+ "step": 696
4906
+ },
4907
+ {
4908
+ "epoch": 0.6855175805261864,
4909
+ "grad_norm": 1.5548511743545532,
4910
+ "learning_rate": 2.4673503667882458e-08,
4911
+ "loss": 1.0574,
4912
+ "step": 697
4913
+ },
4914
+ {
4915
+ "epoch": 0.686501106466683,
4916
+ "grad_norm": 1.5951749086380005,
4917
+ "learning_rate": 1.0966126897571372e-08,
4918
+ "loss": 1.1301,
4919
+ "step": 698
4920
+ },
4921
+ {
4922
+ "epoch": 0.6874846324071797,
4923
+ "grad_norm": 1.5737613439559937,
4924
+ "learning_rate": 2.741550514651081e-09,
4925
+ "loss": 1.0478,
4926
+ "step": 699
4927
+ },
4928
+ {
4929
+ "epoch": 0.6884681583476764,
4930
+ "grad_norm": 1.5692044496536255,
4931
+ "learning_rate": 0.0,
4932
+ "loss": 1.0708,
4933
+ "step": 700
4934
  }
4935
  ],
4936
  "logging_steps": 1,
 
4954
  "should_evaluate": false,
4955
  "should_log": false,
4956
  "should_save": true,
4957
+ "should_training_stop": true
4958
  },
4959
  "attributes": {}
4960
  }
4961
  },
4962
+ "total_flos": 8.438718091440947e+16,
4963
  "train_batch_size": 1,
4964
  "trial_name": null,
4965
  "trial_params": null