souging committed on
Commit
ff7cff2
·
verified ·
1 Parent(s): e704775

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffb3465e71c5bee6f4f5e5da64c699f608f6ead07de1d80f1a26a4a03cdc00e2
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e26503ee5864af07ce4030836db51a4668d97e466902ce26562a2ae8f4dbd1
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ded546b513548abb34555a93329577fda18469633eec4183162817a92eace5f
3
  size 102864868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f87dfd3983905fc6f45e08cd5447e3695907754f080349935e996e48fe7470
3
  size 102864868
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0630b80e6010b6167b3c914fb039d36d04348e90d73277d6b3ff304e362a145
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba9dafcc54ce9ff084db79366d7b940d91787c2cfb616ba91a4bb22cd048faed
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef947ecdd841018862402e346e260921abb32476179f2a411f99f254c6604aa9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f5dfab0aba1f4849abd9625392cb4af7139fd3842b09daef021f142b39cff71
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:586245ddea8f6eb4fff149afe2c5e522e979b38346ec14e3f918c49a7e94da83
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a4c7a3e3866946bfa6c6f5a1716563f36029ceb622e5804a6f396508c55ea3
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc8a51ee6b623fc916968b6c1e8129f92f357e485afa577c074aa455bfec2060
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b39933ef00f6fb7daa29790fb90e8ff14fa4ec97393154716671cac132e30db
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6db9f882e223faff71a3c209174f2d1892ff0b368d3fb3c61032e1410ac2df2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c37e29f749e6da69d82af0a38f01b394a754fce56fccbdf27902a5d585e43f
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6e5305a0174d753deeee81d904295e1599a3ce6201608b9ca6f559008066ff0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c091df1ce77bff431f3379561fab3489657fa2cde7f17e48d31b6b28bf660c
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47b774774389bd6b7d6435db504cd44a21b9f7c3e21aaef3d0f596a0992fdb6a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e3de474f59434cf28a8c41406b1e8bd00ec6293d50ff81f9c75db3e70e91e0
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e3d214fa721bc54b61ba46b56b89c17d23565620f7ba5b29da43974d292d8c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca6a786fe6d9b25cb7742ed56fbbd3f68052ae10454dcdbf494e787b2682812
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39b0eb8ba61ee085f253937de30f38210900965eda8d8a5143a3bccb3144a58d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ac31dcfba988823188c44099086577dd09e162577217892c3f4fd5ad8489c2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9996076892899176,
5
  "eval_steps": 500,
6
- "global_step": 637,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4466,6 +4466,447 @@
4466
  "learning_rate": 5.445378758417925e-06,
4467
  "loss": 1.063,
4468
  "step": 637
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4469
  }
4470
  ],
4471
  "logging_steps": 1,
@@ -4480,12 +4921,12 @@
4480
  "should_evaluate": false,
4481
  "should_log": false,
4482
  "should_save": true,
4483
- "should_training_stop": false
4484
  },
4485
  "attributes": {}
4486
  }
4487
  },
4488
- "total_flos": 2.2983334799867904e+17,
4489
  "train_batch_size": 4,
4490
  "trial_name": null,
4491
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0984699882306788,
5
  "eval_steps": 500,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4466
  "learning_rate": 5.445378758417925e-06,
4467
  "loss": 1.063,
4468
  "step": 637
4469
+ },
4470
+ {
4471
+ "epoch": 1.001176932130247,
4472
+ "grad_norm": 3.3213553428649902,
4473
+ "learning_rate": 5.275390589031859e-06,
4474
+ "loss": 1.7375,
4475
+ "step": 638
4476
+ },
4477
+ {
4478
+ "epoch": 1.0027461749705766,
4479
+ "grad_norm": 1.4553793668746948,
4480
+ "learning_rate": 5.108026758167719e-06,
4481
+ "loss": 0.9496,
4482
+ "step": 639
4483
+ },
4484
+ {
4485
+ "epoch": 1.0043154178109062,
4486
+ "grad_norm": 1.3991262912750244,
4487
+ "learning_rate": 4.943291854189493e-06,
4488
+ "loss": 0.9158,
4489
+ "step": 640
4490
+ },
4491
+ {
4492
+ "epoch": 1.0058846606512357,
4493
+ "grad_norm": 1.5718517303466797,
4494
+ "learning_rate": 4.781190393387796e-06,
4495
+ "loss": 1.032,
4496
+ "step": 641
4497
+ },
4498
+ {
4499
+ "epoch": 1.0074539034915653,
4500
+ "grad_norm": 1.546706199645996,
4501
+ "learning_rate": 4.6217268198560404e-06,
4502
+ "loss": 0.9635,
4503
+ "step": 642
4504
+ },
4505
+ {
4506
+ "epoch": 1.0090231463318948,
4507
+ "grad_norm": 1.4677547216415405,
4508
+ "learning_rate": 4.464905505368658e-06,
4509
+ "loss": 0.9359,
4510
+ "step": 643
4511
+ },
4512
+ {
4513
+ "epoch": 1.0105923891722244,
4514
+ "grad_norm": 1.4244987964630127,
4515
+ "learning_rate": 4.3107307492612086e-06,
4516
+ "loss": 0.8894,
4517
+ "step": 644
4518
+ },
4519
+ {
4520
+ "epoch": 1.012161632012554,
4521
+ "grad_norm": 1.470013976097107,
4522
+ "learning_rate": 4.1592067783125015e-06,
4523
+ "loss": 1.0618,
4524
+ "step": 645
4525
+ },
4526
+ {
4527
+ "epoch": 1.0137308748528835,
4528
+ "grad_norm": 1.5071467161178589,
4529
+ "learning_rate": 4.010337746628751e-06,
4530
+ "loss": 0.9016,
4531
+ "step": 646
4532
+ },
4533
+ {
4534
+ "epoch": 1.015300117693213,
4535
+ "grad_norm": 1.406595230102539,
4536
+ "learning_rate": 3.864127735529656e-06,
4537
+ "loss": 0.9207,
4538
+ "step": 647
4539
+ },
4540
+ {
4541
+ "epoch": 1.0168693605335426,
4542
+ "grad_norm": 1.355204701423645,
4543
+ "learning_rate": 3.7205807534365315e-06,
4544
+ "loss": 0.9483,
4545
+ "step": 648
4546
+ },
4547
+ {
4548
+ "epoch": 1.0184386033738722,
4549
+ "grad_norm": 1.5814651250839233,
4550
+ "learning_rate": 3.5797007357623945e-06,
4551
+ "loss": 1.0056,
4552
+ "step": 649
4553
+ },
4554
+ {
4555
+ "epoch": 1.0200078462142017,
4556
+ "grad_norm": 1.5514806509017944,
4557
+ "learning_rate": 3.441491544804112e-06,
4558
+ "loss": 0.9282,
4559
+ "step": 650
4560
+ },
4561
+ {
4562
+ "epoch": 1.0215770890545313,
4563
+ "grad_norm": 1.3481324911117554,
4564
+ "learning_rate": 3.3059569696364502e-06,
4565
+ "loss": 0.853,
4566
+ "step": 651
4567
+ },
4568
+ {
4569
+ "epoch": 1.0231463318948608,
4570
+ "grad_norm": 1.4449180364608765,
4571
+ "learning_rate": 3.1731007260082616e-06,
4572
+ "loss": 0.9905,
4573
+ "step": 652
4574
+ },
4575
+ {
4576
+ "epoch": 1.0247155747351904,
4577
+ "grad_norm": 1.6003049612045288,
4578
+ "learning_rate": 3.0429264562405776e-06,
4579
+ "loss": 0.9784,
4580
+ "step": 653
4581
+ },
4582
+ {
4583
+ "epoch": 1.0262848175755197,
4584
+ "grad_norm": 1.501465916633606,
4585
+ "learning_rate": 2.9154377291267674e-06,
4586
+ "loss": 1.0185,
4587
+ "step": 654
4588
+ },
4589
+ {
4590
+ "epoch": 1.0278540604158493,
4591
+ "grad_norm": 1.373434066772461,
4592
+ "learning_rate": 2.790638039834668e-06,
4593
+ "loss": 0.9125,
4594
+ "step": 655
4595
+ },
4596
+ {
4597
+ "epoch": 1.0294233032561788,
4598
+ "grad_norm": 1.6120808124542236,
4599
+ "learning_rate": 2.6685308098108106e-06,
4600
+ "loss": 1.0961,
4601
+ "step": 656
4602
+ },
4603
+ {
4604
+ "epoch": 1.0309925460965084,
4605
+ "grad_norm": 1.455134630203247,
4606
+ "learning_rate": 2.5491193866866025e-06,
4607
+ "loss": 0.8963,
4608
+ "step": 657
4609
+ },
4610
+ {
4611
+ "epoch": 1.032561788936838,
4612
+ "grad_norm": 1.4574368000030518,
4613
+ "learning_rate": 2.432407044186509e-06,
4614
+ "loss": 0.9296,
4615
+ "step": 658
4616
+ },
4617
+ {
4618
+ "epoch": 1.0341310317771675,
4619
+ "grad_norm": 1.5060312747955322,
4620
+ "learning_rate": 2.3183969820383735e-06,
4621
+ "loss": 0.9674,
4622
+ "step": 659
4623
+ },
4624
+ {
4625
+ "epoch": 1.035700274617497,
4626
+ "grad_norm": 1.544988751411438,
4627
+ "learning_rate": 2.2070923258856255e-06,
4628
+ "loss": 0.9882,
4629
+ "step": 660
4630
+ },
4631
+ {
4632
+ "epoch": 1.0372695174578266,
4633
+ "grad_norm": 1.4960849285125732,
4634
+ "learning_rate": 2.098496127201648e-06,
4635
+ "loss": 0.966,
4636
+ "step": 661
4637
+ },
4638
+ {
4639
+ "epoch": 1.0388387602981561,
4640
+ "grad_norm": 1.5136550664901733,
4641
+ "learning_rate": 1.992611363206103e-06,
4642
+ "loss": 0.9574,
4643
+ "step": 662
4644
+ },
4645
+ {
4646
+ "epoch": 1.0404080031384857,
4647
+ "grad_norm": 1.4802120923995972,
4648
+ "learning_rate": 1.889440936783242e-06,
4649
+ "loss": 0.8649,
4650
+ "step": 663
4651
+ },
4652
+ {
4653
+ "epoch": 1.0419772459788152,
4654
+ "grad_norm": 1.5333938598632812,
4655
+ "learning_rate": 1.7889876764024505e-06,
4656
+ "loss": 1.0243,
4657
+ "step": 664
4658
+ },
4659
+ {
4660
+ "epoch": 1.0435464888191448,
4661
+ "grad_norm": 1.5968855619430542,
4662
+ "learning_rate": 1.691254336040595e-06,
4663
+ "loss": 0.9513,
4664
+ "step": 665
4665
+ },
4666
+ {
4667
+ "epoch": 1.0451157316594744,
4668
+ "grad_norm": 1.512230396270752,
4669
+ "learning_rate": 1.59624359510657e-06,
4670
+ "loss": 1.037,
4671
+ "step": 666
4672
+ },
4673
+ {
4674
+ "epoch": 1.046684974499804,
4675
+ "grad_norm": 1.5507651567459106,
4676
+ "learning_rate": 1.5039580583678393e-06,
4677
+ "loss": 0.9005,
4678
+ "step": 667
4679
+ },
4680
+ {
4681
+ "epoch": 1.0482542173401335,
4682
+ "grad_norm": 1.6249401569366455,
4683
+ "learning_rate": 1.414400255879008e-06,
4684
+ "loss": 0.9521,
4685
+ "step": 668
4686
+ },
4687
+ {
4688
+ "epoch": 1.049823460180463,
4689
+ "grad_norm": 1.5170681476593018,
4690
+ "learning_rate": 1.327572642912468e-06,
4691
+ "loss": 1.0033,
4692
+ "step": 669
4693
+ },
4694
+ {
4695
+ "epoch": 1.0513927030207926,
4696
+ "grad_norm": 1.505729079246521,
4697
+ "learning_rate": 1.2434775998910964e-06,
4698
+ "loss": 0.9384,
4699
+ "step": 670
4700
+ },
4701
+ {
4702
+ "epoch": 1.052961945861122,
4703
+ "grad_norm": 1.5404661893844604,
4704
+ "learning_rate": 1.1621174323229612e-06,
4705
+ "loss": 1.0742,
4706
+ "step": 671
4707
+ },
4708
+ {
4709
+ "epoch": 1.0545311887014515,
4710
+ "grad_norm": 1.6279956102371216,
4711
+ "learning_rate": 1.0834943707381784e-06,
4712
+ "loss": 0.8588,
4713
+ "step": 672
4714
+ },
4715
+ {
4716
+ "epoch": 1.056100431541781,
4717
+ "grad_norm": 1.5603866577148438,
4718
+ "learning_rate": 1.0076105706276888e-06,
4719
+ "loss": 1.0602,
4720
+ "step": 673
4721
+ },
4722
+ {
4723
+ "epoch": 1.0576696743821106,
4724
+ "grad_norm": 1.5433942079544067,
4725
+ "learning_rate": 9.344681123841967e-07,
4726
+ "loss": 0.9865,
4727
+ "step": 674
4728
+ },
4729
+ {
4730
+ "epoch": 1.0592389172224401,
4731
+ "grad_norm": 1.5365676879882812,
4732
+ "learning_rate": 8.640690012451515e-07,
4733
+ "loss": 0.967,
4734
+ "step": 675
4735
+ },
4736
+ {
4737
+ "epoch": 1.0608081600627697,
4738
+ "grad_norm": 1.5364326238632202,
4739
+ "learning_rate": 7.964151672377458e-07,
4740
+ "loss": 0.9036,
4741
+ "step": 676
4742
+ },
4743
+ {
4744
+ "epoch": 1.0623774029030992,
4745
+ "grad_norm": 1.4506745338439941,
4746
+ "learning_rate": 7.315084651260009e-07,
4747
+ "loss": 0.8917,
4748
+ "step": 677
4749
+ },
4750
+ {
4751
+ "epoch": 1.0639466457434288,
4752
+ "grad_norm": 1.42936372756958,
4753
+ "learning_rate": 6.69350674359959e-07,
4754
+ "loss": 0.8587,
4755
+ "step": 678
4756
+ },
4757
+ {
4758
+ "epoch": 1.0655158885837583,
4759
+ "grad_norm": 1.4997657537460327,
4760
+ "learning_rate": 6.099434990268609e-07,
4761
+ "loss": 1.0052,
4762
+ "step": 679
4763
+ },
4764
+ {
4765
+ "epoch": 1.067085131424088,
4766
+ "grad_norm": 1.4839539527893066,
4767
+ "learning_rate": 5.532885678043977e-07,
4768
+ "loss": 0.8442,
4769
+ "step": 680
4770
+ },
4771
+ {
4772
+ "epoch": 1.0686543742644175,
4773
+ "grad_norm": 1.5665620565414429,
4774
+ "learning_rate": 4.9938743391615e-07,
4775
+ "loss": 0.9402,
4776
+ "step": 681
4777
+ },
4778
+ {
4779
+ "epoch": 1.070223617104747,
4780
+ "grad_norm": 1.7468892335891724,
4781
+ "learning_rate": 4.482415750889204e-07,
4782
+ "loss": 0.9801,
4783
+ "step": 682
4784
+ },
4785
+ {
4786
+ "epoch": 1.0717928599450766,
4787
+ "grad_norm": 1.5470365285873413,
4788
+ "learning_rate": 3.998523935122772e-07,
4789
+ "loss": 0.9294,
4790
+ "step": 683
4791
+ },
4792
+ {
4793
+ "epoch": 1.0733621027854061,
4794
+ "grad_norm": 1.544100046157837,
4795
+ "learning_rate": 3.5422121580005864e-07,
4796
+ "loss": 0.9683,
4797
+ "step": 684
4798
+ },
4799
+ {
4800
+ "epoch": 1.0749313456257357,
4801
+ "grad_norm": 1.5746307373046875,
4802
+ "learning_rate": 3.1134929295407564e-07,
4803
+ "loss": 0.9611,
4804
+ "step": 685
4805
+ },
4806
+ {
4807
+ "epoch": 1.0765005884660652,
4808
+ "grad_norm": 1.612066388130188,
4809
+ "learning_rate": 2.7123780032973235e-07,
4810
+ "loss": 0.9273,
4811
+ "step": 686
4812
+ },
4813
+ {
4814
+ "epoch": 1.0780698313063946,
4815
+ "grad_norm": 1.5221633911132812,
4816
+ "learning_rate": 2.3388783760386601e-07,
4817
+ "loss": 0.9797,
4818
+ "step": 687
4819
+ },
4820
+ {
4821
+ "epoch": 1.079639074146724,
4822
+ "grad_norm": 1.4008647203445435,
4823
+ "learning_rate": 1.9930042874457254e-07,
4824
+ "loss": 0.9276,
4825
+ "step": 688
4826
+ },
4827
+ {
4828
+ "epoch": 1.0812083169870537,
4829
+ "grad_norm": 1.4528234004974365,
4830
+ "learning_rate": 1.6747652198313957e-07,
4831
+ "loss": 0.9828,
4832
+ "step": 689
4833
+ },
4834
+ {
4835
+ "epoch": 1.0827775598273832,
4836
+ "grad_norm": 1.5776127576828003,
4837
+ "learning_rate": 1.3841698978804285e-07,
4838
+ "loss": 0.9532,
4839
+ "step": 690
4840
+ },
4841
+ {
4842
+ "epoch": 1.0843468026677128,
4843
+ "grad_norm": 1.5421435832977295,
4844
+ "learning_rate": 1.1212262884103974e-07,
4845
+ "loss": 0.9135,
4846
+ "step": 691
4847
+ },
4848
+ {
4849
+ "epoch": 1.0859160455080423,
4850
+ "grad_norm": 1.5392076969146729,
4851
+ "learning_rate": 8.85941600153033e-08,
4852
+ "loss": 0.9884,
4853
+ "step": 692
4854
+ },
4855
+ {
4856
+ "epoch": 1.0874852883483719,
4857
+ "grad_norm": 1.4609785079956055,
4858
+ "learning_rate": 6.783222835572055e-08,
4859
+ "loss": 0.933,
4860
+ "step": 693
4861
+ },
4862
+ {
4863
+ "epoch": 1.0890545311887014,
4864
+ "grad_norm": 1.4286479949951172,
4865
+ "learning_rate": 4.98374030611084e-08,
4866
+ "loss": 0.8715,
4867
+ "step": 694
4868
+ },
4869
+ {
4870
+ "epoch": 1.090623774029031,
4871
+ "grad_norm": 1.6319226026535034,
4872
+ "learning_rate": 3.461017746871675e-08,
4873
+ "loss": 1.0219,
4874
+ "step": 695
4875
+ },
4876
+ {
4877
+ "epoch": 1.0921930168693605,
4878
+ "grad_norm": 1.5931543111801147,
4879
+ "learning_rate": 2.215096904060454e-08,
4880
+ "loss": 0.9608,
4881
+ "step": 696
4882
+ },
4883
+ {
4884
+ "epoch": 1.09376225970969,
4885
+ "grad_norm": 1.4178990125656128,
4886
+ "learning_rate": 1.246011935228064e-08,
4887
+ "loss": 0.8892,
4888
+ "step": 697
4889
+ },
4890
+ {
4891
+ "epoch": 1.0953315025500197,
4892
+ "grad_norm": 1.5912450551986694,
4893
+ "learning_rate": 5.537894083273543e-09,
4894
+ "loss": 0.9645,
4895
+ "step": 698
4896
+ },
4897
+ {
4898
+ "epoch": 1.0969007453903492,
4899
+ "grad_norm": 1.4889562129974365,
4900
+ "learning_rate": 1.384483009898796e-09,
4901
+ "loss": 0.87,
4902
+ "step": 699
4903
+ },
4904
+ {
4905
+ "epoch": 1.0984699882306788,
4906
+ "grad_norm": 1.4964066743850708,
4907
+ "learning_rate": 0.0,
4908
+ "loss": 0.9337,
4909
+ "step": 700
4910
  }
4911
  ],
4912
  "logging_steps": 1,
 
4921
  "should_evaluate": false,
4922
  "should_log": false,
4923
  "should_save": true,
4924
+ "should_training_stop": true
4925
  },
4926
  "attributes": {}
4927
  }
4928
  },
4929
+ "total_flos": 2.524580473166889e+17,
4930
  "train_batch_size": 4,
4931
  "trial_name": null,
4932
  "trial_params": null