JoshMe1 committed (verified)
Commit 00be7cf · 1 Parent(s): 9ef5d23

Training in progress, step 350, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e89b67e25ff5149f8efc732ee9a0058d81c8cc5f34cfd4e49fb793320709e62f
+ oid sha256:b1584ae76418d0ad06d3a0c687bd6381d31c844d9d4c2bdcc22249561ad11caf
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c97204b0b27d87ebc2475c24d3687ba410b24996f874ce8c920278b1e768387d
+ oid sha256:60ead3318824789c653c56b77c6d4d0aeb208de2dae7abc1143a2c723725a45f
  size 341314644
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:758843d0c6c58ce132f2714b31cc2527e7f3cfcb04752d8d539e5d47664f0974
+ oid sha256:fe5027b0a60817e9531c9bc52773bc5fa697b42d7d4016dd1b080e3c99c4d80a
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.012537743832997252,
+ "epoch": 0.01462736780516346,
  "eval_steps": 500,
- "global_step": 300,
+ "global_step": 350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4507,6 +4507,756 @@
  "rewards/margins": 5.479434967041016,
  "rewards/rejected": -19.215713500976562,
  "step": 300
+ },
4511
+ {
4512
+ "epoch": 0.012579536312440576,
4513
+ "grad_norm": 491.7016296386719,
4514
+ "learning_rate": 1.2049324765671749e-05,
4515
+ "logits/chosen": -3.287421703338623,
4516
+ "logits/rejected": -3.370901584625244,
4517
+ "logps/chosen": -385.67474365234375,
4518
+ "logps/rejected": -419.36627197265625,
4519
+ "loss": 3.6965,
4520
+ "rewards/accuracies": 0.625,
4521
+ "rewards/chosen": -13.180034637451172,
4522
+ "rewards/margins": 4.595239639282227,
4523
+ "rewards/rejected": -17.7752742767334,
4524
+ "step": 301
4525
+ },
4526
+ {
4527
+ "epoch": 0.0126213287918839,
4528
+ "grad_norm": 263.8130187988281,
4529
+ "learning_rate": 1.1604330125525079e-05,
4530
+ "logits/chosen": -3.4727532863616943,
4531
+ "logits/rejected": -3.349696159362793,
4532
+ "logps/chosen": -267.19024658203125,
4533
+ "logps/rejected": -271.441650390625,
4534
+ "loss": 1.8457,
4535
+ "rewards/accuracies": 0.875,
4536
+ "rewards/chosen": -9.394519805908203,
4537
+ "rewards/margins": 6.870561599731445,
4538
+ "rewards/rejected": -16.26508140563965,
4539
+ "step": 302
4540
+ },
4541
+ {
4542
+ "epoch": 0.012663121271327224,
4543
+ "grad_norm": 16.329265594482422,
4544
+ "learning_rate": 1.11652112689164e-05,
4545
+ "logits/chosen": -2.86873459815979,
4546
+ "logits/rejected": -2.7891271114349365,
4547
+ "logps/chosen": -356.82208251953125,
4548
+ "logps/rejected": -489.48175048828125,
4549
+ "loss": 0.1476,
4550
+ "rewards/accuracies": 1.0,
4551
+ "rewards/chosen": -17.0943660736084,
4552
+ "rewards/margins": 12.185678482055664,
4553
+ "rewards/rejected": -29.280044555664062,
4554
+ "step": 303
4555
+ },
4556
+ {
4557
+ "epoch": 0.012704913750770548,
4558
+ "grad_norm": 155.79867553710938,
4559
+ "learning_rate": 1.0732160807889211e-05,
4560
+ "logits/chosen": -3.2120351791381836,
4561
+ "logits/rejected": -3.2581393718719482,
4562
+ "logps/chosen": -293.7206115722656,
4563
+ "logps/rejected": -309.9013366699219,
4564
+ "loss": 0.5898,
4565
+ "rewards/accuracies": 0.875,
4566
+ "rewards/chosen": -9.185495376586914,
4567
+ "rewards/margins": 9.083627700805664,
4568
+ "rewards/rejected": -18.269123077392578,
4569
+ "step": 304
4570
+ },
4571
+ {
4572
+ "epoch": 0.012746706230213872,
4573
+ "grad_norm": 79.4925765991211,
4574
+ "learning_rate": 1.0305368692688174e-05,
4575
+ "logits/chosen": -3.025317668914795,
4576
+ "logits/rejected": -3.0122780799865723,
4577
+ "logps/chosen": -349.7897644042969,
4578
+ "logps/rejected": -384.69464111328125,
4579
+ "loss": 0.4964,
4580
+ "rewards/accuracies": 0.875,
4581
+ "rewards/chosen": -14.22126293182373,
4582
+ "rewards/margins": 6.923146724700928,
4583
+ "rewards/rejected": -21.1444091796875,
4584
+ "step": 305
4585
+ },
4586
+ {
4587
+ "epoch": 0.012788498709657196,
4588
+ "grad_norm": 273.6874694824219,
4589
+ "learning_rate": 9.88502212844063e-06,
4590
+ "logits/chosen": -2.805180311203003,
4591
+ "logits/rejected": -2.683415412902832,
4592
+ "logps/chosen": -467.9751892089844,
4593
+ "logps/rejected": -377.84820556640625,
4594
+ "loss": 1.7734,
4595
+ "rewards/accuracies": 0.875,
4596
+ "rewards/chosen": -19.00494956970215,
4597
+ "rewards/margins": 8.104949951171875,
4598
+ "rewards/rejected": -27.10989761352539,
4599
+ "step": 306
4600
+ },
4601
+ {
4602
+ "epoch": 0.01283029118910052,
4603
+ "grad_norm": 4.262240886688232,
4604
+ "learning_rate": 9.471305493042243e-06,
4605
+ "logits/chosen": -3.1370010375976562,
4606
+ "logits/rejected": -3.1797690391540527,
4607
+ "logps/chosen": -386.4381103515625,
4608
+ "logps/rejected": -456.6276550292969,
4609
+ "loss": 0.0166,
4610
+ "rewards/accuracies": 1.0,
4611
+ "rewards/chosen": -15.7275972366333,
4612
+ "rewards/margins": 8.137879371643066,
4613
+ "rewards/rejected": -23.865474700927734,
4614
+ "step": 307
4615
+ },
4616
+ {
4617
+ "epoch": 0.012872083668543845,
4618
+ "grad_norm": 198.5441436767578,
4619
+ "learning_rate": 9.064400256282757e-06,
4620
+ "logits/chosen": -3.035953998565674,
4621
+ "logits/rejected": -3.114983320236206,
4622
+ "logps/chosen": -304.5688171386719,
4623
+ "logps/rejected": -407.5628356933594,
4624
+ "loss": 0.2622,
4625
+ "rewards/accuracies": 1.0,
4626
+ "rewards/chosen": -10.267864227294922,
4627
+ "rewards/margins": 10.81235122680664,
4628
+ "rewards/rejected": -21.080215454101562,
4629
+ "step": 308
4630
+ },
4631
+ {
4632
+ "epoch": 0.012913876147987169,
4633
+ "grad_norm": 402.1909484863281,
4634
+ "learning_rate": 8.664484900247363e-06,
4635
+ "logits/chosen": -3.2249553203582764,
4636
+ "logits/rejected": -3.268974781036377,
4637
+ "logps/chosen": -228.5661163330078,
4638
+ "logps/rejected": -353.9108581542969,
4639
+ "loss": 7.305,
4640
+ "rewards/accuracies": 0.875,
4641
+ "rewards/chosen": -9.834470748901367,
4642
+ "rewards/margins": 7.000467777252197,
4643
+ "rewards/rejected": -16.834938049316406,
4644
+ "step": 309
4645
+ },
4646
+ {
4647
+ "epoch": 0.012955668627430493,
4648
+ "grad_norm": 18.060256958007812,
4649
+ "learning_rate": 8.271734841028553e-06,
4650
+ "logits/chosen": -2.8380823135375977,
4651
+ "logits/rejected": -2.7715110778808594,
4652
+ "logps/chosen": -249.33860778808594,
4653
+ "logps/rejected": -301.26629638671875,
4654
+ "loss": 0.1223,
4655
+ "rewards/accuracies": 1.0,
4656
+ "rewards/chosen": -11.448515892028809,
4657
+ "rewards/margins": 8.624532699584961,
4658
+ "rewards/rejected": -20.073047637939453,
4659
+ "step": 310
4660
+ },
4661
+ {
4662
+ "epoch": 0.012997461106873819,
4663
+ "grad_norm": 0.21917754411697388,
4664
+ "learning_rate": 7.886322351782783e-06,
4665
+ "logits/chosen": -3.0230164527893066,
4666
+ "logits/rejected": -2.8923580646514893,
4667
+ "logps/chosen": -209.99588012695312,
4668
+ "logps/rejected": -348.78564453125,
4669
+ "loss": 0.0003,
4670
+ "rewards/accuracies": 1.0,
4671
+ "rewards/chosen": -7.846118450164795,
4672
+ "rewards/margins": 12.90873908996582,
4673
+ "rewards/rejected": -20.75485610961914,
4674
+ "step": 311
4675
+ },
4676
+ {
4677
+ "epoch": 0.013039253586317143,
4678
+ "grad_norm": 80.00068664550781,
4679
+ "learning_rate": 7.508416487165862e-06,
4680
+ "logits/chosen": -2.6761441230773926,
4681
+ "logits/rejected": -2.6984710693359375,
4682
+ "logps/chosen": -256.08441162109375,
4683
+ "logps/rejected": -284.93353271484375,
4684
+ "loss": 1.1904,
4685
+ "rewards/accuracies": 0.875,
4686
+ "rewards/chosen": -8.947043418884277,
4687
+ "rewards/margins": 7.310464382171631,
4688
+ "rewards/rejected": -16.25750732421875,
4689
+ "step": 312
4690
+ },
4691
+ {
4692
+ "epoch": 0.013081046065760467,
4693
+ "grad_norm": 18.157445907592773,
4694
+ "learning_rate": 7.138183009179922e-06,
4695
+ "logits/chosen": -2.7042577266693115,
4696
+ "logits/rejected": -2.4380972385406494,
4697
+ "logps/chosen": -330.15972900390625,
4698
+ "logps/rejected": -362.9132385253906,
4699
+ "loss": 0.0393,
4700
+ "rewards/accuracies": 1.0,
4701
+ "rewards/chosen": -11.075960159301758,
4702
+ "rewards/margins": 10.102376937866211,
4703
+ "rewards/rejected": -21.17833709716797,
4704
+ "step": 313
4705
+ },
4706
+ {
4707
+ "epoch": 0.013122838545203791,
4708
+ "grad_norm": 136.31207275390625,
4709
+ "learning_rate": 6.775784314464717e-06,
4710
+ "logits/chosen": -3.101740598678589,
4711
+ "logits/rejected": -3.0880491733551025,
4712
+ "logps/chosen": -280.362548828125,
4713
+ "logps/rejected": -307.99456787109375,
4714
+ "loss": 1.9468,
4715
+ "rewards/accuracies": 0.875,
4716
+ "rewards/chosen": -9.248235702514648,
4717
+ "rewards/margins": 5.009317398071289,
4718
+ "rewards/rejected": -14.257552146911621,
4719
+ "step": 314
4720
+ },
4721
+ {
4722
+ "epoch": 0.013164631024647116,
4723
+ "grad_norm": 39.999839782714844,
4724
+ "learning_rate": 6.421379363065142e-06,
4725
+ "logits/chosen": -2.7587459087371826,
4726
+ "logits/rejected": -2.930101156234741,
4727
+ "logps/chosen": -398.45050048828125,
4728
+ "logps/rejected": -447.44342041015625,
4729
+ "loss": 0.2493,
4730
+ "rewards/accuracies": 1.0,
4731
+ "rewards/chosen": -10.370040893554688,
4732
+ "rewards/margins": 9.242597579956055,
4733
+ "rewards/rejected": -19.612638473510742,
4734
+ "step": 315
4735
+ },
4736
+ {
4737
+ "epoch": 0.01320642350409044,
4738
+ "grad_norm": 1.3460954427719116,
4739
+ "learning_rate": 6.075123608706093e-06,
4740
+ "logits/chosen": -3.2334988117218018,
4741
+ "logits/rejected": -3.234659194946289,
4742
+ "logps/chosen": -251.70684814453125,
4743
+ "logps/rejected": -263.55120849609375,
4744
+ "loss": 0.0042,
4745
+ "rewards/accuracies": 1.0,
4746
+ "rewards/chosen": -5.089323043823242,
4747
+ "rewards/margins": 12.2367582321167,
4748
+ "rewards/rejected": -17.326082229614258,
4749
+ "step": 316
4750
+ },
4751
+ {
4752
+ "epoch": 0.013248215983533764,
4753
+ "grad_norm": 218.19578552246094,
4754
+ "learning_rate": 5.737168930605272e-06,
4755
+ "logits/chosen": -3.100266933441162,
4756
+ "logits/rejected": -3.374037027359009,
4757
+ "logps/chosen": -235.7011260986328,
4758
+ "logps/rejected": -234.38894653320312,
4759
+ "loss": 1.9371,
4760
+ "rewards/accuracies": 0.75,
4761
+ "rewards/chosen": -4.014512538909912,
4762
+ "rewards/margins": 6.0716729164123535,
4763
+ "rewards/rejected": -10.086185455322266,
4764
+ "step": 317
4765
+ },
4766
+ {
4767
+ "epoch": 0.013290008462977088,
4768
+ "grad_norm": 220.26162719726562,
4769
+ "learning_rate": 5.4076635668540075e-06,
4770
+ "logits/chosen": -3.213263988494873,
4771
+ "logits/rejected": -3.115088701248169,
4772
+ "logps/chosen": -344.0352783203125,
4773
+ "logps/rejected": -303.6358337402344,
4774
+ "loss": 5.0835,
4775
+ "rewards/accuracies": 0.875,
4776
+ "rewards/chosen": -7.557135105133057,
4777
+ "rewards/margins": 5.570637226104736,
4778
+ "rewards/rejected": -13.127771377563477,
4779
+ "step": 318
4780
+ },
4781
+ {
4782
+ "epoch": 0.013331800942420412,
4783
+ "grad_norm": 25.128589630126953,
4784
+ "learning_rate": 5.086752049395094e-06,
4785
+ "logits/chosen": -3.3187851905822754,
4786
+ "logits/rejected": -3.2173070907592773,
4787
+ "logps/chosen": -352.69342041015625,
4788
+ "logps/rejected": -370.28448486328125,
4789
+ "loss": 0.1174,
4790
+ "rewards/accuracies": 1.0,
4791
+ "rewards/chosen": -10.892807006835938,
4792
+ "rewards/margins": 8.983996391296387,
4793
+ "rewards/rejected": -19.876802444458008,
4794
+ "step": 319
4795
+ },
4796
+ {
4797
+ "epoch": 0.013373593421863736,
4798
+ "grad_norm": 28.23332977294922,
4799
+ "learning_rate": 4.7745751406263165e-06,
4800
+ "logits/chosen": -3.117103338241577,
4801
+ "logits/rejected": -3.0183892250061035,
4802
+ "logps/chosen": -183.39730834960938,
4803
+ "logps/rejected": -257.51708984375,
4804
+ "loss": 0.5492,
4805
+ "rewards/accuracies": 0.875,
4806
+ "rewards/chosen": -6.208366870880127,
4807
+ "rewards/margins": 6.4307146072387695,
4808
+ "rewards/rejected": -12.639081001281738,
4809
+ "step": 320
4810
+ },
4811
+ {
4812
+ "epoch": 0.01341538590130706,
4813
+ "grad_norm": 0.29758647084236145,
4814
+ "learning_rate": 4.4712697716574e-06,
4815
+ "logits/chosen": -3.1782500743865967,
4816
+ "logits/rejected": -3.3849024772644043,
4817
+ "logps/chosen": -313.7176208496094,
4818
+ "logps/rejected": -310.43206787109375,
4819
+ "loss": 0.3477,
4820
+ "rewards/accuracies": 1.0,
4821
+ "rewards/chosen": -5.478094100952148,
4822
+ "rewards/margins": 10.825754165649414,
4823
+ "rewards/rejected": -16.303848266601562,
4824
+ "step": 321
4825
+ },
4826
+ {
4827
+ "epoch": 0.013457178380750385,
4828
+ "grad_norm": 42.00798797607422,
4829
+ "learning_rate": 4.176968982247514e-06,
4830
+ "logits/chosen": -3.189098596572876,
4831
+ "logits/rejected": -3.191659450531006,
4832
+ "logps/chosen": -185.61148071289062,
4833
+ "logps/rejected": -265.11590576171875,
4834
+ "loss": 0.5371,
4835
+ "rewards/accuracies": 1.0,
4836
+ "rewards/chosen": -4.42623233795166,
4837
+ "rewards/margins": 8.69643783569336,
4838
+ "rewards/rejected": -13.12267017364502,
4839
+ "step": 322
4840
+ },
4841
+ {
4842
+ "epoch": 0.013498970860193709,
4843
+ "grad_norm": 25.100051879882812,
4844
+ "learning_rate": 3.891801862449629e-06,
4845
+ "logits/chosen": -2.929891586303711,
4846
+ "logits/rejected": -2.9269561767578125,
4847
+ "logps/chosen": -248.1248779296875,
4848
+ "logps/rejected": -295.5849914550781,
4849
+ "loss": 0.1219,
4850
+ "rewards/accuracies": 1.0,
4851
+ "rewards/chosen": -7.96785306930542,
4852
+ "rewards/margins": 8.257719039916992,
4853
+ "rewards/rejected": -16.22557258605957,
4854
+ "step": 323
4855
+ },
4856
+ {
4857
+ "epoch": 0.013540763339637033,
4858
+ "grad_norm": 340.0687255859375,
4859
+ "learning_rate": 3.6158934959873353e-06,
4860
+ "logits/chosen": -2.5700321197509766,
4861
+ "logits/rejected": -2.4596493244171143,
4862
+ "logps/chosen": -381.09490966796875,
4863
+ "logps/rejected": -405.3451843261719,
4864
+ "loss": 4.0703,
4865
+ "rewards/accuracies": 0.75,
4866
+ "rewards/chosen": -10.214908599853516,
4867
+ "rewards/margins": 6.979231834411621,
4868
+ "rewards/rejected": -17.19413948059082,
4869
+ "step": 324
4870
+ },
4871
+ {
4872
+ "epoch": 0.013582555819080357,
4873
+ "grad_norm": 200.0123291015625,
4874
+ "learning_rate": 3.3493649053890326e-06,
4875
+ "logits/chosen": -3.020430326461792,
4876
+ "logits/rejected": -3.0444626808166504,
4877
+ "logps/chosen": -332.80682373046875,
4878
+ "logps/rejected": -316.76641845703125,
4879
+ "loss": 0.9297,
4880
+ "rewards/accuracies": 0.875,
4881
+ "rewards/chosen": -13.117790222167969,
4882
+ "rewards/margins": 4.298157691955566,
4883
+ "rewards/rejected": -17.41594886779785,
4884
+ "step": 325
4885
+ },
4886
+ {
4887
+ "epoch": 0.013624348298523681,
4888
+ "grad_norm": 25.32855224609375,
4889
+ "learning_rate": 3.092332998903416e-06,
4890
+ "logits/chosen": -2.877979278564453,
4891
+ "logits/rejected": -2.9653923511505127,
4892
+ "logps/chosen": -297.63922119140625,
4893
+ "logps/rejected": -353.91668701171875,
4894
+ "loss": 0.2341,
4895
+ "rewards/accuracies": 1.0,
4896
+ "rewards/chosen": -9.10722541809082,
4897
+ "rewards/margins": 7.169858932495117,
4898
+ "rewards/rejected": -16.277084350585938,
4899
+ "step": 326
4900
+ },
4901
+ {
4902
+ "epoch": 0.013666140777967005,
4903
+ "grad_norm": 126.90220642089844,
4904
+ "learning_rate": 2.8449105192196316e-06,
4905
+ "logits/chosen": -3.0049197673797607,
4906
+ "logits/rejected": -3.0737524032592773,
4907
+ "logps/chosen": -148.86209106445312,
4908
+ "logps/rejected": -199.28346252441406,
4909
+ "loss": 1.2454,
4910
+ "rewards/accuracies": 0.75,
4911
+ "rewards/chosen": -2.6697428226470947,
4912
+ "rewards/margins": 6.129971981048584,
4913
+ "rewards/rejected": -8.799715042114258,
4914
+ "step": 327
4915
+ },
4916
+ {
4917
+ "epoch": 0.01370793325741033,
4918
+ "grad_norm": 118.86524963378906,
4919
+ "learning_rate": 2.6072059940146775e-06,
4920
+ "logits/chosen": -2.9463143348693848,
4921
+ "logits/rejected": -2.917930841445923,
4922
+ "logps/chosen": -245.6232452392578,
4923
+ "logps/rejected": -331.4186096191406,
4924
+ "loss": 0.3829,
4925
+ "rewards/accuracies": 1.0,
4926
+ "rewards/chosen": -6.183173179626465,
4927
+ "rewards/margins": 9.998785972595215,
4928
+ "rewards/rejected": -16.181961059570312,
4929
+ "step": 328
4930
+ },
4931
+ {
4932
+ "epoch": 0.013749725736853654,
4933
+ "grad_norm": 16.672832489013672,
4934
+ "learning_rate": 2.379323688349516e-06,
4935
+ "logits/chosen": -2.9064996242523193,
4936
+ "logits/rejected": -2.991321563720703,
4937
+ "logps/chosen": -241.98927307128906,
4938
+ "logps/rejected": -274.4370422363281,
4939
+ "loss": 0.1581,
4940
+ "rewards/accuracies": 1.0,
4941
+ "rewards/chosen": -5.3424072265625,
4942
+ "rewards/margins": 11.008346557617188,
4943
+ "rewards/rejected": -16.350753784179688,
4944
+ "step": 329
4945
+ },
4946
+ {
4947
+ "epoch": 0.013791518216296978,
4948
+ "grad_norm": 46.51081848144531,
4949
+ "learning_rate": 2.1613635589349756e-06,
4950
+ "logits/chosen": -3.2066824436187744,
4951
+ "logits/rejected": -3.1287407875061035,
4952
+ "logps/chosen": -351.7823791503906,
4953
+ "logps/rejected": -451.6476135253906,
4954
+ "loss": 0.3389,
4955
+ "rewards/accuracies": 1.0,
4956
+ "rewards/chosen": -8.472803115844727,
4957
+ "rewards/margins": 10.613195419311523,
4958
+ "rewards/rejected": -19.086000442504883,
4959
+ "step": 330
4960
+ },
4961
+ {
4962
+ "epoch": 0.013833310695740302,
4963
+ "grad_norm": 332.8823547363281,
4964
+ "learning_rate": 1.95342121028749e-06,
4965
+ "logits/chosen": -2.901820182800293,
4966
+ "logits/rejected": -2.879037618637085,
4967
+ "logps/chosen": -323.36968994140625,
4968
+ "logps/rejected": -369.71441650390625,
4969
+ "loss": 2.4841,
4970
+ "rewards/accuracies": 0.875,
4971
+ "rewards/chosen": -9.933320045471191,
4972
+ "rewards/margins": 5.173391342163086,
4973
+ "rewards/rejected": -15.106710433959961,
4974
+ "step": 331
4975
+ },
4976
+ {
4977
+ "epoch": 0.013875103175183626,
4978
+ "grad_norm": 42.102596282958984,
4979
+ "learning_rate": 1.7555878527937164e-06,
4980
+ "logits/chosen": -3.0418646335601807,
4981
+ "logits/rejected": -2.8981995582580566,
4982
+ "logps/chosen": -288.25274658203125,
4983
+ "logps/rejected": -369.7027587890625,
4984
+ "loss": 0.2115,
4985
+ "rewards/accuracies": 1.0,
4986
+ "rewards/chosen": -7.725009441375732,
4987
+ "rewards/margins": 9.557852745056152,
4988
+ "rewards/rejected": -17.282861709594727,
4989
+ "step": 332
4990
+ },
4991
+ {
4992
+ "epoch": 0.01391689565462695,
4993
+ "grad_norm": 20.132675170898438,
4994
+ "learning_rate": 1.5679502627027136e-06,
4995
+ "logits/chosen": -2.849980354309082,
4996
+ "logits/rejected": -2.8271608352661133,
4997
+ "logps/chosen": -297.3507080078125,
4998
+ "logps/rejected": -400.2951965332031,
4999
+ "loss": 0.0436,
5000
+ "rewards/accuracies": 1.0,
5001
+ "rewards/chosen": -7.2351155281066895,
5002
+ "rewards/margins": 10.109538078308105,
5003
+ "rewards/rejected": -17.344654083251953,
5004
+ "step": 333
5005
+ },
5006
+ {
5007
+ "epoch": 0.013958688134070274,
5008
+ "grad_norm": 4.88221549987793,
5009
+ "learning_rate": 1.3905907440629752e-06,
5010
+ "logits/chosen": -2.9725608825683594,
5011
+ "logits/rejected": -3.0983333587646484,
5012
+ "logps/chosen": -230.85008239746094,
5013
+ "logps/rejected": -330.5326843261719,
5014
+ "loss": 0.0201,
5015
+ "rewards/accuracies": 1.0,
5016
+ "rewards/chosen": -5.605626106262207,
5017
+ "rewards/margins": 11.81916618347168,
5018
+ "rewards/rejected": -17.424793243408203,
5019
+ "step": 334
5020
+ },
5021
+ {
5022
+ "epoch": 0.014000480613513598,
5023
+ "grad_norm": 156.0076904296875,
5024
+ "learning_rate": 1.2235870926211619e-06,
5025
+ "logits/chosen": -2.927966833114624,
5026
+ "logits/rejected": -2.847036123275757,
5027
+ "logps/chosen": -438.1847839355469,
5028
+ "logps/rejected": -413.7171936035156,
5029
+ "loss": 1.4457,
5030
+ "rewards/accuracies": 0.75,
5031
+ "rewards/chosen": -14.443679809570312,
5032
+ "rewards/margins": 6.269698619842529,
5033
+ "rewards/rejected": -20.71337890625,
5034
+ "step": 335
5035
+ },
5036
+ {
5037
+ "epoch": 0.014042273092956923,
5038
+ "grad_norm": 217.4608612060547,
5039
+ "learning_rate": 1.067012561698319e-06,
5040
+ "logits/chosen": -2.8868706226348877,
5041
+ "logits/rejected": -2.8142154216766357,
5042
+ "logps/chosen": -215.70008850097656,
5043
+ "logps/rejected": -326.85589599609375,
5044
+ "loss": 1.4518,
5045
+ "rewards/accuracies": 0.875,
5046
+ "rewards/chosen": -5.566734313964844,
5047
+ "rewards/margins": 8.456573486328125,
5048
+ "rewards/rejected": -14.023307800292969,
5049
+ "step": 336
5050
+ },
5051
+ {
5052
+ "epoch": 0.014084065572400247,
5053
+ "grad_norm": 76.68461608886719,
5054
+ "learning_rate": 9.209358300585474e-07,
5055
+ "logits/chosen": -2.725767135620117,
5056
+ "logits/rejected": -2.780688762664795,
5057
+ "logps/chosen": -288.87896728515625,
5058
+ "logps/rejected": -286.787109375,
5059
+ "loss": 0.7406,
5060
+ "rewards/accuracies": 0.75,
5061
+ "rewards/chosen": -5.583068370819092,
5062
+ "rewards/margins": 9.488775253295898,
5063
+ "rewards/rejected": -15.071844100952148,
5064
+ "step": 337
5065
+ },
5066
+ {
5067
+ "epoch": 0.014125858051843571,
5068
+ "grad_norm": 75.13135528564453,
5069
+ "learning_rate": 7.854209717842231e-07,
5070
+ "logits/chosen": -2.362233877182007,
5071
+ "logits/rejected": -2.460773229598999,
5072
+ "logps/chosen": -325.7066650390625,
5073
+ "logps/rejected": -384.96832275390625,
5074
+ "loss": 0.8344,
5075
+ "rewards/accuracies": 1.0,
5076
+ "rewards/chosen": -10.121431350708008,
5077
+ "rewards/margins": 5.352227210998535,
5078
+ "rewards/rejected": -15.473657608032227,
5079
+ "step": 338
5080
+ },
5081
+ {
5082
+ "epoch": 0.014167650531286895,
5083
+ "grad_norm": 28.0489444732666,
5084
+ "learning_rate": 6.605274281709928e-07,
5085
+ "logits/chosen": -2.648594856262207,
5086
+ "logits/rejected": -2.7179548740386963,
5087
+ "logps/chosen": -253.41746520996094,
5088
+ "logps/rejected": -348.7283630371094,
5089
+ "loss": 0.3676,
5090
+ "rewards/accuracies": 1.0,
5091
+ "rewards/chosen": -9.479151725769043,
5092
+ "rewards/margins": 9.781237602233887,
5093
+ "rewards/rejected": -19.260387420654297,
5094
+ "step": 339
5095
+ },
5096
+ {
5097
+ "epoch": 0.01420944301073022,
5098
+ "grad_norm": 98.39196014404297,
5099
+ "learning_rate": 5.463099816548579e-07,
5100
+ "logits/chosen": -2.792505979537964,
5101
+ "logits/rejected": -2.649038076400757,
5102
+ "logps/chosen": -324.14288330078125,
5103
+ "logps/rejected": -330.5845642089844,
5104
+ "loss": 1.5468,
5105
+ "rewards/accuracies": 0.75,
5106
+ "rewards/chosen": -12.339548110961914,
5107
+ "rewards/margins": 5.8348469734191895,
5108
+ "rewards/rejected": -18.174394607543945,
5109
+ "step": 340
5110
+ },
5111
+ {
5112
+ "epoch": 0.014251235490173543,
5113
+ "grad_norm": 108.51323699951172,
5114
+ "learning_rate": 4.4281873178278475e-07,
5115
+ "logits/chosen": -2.9908299446105957,
5116
+ "logits/rejected": -2.8990650177001953,
5117
+ "logps/chosen": -239.39895629882812,
5118
+ "logps/rejected": -306.28094482421875,
5119
+ "loss": 0.1488,
5120
+ "rewards/accuracies": 1.0,
5121
+ "rewards/chosen": -9.657529830932617,
5122
+ "rewards/margins": 9.04963493347168,
5123
+ "rewards/rejected": -18.707164764404297,
5124
+ "step": 341
5125
+ },
5126
+ {
5127
+ "epoch": 0.014293027969616868,
5128
+ "grad_norm": 125.85196685791016,
5129
+ "learning_rate": 3.5009907323737825e-07,
5130
+ "logits/chosen": -3.3109447956085205,
5131
+ "logits/rejected": -3.237929344177246,
5132
+ "logps/chosen": -313.3047790527344,
5133
+ "logps/rejected": -364.43463134765625,
5134
+ "loss": 0.809,
5135
+ "rewards/accuracies": 0.875,
5136
+ "rewards/chosen": -9.16235065460205,
5137
+ "rewards/margins": 7.958996772766113,
5138
+ "rewards/rejected": -17.121347427368164,
5139
+ "step": 342
5140
+ },
5141
+ {
5142
+ "epoch": 0.014334820449060192,
5143
+ "grad_norm": 170.32565307617188,
5144
+ "learning_rate": 2.681916759252917e-07,
5145
+ "logits/chosen": -2.6865477561950684,
5146
+ "logits/rejected": -2.6000945568084717,
5147
+ "logps/chosen": -390.5458068847656,
5148
+ "logps/rejected": -411.51409912109375,
5149
+ "loss": 1.2146,
5150
+ "rewards/accuracies": 0.75,
5151
+ "rewards/chosen": -15.36628246307373,
5152
+ "rewards/margins": 4.630067825317383,
5153
+ "rewards/rejected": -19.99635124206543,
5154
+ "step": 343
5155
+ },
5156
+ {
5157
+ "epoch": 0.014376612928503516,
5158
+ "grad_norm": 85.18592071533203,
5159
+ "learning_rate": 1.9713246713805588e-07,
5160
+ "logits/chosen": -3.113831043243408,
5161
+ "logits/rejected": -3.068502426147461,
5162
+ "logps/chosen": -176.83450317382812,
5163
+ "logps/rejected": -275.0065612792969,
5164
+ "loss": 0.642,
5165
+ "rewards/accuracies": 0.875,
5166
+ "rewards/chosen": -6.174612522125244,
5167
+ "rewards/margins": 6.579104423522949,
5168
+ "rewards/rejected": -12.753717422485352,
5169
+ "step": 344
5170
+ },
5171
+ {
5172
+ "epoch": 0.01441840540794684,
5173
+ "grad_norm": 340.12322998046875,
5174
+ "learning_rate": 1.3695261579316777e-07,
5175
+ "logits/chosen": -3.1806676387786865,
5176
+ "logits/rejected": -3.2769603729248047,
5177
+ "logps/chosen": -351.36773681640625,
5178
+ "logps/rejected": -432.24249267578125,
5179
+ "loss": 3.8974,
5180
+ "rewards/accuracies": 0.75,
5181
+ "rewards/chosen": -11.651407241821289,
5182
+ "rewards/margins": 8.784332275390625,
5183
+ "rewards/rejected": -20.435739517211914,
5184
+ "step": 345
5185
+ },
5186
+ {
5187
+ "epoch": 0.014460197887390164,
5188
+ "grad_norm": 32.293914794921875,
5189
+ "learning_rate": 8.767851876239074e-08,
5190
+ "logits/chosen": -2.917407274246216,
5191
+ "logits/rejected": -2.8539297580718994,
5192
+ "logps/chosen": -185.53709411621094,
5193
+ "logps/rejected": -255.1479949951172,
5194
+ "loss": 0.4062,
5195
+ "rewards/accuracies": 0.875,
5196
+ "rewards/chosen": -7.592297077178955,
5197
+ "rewards/margins": 7.888314247131348,
5198
+ "rewards/rejected": -15.480610847473145,
5199
+ "step": 346
5200
+ },
5201
+ {
5202
+ "epoch": 0.014501990366833488,
5203
+ "grad_norm": 45.319358825683594,
5204
+ "learning_rate": 4.9331789293211026e-08,
5205
+ "logits/chosen": -2.7475247383117676,
5206
+ "logits/rejected": -2.596950054168701,
5207
+ "logps/chosen": -252.41659545898438,
5208
+ "logps/rejected": -370.46380615234375,
5209
+ "loss": 0.5195,
5210
+ "rewards/accuracies": 0.875,
5211
+ "rewards/chosen": -5.741701602935791,
5212
+ "rewards/margins": 10.352378845214844,
5213
+ "rewards/rejected": -16.094079971313477,
5214
+ "step": 347
5215
+ },
5216
+ {
5217
+ "epoch": 0.014543782846276812,
5218
+ "grad_norm": 0.7127296924591064,
5219
+ "learning_rate": 2.192924752854042e-08,
5220
+ "logits/chosen": -3.2812771797180176,
5221
+ "logits/rejected": -3.0997869968414307,
5222
+ "logps/chosen": -310.9162902832031,
5223
+ "logps/rejected": -349.9539794921875,
5224
+ "loss": 0.0015,
5225
+ "rewards/accuracies": 1.0,
5226
+ "rewards/chosen": -8.671211242675781,
5227
+ "rewards/margins": 12.159473419189453,
5228
+ "rewards/rejected": -20.8306827545166,
5229
+ "step": 348
5230
+ },
5231
+ {
5232
+ "epoch": 0.014585575325720137,
5233
+ "grad_norm": 56.26605224609375,
5234
+ "learning_rate": 5.48291312886251e-09,
5235
+ "logits/chosen": -2.7509982585906982,
5236
+ "logits/rejected": -2.6064796447753906,
5237
+ "logps/chosen": -298.1324462890625,
5238
+ "logps/rejected": -331.2496643066406,
5239
+ "loss": 1.4717,
5240
+ "rewards/accuracies": 0.625,
5241
+ "rewards/chosen": -9.22291374206543,
5242
+ "rewards/margins": 4.03200626373291,
5243
+ "rewards/rejected": -13.254920959472656,
5244
+ "step": 349
5245
+ },
5246
+ {
5247
+ "epoch": 0.01462736780516346,
5248
+ "grad_norm": 9.557753562927246,
5249
+ "learning_rate": 0.0,
5250
+ "logits/chosen": -3.0502820014953613,
5251
+ "logits/rejected": -2.965240240097046,
5252
+ "logps/chosen": -393.92120361328125,
5253
+ "logps/rejected": -409.619384765625,
5254
+ "loss": 0.0324,
5255
+ "rewards/accuracies": 1.0,
5256
+ "rewards/chosen": -9.593274116516113,
5257
+ "rewards/margins": 12.19149112701416,
5258
+ "rewards/rejected": -21.784767150878906,
5259
+ "step": 350
  }
  ],
  "logging_steps": 1,
@@ -4521,7 +5271,7 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }