Training in progress, step 350, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 671149168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1584ae76418d0ad06d3a0c687bd6381d31c844d9d4c2bdcc22249561ad11caf
|
3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 341314644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60ead3318824789c653c56b77c6d4d0aeb208de2dae7abc1143a2c723725a45f
|
3 |
size 341314644
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe5027b0a60817e9531c9bc52773bc5fa697b42d7d4016dd1b080e3c99c4d80a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4507,6 +4507,756 @@
|
|
4507 |
"rewards/margins": 5.479434967041016,
|
4508 |
"rewards/rejected": -19.215713500976562,
|
4509 |
"step": 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4510 |
}
|
4511 |
],
|
4512 |
"logging_steps": 1,
|
@@ -4521,7 +5271,7 @@
|
|
4521 |
"should_evaluate": false,
|
4522 |
"should_log": false,
|
4523 |
"should_save": true,
|
4524 |
-
"should_training_stop":
|
4525 |
},
|
4526 |
"attributes": {}
|
4527 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.01462736780516346,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 350,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4507 |
"rewards/margins": 5.479434967041016,
|
4508 |
"rewards/rejected": -19.215713500976562,
|
4509 |
"step": 300
|
4510 |
+
},
|
4511 |
+
{
|
4512 |
+
"epoch": 0.012579536312440576,
|
4513 |
+
"grad_norm": 491.7016296386719,
|
4514 |
+
"learning_rate": 1.2049324765671749e-05,
|
4515 |
+
"logits/chosen": -3.287421703338623,
|
4516 |
+
"logits/rejected": -3.370901584625244,
|
4517 |
+
"logps/chosen": -385.67474365234375,
|
4518 |
+
"logps/rejected": -419.36627197265625,
|
4519 |
+
"loss": 3.6965,
|
4520 |
+
"rewards/accuracies": 0.625,
|
4521 |
+
"rewards/chosen": -13.180034637451172,
|
4522 |
+
"rewards/margins": 4.595239639282227,
|
4523 |
+
"rewards/rejected": -17.7752742767334,
|
4524 |
+
"step": 301
|
4525 |
+
},
|
4526 |
+
{
|
4527 |
+
"epoch": 0.0126213287918839,
|
4528 |
+
"grad_norm": 263.8130187988281,
|
4529 |
+
"learning_rate": 1.1604330125525079e-05,
|
4530 |
+
"logits/chosen": -3.4727532863616943,
|
4531 |
+
"logits/rejected": -3.349696159362793,
|
4532 |
+
"logps/chosen": -267.19024658203125,
|
4533 |
+
"logps/rejected": -271.441650390625,
|
4534 |
+
"loss": 1.8457,
|
4535 |
+
"rewards/accuracies": 0.875,
|
4536 |
+
"rewards/chosen": -9.394519805908203,
|
4537 |
+
"rewards/margins": 6.870561599731445,
|
4538 |
+
"rewards/rejected": -16.26508140563965,
|
4539 |
+
"step": 302
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 0.012663121271327224,
|
4543 |
+
"grad_norm": 16.329265594482422,
|
4544 |
+
"learning_rate": 1.11652112689164e-05,
|
4545 |
+
"logits/chosen": -2.86873459815979,
|
4546 |
+
"logits/rejected": -2.7891271114349365,
|
4547 |
+
"logps/chosen": -356.82208251953125,
|
4548 |
+
"logps/rejected": -489.48175048828125,
|
4549 |
+
"loss": 0.1476,
|
4550 |
+
"rewards/accuracies": 1.0,
|
4551 |
+
"rewards/chosen": -17.0943660736084,
|
4552 |
+
"rewards/margins": 12.185678482055664,
|
4553 |
+
"rewards/rejected": -29.280044555664062,
|
4554 |
+
"step": 303
|
4555 |
+
},
|
4556 |
+
{
|
4557 |
+
"epoch": 0.012704913750770548,
|
4558 |
+
"grad_norm": 155.79867553710938,
|
4559 |
+
"learning_rate": 1.0732160807889211e-05,
|
4560 |
+
"logits/chosen": -3.2120351791381836,
|
4561 |
+
"logits/rejected": -3.2581393718719482,
|
4562 |
+
"logps/chosen": -293.7206115722656,
|
4563 |
+
"logps/rejected": -309.9013366699219,
|
4564 |
+
"loss": 0.5898,
|
4565 |
+
"rewards/accuracies": 0.875,
|
4566 |
+
"rewards/chosen": -9.185495376586914,
|
4567 |
+
"rewards/margins": 9.083627700805664,
|
4568 |
+
"rewards/rejected": -18.269123077392578,
|
4569 |
+
"step": 304
|
4570 |
+
},
|
4571 |
+
{
|
4572 |
+
"epoch": 0.012746706230213872,
|
4573 |
+
"grad_norm": 79.4925765991211,
|
4574 |
+
"learning_rate": 1.0305368692688174e-05,
|
4575 |
+
"logits/chosen": -3.025317668914795,
|
4576 |
+
"logits/rejected": -3.0122780799865723,
|
4577 |
+
"logps/chosen": -349.7897644042969,
|
4578 |
+
"logps/rejected": -384.69464111328125,
|
4579 |
+
"loss": 0.4964,
|
4580 |
+
"rewards/accuracies": 0.875,
|
4581 |
+
"rewards/chosen": -14.22126293182373,
|
4582 |
+
"rewards/margins": 6.923146724700928,
|
4583 |
+
"rewards/rejected": -21.1444091796875,
|
4584 |
+
"step": 305
|
4585 |
+
},
|
4586 |
+
{
|
4587 |
+
"epoch": 0.012788498709657196,
|
4588 |
+
"grad_norm": 273.6874694824219,
|
4589 |
+
"learning_rate": 9.88502212844063e-06,
|
4590 |
+
"logits/chosen": -2.805180311203003,
|
4591 |
+
"logits/rejected": -2.683415412902832,
|
4592 |
+
"logps/chosen": -467.9751892089844,
|
4593 |
+
"logps/rejected": -377.84820556640625,
|
4594 |
+
"loss": 1.7734,
|
4595 |
+
"rewards/accuracies": 0.875,
|
4596 |
+
"rewards/chosen": -19.00494956970215,
|
4597 |
+
"rewards/margins": 8.104949951171875,
|
4598 |
+
"rewards/rejected": -27.10989761352539,
|
4599 |
+
"step": 306
|
4600 |
+
},
|
4601 |
+
{
|
4602 |
+
"epoch": 0.01283029118910052,
|
4603 |
+
"grad_norm": 4.262240886688232,
|
4604 |
+
"learning_rate": 9.471305493042243e-06,
|
4605 |
+
"logits/chosen": -3.1370010375976562,
|
4606 |
+
"logits/rejected": -3.1797690391540527,
|
4607 |
+
"logps/chosen": -386.4381103515625,
|
4608 |
+
"logps/rejected": -456.6276550292969,
|
4609 |
+
"loss": 0.0166,
|
4610 |
+
"rewards/accuracies": 1.0,
|
4611 |
+
"rewards/chosen": -15.7275972366333,
|
4612 |
+
"rewards/margins": 8.137879371643066,
|
4613 |
+
"rewards/rejected": -23.865474700927734,
|
4614 |
+
"step": 307
|
4615 |
+
},
|
4616 |
+
{
|
4617 |
+
"epoch": 0.012872083668543845,
|
4618 |
+
"grad_norm": 198.5441436767578,
|
4619 |
+
"learning_rate": 9.064400256282757e-06,
|
4620 |
+
"logits/chosen": -3.035953998565674,
|
4621 |
+
"logits/rejected": -3.114983320236206,
|
4622 |
+
"logps/chosen": -304.5688171386719,
|
4623 |
+
"logps/rejected": -407.5628356933594,
|
4624 |
+
"loss": 0.2622,
|
4625 |
+
"rewards/accuracies": 1.0,
|
4626 |
+
"rewards/chosen": -10.267864227294922,
|
4627 |
+
"rewards/margins": 10.81235122680664,
|
4628 |
+
"rewards/rejected": -21.080215454101562,
|
4629 |
+
"step": 308
|
4630 |
+
},
|
4631 |
+
{
|
4632 |
+
"epoch": 0.012913876147987169,
|
4633 |
+
"grad_norm": 402.1909484863281,
|
4634 |
+
"learning_rate": 8.664484900247363e-06,
|
4635 |
+
"logits/chosen": -3.2249553203582764,
|
4636 |
+
"logits/rejected": -3.268974781036377,
|
4637 |
+
"logps/chosen": -228.5661163330078,
|
4638 |
+
"logps/rejected": -353.9108581542969,
|
4639 |
+
"loss": 7.305,
|
4640 |
+
"rewards/accuracies": 0.875,
|
4641 |
+
"rewards/chosen": -9.834470748901367,
|
4642 |
+
"rewards/margins": 7.000467777252197,
|
4643 |
+
"rewards/rejected": -16.834938049316406,
|
4644 |
+
"step": 309
|
4645 |
+
},
|
4646 |
+
{
|
4647 |
+
"epoch": 0.012955668627430493,
|
4648 |
+
"grad_norm": 18.060256958007812,
|
4649 |
+
"learning_rate": 8.271734841028553e-06,
|
4650 |
+
"logits/chosen": -2.8380823135375977,
|
4651 |
+
"logits/rejected": -2.7715110778808594,
|
4652 |
+
"logps/chosen": -249.33860778808594,
|
4653 |
+
"logps/rejected": -301.26629638671875,
|
4654 |
+
"loss": 0.1223,
|
4655 |
+
"rewards/accuracies": 1.0,
|
4656 |
+
"rewards/chosen": -11.448515892028809,
|
4657 |
+
"rewards/margins": 8.624532699584961,
|
4658 |
+
"rewards/rejected": -20.073047637939453,
|
4659 |
+
"step": 310
|
4660 |
+
},
|
4661 |
+
{
|
4662 |
+
"epoch": 0.012997461106873819,
|
4663 |
+
"grad_norm": 0.21917754411697388,
|
4664 |
+
"learning_rate": 7.886322351782783e-06,
|
4665 |
+
"logits/chosen": -3.0230164527893066,
|
4666 |
+
"logits/rejected": -2.8923580646514893,
|
4667 |
+
"logps/chosen": -209.99588012695312,
|
4668 |
+
"logps/rejected": -348.78564453125,
|
4669 |
+
"loss": 0.0003,
|
4670 |
+
"rewards/accuracies": 1.0,
|
4671 |
+
"rewards/chosen": -7.846118450164795,
|
4672 |
+
"rewards/margins": 12.90873908996582,
|
4673 |
+
"rewards/rejected": -20.75485610961914,
|
4674 |
+
"step": 311
|
4675 |
+
},
|
4676 |
+
{
|
4677 |
+
"epoch": 0.013039253586317143,
|
4678 |
+
"grad_norm": 80.00068664550781,
|
4679 |
+
"learning_rate": 7.508416487165862e-06,
|
4680 |
+
"logits/chosen": -2.6761441230773926,
|
4681 |
+
"logits/rejected": -2.6984710693359375,
|
4682 |
+
"logps/chosen": -256.08441162109375,
|
4683 |
+
"logps/rejected": -284.93353271484375,
|
4684 |
+
"loss": 1.1904,
|
4685 |
+
"rewards/accuracies": 0.875,
|
4686 |
+
"rewards/chosen": -8.947043418884277,
|
4687 |
+
"rewards/margins": 7.310464382171631,
|
4688 |
+
"rewards/rejected": -16.25750732421875,
|
4689 |
+
"step": 312
|
4690 |
+
},
|
4691 |
+
{
|
4692 |
+
"epoch": 0.013081046065760467,
|
4693 |
+
"grad_norm": 18.157445907592773,
|
4694 |
+
"learning_rate": 7.138183009179922e-06,
|
4695 |
+
"logits/chosen": -2.7042577266693115,
|
4696 |
+
"logits/rejected": -2.4380972385406494,
|
4697 |
+
"logps/chosen": -330.15972900390625,
|
4698 |
+
"logps/rejected": -362.9132385253906,
|
4699 |
+
"loss": 0.0393,
|
4700 |
+
"rewards/accuracies": 1.0,
|
4701 |
+
"rewards/chosen": -11.075960159301758,
|
4702 |
+
"rewards/margins": 10.102376937866211,
|
4703 |
+
"rewards/rejected": -21.17833709716797,
|
4704 |
+
"step": 313
|
4705 |
+
},
|
4706 |
+
{
|
4707 |
+
"epoch": 0.013122838545203791,
|
4708 |
+
"grad_norm": 136.31207275390625,
|
4709 |
+
"learning_rate": 6.775784314464717e-06,
|
4710 |
+
"logits/chosen": -3.101740598678589,
|
4711 |
+
"logits/rejected": -3.0880491733551025,
|
4712 |
+
"logps/chosen": -280.362548828125,
|
4713 |
+
"logps/rejected": -307.99456787109375,
|
4714 |
+
"loss": 1.9468,
|
4715 |
+
"rewards/accuracies": 0.875,
|
4716 |
+
"rewards/chosen": -9.248235702514648,
|
4717 |
+
"rewards/margins": 5.009317398071289,
|
4718 |
+
"rewards/rejected": -14.257552146911621,
|
4719 |
+
"step": 314
|
4720 |
+
},
|
4721 |
+
{
|
4722 |
+
"epoch": 0.013164631024647116,
|
4723 |
+
"grad_norm": 39.999839782714844,
|
4724 |
+
"learning_rate": 6.421379363065142e-06,
|
4725 |
+
"logits/chosen": -2.7587459087371826,
|
4726 |
+
"logits/rejected": -2.930101156234741,
|
4727 |
+
"logps/chosen": -398.45050048828125,
|
4728 |
+
"logps/rejected": -447.44342041015625,
|
4729 |
+
"loss": 0.2493,
|
4730 |
+
"rewards/accuracies": 1.0,
|
4731 |
+
"rewards/chosen": -10.370040893554688,
|
4732 |
+
"rewards/margins": 9.242597579956055,
|
4733 |
+
"rewards/rejected": -19.612638473510742,
|
4734 |
+
"step": 315
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 0.01320642350409044,
|
4738 |
+
"grad_norm": 1.3460954427719116,
|
4739 |
+
"learning_rate": 6.075123608706093e-06,
|
4740 |
+
"logits/chosen": -3.2334988117218018,
|
4741 |
+
"logits/rejected": -3.234659194946289,
|
4742 |
+
"logps/chosen": -251.70684814453125,
|
4743 |
+
"logps/rejected": -263.55120849609375,
|
4744 |
+
"loss": 0.0042,
|
4745 |
+
"rewards/accuracies": 1.0,
|
4746 |
+
"rewards/chosen": -5.089323043823242,
|
4747 |
+
"rewards/margins": 12.2367582321167,
|
4748 |
+
"rewards/rejected": -17.326082229614258,
|
4749 |
+
"step": 316
|
4750 |
+
},
|
4751 |
+
{
|
4752 |
+
"epoch": 0.013248215983533764,
|
4753 |
+
"grad_norm": 218.19578552246094,
|
4754 |
+
"learning_rate": 5.737168930605272e-06,
|
4755 |
+
"logits/chosen": -3.100266933441162,
|
4756 |
+
"logits/rejected": -3.374037027359009,
|
4757 |
+
"logps/chosen": -235.7011260986328,
|
4758 |
+
"logps/rejected": -234.38894653320312,
|
4759 |
+
"loss": 1.9371,
|
4760 |
+
"rewards/accuracies": 0.75,
|
4761 |
+
"rewards/chosen": -4.014512538909912,
|
4762 |
+
"rewards/margins": 6.0716729164123535,
|
4763 |
+
"rewards/rejected": -10.086185455322266,
|
4764 |
+
"step": 317
|
4765 |
+
},
|
4766 |
+
{
|
4767 |
+
"epoch": 0.013290008462977088,
|
4768 |
+
"grad_norm": 220.26162719726562,
|
4769 |
+
"learning_rate": 5.4076635668540075e-06,
|
4770 |
+
"logits/chosen": -3.213263988494873,
|
4771 |
+
"logits/rejected": -3.115088701248169,
|
4772 |
+
"logps/chosen": -344.0352783203125,
|
4773 |
+
"logps/rejected": -303.6358337402344,
|
4774 |
+
"loss": 5.0835,
|
4775 |
+
"rewards/accuracies": 0.875,
|
4776 |
+
"rewards/chosen": -7.557135105133057,
|
4777 |
+
"rewards/margins": 5.570637226104736,
|
4778 |
+
"rewards/rejected": -13.127771377563477,
|
4779 |
+
"step": 318
|
4780 |
+
},
|
4781 |
+
{
|
4782 |
+
"epoch": 0.013331800942420412,
|
4783 |
+
"grad_norm": 25.128589630126953,
|
4784 |
+
"learning_rate": 5.086752049395094e-06,
|
4785 |
+
"logits/chosen": -3.3187851905822754,
|
4786 |
+
"logits/rejected": -3.2173070907592773,
|
4787 |
+
"logps/chosen": -352.69342041015625,
|
4788 |
+
"logps/rejected": -370.28448486328125,
|
4789 |
+
"loss": 0.1174,
|
4790 |
+
"rewards/accuracies": 1.0,
|
4791 |
+
"rewards/chosen": -10.892807006835938,
|
4792 |
+
"rewards/margins": 8.983996391296387,
|
4793 |
+
"rewards/rejected": -19.876802444458008,
|
4794 |
+
"step": 319
|
4795 |
+
},
|
4796 |
+
{
|
4797 |
+
"epoch": 0.013373593421863736,
|
4798 |
+
"grad_norm": 28.23332977294922,
|
4799 |
+
"learning_rate": 4.7745751406263165e-06,
|
4800 |
+
"logits/chosen": -3.117103338241577,
|
4801 |
+
"logits/rejected": -3.0183892250061035,
|
4802 |
+
"logps/chosen": -183.39730834960938,
|
4803 |
+
"logps/rejected": -257.51708984375,
|
4804 |
+
"loss": 0.5492,
|
4805 |
+
"rewards/accuracies": 0.875,
|
4806 |
+
"rewards/chosen": -6.208366870880127,
|
4807 |
+
"rewards/margins": 6.4307146072387695,
|
4808 |
+
"rewards/rejected": -12.639081001281738,
|
4809 |
+
"step": 320
|
4810 |
+
},
|
4811 |
+
{
|
4812 |
+
"epoch": 0.01341538590130706,
|
4813 |
+
"grad_norm": 0.29758647084236145,
|
4814 |
+
"learning_rate": 4.4712697716574e-06,
|
4815 |
+
"logits/chosen": -3.1782500743865967,
|
4816 |
+
"logits/rejected": -3.3849024772644043,
|
4817 |
+
"logps/chosen": -313.7176208496094,
|
4818 |
+
"logps/rejected": -310.43206787109375,
|
4819 |
+
"loss": 0.3477,
|
4820 |
+
"rewards/accuracies": 1.0,
|
4821 |
+
"rewards/chosen": -5.478094100952148,
|
4822 |
+
"rewards/margins": 10.825754165649414,
|
4823 |
+
"rewards/rejected": -16.303848266601562,
|
4824 |
+
"step": 321
|
4825 |
+
},
|
4826 |
+
{
|
4827 |
+
"epoch": 0.013457178380750385,
|
4828 |
+
"grad_norm": 42.00798797607422,
|
4829 |
+
"learning_rate": 4.176968982247514e-06,
|
4830 |
+
"logits/chosen": -3.189098596572876,
|
4831 |
+
"logits/rejected": -3.191659450531006,
|
4832 |
+
"logps/chosen": -185.61148071289062,
|
4833 |
+
"logps/rejected": -265.11590576171875,
|
4834 |
+
"loss": 0.5371,
|
4835 |
+
"rewards/accuracies": 1.0,
|
4836 |
+
"rewards/chosen": -4.42623233795166,
|
4837 |
+
"rewards/margins": 8.69643783569336,
|
4838 |
+
"rewards/rejected": -13.12267017364502,
|
4839 |
+
"step": 322
|
4840 |
+
},
|
4841 |
+
{
|
4842 |
+
"epoch": 0.013498970860193709,
|
4843 |
+
"grad_norm": 25.100051879882812,
|
4844 |
+
"learning_rate": 3.891801862449629e-06,
|
4845 |
+
"logits/chosen": -2.929891586303711,
|
4846 |
+
"logits/rejected": -2.9269561767578125,
|
4847 |
+
"logps/chosen": -248.1248779296875,
|
4848 |
+
"logps/rejected": -295.5849914550781,
|
4849 |
+
"loss": 0.1219,
|
4850 |
+
"rewards/accuracies": 1.0,
|
4851 |
+
"rewards/chosen": -7.96785306930542,
|
4852 |
+
"rewards/margins": 8.257719039916992,
|
4853 |
+
"rewards/rejected": -16.22557258605957,
|
4854 |
+
"step": 323
|
4855 |
+
},
|
4856 |
+
{
|
4857 |
+
"epoch": 0.013540763339637033,
|
4858 |
+
"grad_norm": 340.0687255859375,
|
4859 |
+
"learning_rate": 3.6158934959873353e-06,
|
4860 |
+
"logits/chosen": -2.5700321197509766,
|
4861 |
+
"logits/rejected": -2.4596493244171143,
|
4862 |
+
"logps/chosen": -381.09490966796875,
|
4863 |
+
"logps/rejected": -405.3451843261719,
|
4864 |
+
"loss": 4.0703,
|
4865 |
+
"rewards/accuracies": 0.75,
|
4866 |
+
"rewards/chosen": -10.214908599853516,
|
4867 |
+
"rewards/margins": 6.979231834411621,
|
4868 |
+
"rewards/rejected": -17.19413948059082,
|
4869 |
+
"step": 324
|
4870 |
+
},
|
4871 |
+
{
|
4872 |
+
"epoch": 0.013582555819080357,
|
4873 |
+
"grad_norm": 200.0123291015625,
|
4874 |
+
"learning_rate": 3.3493649053890326e-06,
|
4875 |
+
"logits/chosen": -3.020430326461792,
|
4876 |
+
"logits/rejected": -3.0444626808166504,
|
4877 |
+
"logps/chosen": -332.80682373046875,
|
4878 |
+
"logps/rejected": -316.76641845703125,
|
4879 |
+
"loss": 0.9297,
|
4880 |
+
"rewards/accuracies": 0.875,
|
4881 |
+
"rewards/chosen": -13.117790222167969,
|
4882 |
+
"rewards/margins": 4.298157691955566,
|
4883 |
+
"rewards/rejected": -17.41594886779785,
|
4884 |
+
"step": 325
|
4885 |
+
},
|
4886 |
+
{
|
4887 |
+
"epoch": 0.013624348298523681,
|
4888 |
+
"grad_norm": 25.32855224609375,
|
4889 |
+
"learning_rate": 3.092332998903416e-06,
|
4890 |
+
"logits/chosen": -2.877979278564453,
|
4891 |
+
"logits/rejected": -2.9653923511505127,
|
4892 |
+
"logps/chosen": -297.63922119140625,
|
4893 |
+
"logps/rejected": -353.91668701171875,
|
4894 |
+
"loss": 0.2341,
|
4895 |
+
"rewards/accuracies": 1.0,
|
4896 |
+
"rewards/chosen": -9.10722541809082,
|
4897 |
+
"rewards/margins": 7.169858932495117,
|
4898 |
+
"rewards/rejected": -16.277084350585938,
|
4899 |
+
"step": 326
|
4900 |
+
},
|
4901 |
+
{
|
4902 |
+
"epoch": 0.013666140777967005,
|
4903 |
+
"grad_norm": 126.90220642089844,
|
4904 |
+
"learning_rate": 2.8449105192196316e-06,
|
4905 |
+
"logits/chosen": -3.0049197673797607,
|
4906 |
+
"logits/rejected": -3.0737524032592773,
|
4907 |
+
"logps/chosen": -148.86209106445312,
|
4908 |
+
"logps/rejected": -199.28346252441406,
|
4909 |
+
"loss": 1.2454,
|
4910 |
+
"rewards/accuracies": 0.75,
|
4911 |
+
"rewards/chosen": -2.6697428226470947,
|
4912 |
+
"rewards/margins": 6.129971981048584,
|
4913 |
+
"rewards/rejected": -8.799715042114258,
|
4914 |
+
"step": 327
|
4915 |
+
},
|
4916 |
+
{
|
4917 |
+
"epoch": 0.01370793325741033,
|
4918 |
+
"grad_norm": 118.86524963378906,
|
4919 |
+
"learning_rate": 2.6072059940146775e-06,
|
4920 |
+
"logits/chosen": -2.9463143348693848,
|
4921 |
+
"logits/rejected": -2.917930841445923,
|
4922 |
+
"logps/chosen": -245.6232452392578,
|
4923 |
+
"logps/rejected": -331.4186096191406,
|
4924 |
+
"loss": 0.3829,
|
4925 |
+
"rewards/accuracies": 1.0,
|
4926 |
+
"rewards/chosen": -6.183173179626465,
|
4927 |
+
"rewards/margins": 9.998785972595215,
|
4928 |
+
"rewards/rejected": -16.181961059570312,
|
4929 |
+
"step": 328
|
4930 |
+
},
|
4931 |
+
{
|
4932 |
+
"epoch": 0.013749725736853654,
|
4933 |
+
"grad_norm": 16.672832489013672,
|
4934 |
+
"learning_rate": 2.379323688349516e-06,
|
4935 |
+
"logits/chosen": -2.9064996242523193,
|
4936 |
+
"logits/rejected": -2.991321563720703,
|
4937 |
+
"logps/chosen": -241.98927307128906,
|
4938 |
+
"logps/rejected": -274.4370422363281,
|
4939 |
+
"loss": 0.1581,
|
4940 |
+
"rewards/accuracies": 1.0,
|
4941 |
+
"rewards/chosen": -5.3424072265625,
|
4942 |
+
"rewards/margins": 11.008346557617188,
|
4943 |
+
"rewards/rejected": -16.350753784179688,
|
4944 |
+
"step": 329
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 0.013791518216296978,
|
4948 |
+
"grad_norm": 46.51081848144531,
|
4949 |
+
"learning_rate": 2.1613635589349756e-06,
|
4950 |
+
"logits/chosen": -3.2066824436187744,
|
4951 |
+
"logits/rejected": -3.1287407875061035,
|
4952 |
+
"logps/chosen": -351.7823791503906,
|
4953 |
+
"logps/rejected": -451.6476135253906,
|
4954 |
+
"loss": 0.3389,
|
4955 |
+
"rewards/accuracies": 1.0,
|
4956 |
+
"rewards/chosen": -8.472803115844727,
|
4957 |
+
"rewards/margins": 10.613195419311523,
|
4958 |
+
"rewards/rejected": -19.086000442504883,
|
4959 |
+
"step": 330
|
4960 |
+
},
|
4961 |
+
{
|
4962 |
+
"epoch": 0.013833310695740302,
|
4963 |
+
"grad_norm": 332.8823547363281,
|
4964 |
+
"learning_rate": 1.95342121028749e-06,
|
4965 |
+
"logits/chosen": -2.901820182800293,
|
4966 |
+
"logits/rejected": -2.879037618637085,
|
4967 |
+
"logps/chosen": -323.36968994140625,
|
4968 |
+
"logps/rejected": -369.71441650390625,
|
4969 |
+
"loss": 2.4841,
|
4970 |
+
"rewards/accuracies": 0.875,
|
4971 |
+
"rewards/chosen": -9.933320045471191,
|
4972 |
+
"rewards/margins": 5.173391342163086,
|
4973 |
+
"rewards/rejected": -15.106710433959961,
|
4974 |
+
"step": 331
|
4975 |
+
},
|
4976 |
+
{
|
4977 |
+
"epoch": 0.013875103175183626,
|
4978 |
+
"grad_norm": 42.102596282958984,
|
4979 |
+
"learning_rate": 1.7555878527937164e-06,
|
4980 |
+
"logits/chosen": -3.0418646335601807,
|
4981 |
+
"logits/rejected": -2.8981995582580566,
|
4982 |
+
"logps/chosen": -288.25274658203125,
|
4983 |
+
"logps/rejected": -369.7027587890625,
|
4984 |
+
"loss": 0.2115,
|
4985 |
+
"rewards/accuracies": 1.0,
|
4986 |
+
"rewards/chosen": -7.725009441375732,
|
4987 |
+
"rewards/margins": 9.557852745056152,
|
4988 |
+
"rewards/rejected": -17.282861709594727,
|
4989 |
+
"step": 332
|
4990 |
+
},
|
4991 |
+
{
|
4992 |
+
"epoch": 0.01391689565462695,
|
4993 |
+
"grad_norm": 20.132675170898438,
|
4994 |
+
"learning_rate": 1.5679502627027136e-06,
|
4995 |
+
"logits/chosen": -2.849980354309082,
|
4996 |
+
"logits/rejected": -2.8271608352661133,
|
4997 |
+
"logps/chosen": -297.3507080078125,
|
4998 |
+
"logps/rejected": -400.2951965332031,
|
4999 |
+
"loss": 0.0436,
|
5000 |
+
"rewards/accuracies": 1.0,
|
5001 |
+
"rewards/chosen": -7.2351155281066895,
|
5002 |
+
"rewards/margins": 10.109538078308105,
|
5003 |
+
"rewards/rejected": -17.344654083251953,
|
5004 |
+
"step": 333
|
5005 |
+
},
|
5006 |
+
{
|
5007 |
+
"epoch": 0.013958688134070274,
|
5008 |
+
"grad_norm": 4.88221549987793,
|
5009 |
+
"learning_rate": 1.3905907440629752e-06,
|
5010 |
+
"logits/chosen": -2.9725608825683594,
|
5011 |
+
"logits/rejected": -3.0983333587646484,
|
5012 |
+
"logps/chosen": -230.85008239746094,
|
5013 |
+
"logps/rejected": -330.5326843261719,
|
5014 |
+
"loss": 0.0201,
|
5015 |
+
"rewards/accuracies": 1.0,
|
5016 |
+
"rewards/chosen": -5.605626106262207,
|
5017 |
+
"rewards/margins": 11.81916618347168,
|
5018 |
+
"rewards/rejected": -17.424793243408203,
|
5019 |
+
"step": 334
|
5020 |
+
},
|
5021 |
+
{
|
5022 |
+
"epoch": 0.014000480613513598,
|
5023 |
+
"grad_norm": 156.0076904296875,
|
5024 |
+
"learning_rate": 1.2235870926211619e-06,
|
5025 |
+
"logits/chosen": -2.927966833114624,
|
5026 |
+
"logits/rejected": -2.847036123275757,
|
5027 |
+
"logps/chosen": -438.1847839355469,
|
5028 |
+
"logps/rejected": -413.7171936035156,
|
5029 |
+
"loss": 1.4457,
|
5030 |
+
"rewards/accuracies": 0.75,
|
5031 |
+
"rewards/chosen": -14.443679809570312,
|
5032 |
+
"rewards/margins": 6.269698619842529,
|
5033 |
+
"rewards/rejected": -20.71337890625,
|
5034 |
+
"step": 335
|
5035 |
+
},
|
5036 |
+
{
|
5037 |
+
"epoch": 0.014042273092956923,
|
5038 |
+
"grad_norm": 217.4608612060547,
|
5039 |
+
"learning_rate": 1.067012561698319e-06,
|
5040 |
+
"logits/chosen": -2.8868706226348877,
|
5041 |
+
"logits/rejected": -2.8142154216766357,
|
5042 |
+
"logps/chosen": -215.70008850097656,
|
5043 |
+
"logps/rejected": -326.85589599609375,
|
5044 |
+
"loss": 1.4518,
|
5045 |
+
"rewards/accuracies": 0.875,
|
5046 |
+
"rewards/chosen": -5.566734313964844,
|
5047 |
+
"rewards/margins": 8.456573486328125,
|
5048 |
+
"rewards/rejected": -14.023307800292969,
|
5049 |
+
"step": 336
|
5050 |
+
},
|
5051 |
+
{
|
5052 |
+
"epoch": 0.014084065572400247,
|
5053 |
+
"grad_norm": 76.68461608886719,
|
5054 |
+
"learning_rate": 9.209358300585474e-07,
|
5055 |
+
"logits/chosen": -2.725767135620117,
|
5056 |
+
"logits/rejected": -2.780688762664795,
|
5057 |
+
"logps/chosen": -288.87896728515625,
|
5058 |
+
"logps/rejected": -286.787109375,
|
5059 |
+
"loss": 0.7406,
|
5060 |
+
"rewards/accuracies": 0.75,
|
5061 |
+
"rewards/chosen": -5.583068370819092,
|
5062 |
+
"rewards/margins": 9.488775253295898,
|
5063 |
+
"rewards/rejected": -15.071844100952148,
|
5064 |
+
"step": 337
|
5065 |
+
},
|
5066 |
+
{
|
5067 |
+
"epoch": 0.014125858051843571,
|
5068 |
+
"grad_norm": 75.13135528564453,
|
5069 |
+
"learning_rate": 7.854209717842231e-07,
|
5070 |
+
"logits/chosen": -2.362233877182007,
|
5071 |
+
"logits/rejected": -2.460773229598999,
|
5072 |
+
"logps/chosen": -325.7066650390625,
|
5073 |
+
"logps/rejected": -384.96832275390625,
|
5074 |
+
"loss": 0.8344,
|
5075 |
+
"rewards/accuracies": 1.0,
|
5076 |
+
"rewards/chosen": -10.121431350708008,
|
5077 |
+
"rewards/margins": 5.352227210998535,
|
5078 |
+
"rewards/rejected": -15.473657608032227,
|
5079 |
+
"step": 338
|
5080 |
+
},
|
5081 |
+
{
|
5082 |
+
"epoch": 0.014167650531286895,
|
5083 |
+
"grad_norm": 28.0489444732666,
|
5084 |
+
"learning_rate": 6.605274281709928e-07,
|
5085 |
+
"logits/chosen": -2.648594856262207,
|
5086 |
+
"logits/rejected": -2.7179548740386963,
|
5087 |
+
"logps/chosen": -253.41746520996094,
|
5088 |
+
"logps/rejected": -348.7283630371094,
|
5089 |
+
"loss": 0.3676,
|
5090 |
+
"rewards/accuracies": 1.0,
|
5091 |
+
"rewards/chosen": -9.479151725769043,
|
5092 |
+
"rewards/margins": 9.781237602233887,
|
5093 |
+
"rewards/rejected": -19.260387420654297,
|
5094 |
+
"step": 339
|
5095 |
+
},
|
5096 |
+
{
|
5097 |
+
"epoch": 0.01420944301073022,
|
5098 |
+
"grad_norm": 98.39196014404297,
|
5099 |
+
"learning_rate": 5.463099816548579e-07,
|
5100 |
+
"logits/chosen": -2.792505979537964,
|
5101 |
+
"logits/rejected": -2.649038076400757,
|
5102 |
+
"logps/chosen": -324.14288330078125,
|
5103 |
+
"logps/rejected": -330.5845642089844,
|
5104 |
+
"loss": 1.5468,
|
5105 |
+
"rewards/accuracies": 0.75,
|
5106 |
+
"rewards/chosen": -12.339548110961914,
|
5107 |
+
"rewards/margins": 5.8348469734191895,
|
5108 |
+
"rewards/rejected": -18.174394607543945,
|
5109 |
+
"step": 340
|
5110 |
+
},
|
5111 |
+
{
|
5112 |
+
"epoch": 0.014251235490173543,
|
5113 |
+
"grad_norm": 108.51323699951172,
|
5114 |
+
"learning_rate": 4.4281873178278475e-07,
|
5115 |
+
"logits/chosen": -2.9908299446105957,
|
5116 |
+
"logits/rejected": -2.8990650177001953,
|
5117 |
+
"logps/chosen": -239.39895629882812,
|
5118 |
+
"logps/rejected": -306.28094482421875,
|
5119 |
+
"loss": 0.1488,
|
5120 |
+
"rewards/accuracies": 1.0,
|
5121 |
+
"rewards/chosen": -9.657529830932617,
|
5122 |
+
"rewards/margins": 9.04963493347168,
|
5123 |
+
"rewards/rejected": -18.707164764404297,
|
5124 |
+
"step": 341
|
5125 |
+
},
|
5126 |
+
{
|
5127 |
+
"epoch": 0.014293027969616868,
|
5128 |
+
"grad_norm": 125.85196685791016,
|
5129 |
+
"learning_rate": 3.5009907323737825e-07,
|
5130 |
+
"logits/chosen": -3.3109447956085205,
|
5131 |
+
"logits/rejected": -3.237929344177246,
|
5132 |
+
"logps/chosen": -313.3047790527344,
|
5133 |
+
"logps/rejected": -364.43463134765625,
|
5134 |
+
"loss": 0.809,
|
5135 |
+
"rewards/accuracies": 0.875,
|
5136 |
+
"rewards/chosen": -9.16235065460205,
|
5137 |
+
"rewards/margins": 7.958996772766113,
|
5138 |
+
"rewards/rejected": -17.121347427368164,
|
5139 |
+
"step": 342
|
5140 |
+
},
|
5141 |
+
{
|
5142 |
+
"epoch": 0.014334820449060192,
|
5143 |
+
"grad_norm": 170.32565307617188,
|
5144 |
+
"learning_rate": 2.681916759252917e-07,
|
5145 |
+
"logits/chosen": -2.6865477561950684,
|
5146 |
+
"logits/rejected": -2.6000945568084717,
|
5147 |
+
"logps/chosen": -390.5458068847656,
|
5148 |
+
"logps/rejected": -411.51409912109375,
|
5149 |
+
"loss": 1.2146,
|
5150 |
+
"rewards/accuracies": 0.75,
|
5151 |
+
"rewards/chosen": -15.36628246307373,
|
5152 |
+
"rewards/margins": 4.630067825317383,
|
5153 |
+
"rewards/rejected": -19.99635124206543,
|
5154 |
+
"step": 343
|
5155 |
+
},
|
5156 |
+
{
|
5157 |
+
"epoch": 0.014376612928503516,
|
5158 |
+
"grad_norm": 85.18592071533203,
|
5159 |
+
"learning_rate": 1.9713246713805588e-07,
|
5160 |
+
"logits/chosen": -3.113831043243408,
|
5161 |
+
"logits/rejected": -3.068502426147461,
|
5162 |
+
"logps/chosen": -176.83450317382812,
|
5163 |
+
"logps/rejected": -275.0065612792969,
|
5164 |
+
"loss": 0.642,
|
5165 |
+
"rewards/accuracies": 0.875,
|
5166 |
+
"rewards/chosen": -6.174612522125244,
|
5167 |
+
"rewards/margins": 6.579104423522949,
|
5168 |
+
"rewards/rejected": -12.753717422485352,
|
5169 |
+
"step": 344
|
5170 |
+
},
|
5171 |
+
{
|
5172 |
+
"epoch": 0.01441840540794684,
|
5173 |
+
"grad_norm": 340.12322998046875,
|
5174 |
+
"learning_rate": 1.3695261579316777e-07,
|
5175 |
+
"logits/chosen": -3.1806676387786865,
|
5176 |
+
"logits/rejected": -3.2769603729248047,
|
5177 |
+
"logps/chosen": -351.36773681640625,
|
5178 |
+
"logps/rejected": -432.24249267578125,
|
5179 |
+
"loss": 3.8974,
|
5180 |
+
"rewards/accuracies": 0.75,
|
5181 |
+
"rewards/chosen": -11.651407241821289,
|
5182 |
+
"rewards/margins": 8.784332275390625,
|
5183 |
+
"rewards/rejected": -20.435739517211914,
|
5184 |
+
"step": 345
|
5185 |
+
},
|
5186 |
+
{
|
5187 |
+
"epoch": 0.014460197887390164,
|
5188 |
+
"grad_norm": 32.293914794921875,
|
5189 |
+
"learning_rate": 8.767851876239074e-08,
|
5190 |
+
"logits/chosen": -2.917407274246216,
|
5191 |
+
"logits/rejected": -2.8539297580718994,
|
5192 |
+
"logps/chosen": -185.53709411621094,
|
5193 |
+
"logps/rejected": -255.1479949951172,
|
5194 |
+
"loss": 0.4062,
|
5195 |
+
"rewards/accuracies": 0.875,
|
5196 |
+
"rewards/chosen": -7.592297077178955,
|
5197 |
+
"rewards/margins": 7.888314247131348,
|
5198 |
+
"rewards/rejected": -15.480610847473145,
|
5199 |
+
"step": 346
|
5200 |
+
},
|
5201 |
+
{
|
5202 |
+
"epoch": 0.014501990366833488,
|
5203 |
+
"grad_norm": 45.319358825683594,
|
5204 |
+
"learning_rate": 4.9331789293211026e-08,
|
5205 |
+
"logits/chosen": -2.7475247383117676,
|
5206 |
+
"logits/rejected": -2.596950054168701,
|
5207 |
+
"logps/chosen": -252.41659545898438,
|
5208 |
+
"logps/rejected": -370.46380615234375,
|
5209 |
+
"loss": 0.5195,
|
5210 |
+
"rewards/accuracies": 0.875,
|
5211 |
+
"rewards/chosen": -5.741701602935791,
|
5212 |
+
"rewards/margins": 10.352378845214844,
|
5213 |
+
"rewards/rejected": -16.094079971313477,
|
5214 |
+
"step": 347
|
5215 |
+
},
|
5216 |
+
{
|
5217 |
+
"epoch": 0.014543782846276812,
|
5218 |
+
"grad_norm": 0.7127296924591064,
|
5219 |
+
"learning_rate": 2.192924752854042e-08,
|
5220 |
+
"logits/chosen": -3.2812771797180176,
|
5221 |
+
"logits/rejected": -3.0997869968414307,
|
5222 |
+
"logps/chosen": -310.9162902832031,
|
5223 |
+
"logps/rejected": -349.9539794921875,
|
5224 |
+
"loss": 0.0015,
|
5225 |
+
"rewards/accuracies": 1.0,
|
5226 |
+
"rewards/chosen": -8.671211242675781,
|
5227 |
+
"rewards/margins": 12.159473419189453,
|
5228 |
+
"rewards/rejected": -20.8306827545166,
|
5229 |
+
"step": 348
|
5230 |
+
},
|
5231 |
+
{
|
5232 |
+
"epoch": 0.014585575325720137,
|
5233 |
+
"grad_norm": 56.26605224609375,
|
5234 |
+
"learning_rate": 5.48291312886251e-09,
|
5235 |
+
"logits/chosen": -2.7509982585906982,
|
5236 |
+
"logits/rejected": -2.6064796447753906,
|
5237 |
+
"logps/chosen": -298.1324462890625,
|
5238 |
+
"logps/rejected": -331.2496643066406,
|
5239 |
+
"loss": 1.4717,
|
5240 |
+
"rewards/accuracies": 0.625,
|
5241 |
+
"rewards/chosen": -9.22291374206543,
|
5242 |
+
"rewards/margins": 4.03200626373291,
|
5243 |
+
"rewards/rejected": -13.254920959472656,
|
5244 |
+
"step": 349
|
5245 |
+
},
|
5246 |
+
{
|
5247 |
+
"epoch": 0.01462736780516346,
|
5248 |
+
"grad_norm": 9.557753562927246,
|
5249 |
+
"learning_rate": 0.0,
|
5250 |
+
"logits/chosen": -3.0502820014953613,
|
5251 |
+
"logits/rejected": -2.965240240097046,
|
5252 |
+
"logps/chosen": -393.92120361328125,
|
5253 |
+
"logps/rejected": -409.619384765625,
|
5254 |
+
"loss": 0.0324,
|
5255 |
+
"rewards/accuracies": 1.0,
|
5256 |
+
"rewards/chosen": -9.593274116516113,
|
5257 |
+
"rewards/margins": 12.19149112701416,
|
5258 |
+
"rewards/rejected": -21.784767150878906,
|
5259 |
+
"step": 350
|
5260 |
}
|
5261 |
],
|
5262 |
"logging_steps": 1,
|
|
|
5271 |
"should_evaluate": false,
|
5272 |
"should_log": false,
|
5273 |
"should_save": true,
|
5274 |
+
"should_training_stop": true
|
5275 |
},
|
5276 |
"attributes": {}
|
5277 |
}
|