|
{ |
|
"best_metric": 0.307707816362381, |
|
"best_model_checkpoint": "saves/sycophancy/Llama-3.1-8B-Instruct/dpo-llama-1000/train/checkpoint-200", |
|
"epoch": 9.955555555555556, |
|
"eval_steps": 50, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 1.8430479764938354, |
|
"learning_rate": 8.928571428571429e-07, |
|
"logits/chosen": -0.42922043800354004, |
|
"logits/rejected": -0.48287302255630493, |
|
"logps/chosen": -22.499736785888672, |
|
"logps/rejected": -25.05568504333496, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 3.8243924791458994e-05, |
|
"rewards/margins": 0.00029587256722152233, |
|
"rewards/rejected": -0.00025762844597920775, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 1.1751757860183716, |
|
"learning_rate": 1.7857142857142859e-06, |
|
"logits/chosen": -0.4168773591518402, |
|
"logits/rejected": -0.4500039517879486, |
|
"logps/chosen": -23.05335807800293, |
|
"logps/rejected": -24.607397079467773, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0031167108099907637, |
|
"rewards/margins": 0.001834285445511341, |
|
"rewards/rejected": 0.0012824248988181353, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 2.2308878898620605, |
|
"learning_rate": 2.6785714285714285e-06, |
|
"logits/chosen": -0.4415621757507324, |
|
"logits/rejected": -0.47939205169677734, |
|
"logps/chosen": -23.05750846862793, |
|
"logps/rejected": -24.027509689331055, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.006902683060616255, |
|
"rewards/margins": 0.0014179922873154283, |
|
"rewards/rejected": 0.005484690889716148, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 1.4753832817077637, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"logits/chosen": -0.4203382134437561, |
|
"logits/rejected": -0.4865742623806, |
|
"logps/chosen": -21.71892738342285, |
|
"logps/rejected": -24.279401779174805, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.019825046882033348, |
|
"rewards/margins": 0.010711194016039371, |
|
"rewards/rejected": 0.009113854728639126, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 1.723747968673706, |
|
"learning_rate": 4.464285714285715e-06, |
|
"logits/chosen": -0.45647165179252625, |
|
"logits/rejected": -0.4898137152194977, |
|
"logps/chosen": -22.14567756652832, |
|
"logps/rejected": -24.119258880615234, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.04727761819958687, |
|
"rewards/margins": 0.024313444271683693, |
|
"rewards/rejected": 0.022964173927903175, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"eval_logits/chosen": -0.4114196300506592, |
|
"eval_logits/rejected": -0.4791559875011444, |
|
"eval_logps/chosen": -21.660146713256836, |
|
"eval_logps/rejected": -24.039798736572266, |
|
"eval_loss": 0.6706590056419373, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": 0.08334928005933762, |
|
"eval_rewards/margins": 0.048002347350120544, |
|
"eval_rewards/rejected": 0.03534693643450737, |
|
"eval_runtime": 12.9721, |
|
"eval_samples_per_second": 7.709, |
|
"eval_steps_per_second": 3.854, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 1.8534899950027466, |
|
"learning_rate": 4.999222955002041e-06, |
|
"logits/chosen": -0.4518943428993225, |
|
"logits/rejected": -0.4910917282104492, |
|
"logps/chosen": -22.00179672241211, |
|
"logps/rejected": -23.545581817626953, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.106198251247406, |
|
"rewards/margins": 0.05078822001814842, |
|
"rewards/rejected": 0.05541003867983818, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.2444444444444445, |
|
"grad_norm": 2.505237340927124, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits/chosen": -0.42713257670402527, |
|
"logits/rejected": -0.48525214195251465, |
|
"logps/chosen": -20.35358428955078, |
|
"logps/rejected": -23.495656967163086, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.25575509667396545, |
|
"rewards/margins": 0.14482033252716064, |
|
"rewards/rejected": 0.11093475669622421, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 2.34883713722229, |
|
"learning_rate": 4.9720770655628216e-06, |
|
"logits/chosen": -0.40827736258506775, |
|
"logits/rejected": -0.45998048782348633, |
|
"logps/chosen": -19.513925552368164, |
|
"logps/rejected": -23.603103637695312, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4022657573223114, |
|
"rewards/margins": 0.26808053255081177, |
|
"rewards/rejected": 0.13418518006801605, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 3.0297181606292725, |
|
"learning_rate": 4.944065422298262e-06, |
|
"logits/chosen": -0.3988552689552307, |
|
"logits/rejected": -0.465530127286911, |
|
"logps/chosen": -14.463418960571289, |
|
"logps/rejected": -21.68548583984375, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": 0.7595838904380798, |
|
"rewards/margins": 0.5038682818412781, |
|
"rewards/rejected": 0.25571563839912415, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 2.1064534187316895, |
|
"learning_rate": 4.90656061737503e-06, |
|
"logits/chosen": -0.3954499363899231, |
|
"logits/rejected": -0.448087602853775, |
|
"logps/chosen": -13.89409065246582, |
|
"logps/rejected": -21.66886329650879, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.9080438613891602, |
|
"rewards/margins": 0.6023429036140442, |
|
"rewards/rejected": 0.30570098757743835, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"eval_logits/chosen": -0.3558562695980072, |
|
"eval_logits/rejected": -0.4376991093158722, |
|
"eval_logps/chosen": -12.185461044311523, |
|
"eval_logps/rejected": -22.450639724731445, |
|
"eval_loss": 0.4427681863307953, |
|
"eval_rewards/accuracies": 0.7899999618530273, |
|
"eval_rewards/chosen": 1.0308177471160889, |
|
"eval_rewards/margins": 0.8365544080734253, |
|
"eval_rewards/rejected": 0.19426332414150238, |
|
"eval_runtime": 12.9808, |
|
"eval_samples_per_second": 7.704, |
|
"eval_steps_per_second": 3.852, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9555555555555557, |
|
"grad_norm": 2.931056261062622, |
|
"learning_rate": 4.859708325770919e-06, |
|
"logits/chosen": -0.3505342900753021, |
|
"logits/rejected": -0.41251203417778015, |
|
"logps/chosen": -12.102005004882812, |
|
"logps/rejected": -22.603740692138672, |
|
"loss": 0.4419, |
|
"rewards/accuracies": 0.8375000357627869, |
|
"rewards/chosen": 0.9892138838768005, |
|
"rewards/margins": 0.8685030341148376, |
|
"rewards/rejected": 0.12071088701486588, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 3.659130573272705, |
|
"learning_rate": 4.80369052967602e-06, |
|
"logits/chosen": -0.3336087167263031, |
|
"logits/rejected": -0.4222935140132904, |
|
"logps/chosen": -11.186881065368652, |
|
"logps/rejected": -24.831924438476562, |
|
"loss": 0.365, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.164413332939148, |
|
"rewards/margins": 1.20394766330719, |
|
"rewards/rejected": -0.03953445702791214, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.311111111111111, |
|
"grad_norm": 3.590104103088379, |
|
"learning_rate": 4.7387248116432524e-06, |
|
"logits/chosen": -0.319705069065094, |
|
"logits/rejected": -0.4010167717933655, |
|
"logps/chosen": -10.573092460632324, |
|
"logps/rejected": -25.705326080322266, |
|
"loss": 0.3523, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.1774530410766602, |
|
"rewards/margins": 1.3132127523422241, |
|
"rewards/rejected": -0.13575978577136993, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.488888888888889, |
|
"grad_norm": 3.342087984085083, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": -0.33713865280151367, |
|
"logits/rejected": -0.40943965315818787, |
|
"logps/chosen": -11.759323120117188, |
|
"logps/rejected": -27.167184829711914, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.0846505165100098, |
|
"rewards/margins": 1.4101663827896118, |
|
"rewards/rejected": -0.3255158066749573, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 1.9896889925003052, |
|
"learning_rate": 4.5829927360311224e-06, |
|
"logits/chosen": -0.2843226492404938, |
|
"logits/rejected": -0.3789869546890259, |
|
"logps/chosen": -10.016444206237793, |
|
"logps/rejected": -29.09038734436035, |
|
"loss": 0.2979, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.313085675239563, |
|
"rewards/margins": 1.761974573135376, |
|
"rewards/rejected": -0.4488888680934906, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"eval_logits/chosen": -0.2695091664791107, |
|
"eval_logits/rejected": -0.36552613973617554, |
|
"eval_logps/chosen": -9.01309871673584, |
|
"eval_logps/rejected": -28.563730239868164, |
|
"eval_loss": 0.32149210572242737, |
|
"eval_rewards/accuracies": 0.85999995470047, |
|
"eval_rewards/chosen": 1.348054051399231, |
|
"eval_rewards/margins": 1.7650997638702393, |
|
"eval_rewards/rejected": -0.4170458912849426, |
|
"eval_runtime": 13.0246, |
|
"eval_samples_per_second": 7.678, |
|
"eval_steps_per_second": 3.839, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 1.8847765922546387, |
|
"learning_rate": 4.492831268057307e-06, |
|
"logits/chosen": -0.2392854541540146, |
|
"logits/rejected": -0.32519179582595825, |
|
"logps/chosen": -10.91231918334961, |
|
"logps/rejected": -29.385107040405273, |
|
"loss": 0.3806, |
|
"rewards/accuracies": 0.8375000357627869, |
|
"rewards/chosen": 1.174955129623413, |
|
"rewards/margins": 1.6338344812393188, |
|
"rewards/rejected": -0.458879292011261, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.022222222222222, |
|
"grad_norm": 4.04591703414917, |
|
"learning_rate": 4.394929307863633e-06, |
|
"logits/chosen": -0.25453540682792664, |
|
"logits/rejected": -0.3346460461616516, |
|
"logps/chosen": -9.183024406433105, |
|
"logps/rejected": -30.89423942565918, |
|
"loss": 0.2928, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.3575671911239624, |
|
"rewards/margins": 1.941465973854065, |
|
"rewards/rejected": -0.5838987231254578, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 4.6089653968811035, |
|
"learning_rate": 4.289667123149296e-06, |
|
"logits/chosen": -0.2588854432106018, |
|
"logits/rejected": -0.344396710395813, |
|
"logps/chosen": -8.893033027648926, |
|
"logps/rejected": -30.978750228881836, |
|
"loss": 0.2991, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.382232427597046, |
|
"rewards/margins": 1.9790451526641846, |
|
"rewards/rejected": -0.596812903881073, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.3777777777777778, |
|
"grad_norm": 2.9751718044281006, |
|
"learning_rate": 4.177453569964925e-06, |
|
"logits/chosen": -0.21491435170173645, |
|
"logits/rejected": -0.30069640278816223, |
|
"logps/chosen": -8.679551124572754, |
|
"logps/rejected": -32.274330139160156, |
|
"loss": 0.2848, |
|
"rewards/accuracies": 0.8687500357627869, |
|
"rewards/chosen": 1.3893749713897705, |
|
"rewards/margins": 2.1629250049591064, |
|
"rewards/rejected": -0.7735500931739807, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 4.007388591766357, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -0.2135535329580307, |
|
"logits/rejected": -0.280956506729126, |
|
"logps/chosen": -8.478487968444824, |
|
"logps/rejected": -31.442169189453125, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.4557212591171265, |
|
"rewards/margins": 2.184141159057617, |
|
"rewards/rejected": -0.7284198999404907, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"eval_logits/chosen": -0.21535295248031616, |
|
"eval_logits/rejected": -0.31062132120132446, |
|
"eval_logps/chosen": -7.679609775543213, |
|
"eval_logps/rejected": -31.993576049804688, |
|
"eval_loss": 0.307707816362381, |
|
"eval_rewards/accuracies": 0.8499999642372131, |
|
"eval_rewards/chosen": 1.48140287399292, |
|
"eval_rewards/margins": 2.2414333820343018, |
|
"eval_rewards/rejected": -0.7600305080413818, |
|
"eval_runtime": 13.0936, |
|
"eval_samples_per_second": 7.637, |
|
"eval_steps_per_second": 3.819, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 20.840646743774414, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits/chosen": -0.21822166442871094, |
|
"logits/rejected": -0.3239297866821289, |
|
"logps/chosen": -10.312289237976074, |
|
"logps/rejected": -33.20119857788086, |
|
"loss": 0.3321, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.287852168083191, |
|
"rewards/margins": 2.1572673320770264, |
|
"rewards/rejected": -0.8694152235984802, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.911111111111111, |
|
"grad_norm": 15.999884605407715, |
|
"learning_rate": 3.8035880084487454e-06, |
|
"logits/chosen": -0.21503937244415283, |
|
"logits/rejected": -0.29350587725639343, |
|
"logps/chosen": -8.648448944091797, |
|
"logps/rejected": -34.50216293334961, |
|
"loss": 0.2686, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.3622721433639526, |
|
"rewards/margins": 2.405822992324829, |
|
"rewards/rejected": -1.0435508489608765, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.088888888888889, |
|
"grad_norm": 7.8126959800720215, |
|
"learning_rate": 3.6681715706826555e-06, |
|
"logits/chosen": -0.22714447975158691, |
|
"logits/rejected": -0.3037336468696594, |
|
"logps/chosen": -9.144305229187012, |
|
"logps/rejected": -35.50819778442383, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.3389980792999268, |
|
"rewards/margins": 2.4519691467285156, |
|
"rewards/rejected": -1.1129711866378784, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 9.81760311126709, |
|
"learning_rate": 3.5282177578265295e-06, |
|
"logits/chosen": -0.1930251568555832, |
|
"logits/rejected": -0.28283536434173584, |
|
"logps/chosen": -9.54443073272705, |
|
"logps/rejected": -36.10524368286133, |
|
"loss": 0.2864, |
|
"rewards/accuracies": 0.9000000357627869, |
|
"rewards/chosen": 1.4175978899002075, |
|
"rewards/margins": 2.55643630027771, |
|
"rewards/rejected": -1.1388384103775024, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 5.866613864898682, |
|
"learning_rate": 3.384270174056454e-06, |
|
"logits/chosen": -0.21315816044807434, |
|
"logits/rejected": -0.30596357583999634, |
|
"logps/chosen": -8.75170612335205, |
|
"logps/rejected": -37.464111328125, |
|
"loss": 0.2747, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.375083088874817, |
|
"rewards/margins": 2.6868672370910645, |
|
"rewards/rejected": -1.3117841482162476, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"eval_logits/chosen": -0.18717029690742493, |
|
"eval_logits/rejected": -0.28792956471443176, |
|
"eval_logps/chosen": -8.346588134765625, |
|
"eval_logps/rejected": -36.838497161865234, |
|
"eval_loss": 0.3183891177177429, |
|
"eval_rewards/accuracies": 0.85999995470047, |
|
"eval_rewards/chosen": 1.4147050380706787, |
|
"eval_rewards/margins": 2.6592278480529785, |
|
"eval_rewards/rejected": -1.2445228099822998, |
|
"eval_runtime": 13.1818, |
|
"eval_samples_per_second": 7.586, |
|
"eval_steps_per_second": 3.793, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.622222222222222, |
|
"grad_norm": 1.7029662132263184, |
|
"learning_rate": 3.236887936027261e-06, |
|
"logits/chosen": -0.19846662878990173, |
|
"logits/rejected": -0.27518361806869507, |
|
"logps/chosen": -8.800247192382812, |
|
"logps/rejected": -38.98476791381836, |
|
"loss": 0.2502, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.383112907409668, |
|
"rewards/margins": 2.8278017044067383, |
|
"rewards/rejected": -1.4446886777877808, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.8134036064147949, |
|
"learning_rate": 3.0866435011692884e-06, |
|
"logits/chosen": -0.1693725436925888, |
|
"logits/rejected": -0.2626606523990631, |
|
"logps/chosen": -7.444068908691406, |
|
"logps/rejected": -39.20288848876953, |
|
"loss": 0.2289, |
|
"rewards/accuracies": 0.9000000357627869, |
|
"rewards/chosen": 1.527998447418213, |
|
"rewards/margins": 2.998661994934082, |
|
"rewards/rejected": -1.47066330909729, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.977777777777778, |
|
"grad_norm": 6.755254745483398, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -0.1920958012342453, |
|
"logits/rejected": -0.28297463059425354, |
|
"logps/chosen": -8.827613830566406, |
|
"logps/rejected": -37.988773345947266, |
|
"loss": 0.2723, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.3093467950820923, |
|
"rewards/margins": 2.6953580379486084, |
|
"rewards/rejected": -1.3860112428665161, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.155555555555556, |
|
"grad_norm": 8.348061561584473, |
|
"learning_rate": 2.7799111902582697e-06, |
|
"logits/chosen": -0.19827289879322052, |
|
"logits/rejected": -0.28890833258628845, |
|
"logps/chosen": -8.641542434692383, |
|
"logps/rejected": -37.83559036254883, |
|
"loss": 0.2319, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.4176464080810547, |
|
"rewards/margins": 2.750339984893799, |
|
"rewards/rejected": -1.3326934576034546, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 4.27839469909668, |
|
"learning_rate": 2.624614714151743e-06, |
|
"logits/chosen": -0.16082732379436493, |
|
"logits/rejected": -0.2566200792789459, |
|
"logps/chosen": -7.895478248596191, |
|
"logps/rejected": -37.70954132080078, |
|
"loss": 0.2688, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.4443522691726685, |
|
"rewards/margins": 2.8291947841644287, |
|
"rewards/rejected": -1.3848422765731812, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"eval_logits/chosen": -0.17137397825717926, |
|
"eval_logits/rejected": -0.27050745487213135, |
|
"eval_logps/chosen": -8.0242280960083, |
|
"eval_logps/rejected": -37.18735122680664, |
|
"eval_loss": 0.3195304572582245, |
|
"eval_rewards/accuracies": 0.8499999642372131, |
|
"eval_rewards/chosen": 1.4469408988952637, |
|
"eval_rewards/margins": 2.726349115371704, |
|
"eval_rewards/rejected": -1.2794082164764404, |
|
"eval_runtime": 13.1946, |
|
"eval_samples_per_second": 7.579, |
|
"eval_steps_per_second": 3.789, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.511111111111111, |
|
"grad_norm": 1.2177822589874268, |
|
"learning_rate": 2.4688342135114625e-06, |
|
"logits/chosen": -0.18045730888843536, |
|
"logits/rejected": -0.2632906138896942, |
|
"logps/chosen": -8.068435668945312, |
|
"logps/rejected": -38.28617477416992, |
|
"loss": 0.2432, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.5070723295211792, |
|
"rewards/margins": 2.8888046741485596, |
|
"rewards/rejected": -1.3817322254180908, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.688888888888889, |
|
"grad_norm": 6.162428379058838, |
|
"learning_rate": 2.3131747660339396e-06, |
|
"logits/chosen": -0.16258001327514648, |
|
"logits/rejected": -0.2531837522983551, |
|
"logps/chosen": -7.69020938873291, |
|
"logps/rejected": -39.50102233886719, |
|
"loss": 0.2155, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.4402731657028198, |
|
"rewards/margins": 2.9210917949676514, |
|
"rewards/rejected": -1.4808186292648315, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.866666666666667, |
|
"grad_norm": 4.585377216339111, |
|
"learning_rate": 2.158240979224817e-06, |
|
"logits/chosen": -0.16741995513439178, |
|
"logits/rejected": -0.25773563981056213, |
|
"logps/chosen": -7.611827373504639, |
|
"logps/rejected": -40.226985931396484, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.5339945554733276, |
|
"rewards/margins": 3.106376886367798, |
|
"rewards/rejected": -1.5723823308944702, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 6.044444444444444, |
|
"grad_norm": 1.262926697731018, |
|
"learning_rate": 2.004634642001507e-06, |
|
"logits/chosen": -0.17107848823070526, |
|
"logits/rejected": -0.2691803276538849, |
|
"logps/chosen": -8.973631858825684, |
|
"logps/rejected": -41.54354476928711, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.9312500357627869, |
|
"rewards/chosen": 1.4238203763961792, |
|
"rewards/margins": 3.09421968460083, |
|
"rewards/rejected": -1.67039954662323, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.222222222222222, |
|
"grad_norm": 7.014649868011475, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits/chosen": -0.17273230850696564, |
|
"logits/rejected": -0.2703326344490051, |
|
"logps/chosen": -8.288504600524902, |
|
"logps/rejected": -42.46126174926758, |
|
"loss": 0.2047, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.4250819683074951, |
|
"rewards/margins": 3.235600709915161, |
|
"rewards/rejected": -1.8105186223983765, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.222222222222222, |
|
"eval_logits/chosen": -0.15530475974082947, |
|
"eval_logits/rejected": -0.2578139305114746, |
|
"eval_logps/chosen": -9.47491455078125, |
|
"eval_logps/rejected": -40.34950637817383, |
|
"eval_loss": 0.3629891574382782, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": 1.3018722534179688, |
|
"eval_rewards/margins": 2.897495746612549, |
|
"eval_rewards/rejected": -1.59562349319458, |
|
"eval_runtime": 13.1284, |
|
"eval_samples_per_second": 7.617, |
|
"eval_steps_per_second": 3.809, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 8.984504699707031, |
|
"learning_rate": 1.7037833743707892e-06, |
|
"logits/chosen": -0.14409998059272766, |
|
"logits/rejected": -0.2163417637348175, |
|
"logps/chosen": -8.419466018676758, |
|
"logps/rejected": -41.356468200683594, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.9000000357627869, |
|
"rewards/chosen": 1.4324077367782593, |
|
"rewards/margins": 3.1765902042388916, |
|
"rewards/rejected": -1.7441825866699219, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.5777777777777775, |
|
"grad_norm": 7.33497953414917, |
|
"learning_rate": 1.5577070009474872e-06, |
|
"logits/chosen": -0.14226722717285156, |
|
"logits/rejected": -0.23484404385089874, |
|
"logps/chosen": -7.855214595794678, |
|
"logps/rejected": -42.96932601928711, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.9000000357627869, |
|
"rewards/chosen": 1.4563724994659424, |
|
"rewards/margins": 3.2921605110168457, |
|
"rewards/rejected": -1.8357880115509033, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.7555555555555555, |
|
"grad_norm": 1.3846873044967651, |
|
"learning_rate": 1.415290652206105e-06, |
|
"logits/chosen": -0.11893842369318008, |
|
"logits/rejected": -0.22303898632526398, |
|
"logps/chosen": -5.515126705169678, |
|
"logps/rejected": -43.147682189941406, |
|
"loss": 0.1296, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.6900787353515625, |
|
"rewards/margins": 3.544553756713867, |
|
"rewards/rejected": -1.8544749021530151, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.933333333333334, |
|
"grad_norm": 3.4178779125213623, |
|
"learning_rate": 1.2770874972267777e-06, |
|
"logits/chosen": -0.2029663324356079, |
|
"logits/rejected": -0.27416250109672546, |
|
"logps/chosen": -10.447962760925293, |
|
"logps/rejected": -41.886138916015625, |
|
"loss": 0.2833, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.2835975885391235, |
|
"rewards/margins": 3.022332191467285, |
|
"rewards/rejected": -1.738734483718872, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"grad_norm": 5.459321975708008, |
|
"learning_rate": 1.1436343403356019e-06, |
|
"logits/chosen": -0.14208023250102997, |
|
"logits/rejected": -0.22397640347480774, |
|
"logps/chosen": -8.451988220214844, |
|
"logps/rejected": -42.06145095825195, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.4957071542739868, |
|
"rewards/margins": 3.238795518875122, |
|
"rewards/rejected": -1.7430883646011353, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"eval_logits/chosen": -0.1451883465051651, |
|
"eval_logits/rejected": -0.24787528812885284, |
|
"eval_logps/chosen": -8.884222030639648, |
|
"eval_logps/rejected": -41.02870178222656, |
|
"eval_loss": 0.352620393037796, |
|
"eval_rewards/accuracies": 0.8499999642372131, |
|
"eval_rewards/chosen": 1.3609414100646973, |
|
"eval_rewards/margins": 3.024484872817993, |
|
"eval_rewards/rejected": -1.6635433435440063, |
|
"eval_runtime": 13.1853, |
|
"eval_samples_per_second": 7.584, |
|
"eval_steps_per_second": 3.792, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.288888888888889, |
|
"grad_norm": 2.762150764465332, |
|
"learning_rate": 1.0154495360662464e-06, |
|
"logits/chosen": -0.18289707601070404, |
|
"logits/rejected": -0.28008756041526794, |
|
"logps/chosen": -7.561173439025879, |
|
"logps/rejected": -43.28544998168945, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.419569730758667, |
|
"rewards/margins": 3.3263187408447266, |
|
"rewards/rejected": -1.9067490100860596, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.466666666666667, |
|
"grad_norm": 2.009507417678833, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits/chosen": -0.12424879521131516, |
|
"logits/rejected": -0.22191472351551056, |
|
"logps/chosen": -7.338543891906738, |
|
"logps/rejected": -42.68900680541992, |
|
"loss": 0.2035, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.533159852027893, |
|
"rewards/margins": 3.3633785247802734, |
|
"rewards/rejected": -1.8302189111709595, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.644444444444445, |
|
"grad_norm": 1.1898467540740967, |
|
"learning_rate": 7.768541537901325e-07, |
|
"logits/chosen": -0.13503606617450714, |
|
"logits/rejected": -0.23675648868083954, |
|
"logps/chosen": -8.620238304138184, |
|
"logps/rejected": -44.113094329833984, |
|
"loss": 0.1728, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.4350792169570923, |
|
"rewards/margins": 3.3960673809051514, |
|
"rewards/rejected": -1.9609882831573486, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.822222222222222, |
|
"grad_norm": 6.634049892425537, |
|
"learning_rate": 6.673703204254348e-07, |
|
"logits/chosen": -0.13498146831989288, |
|
"logits/rejected": -0.21733498573303223, |
|
"logps/chosen": -8.704492568969727, |
|
"logps/rejected": -43.417938232421875, |
|
"loss": 0.2495, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.4058576822280884, |
|
"rewards/margins": 3.286863088607788, |
|
"rewards/rejected": -1.8810051679611206, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.482236385345459, |
|
"learning_rate": 5.650047293344316e-07, |
|
"logits/chosen": -0.14725255966186523, |
|
"logits/rejected": -0.2412375509738922, |
|
"logps/chosen": -7.016914367675781, |
|
"logps/rejected": -44.169490814208984, |
|
"loss": 0.144, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5698460340499878, |
|
"rewards/margins": 3.528562307357788, |
|
"rewards/rejected": -1.9587162733078003, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_logits/chosen": -0.14205148816108704, |
|
"eval_logits/rejected": -0.24482183158397675, |
|
"eval_logps/chosen": -9.005942344665527, |
|
"eval_logps/rejected": -41.425498962402344, |
|
"eval_loss": 0.3662210702896118, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": 1.3487695455551147, |
|
"eval_rewards/margins": 3.051992654800415, |
|
"eval_rewards/rejected": -1.7032231092453003, |
|
"eval_runtime": 13.1525, |
|
"eval_samples_per_second": 7.603, |
|
"eval_steps_per_second": 3.802, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.177777777777777, |
|
"grad_norm": 18.307058334350586, |
|
"learning_rate": 4.7015498571035877e-07, |
|
"logits/chosen": -0.10620441287755966, |
|
"logits/rejected": -0.19579415023326874, |
|
"logps/chosen": -9.014161109924316, |
|
"logps/rejected": -43.37262725830078, |
|
"loss": 0.2079, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.3402118682861328, |
|
"rewards/margins": 3.250830888748169, |
|
"rewards/rejected": -1.910618782043457, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.355555555555556, |
|
"grad_norm": 1.149519681930542, |
|
"learning_rate": 3.831895019292897e-07, |
|
"logits/chosen": -0.12923561036586761, |
|
"logits/rejected": -0.24200458824634552, |
|
"logps/chosen": -6.25982666015625, |
|
"logps/rejected": -44.513946533203125, |
|
"loss": 0.1659, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.5601545572280884, |
|
"rewards/margins": 3.614009141921997, |
|
"rewards/rejected": -2.0538547039031982, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.533333333333333, |
|
"grad_norm": 9.660409927368164, |
|
"learning_rate": 3.044460665744284e-07, |
|
"logits/chosen": -0.12687157094478607, |
|
"logits/rejected": -0.2105720490217209, |
|
"logps/chosen": -8.170889854431152, |
|
"logps/rejected": -43.91897201538086, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 0.9000000357627869, |
|
"rewards/chosen": 1.490912675857544, |
|
"rewards/margins": 3.416290283203125, |
|
"rewards/rejected": -1.9253777265548706, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.71111111111111, |
|
"grad_norm": 6.015744686126709, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"logits/chosen": -0.1734294891357422, |
|
"logits/rejected": -0.2597627341747284, |
|
"logps/chosen": -9.138812065124512, |
|
"logps/rejected": -43.02109909057617, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.4286350011825562, |
|
"rewards/margins": 3.2899787425994873, |
|
"rewards/rejected": -1.8613442182540894, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 5.968247890472412, |
|
"learning_rate": 1.7281562838948968e-07, |
|
"logits/chosen": -0.1330401748418808, |
|
"logits/rejected": -0.2250121682882309, |
|
"logps/chosen": -7.425269603729248, |
|
"logps/rejected": -43.31748580932617, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 1.5740032196044922, |
|
"rewards/margins": 3.423919677734375, |
|
"rewards/rejected": -1.8499164581298828, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_logits/chosen": -0.1399160772562027, |
|
"eval_logits/rejected": -0.2429780215024948, |
|
"eval_logps/chosen": -9.18053913116455, |
|
"eval_logps/rejected": -41.719730377197266, |
|
"eval_loss": 0.36349406838417053, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": 1.3313097953796387, |
|
"eval_rewards/margins": 3.0639562606811523, |
|
"eval_rewards/rejected": -1.7326463460922241, |
|
"eval_runtime": 13.2218, |
|
"eval_samples_per_second": 7.563, |
|
"eval_steps_per_second": 3.782, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.066666666666666, |
|
"grad_norm": 2.3828182220458984, |
|
"learning_rate": 1.2043990034669413e-07, |
|
"logits/chosen": -0.16531167924404144, |
|
"logits/rejected": -0.26316291093826294, |
|
"logps/chosen": -6.18184757232666, |
|
"logps/rejected": -45.66379928588867, |
|
"loss": 0.136, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.5860216617584229, |
|
"rewards/margins": 3.676870107650757, |
|
"rewards/rejected": -2.090848207473755, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.244444444444444, |
|
"grad_norm": 1.0561579465866089, |
|
"learning_rate": 7.730678442730539e-08, |
|
"logits/chosen": -0.12930361926555634, |
|
"logits/rejected": -0.22025151550769806, |
|
"logps/chosen": -7.336406230926514, |
|
"logps/rejected": -44.349239349365234, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5953136682510376, |
|
"rewards/margins": 3.572978973388672, |
|
"rewards/rejected": -1.9776651859283447, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.422222222222222, |
|
"grad_norm": 11.791927337646484, |
|
"learning_rate": 4.358381691677932e-08, |
|
"logits/chosen": -0.10277407616376877, |
|
"logits/rejected": -0.1993442177772522, |
|
"logps/chosen": -7.81331205368042, |
|
"logps/rejected": -44.955955505371094, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.5365545749664307, |
|
"rewards/margins": 3.621255874633789, |
|
"rewards/rejected": -2.0847015380859375, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 3.8846185207366943, |
|
"learning_rate": 1.9401983499569843e-08, |
|
"logits/chosen": -0.1286163181066513, |
|
"logits/rejected": -0.25202152132987976, |
|
"logps/chosen": -7.090148448944092, |
|
"logps/rejected": -45.3367805480957, |
|
"loss": 0.1675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.5582847595214844, |
|
"rewards/margins": 3.6432290077209473, |
|
"rewards/rejected": -2.084944009780884, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"grad_norm": 6.826923847198486, |
|
"learning_rate": 4.855210488670381e-09, |
|
"logits/chosen": -0.14255917072296143, |
|
"logits/rejected": -0.24119222164154053, |
|
"logps/chosen": -9.036189079284668, |
|
"logps/rejected": -41.72261047363281, |
|
"loss": 0.2313, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.3214199542999268, |
|
"rewards/margins": 3.085725784301758, |
|
"rewards/rejected": -1.7643059492111206, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"eval_logits/chosen": -0.13784560561180115, |
|
"eval_logits/rejected": -0.24095743894577026, |
|
"eval_logps/chosen": -9.1017484664917, |
|
"eval_logps/rejected": -41.82564926147461, |
|
"eval_loss": 0.3612578511238098, |
|
"eval_rewards/accuracies": 0.8399999737739563, |
|
"eval_rewards/chosen": 1.3391889333724976, |
|
"eval_rewards/margins": 3.0824267864227295, |
|
"eval_rewards/rejected": -1.7432377338409424, |
|
"eval_runtime": 13.1911, |
|
"eval_samples_per_second": 7.581, |
|
"eval_steps_per_second": 3.79, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.955555555555556, |
|
"grad_norm": 1.7594248056411743, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.15151795744895935, |
|
"logits/rejected": -0.2365965098142624, |
|
"logps/chosen": -7.8617448806762695, |
|
"logps/rejected": -42.87919235229492, |
|
"loss": 0.1746, |
|
"rewards/accuracies": 0.9312500357627869, |
|
"rewards/chosen": 1.5052164793014526, |
|
"rewards/margins": 3.3184120655059814, |
|
"rewards/rejected": -1.8131954669952393, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.955555555555556, |
|
"step": 560, |
|
"total_flos": 6.741083695664333e+16, |
|
"train_loss": 0.31093634622437616, |
|
"train_runtime": 2833.3873, |
|
"train_samples_per_second": 3.176, |
|
"train_steps_per_second": 0.198 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.741083695664333e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|