|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1942533387292594, |
|
"eval_steps": 500, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006475111290975314, |
|
"grad_norm": 9.607009887695312, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -3.4352195262908936, |
|
"logits/rejected": -3.425337314605713, |
|
"logps/chosen": -323.16705322265625, |
|
"logps/rejected": -302.4476623535156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012950222581950627, |
|
"grad_norm": 9.66933822631836, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -3.3881449699401855, |
|
"logits/rejected": -3.4377424716949463, |
|
"logps/chosen": -322.08050537109375, |
|
"logps/rejected": -300.86651611328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01942533387292594, |
|
"grad_norm": 7.823226451873779, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": -3.4503798484802246, |
|
"logits/rejected": -3.4216644763946533, |
|
"logps/chosen": -316.35699462890625, |
|
"logps/rejected": -299.4790344238281, |
|
"loss": 0.5385, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.25093284249305725, |
|
"rewards/margins": 0.34339073300361633, |
|
"rewards/rejected": -0.09245789051055908, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.025900445163901255, |
|
"grad_norm": 2.2421059608459473, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": -3.4315147399902344, |
|
"logits/rejected": -3.408001661300659, |
|
"logps/chosen": -300.1934814453125, |
|
"logps/rejected": -288.28045654296875, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1727596521377563, |
|
"rewards/margins": 2.187995433807373, |
|
"rewards/rejected": -1.0152357816696167, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03237555645487657, |
|
"grad_norm": 0.24723561108112335, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -3.4229865074157715, |
|
"logits/rejected": -3.3911449909210205, |
|
"logps/chosen": -300.7767028808594, |
|
"logps/rejected": -323.2894592285156, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5630507469177246, |
|
"rewards/margins": 5.784091472625732, |
|
"rewards/rejected": -3.2210402488708496, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03885066774585188, |
|
"grad_norm": 0.01604278013110161, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": -3.388056516647339, |
|
"logits/rejected": -3.340381383895874, |
|
"logps/chosen": -301.595458984375, |
|
"logps/rejected": -378.6108703613281, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.1038761138916016, |
|
"rewards/margins": 10.030583381652832, |
|
"rewards/rejected": -6.926706790924072, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0453257790368272, |
|
"grad_norm": 0.00046919251326471567, |
|
"learning_rate": 0.0001992114701314478, |
|
"logits/chosen": -3.3325581550598145, |
|
"logits/rejected": -3.262298107147217, |
|
"logps/chosen": -287.6573181152344, |
|
"logps/rejected": -413.3544616699219, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.678926467895508, |
|
"rewards/margins": 15.118080139160156, |
|
"rewards/rejected": -12.439154624938965, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05180089032780251, |
|
"grad_norm": 0.030723145231604576, |
|
"learning_rate": 0.0001968583161128631, |
|
"logits/chosen": -3.3250393867492676, |
|
"logits/rejected": -3.220548391342163, |
|
"logps/chosen": -311.5693054199219, |
|
"logps/rejected": -449.70294189453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03436547517776489, |
|
"rewards/margins": 16.31571388244629, |
|
"rewards/rejected": -16.350078582763672, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05827600161877782, |
|
"grad_norm": 0.0016233824426308274, |
|
"learning_rate": 0.00019297764858882514, |
|
"logits/chosen": -3.20749568939209, |
|
"logits/rejected": -3.159514904022217, |
|
"logps/chosen": -330.7140808105469, |
|
"logps/rejected": -504.84979248046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.49750018119812, |
|
"rewards/margins": 19.492156982421875, |
|
"rewards/rejected": -21.98965835571289, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06475111290975313, |
|
"grad_norm": 0.001190170063637197, |
|
"learning_rate": 0.00018763066800438636, |
|
"logits/chosen": -3.1841561794281006, |
|
"logits/rejected": -3.1465625762939453, |
|
"logps/chosen": -378.6527099609375, |
|
"logps/rejected": -586.8410034179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.0924973487854, |
|
"rewards/margins": 21.506628036499023, |
|
"rewards/rejected": -27.599124908447266, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07122622420072845, |
|
"grad_norm": 0.06168733909726143, |
|
"learning_rate": 0.00018090169943749476, |
|
"logits/chosen": -3.1322693824768066, |
|
"logits/rejected": -3.0882985591888428, |
|
"logps/chosen": -420.40570068359375, |
|
"logps/rejected": -590.8090209960938, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.916619300842285, |
|
"rewards/margins": 20.480270385742188, |
|
"rewards/rejected": -30.396888732910156, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07770133549170376, |
|
"grad_norm": 0.3103026747703552, |
|
"learning_rate": 0.00017289686274214118, |
|
"logits/chosen": -3.109321117401123, |
|
"logits/rejected": -3.0893146991729736, |
|
"logps/chosen": -423.5613098144531, |
|
"logps/rejected": -605.92041015625, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.163900375366211, |
|
"rewards/margins": 22.695072174072266, |
|
"rewards/rejected": -32.858970642089844, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08417644678267908, |
|
"grad_norm": 7.292612281162292e-05, |
|
"learning_rate": 0.000163742398974869, |
|
"logits/chosen": -3.1894640922546387, |
|
"logits/rejected": -3.126338005065918, |
|
"logps/chosen": -438.2912292480469, |
|
"logps/rejected": -652.4424438476562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.139822959899902, |
|
"rewards/margins": 25.713586807250977, |
|
"rewards/rejected": -34.85341262817383, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0906515580736544, |
|
"grad_norm": 0.00019792577950283885, |
|
"learning_rate": 0.00015358267949789966, |
|
"logits/chosen": -3.232335090637207, |
|
"logits/rejected": -3.1516857147216797, |
|
"logps/chosen": -412.1377258300781, |
|
"logps/rejected": -669.3051147460938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.704937934875488, |
|
"rewards/margins": 28.375762939453125, |
|
"rewards/rejected": -36.0806999206543, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0971266693646297, |
|
"grad_norm": 8.50434571475489e-06, |
|
"learning_rate": 0.00014257792915650728, |
|
"logits/chosen": -3.296929359436035, |
|
"logits/rejected": -3.224276542663574, |
|
"logps/chosen": -429.69140625, |
|
"logps/rejected": -668.3744506835938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.6317315101623535, |
|
"rewards/margins": 28.540904998779297, |
|
"rewards/rejected": -36.172637939453125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10360178065560502, |
|
"grad_norm": 0.0009452006197534502, |
|
"learning_rate": 0.00013090169943749476, |
|
"logits/chosen": -3.324937343597412, |
|
"logits/rejected": -3.226996898651123, |
|
"logps/chosen": -371.1409606933594, |
|
"logps/rejected": -632.6340942382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.080913543701172, |
|
"rewards/margins": 29.492341995239258, |
|
"rewards/rejected": -34.57324981689453, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11007689194658034, |
|
"grad_norm": 9.954872075468302e-05, |
|
"learning_rate": 0.00011873813145857249, |
|
"logits/chosen": -3.3159940242767334, |
|
"logits/rejected": -3.252108573913574, |
|
"logps/chosen": -327.1322937011719, |
|
"logps/rejected": -610.6470336914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.523940086364746, |
|
"rewards/margins": 30.721057891845703, |
|
"rewards/rejected": -34.244998931884766, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11655200323755564, |
|
"grad_norm": 0.00013703889271710068, |
|
"learning_rate": 0.00010627905195293135, |
|
"logits/chosen": -3.369493007659912, |
|
"logits/rejected": -3.2716524600982666, |
|
"logps/chosen": -364.87738037109375, |
|
"logps/rejected": -619.3846435546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.915665626525879, |
|
"rewards/margins": 30.543182373046875, |
|
"rewards/rejected": -34.45884704589844, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12302711452853096, |
|
"grad_norm": 2.333914744667709e-05, |
|
"learning_rate": 9.372094804706867e-05, |
|
"logits/chosen": -3.376049518585205, |
|
"logits/rejected": -3.2898783683776855, |
|
"logps/chosen": -357.4218444824219, |
|
"logps/rejected": -616.6192626953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.283668041229248, |
|
"rewards/margins": 30.039329528808594, |
|
"rewards/rejected": -34.323001861572266, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12950222581950627, |
|
"grad_norm": 3.3141390076707467e-07, |
|
"learning_rate": 8.126186854142752e-05, |
|
"logits/chosen": -3.396101474761963, |
|
"logits/rejected": -3.25456166267395, |
|
"logps/chosen": -406.4652099609375, |
|
"logps/rejected": -674.7239379882812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.991150856018066, |
|
"rewards/margins": 31.693134307861328, |
|
"rewards/rejected": -36.68428421020508, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1359773371104816, |
|
"grad_norm": 4.7546738102255404e-08, |
|
"learning_rate": 6.909830056250527e-05, |
|
"logits/chosen": -3.3930516242980957, |
|
"logits/rejected": -3.2768235206604004, |
|
"logps/chosen": -411.16485595703125, |
|
"logps/rejected": -710.5061645507812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.375492572784424, |
|
"rewards/margins": 32.4808349609375, |
|
"rewards/rejected": -37.856327056884766, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1424524484014569, |
|
"grad_norm": 0.00029760634060949087, |
|
"learning_rate": 5.7422070843492734e-05, |
|
"logits/chosen": -3.370030403137207, |
|
"logits/rejected": -3.275481939315796, |
|
"logps/chosen": -369.93206787109375, |
|
"logps/rejected": -653.92431640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.696152210235596, |
|
"rewards/margins": 30.0690975189209, |
|
"rewards/rejected": -34.7652473449707, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1489275596924322, |
|
"grad_norm": 0.0007433082209900022, |
|
"learning_rate": 4.6417320502100316e-05, |
|
"logits/chosen": -3.332247018814087, |
|
"logits/rejected": -3.3013038635253906, |
|
"logps/chosen": -398.0705261230469, |
|
"logps/rejected": -665.6163940429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.122736930847168, |
|
"rewards/margins": 30.323734283447266, |
|
"rewards/rejected": -35.44647216796875, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.15540267098340751, |
|
"grad_norm": 5.046871933700459e-07, |
|
"learning_rate": 3.6257601025131026e-05, |
|
"logits/chosen": -3.39272403717041, |
|
"logits/rejected": -3.2838759422302246, |
|
"logps/chosen": -406.24566650390625, |
|
"logps/rejected": -653.7831420898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.370276927947998, |
|
"rewards/margins": 29.569276809692383, |
|
"rewards/rejected": -34.939552307128906, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16187778227438285, |
|
"grad_norm": 6.661146301212284e-08, |
|
"learning_rate": 2.7103137257858868e-05, |
|
"logits/chosen": -3.3827924728393555, |
|
"logits/rejected": -3.2959158420562744, |
|
"logps/chosen": -384.3971862792969, |
|
"logps/rejected": -677.8952026367188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.599191188812256, |
|
"rewards/margins": 32.55278778076172, |
|
"rewards/rejected": -37.1519775390625, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16835289356535815, |
|
"grad_norm": 3.000658352902974e-06, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"logits/chosen": -3.4050755500793457, |
|
"logits/rejected": -3.2928526401519775, |
|
"logps/chosen": -376.23394775390625, |
|
"logps/rejected": -664.83203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.051126956939697, |
|
"rewards/margins": 31.333837509155273, |
|
"rewards/rejected": -36.38496398925781, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.17482800485633346, |
|
"grad_norm": 3.1926545034366427e-06, |
|
"learning_rate": 1.2369331995613665e-05, |
|
"logits/chosen": -3.4148049354553223, |
|
"logits/rejected": -3.307281017303467, |
|
"logps/chosen": -363.273193359375, |
|
"logps/rejected": -626.52197265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.571832180023193, |
|
"rewards/margins": 28.954362869262695, |
|
"rewards/rejected": -33.52619552612305, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1813031161473088, |
|
"grad_norm": 6.979635713832977e-07, |
|
"learning_rate": 7.022351411174866e-06, |
|
"logits/chosen": -3.3811025619506836, |
|
"logits/rejected": -3.2903144359588623, |
|
"logps/chosen": -347.3415222167969, |
|
"logps/rejected": -619.4091186523438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.595226287841797, |
|
"rewards/margins": 29.120075225830078, |
|
"rewards/rejected": -33.715301513671875, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1877782274382841, |
|
"grad_norm": 0.00033328495919704437, |
|
"learning_rate": 3.1416838871368924e-06, |
|
"logits/chosen": -3.442066192626953, |
|
"logits/rejected": -3.2837891578674316, |
|
"logps/chosen": -429.7748107910156, |
|
"logps/rejected": -705.698486328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.5345988273620605, |
|
"rewards/margins": 31.772361755371094, |
|
"rewards/rejected": -38.30696105957031, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1942533387292594, |
|
"grad_norm": 0.00010963407112285495, |
|
"learning_rate": 7.885298685522235e-07, |
|
"logits/chosen": -3.4091765880584717, |
|
"logits/rejected": -3.3008341789245605, |
|
"logps/chosen": -382.4224548339844, |
|
"logps/rejected": -653.5609130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.606748580932617, |
|
"rewards/margins": 31.45462417602539, |
|
"rewards/rejected": -36.06137466430664, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 30, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|