PhoenixB's picture
Training in progress, step 30, checkpoint
b40f7f5 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.1942533387292594,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006475111290975314,
"grad_norm": 9.607009887695312,
"learning_rate": 0.0,
"logits/chosen": -3.4352195262908936,
"logits/rejected": -3.425337314605713,
"logps/chosen": -323.16705322265625,
"logps/rejected": -302.4476623535156,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.012950222581950627,
"grad_norm": 9.66933822631836,
"learning_rate": 4e-05,
"logits/chosen": -3.3881449699401855,
"logits/rejected": -3.4377424716949463,
"logps/chosen": -322.08050537109375,
"logps/rejected": -300.86651611328125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.01942533387292594,
"grad_norm": 7.823226451873779,
"learning_rate": 8e-05,
"logits/chosen": -3.4503798484802246,
"logits/rejected": -3.4216644763946533,
"logps/chosen": -316.35699462890625,
"logps/rejected": -299.4790344238281,
"loss": 0.5385,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.25093284249305725,
"rewards/margins": 0.34339073300361633,
"rewards/rejected": -0.09245789051055908,
"step": 3
},
{
"epoch": 0.025900445163901255,
"grad_norm": 2.2421059608459473,
"learning_rate": 0.00012,
"logits/chosen": -3.4315147399902344,
"logits/rejected": -3.408001661300659,
"logps/chosen": -300.1934814453125,
"logps/rejected": -288.28045654296875,
"loss": 0.1223,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.1727596521377563,
"rewards/margins": 2.187995433807373,
"rewards/rejected": -1.0152357816696167,
"step": 4
},
{
"epoch": 0.03237555645487657,
"grad_norm": 0.24723561108112335,
"learning_rate": 0.00016,
"logits/chosen": -3.4229865074157715,
"logits/rejected": -3.3911449909210205,
"logps/chosen": -300.7767028808594,
"logps/rejected": -323.2894592285156,
"loss": 0.0077,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.5630507469177246,
"rewards/margins": 5.784091472625732,
"rewards/rejected": -3.2210402488708496,
"step": 5
},
{
"epoch": 0.03885066774585188,
"grad_norm": 0.01604278013110161,
"learning_rate": 0.0002,
"logits/chosen": -3.388056516647339,
"logits/rejected": -3.340381383895874,
"logps/chosen": -301.595458984375,
"logps/rejected": -378.6108703613281,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1038761138916016,
"rewards/margins": 10.030583381652832,
"rewards/rejected": -6.926706790924072,
"step": 6
},
{
"epoch": 0.0453257790368272,
"grad_norm": 0.00046919251326471567,
"learning_rate": 0.0001992114701314478,
"logits/chosen": -3.3325581550598145,
"logits/rejected": -3.262298107147217,
"logps/chosen": -287.6573181152344,
"logps/rejected": -413.3544616699219,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.678926467895508,
"rewards/margins": 15.118080139160156,
"rewards/rejected": -12.439154624938965,
"step": 7
},
{
"epoch": 0.05180089032780251,
"grad_norm": 0.030723145231604576,
"learning_rate": 0.0001968583161128631,
"logits/chosen": -3.3250393867492676,
"logits/rejected": -3.220548391342163,
"logps/chosen": -311.5693054199219,
"logps/rejected": -449.70294189453125,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.03436547517776489,
"rewards/margins": 16.31571388244629,
"rewards/rejected": -16.350078582763672,
"step": 8
},
{
"epoch": 0.05827600161877782,
"grad_norm": 0.0016233824426308274,
"learning_rate": 0.00019297764858882514,
"logits/chosen": -3.20749568939209,
"logits/rejected": -3.159514904022217,
"logps/chosen": -330.7140808105469,
"logps/rejected": -504.84979248046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.49750018119812,
"rewards/margins": 19.492156982421875,
"rewards/rejected": -21.98965835571289,
"step": 9
},
{
"epoch": 0.06475111290975313,
"grad_norm": 0.001190170063637197,
"learning_rate": 0.00018763066800438636,
"logits/chosen": -3.1841561794281006,
"logits/rejected": -3.1465625762939453,
"logps/chosen": -378.6527099609375,
"logps/rejected": -586.8410034179688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.0924973487854,
"rewards/margins": 21.506628036499023,
"rewards/rejected": -27.599124908447266,
"step": 10
},
{
"epoch": 0.07122622420072845,
"grad_norm": 0.06168733909726143,
"learning_rate": 0.00018090169943749476,
"logits/chosen": -3.1322693824768066,
"logits/rejected": -3.0882985591888428,
"logps/chosen": -420.40570068359375,
"logps/rejected": -590.8090209960938,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -9.916619300842285,
"rewards/margins": 20.480270385742188,
"rewards/rejected": -30.396888732910156,
"step": 11
},
{
"epoch": 0.07770133549170376,
"grad_norm": 0.3103026747703552,
"learning_rate": 0.00017289686274214118,
"logits/chosen": -3.109321117401123,
"logits/rejected": -3.0893146991729736,
"logps/chosen": -423.5613098144531,
"logps/rejected": -605.92041015625,
"loss": 0.0017,
"rewards/accuracies": 1.0,
"rewards/chosen": -10.163900375366211,
"rewards/margins": 22.695072174072266,
"rewards/rejected": -32.858970642089844,
"step": 12
},
{
"epoch": 0.08417644678267908,
"grad_norm": 7.292612281162292e-05,
"learning_rate": 0.000163742398974869,
"logits/chosen": -3.1894640922546387,
"logits/rejected": -3.126338005065918,
"logps/chosen": -438.2912292480469,
"logps/rejected": -652.4424438476562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -9.139822959899902,
"rewards/margins": 25.713586807250977,
"rewards/rejected": -34.85341262817383,
"step": 13
},
{
"epoch": 0.0906515580736544,
"grad_norm": 0.00019792577950283885,
"learning_rate": 0.00015358267949789966,
"logits/chosen": -3.232335090637207,
"logits/rejected": -3.1516857147216797,
"logps/chosen": -412.1377258300781,
"logps/rejected": -669.3051147460938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.704937934875488,
"rewards/margins": 28.375762939453125,
"rewards/rejected": -36.0806999206543,
"step": 14
},
{
"epoch": 0.0971266693646297,
"grad_norm": 8.50434571475489e-06,
"learning_rate": 0.00014257792915650728,
"logits/chosen": -3.296929359436035,
"logits/rejected": -3.224276542663574,
"logps/chosen": -429.69140625,
"logps/rejected": -668.3744506835938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.6317315101623535,
"rewards/margins": 28.540904998779297,
"rewards/rejected": -36.172637939453125,
"step": 15
},
{
"epoch": 0.10360178065560502,
"grad_norm": 0.0009452006197534502,
"learning_rate": 0.00013090169943749476,
"logits/chosen": -3.324937343597412,
"logits/rejected": -3.226996898651123,
"logps/chosen": -371.1409606933594,
"logps/rejected": -632.6340942382812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.080913543701172,
"rewards/margins": 29.492341995239258,
"rewards/rejected": -34.57324981689453,
"step": 16
},
{
"epoch": 0.11007689194658034,
"grad_norm": 9.954872075468302e-05,
"learning_rate": 0.00011873813145857249,
"logits/chosen": -3.3159940242767334,
"logits/rejected": -3.252108573913574,
"logps/chosen": -327.1322937011719,
"logps/rejected": -610.6470336914062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.523940086364746,
"rewards/margins": 30.721057891845703,
"rewards/rejected": -34.244998931884766,
"step": 17
},
{
"epoch": 0.11655200323755564,
"grad_norm": 0.00013703889271710068,
"learning_rate": 0.00010627905195293135,
"logits/chosen": -3.369493007659912,
"logits/rejected": -3.2716524600982666,
"logps/chosen": -364.87738037109375,
"logps/rejected": -619.3846435546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.915665626525879,
"rewards/margins": 30.543182373046875,
"rewards/rejected": -34.45884704589844,
"step": 18
},
{
"epoch": 0.12302711452853096,
"grad_norm": 2.333914744667709e-05,
"learning_rate": 9.372094804706867e-05,
"logits/chosen": -3.376049518585205,
"logits/rejected": -3.2898783683776855,
"logps/chosen": -357.4218444824219,
"logps/rejected": -616.6192626953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.283668041229248,
"rewards/margins": 30.039329528808594,
"rewards/rejected": -34.323001861572266,
"step": 19
},
{
"epoch": 0.12950222581950627,
"grad_norm": 3.3141390076707467e-07,
"learning_rate": 8.126186854142752e-05,
"logits/chosen": -3.396101474761963,
"logits/rejected": -3.25456166267395,
"logps/chosen": -406.4652099609375,
"logps/rejected": -674.7239379882812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.991150856018066,
"rewards/margins": 31.693134307861328,
"rewards/rejected": -36.68428421020508,
"step": 20
},
{
"epoch": 0.1359773371104816,
"grad_norm": 4.7546738102255404e-08,
"learning_rate": 6.909830056250527e-05,
"logits/chosen": -3.3930516242980957,
"logits/rejected": -3.2768235206604004,
"logps/chosen": -411.16485595703125,
"logps/rejected": -710.5061645507812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.375492572784424,
"rewards/margins": 32.4808349609375,
"rewards/rejected": -37.856327056884766,
"step": 21
},
{
"epoch": 0.1424524484014569,
"grad_norm": 0.00029760634060949087,
"learning_rate": 5.7422070843492734e-05,
"logits/chosen": -3.370030403137207,
"logits/rejected": -3.275481939315796,
"logps/chosen": -369.93206787109375,
"logps/rejected": -653.92431640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.696152210235596,
"rewards/margins": 30.0690975189209,
"rewards/rejected": -34.7652473449707,
"step": 22
},
{
"epoch": 0.1489275596924322,
"grad_norm": 0.0007433082209900022,
"learning_rate": 4.6417320502100316e-05,
"logits/chosen": -3.332247018814087,
"logits/rejected": -3.3013038635253906,
"logps/chosen": -398.0705261230469,
"logps/rejected": -665.6163940429688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.122736930847168,
"rewards/margins": 30.323734283447266,
"rewards/rejected": -35.44647216796875,
"step": 23
},
{
"epoch": 0.15540267098340751,
"grad_norm": 5.046871933700459e-07,
"learning_rate": 3.6257601025131026e-05,
"logits/chosen": -3.39272403717041,
"logits/rejected": -3.2838759422302246,
"logps/chosen": -406.24566650390625,
"logps/rejected": -653.7831420898438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.370276927947998,
"rewards/margins": 29.569276809692383,
"rewards/rejected": -34.939552307128906,
"step": 24
},
{
"epoch": 0.16187778227438285,
"grad_norm": 6.661146301212284e-08,
"learning_rate": 2.7103137257858868e-05,
"logits/chosen": -3.3827924728393555,
"logits/rejected": -3.2959158420562744,
"logps/chosen": -384.3971862792969,
"logps/rejected": -677.8952026367188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.599191188812256,
"rewards/margins": 32.55278778076172,
"rewards/rejected": -37.1519775390625,
"step": 25
},
{
"epoch": 0.16835289356535815,
"grad_norm": 3.000658352902974e-06,
"learning_rate": 1.9098300562505266e-05,
"logits/chosen": -3.4050755500793457,
"logits/rejected": -3.2928526401519775,
"logps/chosen": -376.23394775390625,
"logps/rejected": -664.83203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.051126956939697,
"rewards/margins": 31.333837509155273,
"rewards/rejected": -36.38496398925781,
"step": 26
},
{
"epoch": 0.17482800485633346,
"grad_norm": 3.1926545034366427e-06,
"learning_rate": 1.2369331995613665e-05,
"logits/chosen": -3.4148049354553223,
"logits/rejected": -3.307281017303467,
"logps/chosen": -363.273193359375,
"logps/rejected": -626.52197265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.571832180023193,
"rewards/margins": 28.954362869262695,
"rewards/rejected": -33.52619552612305,
"step": 27
},
{
"epoch": 0.1813031161473088,
"grad_norm": 6.979635713832977e-07,
"learning_rate": 7.022351411174866e-06,
"logits/chosen": -3.3811025619506836,
"logits/rejected": -3.2903144359588623,
"logps/chosen": -347.3415222167969,
"logps/rejected": -619.4091186523438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.595226287841797,
"rewards/margins": 29.120075225830078,
"rewards/rejected": -33.715301513671875,
"step": 28
},
{
"epoch": 0.1877782274382841,
"grad_norm": 0.00033328495919704437,
"learning_rate": 3.1416838871368924e-06,
"logits/chosen": -3.442066192626953,
"logits/rejected": -3.2837891578674316,
"logps/chosen": -429.7748107910156,
"logps/rejected": -705.698486328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.5345988273620605,
"rewards/margins": 31.772361755371094,
"rewards/rejected": -38.30696105957031,
"step": 29
},
{
"epoch": 0.1942533387292594,
"grad_norm": 0.00010963407112285495,
"learning_rate": 7.885298685522235e-07,
"logits/chosen": -3.4091765880584717,
"logits/rejected": -3.3008341789245605,
"logps/chosen": -382.4224548339844,
"logps/rejected": -653.5609130859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.606748580932617,
"rewards/margins": 31.45462417602539,
"rewards/rejected": -36.06137466430664,
"step": 30
}
],
"logging_steps": 1,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}