|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7904487443392343, |
|
"eval_steps": 500, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013174145738987238, |
|
"grad_norm": 0.8317140340805054, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": 9.990612030029297, |
|
"logits/rejected": 10.698101997375488, |
|
"logps/chosen": -102.88545989990234, |
|
"logps/ref_chosen": -102.88545989990234, |
|
"logps/ref_rejected": -121.84871673583984, |
|
"logps/rejected": -121.84871673583984, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"sft_loss": 0.36753880977630615, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026348291477974475, |
|
"grad_norm": 0.2926611006259918, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 10.211905479431152, |
|
"logits/rejected": 11.06594467163086, |
|
"logps/chosen": -107.70349884033203, |
|
"logps/ref_chosen": -107.70349884033203, |
|
"logps/ref_rejected": -121.89966583251953, |
|
"logps/rejected": -121.89966583251953, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"sft_loss": 0.41013145446777344, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03952243721696171, |
|
"grad_norm": 0.3214672803878784, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": 10.036933898925781, |
|
"logits/rejected": 11.024795532226562, |
|
"logps/chosen": -108.28660583496094, |
|
"logps/ref_chosen": -107.98188781738281, |
|
"logps/ref_rejected": -124.51527404785156, |
|
"logps/rejected": -124.7075424194336, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -0.0030471612699329853, |
|
"rewards/margins": -0.001124453847296536, |
|
"rewards/rejected": -0.0019227075390517712, |
|
"sft_loss": 0.4123927652835846, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05269658295594895, |
|
"grad_norm": 0.7239967584609985, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 9.835367202758789, |
|
"logits/rejected": 10.854362487792969, |
|
"logps/chosen": -109.89208221435547, |
|
"logps/ref_chosen": -109.20836639404297, |
|
"logps/ref_rejected": -119.23908996582031, |
|
"logps/rejected": -119.69357299804688, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.006837163120508194, |
|
"rewards/margins": -0.0022922407370060682, |
|
"rewards/rejected": -0.004544922150671482, |
|
"sft_loss": 0.4051341712474823, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.9211171269416809, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 10.207865715026855, |
|
"logits/rejected": 10.963621139526367, |
|
"logps/chosen": -104.02088928222656, |
|
"logps/ref_chosen": -103.87680053710938, |
|
"logps/ref_rejected": -118.41618347167969, |
|
"logps/rejected": -118.34123992919922, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0014409449649974704, |
|
"rewards/margins": -0.002190487692132592, |
|
"rewards/rejected": 0.0007495426689274609, |
|
"sft_loss": 0.3706103563308716, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07904487443392343, |
|
"grad_norm": 0.7005686163902283, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 10.686239242553711, |
|
"logits/rejected": 11.462547302246094, |
|
"logps/chosen": -108.1670913696289, |
|
"logps/ref_chosen": -107.58968353271484, |
|
"logps/ref_rejected": -122.07303619384766, |
|
"logps/rejected": -122.58065032958984, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.005774094723165035, |
|
"rewards/margins": -0.0006979470490477979, |
|
"rewards/rejected": -0.005076148081570864, |
|
"sft_loss": 0.39328432083129883, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09221902017291066, |
|
"grad_norm": 0.8737779259681702, |
|
"learning_rate": 4.375e-07, |
|
"logits/chosen": 10.040006637573242, |
|
"logits/rejected": 10.747206687927246, |
|
"logps/chosen": -107.037353515625, |
|
"logps/ref_chosen": -107.42727661132812, |
|
"logps/ref_rejected": -116.87063598632812, |
|
"logps/rejected": -116.42378997802734, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": 0.0038992553018033504, |
|
"rewards/margins": -0.0005691752885468304, |
|
"rewards/rejected": 0.004468431230634451, |
|
"sft_loss": 0.37478095293045044, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1053931659118979, |
|
"grad_norm": 0.46314188838005066, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 10.196634292602539, |
|
"logits/rejected": 11.089151382446289, |
|
"logps/chosen": -104.9737548828125, |
|
"logps/ref_chosen": -105.60282135009766, |
|
"logps/ref_rejected": -119.53916931152344, |
|
"logps/rejected": -118.9640884399414, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": 0.006290654186159372, |
|
"rewards/margins": 0.0005399012006819248, |
|
"rewards/rejected": 0.0057507529854774475, |
|
"sft_loss": 0.37916287779808044, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.11856731165088513, |
|
"grad_norm": 0.6739105582237244, |
|
"learning_rate": 4.997252228714278e-07, |
|
"logits/chosen": 10.174893379211426, |
|
"logits/rejected": 11.141225814819336, |
|
"logps/chosen": -104.830078125, |
|
"logps/ref_chosen": -105.46086120605469, |
|
"logps/ref_rejected": -119.00373840332031, |
|
"logps/rejected": -118.41084289550781, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.006307927425950766, |
|
"rewards/margins": 0.00037892413092777133, |
|
"rewards/rejected": 0.005929003469645977, |
|
"sft_loss": 0.3834277391433716, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 0.32560959458351135, |
|
"learning_rate": 4.989014955054745e-07, |
|
"logits/chosen": 10.061932563781738, |
|
"logits/rejected": 10.884778022766113, |
|
"logps/chosen": -102.10572052001953, |
|
"logps/ref_chosen": -104.21009826660156, |
|
"logps/ref_rejected": -118.9209213256836, |
|
"logps/rejected": -117.1087646484375, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02104363776743412, |
|
"rewards/margins": 0.00292214541696012, |
|
"rewards/rejected": 0.018121493980288506, |
|
"sft_loss": 0.34126415848731995, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14491560312885962, |
|
"grad_norm": 0.8538271188735962, |
|
"learning_rate": 4.975306286336627e-07, |
|
"logits/chosen": 9.989534378051758, |
|
"logits/rejected": 11.177312850952148, |
|
"logps/chosen": -102.71998596191406, |
|
"logps/ref_chosen": -105.94319152832031, |
|
"logps/ref_rejected": -122.76007843017578, |
|
"logps/rejected": -119.8812026977539, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.03223201259970665, |
|
"rewards/margins": 0.0034431489184498787, |
|
"rewards/rejected": 0.028788862749934196, |
|
"sft_loss": 0.3909299075603485, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15808974886784685, |
|
"grad_norm": 0.599084734916687, |
|
"learning_rate": 4.956156357188939e-07, |
|
"logits/chosen": 9.907474517822266, |
|
"logits/rejected": 10.599059104919434, |
|
"logps/chosen": -105.25791931152344, |
|
"logps/ref_chosen": -109.08442687988281, |
|
"logps/ref_rejected": -121.41947174072266, |
|
"logps/rejected": -117.7214584350586, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.03826504573225975, |
|
"rewards/margins": 0.00128496577963233, |
|
"rewards/rejected": 0.036980077624320984, |
|
"sft_loss": 0.3606122136116028, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17126389460683408, |
|
"grad_norm": 0.20007266104221344, |
|
"learning_rate": 4.931607263312032e-07, |
|
"logits/chosen": 9.952820777893066, |
|
"logits/rejected": 11.027226448059082, |
|
"logps/chosen": -99.99949645996094, |
|
"logps/ref_chosen": -104.62150573730469, |
|
"logps/ref_rejected": -119.55384063720703, |
|
"logps/rejected": -115.20744323730469, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.04622000828385353, |
|
"rewards/margins": 0.0027560186572372913, |
|
"rewards/rejected": 0.04346399009227753, |
|
"sft_loss": 0.37296974658966064, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1844380403458213, |
|
"grad_norm": 0.30717214941978455, |
|
"learning_rate": 4.9017129689421e-07, |
|
"logits/chosen": 10.493995666503906, |
|
"logits/rejected": 11.61634635925293, |
|
"logps/chosen": -100.9910888671875, |
|
"logps/ref_chosen": -106.179443359375, |
|
"logps/ref_rejected": -120.73036193847656, |
|
"logps/rejected": -115.51315307617188, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.051883526146411896, |
|
"rewards/margins": -0.0002885278081521392, |
|
"rewards/rejected": 0.05217204988002777, |
|
"sft_loss": 0.35513877868652344, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.19761218608480857, |
|
"grad_norm": 0.31972214579582214, |
|
"learning_rate": 4.866539188226085e-07, |
|
"logits/chosen": 9.85748291015625, |
|
"logits/rejected": 10.79136848449707, |
|
"logps/chosen": -98.44343566894531, |
|
"logps/ref_chosen": -105.70547485351562, |
|
"logps/ref_rejected": -118.89997863769531, |
|
"logps/rejected": -111.49993133544922, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.0726204589009285, |
|
"rewards/margins": -0.001380045199766755, |
|
"rewards/rejected": 0.07400050014257431, |
|
"sft_loss": 0.3432847261428833, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2107863318237958, |
|
"grad_norm": 0.3299219012260437, |
|
"learning_rate": 4.826163240767716e-07, |
|
"logits/chosen": 10.693929672241211, |
|
"logits/rejected": 11.317272186279297, |
|
"logps/chosen": -99.89289855957031, |
|
"logps/ref_chosen": -108.86376953125, |
|
"logps/ref_rejected": -122.1635513305664, |
|
"logps/rejected": -113.75732421875, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.08970852196216583, |
|
"rewards/margins": 0.005646157078444958, |
|
"rewards/rejected": 0.084062360227108, |
|
"sft_loss": 0.3626875579357147, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22396047756278303, |
|
"grad_norm": 0.39949116110801697, |
|
"learning_rate": 4.780673881662242e-07, |
|
"logits/chosen": 10.215154647827148, |
|
"logits/rejected": 10.865469932556152, |
|
"logps/chosen": -92.98998260498047, |
|
"logps/ref_chosen": -102.93986511230469, |
|
"logps/ref_rejected": -119.43718719482422, |
|
"logps/rejected": -109.6163558959961, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.09949885308742523, |
|
"rewards/margins": 0.0012904854957014322, |
|
"rewards/rejected": 0.09820836037397385, |
|
"sft_loss": 0.37107497453689575, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.23713462330177026, |
|
"grad_norm": 0.3356131911277771, |
|
"learning_rate": 4.730171106393466e-07, |
|
"logits/chosen": 10.427769660949707, |
|
"logits/rejected": 11.224242210388184, |
|
"logps/chosen": -92.83369445800781, |
|
"logps/ref_chosen": -103.81341552734375, |
|
"logps/ref_rejected": -117.45123291015625, |
|
"logps/rejected": -107.25675964355469, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.10979717969894409, |
|
"rewards/margins": 0.007852486334741116, |
|
"rewards/rejected": 0.101944699883461, |
|
"sft_loss": 0.3487062454223633, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2503087690407575, |
|
"grad_norm": 0.31761711835861206, |
|
"learning_rate": 4.6747659310219757e-07, |
|
"logits/chosen": 10.344837188720703, |
|
"logits/rejected": 11.015111923217773, |
|
"logps/chosen": -97.11812591552734, |
|
"logps/ref_chosen": -107.85797119140625, |
|
"logps/ref_rejected": -121.88042449951172, |
|
"logps/rejected": -110.71996307373047, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": 0.10739848017692566, |
|
"rewards/margins": -0.004206114914268255, |
|
"rewards/rejected": 0.11160460114479065, |
|
"sft_loss": 0.33181920647621155, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.2634829147797447, |
|
"grad_norm": 0.3374970555305481, |
|
"learning_rate": 4.6145801481477433e-07, |
|
"logits/chosen": 10.763664245605469, |
|
"logits/rejected": 11.578323364257812, |
|
"logps/chosen": -92.1986083984375, |
|
"logps/ref_chosen": -103.42721557617188, |
|
"logps/ref_rejected": -116.7796630859375, |
|
"logps/rejected": -106.10646057128906, |
|
"loss": 0.4709, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.11228612810373306, |
|
"rewards/margins": 0.005554294213652611, |
|
"rewards/rejected": 0.1067318320274353, |
|
"sft_loss": 0.324346125125885, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.276657060518732, |
|
"grad_norm": 0.20886239409446716, |
|
"learning_rate": 4.549746059183561e-07, |
|
"logits/chosen": 9.804973602294922, |
|
"logits/rejected": 10.907769203186035, |
|
"logps/chosen": -94.7210693359375, |
|
"logps/ref_chosen": -106.60163879394531, |
|
"logps/ref_rejected": -124.56562805175781, |
|
"logps/rejected": -112.27490234375, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": 0.11880576610565186, |
|
"rewards/margins": -0.004101429134607315, |
|
"rewards/rejected": 0.12290719151496887, |
|
"sft_loss": 0.3191758394241333, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.28983120625771924, |
|
"grad_norm": 0.4104432165622711, |
|
"learning_rate": 4.480406183527823e-07, |
|
"logits/chosen": 10.239618301391602, |
|
"logits/rejected": 11.115823745727539, |
|
"logps/chosen": -90.41361999511719, |
|
"logps/ref_chosen": -103.77696228027344, |
|
"logps/ref_rejected": -118.73616027832031, |
|
"logps/rejected": -106.71508026123047, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.13363340497016907, |
|
"rewards/margins": 0.013422610238194466, |
|
"rewards/rejected": 0.12021078914403915, |
|
"sft_loss": 0.34187009930610657, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3030053519967065, |
|
"grad_norm": 0.18107129633426666, |
|
"learning_rate": 4.4067129452759546e-07, |
|
"logits/chosen": 10.109177589416504, |
|
"logits/rejected": 11.132944107055664, |
|
"logps/chosen": -91.18827819824219, |
|
"logps/ref_chosen": -104.72956085205078, |
|
"logps/ref_rejected": -121.35556030273438, |
|
"logps/rejected": -108.52244567871094, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.13541287183761597, |
|
"rewards/margins": 0.007081696763634682, |
|
"rewards/rejected": 0.12833118438720703, |
|
"sft_loss": 0.33867964148521423, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3161794977356937, |
|
"grad_norm": 0.2613002359867096, |
|
"learning_rate": 4.3288283381591725e-07, |
|
"logits/chosen": 10.174164772033691, |
|
"logits/rejected": 11.02208137512207, |
|
"logps/chosen": -92.25580596923828, |
|
"logps/ref_chosen": -105.88758087158203, |
|
"logps/ref_rejected": -125.69054412841797, |
|
"logps/rejected": -112.1562728881836, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.13631772994995117, |
|
"rewards/margins": 0.0009749364107847214, |
|
"rewards/rejected": 0.1353427916765213, |
|
"sft_loss": 0.32745954394340515, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"grad_norm": 0.2633656859397888, |
|
"learning_rate": 4.246923569447104e-07, |
|
"logits/chosen": 10.34875202178955, |
|
"logits/rejected": 11.083490371704102, |
|
"logps/chosen": -94.46806335449219, |
|
"logps/ref_chosen": -110.0761489868164, |
|
"logps/ref_rejected": -129.10540771484375, |
|
"logps/rejected": -113.79875183105469, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.15608078241348267, |
|
"rewards/margins": 0.003014356829226017, |
|
"rewards/rejected": 0.15306642651557922, |
|
"sft_loss": 0.3239065706729889, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.34252778921366817, |
|
"grad_norm": 0.32009434700012207, |
|
"learning_rate": 4.161178683597054e-07, |
|
"logits/chosen": 10.391422271728516, |
|
"logits/rejected": 11.489303588867188, |
|
"logps/chosen": -87.06050109863281, |
|
"logps/ref_chosen": -103.74571990966797, |
|
"logps/ref_rejected": -120.73832702636719, |
|
"logps/rejected": -104.55479431152344, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.16685229539871216, |
|
"rewards/margins": 0.005017024464905262, |
|
"rewards/rejected": 0.16183526813983917, |
|
"sft_loss": 0.3127303719520569, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3557019349526554, |
|
"grad_norm": 0.45031648874282837, |
|
"learning_rate": 4.0717821664772124e-07, |
|
"logits/chosen": 10.074949264526367, |
|
"logits/rejected": 11.320752143859863, |
|
"logps/chosen": -87.53924560546875, |
|
"logps/ref_chosen": -105.47428131103516, |
|
"logps/ref_rejected": -120.5193099975586, |
|
"logps/rejected": -103.33158111572266, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.1793503314256668, |
|
"rewards/margins": 0.0074730804190039635, |
|
"rewards/rejected": 0.17187726497650146, |
|
"sft_loss": 0.34031108021736145, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3688760806916426, |
|
"grad_norm": 0.1718786656856537, |
|
"learning_rate": 3.978930531033806e-07, |
|
"logits/chosen": 9.691258430480957, |
|
"logits/rejected": 10.853616714477539, |
|
"logps/chosen": -86.0871810913086, |
|
"logps/ref_chosen": -103.72540283203125, |
|
"logps/ref_rejected": -119.79557800292969, |
|
"logps/rejected": -102.32144927978516, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.1763821244239807, |
|
"rewards/margins": 0.0016407554503530264, |
|
"rewards/rejected": 0.17474135756492615, |
|
"sft_loss": 0.2956213057041168, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3820502264306299, |
|
"grad_norm": 0.22632478177547455, |
|
"learning_rate": 3.882827885312998e-07, |
|
"logits/chosen": 10.157691955566406, |
|
"logits/rejected": 11.235154151916504, |
|
"logps/chosen": -90.04312133789062, |
|
"logps/ref_chosen": -108.65434265136719, |
|
"logps/ref_rejected": -121.46784973144531, |
|
"logps/rejected": -103.93795776367188, |
|
"loss": 0.4546, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.1861121654510498, |
|
"rewards/margins": 0.010813241824507713, |
|
"rewards/rejected": 0.17529892921447754, |
|
"sft_loss": 0.29843831062316895, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.39522437216961714, |
|
"grad_norm": 0.1572926938533783, |
|
"learning_rate": 3.7836854837871044e-07, |
|
"logits/chosen": 10.309174537658691, |
|
"logits/rejected": 11.701813697814941, |
|
"logps/chosen": -83.15330505371094, |
|
"logps/ref_chosen": -103.62174224853516, |
|
"logps/ref_rejected": -126.73807525634766, |
|
"logps/rejected": -107.76830291748047, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.20468442142009735, |
|
"rewards/margins": 0.014986753463745117, |
|
"rewards/rejected": 0.18969768285751343, |
|
"sft_loss": 0.31653928756713867, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4083985179086044, |
|
"grad_norm": 0.2045690417289734, |
|
"learning_rate": 3.681721262971413e-07, |
|
"logits/chosen": 9.92952823638916, |
|
"logits/rejected": 10.956416130065918, |
|
"logps/chosen": -85.6856689453125, |
|
"logps/ref_chosen": -106.10479736328125, |
|
"logps/ref_rejected": -120.6382827758789, |
|
"logps/rejected": -101.52389526367188, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.20419135689735413, |
|
"rewards/margins": 0.013047425076365471, |
|
"rewards/rejected": 0.1911439299583435, |
|
"sft_loss": 0.3177209496498108, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.4215726636475916, |
|
"grad_norm": 0.17902837693691254, |
|
"learning_rate": 3.577159362352426e-07, |
|
"logits/chosen": 10.117594718933105, |
|
"logits/rejected": 11.502117156982422, |
|
"logps/chosen": -86.69911193847656, |
|
"logps/ref_chosen": -105.99569702148438, |
|
"logps/ref_rejected": -128.34303283691406, |
|
"logps/rejected": -108.87689208984375, |
|
"loss": 0.4587, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.19296588003635406, |
|
"rewards/margins": -0.0016955481842160225, |
|
"rewards/rejected": 0.19466140866279602, |
|
"sft_loss": 0.30118295550346375, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.43474680938657884, |
|
"grad_norm": 0.2392444610595703, |
|
"learning_rate": 3.470229631680624e-07, |
|
"logits/chosen": 10.104952812194824, |
|
"logits/rejected": 10.932075500488281, |
|
"logps/chosen": -85.76262664794922, |
|
"logps/ref_chosen": -105.72196197509766, |
|
"logps/ref_rejected": -121.59507751464844, |
|
"logps/rejected": -101.98007202148438, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.19959335029125214, |
|
"rewards/margins": 0.0034433496184647083, |
|
"rewards/rejected": 0.196150004863739, |
|
"sft_loss": 0.28305673599243164, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.44792095512556607, |
|
"grad_norm": 0.24395713210105896, |
|
"learning_rate": 3.361167125710832e-07, |
|
"logits/chosen": 10.26183032989502, |
|
"logits/rejected": 11.106597900390625, |
|
"logps/chosen": -90.0468978881836, |
|
"logps/ref_chosen": -111.4834976196289, |
|
"logps/ref_rejected": -130.48089599609375, |
|
"logps/rejected": -108.98446655273438, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.21436603367328644, |
|
"rewards/margins": -0.0005981822032481432, |
|
"rewards/rejected": 0.21496421098709106, |
|
"sft_loss": 0.3201817572116852, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 0.28230205178260803, |
|
"learning_rate": 3.2502115875008516e-07, |
|
"logits/chosen": 10.544075012207031, |
|
"logits/rejected": 11.514993667602539, |
|
"logps/chosen": -86.92802429199219, |
|
"logps/ref_chosen": -108.9183349609375, |
|
"logps/ref_rejected": -121.32493591308594, |
|
"logps/rejected": -100.38153839111328, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.21990321576595306, |
|
"rewards/margins": 0.010469252243638039, |
|
"rewards/rejected": 0.20943395793437958, |
|
"sft_loss": 0.29018867015838623, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.47426924660354053, |
|
"grad_norm": 0.3982340395450592, |
|
"learning_rate": 3.137606921404191e-07, |
|
"logits/chosen": 10.228411674499512, |
|
"logits/rejected": 10.880895614624023, |
|
"logps/chosen": -85.34222412109375, |
|
"logps/ref_chosen": -107.1411361694336, |
|
"logps/ref_rejected": -118.66165161132812, |
|
"logps/rejected": -97.36593627929688, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.21798911690711975, |
|
"rewards/margins": 0.005031956359744072, |
|
"rewards/rejected": 0.21295715868473053, |
|
"sft_loss": 0.30310767889022827, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.4874433923425278, |
|
"grad_norm": 0.24910680949687958, |
|
"learning_rate": 3.0236006569153616e-07, |
|
"logits/chosen": 10.425313949584961, |
|
"logits/rejected": 11.244641304016113, |
|
"logps/chosen": -85.0268325805664, |
|
"logps/ref_chosen": -106.6348876953125, |
|
"logps/ref_rejected": -121.37834167480469, |
|
"logps/rejected": -99.6484375, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.2160806506872177, |
|
"rewards/margins": -0.001218426157720387, |
|
"rewards/rejected": 0.21729910373687744, |
|
"sft_loss": 0.3029400110244751, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.500617538081515, |
|
"grad_norm": 0.2774628698825836, |
|
"learning_rate": 2.9084434045463254e-07, |
|
"logits/chosen": 9.934067726135254, |
|
"logits/rejected": 11.023883819580078, |
|
"logps/chosen": -80.26193237304688, |
|
"logps/ref_chosen": -104.01033782958984, |
|
"logps/ref_rejected": -119.02666473388672, |
|
"logps/rejected": -94.75761413574219, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.23748423159122467, |
|
"rewards/margins": -0.005206258036196232, |
|
"rewards/rejected": 0.24269047379493713, |
|
"sft_loss": 0.30372247099876404, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5137916838205022, |
|
"grad_norm": 0.2735401391983032, |
|
"learning_rate": 2.7923883049302066e-07, |
|
"logits/chosen": 10.450285911560059, |
|
"logits/rejected": 11.239182472229004, |
|
"logps/chosen": -86.74555206298828, |
|
"logps/ref_chosen": -109.76485443115234, |
|
"logps/ref_rejected": -122.25163269042969, |
|
"logps/rejected": -100.94914245605469, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": 0.23019295930862427, |
|
"rewards/margins": 0.017168078571558, |
|
"rewards/rejected": 0.21302486956119537, |
|
"sft_loss": 0.3085777759552002, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5269658295594895, |
|
"grad_norm": 0.44081562757492065, |
|
"learning_rate": 2.6756904723632324e-07, |
|
"logits/chosen": 10.308364868164062, |
|
"logits/rejected": 11.480310440063477, |
|
"logps/chosen": -83.50869750976562, |
|
"logps/ref_chosen": -107.18782806396484, |
|
"logps/ref_rejected": -124.24542236328125, |
|
"logps/rejected": -101.8099594116211, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": 0.23679129779338837, |
|
"rewards/margins": 0.012436658143997192, |
|
"rewards/rejected": 0.22435463964939117, |
|
"sft_loss": 0.29746735095977783, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5401399752984768, |
|
"grad_norm": 0.2262677699327469, |
|
"learning_rate": 2.5586064340081516e-07, |
|
"logits/chosen": 10.629181861877441, |
|
"logits/rejected": 11.249412536621094, |
|
"logps/chosen": -83.5615463256836, |
|
"logps/ref_chosen": -106.42051696777344, |
|
"logps/ref_rejected": -122.25247192382812, |
|
"logps/rejected": -99.15153503417969, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.22858966886997223, |
|
"rewards/margins": -0.0024197339080274105, |
|
"rewards/rejected": 0.23100940883159637, |
|
"sft_loss": 0.30392372608184814, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.553314121037464, |
|
"grad_norm": 0.24259349703788757, |
|
"learning_rate": 2.4413935659918487e-07, |
|
"logits/chosen": 9.622812271118164, |
|
"logits/rejected": 10.690420150756836, |
|
"logps/chosen": -78.60126495361328, |
|
"logps/ref_chosen": -103.1148452758789, |
|
"logps/ref_rejected": -116.55464935302734, |
|
"logps/rejected": -93.2408676147461, |
|
"loss": 0.4435, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.24513573944568634, |
|
"rewards/margins": 0.011997973546385765, |
|
"rewards/rejected": 0.23313775658607483, |
|
"sft_loss": 0.2801953852176666, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5664882667764513, |
|
"grad_norm": 0.259721577167511, |
|
"learning_rate": 2.3243095276367684e-07, |
|
"logits/chosen": 9.710105895996094, |
|
"logits/rejected": 10.863941192626953, |
|
"logps/chosen": -81.1871566772461, |
|
"logps/ref_chosen": -104.21064758300781, |
|
"logps/ref_rejected": -118.7614974975586, |
|
"logps/rejected": -95.96993255615234, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.23023495078086853, |
|
"rewards/margins": 0.0023193652741611004, |
|
"rewards/rejected": 0.22791558504104614, |
|
"sft_loss": 0.2908448278903961, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5796624125154385, |
|
"grad_norm": 0.20098231732845306, |
|
"learning_rate": 2.2076116950697937e-07, |
|
"logits/chosen": 9.823664665222168, |
|
"logits/rejected": 10.643632888793945, |
|
"logps/chosen": -76.53820037841797, |
|
"logps/ref_chosen": -100.59449005126953, |
|
"logps/ref_rejected": -115.95166778564453, |
|
"logps/rejected": -92.3114013671875, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.24056285619735718, |
|
"rewards/margins": 0.004160105250775814, |
|
"rewards/rejected": 0.2364027351140976, |
|
"sft_loss": 0.26114290952682495, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5928365582544257, |
|
"grad_norm": 0.18732482194900513, |
|
"learning_rate": 2.091556595453674e-07, |
|
"logits/chosen": 10.034218788146973, |
|
"logits/rejected": 10.895869255065918, |
|
"logps/chosen": -82.88983917236328, |
|
"logps/ref_chosen": -106.96060943603516, |
|
"logps/ref_rejected": -125.49449157714844, |
|
"logps/rejected": -102.64103698730469, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.24070771038532257, |
|
"rewards/margins": 0.012173159047961235, |
|
"rewards/rejected": 0.2285345494747162, |
|
"sft_loss": 0.28714537620544434, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.606010703993413, |
|
"grad_norm": 0.21146298944950104, |
|
"learning_rate": 1.9763993430846392e-07, |
|
"logits/chosen": 10.091521263122559, |
|
"logits/rejected": 10.786705017089844, |
|
"logps/chosen": -81.19091796875, |
|
"logps/ref_chosen": -107.08544158935547, |
|
"logps/ref_rejected": -120.38542175292969, |
|
"logps/rejected": -94.10372924804688, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.4453125, |
|
"rewards/chosen": 0.2589452862739563, |
|
"rewards/margins": -0.0038715943228453398, |
|
"rewards/rejected": 0.2628169059753418, |
|
"sft_loss": 0.291401743888855, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6191848497324002, |
|
"grad_norm": 0.1926412284374237, |
|
"learning_rate": 1.862393078595809e-07, |
|
"logits/chosen": 10.000544548034668, |
|
"logits/rejected": 11.2357177734375, |
|
"logps/chosen": -81.96806335449219, |
|
"logps/ref_chosen": -105.74787902832031, |
|
"logps/ref_rejected": -122.93606567382812, |
|
"logps/rejected": -100.6171875, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.237798273563385, |
|
"rewards/margins": 0.01460958831012249, |
|
"rewards/rejected": 0.22318868339061737, |
|
"sft_loss": 0.2802280783653259, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6323589954713874, |
|
"grad_norm": 0.25539568066596985, |
|
"learning_rate": 1.7497884124991485e-07, |
|
"logits/chosen": 10.508065223693848, |
|
"logits/rejected": 11.458433151245117, |
|
"logps/chosen": -80.53589630126953, |
|
"logps/ref_chosen": -105.3005599975586, |
|
"logps/ref_rejected": -123.93569946289062, |
|
"logps/rejected": -98.65711212158203, |
|
"loss": 0.4509, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": 0.24764665961265564, |
|
"rewards/margins": -0.005139135755598545, |
|
"rewards/rejected": 0.2527858018875122, |
|
"sft_loss": 0.2870361804962158, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6455331412103746, |
|
"grad_norm": 0.2758500277996063, |
|
"learning_rate": 1.6388328742891678e-07, |
|
"logits/chosen": 10.53940486907959, |
|
"logits/rejected": 11.424980163574219, |
|
"logps/chosen": -78.68338775634766, |
|
"logps/ref_chosen": -104.30430603027344, |
|
"logps/ref_rejected": -115.85497283935547, |
|
"logps/rejected": -91.4198989868164, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": 0.25620919466018677, |
|
"rewards/margins": 0.011858467943966389, |
|
"rewards/rejected": 0.24435076117515564, |
|
"sft_loss": 0.2917044162750244, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"grad_norm": 0.21196329593658447, |
|
"learning_rate": 1.5297703683193753e-07, |
|
"logits/chosen": 10.163137435913086, |
|
"logits/rejected": 11.043447494506836, |
|
"logps/chosen": -80.13683319091797, |
|
"logps/ref_chosen": -104.65946960449219, |
|
"logps/ref_rejected": -118.84170532226562, |
|
"logps/rejected": -94.65231323242188, |
|
"loss": 0.4413, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": 0.24522626399993896, |
|
"rewards/margins": 0.003332418156787753, |
|
"rewards/rejected": 0.24189382791519165, |
|
"sft_loss": 0.2736455202102661, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6718814326883491, |
|
"grad_norm": 0.15545395016670227, |
|
"learning_rate": 1.422840637647574e-07, |
|
"logits/chosen": 10.250130653381348, |
|
"logits/rejected": 10.794774055480957, |
|
"logps/chosen": -79.24235534667969, |
|
"logps/ref_chosen": -104.4243392944336, |
|
"logps/ref_rejected": -117.16233825683594, |
|
"logps/rejected": -92.9332504272461, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.25181978940963745, |
|
"rewards/margins": 0.009528912603855133, |
|
"rewards/rejected": 0.24229088425636292, |
|
"sft_loss": 0.27486634254455566, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.6850555784273363, |
|
"grad_norm": 0.13252638280391693, |
|
"learning_rate": 1.3182787370285865e-07, |
|
"logits/chosen": 9.646485328674316, |
|
"logits/rejected": 10.813876152038574, |
|
"logps/chosen": -77.11428833007812, |
|
"logps/ref_chosen": -101.99165344238281, |
|
"logps/ref_rejected": -123.20516204833984, |
|
"logps/rejected": -98.14683532714844, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.24877360463142395, |
|
"rewards/margins": -0.001809664536267519, |
|
"rewards/rejected": 0.2505832612514496, |
|
"sft_loss": 0.2712918817996979, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6982297241663236, |
|
"grad_norm": 0.18613919615745544, |
|
"learning_rate": 1.2163145162128946e-07, |
|
"logits/chosen": 10.172059059143066, |
|
"logits/rejected": 11.077869415283203, |
|
"logps/chosen": -83.44391632080078, |
|
"logps/ref_chosen": -108.26175689697266, |
|
"logps/ref_rejected": -118.12374114990234, |
|
"logps/rejected": -93.78585052490234, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.2481783926486969, |
|
"rewards/margins": 0.004799447022378445, |
|
"rewards/rejected": 0.24337893724441528, |
|
"sft_loss": 0.27333518862724304, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.7114038699053108, |
|
"grad_norm": 0.2294066846370697, |
|
"learning_rate": 1.1171721146870014e-07, |
|
"logits/chosen": 10.27616024017334, |
|
"logits/rejected": 11.273432731628418, |
|
"logps/chosen": -81.40825653076172, |
|
"logps/ref_chosen": -108.5864028930664, |
|
"logps/ref_rejected": -130.25155639648438, |
|
"logps/rejected": -101.91203308105469, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.3984375, |
|
"rewards/chosen": 0.27178147435188293, |
|
"rewards/margins": -0.011613852344453335, |
|
"rewards/rejected": 0.28339529037475586, |
|
"sft_loss": 0.3042542040348053, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.724578015644298, |
|
"grad_norm": 0.1640205830335617, |
|
"learning_rate": 1.0210694689661939e-07, |
|
"logits/chosen": 10.290548324584961, |
|
"logits/rejected": 11.116561889648438, |
|
"logps/chosen": -79.66968536376953, |
|
"logps/ref_chosen": -105.69741821289062, |
|
"logps/ref_rejected": -122.07044219970703, |
|
"logps/rejected": -95.34159088134766, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.26027727127075195, |
|
"rewards/margins": -0.007011266425251961, |
|
"rewards/rejected": 0.26728853583335876, |
|
"sft_loss": 0.2752927243709564, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7377521613832853, |
|
"grad_norm": 0.2337108701467514, |
|
"learning_rate": 9.282178335227883e-08, |
|
"logits/chosen": 9.930572509765625, |
|
"logits/rejected": 11.117300033569336, |
|
"logps/chosen": -80.77919006347656, |
|
"logps/ref_chosen": -106.5007095336914, |
|
"logps/ref_rejected": -123.01736450195312, |
|
"logps/rejected": -97.2107162475586, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.2572151720523834, |
|
"rewards/margins": -0.0008512809872627258, |
|
"rewards/rejected": 0.25806647539138794, |
|
"sft_loss": 0.29223573207855225, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7509263071222725, |
|
"grad_norm": 0.15076801180839539, |
|
"learning_rate": 8.388213164029459e-08, |
|
"logits/chosen": 10.707691192626953, |
|
"logits/rejected": 11.468770027160645, |
|
"logps/chosen": -84.24454498291016, |
|
"logps/ref_chosen": -109.18460083007812, |
|
"logps/ref_rejected": -124.3697280883789, |
|
"logps/rejected": -98.2645263671875, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.24940063059329987, |
|
"rewards/margins": -0.011651396751403809, |
|
"rewards/rejected": 0.26105204224586487, |
|
"sft_loss": 0.29044556617736816, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.7641004528612598, |
|
"grad_norm": 0.3673655390739441, |
|
"learning_rate": 7.530764305528958e-08, |
|
"logits/chosen": 10.054794311523438, |
|
"logits/rejected": 10.671183586120605, |
|
"logps/chosen": -77.9069595336914, |
|
"logps/ref_chosen": -104.43944549560547, |
|
"logps/ref_rejected": -118.44985961914062, |
|
"logps/rejected": -90.52770233154297, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": 0.2653248608112335, |
|
"rewards/margins": -0.013896753080189228, |
|
"rewards/rejected": 0.2792215943336487, |
|
"sft_loss": 0.27804291248321533, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7772745986002471, |
|
"grad_norm": 0.21028165519237518, |
|
"learning_rate": 6.711716618408281e-08, |
|
"logits/chosen": 10.271055221557617, |
|
"logits/rejected": 11.227380752563477, |
|
"logps/chosen": -77.78079223632812, |
|
"logps/ref_chosen": -103.32658386230469, |
|
"logps/ref_rejected": -121.63726806640625, |
|
"logps/rejected": -95.64451599121094, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": 0.2554578483104706, |
|
"rewards/margins": -0.004469675477594137, |
|
"rewards/rejected": 0.25992754101753235, |
|
"sft_loss": 0.2870379686355591, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.7904487443392343, |
|
"grad_norm": 0.18925029039382935, |
|
"learning_rate": 5.932870547240454e-08, |
|
"logits/chosen": 10.035722732543945, |
|
"logits/rejected": 11.13114070892334, |
|
"logps/chosen": -76.91880798339844, |
|
"logps/ref_chosen": -102.98921966552734, |
|
"logps/ref_rejected": -124.47185516357422, |
|
"logps/rejected": -97.81532287597656, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.26070404052734375, |
|
"rewards/margins": -0.005861295852810144, |
|
"rewards/rejected": 0.26656535267829895, |
|
"sft_loss": 0.27288156747817993, |
|
"step": 60 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 12, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|