|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.47426924660354053,
  "eval_steps": 500,
  "global_step": 36,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013174145738987238,
      "grad_norm": 1.194186806678772,
      "learning_rate": 6.25e-08,
      "logits/chosen": 9.990612030029297,
      "logits/rejected": 10.698101997375488,
      "logps/chosen": -102.88545989990234,
      "logps/ref_chosen": -102.88545989990234,
      "logps/ref_rejected": -121.84871673583984,
      "logps/rejected": -121.84871673583984,
      "loss": 0.3675,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "sft_loss": 0.36753880977630615,
      "step": 1
    },
    {
      "epoch": 0.026348291477974475,
      "grad_norm": 0.5353251099586487,
      "learning_rate": 1.25e-07,
      "logits/chosen": 10.211905479431152,
      "logits/rejected": 11.06594467163086,
      "logps/chosen": -107.70349884033203,
      "logps/ref_chosen": -107.70349884033203,
      "logps/ref_rejected": -121.89966583251953,
      "logps/rejected": -121.89966583251953,
      "loss": 0.4101,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "sft_loss": 0.41013145446777344,
      "step": 2
    },
    {
      "epoch": 0.03952243721696171,
      "grad_norm": 0.7126303315162659,
      "learning_rate": 1.875e-07,
      "logits/chosen": 10.032384872436523,
      "logits/rejected": 11.023520469665527,
      "logps/chosen": -108.3123779296875,
      "logps/ref_chosen": -107.98188781738281,
      "logps/ref_rejected": -124.51527404785156,
      "logps/rejected": -124.87130737304688,
      "loss": 0.412,
      "rewards/accuracies": 0.5234375,
      "rewards/chosen": -0.003304910147562623,
      "rewards/margins": 0.0002554532838985324,
      "rewards/rejected": -0.003560363780707121,
      "sft_loss": 0.41195932030677795,
      "step": 3
    },
    {
      "epoch": 0.05269658295594895,
      "grad_norm": 1.2344533205032349,
      "learning_rate": 2.5e-07,
      "logits/chosen": 9.836658477783203,
      "logits/rejected": 10.855621337890625,
      "logps/chosen": -109.55919647216797,
      "logps/ref_chosen": -109.20836639404297,
      "logps/ref_rejected": -119.23908996582031,
      "logps/rejected": -119.48279571533203,
      "loss": 0.4039,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": -0.003508324269205332,
      "rewards/margins": -0.0010712125804275274,
      "rewards/rejected": -0.0024371116887778044,
      "sft_loss": 0.4038863480091095,
      "step": 4
    },
    {
      "epoch": 0.06587072869493618,
      "grad_norm": 1.426048994064331,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": 10.212320327758789,
      "logits/rejected": 10.966379165649414,
      "logps/chosen": -103.76991271972656,
      "logps/ref_chosen": -103.87680053710938,
      "logps/ref_rejected": -118.41618347167969,
      "logps/rejected": -118.23270416259766,
      "loss": 0.3697,
      "rewards/accuracies": 0.453125,
      "rewards/chosen": 0.0010687037138268352,
      "rewards/margins": -0.000766113749705255,
      "rewards/rejected": 0.0018348174635320902,
      "sft_loss": 0.3697226345539093,
      "step": 5
    },
    {
      "epoch": 0.07904487443392343,
      "grad_norm": 1.413549780845642,
      "learning_rate": 3.75e-07,
      "logits/chosen": 10.700042724609375,
      "logits/rejected": 11.478326797485352,
      "logps/chosen": -107.56877899169922,
      "logps/ref_chosen": -107.58968353271484,
      "logps/ref_rejected": -122.07303619384766,
      "logps/rejected": -121.85940551757812,
      "loss": 0.3909,
      "rewards/accuracies": 0.4609375,
      "rewards/chosen": 0.0002090137859340757,
      "rewards/margins": -0.0019273017533123493,
      "rewards/rejected": 0.00213631521910429,
      "sft_loss": 0.390906423330307,
      "step": 6
    },
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 1.2342580556869507,
      "learning_rate": 4.375e-07,
      "logits/chosen": 10.01632308959961,
      "logits/rejected": 10.7178955078125,
      "logps/chosen": -107.01339721679688,
      "logps/ref_chosen": -107.42727661132812,
      "logps/ref_rejected": -116.87063598632812,
      "logps/rejected": -116.37357330322266,
      "loss": 0.3747,
      "rewards/accuracies": 0.4453125,
      "rewards/chosen": 0.00413867924362421,
      "rewards/margins": -0.0008318667532876134,
      "rewards/rejected": 0.004970546346157789,
      "sft_loss": 0.3746669888496399,
      "step": 7
    },
    {
      "epoch": 0.1053931659118979,
      "grad_norm": 0.6644937992095947,
      "learning_rate": 5e-07,
      "logits/chosen": 10.211028099060059,
      "logits/rejected": 11.11027717590332,
      "logps/chosen": -104.41184997558594,
      "logps/ref_chosen": -105.60282135009766,
      "logps/ref_rejected": -119.53916931152344,
      "logps/rejected": -118.27430725097656,
      "loss": 0.3773,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.01190974935889244,
      "rewards/margins": -0.0007388982339762151,
      "rewards/rejected": 0.012648648582398891,
      "sft_loss": 0.37729793787002563,
      "step": 8
    },
    {
      "epoch": 0.11856731165088513,
      "grad_norm": 0.9437576532363892,
      "learning_rate": 4.997252228714278e-07,
      "logits/chosen": 10.179821014404297,
      "logits/rejected": 11.147579193115234,
      "logps/chosen": -104.13174438476562,
      "logps/ref_chosen": -105.46086120605469,
      "logps/ref_rejected": -119.00373840332031,
      "logps/rejected": -117.734130859375,
      "loss": 0.3807,
      "rewards/accuracies": 0.5234375,
      "rewards/chosen": 0.013291322626173496,
      "rewards/margins": 0.0005952615174464881,
      "rewards/rejected": 0.012696062214672565,
      "sft_loss": 0.38070446252822876,
      "step": 9
    },
    {
      "epoch": 0.13174145738987236,
      "grad_norm": 0.700039803981781,
      "learning_rate": 4.989014955054745e-07,
      "logits/chosen": 10.076737403869629,
      "logits/rejected": 10.897785186767578,
      "logps/chosen": -100.81087493896484,
      "logps/ref_chosen": -104.21009826660156,
      "logps/ref_rejected": -118.9209213256836,
      "logps/rejected": -115.75495910644531,
      "loss": 0.3367,
      "rewards/accuracies": 0.5546875,
      "rewards/chosen": 0.033992186188697815,
      "rewards/margins": 0.0023326175287365913,
      "rewards/rejected": 0.0316595658659935,
      "sft_loss": 0.33672136068344116,
      "step": 10
    },
    {
      "epoch": 0.14491560312885962,
      "grad_norm": 0.9160856008529663,
      "learning_rate": 4.975306286336627e-07,
      "logits/chosen": 9.973880767822266,
      "logits/rejected": 11.158487319946289,
      "logps/chosen": -101.3505630493164,
      "logps/ref_chosen": -105.94319152832031,
      "logps/ref_rejected": -122.76007843017578,
      "logps/rejected": -118.6338119506836,
      "loss": 0.3851,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.045926500111818314,
      "rewards/margins": 0.004663803614675999,
      "rewards/rejected": 0.04126270115375519,
      "sft_loss": 0.3850533962249756,
      "step": 11
    },
    {
      "epoch": 0.15808974886784685,
      "grad_norm": 0.9421964883804321,
      "learning_rate": 4.956156357188939e-07,
      "logits/chosen": 9.908226013183594,
      "logits/rejected": 10.598045349121094,
      "logps/chosen": -103.32762908935547,
      "logps/ref_chosen": -109.08442687988281,
      "logps/ref_rejected": -121.41947174072266,
      "logps/rejected": -115.84996795654297,
      "loss": 0.3532,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.05756799131631851,
      "rewards/margins": 0.0018730255542322993,
      "rewards/rejected": 0.05569496005773544,
      "sft_loss": 0.3532242476940155,
      "step": 12
    },
    {
      "epoch": 0.17126389460683408,
      "grad_norm": 0.3328304886817932,
      "learning_rate": 4.931607263312032e-07,
      "logits/chosen": 9.964012145996094,
      "logits/rejected": 11.03992748260498,
      "logps/chosen": -98.97601318359375,
      "logps/ref_chosen": -104.62150573730469,
      "logps/ref_rejected": -119.55384063720703,
      "logps/rejected": -114.12371826171875,
      "loss": 0.3686,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.056454867124557495,
      "rewards/margins": 0.0021536569111049175,
      "rewards/rejected": 0.05430121719837189,
      "sft_loss": 0.36859023571014404,
      "step": 13
    },
    {
      "epoch": 0.1844380403458213,
      "grad_norm": 0.30642038583755493,
      "learning_rate": 4.9017129689421e-07,
      "logits/chosen": 10.519927978515625,
      "logits/rejected": 11.649580001831055,
      "logps/chosen": -96.5634765625,
      "logps/ref_chosen": -106.179443359375,
      "logps/ref_rejected": -120.73036193847656,
      "logps/rejected": -110.86133575439453,
      "loss": 0.3385,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.09615952521562576,
      "rewards/margins": -0.002530643017962575,
      "rewards/rejected": 0.09869016706943512,
      "sft_loss": 0.338548868894577,
      "step": 14
    },
    {
      "epoch": 0.19761218608480857,
      "grad_norm": 0.301073282957077,
      "learning_rate": 4.866539188226085e-07,
      "logits/chosen": 9.891039848327637,
      "logits/rejected": 10.824172973632812,
      "logps/chosen": -95.14861297607422,
      "logps/ref_chosen": -105.70547485351562,
      "logps/ref_rejected": -118.89997863769531,
      "logps/rejected": -108.2326889038086,
      "loss": 0.3305,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": 0.10556865483522415,
      "rewards/margins": -0.0011043368140235543,
      "rewards/rejected": 0.1066729873418808,
      "sft_loss": 0.3305360674858093,
      "step": 15
    },
    {
      "epoch": 0.2107863318237958,
      "grad_norm": 0.36065343022346497,
      "learning_rate": 4.826163240767716e-07,
      "logits/chosen": 10.682470321655273,
      "logits/rejected": 11.299846649169922,
      "logps/chosen": -96.53520202636719,
      "logps/ref_chosen": -108.86376953125,
      "logps/ref_rejected": -122.1635513305664,
      "logps/rejected": -110.50537872314453,
      "loss": 0.3484,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.12328556925058365,
      "rewards/margins": 0.006703883409500122,
      "rewards/rejected": 0.11658168584108353,
      "sft_loss": 0.3484281003475189,
      "step": 16
    },
    {
      "epoch": 0.22396047756278303,
      "grad_norm": 0.5325565934181213,
      "learning_rate": 4.780673881662242e-07,
      "logits/chosen": 10.187503814697266,
      "logits/rejected": 10.843408584594727,
      "logps/chosen": -90.1707992553711,
      "logps/ref_chosen": -102.93986511230469,
      "logps/ref_rejected": -119.43718719482422,
      "logps/rejected": -106.76301574707031,
      "loss": 0.359,
      "rewards/accuracies": 0.4453125,
      "rewards/chosen": 0.12769076228141785,
      "rewards/margins": 0.0009490540251135826,
      "rewards/rejected": 0.1267417073249817,
      "sft_loss": 0.3589847683906555,
      "step": 17
    },
    {
      "epoch": 0.23713462330177026,
      "grad_norm": 0.4098409116268158,
      "learning_rate": 4.730171106393466e-07,
      "logits/chosen": 10.4215669631958,
      "logits/rejected": 11.216498374938965,
      "logps/chosen": -90.09894561767578,
      "logps/ref_chosen": -103.81341552734375,
      "logps/ref_rejected": -117.45123291015625,
      "logps/rejected": -104.58552551269531,
      "loss": 0.3368,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.13714462518692017,
      "rewards/margins": 0.00848748255521059,
      "rewards/rejected": 0.12865713238716125,
      "sft_loss": 0.33678534626960754,
      "step": 18
    },
    {
      "epoch": 0.2503087690407575,
      "grad_norm": 0.3302735984325409,
      "learning_rate": 4.6747659310219757e-07,
      "logits/chosen": 10.332744598388672,
      "logits/rejected": 11.005766868591309,
      "logps/chosen": -94.52428436279297,
      "logps/ref_chosen": -107.85797119140625,
      "logps/ref_rejected": -121.88042449951172,
      "logps/rejected": -108.09265899658203,
      "loss": 0.3222,
      "rewards/accuracies": 0.4296875,
      "rewards/chosen": 0.1333368420600891,
      "rewards/margins": -0.004540742840617895,
      "rewards/rejected": 0.13787758350372314,
      "sft_loss": 0.3221552073955536,
      "step": 19
    },
    {
      "epoch": 0.2634829147797447,
      "grad_norm": 0.40531161427497864,
      "learning_rate": 4.6145801481477433e-07,
      "logits/chosen": 10.747330665588379,
      "logits/rejected": 11.561124801635742,
      "logps/chosen": -89.97228240966797,
      "logps/ref_chosen": -103.42721557617188,
      "logps/ref_rejected": -116.7796630859375,
      "logps/rejected": -103.99850463867188,
      "loss": 0.3157,
      "rewards/accuracies": 0.5703125,
      "rewards/chosen": 0.13454943895339966,
      "rewards/margins": 0.006737923249602318,
      "rewards/rejected": 0.1278115212917328,
      "sft_loss": 0.3156886100769043,
      "step": 20
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 0.2686282992362976,
      "learning_rate": 4.549746059183561e-07,
      "logits/chosen": 9.720458984375,
      "logits/rejected": 10.846506118774414,
      "logps/chosen": -92.48249816894531,
      "logps/ref_chosen": -106.60163879394531,
      "logps/ref_rejected": -124.56562805175781,
      "logps/rejected": -109.58876037597656,
      "loss": 0.3106,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.14119136333465576,
      "rewards/margins": -0.00857722107321024,
      "rewards/rejected": 0.14976857602596283,
      "sft_loss": 0.31064528226852417,
      "step": 21
    },
    {
      "epoch": 0.28983120625771924,
      "grad_norm": 0.5989738702774048,
      "learning_rate": 4.480406183527823e-07,
      "logits/chosen": 10.225810050964355,
      "logits/rejected": 11.099544525146484,
      "logps/chosen": -88.04141998291016,
      "logps/ref_chosen": -103.77696228027344,
      "logps/ref_rejected": -118.73616027832031,
      "logps/rejected": -104.40451049804688,
      "loss": 0.3321,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.157355397939682,
      "rewards/margins": 0.014038847759366035,
      "rewards/rejected": 0.14331655204296112,
      "sft_loss": 0.3321138620376587,
      "step": 22
    },
    {
      "epoch": 0.3030053519967065,
      "grad_norm": 0.1970965415239334,
      "learning_rate": 4.4067129452759546e-07,
      "logits/chosen": 10.115339279174805,
      "logits/rejected": 11.140266418457031,
      "logps/chosen": -87.26233673095703,
      "logps/ref_chosen": -104.72956085205078,
      "logps/ref_rejected": -121.35556030273438,
      "logps/rejected": -104.43501281738281,
      "loss": 0.3228,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.17467224597930908,
      "rewards/margins": 0.005466699134558439,
      "rewards/rejected": 0.16920553147792816,
      "sft_loss": 0.3228015899658203,
      "step": 23
    },
    {
      "epoch": 0.3161794977356937,
      "grad_norm": 0.2894323468208313,
      "learning_rate": 4.3288283381591725e-07,
      "logits/chosen": 10.147160530090332,
      "logits/rejected": 10.98647689819336,
      "logps/chosen": -86.99087524414062,
      "logps/ref_chosen": -105.88758087158203,
      "logps/ref_rejected": -125.69054412841797,
      "logps/rejected": -106.15878295898438,
      "loss": 0.3069,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": 0.1889670193195343,
      "rewards/margins": -0.0063507393933832645,
      "rewards/rejected": 0.19531774520874023,
      "sft_loss": 0.30694928765296936,
      "step": 24
    },
    {
      "epoch": 0.32935364347468093,
      "grad_norm": 0.31463876366615295,
      "learning_rate": 4.246923569447104e-07,
      "logits/chosen": 10.327369689941406,
      "logits/rejected": 11.063910484313965,
      "logps/chosen": -87.99880981445312,
      "logps/ref_chosen": -110.0761489868164,
      "logps/ref_rejected": -129.10540771484375,
      "logps/rejected": -107.19017028808594,
      "loss": 0.2993,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.22077329456806183,
      "rewards/margins": 0.001621072180569172,
      "rewards/rejected": 0.21915221214294434,
      "sft_loss": 0.2992705702781677,
      "step": 25
    },
    {
      "epoch": 0.34252778921366817,
      "grad_norm": 0.25855836272239685,
      "learning_rate": 4.161178683597054e-07,
      "logits/chosen": 10.388958930969238,
      "logits/rejected": 11.489179611206055,
      "logps/chosen": -81.3349609375,
      "logps/ref_chosen": -103.74571990966797,
      "logps/ref_rejected": -120.73832702636719,
      "logps/rejected": -98.57904052734375,
      "loss": 0.2909,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.22410757839679718,
      "rewards/margins": 0.0025147469714283943,
      "rewards/rejected": 0.22159285843372345,
      "sft_loss": 0.2909452021121979,
      "step": 26
    },
    {
      "epoch": 0.3557019349526554,
      "grad_norm": 0.5696946382522583,
      "learning_rate": 4.0717821664772124e-07,
      "logits/chosen": 10.086296081542969,
      "logits/rejected": 11.336379051208496,
      "logps/chosen": -81.64080810546875,
      "logps/ref_chosen": -105.47428131103516,
      "logps/ref_rejected": -120.5193099975586,
      "logps/rejected": -97.64772033691406,
      "loss": 0.3163,
      "rewards/accuracies": 0.5390625,
      "rewards/chosen": 0.23833464086055756,
      "rewards/margins": 0.00961877591907978,
      "rewards/rejected": 0.22871585190296173,
      "sft_loss": 0.316275417804718,
      "step": 27
    },
    {
      "epoch": 0.3688760806916426,
      "grad_norm": 0.14858409762382507,
      "learning_rate": 3.978930531033806e-07,
      "logits/chosen": 9.710855484008789,
      "logits/rejected": 10.872222900390625,
      "logps/chosen": -80.8717269897461,
      "logps/ref_chosen": -103.72540283203125,
      "logps/ref_rejected": -119.79557800292969,
      "logps/rejected": -96.69274139404297,
      "loss": 0.2766,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.22853665053844452,
      "rewards/margins": -0.0024917693808674812,
      "rewards/rejected": 0.23102842271327972,
      "sft_loss": 0.2765931785106659,
      "step": 28
    },
    {
      "epoch": 0.3820502264306299,
      "grad_norm": 0.2741422951221466,
      "learning_rate": 3.882827885312998e-07,
      "logits/chosen": 10.16092586517334,
      "logits/rejected": 11.23297119140625,
      "logps/chosen": -85.11812591552734,
      "logps/ref_chosen": -108.65434265136719,
      "logps/ref_rejected": -121.46784973144531,
      "logps/rejected": -98.77241516113281,
      "loss": 0.2799,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": 0.23536208271980286,
      "rewards/margins": 0.008407761342823505,
      "rewards/rejected": 0.22695434093475342,
      "sft_loss": 0.27991783618927,
      "step": 29
    },
    {
      "epoch": 0.39522437216961714,
      "grad_norm": 0.16848187148571014,
      "learning_rate": 3.7836854837871044e-07,
      "logits/chosen": 10.2907133102417,
      "logits/rejected": 11.690597534179688,
      "logps/chosen": -78.23504638671875,
      "logps/ref_chosen": -103.62174224853516,
      "logps/ref_rejected": -126.73807525634766,
      "logps/rejected": -102.43669128417969,
      "loss": 0.2962,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.2538670301437378,
      "rewards/margins": 0.010853251442313194,
      "rewards/rejected": 0.24301378428936005,
      "sft_loss": 0.2962155342102051,
      "step": 30
    },
    {
      "epoch": 0.4083985179086044,
      "grad_norm": 0.1987890601158142,
      "learning_rate": 3.681721262971413e-07,
      "logits/chosen": 9.929094314575195,
      "logits/rejected": 10.946361541748047,
      "logps/chosen": -80.73751831054688,
      "logps/ref_chosen": -106.10479736328125,
      "logps/ref_rejected": -120.6382827758789,
      "logps/rejected": -96.38467407226562,
      "loss": 0.2982,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.2536728084087372,
      "rewards/margins": 0.011136716231703758,
      "rewards/rejected": 0.24253609776496887,
      "sft_loss": 0.2981662452220917,
      "step": 31
    },
    {
      "epoch": 0.4215726636475916,
      "grad_norm": 0.1829695999622345,
      "learning_rate": 3.577159362352426e-07,
      "logits/chosen": 10.097947120666504,
      "logits/rejected": 11.477932929992676,
      "logps/chosen": -82.30887603759766,
      "logps/ref_chosen": -105.99569702148438,
      "logps/ref_rejected": -128.34303283691406,
      "logps/rejected": -104.14814758300781,
      "loss": 0.2848,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": 0.2368682324886322,
      "rewards/margins": -0.0050805676728487015,
      "rewards/rejected": 0.24194881319999695,
      "sft_loss": 0.28480714559555054,
      "step": 32
    },
    {
      "epoch": 0.43474680938657884,
      "grad_norm": 0.22964715957641602,
      "learning_rate": 3.470229631680624e-07,
      "logits/chosen": 10.105993270874023,
      "logits/rejected": 10.923880577087402,
      "logps/chosen": -81.49787902832031,
      "logps/ref_chosen": -105.72196197509766,
      "logps/ref_rejected": -121.59507751464844,
      "logps/rejected": -97.2413101196289,
      "loss": 0.2686,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.242240771651268,
      "rewards/margins": -0.0012968010269105434,
      "rewards/rejected": 0.24353757500648499,
      "sft_loss": 0.2686034142971039,
      "step": 33
    },
    {
      "epoch": 0.44792095512556607,
      "grad_norm": 0.2910502254962921,
      "learning_rate": 3.361167125710832e-07,
      "logits/chosen": 10.264101028442383,
      "logits/rejected": 11.107752799987793,
      "logps/chosen": -85.68121337890625,
      "logps/ref_chosen": -111.4834976196289,
      "logps/ref_rejected": -130.48089599609375,
      "logps/rejected": -104.08442687988281,
      "loss": 0.3039,
      "rewards/accuracies": 0.4609375,
      "rewards/chosen": 0.2580227851867676,
      "rewards/margins": -0.005941788665950298,
      "rewards/rejected": 0.26396459341049194,
      "sft_loss": 0.3038797080516815,
      "step": 34
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 0.2656664550304413,
      "learning_rate": 3.2502115875008516e-07,
      "logits/chosen": 10.529006958007812,
      "logits/rejected": 11.506540298461914,
      "logps/chosen": -82.62615966796875,
      "logps/ref_chosen": -108.9183349609375,
      "logps/ref_rejected": -121.32493591308594,
      "logps/rejected": -95.86014556884766,
      "loss": 0.2751,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.26292186975479126,
      "rewards/margins": 0.008274020627140999,
      "rewards/rejected": 0.2546478509902954,
      "sft_loss": 0.2751036286354065,
      "step": 35
    },
    {
      "epoch": 0.47426924660354053,
      "grad_norm": 0.4242345690727234,
      "learning_rate": 3.137606921404191e-07,
      "logits/chosen": 10.204312324523926,
      "logits/rejected": 10.856239318847656,
      "logps/chosen": -81.047607421875,
      "logps/ref_chosen": -107.1411361694336,
      "logps/ref_rejected": -118.66165161132812,
      "logps/rejected": -92.72647094726562,
      "loss": 0.2871,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.26093533635139465,
      "rewards/margins": 0.0015834786463528872,
      "rewards/rejected": 0.25935184955596924,
      "sft_loss": 0.28712767362594604,
      "step": 36
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 12,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|