|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.710508474576271, |
|
"eval_steps": 100, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013559322033898305, |
|
"grad_norm": 154.68963322032346, |
|
"learning_rate": 4.504504504504504e-08, |
|
"logits/chosen": -1.6006476879119873, |
|
"logits/rejected": -1.7503880262374878, |
|
"logps/chosen": -136.20535278320312, |
|
"logps/rejected": -650.806396484375, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.40000003576278687, |
|
"rewards/chosen": -0.002106652595102787, |
|
"rewards/margins": 0.007925467565655708, |
|
"rewards/rejected": -0.01003211922943592, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02711864406779661, |
|
"grad_norm": 143.60255538961036, |
|
"learning_rate": 9.009009009009008e-08, |
|
"logits/chosen": -1.4859544038772583, |
|
"logits/rejected": -1.6451891660690308, |
|
"logps/chosen": -125.66300964355469, |
|
"logps/rejected": -636.0386962890625, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5374999642372131, |
|
"rewards/chosen": 0.003618550719693303, |
|
"rewards/margins": 0.012560315430164337, |
|
"rewards/rejected": -0.008941764943301678, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04067796610169491, |
|
"grad_norm": 146.5406135926217, |
|
"learning_rate": 1.3513513513513515e-07, |
|
"logits/chosen": -1.6544740200042725, |
|
"logits/rejected": -1.831311583518982, |
|
"logps/chosen": -134.34368896484375, |
|
"logps/rejected": -654.5028686523438, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.009306993335485458, |
|
"rewards/margins": 0.04563351720571518, |
|
"rewards/rejected": -0.03632652387022972, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05423728813559322, |
|
"grad_norm": 134.93316662114188, |
|
"learning_rate": 1.8018018018018017e-07, |
|
"logits/chosen": -1.6334645748138428, |
|
"logits/rejected": -1.8082122802734375, |
|
"logps/chosen": -150.40078735351562, |
|
"logps/rejected": -714.775390625, |
|
"loss": 0.5965, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.00473959231749177, |
|
"rewards/margins": 0.20695891976356506, |
|
"rewards/rejected": -0.20221932232379913, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06779661016949153, |
|
"grad_norm": 85.1543469130844, |
|
"learning_rate": 2.2522522522522522e-07, |
|
"logits/chosen": -1.636574387550354, |
|
"logits/rejected": -1.8049229383468628, |
|
"logps/chosen": -142.1752471923828, |
|
"logps/rejected": -667.3391723632812, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.007638626731932163, |
|
"rewards/margins": 0.6071017980575562, |
|
"rewards/rejected": -0.5994631052017212, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08135593220338982, |
|
"grad_norm": 55.45578483376317, |
|
"learning_rate": 2.702702702702703e-07, |
|
"logits/chosen": -1.7117058038711548, |
|
"logits/rejected": -1.887060523033142, |
|
"logps/chosen": -117.88133239746094, |
|
"logps/rejected": -649.540283203125, |
|
"loss": 0.2845, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0008934163488447666, |
|
"rewards/margins": 1.142379879951477, |
|
"rewards/rejected": -1.1414865255355835, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09491525423728814, |
|
"grad_norm": 9.23309067720513, |
|
"learning_rate": 3.153153153153153e-07, |
|
"logits/chosen": -1.4379091262817383, |
|
"logits/rejected": -1.6131579875946045, |
|
"logps/chosen": -134.87896728515625, |
|
"logps/rejected": -696.23779296875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.001966355135664344, |
|
"rewards/margins": 3.1120121479034424, |
|
"rewards/rejected": -3.113978624343872, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.10847457627118644, |
|
"grad_norm": 2.7786137414686887, |
|
"learning_rate": 3.6036036036036033e-07, |
|
"logits/chosen": -1.531922698020935, |
|
"logits/rejected": -1.6740307807922363, |
|
"logps/chosen": -130.48211669921875, |
|
"logps/rejected": -688.8399047851562, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.001919470028951764, |
|
"rewards/margins": 4.491945266723633, |
|
"rewards/rejected": -4.490025997161865, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12203389830508475, |
|
"grad_norm": 0.011215121612896709, |
|
"learning_rate": 4.054054054054054e-07, |
|
"logits/chosen": -1.5453845262527466, |
|
"logits/rejected": -1.702711820602417, |
|
"logps/chosen": -137.26138305664062, |
|
"logps/rejected": -749.4444580078125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00951399840414524, |
|
"rewards/margins": 8.38076400756836, |
|
"rewards/rejected": -8.37125015258789, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13559322033898305, |
|
"grad_norm": 0.002748606315174295, |
|
"learning_rate": 4.5045045045045043e-07, |
|
"logits/chosen": -1.6784169673919678, |
|
"logits/rejected": -1.8867419958114624, |
|
"logps/chosen": -134.819091796875, |
|
"logps/rejected": -786.6911010742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0101728904992342, |
|
"rewards/margins": 12.241233825683594, |
|
"rewards/rejected": -12.231060981750488, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13559322033898305, |
|
"eval_logits/chosen": -1.5288071632385254, |
|
"eval_logits/rejected": -1.6835100650787354, |
|
"eval_logps/chosen": -129.43997192382812, |
|
"eval_logps/rejected": -748.4630126953125, |
|
"eval_loss": 3.7055913253425388e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.012690085917711258, |
|
"eval_rewards/margins": 13.008617401123047, |
|
"eval_rewards/rejected": -12.995927810668945, |
|
"eval_runtime": 23.2529, |
|
"eval_samples_per_second": 4.301, |
|
"eval_steps_per_second": 1.075, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14915254237288136, |
|
"grad_norm": 0.0011093150463870978, |
|
"learning_rate": 4.954954954954955e-07, |
|
"logits/chosen": -1.57046377658844, |
|
"logits/rejected": -1.7331207990646362, |
|
"logps/chosen": -127.67959594726562, |
|
"logps/rejected": -752.2487182617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004207385703921318, |
|
"rewards/margins": 13.458518981933594, |
|
"rewards/rejected": -13.462725639343262, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16271186440677965, |
|
"grad_norm": 0.0005762634187664357, |
|
"learning_rate": 4.999773405362863e-07, |
|
"logits/chosen": -1.6726722717285156, |
|
"logits/rejected": -1.8734807968139648, |
|
"logps/chosen": -143.523193359375, |
|
"logps/rejected": -832.698974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014902787283062935, |
|
"rewards/margins": 13.953229904174805, |
|
"rewards/rejected": -13.938325881958008, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17627118644067796, |
|
"grad_norm": 0.0003332599393998685, |
|
"learning_rate": 4.998990167994546e-07, |
|
"logits/chosen": -1.6244195699691772, |
|
"logits/rejected": -1.790928602218628, |
|
"logps/chosen": -137.99049377441406, |
|
"logps/rejected": -785.167236328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014037198387086391, |
|
"rewards/margins": 13.896438598632812, |
|
"rewards/rejected": -13.882402420043945, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.18983050847457628, |
|
"grad_norm": 0.0003146655208168566, |
|
"learning_rate": 4.997647665674343e-07, |
|
"logits/chosen": -1.6478142738342285, |
|
"logits/rejected": -1.860294222831726, |
|
"logps/chosen": -147.6448516845703, |
|
"logps/rejected": -811.6314086914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015513481572270393, |
|
"rewards/margins": 14.067631721496582, |
|
"rewards/rejected": -14.052118301391602, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2033898305084746, |
|
"grad_norm": 0.0002489502956030974, |
|
"learning_rate": 4.995746198849412e-07, |
|
"logits/chosen": -1.5896422863006592, |
|
"logits/rejected": -1.7557127475738525, |
|
"logps/chosen": -140.1243896484375, |
|
"logps/rejected": -822.0531616210938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02557023987174034, |
|
"rewards/margins": 14.115985870361328, |
|
"rewards/rejected": -14.090415954589844, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21694915254237288, |
|
"grad_norm": 0.00037978880201729675, |
|
"learning_rate": 4.993286193061145e-07, |
|
"logits/chosen": -1.6275484561920166, |
|
"logits/rejected": -1.8082895278930664, |
|
"logps/chosen": -145.8615264892578, |
|
"logps/rejected": -797.1973876953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005417819134891033, |
|
"rewards/margins": 14.054203033447266, |
|
"rewards/rejected": -14.048785209655762, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2305084745762712, |
|
"grad_norm": 0.0011709678087963664, |
|
"learning_rate": 4.99026819884993e-07, |
|
"logits/chosen": -1.691068172454834, |
|
"logits/rejected": -1.8546725511550903, |
|
"logps/chosen": -140.33941650390625, |
|
"logps/rejected": -833.8118896484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005253595765680075, |
|
"rewards/margins": 14.147695541381836, |
|
"rewards/rejected": -14.142441749572754, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2440677966101695, |
|
"grad_norm": 0.0005430831103481389, |
|
"learning_rate": 4.986692891631945e-07, |
|
"logits/chosen": -1.654329776763916, |
|
"logits/rejected": -1.8268946409225464, |
|
"logps/chosen": -142.9085235595703, |
|
"logps/rejected": -781.0303955078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004031305201351643, |
|
"rewards/margins": 13.904911041259766, |
|
"rewards/rejected": -13.900879859924316, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2576271186440678, |
|
"grad_norm": 0.0002685256860972653, |
|
"learning_rate": 4.982561071548001e-07, |
|
"logits/chosen": -1.537014365196228, |
|
"logits/rejected": -1.6596903800964355, |
|
"logps/chosen": -123.82093811035156, |
|
"logps/rejected": -741.5814208984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018150925636291504, |
|
"rewards/margins": 14.061857223510742, |
|
"rewards/rejected": -14.043705940246582, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2711864406779661, |
|
"grad_norm": 0.00047407562273882125, |
|
"learning_rate": 4.977873663284474e-07, |
|
"logits/chosen": -1.6226494312286377, |
|
"logits/rejected": -1.8021833896636963, |
|
"logps/chosen": -152.94691467285156, |
|
"logps/rejected": -808.9371337890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021439315751194954, |
|
"rewards/margins": 14.185613632202148, |
|
"rewards/rejected": -14.164173126220703, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2711864406779661, |
|
"eval_logits/chosen": -1.5458643436431885, |
|
"eval_logits/rejected": -1.6861909627914429, |
|
"eval_logps/chosen": -129.42483520507812, |
|
"eval_logps/rejected": -757.507080078125, |
|
"eval_loss": 1.5668128980905749e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.014203321188688278, |
|
"eval_rewards/margins": 13.914541244506836, |
|
"eval_rewards/rejected": -13.900337219238281, |
|
"eval_runtime": 23.2677, |
|
"eval_samples_per_second": 4.298, |
|
"eval_steps_per_second": 1.074, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2847457627118644, |
|
"grad_norm": 0.0005141024917889797, |
|
"learning_rate": 4.972631715866361e-07, |
|
"logits/chosen": -1.5850636959075928, |
|
"logits/rejected": -1.7246123552322388, |
|
"logps/chosen": -143.24696350097656, |
|
"logps/rejected": -778.4476318359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008326123468577862, |
|
"rewards/margins": 13.95406436920166, |
|
"rewards/rejected": -13.945737838745117, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2983050847457627, |
|
"grad_norm": 0.0006130760590228257, |
|
"learning_rate": 4.966836402422515e-07, |
|
"logits/chosen": -1.6016970872879028, |
|
"logits/rejected": -1.7708076238632202, |
|
"logps/chosen": -132.4145050048828, |
|
"logps/rejected": -792.4443359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.017194140702486038, |
|
"rewards/margins": 14.093986511230469, |
|
"rewards/rejected": -14.07679271697998, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.31186440677966104, |
|
"grad_norm": 0.000828819568409969, |
|
"learning_rate": 4.960489019923105e-07, |
|
"logits/chosen": -1.679842233657837, |
|
"logits/rejected": -1.8294236660003662, |
|
"logps/chosen": -136.24644470214844, |
|
"logps/rejected": -809.9606323242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016033075749874115, |
|
"rewards/margins": 14.131494522094727, |
|
"rewards/rejected": -14.115462303161621, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3254237288135593, |
|
"grad_norm": 0.0008292339546344823, |
|
"learning_rate": 4.95359098888935e-07, |
|
"logits/chosen": -1.6466869115829468, |
|
"logits/rejected": -1.8071887493133545, |
|
"logps/chosen": -144.3760986328125, |
|
"logps/rejected": -816.1300659179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00023554242216050625, |
|
"rewards/margins": 14.113880157470703, |
|
"rewards/rejected": -14.113645553588867, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3389830508474576, |
|
"grad_norm": 0.0007326232553653241, |
|
"learning_rate": 4.946143853075625e-07, |
|
"logits/chosen": -1.6482115983963013, |
|
"logits/rejected": -1.8068441152572632, |
|
"logps/chosen": -136.09292602539062, |
|
"logps/rejected": -786.30908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0055378577671945095, |
|
"rewards/margins": 14.151810646057129, |
|
"rewards/rejected": -14.146272659301758, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3525423728813559, |
|
"grad_norm": 0.0006201108834653548, |
|
"learning_rate": 4.938149279123959e-07, |
|
"logits/chosen": -1.614100694656372, |
|
"logits/rejected": -1.7720319032669067, |
|
"logps/chosen": -145.45883178710938, |
|
"logps/rejected": -772.9180908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.014799129217863083, |
|
"rewards/margins": 13.972376823425293, |
|
"rewards/rejected": -13.987175941467285, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.36610169491525424, |
|
"grad_norm": 0.0002824417146726921, |
|
"learning_rate": 4.929609056191057e-07, |
|
"logits/chosen": -1.6527501344680786, |
|
"logits/rejected": -1.8346607685089111, |
|
"logps/chosen": -147.9451904296875, |
|
"logps/rejected": -845.679931640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.073175558820367e-05, |
|
"rewards/margins": 14.214624404907227, |
|
"rewards/rejected": -14.214543342590332, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.37966101694915255, |
|
"grad_norm": 0.0003459147020709031, |
|
"learning_rate": 4.920525095547895e-07, |
|
"logits/chosen": -1.700380802154541, |
|
"logits/rejected": -1.8940513134002686, |
|
"logps/chosen": -148.5547332763672, |
|
"logps/rejected": -806.921142578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0022345068864524364, |
|
"rewards/margins": 13.974288940429688, |
|
"rewards/rejected": -13.976522445678711, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.39322033898305087, |
|
"grad_norm": 0.0007686863218550116, |
|
"learning_rate": 4.910899430151973e-07, |
|
"logits/chosen": -1.690828800201416, |
|
"logits/rejected": -1.8689790964126587, |
|
"logps/chosen": -127.06858825683594, |
|
"logps/rejected": -781.9804077148438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014154733158648014, |
|
"rewards/margins": 13.95429801940918, |
|
"rewards/rejected": -13.940142631530762, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4067796610169492, |
|
"grad_norm": 0.0007833273852497298, |
|
"learning_rate": 4.900734214192358e-07, |
|
"logits/chosen": -1.657442331314087, |
|
"logits/rejected": -1.8555173873901367, |
|
"logps/chosen": -142.8946075439453, |
|
"logps/rejected": -833.9373168945312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016108307987451553, |
|
"rewards/margins": 14.204878807067871, |
|
"rewards/rejected": -14.188769340515137, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4067796610169492, |
|
"eval_logits/chosen": -1.5289416313171387, |
|
"eval_logits/rejected": -1.6836309432983398, |
|
"eval_logps/chosen": -129.3828582763672, |
|
"eval_logps/rejected": -758.2346801757812, |
|
"eval_loss": 1.4355653092934517e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.018402252346277237, |
|
"eval_rewards/margins": 13.991499900817871, |
|
"eval_rewards/rejected": -13.97309684753418, |
|
"eval_runtime": 23.0859, |
|
"eval_samples_per_second": 4.332, |
|
"eval_steps_per_second": 1.083, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.42033898305084744, |
|
"grad_norm": 0.00021728835196828888, |
|
"learning_rate": 4.890031722607576e-07, |
|
"logits/chosen": -1.5837361812591553, |
|
"logits/rejected": -1.7333931922912598, |
|
"logps/chosen": -133.32366943359375, |
|
"logps/rejected": -758.0372314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0008433577604591846, |
|
"rewards/margins": 14.209864616394043, |
|
"rewards/rejected": -14.209020614624023, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.43389830508474575, |
|
"grad_norm": 0.00042770904659586716, |
|
"learning_rate": 4.878794350576498e-07, |
|
"logits/chosen": -1.577714443206787, |
|
"logits/rejected": -1.759061336517334, |
|
"logps/chosen": -128.18081665039062, |
|
"logps/rejected": -749.0996704101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.006202464923262596, |
|
"rewards/margins": 14.050898551940918, |
|
"rewards/rejected": -14.044694900512695, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.44745762711864406, |
|
"grad_norm": 0.0005431267045930932, |
|
"learning_rate": 4.867024612982295e-07, |
|
"logits/chosen": -1.5479421615600586, |
|
"logits/rejected": -1.6868603229522705, |
|
"logps/chosen": -145.11839294433594, |
|
"logps/rejected": -793.3886108398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007111231796443462, |
|
"rewards/margins": 14.120939254760742, |
|
"rewards/rejected": -14.1138277053833, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4610169491525424, |
|
"grad_norm": 0.0004452926543919869, |
|
"learning_rate": 4.854725143849631e-07, |
|
"logits/chosen": -1.5681132078170776, |
|
"logits/rejected": -1.7265939712524414, |
|
"logps/chosen": -135.96771240234375, |
|
"logps/rejected": -776.887939453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01606500893831253, |
|
"rewards/margins": 13.997052192687988, |
|
"rewards/rejected": -13.980987548828125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4745762711864407, |
|
"grad_norm": 0.0003403651283500103, |
|
"learning_rate": 4.841898695755167e-07, |
|
"logits/chosen": -1.6001578569412231, |
|
"logits/rejected": -1.7874952554702759, |
|
"logps/chosen": -128.35435485839844, |
|
"logps/rejected": -756.4078979492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018694868311285973, |
|
"rewards/margins": 14.069332122802734, |
|
"rewards/rejected": -14.050636291503906, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.488135593220339, |
|
"grad_norm": 0.0005571850400383578, |
|
"learning_rate": 4.828548139211545e-07, |
|
"logits/chosen": -1.6838054656982422, |
|
"logits/rejected": -1.8665508031845093, |
|
"logps/chosen": -135.80624389648438, |
|
"logps/rejected": -794.292724609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02806948870420456, |
|
"rewards/margins": 14.219563484191895, |
|
"rewards/rejected": -14.19149398803711, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5016949152542373, |
|
"grad_norm": 0.0007950002555106897, |
|
"learning_rate": 4.814676462024987e-07, |
|
"logits/chosen": -1.657123327255249, |
|
"logits/rejected": -1.8259265422821045, |
|
"logps/chosen": -151.13162231445312, |
|
"logps/rejected": -806.3003540039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016831912100315094, |
|
"rewards/margins": 14.099932670593262, |
|
"rewards/rejected": -14.08310317993164, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5152542372881356, |
|
"grad_norm": 0.0003242442253345165, |
|
"learning_rate": 4.800286768626621e-07, |
|
"logits/chosen": -1.657513976097107, |
|
"logits/rejected": -1.8597712516784668, |
|
"logps/chosen": -139.53028869628906, |
|
"logps/rejected": -826.2210083007812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014754395000636578, |
|
"rewards/margins": 14.141820907592773, |
|
"rewards/rejected": -14.127065658569336, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5288135593220339, |
|
"grad_norm": 0.0005090620291333299, |
|
"learning_rate": 4.785382279377733e-07, |
|
"logits/chosen": -1.6210606098175049, |
|
"logits/rejected": -1.8241839408874512, |
|
"logps/chosen": -143.5242462158203, |
|
"logps/rejected": -836.617431640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02124781347811222, |
|
"rewards/margins": 14.220711708068848, |
|
"rewards/rejected": -14.199464797973633, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5423728813559322, |
|
"grad_norm": 0.000825922097218788, |
|
"learning_rate": 4.769966329849054e-07, |
|
"logits/chosen": -1.6811935901641846, |
|
"logits/rejected": -1.8629109859466553, |
|
"logps/chosen": -150.13429260253906, |
|
"logps/rejected": -827.9759521484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0035764453932642937, |
|
"rewards/margins": 14.165657997131348, |
|
"rewards/rejected": -14.162080764770508, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5423728813559322, |
|
"eval_logits/chosen": -1.5275362730026245, |
|
"eval_logits/rejected": -1.6860315799713135, |
|
"eval_logps/chosen": -129.4043426513672, |
|
"eval_logps/rejected": -758.570556640625, |
|
"eval_loss": 1.3993409311297e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.016253653913736343, |
|
"eval_rewards/margins": 14.022944450378418, |
|
"eval_rewards/rejected": -14.006690979003906, |
|
"eval_runtime": 23.1703, |
|
"eval_samples_per_second": 4.316, |
|
"eval_steps_per_second": 1.079, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5559322033898305, |
|
"grad_norm": 0.0007397757326606043, |
|
"learning_rate": 4.7540423700742726e-07, |
|
"logits/chosen": -1.6916306018829346, |
|
"logits/rejected": -1.8490281105041504, |
|
"logps/chosen": -149.16802978515625, |
|
"logps/rejected": -804.88525390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008371539413928986, |
|
"rewards/margins": 14.190282821655273, |
|
"rewards/rejected": -14.18191146850586, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5694915254237288, |
|
"grad_norm": 0.0005353994450256941, |
|
"learning_rate": 4.7376139637779354e-07, |
|
"logits/chosen": -1.6099956035614014, |
|
"logits/rejected": -1.7850123643875122, |
|
"logps/chosen": -122.01039123535156, |
|
"logps/rejected": -747.48974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.017607325688004494, |
|
"rewards/margins": 14.0084867477417, |
|
"rewards/rejected": -13.99087905883789, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5830508474576271, |
|
"grad_norm": 0.00048794620873617576, |
|
"learning_rate": 4.7206847875778913e-07, |
|
"logits/chosen": -1.6391756534576416, |
|
"logits/rejected": -1.8201954364776611, |
|
"logps/chosen": -131.18875122070312, |
|
"logps/rejected": -759.473388671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01886049658060074, |
|
"rewards/margins": 14.103827476501465, |
|
"rewards/rejected": -14.084967613220215, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5966101694915255, |
|
"grad_norm": 0.00044451168413258085, |
|
"learning_rate": 4.70325863016248e-07, |
|
"logits/chosen": -1.678541660308838, |
|
"logits/rejected": -1.8704532384872437, |
|
"logps/chosen": -144.02674865722656, |
|
"logps/rejected": -808.55859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015889765694737434, |
|
"rewards/margins": 14.10900592803955, |
|
"rewards/rejected": -14.093117713928223, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6101694915254238, |
|
"grad_norm": 0.00038795072502636444, |
|
"learning_rate": 4.68533939144264e-07, |
|
"logits/chosen": -1.6224497556686401, |
|
"logits/rejected": -1.7962324619293213, |
|
"logps/chosen": -140.4901123046875, |
|
"logps/rejected": -825.280517578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005872346460819244, |
|
"rewards/margins": 14.365765571594238, |
|
"rewards/rejected": -14.359892845153809, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6237288135593221, |
|
"grad_norm": 0.00038646259276520423, |
|
"learning_rate": 4.6669310816791184e-07, |
|
"logits/chosen": -1.7635605335235596, |
|
"logits/rejected": -1.951772928237915, |
|
"logps/chosen": -136.94752502441406, |
|
"logps/rejected": -807.515869140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0075184679590165615, |
|
"rewards/margins": 14.222395896911621, |
|
"rewards/rejected": -14.214877128601074, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6372881355932203, |
|
"grad_norm": 0.0003666927496908433, |
|
"learning_rate": 4.6480378205849926e-07, |
|
"logits/chosen": -1.6419250965118408, |
|
"logits/rejected": -1.7922152280807495, |
|
"logps/chosen": -130.02239990234375, |
|
"logps/rejected": -759.2348022460938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014397606253623962, |
|
"rewards/margins": 14.155649185180664, |
|
"rewards/rejected": -14.141252517700195, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6508474576271186, |
|
"grad_norm": 0.00046994952764087256, |
|
"learning_rate": 4.6286638364036905e-07, |
|
"logits/chosen": -1.6200618743896484, |
|
"logits/rejected": -1.797215461730957, |
|
"logps/chosen": -130.50233459472656, |
|
"logps/rejected": -829.3895263671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02378329634666443, |
|
"rewards/margins": 14.199002265930176, |
|
"rewards/rejected": -14.17521858215332, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6644067796610169, |
|
"grad_norm": 0.00029735714895166485, |
|
"learning_rate": 4.6088134649627284e-07, |
|
"logits/chosen": -1.5747830867767334, |
|
"logits/rejected": -1.753875494003296, |
|
"logps/chosen": -138.4869842529297, |
|
"logps/rejected": -819.8696899414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010798036120831966, |
|
"rewards/margins": 14.393420219421387, |
|
"rewards/rejected": -14.382623672485352, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"grad_norm": 0.00035937901840089274, |
|
"learning_rate": 4.5884911487033665e-07, |
|
"logits/chosen": -1.6192841529846191, |
|
"logits/rejected": -1.7925256490707397, |
|
"logps/chosen": -143.53257751464844, |
|
"logps/rejected": -787.250244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016855549067258835, |
|
"rewards/margins": 14.227276802062988, |
|
"rewards/rejected": -14.210421562194824, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"eval_logits/chosen": -1.5297393798828125, |
|
"eval_logits/rejected": -1.6854040622711182, |
|
"eval_logps/chosen": -129.43572998046875, |
|
"eval_logps/rejected": -759.3566284179688, |
|
"eval_loss": 1.3430080798570998e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.01311517134308815, |
|
"eval_rewards/margins": 14.098403930664062, |
|
"eval_rewards/rejected": -14.085289001464844, |
|
"eval_runtime": 23.0065, |
|
"eval_samples_per_second": 4.347, |
|
"eval_steps_per_second": 1.087, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6915254237288135, |
|
"grad_norm": 0.00020815218989513314, |
|
"learning_rate": 4.567701435686404e-07, |
|
"logits/chosen": -1.6033257246017456, |
|
"logits/rejected": -1.7678654193878174, |
|
"logps/chosen": -143.47254943847656, |
|
"logps/rejected": -833.3479614257812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018440861254930496, |
|
"rewards/margins": 14.311261177062988, |
|
"rewards/rejected": -14.292821884155273, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7050847457627119, |
|
"grad_norm": 0.00044702973171401343, |
|
"learning_rate": 4.5464489785743454e-07, |
|
"logits/chosen": -1.588118076324463, |
|
"logits/rejected": -1.7491432428359985, |
|
"logps/chosen": -121.74967193603516, |
|
"logps/rejected": -763.8538818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021985743194818497, |
|
"rewards/margins": 14.20384693145752, |
|
"rewards/rejected": -14.181861877441406, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7186440677966102, |
|
"grad_norm": 0.00048331111256865464, |
|
"learning_rate": 4.5247385335901457e-07, |
|
"logits/chosen": -1.62116277217865, |
|
"logits/rejected": -1.798133134841919, |
|
"logps/chosen": -144.86624145507812, |
|
"logps/rejected": -807.5030517578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.006796732544898987, |
|
"rewards/margins": 14.287379264831543, |
|
"rewards/rejected": -14.280582427978516, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7322033898305085, |
|
"grad_norm": 0.0004276849725743657, |
|
"learning_rate": 4.5025749594527895e-07, |
|
"logits/chosen": -1.5937074422836304, |
|
"logits/rejected": -1.766377329826355, |
|
"logps/chosen": -130.42138671875, |
|
"logps/rejected": -770.9468994140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011606786400079727, |
|
"rewards/margins": 14.063494682312012, |
|
"rewards/rejected": -14.051887512207031, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7457627118644068, |
|
"grad_norm": 0.00047701124764661757, |
|
"learning_rate": 4.4799632162899236e-07, |
|
"logits/chosen": -1.586263656616211, |
|
"logits/rejected": -1.7527462244033813, |
|
"logps/chosen": -138.99436950683594, |
|
"logps/rejected": -832.9317626953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01648543030023575, |
|
"rewards/margins": 14.375706672668457, |
|
"rewards/rejected": -14.359220504760742, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7593220338983051, |
|
"grad_norm": 0.0005331903399639826, |
|
"learning_rate": 4.456908364527802e-07, |
|
"logits/chosen": -1.6967836618423462, |
|
"logits/rejected": -1.8608123064041138, |
|
"logps/chosen": -135.91598510742188, |
|
"logps/rejected": -800.591796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020324843004345894, |
|
"rewards/margins": 14.120574951171875, |
|
"rewards/rejected": -14.100250244140625, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.7728813559322034, |
|
"grad_norm": 0.000533186088613475, |
|
"learning_rate": 4.433415563758778e-07, |
|
"logits/chosen": -1.6483113765716553, |
|
"logits/rejected": -1.8176215887069702, |
|
"logps/chosen": -138.11244201660156, |
|
"logps/rejected": -791.1510620117188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01274613942950964, |
|
"rewards/margins": 14.025276184082031, |
|
"rewards/rejected": -14.012529373168945, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7864406779661017, |
|
"grad_norm": 0.0003330593802523546, |
|
"learning_rate": 4.409490071586606e-07, |
|
"logits/chosen": -1.58778715133667, |
|
"logits/rejected": -1.7679705619812012, |
|
"logps/chosen": -143.45578002929688, |
|
"logps/rejected": -871.753173828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004776511341333389, |
|
"rewards/margins": 14.490696907043457, |
|
"rewards/rejected": -14.485920906066895, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0004356549740596174, |
|
"learning_rate": 4.38513724244981e-07, |
|
"logits/chosen": -1.677300214767456, |
|
"logits/rejected": -1.823121428489685, |
|
"logps/chosen": -130.93118286132812, |
|
"logps/rejected": -767.2311401367188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01857886277139187, |
|
"rewards/margins": 14.194330215454102, |
|
"rewards/rejected": -14.175750732421875, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8135593220338984, |
|
"grad_norm": 0.0006738152861953452, |
|
"learning_rate": 4.360362526423382e-07, |
|
"logits/chosen": -1.6017038822174072, |
|
"logits/rejected": -1.7450367212295532, |
|
"logps/chosen": -134.8054656982422, |
|
"logps/rejected": -829.2752075195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023766428232192993, |
|
"rewards/margins": 14.333120346069336, |
|
"rewards/rejected": -14.309354782104492, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8135593220338984, |
|
"eval_logits/chosen": -1.535141944885254, |
|
"eval_logits/rejected": -1.6854389905929565, |
|
"eval_logps/chosen": -129.3685302734375, |
|
"eval_logps/rejected": -759.9224853515625, |
|
"eval_loss": 1.2519085430540144e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.019834483042359352, |
|
"eval_rewards/margins": 14.161703109741211, |
|
"eval_rewards/rejected": -14.14186954498291, |
|
"eval_runtime": 23.2942, |
|
"eval_samples_per_second": 4.293, |
|
"eval_steps_per_second": 1.073, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8271186440677966, |
|
"grad_norm": 0.00028376978865923564, |
|
"learning_rate": 4.3351714679990706e-07, |
|
"logits/chosen": -1.5073317289352417, |
|
"logits/rejected": -1.630771517753601, |
|
"logps/chosen": -108.68913269042969, |
|
"logps/rejected": -755.2454223632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021552443504333496, |
|
"rewards/margins": 14.418371200561523, |
|
"rewards/rejected": -14.396819114685059, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.8406779661016949, |
|
"grad_norm": 0.00048164470828510026, |
|
"learning_rate": 4.3095697048445447e-07, |
|
"logits/chosen": -1.6341415643692017, |
|
"logits/rejected": -1.7738165855407715, |
|
"logps/chosen": -140.3278045654297, |
|
"logps/rejected": -794.1578369140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0028394628316164017, |
|
"rewards/margins": 14.288480758666992, |
|
"rewards/rejected": -14.28564167022705, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.8542372881355932, |
|
"grad_norm": 0.00046143931913359906, |
|
"learning_rate": 4.283562966541707e-07, |
|
"logits/chosen": -1.5982298851013184, |
|
"logits/rejected": -1.7638392448425293, |
|
"logps/chosen": -141.6815185546875, |
|
"logps/rejected": -788.85888671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018078766763210297, |
|
"rewards/margins": 14.164934158325195, |
|
"rewards/rejected": -14.146854400634766, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.8677966101694915, |
|
"grad_norm": 0.00032792057687413, |
|
"learning_rate": 4.25715707330443e-07, |
|
"logits/chosen": -1.5992499589920044, |
|
"logits/rejected": -1.7798690795898438, |
|
"logps/chosen": -128.59031677246094, |
|
"logps/rejected": -792.5185546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009904641658067703, |
|
"rewards/margins": 14.5419340133667, |
|
"rewards/rejected": -14.532029151916504, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.8813559322033898, |
|
"grad_norm": 0.00021017148877000384, |
|
"learning_rate": 4.2303579346760173e-07, |
|
"logits/chosen": -1.613771915435791, |
|
"logits/rejected": -1.801413655281067, |
|
"logps/chosen": -143.33572387695312, |
|
"logps/rejected": -811.8010864257812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015471378341317177, |
|
"rewards/margins": 14.446039199829102, |
|
"rewards/rejected": -14.430566787719727, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8949152542372881, |
|
"grad_norm": 0.0003556697469879977, |
|
"learning_rate": 4.2031715482066655e-07, |
|
"logits/chosen": -1.585208535194397, |
|
"logits/rejected": -1.7628443241119385, |
|
"logps/chosen": -133.2225799560547, |
|
"logps/rejected": -783.7073974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01283181644976139, |
|
"rewards/margins": 14.336665153503418, |
|
"rewards/rejected": -14.323833465576172, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9084745762711864, |
|
"grad_norm": 0.00032469878909931687, |
|
"learning_rate": 4.1756039981112373e-07, |
|
"logits/chosen": -1.5636094808578491, |
|
"logits/rejected": -1.7128851413726807, |
|
"logps/chosen": -138.43899536132812, |
|
"logps/rejected": -804.6480712890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008499560877680779, |
|
"rewards/margins": 14.382369995117188, |
|
"rewards/rejected": -14.373868942260742, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9220338983050848, |
|
"grad_norm": 0.0002716865446675816, |
|
"learning_rate": 4.147661453907635e-07, |
|
"logits/chosen": -1.6562086343765259, |
|
"logits/rejected": -1.8350610733032227, |
|
"logps/chosen": -153.1056365966797, |
|
"logps/rejected": -784.3397216796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011557278223335743, |
|
"rewards/margins": 14.29238510131836, |
|
"rewards/rejected": -14.280828475952148, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9355932203389831, |
|
"grad_norm": 0.00021878390732588127, |
|
"learning_rate": 4.1193501690360834e-07, |
|
"logits/chosen": -1.582091212272644, |
|
"logits/rejected": -1.7254104614257812, |
|
"logps/chosen": -141.56887817382812, |
|
"logps/rejected": -816.9537353515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.025769073516130447, |
|
"rewards/margins": 14.515641212463379, |
|
"rewards/rejected": -14.489871978759766, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.9491525423728814, |
|
"grad_norm": 0.0006094805270326929, |
|
"learning_rate": 4.0906764794596347e-07, |
|
"logits/chosen": -1.5609385967254639, |
|
"logits/rejected": -1.7095026969909668, |
|
"logps/chosen": -142.61056518554688, |
|
"logps/rejected": -779.0126953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011463040485978127, |
|
"rewards/margins": 14.310674667358398, |
|
"rewards/rejected": -14.299211502075195, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9491525423728814, |
|
"eval_logits/chosen": -1.5444095134735107, |
|
"eval_logits/rejected": -1.686043620109558, |
|
"eval_logps/chosen": -129.370361328125, |
|
"eval_logps/rejected": -760.5565795898438, |
|
"eval_loss": 1.1656707101792563e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.019651925191283226, |
|
"eval_rewards/margins": 14.224937438964844, |
|
"eval_rewards/rejected": -14.205286026000977, |
|
"eval_runtime": 23.3494, |
|
"eval_samples_per_second": 4.283, |
|
"eval_steps_per_second": 1.071, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9627118644067797, |
|
"grad_norm": 0.0006059188782453061, |
|
"learning_rate": 4.0616468022462013e-07, |
|
"logits/chosen": -1.6205476522445679, |
|
"logits/rejected": -1.785776138305664, |
|
"logps/chosen": -137.9543914794922, |
|
"logps/rejected": -812.373291015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015250766649842262, |
|
"rewards/margins": 14.447315216064453, |
|
"rewards/rejected": -14.432065963745117, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.976271186440678, |
|
"grad_norm": 0.00027120641837016974, |
|
"learning_rate": 4.0322676341324414e-07, |
|
"logits/chosen": -1.613734245300293, |
|
"logits/rejected": -1.7981483936309814, |
|
"logps/chosen": -133.99325561523438, |
|
"logps/rejected": -817.4915771484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011865440756082535, |
|
"rewards/margins": 14.308055877685547, |
|
"rewards/rejected": -14.296191215515137, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.9898305084745763, |
|
"grad_norm": 0.000231523046440184, |
|
"learning_rate": 4.002545550069808e-07, |
|
"logits/chosen": -1.6496268510818481, |
|
"logits/rejected": -1.841505527496338, |
|
"logps/chosen": -140.759765625, |
|
"logps/rejected": -784.6021118164062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014270871877670288, |
|
"rewards/margins": 14.330395698547363, |
|
"rewards/rejected": -14.31612491607666, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0027118644067796, |
|
"grad_norm": 0.00021386925480897464, |
|
"learning_rate": 3.972487201753106e-07, |
|
"logits/chosen": -1.6074280738830566, |
|
"logits/rejected": -1.7795652151107788, |
|
"logps/chosen": -136.89691162109375, |
|
"logps/rejected": -791.8516845703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0169425867497921, |
|
"rewards/margins": 14.432229995727539, |
|
"rewards/rejected": -14.415288925170898, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.016271186440678, |
|
"grad_norm": 0.00040145408076981584, |
|
"learning_rate": 3.9420993161318615e-07, |
|
"logits/chosen": -1.6055034399032593, |
|
"logits/rejected": -1.77395761013031, |
|
"logps/chosen": -134.6471405029297, |
|
"logps/rejected": -791.5114135742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014754224568605423, |
|
"rewards/margins": 14.438673973083496, |
|
"rewards/rejected": -14.423918724060059, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0298305084745762, |
|
"grad_norm": 0.00025553044965608517, |
|
"learning_rate": 3.911388693904854e-07, |
|
"logits/chosen": -1.6558409929275513, |
|
"logits/rejected": -1.8468029499053955, |
|
"logps/chosen": -140.86526489257812, |
|
"logps/rejected": -796.6107788085938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010989008471369743, |
|
"rewards/margins": 14.3390531539917, |
|
"rewards/rejected": -14.32806396484375, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.0433898305084746, |
|
"grad_norm": 0.00032980079352337916, |
|
"learning_rate": 3.8803622079981496e-07, |
|
"logits/chosen": -1.578873872756958, |
|
"logits/rejected": -1.7349332571029663, |
|
"logps/chosen": -130.14547729492188, |
|
"logps/rejected": -764.939208984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0018688759300857782, |
|
"rewards/margins": 14.415413856506348, |
|
"rewards/rejected": -14.413544654846191, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.0569491525423729, |
|
"grad_norm": 0.00031172746262580516, |
|
"learning_rate": 3.8490268020269614e-07, |
|
"logits/chosen": -1.6029382944107056, |
|
"logits/rejected": -1.7419565916061401, |
|
"logps/chosen": -140.53709411621094, |
|
"logps/rejected": -811.80908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016495605930685997, |
|
"rewards/margins": 14.483687400817871, |
|
"rewards/rejected": -14.467193603515625, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.0705084745762712, |
|
"grad_norm": 0.0003301712565175788, |
|
"learning_rate": 3.817389488741694e-07, |
|
"logits/chosen": -1.57521653175354, |
|
"logits/rejected": -1.7471016645431519, |
|
"logps/chosen": -133.0887451171875, |
|
"logps/rejected": -820.1575927734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.027268720790743828, |
|
"rewards/margins": 14.49877643585205, |
|
"rewards/rejected": -14.471506118774414, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.0840677966101695, |
|
"grad_norm": 0.000600013552539676, |
|
"learning_rate": 3.785457348458516e-07, |
|
"logits/chosen": -1.6120831966400146, |
|
"logits/rejected": -1.783928394317627, |
|
"logps/chosen": -145.61685180664062, |
|
"logps/rejected": -781.661376953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0003941003233194351, |
|
"rewards/margins": 14.212892532348633, |
|
"rewards/rejected": -14.21249771118164, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0840677966101695, |
|
"eval_logits/chosen": -1.5446038246154785, |
|
"eval_logits/rejected": -1.6855926513671875, |
|
"eval_logps/chosen": -129.36080932617188, |
|
"eval_logps/rejected": -761.1478881835938, |
|
"eval_loss": 1.1145154985570116e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.020607449114322662, |
|
"eval_rewards/margins": 14.285026550292969, |
|
"eval_rewards/rejected": -14.264419555664062, |
|
"eval_runtime": 23.5778, |
|
"eval_samples_per_second": 4.241, |
|
"eval_steps_per_second": 1.06, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0976271186440678, |
|
"grad_norm": 0.00037026369248627336, |
|
"learning_rate": 3.753237527474812e-07, |
|
"logits/chosen": -1.5942147970199585, |
|
"logits/rejected": -1.7750327587127686, |
|
"logps/chosen": -137.2639923095703, |
|
"logps/rejected": -795.8204956054688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018559224903583527, |
|
"rewards/margins": 14.402730941772461, |
|
"rewards/rejected": -14.384172439575195, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.1111864406779661, |
|
"grad_norm": 0.0002646065707675486, |
|
"learning_rate": 3.7207372364698645e-07, |
|
"logits/chosen": -1.6505929231643677, |
|
"logits/rejected": -1.810257911682129, |
|
"logps/chosen": -137.0296173095703, |
|
"logps/rejected": -838.5280151367188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02975718304514885, |
|
"rewards/margins": 14.56403923034668, |
|
"rewards/rejected": -14.534282684326172, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.1247457627118644, |
|
"grad_norm": 0.0004516110377872378, |
|
"learning_rate": 3.687963748891131e-07, |
|
"logits/chosen": -1.5973923206329346, |
|
"logits/rejected": -1.7931503057479858, |
|
"logps/chosen": -148.9749755859375, |
|
"logps/rejected": -849.8316650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007867500185966492, |
|
"rewards/margins": 14.508625030517578, |
|
"rewards/rejected": -14.50075626373291, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.1383050847457628, |
|
"grad_norm": 0.000400542368638696, |
|
"learning_rate": 3.6549243993264747e-07, |
|
"logits/chosen": -1.645015001296997, |
|
"logits/rejected": -1.8152064085006714, |
|
"logps/chosen": -134.02035522460938, |
|
"logps/rejected": -784.4261474609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00821724720299244, |
|
"rewards/margins": 14.368156433105469, |
|
"rewards/rejected": -14.359938621520996, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.151864406779661, |
|
"grad_norm": 0.0007766829343963702, |
|
"learning_rate": 3.6216265818627066e-07, |
|
"logits/chosen": -1.6092370748519897, |
|
"logits/rejected": -1.7626619338989258, |
|
"logps/chosen": -147.05447387695312, |
|
"logps/rejected": -797.9004516601562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005066628567874432, |
|
"rewards/margins": 14.369694709777832, |
|
"rewards/rejected": -14.36462688446045, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.1654237288135594, |
|
"grad_norm": 0.00034472860967714935, |
|
"learning_rate": 3.588077748430819e-07, |
|
"logits/chosen": -1.53743577003479, |
|
"logits/rejected": -1.6860629320144653, |
|
"logps/chosen": -130.87753295898438, |
|
"logps/rejected": -766.9425048828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.017799021676182747, |
|
"rewards/margins": 14.47961139678955, |
|
"rewards/rejected": -14.461811065673828, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.1789830508474577, |
|
"grad_norm": 0.00026858839945804227, |
|
"learning_rate": 3.554285407138269e-07, |
|
"logits/chosen": -1.5462911128997803, |
|
"logits/rejected": -1.691042423248291, |
|
"logps/chosen": -129.9382781982422, |
|
"logps/rejected": -737.553955078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0059865121729671955, |
|
"rewards/margins": 14.388315200805664, |
|
"rewards/rejected": -14.382328987121582, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.192542372881356, |
|
"grad_norm": 0.00031207053329295663, |
|
"learning_rate": 3.5202571205886913e-07, |
|
"logits/chosen": -1.6693938970565796, |
|
"logits/rejected": -1.8746066093444824, |
|
"logps/chosen": -148.41986083984375, |
|
"logps/rejected": -816.6522827148438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023402290418744087, |
|
"rewards/margins": 14.48322582244873, |
|
"rewards/rejected": -14.459823608398438, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2061016949152543, |
|
"grad_norm": 0.0004730088609444601, |
|
"learning_rate": 3.486000504189414e-07, |
|
"logits/chosen": -1.5571904182434082, |
|
"logits/rejected": -1.739476203918457, |
|
"logps/chosen": -131.6705322265625, |
|
"logps/rejected": -770.3902587890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011558154597878456, |
|
"rewards/margins": 14.456482887268066, |
|
"rewards/rejected": -14.444924354553223, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2196610169491526, |
|
"grad_norm": 0.0004757504728478808, |
|
"learning_rate": 3.4515232244471606e-07, |
|
"logits/chosen": -1.4787291288375854, |
|
"logits/rejected": -1.661275029182434, |
|
"logps/chosen": -151.16482543945312, |
|
"logps/rejected": -888.5305786132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014035153202712536, |
|
"rewards/margins": 14.75745964050293, |
|
"rewards/rejected": -14.743424415588379, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2196610169491526, |
|
"eval_logits/chosen": -1.5419373512268066, |
|
"eval_logits/rejected": -1.685987949371338, |
|
"eval_logps/chosen": -129.385986328125, |
|
"eval_logps/rejected": -761.9303588867188, |
|
"eval_loss": 1.042011717800051e-06, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.01808955892920494, |
|
"eval_rewards/margins": 14.360756874084473, |
|
"eval_rewards/rejected": -14.342667579650879, |
|
"eval_runtime": 23.2568, |
|
"eval_samples_per_second": 4.3, |
|
"eval_steps_per_second": 1.075, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2332203389830507, |
|
"grad_norm": 0.0003716686876859065, |
|
"learning_rate": 3.41683299725231e-07, |
|
"logits/chosen": -1.6072585582733154, |
|
"logits/rejected": -1.7751410007476807, |
|
"logps/chosen": -121.9486312866211, |
|
"logps/rejected": -751.9805297851562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014703430235385895, |
|
"rewards/margins": 14.331403732299805, |
|
"rewards/rejected": -14.316699981689453, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.2467796610169493, |
|
"grad_norm": 0.00045583582930412936, |
|
"learning_rate": 3.3819375861521116e-07, |
|
"logits/chosen": -1.6996678113937378, |
|
"logits/rejected": -1.8710010051727295, |
|
"logps/chosen": -144.1748046875, |
|
"logps/rejected": -781.7122192382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00705701345577836, |
|
"rewards/margins": 14.237908363342285, |
|
"rewards/rejected": -14.244966506958008, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.2603389830508474, |
|
"grad_norm": 0.00027042786850133124, |
|
"learning_rate": 3.346844800613229e-07, |
|
"logits/chosen": -1.635947823524475, |
|
"logits/rejected": -1.776626467704773, |
|
"logps/chosen": -122.72543334960938, |
|
"logps/rejected": -782.336669921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01217057928442955, |
|
"rewards/margins": 14.620203971862793, |
|
"rewards/rejected": -14.608033180236816, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.2738983050847459, |
|
"grad_norm": 0.00012613818860590913, |
|
"learning_rate": 3.311562494274009e-07, |
|
"logits/chosen": -1.5437029600143433, |
|
"logits/rejected": -1.7053155899047852, |
|
"logps/chosen": -146.37957763671875, |
|
"logps/rejected": -853.6106567382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013366146944463253, |
|
"rewards/margins": 14.802824020385742, |
|
"rewards/rejected": -14.789458274841309, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.287457627118644, |
|
"grad_norm": 0.0009108583927914208, |
|
"learning_rate": 3.2760985631868716e-07, |
|
"logits/chosen": -1.6577401161193848, |
|
"logits/rejected": -1.8012009859085083, |
|
"logps/chosen": -128.98199462890625, |
|
"logps/rejected": -803.785400390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0071465689688920975, |
|
"rewards/margins": 14.534771919250488, |
|
"rewards/rejected": -14.527626037597656, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.3010169491525423, |
|
"grad_norm": 0.00018290415757236828, |
|
"learning_rate": 3.240460944051194e-07, |
|
"logits/chosen": -1.6779531240463257, |
|
"logits/rejected": -1.866011142730713, |
|
"logps/chosen": -139.95425415039062, |
|
"logps/rejected": -777.5191650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005402210168540478, |
|
"rewards/margins": 14.424090385437012, |
|
"rewards/rejected": -14.41868782043457, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.3145762711864406, |
|
"grad_norm": 0.00032737612531831567, |
|
"learning_rate": 3.2046576124371106e-07, |
|
"logits/chosen": -1.6980135440826416, |
|
"logits/rejected": -1.8551394939422607, |
|
"logps/chosen": -135.28433227539062, |
|
"logps/rejected": -785.8731689453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01490587368607521, |
|
"rewards/margins": 14.301523208618164, |
|
"rewards/rejected": -14.286617279052734, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.328135593220339, |
|
"grad_norm": 0.00025153854347185144, |
|
"learning_rate": 3.1686965810006104e-07, |
|
"logits/chosen": -1.617310643196106, |
|
"logits/rejected": -1.804062008857727, |
|
"logps/chosen": -139.4668731689453, |
|
"logps/rejected": -800.4359130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021481037139892578, |
|
"rewards/margins": 14.51413345336914, |
|
"rewards/rejected": -14.492652893066406, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.3416949152542372, |
|
"grad_norm": 0.000131467090970899, |
|
"learning_rate": 3.132585897690329e-07, |
|
"logits/chosen": -1.6868890523910522, |
|
"logits/rejected": -1.8882062435150146, |
|
"logps/chosen": -145.70948791503906, |
|
"logps/rejected": -857.4185180664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008370953612029552, |
|
"rewards/margins": 14.516642570495605, |
|
"rewards/rejected": -14.508271217346191, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.3552542372881355, |
|
"grad_norm": 0.00031956784200775317, |
|
"learning_rate": 3.096333643946452e-07, |
|
"logits/chosen": -1.6276357173919678, |
|
"logits/rejected": -1.8146514892578125, |
|
"logps/chosen": -142.74488830566406, |
|
"logps/rejected": -807.506103515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.012263872660696507, |
|
"rewards/margins": 14.435070037841797, |
|
"rewards/rejected": -14.422805786132812, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3552542372881355, |
|
"eval_logits/chosen": -1.5454390048980713, |
|
"eval_logits/rejected": -1.6858749389648438, |
|
"eval_logps/chosen": -129.4228973388672, |
|
"eval_logps/rejected": -762.3302612304688, |
|
"eval_loss": 9.916642511598184e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.014398334547877312, |
|
"eval_rewards/margins": 14.39704704284668, |
|
"eval_rewards/rejected": -14.382649421691895, |
|
"eval_runtime": 23.2909, |
|
"eval_samples_per_second": 4.294, |
|
"eval_steps_per_second": 1.073, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3688135593220339, |
|
"grad_norm": 0.0005110214664274667, |
|
"learning_rate": 3.059947932892113e-07, |
|
"logits/chosen": -1.581527590751648, |
|
"logits/rejected": -1.741445541381836, |
|
"logps/chosen": -130.16134643554688, |
|
"logps/rejected": -802.2377319335938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008667388930916786, |
|
"rewards/margins": 14.611343383789062, |
|
"rewards/rejected": -14.602675437927246, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.3823728813559322, |
|
"grad_norm": 0.000385109902014585, |
|
"learning_rate": 3.0234369075177105e-07, |
|
"logits/chosen": -1.6576886177062988, |
|
"logits/rejected": -1.8101606369018555, |
|
"logps/chosen": -139.96914672851562, |
|
"logps/rejected": -817.3280639648438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023594459518790245, |
|
"rewards/margins": 14.629423141479492, |
|
"rewards/rejected": -14.605830192565918, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.3959322033898305, |
|
"grad_norm": 0.00040091688342177467, |
|
"learning_rate": 2.9868087388585344e-07, |
|
"logits/chosen": -1.5842094421386719, |
|
"logits/rejected": -1.7406607866287231, |
|
"logps/chosen": -129.07803344726562, |
|
"logps/rejected": -770.8359985351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013609381392598152, |
|
"rewards/margins": 14.5753812789917, |
|
"rewards/rejected": -14.561771392822266, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4094915254237288, |
|
"grad_norm": 0.00026033035395961934, |
|
"learning_rate": 2.950071624166115e-07, |
|
"logits/chosen": -1.7157691717147827, |
|
"logits/rejected": -1.9252738952636719, |
|
"logps/chosen": -141.04685974121094, |
|
"logps/rejected": -809.7179565429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015553833916783333, |
|
"rewards/margins": 14.591537475585938, |
|
"rewards/rejected": -14.575984954833984, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.423050847457627, |
|
"grad_norm": 0.00019886076594595082, |
|
"learning_rate": 2.9132337850737127e-07, |
|
"logits/chosen": -1.5866303443908691, |
|
"logits/rejected": -1.745476245880127, |
|
"logps/chosen": -136.6873779296875, |
|
"logps/rejected": -779.5513916015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01606130599975586, |
|
"rewards/margins": 14.531635284423828, |
|
"rewards/rejected": -14.51557445526123, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.4366101694915254, |
|
"grad_norm": 0.0003434669449456214, |
|
"learning_rate": 2.8763034657563425e-07, |
|
"logits/chosen": -1.594861626625061, |
|
"logits/rejected": -1.7643938064575195, |
|
"logps/chosen": -139.26364135742188, |
|
"logps/rejected": -812.1429443359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015910768881440163, |
|
"rewards/margins": 14.620904922485352, |
|
"rewards/rejected": -14.60499382019043, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.4501694915254237, |
|
"grad_norm": 0.00024295622286491588, |
|
"learning_rate": 2.839288931085761e-07, |
|
"logits/chosen": -1.6374831199645996, |
|
"logits/rejected": -1.8399394750595093, |
|
"logps/chosen": -146.24412536621094, |
|
"logps/rejected": -838.77587890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0162392258644104, |
|
"rewards/margins": 14.61961555480957, |
|
"rewards/rejected": -14.603377342224121, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.463728813559322, |
|
"grad_norm": 0.00021624383711316104, |
|
"learning_rate": 2.802198464780814e-07, |
|
"logits/chosen": -1.6567904949188232, |
|
"logits/rejected": -1.8536536693572998, |
|
"logps/chosen": -140.39337158203125, |
|
"logps/rejected": -793.81640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022600781172513962, |
|
"rewards/margins": 14.519584655761719, |
|
"rewards/rejected": -14.496984481811523, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.4772881355932204, |
|
"grad_norm": 0.00025925077468732225, |
|
"learning_rate": 2.765040367553572e-07, |
|
"logits/chosen": -1.6113574504852295, |
|
"logits/rejected": -1.8030925989151, |
|
"logps/chosen": -137.23849487304688, |
|
"logps/rejected": -808.6797485351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005808805115520954, |
|
"rewards/margins": 14.623893737792969, |
|
"rewards/rejected": -14.618083953857422, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.4908474576271187, |
|
"grad_norm": 0.0003907081889108827, |
|
"learning_rate": 2.727822955251663e-07, |
|
"logits/chosen": -1.6336138248443604, |
|
"logits/rejected": -1.7939205169677734, |
|
"logps/chosen": -135.02374267578125, |
|
"logps/rejected": -803.9449462890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01596558652818203, |
|
"rewards/margins": 14.46599292755127, |
|
"rewards/rejected": -14.450026512145996, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.4908474576271187, |
|
"eval_logits/chosen": -1.5441298484802246, |
|
"eval_logits/rejected": -1.6875685453414917, |
|
"eval_logps/chosen": -129.44482421875, |
|
"eval_logps/rejected": -762.7908325195312, |
|
"eval_loss": 9.502831517238519e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.012204695492982864, |
|
"eval_rewards/margins": 14.44091510772705, |
|
"eval_rewards/rejected": -14.4287109375, |
|
"eval_runtime": 23.159, |
|
"eval_samples_per_second": 4.318, |
|
"eval_steps_per_second": 1.079, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.504406779661017, |
|
"grad_norm": 0.00022865739977937088, |
|
"learning_rate": 2.6905545569972124e-07, |
|
"logits/chosen": -1.5306434631347656, |
|
"logits/rejected": -1.7007068395614624, |
|
"logps/chosen": -141.87411499023438, |
|
"logps/rejected": -800.179931640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020658917725086212, |
|
"rewards/margins": 14.778863906860352, |
|
"rewards/rejected": -14.758204460144043, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.5179661016949153, |
|
"grad_norm": 0.00013390402806158302, |
|
"learning_rate": 2.6532435133228176e-07, |
|
"logits/chosen": -1.6342971324920654, |
|
"logits/rejected": -1.7988243103027344, |
|
"logps/chosen": -137.4864501953125, |
|
"logps/rejected": -778.6915283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011233944445848465, |
|
"rewards/margins": 14.523719787597656, |
|
"rewards/rejected": -14.51248550415039, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.5315254237288136, |
|
"grad_norm": 0.0005026258076613325, |
|
"learning_rate": 2.615898174304967e-07, |
|
"logits/chosen": -1.6171948909759521, |
|
"logits/rejected": -1.7615530490875244, |
|
"logps/chosen": -157.30349731445312, |
|
"logps/rejected": -839.1676635742188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011025247164070606, |
|
"rewards/margins": 14.724686622619629, |
|
"rewards/rejected": -14.713661193847656, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.5450847457627117, |
|
"grad_norm": 0.00042097845856288896, |
|
"learning_rate": 2.5785268976953204e-07, |
|
"logits/chosen": -1.6210533380508423, |
|
"logits/rejected": -1.776179552078247, |
|
"logps/chosen": -138.84580993652344, |
|
"logps/rejected": -801.5419921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.006160397548228502, |
|
"rewards/margins": 14.480558395385742, |
|
"rewards/rejected": -14.474397659301758, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.5586440677966102, |
|
"grad_norm": 0.00020478295097651352, |
|
"learning_rate": 2.541138047050281e-07, |
|
"logits/chosen": -1.5814204216003418, |
|
"logits/rejected": -1.7641853094100952, |
|
"logps/chosen": -137.55215454101562, |
|
"logps/rejected": -799.6993408203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021200550720095634, |
|
"rewards/margins": 14.847275733947754, |
|
"rewards/rejected": -14.826072692871094, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.5722033898305083, |
|
"grad_norm": 0.00023188287664193435, |
|
"learning_rate": 2.5037399898592537e-07, |
|
"logits/chosen": -1.5283310413360596, |
|
"logits/rejected": -1.697199821472168, |
|
"logps/chosen": -136.95700073242188, |
|
"logps/rejected": -806.5762329101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.025980135425925255, |
|
"rewards/margins": 14.58230209350586, |
|
"rewards/rejected": -14.55632209777832, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.5857627118644069, |
|
"grad_norm": 0.0002882430862738881, |
|
"learning_rate": 2.466341095672036e-07, |
|
"logits/chosen": -1.5664174556732178, |
|
"logits/rejected": -1.7281914949417114, |
|
"logps/chosen": -140.96392822265625, |
|
"logps/rejected": -860.5864868164062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015253797173500061, |
|
"rewards/margins": 14.810201644897461, |
|
"rewards/rejected": -14.794947624206543, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.599322033898305, |
|
"grad_norm": 0.00032442091374497006, |
|
"learning_rate": 2.428949734225744e-07, |
|
"logits/chosen": -1.5551780462265015, |
|
"logits/rejected": -1.7200207710266113, |
|
"logps/chosen": -116.5546646118164, |
|
"logps/rejected": -769.7291259765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020303260535001755, |
|
"rewards/margins": 14.525774002075195, |
|
"rewards/rejected": -14.505470275878906, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.6128813559322035, |
|
"grad_norm": 0.00026348293289965986, |
|
"learning_rate": 2.3915742735716914e-07, |
|
"logits/chosen": -1.5562556982040405, |
|
"logits/rejected": -1.721585988998413, |
|
"logps/chosen": -132.5054931640625, |
|
"logps/rejected": -780.1019287109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0179964117705822, |
|
"rewards/margins": 14.633602142333984, |
|
"rewards/rejected": -14.615605354309082, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.6264406779661016, |
|
"grad_norm": 0.0004614000733649941, |
|
"learning_rate": 2.3542230782026533e-07, |
|
"logits/chosen": -1.6813287734985352, |
|
"logits/rejected": -1.8313826322555542, |
|
"logps/chosen": -139.7794189453125, |
|
"logps/rejected": -794.0023803710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018270863220095634, |
|
"rewards/margins": 14.51429271697998, |
|
"rewards/rejected": -14.496021270751953, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6264406779661016, |
|
"eval_logits/chosen": -1.5334513187408447, |
|
"eval_logits/rejected": -1.6858574151992798, |
|
"eval_logps/chosen": -129.38221740722656, |
|
"eval_logps/rejected": -763.2522583007812, |
|
"eval_loss": 8.998525800052448e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.01846611313521862, |
|
"eval_rewards/margins": 14.493313789367676, |
|
"eval_rewards/rejected": -14.474847793579102, |
|
"eval_runtime": 23.3442, |
|
"eval_samples_per_second": 4.284, |
|
"eval_steps_per_second": 1.071, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 0.0002485397852219414, |
|
"learning_rate": 2.3169045071809214e-07, |
|
"logits/chosen": -1.6468091011047363, |
|
"logits/rejected": -1.837449312210083, |
|
"logps/chosen": -132.42532348632812, |
|
"logps/rejected": -768.0850830078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.118643017951399e-05, |
|
"rewards/margins": 14.453639030456543, |
|
"rewards/rejected": -14.453720092773438, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.6535593220338982, |
|
"grad_norm": 0.0002600309213234688, |
|
"learning_rate": 2.279626912267576e-07, |
|
"logits/chosen": -1.6016912460327148, |
|
"logits/rejected": -1.7917726039886475, |
|
"logps/chosen": -124.97222137451172, |
|
"logps/rejected": -763.31396484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0005855015479028225, |
|
"rewards/margins": 14.508626937866211, |
|
"rewards/rejected": -14.509212493896484, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.6671186440677968, |
|
"grad_norm": 0.0001866086825603447, |
|
"learning_rate": 2.2423986360533944e-07, |
|
"logits/chosen": -1.5999516248703003, |
|
"logits/rejected": -1.7527964115142822, |
|
"logps/chosen": -127.54315185546875, |
|
"logps/rejected": -779.42626953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03687942773103714, |
|
"rewards/margins": 14.615551948547363, |
|
"rewards/rejected": -14.578672409057617, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.6806779661016948, |
|
"grad_norm": 0.0001806454551986488, |
|
"learning_rate": 2.2052280100918053e-07, |
|
"logits/chosen": -1.6087480783462524, |
|
"logits/rejected": -1.733353614807129, |
|
"logps/chosen": -139.215576171875, |
|
"logps/rejected": -828.4327392578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.012436101213097572, |
|
"rewards/margins": 14.784907341003418, |
|
"rewards/rejected": -14.77247142791748, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.6942372881355934, |
|
"grad_norm": 0.0003598095391952748, |
|
"learning_rate": 2.1681233530343226e-07, |
|
"logits/chosen": -1.6320774555206299, |
|
"logits/rejected": -1.770911455154419, |
|
"logps/chosen": -130.1085968017578, |
|
"logps/rejected": -781.075927734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008734717965126038, |
|
"rewards/margins": 14.645079612731934, |
|
"rewards/rejected": -14.636345863342285, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.7077966101694915, |
|
"grad_norm": 0.00021229397199613504, |
|
"learning_rate": 2.131092968768856e-07, |
|
"logits/chosen": -1.6914520263671875, |
|
"logits/rejected": -1.8534152507781982, |
|
"logps/chosen": -137.82667541503906, |
|
"logps/rejected": -795.1356811523438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01237824559211731, |
|
"rewards/margins": 14.534427642822266, |
|
"rewards/rejected": -14.522048950195312, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.7213559322033898, |
|
"grad_norm": 0.00034558435149114095, |
|
"learning_rate": 2.094145144561334e-07, |
|
"logits/chosen": -1.6149834394454956, |
|
"logits/rejected": -1.781679391860962, |
|
"logps/chosen": -135.41860961914062, |
|
"logps/rejected": -788.1875610351562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007942800410091877, |
|
"rewards/margins": 14.633131980895996, |
|
"rewards/rejected": -14.625189781188965, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.734915254237288, |
|
"grad_norm": 0.0003052738823642222, |
|
"learning_rate": 2.057288149201042e-07, |
|
"logits/chosen": -1.6356430053710938, |
|
"logits/rejected": -1.7994301319122314, |
|
"logps/chosen": -148.37130737304688, |
|
"logps/rejected": -835.1704711914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00246133329346776, |
|
"rewards/margins": 14.854413032531738, |
|
"rewards/rejected": -14.851951599121094, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.7484745762711864, |
|
"grad_norm": 0.00021946391817271224, |
|
"learning_rate": 2.0205302311501e-07, |
|
"logits/chosen": -1.4806194305419922, |
|
"logits/rejected": -1.6502048969268799, |
|
"logps/chosen": -143.40792846679688, |
|
"logps/rejected": -826.89990234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022700820118188858, |
|
"rewards/margins": 14.935534477233887, |
|
"rewards/rejected": -14.912833213806152, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.7620338983050847, |
|
"grad_norm": 0.00018901278388815272, |
|
"learning_rate": 1.9838796166974835e-07, |
|
"logits/chosen": -1.6020697355270386, |
|
"logits/rejected": -1.7288063764572144, |
|
"logps/chosen": -118.82467651367188, |
|
"logps/rejected": -745.2700805664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023728668689727783, |
|
"rewards/margins": 14.655136108398438, |
|
"rewards/rejected": -14.631406784057617, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.7620338983050847, |
|
"eval_logits/chosen": -1.541303038597107, |
|
"eval_logits/rejected": -1.6866235733032227, |
|
"eval_logps/chosen": -129.41481018066406, |
|
"eval_logps/rejected": -763.7774658203125, |
|
"eval_loss": 8.536229643141269e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.01520751602947712, |
|
"eval_rewards/margins": 14.542576789855957, |
|
"eval_rewards/rejected": -14.527369499206543, |
|
"eval_runtime": 23.2319, |
|
"eval_samples_per_second": 4.304, |
|
"eval_steps_per_second": 1.076, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.775593220338983, |
|
"grad_norm": 0.00024516636429012606, |
|
"learning_rate": 1.947344508118013e-07, |
|
"logits/chosen": -1.7006336450576782, |
|
"logits/rejected": -1.9313997030258179, |
|
"logps/chosen": -135.2695770263672, |
|
"logps/rejected": -803.58984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01268923282623291, |
|
"rewards/margins": 14.559757232666016, |
|
"rewards/rejected": -14.54706859588623, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.7891525423728813, |
|
"grad_norm": 0.0002825945001585042, |
|
"learning_rate": 1.9109330818367103e-07, |
|
"logits/chosen": -1.6026619672775269, |
|
"logits/rejected": -1.7476422786712646, |
|
"logps/chosen": -138.92730712890625, |
|
"logps/rejected": -774.2352294921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011833753436803818, |
|
"rewards/margins": 14.747849464416504, |
|
"rewards/rejected": -14.736017227172852, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.8027118644067797, |
|
"grad_norm": 0.00023343486470064288, |
|
"learning_rate": 1.8746534865989477e-07, |
|
"logits/chosen": -1.6764352321624756, |
|
"logits/rejected": -1.8599662780761719, |
|
"logps/chosen": -136.24755859375, |
|
"logps/rejected": -778.046630859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016610510647296906, |
|
"rewards/margins": 14.57569694519043, |
|
"rewards/rejected": -14.559085845947266, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.816271186440678, |
|
"grad_norm": 0.00020349358848448975, |
|
"learning_rate": 1.8385138416467886e-07, |
|
"logits/chosen": -1.619372010231018, |
|
"logits/rejected": -1.770355224609375, |
|
"logps/chosen": -137.48997497558594, |
|
"logps/rejected": -796.6224975585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01347330305725336, |
|
"rewards/margins": 14.743675231933594, |
|
"rewards/rejected": -14.730201721191406, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.8298305084745763, |
|
"grad_norm": 0.000259228080505575, |
|
"learning_rate": 1.802522234901927e-07, |
|
"logits/chosen": -1.6564035415649414, |
|
"logits/rejected": -1.8519361019134521, |
|
"logps/chosen": -129.4320068359375, |
|
"logps/rejected": -815.1136474609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03748949244618416, |
|
"rewards/margins": 14.78825855255127, |
|
"rewards/rejected": -14.750767707824707, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.8433898305084746, |
|
"grad_norm": 0.0002871334654289991, |
|
"learning_rate": 1.7666867211556436e-07, |
|
"logits/chosen": -1.6476807594299316, |
|
"logits/rejected": -1.7843477725982666, |
|
"logps/chosen": -132.54351806640625, |
|
"logps/rejected": -779.9259643554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004673462361097336, |
|
"rewards/margins": 14.616876602172852, |
|
"rewards/rejected": -14.612202644348145, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.856949152542373, |
|
"grad_norm": 0.00022022136706817435, |
|
"learning_rate": 1.7310153202661698e-07, |
|
"logits/chosen": -1.5748957395553589, |
|
"logits/rejected": -1.7366337776184082, |
|
"logps/chosen": -142.26052856445312, |
|
"logps/rejected": -840.873779296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01894519291818142, |
|
"rewards/margins": 14.885005950927734, |
|
"rewards/rejected": -14.866060256958008, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.8705084745762712, |
|
"grad_norm": 0.0002676827155917166, |
|
"learning_rate": 1.695516015363876e-07, |
|
"logits/chosen": -1.6750328540802002, |
|
"logits/rejected": -1.8339290618896484, |
|
"logps/chosen": -144.47813415527344, |
|
"logps/rejected": -832.0415649414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023609986528754234, |
|
"rewards/margins": 14.777373313903809, |
|
"rewards/rejected": -14.753763198852539, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.8840677966101695, |
|
"grad_norm": 0.00024546650545054666, |
|
"learning_rate": 1.6601967510646718e-07, |
|
"logits/chosen": -1.725512981414795, |
|
"logits/rejected": -1.8677659034729004, |
|
"logps/chosen": -147.13035583496094, |
|
"logps/rejected": -842.0433349609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013028274290263653, |
|
"rewards/margins": 15.025127410888672, |
|
"rewards/rejected": -15.012099266052246, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.8976271186440679, |
|
"grad_norm": 0.00018426525023186526, |
|
"learning_rate": 1.6250654316920325e-07, |
|
"logits/chosen": -1.6960985660552979, |
|
"logits/rejected": -1.874474287033081, |
|
"logps/chosen": -138.58255004882812, |
|
"logps/rejected": -803.3692016601562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.012537289410829544, |
|
"rewards/margins": 14.797500610351562, |
|
"rewards/rejected": -14.784963607788086, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.8976271186440679, |
|
"eval_logits/chosen": -1.5352431535720825, |
|
"eval_logits/rejected": -1.686131238937378, |
|
"eval_logps/chosen": -129.33590698242188, |
|
"eval_logps/rejected": -764.0711059570312, |
|
"eval_loss": 8.283525403385283e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.02309771627187729, |
|
"eval_rewards/margins": 14.579834938049316, |
|
"eval_rewards/rejected": -14.556737899780273, |
|
"eval_runtime": 23.3395, |
|
"eval_samples_per_second": 4.285, |
|
"eval_steps_per_second": 1.071, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.911186440677966, |
|
"grad_norm": 0.0002575697935659544, |
|
"learning_rate": 1.5901299195080392e-07, |
|
"logits/chosen": -1.6227922439575195, |
|
"logits/rejected": -1.7947490215301514, |
|
"logps/chosen": -130.98719787597656, |
|
"logps/rejected": -791.5205078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00749362725764513, |
|
"rewards/margins": 14.689105987548828, |
|
"rewards/rejected": -14.681612014770508, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.9247457627118645, |
|
"grad_norm": 0.00018017946205386775, |
|
"learning_rate": 1.5553980329538323e-07, |
|
"logits/chosen": -1.5999001264572144, |
|
"logits/rejected": -1.7650830745697021, |
|
"logps/chosen": -139.0697021484375, |
|
"logps/rejected": -810.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010547073557972908, |
|
"rewards/margins": 14.652795791625977, |
|
"rewards/rejected": -14.642248153686523, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.9383050847457626, |
|
"grad_norm": 0.0001479458221901959, |
|
"learning_rate": 1.520877544899875e-07, |
|
"logits/chosen": -1.6732416152954102, |
|
"logits/rejected": -1.8719912767410278, |
|
"logps/chosen": -154.01966857910156, |
|
"logps/rejected": -817.1717529296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0030850698240101337, |
|
"rewards/margins": 14.651540756225586, |
|
"rewards/rejected": -14.654626846313477, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.951864406779661, |
|
"grad_norm": 0.0002763487512077753, |
|
"learning_rate": 1.4865761809064097e-07, |
|
"logits/chosen": -1.6813652515411377, |
|
"logits/rejected": -1.878997802734375, |
|
"logps/chosen": -125.57931518554688, |
|
"logps/rejected": -793.8370361328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015030990354716778, |
|
"rewards/margins": 14.654011726379395, |
|
"rewards/rejected": -14.638980865478516, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.9654237288135592, |
|
"grad_norm": 0.00016799758819048652, |
|
"learning_rate": 1.4525016174945103e-07, |
|
"logits/chosen": -1.7774598598480225, |
|
"logits/rejected": -1.9586834907531738, |
|
"logps/chosen": -128.68084716796875, |
|
"logps/rejected": -789.97021484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020244726911187172, |
|
"rewards/margins": 14.630430221557617, |
|
"rewards/rejected": -14.610186576843262, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.9789830508474577, |
|
"grad_norm": 0.00023868564973718838, |
|
"learning_rate": 1.4186614804280978e-07, |
|
"logits/chosen": -1.6796725988388062, |
|
"logits/rejected": -1.8473913669586182, |
|
"logps/chosen": -146.37466430664062, |
|
"logps/rejected": -850.5054931640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009393090382218361, |
|
"rewards/margins": 14.856435775756836, |
|
"rewards/rejected": -14.84704303741455, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.9925423728813558, |
|
"grad_norm": 0.000159343350698008, |
|
"learning_rate": 1.3850633430073286e-07, |
|
"logits/chosen": -1.5762964487075806, |
|
"logits/rejected": -1.7709100246429443, |
|
"logps/chosen": -151.92115783691406, |
|
"logps/rejected": -829.155029296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013924097642302513, |
|
"rewards/margins": 14.711325645446777, |
|
"rewards/rejected": -14.69740104675293, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.0054237288135592, |
|
"grad_norm": 0.00024916837620644227, |
|
"learning_rate": 1.3517147243737148e-07, |
|
"logits/chosen": -1.5804271697998047, |
|
"logits/rejected": -1.7189936637878418, |
|
"logps/chosen": -127.54490661621094, |
|
"logps/rejected": -792.7044067382812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021060097962617874, |
|
"rewards/margins": 14.793603897094727, |
|
"rewards/rejected": -14.772544860839844, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.0189830508474578, |
|
"grad_norm": 0.00028667386075358764, |
|
"learning_rate": 1.3186230878273653e-07, |
|
"logits/chosen": -1.6898704767227173, |
|
"logits/rejected": -1.8163692951202393, |
|
"logps/chosen": -133.3569793701172, |
|
"logps/rejected": -762.7427368164062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00874436367303133, |
|
"rewards/margins": 14.47949504852295, |
|
"rewards/rejected": -14.47075080871582, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.032542372881356, |
|
"grad_norm": 0.00030745885090509886, |
|
"learning_rate": 1.285795839156729e-07, |
|
"logits/chosen": -1.6830826997756958, |
|
"logits/rejected": -1.8209247589111328, |
|
"logps/chosen": -137.370361328125, |
|
"logps/rejected": -783.111083984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009560978040099144, |
|
"rewards/margins": 14.544479370117188, |
|
"rewards/rejected": -14.534917831420898, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.032542372881356, |
|
"eval_logits/chosen": -1.5413455963134766, |
|
"eval_logits/rejected": -1.6867271661758423, |
|
"eval_logps/chosen": -129.37197875976562, |
|
"eval_logps/rejected": -764.1895751953125, |
|
"eval_loss": 8.282594876618532e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.01949072629213333, |
|
"eval_rewards/margins": 14.588081359863281, |
|
"eval_rewards/rejected": -14.56859016418457, |
|
"eval_runtime": 23.3878, |
|
"eval_samples_per_second": 4.276, |
|
"eval_steps_per_second": 1.069, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0461016949152544, |
|
"grad_norm": 0.00027440430133344047, |
|
"learning_rate": 1.2532403249812073e-07, |
|
"logits/chosen": -1.629027247428894, |
|
"logits/rejected": -1.7673338651657104, |
|
"logps/chosen": -143.42172241210938, |
|
"logps/rejected": -824.6468505859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004273869562894106, |
|
"rewards/margins": 14.795296669006348, |
|
"rewards/rejected": -14.791022300720215, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.0596610169491525, |
|
"grad_norm": 0.00023891678563468625, |
|
"learning_rate": 1.2209638311070024e-07, |
|
"logits/chosen": -1.6091238260269165, |
|
"logits/rejected": -1.7953072786331177, |
|
"logps/chosen": -127.51914978027344, |
|
"logps/rejected": -782.3043212890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0034638545475900173, |
|
"rewards/margins": 14.720000267028809, |
|
"rewards/rejected": -14.716536521911621, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.073220338983051, |
|
"grad_norm": 0.00024388492083099195, |
|
"learning_rate": 1.1889735808965853e-07, |
|
"logits/chosen": -1.6395263671875, |
|
"logits/rejected": -1.8181636333465576, |
|
"logps/chosen": -135.23330688476562, |
|
"logps/rejected": -783.9580078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009885641746222973, |
|
"rewards/margins": 14.711644172668457, |
|
"rewards/rejected": -14.701759338378906, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.086779661016949, |
|
"grad_norm": 5.583927886360231e-05, |
|
"learning_rate": 1.1572767336521322e-07, |
|
"logits/chosen": -1.7426564693450928, |
|
"logits/rejected": -1.9373841285705566, |
|
"logps/chosen": -163.0467529296875, |
|
"logps/rejected": -873.5623779296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01771201565861702, |
|
"rewards/margins": 14.745927810668945, |
|
"rewards/rejected": -14.728215217590332, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.1003389830508477, |
|
"grad_norm": 0.00017823331489187885, |
|
"learning_rate": 1.125880383013294e-07, |
|
"logits/chosen": -1.5471131801605225, |
|
"logits/rejected": -1.7288830280303955, |
|
"logps/chosen": -122.73018646240234, |
|
"logps/rejected": -782.6522216796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.029936904087662697, |
|
"rewards/margins": 14.729475021362305, |
|
"rewards/rejected": -14.699538230895996, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.1138983050847457, |
|
"grad_norm": 0.0003091912161991017, |
|
"learning_rate": 1.0947915553696741e-07, |
|
"logits/chosen": -1.5830570459365845, |
|
"logits/rejected": -1.7391778230667114, |
|
"logps/chosen": -124.14356994628906, |
|
"logps/rejected": -795.6461791992188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01131663378328085, |
|
"rewards/margins": 14.700472831726074, |
|
"rewards/rejected": -14.689155578613281, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.127457627118644, |
|
"grad_norm": 0.00016494367259474433, |
|
"learning_rate": 1.0640172082883377e-07, |
|
"logits/chosen": -1.575786828994751, |
|
"logits/rejected": -1.7403693199157715, |
|
"logps/chosen": -143.2032470703125, |
|
"logps/rejected": -797.1260986328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01606675237417221, |
|
"rewards/margins": 14.676783561706543, |
|
"rewards/rejected": -14.660717010498047, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.1410169491525424, |
|
"grad_norm": 0.0001055398603476797, |
|
"learning_rate": 1.0335642289567453e-07, |
|
"logits/chosen": -1.5635545253753662, |
|
"logits/rejected": -1.6949539184570312, |
|
"logps/chosen": -137.00326538085938, |
|
"logps/rejected": -795.691650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.017065811902284622, |
|
"rewards/margins": 14.849153518676758, |
|
"rewards/rejected": -14.832088470458984, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.154576271186441, |
|
"grad_norm": 0.00015184404381463285, |
|
"learning_rate": 1.003439432641412e-07, |
|
"logits/chosen": -1.5640537738800049, |
|
"logits/rejected": -1.718758225440979, |
|
"logps/chosen": -145.822998046875, |
|
"logps/rejected": -822.9422607421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.027230584993958473, |
|
"rewards/margins": 14.735852241516113, |
|
"rewards/rejected": -14.70862102508545, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.168135593220339, |
|
"grad_norm": 0.00017944006421282506, |
|
"learning_rate": 9.736495611626869e-08, |
|
"logits/chosen": -1.7426433563232422, |
|
"logits/rejected": -1.9045021533966064, |
|
"logps/chosen": -136.1781463623047, |
|
"logps/rejected": -826.3635864257812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.009065323509275913, |
|
"rewards/margins": 14.765503883361816, |
|
"rewards/rejected": -14.774569511413574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.168135593220339, |
|
"eval_logits/chosen": -1.5352487564086914, |
|
"eval_logits/rejected": -1.6869568824768066, |
|
"eval_logps/chosen": -129.40280151367188, |
|
"eval_logps/rejected": -764.318359375, |
|
"eval_loss": 8.185303954633127e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.016407504677772522, |
|
"eval_rewards/margins": 14.597875595092773, |
|
"eval_rewards/rejected": -14.58146858215332, |
|
"eval_runtime": 23.0522, |
|
"eval_samples_per_second": 4.338, |
|
"eval_steps_per_second": 1.084, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.181694915254237, |
|
"grad_norm": 0.0005351685788183639, |
|
"learning_rate": 9.442012813859495e-08, |
|
"logits/chosen": -1.7391018867492676, |
|
"logits/rejected": -1.9293949604034424, |
|
"logps/chosen": -147.13917541503906, |
|
"logps/rejected": -805.6615600585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.002429434796795249, |
|
"rewards/margins": 14.607394218444824, |
|
"rewards/rejected": -14.604965209960938, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.1952542372881356, |
|
"grad_norm": 0.0001727788888810501, |
|
"learning_rate": 9.151011837295967e-08, |
|
"logits/chosen": -1.5907230377197266, |
|
"logits/rejected": -1.779097318649292, |
|
"logps/chosen": -135.67388916015625, |
|
"logps/rejected": -828.772705078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02872363105416298, |
|
"rewards/margins": 14.719782829284668, |
|
"rewards/rejected": -14.691059112548828, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.2088135593220337, |
|
"grad_norm": 0.00025741808212769845, |
|
"learning_rate": 8.863557806901232e-08, |
|
"logits/chosen": -1.8002914190292358, |
|
"logits/rejected": -1.9408048391342163, |
|
"logps/chosen": -132.03172302246094, |
|
"logps/rejected": -784.93115234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011570243164896965, |
|
"rewards/margins": 14.813987731933594, |
|
"rewards/rejected": -14.80241584777832, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.2223728813559323, |
|
"grad_norm": 0.00021793936297483956, |
|
"learning_rate": 8.579715053846584e-08, |
|
"logits/chosen": -1.5967586040496826, |
|
"logits/rejected": -1.7385027408599854, |
|
"logps/chosen": -154.05177307128906, |
|
"logps/rejected": -800.445068359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.005721386056393385, |
|
"rewards/margins": 14.734086036682129, |
|
"rewards/rejected": -14.728364944458008, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.2359322033898303, |
|
"grad_norm": 0.0002391850164899579, |
|
"learning_rate": 8.299547101112466e-08, |
|
"logits/chosen": -1.6040098667144775, |
|
"logits/rejected": -1.767350435256958, |
|
"logps/chosen": -144.35110473632812, |
|
"logps/rejected": -783.85107421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00950249470770359, |
|
"rewards/margins": 14.55084228515625, |
|
"rewards/rejected": -14.560342788696289, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.249491525423729, |
|
"grad_norm": 0.00031210724967463475, |
|
"learning_rate": 8.023116649272357e-08, |
|
"logits/chosen": -1.6417827606201172, |
|
"logits/rejected": -1.843612790107727, |
|
"logps/chosen": -140.8085479736328, |
|
"logps/rejected": -832.3277587890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01454589981585741, |
|
"rewards/margins": 14.897970199584961, |
|
"rewards/rejected": -14.883424758911133, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.263050847457627, |
|
"grad_norm": 0.0001537621460843329, |
|
"learning_rate": 7.750485562460529e-08, |
|
"logits/chosen": -1.637737512588501, |
|
"logits/rejected": -1.838837742805481, |
|
"logps/chosen": -151.76361083984375, |
|
"logps/rejected": -879.8406982421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.028323961421847343, |
|
"rewards/margins": 15.002603530883789, |
|
"rewards/rejected": -14.974279403686523, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.2766101694915255, |
|
"grad_norm": 0.00014037312273128166, |
|
"learning_rate": 7.48171485452716e-08, |
|
"logits/chosen": -1.644343614578247, |
|
"logits/rejected": -1.8316127061843872, |
|
"logps/chosen": -148.27976989746094, |
|
"logps/rejected": -807.6490478515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.002391491085290909, |
|
"rewards/margins": 14.774595260620117, |
|
"rewards/rejected": -14.776987075805664, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.2901694915254236, |
|
"grad_norm": 0.0003568019109756037, |
|
"learning_rate": 7.216864675383566e-08, |
|
"logits/chosen": -1.7114001512527466, |
|
"logits/rejected": -1.8946892023086548, |
|
"logps/chosen": -136.16168212890625, |
|
"logps/rejected": -822.518310546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0228084959089756, |
|
"rewards/margins": 14.955730438232422, |
|
"rewards/rejected": -14.93292236328125, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.303728813559322, |
|
"grad_norm": 0.00020642497846486078, |
|
"learning_rate": 6.955994297540946e-08, |
|
"logits/chosen": -1.582494854927063, |
|
"logits/rejected": -1.7430905103683472, |
|
"logps/chosen": -127.01213073730469, |
|
"logps/rejected": -749.1889038085938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004720491822808981, |
|
"rewards/margins": 14.59855842590332, |
|
"rewards/rejected": -14.593838691711426, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.303728813559322, |
|
"eval_logits/chosen": -1.5455721616744995, |
|
"eval_logits/rejected": -1.6867108345031738, |
|
"eval_logps/chosen": -129.34518432617188, |
|
"eval_logps/rejected": -764.3837890625, |
|
"eval_loss": 8.27629833111132e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.022169284522533417, |
|
"eval_rewards/margins": 14.610172271728516, |
|
"eval_rewards/rejected": -14.588001251220703, |
|
"eval_runtime": 23.2684, |
|
"eval_samples_per_second": 4.298, |
|
"eval_steps_per_second": 1.074, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.31728813559322, |
|
"grad_norm": 0.0003301800906268837, |
|
"learning_rate": 6.699162102845371e-08, |
|
"logits/chosen": -1.6968494653701782, |
|
"logits/rejected": -1.8610255718231201, |
|
"logps/chosen": -144.05172729492188, |
|
"logps/rejected": -792.3526000976562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0033090743236243725, |
|
"rewards/margins": 14.642684936523438, |
|
"rewards/rejected": -14.645994186401367, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.3308474576271188, |
|
"grad_norm": 0.00022300666262349114, |
|
"learning_rate": 6.446425569412145e-08, |
|
"logits/chosen": -1.5895376205444336, |
|
"logits/rejected": -1.7664618492126465, |
|
"logps/chosen": -147.58518981933594, |
|
"logps/rejected": -821.5425415039062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013544298708438873, |
|
"rewards/margins": 14.664928436279297, |
|
"rewards/rejected": -14.651383399963379, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.344406779661017, |
|
"grad_norm": 0.0002689016484107736, |
|
"learning_rate": 6.197841258762393e-08, |
|
"logits/chosen": -1.6732720136642456, |
|
"logits/rejected": -1.8248271942138672, |
|
"logps/chosen": -143.9190216064453, |
|
"logps/rejected": -798.0980224609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.002302667824551463, |
|
"rewards/margins": 14.700139999389648, |
|
"rewards/rejected": -14.70244312286377, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.3579661016949154, |
|
"grad_norm": 0.00013823061030867806, |
|
"learning_rate": 5.95346480316484e-08, |
|
"logits/chosen": -1.6567462682724, |
|
"logits/rejected": -1.8002218008041382, |
|
"logps/chosen": -133.57330322265625, |
|
"logps/rejected": -790.8118896484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01070284005254507, |
|
"rewards/margins": 14.786917686462402, |
|
"rewards/rejected": -14.776214599609375, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.3715254237288135, |
|
"grad_norm": 0.00012708429822748938, |
|
"learning_rate": 5.713350893185462e-08, |
|
"logits/chosen": -1.540064811706543, |
|
"logits/rejected": -1.6900475025177002, |
|
"logps/chosen": -139.0050048828125, |
|
"logps/rejected": -813.723388671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011193746700882912, |
|
"rewards/margins": 14.780536651611328, |
|
"rewards/rejected": -14.769343376159668, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.385084745762712, |
|
"grad_norm": 0.00031972660791255883, |
|
"learning_rate": 5.4775532654479703e-08, |
|
"logits/chosen": -1.6927651166915894, |
|
"logits/rejected": -1.8659099340438843, |
|
"logps/chosen": -140.10357666015625, |
|
"logps/rejected": -804.2578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.026886438950896263, |
|
"rewards/margins": 14.513021469116211, |
|
"rewards/rejected": -14.486133575439453, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.39864406779661, |
|
"grad_norm": 0.0002306535610201857, |
|
"learning_rate": 5.246124690607739e-08, |
|
"logits/chosen": -1.597672939300537, |
|
"logits/rejected": -1.7689580917358398, |
|
"logps/chosen": -136.6164093017578, |
|
"logps/rejected": -814.9773559570312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015394443646073341, |
|
"rewards/margins": 14.934272766113281, |
|
"rewards/rejected": -14.918877601623535, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.4122033898305086, |
|
"grad_norm": 0.00025827411726207306, |
|
"learning_rate": 5.019116961541928e-08, |
|
"logits/chosen": -1.6076412200927734, |
|
"logits/rejected": -1.787865400314331, |
|
"logps/chosen": -130.0896759033203, |
|
"logps/rejected": -771.5728149414062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.018673747777938843, |
|
"rewards/margins": 14.67123794555664, |
|
"rewards/rejected": -14.652565002441406, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.4257627118644067, |
|
"grad_norm": 0.0001607149373365815, |
|
"learning_rate": 4.796580881758394e-08, |
|
"logits/chosen": -1.5243449211120605, |
|
"logits/rejected": -1.7028474807739258, |
|
"logps/chosen": -135.9856719970703, |
|
"logps/rejected": -788.3903198242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013456443324685097, |
|
"rewards/margins": 14.943410873413086, |
|
"rewards/rejected": -14.92995548248291, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.4393220338983053, |
|
"grad_norm": 0.0008019666936544235, |
|
"learning_rate": 4.5785662540261035e-08, |
|
"logits/chosen": -1.5732166767120361, |
|
"logits/rejected": -1.7345067262649536, |
|
"logps/chosen": -123.94300842285156, |
|
"logps/rejected": -809.0182495117188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009817011654376984, |
|
"rewards/margins": 14.806741714477539, |
|
"rewards/rejected": -14.796926498413086, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4393220338983053, |
|
"eval_logits/chosen": -1.5405449867248535, |
|
"eval_logits/rejected": -1.6869794130325317, |
|
"eval_logps/chosen": -129.44265747070312, |
|
"eval_logps/rejected": -764.5172729492188, |
|
"eval_loss": 8.009520229279588e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.012421955354511738, |
|
"eval_rewards/margins": 14.613780975341797, |
|
"eval_rewards/rejected": -14.601359367370605, |
|
"eval_runtime": 23.2676, |
|
"eval_samples_per_second": 4.298, |
|
"eval_steps_per_second": 1.074, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4528813559322034, |
|
"grad_norm": 0.0005455920009654483, |
|
"learning_rate": 4.365121869229399e-08, |
|
"logits/chosen": -1.6582889556884766, |
|
"logits/rejected": -1.8295698165893555, |
|
"logps/chosen": -117.7739486694336, |
|
"logps/rejected": -741.6177368164062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011549338698387146, |
|
"rewards/margins": 14.508467674255371, |
|
"rewards/rejected": -14.496919631958008, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.4664406779661014, |
|
"grad_norm": 0.00023816862497805363, |
|
"learning_rate": 4.1562954954488194e-08, |
|
"logits/chosen": -1.6208901405334473, |
|
"logits/rejected": -1.81270432472229, |
|
"logps/chosen": -137.12130737304688, |
|
"logps/rejected": -798.8505249023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011716622859239578, |
|
"rewards/margins": 14.657186508178711, |
|
"rewards/rejected": -14.64547061920166, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.0002564351546730618, |
|
"learning_rate": 3.952133867270749e-08, |
|
"logits/chosen": -1.6017396450042725, |
|
"logits/rejected": -1.760289192199707, |
|
"logps/chosen": -135.62570190429688, |
|
"logps/rejected": -787.9927978515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021429751068353653, |
|
"rewards/margins": 14.778738021850586, |
|
"rewards/rejected": -14.757308006286621, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.4935593220338985, |
|
"grad_norm": 0.00024431220204588973, |
|
"learning_rate": 3.7526826753284055e-08, |
|
"logits/chosen": -1.5733165740966797, |
|
"logits/rejected": -1.734438419342041, |
|
"logps/chosen": -135.39663696289062, |
|
"logps/rejected": -781.9830322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021723030135035515, |
|
"rewards/margins": 14.634653091430664, |
|
"rewards/rejected": -14.612930297851562, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.5071186440677966, |
|
"grad_norm": 0.0002804770855324591, |
|
"learning_rate": 3.5579865560764086e-08, |
|
"logits/chosen": -1.6363115310668945, |
|
"logits/rejected": -1.8157660961151123, |
|
"logps/chosen": -143.70252990722656, |
|
"logps/rejected": -803.8099365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008160289376974106, |
|
"rewards/margins": 14.804468154907227, |
|
"rewards/rejected": -14.796307563781738, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.5206779661016947, |
|
"grad_norm": 0.00042995034846870784, |
|
"learning_rate": 3.3680890818013506e-08, |
|
"logits/chosen": -1.7073619365692139, |
|
"logits/rejected": -1.8638834953308105, |
|
"logps/chosen": -136.8450469970703, |
|
"logps/rejected": -822.551025390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.012727834284305573, |
|
"rewards/margins": 14.761212348937988, |
|
"rewards/rejected": -14.74848461151123, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.5342372881355932, |
|
"grad_norm": 0.0005052071520552922, |
|
"learning_rate": 3.183032750870443e-08, |
|
"logits/chosen": -1.7014129161834717, |
|
"logits/rejected": -1.890128493309021, |
|
"logps/chosen": -144.991943359375, |
|
"logps/rejected": -852.4500732421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.019090861082077026, |
|
"rewards/margins": 14.904670715332031, |
|
"rewards/rejected": -14.885579109191895, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.5477966101694918, |
|
"grad_norm": 0.00029171939654467954, |
|
"learning_rate": 3.002858978220535e-08, |
|
"logits/chosen": -1.6215015649795532, |
|
"logits/rejected": -1.8319523334503174, |
|
"logps/chosen": -148.42018127441406, |
|
"logps/rejected": -826.58349609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.017919525504112244, |
|
"rewards/margins": 14.893685340881348, |
|
"rewards/rejected": -14.875765800476074, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.56135593220339, |
|
"grad_norm": 0.00014374560387721727, |
|
"learning_rate": 2.8276080860896223e-08, |
|
"logits/chosen": -1.5488464832305908, |
|
"logits/rejected": -1.7413768768310547, |
|
"logps/chosen": -148.57704162597656, |
|
"logps/rejected": -838.784423828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015819568186998367, |
|
"rewards/margins": 15.020793914794922, |
|
"rewards/rejected": -15.004974365234375, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.574915254237288, |
|
"grad_norm": 0.00013430221339783704, |
|
"learning_rate": 2.65731929499286e-08, |
|
"logits/chosen": -1.6086156368255615, |
|
"logits/rejected": -1.7848008871078491, |
|
"logps/chosen": -144.15988159179688, |
|
"logps/rejected": -854.092529296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02485763281583786, |
|
"rewards/margins": 14.896612167358398, |
|
"rewards/rejected": -14.871755599975586, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.574915254237288, |
|
"eval_logits/chosen": -1.5360183715820312, |
|
"eval_logits/rejected": -1.686428189277649, |
|
"eval_logps/chosen": -129.38560485839844, |
|
"eval_logps/rejected": -764.64111328125, |
|
"eval_loss": 7.962561880958674e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.018127169460058212, |
|
"eval_rewards/margins": 14.63186264038086, |
|
"eval_rewards/rejected": -14.613737106323242, |
|
"eval_runtime": 23.1267, |
|
"eval_samples_per_second": 4.324, |
|
"eval_steps_per_second": 1.081, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.5884745762711865, |
|
"grad_norm": 0.0003822317334269131, |
|
"learning_rate": 2.492030714945162e-08, |
|
"logits/chosen": -1.5995548963546753, |
|
"logits/rejected": -1.76951003074646, |
|
"logps/chosen": -135.77032470703125, |
|
"logps/rejected": -790.2930908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00021793553605675697, |
|
"rewards/margins": 14.569367408752441, |
|
"rewards/rejected": -14.569584846496582, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.6020338983050846, |
|
"grad_norm": 0.000792494216087496, |
|
"learning_rate": 2.3317793369322992e-08, |
|
"logits/chosen": -1.6509454250335693, |
|
"logits/rejected": -1.8071436882019043, |
|
"logps/chosen": -132.31130981445312, |
|
"logps/rejected": -799.173583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.030081693083047867, |
|
"rewards/margins": 14.740245819091797, |
|
"rewards/rejected": -14.710164070129395, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.615593220338983, |
|
"grad_norm": 0.0002708186052822774, |
|
"learning_rate": 2.1766010246324795e-08, |
|
"logits/chosen": -1.5743701457977295, |
|
"logits/rejected": -1.7702776193618774, |
|
"logps/chosen": -147.4547119140625, |
|
"logps/rejected": -827.8794555664062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.006160350050777197, |
|
"rewards/margins": 14.79060173034668, |
|
"rewards/rejected": -14.784441947937012, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.629152542372881, |
|
"grad_norm": 0.00020442518354563292, |
|
"learning_rate": 2.026530506390156e-08, |
|
"logits/chosen": -1.5262815952301025, |
|
"logits/rejected": -1.6566275358200073, |
|
"logps/chosen": -147.53939819335938, |
|
"logps/rejected": -822.6124267578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014457417652010918, |
|
"rewards/margins": 14.995853424072266, |
|
"rewards/rejected": -14.981395721435547, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.6427118644067797, |
|
"grad_norm": 0.00014356861276548948, |
|
"learning_rate": 1.8816013674439885e-08, |
|
"logits/chosen": -1.6366736888885498, |
|
"logits/rejected": -1.816946268081665, |
|
"logps/chosen": -133.2000274658203, |
|
"logps/rejected": -794.9268798828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0276030246168375, |
|
"rewards/margins": 14.789546012878418, |
|
"rewards/rejected": -14.761942863464355, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.656271186440678, |
|
"grad_norm": 0.00018299695034949964, |
|
"learning_rate": 1.741846042410533e-08, |
|
"logits/chosen": -1.5427926778793335, |
|
"logits/rejected": -1.7075831890106201, |
|
"logps/chosen": -137.50941467285156, |
|
"logps/rejected": -807.8783569335938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010896590538322926, |
|
"rewards/margins": 14.576469421386719, |
|
"rewards/rejected": -14.565572738647461, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.6698305084745764, |
|
"grad_norm": 0.00016135810197605974, |
|
"learning_rate": 1.607295808025558e-08, |
|
"logits/chosen": -1.6068065166473389, |
|
"logits/rejected": -1.7660378217697144, |
|
"logps/chosen": -131.94476318359375, |
|
"logps/rejected": -811.60791015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.019821301102638245, |
|
"rewards/margins": 14.834625244140625, |
|
"rewards/rejected": -14.814804077148438, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.6833898305084745, |
|
"grad_norm": 0.00023692180234191582, |
|
"learning_rate": 1.4779807761443635e-08, |
|
"logits/chosen": -1.6394813060760498, |
|
"logits/rejected": -1.8306336402893066, |
|
"logps/chosen": -143.0768585205078, |
|
"logps/rejected": -815.6318359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.021681049838662148, |
|
"rewards/margins": 14.815585136413574, |
|
"rewards/rejected": -14.793903350830078, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.696949152542373, |
|
"grad_norm": 0.00026461222708204653, |
|
"learning_rate": 1.353929887002897e-08, |
|
"logits/chosen": -1.5212616920471191, |
|
"logits/rejected": -1.702871561050415, |
|
"logps/chosen": -137.1354522705078, |
|
"logps/rejected": -811.0231323242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022010358050465584, |
|
"rewards/margins": 14.848760604858398, |
|
"rewards/rejected": -14.826749801635742, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.710508474576271, |
|
"grad_norm": 0.00034035092300092515, |
|
"learning_rate": 1.2351709027410145e-08, |
|
"logits/chosen": -1.5257236957550049, |
|
"logits/rejected": -1.6637029647827148, |
|
"logps/chosen": -125.89049530029297, |
|
"logps/rejected": -765.981201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02222251147031784, |
|
"rewards/margins": 14.72728157043457, |
|
"rewards/rejected": -14.705059051513672, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.710508474576271, |
|
"eval_logits/chosen": -1.5505026578903198, |
|
"eval_logits/rejected": -1.686079740524292, |
|
"eval_logps/chosen": -129.3984832763672, |
|
"eval_logps/rejected": -764.6372680664062, |
|
"eval_loss": 7.91568538716092e-07, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.016839873045682907, |
|
"eval_rewards/margins": 14.630191802978516, |
|
"eval_rewards/rejected": -14.613351821899414, |
|
"eval_runtime": 23.1883, |
|
"eval_samples_per_second": 4.313, |
|
"eval_steps_per_second": 1.078, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2211, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 145012404715520.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|