|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9880609304240429, |
|
"eval_steps": 500, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013174145738987238, |
|
"grad_norm": 0.5102696418762207, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 10.088521957397461, |
|
"logits/rejected": 10.263787269592285, |
|
"logps/chosen": -0.9118157029151917, |
|
"logps/rejected": -0.9621729850769043, |
|
"loss": 1.3897, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -1.8236314058303833, |
|
"rewards/margins": 0.10071463882923126, |
|
"rewards/rejected": -1.9243459701538086, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026348291477974475, |
|
"grad_norm": 0.9815747141838074, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 10.592972755432129, |
|
"logits/rejected": 10.720216751098633, |
|
"logps/chosen": -0.945902943611145, |
|
"logps/rejected": -1.0317902565002441, |
|
"loss": 1.3077, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.89180588722229, |
|
"rewards/margins": 0.1717745065689087, |
|
"rewards/rejected": -2.0635805130004883, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03952243721696171, |
|
"grad_norm": 0.9049758315086365, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 10.041976928710938, |
|
"logits/rejected": 10.399316787719727, |
|
"logps/chosen": -1.0869810581207275, |
|
"logps/rejected": -1.1895216703414917, |
|
"loss": 1.346, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.173962116241455, |
|
"rewards/margins": 0.20508113503456116, |
|
"rewards/rejected": -2.3790433406829834, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05269658295594895, |
|
"grad_norm": 1.8911848068237305, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 10.243470191955566, |
|
"logits/rejected": 10.443375587463379, |
|
"logps/chosen": -0.966098427772522, |
|
"logps/rejected": -1.0040662288665771, |
|
"loss": 1.4032, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.932196855545044, |
|
"rewards/margins": 0.07593552023172379, |
|
"rewards/rejected": -2.0081324577331543, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.6135074496269226, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 10.439040184020996, |
|
"logits/rejected": 10.739177703857422, |
|
"logps/chosen": -0.9262609481811523, |
|
"logps/rejected": -0.9657196998596191, |
|
"loss": 1.3727, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.8525218963623047, |
|
"rewards/margins": 0.07891744375228882, |
|
"rewards/rejected": -1.9314393997192383, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07904487443392343, |
|
"grad_norm": 0.5990542769432068, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": 10.910269737243652, |
|
"logits/rejected": 11.204473495483398, |
|
"logps/chosen": -0.9439595341682434, |
|
"logps/rejected": -1.0420396327972412, |
|
"loss": 1.3491, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8879190683364868, |
|
"rewards/margins": 0.196160227060318, |
|
"rewards/rejected": -2.0840792655944824, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09221902017291066, |
|
"grad_norm": 1.3676807880401611, |
|
"learning_rate": 8.75e-07, |
|
"logits/chosen": 9.873465538024902, |
|
"logits/rejected": 10.022269248962402, |
|
"logps/chosen": -0.8941428661346436, |
|
"logps/rejected": -1.0010743141174316, |
|
"loss": 1.3507, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.788285732269287, |
|
"rewards/margins": 0.21386288106441498, |
|
"rewards/rejected": -2.0021486282348633, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1053931659118979, |
|
"grad_norm": 1.7690002918243408, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 10.597719192504883, |
|
"logits/rejected": 10.780376434326172, |
|
"logps/chosen": -0.9080270528793335, |
|
"logps/rejected": -0.9909782409667969, |
|
"loss": 1.3305, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.816054105758667, |
|
"rewards/margins": 0.16590236127376556, |
|
"rewards/rejected": -1.9819564819335938, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.11856731165088513, |
|
"grad_norm": 1.056247353553772, |
|
"learning_rate": 9.994504457428556e-07, |
|
"logits/chosen": 10.446786880493164, |
|
"logits/rejected": 10.839168548583984, |
|
"logps/chosen": -1.1091859340667725, |
|
"logps/rejected": -1.0694739818572998, |
|
"loss": 1.5127, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -2.218371868133545, |
|
"rewards/margins": -0.07942387461662292, |
|
"rewards/rejected": -2.1389479637145996, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 2.076240062713623, |
|
"learning_rate": 9.97802991010949e-07, |
|
"logits/chosen": 10.343971252441406, |
|
"logits/rejected": 10.492179870605469, |
|
"logps/chosen": -0.9705042839050293, |
|
"logps/rejected": -0.9916192889213562, |
|
"loss": 1.4611, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.9410085678100586, |
|
"rewards/margins": 0.04223020374774933, |
|
"rewards/rejected": -1.9832385778427124, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14491560312885962, |
|
"grad_norm": 1.13358736038208, |
|
"learning_rate": 9.950612572673255e-07, |
|
"logits/chosen": 10.49313735961914, |
|
"logits/rejected": 10.680143356323242, |
|
"logps/chosen": -1.1081148386001587, |
|
"logps/rejected": -1.223841667175293, |
|
"loss": 1.3449, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.2162296772003174, |
|
"rewards/margins": 0.23145350813865662, |
|
"rewards/rejected": -2.447683334350586, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15808974886784685, |
|
"grad_norm": 1.1638388633728027, |
|
"learning_rate": 9.912312714377879e-07, |
|
"logits/chosen": 10.328557014465332, |
|
"logits/rejected": 10.365793228149414, |
|
"logps/chosen": -0.9249637722969055, |
|
"logps/rejected": -0.9842618703842163, |
|
"loss": 1.351, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -1.849927544593811, |
|
"rewards/margins": 0.11859625577926636, |
|
"rewards/rejected": -1.9685237407684326, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17126389460683408, |
|
"grad_norm": 0.9941473007202148, |
|
"learning_rate": 9.863214526624063e-07, |
|
"logits/chosen": 9.909621238708496, |
|
"logits/rejected": 10.248769760131836, |
|
"logps/chosen": -0.9913480877876282, |
|
"logps/rejected": -1.1752512454986572, |
|
"loss": 1.2767, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.9826961755752563, |
|
"rewards/margins": 0.36780619621276855, |
|
"rewards/rejected": -2.3505024909973145, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1844380403458213, |
|
"grad_norm": 1.3600269556045532, |
|
"learning_rate": 9.8034259378842e-07, |
|
"logits/chosen": 10.472145080566406, |
|
"logits/rejected": 10.987956047058105, |
|
"logps/chosen": -0.9751205444335938, |
|
"logps/rejected": -1.0532664060592651, |
|
"loss": 1.3626, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.9502410888671875, |
|
"rewards/margins": 0.15629185736179352, |
|
"rewards/rejected": -2.1065328121185303, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.19761218608480857, |
|
"grad_norm": 0.3477364182472229, |
|
"learning_rate": 9.73307837645217e-07, |
|
"logits/chosen": 10.209980010986328, |
|
"logits/rejected": 10.457592964172363, |
|
"logps/chosen": -0.9716652035713196, |
|
"logps/rejected": -1.0775285959243774, |
|
"loss": 1.3132, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.9433304071426392, |
|
"rewards/margins": 0.2117268592119217, |
|
"rewards/rejected": -2.155057191848755, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2107863318237958, |
|
"grad_norm": 0.975040853023529, |
|
"learning_rate": 9.652326481535433e-07, |
|
"logits/chosen": 10.770889282226562, |
|
"logits/rejected": 11.057292938232422, |
|
"logps/chosen": -0.9405269026756287, |
|
"logps/rejected": -0.9816387891769409, |
|
"loss": 1.4142, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.8810538053512573, |
|
"rewards/margins": 0.08222392201423645, |
|
"rewards/rejected": -1.9632775783538818, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22396047756278303, |
|
"grad_norm": 0.47477808594703674, |
|
"learning_rate": 9.561347763324483e-07, |
|
"logits/chosen": 10.384443283081055, |
|
"logits/rejected": 10.546278953552246, |
|
"logps/chosen": -0.9655594229698181, |
|
"logps/rejected": -0.9963297247886658, |
|
"loss": 1.4058, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.9311188459396362, |
|
"rewards/margins": 0.061540693044662476, |
|
"rewards/rejected": -1.9926594495773315, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.23713462330177026, |
|
"grad_norm": 0.9369856119155884, |
|
"learning_rate": 9.460342212786932e-07, |
|
"logits/chosen": 10.428518295288086, |
|
"logits/rejected": 10.742942810058594, |
|
"logps/chosen": -1.0061042308807373, |
|
"logps/rejected": -0.9558196067810059, |
|
"loss": 1.5279, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -2.0122084617614746, |
|
"rewards/margins": -0.10056903213262558, |
|
"rewards/rejected": -1.9116392135620117, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2503087690407575, |
|
"grad_norm": 0.6867318153381348, |
|
"learning_rate": 9.349531862043951e-07, |
|
"logits/chosen": 10.536978721618652, |
|
"logits/rejected": 10.496305465698242, |
|
"logps/chosen": -1.0390187501907349, |
|
"logps/rejected": -1.1175179481506348, |
|
"loss": 1.3199, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -2.0780375003814697, |
|
"rewards/margins": 0.1569983810186386, |
|
"rewards/rejected": -2.2350358963012695, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.2634829147797447, |
|
"grad_norm": 1.6277413368225098, |
|
"learning_rate": 9.229160296295487e-07, |
|
"logits/chosen": 10.487991333007812, |
|
"logits/rejected": 10.849261283874512, |
|
"logps/chosen": -1.005416989326477, |
|
"logps/rejected": -1.0715974569320679, |
|
"loss": 1.3772, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -2.010833978652954, |
|
"rewards/margins": 0.1323607861995697, |
|
"rewards/rejected": -2.1431949138641357, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.276657060518732, |
|
"grad_norm": 1.1200292110443115, |
|
"learning_rate": 9.099492118367122e-07, |
|
"logits/chosen": 10.419047355651855, |
|
"logits/rejected": 10.756099700927734, |
|
"logps/chosen": -0.9289014935493469, |
|
"logps/rejected": -1.020794153213501, |
|
"loss": 1.3128, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8578029870986938, |
|
"rewards/margins": 0.18378500640392303, |
|
"rewards/rejected": -2.041588306427002, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.28983120625771924, |
|
"grad_norm": 1.3312275409698486, |
|
"learning_rate": 8.960812367055646e-07, |
|
"logits/chosen": 10.375129699707031, |
|
"logits/rejected": 10.722561836242676, |
|
"logps/chosen": -1.0812478065490723, |
|
"logps/rejected": -1.104426383972168, |
|
"loss": 1.438, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -2.1624956130981445, |
|
"rewards/margins": 0.04635699465870857, |
|
"rewards/rejected": -2.208852767944336, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3030053519967065, |
|
"grad_norm": 0.36184069514274597, |
|
"learning_rate": 8.813425890551909e-07, |
|
"logits/chosen": 10.423131942749023, |
|
"logits/rejected": 10.69787311553955, |
|
"logps/chosen": -1.0428340435028076, |
|
"logps/rejected": -1.0485754013061523, |
|
"loss": 1.4543, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0856680870056152, |
|
"rewards/margins": 0.01148274727165699, |
|
"rewards/rejected": -2.0971508026123047, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3161794977356937, |
|
"grad_norm": 0.5770995020866394, |
|
"learning_rate": 8.657656676318345e-07, |
|
"logits/chosen": 10.316368103027344, |
|
"logits/rejected": 10.479074478149414, |
|
"logps/chosen": -0.9681622982025146, |
|
"logps/rejected": -1.050377607345581, |
|
"loss": 1.3743, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.9363245964050293, |
|
"rewards/margins": 0.16443049907684326, |
|
"rewards/rejected": -2.100755214691162, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"grad_norm": 2.933119297027588, |
|
"learning_rate": 8.493847138894208e-07, |
|
"logits/chosen": 10.537033081054688, |
|
"logits/rejected": 10.929938316345215, |
|
"logps/chosen": -0.8849888443946838, |
|
"logps/rejected": -0.9310915470123291, |
|
"loss": 1.3801, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.7699776887893677, |
|
"rewards/margins": 0.09220531582832336, |
|
"rewards/rejected": -1.8621830940246582, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.34252778921366817, |
|
"grad_norm": 0.6123780012130737, |
|
"learning_rate": 8.322357367194108e-07, |
|
"logits/chosen": 10.728922843933105, |
|
"logits/rejected": 10.86151123046875, |
|
"logps/chosen": -0.9060953855514526, |
|
"logps/rejected": -0.9559903144836426, |
|
"loss": 1.3675, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8121907711029053, |
|
"rewards/margins": 0.09978975355625153, |
|
"rewards/rejected": -1.9119806289672852, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3557019349526554, |
|
"grad_norm": 1.8228118419647217, |
|
"learning_rate": 8.143564332954425e-07, |
|
"logits/chosen": 10.698800086975098, |
|
"logits/rejected": 11.009087562561035, |
|
"logps/chosen": -1.0113650560379028, |
|
"logps/rejected": -1.0222567319869995, |
|
"loss": 1.4265, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -2.0227301120758057, |
|
"rewards/margins": 0.021783310920000076, |
|
"rewards/rejected": -2.044513463973999, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3688760806916426, |
|
"grad_norm": 0.5645075440406799, |
|
"learning_rate": 7.957861062067612e-07, |
|
"logits/chosen": 10.079174995422363, |
|
"logits/rejected": 10.544219970703125, |
|
"logps/chosen": -0.905964195728302, |
|
"logps/rejected": -0.9862813949584961, |
|
"loss": 1.3617, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.811928391456604, |
|
"rewards/margins": 0.16063442826271057, |
|
"rewards/rejected": -1.9725627899169922, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3820502264306299, |
|
"grad_norm": 2.0421998500823975, |
|
"learning_rate": 7.765655770625996e-07, |
|
"logits/chosen": 10.456260681152344, |
|
"logits/rejected": 10.634986877441406, |
|
"logps/chosen": -0.8751351237297058, |
|
"logps/rejected": -0.9531751871109009, |
|
"loss": 1.3385, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.7502702474594116, |
|
"rewards/margins": 0.15608005225658417, |
|
"rewards/rejected": -1.9063503742218018, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.39522437216961714, |
|
"grad_norm": 0.5317772030830383, |
|
"learning_rate": 7.567370967574209e-07, |
|
"logits/chosen": 10.539588928222656, |
|
"logits/rejected": 11.065520286560059, |
|
"logps/chosen": -0.9008954763412476, |
|
"logps/rejected": -0.9472513794898987, |
|
"loss": 1.3915, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.8017909526824951, |
|
"rewards/margins": 0.0927119180560112, |
|
"rewards/rejected": -1.8945027589797974, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4083985179086044, |
|
"grad_norm": 0.929427981376648, |
|
"learning_rate": 7.363442525942826e-07, |
|
"logits/chosen": 10.132299423217773, |
|
"logits/rejected": 10.620383262634277, |
|
"logps/chosen": -0.8826152086257935, |
|
"logps/rejected": -0.9457932114601135, |
|
"loss": 1.3575, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.765230417251587, |
|
"rewards/margins": 0.12635602056980133, |
|
"rewards/rejected": -1.891586422920227, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.4215726636475916, |
|
"grad_norm": 0.287424236536026, |
|
"learning_rate": 7.154318724704851e-07, |
|
"logits/chosen": 10.419651985168457, |
|
"logits/rejected": 10.7128267288208, |
|
"logps/chosen": -0.9339243173599243, |
|
"logps/rejected": -0.95993971824646, |
|
"loss": 1.4233, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8678486347198486, |
|
"rewards/margins": 0.052030690014362335, |
|
"rewards/rejected": -1.91987943649292, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.43474680938657884, |
|
"grad_norm": 0.8097081184387207, |
|
"learning_rate": 6.940459263361248e-07, |
|
"logits/chosen": 10.180652618408203, |
|
"logits/rejected": 10.812442779541016, |
|
"logps/chosen": -0.9092710614204407, |
|
"logps/rejected": -0.8953740000724792, |
|
"loss": 1.4729, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.8185421228408813, |
|
"rewards/margins": -0.027794085443019867, |
|
"rewards/rejected": -1.7907480001449585, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.44792095512556607, |
|
"grad_norm": 0.6889541745185852, |
|
"learning_rate": 6.722334251421664e-07, |
|
"logits/chosen": 10.093196868896484, |
|
"logits/rejected": 10.485943794250488, |
|
"logps/chosen": -1.0066155195236206, |
|
"logps/rejected": -1.033683180809021, |
|
"loss": 1.46, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -2.013231039047241, |
|
"rewards/margins": 0.054135359823703766, |
|
"rewards/rejected": -2.067366361618042, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 1.549689531326294, |
|
"learning_rate": 6.500423175001703e-07, |
|
"logits/chosen": 10.556034088134766, |
|
"logits/rejected": 10.797924995422363, |
|
"logps/chosen": -0.9926838278770447, |
|
"logps/rejected": -1.0250440835952759, |
|
"loss": 1.45, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.9853676557540894, |
|
"rewards/margins": 0.06472064554691315, |
|
"rewards/rejected": -2.0500881671905518, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.47426924660354053, |
|
"grad_norm": 0.8994730710983276, |
|
"learning_rate": 6.275213842808382e-07, |
|
"logits/chosen": 10.474414825439453, |
|
"logits/rejected": 10.848793983459473, |
|
"logps/chosen": -0.875037431716919, |
|
"logps/rejected": -0.9078534841537476, |
|
"loss": 1.3809, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.750074863433838, |
|
"rewards/margins": 0.0656321793794632, |
|
"rewards/rejected": -1.8157069683074951, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.4874433923425278, |
|
"grad_norm": 0.47796007990837097, |
|
"learning_rate": 6.047201313830723e-07, |
|
"logits/chosen": 10.430042266845703, |
|
"logits/rejected": 10.628593444824219, |
|
"logps/chosen": -0.9276302456855774, |
|
"logps/rejected": -1.0414646863937378, |
|
"loss": 1.2843, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -1.8552604913711548, |
|
"rewards/margins": 0.22766906023025513, |
|
"rewards/rejected": -2.0829293727874756, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.500617538081515, |
|
"grad_norm": 1.8155546188354492, |
|
"learning_rate": 5.816886809092651e-07, |
|
"logits/chosen": 10.451735496520996, |
|
"logits/rejected": 10.694074630737305, |
|
"logps/chosen": -0.9971815943717957, |
|
"logps/rejected": -1.0258358716964722, |
|
"loss": 1.4073, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.9943631887435913, |
|
"rewards/margins": 0.057308606803417206, |
|
"rewards/rejected": -2.0516717433929443, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5137916838205022, |
|
"grad_norm": 1.4760124683380127, |
|
"learning_rate": 5.584776609860413e-07, |
|
"logits/chosen": 10.413124084472656, |
|
"logits/rejected": 10.561273574829102, |
|
"logps/chosen": -0.9041131734848022, |
|
"logps/rejected": -0.8810122609138489, |
|
"loss": 1.4648, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.8082263469696045, |
|
"rewards/margins": -0.046201735734939575, |
|
"rewards/rejected": -1.7620245218276978, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5269658295594895, |
|
"grad_norm": 0.5761233568191528, |
|
"learning_rate": 5.351380944726465e-07, |
|
"logits/chosen": 10.481398582458496, |
|
"logits/rejected": 10.893855094909668, |
|
"logps/chosen": -0.8643166422843933, |
|
"logps/rejected": -0.9709457159042358, |
|
"loss": 1.283, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.7286332845687866, |
|
"rewards/margins": 0.21325840055942535, |
|
"rewards/rejected": -1.9418914318084717, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5401399752984768, |
|
"grad_norm": 2.2331480979919434, |
|
"learning_rate": 5.117212868016303e-07, |
|
"logits/chosen": 10.834035873413086, |
|
"logits/rejected": 10.830018997192383, |
|
"logps/chosen": -0.8488631248474121, |
|
"logps/rejected": -0.896608293056488, |
|
"loss": 1.3632, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6977262496948242, |
|
"rewards/margins": 0.09549038112163544, |
|
"rewards/rejected": -1.793216586112976, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.553314121037464, |
|
"grad_norm": 0.8566221594810486, |
|
"learning_rate": 4.882787131983697e-07, |
|
"logits/chosen": 10.066476821899414, |
|
"logits/rejected": 10.382861137390137, |
|
"logps/chosen": -0.7725206613540649, |
|
"logps/rejected": -0.8358003497123718, |
|
"loss": 1.3445, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.5450413227081299, |
|
"rewards/margins": 0.12655934691429138, |
|
"rewards/rejected": -1.6716006994247437, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5664882667764513, |
|
"grad_norm": 1.208364725112915, |
|
"learning_rate": 4.648619055273537e-07, |
|
"logits/chosen": 9.969656944274902, |
|
"logits/rejected": 10.292926788330078, |
|
"logps/chosen": -0.8092156052589417, |
|
"logps/rejected": -0.9089646339416504, |
|
"loss": 1.302, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -1.6184312105178833, |
|
"rewards/margins": 0.19949808716773987, |
|
"rewards/rejected": -1.8179292678833008, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5796624125154385, |
|
"grad_norm": 0.6581346988677979, |
|
"learning_rate": 4.4152233901395875e-07, |
|
"logits/chosen": 10.372085571289062, |
|
"logits/rejected": 10.586714744567871, |
|
"logps/chosen": -0.8714113235473633, |
|
"logps/rejected": -0.8994148969650269, |
|
"loss": 1.384, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.7428226470947266, |
|
"rewards/margins": 0.05600719153881073, |
|
"rewards/rejected": -1.7988297939300537, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5928365582544257, |
|
"grad_norm": 0.4405794143676758, |
|
"learning_rate": 4.183113190907348e-07, |
|
"logits/chosen": 10.185098648071289, |
|
"logits/rejected": 10.43220329284668, |
|
"logps/chosen": -0.8584359288215637, |
|
"logps/rejected": -0.9151718616485596, |
|
"loss": 1.3552, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.7168718576431274, |
|
"rewards/margins": 0.11347203701734543, |
|
"rewards/rejected": -1.8303437232971191, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.606010703993413, |
|
"grad_norm": 3.3551409244537354, |
|
"learning_rate": 3.9527986861692785e-07, |
|
"logits/chosen": 10.364571571350098, |
|
"logits/rejected": 10.566858291625977, |
|
"logps/chosen": -0.8840798735618591, |
|
"logps/rejected": -0.9646981954574585, |
|
"loss": 1.343, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.7681597471237183, |
|
"rewards/margins": 0.16123665869235992, |
|
"rewards/rejected": -1.929396390914917, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6191848497324002, |
|
"grad_norm": 0.4494335949420929, |
|
"learning_rate": 3.724786157191618e-07, |
|
"logits/chosen": 10.511517524719238, |
|
"logits/rejected": 10.93988037109375, |
|
"logps/chosen": -0.9318357706069946, |
|
"logps/rejected": -0.9812297224998474, |
|
"loss": 1.3768, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.8636715412139893, |
|
"rewards/margins": 0.09878775477409363, |
|
"rewards/rejected": -1.9624594449996948, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6323589954713874, |
|
"grad_norm": 0.5882486701011658, |
|
"learning_rate": 3.499576824998297e-07, |
|
"logits/chosen": 10.835572242736816, |
|
"logits/rejected": 10.978731155395508, |
|
"logps/chosen": -0.8741555213928223, |
|
"logps/rejected": -0.974146842956543, |
|
"loss": 1.3111, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7483110427856445, |
|
"rewards/margins": 0.1999826580286026, |
|
"rewards/rejected": -1.948293685913086, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6455331412103746, |
|
"grad_norm": 0.3740289807319641, |
|
"learning_rate": 3.2776657485783356e-07, |
|
"logits/chosen": 10.704976081848145, |
|
"logits/rejected": 11.161348342895508, |
|
"logps/chosen": -0.8556671142578125, |
|
"logps/rejected": -0.9082369804382324, |
|
"loss": 1.3737, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.711334228515625, |
|
"rewards/margins": 0.105139821767807, |
|
"rewards/rejected": -1.8164739608764648, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"grad_norm": 3.8892507553100586, |
|
"learning_rate": 3.0595407366387506e-07, |
|
"logits/chosen": 10.376428604125977, |
|
"logits/rejected": 10.601147651672363, |
|
"logps/chosen": -0.7704035639762878, |
|
"logps/rejected": -0.7979400753974915, |
|
"loss": 1.396, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.5408071279525757, |
|
"rewards/margins": 0.05507296323776245, |
|
"rewards/rejected": -1.595880150794983, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6718814326883491, |
|
"grad_norm": 0.2746015787124634, |
|
"learning_rate": 2.845681275295148e-07, |
|
"logits/chosen": 9.914533615112305, |
|
"logits/rejected": 10.269950866699219, |
|
"logps/chosen": -0.8250231742858887, |
|
"logps/rejected": -0.8431719541549683, |
|
"loss": 1.3893, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.6500463485717773, |
|
"rewards/margins": 0.03629734367132187, |
|
"rewards/rejected": -1.6863439083099365, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.6850555784273363, |
|
"grad_norm": 0.24610334634780884, |
|
"learning_rate": 2.636557474057173e-07, |
|
"logits/chosen": 10.291726112365723, |
|
"logits/rejected": 10.781953811645508, |
|
"logps/chosen": -0.8618065714836121, |
|
"logps/rejected": -0.9313206076622009, |
|
"loss": 1.3446, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.7236131429672241, |
|
"rewards/margins": 0.13902832567691803, |
|
"rewards/rejected": -1.8626412153244019, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6982297241663236, |
|
"grad_norm": 0.15776224434375763, |
|
"learning_rate": 2.432629032425789e-07, |
|
"logits/chosen": 10.308675765991211, |
|
"logits/rejected": 10.378983497619629, |
|
"logps/chosen": -0.7716065645217896, |
|
"logps/rejected": -0.8652410507202148, |
|
"loss": 1.3032, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.543213129043579, |
|
"rewards/margins": 0.18726901710033417, |
|
"rewards/rejected": -1.7304821014404297, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.7114038699053108, |
|
"grad_norm": 1.2065925598144531, |
|
"learning_rate": 2.2343442293740028e-07, |
|
"logits/chosen": 10.570337295532227, |
|
"logits/rejected": 10.923002243041992, |
|
"logps/chosen": -0.9139507412910461, |
|
"logps/rejected": -0.9362795352935791, |
|
"loss": 1.4161, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.8279014825820923, |
|
"rewards/margins": 0.04465775564312935, |
|
"rewards/rejected": -1.8725590705871582, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.724578015644298, |
|
"grad_norm": 1.3746451139450073, |
|
"learning_rate": 2.0421389379323877e-07, |
|
"logits/chosen": 10.228102684020996, |
|
"logits/rejected": 10.668277740478516, |
|
"logps/chosen": -0.9302918910980225, |
|
"logps/rejected": -0.9963704347610474, |
|
"loss": 1.3653, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.860583782196045, |
|
"rewards/margins": 0.1321568638086319, |
|
"rewards/rejected": -1.9927408695220947, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7377521613832853, |
|
"grad_norm": 0.943622887134552, |
|
"learning_rate": 1.8564356670455767e-07, |
|
"logits/chosen": 10.320796966552734, |
|
"logits/rejected": 10.657548904418945, |
|
"logps/chosen": -0.8569545149803162, |
|
"logps/rejected": -0.9385513067245483, |
|
"loss": 1.3233, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.7139090299606323, |
|
"rewards/margins": 0.16319361329078674, |
|
"rewards/rejected": -1.8771026134490967, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7509263071222725, |
|
"grad_norm": 0.6669130325317383, |
|
"learning_rate": 1.6776426328058919e-07, |
|
"logits/chosen": 10.639445304870605, |
|
"logits/rejected": 10.801530838012695, |
|
"logps/chosen": -0.894336462020874, |
|
"logps/rejected": -0.9663994312286377, |
|
"loss": 1.3622, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.788672924041748, |
|
"rewards/margins": 0.1441258043050766, |
|
"rewards/rejected": -1.9327988624572754, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.7641004528612598, |
|
"grad_norm": 0.5335331559181213, |
|
"learning_rate": 1.5061528611057915e-07, |
|
"logits/chosen": 10.268043518066406, |
|
"logits/rejected": 10.705906867980957, |
|
"logps/chosen": -0.8205227255821228, |
|
"logps/rejected": -0.9216040372848511, |
|
"loss": 1.3441, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.6410454511642456, |
|
"rewards/margins": 0.20216265320777893, |
|
"rewards/rejected": -1.8432080745697021, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7772745986002471, |
|
"grad_norm": 0.4525432884693146, |
|
"learning_rate": 1.3423433236816562e-07, |
|
"logits/chosen": 10.535262107849121, |
|
"logits/rejected": 10.616332054138184, |
|
"logps/chosen": -0.8587080836296082, |
|
"logps/rejected": -0.9249216914176941, |
|
"loss": 1.3529, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.7174161672592163, |
|
"rewards/margins": 0.13242727518081665, |
|
"rewards/rejected": -1.8498433828353882, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.7904487443392343, |
|
"grad_norm": 0.22125543653964996, |
|
"learning_rate": 1.1865741094480908e-07, |
|
"logits/chosen": 10.357114791870117, |
|
"logits/rejected": 10.703635215759277, |
|
"logps/chosen": -0.7657562494277954, |
|
"logps/rejected": -0.8366930484771729, |
|
"loss": 1.3376, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.5315124988555908, |
|
"rewards/margins": 0.1418738067150116, |
|
"rewards/rejected": -1.6733860969543457, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8036228900782215, |
|
"grad_norm": 1.0575804710388184, |
|
"learning_rate": 1.0391876329443533e-07, |
|
"logits/chosen": 10.659791946411133, |
|
"logits/rejected": 11.023832321166992, |
|
"logps/chosen": -0.9209427833557129, |
|
"logps/rejected": -0.8952279090881348, |
|
"loss": 1.4912, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.8418855667114258, |
|
"rewards/margins": -0.051429763436317444, |
|
"rewards/rejected": -1.7904558181762695, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.8167970358172087, |
|
"grad_norm": 0.6347566843032837, |
|
"learning_rate": 9.00507881632877e-08, |
|
"logits/chosen": 10.281499862670898, |
|
"logits/rejected": 10.502365112304688, |
|
"logps/chosen": -0.739016592502594, |
|
"logps/rejected": -0.7842726111412048, |
|
"loss": 1.3593, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -1.478033185005188, |
|
"rewards/margins": 0.09051205217838287, |
|
"rewards/rejected": -1.5685452222824097, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.829971181556196, |
|
"grad_norm": 0.721809983253479, |
|
"learning_rate": 7.708397037045128e-08, |
|
"logits/chosen": 10.44649600982666, |
|
"logits/rejected": 10.727692604064941, |
|
"logps/chosen": -0.816267728805542, |
|
"logps/rejected": -0.8777220249176025, |
|
"loss": 1.3528, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -1.632535457611084, |
|
"rewards/margins": 0.12290860712528229, |
|
"rewards/rejected": -1.755444049835205, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.8431453272951832, |
|
"grad_norm": 1.6892390251159668, |
|
"learning_rate": 6.504681379560489e-08, |
|
"logits/chosen": 10.393595695495605, |
|
"logits/rejected": 10.786395072937012, |
|
"logps/chosen": -0.8907488584518433, |
|
"logps/rejected": -0.9129250645637512, |
|
"loss": 1.3994, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -1.7814977169036865, |
|
"rewards/margins": 0.044352397322654724, |
|
"rewards/rejected": -1.8258501291275024, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.8563194730341704, |
|
"grad_norm": 0.44313424825668335, |
|
"learning_rate": 5.396577872130675e-08, |
|
"logits/chosen": 10.191991806030273, |
|
"logits/rejected": 10.535318374633789, |
|
"logps/chosen": -0.7858970761299133, |
|
"logps/rejected": -0.8720111846923828, |
|
"loss": 1.3057, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.5717941522598267, |
|
"rewards/margins": 0.17222818732261658, |
|
"rewards/rejected": -1.7440223693847656, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.8694936187731577, |
|
"grad_norm": 0.7300361394882202, |
|
"learning_rate": 4.3865223667551686e-08, |
|
"logits/chosen": 10.755420684814453, |
|
"logits/rejected": 11.041528701782227, |
|
"logps/chosen": -0.8608958125114441, |
|
"logps/rejected": -0.918579638004303, |
|
"loss": 1.37, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.7217916250228882, |
|
"rewards/margins": 0.11536761373281479, |
|
"rewards/rejected": -1.837159276008606, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.8826677645121449, |
|
"grad_norm": 0.2531627118587494, |
|
"learning_rate": 3.476735184645674e-08, |
|
"logits/chosen": 10.110332489013672, |
|
"logits/rejected": 10.64801025390625, |
|
"logps/chosen": -0.8378317356109619, |
|
"logps/rejected": -0.8242462873458862, |
|
"loss": 1.4525, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.6756634712219238, |
|
"rewards/margins": -0.027171123772859573, |
|
"rewards/rejected": -1.6484925746917725, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.8958419102511321, |
|
"grad_norm": 0.41518858075141907, |
|
"learning_rate": 2.6692162354782943e-08, |
|
"logits/chosen": 10.715375900268555, |
|
"logits/rejected": 10.947654724121094, |
|
"logps/chosen": -0.8526171445846558, |
|
"logps/rejected": -0.869063138961792, |
|
"loss": 1.3975, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.7052342891693115, |
|
"rewards/margins": 0.032891832292079926, |
|
"rewards/rejected": -1.738126277923584, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.9090160559901194, |
|
"grad_norm": 0.5735509991645813, |
|
"learning_rate": 1.9657406211579962e-08, |
|
"logits/chosen": 10.639143943786621, |
|
"logits/rejected": 10.795159339904785, |
|
"logps/chosen": -0.8615827560424805, |
|
"logps/rejected": -0.9446123838424683, |
|
"loss": 1.3404, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.723165512084961, |
|
"rewards/margins": 0.16605933010578156, |
|
"rewards/rejected": -1.8892247676849365, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 0.6274116039276123, |
|
"learning_rate": 1.3678547337593494e-08, |
|
"logits/chosen": 10.071352005004883, |
|
"logits/rejected": 10.274941444396973, |
|
"logps/chosen": -0.8070065379142761, |
|
"logps/rejected": -0.8831263184547424, |
|
"loss": 1.3129, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -1.6140130758285522, |
|
"rewards/margins": 0.15223939716815948, |
|
"rewards/rejected": -1.7662526369094849, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9353643474680938, |
|
"grad_norm": 0.600334107875824, |
|
"learning_rate": 8.768728562211946e-09, |
|
"logits/chosen": 10.405344009399414, |
|
"logits/rejected": 10.851999282836914, |
|
"logps/chosen": -0.8354118466377258, |
|
"logps/rejected": -0.905266523361206, |
|
"loss": 1.3566, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.6708236932754517, |
|
"rewards/margins": 0.13970917463302612, |
|
"rewards/rejected": -1.810533046722412, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.9485384932070811, |
|
"grad_norm": 0.298692524433136, |
|
"learning_rate": 4.938742732674528e-09, |
|
"logits/chosen": 10.517662048339844, |
|
"logits/rejected": 10.823286056518555, |
|
"logps/chosen": -0.7517382502555847, |
|
"logps/rejected": -0.7897137999534607, |
|
"loss": 1.3684, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.5034765005111694, |
|
"rewards/margins": 0.0759509950876236, |
|
"rewards/rejected": -1.5794275999069214, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.9617126389460683, |
|
"grad_norm": 0.6866773962974548, |
|
"learning_rate": 2.1970089890509524e-09, |
|
"logits/chosen": 10.318891525268555, |
|
"logits/rejected": 10.598917961120605, |
|
"logps/chosen": -0.811228334903717, |
|
"logps/rejected": -0.8924151062965393, |
|
"loss": 1.316, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -1.622456669807434, |
|
"rewards/margins": 0.1623736321926117, |
|
"rewards/rejected": -1.7848302125930786, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.9748867846850556, |
|
"grad_norm": 2.966809034347534, |
|
"learning_rate": 5.495542571443135e-10, |
|
"logits/chosen": 10.162529945373535, |
|
"logits/rejected": 10.41257381439209, |
|
"logps/chosen": -0.7982832789421082, |
|
"logps/rejected": -0.8748475909233093, |
|
"loss": 1.339, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -1.5965665578842163, |
|
"rewards/margins": 0.15312884747982025, |
|
"rewards/rejected": -1.7496951818466187, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.9880609304240429, |
|
"grad_norm": 0.4032129645347595, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 10.685279846191406, |
|
"logits/rejected": 10.931794166564941, |
|
"logps/chosen": -0.7736072540283203, |
|
"logps/rejected": -0.8905478119850159, |
|
"loss": 1.3094, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -1.5472145080566406, |
|
"rewards/margins": 0.23388129472732544, |
|
"rewards/rejected": -1.7810956239700317, |
|
"step": 75 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 12, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|