math_phi3_simpo_20_0/checkpoint-24/trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3161794977356937,
"eval_steps": 500,
"global_step": 24,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013174145738987238,
"grad_norm": 0.5102696418762207,
"learning_rate": 1.25e-07,
"logits/chosen": 10.088521957397461,
"logits/rejected": 10.263787269592285,
"logps/chosen": -0.9118157029151917,
"logps/rejected": -0.9621729850769043,
"loss": 1.3897,
"rewards/accuracies": 0.5234375,
"rewards/chosen": -1.8236314058303833,
"rewards/margins": 0.10071463882923126,
"rewards/rejected": -1.9243459701538086,
"step": 1
},
{
"epoch": 0.026348291477974475,
"grad_norm": 0.9815747141838074,
"learning_rate": 2.5e-07,
"logits/chosen": 10.592972755432129,
"logits/rejected": 10.720216751098633,
"logps/chosen": -0.945902943611145,
"logps/rejected": -1.0317902565002441,
"loss": 1.3077,
"rewards/accuracies": 0.609375,
"rewards/chosen": -1.89180588722229,
"rewards/margins": 0.1717745065689087,
"rewards/rejected": -2.0635805130004883,
"step": 2
},
{
"epoch": 0.03952243721696171,
"grad_norm": 0.9049758315086365,
"learning_rate": 3.75e-07,
"logits/chosen": 10.041976928710938,
"logits/rejected": 10.399316787719727,
"logps/chosen": -1.0869810581207275,
"logps/rejected": -1.1895216703414917,
"loss": 1.346,
"rewards/accuracies": 0.578125,
"rewards/chosen": -2.173962116241455,
"rewards/margins": 0.20508113503456116,
"rewards/rejected": -2.3790433406829834,
"step": 3
},
{
"epoch": 0.05269658295594895,
"grad_norm": 1.8911848068237305,
"learning_rate": 5e-07,
"logits/chosen": 10.243470191955566,
"logits/rejected": 10.443375587463379,
"logps/chosen": -0.966098427772522,
"logps/rejected": -1.0040662288665771,
"loss": 1.4032,
"rewards/accuracies": 0.546875,
"rewards/chosen": -1.932196855545044,
"rewards/margins": 0.07593552023172379,
"rewards/rejected": -2.0081324577331543,
"step": 4
},
{
"epoch": 0.06587072869493618,
"grad_norm": 0.6135074496269226,
"learning_rate": 6.249999999999999e-07,
"logits/chosen": 10.439040184020996,
"logits/rejected": 10.739177703857422,
"logps/chosen": -0.9262609481811523,
"logps/rejected": -0.9657196998596191,
"loss": 1.3727,
"rewards/accuracies": 0.5546875,
"rewards/chosen": -1.8525218963623047,
"rewards/margins": 0.07891744375228882,
"rewards/rejected": -1.9314393997192383,
"step": 5
},
{
"epoch": 0.07904487443392343,
"grad_norm": 0.5990542769432068,
"learning_rate": 7.5e-07,
"logits/chosen": 10.910269737243652,
"logits/rejected": 11.204473495483398,
"logps/chosen": -0.9439595341682434,
"logps/rejected": -1.0420396327972412,
"loss": 1.3491,
"rewards/accuracies": 0.59375,
"rewards/chosen": -1.8879190683364868,
"rewards/margins": 0.196160227060318,
"rewards/rejected": -2.0840792655944824,
"step": 6
},
{
"epoch": 0.09221902017291066,
"grad_norm": 1.3676807880401611,
"learning_rate": 8.75e-07,
"logits/chosen": 9.873465538024902,
"logits/rejected": 10.022269248962402,
"logps/chosen": -0.8941428661346436,
"logps/rejected": -1.0010743141174316,
"loss": 1.3507,
"rewards/accuracies": 0.625,
"rewards/chosen": -1.788285732269287,
"rewards/margins": 0.21386288106441498,
"rewards/rejected": -2.0021486282348633,
"step": 7
},
{
"epoch": 0.1053931659118979,
"grad_norm": 1.7690002918243408,
"learning_rate": 1e-06,
"logits/chosen": 10.597719192504883,
"logits/rejected": 10.780376434326172,
"logps/chosen": -0.9080270528793335,
"logps/rejected": -0.9909782409667969,
"loss": 1.3305,
"rewards/accuracies": 0.640625,
"rewards/chosen": -1.816054105758667,
"rewards/margins": 0.16590236127376556,
"rewards/rejected": -1.9819564819335938,
"step": 8
},
{
"epoch": 0.11856731165088513,
"grad_norm": 1.056247353553772,
"learning_rate": 9.994504457428556e-07,
"logits/chosen": 10.446786880493164,
"logits/rejected": 10.839168548583984,
"logps/chosen": -1.1091859340667725,
"logps/rejected": -1.0694739818572998,
"loss": 1.5127,
"rewards/accuracies": 0.5390625,
"rewards/chosen": -2.218371868133545,
"rewards/margins": -0.07942387461662292,
"rewards/rejected": -2.1389479637145996,
"step": 9
},
{
"epoch": 0.13174145738987236,
"grad_norm": 2.076240062713623,
"learning_rate": 9.97802991010949e-07,
"logits/chosen": 10.343971252441406,
"logits/rejected": 10.492179870605469,
"logps/chosen": -0.9705042839050293,
"logps/rejected": -0.9916192889213562,
"loss": 1.4611,
"rewards/accuracies": 0.53125,
"rewards/chosen": -1.9410085678100586,
"rewards/margins": 0.04223020374774933,
"rewards/rejected": -1.9832385778427124,
"step": 10
},
{
"epoch": 0.14491560312885962,
"grad_norm": 1.13358736038208,
"learning_rate": 9.950612572673255e-07,
"logits/chosen": 10.49313735961914,
"logits/rejected": 10.680143356323242,
"logps/chosen": -1.1081148386001587,
"logps/rejected": -1.223841667175293,
"loss": 1.3449,
"rewards/accuracies": 0.59375,
"rewards/chosen": -2.2162296772003174,
"rewards/margins": 0.23145350813865662,
"rewards/rejected": -2.447683334350586,
"step": 11
},
{
"epoch": 0.15808974886784685,
"grad_norm": 1.1638388633728027,
"learning_rate": 9.912312714377879e-07,
"logits/chosen": 10.328557014465332,
"logits/rejected": 10.365793228149414,
"logps/chosen": -0.9249637722969055,
"logps/rejected": -0.9842618703842163,
"loss": 1.351,
"rewards/accuracies": 0.6015625,
"rewards/chosen": -1.849927544593811,
"rewards/margins": 0.11859625577926636,
"rewards/rejected": -1.9685237407684326,
"step": 12
},
{
"epoch": 0.17126389460683408,
"grad_norm": 0.9941473007202148,
"learning_rate": 9.863214526624063e-07,
"logits/chosen": 9.909621238708496,
"logits/rejected": 10.248769760131836,
"logps/chosen": -0.9913480877876282,
"logps/rejected": -1.1752512454986572,
"loss": 1.2767,
"rewards/accuracies": 0.5703125,
"rewards/chosen": -1.9826961755752563,
"rewards/margins": 0.36780619621276855,
"rewards/rejected": -2.3505024909973145,
"step": 13
},
{
"epoch": 0.1844380403458213,
"grad_norm": 1.3600269556045532,
"learning_rate": 9.8034259378842e-07,
"logits/chosen": 10.472145080566406,
"logits/rejected": 10.987956047058105,
"logps/chosen": -0.9751205444335938,
"logps/rejected": -1.0532664060592651,
"loss": 1.3626,
"rewards/accuracies": 0.578125,
"rewards/chosen": -1.9502410888671875,
"rewards/margins": 0.15629185736179352,
"rewards/rejected": -2.1065328121185303,
"step": 14
},
{
"epoch": 0.19761218608480857,
"grad_norm": 0.3477364182472229,
"learning_rate": 9.73307837645217e-07,
"logits/chosen": 10.209980010986328,
"logits/rejected": 10.457592964172363,
"logps/chosen": -0.9716652035713196,
"logps/rejected": -1.0775285959243774,
"loss": 1.3132,
"rewards/accuracies": 0.5859375,
"rewards/chosen": -1.9433304071426392,
"rewards/margins": 0.2117268592119217,
"rewards/rejected": -2.155057191848755,
"step": 15
},
{
"epoch": 0.2107863318237958,
"grad_norm": 0.975040853023529,
"learning_rate": 9.652326481535433e-07,
"logits/chosen": 10.770889282226562,
"logits/rejected": 11.057292938232422,
"logps/chosen": -0.9405269026756287,
"logps/rejected": -0.9816387891769409,
"loss": 1.4142,
"rewards/accuracies": 0.5078125,
"rewards/chosen": -1.8810538053512573,
"rewards/margins": 0.08222392201423645,
"rewards/rejected": -1.9632775783538818,
"step": 16
},
{
"epoch": 0.22396047756278303,
"grad_norm": 0.47477808594703674,
"learning_rate": 9.561347763324483e-07,
"logits/chosen": 10.384443283081055,
"logits/rejected": 10.546278953552246,
"logps/chosen": -0.9655594229698181,
"logps/rejected": -0.9963297247886658,
"loss": 1.4058,
"rewards/accuracies": 0.578125,
"rewards/chosen": -1.9311188459396362,
"rewards/margins": 0.061540693044662476,
"rewards/rejected": -1.9926594495773315,
"step": 17
},
{
"epoch": 0.23713462330177026,
"grad_norm": 0.9369856119155884,
"learning_rate": 9.460342212786932e-07,
"logits/chosen": 10.428518295288086,
"logits/rejected": 10.742942810058594,
"logps/chosen": -1.0061042308807373,
"logps/rejected": -0.9558196067810059,
"loss": 1.5279,
"rewards/accuracies": 0.5390625,
"rewards/chosen": -2.0122084617614746,
"rewards/margins": -0.10056903213262558,
"rewards/rejected": -1.9116392135620117,
"step": 18
},
{
"epoch": 0.2503087690407575,
"grad_norm": 0.6867318153381348,
"learning_rate": 9.349531862043951e-07,
"logits/chosen": 10.536978721618652,
"logits/rejected": 10.496305465698242,
"logps/chosen": -1.0390187501907349,
"logps/rejected": -1.1175179481506348,
"loss": 1.3199,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -2.0780375003814697,
"rewards/margins": 0.1569983810186386,
"rewards/rejected": -2.2350358963012695,
"step": 19
},
{
"epoch": 0.2634829147797447,
"grad_norm": 1.6277413368225098,
"learning_rate": 9.229160296295487e-07,
"logits/chosen": 10.487991333007812,
"logits/rejected": 10.849261283874512,
"logps/chosen": -1.005416989326477,
"logps/rejected": -1.0715974569320679,
"loss": 1.3772,
"rewards/accuracies": 0.6328125,
"rewards/chosen": -2.010833978652954,
"rewards/margins": 0.1323607861995697,
"rewards/rejected": -2.1431949138641357,
"step": 20
},
{
"epoch": 0.276657060518732,
"grad_norm": 1.1200292110443115,
"learning_rate": 9.099492118367122e-07,
"logits/chosen": 10.419047355651855,
"logits/rejected": 10.756099700927734,
"logps/chosen": -0.9289014935493469,
"logps/rejected": -1.020794153213501,
"loss": 1.3128,
"rewards/accuracies": 0.625,
"rewards/chosen": -1.8578029870986938,
"rewards/margins": 0.18378500640392303,
"rewards/rejected": -2.041588306427002,
"step": 21
},
{
"epoch": 0.28983120625771924,
"grad_norm": 1.3312275409698486,
"learning_rate": 8.960812367055646e-07,
"logits/chosen": 10.375129699707031,
"logits/rejected": 10.722561836242676,
"logps/chosen": -1.0812478065490723,
"logps/rejected": -1.104426383972168,
"loss": 1.438,
"rewards/accuracies": 0.5703125,
"rewards/chosen": -2.1624956130981445,
"rewards/margins": 0.04635699465870857,
"rewards/rejected": -2.208852767944336,
"step": 22
},
{
"epoch": 0.3030053519967065,
"grad_norm": 0.36184069514274597,
"learning_rate": 8.813425890551909e-07,
"logits/chosen": 10.423131942749023,
"logits/rejected": 10.69787311553955,
"logps/chosen": -1.0428340435028076,
"logps/rejected": -1.0485754013061523,
"loss": 1.4543,
"rewards/accuracies": 0.5625,
"rewards/chosen": -2.0856680870056152,
"rewards/margins": 0.01148274727165699,
"rewards/rejected": -2.0971508026123047,
"step": 23
},
{
"epoch": 0.3161794977356937,
"grad_norm": 0.5770995020866394,
"learning_rate": 8.657656676318345e-07,
"logits/chosen": 10.316368103027344,
"logits/rejected": 10.479074478149414,
"logps/chosen": -0.9681622982025146,
"logps/rejected": -1.050377607345581,
"loss": 1.3743,
"rewards/accuracies": 0.515625,
"rewards/chosen": -1.9363245964050293,
"rewards/margins": 0.16443049907684326,
"rewards/rejected": -2.100755214691162,
"step": 24
}
],
"logging_steps": 1,
"max_steps": 75,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 12,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}