{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0071111111111111115,
"grad_norm": 0.9622864127159119,
"learning_rate": 0.0,
"logits/chosen": -0.3361184597015381,
"logits/rejected": -0.38528943061828613,
"logps/chosen": -63.102577209472656,
"logps/rejected": -71.57357788085938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.014222222222222223,
"grad_norm": 0.9525438547134399,
"learning_rate": 4e-05,
"logits/chosen": -0.35798677802085876,
"logits/rejected": -0.37521645426750183,
"logps/chosen": -64.16624450683594,
"logps/rejected": -75.4444808959961,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.021333333333333333,
"grad_norm": 1.0270054340362549,
"learning_rate": 8e-05,
"logits/chosen": -0.4199008643627167,
"logits/rejected": -0.3876492381095886,
"logps/chosen": -69.85212707519531,
"logps/rejected": -78.2700424194336,
"loss": 0.688,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.0030962349846959114,
"rewards/margins": 0.010574335232377052,
"rewards/rejected": -0.013670570217072964,
"step": 3
},
{
"epoch": 0.028444444444444446,
"grad_norm": 1.218668818473816,
"learning_rate": 0.00012,
"logits/chosen": -0.3441811203956604,
"logits/rejected": -0.42176032066345215,
"logps/chosen": -64.0100326538086,
"logps/rejected": -80.89389038085938,
"loss": 0.6599,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02109670080244541,
"rewards/margins": 0.06868590414524078,
"rewards/rejected": -0.08978260308504105,
"step": 4
},
{
"epoch": 0.035555555555555556,
"grad_norm": 0.9603152871131897,
"learning_rate": 0.00016,
"logits/chosen": -0.3670881390571594,
"logits/rejected": -0.415330708026886,
"logps/chosen": -63.949180603027344,
"logps/rejected": -74.87257385253906,
"loss": 0.5841,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.095927894115448,
"rewards/margins": 0.24052195250988007,
"rewards/rejected": -0.33644983172416687,
"step": 5
},
{
"epoch": 0.042666666666666665,
"grad_norm": 0.9287283420562744,
"learning_rate": 0.0002,
"logits/chosen": -0.40374666452407837,
"logits/rejected": -0.5151565074920654,
"logps/chosen": -66.59954833984375,
"logps/rejected": -80.40635681152344,
"loss": 0.4661,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.2986072897911072,
"rewards/margins": 0.585195004940033,
"rewards/rejected": -0.8838022947311401,
"step": 6
},
{
"epoch": 0.049777777777777775,
"grad_norm": 1.1165237426757812,
"learning_rate": 0.0001992114701314478,
"logits/chosen": -0.4537544250488281,
"logits/rejected": -0.6164412498474121,
"logps/chosen": -75.28590393066406,
"logps/rejected": -95.29585266113281,
"loss": 0.3027,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.8317357897758484,
"rewards/margins": 1.296346664428711,
"rewards/rejected": -2.128082513809204,
"step": 7
},
{
"epoch": 0.05688888888888889,
"grad_norm": 0.9855431914329529,
"learning_rate": 0.0001968583161128631,
"logits/chosen": -0.5777950286865234,
"logits/rejected": -0.7553069591522217,
"logps/chosen": -73.72053527832031,
"logps/rejected": -99.57424926757812,
"loss": 0.213,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.4827743172645569,
"rewards/margins": 1.8666009902954102,
"rewards/rejected": -2.3493752479553223,
"step": 8
},
{
"epoch": 0.064,
"grad_norm": 0.6502730250358582,
"learning_rate": 0.00019297764858882514,
"logits/chosen": -0.5584444403648376,
"logits/rejected": -0.8268064856529236,
"logps/chosen": -69.64950561523438,
"logps/rejected": -109.12018585205078,
"loss": 0.1107,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.2655777037143707,
"rewards/margins": 3.2483396530151367,
"rewards/rejected": -3.5139172077178955,
"step": 9
},
{
"epoch": 0.07111111111111111,
"grad_norm": 1.1950925588607788,
"learning_rate": 0.00018763066800438636,
"logits/chosen": -0.6012760996818542,
"logits/rejected": -0.9081443548202515,
"logps/chosen": -74.8345718383789,
"logps/rejected": -130.38316345214844,
"loss": 0.1323,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.7443335056304932,
"rewards/margins": 4.142513275146484,
"rewards/rejected": -4.886847019195557,
"step": 10
},
{
"epoch": 0.07822222222222222,
"grad_norm": 1.0462037324905396,
"learning_rate": 0.00018090169943749476,
"logits/chosen": -0.7805650234222412,
"logits/rejected": -1.0125257968902588,
"logps/chosen": -95.06548309326172,
"logps/rejected": -138.53005981445312,
"loss": 0.1366,
"rewards/accuracies": 0.96875,
"rewards/chosen": -1.786426305770874,
"rewards/margins": 3.8278050422668457,
"rewards/rejected": -5.614231109619141,
"step": 11
},
{
"epoch": 0.08533333333333333,
"grad_norm": 1.5766606330871582,
"learning_rate": 0.00017289686274214118,
"logits/chosen": -0.6917849779129028,
"logits/rejected": -1.037487268447876,
"logps/chosen": -84.8817138671875,
"logps/rejected": -152.9879150390625,
"loss": 0.0965,
"rewards/accuracies": 0.953125,
"rewards/chosen": -1.916405439376831,
"rewards/margins": 6.033751964569092,
"rewards/rejected": -7.950157165527344,
"step": 12
},
{
"epoch": 0.09244444444444444,
"grad_norm": 3.453160524368286,
"learning_rate": 0.000163742398974869,
"logits/chosen": -0.7463029623031616,
"logits/rejected": -1.1712623834609985,
"logps/chosen": -90.2038345336914,
"logps/rejected": -167.4069366455078,
"loss": 0.1399,
"rewards/accuracies": 0.921875,
"rewards/chosen": -2.7762935161590576,
"rewards/margins": 6.096405982971191,
"rewards/rejected": -8.872700691223145,
"step": 13
},
{
"epoch": 0.09955555555555555,
"grad_norm": 1.8318997621536255,
"learning_rate": 0.00015358267949789966,
"logits/chosen": -0.7684139609336853,
"logits/rejected": -1.1252436637878418,
"logps/chosen": -94.43594360351562,
"logps/rejected": -162.26513671875,
"loss": 0.1509,
"rewards/accuracies": 0.9375,
"rewards/chosen": -2.8215208053588867,
"rewards/margins": 5.9356255531311035,
"rewards/rejected": -8.757145881652832,
"step": 14
},
{
"epoch": 0.10666666666666667,
"grad_norm": 1.6136397123336792,
"learning_rate": 0.00014257792915650728,
"logits/chosen": -0.8049843311309814,
"logits/rejected": -1.0397863388061523,
"logps/chosen": -98.64947509765625,
"logps/rejected": -146.51724243164062,
"loss": 0.1929,
"rewards/accuracies": 0.921875,
"rewards/chosen": -3.0760955810546875,
"rewards/margins": 4.134893417358398,
"rewards/rejected": -7.210988998413086,
"step": 15
},
{
"epoch": 0.11377777777777778,
"grad_norm": 1.269758939743042,
"learning_rate": 0.00013090169943749476,
"logits/chosen": -0.8264177441596985,
"logits/rejected": -1.1195734739303589,
"logps/chosen": -98.1386947631836,
"logps/rejected": -157.2500457763672,
"loss": 0.1604,
"rewards/accuracies": 0.921875,
"rewards/chosen": -2.7527129650115967,
"rewards/margins": 5.01126766204834,
"rewards/rejected": -7.763980388641357,
"step": 16
},
{
"epoch": 0.12088888888888889,
"grad_norm": 1.2737501859664917,
"learning_rate": 0.00011873813145857249,
"logits/chosen": -0.8584411144256592,
"logits/rejected": -1.1313375234603882,
"logps/chosen": -89.23213195800781,
"logps/rejected": -154.30953979492188,
"loss": 0.1342,
"rewards/accuracies": 0.9375,
"rewards/chosen": -2.770606279373169,
"rewards/margins": 5.409886360168457,
"rewards/rejected": -8.180492401123047,
"step": 17
},
{
"epoch": 0.128,
"grad_norm": 0.9322919845581055,
"learning_rate": 0.00010627905195293135,
"logits/chosen": -0.8519349694252014,
"logits/rejected": -1.1860893964767456,
"logps/chosen": -94.88754272460938,
"logps/rejected": -156.7233123779297,
"loss": 0.1312,
"rewards/accuracies": 0.953125,
"rewards/chosen": -2.8481788635253906,
"rewards/margins": 5.50647497177124,
"rewards/rejected": -8.354653358459473,
"step": 18
},
{
"epoch": 0.1351111111111111,
"grad_norm": 1.2062392234802246,
"learning_rate": 9.372094804706867e-05,
"logits/chosen": -0.9854850769042969,
"logits/rejected": -1.2079541683197021,
"logps/chosen": -106.10295104980469,
"logps/rejected": -164.16534423828125,
"loss": 0.1747,
"rewards/accuracies": 0.9375,
"rewards/chosen": -4.063753604888916,
"rewards/margins": 5.2118425369262695,
"rewards/rejected": -9.275596618652344,
"step": 19
},
{
"epoch": 0.14222222222222222,
"grad_norm": 0.9117494225502014,
"learning_rate": 8.126186854142752e-05,
"logits/chosen": -0.8748866319656372,
"logits/rejected": -1.2855944633483887,
"logps/chosen": -95.78570556640625,
"logps/rejected": -183.678955078125,
"loss": 0.0888,
"rewards/accuracies": 0.953125,
"rewards/chosen": -3.073942184448242,
"rewards/margins": 7.4819488525390625,
"rewards/rejected": -10.555891036987305,
"step": 20
},
{
"epoch": 0.14933333333333335,
"grad_norm": 2.010823965072632,
"learning_rate": 6.909830056250527e-05,
"logits/chosen": -0.9118089079856873,
"logits/rejected": -1.2503169775009155,
"logps/chosen": -98.3197021484375,
"logps/rejected": -162.0580596923828,
"loss": 0.1889,
"rewards/accuracies": 0.90625,
"rewards/chosen": -3.5207109451293945,
"rewards/margins": 5.349686145782471,
"rewards/rejected": -8.870397567749023,
"step": 21
},
{
"epoch": 0.15644444444444444,
"grad_norm": 1.245758056640625,
"learning_rate": 5.7422070843492734e-05,
"logits/chosen": -0.9743110537528992,
"logits/rejected": -1.208943247795105,
"logps/chosen": -98.93869018554688,
"logps/rejected": -163.2411651611328,
"loss": 0.1492,
"rewards/accuracies": 0.953125,
"rewards/chosen": -3.0828969478607178,
"rewards/margins": 5.751500606536865,
"rewards/rejected": -8.834397315979004,
"step": 22
},
{
"epoch": 0.16355555555555557,
"grad_norm": 0.8686921000480652,
"learning_rate": 4.6417320502100316e-05,
"logits/chosen": -0.8691350817680359,
"logits/rejected": -1.1770260334014893,
"logps/chosen": -87.81027221679688,
"logps/rejected": -163.592529296875,
"loss": 0.111,
"rewards/accuracies": 0.953125,
"rewards/chosen": -2.3160297870635986,
"rewards/margins": 6.70501184463501,
"rewards/rejected": -9.021041870117188,
"step": 23
},
{
"epoch": 0.17066666666666666,
"grad_norm": 1.3923372030258179,
"learning_rate": 3.6257601025131026e-05,
"logits/chosen": -1.0141935348510742,
"logits/rejected": -1.2782260179519653,
"logps/chosen": -103.1563949584961,
"logps/rejected": -172.56581115722656,
"loss": 0.102,
"rewards/accuracies": 0.96875,
"rewards/chosen": -3.0495543479919434,
"rewards/margins": 6.135320663452148,
"rewards/rejected": -9.184874534606934,
"step": 24
},
{
"epoch": 0.17777777777777778,
"grad_norm": 1.2816081047058105,
"learning_rate": 2.7103137257858868e-05,
"logits/chosen": -0.9182357788085938,
"logits/rejected": -1.2490135431289673,
"logps/chosen": -90.02302551269531,
"logps/rejected": -160.6967315673828,
"loss": 0.1513,
"rewards/accuracies": 0.96875,
"rewards/chosen": -2.6499993801116943,
"rewards/margins": 5.982861518859863,
"rewards/rejected": -8.632862091064453,
"step": 25
},
{
"epoch": 0.18488888888888888,
"grad_norm": 1.1344267129898071,
"learning_rate": 1.9098300562505266e-05,
"logits/chosen": -0.979172945022583,
"logits/rejected": -1.309502124786377,
"logps/chosen": -94.45278930664062,
"logps/rejected": -169.9461669921875,
"loss": 0.0974,
"rewards/accuracies": 0.96875,
"rewards/chosen": -2.4992923736572266,
"rewards/margins": 6.363759517669678,
"rewards/rejected": -8.863051414489746,
"step": 26
},
{
"epoch": 0.192,
"grad_norm": 0.9824000597000122,
"learning_rate": 1.2369331995613665e-05,
"logits/chosen": -0.9647933840751648,
"logits/rejected": -1.2633442878723145,
"logps/chosen": -94.56600189208984,
"logps/rejected": -156.5263671875,
"loss": 0.1367,
"rewards/accuracies": 0.921875,
"rewards/chosen": -2.7392513751983643,
"rewards/margins": 5.356634140014648,
"rewards/rejected": -8.09588623046875,
"step": 27
},
{
"epoch": 0.1991111111111111,
"grad_norm": 1.7272309064865112,
"learning_rate": 7.022351411174866e-06,
"logits/chosen": -1.0286459922790527,
"logits/rejected": -1.320716142654419,
"logps/chosen": -98.15447235107422,
"logps/rejected": -169.14036560058594,
"loss": 0.2012,
"rewards/accuracies": 0.90625,
"rewards/chosen": -2.8377060890197754,
"rewards/margins": 6.118044853210449,
"rewards/rejected": -8.955750465393066,
"step": 28
},
{
"epoch": 0.20622222222222222,
"grad_norm": 1.1793086528778076,
"learning_rate": 3.1416838871368924e-06,
"logits/chosen": -0.9110668897628784,
"logits/rejected": -1.2544306516647339,
"logps/chosen": -87.0140609741211,
"logps/rejected": -160.61866760253906,
"loss": 0.1207,
"rewards/accuracies": 0.9375,
"rewards/chosen": -2.3407022953033447,
"rewards/margins": 6.302703380584717,
"rewards/rejected": -8.64340591430664,
"step": 29
},
{
"epoch": 0.21333333333333335,
"grad_norm": 0.6413570046424866,
"learning_rate": 7.885298685522235e-07,
"logits/chosen": -0.9873690605163574,
"logits/rejected": -1.3247737884521484,
"logps/chosen": -95.96586608886719,
"logps/rejected": -178.8674774169922,
"loss": 0.0721,
"rewards/accuracies": 0.984375,
"rewards/chosen": -2.3605031967163086,
"rewards/margins": 6.789929389953613,
"rewards/rejected": -9.150433540344238,
"step": 30
}
],
"logging_steps": 1,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}