|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.21333333333333335, |
|
"eval_steps": 500, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0071111111111111115, |
|
"grad_norm": 0.9622864127159119, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.3361184597015381, |
|
"logits/rejected": -0.38528943061828613, |
|
"logps/chosen": -63.102577209472656, |
|
"logps/rejected": -71.57357788085938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.014222222222222223, |
|
"grad_norm": 0.9525438547134399, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -0.35798677802085876, |
|
"logits/rejected": -0.37521645426750183, |
|
"logps/chosen": -64.16624450683594, |
|
"logps/rejected": -75.4444808959961, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.021333333333333333, |
|
"grad_norm": 1.0270054340362549, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": -0.4199008643627167, |
|
"logits/rejected": -0.3876492381095886, |
|
"logps/chosen": -69.85212707519531, |
|
"logps/rejected": -78.2700424194336, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.0030962349846959114, |
|
"rewards/margins": 0.010574335232377052, |
|
"rewards/rejected": -0.013670570217072964, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.028444444444444446, |
|
"grad_norm": 1.218668818473816, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": -0.3441811203956604, |
|
"logits/rejected": -0.42176032066345215, |
|
"logps/chosen": -64.0100326538086, |
|
"logps/rejected": -80.89389038085938, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.02109670080244541, |
|
"rewards/margins": 0.06868590414524078, |
|
"rewards/rejected": -0.08978260308504105, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.035555555555555556, |
|
"grad_norm": 0.9603152871131897, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -0.3670881390571594, |
|
"logits/rejected": -0.415330708026886, |
|
"logps/chosen": -63.949180603027344, |
|
"logps/rejected": -74.87257385253906, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.095927894115448, |
|
"rewards/margins": 0.24052195250988007, |
|
"rewards/rejected": -0.33644983172416687, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.042666666666666665, |
|
"grad_norm": 0.9287283420562744, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": -0.40374666452407837, |
|
"logits/rejected": -0.5151565074920654, |
|
"logps/chosen": -66.59954833984375, |
|
"logps/rejected": -80.40635681152344, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.2986072897911072, |
|
"rewards/margins": 0.585195004940033, |
|
"rewards/rejected": -0.8838022947311401, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.049777777777777775, |
|
"grad_norm": 1.1165237426757812, |
|
"learning_rate": 0.0001992114701314478, |
|
"logits/chosen": -0.4537544250488281, |
|
"logits/rejected": -0.6164412498474121, |
|
"logps/chosen": -75.28590393066406, |
|
"logps/rejected": -95.29585266113281, |
|
"loss": 0.3027, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.8317357897758484, |
|
"rewards/margins": 1.296346664428711, |
|
"rewards/rejected": -2.128082513809204, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05688888888888889, |
|
"grad_norm": 0.9855431914329529, |
|
"learning_rate": 0.0001968583161128631, |
|
"logits/chosen": -0.5777950286865234, |
|
"logits/rejected": -0.7553069591522217, |
|
"logps/chosen": -73.72053527832031, |
|
"logps/rejected": -99.57424926757812, |
|
"loss": 0.213, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.4827743172645569, |
|
"rewards/margins": 1.8666009902954102, |
|
"rewards/rejected": -2.3493752479553223, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 0.6502730250358582, |
|
"learning_rate": 0.00019297764858882514, |
|
"logits/chosen": -0.5584444403648376, |
|
"logits/rejected": -0.8268064856529236, |
|
"logps/chosen": -69.64950561523438, |
|
"logps/rejected": -109.12018585205078, |
|
"loss": 0.1107, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2655777037143707, |
|
"rewards/margins": 3.2483396530151367, |
|
"rewards/rejected": -3.5139172077178955, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.07111111111111111, |
|
"grad_norm": 1.1950925588607788, |
|
"learning_rate": 0.00018763066800438636, |
|
"logits/chosen": -0.6012760996818542, |
|
"logits/rejected": -0.9081443548202515, |
|
"logps/chosen": -74.8345718383789, |
|
"logps/rejected": -130.38316345214844, |
|
"loss": 0.1323, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7443335056304932, |
|
"rewards/margins": 4.142513275146484, |
|
"rewards/rejected": -4.886847019195557, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07822222222222222, |
|
"grad_norm": 1.0462037324905396, |
|
"learning_rate": 0.00018090169943749476, |
|
"logits/chosen": -0.7805650234222412, |
|
"logits/rejected": -1.0125257968902588, |
|
"logps/chosen": -95.06548309326172, |
|
"logps/rejected": -138.53005981445312, |
|
"loss": 0.1366, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.786426305770874, |
|
"rewards/margins": 3.8278050422668457, |
|
"rewards/rejected": -5.614231109619141, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08533333333333333, |
|
"grad_norm": 1.5766606330871582, |
|
"learning_rate": 0.00017289686274214118, |
|
"logits/chosen": -0.6917849779129028, |
|
"logits/rejected": -1.037487268447876, |
|
"logps/chosen": -84.8817138671875, |
|
"logps/rejected": -152.9879150390625, |
|
"loss": 0.0965, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -1.916405439376831, |
|
"rewards/margins": 6.033751964569092, |
|
"rewards/rejected": -7.950157165527344, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09244444444444444, |
|
"grad_norm": 3.453160524368286, |
|
"learning_rate": 0.000163742398974869, |
|
"logits/chosen": -0.7463029623031616, |
|
"logits/rejected": -1.1712623834609985, |
|
"logps/chosen": -90.2038345336914, |
|
"logps/rejected": -167.4069366455078, |
|
"loss": 0.1399, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -2.7762935161590576, |
|
"rewards/margins": 6.096405982971191, |
|
"rewards/rejected": -8.872700691223145, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09955555555555555, |
|
"grad_norm": 1.8318997621536255, |
|
"learning_rate": 0.00015358267949789966, |
|
"logits/chosen": -0.7684139609336853, |
|
"logits/rejected": -1.1252436637878418, |
|
"logps/chosen": -94.43594360351562, |
|
"logps/rejected": -162.26513671875, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.8215208053588867, |
|
"rewards/margins": 5.9356255531311035, |
|
"rewards/rejected": -8.757145881652832, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10666666666666667, |
|
"grad_norm": 1.6136397123336792, |
|
"learning_rate": 0.00014257792915650728, |
|
"logits/chosen": -0.8049843311309814, |
|
"logits/rejected": -1.0397863388061523, |
|
"logps/chosen": -98.64947509765625, |
|
"logps/rejected": -146.51724243164062, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -3.0760955810546875, |
|
"rewards/margins": 4.134893417358398, |
|
"rewards/rejected": -7.210988998413086, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11377777777777778, |
|
"grad_norm": 1.269758939743042, |
|
"learning_rate": 0.00013090169943749476, |
|
"logits/chosen": -0.8264177441596985, |
|
"logits/rejected": -1.1195734739303589, |
|
"logps/chosen": -98.1386947631836, |
|
"logps/rejected": -157.2500457763672, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -2.7527129650115967, |
|
"rewards/margins": 5.01126766204834, |
|
"rewards/rejected": -7.763980388641357, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.12088888888888889, |
|
"grad_norm": 1.2737501859664917, |
|
"learning_rate": 0.00011873813145857249, |
|
"logits/chosen": -0.8584411144256592, |
|
"logits/rejected": -1.1313375234603882, |
|
"logps/chosen": -89.23213195800781, |
|
"logps/rejected": -154.30953979492188, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.770606279373169, |
|
"rewards/margins": 5.409886360168457, |
|
"rewards/rejected": -8.180492401123047, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.9322919845581055, |
|
"learning_rate": 0.00010627905195293135, |
|
"logits/chosen": -0.8519349694252014, |
|
"logits/rejected": -1.1860893964767456, |
|
"logps/chosen": -94.88754272460938, |
|
"logps/rejected": -156.7233123779297, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -2.8481788635253906, |
|
"rewards/margins": 5.50647497177124, |
|
"rewards/rejected": -8.354653358459473, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1351111111111111, |
|
"grad_norm": 1.2062392234802246, |
|
"learning_rate": 9.372094804706867e-05, |
|
"logits/chosen": -0.9854850769042969, |
|
"logits/rejected": -1.2079541683197021, |
|
"logps/chosen": -106.10295104980469, |
|
"logps/rejected": -164.16534423828125, |
|
"loss": 0.1747, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.063753604888916, |
|
"rewards/margins": 5.2118425369262695, |
|
"rewards/rejected": -9.275596618652344, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 0.9117494225502014, |
|
"learning_rate": 8.126186854142752e-05, |
|
"logits/chosen": -0.8748866319656372, |
|
"logits/rejected": -1.2855944633483887, |
|
"logps/chosen": -95.78570556640625, |
|
"logps/rejected": -183.678955078125, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -3.073942184448242, |
|
"rewards/margins": 7.4819488525390625, |
|
"rewards/rejected": -10.555891036987305, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14933333333333335, |
|
"grad_norm": 2.010823965072632, |
|
"learning_rate": 6.909830056250527e-05, |
|
"logits/chosen": -0.9118089079856873, |
|
"logits/rejected": -1.2503169775009155, |
|
"logps/chosen": -98.3197021484375, |
|
"logps/rejected": -162.0580596923828, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.5207109451293945, |
|
"rewards/margins": 5.349686145782471, |
|
"rewards/rejected": -8.870397567749023, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.15644444444444444, |
|
"grad_norm": 1.245758056640625, |
|
"learning_rate": 5.7422070843492734e-05, |
|
"logits/chosen": -0.9743110537528992, |
|
"logits/rejected": -1.208943247795105, |
|
"logps/chosen": -98.93869018554688, |
|
"logps/rejected": -163.2411651611328, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -3.0828969478607178, |
|
"rewards/margins": 5.751500606536865, |
|
"rewards/rejected": -8.834397315979004, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.16355555555555557, |
|
"grad_norm": 0.8686921000480652, |
|
"learning_rate": 4.6417320502100316e-05, |
|
"logits/chosen": -0.8691350817680359, |
|
"logits/rejected": -1.1770260334014893, |
|
"logps/chosen": -87.81027221679688, |
|
"logps/rejected": -163.592529296875, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -2.3160297870635986, |
|
"rewards/margins": 6.70501184463501, |
|
"rewards/rejected": -9.021041870117188, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.17066666666666666, |
|
"grad_norm": 1.3923372030258179, |
|
"learning_rate": 3.6257601025131026e-05, |
|
"logits/chosen": -1.0141935348510742, |
|
"logits/rejected": -1.2782260179519653, |
|
"logps/chosen": -103.1563949584961, |
|
"logps/rejected": -172.56581115722656, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.0495543479919434, |
|
"rewards/margins": 6.135320663452148, |
|
"rewards/rejected": -9.184874534606934, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 1.2816081047058105, |
|
"learning_rate": 2.7103137257858868e-05, |
|
"logits/chosen": -0.9182357788085938, |
|
"logits/rejected": -1.2490135431289673, |
|
"logps/chosen": -90.02302551269531, |
|
"logps/rejected": -160.6967315673828, |
|
"loss": 0.1513, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.6499993801116943, |
|
"rewards/margins": 5.982861518859863, |
|
"rewards/rejected": -8.632862091064453, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.18488888888888888, |
|
"grad_norm": 1.1344267129898071, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"logits/chosen": -0.979172945022583, |
|
"logits/rejected": -1.309502124786377, |
|
"logps/chosen": -94.45278930664062, |
|
"logps/rejected": -169.9461669921875, |
|
"loss": 0.0974, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.4992923736572266, |
|
"rewards/margins": 6.363759517669678, |
|
"rewards/rejected": -8.863051414489746, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 0.9824000597000122, |
|
"learning_rate": 1.2369331995613665e-05, |
|
"logits/chosen": -0.9647933840751648, |
|
"logits/rejected": -1.2633442878723145, |
|
"logps/chosen": -94.56600189208984, |
|
"logps/rejected": -156.5263671875, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -2.7392513751983643, |
|
"rewards/margins": 5.356634140014648, |
|
"rewards/rejected": -8.09588623046875, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1991111111111111, |
|
"grad_norm": 1.7272309064865112, |
|
"learning_rate": 7.022351411174866e-06, |
|
"logits/chosen": -1.0286459922790527, |
|
"logits/rejected": -1.320716142654419, |
|
"logps/chosen": -98.15447235107422, |
|
"logps/rejected": -169.14036560058594, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.8377060890197754, |
|
"rewards/margins": 6.118044853210449, |
|
"rewards/rejected": -8.955750465393066, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.20622222222222222, |
|
"grad_norm": 1.1793086528778076, |
|
"learning_rate": 3.1416838871368924e-06, |
|
"logits/chosen": -0.9110668897628784, |
|
"logits/rejected": -1.2544306516647339, |
|
"logps/chosen": -87.0140609741211, |
|
"logps/rejected": -160.61866760253906, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.3407022953033447, |
|
"rewards/margins": 6.302703380584717, |
|
"rewards/rejected": -8.64340591430664, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 0.6413570046424866, |
|
"learning_rate": 7.885298685522235e-07, |
|
"logits/chosen": -0.9873690605163574, |
|
"logits/rejected": -1.3247737884521484, |
|
"logps/chosen": -95.96586608886719, |
|
"logps/rejected": -178.8674774169922, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -2.3605031967163086, |
|
"rewards/margins": 6.789929389953613, |
|
"rewards/rejected": -9.150433540344238, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 30, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|