|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9485384932070811, |
|
"eval_steps": 500, |
|
"global_step": 72, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013174145738987238, |
|
"grad_norm": 0.538081705570221, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 10.038352012634277, |
|
"logits/rejected": 10.592904090881348, |
|
"logps/chosen": -0.6228358745574951, |
|
"logps/rejected": -0.6871199011802673, |
|
"loss": 1.342, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.2456717491149902, |
|
"rewards/margins": 0.12856802344322205, |
|
"rewards/rejected": -1.3742398023605347, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026348291477974475, |
|
"grad_norm": 0.6521235108375549, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 10.320584297180176, |
|
"logits/rejected": 10.721946716308594, |
|
"logps/chosen": -0.7115719318389893, |
|
"logps/rejected": -0.788784384727478, |
|
"loss": 1.3147, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.4231438636779785, |
|
"rewards/margins": 0.15442489087581635, |
|
"rewards/rejected": -1.577568769454956, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03952243721696171, |
|
"grad_norm": 0.8797138929367065, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 9.899504661560059, |
|
"logits/rejected": 10.505952835083008, |
|
"logps/chosen": -0.8225007057189941, |
|
"logps/rejected": -0.8832307457923889, |
|
"loss": 1.3674, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.6450014114379883, |
|
"rewards/margins": 0.12146000564098358, |
|
"rewards/rejected": -1.7664614915847778, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05269658295594895, |
|
"grad_norm": 1.9139935970306396, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 10.082985877990723, |
|
"logits/rejected": 10.576549530029297, |
|
"logps/chosen": -0.6892099976539612, |
|
"logps/rejected": -0.7180394530296326, |
|
"loss": 1.4038, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.3784199953079224, |
|
"rewards/margins": 0.05765870213508606, |
|
"rewards/rejected": -1.4360789060592651, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.8647859692573547, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 10.318564414978027, |
|
"logits/rejected": 11.072587966918945, |
|
"logps/chosen": -0.6658570766448975, |
|
"logps/rejected": -0.6663312911987305, |
|
"loss": 1.4062, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.331714153289795, |
|
"rewards/margins": 0.0009482596069574356, |
|
"rewards/rejected": -1.332662582397461, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07904487443392343, |
|
"grad_norm": 0.7906696796417236, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": 10.802580833435059, |
|
"logits/rejected": 11.333773612976074, |
|
"logps/chosen": -0.7257988452911377, |
|
"logps/rejected": -0.7839725017547607, |
|
"loss": 1.3781, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -1.4515976905822754, |
|
"rewards/margins": 0.11634734272956848, |
|
"rewards/rejected": -1.5679450035095215, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09221902017291066, |
|
"grad_norm": 0.724219799041748, |
|
"learning_rate": 8.75e-07, |
|
"logits/chosen": 9.928263664245605, |
|
"logits/rejected": 10.422144889831543, |
|
"logps/chosen": -0.5926575660705566, |
|
"logps/rejected": -0.6688517928123474, |
|
"loss": 1.314, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.1853151321411133, |
|
"rewards/margins": 0.15238842368125916, |
|
"rewards/rejected": -1.3377035856246948, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1053931659118979, |
|
"grad_norm": 0.558660089969635, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 10.657012939453125, |
|
"logits/rejected": 11.171004295349121, |
|
"logps/chosen": -0.6659789681434631, |
|
"logps/rejected": -0.7012848258018494, |
|
"loss": 1.365, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.3319579362869263, |
|
"rewards/margins": 0.07061176747083664, |
|
"rewards/rejected": -1.4025696516036987, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.11856731165088513, |
|
"grad_norm": 0.7675488591194153, |
|
"learning_rate": 9.994504457428556e-07, |
|
"logits/chosen": 10.544637680053711, |
|
"logits/rejected": 10.839460372924805, |
|
"logps/chosen": -0.814159095287323, |
|
"logps/rejected": -0.7815468907356262, |
|
"loss": 1.4888, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.628318190574646, |
|
"rewards/margins": -0.0652243047952652, |
|
"rewards/rejected": -1.5630937814712524, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 1.1157082319259644, |
|
"learning_rate": 9.97802991010949e-07, |
|
"logits/chosen": 10.10114574432373, |
|
"logits/rejected": 10.555818557739258, |
|
"logps/chosen": -0.673196017742157, |
|
"logps/rejected": -0.6864349246025085, |
|
"loss": 1.4279, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.346392035484314, |
|
"rewards/margins": 0.02647773176431656, |
|
"rewards/rejected": -1.372869849205017, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14491560312885962, |
|
"grad_norm": 1.2121509313583374, |
|
"learning_rate": 9.950612572673255e-07, |
|
"logits/chosen": 10.158075332641602, |
|
"logits/rejected": 10.813385009765625, |
|
"logps/chosen": -0.7734582424163818, |
|
"logps/rejected": -0.8254096508026123, |
|
"loss": 1.3538, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.5469164848327637, |
|
"rewards/margins": 0.10390281677246094, |
|
"rewards/rejected": -1.6508193016052246, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15808974886784685, |
|
"grad_norm": 0.921929657459259, |
|
"learning_rate": 9.912312714377879e-07, |
|
"logits/chosen": 10.2570161819458, |
|
"logits/rejected": 10.633421897888184, |
|
"logps/chosen": -0.7107410430908203, |
|
"logps/rejected": -0.7390152812004089, |
|
"loss": 1.3791, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4214820861816406, |
|
"rewards/margins": 0.056548528373241425, |
|
"rewards/rejected": -1.4780305624008179, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17126389460683408, |
|
"grad_norm": 1.0088778734207153, |
|
"learning_rate": 9.863214526624063e-07, |
|
"logits/chosen": 9.808923721313477, |
|
"logits/rejected": 10.603569984436035, |
|
"logps/chosen": -0.7246454954147339, |
|
"logps/rejected": -0.8062013387680054, |
|
"loss": 1.3588, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -1.4492909908294678, |
|
"rewards/margins": 0.1631116420030594, |
|
"rewards/rejected": -1.6124026775360107, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1844380403458213, |
|
"grad_norm": 1.2618002891540527, |
|
"learning_rate": 9.8034259378842e-07, |
|
"logits/chosen": 10.618194580078125, |
|
"logits/rejected": 11.330740928649902, |
|
"logps/chosen": -0.6871081590652466, |
|
"logps/rejected": -0.752004086971283, |
|
"loss": 1.3318, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3742163181304932, |
|
"rewards/margins": 0.1297919750213623, |
|
"rewards/rejected": -1.504008173942566, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.19761218608480857, |
|
"grad_norm": 0.3426913917064667, |
|
"learning_rate": 9.73307837645217e-07, |
|
"logits/chosen": 9.945769309997559, |
|
"logits/rejected": 10.648375511169434, |
|
"logps/chosen": -0.6394751071929932, |
|
"logps/rejected": -0.7090991139411926, |
|
"loss": 1.3241, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.2789502143859863, |
|
"rewards/margins": 0.13924814760684967, |
|
"rewards/rejected": -1.4181982278823853, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2107863318237958, |
|
"grad_norm": 1.1439241170883179, |
|
"learning_rate": 9.652326481535433e-07, |
|
"logits/chosen": 10.895740509033203, |
|
"logits/rejected": 11.243146896362305, |
|
"logps/chosen": -0.6249470710754395, |
|
"logps/rejected": -0.6594743132591248, |
|
"loss": 1.3593, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -1.249894142150879, |
|
"rewards/margins": 0.06905444711446762, |
|
"rewards/rejected": -1.3189486265182495, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22396047756278303, |
|
"grad_norm": 0.6084972023963928, |
|
"learning_rate": 9.561347763324483e-07, |
|
"logits/chosen": 10.294602394104004, |
|
"logits/rejected": 10.75848388671875, |
|
"logps/chosen": -0.6647714972496033, |
|
"logps/rejected": -0.6630449891090393, |
|
"loss": 1.4144, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.3295429944992065, |
|
"rewards/margins": -0.0034531853161752224, |
|
"rewards/rejected": -1.3260899782180786, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.23713462330177026, |
|
"grad_norm": 1.20784330368042, |
|
"learning_rate": 9.460342212786932e-07, |
|
"logits/chosen": 10.368552207946777, |
|
"logits/rejected": 11.025596618652344, |
|
"logps/chosen": -0.8019055724143982, |
|
"logps/rejected": -0.7324545979499817, |
|
"loss": 1.5474, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.6038111448287964, |
|
"rewards/margins": -0.13890185952186584, |
|
"rewards/rejected": -1.4649091958999634, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2503087690407575, |
|
"grad_norm": 0.6674954891204834, |
|
"learning_rate": 9.349531862043951e-07, |
|
"logits/chosen": 10.525771141052246, |
|
"logits/rejected": 10.864877700805664, |
|
"logps/chosen": -0.6636431217193604, |
|
"logps/rejected": -0.690964937210083, |
|
"loss": 1.3837, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.3272862434387207, |
|
"rewards/margins": 0.05464361608028412, |
|
"rewards/rejected": -1.381929874420166, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.2634829147797447, |
|
"grad_norm": 0.9430455565452576, |
|
"learning_rate": 9.229160296295487e-07, |
|
"logits/chosen": 10.509737014770508, |
|
"logits/rejected": 11.218311309814453, |
|
"logps/chosen": -0.7572717666625977, |
|
"logps/rejected": -0.7445338368415833, |
|
"loss": 1.4599, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5145435333251953, |
|
"rewards/margins": -0.02547581121325493, |
|
"rewards/rejected": -1.4890676736831665, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.276657060518732, |
|
"grad_norm": 0.5352253317832947, |
|
"learning_rate": 9.099492118367122e-07, |
|
"logits/chosen": 9.723971366882324, |
|
"logits/rejected": 10.554994583129883, |
|
"logps/chosen": -0.706066906452179, |
|
"logps/rejected": -0.7981133460998535, |
|
"loss": 1.3079, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.412133812904358, |
|
"rewards/margins": 0.18409286439418793, |
|
"rewards/rejected": -1.596226692199707, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.28983120625771924, |
|
"grad_norm": 1.4943097829818726, |
|
"learning_rate": 8.960812367055646e-07, |
|
"logits/chosen": 10.025705337524414, |
|
"logits/rejected": 10.595457077026367, |
|
"logps/chosen": -0.7785383462905884, |
|
"logps/rejected": -0.7879722118377686, |
|
"loss": 1.4168, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5570766925811768, |
|
"rewards/margins": 0.01886790432035923, |
|
"rewards/rejected": -1.575944423675537, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3030053519967065, |
|
"grad_norm": 0.4453490972518921, |
|
"learning_rate": 8.813425890551909e-07, |
|
"logits/chosen": 10.01511001586914, |
|
"logits/rejected": 10.817825317382812, |
|
"logps/chosen": -0.7974975109100342, |
|
"logps/rejected": -0.7531540393829346, |
|
"loss": 1.5107, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.5949950218200684, |
|
"rewards/margins": -0.08868695795536041, |
|
"rewards/rejected": -1.5063080787658691, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3161794977356937, |
|
"grad_norm": 0.8292773962020874, |
|
"learning_rate": 8.657656676318345e-07, |
|
"logits/chosen": 10.253622055053711, |
|
"logits/rejected": 10.707040786743164, |
|
"logps/chosen": -0.6545951962471008, |
|
"logps/rejected": -0.6871030330657959, |
|
"loss": 1.3837, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.3091903924942017, |
|
"rewards/margins": 0.06501554697751999, |
|
"rewards/rejected": -1.3742060661315918, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"grad_norm": 3.5230870246887207, |
|
"learning_rate": 8.493847138894208e-07, |
|
"logits/chosen": 10.166427612304688, |
|
"logits/rejected": 10.720212936401367, |
|
"logps/chosen": -0.6964614391326904, |
|
"logps/rejected": -0.7443124055862427, |
|
"loss": 1.3755, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.3929228782653809, |
|
"rewards/margins": 0.09570197016000748, |
|
"rewards/rejected": -1.4886248111724854, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.34252778921366817, |
|
"grad_norm": 1.1598762273788452, |
|
"learning_rate": 8.322357367194108e-07, |
|
"logits/chosen": 10.49393367767334, |
|
"logits/rejected": 11.039409637451172, |
|
"logps/chosen": -0.7815979719161987, |
|
"logps/rejected": -0.8116061687469482, |
|
"loss": 1.3981, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5631959438323975, |
|
"rewards/margins": 0.06001615524291992, |
|
"rewards/rejected": -1.6232123374938965, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3557019349526554, |
|
"grad_norm": 2.6781487464904785, |
|
"learning_rate": 8.143564332954425e-07, |
|
"logits/chosen": 10.239036560058594, |
|
"logits/rejected": 11.069831848144531, |
|
"logps/chosen": -0.7489575147628784, |
|
"logps/rejected": -0.7497468590736389, |
|
"loss": 1.4293, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -1.4979150295257568, |
|
"rewards/margins": 0.0015787146985530853, |
|
"rewards/rejected": -1.4994937181472778, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3688760806916426, |
|
"grad_norm": 0.4053559899330139, |
|
"learning_rate": 7.957861062067612e-07, |
|
"logits/chosen": 9.529670715332031, |
|
"logits/rejected": 10.38549518585205, |
|
"logps/chosen": -0.7035645246505737, |
|
"logps/rejected": -0.7123870253562927, |
|
"loss": 1.4258, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.4071290493011475, |
|
"rewards/margins": 0.017645111307501793, |
|
"rewards/rejected": -1.4247740507125854, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3820502264306299, |
|
"grad_norm": 0.6468765735626221, |
|
"learning_rate": 7.765655770625996e-07, |
|
"logits/chosen": 10.515686988830566, |
|
"logits/rejected": 10.88726806640625, |
|
"logps/chosen": -0.6611747145652771, |
|
"logps/rejected": -0.7040727734565735, |
|
"loss": 1.368, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -1.3223494291305542, |
|
"rewards/margins": 0.08579609543085098, |
|
"rewards/rejected": -1.408145546913147, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.39522437216961714, |
|
"grad_norm": 0.6986653208732605, |
|
"learning_rate": 7.567370967574209e-07, |
|
"logits/chosen": 10.111821174621582, |
|
"logits/rejected": 11.31137466430664, |
|
"logps/chosen": -0.6936085820198059, |
|
"logps/rejected": -0.6943917274475098, |
|
"loss": 1.4297, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.3872171640396118, |
|
"rewards/margins": 0.0015661753714084625, |
|
"rewards/rejected": -1.3887834548950195, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4083985179086044, |
|
"grad_norm": 0.4217478334903717, |
|
"learning_rate": 7.363442525942826e-07, |
|
"logits/chosen": 9.911711692810059, |
|
"logits/rejected": 10.704763412475586, |
|
"logps/chosen": -0.6641364097595215, |
|
"logps/rejected": -0.6581347584724426, |
|
"loss": 1.4344, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.328272819519043, |
|
"rewards/margins": -0.012003323063254356, |
|
"rewards/rejected": -1.3162695169448853, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.4215726636475916, |
|
"grad_norm": 0.6227706670761108, |
|
"learning_rate": 7.154318724704851e-07, |
|
"logits/chosen": 9.962979316711426, |
|
"logits/rejected": 10.975818634033203, |
|
"logps/chosen": -0.6416503190994263, |
|
"logps/rejected": -0.6450206637382507, |
|
"loss": 1.4274, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -1.2833006381988525, |
|
"rewards/margins": 0.006740846671164036, |
|
"rewards/rejected": -1.2900413274765015, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.43474680938657884, |
|
"grad_norm": 0.9526137709617615, |
|
"learning_rate": 6.940459263361248e-07, |
|
"logits/chosen": 10.01764965057373, |
|
"logits/rejected": 10.877232551574707, |
|
"logps/chosen": -0.7044752836227417, |
|
"logps/rejected": -0.6560808420181274, |
|
"loss": 1.5103, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -1.4089505672454834, |
|
"rewards/margins": -0.09678899496793747, |
|
"rewards/rejected": -1.3121616840362549, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.44792095512556607, |
|
"grad_norm": 0.7180893421173096, |
|
"learning_rate": 6.722334251421664e-07, |
|
"logits/chosen": 9.873922348022461, |
|
"logits/rejected": 10.598325729370117, |
|
"logps/chosen": -0.7634012699127197, |
|
"logps/rejected": -0.7632129192352295, |
|
"loss": 1.4281, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.5268025398254395, |
|
"rewards/margins": -0.00037665292620658875, |
|
"rewards/rejected": -1.526425838470459, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 1.9519481658935547, |
|
"learning_rate": 6.500423175001703e-07, |
|
"logits/chosen": 10.66737174987793, |
|
"logits/rejected": 11.170511245727539, |
|
"logps/chosen": -0.7307225465774536, |
|
"logps/rejected": -0.7907586097717285, |
|
"loss": 1.3918, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.4614450931549072, |
|
"rewards/margins": 0.12007206678390503, |
|
"rewards/rejected": -1.581517219543457, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.47426924660354053, |
|
"grad_norm": 0.7991163730621338, |
|
"learning_rate": 6.275213842808382e-07, |
|
"logits/chosen": 10.241875648498535, |
|
"logits/rejected": 10.639405250549316, |
|
"logps/chosen": -0.6178256869316101, |
|
"logps/rejected": -0.6322227120399475, |
|
"loss": 1.3976, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -1.2356513738632202, |
|
"rewards/margins": 0.028794117271900177, |
|
"rewards/rejected": -1.264445424079895, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.4874433923425278, |
|
"grad_norm": 0.6113852262496948, |
|
"learning_rate": 6.047201313830723e-07, |
|
"logits/chosen": 10.230165481567383, |
|
"logits/rejected": 10.763371467590332, |
|
"logps/chosen": -0.6862261295318604, |
|
"logps/rejected": -0.7486332654953003, |
|
"loss": 1.3326, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3724522590637207, |
|
"rewards/margins": 0.12481416761875153, |
|
"rewards/rejected": -1.4972665309906006, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.500617538081515, |
|
"grad_norm": 1.0607084035873413, |
|
"learning_rate": 5.816886809092651e-07, |
|
"logits/chosen": 10.049585342407227, |
|
"logits/rejected": 10.759794235229492, |
|
"logps/chosen": -0.6145029067993164, |
|
"logps/rejected": -0.6638337969779968, |
|
"loss": 1.3483, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.2290058135986328, |
|
"rewards/margins": 0.0986616313457489, |
|
"rewards/rejected": -1.3276675939559937, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5137916838205022, |
|
"grad_norm": 1.5060722827911377, |
|
"learning_rate": 5.584776609860413e-07, |
|
"logits/chosen": 10.381738662719727, |
|
"logits/rejected": 10.864119529724121, |
|
"logps/chosen": -0.7063203454017639, |
|
"logps/rejected": -0.6916000247001648, |
|
"loss": 1.4498, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.4126406908035278, |
|
"rewards/margins": -0.029440607875585556, |
|
"rewards/rejected": -1.3832000494003296, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5269658295594895, |
|
"grad_norm": 2.536262273788452, |
|
"learning_rate": 5.351380944726465e-07, |
|
"logits/chosen": 10.201406478881836, |
|
"logits/rejected": 11.195016860961914, |
|
"logps/chosen": -0.6529728174209595, |
|
"logps/rejected": -0.695421576499939, |
|
"loss": 1.3726, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.305945634841919, |
|
"rewards/margins": 0.08489762991666794, |
|
"rewards/rejected": -1.390843152999878, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5401399752984768, |
|
"grad_norm": 3.44834303855896, |
|
"learning_rate": 5.117212868016303e-07, |
|
"logits/chosen": 10.57003402709961, |
|
"logits/rejected": 10.89995288848877, |
|
"logps/chosen": -0.6615808606147766, |
|
"logps/rejected": -0.6666579842567444, |
|
"loss": 1.4101, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.3231617212295532, |
|
"rewards/margins": 0.010154157876968384, |
|
"rewards/rejected": -1.3333159685134888, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.553314121037464, |
|
"grad_norm": 0.3262707591056824, |
|
"learning_rate": 4.882787131983697e-07, |
|
"logits/chosen": 9.684130668640137, |
|
"logits/rejected": 10.468993186950684, |
|
"logps/chosen": -0.6019183993339539, |
|
"logps/rejected": -0.6411232948303223, |
|
"loss": 1.3798, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.2038367986679077, |
|
"rewards/margins": 0.07840971648693085, |
|
"rewards/rejected": -1.2822465896606445, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5664882667764513, |
|
"grad_norm": 1.150577187538147, |
|
"learning_rate": 4.648619055273537e-07, |
|
"logits/chosen": 9.844969749450684, |
|
"logits/rejected": 10.612713813781738, |
|
"logps/chosen": -0.5501813292503357, |
|
"logps/rejected": -0.591031551361084, |
|
"loss": 1.3513, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -1.1003626585006714, |
|
"rewards/margins": 0.08170032501220703, |
|
"rewards/rejected": -1.182063102722168, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5796624125154385, |
|
"grad_norm": 0.3965461850166321, |
|
"learning_rate": 4.4152233901395875e-07, |
|
"logits/chosen": 9.765134811401367, |
|
"logits/rejected": 10.664865493774414, |
|
"logps/chosen": -0.689943253993988, |
|
"logps/rejected": -0.6770355701446533, |
|
"loss": 1.4345, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.379886507987976, |
|
"rewards/margins": -0.025815514847636223, |
|
"rewards/rejected": -1.3540711402893066, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5928365582544257, |
|
"grad_norm": 0.5376319289207458, |
|
"learning_rate": 4.183113190907348e-07, |
|
"logits/chosen": 10.018446922302246, |
|
"logits/rejected": 10.538941383361816, |
|
"logps/chosen": -0.6233353614807129, |
|
"logps/rejected": -0.633882999420166, |
|
"loss": 1.4166, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.2466707229614258, |
|
"rewards/margins": 0.02109522372484207, |
|
"rewards/rejected": -1.267765998840332, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.606010703993413, |
|
"grad_norm": 3.684044122695923, |
|
"learning_rate": 3.9527986861692785e-07, |
|
"logits/chosen": 10.023819923400879, |
|
"logits/rejected": 10.466320037841797, |
|
"logps/chosen": -0.644775927066803, |
|
"logps/rejected": -0.6808078289031982, |
|
"loss": 1.3706, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -1.289551854133606, |
|
"rewards/margins": 0.07206393778324127, |
|
"rewards/rejected": -1.3616156578063965, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6191848497324002, |
|
"grad_norm": 0.5732494592666626, |
|
"learning_rate": 3.724786157191618e-07, |
|
"logits/chosen": 10.115565299987793, |
|
"logits/rejected": 10.92066764831543, |
|
"logps/chosen": -0.7901978492736816, |
|
"logps/rejected": -0.7942913174629211, |
|
"loss": 1.4251, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.5803956985473633, |
|
"rewards/margins": 0.008186978287994862, |
|
"rewards/rejected": -1.5885826349258423, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6323589954713874, |
|
"grad_norm": 0.6679599285125732, |
|
"learning_rate": 3.499576824998297e-07, |
|
"logits/chosen": 10.542475700378418, |
|
"logits/rejected": 11.161402702331543, |
|
"logps/chosen": -0.6402223706245422, |
|
"logps/rejected": -0.6876631379127502, |
|
"loss": 1.3518, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -1.2804447412490845, |
|
"rewards/margins": 0.09488136321306229, |
|
"rewards/rejected": -1.3753262758255005, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6455331412103746, |
|
"grad_norm": 0.45617973804473877, |
|
"learning_rate": 3.2776657485783356e-07, |
|
"logits/chosen": 10.640542030334473, |
|
"logits/rejected": 11.157126426696777, |
|
"logps/chosen": -0.6611472368240356, |
|
"logps/rejected": -0.6986327767372131, |
|
"loss": 1.3795, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.3222944736480713, |
|
"rewards/margins": 0.07497115433216095, |
|
"rewards/rejected": -1.3972655534744263, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"grad_norm": 0.718313992023468, |
|
"learning_rate": 3.0595407366387506e-07, |
|
"logits/chosen": 10.246100425720215, |
|
"logits/rejected": 10.788708686828613, |
|
"logps/chosen": -0.5527799725532532, |
|
"logps/rejected": -0.5676759481430054, |
|
"loss": 1.383, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1055599451065063, |
|
"rewards/margins": 0.02979196421802044, |
|
"rewards/rejected": -1.1353518962860107, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6718814326883491, |
|
"grad_norm": 0.18990406394004822, |
|
"learning_rate": 2.845681275295148e-07, |
|
"logits/chosen": 10.160649299621582, |
|
"logits/rejected": 10.608582496643066, |
|
"logps/chosen": -0.5916914343833923, |
|
"logps/rejected": -0.5999540090560913, |
|
"loss": 1.4021, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.1833828687667847, |
|
"rewards/margins": 0.016525164246559143, |
|
"rewards/rejected": -1.1999080181121826, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.6850555784273363, |
|
"grad_norm": 0.48461204767227173, |
|
"learning_rate": 2.636557474057173e-07, |
|
"logits/chosen": 9.736953735351562, |
|
"logits/rejected": 10.655252456665039, |
|
"logps/chosen": -0.6059479117393494, |
|
"logps/rejected": -0.65425705909729, |
|
"loss": 1.3626, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.2118958234786987, |
|
"rewards/margins": 0.09661829471588135, |
|
"rewards/rejected": -1.30851411819458, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6982297241663236, |
|
"grad_norm": 0.33438029885292053, |
|
"learning_rate": 2.432629032425789e-07, |
|
"logits/chosen": 10.174696922302246, |
|
"logits/rejected": 10.733561515808105, |
|
"logps/chosen": -0.6094481348991394, |
|
"logps/rejected": -0.6400697827339172, |
|
"loss": 1.3683, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -1.2188962697982788, |
|
"rewards/margins": 0.06124337017536163, |
|
"rewards/rejected": -1.2801395654678345, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.7114038699053108, |
|
"grad_norm": 0.3054359257221222, |
|
"learning_rate": 2.2343442293740028e-07, |
|
"logits/chosen": 10.520360946655273, |
|
"logits/rejected": 11.13870620727539, |
|
"logps/chosen": -0.6996334195137024, |
|
"logps/rejected": -0.6852084994316101, |
|
"loss": 1.4469, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -1.3992668390274048, |
|
"rewards/margins": -0.028849666938185692, |
|
"rewards/rejected": -1.3704169988632202, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.724578015644298, |
|
"grad_norm": 1.6864748001098633, |
|
"learning_rate": 2.0421389379323877e-07, |
|
"logits/chosen": 10.444706916809082, |
|
"logits/rejected": 10.993450164794922, |
|
"logps/chosen": -0.6644651293754578, |
|
"logps/rejected": -0.732700765132904, |
|
"loss": 1.3438, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3289302587509155, |
|
"rewards/margins": 0.1364712119102478, |
|
"rewards/rejected": -1.465401530265808, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7377521613832853, |
|
"grad_norm": 1.1142092943191528, |
|
"learning_rate": 1.8564356670455767e-07, |
|
"logits/chosen": 10.173879623413086, |
|
"logits/rejected": 10.971562385559082, |
|
"logps/chosen": -0.6234769225120544, |
|
"logps/rejected": -0.682326078414917, |
|
"loss": 1.3395, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2469538450241089, |
|
"rewards/margins": 0.11769835650920868, |
|
"rewards/rejected": -1.364652156829834, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7509263071222725, |
|
"grad_norm": 1.3884321451187134, |
|
"learning_rate": 1.6776426328058919e-07, |
|
"logits/chosen": 10.79382038116455, |
|
"logits/rejected": 11.301587104797363, |
|
"logps/chosen": -0.6458045244216919, |
|
"logps/rejected": -0.6631965041160583, |
|
"loss": 1.3818, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2916090488433838, |
|
"rewards/margins": 0.03478388115763664, |
|
"rewards/rejected": -1.3263930082321167, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.7641004528612598, |
|
"grad_norm": 0.9691101312637329, |
|
"learning_rate": 1.5061528611057915e-07, |
|
"logits/chosen": 10.396957397460938, |
|
"logits/rejected": 10.836028099060059, |
|
"logps/chosen": -0.6224152445793152, |
|
"logps/rejected": -0.6705679893493652, |
|
"loss": 1.3657, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.2448304891586304, |
|
"rewards/margins": 0.09630556404590607, |
|
"rewards/rejected": -1.3411359786987305, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7772745986002471, |
|
"grad_norm": 0.5600712895393372, |
|
"learning_rate": 1.3423433236816562e-07, |
|
"logits/chosen": 10.439901351928711, |
|
"logits/rejected": 11.005784034729004, |
|
"logps/chosen": -0.6858406662940979, |
|
"logps/rejected": -0.7000098824501038, |
|
"loss": 1.4016, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -1.3716813325881958, |
|
"rewards/margins": 0.028338586911559105, |
|
"rewards/rejected": -1.4000197649002075, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.7904487443392343, |
|
"grad_norm": 0.4134702980518341, |
|
"learning_rate": 1.1865741094480908e-07, |
|
"logits/chosen": 10.502270698547363, |
|
"logits/rejected": 11.183271408081055, |
|
"logps/chosen": -0.606414794921875, |
|
"logps/rejected": -0.6565254926681519, |
|
"loss": 1.333, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -1.21282958984375, |
|
"rewards/margins": 0.10022131353616714, |
|
"rewards/rejected": -1.3130509853363037, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8036228900782215, |
|
"grad_norm": 1.311030626296997, |
|
"learning_rate": 1.0391876329443533e-07, |
|
"logits/chosen": 10.570302963256836, |
|
"logits/rejected": 11.035686492919922, |
|
"logps/chosen": -0.6471387147903442, |
|
"logps/rejected": -0.6374362707138062, |
|
"loss": 1.4409, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2942774295806885, |
|
"rewards/margins": -0.01940501108765602, |
|
"rewards/rejected": -1.2748725414276123, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.8167970358172087, |
|
"grad_norm": 0.7308284044265747, |
|
"learning_rate": 9.00507881632877e-08, |
|
"logits/chosen": 10.05208969116211, |
|
"logits/rejected": 10.694860458374023, |
|
"logps/chosen": -0.565160870552063, |
|
"logps/rejected": -0.5756160616874695, |
|
"loss": 1.3922, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.130321741104126, |
|
"rewards/margins": 0.020910188555717468, |
|
"rewards/rejected": -1.151232123374939, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.829971181556196, |
|
"grad_norm": 0.6492581367492676, |
|
"learning_rate": 7.708397037045128e-08, |
|
"logits/chosen": 10.530025482177734, |
|
"logits/rejected": 11.059508323669434, |
|
"logps/chosen": -0.7004156112670898, |
|
"logps/rejected": -0.6723081469535828, |
|
"loss": 1.4625, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -1.4008312225341797, |
|
"rewards/margins": -0.05621491000056267, |
|
"rewards/rejected": -1.3446162939071655, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.8431453272951832, |
|
"grad_norm": 0.6271635293960571, |
|
"learning_rate": 6.504681379560489e-08, |
|
"logits/chosen": 10.55631160736084, |
|
"logits/rejected": 11.027376174926758, |
|
"logps/chosen": -0.664703905582428, |
|
"logps/rejected": -0.6908209323883057, |
|
"loss": 1.3858, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.329407811164856, |
|
"rewards/margins": 0.0522342287003994, |
|
"rewards/rejected": -1.3816418647766113, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.8563194730341704, |
|
"grad_norm": 0.5429289937019348, |
|
"learning_rate": 5.396577872130675e-08, |
|
"logits/chosen": 10.111677169799805, |
|
"logits/rejected": 10.929055213928223, |
|
"logps/chosen": -0.5984624028205872, |
|
"logps/rejected": -0.6411637663841248, |
|
"loss": 1.3614, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -1.1969248056411743, |
|
"rewards/margins": 0.08540263772010803, |
|
"rewards/rejected": -1.2823275327682495, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.8694936187731577, |
|
"grad_norm": 0.5303859710693359, |
|
"learning_rate": 4.3865223667551686e-08, |
|
"logits/chosen": 10.389065742492676, |
|
"logits/rejected": 10.994528770446777, |
|
"logps/chosen": -0.7132977843284607, |
|
"logps/rejected": -0.7277129888534546, |
|
"loss": 1.3985, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.4265955686569214, |
|
"rewards/margins": 0.02883046492934227, |
|
"rewards/rejected": -1.4554259777069092, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.8826677645121449, |
|
"grad_norm": 0.8996275067329407, |
|
"learning_rate": 3.476735184645674e-08, |
|
"logits/chosen": 10.195171356201172, |
|
"logits/rejected": 10.991616249084473, |
|
"logps/chosen": -0.6438397765159607, |
|
"logps/rejected": -0.6550954580307007, |
|
"loss": 1.4138, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.2876795530319214, |
|
"rewards/margins": 0.02251136302947998, |
|
"rewards/rejected": -1.3101909160614014, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.8958419102511321, |
|
"grad_norm": 0.48269936442375183, |
|
"learning_rate": 2.6692162354782943e-08, |
|
"logits/chosen": 10.73905086517334, |
|
"logits/rejected": 11.05859661102295, |
|
"logps/chosen": -0.6217143535614014, |
|
"logps/rejected": -0.6485203504562378, |
|
"loss": 1.3768, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.2434287071228027, |
|
"rewards/margins": 0.053611982613801956, |
|
"rewards/rejected": -1.2970407009124756, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.9090160559901194, |
|
"grad_norm": 0.6163386702537537, |
|
"learning_rate": 1.9657406211579962e-08, |
|
"logits/chosen": 10.56104850769043, |
|
"logits/rejected": 11.248091697692871, |
|
"logps/chosen": -0.6829113364219666, |
|
"logps/rejected": -0.7346217036247253, |
|
"loss": 1.3653, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -1.365822672843933, |
|
"rewards/margins": 0.10342076420783997, |
|
"rewards/rejected": -1.4692434072494507, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 0.9636749625205994, |
|
"learning_rate": 1.3678547337593494e-08, |
|
"logits/chosen": 10.036705017089844, |
|
"logits/rejected": 10.577312469482422, |
|
"logps/chosen": -0.6028749942779541, |
|
"logps/rejected": -0.6518182754516602, |
|
"loss": 1.3437, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2057499885559082, |
|
"rewards/margins": 0.09788656234741211, |
|
"rewards/rejected": -1.3036365509033203, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9353643474680938, |
|
"grad_norm": 0.7640946507453918, |
|
"learning_rate": 8.768728562211946e-09, |
|
"logits/chosen": 10.159095764160156, |
|
"logits/rejected": 11.175093650817871, |
|
"logps/chosen": -0.6173404455184937, |
|
"logps/rejected": -0.662550151348114, |
|
"loss": 1.3734, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2346808910369873, |
|
"rewards/margins": 0.09041938930749893, |
|
"rewards/rejected": -1.325100302696228, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.9485384932070811, |
|
"grad_norm": 0.5827455520629883, |
|
"learning_rate": 4.938742732674528e-09, |
|
"logits/chosen": 10.687435150146484, |
|
"logits/rejected": 11.472260475158691, |
|
"logps/chosen": -0.48376959562301636, |
|
"logps/rejected": -0.5084850192070007, |
|
"loss": 1.3663, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9675391912460327, |
|
"rewards/margins": 0.04943079501390457, |
|
"rewards/rejected": -1.0169700384140015, |
|
"step": 72 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 12, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|