|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7904487443392343, |
|
"eval_steps": 500, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013174145738987238, |
|
"grad_norm": 0.4815484285354614, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": 10.088521957397461, |
|
"logits/rejected": 10.263787269592285, |
|
"logps/chosen": -163.12940979003906, |
|
"logps/ref_chosen": -163.12940979003906, |
|
"logps/ref_rejected": -171.48428344726562, |
|
"logps/rejected": -171.48428344726562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026348291477974475, |
|
"grad_norm": 0.627070426940918, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 10.592972755432129, |
|
"logits/rejected": 10.720216751098633, |
|
"logps/chosen": -155.91574096679688, |
|
"logps/ref_chosen": -155.91574096679688, |
|
"logps/ref_rejected": -161.34078979492188, |
|
"logps/rejected": -161.34078979492188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03952243721696171, |
|
"grad_norm": 0.4148138165473938, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": 10.043272972106934, |
|
"logits/rejected": 10.398024559020996, |
|
"logps/chosen": -158.3568115234375, |
|
"logps/ref_chosen": -157.65640258789062, |
|
"logps/ref_rejected": -168.5882110595703, |
|
"logps/rejected": -168.91085815429688, |
|
"loss": 0.6951, |
|
"rewards/accuracies": 0.3828125, |
|
"rewards/chosen": -0.007004000246524811, |
|
"rewards/margins": -0.0037774655502289534, |
|
"rewards/rejected": -0.003226534929126501, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05269658295594895, |
|
"grad_norm": 0.7029770612716675, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 10.250253677368164, |
|
"logits/rejected": 10.45008659362793, |
|
"logps/chosen": -164.01119995117188, |
|
"logps/ref_chosen": -162.89878845214844, |
|
"logps/ref_rejected": -168.30462646484375, |
|
"logps/rejected": -169.1818389892578, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.01112416572868824, |
|
"rewards/margins": -0.0023521997500211, |
|
"rewards/rejected": -0.008771965280175209, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.4063253104686737, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 10.442557334899902, |
|
"logits/rejected": 10.740192413330078, |
|
"logps/chosen": -156.1859130859375, |
|
"logps/ref_chosen": -156.03257751464844, |
|
"logps/ref_rejected": -165.37911987304688, |
|
"logps/rejected": -165.6518096923828, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.001533512957394123, |
|
"rewards/margins": 0.001193464733660221, |
|
"rewards/rejected": -0.002726977691054344, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07904487443392343, |
|
"grad_norm": 0.4845049977302551, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 10.906261444091797, |
|
"logits/rejected": 11.201122283935547, |
|
"logps/chosen": -162.45692443847656, |
|
"logps/ref_chosen": -161.98570251464844, |
|
"logps/ref_rejected": -169.72560119628906, |
|
"logps/rejected": -170.18275451660156, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.004712029360234737, |
|
"rewards/margins": -0.00014030117017682642, |
|
"rewards/rejected": -0.004571728408336639, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09221902017291066, |
|
"grad_norm": 0.8172655701637268, |
|
"learning_rate": 4.375e-07, |
|
"logits/chosen": 9.883949279785156, |
|
"logits/rejected": 10.030972480773926, |
|
"logps/chosen": -157.43295288085938, |
|
"logps/ref_chosen": -157.26968383789062, |
|
"logps/ref_rejected": -167.37155151367188, |
|
"logps/rejected": -167.53939819335938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0016327811172232032, |
|
"rewards/margins": 4.5756096369586885e-05, |
|
"rewards/rejected": -0.0016785369953140616, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1053931659118979, |
|
"grad_norm": 0.588524341583252, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 10.633930206298828, |
|
"logits/rejected": 10.81590747833252, |
|
"logps/chosen": -162.8237762451172, |
|
"logps/ref_chosen": -162.2948455810547, |
|
"logps/ref_rejected": -172.98866271972656, |
|
"logps/rejected": -173.56680297851562, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.005289244465529919, |
|
"rewards/margins": 0.0004922347725369036, |
|
"rewards/rejected": -0.005781479645520449, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.11856731165088513, |
|
"grad_norm": 0.46077635884284973, |
|
"learning_rate": 4.997252228714278e-07, |
|
"logits/chosen": 10.326555252075195, |
|
"logits/rejected": 10.736672401428223, |
|
"logps/chosen": -164.5288543701172, |
|
"logps/ref_chosen": -163.37091064453125, |
|
"logps/ref_rejected": -173.1500701904297, |
|
"logps/rejected": -174.08392333984375, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.011579334735870361, |
|
"rewards/margins": -0.0022407739888876677, |
|
"rewards/rejected": -0.00933856051415205, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 0.673312783241272, |
|
"learning_rate": 4.989014955054745e-07, |
|
"logits/chosen": 10.325155258178711, |
|
"logits/rejected": 10.473593711853027, |
|
"logps/chosen": -157.8944091796875, |
|
"logps/ref_chosen": -156.87838745117188, |
|
"logps/ref_rejected": -165.17373657226562, |
|
"logps/rejected": -166.20751953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.010160216130316257, |
|
"rewards/margins": 0.00017760891932994127, |
|
"rewards/rejected": -0.010337824933230877, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14491560312885962, |
|
"grad_norm": 0.6500194668769836, |
|
"learning_rate": 4.975306286336627e-07, |
|
"logits/chosen": 10.476134300231934, |
|
"logits/rejected": 10.66375732421875, |
|
"logps/chosen": -161.99935913085938, |
|
"logps/ref_chosen": -160.73855590820312, |
|
"logps/ref_rejected": -173.1862030029297, |
|
"logps/rejected": -174.4076385498047, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0126079972833395, |
|
"rewards/margins": -0.00039388981531374156, |
|
"rewards/rejected": -0.012214107438921928, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15808974886784685, |
|
"grad_norm": 0.5539909601211548, |
|
"learning_rate": 4.956156357188939e-07, |
|
"logits/chosen": 10.318845748901367, |
|
"logits/rejected": 10.355680465698242, |
|
"logps/chosen": -167.43121337890625, |
|
"logps/ref_chosen": -165.21177673339844, |
|
"logps/ref_rejected": -170.47381591796875, |
|
"logps/rejected": -172.76483154296875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -0.022194450721144676, |
|
"rewards/margins": 0.0007156741339713335, |
|
"rewards/rejected": -0.02291012369096279, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17126389460683408, |
|
"grad_norm": 0.5150694251060486, |
|
"learning_rate": 4.931607263312032e-07, |
|
"logits/chosen": 9.89578914642334, |
|
"logits/rejected": 10.236948013305664, |
|
"logps/chosen": -161.29905700683594, |
|
"logps/ref_chosen": -158.68667602539062, |
|
"logps/ref_rejected": -168.57968139648438, |
|
"logps/rejected": -171.51979064941406, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.026123855262994766, |
|
"rewards/margins": 0.0032772955019026995, |
|
"rewards/rejected": -0.029401153326034546, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1844380403458213, |
|
"grad_norm": 0.6596489548683167, |
|
"learning_rate": 4.9017129689421e-07, |
|
"logits/chosen": 10.484810829162598, |
|
"logits/rejected": 10.99763298034668, |
|
"logps/chosen": -157.44769287109375, |
|
"logps/ref_chosen": -153.92340087890625, |
|
"logps/ref_rejected": -167.03564453125, |
|
"logps/rejected": -170.4557647705078, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": -0.03524318337440491, |
|
"rewards/margins": -0.0010417333105579019, |
|
"rewards/rejected": -0.034201446920633316, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.19761218608480857, |
|
"grad_norm": 0.39550018310546875, |
|
"learning_rate": 4.866539188226085e-07, |
|
"logits/chosen": 10.189282417297363, |
|
"logits/rejected": 10.43722152709961, |
|
"logps/chosen": -166.56544494628906, |
|
"logps/ref_chosen": -162.66110229492188, |
|
"logps/ref_rejected": -168.7485809326172, |
|
"logps/rejected": -172.78038024902344, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.03904342278838158, |
|
"rewards/margins": 0.0012746157590299845, |
|
"rewards/rejected": -0.04031803831458092, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2107863318237958, |
|
"grad_norm": 0.6276482939720154, |
|
"learning_rate": 4.826163240767716e-07, |
|
"logits/chosen": 10.743437767028809, |
|
"logits/rejected": 11.031370162963867, |
|
"logps/chosen": -166.45135498046875, |
|
"logps/ref_chosen": -163.39239501953125, |
|
"logps/ref_rejected": -172.29183959960938, |
|
"logps/rejected": -175.4534912109375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.03058951534330845, |
|
"rewards/margins": 0.0010271857026964426, |
|
"rewards/rejected": -0.0316167026758194, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22396047756278303, |
|
"grad_norm": 0.516729474067688, |
|
"learning_rate": 4.780673881662242e-07, |
|
"logits/chosen": 10.332087516784668, |
|
"logits/rejected": 10.48865032196045, |
|
"logps/chosen": -157.08522033691406, |
|
"logps/ref_chosen": -153.6072540283203, |
|
"logps/ref_rejected": -161.9541473388672, |
|
"logps/rejected": -165.6874542236328, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -0.03477972373366356, |
|
"rewards/margins": 0.0025533493608236313, |
|
"rewards/rejected": -0.03733307495713234, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.23713462330177026, |
|
"grad_norm": 0.70009446144104, |
|
"learning_rate": 4.730171106393466e-07, |
|
"logits/chosen": 10.40684986114502, |
|
"logits/rejected": 10.725347518920898, |
|
"logps/chosen": -158.2038116455078, |
|
"logps/ref_chosen": -154.3197021484375, |
|
"logps/ref_rejected": -161.81753540039062, |
|
"logps/rejected": -165.58631896972656, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.03884127736091614, |
|
"rewards/margins": -0.0011533537181094289, |
|
"rewards/rejected": -0.03768792748451233, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2503087690407575, |
|
"grad_norm": 0.47613224387168884, |
|
"learning_rate": 4.6747659310219757e-07, |
|
"logits/chosen": 10.489011764526367, |
|
"logits/rejected": 10.455073356628418, |
|
"logps/chosen": -171.99160766601562, |
|
"logps/ref_chosen": -167.8755340576172, |
|
"logps/ref_rejected": -175.09603881835938, |
|
"logps/rejected": -179.593994140625, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -0.04116089642047882, |
|
"rewards/margins": 0.003818710334599018, |
|
"rewards/rejected": -0.04497961327433586, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.2634829147797447, |
|
"grad_norm": 0.6483292579650879, |
|
"learning_rate": 4.6145801481477433e-07, |
|
"logits/chosen": 10.415058135986328, |
|
"logits/rejected": 10.774059295654297, |
|
"logps/chosen": -163.5430450439453, |
|
"logps/ref_chosen": -159.07583618164062, |
|
"logps/ref_rejected": -169.23069763183594, |
|
"logps/rejected": -173.73776245117188, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.04467229172587395, |
|
"rewards/margins": 0.00039826278225518763, |
|
"rewards/rejected": -0.04507055878639221, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.276657060518732, |
|
"grad_norm": 0.5634174942970276, |
|
"learning_rate": 4.549746059183561e-07, |
|
"logits/chosen": 10.342830657958984, |
|
"logits/rejected": 10.680377960205078, |
|
"logps/chosen": -163.2490997314453, |
|
"logps/ref_chosen": -159.25521850585938, |
|
"logps/ref_rejected": -169.57681274414062, |
|
"logps/rejected": -173.69276428222656, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.03993882238864899, |
|
"rewards/margins": 0.0012204290833324194, |
|
"rewards/rejected": -0.0411592535674572, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.28983120625771924, |
|
"grad_norm": 0.3794897198677063, |
|
"learning_rate": 4.480406183527823e-07, |
|
"logits/chosen": 10.29517936706543, |
|
"logits/rejected": 10.647847175598145, |
|
"logps/chosen": -161.54783630371094, |
|
"logps/ref_chosen": -157.0568084716797, |
|
"logps/ref_rejected": -163.96209716796875, |
|
"logps/rejected": -168.51736450195312, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04491012915968895, |
|
"rewards/margins": 0.0006425387691706419, |
|
"rewards/rejected": -0.04555266723036766, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3030053519967065, |
|
"grad_norm": 0.4016757607460022, |
|
"learning_rate": 4.4067129452759546e-07, |
|
"logits/chosen": 10.357274055480957, |
|
"logits/rejected": 10.63122844696045, |
|
"logps/chosen": -162.94578552246094, |
|
"logps/ref_chosen": -158.10250854492188, |
|
"logps/ref_rejected": -169.85250854492188, |
|
"logps/rejected": -174.79525756835938, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.04843292012810707, |
|
"rewards/margins": 0.0009945080382749438, |
|
"rewards/rejected": -0.04942742735147476, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3161794977356937, |
|
"grad_norm": 0.46131113171577454, |
|
"learning_rate": 4.3288283381591725e-07, |
|
"logits/chosen": 10.260627746582031, |
|
"logits/rejected": 10.424566268920898, |
|
"logps/chosen": -163.2139129638672, |
|
"logps/ref_chosen": -158.93540954589844, |
|
"logps/ref_rejected": -168.12344360351562, |
|
"logps/rejected": -172.52456665039062, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -0.042785100638866425, |
|
"rewards/margins": 0.0012263581156730652, |
|
"rewards/rejected": -0.04401145875453949, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"grad_norm": 0.4610799252986908, |
|
"learning_rate": 4.246923569447104e-07, |
|
"logits/chosen": 10.461551666259766, |
|
"logits/rejected": 10.855925559997559, |
|
"logps/chosen": -165.60084533691406, |
|
"logps/ref_chosen": -161.0833740234375, |
|
"logps/ref_rejected": -174.85760498046875, |
|
"logps/rejected": -179.20965576171875, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.0451747290790081, |
|
"rewards/margins": -0.0016541833756491542, |
|
"rewards/rejected": -0.043520547449588776, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.34252778921366817, |
|
"grad_norm": 0.41953545808792114, |
|
"learning_rate": 4.161178683597054e-07, |
|
"logits/chosen": 10.611435890197754, |
|
"logits/rejected": 10.745625495910645, |
|
"logps/chosen": -160.7465057373047, |
|
"logps/ref_chosen": -156.07315063476562, |
|
"logps/ref_rejected": -161.84292602539062, |
|
"logps/rejected": -166.85279846191406, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -0.046733610332012177, |
|
"rewards/margins": 0.0033648861572146416, |
|
"rewards/rejected": -0.05009850114583969, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3557019349526554, |
|
"grad_norm": 0.3880956470966339, |
|
"learning_rate": 4.0717821664772124e-07, |
|
"logits/chosen": 10.590215682983398, |
|
"logits/rejected": 10.893061637878418, |
|
"logps/chosen": -168.67279052734375, |
|
"logps/ref_chosen": -163.2271728515625, |
|
"logps/ref_rejected": -171.53738403320312, |
|
"logps/rejected": -176.9310302734375, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": -0.05445636808872223, |
|
"rewards/margins": -0.00051975465612486, |
|
"rewards/rejected": -0.05393661558628082, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3688760806916426, |
|
"grad_norm": 0.5345169901847839, |
|
"learning_rate": 3.978930531033806e-07, |
|
"logits/chosen": 9.953861236572266, |
|
"logits/rejected": 10.416406631469727, |
|
"logps/chosen": -162.322021484375, |
|
"logps/ref_chosen": -157.08795166015625, |
|
"logps/ref_rejected": -167.1195068359375, |
|
"logps/rejected": -172.4853973388672, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.0523407980799675, |
|
"rewards/margins": 0.001318173250183463, |
|
"rewards/rejected": -0.053658969700336456, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3820502264306299, |
|
"grad_norm": 0.5297831296920776, |
|
"learning_rate": 3.882827885312998e-07, |
|
"logits/chosen": 10.323142051696777, |
|
"logits/rejected": 10.501938819885254, |
|
"logps/chosen": -168.2061004638672, |
|
"logps/ref_chosen": -163.59707641601562, |
|
"logps/ref_rejected": -171.89508056640625, |
|
"logps/rejected": -176.55738830566406, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.04609035328030586, |
|
"rewards/margins": 0.0005328265833668411, |
|
"rewards/rejected": -0.04662318155169487, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.39522437216961714, |
|
"grad_norm": 0.35810208320617676, |
|
"learning_rate": 3.7836854837871044e-07, |
|
"logits/chosen": 10.40945053100586, |
|
"logits/rejected": 10.931025505065918, |
|
"logps/chosen": -169.71910095214844, |
|
"logps/ref_chosen": -164.91160583496094, |
|
"logps/ref_rejected": -176.66453552246094, |
|
"logps/rejected": -181.69285583496094, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -0.04807499051094055, |
|
"rewards/margins": 0.0022084712982177734, |
|
"rewards/rejected": -0.050283461809158325, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4083985179086044, |
|
"grad_norm": 0.44152358174324036, |
|
"learning_rate": 3.681721262971413e-07, |
|
"logits/chosen": 10.004611015319824, |
|
"logits/rejected": 10.491472244262695, |
|
"logps/chosen": -161.24798583984375, |
|
"logps/ref_chosen": -155.95877075195312, |
|
"logps/ref_rejected": -166.5852508544922, |
|
"logps/rejected": -172.22703552246094, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.052892111241817474, |
|
"rewards/margins": 0.003525771899148822, |
|
"rewards/rejected": -0.05641787871718407, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.4215726636475916, |
|
"grad_norm": 0.5185390114784241, |
|
"learning_rate": 3.577159362352426e-07, |
|
"logits/chosen": 10.27377986907959, |
|
"logits/rejected": 10.56481647491455, |
|
"logps/chosen": -167.19921875, |
|
"logps/ref_chosen": -161.83575439453125, |
|
"logps/ref_rejected": -169.53759765625, |
|
"logps/rejected": -174.91342163085938, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.05363469570875168, |
|
"rewards/margins": 0.00012344191782176495, |
|
"rewards/rejected": -0.05375813692808151, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.43474680938657884, |
|
"grad_norm": 1.1196942329406738, |
|
"learning_rate": 3.470229631680624e-07, |
|
"logits/chosen": 10.063702583312988, |
|
"logits/rejected": 10.693009376525879, |
|
"logps/chosen": -164.40225219726562, |
|
"logps/ref_chosen": -158.7517547607422, |
|
"logps/ref_rejected": -168.51002502441406, |
|
"logps/rejected": -174.00901794433594, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.056504976004362106, |
|
"rewards/margins": -0.0015149968676269054, |
|
"rewards/rejected": -0.054989978671073914, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.44792095512556607, |
|
"grad_norm": 0.593383252620697, |
|
"learning_rate": 3.361167125710832e-07, |
|
"logits/chosen": 9.993782043457031, |
|
"logits/rejected": 10.380085945129395, |
|
"logps/chosen": -170.81832885742188, |
|
"logps/ref_chosen": -165.12754821777344, |
|
"logps/ref_rejected": -177.654296875, |
|
"logps/rejected": -183.54998779296875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.05690779164433479, |
|
"rewards/margins": 0.0020490488968789577, |
|
"rewards/rejected": -0.05895683914422989, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 0.4870688319206238, |
|
"learning_rate": 3.2502115875008516e-07, |
|
"logits/chosen": 10.418455123901367, |
|
"logits/rejected": 10.655467987060547, |
|
"logps/chosen": -165.74551391601562, |
|
"logps/ref_chosen": -159.895751953125, |
|
"logps/ref_rejected": -167.39785766601562, |
|
"logps/rejected": -173.5540008544922, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.058497510850429535, |
|
"rewards/margins": 0.0030638885218650103, |
|
"rewards/rejected": -0.06156139820814133, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.47426924660354053, |
|
"grad_norm": 0.9538622498512268, |
|
"learning_rate": 3.137606921404191e-07, |
|
"logits/chosen": 10.286083221435547, |
|
"logits/rejected": 10.6614408493042, |
|
"logps/chosen": -170.4355926513672, |
|
"logps/ref_chosen": -165.02023315429688, |
|
"logps/ref_rejected": -172.28997802734375, |
|
"logps/rejected": -177.64358520507812, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.05415371432900429, |
|
"rewards/margins": -0.0006177356699481606, |
|
"rewards/rejected": -0.053535979241132736, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.4874433923425278, |
|
"grad_norm": 0.6481872200965881, |
|
"learning_rate": 3.0236006569153616e-07, |
|
"logits/chosen": 10.281628608703613, |
|
"logits/rejected": 10.485464096069336, |
|
"logps/chosen": -167.83177185058594, |
|
"logps/ref_chosen": -162.57997131347656, |
|
"logps/ref_rejected": -174.94447326660156, |
|
"logps/rejected": -180.6204833984375, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -0.05251805856823921, |
|
"rewards/margins": 0.004242150578647852, |
|
"rewards/rejected": -0.056760210543870926, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.500617538081515, |
|
"grad_norm": 0.6258410215377808, |
|
"learning_rate": 2.9084434045463254e-07, |
|
"logits/chosen": 10.348075866699219, |
|
"logits/rejected": 10.585733413696289, |
|
"logps/chosen": -170.00047302246094, |
|
"logps/ref_chosen": -164.2234649658203, |
|
"logps/ref_rejected": -170.92135620117188, |
|
"logps/rejected": -177.09783935546875, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.05776997655630112, |
|
"rewards/margins": 0.003995058126747608, |
|
"rewards/rejected": -0.06176503747701645, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5137916838205022, |
|
"grad_norm": 0.45227017998695374, |
|
"learning_rate": 2.7923883049302066e-07, |
|
"logits/chosen": 10.290374755859375, |
|
"logits/rejected": 10.424775123596191, |
|
"logps/chosen": -172.98834228515625, |
|
"logps/ref_chosen": -166.5240478515625, |
|
"logps/ref_rejected": -174.45970153808594, |
|
"logps/rejected": -180.691162109375, |
|
"loss": 0.6944, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0646430104970932, |
|
"rewards/margins": -0.002328395377844572, |
|
"rewards/rejected": -0.06231461465358734, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5269658295594895, |
|
"grad_norm": 0.7223658561706543, |
|
"learning_rate": 2.6756904723632324e-07, |
|
"logits/chosen": 10.33895492553711, |
|
"logits/rejected": 10.744306564331055, |
|
"logps/chosen": -162.80517578125, |
|
"logps/ref_chosen": -156.39364624023438, |
|
"logps/ref_rejected": -172.6676483154297, |
|
"logps/rejected": -178.97576904296875, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -0.06411512196063995, |
|
"rewards/margins": -0.0010338453575968742, |
|
"rewards/rejected": -0.0630812719464302, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5401399752984768, |
|
"grad_norm": 0.9710007905960083, |
|
"learning_rate": 2.5586064340081516e-07, |
|
"logits/chosen": 10.650144577026367, |
|
"logits/rejected": 10.64175796508789, |
|
"logps/chosen": -164.384765625, |
|
"logps/ref_chosen": -158.60899353027344, |
|
"logps/ref_rejected": -167.36000061035156, |
|
"logps/rejected": -173.48138427734375, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -0.057757824659347534, |
|
"rewards/margins": 0.0034558887127786875, |
|
"rewards/rejected": -0.061213716864585876, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.553314121037464, |
|
"grad_norm": 1.060659408569336, |
|
"learning_rate": 2.4413935659918487e-07, |
|
"logits/chosen": 9.889198303222656, |
|
"logits/rejected": 10.20853042602539, |
|
"logps/chosen": -161.29913330078125, |
|
"logps/ref_chosen": -155.86929321289062, |
|
"logps/ref_rejected": -164.31619262695312, |
|
"logps/rejected": -170.2007598876953, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.05429830774664879, |
|
"rewards/margins": 0.004547302611172199, |
|
"rewards/rejected": -0.05884561315178871, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5664882667764513, |
|
"grad_norm": 0.5962560176849365, |
|
"learning_rate": 2.3243095276367684e-07, |
|
"logits/chosen": 9.809310913085938, |
|
"logits/rejected": 10.134492874145508, |
|
"logps/chosen": -155.75411987304688, |
|
"logps/ref_chosen": -149.69866943359375, |
|
"logps/ref_rejected": -163.860107421875, |
|
"logps/rejected": -170.56063842773438, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.06055447459220886, |
|
"rewards/margins": 0.006450990214943886, |
|
"rewards/rejected": -0.0670054703950882, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5796624125154385, |
|
"grad_norm": 0.38942375779151917, |
|
"learning_rate": 2.2076116950697937e-07, |
|
"logits/chosen": 10.249410629272461, |
|
"logits/rejected": 10.454606056213379, |
|
"logps/chosen": -161.172119140625, |
|
"logps/ref_chosen": -155.06076049804688, |
|
"logps/ref_rejected": -161.446044921875, |
|
"logps/rejected": -167.66583251953125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06111355125904083, |
|
"rewards/margins": 0.0010843212949112058, |
|
"rewards/rejected": -0.06219786778092384, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.5928365582544257, |
|
"grad_norm": 0.3398829996585846, |
|
"learning_rate": 2.091556595453674e-07, |
|
"logits/chosen": 10.017583847045898, |
|
"logits/rejected": 10.271501541137695, |
|
"logps/chosen": -169.4973907470703, |
|
"logps/ref_chosen": -163.5751190185547, |
|
"logps/ref_rejected": -176.65078735351562, |
|
"logps/rejected": -183.13449096679688, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.059222809970378876, |
|
"rewards/margins": 0.005614194553345442, |
|
"rewards/rejected": -0.0648370012640953, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.606010703993413, |
|
"grad_norm": 0.5492353439331055, |
|
"learning_rate": 1.9763993430846392e-07, |
|
"logits/chosen": 10.156986236572266, |
|
"logits/rejected": 10.36589241027832, |
|
"logps/chosen": -168.26568603515625, |
|
"logps/ref_chosen": -162.17233276367188, |
|
"logps/ref_rejected": -169.48728942871094, |
|
"logps/rejected": -176.28375244140625, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.06093353033065796, |
|
"rewards/margins": 0.007031145039945841, |
|
"rewards/rejected": -0.06796467304229736, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6191848497324002, |
|
"grad_norm": 0.361573725938797, |
|
"learning_rate": 1.862393078595809e-07, |
|
"logits/chosen": 10.36301040649414, |
|
"logits/rejected": 10.790814399719238, |
|
"logps/chosen": -164.67152404785156, |
|
"logps/ref_chosen": -158.01217651367188, |
|
"logps/ref_rejected": -170.02401733398438, |
|
"logps/rejected": -177.1544189453125, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -0.06659334897994995, |
|
"rewards/margins": 0.004710891284048557, |
|
"rewards/rejected": -0.07130423933267593, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6323589954713874, |
|
"grad_norm": 0.49494439363479614, |
|
"learning_rate": 1.7497884124991485e-07, |
|
"logits/chosen": 10.640326499938965, |
|
"logits/rejected": 10.784747123718262, |
|
"logps/chosen": -170.2487335205078, |
|
"logps/ref_chosen": -163.1754608154297, |
|
"logps/ref_rejected": -171.04629516601562, |
|
"logps/rejected": -178.30601501464844, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -0.07073251903057098, |
|
"rewards/margins": 0.0018647168762981892, |
|
"rewards/rejected": -0.07259723544120789, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6455331412103746, |
|
"grad_norm": 0.5513429045677185, |
|
"learning_rate": 1.6388328742891678e-07, |
|
"logits/chosen": 10.51612663269043, |
|
"logits/rejected": 10.971571922302246, |
|
"logps/chosen": -173.3045654296875, |
|
"logps/ref_chosen": -166.46066284179688, |
|
"logps/ref_rejected": -176.8931884765625, |
|
"logps/rejected": -183.9400634765625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.06843903660774231, |
|
"rewards/margins": 0.002029917435720563, |
|
"rewards/rejected": -0.0704689472913742, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"grad_norm": 0.4830932319164276, |
|
"learning_rate": 1.5297703683193753e-07, |
|
"logits/chosen": 10.228598594665527, |
|
"logits/rejected": 10.446922302246094, |
|
"logps/chosen": -160.33726501464844, |
|
"logps/ref_chosen": -154.5497589111328, |
|
"logps/ref_rejected": -163.0555419921875, |
|
"logps/rejected": -169.6492156982422, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.057875264436006546, |
|
"rewards/margins": 0.008061729371547699, |
|
"rewards/rejected": -0.06593699753284454, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6718814326883491, |
|
"grad_norm": 0.3345705568790436, |
|
"learning_rate": 1.422840637647574e-07, |
|
"logits/chosen": 9.768302917480469, |
|
"logits/rejected": 10.109855651855469, |
|
"logps/chosen": -163.43936157226562, |
|
"logps/ref_chosen": -156.50027465820312, |
|
"logps/ref_rejected": -165.34817504882812, |
|
"logps/rejected": -172.6494140625, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -0.06939102709293365, |
|
"rewards/margins": 0.003621481591835618, |
|
"rewards/rejected": -0.07301251590251923, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.6850555784273363, |
|
"grad_norm": 0.4939371347427368, |
|
"learning_rate": 1.3182787370285865e-07, |
|
"logits/chosen": 10.192159652709961, |
|
"logits/rejected": 10.675037384033203, |
|
"logps/chosen": -166.86045837402344, |
|
"logps/ref_chosen": -159.9600830078125, |
|
"logps/ref_rejected": -173.38265991210938, |
|
"logps/rejected": -180.2958526611328, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.06900367140769958, |
|
"rewards/margins": 0.00012808499741367996, |
|
"rewards/rejected": -0.0691317617893219, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.6982297241663236, |
|
"grad_norm": 0.34416159987449646, |
|
"learning_rate": 1.2163145162128946e-07, |
|
"logits/chosen": 10.153275489807129, |
|
"logits/rejected": 10.229888916015625, |
|
"logps/chosen": -163.69931030273438, |
|
"logps/ref_chosen": -156.9239501953125, |
|
"logps/ref_rejected": -164.6415557861328, |
|
"logps/rejected": -171.66647338867188, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.06775350123643875, |
|
"rewards/margins": 0.002495494903996587, |
|
"rewards/rejected": -0.07024899125099182, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.7114038699053108, |
|
"grad_norm": 0.6642581820487976, |
|
"learning_rate": 1.1171721146870014e-07, |
|
"logits/chosen": 10.387247085571289, |
|
"logits/rejected": 10.734329223632812, |
|
"logps/chosen": -165.91758728027344, |
|
"logps/ref_chosen": -158.57778930664062, |
|
"logps/ref_rejected": -167.73760986328125, |
|
"logps/rejected": -174.94659423828125, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -0.07339778542518616, |
|
"rewards/margins": -0.0013078839983791113, |
|
"rewards/rejected": -0.07208990305662155, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.724578015644298, |
|
"grad_norm": 0.4084431529045105, |
|
"learning_rate": 1.0210694689661939e-07, |
|
"logits/chosen": 10.050434112548828, |
|
"logits/rejected": 10.472386360168457, |
|
"logps/chosen": -165.36961364746094, |
|
"logps/ref_chosen": -157.52191162109375, |
|
"logps/ref_rejected": -169.84109497070312, |
|
"logps/rejected": -177.8925018310547, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.07847694307565689, |
|
"rewards/margins": 0.0020371756982058287, |
|
"rewards/rejected": -0.08051411807537079, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7377521613832853, |
|
"grad_norm": 0.4536319971084595, |
|
"learning_rate": 9.282178335227883e-08, |
|
"logits/chosen": 10.111780166625977, |
|
"logits/rejected": 10.440515518188477, |
|
"logps/chosen": -165.53118896484375, |
|
"logps/ref_chosen": -157.51145935058594, |
|
"logps/ref_rejected": -171.0047607421875, |
|
"logps/rejected": -179.44081115722656, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -0.08019725978374481, |
|
"rewards/margins": 0.004163055680692196, |
|
"rewards/rejected": -0.08436032384634018, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.7509263071222725, |
|
"grad_norm": 0.7599093914031982, |
|
"learning_rate": 8.388213164029459e-08, |
|
"logits/chosen": 10.463480949401855, |
|
"logits/rejected": 10.61793041229248, |
|
"logps/chosen": -163.78585815429688, |
|
"logps/ref_chosen": -155.50022888183594, |
|
"logps/ref_rejected": -163.588623046875, |
|
"logps/rejected": -171.88169860839844, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -0.08285625278949738, |
|
"rewards/margins": 7.443735376000404e-05, |
|
"rewards/rejected": -0.08293069899082184, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.7641004528612598, |
|
"grad_norm": 1.0936464071273804, |
|
"learning_rate": 7.530764305528958e-08, |
|
"logits/chosen": 10.068573951721191, |
|
"logits/rejected": 10.497234344482422, |
|
"logps/chosen": -168.2652587890625, |
|
"logps/ref_chosen": -160.71681213378906, |
|
"logps/ref_rejected": -171.31556701660156, |
|
"logps/rejected": -179.1130828857422, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.07548440247774124, |
|
"rewards/margins": 0.0024907987099140882, |
|
"rewards/rejected": -0.07797519862651825, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.7772745986002471, |
|
"grad_norm": 0.49573731422424316, |
|
"learning_rate": 6.711716618408281e-08, |
|
"logits/chosen": 10.322929382324219, |
|
"logits/rejected": 10.415237426757812, |
|
"logps/chosen": -176.02772521972656, |
|
"logps/ref_chosen": -168.35157775878906, |
|
"logps/ref_rejected": -173.06912231445312, |
|
"logps/rejected": -181.29351806640625, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.07676173746585846, |
|
"rewards/margins": 0.005482470151036978, |
|
"rewards/rejected": -0.08224420994520187, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.7904487443392343, |
|
"grad_norm": 0.6191822290420532, |
|
"learning_rate": 5.932870547240454e-08, |
|
"logits/chosen": 10.167850494384766, |
|
"logits/rejected": 10.512453079223633, |
|
"logps/chosen": -162.01792907714844, |
|
"logps/ref_chosen": -154.5463409423828, |
|
"logps/ref_rejected": -164.9440155029297, |
|
"logps/rejected": -172.77783203125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -0.07471606135368347, |
|
"rewards/margins": 0.0036221330519765615, |
|
"rewards/rejected": -0.07833817601203918, |
|
"step": 60 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 12, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|