{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.47426924660354053,
  "eval_steps": 500,
  "global_step": 36,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013174145738987238,
      "grad_norm": 1.0125823020935059,
      "learning_rate": 6.25e-08,
      "logits/chosen": 9.990612030029297,
      "logits/rejected": 10.698101997375488,
      "logps/chosen": -102.88545989990234,
      "logps/ref_chosen": -102.88545989990234,
      "logps/ref_rejected": -121.84871673583984,
      "logps/rejected": -121.84871673583984,
      "loss": 0.4327,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "sft_loss": 0.36753880977630615,
      "step": 1
    },
    {
      "epoch": 0.026348291477974475,
      "grad_norm": 0.3579196035861969,
      "learning_rate": 1.25e-07,
      "logits/chosen": 10.211905479431152,
      "logits/rejected": 11.06594467163086,
      "logps/chosen": -107.70349884033203,
      "logps/ref_chosen": -107.70349884033203,
      "logps/ref_rejected": -121.89966583251953,
      "logps/rejected": -121.89966583251953,
      "loss": 0.4667,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "sft_loss": 0.41013145446777344,
      "step": 2
    },
    {
      "epoch": 0.03952243721696171,
      "grad_norm": 0.49040451645851135,
      "learning_rate": 1.875e-07,
      "logits/chosen": 10.035531044006348,
      "logits/rejected": 11.027185440063477,
      "logps/chosen": -108.23310852050781,
      "logps/ref_chosen": -107.98188781738281,
      "logps/ref_rejected": -124.51527404785156,
      "logps/rejected": -124.64785766601562,
      "loss": 0.4683,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.0025122263468801975,
      "rewards/margins": -0.0011863748077303171,
      "rewards/rejected": -0.0013258515391498804,
      "sft_loss": 0.41194257140159607,
      "step": 3
    },
    {
      "epoch": 0.05269658295594895,
      "grad_norm": 0.8740162253379822,
      "learning_rate": 2.5e-07,
      "logits/chosen": 9.860024452209473,
      "logits/rejected": 10.876106262207031,
      "logps/chosen": -109.94369506835938,
      "logps/ref_chosen": -109.20836639404297,
      "logps/ref_rejected": -119.23908996582031,
      "logps/rejected": -119.73454284667969,
      "loss": 0.4633,
      "rewards/accuracies": 0.3828125,
      "rewards/chosen": -0.007353362161666155,
      "rewards/margins": -0.002398767275735736,
      "rewards/rejected": -0.004954595118761063,
      "sft_loss": 0.40552011132240295,
      "step": 4
    },
    {
      "epoch": 0.06587072869493618,
      "grad_norm": 1.1980141401290894,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": 10.19467830657959,
      "logits/rejected": 10.95050048828125,
      "logps/chosen": -104.02793884277344,
      "logps/ref_chosen": -103.87680053710938,
      "logps/ref_rejected": -118.41618347167969,
      "logps/rejected": -118.46170806884766,
      "loss": 0.4351,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": -0.001511452835984528,
      "rewards/margins": -0.001056289067491889,
      "rewards/rejected": -0.0004551640013232827,
      "sft_loss": 0.3704559803009033,
      "step": 5
    },
    {
      "epoch": 0.07904487443392343,
      "grad_norm": 0.928102433681488,
      "learning_rate": 3.75e-07,
      "logits/chosen": 10.701957702636719,
      "logits/rejected": 11.477033615112305,
      "logps/chosen": -107.61714935302734,
      "logps/ref_chosen": -107.58968353271484,
      "logps/ref_rejected": -122.07303619384766,
      "logps/rejected": -122.0443115234375,
      "loss": 0.4515,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": -0.0002746534300968051,
      "rewards/margins": -0.0005618570139631629,
      "rewards/rejected": 0.0002872035256586969,
      "sft_loss": 0.3909577429294586,
      "step": 6
    },
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 1.0250380039215088,
      "learning_rate": 4.375e-07,
      "logits/chosen": 10.025421142578125,
      "logits/rejected": 10.72871208190918,
      "logps/chosen": -107.13175201416016,
      "logps/ref_chosen": -107.42727661132812,
      "logps/ref_rejected": -116.87063598632812,
      "logps/rejected": -116.28421020507812,
      "loss": 0.4392,
      "rewards/accuracies": 0.3984375,
      "rewards/chosen": 0.002955180360004306,
      "rewards/margins": -0.0029091311153024435,
      "rewards/rejected": 0.005864311475306749,
      "sft_loss": 0.3753029406070709,
      "step": 7
    },
    {
      "epoch": 0.1053931659118979,
      "grad_norm": 0.5661666393280029,
      "learning_rate": 5e-07,
      "logits/chosen": 10.203546524047852,
      "logits/rejected": 11.103278160095215,
      "logps/chosen": -104.93194580078125,
      "logps/ref_chosen": -105.60282135009766,
      "logps/ref_rejected": -119.53916931152344,
      "logps/rejected": -118.93331909179688,
      "loss": 0.4416,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.006708861328661442,
      "rewards/margins": 0.0006504050688818097,
      "rewards/rejected": 0.006058456376194954,
      "sft_loss": 0.3787955939769745,
      "step": 8
    },
    {
      "epoch": 0.11856731165088513,
      "grad_norm": 0.820360541343689,
      "learning_rate": 4.997252228714278e-07,
      "logits/chosen": 10.184520721435547,
      "logits/rejected": 11.154094696044922,
      "logps/chosen": -104.26238250732422,
      "logps/ref_chosen": -105.46086120605469,
      "logps/ref_rejected": -119.00373840332031,
      "logps/rejected": -117.88744354248047,
      "loss": 0.4437,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.011985024437308311,
      "rewards/margins": 0.0008220230811275542,
      "rewards/rejected": 0.011163001880049706,
      "sft_loss": 0.38146448135375977,
      "step": 9
    },
    {
      "epoch": 0.13174145738987236,
      "grad_norm": 0.4781506061553955,
      "learning_rate": 4.989014955054745e-07,
      "logits/chosen": 10.042634963989258,
      "logits/rejected": 10.866905212402344,
      "logps/chosen": -101.11405944824219,
      "logps/ref_chosen": -104.21009826660156,
      "logps/ref_rejected": -118.9209213256836,
      "logps/rejected": -115.99314880371094,
      "loss": 0.4088,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.030960241332650185,
      "rewards/margins": 0.0016824830090627074,
      "rewards/rejected": 0.029277760535478592,
      "sft_loss": 0.3378788232803345,
      "step": 10
    },
    {
      "epoch": 0.14491560312885962,
      "grad_norm": 0.8178320527076721,
      "learning_rate": 4.975306286336627e-07,
      "logits/chosen": 9.987105369567871,
      "logits/rejected": 11.181533813476562,
      "logps/chosen": -101.77717590332031,
      "logps/ref_chosen": -105.94319152832031,
      "logps/ref_rejected": -122.76007843017578,
      "logps/rejected": -119.00365447998047,
      "loss": 0.4478,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.04166024178266525,
      "rewards/margins": 0.0040960111655294895,
      "rewards/rejected": 0.0375642292201519,
      "sft_loss": 0.3869646191596985,
      "step": 11
    },
    {
      "epoch": 0.15808974886784685,
      "grad_norm": 0.7931886315345764,
      "learning_rate": 4.956156357188939e-07,
      "logits/chosen": 9.913724899291992,
      "logits/rejected": 10.605714797973633,
      "logps/chosen": -104.08253479003906,
      "logps/ref_chosen": -109.08442687988281,
      "logps/ref_rejected": -121.41947174072266,
      "logps/rejected": -116.61964416503906,
      "loss": 0.4233,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": 0.05001899227499962,
      "rewards/margins": 0.002020882908254862,
      "rewards/rejected": 0.04799811542034149,
      "sft_loss": 0.3560585379600525,
      "step": 12
    },
    {
      "epoch": 0.17126389460683408,
      "grad_norm": 0.21988148987293243,
      "learning_rate": 4.931607263312032e-07,
      "logits/chosen": 9.934953689575195,
      "logits/rejected": 11.010736465454102,
      "logps/chosen": -99.40065002441406,
      "logps/ref_chosen": -104.62150573730469,
      "logps/ref_rejected": -119.55384063720703,
      "logps/rejected": -114.51910400390625,
      "loss": 0.4347,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.05220862105488777,
      "rewards/margins": 0.0018612804124131799,
      "rewards/rejected": 0.05034734308719635,
      "sft_loss": 0.3703528344631195,
      "step": 13
    },
    {
      "epoch": 0.1844380403458213,
      "grad_norm": 0.25260093808174133,
      "learning_rate": 4.9017129689421e-07,
      "logits/chosen": 10.499589920043945,
      "logits/rejected": 11.620351791381836,
      "logps/chosen": -98.26732635498047,
      "logps/ref_chosen": -106.179443359375,
      "logps/ref_rejected": -120.73036193847656,
      "logps/rejected": -112.42403411865234,
      "loss": 0.415,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.07912100851535797,
      "rewards/margins": -0.00394221069291234,
      "rewards/rejected": 0.08306321501731873,
      "sft_loss": 0.3448493182659149,
      "step": 14
    },
    {
      "epoch": 0.19761218608480857,
      "grad_norm": 0.2699073553085327,
      "learning_rate": 4.866539188226085e-07,
      "logits/chosen": 9.888190269470215,
      "logits/rejected": 10.820487976074219,
      "logps/chosen": -96.35069274902344,
      "logps/ref_chosen": -105.70547485351562,
      "logps/ref_rejected": -118.89997863769531,
      "logps/rejected": -109.4480209350586,
      "loss": 0.4071,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.09354789555072784,
      "rewards/margins": -0.0009717608336359262,
      "rewards/rejected": 0.09451965987682343,
      "sft_loss": 0.3353506624698639,
      "step": 15
    },
    {
      "epoch": 0.2107863318237958,
      "grad_norm": 0.3605174422264099,
      "learning_rate": 4.826163240767716e-07,
      "logits/chosen": 10.686586380004883,
      "logits/rejected": 11.310283660888672,
      "logps/chosen": -97.58367919921875,
      "logps/ref_chosen": -108.86376953125,
      "logps/ref_rejected": -122.1635513305664,
      "logps/rejected": -111.56184387207031,
      "loss": 0.4203,
      "rewards/accuracies": 0.6015625,
      "rewards/chosen": 0.11280079185962677,
      "rewards/margins": 0.006783789023756981,
      "rewards/rejected": 0.10601700842380524,
      "sft_loss": 0.35289227962493896,
      "step": 16
    },
    {
      "epoch": 0.22396047756278303,
      "grad_norm": 0.41622957587242126,
      "learning_rate": 4.780673881662242e-07,
      "logits/chosen": 10.20689582824707,
      "logits/rejected": 10.86001205444336,
      "logps/chosen": -91.06839752197266,
      "logps/ref_chosen": -102.93986511230469,
      "logps/ref_rejected": -119.43718719482422,
      "logps/rejected": -107.62686920166016,
      "loss": 0.4289,
      "rewards/accuracies": 0.4296875,
      "rewards/chosen": 0.11871471256017685,
      "rewards/margins": 0.0006115150172263384,
      "rewards/rejected": 0.11810319125652313,
      "sft_loss": 0.3628237247467041,
      "step": 17
    },
    {
      "epoch": 0.23713462330177026,
      "grad_norm": 0.36325603723526,
      "learning_rate": 4.730171106393466e-07,
      "logits/chosen": 10.428996086120605,
      "logits/rejected": 11.2207612991333,
      "logps/chosen": -91.17449951171875,
      "logps/ref_chosen": -103.81341552734375,
      "logps/ref_rejected": -117.45123291015625,
      "logps/rejected": -105.54415893554688,
      "loss": 0.4109,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.1263890266418457,
      "rewards/margins": 0.007318255491554737,
      "rewards/rejected": 0.11907076835632324,
      "sft_loss": 0.3411993384361267,
      "step": 18
    },
    {
      "epoch": 0.2503087690407575,
      "grad_norm": 0.3004375994205475,
      "learning_rate": 4.6747659310219757e-07,
      "logits/chosen": 10.341066360473633,
      "logits/rejected": 11.011281967163086,
      "logps/chosen": -95.462158203125,
      "logps/ref_chosen": -107.85797119140625,
      "logps/ref_rejected": -121.88042449951172,
      "logps/rejected": -109.14384460449219,
      "loss": 0.3995,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": 0.12395807355642319,
      "rewards/margins": -0.0034077779855579138,
      "rewards/rejected": 0.1273658722639084,
      "sft_loss": 0.3255612552165985,
      "step": 19
    },
    {
      "epoch": 0.2634829147797447,
      "grad_norm": 0.3459661304950714,
      "learning_rate": 4.6145801481477433e-07,
      "logits/chosen": 10.744415283203125,
      "logits/rejected": 11.557382583618164,
      "logps/chosen": -90.79095458984375,
      "logps/ref_chosen": -103.42721557617188,
      "logps/ref_rejected": -116.7796630859375,
      "logps/rejected": -104.72395324707031,
      "loss": 0.3933,
      "rewards/accuracies": 0.5546875,
      "rewards/chosen": 0.1263626366853714,
      "rewards/margins": 0.005805579479783773,
      "rewards/rejected": 0.12055706232786179,
      "sft_loss": 0.31898409128189087,
      "step": 20
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 0.23334962129592896,
      "learning_rate": 4.549746059183561e-07,
      "logits/chosen": 9.809565544128418,
      "logits/rejected": 10.91222095489502,
      "logps/chosen": -93.41632843017578,
      "logps/ref_chosen": -106.60163879394531,
      "logps/ref_rejected": -124.56562805175781,
      "logps/rejected": -110.83106994628906,
      "loss": 0.3907,
      "rewards/accuracies": 0.484375,
      "rewards/chosen": 0.13185299932956696,
      "rewards/margins": -0.0054924385622143745,
      "rewards/rejected": 0.13734543323516846,
      "sft_loss": 0.3142167329788208,
      "step": 21
    },
    {
      "epoch": 0.28983120625771924,
      "grad_norm": 0.5167871713638306,
      "learning_rate": 4.480406183527823e-07,
      "logits/chosen": 10.247149467468262,
      "logits/rejected": 11.123760223388672,
      "logps/chosen": -89.27349090576172,
      "logps/ref_chosen": -103.77696228027344,
      "logps/ref_rejected": -118.73616027832031,
      "logps/rejected": -105.45307922363281,
      "loss": 0.4072,
      "rewards/accuracies": 0.6015625,
      "rewards/chosen": 0.14503462612628937,
      "rewards/margins": 0.012203728780150414,
      "rewards/rejected": 0.1328309029340744,
      "sft_loss": 0.3371087312698364,
      "step": 22
    },
    {
      "epoch": 0.3030053519967065,
      "grad_norm": 0.18110381066799164,
      "learning_rate": 4.4067129452759546e-07,
      "logits/chosen": 10.112556457519531,
      "logits/rejected": 11.132116317749023,
      "logps/chosen": -89.66647338867188,
      "logps/ref_chosen": -104.72956085205078,
      "logps/ref_rejected": -121.35556030273438,
      "logps/rejected": -106.93888092041016,
      "loss": 0.4041,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.15063081681728363,
      "rewards/margins": 0.006464053876698017,
      "rewards/rejected": 0.1441667526960373,
      "sft_loss": 0.3324388861656189,
      "step": 23
    },
    {
      "epoch": 0.3161794977356937,
      "grad_norm": 0.2767745554447174,
      "learning_rate": 4.3288283381591725e-07,
      "logits/chosen": 10.138566017150879,
      "logits/rejected": 10.986135482788086,
      "logps/chosen": -90.00975799560547,
      "logps/ref_chosen": -105.88758087158203,
      "logps/ref_rejected": -125.69054412841797,
      "logps/rejected": -109.54483795166016,
      "loss": 0.3939,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": 0.15877819061279297,
      "rewards/margins": -0.002678929828107357,
      "rewards/rejected": 0.1614571213722229,
      "sft_loss": 0.3185826539993286,
      "step": 24
    },
    {
      "epoch": 0.32935364347468093,
      "grad_norm": 0.2805613875389099,
      "learning_rate": 4.246923569447104e-07,
      "logits/chosen": 10.334699630737305,
      "logits/rejected": 11.066987037658691,
      "logps/chosen": -91.0733642578125,
      "logps/ref_chosen": -110.0761489868164,
      "logps/ref_rejected": -129.10540771484375,
      "logps/rejected": -110.36031341552734,
      "loss": 0.3872,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.1900278627872467,
      "rewards/margins": 0.0025771353393793106,
      "rewards/rejected": 0.18745073676109314,
      "sft_loss": 0.3108268082141876,
      "step": 25
    },
    {
      "epoch": 0.34252778921366817,
      "grad_norm": 0.26198408007621765,
      "learning_rate": 4.161178683597054e-07,
      "logits/chosen": 10.408025741577148,
      "logits/rejected": 11.505026817321777,
      "logps/chosen": -83.76263427734375,
      "logps/ref_chosen": -103.74571990966797,
      "logps/ref_rejected": -120.73832702636719,
      "logps/rejected": -101.1318588256836,
      "loss": 0.3785,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": 0.19983090460300446,
      "rewards/margins": 0.003766256384551525,
      "rewards/rejected": 0.19606465101242065,
      "sft_loss": 0.30009642243385315,
      "step": 26
    },
    {
      "epoch": 0.3557019349526554,
      "grad_norm": 0.5107954144477844,
      "learning_rate": 4.0717821664772124e-07,
      "logits/chosen": 10.090536117553711,
      "logits/rejected": 11.338558197021484,
      "logps/chosen": -84.10513305664062,
      "logps/ref_chosen": -105.47428131103516,
      "logps/ref_rejected": -120.5193099975586,
      "logps/rejected": -99.99079895019531,
      "loss": 0.399,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.21369142830371857,
      "rewards/margins": 0.008406372740864754,
      "rewards/rejected": 0.20528505742549896,
      "sft_loss": 0.32625776529312134,
      "step": 27
    },
    {
      "epoch": 0.3688760806916426,
      "grad_norm": 0.14193640649318695,
      "learning_rate": 3.978930531033806e-07,
      "logits/chosen": 9.690971374511719,
      "logits/rejected": 10.854898452758789,
      "logps/chosen": -82.89115905761719,
      "logps/ref_chosen": -103.72540283203125,
      "logps/ref_rejected": -119.79557800292969,
      "logps/rejected": -98.77869415283203,
      "loss": 0.3661,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.20834232866764069,
      "rewards/margins": -0.0018264743266627192,
      "rewards/rejected": 0.2101687788963318,
      "sft_loss": 0.2838048040866852,
      "step": 28
    },
    {
      "epoch": 0.3820502264306299,
      "grad_norm": 0.22989627718925476,
      "learning_rate": 3.882827885312998e-07,
      "logits/chosen": 10.156278610229492,
      "logits/rejected": 11.227837562561035,
      "logps/chosen": -87.06941223144531,
      "logps/ref_chosen": -108.65434265136719,
      "logps/ref_rejected": -121.46784973144531,
      "logps/rejected": -100.8239974975586,
      "loss": 0.3674,
      "rewards/accuracies": 0.5234375,
      "rewards/chosen": 0.2158493846654892,
      "rewards/margins": 0.00941087119281292,
      "rewards/rejected": 0.20643851161003113,
      "sft_loss": 0.28684642910957336,
      "step": 29
    },
    {
      "epoch": 0.39522437216961714,
      "grad_norm": 0.1578030288219452,
      "learning_rate": 3.7836854837871044e-07,
      "logits/chosen": 10.308172225952148,
      "logits/rejected": 11.710267066955566,
      "logps/chosen": -80.15604400634766,
      "logps/ref_chosen": -103.62174224853516,
      "logps/ref_rejected": -126.73807525634766,
      "logps/rejected": -104.45430755615234,
      "loss": 0.381,
      "rewards/accuracies": 0.5546875,
      "rewards/chosen": 0.23465700447559357,
      "rewards/margins": 0.011819422245025635,
      "rewards/rejected": 0.22283759713172913,
      "sft_loss": 0.30422142148017883,
      "step": 30
    },
    {
      "epoch": 0.4083985179086044,
      "grad_norm": 0.19974292814731598,
      "learning_rate": 3.681721262971413e-07,
      "logits/chosen": 9.940515518188477,
      "logits/rejected": 10.964177131652832,
      "logps/chosen": -82.63470458984375,
      "logps/ref_chosen": -106.10479736328125,
      "logps/ref_rejected": -120.6382827758789,
      "logps/rejected": -98.44656372070312,
      "loss": 0.382,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.2347009927034378,
      "rewards/margins": 0.012783760204911232,
      "rewards/rejected": 0.22191724181175232,
      "sft_loss": 0.30557647347450256,
      "step": 31
    },
    {
      "epoch": 0.4215726636475916,
      "grad_norm": 0.1747150868177414,
      "learning_rate": 3.577159362352426e-07,
      "logits/chosen": 10.107948303222656,
      "logits/rejected": 11.489829063415527,
      "logps/chosen": -83.92710876464844,
      "logps/ref_chosen": -105.99569702148438,
      "logps/ref_rejected": -128.34303283691406,
      "logps/rejected": -106.02722930908203,
      "loss": 0.3718,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": 0.22068597376346588,
      "rewards/margins": -0.0024720439687371254,
      "rewards/rejected": 0.22315803170204163,
      "sft_loss": 0.2908666133880615,
      "step": 32
    },
    {
      "epoch": 0.43474680938657884,
      "grad_norm": 0.2229217141866684,
      "learning_rate": 3.470229631680624e-07,
      "logits/chosen": 10.121207237243652,
      "logits/rejected": 10.942390441894531,
      "logps/chosen": -83.12457275390625,
      "logps/ref_chosen": -105.72196197509766,
      "logps/ref_rejected": -121.59507751464844,
      "logps/rejected": -98.94662475585938,
      "loss": 0.3581,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": 0.22597381472587585,
      "rewards/margins": -0.0005105836316943169,
      "rewards/rejected": 0.22648443281650543,
      "sft_loss": 0.27406632900238037,
      "step": 33
    },
    {
      "epoch": 0.44792095512556607,
      "grad_norm": 0.2622174620628357,
      "learning_rate": 3.361167125710832e-07,
      "logits/chosen": 10.276717185974121,
      "logits/rejected": 11.122960090637207,
      "logps/chosen": -87.3719253540039,
      "logps/ref_chosen": -111.4834976196289,
      "logps/ref_rejected": -130.48089599609375,
      "logps/rejected": -105.93075561523438,
      "loss": 0.3875,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.24111570417881012,
      "rewards/margins": -0.004385640844702721,
      "rewards/rejected": 0.24550136923789978,
      "sft_loss": 0.3102506101131439,
      "step": 34
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 0.2475651353597641,
      "learning_rate": 3.2502115875008516e-07,
      "logits/chosen": 10.531122207641602,
      "logits/rejected": 11.501781463623047,
      "logps/chosen": -84.39804077148438,
      "logps/ref_chosen": -108.9183349609375,
      "logps/ref_rejected": -121.32493591308594,
      "logps/rejected": -97.67329406738281,
      "loss": 0.363,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.24520304799079895,
      "rewards/margins": 0.008686644956469536,
      "rewards/rejected": 0.23651635646820068,
      "sft_loss": 0.28123030066490173,
      "step": 35
    },
    {
      "epoch": 0.47426924660354053,
      "grad_norm": 0.37637782096862793,
      "learning_rate": 3.137606921404191e-07,
      "logits/chosen": 10.115397453308105,
      "logits/rejected": 10.798471450805664,
      "logps/chosen": -82.94326782226562,
      "logps/ref_chosen": -107.1411361694336,
      "logps/ref_rejected": -118.66165161132812,
      "logps/rejected": -94.65679931640625,
      "loss": 0.374,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.24197861552238464,
      "rewards/margins": 0.0019301516003906727,
      "rewards/rejected": 0.24004849791526794,
      "sft_loss": 0.2941214442253113,
      "step": 36
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 12,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}