|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0421052631578946, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.021052631578947368, |
|
"grad_norm": 1.224082589149475, |
|
"kl": 0.0, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": 270973525.3333333, |
|
"logits/rejected": 308584228.5714286, |
|
"logps/chosen": -743.7762586805555, |
|
"logps/rejected": -327.42550223214283, |
|
"loss": 0.5, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.042105263157894736, |
|
"grad_norm": 1.7011622190475464, |
|
"kl": 0.08848989009857178, |
|
"learning_rate": 4.999405067699773e-06, |
|
"logits/chosen": 283945024.0, |
|
"logits/rejected": 300706848.0, |
|
"logps/chosen": -670.29150390625, |
|
"logps/rejected": -340.8790283203125, |
|
"loss": 0.5072, |
|
"rewards/chosen": -0.04161600396037102, |
|
"rewards/margins": -0.051780181005597115, |
|
"rewards/rejected": 0.010164177045226097, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06315789473684211, |
|
"grad_norm": 1.2693746089935303, |
|
"kl": 0.17015418410301208, |
|
"learning_rate": 4.997620553954645e-06, |
|
"logits/chosen": 268529444.5714286, |
|
"logits/rejected": 305261397.3333333, |
|
"logps/chosen": -813.0807756696429, |
|
"logps/rejected": -319.21929253472223, |
|
"loss": 0.496, |
|
"rewards/chosen": 0.018497141344206675, |
|
"rewards/margins": 0.028634450974918547, |
|
"rewards/rejected": -0.010137309630711874, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.08421052631578947, |
|
"grad_norm": 2.279048442840576, |
|
"kl": 0.06164896488189697, |
|
"learning_rate": 4.994647308096509e-06, |
|
"logits/chosen": 252110563.55555555, |
|
"logits/rejected": 306722450.28571427, |
|
"logps/chosen": -748.6314019097222, |
|
"logps/rejected": -325.29725864955356, |
|
"loss": 0.4976, |
|
"rewards/chosen": 0.011831367181407081, |
|
"rewards/margins": 0.011405953797437841, |
|
"rewards/rejected": 0.0004254133839692388, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.10526315789473684, |
|
"grad_norm": 1.5578685998916626, |
|
"kl": 0.06295323371887207, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits/chosen": 292113728.0, |
|
"logits/rejected": 316883904.0, |
|
"logps/chosen": -815.0404663085938, |
|
"logps/rejected": -337.57080078125, |
|
"loss": 0.499, |
|
"rewards/chosen": 0.006664060987532139, |
|
"rewards/margins": 0.011198383755981922, |
|
"rewards/rejected": -0.004534322768449783, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.12631578947368421, |
|
"grad_norm": 1.006475567817688, |
|
"kl": 0.049591064453125, |
|
"learning_rate": 4.985140845555799e-06, |
|
"logits/chosen": 306853741.71428573, |
|
"logits/rejected": 325227320.8888889, |
|
"logps/chosen": -479.3779994419643, |
|
"logps/rejected": -286.5568576388889, |
|
"loss": 0.4958, |
|
"rewards/chosen": 0.026198712842805044, |
|
"rewards/margins": 0.030564389649837737, |
|
"rewards/rejected": -0.004365676807032691, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.14736842105263157, |
|
"grad_norm": 1.2592947483062744, |
|
"kl": 0.15429818630218506, |
|
"learning_rate": 4.978612153434527e-06, |
|
"logits/chosen": 305409763.5555556, |
|
"logits/rejected": 309714139.4285714, |
|
"logps/chosen": -609.1155056423611, |
|
"logps/rejected": -277.2419956752232, |
|
"loss": 0.4977, |
|
"rewards/chosen": 0.01816416945722368, |
|
"rewards/margins": 0.028014377705634586, |
|
"rewards/rejected": -0.009850208248410906, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 1.420854926109314, |
|
"kl": 0.20131784677505493, |
|
"learning_rate": 4.970903776169403e-06, |
|
"logits/chosen": 300502112.0, |
|
"logits/rejected": 337999712.0, |
|
"logps/chosen": -599.28125, |
|
"logps/rejected": -321.2285461425781, |
|
"loss": 0.5003, |
|
"rewards/chosen": 0.006110990885645151, |
|
"rewards/margins": 0.016330440063029528, |
|
"rewards/rejected": -0.010219449177384377, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.18947368421052632, |
|
"grad_norm": 1.0294359922409058, |
|
"kl": 0.15416035056114197, |
|
"learning_rate": 4.962019382530521e-06, |
|
"logits/chosen": 280579858.28571427, |
|
"logits/rejected": 310000554.6666667, |
|
"logps/chosen": -656.5274135044643, |
|
"logps/rejected": -314.7470703125, |
|
"loss": 0.4943, |
|
"rewards/chosen": 0.05247082880565098, |
|
"rewards/margins": 0.061781181111222216, |
|
"rewards/rejected": -0.009310352305571238, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.21052631578947367, |
|
"grad_norm": 1.8129311800003052, |
|
"kl": 0.1927608847618103, |
|
"learning_rate": 4.9519632010080765e-06, |
|
"logits/chosen": 238834005.33333334, |
|
"logits/rejected": 301916818.28571427, |
|
"logps/chosen": -759.1360134548611, |
|
"logps/rejected": -292.9808872767857, |
|
"loss": 0.5004, |
|
"rewards/chosen": 0.014881134033203125, |
|
"rewards/margins": 0.019827809184789658, |
|
"rewards/rejected": -0.004946675151586533, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23157894736842105, |
|
"grad_norm": 1.232249140739441, |
|
"kl": 0.23234114050865173, |
|
"learning_rate": 4.9407400177998335e-06, |
|
"logits/chosen": 286699296.0, |
|
"logits/rejected": 300436576.0, |
|
"logps/chosen": -627.421875, |
|
"logps/rejected": -282.529296875, |
|
"loss": 0.4981, |
|
"rewards/chosen": 0.03665875270962715, |
|
"rewards/margins": 0.0362735278904438, |
|
"rewards/rejected": 0.0003852248191833496, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.25263157894736843, |
|
"grad_norm": 1.2517226934432983, |
|
"kl": 0.2269219160079956, |
|
"learning_rate": 4.928355174533153e-06, |
|
"logits/chosen": 283396937.14285713, |
|
"logits/rejected": 298269838.2222222, |
|
"logps/chosen": -700.8916015625, |
|
"logps/rejected": -346.30750868055554, |
|
"loss": 0.4953, |
|
"rewards/chosen": 0.052171528339385986, |
|
"rewards/margins": 0.05235843691560957, |
|
"rewards/rejected": -0.00018690857622358535, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2736842105263158, |
|
"grad_norm": 1.2330269813537598, |
|
"kl": 0.2519031763076782, |
|
"learning_rate": 4.914814565722671e-06, |
|
"logits/chosen": 302669795.5555556, |
|
"logits/rejected": 313485238.85714287, |
|
"logps/chosen": -654.2307942708334, |
|
"logps/rejected": -330.16427176339283, |
|
"loss": 0.4958, |
|
"rewards/chosen": 0.0363319648636712, |
|
"rewards/margins": 0.07227063652068849, |
|
"rewards/rejected": -0.0359386716570173, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.29473684210526313, |
|
"grad_norm": 1.4144365787506104, |
|
"kl": 0.130226731300354, |
|
"learning_rate": 4.900124635964823e-06, |
|
"logits/chosen": 276869248.0, |
|
"logits/rejected": 289425024.0, |
|
"logps/chosen": -519.8916015625, |
|
"logps/rejected": -265.8385314941406, |
|
"loss": 0.496, |
|
"rewards/chosen": 0.03466583415865898, |
|
"rewards/margins": 0.0368356395047158, |
|
"rewards/rejected": -0.002169805346056819, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3157894736842105, |
|
"grad_norm": 1.305309772491455, |
|
"kl": 0.12101024389266968, |
|
"learning_rate": 4.884292376870567e-06, |
|
"logits/chosen": 293158875.4285714, |
|
"logits/rejected": 304345201.7777778, |
|
"logps/chosen": -500.9711216517857, |
|
"logps/rejected": -344.5473361545139, |
|
"loss": 0.4944, |
|
"rewards/chosen": 0.06058854716164725, |
|
"rewards/margins": 0.07498784883627815, |
|
"rewards/rejected": -0.014399301674630906, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 1.447916865348816, |
|
"kl": 0.1458943486213684, |
|
"learning_rate": 4.867325323737765e-06, |
|
"logits/chosen": 290241507.5555556, |
|
"logits/rejected": 316717494.85714287, |
|
"logps/chosen": -713.4215494791666, |
|
"logps/rejected": -295.7858189174107, |
|
"loss": 0.4897, |
|
"rewards/chosen": 0.08352628681394789, |
|
"rewards/margins": 0.09608901836096294, |
|
"rewards/rejected": -0.012562731547015054, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.35789473684210527, |
|
"grad_norm": 1.3701616525650024, |
|
"kl": 0.3826329708099365, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": 279923008.0, |
|
"logits/rejected": 295235712.0, |
|
"logps/chosen": -549.1046752929688, |
|
"logps/rejected": -335.35260009765625, |
|
"loss": 0.4927, |
|
"rewards/chosen": 0.06883127987384796, |
|
"rewards/margins": 0.08687522634863853, |
|
"rewards/rejected": -0.018043946474790573, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.37894736842105264, |
|
"grad_norm": 1.467745065689087, |
|
"kl": 0.5873703956604004, |
|
"learning_rate": 4.830019673206997e-06, |
|
"logits/chosen": 278496658.28571427, |
|
"logits/rejected": 300650268.4444444, |
|
"logps/chosen": -629.6449497767857, |
|
"logps/rejected": -360.1111653645833, |
|
"loss": 0.4923, |
|
"rewards/chosen": 0.06821728178433009, |
|
"rewards/margins": 0.0920389105403234, |
|
"rewards/rejected": -0.023821628755993314, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9833446741104126, |
|
"kl": 0.7327308654785156, |
|
"learning_rate": 4.809698831278217e-06, |
|
"logits/chosen": 277803520.0, |
|
"logits/rejected": 308114651.4285714, |
|
"logps/chosen": -737.9443901909722, |
|
"logps/rejected": -338.15098353794644, |
|
"loss": 0.4907, |
|
"rewards/chosen": 0.10881086852815416, |
|
"rewards/margins": 0.15786849695538718, |
|
"rewards/rejected": -0.04905762842723301, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 1.154601812362671, |
|
"kl": 0.1896182894706726, |
|
"learning_rate": 4.788278697798619e-06, |
|
"logits/chosen": 283599232.0, |
|
"logits/rejected": 313346368.0, |
|
"logps/chosen": -707.3074951171875, |
|
"logps/rejected": -316.3603515625, |
|
"loss": 0.4943, |
|
"rewards/chosen": 0.04999881610274315, |
|
"rewards/margins": 0.0727224051952362, |
|
"rewards/rejected": -0.022723589092493057, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4421052631578947, |
|
"grad_norm": 1.574962854385376, |
|
"kl": 0.4762837886810303, |
|
"learning_rate": 4.765769467591626e-06, |
|
"logits/chosen": 287853293.71428573, |
|
"logits/rejected": 302228480.0, |
|
"logps/chosen": -572.1301618303571, |
|
"logps/rejected": -282.95513237847223, |
|
"loss": 0.488, |
|
"rewards/chosen": 0.12458467483520508, |
|
"rewards/margins": 0.15258528788884482, |
|
"rewards/rejected": -0.02800061305363973, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.4631578947368421, |
|
"grad_norm": 1.5850838422775269, |
|
"kl": 0.5586809515953064, |
|
"learning_rate": 4.742181853831721e-06, |
|
"logits/chosen": 290416867.5555556, |
|
"logits/rejected": 309413156.5714286, |
|
"logps/chosen": -668.2986653645834, |
|
"logps/rejected": -324.6029575892857, |
|
"loss": 0.4884, |
|
"rewards/chosen": 0.09367326895395915, |
|
"rewards/margins": 0.1388323534102667, |
|
"rewards/rejected": -0.04515908445630755, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.4842105263157895, |
|
"grad_norm": 1.4497779607772827, |
|
"kl": 0.5230355262756348, |
|
"learning_rate": 4.717527082945555e-06, |
|
"logits/chosen": 280489216.0, |
|
"logits/rejected": 309412736.0, |
|
"logps/chosen": -682.4186401367188, |
|
"logps/rejected": -332.3192138671875, |
|
"loss": 0.488, |
|
"rewards/chosen": 0.1085066944360733, |
|
"rewards/margins": 0.14131877198815346, |
|
"rewards/rejected": -0.032812077552080154, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 1.3657130002975464, |
|
"kl": 0.5021036863327026, |
|
"learning_rate": 4.69181688926877e-06, |
|
"logits/chosen": 239562038.85714287, |
|
"logits/rejected": 313432405.3333333, |
|
"logps/chosen": -761.9135044642857, |
|
"logps/rejected": -289.5192057291667, |
|
"loss": 0.4848, |
|
"rewards/chosen": 0.1538386004311698, |
|
"rewards/margins": 0.17894491955401406, |
|
"rewards/rejected": -0.02510631912284427, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 1.446842074394226, |
|
"kl": 0.6509883999824524, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": 293611036.4444444, |
|
"logits/rejected": 316255341.71428573, |
|
"logps/chosen": -721.4010416666666, |
|
"logps/rejected": -285.1228724888393, |
|
"loss": 0.4881, |
|
"rewards/chosen": 0.12438484032948811, |
|
"rewards/margins": 0.1453892659573328, |
|
"rewards/rejected": -0.021004425627844676, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5473684210526316, |
|
"grad_norm": 1.2756037712097168, |
|
"kl": 0.4022580087184906, |
|
"learning_rate": 4.637279676682367e-06, |
|
"logits/chosen": 292368704.0, |
|
"logits/rejected": 317329792.0, |
|
"logps/chosen": -469.4183044433594, |
|
"logps/rejected": -302.54144287109375, |
|
"loss": 0.4966, |
|
"rewards/chosen": 0.07243937253952026, |
|
"rewards/margins": 0.10708872973918915, |
|
"rewards/rejected": -0.034649357199668884, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.5684210526315789, |
|
"grad_norm": 1.2571344375610352, |
|
"kl": 1.006415605545044, |
|
"learning_rate": 4.608478614532215e-06, |
|
"logits/chosen": 252147291.42857143, |
|
"logits/rejected": 306243811.5555556, |
|
"logps/chosen": -751.1729910714286, |
|
"logps/rejected": -298.89708116319446, |
|
"loss": 0.4868, |
|
"rewards/chosen": 0.15643044880458287, |
|
"rewards/margins": 0.2008941164092412, |
|
"rewards/rejected": -0.044463667604658336, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5894736842105263, |
|
"grad_norm": 1.2348135709762573, |
|
"kl": 0.844231903553009, |
|
"learning_rate": 4.578674030756364e-06, |
|
"logits/chosen": 328972714.6666667, |
|
"logits/rejected": 334514980.5714286, |
|
"logps/chosen": -661.2038302951389, |
|
"logps/rejected": -348.25048828125, |
|
"loss": 0.4897, |
|
"rewards/chosen": 0.1231810384326511, |
|
"rewards/margins": 0.1448404531157206, |
|
"rewards/rejected": -0.021659414683069502, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.6105263157894737, |
|
"grad_norm": 1.038001298904419, |
|
"kl": 0.5513710379600525, |
|
"learning_rate": 4.54788011072248e-06, |
|
"logits/chosen": 276993920.0, |
|
"logits/rejected": 302539648.0, |
|
"logps/chosen": -632.3786010742188, |
|
"logps/rejected": -318.97100830078125, |
|
"loss": 0.489, |
|
"rewards/chosen": 0.10169073939323425, |
|
"rewards/margins": 0.1353834606707096, |
|
"rewards/rejected": -0.03369272127747536, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.631578947368421, |
|
"grad_norm": 1.5136151313781738, |
|
"kl": 0.7220326066017151, |
|
"learning_rate": 4.516111510668707e-06, |
|
"logits/chosen": 276634130.28571427, |
|
"logits/rejected": 300450048.0, |
|
"logps/chosen": -667.6506696428571, |
|
"logps/rejected": -341.21375868055554, |
|
"loss": 0.4831, |
|
"rewards/chosen": 0.2004882778440203, |
|
"rewards/margins": 0.24258499533411054, |
|
"rewards/rejected": -0.042096717490090266, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6526315789473685, |
|
"grad_norm": 1.0843844413757324, |
|
"kl": 0.6163355112075806, |
|
"learning_rate": 4.4833833507280884e-06, |
|
"logits/chosen": 307501454.2222222, |
|
"logits/rejected": 303213787.4285714, |
|
"logps/chosen": -486.1819661458333, |
|
"logps/rejected": -337.0064174107143, |
|
"loss": 0.4833, |
|
"rewards/chosen": 0.12439311875237359, |
|
"rewards/margins": 0.1898724100892506, |
|
"rewards/rejected": -0.06547929133687701, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"grad_norm": 1.1725709438323975, |
|
"kl": 0.6453120708465576, |
|
"learning_rate": 4.4497112077322045e-06, |
|
"logits/chosen": 314310528.0, |
|
"logits/rejected": 323297440.0, |
|
"logps/chosen": -570.1083374023438, |
|
"logps/rejected": -277.752197265625, |
|
"loss": 0.4858, |
|
"rewards/chosen": 0.13651692867279053, |
|
"rewards/margins": 0.190122302621603, |
|
"rewards/rejected": -0.053605373948812485, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.6947368421052632, |
|
"grad_norm": 1.653479814529419, |
|
"kl": 1.0490498542785645, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": 311787922.28571427, |
|
"logits/rejected": 350929635.5555556, |
|
"logps/chosen": -676.2388392857143, |
|
"logps/rejected": -374.4116482204861, |
|
"loss": 0.4785, |
|
"rewards/chosen": 0.2800070898873465, |
|
"rewards/margins": 0.29183324911291636, |
|
"rewards/rejected": -0.011826159225569831, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7157894736842105, |
|
"grad_norm": 1.5196908712387085, |
|
"kl": 0.8070676922798157, |
|
"learning_rate": 4.379599518697444e-06, |
|
"logits/chosen": 278536049.7777778, |
|
"logits/rejected": 313191350.85714287, |
|
"logps/chosen": -602.8457573784722, |
|
"logps/rejected": -309.17257254464283, |
|
"loss": 0.475, |
|
"rewards/chosen": 0.19871669345431858, |
|
"rewards/margins": 0.28476871952177985, |
|
"rewards/rejected": -0.08605202606746129, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.7368421052631579, |
|
"grad_norm": 1.0484706163406372, |
|
"kl": 0.807353138923645, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits/chosen": 299660160.0, |
|
"logits/rejected": 306065632.0, |
|
"logps/chosen": -465.7568359375, |
|
"logps/rejected": -229.71621704101562, |
|
"loss": 0.4919, |
|
"rewards/chosen": 0.11409495025873184, |
|
"rewards/margins": 0.16953209787607193, |
|
"rewards/rejected": -0.05543714761734009, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.7578947368421053, |
|
"grad_norm": 1.3274704217910767, |
|
"kl": 0.8778089880943298, |
|
"learning_rate": 4.305909905149389e-06, |
|
"logits/chosen": 270111561.14285713, |
|
"logits/rejected": 306747079.1111111, |
|
"logps/chosen": -576.6547502790179, |
|
"logps/rejected": -307.5883517795139, |
|
"loss": 0.4756, |
|
"rewards/chosen": 0.23983100482395717, |
|
"rewards/margins": 0.28282778887521653, |
|
"rewards/rejected": -0.04299678405125936, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.7789473684210526, |
|
"grad_norm": 1.6128290891647339, |
|
"kl": 1.20198392868042, |
|
"learning_rate": 4.267766952966369e-06, |
|
"logits/chosen": 279583146.6666667, |
|
"logits/rejected": 321376694.85714287, |
|
"logps/chosen": -716.5959201388889, |
|
"logps/rejected": -284.7782505580357, |
|
"loss": 0.4762, |
|
"rewards/chosen": 0.2411472267574734, |
|
"rewards/margins": 0.34859214321015375, |
|
"rewards/rejected": -0.10744491645268031, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.4675477743148804, |
|
"kl": 0.7991436719894409, |
|
"learning_rate": 4.228782639455674e-06, |
|
"logits/chosen": 289123392.0, |
|
"logits/rejected": 312878304.0, |
|
"logps/chosen": -548.8831176757812, |
|
"logps/rejected": -298.0077819824219, |
|
"loss": 0.4854, |
|
"rewards/chosen": 0.1557559221982956, |
|
"rewards/margins": 0.20764141902327538, |
|
"rewards/rejected": -0.05188549682497978, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.8210526315789474, |
|
"grad_norm": 1.5274215936660767, |
|
"kl": 1.3201950788497925, |
|
"learning_rate": 4.188975519039151e-06, |
|
"logits/chosen": 306054198.85714287, |
|
"logits/rejected": 318293873.7777778, |
|
"logps/chosen": -641.423828125, |
|
"logps/rejected": -323.07017686631946, |
|
"loss": 0.4675, |
|
"rewards/chosen": 0.2957319532121931, |
|
"rewards/margins": 0.35744103268971517, |
|
"rewards/rejected": -0.061709079477522105, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 1.8003302812576294, |
|
"kl": 1.431631326675415, |
|
"learning_rate": 4.1483645377501726e-06, |
|
"logits/chosen": 286558065.7777778, |
|
"logits/rejected": 281730944.0, |
|
"logps/chosen": -683.1265190972222, |
|
"logps/rejected": -394.296875, |
|
"loss": 0.4595, |
|
"rewards/chosen": 0.2565513981713189, |
|
"rewards/margins": 0.4815535848102872, |
|
"rewards/rejected": -0.22500218663896834, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8631578947368421, |
|
"grad_norm": 1.385386347770691, |
|
"kl": 1.2051702737808228, |
|
"learning_rate": 4.106969024216348e-06, |
|
"logits/chosen": 297838432.0, |
|
"logits/rejected": 317846336.0, |
|
"logps/chosen": -683.0381469726562, |
|
"logps/rejected": -333.70623779296875, |
|
"loss": 0.4735, |
|
"rewards/chosen": 0.30369484424591064, |
|
"rewards/margins": 0.38917434215545654, |
|
"rewards/rejected": -0.0854794979095459, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.8842105263157894, |
|
"grad_norm": 2.4110379219055176, |
|
"kl": 1.4937351942062378, |
|
"learning_rate": 4.064808680460149e-06, |
|
"logits/chosen": 275613819.5862069, |
|
"logits/rejected": 329032821.0285714, |
|
"logps/chosen": -821.4407327586207, |
|
"logps/rejected": -363.7448660714286, |
|
"loss": 0.4399, |
|
"rewards/chosen": 0.4741830825805664, |
|
"rewards/margins": 0.6639985765729631, |
|
"rewards/rejected": -0.18981549399239675, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9052631578947369, |
|
"grad_norm": 1.4786880016326904, |
|
"kl": 1.3580402135849, |
|
"learning_rate": 4.021903572521802e-06, |
|
"logits/chosen": 292805532.9032258, |
|
"logits/rejected": 320448667.1515151, |
|
"logps/chosen": -615.5443548387096, |
|
"logps/rejected": -311.78767163825756, |
|
"loss": 0.4669, |
|
"rewards/chosen": 0.3574201829971806, |
|
"rewards/margins": 0.4636997006622344, |
|
"rewards/rejected": -0.10627951766505386, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.9263157894736842, |
|
"grad_norm": 1.0773484706878662, |
|
"kl": 1.1364717483520508, |
|
"learning_rate": 3.978274120908957e-06, |
|
"logits/chosen": 298700241.45454544, |
|
"logits/rejected": 323973846.7096774, |
|
"logps/chosen": -521.9517045454545, |
|
"logps/rejected": -351.1326864919355, |
|
"loss": 0.4819, |
|
"rewards/chosen": 0.21399710395119406, |
|
"rewards/margins": 0.30352772575669273, |
|
"rewards/rejected": -0.08953062180549867, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.9473684210526315, |
|
"grad_norm": 1.8579978942871094, |
|
"kl": 1.5150351524353027, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits/chosen": 250053461.33333334, |
|
"logits/rejected": 297370359.7419355, |
|
"logps/chosen": -674.4289180871212, |
|
"logps/rejected": -328.01861769153226, |
|
"loss": 0.4562, |
|
"rewards/chosen": 0.3749615929343484, |
|
"rewards/margins": 0.5518351999545726, |
|
"rewards/rejected": -0.17687360702022428, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.968421052631579, |
|
"grad_norm": 1.442551612854004, |
|
"kl": 1.4570834636688232, |
|
"learning_rate": 3.888925582549006e-06, |
|
"logits/chosen": 296643956.3636364, |
|
"logits/rejected": 300621658.83870965, |
|
"logps/chosen": -726.8507339015151, |
|
"logps/rejected": -310.6574470766129, |
|
"loss": 0.4584, |
|
"rewards/chosen": 0.3162169022993608, |
|
"rewards/margins": 0.4777500119027504, |
|
"rewards/rejected": -0.16153310960338962, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.9894736842105263, |
|
"grad_norm": 1.5954558849334717, |
|
"kl": 1.3480093479156494, |
|
"learning_rate": 3.8432490208670605e-06, |
|
"logits/chosen": 266226656.96969697, |
|
"logits/rejected": 327847011.0967742, |
|
"logps/chosen": -818.7182765151515, |
|
"logps/rejected": -304.5207283266129, |
|
"loss": 0.4383, |
|
"rewards/chosen": 0.4229244463371508, |
|
"rewards/margins": 0.5965663433541067, |
|
"rewards/rejected": -0.17364189701695595, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6833202838897705, |
|
"kl": 0.490053653717041, |
|
"learning_rate": 3.796933145401304e-06, |
|
"logits/chosen": 347558865.45454544, |
|
"logits/rejected": 323800656.84210527, |
|
"logps/chosen": -481.98979048295456, |
|
"logps/rejected": -379.3189761513158, |
|
"loss": 0.231, |
|
"rewards/chosen": 0.20139399441805753, |
|
"rewards/margins": 0.35350871200196476, |
|
"rewards/rejected": -0.15211471758390727, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0210526315789474, |
|
"grad_norm": 1.429196834564209, |
|
"kl": 2.1841373443603516, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": 271206684.4444444, |
|
"logits/rejected": 308082724.5714286, |
|
"logps/chosen": -739.5323893229166, |
|
"logps/rejected": -329.6499720982143, |
|
"loss": 0.452, |
|
"rewards/chosen": 0.4243852562374539, |
|
"rewards/margins": 0.6468354274356176, |
|
"rewards/rejected": -0.2224501711981637, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.0421052631578946, |
|
"grad_norm": 1.9531340599060059, |
|
"kl": 2.2712414264678955, |
|
"learning_rate": 3.7024719222984696e-06, |
|
"logits/chosen": 283599008.0, |
|
"logits/rejected": 300947008.0, |
|
"logps/chosen": -664.508056640625, |
|
"logps/rejected": -342.87835693359375, |
|
"loss": 0.4543, |
|
"rewards/chosen": 0.5367215871810913, |
|
"rewards/margins": 0.7264900505542755, |
|
"rewards/rejected": -0.1897684633731842, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 144, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|