|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9993753903810119, |
|
"eval_steps": 500, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006246096189881324, |
|
"grad_norm": 12.530923843383789, |
|
"kl": 0.36666667461395264, |
|
"learning_rate": 1.5625e-07, |
|
"logits/chosen": 765380163.5404255, |
|
"logits/rejected": 259708735.73877552, |
|
"logps/chosen": -2094.8425531914895, |
|
"logps/rejected": -1494.334693877551, |
|
"loss": 0.5003, |
|
"rewards/chosen": 1.1236422924285239, |
|
"rewards/margins": -85949919.52941893, |
|
"rewards/rejected": 85949920.65306123, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012492192379762648, |
|
"grad_norm": 11.841174125671387, |
|
"kl": 0.5041666626930237, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": 922524724.0677966, |
|
"logits/rejected": 222366871.0819672, |
|
"logps/chosen": -2251.6610169491523, |
|
"logps/rejected": -1449.5737704918033, |
|
"loss": 0.4989, |
|
"rewards/chosen": 1.0612761933924788, |
|
"rewards/margins": -41449781.49610086, |
|
"rewards/rejected": 41449782.55737705, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018738288569643973, |
|
"grad_norm": 16.48398780822754, |
|
"kl": 1.5750000476837158, |
|
"learning_rate": 4.6875000000000006e-07, |
|
"logits/chosen": 586256366.4206009, |
|
"logits/rejected": 338159392.1295546, |
|
"logps/chosen": -2081.922746781116, |
|
"logps/rejected": -1634.8502024291497, |
|
"loss": 0.4964, |
|
"rewards/chosen": 2.080868045659536, |
|
"rewards/margins": -31932565.23897001, |
|
"rewards/rejected": 31932567.31983806, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.024984384759525295, |
|
"grad_norm": 10.12692928314209, |
|
"kl": 4.645833492279053, |
|
"learning_rate": 6.25e-07, |
|
"logits/chosen": 646690373.632, |
|
"logits/rejected": 292872975.5826087, |
|
"logps/chosen": -1991.936, |
|
"logps/rejected": -1609.0434782608695, |
|
"loss": 0.4901, |
|
"rewards/chosen": 1.5784925537109376, |
|
"rewards/margins": -24520791.46498571, |
|
"rewards/rejected": 24520793.04347826, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03123048094940662, |
|
"grad_norm": 9.373343467712402, |
|
"kl": 10.037500381469727, |
|
"learning_rate": 7.8125e-07, |
|
"logits/chosen": 722191438.0627803, |
|
"logits/rejected": 241486644.7937743, |
|
"logps/chosen": -2005.9551569506727, |
|
"logps/rejected": -1370.645914396887, |
|
"loss": 0.4737, |
|
"rewards/chosen": 1.6375982857605802, |
|
"rewards/margins": 6768824.921644978, |
|
"rewards/rejected": -6768823.284046693, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.037476577139287946, |
|
"grad_norm": 9.365606307983398, |
|
"kl": 23.879167556762695, |
|
"learning_rate": 9.375000000000001e-07, |
|
"logits/chosen": 924951578.2564102, |
|
"logits/rejected": 539978277.4634147, |
|
"logps/chosen": -2154.119658119658, |
|
"logps/rejected": -1664.780487804878, |
|
"loss": 0.4414, |
|
"rewards/chosen": 1.9932785686264691, |
|
"rewards/margins": -80350996.90103038, |
|
"rewards/rejected": 80350998.89430894, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04372267332916927, |
|
"grad_norm": 8.471269607543945, |
|
"kl": 40.59166717529297, |
|
"learning_rate": 1.0937500000000001e-06, |
|
"logits/chosen": 1037311177.5100402, |
|
"logits/rejected": 570116672.2770563, |
|
"logps/chosen": -2063.0361445783133, |
|
"logps/rejected": -1451.2207792207791, |
|
"loss": 0.4117, |
|
"rewards/chosen": 2.058617281626506, |
|
"rewards/margins": -23764258.51281129, |
|
"rewards/rejected": 23764260.57142857, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04996876951905059, |
|
"grad_norm": 7.911782741546631, |
|
"kl": 63.125, |
|
"learning_rate": 1.25e-06, |
|
"logits/chosen": 1458762482.9490197, |
|
"logits/rejected": 982212790.0444444, |
|
"logps/chosen": -2011.2313725490196, |
|
"logps/rejected": -1490.9155555555556, |
|
"loss": 0.3641, |
|
"rewards/chosen": 2.3586921243106618, |
|
"rewards/margins": -33682500.1835301, |
|
"rewards/rejected": 33682502.542222224, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.056214865708931916, |
|
"grad_norm": 4.834893703460693, |
|
"kl": 68.25, |
|
"learning_rate": 1.40625e-06, |
|
"logits/chosen": 1641465751.8644068, |
|
"logits/rejected": 1301059348.9836066, |
|
"logps/chosen": -1905.6271186440679, |
|
"logps/rejected": -1531.4754098360656, |
|
"loss": 0.3385, |
|
"rewards/chosen": 2.5306468898967163, |
|
"rewards/margins": -92570008.74804163, |
|
"rewards/rejected": 92570011.27868852, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06246096189881324, |
|
"grad_norm": 4.706682205200195, |
|
"kl": 77.67500305175781, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": 2110757053.3109243, |
|
"logits/rejected": 1733322125.7520661, |
|
"logps/chosen": -1932.1008403361345, |
|
"logps/rejected": -1390.4132231404958, |
|
"loss": 0.305, |
|
"rewards/chosen": 2.7748095087644433, |
|
"rewards/margins": -111042684.16733925, |
|
"rewards/rejected": 111042686.94214876, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06870705808869457, |
|
"grad_norm": 4.392908096313477, |
|
"kl": 81.23332977294922, |
|
"learning_rate": 1.71875e-06, |
|
"logits/chosen": 2346525682.9276595, |
|
"logits/rejected": 2144470596.9632654, |
|
"logps/chosen": -1960.0340425531915, |
|
"logps/rejected": -1477.0938775510203, |
|
"loss": 0.2917, |
|
"rewards/chosen": 2.3351378095910906, |
|
"rewards/margins": -107784122.7913928, |
|
"rewards/rejected": 107784125.12653062, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07495315427857589, |
|
"grad_norm": 6.164892196655273, |
|
"kl": 86.1624984741211, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"logits/chosen": 2208228740.413793, |
|
"logits/rejected": 2164937364.645161, |
|
"logps/chosen": -1969.9310344827586, |
|
"logps/rejected": -1540.774193548387, |
|
"loss": 0.2827, |
|
"rewards/chosen": 2.851356769430226, |
|
"rewards/margins": -100023912.50348194, |
|
"rewards/rejected": 100023915.35483871, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08119925046845722, |
|
"grad_norm": 3.74001145362854, |
|
"kl": 92.03333282470703, |
|
"learning_rate": 2.0312500000000002e-06, |
|
"logits/chosen": 2025868071.9266055, |
|
"logits/rejected": 2308852290.442748, |
|
"logps/chosen": -2005.2844036697247, |
|
"logps/rejected": -1620.1526717557251, |
|
"loss": 0.2535, |
|
"rewards/chosen": 3.949506287181049, |
|
"rewards/margins": -85202893.14973035, |
|
"rewards/rejected": 85202897.09923664, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.08744534665833854, |
|
"grad_norm": 3.4245049953460693, |
|
"kl": 93.34166717529297, |
|
"learning_rate": 2.1875000000000002e-06, |
|
"logits/chosen": 1961691515.2592592, |
|
"logits/rejected": 2473542115.9156117, |
|
"logps/chosen": -1924.2469135802469, |
|
"logps/rejected": -1569.0801687763712, |
|
"loss": 0.2364, |
|
"rewards/chosen": 3.217338405028292, |
|
"rewards/margins": -110899300.47886413, |
|
"rewards/rejected": 110899303.69620253, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09369144284821987, |
|
"grad_norm": 2.3636491298675537, |
|
"kl": 75.18333435058594, |
|
"learning_rate": 2.3437500000000002e-06, |
|
"logits/chosen": 1799070057.8955824, |
|
"logits/rejected": 2992154739.255411, |
|
"logps/chosen": -1946.7951807228915, |
|
"logps/rejected": -1687.5497835497836, |
|
"loss": 0.1854, |
|
"rewards/chosen": 3.330800496909513, |
|
"rewards/margins": -75541336.65188348, |
|
"rewards/rejected": 75541339.98268399, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.09993753903810118, |
|
"grad_norm": 5.065158843994141, |
|
"kl": 37.454166412353516, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": 1232394418.4564316, |
|
"logits/rejected": 3475059836.251046, |
|
"logps/chosen": -1765.97510373444, |
|
"logps/rejected": -1910.4937238493724, |
|
"loss": 0.1416, |
|
"rewards/chosen": 3.2204437889004147, |
|
"rewards/margins": -76902045.44901228, |
|
"rewards/rejected": 76902048.66945606, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1061836352279825, |
|
"grad_norm": 2.6473050117492676, |
|
"kl": 10.274999618530273, |
|
"learning_rate": 2.65625e-06, |
|
"logits/chosen": 616933788.9711934, |
|
"logits/rejected": 3842477993.586498, |
|
"logps/chosen": -1740.9053497942386, |
|
"logps/rejected": -2050.9704641350213, |
|
"loss": 0.103, |
|
"rewards/chosen": 3.3858763141396606, |
|
"rewards/margins": -32738116.985431705, |
|
"rewards/rejected": 32738120.371308018, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11242973141786383, |
|
"grad_norm": 1.7162717580795288, |
|
"kl": 12.845833778381348, |
|
"learning_rate": 2.8125e-06, |
|
"logits/chosen": 694821410.1333333, |
|
"logits/rejected": 4035479688.5333333, |
|
"logps/chosen": -1776.4, |
|
"logps/rejected": -2271.4666666666667, |
|
"loss": 0.0757, |
|
"rewards/chosen": 4.09275156656901, |
|
"rewards/margins": -24005026.307248432, |
|
"rewards/rejected": 24005030.4, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11867582760774516, |
|
"grad_norm": 1.7540525197982788, |
|
"kl": 2.304166555404663, |
|
"learning_rate": 2.96875e-06, |
|
"logits/chosen": 658665198.9333333, |
|
"logits/rejected": 4033801966.9333334, |
|
"logps/chosen": -2076.266666666667, |
|
"logps/rejected": -2257.866666666667, |
|
"loss": 0.0738, |
|
"rewards/chosen": 3.6998219807942707, |
|
"rewards/margins": -76789833.10017802, |
|
"rewards/rejected": 76789836.8, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.12492192379762648, |
|
"grad_norm": 1.0445038080215454, |
|
"kl": 19.09166717529297, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": 273522077.6156863, |
|
"logits/rejected": 3739305902.08, |
|
"logps/chosen": -1813.835294117647, |
|
"logps/rejected": -2274.56, |
|
"loss": 0.0878, |
|
"rewards/chosen": 3.6624865962009805, |
|
"rewards/margins": -22223153.813068956, |
|
"rewards/rejected": 22223157.475555554, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1311680199875078, |
|
"grad_norm": 1.0965297222137451, |
|
"kl": 9.591666221618652, |
|
"learning_rate": 3.28125e-06, |
|
"logits/chosen": 526766851.2133891, |
|
"logits/rejected": 3644206237.2116184, |
|
"logps/chosen": -1742.8619246861924, |
|
"logps/rejected": -2335.8672199170123, |
|
"loss": 0.0538, |
|
"rewards/chosen": 4.146895532328713, |
|
"rewards/margins": -51111124.5501999, |
|
"rewards/rejected": 51111128.69709544, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.13741411617738913, |
|
"grad_norm": 2.170487880706787, |
|
"kl": 6.2791666984558105, |
|
"learning_rate": 3.4375e-06, |
|
"logits/chosen": 213918950.63063064, |
|
"logits/rejected": 3645207829.8294573, |
|
"logps/chosen": -1730.162162162162, |
|
"logps/rejected": -2354.3565891472867, |
|
"loss": 0.0487, |
|
"rewards/chosen": 4.383471514727618, |
|
"rewards/margins": -8812543.585520733, |
|
"rewards/rejected": 8812547.968992248, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14366021236727045, |
|
"grad_norm": 1.0979344844818115, |
|
"kl": 34.391666412353516, |
|
"learning_rate": 3.59375e-06, |
|
"logits/chosen": 134268182.49372384, |
|
"logits/rejected": 3293625076.315353, |
|
"logps/chosen": -1707.7824267782428, |
|
"logps/rejected": -2157.6763485477177, |
|
"loss": 0.0577, |
|
"rewards/chosen": 4.70439218177955, |
|
"rewards/margins": -32989264.27486093, |
|
"rewards/rejected": 32989268.979253113, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.14990630855715179, |
|
"grad_norm": 0.6703037023544312, |
|
"kl": 26.808332443237305, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": 169325389.7327935, |
|
"logits/rejected": 3147348117.424893, |
|
"logps/chosen": -1694.1862348178138, |
|
"logps/rejected": -2355.0901287553647, |
|
"loss": 0.0532, |
|
"rewards/chosen": 4.615021369717865, |
|
"rewards/margins": -13508923.110300519, |
|
"rewards/rejected": 13508927.725321889, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1561524047470331, |
|
"grad_norm": 0.5291562676429749, |
|
"kl": 11.0, |
|
"learning_rate": 3.90625e-06, |
|
"logits/chosen": -25907824.813559324, |
|
"logits/rejected": 3046577403.803279, |
|
"logps/chosen": -1716.8813559322034, |
|
"logps/rejected": -2457.1803278688526, |
|
"loss": 0.0509, |
|
"rewards/chosen": 4.240614551608846, |
|
"rewards/margins": 4334839.125860454, |
|
"rewards/rejected": -4334834.8852459015, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16239850093691444, |
|
"grad_norm": 0.6935663819313049, |
|
"kl": 27.100000381469727, |
|
"learning_rate": 4.0625000000000005e-06, |
|
"logits/chosen": -2747534.582278481, |
|
"logits/rejected": 2980406832.4609056, |
|
"logps/chosen": -1617.282700421941, |
|
"logps/rejected": -2338.5020576131687, |
|
"loss": 0.0494, |
|
"rewards/chosen": 5.010839019646625, |
|
"rewards/margins": -9376828.306033408, |
|
"rewards/rejected": 9376833.316872427, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16864459712679575, |
|
"grad_norm": 0.3774864375591278, |
|
"kl": 50.74166488647461, |
|
"learning_rate": 4.21875e-06, |
|
"logits/chosen": -33710690.384313725, |
|
"logits/rejected": 2680178897.351111, |
|
"logps/chosen": -1679.1843137254903, |
|
"logps/rejected": -2161.92, |
|
"loss": 0.0471, |
|
"rewards/chosen": 4.944170304840687, |
|
"rewards/margins": -12017750.078051917, |
|
"rewards/rejected": 12017755.022222223, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1748906933166771, |
|
"grad_norm": 0.31545472145080566, |
|
"kl": 21.391666412353516, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"logits/chosen": -161463154.61087865, |
|
"logits/rejected": 2881086561.726141, |
|
"logps/chosen": -1553.6066945606694, |
|
"logps/rejected": -2346.4896265560164, |
|
"loss": 0.0364, |
|
"rewards/chosen": 5.193703112741893, |
|
"rewards/margins": 1904972.62938776, |
|
"rewards/rejected": -1904967.4356846474, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1811367895065584, |
|
"grad_norm": 0.24622267484664917, |
|
"kl": 20.46666717529297, |
|
"learning_rate": 4.53125e-06, |
|
"logits/chosen": 30635895.466666665, |
|
"logits/rejected": 3146846481.0666666, |
|
"logps/chosen": -1706.6666666666667, |
|
"logps/rejected": -2571.0666666666666, |
|
"loss": 0.0382, |
|
"rewards/chosen": 5.129107666015625, |
|
"rewards/margins": -16690829.004225668, |
|
"rewards/rejected": 16690834.133333333, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.18738288569643974, |
|
"grad_norm": 0.484261155128479, |
|
"kl": 21.75, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -77826685.90163934, |
|
"logits/rejected": 2589804995.254237, |
|
"logps/chosen": -1657.5737704918033, |
|
"logps/rejected": -2377.491525423729, |
|
"loss": 0.0368, |
|
"rewards/chosen": 5.271223224577357, |
|
"rewards/margins": -5416552.288098809, |
|
"rewards/rejected": 5416557.559322034, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19362898188632105, |
|
"grad_norm": 0.40670040249824524, |
|
"kl": 30.483333587646484, |
|
"learning_rate": 4.84375e-06, |
|
"logits/chosen": -26928078.378854625, |
|
"logits/rejected": 2463001409.770751, |
|
"logps/chosen": -1662.2378854625551, |
|
"logps/rejected": -2253.913043478261, |
|
"loss": 0.051, |
|
"rewards/chosen": 5.290996265831498, |
|
"rewards/margins": 5252345.148703775, |
|
"rewards/rejected": -5252339.85770751, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.19987507807620236, |
|
"grad_norm": 0.18753188848495483, |
|
"kl": 24.350000381469727, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -89292537.856, |
|
"logits/rejected": 2722431545.878261, |
|
"logps/chosen": -1611.712, |
|
"logps/rejected": -2609.5304347826086, |
|
"loss": 0.0265, |
|
"rewards/chosen": 5.405384765625, |
|
"rewards/margins": 2749760.396689113, |
|
"rewards/rejected": -2749754.9913043478, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2061211742660837, |
|
"grad_norm": 0.3736235201358795, |
|
"kl": 38.81666564941406, |
|
"learning_rate": 4.999851262500375e-06, |
|
"logits/chosen": -188500339.57805908, |
|
"logits/rejected": 2439669566.1563787, |
|
"logps/chosen": -1609.5864978902953, |
|
"logps/rejected": -2361.5473251028807, |
|
"loss": 0.0298, |
|
"rewards/chosen": 5.654172229364452, |
|
"rewards/margins": 11241722.065694863, |
|
"rewards/rejected": -11241716.411522634, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.212367270455965, |
|
"grad_norm": 0.20251910388469696, |
|
"kl": 39.24166488647461, |
|
"learning_rate": 4.999405067699773e-06, |
|
"logits/chosen": -153447545.4915254, |
|
"logits/rejected": 2220780829.377049, |
|
"logps/chosen": -1598.6440677966102, |
|
"logps/rejected": -2233.44262295082, |
|
"loss": 0.0324, |
|
"rewards/chosen": 5.615495067531779, |
|
"rewards/margins": -22238567.499259032, |
|
"rewards/rejected": 22238573.1147541, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.21861336664584635, |
|
"grad_norm": 0.6630544662475586, |
|
"kl": 55.57500076293945, |
|
"learning_rate": 4.998661468690914e-06, |
|
"logits/chosen": -90310722.06451613, |
|
"logits/rejected": 2574579500.137931, |
|
"logps/chosen": -1666.1935483870968, |
|
"logps/rejected": -2460.8275862068967, |
|
"loss": 0.0478, |
|
"rewards/chosen": 5.576678860572077, |
|
"rewards/margins": -12666122.975045277, |
|
"rewards/rejected": 12666128.551724138, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.22485946283572766, |
|
"grad_norm": 0.3803296685218811, |
|
"kl": 48.650001525878906, |
|
"learning_rate": 4.997620553954645e-06, |
|
"logits/chosen": -155793365.8600823, |
|
"logits/rejected": 2239510571.206751, |
|
"logps/chosen": -1490.8312757201645, |
|
"logps/rejected": -2156.2869198312237, |
|
"loss": 0.036, |
|
"rewards/chosen": 5.737128866062243, |
|
"rewards/margins": -4821880.102533582, |
|
"rewards/rejected": 4821885.839662448, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.231105559025609, |
|
"grad_norm": 0.28740158677101135, |
|
"kl": 105.14167022705078, |
|
"learning_rate": 4.996282447349408e-06, |
|
"logits/chosen": -180883622.50406504, |
|
"logits/rejected": 2277471223.2478633, |
|
"logps/chosen": -1626.1463414634147, |
|
"logps/rejected": -2156.4444444444443, |
|
"loss": 0.0546, |
|
"rewards/chosen": 6.121557716431656, |
|
"rewards/margins": -17472389.91263032, |
|
"rewards/rejected": 17472396.034188036, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.23735165521549031, |
|
"grad_norm": 0.570648193359375, |
|
"kl": 74.65833282470703, |
|
"learning_rate": 4.994647308096509e-06, |
|
"logits/chosen": -326146294.439834, |
|
"logits/rejected": 2238810668.9874477, |
|
"logps/chosen": -1514.8215767634854, |
|
"logps/rejected": -2162.0753138075315, |
|
"loss": 0.056, |
|
"rewards/chosen": 5.687685384789938, |
|
"rewards/margins": 8352651.5788987735, |
|
"rewards/rejected": -8352645.891213389, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.24359775140537165, |
|
"grad_norm": 0.23554614186286926, |
|
"kl": 37.224998474121094, |
|
"learning_rate": 4.992715330761167e-06, |
|
"logits/chosen": -252608231.93162394, |
|
"logits/rejected": 2436106323.2520328, |
|
"logps/chosen": -1510.017094017094, |
|
"logps/rejected": -2297.756097560976, |
|
"loss": 0.0355, |
|
"rewards/chosen": 5.856194390190972, |
|
"rewards/margins": 12085535.872454552, |
|
"rewards/rejected": -12085530.016260162, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.24984384759525297, |
|
"grad_norm": 0.2651134729385376, |
|
"kl": 53.724998474121094, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits/chosen": -353363390.35897434, |
|
"logits/rejected": 2253193748.8130083, |
|
"logps/chosen": -1603.145299145299, |
|
"logps/rejected": -2289.430894308943, |
|
"loss": 0.0301, |
|
"rewards/chosen": 5.701554257645566, |
|
"rewards/margins": 16624133.701554257, |
|
"rewards/rejected": -16624128.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2560899437851343, |
|
"grad_norm": 0.19966034591197968, |
|
"kl": 64.79582977294922, |
|
"learning_rate": 4.987961816680493e-06, |
|
"logits/chosen": -321853638.1935484, |
|
"logits/rejected": 2614515782.62069, |
|
"logps/chosen": -1530.0645161290322, |
|
"logps/rejected": -2399.448275862069, |
|
"loss": 0.0292, |
|
"rewards/chosen": 6.3989006780808975, |
|
"rewards/margins": -6738289.8769613905, |
|
"rewards/rejected": 6738296.275862069, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.2623360399750156, |
|
"grad_norm": 0.12018506973981857, |
|
"kl": 35.98749923706055, |
|
"learning_rate": 4.985140845555799e-06, |
|
"logits/chosen": -93972786.7854251, |
|
"logits/rejected": 2599676422.5922747, |
|
"logps/chosen": -1611.2064777327935, |
|
"logps/rejected": -2370.74678111588, |
|
"loss": 0.0286, |
|
"rewards/chosen": 6.0249038263853745, |
|
"rewards/margins": -1163880.9450532552, |
|
"rewards/rejected": 1163886.9699570816, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.26858213616489696, |
|
"grad_norm": 0.3969196379184723, |
|
"kl": 45.61249923706055, |
|
"learning_rate": 4.982024167522638e-06, |
|
"logits/chosen": -198077725.37704918, |
|
"logits/rejected": 2726813001.762712, |
|
"logps/chosen": -1471.672131147541, |
|
"logps/rejected": -2450.3050847457625, |
|
"loss": 0.0393, |
|
"rewards/chosen": 5.829876508869108, |
|
"rewards/margins": 8360851.389198543, |
|
"rewards/rejected": -8360845.559322034, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.27482823235477827, |
|
"grad_norm": 0.34571054577827454, |
|
"kl": 108.92500305175781, |
|
"learning_rate": 4.978612153434527e-06, |
|
"logits/chosen": -214673374.4262295, |
|
"logits/rejected": 2318099403.9322033, |
|
"logps/chosen": -1586.7540983606557, |
|
"logps/rejected": -2199.0508474576272, |
|
"loss": 0.0471, |
|
"rewards/chosen": 6.15484619140625, |
|
"rewards/margins": 1069003.239591954, |
|
"rewards/rejected": -1068997.0847457626, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2810743285446596, |
|
"grad_norm": 0.30018988251686096, |
|
"kl": 31.90833282470703, |
|
"learning_rate": 4.97490520928702e-06, |
|
"logits/chosen": -101820645.4439834, |
|
"logits/rejected": 3018214138.6443515, |
|
"logps/chosen": -1520.331950207469, |
|
"logps/rejected": -2587.0460251046024, |
|
"loss": 0.018, |
|
"rewards/chosen": 6.184298313504927, |
|
"rewards/margins": 22056342.786808774, |
|
"rewards/rejected": -22056336.60251046, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.2873204247345409, |
|
"grad_norm": 0.5999372005462646, |
|
"kl": 7.224999904632568, |
|
"learning_rate": 4.970903776169403e-06, |
|
"logits/chosen": -140607768.06837606, |
|
"logits/rejected": 2407871496.3252034, |
|
"logps/chosen": -1606.4273504273503, |
|
"logps/rejected": -2584.9756097560976, |
|
"loss": 0.0167, |
|
"rewards/chosen": 5.993517753405449, |
|
"rewards/margins": 12516249.765875475, |
|
"rewards/rejected": -12516243.772357723, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.29356652092442226, |
|
"grad_norm": 0.3130801022052765, |
|
"kl": 6.9666666984558105, |
|
"learning_rate": 4.966608330212198e-06, |
|
"logits/chosen": -466865134.6440678, |
|
"logits/rejected": 2092888936.918033, |
|
"logps/chosen": -1367.1864406779662, |
|
"logps/rejected": -2658.8852459016393, |
|
"loss": 0.0166, |
|
"rewards/chosen": 6.395548675019862, |
|
"rewards/margins": 12007318.133253593, |
|
"rewards/rejected": -12007311.737704918, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.29981261711430357, |
|
"grad_norm": 0.26502835750579834, |
|
"kl": 23.16666603088379, |
|
"learning_rate": 4.962019382530521e-06, |
|
"logits/chosen": -196223522.13333333, |
|
"logits/rejected": 2744053486.9333334, |
|
"logps/chosen": -1525.4333333333334, |
|
"logps/rejected": -2999.4666666666667, |
|
"loss": 0.0198, |
|
"rewards/chosen": 6.421251424153646, |
|
"rewards/margins": -11612441.578748576, |
|
"rewards/rejected": 11612448.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3060587133041849, |
|
"grad_norm": 0.33838242292404175, |
|
"kl": 30.891666412353516, |
|
"learning_rate": 4.957137479163253e-06, |
|
"logits/chosen": -261833796.26666668, |
|
"logits/rejected": 1684432486.4, |
|
"logps/chosen": -1455.4666666666667, |
|
"logps/rejected": -2374.9333333333334, |
|
"loss": 0.0183, |
|
"rewards/chosen": 5.891462198893229, |
|
"rewards/margins": -4358305.308537802, |
|
"rewards/rejected": 4358311.2, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3123048094940662, |
|
"grad_norm": 0.9754793047904968, |
|
"kl": 22.049999237060547, |
|
"learning_rate": 4.9519632010080765e-06, |
|
"logits/chosen": -299084816.78688526, |
|
"logits/rejected": 1817093345.6271186, |
|
"logps/chosen": -1480.655737704918, |
|
"logps/rejected": -2994.4406779661017, |
|
"loss": 0.0212, |
|
"rewards/chosen": 6.027991122886783, |
|
"rewards/margins": 7320732.265279259, |
|
"rewards/rejected": -7320726.237288136, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3185509056839475, |
|
"grad_norm": 0.3510875999927521, |
|
"kl": 16.53333282470703, |
|
"learning_rate": 4.9464971637523465e-06, |
|
"logits/chosen": 257342375.9638009, |
|
"logits/rejected": 3064983599.4440155, |
|
"logps/chosen": -1668.633484162896, |
|
"logps/rejected": -3069.034749034749, |
|
"loss": 0.0189, |
|
"rewards/chosen": 5.769157858455882, |
|
"rewards/margins": -4938778.354394265, |
|
"rewards/rejected": 4938784.123552124, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.32479700187382887, |
|
"grad_norm": 0.16266603767871857, |
|
"kl": 0.0, |
|
"learning_rate": 4.9407400177998335e-06, |
|
"logits/chosen": -265875506.56790122, |
|
"logits/rejected": 3530719093.7383966, |
|
"logps/chosen": -1570.897119341564, |
|
"logps/rejected": -3502.987341772152, |
|
"loss": 0.0126, |
|
"rewards/chosen": 6.517467106320731, |
|
"rewards/margins": -4732542.343292387, |
|
"rewards/rejected": 4732548.860759494, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3310430980637102, |
|
"grad_norm": 0.12264657765626907, |
|
"kl": 7.008333206176758, |
|
"learning_rate": 4.9346924481933345e-06, |
|
"logits/chosen": -322727020.4235294, |
|
"logits/rejected": 2676152365.5111113, |
|
"logps/chosen": -1618.9490196078432, |
|
"logps/rejected": -2990.08, |
|
"loss": 0.0204, |
|
"rewards/chosen": 6.062105066636029, |
|
"rewards/margins": 15871837.670993956, |
|
"rewards/rejected": -15871831.608888889, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.3372891942535915, |
|
"grad_norm": 0.17087456583976746, |
|
"kl": 2.3583333492279053, |
|
"learning_rate": 4.928355174533153e-06, |
|
"logits/chosen": -415921328.2232558, |
|
"logits/rejected": 2119342242.2943397, |
|
"logps/chosen": -1588.093023255814, |
|
"logps/rejected": -2714.0830188679247, |
|
"loss": 0.008, |
|
"rewards/chosen": 7.158184388626453, |
|
"rewards/margins": 7941654.131769294, |
|
"rewards/rejected": -7941646.973584905, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3435352904434728, |
|
"grad_norm": 0.26105254888534546, |
|
"kl": 13.591666221618652, |
|
"learning_rate": 4.9217289508914836e-06, |
|
"logits/chosen": -344134401.11790395, |
|
"logits/rejected": 2264991001.498008, |
|
"logps/chosen": -1525.659388646288, |
|
"logps/rejected": -3018.4541832669324, |
|
"loss": 0.0132, |
|
"rewards/chosen": 6.172569574747544, |
|
"rewards/margins": 11028239.351852443, |
|
"rewards/rejected": -11028233.179282868, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.3497813866333542, |
|
"grad_norm": 0.14341367781162262, |
|
"kl": 13.274999618530273, |
|
"learning_rate": 4.914814565722671e-06, |
|
"logits/chosen": -605742868.3893806, |
|
"logits/rejected": 1702325981.7322834, |
|
"logps/chosen": -1471.929203539823, |
|
"logps/rejected": -2880.503937007874, |
|
"loss": 0.0128, |
|
"rewards/chosen": 6.208206041724281, |
|
"rewards/margins": 17545562.869623367, |
|
"rewards/rejected": -17545556.661417324, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3560274828232355, |
|
"grad_norm": 0.11326275765895844, |
|
"kl": 12.416666984558105, |
|
"learning_rate": 4.907612841769407e-06, |
|
"logits/chosen": -296522604.57587546, |
|
"logits/rejected": 2360177650.224215, |
|
"logps/chosen": -1487.1906614785992, |
|
"logps/rejected": -3418.403587443946, |
|
"loss": 0.013, |
|
"rewards/chosen": 6.169439293531128, |
|
"rewards/margins": -2895297.866435146, |
|
"rewards/rejected": 2895304.0358744394, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3622735790131168, |
|
"grad_norm": 0.371408611536026, |
|
"kl": 22.683332443237305, |
|
"learning_rate": 4.900124635964823e-06, |
|
"logits/chosen": -535632088.0337553, |
|
"logits/rejected": 2330721225.218107, |
|
"logps/chosen": -1422.8523206751054, |
|
"logps/rejected": -3448.098765432099, |
|
"loss": 0.0181, |
|
"rewards/chosen": 5.8123933816257916, |
|
"rewards/margins": 8163410.190994204, |
|
"rewards/rejected": -8163404.378600823, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3685196752029981, |
|
"grad_norm": 0.1974770724773407, |
|
"kl": 3.7249999046325684, |
|
"learning_rate": 4.8923508393305224e-06, |
|
"logits/chosen": -610109912.6153846, |
|
"logits/rejected": 1617081524.7058823, |
|
"logps/chosen": -1457.1153846153845, |
|
"logps/rejected": -2936.9411764705883, |
|
"loss": 0.0082, |
|
"rewards/chosen": 6.869715177095854, |
|
"rewards/margins": 8407971.105009295, |
|
"rewards/rejected": -8407964.235294119, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.3747657713928795, |
|
"grad_norm": 0.15918996930122375, |
|
"kl": 10.741666793823242, |
|
"learning_rate": 4.884292376870567e-06, |
|
"logits/chosen": -526262306.27615064, |
|
"logits/rejected": 1927291389.8755186, |
|
"logps/chosen": -1425.4058577405858, |
|
"logps/rejected": -3032.697095435685, |
|
"loss": 0.0112, |
|
"rewards/chosen": 6.35287156364409, |
|
"rewards/margins": 14068449.174448326, |
|
"rewards/rejected": -14068442.821576763, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3810118675827608, |
|
"grad_norm": 0.27272769808769226, |
|
"kl": 5.775000095367432, |
|
"learning_rate": 4.875950207461403e-06, |
|
"logits/chosen": -388197814.85714287, |
|
"logits/rejected": 2318860829.619835, |
|
"logps/chosen": -1382.453781512605, |
|
"logps/rejected": -3041.8512396694214, |
|
"loss": 0.012, |
|
"rewards/chosen": 6.2134866153492645, |
|
"rewards/margins": 2678316.2961312435, |
|
"rewards/rejected": -2678310.082644628, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.3872579637726421, |
|
"grad_norm": 0.17649586498737335, |
|
"kl": 38.70833206176758, |
|
"learning_rate": 4.867325323737765e-06, |
|
"logits/chosen": -427835718.374502, |
|
"logits/rejected": 1462694835.9825327, |
|
"logps/chosen": -1543.2669322709164, |
|
"logps/rejected": -2535.9650655021833, |
|
"loss": 0.0242, |
|
"rewards/chosen": 6.1200719582607075, |
|
"rewards/margins": 10201054.993434405, |
|
"rewards/rejected": -10201048.873362446, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3935040599625234, |
|
"grad_norm": 0.15216855704784393, |
|
"kl": 9.566666603088379, |
|
"learning_rate": 4.858418751974564e-06, |
|
"logits/chosen": -660057950.7401575, |
|
"logits/rejected": 1958461584.9911504, |
|
"logps/chosen": -1579.5905511811025, |
|
"logps/rejected": -2988.8849557522126, |
|
"loss": 0.0123, |
|
"rewards/chosen": 6.074395127183809, |
|
"rewards/margins": 16072222.99474911, |
|
"rewards/rejected": -16072216.920353983, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3997501561524047, |
|
"grad_norm": 0.15513299405574799, |
|
"kl": 4.599999904632568, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -757533956.338983, |
|
"logits/rejected": 1946294574.1639345, |
|
"logps/chosen": -1310.7118644067796, |
|
"logps/rejected": -3486.1639344262294, |
|
"loss": 0.0089, |
|
"rewards/chosen": 6.3125450004965575, |
|
"rewards/margins": 20475022.705987625, |
|
"rewards/rejected": -20475016.393442623, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4059962523422861, |
|
"grad_norm": 0.13751330971717834, |
|
"kl": 0.0, |
|
"learning_rate": 4.839764816893315e-06, |
|
"logits/chosen": -558023220.9655173, |
|
"logits/rejected": 1358303364.1290324, |
|
"logps/chosen": -1644.8275862068965, |
|
"logps/rejected": -3607.2258064516127, |
|
"loss": 0.0123, |
|
"rewards/chosen": 6.341705848430765, |
|
"rewards/margins": 9993958.341705848, |
|
"rewards/rejected": -9993952.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4122423485321674, |
|
"grad_norm": 0.30045750737190247, |
|
"kl": 0.2083333283662796, |
|
"learning_rate": 4.830019673206997e-06, |
|
"logits/chosen": -620181966.451613, |
|
"logits/rejected": 1212370802.7586207, |
|
"logps/chosen": -1604.0, |
|
"logps/rejected": -3483.0344827586205, |
|
"loss": 0.0114, |
|
"rewards/chosen": 6.462984146610383, |
|
"rewards/margins": 21377946.325053114, |
|
"rewards/rejected": -21377939.862068966, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4184884447220487, |
|
"grad_norm": 0.22446036338806152, |
|
"kl": 0.0, |
|
"learning_rate": 4.8199972804804615e-06, |
|
"logits/chosen": -509039072.9068826, |
|
"logits/rejected": 1352807050.4377682, |
|
"logps/chosen": -1562.5587044534414, |
|
"logps/rejected": -3614.214592274678, |
|
"loss": 0.009, |
|
"rewards/chosen": 6.133410990479504, |
|
"rewards/margins": 17571108.897359487, |
|
"rewards/rejected": -17571102.763948496, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.42473454091193, |
|
"grad_norm": 0.06178925931453705, |
|
"kl": 0.0, |
|
"learning_rate": 4.809698831278217e-06, |
|
"logits/chosen": -515473141.5934959, |
|
"logits/rejected": 1385061349.7435896, |
|
"logps/chosen": -1490.081300813008, |
|
"logps/rejected": -3626.6666666666665, |
|
"loss": 0.0101, |
|
"rewards/chosen": 6.447595115599594, |
|
"rewards/margins": 7472535.95186862, |
|
"rewards/rejected": -7472529.504273504, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4309806371018114, |
|
"grad_norm": 0.10884927213191986, |
|
"kl": 1.8083332777023315, |
|
"learning_rate": 4.799125551012731e-06, |
|
"logits/chosen": -577620593.5397489, |
|
"logits/rejected": 1478309420.6141078, |
|
"logps/chosen": -1485.9246861924687, |
|
"logps/rejected": -3730.058091286307, |
|
"loss": 0.0079, |
|
"rewards/chosen": 6.468292874771182, |
|
"rewards/margins": 14943737.721404908, |
|
"rewards/rejected": -14943731.253112033, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.4372267332916927, |
|
"grad_norm": 0.15412873029708862, |
|
"kl": 11.300000190734863, |
|
"learning_rate": 4.788278697798619e-06, |
|
"logits/chosen": -543128542.967742, |
|
"logits/rejected": 1143824666.4827585, |
|
"logps/chosen": -1587.6129032258063, |
|
"logps/rejected": -3566.6206896551726, |
|
"loss": 0.0124, |
|
"rewards/chosen": 6.4700597947643645, |
|
"rewards/margins": 17442286.194197726, |
|
"rewards/rejected": -17442279.724137932, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.443472829481574, |
|
"grad_norm": 0.6250858902931213, |
|
"kl": 17.0625, |
|
"learning_rate": 4.77715956230294e-06, |
|
"logits/chosen": -585785205.1525424, |
|
"logits/rejected": 1392276866.0983605, |
|
"logps/chosen": -1420.6101694915253, |
|
"logps/rejected": -3279.7377049180327, |
|
"loss": 0.0152, |
|
"rewards/chosen": 6.209764383606991, |
|
"rewards/margins": 9625444.635993892, |
|
"rewards/rejected": -9625438.426229509, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.4497189256714553, |
|
"grad_norm": 0.3552960753440857, |
|
"kl": 10.883333206176758, |
|
"learning_rate": 4.765769467591626e-06, |
|
"logits/chosen": -576030856.5333333, |
|
"logits/rejected": 1427985749.3333333, |
|
"logps/chosen": -1384.0, |
|
"logps/rejected": -2608.0, |
|
"loss": 0.0139, |
|
"rewards/chosen": 6.204444885253906, |
|
"rewards/margins": 19064277.13777822, |
|
"rewards/rejected": -19064270.933333334, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.45596502186133664, |
|
"grad_norm": 0.29342055320739746, |
|
"kl": 14.975000381469727, |
|
"learning_rate": 4.75410976897204e-06, |
|
"logits/chosen": -637534208.0, |
|
"logits/rejected": 1524465529.1522634, |
|
"logps/chosen": -1628.8945147679324, |
|
"logps/rejected": -3114.40329218107, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.717748199334125, |
|
"rewards/margins": 22818043.129270833, |
|
"rewards/rejected": -22818036.411522634, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.462211118051218, |
|
"grad_norm": 0.1184186041355133, |
|
"kl": 10.0, |
|
"learning_rate": 4.742181853831721e-06, |
|
"logits/chosen": -462829314.11570245, |
|
"logits/rejected": 1503693230.252101, |
|
"logps/chosen": -1507.4380165289256, |
|
"logps/rejected": -3374.5210084033615, |
|
"loss": 0.015, |
|
"rewards/chosen": 6.435300212261105, |
|
"rewards/margins": 12131771.141182566, |
|
"rewards/rejected": -12131764.705882354, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4684572142410993, |
|
"grad_norm": 0.14714303612709045, |
|
"kl": 21.516666412353516, |
|
"learning_rate": 4.729987141473286e-06, |
|
"logits/chosen": -530579456.0, |
|
"logits/rejected": 1497512767.720524, |
|
"logps/chosen": -1480.5418326693227, |
|
"logps/rejected": -3180.995633187773, |
|
"loss": 0.0226, |
|
"rewards/chosen": 6.02860870969248, |
|
"rewards/margins": -5513310.582745003, |
|
"rewards/rejected": 5513316.611353712, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.47470331043098063, |
|
"grad_norm": 0.1731535792350769, |
|
"kl": 28.633333206176758, |
|
"learning_rate": 4.717527082945555e-06, |
|
"logits/chosen": -814227456.0, |
|
"logits/rejected": 1239173412.5714285, |
|
"logps/chosen": -1477.25, |
|
"logps/rejected": -3086.0, |
|
"loss": 0.0179, |
|
"rewards/chosen": 6.597807884216309, |
|
"rewards/margins": 17609291.169236455, |
|
"rewards/rejected": -17609284.57142857, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.48094940662086194, |
|
"grad_norm": 0.13685530424118042, |
|
"kl": 14.733333587646484, |
|
"learning_rate": 4.704803160870888e-06, |
|
"logits/chosen": -748893894.9868996, |
|
"logits/rejected": 1572780448.12749, |
|
"logps/chosen": -1380.122270742358, |
|
"logps/rejected": -3014.629482071713, |
|
"loss": 0.0118, |
|
"rewards/chosen": 6.789244272823417, |
|
"rewards/margins": 21147032.63784985, |
|
"rewards/rejected": -21147025.848605577, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4871955028107433, |
|
"grad_norm": 0.3021528124809265, |
|
"kl": 7.650000095367432, |
|
"learning_rate": 4.69181688926877e-06, |
|
"logits/chosen": -378228522.1526104, |
|
"logits/rejected": 2085631281.8701298, |
|
"logps/chosen": -1484.5943775100402, |
|
"logps/rejected": -3172.848484848485, |
|
"loss": 0.0109, |
|
"rewards/chosen": 6.780140581858685, |
|
"rewards/margins": 15358581.602651404, |
|
"rewards/rejected": -15358574.822510822, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4934415990006246, |
|
"grad_norm": 0.07516395300626755, |
|
"kl": 0.0, |
|
"learning_rate": 4.678569813375654e-06, |
|
"logits/chosen": -424886970.8436019, |
|
"logits/rejected": 1915752250.0520446, |
|
"logps/chosen": -1499.9810426540284, |
|
"logps/rejected": -3160.5055762081784, |
|
"loss": 0.0064, |
|
"rewards/chosen": 6.621546740780509, |
|
"rewards/margins": 6866231.1568627255, |
|
"rewards/rejected": -6866224.535315985, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.49968769519050593, |
|
"grad_norm": 0.27578118443489075, |
|
"kl": 0.0, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": -471388229.4237288, |
|
"logits/rejected": 1642860745.442623, |
|
"logps/chosen": -1550.6440677966102, |
|
"logps/rejected": -3196.590163934426, |
|
"loss": 0.0074, |
|
"rewards/chosen": 6.7454585705773304, |
|
"rewards/margins": 16931292.778245453, |
|
"rewards/rejected": -16931286.032786883, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5059337913803873, |
|
"grad_norm": 0.10973331332206726, |
|
"kl": 3.674999952316284, |
|
"learning_rate": 4.651299584640198e-06, |
|
"logits/chosen": -555291323.3170732, |
|
"logits/rejected": 1601991111.1111112, |
|
"logps/chosen": -1485.918699186992, |
|
"logps/rejected": -3310.4957264957266, |
|
"loss": 0.0134, |
|
"rewards/chosen": 5.922939393578506, |
|
"rewards/margins": 9290643.051144522, |
|
"rewards/rejected": -9290637.128205128, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5121798875702686, |
|
"grad_norm": 0.54237300157547, |
|
"kl": 4.599999904632568, |
|
"learning_rate": 4.637279676682367e-06, |
|
"logits/chosen": -570763808.4050633, |
|
"logits/rejected": 932611261.6296296, |
|
"logps/chosen": -1530.9367088607594, |
|
"logps/rejected": -3226.074074074074, |
|
"loss": 0.0121, |
|
"rewards/chosen": 6.037538938884493, |
|
"rewards/margins": 11322472.01284758, |
|
"rewards/rejected": -11322465.975308642, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5184259837601499, |
|
"grad_norm": 0.08977213501930237, |
|
"kl": 1.5416666269302368, |
|
"learning_rate": 4.623005453816447e-06, |
|
"logits/chosen": -606227321.370518, |
|
"logits/rejected": 1303320441.8515284, |
|
"logps/chosen": -1591.3306772908365, |
|
"logps/rejected": -3280.069868995633, |
|
"loss": 0.0067, |
|
"rewards/chosen": 6.4393286230079685, |
|
"rewards/margins": 31069579.46989631, |
|
"rewards/rejected": -31069573.030567687, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.5246720799500312, |
|
"grad_norm": 3.119323492050171, |
|
"kl": 13.933333396911621, |
|
"learning_rate": 4.608478614532215e-06, |
|
"logits/chosen": -561827020.8, |
|
"logits/rejected": 1508271718.4, |
|
"logps/chosen": -1578.5333333333333, |
|
"logps/rejected": -3426.133333333333, |
|
"loss": 0.0123, |
|
"rewards/chosen": 6.627941385904948, |
|
"rewards/margins": 29183765.56127472, |
|
"rewards/rejected": -29183758.933333334, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5309181761399125, |
|
"grad_norm": 0.252535343170166, |
|
"kl": 10.433333396911621, |
|
"learning_rate": 4.59370088737827e-06, |
|
"logits/chosen": -592064528.7183673, |
|
"logits/rejected": 1528359757.3446808, |
|
"logps/chosen": -1488.4571428571428, |
|
"logps/rejected": -3226.9617021276595, |
|
"loss": 0.0161, |
|
"rewards/chosen": 6.000954639668367, |
|
"rewards/margins": 24429160.04350783, |
|
"rewards/rejected": -24429154.04255319, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5371642723297939, |
|
"grad_norm": 0.19260543584823608, |
|
"kl": 0.24166665971279144, |
|
"learning_rate": 4.578674030756364e-06, |
|
"logits/chosen": -723806702.3448275, |
|
"logits/rejected": 1168265876.6451614, |
|
"logps/chosen": -1373.103448275862, |
|
"logps/rejected": -2701.6774193548385, |
|
"loss": 0.0105, |
|
"rewards/chosen": 6.012945767106681, |
|
"rewards/margins": 25346937.625848994, |
|
"rewards/rejected": -25346931.612903226, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5434103685196752, |
|
"grad_norm": 0.08087070286273956, |
|
"kl": 16.933332443237305, |
|
"learning_rate": 4.5633998327121595e-06, |
|
"logits/chosen": -731779713.5421686, |
|
"logits/rejected": 1168406448.2077923, |
|
"logps/chosen": -1502.008032128514, |
|
"logps/rejected": -3129.627705627706, |
|
"loss": 0.0166, |
|
"rewards/chosen": 6.375834392256526, |
|
"rewards/margins": 31221008.349860363, |
|
"rewards/rejected": -31221001.974025972, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.5496564647095565, |
|
"grad_norm": 0.08805792033672333, |
|
"kl": 9.266666412353516, |
|
"learning_rate": 4.54788011072248e-06, |
|
"logits/chosen": -716458515.6085106, |
|
"logits/rejected": 1169859864.032653, |
|
"logps/chosen": -1333.1063829787233, |
|
"logps/rejected": -3449.208163265306, |
|
"loss": 0.012, |
|
"rewards/chosen": 7.045041348071808, |
|
"rewards/margins": -3183029.5590402847, |
|
"rewards/rejected": 3183036.6040816326, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5559025608994379, |
|
"grad_norm": 0.7193201780319214, |
|
"kl": 1.4166666269302368, |
|
"learning_rate": 4.532116711479039e-06, |
|
"logits/chosen": -627270098.2113822, |
|
"logits/rejected": 1177344717.6752136, |
|
"logps/chosen": -1573.138211382114, |
|
"logps/rejected": -3736.8888888888887, |
|
"loss": 0.0096, |
|
"rewards/chosen": 6.526741834190803, |
|
"rewards/margins": 28779710.32161363, |
|
"rewards/rejected": -28779703.794871796, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.5621486570893192, |
|
"grad_norm": 0.08647624403238297, |
|
"kl": 5.0333333015441895, |
|
"learning_rate": 4.516111510668707e-06, |
|
"logits/chosen": -656890023.3463035, |
|
"logits/rejected": 919079215.0672646, |
|
"logps/chosen": -1566.1322957198443, |
|
"logps/rejected": -3726.6367713004483, |
|
"loss": 0.0096, |
|
"rewards/chosen": 6.501714209174367, |
|
"rewards/margins": 49463311.68557071, |
|
"rewards/rejected": -49463305.1838565, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5683947532792005, |
|
"grad_norm": 0.07795864343643188, |
|
"kl": 1.7000000476837158, |
|
"learning_rate": 4.499866412750324e-06, |
|
"logits/chosen": -725505175.373913, |
|
"logits/rejected": 908956008.448, |
|
"logps/chosen": -1507.3391304347826, |
|
"logps/rejected": -3735.04, |
|
"loss": 0.0063, |
|
"rewards/chosen": 6.4802914826766305, |
|
"rewards/margins": 20719481.168291483, |
|
"rewards/rejected": -20719474.688, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.5746408494690818, |
|
"grad_norm": 0.09131667762994766, |
|
"kl": 3.2249999046325684, |
|
"learning_rate": 4.4833833507280884e-06, |
|
"logits/chosen": -645614869.8884759, |
|
"logits/rejected": 997203230.3317536, |
|
"logps/chosen": -1540.8773234200744, |
|
"logps/rejected": -4016.0758293838862, |
|
"loss": 0.0118, |
|
"rewards/chosen": 6.323027543418912, |
|
"rewards/margins": 38274318.81591854, |
|
"rewards/rejected": -38274312.492891, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5808869456589631, |
|
"grad_norm": 0.0715484768152237, |
|
"kl": 11.366666793823242, |
|
"learning_rate": 4.466664285921543e-06, |
|
"logits/chosen": -630874549.740458, |
|
"logits/rejected": 962645677.7981651, |
|
"logps/chosen": -1566.1679389312976, |
|
"logps/rejected": -3975.3394495412845, |
|
"loss": 0.0095, |
|
"rewards/chosen": 6.996014544072042, |
|
"rewards/margins": 56229115.25289527, |
|
"rewards/rejected": -56229108.25688073, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.5871330418488445, |
|
"grad_norm": 0.07269534468650818, |
|
"kl": 0.0, |
|
"learning_rate": 4.4497112077322045e-06, |
|
"logits/chosen": -580528576.2746781, |
|
"logits/rejected": 840915499.5303644, |
|
"logps/chosen": -1388.2231759656652, |
|
"logps/rejected": -4148.599190283401, |
|
"loss": 0.0098, |
|
"rewards/chosen": 6.23273621915236, |
|
"rewards/margins": 21097550.783343505, |
|
"rewards/rejected": -21097544.550607286, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5933791380387258, |
|
"grad_norm": 0.08192071318626404, |
|
"kl": 0.0, |
|
"learning_rate": 4.432526133406843e-06, |
|
"logits/chosen": -690147276.5857741, |
|
"logits/rejected": 1121767474.987552, |
|
"logps/chosen": -1430.7615062761506, |
|
"logps/rejected": -4231.9668049792535, |
|
"loss": 0.0066, |
|
"rewards/chosen": 6.778336656642259, |
|
"rewards/margins": 19634322.99410429, |
|
"rewards/rejected": -19634316.215767633, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5996252342286071, |
|
"grad_norm": 0.14791642129421234, |
|
"kl": 8.800000190734863, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": -582916348.9402391, |
|
"logits/rejected": 1169253818.6899564, |
|
"logps/chosen": -1577.434262948207, |
|
"logps/rejected": -3904.5589519650657, |
|
"loss": 0.015, |
|
"rewards/chosen": 6.03934554749751, |
|
"rewards/margins": 19996107.90834118, |
|
"rewards/rejected": -19996101.868995633, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6058713304184884, |
|
"grad_norm": 0.0918608009815216, |
|
"kl": 2.3333332538604736, |
|
"learning_rate": 4.397468203117905e-06, |
|
"logits/chosen": -683436670.976, |
|
"logits/rejected": 1088444683.1304348, |
|
"logps/chosen": -1622.784, |
|
"logps/rejected": -3430.4, |
|
"loss": 0.0057, |
|
"rewards/chosen": 6.6358408203125, |
|
"rewards/margins": 13961369.557579951, |
|
"rewards/rejected": -13961362.921739131, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.6121174266083698, |
|
"grad_norm": 0.1823032945394516, |
|
"kl": 25.15833282470703, |
|
"learning_rate": 4.379599518697444e-06, |
|
"logits/chosen": -739122483.5893536, |
|
"logits/rejected": 745465053.7880185, |
|
"logps/chosen": -1405.3231939163497, |
|
"logps/rejected": -2948.7188940092165, |
|
"loss": 0.0152, |
|
"rewards/chosen": 6.272283633852187, |
|
"rewards/margins": 11475810.290716814, |
|
"rewards/rejected": -11475804.01843318, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6183635227982511, |
|
"grad_norm": 0.6821036338806152, |
|
"kl": 5.625, |
|
"learning_rate": 4.3615071807308165e-06, |
|
"logits/chosen": -681329732.2666667, |
|
"logits/rejected": 1006300910.9333333, |
|
"logps/chosen": -1461.8666666666666, |
|
"logps/rejected": -3339.733333333333, |
|
"loss": 0.0081, |
|
"rewards/chosen": 6.6669565836588545, |
|
"rewards/margins": 18135166.13362325, |
|
"rewards/rejected": -18135159.466666665, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.6246096189881324, |
|
"grad_norm": 0.1369735449552536, |
|
"kl": 2.433333396911621, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits/chosen": -694496685.1452283, |
|
"logits/rejected": 1160242762.9790795, |
|
"logps/chosen": -1413.5767634854772, |
|
"logps/rejected": -3294.794979079498, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.173919804363329, |
|
"rewards/margins": 13679084.466806835, |
|
"rewards/rejected": -13679078.29288703, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6246096189881324, |
|
"eval_kl": 14.301383018493652, |
|
"eval_logits/chosen": -660690092.3172147, |
|
"eval_logits/rejected": 961826460.1858586, |
|
"eval_logps/chosen": -1467.8413926499034, |
|
"eval_logps/rejected": -3063.4989898989897, |
|
"eval_loss": 0.009466251358389854, |
|
"eval_rewards/chosen": 6.571533203125, |
|
"eval_rewards/margins": 21357531.12910896, |
|
"eval_rewards/rejected": -21357524.55757576, |
|
"eval_runtime": 640.876, |
|
"eval_samples_per_second": 6.312, |
|
"eval_steps_per_second": 0.395, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6308557151780138, |
|
"grad_norm": 0.0860004872083664, |
|
"kl": 11.016666412353516, |
|
"learning_rate": 4.324660181744589e-06, |
|
"logits/chosen": -672199384.5375494, |
|
"logits/rejected": 952610509.2511014, |
|
"logps/chosen": -1372.0790513833992, |
|
"logps/rejected": -3022.6607929515417, |
|
"loss": 0.0113, |
|
"rewards/chosen": 6.333535658041008, |
|
"rewards/margins": 46791826.17494535, |
|
"rewards/rejected": -46791819.84140969, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.637101811367895, |
|
"grad_norm": 0.1688835471868515, |
|
"kl": 10.633333206176758, |
|
"learning_rate": 4.305909905149389e-06, |
|
"logits/chosen": -653113051.4285715, |
|
"logits/rejected": 957479286.7404256, |
|
"logps/chosen": -1485.7142857142858, |
|
"logps/rejected": -3097.6, |
|
"loss": 0.0079, |
|
"rewards/chosen": 6.55218431122449, |
|
"rewards/margins": 14125448.186226865, |
|
"rewards/rejected": -14125441.634042554, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6433479075577764, |
|
"grad_norm": 0.23886211216449738, |
|
"kl": 10.949999809265137, |
|
"learning_rate": 4.2869447433351165e-06, |
|
"logits/chosen": -685645342.117647, |
|
"logits/rejected": 734592482.3801653, |
|
"logps/chosen": -1563.6302521008404, |
|
"logps/rejected": -3077.0247933884298, |
|
"loss": 0.0116, |
|
"rewards/chosen": 6.41238018644958, |
|
"rewards/margins": 4603817.850396716, |
|
"rewards/rejected": -4603811.438016529, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.6495940037476577, |
|
"grad_norm": 0.10929810255765915, |
|
"kl": 5.0333333015441895, |
|
"learning_rate": 4.267766952966369e-06, |
|
"logits/chosen": -814962587.8755555, |
|
"logits/rejected": 927048097.6313726, |
|
"logps/chosen": -1531.591111111111, |
|
"logps/rejected": -3431.9058823529413, |
|
"loss": 0.0069, |
|
"rewards/chosen": 6.492171223958334, |
|
"rewards/margins": 4893755.700014361, |
|
"rewards/rejected": -4893749.207843137, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.655840099937539, |
|
"grad_norm": 0.24036245048046112, |
|
"kl": 6.233333110809326, |
|
"learning_rate": 4.248378816008418e-06, |
|
"logits/chosen": -786074843.1091703, |
|
"logits/rejected": 869507626.8366534, |
|
"logps/chosen": -1462.5676855895197, |
|
"logps/rejected": -3666.868525896414, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.3837330913960155, |
|
"rewards/margins": 22811593.18851397, |
|
"rewards/rejected": -22811586.804780878, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.6620861961274204, |
|
"grad_norm": 0.0712604969739914, |
|
"kl": 0.0, |
|
"learning_rate": 4.228782639455674e-06, |
|
"logits/chosen": -546996645.9055794, |
|
"logits/rejected": 1379747715.6275303, |
|
"logps/chosen": -1523.5021459227469, |
|
"logps/rejected": -4356.663967611336, |
|
"loss": 0.0098, |
|
"rewards/chosen": 6.099889141295601, |
|
"rewards/margins": 21226120.69098226, |
|
"rewards/rejected": -21226114.59109312, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6683322923173017, |
|
"grad_norm": 0.08271102607250214, |
|
"kl": 0.0, |
|
"learning_rate": 4.2089807550571786e-06, |
|
"logits/chosen": -598833823.1932774, |
|
"logits/rejected": 1428741128.4628098, |
|
"logps/chosen": -1500.90756302521, |
|
"logps/rejected": -3963.2396694214876, |
|
"loss": 0.0107, |
|
"rewards/chosen": 6.574934246159401, |
|
"rewards/margins": 16506663.368322676, |
|
"rewards/rejected": -16506656.79338843, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.674578388507183, |
|
"grad_norm": 0.09333149343729019, |
|
"kl": 1.4166666269302368, |
|
"learning_rate": 4.188975519039151e-06, |
|
"logits/chosen": -715718131.712, |
|
"logits/rejected": 1054329490.9217391, |
|
"logps/chosen": -1413.376, |
|
"logps/rejected": -3373.913043478261, |
|
"loss": 0.0079, |
|
"rewards/chosen": 6.30774169921875, |
|
"rewards/margins": 4260589.264263438, |
|
"rewards/rejected": -4260582.956521739, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6808244846970644, |
|
"grad_norm": 0.0794818177819252, |
|
"kl": 0.0, |
|
"learning_rate": 4.168769311824619e-06, |
|
"logits/chosen": -656695737.8921162, |
|
"logits/rejected": 1032728901.623431, |
|
"logps/chosen": -1501.7427385892115, |
|
"logps/rejected": -3163.313807531381, |
|
"loss": 0.0088, |
|
"rewards/chosen": 6.385147537927905, |
|
"rewards/margins": 29309559.924896494, |
|
"rewards/rejected": -29309553.539748956, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.6870705808869456, |
|
"grad_norm": 0.07905972003936768, |
|
"kl": 13.633333206176758, |
|
"learning_rate": 4.1483645377501726e-06, |
|
"logits/chosen": -694805668.1526718, |
|
"logits/rejected": 1306294816.880734, |
|
"logps/chosen": -1517.3129770992366, |
|
"logps/rejected": -3286.605504587156, |
|
"loss": 0.0127, |
|
"rewards/chosen": 5.729227546517175, |
|
"rewards/margins": 14022816.738401858, |
|
"rewards/rejected": -14022811.009174312, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.693316677076827, |
|
"grad_norm": 0.20355987548828125, |
|
"kl": 1.100000023841858, |
|
"learning_rate": 4.127763624779873e-06, |
|
"logits/chosen": -776127811.6017317, |
|
"logits/rejected": 950439393.1566265, |
|
"logps/chosen": -1429.0562770562772, |
|
"logps/rejected": -3153.2208835341366, |
|
"loss": 0.0086, |
|
"rewards/chosen": 6.4727360448796, |
|
"rewards/margins": 17968574.50486456, |
|
"rewards/rejected": -17968568.032128513, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.6995627732667083, |
|
"grad_norm": 0.25120851397514343, |
|
"kl": 29.850000381469727, |
|
"learning_rate": 4.106969024216348e-06, |
|
"logits/chosen": -833976294.0759493, |
|
"logits/rejected": 1107952155.3909464, |
|
"logps/chosen": -1552.7426160337552, |
|
"logps/rejected": -3334.320987654321, |
|
"loss": 0.017, |
|
"rewards/chosen": 6.508130294864188, |
|
"rewards/margins": -5248354.841663944, |
|
"rewards/rejected": 5248361.349794239, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.7058088694565896, |
|
"grad_norm": 0.06628026813268661, |
|
"kl": 0.0, |
|
"learning_rate": 4.085983210409114e-06, |
|
"logits/chosen": -842612666.8691983, |
|
"logits/rejected": 1499886562.5020576, |
|
"logps/chosen": -1503.392405063291, |
|
"logps/rejected": -3725.9588477366256, |
|
"loss": 0.0065, |
|
"rewards/chosen": 6.703082764702004, |
|
"rewards/margins": 5656834.489090995, |
|
"rewards/rejected": -5656827.78600823, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.712054965646471, |
|
"grad_norm": 0.05841664969921112, |
|
"kl": 0.0, |
|
"learning_rate": 4.064808680460149e-06, |
|
"logits/chosen": -759934940.3826087, |
|
"logits/rejected": 1470086774.784, |
|
"logps/chosen": -1512.0695652173913, |
|
"logps/rejected": -3696.64, |
|
"loss": 0.0065, |
|
"rewards/chosen": 6.662904424252718, |
|
"rewards/margins": 9375535.622904425, |
|
"rewards/rejected": -9375528.96, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7183010618363522, |
|
"grad_norm": 0.335035502910614, |
|
"kl": 2.8333332538604736, |
|
"learning_rate": 4.043447953926763e-06, |
|
"logits/chosen": -727839396.7304348, |
|
"logits/rejected": 1297751212.032, |
|
"logps/chosen": -1522.6434782608696, |
|
"logps/rejected": -3939.072, |
|
"loss": 0.0101, |
|
"rewards/chosen": 6.710281504755435, |
|
"rewards/margins": 13018670.646281505, |
|
"rewards/rejected": -13018663.936, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.7245471580262336, |
|
"grad_norm": 0.08834685385227203, |
|
"kl": 3.0833332538604736, |
|
"learning_rate": 4.021903572521802e-06, |
|
"logits/chosen": -775282141.8666667, |
|
"logits/rejected": 1288280473.6, |
|
"logps/chosen": -1568.4, |
|
"logps/rejected": -3556.266666666667, |
|
"loss": 0.0058, |
|
"rewards/chosen": 6.64389902750651, |
|
"rewards/margins": 10371696.243899027, |
|
"rewards/rejected": -10371689.6, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.730793254216115, |
|
"grad_norm": 0.08919289708137512, |
|
"kl": 2.25, |
|
"learning_rate": 4.000178099811203e-06, |
|
"logits/chosen": -764677672.110599, |
|
"logits/rejected": 1221270088.0304182, |
|
"logps/chosen": -1534.2304147465438, |
|
"logps/rejected": -3374.722433460076, |
|
"loss": 0.0068, |
|
"rewards/chosen": 6.908961423531106, |
|
"rewards/margins": 16852222.148505148, |
|
"rewards/rejected": -16852215.239543725, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.7370393504059962, |
|
"grad_norm": 0.08590350300073624, |
|
"kl": 0.0, |
|
"learning_rate": 3.978274120908957e-06, |
|
"logits/chosen": -685725950.9012876, |
|
"logits/rejected": 1515111660.3076923, |
|
"logps/chosen": -1563.4678111587982, |
|
"logps/rejected": -3674.1700404858298, |
|
"loss": 0.0083, |
|
"rewards/chosen": 6.62035294561427, |
|
"rewards/margins": 13114593.308612056, |
|
"rewards/rejected": -13114586.68825911, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.7432854465958776, |
|
"grad_norm": 0.0870102196931839, |
|
"kl": 0.0, |
|
"learning_rate": 3.956194242169506e-06, |
|
"logits/chosen": -676005492.1680672, |
|
"logits/rejected": 1381572540.2975206, |
|
"logps/chosen": -1465.3445378151262, |
|
"logps/rejected": -3624.7272727272725, |
|
"loss": 0.0074, |
|
"rewards/chosen": 6.180169626444328, |
|
"rewards/margins": 18082138.8082688, |
|
"rewards/rejected": -18082132.628099173, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.749531542785759, |
|
"grad_norm": 0.2675867974758148, |
|
"kl": 8.666666984558105, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits/chosen": -682359716.493617, |
|
"logits/rejected": 1673604334.2367346, |
|
"logps/chosen": -1567.659574468085, |
|
"logps/rejected": -4016.326530612245, |
|
"loss": 0.0115, |
|
"rewards/chosen": 6.600190118018617, |
|
"rewards/margins": 17001855.808353383, |
|
"rewards/rejected": -17001849.208163265, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7557776389756402, |
|
"grad_norm": 0.06292186677455902, |
|
"kl": 0.0, |
|
"learning_rate": 3.911517314935752e-06, |
|
"logits/chosen": -702037249.0893617, |
|
"logits/rejected": 1173634737.632653, |
|
"logps/chosen": -1671.6255319148936, |
|
"logps/rejected": -3740.734693877551, |
|
"loss": 0.006, |
|
"rewards/chosen": 6.977364527925532, |
|
"rewards/margins": 23121256.152874734, |
|
"rewards/rejected": -23121249.175510205, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.7620237351655216, |
|
"grad_norm": 0.07434140145778656, |
|
"kl": 0.0, |
|
"learning_rate": 3.888925582549006e-06, |
|
"logits/chosen": -725903268.7935222, |
|
"logits/rejected": 1318685577.3390558, |
|
"logps/chosen": -1664.0, |
|
"logps/rejected": -3763.0901287553647, |
|
"loss": 0.008, |
|
"rewards/chosen": 6.757370674658401, |
|
"rewards/margins": 18745667.18655522, |
|
"rewards/rejected": -18745660.42918455, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7682698313554028, |
|
"grad_norm": 0.06401233375072479, |
|
"kl": 22.33333396911621, |
|
"learning_rate": 3.866168581907609e-06, |
|
"logits/chosen": -783920391.083004, |
|
"logits/rejected": 1419005103.9295154, |
|
"logps/chosen": -1494.3873517786562, |
|
"logps/rejected": -4285.180616740088, |
|
"loss": 0.0153, |
|
"rewards/chosen": 6.33299623270751, |
|
"rewards/margins": 32352429.24048522, |
|
"rewards/rejected": -32352422.907488987, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.7745159275452842, |
|
"grad_norm": 0.10351639986038208, |
|
"kl": 0.0, |
|
"learning_rate": 3.8432490208670605e-06, |
|
"logits/chosen": -914429361.898305, |
|
"logits/rejected": 1108860525.1147542, |
|
"logps/chosen": -1482.5762711864406, |
|
"logps/rejected": -4299.803278688524, |
|
"loss": 0.006, |
|
"rewards/chosen": 6.938127420716366, |
|
"rewards/margins": 10782713.298783159, |
|
"rewards/rejected": -10782706.360655738, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7807620237351656, |
|
"grad_norm": 0.05740602687001228, |
|
"kl": 0.0, |
|
"learning_rate": 3.82016962662592e-06, |
|
"logits/chosen": -904887262.967742, |
|
"logits/rejected": 979027103.634981, |
|
"logps/chosen": -1401.3640552995391, |
|
"logps/rejected": -4223.2699619771865, |
|
"loss": 0.0063, |
|
"rewards/chosen": 6.702658095118087, |
|
"rewards/margins": 38639299.53916, |
|
"rewards/rejected": -38639292.836501904, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.7870081199250468, |
|
"grad_norm": 0.07504531741142273, |
|
"kl": 0.0, |
|
"learning_rate": 3.796933145401304e-06, |
|
"logits/chosen": -765740100.2666667, |
|
"logits/rejected": 919574937.6, |
|
"logps/chosen": -1549.2, |
|
"logps/rejected": -4261.333333333333, |
|
"loss": 0.0063, |
|
"rewards/chosen": 6.80685780843099, |
|
"rewards/margins": 22749884.14019114, |
|
"rewards/rejected": -22749877.333333332, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7932542161149282, |
|
"grad_norm": 0.04929427057504654, |
|
"kl": 3.9833333492279053, |
|
"learning_rate": 3.773542342102105e-06, |
|
"logits/chosen": -887692046.5691057, |
|
"logits/rejected": 754652081.2307693, |
|
"logps/chosen": -1502.69918699187, |
|
"logps/rejected": -4267.213675213675, |
|
"loss": 0.0069, |
|
"rewards/chosen": 6.618328311579014, |
|
"rewards/margins": 30490055.16533686, |
|
"rewards/rejected": -30490048.547008548, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.7995003123048094, |
|
"grad_norm": 0.05546702817082405, |
|
"kl": 14.333333015441895, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -872644608.0, |
|
"logits/rejected": 810034322.2857143, |
|
"logps/chosen": -1477.5, |
|
"logps/rejected": -4392.285714285715, |
|
"loss": 0.0087, |
|
"rewards/chosen": 6.3790788650512695, |
|
"rewards/margins": 32740708.093364578, |
|
"rewards/rejected": -32740701.714285713, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.8057464084946908, |
|
"grad_norm": 0.08095496147871017, |
|
"kl": 0.0, |
|
"learning_rate": 3.7263089203982698e-06, |
|
"logits/chosen": -660306022.8675799, |
|
"logits/rejected": 944827238.9885057, |
|
"logps/chosen": -1674.2283105022832, |
|
"logps/rejected": -4282.360153256705, |
|
"loss": 0.0062, |
|
"rewards/chosen": 6.8761649632562785, |
|
"rewards/margins": -5804263.468662622, |
|
"rewards/rejected": 5804270.344827586, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.8119925046845722, |
|
"grad_norm": 0.3066859245300293, |
|
"kl": 0.0, |
|
"learning_rate": 3.7024719222984696e-06, |
|
"logits/chosen": -730389925.1255411, |
|
"logits/rejected": 1048660222.9718876, |
|
"logps/chosen": -1364.3636363636363, |
|
"logps/rejected": -4099.084337349397, |
|
"loss": 0.0097, |
|
"rewards/chosen": 6.316058534564394, |
|
"rewards/margins": 3112194.7176649603, |
|
"rewards/rejected": -3112188.4016064256, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8182386008744534, |
|
"grad_norm": 0.0594465434551239, |
|
"kl": 1.0083333253860474, |
|
"learning_rate": 3.6784918420649952e-06, |
|
"logits/chosen": -717847362.3703704, |
|
"logits/rejected": 960088573.8396624, |
|
"logps/chosen": -1455.8024691358025, |
|
"logps/rejected": -3842.700421940928, |
|
"loss": 0.0086, |
|
"rewards/chosen": 6.571082597897377, |
|
"rewards/margins": 9235133.119605804, |
|
"rewards/rejected": -9235126.548523206, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.8244846970643348, |
|
"grad_norm": 0.7943433523178101, |
|
"kl": 13.666666984558105, |
|
"learning_rate": 3.654371533087586e-06, |
|
"logits/chosen": -735423581.8326694, |
|
"logits/rejected": 1230538278.0087335, |
|
"logps/chosen": -1492.9083665338646, |
|
"logps/rejected": -3983.3711790393013, |
|
"loss": 0.0139, |
|
"rewards/chosen": 6.401267002303287, |
|
"rewards/margins": 29458865.440568313, |
|
"rewards/rejected": -29458859.03930131, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.8307307932542161, |
|
"grad_norm": 0.5621448159217834, |
|
"kl": 43.11249923706055, |
|
"learning_rate": 3.6301138654418e-06, |
|
"logits/chosen": -750683250.6563706, |
|
"logits/rejected": 951841305.4841629, |
|
"logps/chosen": -1559.7837837837837, |
|
"logps/rejected": -2898.823529411765, |
|
"loss": 0.0147, |
|
"rewards/chosen": 6.53166287101834, |
|
"rewards/margins": 61762992.81220586, |
|
"rewards/rejected": -61762986.280542985, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.8369768894440974, |
|
"grad_norm": 0.06558384746313095, |
|
"kl": 22.733333587646484, |
|
"learning_rate": 3.6057217255475034e-06, |
|
"logits/chosen": -798352653.4736842, |
|
"logits/rejected": 947858700.0858369, |
|
"logps/chosen": -1364.34008097166, |
|
"logps/rejected": -3295.038626609442, |
|
"loss": 0.0106, |
|
"rewards/chosen": 6.661080920261893, |
|
"rewards/margins": 42726151.75979336, |
|
"rewards/rejected": -42726145.098712444, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.8432229856339788, |
|
"grad_norm": 0.34487301111221313, |
|
"kl": 2.7333333492279053, |
|
"learning_rate": 3.5811980158254156e-06, |
|
"logits/chosen": -712224751.144033, |
|
"logits/rejected": 919353387.2067511, |
|
"logps/chosen": -1495.9012345679012, |
|
"logps/rejected": -4504.84388185654, |
|
"loss": 0.0084, |
|
"rewards/chosen": 6.79199721096965, |
|
"rewards/margins": 18640485.84684953, |
|
"rewards/rejected": -18640479.05485232, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.84946908182386, |
|
"grad_norm": 0.07742549479007721, |
|
"kl": 0.0, |
|
"learning_rate": 3.556545654351749e-06, |
|
"logits/chosen": -634858531.3103448, |
|
"logits/rejected": 1233641207.7419355, |
|
"logps/chosen": -1602.4137931034484, |
|
"logps/rejected": -4847.4838709677415, |
|
"loss": 0.0064, |
|
"rewards/chosen": 6.5001373291015625, |
|
"rewards/margins": 12399482.113040555, |
|
"rewards/rejected": -12399475.612903226, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.8557151780137414, |
|
"grad_norm": 0.07713647931814194, |
|
"kl": 0.0, |
|
"learning_rate": 3.531767574510987e-06, |
|
"logits/chosen": -767521783.2478633, |
|
"logits/rejected": 1424972158.9593496, |
|
"logps/chosen": -1447.6581196581196, |
|
"logps/rejected": -4773.463414634146, |
|
"loss": 0.009, |
|
"rewards/chosen": 6.673850035056089, |
|
"rewards/margins": 36117947.747020766, |
|
"rewards/rejected": -36117941.07317073, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.8619612742036228, |
|
"grad_norm": 0.0707743763923645, |
|
"kl": 4.150000095367432, |
|
"learning_rate": 3.5068667246468437e-06, |
|
"logits/chosen": -698709666.3414634, |
|
"logits/rejected": 932399156.5128205, |
|
"logps/chosen": -1396.5528455284552, |
|
"logps/rejected": -4100.102564102564, |
|
"loss": 0.0118, |
|
"rewards/chosen": 6.197249249714177, |
|
"rewards/margins": 10702407.838274892, |
|
"rewards/rejected": -10702401.641025642, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.868207370393504, |
|
"grad_norm": 0.0866667851805687, |
|
"kl": 4.666666507720947, |
|
"learning_rate": 3.481846067711436e-06, |
|
"logits/chosen": -739102499.053942, |
|
"logits/rejected": 999367512.9037657, |
|
"logps/chosen": -1414.240663900415, |
|
"logps/rejected": -4140.719665271967, |
|
"loss": 0.0065, |
|
"rewards/chosen": 6.7948727429655085, |
|
"rewards/margins": 23043689.338805795, |
|
"rewards/rejected": -23043682.543933053, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.8744534665833854, |
|
"grad_norm": 0.06301329284906387, |
|
"kl": 4.400000095367432, |
|
"learning_rate": 3.4567085809127247e-06, |
|
"logits/chosen": -748142063.483871, |
|
"logits/rejected": 1094568712.8275862, |
|
"logps/chosen": -1499.8709677419354, |
|
"logps/rejected": -4060.137931034483, |
|
"loss": 0.0071, |
|
"rewards/chosen": 6.396800379599294, |
|
"rewards/margins": 33424125.569214173, |
|
"rewards/rejected": -33424119.172413792, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8806995627732667, |
|
"grad_norm": 0.08260803669691086, |
|
"kl": 0.0, |
|
"learning_rate": 3.4314572553602577e-06, |
|
"logits/chosen": -728655004.5065502, |
|
"logits/rejected": 914725900.2390438, |
|
"logps/chosen": -1468.0873362445416, |
|
"logps/rejected": -3849.434262948207, |
|
"loss": 0.0058, |
|
"rewards/chosen": 6.83486072257096, |
|
"rewards/margins": 14487753.6396416, |
|
"rewards/rejected": -14487746.804780876, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.886945658963148, |
|
"grad_norm": 0.11022159457206726, |
|
"kl": 0.0, |
|
"learning_rate": 3.406095095709254e-06, |
|
"logits/chosen": -760663350.931174, |
|
"logits/rejected": 794069053.527897, |
|
"logps/chosen": -1469.9271255060728, |
|
"logps/rejected": -3839.725321888412, |
|
"loss": 0.0064, |
|
"rewards/chosen": 6.487148516573886, |
|
"rewards/margins": 22605913.98929444, |
|
"rewards/rejected": -22605907.502145924, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8931917551530294, |
|
"grad_norm": 0.08453460782766342, |
|
"kl": 0.0, |
|
"learning_rate": 3.3806251198030843e-06, |
|
"logits/chosen": -895154599.1404959, |
|
"logits/rejected": 1068807348.7058823, |
|
"logps/chosen": -1484.9586776859503, |
|
"logps/rejected": -4220.235294117647, |
|
"loss": 0.0093, |
|
"rewards/chosen": 6.4102722672391526, |
|
"rewards/margins": 18819080.56153277, |
|
"rewards/rejected": -18819074.151260503, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.8994378513429107, |
|
"grad_norm": 0.0879315510392189, |
|
"kl": 0.0, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -890640039.6460177, |
|
"logits/rejected": 928196172.5984251, |
|
"logps/chosen": -1418.7610619469026, |
|
"logps/rejected": -4121.1968503937005, |
|
"loss": 0.0083, |
|
"rewards/chosen": 6.719538055689989, |
|
"rewards/margins": 28256163.94788451, |
|
"rewards/rejected": -28256157.228346456, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.905683947532792, |
|
"grad_norm": 0.08083692938089371, |
|
"kl": 0.0, |
|
"learning_rate": 3.3293738543833807e-06, |
|
"logits/chosen": -763081357.9831933, |
|
"logits/rejected": 966262784.0, |
|
"logps/chosen": -1469.9831932773109, |
|
"logps/rejected": -4322.644628099173, |
|
"loss": 0.0063, |
|
"rewards/chosen": 6.22869103696166, |
|
"rewards/margins": 14399539.716294343, |
|
"rewards/rejected": -14399533.487603305, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.9119300437226733, |
|
"grad_norm": 0.08498977869749069, |
|
"kl": 0.0, |
|
"learning_rate": 3.303598663257904e-06, |
|
"logits/chosen": -845987405.0265486, |
|
"logits/rejected": 976843493.7952756, |
|
"logps/chosen": -1476.3893805309735, |
|
"logps/rejected": -3976.566929133858, |
|
"loss": 0.0082, |
|
"rewards/chosen": 6.599641783047566, |
|
"rewards/margins": 20458966.221689027, |
|
"rewards/rejected": -20458959.622047246, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.9181761399125546, |
|
"grad_norm": 0.1156986802816391, |
|
"kl": 0.0, |
|
"learning_rate": 3.277727851927727e-06, |
|
"logits/chosen": -723482487.4666667, |
|
"logits/rejected": 912776669.8666667, |
|
"logps/chosen": -1497.3333333333333, |
|
"logps/rejected": -3891.733333333333, |
|
"loss": 0.009, |
|
"rewards/chosen": 6.8691650390625, |
|
"rewards/margins": 15557920.469165038, |
|
"rewards/rejected": -15557913.6, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.924422236102436, |
|
"grad_norm": 0.15747644007205963, |
|
"kl": 0.0, |
|
"learning_rate": 3.2517644987606827e-06, |
|
"logits/chosen": -711371434.6666666, |
|
"logits/rejected": 1074933387.6363637, |
|
"logps/chosen": -1456.5925925925926, |
|
"logps/rejected": -4056.7272727272725, |
|
"loss": 0.0082, |
|
"rewards/chosen": 6.889565927010995, |
|
"rewards/margins": 31410347.738050774, |
|
"rewards/rejected": -31410340.848484848, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.9306683322923173, |
|
"grad_norm": 0.07812928408384323, |
|
"kl": 0.0, |
|
"learning_rate": 3.225711693136156e-06, |
|
"logits/chosen": -697507742.0876493, |
|
"logits/rejected": 1089667358.183406, |
|
"logps/chosen": -1519.4262948207172, |
|
"logps/rejected": -4072.803493449782, |
|
"loss": 0.0107, |
|
"rewards/chosen": 6.269356169073705, |
|
"rewards/margins": 20432616.085950054, |
|
"rewards/rejected": -20432609.816593885, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.9369144284821986, |
|
"grad_norm": 0.09357881546020508, |
|
"kl": 4.599999904632568, |
|
"learning_rate": 3.199572535077481e-06, |
|
"logits/chosen": -772760009.0643777, |
|
"logits/rejected": 1114988643.4979758, |
|
"logps/chosen": -1435.5708154506437, |
|
"logps/rejected": -4109.7327935222675, |
|
"loss": 0.0085, |
|
"rewards/chosen": 6.513185689377682, |
|
"rewards/margins": 38015799.2257363, |
|
"rewards/rejected": -38015792.71255061, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.94316052467208, |
|
"grad_norm": 0.08641842007637024, |
|
"kl": 0.0, |
|
"learning_rate": 3.173350134883066e-06, |
|
"logits/chosen": -784815252.8634361, |
|
"logits/rejected": 1084269029.6916995, |
|
"logps/chosen": -1494.4140969162995, |
|
"logps/rejected": -4097.01185770751, |
|
"loss": 0.0048, |
|
"rewards/chosen": 6.749071297666575, |
|
"rewards/margins": 42705485.57911082, |
|
"rewards/rejected": -42705478.83003952, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.9494066208619613, |
|
"grad_norm": 0.11333774030208588, |
|
"kl": 13.466666221618652, |
|
"learning_rate": 3.147047612756302e-06, |
|
"logits/chosen": -665163950.4932735, |
|
"logits/rejected": 1101347525.229572, |
|
"logps/chosen": -1584.57399103139, |
|
"logps/rejected": -4380.389105058366, |
|
"loss": 0.0135, |
|
"rewards/chosen": 6.7324350126121075, |
|
"rewards/margins": -230439.8356583726, |
|
"rewards/rejected": 230446.56809338523, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.9556527170518426, |
|
"grad_norm": 0.07624714821577072, |
|
"kl": 0.0, |
|
"learning_rate": 3.120668098434291e-06, |
|
"logits/chosen": -905083063.1544715, |
|
"logits/rejected": 1016455518.0854701, |
|
"logps/chosen": -1511.479674796748, |
|
"logps/rejected": -4418.188034188034, |
|
"loss": 0.0074, |
|
"rewards/chosen": 6.443224899167937, |
|
"rewards/margins": 31545728.973139428, |
|
"rewards/rejected": -31545722.529914528, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.9618988132417239, |
|
"grad_norm": 0.05935695767402649, |
|
"kl": 11.266666412353516, |
|
"learning_rate": 3.094214730815433e-06, |
|
"logits/chosen": -773629720.6359832, |
|
"logits/rejected": 844717162.2240664, |
|
"logps/chosen": -1496.3012552301254, |
|
"logps/rejected": -4301.809128630705, |
|
"loss": 0.014, |
|
"rewards/chosen": 6.738523858361663, |
|
"rewards/margins": 28066950.20740353, |
|
"rewards/rejected": -28066943.46887967, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9681449094316052, |
|
"grad_norm": 0.048142824321985245, |
|
"kl": 0.0, |
|
"learning_rate": 3.0676906575859335e-06, |
|
"logits/chosen": -790502942.117647, |
|
"logits/rejected": 963840660.0991735, |
|
"logps/chosen": -1486.6554621848738, |
|
"logps/rejected": -4551.4049586776855, |
|
"loss": 0.0075, |
|
"rewards/chosen": 6.345016864167542, |
|
"rewards/margins": 13151236.229314385, |
|
"rewards/rejected": -13151229.88429752, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.9743910056214866, |
|
"grad_norm": 0.06406976282596588, |
|
"kl": 0.0, |
|
"learning_rate": 3.0410990348452572e-06, |
|
"logits/chosen": -673650812.6608696, |
|
"logits/rejected": 1048039129.088, |
|
"logps/chosen": -1577.5304347826086, |
|
"logps/rejected": -4895.744, |
|
"loss": 0.0057, |
|
"rewards/chosen": 6.700010614809782, |
|
"rewards/margins": 10417035.820010614, |
|
"rewards/rejected": -10417029.12, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9806371018113679, |
|
"grad_norm": 0.060679152607917786, |
|
"kl": 0.0, |
|
"learning_rate": 3.0144430267305874e-06, |
|
"logits/chosen": -849952403.9111111, |
|
"logits/rejected": 943619710.4941176, |
|
"logps/chosen": -1398.6844444444444, |
|
"logps/rejected": -4804.266666666666, |
|
"loss": 0.0062, |
|
"rewards/chosen": 6.7072553168402775, |
|
"rewards/margins": 11713205.389608258, |
|
"rewards/rejected": -11713198.682352941, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.9868831980012492, |
|
"grad_norm": 0.08234000205993652, |
|
"kl": 0.0, |
|
"learning_rate": 2.9877258050403214e-06, |
|
"logits/chosen": -818881825.221374, |
|
"logits/rejected": 1011058143.119266, |
|
"logps/chosen": -1413.0687022900763, |
|
"logps/rejected": -4671.412844036698, |
|
"loss": 0.0087, |
|
"rewards/chosen": 6.607599855379294, |
|
"rewards/margins": 45059064.515856735, |
|
"rewards/rejected": -45059057.90825688, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.9931292941911305, |
|
"grad_norm": 0.07609214633703232, |
|
"kl": 0.0, |
|
"learning_rate": 2.9609505488566585e-06, |
|
"logits/chosen": -804330948.4651163, |
|
"logits/rejected": 1076613599.7117116, |
|
"logps/chosen": -1639.8139534883721, |
|
"logps/rejected": -4862.846846846847, |
|
"loss": 0.0127, |
|
"rewards/chosen": 6.3508963178294575, |
|
"rewards/margins": 54194814.27882425, |
|
"rewards/rejected": -54194807.927927926, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.9993753903810119, |
|
"grad_norm": 0.0829169750213623, |
|
"kl": 0.0, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -796991022.9519651, |
|
"logits/rejected": 1063515074.8047808, |
|
"logps/chosen": -1517.9737991266375, |
|
"logps/rejected": -5103.681274900398, |
|
"loss": 0.0058, |
|
"rewards/chosen": 6.788350334334061, |
|
"rewards/margins": 13866081.561258702, |
|
"rewards/rejected": -13866074.772908367, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0062460961898814, |
|
"grad_norm": 0.06701330095529556, |
|
"kl": 0.5483871102333069, |
|
"learning_rate": 2.9072386834864723e-06, |
|
"logits/chosen": -763707123.409836, |
|
"logits/rejected": 865724318.4761904, |
|
"logps/chosen": -1432.9180327868853, |
|
"logps/rejected": -4660.825396825397, |
|
"loss": 0.0085, |
|
"rewards/chosen": 6.726138255635246, |
|
"rewards/margins": 11638771.424550954, |
|
"rewards/rejected": -11638764.698412698, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.0124921923797627, |
|
"grad_norm": 1.086881160736084, |
|
"kl": 0.0, |
|
"learning_rate": 2.880308465474792e-06, |
|
"logits/chosen": -682072029.2881356, |
|
"logits/rejected": 712537474.0983607, |
|
"logps/chosen": -1559.1864406779662, |
|
"logps/rejected": -4389.770491803279, |
|
"loss": 0.009, |
|
"rewards/chosen": 7.010221836930614, |
|
"rewards/margins": 21856382.616779212, |
|
"rewards/rejected": -21856375.606557377, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.0187382885696439, |
|
"grad_norm": 0.08652088791131973, |
|
"kl": 0.0, |
|
"learning_rate": 2.8533329945589192e-06, |
|
"logits/chosen": -876447524.2575108, |
|
"logits/rejected": 1136792231.902834, |
|
"logps/chosen": -1390.8326180257511, |
|
"logps/rejected": -5287.384615384615, |
|
"loss": 0.0064, |
|
"rewards/chosen": 7.198371592509388, |
|
"rewards/margins": 17011080.753027465, |
|
"rewards/rejected": -17011073.554655872, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.0249843847595252, |
|
"grad_norm": 0.10212092101573944, |
|
"kl": 0.0, |
|
"learning_rate": 2.82631548055013e-06, |
|
"logits/chosen": -851879919.616, |
|
"logits/rejected": 1202871678.8869565, |
|
"logps/chosen": -1350.912, |
|
"logps/rejected": -5666.504347826087, |
|
"loss": 0.0088, |
|
"rewards/chosen": 6.3730400390625, |
|
"rewards/margins": 17158576.668692213, |
|
"rewards/rejected": -17158570.295652173, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.0312304809494066, |
|
"grad_norm": 0.05050951614975929, |
|
"kl": 0.0, |
|
"learning_rate": 2.7992591382624064e-06, |
|
"logits/chosen": -876872855.5336323, |
|
"logits/rejected": 692957773.696498, |
|
"logps/chosen": -1370.1165919282512, |
|
"logps/rejected": -4394.334630350195, |
|
"loss": 0.0074, |
|
"rewards/chosen": 6.687953247617713, |
|
"rewards/margins": 15439475.263828734, |
|
"rewards/rejected": -15439468.575875487, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.037476577139288, |
|
"grad_norm": 0.17391963303089142, |
|
"kl": 0.0, |
|
"learning_rate": 2.7721671871299115e-06, |
|
"logits/chosen": -870837886.9059829, |
|
"logits/rejected": 948289935.6097561, |
|
"logps/chosen": -1535.2478632478633, |
|
"logps/rejected": -4426.926829268293, |
|
"loss": 0.0094, |
|
"rewards/chosen": 7.150073972522703, |
|
"rewards/margins": 36318818.72730975, |
|
"rewards/rejected": -36318811.57723577, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.0437226733291693, |
|
"grad_norm": 0.05891693755984306, |
|
"kl": 1.2000000476837158, |
|
"learning_rate": 2.7450428508239024e-06, |
|
"logits/chosen": -946800960.7710843, |
|
"logits/rejected": 793304485.1255411, |
|
"logps/chosen": -1461.0763052208836, |
|
"logps/rejected": -4203.774891774891, |
|
"loss": 0.005, |
|
"rewards/chosen": 6.78319430064006, |
|
"rewards/margins": 9203793.588389104, |
|
"rewards/rejected": -9203786.805194804, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.0499687695190505, |
|
"grad_norm": 0.0753137543797493, |
|
"kl": 0.0, |
|
"learning_rate": 2.717889356869146e-06, |
|
"logits/chosen": -808641250.8862745, |
|
"logits/rejected": 812026575.0755556, |
|
"logps/chosen": -1453.678431372549, |
|
"logps/rejected": -4184.746666666667, |
|
"loss": 0.0095, |
|
"rewards/chosen": 6.665716911764706, |
|
"rewards/margins": 23116390.239050247, |
|
"rewards/rejected": -23116383.573333334, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.0562148657089319, |
|
"grad_norm": 0.049415141344070435, |
|
"kl": 6.5333333015441895, |
|
"learning_rate": 2.6907099362598815e-06, |
|
"logits/chosen": -968955313.898305, |
|
"logits/rejected": 909888931.6721312, |
|
"logps/chosen": -1353.7627118644068, |
|
"logps/rejected": -4428.459016393443, |
|
"loss": 0.0098, |
|
"rewards/chosen": 6.966336007845604, |
|
"rewards/margins": 45752528.40895896, |
|
"rewards/rejected": -45752521.44262295, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.0624609618988132, |
|
"grad_norm": 0.08883284777402878, |
|
"kl": 2.566666603088379, |
|
"learning_rate": 2.663507823075358e-06, |
|
"logits/chosen": -916332062.117647, |
|
"logits/rejected": 652192607.2066115, |
|
"logps/chosen": -1383.5966386554621, |
|
"logps/rejected": -3607.5371900826444, |
|
"loss": 0.0059, |
|
"rewards/chosen": 7.278132590926996, |
|
"rewards/margins": 19222385.063256558, |
|
"rewards/rejected": -19222377.785123967, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.0687070580886946, |
|
"grad_norm": 0.18526360392570496, |
|
"kl": 0.0, |
|
"learning_rate": 2.6362862540950163e-06, |
|
"logits/chosen": -882909916.0510638, |
|
"logits/rejected": 661963888.8489796, |
|
"logps/chosen": -1447.2170212765957, |
|
"logps/rejected": -3268.9632653061226, |
|
"loss": 0.0099, |
|
"rewards/chosen": 6.842736037234043, |
|
"rewards/margins": 42400891.87130746, |
|
"rewards/rejected": -42400885.02857143, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.074953154278576, |
|
"grad_norm": 0.14632245898246765, |
|
"kl": 6.016666889190674, |
|
"learning_rate": 2.6090484684133406e-06, |
|
"logits/chosen": -773478470.6206896, |
|
"logits/rejected": 809466846.967742, |
|
"logps/chosen": -1492.2758620689656, |
|
"logps/rejected": -3227.0967741935483, |
|
"loss": 0.0072, |
|
"rewards/chosen": 6.721473430765086, |
|
"rewards/margins": 33828763.36663473, |
|
"rewards/rejected": -33828756.64516129, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0811992504684573, |
|
"grad_norm": 0.134897381067276, |
|
"kl": 0.0, |
|
"learning_rate": 2.5817977070544408e-06, |
|
"logits/chosen": -743969481.9816514, |
|
"logits/rejected": 971032904.3053435, |
|
"logps/chosen": -1543.8165137614678, |
|
"logps/rejected": -3347.053435114504, |
|
"loss": 0.0055, |
|
"rewards/chosen": 7.166585659762041, |
|
"rewards/margins": 12854784.326891003, |
|
"rewards/rejected": -12854777.160305344, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.0874453466583385, |
|
"grad_norm": 0.06570931524038315, |
|
"kl": 28.450000762939453, |
|
"learning_rate": 2.554537212586403e-06, |
|
"logits/chosen": -829488342.9135803, |
|
"logits/rejected": 857283882.1265823, |
|
"logps/chosen": -1473.0864197530864, |
|
"logps/rejected": -3268.590717299578, |
|
"loss": 0.0132, |
|
"rewards/chosen": 6.962601775495113, |
|
"rewards/margins": 34971415.16513342, |
|
"rewards/rejected": -34971408.20253164, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.0936914428482198, |
|
"grad_norm": 0.05726313218474388, |
|
"kl": 7.766666889190674, |
|
"learning_rate": 2.527270228735456e-06, |
|
"logits/chosen": -825991529.8955823, |
|
"logits/rejected": 898852057.2121212, |
|
"logps/chosen": -1504.7710843373493, |
|
"logps/rejected": -3464.034632034632, |
|
"loss": 0.006, |
|
"rewards/chosen": 6.7191294474774095, |
|
"rewards/margins": 26011091.740774468, |
|
"rewards/rejected": -26011085.02164502, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.0999375390381012, |
|
"grad_norm": 0.2399851530790329, |
|
"kl": 1.2166666984558105, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -906456969.0290457, |
|
"logits/rejected": 896194654.2594142, |
|
"logps/chosen": -1351.5684647302905, |
|
"logps/rejected": -3565.255230125523, |
|
"loss": 0.0053, |
|
"rewards/chosen": 6.544242637286048, |
|
"rewards/margins": 50190004.067255184, |
|
"rewards/rejected": -50189997.52301255, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.1061836352279826, |
|
"grad_norm": 0.0749124139547348, |
|
"kl": 7.150000095367432, |
|
"learning_rate": 2.4727297712645446e-06, |
|
"logits/chosen": -879664646.3209877, |
|
"logits/rejected": 918287113.721519, |
|
"logps/chosen": -1344.2633744855966, |
|
"logps/rejected": -3626.9367088607596, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.501157407407407, |
|
"rewards/margins": 46364501.35347809, |
|
"rewards/rejected": -46364494.85232068, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.1124297314178637, |
|
"grad_norm": 0.0572606585919857, |
|
"kl": 10.399999618530273, |
|
"learning_rate": 2.4454627874135976e-06, |
|
"logits/chosen": -1075104972.8, |
|
"logits/rejected": 832289723.7333333, |
|
"logps/chosen": -1370.6666666666667, |
|
"logps/rejected": -3668.0, |
|
"loss": 0.0071, |
|
"rewards/chosen": 7.1034596761067705, |
|
"rewards/margins": 29097848.17012634, |
|
"rewards/rejected": -29097841.066666666, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.118675827607745, |
|
"grad_norm": 0.08893398195505142, |
|
"kl": 0.2666666805744171, |
|
"learning_rate": 2.41820229294556e-06, |
|
"logits/chosen": -897616008.5333333, |
|
"logits/rejected": 795406062.9333333, |
|
"logps/chosen": -1664.1333333333334, |
|
"logps/rejected": -3720.5333333333333, |
|
"loss": 0.0087, |
|
"rewards/chosen": 7.050230916341146, |
|
"rewards/margins": 46222744.11689758, |
|
"rewards/rejected": -46222737.06666667, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.1249219237976265, |
|
"grad_norm": 0.04612141475081444, |
|
"kl": 3.1500000953674316, |
|
"learning_rate": 2.3909515315866606e-06, |
|
"logits/chosen": -1041832217.0980393, |
|
"logits/rejected": 625883363.5555556, |
|
"logps/chosen": -1448.9098039215687, |
|
"logps/rejected": -3645.7244444444445, |
|
"loss": 0.0051, |
|
"rewards/chosen": 6.208157169117647, |
|
"rewards/margins": 32499763.71926828, |
|
"rewards/rejected": -32499757.51111111, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1311680199875078, |
|
"grad_norm": 0.09885445982217789, |
|
"kl": 13.033333778381348, |
|
"learning_rate": 2.363713745904984e-06, |
|
"logits/chosen": -1081814542.995816, |
|
"logits/rejected": 461145015.76763487, |
|
"logps/chosen": -1408.8702928870293, |
|
"logps/rejected": -3603.3858921161827, |
|
"loss": 0.0046, |
|
"rewards/chosen": 6.505180071587343, |
|
"rewards/margins": 36068290.52177758, |
|
"rewards/rejected": -36068284.01659751, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.1374141161773892, |
|
"grad_norm": 0.06535188853740692, |
|
"kl": 0.0, |
|
"learning_rate": 2.3364921769246423e-06, |
|
"logits/chosen": -1133595675.6756756, |
|
"logits/rejected": 612681331.1007752, |
|
"logps/chosen": -1402.8108108108108, |
|
"logps/rejected": -3662.8837209302324, |
|
"loss": 0.0049, |
|
"rewards/chosen": 6.57831250439893, |
|
"rewards/margins": 14068877.52404894, |
|
"rewards/rejected": -14068870.945736434, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.1436602123672706, |
|
"grad_norm": 0.031041713431477547, |
|
"kl": 5.800000190734863, |
|
"learning_rate": 2.3092900637401193e-06, |
|
"logits/chosen": -1192516089.5732217, |
|
"logits/rejected": 424164220.28215766, |
|
"logps/chosen": -1403.44769874477, |
|
"logps/rejected": -3883.286307053942, |
|
"loss": 0.007, |
|
"rewards/chosen": 6.954869226431747, |
|
"rewards/margins": 42810608.64781529, |
|
"rewards/rejected": -42810601.69294606, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.1499063085571517, |
|
"grad_norm": 0.05815276503562927, |
|
"kl": 12.633333206176758, |
|
"learning_rate": 2.2821106431308546e-06, |
|
"logits/chosen": -1078428576.6477733, |
|
"logits/rejected": 431689288.51502144, |
|
"logps/chosen": -1423.5465587044534, |
|
"logps/rejected": -4039.6909871244634, |
|
"loss": 0.0103, |
|
"rewards/chosen": 6.802721525493421, |
|
"rewards/margins": 29919731.02589749, |
|
"rewards/rejected": -29919724.223175965, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.156152404747033, |
|
"grad_norm": 0.14204278588294983, |
|
"kl": 0.0, |
|
"learning_rate": 2.2549571491760985e-06, |
|
"logits/chosen": -1101822333.8305085, |
|
"logits/rejected": 492673863.3442623, |
|
"logps/chosen": -1459.050847457627, |
|
"logps/rejected": -4242.360655737705, |
|
"loss": 0.0094, |
|
"rewards/chosen": 6.55073831849179, |
|
"rewards/margins": 50274793.17368914, |
|
"rewards/rejected": -50274786.62295082, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.1623985009369144, |
|
"grad_norm": 0.034907300025224686, |
|
"kl": 0.0, |
|
"learning_rate": 2.2278328128700893e-06, |
|
"logits/chosen": -1120958714.5991561, |
|
"logits/rejected": 516123778.63374484, |
|
"logps/chosen": -1365.0632911392406, |
|
"logps/rejected": -4247.7037037037035, |
|
"loss": 0.0061, |
|
"rewards/chosen": 6.981713661161656, |
|
"rewards/margins": 10248818.438503785, |
|
"rewards/rejected": -10248811.456790123, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.1686445971267958, |
|
"grad_norm": 0.05778981000185013, |
|
"kl": 0.0, |
|
"learning_rate": 2.2007408617375944e-06, |
|
"logits/chosen": -1153482944.7529411, |
|
"logits/rejected": 333381923.27111113, |
|
"logps/chosen": -1441.3803921568629, |
|
"logps/rejected": -4042.5244444444443, |
|
"loss": 0.0044, |
|
"rewards/chosen": 6.869606885723039, |
|
"rewards/margins": 59051943.29627355, |
|
"rewards/rejected": -59051936.42666667, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.1748906933166772, |
|
"grad_norm": 0.05797224119305611, |
|
"kl": 0.0, |
|
"learning_rate": 2.173684519449872e-06, |
|
"logits/chosen": -1122529125.757322, |
|
"logits/rejected": 264763264.53112033, |
|
"logps/chosen": -1340.7866108786611, |
|
"logps/rejected": -3995.0871369294605, |
|
"loss": 0.0052, |
|
"rewards/chosen": 6.821588364605126, |
|
"rewards/margins": 60523131.6680614, |
|
"rewards/rejected": -60523124.84647303, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.1811367895065583, |
|
"grad_norm": 0.3804548680782318, |
|
"kl": 6.166666507720947, |
|
"learning_rate": 2.146667005441082e-06, |
|
"logits/chosen": -1062347298.1333333, |
|
"logits/rejected": 383971054.93333334, |
|
"logps/chosen": -1508.7333333333333, |
|
"logps/rejected": -4347.2, |
|
"loss": 0.0089, |
|
"rewards/chosen": 6.360628763834636, |
|
"rewards/margins": 30816569.560628764, |
|
"rewards/rejected": -30816563.2, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.1873828856964397, |
|
"grad_norm": 0.04914607107639313, |
|
"kl": 0.0, |
|
"learning_rate": 2.1196915345252085e-06, |
|
"logits/chosen": -1130330548.4590163, |
|
"logits/rejected": 303704931.7966102, |
|
"logps/chosen": -1441.9016393442623, |
|
"logps/rejected": -4298.3050847457625, |
|
"loss": 0.0046, |
|
"rewards/chosen": 6.969489425909324, |
|
"rewards/margins": 28575672.86779451, |
|
"rewards/rejected": -28575665.898305085, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.193628981886321, |
|
"grad_norm": 0.033815231174230576, |
|
"kl": 0.0, |
|
"learning_rate": 2.0927613165135285e-06, |
|
"logits/chosen": -1095581768.1762114, |
|
"logits/rejected": 306727130.5612648, |
|
"logps/chosen": -1453.0396475770924, |
|
"logps/rejected": -4572.584980237154, |
|
"loss": 0.0066, |
|
"rewards/chosen": 6.561520210971916, |
|
"rewards/margins": 32260731.93701428, |
|
"rewards/rejected": -32260725.37549407, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.1998750780762024, |
|
"grad_norm": 0.28993692994117737, |
|
"kl": 3.1666667461395264, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1137361027.072, |
|
"logits/rejected": 360618963.4782609, |
|
"logps/chosen": -1458.944, |
|
"logps/rejected": -4253.495652173913, |
|
"loss": 0.0061, |
|
"rewards/chosen": 6.25701904296875, |
|
"rewards/margins": 26442014.743975565, |
|
"rewards/rejected": -26442008.486956522, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.2061211742660838, |
|
"grad_norm": 0.048584070056676865, |
|
"kl": 2.433333396911621, |
|
"learning_rate": 2.039049451143342e-06, |
|
"logits/chosen": -1235638418.9029536, |
|
"logits/rejected": 253565526.38683128, |
|
"logps/chosen": -1440.1350210970463, |
|
"logps/rejected": -3978.008230452675, |
|
"loss": 0.004, |
|
"rewards/chosen": 6.875963170820148, |
|
"rewards/margins": 24204303.303946707, |
|
"rewards/rejected": -24204296.427983537, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.212367270455965, |
|
"grad_norm": 0.1172674149274826, |
|
"kl": 2.883333444595337, |
|
"learning_rate": 2.01227419495968e-06, |
|
"logits/chosen": -1092509557.1525424, |
|
"logits/rejected": 51586501.24590164, |
|
"logps/chosen": -1448.8813559322034, |
|
"logps/rejected": -3612.8524590163934, |
|
"loss": 0.0083, |
|
"rewards/chosen": 6.858350850768009, |
|
"rewards/margins": 14972855.120645933, |
|
"rewards/rejected": -14972848.262295082, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.2186133666458463, |
|
"grad_norm": 0.13202552497386932, |
|
"kl": 22.200000762939453, |
|
"learning_rate": 1.985556973269413e-06, |
|
"logits/chosen": -1104590253.419355, |
|
"logits/rejected": 359010727.7241379, |
|
"logps/chosen": -1484.774193548387, |
|
"logps/rejected": -3469.793103448276, |
|
"loss": 0.0057, |
|
"rewards/chosen": 7.040771484375, |
|
"rewards/margins": 48334387.17870252, |
|
"rewards/rejected": -48334380.137931034, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.2248594628357277, |
|
"grad_norm": 0.08171664923429489, |
|
"kl": 11.266666412353516, |
|
"learning_rate": 1.958900965154743e-06, |
|
"logits/chosen": -1088931073.053498, |
|
"logits/rejected": 97844971.47679324, |
|
"logps/chosen": -1347.6213991769548, |
|
"logps/rejected": -3505.1476793248944, |
|
"loss": 0.0062, |
|
"rewards/chosen": 6.958841708461934, |
|
"rewards/margins": 21401479.498926096, |
|
"rewards/rejected": -21401472.540084388, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.231105559025609, |
|
"grad_norm": 0.13173836469650269, |
|
"kl": 46.71666717529297, |
|
"learning_rate": 1.9323093424140673e-06, |
|
"logits/chosen": -1094815644.097561, |
|
"logits/rejected": 227897238.97435898, |
|
"logps/chosen": -1464.0650406504064, |
|
"logps/rejected": -3056.136752136752, |
|
"loss": 0.011, |
|
"rewards/chosen": 7.490336596481199, |
|
"rewards/margins": 25790834.70401181, |
|
"rewards/rejected": -25790827.213675212, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.2373516552154904, |
|
"grad_norm": 0.09634287655353546, |
|
"kl": 29.266666412353516, |
|
"learning_rate": 1.9057852691845677e-06, |
|
"logits/chosen": -1083139849.560166, |
|
"logits/rejected": 329840768.53556484, |
|
"logps/chosen": -1380.5809128630706, |
|
"logps/rejected": -3287.297071129707, |
|
"loss": 0.0128, |
|
"rewards/chosen": 6.732099730938796, |
|
"rewards/margins": 21379312.23837588, |
|
"rewards/rejected": -21379305.50627615, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.2435977514053715, |
|
"grad_norm": 0.034083809703588486, |
|
"kl": 2.058333396911621, |
|
"learning_rate": 1.8793319015657091e-06, |
|
"logits/chosen": -1052806152.7521367, |
|
"logits/rejected": 341929551.08943087, |
|
"logps/chosen": -1385.7094017094016, |
|
"logps/rejected": -3508.5528455284552, |
|
"loss": 0.0068, |
|
"rewards/chosen": 6.91802978515625, |
|
"rewards/margins": 45265156.836728975, |
|
"rewards/rejected": -45265149.91869919, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.249843847595253, |
|
"grad_norm": 0.05067850649356842, |
|
"kl": 0.0, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits/chosen": -1049794857.5726496, |
|
"logits/rejected": 357337437.6585366, |
|
"logps/chosen": -1484.034188034188, |
|
"logps/rejected": -3700.8130081300815, |
|
"loss": 0.0035, |
|
"rewards/chosen": 6.565341525607639, |
|
"rewards/margins": 30968287.02062608, |
|
"rewards/rejected": -30968280.455284555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.249843847595253, |
|
"eval_kl": 6.689723491668701, |
|
"eval_logits/chosen": -1030192454.8085107, |
|
"eval_logits/rejected": 341122426.56969696, |
|
"eval_logps/chosen": -1450.943907156673, |
|
"eval_logps/rejected": -3746.327272727273, |
|
"eval_loss": 0.007318771444261074, |
|
"eval_rewards/chosen": 6.702488439917795, |
|
"eval_rewards/margins": 34027874.24188238, |
|
"eval_rewards/rejected": -34027867.53939394, |
|
"eval_runtime": 640.6156, |
|
"eval_samples_per_second": 6.314, |
|
"eval_steps_per_second": 0.395, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.2560899437851343, |
|
"grad_norm": 0.5064871907234192, |
|
"kl": 0.6333333253860474, |
|
"learning_rate": 1.8266498651169352e-06, |
|
"logits/chosen": -1037007838.967742, |
|
"logits/rejected": 415172819.86206895, |
|
"logps/chosen": -1417.4193548387098, |
|
"logps/rejected": -4225.103448275862, |
|
"loss": 0.0039, |
|
"rewards/chosen": 7.380758962323589, |
|
"rewards/margins": 24907485.44972448, |
|
"rewards/rejected": -24907478.068965517, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.2623360399750156, |
|
"grad_norm": 0.06193430721759796, |
|
"kl": 0.0, |
|
"learning_rate": 1.8004274649225201e-06, |
|
"logits/chosen": -985542573.0850202, |
|
"logits/rejected": 323438442.5751073, |
|
"logps/chosen": -1514.5587044534414, |
|
"logps/rejected": -3861.1502145922746, |
|
"loss": 0.0061, |
|
"rewards/chosen": 6.887757088973937, |
|
"rewards/margins": 33208175.26543949, |
|
"rewards/rejected": -33208168.377682403, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.268582136164897, |
|
"grad_norm": 0.03230896219611168, |
|
"kl": 7.991666793823242, |
|
"learning_rate": 1.7742883068638447e-06, |
|
"logits/chosen": -1005395296.5245901, |
|
"logits/rejected": 510674284.4745763, |
|
"logps/chosen": -1370.4262295081967, |
|
"logps/rejected": -3835.6610169491523, |
|
"loss": 0.0075, |
|
"rewards/chosen": 6.63354242043417, |
|
"rewards/margins": 29501325.142017, |
|
"rewards/rejected": -29501318.508474577, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.2748282323547784, |
|
"grad_norm": 0.03562912717461586, |
|
"kl": 19.53333282470703, |
|
"learning_rate": 1.7482355012393177e-06, |
|
"logits/chosen": -955716859.8032787, |
|
"logits/rejected": 323245767.59322035, |
|
"logps/chosen": -1482.0983606557377, |
|
"logps/rejected": -3633.898305084746, |
|
"loss": 0.0073, |
|
"rewards/chosen": 6.989379382524334, |
|
"rewards/margins": 17213807.124972604, |
|
"rewards/rejected": -17213800.13559322, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.2810743285446595, |
|
"grad_norm": 0.05819810554385185, |
|
"kl": 5.400000095367432, |
|
"learning_rate": 1.722272148072273e-06, |
|
"logits/chosen": -1113979296.3983402, |
|
"logits/rejected": 439603422.7949791, |
|
"logps/chosen": -1445.5767634854772, |
|
"logps/rejected": -3578.1087866108787, |
|
"loss": 0.0048, |
|
"rewards/chosen": 6.725795635049274, |
|
"rewards/margins": 81843742.2906492, |
|
"rewards/rejected": -81843735.56485356, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.287320424734541, |
|
"grad_norm": 0.03748779371380806, |
|
"kl": 3.299999952316284, |
|
"learning_rate": 1.6964013367420967e-06, |
|
"logits/chosen": -988995373.948718, |
|
"logits/rejected": 310352920.9756098, |
|
"logps/chosen": -1538.4615384615386, |
|
"logps/rejected": -3188.2926829268295, |
|
"loss": 0.0066, |
|
"rewards/chosen": 6.511683276575854, |
|
"rewards/margins": 55828161.82875645, |
|
"rewards/rejected": -55828155.317073174, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.2935665209244223, |
|
"grad_norm": 0.032022129744291306, |
|
"kl": 3.616666555404663, |
|
"learning_rate": 1.6706261456166205e-06, |
|
"logits/chosen": -1067770272.5423728, |
|
"logits/rejected": 299978684.852459, |
|
"logps/chosen": -1298.3050847457628, |
|
"logps/rejected": -3424.5245901639346, |
|
"loss": 0.0049, |
|
"rewards/chosen": 6.950107380495233, |
|
"rewards/margins": 31364489.048468035, |
|
"rewards/rejected": -31364482.098360654, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.2998126171143036, |
|
"grad_norm": 1.6918329000473022, |
|
"kl": 17.450000762939453, |
|
"learning_rate": 1.6449496416858285e-06, |
|
"logits/chosen": -1045500177.0666667, |
|
"logits/rejected": 558655078.4, |
|
"logps/chosen": -1459.0166666666667, |
|
"logps/rejected": -3562.133333333333, |
|
"loss": 0.0086, |
|
"rewards/chosen": 6.9263356526692705, |
|
"rewards/margins": 35992681.05966899, |
|
"rewards/rejected": -35992674.13333333, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.3060587133041848, |
|
"grad_norm": 0.08456436544656754, |
|
"kl": 12.133333206176758, |
|
"learning_rate": 1.6193748801969164e-06, |
|
"logits/chosen": -949660330.6666666, |
|
"logits/rejected": 287388467.2, |
|
"logps/chosen": -1376.4, |
|
"logps/rejected": -3333.0666666666666, |
|
"loss": 0.0053, |
|
"rewards/chosen": 6.561256408691406, |
|
"rewards/margins": 14701292.961256409, |
|
"rewards/rejected": -14701286.4, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.3123048094940661, |
|
"grad_norm": 0.0434587299823761, |
|
"kl": 20.799999237060547, |
|
"learning_rate": 1.5939049042907463e-06, |
|
"logits/chosen": -961389484.0655738, |
|
"logits/rejected": 351912769.08474576, |
|
"logps/chosen": -1404.8524590163934, |
|
"logps/rejected": -3683.7966101694915, |
|
"loss": 0.008, |
|
"rewards/chosen": 6.551050405033299, |
|
"rewards/margins": 32163902.957830068, |
|
"rewards/rejected": -32163896.40677966, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.3185509056839475, |
|
"grad_norm": 0.05942801013588905, |
|
"kl": 13.866666793823242, |
|
"learning_rate": 1.5685427446397427e-06, |
|
"logits/chosen": -1025611711.1312218, |
|
"logits/rejected": 455446354.03861004, |
|
"logps/chosen": -1561.9185520361991, |
|
"logps/rejected": -3815.289575289575, |
|
"loss": 0.0066, |
|
"rewards/chosen": 6.581945082720588, |
|
"rewards/margins": 32960453.34642385, |
|
"rewards/rejected": -32960446.764478765, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.3247970018738289, |
|
"grad_norm": 0.0555511899292469, |
|
"kl": 7.133333206176758, |
|
"learning_rate": 1.5432914190872757e-06, |
|
"logits/chosen": -1093971142.0576131, |
|
"logits/rejected": 627083842.9704641, |
|
"logps/chosen": -1495.9670781893003, |
|
"logps/rejected": -3863.7637130801686, |
|
"loss": 0.0055, |
|
"rewards/chosen": 6.9783146862139915, |
|
"rewards/margins": 25399112.868610047, |
|
"rewards/rejected": -25399105.89029536, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.3310430980637102, |
|
"grad_norm": 0.05366786941885948, |
|
"kl": 8.399999618530273, |
|
"learning_rate": 1.5181539322885652e-06, |
|
"logits/chosen": -972289011.9529412, |
|
"logits/rejected": 415688148.7644445, |
|
"logps/chosen": -1533.7411764705882, |
|
"logps/rejected": -3943.5377777777776, |
|
"loss": 0.0075, |
|
"rewards/chosen": 6.679204963235295, |
|
"rewards/margins": 57861982.28809385, |
|
"rewards/rejected": -57861975.60888889, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.3372891942535916, |
|
"grad_norm": 0.03548969328403473, |
|
"kl": 0.0, |
|
"learning_rate": 1.4931332753531575e-06, |
|
"logits/chosen": -974249031.4418604, |
|
"logits/rejected": 311672399.2150943, |
|
"logps/chosen": -1517.3953488372092, |
|
"logps/rejected": -3859.8037735849057, |
|
"loss": 0.0036, |
|
"rewards/chosen": 7.6878400935683135, |
|
"rewards/margins": 18645339.733123112, |
|
"rewards/rejected": -18645332.04528302, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.3435352904434728, |
|
"grad_norm": 0.022944239899516106, |
|
"kl": 5.241666793823242, |
|
"learning_rate": 1.4682324254890135e-06, |
|
"logits/chosen": -901207572.1222707, |
|
"logits/rejected": 596886222.0239043, |
|
"logps/chosen": -1441.6768558951965, |
|
"logps/rejected": -3946.5816733067727, |
|
"loss": 0.004, |
|
"rewards/chosen": 6.79593521851119, |
|
"rewards/margins": 43888491.768046774, |
|
"rewards/rejected": -43888484.97211155, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.3497813866333541, |
|
"grad_norm": 0.03675493597984314, |
|
"kl": 0.0, |
|
"learning_rate": 1.443454345648252e-06, |
|
"logits/chosen": -1049132766.0176991, |
|
"logits/rejected": 458731358.7401575, |
|
"logps/chosen": -1401.9823008849557, |
|
"logps/rejected": -3990.4251968503936, |
|
"loss": 0.0041, |
|
"rewards/chosen": 6.76517830907771, |
|
"rewards/margins": 32297702.513209805, |
|
"rewards/rejected": -32297695.748031497, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.3560274828232355, |
|
"grad_norm": 0.03743245080113411, |
|
"kl": 20.516666412353516, |
|
"learning_rate": 1.4188019841745842e-06, |
|
"logits/chosen": -938349038.0700389, |
|
"logits/rejected": 473034733.63228697, |
|
"logps/chosen": -1411.1128404669262, |
|
"logps/rejected": -3885.345291479821, |
|
"loss": 0.0057, |
|
"rewards/chosen": 6.771459675948444, |
|
"rewards/margins": 14190376.062939497, |
|
"rewards/rejected": -14190369.29147982, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.3622735790131169, |
|
"grad_norm": 0.1011684238910675, |
|
"kl": 21.53333282470703, |
|
"learning_rate": 1.3942782744524974e-06, |
|
"logits/chosen": -1010349431.8987342, |
|
"logits/rejected": 668188874.2716049, |
|
"logps/chosen": -1350.210970464135, |
|
"logps/rejected": -4185.020576131687, |
|
"loss": 0.0073, |
|
"rewards/chosen": 6.38992760251846, |
|
"rewards/margins": 14171458.43931032, |
|
"rewards/rejected": -14171452.049382716, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.368519675202998, |
|
"grad_norm": 0.050006281584501266, |
|
"kl": 0.0, |
|
"learning_rate": 1.369886134558201e-06, |
|
"logits/chosen": -1023652155.0769231, |
|
"logits/rejected": 468892732.2352941, |
|
"logps/chosen": -1394.6538461538462, |
|
"logps/rejected": -3811.0588235294117, |
|
"loss": 0.0038, |
|
"rewards/chosen": 7.336665813739483, |
|
"rewards/margins": 22742599.336665813, |
|
"rewards/rejected": -22742592.0, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.3747657713928794, |
|
"grad_norm": 0.04441044107079506, |
|
"kl": 7.0, |
|
"learning_rate": 1.3456284669124159e-06, |
|
"logits/chosen": -1112350480.0669456, |
|
"logits/rejected": 470060087.2365145, |
|
"logps/chosen": -1366.7615062761506, |
|
"logps/rejected": -3846.3734439834025, |
|
"loss": 0.0058, |
|
"rewards/chosen": 6.793774209760722, |
|
"rewards/margins": 25733330.743981678, |
|
"rewards/rejected": -25733323.950207468, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.3810118675827607, |
|
"grad_norm": 0.04001372680068016, |
|
"kl": 0.550000011920929, |
|
"learning_rate": 1.3215081579350058e-06, |
|
"logits/chosen": -1090801010.0168068, |
|
"logits/rejected": 500092758.74380165, |
|
"logps/chosen": -1313.4789915966387, |
|
"logps/rejected": -4148.892561983471, |
|
"loss": 0.0048, |
|
"rewards/chosen": 6.737123633633141, |
|
"rewards/margins": 30996945.84456165, |
|
"rewards/rejected": -30996939.107438017, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.387257963772642, |
|
"grad_norm": 0.05370747670531273, |
|
"kl": 6.116666793823242, |
|
"learning_rate": 1.2975280777015315e-06, |
|
"logits/chosen": -955014613.1633466, |
|
"logits/rejected": 278692269.2751092, |
|
"logps/chosen": -1469.4501992031871, |
|
"logps/rejected": -3723.598253275109, |
|
"loss": 0.0082, |
|
"rewards/chosen": 6.8361475971115535, |
|
"rewards/margins": 18840817.595972925, |
|
"rewards/rejected": -18840810.759825327, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.3935040599625235, |
|
"grad_norm": 0.10151516646146774, |
|
"kl": 5.0333333015441895, |
|
"learning_rate": 1.2736910796017302e-06, |
|
"logits/chosen": -874066532.7874016, |
|
"logits/rejected": 606169722.3362832, |
|
"logps/chosen": -1502.4881889763778, |
|
"logps/rejected": -3978.761061946903, |
|
"loss": 0.0058, |
|
"rewards/chosen": 6.727448711245079, |
|
"rewards/margins": 24209047.718599156, |
|
"rewards/rejected": -24209040.991150443, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.3997501561524048, |
|
"grad_norm": 0.03879309073090553, |
|
"kl": 4.183333396911621, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"logits/chosen": -974429236.0677966, |
|
"logits/rejected": 597739889.3114754, |
|
"logps/chosen": -1249.3559322033898, |
|
"logps/rejected": -3902.688524590164, |
|
"loss": 0.0045, |
|
"rewards/chosen": 6.744752011056674, |
|
"rewards/margins": 29603798.48245693, |
|
"rewards/rejected": -29603791.737704918, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.4059962523422862, |
|
"grad_norm": 0.04185258969664574, |
|
"kl": 13.066666603088379, |
|
"learning_rate": 1.2264576578978956e-06, |
|
"logits/chosen": -792940402.7586207, |
|
"logits/rejected": 305645105.5483871, |
|
"logps/chosen": -1571.0344827586207, |
|
"logps/rejected": -3693.4193548387098, |
|
"loss": 0.0082, |
|
"rewards/chosen": 6.826761706122037, |
|
"rewards/margins": 13727978.955793964, |
|
"rewards/rejected": -13727972.129032258, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.4122423485321673, |
|
"grad_norm": 0.03908821567893028, |
|
"kl": 6.266666889190674, |
|
"learning_rate": 1.203066854598696e-06, |
|
"logits/chosen": -893589702.1935484, |
|
"logits/rejected": 462767774.8965517, |
|
"logps/chosen": -1564.516129032258, |
|
"logps/rejected": -3881.103448275862, |
|
"loss": 0.0087, |
|
"rewards/chosen": 6.733085386214718, |
|
"rewards/margins": 33438053.62963711, |
|
"rewards/rejected": -33438046.896551725, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.4184884447220487, |
|
"grad_norm": 0.05348537489771843, |
|
"kl": 0.0, |
|
"learning_rate": 1.1798303733740801e-06, |
|
"logits/chosen": -912541306.2995951, |
|
"logits/rejected": 547716698.0944206, |
|
"logps/chosen": -1501.7327935222672, |
|
"logps/rejected": -4242.403433476395, |
|
"loss": 0.0047, |
|
"rewards/chosen": 6.570156329073886, |
|
"rewards/margins": 41152707.5486971, |
|
"rewards/rejected": -41152700.97854077, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.42473454091193, |
|
"grad_norm": 0.03471559286117554, |
|
"kl": 0.0, |
|
"learning_rate": 1.1567509791329402e-06, |
|
"logits/chosen": -936182292.8130082, |
|
"logits/rejected": 496272200.2051282, |
|
"logps/chosen": -1443.3821138211383, |
|
"logps/rejected": -3950.4957264957266, |
|
"loss": 0.0067, |
|
"rewards/chosen": 6.8144675154026935, |
|
"rewards/margins": 13923291.053783756, |
|
"rewards/rejected": -13923284.23931624, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.4309806371018114, |
|
"grad_norm": 0.03510862588882446, |
|
"kl": 3.0999999046325684, |
|
"learning_rate": 1.1338314180923917e-06, |
|
"logits/chosen": -1019970495.7322176, |
|
"logits/rejected": 594355501.6763486, |
|
"logps/chosen": -1436.1171548117154, |
|
"logps/rejected": -4024.033195020747, |
|
"loss": 0.0038, |
|
"rewards/chosen": 6.848525043311977, |
|
"rewards/margins": 26934969.304956578, |
|
"rewards/rejected": -26934962.456431534, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.4372267332916926, |
|
"grad_norm": 0.045122962445020676, |
|
"kl": 13.449999809265137, |
|
"learning_rate": 1.1110744174509952e-06, |
|
"logits/chosen": -953595309.4193548, |
|
"logits/rejected": 422915107.3103448, |
|
"logps/chosen": -1544.6451612903227, |
|
"logps/rejected": -4019.310344827586, |
|
"loss": 0.0101, |
|
"rewards/chosen": 6.858165125693044, |
|
"rewards/margins": 35274222.582303055, |
|
"rewards/rejected": -35274215.72413793, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.443472829481574, |
|
"grad_norm": 0.04991288483142853, |
|
"kl": 13.316666603088379, |
|
"learning_rate": 1.0884826850642492e-06, |
|
"logits/chosen": -982462394.5762712, |
|
"logits/rejected": 519917500.852459, |
|
"logps/chosen": -1360.2372881355932, |
|
"logps/rejected": -4078.688524590164, |
|
"loss": 0.0068, |
|
"rewards/chosen": 6.708972478317002, |
|
"rewards/margins": 27566999.495857727, |
|
"rewards/rejected": -27566992.786885247, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.4497189256714553, |
|
"grad_norm": 0.0597245953977108, |
|
"kl": 0.0, |
|
"learning_rate": 1.0660589091223854e-06, |
|
"logits/chosen": -956790647.4666667, |
|
"logits/rejected": 440581051.73333335, |
|
"logps/chosen": -1363.0666666666666, |
|
"logps/rejected": -4116.8, |
|
"loss": 0.009, |
|
"rewards/chosen": 6.348196411132813, |
|
"rewards/margins": 36036477.81486308, |
|
"rewards/rejected": -36036471.46666667, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.4559650218613367, |
|
"grad_norm": 0.03371795266866684, |
|
"kl": 6.150000095367432, |
|
"learning_rate": 1.043805757830495e-06, |
|
"logits/chosen": -1058097247.0548524, |
|
"logits/rejected": 537000365.8271605, |
|
"logps/chosen": -1570.8354430379748, |
|
"logps/rejected": -4048.329218106996, |
|
"loss": 0.0037, |
|
"rewards/chosen": 7.199290343980749, |
|
"rewards/margins": 49041229.783652484, |
|
"rewards/rejected": -49041222.58436214, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.462211118051218, |
|
"grad_norm": 0.05939038470387459, |
|
"kl": 6.483333110809326, |
|
"learning_rate": 1.0217258790910447e-06, |
|
"logits/chosen": -901359395.9669422, |
|
"logits/rejected": 502541062.4537815, |
|
"logps/chosen": -1473.8512396694214, |
|
"logps/rejected": -3943.529411764706, |
|
"loss": 0.0118, |
|
"rewards/chosen": 6.762523304332387, |
|
"rewards/margins": 40793770.25832162, |
|
"rewards/rejected": -40793763.49579832, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.4684572142410994, |
|
"grad_norm": 0.1239381730556488, |
|
"kl": 10.458333015441895, |
|
"learning_rate": 9.99821900188798e-07, |
|
"logits/chosen": -941830127.6812749, |
|
"logits/rejected": 502061851.9475983, |
|
"logps/chosen": -1454.406374501992, |
|
"logps/rejected": -4060.5065502183406, |
|
"loss": 0.0094, |
|
"rewards/chosen": 6.164070281374502, |
|
"rewards/margins": 3046339.0898344736, |
|
"rewards/rejected": -3046332.9257641924, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.4747033104309806, |
|
"grad_norm": 0.04187585785984993, |
|
"kl": 5.474999904632568, |
|
"learning_rate": 9.780964274781984e-07, |
|
"logits/chosen": -1160314880.0, |
|
"logits/rejected": 799277056.0, |
|
"logps/chosen": -1435.3125, |
|
"logps/rejected": -4670.285714285715, |
|
"loss": 0.0069, |
|
"rewards/chosen": 7.046051502227783, |
|
"rewards/margins": 23172132.760337215, |
|
"rewards/rejected": -23172125.714285713, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.480949406620862, |
|
"grad_norm": 0.032712046056985855, |
|
"kl": 0.9333333373069763, |
|
"learning_rate": 9.56552046073238e-07, |
|
"logits/chosen": -1228912756.2620087, |
|
"logits/rejected": 683057445.7370518, |
|
"logps/chosen": -1331.8427947598254, |
|
"logps/rejected": -4543.235059760957, |
|
"loss": 0.0037, |
|
"rewards/chosen": 7.146233304619269, |
|
"rewards/margins": 35616020.40519745, |
|
"rewards/rejected": -35616013.25896414, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.4871955028107433, |
|
"grad_norm": 0.041652340441942215, |
|
"kl": 4.083333492279053, |
|
"learning_rate": 9.351913195398523e-07, |
|
"logits/chosen": -1104601120.8995984, |
|
"logits/rejected": 549363038.1991342, |
|
"logps/chosen": -1428.0481927710844, |
|
"logps/rejected": -4372.779220779221, |
|
"loss": 0.0059, |
|
"rewards/chosen": 7.262572261703062, |
|
"rewards/margins": 39940559.85131685, |
|
"rewards/rejected": -39940552.58874459, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.4934415990006247, |
|
"grad_norm": 0.0400969497859478, |
|
"kl": 0.0, |
|
"learning_rate": 9.140167895908867e-07, |
|
"logits/chosen": -1067380794.2369668, |
|
"logits/rejected": 536605844.4609665, |
|
"logps/chosen": -1447.9620853080569, |
|
"logps/rejected": -4313.457249070632, |
|
"loss": 0.0036, |
|
"rewards/chosen": 6.994773539321683, |
|
"rewards/margins": 27280765.953881346, |
|
"rewards/rejected": -27280758.959107805, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.4996876951905058, |
|
"grad_norm": 0.03562096133828163, |
|
"kl": 0.0, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits/chosen": -970590381.559322, |
|
"logits/rejected": 498589293.1147541, |
|
"logps/chosen": -1501.8305084745762, |
|
"logps/rejected": -4478.426229508197, |
|
"loss": 0.0041, |
|
"rewards/chosen": 7.16283196918035, |
|
"rewards/margins": 43964162.96611066, |
|
"rewards/rejected": -43964155.80327869, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5059337913803872, |
|
"grad_norm": 0.04305117204785347, |
|
"kl": 2.2333333492279053, |
|
"learning_rate": 8.722363752201277e-07, |
|
"logits/chosen": -984962389.3333334, |
|
"logits/rejected": 612717909.3333334, |
|
"logps/chosen": -1455.8699186991869, |
|
"logps/rejected": -4411.076923076923, |
|
"loss": 0.009, |
|
"rewards/chosen": 6.036764005335366, |
|
"rewards/margins": 34317619.985481955, |
|
"rewards/rejected": -34317613.94871795, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.5121798875702686, |
|
"grad_norm": 0.04826142638921738, |
|
"kl": 0.0, |
|
"learning_rate": 8.516354622498279e-07, |
|
"logits/chosen": -991059172.9957806, |
|
"logits/rejected": 368671554.3703704, |
|
"logps/chosen": -1475.0379746835442, |
|
"logps/rejected": -4335.14403292181, |
|
"loss": 0.0046, |
|
"rewards/chosen": 6.493236171545359, |
|
"rewards/margins": 28039880.336857572, |
|
"rewards/rejected": -28039873.8436214, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.51842598376015, |
|
"grad_norm": 0.04798099398612976, |
|
"kl": 2.4666666984558105, |
|
"learning_rate": 8.31230688175382e-07, |
|
"logits/chosen": -1036176902.1195219, |
|
"logits/rejected": 545987570.5851529, |
|
"logps/chosen": -1544.03187250996, |
|
"logps/rejected": -4271.371179039302, |
|
"loss": 0.0041, |
|
"rewards/chosen": 6.812338050143177, |
|
"rewards/margins": 59293774.358189575, |
|
"rewards/rejected": -59293767.54585153, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.5246720799500313, |
|
"grad_norm": 0.04589550942182541, |
|
"kl": 9.600000381469727, |
|
"learning_rate": 8.110244809608494e-07, |
|
"logits/chosen": -1033476505.6, |
|
"logits/rejected": 549855778.1333333, |
|
"logps/chosen": -1523.4666666666667, |
|
"logps/rejected": -4341.6, |
|
"loss": 0.0038, |
|
"rewards/chosen": 7.062318929036459, |
|
"rewards/margins": 54005668.928985596, |
|
"rewards/rejected": -54005661.86666667, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.5309181761399127, |
|
"grad_norm": 0.035851314663887024, |
|
"kl": 0.0, |
|
"learning_rate": 7.910192449428216e-07, |
|
"logits/chosen": -989924222.4326531, |
|
"logits/rejected": 594846009.7361702, |
|
"logps/chosen": -1455.6734693877552, |
|
"logps/rejected": -4535.2851063829785, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.187053571428572, |
|
"rewards/margins": 61192067.46364931, |
|
"rewards/rejected": -61192061.27659574, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.537164272329794, |
|
"grad_norm": 0.053536731749773026, |
|
"kl": 0.0, |
|
"learning_rate": 7.712173605443269e-07, |
|
"logits/chosen": -1039319604.9655173, |
|
"logits/rejected": 441644989.9354839, |
|
"logps/chosen": -1326.2068965517242, |
|
"logps/rejected": -4275.612903225807, |
|
"loss": 0.0057, |
|
"rewards/chosen": 6.277499494881465, |
|
"rewards/margins": 36871186.664596274, |
|
"rewards/rejected": -36871180.38709678, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.5434103685196752, |
|
"grad_norm": 0.05226130411028862, |
|
"kl": 4.4666666984558105, |
|
"learning_rate": 7.516211839915821e-07, |
|
"logits/chosen": -1063635067.373494, |
|
"logits/rejected": 564147505.8701298, |
|
"logps/chosen": -1477.012048192771, |
|
"logps/rejected": -4608.554112554112, |
|
"loss": 0.0074, |
|
"rewards/chosen": 6.572622521335341, |
|
"rewards/margins": 44541413.32586928, |
|
"rewards/rejected": -44541406.753246754, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.5496564647095565, |
|
"grad_norm": 0.05410970002412796, |
|
"kl": 13.733333587646484, |
|
"learning_rate": 7.322330470336314e-07, |
|
"logits/chosen": -1117790940.0510638, |
|
"logits/rejected": 435368755.2, |
|
"logps/chosen": -1283.9489361702128, |
|
"logps/rejected": -4281.0775510204085, |
|
"loss": 0.0102, |
|
"rewards/chosen": 7.458873213098404, |
|
"rewards/margins": 6269761.450709948, |
|
"rewards/rejected": -6269753.991836735, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.555902560899438, |
|
"grad_norm": 0.038916345685720444, |
|
"kl": 1.7333333492279053, |
|
"learning_rate": 7.130552566648847e-07, |
|
"logits/chosen": -1104022652.878049, |
|
"logits/rejected": 367427303.93162394, |
|
"logps/chosen": -1533.4634146341464, |
|
"logps/rejected": -4443.076923076923, |
|
"loss": 0.0062, |
|
"rewards/chosen": 6.831621371633638, |
|
"rewards/margins": 49302145.3615359, |
|
"rewards/rejected": -49302138.52991453, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.562148657089319, |
|
"grad_norm": 0.057752642780542374, |
|
"kl": 4.333333492279053, |
|
"learning_rate": 6.940900948506113e-07, |
|
"logits/chosen": -1084643750.3501945, |
|
"logits/rejected": 301049461.0941704, |
|
"logps/chosen": -1523.4241245136186, |
|
"logps/rejected": -4271.354260089686, |
|
"loss": 0.0056, |
|
"rewards/chosen": 6.883037641354572, |
|
"rewards/margins": 76870272.56913629, |
|
"rewards/rejected": -76870265.68609865, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.5683947532792004, |
|
"grad_norm": 0.04232333227992058, |
|
"kl": 0.0, |
|
"learning_rate": 6.753398182554116e-07, |
|
"logits/chosen": -1142273104.1391304, |
|
"logits/rejected": 393610264.576, |
|
"logps/chosen": -1470.3304347826088, |
|
"logps/rejected": -4278.528, |
|
"loss": 0.0041, |
|
"rewards/chosen": 6.700403893512228, |
|
"rewards/margins": 32329095.724403895, |
|
"rewards/rejected": -32329089.024, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.5746408494690818, |
|
"grad_norm": 0.05231759324669838, |
|
"kl": 5.699999809265137, |
|
"learning_rate": 6.568066579746901e-07, |
|
"logits/chosen": -1081600296.9219332, |
|
"logits/rejected": 481572194.27488154, |
|
"logps/chosen": -1511.1375464684015, |
|
"logps/rejected": -4275.109004739336, |
|
"loss": 0.0077, |
|
"rewards/chosen": 6.582321677509293, |
|
"rewards/margins": 64165729.577582344, |
|
"rewards/rejected": -64165722.99526066, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.5808869456589631, |
|
"grad_norm": 0.05123298987746239, |
|
"kl": 16.875, |
|
"learning_rate": 6.384928192691844e-07, |
|
"logits/chosen": -1078488431.389313, |
|
"logits/rejected": 398170281.10091746, |
|
"logps/chosen": -1525.9236641221373, |
|
"logps/rejected": -4215.339449541284, |
|
"loss": 0.0097, |
|
"rewards/chosen": 7.378648131858301, |
|
"rewards/margins": 95379745.21351051, |
|
"rewards/rejected": -95379737.83486238, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.5871330418488445, |
|
"grad_norm": 0.08783072233200073, |
|
"kl": 9.783333778381348, |
|
"learning_rate": 6.204004813025569e-07, |
|
"logits/chosen": -984671368.2403433, |
|
"logits/rejected": 288396607.22267205, |
|
"logps/chosen": -1377.7854077253219, |
|
"logps/rejected": -4215.708502024291, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.247759774007511, |
|
"rewards/margins": 38891629.89148447, |
|
"rewards/rejected": -38891623.643724695, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.5933791380387259, |
|
"grad_norm": 0.05191274732351303, |
|
"kl": 0.375, |
|
"learning_rate": 6.025317968820954e-07, |
|
"logits/chosen": -1148081036.3179917, |
|
"logits/rejected": 496389787.0871369, |
|
"logps/chosen": -1394.878661087866, |
|
"logps/rejected": -4440.165975103734, |
|
"loss": 0.0043, |
|
"rewards/chosen": 7.052397277065899, |
|
"rewards/margins": 31868582.38849686, |
|
"rewards/rejected": -31868575.336099584, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.5996252342286073, |
|
"grad_norm": 0.07014898210763931, |
|
"kl": 14.300000190734863, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1002822994.6135458, |
|
"logits/rejected": 517804228.7510917, |
|
"logps/chosen": -1539.187250996016, |
|
"logps/rejected": -4211.423580786026, |
|
"loss": 0.0095, |
|
"rewards/chosen": 6.328193087026892, |
|
"rewards/margins": 36380255.760507494, |
|
"rewards/rejected": -36380249.43231441, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.6058713304184884, |
|
"grad_norm": 0.039311449974775314, |
|
"kl": 4.733333110809326, |
|
"learning_rate": 5.674738665931575e-07, |
|
"logits/chosen": -1064598241.28, |
|
"logits/rejected": 489178940.1043478, |
|
"logps/chosen": -1587.584, |
|
"logps/rejected": -4630.539130434782, |
|
"loss": 0.0038, |
|
"rewards/chosen": 6.928841796875, |
|
"rewards/margins": 36989088.5288418, |
|
"rewards/rejected": -36989081.6, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.6121174266083698, |
|
"grad_norm": 0.06494542211294174, |
|
"kl": 17.399999618530273, |
|
"learning_rate": 5.50288792267796e-07, |
|
"logits/chosen": -1104234254.6007605, |
|
"logits/rejected": 231532345.80645162, |
|
"logps/chosen": -1379.406844106464, |
|
"logps/rejected": -3870.672811059908, |
|
"loss": 0.0078, |
|
"rewards/chosen": 6.449863448342443, |
|
"rewards/margins": 20531010.615762066, |
|
"rewards/rejected": -20531004.165898617, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.6183635227982511, |
|
"grad_norm": 0.04059774428606033, |
|
"kl": 0.0, |
|
"learning_rate": 5.333357140784576e-07, |
|
"logits/chosen": -1077796317.8666666, |
|
"logits/rejected": 325858099.2, |
|
"logps/chosen": -1426.5333333333333, |
|
"logps/rejected": -4271.466666666666, |
|
"loss": 0.0045, |
|
"rewards/chosen": 6.975811258951823, |
|
"rewards/margins": 33188625.642477926, |
|
"rewards/rejected": -33188618.666666668, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.6246096189881323, |
|
"grad_norm": 0.05548970028758049, |
|
"kl": 0.0, |
|
"learning_rate": 5.166166492719124e-07, |
|
"logits/chosen": -1096140451.5850623, |
|
"logits/rejected": 495085816.50209206, |
|
"logps/chosen": -1395.3858921161825, |
|
"logps/rejected": -4268.18410041841, |
|
"loss": 0.0069, |
|
"rewards/chosen": 6.420809797231587, |
|
"rewards/margins": 19974116.14465917, |
|
"rewards/rejected": -19974109.72384937, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.6308557151780136, |
|
"grad_norm": 0.06512665003538132, |
|
"kl": 5.791666507720947, |
|
"learning_rate": 5.001335872496759e-07, |
|
"logits/chosen": -1032362445.4071146, |
|
"logits/rejected": 395955231.5770925, |
|
"logps/chosen": -1337.4229249011858, |
|
"logps/rejected": -4086.1321585903083, |
|
"loss": 0.0073, |
|
"rewards/chosen": 6.668516377686512, |
|
"rewards/margins": 77927011.1442873, |
|
"rewards/rejected": -77927004.47577092, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.637101811367895, |
|
"grad_norm": 0.046625133603811264, |
|
"kl": 0.0, |
|
"learning_rate": 4.838884893312934e-07, |
|
"logits/chosen": -972410863.2816327, |
|
"logits/rejected": 417935621.4468085, |
|
"logps/chosen": -1453.9755102040817, |
|
"logps/rejected": -4156.4595744680855, |
|
"loss": 0.0047, |
|
"rewards/chosen": 6.839940409757653, |
|
"rewards/margins": 22208490.516536154, |
|
"rewards/rejected": -22208483.676595744, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.6433479075577764, |
|
"grad_norm": 0.038477230817079544, |
|
"kl": 2.741666555404663, |
|
"learning_rate": 4.678832885209622e-07, |
|
"logits/chosen": -1002685379.7647059, |
|
"logits/rejected": 291196487.9338843, |
|
"logps/chosen": -1523.9663865546217, |
|
"logps/rejected": -4202.3140495867765, |
|
"loss": 0.0042, |
|
"rewards/chosen": 6.712801380317752, |
|
"rewards/margins": 19249440.101231135, |
|
"rewards/rejected": -19249433.388429753, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.6495940037476577, |
|
"grad_norm": 0.04708554968237877, |
|
"kl": 1.2333333492279053, |
|
"learning_rate": 4.5211988927752026e-07, |
|
"logits/chosen": -1077693790.4355555, |
|
"logits/rejected": 553812610.5098039, |
|
"logps/chosen": -1498.4533333333334, |
|
"logps/rejected": -4704.376470588235, |
|
"loss": 0.0043, |
|
"rewards/chosen": 6.698840060763889, |
|
"rewards/margins": 9807112.706683198, |
|
"rewards/rejected": -9807106.007843137, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.6558400999375391, |
|
"grad_norm": 0.04353487491607666, |
|
"kl": 9.600000381469727, |
|
"learning_rate": 4.366001672878406e-07, |
|
"logits/chosen": -1135099546.2707424, |
|
"logits/rejected": 389176266.96414346, |
|
"logps/chosen": -1425.467248908297, |
|
"logps/rejected": -4486.629482071713, |
|
"loss": 0.0059, |
|
"rewards/chosen": 6.637555651269105, |
|
"rewards/margins": 30159156.533969995, |
|
"rewards/rejected": -30159149.896414343, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.6620861961274205, |
|
"grad_norm": 0.03297581896185875, |
|
"kl": 1.3333333730697632, |
|
"learning_rate": 4.2132596924363666e-07, |
|
"logits/chosen": -1105496125.527897, |
|
"logits/rejected": 518404087.708502, |
|
"logps/chosen": -1485.3218884120172, |
|
"logps/rejected": -4637.538461538462, |
|
"loss": 0.0065, |
|
"rewards/chosen": 6.355323103876073, |
|
"rewards/margins": 54210093.73993849, |
|
"rewards/rejected": -54210087.384615384, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.6683322923173018, |
|
"grad_norm": 0.11054141819477081, |
|
"kl": 0.0, |
|
"learning_rate": 4.0629911262173053e-07, |
|
"logits/chosen": -1095594500.302521, |
|
"logits/rejected": 497796290.6446281, |
|
"logps/chosen": -1463.126050420168, |
|
"logps/rejected": -4335.074380165289, |
|
"loss": 0.0076, |
|
"rewards/chosen": 6.8984651966255255, |
|
"rewards/margins": 29223174.898465198, |
|
"rewards/rejected": -29223168.0, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.674578388507183, |
|
"grad_norm": 0.05203811824321747, |
|
"kl": 0.0, |
|
"learning_rate": 3.915213854677863e-07, |
|
"logits/chosen": -1098504994.816, |
|
"logits/rejected": 172379055.86086956, |
|
"logps/chosen": -1367.936, |
|
"logps/rejected": -4157.217391304348, |
|
"loss": 0.005, |
|
"rewards/chosen": 6.6947431640625, |
|
"rewards/margins": 6683829.99909099, |
|
"rewards/rejected": -6683823.304347826, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.6808244846970644, |
|
"grad_norm": 0.03993624076247215, |
|
"kl": 0.0, |
|
"learning_rate": 3.7699454618355306e-07, |
|
"logits/chosen": -946833671.4356847, |
|
"logits/rejected": 229857511.36401674, |
|
"logps/chosen": -1473.0622406639004, |
|
"logps/rejected": -4066.543933054393, |
|
"loss": 0.0066, |
|
"rewards/chosen": 6.581208161793309, |
|
"rewards/margins": 39973453.27576883, |
|
"rewards/rejected": -39973446.69456067, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.6870705808869455, |
|
"grad_norm": 0.04042872413992882, |
|
"kl": 5.699999809265137, |
|
"learning_rate": 3.627203233176341e-07, |
|
"logits/chosen": -964882025.5267175, |
|
"logits/rejected": 379531602.20183486, |
|
"logps/chosen": -1474.6870229007634, |
|
"logps/rejected": -4828.477064220184, |
|
"loss": 0.007, |
|
"rewards/chosen": 6.037196501520754, |
|
"rewards/margins": 20895698.367471732, |
|
"rewards/rejected": -20895692.33027523, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.6933166770768269, |
|
"grad_norm": 0.05504531040787697, |
|
"kl": 0.0, |
|
"learning_rate": 3.4870041535980283e-07, |
|
"logits/chosen": -1005325644.4675325, |
|
"logits/rejected": 160775336.61044177, |
|
"logps/chosen": -1396.2943722943724, |
|
"logps/rejected": -4198.29718875502, |
|
"loss": 0.0061, |
|
"rewards/chosen": 6.7330190154897185, |
|
"rewards/margins": 31472147.070368413, |
|
"rewards/rejected": -31472140.337349396, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.6995627732667082, |
|
"grad_norm": 0.2234458178281784, |
|
"kl": 30.433332443237305, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"logits/chosen": -1036081575.4261603, |
|
"logits/rejected": 231148438.65020576, |
|
"logps/chosen": -1513.9915611814347, |
|
"logps/rejected": -3994.864197530864, |
|
"loss": 0.0134, |
|
"rewards/chosen": 6.843573847903481, |
|
"rewards/margins": -1633841.1235043413, |
|
"rewards/rejected": 1633847.9670781894, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.7058088694565896, |
|
"grad_norm": 0.044682055711746216, |
|
"kl": 0.0, |
|
"learning_rate": 3.214301866243469e-07, |
|
"logits/chosen": -1072538395.0042194, |
|
"logits/rejected": 448950187.7201646, |
|
"logps/chosen": -1468.759493670886, |
|
"logps/rejected": -4584.2962962962965, |
|
"loss": 0.0042, |
|
"rewards/chosen": 6.981564807489452, |
|
"rewards/margins": 8730029.960988674, |
|
"rewards/rejected": -8730022.979423868, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.712054965646471, |
|
"grad_norm": 0.04081505164504051, |
|
"kl": 0.0, |
|
"learning_rate": 3.081831107312308e-07, |
|
"logits/chosen": -993211187.2, |
|
"logits/rejected": 445330227.2, |
|
"logps/chosen": -1481.2521739130434, |
|
"logps/rejected": -4459.52, |
|
"loss": 0.0041, |
|
"rewards/chosen": 6.918339737601902, |
|
"rewards/margins": 13266534.150339738, |
|
"rewards/rejected": -13266527.232, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.7183010618363523, |
|
"grad_norm": 0.04136398062109947, |
|
"kl": 6.233333110809326, |
|
"learning_rate": 2.9519683912911267e-07, |
|
"logits/chosen": -1032674117.0086956, |
|
"logits/rejected": 353256865.792, |
|
"logps/chosen": -1485.0782608695652, |
|
"logps/rejected": -4357.376, |
|
"loss": 0.0072, |
|
"rewards/chosen": 7.032565174932065, |
|
"rewards/margins": 18321705.848565176, |
|
"rewards/rejected": -18321698.816, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.7245471580262337, |
|
"grad_norm": 0.037806980311870575, |
|
"kl": 0.0, |
|
"learning_rate": 2.8247291705444575e-07, |
|
"logits/chosen": -1073811729.0666666, |
|
"logits/rejected": 381803997.8666667, |
|
"logps/chosen": -1532.9333333333334, |
|
"logps/rejected": -4173.866666666667, |
|
"loss": 0.0033, |
|
"rewards/chosen": 6.961608378092448, |
|
"rewards/margins": 17027739.228275042, |
|
"rewards/rejected": -17027732.266666666, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.730793254216115, |
|
"grad_norm": 0.04734532907605171, |
|
"kl": 0.0, |
|
"learning_rate": 2.700128585267148e-07, |
|
"logits/chosen": -1064232157.7880185, |
|
"logits/rejected": 400121451.07224333, |
|
"logps/chosen": -1503.26267281106, |
|
"logps/rejected": -4154.6463878326995, |
|
"loss": 0.004, |
|
"rewards/chosen": 7.132465415286578, |
|
"rewards/margins": 24937706.904328533, |
|
"rewards/rejected": -24937699.771863118, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.7370393504059962, |
|
"grad_norm": 0.0439315065741539, |
|
"kl": 0.0, |
|
"learning_rate": 2.5781814616827936e-07, |
|
"logits/chosen": -1055812524.4978541, |
|
"logits/rejected": 363995965.14979756, |
|
"logps/chosen": -1543.4849785407725, |
|
"logps/rejected": -4515.757085020243, |
|
"loss": 0.0061, |
|
"rewards/chosen": 6.714240209227468, |
|
"rewards/margins": 19813420.0259811, |
|
"rewards/rejected": -19813413.31174089, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.7432854465958776, |
|
"grad_norm": 0.05658557638525963, |
|
"kl": 2.4833333492279053, |
|
"learning_rate": 2.458902310279601e-07, |
|
"logits/chosen": -1039975914.4873949, |
|
"logits/rejected": 141531762.2479339, |
|
"logps/chosen": -1433.142857142857, |
|
"logps/rejected": -4137.520661157025, |
|
"loss": 0.0053, |
|
"rewards/chosen": 6.4333219127494745, |
|
"rewards/margins": 29758231.358941745, |
|
"rewards/rejected": -29758224.925619833, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.749531542785759, |
|
"grad_norm": 0.18774373829364777, |
|
"kl": 13.333333015441895, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"logits/chosen": -1019430049.2255319, |
|
"logits/rejected": 447735532.1469388, |
|
"logps/chosen": -1531.3021276595746, |
|
"logps/rejected": -4217.404081632653, |
|
"loss": 0.0092, |
|
"rewards/chosen": 6.913489029255319, |
|
"rewards/margins": 26482598.782876782, |
|
"rewards/rejected": -26482591.869387753, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.75577763897564, |
|
"grad_norm": 0.043808355927467346, |
|
"kl": 0.0, |
|
"learning_rate": 2.2284043769706026e-07, |
|
"logits/chosen": -1011416251.3702128, |
|
"logits/rejected": 144172780.14693877, |
|
"logps/chosen": -1641.531914893617, |
|
"logps/rejected": -4018.6775510204084, |
|
"loss": 0.0043, |
|
"rewards/chosen": 7.178429916057181, |
|
"rewards/margins": 31332219.162103385, |
|
"rewards/rejected": -31332211.98367347, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.7620237351655215, |
|
"grad_norm": 0.048849448561668396, |
|
"kl": 0.0, |
|
"learning_rate": 2.1172130220138227e-07, |
|
"logits/chosen": -1013900822.8016194, |
|
"logits/rejected": 300855805.8025751, |
|
"logps/chosen": -1627.076923076923, |
|
"logps/rejected": -4419.021459227468, |
|
"loss": 0.0047, |
|
"rewards/chosen": 7.054942513284413, |
|
"rewards/margins": 24462578.179406036, |
|
"rewards/rejected": -24462571.12446352, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.7682698313554028, |
|
"grad_norm": 0.049721091985702515, |
|
"kl": 28.33333396911621, |
|
"learning_rate": 2.0087444898726938e-07, |
|
"logits/chosen": -995293418.7509881, |
|
"logits/rejected": 524846932.5814978, |
|
"logps/chosen": -1456.5691699604743, |
|
"logps/rejected": -4499.101321585903, |
|
"loss": 0.0116, |
|
"rewards/chosen": 6.656613798480731, |
|
"rewards/margins": 43495953.93414684, |
|
"rewards/rejected": -43495947.27753304, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 1.7745159275452842, |
|
"grad_norm": 0.022943388670682907, |
|
"kl": 0.0, |
|
"learning_rate": 1.9030116872178317e-07, |
|
"logits/chosen": -1116324673.0847456, |
|
"logits/rejected": 419383128.13114756, |
|
"logps/chosen": -1456.2711864406779, |
|
"logps/rejected": -4149.639344262295, |
|
"loss": 0.0041, |
|
"rewards/chosen": 7.154229632878708, |
|
"rewards/margins": 12445927.678819798, |
|
"rewards/rejected": -12445920.524590164, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.7807620237351656, |
|
"grad_norm": 0.04606853052973747, |
|
"kl": 0.0, |
|
"learning_rate": 1.800027195195389e-07, |
|
"logits/chosen": -1112389339.4285715, |
|
"logits/rejected": 345240657.76425856, |
|
"logps/chosen": -1370.8387096774193, |
|
"logps/rejected": -4172.897338403041, |
|
"loss": 0.0042, |
|
"rewards/chosen": 6.945747902865784, |
|
"rewards/margins": 47055758.246128134, |
|
"rewards/rejected": -47055751.30038023, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.787008119925047, |
|
"grad_norm": 0.041725583374500275, |
|
"kl": 6.366666793823242, |
|
"learning_rate": 1.699803267930039e-07, |
|
"logits/chosen": -978041787.7333333, |
|
"logits/rejected": 322212113.06666666, |
|
"logps/chosen": -1522.6666666666667, |
|
"logps/rejected": -4063.733333333333, |
|
"loss": 0.0051, |
|
"rewards/chosen": 7.036024983723959, |
|
"rewards/margins": 30979183.56935832, |
|
"rewards/rejected": -30979176.533333335, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.7932542161149283, |
|
"grad_norm": 0.03576849400997162, |
|
"kl": 5.733333110809326, |
|
"learning_rate": 1.602351831066862e-07, |
|
"logits/chosen": -1131575479.1544716, |
|
"logits/rejected": 228284853.6068376, |
|
"logps/chosen": -1475.2520325203252, |
|
"logps/rejected": -3997.811965811966, |
|
"loss": 0.0049, |
|
"rewards/chosen": 6.8408907758511175, |
|
"rewards/margins": 40579949.67849761, |
|
"rewards/rejected": -40579942.83760684, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 1.7995003123048094, |
|
"grad_norm": 0.05014333873987198, |
|
"kl": 23.66666603088379, |
|
"learning_rate": 1.507684480352292e-07, |
|
"logits/chosen": -1127743488.0, |
|
"logits/rejected": 292159488.0, |
|
"logps/chosen": -1444.25, |
|
"logps/rejected": -4205.714285714285, |
|
"loss": 0.007, |
|
"rewards/chosen": 6.619314193725586, |
|
"rewards/margins": 43717551.762171336, |
|
"rewards/rejected": -43717545.14285714, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.8057464084946908, |
|
"grad_norm": 0.04119168221950531, |
|
"kl": 1.6333333253860474, |
|
"learning_rate": 1.4158124802543693e-07, |
|
"logits/chosen": -911179027.8721461, |
|
"logits/rejected": 418642963.61685824, |
|
"logps/chosen": -1645.7351598173516, |
|
"logps/rejected": -4242.145593869732, |
|
"loss": 0.0037, |
|
"rewards/chosen": 7.1187459867294525, |
|
"rewards/margins": -1617341.9080739366, |
|
"rewards/rejected": 1617349.0268199234, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 1.8119925046845722, |
|
"grad_norm": 0.04770239442586899, |
|
"kl": 0.0, |
|
"learning_rate": 1.3267467626223606e-07, |
|
"logits/chosen": -1013024280.3809524, |
|
"logits/rejected": 458046632.61044174, |
|
"logps/chosen": -1338.1818181818182, |
|
"logps/rejected": -4339.919678714859, |
|
"loss": 0.0055, |
|
"rewards/chosen": 6.514653721929113, |
|
"rewards/margins": 3903908.329914766, |
|
"rewards/rejected": -3903901.815261044, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.8182386008744533, |
|
"grad_norm": 0.05117342248558998, |
|
"kl": 2.0333333015441895, |
|
"learning_rate": 1.2404979253859722e-07, |
|
"logits/chosen": -1017058308.2139918, |
|
"logits/rejected": 364585893.26582277, |
|
"logps/chosen": -1433.9423868312758, |
|
"logps/rejected": -4159.729957805907, |
|
"loss": 0.0049, |
|
"rewards/chosen": 6.74083668901106, |
|
"rewards/margins": 12966103.213410528, |
|
"rewards/rejected": -12966096.47257384, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.8244846970643347, |
|
"grad_norm": 0.048183873295784, |
|
"kl": 11.966666221618652, |
|
"learning_rate": 1.1570762312943295e-07, |
|
"logits/chosen": -1041858429.4501992, |
|
"logits/rejected": 471474569.5021834, |
|
"logps/chosen": -1457.9123505976095, |
|
"logps/rejected": -4449.816593886463, |
|
"loss": 0.0062, |
|
"rewards/chosen": 6.686128532744024, |
|
"rewards/margins": 48752919.45468748, |
|
"rewards/rejected": -48752912.76855895, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.830730793254216, |
|
"grad_norm": 0.041397128254175186, |
|
"kl": 3.049999952316284, |
|
"learning_rate": 1.0764916066947795e-07, |
|
"logits/chosen": -1057709542.3011583, |
|
"logits/rejected": 425707621.9366516, |
|
"logps/chosen": -1529.6988416988418, |
|
"logps/rejected": -4262.226244343891, |
|
"loss": 0.0042, |
|
"rewards/chosen": 6.806276770632239, |
|
"rewards/margins": 84398012.67053016, |
|
"rewards/rejected": -84398005.86425339, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.8369768894440974, |
|
"grad_norm": 0.0291211549192667, |
|
"kl": 12.516666412353516, |
|
"learning_rate": 9.98753640351785e-08, |
|
"logits/chosen": -1084134188.5668015, |
|
"logits/rejected": 390043270.04291844, |
|
"logps/chosen": -1341.1497975708503, |
|
"logps/rejected": -4192.137339055794, |
|
"loss": 0.006, |
|
"rewards/chosen": 6.858956896824393, |
|
"rewards/margins": 62714258.39543758, |
|
"rewards/rejected": -62714251.53648069, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.8432229856339788, |
|
"grad_norm": 0.042660392820835114, |
|
"kl": 12.533333778381348, |
|
"learning_rate": 9.238715823059324e-08, |
|
"logits/chosen": -1036907895.0452675, |
|
"logits/rejected": 222360053.19831222, |
|
"logps/chosen": -1464.4938271604938, |
|
"logps/rejected": -4489.991561181435, |
|
"loss": 0.0065, |
|
"rewards/chosen": 7.0193659778485085, |
|
"rewards/margins": 30820391.424429268, |
|
"rewards/rejected": -30820384.40506329, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.8494690818238602, |
|
"grad_norm": 0.04971213638782501, |
|
"kl": 0.0, |
|
"learning_rate": 8.518543427732951e-08, |
|
"logits/chosen": -1037584030.8965517, |
|
"logits/rejected": 420588907.3548387, |
|
"logps/chosen": -1572.896551724138, |
|
"logps/rejected": -4336.774193548387, |
|
"loss": 0.0044, |
|
"rewards/chosen": 6.730742355872845, |
|
"rewards/margins": 21317710.988806874, |
|
"rewards/rejected": -21317704.258064516, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.8557151780137415, |
|
"grad_norm": 0.0393749363720417, |
|
"kl": 2.6666667461395264, |
|
"learning_rate": 7.827104910851729e-08, |
|
"logits/chosen": -1214985907.4188035, |
|
"logits/rejected": 577688650.9268292, |
|
"logps/chosen": -1419.3504273504273, |
|
"logps/rejected": -4508.09756097561, |
|
"loss": 0.007, |
|
"rewards/chosen": 6.878979283520299, |
|
"rewards/margins": 57327797.70824757, |
|
"rewards/rejected": -57327790.82926829, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.861961274203623, |
|
"grad_norm": 0.037422481924295425, |
|
"kl": 9.25, |
|
"learning_rate": 7.164482546684642e-08, |
|
"logits/chosen": -1083562633.3658535, |
|
"logits/rejected": 255969052.44444445, |
|
"logps/chosen": -1367.5447154471544, |
|
"logps/rejected": -4047.042735042735, |
|
"loss": 0.0078, |
|
"rewards/chosen": 6.497047486344004, |
|
"rewards/margins": 20068045.6081586, |
|
"rewards/rejected": -20068039.111111112, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.868207370393504, |
|
"grad_norm": 0.06087194010615349, |
|
"kl": 5.900000095367432, |
|
"learning_rate": 6.530755180666593e-08, |
|
"logits/chosen": -1080703324.4149377, |
|
"logits/rejected": 321298603.38075316, |
|
"logps/chosen": -1386.323651452282, |
|
"logps/rejected": -4462.058577405858, |
|
"loss": 0.0047, |
|
"rewards/chosen": 7.002343142181017, |
|
"rewards/margins": 37324824.140418455, |
|
"rewards/rejected": -37324817.138075314, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.8744534665833854, |
|
"grad_norm": 0.04000187665224075, |
|
"kl": 2.7166666984558105, |
|
"learning_rate": 5.92599822001666e-08, |
|
"logits/chosen": -1044922896.516129, |
|
"logits/rejected": 480415037.79310346, |
|
"logps/chosen": -1473.2903225806451, |
|
"logps/rejected": -4401.6551724137935, |
|
"loss": 0.0047, |
|
"rewards/chosen": 6.6074873401272685, |
|
"rewards/margins": 46696542.88334941, |
|
"rewards/rejected": -46696536.27586207, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8744534665833854, |
|
"eval_kl": 5.369565010070801, |
|
"eval_logits/chosen": -1053226647.5203094, |
|
"eval_logits/rejected": 314457350.7232323, |
|
"eval_logps/chosen": -1451.8375241779497, |
|
"eval_logps/rejected": -4126.286868686869, |
|
"eval_loss": 0.006971114315092564, |
|
"eval_rewards/chosen": 6.690913246494198, |
|
"eval_rewards/margins": 34923553.58384254, |
|
"eval_rewards/rejected": -34923546.89292929, |
|
"eval_runtime": 640.7129, |
|
"eval_samples_per_second": 6.313, |
|
"eval_steps_per_second": 0.395, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8806995627732666, |
|
"grad_norm": 0.051798831671476364, |
|
"kl": 1.024999976158142, |
|
"learning_rate": 5.3502836247654176e-08, |
|
"logits/chosen": -997878037.2401747, |
|
"logits/rejected": 345052523.09163344, |
|
"logps/chosen": -1443.7729257641922, |
|
"logps/rejected": -4034.03984063745, |
|
"loss": 0.0041, |
|
"rewards/chosen": 7.0013470379025655, |
|
"rewards/margins": 18439719.63879724, |
|
"rewards/rejected": -18439712.6374502, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.886945658963148, |
|
"grad_norm": 0.05178893357515335, |
|
"kl": 0.0, |
|
"learning_rate": 4.8036798991923925e-08, |
|
"logits/chosen": -992199120.3238866, |
|
"logits/rejected": 275653979.19313306, |
|
"logps/chosen": -1448.161943319838, |
|
"logps/rejected": -4049.304721030043, |
|
"loss": 0.0047, |
|
"rewards/chosen": 6.675408120097419, |
|
"rewards/margins": 31989076.683991812, |
|
"rewards/rejected": -31989070.00858369, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.8931917551530293, |
|
"grad_norm": 0.04293132573366165, |
|
"kl": 0.0, |
|
"learning_rate": 4.2862520836747245e-08, |
|
"logits/chosen": -1123284873.520661, |
|
"logits/rejected": 473070789.9159664, |
|
"logps/chosen": -1461.685950413223, |
|
"logps/rejected": -4277.243697478992, |
|
"loss": 0.0059, |
|
"rewards/chosen": 6.628247978273502, |
|
"rewards/margins": 25677056.17446647, |
|
"rewards/rejected": -25677049.54621849, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.8994378513429107, |
|
"grad_norm": 0.062386684119701385, |
|
"kl": 0.0, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1092597633.1327434, |
|
"logits/rejected": 353865502.2362205, |
|
"logps/chosen": -1405.0265486725664, |
|
"logps/rejected": -4292.031496062992, |
|
"loss": 0.0068, |
|
"rewards/chosen": 6.833754277862279, |
|
"rewards/margins": 36038900.692021996, |
|
"rewards/rejected": -36038893.85826772, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.905683947532792, |
|
"grad_norm": 0.04458368569612503, |
|
"kl": 0.11666666716337204, |
|
"learning_rate": 3.339166978780256e-08, |
|
"logits/chosen": -973501483.0252101, |
|
"logits/rejected": 412649319.6694215, |
|
"logps/chosen": -1451.4285714285713, |
|
"logps/rejected": -4935.93388429752, |
|
"loss": 0.0048, |
|
"rewards/chosen": 6.3846789448201156, |
|
"rewards/margins": 18261066.087158285, |
|
"rewards/rejected": -18261059.70247934, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.9119300437226734, |
|
"grad_norm": 0.06264102458953857, |
|
"kl": 0.0, |
|
"learning_rate": 2.9096223830598347e-08, |
|
"logits/chosen": -1078418658.5486727, |
|
"logits/rejected": 434630623.7480315, |
|
"logps/chosen": -1453.6637168141592, |
|
"logps/rejected": -4444.220472440945, |
|
"loss": 0.0066, |
|
"rewards/chosen": 6.742341978360066, |
|
"rewards/margins": 28229193.262027018, |
|
"rewards/rejected": -28229186.51968504, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.9181761399125548, |
|
"grad_norm": 0.03891793265938759, |
|
"kl": 0.0, |
|
"learning_rate": 2.5094790712980322e-08, |
|
"logits/chosen": -994888908.8, |
|
"logits/rejected": 331507302.4, |
|
"logps/chosen": -1478.8, |
|
"logps/rejected": -4330.133333333333, |
|
"loss": 0.0071, |
|
"rewards/chosen": 7.01343739827474, |
|
"rewards/margins": 25696160.6134374, |
|
"rewards/rejected": -25696153.6, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.9244222361024361, |
|
"grad_norm": 0.04806411266326904, |
|
"kl": 0.0, |
|
"learning_rate": 2.1387846565474047e-08, |
|
"logits/chosen": -1012147693.037037, |
|
"logits/rejected": 443452322.90909094, |
|
"logps/chosen": -1441.037037037037, |
|
"logps/rejected": -4375.272727272727, |
|
"loss": 0.0062, |
|
"rewards/chosen": 7.009937427662037, |
|
"rewards/margins": 45192074.8887253, |
|
"rewards/rejected": -45192067.878787875, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.9306683322923173, |
|
"grad_norm": 0.06165820360183716, |
|
"kl": 0.3916666805744171, |
|
"learning_rate": 1.79758324773624e-08, |
|
"logits/chosen": -996874101.2908367, |
|
"logits/rejected": 430538895.09170306, |
|
"logps/chosen": -1505.5298804780878, |
|
"logps/rejected": -4623.930131004367, |
|
"loss": 0.0076, |
|
"rewards/chosen": 6.439177858876992, |
|
"rewards/margins": 33095169.96756214, |
|
"rewards/rejected": -33095163.52838428, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.9369144284821986, |
|
"grad_norm": 0.04440110921859741, |
|
"kl": 2.866666555404663, |
|
"learning_rate": 1.4859154444200885e-08, |
|
"logits/chosen": -1060492863.7253219, |
|
"logits/rejected": 399448022.5425101, |
|
"logps/chosen": -1412.4291845493563, |
|
"logps/rejected": -4322.202429149797, |
|
"loss": 0.0061, |
|
"rewards/chosen": 6.6896883592073495, |
|
"rewards/margins": 48499838.91640901, |
|
"rewards/rejected": -48499832.226720646, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.94316052467208, |
|
"grad_norm": 0.03537024185061455, |
|
"kl": 0.0, |
|
"learning_rate": 1.2038183319507957e-08, |
|
"logits/chosen": -1090112543.5770924, |
|
"logits/rejected": 381984217.5494071, |
|
"logps/chosen": -1472.704845814978, |
|
"logps/rejected": -4046.418972332016, |
|
"loss": 0.0036, |
|
"rewards/chosen": 6.901054214275881, |
|
"rewards/margins": 61204207.62832694, |
|
"rewards/rejected": -61204200.72727273, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.9494066208619611, |
|
"grad_norm": 0.03937402740120888, |
|
"kl": 19.266666412353516, |
|
"learning_rate": 9.513254770636138e-09, |
|
"logits/chosen": -958182165.8116592, |
|
"logits/rejected": 389183878.4747082, |
|
"logps/chosen": -1551.2825112107623, |
|
"logps/rejected": -4418.988326848249, |
|
"loss": 0.0108, |
|
"rewards/chosen": 7.0115835420753925, |
|
"rewards/margins": 5401249.408470702, |
|
"rewards/rejected": -5401242.39688716, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.9556527170518425, |
|
"grad_norm": 0.5586203336715698, |
|
"kl": 0.0, |
|
"learning_rate": 7.284669238833419e-09, |
|
"logits/chosen": -1119640467.7723577, |
|
"logits/rejected": 510185997.1282051, |
|
"logps/chosen": -1490.1463414634147, |
|
"logps/rejected": -4648.478632478633, |
|
"loss": 0.0051, |
|
"rewards/chosen": 6.604302134940295, |
|
"rewards/margins": 39599342.53592607, |
|
"rewards/rejected": -39599335.93162393, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.9618988132417239, |
|
"grad_norm": 0.05282744765281677, |
|
"kl": 14.333333015441895, |
|
"learning_rate": 5.352691903491303e-09, |
|
"logits/chosen": -963496561.5397489, |
|
"logits/rejected": 376038497.7261411, |
|
"logps/chosen": -1471.7991631799164, |
|
"logps/rejected": -4203.817427385892, |
|
"loss": 0.011, |
|
"rewards/chosen": 6.948775415141213, |
|
"rewards/margins": 33892983.54628579, |
|
"rewards/rejected": -33892976.597510375, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.9681449094316052, |
|
"grad_norm": 0.04685989022254944, |
|
"kl": 3.25, |
|
"learning_rate": 3.71755265059226e-09, |
|
"logits/chosen": -1014598612.9747899, |
|
"logits/rejected": 395408477.09090906, |
|
"logps/chosen": -1472.2689075630253, |
|
"logps/rejected": -4229.289256198347, |
|
"loss": 0.0064, |
|
"rewards/chosen": 6.425900242909663, |
|
"rewards/margins": 16834238.492015947, |
|
"rewards/rejected": -16834232.066115703, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.9743910056214866, |
|
"grad_norm": 0.05169759690761566, |
|
"kl": 0.0, |
|
"learning_rate": 2.3794460453555046e-09, |
|
"logits/chosen": -900699429.8434782, |
|
"logits/rejected": 464678551.552, |
|
"logps/chosen": -1558.2608695652175, |
|
"logps/rejected": -4444.928, |
|
"loss": 0.0044, |
|
"rewards/chosen": 6.816860033118206, |
|
"rewards/margins": 14998663.840860033, |
|
"rewards/rejected": -14998657.024, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.980637101811368, |
|
"grad_norm": 0.04657047241926193, |
|
"kl": 0.0, |
|
"learning_rate": 1.3385313090857888e-09, |
|
"logits/chosen": -1086119681.1377778, |
|
"logits/rejected": 389585048.59607846, |
|
"logps/chosen": -1381.7955555555557, |
|
"logps/rejected": -4238.054901960784, |
|
"loss": 0.0047, |
|
"rewards/chosen": 6.858923068576389, |
|
"rewards/margins": 15058023.235393656, |
|
"rewards/rejected": -15058016.376470588, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.9868831980012494, |
|
"grad_norm": 0.055049166083335876, |
|
"kl": 3.799999952316284, |
|
"learning_rate": 5.94932300227169e-10, |
|
"logits/chosen": -1062919879.3282443, |
|
"logits/rejected": 403480500.8440367, |
|
"logps/chosen": -1391.5725190839694, |
|
"logps/rejected": -4085.7247706422017, |
|
"loss": 0.0064, |
|
"rewards/chosen": 6.819431916447996, |
|
"rewards/margins": 56341567.18640439, |
|
"rewards/rejected": -56341560.366972476, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.9931292941911305, |
|
"grad_norm": 0.05392511561512947, |
|
"kl": 2.633333444595337, |
|
"learning_rate": 1.4873749962562855e-10, |
|
"logits/chosen": -1080439704.8062015, |
|
"logits/rejected": 458492217.6576577, |
|
"logps/chosen": -1620.3410852713178, |
|
"logps/rejected": -4351.711711711711, |
|
"loss": 0.0114, |
|
"rewards/chosen": 6.508843000545058, |
|
"rewards/margins": 68250289.17550968, |
|
"rewards/rejected": -68250282.66666667, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.9993753903810119, |
|
"grad_norm": 0.0583941750228405, |
|
"kl": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1059895126.0786027, |
|
"logits/rejected": 462384417.6573705, |
|
"logps/chosen": -1498.8296943231442, |
|
"logps/rejected": -4708.462151394422, |
|
"loss": 0.0044, |
|
"rewards/chosen": 6.921540239492358, |
|
"rewards/margins": 25431611.957396813, |
|
"rewards/rejected": -25431605.035856575, |
|
"step": 1600 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|