|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.1983471074380168, |
|
"eval_steps": 100, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05509641873278237, |
|
"grad_norm": 245.1579195730004, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": -0.4725155234336853, |
|
"logits/rejected": 0.2096169888973236, |
|
"logps/chosen": -269.412841796875, |
|
"logps/rejected": -1846.9844970703125, |
|
"loss": 0.7021, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": 0.0009570938418619335, |
|
"rewards/margins": -0.00988767296075821, |
|
"rewards/rejected": 0.010844765231013298, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11019283746556474, |
|
"grad_norm": 173.5029184520767, |
|
"learning_rate": 7.142857142857143e-07, |
|
"logits/chosen": -0.48380351066589355, |
|
"logits/rejected": 0.16157253086566925, |
|
"logps/chosen": -263.0589599609375, |
|
"logps/rejected": -1847.731201171875, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.7874999642372131, |
|
"rewards/chosen": 0.08561563491821289, |
|
"rewards/margins": 0.15573981404304504, |
|
"rewards/rejected": -0.07012417167425156, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1652892561983471, |
|
"grad_norm": 74.48927188892095, |
|
"learning_rate": 9.999627882666473e-07, |
|
"logits/chosen": -0.44514018297195435, |
|
"logits/rejected": 0.13901665806770325, |
|
"logps/chosen": -259.4621276855469, |
|
"logps/rejected": -1711.037109375, |
|
"loss": 0.3346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5383985042572021, |
|
"rewards/margins": 1.005392074584961, |
|
"rewards/rejected": -0.4669935405254364, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22038567493112948, |
|
"grad_norm": 7.660881219187966, |
|
"learning_rate": 9.98660959086472e-07, |
|
"logits/chosen": -0.41537362337112427, |
|
"logits/rejected": 0.28159233927726746, |
|
"logps/chosen": -241.8282928466797, |
|
"logps/rejected": -1647.9976806640625, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6659537553787231, |
|
"rewards/margins": 3.620225429534912, |
|
"rewards/rejected": -1.9542717933654785, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27548209366391185, |
|
"grad_norm": 0.26407280974468034, |
|
"learning_rate": 9.955040783863372e-07, |
|
"logits/chosen": -0.43045905232429504, |
|
"logits/rejected": 0.4600587785243988, |
|
"logps/chosen": -232.1391143798828, |
|
"logps/rejected": -1867.119140625, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.669019937515259, |
|
"rewards/margins": 6.597150802612305, |
|
"rewards/rejected": -3.9281303882598877, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3305785123966942, |
|
"grad_norm": 0.0414724400265944, |
|
"learning_rate": 9.905038899697923e-07, |
|
"logits/chosen": -0.4028685390949249, |
|
"logits/rejected": 0.5511271953582764, |
|
"logps/chosen": -229.65939331054688, |
|
"logps/rejected": -1880.948486328125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.849907875061035, |
|
"rewards/margins": 11.137469291687012, |
|
"rewards/rejected": -7.287561416625977, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3856749311294766, |
|
"grad_norm": 0.02329936461620973, |
|
"learning_rate": 9.8367899486616e-07, |
|
"logits/chosen": -0.394756942987442, |
|
"logits/rejected": 0.6026133298873901, |
|
"logps/chosen": -220.28147888183594, |
|
"logps/rejected": -1803.0296630859375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.949002742767334, |
|
"rewards/margins": 12.70076847076416, |
|
"rewards/rejected": -8.751766204833984, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44077134986225897, |
|
"grad_norm": 0.015381886490533226, |
|
"learning_rate": 9.750547821334867e-07, |
|
"logits/chosen": -0.4100641906261444, |
|
"logits/rejected": 0.5826520323753357, |
|
"logps/chosen": -209.83123779296875, |
|
"logps/rejected": -1813.609619140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.180466651916504, |
|
"rewards/margins": 13.536256790161133, |
|
"rewards/rejected": -9.355790138244629, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.49586776859504134, |
|
"grad_norm": 0.01886022560525684, |
|
"learning_rate": 9.646633344095777e-07, |
|
"logits/chosen": -0.4125652015209198, |
|
"logits/rejected": 0.6124175786972046, |
|
"logps/chosen": -217.37664794921875, |
|
"logps/rejected": -1866.38330078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.220520496368408, |
|
"rewards/margins": 13.80252742767334, |
|
"rewards/rejected": -9.582008361816406, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5509641873278237, |
|
"grad_norm": 0.005925555908148722, |
|
"learning_rate": 9.525433085624788e-07, |
|
"logits/chosen": -0.4284254014492035, |
|
"logits/rejected": 0.5839693546295166, |
|
"logps/chosen": -229.79859924316406, |
|
"logps/rejected": -1867.4002685546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.302082538604736, |
|
"rewards/margins": 14.04192066192627, |
|
"rewards/rejected": -9.739837646484375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5509641873278237, |
|
"eval_logits/chosen": -0.4308052659034729, |
|
"eval_logits/rejected": 0.6109545230865479, |
|
"eval_logps/chosen": -215.23263549804688, |
|
"eval_logps/rejected": -1857.6256103515625, |
|
"eval_loss": 7.565925625385717e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 4.255834102630615, |
|
"eval_rewards/margins": 13.677898406982422, |
|
"eval_rewards/rejected": -9.422063827514648, |
|
"eval_runtime": 14.6994, |
|
"eval_samples_per_second": 6.803, |
|
"eval_steps_per_second": 0.884, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 0.11306476239956821, |
|
"learning_rate": 9.387397918843868e-07, |
|
"logits/chosen": -0.4106900095939636, |
|
"logits/rejected": 0.5514415502548218, |
|
"logps/chosen": -211.5902099609375, |
|
"logps/rejected": -1779.3956298828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.1011528968811035, |
|
"rewards/margins": 13.1325044631958, |
|
"rewards/rejected": -9.031352043151855, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6611570247933884, |
|
"grad_norm": 0.0019763702825082116, |
|
"learning_rate": 9.233041343639622e-07, |
|
"logits/chosen": -0.41396018862724304, |
|
"logits/rejected": 0.6652277708053589, |
|
"logps/chosen": -217.4370880126953, |
|
"logps/rejected": -1980.4158935546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.152511119842529, |
|
"rewards/margins": 14.063387870788574, |
|
"rewards/rejected": -9.910876274108887, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7162534435261708, |
|
"grad_norm": 0.004513340856514194, |
|
"learning_rate": 9.062937576609982e-07, |
|
"logits/chosen": -0.4127367436885834, |
|
"logits/rejected": 0.6161201596260071, |
|
"logps/chosen": -206.23768615722656, |
|
"logps/rejected": -1906.336669921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.257203102111816, |
|
"rewards/margins": 13.963435173034668, |
|
"rewards/rejected": -9.706233024597168, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7713498622589532, |
|
"grad_norm": 0.0016410444467114878, |
|
"learning_rate": 8.87771941494075e-07, |
|
"logits/chosen": -0.41299960017204285, |
|
"logits/rejected": 0.6197333335876465, |
|
"logps/chosen": -223.1101837158203, |
|
"logps/rejected": -1924.6033935546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.367452144622803, |
|
"rewards/margins": 14.50761890411377, |
|
"rewards/rejected": -10.140168190002441, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8264462809917356, |
|
"grad_norm": 0.0067065214886613675, |
|
"learning_rate": 8.678075882358505e-07, |
|
"logits/chosen": -0.416629821062088, |
|
"logits/rejected": 0.5853482484817505, |
|
"logps/chosen": -212.60301208496094, |
|
"logps/rejected": -1887.0203857421875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.241779327392578, |
|
"rewards/margins": 14.451003074645996, |
|
"rewards/rejected": -10.209223747253418, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8815426997245179, |
|
"grad_norm": 0.00367787676601538, |
|
"learning_rate": 8.46474966591708e-07, |
|
"logits/chosen": -0.40926405787467957, |
|
"logits/rejected": 0.5995694398880005, |
|
"logps/chosen": -216.64437866210938, |
|
"logps/rejected": -1882.6590576171875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.257536888122559, |
|
"rewards/margins": 13.861567497253418, |
|
"rewards/rejected": -9.60403060913086, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9366391184573003, |
|
"grad_norm": 0.018619958135959652, |
|
"learning_rate": 8.23853435315295e-07, |
|
"logits/chosen": -0.40368929505348206, |
|
"logits/rejected": 0.6110401153564453, |
|
"logps/chosen": -206.5170440673828, |
|
"logps/rejected": -1802.53515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.327918529510498, |
|
"rewards/margins": 14.353678703308105, |
|
"rewards/rejected": -10.025761604309082, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9917355371900827, |
|
"grad_norm": 0.0028079764623551934, |
|
"learning_rate": 8.000271479887468e-07, |
|
"logits/chosen": -0.4238608479499817, |
|
"logits/rejected": 0.6033484935760498, |
|
"logps/chosen": -211.7775115966797, |
|
"logps/rejected": -1884.5142822265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.310672283172607, |
|
"rewards/margins": 14.503658294677734, |
|
"rewards/rejected": -10.192986488342285, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.044077134986226, |
|
"grad_norm": 0.007283395506607482, |
|
"learning_rate": 7.750847399658336e-07, |
|
"logits/chosen": -0.4257272481918335, |
|
"logits/rejected": 0.5874243974685669, |
|
"logps/chosen": -223.40489196777344, |
|
"logps/rejected": -1896.12890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.3628621101379395, |
|
"rewards/margins": 14.685640335083008, |
|
"rewards/rejected": -10.322778701782227, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0991735537190084, |
|
"grad_norm": 0.011209994441386166, |
|
"learning_rate": 7.491189986426235e-07, |
|
"logits/chosen": -0.4639908969402313, |
|
"logits/rejected": 0.6308785676956177, |
|
"logps/chosen": -214.9459991455078, |
|
"logps/rejected": -1917.2332763671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.369436740875244, |
|
"rewards/margins": 14.576619148254395, |
|
"rewards/rejected": -10.207181930541992, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0991735537190084, |
|
"eval_logits/chosen": -0.507227897644043, |
|
"eval_logits/rejected": 0.6003714799880981, |
|
"eval_logps/chosen": -213.58753967285156, |
|
"eval_logps/rejected": -1862.263427734375, |
|
"eval_loss": 4.040318890474737e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 4.420343399047852, |
|
"eval_rewards/margins": 14.306204795837402, |
|
"eval_rewards/rejected": -9.88586139678955, |
|
"eval_runtime": 14.4092, |
|
"eval_samples_per_second": 6.94, |
|
"eval_steps_per_second": 0.902, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1542699724517906, |
|
"grad_norm": 0.0054132379341320196, |
|
"learning_rate": 7.222265182822738e-07, |
|
"logits/chosen": -0.4459373950958252, |
|
"logits/rejected": 0.5961474180221558, |
|
"logps/chosen": -213.8883514404297, |
|
"logps/rejected": -1930.0252685546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.289681434631348, |
|
"rewards/margins": 14.502676963806152, |
|
"rewards/rejected": -10.212996482849121, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.209366391184573, |
|
"grad_norm": 0.0030775392892693093, |
|
"learning_rate": 6.945073406780295e-07, |
|
"logits/chosen": -0.4307125210762024, |
|
"logits/rejected": 0.590903103351593, |
|
"logps/chosen": -222.69374084472656, |
|
"logps/rejected": -1890.8583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.399219989776611, |
|
"rewards/margins": 14.314299583435059, |
|
"rewards/rejected": -9.915079116821289, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2644628099173554, |
|
"grad_norm": 0.0014803503973676816, |
|
"learning_rate": 6.660645829911793e-07, |
|
"logits/chosen": -0.4305216372013092, |
|
"logits/rejected": 0.6309400796890259, |
|
"logps/chosen": -209.11781311035156, |
|
"logps/rejected": -1899.018310546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.402595043182373, |
|
"rewards/margins": 14.83905029296875, |
|
"rewards/rejected": -10.436454772949219, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3195592286501379, |
|
"grad_norm": 0.0022991422374565593, |
|
"learning_rate": 6.37004054148445e-07, |
|
"logits/chosen": -0.41835030913352966, |
|
"logits/rejected": 0.5944371819496155, |
|
"logps/chosen": -221.07647705078125, |
|
"logps/rejected": -1923.2203369140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.42329216003418, |
|
"rewards/margins": 14.693617820739746, |
|
"rewards/rejected": -10.27032470703125, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.3746556473829201, |
|
"grad_norm": 0.004211894623875947, |
|
"learning_rate": 6.074338612258229e-07, |
|
"logits/chosen": -0.48152151703834534, |
|
"logits/rejected": 0.580518364906311, |
|
"logps/chosen": -212.5377197265625, |
|
"logps/rejected": -1869.040283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.343794822692871, |
|
"rewards/margins": 14.768539428710938, |
|
"rewards/rejected": -10.424745559692383, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4297520661157024, |
|
"grad_norm": 0.0079758108256039, |
|
"learning_rate": 5.774640072831621e-07, |
|
"logits/chosen": -0.45527154207229614, |
|
"logits/rejected": 0.5836026668548584, |
|
"logps/chosen": -209.74172973632812, |
|
"logps/rejected": -1867.1463623046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.34083890914917, |
|
"rewards/margins": 14.600232124328613, |
|
"rewards/rejected": -10.259394645690918, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.4848484848484849, |
|
"grad_norm": 0.0012553783852512462, |
|
"learning_rate": 5.472059821455553e-07, |
|
"logits/chosen": -0.44259369373321533, |
|
"logits/rejected": 0.6033287644386292, |
|
"logps/chosen": -210.79086303710938, |
|
"logps/rejected": -1938.5059814453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.464999198913574, |
|
"rewards/margins": 15.05081558227539, |
|
"rewards/rejected": -10.585816383361816, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5399449035812673, |
|
"grad_norm": 0.008589622119854833, |
|
"learning_rate": 5.167723476538683e-07, |
|
"logits/chosen": -0.45647233724594116, |
|
"logits/rejected": 0.5457276105880737, |
|
"logps/chosen": -215.01332092285156, |
|
"logps/rejected": -1809.7384033203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.425556182861328, |
|
"rewards/margins": 14.342202186584473, |
|
"rewards/rejected": -9.916645050048828, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.5950413223140496, |
|
"grad_norm": 0.0018748874345375542, |
|
"learning_rate": 4.862763189272975e-07, |
|
"logits/chosen": -0.43524405360221863, |
|
"logits/rejected": 0.5741606950759888, |
|
"logps/chosen": -219.72682189941406, |
|
"logps/rejected": -1854.3282470703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.421448707580566, |
|
"rewards/margins": 14.898137092590332, |
|
"rewards/rejected": -10.476688385009766, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6501377410468319, |
|
"grad_norm": 0.0030320670189152815, |
|
"learning_rate": 4.558313431956913e-07, |
|
"logits/chosen": -0.4375744163990021, |
|
"logits/rejected": 0.618186891078949, |
|
"logps/chosen": -213.5662841796875, |
|
"logps/rejected": -1898.40576171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.537826061248779, |
|
"rewards/margins": 15.106904983520508, |
|
"rewards/rejected": -10.569079399108887, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6501377410468319, |
|
"eval_logits/chosen": -0.46859756112098694, |
|
"eval_logits/rejected": 0.5961298942565918, |
|
"eval_logps/chosen": -213.1056365966797, |
|
"eval_logps/rejected": -1864.2166748046875, |
|
"eval_loss": 3.174965240759775e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 4.468534469604492, |
|
"eval_rewards/margins": 14.549699783325195, |
|
"eval_rewards/rejected": -10.081165313720703, |
|
"eval_runtime": 14.4086, |
|
"eval_samples_per_second": 6.94, |
|
"eval_steps_per_second": 0.902, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7052341597796143, |
|
"grad_norm": 0.0030997575847191983, |
|
"learning_rate": 4.25550677768404e-07, |
|
"logits/chosen": -0.43731874227523804, |
|
"logits/rejected": 0.5772750377655029, |
|
"logps/chosen": -205.9576416015625, |
|
"logps/rejected": -1783.1455078125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.505218982696533, |
|
"rewards/margins": 14.560399055480957, |
|
"rewards/rejected": -10.055180549621582, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7603305785123968, |
|
"grad_norm": 0.002449571759456164, |
|
"learning_rate": 3.955469687096656e-07, |
|
"logits/chosen": -0.42703863978385925, |
|
"logits/rejected": 0.5979916453361511, |
|
"logps/chosen": -220.76214599609375, |
|
"logps/rejected": -1828.969970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.466343402862549, |
|
"rewards/margins": 14.93801212310791, |
|
"rewards/rejected": -10.471668243408203, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.815426997245179, |
|
"grad_norm": 0.016071440515503, |
|
"learning_rate": 3.659318317878245e-07, |
|
"logits/chosen": -0.41183432936668396, |
|
"logits/rejected": 0.5881065130233765, |
|
"logps/chosen": -217.6732177734375, |
|
"logps/rejected": -1841.065673828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.430981159210205, |
|
"rewards/margins": 14.954968452453613, |
|
"rewards/rejected": -10.52398681640625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.8705234159779613, |
|
"grad_norm": 0.013402421117586839, |
|
"learning_rate": 3.368154372573584e-07, |
|
"logits/chosen": -0.4136713445186615, |
|
"logits/rejected": 0.5703952312469482, |
|
"logps/chosen": -210.0157470703125, |
|
"logps/rejected": -1780.6533203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.327345848083496, |
|
"rewards/margins": 14.798017501831055, |
|
"rewards/rejected": -10.470672607421875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.9256198347107438, |
|
"grad_norm": 0.003756112993476676, |
|
"learning_rate": 3.083061000182917e-07, |
|
"logits/chosen": -0.4032787084579468, |
|
"logits/rejected": 0.587185263633728, |
|
"logps/chosen": -229.91104125976562, |
|
"logps/rejected": -1978.4166259765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.442521572113037, |
|
"rewards/margins": 15.278725624084473, |
|
"rewards/rejected": -10.836204528808594, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9807162534435263, |
|
"grad_norm": 0.0032546271859814767, |
|
"learning_rate": 2.8050987667765287e-07, |
|
"logits/chosen": -0.4333140552043915, |
|
"logits/rejected": 0.5765838623046875, |
|
"logps/chosen": -212.27235412597656, |
|
"logps/rejected": -1811.88037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.425715446472168, |
|
"rewards/margins": 14.542272567749023, |
|
"rewards/rejected": -10.116557121276855, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.0330578512396693, |
|
"grad_norm": 0.000227181896341656, |
|
"learning_rate": 2.535301710119312e-07, |
|
"logits/chosen": -0.4048818349838257, |
|
"logits/rejected": 0.6457623839378357, |
|
"logps/chosen": -210.2235107421875, |
|
"logps/rejected": -1879.3052978515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.492225646972656, |
|
"rewards/margins": 15.174772262573242, |
|
"rewards/rejected": -10.682547569274902, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.088154269972452, |
|
"grad_norm": 0.009143269690491793, |
|
"learning_rate": 2.274673492982359e-07, |
|
"logits/chosen": -0.4314221739768982, |
|
"logits/rejected": 0.6219318509101868, |
|
"logps/chosen": -219.50732421875, |
|
"logps/rejected": -2001.64599609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.443706035614014, |
|
"rewards/margins": 15.459733009338379, |
|
"rewards/rejected": -11.016026496887207, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.1432506887052343, |
|
"grad_norm": 0.007754682851821086, |
|
"learning_rate": 2.0241836694515336e-07, |
|
"logits/chosen": -0.4248574674129486, |
|
"logits/rejected": 0.6066387295722961, |
|
"logps/chosen": -217.33660888671875, |
|
"logps/rejected": -1914.7127685546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.50474739074707, |
|
"rewards/margins": 15.207026481628418, |
|
"rewards/rejected": -10.702278137207031, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.1983471074380168, |
|
"grad_norm": 0.0014314389375320595, |
|
"learning_rate": 1.7847640781225982e-07, |
|
"logits/chosen": -0.4343634247779846, |
|
"logits/rejected": 0.6130660176277161, |
|
"logps/chosen": -219.28175354003906, |
|
"logps/rejected": -1882.6259765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.440445899963379, |
|
"rewards/margins": 15.030638694763184, |
|
"rewards/rejected": -10.590191841125488, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.1983471074380168, |
|
"eval_logits/chosen": -0.40049922466278076, |
|
"eval_logits/rejected": 0.5949092507362366, |
|
"eval_logps/chosen": -212.68089294433594, |
|
"eval_logps/rejected": -1865.75537109375, |
|
"eval_loss": 2.486165067239199e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 4.511009693145752, |
|
"eval_rewards/margins": 14.746051788330078, |
|
"eval_rewards/rejected": -10.235041618347168, |
|
"eval_runtime": 14.4422, |
|
"eval_samples_per_second": 6.924, |
|
"eval_steps_per_second": 0.9, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 543, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 40662871375872.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|