|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9925373134328357, |
|
"eval_steps": 100, |
|
"global_step": 266, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 593.2823944091797, |
|
"epoch": 0.007462686567164179, |
|
"grad_norm": 0.5037462115287781, |
|
"learning_rate": 3.7037037037037036e-08, |
|
"loss": 0.2424, |
|
"num_tokens": 667405.0, |
|
"reward": 0.18871622439473867, |
|
"reward_std": 0.5178131051361561, |
|
"rewards/accuracy_reward": 0.13169642724096775, |
|
"rewards/cosine_scaled_reward": 0.00010013708379119635, |
|
"rewards/format_reward": 0.05691964435391128, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 629.1518173217773, |
|
"epoch": 0.014925373134328358, |
|
"grad_norm": 0.7031348943710327, |
|
"learning_rate": 7.407407407407407e-08, |
|
"loss": 0.2401, |
|
"num_tokens": 1365053.0, |
|
"reward": 0.20235019456595182, |
|
"reward_std": 0.5161089487373829, |
|
"rewards/accuracy_reward": 0.13616071455180645, |
|
"rewards/cosine_scaled_reward": -0.006355166085995734, |
|
"rewards/format_reward": 0.0725446434225887, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 579.0893096923828, |
|
"epoch": 0.022388059701492536, |
|
"grad_norm": 0.6601409316062927, |
|
"learning_rate": 1.111111111111111e-07, |
|
"loss": 0.2374, |
|
"num_tokens": 2014861.0, |
|
"reward": 0.2147554385010153, |
|
"reward_std": 0.5122785679996014, |
|
"rewards/accuracy_reward": 0.14174107275903225, |
|
"rewards/cosine_scaled_reward": 0.008282212191261351, |
|
"rewards/format_reward": 0.06473214365541935, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 565.4654350280762, |
|
"epoch": 0.029850746268656716, |
|
"grad_norm": 0.4544272720813751, |
|
"learning_rate": 1.4814814814814815e-07, |
|
"loss": 0.2732, |
|
"num_tokens": 2647950.0, |
|
"reward": 0.2221650118008256, |
|
"reward_std": 0.5312090590596199, |
|
"rewards/accuracy_reward": 0.14062500093132257, |
|
"rewards/cosine_scaled_reward": 0.01792393707728479, |
|
"rewards/format_reward": 0.06361607159487903, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 537.1294937133789, |
|
"epoch": 0.03731343283582089, |
|
"grad_norm": 0.4951060116291046, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"loss": 0.2245, |
|
"num_tokens": 3266122.0, |
|
"reward": 0.2845571478828788, |
|
"reward_std": 0.5756800286471844, |
|
"rewards/accuracy_reward": 0.16183035727590322, |
|
"rewards/cosine_scaled_reward": 0.05464643065351993, |
|
"rewards/format_reward": 0.06808035844005644, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 595.4140930175781, |
|
"epoch": 0.04477611940298507, |
|
"grad_norm": 0.43293312191963196, |
|
"learning_rate": 2.222222222222222e-07, |
|
"loss": 0.2237, |
|
"num_tokens": 3928245.0, |
|
"reward": 0.22057450748980045, |
|
"reward_std": 0.5795701257884502, |
|
"rewards/accuracy_reward": 0.1395089291036129, |
|
"rewards/cosine_scaled_reward": 0.011869142268551514, |
|
"rewards/format_reward": 0.06919642933644354, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 557.8248062133789, |
|
"epoch": 0.05223880597014925, |
|
"grad_norm": 0.650478720664978, |
|
"learning_rate": 2.5925925925925923e-07, |
|
"loss": 0.2528, |
|
"num_tokens": 4550280.0, |
|
"reward": 0.23954601865261793, |
|
"reward_std": 0.5432833544909954, |
|
"rewards/accuracy_reward": 0.1372767877765, |
|
"rewards/cosine_scaled_reward": 0.01856386021245271, |
|
"rewards/format_reward": 0.08370535750873387, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 560.6506958007812, |
|
"epoch": 0.05970149253731343, |
|
"grad_norm": 0.46624094247817993, |
|
"learning_rate": 2.962962962962963e-07, |
|
"loss": 0.2368, |
|
"num_tokens": 5196015.0, |
|
"reward": 0.23683909513056278, |
|
"reward_std": 0.5061681233346462, |
|
"rewards/accuracy_reward": 0.15178571362048388, |
|
"rewards/cosine_scaled_reward": 0.024785520159639418, |
|
"rewards/format_reward": 0.06026785750873387, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 571.0123062133789, |
|
"epoch": 0.06716417910447761, |
|
"grad_norm": 0.533889889717102, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.2392, |
|
"num_tokens": 5835498.0, |
|
"reward": 0.23420938570052385, |
|
"reward_std": 0.5464016310870647, |
|
"rewards/accuracy_reward": 0.13839285681024194, |
|
"rewards/cosine_scaled_reward": 0.023271879297681153, |
|
"rewards/format_reward": 0.07254464412108064, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 573.4230194091797, |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 8.411685943603516, |
|
"learning_rate": 3.703703703703703e-07, |
|
"loss": 0.2067, |
|
"num_tokens": 6479685.0, |
|
"reward": 0.28206104040145874, |
|
"reward_std": 0.5646266750991344, |
|
"rewards/accuracy_reward": 0.16406249906867743, |
|
"rewards/cosine_scaled_reward": 0.03429317264817655, |
|
"rewards/format_reward": 0.08370535681024194, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 545.127254486084, |
|
"epoch": 0.08208955223880597, |
|
"grad_norm": 0.9542278051376343, |
|
"learning_rate": 4.0740740740740737e-07, |
|
"loss": 0.1322, |
|
"num_tokens": 7103751.0, |
|
"reward": 0.31405315548181534, |
|
"reward_std": 0.6051793843507767, |
|
"rewards/accuracy_reward": 0.1629464291036129, |
|
"rewards/cosine_scaled_reward": 0.04619600536534563, |
|
"rewards/format_reward": 0.10491071362048388, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 613.8381958007812, |
|
"epoch": 0.08955223880597014, |
|
"grad_norm": 0.5434120297431946, |
|
"learning_rate": 4.444444444444444e-07, |
|
"loss": 0.2048, |
|
"num_tokens": 7791062.0, |
|
"reward": 0.26040036062477157, |
|
"reward_std": 0.5332776308059692, |
|
"rewards/accuracy_reward": 0.14174107008147985, |
|
"rewards/cosine_scaled_reward": 0.015980710508301854, |
|
"rewards/format_reward": 0.10267857136204839, |
|
"step": 12 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 515.9442176818848, |
|
"epoch": 0.09701492537313433, |
|
"grad_norm": 0.5895915031433105, |
|
"learning_rate": 4.814814814814814e-07, |
|
"loss": 0.1288, |
|
"num_tokens": 8388340.0, |
|
"reward": 0.40466225892305374, |
|
"reward_std": 0.6555211395025253, |
|
"rewards/accuracy_reward": 0.1919642873108387, |
|
"rewards/cosine_scaled_reward": 0.07988545499392785, |
|
"rewards/format_reward": 0.1328124995343387, |
|
"step": 13 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 584.5747985839844, |
|
"epoch": 0.1044776119402985, |
|
"grad_norm": 1.4778566360473633, |
|
"learning_rate": 5.185185185185185e-07, |
|
"loss": 0.1507, |
|
"num_tokens": 9054239.0, |
|
"reward": 0.2996965404599905, |
|
"reward_std": 0.5809138379991055, |
|
"rewards/accuracy_reward": 0.11941964272409678, |
|
"rewards/cosine_scaled_reward": -0.00722311669960618, |
|
"rewards/format_reward": 0.1875, |
|
"step": 14 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 620.6741371154785, |
|
"epoch": 0.11194029850746269, |
|
"grad_norm": 2.612856149673462, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.2128, |
|
"num_tokens": 9741539.0, |
|
"reward": 0.34804879780858755, |
|
"reward_std": 0.6096060052514076, |
|
"rewards/accuracy_reward": 0.14062499906867743, |
|
"rewards/cosine_scaled_reward": 0.010995208285748959, |
|
"rewards/format_reward": 0.19642856996506453, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 603.7500228881836, |
|
"epoch": 0.11940298507462686, |
|
"grad_norm": 1.191655158996582, |
|
"learning_rate": 5.925925925925926e-07, |
|
"loss": 0.1818, |
|
"num_tokens": 10413043.0, |
|
"reward": 0.3713220842182636, |
|
"reward_std": 0.639706090092659, |
|
"rewards/accuracy_reward": 0.13950893003493547, |
|
"rewards/cosine_scaled_reward": 0.011947068211156875, |
|
"rewards/format_reward": 0.2198660708963871, |
|
"step": 16 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 544.5558242797852, |
|
"epoch": 0.12686567164179105, |
|
"grad_norm": 0.6938550472259521, |
|
"learning_rate": 6.296296296296296e-07, |
|
"loss": 0.085, |
|
"num_tokens": 11037421.0, |
|
"reward": 0.5226609222590923, |
|
"reward_std": 0.7315020114183426, |
|
"rewards/accuracy_reward": 0.17410713899880648, |
|
"rewards/cosine_scaled_reward": 0.05502696509938687, |
|
"rewards/format_reward": 0.2935267835855484, |
|
"step": 17 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 571.3337364196777, |
|
"epoch": 0.13432835820895522, |
|
"grad_norm": 1.5410027503967285, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.1118, |
|
"num_tokens": 11676216.0, |
|
"reward": 0.591816034168005, |
|
"reward_std": 0.7450486496090889, |
|
"rewards/accuracy_reward": 0.17633928451687098, |
|
"rewards/cosine_scaled_reward": 0.05163742566946894, |
|
"rewards/format_reward": 0.3638392873108387, |
|
"step": 18 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 549.0669898986816, |
|
"epoch": 0.1417910447761194, |
|
"grad_norm": 0.5230954885482788, |
|
"learning_rate": 7.037037037037037e-07, |
|
"loss": 0.0718, |
|
"num_tokens": 12316516.0, |
|
"reward": 0.7351889088749886, |
|
"reward_std": 0.7607561945915222, |
|
"rewards/accuracy_reward": 0.1893028812482953, |
|
"rewards/cosine_scaled_reward": 0.08228707825765014, |
|
"rewards/format_reward": 0.4654017835855484, |
|
"step": 19 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 553.9040451049805, |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 1.0374072790145874, |
|
"learning_rate": 7.407407407407406e-07, |
|
"loss": 0.0991, |
|
"num_tokens": 12945302.0, |
|
"reward": 0.6941376700997353, |
|
"reward_std": 0.730622187256813, |
|
"rewards/accuracy_reward": 0.13281249720603228, |
|
"rewards/cosine_scaled_reward": 0.027843003364978358, |
|
"rewards/format_reward": 0.5334821417927742, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 513.1774749755859, |
|
"epoch": 0.15671641791044777, |
|
"grad_norm": 0.46707218885421753, |
|
"learning_rate": 7.777777777777778e-07, |
|
"loss": 0.0643, |
|
"num_tokens": 13534109.0, |
|
"reward": 0.8666251823306084, |
|
"reward_std": 0.7560148313641548, |
|
"rewards/accuracy_reward": 0.1741071422584355, |
|
"rewards/cosine_scaled_reward": 0.07198226451873779, |
|
"rewards/format_reward": 0.6205357164144516, |
|
"step": 21 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 577.2232398986816, |
|
"epoch": 0.16417910447761194, |
|
"grad_norm": 0.7016672492027283, |
|
"learning_rate": 8.148148148148147e-07, |
|
"loss": 0.1127, |
|
"num_tokens": 14194037.0, |
|
"reward": 0.8435313403606415, |
|
"reward_std": 0.7020122557878494, |
|
"rewards/accuracy_reward": 0.14174107182770967, |
|
"rewards/cosine_scaled_reward": 0.02098666892379697, |
|
"rewards/format_reward": 0.6808035746216774, |
|
"step": 22 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 502.9531440734863, |
|
"epoch": 0.17164179104477612, |
|
"grad_norm": 0.35482147336006165, |
|
"learning_rate": 8.518518518518518e-07, |
|
"loss": 0.1307, |
|
"num_tokens": 14768411.0, |
|
"reward": 1.1029141992330551, |
|
"reward_std": 0.7098172605037689, |
|
"rewards/accuracy_reward": 0.22544642724096775, |
|
"rewards/cosine_scaled_reward": 0.11742306314408779, |
|
"rewards/format_reward": 0.7600446492433548, |
|
"step": 23 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 512.8035926818848, |
|
"epoch": 0.1791044776119403, |
|
"grad_norm": 0.32759323716163635, |
|
"learning_rate": 8.888888888888888e-07, |
|
"loss": 0.0876, |
|
"num_tokens": 15351427.0, |
|
"reward": 1.149243749678135, |
|
"reward_std": 0.7510530278086662, |
|
"rewards/accuracy_reward": 0.21205356903374195, |
|
"rewards/cosine_scaled_reward": 0.1124133332632482, |
|
"rewards/format_reward": 0.8247767984867096, |
|
"step": 24 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 535.4419860839844, |
|
"epoch": 0.1865671641791045, |
|
"grad_norm": 0.3772229850292206, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.1594, |
|
"num_tokens": 15974879.0, |
|
"reward": 1.1618424132466316, |
|
"reward_std": 0.6563375778496265, |
|
"rewards/accuracy_reward": 0.20535714086145163, |
|
"rewards/cosine_scaled_reward": 0.09934233513195068, |
|
"rewards/format_reward": 0.8571428582072258, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 557.8861923217773, |
|
"epoch": 0.19402985074626866, |
|
"grad_norm": 0.291864275932312, |
|
"learning_rate": 9.629629629629628e-07, |
|
"loss": 0.0932, |
|
"num_tokens": 16604401.0, |
|
"reward": 1.2150916159152985, |
|
"reward_std": 0.7205987647175789, |
|
"rewards/accuracy_reward": 0.22656249813735485, |
|
"rewards/cosine_scaled_reward": 0.12022545811487362, |
|
"rewards/format_reward": 0.8683035597205162, |
|
"step": 26 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 512.9129600524902, |
|
"epoch": 0.20149253731343283, |
|
"grad_norm": 0.31357285380363464, |
|
"learning_rate": 1e-06, |
|
"loss": 0.1004, |
|
"num_tokens": 17195475.0, |
|
"reward": 1.3348890244960785, |
|
"reward_std": 0.6589159071445465, |
|
"rewards/accuracy_reward": 0.26897321455180645, |
|
"rewards/cosine_scaled_reward": 0.1685942793264985, |
|
"rewards/format_reward": 0.8973214253783226, |
|
"step": 27 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 532.2611846923828, |
|
"epoch": 0.208955223880597, |
|
"grad_norm": 0.3403704762458801, |
|
"learning_rate": 9.999575185316993e-07, |
|
"loss": 0.1619, |
|
"num_tokens": 17811437.0, |
|
"reward": 1.2805243134498596, |
|
"reward_std": 0.6447809338569641, |
|
"rewards/accuracy_reward": 0.24441963993012905, |
|
"rewards/cosine_scaled_reward": 0.12873857002705336, |
|
"rewards/format_reward": 0.9073660746216774, |
|
"step": 28 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 506.21988677978516, |
|
"epoch": 0.21641791044776118, |
|
"grad_norm": 0.35326462984085083, |
|
"learning_rate": 9.99830081345498e-07, |
|
"loss": 0.1134, |
|
"num_tokens": 18408722.0, |
|
"reward": 1.336455225944519, |
|
"reward_std": 0.6456731334328651, |
|
"rewards/accuracy_reward": 0.25000000186264515, |
|
"rewards/cosine_scaled_reward": 0.15565160103142262, |
|
"rewards/format_reward": 0.9308035746216774, |
|
"step": 29 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 503.8482475280762, |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 0.26988422870635986, |
|
"learning_rate": 9.996177100962712e-07, |
|
"loss": 0.0995, |
|
"num_tokens": 18986002.0, |
|
"reward": 1.4584019258618355, |
|
"reward_std": 0.6846916638314724, |
|
"rewards/accuracy_reward": 0.315848215483129, |
|
"rewards/cosine_scaled_reward": 0.21733042690902948, |
|
"rewards/format_reward": 0.9252232164144516, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 471.99778747558594, |
|
"epoch": 0.23134328358208955, |
|
"grad_norm": 0.3263660669326782, |
|
"learning_rate": 9.99320440871389e-07, |
|
"loss": 0.1279, |
|
"num_tokens": 19548200.0, |
|
"reward": 1.4953693896532059, |
|
"reward_std": 0.7131579741835594, |
|
"rewards/accuracy_reward": 0.32142856903374195, |
|
"rewards/cosine_scaled_reward": 0.2308604083955288, |
|
"rewards/format_reward": 0.9430803582072258, |
|
"step": 31 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 494.79801177978516, |
|
"epoch": 0.23880597014925373, |
|
"grad_norm": 0.32770803570747375, |
|
"learning_rate": 9.989383241845837e-07, |
|
"loss": 0.0804, |
|
"num_tokens": 20116083.0, |
|
"reward": 1.599047303199768, |
|
"reward_std": 0.754006952047348, |
|
"rewards/accuracy_reward": 0.3761160746216774, |
|
"rewards/cosine_scaled_reward": 0.28208293952047825, |
|
"rewards/format_reward": 0.9408482015132904, |
|
"step": 32 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 531.2064971923828, |
|
"epoch": 0.2462686567164179, |
|
"grad_norm": 0.27149882912635803, |
|
"learning_rate": 9.984714249673673e-07, |
|
"loss": 0.1024, |
|
"num_tokens": 20746676.0, |
|
"reward": 1.6310840100049973, |
|
"reward_std": 0.6816431954503059, |
|
"rewards/accuracy_reward": 0.3861607164144516, |
|
"rewards/cosine_scaled_reward": 0.27952144481241703, |
|
"rewards/format_reward": 0.9654017835855484, |
|
"step": 33 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 514.7946662902832, |
|
"epoch": 0.2537313432835821, |
|
"grad_norm": 0.27265796065330505, |
|
"learning_rate": 9.979198225579968e-07, |
|
"loss": 0.1376, |
|
"num_tokens": 21335188.0, |
|
"reward": 1.7159467786550522, |
|
"reward_std": 0.6568828374147415, |
|
"rewards/accuracy_reward": 0.4274553544819355, |
|
"rewards/cosine_scaled_reward": 0.3186252359300852, |
|
"rewards/format_reward": 0.9698660746216774, |
|
"step": 34 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 546.0234527587891, |
|
"epoch": 0.26119402985074625, |
|
"grad_norm": 0.2513149380683899, |
|
"learning_rate": 9.972836106879934e-07, |
|
"loss": 0.1169, |
|
"num_tokens": 21950753.0, |
|
"reward": 1.686128944158554, |
|
"reward_std": 0.6522306874394417, |
|
"rewards/accuracy_reward": 0.4196428582072258, |
|
"rewards/cosine_scaled_reward": 0.316709216684103, |
|
"rewards/format_reward": 0.9497767835855484, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 481.2410888671875, |
|
"epoch": 0.26865671641791045, |
|
"grad_norm": 0.2620702385902405, |
|
"learning_rate": 9.965628974662144e-07, |
|
"loss": 0.1147, |
|
"num_tokens": 22503649.0, |
|
"reward": 1.9226552546024323, |
|
"reward_std": 0.6497415080666542, |
|
"rewards/accuracy_reward": 0.5256696417927742, |
|
"rewards/cosine_scaled_reward": 0.4248872734606266, |
|
"rewards/format_reward": 0.9720982164144516, |
|
"step": 36 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 523.6540451049805, |
|
"epoch": 0.27611940298507465, |
|
"grad_norm": 0.2901047468185425, |
|
"learning_rate": 9.957578053604837e-07, |
|
"loss": 0.155, |
|
"num_tokens": 23097323.0, |
|
"reward": 1.9160521030426025, |
|
"reward_std": 0.5634343735873699, |
|
"rewards/accuracy_reward": 0.5234375074505806, |
|
"rewards/cosine_scaled_reward": 0.4238645453006029, |
|
"rewards/format_reward": 0.96875, |
|
"step": 37 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 471.8593978881836, |
|
"epoch": 0.2835820895522388, |
|
"grad_norm": 0.2969004511833191, |
|
"learning_rate": 9.948684711767799e-07, |
|
"loss": 0.1299, |
|
"num_tokens": 23653853.0, |
|
"reward": 1.9379696995019913, |
|
"reward_std": 0.4608934037387371, |
|
"rewards/accuracy_reward": 0.5189732126891613, |
|
"rewards/cosine_scaled_reward": 0.434621412307024, |
|
"rewards/format_reward": 0.984375, |
|
"step": 38 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 532.287971496582, |
|
"epoch": 0.291044776119403, |
|
"grad_norm": 0.2593567669391632, |
|
"learning_rate": 9.938950460359912e-07, |
|
"loss": 0.1593, |
|
"num_tokens": 24272495.0, |
|
"reward": 1.7254538089036942, |
|
"reward_std": 0.5551125332713127, |
|
"rewards/accuracy_reward": 0.4196428656578064, |
|
"rewards/cosine_scaled_reward": 0.32478404976427555, |
|
"rewards/format_reward": 0.9810267761349678, |
|
"step": 39 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 484.3884086608887, |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 0.279813677072525, |
|
"learning_rate": 9.928376953482342e-07, |
|
"loss": 0.1591, |
|
"num_tokens": 24837707.0, |
|
"reward": 1.905771628022194, |
|
"reward_std": 0.4304558988660574, |
|
"rewards/accuracy_reward": 0.5066964253783226, |
|
"rewards/cosine_scaled_reward": 0.42139650508761406, |
|
"rewards/format_reward": 0.9776785671710968, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 467.0167579650879, |
|
"epoch": 0.30597014925373134, |
|
"grad_norm": 0.486409068107605, |
|
"learning_rate": 9.916965987847484e-07, |
|
"loss": 0.1263, |
|
"num_tokens": 25387114.0, |
|
"reward": 1.8283725529909134, |
|
"reward_std": 0.5463476590812206, |
|
"rewards/accuracy_reward": 0.4620535746216774, |
|
"rewards/cosine_scaled_reward": 0.38417606614530087, |
|
"rewards/format_reward": 0.9821428582072258, |
|
"step": 41 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 461.70984268188477, |
|
"epoch": 0.31343283582089554, |
|
"grad_norm": 0.30666935443878174, |
|
"learning_rate": 9.904719502473632e-07, |
|
"loss": 0.1408, |
|
"num_tokens": 25937686.0, |
|
"reward": 1.784839078783989, |
|
"reward_std": 0.5817533135414124, |
|
"rewards/accuracy_reward": 0.4464285708963871, |
|
"rewards/cosine_scaled_reward": 0.3629639744758606, |
|
"rewards/format_reward": 0.9754464328289032, |
|
"step": 42 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 387.7968940734863, |
|
"epoch": 0.3208955223880597, |
|
"grad_norm": 0.3015042841434479, |
|
"learning_rate": 9.89163957835551e-07, |
|
"loss": 0.1362, |
|
"num_tokens": 26423936.0, |
|
"reward": 1.9611081928014755, |
|
"reward_std": 0.5745424814522266, |
|
"rewards/accuracy_reward": 0.5156250037252903, |
|
"rewards/cosine_scaled_reward": 0.45664383843541145, |
|
"rewards/format_reward": 0.9888392761349678, |
|
"step": 43 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 348.5122871398926, |
|
"epoch": 0.3283582089552239, |
|
"grad_norm": 0.34659165143966675, |
|
"learning_rate": 9.877728438110645e-07, |
|
"loss": 0.1396, |
|
"num_tokens": 26857179.0, |
|
"reward": 1.9145096093416214, |
|
"reward_std": 0.5022773817181587, |
|
"rewards/accuracy_reward": 0.4955357201397419, |
|
"rewards/cosine_scaled_reward": 0.44241129234433174, |
|
"rewards/format_reward": 0.9765625, |
|
"step": 44 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 332.58372497558594, |
|
"epoch": 0.3358208955223881, |
|
"grad_norm": 0.43583589792251587, |
|
"learning_rate": 9.862988445601687e-07, |
|
"loss": 0.169, |
|
"num_tokens": 27290358.0, |
|
"reward": 1.7086158692836761, |
|
"reward_std": 0.429446816444397, |
|
"rewards/accuracy_reward": 0.3816964291036129, |
|
"rewards/cosine_scaled_reward": 0.3414282575249672, |
|
"rewards/format_reward": 0.9854910671710968, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 278.4453239440918, |
|
"epoch": 0.34328358208955223, |
|
"grad_norm": 0.46366527676582336, |
|
"learning_rate": 9.847422105534737e-07, |
|
"loss": 0.1147, |
|
"num_tokens": 27683117.0, |
|
"reward": 1.891074076294899, |
|
"reward_std": 0.536506325006485, |
|
"rewards/accuracy_reward": 0.4654017798602581, |
|
"rewards/cosine_scaled_reward": 0.43236861005425453, |
|
"rewards/format_reward": 0.9933035597205162, |
|
"step": 46 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 242.423002243042, |
|
"epoch": 0.35074626865671643, |
|
"grad_norm": 0.5321322679519653, |
|
"learning_rate": 9.831032063033724e-07, |
|
"loss": 0.113, |
|
"num_tokens": 28037664.0, |
|
"reward": 1.8316063284873962, |
|
"reward_std": 0.5450388044118881, |
|
"rewards/accuracy_reward": 0.4308035634458065, |
|
"rewards/cosine_scaled_reward": 0.4063830114901066, |
|
"rewards/format_reward": 0.9944196417927742, |
|
"step": 47 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 193.76898002624512, |
|
"epoch": 0.3582089552238806, |
|
"grad_norm": 0.5765193104743958, |
|
"learning_rate": 9.813821103190931e-07, |
|
"loss": 0.1659, |
|
"num_tokens": 28342873.0, |
|
"reward": 1.669696494936943, |
|
"reward_std": 0.4199746139347553, |
|
"rewards/accuracy_reward": 0.3482142835855484, |
|
"rewards/cosine_scaled_reward": 0.3315267227590084, |
|
"rewards/format_reward": 0.9899553507566452, |
|
"step": 48 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 158.8895149230957, |
|
"epoch": 0.3656716417910448, |
|
"grad_norm": 1.0433906316757202, |
|
"learning_rate": 9.795792150593738e-07, |
|
"loss": 0.135, |
|
"num_tokens": 28625046.0, |
|
"reward": 1.7185450494289398, |
|
"reward_std": 0.4323030523955822, |
|
"rewards/accuracy_reward": 0.3727678544819355, |
|
"rewards/cosine_scaled_reward": 0.3614020850509405, |
|
"rewards/format_reward": 0.9843749925494194, |
|
"step": 49 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 140.64509391784668, |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 2.962268829345703, |
|
"learning_rate": 9.776948268827657e-07, |
|
"loss": 0.1502, |
|
"num_tokens": 28884872.0, |
|
"reward": 1.5966612845659256, |
|
"reward_std": 0.47611169144511223, |
|
"rewards/accuracy_reward": 0.3058035708963871, |
|
"rewards/cosine_scaled_reward": 0.29755405336618423, |
|
"rewards/format_reward": 0.9933035597205162, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 110.19531726837158, |
|
"epoch": 0.3805970149253731, |
|
"grad_norm": 1.2034224271774292, |
|
"learning_rate": 9.757292659955754e-07, |
|
"loss": 0.1468, |
|
"num_tokens": 29105703.0, |
|
"reward": 1.6452730596065521, |
|
"reward_std": 0.4458727203309536, |
|
"rewards/accuracy_reward": 0.32924107275903225, |
|
"rewards/cosine_scaled_reward": 0.32384440395981073, |
|
"rewards/format_reward": 0.9921874925494194, |
|
"step": 51 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 88.12277317047119, |
|
"epoch": 0.3880597014925373, |
|
"grad_norm": 1.8556318283081055, |
|
"learning_rate": 9.736828663974526e-07, |
|
"loss": 0.1886, |
|
"num_tokens": 29322037.0, |
|
"reward": 1.5704896599054337, |
|
"reward_std": 0.4764312729239464, |
|
"rewards/accuracy_reward": 0.29017857275903225, |
|
"rewards/cosine_scaled_reward": 0.28700744174420834, |
|
"rewards/format_reward": 0.9933035671710968, |
|
"step": 52 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 69.49553918838501, |
|
"epoch": 0.39552238805970147, |
|
"grad_norm": 1.8629873991012573, |
|
"learning_rate": 9.715559758246361e-07, |
|
"loss": 0.1698, |
|
"num_tokens": 29519473.0, |
|
"reward": 1.5159788131713867, |
|
"reward_std": 0.4448518790304661, |
|
"rewards/accuracy_reward": 0.261160715483129, |
|
"rewards/cosine_scaled_reward": 0.2581662777811289, |
|
"rewards/format_reward": 0.9966517761349678, |
|
"step": 53 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 53.76785898208618, |
|
"epoch": 0.40298507462686567, |
|
"grad_norm": 2.3949267864227295, |
|
"learning_rate": 9.69348955690864e-07, |
|
"loss": 0.1639, |
|
"num_tokens": 29700337.0, |
|
"reward": 1.477759376168251, |
|
"reward_std": 0.3300577197223902, |
|
"rewards/accuracy_reward": 0.2410714291036129, |
|
"rewards/cosine_scaled_reward": 0.2400361318141222, |
|
"rewards/format_reward": 0.9966517761349678, |
|
"step": 54 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 41.593751430511475, |
|
"epoch": 0.41044776119402987, |
|
"grad_norm": 6.706897258758545, |
|
"learning_rate": 9.670621810259594e-07, |
|
"loss": 0.107, |
|
"num_tokens": 29859917.0, |
|
"reward": 1.6255246251821518, |
|
"reward_std": 0.3502213731408119, |
|
"rewards/accuracy_reward": 0.31473214365541935, |
|
"rewards/cosine_scaled_reward": 0.3141406271606684, |
|
"rewards/format_reward": 0.9966517761349678, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 36.96540403366089, |
|
"epoch": 0.417910447761194, |
|
"grad_norm": 3.161987781524658, |
|
"learning_rate": 9.64696040412104e-07, |
|
"loss": 0.1082, |
|
"num_tokens": 30023526.0, |
|
"reward": 1.4321366250514984, |
|
"reward_std": 0.3272698614746332, |
|
"rewards/accuracy_reward": 0.21986606623977423, |
|
"rewards/cosine_scaled_reward": 0.21896691620349884, |
|
"rewards/format_reward": 0.9933035746216774, |
|
"step": 56 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 28.55915331840515, |
|
"epoch": 0.4253731343283582, |
|
"grad_norm": 2.9937174320220947, |
|
"learning_rate": 9.62250935917808e-07, |
|
"loss": 0.0479, |
|
"num_tokens": 30176771.0, |
|
"reward": 1.4093515276908875, |
|
"reward_std": 0.22921365313231945, |
|
"rewards/accuracy_reward": 0.20535714086145163, |
|
"rewards/cosine_scaled_reward": 0.20511038601398468, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 57 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 26.62834930419922, |
|
"epoch": 0.43283582089552236, |
|
"grad_norm": 3.9241106510162354, |
|
"learning_rate": 9.597272830295876e-07, |
|
"loss": 0.035, |
|
"num_tokens": 30335774.0, |
|
"reward": 1.479707032442093, |
|
"reward_std": 0.24030436016619205, |
|
"rewards/accuracy_reward": 0.24107142724096775, |
|
"rewards/cosine_scaled_reward": 0.24086768366396427, |
|
"rewards/format_reward": 0.9977678507566452, |
|
"step": 58 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 25.078126192092896, |
|
"epoch": 0.44029850746268656, |
|
"grad_norm": 3.5471527576446533, |
|
"learning_rate": 9.57125510581363e-07, |
|
"loss": 0.023, |
|
"num_tokens": 30487948.0, |
|
"reward": 1.6225911229848862, |
|
"reward_std": 0.31288249231874943, |
|
"rewards/accuracy_reward": 0.31361607275903225, |
|
"rewards/cosine_scaled_reward": 0.31120707653462887, |
|
"rewards/format_reward": 0.9977678507566452, |
|
"step": 59 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 24.590402603149414, |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 2.711862802505493, |
|
"learning_rate": 9.5444606068159e-07, |
|
"loss": 0.0366, |
|
"num_tokens": 30641445.0, |
|
"reward": 1.433879777789116, |
|
"reward_std": 0.2141956863924861, |
|
"rewards/accuracy_reward": 0.21763392444700003, |
|
"rewards/cosine_scaled_reward": 0.21624577604234219, |
|
"rewards/format_reward": 1.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 23.093750953674316, |
|
"epoch": 0.4552238805970149, |
|
"grad_norm": 2.6093637943267822, |
|
"learning_rate": 9.516893886381321e-07, |
|
"loss": 0.0178, |
|
"num_tokens": 30803937.0, |
|
"reward": 1.3692706674337387, |
|
"reward_std": 0.1967663299292326, |
|
"rewards/accuracy_reward": 0.1863839286379516, |
|
"rewards/cosine_scaled_reward": 0.18623489327728748, |
|
"rewards/format_reward": 0.9966517761349678, |
|
"step": 61 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 22.19084930419922, |
|
"epoch": 0.4626865671641791, |
|
"grad_norm": 2.131438732147217, |
|
"learning_rate": 9.488559628808938e-07, |
|
"loss": 0.0064, |
|
"num_tokens": 30953740.0, |
|
"reward": 1.422857090830803, |
|
"reward_std": 0.20307728182524443, |
|
"rewards/accuracy_reward": 0.21205356996506453, |
|
"rewards/cosine_scaled_reward": 0.21191950421780348, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 62 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 22.098215341567993, |
|
"epoch": 0.4701492537313433, |
|
"grad_norm": 2.0213825702667236, |
|
"learning_rate": 9.459462648822207e-07, |
|
"loss": 0.0076, |
|
"num_tokens": 31112844.0, |
|
"reward": 1.4250895529985428, |
|
"reward_std": 0.2019376672008093, |
|
"rewards/accuracy_reward": 0.2131696455180645, |
|
"rewards/cosine_scaled_reward": 0.21303593553602695, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 63 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 22.00111722946167, |
|
"epoch": 0.47761194029850745, |
|
"grad_norm": 2.22880220413208, |
|
"learning_rate": 9.429607890750862e-07, |
|
"loss": 0.0049, |
|
"num_tokens": 31274853.0, |
|
"reward": 1.5980830639600754, |
|
"reward_std": 0.21290546283125877, |
|
"rewards/accuracy_reward": 0.29910714365541935, |
|
"rewards/cosine_scaled_reward": 0.29897585324943066, |
|
"rewards/format_reward": 1.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.63169765472412, |
|
"epoch": 0.48507462686567165, |
|
"grad_norm": 2.368590831756592, |
|
"learning_rate": 9.399000427690734e-07, |
|
"loss": 0.007, |
|
"num_tokens": 31425235.0, |
|
"reward": 1.475319281220436, |
|
"reward_std": 0.19503124710172415, |
|
"rewards/accuracy_reward": 0.2388392835855484, |
|
"rewards/cosine_scaled_reward": 0.2375959688797593, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.521206617355347, |
|
"epoch": 0.4925373134328358, |
|
"grad_norm": 1.893410325050354, |
|
"learning_rate": 9.367645460641714e-07, |
|
"loss": 0.0017, |
|
"num_tokens": 31578222.0, |
|
"reward": 1.4797853082418442, |
|
"reward_std": 0.20832678768783808, |
|
"rewards/accuracy_reward": 0.23995535541325808, |
|
"rewards/cosine_scaled_reward": 0.23982988391071558, |
|
"rewards/format_reward": 1.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.70647430419922, |
|
"epoch": 0.5, |
|
"grad_norm": 2.0883381366729736, |
|
"learning_rate": 9.335548317623956e-07, |
|
"loss": 0.002, |
|
"num_tokens": 31726287.0, |
|
"reward": 1.573533684015274, |
|
"reward_std": 0.2080208584666252, |
|
"rewards/accuracy_reward": 0.2868303582072258, |
|
"rewards/cosine_scaled_reward": 0.28670324943959713, |
|
"rewards/format_reward": 1.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.202009916305542, |
|
"epoch": 0.5074626865671642, |
|
"grad_norm": 2.551394462585449, |
|
"learning_rate": 9.302714452772514e-07, |
|
"loss": 0.0052, |
|
"num_tokens": 31876548.0, |
|
"reward": 1.4675123244524002, |
|
"reward_std": 0.1835378697142005, |
|
"rewards/accuracy_reward": 0.2343750037252903, |
|
"rewards/cosine_scaled_reward": 0.23425334133207798, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 68 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.535715103149414, |
|
"epoch": 0.5149253731343284, |
|
"grad_norm": 2.6030776500701904, |
|
"learning_rate": 9.269149445410544e-07, |
|
"loss": 0.0046, |
|
"num_tokens": 32020812.0, |
|
"reward": 1.5433960407972336, |
|
"reward_std": 0.20367479603737593, |
|
"rewards/accuracy_reward": 0.2723214318975806, |
|
"rewards/cosine_scaled_reward": 0.2721905801445246, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 69 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.599331617355347, |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 2.546696424484253, |
|
"learning_rate": 9.23485899910123e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 32169237.0, |
|
"reward": 1.4831460118293762, |
|
"reward_std": 0.17036813125014305, |
|
"rewards/accuracy_reward": 0.24029876478016376, |
|
"rewards/cosine_scaled_reward": 0.2465388011187315, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.520090103149414, |
|
"epoch": 0.5298507462686567, |
|
"grad_norm": 2.1121726036071777, |
|
"learning_rate": 9.199848940678605e-07, |
|
"loss": 0.0011, |
|
"num_tokens": 32314543.0, |
|
"reward": 1.5132792741060257, |
|
"reward_std": 0.1777313705533743, |
|
"rewards/accuracy_reward": 0.25669643096625805, |
|
"rewards/cosine_scaled_reward": 0.25658278726041317, |
|
"rewards/format_reward": 1.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.00334882736206, |
|
"epoch": 0.5373134328358209, |
|
"grad_norm": 3.4874420166015625, |
|
"learning_rate": 9.164125219257417e-07, |
|
"loss": 0.0042, |
|
"num_tokens": 32464130.0, |
|
"reward": 1.4764407873153687, |
|
"reward_std": 0.17472796607762575, |
|
"rewards/accuracy_reward": 0.23883928544819355, |
|
"rewards/cosine_scaled_reward": 0.23871749639511108, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 72 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.36049222946167, |
|
"epoch": 0.5447761194029851, |
|
"grad_norm": 2.506866931915283, |
|
"learning_rate": 9.127693905222223e-07, |
|
"loss": -0.0017, |
|
"num_tokens": 32614685.0, |
|
"reward": 1.508817195892334, |
|
"reward_std": 0.1598520427942276, |
|
"rewards/accuracy_reward": 0.2555803544819355, |
|
"rewards/cosine_scaled_reward": 0.2554689049720764, |
|
"rewards/format_reward": 0.9977678507566452, |
|
"step": 73 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.82366180419922, |
|
"epoch": 0.5522388059701493, |
|
"grad_norm": 2.803783893585205, |
|
"learning_rate": 9.090561189195869e-07, |
|
"loss": 0.0032, |
|
"num_tokens": 32764399.0, |
|
"reward": 1.5980816781520844, |
|
"reward_std": 0.1671485211700201, |
|
"rewards/accuracy_reward": 0.29910713993012905, |
|
"rewards/cosine_scaled_reward": 0.2989744786173105, |
|
"rewards/format_reward": 1.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.162947416305542, |
|
"epoch": 0.5597014925373134, |
|
"grad_norm": 2.7349565029144287, |
|
"learning_rate": 9.052733380987554e-07, |
|
"loss": -0.0034, |
|
"num_tokens": 32921881.0, |
|
"reward": 1.5400699526071548, |
|
"reward_std": 0.15323490625996783, |
|
"rewards/accuracy_reward": 0.2700892873108387, |
|
"rewards/cosine_scaled_reward": 0.26998060569167137, |
|
"rewards/format_reward": 1.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.48549175262451, |
|
"epoch": 0.5671641791044776, |
|
"grad_norm": 2.8505167961120605, |
|
"learning_rate": 9.014216908520618e-07, |
|
"loss": 0.0031, |
|
"num_tokens": 33073868.0, |
|
"reward": 1.4675205424427986, |
|
"reward_std": 0.18623241062249463, |
|
"rewards/accuracy_reward": 0.23437499813735485, |
|
"rewards/cosine_scaled_reward": 0.23426154493154172, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 76 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.43861675262451, |
|
"epoch": 0.5746268656716418, |
|
"grad_norm": 2.716010332107544, |
|
"learning_rate": 8.975018316740277e-07, |
|
"loss": 0.0014, |
|
"num_tokens": 33228253.0, |
|
"reward": 1.3793513923883438, |
|
"reward_std": 0.13768241831448336, |
|
"rewards/accuracy_reward": 0.18973214644938707, |
|
"rewards/cosine_scaled_reward": 0.1896191742271185, |
|
"rewards/format_reward": 1.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.793527603149414, |
|
"epoch": 0.582089552238806, |
|
"grad_norm": 2.40360164642334, |
|
"learning_rate": 8.935144266501468e-07, |
|
"loss": 0.0032, |
|
"num_tokens": 33381900.0, |
|
"reward": 1.5244351625442505, |
|
"reward_std": 0.11904470889763274, |
|
"rewards/accuracy_reward": 0.26227678917348385, |
|
"rewards/cosine_scaled_reward": 0.2621582942083478, |
|
"rewards/format_reward": 1.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.420759677886963, |
|
"epoch": 0.5895522388059702, |
|
"grad_norm": 1.7719311714172363, |
|
"learning_rate": 8.894601533436998e-07, |
|
"loss": 0.0, |
|
"num_tokens": 33529069.0, |
|
"reward": 1.4775669872760773, |
|
"reward_std": 0.08695766101230618, |
|
"rewards/accuracy_reward": 0.238839291036129, |
|
"rewards/cosine_scaled_reward": 0.23872762825340033, |
|
"rewards/format_reward": 1.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.217634916305542, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 2.443143129348755, |
|
"learning_rate": 8.853397006806181e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 33688720.0, |
|
"reward": 1.513253703713417, |
|
"reward_std": 0.07582757750845559, |
|
"rewards/accuracy_reward": 0.25669642724096775, |
|
"rewards/cosine_scaled_reward": 0.25655714236199856, |
|
"rewards/format_reward": 1.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.36495614051819, |
|
"epoch": 0.6044776119402985, |
|
"grad_norm": 2.328760862350464, |
|
"learning_rate": 8.811537688324187e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 33841447.0, |
|
"reward": 1.5244421511888504, |
|
"reward_std": 0.14594503585249186, |
|
"rewards/accuracy_reward": 0.2622767873108387, |
|
"rewards/cosine_scaled_reward": 0.2621652837842703, |
|
"rewards/format_reward": 1.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.29017925262451, |
|
"epoch": 0.6119402985074627, |
|
"grad_norm": 2.725046396255493, |
|
"learning_rate": 8.769030690972261e-07, |
|
"loss": 0.001, |
|
"num_tokens": 33983067.0, |
|
"reward": 1.6338183134794235, |
|
"reward_std": 0.10190565621059022, |
|
"rewards/accuracy_reward": 0.31696428544819355, |
|
"rewards/cosine_scaled_reward": 0.31685397773981094, |
|
"rewards/format_reward": 1.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.447545528411865, |
|
"epoch": 0.6194029850746269, |
|
"grad_norm": 2.60213565826416, |
|
"learning_rate": 8.725883237789044e-07, |
|
"loss": 0.0015, |
|
"num_tokens": 34142804.0, |
|
"reward": 1.578012928366661, |
|
"reward_std": 0.13737911762410704, |
|
"rewards/accuracy_reward": 0.28906249813735485, |
|
"rewards/cosine_scaled_reward": 0.2889503873884678, |
|
"rewards/format_reward": 1.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.629465103149414, |
|
"epoch": 0.6268656716417911, |
|
"grad_norm": 3.175710439682007, |
|
"learning_rate": 8.682102660643195e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 34285752.0, |
|
"reward": 1.6293497383594513, |
|
"reward_std": 0.1396320709803831, |
|
"rewards/accuracy_reward": 0.3147321417927742, |
|
"rewards/cosine_scaled_reward": 0.31461753230541945, |
|
"rewards/format_reward": 1.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.609375715255737, |
|
"epoch": 0.6343283582089553, |
|
"grad_norm": 1.994175672531128, |
|
"learning_rate": 8.637696398987515e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 34437546.0, |
|
"reward": 1.4998853504657745, |
|
"reward_std": 0.11160349007695913, |
|
"rewards/accuracy_reward": 0.25, |
|
"rewards/cosine_scaled_reward": 0.2498853299766779, |
|
"rewards/format_reward": 1.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.772322177886963, |
|
"epoch": 0.6417910447761194, |
|
"grad_norm": 2.2707390785217285, |
|
"learning_rate": 8.592671998594793e-07, |
|
"loss": 0.0014, |
|
"num_tokens": 34590166.0, |
|
"reward": 1.569079726934433, |
|
"reward_std": 0.12746261023711725, |
|
"rewards/accuracy_reward": 0.2845982136204839, |
|
"rewards/cosine_scaled_reward": 0.2844814406707883, |
|
"rewards/format_reward": 1.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.42522406578064, |
|
"epoch": 0.6492537313432836, |
|
"grad_norm": 3.1332757472991943, |
|
"learning_rate": 8.547037110275579e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 34732587.0, |
|
"reward": 1.5824772864580154, |
|
"reward_std": 0.17397390864789486, |
|
"rewards/accuracy_reward": 0.29129463993012905, |
|
"rewards/cosine_scaled_reward": 0.2911826092749834, |
|
"rewards/format_reward": 1.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.521206378936768, |
|
"epoch": 0.6567164179104478, |
|
"grad_norm": 3.331784248352051, |
|
"learning_rate": 8.500799488578119e-07, |
|
"loss": 0.0018, |
|
"num_tokens": 34886118.0, |
|
"reward": 1.517743095755577, |
|
"reward_std": 0.10318425773594697, |
|
"rewards/accuracy_reward": 0.25892857275903225, |
|
"rewards/cosine_scaled_reward": 0.2588145062327385, |
|
"rewards/format_reward": 1.0, |
|
"step": 88 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.195313453674316, |
|
"epoch": 0.664179104477612, |
|
"grad_norm": 2.983870506286621, |
|
"learning_rate": 8.453966990470656e-07, |
|
"loss": 0.0017, |
|
"num_tokens": 35035173.0, |
|
"reward": 1.4329265505075455, |
|
"reward_std": 0.1654082857307344, |
|
"rewards/accuracy_reward": 0.21651785587891936, |
|
"rewards/cosine_scaled_reward": 0.21640866296365857, |
|
"rewards/format_reward": 1.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.505581378936768, |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 2.7624764442443848, |
|
"learning_rate": 8.406547574006324e-07, |
|
"loss": 0.0028, |
|
"num_tokens": 35170154.0, |
|
"reward": 1.6784569025039673, |
|
"reward_std": 0.11971815738125713, |
|
"rewards/accuracy_reward": 0.33928571827709675, |
|
"rewards/cosine_scaled_reward": 0.339171065017581, |
|
"rewards/format_reward": 1.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.358259677886963, |
|
"epoch": 0.6791044776119403, |
|
"grad_norm": 3.5564069747924805, |
|
"learning_rate": 8.358549296970875e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 35322683.0, |
|
"reward": 1.4887285381555557, |
|
"reward_std": 0.10889045795626373, |
|
"rewards/accuracy_reward": 0.24441964458674192, |
|
"rewards/cosine_scaled_reward": 0.24430878367275, |
|
"rewards/format_reward": 1.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.440849542617798, |
|
"epoch": 0.6865671641791045, |
|
"grad_norm": 3.2373881340026855, |
|
"learning_rate": 8.309980315513442e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 35471790.0, |
|
"reward": 1.7141735553741455, |
|
"reward_std": 0.15256303502246737, |
|
"rewards/accuracy_reward": 0.3571428544819355, |
|
"rewards/cosine_scaled_reward": 0.3570306524634361, |
|
"rewards/format_reward": 1.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.72433114051819, |
|
"epoch": 0.6940298507462687, |
|
"grad_norm": 3.330997943878174, |
|
"learning_rate": 8.260848882760615e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 35625503.0, |
|
"reward": 1.480910375714302, |
|
"reward_std": 0.10137590842316513, |
|
"rewards/accuracy_reward": 0.2410714291036129, |
|
"rewards/cosine_scaled_reward": 0.24095493368804455, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 93 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.22991156578064, |
|
"epoch": 0.7014925373134329, |
|
"grad_norm": 2.4767699241638184, |
|
"learning_rate": 8.211163347414003e-07, |
|
"loss": -0.0, |
|
"num_tokens": 35774893.0, |
|
"reward": 1.667300522327423, |
|
"reward_std": 0.13715945463627577, |
|
"rewards/accuracy_reward": 0.3337053582072258, |
|
"rewards/cosine_scaled_reward": 0.3335950942710042, |
|
"rewards/format_reward": 1.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.570313692092896, |
|
"epoch": 0.7089552238805971, |
|
"grad_norm": 2.710425615310669, |
|
"learning_rate": 8.160932152331586e-07, |
|
"loss": 0.001, |
|
"num_tokens": 35919596.0, |
|
"reward": 1.5222073197364807, |
|
"reward_std": 0.11986963993861366, |
|
"rewards/accuracy_reward": 0.2611607192084193, |
|
"rewards/cosine_scaled_reward": 0.2610465129837394, |
|
"rewards/format_reward": 1.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.17745590209961, |
|
"epoch": 0.7164179104477612, |
|
"grad_norm": 2.4527230262756348, |
|
"learning_rate": 8.110163833093049e-07, |
|
"loss": -0.0014, |
|
"num_tokens": 36073515.0, |
|
"reward": 1.4708732217550278, |
|
"reward_std": 0.1094957971945405, |
|
"rewards/accuracy_reward": 0.23549107182770967, |
|
"rewards/cosine_scaled_reward": 0.2353821201249957, |
|
"rewards/format_reward": 1.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.28459882736206, |
|
"epoch": 0.7238805970149254, |
|
"grad_norm": 3.3163673877716064, |
|
"learning_rate": 8.058867016549371e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 36224698.0, |
|
"reward": 1.5244434028863907, |
|
"reward_std": 0.12670162599533796, |
|
"rewards/accuracy_reward": 0.26227678544819355, |
|
"rewards/cosine_scaled_reward": 0.2621665708720684, |
|
"rewards/format_reward": 1.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.32366132736206, |
|
"epoch": 0.7313432835820896, |
|
"grad_norm": 2.859325408935547, |
|
"learning_rate": 8.007050419356898e-07, |
|
"loss": -0.0038, |
|
"num_tokens": 36379460.0, |
|
"reward": 1.5936386585235596, |
|
"reward_std": 0.11910764441277877, |
|
"rewards/accuracy_reward": 0.2968750074505806, |
|
"rewards/cosine_scaled_reward": 0.2967636212706566, |
|
"rewards/format_reward": 1.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.453126192092896, |
|
"epoch": 0.7388059701492538, |
|
"grad_norm": 2.248077154159546, |
|
"learning_rate": 7.954722846496149e-07, |
|
"loss": 0.003, |
|
"num_tokens": 36535562.0, |
|
"reward": 1.5244402140378952, |
|
"reward_std": 0.08259574370241296, |
|
"rewards/accuracy_reward": 0.2622767873108387, |
|
"rewards/cosine_scaled_reward": 0.2621633689850569, |
|
"rewards/format_reward": 1.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.748884916305542, |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 2.1548383235931396, |
|
"learning_rate": 7.901893189775639e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 36690505.0, |
|
"reward": 1.4797910004854202, |
|
"reward_std": 0.08582926816011138, |
|
"rewards/accuracy_reward": 0.23995536100119352, |
|
"rewards/cosine_scaled_reward": 0.23983560875058174, |
|
"rewards/format_reward": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.24776864051819, |
|
"epoch": 0.753731343283582, |
|
"grad_norm": 2.7886292934417725, |
|
"learning_rate": 7.848570426320916e-07, |
|
"loss": 0.0015, |
|
"num_tokens": 36832751.0, |
|
"reward": 1.6460942327976227, |
|
"reward_std": 0.10873846150510502, |
|
"rewards/accuracy_reward": 0.32477678917348385, |
|
"rewards/cosine_scaled_reward": 0.3246655622497201, |
|
"rewards/format_reward": 0.9966517835855484, |
|
"step": 101 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.510045528411865, |
|
"epoch": 0.7611940298507462, |
|
"grad_norm": 2.756274700164795, |
|
"learning_rate": 7.794763617049123e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 36983200.0, |
|
"reward": 1.7454221993684769, |
|
"reward_std": 0.10062572493555422, |
|
"rewards/accuracy_reward": 0.3727678582072258, |
|
"rewards/cosine_scaled_reward": 0.372654240578413, |
|
"rewards/format_reward": 1.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.693081378936768, |
|
"epoch": 0.7686567164179104, |
|
"grad_norm": 1.6583493947982788, |
|
"learning_rate": 7.740481905129306e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 37135357.0, |
|
"reward": 1.5065791308879852, |
|
"reward_std": 0.045692659896090504, |
|
"rewards/accuracy_reward": 0.2533482136204839, |
|
"rewards/cosine_scaled_reward": 0.2532308688387275, |
|
"rewards/format_reward": 1.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.45424199104309, |
|
"epoch": 0.7761194029850746, |
|
"grad_norm": 3.4718310832977295, |
|
"learning_rate": 7.685734514428766e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 37286476.0, |
|
"reward": 1.4396189004182816, |
|
"reward_std": 0.07011978597014945, |
|
"rewards/accuracy_reward": 0.21986607369035482, |
|
"rewards/cosine_scaled_reward": 0.2197527764365077, |
|
"rewards/format_reward": 1.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.142858266830444, |
|
"epoch": 0.7835820895522388, |
|
"grad_norm": 3.465782642364502, |
|
"learning_rate": 7.630530747945672e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 37435684.0, |
|
"reward": 1.4943107217550278, |
|
"reward_std": 0.08559985571561413, |
|
"rewards/accuracy_reward": 0.2477678619325161, |
|
"rewards/cosine_scaled_reward": 0.2476588897407055, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.049107789993286, |
|
"epoch": 0.7910447761194029, |
|
"grad_norm": 2.42425274848938, |
|
"learning_rate": 7.574879986228244e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 37582544.0, |
|
"reward": 1.5132858008146286, |
|
"reward_std": 0.08311572534432088, |
|
"rewards/accuracy_reward": 0.2566964318975806, |
|
"rewards/cosine_scaled_reward": 0.25658932141959667, |
|
"rewards/format_reward": 1.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.716518878936768, |
|
"epoch": 0.7985074626865671, |
|
"grad_norm": 2.004718065261841, |
|
"learning_rate": 7.518791685780768e-07, |
|
"loss": 0.0029, |
|
"num_tokens": 37739602.0, |
|
"reward": 1.6248817294836044, |
|
"reward_std": 0.07741166379830844, |
|
"rewards/accuracy_reward": 0.3125, |
|
"rewards/cosine_scaled_reward": 0.3123816456645727, |
|
"rewards/format_reward": 1.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.168527364730835, |
|
"epoch": 0.8059701492537313, |
|
"grad_norm": 3.211866855621338, |
|
"learning_rate": 7.462275377456669e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 37891401.0, |
|
"reward": 1.5891772359609604, |
|
"reward_std": 0.09964832732346451, |
|
"rewards/accuracy_reward": 0.2946428610011935, |
|
"rewards/cosine_scaled_reward": 0.2945342995226383, |
|
"rewards/format_reward": 1.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.299107789993286, |
|
"epoch": 0.8134328358208955, |
|
"grad_norm": 2.558931589126587, |
|
"learning_rate": 7.405340664838993e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 38042885.0, |
|
"reward": 1.4619427621364594, |
|
"reward_std": 0.11446394885876998, |
|
"rewards/accuracy_reward": 0.23102678544819355, |
|
"rewards/cosine_scaled_reward": 0.23091593850404024, |
|
"rewards/format_reward": 1.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.55022406578064, |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 2.110745906829834, |
|
"learning_rate": 7.347997222608492e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 38197242.0, |
|
"reward": 1.5735462754964828, |
|
"reward_std": 0.0649347297767946, |
|
"rewards/accuracy_reward": 0.2868303544819355, |
|
"rewards/cosine_scaled_reward": 0.2867158204317093, |
|
"rewards/format_reward": 1.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.551340103149414, |
|
"epoch": 0.8283582089552238, |
|
"grad_norm": 1.8552799224853516, |
|
"learning_rate": 7.290254794899664e-07, |
|
"loss": 0.002, |
|
"num_tokens": 38340640.0, |
|
"reward": 1.5255557298660278, |
|
"reward_std": 0.06177972303260404, |
|
"rewards/accuracy_reward": 0.26339286006987095, |
|
"rewards/cosine_scaled_reward": 0.26327887177467346, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 111 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.752233266830444, |
|
"epoch": 0.835820895522388, |
|
"grad_norm": 2.4142839908599854, |
|
"learning_rate": 7.232123193644956e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 38500346.0, |
|
"reward": 1.51997210085392, |
|
"reward_std": 0.07079227790242726, |
|
"rewards/accuracy_reward": 0.2600446445867419, |
|
"rewards/cosine_scaled_reward": 0.25992743112146854, |
|
"rewards/format_reward": 1.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.882813215255737, |
|
"epoch": 0.8432835820895522, |
|
"grad_norm": 2.0007951259613037, |
|
"learning_rate": 7.173612296907472e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 38658729.0, |
|
"reward": 1.6516644805669785, |
|
"reward_std": 0.10580981522798538, |
|
"rewards/accuracy_reward": 0.3258928544819355, |
|
"rewards/cosine_scaled_reward": 0.3257715832442045, |
|
"rewards/format_reward": 1.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.75558114051819, |
|
"epoch": 0.8507462686567164, |
|
"grad_norm": 2.6902952194213867, |
|
"learning_rate": 7.114732047202432e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 38805462.0, |
|
"reward": 1.6114880591630936, |
|
"reward_std": 0.09198851990785784, |
|
"rewards/accuracy_reward": 0.3058035708963871, |
|
"rewards/cosine_scaled_reward": 0.3056844547390938, |
|
"rewards/format_reward": 1.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.13616156578064, |
|
"epoch": 0.8582089552238806, |
|
"grad_norm": 1.9690958261489868, |
|
"learning_rate": 7.055492449807683e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 38959272.0, |
|
"reward": 1.5489989072084427, |
|
"reward_std": 0.08409377404399265, |
|
"rewards/accuracy_reward": 0.27455357648432255, |
|
"rewards/cosine_scaled_reward": 0.27444526366889477, |
|
"rewards/format_reward": 1.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.392857789993286, |
|
"epoch": 0.8656716417910447, |
|
"grad_norm": 1.6137839555740356, |
|
"learning_rate": 6.99590357106354e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 39103456.0, |
|
"reward": 1.5356025993824005, |
|
"reward_std": 0.06981676115469782, |
|
"rewards/accuracy_reward": 0.2678571445867419, |
|
"rewards/cosine_scaled_reward": 0.2677453998476267, |
|
"rewards/format_reward": 1.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.395090103149414, |
|
"epoch": 0.8731343283582089, |
|
"grad_norm": 3.123340129852295, |
|
"learning_rate": 6.935975536662253e-07, |
|
"loss": 0.0011, |
|
"num_tokens": 39248058.0, |
|
"reward": 1.589174211025238, |
|
"reward_std": 0.12407711929557763, |
|
"rewards/accuracy_reward": 0.29464285634458065, |
|
"rewards/cosine_scaled_reward": 0.29453128995373845, |
|
"rewards/format_reward": 1.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.720983028411865, |
|
"epoch": 0.8805970149253731, |
|
"grad_norm": 2.0955867767333984, |
|
"learning_rate": 6.875718529927404e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 39410136.0, |
|
"reward": 1.5802399963140488, |
|
"reward_std": 0.0863498275235628, |
|
"rewards/accuracy_reward": 0.2901785708963871, |
|
"rewards/cosine_scaled_reward": 0.2900614067912102, |
|
"rewards/format_reward": 1.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.71651864051819, |
|
"epoch": 0.8880597014925373, |
|
"grad_norm": 3.44924259185791, |
|
"learning_rate": 6.815142790083473e-07, |
|
"loss": 0.0025, |
|
"num_tokens": 39569234.0, |
|
"reward": 1.7007768154144287, |
|
"reward_std": 0.14646728224154515, |
|
"rewards/accuracy_reward": 0.3504464291036129, |
|
"rewards/cosine_scaled_reward": 0.35033031180500984, |
|
"rewards/format_reward": 1.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.483259916305542, |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 2.0056583881378174, |
|
"learning_rate": 6.754258610515948e-07, |
|
"loss": 0.0015, |
|
"num_tokens": 39727235.0, |
|
"reward": 1.4820292592048645, |
|
"reward_std": 0.07951601898218996, |
|
"rewards/accuracy_reward": 0.24107143096625805, |
|
"rewards/cosine_scaled_reward": 0.24095771089196205, |
|
"rewards/format_reward": 1.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.08147430419922, |
|
"epoch": 0.9029850746268657, |
|
"grad_norm": 2.79129958152771, |
|
"learning_rate": 6.69307633702221e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 39874236.0, |
|
"reward": 1.5199817568063736, |
|
"reward_std": 0.08146786268110873, |
|
"rewards/accuracy_reward": 0.2600446455180645, |
|
"rewards/cosine_scaled_reward": 0.2599370051175356, |
|
"rewards/format_reward": 1.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.085938453674316, |
|
"epoch": 0.9104477611940298, |
|
"grad_norm": 3.3468284606933594, |
|
"learning_rate": 6.631606366053506e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 40015729.0, |
|
"reward": 1.7141781598329544, |
|
"reward_std": 0.10137949584012773, |
|
"rewards/accuracy_reward": 0.3571428582072258, |
|
"rewards/cosine_scaled_reward": 0.35703518986701965, |
|
"rewards/format_reward": 1.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.622768878936768, |
|
"epoch": 0.917910447761194, |
|
"grad_norm": 2.6946330070495605, |
|
"learning_rate": 6.569859142948327e-07, |
|
"loss": 0.0021, |
|
"num_tokens": 40163039.0, |
|
"reward": 1.5467600226402283, |
|
"reward_std": 0.11038771457970142, |
|
"rewards/accuracy_reward": 0.27343749813735485, |
|
"rewards/cosine_scaled_reward": 0.2733224518597126, |
|
"rewards/format_reward": 1.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.418527841567993, |
|
"epoch": 0.9253731343283582, |
|
"grad_norm": 2.7771787643432617, |
|
"learning_rate": 6.507845160157475e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 40317998.0, |
|
"reward": 1.6159598380327225, |
|
"reward_std": 0.07643294239515797, |
|
"rewards/accuracy_reward": 0.30803571827709675, |
|
"rewards/cosine_scaled_reward": 0.30792406760156155, |
|
"rewards/format_reward": 1.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.400670289993286, |
|
"epoch": 0.9328358208955224, |
|
"grad_norm": 1.899020791053772, |
|
"learning_rate": 6.445574955461133e-07, |
|
"loss": -0.0025, |
|
"num_tokens": 40465605.0, |
|
"reward": 1.4998881071805954, |
|
"reward_std": 0.10077201342210174, |
|
"rewards/accuracy_reward": 0.2500000037252903, |
|
"rewards/cosine_scaled_reward": 0.2498880298808217, |
|
"rewards/format_reward": 1.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.99330425262451, |
|
"epoch": 0.9402985074626866, |
|
"grad_norm": 1.1476165056228638, |
|
"learning_rate": 6.383059110178203e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 40617031.0, |
|
"reward": 1.6114817261695862, |
|
"reward_std": 0.0364510658172037, |
|
"rewards/accuracy_reward": 0.30580357275903225, |
|
"rewards/cosine_scaled_reward": 0.3056781152263284, |
|
"rewards/format_reward": 1.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.44642949104309, |
|
"epoch": 0.9477611940298507, |
|
"grad_norm": 2.6259467601776123, |
|
"learning_rate": 6.320308247368284e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 40772791.0, |
|
"reward": 1.5289026349782944, |
|
"reward_std": 0.13789613312110305, |
|
"rewards/accuracy_reward": 0.2645089328289032, |
|
"rewards/cosine_scaled_reward": 0.2643936015665531, |
|
"rewards/format_reward": 1.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.164063453674316, |
|
"epoch": 0.9552238805970149, |
|
"grad_norm": 1.8769416809082031, |
|
"learning_rate": 6.257333030026538e-07, |
|
"loss": -0.0007, |
|
"num_tokens": 40921778.0, |
|
"reward": 1.5445343106985092, |
|
"reward_std": 0.07823521745721607, |
|
"rewards/accuracy_reward": 0.2723214318975806, |
|
"rewards/cosine_scaled_reward": 0.2722127726301551, |
|
"rewards/format_reward": 1.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.060268878936768, |
|
"epoch": 0.9626865671641791, |
|
"grad_norm": 3.188025951385498, |
|
"learning_rate": 6.194144159271755e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 41068368.0, |
|
"reward": 1.4440890699625015, |
|
"reward_std": 0.07432721156590105, |
|
"rewards/accuracy_reward": 0.22209821362048388, |
|
"rewards/cosine_scaled_reward": 0.22199082095175982, |
|
"rewards/format_reward": 1.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.361608028411865, |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 1.6484222412109375, |
|
"learning_rate": 6.130752372527981e-07, |
|
"loss": 0.0019, |
|
"num_tokens": 41215084.0, |
|
"reward": 1.4931926876306534, |
|
"reward_std": 0.051030852994102816, |
|
"rewards/accuracy_reward": 0.246651791036129, |
|
"rewards/cosine_scaled_reward": 0.24654078483581543, |
|
"rewards/format_reward": 1.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.842634677886963, |
|
"epoch": 0.9776119402985075, |
|
"grad_norm": 3.736074447631836, |
|
"learning_rate": 6.067168441699927e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 41368055.0, |
|
"reward": 1.6940742880105972, |
|
"reward_std": 0.12490348052233458, |
|
"rewards/accuracy_reward": 0.3470982164144516, |
|
"rewards/cosine_scaled_reward": 0.3469760064035654, |
|
"rewards/format_reward": 1.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.293527603149414, |
|
"epoch": 0.9850746268656716, |
|
"grad_norm": 11.65377140045166, |
|
"learning_rate": 6.003403171342562e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 41521014.0, |
|
"reward": 1.388282224535942, |
|
"reward_std": 0.06252689357782515, |
|
"rewards/accuracy_reward": 0.19419642724096775, |
|
"rewards/cosine_scaled_reward": 0.19408572791144252, |
|
"rewards/format_reward": 1.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.136138439178467, |
|
"epoch": 0.9925373134328358, |
|
"grad_norm": 3.632760763168335, |
|
"learning_rate": 5.939467396825136e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 41671398.0, |
|
"reward": 1.4574763923883438, |
|
"reward_std": 0.08762641241588653, |
|
"rewards/accuracy_reward": 0.2287946417927742, |
|
"rewards/cosine_scaled_reward": 0.22868163883686066, |
|
"rewards/format_reward": 1.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.29799175262451, |
|
"epoch": 1.007462686567164, |
|
"grad_norm": 5.531825542449951, |
|
"learning_rate": 5.875371982489958e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 41808033.0, |
|
"reward": 1.5132822692394257, |
|
"reward_std": 0.07973260305406171, |
|
"rewards/accuracy_reward": 0.2566964328289032, |
|
"rewards/cosine_scaled_reward": 0.2565857656300068, |
|
"rewards/format_reward": 1.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.155134916305542, |
|
"epoch": 1.0149253731343284, |
|
"grad_norm": 4.794013023376465, |
|
"learning_rate": 5.811127819806276e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 41952708.0, |
|
"reward": 1.6561159640550613, |
|
"reward_std": 0.10551196637798199, |
|
"rewards/accuracy_reward": 0.32812500186264515, |
|
"rewards/cosine_scaled_reward": 0.32799087278544903, |
|
"rewards/format_reward": 1.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.325893878936768, |
|
"epoch": 1.0223880597014925, |
|
"grad_norm": 4.288095474243164, |
|
"learning_rate": 5.746745825519538e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 42096168.0, |
|
"reward": 1.6539065688848495, |
|
"reward_std": 0.06463327458860135, |
|
"rewards/accuracy_reward": 0.32700893096625805, |
|
"rewards/cosine_scaled_reward": 0.3268975578248501, |
|
"rewards/format_reward": 1.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.46428680419922, |
|
"epoch": 1.0298507462686568, |
|
"grad_norm": 2.075472593307495, |
|
"learning_rate": 5.682236939796336e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 42245792.0, |
|
"reward": 1.682920902967453, |
|
"reward_std": 0.11724273651551442, |
|
"rewards/accuracy_reward": 0.34151786006987095, |
|
"rewards/cosine_scaled_reward": 0.34140297770500183, |
|
"rewards/format_reward": 1.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.25334930419922, |
|
"epoch": 1.037313432835821, |
|
"grad_norm": 3.787569046020508, |
|
"learning_rate": 5.61761212436541e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 42387403.0, |
|
"reward": 1.5601582527160645, |
|
"reward_std": 0.09724155126728817, |
|
"rewards/accuracy_reward": 0.2801339253783226, |
|
"rewards/cosine_scaled_reward": 0.28002420626580715, |
|
"rewards/format_reward": 1.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.424108266830444, |
|
"epoch": 1.044776119402985, |
|
"grad_norm": 2.1570165157318115, |
|
"learning_rate": 5.552882360654949e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 42536287.0, |
|
"reward": 1.627119928598404, |
|
"reward_std": 0.10236127915219129, |
|
"rewards/accuracy_reward": 0.313616075553, |
|
"rewards/cosine_scaled_reward": 0.3135037589818239, |
|
"rewards/format_reward": 1.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.625000953674316, |
|
"epoch": 1.0522388059701493, |
|
"grad_norm": 3.6223268508911133, |
|
"learning_rate": 5.488058647926577e-07, |
|
"loss": 0.001, |
|
"num_tokens": 42696151.0, |
|
"reward": 1.535599023103714, |
|
"reward_std": 0.09071048016893712, |
|
"rewards/accuracy_reward": 0.2654533013701439, |
|
"rewards/cosine_scaled_reward": 0.27220611833035946, |
|
"rewards/format_reward": 1.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.46428632736206, |
|
"epoch": 1.0597014925373134, |
|
"grad_norm": 3.7596476078033447, |
|
"learning_rate": 5.423152001406282e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 42855279.0, |
|
"reward": 1.6070294231176376, |
|
"reward_std": 0.12858991045504808, |
|
"rewards/accuracy_reward": 0.30357142724096775, |
|
"rewards/cosine_scaled_reward": 0.3034578887745738, |
|
"rewards/format_reward": 1.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.022322177886963, |
|
"epoch": 1.0671641791044777, |
|
"grad_norm": 3.700883150100708, |
|
"learning_rate": 5.358173450412648e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 43008275.0, |
|
"reward": 1.5623932778835297, |
|
"reward_std": 0.1167864422913425, |
|
"rewards/accuracy_reward": 0.2812500009313226, |
|
"rewards/cosine_scaled_reward": 0.28114316891878843, |
|
"rewards/format_reward": 1.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.39955425262451, |
|
"epoch": 1.0746268656716418, |
|
"grad_norm": 4.979684829711914, |
|
"learning_rate": 5.293134036482698e-07, |
|
"loss": 0.0, |
|
"num_tokens": 43159225.0, |
|
"reward": 1.5132808834314346, |
|
"reward_std": 0.09190170587856983, |
|
"rewards/accuracy_reward": 0.2566964328289032, |
|
"rewards/cosine_scaled_reward": 0.25658440589904785, |
|
"rewards/format_reward": 1.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.123884677886963, |
|
"epoch": 1.0820895522388059, |
|
"grad_norm": 2.860994338989258, |
|
"learning_rate": 5.228044811495631e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 43309240.0, |
|
"reward": 1.582481175661087, |
|
"reward_std": 0.0821403276665933, |
|
"rewards/accuracy_reward": 0.2912946417927742, |
|
"rewards/cosine_scaled_reward": 0.29118647053837776, |
|
"rewards/format_reward": 1.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.095982789993286, |
|
"epoch": 1.0895522388059702, |
|
"grad_norm": 3.0046682357788086, |
|
"learning_rate": 5.162916835794843e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 43458758.0, |
|
"reward": 1.5556955337524414, |
|
"reward_std": 0.07936285677858734, |
|
"rewards/accuracy_reward": 0.2779017873108387, |
|
"rewards/cosine_scaled_reward": 0.27779373340308666, |
|
"rewards/format_reward": 1.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.234375476837158, |
|
"epoch": 1.0970149253731343, |
|
"grad_norm": 4.488862991333008, |
|
"learning_rate": 5.09776117630847e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 43608960.0, |
|
"reward": 1.6360509097576141, |
|
"reward_std": 0.07161617746324822, |
|
"rewards/accuracy_reward": 0.3180803544819355, |
|
"rewards/cosine_scaled_reward": 0.31797049194574356, |
|
"rewards/format_reward": 1.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.46651864051819, |
|
"epoch": 1.1044776119402986, |
|
"grad_norm": 1.4745782613754272, |
|
"learning_rate": 5.032588904668851e-07, |
|
"loss": -0.0036, |
|
"num_tokens": 43754050.0, |
|
"reward": 1.7454189360141754, |
|
"reward_std": 0.05425459118813336, |
|
"rewards/accuracy_reward": 0.37276786379516125, |
|
"rewards/cosine_scaled_reward": 0.37265095487236977, |
|
"rewards/format_reward": 1.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.15736722946167, |
|
"epoch": 1.1119402985074627, |
|
"grad_norm": 4.071691513061523, |
|
"learning_rate": 4.967411095331149e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 43899775.0, |
|
"reward": 1.6427484452724457, |
|
"reward_std": 0.09672015977940873, |
|
"rewards/accuracy_reward": 0.32142856903374195, |
|
"rewards/cosine_scaled_reward": 0.32131983526051044, |
|
"rewards/format_reward": 1.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.150670528411865, |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 5.170862197875977, |
|
"learning_rate": 4.90223882369153e-07, |
|
"loss": 0.001, |
|
"num_tokens": 44050078.0, |
|
"reward": 1.629356011748314, |
|
"reward_std": 0.12038855330866483, |
|
"rewards/accuracy_reward": 0.3147321455180645, |
|
"rewards/cosine_scaled_reward": 0.3146238140761852, |
|
"rewards/format_reward": 1.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.264509439468384, |
|
"epoch": 1.126865671641791, |
|
"grad_norm": 2.8727357387542725, |
|
"learning_rate": 4.837083164205159e-07, |
|
"loss": 0.0013, |
|
"num_tokens": 44198707.0, |
|
"reward": 1.5356042981147766, |
|
"reward_std": 0.13752735047977538, |
|
"rewards/accuracy_reward": 0.2678571455180645, |
|
"rewards/cosine_scaled_reward": 0.26774709299206734, |
|
"rewards/format_reward": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.49330425262451, |
|
"epoch": 1.1343283582089552, |
|
"grad_norm": 2.9410152435302734, |
|
"learning_rate": 4.77195518850437e-07, |
|
"loss": -0.0017, |
|
"num_tokens": 44350965.0, |
|
"reward": 1.5467612594366074, |
|
"reward_std": 0.08439550402343343, |
|
"rewards/accuracy_reward": 0.27343750186264515, |
|
"rewards/cosine_scaled_reward": 0.2733236690983176, |
|
"rewards/format_reward": 1.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.28459882736206, |
|
"epoch": 1.1417910447761195, |
|
"grad_norm": 5.266554832458496, |
|
"learning_rate": 4.7068659635173025e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 44496740.0, |
|
"reward": 1.571318507194519, |
|
"reward_std": 0.09055654217311826, |
|
"rewards/accuracy_reward": 0.2857142873108387, |
|
"rewards/cosine_scaled_reward": 0.2856041230261326, |
|
"rewards/format_reward": 1.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.398438453674316, |
|
"epoch": 1.1492537313432836, |
|
"grad_norm": 2.222485303878784, |
|
"learning_rate": 4.6418265495873516e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 44656433.0, |
|
"reward": 1.667298749089241, |
|
"reward_std": 0.10092825663519989, |
|
"rewards/accuracy_reward": 0.3337053582072258, |
|
"rewards/cosine_scaled_reward": 0.3335932996124029, |
|
"rewards/format_reward": 1.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.23995590209961, |
|
"epoch": 1.1567164179104479, |
|
"grad_norm": 2.372213840484619, |
|
"learning_rate": 4.5768479985937194e-07, |
|
"loss": 0.002, |
|
"num_tokens": 44807600.0, |
|
"reward": 1.624890297651291, |
|
"reward_std": 0.13548944082560865, |
|
"rewards/accuracy_reward": 0.31249999813735485, |
|
"rewards/cosine_scaled_reward": 0.31239020079374313, |
|
"rewards/format_reward": 1.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.25558114051819, |
|
"epoch": 1.164179104477612, |
|
"grad_norm": 1.6955794095993042, |
|
"learning_rate": 4.511941352073424e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 44957893.0, |
|
"reward": 1.5780149698257446, |
|
"reward_std": 0.07597658339989977, |
|
"rewards/accuracy_reward": 0.28906250558793545, |
|
"rewards/cosine_scaled_reward": 0.2889523971825838, |
|
"rewards/format_reward": 1.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.380581378936768, |
|
"epoch": 1.171641791044776, |
|
"grad_norm": 2.024655342102051, |
|
"learning_rate": 4.4471176393450515e-07, |
|
"loss": 0.001, |
|
"num_tokens": 45113066.0, |
|
"reward": 1.4954225569963455, |
|
"reward_std": 0.09183560762491538, |
|
"rewards/accuracy_reward": 0.2477678582072258, |
|
"rewards/cosine_scaled_reward": 0.24765462800860405, |
|
"rewards/format_reward": 1.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 19.982143878936768, |
|
"epoch": 1.1791044776119404, |
|
"grad_norm": 3.3944594860076904, |
|
"learning_rate": 4.382387875634591e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 45261530.0, |
|
"reward": 1.6472150832414627, |
|
"reward_std": 0.10724194766953588, |
|
"rewards/accuracy_reward": 0.32366071455180645, |
|
"rewards/cosine_scaled_reward": 0.32355429045856, |
|
"rewards/format_reward": 1.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.106027603149414, |
|
"epoch": 1.1865671641791045, |
|
"grad_norm": 1.5790033340454102, |
|
"learning_rate": 4.317763060203664e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 45416145.0, |
|
"reward": 1.4440883994102478, |
|
"reward_std": 0.06350326852842159, |
|
"rewards/accuracy_reward": 0.22209821734577417, |
|
"rewards/cosine_scaled_reward": 0.2219901392236352, |
|
"rewards/format_reward": 1.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.33705449104309, |
|
"epoch": 1.1940298507462686, |
|
"grad_norm": 1.6651524305343628, |
|
"learning_rate": 4.253254174480462e-07, |
|
"loss": -0.0, |
|
"num_tokens": 45560303.0, |
|
"reward": 1.5333705618977547, |
|
"reward_std": 0.040959979950784486, |
|
"rewards/accuracy_reward": 0.266741075553, |
|
"rewards/cosine_scaled_reward": 0.2666294459568235, |
|
"rewards/format_reward": 1.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.408483028411865, |
|
"epoch": 1.2014925373134329, |
|
"grad_norm": 3.0133707523345947, |
|
"learning_rate": 4.1888721801937226e-07, |
|
"loss": 0.001, |
|
"num_tokens": 45701981.0, |
|
"reward": 1.5891735255718231, |
|
"reward_std": 0.08890740286335586, |
|
"rewards/accuracy_reward": 0.2946428610011935, |
|
"rewards/cosine_scaled_reward": 0.2945305937901139, |
|
"rewards/format_reward": 1.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.158483266830444, |
|
"epoch": 1.208955223880597, |
|
"grad_norm": 2.2523109912872314, |
|
"learning_rate": 4.124628017510042e-07, |
|
"loss": -0.0028, |
|
"num_tokens": 45851651.0, |
|
"reward": 1.5891766995191574, |
|
"reward_std": 0.08664929727092385, |
|
"rewards/accuracy_reward": 0.2946428647264838, |
|
"rewards/cosine_scaled_reward": 0.2945337858982384, |
|
"rewards/format_reward": 1.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.29017925262451, |
|
"epoch": 1.2164179104477613, |
|
"grad_norm": 1.8585433959960938, |
|
"learning_rate": 4.0605326031748646e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 45994527.0, |
|
"reward": 1.5445328205823898, |
|
"reward_std": 0.05591056302149866, |
|
"rewards/accuracy_reward": 0.27232143096625805, |
|
"rewards/cosine_scaled_reward": 0.27221135422587395, |
|
"rewards/format_reward": 1.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.445313453674316, |
|
"epoch": 1.2238805970149254, |
|
"grad_norm": 5.333817481994629, |
|
"learning_rate": 3.9965968286574367e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 46140630.0, |
|
"reward": 1.4463159441947937, |
|
"reward_std": 0.08372260938289244, |
|
"rewards/accuracy_reward": 0.22321428637951612, |
|
"rewards/cosine_scaled_reward": 0.2231016056612134, |
|
"rewards/format_reward": 1.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.148438215255737, |
|
"epoch": 1.2313432835820897, |
|
"grad_norm": 2.544990301132202, |
|
"learning_rate": 3.9328315583000737e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 46290443.0, |
|
"reward": 1.636052206158638, |
|
"reward_std": 0.09034244809299707, |
|
"rewards/accuracy_reward": 0.3180803582072258, |
|
"rewards/cosine_scaled_reward": 0.31797176599502563, |
|
"rewards/format_reward": 1.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.42299175262451, |
|
"epoch": 1.2388059701492538, |
|
"grad_norm": 4.120884895324707, |
|
"learning_rate": 3.869247627472021e-07, |
|
"loss": 0.0018, |
|
"num_tokens": 46439446.0, |
|
"reward": 1.653905838727951, |
|
"reward_std": 0.10355404989928729, |
|
"rewards/accuracy_reward": 0.3270089291036129, |
|
"rewards/cosine_scaled_reward": 0.32689686864614487, |
|
"rewards/format_reward": 1.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.90736722946167, |
|
"epoch": 1.2462686567164178, |
|
"grad_norm": 6.220430374145508, |
|
"learning_rate": 3.805855840728246e-07, |
|
"loss": -0.0009, |
|
"num_tokens": 46583603.0, |
|
"reward": 1.5690757930278778, |
|
"reward_std": 0.1022080342995082, |
|
"rewards/accuracy_reward": 0.2845982126891613, |
|
"rewards/cosine_scaled_reward": 0.2844774592667818, |
|
"rewards/format_reward": 1.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.39955425262451, |
|
"epoch": 1.2537313432835822, |
|
"grad_norm": 3.827559232711792, |
|
"learning_rate": 3.7426669699734626e-07, |
|
"loss": 0.0, |
|
"num_tokens": 46729329.0, |
|
"reward": 1.5199765115976334, |
|
"reward_std": 0.07372096207812717, |
|
"rewards/accuracy_reward": 0.26004464365541935, |
|
"rewards/cosine_scaled_reward": 0.2599317729473114, |
|
"rewards/format_reward": 1.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.420759439468384, |
|
"epoch": 1.2611940298507462, |
|
"grad_norm": 2.8399739265441895, |
|
"learning_rate": 3.679691752631715e-07, |
|
"loss": 0.0012, |
|
"num_tokens": 46881738.0, |
|
"reward": 1.6873881220817566, |
|
"reward_std": 0.028184092890906953, |
|
"rewards/accuracy_reward": 0.34375000558793545, |
|
"rewards/cosine_scaled_reward": 0.3436380457133055, |
|
"rewards/format_reward": 1.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.460938453674316, |
|
"epoch": 1.2686567164179103, |
|
"grad_norm": 3.9981281757354736, |
|
"learning_rate": 3.6169408898217966e-07, |
|
"loss": -0.0007, |
|
"num_tokens": 47032783.0, |
|
"reward": 1.5735474079847336, |
|
"reward_std": 0.09409430988146994, |
|
"rewards/accuracy_reward": 0.28683035261929035, |
|
"rewards/cosine_scaled_reward": 0.28671699203550816, |
|
"rewards/format_reward": 1.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.434152603149414, |
|
"epoch": 1.2761194029850746, |
|
"grad_norm": 4.026747226715088, |
|
"learning_rate": 3.554425044538867e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 47186252.0, |
|
"reward": 1.6293520778417587, |
|
"reward_std": 0.06463150960456687, |
|
"rewards/accuracy_reward": 0.31473214738070965, |
|
"rewards/cosine_scaled_reward": 0.31461989507079124, |
|
"rewards/format_reward": 1.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.639509677886963, |
|
"epoch": 1.2835820895522387, |
|
"grad_norm": 2.835671901702881, |
|
"learning_rate": 3.492154839842524e-07, |
|
"loss": -0.001, |
|
"num_tokens": 47337545.0, |
|
"reward": 1.6829200685024261, |
|
"reward_std": 0.11416030763536611, |
|
"rewards/accuracy_reward": 0.34151785261929035, |
|
"rewards/cosine_scaled_reward": 0.3414021451026201, |
|
"rewards/format_reward": 1.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.23549175262451, |
|
"epoch": 1.291044776119403, |
|
"grad_norm": 2.3982043266296387, |
|
"learning_rate": 3.430140857051674e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 47485516.0, |
|
"reward": 1.7543545216321945, |
|
"reward_std": 0.08049214289215456, |
|
"rewards/accuracy_reward": 0.3772321417927742, |
|
"rewards/cosine_scaled_reward": 0.37712232768535614, |
|
"rewards/format_reward": 1.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.290179014205933, |
|
"epoch": 1.2985074626865671, |
|
"grad_norm": 2.3194615840911865, |
|
"learning_rate": 3.3683936339464955e-07, |
|
"loss": -0.0009, |
|
"num_tokens": 47626200.0, |
|
"reward": 1.5512276887893677, |
|
"reward_std": 0.05493424205126729, |
|
"rewards/accuracy_reward": 0.2756696483120322, |
|
"rewards/cosine_scaled_reward": 0.2755579724907875, |
|
"rewards/format_reward": 1.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.627233028411865, |
|
"epoch": 1.3059701492537314, |
|
"grad_norm": 1.7407325506210327, |
|
"learning_rate": 3.3069236629777884e-07, |
|
"loss": 0.0019, |
|
"num_tokens": 47773482.0, |
|
"reward": 1.7253312766551971, |
|
"reward_std": 0.04794870165083864, |
|
"rewards/accuracy_reward": 0.3627232201397419, |
|
"rewards/cosine_scaled_reward": 0.36260795034468174, |
|
"rewards/format_reward": 1.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.26339364051819, |
|
"epoch": 1.3134328358208955, |
|
"grad_norm": 1.944441556930542, |
|
"learning_rate": 3.2457413894840514e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 47925630.0, |
|
"reward": 1.542300522327423, |
|
"reward_std": 0.04922672476845946, |
|
"rewards/accuracy_reward": 0.2712053554132581, |
|
"rewards/cosine_scaled_reward": 0.2710951156914234, |
|
"rewards/format_reward": 1.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.04799175262451, |
|
"epoch": 1.3208955223880596, |
|
"grad_norm": 2.3487837314605713, |
|
"learning_rate": 3.184857209916528e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 48071681.0, |
|
"reward": 1.5333750247955322, |
|
"reward_std": 0.08439638444930608, |
|
"rewards/accuracy_reward": 0.26674107275903225, |
|
"rewards/cosine_scaled_reward": 0.2666339073330164, |
|
"rewards/format_reward": 1.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.16294765472412, |
|
"epoch": 1.328358208955224, |
|
"grad_norm": 2.5152955055236816, |
|
"learning_rate": 3.124281470072597e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 48219603.0, |
|
"reward": 1.6070343852043152, |
|
"reward_std": 0.07860209648621108, |
|
"rewards/accuracy_reward": 0.3035714291036129, |
|
"rewards/cosine_scaled_reward": 0.3034628815948963, |
|
"rewards/format_reward": 1.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.195312976837158, |
|
"epoch": 1.335820895522388, |
|
"grad_norm": 2.6069984436035156, |
|
"learning_rate": 3.064024463337747e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 48382522.0, |
|
"reward": 1.7208726704120636, |
|
"reward_std": 0.12136359186843038, |
|
"rewards/accuracy_reward": 0.3604910708963871, |
|
"rewards/cosine_scaled_reward": 0.3603815697133541, |
|
"rewards/format_reward": 1.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.24330425262451, |
|
"epoch": 1.3432835820895521, |
|
"grad_norm": 3.2656219005584717, |
|
"learning_rate": 3.004096428936461e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 48544316.0, |
|
"reward": 1.7164078652858734, |
|
"reward_std": 0.12633045494445128, |
|
"rewards/accuracy_reward": 0.3582589328289032, |
|
"rewards/cosine_scaled_reward": 0.3581488821655512, |
|
"rewards/format_reward": 1.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.943081378936768, |
|
"epoch": 1.3507462686567164, |
|
"grad_norm": 1.9599158763885498, |
|
"learning_rate": 2.9445075501923176e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 48698129.0, |
|
"reward": 1.6896116733551025, |
|
"reward_std": 0.055237259725728904, |
|
"rewards/accuracy_reward": 0.3404017873108387, |
|
"rewards/cosine_scaled_reward": 0.3492097966372967, |
|
"rewards/format_reward": 1.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.816965103149414, |
|
"epoch": 1.3582089552238805, |
|
"grad_norm": 2.1912848949432373, |
|
"learning_rate": 2.8852679527975685e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 48846693.0, |
|
"reward": 1.5489892959594727, |
|
"reward_std": 0.07417789786538975, |
|
"rewards/accuracy_reward": 0.2745535736903548, |
|
"rewards/cosine_scaled_reward": 0.27443567011505365, |
|
"rewards/format_reward": 1.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.80022382736206, |
|
"epoch": 1.3656716417910448, |
|
"grad_norm": 2.0120460987091064, |
|
"learning_rate": 2.8263877030925277e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 48993618.0, |
|
"reward": 1.6047926098108292, |
|
"reward_std": 0.08897415082109461, |
|
"rewards/accuracy_reward": 0.3024553554132581, |
|
"rewards/cosine_scaled_reward": 0.30233720503747463, |
|
"rewards/format_reward": 1.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.600447416305542, |
|
"epoch": 1.373134328358209, |
|
"grad_norm": 2.198028087615967, |
|
"learning_rate": 2.767876806355045e-07, |
|
"loss": 0.0011, |
|
"num_tokens": 49145180.0, |
|
"reward": 1.5378303229808807, |
|
"reward_std": 0.10318562714383006, |
|
"rewards/accuracy_reward": 0.2689732126891613, |
|
"rewards/cosine_scaled_reward": 0.2688570562750101, |
|
"rewards/format_reward": 1.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.30580472946167, |
|
"epoch": 1.3805970149253732, |
|
"grad_norm": 3.135545015335083, |
|
"learning_rate": 2.709745205100337e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 49288630.0, |
|
"reward": 1.6762290000915527, |
|
"reward_std": 0.10257845791056752, |
|
"rewards/accuracy_reward": 0.3381696464493871, |
|
"rewards/cosine_scaled_reward": 0.3380592940375209, |
|
"rewards/format_reward": 1.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.150670051574707, |
|
"epoch": 1.3880597014925373, |
|
"grad_norm": 2.3859987258911133, |
|
"learning_rate": 2.652002777391507e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 49430613.0, |
|
"reward": 1.7298022359609604, |
|
"reward_std": 0.09394080052152276, |
|
"rewards/accuracy_reward": 0.36495535634458065, |
|
"rewards/cosine_scaled_reward": 0.36484682094305754, |
|
"rewards/format_reward": 1.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.50892972946167, |
|
"epoch": 1.3955223880597014, |
|
"grad_norm": 2.2975528240203857, |
|
"learning_rate": 2.594659335161008e-07, |
|
"loss": -0.0025, |
|
"num_tokens": 49577965.0, |
|
"reward": 1.555689975619316, |
|
"reward_std": 0.07875558780506253, |
|
"rewards/accuracy_reward": 0.2779017873108387, |
|
"rewards/cosine_scaled_reward": 0.2777881510555744, |
|
"rewards/format_reward": 1.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.405134916305542, |
|
"epoch": 1.4029850746268657, |
|
"grad_norm": 1.598307490348816, |
|
"learning_rate": 2.5377246225433304e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 49731048.0, |
|
"reward": 1.6873885244131088, |
|
"reward_std": 0.061549990693965384, |
|
"rewards/accuracy_reward": 0.34375, |
|
"rewards/cosine_scaled_reward": 0.3436384294182062, |
|
"rewards/format_reward": 1.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.441964864730835, |
|
"epoch": 1.4104477611940298, |
|
"grad_norm": 1.098617434501648, |
|
"learning_rate": 2.4812083142192323e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 49875452.0, |
|
"reward": 1.6940844804048538, |
|
"reward_std": 0.03013577858569505, |
|
"rewards/accuracy_reward": 0.34709821455180645, |
|
"rewards/cosine_scaled_reward": 0.34698620066046715, |
|
"rewards/format_reward": 1.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.527902841567993, |
|
"epoch": 1.417910447761194, |
|
"grad_norm": 2.504321336746216, |
|
"learning_rate": 2.4251200137717543e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 50048789.0, |
|
"reward": 1.6360465586185455, |
|
"reward_std": 0.08695389210011228, |
|
"rewards/accuracy_reward": 0.3180803582072258, |
|
"rewards/cosine_scaled_reward": 0.3179661240428686, |
|
"rewards/format_reward": 1.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.17299175262451, |
|
"epoch": 1.4253731343283582, |
|
"grad_norm": 2.5839359760284424, |
|
"learning_rate": 2.3694692520543292e-07, |
|
"loss": 0.0013, |
|
"num_tokens": 50197200.0, |
|
"reward": 1.6204270422458649, |
|
"reward_std": 0.07372181725033755, |
|
"rewards/accuracy_reward": 0.31026786006987095, |
|
"rewards/cosine_scaled_reward": 0.31015911884605885, |
|
"rewards/format_reward": 1.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.678572416305542, |
|
"epoch": 1.4328358208955223, |
|
"grad_norm": 2.182985544204712, |
|
"learning_rate": 2.314265485571235e-07, |
|
"loss": 0.0091, |
|
"num_tokens": 50350120.0, |
|
"reward": 1.66729336977005, |
|
"reward_std": 0.08215013663391346, |
|
"rewards/accuracy_reward": 0.3337053582072258, |
|
"rewards/cosine_scaled_reward": 0.3335879575461149, |
|
"rewards/format_reward": 1.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.617188692092896, |
|
"epoch": 1.4402985074626866, |
|
"grad_norm": 1.8787038326263428, |
|
"learning_rate": 2.2595180948706926e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 50507097.0, |
|
"reward": 1.6829163581132889, |
|
"reward_std": 0.03772871130308175, |
|
"rewards/accuracy_reward": 0.34151786006987095, |
|
"rewards/cosine_scaled_reward": 0.34139839746057987, |
|
"rewards/format_reward": 1.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.189732551574707, |
|
"epoch": 1.4477611940298507, |
|
"grad_norm": 1.7796034812927246, |
|
"learning_rate": 2.2052363829508776e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 50667171.0, |
|
"reward": 1.7521222680807114, |
|
"reward_std": 0.05102925866069086, |
|
"rewards/accuracy_reward": 0.37611608020961285, |
|
"rewards/cosine_scaled_reward": 0.37600609846413136, |
|
"rewards/format_reward": 1.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.324777603149414, |
|
"epoch": 1.455223880597015, |
|
"grad_norm": 2.2849700450897217, |
|
"learning_rate": 2.1514295736790838e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 50814222.0, |
|
"reward": 1.735380157828331, |
|
"reward_std": 0.09453902203552644, |
|
"rewards/accuracy_reward": 0.36830357275903225, |
|
"rewards/cosine_scaled_reward": 0.3681926131248474, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 194 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.19084906578064, |
|
"epoch": 1.462686567164179, |
|
"grad_norm": 3.1908223628997803, |
|
"learning_rate": 2.0981068102243616e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 50957049.0, |
|
"reward": 1.6472126096487045, |
|
"reward_std": 0.09634861818715024, |
|
"rewards/accuracy_reward": 0.32366071455180645, |
|
"rewards/cosine_scaled_reward": 0.323551825247705, |
|
"rewards/format_reward": 1.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.42299199104309, |
|
"epoch": 1.4701492537313432, |
|
"grad_norm": 1.6977643966674805, |
|
"learning_rate": 2.0452771535038515e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 51099484.0, |
|
"reward": 1.54006627202034, |
|
"reward_std": 0.043066854786879105, |
|
"rewards/accuracy_reward": 0.27008928917348385, |
|
"rewards/cosine_scaled_reward": 0.26997692696750164, |
|
"rewards/format_reward": 1.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.802456378936768, |
|
"epoch": 1.4776119402985075, |
|
"grad_norm": 2.020225763320923, |
|
"learning_rate": 1.9929495806431023e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 51261563.0, |
|
"reward": 1.6829185336828232, |
|
"reward_std": 0.058621840249692525, |
|
"rewards/accuracy_reward": 0.3415178619325161, |
|
"rewards/cosine_scaled_reward": 0.3414005693048239, |
|
"rewards/format_reward": 1.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.07924199104309, |
|
"epoch": 1.4850746268656716, |
|
"grad_norm": 1.6952190399169922, |
|
"learning_rate": 1.9411329834506286e-07, |
|
"loss": 0.0012, |
|
"num_tokens": 51404026.0, |
|
"reward": 1.4038956314325333, |
|
"reward_std": 0.053959765473592824, |
|
"rewards/accuracy_reward": 0.2020089328289032, |
|
"rewards/cosine_scaled_reward": 0.20188664738088846, |
|
"rewards/format_reward": 1.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.199777841567993, |
|
"epoch": 1.4925373134328357, |
|
"grad_norm": 2.445509433746338, |
|
"learning_rate": 1.8898361669069497e-07, |
|
"loss": -0.0013, |
|
"num_tokens": 51564021.0, |
|
"reward": 1.640516072511673, |
|
"reward_std": 0.09235968008678697, |
|
"rewards/accuracy_reward": 0.32031249813735485, |
|
"rewards/cosine_scaled_reward": 0.3202035166323185, |
|
"rewards/format_reward": 1.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.577009916305542, |
|
"epoch": 1.5, |
|
"grad_norm": 1.4149043560028076, |
|
"learning_rate": 1.8390678476684142e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 51712674.0, |
|
"reward": 1.5110464841127396, |
|
"reward_std": 0.038402879612469576, |
|
"rewards/accuracy_reward": 0.2555803582072258, |
|
"rewards/cosine_scaled_reward": 0.2554660662135575, |
|
"rewards/format_reward": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.31361699104309, |
|
"epoch": 1.5074626865671643, |
|
"grad_norm": 1.3769646883010864, |
|
"learning_rate": 1.7888366525859967e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 51862699.0, |
|
"reward": 1.7298002988100052, |
|
"reward_std": 0.03547355129772001, |
|
"rewards/accuracy_reward": 0.3649553656578064, |
|
"rewards/cosine_scaled_reward": 0.3648448847234249, |
|
"rewards/format_reward": 1.0, |
|
"step": 201 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.623884916305542, |
|
"epoch": 1.5149253731343284, |
|
"grad_norm": 3.1723361015319824, |
|
"learning_rate": 1.7391511172393848e-07, |
|
"loss": 0.0012, |
|
"num_tokens": 52014514.0, |
|
"reward": 1.5690792500972748, |
|
"reward_std": 0.09754315220763488, |
|
"rewards/accuracy_reward": 0.28459821082651615, |
|
"rewards/cosine_scaled_reward": 0.2844809675589204, |
|
"rewards/format_reward": 1.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.436384916305542, |
|
"epoch": 1.5223880597014925, |
|
"grad_norm": 1.6968096494674683, |
|
"learning_rate": 1.690019684486557e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 52161273.0, |
|
"reward": 1.7119403928518295, |
|
"reward_std": 0.031414411859074676, |
|
"rewards/accuracy_reward": 0.35602678544819355, |
|
"rewards/cosine_scaled_reward": 0.3559135627001524, |
|
"rewards/format_reward": 1.0, |
|
"step": 203 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.014509677886963, |
|
"epoch": 1.5298507462686568, |
|
"grad_norm": 2.15055513381958, |
|
"learning_rate": 1.6414507030291246e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 52310342.0, |
|
"reward": 1.7543574571609497, |
|
"reward_std": 0.06756016856554936, |
|
"rewards/accuracy_reward": 0.3772321417927742, |
|
"rewards/cosine_scaled_reward": 0.3771252781152725, |
|
"rewards/format_reward": 1.0, |
|
"step": 204 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.49553632736206, |
|
"epoch": 1.537313432835821, |
|
"grad_norm": 2.6926231384277344, |
|
"learning_rate": 1.5934524259936753e-07, |
|
"loss": -0.0012, |
|
"num_tokens": 52466618.0, |
|
"reward": 1.5155121833086014, |
|
"reward_std": 0.1101713702082634, |
|
"rewards/accuracy_reward": 0.2578125009313226, |
|
"rewards/cosine_scaled_reward": 0.25769960321485996, |
|
"rewards/format_reward": 1.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.606027364730835, |
|
"epoch": 1.544776119402985, |
|
"grad_norm": 2.4713823795318604, |
|
"learning_rate": 1.5460330095293443e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 52628361.0, |
|
"reward": 1.767742782831192, |
|
"reward_std": 0.07515440785066829, |
|
"rewards/accuracy_reward": 0.38392857275903225, |
|
"rewards/cosine_scaled_reward": 0.38381412625312805, |
|
"rewards/format_reward": 1.0, |
|
"step": 206 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.731027603149414, |
|
"epoch": 1.5522388059701493, |
|
"grad_norm": 2.261600971221924, |
|
"learning_rate": 1.4992005114218804e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 52775840.0, |
|
"reward": 1.6472041308879852, |
|
"reward_std": 0.07109666805187231, |
|
"rewards/accuracy_reward": 0.32366071827709675, |
|
"rewards/cosine_scaled_reward": 0.32354335859417915, |
|
"rewards/format_reward": 1.0, |
|
"step": 207 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.045759677886963, |
|
"epoch": 1.5597014925373134, |
|
"grad_norm": 0.7100464105606079, |
|
"learning_rate": 1.4529628897244212e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 52938273.0, |
|
"reward": 1.6248737573623657, |
|
"reward_std": 0.012628240511414646, |
|
"rewards/accuracy_reward": 0.31250000838190317, |
|
"rewards/cosine_scaled_reward": 0.31237365305423737, |
|
"rewards/format_reward": 1.0, |
|
"step": 208 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.343751192092896, |
|
"epoch": 1.5671641791044775, |
|
"grad_norm": 1.3848381042480469, |
|
"learning_rate": 1.4073280014052074e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 53086589.0, |
|
"reward": 1.736496239900589, |
|
"reward_std": 0.02916058116019471, |
|
"rewards/accuracy_reward": 0.36830357648432255, |
|
"rewards/cosine_scaled_reward": 0.36819261126220226, |
|
"rewards/format_reward": 1.0, |
|
"step": 209 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.465402841567993, |
|
"epoch": 1.5746268656716418, |
|
"grad_norm": 2.789355993270874, |
|
"learning_rate": 1.3623036010124845e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 53244486.0, |
|
"reward": 1.7097086608409882, |
|
"reward_std": 0.08943129004910588, |
|
"rewards/accuracy_reward": 0.3549107173457742, |
|
"rewards/cosine_scaled_reward": 0.35479787550866604, |
|
"rewards/format_reward": 1.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.30022406578064, |
|
"epoch": 1.582089552238806, |
|
"grad_norm": 2.885791778564453, |
|
"learning_rate": 1.3178973393568056e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 53394467.0, |
|
"reward": 1.6762287318706512, |
|
"reward_std": 0.08162061781850127, |
|
"rewards/accuracy_reward": 0.33816964738070965, |
|
"rewards/cosine_scaled_reward": 0.3380589783191681, |
|
"rewards/format_reward": 1.0, |
|
"step": 211 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.32366132736206, |
|
"epoch": 1.5895522388059702, |
|
"grad_norm": 2.8630964756011963, |
|
"learning_rate": 1.2741167622109555e-07, |
|
"loss": 0.0, |
|
"num_tokens": 53546981.0, |
|
"reward": 1.6427462249994278, |
|
"reward_std": 0.058320232714407894, |
|
"rewards/accuracy_reward": 0.3214285708963871, |
|
"rewards/cosine_scaled_reward": 0.3213176503777504, |
|
"rewards/format_reward": 1.0, |
|
"step": 212 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.03236675262451, |
|
"epoch": 1.5970149253731343, |
|
"grad_norm": 2.2839713096618652, |
|
"learning_rate": 1.230969309027739e-07, |
|
"loss": -0.001, |
|
"num_tokens": 53698458.0, |
|
"reward": 1.6181965470314026, |
|
"reward_std": 0.07244142005220056, |
|
"rewards/accuracy_reward": 0.30915178707800806, |
|
"rewards/cosine_scaled_reward": 0.3090446996502578, |
|
"rewards/format_reward": 1.0, |
|
"step": 213 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.234375953674316, |
|
"epoch": 1.6044776119402986, |
|
"grad_norm": 2.546302556991577, |
|
"learning_rate": 1.1884623116758119e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 53849116.0, |
|
"reward": 1.651675522327423, |
|
"reward_std": 0.0710948963102993, |
|
"rewards/accuracy_reward": 0.32589285634458065, |
|
"rewards/cosine_scaled_reward": 0.3257826119661331, |
|
"rewards/format_reward": 1.0, |
|
"step": 214 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.366072177886963, |
|
"epoch": 1.6119402985074627, |
|
"grad_norm": 2.6176400184631348, |
|
"learning_rate": 1.1466029931938181e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 53999708.0, |
|
"reward": 1.6025667041540146, |
|
"reward_std": 0.055539907814715406, |
|
"rewards/accuracy_reward": 0.30133928917348385, |
|
"rewards/cosine_scaled_reward": 0.30122734420001507, |
|
"rewards/format_reward": 1.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.61830449104309, |
|
"epoch": 1.6194029850746268, |
|
"grad_norm": 2.8905839920043945, |
|
"learning_rate": 1.1053984665630023e-07, |
|
"loss": 0.0013, |
|
"num_tokens": 54147318.0, |
|
"reward": 1.6003320217132568, |
|
"reward_std": 0.12910877341846927, |
|
"rewards/accuracy_reward": 0.30022321455180645, |
|
"rewards/cosine_scaled_reward": 0.3001087475568056, |
|
"rewards/format_reward": 1.0, |
|
"step": 216 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.37834930419922, |
|
"epoch": 1.626865671641791, |
|
"grad_norm": 2.013456106185913, |
|
"learning_rate": 1.0648557334985308e-07, |
|
"loss": 0.001, |
|
"num_tokens": 54315337.0, |
|
"reward": 1.3972100466489792, |
|
"reward_std": 0.08131316915778086, |
|
"rewards/accuracy_reward": 0.19866071827709675, |
|
"rewards/cosine_scaled_reward": 0.1985492706298828, |
|
"rewards/format_reward": 1.0, |
|
"step": 217 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.402902841567993, |
|
"epoch": 1.6343283582089554, |
|
"grad_norm": 2.0653622150421143, |
|
"learning_rate": 1.024981683259723e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 54458594.0, |
|
"reward": 1.7476565390825272, |
|
"reward_std": 0.05102954798443449, |
|
"rewards/accuracy_reward": 0.3738839291036129, |
|
"rewards/cosine_scaled_reward": 0.37377250753343105, |
|
"rewards/format_reward": 1.0, |
|
"step": 218 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.71651840209961, |
|
"epoch": 1.6417910447761193, |
|
"grad_norm": 1.5992612838745117, |
|
"learning_rate": 9.857830914793824e-08, |
|
"loss": 0.0009, |
|
"num_tokens": 54607180.0, |
|
"reward": 1.7409540712833405, |
|
"reward_std": 0.04907748210338525, |
|
"rewards/accuracy_reward": 0.37053571827709675, |
|
"rewards/cosine_scaled_reward": 0.37041825242340565, |
|
"rewards/format_reward": 1.0, |
|
"step": 219 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.84709906578064, |
|
"epoch": 1.6492537313432836, |
|
"grad_norm": 1.980920672416687, |
|
"learning_rate": 9.472666190124456e-08, |
|
"loss": 0.0006, |
|
"num_tokens": 54749371.0, |
|
"reward": 1.746531069278717, |
|
"reward_std": 0.06538078441796102, |
|
"rewards/accuracy_reward": 0.37388392724096775, |
|
"rewards/cosine_scaled_reward": 0.3737631347030401, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 220 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.19642925262451, |
|
"epoch": 1.6567164179104479, |
|
"grad_norm": 2.4586503505706787, |
|
"learning_rate": 9.094388108041301e-08, |
|
"loss": -0.0002, |
|
"num_tokens": 54894707.0, |
|
"reward": 1.6717657148838043, |
|
"reward_std": 0.07176896455235493, |
|
"rewards/accuracy_reward": 0.3359375037252903, |
|
"rewards/cosine_scaled_reward": 0.3358281459659338, |
|
"rewards/format_reward": 1.0, |
|
"step": 221 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 19.99776864051819, |
|
"epoch": 1.664179104477612, |
|
"grad_norm": 1.5232560634613037, |
|
"learning_rate": 8.723060947777777e-08, |
|
"loss": 0.001, |
|
"num_tokens": 55046777.0, |
|
"reward": 1.4195362627506256, |
|
"reward_std": 0.05862198262564533, |
|
"rewards/accuracy_reward": 0.2098214307334274, |
|
"rewards/cosine_scaled_reward": 0.2097147584427148, |
|
"rewards/format_reward": 1.0, |
|
"step": 222 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.31808066368103, |
|
"epoch": 1.671641791044776, |
|
"grad_norm": 1.8125054836273193, |
|
"learning_rate": 8.358747807425826e-08, |
|
"loss": 0.0004, |
|
"num_tokens": 55197894.0, |
|
"reward": 1.6070322841405869, |
|
"reward_std": 0.06688734842464328, |
|
"rewards/accuracy_reward": 0.30357143096625805, |
|
"rewards/cosine_scaled_reward": 0.3034607693552971, |
|
"rewards/format_reward": 1.0, |
|
"step": 223 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.676340341567993, |
|
"epoch": 1.6791044776119404, |
|
"grad_norm": 2.117981195449829, |
|
"learning_rate": 8.001510593213945e-08, |
|
"loss": 0.0009, |
|
"num_tokens": 55361956.0, |
|
"reward": 1.5824733972549438, |
|
"reward_std": 0.061247594597631405, |
|
"rewards/accuracy_reward": 0.2912946464493871, |
|
"rewards/cosine_scaled_reward": 0.2911787135526538, |
|
"rewards/format_reward": 1.0, |
|
"step": 224 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.402902603149414, |
|
"epoch": 1.6865671641791045, |
|
"grad_norm": 2.322235584259033, |
|
"learning_rate": 7.651410008987697e-08, |
|
"loss": -0.0005, |
|
"num_tokens": 55534853.0, |
|
"reward": 1.5400669574737549, |
|
"reward_std": 0.07680402030835864, |
|
"rewards/accuracy_reward": 0.2700892873108387, |
|
"rewards/cosine_scaled_reward": 0.26997758261859417, |
|
"rewards/format_reward": 1.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.39732265472412, |
|
"epoch": 1.6940298507462686, |
|
"grad_norm": 2.2915542125701904, |
|
"learning_rate": 7.308505545894566e-08, |
|
"loss": 0.0015, |
|
"num_tokens": 55689753.0, |
|
"reward": 1.5065849125385284, |
|
"reward_std": 0.05005305098056567, |
|
"rewards/accuracy_reward": 0.2533482164144516, |
|
"rewards/cosine_scaled_reward": 0.25323665514588356, |
|
"rewards/format_reward": 1.0, |
|
"step": 226 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.694197177886963, |
|
"epoch": 1.7014925373134329, |
|
"grad_norm": 0.9342123866081238, |
|
"learning_rate": 6.972855472274852e-08, |
|
"loss": 0.0018, |
|
"num_tokens": 55839255.0, |
|
"reward": 1.5266692787408829, |
|
"reward_std": 0.020895601193345215, |
|
"rewards/accuracy_reward": 0.2633928619325161, |
|
"rewards/cosine_scaled_reward": 0.2632763609290123, |
|
"rewards/format_reward": 1.0, |
|
"step": 227 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.92299199104309, |
|
"epoch": 1.7089552238805972, |
|
"grad_norm": 1.9537469148635864, |
|
"learning_rate": 6.644516823760437e-08, |
|
"loss": 0.0, |
|
"num_tokens": 55992818.0, |
|
"reward": 1.6561282128095627, |
|
"reward_std": 0.07515494169327752, |
|
"rewards/accuracy_reward": 0.3281250037252903, |
|
"rewards/cosine_scaled_reward": 0.3280031867325306, |
|
"rewards/format_reward": 1.0, |
|
"step": 228 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.45870614051819, |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 2.364659547805786, |
|
"learning_rate": 6.323545393582847e-08, |
|
"loss": 0.0011, |
|
"num_tokens": 56149261.0, |
|
"reward": 1.718636766076088, |
|
"reward_std": 0.12023507321784876, |
|
"rewards/accuracy_reward": 0.35937500186264515, |
|
"rewards/cosine_scaled_reward": 0.35926168598234653, |
|
"rewards/format_reward": 1.0, |
|
"step": 229 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.56584858894348, |
|
"epoch": 1.7238805970149254, |
|
"grad_norm": 3.120504379272461, |
|
"learning_rate": 6.009995723092653e-08, |
|
"loss": -0.0001, |
|
"num_tokens": 56305584.0, |
|
"reward": 1.8458667993545532, |
|
"reward_std": 0.08213974455068751, |
|
"rewards/accuracy_reward": 0.42299107275903225, |
|
"rewards/cosine_scaled_reward": 0.42287569493055344, |
|
"rewards/format_reward": 1.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.895090103149414, |
|
"epoch": 1.7313432835820897, |
|
"grad_norm": 1.5434519052505493, |
|
"learning_rate": 5.703921092491393e-08, |
|
"loss": 0.0003, |
|
"num_tokens": 56468698.0, |
|
"reward": 1.6784496754407883, |
|
"reward_std": 0.029160332879651918, |
|
"rewards/accuracy_reward": 0.3392857192084193, |
|
"rewards/cosine_scaled_reward": 0.3391639143228531, |
|
"rewards/format_reward": 1.0, |
|
"step": 231 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.31584906578064, |
|
"epoch": 1.7388059701492538, |
|
"grad_norm": 2.2976887226104736, |
|
"learning_rate": 5.405373511777939e-08, |
|
"loss": 0.0003, |
|
"num_tokens": 56622173.0, |
|
"reward": 1.6114967614412308, |
|
"reward_std": 0.07515410965470437, |
|
"rewards/accuracy_reward": 0.30580357275903225, |
|
"rewards/cosine_scaled_reward": 0.30569313652813435, |
|
"rewards/format_reward": 1.0, |
|
"step": 232 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.062500715255737, |
|
"epoch": 1.7462686567164178, |
|
"grad_norm": 2.2692880630493164, |
|
"learning_rate": 5.114403711910631e-08, |
|
"loss": 0.0004, |
|
"num_tokens": 56781293.0, |
|
"reward": 1.6472149044275284, |
|
"reward_std": 0.07417689614470646, |
|
"rewards/accuracy_reward": 0.32194368727505207, |
|
"rewards/cosine_scaled_reward": 0.328018419444561, |
|
"rewards/format_reward": 1.0, |
|
"step": 233 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.42745614051819, |
|
"epoch": 1.7537313432835822, |
|
"grad_norm": 1.9118342399597168, |
|
"learning_rate": 4.831061136186787e-08, |
|
"loss": -0.0002, |
|
"num_tokens": 56925372.0, |
|
"reward": 1.6449773013591766, |
|
"reward_std": 0.06771036455336343, |
|
"rewards/accuracy_reward": 0.3225446417927742, |
|
"rewards/cosine_scaled_reward": 0.322432579472661, |
|
"rewards/format_reward": 1.0, |
|
"step": 234 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.910715103149414, |
|
"epoch": 1.7611940298507462, |
|
"grad_norm": 2.7811856269836426, |
|
"learning_rate": 4.5553939318410004e-08, |
|
"loss": 0.0011, |
|
"num_tokens": 57072716.0, |
|
"reward": 1.825772985816002, |
|
"reward_std": 0.09235973202066816, |
|
"rewards/accuracy_reward": 0.4129464291036129, |
|
"rewards/cosine_scaled_reward": 0.4128264728933573, |
|
"rewards/format_reward": 1.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.27678656578064, |
|
"epoch": 1.7686567164179103, |
|
"grad_norm": 1.4767699241638184, |
|
"learning_rate": 4.287448941863692e-08, |
|
"loss": 0.0006, |
|
"num_tokens": 57219380.0, |
|
"reward": 1.6014523804187775, |
|
"reward_std": 0.04930526966539617, |
|
"rewards/accuracy_reward": 0.30133928917348385, |
|
"rewards/cosine_scaled_reward": 0.30122908018529415, |
|
"rewards/format_reward": 0.9988839253783226, |
|
"step": 236 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.325893878936768, |
|
"epoch": 1.7761194029850746, |
|
"grad_norm": 3.0593392848968506, |
|
"learning_rate": 4.0272716970412516e-08, |
|
"loss": -0.0001, |
|
"num_tokens": 57371640.0, |
|
"reward": 1.5713180601596832, |
|
"reward_std": 0.124595548491925, |
|
"rewards/accuracy_reward": 0.2857142901048064, |
|
"rewards/cosine_scaled_reward": 0.28560364712029696, |
|
"rewards/format_reward": 1.0, |
|
"step": 237 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.405134677886963, |
|
"epoch": 1.783582089552239, |
|
"grad_norm": 2.0308165550231934, |
|
"learning_rate": 3.774906408219197e-08, |
|
"loss": 0.0018, |
|
"num_tokens": 57518011.0, |
|
"reward": 1.6494415253400803, |
|
"reward_std": 0.04959785374813919, |
|
"rewards/accuracy_reward": 0.3247767873108387, |
|
"rewards/cosine_scaled_reward": 0.32466465793550014, |
|
"rewards/format_reward": 1.0, |
|
"step": 238 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.67745614051819, |
|
"epoch": 1.7910447761194028, |
|
"grad_norm": 1.7336244583129883, |
|
"learning_rate": 3.5303959587895896e-08, |
|
"loss": 0.0034, |
|
"num_tokens": 57677018.0, |
|
"reward": 1.591400220990181, |
|
"reward_std": 0.03742815442538472, |
|
"rewards/accuracy_reward": 0.29575893096625805, |
|
"rewards/cosine_scaled_reward": 0.2956412099301815, |
|
"rewards/format_reward": 1.0, |
|
"step": 239 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.30803656578064, |
|
"epoch": 1.7985074626865671, |
|
"grad_norm": 2.150383234024048, |
|
"learning_rate": 3.293781897404063e-08, |
|
"loss": -0.001, |
|
"num_tokens": 57828678.0, |
|
"reward": 1.6829252541065216, |
|
"reward_std": 0.06816608617647546, |
|
"rewards/accuracy_reward": 0.34151785634458065, |
|
"rewards/cosine_scaled_reward": 0.34140734374523163, |
|
"rewards/format_reward": 1.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.556920528411865, |
|
"epoch": 1.8059701492537314, |
|
"grad_norm": 1.195513367652893, |
|
"learning_rate": 3.065104430913601e-08, |
|
"loss": 0.0005, |
|
"num_tokens": 57977305.0, |
|
"reward": 1.6561355143785477, |
|
"reward_std": 0.041787590547230025, |
|
"rewards/accuracy_reward": 0.3281250074505806, |
|
"rewards/cosine_scaled_reward": 0.328010406345129, |
|
"rewards/format_reward": 1.0, |
|
"step": 241 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.666295289993286, |
|
"epoch": 1.8134328358208955, |
|
"grad_norm": 2.063978672027588, |
|
"learning_rate": 2.8444024175363733e-08, |
|
"loss": 0.0002, |
|
"num_tokens": 58129758.0, |
|
"reward": 1.6315756142139435, |
|
"reward_std": 0.07628487978815457, |
|
"rewards/accuracy_reward": 0.31584821455180645, |
|
"rewards/cosine_scaled_reward": 0.31572734005749226, |
|
"rewards/format_reward": 1.0, |
|
"step": 242 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.343751192092896, |
|
"epoch": 1.8208955223880596, |
|
"grad_norm": 2.139723777770996, |
|
"learning_rate": 2.6317133602547335e-08, |
|
"loss": -0.0, |
|
"num_tokens": 58286354.0, |
|
"reward": 1.6070320904254913, |
|
"reward_std": 0.06395891746558391, |
|
"rewards/accuracy_reward": 0.3035714291036129, |
|
"rewards/cosine_scaled_reward": 0.30346059799194336, |
|
"rewards/format_reward": 1.0, |
|
"step": 243 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.492188453674316, |
|
"epoch": 1.828358208955224, |
|
"grad_norm": 1.4515061378479004, |
|
"learning_rate": 2.4270734004424643e-08, |
|
"loss": 0.0004, |
|
"num_tokens": 58436883.0, |
|
"reward": 1.624885842204094, |
|
"reward_std": 0.05441444956320396, |
|
"rewards/accuracy_reward": 0.3125000074505806, |
|
"rewards/cosine_scaled_reward": 0.3123858105391264, |
|
"rewards/format_reward": 1.0, |
|
"step": 244 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.328125953674316, |
|
"epoch": 1.835820895522388, |
|
"grad_norm": 1.8083621263504028, |
|
"learning_rate": 2.2305173117234233e-08, |
|
"loss": -0.0003, |
|
"num_tokens": 58590025.0, |
|
"reward": 1.6539071798324585, |
|
"reward_std": 0.06463310816476309, |
|
"rewards/accuracy_reward": 0.32700893096625805, |
|
"rewards/cosine_scaled_reward": 0.3268981762230396, |
|
"rewards/format_reward": 1.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.55580449104309, |
|
"epoch": 1.8432835820895521, |
|
"grad_norm": 2.2442469596862793, |
|
"learning_rate": 2.0420784940626156e-08, |
|
"loss": 0.0013, |
|
"num_tokens": 58742411.0, |
|
"reward": 1.714171290397644, |
|
"reward_std": 0.08214285858039716, |
|
"rewards/accuracy_reward": 0.3571428544819355, |
|
"rewards/cosine_scaled_reward": 0.35702834837138653, |
|
"rewards/format_reward": 1.0, |
|
"step": 246 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.25111675262451, |
|
"epoch": 1.8507462686567164, |
|
"grad_norm": 2.635986804962158, |
|
"learning_rate": 1.861788968090683e-08, |
|
"loss": -0.0002, |
|
"num_tokens": 58885124.0, |
|
"reward": 1.6583720594644547, |
|
"reward_std": 0.09363627548930253, |
|
"rewards/accuracy_reward": 0.32924107275903225, |
|
"rewards/cosine_scaled_reward": 0.32913094013929367, |
|
"rewards/format_reward": 1.0, |
|
"step": 247 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.059152364730835, |
|
"epoch": 1.8582089552238807, |
|
"grad_norm": 2.7098944187164307, |
|
"learning_rate": 1.68967936966275e-08, |
|
"loss": 0.0005, |
|
"num_tokens": 59042857.0, |
|
"reward": 1.647214189171791, |
|
"reward_std": 0.0797318636930413, |
|
"rewards/accuracy_reward": 0.32366071455180645, |
|
"rewards/cosine_scaled_reward": 0.3235534243285656, |
|
"rewards/format_reward": 1.0, |
|
"step": 248 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.43861699104309, |
|
"epoch": 1.8656716417910446, |
|
"grad_norm": 2.948129177093506, |
|
"learning_rate": 1.525778944652617e-08, |
|
"loss": -0.0017, |
|
"num_tokens": 59189866.0, |
|
"reward": 1.6047987192869186, |
|
"reward_std": 0.09995094314217567, |
|
"rewards/accuracy_reward": 0.30245535261929035, |
|
"rewards/cosine_scaled_reward": 0.30234329774975777, |
|
"rewards/format_reward": 1.0, |
|
"step": 249 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.51116156578064, |
|
"epoch": 1.873134328358209, |
|
"grad_norm": 1.8753550052642822, |
|
"learning_rate": 1.3701155439831248e-08, |
|
"loss": 0.0003, |
|
"num_tokens": 59351876.0, |
|
"reward": 1.4842608720064163, |
|
"reward_std": 0.05374052021700493, |
|
"rewards/accuracy_reward": 0.24218750186264515, |
|
"rewards/cosine_scaled_reward": 0.2420733030885458, |
|
"rewards/format_reward": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 19.960938692092896, |
|
"epoch": 1.8805970149253732, |
|
"grad_norm": 3.710313320159912, |
|
"learning_rate": 1.222715618893555e-08, |
|
"loss": -0.0011, |
|
"num_tokens": 59501249.0, |
|
"reward": 1.6673045605421066, |
|
"reward_std": 0.11986687686294317, |
|
"rewards/accuracy_reward": 0.33370536379516125, |
|
"rewards/cosine_scaled_reward": 0.3335991408675909, |
|
"rewards/format_reward": 1.0, |
|
"step": 251 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.74553632736206, |
|
"epoch": 1.8880597014925373, |
|
"grad_norm": 2.096618890762329, |
|
"learning_rate": 1.0836042164448944e-08, |
|
"loss": 0.0005, |
|
"num_tokens": 59655845.0, |
|
"reward": 1.5623822510242462, |
|
"reward_std": 0.06011815097401296, |
|
"rewards/accuracy_reward": 0.2812500037252903, |
|
"rewards/cosine_scaled_reward": 0.28113218024373055, |
|
"rewards/format_reward": 1.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.500000953674316, |
|
"epoch": 1.8955223880597014, |
|
"grad_norm": 2.3821330070495605, |
|
"learning_rate": 9.528049752636714e-09, |
|
"loss": 0.0004, |
|
"num_tokens": 59810181.0, |
|
"reward": 1.747654750943184, |
|
"reward_std": 0.0755647381696889, |
|
"rewards/accuracy_reward": 0.37611607275903225, |
|
"rewards/cosine_scaled_reward": 0.37600288540124893, |
|
"rewards/format_reward": 0.9955357164144516, |
|
"step": 253 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.908483266830444, |
|
"epoch": 1.9029850746268657, |
|
"grad_norm": 1.859230875968933, |
|
"learning_rate": 8.303401215251581e-09, |
|
"loss": 0.0002, |
|
"num_tokens": 59954931.0, |
|
"reward": 1.600327655673027, |
|
"reward_std": 0.03937964968498875, |
|
"rewards/accuracy_reward": 0.3002232201397419, |
|
"rewards/cosine_scaled_reward": 0.3001043573021889, |
|
"rewards/format_reward": 1.0, |
|
"step": 254 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.248884677886963, |
|
"epoch": 1.9104477611940298, |
|
"grad_norm": 2.67840313911438, |
|
"learning_rate": 7.1623046517656495e-09, |
|
"loss": 0.0004, |
|
"num_tokens": 60097050.0, |
|
"reward": 1.758818194270134, |
|
"reward_std": 0.06981627906458954, |
|
"rewards/accuracy_reward": 0.3794642873108387, |
|
"rewards/cosine_scaled_reward": 0.37935382314026356, |
|
"rewards/format_reward": 1.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.142858266830444, |
|
"epoch": 1.917910447761194, |
|
"grad_norm": 2.1021740436553955, |
|
"learning_rate": 6.104953964008897e-09, |
|
"loss": 0.0007, |
|
"num_tokens": 60255522.0, |
|
"reward": 1.5333734452724457, |
|
"reward_std": 0.08875712241180622, |
|
"rewards/accuracy_reward": 0.26674107648432255, |
|
"rewards/cosine_scaled_reward": 0.26663233898580074, |
|
"rewards/format_reward": 1.0, |
|
"step": 256 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.85267949104309, |
|
"epoch": 1.9253731343283582, |
|
"grad_norm": 2.7934389114379883, |
|
"learning_rate": 5.131528823220099e-09, |
|
"loss": -0.0019, |
|
"num_tokens": 60410742.0, |
|
"reward": 1.670636236667633, |
|
"reward_std": 0.08307532503371817, |
|
"rewards/accuracy_reward": 0.3348214328289032, |
|
"rewards/cosine_scaled_reward": 0.3358147069811821, |
|
"rewards/format_reward": 1.0, |
|
"step": 257 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.453125953674316, |
|
"epoch": 1.9328358208955225, |
|
"grad_norm": 2.496358871459961, |
|
"learning_rate": 4.242194639516416e-09, |
|
"loss": 0.0009, |
|
"num_tokens": 60571516.0, |
|
"reward": 1.6360480785369873, |
|
"reward_std": 0.07417896673651114, |
|
"rewards/accuracy_reward": 0.31808036006987095, |
|
"rewards/cosine_scaled_reward": 0.3179676216095686, |
|
"rewards/format_reward": 1.0, |
|
"step": 258 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.14955425262451, |
|
"epoch": 1.9402985074626866, |
|
"grad_norm": 1.2594853639602661, |
|
"learning_rate": 3.4371025337855407e-09, |
|
"loss": 0.0003, |
|
"num_tokens": 60722354.0, |
|
"reward": 1.586945116519928, |
|
"reward_std": 0.03156871721012067, |
|
"rewards/accuracy_reward": 0.29352678975556046, |
|
"rewards/cosine_scaled_reward": 0.29341828147880733, |
|
"rewards/format_reward": 1.0, |
|
"step": 259 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.205358266830444, |
|
"epoch": 1.9477611940298507, |
|
"grad_norm": 1.8333399295806885, |
|
"learning_rate": 2.7163893120066285e-09, |
|
"loss": 0.0005, |
|
"num_tokens": 60871290.0, |
|
"reward": 1.5088191330432892, |
|
"reward_std": 0.052982652708983835, |
|
"rewards/accuracy_reward": 0.2544642798602581, |
|
"rewards/cosine_scaled_reward": 0.25435481034219265, |
|
"rewards/format_reward": 1.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.30803656578064, |
|
"epoch": 1.955223880597015, |
|
"grad_norm": 2.2571942806243896, |
|
"learning_rate": 2.080177442003117e-09, |
|
"loss": -0.0004, |
|
"num_tokens": 61015958.0, |
|
"reward": 1.6137285828590393, |
|
"reward_std": 0.1302357604727149, |
|
"rewards/accuracy_reward": 0.3069196389988065, |
|
"rewards/cosine_scaled_reward": 0.30680886935442686, |
|
"rewards/format_reward": 1.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.48214340209961, |
|
"epoch": 1.962686567164179, |
|
"grad_norm": 1.8233392238616943, |
|
"learning_rate": 1.5285750326325953e-09, |
|
"loss": 0.0001, |
|
"num_tokens": 61160438.0, |
|
"reward": 1.6360481083393097, |
|
"reward_std": 0.04794773051276735, |
|
"rewards/accuracy_reward": 0.31808035634458065, |
|
"rewards/cosine_scaled_reward": 0.31796768493950367, |
|
"rewards/format_reward": 1.0, |
|
"step": 262 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 21.101563215255737, |
|
"epoch": 1.9701492537313432, |
|
"grad_norm": 1.5878843069076538, |
|
"learning_rate": 1.0616758154161631e-09, |
|
"loss": 0.0006, |
|
"num_tokens": 61316161.0, |
|
"reward": 1.6092515885829926, |
|
"reward_std": 0.0644803009436572, |
|
"rewards/accuracy_reward": 0.30468749441206455, |
|
"rewards/cosine_scaled_reward": 0.30456401966512203, |
|
"rewards/format_reward": 1.0, |
|
"step": 263 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.38839364051819, |
|
"epoch": 1.9776119402985075, |
|
"grad_norm": 2.332639455795288, |
|
"learning_rate": 6.795591286109514e-10, |
|
"loss": -0.0004, |
|
"num_tokens": 61460901.0, |
|
"reward": 1.490959793329239, |
|
"reward_std": 0.07921304133695628, |
|
"rewards/accuracy_reward": 0.24553572060540318, |
|
"rewards/cosine_scaled_reward": 0.24542399495840073, |
|
"rewards/format_reward": 1.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.500001192092896, |
|
"epoch": 1.9850746268656716, |
|
"grad_norm": 1.9155348539352417, |
|
"learning_rate": 3.8228990372862756e-10, |
|
"loss": 0.0008, |
|
"num_tokens": 61616757.0, |
|
"reward": 1.624886617064476, |
|
"reward_std": 0.07289814859302624, |
|
"rewards/accuracy_reward": 0.3125, |
|
"rewards/cosine_scaled_reward": 0.31238655652850866, |
|
"rewards/format_reward": 1.0, |
|
"step": 265 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 20.14851450920105, |
|
"epoch": 1.9925373134328357, |
|
"grad_norm": 2.2550103664398193, |
|
"learning_rate": 1.6991865450188825e-10, |
|
"loss": 0.0002, |
|
"num_tokens": 61771838.0, |
|
"reward": 1.6784630566835403, |
|
"reward_std": 0.06463183751365165, |
|
"rewards/accuracy_reward": 0.3392857164144516, |
|
"rewards/cosine_scaled_reward": 0.3391772899776697, |
|
"rewards/format_reward": 1.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.9925373134328357, |
|
"step": 266, |
|
"total_flos": 0.0, |
|
"train_loss": 0.032786881700187384, |
|
"train_runtime": 16819.5455, |
|
"train_samples_per_second": 1.783, |
|
"train_steps_per_second": 0.016 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 268, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|