{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9962034927866363,
  "eval_steps": 2000000,
  "global_step": 164,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 803.0022583007812,
      "epoch": 0.006074411541381929,
      "grad_norm": 0.14656181705800808,
      "learning_rate": 5.88235294117647e-08,
      "loss": 0.0479,
      "num_tokens": 918402.0,
      "reward": 0.9815848618745804,
      "reward_std": 0.23756355978548527,
      "rewards/accuracy_reward": 0.4933035746216774,
      "rewards/format_reward": 0.9765624925494194,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 732.8228569030762,
      "epoch": 0.030372057706909643,
      "grad_norm": 0.1360498469331386,
      "learning_rate": 2.941176470588235e-07,
      "loss": 0.0275,
      "num_tokens": 4253375.0,
      "reward": 1.0891462452709675,
      "reward_std": 0.2294587183278054,
      "rewards/accuracy_reward": 0.5954241026192904,
      "rewards/format_reward": 0.9874441903084517,
      "step": 5
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 733.9051712036132,
      "epoch": 0.060744115413819286,
      "grad_norm": 2.020390330382682,
      "learning_rate": 5.88235294117647e-07,
      "loss": 0.0394,
      "num_tokens": 8424966.0,
      "reward": 1.074776826798916,
      "reward_std": 0.2325986440293491,
      "rewards/accuracy_reward": 0.5812499992549419,
      "rewards/format_reward": 0.9870535641908645,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 730.850700378418,
      "epoch": 0.09111617312072894,
      "grad_norm": 0.6485429367394874,
      "learning_rate": 8.823529411764705e-07,
      "loss": 0.0319,
      "num_tokens": 12651361.0,
      "reward": 1.068080399930477,
      "reward_std": 0.22116196975111962,
      "rewards/accuracy_reward": 0.5736607149243355,
      "rewards/format_reward": 0.988839277625084,
      "step": 15
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 749.2339599609375,
      "epoch": 0.12148823082763857,
      "grad_norm": 0.15121241250002537,
      "learning_rate": 9.989726963751682e-07,
      "loss": 0.0412,
      "num_tokens": 16924113.0,
      "reward": 1.0832589775323869,
      "reward_std": 0.24419224373996257,
      "rewards/accuracy_reward": 0.590625,
      "rewards/format_reward": 0.9852678462862968,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 733.0969100952149,
      "epoch": 0.15186028853454822,
      "grad_norm": 0.1625406073177716,
      "learning_rate": 9.927100106776212e-07,
      "loss": 0.0385,
      "num_tokens": 21115051.0,
      "reward": 1.1054687976837159,
      "reward_std": 0.22408477105200292,
      "rewards/accuracy_reward": 0.6136160686612129,
      "rewards/format_reward": 0.9837053477764129,
      "step": 25
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 720.6406631469727,
      "epoch": 0.18223234624145787,
      "grad_norm": 2.666467566757727,
      "learning_rate": 9.808267184205181e-07,
      "loss": 0.0239,
      "num_tokens": 25258505.0,
      "reward": 1.0918527334928512,
      "reward_std": 0.22341929338872432,
      "rewards/accuracy_reward": 0.5984375,
      "rewards/format_reward": 0.9868303462862968,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 701.581282043457,
      "epoch": 0.2126044039483675,
      "grad_norm": 0.2580879090307691,
      "learning_rate": 9.634583786730108e-07,
      "loss": 0.0261,
      "num_tokens": 29330221.0,
      "reward": 1.1060268327593803,
      "reward_std": 0.21581739820539952,
      "rewards/accuracy_reward": 0.613616070151329,
      "rewards/format_reward": 0.984821417927742,
      "step": 35
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 707.9558319091797,
      "epoch": 0.24297646165527714,
      "grad_norm": 0.8886692354776214,
      "learning_rate": 9.408031213740044e-07,
      "loss": 0.0272,
      "num_tokens": 33442287.0,
      "reward": 1.0679688140749932,
      "reward_std": 0.21454464346170427,
      "rewards/accuracy_reward": 0.5720982141792774,
      "rewards/format_reward": 0.9917410612106323,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 699.1395431518555,
      "epoch": 0.2733485193621868,
      "grad_norm": 0.1839147765074166,
      "learning_rate": 9.131193871579974e-07,
      "loss": 0.0273,
      "num_tokens": 37510936.0,
      "reward": 1.0806920111179352,
      "reward_std": 0.20978465098887683,
      "rewards/accuracy_reward": 0.5868303567171097,
      "rewards/format_reward": 0.987723208963871,
      "step": 45
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 688.0076187133789,
      "epoch": 0.30372057706909644,
      "grad_norm": 2.39827459380332,
      "learning_rate": 8.807229791845671e-07,
      "loss": 0.0254,
      "num_tokens": 41532546.0,
      "reward": 1.0955357611179353,
      "reward_std": 0.2291064240038395,
      "rewards/accuracy_reward": 0.6026785708963871,
      "rewards/format_reward": 0.9857142746448517,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 705.5748062133789,
      "epoch": 0.3340926347760061,
      "grad_norm": 6.336128416350602,
      "learning_rate": 8.439834606028593e-07,
      "loss": 0.0312,
      "num_tokens": 45638753.0,
      "reward": 1.0700893342494964,
      "reward_std": 0.22525846362113952,
      "rewards/accuracy_reward": 0.5787946447730065,
      "rewards/format_reward": 0.9825892791152,
      "step": 55
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 699.0966857910156,
      "epoch": 0.36446469248291574,
      "grad_norm": 2.8773101008786597,
      "learning_rate": 8.033199387471276e-07,
      "loss": 0.0257,
      "num_tokens": 49682178.0,
      "reward": 1.0860491678118707,
      "reward_std": 0.19614009652286768,
      "rewards/accuracy_reward": 0.5910714276134967,
      "rewards/format_reward": 0.9899553537368775,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 676.4727966308594,
      "epoch": 0.39483675018982534,
      "grad_norm": 0.1564093879360907,
      "learning_rate": 7.591962841552626e-07,
      "loss": 0.0148,
      "num_tokens": 53632632.0,
      "reward": 1.104575951397419,
      "reward_std": 0.1936511306092143,
      "rewards/accuracy_reward": 0.6069196410477161,
      "rewards/format_reward": 0.9953124940395355,
      "step": 65
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 693.0926666259766,
      "epoch": 0.425208807896735,
      "grad_norm": 0.21433648094895766,
      "learning_rate": 7.121158389495185e-07,
      "loss": 0.0179,
      "num_tokens": 57653679.0,
      "reward": 1.096875049173832,
      "reward_std": 0.19877330139279364,
      "rewards/accuracy_reward": 0.6008928552269935,
      "rewards/format_reward": 0.9919642791152,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 677.7578414916992,
      "epoch": 0.45558086560364464,
      "grad_norm": 0.13423679492808266,
      "learning_rate": 6.626156749437736e-07,
      "loss": 0.023,
      "num_tokens": 61583418.0,
      "reward": 1.0986607670783997,
      "reward_std": 0.19030132256448268,
      "rewards/accuracy_reward": 0.6033482141792774,
      "rewards/format_reward": 0.9906249925494194,
      "step": 75
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 677.3216796875,
      "epoch": 0.4859529233105543,
      "grad_norm": 0.12374303041671109,
      "learning_rate": 6.112604669781572e-07,
      "loss": 0.0139,
      "num_tokens": 65523531.0,
      "reward": 1.0838170185685159,
      "reward_std": 0.20196059457957743,
      "rewards/accuracy_reward": 0.5859375014901161,
      "rewards/format_reward": 0.9957589223980904,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 661.0935562133789,
      "epoch": 0.5163249810174639,
      "grad_norm": 0.24633881049994122,
      "learning_rate": 5.586360513712009e-07,
      "loss": 0.0146,
      "num_tokens": 69412862.0,
      "reward": 1.1289062976837159,
      "reward_std": 0.19050537403672935,
      "rewards/accuracy_reward": 0.6308035746216774,
      "rewards/format_reward": 0.9962053522467613,
      "step": 85
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 684.6299407958984,
      "epoch": 0.5466970387243736,
      "grad_norm": 0.20823664365461947,
      "learning_rate": 5.053427429716866e-07,
      "loss": 0.0183,
      "num_tokens": 73405012.0,
      "reward": 1.127790230512619,
      "reward_std": 0.1836160296574235,
      "rewards/accuracy_reward": 0.6328468464314938,
      "rewards/format_reward": 0.9921874940395355,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 679.512525177002,
      "epoch": 0.5770690964312832,
      "grad_norm": 0.17725373837889702,
      "learning_rate": 4.519884870461591e-07,
      "loss": 0.013,
      "num_tokens": 77357644.0,
      "reward": 1.1372768431901932,
      "reward_std": 0.1831109957769513,
      "rewards/accuracy_reward": 0.6390624985098838,
      "rewards/format_reward": 0.9964285656809807,
      "step": 95
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 691.0065063476562,
      "epoch": 0.6074411541381929,
      "grad_norm": 0.18903103595690637,
      "learning_rate": 3.991819241221835e-07,
      "loss": 0.015,
      "num_tokens": 81377353.0,
      "reward": 1.1179687947034835,
      "reward_std": 0.18047410659492016,
      "rewards/accuracy_reward": 0.6209821425378322,
      "rewards/format_reward": 0.9939732044935227,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 678.3850769042969,
      "epoch": 0.6378132118451025,
      "grad_norm": 0.20779542924679995,
      "learning_rate": 3.4752544690038643e-07,
      "loss": 0.0112,
      "num_tokens": 85350694.0,
      "reward": 1.1294643417000771,
      "reward_std": 0.18618108071386813,
      "rewards/accuracy_reward": 0.6323660708963871,
      "rewards/format_reward": 0.9941964238882065,
      "step": 105
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 688.9138717651367,
      "epoch": 0.6681852695520122,
      "grad_norm": 0.2676944642662668,
      "learning_rate": 2.976083284388031e-07,
      "loss": 0.0163,
      "num_tokens": 89382916.0,
      "reward": 1.101339329779148,
      "reward_std": 0.1832605952396989,
      "rewards/accuracy_reward": 0.6040178589522839,
      "rewards/format_reward": 0.9946428507566452,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 709.0390884399415,
      "epoch": 0.6985573272589218,
      "grad_norm": 0.1723246791980809,
      "learning_rate": 2.500000000000001e-07,
      "loss": 0.0124,
      "num_tokens": 93498363.0,
      "reward": 1.123660759627819,
      "reward_std": 0.18700322844088077,
      "rewards/accuracy_reward": 0.6258928567171097,
      "rewards/format_reward": 0.9955357074737549,
      "step": 115
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 674.1076217651367,
      "epoch": 0.7289293849658315,
      "grad_norm": 0.16904917847238007,
      "learning_rate": 2.0524355524417015e-07,
      "loss": 0.0146,
      "num_tokens": 97437677.0,
      "reward": 1.131361649930477,
      "reward_std": 0.19161452651023864,
      "rewards/accuracy_reward": 0.6328125,
      "rewards/format_reward": 0.997098208963871,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 682.0279342651368,
      "epoch": 0.7593014426727411,
      "grad_norm": 0.17946193842059094,
      "learning_rate": 1.6384955486934154e-07,
      "loss": 0.0112,
      "num_tokens": 101433570.0,
      "reward": 1.1439732566475869,
      "reward_std": 0.1751913372427225,
      "rewards/accuracy_reward": 0.6462053626775741,
      "rewards/format_reward": 0.9955357074737549,
      "step": 125
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 695.6339569091797,
      "epoch": 0.7896735003796507,
      "grad_norm": 0.1905658306829658,
      "learning_rate": 1.262902023724824e-07,
      "loss": 0.0081,
      "num_tokens": 105473282.0,
      "reward": 1.1212054073810578,
      "reward_std": 0.17985087335109712,
      "rewards/accuracy_reward": 0.6225446417927742,
      "rewards/format_reward": 0.9973214253783226,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 689.9069519042969,
      "epoch": 0.8200455580865603,
      "grad_norm": 0.18740313988650945,
      "learning_rate": 9.299395737170757e-08,
      "loss": 0.0183,
      "num_tokens": 109523129.0,
      "reward": 1.0936384439468383,
      "reward_std": 0.20308975782245398,
      "rewards/accuracy_reward": 0.5962053589522839,
      "rewards/format_reward": 0.9948660627007484,
      "step": 135
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 689.1723495483399,
      "epoch": 0.85041761579347,
      "grad_norm": 0.17643493300658672,
      "learning_rate": 6.43406479383053e-08,
      "loss": 0.0089,
      "num_tokens": 113562189.0,
      "reward": 1.1333705842494965,
      "reward_std": 0.18020046018064023,
      "rewards/accuracy_reward": 0.6352678582072258,
      "rewards/format_reward": 0.9962053537368775,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 673.6042694091797,
      "epoch": 0.8807896735003796,
      "grad_norm": 3.201259451192229,
      "learning_rate": 4.065713769482082e-08,
      "loss": 0.0138,
      "num_tokens": 117482928.0,
      "reward": 1.1319196924567223,
      "reward_std": 0.1873009530827403,
      "rewards/accuracy_reward": 0.6339285716414451,
      "rewards/format_reward": 0.9959821388125419,
      "step": 145
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 687.8794952392578,
      "epoch": 0.9111617312072893,
      "grad_norm": 2.7680554916216296,
      "learning_rate": 2.2213597106929605e-08,
      "loss": 0.0152,
      "num_tokens": 121487468.0,
      "reward": 1.1334821954369545,
      "reward_std": 0.1863085398450494,
      "rewards/accuracy_reward": 0.634821429848671,
      "rewards/format_reward": 0.9973214238882064,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 691.2263702392578,
      "epoch": 0.9415337889141989,
      "grad_norm": 0.16790447183243948,
      "learning_rate": 9.22042150446728e-09,
      "loss": 0.0089,
      "num_tokens": 125492538.0,
      "reward": 1.1498884424567222,
      "reward_std": 0.1872717458754778,
      "rewards/accuracy_reward": 0.6511160723865033,
      "rewards/format_reward": 0.997544638812542,
      "step": 155
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 689.0906539916992,
      "epoch": 0.9719058466211086,
      "grad_norm": 0.8754711405794506,
      "learning_rate": 1.8258309893965374e-09,
      "loss": 0.0103,
      "num_tokens": 129525872.0,
      "reward": 1.152567020058632,
      "reward_std": 0.189371613971889,
      "rewards/accuracy_reward": 0.6555803567171097,
      "rewards/format_reward": 0.9939732134342194,
      "step": 160
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 673.1839790344238,
      "epoch": 0.9962034927866363,
      "num_tokens": 132694223.0,
      "reward": 1.1247210260480642,
      "reward_std": 0.1711470059817657,
      "rewards/accuracy_reward": 0.6278545688837767,
      "rewards/format_reward": 0.997209819033742,
      "step": 164,
      "total_flos": 0.0,
      "train_loss": 0.019764071750659043,
      "train_runtime": 31261.7596,
      "train_samples_per_second": 0.59,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 5,
  "max_steps": 164,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}