{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9962034927866363,
  "eval_steps": 2000000,
  "global_step": 164,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 803.0022583007812,
      "epoch": 0.006074411541381929,
      "grad_norm": 0.14674668166353436,
      "kl": 0.0,
      "learning_rate": 5.88235294117647e-08,
      "loss": 0.0479,
      "num_tokens": 918402.0,
      "reward": 0.9815848618745804,
      "reward_std": 0.23756355978548527,
      "rewards/accuracy_reward": 0.4933035746216774,
      "rewards/format_reward": 0.9765624925494194,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 734.3027648925781,
      "epoch": 0.030372057706909643,
      "grad_norm": 0.13057922454955487,
      "kl": 5.84721565246582e-05,
      "learning_rate": 2.941176470588235e-07,
      "loss": 0.037,
      "num_tokens": 4258679.0,
      "reward": 1.0901228114962578,
      "reward_std": 0.23264290555380285,
      "rewards/accuracy_reward": 0.5965401763096452,
      "rewards/format_reward": 0.9871651735156775,
      "step": 5
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 743.0277114868164,
      "epoch": 0.060744115413819286,
      "grad_norm": 0.1343510023679506,
      "kl": 8.401274681091309e-05,
      "learning_rate": 5.88235294117647e-07,
      "loss": 0.0465,
      "num_tokens": 8471139.0,
      "reward": 1.062276841700077,
      "reward_std": 0.23240854553878307,
      "rewards/accuracy_reward": 0.5718749992549419,
      "rewards/format_reward": 0.9808035627007484,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 739.1007049560546,
      "epoch": 0.09111617312072894,
      "grad_norm": 0.738570752534254,
      "kl": 0.00017675161361694336,
      "learning_rate": 8.823529411764705e-07,
      "loss": 0.0358,
      "num_tokens": 12734494.0,
      "reward": 1.0722098737955092,
      "reward_std": 0.2205923892557621,
      "rewards/accuracy_reward": 0.5785714268684388,
      "rewards/format_reward": 0.9872767761349678,
      "step": 15
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 745.5227966308594,
      "epoch": 0.12148823082763857,
      "grad_norm": 0.30158627332435983,
      "kl": 0.0026874780654907227,
      "learning_rate": 9.989726963751682e-07,
      "loss": 0.037,
      "num_tokens": 16990620.0,
      "reward": 1.0786830827593803,
      "reward_std": 0.23385403044521808,
      "rewards/accuracy_reward": 0.5861607141792774,
      "rewards/format_reward": 0.9850446373224259,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 731.8799407958984,
      "epoch": 0.15186028853454822,
      "grad_norm": 0.10368791276497527,
      "kl": 0.0003843784332275391,
      "learning_rate": 9.927100106776212e-07,
      "loss": 0.0351,
      "num_tokens": 21176106.0,
      "reward": 1.106584869325161,
      "reward_std": 0.21388941686600446,
      "rewards/accuracy_reward": 0.6142857171595096,
      "rewards/format_reward": 0.9845982104539871,
      "step": 25
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 716.5080688476562,
      "epoch": 0.18223234624145787,
      "grad_norm": 0.24334246130754558,
      "kl": 0.0008969306945800781,
      "learning_rate": 9.808267184205181e-07,
      "loss": 0.0203,
      "num_tokens": 25301046.0,
      "reward": 1.0974330857396126,
      "reward_std": 0.21846173331141472,
      "rewards/accuracy_reward": 0.6022321447730065,
      "rewards/format_reward": 0.9904017791152,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 718.258511352539,
      "epoch": 0.2126044039483675,
      "grad_norm": 0.13060657109261103,
      "kl": 0.0011320114135742188,
      "learning_rate": 9.634583786730108e-07,
      "loss": 0.0247,
      "num_tokens": 29447476.0,
      "reward": 1.116071480512619,
      "reward_std": 0.21333869993686677,
      "rewards/accuracy_reward": 0.620758930593729,
      "rewards/format_reward": 0.9906249955296517,
      "step": 35
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 734.5969055175781,
      "epoch": 0.24297646165527714,
      "grad_norm": 0.14790799582315342,
      "kl": 0.0016210556030273437,
      "learning_rate": 9.408031213740044e-07,
      "loss": 0.0307,
      "num_tokens": 33678894.0,
      "reward": 1.0677455827593803,
      "reward_std": 0.2126396529376507,
      "rewards/accuracy_reward": 0.5736607156693936,
      "rewards/format_reward": 0.9881696373224258,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 724.3696716308593,
      "epoch": 0.2733485193621868,
      "grad_norm": 0.1235294567362974,
      "kl": 0.0030605316162109373,
      "learning_rate": 9.131193871579974e-07,
      "loss": 0.0288,
      "num_tokens": 37860574.0,
      "reward": 1.0801339834928512,
      "reward_std": 0.22113933004438877,
      "rewards/accuracy_reward": 0.5854910694062709,
      "rewards/format_reward": 0.9892857074737549,
      "step": 45
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 719.0857437133789,
      "epoch": 0.30372057706909644,
      "grad_norm": 0.11260442217805001,
      "kl": 0.004908370971679688,
      "learning_rate": 8.807229791845671e-07,
      "loss": 0.0309,
      "num_tokens": 42021414.0,
      "reward": 1.1001116633415222,
      "reward_std": 0.2082567172124982,
      "rewards/accuracy_reward": 0.6053571477532387,
      "rewards/format_reward": 0.9895089223980904,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 710.4136505126953,
      "epoch": 0.3340926347760061,
      "grad_norm": 0.12446746182181773,
      "kl": 0.00711669921875,
      "learning_rate": 8.439834606028593e-07,
      "loss": 0.03,
      "num_tokens": 46149299.0,
      "reward": 1.1018973752856254,
      "reward_std": 0.20198939852416514,
      "rewards/accuracy_reward": 0.6075892850756646,
      "rewards/format_reward": 0.9886160641908646,
      "step": 55
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 711.5286056518555,
      "epoch": 0.36446469248291574,
      "grad_norm": 0.13140372700302483,
      "kl": 0.01026763916015625,
      "learning_rate": 8.033199387471276e-07,
      "loss": 0.0255,
      "num_tokens": 50248419.0,
      "reward": 1.0939732655882835,
      "reward_std": 0.20504674576222898,
      "rewards/accuracy_reward": 0.6002232126891613,
      "rewards/format_reward": 0.9874999925494194,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 693.0864105224609,
      "epoch": 0.39483675018982534,
      "grad_norm": 5.84593208956697,
      "kl": 0.01532440185546875,
      "learning_rate": 7.591962841552626e-07,
      "loss": 0.0221,
      "num_tokens": 54273302.0,
      "reward": 1.102567011117935,
      "reward_std": 0.19313923437148334,
      "rewards/accuracy_reward": 0.6060267843306064,
      "rewards/format_reward": 0.9930803507566452,
      "step": 65
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 698.3335159301757,
      "epoch": 0.425208807896735,
      "grad_norm": 0.18797262379226592,
      "kl": 0.0115875244140625,
      "learning_rate": 7.121158389495185e-07,
      "loss": 0.0308,
      "num_tokens": 58317828.0,
      "reward": 1.0886161223053932,
      "reward_std": 0.2050962893292308,
      "rewards/accuracy_reward": 0.5941964283585548,
      "rewards/format_reward": 0.9888392791152001,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 680.4533798217774,
      "epoch": 0.45558086560364464,
      "grad_norm": 0.12462934113614446,
      "kl": 0.01414947509765625,
      "learning_rate": 6.626156749437736e-07,
      "loss": 0.0258,
      "num_tokens": 62259643.0,
      "reward": 1.1068080976605414,
      "reward_std": 0.1903689544647932,
      "rewards/accuracy_reward": 0.6107142798602581,
      "rewards/format_reward": 0.9921874910593033,
      "step": 75
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 671.9855209350586,
      "epoch": 0.4859529233105543,
      "grad_norm": 0.12209526567257212,
      "kl": 0.01604156494140625,
      "learning_rate": 6.112604669781572e-07,
      "loss": 0.0134,
      "num_tokens": 66175850.0,
      "reward": 1.0722098708152772,
      "reward_std": 0.1989122748374939,
      "rewards/accuracy_reward": 0.5743303574621678,
      "rewards/format_reward": 0.9957589223980904,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 655.3560516357422,
      "epoch": 0.5163249810174639,
      "grad_norm": 0.12987027750323757,
      "kl": 0.017596435546875,
      "learning_rate": 5.586360513712009e-07,
      "loss": 0.0202,
      "num_tokens": 70039477.0,
      "reward": 1.1373884499073028,
      "reward_std": 0.17809431692585348,
      "rewards/accuracy_reward": 0.6401785723865032,
      "rewards/format_reward": 0.9944196373224259,
      "step": 85
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 679.969448852539,
      "epoch": 0.5466970387243736,
      "grad_norm": 0.11569644648116158,
      "kl": 0.01629638671875,
      "learning_rate": 5.053427429716866e-07,
      "loss": 0.0261,
      "num_tokens": 74010748.0,
      "reward": 1.1206473752856254,
      "reward_std": 0.16938802655786275,
      "rewards/accuracy_reward": 0.62518887296319,
      "rewards/format_reward": 0.9930803492665291,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 670.8172203063965,
      "epoch": 0.5770690964312832,
      "grad_norm": 0.14653189798827554,
      "kl": 0.01604461669921875,
      "learning_rate": 4.519884870461591e-07,
      "loss": 0.0062,
      "num_tokens": 77924425.0,
      "reward": 1.1319196939468383,
      "reward_std": 0.18193732015788555,
      "rewards/accuracy_reward": 0.6334821462631226,
      "rewards/format_reward": 0.9968749970197678,
      "step": 95
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 689.6049453735352,
      "epoch": 0.6074411541381929,
      "grad_norm": 0.1373443920915848,
      "kl": 0.0151153564453125,
      "learning_rate": 3.991819241221835e-07,
      "loss": 0.0202,
      "num_tokens": 81937855.0,
      "reward": 1.1222098782658576,
      "reward_std": 0.1860942555591464,
      "rewards/accuracy_reward": 0.625,
      "rewards/format_reward": 0.9944196373224259,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 675.8683364868164,
      "epoch": 0.6378132118451025,
      "grad_norm": 0.14762350547769187,
      "kl": 0.015521240234375,
      "learning_rate": 3.4752544690038643e-07,
      "loss": 0.0151,
      "num_tokens": 85899921.0,
      "reward": 1.128125049173832,
      "reward_std": 0.1950968151912093,
      "rewards/accuracy_reward": 0.6305803559720516,
      "rewards/format_reward": 0.9950892791152001,
      "step": 105
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 696.7239181518555,
      "epoch": 0.6681852695520122,
      "grad_norm": 0.12897190097851505,
      "kl": 0.0144561767578125,
      "learning_rate": 2.976083284388031e-07,
      "loss": 0.0225,
      "num_tokens": 89967132.0,
      "reward": 1.0918527334928512,
      "reward_std": 0.1823650782927871,
      "rewards/accuracy_reward": 0.5953125059604645,
      "rewards/format_reward": 0.9930803492665291,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 719.4877540588379,
      "epoch": 0.6985573272589218,
      "grad_norm": 0.12020301457071197,
      "kl": 0.014898681640625,
      "learning_rate": 2.500000000000001e-07,
      "loss": 0.0153,
      "num_tokens": 94129389.0,
      "reward": 1.1255580872297286,
      "reward_std": 0.19005396589636803,
      "rewards/accuracy_reward": 0.6283482149243355,
      "rewards/format_reward": 0.9944196343421936,
      "step": 115
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 681.8125282287598,
      "epoch": 0.7289293849658315,
      "grad_norm": 0.1344831996031667,
      "kl": 0.01582489013671875,
      "learning_rate": 2.0524355524417015e-07,
      "loss": 0.0192,
      "num_tokens": 98103221.0,
      "reward": 1.1333705812692643,
      "reward_std": 0.20433492437005044,
      "rewards/accuracy_reward": 0.6354910746216774,
      "rewards/format_reward": 0.9957589209079742,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 681.395115661621,
      "epoch": 0.7593014426727411,
      "grad_norm": 0.13865803911159388,
      "kl": 0.0163787841796875,
      "learning_rate": 1.6384955486934154e-07,
      "loss": 0.0122,
      "num_tokens": 102096279.0,
      "reward": 1.1371652349829673,
      "reward_std": 0.18053851332515478,
      "rewards/accuracy_reward": 0.6404017873108387,
      "rewards/format_reward": 0.9935267820954323,
      "step": 125
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 702.8656555175781,
      "epoch": 0.7896735003796507,
      "grad_norm": 0.10840464189916307,
      "kl": 0.01602935791015625,
      "learning_rate": 1.262902023724824e-07,
      "loss": 0.0162,
      "num_tokens": 106168389.0,
      "reward": 1.1045759484171866,
      "reward_std": 0.18122291592881085,
      "rewards/accuracy_reward": 0.6062500029802322,
      "rewards/format_reward": 0.9966517820954323,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 696.1946792602539,
      "epoch": 0.8200455580865603,
      "grad_norm": 0.1591189816361499,
      "kl": 0.01680145263671875,
      "learning_rate": 9.299395737170757e-08,
      "loss": 0.0167,
      "num_tokens": 110246405.0,
      "reward": 1.0947545170783997,
      "reward_std": 0.20664523243904115,
      "rewards/accuracy_reward": 0.5973214291036129,
      "rewards/format_reward": 0.9948660656809807,
      "step": 135
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 698.2578453063965,
      "epoch": 0.85041761579347,
      "grad_norm": 0.13052581661637774,
      "kl": 0.0175323486328125,
      "learning_rate": 6.43406479383053e-08,
      "loss": 0.0206,
      "num_tokens": 114326168.0,
      "reward": 1.1349330857396125,
      "reward_std": 0.19575350042432546,
      "rewards/accuracy_reward": 0.6372767858207226,
      "rewards/format_reward": 0.9953124925494194,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 678.3460144042969,
      "epoch": 0.8807896735003796,
      "grad_norm": 0.12342327662368928,
      "kl": 0.01726837158203125,
      "learning_rate": 4.065713769482082e-08,
      "loss": 0.0217,
      "num_tokens": 118268150.0,
      "reward": 1.131250050663948,
      "reward_std": 0.19892821311950684,
      "rewards/accuracy_reward": 0.6337053582072258,
      "rewards/format_reward": 0.9950892806053162,
      "step": 145
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 699.1832885742188,
      "epoch": 0.9111617312072893,
      "grad_norm": 0.11347140306400243,
      "kl": 0.01734161376953125,
      "learning_rate": 2.2213597106929605e-08,
      "loss": 0.0206,
      "num_tokens": 122323331.0,
      "reward": 1.123325940966606,
      "reward_std": 0.19308128226548432,
      "rewards/accuracy_reward": 0.625669640302658,
      "rewards/format_reward": 0.9953124925494194,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 699.7196716308594,
      "epoch": 0.9415337889141989,
      "grad_norm": 0.1475830222889839,
      "kl": 0.01719207763671875,
      "learning_rate": 9.22042150446728e-09,
      "loss": 0.0185,
      "num_tokens": 126366451.0,
      "reward": 1.154241117835045,
      "reward_std": 0.1731583815999329,
      "rewards/accuracy_reward": 0.6560267820954323,
      "rewards/format_reward": 0.9964285656809807,
      "step": 155
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 695.9060577392578,
      "epoch": 0.9719058466211086,
      "grad_norm": 0.13967302854618574,
      "kl": 0.01683197021484375,
      "learning_rate": 1.8258309893965374e-09,
      "loss": 0.0191,
      "num_tokens": 130430318.0,
      "reward": 1.1508929118514062,
      "reward_std": 0.20208097249269485,
      "rewards/accuracy_reward": 0.6546875029802323,
      "rewards/format_reward": 0.9924107119441032,
      "step": 160
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 684.7267169952393,
      "epoch": 0.9962034927866363,
      "kl": 0.017988204956054688,
      "num_tokens": 133627034.0,
      "reward": 1.118443138897419,
      "reward_std": 0.1834622365422547,
      "rewards/accuracy_reward": 0.6226820051670074,
      "rewards/format_reward": 0.9946986511349678,
      "step": 164,
      "total_flos": 0.0,
      "train_loss": 0.024119453254814554,
      "train_runtime": 33084.5686,
      "train_samples_per_second": 0.557,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 5,
  "max_steps": 164,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}