|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9954198473282443, |
|
"eval_steps": 2000000, |
|
"global_step": 163, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 702.7132034301758, |
|
"epoch": 0.0061068702290076335, |
|
"grad_norm": 0.13332052615614565, |
|
"kl": 0.0, |
|
"learning_rate": 5.88235294117647e-08, |
|
"loss": 0.0341, |
|
"num_tokens": 880751.0, |
|
"reward": 0.770089328289032, |
|
"reward_std": 0.13822399266064167, |
|
"rewards/accuracy_reward": 0.27678571827709675, |
|
"rewards/format_reward": 0.9866071343421936, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 739.6551666259766, |
|
"epoch": 0.030534351145038167, |
|
"grad_norm": 0.2044588463598013, |
|
"kl": 5.408376455307007e-05, |
|
"learning_rate": 2.941176470588235e-07, |
|
"loss": 0.0268, |
|
"num_tokens": 4507619.0, |
|
"reward": 0.7359096258878708, |
|
"reward_std": 0.1397800410632044, |
|
"rewards/accuracy_reward": 0.24414062616415322, |
|
"rewards/format_reward": 0.9835379421710968, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 739.3268173217773, |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 0.2606158264843265, |
|
"kl": 7.957220077514648e-05, |
|
"learning_rate": 5.88235294117647e-07, |
|
"loss": 0.0332, |
|
"num_tokens": 9079091.0, |
|
"reward": 0.7665178939700127, |
|
"reward_std": 0.14487640419974923, |
|
"rewards/accuracy_reward": 0.2741071430966258, |
|
"rewards/format_reward": 0.9848214223980903, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 733.3120864868164, |
|
"epoch": 0.0916030534351145, |
|
"grad_norm": 0.12473067773197893, |
|
"kl": 0.0001289844512939453, |
|
"learning_rate": 8.823529411764705e-07, |
|
"loss": 0.0328, |
|
"num_tokens": 13590105.0, |
|
"reward": 0.7473214596509934, |
|
"reward_std": 0.1489832980558276, |
|
"rewards/accuracy_reward": 0.2566964281722903, |
|
"rewards/format_reward": 0.9812499925494194, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 713.0060607910157, |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 0.10652200791094474, |
|
"kl": 0.0002826213836669922, |
|
"learning_rate": 9.989585804326962e-07, |
|
"loss": 0.0185, |
|
"num_tokens": 18037460.0, |
|
"reward": 0.7446428909897804, |
|
"reward_std": 0.13395796837285162, |
|
"rewards/accuracy_reward": 0.2495535710826516, |
|
"rewards/format_reward": 0.9901785641908646, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 721.1808349609375, |
|
"epoch": 0.15267175572519084, |
|
"grad_norm": 0.20214832387838885, |
|
"kl": 0.0041507244110107425, |
|
"learning_rate": 9.926100533780304e-07, |
|
"loss": 0.0254, |
|
"num_tokens": 22546158.0, |
|
"reward": 0.7579241439700126, |
|
"reward_std": 0.14215883370488883, |
|
"rewards/accuracy_reward": 0.26495535727590325, |
|
"rewards/format_reward": 0.9859374895691871, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 742.2864181518555, |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 0.1594816777583089, |
|
"kl": 0.0016126632690429688, |
|
"learning_rate": 9.805648919361503e-07, |
|
"loss": 0.0357, |
|
"num_tokens": 27073905.0, |
|
"reward": 0.7584821790456772, |
|
"reward_std": 0.12880926295183598, |
|
"rewards/accuracy_reward": 0.2645089291036129, |
|
"rewards/format_reward": 0.9879464194178581, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 725.3341888427734, |
|
"epoch": 0.21374045801526717, |
|
"grad_norm": 0.09889141258603189, |
|
"kl": 0.001922607421875, |
|
"learning_rate": 9.62962388596925e-07, |
|
"loss": 0.0234, |
|
"num_tokens": 31605946.0, |
|
"reward": 0.7930803909897804, |
|
"reward_std": 0.12912328215315938, |
|
"rewards/accuracy_reward": 0.2970982141792774, |
|
"rewards/format_reward": 0.9919642791152, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 710.5219085693359, |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 0.12468270475594573, |
|
"kl": 0.0033203125, |
|
"learning_rate": 9.400061019867678e-07, |
|
"loss": 0.0202, |
|
"num_tokens": 36024036.0, |
|
"reward": 0.7656250327825547, |
|
"reward_std": 0.12553255576640368, |
|
"rewards/accuracy_reward": 0.26986607145518066, |
|
"rewards/format_reward": 0.9915178537368774, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 691.2645370483399, |
|
"epoch": 0.2748091603053435, |
|
"grad_norm": 0.10338268423256407, |
|
"kl": 0.004708480834960937, |
|
"learning_rate": 9.11961502878777e-07, |
|
"loss": 0.025, |
|
"num_tokens": 40342221.0, |
|
"reward": 0.789732177555561, |
|
"reward_std": 0.13373914500698447, |
|
"rewards/accuracy_reward": 0.29419642593711615, |
|
"rewards/format_reward": 0.9910714223980903, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 689.1975814819336, |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 0.11801883674056522, |
|
"kl": 0.006087493896484375, |
|
"learning_rate": 8.791529042392812e-07, |
|
"loss": 0.0221, |
|
"num_tokens": 44648378.0, |
|
"reward": 0.7669643178582192, |
|
"reward_std": 0.12678753938525916, |
|
"rewards/accuracy_reward": 0.2709821423981339, |
|
"rewards/format_reward": 0.9919642791152, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 681.3627517700195, |
|
"epoch": 0.33587786259541985, |
|
"grad_norm": 0.13594270458309027, |
|
"kl": 0.007537078857421875, |
|
"learning_rate": 8.419597108123053e-07, |
|
"loss": 0.0159, |
|
"num_tokens": 48948507.0, |
|
"reward": 0.7919643267989158, |
|
"reward_std": 0.1272535071708262, |
|
"rewards/accuracy_reward": 0.2941964280791581, |
|
"rewards/format_reward": 0.9955357074737549, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 667.7931106567382, |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 0.12780945861669307, |
|
"kl": 0.00973663330078125, |
|
"learning_rate": 8.008120316124611e-07, |
|
"loss": 0.0151, |
|
"num_tokens": 53166244.0, |
|
"reward": 0.7524553954601287, |
|
"reward_std": 0.12692366167902946, |
|
"rewards/accuracy_reward": 0.25491071604192256, |
|
"rewards/format_reward": 0.9950892791152001, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 696.0245849609375, |
|
"epoch": 0.3969465648854962, |
|
"grad_norm": 0.12051116100632073, |
|
"kl": 0.01003570556640625, |
|
"learning_rate": 7.561857060642119e-07, |
|
"loss": 0.0107, |
|
"num_tokens": 57558962.0, |
|
"reward": 0.7633928894996643, |
|
"reward_std": 0.13064181264489888, |
|
"rewards/accuracy_reward": 0.26562499944120643, |
|
"rewards/format_reward": 0.9955357104539871, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 662.5156509399415, |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 0.11229836339791986, |
|
"kl": 0.01019439697265625, |
|
"learning_rate": 7.085968013061584e-07, |
|
"loss": 0.0178, |
|
"num_tokens": 61733288.0, |
|
"reward": 0.7448661029338837, |
|
"reward_std": 0.11164179369807244, |
|
"rewards/accuracy_reward": 0.2475446429103613, |
|
"rewards/format_reward": 0.9946428507566452, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 682.3205673217774, |
|
"epoch": 0.4580152671755725, |
|
"grad_norm": 0.12731781754060964, |
|
"kl": 0.009069061279296875, |
|
"learning_rate": 6.585956442945531e-07, |
|
"loss": 0.0177, |
|
"num_tokens": 66027892.0, |
|
"reward": 0.7617187812924385, |
|
"reward_std": 0.11499134246259927, |
|
"rewards/accuracy_reward": 0.26450892849825325, |
|
"rewards/format_reward": 0.9944196343421936, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 700.5924423217773, |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 0.11204035822940968, |
|
"kl": 0.009134674072265625, |
|
"learning_rate": 6.06760457719898e-07, |
|
"loss": 0.0133, |
|
"num_tokens": 70433842.0, |
|
"reward": 0.8136161074042321, |
|
"reward_std": 0.12341635385528207, |
|
"rewards/accuracy_reward": 0.3149553577415645, |
|
"rewards/format_reward": 0.9973214238882064, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 667.9774856567383, |
|
"epoch": 0.5190839694656488, |
|
"grad_norm": 0.08504179774118455, |
|
"kl": 0.010693359375, |
|
"learning_rate": 5.536906733320815e-07, |
|
"loss": 0.0095, |
|
"num_tokens": 74684629.0, |
|
"reward": 0.7792411088943482, |
|
"reward_std": 0.10279256403446198, |
|
"rewards/accuracy_reward": 0.28125000055879357, |
|
"rewards/format_reward": 0.9959821373224258, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 672.8207870483399, |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 0.09677151139504983, |
|
"kl": 0.0111236572265625, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0082, |
|
"num_tokens": 78941554.0, |
|
"reward": 0.7789062857627869, |
|
"reward_std": 0.10907009486109018, |
|
"rewards/accuracy_reward": 0.2801339304074645, |
|
"rewards/format_reward": 0.997544638812542, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 676.2569496154786, |
|
"epoch": 0.5801526717557252, |
|
"grad_norm": 0.11724442552756359, |
|
"kl": 0.0115997314453125, |
|
"learning_rate": 4.463093266679185e-07, |
|
"loss": 0.0106, |
|
"num_tokens": 83189777.0, |
|
"reward": 0.7709821790456772, |
|
"reward_std": 0.11432018820196391, |
|
"rewards/accuracy_reward": 0.27232142791617664, |
|
"rewards/format_reward": 0.9973214238882064, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 675.6011459350586, |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 0.10650310445039746, |
|
"kl": 0.01142425537109375, |
|
"learning_rate": 3.932395422801019e-07, |
|
"loss": 0.0117, |
|
"num_tokens": 87446302.0, |
|
"reward": 0.796875037252903, |
|
"reward_std": 0.11783077660948038, |
|
"rewards/accuracy_reward": 0.2979910722468048, |
|
"rewards/format_reward": 0.9977678522467613, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 666.8801651000977, |
|
"epoch": 0.6412213740458015, |
|
"grad_norm": 0.10284875734947345, |
|
"kl": 0.01230621337890625, |
|
"learning_rate": 3.41404355705447e-07, |
|
"loss": 0.0094, |
|
"num_tokens": 91633661.0, |
|
"reward": 0.8254464611411094, |
|
"reward_std": 0.11524229180067777, |
|
"rewards/accuracy_reward": 0.3265624988824129, |
|
"rewards/format_reward": 0.9977678552269935, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 708.1185592651367, |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 0.09648724954516992, |
|
"kl": 0.0115264892578125, |
|
"learning_rate": 2.914031986938417e-07, |
|
"loss": 0.0124, |
|
"num_tokens": 96055112.0, |
|
"reward": 0.7448661044239998, |
|
"reward_std": 0.12669871849939227, |
|
"rewards/accuracy_reward": 0.24642857336439192, |
|
"rewards/format_reward": 0.9968749955296516, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 677.4897605895997, |
|
"epoch": 0.7022900763358778, |
|
"grad_norm": 0.1011113055763225, |
|
"kl": 0.0118255615234375, |
|
"learning_rate": 2.4381429393578815e-07, |
|
"loss": 0.009, |
|
"num_tokens": 100317890.0, |
|
"reward": 0.7813616394996643, |
|
"reward_std": 0.1330147437751293, |
|
"rewards/accuracy_reward": 0.2825892847031355, |
|
"rewards/format_reward": 0.997544638812542, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 678.3256973266601, |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 0.15245123106678932, |
|
"kl": 0.01221771240234375, |
|
"learning_rate": 1.991879683875386e-07, |
|
"loss": 0.0104, |
|
"num_tokens": 104592629.0, |
|
"reward": 0.7625000357627869, |
|
"reward_std": 0.11710045160725713, |
|
"rewards/accuracy_reward": 0.2633928569033742, |
|
"rewards/format_reward": 0.9982142806053161, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 670.6040435791016, |
|
"epoch": 0.7633587786259542, |
|
"grad_norm": 0.11742459382336433, |
|
"kl": 0.01245880126953125, |
|
"learning_rate": 1.5804028918769485e-07, |
|
"loss": 0.0114, |
|
"num_tokens": 108835759.0, |
|
"reward": 0.7925223559141159, |
|
"reward_std": 0.1248929288238287, |
|
"rewards/accuracy_reward": 0.29352678582072256, |
|
"rewards/format_reward": 0.9979910671710968, |
|
"step": 125 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 659.5288223266601, |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 0.09688477664215772, |
|
"kl": 0.012872314453125, |
|
"learning_rate": 1.2084709576071883e-07, |
|
"loss": 0.0099, |
|
"num_tokens": 112970592.0, |
|
"reward": 0.7906250342726707, |
|
"reward_std": 0.12112973481416703, |
|
"rewards/accuracy_reward": 0.29174106996506455, |
|
"rewards/format_reward": 0.9977678537368775, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 698.9047195434571, |
|
"epoch": 0.8244274809160306, |
|
"grad_norm": 0.10609676202620595, |
|
"kl": 0.0119049072265625, |
|
"learning_rate": 8.803849712122291e-08, |
|
"loss": 0.0084, |
|
"num_tokens": 117351077.0, |
|
"reward": 0.7845982551574707, |
|
"reward_std": 0.11648994972929358, |
|
"rewards/accuracy_reward": 0.28571428507566454, |
|
"rewards/format_reward": 0.9977678522467613, |
|
"step": 135 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 688.1453430175782, |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 0.08115857291878435, |
|
"kl": 0.0126739501953125, |
|
"learning_rate": 5.999389801323218e-08, |
|
"loss": 0.0113, |
|
"num_tokens": 121661224.0, |
|
"reward": 0.8302455723285675, |
|
"reward_std": 0.12670655427500604, |
|
"rewards/accuracy_reward": 0.3314732125028968, |
|
"rewards/format_reward": 0.9975446373224258, |
|
"step": 140 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 672.133511352539, |
|
"epoch": 0.8854961832061069, |
|
"grad_norm": 0.10287154621758528, |
|
"kl": 0.01266937255859375, |
|
"learning_rate": 3.7037611403075096e-08, |
|
"loss": 0.0093, |
|
"num_tokens": 125919062.0, |
|
"reward": 0.8181920021772384, |
|
"reward_std": 0.12157530700787902, |
|
"rewards/accuracy_reward": 0.3189732149243355, |
|
"rewards/format_reward": 0.9984374955296517, |
|
"step": 145 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 659.8361923217774, |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 0.11539905461898202, |
|
"kl": 0.01181793212890625, |
|
"learning_rate": 1.943510806384968e-08, |
|
"loss": 0.0061, |
|
"num_tokens": 130131040.0, |
|
"reward": 0.781919676065445, |
|
"reward_std": 0.10376817025244237, |
|
"rewards/accuracy_reward": 0.2823660712689161, |
|
"rewards/format_reward": 0.9991071403026581, |
|
"step": 150 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 663.9966735839844, |
|
"epoch": 0.9465648854961832, |
|
"grad_norm": 0.14485603251022727, |
|
"kl": 0.01304473876953125, |
|
"learning_rate": 7.389946621969678e-09, |
|
"loss": 0.0114, |
|
"num_tokens": 134339337.0, |
|
"reward": 0.759709857404232, |
|
"reward_std": 0.12261311169713736, |
|
"rewards/accuracy_reward": 0.26138392817229034, |
|
"rewards/format_reward": 0.996651777625084, |
|
"step": 155 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 664.3817291259766, |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 0.10778077233274512, |
|
"kl": 0.0122100830078125, |
|
"learning_rate": 1.0414195673039138e-09, |
|
"loss": 0.0035, |
|
"num_tokens": 138539071.0, |
|
"reward": 0.7744419991970062, |
|
"reward_std": 0.10305451611056923, |
|
"rewards/accuracy_reward": 0.27477678619325163, |
|
"rewards/format_reward": 0.9993303567171097, |
|
"step": 160 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 704.554547627767, |
|
"epoch": 0.9954198473282443, |
|
"kl": 0.012700398763020834, |
|
"num_tokens": 141142234.0, |
|
"reward": 0.788318489988645, |
|
"reward_std": 0.12267326470464468, |
|
"rewards/accuracy_reward": 0.2898065475746989, |
|
"rewards/format_reward": 0.9970238034923872, |
|
"step": 163, |
|
"total_flos": 0.0, |
|
"train_loss": 0.015952993104337183, |
|
"train_runtime": 32110.3153, |
|
"train_samples_per_second": 0.571, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 163, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|