|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9952755905511811, |
|
"eval_steps": 2000000, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 814.1228103637695, |
|
"epoch": 0.012598425196850394, |
|
"grad_norm": 0.4256682094610429, |
|
"learning_rate": 1.25e-07, |
|
"loss": 0.1859, |
|
"num_tokens": 857038.0, |
|
"reward": 0.5837053805589676, |
|
"reward_std": 0.38259728997945786, |
|
"rewards/accuracy_reward": 0.578125, |
|
"rewards/format_reward": 0.011160714784637094, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 799.2157211303711, |
|
"epoch": 0.06299212598425197, |
|
"grad_norm": 0.2682297103080829, |
|
"learning_rate": 6.249999999999999e-07, |
|
"loss": 0.1749, |
|
"num_tokens": 4268899.0, |
|
"reward": 0.5555245783179998, |
|
"reward_std": 0.3812973015010357, |
|
"rewards/accuracy_reward": 0.5532924123108387, |
|
"rewards/format_reward": 0.004464285884751007, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 818.2045013427735, |
|
"epoch": 0.12598425196850394, |
|
"grad_norm": 0.3636912627924593, |
|
"learning_rate": 9.980434110374724e-07, |
|
"loss": 0.163, |
|
"num_tokens": 8619311.0, |
|
"reward": 0.5619419872760772, |
|
"reward_std": 0.3748646147549152, |
|
"rewards/accuracy_reward": 0.5591517865657807, |
|
"rewards/format_reward": 0.005580357369035482, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 825.4431121826171, |
|
"epoch": 0.1889763779527559, |
|
"grad_norm": 0.5533896497151992, |
|
"learning_rate": 9.762072666790656e-07, |
|
"loss": 0.1654, |
|
"num_tokens": 12961552.0, |
|
"reward": 0.5640625230967998, |
|
"reward_std": 0.36274583265185356, |
|
"rewards/accuracy_reward": 0.5607142888009549, |
|
"rewards/format_reward": 0.006696428847499191, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 836.8739181518555, |
|
"epoch": 0.25196850393700787, |
|
"grad_norm": 0.23700630720254828, |
|
"learning_rate": 9.311572862600138e-07, |
|
"loss": 0.1757, |
|
"num_tokens": 17365979.0, |
|
"reward": 0.5938616335391999, |
|
"reward_std": 0.3463574357330799, |
|
"rewards/accuracy_reward": 0.5930803567171097, |
|
"rewards/format_reward": 0.0015625000698491931, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 818.7475799560547, |
|
"epoch": 0.31496062992125984, |
|
"grad_norm": 0.5349999125528991, |
|
"learning_rate": 8.650895363529172e-07, |
|
"loss": 0.1813, |
|
"num_tokens": 21690080.0, |
|
"reward": 0.6300223484635353, |
|
"reward_std": 0.3288935709744692, |
|
"rewards/accuracy_reward": 0.630769232660532, |
|
"rewards/format_reward": 0.0011160714784637094, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 779.5919998168945, |
|
"epoch": 0.3779527559055118, |
|
"grad_norm": 0.33764515237713627, |
|
"learning_rate": 7.812246438203903e-07, |
|
"loss": 0.1706, |
|
"num_tokens": 25851452.0, |
|
"reward": 0.6796875342726707, |
|
"reward_std": 0.28653539419174195, |
|
"rewards/accuracy_reward": 0.6794642806053162, |
|
"rewards/format_reward": 0.00044642859138548373, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 764.461865234375, |
|
"epoch": 0.4409448818897638, |
|
"grad_norm": 0.17429005608156234, |
|
"learning_rate": 6.836507988323784e-07, |
|
"loss": 0.1559, |
|
"num_tokens": 29947561.0, |
|
"reward": 0.6868303909897804, |
|
"reward_std": 0.27214415185153484, |
|
"rewards/accuracy_reward": 0.6866071395576, |
|
"rewards/format_reward": 0.00044642859138548373, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 762.9011520385742, |
|
"epoch": 0.5039370078740157, |
|
"grad_norm": 0.14957998644439033, |
|
"learning_rate": 5.771244664826511e-07, |
|
"loss": 0.1182, |
|
"num_tokens": 34064990.0, |
|
"reward": 0.6814732447266578, |
|
"reward_std": 0.2671396616846323, |
|
"rewards/accuracy_reward": 0.6814732149243354, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 744.8989166259765, |
|
"epoch": 0.5669291338582677, |
|
"grad_norm": 0.7244950959434313, |
|
"learning_rate": 4.6683852178244817e-07, |
|
"loss": 0.1165, |
|
"num_tokens": 38094369.0, |
|
"reward": 0.6755580686032772, |
|
"reward_std": 0.26786904707551, |
|
"rewards/accuracy_reward": 0.6754464261233807, |
|
"rewards/format_reward": 0.00022321429569274187, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 758.1707931518555, |
|
"epoch": 0.6299212598425197, |
|
"grad_norm": 0.12379763987674317, |
|
"learning_rate": 3.5816911083285164e-07, |
|
"loss": 0.1091, |
|
"num_tokens": 42164678.0, |
|
"reward": 0.6937500357627868, |
|
"reward_std": 0.2522166069597006, |
|
"rewards/accuracy_reward": 0.6937499977648258, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 750.6433380126953, |
|
"epoch": 0.6929133858267716, |
|
"grad_norm": 0.22358072329593362, |
|
"learning_rate": 2.5641357801960184e-07, |
|
"loss": 0.1096, |
|
"num_tokens": 46185264.0, |
|
"reward": 0.6895089626312256, |
|
"reward_std": 0.2542113933712244, |
|
"rewards/accuracy_reward": 0.6895089261233807, |
|
"rewards/format_reward": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 730.3611877441406, |
|
"epoch": 0.7559055118110236, |
|
"grad_norm": 0.42247506921603206, |
|
"learning_rate": 1.665322345816746e-07, |
|
"loss": 0.0853, |
|
"num_tokens": 50096882.0, |
|
"reward": 0.7053571708500386, |
|
"reward_std": 0.23149009980261326, |
|
"rewards/accuracy_reward": 0.7053571432828903, |
|
"rewards/format_reward": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 739.0698974609375, |
|
"epoch": 0.8188976377952756, |
|
"grad_norm": 0.33241630532899313, |
|
"learning_rate": 9.290655664821296e-08, |
|
"loss": 0.0931, |
|
"num_tokens": 54062275.0, |
|
"reward": 0.7138393133878708, |
|
"reward_std": 0.23870391435921193, |
|
"rewards/accuracy_reward": 0.7138392895460128, |
|
"rewards/format_reward": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 713.727931213379, |
|
"epoch": 0.8818897637795275, |
|
"grad_norm": 0.2063115196222285, |
|
"learning_rate": 3.912559994556086e-08, |
|
"loss": 0.0924, |
|
"num_tokens": 57926568.0, |
|
"reward": 0.7283482469618321, |
|
"reward_std": 0.23798817545175552, |
|
"rewards/accuracy_reward": 0.7294986329972744, |
|
"rewards/format_reward": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 740.9406539916993, |
|
"epoch": 0.9448818897637795, |
|
"grad_norm": 0.1952309372717915, |
|
"learning_rate": 7.811042888637209e-09, |
|
"loss": 0.0778, |
|
"num_tokens": 61914142.0, |
|
"reward": 0.690178605914116, |
|
"reward_std": 0.23276512399315835, |
|
"rewards/accuracy_reward": 0.6901785716414451, |
|
"rewards/format_reward": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 768.2404909133911, |
|
"epoch": 0.9952755905511811, |
|
"num_tokens": 65183080.0, |
|
"reward": 0.6978236902505159, |
|
"reward_std": 0.24844580795615911, |
|
"rewards/accuracy_reward": 0.6978236641734838, |
|
"rewards/format_reward": 0.0, |
|
"step": 79, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1304081463361088, |
|
"train_runtime": 17251.9813, |
|
"train_samples_per_second": 0.515, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 79, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|