chenggong1995's picture
Model save
9fc7e4f verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9952755905511811,
"eval_steps": 2000000,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completion_length": 814.1228103637695,
"epoch": 0.012598425196850394,
"grad_norm": 0.4256682094610429,
"learning_rate": 1.25e-07,
"loss": 0.1859,
"num_tokens": 857038.0,
"reward": 0.5837053805589676,
"reward_std": 0.38259728997945786,
"rewards/accuracy_reward": 0.578125,
"rewards/format_reward": 0.011160714784637094,
"step": 1
},
{
"clip_ratio": 0.0,
"completion_length": 799.2157211303711,
"epoch": 0.06299212598425197,
"grad_norm": 0.2682297103080829,
"learning_rate": 6.249999999999999e-07,
"loss": 0.1749,
"num_tokens": 4268899.0,
"reward": 0.5555245783179998,
"reward_std": 0.3812973015010357,
"rewards/accuracy_reward": 0.5532924123108387,
"rewards/format_reward": 0.004464285884751007,
"step": 5
},
{
"clip_ratio": 0.0,
"completion_length": 818.2045013427735,
"epoch": 0.12598425196850394,
"grad_norm": 0.3636912627924593,
"learning_rate": 9.980434110374724e-07,
"loss": 0.163,
"num_tokens": 8619311.0,
"reward": 0.5619419872760772,
"reward_std": 0.3748646147549152,
"rewards/accuracy_reward": 0.5591517865657807,
"rewards/format_reward": 0.005580357369035482,
"step": 10
},
{
"clip_ratio": 0.0,
"completion_length": 825.4431121826171,
"epoch": 0.1889763779527559,
"grad_norm": 0.5533896497151992,
"learning_rate": 9.762072666790656e-07,
"loss": 0.1654,
"num_tokens": 12961552.0,
"reward": 0.5640625230967998,
"reward_std": 0.36274583265185356,
"rewards/accuracy_reward": 0.5607142888009549,
"rewards/format_reward": 0.006696428847499191,
"step": 15
},
{
"clip_ratio": 0.0,
"completion_length": 836.8739181518555,
"epoch": 0.25196850393700787,
"grad_norm": 0.23700630720254828,
"learning_rate": 9.311572862600138e-07,
"loss": 0.1757,
"num_tokens": 17365979.0,
"reward": 0.5938616335391999,
"reward_std": 0.3463574357330799,
"rewards/accuracy_reward": 0.5930803567171097,
"rewards/format_reward": 0.0015625000698491931,
"step": 20
},
{
"clip_ratio": 0.0,
"completion_length": 818.7475799560547,
"epoch": 0.31496062992125984,
"grad_norm": 0.5349999125528991,
"learning_rate": 8.650895363529172e-07,
"loss": 0.1813,
"num_tokens": 21690080.0,
"reward": 0.6300223484635353,
"reward_std": 0.3288935709744692,
"rewards/accuracy_reward": 0.630769232660532,
"rewards/format_reward": 0.0011160714784637094,
"step": 25
},
{
"clip_ratio": 0.0,
"completion_length": 779.5919998168945,
"epoch": 0.3779527559055118,
"grad_norm": 0.33764515237713627,
"learning_rate": 7.812246438203903e-07,
"loss": 0.1706,
"num_tokens": 25851452.0,
"reward": 0.6796875342726707,
"reward_std": 0.28653539419174195,
"rewards/accuracy_reward": 0.6794642806053162,
"rewards/format_reward": 0.00044642859138548373,
"step": 30
},
{
"clip_ratio": 0.0,
"completion_length": 764.461865234375,
"epoch": 0.4409448818897638,
"grad_norm": 0.17429005608156234,
"learning_rate": 6.836507988323784e-07,
"loss": 0.1559,
"num_tokens": 29947561.0,
"reward": 0.6868303909897804,
"reward_std": 0.27214415185153484,
"rewards/accuracy_reward": 0.6866071395576,
"rewards/format_reward": 0.00044642859138548373,
"step": 35
},
{
"clip_ratio": 0.0,
"completion_length": 762.9011520385742,
"epoch": 0.5039370078740157,
"grad_norm": 0.14957998644439033,
"learning_rate": 5.771244664826511e-07,
"loss": 0.1182,
"num_tokens": 34064990.0,
"reward": 0.6814732447266578,
"reward_std": 0.2671396616846323,
"rewards/accuracy_reward": 0.6814732149243354,
"rewards/format_reward": 0.0,
"step": 40
},
{
"clip_ratio": 0.0,
"completion_length": 744.8989166259765,
"epoch": 0.5669291338582677,
"grad_norm": 0.7244950959434313,
"learning_rate": 4.6683852178244817e-07,
"loss": 0.1165,
"num_tokens": 38094369.0,
"reward": 0.6755580686032772,
"reward_std": 0.26786904707551,
"rewards/accuracy_reward": 0.6754464261233807,
"rewards/format_reward": 0.00022321429569274187,
"step": 45
},
{
"clip_ratio": 0.0,
"completion_length": 758.1707931518555,
"epoch": 0.6299212598425197,
"grad_norm": 0.12379763987674317,
"learning_rate": 3.5816911083285164e-07,
"loss": 0.1091,
"num_tokens": 42164678.0,
"reward": 0.6937500357627868,
"reward_std": 0.2522166069597006,
"rewards/accuracy_reward": 0.6937499977648258,
"rewards/format_reward": 0.0,
"step": 50
},
{
"clip_ratio": 0.0,
"completion_length": 750.6433380126953,
"epoch": 0.6929133858267716,
"grad_norm": 0.22358072329593362,
"learning_rate": 2.5641357801960184e-07,
"loss": 0.1096,
"num_tokens": 46185264.0,
"reward": 0.6895089626312256,
"reward_std": 0.2542113933712244,
"rewards/accuracy_reward": 0.6895089261233807,
"rewards/format_reward": 0.0,
"step": 55
},
{
"clip_ratio": 0.0,
"completion_length": 730.3611877441406,
"epoch": 0.7559055118110236,
"grad_norm": 0.42247506921603206,
"learning_rate": 1.665322345816746e-07,
"loss": 0.0853,
"num_tokens": 50096882.0,
"reward": 0.7053571708500386,
"reward_std": 0.23149009980261326,
"rewards/accuracy_reward": 0.7053571432828903,
"rewards/format_reward": 0.0,
"step": 60
},
{
"clip_ratio": 0.0,
"completion_length": 739.0698974609375,
"epoch": 0.8188976377952756,
"grad_norm": 0.33241630532899313,
"learning_rate": 9.290655664821296e-08,
"loss": 0.0931,
"num_tokens": 54062275.0,
"reward": 0.7138393133878708,
"reward_std": 0.23870391435921193,
"rewards/accuracy_reward": 0.7138392895460128,
"rewards/format_reward": 0.0,
"step": 65
},
{
"clip_ratio": 0.0,
"completion_length": 713.727931213379,
"epoch": 0.8818897637795275,
"grad_norm": 0.2063115196222285,
"learning_rate": 3.912559994556086e-08,
"loss": 0.0924,
"num_tokens": 57926568.0,
"reward": 0.7283482469618321,
"reward_std": 0.23798817545175552,
"rewards/accuracy_reward": 0.7294986329972744,
"rewards/format_reward": 0.0,
"step": 70
},
{
"clip_ratio": 0.0,
"completion_length": 740.9406539916993,
"epoch": 0.9448818897637795,
"grad_norm": 0.1952309372717915,
"learning_rate": 7.811042888637209e-09,
"loss": 0.0778,
"num_tokens": 61914142.0,
"reward": 0.690178605914116,
"reward_std": 0.23276512399315835,
"rewards/accuracy_reward": 0.6901785716414451,
"rewards/format_reward": 0.0,
"step": 75
},
{
"clip_ratio": 0.0,
"completion_length": 768.2404909133911,
"epoch": 0.9952755905511811,
"num_tokens": 65183080.0,
"reward": 0.6978236902505159,
"reward_std": 0.24844580795615911,
"rewards/accuracy_reward": 0.6978236641734838,
"rewards/format_reward": 0.0,
"step": 79,
"total_flos": 0.0,
"train_loss": 0.1304081463361088,
"train_runtime": 17251.9813,
"train_samples_per_second": 0.515,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 79,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}