chenggong1995's picture
Model save
aa46c0a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9954198473282443,
"eval_steps": 2000000,
"global_step": 163,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completion_length": 702.7132034301758,
"epoch": 0.0061068702290076335,
"grad_norm": 0.13332052615614565,
"kl": 0.0,
"learning_rate": 5.88235294117647e-08,
"loss": 0.0341,
"num_tokens": 880751.0,
"reward": 0.770089328289032,
"reward_std": 0.13822399266064167,
"rewards/accuracy_reward": 0.27678571827709675,
"rewards/format_reward": 0.9866071343421936,
"step": 1
},
{
"clip_ratio": 0.0,
"completion_length": 739.6551666259766,
"epoch": 0.030534351145038167,
"grad_norm": 0.2044588463598013,
"kl": 5.408376455307007e-05,
"learning_rate": 2.941176470588235e-07,
"loss": 0.0268,
"num_tokens": 4507619.0,
"reward": 0.7359096258878708,
"reward_std": 0.1397800410632044,
"rewards/accuracy_reward": 0.24414062616415322,
"rewards/format_reward": 0.9835379421710968,
"step": 5
},
{
"clip_ratio": 0.0,
"completion_length": 739.3268173217773,
"epoch": 0.061068702290076333,
"grad_norm": 0.2606158264843265,
"kl": 7.957220077514648e-05,
"learning_rate": 5.88235294117647e-07,
"loss": 0.0332,
"num_tokens": 9079091.0,
"reward": 0.7665178939700127,
"reward_std": 0.14487640419974923,
"rewards/accuracy_reward": 0.2741071430966258,
"rewards/format_reward": 0.9848214223980903,
"step": 10
},
{
"clip_ratio": 0.0,
"completion_length": 733.3120864868164,
"epoch": 0.0916030534351145,
"grad_norm": 0.12473067773197893,
"kl": 0.0001289844512939453,
"learning_rate": 8.823529411764705e-07,
"loss": 0.0328,
"num_tokens": 13590105.0,
"reward": 0.7473214596509934,
"reward_std": 0.1489832980558276,
"rewards/accuracy_reward": 0.2566964281722903,
"rewards/format_reward": 0.9812499925494194,
"step": 15
},
{
"clip_ratio": 0.0,
"completion_length": 713.0060607910157,
"epoch": 0.12213740458015267,
"grad_norm": 0.10652200791094474,
"kl": 0.0002826213836669922,
"learning_rate": 9.989585804326962e-07,
"loss": 0.0185,
"num_tokens": 18037460.0,
"reward": 0.7446428909897804,
"reward_std": 0.13395796837285162,
"rewards/accuracy_reward": 0.2495535710826516,
"rewards/format_reward": 0.9901785641908646,
"step": 20
},
{
"clip_ratio": 0.0,
"completion_length": 721.1808349609375,
"epoch": 0.15267175572519084,
"grad_norm": 0.20214832387838885,
"kl": 0.0041507244110107425,
"learning_rate": 9.926100533780304e-07,
"loss": 0.0254,
"num_tokens": 22546158.0,
"reward": 0.7579241439700126,
"reward_std": 0.14215883370488883,
"rewards/accuracy_reward": 0.26495535727590325,
"rewards/format_reward": 0.9859374895691871,
"step": 25
},
{
"clip_ratio": 0.0,
"completion_length": 742.2864181518555,
"epoch": 0.183206106870229,
"grad_norm": 0.1594816777583089,
"kl": 0.0016126632690429688,
"learning_rate": 9.805648919361503e-07,
"loss": 0.0357,
"num_tokens": 27073905.0,
"reward": 0.7584821790456772,
"reward_std": 0.12880926295183598,
"rewards/accuracy_reward": 0.2645089291036129,
"rewards/format_reward": 0.9879464194178581,
"step": 30
},
{
"clip_ratio": 0.0,
"completion_length": 725.3341888427734,
"epoch": 0.21374045801526717,
"grad_norm": 0.09889141258603189,
"kl": 0.001922607421875,
"learning_rate": 9.62962388596925e-07,
"loss": 0.0234,
"num_tokens": 31605946.0,
"reward": 0.7930803909897804,
"reward_std": 0.12912328215315938,
"rewards/accuracy_reward": 0.2970982141792774,
"rewards/format_reward": 0.9919642791152,
"step": 35
},
{
"clip_ratio": 0.0,
"completion_length": 710.5219085693359,
"epoch": 0.24427480916030533,
"grad_norm": 0.12468270475594573,
"kl": 0.0033203125,
"learning_rate": 9.400061019867678e-07,
"loss": 0.0202,
"num_tokens": 36024036.0,
"reward": 0.7656250327825547,
"reward_std": 0.12553255576640368,
"rewards/accuracy_reward": 0.26986607145518066,
"rewards/format_reward": 0.9915178537368774,
"step": 40
},
{
"clip_ratio": 0.0,
"completion_length": 691.2645370483399,
"epoch": 0.2748091603053435,
"grad_norm": 0.10338268423256407,
"kl": 0.004708480834960937,
"learning_rate": 9.11961502878777e-07,
"loss": 0.025,
"num_tokens": 40342221.0,
"reward": 0.789732177555561,
"reward_std": 0.13373914500698447,
"rewards/accuracy_reward": 0.29419642593711615,
"rewards/format_reward": 0.9910714223980903,
"step": 45
},
{
"clip_ratio": 0.0,
"completion_length": 689.1975814819336,
"epoch": 0.3053435114503817,
"grad_norm": 0.11801883674056522,
"kl": 0.006087493896484375,
"learning_rate": 8.791529042392812e-07,
"loss": 0.0221,
"num_tokens": 44648378.0,
"reward": 0.7669643178582192,
"reward_std": 0.12678753938525916,
"rewards/accuracy_reward": 0.2709821423981339,
"rewards/format_reward": 0.9919642791152,
"step": 50
},
{
"clip_ratio": 0.0,
"completion_length": 681.3627517700195,
"epoch": 0.33587786259541985,
"grad_norm": 0.13594270458309027,
"kl": 0.007537078857421875,
"learning_rate": 8.419597108123053e-07,
"loss": 0.0159,
"num_tokens": 48948507.0,
"reward": 0.7919643267989158,
"reward_std": 0.1272535071708262,
"rewards/accuracy_reward": 0.2941964280791581,
"rewards/format_reward": 0.9955357074737549,
"step": 55
},
{
"clip_ratio": 0.0,
"completion_length": 667.7931106567382,
"epoch": 0.366412213740458,
"grad_norm": 0.12780945861669307,
"kl": 0.00973663330078125,
"learning_rate": 8.008120316124611e-07,
"loss": 0.0151,
"num_tokens": 53166244.0,
"reward": 0.7524553954601287,
"reward_std": 0.12692366167902946,
"rewards/accuracy_reward": 0.25491071604192256,
"rewards/format_reward": 0.9950892791152001,
"step": 60
},
{
"clip_ratio": 0.0,
"completion_length": 696.0245849609375,
"epoch": 0.3969465648854962,
"grad_norm": 0.12051116100632073,
"kl": 0.01003570556640625,
"learning_rate": 7.561857060642119e-07,
"loss": 0.0107,
"num_tokens": 57558962.0,
"reward": 0.7633928894996643,
"reward_std": 0.13064181264489888,
"rewards/accuracy_reward": 0.26562499944120643,
"rewards/format_reward": 0.9955357104539871,
"step": 65
},
{
"clip_ratio": 0.0,
"completion_length": 662.5156509399415,
"epoch": 0.42748091603053434,
"grad_norm": 0.11229836339791986,
"kl": 0.01019439697265625,
"learning_rate": 7.085968013061584e-07,
"loss": 0.0178,
"num_tokens": 61733288.0,
"reward": 0.7448661029338837,
"reward_std": 0.11164179369807244,
"rewards/accuracy_reward": 0.2475446429103613,
"rewards/format_reward": 0.9946428507566452,
"step": 70
},
{
"clip_ratio": 0.0,
"completion_length": 682.3205673217774,
"epoch": 0.4580152671755725,
"grad_norm": 0.12731781754060964,
"kl": 0.009069061279296875,
"learning_rate": 6.585956442945531e-07,
"loss": 0.0177,
"num_tokens": 66027892.0,
"reward": 0.7617187812924385,
"reward_std": 0.11499134246259927,
"rewards/accuracy_reward": 0.26450892849825325,
"rewards/format_reward": 0.9944196343421936,
"step": 75
},
{
"clip_ratio": 0.0,
"completion_length": 700.5924423217773,
"epoch": 0.48854961832061067,
"grad_norm": 0.11204035822940968,
"kl": 0.009134674072265625,
"learning_rate": 6.06760457719898e-07,
"loss": 0.0133,
"num_tokens": 70433842.0,
"reward": 0.8136161074042321,
"reward_std": 0.12341635385528207,
"rewards/accuracy_reward": 0.3149553577415645,
"rewards/format_reward": 0.9973214238882064,
"step": 80
},
{
"clip_ratio": 0.0,
"completion_length": 667.9774856567383,
"epoch": 0.5190839694656488,
"grad_norm": 0.08504179774118455,
"kl": 0.010693359375,
"learning_rate": 5.536906733320815e-07,
"loss": 0.0095,
"num_tokens": 74684629.0,
"reward": 0.7792411088943482,
"reward_std": 0.10279256403446198,
"rewards/accuracy_reward": 0.28125000055879357,
"rewards/format_reward": 0.9959821373224258,
"step": 85
},
{
"clip_ratio": 0.0,
"completion_length": 672.8207870483399,
"epoch": 0.549618320610687,
"grad_norm": 0.09677151139504983,
"kl": 0.0111236572265625,
"learning_rate": 5e-07,
"loss": 0.0082,
"num_tokens": 78941554.0,
"reward": 0.7789062857627869,
"reward_std": 0.10907009486109018,
"rewards/accuracy_reward": 0.2801339304074645,
"rewards/format_reward": 0.997544638812542,
"step": 90
},
{
"clip_ratio": 0.0,
"completion_length": 676.2569496154786,
"epoch": 0.5801526717557252,
"grad_norm": 0.11724442552756359,
"kl": 0.0115997314453125,
"learning_rate": 4.463093266679185e-07,
"loss": 0.0106,
"num_tokens": 83189777.0,
"reward": 0.7709821790456772,
"reward_std": 0.11432018820196391,
"rewards/accuracy_reward": 0.27232142791617664,
"rewards/format_reward": 0.9973214238882064,
"step": 95
},
{
"clip_ratio": 0.0,
"completion_length": 675.6011459350586,
"epoch": 0.6106870229007634,
"grad_norm": 0.10650310445039746,
"kl": 0.01142425537109375,
"learning_rate": 3.932395422801019e-07,
"loss": 0.0117,
"num_tokens": 87446302.0,
"reward": 0.796875037252903,
"reward_std": 0.11783077660948038,
"rewards/accuracy_reward": 0.2979910722468048,
"rewards/format_reward": 0.9977678522467613,
"step": 100
},
{
"clip_ratio": 0.0,
"completion_length": 666.8801651000977,
"epoch": 0.6412213740458015,
"grad_norm": 0.10284875734947345,
"kl": 0.01230621337890625,
"learning_rate": 3.41404355705447e-07,
"loss": 0.0094,
"num_tokens": 91633661.0,
"reward": 0.8254464611411094,
"reward_std": 0.11524229180067777,
"rewards/accuracy_reward": 0.3265624988824129,
"rewards/format_reward": 0.9977678552269935,
"step": 105
},
{
"clip_ratio": 0.0,
"completion_length": 708.1185592651367,
"epoch": 0.6717557251908397,
"grad_norm": 0.09648724954516992,
"kl": 0.0115264892578125,
"learning_rate": 2.914031986938417e-07,
"loss": 0.0124,
"num_tokens": 96055112.0,
"reward": 0.7448661044239998,
"reward_std": 0.12669871849939227,
"rewards/accuracy_reward": 0.24642857336439192,
"rewards/format_reward": 0.9968749955296516,
"step": 110
},
{
"clip_ratio": 0.0,
"completion_length": 677.4897605895997,
"epoch": 0.7022900763358778,
"grad_norm": 0.1011113055763225,
"kl": 0.0118255615234375,
"learning_rate": 2.4381429393578815e-07,
"loss": 0.009,
"num_tokens": 100317890.0,
"reward": 0.7813616394996643,
"reward_std": 0.1330147437751293,
"rewards/accuracy_reward": 0.2825892847031355,
"rewards/format_reward": 0.997544638812542,
"step": 115
},
{
"clip_ratio": 0.0,
"completion_length": 678.3256973266601,
"epoch": 0.732824427480916,
"grad_norm": 0.15245123106678932,
"kl": 0.01221771240234375,
"learning_rate": 1.991879683875386e-07,
"loss": 0.0104,
"num_tokens": 104592629.0,
"reward": 0.7625000357627869,
"reward_std": 0.11710045160725713,
"rewards/accuracy_reward": 0.2633928569033742,
"rewards/format_reward": 0.9982142806053161,
"step": 120
},
{
"clip_ratio": 0.0,
"completion_length": 670.6040435791016,
"epoch": 0.7633587786259542,
"grad_norm": 0.11742459382336433,
"kl": 0.01245880126953125,
"learning_rate": 1.5804028918769485e-07,
"loss": 0.0114,
"num_tokens": 108835759.0,
"reward": 0.7925223559141159,
"reward_std": 0.1248929288238287,
"rewards/accuracy_reward": 0.29352678582072256,
"rewards/format_reward": 0.9979910671710968,
"step": 125
},
{
"clip_ratio": 0.0,
"completion_length": 659.5288223266601,
"epoch": 0.7938931297709924,
"grad_norm": 0.09688477664215772,
"kl": 0.012872314453125,
"learning_rate": 1.2084709576071883e-07,
"loss": 0.0099,
"num_tokens": 112970592.0,
"reward": 0.7906250342726707,
"reward_std": 0.12112973481416703,
"rewards/accuracy_reward": 0.29174106996506455,
"rewards/format_reward": 0.9977678537368775,
"step": 130
},
{
"clip_ratio": 0.0,
"completion_length": 698.9047195434571,
"epoch": 0.8244274809160306,
"grad_norm": 0.10609676202620595,
"kl": 0.0119049072265625,
"learning_rate": 8.803849712122291e-08,
"loss": 0.0084,
"num_tokens": 117351077.0,
"reward": 0.7845982551574707,
"reward_std": 0.11648994972929358,
"rewards/accuracy_reward": 0.28571428507566454,
"rewards/format_reward": 0.9977678522467613,
"step": 135
},
{
"clip_ratio": 0.0,
"completion_length": 688.1453430175782,
"epoch": 0.8549618320610687,
"grad_norm": 0.08115857291878435,
"kl": 0.0126739501953125,
"learning_rate": 5.999389801323218e-08,
"loss": 0.0113,
"num_tokens": 121661224.0,
"reward": 0.8302455723285675,
"reward_std": 0.12670655427500604,
"rewards/accuracy_reward": 0.3314732125028968,
"rewards/format_reward": 0.9975446373224258,
"step": 140
},
{
"clip_ratio": 0.0,
"completion_length": 672.133511352539,
"epoch": 0.8854961832061069,
"grad_norm": 0.10287154621758528,
"kl": 0.01266937255859375,
"learning_rate": 3.7037611403075096e-08,
"loss": 0.0093,
"num_tokens": 125919062.0,
"reward": 0.8181920021772384,
"reward_std": 0.12157530700787902,
"rewards/accuracy_reward": 0.3189732149243355,
"rewards/format_reward": 0.9984374955296517,
"step": 145
},
{
"clip_ratio": 0.0,
"completion_length": 659.8361923217774,
"epoch": 0.916030534351145,
"grad_norm": 0.11539905461898202,
"kl": 0.01181793212890625,
"learning_rate": 1.943510806384968e-08,
"loss": 0.0061,
"num_tokens": 130131040.0,
"reward": 0.781919676065445,
"reward_std": 0.10376817025244237,
"rewards/accuracy_reward": 0.2823660712689161,
"rewards/format_reward": 0.9991071403026581,
"step": 150
},
{
"clip_ratio": 0.0,
"completion_length": 663.9966735839844,
"epoch": 0.9465648854961832,
"grad_norm": 0.14485603251022727,
"kl": 0.01304473876953125,
"learning_rate": 7.389946621969678e-09,
"loss": 0.0114,
"num_tokens": 134339337.0,
"reward": 0.759709857404232,
"reward_std": 0.12261311169713736,
"rewards/accuracy_reward": 0.26138392817229034,
"rewards/format_reward": 0.996651777625084,
"step": 155
},
{
"clip_ratio": 0.0,
"completion_length": 664.3817291259766,
"epoch": 0.9770992366412213,
"grad_norm": 0.10778077233274512,
"kl": 0.0122100830078125,
"learning_rate": 1.0414195673039138e-09,
"loss": 0.0035,
"num_tokens": 138539071.0,
"reward": 0.7744419991970062,
"reward_std": 0.10305451611056923,
"rewards/accuracy_reward": 0.27477678619325163,
"rewards/format_reward": 0.9993303567171097,
"step": 160
},
{
"clip_ratio": 0.0,
"completion_length": 704.554547627767,
"epoch": 0.9954198473282443,
"kl": 0.012700398763020834,
"num_tokens": 141142234.0,
"reward": 0.788318489988645,
"reward_std": 0.12267326470464468,
"rewards/accuracy_reward": 0.2898065475746989,
"rewards/format_reward": 0.9970238034923872,
"step": 163,
"total_flos": 0.0,
"train_loss": 0.015952993104337183,
"train_runtime": 32110.3153,
"train_samples_per_second": 0.571,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 163,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}