|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.997867803837953, |
|
"eval_steps": 100, |
|
"global_step": 117, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 620.6149749755859, |
|
"epoch": 0.008528784648187633, |
|
"grad_norm": 0.43470078706741333, |
|
"kl": 0.0, |
|
"learning_rate": 2.5e-07, |
|
"loss": 0.0087, |
|
"reward": 0.611607164144516, |
|
"reward_std": 0.3698773570358753, |
|
"rewards/accuracy_reward": 0.611607164144516, |
|
"rewards/format_reward": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 593.875862121582, |
|
"epoch": 0.042643923240938165, |
|
"grad_norm": 0.43776455521583557, |
|
"kl": 0.00015407800674438477, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.0231, |
|
"reward": 0.6205357443541288, |
|
"reward_std": 0.37901367666199803, |
|
"rewards/accuracy_reward": 0.6205357443541288, |
|
"rewards/format_reward": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 616.8395332336426, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 0.8126201033592224, |
|
"kl": 0.002016162872314453, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.043, |
|
"reward": 0.6250000275671482, |
|
"reward_std": 0.347636329382658, |
|
"rewards/accuracy_reward": 0.6250000275671482, |
|
"rewards/format_reward": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 610.7120796203614, |
|
"epoch": 0.1279317697228145, |
|
"grad_norm": 0.1547478586435318, |
|
"kl": 0.013087844848632813, |
|
"learning_rate": 2.993961440992859e-06, |
|
"loss": 0.0896, |
|
"reward": 0.7162946730852127, |
|
"reward_std": 0.2822846982628107, |
|
"rewards/accuracy_reward": 0.7162946730852127, |
|
"rewards/format_reward": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 612.700473022461, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 0.24769772589206696, |
|
"kl": 0.01591835021972656, |
|
"learning_rate": 2.957235057439301e-06, |
|
"loss": 0.0769, |
|
"reward": 0.7506696745753288, |
|
"reward_std": 0.23058689776808022, |
|
"rewards/accuracy_reward": 0.7506696745753288, |
|
"rewards/format_reward": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 575.4734634399414, |
|
"epoch": 0.21321961620469082, |
|
"grad_norm": 1.411234736442566, |
|
"kl": 0.006292724609375, |
|
"learning_rate": 2.887956450710995e-06, |
|
"loss": 0.0598, |
|
"reward": 0.8000000357627869, |
|
"reward_std": 0.18773337844759225, |
|
"rewards/accuracy_reward": 0.8000000357627869, |
|
"rewards/format_reward": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 579.8424324035644, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 0.17543800175189972, |
|
"kl": 0.004089736938476562, |
|
"learning_rate": 2.7876731904027993e-06, |
|
"loss": 0.0551, |
|
"reward": 0.7698661036789417, |
|
"reward_std": 0.20191936586052178, |
|
"rewards/accuracy_reward": 0.7698661036789417, |
|
"rewards/format_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 577.5357452392578, |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 0.14460965991020203, |
|
"kl": 0.004575347900390625, |
|
"learning_rate": 2.6586254388368995e-06, |
|
"loss": 0.0494, |
|
"reward": 0.7982143223285675, |
|
"reward_std": 0.17998763117939234, |
|
"rewards/accuracy_reward": 0.7982143223285675, |
|
"rewards/format_reward": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 573.0772575378418, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 0.10168638080358505, |
|
"kl": 0.004691314697265625, |
|
"learning_rate": 2.5036959095382875e-06, |
|
"loss": 0.0403, |
|
"reward": 0.7850446775555611, |
|
"reward_std": 0.1875380737707019, |
|
"rewards/accuracy_reward": 0.7850446775555611, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 583.6614151000977, |
|
"epoch": 0.3837953091684435, |
|
"grad_norm": 0.15747234225273132, |
|
"kl": 0.007823562622070313, |
|
"learning_rate": 2.3263454721781537e-06, |
|
"loss": 0.042, |
|
"reward": 0.8042411088943482, |
|
"reward_std": 0.17801097435876728, |
|
"rewards/accuracy_reward": 0.8042411088943482, |
|
"rewards/format_reward": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 582.4127494812012, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 0.13581737875938416, |
|
"kl": 0.005602645874023438, |
|
"learning_rate": 2.1305358424643485e-06, |
|
"loss": 0.04, |
|
"reward": 0.7600446760654449, |
|
"reward_std": 0.18583385180681944, |
|
"rewards/accuracy_reward": 0.7600446760654449, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 570.5196685791016, |
|
"epoch": 0.4690831556503198, |
|
"grad_norm": 0.14316147565841675, |
|
"kl": 0.006391143798828125, |
|
"learning_rate": 1.9206410839590043e-06, |
|
"loss": 0.0253, |
|
"reward": 0.770982176065445, |
|
"reward_std": 0.1873089073225856, |
|
"rewards/accuracy_reward": 0.770982176065445, |
|
"rewards/format_reward": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 597.8089546203613, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 0.13278549909591675, |
|
"kl": 0.006211090087890625, |
|
"learning_rate": 1.7013498987264833e-06, |
|
"loss": 0.0323, |
|
"reward": 0.7582589626312256, |
|
"reward_std": 0.19469668436795473, |
|
"rewards/accuracy_reward": 0.7582589626312256, |
|
"rewards/format_reward": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 570.72748336792, |
|
"epoch": 0.5543710021321961, |
|
"grad_norm": 0.11964963376522064, |
|
"kl": 0.0067108154296875, |
|
"learning_rate": 1.4775608894771048e-06, |
|
"loss": 0.0285, |
|
"reward": 0.7604911118745804, |
|
"reward_std": 0.17904917486011981, |
|
"rewards/accuracy_reward": 0.7604911118745804, |
|
"rewards/format_reward": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 583.9893104553223, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.13020634651184082, |
|
"kl": 0.005590057373046875, |
|
"learning_rate": 1.2542731328772936e-06, |
|
"loss": 0.0335, |
|
"reward": 0.7493303909897804, |
|
"reward_std": 0.19070985857397318, |
|
"rewards/accuracy_reward": 0.7493303909897804, |
|
"rewards/format_reward": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 552.5370803833008, |
|
"epoch": 0.6396588486140725, |
|
"grad_norm": 0.193593829870224, |
|
"kl": 0.00690155029296875, |
|
"learning_rate": 1.036474508437579e-06, |
|
"loss": 0.0293, |
|
"reward": 0.7955357491970062, |
|
"reward_std": 0.17065229499712586, |
|
"rewards/accuracy_reward": 0.7955357491970062, |
|
"rewards/format_reward": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 574.2089538574219, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 0.26767051219940186, |
|
"kl": 0.006514739990234375, |
|
"learning_rate": 8.290302775265509e-07, |
|
"loss": 0.0359, |
|
"reward": 0.7752232506871224, |
|
"reward_std": 0.17853977270424365, |
|
"rewards/accuracy_reward": 0.7752232506871224, |
|
"rewards/format_reward": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 587.2473434448242, |
|
"epoch": 0.7249466950959488, |
|
"grad_norm": 0.14498883485794067, |
|
"kl": 0.008393096923828124, |
|
"learning_rate": 6.3657440147149e-07, |
|
"loss": 0.0378, |
|
"reward": 0.77857146859169, |
|
"reward_std": 0.1991961758583784, |
|
"rewards/accuracy_reward": 0.77857146859169, |
|
"rewards/format_reward": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 571.0614074707031, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 0.3927496671676636, |
|
"kl": 0.00813751220703125, |
|
"learning_rate": 4.63406026519703e-07, |
|
"loss": 0.0197, |
|
"reward": 0.753348246216774, |
|
"reward_std": 0.18298904253169895, |
|
"rewards/accuracy_reward": 0.753348246216774, |
|
"rewards/format_reward": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 579.4897560119629, |
|
"epoch": 0.8102345415778252, |
|
"grad_norm": 0.15556038916110992, |
|
"kl": 0.007627105712890625, |
|
"learning_rate": 3.133934480154885e-07, |
|
"loss": 0.0348, |
|
"reward": 0.7566964641213417, |
|
"reward_std": 0.20147906215861439, |
|
"rewards/accuracy_reward": 0.7566964641213417, |
|
"rewards/format_reward": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 0.397614985704422, |
|
"learning_rate": 1.8988769907430552e-07, |
|
"loss": 0.0301, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8528784648187633, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completion_length": 558.2626213378907, |
|
"eval_kl": 0.02655439453125, |
|
"eval_loss": 0.01617129147052765, |
|
"eval_reward": 0.6867000318527222, |
|
"eval_reward_std": 0.22212331930994988, |
|
"eval_rewards/accuracy_reward": 0.6867000318527222, |
|
"eval_rewards/format_reward": 0.0, |
|
"eval_runtime": 17227.6498, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.003, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 574.3294898986817, |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 0.7429274916648865, |
|
"kl": 0.007941818237304688, |
|
"learning_rate": 9.564769404039419e-08, |
|
"loss": 0.0334, |
|
"reward": 0.7816964637488126, |
|
"reward_std": 0.18899818495847284, |
|
"rewards/accuracy_reward": 0.7816964637488126, |
|
"rewards/format_reward": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 574.8442253112793, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 0.1576305478811264, |
|
"kl": 0.00836029052734375, |
|
"learning_rate": 3.277859889929147e-08, |
|
"loss": 0.0323, |
|
"reward": 0.8006696775555611, |
|
"reward_std": 0.19058242589235305, |
|
"rewards/accuracy_reward": 0.8006696775555611, |
|
"rewards/format_reward": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 558.2982414245605, |
|
"epoch": 0.9808102345415778, |
|
"grad_norm": 0.16698426008224487, |
|
"kl": 0.0088043212890625, |
|
"learning_rate": 2.684805348397268e-09, |
|
"loss": 0.0311, |
|
"reward": 0.7834821820259095, |
|
"reward_std": 0.17699794424697757, |
|
"rewards/accuracy_reward": 0.7834821820259095, |
|
"rewards/format_reward": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 580.5032634735107, |
|
"epoch": 0.997867803837953, |
|
"kl": 0.008594512939453125, |
|
"reward": 0.7589286100119352, |
|
"reward_std": 0.21703570941463113, |
|
"rewards/accuracy_reward": 0.7589286100119352, |
|
"rewards/format_reward": 0.0, |
|
"step": 117, |
|
"total_flos": 0.0, |
|
"train_loss": 0.03906208295056708, |
|
"train_runtime": 48868.7895, |
|
"train_samples_per_second": 0.153, |
|
"train_steps_per_second": 0.002 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 117, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|