|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 395.25, |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 1.7927322387695312, |
|
"kl": 0.0012580822076415643, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 0.5056484336964786, |
|
"reward_std": 0.8683814308606088, |
|
"rewards/concensus_correctness_reward_func": 0.06012500077486038, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0625, |
|
"rewards/question_recreation_reward_func": 0.26574217714369297, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.11728124879300594, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 301.15, |
|
"epoch": 1.0, |
|
"grad_norm": 28.787370681762695, |
|
"kl": 0.007838598499074579, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 0.4848457515239716, |
|
"reward_std": 0.6104163944721221, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.1, |
|
"rewards/question_recreation_reward_func": 0.211995729804039, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.17284999787807465, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 366.0625, |
|
"epoch": 1.6153846153846154, |
|
"grad_norm": 1.4565224647521973, |
|
"kl": 0.0048014514686656184, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 0.5369042251259089, |
|
"reward_std": 0.7701209064107388, |
|
"rewards/concensus_correctness_reward_func": 0.1015625, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0625, |
|
"rewards/question_recreation_reward_func": 0.1854354883544147, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.12490624794736505, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 488.75, |
|
"epoch": 2.0, |
|
"grad_norm": 1.1058247089385986, |
|
"kl": 0.0015862735570408403, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 0.7044907063245773, |
|
"reward_std": 1.334967276453972, |
|
"rewards/concensus_correctness_reward_func": 0.09620000123977661, |
|
"rewards/consensus_reward_func": 0.1, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.6, |
|
"rewards/question_recreation_reward_func": 0.2195407159626484, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.3112500309944153, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 464.9375, |
|
"epoch": 2.6153846153846154, |
|
"grad_norm": 2.566697359085083, |
|
"kl": 0.0013416979272733442, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 0.562221004627645, |
|
"reward_std": 0.8246445022523403, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.25, |
|
"rewards/question_recreation_reward_func": 0.27678349521011114, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.03543749637901783, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 306.25, |
|
"epoch": 3.0, |
|
"grad_norm": 0.7933401465415955, |
|
"kl": 0.0031318686669692397, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 1.342978870868683, |
|
"reward_std": 1.444445651769638, |
|
"rewards/concensus_correctness_reward_func": 0.18480000495910645, |
|
"rewards/consensus_reward_func": 0.2, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.2, |
|
"rewards/question_recreation_reward_func": 0.37442886233329775, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.3837499976158142, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 367.0625, |
|
"epoch": 3.6153846153846154, |
|
"grad_norm": 1.901016354560852, |
|
"kl": 0.0015947269203024916, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 0.48512101359665394, |
|
"reward_std": 0.8020865241996944, |
|
"rewards/concensus_correctness_reward_func": 0.0234375, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.125, |
|
"rewards/question_recreation_reward_func": 0.2969335000962019, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.03974999859929085, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 421.6, |
|
"epoch": 4.0, |
|
"grad_norm": 1.3564997911453247, |
|
"kl": 0.004650218156166374, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 0.2924229323863983, |
|
"reward_std": 0.7882522225379944, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.27797292321920397, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.01444999873638153, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 260.75, |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 1.8875579833984375, |
|
"kl": 0.003812392649706453, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 0.4502076015342027, |
|
"reward_std": 0.5057817947817966, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0625, |
|
"rewards/question_recreation_reward_func": 0.3271451264154166, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.060562501195818186, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 399.15, |
|
"epoch": 5.0, |
|
"grad_norm": 1.042950987815857, |
|
"kl": 0.001434546068776399, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 1.0424630206078291, |
|
"reward_std": 0.9998365689069033, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.5, |
|
"rewards/question_recreation_reward_func": 0.273763046041131, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.2687000036239624, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 2.451066382036515e-06, |
|
"train_runtime": 531.9792, |
|
"train_samples_per_second": 0.602, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|