AbSuLaTeZERO's picture
End of training
be81b7f verified
raw
history blame
8.27 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 395.25,
"epoch": 0.6153846153846154,
"grad_norm": 1.7927322387695312,
"kl": 0.0012580822076415643,
"learning_rate": 5e-07,
"loss": 0.0,
"reward": 0.5056484336964786,
"reward_std": 0.8683814308606088,
"rewards/concensus_correctness_reward_func": 0.06012500077486038,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.26574217714369297,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.11728124879300594,
"step": 2
},
{
"completion_length": 301.15,
"epoch": 1.0,
"grad_norm": 28.787370681762695,
"kl": 0.007838598499074579,
"learning_rate": 4.864543104251586e-07,
"loss": 0.0,
"reward": 0.4848457515239716,
"reward_std": 0.6104163944721221,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1,
"rewards/question_recreation_reward_func": 0.211995729804039,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.17284999787807465,
"step": 4
},
{
"completion_length": 366.0625,
"epoch": 1.6153846153846154,
"grad_norm": 1.4565224647521973,
"kl": 0.0048014514686656184,
"learning_rate": 4.472851273490984e-07,
"loss": 0.0,
"reward": 0.5369042251259089,
"reward_std": 0.7701209064107388,
"rewards/concensus_correctness_reward_func": 0.1015625,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.1854354883544147,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.12490624794736505,
"step": 6
},
{
"completion_length": 488.75,
"epoch": 2.0,
"grad_norm": 1.1058247089385986,
"kl": 0.0015862735570408403,
"learning_rate": 3.867370395306068e-07,
"loss": 0.0,
"reward": 0.7044907063245773,
"reward_std": 1.334967276453972,
"rewards/concensus_correctness_reward_func": 0.09620000123977661,
"rewards/consensus_reward_func": 0.1,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.6,
"rewards/question_recreation_reward_func": 0.2195407159626484,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.3112500309944153,
"step": 8
},
{
"completion_length": 464.9375,
"epoch": 2.6153846153846154,
"grad_norm": 2.566697359085083,
"kl": 0.0013416979272733442,
"learning_rate": 3.1137137178519977e-07,
"loss": 0.0,
"reward": 0.562221004627645,
"reward_std": 0.8246445022523403,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.27678349521011114,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.03543749637901783,
"step": 10
},
{
"completion_length": 306.25,
"epoch": 3.0,
"grad_norm": 0.7933401465415955,
"kl": 0.0031318686669692397,
"learning_rate": 2.2935516363191693e-07,
"loss": 0.0,
"reward": 1.342978870868683,
"reward_std": 1.444445651769638,
"rewards/concensus_correctness_reward_func": 0.18480000495910645,
"rewards/consensus_reward_func": 0.2,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.2,
"rewards/question_recreation_reward_func": 0.37442886233329775,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.3837499976158142,
"step": 12
},
{
"completion_length": 367.0625,
"epoch": 3.6153846153846154,
"grad_norm": 1.901016354560852,
"kl": 0.0015947269203024916,
"learning_rate": 1.4957614383675767e-07,
"loss": 0.0,
"reward": 0.48512101359665394,
"reward_std": 0.8020865241996944,
"rewards/concensus_correctness_reward_func": 0.0234375,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.2969335000962019,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.03974999859929085,
"step": 14
},
{
"completion_length": 421.6,
"epoch": 4.0,
"grad_norm": 1.3564997911453247,
"kl": 0.004650218156166374,
"learning_rate": 8.067960709356478e-08,
"loss": 0.0,
"reward": 0.2924229323863983,
"reward_std": 0.7882522225379944,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.27797292321920397,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.01444999873638153,
"step": 16
},
{
"completion_length": 260.75,
"epoch": 4.615384615384615,
"grad_norm": 1.8875579833984375,
"kl": 0.003812392649706453,
"learning_rate": 3.013156219837776e-08,
"loss": 0.0,
"reward": 0.4502076015342027,
"reward_std": 0.5057817947817966,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.3271451264154166,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.060562501195818186,
"step": 18
},
{
"completion_length": 399.15,
"epoch": 5.0,
"grad_norm": 1.042950987815857,
"kl": 0.001434546068776399,
"learning_rate": 3.4096741493194193e-09,
"loss": 0.0,
"reward": 1.0424630206078291,
"reward_std": 0.9998365689069033,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.5,
"rewards/question_recreation_reward_func": 0.273763046041131,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.2687000036239624,
"step": 20
},
{
"epoch": 5.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 2.451066382036515e-06,
"train_runtime": 531.9792,
"train_samples_per_second": 0.602,
"train_steps_per_second": 0.038
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}