hazentr's picture
End of training
091de00 verified
raw
history blame
9.59 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.8,
"eval_steps": 500,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 812.5,
"completions/mean_length": 647.75,
"completions/mean_terminated_length": 475.3166809082031,
"completions/min_length": 138.5,
"completions/min_terminated_length": 138.5,
"epoch": 1.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.236729621887207,
"kl": 0.00027297700398776215,
"learning_rate": 5e-07,
"loss": -0.001,
"num_tokens": 14460.0,
"reward": 0.14074324816465378,
"reward_std": 0.12783230934292078,
"rewards/concensus_correctness_reward_func/mean": 0.07774999737739563,
"rewards/concensus_correctness_reward_func/std": 0.21991021931171417,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.06299325078725815,
"rewards/question_recreation_reward_func/std": 0.018077346496284008,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 754.0,
"completions/mean_length": 397.25,
"completions/mean_terminated_length": 251.08333587646484,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 3.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 4.27091646194458,
"kl": 0.0006203544398886152,
"learning_rate": 4.864543104251586e-07,
"loss": 0.0127,
"num_tokens": 24912.0,
"reward": 0.1337380139157176,
"reward_std": 0.1742639576550573,
"rewards/concensus_correctness_reward_func/mean": 0.11999999731779099,
"rewards/concensus_correctness_reward_func/std": 0.33941125869750977,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.013738006353378296,
"rewards/question_recreation_reward_func/std": 0.010505724931135774,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 846.5,
"completions/mean_length": 528.4375,
"completions/mean_terminated_length": 422.8214416503906,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 5.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.0706632137298584,
"kl": 0.0008973750846053008,
"learning_rate": 4.472851273490984e-07,
"loss": 0.0403,
"num_tokens": 37463.0,
"reward": 0.12599835265427828,
"reward_std": 0.16678897803649306,
"rewards/concensus_correctness_reward_func/mean": 0.1107499971985817,
"rewards/concensus_correctness_reward_func/std": 0.31324827671051025,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.015248352661728859,
"rewards/question_recreation_reward_func/std": 0.015421947930008173,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.3125,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 744.5,
"completions/mean_length": 588.75,
"completions/mean_terminated_length": 384.3666687011719,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 7.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.6464202404022217,
"kl": 0.0010676352321752347,
"learning_rate": 3.867370395306068e-07,
"loss": 0.1472,
"num_tokens": 50979.0,
"reward": 0.05322604067623615,
"reward_std": 0.02834410360082984,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.05322604067623615,
"rewards/question_recreation_reward_func/std": 0.03214400727301836,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.375,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 817.5,
"completions/mean_length": 669.4375,
"completions/mean_terminated_length": 470.0,
"completions/min_length": 148.5,
"completions/min_terminated_length": 148.5,
"epoch": 9.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 3.48903226852417,
"kl": 0.0014760679550818168,
"learning_rate": 3.1137137178519977e-07,
"loss": 0.2054,
"num_tokens": 65786.0,
"reward": 0.011943170800805092,
"reward_std": 0.0064218416810035706,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.011943170800805092,
"rewards/question_recreation_reward_func/std": 0.0063162968726828694,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 10
},
{
"epoch": 9.8,
"step": 10,
"total_flos": 0.0,
"train_loss": 0.08094322010874748,
"train_runtime": 956.1563,
"train_samples_per_second": 0.167,
"train_steps_per_second": 0.021
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 65786,
"num_train_epochs": 10,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}