hazentr's picture
End of training
a0da2eb verified
raw
history blame
9.48 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.8,
"eval_steps": 500,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1875,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 390.0,
"completions/mean_length": 320.5,
"completions/mean_terminated_length": 166.0357151031494,
"completions/min_length": 18.5,
"completions/min_terminated_length": 18.5,
"epoch": 1.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 13.776973724365234,
"kl": 0.0,
"learning_rate": 5e-07,
"loss": 0.0991,
"num_tokens": 9224.0,
"reward": 0.06882327049970627,
"reward_std": 0.029990280978381634,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.06882327049970627,
"rewards/question_recreation_reward_func/std": 0.047005095053464174,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 699.5,
"completions/max_terminated_length": 607.0,
"completions/mean_length": 290.875,
"completions/mean_terminated_length": 193.4791717529297,
"completions/min_length": 5.0,
"completions/min_terminated_length": 5.0,
"epoch": 3.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 11.026899337768555,
"kl": 0.0011377405207895208,
"learning_rate": 4.864543104251586e-07,
"loss": -0.0341,
"num_tokens": 17974.0,
"reward": 0.13937734812498093,
"reward_std": 0.06329425051808357,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.13937734812498093,
"rewards/question_recreation_reward_func/std": 0.07533928006887436,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.25,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 345.0,
"completions/mean_length": 340.0,
"completions/mean_terminated_length": 112.00000381469727,
"completions/min_length": 15.5,
"completions/min_terminated_length": 15.5,
"epoch": 5.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 5.603325366973877,
"kl": 0.0009242366522812517,
"learning_rate": 4.472851273490984e-07,
"loss": 0.1496,
"num_tokens": 27510.0,
"reward": 0.09400873444974422,
"reward_std": 0.06265044631436467,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.09400873444974422,
"rewards/question_recreation_reward_func/std": 0.07358316518366337,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 490.5,
"completions/max_terminated_length": 490.5,
"completions/mean_length": 196.3125,
"completions/mean_terminated_length": 196.3125,
"completions/min_length": 8.5,
"completions/min_terminated_length": 8.5,
"epoch": 7.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 11.331869125366211,
"kl": 0.003321434611279983,
"learning_rate": 3.867370395306068e-07,
"loss": 0.0419,
"num_tokens": 34747.0,
"reward": 0.08949379064142704,
"reward_std": 0.042363014072179794,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.08949379064142704,
"rewards/question_recreation_reward_func/std": 0.05983787029981613,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 750.5,
"completions/max_terminated_length": 456.5,
"completions/mean_length": 192.5,
"completions/mean_terminated_length": 137.23214721679688,
"completions/min_length": 7.5,
"completions/min_terminated_length": 7.5,
"epoch": 9.8,
"frac_reward_zero_std": 0.0,
"grad_norm": 13.011316299438477,
"kl": 0.004956400036462583,
"learning_rate": 3.1137137178519977e-07,
"loss": -0.0044,
"num_tokens": 41923.0,
"reward": 0.11588690988719463,
"reward_std": 0.04010107275098562,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.10807440988719463,
"rewards/question_recreation_reward_func/std": 0.0751601941883564,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0078125,
"rewards/xmlcount_reward_func/std": 0.022097086533904076,
"step": 10
},
{
"epoch": 9.8,
"step": 10,
"total_flos": 0.0,
"train_loss": 0.050427977740764615,
"train_runtime": 870.114,
"train_samples_per_second": 0.184,
"train_steps_per_second": 0.023
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 41923,
"num_train_epochs": 10,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}