|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.8, |
|
"eval_steps": 500, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.1875, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 390.0, |
|
"completions/mean_length": 320.5, |
|
"completions/mean_terminated_length": 166.0357151031494, |
|
"completions/min_length": 18.5, |
|
"completions/min_terminated_length": 18.5, |
|
"epoch": 1.8, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 13.776973724365234, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0991, |
|
"num_tokens": 9224.0, |
|
"reward": 0.06882327049970627, |
|
"reward_std": 0.029990280978381634, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.06882327049970627, |
|
"rewards/question_recreation_reward_func/std": 0.047005095053464174, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.0, |
|
"rewards/xmlcount_reward_func/std": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.125, |
|
"completions/max_length": 699.5, |
|
"completions/max_terminated_length": 607.0, |
|
"completions/mean_length": 290.875, |
|
"completions/mean_terminated_length": 193.4791717529297, |
|
"completions/min_length": 5.0, |
|
"completions/min_terminated_length": 5.0, |
|
"epoch": 3.8, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 11.026899337768555, |
|
"kl": 0.0011377405207895208, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": -0.0341, |
|
"num_tokens": 17974.0, |
|
"reward": 0.13937734812498093, |
|
"reward_std": 0.06329425051808357, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.13937734812498093, |
|
"rewards/question_recreation_reward_func/std": 0.07533928006887436, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.0, |
|
"rewards/xmlcount_reward_func/std": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.25, |
|
"completions/max_length": 1024.0, |
|
"completions/max_terminated_length": 345.0, |
|
"completions/mean_length": 340.0, |
|
"completions/mean_terminated_length": 112.00000381469727, |
|
"completions/min_length": 15.5, |
|
"completions/min_terminated_length": 15.5, |
|
"epoch": 5.8, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 5.603325366973877, |
|
"kl": 0.0009242366522812517, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.1496, |
|
"num_tokens": 27510.0, |
|
"reward": 0.09400873444974422, |
|
"reward_std": 0.06265044631436467, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.09400873444974422, |
|
"rewards/question_recreation_reward_func/std": 0.07358316518366337, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.0, |
|
"rewards/xmlcount_reward_func/std": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 490.5, |
|
"completions/max_terminated_length": 490.5, |
|
"completions/mean_length": 196.3125, |
|
"completions/mean_terminated_length": 196.3125, |
|
"completions/min_length": 8.5, |
|
"completions/min_terminated_length": 8.5, |
|
"epoch": 7.8, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 11.331869125366211, |
|
"kl": 0.003321434611279983, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0419, |
|
"num_tokens": 34747.0, |
|
"reward": 0.08949379064142704, |
|
"reward_std": 0.042363014072179794, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.08949379064142704, |
|
"rewards/question_recreation_reward_func/std": 0.05983787029981613, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.0, |
|
"rewards/xmlcount_reward_func/std": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0625, |
|
"completions/max_length": 750.5, |
|
"completions/max_terminated_length": 456.5, |
|
"completions/mean_length": 192.5, |
|
"completions/mean_terminated_length": 137.23214721679688, |
|
"completions/min_length": 7.5, |
|
"completions/min_terminated_length": 7.5, |
|
"epoch": 9.8, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 13.011316299438477, |
|
"kl": 0.004956400036462583, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": -0.0044, |
|
"num_tokens": 41923.0, |
|
"reward": 0.11588690988719463, |
|
"reward_std": 0.04010107275098562, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.10807440988719463, |
|
"rewards/question_recreation_reward_func/std": 0.0751601941883564, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.0078125, |
|
"rewards/xmlcount_reward_func/std": 0.022097086533904076, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"step": 10, |
|
"total_flos": 0.0, |
|
"train_loss": 0.050427977740764615, |
|
"train_runtime": 870.114, |
|
"train_samples_per_second": 0.184, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 41923, |
|
"num_train_epochs": 10, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|