End of training

a0da2eb verified 4 months ago

9.48 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 9.8,
	"eval_steps": 500,
	"global_step": 10,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.1875,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 390.0,
	"completions/mean_length": 320.5,
	"completions/mean_terminated_length": 166.0357151031494,
	"completions/min_length": 18.5,
	"completions/min_terminated_length": 18.5,
	"epoch": 1.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 13.776973724365234,
	"kl": 0.0,
	"learning_rate": 5e-07,
	"loss": 0.0991,
	"num_tokens": 9224.0,
	"reward": 0.06882327049970627,
	"reward_std": 0.029990280978381634,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.06882327049970627,
	"rewards/question_recreation_reward_func/std": 0.047005095053464174,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 2
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.125,
	"completions/max_length": 699.5,
	"completions/max_terminated_length": 607.0,
	"completions/mean_length": 290.875,
	"completions/mean_terminated_length": 193.4791717529297,
	"completions/min_length": 5.0,
	"completions/min_terminated_length": 5.0,
	"epoch": 3.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 11.026899337768555,
	"kl": 0.0011377405207895208,
	"learning_rate": 4.864543104251586e-07,
	"loss": -0.0341,
	"num_tokens": 17974.0,
	"reward": 0.13937734812498093,
	"reward_std": 0.06329425051808357,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.13937734812498093,
	"rewards/question_recreation_reward_func/std": 0.07533928006887436,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 4
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.25,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 345.0,
	"completions/mean_length": 340.0,
	"completions/mean_terminated_length": 112.00000381469727,
	"completions/min_length": 15.5,
	"completions/min_terminated_length": 15.5,
	"epoch": 5.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 5.603325366973877,
	"kl": 0.0009242366522812517,
	"learning_rate": 4.472851273490984e-07,
	"loss": 0.1496,
	"num_tokens": 27510.0,
	"reward": 0.09400873444974422,
	"reward_std": 0.06265044631436467,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.09400873444974422,
	"rewards/question_recreation_reward_func/std": 0.07358316518366337,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 6
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 490.5,
	"completions/max_terminated_length": 490.5,
	"completions/mean_length": 196.3125,
	"completions/mean_terminated_length": 196.3125,
	"completions/min_length": 8.5,
	"completions/min_terminated_length": 8.5,
	"epoch": 7.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 11.331869125366211,
	"kl": 0.003321434611279983,
	"learning_rate": 3.867370395306068e-07,
	"loss": 0.0419,
	"num_tokens": 34747.0,
	"reward": 0.08949379064142704,
	"reward_std": 0.042363014072179794,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.08949379064142704,
	"rewards/question_recreation_reward_func/std": 0.05983787029981613,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 8
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0625,
	"completions/max_length": 750.5,
	"completions/max_terminated_length": 456.5,
	"completions/mean_length": 192.5,
	"completions/mean_terminated_length": 137.23214721679688,
	"completions/min_length": 7.5,
	"completions/min_terminated_length": 7.5,
	"epoch": 9.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 13.011316299438477,
	"kl": 0.004956400036462583,
	"learning_rate": 3.1137137178519977e-07,
	"loss": -0.0044,
	"num_tokens": 41923.0,
	"reward": 0.11588690988719463,
	"reward_std": 0.04010107275098562,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.10807440988719463,
	"rewards/question_recreation_reward_func/std": 0.0751601941883564,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0078125,
	"rewards/xmlcount_reward_func/std": 0.022097086533904076,
	"step": 10
	},
	{
	"epoch": 9.8,
	"step": 10,
	"total_flos": 0.0,
	"train_loss": 0.050427977740764615,
	"train_runtime": 870.114,
	"train_samples_per_second": 0.184,
	"train_steps_per_second": 0.023
	}
	],
	"logging_steps": 2,
	"max_steps": 20,
	"num_input_tokens_seen": 41923,
	"num_train_epochs": 10,
	"save_steps": 25,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": false,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}