End of training

091de00 verified 4 months ago

9.59 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 9.8,
	"eval_steps": 500,
	"global_step": 10,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.3125,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 812.5,
	"completions/mean_length": 647.75,
	"completions/mean_terminated_length": 475.3166809082031,
	"completions/min_length": 138.5,
	"completions/min_terminated_length": 138.5,
	"epoch": 1.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 3.236729621887207,
	"kl": 0.00027297700398776215,
	"learning_rate": 5e-07,
	"loss": -0.001,
	"num_tokens": 14460.0,
	"reward": 0.14074324816465378,
	"reward_std": 0.12783230934292078,
	"rewards/concensus_correctness_reward_func/mean": 0.07774999737739563,
	"rewards/concensus_correctness_reward_func/std": 0.21991021931171417,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.06299325078725815,
	"rewards/question_recreation_reward_func/std": 0.018077346496284008,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 2
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.1875,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 754.0,
	"completions/mean_length": 397.25,
	"completions/mean_terminated_length": 251.08333587646484,
	"completions/min_length": 4.0,
	"completions/min_terminated_length": 4.0,
	"epoch": 3.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 4.27091646194458,
	"kl": 0.0006203544398886152,
	"learning_rate": 4.864543104251586e-07,
	"loss": 0.0127,
	"num_tokens": 24912.0,
	"reward": 0.1337380139157176,
	"reward_std": 0.1742639576550573,
	"rewards/concensus_correctness_reward_func/mean": 0.11999999731779099,
	"rewards/concensus_correctness_reward_func/std": 0.33941125869750977,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.013738006353378296,
	"rewards/question_recreation_reward_func/std": 0.010505724931135774,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 4
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.1875,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 846.5,
	"completions/mean_length": 528.4375,
	"completions/mean_terminated_length": 422.8214416503906,
	"completions/min_length": 4.0,
	"completions/min_terminated_length": 4.0,
	"epoch": 5.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 3.0706632137298584,
	"kl": 0.0008973750846053008,
	"learning_rate": 4.472851273490984e-07,
	"loss": 0.0403,
	"num_tokens": 37463.0,
	"reward": 0.12599835265427828,
	"reward_std": 0.16678897803649306,
	"rewards/concensus_correctness_reward_func/mean": 0.1107499971985817,
	"rewards/concensus_correctness_reward_func/std": 0.31324827671051025,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.015248352661728859,
	"rewards/question_recreation_reward_func/std": 0.015421947930008173,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 6
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.3125,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 744.5,
	"completions/mean_length": 588.75,
	"completions/mean_terminated_length": 384.3666687011719,
	"completions/min_length": 119.0,
	"completions/min_terminated_length": 119.0,
	"epoch": 7.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 3.6464202404022217,
	"kl": 0.0010676352321752347,
	"learning_rate": 3.867370395306068e-07,
	"loss": 0.1472,
	"num_tokens": 50979.0,
	"reward": 0.05322604067623615,
	"reward_std": 0.02834410360082984,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.05322604067623615,
	"rewards/question_recreation_reward_func/std": 0.03214400727301836,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 8
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.375,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 817.5,
	"completions/mean_length": 669.4375,
	"completions/mean_terminated_length": 470.0,
	"completions/min_length": 148.5,
	"completions/min_terminated_length": 148.5,
	"epoch": 9.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 3.48903226852417,
	"kl": 0.0014760679550818168,
	"learning_rate": 3.1137137178519977e-07,
	"loss": 0.2054,
	"num_tokens": 65786.0,
	"reward": 0.011943170800805092,
	"reward_std": 0.0064218416810035706,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.011943170800805092,
	"rewards/question_recreation_reward_func/std": 0.0063162968726828694,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.0,
	"rewards/xmlcount_reward_func/std": 0.0,
	"step": 10
	},
	{
	"epoch": 9.8,
	"step": 10,
	"total_flos": 0.0,
	"train_loss": 0.08094322010874748,
	"train_runtime": 956.1563,
	"train_samples_per_second": 0.167,
	"train_steps_per_second": 0.021
	}
	],
	"logging_steps": 2,
	"max_steps": 20,
	"num_input_tokens_seen": 65786,
	"num_train_epochs": 10,
	"save_steps": 25,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": false,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}