|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 439.21875, |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 5.828135013580322, |
|
"kl": 0.0014942500565666705, |
|
"learning_rate": 4.965903258506806e-07, |
|
"loss": 0.0, |
|
"reward": 0.10679153446108103, |
|
"reward_std": 0.6297805532813072, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.20822900370694697, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.10143748670816422, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 345.7083333333333, |
|
"epoch": 1.0, |
|
"grad_norm": 23.521379470825195, |
|
"kl": 0.003981841291533783, |
|
"learning_rate": 4.698684378016222e-07, |
|
"loss": 0.0, |
|
"reward": 1.658778190612793, |
|
"reward_std": 2.896653901785612, |
|
"rewards/concensus_correctness_reward_func": 0.8787500013907751, |
|
"rewards/consensus_reward_func": 0.08333333333333333, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.4166666666666667, |
|
"rewards/question_recreation_reward_func": 0.39598656445741653, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.1159583330154419, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 438.5, |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 1.3683266639709473, |
|
"kl": 0.0017546406452311203, |
|
"learning_rate": 4.193203929064353e-07, |
|
"loss": 0.0, |
|
"reward": 0.36969452537596226, |
|
"reward_std": 0.7297583511099219, |
|
"rewards/concensus_correctness_reward_func": 0.056062500923871994, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.24291324615478516, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.008218752220273018, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 378.9583333333333, |
|
"epoch": 2.0, |
|
"grad_norm": 17.58197021484375, |
|
"kl": 0.0063056297464451445, |
|
"learning_rate": 3.5042385616324236e-07, |
|
"loss": 0.0, |
|
"reward": 1.823668549458186, |
|
"reward_std": 2.0832131765782833, |
|
"rewards/concensus_correctness_reward_func": 0.8333333333333334, |
|
"rewards/consensus_reward_func": 0.08333333333333333, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.4166666666666667, |
|
"rewards/question_recreation_reward_func": 0.3380434938396017, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.15229166112840176, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 319.0, |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 4.946495056152344, |
|
"kl": 0.0023342472850345075, |
|
"learning_rate": 2.706448363680831e-07, |
|
"loss": 0.0, |
|
"reward": 0.6347062969580293, |
|
"reward_std": 0.7983217537403107, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.125, |
|
"rewards/question_recreation_reward_func": 0.3073625322431326, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.1398437414318323, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 299.9583333333333, |
|
"epoch": 3.0, |
|
"grad_norm": 32.36935043334961, |
|
"kl": 0.002557387478494396, |
|
"learning_rate": 1.886286282148002e-07, |
|
"loss": 0.0, |
|
"reward": 1.3059624830881755, |
|
"reward_std": 2.1406236762801805, |
|
"rewards/concensus_correctness_reward_func": 0.8333333333333334, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.3333333333333333, |
|
"rewards/question_recreation_reward_func": 0.22862919544180235, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.08933333307504654, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 364.59375, |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 6.134871006011963, |
|
"kl": 0.002294319980137516, |
|
"learning_rate": 1.1326296046939333e-07, |
|
"loss": 0.0, |
|
"reward": 0.9578792434185743, |
|
"reward_std": 0.7915452528977767, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.3125, |
|
"rewards/question_recreation_reward_func": 0.3451604973524809, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.22209375258535147, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 402.0, |
|
"epoch": 4.0, |
|
"grad_norm": 1.2527885437011719, |
|
"kl": 0.004929080818934987, |
|
"learning_rate": 5.271487265090163e-08, |
|
"loss": 0.0, |
|
"reward": 0.3584599271416664, |
|
"reward_std": 1.0923715432484944, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.16666666666666666, |
|
"rewards/question_recreation_reward_func": 0.2636265928546588, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.07183333983023961, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 376.75, |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 2.3929598331451416, |
|
"kl": 0.002563515809015371, |
|
"learning_rate": 1.3545689574841341e-08, |
|
"loss": 0.0, |
|
"reward": 0.241681374842301, |
|
"reward_std": 1.120352853089571, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.1875, |
|
"rewards/question_recreation_reward_func": 0.23099386505782604, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.1768124857917428, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 389.0, |
|
"epoch": 5.0, |
|
"grad_norm": 1.3994996547698975, |
|
"kl": 0.002270005274719248, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"reward": 0.5229276654620966, |
|
"reward_std": 0.7810152769088745, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.16666666666666666, |
|
"rewards/question_recreation_reward_func": 0.30830267692605656, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.047958330561717354, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 2.5685094556138212e-06, |
|
"train_runtime": 536.6447, |
|
"train_samples_per_second": 0.596, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|