|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 282.71875, |
|
"epoch": 0.05, |
|
"grad_norm": 42.93820571899414, |
|
"kl": 0.0, |
|
"learning_rate": 4.965903258506806e-07, |
|
"loss": 0.0, |
|
"reward": 0.6292939200648107, |
|
"reward_std": 0.6506021634559147, |
|
"rewards/concensus_correctness_reward_func": 0.04662499949336052, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.25, |
|
"rewards/question_recreation_reward_func": 0.14407516870414838, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.1260937498882413, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 316.15625, |
|
"epoch": 0.1, |
|
"grad_norm": 11.557947158813477, |
|
"kl": 0.002068059044177062, |
|
"learning_rate": 4.698684378016222e-07, |
|
"loss": 0.0, |
|
"reward": 0.5142579441308044, |
|
"reward_std": 0.40489116005483083, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.1875, |
|
"rewards/question_recreation_reward_func": 0.22807045001536608, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.09868749883025885, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 397.90625, |
|
"epoch": 0.15, |
|
"grad_norm": 33.66463851928711, |
|
"kl": 0.0023345137960859574, |
|
"learning_rate": 4.193203929064353e-07, |
|
"loss": 0.0, |
|
"reward": 0.0876373503706418, |
|
"reward_std": 0.2730903436749941, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.12148110551061109, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.0338437519967556, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 322.21875, |
|
"epoch": 0.2, |
|
"grad_norm": 13.174933433532715, |
|
"kl": 0.0018492022645659745, |
|
"learning_rate": 3.5042385616324236e-07, |
|
"loss": 0.0, |
|
"reward": 0.41745682479813695, |
|
"reward_std": 0.3582948070834391, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.125, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0625, |
|
"rewards/question_recreation_reward_func": 0.1773943287844304, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.052562499884516, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 288.125, |
|
"epoch": 0.25, |
|
"grad_norm": 67.86888122558594, |
|
"kl": 0.008549842281354358, |
|
"learning_rate": 2.706448363680831e-07, |
|
"loss": 0.0, |
|
"reward": 0.40863744184025563, |
|
"reward_std": 0.49519592405704316, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0625, |
|
"rewards/question_recreation_reward_func": 0.1976999432372395, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.07031250046566129, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 212.03125, |
|
"epoch": 0.3, |
|
"grad_norm": 24.831544876098633, |
|
"kl": 0.008816897239739774, |
|
"learning_rate": 1.886286282148002e-07, |
|
"loss": 0.0, |
|
"reward": 0.36399466946022585, |
|
"reward_std": 0.2636154612409882, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.18833842262392864, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.17565625021234155, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 369.34375, |
|
"epoch": 0.35, |
|
"grad_norm": 4.622942924499512, |
|
"kl": 0.002492460354915238, |
|
"learning_rate": 1.1326296046939333e-07, |
|
"loss": 0.0, |
|
"reward": 0.13834899943321943, |
|
"reward_std": 0.13620900230307598, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.07897400224464945, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.059374999604187906, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 334.78125, |
|
"epoch": 0.4, |
|
"grad_norm": 42.03730010986328, |
|
"kl": 0.005604283696811763, |
|
"learning_rate": 5.271487265090163e-08, |
|
"loss": 0.0, |
|
"reward": 1.140276842750609, |
|
"reward_std": 0.9167491418556892, |
|
"rewards/concensus_correctness_reward_func": 0.7786874994635582, |
|
"rewards/consensus_reward_func": 0.0625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.125, |
|
"rewards/question_recreation_reward_func": 0.11102682136697695, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.06306249997578561, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 370.53125, |
|
"epoch": 0.45, |
|
"grad_norm": 103.69042205810547, |
|
"kl": 0.016677564308338333, |
|
"learning_rate": 1.3545689574841341e-08, |
|
"loss": 0.0, |
|
"reward": 0.251421230728738, |
|
"reward_std": 0.507526803878136, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0625, |
|
"rewards/question_recreation_reward_func": 0.2050462217302993, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.01612500031478703, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 309.03125, |
|
"epoch": 0.5, |
|
"grad_norm": 18.9988956451416, |
|
"kl": 0.0024696916407265235, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"reward": 0.4606377884047106, |
|
"reward_std": 0.46051671362511115, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.0, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.125, |
|
"rewards/question_recreation_reward_func": 0.16510653705336154, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.1549062510021031, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 5.050748984558595e-06, |
|
"train_runtime": 778.5481, |
|
"train_samples_per_second": 0.411, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|