{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.056338028169014086, "eval_steps": 500, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 480.421875, "epoch": 0.0028169014084507044, "grad_norm": 0.00048306785174645483, "learning_rate": 6.953333333333332e-07, "loss": 0.0002, "reward": 0.328125, "rewards/accuracy_reward": 0.328125, "rewards/tag_count_reward": 0.0, "step": 1 }, { "completion_length": 507.015625, "epoch": 0.005633802816901409, "grad_norm": 0.00925475638359785, "learning_rate": 6.906666666666666e-07, "loss": -0.0003, "reward": 0.265625, "rewards/accuracy_reward": 0.265625, "rewards/tag_count_reward": 0.0, "step": 2 }, { "completion_length": 452.828125, "epoch": 0.011267605633802818, "grad_norm": 0.0011863944819197059, "learning_rate": 6.813333333333333e-07, "loss": -0.0004, "reward": 0.4140625, "rewards/accuracy_reward": 0.4140625, "rewards/tag_count_reward": 0.0, "step": 4 }, { "completion_length": 467.625, "epoch": 0.016901408450704224, "grad_norm": 0.00895125325769186, "learning_rate": 6.72e-07, "loss": -0.0, "reward": 0.3125, "rewards/accuracy_reward": 0.3125, "rewards/tag_count_reward": 0.0, "step": 6 }, { "completion_length": 507.421875, "epoch": 0.022535211267605635, "grad_norm": 0.00034816659172065556, "learning_rate": 6.626666666666666e-07, "loss": -0.0001, "reward": 0.359375, "rewards/accuracy_reward": 0.359375, "rewards/tag_count_reward": 0.0, "step": 8 }, { "completion_length": 497.6875, "epoch": 0.028169014084507043, "grad_norm": 0.033418115228414536, "learning_rate": 6.533333333333334e-07, "loss": -0.0002, "reward": 0.2109375, "rewards/accuracy_reward": 0.2109375, "rewards/tag_count_reward": 0.0, "step": 10 }, { "completion_length": 475.71875, "epoch": 0.03380281690140845, "grad_norm": 0.0005127754993736744, "learning_rate": 6.44e-07, "loss": -0.0003, "reward": 0.2734375, "rewards/accuracy_reward": 0.2734375, "rewards/tag_count_reward": 0.0, "step": 12 }, { "completion_length": 465.9765625, "epoch": 0.03943661971830986, "grad_norm": 0.001344753778539598, "learning_rate": 6.346666666666666e-07, "loss": -0.0007, "reward": 0.3203125, "rewards/accuracy_reward": 0.3203125, "rewards/tag_count_reward": 0.0, "step": 14 }, { "completion_length": 510.34375, "epoch": 0.04507042253521127, "grad_norm": 0.23263047635555267, "learning_rate": 6.253333333333333e-07, "loss": -0.0011, "reward": 0.4765625, "rewards/accuracy_reward": 0.4765625, "rewards/tag_count_reward": 0.0, "step": 16 }, { "completion_length": 498.1484375, "epoch": 0.05070422535211268, "grad_norm": 0.0012522597098723054, "learning_rate": 6.16e-07, "loss": -0.0004, "reward": 0.2734375, "rewards/accuracy_reward": 0.2734375, "rewards/tag_count_reward": 0.0, "step": 18 }, { "completion_length": 509.1171875, "epoch": 0.056338028169014086, "grad_norm": 0.010728111490607262, "learning_rate": 6.066666666666666e-07, "loss": -0.0001, "reward": 0.328125, "rewards/accuracy_reward": 0.328125, "rewards/tag_count_reward": 0.0, "step": 20 } ], "logging_steps": 2, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.832621368030003e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }