{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 500, "global_step": 31, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completion_length": 503.94140625, "epoch": 0.32, "grad_norm": 0.055206917226314545, "kl": 0.0007647275924682617, "learning_rate": 1.5289640103269626e-05, "loss": -0.0178, "num_tokens": 837177.0, "reward": 0.14900912609314218, "reward_std": 0.23200407949070723, "rewards/_accuracy_reward": 0.14900912609314218, "rewards/_format_reward": 0.0, "step": 10 }, { "clip_ratio": 0.0, "completion_length": 513.55390625, "epoch": 0.64, "grad_norm": 0.048017717897892, "kl": 0.0019702911376953125, "learning_rate": 5.5960584844236565e-06, "loss": 0.0085, "num_tokens": 1686614.0, "reward": 0.20446404698841433, "reward_std": 0.26745549326642504, "rewards/_accuracy_reward": 0.20446404698841433, "rewards/_format_reward": 0.0, "step": 20 }, { "clip_ratio": 0.0, "completion_length": 525.440625, "epoch": 0.96, "grad_norm": 0.049744077026844025, "kl": 0.002144050598144531, "learning_rate": 5.1306766081048456e-08, "loss": 0.0187, "num_tokens": 2550850.0, "reward": 0.23339965324412332, "reward_std": 0.3011114658700535, "rewards/_accuracy_reward": 0.23339965324412332, "rewards/_format_reward": 0.0, "step": 30 }, { "clip_ratio": 0.0, "completion_length": 546.765625, "epoch": 0.992, "kl": 0.0019164085388183594, "num_tokens": 2640088.0, "reward": 0.1563290636986494, "reward_std": 0.2728244187310338, "rewards/_accuracy_reward": 0.1563290636986494, "rewards/_format_reward": 0.0, "step": 31, "total_flos": 0.0, "train_loss": 0.0035367666953994383, "train_runtime": 25807.2718, "train_samples_per_second": 0.039, "train_steps_per_second": 0.001 } ], "logging_steps": 10, "max_steps": 31, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }