{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.742397137745975, "eval_steps": 500, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.57, "learning_rate": 0.0002, "loss": 1.6928, "step": 220 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 0.8598, "step": 440 }, { "epoch": 4.72, "learning_rate": 0.0002, "loss": 0.348, "step": 660 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 0.1732, "step": 880 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 0.111, "step": 1100 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 0.09, "step": 1320 }, { "epoch": 11.02, "learning_rate": 0.0002, "loss": 0.0797, "step": 1540 }, { "epoch": 12.59, "learning_rate": 0.0002, "loss": 0.0712, "step": 1760 }, { "epoch": 14.17, "learning_rate": 0.0002, "loss": 0.0704, "step": 1980 }, { "epoch": 15.74, "learning_rate": 0.0002, "loss": 0.0673, "step": 2200 } ], "logging_steps": 220, "max_steps": 2200, "num_train_epochs": 16, "save_steps": 220, "total_flos": 1.0258908883550208e+17, "trial_name": null, "trial_params": null }