{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 32, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 0.0002, "loss": 1.1044, "step": 1 }, { "epoch": 0.5, "learning_rate": 0.00019948693233918952, "loss": 1.0409, "step": 2 }, { "epoch": 0.75, "learning_rate": 0.00019795299412524945, "loss": 0.9112, "step": 3 }, { "epoch": 1.0, "learning_rate": 0.00019541392564000488, "loss": 0.7554, "step": 4 }, { "epoch": 1.25, "learning_rate": 0.00019189578116202307, "loss": 0.6185, "step": 5 }, { "epoch": 1.5, "learning_rate": 0.00018743466161445823, "loss": 0.5743, "step": 6 }, { "epoch": 1.75, "learning_rate": 0.00018207634412072764, "loss": 0.5785, "step": 7 }, { "epoch": 2.0, "learning_rate": 0.0001758758122692791, "loss": 0.436, "step": 8 }, { "epoch": 2.25, "learning_rate": 0.00016889669190756868, "loss": 0.3596, "step": 9 }, { "epoch": 2.5, "learning_rate": 0.0001612105982547663, "loss": 0.3703, "step": 10 }, { "epoch": 2.75, "learning_rate": 0.00015289640103269625, "loss": 0.2485, "step": 11 }, { "epoch": 3.0, "learning_rate": 0.00014403941515576344, "loss": 0.2869, "step": 12 }, { "epoch": 3.25, "learning_rate": 0.00013473052528448201, "loss": 0.1574, "step": 13 }, { "epoch": 3.5, "learning_rate": 0.00012506525322587207, "loss": 0.2161, "step": 14 }, { "epoch": 3.75, "learning_rate": 0.00011514277775045768, "loss": 0.1179, "step": 15 }, { "epoch": 4.0, "learning_rate": 0.00010506491688387127, "loss": 0.1789, "step": 16 }, { "epoch": 4.25, "learning_rate": 9.493508311612874e-05, "loss": 0.0866, "step": 17 }, { "epoch": 4.5, "learning_rate": 8.485722224954237e-05, "loss": 0.0816, "step": 18 }, { "epoch": 4.75, "learning_rate": 7.493474677412794e-05, "loss": 0.0849, "step": 19 }, { "epoch": 5.0, "learning_rate": 6.526947471551798e-05, "loss": 0.0903, "step": 20 }, { "epoch": 5.25, "learning_rate": 5.596058484423656e-05, "loss": 0.0444, "step": 21 }, { "epoch": 5.5, "learning_rate": 4.710359896730379e-05, "loss": 0.0389, "step": 22 }, { "epoch": 5.75, "learning_rate": 3.878940174523371e-05, "loss": 0.0437, "step": 23 }, { "epoch": 6.0, "learning_rate": 3.110330809243134e-05, "loss": 0.055, "step": 24 }, { "epoch": 6.25, "learning_rate": 2.4124187730720917e-05, "loss": 0.0347, "step": 25 }, { "epoch": 6.5, "learning_rate": 1.7923655879272393e-05, "loss": 0.0194, "step": 26 }, { "epoch": 6.75, "learning_rate": 1.2565338385541792e-05, "loss": 0.0299, "step": 27 }, { "epoch": 7.0, "learning_rate": 8.10421883797694e-06, "loss": 0.0329, "step": 28 }, { "epoch": 7.25, "learning_rate": 4.586074359995119e-06, "loss": 0.0189, "step": 29 }, { "epoch": 7.5, "learning_rate": 2.0470058747505516e-06, "loss": 0.0244, "step": 30 }, { "epoch": 7.75, "learning_rate": 5.130676608104845e-07, "loss": 0.0259, "step": 31 }, { "epoch": 8.0, "learning_rate": 0.0, "loss": 0.0235, "step": 32 }, { "epoch": 8.0, "step": 32, "total_flos": 1564227624960.0, "train_loss": 0.27154927648371086, "train_runtime": 170.0181, "train_samples_per_second": 2.823, "train_steps_per_second": 0.188 } ], "logging_steps": 1.0, "max_steps": 32, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 50000, "total_flos": 1564227624960.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }