{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991993594875901, "eval_steps": 4, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.051240992794235385, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.2591, "step": 4 }, { "epoch": 0.10248198558847077, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.2358, "step": 8 }, { "epoch": 0.15372297838270615, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.2336, "step": 12 }, { "epoch": 0.20496397117694154, "grad_norm": 0.3318765163421631, "learning_rate": 0.0011547005383792518, "loss": 1.2052, "step": 16 }, { "epoch": 0.2562049639711769, "grad_norm": 0.871834933757782, "learning_rate": 0.0008164965809277262, "loss": 1.1247, "step": 20 }, { "epoch": 0.3074459567654123, "grad_norm": 0.44025564193725586, "learning_rate": 0.0006324555320336759, "loss": 0.9757, "step": 24 }, { "epoch": 0.3586869495596477, "grad_norm": 0.3040187656879425, "learning_rate": 0.0005345224838248488, "loss": 0.9053, "step": 28 }, { "epoch": 0.4099279423538831, "grad_norm": 0.2234366089105606, "learning_rate": 0.00047140452079103175, "loss": 0.8848, "step": 32 }, { "epoch": 0.4611689351481185, "grad_norm": 0.23592019081115723, "learning_rate": 0.00042640143271122083, "loss": 0.8373, "step": 36 }, { "epoch": 0.5124099279423538, "grad_norm": 0.2808322310447693, "learning_rate": 0.0003922322702763681, "loss": 0.8558, "step": 40 }, { "epoch": 0.5636509207365893, "grad_norm": 0.22603166103363037, "learning_rate": 0.00036514837167011074, "loss": 0.8877, "step": 44 }, { "epoch": 0.6148919135308246, "grad_norm": 0.2617782950401306, "learning_rate": 0.00034299717028501764, "loss": 0.8343, "step": 48 }, { "epoch": 0.6661329063250601, "grad_norm": 0.24344013631343842, "learning_rate": 0.0003244428422615251, "loss": 0.8267, "step": 52 }, { "epoch": 0.7173738991192954, "grad_norm": 0.31763675808906555, "learning_rate": 0.00030860669992418383, "loss": 0.8111, "step": 56 }, { "epoch": 0.7686148919135308, "grad_norm": 0.33552709221839905, "learning_rate": 0.0002948839123097943, "loss": 0.826, "step": 60 }, { "epoch": 0.8198558847077662, "grad_norm": 0.21605852246284485, "learning_rate": 0.000282842712474619, "loss": 0.8314, "step": 64 }, { "epoch": 0.8710968775020016, "grad_norm": 0.2202538549900055, "learning_rate": 0.0002721655269759087, "loss": 0.829, "step": 68 }, { "epoch": 0.922337870296237, "grad_norm": 0.21023687720298767, "learning_rate": 0.0002626128657194451, "loss": 0.813, "step": 72 }, { "epoch": 0.9735788630904724, "grad_norm": 0.22085361182689667, "learning_rate": 0.000254000254000381, "loss": 0.8169, "step": 76 }, { "epoch": 0.9991993594875901, "step": 78, "total_flos": 7.922041751265608e+17, "train_loss": 0.944701231442965, "train_runtime": 774.0926, "train_samples_per_second": 12.905, "train_steps_per_second": 0.101 } ], "logging_steps": 4, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.922041751265608e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }