{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991993594875901, "eval_steps": 4, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.051240992794235385, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.3607, "step": 4 }, { "epoch": 0.10248198558847077, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.3651, "step": 8 }, { "epoch": 0.15372297838270615, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.3502, "step": 12 }, { "epoch": 0.20496397117694154, "grad_norm": 0.31328025460243225, "learning_rate": 0.001414213562373095, "loss": 2.2356, "step": 16 }, { "epoch": 0.2562049639711769, "grad_norm": 3.1803669929504395, "learning_rate": 0.001, "loss": 2.6229, "step": 20 }, { "epoch": 0.3074459567654123, "grad_norm": 1.0720810890197754, "learning_rate": 0.0007071067811865475, "loss": 2.6229, "step": 24 }, { "epoch": 0.3586869495596477, "grad_norm": 0.9110113978385925, "learning_rate": 0.0005773502691896259, "loss": 1.9729, "step": 28 }, { "epoch": 0.4099279423538831, "grad_norm": 0.6526729464530945, "learning_rate": 0.0005, "loss": 1.8264, "step": 32 }, { "epoch": 0.4611689351481185, "grad_norm": 0.5115967392921448, "learning_rate": 0.00044721359549995795, "loss": 1.7511, "step": 36 }, { "epoch": 0.5124099279423538, "grad_norm": 0.40264543890953064, "learning_rate": 0.0004082482904638631, "loss": 1.6792, "step": 40 }, { "epoch": 0.5636509207365893, "grad_norm": 0.41392064094543457, "learning_rate": 0.0003779644730092272, "loss": 1.681, "step": 44 }, { "epoch": 0.6148919135308246, "grad_norm": 0.42952096462249756, "learning_rate": 0.00035355339059327376, "loss": 1.614, "step": 48 }, { "epoch": 0.6661329063250601, "grad_norm": 0.4605095386505127, "learning_rate": 0.0003333333333333333, "loss": 1.5879, "step": 52 }, { "epoch": 0.7173738991192954, "grad_norm": 0.4642864763736725, "learning_rate": 0.00031622776601683794, "loss": 1.5134, "step": 56 }, { "epoch": 0.7686148919135308, "grad_norm": 0.528487503528595, "learning_rate": 0.00030151134457776364, "loss": 1.5332, "step": 60 }, { "epoch": 0.8198558847077662, "grad_norm": 0.4958159327507019, "learning_rate": 0.00028867513459481295, "loss": 1.4743, "step": 64 }, { "epoch": 0.8710968775020016, "grad_norm": 0.5096175074577332, "learning_rate": 0.0002773500981126146, "loss": 1.4847, "step": 68 }, { "epoch": 0.922337870296237, "grad_norm": 0.5336413979530334, "learning_rate": 0.0002672612419124244, "loss": 1.4188, "step": 72 }, { "epoch": 0.9735788630904724, "grad_norm": 0.563066840171814, "learning_rate": 0.0002581988897471611, "loss": 1.4082, "step": 76 } ], "logging_steps": 4, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.922041751265608e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }