{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991993594875901, "eval_steps": 4, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.051240992794235385, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.8604, "step": 4 }, { "epoch": 0.10248198558847077, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.9174, "step": 8 }, { "epoch": 0.15372297838270615, "grad_norm": 0.22256438434123993, "learning_rate": 0.002, "loss": 0.9102, "step": 12 }, { "epoch": 0.20496397117694154, "grad_norm": 0.14360007643699646, "learning_rate": 0.0011547005383792518, "loss": 0.8065, "step": 16 }, { "epoch": 0.2562049639711769, "grad_norm": 0.269815057516098, "learning_rate": 0.0007559289460184544, "loss": 0.7581, "step": 20 }, { "epoch": 0.3074459567654123, "grad_norm": 0.1919809728860855, "learning_rate": 0.0006030226891555273, "loss": 0.6564, "step": 24 }, { "epoch": 0.3586869495596477, "grad_norm": 0.20717447996139526, "learning_rate": 0.0005345224838248488, "loss": 0.6483, "step": 28 }, { "epoch": 0.4099279423538831, "grad_norm": 0.16305077075958252, "learning_rate": 0.00047140452079103175, "loss": 0.6444, "step": 32 }, { "epoch": 0.4611689351481185, "grad_norm": 0.1585964411497116, "learning_rate": 0.00042640143271122083, "loss": 0.616, "step": 36 }, { "epoch": 0.5124099279423538, "grad_norm": 0.16819557547569275, "learning_rate": 0.0003922322702763681, "loss": 0.6418, "step": 40 }, { "epoch": 0.5636509207365893, "grad_norm": 0.15789124369621277, "learning_rate": 0.00036514837167011074, "loss": 0.6313, "step": 44 }, { "epoch": 0.6148919135308246, "grad_norm": 0.14483995735645294, "learning_rate": 0.00034299717028501764, "loss": 0.6054, "step": 48 }, { "epoch": 0.6661329063250601, "grad_norm": 0.17608477175235748, "learning_rate": 0.0003287979746107146, "loss": 0.6128, "step": 52 }, { "epoch": 0.7173738991192954, "grad_norm": 0.18501809239387512, "learning_rate": 0.0003123475237772121, "loss": 0.6161, "step": 56 }, { "epoch": 0.7686148919135308, "grad_norm": 0.1523313969373703, "learning_rate": 0.00029814239699997195, "loss": 0.6128, "step": 60 }, { "epoch": 0.8198558847077662, "grad_norm": 0.1530931144952774, "learning_rate": 0.0002857142857142857, "loss": 0.5906, "step": 64 }, { "epoch": 0.8710968775020016, "grad_norm": 0.16880746185779572, "learning_rate": 0.0002747211278973781, "loss": 0.6106, "step": 68 }, { "epoch": 0.922337870296237, "grad_norm": 0.15904685854911804, "learning_rate": 0.00026490647141300875, "loss": 0.6297, "step": 72 }, { "epoch": 0.9735788630904724, "grad_norm": 0.14567361772060394, "learning_rate": 0.000256073759865792, "loss": 0.6091, "step": 76 }, { "epoch": 0.9991993594875901, "step": 78, "total_flos": 7.922041751265608e+17, "train_loss": 0.680653405495179, "train_runtime": 774.7537, "train_samples_per_second": 12.892, "train_steps_per_second": 0.101 } ], "logging_steps": 4, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.922041751265608e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }