{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9926617745163442, "eval_steps": 5, "global_step": 93, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0533689126084056, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.66, "step": 5 }, { "epoch": 0.1067378252168112, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.6231, "step": 10 }, { "epoch": 0.1601067378252168, "grad_norm": 0.2781398892402649, "learning_rate": 0.001414213562373095, "loss": 1.6169, "step": 15 }, { "epoch": 0.2134756504336224, "grad_norm": 2.027355194091797, "learning_rate": 0.001, "loss": 1.5934, "step": 20 }, { "epoch": 0.266844563042028, "grad_norm": 0.6781024932861328, "learning_rate": 0.0006666666666666666, "loss": 1.4104, "step": 25 }, { "epoch": 0.3202134756504336, "grad_norm": 0.8801289796829224, "learning_rate": 0.0005345224838248488, "loss": 1.3078, "step": 30 }, { "epoch": 0.3735823882588392, "grad_norm": 0.37911781668663025, "learning_rate": 0.0004588314677411235, "loss": 1.2235, "step": 35 }, { "epoch": 0.4269513008672448, "grad_norm": 0.3639412224292755, "learning_rate": 0.0004082482904638631, "loss": 1.2058, "step": 40 }, { "epoch": 0.48032021347565046, "grad_norm": 0.3324061930179596, "learning_rate": 0.0003713906763541037, "loss": 1.2022, "step": 45 }, { "epoch": 0.533689126084056, "grad_norm": 0.3669029772281647, "learning_rate": 0.00034299717028501764, "loss": 1.1901, "step": 50 }, { "epoch": 0.5870580386924616, "grad_norm": 0.3316662013530731, "learning_rate": 0.00032025630761017425, "loss": 1.1541, "step": 55 }, { "epoch": 0.6404269513008672, "grad_norm": 0.36560526490211487, "learning_rate": 0.00030151134457776364, "loss": 1.1021, "step": 60 }, { "epoch": 0.6937958639092728, "grad_norm": 0.36231526732444763, "learning_rate": 0.0002857142857142857, "loss": 1.1178, "step": 65 }, { "epoch": 0.7471647765176784, "grad_norm": 0.3893248438835144, "learning_rate": 0.0002721655269759087, "loss": 1.1032, "step": 70 }, { "epoch": 0.800533689126084, "grad_norm": 0.4164714813232422, "learning_rate": 0.0002603778219616478, "loss": 1.0781, "step": 75 }, { "epoch": 0.8539026017344896, "grad_norm": 0.3659443259239197, "learning_rate": 0.00025, "loss": 1.0644, "step": 80 }, { "epoch": 0.9072715143428952, "grad_norm": 0.3580448031425476, "learning_rate": 0.0002407717061715384, "loss": 1.0597, "step": 85 }, { "epoch": 0.9606404269513009, "grad_norm": 0.4161628186702728, "learning_rate": 0.00023249527748763857, "loss": 1.0817, "step": 90 }, { "epoch": 0.9926617745163442, "step": 93, "total_flos": 9.445511318816686e+17, "train_loss": 1.2599219224786247, "train_runtime": 872.6845, "train_samples_per_second": 13.737, "train_steps_per_second": 0.107 } ], "logging_steps": 5, "max_steps": 93, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.445511318816686e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }