{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.049561381771323784, "eval_steps": 200000, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016520460590441263, "grad_norm": 51.92022705078125, "learning_rate": 3.2044928972580115e-07, "loss": 13.3171, "step": 100 }, { "epoch": 0.0033040921180882525, "grad_norm": 68.25243377685547, "learning_rate": 6.508093822266271e-07, "loss": 12.9799, "step": 200 }, { "epoch": 0.004956138177132379, "grad_norm": 69.47785186767578, "learning_rate": 9.811694747274531e-07, "loss": 12.5133, "step": 300 }, { "epoch": 0.006608184236176505, "grad_norm": 73.07315063476562, "learning_rate": 1.311529567228279e-06, "loss": 11.9388, "step": 400 }, { "epoch": 0.008260230295220631, "grad_norm": 82.68733215332031, "learning_rate": 1.6418896597291048e-06, "loss": 11.0616, "step": 500 }, { "epoch": 0.009912276354264758, "grad_norm": 57.61735534667969, "learning_rate": 1.972249752229931e-06, "loss": 10.2712, "step": 600 }, { "epoch": 0.011564322413308884, "grad_norm": 44.42943572998047, "learning_rate": 2.302609844730757e-06, "loss": 9.5253, "step": 700 }, { "epoch": 0.01321636847235301, "grad_norm": 27.03646469116211, "learning_rate": 2.6329699372315828e-06, "loss": 8.7706, "step": 800 }, { "epoch": 0.014868414531397135, "grad_norm": 15.231706619262695, "learning_rate": 2.9633300297324087e-06, "loss": 8.4333, "step": 900 }, { "epoch": 0.016520460590441263, "grad_norm": 14.189949035644531, "learning_rate": 3.2936901222332346e-06, "loss": 8.0902, "step": 1000 }, { "epoch": 0.018172506649485387, "grad_norm": 12.241333961486816, "learning_rate": 3.6240502147340605e-06, "loss": 7.8862, "step": 1100 }, { "epoch": 0.019824552708529515, "grad_norm": 11.400131225585938, "learning_rate": 3.9544103072348865e-06, "loss": 7.7362, "step": 1200 }, { "epoch": 0.02147659876757364, "grad_norm": 12.072014808654785, "learning_rate": 4.284770399735712e-06, "loss": 7.6007, "step": 1300 }, { "epoch": 0.023128644826617768, "grad_norm": 11.08774185180664, "learning_rate": 4.615130492236538e-06, "loss": 7.5304, "step": 1400 }, { "epoch": 0.024780690885661892, "grad_norm": 13.02505874633789, "learning_rate": 4.945490584737364e-06, "loss": 7.4249, "step": 1500 }, { "epoch": 0.02643273694470602, "grad_norm": 13.522186279296875, "learning_rate": 5.27585067723819e-06, "loss": 7.3035, "step": 1600 }, { "epoch": 0.028084783003750145, "grad_norm": 45.22550964355469, "learning_rate": 5.606210769739015e-06, "loss": 7.2026, "step": 1700 }, { "epoch": 0.02973682906279427, "grad_norm": 15.62098503112793, "learning_rate": 5.936570862239842e-06, "loss": 7.1572, "step": 1800 }, { "epoch": 0.0313888751218384, "grad_norm": 16.570518493652344, "learning_rate": 6.266930954740668e-06, "loss": 7.0523, "step": 1900 }, { "epoch": 0.033040921180882525, "grad_norm": 16.82353401184082, "learning_rate": 6.597291047241494e-06, "loss": 7.1158, "step": 2000 }, { "epoch": 0.034692967239926646, "grad_norm": 17.38075828552246, "learning_rate": 6.924347538817311e-06, "loss": 6.9856, "step": 2100 }, { "epoch": 0.036345013298970774, "grad_norm": 93.04572296142578, "learning_rate": 7.2547076313181375e-06, "loss": 7.0865, "step": 2200 }, { "epoch": 0.0379970593580149, "grad_norm": 17.861074447631836, "learning_rate": 7.585067723818963e-06, "loss": 6.9496, "step": 2300 }, { "epoch": 0.03964910541705903, "grad_norm": 19.067747116088867, "learning_rate": 7.91542781631979e-06, "loss": 6.9294, "step": 2400 }, { "epoch": 0.04130115147610315, "grad_norm": 16.43912696838379, "learning_rate": 8.245787908820615e-06, "loss": 6.8825, "step": 2500 }, { "epoch": 0.04295319753514728, "grad_norm": 140.5387725830078, "learning_rate": 8.576148001321441e-06, "loss": 6.8218, "step": 2600 }, { "epoch": 0.04460524359419141, "grad_norm": 22.34341049194336, "learning_rate": 8.903204492897258e-06, "loss": 6.8416, "step": 2700 }, { "epoch": 0.046257289653235535, "grad_norm": 16.260499954223633, "learning_rate": 9.233564585398084e-06, "loss": 6.7184, "step": 2800 }, { "epoch": 0.047909335712279656, "grad_norm": 20.075071334838867, "learning_rate": 9.56392467789891e-06, "loss": 6.9183, "step": 2900 }, { "epoch": 0.049561381771323784, "grad_norm": 45.1911735534668, "learning_rate": 9.894284770399738e-06, "loss": 6.7166, "step": 3000 } ], "logging_steps": 100, "max_steps": 60531, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }