{ "best_metric": 0.9606256742179072, "best_model_checkpoint": "./vit-xray-tumor/checkpoint-125", "epoch": 20.0, "eval_steps": 125, "global_step": 680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7352941176470589, "grad_norm": 0.32342442870140076, "learning_rate": 2.5e-06, "loss": 0.7033, "step": 25 }, { "epoch": 1.4705882352941178, "grad_norm": 0.5360887050628662, "learning_rate": 5e-06, "loss": 0.6638, "step": 50 }, { "epoch": 2.2058823529411766, "grad_norm": 0.32269367575645447, "learning_rate": 7.500000000000001e-06, "loss": 0.5639, "step": 75 }, { "epoch": 2.9411764705882355, "grad_norm": 0.3643856644630432, "learning_rate": 1e-05, "loss": 0.5276, "step": 100 }, { "epoch": 3.6764705882352944, "grad_norm": 0.3383520543575287, "learning_rate": 9.954227982894034e-06, "loss": 0.5283, "step": 125 }, { "epoch": 3.6764705882352944, "eval_accuracy": 0.9606256742179072, "eval_loss": 0.29475122690200806, "eval_runtime": 3.6867, "eval_samples_per_second": 502.885, "eval_steps_per_second": 2.17, "step": 125 }, { "epoch": 4.411764705882353, "grad_norm": 0.3172575831413269, "learning_rate": 9.817749962596115e-06, "loss": 0.5181, "step": 150 }, { "epoch": 5.147058823529412, "grad_norm": 0.2688044309616089, "learning_rate": 9.59306468881811e-06, "loss": 0.5133, "step": 175 }, { "epoch": 5.882352941176471, "grad_norm": 0.47131991386413574, "learning_rate": 9.284285880837947e-06, "loss": 0.5142, "step": 200 }, { "epoch": 6.617647058823529, "grad_norm": 0.437072217464447, "learning_rate": 8.897066910207958e-06, "loss": 0.5043, "step": 225 }, { "epoch": 7.352941176470588, "grad_norm": 0.9283384084701538, "learning_rate": 8.438497294267117e-06, "loss": 0.516, "step": 250 }, { "epoch": 7.352941176470588, "eval_accuracy": 0.9600862998921251, "eval_loss": 0.28430742025375366, "eval_runtime": 3.3555, "eval_samples_per_second": 552.534, "eval_steps_per_second": 2.384, "step": 250 }, { "epoch": 8.088235294117647, "grad_norm": 0.4921472370624542, "learning_rate": 7.916972895537471e-06, "loss": 0.5053, "step": 275 }, { "epoch": 8.823529411764707, "grad_norm": 0.6513373851776123, "learning_rate": 7.342042203498952e-06, "loss": 0.5023, "step": 300 }, { "epoch": 9.558823529411764, "grad_norm": 0.5955798625946045, "learning_rate": 6.724231513139853e-06, "loss": 0.5046, "step": 325 }, { "epoch": 10.294117647058824, "grad_norm": 0.5370674729347229, "learning_rate": 6.074852201055121e-06, "loss": 0.4918, "step": 350 }, { "epoch": 11.029411764705882, "grad_norm": 0.6328652501106262, "learning_rate": 5.405793627637157e-06, "loss": 0.4878, "step": 375 }, { "epoch": 11.029411764705882, "eval_accuracy": 0.9600862998921251, "eval_loss": 0.275637149810791, "eval_runtime": 3.4137, "eval_samples_per_second": 543.107, "eval_steps_per_second": 2.344, "step": 375 }, { "epoch": 11.764705882352942, "grad_norm": 1.029469609260559, "learning_rate": 4.729305457072913e-06, "loss": 0.4806, "step": 400 }, { "epoch": 12.5, "grad_norm": 0.7929940223693848, "learning_rate": 4.057773380608411e-06, "loss": 0.4819, "step": 425 }, { "epoch": 13.235294117647058, "grad_norm": 0.8358489871025085, "learning_rate": 3.403492349320101e-06, "loss": 0.4693, "step": 450 }, { "epoch": 13.970588235294118, "grad_norm": 1.030651569366455, "learning_rate": 2.778441468230483e-06, "loss": 0.4632, "step": 475 }, { "epoch": 14.705882352941176, "grad_norm": 1.082851529121399, "learning_rate": 2.1940646731880887e-06, "loss": 0.459, "step": 500 }, { "epoch": 14.705882352941176, "eval_accuracy": 0.9600862998921251, "eval_loss": 0.2800777554512024, "eval_runtime": 3.4811, "eval_samples_per_second": 532.584, "eval_steps_per_second": 2.298, "step": 500 }, { "epoch": 15.441176470588236, "grad_norm": 0.9598743319511414, "learning_rate": 1.6610612060565235e-06, "loss": 0.4518, "step": 525 }, { "epoch": 16.176470588235293, "grad_norm": 0.8586243987083435, "learning_rate": 1.1891897243618184e-06, "loss": 0.4604, "step": 550 }, { "epoch": 16.91176470588235, "grad_norm": 1.1040153503417969, "learning_rate": 7.870896319167548e-07, "loss": 0.4542, "step": 575 }, { "epoch": 17.647058823529413, "grad_norm": 0.8632078766822815, "learning_rate": 4.6212290164521554e-07, "loss": 0.4473, "step": 600 }, { "epoch": 18.38235294117647, "grad_norm": 0.9933310151100159, "learning_rate": 2.2023928664194229e-07, "loss": 0.4462, "step": 625 }, { "epoch": 18.38235294117647, "eval_accuracy": 0.959546925566343, "eval_loss": 0.2760601043701172, "eval_runtime": 3.319, "eval_samples_per_second": 558.602, "eval_steps_per_second": 2.41, "step": 625 }, { "epoch": 19.11764705882353, "grad_norm": 0.9852302074432373, "learning_rate": 6.58673872923693e-08, "loss": 0.4523, "step": 650 }, { "epoch": 19.852941176470587, "grad_norm": 0.8414192795753479, "learning_rate": 1.8335688835802169e-09, "loss": 0.4441, "step": 675 }, { "epoch": 20.0, "step": 680, "total_flos": 1.3409213882909e+19, "train_loss": 0.5017087179071763, "train_runtime": 256.3708, "train_samples_per_second": 674.96, "train_steps_per_second": 2.652 } ], "logging_steps": 25, "max_steps": 680, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 125, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3409213882909e+19, "train_batch_size": 256, "trial_name": null, "trial_params": null }