{ "best_metric": 0.28261637687683105, "best_model_checkpoint": "/home/fbravo/data/all_results/pawsx/roberta_base_bne/epochs_3_bs_64_lr_3e-5/checkpoint-900", "epoch": 3.0, "global_step": 2316, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.39, "eval_accuracy": 0.8794999718666077, "eval_loss": 0.30226433277130127, "eval_runtime": 0.8479, "eval_samples_per_second": 2358.837, "eval_steps_per_second": 37.741, "step": 300 }, { "epoch": 0.65, "learning_rate": 2.3523316062176165e-05, "loss": 0.318, "step": 500 }, { "epoch": 0.78, "eval_accuracy": 0.8740000128746033, "eval_loss": 0.33601614832878113, "eval_runtime": 0.8536, "eval_samples_per_second": 2343.133, "eval_steps_per_second": 37.49, "step": 600 }, { "epoch": 1.17, "eval_accuracy": 0.8949999809265137, "eval_loss": 0.28261637687683105, "eval_runtime": 0.8516, "eval_samples_per_second": 2348.387, "eval_steps_per_second": 37.574, "step": 900 }, { "epoch": 1.3, "learning_rate": 1.705958549222798e-05, "loss": 0.1498, "step": 1000 }, { "epoch": 1.55, "eval_accuracy": 0.8949999809265137, "eval_loss": 0.3580223321914673, "eval_runtime": 0.8515, "eval_samples_per_second": 2348.84, "eval_steps_per_second": 37.581, "step": 1200 }, { "epoch": 1.94, "learning_rate": 1.0582901554404146e-05, "loss": 0.101, "step": 1500 }, { "epoch": 1.94, "eval_accuracy": 0.8899999856948853, "eval_loss": 0.33210474252700806, "eval_runtime": 0.8482, "eval_samples_per_second": 2357.924, "eval_steps_per_second": 37.727, "step": 1500 }, { "epoch": 2.33, "eval_accuracy": 0.8939999938011169, "eval_loss": 0.4253285527229309, "eval_runtime": 0.8518, "eval_samples_per_second": 2347.886, "eval_steps_per_second": 37.566, "step": 1800 }, { "epoch": 2.59, "learning_rate": 4.106217616580311e-06, "loss": 0.0505, "step": 2000 }, { "epoch": 2.72, "eval_accuracy": 0.8985000252723694, "eval_loss": 0.39427509903907776, "eval_runtime": 0.8509, "eval_samples_per_second": 2350.514, "eval_steps_per_second": 37.608, "step": 2100 }, { "epoch": 3.0, "step": 2316, "total_flos": 8004352140640256.0, "train_loss": 0.14005086088427607, "train_runtime": 226.3922, "train_samples_per_second": 654.629, "train_steps_per_second": 10.23 } ], "max_steps": 2316, "num_train_epochs": 3, "total_flos": 8004352140640256.0, "trial_name": null, "trial_params": null }