{ "best_metric": 0.7413793103448276, "best_model_checkpoint": "/nesi/nobackup/uoa04081/wxy/model/bert_ft/sft/epoch35_6_1/warmup_ratio_lrtype/schedule-0.3-constant_with_warmup-epoch40-lr-2e-5/checkpoint-476", "epoch": 28.0, "eval_steps": 500, "global_step": 476, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.034482758620689655, "eval_loss": 1.9626049995422363, "eval_runtime": 0.3947, "eval_samples_per_second": 146.932, "eval_steps_per_second": 5.067, "step": 17 }, { "epoch": 2.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.8860366344451904, "eval_runtime": 0.3952, "eval_samples_per_second": 146.746, "eval_steps_per_second": 5.06, "step": 34 }, { "epoch": 3.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.559799313545227, "eval_runtime": 0.3943, "eval_samples_per_second": 147.102, "eval_steps_per_second": 5.072, "step": 51 }, { "epoch": 4.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.460962176322937, "eval_runtime": 0.3951, "eval_samples_per_second": 146.813, "eval_steps_per_second": 5.063, "step": 68 }, { "epoch": 5.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.4569206237792969, "eval_runtime": 0.3956, "eval_samples_per_second": 146.594, "eval_steps_per_second": 5.055, "step": 85 }, { "epoch": 6.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.4397205114364624, "eval_runtime": 0.3971, "eval_samples_per_second": 146.051, "eval_steps_per_second": 5.036, "step": 102 }, { "epoch": 7.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.3534083366394043, "eval_runtime": 0.3961, "eval_samples_per_second": 146.439, "eval_steps_per_second": 5.05, "step": 119 }, { "epoch": 8.0, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.2757465839385986, "eval_runtime": 0.3944, "eval_samples_per_second": 147.045, "eval_steps_per_second": 5.071, "step": 136 }, { "epoch": 9.0, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.1723886728286743, "eval_runtime": 0.3961, "eval_samples_per_second": 146.442, "eval_steps_per_second": 5.05, "step": 153 }, { "epoch": 10.0, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.0931514501571655, "eval_runtime": 0.394, "eval_samples_per_second": 147.197, "eval_steps_per_second": 5.076, "step": 170 }, { "epoch": 11.0, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.0942814350128174, "eval_runtime": 0.3944, "eval_samples_per_second": 147.077, "eval_steps_per_second": 5.072, "step": 187 }, { "epoch": 12.0, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.0222851037979126, "eval_runtime": 0.395, "eval_samples_per_second": 146.846, "eval_steps_per_second": 5.064, "step": 204 }, { "epoch": 13.0, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.0476372241973877, "eval_runtime": 0.3945, "eval_samples_per_second": 147.003, "eval_steps_per_second": 5.069, "step": 221 }, { "epoch": 14.0, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.054601788520813, "eval_runtime": 0.3939, "eval_samples_per_second": 147.227, "eval_steps_per_second": 5.077, "step": 238 }, { "epoch": 15.0, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.0961247682571411, "eval_runtime": 0.3955, "eval_samples_per_second": 146.649, "eval_steps_per_second": 5.057, "step": 255 }, { "epoch": 16.0, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.135161280632019, "eval_runtime": 0.3944, "eval_samples_per_second": 147.048, "eval_steps_per_second": 5.071, "step": 272 }, { "epoch": 17.0, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.132529377937317, "eval_runtime": 0.3947, "eval_samples_per_second": 146.937, "eval_steps_per_second": 5.067, "step": 289 }, { "epoch": 18.0, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.1694955825805664, "eval_runtime": 0.3942, "eval_samples_per_second": 147.136, "eval_steps_per_second": 5.074, "step": 306 }, { "epoch": 19.0, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.2848495244979858, "eval_runtime": 0.3945, "eval_samples_per_second": 147.012, "eval_steps_per_second": 5.069, "step": 323 }, { "epoch": 20.0, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.4167975187301636, "eval_runtime": 0.3944, "eval_samples_per_second": 147.046, "eval_steps_per_second": 5.071, "step": 340 }, { "epoch": 21.0, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.3460503816604614, "eval_runtime": 0.3949, "eval_samples_per_second": 146.89, "eval_steps_per_second": 5.065, "step": 357 }, { "epoch": 22.0, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.1948856115341187, "eval_runtime": 0.3941, "eval_samples_per_second": 147.152, "eval_steps_per_second": 5.074, "step": 374 }, { "epoch": 23.0, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.2731900215148926, "eval_runtime": 0.4055, "eval_samples_per_second": 143.034, "eval_steps_per_second": 4.932, "step": 391 }, { "epoch": 24.0, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.2199504375457764, "eval_runtime": 0.3945, "eval_samples_per_second": 147.003, "eval_steps_per_second": 5.069, "step": 408 }, { "epoch": 25.0, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.481724739074707, "eval_runtime": 0.3944, "eval_samples_per_second": 147.043, "eval_steps_per_second": 5.07, "step": 425 }, { "epoch": 26.0, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.297101616859436, "eval_runtime": 0.3951, "eval_samples_per_second": 146.796, "eval_steps_per_second": 5.062, "step": 442 }, { "epoch": 27.0, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.3901363611221313, "eval_runtime": 0.394, "eval_samples_per_second": 147.193, "eval_steps_per_second": 5.076, "step": 459 }, { "epoch": 28.0, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.278436541557312, "eval_runtime": 0.3946, "eval_samples_per_second": 146.998, "eval_steps_per_second": 5.069, "step": 476 } ], "logging_steps": 500, "max_steps": 680, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 3816334065868800.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }