|
{ |
|
"best_metric": 0.28261637687683105, |
|
"best_model_checkpoint": "/home/fbravo/data/all_results/pawsx/roberta_base_bne/epochs_3_bs_64_lr_3e-5/checkpoint-900", |
|
"epoch": 3.0, |
|
"global_step": 2316, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.8794999718666077, |
|
"eval_loss": 0.30226433277130127, |
|
"eval_runtime": 0.8479, |
|
"eval_samples_per_second": 2358.837, |
|
"eval_steps_per_second": 37.741, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.3523316062176165e-05, |
|
"loss": 0.318, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.8740000128746033, |
|
"eval_loss": 0.33601614832878113, |
|
"eval_runtime": 0.8536, |
|
"eval_samples_per_second": 2343.133, |
|
"eval_steps_per_second": 37.49, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.8949999809265137, |
|
"eval_loss": 0.28261637687683105, |
|
"eval_runtime": 0.8516, |
|
"eval_samples_per_second": 2348.387, |
|
"eval_steps_per_second": 37.574, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.705958549222798e-05, |
|
"loss": 0.1498, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.8949999809265137, |
|
"eval_loss": 0.3580223321914673, |
|
"eval_runtime": 0.8515, |
|
"eval_samples_per_second": 2348.84, |
|
"eval_steps_per_second": 37.581, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0582901554404146e-05, |
|
"loss": 0.101, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.8899999856948853, |
|
"eval_loss": 0.33210474252700806, |
|
"eval_runtime": 0.8482, |
|
"eval_samples_per_second": 2357.924, |
|
"eval_steps_per_second": 37.727, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.8939999938011169, |
|
"eval_loss": 0.4253285527229309, |
|
"eval_runtime": 0.8518, |
|
"eval_samples_per_second": 2347.886, |
|
"eval_steps_per_second": 37.566, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.106217616580311e-06, |
|
"loss": 0.0505, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.8985000252723694, |
|
"eval_loss": 0.39427509903907776, |
|
"eval_runtime": 0.8509, |
|
"eval_samples_per_second": 2350.514, |
|
"eval_steps_per_second": 37.608, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2316, |
|
"total_flos": 8004352140640256.0, |
|
"train_loss": 0.14005086088427607, |
|
"train_runtime": 226.3922, |
|
"train_samples_per_second": 654.629, |
|
"train_steps_per_second": 10.23 |
|
} |
|
], |
|
"max_steps": 2316, |
|
"num_train_epochs": 3, |
|
"total_flos": 8004352140640256.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|