|
{ |
|
"best_metric": 0.2894308269023895, |
|
"best_model_checkpoint": "xlm-roberta-base-ner/checkpoint-1257", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 6285, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.39777247414478917, |
|
"grad_norm": 10.322513580322266, |
|
"learning_rate": 1.9204455051710422e-05, |
|
"loss": 0.5382, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7955449482895783, |
|
"grad_norm": 6.790235996246338, |
|
"learning_rate": 1.8408910103420846e-05, |
|
"loss": 0.395, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8085669291338583, |
|
"eval_loss": 0.2894308269023895, |
|
"eval_precision": 0.8323174685514201, |
|
"eval_recall": 0.7861342479176874, |
|
"eval_runtime": 14.4214, |
|
"eval_samples_per_second": 150.817, |
|
"eval_steps_per_second": 18.861, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 1.1933174224343674, |
|
"grad_norm": 2.2459216117858887, |
|
"learning_rate": 1.7613365155131266e-05, |
|
"loss": 0.3372, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5910898965791567, |
|
"grad_norm": 3.4064340591430664, |
|
"learning_rate": 1.681782020684169e-05, |
|
"loss": 0.3252, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.988862370723946, |
|
"grad_norm": 2.284067392349243, |
|
"learning_rate": 1.602227525855211e-05, |
|
"loss": 0.3229, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8084385006592578, |
|
"eval_loss": 0.30411699414253235, |
|
"eval_precision": 0.8293185624114389, |
|
"eval_recall": 0.7885840274375306, |
|
"eval_runtime": 9.1696, |
|
"eval_samples_per_second": 237.197, |
|
"eval_steps_per_second": 29.663, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 2.386634844868735, |
|
"grad_norm": 3.480130672454834, |
|
"learning_rate": 1.5226730310262532e-05, |
|
"loss": 0.2692, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.7844073190135243, |
|
"grad_norm": 9.41341781616211, |
|
"learning_rate": 1.4431185361972953e-05, |
|
"loss": 0.2692, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.7990613807583056, |
|
"eval_loss": 0.3256344497203827, |
|
"eval_precision": 0.8057285180572852, |
|
"eval_recall": 0.7925036746692797, |
|
"eval_runtime": 9.1536, |
|
"eval_samples_per_second": 237.61, |
|
"eval_steps_per_second": 29.715, |
|
"step": 3771 |
|
}, |
|
{ |
|
"epoch": 3.1821797931583133, |
|
"grad_norm": 10.631413459777832, |
|
"learning_rate": 1.3635640413683375e-05, |
|
"loss": 0.2578, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.579952267303103, |
|
"grad_norm": 1.1227880716323853, |
|
"learning_rate": 1.2840095465393797e-05, |
|
"loss": 0.2237, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.977724741447892, |
|
"grad_norm": 3.822526693344116, |
|
"learning_rate": 1.2044550517104217e-05, |
|
"loss": 0.2554, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8008445631248836, |
|
"eval_loss": 0.34640657901763916, |
|
"eval_precision": 0.8121929714069782, |
|
"eval_recall": 0.7898089171974523, |
|
"eval_runtime": 9.0831, |
|
"eval_samples_per_second": 239.454, |
|
"eval_steps_per_second": 29.946, |
|
"step": 5028 |
|
}, |
|
{ |
|
"epoch": 4.375497215592681, |
|
"grad_norm": 15.420336723327637, |
|
"learning_rate": 1.1249005568814639e-05, |
|
"loss": 0.2123, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.77326968973747, |
|
"grad_norm": 2.319063663482666, |
|
"learning_rate": 1.045346062052506e-05, |
|
"loss": 0.2157, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.8262004492199356, |
|
"eval_loss": 0.312337189912796, |
|
"eval_precision": 0.8189914550487424, |
|
"eval_recall": 0.8335374816266536, |
|
"eval_runtime": 9.0172, |
|
"eval_samples_per_second": 241.205, |
|
"eval_steps_per_second": 30.164, |
|
"step": 6285 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1621809589393752.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|