|
{ |
|
"best_metric": 0.6007231820008035, |
|
"best_model_checkpoint": "model/checkpoint-7504", |
|
"epoch": 4.0, |
|
"eval_steps": 50, |
|
"global_step": 7504, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.232726588845253, |
|
"learning_rate": 9.084300622945162e-06, |
|
"loss": 0.0817, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8464818763326226, |
|
"eval_f1_macro": 0.4624149659863946, |
|
"eval_f1_weighted": 0.8011493190026544, |
|
"eval_loss": 0.06743289530277252, |
|
"eval_precision_macro": 0.7010764171889492, |
|
"eval_precision_weighted": 0.8229165574281427, |
|
"eval_recall_macro": 0.44028979481565683, |
|
"eval_recall_weighted": 0.8464818763326226, |
|
"eval_runtime": 5.0821, |
|
"eval_samples_per_second": 184.568, |
|
"eval_steps_per_second": 46.24, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.18798664212226868, |
|
"learning_rate": 6.593041210714562e-06, |
|
"loss": 0.0588, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8251599147121536, |
|
"eval_f1_macro": 0.5992706992706992, |
|
"eval_f1_weighted": 0.829400158611246, |
|
"eval_loss": 0.06970743834972382, |
|
"eval_precision_macro": 0.6140893452063056, |
|
"eval_precision_weighted": 0.8365234937694226, |
|
"eval_recall_macro": 0.5961441974373008, |
|
"eval_recall_weighted": 0.8251599147121536, |
|
"eval_runtime": 5.0788, |
|
"eval_samples_per_second": 184.689, |
|
"eval_steps_per_second": 46.271, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.03722580894827843, |
|
"learning_rate": 3.486973746877771e-06, |
|
"loss": 0.0395, |
|
"step": 5628 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.849680170575693, |
|
"eval_f1_macro": 0.5773129404708353, |
|
"eval_f1_weighted": 0.8345452465837384, |
|
"eval_loss": 0.08055932819843292, |
|
"eval_precision_macro": 0.6424363946177801, |
|
"eval_precision_weighted": 0.8274003556291218, |
|
"eval_recall_macro": 0.5397910877436739, |
|
"eval_recall_weighted": 0.849680170575693, |
|
"eval_runtime": 5.0482, |
|
"eval_samples_per_second": 185.807, |
|
"eval_steps_per_second": 46.551, |
|
"step": 5628 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.22263826429843903, |
|
"learning_rate": 9.648338779170968e-07, |
|
"loss": 0.0239, |
|
"step": 7504 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8336886993603412, |
|
"eval_f1_macro": 0.6007231820008035, |
|
"eval_f1_weighted": 0.8327353646735044, |
|
"eval_loss": 0.0956118032336235, |
|
"eval_precision_macro": 0.6171054012879108, |
|
"eval_precision_weighted": 0.8329684712250969, |
|
"eval_recall_macro": 0.5898380302115934, |
|
"eval_recall_weighted": 0.8336886993603412, |
|
"eval_runtime": 5.0679, |
|
"eval_samples_per_second": 185.087, |
|
"eval_steps_per_second": 46.371, |
|
"step": 7504 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 9380, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1987576294404096.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|