|
{ |
|
"best_metric": 0.9220723199736294, |
|
"best_model_checkpoint": "./saved_models/llama_3b_prompt_sbdh_gpt4_v2_0/checkpoint-144", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 144, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.041247367858887, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 3.9133, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_acc_macro": 0.4694269521783949, |
|
"eval_acc_micro": 0.509025270758086, |
|
"eval_auc_macro": 0.9508939125710083, |
|
"eval_auc_micro": 0.9142539505353097, |
|
"eval_f1_at_5": 0.39182832882982277, |
|
"eval_f1_at_8": 0.2863186309798368, |
|
"eval_f1_macro": 0.6055711778382781, |
|
"eval_f1_micro": 0.6746411483252942, |
|
"eval_loss": 0.25163671374320984, |
|
"eval_prec_at_5": 0.2502283105022831, |
|
"eval_prec_at_8": 0.168236301369863, |
|
"eval_prec_macro": 0.8351317849349306, |
|
"eval_prec_micro": 0.7975113122171044, |
|
"eval_rec_at_5": 0.9025875190258752, |
|
"eval_rec_at_8": 0.9604261796042617, |
|
"eval_rec_macro": 0.5519678498265336, |
|
"eval_rec_micro": 0.5845771144278122, |
|
"eval_runtime": 36.2308, |
|
"eval_samples_per_second": 24.178, |
|
"eval_steps_per_second": 3.036, |
|
"eval_threshold": -0.625, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1691702604293823, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.1193, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_acc_macro": 0.8113749934046343, |
|
"eval_acc_micro": 0.8189149560116702, |
|
"eval_auc_macro": 0.9940036594427881, |
|
"eval_auc_micro": 0.9939886141325751, |
|
"eval_f1_at_5": 0.42918048633589306, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.8921678080506279, |
|
"eval_f1_micro": 0.900443369608956, |
|
"eval_loss": 0.06727338582277298, |
|
"eval_prec_at_5": 0.27488584474885847, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.8696485876912569, |
|
"eval_prec_micro": 0.8760784313724803, |
|
"eval_rec_at_5": 0.978310502283105, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9217289354703175, |
|
"eval_rec_micro": 0.9262023217246329, |
|
"eval_runtime": 121.244, |
|
"eval_samples_per_second": 7.225, |
|
"eval_steps_per_second": 0.907, |
|
"eval_threshold": -1.125, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.8648784160614014, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0587, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_acc_macro": 0.8392595891870926, |
|
"eval_acc_micro": 0.849157733537454, |
|
"eval_auc_macro": 0.9951075369988027, |
|
"eval_auc_micro": 0.9959963596303174, |
|
"eval_f1_at_5": 0.4298468624753246, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9088894089226979, |
|
"eval_f1_micro": 0.9184265010351206, |
|
"eval_loss": 0.05153823271393776, |
|
"eval_prec_at_5": 0.2753424657534247, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.9249501329228883, |
|
"eval_prec_micro": 0.9172870140611317, |
|
"eval_rec_at_5": 0.9794520547945206, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9031612618645798, |
|
"eval_rec_micro": 0.9195688225538209, |
|
"eval_runtime": 118.514, |
|
"eval_samples_per_second": 7.392, |
|
"eval_steps_per_second": 0.928, |
|
"eval_threshold": 0.125, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.2277711182832718, |
|
"learning_rate": 9.411764705882353e-05, |
|
"loss": 0.0465, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_acc_macro": 0.8500885385546534, |
|
"eval_acc_micro": 0.8541033434649806, |
|
"eval_auc_macro": 0.9952461152918444, |
|
"eval_auc_micro": 0.9963732184410706, |
|
"eval_f1_at_5": 0.4295136788854873, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9175170894131637, |
|
"eval_f1_micro": 0.9213114754097605, |
|
"eval_loss": 0.05052410811185837, |
|
"eval_prec_at_5": 0.2751141552511416, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.9087740459598467, |
|
"eval_prec_micro": 0.9108589951376895, |
|
"eval_rec_at_5": 0.9788812785388128, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9275029071213349, |
|
"eval_rec_micro": 0.9320066334990935, |
|
"eval_runtime": 112.7846, |
|
"eval_samples_per_second": 7.767, |
|
"eval_steps_per_second": 0.975, |
|
"eval_threshold": 0.125, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.3336959481239319, |
|
"learning_rate": 8.823529411764706e-05, |
|
"loss": 0.0368, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_acc_macro": 0.8560051578711357, |
|
"eval_acc_micro": 0.8632148377124526, |
|
"eval_auc_macro": 0.9960314271621913, |
|
"eval_auc_micro": 0.9969526516710807, |
|
"eval_f1_at_5": 0.4298468624753246, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.920584293633742, |
|
"eval_f1_micro": 0.9265864786394916, |
|
"eval_loss": 0.047832097858190536, |
|
"eval_prec_at_5": 0.2753424657534247, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.9209783471417811, |
|
"eval_prec_micro": 0.9269709543567695, |
|
"eval_rec_at_5": 0.9794520547945206, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9204814122555302, |
|
"eval_rec_micro": 0.9262023217246329, |
|
"eval_runtime": 116.9753, |
|
"eval_samples_per_second": 7.489, |
|
"eval_steps_per_second": 0.94, |
|
"eval_threshold": 0.125, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.15532971918582916, |
|
"learning_rate": 8.23529411764706e-05, |
|
"loss": 0.0307, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_acc_macro": 0.8581393259022515, |
|
"eval_acc_micro": 0.8658346333852679, |
|
"eval_auc_macro": 0.9960477018781981, |
|
"eval_auc_micro": 0.9967622842874246, |
|
"eval_f1_at_5": 0.4298468624753246, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9220723199736294, |
|
"eval_f1_micro": 0.9280936454848722, |
|
"eval_loss": 0.05041336640715599, |
|
"eval_prec_at_5": 0.2753424657534247, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.9315146645321158, |
|
"eval_prec_micro": 0.9359190556491622, |
|
"eval_rec_at_5": 0.9794520547945206, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9137683032294842, |
|
"eval_rec_micro": 0.9203980099501724, |
|
"eval_runtime": 118.5157, |
|
"eval_samples_per_second": 7.391, |
|
"eval_steps_per_second": 0.928, |
|
"eval_threshold": 1.125, |
|
"step": 144 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.653734367119278e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|