{
  "best_metric": 1.6760838031768799,
  "best_model_checkpoint": "bigbird-flight-2/checkpoint-1028",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1028,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09727626459143969,
      "grad_norm": 5.640580654144287,
      "learning_rate": 1.5197568389057753e-06,
      "loss": 2.2808,
      "step": 50
    },
    {
      "epoch": 0.19455252918287938,
      "grad_norm": 6.845468044281006,
      "learning_rate": 3.0395136778115506e-06,
      "loss": 2.3041,
      "step": 100
    },
    {
      "epoch": 0.2918287937743191,
      "grad_norm": 4.374593257904053,
      "learning_rate": 4.559270516717325e-06,
      "loss": 2.303,
      "step": 150
    },
    {
      "epoch": 0.38910505836575876,
      "grad_norm": 3.985168218612671,
      "learning_rate": 6.079027355623101e-06,
      "loss": 2.295,
      "step": 200
    },
    {
      "epoch": 0.48638132295719844,
      "grad_norm": 6.664238929748535,
      "learning_rate": 7.5987841945288756e-06,
      "loss": 2.249,
      "step": 250
    },
    {
      "epoch": 0.5836575875486382,
      "grad_norm": 6.191707611083984,
      "learning_rate": 9.11854103343465e-06,
      "loss": 2.1323,
      "step": 300
    },
    {
      "epoch": 0.6809338521400778,
      "grad_norm": 6.4777140617370605,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 2.0151,
      "step": 350
    },
    {
      "epoch": 0.7782101167315175,
      "grad_norm": 15.601507186889648,
      "learning_rate": 1.2158054711246202e-05,
      "loss": 1.9318,
      "step": 400
    },
    {
      "epoch": 0.8754863813229572,
      "grad_norm": 6.998500347137451,
      "learning_rate": 1.3677811550151975e-05,
      "loss": 1.9021,
      "step": 450
    },
    {
      "epoch": 0.9727626459143969,
      "grad_norm": 16.937795639038086,
      "learning_rate": 1.5197568389057751e-05,
      "loss": 1.8199,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.2704280155642023,
      "eval_f1_macro": 0.18452231398929034,
      "eval_f1_micro": 0.2704280155642023,
      "eval_f1_weighted": 0.1903075803351923,
      "eval_loss": 1.8465327024459839,
      "eval_precision_macro": 0.19230244249654188,
      "eval_precision_micro": 0.2704280155642023,
      "eval_precision_weighted": 0.1960311004178974,
      "eval_recall_macro": 0.2598247809762203,
      "eval_recall_micro": 0.2704280155642023,
      "eval_recall_weighted": 0.2704280155642023,
      "eval_runtime": 1595.827,
      "eval_samples_per_second": 0.322,
      "eval_steps_per_second": 0.041,
      "step": 514
    },
    {
      "epoch": 1.0700389105058365,
      "grad_norm": 10.755321502685547,
      "learning_rate": 1.6717325227963527e-05,
      "loss": 1.8219,
      "step": 550
    },
    {
      "epoch": 1.1673151750972763,
      "grad_norm": 16.09447479248047,
      "learning_rate": 1.82370820668693e-05,
      "loss": 1.7428,
      "step": 600
    },
    {
      "epoch": 1.264591439688716,
      "grad_norm": 14.368643760681152,
      "learning_rate": 1.9756838905775076e-05,
      "loss": 1.7054,
      "step": 650
    },
    {
      "epoch": 1.3618677042801557,
      "grad_norm": 9.273758888244629,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 1.7145,
      "step": 700
    },
    {
      "epoch": 1.4591439688715953,
      "grad_norm": 12.411294937133789,
      "learning_rate": 2.279635258358663e-05,
      "loss": 1.6765,
      "step": 750
    },
    {
      "epoch": 1.556420233463035,
      "grad_norm": 27.17559051513672,
      "learning_rate": 2.4316109422492404e-05,
      "loss": 1.7499,
      "step": 800
    },
    {
      "epoch": 1.6536964980544746,
      "grad_norm": 18.921966552734375,
      "learning_rate": 2.5835866261398177e-05,
      "loss": 1.6988,
      "step": 850
    },
    {
      "epoch": 1.7509727626459144,
      "grad_norm": 14.716523170471191,
      "learning_rate": 2.735562310030395e-05,
      "loss": 1.6935,
      "step": 900
    },
    {
      "epoch": 1.8482490272373542,
      "grad_norm": 20.14253807067871,
      "learning_rate": 2.887537993920973e-05,
      "loss": 1.7054,
      "step": 950
    },
    {
      "epoch": 1.9455252918287937,
      "grad_norm": 5.987732887268066,
      "learning_rate": 3.0395136778115502e-05,
      "loss": 1.7427,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.3093385214007782,
      "eval_f1_macro": 0.27337963763262785,
      "eval_f1_micro": 0.3093385214007782,
      "eval_f1_weighted": 0.2814372687206247,
      "eval_loss": 1.6760838031768799,
      "eval_precision_macro": 0.282150640368379,
      "eval_precision_micro": 0.3093385214007782,
      "eval_precision_weighted": 0.2911286647426314,
      "eval_recall_macro": 0.30072312612988455,
      "eval_recall_micro": 0.3093385214007782,
      "eval_recall_weighted": 0.3093385214007782,
      "eval_runtime": 1588.4184,
      "eval_samples_per_second": 0.324,
      "eval_steps_per_second": 0.041,
      "step": 1028
    }
  ],
  "logging_steps": 50,
  "max_steps": 16448,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 32,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2176781982056448.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}