bigbird-flight-2 / checkpoint-1028 /trainer_state.json
pvaluedotone's picture
Upload folder using huggingface_hub
ddd1564 verified
{
"best_metric": 1.6760838031768799,
"best_model_checkpoint": "bigbird-flight-2/checkpoint-1028",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1028,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09727626459143969,
"grad_norm": 5.640580654144287,
"learning_rate": 1.5197568389057753e-06,
"loss": 2.2808,
"step": 50
},
{
"epoch": 0.19455252918287938,
"grad_norm": 6.845468044281006,
"learning_rate": 3.0395136778115506e-06,
"loss": 2.3041,
"step": 100
},
{
"epoch": 0.2918287937743191,
"grad_norm": 4.374593257904053,
"learning_rate": 4.559270516717325e-06,
"loss": 2.303,
"step": 150
},
{
"epoch": 0.38910505836575876,
"grad_norm": 3.985168218612671,
"learning_rate": 6.079027355623101e-06,
"loss": 2.295,
"step": 200
},
{
"epoch": 0.48638132295719844,
"grad_norm": 6.664238929748535,
"learning_rate": 7.5987841945288756e-06,
"loss": 2.249,
"step": 250
},
{
"epoch": 0.5836575875486382,
"grad_norm": 6.191707611083984,
"learning_rate": 9.11854103343465e-06,
"loss": 2.1323,
"step": 300
},
{
"epoch": 0.6809338521400778,
"grad_norm": 6.4777140617370605,
"learning_rate": 1.0638297872340426e-05,
"loss": 2.0151,
"step": 350
},
{
"epoch": 0.7782101167315175,
"grad_norm": 15.601507186889648,
"learning_rate": 1.2158054711246202e-05,
"loss": 1.9318,
"step": 400
},
{
"epoch": 0.8754863813229572,
"grad_norm": 6.998500347137451,
"learning_rate": 1.3677811550151975e-05,
"loss": 1.9021,
"step": 450
},
{
"epoch": 0.9727626459143969,
"grad_norm": 16.937795639038086,
"learning_rate": 1.5197568389057751e-05,
"loss": 1.8199,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.2704280155642023,
"eval_f1_macro": 0.18452231398929034,
"eval_f1_micro": 0.2704280155642023,
"eval_f1_weighted": 0.1903075803351923,
"eval_loss": 1.8465327024459839,
"eval_precision_macro": 0.19230244249654188,
"eval_precision_micro": 0.2704280155642023,
"eval_precision_weighted": 0.1960311004178974,
"eval_recall_macro": 0.2598247809762203,
"eval_recall_micro": 0.2704280155642023,
"eval_recall_weighted": 0.2704280155642023,
"eval_runtime": 1595.827,
"eval_samples_per_second": 0.322,
"eval_steps_per_second": 0.041,
"step": 514
},
{
"epoch": 1.0700389105058365,
"grad_norm": 10.755321502685547,
"learning_rate": 1.6717325227963527e-05,
"loss": 1.8219,
"step": 550
},
{
"epoch": 1.1673151750972763,
"grad_norm": 16.09447479248047,
"learning_rate": 1.82370820668693e-05,
"loss": 1.7428,
"step": 600
},
{
"epoch": 1.264591439688716,
"grad_norm": 14.368643760681152,
"learning_rate": 1.9756838905775076e-05,
"loss": 1.7054,
"step": 650
},
{
"epoch": 1.3618677042801557,
"grad_norm": 9.273758888244629,
"learning_rate": 2.1276595744680852e-05,
"loss": 1.7145,
"step": 700
},
{
"epoch": 1.4591439688715953,
"grad_norm": 12.411294937133789,
"learning_rate": 2.279635258358663e-05,
"loss": 1.6765,
"step": 750
},
{
"epoch": 1.556420233463035,
"grad_norm": 27.17559051513672,
"learning_rate": 2.4316109422492404e-05,
"loss": 1.7499,
"step": 800
},
{
"epoch": 1.6536964980544746,
"grad_norm": 18.921966552734375,
"learning_rate": 2.5835866261398177e-05,
"loss": 1.6988,
"step": 850
},
{
"epoch": 1.7509727626459144,
"grad_norm": 14.716523170471191,
"learning_rate": 2.735562310030395e-05,
"loss": 1.6935,
"step": 900
},
{
"epoch": 1.8482490272373542,
"grad_norm": 20.14253807067871,
"learning_rate": 2.887537993920973e-05,
"loss": 1.7054,
"step": 950
},
{
"epoch": 1.9455252918287937,
"grad_norm": 5.987732887268066,
"learning_rate": 3.0395136778115502e-05,
"loss": 1.7427,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.3093385214007782,
"eval_f1_macro": 0.27337963763262785,
"eval_f1_micro": 0.3093385214007782,
"eval_f1_weighted": 0.2814372687206247,
"eval_loss": 1.6760838031768799,
"eval_precision_macro": 0.282150640368379,
"eval_precision_micro": 0.3093385214007782,
"eval_precision_weighted": 0.2911286647426314,
"eval_recall_macro": 0.30072312612988455,
"eval_recall_micro": 0.3093385214007782,
"eval_recall_weighted": 0.3093385214007782,
"eval_runtime": 1588.4184,
"eval_samples_per_second": 0.324,
"eval_steps_per_second": 0.041,
"step": 1028
}
],
"logging_steps": 50,
"max_steps": 16448,
"num_input_tokens_seen": 0,
"num_train_epochs": 32,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2176781982056448.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}