{
"best_metric": 1.798496961593628,
"best_model_checkpoint": "autotrain-bigbird-flight/checkpoint-514",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 514,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04669260700389105,
"grad_norm": 3.0252254009246826,
"learning_rate": 9.724473257698543e-07,
"loss": 2.2957,
"step": 12
},
{
"epoch": 0.0933852140077821,
"grad_norm": 3.4915716648101807,
"learning_rate": 1.9448946515397086e-06,
"loss": 2.3137,
"step": 24
},
{
"epoch": 0.14007782101167315,
"grad_norm": 3.5864250659942627,
"learning_rate": 2.9173419773095627e-06,
"loss": 2.3067,
"step": 36
},
{
"epoch": 0.1867704280155642,
"grad_norm": 4.509471893310547,
"learning_rate": 3.889789303079417e-06,
"loss": 2.3144,
"step": 48
},
{
"epoch": 0.23346303501945526,
"grad_norm": 2.9119784832000732,
"learning_rate": 4.862236628849271e-06,
"loss": 2.3111,
"step": 60
},
{
"epoch": 0.2801556420233463,
"grad_norm": 2.615067481994629,
"learning_rate": 5.8346839546191254e-06,
"loss": 2.3047,
"step": 72
},
{
"epoch": 0.32684824902723736,
"grad_norm": 3.578856945037842,
"learning_rate": 6.807131280388978e-06,
"loss": 2.3064,
"step": 84
},
{
"epoch": 0.3735408560311284,
"grad_norm": 4.135900974273682,
"learning_rate": 7.779578606158834e-06,
"loss": 2.2958,
"step": 96
},
{
"epoch": 0.42023346303501946,
"grad_norm": 3.22177791595459,
"learning_rate": 8.752025931928688e-06,
"loss": 2.3088,
"step": 108
},
{
"epoch": 0.4669260700389105,
"grad_norm": 2.849999189376831,
"learning_rate": 9.724473257698542e-06,
"loss": 2.2959,
"step": 120
},
{
"epoch": 0.5136186770428015,
"grad_norm": 3.1687047481536865,
"learning_rate": 1.0696920583468397e-05,
"loss": 2.2795,
"step": 132
},
{
"epoch": 0.5603112840466926,
"grad_norm": 3.0055816173553467,
"learning_rate": 1.1669367909238251e-05,
"loss": 2.2919,
"step": 144
},
{
"epoch": 0.6070038910505836,
"grad_norm": 3.02952241897583,
"learning_rate": 1.2641815235008103e-05,
"loss": 2.2637,
"step": 156
},
{
"epoch": 0.6536964980544747,
"grad_norm": 3.8288016319274902,
"learning_rate": 1.3614262560777957e-05,
"loss": 2.1604,
"step": 168
},
{
"epoch": 0.7003891050583657,
"grad_norm": 126.01935577392578,
"learning_rate": 1.4586709886547812e-05,
"loss": 2.1408,
"step": 180
},
{
"epoch": 0.7470817120622568,
"grad_norm": 14.168249130249023,
"learning_rate": 1.555915721231767e-05,
"loss": 2.1199,
"step": 192
},
{
"epoch": 0.7937743190661478,
"grad_norm": 5.94619607925415,
"learning_rate": 1.6531604538087523e-05,
"loss": 2.0819,
"step": 204
},
{
"epoch": 0.8404669260700389,
"grad_norm": 10.58005142211914,
"learning_rate": 1.7504051863857376e-05,
"loss": 2.0871,
"step": 216
},
{
"epoch": 0.8871595330739299,
"grad_norm": 9.494062423706055,
"learning_rate": 1.847649918962723e-05,
"loss": 1.9864,
"step": 228
},
{
"epoch": 0.933852140077821,
"grad_norm": 11.363922119140625,
"learning_rate": 1.9448946515397084e-05,
"loss": 1.925,
"step": 240
},
{
"epoch": 0.980544747081712,
"grad_norm": 6.328553676605225,
"learning_rate": 2.0421393841166937e-05,
"loss": 1.9497,
"step": 252
},
{
"epoch": 1.0,
"eval_accuracy": 0.24708171206225682,
"eval_f1_macro": 0.2015263302646054,
"eval_f1_micro": 0.24708171206225682,
"eval_f1_weighted": 0.20916676720406013,
"eval_loss": 1.9002625942230225,
"eval_precision_macro": 0.20560782159414379,
"eval_precision_micro": 0.24708171206225682,
"eval_precision_weighted": 0.21350682757806103,
"eval_recall_macro": 0.23766745468873127,
"eval_recall_micro": 0.24708171206225682,
"eval_recall_weighted": 0.24708171206225682,
"eval_runtime": 145.8531,
"eval_samples_per_second": 3.524,
"eval_steps_per_second": 0.226,
"step": 257
},
{
"epoch": 1.027237354085603,
"grad_norm": 25.507699966430664,
"learning_rate": 2.1393841166936794e-05,
"loss": 1.899,
"step": 264
},
{
"epoch": 1.0739299610894941,
"grad_norm": 9.898531913757324,
"learning_rate": 2.2366288492706648e-05,
"loss": 1.8896,
"step": 276
},
{
"epoch": 1.1206225680933852,
"grad_norm": 5.515951633453369,
"learning_rate": 2.3338735818476502e-05,
"loss": 1.9243,
"step": 288
},
{
"epoch": 1.1673151750972763,
"grad_norm": 9.817304611206055,
"learning_rate": 2.4311183144246355e-05,
"loss": 1.8037,
"step": 300
},
{
"epoch": 1.2140077821011672,
"grad_norm": 8.220943450927734,
"learning_rate": 2.5283630470016206e-05,
"loss": 1.843,
"step": 312
},
{
"epoch": 1.2607003891050583,
"grad_norm": 5.196465492248535,
"learning_rate": 2.625607779578606e-05,
"loss": 1.7539,
"step": 324
},
{
"epoch": 1.3073929961089494,
"grad_norm": 8.13240909576416,
"learning_rate": 2.7228525121555913e-05,
"loss": 1.8114,
"step": 336
},
{
"epoch": 1.3540856031128405,
"grad_norm": 7.483870506286621,
"learning_rate": 2.820097244732577e-05,
"loss": 1.7426,
"step": 348
},
{
"epoch": 1.4007782101167314,
"grad_norm": 11.400574684143066,
"learning_rate": 2.9173419773095624e-05,
"loss": 1.8409,
"step": 360
},
{
"epoch": 1.4474708171206225,
"grad_norm": 15.863828659057617,
"learning_rate": 3.014586709886548e-05,
"loss": 1.6875,
"step": 372
},
{
"epoch": 1.4941634241245136,
"grad_norm": 4.706288814544678,
"learning_rate": 3.111831442463534e-05,
"loss": 1.8176,
"step": 384
},
{
"epoch": 1.5408560311284045,
"grad_norm": 10.757370948791504,
"learning_rate": 3.209076175040519e-05,
"loss": 1.7746,
"step": 396
},
{
"epoch": 1.5875486381322959,
"grad_norm": 10.204288482666016,
"learning_rate": 3.3063209076175045e-05,
"loss": 1.8216,
"step": 408
},
{
"epoch": 1.6342412451361867,
"grad_norm": 9.947104454040527,
"learning_rate": 3.40356564019449e-05,
"loss": 1.6656,
"step": 420
},
{
"epoch": 1.6809338521400778,
"grad_norm": 10.421720504760742,
"learning_rate": 3.500810372771475e-05,
"loss": 1.732,
"step": 432
},
{
"epoch": 1.727626459143969,
"grad_norm": 10.406159400939941,
"learning_rate": 3.5980551053484606e-05,
"loss": 1.6426,
"step": 444
},
{
"epoch": 1.7743190661478598,
"grad_norm": 6.852759838104248,
"learning_rate": 3.695299837925446e-05,
"loss": 1.9205,
"step": 456
},
{
"epoch": 1.821011673151751,
"grad_norm": 8.486828804016113,
"learning_rate": 3.7925445705024314e-05,
"loss": 1.7752,
"step": 468
},
{
"epoch": 1.867704280155642,
"grad_norm": 6.236423969268799,
"learning_rate": 3.889789303079417e-05,
"loss": 1.7871,
"step": 480
},
{
"epoch": 1.914396887159533,
"grad_norm": 5.621742248535156,
"learning_rate": 3.987034035656402e-05,
"loss": 1.8508,
"step": 492
},
{
"epoch": 1.9610894941634243,
"grad_norm": 11.434782981872559,
"learning_rate": 4.0842787682333875e-05,
"loss": 1.821,
"step": 504
},
{
"epoch": 2.0,
"eval_accuracy": 0.26653696498054474,
"eval_f1_macro": 0.22749841710333368,
"eval_f1_micro": 0.26653696498054474,
"eval_f1_weighted": 0.23467290371308847,
"eval_loss": 1.798496961593628,
"eval_precision_macro": 0.2675680367049088,
"eval_precision_micro": 0.26653696498054474,
"eval_precision_weighted": 0.277737992326741,
"eval_recall_macro": 0.2594956658786446,
"eval_recall_micro": 0.26653696498054474,
"eval_recall_weighted": 0.26653696498054474,
"eval_runtime": 139.4857,
"eval_samples_per_second": 3.685,
"eval_steps_per_second": 0.237,
"step": 514
}
],
"logging_steps": 12,
"max_steps": 6168,
"num_input_tokens_seen": 0,
"num_train_epochs": 24,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 272097747757056.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}