|
{ |
|
"best_metric": 1.798496961593628, |
|
"best_model_checkpoint": "autotrain-bigbird-flight/checkpoint-514", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 514, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04669260700389105, |
|
"grad_norm": 3.0252254009246826, |
|
"learning_rate": 9.724473257698543e-07, |
|
"loss": 2.2957, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0933852140077821, |
|
"grad_norm": 3.4915716648101807, |
|
"learning_rate": 1.9448946515397086e-06, |
|
"loss": 2.3137, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.14007782101167315, |
|
"grad_norm": 3.5864250659942627, |
|
"learning_rate": 2.9173419773095627e-06, |
|
"loss": 2.3067, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.1867704280155642, |
|
"grad_norm": 4.509471893310547, |
|
"learning_rate": 3.889789303079417e-06, |
|
"loss": 2.3144, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.23346303501945526, |
|
"grad_norm": 2.9119784832000732, |
|
"learning_rate": 4.862236628849271e-06, |
|
"loss": 2.3111, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2801556420233463, |
|
"grad_norm": 2.615067481994629, |
|
"learning_rate": 5.8346839546191254e-06, |
|
"loss": 2.3047, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.32684824902723736, |
|
"grad_norm": 3.578856945037842, |
|
"learning_rate": 6.807131280388978e-06, |
|
"loss": 2.3064, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.3735408560311284, |
|
"grad_norm": 4.135900974273682, |
|
"learning_rate": 7.779578606158834e-06, |
|
"loss": 2.2958, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.42023346303501946, |
|
"grad_norm": 3.22177791595459, |
|
"learning_rate": 8.752025931928688e-06, |
|
"loss": 2.3088, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.4669260700389105, |
|
"grad_norm": 2.849999189376831, |
|
"learning_rate": 9.724473257698542e-06, |
|
"loss": 2.2959, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5136186770428015, |
|
"grad_norm": 3.1687047481536865, |
|
"learning_rate": 1.0696920583468397e-05, |
|
"loss": 2.2795, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5603112840466926, |
|
"grad_norm": 3.0055816173553467, |
|
"learning_rate": 1.1669367909238251e-05, |
|
"loss": 2.2919, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6070038910505836, |
|
"grad_norm": 3.02952241897583, |
|
"learning_rate": 1.2641815235008103e-05, |
|
"loss": 2.2637, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6536964980544747, |
|
"grad_norm": 3.8288016319274902, |
|
"learning_rate": 1.3614262560777957e-05, |
|
"loss": 2.1604, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7003891050583657, |
|
"grad_norm": 126.01935577392578, |
|
"learning_rate": 1.4586709886547812e-05, |
|
"loss": 2.1408, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7470817120622568, |
|
"grad_norm": 14.168249130249023, |
|
"learning_rate": 1.555915721231767e-05, |
|
"loss": 2.1199, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.7937743190661478, |
|
"grad_norm": 5.94619607925415, |
|
"learning_rate": 1.6531604538087523e-05, |
|
"loss": 2.0819, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.8404669260700389, |
|
"grad_norm": 10.58005142211914, |
|
"learning_rate": 1.7504051863857376e-05, |
|
"loss": 2.0871, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.8871595330739299, |
|
"grad_norm": 9.494062423706055, |
|
"learning_rate": 1.847649918962723e-05, |
|
"loss": 1.9864, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.933852140077821, |
|
"grad_norm": 11.363922119140625, |
|
"learning_rate": 1.9448946515397084e-05, |
|
"loss": 1.925, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.980544747081712, |
|
"grad_norm": 6.328553676605225, |
|
"learning_rate": 2.0421393841166937e-05, |
|
"loss": 1.9497, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.24708171206225682, |
|
"eval_f1_macro": 0.2015263302646054, |
|
"eval_f1_micro": 0.24708171206225682, |
|
"eval_f1_weighted": 0.20916676720406013, |
|
"eval_loss": 1.9002625942230225, |
|
"eval_precision_macro": 0.20560782159414379, |
|
"eval_precision_micro": 0.24708171206225682, |
|
"eval_precision_weighted": 0.21350682757806103, |
|
"eval_recall_macro": 0.23766745468873127, |
|
"eval_recall_micro": 0.24708171206225682, |
|
"eval_recall_weighted": 0.24708171206225682, |
|
"eval_runtime": 145.8531, |
|
"eval_samples_per_second": 3.524, |
|
"eval_steps_per_second": 0.226, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.027237354085603, |
|
"grad_norm": 25.507699966430664, |
|
"learning_rate": 2.1393841166936794e-05, |
|
"loss": 1.899, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.0739299610894941, |
|
"grad_norm": 9.898531913757324, |
|
"learning_rate": 2.2366288492706648e-05, |
|
"loss": 1.8896, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.1206225680933852, |
|
"grad_norm": 5.515951633453369, |
|
"learning_rate": 2.3338735818476502e-05, |
|
"loss": 1.9243, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.1673151750972763, |
|
"grad_norm": 9.817304611206055, |
|
"learning_rate": 2.4311183144246355e-05, |
|
"loss": 1.8037, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2140077821011672, |
|
"grad_norm": 8.220943450927734, |
|
"learning_rate": 2.5283630470016206e-05, |
|
"loss": 1.843, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.2607003891050583, |
|
"grad_norm": 5.196465492248535, |
|
"learning_rate": 2.625607779578606e-05, |
|
"loss": 1.7539, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.3073929961089494, |
|
"grad_norm": 8.13240909576416, |
|
"learning_rate": 2.7228525121555913e-05, |
|
"loss": 1.8114, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.3540856031128405, |
|
"grad_norm": 7.483870506286621, |
|
"learning_rate": 2.820097244732577e-05, |
|
"loss": 1.7426, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.4007782101167314, |
|
"grad_norm": 11.400574684143066, |
|
"learning_rate": 2.9173419773095624e-05, |
|
"loss": 1.8409, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.4474708171206225, |
|
"grad_norm": 15.863828659057617, |
|
"learning_rate": 3.014586709886548e-05, |
|
"loss": 1.6875, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.4941634241245136, |
|
"grad_norm": 4.706288814544678, |
|
"learning_rate": 3.111831442463534e-05, |
|
"loss": 1.8176, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.5408560311284045, |
|
"grad_norm": 10.757370948791504, |
|
"learning_rate": 3.209076175040519e-05, |
|
"loss": 1.7746, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.5875486381322959, |
|
"grad_norm": 10.204288482666016, |
|
"learning_rate": 3.3063209076175045e-05, |
|
"loss": 1.8216, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.6342412451361867, |
|
"grad_norm": 9.947104454040527, |
|
"learning_rate": 3.40356564019449e-05, |
|
"loss": 1.6656, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.6809338521400778, |
|
"grad_norm": 10.421720504760742, |
|
"learning_rate": 3.500810372771475e-05, |
|
"loss": 1.732, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.727626459143969, |
|
"grad_norm": 10.406159400939941, |
|
"learning_rate": 3.5980551053484606e-05, |
|
"loss": 1.6426, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.7743190661478598, |
|
"grad_norm": 6.852759838104248, |
|
"learning_rate": 3.695299837925446e-05, |
|
"loss": 1.9205, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.821011673151751, |
|
"grad_norm": 8.486828804016113, |
|
"learning_rate": 3.7925445705024314e-05, |
|
"loss": 1.7752, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.867704280155642, |
|
"grad_norm": 6.236423969268799, |
|
"learning_rate": 3.889789303079417e-05, |
|
"loss": 1.7871, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.914396887159533, |
|
"grad_norm": 5.621742248535156, |
|
"learning_rate": 3.987034035656402e-05, |
|
"loss": 1.8508, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.9610894941634243, |
|
"grad_norm": 11.434782981872559, |
|
"learning_rate": 4.0842787682333875e-05, |
|
"loss": 1.821, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.26653696498054474, |
|
"eval_f1_macro": 0.22749841710333368, |
|
"eval_f1_micro": 0.26653696498054474, |
|
"eval_f1_weighted": 0.23467290371308847, |
|
"eval_loss": 1.798496961593628, |
|
"eval_precision_macro": 0.2675680367049088, |
|
"eval_precision_micro": 0.26653696498054474, |
|
"eval_precision_weighted": 0.277737992326741, |
|
"eval_recall_macro": 0.2594956658786446, |
|
"eval_recall_micro": 0.26653696498054474, |
|
"eval_recall_weighted": 0.26653696498054474, |
|
"eval_runtime": 139.4857, |
|
"eval_samples_per_second": 3.685, |
|
"eval_steps_per_second": 0.237, |
|
"step": 514 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 6168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 24, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 272097747757056.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|