{ "best_metric": 1.798496961593628, "best_model_checkpoint": "autotrain-bigbird-flight/checkpoint-514", "epoch": 2.0, "eval_steps": 500, "global_step": 514, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04669260700389105, "grad_norm": 3.0252254009246826, "learning_rate": 9.724473257698543e-07, "loss": 2.2957, "step": 12 }, { "epoch": 0.0933852140077821, "grad_norm": 3.4915716648101807, "learning_rate": 1.9448946515397086e-06, "loss": 2.3137, "step": 24 }, { "epoch": 0.14007782101167315, "grad_norm": 3.5864250659942627, "learning_rate": 2.9173419773095627e-06, "loss": 2.3067, "step": 36 }, { "epoch": 0.1867704280155642, "grad_norm": 4.509471893310547, "learning_rate": 3.889789303079417e-06, "loss": 2.3144, "step": 48 }, { "epoch": 0.23346303501945526, "grad_norm": 2.9119784832000732, "learning_rate": 4.862236628849271e-06, "loss": 2.3111, "step": 60 }, { "epoch": 0.2801556420233463, "grad_norm": 2.615067481994629, "learning_rate": 5.8346839546191254e-06, "loss": 2.3047, "step": 72 }, { "epoch": 0.32684824902723736, "grad_norm": 3.578856945037842, "learning_rate": 6.807131280388978e-06, "loss": 2.3064, "step": 84 }, { "epoch": 0.3735408560311284, "grad_norm": 4.135900974273682, "learning_rate": 7.779578606158834e-06, "loss": 2.2958, "step": 96 }, { "epoch": 0.42023346303501946, "grad_norm": 3.22177791595459, "learning_rate": 8.752025931928688e-06, "loss": 2.3088, "step": 108 }, { "epoch": 0.4669260700389105, "grad_norm": 2.849999189376831, "learning_rate": 9.724473257698542e-06, "loss": 2.2959, "step": 120 }, { "epoch": 0.5136186770428015, "grad_norm": 3.1687047481536865, "learning_rate": 1.0696920583468397e-05, "loss": 2.2795, "step": 132 }, { "epoch": 0.5603112840466926, "grad_norm": 3.0055816173553467, "learning_rate": 1.1669367909238251e-05, "loss": 2.2919, "step": 144 }, { "epoch": 0.6070038910505836, "grad_norm": 3.02952241897583, "learning_rate": 1.2641815235008103e-05, "loss": 2.2637, "step": 156 }, { "epoch": 0.6536964980544747, "grad_norm": 3.8288016319274902, "learning_rate": 1.3614262560777957e-05, "loss": 2.1604, "step": 168 }, { "epoch": 0.7003891050583657, "grad_norm": 126.01935577392578, "learning_rate": 1.4586709886547812e-05, "loss": 2.1408, "step": 180 }, { "epoch": 0.7470817120622568, "grad_norm": 14.168249130249023, "learning_rate": 1.555915721231767e-05, "loss": 2.1199, "step": 192 }, { "epoch": 0.7937743190661478, "grad_norm": 5.94619607925415, "learning_rate": 1.6531604538087523e-05, "loss": 2.0819, "step": 204 }, { "epoch": 0.8404669260700389, "grad_norm": 10.58005142211914, "learning_rate": 1.7504051863857376e-05, "loss": 2.0871, "step": 216 }, { "epoch": 0.8871595330739299, "grad_norm": 9.494062423706055, "learning_rate": 1.847649918962723e-05, "loss": 1.9864, "step": 228 }, { "epoch": 0.933852140077821, "grad_norm": 11.363922119140625, "learning_rate": 1.9448946515397084e-05, "loss": 1.925, "step": 240 }, { "epoch": 0.980544747081712, "grad_norm": 6.328553676605225, "learning_rate": 2.0421393841166937e-05, "loss": 1.9497, "step": 252 }, { "epoch": 1.0, "eval_accuracy": 0.24708171206225682, "eval_f1_macro": 0.2015263302646054, "eval_f1_micro": 0.24708171206225682, "eval_f1_weighted": 0.20916676720406013, "eval_loss": 1.9002625942230225, "eval_precision_macro": 0.20560782159414379, "eval_precision_micro": 0.24708171206225682, "eval_precision_weighted": 0.21350682757806103, "eval_recall_macro": 0.23766745468873127, "eval_recall_micro": 0.24708171206225682, "eval_recall_weighted": 0.24708171206225682, "eval_runtime": 145.8531, "eval_samples_per_second": 3.524, "eval_steps_per_second": 0.226, "step": 257 }, { "epoch": 1.027237354085603, "grad_norm": 25.507699966430664, "learning_rate": 2.1393841166936794e-05, "loss": 1.899, "step": 264 }, { "epoch": 1.0739299610894941, "grad_norm": 9.898531913757324, "learning_rate": 2.2366288492706648e-05, "loss": 1.8896, "step": 276 }, { "epoch": 1.1206225680933852, "grad_norm": 5.515951633453369, "learning_rate": 2.3338735818476502e-05, "loss": 1.9243, "step": 288 }, { "epoch": 1.1673151750972763, "grad_norm": 9.817304611206055, "learning_rate": 2.4311183144246355e-05, "loss": 1.8037, "step": 300 }, { "epoch": 1.2140077821011672, "grad_norm": 8.220943450927734, "learning_rate": 2.5283630470016206e-05, "loss": 1.843, "step": 312 }, { "epoch": 1.2607003891050583, "grad_norm": 5.196465492248535, "learning_rate": 2.625607779578606e-05, "loss": 1.7539, "step": 324 }, { "epoch": 1.3073929961089494, "grad_norm": 8.13240909576416, "learning_rate": 2.7228525121555913e-05, "loss": 1.8114, "step": 336 }, { "epoch": 1.3540856031128405, "grad_norm": 7.483870506286621, "learning_rate": 2.820097244732577e-05, "loss": 1.7426, "step": 348 }, { "epoch": 1.4007782101167314, "grad_norm": 11.400574684143066, "learning_rate": 2.9173419773095624e-05, "loss": 1.8409, "step": 360 }, { "epoch": 1.4474708171206225, "grad_norm": 15.863828659057617, "learning_rate": 3.014586709886548e-05, "loss": 1.6875, "step": 372 }, { "epoch": 1.4941634241245136, "grad_norm": 4.706288814544678, "learning_rate": 3.111831442463534e-05, "loss": 1.8176, "step": 384 }, { "epoch": 1.5408560311284045, "grad_norm": 10.757370948791504, "learning_rate": 3.209076175040519e-05, "loss": 1.7746, "step": 396 }, { "epoch": 1.5875486381322959, "grad_norm": 10.204288482666016, "learning_rate": 3.3063209076175045e-05, "loss": 1.8216, "step": 408 }, { "epoch": 1.6342412451361867, "grad_norm": 9.947104454040527, "learning_rate": 3.40356564019449e-05, "loss": 1.6656, "step": 420 }, { "epoch": 1.6809338521400778, "grad_norm": 10.421720504760742, "learning_rate": 3.500810372771475e-05, "loss": 1.732, "step": 432 }, { "epoch": 1.727626459143969, "grad_norm": 10.406159400939941, "learning_rate": 3.5980551053484606e-05, "loss": 1.6426, "step": 444 }, { "epoch": 1.7743190661478598, "grad_norm": 6.852759838104248, "learning_rate": 3.695299837925446e-05, "loss": 1.9205, "step": 456 }, { "epoch": 1.821011673151751, "grad_norm": 8.486828804016113, "learning_rate": 3.7925445705024314e-05, "loss": 1.7752, "step": 468 }, { "epoch": 1.867704280155642, "grad_norm": 6.236423969268799, "learning_rate": 3.889789303079417e-05, "loss": 1.7871, "step": 480 }, { "epoch": 1.914396887159533, "grad_norm": 5.621742248535156, "learning_rate": 3.987034035656402e-05, "loss": 1.8508, "step": 492 }, { "epoch": 1.9610894941634243, "grad_norm": 11.434782981872559, "learning_rate": 4.0842787682333875e-05, "loss": 1.821, "step": 504 }, { "epoch": 2.0, "eval_accuracy": 0.26653696498054474, "eval_f1_macro": 0.22749841710333368, "eval_f1_micro": 0.26653696498054474, "eval_f1_weighted": 0.23467290371308847, "eval_loss": 1.798496961593628, "eval_precision_macro": 0.2675680367049088, "eval_precision_micro": 0.26653696498054474, "eval_precision_weighted": 0.277737992326741, "eval_recall_macro": 0.2594956658786446, "eval_recall_micro": 0.26653696498054474, "eval_recall_weighted": 0.26653696498054474, "eval_runtime": 139.4857, "eval_samples_per_second": 3.685, "eval_steps_per_second": 0.237, "step": 514 } ], "logging_steps": 12, "max_steps": 6168, "num_input_tokens_seen": 0, "num_train_epochs": 24, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 272097747757056.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }