{ "best_metric": 1.6760838031768799, "best_model_checkpoint": "bigbird-flight-2/checkpoint-1028", "epoch": 2.0, "eval_steps": 500, "global_step": 1028, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09727626459143969, "grad_norm": 5.640580654144287, "learning_rate": 1.5197568389057753e-06, "loss": 2.2808, "step": 50 }, { "epoch": 0.19455252918287938, "grad_norm": 6.845468044281006, "learning_rate": 3.0395136778115506e-06, "loss": 2.3041, "step": 100 }, { "epoch": 0.2918287937743191, "grad_norm": 4.374593257904053, "learning_rate": 4.559270516717325e-06, "loss": 2.303, "step": 150 }, { "epoch": 0.38910505836575876, "grad_norm": 3.985168218612671, "learning_rate": 6.079027355623101e-06, "loss": 2.295, "step": 200 }, { "epoch": 0.48638132295719844, "grad_norm": 6.664238929748535, "learning_rate": 7.5987841945288756e-06, "loss": 2.249, "step": 250 }, { "epoch": 0.5836575875486382, "grad_norm": 6.191707611083984, "learning_rate": 9.11854103343465e-06, "loss": 2.1323, "step": 300 }, { "epoch": 0.6809338521400778, "grad_norm": 6.4777140617370605, "learning_rate": 1.0638297872340426e-05, "loss": 2.0151, "step": 350 }, { "epoch": 0.7782101167315175, "grad_norm": 15.601507186889648, "learning_rate": 1.2158054711246202e-05, "loss": 1.9318, "step": 400 }, { "epoch": 0.8754863813229572, "grad_norm": 6.998500347137451, "learning_rate": 1.3677811550151975e-05, "loss": 1.9021, "step": 450 }, { "epoch": 0.9727626459143969, "grad_norm": 16.937795639038086, "learning_rate": 1.5197568389057751e-05, "loss": 1.8199, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.2704280155642023, "eval_f1_macro": 0.18452231398929034, "eval_f1_micro": 0.2704280155642023, "eval_f1_weighted": 0.1903075803351923, "eval_loss": 1.8465327024459839, "eval_precision_macro": 0.19230244249654188, "eval_precision_micro": 0.2704280155642023, "eval_precision_weighted": 0.1960311004178974, "eval_recall_macro": 0.2598247809762203, "eval_recall_micro": 0.2704280155642023, "eval_recall_weighted": 0.2704280155642023, "eval_runtime": 1595.827, "eval_samples_per_second": 0.322, "eval_steps_per_second": 0.041, "step": 514 }, { "epoch": 1.0700389105058365, "grad_norm": 10.755321502685547, "learning_rate": 1.6717325227963527e-05, "loss": 1.8219, "step": 550 }, { "epoch": 1.1673151750972763, "grad_norm": 16.09447479248047, "learning_rate": 1.82370820668693e-05, "loss": 1.7428, "step": 600 }, { "epoch": 1.264591439688716, "grad_norm": 14.368643760681152, "learning_rate": 1.9756838905775076e-05, "loss": 1.7054, "step": 650 }, { "epoch": 1.3618677042801557, "grad_norm": 9.273758888244629, "learning_rate": 2.1276595744680852e-05, "loss": 1.7145, "step": 700 }, { "epoch": 1.4591439688715953, "grad_norm": 12.411294937133789, "learning_rate": 2.279635258358663e-05, "loss": 1.6765, "step": 750 }, { "epoch": 1.556420233463035, "grad_norm": 27.17559051513672, "learning_rate": 2.4316109422492404e-05, "loss": 1.7499, "step": 800 }, { "epoch": 1.6536964980544746, "grad_norm": 18.921966552734375, "learning_rate": 2.5835866261398177e-05, "loss": 1.6988, "step": 850 }, { "epoch": 1.7509727626459144, "grad_norm": 14.716523170471191, "learning_rate": 2.735562310030395e-05, "loss": 1.6935, "step": 900 }, { "epoch": 1.8482490272373542, "grad_norm": 20.14253807067871, "learning_rate": 2.887537993920973e-05, "loss": 1.7054, "step": 950 }, { "epoch": 1.9455252918287937, "grad_norm": 5.987732887268066, "learning_rate": 3.0395136778115502e-05, "loss": 1.7427, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.3093385214007782, "eval_f1_macro": 0.27337963763262785, "eval_f1_micro": 0.3093385214007782, "eval_f1_weighted": 0.2814372687206247, "eval_loss": 1.6760838031768799, "eval_precision_macro": 0.282150640368379, "eval_precision_micro": 0.3093385214007782, "eval_precision_weighted": 0.2911286647426314, "eval_recall_macro": 0.30072312612988455, "eval_recall_micro": 0.3093385214007782, "eval_recall_weighted": 0.3093385214007782, "eval_runtime": 1588.4184, "eval_samples_per_second": 0.324, "eval_steps_per_second": 0.041, "step": 1028 } ], "logging_steps": 50, "max_steps": 16448, "num_input_tokens_seen": 0, "num_train_epochs": 32, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2176781982056448.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }