{ "best_metric": 0.15465113520622253, "best_model_checkpoint": "miner_id_24/checkpoint-500", "epoch": 0.16028209648982208, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00032056419297964416, "eval_loss": 0.6954826712608337, "eval_runtime": 534.29, "eval_samples_per_second": 9.834, "eval_steps_per_second": 2.459, "step": 1 }, { "epoch": 0.01602820964898221, "grad_norm": 0.21765097975730896, "learning_rate": 0.0001075, "loss": 0.3418, "step": 50 }, { "epoch": 0.03205641929796442, "grad_norm": 0.20030294358730316, "learning_rate": 0.000215, "loss": 0.2316, "step": 100 }, { "epoch": 0.04808462894694662, "grad_norm": 0.16089896857738495, "learning_rate": 0.0002068170497449633, "loss": 0.2135, "step": 150 }, { "epoch": 0.06411283859592884, "grad_norm": 0.21217867732048035, "learning_rate": 0.00018351397897755384, "loss": 0.1948, "step": 200 }, { "epoch": 0.08014104824491104, "grad_norm": 0.17849212884902954, "learning_rate": 0.00014863846897924716, "loss": 0.1863, "step": 250 }, { "epoch": 0.09616925789389325, "grad_norm": 0.16983668506145477, "learning_rate": 0.0001075, "loss": 0.1731, "step": 300 }, { "epoch": 0.11219746754287546, "grad_norm": 0.19890177249908447, "learning_rate": 6.636153102075285e-05, "loss": 0.1692, "step": 350 }, { "epoch": 0.12822567719185768, "grad_norm": 0.17077498137950897, "learning_rate": 3.148602102244615e-05, "loss": 0.1616, "step": 400 }, { "epoch": 0.14425388684083987, "grad_norm": 0.15306362509727478, "learning_rate": 8.182950255036675e-06, "loss": 0.1583, "step": 450 }, { "epoch": 0.16028209648982208, "grad_norm": 0.16775737702846527, "learning_rate": 0.0, "loss": 0.1519, "step": 500 }, { "epoch": 0.16028209648982208, "eval_loss": 0.15465113520622253, "eval_runtime": 535.2023, "eval_samples_per_second": 9.817, "eval_steps_per_second": 2.455, "step": 500 } ], "logging_steps": 50, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.725721272069652e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }