{ "best_metric": 0.874920666217804, "best_model_checkpoint": "miner_id_24/checkpoint-500", "epoch": 0.099000099000099, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000198000198000198, "eval_loss": 1.231751799583435, "eval_runtime": 149.6855, "eval_samples_per_second": 14.21, "eval_steps_per_second": 3.554, "step": 1 }, { "epoch": 0.00198000198000198, "grad_norm": 0.354102224111557, "learning_rate": 4.0400000000000006e-05, "loss": 0.9371, "step": 10 }, { "epoch": 0.00396000396000396, "grad_norm": 0.33383598923683167, "learning_rate": 8.080000000000001e-05, "loss": 1.0163, "step": 20 }, { "epoch": 0.00594000594000594, "grad_norm": 0.4753391742706299, "learning_rate": 0.00012119999999999999, "loss": 0.7014, "step": 30 }, { "epoch": 0.00792000792000792, "grad_norm": 0.8161477446556091, "learning_rate": 0.00016160000000000002, "loss": 0.9546, "step": 40 }, { "epoch": 0.0099000099000099, "grad_norm": 2.7666869163513184, "learning_rate": 0.000202, "loss": 1.7663, "step": 50 }, { "epoch": 0.0099000099000099, "eval_loss": 1.0730363130569458, "eval_runtime": 149.9563, "eval_samples_per_second": 14.184, "eval_steps_per_second": 3.548, "step": 50 }, { "epoch": 0.01188001188001188, "grad_norm": 0.37532028555870056, "learning_rate": 0.00020175396907624226, "loss": 1.0455, "step": 60 }, { "epoch": 0.01386001386001386, "grad_norm": 0.33691373467445374, "learning_rate": 0.0002010170749428986, "loss": 0.9879, "step": 70 }, { "epoch": 0.01584001584001584, "grad_norm": 0.4678135812282562, "learning_rate": 0.00019979290767411438, "loss": 0.6116, "step": 80 }, { "epoch": 0.01782001782001782, "grad_norm": 1.6949232816696167, "learning_rate": 0.0001980874312897702, "loss": 0.8712, "step": 90 }, { "epoch": 0.0198000198000198, "grad_norm": 7.316667556762695, "learning_rate": 0.00019590895469937675, "loss": 1.6198, "step": 100 }, { "epoch": 0.0198000198000198, "eval_loss": 1.0748445987701416, "eval_runtime": 149.9768, "eval_samples_per_second": 14.182, "eval_steps_per_second": 3.547, "step": 100 }, { "epoch": 0.02178002178002178, "grad_norm": 0.36432355642318726, "learning_rate": 0.0001932680912219027, "loss": 0.8461, "step": 110 }, { "epoch": 0.02376002376002376, "grad_norm": 0.4774848222732544, "learning_rate": 0.00019017770687875164, "loss": 0.9427, "step": 120 }, { "epoch": 0.02574002574002574, "grad_norm": 0.4353265166282654, "learning_rate": 0.000186652857711799, "loss": 0.5503, "step": 130 }, { "epoch": 0.02772002772002772, "grad_norm": 0.8645381331443787, "learning_rate": 0.00018271071643186968, "loss": 0.8073, "step": 140 }, { "epoch": 0.0297000297000297, "grad_norm": 2.1986262798309326, "learning_rate": 0.00017837048875501678, "loss": 1.4616, "step": 150 }, { "epoch": 0.0297000297000297, "eval_loss": 1.0414828062057495, "eval_runtime": 149.7946, "eval_samples_per_second": 14.199, "eval_steps_per_second": 3.552, "step": 150 }, { "epoch": 0.03168003168003168, "grad_norm": 0.2759321630001068, "learning_rate": 0.00017365331983420376, "loss": 1.0256, "step": 160 }, { "epoch": 0.03366003366003366, "grad_norm": 0.30455437302589417, "learning_rate": 0.0001685821912422447, "loss": 1.0959, "step": 170 }, { "epoch": 0.03564003564003564, "grad_norm": 0.3680591583251953, "learning_rate": 0.00016318180900789148, "loss": 0.6245, "step": 180 }, { "epoch": 0.03762003762003762, "grad_norm": 0.8665950298309326, "learning_rate": 0.00015747848325054544, "loss": 0.7264, "step": 190 }, { "epoch": 0.0396000396000396, "grad_norm": 2.6709601879119873, "learning_rate": 0.0001515, "loss": 1.4939, "step": 200 }, { "epoch": 0.0396000396000396, "eval_loss": 0.9481549859046936, "eval_runtime": 150.5367, "eval_samples_per_second": 14.129, "eval_steps_per_second": 3.534, "step": 200 }, { "epoch": 0.04158004158004158, "grad_norm": 0.2926943302154541, "learning_rate": 0.00014527548582569683, "loss": 0.8908, "step": 210 }, { "epoch": 0.04356004356004356, "grad_norm": 0.29274213314056396, "learning_rate": 0.00013883526593500714, "loss": 0.8669, "step": 220 }, { "epoch": 0.04554004554004554, "grad_norm": 0.36993053555488586, "learning_rate": 0.0001322107164318697, "loss": 0.5496, "step": 230 }, { "epoch": 0.04752004752004752, "grad_norm": 0.732137143611908, "learning_rate": 0.00012543411145556643, "loss": 0.7834, "step": 240 }, { "epoch": 0.0495000495000495, "grad_norm": 6.099823474884033, "learning_rate": 0.00011853846594435998, "loss": 1.5626, "step": 250 }, { "epoch": 0.0495000495000495, "eval_loss": 0.9261844158172607, "eval_runtime": 150.5785, "eval_samples_per_second": 14.126, "eval_steps_per_second": 3.533, "step": 250 }, { "epoch": 0.05148005148005148, "grad_norm": 0.18964390456676483, "learning_rate": 0.00011155737479003301, "loss": 0.8016, "step": 260 }, { "epoch": 0.05346005346005346, "grad_norm": 0.2881607115268707, "learning_rate": 0.00010452484916695262, "loss": 0.8889, "step": 270 }, { "epoch": 0.05544005544005544, "grad_norm": 0.3917742073535919, "learning_rate": 9.747515083304742e-05, "loss": 0.5921, "step": 280 }, { "epoch": 0.05742005742005742, "grad_norm": 0.934220552444458, "learning_rate": 9.044262520996702e-05, "loss": 0.9733, "step": 290 }, { "epoch": 0.0594000594000594, "grad_norm": 2.1632134914398193, "learning_rate": 8.346153405564004e-05, "loss": 1.4139, "step": 300 }, { "epoch": 0.0594000594000594, "eval_loss": 0.9079969525337219, "eval_runtime": 150.1159, "eval_samples_per_second": 14.169, "eval_steps_per_second": 3.544, "step": 300 }, { "epoch": 0.06138006138006138, "grad_norm": 0.327788382768631, "learning_rate": 7.656588854443357e-05, "loss": 0.9184, "step": 310 }, { "epoch": 0.06336006336006336, "grad_norm": 0.24874907732009888, "learning_rate": 6.978928356813031e-05, "loss": 0.856, "step": 320 }, { "epoch": 0.06534006534006534, "grad_norm": 0.3710956871509552, "learning_rate": 6.316473406499288e-05, "loss": 0.6467, "step": 330 }, { "epoch": 0.06732006732006732, "grad_norm": 1.4347540140151978, "learning_rate": 5.672451417430317e-05, "loss": 0.8256, "step": 340 }, { "epoch": 0.0693000693000693, "grad_norm": 8.30053997039795, "learning_rate": 5.050000000000002e-05, "loss": 1.4594, "step": 350 }, { "epoch": 0.0693000693000693, "eval_loss": 0.8915612101554871, "eval_runtime": 150.2292, "eval_samples_per_second": 14.158, "eval_steps_per_second": 3.541, "step": 350 }, { "epoch": 0.07128007128007129, "grad_norm": 0.24972504377365112, "learning_rate": 4.452151674945458e-05, "loss": 0.8633, "step": 360 }, { "epoch": 0.07326007326007326, "grad_norm": 0.31628015637397766, "learning_rate": 3.8818190992108515e-05, "loss": 0.8535, "step": 370 }, { "epoch": 0.07524007524007524, "grad_norm": 0.4251038432121277, "learning_rate": 3.3417808757755355e-05, "loss": 0.6023, "step": 380 }, { "epoch": 0.07722007722007722, "grad_norm": 1.0136597156524658, "learning_rate": 2.8346680165796253e-05, "loss": 0.7618, "step": 390 }, { "epoch": 0.0792000792000792, "grad_norm": 5.036191463470459, "learning_rate": 2.362951124498323e-05, "loss": 1.4237, "step": 400 }, { "epoch": 0.0792000792000792, "eval_loss": 0.8824264407157898, "eval_runtime": 150.2954, "eval_samples_per_second": 14.152, "eval_steps_per_second": 3.54, "step": 400 }, { "epoch": 0.08118008118008117, "grad_norm": 0.2351054549217224, "learning_rate": 1.928928356813032e-05, "loss": 0.779, "step": 410 }, { "epoch": 0.08316008316008316, "grad_norm": 0.28532853722572327, "learning_rate": 1.5347142288200977e-05, "loss": 0.8552, "step": 420 }, { "epoch": 0.08514008514008514, "grad_norm": 0.44085174798965454, "learning_rate": 1.1822293121248375e-05, "loss": 0.5278, "step": 430 }, { "epoch": 0.08712008712008712, "grad_norm": 0.758629322052002, "learning_rate": 8.731908778097302e-06, "loss": 0.8978, "step": 440 }, { "epoch": 0.0891000891000891, "grad_norm": 4.285312175750732, "learning_rate": 6.09104530062326e-06, "loss": 1.2102, "step": 450 }, { "epoch": 0.0891000891000891, "eval_loss": 0.880120575428009, "eval_runtime": 149.9634, "eval_samples_per_second": 14.183, "eval_steps_per_second": 3.548, "step": 450 }, { "epoch": 0.09108009108009107, "grad_norm": 0.22733084857463837, "learning_rate": 3.912568710229791e-06, "loss": 0.7512, "step": 460 }, { "epoch": 0.09306009306009307, "grad_norm": 0.2955915033817291, "learning_rate": 2.2070923258856255e-06, "loss": 0.9199, "step": 470 }, { "epoch": 0.09504009504009504, "grad_norm": 0.39384326338768005, "learning_rate": 9.829250571013935e-07, "loss": 0.6052, "step": 480 }, { "epoch": 0.09702009702009702, "grad_norm": 0.7115781903266907, "learning_rate": 2.4603092375775605e-07, "loss": 0.7318, "step": 490 }, { "epoch": 0.099000099000099, "grad_norm": 4.212769031524658, "learning_rate": 0.0, "loss": 1.335, "step": 500 }, { "epoch": 0.099000099000099, "eval_loss": 0.874920666217804, "eval_runtime": 149.9078, "eval_samples_per_second": 14.189, "eval_steps_per_second": 3.549, "step": 500 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0910171005137715e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }