|
{
  "best_metric": 0.0007766146445646882,
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
  "epoch": 1.0,
  "eval_steps": 50,
  "global_step": 463,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021598272138228943,
      "eval_loss": 0.11870575696229935,
      "eval_runtime": 28.391,
      "eval_samples_per_second": 6.868,
      "eval_steps_per_second": 1.726,
      "step": 1
    },
    {
      "epoch": 0.02159827213822894,
      "grad_norm": 0.6378040313720703,
      "learning_rate": 4.08e-05,
      "loss": 0.1198,
      "step": 10
    },
    {
      "epoch": 0.04319654427645788,
      "grad_norm": 0.3366822600364685,
      "learning_rate": 8.16e-05,
      "loss": 0.0585,
      "step": 20
    },
    {
      "epoch": 0.06479481641468683,
      "grad_norm": 1.0418509244918823,
      "learning_rate": 0.0001224,
      "loss": 0.0195,
      "step": 30
    },
    {
      "epoch": 0.08639308855291576,
      "grad_norm": 0.7536942362785339,
      "learning_rate": 0.0001632,
      "loss": 0.0173,
      "step": 40
    },
    {
      "epoch": 0.1079913606911447,
      "grad_norm": 2.1272497177124023,
      "learning_rate": 0.000204,
      "loss": 0.0183,
      "step": 50
    },
    {
      "epoch": 0.1079913606911447,
      "eval_loss": 0.011936341412365437,
      "eval_runtime": 28.1854,
      "eval_samples_per_second": 6.918,
      "eval_steps_per_second": 1.738,
      "step": 50
    },
    {
      "epoch": 0.12958963282937366,
      "grad_norm": 0.5203682780265808,
      "learning_rate": 0.00020370504185350093,
      "loss": 0.0316,
      "step": 60
    },
    {
      "epoch": 0.1511879049676026,
      "grad_norm": 0.8338865041732788,
      "learning_rate": 0.00020282187330239947,
      "loss": 0.0168,
      "step": 70
    },
    {
      "epoch": 0.17278617710583152,
      "grad_norm": 0.63815838098526,
      "learning_rate": 0.0002013556021458894,
      "loss": 0.0222,
      "step": 80
    },
    {
      "epoch": 0.19438444924406048,
      "grad_norm": 0.11844746768474579,
      "learning_rate": 0.00019931470855304105,
      "loss": 0.0154,
      "step": 90
    },
    {
      "epoch": 0.2159827213822894,
      "grad_norm": 4.035724639892578,
      "learning_rate": 0.00019671099601780242,
      "loss": 0.016,
      "step": 100
    },
    {
      "epoch": 0.2159827213822894,
      "eval_loss": 0.02815873548388481,
      "eval_runtime": 28.4968,
      "eval_samples_per_second": 6.843,
      "eval_steps_per_second": 1.719,
      "step": 100
    },
    {
      "epoch": 0.23758099352051837,
      "grad_norm": 0.14545699954032898,
      "learning_rate": 0.00019355952309357377,
      "loss": 0.0326,
      "step": 110
    },
    {
      "epoch": 0.2591792656587473,
      "grad_norm": 1.333457350730896,
      "learning_rate": 0.00018987851630216821,
      "loss": 0.0127,
      "step": 120
    },
    {
      "epoch": 0.28077753779697623,
      "grad_norm": 0.19191227853298187,
      "learning_rate": 0.0001856892647208494,
      "loss": 0.0164,
      "step": 130
    },
    {
      "epoch": 0.3023758099352052,
      "grad_norm": 0.6895551085472107,
      "learning_rate": 0.00018101599685710056,
      "loss": 0.0143,
      "step": 140
    },
    {
      "epoch": 0.32397408207343414,
      "grad_norm": 0.06516972184181213,
      "learning_rate": 0.00017588574052321935,
      "loss": 0.0108,
      "step": 150
    },
    {
      "epoch": 0.32397408207343414,
      "eval_loss": 0.007597790565341711,
      "eval_runtime": 28.5719,
      "eval_samples_per_second": 6.825,
      "eval_steps_per_second": 1.715,
      "step": 150
    },
    {
      "epoch": 0.34557235421166305,
      "grad_norm": 0.01516179833561182,
      "learning_rate": 0.0001703281665211531,
      "loss": 0.0107,
      "step": 160
    },
    {
      "epoch": 0.367170626349892,
      "grad_norm": 3.366034507751465,
      "learning_rate": 0.00016437541704162093,
      "loss": 0.0119,
      "step": 170
    },
    {
      "epoch": 0.38876889848812096,
      "grad_norm": 0.2688906192779541,
      "learning_rate": 0.00015806191976997693,
      "loss": 0.0033,
      "step": 180
    },
    {
      "epoch": 0.4103671706263499,
      "grad_norm": 0.06040094047784805,
      "learning_rate": 0.00015142418877393032,
      "loss": 0.0028,
      "step": 190
    },
    {
      "epoch": 0.4319654427645788,
      "grad_norm": 0.10505006462335587,
      "learning_rate": 0.00014450061332468804,
      "loss": 0.0051,
      "step": 200
    },
    {
      "epoch": 0.4319654427645788,
      "eval_loss": 0.016910606995224953,
      "eval_runtime": 28.5258,
      "eval_samples_per_second": 6.836,
      "eval_steps_per_second": 1.718,
      "step": 200
    },
    {
      "epoch": 0.4535637149028078,
      "grad_norm": 0.40670064091682434,
      "learning_rate": 0.0001373312358728708,
      "loss": 0.0122,
      "step": 210
    },
    {
      "epoch": 0.47516198704103674,
      "grad_norm": 1.2365318536758423,
      "learning_rate": 0.00012995752046327736,
      "loss": 0.0043,
      "step": 220
    },
    {
      "epoch": 0.49676025917926564,
      "grad_norm": 0.05308518186211586,
      "learning_rate": 0.0001224221129278691,
      "loss": 0.0041,
      "step": 230
    },
    {
      "epoch": 0.5183585313174947,
      "grad_norm": 0.05913418531417847,
      "learning_rate": 0.00011476859424389611,
      "loss": 0.0024,
      "step": 240
    },
    {
      "epoch": 0.5399568034557235,
      "grad_norm": 0.029558856040239334,
      "learning_rate": 0.00010704122848361768,
      "loss": 0.0031,
      "step": 250
    },
    {
      "epoch": 0.5399568034557235,
      "eval_loss": 0.0030939881689846516,
      "eval_runtime": 28.2074,
      "eval_samples_per_second": 6.913,
      "eval_steps_per_second": 1.737,
      "step": 250
    },
    {
      "epoch": 0.5615550755939525,
      "grad_norm": 0.13922685384750366,
      "learning_rate": 9.928470681334698e-05,
      "loss": 0.0058,
      "step": 260
    },
    {
      "epoch": 0.5831533477321814,
      "grad_norm": 0.06830060482025146,
      "learning_rate": 9.154388902240052e-05,
      "loss": 0.0033,
      "step": 270
    },
    {
      "epoch": 0.6047516198704104,
      "grad_norm": 0.024560509249567986,
      "learning_rate": 8.386354407681761e-05,
      "loss": 0.0019,
      "step": 280
    },
    {
      "epoch": 0.6263498920086393,
      "grad_norm": 0.0010884521761909127,
      "learning_rate": 7.62880911983556e-05,
      "loss": 0.0004,
      "step": 290
    },
    {
      "epoch": 0.6479481641468683,
      "grad_norm": 0.04456557705998421,
      "learning_rate": 6.886134296622857e-05,
      "loss": 0.0003,
      "step": 300
    },
    {
      "epoch": 0.6479481641468683,
      "eval_loss": 0.001991268480196595,
      "eval_runtime": 28.47,
      "eval_samples_per_second": 6.849,
      "eval_steps_per_second": 1.721,
      "step": 300
    },
    {
      "epoch": 0.6695464362850972,
      "grad_norm": 0.012475444003939629,
      "learning_rate": 6.16262519273594e-05,
      "loss": 0.0013,
      "step": 310
    },
    {
      "epoch": 0.6911447084233261,
      "grad_norm": 0.19239909946918488,
      "learning_rate": 5.4624662180622114e-05,
      "loss": 0.0039,
      "step": 320
    },
    {
      "epoch": 0.712742980561555,
      "grad_norm": 0.025792984291911125,
      "learning_rate": 4.7897067371786276e-05,
      "loss": 0.0023,
      "step": 330
    },
    {
      "epoch": 0.734341252699784,
      "grad_norm": 0.013253681361675262,
      "learning_rate": 4.148237649879712e-05,
      "loss": 0.0021,
      "step": 340
    },
    {
      "epoch": 0.755939524838013,
      "grad_norm": 0.0363602451980114,
      "learning_rate": 3.541768888185587e-05,
      "loss": 0.0029,
      "step": 350
    },
    {
      "epoch": 0.755939524838013,
      "eval_loss": 0.0015392228960990906,
      "eval_runtime": 28.3248,
      "eval_samples_per_second": 6.884,
      "eval_steps_per_second": 1.73,
      "step": 350
    },
    {
      "epoch": 0.7775377969762419,
      "grad_norm": 0.004180555697530508,
      "learning_rate": 2.973807959975846e-05,
      "loss": 0.004,
      "step": 360
    },
    {
      "epoch": 0.7991360691144709,
      "grad_norm": 0.009106243029236794,
      "learning_rate": 2.447639663342025e-05,
      "loss": 0.0021,
      "step": 370
    },
    {
      "epoch": 0.8207343412526998,
      "grad_norm": 0.011521597392857075,
      "learning_rate": 1.9663070889806247e-05,
      "loss": 0.0054,
      "step": 380
    },
    {
      "epoch": 0.8423326133909287,
      "grad_norm": 0.0030731502920389175,
      "learning_rate": 1.5325940204991634e-05,
      "loss": 0.0011,
      "step": 390
    },
    {
      "epoch": 0.8639308855291576,
      "grad_norm": 0.04495791345834732,
      "learning_rate": 1.1490088344229713e-05,
      "loss": 0.0023,
      "step": 400
    },
    {
      "epoch": 0.8639308855291576,
      "eval_loss": 0.0008633602410554886,
      "eval_runtime": 28.3009,
      "eval_samples_per_second": 6.89,
      "eval_steps_per_second": 1.731,
      "step": 400
    },
    {
      "epoch": 0.8855291576673866,
      "grad_norm": 0.04856608808040619,
      "learning_rate": 8.177699930169379e-06,
      "loss": 0.0023,
      "step": 410
    },
    {
      "epoch": 0.9071274298056156,
      "grad_norm": 0.004636272322386503,
      "learning_rate": 5.4079321382439e-06,
      "loss": 0.0015,
      "step": 420
    },
    {
      "epoch": 0.9287257019438445,
      "grad_norm": 0.12631860375404358,
      "learning_rate": 3.1968039012795417e-06,
      "loss": 0.0005,
      "step": 430
    },
    {
      "epoch": 0.9503239740820735,
      "grad_norm": 0.0010080524953082204,
      "learning_rate": 1.5571032641086151e-06,
      "loss": 0.0015,
      "step": 440
    },
    {
      "epoch": 0.9719222462203023,
      "grad_norm": 0.02956795133650303,
      "learning_rate": 4.983134240007501e-07,
      "loss": 0.0003,
      "step": 450
    },
    {
      "epoch": 0.9719222462203023,
      "eval_loss": 0.0007766146445646882,
      "eval_runtime": 28.3411,
      "eval_samples_per_second": 6.88,
      "eval_steps_per_second": 1.729,
      "step": 450
    },
    {
      "epoch": 0.9935205183585313,
      "grad_norm": 0.060769353061914444,
      "learning_rate": 2.6557884657352427e-08,
      "loss": 0.0049,
      "step": 460
    }
  ],
  "logging_steps": 10,
  "max_steps": 463,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3904763809103872e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|