{
  "best_metric": 1.05935800075531,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.4155844155844156,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008311688311688312,
      "grad_norm": 0.5688855648040771,
      "learning_rate": 5e-05,
      "loss": 1.2675,
      "step": 1
    },
    {
      "epoch": 0.008311688311688312,
      "eval_loss": 1.5061113834381104,
      "eval_runtime": 25.9468,
      "eval_samples_per_second": 31.256,
      "eval_steps_per_second": 3.931,
      "step": 1
    },
    {
      "epoch": 0.016623376623376623,
      "grad_norm": 1.2492280006408691,
      "learning_rate": 0.0001,
      "loss": 1.3997,
      "step": 2
    },
    {
      "epoch": 0.024935064935064935,
      "grad_norm": 1.0303261280059814,
      "learning_rate": 9.989294616193017e-05,
      "loss": 1.3554,
      "step": 3
    },
    {
      "epoch": 0.03324675324675325,
      "grad_norm": 0.8673217296600342,
      "learning_rate": 9.957224306869053e-05,
      "loss": 1.2871,
      "step": 4
    },
    {
      "epoch": 0.04155844155844156,
      "grad_norm": 0.4088221490383148,
      "learning_rate": 9.903926402016153e-05,
      "loss": 1.3092,
      "step": 5
    },
    {
      "epoch": 0.04987012987012987,
      "grad_norm": 0.48258668184280396,
      "learning_rate": 9.829629131445342e-05,
      "loss": 1.4485,
      "step": 6
    },
    {
      "epoch": 0.05818181818181818,
      "grad_norm": 0.5833405256271362,
      "learning_rate": 9.73465064747553e-05,
      "loss": 1.4425,
      "step": 7
    },
    {
      "epoch": 0.0664935064935065,
      "grad_norm": 0.6449804306030273,
      "learning_rate": 9.619397662556435e-05,
      "loss": 1.4168,
      "step": 8
    },
    {
      "epoch": 0.0748051948051948,
      "grad_norm": 0.8527534008026123,
      "learning_rate": 9.484363707663442e-05,
      "loss": 1.3239,
      "step": 9
    },
    {
      "epoch": 0.08311688311688312,
      "grad_norm": 1.1389973163604736,
      "learning_rate": 9.330127018922194e-05,
      "loss": 1.2837,
      "step": 10
    },
    {
      "epoch": 0.09142857142857143,
      "grad_norm": 0.9949864745140076,
      "learning_rate": 9.157348061512727e-05,
      "loss": 1.1449,
      "step": 11
    },
    {
      "epoch": 0.09974025974025974,
      "grad_norm": 0.9292828440666199,
      "learning_rate": 8.966766701456177e-05,
      "loss": 1.0885,
      "step": 12
    },
    {
      "epoch": 0.10805194805194805,
      "grad_norm": 0.516069769859314,
      "learning_rate": 8.759199037394887e-05,
      "loss": 1.0868,
      "step": 13
    },
    {
      "epoch": 0.11636363636363636,
      "grad_norm": 1.1467220783233643,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.052,
      "step": 14
    },
    {
      "epoch": 0.12467532467532468,
      "grad_norm": 0.8350916504859924,
      "learning_rate": 8.296729075500344e-05,
      "loss": 0.9652,
      "step": 15
    },
    {
      "epoch": 0.132987012987013,
      "grad_norm": 0.5265994071960449,
      "learning_rate": 8.043807145043604e-05,
      "loss": 0.8324,
      "step": 16
    },
    {
      "epoch": 0.1412987012987013,
      "grad_norm": 0.3528784215450287,
      "learning_rate": 7.777851165098012e-05,
      "loss": 0.8969,
      "step": 17
    },
    {
      "epoch": 0.1496103896103896,
      "grad_norm": 0.39343854784965515,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.3175,
      "step": 18
    },
    {
      "epoch": 0.15792207792207794,
      "grad_norm": 0.41462719440460205,
      "learning_rate": 7.211443451095007e-05,
      "loss": 1.352,
      "step": 19
    },
    {
      "epoch": 0.16623376623376623,
      "grad_norm": 0.4772951602935791,
      "learning_rate": 6.91341716182545e-05,
      "loss": 1.3498,
      "step": 20
    },
    {
      "epoch": 0.17454545454545456,
      "grad_norm": 0.5342058539390564,
      "learning_rate": 6.607197326515808e-05,
      "loss": 1.309,
      "step": 21
    },
    {
      "epoch": 0.18285714285714286,
      "grad_norm": 0.9034503102302551,
      "learning_rate": 6.294095225512603e-05,
      "loss": 1.1314,
      "step": 22
    },
    {
      "epoch": 0.19116883116883118,
      "grad_norm": 0.613511860370636,
      "learning_rate": 5.9754516100806423e-05,
      "loss": 1.1097,
      "step": 23
    },
    {
      "epoch": 0.19948051948051948,
      "grad_norm": 0.6167203783988953,
      "learning_rate": 5.6526309611002594e-05,
      "loss": 0.8848,
      "step": 24
    },
    {
      "epoch": 0.2077922077922078,
      "grad_norm": 1.535559058189392,
      "learning_rate": 5.327015646150716e-05,
      "loss": 1.2438,
      "step": 25
    },
    {
      "epoch": 0.2077922077922078,
      "eval_loss": 1.0861386060714722,
      "eval_runtime": 26.149,
      "eval_samples_per_second": 31.015,
      "eval_steps_per_second": 3.901,
      "step": 25
    },
    {
      "epoch": 0.2161038961038961,
      "grad_norm": 0.23987476527690887,
      "learning_rate": 5e-05,
      "loss": 0.9859,
      "step": 26
    },
    {
      "epoch": 0.22441558441558443,
      "grad_norm": 0.36977705359458923,
      "learning_rate": 4.6729843538492847e-05,
      "loss": 0.9489,
      "step": 27
    },
    {
      "epoch": 0.23272727272727273,
      "grad_norm": 0.28386303782463074,
      "learning_rate": 4.347369038899744e-05,
      "loss": 0.8192,
      "step": 28
    },
    {
      "epoch": 0.24103896103896105,
      "grad_norm": 0.28928664326667786,
      "learning_rate": 4.0245483899193595e-05,
      "loss": 0.8652,
      "step": 29
    },
    {
      "epoch": 0.24935064935064935,
      "grad_norm": 0.4036184549331665,
      "learning_rate": 3.705904774487396e-05,
      "loss": 1.0874,
      "step": 30
    },
    {
      "epoch": 0.25766233766233765,
      "grad_norm": 0.4978172481060028,
      "learning_rate": 3.392802673484193e-05,
      "loss": 1.3074,
      "step": 31
    },
    {
      "epoch": 0.265974025974026,
      "grad_norm": 0.5747013688087463,
      "learning_rate": 3.086582838174551e-05,
      "loss": 1.3194,
      "step": 32
    },
    {
      "epoch": 0.2742857142857143,
      "grad_norm": 0.4833392798900604,
      "learning_rate": 2.7885565489049946e-05,
      "loss": 1.2504,
      "step": 33
    },
    {
      "epoch": 0.2825974025974026,
      "grad_norm": 0.573775589466095,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.2703,
      "step": 34
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 0.5862915515899658,
      "learning_rate": 2.2221488349019903e-05,
      "loss": 1.1775,
      "step": 35
    },
    {
      "epoch": 0.2992207792207792,
      "grad_norm": 0.483033686876297,
      "learning_rate": 1.9561928549563968e-05,
      "loss": 0.9526,
      "step": 36
    },
    {
      "epoch": 0.30753246753246755,
      "grad_norm": 0.6226116418838501,
      "learning_rate": 1.703270924499656e-05,
      "loss": 0.984,
      "step": 37
    },
    {
      "epoch": 0.31584415584415587,
      "grad_norm": 0.2604388892650604,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.8811,
      "step": 38
    },
    {
      "epoch": 0.32415584415584414,
      "grad_norm": 0.19333459436893463,
      "learning_rate": 1.2408009626051137e-05,
      "loss": 0.9441,
      "step": 39
    },
    {
      "epoch": 0.33246753246753247,
      "grad_norm": 0.201664999127388,
      "learning_rate": 1.0332332985438248e-05,
      "loss": 0.9568,
      "step": 40
    },
    {
      "epoch": 0.3407792207792208,
      "grad_norm": 0.20325258374214172,
      "learning_rate": 8.426519384872733e-06,
      "loss": 0.7732,
      "step": 41
    },
    {
      "epoch": 0.3490909090909091,
      "grad_norm": 0.2257876992225647,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.8841,
      "step": 42
    },
    {
      "epoch": 0.3574025974025974,
      "grad_norm": 0.3107191026210785,
      "learning_rate": 5.156362923365588e-06,
      "loss": 1.307,
      "step": 43
    },
    {
      "epoch": 0.3657142857142857,
      "grad_norm": 0.3348431885242462,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 1.2376,
      "step": 44
    },
    {
      "epoch": 0.37402597402597404,
      "grad_norm": 0.35244983434677124,
      "learning_rate": 2.653493525244721e-06,
      "loss": 1.3097,
      "step": 45
    },
    {
      "epoch": 0.38233766233766237,
      "grad_norm": 0.3649301826953888,
      "learning_rate": 1.70370868554659e-06,
      "loss": 1.1649,
      "step": 46
    },
    {
      "epoch": 0.39064935064935064,
      "grad_norm": 0.412195086479187,
      "learning_rate": 9.607359798384785e-07,
      "loss": 1.1256,
      "step": 47
    },
    {
      "epoch": 0.39896103896103896,
      "grad_norm": 0.4812576174736023,
      "learning_rate": 4.277569313094809e-07,
      "loss": 1.0379,
      "step": 48
    },
    {
      "epoch": 0.4072727272727273,
      "grad_norm": 0.43518587946891785,
      "learning_rate": 1.0705383806982606e-07,
      "loss": 0.7584,
      "step": 49
    },
    {
      "epoch": 0.4155844155844156,
      "grad_norm": 0.9060344696044922,
      "learning_rate": 0.0,
      "loss": 1.162,
      "step": 50
    },
    {
      "epoch": 0.4155844155844156,
      "eval_loss": 1.05935800075531,
      "eval_runtime": 26.4737,
      "eval_samples_per_second": 30.634,
      "eval_steps_per_second": 3.853,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.010979195163771e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}