|
{ |
|
"best_metric": 0.0005893517518416047, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 0.10787486515641856, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005393743257820927, |
|
"grad_norm": 0.9549857378005981, |
|
"learning_rate": 1.001e-05, |
|
"loss": 0.2799, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0005393743257820927, |
|
"eval_loss": 0.4979684054851532, |
|
"eval_runtime": 45.1708, |
|
"eval_samples_per_second": 17.29, |
|
"eval_steps_per_second": 4.339, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0010787486515641855, |
|
"grad_norm": 0.6719993948936462, |
|
"learning_rate": 2.002e-05, |
|
"loss": 0.1665, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0016181229773462784, |
|
"grad_norm": 0.5584933757781982, |
|
"learning_rate": 3.0029999999999995e-05, |
|
"loss": 0.1785, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.002157497303128371, |
|
"grad_norm": 0.6560541391372681, |
|
"learning_rate": 4.004e-05, |
|
"loss": 0.1566, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.002696871628910464, |
|
"grad_norm": 0.8838739991188049, |
|
"learning_rate": 5.005e-05, |
|
"loss": 0.1356, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003236245954692557, |
|
"grad_norm": 0.6256913542747498, |
|
"learning_rate": 6.005999999999999e-05, |
|
"loss": 0.0931, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0037756202804746495, |
|
"grad_norm": 0.6155531406402588, |
|
"learning_rate": 7.006999999999998e-05, |
|
"loss": 0.0667, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.004314994606256742, |
|
"grad_norm": 0.584822416305542, |
|
"learning_rate": 8.008e-05, |
|
"loss": 0.0352, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0048543689320388345, |
|
"grad_norm": 1.1850945949554443, |
|
"learning_rate": 9.009e-05, |
|
"loss": 0.0139, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.005393743257820928, |
|
"grad_norm": 0.36779069900512695, |
|
"learning_rate": 0.0001001, |
|
"loss": 0.0119, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005933117583603021, |
|
"grad_norm": 1.6826813220977783, |
|
"learning_rate": 9.957315789473684e-05, |
|
"loss": 0.0115, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.006472491909385114, |
|
"grad_norm": 0.3343207538127899, |
|
"learning_rate": 9.904631578947367e-05, |
|
"loss": 0.0034, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.007011866235167206, |
|
"grad_norm": 0.03047369234263897, |
|
"learning_rate": 9.851947368421052e-05, |
|
"loss": 0.0005, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.007551240560949299, |
|
"grad_norm": 0.04409059137105942, |
|
"learning_rate": 9.799263157894736e-05, |
|
"loss": 0.0003, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.008090614886731391, |
|
"grad_norm": 3.0516834259033203, |
|
"learning_rate": 9.746578947368421e-05, |
|
"loss": 0.0131, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.008629989212513484, |
|
"grad_norm": 0.3956446945667267, |
|
"learning_rate": 9.693894736842104e-05, |
|
"loss": 0.001, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.009169363538295576, |
|
"grad_norm": 1.5148561000823975, |
|
"learning_rate": 9.641210526315789e-05, |
|
"loss": 0.0061, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.009708737864077669, |
|
"grad_norm": 0.49977800250053406, |
|
"learning_rate": 9.588526315789473e-05, |
|
"loss": 0.0017, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.010248112189859764, |
|
"grad_norm": 0.012113417498767376, |
|
"learning_rate": 9.535842105263157e-05, |
|
"loss": 0.0001, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.010787486515641856, |
|
"grad_norm": 0.008402293547987938, |
|
"learning_rate": 9.483157894736841e-05, |
|
"loss": 0.0002, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011326860841423949, |
|
"grad_norm": 0.002001185668632388, |
|
"learning_rate": 9.430473684210526e-05, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.011866235167206042, |
|
"grad_norm": 0.5545786619186401, |
|
"learning_rate": 9.37778947368421e-05, |
|
"loss": 0.0068, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.012405609492988134, |
|
"grad_norm": 0.06889030337333679, |
|
"learning_rate": 9.325105263157894e-05, |
|
"loss": 0.0004, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.012944983818770227, |
|
"grad_norm": 0.15830865502357483, |
|
"learning_rate": 9.272421052631578e-05, |
|
"loss": 0.0011, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01348435814455232, |
|
"grad_norm": 0.7862672805786133, |
|
"learning_rate": 9.219736842105263e-05, |
|
"loss": 0.0063, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.014023732470334413, |
|
"grad_norm": 0.009982357732951641, |
|
"learning_rate": 9.167052631578946e-05, |
|
"loss": 0.0001, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.014563106796116505, |
|
"grad_norm": 0.009793877601623535, |
|
"learning_rate": 9.114368421052632e-05, |
|
"loss": 0.0001, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.015102481121898598, |
|
"grad_norm": 0.006467557977885008, |
|
"learning_rate": 9.061684210526315e-05, |
|
"loss": 0.0001, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01564185544768069, |
|
"grad_norm": 0.012111790478229523, |
|
"learning_rate": 9.009e-05, |
|
"loss": 0.0001, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.016181229773462782, |
|
"grad_norm": 0.06000563129782677, |
|
"learning_rate": 8.956315789473683e-05, |
|
"loss": 0.0007, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.016720604099244876, |
|
"grad_norm": 0.03677884489297867, |
|
"learning_rate": 8.903631578947368e-05, |
|
"loss": 0.0007, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.017259978425026967, |
|
"grad_norm": 0.03280596062541008, |
|
"learning_rate": 8.850947368421052e-05, |
|
"loss": 0.0003, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01779935275080906, |
|
"grad_norm": 0.002885522786527872, |
|
"learning_rate": 8.798263157894736e-05, |
|
"loss": 0.0001, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.018338727076591153, |
|
"grad_norm": 0.4575079679489136, |
|
"learning_rate": 8.745578947368422e-05, |
|
"loss": 0.0156, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.018878101402373247, |
|
"grad_norm": 0.0030563841573894024, |
|
"learning_rate": 8.692894736842105e-05, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.019417475728155338, |
|
"grad_norm": 0.0014249957166612148, |
|
"learning_rate": 8.64021052631579e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.019956850053937433, |
|
"grad_norm": 0.0230252668261528, |
|
"learning_rate": 8.587526315789473e-05, |
|
"loss": 0.0001, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.020496224379719527, |
|
"grad_norm": 0.005644981749355793, |
|
"learning_rate": 8.534842105263157e-05, |
|
"loss": 0.0001, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.021035598705501618, |
|
"grad_norm": 0.02447853423655033, |
|
"learning_rate": 8.482157894736842e-05, |
|
"loss": 0.0001, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.021574973031283712, |
|
"grad_norm": 0.004401453770697117, |
|
"learning_rate": 8.429473684210525e-05, |
|
"loss": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.022114347357065803, |
|
"grad_norm": 0.05064772441983223, |
|
"learning_rate": 8.376789473684211e-05, |
|
"loss": 0.0004, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.022653721682847898, |
|
"grad_norm": 1.4466474056243896, |
|
"learning_rate": 8.324105263157894e-05, |
|
"loss": 0.0453, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02319309600862999, |
|
"grad_norm": 0.0030545571353286505, |
|
"learning_rate": 8.271421052631579e-05, |
|
"loss": 0.0001, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.023732470334412083, |
|
"grad_norm": 0.45695409178733826, |
|
"learning_rate": 8.218736842105262e-05, |
|
"loss": 0.0142, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.024271844660194174, |
|
"grad_norm": 0.45072630047798157, |
|
"learning_rate": 8.166052631578947e-05, |
|
"loss": 0.0007, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02481121898597627, |
|
"grad_norm": 0.20009317994117737, |
|
"learning_rate": 8.113368421052631e-05, |
|
"loss": 0.0005, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02535059331175836, |
|
"grad_norm": 0.01844087988138199, |
|
"learning_rate": 8.060684210526315e-05, |
|
"loss": 0.0001, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.025889967637540454, |
|
"grad_norm": 0.009140203706920147, |
|
"learning_rate": 8.008e-05, |
|
"loss": 0.0002, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.026429341963322545, |
|
"grad_norm": 1.665168046951294, |
|
"learning_rate": 7.955315789473684e-05, |
|
"loss": 0.0267, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02696871628910464, |
|
"grad_norm": 1.6538095474243164, |
|
"learning_rate": 7.902631578947368e-05, |
|
"loss": 0.0311, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02696871628910464, |
|
"eval_loss": 0.05669600889086723, |
|
"eval_runtime": 43.6492, |
|
"eval_samples_per_second": 17.893, |
|
"eval_steps_per_second": 4.49, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02750809061488673, |
|
"grad_norm": 2.739877223968506, |
|
"learning_rate": 7.849947368421052e-05, |
|
"loss": 0.0604, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.028047464940668825, |
|
"grad_norm": 0.010129177011549473, |
|
"learning_rate": 7.797263157894736e-05, |
|
"loss": 0.0001, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.028586839266450916, |
|
"grad_norm": 0.03367248922586441, |
|
"learning_rate": 7.744578947368421e-05, |
|
"loss": 0.0002, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.02912621359223301, |
|
"grad_norm": 1.0684995651245117, |
|
"learning_rate": 7.691894736842104e-05, |
|
"loss": 0.0011, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0296655879180151, |
|
"grad_norm": 0.006163384765386581, |
|
"learning_rate": 7.63921052631579e-05, |
|
"loss": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.030204962243797196, |
|
"grad_norm": 0.004382474347949028, |
|
"learning_rate": 7.586526315789473e-05, |
|
"loss": 0.0001, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.030744336569579287, |
|
"grad_norm": 0.006588236894458532, |
|
"learning_rate": 7.533842105263158e-05, |
|
"loss": 0.0001, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.03128371089536138, |
|
"grad_norm": 0.019274592399597168, |
|
"learning_rate": 7.481157894736841e-05, |
|
"loss": 0.0001, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.03182308522114347, |
|
"grad_norm": 0.006948168855160475, |
|
"learning_rate": 7.428473684210526e-05, |
|
"loss": 0.0001, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.032362459546925564, |
|
"grad_norm": 0.001927727716974914, |
|
"learning_rate": 7.375789473684209e-05, |
|
"loss": 0.0001, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03290183387270766, |
|
"grad_norm": 0.05395427718758583, |
|
"learning_rate": 7.323105263157895e-05, |
|
"loss": 0.0004, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.03344120819848975, |
|
"grad_norm": 0.14390406012535095, |
|
"learning_rate": 7.270421052631578e-05, |
|
"loss": 0.0006, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03398058252427184, |
|
"grad_norm": 0.04436345770955086, |
|
"learning_rate": 7.217736842105263e-05, |
|
"loss": 0.0004, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.034519956850053934, |
|
"grad_norm": 0.005591034423559904, |
|
"learning_rate": 7.165052631578947e-05, |
|
"loss": 0.0001, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03505933117583603, |
|
"grad_norm": 1.5156598091125488, |
|
"learning_rate": 7.11236842105263e-05, |
|
"loss": 0.0033, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03559870550161812, |
|
"grad_norm": 0.20944921672344208, |
|
"learning_rate": 7.059684210526315e-05, |
|
"loss": 0.0015, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.036138079827400214, |
|
"grad_norm": 0.02203325368463993, |
|
"learning_rate": 7.006999999999998e-05, |
|
"loss": 0.0002, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.036677454153182305, |
|
"grad_norm": 0.012391135096549988, |
|
"learning_rate": 6.954315789473684e-05, |
|
"loss": 0.0001, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0372168284789644, |
|
"grad_norm": 0.003814356168732047, |
|
"learning_rate": 6.901631578947368e-05, |
|
"loss": 0.0001, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.037756202804746494, |
|
"grad_norm": 0.43400076031684875, |
|
"learning_rate": 6.848947368421052e-05, |
|
"loss": 0.0108, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.038295577130528585, |
|
"grad_norm": 0.001942815724760294, |
|
"learning_rate": 6.796263157894737e-05, |
|
"loss": 0.0001, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.038834951456310676, |
|
"grad_norm": 0.007045631296932697, |
|
"learning_rate": 6.74357894736842e-05, |
|
"loss": 0.0001, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.039374325782092774, |
|
"grad_norm": 0.0019868926610797644, |
|
"learning_rate": 6.690894736842105e-05, |
|
"loss": 0.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.039913700107874865, |
|
"grad_norm": 0.3928805887699127, |
|
"learning_rate": 6.638210526315788e-05, |
|
"loss": 0.001, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.040453074433656956, |
|
"grad_norm": 0.00091419683303684, |
|
"learning_rate": 6.585526315789474e-05, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.040992448759439054, |
|
"grad_norm": 0.00108022999484092, |
|
"learning_rate": 6.532842105263157e-05, |
|
"loss": 0.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.041531823085221145, |
|
"grad_norm": 0.004014655947685242, |
|
"learning_rate": 6.480157894736842e-05, |
|
"loss": 0.0001, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.042071197411003236, |
|
"grad_norm": 0.0030816688667982817, |
|
"learning_rate": 6.427473684210526e-05, |
|
"loss": 0.0001, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.04261057173678533, |
|
"grad_norm": 0.0016585150733590126, |
|
"learning_rate": 6.37478947368421e-05, |
|
"loss": 0.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.043149946062567425, |
|
"grad_norm": 0.00667704688385129, |
|
"learning_rate": 6.322105263157894e-05, |
|
"loss": 0.0001, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.043689320388349516, |
|
"grad_norm": 0.007074718829244375, |
|
"learning_rate": 6.269421052631577e-05, |
|
"loss": 0.0001, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.04422869471413161, |
|
"grad_norm": 0.002394700888544321, |
|
"learning_rate": 6.216736842105263e-05, |
|
"loss": 0.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0447680690399137, |
|
"grad_norm": 0.0016268681501969695, |
|
"learning_rate": 6.164052631578947e-05, |
|
"loss": 0.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.045307443365695796, |
|
"grad_norm": 0.00599514739587903, |
|
"learning_rate": 6.111368421052631e-05, |
|
"loss": 0.0001, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04584681769147789, |
|
"grad_norm": 0.0022963311057537794, |
|
"learning_rate": 6.058684210526315e-05, |
|
"loss": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.04638619201725998, |
|
"grad_norm": 0.007355900481343269, |
|
"learning_rate": 6.005999999999999e-05, |
|
"loss": 0.0001, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04692556634304207, |
|
"grad_norm": 0.02201763354241848, |
|
"learning_rate": 5.953315789473684e-05, |
|
"loss": 0.0002, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.04746494066882417, |
|
"grad_norm": 0.005624725949019194, |
|
"learning_rate": 5.9006315789473676e-05, |
|
"loss": 0.0001, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04800431499460626, |
|
"grad_norm": 0.04811863973736763, |
|
"learning_rate": 5.847947368421053e-05, |
|
"loss": 0.0002, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.04854368932038835, |
|
"grad_norm": 0.0029467041604220867, |
|
"learning_rate": 5.795263157894737e-05, |
|
"loss": 0.0001, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04908306364617044, |
|
"grad_norm": 0.03302125632762909, |
|
"learning_rate": 5.742578947368421e-05, |
|
"loss": 0.0001, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.04962243797195254, |
|
"grad_norm": 0.0017132150242105126, |
|
"learning_rate": 5.6898947368421046e-05, |
|
"loss": 0.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.05016181229773463, |
|
"grad_norm": 0.0011448146542534232, |
|
"learning_rate": 5.6372105263157886e-05, |
|
"loss": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.05070118662351672, |
|
"grad_norm": 0.0019763971213251352, |
|
"learning_rate": 5.584526315789473e-05, |
|
"loss": 0.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.05124056094929881, |
|
"grad_norm": 0.0009319214150309563, |
|
"learning_rate": 5.531842105263158e-05, |
|
"loss": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05177993527508091, |
|
"grad_norm": 0.03375813364982605, |
|
"learning_rate": 5.4791578947368424e-05, |
|
"loss": 0.0005, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.052319309600863, |
|
"grad_norm": 0.16154968738555908, |
|
"learning_rate": 5.426473684210526e-05, |
|
"loss": 0.0025, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.05285868392664509, |
|
"grad_norm": 0.0034327851608395576, |
|
"learning_rate": 5.37378947368421e-05, |
|
"loss": 0.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.05339805825242718, |
|
"grad_norm": 0.0067018670961260796, |
|
"learning_rate": 5.321105263157894e-05, |
|
"loss": 0.0001, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.05393743257820928, |
|
"grad_norm": 0.21445177495479584, |
|
"learning_rate": 5.268421052631578e-05, |
|
"loss": 0.0017, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05393743257820928, |
|
"eval_loss": 0.0010083840461447835, |
|
"eval_runtime": 43.6657, |
|
"eval_samples_per_second": 17.886, |
|
"eval_steps_per_second": 4.489, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05447680690399137, |
|
"grad_norm": 0.459598571062088, |
|
"learning_rate": 5.2157368421052626e-05, |
|
"loss": 0.0081, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.05501618122977346, |
|
"grad_norm": 0.0010549342259764671, |
|
"learning_rate": 5.163052631578947e-05, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 0.002258673310279846, |
|
"learning_rate": 5.110368421052632e-05, |
|
"loss": 0.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.05609492988133765, |
|
"grad_norm": 0.0001912364095915109, |
|
"learning_rate": 5.057684210526316e-05, |
|
"loss": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05663430420711974, |
|
"grad_norm": 0.0003946495126001537, |
|
"learning_rate": 5.005e-05, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.05717367853290183, |
|
"grad_norm": 0.0004527179116848856, |
|
"learning_rate": 4.9523157894736836e-05, |
|
"loss": 0.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.05771305285868392, |
|
"grad_norm": 0.0007224463624879718, |
|
"learning_rate": 4.899631578947368e-05, |
|
"loss": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.05825242718446602, |
|
"grad_norm": 0.0027744832914322615, |
|
"learning_rate": 4.846947368421052e-05, |
|
"loss": 0.0001, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.05879180151024811, |
|
"grad_norm": 0.0003102615592069924, |
|
"learning_rate": 4.794263157894737e-05, |
|
"loss": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0593311758360302, |
|
"grad_norm": 0.00021459658455569297, |
|
"learning_rate": 4.7415789473684206e-05, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.059870550161812294, |
|
"grad_norm": 0.00045238586608320475, |
|
"learning_rate": 4.688894736842105e-05, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.06040992448759439, |
|
"grad_norm": 0.00048040415276773274, |
|
"learning_rate": 4.636210526315789e-05, |
|
"loss": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06094929881337648, |
|
"grad_norm": 0.00028796499827876687, |
|
"learning_rate": 4.583526315789473e-05, |
|
"loss": 0.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.061488673139158574, |
|
"grad_norm": 0.0009007256594486535, |
|
"learning_rate": 4.530842105263158e-05, |
|
"loss": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.06202804746494067, |
|
"grad_norm": 0.001204671454615891, |
|
"learning_rate": 4.4781578947368416e-05, |
|
"loss": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.06256742179072276, |
|
"grad_norm": 0.007920761592686176, |
|
"learning_rate": 4.425473684210526e-05, |
|
"loss": 0.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.06310679611650485, |
|
"grad_norm": 0.000548014766536653, |
|
"learning_rate": 4.372789473684211e-05, |
|
"loss": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.06364617044228695, |
|
"grad_norm": 0.0007022125646471977, |
|
"learning_rate": 4.320105263157895e-05, |
|
"loss": 0.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.06418554476806904, |
|
"grad_norm": 0.000792205857578665, |
|
"learning_rate": 4.2674210526315786e-05, |
|
"loss": 0.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.06472491909385113, |
|
"grad_norm": 0.0005556220421567559, |
|
"learning_rate": 4.2147368421052626e-05, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06526429341963323, |
|
"grad_norm": 0.0013536950573325157, |
|
"learning_rate": 4.162052631578947e-05, |
|
"loss": 0.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.06580366774541532, |
|
"grad_norm": 0.001718906918540597, |
|
"learning_rate": 4.109368421052631e-05, |
|
"loss": 0.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.06634304207119741, |
|
"grad_norm": 0.016501938924193382, |
|
"learning_rate": 4.056684210526316e-05, |
|
"loss": 0.0001, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0668824163969795, |
|
"grad_norm": 0.2795630395412445, |
|
"learning_rate": 4.004e-05, |
|
"loss": 0.0005, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.0674217907227616, |
|
"grad_norm": 0.035502709448337555, |
|
"learning_rate": 3.951315789473684e-05, |
|
"loss": 0.0001, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.06796116504854369, |
|
"grad_norm": 0.002328946255147457, |
|
"learning_rate": 3.898631578947368e-05, |
|
"loss": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.06850053937432578, |
|
"grad_norm": 0.0006106572109274566, |
|
"learning_rate": 3.845947368421052e-05, |
|
"loss": 0.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.06903991370010787, |
|
"grad_norm": 0.0013282396830618382, |
|
"learning_rate": 3.7932631578947367e-05, |
|
"loss": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.06957928802588997, |
|
"grad_norm": 0.41567087173461914, |
|
"learning_rate": 3.7405789473684206e-05, |
|
"loss": 0.0035, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.07011866235167206, |
|
"grad_norm": 0.0007774877594783902, |
|
"learning_rate": 3.6878947368421045e-05, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07065803667745416, |
|
"grad_norm": 0.05387866869568825, |
|
"learning_rate": 3.635210526315789e-05, |
|
"loss": 0.0006, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.07119741100323625, |
|
"grad_norm": 0.0006187845719978213, |
|
"learning_rate": 3.582526315789474e-05, |
|
"loss": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.07173678532901834, |
|
"grad_norm": 0.3249104917049408, |
|
"learning_rate": 3.5298421052631576e-05, |
|
"loss": 0.0058, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.07227615965480043, |
|
"grad_norm": 0.0009034304530359805, |
|
"learning_rate": 3.477157894736842e-05, |
|
"loss": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.07281553398058252, |
|
"grad_norm": 0.0008392046438530087, |
|
"learning_rate": 3.424473684210526e-05, |
|
"loss": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.07335490830636461, |
|
"grad_norm": 0.001984312431886792, |
|
"learning_rate": 3.37178947368421e-05, |
|
"loss": 0.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.07389428263214672, |
|
"grad_norm": 0.05136461928486824, |
|
"learning_rate": 3.319105263157894e-05, |
|
"loss": 0.0003, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0744336569579288, |
|
"grad_norm": 0.0010661619016900659, |
|
"learning_rate": 3.2664210526315786e-05, |
|
"loss": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.0749730312837109, |
|
"grad_norm": 0.007086303550750017, |
|
"learning_rate": 3.213736842105263e-05, |
|
"loss": 0.0001, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.07551240560949299, |
|
"grad_norm": 0.0012311493046581745, |
|
"learning_rate": 3.161052631578947e-05, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07605177993527508, |
|
"grad_norm": 0.0016970309661701322, |
|
"learning_rate": 3.108368421052632e-05, |
|
"loss": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.07659115426105717, |
|
"grad_norm": 0.6121507883071899, |
|
"learning_rate": 3.0556842105263156e-05, |
|
"loss": 0.0016, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.07713052858683926, |
|
"grad_norm": 0.0011852330062538385, |
|
"learning_rate": 3.0029999999999995e-05, |
|
"loss": 0.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.07766990291262135, |
|
"grad_norm": 0.0008150951471179724, |
|
"learning_rate": 2.9503157894736838e-05, |
|
"loss": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.07820927723840346, |
|
"grad_norm": 0.0009472601232118905, |
|
"learning_rate": 2.8976315789473684e-05, |
|
"loss": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.07874865156418555, |
|
"grad_norm": 0.003503723070025444, |
|
"learning_rate": 2.8449473684210523e-05, |
|
"loss": 0.0001, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.07928802588996764, |
|
"grad_norm": 0.0022005646023899317, |
|
"learning_rate": 2.7922631578947366e-05, |
|
"loss": 0.0001, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.07982740021574973, |
|
"grad_norm": 0.5109080672264099, |
|
"learning_rate": 2.7395789473684212e-05, |
|
"loss": 0.0101, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.08036677454153182, |
|
"grad_norm": 0.0015276124468073249, |
|
"learning_rate": 2.686894736842105e-05, |
|
"loss": 0.0001, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.08090614886731391, |
|
"grad_norm": 0.0025751348584890366, |
|
"learning_rate": 2.634210526315789e-05, |
|
"loss": 0.0001, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08090614886731391, |
|
"eval_loss": 0.0005893517518416047, |
|
"eval_runtime": 43.6869, |
|
"eval_samples_per_second": 17.877, |
|
"eval_steps_per_second": 4.486, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.081445523193096, |
|
"grad_norm": 0.0056745377369225025, |
|
"learning_rate": 2.5815263157894736e-05, |
|
"loss": 0.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.08198489751887811, |
|
"grad_norm": 0.00784875638782978, |
|
"learning_rate": 2.528842105263158e-05, |
|
"loss": 0.0001, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.0825242718446602, |
|
"grad_norm": 0.06929171085357666, |
|
"learning_rate": 2.4761578947368418e-05, |
|
"loss": 0.0015, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.08306364617044229, |
|
"grad_norm": 0.0055921077728271484, |
|
"learning_rate": 2.423473684210526e-05, |
|
"loss": 0.0001, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.08360302049622438, |
|
"grad_norm": 0.002456638030707836, |
|
"learning_rate": 2.3707894736842103e-05, |
|
"loss": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.08414239482200647, |
|
"grad_norm": 0.016230836510658264, |
|
"learning_rate": 2.3181052631578946e-05, |
|
"loss": 0.0002, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.08468176914778856, |
|
"grad_norm": 0.0014073759084567428, |
|
"learning_rate": 2.265421052631579e-05, |
|
"loss": 0.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.08522114347357065, |
|
"grad_norm": 0.18957898020744324, |
|
"learning_rate": 2.212736842105263e-05, |
|
"loss": 0.002, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.08576051779935275, |
|
"grad_norm": 0.001173275290057063, |
|
"learning_rate": 2.1600526315789474e-05, |
|
"loss": 0.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.08629989212513485, |
|
"grad_norm": 0.0007326776976697147, |
|
"learning_rate": 2.1073684210526313e-05, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08683926645091694, |
|
"grad_norm": 0.008999370969831944, |
|
"learning_rate": 2.0546842105263155e-05, |
|
"loss": 0.0001, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.08737864077669903, |
|
"grad_norm": 0.0007513080490753055, |
|
"learning_rate": 2.002e-05, |
|
"loss": 0.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.08791801510248112, |
|
"grad_norm": 0.0003164597728755325, |
|
"learning_rate": 1.949315789473684e-05, |
|
"loss": 0.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.08845738942826321, |
|
"grad_norm": 0.0006100442842580378, |
|
"learning_rate": 1.8966315789473683e-05, |
|
"loss": 0.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.0889967637540453, |
|
"grad_norm": 0.002358382800593972, |
|
"learning_rate": 1.8439473684210522e-05, |
|
"loss": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0895361380798274, |
|
"grad_norm": 0.0008702076738700271, |
|
"learning_rate": 1.791263157894737e-05, |
|
"loss": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.09007551240560949, |
|
"grad_norm": 0.0013868837850168347, |
|
"learning_rate": 1.738578947368421e-05, |
|
"loss": 0.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.09061488673139159, |
|
"grad_norm": 0.0006682085804641247, |
|
"learning_rate": 1.685894736842105e-05, |
|
"loss": 0.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.09115426105717368, |
|
"grad_norm": 0.0009037026320584118, |
|
"learning_rate": 1.6332105263157893e-05, |
|
"loss": 0.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.09169363538295577, |
|
"grad_norm": 0.0003754556237254292, |
|
"learning_rate": 1.5805263157894735e-05, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09223300970873786, |
|
"grad_norm": 0.0007883550715632737, |
|
"learning_rate": 1.5278421052631578e-05, |
|
"loss": 0.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.09277238403451996, |
|
"grad_norm": 0.00029830558924004436, |
|
"learning_rate": 1.4751578947368419e-05, |
|
"loss": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.09331175836030205, |
|
"grad_norm": 0.00025384570471942425, |
|
"learning_rate": 1.4224736842105262e-05, |
|
"loss": 0.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.09385113268608414, |
|
"grad_norm": 0.000792273145634681, |
|
"learning_rate": 1.3697894736842106e-05, |
|
"loss": 0.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.09439050701186623, |
|
"grad_norm": 0.00023794101434759796, |
|
"learning_rate": 1.3171052631578945e-05, |
|
"loss": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09492988133764833, |
|
"grad_norm": 0.0003109975659754127, |
|
"learning_rate": 1.264421052631579e-05, |
|
"loss": 0.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.09546925566343042, |
|
"grad_norm": 0.0019688678439706564, |
|
"learning_rate": 1.211736842105263e-05, |
|
"loss": 0.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.09600862998921252, |
|
"grad_norm": 0.00809526164084673, |
|
"learning_rate": 1.1590526315789473e-05, |
|
"loss": 0.0001, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.0965480043149946, |
|
"grad_norm": 0.0008922089473344386, |
|
"learning_rate": 1.1063684210526316e-05, |
|
"loss": 0.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.0970873786407767, |
|
"grad_norm": 0.0005603457102552056, |
|
"learning_rate": 1.0536842105263156e-05, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09762675296655879, |
|
"grad_norm": 0.001602335018105805, |
|
"learning_rate": 1.001e-05, |
|
"loss": 0.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.09816612729234088, |
|
"grad_norm": 0.000997183844447136, |
|
"learning_rate": 9.483157894736842e-06, |
|
"loss": 0.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.09870550161812297, |
|
"grad_norm": 0.0009953331900760531, |
|
"learning_rate": 8.956315789473684e-06, |
|
"loss": 0.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.09924487594390508, |
|
"grad_norm": 0.0015611740527674556, |
|
"learning_rate": 8.429473684210525e-06, |
|
"loss": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.09978425026968717, |
|
"grad_norm": 0.08797088265419006, |
|
"learning_rate": 7.902631578947368e-06, |
|
"loss": 0.0011, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.10032362459546926, |
|
"grad_norm": 0.0015272749587893486, |
|
"learning_rate": 7.3757894736842095e-06, |
|
"loss": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.10086299892125135, |
|
"grad_norm": 0.007411181926727295, |
|
"learning_rate": 6.848947368421053e-06, |
|
"loss": 0.0001, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.10140237324703344, |
|
"grad_norm": 0.001111446414142847, |
|
"learning_rate": 6.322105263157895e-06, |
|
"loss": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.10194174757281553, |
|
"grad_norm": 0.0008587180054746568, |
|
"learning_rate": 5.7952631578947365e-06, |
|
"loss": 0.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.10248112189859762, |
|
"grad_norm": 0.008806944824755192, |
|
"learning_rate": 5.268421052631578e-06, |
|
"loss": 0.0001, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10302049622437973, |
|
"grad_norm": 0.059028804302215576, |
|
"learning_rate": 4.741578947368421e-06, |
|
"loss": 0.0008, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.10355987055016182, |
|
"grad_norm": 0.0017502488335594535, |
|
"learning_rate": 4.2147368421052626e-06, |
|
"loss": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.10409924487594391, |
|
"grad_norm": 0.0008180232835002244, |
|
"learning_rate": 3.6878947368421047e-06, |
|
"loss": 0.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.104638619201726, |
|
"grad_norm": 0.0005253468989394605, |
|
"learning_rate": 3.1610526315789474e-06, |
|
"loss": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.10517799352750809, |
|
"grad_norm": 0.007774029858410358, |
|
"learning_rate": 2.634210526315789e-06, |
|
"loss": 0.0001, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.10571736785329018, |
|
"grad_norm": 0.001689290627837181, |
|
"learning_rate": 2.1073684210526313e-06, |
|
"loss": 0.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.10625674217907227, |
|
"grad_norm": 0.005365308839827776, |
|
"learning_rate": 1.5805263157894737e-06, |
|
"loss": 0.0001, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.10679611650485436, |
|
"grad_norm": 0.0035137736704200506, |
|
"learning_rate": 1.0536842105263156e-06, |
|
"loss": 0.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.10733549083063647, |
|
"grad_norm": 0.0472775362432003, |
|
"learning_rate": 5.268421052631578e-07, |
|
"loss": 0.0005, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.10787486515641856, |
|
"grad_norm": 0.029273375868797302, |
|
"learning_rate": 0.0, |
|
"loss": 0.0003, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10787486515641856, |
|
"eval_loss": 0.000882925814948976, |
|
"eval_runtime": 43.7697, |
|
"eval_samples_per_second": 17.843, |
|
"eval_steps_per_second": 4.478, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.580280733197926e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|