|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 5871, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005109862033725089, |
|
"grad_norm": 57.87140382113745, |
|
"learning_rate": 5e-06, |
|
"loss": 2.3817, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010219724067450179, |
|
"grad_norm": 22.753974610258698, |
|
"learning_rate": 5e-06, |
|
"loss": 1.7816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015329586101175269, |
|
"grad_norm": 10.200028852824934, |
|
"learning_rate": 5e-06, |
|
"loss": 1.4328, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020439448134900357, |
|
"grad_norm": 10.263199576103078, |
|
"learning_rate": 5e-06, |
|
"loss": 1.2383, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.025549310168625446, |
|
"grad_norm": 4.8615267890156915, |
|
"learning_rate": 5e-06, |
|
"loss": 1.131, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.030659172202350538, |
|
"grad_norm": 4.118120498213385, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0356, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03576903423607562, |
|
"grad_norm": 2.8275806453598338, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0006, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.040878896269800714, |
|
"grad_norm": 2.315792934153761, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9655, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.045988758303525806, |
|
"grad_norm": 2.165672978783245, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9467, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05109862033725089, |
|
"grad_norm": 1.5488035180810096, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9281, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05620848237097598, |
|
"grad_norm": 1.7243341087325224, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9128, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.061318344404701075, |
|
"grad_norm": 1.5359895854005425, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9031, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06642820643842616, |
|
"grad_norm": 1.435850065371256, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8922, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07153806847215124, |
|
"grad_norm": 1.2790521202391505, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8765, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07664793050587634, |
|
"grad_norm": 1.5917071745489466, |
|
"learning_rate": 5e-06, |
|
"loss": 0.883, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08175779253960143, |
|
"grad_norm": 1.4686990635112733, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8874, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08686765457332651, |
|
"grad_norm": 1.2956583323307207, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8411, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09197751660705161, |
|
"grad_norm": 1.2357587318309058, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8726, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0970873786407767, |
|
"grad_norm": 1.1075519305139339, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8548, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10219724067450178, |
|
"grad_norm": 1.086727474202395, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8493, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10730710270822688, |
|
"grad_norm": 1.0270014162441539, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8545, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11241696474195197, |
|
"grad_norm": 0.8505362329385927, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8388, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11752682677567705, |
|
"grad_norm": 1.109098072362057, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8384, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12263668880940215, |
|
"grad_norm": 1.2809391093206264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8471, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12774655084312725, |
|
"grad_norm": 0.9245822751345097, |
|
"learning_rate": 5e-06, |
|
"loss": 0.836, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13285641287685232, |
|
"grad_norm": 0.8873874884959316, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8283, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13796627491057742, |
|
"grad_norm": 0.6839627248028421, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8191, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1430761369443025, |
|
"grad_norm": 0.8006134542844254, |
|
"learning_rate": 5e-06, |
|
"loss": 0.82, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1481859989780276, |
|
"grad_norm": 0.7249813175028869, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8254, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1532958610117527, |
|
"grad_norm": 0.611471064933341, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8148, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15840572304547776, |
|
"grad_norm": 0.8403753613185879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8091, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16351558507920286, |
|
"grad_norm": 0.5250234838001376, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8328, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16862544711292796, |
|
"grad_norm": 0.5411140081911021, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8171, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17373530914665303, |
|
"grad_norm": 0.7002721154510084, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8073, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.17884517118037813, |
|
"grad_norm": 0.7366019714677299, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8139, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18395503321410323, |
|
"grad_norm": 0.4960107622038265, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8195, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1890648952478283, |
|
"grad_norm": 0.590921742782821, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8131, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1941747572815534, |
|
"grad_norm": 0.43046345355299165, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8166, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1992846193152785, |
|
"grad_norm": 0.37156340060616266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8082, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20439448134900357, |
|
"grad_norm": 0.5029827349762587, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8187, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20950434338272866, |
|
"grad_norm": 0.39268177137347837, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8132, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21461420541645376, |
|
"grad_norm": 0.38787210509179787, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8153, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.21972406745017883, |
|
"grad_norm": 0.5073537687615494, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8032, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22483392948390393, |
|
"grad_norm": 0.36232141966438186, |
|
"learning_rate": 5e-06, |
|
"loss": 0.786, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.22994379151762903, |
|
"grad_norm": 0.3362374048828632, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8019, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2350536535513541, |
|
"grad_norm": 0.3661481336901712, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8012, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2401635155850792, |
|
"grad_norm": 0.3186478374299407, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8088, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2452733776188043, |
|
"grad_norm": 0.3742412176358808, |
|
"learning_rate": 5e-06, |
|
"loss": 0.795, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2503832396525294, |
|
"grad_norm": 0.3687134051282934, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7899, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2554931016862545, |
|
"grad_norm": 0.2844684124400157, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7992, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26060296371997954, |
|
"grad_norm": 0.3445705328013273, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7908, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.26571282575370464, |
|
"grad_norm": 0.3100104564359116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7874, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27082268778742974, |
|
"grad_norm": 0.32495110543783357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7982, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.27593254982115484, |
|
"grad_norm": 0.27997656185948167, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7954, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28104241185487994, |
|
"grad_norm": 0.4055280072220847, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7895, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.286152273888605, |
|
"grad_norm": 0.2751499734062912, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7931, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2912621359223301, |
|
"grad_norm": 0.3232277546201772, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7972, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2963719979560552, |
|
"grad_norm": 0.2915040535303487, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7912, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3014818599897803, |
|
"grad_norm": 0.2637267252402153, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7903, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3065917220235054, |
|
"grad_norm": 0.329139944193737, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7906, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3117015840572305, |
|
"grad_norm": 0.24558162809212852, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7947, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3168114460909555, |
|
"grad_norm": 0.27224578177967845, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7998, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3219213081246806, |
|
"grad_norm": 0.2379116871443806, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7702, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3270311701584057, |
|
"grad_norm": 0.2609800304013704, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7799, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3321410321921308, |
|
"grad_norm": 0.23810454322283608, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7897, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3372508942258559, |
|
"grad_norm": 0.252487995051141, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7726, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.342360756259581, |
|
"grad_norm": 0.24925719360242624, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7978, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.34747061829330605, |
|
"grad_norm": 0.2656149332274455, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7915, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.35258048032703115, |
|
"grad_norm": 0.2661464494899381, |
|
"learning_rate": 5e-06, |
|
"loss": 0.78, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.35769034236075625, |
|
"grad_norm": 0.20145837945729328, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7734, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.36280020439448135, |
|
"grad_norm": 0.26148666494272177, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7792, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.36791006642820645, |
|
"grad_norm": 0.24346683962909085, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7816, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.37301992846193155, |
|
"grad_norm": 0.22278218589963927, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7827, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3781297904956566, |
|
"grad_norm": 0.2303051334399858, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7707, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3832396525293817, |
|
"grad_norm": 0.2543406962739334, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7721, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3883495145631068, |
|
"grad_norm": 0.22568609222745584, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7836, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3934593765968319, |
|
"grad_norm": 0.2294845661519681, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7872, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.398569238630557, |
|
"grad_norm": 0.2197759747155408, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7681, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4036791006642821, |
|
"grad_norm": 0.22484907906751414, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7797, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.40878896269800713, |
|
"grad_norm": 0.21381439198625019, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7891, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.41389882473173223, |
|
"grad_norm": 0.21726381466920616, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7779, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4190086867654573, |
|
"grad_norm": 0.21218649602843365, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7756, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4241185487991824, |
|
"grad_norm": 0.2321703903701098, |
|
"learning_rate": 5e-06, |
|
"loss": 0.778, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4292284108329075, |
|
"grad_norm": 0.21376686462742833, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7754, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4343382728666326, |
|
"grad_norm": 0.20773121342846562, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7729, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.43944813490035767, |
|
"grad_norm": 0.23075209429146118, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7743, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.44455799693408277, |
|
"grad_norm": 0.21889813942522324, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7872, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.44966785896780787, |
|
"grad_norm": 0.23158111213452945, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7729, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.45477772100153296, |
|
"grad_norm": 0.23759359366214816, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7781, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.45988758303525806, |
|
"grad_norm": 0.20737512188117463, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7733, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.46499744506898316, |
|
"grad_norm": 0.21978896076440746, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7724, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4701073071027082, |
|
"grad_norm": 0.21326048741734183, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7779, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4752171691364333, |
|
"grad_norm": 0.2127733995734116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7862, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4803270311701584, |
|
"grad_norm": 0.20545254783011138, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7741, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4854368932038835, |
|
"grad_norm": 0.22401938020775877, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7879, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4905467552376086, |
|
"grad_norm": 0.20313108726519555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7795, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4956566172713337, |
|
"grad_norm": 0.20676236489485517, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7658, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5007664793050588, |
|
"grad_norm": 0.1997349100496237, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7665, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5058763413387839, |
|
"grad_norm": 0.18582330695996696, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7678, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.510986203372509, |
|
"grad_norm": 0.20480578385489007, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7916, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.516096065406234, |
|
"grad_norm": 0.2068595244192038, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7728, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5212059274399591, |
|
"grad_norm": 0.19202510728167327, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7875, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.20142193391262955, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7691, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5314256515074093, |
|
"grad_norm": 0.19153357968487428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7717, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5365355135411344, |
|
"grad_norm": 0.21070221575444148, |
|
"learning_rate": 5e-06, |
|
"loss": 0.771, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5416453755748595, |
|
"grad_norm": 0.19388651883167224, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7674, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5467552376085846, |
|
"grad_norm": 0.19235966239305358, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7639, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5518650996423097, |
|
"grad_norm": 0.2020128053771939, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7838, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5569749616760348, |
|
"grad_norm": 0.19898211373916952, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7734, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5620848237097599, |
|
"grad_norm": 0.19556108367687597, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7668, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.567194685743485, |
|
"grad_norm": 0.19409586285487196, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7778, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.57230454777721, |
|
"grad_norm": 0.20681598650606026, |
|
"learning_rate": 5e-06, |
|
"loss": 0.779, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5774144098109351, |
|
"grad_norm": 0.2017546177375358, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7573, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5825242718446602, |
|
"grad_norm": 0.1913755514728684, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7625, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5876341338783853, |
|
"grad_norm": 0.18303523769215124, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7752, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5927439959121104, |
|
"grad_norm": 0.19646699079126273, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7734, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5978538579458355, |
|
"grad_norm": 0.18589450588787854, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7631, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6029637199795606, |
|
"grad_norm": 0.19141416094028266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7721, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6080735820132857, |
|
"grad_norm": 0.2010700371236207, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7723, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6131834440470108, |
|
"grad_norm": 0.209693144160535, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7709, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6182933060807358, |
|
"grad_norm": 0.19006264249604685, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7731, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.623403168114461, |
|
"grad_norm": 0.19625031587081004, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7683, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.628513030148186, |
|
"grad_norm": 0.19078272960681916, |
|
"learning_rate": 5e-06, |
|
"loss": 0.773, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.633622892181911, |
|
"grad_norm": 0.2007688157803181, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7533, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6387327542156361, |
|
"grad_norm": 0.20004504142951482, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7721, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6438426162493612, |
|
"grad_norm": 0.18940428161938747, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7525, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6489524782830863, |
|
"grad_norm": 0.19192245014604287, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7743, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6540623403168114, |
|
"grad_norm": 0.18803291404020508, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7781, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6591722023505365, |
|
"grad_norm": 0.19034763073417169, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7609, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6642820643842616, |
|
"grad_norm": 0.2000260745898994, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7493, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6693919264179867, |
|
"grad_norm": 0.19721212735320226, |
|
"learning_rate": 5e-06, |
|
"loss": 0.759, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6745017884517118, |
|
"grad_norm": 0.1840594216152482, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7554, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6796116504854369, |
|
"grad_norm": 0.19294533218500587, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7601, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.684721512519162, |
|
"grad_norm": 0.2003437533474394, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7665, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6898313745528871, |
|
"grad_norm": 0.19522682692250634, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7682, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6949412365866121, |
|
"grad_norm": 0.19114649796975278, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7675, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7000510986203372, |
|
"grad_norm": 0.18795490185304542, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7514, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7051609606540623, |
|
"grad_norm": 0.19982770311498202, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7587, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7102708226877874, |
|
"grad_norm": 0.2094868748722276, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7424, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7153806847215125, |
|
"grad_norm": 0.18141939898186682, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7204905467552376, |
|
"grad_norm": 0.184317543793234, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7659, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7256004087889627, |
|
"grad_norm": 0.19482359157799217, |
|
"learning_rate": 5e-06, |
|
"loss": 0.766, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7307102708226878, |
|
"grad_norm": 0.20609416559465576, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7591, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7358201328564129, |
|
"grad_norm": 0.1733817918744796, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7657, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.740929994890138, |
|
"grad_norm": 0.20231819059208814, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7693, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7460398569238631, |
|
"grad_norm": 0.19384075901742115, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7677, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7511497189575882, |
|
"grad_norm": 0.20242534213073207, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7658, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7562595809913132, |
|
"grad_norm": 0.18992152096280124, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7604, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7613694430250383, |
|
"grad_norm": 0.20300312286644698, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7654, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7664793050587634, |
|
"grad_norm": 0.2110214114358105, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7579, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7715891670924885, |
|
"grad_norm": 0.18507470375993035, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7535, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7766990291262136, |
|
"grad_norm": 0.20773697020515, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7681, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7818088911599387, |
|
"grad_norm": 0.1846942481775099, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7589, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7869187531936638, |
|
"grad_norm": 0.19873266556747132, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7701, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7920286152273889, |
|
"grad_norm": 0.1889722343751879, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7637, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.797138477261114, |
|
"grad_norm": 0.2160555515101488, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7459, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8022483392948391, |
|
"grad_norm": 0.18958425843094595, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7499, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8073582013285642, |
|
"grad_norm": 0.18917128647246217, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7572, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8124680633622893, |
|
"grad_norm": 0.17914225162735836, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7506, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8175779253960143, |
|
"grad_norm": 0.19189511614416635, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7613, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8226877874297394, |
|
"grad_norm": 0.19059344363394998, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7604, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8277976494634645, |
|
"grad_norm": 0.20852250489781288, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7737, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8329075114971896, |
|
"grad_norm": 0.1877964215413997, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7725, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8380173735309147, |
|
"grad_norm": 0.18419774730049385, |
|
"learning_rate": 5e-06, |
|
"loss": 0.757, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8431272355646398, |
|
"grad_norm": 0.1926784804428757, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7515, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8482370975983649, |
|
"grad_norm": 0.19627214391258455, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7718, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.85334695963209, |
|
"grad_norm": 0.1944045450874202, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7449, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.858456821665815, |
|
"grad_norm": 0.18125259142482736, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7604, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8635666836995401, |
|
"grad_norm": 0.17817637475202877, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7641, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8686765457332652, |
|
"grad_norm": 0.20471658407314103, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7734, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8737864077669902, |
|
"grad_norm": 0.1878161208418682, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7696, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8788962698007153, |
|
"grad_norm": 0.18334210713527221, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7536, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8840061318344404, |
|
"grad_norm": 0.18076699199021762, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7522, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8891159938681655, |
|
"grad_norm": 0.19181861440876702, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7625, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8942258559018906, |
|
"grad_norm": 0.17965104525055625, |
|
"learning_rate": 5e-06, |
|
"loss": 0.754, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8993357179356157, |
|
"grad_norm": 0.19942739585455946, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7663, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9044455799693408, |
|
"grad_norm": 0.19441177912329213, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7643, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9095554420030659, |
|
"grad_norm": 0.1989153914067934, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7538, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.914665304036791, |
|
"grad_norm": 0.18779250602110079, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7582, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9197751660705161, |
|
"grad_norm": 0.18880847142963628, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7834, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9248850281042412, |
|
"grad_norm": 0.19100671149679851, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7633, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9299948901379663, |
|
"grad_norm": 0.19156019315031683, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7644, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9351047521716913, |
|
"grad_norm": 0.18435442552610007, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7597, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9402146142054164, |
|
"grad_norm": 0.2051724242213117, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7504, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9453244762391415, |
|
"grad_norm": 0.18536587248191086, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7396, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9504343382728666, |
|
"grad_norm": 0.17780024213447235, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7506, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9555442003065917, |
|
"grad_norm": 0.18708967218779626, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7455, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9606540623403168, |
|
"grad_norm": 0.17747103600840475, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7444, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9657639243740419, |
|
"grad_norm": 0.19063629309146018, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7533, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.970873786407767, |
|
"grad_norm": 0.19471871824403422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7539, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9759836484414921, |
|
"grad_norm": 0.18403984668742995, |
|
"learning_rate": 5e-06, |
|
"loss": 0.766, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9810935104752172, |
|
"grad_norm": 0.19270062603489418, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7661, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9862033725089423, |
|
"grad_norm": 0.19463685028697894, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7591, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9913132345426674, |
|
"grad_norm": 0.18870267371498323, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7619, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9964230965763924, |
|
"grad_norm": 0.19561509144751293, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7727, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0015329586101176, |
|
"grad_norm": 0.1865614045173204, |
|
"learning_rate": 5e-06, |
|
"loss": 0.753, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0066428206438427, |
|
"grad_norm": 0.17671337095527262, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7557, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.0117526826775678, |
|
"grad_norm": 0.19010483409505236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7446, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.016862544711293, |
|
"grad_norm": 0.17413564167981435, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7389, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.021972406745018, |
|
"grad_norm": 0.18070567046481728, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7451, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0270822687787429, |
|
"grad_norm": 0.2047148011452083, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7404, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.032192130812468, |
|
"grad_norm": 0.19369702939207809, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7432, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.037301992846193, |
|
"grad_norm": 0.19067195646001925, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7396, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0424118548799182, |
|
"grad_norm": 0.20517054385871566, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7312, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0475217169136433, |
|
"grad_norm": 0.19412259781936536, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7434, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.1873188104877111, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7411, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0577414409810935, |
|
"grad_norm": 0.1897369700854769, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7345, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0628513030148186, |
|
"grad_norm": 0.1853976118509793, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7549, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0679611650485437, |
|
"grad_norm": 0.17933568978007214, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7531, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.0730710270822688, |
|
"grad_norm": 0.20312467005419368, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7498, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0781808891159939, |
|
"grad_norm": 0.18428135458855252, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7404, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.083290751149719, |
|
"grad_norm": 0.1973755972738785, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7329, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.088400613183444, |
|
"grad_norm": 0.1917623224859124, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7555, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.0935104752171692, |
|
"grad_norm": 0.18406588696688597, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7433, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.0986203372508943, |
|
"grad_norm": 0.17921040061727433, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7305, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.1037301992846194, |
|
"grad_norm": 0.18963146030246644, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7397, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.1088400613183444, |
|
"grad_norm": 0.18712686418913257, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7409, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1139499233520695, |
|
"grad_norm": 0.19771359798461643, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7427, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1190597853857946, |
|
"grad_norm": 0.1840406013875161, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7611, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1241696474195197, |
|
"grad_norm": 0.18827676786628125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7364, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1292795094532448, |
|
"grad_norm": 0.191153055094231, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7539, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.13438937148697, |
|
"grad_norm": 0.1822666967889064, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7414, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.139499233520695, |
|
"grad_norm": 0.19699245088881503, |
|
"learning_rate": 5e-06, |
|
"loss": 0.738, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.14460909555442, |
|
"grad_norm": 0.1898722587971926, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7461, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1497189575881452, |
|
"grad_norm": 0.18927249674651098, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7429, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1548288196218701, |
|
"grad_norm": 0.18896514284627008, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7549, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1599386816555952, |
|
"grad_norm": 0.18739970646008372, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7499, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1650485436893203, |
|
"grad_norm": 0.1910100009129843, |
|
"learning_rate": 5e-06, |
|
"loss": 0.747, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1701584057230454, |
|
"grad_norm": 0.20198153170551428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7386, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1752682677567705, |
|
"grad_norm": 0.18720641288978465, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7578, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.1803781297904956, |
|
"grad_norm": 0.18961987449195758, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7529, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.1854879918242207, |
|
"grad_norm": 0.17712198248177036, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7476, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1905978538579458, |
|
"grad_norm": 0.18490722732969878, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7437, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.195707715891671, |
|
"grad_norm": 0.18822524406653396, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7469, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.200817577925396, |
|
"grad_norm": 0.17979619340691932, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7476, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.205927439959121, |
|
"grad_norm": 0.19302201862857232, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7519, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.2110373019928462, |
|
"grad_norm": 0.17331596795239365, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7421, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2161471640265713, |
|
"grad_norm": 0.1900974772777454, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7491, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2212570260602964, |
|
"grad_norm": 0.18235079887869074, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7398, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2263668880940215, |
|
"grad_norm": 0.1990024061618005, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7367, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2314767501277466, |
|
"grad_norm": 0.19774880108315787, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7509, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.2365866121614717, |
|
"grad_norm": 0.18038613594979708, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7397, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2416964741951968, |
|
"grad_norm": 0.19148490320343095, |
|
"learning_rate": 5e-06, |
|
"loss": 0.738, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.246806336228922, |
|
"grad_norm": 0.1764579818726389, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7344, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.2519161982626468, |
|
"grad_norm": 0.19292171667184566, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7386, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.257026060296372, |
|
"grad_norm": 0.1830054013064937, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7428, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.262135922330097, |
|
"grad_norm": 0.1771188524320852, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7349, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2672457843638223, |
|
"grad_norm": 0.19308200340380383, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7428, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2723556463975472, |
|
"grad_norm": 0.17723395601019812, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7271, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2774655084312725, |
|
"grad_norm": 0.21808963432632347, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7551, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2825753704649974, |
|
"grad_norm": 0.1944324454402299, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7388, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.2876852324987225, |
|
"grad_norm": 0.1737167892046253, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7383, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.2927950945324476, |
|
"grad_norm": 0.19316243219818216, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7447, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.2979049565661727, |
|
"grad_norm": 0.18271045253382115, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7439, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.3030148185998978, |
|
"grad_norm": 0.19235060247622612, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7362, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.3081246806336229, |
|
"grad_norm": 0.17975422534798727, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7355, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.313234542667348, |
|
"grad_norm": 0.19133431284185412, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7474, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.318344404701073, |
|
"grad_norm": 0.18525107476229646, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7362, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3234542667347982, |
|
"grad_norm": 0.17940598778920888, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7456, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3285641287685233, |
|
"grad_norm": 0.18377019808836909, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7371, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3336739908022484, |
|
"grad_norm": 0.19380006248186266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7382, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3387838528359735, |
|
"grad_norm": 0.19130341477919996, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7249, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3438937148696986, |
|
"grad_norm": 0.19399937386692442, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7461, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3490035769034237, |
|
"grad_norm": 0.19026606995853784, |
|
"learning_rate": 5e-06, |
|
"loss": 0.744, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.3541134389371488, |
|
"grad_norm": 0.17865393066414276, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7562, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.3592233009708738, |
|
"grad_norm": 0.18268806699965215, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7369, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.364333163004599, |
|
"grad_norm": 0.1859894512893362, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7303, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.369443025038324, |
|
"grad_norm": 0.1858579729895718, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7338, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.3745528870720491, |
|
"grad_norm": 0.18844334675064925, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7399, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.379662749105774, |
|
"grad_norm": 0.19488780104365555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7297, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.3847726111394993, |
|
"grad_norm": 0.2012609184785339, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7438, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.3898824731732242, |
|
"grad_norm": 0.19240664121181153, |
|
"learning_rate": 5e-06, |
|
"loss": 0.724, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.3949923352069495, |
|
"grad_norm": 0.1989866337354731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7387, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.4001021972406744, |
|
"grad_norm": 0.19173593318229185, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7389, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.4052120592743995, |
|
"grad_norm": 0.18490218706957628, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7354, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.4103219213081246, |
|
"grad_norm": 0.19596927441115194, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7294, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.4154317833418497, |
|
"grad_norm": 0.1884149575008095, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7535, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.4205416453755748, |
|
"grad_norm": 0.19011351454021505, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7311, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.4256515074093, |
|
"grad_norm": 0.18330086878582655, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7238, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.430761369443025, |
|
"grad_norm": 0.18245127655957494, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7297, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.43587123147675, |
|
"grad_norm": 0.1899683440493854, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7388, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4409810935104752, |
|
"grad_norm": 0.17955923753560576, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7382, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.4460909555442003, |
|
"grad_norm": 0.19252118964036657, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7428, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4512008175779254, |
|
"grad_norm": 0.1993095814580447, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7453, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4563106796116505, |
|
"grad_norm": 0.19453340379561043, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7316, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4614205416453756, |
|
"grad_norm": 0.18991487065634022, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7386, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.4665304036791007, |
|
"grad_norm": 0.18973483396294438, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7466, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.4716402657128258, |
|
"grad_norm": 0.1919078737205217, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7495, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.476750127746551, |
|
"grad_norm": 0.1839372662925669, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7389, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.481859989780276, |
|
"grad_norm": 0.19249510537236922, |
|
"learning_rate": 5e-06, |
|
"loss": 0.749, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.486969851814001, |
|
"grad_norm": 0.179246890906481, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7282, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.4920797138477262, |
|
"grad_norm": 0.19514232006253573, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7446, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.497189575881451, |
|
"grad_norm": 0.18596165643891152, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7334, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.5022994379151764, |
|
"grad_norm": 0.18786595484474303, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7279, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.5074092999489013, |
|
"grad_norm": 0.1971738538736254, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7461, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.5125191619826266, |
|
"grad_norm": 0.18866040282412957, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7474, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.5176290240163515, |
|
"grad_norm": 0.19040908132898832, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7445, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5227388860500768, |
|
"grad_norm": 0.1929810400978154, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7362, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.5278487480838017, |
|
"grad_norm": 0.19442375737357984, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7305, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.532958610117527, |
|
"grad_norm": 0.18546826858387847, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7311, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.5380684721512519, |
|
"grad_norm": 0.18542784404927515, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7447, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.543178334184977, |
|
"grad_norm": 0.2020846723209545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7113, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.548288196218702, |
|
"grad_norm": 0.19026884893782828, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7157, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.5533980582524272, |
|
"grad_norm": 0.18111421662938304, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7323, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5585079202861523, |
|
"grad_norm": 0.19367385202342016, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7305, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.5636177823198774, |
|
"grad_norm": 0.18590394121821466, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7341, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.5687276443536025, |
|
"grad_norm": 0.18488441186992707, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7482, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5738375063873276, |
|
"grad_norm": 0.18226306867076514, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7334, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.20053856155426641, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7414, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.5840572304547778, |
|
"grad_norm": 0.19672564131420983, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7508, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.5891670924885029, |
|
"grad_norm": 0.1790851772225089, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7471, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.594276954522228, |
|
"grad_norm": 0.1900047612676954, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7421, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.599386816555953, |
|
"grad_norm": 0.19746465955340986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7471, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.604496678589678, |
|
"grad_norm": 0.186549540683221, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7275, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6096065406234032, |
|
"grad_norm": 0.1876261054598287, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7359, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.6147164026571281, |
|
"grad_norm": 0.19082325492370317, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7268, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.6198262646908534, |
|
"grad_norm": 0.2016402119888201, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7377, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.6249361267245783, |
|
"grad_norm": 0.1888126317070555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7265, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6300459887583036, |
|
"grad_norm": 0.17743730583327474, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7203, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.6351558507920285, |
|
"grad_norm": 0.1826162903853255, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7215, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.6402657128257538, |
|
"grad_norm": 0.19419266754404552, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7376, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.6453755748594787, |
|
"grad_norm": 0.1956453565355767, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7316, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.650485436893204, |
|
"grad_norm": 0.19765143129125318, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7374, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.655595298926929, |
|
"grad_norm": 0.19127982430051427, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7405, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.660705160960654, |
|
"grad_norm": 0.1847472801458583, |
|
"learning_rate": 5e-06, |
|
"loss": 0.724, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.6658150229943791, |
|
"grad_norm": 0.18698307703261788, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7374, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.6709248850281042, |
|
"grad_norm": 0.17533798523255767, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7202, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.6760347470618293, |
|
"grad_norm": 0.1806351825859557, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7452, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.6811446090955544, |
|
"grad_norm": 0.1767976805961292, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7338, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.6862544711292795, |
|
"grad_norm": 0.19498984484111873, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7403, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.6913643331630046, |
|
"grad_norm": 0.17701307669892918, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7299, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.6964741951967297, |
|
"grad_norm": 0.19220216566407472, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7314, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.7015840572304548, |
|
"grad_norm": 0.1829279730231264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.743, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.70669391926418, |
|
"grad_norm": 0.19526766653061225, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7222, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.711803781297905, |
|
"grad_norm": 0.19455609962672274, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7253, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.71691364333163, |
|
"grad_norm": 0.21002379536162816, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7429, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.722023505365355, |
|
"grad_norm": 0.1990882316461353, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7443, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.7271333673990803, |
|
"grad_norm": 0.17934672167038826, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7497, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.7322432294328052, |
|
"grad_norm": 0.19501165745940965, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7425, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.7373530914665305, |
|
"grad_norm": 0.19248650606756543, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7297, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.7424629535002554, |
|
"grad_norm": 0.17721599710417338, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7251, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.7475728155339807, |
|
"grad_norm": 0.18509365156353424, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7221, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.7526826775677056, |
|
"grad_norm": 0.18289284691122754, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7327, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.757792539601431, |
|
"grad_norm": 0.18756279151165123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.732, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.7629024016351558, |
|
"grad_norm": 0.17439303769229625, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7344, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.768012263668881, |
|
"grad_norm": 0.17783376482824478, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7323, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.773122125702606, |
|
"grad_norm": 0.19448194078586717, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7292, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.778231987736331, |
|
"grad_norm": 0.18000237860427712, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7219, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.7833418497700562, |
|
"grad_norm": 0.18519882940772772, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7354, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.7884517118037813, |
|
"grad_norm": 0.19301292549147336, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7487, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.7935615738375064, |
|
"grad_norm": 0.17758380897102066, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7373, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.7986714358712315, |
|
"grad_norm": 0.1794720757802905, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7226, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.8037812979049566, |
|
"grad_norm": 0.18100374933694008, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7256, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.8088911599386817, |
|
"grad_norm": 0.1954603145633284, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7283, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.8140010219724068, |
|
"grad_norm": 0.19558607958635285, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7384, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.8191108840061319, |
|
"grad_norm": 0.1772107537935853, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7382, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.824220746039857, |
|
"grad_norm": 0.17916901000763397, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7367, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.829330608073582, |
|
"grad_norm": 0.19083482072843658, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7247, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.8344404701073072, |
|
"grad_norm": 0.1770449813805881, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7187, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.839550332141032, |
|
"grad_norm": 0.18790158384523442, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7403, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.8446601941747574, |
|
"grad_norm": 0.17892362311216914, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7368, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.8497700562084822, |
|
"grad_norm": 0.1839659251785667, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7255, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.8548799182422075, |
|
"grad_norm": 0.19138496555502849, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7453, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.8599897802759324, |
|
"grad_norm": 0.18135734354491537, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7166, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.8650996423096577, |
|
"grad_norm": 0.1859082044449026, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7285, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.8702095043433826, |
|
"grad_norm": 0.1913280855307758, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7279, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.875319366377108, |
|
"grad_norm": 0.19148047998384163, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7381, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.8804292284108328, |
|
"grad_norm": 0.190776629149848, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7347, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.8855390904445581, |
|
"grad_norm": 0.18748890464701637, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7214, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.890648952478283, |
|
"grad_norm": 0.19691029617370956, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7396, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.8957588145120083, |
|
"grad_norm": 0.17143385370457725, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7305, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.9008686765457332, |
|
"grad_norm": 0.19115494111352774, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7252, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.9059785385794583, |
|
"grad_norm": 0.18589155104150923, |
|
"learning_rate": 5e-06, |
|
"loss": 0.728, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.9110884006131834, |
|
"grad_norm": 0.1870078279938856, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7259, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.9161982626469085, |
|
"grad_norm": 0.17909224396912188, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7345, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.9213081246806336, |
|
"grad_norm": 0.19885632401705697, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7289, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.9264179867143587, |
|
"grad_norm": 0.18778552722329356, |
|
"learning_rate": 5e-06, |
|
"loss": 0.725, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.9315278487480838, |
|
"grad_norm": 0.2019790249495369, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7398, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.936637710781809, |
|
"grad_norm": 0.19338701904495897, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7265, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.941747572815534, |
|
"grad_norm": 0.18703296264974872, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7332, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.946857434849259, |
|
"grad_norm": 0.1700175440342506, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7205, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.9519672968829842, |
|
"grad_norm": 0.18636496202992153, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7154, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.9570771589167093, |
|
"grad_norm": 0.1826391337083993, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7356, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.9621870209504344, |
|
"grad_norm": 0.17766191115765154, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7369, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.9672968829841593, |
|
"grad_norm": 0.18034528150782342, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7288, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.9724067450178846, |
|
"grad_norm": 0.18541894497456152, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7296, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.9775166070516095, |
|
"grad_norm": 0.19539634425789987, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7374, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.9826264690853348, |
|
"grad_norm": 0.1928837424204438, |
|
"learning_rate": 5e-06, |
|
"loss": 0.732, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.9877363311190597, |
|
"grad_norm": 0.18813671735265705, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7285, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.992846193152785, |
|
"grad_norm": 0.19024591983517306, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7386, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.9979560551865099, |
|
"grad_norm": 0.1791330764130833, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7324, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.003065917220235, |
|
"grad_norm": 0.18457472513280188, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7303, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.00817577925396, |
|
"grad_norm": 0.18684788166920566, |
|
"learning_rate": 5e-06, |
|
"loss": 0.735, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.0132856412876854, |
|
"grad_norm": 0.18385152341485855, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7204, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.0183955033214103, |
|
"grad_norm": 0.1893087134675762, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7271, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.0235053653551356, |
|
"grad_norm": 0.19001500071497598, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7286, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.0286152273888605, |
|
"grad_norm": 0.18883227887144083, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7157, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.033725089422586, |
|
"grad_norm": 0.17689498199700174, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7063, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.0388349514563107, |
|
"grad_norm": 0.19270059169949594, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7295, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.043944813490036, |
|
"grad_norm": 0.19597358123850073, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7268, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.049054675523761, |
|
"grad_norm": 0.19711968415004932, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7229, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.0541645375574857, |
|
"grad_norm": 0.18776496261358783, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7158, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.059274399591211, |
|
"grad_norm": 0.18818737294591004, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7194, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.064384261624936, |
|
"grad_norm": 0.19845798280803176, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7265, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.0694941236586613, |
|
"grad_norm": 0.18713228958457867, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7095, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.074603985692386, |
|
"grad_norm": 0.17787870833728897, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7149, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.0797138477261115, |
|
"grad_norm": 0.19472810880013228, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7178, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.0848237097598363, |
|
"grad_norm": 0.19429450371850024, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7118, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.0899335717935617, |
|
"grad_norm": 0.1941609760118733, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7265, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.0950434338272865, |
|
"grad_norm": 0.19290976310635458, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7085, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.100153295861012, |
|
"grad_norm": 0.1765241017205207, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7301, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.17846133756954982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7105, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.110373019928462, |
|
"grad_norm": 0.1990655813201847, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7197, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.115482881962187, |
|
"grad_norm": 0.18227727070573727, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7225, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.1205927439959122, |
|
"grad_norm": 0.1950861579503913, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7151, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.125702606029637, |
|
"grad_norm": 0.1852273630173904, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7349, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.1308124680633624, |
|
"grad_norm": 0.19463032921601597, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7209, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.1359223300970873, |
|
"grad_norm": 0.20049767813158514, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7272, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.1410321921308126, |
|
"grad_norm": 0.1964989410548727, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7205, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.1461420541645375, |
|
"grad_norm": 0.18656787454813695, |
|
"learning_rate": 5e-06, |
|
"loss": 0.73, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.151251916198263, |
|
"grad_norm": 0.1869811709706087, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7211, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.1563617782319877, |
|
"grad_norm": 0.1822452717215839, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7149, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.161471640265713, |
|
"grad_norm": 0.18992052776084958, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7343, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.166581502299438, |
|
"grad_norm": 0.18438525645647527, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7094, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.171691364333163, |
|
"grad_norm": 0.19529306952253045, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7309, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.176801226366888, |
|
"grad_norm": 0.19540047885297857, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7128, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.181911088400613, |
|
"grad_norm": 0.18411641667621176, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7225, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.1870209504343383, |
|
"grad_norm": 0.19493918166759708, |
|
"learning_rate": 5e-06, |
|
"loss": 0.723, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.192130812468063, |
|
"grad_norm": 0.1843201705851165, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7133, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.1972406745017885, |
|
"grad_norm": 0.1945434906423164, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7196, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.2023505365355134, |
|
"grad_norm": 0.19008929161664118, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7224, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.2074603985692387, |
|
"grad_norm": 0.18391558057092405, |
|
"learning_rate": 5e-06, |
|
"loss": 0.723, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.2125702606029636, |
|
"grad_norm": 0.19348061164149957, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7331, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.217680122636689, |
|
"grad_norm": 0.18607054173383442, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7244, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.2227899846704138, |
|
"grad_norm": 0.18618780601208676, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7272, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.227899846704139, |
|
"grad_norm": 0.17823884880532717, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7091, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.233009708737864, |
|
"grad_norm": 0.18561422862650115, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7205, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.2381195707715893, |
|
"grad_norm": 0.17953482905796772, |
|
"learning_rate": 5e-06, |
|
"loss": 0.729, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.243229432805314, |
|
"grad_norm": 0.18191363924178688, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7111, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.2483392948390395, |
|
"grad_norm": 0.180435799688224, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7107, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.2534491568727644, |
|
"grad_norm": 0.18960270587770217, |
|
"learning_rate": 5e-06, |
|
"loss": 0.725, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.2585590189064897, |
|
"grad_norm": 0.17769968679729267, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7143, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.2636688809402146, |
|
"grad_norm": 0.19163278997385685, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7182, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.26877874297394, |
|
"grad_norm": 0.1940788858796441, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7366, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.2738886050076648, |
|
"grad_norm": 0.1776800674678045, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7069, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.27899846704139, |
|
"grad_norm": 0.17843289642160187, |
|
"learning_rate": 5e-06, |
|
"loss": 0.724, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.284108329075115, |
|
"grad_norm": 0.18057741833938729, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7137, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.28921819110884, |
|
"grad_norm": 0.1866133357180047, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7166, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.294328053142565, |
|
"grad_norm": 0.18193348825294622, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7244, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.2994379151762905, |
|
"grad_norm": 0.1929190761683958, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7277, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.3045477772100154, |
|
"grad_norm": 0.19392177452359835, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7198, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.3096576392437402, |
|
"grad_norm": 0.18143577291270357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.728, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.3147675012774656, |
|
"grad_norm": 0.19443872804506757, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7272, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.3198773633111904, |
|
"grad_norm": 0.1851328945489432, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7234, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.3249872253449158, |
|
"grad_norm": 0.19038110214278162, |
|
"learning_rate": 5e-06, |
|
"loss": 0.713, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.3300970873786406, |
|
"grad_norm": 0.1874741106018047, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7161, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.335206949412366, |
|
"grad_norm": 0.19581977792981697, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7162, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.340316811446091, |
|
"grad_norm": 0.17541497625191085, |
|
"learning_rate": 5e-06, |
|
"loss": 0.723, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.345426673479816, |
|
"grad_norm": 0.20023862697490177, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7185, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.350536535513541, |
|
"grad_norm": 0.1959974413991938, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7088, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.3556463975472663, |
|
"grad_norm": 0.18473631827351808, |
|
"learning_rate": 5e-06, |
|
"loss": 0.719, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.3607562595809912, |
|
"grad_norm": 0.19338189496040809, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7239, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.3658661216147165, |
|
"grad_norm": 0.19066355345819264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7266, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.3709759836484414, |
|
"grad_norm": 0.1883524242314222, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7186, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.3760858456821667, |
|
"grad_norm": 0.18502939250064523, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7199, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.3811957077158916, |
|
"grad_norm": 0.1792178984338457, |
|
"learning_rate": 5e-06, |
|
"loss": 0.714, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.386305569749617, |
|
"grad_norm": 0.18226944439325007, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7041, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.391415431783342, |
|
"grad_norm": 0.18015473190902764, |
|
"learning_rate": 5e-06, |
|
"loss": 0.716, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.396525293817067, |
|
"grad_norm": 0.18115429600775665, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7086, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.401635155850792, |
|
"grad_norm": 0.2043473041616948, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7165, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.406745017884517, |
|
"grad_norm": 0.20268566073488395, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7153, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.411854879918242, |
|
"grad_norm": 0.19488489934465453, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7261, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.4169647419519675, |
|
"grad_norm": 0.1869426176451986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6977, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.4220746039856924, |
|
"grad_norm": 0.19817580694708986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.71, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.4271844660194173, |
|
"grad_norm": 0.2099570721696579, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7204, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.4322943280531426, |
|
"grad_norm": 0.1964657272176851, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7104, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.4374041900868675, |
|
"grad_norm": 0.1886905609835038, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7085, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.442514052120593, |
|
"grad_norm": 0.18220683381465547, |
|
"learning_rate": 5e-06, |
|
"loss": 0.722, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.4476239141543177, |
|
"grad_norm": 0.18487998134947345, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7112, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.452733776188043, |
|
"grad_norm": 0.16830917755794478, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7157, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.457843638221768, |
|
"grad_norm": 0.18354244943334422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7152, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.462953500255493, |
|
"grad_norm": 0.18986840496033355, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7161, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.468063362289218, |
|
"grad_norm": 0.1826318712937385, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7295, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.4731732243229434, |
|
"grad_norm": 0.18527008923688568, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7115, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.4782830863566683, |
|
"grad_norm": 0.18224411387144135, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7173, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.4833929483903936, |
|
"grad_norm": 0.17811035407666295, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7095, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.4885028104241185, |
|
"grad_norm": 0.18261169308001168, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7162, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.493612672457844, |
|
"grad_norm": 0.2002887514635378, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7208, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.4987225344915687, |
|
"grad_norm": 0.1869680657646229, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7081, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.5038323965252935, |
|
"grad_norm": 0.18675570886820952, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7117, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.508942258559019, |
|
"grad_norm": 0.19944201285579877, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7198, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.514052120592744, |
|
"grad_norm": 0.18956975903685294, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7179, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.519161982626469, |
|
"grad_norm": 0.1942120087148544, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7216, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.524271844660194, |
|
"grad_norm": 0.18623164151592378, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6997, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.5293817066939193, |
|
"grad_norm": 0.19081196748322382, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7091, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.5344915687276446, |
|
"grad_norm": 0.191889152108264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7122, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.5396014307613695, |
|
"grad_norm": 0.19018109192168367, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7195, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.5447112927950943, |
|
"grad_norm": 0.18029975730571046, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7067, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.5498211548288197, |
|
"grad_norm": 0.19081905131548996, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7184, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.554931016862545, |
|
"grad_norm": 0.18184068561404432, |
|
"learning_rate": 5e-06, |
|
"loss": 0.711, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.56004087889627, |
|
"grad_norm": 0.18141265501857734, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7298, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.5651507409299947, |
|
"grad_norm": 0.19059042586600472, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7114, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.57026060296372, |
|
"grad_norm": 0.18752747662093455, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7139, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.575370464997445, |
|
"grad_norm": 0.18531393933159526, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7152, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.5804803270311703, |
|
"grad_norm": 0.2020545252464496, |
|
"learning_rate": 5e-06, |
|
"loss": 0.724, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.585590189064895, |
|
"grad_norm": 0.1817661308445167, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7099, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.5907000510986204, |
|
"grad_norm": 0.1874710204305083, |
|
"learning_rate": 5e-06, |
|
"loss": 0.727, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.5958099131323453, |
|
"grad_norm": 0.19697140144619885, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7184, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.6009197751660706, |
|
"grad_norm": 0.1889733854232041, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7169, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.6060296371997955, |
|
"grad_norm": 0.20000474912796498, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7089, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.611139499233521, |
|
"grad_norm": 0.18020813610110156, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7264, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.6162493612672457, |
|
"grad_norm": 0.17876620180082428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7007, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.6213592233009706, |
|
"grad_norm": 0.186937820916383, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7219, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.626469085334696, |
|
"grad_norm": 0.19293145357331443, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7116, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.18779972078705487, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7147, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.636688809402146, |
|
"grad_norm": 0.2004320087337195, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7046, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.641798671435871, |
|
"grad_norm": 0.18155864298582336, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7024, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.6469085334695963, |
|
"grad_norm": 0.18766505517066065, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7092, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.6520183955033216, |
|
"grad_norm": 0.18241808648535346, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7231, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.6571282575370465, |
|
"grad_norm": 0.1956290063678602, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7071, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.6622381195707714, |
|
"grad_norm": 0.19687281837098963, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7237, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.6673479816044967, |
|
"grad_norm": 0.18253071868952309, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7085, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.672457843638222, |
|
"grad_norm": 0.18783632566318903, |
|
"learning_rate": 5e-06, |
|
"loss": 0.717, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.677567705671947, |
|
"grad_norm": 0.1856712541693123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7121, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.682677567705672, |
|
"grad_norm": 0.17913630024093868, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7141, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.687787429739397, |
|
"grad_norm": 0.1814607328558563, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7143, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.692897291773122, |
|
"grad_norm": 0.20516860190923697, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7153, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.6980071538068473, |
|
"grad_norm": 0.18691014833866346, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7021, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.703117015840572, |
|
"grad_norm": 0.19243627032023453, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7198, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.7082268778742975, |
|
"grad_norm": 0.1782810408332171, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7144, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.7133367399080224, |
|
"grad_norm": 0.18246834780709842, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7073, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.7184466019417477, |
|
"grad_norm": 0.19188458416675994, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7033, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.7235564639754726, |
|
"grad_norm": 0.19431224425803312, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7121, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.728666326009198, |
|
"grad_norm": 0.19472680477435397, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7204, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.7337761880429228, |
|
"grad_norm": 0.17416333272267362, |
|
"learning_rate": 5e-06, |
|
"loss": 0.722, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.738886050076648, |
|
"grad_norm": 0.17997245175531543, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7209, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.743995912110373, |
|
"grad_norm": 0.1818254672253798, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7064, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.7491057741440983, |
|
"grad_norm": 0.19143052614657435, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7251, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.754215636177823, |
|
"grad_norm": 0.1849449249230007, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7134, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.759325498211548, |
|
"grad_norm": 0.18960800644635575, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7132, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.7644353602452734, |
|
"grad_norm": 0.18450711551330742, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7218, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.7695452222789987, |
|
"grad_norm": 0.1775798100347251, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7246, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.7746550843127236, |
|
"grad_norm": 0.19385869953809406, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7154, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.7797649463464484, |
|
"grad_norm": 0.18947315882216123, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7146, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.7848748083801738, |
|
"grad_norm": 0.19874490028355687, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7098, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.789984670413899, |
|
"grad_norm": 0.19216650910265412, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7157, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.795094532447624, |
|
"grad_norm": 0.1884532290700664, |
|
"learning_rate": 5e-06, |
|
"loss": 0.73, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.800204394481349, |
|
"grad_norm": 0.178944069456414, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7023, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.805314256515074, |
|
"grad_norm": 0.19897724638804584, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7266, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.810424118548799, |
|
"grad_norm": 0.19676207860709408, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7042, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.8155339805825244, |
|
"grad_norm": 0.17311366117436, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7059, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.8206438426162492, |
|
"grad_norm": 0.1862348663291006, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7012, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.8257537046499746, |
|
"grad_norm": 0.19532852701482903, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7105, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.8308635666836994, |
|
"grad_norm": 0.18410412264007187, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7163, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.8359734287174247, |
|
"grad_norm": 0.21025902958863738, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7359, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.8410832907511496, |
|
"grad_norm": 0.17818540055082727, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7111, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.846193152784875, |
|
"grad_norm": 0.1842948913149892, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7167, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.8513030148186, |
|
"grad_norm": 0.18686037650976978, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7138, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.856412876852325, |
|
"grad_norm": 0.203403946822995, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6996, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.86152273888605, |
|
"grad_norm": 0.19304193259535427, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7184, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.8666326009197753, |
|
"grad_norm": 0.17636221017029402, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6989, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.8717424629535, |
|
"grad_norm": 0.2037376472703345, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7179, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.876852324987225, |
|
"grad_norm": 0.19200332931870842, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7139, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.8819621870209504, |
|
"grad_norm": 0.18491595431761487, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7128, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.8870720490546757, |
|
"grad_norm": 0.1838785582117868, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6955, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.8921819110884006, |
|
"grad_norm": 0.19428058651152774, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7102, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.8972917731221255, |
|
"grad_norm": 0.17936626194327465, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7078, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.902401635155851, |
|
"grad_norm": 0.17631468596491473, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7162, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.907511497189576, |
|
"grad_norm": 0.18415745158446212, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7233, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.912621359223301, |
|
"grad_norm": 0.17279113129182289, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6992, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.917731221257026, |
|
"grad_norm": 0.19005862366918605, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7015, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.922841083290751, |
|
"grad_norm": 0.1865410089900959, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7043, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.927950945324476, |
|
"grad_norm": 0.19166084388796084, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7212, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.9330608073582014, |
|
"grad_norm": 0.19183886093470404, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7117, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.9381706693919263, |
|
"grad_norm": 0.18542747500934698, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7031, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.9432805314256516, |
|
"grad_norm": 0.19800100325472195, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7003, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.9483903934593765, |
|
"grad_norm": 0.19068782203027565, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7157, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.953500255493102, |
|
"grad_norm": 0.1893011994562663, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7049, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.9586101175268267, |
|
"grad_norm": 0.17894632421886825, |
|
"learning_rate": 5e-06, |
|
"loss": 0.721, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.963719979560552, |
|
"grad_norm": 0.18749644947518349, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7245, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.968829841594277, |
|
"grad_norm": 0.18920042895955116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7086, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.973939703628002, |
|
"grad_norm": 0.18597523756708192, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7143, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.979049565661727, |
|
"grad_norm": 0.18741696462849414, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7195, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.9841594276954524, |
|
"grad_norm": 0.18985455877786367, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7091, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.9892692897291773, |
|
"grad_norm": 0.18003618910403507, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7111, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.994379151762902, |
|
"grad_norm": 0.19511416723954572, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7105, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.9994890137966275, |
|
"grad_norm": 0.18652462173874393, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7294, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 5871, |
|
"total_flos": 2506240622592000.0, |
|
"train_loss": 0.754048796695819, |
|
"train_runtime": 27383.3094, |
|
"train_samples_per_second": 109.726, |
|
"train_steps_per_second": 0.214 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5871, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2506240622592000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|