|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1393, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007178750897343862, |
|
"grad_norm": 0.28594130277633667, |
|
"learning_rate": 0.0, |
|
"loss": 1.2147, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0014357501794687725, |
|
"grad_norm": 0.2666853368282318, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.226, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0021536252692031586, |
|
"grad_norm": 0.2672528922557831, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.2442, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.002871500358937545, |
|
"grad_norm": 2.0374395847320557, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.6612, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003589375448671931, |
|
"grad_norm": 0.2744747996330261, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.2383, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004307250538406317, |
|
"grad_norm": 0.2731163501739502, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2612, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.005025125628140704, |
|
"grad_norm": 0.5806142687797546, |
|
"learning_rate": 1.9999987192609945e-05, |
|
"loss": 1.4832, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00574300071787509, |
|
"grad_norm": 0.29958781599998474, |
|
"learning_rate": 1.9999948770505387e-05, |
|
"loss": 1.2451, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.006460875807609476, |
|
"grad_norm": 0.8398181796073914, |
|
"learning_rate": 1.9999884733883163e-05, |
|
"loss": 1.2251, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.007178750897343862, |
|
"grad_norm": 0.5620901584625244, |
|
"learning_rate": 1.9999795083071327e-05, |
|
"loss": 1.4215, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007896625987078248, |
|
"grad_norm": 0.31047117710113525, |
|
"learning_rate": 1.999967981852916e-05, |
|
"loss": 1.2814, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.008614501076812634, |
|
"grad_norm": 0.33838632702827454, |
|
"learning_rate": 1.9999538940847157e-05, |
|
"loss": 1.2543, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00933237616654702, |
|
"grad_norm": 0.5756534934043884, |
|
"learning_rate": 1.9999372450747025e-05, |
|
"loss": 1.3828, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.010050251256281407, |
|
"grad_norm": 0.6112161874771118, |
|
"learning_rate": 1.9999180349081687e-05, |
|
"loss": 1.4476, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.010768126346015794, |
|
"grad_norm": 0.3352164030075073, |
|
"learning_rate": 1.9998962636835273e-05, |
|
"loss": 1.2413, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01148600143575018, |
|
"grad_norm": 0.28125348687171936, |
|
"learning_rate": 1.999871931512311e-05, |
|
"loss": 1.1692, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.012203876525484566, |
|
"grad_norm": 0.8347467184066772, |
|
"learning_rate": 1.9998450385191728e-05, |
|
"loss": 1.5037, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.012921751615218953, |
|
"grad_norm": 0.3188466429710388, |
|
"learning_rate": 1.999815584841884e-05, |
|
"loss": 1.2483, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.013639626704953339, |
|
"grad_norm": 0.36748605966567993, |
|
"learning_rate": 1.9997835706313347e-05, |
|
"loss": 1.2355, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.014357501794687724, |
|
"grad_norm": 0.37445810437202454, |
|
"learning_rate": 1.999748996051532e-05, |
|
"loss": 1.2169, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01507537688442211, |
|
"grad_norm": 0.3378954827785492, |
|
"learning_rate": 1.9997118612796006e-05, |
|
"loss": 1.2259, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.015793251974156496, |
|
"grad_norm": 0.28010478615760803, |
|
"learning_rate": 1.9996721665057797e-05, |
|
"loss": 1.2048, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.016511127063890883, |
|
"grad_norm": 1.0997238159179688, |
|
"learning_rate": 1.999629911933424e-05, |
|
"loss": 1.3627, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01722900215362527, |
|
"grad_norm": 0.28337937593460083, |
|
"learning_rate": 1.9995850977790022e-05, |
|
"loss": 1.2185, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.017946877243359655, |
|
"grad_norm": 0.3515605628490448, |
|
"learning_rate": 1.9995377242720946e-05, |
|
"loss": 1.2808, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01866475233309404, |
|
"grad_norm": 0.37158203125, |
|
"learning_rate": 1.999487791655394e-05, |
|
"loss": 1.2446, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.019382627422828428, |
|
"grad_norm": 0.3290520906448364, |
|
"learning_rate": 1.9994353001847027e-05, |
|
"loss": 1.2313, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.020100502512562814, |
|
"grad_norm": 0.4342392086982727, |
|
"learning_rate": 1.9993802501289328e-05, |
|
"loss": 1.3652, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0208183776022972, |
|
"grad_norm": 0.6782366037368774, |
|
"learning_rate": 1.9993226417701028e-05, |
|
"loss": 1.4407, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.021536252692031587, |
|
"grad_norm": 1.0299732685089111, |
|
"learning_rate": 1.999262475403338e-05, |
|
"loss": 1.4609, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.022254127781765973, |
|
"grad_norm": 0.27583467960357666, |
|
"learning_rate": 1.9991997513368674e-05, |
|
"loss": 1.205, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02297200287150036, |
|
"grad_norm": 0.4443705677986145, |
|
"learning_rate": 1.9991344698920248e-05, |
|
"loss": 1.3496, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.023689877961234746, |
|
"grad_norm": 0.3160634934902191, |
|
"learning_rate": 1.9990666314032436e-05, |
|
"loss": 1.2201, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.024407753050969132, |
|
"grad_norm": 0.5006676316261292, |
|
"learning_rate": 1.998996236218057e-05, |
|
"loss": 1.4296, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02512562814070352, |
|
"grad_norm": 0.42285463213920593, |
|
"learning_rate": 1.998923284697097e-05, |
|
"loss": 1.402, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.025843503230437905, |
|
"grad_norm": 0.5728902220726013, |
|
"learning_rate": 1.9988477772140907e-05, |
|
"loss": 1.2901, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02656137832017229, |
|
"grad_norm": 0.758953869342804, |
|
"learning_rate": 1.9987697141558603e-05, |
|
"loss": 1.3689, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.027279253409906678, |
|
"grad_norm": 0.28098437190055847, |
|
"learning_rate": 1.9986890959223182e-05, |
|
"loss": 1.1861, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02799712849964106, |
|
"grad_norm": 0.3095812201499939, |
|
"learning_rate": 1.998605922926469e-05, |
|
"loss": 1.2307, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.028715003589375447, |
|
"grad_norm": 0.31783121824264526, |
|
"learning_rate": 1.998520195594404e-05, |
|
"loss": 1.1731, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.029432878679109833, |
|
"grad_norm": 0.5449131727218628, |
|
"learning_rate": 1.9984319143653006e-05, |
|
"loss": 1.4122, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03015075376884422, |
|
"grad_norm": 0.3425084352493286, |
|
"learning_rate": 1.9983410796914197e-05, |
|
"loss": 1.232, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.030868628858578606, |
|
"grad_norm": 0.3560698628425598, |
|
"learning_rate": 1.998247692038103e-05, |
|
"loss": 1.228, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03158650394831299, |
|
"grad_norm": 0.2846856415271759, |
|
"learning_rate": 1.998151751883772e-05, |
|
"loss": 1.1939, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03230437903804738, |
|
"grad_norm": 0.3970009684562683, |
|
"learning_rate": 1.9980532597199235e-05, |
|
"loss": 1.3507, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.033022254127781765, |
|
"grad_norm": 0.780319094657898, |
|
"learning_rate": 1.9979522160511284e-05, |
|
"loss": 1.4147, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03374012921751615, |
|
"grad_norm": 0.32033440470695496, |
|
"learning_rate": 1.997848621395029e-05, |
|
"loss": 1.204, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03445800430725054, |
|
"grad_norm": 0.645374059677124, |
|
"learning_rate": 1.9977424762823366e-05, |
|
"loss": 1.2397, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.035175879396984924, |
|
"grad_norm": 0.4876931607723236, |
|
"learning_rate": 1.9976337812568273e-05, |
|
"loss": 1.2226, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03589375448671931, |
|
"grad_norm": 0.293576180934906, |
|
"learning_rate": 1.997522536875341e-05, |
|
"loss": 1.2281, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0366116295764537, |
|
"grad_norm": 0.9625378847122192, |
|
"learning_rate": 1.9974087437077786e-05, |
|
"loss": 1.2895, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03732950466618808, |
|
"grad_norm": 0.2653510272502899, |
|
"learning_rate": 1.9972924023370966e-05, |
|
"loss": 1.1359, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03804737975592247, |
|
"grad_norm": 0.46903130412101746, |
|
"learning_rate": 1.9971735133593074e-05, |
|
"loss": 1.1813, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.038765254845656856, |
|
"grad_norm": 0.33105671405792236, |
|
"learning_rate": 1.9970520773834734e-05, |
|
"loss": 1.2488, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03948312993539124, |
|
"grad_norm": 0.3274335265159607, |
|
"learning_rate": 1.996928095031706e-05, |
|
"loss": 1.1956, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04020100502512563, |
|
"grad_norm": 0.30211400985717773, |
|
"learning_rate": 1.9968015669391612e-05, |
|
"loss": 1.1859, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.040918880114860015, |
|
"grad_norm": 0.4520847797393799, |
|
"learning_rate": 1.9966724937540375e-05, |
|
"loss": 1.3556, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0416367552045944, |
|
"grad_norm": 0.26978108286857605, |
|
"learning_rate": 1.9965408761375703e-05, |
|
"loss": 1.2434, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04235463029432879, |
|
"grad_norm": 0.37947210669517517, |
|
"learning_rate": 1.996406714764031e-05, |
|
"loss": 1.1514, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.043072505384063174, |
|
"grad_norm": 0.363207072019577, |
|
"learning_rate": 1.9962700103207232e-05, |
|
"loss": 1.1866, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04379038047379756, |
|
"grad_norm": 0.3245425522327423, |
|
"learning_rate": 1.9961307635079768e-05, |
|
"loss": 1.1572, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04450825556353195, |
|
"grad_norm": 0.27696144580841064, |
|
"learning_rate": 1.9959889750391474e-05, |
|
"loss": 1.2074, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.04522613065326633, |
|
"grad_norm": 0.28961682319641113, |
|
"learning_rate": 1.9958446456406117e-05, |
|
"loss": 1.1847, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04594400574300072, |
|
"grad_norm": 0.3816603422164917, |
|
"learning_rate": 1.9956977760517618e-05, |
|
"loss": 1.2932, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.046661880832735106, |
|
"grad_norm": 0.3908121883869171, |
|
"learning_rate": 1.995548367025005e-05, |
|
"loss": 1.2683, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04737975592246949, |
|
"grad_norm": 0.3329205811023712, |
|
"learning_rate": 1.9953964193257564e-05, |
|
"loss": 1.2455, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.04809763101220388, |
|
"grad_norm": 0.5550855398178101, |
|
"learning_rate": 1.9952419337324384e-05, |
|
"loss": 1.2555, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.048815506101938265, |
|
"grad_norm": 0.28543972969055176, |
|
"learning_rate": 1.995084911036473e-05, |
|
"loss": 1.1661, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04953338119167265, |
|
"grad_norm": 0.26742851734161377, |
|
"learning_rate": 1.994925352042281e-05, |
|
"loss": 1.2163, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05025125628140704, |
|
"grad_norm": 0.32335177063941956, |
|
"learning_rate": 1.994763257567276e-05, |
|
"loss": 1.1836, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.050969131371141424, |
|
"grad_norm": 0.6627066135406494, |
|
"learning_rate": 1.9945986284418607e-05, |
|
"loss": 1.3515, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05168700646087581, |
|
"grad_norm": 1.2874963283538818, |
|
"learning_rate": 1.994431465509423e-05, |
|
"loss": 1.6039, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0524048815506102, |
|
"grad_norm": 0.4531334638595581, |
|
"learning_rate": 1.994261769626332e-05, |
|
"loss": 1.2178, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05312275664034458, |
|
"grad_norm": 0.26209312677383423, |
|
"learning_rate": 1.994089541661931e-05, |
|
"loss": 1.1948, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05384063173007897, |
|
"grad_norm": 0.31642234325408936, |
|
"learning_rate": 1.9939147824985366e-05, |
|
"loss": 1.2251, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.054558506819813356, |
|
"grad_norm": 0.681365430355072, |
|
"learning_rate": 1.993737493031433e-05, |
|
"loss": 1.2787, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05527638190954774, |
|
"grad_norm": 0.3119295537471771, |
|
"learning_rate": 1.993557674168866e-05, |
|
"loss": 1.2423, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.05599425699928212, |
|
"grad_norm": 0.295529842376709, |
|
"learning_rate": 1.9933753268320394e-05, |
|
"loss": 1.1692, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.05671213208901651, |
|
"grad_norm": 0.4035413861274719, |
|
"learning_rate": 1.9931904519551106e-05, |
|
"loss": 1.3287, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.057430007178750894, |
|
"grad_norm": 0.596662700176239, |
|
"learning_rate": 1.993003050485186e-05, |
|
"loss": 1.3592, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05814788226848528, |
|
"grad_norm": 0.4334593117237091, |
|
"learning_rate": 1.992813123382314e-05, |
|
"loss": 1.307, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.05886575735821967, |
|
"grad_norm": 0.27314937114715576, |
|
"learning_rate": 1.9926206716194843e-05, |
|
"loss": 1.1615, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.05958363244795405, |
|
"grad_norm": 0.46125203371047974, |
|
"learning_rate": 1.9924256961826177e-05, |
|
"loss": 1.4522, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06030150753768844, |
|
"grad_norm": 0.2750553488731384, |
|
"learning_rate": 1.9922281980705655e-05, |
|
"loss": 1.1959, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.061019382627422826, |
|
"grad_norm": 0.31477832794189453, |
|
"learning_rate": 1.9920281782951013e-05, |
|
"loss": 1.2452, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06173725771715721, |
|
"grad_norm": 0.4933525323867798, |
|
"learning_rate": 1.991825637880918e-05, |
|
"loss": 1.3354, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0624551328068916, |
|
"grad_norm": 0.32180190086364746, |
|
"learning_rate": 1.9916205778656207e-05, |
|
"loss": 1.1748, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06317300789662599, |
|
"grad_norm": 0.7428871393203735, |
|
"learning_rate": 1.9914129992997235e-05, |
|
"loss": 1.21, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.06389088298636038, |
|
"grad_norm": 0.531775176525116, |
|
"learning_rate": 1.9912029032466415e-05, |
|
"loss": 1.3917, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.06460875807609476, |
|
"grad_norm": 0.4157712459564209, |
|
"learning_rate": 1.9909902907826883e-05, |
|
"loss": 1.2385, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06532663316582915, |
|
"grad_norm": 1.7364940643310547, |
|
"learning_rate": 1.990775162997068e-05, |
|
"loss": 1.532, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.06604450825556353, |
|
"grad_norm": 0.5754929184913635, |
|
"learning_rate": 1.9905575209918704e-05, |
|
"loss": 1.3588, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06676238334529792, |
|
"grad_norm": 0.3331480622291565, |
|
"learning_rate": 1.9903373658820667e-05, |
|
"loss": 1.1613, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0674802584350323, |
|
"grad_norm": 0.35004234313964844, |
|
"learning_rate": 1.990114698795501e-05, |
|
"loss": 1.206, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0681981335247667, |
|
"grad_norm": 0.36566850543022156, |
|
"learning_rate": 1.989889520872887e-05, |
|
"loss": 1.1561, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06891600861450108, |
|
"grad_norm": 0.5920188426971436, |
|
"learning_rate": 1.9896618332678022e-05, |
|
"loss": 1.4349, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06963388370423547, |
|
"grad_norm": 0.3459800183773041, |
|
"learning_rate": 1.9894316371466794e-05, |
|
"loss": 1.1681, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07035175879396985, |
|
"grad_norm": 0.49259281158447266, |
|
"learning_rate": 1.9891989336888034e-05, |
|
"loss": 1.312, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07106963388370424, |
|
"grad_norm": 0.31321045756340027, |
|
"learning_rate": 1.988963724086304e-05, |
|
"loss": 1.1947, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07178750897343862, |
|
"grad_norm": 0.38120272755622864, |
|
"learning_rate": 1.988726009544149e-05, |
|
"loss": 1.2053, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07250538406317301, |
|
"grad_norm": 0.3773294985294342, |
|
"learning_rate": 1.9884857912801402e-05, |
|
"loss": 1.1776, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0732232591529074, |
|
"grad_norm": 0.9980801939964294, |
|
"learning_rate": 1.988243070524905e-05, |
|
"loss": 1.1999, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07394113424264179, |
|
"grad_norm": 0.2763481140136719, |
|
"learning_rate": 1.9879978485218913e-05, |
|
"loss": 1.161, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07465900933237617, |
|
"grad_norm": 0.2802128493785858, |
|
"learning_rate": 1.9877501265273606e-05, |
|
"loss": 1.1897, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07537688442211055, |
|
"grad_norm": 1.2885147333145142, |
|
"learning_rate": 1.9874999058103813e-05, |
|
"loss": 1.5123, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07609475951184494, |
|
"grad_norm": 0.3465665280818939, |
|
"learning_rate": 1.9872471876528238e-05, |
|
"loss": 1.1916, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.07681263460157932, |
|
"grad_norm": 0.4765189588069916, |
|
"learning_rate": 1.9869919733493517e-05, |
|
"loss": 1.2785, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.07753050969131371, |
|
"grad_norm": 0.3360905349254608, |
|
"learning_rate": 1.986734264207417e-05, |
|
"loss": 1.1136, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.07824838478104809, |
|
"grad_norm": 0.41545817255973816, |
|
"learning_rate": 1.9864740615472516e-05, |
|
"loss": 1.2077, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.07896625987078248, |
|
"grad_norm": 0.2709164619445801, |
|
"learning_rate": 1.986211366701863e-05, |
|
"loss": 1.2149, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07968413496051686, |
|
"grad_norm": 0.28264105319976807, |
|
"learning_rate": 1.9859461810170248e-05, |
|
"loss": 1.1749, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08040201005025126, |
|
"grad_norm": 0.2910136282444, |
|
"learning_rate": 1.9856785058512723e-05, |
|
"loss": 1.1727, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.08111988513998564, |
|
"grad_norm": 0.2950994372367859, |
|
"learning_rate": 1.9854083425758933e-05, |
|
"loss": 1.1458, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.08183776022972003, |
|
"grad_norm": 0.40134933590888977, |
|
"learning_rate": 1.9851356925749217e-05, |
|
"loss": 1.2594, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08255563531945441, |
|
"grad_norm": 0.38769832253456116, |
|
"learning_rate": 1.9848605572451326e-05, |
|
"loss": 1.1522, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0832735104091888, |
|
"grad_norm": 0.24265125393867493, |
|
"learning_rate": 1.9845829379960313e-05, |
|
"loss": 1.1218, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08399138549892318, |
|
"grad_norm": 0.2472332864999771, |
|
"learning_rate": 1.9843028362498496e-05, |
|
"loss": 1.1572, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08470926058865758, |
|
"grad_norm": 0.37674251198768616, |
|
"learning_rate": 1.984020253441536e-05, |
|
"loss": 1.1687, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.08542713567839195, |
|
"grad_norm": 0.27769795060157776, |
|
"learning_rate": 1.98373519101875e-05, |
|
"loss": 1.183, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.08614501076812635, |
|
"grad_norm": 1.3421275615692139, |
|
"learning_rate": 1.9834476504418538e-05, |
|
"loss": 1.3964, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08686288585786073, |
|
"grad_norm": 0.3361498713493347, |
|
"learning_rate": 1.983157633183905e-05, |
|
"loss": 1.1567, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.08758076094759512, |
|
"grad_norm": 0.253305584192276, |
|
"learning_rate": 1.9828651407306497e-05, |
|
"loss": 1.1364, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.0882986360373295, |
|
"grad_norm": 0.48132067918777466, |
|
"learning_rate": 1.9825701745805136e-05, |
|
"loss": 1.3131, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0890165111270639, |
|
"grad_norm": 0.399525910615921, |
|
"learning_rate": 1.982272736244595e-05, |
|
"loss": 1.2078, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.08973438621679827, |
|
"grad_norm": 0.48796120285987854, |
|
"learning_rate": 1.9819728272466578e-05, |
|
"loss": 1.2872, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09045226130653267, |
|
"grad_norm": 0.37568381428718567, |
|
"learning_rate": 1.9816704491231225e-05, |
|
"loss": 1.0992, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.09117013639626705, |
|
"grad_norm": 0.2590203881263733, |
|
"learning_rate": 1.9813656034230593e-05, |
|
"loss": 1.2027, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.09188801148600144, |
|
"grad_norm": 0.5362399816513062, |
|
"learning_rate": 1.9810582917081786e-05, |
|
"loss": 1.25, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09260588657573582, |
|
"grad_norm": 0.46542394161224365, |
|
"learning_rate": 1.980748515552825e-05, |
|
"loss": 1.1606, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.09332376166547021, |
|
"grad_norm": 0.26864808797836304, |
|
"learning_rate": 1.980436276543969e-05, |
|
"loss": 1.138, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09404163675520459, |
|
"grad_norm": 0.36033275723457336, |
|
"learning_rate": 1.980121576281196e-05, |
|
"loss": 1.1465, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.09475951184493898, |
|
"grad_norm": 0.3540986180305481, |
|
"learning_rate": 1.9798044163767023e-05, |
|
"loss": 1.1093, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.09547738693467336, |
|
"grad_norm": 0.4412473738193512, |
|
"learning_rate": 1.979484798455284e-05, |
|
"loss": 1.2397, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.09619526202440776, |
|
"grad_norm": 0.2563040852546692, |
|
"learning_rate": 1.97916272415433e-05, |
|
"loss": 1.1977, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.09691313711414214, |
|
"grad_norm": 0.29979026317596436, |
|
"learning_rate": 1.9788381951238122e-05, |
|
"loss": 1.1749, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09763101220387653, |
|
"grad_norm": 0.5110599398612976, |
|
"learning_rate": 1.9785112130262792e-05, |
|
"loss": 1.4412, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09834888729361091, |
|
"grad_norm": 0.3159632980823517, |
|
"learning_rate": 1.978181779536845e-05, |
|
"loss": 1.1907, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0990667623833453, |
|
"grad_norm": 0.2782893776893616, |
|
"learning_rate": 1.9778498963431837e-05, |
|
"loss": 1.176, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.09978463747307968, |
|
"grad_norm": 0.5137550234794617, |
|
"learning_rate": 1.977515565145518e-05, |
|
"loss": 1.2715, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.10050251256281408, |
|
"grad_norm": 1.1845322847366333, |
|
"learning_rate": 1.9771787876566124e-05, |
|
"loss": 1.1766, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10122038765254845, |
|
"grad_norm": 0.5801928639411926, |
|
"learning_rate": 1.976839565601762e-05, |
|
"loss": 1.2851, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.10193826274228285, |
|
"grad_norm": 0.6315880417823792, |
|
"learning_rate": 1.9764979007187875e-05, |
|
"loss": 1.2729, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.10265613783201723, |
|
"grad_norm": 0.5468786358833313, |
|
"learning_rate": 1.976153794758023e-05, |
|
"loss": 1.1672, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.10337401292175162, |
|
"grad_norm": 0.3717634379863739, |
|
"learning_rate": 1.975807249482307e-05, |
|
"loss": 1.2785, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.104091888011486, |
|
"grad_norm": 0.334043949842453, |
|
"learning_rate": 1.9754582666669776e-05, |
|
"loss": 1.1835, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1048097631012204, |
|
"grad_norm": 0.3045234978199005, |
|
"learning_rate": 1.9751068480998573e-05, |
|
"loss": 1.1433, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.10552763819095477, |
|
"grad_norm": 0.2606176733970642, |
|
"learning_rate": 1.974752995581248e-05, |
|
"loss": 1.1382, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.10624551328068917, |
|
"grad_norm": 0.28245827555656433, |
|
"learning_rate": 1.974396710923921e-05, |
|
"loss": 1.2097, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.10696338837042355, |
|
"grad_norm": 0.26672127842903137, |
|
"learning_rate": 1.9740379959531063e-05, |
|
"loss": 1.1527, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.10768126346015794, |
|
"grad_norm": 0.27380216121673584, |
|
"learning_rate": 1.9736768525064854e-05, |
|
"loss": 1.1372, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10839913854989232, |
|
"grad_norm": 0.27657026052474976, |
|
"learning_rate": 1.9733132824341802e-05, |
|
"loss": 1.1499, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.10911701363962671, |
|
"grad_norm": 0.32931819558143616, |
|
"learning_rate": 1.9729472875987442e-05, |
|
"loss": 1.1688, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.10983488872936109, |
|
"grad_norm": 0.3223106265068054, |
|
"learning_rate": 1.972578869875153e-05, |
|
"loss": 1.2062, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.11055276381909548, |
|
"grad_norm": 0.2810954749584198, |
|
"learning_rate": 1.9722080311507938e-05, |
|
"loss": 1.1553, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.11127063890882986, |
|
"grad_norm": 0.2533334195613861, |
|
"learning_rate": 1.9718347733254578e-05, |
|
"loss": 1.1613, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.11198851399856424, |
|
"grad_norm": 0.44092583656311035, |
|
"learning_rate": 1.971459098311328e-05, |
|
"loss": 1.2391, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11270638908829864, |
|
"grad_norm": 0.4267832636833191, |
|
"learning_rate": 1.971081008032971e-05, |
|
"loss": 1.329, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.11342426417803302, |
|
"grad_norm": 0.3565402030944824, |
|
"learning_rate": 1.970700504427327e-05, |
|
"loss": 1.1892, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.11414213926776741, |
|
"grad_norm": 0.4627990424633026, |
|
"learning_rate": 1.9703175894436987e-05, |
|
"loss": 1.2786, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.11486001435750179, |
|
"grad_norm": 0.31331735849380493, |
|
"learning_rate": 1.9699322650437433e-05, |
|
"loss": 1.1981, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11557788944723618, |
|
"grad_norm": 0.3424057960510254, |
|
"learning_rate": 1.9695445332014605e-05, |
|
"loss": 1.1984, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.11629576453697056, |
|
"grad_norm": 0.41167309880256653, |
|
"learning_rate": 1.969154395903183e-05, |
|
"loss": 1.2147, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.11701363962670495, |
|
"grad_norm": 0.2450089007616043, |
|
"learning_rate": 1.968761855147568e-05, |
|
"loss": 1.1662, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.11773151471643933, |
|
"grad_norm": 0.39046624302864075, |
|
"learning_rate": 1.9683669129455838e-05, |
|
"loss": 1.1795, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.11844938980617373, |
|
"grad_norm": 0.28102466464042664, |
|
"learning_rate": 1.967969571320502e-05, |
|
"loss": 1.1091, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1191672648959081, |
|
"grad_norm": 0.5738700032234192, |
|
"learning_rate": 1.9675698323078864e-05, |
|
"loss": 1.3535, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1198851399856425, |
|
"grad_norm": 0.31552526354789734, |
|
"learning_rate": 1.9671676979555827e-05, |
|
"loss": 1.1579, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.12060301507537688, |
|
"grad_norm": 0.280458003282547, |
|
"learning_rate": 1.9667631703237073e-05, |
|
"loss": 1.1366, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.12132089016511127, |
|
"grad_norm": 0.3525103032588959, |
|
"learning_rate": 1.9663562514846367e-05, |
|
"loss": 1.1681, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.12203876525484565, |
|
"grad_norm": 0.24137817323207855, |
|
"learning_rate": 1.9659469435229993e-05, |
|
"loss": 1.1573, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12275664034458005, |
|
"grad_norm": 0.5218951106071472, |
|
"learning_rate": 1.9655352485356615e-05, |
|
"loss": 1.2824, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.12347451543431442, |
|
"grad_norm": 2.0925486087799072, |
|
"learning_rate": 1.965121168631718e-05, |
|
"loss": 1.4668, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.12419239052404882, |
|
"grad_norm": 0.40912044048309326, |
|
"learning_rate": 1.9647047059324828e-05, |
|
"loss": 1.3401, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1249102656137832, |
|
"grad_norm": 0.44498252868652344, |
|
"learning_rate": 1.9642858625714754e-05, |
|
"loss": 1.2355, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.12562814070351758, |
|
"grad_norm": 0.47769415378570557, |
|
"learning_rate": 1.9638646406944123e-05, |
|
"loss": 1.2284, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12634601579325197, |
|
"grad_norm": 0.26920589804649353, |
|
"learning_rate": 1.963441042459194e-05, |
|
"loss": 1.0832, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.12706389088298636, |
|
"grad_norm": 0.23602105677127838, |
|
"learning_rate": 1.963015070035897e-05, |
|
"loss": 1.1441, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.12778176597272076, |
|
"grad_norm": 0.31674906611442566, |
|
"learning_rate": 1.9625867256067578e-05, |
|
"loss": 1.1578, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.12849964106245512, |
|
"grad_norm": 0.26973602175712585, |
|
"learning_rate": 1.962156011366167e-05, |
|
"loss": 1.0848, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.12921751615218952, |
|
"grad_norm": 0.3377930223941803, |
|
"learning_rate": 1.961722929520654e-05, |
|
"loss": 1.253, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1299353912419239, |
|
"grad_norm": 0.5862683653831482, |
|
"learning_rate": 1.9612874822888787e-05, |
|
"loss": 1.3045, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1306532663316583, |
|
"grad_norm": 2.053889036178589, |
|
"learning_rate": 1.960849671901618e-05, |
|
"loss": 1.2307, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.13137114142139267, |
|
"grad_norm": 0.3686397671699524, |
|
"learning_rate": 1.9604095006017546e-05, |
|
"loss": 1.1462, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.13208901651112706, |
|
"grad_norm": 0.2870405912399292, |
|
"learning_rate": 1.9599669706442676e-05, |
|
"loss": 1.1773, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.13280689160086145, |
|
"grad_norm": 0.4055803120136261, |
|
"learning_rate": 1.9595220842962178e-05, |
|
"loss": 1.1849, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.13352476669059585, |
|
"grad_norm": 0.3482927978038788, |
|
"learning_rate": 1.959074843836739e-05, |
|
"loss": 1.1565, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.1342426417803302, |
|
"grad_norm": 0.348791241645813, |
|
"learning_rate": 1.958625251557024e-05, |
|
"loss": 1.2002, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.1349605168700646, |
|
"grad_norm": 0.6808797121047974, |
|
"learning_rate": 1.9581733097603145e-05, |
|
"loss": 1.3044, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.135678391959799, |
|
"grad_norm": 0.2962851822376251, |
|
"learning_rate": 1.9577190207618884e-05, |
|
"loss": 1.1848, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1363962670495334, |
|
"grad_norm": 0.2507338523864746, |
|
"learning_rate": 1.9572623868890483e-05, |
|
"loss": 1.2179, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13711414213926776, |
|
"grad_norm": 0.3296150863170624, |
|
"learning_rate": 1.956803410481109e-05, |
|
"loss": 1.1572, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.13783201722900215, |
|
"grad_norm": 0.40111851692199707, |
|
"learning_rate": 1.9563420938893875e-05, |
|
"loss": 1.156, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.13854989231873654, |
|
"grad_norm": 0.6545300483703613, |
|
"learning_rate": 1.955878439477187e-05, |
|
"loss": 1.1174, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.13926776740847094, |
|
"grad_norm": 0.3253189027309418, |
|
"learning_rate": 1.95541244961979e-05, |
|
"loss": 1.1701, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1399856424982053, |
|
"grad_norm": 0.2571970522403717, |
|
"learning_rate": 1.954944126704441e-05, |
|
"loss": 1.1431, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1407035175879397, |
|
"grad_norm": 0.2755349278450012, |
|
"learning_rate": 1.9544734731303384e-05, |
|
"loss": 1.0937, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1414213926776741, |
|
"grad_norm": 0.45541664958000183, |
|
"learning_rate": 1.9540004913086196e-05, |
|
"loss": 1.1376, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.14213926776740848, |
|
"grad_norm": 0.3663983941078186, |
|
"learning_rate": 1.9535251836623493e-05, |
|
"loss": 1.1753, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.2416490763425827, |
|
"learning_rate": 1.953047552626508e-05, |
|
"loss": 1.1567, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.14357501794687724, |
|
"grad_norm": 0.3104878067970276, |
|
"learning_rate": 1.9525676006479785e-05, |
|
"loss": 1.1494, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14429289303661164, |
|
"grad_norm": 0.5359750390052795, |
|
"learning_rate": 1.9520853301855335e-05, |
|
"loss": 1.2076, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.14501076812634603, |
|
"grad_norm": 0.2803167700767517, |
|
"learning_rate": 1.951600743709824e-05, |
|
"loss": 1.1267, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1457286432160804, |
|
"grad_norm": 0.2752203643321991, |
|
"learning_rate": 1.951113843703364e-05, |
|
"loss": 1.1385, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.1464465183058148, |
|
"grad_norm": 0.7712585926055908, |
|
"learning_rate": 1.950624632660522e-05, |
|
"loss": 1.3345, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.14716439339554918, |
|
"grad_norm": 0.4798975884914398, |
|
"learning_rate": 1.950133113087504e-05, |
|
"loss": 1.2706, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.14788226848528357, |
|
"grad_norm": 0.5709019303321838, |
|
"learning_rate": 1.9496392875023432e-05, |
|
"loss": 1.1709, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.14860014357501794, |
|
"grad_norm": 0.449605256319046, |
|
"learning_rate": 1.9491431584348866e-05, |
|
"loss": 1.1443, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.14931801866475233, |
|
"grad_norm": 0.42168155312538147, |
|
"learning_rate": 1.9486447284267817e-05, |
|
"loss": 1.1603, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.15003589375448673, |
|
"grad_norm": 0.3384092450141907, |
|
"learning_rate": 1.948144000031463e-05, |
|
"loss": 1.1259, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.1507537688442211, |
|
"grad_norm": 0.26280686259269714, |
|
"learning_rate": 1.9476409758141407e-05, |
|
"loss": 1.1936, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15147164393395549, |
|
"grad_norm": 0.39539268612861633, |
|
"learning_rate": 1.947135658351785e-05, |
|
"loss": 1.2688, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.15218951902368988, |
|
"grad_norm": 0.26841455698013306, |
|
"learning_rate": 1.9466280502331157e-05, |
|
"loss": 1.106, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.15290739411342427, |
|
"grad_norm": 0.3093603849411011, |
|
"learning_rate": 1.9461181540585864e-05, |
|
"loss": 1.1567, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.15362526920315864, |
|
"grad_norm": 0.2611793875694275, |
|
"learning_rate": 1.945605972440373e-05, |
|
"loss": 1.142, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.15434314429289303, |
|
"grad_norm": 0.24437759816646576, |
|
"learning_rate": 1.9450915080023595e-05, |
|
"loss": 1.1771, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.15506101938262742, |
|
"grad_norm": 0.6352970600128174, |
|
"learning_rate": 1.9445747633801244e-05, |
|
"loss": 1.2487, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.15577889447236182, |
|
"grad_norm": 0.2860240638256073, |
|
"learning_rate": 1.9440557412209276e-05, |
|
"loss": 1.1032, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.15649676956209618, |
|
"grad_norm": 0.2164093255996704, |
|
"learning_rate": 1.943534444183697e-05, |
|
"loss": 1.1733, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.15721464465183058, |
|
"grad_norm": 0.35152971744537354, |
|
"learning_rate": 1.9430108749390144e-05, |
|
"loss": 1.1196, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.15793251974156497, |
|
"grad_norm": 0.30813077092170715, |
|
"learning_rate": 1.9424850361691018e-05, |
|
"loss": 1.1169, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15865039483129936, |
|
"grad_norm": 35.50032424926758, |
|
"learning_rate": 1.9419569305678085e-05, |
|
"loss": 1.6415, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.15936826992103373, |
|
"grad_norm": 0.662775993347168, |
|
"learning_rate": 1.9414265608405957e-05, |
|
"loss": 1.103, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.16008614501076812, |
|
"grad_norm": 0.3208405375480652, |
|
"learning_rate": 1.940893929704525e-05, |
|
"loss": 1.1864, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.16080402010050251, |
|
"grad_norm": 2.3010807037353516, |
|
"learning_rate": 1.9403590398882412e-05, |
|
"loss": 1.3652, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.1615218951902369, |
|
"grad_norm": 0.46707963943481445, |
|
"learning_rate": 1.9398218941319623e-05, |
|
"loss": 1.1541, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.16223977027997127, |
|
"grad_norm": 0.44534140825271606, |
|
"learning_rate": 1.9392824951874617e-05, |
|
"loss": 1.124, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.16295764536970567, |
|
"grad_norm": 0.45970529317855835, |
|
"learning_rate": 1.938740845818057e-05, |
|
"loss": 1.0864, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.16367552045944006, |
|
"grad_norm": 0.2933219075202942, |
|
"learning_rate": 1.9381969487985937e-05, |
|
"loss": 1.1468, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.16439339554917445, |
|
"grad_norm": 0.34940993785858154, |
|
"learning_rate": 1.9376508069154326e-05, |
|
"loss": 1.1782, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.16511127063890882, |
|
"grad_norm": 0.2643601596355438, |
|
"learning_rate": 1.9371024229664344e-05, |
|
"loss": 1.1619, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1658291457286432, |
|
"grad_norm": 0.5773788690567017, |
|
"learning_rate": 1.9365517997609458e-05, |
|
"loss": 1.3177, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.1665470208183776, |
|
"grad_norm": 0.2795402705669403, |
|
"learning_rate": 1.9359989401197854e-05, |
|
"loss": 1.2282, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.167264895908112, |
|
"grad_norm": 1.007430076599121, |
|
"learning_rate": 1.935443846875229e-05, |
|
"loss": 1.5002, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.16798277099784636, |
|
"grad_norm": 0.35091131925582886, |
|
"learning_rate": 1.934886522870995e-05, |
|
"loss": 1.1605, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.16870064608758076, |
|
"grad_norm": 0.41040295362472534, |
|
"learning_rate": 1.934326970962229e-05, |
|
"loss": 1.1619, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.16941852117731515, |
|
"grad_norm": 0.24819721281528473, |
|
"learning_rate": 1.9337651940154915e-05, |
|
"loss": 1.152, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.17013639626704954, |
|
"grad_norm": 0.31907832622528076, |
|
"learning_rate": 1.933201194908741e-05, |
|
"loss": 1.1385, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1708542713567839, |
|
"grad_norm": 0.5197360515594482, |
|
"learning_rate": 1.9326349765313203e-05, |
|
"loss": 1.1882, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.1715721464465183, |
|
"grad_norm": 0.526611328125, |
|
"learning_rate": 1.9320665417839405e-05, |
|
"loss": 1.1695, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.1722900215362527, |
|
"grad_norm": 0.4685799479484558, |
|
"learning_rate": 1.9314958935786685e-05, |
|
"loss": 1.2111, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1730078966259871, |
|
"grad_norm": 0.3952530026435852, |
|
"learning_rate": 1.9309230348389097e-05, |
|
"loss": 1.1435, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.17372577171572146, |
|
"grad_norm": 0.7753391861915588, |
|
"learning_rate": 1.9303479684993944e-05, |
|
"loss": 1.3008, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.17444364680545585, |
|
"grad_norm": 0.2788830101490021, |
|
"learning_rate": 1.9297706975061618e-05, |
|
"loss": 1.1781, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.17516152189519024, |
|
"grad_norm": 0.6017007827758789, |
|
"learning_rate": 1.929191224816546e-05, |
|
"loss": 1.2131, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.17587939698492464, |
|
"grad_norm": 0.324994832277298, |
|
"learning_rate": 1.9286095533991608e-05, |
|
"loss": 1.1453, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.176597272074659, |
|
"grad_norm": 0.5178776979446411, |
|
"learning_rate": 1.928025686233882e-05, |
|
"loss": 1.1078, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.1773151471643934, |
|
"grad_norm": 0.371931254863739, |
|
"learning_rate": 1.9274396263118366e-05, |
|
"loss": 1.1255, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.1780330222541278, |
|
"grad_norm": 0.38106516003608704, |
|
"learning_rate": 1.926851376635383e-05, |
|
"loss": 1.1887, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.17875089734386218, |
|
"grad_norm": 0.35742947459220886, |
|
"learning_rate": 1.926260940218099e-05, |
|
"loss": 1.1045, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.17946877243359655, |
|
"grad_norm": 0.5332513451576233, |
|
"learning_rate": 1.9256683200847638e-05, |
|
"loss": 1.2387, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18018664752333094, |
|
"grad_norm": 0.24585315585136414, |
|
"learning_rate": 1.9250735192713447e-05, |
|
"loss": 1.0728, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.18090452261306533, |
|
"grad_norm": 0.4742366075515747, |
|
"learning_rate": 1.92447654082498e-05, |
|
"loss": 1.2066, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.18162239770279973, |
|
"grad_norm": 0.2356850951910019, |
|
"learning_rate": 1.9238773878039638e-05, |
|
"loss": 1.0949, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.1823402727925341, |
|
"grad_norm": 0.24765734374523163, |
|
"learning_rate": 1.9232760632777312e-05, |
|
"loss": 1.1209, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.18305814788226848, |
|
"grad_norm": 0.4552963674068451, |
|
"learning_rate": 1.922672570326841e-05, |
|
"loss": 1.1351, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.18377602297200288, |
|
"grad_norm": 0.28184211254119873, |
|
"learning_rate": 1.922066912042961e-05, |
|
"loss": 1.1333, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.18449389806173727, |
|
"grad_norm": 0.32718122005462646, |
|
"learning_rate": 1.921459091528852e-05, |
|
"loss": 1.2047, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.18521177315147164, |
|
"grad_norm": 0.4896772503852844, |
|
"learning_rate": 1.9208491118983517e-05, |
|
"loss": 1.2929, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.18592964824120603, |
|
"grad_norm": 0.6823757886886597, |
|
"learning_rate": 1.9202369762763587e-05, |
|
"loss": 1.3192, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.18664752333094042, |
|
"grad_norm": 0.28378769755363464, |
|
"learning_rate": 1.9196226877988175e-05, |
|
"loss": 1.1805, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1873653984206748, |
|
"grad_norm": 0.43191203474998474, |
|
"learning_rate": 1.9190062496127008e-05, |
|
"loss": 1.0961, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.18808327351040918, |
|
"grad_norm": 0.2138504683971405, |
|
"learning_rate": 1.9183876648759938e-05, |
|
"loss": 1.1073, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.18880114860014358, |
|
"grad_norm": 0.25294116139411926, |
|
"learning_rate": 1.9177669367576794e-05, |
|
"loss": 1.108, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.18951902368987797, |
|
"grad_norm": 0.2560763955116272, |
|
"learning_rate": 1.9171440684377204e-05, |
|
"loss": 1.1339, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.19023689877961233, |
|
"grad_norm": 0.2744760811328888, |
|
"learning_rate": 1.9165190631070435e-05, |
|
"loss": 1.2059, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.19095477386934673, |
|
"grad_norm": 0.2920529842376709, |
|
"learning_rate": 1.915891923967524e-05, |
|
"loss": 1.1597, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.19167264895908112, |
|
"grad_norm": 0.4311828315258026, |
|
"learning_rate": 1.9152626542319673e-05, |
|
"loss": 1.2409, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.19239052404881551, |
|
"grad_norm": 0.29880067706108093, |
|
"learning_rate": 1.9146312571240954e-05, |
|
"loss": 1.1518, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.19310839913854988, |
|
"grad_norm": 0.4842053949832916, |
|
"learning_rate": 1.9139977358785277e-05, |
|
"loss": 1.2759, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.19382627422828427, |
|
"grad_norm": 0.3157087564468384, |
|
"learning_rate": 1.9133620937407657e-05, |
|
"loss": 1.0769, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19454414931801867, |
|
"grad_norm": 0.3534604012966156, |
|
"learning_rate": 1.912724333967176e-05, |
|
"loss": 1.1963, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.19526202440775306, |
|
"grad_norm": 0.4059816598892212, |
|
"learning_rate": 1.912084459824974e-05, |
|
"loss": 1.2315, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.19597989949748743, |
|
"grad_norm": 0.521371066570282, |
|
"learning_rate": 1.9114424745922065e-05, |
|
"loss": 1.1831, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.19669777458722182, |
|
"grad_norm": 2.541219472885132, |
|
"learning_rate": 1.910798381557736e-05, |
|
"loss": 1.4695, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.1974156496769562, |
|
"grad_norm": 0.3894226551055908, |
|
"learning_rate": 1.9101521840212225e-05, |
|
"loss": 1.1516, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1981335247666906, |
|
"grad_norm": 0.3359840512275696, |
|
"learning_rate": 1.9095038852931078e-05, |
|
"loss": 1.124, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.19885139985642497, |
|
"grad_norm": 0.336245059967041, |
|
"learning_rate": 1.9088534886945978e-05, |
|
"loss": 1.1803, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.19956927494615936, |
|
"grad_norm": 0.2564994990825653, |
|
"learning_rate": 1.908200997557645e-05, |
|
"loss": 1.0962, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.20028715003589376, |
|
"grad_norm": 0.26010560989379883, |
|
"learning_rate": 1.907546415224934e-05, |
|
"loss": 1.126, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.20100502512562815, |
|
"grad_norm": 0.6532833576202393, |
|
"learning_rate": 1.90688974504986e-05, |
|
"loss": 1.2128, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20172290021536252, |
|
"grad_norm": 0.3625889718532562, |
|
"learning_rate": 1.9062309903965166e-05, |
|
"loss": 1.1788, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2024407753050969, |
|
"grad_norm": 0.6255800127983093, |
|
"learning_rate": 1.905570154639674e-05, |
|
"loss": 1.328, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2031586503948313, |
|
"grad_norm": 0.28508058190345764, |
|
"learning_rate": 1.9049072411647652e-05, |
|
"loss": 1.147, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2038765254845657, |
|
"grad_norm": 2.5196280479431152, |
|
"learning_rate": 1.9042422533678668e-05, |
|
"loss": 1.3916, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.20459440057430006, |
|
"grad_norm": 0.6198713183403015, |
|
"learning_rate": 1.903575194655682e-05, |
|
"loss": 1.2978, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.20531227566403445, |
|
"grad_norm": 0.37188202142715454, |
|
"learning_rate": 1.902906068445523e-05, |
|
"loss": 1.165, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.20603015075376885, |
|
"grad_norm": 0.24965988099575043, |
|
"learning_rate": 1.902234878165294e-05, |
|
"loss": 1.1278, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.20674802584350324, |
|
"grad_norm": 0.7749665379524231, |
|
"learning_rate": 1.9015616272534733e-05, |
|
"loss": 1.2764, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2074659009332376, |
|
"grad_norm": 0.3034153878688812, |
|
"learning_rate": 1.9008863191590964e-05, |
|
"loss": 1.105, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.208183776022972, |
|
"grad_norm": 0.33309271931648254, |
|
"learning_rate": 1.9002089573417357e-05, |
|
"loss": 1.1371, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2089016511127064, |
|
"grad_norm": 0.30133146047592163, |
|
"learning_rate": 1.899529545271487e-05, |
|
"loss": 1.1086, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2096195262024408, |
|
"grad_norm": 0.4333893954753876, |
|
"learning_rate": 1.8988480864289483e-05, |
|
"loss": 1.1897, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.21033740129217515, |
|
"grad_norm": 0.31405553221702576, |
|
"learning_rate": 1.898164584305203e-05, |
|
"loss": 1.1297, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.21105527638190955, |
|
"grad_norm": 0.24001596868038177, |
|
"learning_rate": 1.8974790424018025e-05, |
|
"loss": 1.1327, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.21177315147164394, |
|
"grad_norm": 0.3047102987766266, |
|
"learning_rate": 1.8967914642307476e-05, |
|
"loss": 1.1492, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.21249102656137833, |
|
"grad_norm": 0.3045389652252197, |
|
"learning_rate": 1.896101853314472e-05, |
|
"loss": 1.1083, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.2132089016511127, |
|
"grad_norm": 0.3781316578388214, |
|
"learning_rate": 1.8954102131858206e-05, |
|
"loss": 1.157, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.2139267767408471, |
|
"grad_norm": 0.38882461190223694, |
|
"learning_rate": 1.8947165473880363e-05, |
|
"loss": 1.1802, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.21464465183058148, |
|
"grad_norm": 0.5264966487884521, |
|
"learning_rate": 1.8940208594747386e-05, |
|
"loss": 1.0983, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.21536252692031588, |
|
"grad_norm": 0.4941420555114746, |
|
"learning_rate": 1.8933231530099058e-05, |
|
"loss": 1.2014, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21608040201005024, |
|
"grad_norm": 0.2968602180480957, |
|
"learning_rate": 1.8926234315678576e-05, |
|
"loss": 1.1773, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.21679827709978464, |
|
"grad_norm": 0.36839526891708374, |
|
"learning_rate": 1.8919216987332358e-05, |
|
"loss": 1.194, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.21751615218951903, |
|
"grad_norm": 0.27106499671936035, |
|
"learning_rate": 1.891217958100987e-05, |
|
"loss": 1.1406, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.21823402727925342, |
|
"grad_norm": 0.4870721995830536, |
|
"learning_rate": 1.890512213276344e-05, |
|
"loss": 1.1759, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.2189519023689878, |
|
"grad_norm": 3.0273375511169434, |
|
"learning_rate": 1.8898044678748054e-05, |
|
"loss": 1.0953, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.21966977745872218, |
|
"grad_norm": 0.5378653407096863, |
|
"learning_rate": 1.889094725522121e-05, |
|
"loss": 1.2847, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.22038765254845658, |
|
"grad_norm": 0.26293638348579407, |
|
"learning_rate": 1.888382989854269e-05, |
|
"loss": 1.118, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.22110552763819097, |
|
"grad_norm": 0.3193033039569855, |
|
"learning_rate": 1.8876692645174398e-05, |
|
"loss": 1.1277, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.22182340272792533, |
|
"grad_norm": 0.47098249197006226, |
|
"learning_rate": 1.8869535531680177e-05, |
|
"loss": 1.2661, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.22254127781765973, |
|
"grad_norm": 0.45273101329803467, |
|
"learning_rate": 1.8862358594725596e-05, |
|
"loss": 1.1281, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22325915290739412, |
|
"grad_norm": 0.27840176224708557, |
|
"learning_rate": 1.8855161871077792e-05, |
|
"loss": 1.1138, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.22397702799712849, |
|
"grad_norm": 0.25028085708618164, |
|
"learning_rate": 1.884794539760526e-05, |
|
"loss": 1.0966, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.22469490308686288, |
|
"grad_norm": 0.33616170287132263, |
|
"learning_rate": 1.884070921127768e-05, |
|
"loss": 1.1496, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.22541277817659727, |
|
"grad_norm": 0.34810954332351685, |
|
"learning_rate": 1.8833453349165714e-05, |
|
"loss": 1.1693, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.22613065326633167, |
|
"grad_norm": 0.3485371768474579, |
|
"learning_rate": 1.8826177848440828e-05, |
|
"loss": 1.1069, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.22684852835606603, |
|
"grad_norm": 0.3029579222202301, |
|
"learning_rate": 1.8818882746375087e-05, |
|
"loss": 1.0823, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.22756640344580042, |
|
"grad_norm": 0.24140548706054688, |
|
"learning_rate": 1.8811568080340984e-05, |
|
"loss": 1.1347, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.22828427853553482, |
|
"grad_norm": 0.25040584802627563, |
|
"learning_rate": 1.8804233887811226e-05, |
|
"loss": 1.1236, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2290021536252692, |
|
"grad_norm": 0.6614571213722229, |
|
"learning_rate": 1.8796880206358563e-05, |
|
"loss": 1.3318, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.22972002871500358, |
|
"grad_norm": 0.2212487757205963, |
|
"learning_rate": 1.8789507073655576e-05, |
|
"loss": 1.1516, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23043790380473797, |
|
"grad_norm": 0.24655699729919434, |
|
"learning_rate": 1.8782114527474504e-05, |
|
"loss": 1.1223, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.23115577889447236, |
|
"grad_norm": 0.23814330995082855, |
|
"learning_rate": 1.8774702605687037e-05, |
|
"loss": 1.1611, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.23187365398420676, |
|
"grad_norm": 0.3251168727874756, |
|
"learning_rate": 1.876727134626412e-05, |
|
"loss": 1.1245, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.23259152907394112, |
|
"grad_norm": 0.24903184175491333, |
|
"learning_rate": 1.8759820787275777e-05, |
|
"loss": 1.0912, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.23330940416367552, |
|
"grad_norm": 0.29283952713012695, |
|
"learning_rate": 1.875235096689088e-05, |
|
"loss": 1.1674, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2340272792534099, |
|
"grad_norm": 0.24461185932159424, |
|
"learning_rate": 1.8744861923377003e-05, |
|
"loss": 1.1241, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.2347451543431443, |
|
"grad_norm": 4.819317817687988, |
|
"learning_rate": 1.8737353695100183e-05, |
|
"loss": 1.1464, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.23546302943287867, |
|
"grad_norm": 0.7460237741470337, |
|
"learning_rate": 1.8729826320524737e-05, |
|
"loss": 1.4698, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.23618090452261306, |
|
"grad_norm": 0.3449172377586365, |
|
"learning_rate": 1.8722279838213082e-05, |
|
"loss": 1.1501, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.23689877961234745, |
|
"grad_norm": 0.30681246519088745, |
|
"learning_rate": 1.8714714286825512e-05, |
|
"loss": 1.091, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23761665470208185, |
|
"grad_norm": 0.4400181770324707, |
|
"learning_rate": 1.8707129705120012e-05, |
|
"loss": 1.1017, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.2383345297918162, |
|
"grad_norm": 0.5187708735466003, |
|
"learning_rate": 1.8699526131952067e-05, |
|
"loss": 1.2389, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2390524048815506, |
|
"grad_norm": 0.44073569774627686, |
|
"learning_rate": 1.869190360627444e-05, |
|
"loss": 1.1137, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.239770279971285, |
|
"grad_norm": 0.3202749490737915, |
|
"learning_rate": 1.8684262167136998e-05, |
|
"loss": 1.1693, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.2404881550610194, |
|
"grad_norm": 0.2710592448711395, |
|
"learning_rate": 1.8676601853686502e-05, |
|
"loss": 1.1118, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.24120603015075376, |
|
"grad_norm": 0.2804523706436157, |
|
"learning_rate": 1.866892270516639e-05, |
|
"loss": 1.0901, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.24192390524048815, |
|
"grad_norm": 0.2933681905269623, |
|
"learning_rate": 1.8661224760916618e-05, |
|
"loss": 1.1076, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.24264178033022255, |
|
"grad_norm": 0.29812660813331604, |
|
"learning_rate": 1.86535080603734e-05, |
|
"loss": 1.0992, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.24335965541995694, |
|
"grad_norm": 0.42179393768310547, |
|
"learning_rate": 1.8645772643069064e-05, |
|
"loss": 1.1911, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.2440775305096913, |
|
"grad_norm": 0.3564510941505432, |
|
"learning_rate": 1.8638018548631808e-05, |
|
"loss": 1.1381, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2447954055994257, |
|
"grad_norm": 0.45639586448669434, |
|
"learning_rate": 1.8630245816785516e-05, |
|
"loss": 1.2329, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2455132806891601, |
|
"grad_norm": 0.28465136885643005, |
|
"learning_rate": 1.862245448734956e-05, |
|
"loss": 1.1781, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.24623115577889448, |
|
"grad_norm": 0.2306850552558899, |
|
"learning_rate": 1.861464460023856e-05, |
|
"loss": 1.1508, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.24694903086862885, |
|
"grad_norm": 0.6161120533943176, |
|
"learning_rate": 1.8606816195462244e-05, |
|
"loss": 1.2499, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.24766690595836324, |
|
"grad_norm": 0.25378888845443726, |
|
"learning_rate": 1.8598969313125175e-05, |
|
"loss": 1.1348, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.24838478104809764, |
|
"grad_norm": 0.7147504687309265, |
|
"learning_rate": 1.859110399342659e-05, |
|
"loss": 1.0882, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.24910265613783203, |
|
"grad_norm": 1.6163944005966187, |
|
"learning_rate": 1.858322027666017e-05, |
|
"loss": 1.466, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.2498205312275664, |
|
"grad_norm": 0.3895832598209381, |
|
"learning_rate": 1.8575318203213857e-05, |
|
"loss": 1.1098, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.2505384063173008, |
|
"grad_norm": 0.3162965476512909, |
|
"learning_rate": 1.856739781356962e-05, |
|
"loss": 1.1079, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.25125628140703515, |
|
"grad_norm": 0.31275758147239685, |
|
"learning_rate": 1.855945914830327e-05, |
|
"loss": 1.1573, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2519741564967696, |
|
"grad_norm": 0.5552882552146912, |
|
"learning_rate": 1.8551502248084236e-05, |
|
"loss": 1.2549, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.25269203158650394, |
|
"grad_norm": 0.3312999904155731, |
|
"learning_rate": 1.8543527153675375e-05, |
|
"loss": 1.1306, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.25340990667623836, |
|
"grad_norm": 0.30454468727111816, |
|
"learning_rate": 1.8535533905932738e-05, |
|
"loss": 1.1809, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.2541277817659727, |
|
"grad_norm": 0.3623380959033966, |
|
"learning_rate": 1.8527522545805387e-05, |
|
"loss": 1.123, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.2548456568557071, |
|
"grad_norm": 0.3103772699832916, |
|
"learning_rate": 1.8519493114335162e-05, |
|
"loss": 1.1182, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2555635319454415, |
|
"grad_norm": 0.2671801447868347, |
|
"learning_rate": 1.8511445652656494e-05, |
|
"loss": 1.1652, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.2562814070351759, |
|
"grad_norm": 0.38310304284095764, |
|
"learning_rate": 1.850338020199617e-05, |
|
"loss": 1.1408, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.25699928212491024, |
|
"grad_norm": 0.3919181823730469, |
|
"learning_rate": 1.849529680367314e-05, |
|
"loss": 1.0756, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.25771715721464467, |
|
"grad_norm": 0.29616478085517883, |
|
"learning_rate": 1.84871954990983e-05, |
|
"loss": 1.1288, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.25843503230437903, |
|
"grad_norm": 0.32790932059288025, |
|
"learning_rate": 1.8479076329774275e-05, |
|
"loss": 1.0784, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.25915290739411345, |
|
"grad_norm": 0.43394553661346436, |
|
"learning_rate": 1.8470939337295214e-05, |
|
"loss": 1.1948, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2598707824838478, |
|
"grad_norm": 0.2332722693681717, |
|
"learning_rate": 1.8462784563346568e-05, |
|
"loss": 1.0912, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.2605886575735822, |
|
"grad_norm": 0.2292940765619278, |
|
"learning_rate": 1.845461204970489e-05, |
|
"loss": 1.1111, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2613065326633166, |
|
"grad_norm": 0.4375426769256592, |
|
"learning_rate": 1.8446421838237605e-05, |
|
"loss": 1.1876, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.26202440775305097, |
|
"grad_norm": 0.23707646131515503, |
|
"learning_rate": 1.8438213970902813e-05, |
|
"loss": 1.1889, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.26274228284278534, |
|
"grad_norm": 0.36057260632514954, |
|
"learning_rate": 1.8429988489749048e-05, |
|
"loss": 1.1589, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.26346015793251976, |
|
"grad_norm": 0.33151188492774963, |
|
"learning_rate": 1.842174543691509e-05, |
|
"loss": 1.1056, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2641780330222541, |
|
"grad_norm": 0.45307648181915283, |
|
"learning_rate": 1.841348485462974e-05, |
|
"loss": 1.1998, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.2648959081119885, |
|
"grad_norm": 5.533525466918945, |
|
"learning_rate": 1.8405206785211595e-05, |
|
"loss": 1.1409, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.2656137832017229, |
|
"grad_norm": 0.3712926506996155, |
|
"learning_rate": 1.8396911271068843e-05, |
|
"loss": 1.0697, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2663316582914573, |
|
"grad_norm": 0.41767457127571106, |
|
"learning_rate": 1.8388598354699034e-05, |
|
"loss": 1.1379, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2670495333811917, |
|
"grad_norm": 0.42338940501213074, |
|
"learning_rate": 1.838026807868888e-05, |
|
"loss": 1.1044, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.26776740847092606, |
|
"grad_norm": 9.821000099182129, |
|
"learning_rate": 1.837192048571401e-05, |
|
"loss": 1.1888, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2684852835606604, |
|
"grad_norm": 0.2834051549434662, |
|
"learning_rate": 1.836355561853878e-05, |
|
"loss": 1.135, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.26920315865039485, |
|
"grad_norm": 0.44755667448043823, |
|
"learning_rate": 1.835517352001604e-05, |
|
"loss": 1.233, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2699210337401292, |
|
"grad_norm": 0.2938573658466339, |
|
"learning_rate": 1.8346774233086907e-05, |
|
"loss": 1.1495, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2706389088298636, |
|
"grad_norm": 0.41367679834365845, |
|
"learning_rate": 1.833835780078056e-05, |
|
"loss": 1.2121, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.271356783919598, |
|
"grad_norm": 0.2853800654411316, |
|
"learning_rate": 1.8329924266214013e-05, |
|
"loss": 1.1065, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.27207465900933236, |
|
"grad_norm": 0.5363768339157104, |
|
"learning_rate": 1.832147367259189e-05, |
|
"loss": 1.1133, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2727925340990668, |
|
"grad_norm": 0.22328981757164001, |
|
"learning_rate": 1.831300606320621e-05, |
|
"loss": 1.0409, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.27351040918880115, |
|
"grad_norm": 0.48021677136421204, |
|
"learning_rate": 1.8304521481436168e-05, |
|
"loss": 1.2496, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2742282842785355, |
|
"grad_norm": 0.2657185196876526, |
|
"learning_rate": 1.8296019970747904e-05, |
|
"loss": 1.1268, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.27494615936826994, |
|
"grad_norm": 0.3658749759197235, |
|
"learning_rate": 1.8287501574694274e-05, |
|
"loss": 1.1838, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.2756640344580043, |
|
"grad_norm": 0.29449597001075745, |
|
"learning_rate": 1.8278966336914655e-05, |
|
"loss": 1.1806, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.27638190954773867, |
|
"grad_norm": 0.7698999643325806, |
|
"learning_rate": 1.8270414301134696e-05, |
|
"loss": 1.2385, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.2770997846374731, |
|
"grad_norm": 0.3127557933330536, |
|
"learning_rate": 1.8261845511166093e-05, |
|
"loss": 1.0942, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.27781765972720746, |
|
"grad_norm": 0.2682400047779083, |
|
"learning_rate": 1.8253260010906383e-05, |
|
"loss": 1.1652, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2785355348169419, |
|
"grad_norm": 0.4573856592178345, |
|
"learning_rate": 1.824465784433871e-05, |
|
"loss": 1.2724, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.27925340990667624, |
|
"grad_norm": 0.30964091420173645, |
|
"learning_rate": 1.8236039055531588e-05, |
|
"loss": 1.0837, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.2799712849964106, |
|
"grad_norm": 0.5770804286003113, |
|
"learning_rate": 1.82274036886387e-05, |
|
"loss": 1.3268, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28068916008614503, |
|
"grad_norm": 0.23842279613018036, |
|
"learning_rate": 1.8218751787898648e-05, |
|
"loss": 1.086, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.2814070351758794, |
|
"grad_norm": 0.21970561146736145, |
|
"learning_rate": 1.821008339763474e-05, |
|
"loss": 1.1039, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.28212491026561376, |
|
"grad_norm": 0.5542939305305481, |
|
"learning_rate": 1.8201398562254754e-05, |
|
"loss": 1.2382, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2828427853553482, |
|
"grad_norm": 0.36761996150016785, |
|
"learning_rate": 1.8192697326250723e-05, |
|
"loss": 1.2037, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.28356066044508255, |
|
"grad_norm": 0.23678098618984222, |
|
"learning_rate": 1.81839797341987e-05, |
|
"loss": 1.0795, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.28427853553481697, |
|
"grad_norm": 0.2703116238117218, |
|
"learning_rate": 1.8175245830758515e-05, |
|
"loss": 1.0809, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.28499641062455133, |
|
"grad_norm": 0.6142400503158569, |
|
"learning_rate": 1.8166495660673586e-05, |
|
"loss": 1.2617, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.2628495395183563, |
|
"learning_rate": 1.8157729268770635e-05, |
|
"loss": 1.137, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.2864321608040201, |
|
"grad_norm": 0.30924415588378906, |
|
"learning_rate": 1.814894669995951e-05, |
|
"loss": 1.156, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2871500358937545, |
|
"grad_norm": 0.2843545973300934, |
|
"learning_rate": 1.8140147999232928e-05, |
|
"loss": 1.1346, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28786791098348885, |
|
"grad_norm": 0.7029257416725159, |
|
"learning_rate": 1.8131333211666236e-05, |
|
"loss": 1.2186, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.28858578607322327, |
|
"grad_norm": 0.2657949924468994, |
|
"learning_rate": 1.8122502382417212e-05, |
|
"loss": 1.1465, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.28930366116295764, |
|
"grad_norm": 0.2940109372138977, |
|
"learning_rate": 1.81136555567258e-05, |
|
"loss": 1.1007, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.29002153625269206, |
|
"grad_norm": 0.5118001699447632, |
|
"learning_rate": 1.8104792779913905e-05, |
|
"loss": 1.179, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2907394113424264, |
|
"grad_norm": 0.27588653564453125, |
|
"learning_rate": 1.8095914097385135e-05, |
|
"loss": 1.1536, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2914572864321608, |
|
"grad_norm": 0.2666616141796112, |
|
"learning_rate": 1.8087019554624597e-05, |
|
"loss": 1.1004, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.2921751615218952, |
|
"grad_norm": 0.2696448564529419, |
|
"learning_rate": 1.807810919719864e-05, |
|
"loss": 1.116, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.2928930366116296, |
|
"grad_norm": 0.44319644570350647, |
|
"learning_rate": 1.806918307075463e-05, |
|
"loss": 1.1544, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.29361091170136394, |
|
"grad_norm": 0.2961446940898895, |
|
"learning_rate": 1.8060241221020724e-05, |
|
"loss": 1.1079, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.29432878679109836, |
|
"grad_norm": 0.535943865776062, |
|
"learning_rate": 1.8051283693805627e-05, |
|
"loss": 1.2377, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29504666188083273, |
|
"grad_norm": 0.22256693243980408, |
|
"learning_rate": 1.804231053499835e-05, |
|
"loss": 1.1848, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.29576453697056715, |
|
"grad_norm": 0.22374652326107025, |
|
"learning_rate": 1.8033321790567996e-05, |
|
"loss": 1.1023, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.2964824120603015, |
|
"grad_norm": 0.241849884390831, |
|
"learning_rate": 1.802431750656351e-05, |
|
"loss": 1.081, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2972002871500359, |
|
"grad_norm": 0.21531161665916443, |
|
"learning_rate": 1.8015297729113437e-05, |
|
"loss": 1.1114, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.2979181622397703, |
|
"grad_norm": 0.5768148899078369, |
|
"learning_rate": 1.80062625044257e-05, |
|
"loss": 1.1076, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.29863603732950467, |
|
"grad_norm": 0.9602562189102173, |
|
"learning_rate": 1.7997211878787367e-05, |
|
"loss": 1.2154, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.29935391241923903, |
|
"grad_norm": 0.5085827112197876, |
|
"learning_rate": 1.7988145898564383e-05, |
|
"loss": 1.0853, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.30007178750897345, |
|
"grad_norm": 0.25142329931259155, |
|
"learning_rate": 1.7979064610201373e-05, |
|
"loss": 1.0924, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3007896625987078, |
|
"grad_norm": 0.35126984119415283, |
|
"learning_rate": 1.7969968060221378e-05, |
|
"loss": 1.0696, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3015075376884422, |
|
"grad_norm": 0.3864904046058655, |
|
"learning_rate": 1.7960856295225618e-05, |
|
"loss": 1.1267, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3022254127781766, |
|
"grad_norm": 0.40476828813552856, |
|
"learning_rate": 1.7951729361893274e-05, |
|
"loss": 1.1587, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.30294328786791097, |
|
"grad_norm": 36.076351165771484, |
|
"learning_rate": 1.7942587306981214e-05, |
|
"loss": 1.0542, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3036611629576454, |
|
"grad_norm": 0.2805517911911011, |
|
"learning_rate": 1.7933430177323788e-05, |
|
"loss": 1.1292, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.30437903804737976, |
|
"grad_norm": 0.28951379656791687, |
|
"learning_rate": 1.792425801983257e-05, |
|
"loss": 1.0955, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3050969131371141, |
|
"grad_norm": 0.25895097851753235, |
|
"learning_rate": 1.7915070881496114e-05, |
|
"loss": 1.1066, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.30581478822684854, |
|
"grad_norm": 0.38622137904167175, |
|
"learning_rate": 1.7905868809379737e-05, |
|
"loss": 1.1115, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3065326633165829, |
|
"grad_norm": 0.7420461177825928, |
|
"learning_rate": 1.7896651850625235e-05, |
|
"loss": 1.185, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3072505384063173, |
|
"grad_norm": 0.25646522641181946, |
|
"learning_rate": 1.788742005245069e-05, |
|
"loss": 1.1046, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3079684134960517, |
|
"grad_norm": 0.38760238885879517, |
|
"learning_rate": 1.78781734621502e-05, |
|
"loss": 1.1659, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.30868628858578606, |
|
"grad_norm": 0.3064006567001343, |
|
"learning_rate": 1.786891212709364e-05, |
|
"loss": 1.092, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3094041636755205, |
|
"grad_norm": 0.4092943072319031, |
|
"learning_rate": 1.7859636094726415e-05, |
|
"loss": 1.1406, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.31012203876525485, |
|
"grad_norm": 0.39417821168899536, |
|
"learning_rate": 1.7850345412569237e-05, |
|
"loss": 1.0614, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3108399138549892, |
|
"grad_norm": 0.3639516830444336, |
|
"learning_rate": 1.784104012821786e-05, |
|
"loss": 1.1227, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.31155778894472363, |
|
"grad_norm": 0.2613206207752228, |
|
"learning_rate": 1.7831720289342852e-05, |
|
"loss": 1.0766, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.312275664034458, |
|
"grad_norm": 0.44435498118400574, |
|
"learning_rate": 1.7822385943689333e-05, |
|
"loss": 1.2368, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.31299353912419237, |
|
"grad_norm": 381.1724853515625, |
|
"learning_rate": 1.7813037139076743e-05, |
|
"loss": 1.1427, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3137114142139268, |
|
"grad_norm": 0.36013710498809814, |
|
"learning_rate": 1.7803673923398602e-05, |
|
"loss": 1.1707, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.31442928930366115, |
|
"grad_norm": 0.4431007504463196, |
|
"learning_rate": 1.7794296344622245e-05, |
|
"loss": 1.1485, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3151471643933956, |
|
"grad_norm": 0.31186437606811523, |
|
"learning_rate": 1.7784904450788608e-05, |
|
"loss": 1.1456, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.31586503948312994, |
|
"grad_norm": 0.28195399045944214, |
|
"learning_rate": 1.7775498290011935e-05, |
|
"loss": 1.1428, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3165829145728643, |
|
"grad_norm": 0.28521496057510376, |
|
"learning_rate": 1.7766077910479584e-05, |
|
"loss": 1.0742, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3173007896625987, |
|
"grad_norm": 0.22959643602371216, |
|
"learning_rate": 1.7756643360451743e-05, |
|
"loss": 1.0827, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3180186647523331, |
|
"grad_norm": 0.4172683358192444, |
|
"learning_rate": 1.7747194688261194e-05, |
|
"loss": 1.0979, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.31873653984206746, |
|
"grad_norm": 0.3398095667362213, |
|
"learning_rate": 1.7737731942313077e-05, |
|
"loss": 1.0966, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3194544149318019, |
|
"grad_norm": 0.3017900586128235, |
|
"learning_rate": 1.7728255171084614e-05, |
|
"loss": 1.1515, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.32017229002153624, |
|
"grad_norm": 0.34165719151496887, |
|
"learning_rate": 1.7718764423124892e-05, |
|
"loss": 1.129, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.32089016511127066, |
|
"grad_norm": 0.46820157766342163, |
|
"learning_rate": 1.7709259747054594e-05, |
|
"loss": 1.1185, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.32160804020100503, |
|
"grad_norm": 0.3458547294139862, |
|
"learning_rate": 1.769974119156576e-05, |
|
"loss": 1.1134, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3223259152907394, |
|
"grad_norm": 0.4865923821926117, |
|
"learning_rate": 1.7690208805421526e-05, |
|
"loss": 1.1727, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3230437903804738, |
|
"grad_norm": 0.2898300886154175, |
|
"learning_rate": 1.7680662637455892e-05, |
|
"loss": 1.0716, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3237616654702082, |
|
"grad_norm": 0.32885676622390747, |
|
"learning_rate": 1.7671102736573454e-05, |
|
"loss": 1.0732, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.32447954055994255, |
|
"grad_norm": 0.3227647542953491, |
|
"learning_rate": 1.7661529151749164e-05, |
|
"loss": 1.0888, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.32519741564967697, |
|
"grad_norm": 0.3530004620552063, |
|
"learning_rate": 1.7651941932028077e-05, |
|
"loss": 1.1758, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.32591529073941133, |
|
"grad_norm": 0.3144376277923584, |
|
"learning_rate": 1.76423411265251e-05, |
|
"loss": 1.2014, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.32663316582914576, |
|
"grad_norm": 0.24854792654514313, |
|
"learning_rate": 1.7632726784424733e-05, |
|
"loss": 1.1555, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3273510409188801, |
|
"grad_norm": 0.33343812823295593, |
|
"learning_rate": 1.762309895498083e-05, |
|
"loss": 1.1325, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3280689160086145, |
|
"grad_norm": 0.2514241635799408, |
|
"learning_rate": 1.761345768751634e-05, |
|
"loss": 1.1321, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.3287867910983489, |
|
"grad_norm": 0.2710927724838257, |
|
"learning_rate": 1.760380303142305e-05, |
|
"loss": 1.1312, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3295046661880833, |
|
"grad_norm": 0.2550286650657654, |
|
"learning_rate": 1.759413503616133e-05, |
|
"loss": 1.1223, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.33022254127781764, |
|
"grad_norm": 0.3207646310329437, |
|
"learning_rate": 1.7584453751259913e-05, |
|
"loss": 1.0433, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33094041636755206, |
|
"grad_norm": 0.24946732819080353, |
|
"learning_rate": 1.7574759226315583e-05, |
|
"loss": 1.1089, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3316582914572864, |
|
"grad_norm": 0.3521324694156647, |
|
"learning_rate": 1.7565051510992966e-05, |
|
"loss": 1.1433, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.33237616654702085, |
|
"grad_norm": 0.2754557728767395, |
|
"learning_rate": 1.7555330655024263e-05, |
|
"loss": 1.1118, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3330940416367552, |
|
"grad_norm": 0.23766492307186127, |
|
"learning_rate": 1.7545596708208993e-05, |
|
"loss": 1.083, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3338119167264896, |
|
"grad_norm": 0.34119418263435364, |
|
"learning_rate": 1.7535849720413732e-05, |
|
"loss": 1.1183, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.334529791816224, |
|
"grad_norm": 0.4450787305831909, |
|
"learning_rate": 1.7526089741571878e-05, |
|
"loss": 1.153, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.33524766690595836, |
|
"grad_norm": 0.3177962899208069, |
|
"learning_rate": 1.7516316821683363e-05, |
|
"loss": 1.1197, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.33596554199569273, |
|
"grad_norm": 0.38232341408729553, |
|
"learning_rate": 1.7506531010814436e-05, |
|
"loss": 1.1731, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.33668341708542715, |
|
"grad_norm": 0.24796949326992035, |
|
"learning_rate": 1.749673235909737e-05, |
|
"loss": 1.1165, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3374012921751615, |
|
"grad_norm": 1.157897710800171, |
|
"learning_rate": 1.7486920916730228e-05, |
|
"loss": 1.2673, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3381191672648959, |
|
"grad_norm": 114.63349914550781, |
|
"learning_rate": 1.74770967339766e-05, |
|
"loss": 1.1722, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3388370423546303, |
|
"grad_norm": 0.2927913963794708, |
|
"learning_rate": 1.7467259861165335e-05, |
|
"loss": 1.116, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.33955491744436467, |
|
"grad_norm": 0.2694433033466339, |
|
"learning_rate": 1.7457410348690312e-05, |
|
"loss": 1.1104, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3402727925340991, |
|
"grad_norm": 0.3550788164138794, |
|
"learning_rate": 1.744754824701014e-05, |
|
"loss": 1.1374, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.34099066762383345, |
|
"grad_norm": 0.3682781755924225, |
|
"learning_rate": 1.7437673606647935e-05, |
|
"loss": 1.1573, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3417085427135678, |
|
"grad_norm": 0.6073625683784485, |
|
"learning_rate": 1.7427786478191042e-05, |
|
"loss": 1.2432, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.34242641780330224, |
|
"grad_norm": 0.268215149641037, |
|
"learning_rate": 1.741788691229079e-05, |
|
"loss": 1.1172, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3431442928930366, |
|
"grad_norm": 0.6302129626274109, |
|
"learning_rate": 1.7407974959662223e-05, |
|
"loss": 1.1231, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.34386216798277097, |
|
"grad_norm": 0.31168705224990845, |
|
"learning_rate": 1.7398050671083833e-05, |
|
"loss": 1.1742, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3445800430725054, |
|
"grad_norm": 0.42064017057418823, |
|
"learning_rate": 1.7388114097397312e-05, |
|
"loss": 1.1146, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.34529791816223976, |
|
"grad_norm": 0.5136387944221497, |
|
"learning_rate": 1.7378165289507296e-05, |
|
"loss": 1.321, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3460157932519742, |
|
"grad_norm": 0.5692410469055176, |
|
"learning_rate": 1.7368204298381086e-05, |
|
"loss": 1.0915, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.34673366834170855, |
|
"grad_norm": 0.42343536019325256, |
|
"learning_rate": 1.7358231175048402e-05, |
|
"loss": 1.1114, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3474515434314429, |
|
"grad_norm": 0.9200416803359985, |
|
"learning_rate": 1.734824597060112e-05, |
|
"loss": 1.0989, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.34816941852117733, |
|
"grad_norm": 1.0450941324234009, |
|
"learning_rate": 1.7338248736192998e-05, |
|
"loss": 1.143, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3488872936109117, |
|
"grad_norm": 0.3270246684551239, |
|
"learning_rate": 1.732823952303943e-05, |
|
"loss": 1.0929, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.34960516870064606, |
|
"grad_norm": 0.6713224053382874, |
|
"learning_rate": 1.7318218382417177e-05, |
|
"loss": 1.1557, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3503230437903805, |
|
"grad_norm": 0.559257984161377, |
|
"learning_rate": 1.73081853656641e-05, |
|
"loss": 1.2446, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.35104091888011485, |
|
"grad_norm": 0.2514427900314331, |
|
"learning_rate": 1.7298140524178905e-05, |
|
"loss": 1.1262, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.35175879396984927, |
|
"grad_norm": 0.4912594258785248, |
|
"learning_rate": 1.7288083909420866e-05, |
|
"loss": 1.2154, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35247666905958364, |
|
"grad_norm": 0.2987563908100128, |
|
"learning_rate": 1.7278015572909586e-05, |
|
"loss": 1.028, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.353194544149318, |
|
"grad_norm": 0.6520453095436096, |
|
"learning_rate": 1.7267935566224707e-05, |
|
"loss": 1.1186, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3539124192390524, |
|
"grad_norm": 0.28420624136924744, |
|
"learning_rate": 1.7257843941005656e-05, |
|
"loss": 1.0904, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3546302943287868, |
|
"grad_norm": 0.27295732498168945, |
|
"learning_rate": 1.7247740748951398e-05, |
|
"loss": 1.0454, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.35534816941852115, |
|
"grad_norm": 0.32143691182136536, |
|
"learning_rate": 1.7237626041820124e-05, |
|
"loss": 1.0734, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3560660445082556, |
|
"grad_norm": 0.25867822766304016, |
|
"learning_rate": 1.722749987142905e-05, |
|
"loss": 1.0591, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.35678391959798994, |
|
"grad_norm": 0.2761097848415375, |
|
"learning_rate": 1.721736228965409e-05, |
|
"loss": 1.1629, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.35750179468772436, |
|
"grad_norm": 0.2398725152015686, |
|
"learning_rate": 1.720721334842963e-05, |
|
"loss": 1.1519, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3582196697774587, |
|
"grad_norm": 0.22945886850357056, |
|
"learning_rate": 1.719705309974826e-05, |
|
"loss": 1.0608, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3589375448671931, |
|
"grad_norm": 0.23391370475292206, |
|
"learning_rate": 1.7186881595660478e-05, |
|
"loss": 1.063, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3596554199569275, |
|
"grad_norm": 0.8408838510513306, |
|
"learning_rate": 1.7176698888274455e-05, |
|
"loss": 1.3951, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.3603732950466619, |
|
"grad_norm": 0.42259481549263, |
|
"learning_rate": 1.7166505029755752e-05, |
|
"loss": 1.1798, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.36109117013639624, |
|
"grad_norm": 0.3819274306297302, |
|
"learning_rate": 1.715630007232706e-05, |
|
"loss": 1.2017, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.36180904522613067, |
|
"grad_norm": 0.2353987842798233, |
|
"learning_rate": 1.714608406826793e-05, |
|
"loss": 1.0769, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.36252692031586503, |
|
"grad_norm": 0.23951439559459686, |
|
"learning_rate": 1.713585706991449e-05, |
|
"loss": 1.0743, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.36324479540559945, |
|
"grad_norm": 0.2518669664859772, |
|
"learning_rate": 1.7125619129659215e-05, |
|
"loss": 1.0942, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.3639626704953338, |
|
"grad_norm": 0.5533793568611145, |
|
"learning_rate": 1.7115370299950616e-05, |
|
"loss": 1.1229, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.3646805455850682, |
|
"grad_norm": 0.38030925393104553, |
|
"learning_rate": 1.7105110633293e-05, |
|
"loss": 1.1829, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3653984206748026, |
|
"grad_norm": 0.2151963710784912, |
|
"learning_rate": 1.7094840182246186e-05, |
|
"loss": 1.1415, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.36611629576453697, |
|
"grad_norm": 0.47541192173957825, |
|
"learning_rate": 1.7084558999425244e-05, |
|
"loss": 1.3636, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36683417085427134, |
|
"grad_norm": 0.3854585886001587, |
|
"learning_rate": 1.7074267137500224e-05, |
|
"loss": 1.1861, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.36755204594400576, |
|
"grad_norm": 0.4030381143093109, |
|
"learning_rate": 1.7063964649195876e-05, |
|
"loss": 1.1549, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.3682699210337401, |
|
"grad_norm": 0.24959807097911835, |
|
"learning_rate": 1.7053651587291397e-05, |
|
"loss": 1.1284, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.36898779612347454, |
|
"grad_norm": 0.3913261294364929, |
|
"learning_rate": 1.7043328004620155e-05, |
|
"loss": 1.0848, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.3697056712132089, |
|
"grad_norm": 0.251136839389801, |
|
"learning_rate": 1.7032993954069403e-05, |
|
"loss": 1.1282, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.3704235463029433, |
|
"grad_norm": 0.2389630526304245, |
|
"learning_rate": 1.7022649488580028e-05, |
|
"loss": 1.0215, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.3711414213926777, |
|
"grad_norm": 0.24776770174503326, |
|
"learning_rate": 1.7012294661146278e-05, |
|
"loss": 1.0338, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.37185929648241206, |
|
"grad_norm": 0.4876779317855835, |
|
"learning_rate": 1.700192952481547e-05, |
|
"loss": 1.0991, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3725771715721464, |
|
"grad_norm": 0.31186917424201965, |
|
"learning_rate": 1.699155413268775e-05, |
|
"loss": 1.093, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.37329504666188085, |
|
"grad_norm": 0.23691536486148834, |
|
"learning_rate": 1.698116853791579e-05, |
|
"loss": 1.0681, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3740129217516152, |
|
"grad_norm": 2.3807168006896973, |
|
"learning_rate": 1.6970772793704536e-05, |
|
"loss": 1.1821, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3747307968413496, |
|
"grad_norm": 0.45725804567337036, |
|
"learning_rate": 1.696036695331093e-05, |
|
"loss": 1.2203, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.375448671931084, |
|
"grad_norm": 0.27984505891799927, |
|
"learning_rate": 1.694995107004364e-05, |
|
"loss": 1.1389, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.37616654702081836, |
|
"grad_norm": 0.32176896929740906, |
|
"learning_rate": 1.6939525197262763e-05, |
|
"loss": 1.1442, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3768844221105528, |
|
"grad_norm": 0.2559623718261719, |
|
"learning_rate": 1.69290893883796e-05, |
|
"loss": 1.0509, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.37760229720028715, |
|
"grad_norm": 0.34299102425575256, |
|
"learning_rate": 1.6918643696856335e-05, |
|
"loss": 1.0765, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3783201722900215, |
|
"grad_norm": 0.36113616824150085, |
|
"learning_rate": 1.690818817620579e-05, |
|
"loss": 1.1227, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.37903804737975594, |
|
"grad_norm": 0.7385759353637695, |
|
"learning_rate": 1.689772287999113e-05, |
|
"loss": 1.2611, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3797559224694903, |
|
"grad_norm": 0.4936607778072357, |
|
"learning_rate": 1.688724786182562e-05, |
|
"loss": 1.2145, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.38047379755922467, |
|
"grad_norm": 0.6105899214744568, |
|
"learning_rate": 1.6876763175372306e-05, |
|
"loss": 1.0898, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3811916726489591, |
|
"grad_norm": 0.33757439255714417, |
|
"learning_rate": 1.686626887434378e-05, |
|
"loss": 1.1311, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.38190954773869346, |
|
"grad_norm": 0.22930863499641418, |
|
"learning_rate": 1.6855765012501884e-05, |
|
"loss": 1.1404, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3826274228284279, |
|
"grad_norm": 0.24472716450691223, |
|
"learning_rate": 1.6845251643657442e-05, |
|
"loss": 1.0967, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.38334529791816224, |
|
"grad_norm": 0.9064115285873413, |
|
"learning_rate": 1.683472882166998e-05, |
|
"loss": 1.2872, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3840631730078966, |
|
"grad_norm": 0.49683505296707153, |
|
"learning_rate": 1.6824196600447446e-05, |
|
"loss": 1.1155, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.38478104809763103, |
|
"grad_norm": 0.21948783099651337, |
|
"learning_rate": 1.6813655033945958e-05, |
|
"loss": 1.0843, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3854989231873654, |
|
"grad_norm": 0.2846214175224304, |
|
"learning_rate": 1.6803104176169486e-05, |
|
"loss": 1.0879, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.38621679827709976, |
|
"grad_norm": 0.36232081055641174, |
|
"learning_rate": 1.6792544081169618e-05, |
|
"loss": 1.165, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.3869346733668342, |
|
"grad_norm": 0.3394874036312103, |
|
"learning_rate": 1.678197480304525e-05, |
|
"loss": 1.092, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.38765254845656855, |
|
"grad_norm": 0.38495680689811707, |
|
"learning_rate": 1.677139639594234e-05, |
|
"loss": 1.1141, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.38837042354630297, |
|
"grad_norm": 0.2894507050514221, |
|
"learning_rate": 1.6760808914053588e-05, |
|
"loss": 1.0822, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.38908829863603733, |
|
"grad_norm": 0.5585907101631165, |
|
"learning_rate": 1.675021241161821e-05, |
|
"loss": 1.1023, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3898061737257717, |
|
"grad_norm": 0.26919737458229065, |
|
"learning_rate": 1.673960694292161e-05, |
|
"loss": 1.1548, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.3905240488155061, |
|
"grad_norm": 0.2462243139743805, |
|
"learning_rate": 1.672899256229515e-05, |
|
"loss": 1.1222, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.3912419239052405, |
|
"grad_norm": 0.244044229388237, |
|
"learning_rate": 1.671836932411583e-05, |
|
"loss": 1.0474, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.39195979899497485, |
|
"grad_norm": 0.39821985363960266, |
|
"learning_rate": 1.6707737282806033e-05, |
|
"loss": 1.0614, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.39267767408470927, |
|
"grad_norm": 0.5356500148773193, |
|
"learning_rate": 1.6697096492833234e-05, |
|
"loss": 1.166, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.39339554917444364, |
|
"grad_norm": 0.28749582171440125, |
|
"learning_rate": 1.6686447008709737e-05, |
|
"loss": 1.125, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.39411342426417806, |
|
"grad_norm": 0.2848498225212097, |
|
"learning_rate": 1.667578888499238e-05, |
|
"loss": 1.1676, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.3948312993539124, |
|
"grad_norm": 0.31614670157432556, |
|
"learning_rate": 1.6665122176282265e-05, |
|
"loss": 1.1118, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3955491744436468, |
|
"grad_norm": 0.3202102482318878, |
|
"learning_rate": 1.6654446937224467e-05, |
|
"loss": 1.0995, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.3962670495333812, |
|
"grad_norm": 0.525974452495575, |
|
"learning_rate": 1.6643763222507766e-05, |
|
"loss": 1.2701, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.3969849246231156, |
|
"grad_norm": 2.5211613178253174, |
|
"learning_rate": 1.6633071086864366e-05, |
|
"loss": 1.5002, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.39770279971284994, |
|
"grad_norm": 0.6316677331924438, |
|
"learning_rate": 1.6622370585069604e-05, |
|
"loss": 1.2834, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.39842067480258436, |
|
"grad_norm": 0.31538158655166626, |
|
"learning_rate": 1.6611661771941686e-05, |
|
"loss": 1.1316, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.39913854989231873, |
|
"grad_norm": 0.35625991225242615, |
|
"learning_rate": 1.6600944702341386e-05, |
|
"loss": 1.0744, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.39985642498205315, |
|
"grad_norm": 0.815897524356842, |
|
"learning_rate": 1.6590219431171782e-05, |
|
"loss": 1.4233, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4005743000717875, |
|
"grad_norm": 0.24649186432361603, |
|
"learning_rate": 1.6579486013377965e-05, |
|
"loss": 1.0445, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4012921751615219, |
|
"grad_norm": 0.38415780663490295, |
|
"learning_rate": 1.656874450394676e-05, |
|
"loss": 1.0642, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4020100502512563, |
|
"grad_norm": 5.395670413970947, |
|
"learning_rate": 1.6557994957906456e-05, |
|
"loss": 1.419, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.40272792534099067, |
|
"grad_norm": 0.43887314200401306, |
|
"learning_rate": 1.6547237430326494e-05, |
|
"loss": 1.2503, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.40344580043072503, |
|
"grad_norm": 0.508529007434845, |
|
"learning_rate": 1.6536471976317227e-05, |
|
"loss": 1.1596, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.40416367552045945, |
|
"grad_norm": 0.9722962379455566, |
|
"learning_rate": 1.6525698651029585e-05, |
|
"loss": 1.1408, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4048815506101938, |
|
"grad_norm": 0.6140845417976379, |
|
"learning_rate": 1.651491750965486e-05, |
|
"loss": 1.1447, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.40559942569992824, |
|
"grad_norm": 0.3369700312614441, |
|
"learning_rate": 1.650412860742435e-05, |
|
"loss": 1.087, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4063173007896626, |
|
"grad_norm": 0.25257930159568787, |
|
"learning_rate": 1.6493331999609133e-05, |
|
"loss": 1.0692, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.40703517587939697, |
|
"grad_norm": 0.2892700433731079, |
|
"learning_rate": 1.6482527741519755e-05, |
|
"loss": 1.1293, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4077530509691314, |
|
"grad_norm": 0.4760481119155884, |
|
"learning_rate": 1.647171588850595e-05, |
|
"loss": 1.0839, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.40847092605886576, |
|
"grad_norm": 0.3079492449760437, |
|
"learning_rate": 1.6460896495956377e-05, |
|
"loss": 1.1194, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4091888011486001, |
|
"grad_norm": 0.26826906204223633, |
|
"learning_rate": 1.6450069619298302e-05, |
|
"loss": 1.0403, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.40990667623833454, |
|
"grad_norm": 0.3646854758262634, |
|
"learning_rate": 1.6439235313997332e-05, |
|
"loss": 1.0847, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4106245513280689, |
|
"grad_norm": 0.265591561794281, |
|
"learning_rate": 1.6428393635557146e-05, |
|
"loss": 1.0887, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4113424264178033, |
|
"grad_norm": 0.3478126525878906, |
|
"learning_rate": 1.641754463951918e-05, |
|
"loss": 1.0468, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4120603015075377, |
|
"grad_norm": 0.7069317698478699, |
|
"learning_rate": 1.640668838146237e-05, |
|
"loss": 1.0561, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.41277817659727206, |
|
"grad_norm": 0.2756154239177704, |
|
"learning_rate": 1.639582491700284e-05, |
|
"loss": 1.1029, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4134960516870065, |
|
"grad_norm": 0.2211773544549942, |
|
"learning_rate": 1.638495430179365e-05, |
|
"loss": 1.0336, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.41421392677674085, |
|
"grad_norm": 0.5828403830528259, |
|
"learning_rate": 1.637407659152447e-05, |
|
"loss": 1.1292, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4149318018664752, |
|
"grad_norm": 0.2376655787229538, |
|
"learning_rate": 1.6363191841921346e-05, |
|
"loss": 1.0904, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.41564967695620963, |
|
"grad_norm": 0.24718521535396576, |
|
"learning_rate": 1.6352300108746365e-05, |
|
"loss": 1.0573, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.416367552045944, |
|
"grad_norm": 0.6126704216003418, |
|
"learning_rate": 1.6341401447797397e-05, |
|
"loss": 1.3006, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.41708542713567837, |
|
"grad_norm": 0.2663542926311493, |
|
"learning_rate": 1.6330495914907803e-05, |
|
"loss": 1.1387, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4178033022254128, |
|
"grad_norm": 0.24516524374485016, |
|
"learning_rate": 1.631958356594615e-05, |
|
"loss": 1.0717, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.41852117731514715, |
|
"grad_norm": 0.3285379707813263, |
|
"learning_rate": 1.6308664456815914e-05, |
|
"loss": 1.0615, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4192390524048816, |
|
"grad_norm": 0.2067587524652481, |
|
"learning_rate": 1.6297738643455225e-05, |
|
"loss": 1.1256, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.41995692749461594, |
|
"grad_norm": 0.25473785400390625, |
|
"learning_rate": 1.6286806181836535e-05, |
|
"loss": 1.0668, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4206748025843503, |
|
"grad_norm": 0.23833997547626495, |
|
"learning_rate": 1.6275867127966364e-05, |
|
"loss": 1.0937, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4213926776740847, |
|
"grad_norm": 0.2214917689561844, |
|
"learning_rate": 1.6264921537885005e-05, |
|
"loss": 1.0395, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4221105527638191, |
|
"grad_norm": 0.2731074392795563, |
|
"learning_rate": 1.625396946766624e-05, |
|
"loss": 1.1332, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.42282842785355346, |
|
"grad_norm": 1.6967939138412476, |
|
"learning_rate": 1.6243010973417033e-05, |
|
"loss": 1.175, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.4235463029432879, |
|
"grad_norm": 0.6087107062339783, |
|
"learning_rate": 1.623204611127728e-05, |
|
"loss": 1.3019, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.42426417803302224, |
|
"grad_norm": 0.337079793214798, |
|
"learning_rate": 1.6221074937419476e-05, |
|
"loss": 1.1454, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.42498205312275666, |
|
"grad_norm": 0.5039341449737549, |
|
"learning_rate": 1.621009750804847e-05, |
|
"loss": 1.2322, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.42569992821249103, |
|
"grad_norm": 0.42420458793640137, |
|
"learning_rate": 1.6199113879401143e-05, |
|
"loss": 1.0776, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.4264178033022254, |
|
"grad_norm": 0.3929119110107422, |
|
"learning_rate": 1.618812410774615e-05, |
|
"loss": 1.1037, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4271356783919598, |
|
"grad_norm": 0.24679097533226013, |
|
"learning_rate": 1.61771282493836e-05, |
|
"loss": 1.0428, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4278535534816942, |
|
"grad_norm": 0.1979144811630249, |
|
"learning_rate": 1.6166126360644798e-05, |
|
"loss": 1.0692, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.871056854724884, |
|
"learning_rate": 1.6155118497891936e-05, |
|
"loss": 1.378, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.42928930366116297, |
|
"grad_norm": 0.24336977303028107, |
|
"learning_rate": 1.6144104717517802e-05, |
|
"loss": 1.0514, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.43000717875089733, |
|
"grad_norm": 0.27994900941848755, |
|
"learning_rate": 1.6133085075945518e-05, |
|
"loss": 1.0876, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.43072505384063176, |
|
"grad_norm": 0.447373628616333, |
|
"learning_rate": 1.6122059629628223e-05, |
|
"loss": 1.2451, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4314429289303661, |
|
"grad_norm": 0.2424324005842209, |
|
"learning_rate": 1.611102843504879e-05, |
|
"loss": 1.125, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.4321608040201005, |
|
"grad_norm": 0.29035624861717224, |
|
"learning_rate": 1.609999154871954e-05, |
|
"loss": 1.0825, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4328786791098349, |
|
"grad_norm": 0.2490774393081665, |
|
"learning_rate": 1.608894902718196e-05, |
|
"loss": 1.0882, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4335965541995693, |
|
"grad_norm": 0.2781699001789093, |
|
"learning_rate": 1.607790092700641e-05, |
|
"loss": 1.0924, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.43431442928930364, |
|
"grad_norm": 0.5178611278533936, |
|
"learning_rate": 1.6066847304791808e-05, |
|
"loss": 1.148, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.43503230437903806, |
|
"grad_norm": 0.30101197957992554, |
|
"learning_rate": 1.6055788217165384e-05, |
|
"loss": 1.0633, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.4357501794687724, |
|
"grad_norm": 0.22836507856845856, |
|
"learning_rate": 1.6044723720782353e-05, |
|
"loss": 1.1176, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.43646805455850685, |
|
"grad_norm": 0.31232619285583496, |
|
"learning_rate": 1.6033653872325646e-05, |
|
"loss": 1.1251, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4371859296482412, |
|
"grad_norm": 0.7165313363075256, |
|
"learning_rate": 1.6022578728505605e-05, |
|
"loss": 1.1678, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4379038047379756, |
|
"grad_norm": 0.40292295813560486, |
|
"learning_rate": 1.6011498346059714e-05, |
|
"loss": 1.1498, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.43862167982771, |
|
"grad_norm": 0.4741491973400116, |
|
"learning_rate": 1.6000412781752274e-05, |
|
"loss": 1.1067, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.43933955491744436, |
|
"grad_norm": 0.2265220284461975, |
|
"learning_rate": 1.598932209237415e-05, |
|
"loss": 1.0956, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.44005743000717873, |
|
"grad_norm": 0.24188081920146942, |
|
"learning_rate": 1.5978226334742454e-05, |
|
"loss": 1.0616, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.44077530509691315, |
|
"grad_norm": 0.26520058512687683, |
|
"learning_rate": 1.5967125565700268e-05, |
|
"loss": 1.0825, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.4414931801866475, |
|
"grad_norm": 0.2989540100097656, |
|
"learning_rate": 1.595601984211634e-05, |
|
"loss": 1.0677, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.44221105527638194, |
|
"grad_norm": 0.24873438477516174, |
|
"learning_rate": 1.5944909220884802e-05, |
|
"loss": 1.1293, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4429289303661163, |
|
"grad_norm": 0.266328364610672, |
|
"learning_rate": 1.593379375892488e-05, |
|
"loss": 1.0891, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.44364680545585067, |
|
"grad_norm": 0.519723117351532, |
|
"learning_rate": 1.59226735131806e-05, |
|
"loss": 1.1946, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4443646805455851, |
|
"grad_norm": 0.27866214513778687, |
|
"learning_rate": 1.5911548540620482e-05, |
|
"loss": 1.0822, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.44508255563531945, |
|
"grad_norm": 0.22386035323143005, |
|
"learning_rate": 1.5900418898237282e-05, |
|
"loss": 1.0814, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4458004307250538, |
|
"grad_norm": 0.2788839638233185, |
|
"learning_rate": 1.5889284643047664e-05, |
|
"loss": 1.0965, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.44651830581478824, |
|
"grad_norm": 0.41083183884620667, |
|
"learning_rate": 1.587814583209193e-05, |
|
"loss": 1.0948, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4472361809045226, |
|
"grad_norm": 1.0803130865097046, |
|
"learning_rate": 1.5867002522433714e-05, |
|
"loss": 1.3797, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.44795405599425697, |
|
"grad_norm": 0.38585397601127625, |
|
"learning_rate": 1.5855854771159706e-05, |
|
"loss": 1.1765, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4486719310839914, |
|
"grad_norm": 0.25229373574256897, |
|
"learning_rate": 1.5844702635379342e-05, |
|
"loss": 1.122, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.44938980617372576, |
|
"grad_norm": 0.40471434593200684, |
|
"learning_rate": 1.583354617222453e-05, |
|
"loss": 1.1815, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.4501076812634602, |
|
"grad_norm": 0.31672340631484985, |
|
"learning_rate": 1.5822385438849327e-05, |
|
"loss": 1.1198, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.45082555635319455, |
|
"grad_norm": 0.2720855176448822, |
|
"learning_rate": 1.5811220492429692e-05, |
|
"loss": 1.1041, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.4515434314429289, |
|
"grad_norm": 0.5343842506408691, |
|
"learning_rate": 1.580005139016315e-05, |
|
"loss": 1.1088, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.45226130653266333, |
|
"grad_norm": 0.784042239189148, |
|
"learning_rate": 1.5788878189268516e-05, |
|
"loss": 1.3126, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4529791816223977, |
|
"grad_norm": 0.26885804533958435, |
|
"learning_rate": 1.5777700946985616e-05, |
|
"loss": 1.1121, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.45369705671213206, |
|
"grad_norm": 0.2665066719055176, |
|
"learning_rate": 1.5766519720574964e-05, |
|
"loss": 1.0813, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.4544149318018665, |
|
"grad_norm": 0.3135133683681488, |
|
"learning_rate": 1.5755334567317492e-05, |
|
"loss": 1.0603, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.45513280689160085, |
|
"grad_norm": 0.38875702023506165, |
|
"learning_rate": 1.574414554451425e-05, |
|
"loss": 1.1628, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.45585068198133527, |
|
"grad_norm": 0.22252540290355682, |
|
"learning_rate": 1.5732952709486108e-05, |
|
"loss": 1.0928, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.45656855707106964, |
|
"grad_norm": 0.36836034059524536, |
|
"learning_rate": 1.572175611957347e-05, |
|
"loss": 1.0618, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.457286432160804, |
|
"grad_norm": 0.3460846543312073, |
|
"learning_rate": 1.5710555832135974e-05, |
|
"loss": 1.0675, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4580043072505384, |
|
"grad_norm": 0.23035378754138947, |
|
"learning_rate": 1.5699351904552197e-05, |
|
"loss": 1.1211, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.4587221823402728, |
|
"grad_norm": 0.22755472362041473, |
|
"learning_rate": 1.568814439421937e-05, |
|
"loss": 1.0974, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.45944005743000715, |
|
"grad_norm": 0.7951919436454773, |
|
"learning_rate": 1.567693335855307e-05, |
|
"loss": 1.2301, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4601579325197416, |
|
"grad_norm": 0.38795220851898193, |
|
"learning_rate": 1.5665718854986946e-05, |
|
"loss": 1.2064, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.46087580760947594, |
|
"grad_norm": 0.4807037115097046, |
|
"learning_rate": 1.5654500940972405e-05, |
|
"loss": 1.2368, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.46159368269921036, |
|
"grad_norm": 0.40683814883232117, |
|
"learning_rate": 1.5643279673978328e-05, |
|
"loss": 1.0689, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4623115577889447, |
|
"grad_norm": 0.25337550044059753, |
|
"learning_rate": 1.563205511149077e-05, |
|
"loss": 1.0846, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.4630294328786791, |
|
"grad_norm": 0.24819082021713257, |
|
"learning_rate": 1.562082731101267e-05, |
|
"loss": 1.1202, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4637473079684135, |
|
"grad_norm": 0.5900534987449646, |
|
"learning_rate": 1.560959633006356e-05, |
|
"loss": 1.1474, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.4644651830581479, |
|
"grad_norm": 0.2737368643283844, |
|
"learning_rate": 1.5598362226179256e-05, |
|
"loss": 1.1296, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.46518305814788224, |
|
"grad_norm": 0.3514651358127594, |
|
"learning_rate": 1.558712505691159e-05, |
|
"loss": 1.0973, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.46590093323761667, |
|
"grad_norm": 0.2335313856601715, |
|
"learning_rate": 1.5575884879828068e-05, |
|
"loss": 1.0952, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.46661880832735103, |
|
"grad_norm": 0.2619829773902893, |
|
"learning_rate": 1.5564641752511638e-05, |
|
"loss": 1.1108, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46733668341708545, |
|
"grad_norm": 0.5428041219711304, |
|
"learning_rate": 1.555339573256034e-05, |
|
"loss": 1.2138, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.4680545585068198, |
|
"grad_norm": 0.35615596175193787, |
|
"learning_rate": 1.5542146877587042e-05, |
|
"loss": 1.1146, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.4687724335965542, |
|
"grad_norm": 0.31751585006713867, |
|
"learning_rate": 1.5530895245219132e-05, |
|
"loss": 1.1038, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.4694903086862886, |
|
"grad_norm": 0.46363669633865356, |
|
"learning_rate": 1.5519640893098227e-05, |
|
"loss": 1.0462, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.47020818377602297, |
|
"grad_norm": 0.23141205310821533, |
|
"learning_rate": 1.550838387887988e-05, |
|
"loss": 1.0729, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.47092605886575734, |
|
"grad_norm": 0.2929474711418152, |
|
"learning_rate": 1.549712426023328e-05, |
|
"loss": 1.0883, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.47164393395549176, |
|
"grad_norm": 0.2350241243839264, |
|
"learning_rate": 1.5485862094840954e-05, |
|
"loss": 1.1719, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.4723618090452261, |
|
"grad_norm": 0.41748782992362976, |
|
"learning_rate": 1.5474597440398485e-05, |
|
"loss": 1.1433, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.47307968413496054, |
|
"grad_norm": 0.3260294795036316, |
|
"learning_rate": 1.5463330354614203e-05, |
|
"loss": 1.0893, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.4737975592246949, |
|
"grad_norm": 0.3263336420059204, |
|
"learning_rate": 1.5452060895208886e-05, |
|
"loss": 1.1113, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4745154343144293, |
|
"grad_norm": 0.24381938576698303, |
|
"learning_rate": 1.5440789119915484e-05, |
|
"loss": 1.1028, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.4752333094041637, |
|
"grad_norm": 1.0602526664733887, |
|
"learning_rate": 1.5429515086478804e-05, |
|
"loss": 1.1219, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.47595118449389806, |
|
"grad_norm": 0.31142809987068176, |
|
"learning_rate": 1.5418238852655228e-05, |
|
"loss": 1.0123, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.4766690595836324, |
|
"grad_norm": 0.24484698474407196, |
|
"learning_rate": 1.5406960476212403e-05, |
|
"loss": 1.092, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.47738693467336685, |
|
"grad_norm": 0.8904862403869629, |
|
"learning_rate": 1.5395680014928957e-05, |
|
"loss": 1.4171, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4781048097631012, |
|
"grad_norm": 0.4050968289375305, |
|
"learning_rate": 1.538439752659419e-05, |
|
"loss": 1.1503, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.47882268485283563, |
|
"grad_norm": 0.3236379027366638, |
|
"learning_rate": 1.5373113069007804e-05, |
|
"loss": 1.1444, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.47954055994257, |
|
"grad_norm": 0.21954692900180817, |
|
"learning_rate": 1.536182669997957e-05, |
|
"loss": 1.0946, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.48025843503230436, |
|
"grad_norm": 0.8067034482955933, |
|
"learning_rate": 1.5350538477329065e-05, |
|
"loss": 1.2717, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.4809763101220388, |
|
"grad_norm": 0.2779107689857483, |
|
"learning_rate": 1.533924845888536e-05, |
|
"loss": 1.1264, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.48169418521177315, |
|
"grad_norm": 0.3051629960536957, |
|
"learning_rate": 1.5327956702486716e-05, |
|
"loss": 1.1387, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.4824120603015075, |
|
"grad_norm": 0.4867023825645447, |
|
"learning_rate": 1.531666326598031e-05, |
|
"loss": 1.1812, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.48312993539124194, |
|
"grad_norm": 0.2711983025074005, |
|
"learning_rate": 1.5305368207221918e-05, |
|
"loss": 1.0567, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.4838478104809763, |
|
"grad_norm": 2.6231706142425537, |
|
"learning_rate": 1.5294071584075628e-05, |
|
"loss": 1.1421, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.48456568557071067, |
|
"grad_norm": 0.21511957049369812, |
|
"learning_rate": 1.5282773454413547e-05, |
|
"loss": 1.0781, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.4852835606604451, |
|
"grad_norm": 0.3087017834186554, |
|
"learning_rate": 1.5271473876115495e-05, |
|
"loss": 1.0447, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.48600143575017946, |
|
"grad_norm": 0.4581223726272583, |
|
"learning_rate": 1.526017290706871e-05, |
|
"loss": 1.2175, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.4867193108399139, |
|
"grad_norm": 0.2953665554523468, |
|
"learning_rate": 1.5248870605167572e-05, |
|
"loss": 1.0522, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.48743718592964824, |
|
"grad_norm": 0.26089370250701904, |
|
"learning_rate": 1.5237567028313263e-05, |
|
"loss": 1.0479, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.4881550610193826, |
|
"grad_norm": 0.5252697467803955, |
|
"learning_rate": 1.5226262234413517e-05, |
|
"loss": 1.2823, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.48887293610911703, |
|
"grad_norm": 0.38791197538375854, |
|
"learning_rate": 1.5214956281382292e-05, |
|
"loss": 1.1448, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.4895908111988514, |
|
"grad_norm": 0.2559528648853302, |
|
"learning_rate": 1.5203649227139491e-05, |
|
"loss": 1.0822, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.49030868628858576, |
|
"grad_norm": 0.42794185876846313, |
|
"learning_rate": 1.519234112961066e-05, |
|
"loss": 1.0962, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.4910265613783202, |
|
"grad_norm": 1.1970304250717163, |
|
"learning_rate": 1.5181032046726674e-05, |
|
"loss": 1.2315, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.49174443646805455, |
|
"grad_norm": 0.43080934882164, |
|
"learning_rate": 1.516972203642348e-05, |
|
"loss": 1.1181, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.49246231155778897, |
|
"grad_norm": 0.2969713509082794, |
|
"learning_rate": 1.5158411156641753e-05, |
|
"loss": 1.1076, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.49318018664752333, |
|
"grad_norm": 0.5752540826797485, |
|
"learning_rate": 1.5147099465326638e-05, |
|
"loss": 1.2848, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.4938980617372577, |
|
"grad_norm": 0.3028205633163452, |
|
"learning_rate": 1.5135787020427432e-05, |
|
"loss": 1.1198, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.4946159368269921, |
|
"grad_norm": 0.22824488580226898, |
|
"learning_rate": 1.5124473879897292e-05, |
|
"loss": 1.0493, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.4953338119167265, |
|
"grad_norm": 0.2328827828168869, |
|
"learning_rate": 1.5113160101692938e-05, |
|
"loss": 1.1029, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.49605168700646085, |
|
"grad_norm": 0.4405105412006378, |
|
"learning_rate": 1.5101845743774362e-05, |
|
"loss": 1.1827, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.49676956209619527, |
|
"grad_norm": 0.24878334999084473, |
|
"learning_rate": 1.5090530864104518e-05, |
|
"loss": 1.0473, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.49748743718592964, |
|
"grad_norm": 0.25342586636543274, |
|
"learning_rate": 1.5079215520649037e-05, |
|
"loss": 1.084, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.49820531227566406, |
|
"grad_norm": 0.29962748289108276, |
|
"learning_rate": 1.5067899771375931e-05, |
|
"loss": 1.1045, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.4989231873653984, |
|
"grad_norm": 0.37971824407577515, |
|
"learning_rate": 1.5056583674255281e-05, |
|
"loss": 1.1018, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.4996410624551328, |
|
"grad_norm": 0.38523370027542114, |
|
"learning_rate": 1.5045267287258955e-05, |
|
"loss": 1.1416, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5003589375448672, |
|
"grad_norm": 0.2509319484233856, |
|
"learning_rate": 1.5033950668360307e-05, |
|
"loss": 1.1139, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5010768126346016, |
|
"grad_norm": 0.2796666622161865, |
|
"learning_rate": 1.5022633875533879e-05, |
|
"loss": 1.0808, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.501794687724336, |
|
"grad_norm": 0.2840481102466583, |
|
"learning_rate": 1.5011316966755103e-05, |
|
"loss": 1.1403, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5025125628140703, |
|
"grad_norm": 0.24237246811389923, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.058, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5032304379038047, |
|
"grad_norm": 0.2952876687049866, |
|
"learning_rate": 1.4988683033244903e-05, |
|
"loss": 1.0582, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5039483129935391, |
|
"grad_norm": 0.2998954951763153, |
|
"learning_rate": 1.4977366124466123e-05, |
|
"loss": 1.0834, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5046661880832735, |
|
"grad_norm": 0.6092143654823303, |
|
"learning_rate": 1.4966049331639695e-05, |
|
"loss": 1.1065, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5053840631730079, |
|
"grad_norm": 0.2629152536392212, |
|
"learning_rate": 1.4954732712741047e-05, |
|
"loss": 1.1233, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5061019382627423, |
|
"grad_norm": 0.2268097698688507, |
|
"learning_rate": 1.4943416325744725e-05, |
|
"loss": 1.0665, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5068198133524767, |
|
"grad_norm": 0.39073359966278076, |
|
"learning_rate": 1.4932100228624073e-05, |
|
"loss": 1.2042, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.507537688442211, |
|
"grad_norm": 20.153024673461914, |
|
"learning_rate": 1.4920784479350962e-05, |
|
"loss": 1.0943, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5082555635319455, |
|
"grad_norm": 0.48045504093170166, |
|
"learning_rate": 1.4909469135895486e-05, |
|
"loss": 1.137, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5089734386216799, |
|
"grad_norm": 0.22653381526470184, |
|
"learning_rate": 1.4898154256225644e-05, |
|
"loss": 1.0343, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5096913137114142, |
|
"grad_norm": 0.208787739276886, |
|
"learning_rate": 1.4886839898307065e-05, |
|
"loss": 1.0188, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5104091888011486, |
|
"grad_norm": 0.2916351556777954, |
|
"learning_rate": 1.487552612010271e-05, |
|
"loss": 1.1134, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.511127063890883, |
|
"grad_norm": 0.29213395714759827, |
|
"learning_rate": 1.486421297957257e-05, |
|
"loss": 1.0753, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5118449389806173, |
|
"grad_norm": 0.30358755588531494, |
|
"learning_rate": 1.4852900534673364e-05, |
|
"loss": 1.1345, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5125628140703518, |
|
"grad_norm": 0.38216859102249146, |
|
"learning_rate": 1.484158884335825e-05, |
|
"loss": 1.1188, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5132806891600862, |
|
"grad_norm": 0.9039308428764343, |
|
"learning_rate": 1.4830277963576525e-05, |
|
"loss": 1.1187, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5139985642498205, |
|
"grad_norm": 0.2617970108985901, |
|
"learning_rate": 1.4818967953273328e-05, |
|
"loss": 1.0439, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5147164393395549, |
|
"grad_norm": 0.22756274044513702, |
|
"learning_rate": 1.4807658870389346e-05, |
|
"loss": 1.0172, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5154343144292893, |
|
"grad_norm": 0.2641029357910156, |
|
"learning_rate": 1.4796350772860511e-05, |
|
"loss": 1.0294, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5161521895190236, |
|
"grad_norm": 0.26552608609199524, |
|
"learning_rate": 1.4785043718617713e-05, |
|
"loss": 1.092, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5168700646087581, |
|
"grad_norm": 0.5893883109092712, |
|
"learning_rate": 1.4773737765586486e-05, |
|
"loss": 1.2536, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5175879396984925, |
|
"grad_norm": 0.6891839504241943, |
|
"learning_rate": 1.4762432971686743e-05, |
|
"loss": 1.237, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5183058147882269, |
|
"grad_norm": 0.21907195448875427, |
|
"learning_rate": 1.4751129394832432e-05, |
|
"loss": 1.1225, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5190236898779612, |
|
"grad_norm": 0.37343645095825195, |
|
"learning_rate": 1.4739827092931291e-05, |
|
"loss": 1.112, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5197415649676956, |
|
"grad_norm": 0.9030300974845886, |
|
"learning_rate": 1.472852612388451e-05, |
|
"loss": 1.3152, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5204594400574301, |
|
"grad_norm": 0.2366669923067093, |
|
"learning_rate": 1.4717226545586454e-05, |
|
"loss": 1.0398, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5211773151471644, |
|
"grad_norm": 0.6108279824256897, |
|
"learning_rate": 1.4705928415924372e-05, |
|
"loss": 1.323, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5218951902368988, |
|
"grad_norm": 0.29814794659614563, |
|
"learning_rate": 1.4694631792778084e-05, |
|
"loss": 1.071, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5226130653266332, |
|
"grad_norm": 0.2940959334373474, |
|
"learning_rate": 1.4683336734019693e-05, |
|
"loss": 1.0795, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5233309404163675, |
|
"grad_norm": 0.5142949819564819, |
|
"learning_rate": 1.4672043297513288e-05, |
|
"loss": 1.4383, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5240488155061019, |
|
"grad_norm": 0.2306988537311554, |
|
"learning_rate": 1.4660751541114641e-05, |
|
"loss": 0.9869, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5247666905958364, |
|
"grad_norm": 0.24959023296833038, |
|
"learning_rate": 1.4649461522670936e-05, |
|
"loss": 1.0652, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5254845656855707, |
|
"grad_norm": 0.23822470009326935, |
|
"learning_rate": 1.4638173300020433e-05, |
|
"loss": 1.0703, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5262024407753051, |
|
"grad_norm": 0.4634005129337311, |
|
"learning_rate": 1.4626886930992199e-05, |
|
"loss": 1.0548, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5269203158650395, |
|
"grad_norm": 0.23906712234020233, |
|
"learning_rate": 1.4615602473405813e-05, |
|
"loss": 1.0999, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5276381909547738, |
|
"grad_norm": 0.23547573387622833, |
|
"learning_rate": 1.4604319985071047e-05, |
|
"loss": 1.132, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5283560660445082, |
|
"grad_norm": 0.2725988030433655, |
|
"learning_rate": 1.45930395237876e-05, |
|
"loss": 1.0686, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5290739411342427, |
|
"grad_norm": 0.2721105217933655, |
|
"learning_rate": 1.4581761147344776e-05, |
|
"loss": 1.048, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.529791816223977, |
|
"grad_norm": 0.22710049152374268, |
|
"learning_rate": 1.4570484913521197e-05, |
|
"loss": 1.1312, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5305096913137114, |
|
"grad_norm": 0.7076483964920044, |
|
"learning_rate": 1.455921088008452e-05, |
|
"loss": 1.247, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5312275664034458, |
|
"grad_norm": 0.23128236830234528, |
|
"learning_rate": 1.454793910479112e-05, |
|
"loss": 1.0295, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5319454414931802, |
|
"grad_norm": 0.3500516414642334, |
|
"learning_rate": 1.4536669645385803e-05, |
|
"loss": 1.0223, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5326633165829145, |
|
"grad_norm": 0.21426959335803986, |
|
"learning_rate": 1.4525402559601517e-05, |
|
"loss": 1.1087, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.533381191672649, |
|
"grad_norm": 0.3150583505630493, |
|
"learning_rate": 1.4514137905159048e-05, |
|
"loss": 1.0764, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5340990667623834, |
|
"grad_norm": 0.6355829834938049, |
|
"learning_rate": 1.4502875739766724e-05, |
|
"loss": 1.1481, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5348169418521177, |
|
"grad_norm": 0.3957350254058838, |
|
"learning_rate": 1.4491616121120125e-05, |
|
"loss": 1.1127, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5355348169418521, |
|
"grad_norm": 0.22465340793132782, |
|
"learning_rate": 1.4480359106901776e-05, |
|
"loss": 1.1038, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5362526920315865, |
|
"grad_norm": 0.30386146903038025, |
|
"learning_rate": 1.4469104754780872e-05, |
|
"loss": 1.0665, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5369705671213209, |
|
"grad_norm": 0.2654268145561218, |
|
"learning_rate": 1.4457853122412962e-05, |
|
"loss": 1.1224, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5376884422110553, |
|
"grad_norm": 0.40003833174705505, |
|
"learning_rate": 1.4446604267439663e-05, |
|
"loss": 1.1331, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5384063173007897, |
|
"grad_norm": 0.8957158923149109, |
|
"learning_rate": 1.4435358247488368e-05, |
|
"loss": 1.3964, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.539124192390524, |
|
"grad_norm": 0.29989439249038696, |
|
"learning_rate": 1.4424115120171933e-05, |
|
"loss": 1.0824, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5398420674802584, |
|
"grad_norm": 0.2515528202056885, |
|
"learning_rate": 1.4412874943088416e-05, |
|
"loss": 1.0601, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5405599425699928, |
|
"grad_norm": 0.7854285836219788, |
|
"learning_rate": 1.4401637773820744e-05, |
|
"loss": 1.4129, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5412778176597272, |
|
"grad_norm": 0.43629977107048035, |
|
"learning_rate": 1.4390403669936444e-05, |
|
"loss": 1.0334, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5419956927494616, |
|
"grad_norm": 0.3499593734741211, |
|
"learning_rate": 1.4379172688987332e-05, |
|
"loss": 1.133, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.542713567839196, |
|
"grad_norm": 0.22085939347743988, |
|
"learning_rate": 1.4367944888509234e-05, |
|
"loss": 1.043, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5434314429289304, |
|
"grad_norm": 0.2278064340353012, |
|
"learning_rate": 1.4356720326021676e-05, |
|
"loss": 1.1319, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5441493180186647, |
|
"grad_norm": 0.6754797101020813, |
|
"learning_rate": 1.4345499059027597e-05, |
|
"loss": 1.1694, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5448671931083992, |
|
"grad_norm": 0.2929733991622925, |
|
"learning_rate": 1.4334281145013056e-05, |
|
"loss": 1.0546, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5455850681981336, |
|
"grad_norm": 0.2639637887477875, |
|
"learning_rate": 1.4323066641446932e-05, |
|
"loss": 1.0479, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5463029432878679, |
|
"grad_norm": 0.4230010509490967, |
|
"learning_rate": 1.4311855605780633e-05, |
|
"loss": 1.1826, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5470208183776023, |
|
"grad_norm": 0.4472368359565735, |
|
"learning_rate": 1.4300648095447807e-05, |
|
"loss": 1.183, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5477386934673367, |
|
"grad_norm": 0.5689032673835754, |
|
"learning_rate": 1.4289444167864028e-05, |
|
"loss": 1.2423, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.548456568557071, |
|
"grad_norm": 0.23773930966854095, |
|
"learning_rate": 1.427824388042653e-05, |
|
"loss": 1.0871, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5491744436468055, |
|
"grad_norm": 0.29465198516845703, |
|
"learning_rate": 1.4267047290513894e-05, |
|
"loss": 1.0822, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5498923187365399, |
|
"grad_norm": 0.5034121870994568, |
|
"learning_rate": 1.4255854455485753e-05, |
|
"loss": 1.1953, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5506101938262742, |
|
"grad_norm": 0.317012220621109, |
|
"learning_rate": 1.4244665432682509e-05, |
|
"loss": 1.0673, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5513280689160086, |
|
"grad_norm": 0.2964475452899933, |
|
"learning_rate": 1.423348027942504e-05, |
|
"loss": 1.0656, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.552045944005743, |
|
"grad_norm": 0.3850986063480377, |
|
"learning_rate": 1.4222299053014388e-05, |
|
"loss": 1.2415, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.5527638190954773, |
|
"grad_norm": 0.25714462995529175, |
|
"learning_rate": 1.4211121810731484e-05, |
|
"loss": 1.1024, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5534816941852118, |
|
"grad_norm": 0.3553484082221985, |
|
"learning_rate": 1.4199948609836855e-05, |
|
"loss": 1.1913, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5541995692749462, |
|
"grad_norm": 0.2542771100997925, |
|
"learning_rate": 1.4188779507570312e-05, |
|
"loss": 1.1042, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5549174443646806, |
|
"grad_norm": 0.3948219120502472, |
|
"learning_rate": 1.4177614561150674e-05, |
|
"loss": 1.0667, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5556353194544149, |
|
"grad_norm": 0.5587366223335266, |
|
"learning_rate": 1.4166453827775474e-05, |
|
"loss": 1.1596, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5563531945441493, |
|
"grad_norm": 0.8336030840873718, |
|
"learning_rate": 1.415529736462066e-05, |
|
"loss": 1.0753, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5570710696338838, |
|
"grad_norm": 0.28086596727371216, |
|
"learning_rate": 1.4144145228840298e-05, |
|
"loss": 1.0443, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5577889447236181, |
|
"grad_norm": 0.4337119162082672, |
|
"learning_rate": 1.4132997477566287e-05, |
|
"loss": 1.0554, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.5585068198133525, |
|
"grad_norm": 0.8185058832168579, |
|
"learning_rate": 1.4121854167908073e-05, |
|
"loss": 1.5668, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5592246949030869, |
|
"grad_norm": 0.4682464301586151, |
|
"learning_rate": 1.4110715356952338e-05, |
|
"loss": 1.1536, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5599425699928212, |
|
"grad_norm": 0.33439281582832336, |
|
"learning_rate": 1.409958110176272e-05, |
|
"loss": 1.114, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5606604450825556, |
|
"grad_norm": 1.218021035194397, |
|
"learning_rate": 1.4088451459379522e-05, |
|
"loss": 1.4435, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5613783201722901, |
|
"grad_norm": 0.2248886376619339, |
|
"learning_rate": 1.4077326486819404e-05, |
|
"loss": 1.0371, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5620961952620244, |
|
"grad_norm": 0.2860560417175293, |
|
"learning_rate": 1.4066206241075124e-05, |
|
"loss": 1.0846, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5628140703517588, |
|
"grad_norm": 0.26693689823150635, |
|
"learning_rate": 1.4055090779115204e-05, |
|
"loss": 1.0495, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5635319454414932, |
|
"grad_norm": 0.45003026723861694, |
|
"learning_rate": 1.4043980157883665e-05, |
|
"loss": 1.2092, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5642498205312275, |
|
"grad_norm": 0.295254647731781, |
|
"learning_rate": 1.4032874434299736e-05, |
|
"loss": 1.1434, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.5649676956209619, |
|
"grad_norm": 0.2593385875225067, |
|
"learning_rate": 1.4021773665257547e-05, |
|
"loss": 1.0443, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5656855707106964, |
|
"grad_norm": 0.2153460830450058, |
|
"learning_rate": 1.4010677907625852e-05, |
|
"loss": 0.9911, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5664034458004307, |
|
"grad_norm": 0.2601267397403717, |
|
"learning_rate": 1.399958721824773e-05, |
|
"loss": 1.0849, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.5671213208901651, |
|
"grad_norm": 0.37321093678474426, |
|
"learning_rate": 1.3988501653940292e-05, |
|
"loss": 1.1383, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5678391959798995, |
|
"grad_norm": 0.3128059506416321, |
|
"learning_rate": 1.3977421271494395e-05, |
|
"loss": 1.0725, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.5685570710696339, |
|
"grad_norm": 0.2780809700489044, |
|
"learning_rate": 1.3966346127674357e-05, |
|
"loss": 1.0723, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5692749461593682, |
|
"grad_norm": 0.27749887108802795, |
|
"learning_rate": 1.395527627921765e-05, |
|
"loss": 1.1419, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.5699928212491027, |
|
"grad_norm": 0.8342905044555664, |
|
"learning_rate": 1.3944211782834617e-05, |
|
"loss": 1.0857, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.5707106963388371, |
|
"grad_norm": 0.3646883964538574, |
|
"learning_rate": 1.3933152695208195e-05, |
|
"loss": 1.0302, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.5998344421386719, |
|
"learning_rate": 1.3922099072993595e-05, |
|
"loss": 1.2071, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5721464465183058, |
|
"grad_norm": 0.44291579723358154, |
|
"learning_rate": 1.391105097281804e-05, |
|
"loss": 1.0039, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.5728643216080402, |
|
"grad_norm": 0.4438372254371643, |
|
"learning_rate": 1.3900008451280463e-05, |
|
"loss": 1.0657, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.5735821966977745, |
|
"grad_norm": 0.2658810615539551, |
|
"learning_rate": 1.3888971564951214e-05, |
|
"loss": 1.128, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.574300071787509, |
|
"grad_norm": 0.24561521410942078, |
|
"learning_rate": 1.387794037037178e-05, |
|
"loss": 1.0229, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5750179468772434, |
|
"grad_norm": 0.24019256234169006, |
|
"learning_rate": 1.3866914924054484e-05, |
|
"loss": 1.0851, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.5757358219669777, |
|
"grad_norm": 0.2257530838251114, |
|
"learning_rate": 1.3855895282482202e-05, |
|
"loss": 1.0273, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.5764536970567121, |
|
"grad_norm": 0.36407381296157837, |
|
"learning_rate": 1.3844881502108068e-05, |
|
"loss": 1.0881, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.5771715721464465, |
|
"grad_norm": 0.23725035786628723, |
|
"learning_rate": 1.3833873639355205e-05, |
|
"loss": 1.0479, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5778894472361809, |
|
"grad_norm": 0.28036853671073914, |
|
"learning_rate": 1.3822871750616402e-05, |
|
"loss": 1.0658, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5786073223259153, |
|
"grad_norm": 0.2925416827201843, |
|
"learning_rate": 1.3811875892253855e-05, |
|
"loss": 1.0781, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.5793251974156497, |
|
"grad_norm": 0.4388463795185089, |
|
"learning_rate": 1.3800886120598859e-05, |
|
"loss": 1.1809, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.5800430725053841, |
|
"grad_norm": 0.23365306854248047, |
|
"learning_rate": 1.3789902491951535e-05, |
|
"loss": 1.1173, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.5807609475951184, |
|
"grad_norm": 0.3360154330730438, |
|
"learning_rate": 1.3778925062580528e-05, |
|
"loss": 1.0896, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.5814788226848528, |
|
"grad_norm": 0.2643462121486664, |
|
"learning_rate": 1.3767953888722726e-05, |
|
"loss": 1.0878, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5821966977745873, |
|
"grad_norm": 0.21491171419620514, |
|
"learning_rate": 1.3756989026582967e-05, |
|
"loss": 1.1152, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.5829145728643216, |
|
"grad_norm": 0.9810214638710022, |
|
"learning_rate": 1.3746030532333765e-05, |
|
"loss": 1.2416, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.583632447954056, |
|
"grad_norm": 0.3351687788963318, |
|
"learning_rate": 1.3735078462114994e-05, |
|
"loss": 1.0991, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.5843503230437904, |
|
"grad_norm": 0.5390233397483826, |
|
"learning_rate": 1.3724132872033637e-05, |
|
"loss": 1.1722, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.5850681981335247, |
|
"grad_norm": 0.20129382610321045, |
|
"learning_rate": 1.3713193818163468e-05, |
|
"loss": 1.0588, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5857860732232592, |
|
"grad_norm": 0.2601025700569153, |
|
"learning_rate": 1.3702261356544778e-05, |
|
"loss": 1.0887, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.5865039483129936, |
|
"grad_norm": 0.3282659351825714, |
|
"learning_rate": 1.3691335543184087e-05, |
|
"loss": 1.1, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.5872218234027279, |
|
"grad_norm": 0.22936835885047913, |
|
"learning_rate": 1.3680416434053854e-05, |
|
"loss": 1.0494, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.5879396984924623, |
|
"grad_norm": 0.6817067861557007, |
|
"learning_rate": 1.3669504085092201e-05, |
|
"loss": 1.1669, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.5886575735821967, |
|
"grad_norm": 0.25036540627479553, |
|
"learning_rate": 1.365859855220261e-05, |
|
"loss": 1.0679, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.589375448671931, |
|
"grad_norm": 0.29915371537208557, |
|
"learning_rate": 1.364769989125364e-05, |
|
"loss": 1.0822, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.5900933237616655, |
|
"grad_norm": 0.2721613049507141, |
|
"learning_rate": 1.3636808158078659e-05, |
|
"loss": 1.1265, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.5908111988513999, |
|
"grad_norm": 0.26752978563308716, |
|
"learning_rate": 1.3625923408475532e-05, |
|
"loss": 1.0853, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.5915290739411343, |
|
"grad_norm": 0.7966066598892212, |
|
"learning_rate": 1.3615045698206357e-05, |
|
"loss": 1.1633, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.5922469490308686, |
|
"grad_norm": 0.29881155490875244, |
|
"learning_rate": 1.3604175082997161e-05, |
|
"loss": 1.0575, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.592964824120603, |
|
"grad_norm": 0.3675658702850342, |
|
"learning_rate": 1.3593311618537635e-05, |
|
"loss": 1.0024, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.5936826992103375, |
|
"grad_norm": 0.25546130537986755, |
|
"learning_rate": 1.3582455360480821e-05, |
|
"loss": 1.1442, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.5944005743000718, |
|
"grad_norm": 0.42129403352737427, |
|
"learning_rate": 1.3571606364442858e-05, |
|
"loss": 1.1836, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.5951184493898062, |
|
"grad_norm": 0.2958780825138092, |
|
"learning_rate": 1.356076468600267e-05, |
|
"loss": 1.0958, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.5958363244795406, |
|
"grad_norm": 0.27691933512687683, |
|
"learning_rate": 1.3549930380701702e-05, |
|
"loss": 1.0744, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5965541995692749, |
|
"grad_norm": 0.5291327834129333, |
|
"learning_rate": 1.3539103504043625e-05, |
|
"loss": 1.0502, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.5972720746590093, |
|
"grad_norm": 0.22101576626300812, |
|
"learning_rate": 1.352828411149405e-05, |
|
"loss": 1.0619, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.5979899497487438, |
|
"grad_norm": 0.5621711611747742, |
|
"learning_rate": 1.3517472258480251e-05, |
|
"loss": 1.2351, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.5987078248384781, |
|
"grad_norm": 0.3979596495628357, |
|
"learning_rate": 1.350666800039087e-05, |
|
"loss": 1.1566, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.5994256999282125, |
|
"grad_norm": 0.28108641505241394, |
|
"learning_rate": 1.3495871392575652e-05, |
|
"loss": 1.0411, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6001435750179469, |
|
"grad_norm": 0.3246525228023529, |
|
"learning_rate": 1.3485082490345144e-05, |
|
"loss": 1.0516, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6008614501076812, |
|
"grad_norm": 0.21024064719676971, |
|
"learning_rate": 1.3474301348970415e-05, |
|
"loss": 1.0598, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6015793251974156, |
|
"grad_norm": 0.46584293246269226, |
|
"learning_rate": 1.346352802368278e-05, |
|
"loss": 1.2195, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6022972002871501, |
|
"grad_norm": 0.2288856953382492, |
|
"learning_rate": 1.3452762569673508e-05, |
|
"loss": 1.1058, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6030150753768844, |
|
"grad_norm": 0.23454317450523376, |
|
"learning_rate": 1.3442005042093546e-05, |
|
"loss": 1.0777, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6037329504666188, |
|
"grad_norm": 0.35311251878738403, |
|
"learning_rate": 1.3431255496053241e-05, |
|
"loss": 1.1497, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6044508255563532, |
|
"grad_norm": 0.3204866051673889, |
|
"learning_rate": 1.342051398662204e-05, |
|
"loss": 1.065, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6051687006460876, |
|
"grad_norm": 0.2498190850019455, |
|
"learning_rate": 1.3409780568828223e-05, |
|
"loss": 1.0621, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6058865757358219, |
|
"grad_norm": 0.31979426741600037, |
|
"learning_rate": 1.3399055297658615e-05, |
|
"loss": 1.0562, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6066044508255564, |
|
"grad_norm": 0.5402187705039978, |
|
"learning_rate": 1.3388338228058314e-05, |
|
"loss": 1.0896, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6073223259152908, |
|
"grad_norm": 0.2143259346485138, |
|
"learning_rate": 1.3377629414930397e-05, |
|
"loss": 1.0851, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6080402010050251, |
|
"grad_norm": 0.41969624161720276, |
|
"learning_rate": 1.3366928913135638e-05, |
|
"loss": 1.1066, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6087580760947595, |
|
"grad_norm": 0.20470081269741058, |
|
"learning_rate": 1.3356236777492238e-05, |
|
"loss": 1.1321, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6094759511844939, |
|
"grad_norm": 0.7368186116218567, |
|
"learning_rate": 1.3345553062775536e-05, |
|
"loss": 1.1907, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6101938262742282, |
|
"grad_norm": 0.32906386256217957, |
|
"learning_rate": 1.3334877823717737e-05, |
|
"loss": 1.042, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6109117013639627, |
|
"grad_norm": 0.24521546065807343, |
|
"learning_rate": 1.3324211115007622e-05, |
|
"loss": 1.0404, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6116295764536971, |
|
"grad_norm": 0.2039506733417511, |
|
"learning_rate": 1.3313552991290264e-05, |
|
"loss": 1.0194, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6123474515434314, |
|
"grad_norm": 0.25647541880607605, |
|
"learning_rate": 1.3302903507166768e-05, |
|
"loss": 1.0275, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6130653266331658, |
|
"grad_norm": 0.253371000289917, |
|
"learning_rate": 1.3292262717193973e-05, |
|
"loss": 1.061, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6137832017229002, |
|
"grad_norm": 0.41155293583869934, |
|
"learning_rate": 1.3281630675884172e-05, |
|
"loss": 1.1474, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6145010768126346, |
|
"grad_norm": 0.2841004729270935, |
|
"learning_rate": 1.3271007437704853e-05, |
|
"loss": 1.0809, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.615218951902369, |
|
"grad_norm": 0.36953848600387573, |
|
"learning_rate": 1.3260393057078391e-05, |
|
"loss": 0.9836, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6159368269921034, |
|
"grad_norm": 8.750887870788574, |
|
"learning_rate": 1.3249787588381797e-05, |
|
"loss": 1.1018, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6166547020818378, |
|
"grad_norm": 0.30933064222335815, |
|
"learning_rate": 1.3239191085946416e-05, |
|
"loss": 1.0642, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6173725771715721, |
|
"grad_norm": 0.2562330961227417, |
|
"learning_rate": 1.3228603604057666e-05, |
|
"loss": 1.0809, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6180904522613065, |
|
"grad_norm": 0.6192391514778137, |
|
"learning_rate": 1.3218025196954752e-05, |
|
"loss": 1.0715, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.618808327351041, |
|
"grad_norm": 0.4535701274871826, |
|
"learning_rate": 1.3207455918830386e-05, |
|
"loss": 1.1654, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6195262024407753, |
|
"grad_norm": 0.2728825807571411, |
|
"learning_rate": 1.3196895823830516e-05, |
|
"loss": 1.0756, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6202440775305097, |
|
"grad_norm": 0.2439497709274292, |
|
"learning_rate": 1.3186344966054048e-05, |
|
"loss": 1.1135, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6209619526202441, |
|
"grad_norm": 0.5989809036254883, |
|
"learning_rate": 1.3175803399552553e-05, |
|
"loss": 1.2831, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6216798277099784, |
|
"grad_norm": 0.2545377314090729, |
|
"learning_rate": 1.3165271178330024e-05, |
|
"loss": 1.053, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6223977027997128, |
|
"grad_norm": 0.27281951904296875, |
|
"learning_rate": 1.3154748356342562e-05, |
|
"loss": 1.1144, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6231155778894473, |
|
"grad_norm": 0.29359978437423706, |
|
"learning_rate": 1.3144234987498118e-05, |
|
"loss": 1.0385, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6238334529791816, |
|
"grad_norm": 0.5095093250274658, |
|
"learning_rate": 1.3133731125656224e-05, |
|
"loss": 1.1486, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.624551328068916, |
|
"grad_norm": 0.28710466623306274, |
|
"learning_rate": 1.3123236824627696e-05, |
|
"loss": 1.061, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6252692031586504, |
|
"grad_norm": 0.22760869562625885, |
|
"learning_rate": 1.3112752138174382e-05, |
|
"loss": 1.0662, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6259870782483847, |
|
"grad_norm": 0.298197478055954, |
|
"learning_rate": 1.310227712000887e-05, |
|
"loss": 1.1286, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6267049533381192, |
|
"grad_norm": 0.2910802364349365, |
|
"learning_rate": 1.3091811823794214e-05, |
|
"loss": 1.0378, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6274228284278536, |
|
"grad_norm": 0.3472384214401245, |
|
"learning_rate": 1.3081356303143669e-05, |
|
"loss": 1.2315, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.628140703517588, |
|
"grad_norm": 0.3093032240867615, |
|
"learning_rate": 1.3070910611620402e-05, |
|
"loss": 1.0685, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6288585786073223, |
|
"grad_norm": 0.2772822976112366, |
|
"learning_rate": 1.306047480273724e-05, |
|
"loss": 1.042, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6295764536970567, |
|
"grad_norm": 0.2791323959827423, |
|
"learning_rate": 1.3050048929956367e-05, |
|
"loss": 1.0863, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6302943287867911, |
|
"grad_norm": 0.26562055945396423, |
|
"learning_rate": 1.3039633046689071e-05, |
|
"loss": 1.0598, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6310122038765255, |
|
"grad_norm": 0.23874737322330475, |
|
"learning_rate": 1.3029227206295465e-05, |
|
"loss": 0.9481, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6317300789662599, |
|
"grad_norm": 0.5026124715805054, |
|
"learning_rate": 1.3018831462084211e-05, |
|
"loss": 1.1357, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6324479540559943, |
|
"grad_norm": 0.24282878637313843, |
|
"learning_rate": 1.3008445867312251e-05, |
|
"loss": 1.0439, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6331658291457286, |
|
"grad_norm": 0.2905985713005066, |
|
"learning_rate": 1.2998070475184533e-05, |
|
"loss": 1.0526, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.633883704235463, |
|
"grad_norm": 0.25067904591560364, |
|
"learning_rate": 1.2987705338853724e-05, |
|
"loss": 1.0396, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6346015793251975, |
|
"grad_norm": 0.2646895945072174, |
|
"learning_rate": 1.2977350511419973e-05, |
|
"loss": 1.0337, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6353194544149318, |
|
"grad_norm": 0.7052388787269592, |
|
"learning_rate": 1.2967006045930602e-05, |
|
"loss": 1.1606, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6360373295046662, |
|
"grad_norm": 0.23824433982372284, |
|
"learning_rate": 1.2956671995379848e-05, |
|
"loss": 1.0932, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6367552045944006, |
|
"grad_norm": 0.3025887906551361, |
|
"learning_rate": 1.2946348412708604e-05, |
|
"loss": 1.0138, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6374730796841349, |
|
"grad_norm": 0.43609052896499634, |
|
"learning_rate": 1.2936035350804127e-05, |
|
"loss": 1.2193, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6381909547738693, |
|
"grad_norm": 0.6852173209190369, |
|
"learning_rate": 1.292573286249978e-05, |
|
"loss": 1.1668, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6389088298636038, |
|
"grad_norm": 0.3734978437423706, |
|
"learning_rate": 1.291544100057476e-05, |
|
"loss": 1.0624, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6396267049533381, |
|
"grad_norm": 0.22185556590557098, |
|
"learning_rate": 1.2905159817753816e-05, |
|
"loss": 1.009, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6403445800430725, |
|
"grad_norm": 0.586806058883667, |
|
"learning_rate": 1.2894889366707001e-05, |
|
"loss": 1.3122, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6410624551328069, |
|
"grad_norm": 0.5803049802780151, |
|
"learning_rate": 1.2884629700049385e-05, |
|
"loss": 1.3217, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6417803302225413, |
|
"grad_norm": 0.29484057426452637, |
|
"learning_rate": 1.2874380870340789e-05, |
|
"loss": 1.1699, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6424982053122756, |
|
"grad_norm": 0.4079907536506653, |
|
"learning_rate": 1.286414293008551e-05, |
|
"loss": 1.0945, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6432160804020101, |
|
"grad_norm": 0.3369198441505432, |
|
"learning_rate": 1.2853915931732073e-05, |
|
"loss": 1.0572, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.6439339554917445, |
|
"grad_norm": 0.34682968258857727, |
|
"learning_rate": 1.2843699927672941e-05, |
|
"loss": 1.0567, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6446518305814788, |
|
"grad_norm": 0.22931355237960815, |
|
"learning_rate": 1.2833494970244249e-05, |
|
"loss": 1.0373, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6453697056712132, |
|
"grad_norm": 0.3325895369052887, |
|
"learning_rate": 1.2823301111725547e-05, |
|
"loss": 1.0604, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6460875807609476, |
|
"grad_norm": 0.33054086565971375, |
|
"learning_rate": 1.2813118404339526e-05, |
|
"loss": 1.0503, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6468054558506819, |
|
"grad_norm": 0.22238993644714355, |
|
"learning_rate": 1.2802946900251743e-05, |
|
"loss": 1.0339, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6475233309404164, |
|
"grad_norm": 0.4186948835849762, |
|
"learning_rate": 1.2792786651570373e-05, |
|
"loss": 1.0557, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.6482412060301508, |
|
"grad_norm": 0.574847400188446, |
|
"learning_rate": 1.2782637710345917e-05, |
|
"loss": 1.2136, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6489590811198851, |
|
"grad_norm": 0.2611059844493866, |
|
"learning_rate": 1.2772500128570955e-05, |
|
"loss": 1.046, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6496769562096195, |
|
"grad_norm": 0.2530059814453125, |
|
"learning_rate": 1.2762373958179878e-05, |
|
"loss": 1.0891, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6503948312993539, |
|
"grad_norm": 0.23121270537376404, |
|
"learning_rate": 1.2752259251048607e-05, |
|
"loss": 1.1423, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6511127063890882, |
|
"grad_norm": 0.36725449562072754, |
|
"learning_rate": 1.2742156058994343e-05, |
|
"loss": 1.0788, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6518305814788227, |
|
"grad_norm": 0.2668430209159851, |
|
"learning_rate": 1.2732064433775297e-05, |
|
"loss": 1.0675, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6525484565685571, |
|
"grad_norm": 0.696319580078125, |
|
"learning_rate": 1.272198442709042e-05, |
|
"loss": 1.5091, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6532663316582915, |
|
"grad_norm": 0.31487971544265747, |
|
"learning_rate": 1.2711916090579137e-05, |
|
"loss": 1.061, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6539842067480258, |
|
"grad_norm": 0.36551928520202637, |
|
"learning_rate": 1.2701859475821101e-05, |
|
"loss": 1.2043, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6547020818377602, |
|
"grad_norm": 0.22510908544063568, |
|
"learning_rate": 1.2691814634335904e-05, |
|
"loss": 1.0643, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6554199569274947, |
|
"grad_norm": 0.2475784569978714, |
|
"learning_rate": 1.2681781617582827e-05, |
|
"loss": 1.0952, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.656137832017229, |
|
"grad_norm": 0.19480234384536743, |
|
"learning_rate": 1.267176047696057e-05, |
|
"loss": 1.047, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6568557071069634, |
|
"grad_norm": 0.5187938213348389, |
|
"learning_rate": 1.2661751263807004e-05, |
|
"loss": 1.1726, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6575735821966978, |
|
"grad_norm": 0.2590419054031372, |
|
"learning_rate": 1.2651754029398884e-05, |
|
"loss": 1.0265, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6582914572864321, |
|
"grad_norm": 0.31151992082595825, |
|
"learning_rate": 1.2641768824951599e-05, |
|
"loss": 1.0771, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6590093323761665, |
|
"grad_norm": 0.5828796625137329, |
|
"learning_rate": 1.2631795701618916e-05, |
|
"loss": 1.1212, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.659727207465901, |
|
"grad_norm": 0.28743061423301697, |
|
"learning_rate": 1.2621834710492706e-05, |
|
"loss": 1.0482, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6604450825556353, |
|
"grad_norm": 0.2772831916809082, |
|
"learning_rate": 1.261188590260269e-05, |
|
"loss": 0.9182, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6611629576453697, |
|
"grad_norm": 0.24524107575416565, |
|
"learning_rate": 1.2601949328916173e-05, |
|
"loss": 1.0694, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6618808327351041, |
|
"grad_norm": 0.8398991227149963, |
|
"learning_rate": 1.2592025040337782e-05, |
|
"loss": 1.0917, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6625987078248384, |
|
"grad_norm": 0.21522751450538635, |
|
"learning_rate": 1.2582113087709211e-05, |
|
"loss": 1.0471, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6633165829145728, |
|
"grad_norm": 0.6712577939033508, |
|
"learning_rate": 1.257221352180896e-05, |
|
"loss": 1.2126, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6640344580043073, |
|
"grad_norm": 0.2957814931869507, |
|
"learning_rate": 1.2562326393352071e-05, |
|
"loss": 1.0818, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6647523330940417, |
|
"grad_norm": 0.30718493461608887, |
|
"learning_rate": 1.2552451752989866e-05, |
|
"loss": 1.0719, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.665470208183776, |
|
"grad_norm": 0.3204226791858673, |
|
"learning_rate": 1.2542589651309692e-05, |
|
"loss": 1.1139, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.6661880832735104, |
|
"grad_norm": 0.6014212965965271, |
|
"learning_rate": 1.2532740138834667e-05, |
|
"loss": 1.1985, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.6669059583632448, |
|
"grad_norm": 0.37291762232780457, |
|
"learning_rate": 1.2522903266023402e-05, |
|
"loss": 1.0647, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.6676238334529792, |
|
"grad_norm": 0.2623102068901062, |
|
"learning_rate": 1.2513079083269774e-05, |
|
"loss": 1.0804, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6683417085427136, |
|
"grad_norm": 0.3214200437068939, |
|
"learning_rate": 1.2503267640902634e-05, |
|
"loss": 1.0345, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.669059583632448, |
|
"grad_norm": 0.32022109627723694, |
|
"learning_rate": 1.2493468989185566e-05, |
|
"loss": 1.1192, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6697774587221823, |
|
"grad_norm": 0.20529402792453766, |
|
"learning_rate": 1.248368317831664e-05, |
|
"loss": 1.099, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.6704953338119167, |
|
"grad_norm": 0.3141288757324219, |
|
"learning_rate": 1.2473910258428128e-05, |
|
"loss": 0.9543, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.6712132089016511, |
|
"grad_norm": 0.2604081928730011, |
|
"learning_rate": 1.2464150279586269e-05, |
|
"loss": 1.0645, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6719310839913855, |
|
"grad_norm": 0.4819868505001068, |
|
"learning_rate": 1.2454403291791011e-05, |
|
"loss": 1.1232, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6726489590811199, |
|
"grad_norm": 0.5241478085517883, |
|
"learning_rate": 1.2444669344975736e-05, |
|
"loss": 1.1311, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.6733668341708543, |
|
"grad_norm": 0.23811385035514832, |
|
"learning_rate": 1.2434948489007036e-05, |
|
"loss": 1.0201, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.6740847092605886, |
|
"grad_norm": 0.5078403353691101, |
|
"learning_rate": 1.2425240773684421e-05, |
|
"loss": 1.1095, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.674802584350323, |
|
"grad_norm": 0.23643115162849426, |
|
"learning_rate": 1.241554624874009e-05, |
|
"loss": 1.0722, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6755204594400575, |
|
"grad_norm": 0.2984163761138916, |
|
"learning_rate": 1.240586496383867e-05, |
|
"loss": 1.1243, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.6762383345297918, |
|
"grad_norm": 0.3374740481376648, |
|
"learning_rate": 1.2396196968576958e-05, |
|
"loss": 1.1053, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.6769562096195262, |
|
"grad_norm": 0.5124977827072144, |
|
"learning_rate": 1.2386542312483665e-05, |
|
"loss": 1.2845, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.6776740847092606, |
|
"grad_norm": 0.24151629209518433, |
|
"learning_rate": 1.2376901045019172e-05, |
|
"loss": 1.076, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.678391959798995, |
|
"grad_norm": 0.3060801029205322, |
|
"learning_rate": 1.2367273215575268e-05, |
|
"loss": 1.0524, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6791098348887293, |
|
"grad_norm": 0.4811931848526001, |
|
"learning_rate": 1.2357658873474902e-05, |
|
"loss": 1.2068, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.6798277099784638, |
|
"grad_norm": 0.20122970640659332, |
|
"learning_rate": 1.2348058067971924e-05, |
|
"loss": 1.0434, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.6805455850681982, |
|
"grad_norm": 0.5476710200309753, |
|
"learning_rate": 1.2338470848250838e-05, |
|
"loss": 1.1639, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6812634601579325, |
|
"grad_norm": 0.528336763381958, |
|
"learning_rate": 1.2328897263426549e-05, |
|
"loss": 1.0719, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.6819813352476669, |
|
"grad_norm": 0.2582956850528717, |
|
"learning_rate": 1.2319337362544113e-05, |
|
"loss": 1.0254, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6826992103374013, |
|
"grad_norm": 0.48091012239456177, |
|
"learning_rate": 1.2309791194578478e-05, |
|
"loss": 1.0316, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.6834170854271356, |
|
"grad_norm": 0.2367895096540451, |
|
"learning_rate": 1.2300258808434247e-05, |
|
"loss": 1.0122, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6841349605168701, |
|
"grad_norm": 0.3652105927467346, |
|
"learning_rate": 1.229074025294541e-05, |
|
"loss": 1.0398, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.6848528356066045, |
|
"grad_norm": 0.30559220910072327, |
|
"learning_rate": 1.228123557687511e-05, |
|
"loss": 1.1184, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.6855707106963388, |
|
"grad_norm": 0.3181764781475067, |
|
"learning_rate": 1.227174482891539e-05, |
|
"loss": 1.0641, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6862885857860732, |
|
"grad_norm": 0.6308692693710327, |
|
"learning_rate": 1.2262268057686925e-05, |
|
"loss": 1.1922, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6870064608758076, |
|
"grad_norm": 0.2383725345134735, |
|
"learning_rate": 1.2252805311738807e-05, |
|
"loss": 1.1138, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.6877243359655419, |
|
"grad_norm": 0.38409408926963806, |
|
"learning_rate": 1.2243356639548258e-05, |
|
"loss": 1.1791, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.6884422110552764, |
|
"grad_norm": 0.24311621487140656, |
|
"learning_rate": 1.2233922089520419e-05, |
|
"loss": 1.044, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.6891600861450108, |
|
"grad_norm": 0.4635167717933655, |
|
"learning_rate": 1.2224501709988069e-05, |
|
"loss": 1.162, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6898779612347452, |
|
"grad_norm": 0.22711965441703796, |
|
"learning_rate": 1.2215095549211398e-05, |
|
"loss": 1.0656, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.6905958363244795, |
|
"grad_norm": 0.5316239595413208, |
|
"learning_rate": 1.2205703655377756e-05, |
|
"loss": 1.1734, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.6913137114142139, |
|
"grad_norm": 0.3557806611061096, |
|
"learning_rate": 1.21963260766014e-05, |
|
"loss": 1.1013, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.6920315865039484, |
|
"grad_norm": 5.730634689331055, |
|
"learning_rate": 1.2186962860923259e-05, |
|
"loss": 1.0092, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.6927494615936827, |
|
"grad_norm": 0.245683953166008, |
|
"learning_rate": 1.217761405631067e-05, |
|
"loss": 1.0494, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6934673366834171, |
|
"grad_norm": 0.24788272380828857, |
|
"learning_rate": 1.2168279710657149e-05, |
|
"loss": 1.0461, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.6941852117731515, |
|
"grad_norm": 1.2116427421569824, |
|
"learning_rate": 1.2158959871782142e-05, |
|
"loss": 1.3337, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.6949030868628858, |
|
"grad_norm": 0.20844808220863342, |
|
"learning_rate": 1.2149654587430767e-05, |
|
"loss": 1.0395, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.6956209619526202, |
|
"grad_norm": 0.47707536816596985, |
|
"learning_rate": 1.2140363905273586e-05, |
|
"loss": 1.2071, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.6963388370423547, |
|
"grad_norm": 0.3193322420120239, |
|
"learning_rate": 1.2131087872906364e-05, |
|
"loss": 1.0471, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.697056712132089, |
|
"grad_norm": 0.29196181893348694, |
|
"learning_rate": 1.2121826537849803e-05, |
|
"loss": 1.0051, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.6977745872218234, |
|
"grad_norm": 0.30033227801322937, |
|
"learning_rate": 1.2112579947549313e-05, |
|
"loss": 1.05, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.6984924623115578, |
|
"grad_norm": 3.0059380531311035, |
|
"learning_rate": 1.210334814937477e-05, |
|
"loss": 1.2449, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.6992103374012921, |
|
"grad_norm": 0.24989262223243713, |
|
"learning_rate": 1.2094131190620268e-05, |
|
"loss": 1.123, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.6999282124910265, |
|
"grad_norm": 0.22237707674503326, |
|
"learning_rate": 1.2084929118503888e-05, |
|
"loss": 1.0222, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.700646087580761, |
|
"grad_norm": 0.34449517726898193, |
|
"learning_rate": 1.2075741980167432e-05, |
|
"loss": 1.0971, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7013639626704954, |
|
"grad_norm": 0.2557564675807953, |
|
"learning_rate": 1.2066569822676212e-05, |
|
"loss": 1.0674, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7020818377602297, |
|
"grad_norm": 0.24336853623390198, |
|
"learning_rate": 1.2057412693018788e-05, |
|
"loss": 1.0314, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7027997128499641, |
|
"grad_norm": 0.44501352310180664, |
|
"learning_rate": 1.2048270638106729e-05, |
|
"loss": 1.0492, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7035175879396985, |
|
"grad_norm": 0.39975109696388245, |
|
"learning_rate": 1.2039143704774383e-05, |
|
"loss": 1.1048, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7042354630294329, |
|
"grad_norm": 0.2642357349395752, |
|
"learning_rate": 1.2030031939778627e-05, |
|
"loss": 1.0703, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7049533381191673, |
|
"grad_norm": 0.23442108929157257, |
|
"learning_rate": 1.202093538979863e-05, |
|
"loss": 1.0812, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7056712132089017, |
|
"grad_norm": 0.23268234729766846, |
|
"learning_rate": 1.2011854101435621e-05, |
|
"loss": 1.0745, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.706389088298636, |
|
"grad_norm": 0.49588751792907715, |
|
"learning_rate": 1.2002788121212636e-05, |
|
"loss": 1.2131, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7071069633883704, |
|
"grad_norm": 0.5172017812728882, |
|
"learning_rate": 1.19937374955743e-05, |
|
"loss": 1.2266, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7078248384781048, |
|
"grad_norm": 0.25582894682884216, |
|
"learning_rate": 1.1984702270886567e-05, |
|
"loss": 1.0799, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7085427135678392, |
|
"grad_norm": 0.2163057029247284, |
|
"learning_rate": 1.1975682493436494e-05, |
|
"loss": 1.0636, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7092605886575736, |
|
"grad_norm": 0.25939279794692993, |
|
"learning_rate": 1.1966678209432005e-05, |
|
"loss": 1.0804, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.709978463747308, |
|
"grad_norm": 0.7320495843887329, |
|
"learning_rate": 1.1957689465001651e-05, |
|
"loss": 0.9866, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7106963388370423, |
|
"grad_norm": 0.24018999934196472, |
|
"learning_rate": 1.1948716306194377e-05, |
|
"loss": 1.0456, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7114142139267767, |
|
"grad_norm": 0.46888262033462524, |
|
"learning_rate": 1.1939758778979278e-05, |
|
"loss": 1.0181, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7121320890165111, |
|
"grad_norm": 0.342295378446579, |
|
"learning_rate": 1.1930816929245372e-05, |
|
"loss": 1.0113, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.7128499641062455, |
|
"grad_norm": 3.6695358753204346, |
|
"learning_rate": 1.1921890802801366e-05, |
|
"loss": 1.0917, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7135678391959799, |
|
"grad_norm": 0.2448820322751999, |
|
"learning_rate": 1.1912980445375407e-05, |
|
"loss": 1.0759, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.260084331035614, |
|
"learning_rate": 1.1904085902614869e-05, |
|
"loss": 1.1095, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7150035893754487, |
|
"grad_norm": 0.3287331759929657, |
|
"learning_rate": 1.18952072200861e-05, |
|
"loss": 1.0174, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.715721464465183, |
|
"grad_norm": 0.5043054819107056, |
|
"learning_rate": 1.18863444432742e-05, |
|
"loss": 1.0854, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7164393395549175, |
|
"grad_norm": 0.31419816613197327, |
|
"learning_rate": 1.1877497617582789e-05, |
|
"loss": 1.1022, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7171572146446519, |
|
"grad_norm": 0.6088225841522217, |
|
"learning_rate": 1.1868666788333765e-05, |
|
"loss": 1.1555, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7178750897343862, |
|
"grad_norm": 0.5905667543411255, |
|
"learning_rate": 1.1859852000767077e-05, |
|
"loss": 1.1458, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7185929648241206, |
|
"grad_norm": 0.20521029829978943, |
|
"learning_rate": 1.1851053300040492e-05, |
|
"loss": 1.0299, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.719310839913855, |
|
"grad_norm": 0.3856906592845917, |
|
"learning_rate": 1.1842270731229365e-05, |
|
"loss": 1.1342, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.7200287150035893, |
|
"grad_norm": 0.2432117462158203, |
|
"learning_rate": 1.1833504339326419e-05, |
|
"loss": 1.0733, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.7207465900933238, |
|
"grad_norm": 0.20327435433864594, |
|
"learning_rate": 1.1824754169241487e-05, |
|
"loss": 1.0116, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.7214644651830582, |
|
"grad_norm": 0.21202722191810608, |
|
"learning_rate": 1.1816020265801305e-05, |
|
"loss": 1.0366, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7221823402727925, |
|
"grad_norm": 0.3296683132648468, |
|
"learning_rate": 1.180730267374928e-05, |
|
"loss": 1.0156, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.7229002153625269, |
|
"grad_norm": 0.7693004608154297, |
|
"learning_rate": 1.1798601437745247e-05, |
|
"loss": 1.3622, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.7236180904522613, |
|
"grad_norm": 0.24518100917339325, |
|
"learning_rate": 1.1789916602365264e-05, |
|
"loss": 1.0569, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.7243359655419956, |
|
"grad_norm": 0.4782348573207855, |
|
"learning_rate": 1.1781248212101354e-05, |
|
"loss": 1.1106, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.7250538406317301, |
|
"grad_norm": 0.25384142994880676, |
|
"learning_rate": 1.1772596311361299e-05, |
|
"loss": 1.0966, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7257717157214645, |
|
"grad_norm": 0.23078513145446777, |
|
"learning_rate": 1.1763960944468411e-05, |
|
"loss": 0.9975, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.7264895908111989, |
|
"grad_norm": 0.2533474862575531, |
|
"learning_rate": 1.1755342155661293e-05, |
|
"loss": 1.029, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.7272074659009332, |
|
"grad_norm": 0.3993743658065796, |
|
"learning_rate": 1.1746739989093619e-05, |
|
"loss": 1.1461, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.7279253409906676, |
|
"grad_norm": 0.3081437349319458, |
|
"learning_rate": 1.1738154488833911e-05, |
|
"loss": 1.0549, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.7286432160804021, |
|
"grad_norm": 1.5382530689239502, |
|
"learning_rate": 1.1729585698865308e-05, |
|
"loss": 1.4259, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7293610911701364, |
|
"grad_norm": 0.31933000683784485, |
|
"learning_rate": 1.1721033663085345e-05, |
|
"loss": 1.1181, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.7300789662598708, |
|
"grad_norm": 0.2206488400697708, |
|
"learning_rate": 1.1712498425305729e-05, |
|
"loss": 1.0774, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.7307968413496052, |
|
"grad_norm": 1.0508358478546143, |
|
"learning_rate": 1.17039800292521e-05, |
|
"loss": 1.2551, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.7315147164393395, |
|
"grad_norm": 0.2564982771873474, |
|
"learning_rate": 1.1695478518563835e-05, |
|
"loss": 1.0844, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.7322325915290739, |
|
"grad_norm": 0.2107759267091751, |
|
"learning_rate": 1.1686993936793792e-05, |
|
"loss": 1.0931, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7329504666188084, |
|
"grad_norm": 0.26491451263427734, |
|
"learning_rate": 1.1678526327408114e-05, |
|
"loss": 1.022, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.7336683417085427, |
|
"grad_norm": 0.31346040964126587, |
|
"learning_rate": 1.1670075733785993e-05, |
|
"loss": 0.9739, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.7343862167982771, |
|
"grad_norm": 0.3513311743736267, |
|
"learning_rate": 1.1661642199219446e-05, |
|
"loss": 1.0528, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.7351040918880115, |
|
"grad_norm": 0.2472268044948578, |
|
"learning_rate": 1.1653225766913096e-05, |
|
"loss": 1.0371, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.7358219669777458, |
|
"grad_norm": 0.38804471492767334, |
|
"learning_rate": 1.1644826479983964e-05, |
|
"loss": 1.1377, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7365398420674802, |
|
"grad_norm": 0.26781803369522095, |
|
"learning_rate": 1.1636444381461223e-05, |
|
"loss": 1.0295, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.7372577171572147, |
|
"grad_norm": 0.26370298862457275, |
|
"learning_rate": 1.1628079514285995e-05, |
|
"loss": 1.0728, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.7379755922469491, |
|
"grad_norm": 0.29120156168937683, |
|
"learning_rate": 1.1619731921311124e-05, |
|
"loss": 1.077, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.7386934673366834, |
|
"grad_norm": 0.4208977520465851, |
|
"learning_rate": 1.1611401645300968e-05, |
|
"loss": 1.2434, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.7394113424264178, |
|
"grad_norm": 0.4654211401939392, |
|
"learning_rate": 1.1603088728931162e-05, |
|
"loss": 1.2709, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7401292175161522, |
|
"grad_norm": 0.41849827766418457, |
|
"learning_rate": 1.1594793214788406e-05, |
|
"loss": 1.207, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.7408470926058865, |
|
"grad_norm": 0.2996397018432617, |
|
"learning_rate": 1.1586515145370264e-05, |
|
"loss": 1.0608, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.741564967695621, |
|
"grad_norm": 0.2890361547470093, |
|
"learning_rate": 1.1578254563084914e-05, |
|
"loss": 1.0606, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.7422828427853554, |
|
"grad_norm": 0.2434931993484497, |
|
"learning_rate": 1.1570011510250958e-05, |
|
"loss": 1.029, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.7430007178750897, |
|
"grad_norm": 0.319659024477005, |
|
"learning_rate": 1.1561786029097193e-05, |
|
"loss": 1.0207, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7437185929648241, |
|
"grad_norm": 0.2164667695760727, |
|
"learning_rate": 1.1553578161762395e-05, |
|
"loss": 1.052, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.7444364680545585, |
|
"grad_norm": 0.2233264297246933, |
|
"learning_rate": 1.1545387950295112e-05, |
|
"loss": 1.0718, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.7451543431442929, |
|
"grad_norm": 0.5795763731002808, |
|
"learning_rate": 1.1537215436653432e-05, |
|
"loss": 1.133, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.7458722182340273, |
|
"grad_norm": 0.46533235907554626, |
|
"learning_rate": 1.152906066270479e-05, |
|
"loss": 1.0287, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.7465900933237617, |
|
"grad_norm": 0.27343669533729553, |
|
"learning_rate": 1.152092367022573e-05, |
|
"loss": 1.1057, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.747307968413496, |
|
"grad_norm": 0.21523387730121613, |
|
"learning_rate": 1.1512804500901704e-05, |
|
"loss": 1.0251, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.7480258435032304, |
|
"grad_norm": 0.20506061613559723, |
|
"learning_rate": 1.1504703196326864e-05, |
|
"loss": 1.0761, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.7487437185929648, |
|
"grad_norm": 0.2571380138397217, |
|
"learning_rate": 1.1496619798003836e-05, |
|
"loss": 1.1193, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.7494615936826992, |
|
"grad_norm": 0.19685836136341095, |
|
"learning_rate": 1.148855434734351e-05, |
|
"loss": 1.0488, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.7501794687724336, |
|
"grad_norm": 0.4925697147846222, |
|
"learning_rate": 1.1480506885664839e-05, |
|
"loss": 1.0208, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.750897343862168, |
|
"grad_norm": 0.3596290647983551, |
|
"learning_rate": 1.1472477454194616e-05, |
|
"loss": 1.0781, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.7516152189519024, |
|
"grad_norm": 0.2801119387149811, |
|
"learning_rate": 1.1464466094067263e-05, |
|
"loss": 1.1354, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.7523330940416367, |
|
"grad_norm": 0.32316145300865173, |
|
"learning_rate": 1.1456472846324629e-05, |
|
"loss": 1.1104, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.7530509691313712, |
|
"grad_norm": 0.22448933124542236, |
|
"learning_rate": 1.1448497751915766e-05, |
|
"loss": 1.0667, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.7537688442211056, |
|
"grad_norm": 0.253033310174942, |
|
"learning_rate": 1.1440540851696734e-05, |
|
"loss": 1.1096, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7544867193108399, |
|
"grad_norm": 0.3583433926105499, |
|
"learning_rate": 1.1432602186430381e-05, |
|
"loss": 1.0997, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.7552045944005743, |
|
"grad_norm": 0.39129480719566345, |
|
"learning_rate": 1.1424681796786147e-05, |
|
"loss": 1.115, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.7559224694903087, |
|
"grad_norm": 0.5168975591659546, |
|
"learning_rate": 1.1416779723339833e-05, |
|
"loss": 1.2004, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.756640344580043, |
|
"grad_norm": 0.5105748772621155, |
|
"learning_rate": 1.1408896006573414e-05, |
|
"loss": 1.0845, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.7573582196697775, |
|
"grad_norm": 0.23934470117092133, |
|
"learning_rate": 1.1401030686874828e-05, |
|
"loss": 1.0144, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.7580760947595119, |
|
"grad_norm": 0.3562638461589813, |
|
"learning_rate": 1.139318380453776e-05, |
|
"loss": 1.0327, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.7587939698492462, |
|
"grad_norm": 0.2495163232088089, |
|
"learning_rate": 1.138535539976144e-05, |
|
"loss": 1.028, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.7595118449389806, |
|
"grad_norm": 0.2820242643356323, |
|
"learning_rate": 1.1377545512650447e-05, |
|
"loss": 1.0469, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.760229720028715, |
|
"grad_norm": 0.22608397901058197, |
|
"learning_rate": 1.1369754183214485e-05, |
|
"loss": 1.0533, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.7609475951184493, |
|
"grad_norm": 0.24175365269184113, |
|
"learning_rate": 1.1361981451368196e-05, |
|
"loss": 1.062, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7616654702081838, |
|
"grad_norm": 0.21232882142066956, |
|
"learning_rate": 1.135422735693094e-05, |
|
"loss": 0.9987, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.7623833452979182, |
|
"grad_norm": 0.22817876935005188, |
|
"learning_rate": 1.1346491939626602e-05, |
|
"loss": 1.0101, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.7631012203876526, |
|
"grad_norm": 0.22691325843334198, |
|
"learning_rate": 1.1338775239083386e-05, |
|
"loss": 1.0819, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.7638190954773869, |
|
"grad_norm": 0.19690930843353271, |
|
"learning_rate": 1.133107729483361e-05, |
|
"loss": 1.0059, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.7645369705671213, |
|
"grad_norm": 0.19578997790813446, |
|
"learning_rate": 1.1323398146313502e-05, |
|
"loss": 1.0486, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.7652548456568558, |
|
"grad_norm": 0.2787719964981079, |
|
"learning_rate": 1.1315737832863003e-05, |
|
"loss": 1.0245, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.7659727207465901, |
|
"grad_norm": 0.2493918538093567, |
|
"learning_rate": 1.1308096393725561e-05, |
|
"loss": 1.022, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.7666905958363245, |
|
"grad_norm": 0.4457206130027771, |
|
"learning_rate": 1.1300473868047937e-05, |
|
"loss": 1.2294, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.7674084709260589, |
|
"grad_norm": 0.21728824079036713, |
|
"learning_rate": 1.129287029487999e-05, |
|
"loss": 1.0013, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.7681263460157932, |
|
"grad_norm": 0.26787298917770386, |
|
"learning_rate": 1.128528571317449e-05, |
|
"loss": 1.0784, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7688442211055276, |
|
"grad_norm": 0.5104184150695801, |
|
"learning_rate": 1.127772016178692e-05, |
|
"loss": 1.2148, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.7695620961952621, |
|
"grad_norm": 0.2178836315870285, |
|
"learning_rate": 1.1270173679475265e-05, |
|
"loss": 1.0524, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.7702799712849964, |
|
"grad_norm": 0.7400010228157043, |
|
"learning_rate": 1.1262646304899823e-05, |
|
"loss": 1.0788, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.7709978463747308, |
|
"grad_norm": 0.30098220705986023, |
|
"learning_rate": 1.1255138076623001e-05, |
|
"loss": 1.0991, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.7717157214644652, |
|
"grad_norm": 0.24715134501457214, |
|
"learning_rate": 1.1247649033109123e-05, |
|
"loss": 1.0742, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7724335965541995, |
|
"grad_norm": 0.2401043027639389, |
|
"learning_rate": 1.124017921272423e-05, |
|
"loss": 0.9687, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.7731514716439339, |
|
"grad_norm": 0.22840212285518646, |
|
"learning_rate": 1.123272865373588e-05, |
|
"loss": 1.0217, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.7738693467336684, |
|
"grad_norm": 0.22718718647956848, |
|
"learning_rate": 1.1225297394312966e-05, |
|
"loss": 1.1272, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.7745872218234028, |
|
"grad_norm": 0.3085003197193146, |
|
"learning_rate": 1.12178854725255e-05, |
|
"loss": 1.1134, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.7753050969131371, |
|
"grad_norm": 0.21174356341362, |
|
"learning_rate": 1.1210492926344427e-05, |
|
"loss": 0.9903, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7760229720028715, |
|
"grad_norm": 0.20416200160980225, |
|
"learning_rate": 1.1203119793641443e-05, |
|
"loss": 0.9685, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.7767408470926059, |
|
"grad_norm": 0.9170592427253723, |
|
"learning_rate": 1.119576611218878e-05, |
|
"loss": 1.0274, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.7774587221823402, |
|
"grad_norm": 0.44593900442123413, |
|
"learning_rate": 1.1188431919659022e-05, |
|
"loss": 1.1094, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.7781765972720747, |
|
"grad_norm": 0.2174079269170761, |
|
"learning_rate": 1.1181117253624917e-05, |
|
"loss": 1.0575, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.7788944723618091, |
|
"grad_norm": 2.0982675552368164, |
|
"learning_rate": 1.1173822151559176e-05, |
|
"loss": 1.3757, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7796123474515434, |
|
"grad_norm": 0.3535577356815338, |
|
"learning_rate": 1.1166546650834289e-05, |
|
"loss": 1.1288, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.7803302225412778, |
|
"grad_norm": 0.23848174512386322, |
|
"learning_rate": 1.1159290788722323e-05, |
|
"loss": 1.0412, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.7810480976310122, |
|
"grad_norm": 0.6610331535339355, |
|
"learning_rate": 1.1152054602394742e-05, |
|
"loss": 1.1939, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.7817659727207465, |
|
"grad_norm": 0.25315794348716736, |
|
"learning_rate": 1.1144838128922214e-05, |
|
"loss": 1.0254, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.782483847810481, |
|
"grad_norm": 0.1975483000278473, |
|
"learning_rate": 1.1137641405274407e-05, |
|
"loss": 0.9531, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7832017229002154, |
|
"grad_norm": 0.43651190400123596, |
|
"learning_rate": 1.1130464468319827e-05, |
|
"loss": 1.1897, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.7839195979899497, |
|
"grad_norm": 0.2116590291261673, |
|
"learning_rate": 1.1123307354825603e-05, |
|
"loss": 1.0327, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.7846374730796841, |
|
"grad_norm": 0.33360204100608826, |
|
"learning_rate": 1.1116170101457313e-05, |
|
"loss": 1.1048, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.7853553481694185, |
|
"grad_norm": 0.24831229448318481, |
|
"learning_rate": 1.1109052744778795e-05, |
|
"loss": 1.0805, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.7860732232591529, |
|
"grad_norm": 0.3498152494430542, |
|
"learning_rate": 1.1101955321251946e-05, |
|
"loss": 1.101, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7867910983488873, |
|
"grad_norm": 0.2755047678947449, |
|
"learning_rate": 1.1094877867236567e-05, |
|
"loss": 1.0784, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.7875089734386217, |
|
"grad_norm": 0.30536141991615295, |
|
"learning_rate": 1.1087820418990133e-05, |
|
"loss": 1.1185, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.7882268485283561, |
|
"grad_norm": 0.2670566737651825, |
|
"learning_rate": 1.1080783012667645e-05, |
|
"loss": 0.9521, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.7889447236180904, |
|
"grad_norm": 0.4963916540145874, |
|
"learning_rate": 1.1073765684321426e-05, |
|
"loss": 1.0873, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.7896625987078248, |
|
"grad_norm": 0.6098265051841736, |
|
"learning_rate": 1.1066768469900944e-05, |
|
"loss": 1.1082, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7903804737975593, |
|
"grad_norm": 0.2066364884376526, |
|
"learning_rate": 1.1059791405252616e-05, |
|
"loss": 1.0428, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.7910983488872936, |
|
"grad_norm": 0.4424298405647278, |
|
"learning_rate": 1.1052834526119638e-05, |
|
"loss": 1.1162, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.791816223977028, |
|
"grad_norm": 0.5996174812316895, |
|
"learning_rate": 1.1045897868141797e-05, |
|
"loss": 1.148, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.7925340990667624, |
|
"grad_norm": 0.21078869700431824, |
|
"learning_rate": 1.1038981466855287e-05, |
|
"loss": 0.984, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.7932519741564967, |
|
"grad_norm": 0.2863714098930359, |
|
"learning_rate": 1.1032085357692526e-05, |
|
"loss": 1.0208, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.7939698492462312, |
|
"grad_norm": 0.1967342495918274, |
|
"learning_rate": 1.102520957598198e-05, |
|
"loss": 1.0659, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.7946877243359656, |
|
"grad_norm": 0.261958509683609, |
|
"learning_rate": 1.1018354156947975e-05, |
|
"loss": 1.0927, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.7954055994256999, |
|
"grad_norm": 0.46918782591819763, |
|
"learning_rate": 1.101151913571052e-05, |
|
"loss": 1.1254, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.7961234745154343, |
|
"grad_norm": 0.23906533420085907, |
|
"learning_rate": 1.1004704547285132e-05, |
|
"loss": 1.0926, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.7968413496051687, |
|
"grad_norm": 0.4308425188064575, |
|
"learning_rate": 1.0997910426582646e-05, |
|
"loss": 1.4434, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.797559224694903, |
|
"grad_norm": 0.22563466429710388, |
|
"learning_rate": 1.099113680840904e-05, |
|
"loss": 0.9977, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.7982770997846375, |
|
"grad_norm": 0.199822336435318, |
|
"learning_rate": 1.098438372746527e-05, |
|
"loss": 1.0224, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.7989949748743719, |
|
"grad_norm": 0.5321314334869385, |
|
"learning_rate": 1.0977651218347063e-05, |
|
"loss": 1.0101, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.7997128499641063, |
|
"grad_norm": 0.3771994709968567, |
|
"learning_rate": 1.0970939315544772e-05, |
|
"loss": 1.0763, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8004307250538406, |
|
"grad_norm": 0.254106342792511, |
|
"learning_rate": 1.0964248053443185e-05, |
|
"loss": 1.052, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.801148600143575, |
|
"grad_norm": 0.33369532227516174, |
|
"learning_rate": 1.0957577466321335e-05, |
|
"loss": 1.1463, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8018664752333095, |
|
"grad_norm": 1.6308503150939941, |
|
"learning_rate": 1.0950927588352349e-05, |
|
"loss": 1.3882, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8025843503230438, |
|
"grad_norm": 0.2860555946826935, |
|
"learning_rate": 1.0944298453603261e-05, |
|
"loss": 1.0642, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8033022254127782, |
|
"grad_norm": 0.24643976986408234, |
|
"learning_rate": 1.0937690096034837e-05, |
|
"loss": 1.0377, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8040201005025126, |
|
"grad_norm": 0.5750579237937927, |
|
"learning_rate": 1.09311025495014e-05, |
|
"loss": 1.1976, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8047379755922469, |
|
"grad_norm": 0.23937703669071198, |
|
"learning_rate": 1.0924535847750661e-05, |
|
"loss": 1.1295, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8054558506819813, |
|
"grad_norm": 0.6504701375961304, |
|
"learning_rate": 1.091799002442355e-05, |
|
"loss": 1.1366, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8061737257717158, |
|
"grad_norm": 0.3447827100753784, |
|
"learning_rate": 1.0911465113054024e-05, |
|
"loss": 1.1147, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8068916008614501, |
|
"grad_norm": 0.29246342182159424, |
|
"learning_rate": 1.0904961147068924e-05, |
|
"loss": 1.0298, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8076094759511845, |
|
"grad_norm": 0.2509633004665375, |
|
"learning_rate": 1.0898478159787777e-05, |
|
"loss": 1.0329, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8083273510409189, |
|
"grad_norm": 0.39475613832473755, |
|
"learning_rate": 1.0892016184422643e-05, |
|
"loss": 1.1515, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8090452261306532, |
|
"grad_norm": 0.4760308563709259, |
|
"learning_rate": 1.0885575254077939e-05, |
|
"loss": 1.0339, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.8097631012203876, |
|
"grad_norm": 0.6620875597000122, |
|
"learning_rate": 1.0879155401750264e-05, |
|
"loss": 1.2161, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8104809763101221, |
|
"grad_norm": 0.42044660449028015, |
|
"learning_rate": 1.0872756660328242e-05, |
|
"loss": 1.0563, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8111988513998565, |
|
"grad_norm": 0.23910948634147644, |
|
"learning_rate": 1.0866379062592346e-05, |
|
"loss": 1.0107, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8119167264895908, |
|
"grad_norm": 1.1729207038879395, |
|
"learning_rate": 1.0860022641214725e-05, |
|
"loss": 1.4796, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.8126346015793252, |
|
"grad_norm": 0.2483389973640442, |
|
"learning_rate": 1.0853687428759047e-05, |
|
"loss": 1.0933, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8133524766690596, |
|
"grad_norm": 0.20855219662189484, |
|
"learning_rate": 1.084737345768033e-05, |
|
"loss": 1.0492, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.8140703517587939, |
|
"grad_norm": 0.2710930109024048, |
|
"learning_rate": 1.0841080760324767e-05, |
|
"loss": 0.9982, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8147882268485284, |
|
"grad_norm": 0.23391245305538177, |
|
"learning_rate": 1.0834809368929569e-05, |
|
"loss": 1.0699, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8155061019382628, |
|
"grad_norm": 0.5731115341186523, |
|
"learning_rate": 1.0828559315622802e-05, |
|
"loss": 1.1237, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8162239770279971, |
|
"grad_norm": 0.27705612778663635, |
|
"learning_rate": 1.0822330632423208e-05, |
|
"loss": 1.0773, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8169418521177315, |
|
"grad_norm": 0.2135666012763977, |
|
"learning_rate": 1.0816123351240065e-05, |
|
"loss": 0.9935, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.8176597272074659, |
|
"grad_norm": 0.6682529449462891, |
|
"learning_rate": 1.0809937503872996e-05, |
|
"loss": 1.117, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8183776022972002, |
|
"grad_norm": 0.22851616144180298, |
|
"learning_rate": 1.0803773122011827e-05, |
|
"loss": 1.0399, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8190954773869347, |
|
"grad_norm": 0.3621741235256195, |
|
"learning_rate": 1.0797630237236414e-05, |
|
"loss": 1.0938, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.8198133524766691, |
|
"grad_norm": 0.20737534761428833, |
|
"learning_rate": 1.0791508881016485e-05, |
|
"loss": 0.9742, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.8205312275664034, |
|
"grad_norm": 1.6472431421279907, |
|
"learning_rate": 1.0785409084711485e-05, |
|
"loss": 1.4099, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.8212491026561378, |
|
"grad_norm": 0.47031158208847046, |
|
"learning_rate": 1.0779330879570394e-05, |
|
"loss": 1.0591, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.8219669777458722, |
|
"grad_norm": 0.22210556268692017, |
|
"learning_rate": 1.0773274296731592e-05, |
|
"loss": 1.099, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8226848528356066, |
|
"grad_norm": 27.147884368896484, |
|
"learning_rate": 1.076723936722269e-05, |
|
"loss": 1.1498, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.823402727925341, |
|
"grad_norm": 0.4893867075443268, |
|
"learning_rate": 1.0761226121960365e-05, |
|
"loss": 1.0403, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.8241206030150754, |
|
"grad_norm": 0.2474132627248764, |
|
"learning_rate": 1.0755234591750203e-05, |
|
"loss": 1.083, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.8248384781048098, |
|
"grad_norm": 0.22155289351940155, |
|
"learning_rate": 1.0749264807286557e-05, |
|
"loss": 1.0682, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.8255563531945441, |
|
"grad_norm": 0.3100580871105194, |
|
"learning_rate": 1.0743316799152363e-05, |
|
"loss": 1.1295, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8262742282842785, |
|
"grad_norm": 0.23339876532554626, |
|
"learning_rate": 1.0737390597819013e-05, |
|
"loss": 1.078, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.826992103374013, |
|
"grad_norm": 0.21114078164100647, |
|
"learning_rate": 1.0731486233646172e-05, |
|
"loss": 1.0884, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.8277099784637473, |
|
"grad_norm": 0.21384155750274658, |
|
"learning_rate": 1.0725603736881636e-05, |
|
"loss": 1.0205, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.8284278535534817, |
|
"grad_norm": 0.28435850143432617, |
|
"learning_rate": 1.071974313766118e-05, |
|
"loss": 1.0634, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.8291457286432161, |
|
"grad_norm": 0.2539917528629303, |
|
"learning_rate": 1.0713904466008397e-05, |
|
"loss": 1.0601, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8298636037329504, |
|
"grad_norm": 0.2543018162250519, |
|
"learning_rate": 1.070808775183454e-05, |
|
"loss": 1.083, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.8305814788226848, |
|
"grad_norm": 0.5512329936027527, |
|
"learning_rate": 1.0702293024938383e-05, |
|
"loss": 1.1601, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.8312993539124193, |
|
"grad_norm": 0.20794658362865448, |
|
"learning_rate": 1.069652031500606e-05, |
|
"loss": 1.0004, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.8320172290021536, |
|
"grad_norm": 0.3217058479785919, |
|
"learning_rate": 1.0690769651610906e-05, |
|
"loss": 1.0814, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.832735104091888, |
|
"grad_norm": 0.5152641534805298, |
|
"learning_rate": 1.0685041064213318e-05, |
|
"loss": 1.1193, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8334529791816224, |
|
"grad_norm": 0.31184303760528564, |
|
"learning_rate": 1.0679334582160599e-05, |
|
"loss": 1.0972, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.8341708542713567, |
|
"grad_norm": 0.2713521122932434, |
|
"learning_rate": 1.0673650234686803e-05, |
|
"loss": 1.0193, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.8348887293610912, |
|
"grad_norm": 0.23324035108089447, |
|
"learning_rate": 1.0667988050912591e-05, |
|
"loss": 1.0967, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.8356066044508256, |
|
"grad_norm": 0.2763028144836426, |
|
"learning_rate": 1.0662348059845088e-05, |
|
"loss": 1.0406, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.83632447954056, |
|
"grad_norm": 0.2278720736503601, |
|
"learning_rate": 1.0656730290377712e-05, |
|
"loss": 1.1152, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8370423546302943, |
|
"grad_norm": 0.3886187672615051, |
|
"learning_rate": 1.0651134771290055e-05, |
|
"loss": 1.0825, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.8377602297200287, |
|
"grad_norm": 0.589653730392456, |
|
"learning_rate": 1.0645561531247713e-05, |
|
"loss": 1.1862, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.8384781048097631, |
|
"grad_norm": 0.2417033612728119, |
|
"learning_rate": 1.0640010598802148e-05, |
|
"loss": 1.0709, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.8391959798994975, |
|
"grad_norm": 0.641553521156311, |
|
"learning_rate": 1.0634482002390544e-05, |
|
"loss": 1.0698, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.8399138549892319, |
|
"grad_norm": 0.3284968435764313, |
|
"learning_rate": 1.0628975770335662e-05, |
|
"loss": 1.0752, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8406317300789663, |
|
"grad_norm": 0.2477286159992218, |
|
"learning_rate": 1.0623491930845678e-05, |
|
"loss": 1.0343, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.8413496051687006, |
|
"grad_norm": 0.1869351863861084, |
|
"learning_rate": 1.0618030512014065e-05, |
|
"loss": 1.0395, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.842067480258435, |
|
"grad_norm": 0.24983994662761688, |
|
"learning_rate": 1.0612591541819432e-05, |
|
"loss": 1.0368, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.8427853553481695, |
|
"grad_norm": 0.23476620018482208, |
|
"learning_rate": 1.0607175048125384e-05, |
|
"loss": 0.9778, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.8435032304379038, |
|
"grad_norm": 0.26927655935287476, |
|
"learning_rate": 1.060178105868038e-05, |
|
"loss": 0.9862, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8442211055276382, |
|
"grad_norm": 0.20364007353782654, |
|
"learning_rate": 1.059640960111759e-05, |
|
"loss": 1.1004, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.8449389806173726, |
|
"grad_norm": 0.511143684387207, |
|
"learning_rate": 1.0591060702954757e-05, |
|
"loss": 1.0001, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.8456568557071069, |
|
"grad_norm": 0.4864940941333771, |
|
"learning_rate": 1.0585734391594045e-05, |
|
"loss": 1.0297, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8463747307968413, |
|
"grad_norm": 0.21658964455127716, |
|
"learning_rate": 1.0580430694321918e-05, |
|
"loss": 0.9969, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.8470926058865758, |
|
"grad_norm": 0.5197054743766785, |
|
"learning_rate": 1.0575149638308983e-05, |
|
"loss": 1.0246, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8478104809763102, |
|
"grad_norm": 0.44801488518714905, |
|
"learning_rate": 1.0569891250609858e-05, |
|
"loss": 1.1387, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.8485283560660445, |
|
"grad_norm": 1.1570936441421509, |
|
"learning_rate": 1.0564655558163032e-05, |
|
"loss": 1.3054, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.8492462311557789, |
|
"grad_norm": 0.33803439140319824, |
|
"learning_rate": 1.0559442587790727e-05, |
|
"loss": 1.0835, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8499641062455133, |
|
"grad_norm": 0.2663414776325226, |
|
"learning_rate": 1.0554252366198759e-05, |
|
"loss": 1.0631, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8506819813352476, |
|
"grad_norm": 0.8476252555847168, |
|
"learning_rate": 1.0549084919976409e-05, |
|
"loss": 1.3256, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8513998564249821, |
|
"grad_norm": 0.30568942427635193, |
|
"learning_rate": 1.0543940275596274e-05, |
|
"loss": 1.0559, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.8521177315147165, |
|
"grad_norm": 2.704880714416504, |
|
"learning_rate": 1.0538818459414139e-05, |
|
"loss": 1.0905, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.8528356066044508, |
|
"grad_norm": 0.22311435639858246, |
|
"learning_rate": 1.0533719497668846e-05, |
|
"loss": 1.06, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.8535534816941852, |
|
"grad_norm": 0.2369927316904068, |
|
"learning_rate": 1.0528643416482152e-05, |
|
"loss": 1.0359, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.8542713567839196, |
|
"grad_norm": 0.29164135456085205, |
|
"learning_rate": 1.0523590241858597e-05, |
|
"loss": 1.0999, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8549892318736539, |
|
"grad_norm": 0.23516468703746796, |
|
"learning_rate": 1.0518559999685371e-05, |
|
"loss": 1.0599, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.8557071069633884, |
|
"grad_norm": 0.332119345664978, |
|
"learning_rate": 1.0513552715732186e-05, |
|
"loss": 1.1189, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.8564249820531228, |
|
"grad_norm": 0.23043331503868103, |
|
"learning_rate": 1.0508568415651135e-05, |
|
"loss": 0.9978, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.2612220048904419, |
|
"learning_rate": 1.0503607124976569e-05, |
|
"loss": 1.0405, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.8578607322325915, |
|
"grad_norm": 0.27752968668937683, |
|
"learning_rate": 1.0498668869124962e-05, |
|
"loss": 1.0965, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.8585786073223259, |
|
"grad_norm": 0.22212113440036774, |
|
"learning_rate": 1.0493753673394782e-05, |
|
"loss": 1.0576, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.8592964824120602, |
|
"grad_norm": 0.24385105073451996, |
|
"learning_rate": 1.0488861562966361e-05, |
|
"loss": 1.0124, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.8600143575017947, |
|
"grad_norm": 0.419596403837204, |
|
"learning_rate": 1.0483992562901765e-05, |
|
"loss": 1.0921, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.8607322325915291, |
|
"grad_norm": 0.39705750346183777, |
|
"learning_rate": 1.0479146698144667e-05, |
|
"loss": 1.0565, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.8614501076812635, |
|
"grad_norm": 0.2379753440618515, |
|
"learning_rate": 1.0474323993520216e-05, |
|
"loss": 1.0736, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8621679827709978, |
|
"grad_norm": 16.06524658203125, |
|
"learning_rate": 1.0469524473734922e-05, |
|
"loss": 1.08, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.8628858578607322, |
|
"grad_norm": 0.5211506485939026, |
|
"learning_rate": 1.046474816337651e-05, |
|
"loss": 1.1859, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.8636037329504667, |
|
"grad_norm": 0.3241841197013855, |
|
"learning_rate": 1.0459995086913808e-05, |
|
"loss": 1.0459, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.864321608040201, |
|
"grad_norm": 0.4102221429347992, |
|
"learning_rate": 1.0455265268696617e-05, |
|
"loss": 1.1504, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.8650394831299354, |
|
"grad_norm": 0.48446452617645264, |
|
"learning_rate": 1.0450558732955591e-05, |
|
"loss": 1.2409, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.8657573582196698, |
|
"grad_norm": 0.32923561334609985, |
|
"learning_rate": 1.0445875503802103e-05, |
|
"loss": 1.086, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.8664752333094041, |
|
"grad_norm": 0.23721420764923096, |
|
"learning_rate": 1.0441215605228133e-05, |
|
"loss": 1.0702, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.8671931083991385, |
|
"grad_norm": 0.2165788859128952, |
|
"learning_rate": 1.043657906110613e-05, |
|
"loss": 1.0695, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.867910983488873, |
|
"grad_norm": 0.4671003520488739, |
|
"learning_rate": 1.0431965895188912e-05, |
|
"loss": 1.0955, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.8686288585786073, |
|
"grad_norm": 0.28291141986846924, |
|
"learning_rate": 1.0427376131109522e-05, |
|
"loss": 1.078, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8693467336683417, |
|
"grad_norm": 1.1975141763687134, |
|
"learning_rate": 1.0422809792381119e-05, |
|
"loss": 1.0479, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.8700646087580761, |
|
"grad_norm": 0.21140481531620026, |
|
"learning_rate": 1.0418266902396856e-05, |
|
"loss": 1.0283, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.8707824838478104, |
|
"grad_norm": 0.22778762876987457, |
|
"learning_rate": 1.0413747484429762e-05, |
|
"loss": 1.0478, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.8715003589375449, |
|
"grad_norm": 0.21997831761837006, |
|
"learning_rate": 1.0409251561632614e-05, |
|
"loss": 1.0211, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.8722182340272793, |
|
"grad_norm": 0.2546640932559967, |
|
"learning_rate": 1.0404779157037823e-05, |
|
"loss": 0.9999, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.8729361091170137, |
|
"grad_norm": 0.44324609637260437, |
|
"learning_rate": 1.040033029355733e-05, |
|
"loss": 1.2576, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.873653984206748, |
|
"grad_norm": 0.39237719774246216, |
|
"learning_rate": 1.0395904993982458e-05, |
|
"loss": 1.0859, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.8743718592964824, |
|
"grad_norm": 0.23994913697242737, |
|
"learning_rate": 1.0391503280983826e-05, |
|
"loss": 1.1024, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.8750897343862168, |
|
"grad_norm": 0.19337226450443268, |
|
"learning_rate": 1.0387125177111217e-05, |
|
"loss": 1.1061, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.8758076094759512, |
|
"grad_norm": 0.24154868721961975, |
|
"learning_rate": 1.0382770704793464e-05, |
|
"loss": 1.0196, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8765254845656856, |
|
"grad_norm": 0.36926552653312683, |
|
"learning_rate": 1.0378439886338336e-05, |
|
"loss": 1.1087, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.87724335965542, |
|
"grad_norm": 0.3358927369117737, |
|
"learning_rate": 1.0374132743932424e-05, |
|
"loss": 1.0951, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.8779612347451543, |
|
"grad_norm": 0.22058887779712677, |
|
"learning_rate": 1.0369849299641033e-05, |
|
"loss": 0.9991, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.8786791098348887, |
|
"grad_norm": 0.6595388054847717, |
|
"learning_rate": 1.036558957540806e-05, |
|
"loss": 1.2939, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.8793969849246231, |
|
"grad_norm": 0.2563266158103943, |
|
"learning_rate": 1.0361353593055878e-05, |
|
"loss": 0.9876, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8801148600143575, |
|
"grad_norm": 0.24463306367397308, |
|
"learning_rate": 1.0357141374285248e-05, |
|
"loss": 1.1054, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.8808327351040919, |
|
"grad_norm": 0.4125424027442932, |
|
"learning_rate": 1.0352952940675177e-05, |
|
"loss": 1.1515, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.8815506101938263, |
|
"grad_norm": 0.2915627360343933, |
|
"learning_rate": 1.0348788313682823e-05, |
|
"loss": 1.069, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.8822684852835606, |
|
"grad_norm": 0.3884046673774719, |
|
"learning_rate": 1.0344647514643391e-05, |
|
"loss": 1.0746, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.882986360373295, |
|
"grad_norm": 0.21901565790176392, |
|
"learning_rate": 1.0340530564770011e-05, |
|
"loss": 1.0689, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8837042354630295, |
|
"grad_norm": 0.24854253232479095, |
|
"learning_rate": 1.0336437485153634e-05, |
|
"loss": 1.0305, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.8844221105527639, |
|
"grad_norm": 0.2502192556858063, |
|
"learning_rate": 1.0332368296762933e-05, |
|
"loss": 1.0243, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.8851399856424982, |
|
"grad_norm": 0.7768297791481018, |
|
"learning_rate": 1.0328323020444176e-05, |
|
"loss": 1.5945, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.8858578607322326, |
|
"grad_norm": 0.4446689188480377, |
|
"learning_rate": 1.0324301676921138e-05, |
|
"loss": 1.1623, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.886575735821967, |
|
"grad_norm": 0.2070520520210266, |
|
"learning_rate": 1.0320304286794983e-05, |
|
"loss": 1.0624, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.8872936109117013, |
|
"grad_norm": 0.5530170202255249, |
|
"learning_rate": 1.0316330870544165e-05, |
|
"loss": 1.1009, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.8880114860014358, |
|
"grad_norm": 0.22923225164413452, |
|
"learning_rate": 1.0312381448524325e-05, |
|
"loss": 1.0953, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.8887293610911702, |
|
"grad_norm": 0.5102125406265259, |
|
"learning_rate": 1.030845604096817e-05, |
|
"loss": 1.178, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.8894472361809045, |
|
"grad_norm": 0.25947073101997375, |
|
"learning_rate": 1.03045546679854e-05, |
|
"loss": 1.0017, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.8901651112706389, |
|
"grad_norm": 0.2402765452861786, |
|
"learning_rate": 1.0300677349562569e-05, |
|
"loss": 1.0853, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8908829863603733, |
|
"grad_norm": 0.2136669158935547, |
|
"learning_rate": 1.0296824105563014e-05, |
|
"loss": 1.0527, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.8916008614501076, |
|
"grad_norm": 0.7674557566642761, |
|
"learning_rate": 1.0292994955726734e-05, |
|
"loss": 1.1384, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.8923187365398421, |
|
"grad_norm": 0.20754656195640564, |
|
"learning_rate": 1.0289189919670293e-05, |
|
"loss": 0.9825, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.8930366116295765, |
|
"grad_norm": 0.4037124216556549, |
|
"learning_rate": 1.0285409016886723e-05, |
|
"loss": 1.0195, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.8937544867193108, |
|
"grad_norm": 0.23209604620933533, |
|
"learning_rate": 1.0281652266745426e-05, |
|
"loss": 1.0857, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8944723618090452, |
|
"grad_norm": 0.4080345928668976, |
|
"learning_rate": 1.0277919688492063e-05, |
|
"loss": 1.1215, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.8951902368987796, |
|
"grad_norm": 0.6738815307617188, |
|
"learning_rate": 1.0274211301248476e-05, |
|
"loss": 1.0, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.8959081119885139, |
|
"grad_norm": 0.4783581495285034, |
|
"learning_rate": 1.0270527124012562e-05, |
|
"loss": 1.2057, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.8966259870782484, |
|
"grad_norm": 0.7883797287940979, |
|
"learning_rate": 1.0266867175658202e-05, |
|
"loss": 1.2773, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.8973438621679828, |
|
"grad_norm": 0.2835233509540558, |
|
"learning_rate": 1.026323147493515e-05, |
|
"loss": 1.0791, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8980617372577172, |
|
"grad_norm": 0.29707083106040955, |
|
"learning_rate": 1.0259620040468941e-05, |
|
"loss": 1.0785, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.8987796123474515, |
|
"grad_norm": 0.24541136622428894, |
|
"learning_rate": 1.0256032890760795e-05, |
|
"loss": 1.023, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.8994974874371859, |
|
"grad_norm": 0.2777314782142639, |
|
"learning_rate": 1.0252470044187522e-05, |
|
"loss": 1.0646, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.9002153625269204, |
|
"grad_norm": 0.21893638372421265, |
|
"learning_rate": 1.024893151900143e-05, |
|
"loss": 1.0038, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.9009332376166547, |
|
"grad_norm": 0.2998167872428894, |
|
"learning_rate": 1.0245417333330225e-05, |
|
"loss": 1.0233, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.9016511127063891, |
|
"grad_norm": 0.2272305190563202, |
|
"learning_rate": 1.024192750517693e-05, |
|
"loss": 1.0, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.9023689877961235, |
|
"grad_norm": 1.0981707572937012, |
|
"learning_rate": 1.0238462052419775e-05, |
|
"loss": 1.0438, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.9030868628858578, |
|
"grad_norm": 0.25254082679748535, |
|
"learning_rate": 1.0235020992812128e-05, |
|
"loss": 1.078, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.9038047379755922, |
|
"grad_norm": 0.4673773944377899, |
|
"learning_rate": 1.0231604343982382e-05, |
|
"loss": 1.1441, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.9045226130653267, |
|
"grad_norm": 0.26296040415763855, |
|
"learning_rate": 1.0228212123433882e-05, |
|
"loss": 0.9469, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.905240488155061, |
|
"grad_norm": 0.3453957140445709, |
|
"learning_rate": 1.022484434854482e-05, |
|
"loss": 1.061, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.9059583632447954, |
|
"grad_norm": 0.21403883397579193, |
|
"learning_rate": 1.0221501036568164e-05, |
|
"loss": 1.0177, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.9066762383345298, |
|
"grad_norm": 0.2306700348854065, |
|
"learning_rate": 1.0218182204631551e-05, |
|
"loss": 0.9859, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.9073941134242641, |
|
"grad_norm": 0.25937196612358093, |
|
"learning_rate": 1.0214887869737212e-05, |
|
"loss": 1.0946, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.9081119885139985, |
|
"grad_norm": 0.3202129602432251, |
|
"learning_rate": 1.0211618048761879e-05, |
|
"loss": 1.1946, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.908829863603733, |
|
"grad_norm": 0.24940723180770874, |
|
"learning_rate": 1.0208372758456702e-05, |
|
"loss": 1.025, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.9095477386934674, |
|
"grad_norm": 0.22642754018306732, |
|
"learning_rate": 1.0205152015447162e-05, |
|
"loss": 1.0356, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.9102656137832017, |
|
"grad_norm": 0.23893611133098602, |
|
"learning_rate": 1.020195583623298e-05, |
|
"loss": 1.0562, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.9109834888729361, |
|
"grad_norm": 0.46950802206993103, |
|
"learning_rate": 1.0198784237188042e-05, |
|
"loss": 1.0755, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.9117013639626705, |
|
"grad_norm": 0.2589515745639801, |
|
"learning_rate": 1.0195637234560314e-05, |
|
"loss": 1.0922, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9124192390524049, |
|
"grad_norm": 0.2856101095676422, |
|
"learning_rate": 1.0192514844471751e-05, |
|
"loss": 1.0005, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.9131371141421393, |
|
"grad_norm": 0.25440752506256104, |
|
"learning_rate": 1.0189417082918216e-05, |
|
"loss": 1.039, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.9138549892318737, |
|
"grad_norm": 0.39413267374038696, |
|
"learning_rate": 1.0186343965769411e-05, |
|
"loss": 1.0912, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.914572864321608, |
|
"grad_norm": 0.27434098720550537, |
|
"learning_rate": 1.0183295508768775e-05, |
|
"loss": 0.9959, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.9152907394113424, |
|
"grad_norm": 0.23922573029994965, |
|
"learning_rate": 1.0180271727533424e-05, |
|
"loss": 1.0208, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9160086145010768, |
|
"grad_norm": 0.5556219220161438, |
|
"learning_rate": 1.0177272637554052e-05, |
|
"loss": 1.2056, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.9167264895908112, |
|
"grad_norm": 0.19860997796058655, |
|
"learning_rate": 1.0174298254194868e-05, |
|
"loss": 0.9879, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.9174443646805456, |
|
"grad_norm": 0.2307528555393219, |
|
"learning_rate": 1.0171348592693507e-05, |
|
"loss": 1.0108, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.91816223977028, |
|
"grad_norm": 0.5415950417518616, |
|
"learning_rate": 1.0168423668160951e-05, |
|
"loss": 1.1183, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.9188801148600143, |
|
"grad_norm": 0.21727311611175537, |
|
"learning_rate": 1.0165523495581465e-05, |
|
"loss": 1.0008, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9195979899497487, |
|
"grad_norm": 0.6700248718261719, |
|
"learning_rate": 1.0162648089812504e-05, |
|
"loss": 1.3594, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9203158650394831, |
|
"grad_norm": 0.25267294049263, |
|
"learning_rate": 1.0159797465584642e-05, |
|
"loss": 1.0131, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.9210337401292176, |
|
"grad_norm": 0.39105260372161865, |
|
"learning_rate": 1.0156971637501508e-05, |
|
"loss": 0.9928, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.9217516152189519, |
|
"grad_norm": 0.32855525612831116, |
|
"learning_rate": 1.015417062003969e-05, |
|
"loss": 1.0152, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.9224694903086863, |
|
"grad_norm": 0.21902427077293396, |
|
"learning_rate": 1.0151394427548677e-05, |
|
"loss": 1.1189, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.9231873653984207, |
|
"grad_norm": 0.5886435508728027, |
|
"learning_rate": 1.0148643074250783e-05, |
|
"loss": 1.2409, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.923905240488155, |
|
"grad_norm": 0.3337082862854004, |
|
"learning_rate": 1.014591657424107e-05, |
|
"loss": 0.9816, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.9246231155778895, |
|
"grad_norm": 0.22968193888664246, |
|
"learning_rate": 1.014321494148728e-05, |
|
"loss": 1.0607, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.9253409906676239, |
|
"grad_norm": 0.5318540334701538, |
|
"learning_rate": 1.0140538189829754e-05, |
|
"loss": 1.1525, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.9260588657573582, |
|
"grad_norm": 0.20607882738113403, |
|
"learning_rate": 1.0137886332981374e-05, |
|
"loss": 1.0562, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9267767408470926, |
|
"grad_norm": 0.23362112045288086, |
|
"learning_rate": 1.0135259384527487e-05, |
|
"loss": 1.0274, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.927494615936827, |
|
"grad_norm": 0.3322995603084564, |
|
"learning_rate": 1.0132657357925835e-05, |
|
"loss": 1.0506, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.9282124910265613, |
|
"grad_norm": 0.23184844851493835, |
|
"learning_rate": 1.0130080266506486e-05, |
|
"loss": 1.074, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.9289303661162958, |
|
"grad_norm": 0.2552621066570282, |
|
"learning_rate": 1.0127528123471767e-05, |
|
"loss": 1.0205, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.9296482412060302, |
|
"grad_norm": 0.588615357875824, |
|
"learning_rate": 1.0125000941896191e-05, |
|
"loss": 1.077, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9303661162957645, |
|
"grad_norm": 0.2588033676147461, |
|
"learning_rate": 1.0122498734726398e-05, |
|
"loss": 0.9751, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.9310839913854989, |
|
"grad_norm": 0.32702207565307617, |
|
"learning_rate": 1.0120021514781091e-05, |
|
"loss": 1.0659, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9318018664752333, |
|
"grad_norm": 0.3707316517829895, |
|
"learning_rate": 1.0117569294750953e-05, |
|
"loss": 1.1125, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9325197415649676, |
|
"grad_norm": 0.44583168625831604, |
|
"learning_rate": 1.0115142087198602e-05, |
|
"loss": 0.9946, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9332376166547021, |
|
"grad_norm": 0.4182652533054352, |
|
"learning_rate": 1.0112739904558513e-05, |
|
"loss": 1.0689, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9339554917444365, |
|
"grad_norm": 0.6997318267822266, |
|
"learning_rate": 1.0110362759136967e-05, |
|
"loss": 1.2802, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.9346733668341709, |
|
"grad_norm": 0.22859402000904083, |
|
"learning_rate": 1.0108010663111968e-05, |
|
"loss": 1.019, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.9353912419239052, |
|
"grad_norm": 0.42435020208358765, |
|
"learning_rate": 1.010568362853321e-05, |
|
"loss": 1.1617, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9361091170136396, |
|
"grad_norm": 0.22650395333766937, |
|
"learning_rate": 1.010338166732198e-05, |
|
"loss": 1.069, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9368269921033741, |
|
"grad_norm": 0.3500579595565796, |
|
"learning_rate": 1.010110479127113e-05, |
|
"loss": 1.1423, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9375448671931084, |
|
"grad_norm": 0.6740381121635437, |
|
"learning_rate": 1.0098853012044994e-05, |
|
"loss": 1.154, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.9382627422828428, |
|
"grad_norm": 0.25104475021362305, |
|
"learning_rate": 1.0096626341179337e-05, |
|
"loss": 1.0328, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.9389806173725772, |
|
"grad_norm": 0.26676034927368164, |
|
"learning_rate": 1.0094424790081296e-05, |
|
"loss": 1.0511, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9396984924623115, |
|
"grad_norm": 0.30099838972091675, |
|
"learning_rate": 1.0092248370029322e-05, |
|
"loss": 1.0397, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9404163675520459, |
|
"grad_norm": 0.30963248014450073, |
|
"learning_rate": 1.009009709217312e-05, |
|
"loss": 1.0699, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9411342426417804, |
|
"grad_norm": 0.22851693630218506, |
|
"learning_rate": 1.0087970967533588e-05, |
|
"loss": 1.0638, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9418521177315147, |
|
"grad_norm": 0.4070218503475189, |
|
"learning_rate": 1.008587000700277e-05, |
|
"loss": 1.0829, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.9425699928212491, |
|
"grad_norm": 0.2321423441171646, |
|
"learning_rate": 1.0083794221343794e-05, |
|
"loss": 1.0626, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9432878679109835, |
|
"grad_norm": 0.4742386043071747, |
|
"learning_rate": 1.0081743621190822e-05, |
|
"loss": 1.0692, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9440057430007178, |
|
"grad_norm": 0.199398010969162, |
|
"learning_rate": 1.0079718217048988e-05, |
|
"loss": 1.1122, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9447236180904522, |
|
"grad_norm": 0.22731392085552216, |
|
"learning_rate": 1.0077718019294348e-05, |
|
"loss": 1.115, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9454414931801867, |
|
"grad_norm": 0.24799813330173492, |
|
"learning_rate": 1.0075743038173823e-05, |
|
"loss": 1.0634, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.9461593682699211, |
|
"grad_norm": 0.4763537049293518, |
|
"learning_rate": 1.007379328380516e-05, |
|
"loss": 1.0495, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.9468772433596554, |
|
"grad_norm": 0.28331029415130615, |
|
"learning_rate": 1.007186876617686e-05, |
|
"loss": 1.0004, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.9475951184493898, |
|
"grad_norm": 0.32906895875930786, |
|
"learning_rate": 1.0069969495148146e-05, |
|
"loss": 1.0571, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9483129935391242, |
|
"grad_norm": 0.23255948722362518, |
|
"learning_rate": 1.0068095480448896e-05, |
|
"loss": 0.9746, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.9490308686288585, |
|
"grad_norm": 0.22251664102077484, |
|
"learning_rate": 1.0066246731679609e-05, |
|
"loss": 1.112, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.949748743718593, |
|
"grad_norm": 0.35669928789138794, |
|
"learning_rate": 1.0064423258311345e-05, |
|
"loss": 1.0062, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.9504666188083274, |
|
"grad_norm": 0.28206053376197815, |
|
"learning_rate": 1.0062625069685673e-05, |
|
"loss": 1.0383, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.9511844938980617, |
|
"grad_norm": 0.4400699734687805, |
|
"learning_rate": 1.0060852175014635e-05, |
|
"loss": 1.1769, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9519023689877961, |
|
"grad_norm": 0.3857263922691345, |
|
"learning_rate": 1.0059104583380692e-05, |
|
"loss": 1.1565, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.9526202440775305, |
|
"grad_norm": 0.2161598801612854, |
|
"learning_rate": 1.0057382303736683e-05, |
|
"loss": 0.9872, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.9533381191672649, |
|
"grad_norm": 1.8909188508987427, |
|
"learning_rate": 1.005568534490577e-05, |
|
"loss": 1.5161, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.9540559942569993, |
|
"grad_norm": 0.2845224142074585, |
|
"learning_rate": 1.0054013715581394e-05, |
|
"loss": 1.0019, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.9547738693467337, |
|
"grad_norm": 0.45807337760925293, |
|
"learning_rate": 1.0052367424327244e-05, |
|
"loss": 1.114, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.955491744436468, |
|
"grad_norm": 0.24572716653347015, |
|
"learning_rate": 1.0050746479577194e-05, |
|
"loss": 1.0517, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.9562096195262024, |
|
"grad_norm": 0.22645676136016846, |
|
"learning_rate": 1.0049150889635272e-05, |
|
"loss": 1.0494, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.9569274946159368, |
|
"grad_norm": 0.6566616296768188, |
|
"learning_rate": 1.004758066267562e-05, |
|
"loss": 1.1728, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.9576453697056713, |
|
"grad_norm": 0.20157983899116516, |
|
"learning_rate": 1.0046035806742438e-05, |
|
"loss": 1.0669, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.9583632447954056, |
|
"grad_norm": 0.5226853489875793, |
|
"learning_rate": 1.0044516329749954e-05, |
|
"loss": 1.1016, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.95908111988514, |
|
"grad_norm": 0.5047028660774231, |
|
"learning_rate": 1.0043022239482385e-05, |
|
"loss": 1.1733, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.9597989949748744, |
|
"grad_norm": 0.290243536233902, |
|
"learning_rate": 1.0041553543593887e-05, |
|
"loss": 1.0053, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.9605168700646087, |
|
"grad_norm": 0.4698556065559387, |
|
"learning_rate": 1.0040110249608527e-05, |
|
"loss": 1.0902, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.9612347451543432, |
|
"grad_norm": 0.23319129645824432, |
|
"learning_rate": 1.0038692364920234e-05, |
|
"loss": 1.0855, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.9619526202440776, |
|
"grad_norm": 0.37685346603393555, |
|
"learning_rate": 1.0037299896792772e-05, |
|
"loss": 0.9907, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9626704953338119, |
|
"grad_norm": 0.22720623016357422, |
|
"learning_rate": 1.0035932852359691e-05, |
|
"loss": 1.0305, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.9633883704235463, |
|
"grad_norm": 0.6094557642936707, |
|
"learning_rate": 1.00345912386243e-05, |
|
"loss": 1.1798, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.9641062455132807, |
|
"grad_norm": 0.23081520199775696, |
|
"learning_rate": 1.003327506245963e-05, |
|
"loss": 0.9901, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.964824120603015, |
|
"grad_norm": 0.24824342131614685, |
|
"learning_rate": 1.0031984330608389e-05, |
|
"loss": 1.0054, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.9655419956927495, |
|
"grad_norm": 0.5930601954460144, |
|
"learning_rate": 1.0030719049682942e-05, |
|
"loss": 1.2524, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.9662598707824839, |
|
"grad_norm": 0.21535564959049225, |
|
"learning_rate": 1.0029479226165268e-05, |
|
"loss": 1.0293, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.9669777458722182, |
|
"grad_norm": 0.26974886655807495, |
|
"learning_rate": 1.0028264866406929e-05, |
|
"loss": 1.0031, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.9676956209619526, |
|
"grad_norm": 0.3035675287246704, |
|
"learning_rate": 1.0027075976629035e-05, |
|
"loss": 1.0578, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.968413496051687, |
|
"grad_norm": 0.3711431324481964, |
|
"learning_rate": 1.0025912562922216e-05, |
|
"loss": 1.1601, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.9691313711414213, |
|
"grad_norm": 0.24194128811359406, |
|
"learning_rate": 1.002477463124659e-05, |
|
"loss": 1.0591, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9698492462311558, |
|
"grad_norm": 0.20900464057922363, |
|
"learning_rate": 1.0023662187431731e-05, |
|
"loss": 1.0051, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.9705671213208902, |
|
"grad_norm": 0.28793370723724365, |
|
"learning_rate": 1.0022575237176638e-05, |
|
"loss": 0.969, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.9712849964106246, |
|
"grad_norm": 0.5335143208503723, |
|
"learning_rate": 1.0021513786049712e-05, |
|
"loss": 1.1496, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.9720028715003589, |
|
"grad_norm": 0.23671367764472961, |
|
"learning_rate": 1.0020477839488718e-05, |
|
"loss": 0.9809, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.9727207465900933, |
|
"grad_norm": 0.2911614775657654, |
|
"learning_rate": 1.001946740280077e-05, |
|
"loss": 1.0569, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.9734386216798278, |
|
"grad_norm": 0.18712709844112396, |
|
"learning_rate": 1.0018482481162282e-05, |
|
"loss": 1.028, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.9741564967695621, |
|
"grad_norm": 0.2784719467163086, |
|
"learning_rate": 1.001752307961897e-05, |
|
"loss": 1.0851, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.9748743718592965, |
|
"grad_norm": 0.2440050095319748, |
|
"learning_rate": 1.0016589203085805e-05, |
|
"loss": 0.997, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.9755922469490309, |
|
"grad_norm": 0.4130586087703705, |
|
"learning_rate": 1.0015680856346996e-05, |
|
"loss": 1.1313, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.9763101220387652, |
|
"grad_norm": 0.5140645503997803, |
|
"learning_rate": 1.0014798044055963e-05, |
|
"loss": 1.1613, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9770279971284996, |
|
"grad_norm": 0.3041968047618866, |
|
"learning_rate": 1.0013940770735313e-05, |
|
"loss": 1.0595, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.9777458722182341, |
|
"grad_norm": 0.47146379947662354, |
|
"learning_rate": 1.001310904077682e-05, |
|
"loss": 1.1859, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.9784637473079684, |
|
"grad_norm": 0.2464003711938858, |
|
"learning_rate": 1.0012302858441401e-05, |
|
"loss": 1.0211, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.9791816223977028, |
|
"grad_norm": 0.2065819799900055, |
|
"learning_rate": 1.0011522227859094e-05, |
|
"loss": 1.0402, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.9798994974874372, |
|
"grad_norm": 0.2763268053531647, |
|
"learning_rate": 1.001076715302903e-05, |
|
"loss": 1.0706, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.9806173725771715, |
|
"grad_norm": 0.20548270642757416, |
|
"learning_rate": 1.0010037637819431e-05, |
|
"loss": 1.0235, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.9813352476669059, |
|
"grad_norm": 0.2394370138645172, |
|
"learning_rate": 1.0009333685967568e-05, |
|
"loss": 0.9968, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.9820531227566404, |
|
"grad_norm": 0.2873808443546295, |
|
"learning_rate": 1.0008655301079755e-05, |
|
"loss": 1.1086, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.9827709978463748, |
|
"grad_norm": 0.23559843003749847, |
|
"learning_rate": 1.0008002486631328e-05, |
|
"loss": 1.0684, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.9834888729361091, |
|
"grad_norm": 0.28017380833625793, |
|
"learning_rate": 1.0007375245966625e-05, |
|
"loss": 1.1085, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9842067480258435, |
|
"grad_norm": 0.27574074268341064, |
|
"learning_rate": 1.0006773582298974e-05, |
|
"loss": 1.0373, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.9849246231155779, |
|
"grad_norm": 0.2589809000492096, |
|
"learning_rate": 1.0006197498710674e-05, |
|
"loss": 1.0808, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.9856424982053122, |
|
"grad_norm": 0.8424662351608276, |
|
"learning_rate": 1.0005646998152973e-05, |
|
"loss": 1.1736, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.9863603732950467, |
|
"grad_norm": 0.1887986660003662, |
|
"learning_rate": 1.0005122083446064e-05, |
|
"loss": 1.028, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.9870782483847811, |
|
"grad_norm": 0.49798160791397095, |
|
"learning_rate": 1.0004622757279057e-05, |
|
"loss": 1.2697, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.9877961234745154, |
|
"grad_norm": 0.6020227670669556, |
|
"learning_rate": 1.0004149022209982e-05, |
|
"loss": 1.1899, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.9885139985642498, |
|
"grad_norm": 0.3774351179599762, |
|
"learning_rate": 1.0003700880665761e-05, |
|
"loss": 1.101, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 0.9892318736539842, |
|
"grad_norm": 0.22604165971279144, |
|
"learning_rate": 1.0003278334942206e-05, |
|
"loss": 1.0164, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.9899497487437185, |
|
"grad_norm": 0.4209281802177429, |
|
"learning_rate": 1.0002881387203995e-05, |
|
"loss": 1.0941, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 0.990667623833453, |
|
"grad_norm": 0.22722220420837402, |
|
"learning_rate": 1.0002510039484682e-05, |
|
"loss": 1.0887, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9913854989231874, |
|
"grad_norm": 0.2620321810245514, |
|
"learning_rate": 1.0002164293686655e-05, |
|
"loss": 1.0303, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 0.9921033740129217, |
|
"grad_norm": 0.32566314935684204, |
|
"learning_rate": 1.0001844151581162e-05, |
|
"loss": 1.1484, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.9928212491026561, |
|
"grad_norm": 0.2737228274345398, |
|
"learning_rate": 1.0001549614808275e-05, |
|
"loss": 1.1292, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 0.9935391241923905, |
|
"grad_norm": 0.3287229835987091, |
|
"learning_rate": 1.0001280684876891e-05, |
|
"loss": 1.0606, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.994256999282125, |
|
"grad_norm": 0.28717947006225586, |
|
"learning_rate": 1.000103736316473e-05, |
|
"loss": 0.9971, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.9949748743718593, |
|
"grad_norm": 1.312623381614685, |
|
"learning_rate": 1.0000819650918314e-05, |
|
"loss": 1.1861, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.9956927494615937, |
|
"grad_norm": 0.27503907680511475, |
|
"learning_rate": 1.0000627549252978e-05, |
|
"loss": 1.04, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 0.9964106245513281, |
|
"grad_norm": 0.21305954456329346, |
|
"learning_rate": 1.0000461059152846e-05, |
|
"loss": 0.9879, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.9971284996410624, |
|
"grad_norm": 0.19111517071723938, |
|
"learning_rate": 1.0000320181470842e-05, |
|
"loss": 1.0412, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 0.9978463747307968, |
|
"grad_norm": 0.18863187730312347, |
|
"learning_rate": 1.0000204916928675e-05, |
|
"loss": 1.0658, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9985642498205313, |
|
"grad_norm": 0.560884952545166, |
|
"learning_rate": 1.000011526611684e-05, |
|
"loss": 1.1132, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 0.9992821249102656, |
|
"grad_norm": 0.23772947490215302, |
|
"learning_rate": 1.0000051229494616e-05, |
|
"loss": 0.9555, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3461064100265503, |
|
"learning_rate": 1.0000012807390056e-05, |
|
"loss": 1.0363, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1393, |
|
"total_flos": 2.538580367925838e+18, |
|
"train_loss": 1.1330506813586059, |
|
"train_runtime": 2210.1404, |
|
"train_samples_per_second": 40.323, |
|
"train_steps_per_second": 0.63 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1393, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.538580367925838e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|