|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994008388256441, |
|
"eval_steps": 500, |
|
"global_step": 1251, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000798881565807869, |
|
"grad_norm": 6.264526371887429, |
|
"learning_rate": 7.936507936507937e-08, |
|
"loss": 0.2446, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001597763131615738, |
|
"grad_norm": 6.308694064244479, |
|
"learning_rate": 1.5873015873015874e-07, |
|
"loss": 0.2338, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002396644697423607, |
|
"grad_norm": 5.832783152147895, |
|
"learning_rate": 2.3809523809523811e-07, |
|
"loss": 0.256, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003195526263231476, |
|
"grad_norm": 5.799948682363988, |
|
"learning_rate": 3.174603174603175e-07, |
|
"loss": 0.2472, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003994407829039345, |
|
"grad_norm": 5.976049807500368, |
|
"learning_rate": 3.9682539682539683e-07, |
|
"loss": 0.2477, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004793289394847214, |
|
"grad_norm": 5.519855873058023, |
|
"learning_rate": 4.7619047619047623e-07, |
|
"loss": 0.2579, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.005592170960655083, |
|
"grad_norm": 4.19702291174845, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.2701, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006391052526462952, |
|
"grad_norm": 3.5584756391837984, |
|
"learning_rate": 6.34920634920635e-07, |
|
"loss": 0.237, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007189934092270821, |
|
"grad_norm": 3.2434211283881376, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.2415, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00798881565807869, |
|
"grad_norm": 3.137678886876266, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 0.2278, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00878769722388656, |
|
"grad_norm": 2.955592830949147, |
|
"learning_rate": 8.73015873015873e-07, |
|
"loss": 0.2283, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.009586578789694428, |
|
"grad_norm": 2.1650634302297247, |
|
"learning_rate": 9.523809523809525e-07, |
|
"loss": 0.2207, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.010385460355502297, |
|
"grad_norm": 2.150736904578608, |
|
"learning_rate": 1.0317460317460317e-06, |
|
"loss": 0.2141, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.011184341921310166, |
|
"grad_norm": 2.1243728083407585, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.2189, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.011983223487118035, |
|
"grad_norm": 2.1170944400671665, |
|
"learning_rate": 1.1904761904761906e-06, |
|
"loss": 0.2051, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012782105052925903, |
|
"grad_norm": 1.9460350133992896, |
|
"learning_rate": 1.26984126984127e-06, |
|
"loss": 0.2019, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.013580986618733772, |
|
"grad_norm": 2.25981348368823, |
|
"learning_rate": 1.3492063492063493e-06, |
|
"loss": 0.203, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.014379868184541641, |
|
"grad_norm": 2.0595030317939615, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.1913, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01517874975034951, |
|
"grad_norm": 1.801627566947741, |
|
"learning_rate": 1.507936507936508e-06, |
|
"loss": 0.1794, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01597763131615738, |
|
"grad_norm": 1.6285244682529882, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.1919, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.016776512881965248, |
|
"grad_norm": 1.3096573076000306, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.188, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01757539444777312, |
|
"grad_norm": 1.3145326993964916, |
|
"learning_rate": 1.746031746031746e-06, |
|
"loss": 0.1798, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.018374276013580985, |
|
"grad_norm": 1.4510146563932291, |
|
"learning_rate": 1.8253968253968254e-06, |
|
"loss": 0.1679, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.019173157579388856, |
|
"grad_norm": 1.5676004476506082, |
|
"learning_rate": 1.904761904761905e-06, |
|
"loss": 0.1689, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.019972039145196723, |
|
"grad_norm": 1.3717967151760737, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.166, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.020770920711004594, |
|
"grad_norm": 1.2215402830411317, |
|
"learning_rate": 2.0634920634920634e-06, |
|
"loss": 0.1536, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02156980227681246, |
|
"grad_norm": 1.0936849815044172, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.1468, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02236868384262033, |
|
"grad_norm": 1.0938814113616944, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.1515, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.023167565408428202, |
|
"grad_norm": 1.062505185068681, |
|
"learning_rate": 2.301587301587302e-06, |
|
"loss": 0.1402, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02396644697423607, |
|
"grad_norm": 0.9972504126476375, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.1444, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02476532854004394, |
|
"grad_norm": 1.1079316413916769, |
|
"learning_rate": 2.4603174603174605e-06, |
|
"loss": 0.1551, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.025564210105851807, |
|
"grad_norm": 1.1116202589649768, |
|
"learning_rate": 2.53968253968254e-06, |
|
"loss": 0.1523, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.026363091671659677, |
|
"grad_norm": 1.0526520024318986, |
|
"learning_rate": 2.6190476190476192e-06, |
|
"loss": 0.1416, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.027161973237467545, |
|
"grad_norm": 1.1059950145911168, |
|
"learning_rate": 2.6984126984126986e-06, |
|
"loss": 0.1345, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.027960854803275415, |
|
"grad_norm": 1.066947971257434, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.14, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.028759736369083282, |
|
"grad_norm": 1.1529644796723812, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.1318, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.029558617934891153, |
|
"grad_norm": 1.1073580581614721, |
|
"learning_rate": 2.936507936507937e-06, |
|
"loss": 0.1322, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03035749950069902, |
|
"grad_norm": 1.1121577182014704, |
|
"learning_rate": 3.015873015873016e-06, |
|
"loss": 0.1414, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03115638106650689, |
|
"grad_norm": 1.0598917305665783, |
|
"learning_rate": 3.0952380952380957e-06, |
|
"loss": 0.1329, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03195526263231476, |
|
"grad_norm": 1.134894863039634, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.1517, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03275414419812263, |
|
"grad_norm": 1.001409942797884, |
|
"learning_rate": 3.2539682539682544e-06, |
|
"loss": 0.1392, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.033553025763930495, |
|
"grad_norm": 0.9891396742941516, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.14, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03435190732973837, |
|
"grad_norm": 1.1266509544929115, |
|
"learning_rate": 3.412698412698413e-06, |
|
"loss": 0.1389, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03515078889554624, |
|
"grad_norm": 0.9628290753555916, |
|
"learning_rate": 3.492063492063492e-06, |
|
"loss": 0.1385, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.035949670461354104, |
|
"grad_norm": 0.9720867605907642, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 0.1262, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03674855202716197, |
|
"grad_norm": 1.0868944036474724, |
|
"learning_rate": 3.6507936507936507e-06, |
|
"loss": 0.1388, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.037547433592969845, |
|
"grad_norm": 1.3146063770283822, |
|
"learning_rate": 3.7301587301587305e-06, |
|
"loss": 0.1285, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03834631515877771, |
|
"grad_norm": 1.1651760656127519, |
|
"learning_rate": 3.80952380952381e-06, |
|
"loss": 0.1382, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03914519672458558, |
|
"grad_norm": 0.9773806832036162, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.1184, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.039944078290393446, |
|
"grad_norm": 0.9830874096474851, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.1234, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04074295985620132, |
|
"grad_norm": 1.3541297874851739, |
|
"learning_rate": 4.047619047619048e-06, |
|
"loss": 0.1308, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04154184142200919, |
|
"grad_norm": 0.970571662092641, |
|
"learning_rate": 4.126984126984127e-06, |
|
"loss": 0.1377, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.042340722987817055, |
|
"grad_norm": 1.2143547903735357, |
|
"learning_rate": 4.206349206349207e-06, |
|
"loss": 0.1237, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04313960455362492, |
|
"grad_norm": 1.0098454532215537, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.1231, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.043938486119432796, |
|
"grad_norm": 1.1067360434608624, |
|
"learning_rate": 4.365079365079366e-06, |
|
"loss": 0.137, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04473736768524066, |
|
"grad_norm": 1.0456018151170878, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.1328, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04553624925104853, |
|
"grad_norm": 0.9886332631374318, |
|
"learning_rate": 4.523809523809524e-06, |
|
"loss": 0.104, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.046335130816856404, |
|
"grad_norm": 1.0663992036951566, |
|
"learning_rate": 4.603174603174604e-06, |
|
"loss": 0.1241, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04713401238266427, |
|
"grad_norm": 1.2133014739756007, |
|
"learning_rate": 4.682539682539683e-06, |
|
"loss": 0.1246, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.04793289394847214, |
|
"grad_norm": 1.130294064670252, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.1217, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.048731775514280005, |
|
"grad_norm": 1.087512054416334, |
|
"learning_rate": 4.841269841269842e-06, |
|
"loss": 0.1352, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04953065708008788, |
|
"grad_norm": 1.186584513621857, |
|
"learning_rate": 4.920634920634921e-06, |
|
"loss": 0.1266, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05032953864589575, |
|
"grad_norm": 0.9656151090360315, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1282, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.051128420211703614, |
|
"grad_norm": 1.0445397820159212, |
|
"learning_rate": 4.9999912586879515e-06, |
|
"loss": 0.1205, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05192730177751148, |
|
"grad_norm": 1.0626412035822264, |
|
"learning_rate": 4.999965034812934e-06, |
|
"loss": 0.119, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.052726183343319355, |
|
"grad_norm": 1.0884763391021568, |
|
"learning_rate": 4.999921328558333e-06, |
|
"loss": 0.122, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05352506490912722, |
|
"grad_norm": 1.1255928841636056, |
|
"learning_rate": 4.999860140229788e-06, |
|
"loss": 0.1317, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.05432394647493509, |
|
"grad_norm": 1.021100809804264, |
|
"learning_rate": 4.9997814702551914e-06, |
|
"loss": 0.1196, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05512282804074296, |
|
"grad_norm": 0.9267329405198692, |
|
"learning_rate": 4.999685319184688e-06, |
|
"loss": 0.1328, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05592170960655083, |
|
"grad_norm": 1.0294211721447675, |
|
"learning_rate": 4.9995716876906654e-06, |
|
"loss": 0.1161, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0567205911723587, |
|
"grad_norm": 1.0626502091277237, |
|
"learning_rate": 4.999440576567755e-06, |
|
"loss": 0.1346, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.057519472738166565, |
|
"grad_norm": 0.9387601593971938, |
|
"learning_rate": 4.999291986732823e-06, |
|
"loss": 0.1253, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05831835430397444, |
|
"grad_norm": 0.9664307515868064, |
|
"learning_rate": 4.999125919224966e-06, |
|
"loss": 0.118, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.059117235869782306, |
|
"grad_norm": 0.9320980438153565, |
|
"learning_rate": 4.998942375205502e-06, |
|
"loss": 0.1181, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05991611743559017, |
|
"grad_norm": 1.087597810065219, |
|
"learning_rate": 4.998741355957963e-06, |
|
"loss": 0.1123, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06071499900139804, |
|
"grad_norm": 0.9940393858545528, |
|
"learning_rate": 4.998522862888088e-06, |
|
"loss": 0.1274, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.061513880567205914, |
|
"grad_norm": 0.9573078672504775, |
|
"learning_rate": 4.998286897523808e-06, |
|
"loss": 0.1207, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06231276213301378, |
|
"grad_norm": 1.0642550055625968, |
|
"learning_rate": 4.998033461515242e-06, |
|
"loss": 0.1286, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.06311164369882165, |
|
"grad_norm": 1.051177584232888, |
|
"learning_rate": 4.99776255663468e-06, |
|
"loss": 0.1243, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06391052526462952, |
|
"grad_norm": 0.954997292990204, |
|
"learning_rate": 4.997474184776573e-06, |
|
"loss": 0.1128, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06470940683043738, |
|
"grad_norm": 0.8950215033759747, |
|
"learning_rate": 4.997168347957521e-06, |
|
"loss": 0.1059, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.06550828839624526, |
|
"grad_norm": 1.0278639983929891, |
|
"learning_rate": 4.996845048316253e-06, |
|
"loss": 0.129, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06630716996205313, |
|
"grad_norm": 0.9125269501555735, |
|
"learning_rate": 4.996504288113624e-06, |
|
"loss": 0.1213, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06710605152786099, |
|
"grad_norm": 1.154599556719037, |
|
"learning_rate": 4.996146069732583e-06, |
|
"loss": 0.1153, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06790493309366886, |
|
"grad_norm": 0.9583233046394478, |
|
"learning_rate": 4.995770395678171e-06, |
|
"loss": 0.1303, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06870381465947674, |
|
"grad_norm": 0.956624411481031, |
|
"learning_rate": 4.995377268577495e-06, |
|
"loss": 0.1128, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0695026962252846, |
|
"grad_norm": 0.9303535332486583, |
|
"learning_rate": 4.994966691179712e-06, |
|
"loss": 0.1078, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.07030157779109247, |
|
"grad_norm": 0.8445483716745527, |
|
"learning_rate": 4.994538666356009e-06, |
|
"loss": 0.1139, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.07110045935690033, |
|
"grad_norm": 1.1838181005074246, |
|
"learning_rate": 4.994093197099587e-06, |
|
"loss": 0.1266, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07189934092270821, |
|
"grad_norm": 0.9212525379918269, |
|
"learning_rate": 4.993630286525634e-06, |
|
"loss": 0.1105, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07269822248851608, |
|
"grad_norm": 0.9557168849403959, |
|
"learning_rate": 4.993149937871306e-06, |
|
"loss": 0.1114, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.07349710405432394, |
|
"grad_norm": 0.9335531131462851, |
|
"learning_rate": 4.992652154495706e-06, |
|
"loss": 0.1187, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07429598562013182, |
|
"grad_norm": 1.0684490615988076, |
|
"learning_rate": 4.992136939879857e-06, |
|
"loss": 0.1207, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07509486718593969, |
|
"grad_norm": 0.917221466030395, |
|
"learning_rate": 4.9916042976266795e-06, |
|
"loss": 0.1089, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07589374875174755, |
|
"grad_norm": 0.9102037919118495, |
|
"learning_rate": 4.991054231460969e-06, |
|
"loss": 0.1173, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07669263031755542, |
|
"grad_norm": 0.9957426271614526, |
|
"learning_rate": 4.990486745229364e-06, |
|
"loss": 0.1282, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0774915118833633, |
|
"grad_norm": 1.0340400584386213, |
|
"learning_rate": 4.989901842900326e-06, |
|
"loss": 0.1224, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07829039344917116, |
|
"grad_norm": 0.8694305718065952, |
|
"learning_rate": 4.989299528564103e-06, |
|
"loss": 0.1115, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07908927501497903, |
|
"grad_norm": 1.0607942211887487, |
|
"learning_rate": 4.988679806432712e-06, |
|
"loss": 0.1098, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07988815658078689, |
|
"grad_norm": 0.9482051164342659, |
|
"learning_rate": 4.9880426808398986e-06, |
|
"loss": 0.1089, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08068703814659477, |
|
"grad_norm": 1.1060592404442136, |
|
"learning_rate": 4.987388156241115e-06, |
|
"loss": 0.1071, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.08148591971240264, |
|
"grad_norm": 1.0630018016859446, |
|
"learning_rate": 4.986716237213484e-06, |
|
"loss": 0.13, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0822848012782105, |
|
"grad_norm": 0.980659330827137, |
|
"learning_rate": 4.986026928455767e-06, |
|
"loss": 0.1163, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.08308368284401838, |
|
"grad_norm": 1.0796295507919234, |
|
"learning_rate": 4.985320234788337e-06, |
|
"loss": 0.1234, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.08388256440982625, |
|
"grad_norm": 1.0235556044811625, |
|
"learning_rate": 4.9845961611531356e-06, |
|
"loss": 0.1082, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08468144597563411, |
|
"grad_norm": 1.0228391219961424, |
|
"learning_rate": 4.983854712613647e-06, |
|
"loss": 0.1168, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08548032754144198, |
|
"grad_norm": 0.9963769291850916, |
|
"learning_rate": 4.983095894354858e-06, |
|
"loss": 0.0995, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.08627920910724984, |
|
"grad_norm": 0.9258045063883333, |
|
"learning_rate": 4.982319711683221e-06, |
|
"loss": 0.1138, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08707809067305772, |
|
"grad_norm": 0.9711841814401072, |
|
"learning_rate": 4.981526170026621e-06, |
|
"loss": 0.1168, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.08787697223886559, |
|
"grad_norm": 0.8855803559568926, |
|
"learning_rate": 4.980715274934334e-06, |
|
"loss": 0.1087, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08867585380467345, |
|
"grad_norm": 0.8370807746302283, |
|
"learning_rate": 4.9798870320769884e-06, |
|
"loss": 0.1144, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08947473537048133, |
|
"grad_norm": 0.9128597902530756, |
|
"learning_rate": 4.97904144724653e-06, |
|
"loss": 0.1148, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0902736169362892, |
|
"grad_norm": 0.8855921084987907, |
|
"learning_rate": 4.978178526356173e-06, |
|
"loss": 0.115, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.09107249850209706, |
|
"grad_norm": 0.8484966494474456, |
|
"learning_rate": 4.977298275440368e-06, |
|
"loss": 0.1131, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.09187138006790493, |
|
"grad_norm": 0.9057924487387431, |
|
"learning_rate": 4.976400700654752e-06, |
|
"loss": 0.1192, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.09267026163371281, |
|
"grad_norm": 0.8833176940743735, |
|
"learning_rate": 4.975485808276111e-06, |
|
"loss": 0.1134, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.09346914319952067, |
|
"grad_norm": 0.906265232831976, |
|
"learning_rate": 4.974553604702332e-06, |
|
"loss": 0.1141, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.09426802476532854, |
|
"grad_norm": 1.167029695596976, |
|
"learning_rate": 4.973604096452361e-06, |
|
"loss": 0.1093, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0950669063311364, |
|
"grad_norm": 0.7956711447411758, |
|
"learning_rate": 4.972637290166158e-06, |
|
"loss": 0.1039, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.09586578789694428, |
|
"grad_norm": 0.8384697255234442, |
|
"learning_rate": 4.971653192604645e-06, |
|
"loss": 0.1034, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09666466946275215, |
|
"grad_norm": 0.8244072448799775, |
|
"learning_rate": 4.970651810649666e-06, |
|
"loss": 0.1154, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.09746355102856001, |
|
"grad_norm": 0.8661919813344635, |
|
"learning_rate": 4.969633151303934e-06, |
|
"loss": 0.1096, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09826243259436789, |
|
"grad_norm": 0.8808417828216416, |
|
"learning_rate": 4.968597221690986e-06, |
|
"loss": 0.1151, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.09906131416017576, |
|
"grad_norm": 0.7526710590860509, |
|
"learning_rate": 4.967544029055128e-06, |
|
"loss": 0.1102, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09986019572598362, |
|
"grad_norm": 0.9100920468208363, |
|
"learning_rate": 4.9664735807613895e-06, |
|
"loss": 0.1135, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1006590772917915, |
|
"grad_norm": 0.8566800660889555, |
|
"learning_rate": 4.965385884295467e-06, |
|
"loss": 0.1124, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.10145795885759937, |
|
"grad_norm": 0.8854006584420304, |
|
"learning_rate": 4.964280947263677e-06, |
|
"loss": 0.0986, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.10225684042340723, |
|
"grad_norm": 0.969878610508023, |
|
"learning_rate": 4.963158777392898e-06, |
|
"loss": 0.0983, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.1030557219892151, |
|
"grad_norm": 0.9892660403306187, |
|
"learning_rate": 4.962019382530521e-06, |
|
"loss": 0.1108, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.10385460355502296, |
|
"grad_norm": 0.9753877213466595, |
|
"learning_rate": 4.960862770644389e-06, |
|
"loss": 0.1115, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10465348512083084, |
|
"grad_norm": 0.9033107702224492, |
|
"learning_rate": 4.959688949822748e-06, |
|
"loss": 0.1117, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.10545236668663871, |
|
"grad_norm": 0.965532325966032, |
|
"learning_rate": 4.9584979282741856e-06, |
|
"loss": 0.1039, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.10625124825244657, |
|
"grad_norm": 0.9844322564004251, |
|
"learning_rate": 4.957289714327572e-06, |
|
"loss": 0.1093, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.10705012981825444, |
|
"grad_norm": 0.9619990158546806, |
|
"learning_rate": 4.95606431643201e-06, |
|
"loss": 0.116, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.10784901138406232, |
|
"grad_norm": 0.8325965340613319, |
|
"learning_rate": 4.9548217431567665e-06, |
|
"loss": 0.1161, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.10864789294987018, |
|
"grad_norm": 1.0194058532875092, |
|
"learning_rate": 4.953562003191219e-06, |
|
"loss": 0.1149, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.10944677451567805, |
|
"grad_norm": 0.9170383197008514, |
|
"learning_rate": 4.952285105344792e-06, |
|
"loss": 0.1059, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.11024565608148593, |
|
"grad_norm": 0.9189470478972314, |
|
"learning_rate": 4.950991058546893e-06, |
|
"loss": 0.1004, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.11104453764729379, |
|
"grad_norm": 1.0687382655451492, |
|
"learning_rate": 4.949679871846857e-06, |
|
"loss": 0.1145, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.11184341921310166, |
|
"grad_norm": 0.9134055983302238, |
|
"learning_rate": 4.948351554413879e-06, |
|
"loss": 0.1061, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11264230077890952, |
|
"grad_norm": 1.0565717743207121, |
|
"learning_rate": 4.947006115536947e-06, |
|
"loss": 0.1066, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1134411823447174, |
|
"grad_norm": 0.8523742921266169, |
|
"learning_rate": 4.945643564624782e-06, |
|
"loss": 0.1068, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.11424006391052527, |
|
"grad_norm": 0.9427600241720556, |
|
"learning_rate": 4.944263911205772e-06, |
|
"loss": 0.1088, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.11503894547633313, |
|
"grad_norm": 0.9823498202200215, |
|
"learning_rate": 4.942867164927899e-06, |
|
"loss": 0.1128, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.115837827042141, |
|
"grad_norm": 0.8481811655422338, |
|
"learning_rate": 4.941453335558682e-06, |
|
"loss": 0.1054, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11663670860794888, |
|
"grad_norm": 0.9044089222762782, |
|
"learning_rate": 4.940022432985096e-06, |
|
"loss": 0.11, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11743559017375674, |
|
"grad_norm": 0.860131950830847, |
|
"learning_rate": 4.938574467213519e-06, |
|
"loss": 0.1061, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.11823447173956461, |
|
"grad_norm": 0.9581110488146714, |
|
"learning_rate": 4.937109448369643e-06, |
|
"loss": 0.1127, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.11903335330537247, |
|
"grad_norm": 0.8720482965396903, |
|
"learning_rate": 4.935627386698418e-06, |
|
"loss": 0.1085, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.11983223487118035, |
|
"grad_norm": 0.8112483294643192, |
|
"learning_rate": 4.934128292563978e-06, |
|
"loss": 0.1041, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12063111643698822, |
|
"grad_norm": 0.9203686211432968, |
|
"learning_rate": 4.93261217644956e-06, |
|
"loss": 0.1036, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.12142999800279608, |
|
"grad_norm": 0.7759012119797701, |
|
"learning_rate": 4.93107904895744e-06, |
|
"loss": 0.095, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.12222887956860395, |
|
"grad_norm": 0.9139977963522751, |
|
"learning_rate": 4.9295289208088545e-06, |
|
"loss": 0.1053, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.12302776113441183, |
|
"grad_norm": 0.8627349081243817, |
|
"learning_rate": 4.927961802843927e-06, |
|
"loss": 0.1117, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.12382664270021969, |
|
"grad_norm": 0.928872031641752, |
|
"learning_rate": 4.92637770602159e-06, |
|
"loss": 0.1184, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12462552426602756, |
|
"grad_norm": 0.8292246195088976, |
|
"learning_rate": 4.924776641419513e-06, |
|
"loss": 0.1058, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.12542440583183542, |
|
"grad_norm": 0.9477438857662375, |
|
"learning_rate": 4.92315862023402e-06, |
|
"loss": 0.1034, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.1262232873976433, |
|
"grad_norm": 0.8702969195611917, |
|
"learning_rate": 4.921523653780012e-06, |
|
"loss": 0.1116, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.12702216896345117, |
|
"grad_norm": 0.9493498442525029, |
|
"learning_rate": 4.919871753490892e-06, |
|
"loss": 0.1102, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.12782105052925904, |
|
"grad_norm": 0.9116753131736711, |
|
"learning_rate": 4.9182029309184785e-06, |
|
"loss": 0.1132, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12861993209506692, |
|
"grad_norm": 0.7582793065158208, |
|
"learning_rate": 4.916517197732933e-06, |
|
"loss": 0.1064, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.12941881366087477, |
|
"grad_norm": 0.9778914130367525, |
|
"learning_rate": 4.914814565722671e-06, |
|
"loss": 0.1118, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.13021769522668264, |
|
"grad_norm": 0.9419365180087802, |
|
"learning_rate": 4.913095046794282e-06, |
|
"loss": 0.1038, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.1310165767924905, |
|
"grad_norm": 0.9612330406862204, |
|
"learning_rate": 4.911358652972448e-06, |
|
"loss": 0.1071, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.1318154583582984, |
|
"grad_norm": 0.9231553642667918, |
|
"learning_rate": 4.9096053963998555e-06, |
|
"loss": 0.1054, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13261433992410626, |
|
"grad_norm": 0.884065574169948, |
|
"learning_rate": 4.907835289337116e-06, |
|
"loss": 0.103, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1334132214899141, |
|
"grad_norm": 0.9476846210224978, |
|
"learning_rate": 4.906048344162677e-06, |
|
"loss": 0.1099, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.13421210305572198, |
|
"grad_norm": 0.8958610299998433, |
|
"learning_rate": 4.904244573372733e-06, |
|
"loss": 0.1091, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.13501098462152986, |
|
"grad_norm": 0.9020407628170796, |
|
"learning_rate": 4.902423989581143e-06, |
|
"loss": 0.1116, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.13580986618733773, |
|
"grad_norm": 0.9238934392642306, |
|
"learning_rate": 4.900586605519341e-06, |
|
"loss": 0.0988, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1366087477531456, |
|
"grad_norm": 0.9207175470533663, |
|
"learning_rate": 4.8987324340362445e-06, |
|
"loss": 0.1109, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.13740762931895348, |
|
"grad_norm": 0.8920474499664821, |
|
"learning_rate": 4.896861488098165e-06, |
|
"loss": 0.1135, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.13820651088476132, |
|
"grad_norm": 0.9691154046483549, |
|
"learning_rate": 4.894973780788722e-06, |
|
"loss": 0.1128, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1390053924505692, |
|
"grad_norm": 0.902524602492558, |
|
"learning_rate": 4.893069325308747e-06, |
|
"loss": 0.1223, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.13980427401637707, |
|
"grad_norm": 0.8576273271722297, |
|
"learning_rate": 4.89114813497619e-06, |
|
"loss": 0.1053, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.14060315558218495, |
|
"grad_norm": 0.9412918203296141, |
|
"learning_rate": 4.889210223226032e-06, |
|
"loss": 0.1088, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.14140203714799282, |
|
"grad_norm": 0.8254367045725899, |
|
"learning_rate": 4.8872556036101845e-06, |
|
"loss": 0.1136, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.14220091871380067, |
|
"grad_norm": 0.813317696779795, |
|
"learning_rate": 4.885284289797402e-06, |
|
"loss": 0.0968, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.14299980027960854, |
|
"grad_norm": 0.8893584230465316, |
|
"learning_rate": 4.883296295573176e-06, |
|
"loss": 0.101, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.14379868184541642, |
|
"grad_norm": 0.7566705356571157, |
|
"learning_rate": 4.881291634839652e-06, |
|
"loss": 0.0894, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1445975634112243, |
|
"grad_norm": 0.8599708405924054, |
|
"learning_rate": 4.8792703216155205e-06, |
|
"loss": 0.1125, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.14539644497703216, |
|
"grad_norm": 0.8456967021924519, |
|
"learning_rate": 4.877232370035926e-06, |
|
"loss": 0.1049, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.14619532654284004, |
|
"grad_norm": 0.815478274809968, |
|
"learning_rate": 4.875177794352364e-06, |
|
"loss": 0.1073, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.14699420810864788, |
|
"grad_norm": 0.7878779155800018, |
|
"learning_rate": 4.873106608932585e-06, |
|
"loss": 0.0978, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.14779308967445576, |
|
"grad_norm": 0.8810812572826364, |
|
"learning_rate": 4.871018828260492e-06, |
|
"loss": 0.1102, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14859197124026363, |
|
"grad_norm": 0.806104646763574, |
|
"learning_rate": 4.868914466936038e-06, |
|
"loss": 0.1222, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.1493908528060715, |
|
"grad_norm": 0.8151326052732307, |
|
"learning_rate": 4.866793539675127e-06, |
|
"loss": 0.1071, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.15018973437187938, |
|
"grad_norm": 0.8000214408032337, |
|
"learning_rate": 4.864656061309507e-06, |
|
"loss": 0.0997, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.15098861593768723, |
|
"grad_norm": 0.7896324428536934, |
|
"learning_rate": 4.862502046786671e-06, |
|
"loss": 0.1062, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1517874975034951, |
|
"grad_norm": 0.7552560858678763, |
|
"learning_rate": 4.860331511169752e-06, |
|
"loss": 0.0976, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15258637906930297, |
|
"grad_norm": 0.790809221885304, |
|
"learning_rate": 4.858144469637409e-06, |
|
"loss": 0.1133, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.15338526063511085, |
|
"grad_norm": 0.7624019298282761, |
|
"learning_rate": 4.8559409374837356e-06, |
|
"loss": 0.0887, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.15418414220091872, |
|
"grad_norm": 0.8522514916028995, |
|
"learning_rate": 4.853720930118139e-06, |
|
"loss": 0.1192, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.1549830237667266, |
|
"grad_norm": 0.8482297033111496, |
|
"learning_rate": 4.851484463065243e-06, |
|
"loss": 0.1128, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.15578190533253444, |
|
"grad_norm": 0.9895092259087344, |
|
"learning_rate": 4.849231551964771e-06, |
|
"loss": 0.1021, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15658078689834232, |
|
"grad_norm": 0.8597378998737626, |
|
"learning_rate": 4.846962212571443e-06, |
|
"loss": 0.11, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1573796684641502, |
|
"grad_norm": 0.8192511349717183, |
|
"learning_rate": 4.844676460754862e-06, |
|
"loss": 0.0992, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.15817855002995806, |
|
"grad_norm": 0.8928261617816722, |
|
"learning_rate": 4.842374312499405e-06, |
|
"loss": 0.1018, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.15897743159576594, |
|
"grad_norm": 0.7985841136149491, |
|
"learning_rate": 4.840055783904106e-06, |
|
"loss": 0.1022, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.15977631316157379, |
|
"grad_norm": 0.789467705600129, |
|
"learning_rate": 4.837720891182553e-06, |
|
"loss": 0.0946, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16057519472738166, |
|
"grad_norm": 0.9446894087276032, |
|
"learning_rate": 4.835369650662767e-06, |
|
"loss": 0.1251, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.16137407629318953, |
|
"grad_norm": 0.7973312335808964, |
|
"learning_rate": 4.833002078787089e-06, |
|
"loss": 0.1061, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1621729578589974, |
|
"grad_norm": 0.7600431803278112, |
|
"learning_rate": 4.830618192112065e-06, |
|
"loss": 0.0966, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.16297183942480528, |
|
"grad_norm": 0.7950707857967954, |
|
"learning_rate": 4.828218007308335e-06, |
|
"loss": 0.0946, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.16377072099061313, |
|
"grad_norm": 0.9725967990292367, |
|
"learning_rate": 4.825801541160509e-06, |
|
"loss": 0.1172, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.164569602556421, |
|
"grad_norm": 0.8521609257869547, |
|
"learning_rate": 4.823368810567056e-06, |
|
"loss": 0.1124, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.16536848412222888, |
|
"grad_norm": 0.8352032191193668, |
|
"learning_rate": 4.8209198325401815e-06, |
|
"loss": 0.1014, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.16616736568803675, |
|
"grad_norm": 0.817037921684215, |
|
"learning_rate": 4.818454624205711e-06, |
|
"loss": 0.0925, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.16696624725384462, |
|
"grad_norm": 0.8714751075848507, |
|
"learning_rate": 4.815973202802966e-06, |
|
"loss": 0.1069, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.1677651288196525, |
|
"grad_norm": 0.7980826304152385, |
|
"learning_rate": 4.813475585684653e-06, |
|
"loss": 0.1006, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16856401038546034, |
|
"grad_norm": 0.8251849516010518, |
|
"learning_rate": 4.810961790316731e-06, |
|
"loss": 0.106, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.16936289195126822, |
|
"grad_norm": 0.8599512267451508, |
|
"learning_rate": 4.808431834278294e-06, |
|
"loss": 0.0983, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1701617735170761, |
|
"grad_norm": 0.8871414735762658, |
|
"learning_rate": 4.805885735261454e-06, |
|
"loss": 0.1038, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.17096065508288397, |
|
"grad_norm": 0.8473694336902333, |
|
"learning_rate": 4.8033235110712055e-06, |
|
"loss": 0.0985, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.17175953664869184, |
|
"grad_norm": 0.7784578078223237, |
|
"learning_rate": 4.800745179625308e-06, |
|
"loss": 0.0954, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1725584182144997, |
|
"grad_norm": 0.8439903426690519, |
|
"learning_rate": 4.798150758954164e-06, |
|
"loss": 0.0946, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.17335729978030756, |
|
"grad_norm": 0.7683932085699543, |
|
"learning_rate": 4.7955402672006855e-06, |
|
"loss": 0.0991, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.17415618134611544, |
|
"grad_norm": 0.8210534029782739, |
|
"learning_rate": 4.79291372262017e-06, |
|
"loss": 0.1169, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.1749550629119233, |
|
"grad_norm": 0.8469001008689524, |
|
"learning_rate": 4.790271143580174e-06, |
|
"loss": 0.1043, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.17575394447773118, |
|
"grad_norm": 0.8221750551755307, |
|
"learning_rate": 4.787612548560385e-06, |
|
"loss": 0.111, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17655282604353906, |
|
"grad_norm": 0.7931373818167854, |
|
"learning_rate": 4.78493795615249e-06, |
|
"loss": 0.1102, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.1773517076093469, |
|
"grad_norm": 0.7197354675625041, |
|
"learning_rate": 4.7822473850600444e-06, |
|
"loss": 0.0976, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.17815058917515478, |
|
"grad_norm": 0.7872396703665671, |
|
"learning_rate": 4.779540854098348e-06, |
|
"loss": 0.1009, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.17894947074096265, |
|
"grad_norm": 0.8668934472760813, |
|
"learning_rate": 4.776818382194305e-06, |
|
"loss": 0.0999, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.17974835230677053, |
|
"grad_norm": 0.8481767317765282, |
|
"learning_rate": 4.7740799883862966e-06, |
|
"loss": 0.1037, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1805472338725784, |
|
"grad_norm": 0.8178437972168623, |
|
"learning_rate": 4.771325691824046e-06, |
|
"loss": 0.0978, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.18134611543838625, |
|
"grad_norm": 0.8164863929375823, |
|
"learning_rate": 4.768555511768486e-06, |
|
"loss": 0.1022, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.18214499700419412, |
|
"grad_norm": 0.8689175267561307, |
|
"learning_rate": 4.765769467591626e-06, |
|
"loss": 0.0982, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.182943878570002, |
|
"grad_norm": 0.8402988320046502, |
|
"learning_rate": 4.762967578776406e-06, |
|
"loss": 0.0881, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.18374276013580987, |
|
"grad_norm": 0.7116078445916488, |
|
"learning_rate": 4.760149864916579e-06, |
|
"loss": 0.0948, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18454164170161774, |
|
"grad_norm": 0.814050015517037, |
|
"learning_rate": 4.757316345716554e-06, |
|
"loss": 0.0937, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.18534052326742562, |
|
"grad_norm": 0.7662089706655602, |
|
"learning_rate": 4.754467040991272e-06, |
|
"loss": 0.1019, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.18613940483323346, |
|
"grad_norm": 0.8826194920747791, |
|
"learning_rate": 4.751601970666064e-06, |
|
"loss": 0.1083, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.18693828639904134, |
|
"grad_norm": 0.7980561660257126, |
|
"learning_rate": 4.748721154776508e-06, |
|
"loss": 0.1124, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.1877371679648492, |
|
"grad_norm": 0.7882742889167881, |
|
"learning_rate": 4.745824613468293e-06, |
|
"loss": 0.088, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.18853604953065708, |
|
"grad_norm": 0.7820268690800588, |
|
"learning_rate": 4.742912366997076e-06, |
|
"loss": 0.0871, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.18933493109646496, |
|
"grad_norm": 0.8615828011540341, |
|
"learning_rate": 4.73998443572834e-06, |
|
"loss": 0.1042, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1901338126622728, |
|
"grad_norm": 0.7435755225972785, |
|
"learning_rate": 4.737040840137255e-06, |
|
"loss": 0.0957, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.19093269422808068, |
|
"grad_norm": 0.7918088118130509, |
|
"learning_rate": 4.734081600808531e-06, |
|
"loss": 0.1002, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.19173157579388855, |
|
"grad_norm": 0.8364127760580201, |
|
"learning_rate": 4.731106738436275e-06, |
|
"loss": 0.0896, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19253045735969643, |
|
"grad_norm": 0.7869544838131202, |
|
"learning_rate": 4.728116273823848e-06, |
|
"loss": 0.1011, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.1933293389255043, |
|
"grad_norm": 0.780676449932054, |
|
"learning_rate": 4.725110227883716e-06, |
|
"loss": 0.0897, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.19412822049131218, |
|
"grad_norm": 0.8631262185605082, |
|
"learning_rate": 4.7220886216373095e-06, |
|
"loss": 0.1003, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.19492710205712002, |
|
"grad_norm": 0.8807375012349913, |
|
"learning_rate": 4.7190514762148685e-06, |
|
"loss": 0.0998, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.1957259836229279, |
|
"grad_norm": 0.7564793946843111, |
|
"learning_rate": 4.715998812855305e-06, |
|
"loss": 0.1009, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.19652486518873577, |
|
"grad_norm": 0.7851241620875284, |
|
"learning_rate": 4.7129306529060415e-06, |
|
"loss": 0.0934, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.19732374675454364, |
|
"grad_norm": 0.83739262621917, |
|
"learning_rate": 4.709847017822876e-06, |
|
"loss": 0.0895, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.19812262832035152, |
|
"grad_norm": 0.7640947304805092, |
|
"learning_rate": 4.706747929169821e-06, |
|
"loss": 0.1096, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.19892150988615936, |
|
"grad_norm": 0.7811995113015094, |
|
"learning_rate": 4.703633408618955e-06, |
|
"loss": 0.1063, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.19972039145196724, |
|
"grad_norm": 0.8544109266568962, |
|
"learning_rate": 4.700503477950278e-06, |
|
"loss": 0.1071, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2005192730177751, |
|
"grad_norm": 0.8431749832390882, |
|
"learning_rate": 4.697358159051549e-06, |
|
"loss": 0.1047, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.201318154583583, |
|
"grad_norm": 0.8570451142462542, |
|
"learning_rate": 4.694197473918139e-06, |
|
"loss": 0.1088, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.20211703614939086, |
|
"grad_norm": 0.7877487371067441, |
|
"learning_rate": 4.691021444652877e-06, |
|
"loss": 0.0954, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.20291591771519873, |
|
"grad_norm": 0.9045557407382668, |
|
"learning_rate": 4.687830093465893e-06, |
|
"loss": 0.1027, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.20371479928100658, |
|
"grad_norm": 0.7246585315986585, |
|
"learning_rate": 4.684623442674463e-06, |
|
"loss": 0.0973, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.20451368084681446, |
|
"grad_norm": 0.818430244234053, |
|
"learning_rate": 4.681401514702856e-06, |
|
"loss": 0.1059, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.20531256241262233, |
|
"grad_norm": 0.7497021560526902, |
|
"learning_rate": 4.678164332082175e-06, |
|
"loss": 0.0938, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.2061114439784302, |
|
"grad_norm": 0.8957393415270142, |
|
"learning_rate": 4.674911917450198e-06, |
|
"loss": 0.1061, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.20691032554423808, |
|
"grad_norm": 0.7645317265386067, |
|
"learning_rate": 4.671644293551222e-06, |
|
"loss": 0.0903, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.20770920711004592, |
|
"grad_norm": 0.7682244794090962, |
|
"learning_rate": 4.668361483235903e-06, |
|
"loss": 0.085, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2085080886758538, |
|
"grad_norm": 0.854685496484154, |
|
"learning_rate": 4.665063509461098e-06, |
|
"loss": 0.0994, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.20930697024166167, |
|
"grad_norm": 0.8815096120890409, |
|
"learning_rate": 4.661750395289698e-06, |
|
"loss": 0.1058, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.21010585180746955, |
|
"grad_norm": 0.794089561085315, |
|
"learning_rate": 4.6584221638904775e-06, |
|
"loss": 0.1016, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.21090473337327742, |
|
"grad_norm": 0.8758797896184406, |
|
"learning_rate": 4.655078838537924e-06, |
|
"loss": 0.0972, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.2117036149390853, |
|
"grad_norm": 0.7898730124221975, |
|
"learning_rate": 4.651720442612076e-06, |
|
"loss": 0.0918, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.21250249650489314, |
|
"grad_norm": 0.7317067873877707, |
|
"learning_rate": 4.648346999598364e-06, |
|
"loss": 0.0913, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.21330137807070101, |
|
"grad_norm": 0.7843972077721959, |
|
"learning_rate": 4.644958533087443e-06, |
|
"loss": 0.0951, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2141002596365089, |
|
"grad_norm": 0.792184243401163, |
|
"learning_rate": 4.641555066775027e-06, |
|
"loss": 0.0972, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.21489914120231676, |
|
"grad_norm": 0.7582413630818394, |
|
"learning_rate": 4.638136624461723e-06, |
|
"loss": 0.0932, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.21569802276812464, |
|
"grad_norm": 0.7816406773037096, |
|
"learning_rate": 4.634703230052871e-06, |
|
"loss": 0.1042, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21649690433393248, |
|
"grad_norm": 0.7596966546637388, |
|
"learning_rate": 4.631254907558366e-06, |
|
"loss": 0.0971, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.21729578589974036, |
|
"grad_norm": 0.783304310420589, |
|
"learning_rate": 4.627791681092499e-06, |
|
"loss": 0.095, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.21809466746554823, |
|
"grad_norm": 0.8086579883341541, |
|
"learning_rate": 4.624313574873787e-06, |
|
"loss": 0.1129, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.2188935490313561, |
|
"grad_norm": 0.6962761290814109, |
|
"learning_rate": 4.620820613224796e-06, |
|
"loss": 0.089, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.21969243059716398, |
|
"grad_norm": 0.7288448006122578, |
|
"learning_rate": 4.617312820571981e-06, |
|
"loss": 0.0882, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.22049131216297185, |
|
"grad_norm": 0.8048648789646363, |
|
"learning_rate": 4.613790221445511e-06, |
|
"loss": 0.1025, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.2212901937287797, |
|
"grad_norm": 0.7929094130785004, |
|
"learning_rate": 4.610252840479097e-06, |
|
"loss": 0.0989, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.22208907529458757, |
|
"grad_norm": 0.8168348398218105, |
|
"learning_rate": 4.606700702409818e-06, |
|
"loss": 0.1048, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.22288795686039545, |
|
"grad_norm": 0.9063263874082697, |
|
"learning_rate": 4.603133832077953e-06, |
|
"loss": 0.1019, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.22368683842620332, |
|
"grad_norm": 0.8822514604154854, |
|
"learning_rate": 4.599552254426804e-06, |
|
"loss": 0.0869, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2244857199920112, |
|
"grad_norm": 0.6911719229844316, |
|
"learning_rate": 4.595955994502519e-06, |
|
"loss": 0.0943, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.22528460155781904, |
|
"grad_norm": 0.9183376642968846, |
|
"learning_rate": 4.592345077453925e-06, |
|
"loss": 0.0993, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.22608348312362692, |
|
"grad_norm": 0.7954093378579109, |
|
"learning_rate": 4.588719528532342e-06, |
|
"loss": 0.0989, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2268823646894348, |
|
"grad_norm": 0.8198658669695411, |
|
"learning_rate": 4.5850793730914135e-06, |
|
"loss": 0.0909, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.22768124625524266, |
|
"grad_norm": 0.7592112175879184, |
|
"learning_rate": 4.5814246365869285e-06, |
|
"loss": 0.1019, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.22848012782105054, |
|
"grad_norm": 0.8383090068025297, |
|
"learning_rate": 4.577755344576641e-06, |
|
"loss": 0.1096, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.22927900938685838, |
|
"grad_norm": 0.7719532995528691, |
|
"learning_rate": 4.5740715227200904e-06, |
|
"loss": 0.0919, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.23007789095266626, |
|
"grad_norm": 0.6949182347348394, |
|
"learning_rate": 4.570373196778427e-06, |
|
"loss": 0.0833, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.23087677251847413, |
|
"grad_norm": 0.8542806997632925, |
|
"learning_rate": 4.566660392614229e-06, |
|
"loss": 0.0942, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.231675654084282, |
|
"grad_norm": 0.7942245149648606, |
|
"learning_rate": 4.562933136191317e-06, |
|
"loss": 0.1038, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23247453565008988, |
|
"grad_norm": 0.741118108734099, |
|
"learning_rate": 4.559191453574582e-06, |
|
"loss": 0.0996, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.23327341721589775, |
|
"grad_norm": 0.7401819189973926, |
|
"learning_rate": 4.555435370929797e-06, |
|
"loss": 0.0881, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2340722987817056, |
|
"grad_norm": 0.7225340215993067, |
|
"learning_rate": 4.551664914523433e-06, |
|
"loss": 0.0889, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.23487118034751348, |
|
"grad_norm": 0.7771401029247064, |
|
"learning_rate": 4.54788011072248e-06, |
|
"loss": 0.1059, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.23567006191332135, |
|
"grad_norm": 0.7629163339227021, |
|
"learning_rate": 4.5440809859942585e-06, |
|
"loss": 0.103, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.23646894347912922, |
|
"grad_norm": 0.7448031192353857, |
|
"learning_rate": 4.5402675669062345e-06, |
|
"loss": 0.0995, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.2372678250449371, |
|
"grad_norm": 0.7283315899507647, |
|
"learning_rate": 4.53643988012584e-06, |
|
"loss": 0.1025, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.23806670661074494, |
|
"grad_norm": 0.9027319022911194, |
|
"learning_rate": 4.532597952420276e-06, |
|
"loss": 0.1066, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.23886558817655282, |
|
"grad_norm": 0.7785808633007979, |
|
"learning_rate": 4.5287418106563355e-06, |
|
"loss": 0.0973, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.2396644697423607, |
|
"grad_norm": 0.9390527397483459, |
|
"learning_rate": 4.52487148180021e-06, |
|
"loss": 0.0863, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24046335130816857, |
|
"grad_norm": 0.865775604026073, |
|
"learning_rate": 4.5209869929172975e-06, |
|
"loss": 0.1084, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.24126223287397644, |
|
"grad_norm": 0.8403236400106889, |
|
"learning_rate": 4.5170883711720245e-06, |
|
"loss": 0.0904, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.24206111443978431, |
|
"grad_norm": 0.8064673147923992, |
|
"learning_rate": 4.513175643827647e-06, |
|
"loss": 0.0941, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.24285999600559216, |
|
"grad_norm": 0.8013189960100633, |
|
"learning_rate": 4.50924883824606e-06, |
|
"loss": 0.0992, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.24365887757140003, |
|
"grad_norm": 0.7728593022833018, |
|
"learning_rate": 4.50530798188761e-06, |
|
"loss": 0.0893, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2444577591372079, |
|
"grad_norm": 0.9052442733312893, |
|
"learning_rate": 4.501353102310901e-06, |
|
"loss": 0.0928, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.24525664070301578, |
|
"grad_norm": 0.6668531899886609, |
|
"learning_rate": 4.497384227172603e-06, |
|
"loss": 0.0877, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.24605552226882366, |
|
"grad_norm": 0.871779640726829, |
|
"learning_rate": 4.493401384227257e-06, |
|
"loss": 0.0968, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2468544038346315, |
|
"grad_norm": 0.8664305902715963, |
|
"learning_rate": 4.489404601327081e-06, |
|
"loss": 0.0941, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.24765328540043938, |
|
"grad_norm": 0.753124311591743, |
|
"learning_rate": 4.485393906421776e-06, |
|
"loss": 0.0867, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24845216696624725, |
|
"grad_norm": 0.8414426164462598, |
|
"learning_rate": 4.48136932755833e-06, |
|
"loss": 0.0917, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.24925104853205513, |
|
"grad_norm": 0.8167935257894663, |
|
"learning_rate": 4.477330892880824e-06, |
|
"loss": 0.0899, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.25004993009786297, |
|
"grad_norm": 0.8240834348323666, |
|
"learning_rate": 4.47327863063023e-06, |
|
"loss": 0.0997, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.25084881166367085, |
|
"grad_norm": 0.8148444956345996, |
|
"learning_rate": 4.469212569144222e-06, |
|
"loss": 0.0929, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.2516476932294787, |
|
"grad_norm": 0.812151270928214, |
|
"learning_rate": 4.4651327368569695e-06, |
|
"loss": 0.1031, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2524465747952866, |
|
"grad_norm": 0.7918672951252329, |
|
"learning_rate": 4.46103916229894e-06, |
|
"loss": 0.0916, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.25324545636109447, |
|
"grad_norm": 0.7824718515480527, |
|
"learning_rate": 4.456931874096705e-06, |
|
"loss": 0.0865, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.25404433792690234, |
|
"grad_norm": 0.7520103031931814, |
|
"learning_rate": 4.452810900972734e-06, |
|
"loss": 0.093, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2548432194927102, |
|
"grad_norm": 0.8388616884327461, |
|
"learning_rate": 4.448676271745198e-06, |
|
"loss": 0.0865, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2556421010585181, |
|
"grad_norm": 0.7264911003280431, |
|
"learning_rate": 4.444528015327763e-06, |
|
"loss": 0.0894, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25644098262432596, |
|
"grad_norm": 0.8182746828018328, |
|
"learning_rate": 4.440366160729393e-06, |
|
"loss": 0.1001, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.25723986419013384, |
|
"grad_norm": 0.8487906160638949, |
|
"learning_rate": 4.436190737054142e-06, |
|
"loss": 0.1168, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.25803874575594166, |
|
"grad_norm": 0.9326635695818013, |
|
"learning_rate": 4.432001773500958e-06, |
|
"loss": 0.1056, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.25883762732174953, |
|
"grad_norm": 0.7153701333351724, |
|
"learning_rate": 4.42779929936347e-06, |
|
"loss": 0.0954, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.2596365088875574, |
|
"grad_norm": 0.8066963737724318, |
|
"learning_rate": 4.423583344029786e-06, |
|
"loss": 0.0931, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2604353904533653, |
|
"grad_norm": 0.7908989414413082, |
|
"learning_rate": 4.419353936982293e-06, |
|
"loss": 0.0982, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.26123427201917315, |
|
"grad_norm": 0.846279083139174, |
|
"learning_rate": 4.415111107797445e-06, |
|
"loss": 0.1006, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.262033153584981, |
|
"grad_norm": 1.0343812566729962, |
|
"learning_rate": 4.410854886145556e-06, |
|
"loss": 0.105, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.2628320351507889, |
|
"grad_norm": 0.7771271488054676, |
|
"learning_rate": 4.406585301790595e-06, |
|
"loss": 0.0929, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.2636309167165968, |
|
"grad_norm": 0.9484884466652146, |
|
"learning_rate": 4.402302384589979e-06, |
|
"loss": 0.096, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26442979828240465, |
|
"grad_norm": 0.9666766702925966, |
|
"learning_rate": 4.398006164494358e-06, |
|
"loss": 0.1055, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.2652286798482125, |
|
"grad_norm": 0.7057771202211578, |
|
"learning_rate": 4.393696671547415e-06, |
|
"loss": 0.0904, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2660275614140204, |
|
"grad_norm": 0.9400899783662074, |
|
"learning_rate": 4.3893739358856465e-06, |
|
"loss": 0.0878, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.2668264429798282, |
|
"grad_norm": 0.9030024491318343, |
|
"learning_rate": 4.385037987738158e-06, |
|
"loss": 0.1005, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.2676253245456361, |
|
"grad_norm": 0.762520292407699, |
|
"learning_rate": 4.38068885742645e-06, |
|
"loss": 0.0922, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.26842420611144396, |
|
"grad_norm": 1.010542949334313, |
|
"learning_rate": 4.376326575364206e-06, |
|
"loss": 0.1021, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.26922308767725184, |
|
"grad_norm": 0.8081986385372553, |
|
"learning_rate": 4.371951172057082e-06, |
|
"loss": 0.1031, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.2700219692430597, |
|
"grad_norm": 0.8022669170988722, |
|
"learning_rate": 4.367562678102491e-06, |
|
"loss": 0.1012, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.2708208508088676, |
|
"grad_norm": 0.9241574900249333, |
|
"learning_rate": 4.363161124189387e-06, |
|
"loss": 0.0907, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.27161973237467546, |
|
"grad_norm": 0.7707255024516305, |
|
"learning_rate": 4.358746541098057e-06, |
|
"loss": 0.095, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27241861394048333, |
|
"grad_norm": 0.9222172849209264, |
|
"learning_rate": 4.354318959699899e-06, |
|
"loss": 0.0878, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2732174955062912, |
|
"grad_norm": 0.7886919216124627, |
|
"learning_rate": 4.34987841095721e-06, |
|
"loss": 0.0894, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.2740163770720991, |
|
"grad_norm": 0.727790265160795, |
|
"learning_rate": 4.3454249259229665e-06, |
|
"loss": 0.0921, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.27481525863790696, |
|
"grad_norm": 0.8093415051048494, |
|
"learning_rate": 4.340958535740612e-06, |
|
"loss": 0.0889, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.2756141402037148, |
|
"grad_norm": 0.8827233794289518, |
|
"learning_rate": 4.336479271643833e-06, |
|
"loss": 0.092, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.27641302176952265, |
|
"grad_norm": 0.7468481018597232, |
|
"learning_rate": 4.3319871649563474e-06, |
|
"loss": 0.0957, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.2772119033353305, |
|
"grad_norm": 0.8611203322886116, |
|
"learning_rate": 4.32748224709168e-06, |
|
"loss": 0.102, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.2780107849011384, |
|
"grad_norm": 0.7823320107034754, |
|
"learning_rate": 4.322964549552943e-06, |
|
"loss": 0.0955, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.27880966646694627, |
|
"grad_norm": 0.8059112729344878, |
|
"learning_rate": 4.318434103932622e-06, |
|
"loss": 0.0882, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.27960854803275415, |
|
"grad_norm": 0.8138007618828386, |
|
"learning_rate": 4.313890941912347e-06, |
|
"loss": 0.0834, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.280407429598562, |
|
"grad_norm": 0.7404431489736772, |
|
"learning_rate": 4.309335095262675e-06, |
|
"loss": 0.0949, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.2812063111643699, |
|
"grad_norm": 0.7470188231140139, |
|
"learning_rate": 4.30476659584287e-06, |
|
"loss": 0.0842, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.28200519273017777, |
|
"grad_norm": 0.797025807552636, |
|
"learning_rate": 4.3001854756006724e-06, |
|
"loss": 0.0834, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.28280407429598564, |
|
"grad_norm": 0.814130943144683, |
|
"learning_rate": 4.295591766572086e-06, |
|
"loss": 0.0932, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.2836029558617935, |
|
"grad_norm": 0.8189778490428435, |
|
"learning_rate": 4.290985500881143e-06, |
|
"loss": 0.1058, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.28440183742760133, |
|
"grad_norm": 0.8241913671649066, |
|
"learning_rate": 4.286366710739691e-06, |
|
"loss": 0.0942, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.2852007189934092, |
|
"grad_norm": 0.8965816503974172, |
|
"learning_rate": 4.281735428447158e-06, |
|
"loss": 0.0832, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.2859996005592171, |
|
"grad_norm": 0.8157444856364388, |
|
"learning_rate": 4.2770916863903295e-06, |
|
"loss": 0.0908, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.28679848212502496, |
|
"grad_norm": 0.7854869982626375, |
|
"learning_rate": 4.272435517043125e-06, |
|
"loss": 0.1096, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.28759736369083283, |
|
"grad_norm": 0.8720888550038643, |
|
"learning_rate": 4.267766952966369e-06, |
|
"loss": 0.0888, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2883962452566407, |
|
"grad_norm": 0.8053883429022618, |
|
"learning_rate": 4.263086026807561e-06, |
|
"loss": 0.0968, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2891951268224486, |
|
"grad_norm": 0.8248678346640207, |
|
"learning_rate": 4.258392771300649e-06, |
|
"loss": 0.0958, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.28999400838825645, |
|
"grad_norm": 0.8242800118706491, |
|
"learning_rate": 4.253687219265803e-06, |
|
"loss": 0.0785, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2907928899540643, |
|
"grad_norm": 0.8003059421660266, |
|
"learning_rate": 4.248969403609182e-06, |
|
"loss": 0.096, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.2915917715198722, |
|
"grad_norm": 0.8463871811311655, |
|
"learning_rate": 4.244239357322705e-06, |
|
"loss": 0.106, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2923906530856801, |
|
"grad_norm": 0.8461024953606568, |
|
"learning_rate": 4.239497113483819e-06, |
|
"loss": 0.102, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2931895346514879, |
|
"grad_norm": 0.7888813716191685, |
|
"learning_rate": 4.2347427052552725e-06, |
|
"loss": 0.0965, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.29398841621729577, |
|
"grad_norm": 0.8119369304603392, |
|
"learning_rate": 4.2299761658848775e-06, |
|
"loss": 0.0988, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.29478729778310364, |
|
"grad_norm": 1.014324893212535, |
|
"learning_rate": 4.2251975287052804e-06, |
|
"loss": 0.1018, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.2955861793489115, |
|
"grad_norm": 0.8064204728119285, |
|
"learning_rate": 4.220406827133728e-06, |
|
"loss": 0.106, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2963850609147194, |
|
"grad_norm": 0.822626417850115, |
|
"learning_rate": 4.215604094671835e-06, |
|
"loss": 0.0901, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.29718394248052726, |
|
"grad_norm": 0.9253154637853676, |
|
"learning_rate": 4.2107893649053465e-06, |
|
"loss": 0.1098, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.29798282404633514, |
|
"grad_norm": 0.7253683875908528, |
|
"learning_rate": 4.205962671503907e-06, |
|
"loss": 0.105, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.298781705612143, |
|
"grad_norm": 0.8070549842742699, |
|
"learning_rate": 4.201124048220825e-06, |
|
"loss": 0.0902, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2995805871779509, |
|
"grad_norm": 0.8993069957177886, |
|
"learning_rate": 4.196273528892831e-06, |
|
"loss": 0.1005, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.30037946874375876, |
|
"grad_norm": 0.7182651047515527, |
|
"learning_rate": 4.191411147439849e-06, |
|
"loss": 0.0905, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.30117835030956663, |
|
"grad_norm": 0.8275259104357634, |
|
"learning_rate": 4.186536937864752e-06, |
|
"loss": 0.0876, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.30197723187537445, |
|
"grad_norm": 0.7585687841737675, |
|
"learning_rate": 4.181650934253132e-06, |
|
"loss": 0.0926, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3027761134411823, |
|
"grad_norm": 0.7808709996475448, |
|
"learning_rate": 4.176753170773053e-06, |
|
"loss": 0.0974, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3035749950069902, |
|
"grad_norm": 0.8210683387788029, |
|
"learning_rate": 4.171843681674818e-06, |
|
"loss": 0.0999, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3043738765727981, |
|
"grad_norm": 0.7953882184740051, |
|
"learning_rate": 4.16692250129073e-06, |
|
"loss": 0.0836, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.30517275813860595, |
|
"grad_norm": 0.7020914530946579, |
|
"learning_rate": 4.161989664034844e-06, |
|
"loss": 0.0822, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3059716397044138, |
|
"grad_norm": 0.820358474319794, |
|
"learning_rate": 4.157045204402741e-06, |
|
"loss": 0.0901, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3067705212702217, |
|
"grad_norm": 0.7488124118932843, |
|
"learning_rate": 4.152089156971268e-06, |
|
"loss": 0.0898, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.30756940283602957, |
|
"grad_norm": 0.7513953195291163, |
|
"learning_rate": 4.1471215563983125e-06, |
|
"loss": 0.1036, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.30836828440183744, |
|
"grad_norm": 0.8572423002912835, |
|
"learning_rate": 4.142142437422552e-06, |
|
"loss": 0.101, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3091671659676453, |
|
"grad_norm": 0.7548601875565724, |
|
"learning_rate": 4.137151834863213e-06, |
|
"loss": 0.0966, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3099660475334532, |
|
"grad_norm": 0.7870823465308348, |
|
"learning_rate": 4.132149783619826e-06, |
|
"loss": 0.093, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.310764929099261, |
|
"grad_norm": 0.6611014210675613, |
|
"learning_rate": 4.127136318671984e-06, |
|
"loss": 0.0854, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.3115638106650689, |
|
"grad_norm": 0.7054296499042809, |
|
"learning_rate": 4.122111475079097e-06, |
|
"loss": 0.0922, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.31236269223087676, |
|
"grad_norm": 0.8038584158970129, |
|
"learning_rate": 4.117075287980144e-06, |
|
"loss": 0.0859, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.31316157379668463, |
|
"grad_norm": 0.7871099701396741, |
|
"learning_rate": 4.112027792593433e-06, |
|
"loss": 0.0889, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.3139604553624925, |
|
"grad_norm": 0.6799036215445816, |
|
"learning_rate": 4.106969024216348e-06, |
|
"loss": 0.0803, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3147593369283004, |
|
"grad_norm": 0.8062246682119243, |
|
"learning_rate": 4.101899018225111e-06, |
|
"loss": 0.0911, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.31555821849410826, |
|
"grad_norm": 0.7615051736680087, |
|
"learning_rate": 4.096817810074521e-06, |
|
"loss": 0.0976, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.31635710005991613, |
|
"grad_norm": 0.7754952681063141, |
|
"learning_rate": 4.091725435297721e-06, |
|
"loss": 0.0915, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.317155981625724, |
|
"grad_norm": 0.8407847771110065, |
|
"learning_rate": 4.086621929505941e-06, |
|
"loss": 0.0931, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3179548631915319, |
|
"grad_norm": 0.8100383074921174, |
|
"learning_rate": 4.0815073283882495e-06, |
|
"loss": 0.0834, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.31875374475733975, |
|
"grad_norm": 0.7969746133003707, |
|
"learning_rate": 4.076381667711306e-06, |
|
"loss": 0.0874, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.31955262632314757, |
|
"grad_norm": 0.8152506864022063, |
|
"learning_rate": 4.0712449833191115e-06, |
|
"loss": 0.0893, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32035150788895544, |
|
"grad_norm": 0.8233851359298267, |
|
"learning_rate": 4.066097311132754e-06, |
|
"loss": 0.0838, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3211503894547633, |
|
"grad_norm": 0.8539151154626614, |
|
"learning_rate": 4.060938687150159e-06, |
|
"loss": 0.0989, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3219492710205712, |
|
"grad_norm": 0.6995749387992817, |
|
"learning_rate": 4.055769147445842e-06, |
|
"loss": 0.0861, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.32274815258637907, |
|
"grad_norm": 0.8859127143940589, |
|
"learning_rate": 4.0505887281706505e-06, |
|
"loss": 0.1025, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.32354703415218694, |
|
"grad_norm": 0.694119492128483, |
|
"learning_rate": 4.045397465551513e-06, |
|
"loss": 0.0808, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3243459157179948, |
|
"grad_norm": 0.7492015043916129, |
|
"learning_rate": 4.040195395891187e-06, |
|
"loss": 0.0873, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.3251447972838027, |
|
"grad_norm": 0.7380399378402056, |
|
"learning_rate": 4.034982555568005e-06, |
|
"loss": 0.0874, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.32594367884961056, |
|
"grad_norm": 0.7576142318886974, |
|
"learning_rate": 4.029758981035617e-06, |
|
"loss": 0.0887, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.32674256041541844, |
|
"grad_norm": 0.6820402330521123, |
|
"learning_rate": 4.024524708822739e-06, |
|
"loss": 0.0876, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.32754144198122626, |
|
"grad_norm": 0.8158511029737862, |
|
"learning_rate": 4.019279775532896e-06, |
|
"loss": 0.0912, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32834032354703413, |
|
"grad_norm": 0.7272739087436181, |
|
"learning_rate": 4.014024217844167e-06, |
|
"loss": 0.0889, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.329139205112842, |
|
"grad_norm": 0.712392288383156, |
|
"learning_rate": 4.008758072508929e-06, |
|
"loss": 0.0885, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3299380866786499, |
|
"grad_norm": 0.7332274428055798, |
|
"learning_rate": 4.0034813763535965e-06, |
|
"loss": 0.0952, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.33073696824445775, |
|
"grad_norm": 0.8308294351934334, |
|
"learning_rate": 3.9981941662783675e-06, |
|
"loss": 0.0943, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3315358498102656, |
|
"grad_norm": 0.7987444077880893, |
|
"learning_rate": 3.992896479256966e-06, |
|
"loss": 0.0905, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3323347313760735, |
|
"grad_norm": 0.752508121022912, |
|
"learning_rate": 3.987588352336379e-06, |
|
"loss": 0.0882, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.3331336129418814, |
|
"grad_norm": 0.7419767282856443, |
|
"learning_rate": 3.982269822636602e-06, |
|
"loss": 0.0812, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.33393249450768925, |
|
"grad_norm": 0.6909171898250309, |
|
"learning_rate": 3.976940927350377e-06, |
|
"loss": 0.0837, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3347313760734971, |
|
"grad_norm": 0.7683212481615431, |
|
"learning_rate": 3.971601703742932e-06, |
|
"loss": 0.0873, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.335530257639305, |
|
"grad_norm": 0.6937329591034055, |
|
"learning_rate": 3.966252189151726e-06, |
|
"loss": 0.0794, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3363291392051128, |
|
"grad_norm": 0.7515329965394459, |
|
"learning_rate": 3.960892420986177e-06, |
|
"loss": 0.0842, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3371280207709207, |
|
"grad_norm": 0.726559526657185, |
|
"learning_rate": 3.955522436727412e-06, |
|
"loss": 0.097, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.33792690233672856, |
|
"grad_norm": 0.7327126701823631, |
|
"learning_rate": 3.950142273927996e-06, |
|
"loss": 0.0785, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.33872578390253644, |
|
"grad_norm": 0.7546091429104848, |
|
"learning_rate": 3.944751970211675e-06, |
|
"loss": 0.0777, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3395246654683443, |
|
"grad_norm": 0.6886480296858932, |
|
"learning_rate": 3.93935156327311e-06, |
|
"loss": 0.0863, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3403235470341522, |
|
"grad_norm": 0.7426135389310096, |
|
"learning_rate": 3.933941090877615e-06, |
|
"loss": 0.0916, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.34112242859996006, |
|
"grad_norm": 0.766034385340495, |
|
"learning_rate": 3.928520590860894e-06, |
|
"loss": 0.0936, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.34192131016576793, |
|
"grad_norm": 0.6947732694520958, |
|
"learning_rate": 3.9230901011287695e-06, |
|
"loss": 0.0843, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3427201917315758, |
|
"grad_norm": 0.7502696533350824, |
|
"learning_rate": 3.917649659656927e-06, |
|
"loss": 0.0894, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3435190732973837, |
|
"grad_norm": 0.6987339211471317, |
|
"learning_rate": 3.912199304490645e-06, |
|
"loss": 0.0841, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.34431795486319156, |
|
"grad_norm": 0.7484094471603621, |
|
"learning_rate": 3.906739073744526e-06, |
|
"loss": 0.0867, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3451168364289994, |
|
"grad_norm": 0.8136583365831624, |
|
"learning_rate": 3.901269005602235e-06, |
|
"loss": 0.09, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.34591571799480725, |
|
"grad_norm": 0.6891207387873332, |
|
"learning_rate": 3.895789138316231e-06, |
|
"loss": 0.0757, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3467145995606151, |
|
"grad_norm": 0.7235748175530092, |
|
"learning_rate": 3.8902995102074985e-06, |
|
"loss": 0.0829, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.347513481126423, |
|
"grad_norm": 0.7886249459447628, |
|
"learning_rate": 3.8848001596652765e-06, |
|
"loss": 0.0834, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.34831236269223087, |
|
"grad_norm": 0.7917287800334782, |
|
"learning_rate": 3.879291125146798e-06, |
|
"loss": 0.0863, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.34911124425803874, |
|
"grad_norm": 0.7593521963308881, |
|
"learning_rate": 3.8737724451770155e-06, |
|
"loss": 0.0855, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3499101258238466, |
|
"grad_norm": 0.675290830162991, |
|
"learning_rate": 3.868244158348331e-06, |
|
"loss": 0.0705, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3507090073896545, |
|
"grad_norm": 0.7483252051084053, |
|
"learning_rate": 3.862706303320329e-06, |
|
"loss": 0.0879, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.35150788895546237, |
|
"grad_norm": 0.7298703252341586, |
|
"learning_rate": 3.857158918819506e-06, |
|
"loss": 0.0797, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35230677052127024, |
|
"grad_norm": 0.8238714701549442, |
|
"learning_rate": 3.8516020436389945e-06, |
|
"loss": 0.0941, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3531056520870781, |
|
"grad_norm": 0.7126221155492483, |
|
"learning_rate": 3.8460357166383e-06, |
|
"loss": 0.0851, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.35390453365288593, |
|
"grad_norm": 0.8197766203707731, |
|
"learning_rate": 3.840459976743024e-06, |
|
"loss": 0.0957, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3547034152186938, |
|
"grad_norm": 0.7846087618909572, |
|
"learning_rate": 3.834874862944591e-06, |
|
"loss": 0.099, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3555022967845017, |
|
"grad_norm": 0.716374377569519, |
|
"learning_rate": 3.82928041429998e-06, |
|
"loss": 0.0968, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.35630117835030956, |
|
"grad_norm": 0.8390389818745712, |
|
"learning_rate": 3.823676669931448e-06, |
|
"loss": 0.089, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.35710005991611743, |
|
"grad_norm": 0.747973364886596, |
|
"learning_rate": 3.8180636690262565e-06, |
|
"loss": 0.0958, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3578989414819253, |
|
"grad_norm": 0.6950670448192524, |
|
"learning_rate": 3.8124414508364005e-06, |
|
"loss": 0.0802, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3586978230477332, |
|
"grad_norm": 0.7708194029959454, |
|
"learning_rate": 3.8068100546783315e-06, |
|
"loss": 0.0795, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.35949670461354105, |
|
"grad_norm": 0.7493448729691877, |
|
"learning_rate": 3.801169519932681e-06, |
|
"loss": 0.0884, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3602955861793489, |
|
"grad_norm": 0.8085585194492837, |
|
"learning_rate": 3.7955198860439892e-06, |
|
"loss": 0.0981, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3610944677451568, |
|
"grad_norm": 0.7586954756100585, |
|
"learning_rate": 3.789861192520426e-06, |
|
"loss": 0.096, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3618933493109647, |
|
"grad_norm": 0.7843231944695468, |
|
"learning_rate": 3.7841934789335167e-06, |
|
"loss": 0.0896, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3626922308767725, |
|
"grad_norm": 0.7476912899547512, |
|
"learning_rate": 3.778516784917863e-06, |
|
"loss": 0.0787, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.36349111244258037, |
|
"grad_norm": 0.7189383293665426, |
|
"learning_rate": 3.772831150170868e-06, |
|
"loss": 0.0879, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.36428999400838824, |
|
"grad_norm": 0.7132741109475154, |
|
"learning_rate": 3.767136614452458e-06, |
|
"loss": 0.0865, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3650888755741961, |
|
"grad_norm": 0.7493526039741611, |
|
"learning_rate": 3.761433217584803e-06, |
|
"loss": 0.0869, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.365887757140004, |
|
"grad_norm": 0.8337392813152291, |
|
"learning_rate": 3.7557209994520428e-06, |
|
"loss": 0.0908, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.36668663870581186, |
|
"grad_norm": 0.7810036583492975, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.0785, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.36748552027161974, |
|
"grad_norm": 0.8293572960190608, |
|
"learning_rate": 3.7442702592359094e-06, |
|
"loss": 0.0855, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3682844018374276, |
|
"grad_norm": 0.7663051871847771, |
|
"learning_rate": 3.7385318172281314e-06, |
|
"loss": 0.0821, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3690832834032355, |
|
"grad_norm": 0.6698128445071535, |
|
"learning_rate": 3.732784714105876e-06, |
|
"loss": 0.0814, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.36988216496904336, |
|
"grad_norm": 0.741297757096265, |
|
"learning_rate": 3.727028990058921e-06, |
|
"loss": 0.0788, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.37068104653485123, |
|
"grad_norm": 0.8326064497418644, |
|
"learning_rate": 3.7212646853373304e-06, |
|
"loss": 0.0913, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.37147992810065905, |
|
"grad_norm": 0.7519122240289404, |
|
"learning_rate": 3.715491840251172e-06, |
|
"loss": 0.0923, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3722788096664669, |
|
"grad_norm": 0.8495937228845698, |
|
"learning_rate": 3.70971049517024e-06, |
|
"loss": 0.0991, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3730776912322748, |
|
"grad_norm": 0.7980569435136234, |
|
"learning_rate": 3.7039206905237663e-06, |
|
"loss": 0.0846, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3738765727980827, |
|
"grad_norm": 0.82253196121602, |
|
"learning_rate": 3.6981224668001427e-06, |
|
"loss": 0.0907, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.37467545436389055, |
|
"grad_norm": 0.7813567054207068, |
|
"learning_rate": 3.692315864546635e-06, |
|
"loss": 0.0954, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3754743359296984, |
|
"grad_norm": 0.6900644306541571, |
|
"learning_rate": 3.6865009243691015e-06, |
|
"loss": 0.0852, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3762732174955063, |
|
"grad_norm": 0.8286216266975185, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"loss": 0.0971, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.37707209906131417, |
|
"grad_norm": 0.8411427447979212, |
|
"learning_rate": 3.6748461929566405e-06, |
|
"loss": 0.0976, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.37787098062712204, |
|
"grad_norm": 0.7097804298197564, |
|
"learning_rate": 3.6690064832238287e-06, |
|
"loss": 0.084, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3786698621929299, |
|
"grad_norm": 0.6997311886122694, |
|
"learning_rate": 3.663158598570652e-06, |
|
"loss": 0.0879, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.3794687437587378, |
|
"grad_norm": 0.7892931222413458, |
|
"learning_rate": 3.6573025798916566e-06, |
|
"loss": 0.0858, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3802676253245456, |
|
"grad_norm": 0.7881856449766588, |
|
"learning_rate": 3.6514384681382736e-06, |
|
"loss": 0.0952, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3810665068903535, |
|
"grad_norm": 0.7870138639216214, |
|
"learning_rate": 3.6455663043185264e-06, |
|
"loss": 0.1006, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.38186538845616136, |
|
"grad_norm": 0.7825807381829545, |
|
"learning_rate": 3.639686129496749e-06, |
|
"loss": 0.0831, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.38266427002196923, |
|
"grad_norm": 0.8412645195997293, |
|
"learning_rate": 3.6337979847932948e-06, |
|
"loss": 0.0878, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3834631515877771, |
|
"grad_norm": 0.7619513322856116, |
|
"learning_rate": 3.627901911384252e-06, |
|
"loss": 0.0857, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.384262033153585, |
|
"grad_norm": 0.920197415542654, |
|
"learning_rate": 3.621997950501156e-06, |
|
"loss": 0.0923, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.38506091471939285, |
|
"grad_norm": 0.9089054249688453, |
|
"learning_rate": 3.616086143430697e-06, |
|
"loss": 0.0831, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.38585979628520073, |
|
"grad_norm": 0.8882827945787267, |
|
"learning_rate": 3.6101665315144357e-06, |
|
"loss": 0.0879, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3866586778510086, |
|
"grad_norm": 0.853626379217836, |
|
"learning_rate": 3.604239156148512e-06, |
|
"loss": 0.0856, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3874575594168165, |
|
"grad_norm": 0.7468567313834733, |
|
"learning_rate": 3.598304058783357e-06, |
|
"loss": 0.0919, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.38825644098262435, |
|
"grad_norm": 0.8236774995238476, |
|
"learning_rate": 3.5923612809233987e-06, |
|
"loss": 0.0958, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.38905532254843217, |
|
"grad_norm": 0.8150930399340047, |
|
"learning_rate": 3.5864108641267815e-06, |
|
"loss": 0.089, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.38985420411424004, |
|
"grad_norm": 0.7645579281892109, |
|
"learning_rate": 3.580452850005061e-06, |
|
"loss": 0.1047, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3906530856800479, |
|
"grad_norm": 0.8299900496448547, |
|
"learning_rate": 3.5744872802229296e-06, |
|
"loss": 0.088, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3914519672458558, |
|
"grad_norm": 0.8345475593778248, |
|
"learning_rate": 3.56851419649791e-06, |
|
"loss": 0.0878, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.39225084881166367, |
|
"grad_norm": 0.7331030137600181, |
|
"learning_rate": 3.5625336406000752e-06, |
|
"loss": 0.0873, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.39304973037747154, |
|
"grad_norm": 0.751621872360305, |
|
"learning_rate": 3.556545654351749e-06, |
|
"loss": 0.0885, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3938486119432794, |
|
"grad_norm": 1.0573113079653964, |
|
"learning_rate": 3.5505502796272157e-06, |
|
"loss": 0.0967, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3946474935090873, |
|
"grad_norm": 0.8576970399627939, |
|
"learning_rate": 3.5445475583524293e-06, |
|
"loss": 0.0929, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.39544637507489516, |
|
"grad_norm": 0.7726071680089382, |
|
"learning_rate": 3.5385375325047167e-06, |
|
"loss": 0.0861, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.39624525664070304, |
|
"grad_norm": 0.8365490675498872, |
|
"learning_rate": 3.5325202441124875e-06, |
|
"loss": 0.085, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3970441382065109, |
|
"grad_norm": 0.8195417566783639, |
|
"learning_rate": 3.5264957352549378e-06, |
|
"loss": 0.0978, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.39784301977231873, |
|
"grad_norm": 0.787890923867209, |
|
"learning_rate": 3.520464048061758e-06, |
|
"loss": 0.0958, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3986419013381266, |
|
"grad_norm": 0.7294064863532496, |
|
"learning_rate": 3.514425224712835e-06, |
|
"loss": 0.0768, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3994407829039345, |
|
"grad_norm": 0.8964503937639478, |
|
"learning_rate": 3.5083793074379607e-06, |
|
"loss": 0.0779, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3994407829039345, |
|
"eval_loss": 0.08617319911718369, |
|
"eval_runtime": 16.1068, |
|
"eval_samples_per_second": 50.289, |
|
"eval_steps_per_second": 6.333, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.40023966446974235, |
|
"grad_norm": 0.699956224503503, |
|
"learning_rate": 3.5023263385165346e-06, |
|
"loss": 0.0782, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.4010385460355502, |
|
"grad_norm": 0.8000522503429608, |
|
"learning_rate": 3.496266360277269e-06, |
|
"loss": 0.0823, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.4018374276013581, |
|
"grad_norm": 0.8310072491457343, |
|
"learning_rate": 3.4901994150978926e-06, |
|
"loss": 0.087, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.402636309167166, |
|
"grad_norm": 0.7698651148649212, |
|
"learning_rate": 3.484125545404854e-06, |
|
"loss": 0.0776, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.40343519073297385, |
|
"grad_norm": 0.6925316210594149, |
|
"learning_rate": 3.478044793673025e-06, |
|
"loss": 0.071, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4042340722987817, |
|
"grad_norm": 0.8646957127737731, |
|
"learning_rate": 3.4719572024254057e-06, |
|
"loss": 0.0854, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4050329538645896, |
|
"grad_norm": 0.8538481314053985, |
|
"learning_rate": 3.4658628142328215e-06, |
|
"loss": 0.0876, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.40583183543039747, |
|
"grad_norm": 0.6801192727986217, |
|
"learning_rate": 3.4597616717136344e-06, |
|
"loss": 0.0871, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4066307169962053, |
|
"grad_norm": 0.7685526683593954, |
|
"learning_rate": 3.453653817533435e-06, |
|
"loss": 0.0786, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.40742959856201316, |
|
"grad_norm": 0.898757778816308, |
|
"learning_rate": 3.4475392944047514e-06, |
|
"loss": 0.0957, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.40822848012782104, |
|
"grad_norm": 0.6611162518031449, |
|
"learning_rate": 3.4414181450867466e-06, |
|
"loss": 0.073, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4090273616936289, |
|
"grad_norm": 0.727245696789342, |
|
"learning_rate": 3.435290412384924e-06, |
|
"loss": 0.0893, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4098262432594368, |
|
"grad_norm": 0.8581774504750863, |
|
"learning_rate": 3.429156139150819e-06, |
|
"loss": 0.0961, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.41062512482524466, |
|
"grad_norm": 0.8327009556188208, |
|
"learning_rate": 3.4230153682817112e-06, |
|
"loss": 0.0868, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.41142400639105253, |
|
"grad_norm": 0.7252986562598082, |
|
"learning_rate": 3.416868142720316e-06, |
|
"loss": 0.0885, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4122228879568604, |
|
"grad_norm": 0.7778675249904831, |
|
"learning_rate": 3.410714505454486e-06, |
|
"loss": 0.0856, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4130217695226683, |
|
"grad_norm": 0.9287656547088576, |
|
"learning_rate": 3.4045544995169126e-06, |
|
"loss": 0.0904, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.41382065108847615, |
|
"grad_norm": 0.6975552514483802, |
|
"learning_rate": 3.398388167984823e-06, |
|
"loss": 0.0883, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.41461953265428403, |
|
"grad_norm": 0.7566168004464808, |
|
"learning_rate": 3.39221555397968e-06, |
|
"loss": 0.0876, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.41541841422009185, |
|
"grad_norm": 0.8230391216164356, |
|
"learning_rate": 3.386036700666879e-06, |
|
"loss": 0.0781, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4162172957858997, |
|
"grad_norm": 0.825174450398779, |
|
"learning_rate": 3.379851651255449e-06, |
|
"loss": 0.0899, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4170161773517076, |
|
"grad_norm": 0.7049547674213612, |
|
"learning_rate": 3.3736604489977465e-06, |
|
"loss": 0.0813, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.41781505891751547, |
|
"grad_norm": 0.7945037941643481, |
|
"learning_rate": 3.3674631371891564e-06, |
|
"loss": 0.0721, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.41861394048332334, |
|
"grad_norm": 0.728321463459383, |
|
"learning_rate": 3.361259759167788e-06, |
|
"loss": 0.0957, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4194128220491312, |
|
"grad_norm": 0.7250715129014192, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"loss": 0.0727, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4202117036149391, |
|
"grad_norm": 0.7777154233383422, |
|
"learning_rate": 3.348834978050957e-06, |
|
"loss": 0.0848, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.42101058518074697, |
|
"grad_norm": 0.7030006532784772, |
|
"learning_rate": 3.3426136618426045e-06, |
|
"loss": 0.0806, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.42180946674655484, |
|
"grad_norm": 0.7258714063056894, |
|
"learning_rate": 3.3363864531950884e-06, |
|
"loss": 0.0798, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4226083483123627, |
|
"grad_norm": 0.7687609634365744, |
|
"learning_rate": 3.3301533956555886e-06, |
|
"loss": 0.0951, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.4234072298781706, |
|
"grad_norm": 0.7146960561827889, |
|
"learning_rate": 3.323914532812184e-06, |
|
"loss": 0.0828, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4242061114439784, |
|
"grad_norm": 0.8026753326015152, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"loss": 0.0876, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4250049930097863, |
|
"grad_norm": 0.7107423975805593, |
|
"learning_rate": 3.311419565768667e-06, |
|
"loss": 0.0753, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.42580387457559415, |
|
"grad_norm": 0.7112921804432786, |
|
"learning_rate": 3.3051635489464793e-06, |
|
"loss": 0.083, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.42660275614140203, |
|
"grad_norm": 0.7737378542138326, |
|
"learning_rate": 3.2989019015756253e-06, |
|
"loss": 0.0932, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4274016377072099, |
|
"grad_norm": 0.7077901777401819, |
|
"learning_rate": 3.2926346674441173e-06, |
|
"loss": 0.0785, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4282005192730178, |
|
"grad_norm": 0.7856747325005221, |
|
"learning_rate": 3.2863618903790346e-06, |
|
"loss": 0.095, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.42899940083882565, |
|
"grad_norm": 0.6681798933516729, |
|
"learning_rate": 3.280083614246218e-06, |
|
"loss": 0.077, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.4297982824046335, |
|
"grad_norm": 0.7425470556630313, |
|
"learning_rate": 3.2737998829499645e-06, |
|
"loss": 0.0843, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4305971639704414, |
|
"grad_norm": 0.7984293642051749, |
|
"learning_rate": 3.2675107404327195e-06, |
|
"loss": 0.0949, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.4313960455362493, |
|
"grad_norm": 0.8364406080946829, |
|
"learning_rate": 3.261216230674768e-06, |
|
"loss": 0.0914, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.43219492710205715, |
|
"grad_norm": 0.7017424910882903, |
|
"learning_rate": 3.2549163976939292e-06, |
|
"loss": 0.0841, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.43299380866786497, |
|
"grad_norm": 0.6971449842375312, |
|
"learning_rate": 3.2486112855452485e-06, |
|
"loss": 0.0797, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.43379269023367284, |
|
"grad_norm": 0.7830009514828633, |
|
"learning_rate": 3.2423009383206876e-06, |
|
"loss": 0.0933, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4345915717994807, |
|
"grad_norm": 0.7647997750857403, |
|
"learning_rate": 3.2359854001488178e-06, |
|
"loss": 0.083, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.4353904533652886, |
|
"grad_norm": 0.6895639349115328, |
|
"learning_rate": 3.2296647151945116e-06, |
|
"loss": 0.0767, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.43618933493109646, |
|
"grad_norm": 0.7409218395782419, |
|
"learning_rate": 3.2233389276586325e-06, |
|
"loss": 0.0802, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.43698821649690434, |
|
"grad_norm": 0.7255590501259186, |
|
"learning_rate": 3.217008081777726e-06, |
|
"loss": 0.0824, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.4377870980627122, |
|
"grad_norm": 0.7111795815121589, |
|
"learning_rate": 3.2106722218237124e-06, |
|
"loss": 0.0822, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4385859796285201, |
|
"grad_norm": 0.7793608395589445, |
|
"learning_rate": 3.2043313921035747e-06, |
|
"loss": 0.0883, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.43938486119432796, |
|
"grad_norm": 0.7229437869187035, |
|
"learning_rate": 3.19798563695905e-06, |
|
"loss": 0.0791, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.44018374276013583, |
|
"grad_norm": 0.7477705839069457, |
|
"learning_rate": 3.191635000766318e-06, |
|
"loss": 0.0851, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4409826243259437, |
|
"grad_norm": 0.8331293508094471, |
|
"learning_rate": 3.1852795279356946e-06, |
|
"loss": 0.1016, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4417815058917515, |
|
"grad_norm": 0.7681413958594786, |
|
"learning_rate": 3.1789192629113147e-06, |
|
"loss": 0.0875, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4425803874575594, |
|
"grad_norm": 0.7256511341858173, |
|
"learning_rate": 3.1725542501708302e-06, |
|
"loss": 0.0835, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4433792690233673, |
|
"grad_norm": 0.6987031139534794, |
|
"learning_rate": 3.1661845342250874e-06, |
|
"loss": 0.0832, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.44417815058917515, |
|
"grad_norm": 0.6784474789881185, |
|
"learning_rate": 3.159810159617829e-06, |
|
"loss": 0.0783, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.444977032154983, |
|
"grad_norm": 0.7254284540533688, |
|
"learning_rate": 3.153431170925373e-06, |
|
"loss": 0.0894, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4457759137207909, |
|
"grad_norm": 0.787126427068804, |
|
"learning_rate": 3.147047612756302e-06, |
|
"loss": 0.0896, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.44657479528659877, |
|
"grad_norm": 0.7412461593538884, |
|
"learning_rate": 3.1406595297511568e-06, |
|
"loss": 0.0843, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.44737367685240664, |
|
"grad_norm": 0.7092573353441867, |
|
"learning_rate": 3.1342669665821183e-06, |
|
"loss": 0.087, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4481725584182145, |
|
"grad_norm": 0.7691676535522879, |
|
"learning_rate": 3.127869967952698e-06, |
|
"loss": 0.0955, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4489714399840224, |
|
"grad_norm": 0.6881651194944849, |
|
"learning_rate": 3.1214685785974253e-06, |
|
"loss": 0.079, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.44977032154983027, |
|
"grad_norm": 0.7008388621611868, |
|
"learning_rate": 3.115062843281534e-06, |
|
"loss": 0.0884, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4505692031156381, |
|
"grad_norm": 0.6735736908138019, |
|
"learning_rate": 3.108652806800648e-06, |
|
"loss": 0.0706, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.45136808468144596, |
|
"grad_norm": 0.7315110483129635, |
|
"learning_rate": 3.102238513980471e-06, |
|
"loss": 0.0904, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.45216696624725383, |
|
"grad_norm": 0.7103583696801924, |
|
"learning_rate": 3.095820009676471e-06, |
|
"loss": 0.0846, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4529658478130617, |
|
"grad_norm": 0.7891761809600559, |
|
"learning_rate": 3.089397338773569e-06, |
|
"loss": 0.0878, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4537647293788696, |
|
"grad_norm": 0.7306565027752118, |
|
"learning_rate": 3.0829705461858183e-06, |
|
"loss": 0.0912, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.45456361094467745, |
|
"grad_norm": 0.6606967844352585, |
|
"learning_rate": 3.0765396768561005e-06, |
|
"loss": 0.0825, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.45536249251048533, |
|
"grad_norm": 0.7038331346600363, |
|
"learning_rate": 3.0701047757558046e-06, |
|
"loss": 0.0789, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4561613740762932, |
|
"grad_norm": 0.7487353386872159, |
|
"learning_rate": 3.0636658878845116e-06, |
|
"loss": 0.0764, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4569602556421011, |
|
"grad_norm": 0.8082694931973815, |
|
"learning_rate": 3.0572230582696844e-06, |
|
"loss": 0.0902, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.45775913720790895, |
|
"grad_norm": 0.6660374363774814, |
|
"learning_rate": 3.050776331966352e-06, |
|
"loss": 0.0666, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.45855801877371677, |
|
"grad_norm": 0.7753707402424571, |
|
"learning_rate": 3.0443257540567896e-06, |
|
"loss": 0.0831, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.45935690033952464, |
|
"grad_norm": 0.7653911797345625, |
|
"learning_rate": 3.03787136965021e-06, |
|
"loss": 0.0811, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4601557819053325, |
|
"grad_norm": 0.7319992183728828, |
|
"learning_rate": 3.0314132238824416e-06, |
|
"loss": 0.0846, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4609546634711404, |
|
"grad_norm": 0.7956127905075492, |
|
"learning_rate": 3.0249513619156213e-06, |
|
"loss": 0.0893, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.46175354503694827, |
|
"grad_norm": 0.7781976763250263, |
|
"learning_rate": 3.018485828937868e-06, |
|
"loss": 0.081, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.46255242660275614, |
|
"grad_norm": 0.7686793219032871, |
|
"learning_rate": 3.012016670162977e-06, |
|
"loss": 0.0821, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.463351308168564, |
|
"grad_norm": 0.698093145167194, |
|
"learning_rate": 3.0055439308300954e-06, |
|
"loss": 0.0865, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4641501897343719, |
|
"grad_norm": 0.7476901736827252, |
|
"learning_rate": 2.9990676562034105e-06, |
|
"loss": 0.0815, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.46494907130017976, |
|
"grad_norm": 0.8062220941858489, |
|
"learning_rate": 2.992587891571833e-06, |
|
"loss": 0.0884, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.46574795286598764, |
|
"grad_norm": 0.743755942738479, |
|
"learning_rate": 2.9861046822486774e-06, |
|
"loss": 0.0796, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4665468344317955, |
|
"grad_norm": 0.7501077059752908, |
|
"learning_rate": 2.9796180735713505e-06, |
|
"loss": 0.0856, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.46734571599760333, |
|
"grad_norm": 0.7429044140887799, |
|
"learning_rate": 2.973128110901026e-06, |
|
"loss": 0.0869, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4681445975634112, |
|
"grad_norm": 0.7755885874852674, |
|
"learning_rate": 2.9666348396223367e-06, |
|
"loss": 0.0959, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4689434791292191, |
|
"grad_norm": 0.6801456079207552, |
|
"learning_rate": 2.960138305143051e-06, |
|
"loss": 0.0753, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.46974236069502695, |
|
"grad_norm": 0.6757854650456917, |
|
"learning_rate": 2.9536385528937566e-06, |
|
"loss": 0.0715, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.4705412422608348, |
|
"grad_norm": 0.6902680146186407, |
|
"learning_rate": 2.9471356283275444e-06, |
|
"loss": 0.0751, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.4713401238266427, |
|
"grad_norm": 0.7534035340349238, |
|
"learning_rate": 2.9406295769196868e-06, |
|
"loss": 0.0886, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4721390053924506, |
|
"grad_norm": 0.7774300987221902, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"loss": 0.083, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.47293788695825845, |
|
"grad_norm": 0.6527035296264435, |
|
"learning_rate": 2.92760827558915e-06, |
|
"loss": 0.0702, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.4737367685240663, |
|
"grad_norm": 0.783007220913397, |
|
"learning_rate": 2.9210931167250766e-06, |
|
"loss": 0.0833, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.4745356500898742, |
|
"grad_norm": 0.7250650434550187, |
|
"learning_rate": 2.9145750131359356e-06, |
|
"loss": 0.0887, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.47533453165568207, |
|
"grad_norm": 0.769904889862114, |
|
"learning_rate": 2.9080540104031487e-06, |
|
"loss": 0.0839, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4761334132214899, |
|
"grad_norm": 0.7189156153496629, |
|
"learning_rate": 2.901530154128412e-06, |
|
"loss": 0.0844, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.47693229478729776, |
|
"grad_norm": 0.7161425909317404, |
|
"learning_rate": 2.895003489933375e-06, |
|
"loss": 0.0723, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.47773117635310564, |
|
"grad_norm": 0.708947660281964, |
|
"learning_rate": 2.888474063459326e-06, |
|
"loss": 0.081, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.4785300579189135, |
|
"grad_norm": 0.7377958722078087, |
|
"learning_rate": 2.881941920366868e-06, |
|
"loss": 0.0818, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4793289394847214, |
|
"grad_norm": 0.6998439723966013, |
|
"learning_rate": 2.875407106335601e-06, |
|
"loss": 0.0908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.48012782105052926, |
|
"grad_norm": 0.7234747480385919, |
|
"learning_rate": 2.8688696670638057e-06, |
|
"loss": 0.0893, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.48092670261633713, |
|
"grad_norm": 0.6802799089422156, |
|
"learning_rate": 2.862329648268117e-06, |
|
"loss": 0.0741, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.481725584182145, |
|
"grad_norm": 0.7529275025549153, |
|
"learning_rate": 2.8557870956832135e-06, |
|
"loss": 0.0889, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4825244657479529, |
|
"grad_norm": 0.7184604651026569, |
|
"learning_rate": 2.849242055061488e-06, |
|
"loss": 0.0867, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.48332334731376075, |
|
"grad_norm": 0.6581820285749856, |
|
"learning_rate": 2.842694572172737e-06, |
|
"loss": 0.0741, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.48412222887956863, |
|
"grad_norm": 0.7922487604497372, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"loss": 0.0852, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.48492111044537645, |
|
"grad_norm": 0.7531752613506526, |
|
"learning_rate": 2.829592462758401e-06, |
|
"loss": 0.0777, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4857199920111843, |
|
"grad_norm": 0.8326963935548872, |
|
"learning_rate": 2.8230379278565197e-06, |
|
"loss": 0.0805, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4865188735769922, |
|
"grad_norm": 0.8237145760032943, |
|
"learning_rate": 2.8164811339343736e-06, |
|
"loss": 0.0856, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.48731775514280007, |
|
"grad_norm": 0.7065915753904118, |
|
"learning_rate": 2.809922126843948e-06, |
|
"loss": 0.0853, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.48811663670860794, |
|
"grad_norm": 0.736514710504635, |
|
"learning_rate": 2.803360952452705e-06, |
|
"loss": 0.0723, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4889155182744158, |
|
"grad_norm": 0.7872810616902329, |
|
"learning_rate": 2.796797656643263e-06, |
|
"loss": 0.0821, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4897143998402237, |
|
"grad_norm": 0.7936471445453519, |
|
"learning_rate": 2.7902322853130758e-06, |
|
"loss": 0.0796, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.49051328140603156, |
|
"grad_norm": 0.7465503003939695, |
|
"learning_rate": 2.7836648843741105e-06, |
|
"loss": 0.0918, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.49131216297183944, |
|
"grad_norm": 0.7549873274087162, |
|
"learning_rate": 2.7770954997525277e-06, |
|
"loss": 0.0791, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4921110445376473, |
|
"grad_norm": 0.7330346635199269, |
|
"learning_rate": 2.7705241773883607e-06, |
|
"loss": 0.0775, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4929099261034552, |
|
"grad_norm": 0.7262078474613537, |
|
"learning_rate": 2.763950963235193e-06, |
|
"loss": 0.0787, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.493708807669263, |
|
"grad_norm": 0.6433172738294531, |
|
"learning_rate": 2.7573759032598367e-06, |
|
"loss": 0.0736, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4945076892350709, |
|
"grad_norm": 0.7007070811492951, |
|
"learning_rate": 2.7507990434420127e-06, |
|
"loss": 0.0845, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.49530657080087875, |
|
"grad_norm": 0.7649670953818944, |
|
"learning_rate": 2.7442204297740295e-06, |
|
"loss": 0.0851, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.49610545236668663, |
|
"grad_norm": 0.7312713852144791, |
|
"learning_rate": 2.7376401082604563e-06, |
|
"loss": 0.0846, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4969043339324945, |
|
"grad_norm": 0.7290141779950609, |
|
"learning_rate": 2.731058124917812e-06, |
|
"loss": 0.0808, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4977032154983024, |
|
"grad_norm": 0.704482891496262, |
|
"learning_rate": 2.7244745257742293e-06, |
|
"loss": 0.073, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.49850209706411025, |
|
"grad_norm": 0.8613003123017297, |
|
"learning_rate": 2.717889356869146e-06, |
|
"loss": 0.0931, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4993009786299181, |
|
"grad_norm": 0.6828247716612095, |
|
"learning_rate": 2.7113026642529733e-06, |
|
"loss": 0.0844, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5000998601957259, |
|
"grad_norm": 0.6921552059802246, |
|
"learning_rate": 2.704714493986782e-06, |
|
"loss": 0.0759, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5008987417615338, |
|
"grad_norm": 0.7382737232554466, |
|
"learning_rate": 2.6981248921419713e-06, |
|
"loss": 0.0834, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5016976233273417, |
|
"grad_norm": 0.7643514787883916, |
|
"learning_rate": 2.6915339047999563e-06, |
|
"loss": 0.0808, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5024965048931496, |
|
"grad_norm": 0.8171733325574314, |
|
"learning_rate": 2.684941578051836e-06, |
|
"loss": 0.0744, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5032953864589574, |
|
"grad_norm": 0.6993831939859319, |
|
"learning_rate": 2.678347957998081e-06, |
|
"loss": 0.0776, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5040942680247653, |
|
"grad_norm": 0.7118175146789436, |
|
"learning_rate": 2.6717530907482027e-06, |
|
"loss": 0.0733, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5048931495905732, |
|
"grad_norm": 0.7288191153869615, |
|
"learning_rate": 2.6651570224204355e-06, |
|
"loss": 0.0815, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5056920311563811, |
|
"grad_norm": 0.713314556630966, |
|
"learning_rate": 2.6585597991414115e-06, |
|
"loss": 0.076, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5064909127221889, |
|
"grad_norm": 0.7568168430439096, |
|
"learning_rate": 2.651961467045842e-06, |
|
"loss": 0.0912, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5072897942879968, |
|
"grad_norm": 0.7245241109183076, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"loss": 0.0737, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5080886758538047, |
|
"grad_norm": 0.7262894883310934, |
|
"learning_rate": 2.6387616609823506e-06, |
|
"loss": 0.0761, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5088875574196126, |
|
"grad_norm": 0.6999036722622294, |
|
"learning_rate": 2.6321602793213287e-06, |
|
"loss": 0.0741, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5096864389854204, |
|
"grad_norm": 0.6976916656678082, |
|
"learning_rate": 2.625557973456913e-06, |
|
"loss": 0.0667, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5104853205512283, |
|
"grad_norm": 0.6808266325469274, |
|
"learning_rate": 2.6189547895593565e-06, |
|
"loss": 0.0703, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5112842021170362, |
|
"grad_norm": 0.7465421794886118, |
|
"learning_rate": 2.6123507738050513e-06, |
|
"loss": 0.0758, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.512083083682844, |
|
"grad_norm": 0.6816895308085987, |
|
"learning_rate": 2.6057459723762078e-06, |
|
"loss": 0.0834, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5128819652486519, |
|
"grad_norm": 0.7286471926630991, |
|
"learning_rate": 2.599140431460531e-06, |
|
"loss": 0.0792, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5136808468144598, |
|
"grad_norm": 0.7546333240801898, |
|
"learning_rate": 2.5925341972508954e-06, |
|
"loss": 0.0796, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5144797283802677, |
|
"grad_norm": 0.7683040441468793, |
|
"learning_rate": 2.5859273159450247e-06, |
|
"loss": 0.0736, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5152786099460755, |
|
"grad_norm": 0.7653812473026139, |
|
"learning_rate": 2.57931983374517e-06, |
|
"loss": 0.0802, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5160774915118833, |
|
"grad_norm": 0.7601939158747375, |
|
"learning_rate": 2.572711796857779e-06, |
|
"loss": 0.072, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5168763730776912, |
|
"grad_norm": 0.7195969610874816, |
|
"learning_rate": 2.566103251493184e-06, |
|
"loss": 0.0859, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5176752546434991, |
|
"grad_norm": 0.7884083401959953, |
|
"learning_rate": 2.5594942438652685e-06, |
|
"loss": 0.0761, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5184741362093069, |
|
"grad_norm": 0.6291975339991476, |
|
"learning_rate": 2.5528848201911543e-06, |
|
"loss": 0.0739, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5192730177751148, |
|
"grad_norm": 0.6813656053055375, |
|
"learning_rate": 2.5462750266908667e-06, |
|
"loss": 0.0712, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5200718993409227, |
|
"grad_norm": 0.710394947419851, |
|
"learning_rate": 2.53966490958702e-06, |
|
"loss": 0.0831, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5208707809067306, |
|
"grad_norm": 0.7481196303136334, |
|
"learning_rate": 2.5330545151044923e-06, |
|
"loss": 0.0774, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5216696624725384, |
|
"grad_norm": 0.7096377432791056, |
|
"learning_rate": 2.5264438894700994e-06, |
|
"loss": 0.0811, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5224685440383463, |
|
"grad_norm": 0.820591327390357, |
|
"learning_rate": 2.5198330789122743e-06, |
|
"loss": 0.0844, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5232674256041542, |
|
"grad_norm": 0.7737582631833925, |
|
"learning_rate": 2.5132221296607446e-06, |
|
"loss": 0.0777, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.524066307169962, |
|
"grad_norm": 0.7251927679308194, |
|
"learning_rate": 2.5066110879462056e-06, |
|
"loss": 0.0813, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5248651887357699, |
|
"grad_norm": 0.7687650559033102, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0756, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5256640703015778, |
|
"grad_norm": 0.6769216177565233, |
|
"learning_rate": 2.493388912053795e-06, |
|
"loss": 0.0688, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5264629518673857, |
|
"grad_norm": 0.7872330702674987, |
|
"learning_rate": 2.486777870339256e-06, |
|
"loss": 0.0823, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5272618334331935, |
|
"grad_norm": 0.6870673717044596, |
|
"learning_rate": 2.4801669210877265e-06, |
|
"loss": 0.0768, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5280607149990014, |
|
"grad_norm": 0.7150216139915514, |
|
"learning_rate": 2.4735561105299014e-06, |
|
"loss": 0.0797, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5288595965648093, |
|
"grad_norm": 0.6773205786007382, |
|
"learning_rate": 2.466945484895509e-06, |
|
"loss": 0.074, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5296584781306172, |
|
"grad_norm": 0.735174296317646, |
|
"learning_rate": 2.4603350904129802e-06, |
|
"loss": 0.0793, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.530457359696425, |
|
"grad_norm": 0.7042052228414556, |
|
"learning_rate": 2.453724973309134e-06, |
|
"loss": 0.0752, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5312562412622329, |
|
"grad_norm": 0.7035863201389916, |
|
"learning_rate": 2.4471151798088465e-06, |
|
"loss": 0.0792, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5320551228280408, |
|
"grad_norm": 0.6908163217373349, |
|
"learning_rate": 2.440505756134732e-06, |
|
"loss": 0.0834, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5328540043938486, |
|
"grad_norm": 0.6998637701979498, |
|
"learning_rate": 2.433896748506817e-06, |
|
"loss": 0.076, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5336528859596564, |
|
"grad_norm": 0.6806996354305866, |
|
"learning_rate": 2.4272882031422216e-06, |
|
"loss": 0.0737, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5344517675254643, |
|
"grad_norm": 0.7690510059537475, |
|
"learning_rate": 2.4206801662548314e-06, |
|
"loss": 0.082, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5352506490912722, |
|
"grad_norm": 0.7280524380434941, |
|
"learning_rate": 2.4140726840549757e-06, |
|
"loss": 0.0775, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.53604953065708, |
|
"grad_norm": 0.7265244259182214, |
|
"learning_rate": 2.407465802749105e-06, |
|
"loss": 0.0872, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5368484122228879, |
|
"grad_norm": 0.7274938352887984, |
|
"learning_rate": 2.4008595685394694e-06, |
|
"loss": 0.0734, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5376472937886958, |
|
"grad_norm": 0.715787901834074, |
|
"learning_rate": 2.3942540276237926e-06, |
|
"loss": 0.0752, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5384461753545037, |
|
"grad_norm": 0.7044859562275388, |
|
"learning_rate": 2.38764922619495e-06, |
|
"loss": 0.0757, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5392450569203115, |
|
"grad_norm": 0.7252744856596492, |
|
"learning_rate": 2.3810452104406444e-06, |
|
"loss": 0.0867, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5400439384861194, |
|
"grad_norm": 0.7614651844151401, |
|
"learning_rate": 2.3744420265430877e-06, |
|
"loss": 0.0764, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5408428200519273, |
|
"grad_norm": 0.6892639381460605, |
|
"learning_rate": 2.3678397206786717e-06, |
|
"loss": 0.0803, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5416417016177352, |
|
"grad_norm": 0.7211223831734932, |
|
"learning_rate": 2.3612383390176503e-06, |
|
"loss": 0.0731, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.542440583183543, |
|
"grad_norm": 0.7664217487678445, |
|
"learning_rate": 2.3546379277238107e-06, |
|
"loss": 0.0846, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.5432394647493509, |
|
"grad_norm": 0.7047009823702027, |
|
"learning_rate": 2.3480385329541587e-06, |
|
"loss": 0.0799, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5440383463151588, |
|
"grad_norm": 0.6821519058187036, |
|
"learning_rate": 2.341440200858589e-06, |
|
"loss": 0.0836, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5448372278809667, |
|
"grad_norm": 0.7121462307791131, |
|
"learning_rate": 2.334842977579565e-06, |
|
"loss": 0.0769, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5456361094467745, |
|
"grad_norm": 0.6546815768203035, |
|
"learning_rate": 2.3282469092517977e-06, |
|
"loss": 0.0735, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5464349910125824, |
|
"grad_norm": 0.6652984324823661, |
|
"learning_rate": 2.3216520420019194e-06, |
|
"loss": 0.0631, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5472338725783903, |
|
"grad_norm": 0.6826190647506108, |
|
"learning_rate": 2.3150584219481644e-06, |
|
"loss": 0.0772, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5480327541441982, |
|
"grad_norm": 0.7082080750495795, |
|
"learning_rate": 2.3084660952000446e-06, |
|
"loss": 0.083, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.548831635710006, |
|
"grad_norm": 0.6804378348146185, |
|
"learning_rate": 2.3018751078580287e-06, |
|
"loss": 0.0764, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5496305172758139, |
|
"grad_norm": 0.6949030543943242, |
|
"learning_rate": 2.2952855060132192e-06, |
|
"loss": 0.0736, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5504293988416217, |
|
"grad_norm": 0.767211454774038, |
|
"learning_rate": 2.288697335747027e-06, |
|
"loss": 0.0782, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5512282804074295, |
|
"grad_norm": 0.6859601195405323, |
|
"learning_rate": 2.2821106431308546e-06, |
|
"loss": 0.0674, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5520271619732374, |
|
"grad_norm": 0.857452027423427, |
|
"learning_rate": 2.275525474225771e-06, |
|
"loss": 0.0913, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5528260435390453, |
|
"grad_norm": 0.7934337014749047, |
|
"learning_rate": 2.2689418750821893e-06, |
|
"loss": 0.0801, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5536249251048532, |
|
"grad_norm": 0.7855467335399277, |
|
"learning_rate": 2.262359891739544e-06, |
|
"loss": 0.0827, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.554423806670661, |
|
"grad_norm": 0.7328485240790014, |
|
"learning_rate": 2.2557795702259717e-06, |
|
"loss": 0.0757, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.5552226882364689, |
|
"grad_norm": 0.7502799227901668, |
|
"learning_rate": 2.2492009565579877e-06, |
|
"loss": 0.0798, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5560215698022768, |
|
"grad_norm": 0.6942830722487661, |
|
"learning_rate": 2.242624096740164e-06, |
|
"loss": 0.0774, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5568204513680847, |
|
"grad_norm": 0.6930784219010419, |
|
"learning_rate": 2.2360490367648083e-06, |
|
"loss": 0.0782, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5576193329338925, |
|
"grad_norm": 0.7163999480603681, |
|
"learning_rate": 2.2294758226116397e-06, |
|
"loss": 0.0798, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5584182144997004, |
|
"grad_norm": 0.734750686150335, |
|
"learning_rate": 2.2229045002474727e-06, |
|
"loss": 0.0796, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5592170960655083, |
|
"grad_norm": 0.766514584513075, |
|
"learning_rate": 2.21633511562589e-06, |
|
"loss": 0.0798, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5600159776313162, |
|
"grad_norm": 0.68052355751497, |
|
"learning_rate": 2.2097677146869242e-06, |
|
"loss": 0.0775, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.560814859197124, |
|
"grad_norm": 0.7544498594904364, |
|
"learning_rate": 2.2032023433567377e-06, |
|
"loss": 0.0842, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5616137407629319, |
|
"grad_norm": 0.7664316105448243, |
|
"learning_rate": 2.1966390475472957e-06, |
|
"loss": 0.0776, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5624126223287398, |
|
"grad_norm": 0.7004685245151888, |
|
"learning_rate": 2.190077873156053e-06, |
|
"loss": 0.0736, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5632115038945477, |
|
"grad_norm": 0.6859491595173901, |
|
"learning_rate": 2.183518866065627e-06, |
|
"loss": 0.0715, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5640103854603555, |
|
"grad_norm": 0.7118232103440043, |
|
"learning_rate": 2.1769620721434816e-06, |
|
"loss": 0.0708, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5648092670261634, |
|
"grad_norm": 0.748576065988089, |
|
"learning_rate": 2.1704075372415993e-06, |
|
"loss": 0.0722, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5656081485919713, |
|
"grad_norm": 0.8434443127714014, |
|
"learning_rate": 2.1638553071961706e-06, |
|
"loss": 0.0786, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5664070301577792, |
|
"grad_norm": 0.7026564190808876, |
|
"learning_rate": 2.157305427827264e-06, |
|
"loss": 0.0736, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.567205911723587, |
|
"grad_norm": 0.7639438616377601, |
|
"learning_rate": 2.1507579449385122e-06, |
|
"loss": 0.0838, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5680047932893948, |
|
"grad_norm": 0.817227335374872, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"loss": 0.0814, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5688036748552027, |
|
"grad_norm": 0.8785656644132279, |
|
"learning_rate": 2.1376703517318835e-06, |
|
"loss": 0.0798, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5696025564210105, |
|
"grad_norm": 0.7552630348609908, |
|
"learning_rate": 2.131130332936195e-06, |
|
"loss": 0.0807, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5704014379868184, |
|
"grad_norm": 0.7272052290720102, |
|
"learning_rate": 2.124592893664399e-06, |
|
"loss": 0.0716, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5712003195526263, |
|
"grad_norm": 0.7859800162478867, |
|
"learning_rate": 2.1180580796331327e-06, |
|
"loss": 0.0731, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5719992011184342, |
|
"grad_norm": 0.7161265041455283, |
|
"learning_rate": 2.1115259365406748e-06, |
|
"loss": 0.0693, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.572798082684242, |
|
"grad_norm": 0.7447936254123377, |
|
"learning_rate": 2.1049965100666255e-06, |
|
"loss": 0.0715, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5735969642500499, |
|
"grad_norm": 0.6911997124068688, |
|
"learning_rate": 2.098469845871589e-06, |
|
"loss": 0.0765, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5743958458158578, |
|
"grad_norm": 0.7134320614605544, |
|
"learning_rate": 2.0919459895968517e-06, |
|
"loss": 0.0728, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5751947273816657, |
|
"grad_norm": 0.7691509712775914, |
|
"learning_rate": 2.0854249868640653e-06, |
|
"loss": 0.0756, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5759936089474735, |
|
"grad_norm": 0.8006755711473525, |
|
"learning_rate": 2.0789068832749242e-06, |
|
"loss": 0.0785, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5767924905132814, |
|
"grad_norm": 0.697757987372097, |
|
"learning_rate": 2.0723917244108503e-06, |
|
"loss": 0.0786, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5775913720790893, |
|
"grad_norm": 0.7270865955123159, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"loss": 0.0712, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5783902536448972, |
|
"grad_norm": 0.7385054287885843, |
|
"learning_rate": 2.059370423080313e-06, |
|
"loss": 0.0721, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.579189135210705, |
|
"grad_norm": 0.7038344544129048, |
|
"learning_rate": 2.0528643716724572e-06, |
|
"loss": 0.0732, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5799880167765129, |
|
"grad_norm": 0.6790065697614551, |
|
"learning_rate": 2.046361447106244e-06, |
|
"loss": 0.0765, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5807868983423208, |
|
"grad_norm": 0.7507492177887934, |
|
"learning_rate": 2.0398616948569495e-06, |
|
"loss": 0.0823, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5815857799081287, |
|
"grad_norm": 0.7816194240267051, |
|
"learning_rate": 2.0333651603776633e-06, |
|
"loss": 0.0818, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5823846614739365, |
|
"grad_norm": 0.6962266578054711, |
|
"learning_rate": 2.0268718890989754e-06, |
|
"loss": 0.0737, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5831835430397444, |
|
"grad_norm": 0.6988143771822519, |
|
"learning_rate": 2.0203819264286512e-06, |
|
"loss": 0.0691, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5839824246055523, |
|
"grad_norm": 0.7030353986937218, |
|
"learning_rate": 2.013895317751323e-06, |
|
"loss": 0.0785, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5847813061713601, |
|
"grad_norm": 0.6909853071674142, |
|
"learning_rate": 2.007412108428168e-06, |
|
"loss": 0.0675, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5855801877371679, |
|
"grad_norm": 0.8305603761683259, |
|
"learning_rate": 2.00093234379659e-06, |
|
"loss": 0.0928, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5863790693029758, |
|
"grad_norm": 0.7351496762893693, |
|
"learning_rate": 1.994456069169906e-06, |
|
"loss": 0.086, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5871779508687837, |
|
"grad_norm": 0.7255003573385873, |
|
"learning_rate": 1.987983329837024e-06, |
|
"loss": 0.0721, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5879768324345915, |
|
"grad_norm": 0.7135520451688296, |
|
"learning_rate": 1.9815141710621323e-06, |
|
"loss": 0.072, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5887757140003994, |
|
"grad_norm": 0.6674729499297173, |
|
"learning_rate": 1.975048638084379e-06, |
|
"loss": 0.0667, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5895745955662073, |
|
"grad_norm": 0.7056694883980723, |
|
"learning_rate": 1.9685867761175584e-06, |
|
"loss": 0.0734, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5903734771320152, |
|
"grad_norm": 0.7122399768693654, |
|
"learning_rate": 1.9621286303497917e-06, |
|
"loss": 0.08, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.591172358697823, |
|
"grad_norm": 0.7093962111319365, |
|
"learning_rate": 1.9556742459432117e-06, |
|
"loss": 0.0719, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5919712402636309, |
|
"grad_norm": 0.6852115591601023, |
|
"learning_rate": 1.9492236680336486e-06, |
|
"loss": 0.0705, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5927701218294388, |
|
"grad_norm": 0.6325866257954883, |
|
"learning_rate": 1.9427769417303156e-06, |
|
"loss": 0.0643, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5935690033952467, |
|
"grad_norm": 0.7204075648734956, |
|
"learning_rate": 1.9363341121154896e-06, |
|
"loss": 0.0757, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5943678849610545, |
|
"grad_norm": 0.8149667568559875, |
|
"learning_rate": 1.929895224244197e-06, |
|
"loss": 0.0773, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5951667665268624, |
|
"grad_norm": 0.8545046907554561, |
|
"learning_rate": 1.9234603231439e-06, |
|
"loss": 0.081, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5959656480926703, |
|
"grad_norm": 0.6657089521252548, |
|
"learning_rate": 1.9170294538141825e-06, |
|
"loss": 0.0711, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5967645296584781, |
|
"grad_norm": 0.6978460546960862, |
|
"learning_rate": 1.9106026612264316e-06, |
|
"loss": 0.0762, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.597563411224286, |
|
"grad_norm": 0.7011638489461521, |
|
"learning_rate": 1.9041799903235297e-06, |
|
"loss": 0.0675, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5983622927900939, |
|
"grad_norm": 0.6939104019221446, |
|
"learning_rate": 1.8977614860195297e-06, |
|
"loss": 0.0711, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5991611743559018, |
|
"grad_norm": 0.6869972949250588, |
|
"learning_rate": 1.891347193199353e-06, |
|
"loss": 0.0727, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5999600559217096, |
|
"grad_norm": 0.680739504999969, |
|
"learning_rate": 1.8849371567184665e-06, |
|
"loss": 0.0674, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6007589374875175, |
|
"grad_norm": 0.6915313764434776, |
|
"learning_rate": 1.8785314214025747e-06, |
|
"loss": 0.0671, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6015578190533254, |
|
"grad_norm": 0.7851580548586923, |
|
"learning_rate": 1.8721300320473023e-06, |
|
"loss": 0.0849, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6023567006191333, |
|
"grad_norm": 0.8202601700256574, |
|
"learning_rate": 1.8657330334178825e-06, |
|
"loss": 0.0842, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.603155582184941, |
|
"grad_norm": 0.7717659960029647, |
|
"learning_rate": 1.8593404702488439e-06, |
|
"loss": 0.0817, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6039544637507489, |
|
"grad_norm": 0.679886045166462, |
|
"learning_rate": 1.852952387243698e-06, |
|
"loss": 0.0712, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6047533453165568, |
|
"grad_norm": 0.7584532497859835, |
|
"learning_rate": 1.8465688290746282e-06, |
|
"loss": 0.072, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6055522268823647, |
|
"grad_norm": 0.8239865399284856, |
|
"learning_rate": 1.8401898403821713e-06, |
|
"loss": 0.0722, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6063511084481725, |
|
"grad_norm": 0.7143969459985934, |
|
"learning_rate": 1.833815465774913e-06, |
|
"loss": 0.0781, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.6071499900139804, |
|
"grad_norm": 0.73089086118113, |
|
"learning_rate": 1.8274457498291708e-06, |
|
"loss": 0.0701, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6079488715797883, |
|
"grad_norm": 0.7070920900790865, |
|
"learning_rate": 1.8210807370886851e-06, |
|
"loss": 0.0763, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6087477531455961, |
|
"grad_norm": 0.744531738394602, |
|
"learning_rate": 1.8147204720643066e-06, |
|
"loss": 0.0811, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.609546634711404, |
|
"grad_norm": 0.7196531438853129, |
|
"learning_rate": 1.8083649992336827e-06, |
|
"loss": 0.074, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6103455162772119, |
|
"grad_norm": 0.6935193915987503, |
|
"learning_rate": 1.8020143630409508e-06, |
|
"loss": 0.065, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.6111443978430198, |
|
"grad_norm": 0.7035531928233766, |
|
"learning_rate": 1.7956686078964257e-06, |
|
"loss": 0.0653, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6119432794088276, |
|
"grad_norm": 0.716461549347938, |
|
"learning_rate": 1.7893277781762874e-06, |
|
"loss": 0.0763, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6127421609746355, |
|
"grad_norm": 0.7124602838364704, |
|
"learning_rate": 1.7829919182222752e-06, |
|
"loss": 0.0695, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6135410425404434, |
|
"grad_norm": 0.6532144524523916, |
|
"learning_rate": 1.7766610723413686e-06, |
|
"loss": 0.0709, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6143399241062513, |
|
"grad_norm": 0.7262090328751356, |
|
"learning_rate": 1.7703352848054888e-06, |
|
"loss": 0.0729, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.6151388056720591, |
|
"grad_norm": 0.769557941797719, |
|
"learning_rate": 1.7640145998511827e-06, |
|
"loss": 0.0755, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.615937687237867, |
|
"grad_norm": 0.7645534014831973, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"loss": 0.0864, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6167365688036749, |
|
"grad_norm": 0.6820825425481823, |
|
"learning_rate": 1.7513887144547523e-06, |
|
"loss": 0.0774, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6175354503694828, |
|
"grad_norm": 0.7137728940600104, |
|
"learning_rate": 1.7450836023060714e-06, |
|
"loss": 0.0776, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6183343319352906, |
|
"grad_norm": 0.7428801445111053, |
|
"learning_rate": 1.738783769325233e-06, |
|
"loss": 0.0809, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6191332135010985, |
|
"grad_norm": 0.7006322216060636, |
|
"learning_rate": 1.7324892595672807e-06, |
|
"loss": 0.0715, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6199320950669064, |
|
"grad_norm": 0.7084563324616765, |
|
"learning_rate": 1.726200117050036e-06, |
|
"loss": 0.072, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6207309766327141, |
|
"grad_norm": 0.6783579971505013, |
|
"learning_rate": 1.7199163857537825e-06, |
|
"loss": 0.0752, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.621529858198522, |
|
"grad_norm": 0.7378250263764751, |
|
"learning_rate": 1.7136381096209665e-06, |
|
"loss": 0.0782, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6223287397643299, |
|
"grad_norm": 0.7384439686090273, |
|
"learning_rate": 1.7073653325558831e-06, |
|
"loss": 0.0752, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.6231276213301378, |
|
"grad_norm": 0.7462193513330629, |
|
"learning_rate": 1.7010980984243756e-06, |
|
"loss": 0.0695, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6239265028959456, |
|
"grad_norm": 0.7130619553135901, |
|
"learning_rate": 1.694836451053522e-06, |
|
"loss": 0.0744, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.6247253844617535, |
|
"grad_norm": 0.7748719793441474, |
|
"learning_rate": 1.6885804342313334e-06, |
|
"loss": 0.0805, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6255242660275614, |
|
"grad_norm": 0.6944961145847144, |
|
"learning_rate": 1.6823300917064462e-06, |
|
"loss": 0.0684, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6263231475933693, |
|
"grad_norm": 0.6865884916668862, |
|
"learning_rate": 1.6760854671878158e-06, |
|
"loss": 0.0737, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.6271220291591771, |
|
"grad_norm": 0.74800317438897, |
|
"learning_rate": 1.6698466043444122e-06, |
|
"loss": 0.0669, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.627920910724985, |
|
"grad_norm": 0.7581085007620311, |
|
"learning_rate": 1.6636135468049122e-06, |
|
"loss": 0.0784, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6287197922907929, |
|
"grad_norm": 0.7494255172335546, |
|
"learning_rate": 1.6573863381573957e-06, |
|
"loss": 0.0829, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6295186738566008, |
|
"grad_norm": 0.6879131849166294, |
|
"learning_rate": 1.651165021949044e-06, |
|
"loss": 0.0744, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.6303175554224086, |
|
"grad_norm": 0.703349951642232, |
|
"learning_rate": 1.6449496416858285e-06, |
|
"loss": 0.0759, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.6311164369882165, |
|
"grad_norm": 0.6929744305760658, |
|
"learning_rate": 1.6387402408322128e-06, |
|
"loss": 0.0731, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6319153185540244, |
|
"grad_norm": 0.7454533302688348, |
|
"learning_rate": 1.6325368628108442e-06, |
|
"loss": 0.0758, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6327142001198323, |
|
"grad_norm": 0.742041894343771, |
|
"learning_rate": 1.6263395510022546e-06, |
|
"loss": 0.07, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6335130816856401, |
|
"grad_norm": 0.6948013730334685, |
|
"learning_rate": 1.620148348744552e-06, |
|
"loss": 0.0766, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.634311963251448, |
|
"grad_norm": 0.659586046661146, |
|
"learning_rate": 1.613963299333122e-06, |
|
"loss": 0.0655, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6351108448172559, |
|
"grad_norm": 0.6773851759495868, |
|
"learning_rate": 1.6077844460203207e-06, |
|
"loss": 0.0752, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6359097263830638, |
|
"grad_norm": 0.7605812887772487, |
|
"learning_rate": 1.6016118320151775e-06, |
|
"loss": 0.0788, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6367086079488716, |
|
"grad_norm": 0.7086132798372856, |
|
"learning_rate": 1.595445500483088e-06, |
|
"loss": 0.0715, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6375074895146795, |
|
"grad_norm": 0.7633813224754403, |
|
"learning_rate": 1.589285494545514e-06, |
|
"loss": 0.0795, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6383063710804873, |
|
"grad_norm": 0.7913546925027637, |
|
"learning_rate": 1.583131857279685e-06, |
|
"loss": 0.0763, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.6391052526462951, |
|
"grad_norm": 0.6935716177124382, |
|
"learning_rate": 1.5769846317182894e-06, |
|
"loss": 0.0709, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.639904134212103, |
|
"grad_norm": 0.676940355586407, |
|
"learning_rate": 1.5708438608491816e-06, |
|
"loss": 0.0709, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6407030157779109, |
|
"grad_norm": 0.7665289081799466, |
|
"learning_rate": 1.564709587615077e-06, |
|
"loss": 0.0708, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6415018973437188, |
|
"grad_norm": 0.7660458120935258, |
|
"learning_rate": 1.5585818549132532e-06, |
|
"loss": 0.0683, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6423007789095266, |
|
"grad_norm": 0.8084359075182547, |
|
"learning_rate": 1.5524607055952495e-06, |
|
"loss": 0.07, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.6430996604753345, |
|
"grad_norm": 0.6834537386319198, |
|
"learning_rate": 1.546346182466566e-06, |
|
"loss": 0.0696, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6438985420411424, |
|
"grad_norm": 0.6882281367042749, |
|
"learning_rate": 1.540238328286366e-06, |
|
"loss": 0.0715, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6446974236069503, |
|
"grad_norm": 0.7798427307031544, |
|
"learning_rate": 1.5341371857671782e-06, |
|
"loss": 0.0816, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6454963051727581, |
|
"grad_norm": 0.7749157428603176, |
|
"learning_rate": 1.528042797574596e-06, |
|
"loss": 0.072, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.646295186738566, |
|
"grad_norm": 0.7503425799671355, |
|
"learning_rate": 1.521955206326976e-06, |
|
"loss": 0.0682, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.6470940683043739, |
|
"grad_norm": 0.7353119596263072, |
|
"learning_rate": 1.5158744545951468e-06, |
|
"loss": 0.074, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6478929498701818, |
|
"grad_norm": 0.7031039234622585, |
|
"learning_rate": 1.509800584902108e-06, |
|
"loss": 0.069, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6486918314359896, |
|
"grad_norm": 0.7552246369614947, |
|
"learning_rate": 1.5037336397227315e-06, |
|
"loss": 0.0695, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6494907130017975, |
|
"grad_norm": 0.795857866513945, |
|
"learning_rate": 1.4976736614834664e-06, |
|
"loss": 0.07, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.6502895945676054, |
|
"grad_norm": 0.6939165529698337, |
|
"learning_rate": 1.4916206925620402e-06, |
|
"loss": 0.0698, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.6510884761334133, |
|
"grad_norm": 0.7274411868739028, |
|
"learning_rate": 1.4855747752871659e-06, |
|
"loss": 0.0667, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6518873576992211, |
|
"grad_norm": 0.6874520685052125, |
|
"learning_rate": 1.479535951938243e-06, |
|
"loss": 0.0672, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.652686239265029, |
|
"grad_norm": 0.7012426759740858, |
|
"learning_rate": 1.4735042647450622e-06, |
|
"loss": 0.0655, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6534851208308369, |
|
"grad_norm": 0.7406300636188733, |
|
"learning_rate": 1.4674797558875134e-06, |
|
"loss": 0.0744, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.6542840023966447, |
|
"grad_norm": 0.7259284513518274, |
|
"learning_rate": 1.4614624674952843e-06, |
|
"loss": 0.0672, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.6550828839624525, |
|
"grad_norm": 0.7178372562662225, |
|
"learning_rate": 1.4554524416475718e-06, |
|
"loss": 0.0781, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6558817655282604, |
|
"grad_norm": 0.7020878145356227, |
|
"learning_rate": 1.4494497203727845e-06, |
|
"loss": 0.0667, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6566806470940683, |
|
"grad_norm": 0.7464350415019525, |
|
"learning_rate": 1.443454345648252e-06, |
|
"loss": 0.0702, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6574795286598761, |
|
"grad_norm": 0.722737579725714, |
|
"learning_rate": 1.4374663593999258e-06, |
|
"loss": 0.0746, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.658278410225684, |
|
"grad_norm": 0.7740457479905273, |
|
"learning_rate": 1.4314858035020905e-06, |
|
"loss": 0.0733, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.6590772917914919, |
|
"grad_norm": 0.7125957730795915, |
|
"learning_rate": 1.425512719777071e-06, |
|
"loss": 0.0783, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6598761733572998, |
|
"grad_norm": 0.6616100002029853, |
|
"learning_rate": 1.4195471499949384e-06, |
|
"loss": 0.0693, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6606750549231076, |
|
"grad_norm": 0.6565339471175381, |
|
"learning_rate": 1.4135891358732206e-06, |
|
"loss": 0.0612, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6614739364889155, |
|
"grad_norm": 0.6514137764011035, |
|
"learning_rate": 1.4076387190766017e-06, |
|
"loss": 0.0611, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6622728180547234, |
|
"grad_norm": 0.770350092313725, |
|
"learning_rate": 1.401695941216644e-06, |
|
"loss": 0.0839, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.6630716996205313, |
|
"grad_norm": 0.6938098303809659, |
|
"learning_rate": 1.3957608438514877e-06, |
|
"loss": 0.069, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6638705811863391, |
|
"grad_norm": 0.7265480986058926, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"loss": 0.0749, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.664669462752147, |
|
"grad_norm": 0.7054094782275767, |
|
"learning_rate": 1.3839138565693043e-06, |
|
"loss": 0.0756, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6654683443179549, |
|
"grad_norm": 0.6829270622586088, |
|
"learning_rate": 1.3780020494988447e-06, |
|
"loss": 0.0709, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6662672258837627, |
|
"grad_norm": 0.7504241485173638, |
|
"learning_rate": 1.372098088615749e-06, |
|
"loss": 0.0791, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.6670661074495706, |
|
"grad_norm": 0.6924213329453016, |
|
"learning_rate": 1.3662020152067063e-06, |
|
"loss": 0.0765, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6678649890153785, |
|
"grad_norm": 0.7087521677704813, |
|
"learning_rate": 1.3603138705032526e-06, |
|
"loss": 0.0749, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6686638705811864, |
|
"grad_norm": 0.705323244510612, |
|
"learning_rate": 1.354433695681474e-06, |
|
"loss": 0.0752, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6694627521469942, |
|
"grad_norm": 0.6616264857193134, |
|
"learning_rate": 1.3485615318617277e-06, |
|
"loss": 0.0672, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6702616337128021, |
|
"grad_norm": 0.7512713777570923, |
|
"learning_rate": 1.342697420108344e-06, |
|
"loss": 0.0713, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.67106051527861, |
|
"grad_norm": 0.669997265805606, |
|
"learning_rate": 1.3368414014293485e-06, |
|
"loss": 0.0637, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6718593968444179, |
|
"grad_norm": 0.7096381426024272, |
|
"learning_rate": 1.3309935167761717e-06, |
|
"loss": 0.0659, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6726582784102256, |
|
"grad_norm": 0.6742198646615256, |
|
"learning_rate": 1.3251538070433605e-06, |
|
"loss": 0.0728, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6734571599760335, |
|
"grad_norm": 0.6805614755628161, |
|
"learning_rate": 1.3193223130682937e-06, |
|
"loss": 0.0625, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6742560415418414, |
|
"grad_norm": 0.748759021598399, |
|
"learning_rate": 1.313499075630899e-06, |
|
"loss": 0.0702, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6750549231076493, |
|
"grad_norm": 0.7274840234475171, |
|
"learning_rate": 1.3076841354533658e-06, |
|
"loss": 0.0658, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6758538046734571, |
|
"grad_norm": 0.6791766672113594, |
|
"learning_rate": 1.301877533199859e-06, |
|
"loss": 0.065, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.676652686239265, |
|
"grad_norm": 0.7002421668580668, |
|
"learning_rate": 1.2960793094762347e-06, |
|
"loss": 0.0679, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6774515678050729, |
|
"grad_norm": 0.7623718570311951, |
|
"learning_rate": 1.2902895048297602e-06, |
|
"loss": 0.0751, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6782504493708807, |
|
"grad_norm": 0.7237394593375889, |
|
"learning_rate": 1.2845081597488288e-06, |
|
"loss": 0.07, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6790493309366886, |
|
"grad_norm": 0.787447112328663, |
|
"learning_rate": 1.2787353146626715e-06, |
|
"loss": 0.0786, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6798482125024965, |
|
"grad_norm": 0.6916026537019957, |
|
"learning_rate": 1.2729710099410802e-06, |
|
"loss": 0.0673, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6806470940683044, |
|
"grad_norm": 0.7256469081673769, |
|
"learning_rate": 1.2672152858941244e-06, |
|
"loss": 0.0753, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6814459756341122, |
|
"grad_norm": 0.6749009978752328, |
|
"learning_rate": 1.2614681827718697e-06, |
|
"loss": 0.0689, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6822448571999201, |
|
"grad_norm": 0.7376514885118693, |
|
"learning_rate": 1.255729740764091e-06, |
|
"loss": 0.0761, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.683043738765728, |
|
"grad_norm": 0.6933564376107897, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"loss": 0.0682, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6838426203315359, |
|
"grad_norm": 0.6933674935192211, |
|
"learning_rate": 1.2442790005479576e-06, |
|
"loss": 0.0693, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6846415018973437, |
|
"grad_norm": 0.7682302900116749, |
|
"learning_rate": 1.2385667824151972e-06, |
|
"loss": 0.0722, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6854403834631516, |
|
"grad_norm": 0.6524546380366343, |
|
"learning_rate": 1.232863385547543e-06, |
|
"loss": 0.0677, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6862392650289595, |
|
"grad_norm": 0.6905473772407033, |
|
"learning_rate": 1.2271688498291335e-06, |
|
"loss": 0.0755, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6870381465947674, |
|
"grad_norm": 0.7156212244362496, |
|
"learning_rate": 1.2214832150821381e-06, |
|
"loss": 0.0721, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6878370281605752, |
|
"grad_norm": 0.6922194723160341, |
|
"learning_rate": 1.2158065210664848e-06, |
|
"loss": 0.0705, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6886359097263831, |
|
"grad_norm": 0.7115096619101899, |
|
"learning_rate": 1.2101388074795747e-06, |
|
"loss": 0.0775, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.689434791292191, |
|
"grad_norm": 0.702553187808918, |
|
"learning_rate": 1.2044801139560112e-06, |
|
"loss": 0.074, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6902336728579987, |
|
"grad_norm": 0.7364941389984857, |
|
"learning_rate": 1.1988304800673197e-06, |
|
"loss": 0.0762, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6910325544238066, |
|
"grad_norm": 0.7658537575082363, |
|
"learning_rate": 1.1931899453216698e-06, |
|
"loss": 0.0775, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6918314359896145, |
|
"grad_norm": 0.6908885154530607, |
|
"learning_rate": 1.1875585491636e-06, |
|
"loss": 0.0678, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6926303175554224, |
|
"grad_norm": 0.7042159975428317, |
|
"learning_rate": 1.181936330973744e-06, |
|
"loss": 0.0618, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6934291991212302, |
|
"grad_norm": 0.6852437432679035, |
|
"learning_rate": 1.1763233300685533e-06, |
|
"loss": 0.0695, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6942280806870381, |
|
"grad_norm": 0.7176643894140547, |
|
"learning_rate": 1.1707195857000218e-06, |
|
"loss": 0.0682, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.695026962252846, |
|
"grad_norm": 0.6925346322936237, |
|
"learning_rate": 1.16512513705541e-06, |
|
"loss": 0.0688, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6958258438186539, |
|
"grad_norm": 0.7196442480030203, |
|
"learning_rate": 1.159540023256977e-06, |
|
"loss": 0.0805, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6966247253844617, |
|
"grad_norm": 0.7037757813966624, |
|
"learning_rate": 1.1539642833617009e-06, |
|
"loss": 0.0669, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6974236069502696, |
|
"grad_norm": 0.6920780498431013, |
|
"learning_rate": 1.148397956361007e-06, |
|
"loss": 0.068, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6982224885160775, |
|
"grad_norm": 0.7361982589542638, |
|
"learning_rate": 1.1428410811804955e-06, |
|
"loss": 0.0712, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6990213700818854, |
|
"grad_norm": 0.7177168044634765, |
|
"learning_rate": 1.137293696679671e-06, |
|
"loss": 0.0685, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6998202516476932, |
|
"grad_norm": 0.6916265035922443, |
|
"learning_rate": 1.1317558416516696e-06, |
|
"loss": 0.066, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7006191332135011, |
|
"grad_norm": 0.7698316471528456, |
|
"learning_rate": 1.1262275548229852e-06, |
|
"loss": 0.0794, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.701418014779309, |
|
"grad_norm": 0.6875784567261796, |
|
"learning_rate": 1.120708874853203e-06, |
|
"loss": 0.0718, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7022168963451169, |
|
"grad_norm": 0.7595905093967557, |
|
"learning_rate": 1.1151998403347245e-06, |
|
"loss": 0.0708, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7030157779109247, |
|
"grad_norm": 0.682293145613161, |
|
"learning_rate": 1.1097004897925034e-06, |
|
"loss": 0.0621, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7038146594767326, |
|
"grad_norm": 0.7513445525180765, |
|
"learning_rate": 1.1042108616837693e-06, |
|
"loss": 0.0715, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.7046135410425405, |
|
"grad_norm": 0.6956635070524043, |
|
"learning_rate": 1.0987309943977647e-06, |
|
"loss": 0.0659, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.7054124226083484, |
|
"grad_norm": 0.7790067433851642, |
|
"learning_rate": 1.0932609262554748e-06, |
|
"loss": 0.0654, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7062113041741562, |
|
"grad_norm": 0.6972694194690369, |
|
"learning_rate": 1.0878006955093566e-06, |
|
"loss": 0.0707, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7070101857399641, |
|
"grad_norm": 0.8285471186520841, |
|
"learning_rate": 1.0823503403430736e-06, |
|
"loss": 0.0701, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7078090673057719, |
|
"grad_norm": 0.6731927224763115, |
|
"learning_rate": 1.076909898871231e-06, |
|
"loss": 0.0611, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7086079488715797, |
|
"grad_norm": 0.8386201804199398, |
|
"learning_rate": 1.0714794091391074e-06, |
|
"loss": 0.0672, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.7094068304373876, |
|
"grad_norm": 0.6763207712242757, |
|
"learning_rate": 1.0660589091223854e-06, |
|
"loss": 0.0669, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7102057120031955, |
|
"grad_norm": 0.709984687452426, |
|
"learning_rate": 1.0606484367268906e-06, |
|
"loss": 0.0645, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.7110045935690034, |
|
"grad_norm": 0.6914376370508385, |
|
"learning_rate": 1.0552480297883254e-06, |
|
"loss": 0.0659, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7118034751348112, |
|
"grad_norm": 0.6934501764327877, |
|
"learning_rate": 1.049857726072005e-06, |
|
"loss": 0.071, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7126023567006191, |
|
"grad_norm": 0.6859987351243448, |
|
"learning_rate": 1.0444775632725893e-06, |
|
"loss": 0.0626, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.713401238266427, |
|
"grad_norm": 0.7094722335398237, |
|
"learning_rate": 1.0391075790138234e-06, |
|
"loss": 0.065, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7142001198322349, |
|
"grad_norm": 0.7231364811936231, |
|
"learning_rate": 1.0337478108482742e-06, |
|
"loss": 0.0677, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.7149990013980427, |
|
"grad_norm": 0.6822025978665478, |
|
"learning_rate": 1.0283982962570683e-06, |
|
"loss": 0.0686, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7157978829638506, |
|
"grad_norm": 0.7410985657955034, |
|
"learning_rate": 1.0230590726496247e-06, |
|
"loss": 0.0726, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7165967645296585, |
|
"grad_norm": 0.8311400386801739, |
|
"learning_rate": 1.0177301773633993e-06, |
|
"loss": 0.0652, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7173956460954664, |
|
"grad_norm": 0.7744302013134797, |
|
"learning_rate": 1.0124116476636218e-06, |
|
"loss": 0.0704, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.7181945276612742, |
|
"grad_norm": 0.771110146977676, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"loss": 0.0777, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.7189934092270821, |
|
"grad_norm": 0.7051521238306805, |
|
"learning_rate": 1.0018058337216327e-06, |
|
"loss": 0.0717, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.71979229079289, |
|
"grad_norm": 0.69799351583451, |
|
"learning_rate": 9.965186236464047e-07, |
|
"loss": 0.074, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7205911723586979, |
|
"grad_norm": 0.6916047989468586, |
|
"learning_rate": 9.912419274910717e-07, |
|
"loss": 0.0757, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7213900539245057, |
|
"grad_norm": 0.7923334873265707, |
|
"learning_rate": 9.85975782155834e-07, |
|
"loss": 0.0673, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7221889354903136, |
|
"grad_norm": 0.7968420285846334, |
|
"learning_rate": 9.807202244671049e-07, |
|
"loss": 0.0707, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.7229878170561215, |
|
"grad_norm": 0.7609740552798956, |
|
"learning_rate": 9.754752911772616e-07, |
|
"loss": 0.0661, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7237866986219293, |
|
"grad_norm": 0.7002229457651694, |
|
"learning_rate": 9.702410189643838e-07, |
|
"loss": 0.0761, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.7245855801877372, |
|
"grad_norm": 0.6715239067203957, |
|
"learning_rate": 9.650174444319957e-07, |
|
"loss": 0.0643, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.725384461753545, |
|
"grad_norm": 0.7107880210572147, |
|
"learning_rate": 9.598046041088127e-07, |
|
"loss": 0.0698, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.7261833433193529, |
|
"grad_norm": 0.7691864270821177, |
|
"learning_rate": 9.546025344484868e-07, |
|
"loss": 0.0738, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.7269822248851607, |
|
"grad_norm": 0.8738867314756472, |
|
"learning_rate": 9.494112718293503e-07, |
|
"loss": 0.0765, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7277811064509686, |
|
"grad_norm": 0.7710218454079526, |
|
"learning_rate": 9.442308525541591e-07, |
|
"loss": 0.0707, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7285799880167765, |
|
"grad_norm": 0.7302992785764044, |
|
"learning_rate": 9.390613128498419e-07, |
|
"loss": 0.0719, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.7293788695825844, |
|
"grad_norm": 0.724458938096606, |
|
"learning_rate": 9.33902688867247e-07, |
|
"loss": 0.072, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.7301777511483922, |
|
"grad_norm": 0.7537364633847227, |
|
"learning_rate": 9.287550166808892e-07, |
|
"loss": 0.0821, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.7309766327142001, |
|
"grad_norm": 0.8784951133416654, |
|
"learning_rate": 9.236183322886946e-07, |
|
"loss": 0.0658, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.731775514280008, |
|
"grad_norm": 0.7486887717067566, |
|
"learning_rate": 9.184926716117512e-07, |
|
"loss": 0.0682, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.7325743958458159, |
|
"grad_norm": 0.806504603566172, |
|
"learning_rate": 9.133780704940595e-07, |
|
"loss": 0.081, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.7333732774116237, |
|
"grad_norm": 0.7241082712472302, |
|
"learning_rate": 9.082745647022798e-07, |
|
"loss": 0.0704, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.7341721589774316, |
|
"grad_norm": 0.7656468783833981, |
|
"learning_rate": 9.031821899254797e-07, |
|
"loss": 0.0759, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.7349710405432395, |
|
"grad_norm": 0.7501681101809219, |
|
"learning_rate": 8.981009817748906e-07, |
|
"loss": 0.064, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7357699221090473, |
|
"grad_norm": 0.7665143868951155, |
|
"learning_rate": 8.930309757836517e-07, |
|
"loss": 0.0782, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7365688036748552, |
|
"grad_norm": 0.7184986991412127, |
|
"learning_rate": 8.879722074065681e-07, |
|
"loss": 0.0671, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.7373676852406631, |
|
"grad_norm": 0.7364620852989838, |
|
"learning_rate": 8.829247120198564e-07, |
|
"loss": 0.0655, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.738166566806471, |
|
"grad_norm": 0.7004868837398479, |
|
"learning_rate": 8.778885249209044e-07, |
|
"loss": 0.0665, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.7389654483722788, |
|
"grad_norm": 0.6917945728795157, |
|
"learning_rate": 8.728636813280164e-07, |
|
"loss": 0.0673, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7397643299380867, |
|
"grad_norm": 0.6869868967472806, |
|
"learning_rate": 8.67850216380175e-07, |
|
"loss": 0.0719, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.7405632115038946, |
|
"grad_norm": 0.6947452348575369, |
|
"learning_rate": 8.628481651367876e-07, |
|
"loss": 0.0695, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.7413620930697025, |
|
"grad_norm": 0.7189114026034129, |
|
"learning_rate": 8.578575625774476e-07, |
|
"loss": 0.0699, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7421609746355103, |
|
"grad_norm": 0.7220306089561607, |
|
"learning_rate": 8.528784436016879e-07, |
|
"loss": 0.0632, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.7429598562013181, |
|
"grad_norm": 0.700585631371324, |
|
"learning_rate": 8.479108430287331e-07, |
|
"loss": 0.0661, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.743758737767126, |
|
"grad_norm": 0.7070648082934727, |
|
"learning_rate": 8.4295479559726e-07, |
|
"loss": 0.0691, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7445576193329339, |
|
"grad_norm": 0.6752223726474184, |
|
"learning_rate": 8.380103359651554e-07, |
|
"loss": 0.0654, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.7453565008987417, |
|
"grad_norm": 0.7385870850797042, |
|
"learning_rate": 8.330774987092713e-07, |
|
"loss": 0.0665, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7461553824645496, |
|
"grad_norm": 0.7249724054176636, |
|
"learning_rate": 8.281563183251828e-07, |
|
"loss": 0.0753, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.7469542640303575, |
|
"grad_norm": 0.7451824876122812, |
|
"learning_rate": 8.23246829226948e-07, |
|
"loss": 0.0671, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7477531455961653, |
|
"grad_norm": 0.7807983611829348, |
|
"learning_rate": 8.183490657468687e-07, |
|
"loss": 0.0757, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7485520271619732, |
|
"grad_norm": 0.7735152540739959, |
|
"learning_rate": 8.134630621352485e-07, |
|
"loss": 0.071, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.7493509087277811, |
|
"grad_norm": 0.7090083764371655, |
|
"learning_rate": 8.085888525601526e-07, |
|
"loss": 0.0644, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.750149790293589, |
|
"grad_norm": 0.6867898835739341, |
|
"learning_rate": 8.037264711071699e-07, |
|
"loss": 0.0647, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.7509486718593968, |
|
"grad_norm": 0.7196342412506768, |
|
"learning_rate": 7.988759517791759e-07, |
|
"loss": 0.073, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7517475534252047, |
|
"grad_norm": 0.7958248682226311, |
|
"learning_rate": 7.940373284960934e-07, |
|
"loss": 0.0771, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7525464349910126, |
|
"grad_norm": 0.6455907886178134, |
|
"learning_rate": 7.892106350946544e-07, |
|
"loss": 0.0566, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.7533453165568205, |
|
"grad_norm": 0.730260246281351, |
|
"learning_rate": 7.843959053281663e-07, |
|
"loss": 0.0744, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7541441981226283, |
|
"grad_norm": 0.660307763513869, |
|
"learning_rate": 7.795931728662726e-07, |
|
"loss": 0.0661, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.7549430796884362, |
|
"grad_norm": 0.7058124058214891, |
|
"learning_rate": 7.748024712947205e-07, |
|
"loss": 0.0635, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7557419612542441, |
|
"grad_norm": 0.7017538636058471, |
|
"learning_rate": 7.700238341151228e-07, |
|
"loss": 0.0622, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.756540842820052, |
|
"grad_norm": 0.7205050140658313, |
|
"learning_rate": 7.652572947447273e-07, |
|
"loss": 0.0707, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.7573397243858598, |
|
"grad_norm": 0.7416351156017427, |
|
"learning_rate": 7.60502886516181e-07, |
|
"loss": 0.0669, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7581386059516677, |
|
"grad_norm": 0.7168506440477357, |
|
"learning_rate": 7.557606426772962e-07, |
|
"loss": 0.0622, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.7589374875174756, |
|
"grad_norm": 0.6768418626524917, |
|
"learning_rate": 7.510305963908183e-07, |
|
"loss": 0.0641, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7597363690832835, |
|
"grad_norm": 0.606897378091034, |
|
"learning_rate": 7.463127807341966e-07, |
|
"loss": 0.0571, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7605352506490912, |
|
"grad_norm": 0.7488996315192771, |
|
"learning_rate": 7.416072286993511e-07, |
|
"loss": 0.0656, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.7613341322148991, |
|
"grad_norm": 0.6788393377637245, |
|
"learning_rate": 7.369139731924401e-07, |
|
"loss": 0.0674, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.762133013780707, |
|
"grad_norm": 0.6565644655695145, |
|
"learning_rate": 7.322330470336314e-07, |
|
"loss": 0.0639, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.7629318953465148, |
|
"grad_norm": 0.7490737747780284, |
|
"learning_rate": 7.275644829568748e-07, |
|
"loss": 0.074, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7637307769123227, |
|
"grad_norm": 0.8045168221078167, |
|
"learning_rate": 7.229083136096712e-07, |
|
"loss": 0.069, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7645296584781306, |
|
"grad_norm": 0.6683960326626098, |
|
"learning_rate": 7.182645715528436e-07, |
|
"loss": 0.0651, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.7653285400439385, |
|
"grad_norm": 0.6902827678888699, |
|
"learning_rate": 7.136332892603095e-07, |
|
"loss": 0.0664, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.7661274216097463, |
|
"grad_norm": 0.7837175885990169, |
|
"learning_rate": 7.090144991188569e-07, |
|
"loss": 0.0647, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.7669263031755542, |
|
"grad_norm": 0.712113865239181, |
|
"learning_rate": 7.044082334279151e-07, |
|
"loss": 0.0614, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7677251847413621, |
|
"grad_norm": 0.7273442302087524, |
|
"learning_rate": 6.998145243993284e-07, |
|
"loss": 0.0698, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.76852406630717, |
|
"grad_norm": 0.7559547717566453, |
|
"learning_rate": 6.952334041571307e-07, |
|
"loss": 0.068, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.7693229478729778, |
|
"grad_norm": 0.7786708332752759, |
|
"learning_rate": 6.906649047373246e-07, |
|
"loss": 0.0696, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7701218294387857, |
|
"grad_norm": 0.7891026104652957, |
|
"learning_rate": 6.861090580876536e-07, |
|
"loss": 0.0802, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7709207110045936, |
|
"grad_norm": 0.7701906687064225, |
|
"learning_rate": 6.815658960673782e-07, |
|
"loss": 0.0673, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7717195925704015, |
|
"grad_norm": 0.7634396300503498, |
|
"learning_rate": 6.770354504470575e-07, |
|
"loss": 0.0727, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7725184741362093, |
|
"grad_norm": 0.7061753171390293, |
|
"learning_rate": 6.72517752908321e-07, |
|
"loss": 0.0645, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7733173557020172, |
|
"grad_norm": 0.8029556468762414, |
|
"learning_rate": 6.680128350436532e-07, |
|
"loss": 0.0689, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7741162372678251, |
|
"grad_norm": 0.6638509857091174, |
|
"learning_rate": 6.635207283561671e-07, |
|
"loss": 0.059, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.774915118833633, |
|
"grad_norm": 0.7287078746384597, |
|
"learning_rate": 6.590414642593882e-07, |
|
"loss": 0.0661, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7757140003994408, |
|
"grad_norm": 0.722876613271591, |
|
"learning_rate": 6.545750740770338e-07, |
|
"loss": 0.0673, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7765128819652487, |
|
"grad_norm": 0.723016808817902, |
|
"learning_rate": 6.501215890427908e-07, |
|
"loss": 0.0646, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7773117635310565, |
|
"grad_norm": 0.7458933545086837, |
|
"learning_rate": 6.456810403001013e-07, |
|
"loss": 0.0654, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7781106450968643, |
|
"grad_norm": 0.699991209892201, |
|
"learning_rate": 6.412534589019429e-07, |
|
"loss": 0.0633, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7789095266626722, |
|
"grad_norm": 0.7015135117059649, |
|
"learning_rate": 6.368388758106134e-07, |
|
"loss": 0.0646, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7797084082284801, |
|
"grad_norm": 0.6776739847527647, |
|
"learning_rate": 6.324373218975105e-07, |
|
"loss": 0.0621, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.780507289794288, |
|
"grad_norm": 0.6857624915005792, |
|
"learning_rate": 6.280488279429186e-07, |
|
"loss": 0.0665, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7813061713600958, |
|
"grad_norm": 0.7100103784080846, |
|
"learning_rate": 6.236734246357948e-07, |
|
"loss": 0.0686, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7821050529259037, |
|
"grad_norm": 0.6860904374907602, |
|
"learning_rate": 6.193111425735515e-07, |
|
"loss": 0.0664, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7829039344917116, |
|
"grad_norm": 0.6653015404082253, |
|
"learning_rate": 6.149620122618438e-07, |
|
"loss": 0.0561, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7837028160575195, |
|
"grad_norm": 0.6722184016732179, |
|
"learning_rate": 6.106260641143547e-07, |
|
"loss": 0.0686, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7845016976233273, |
|
"grad_norm": 0.7017646955590284, |
|
"learning_rate": 6.063033284525854e-07, |
|
"loss": 0.0591, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7853005791891352, |
|
"grad_norm": 0.6992849059506876, |
|
"learning_rate": 6.019938355056423e-07, |
|
"loss": 0.07, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7860994607549431, |
|
"grad_norm": 0.7139128406279165, |
|
"learning_rate": 5.976976154100214e-07, |
|
"loss": 0.0683, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.786898342320751, |
|
"grad_norm": 0.6595093734404093, |
|
"learning_rate": 5.934146982094049e-07, |
|
"loss": 0.0609, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7876972238865588, |
|
"grad_norm": 0.7832364853064704, |
|
"learning_rate": 5.89145113854444e-07, |
|
"loss": 0.0734, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7884961054523667, |
|
"grad_norm": 0.6913376062451335, |
|
"learning_rate": 5.848888922025553e-07, |
|
"loss": 0.0665, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7892949870181746, |
|
"grad_norm": 0.716823109643616, |
|
"learning_rate": 5.806460630177066e-07, |
|
"loss": 0.0612, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7900938685839825, |
|
"grad_norm": 0.7083180087166591, |
|
"learning_rate": 5.764166559702145e-07, |
|
"loss": 0.0652, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7908927501497903, |
|
"grad_norm": 0.711844832993962, |
|
"learning_rate": 5.72200700636531e-07, |
|
"loss": 0.07, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7916916317155982, |
|
"grad_norm": 0.6612044131815857, |
|
"learning_rate": 5.679982264990425e-07, |
|
"loss": 0.0611, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7924905132814061, |
|
"grad_norm": 0.6476141706558939, |
|
"learning_rate": 5.638092629458577e-07, |
|
"loss": 0.0631, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.793289394847214, |
|
"grad_norm": 0.7118404722072632, |
|
"learning_rate": 5.596338392706077e-07, |
|
"loss": 0.0756, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7940882764130218, |
|
"grad_norm": 0.7168675764933466, |
|
"learning_rate": 5.554719846722379e-07, |
|
"loss": 0.0668, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7948871579788296, |
|
"grad_norm": 0.72644232924385, |
|
"learning_rate": 5.513237282548034e-07, |
|
"loss": 0.0705, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7956860395446375, |
|
"grad_norm": 0.7068188562442445, |
|
"learning_rate": 5.471890990272666e-07, |
|
"loss": 0.065, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7964849211104453, |
|
"grad_norm": 0.6931576222204329, |
|
"learning_rate": 5.430681259032958e-07, |
|
"loss": 0.0648, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7972838026762532, |
|
"grad_norm": 0.6720964929440794, |
|
"learning_rate": 5.389608377010608e-07, |
|
"loss": 0.0589, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7980826842420611, |
|
"grad_norm": 0.6425174311401469, |
|
"learning_rate": 5.348672631430319e-07, |
|
"loss": 0.0566, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.798881565807869, |
|
"grad_norm": 0.7460943865422714, |
|
"learning_rate": 5.307874308557778e-07, |
|
"loss": 0.0734, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.798881565807869, |
|
"eval_loss": 0.0669684186577797, |
|
"eval_runtime": 16.0834, |
|
"eval_samples_per_second": 50.362, |
|
"eval_steps_per_second": 6.342, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7996804473736768, |
|
"grad_norm": 0.7220279454829013, |
|
"learning_rate": 5.267213693697696e-07, |
|
"loss": 0.0731, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8004793289394847, |
|
"grad_norm": 0.7666160582662389, |
|
"learning_rate": 5.226691071191773e-07, |
|
"loss": 0.0788, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8012782105052926, |
|
"grad_norm": 0.7134104534854722, |
|
"learning_rate": 5.186306724416714e-07, |
|
"loss": 0.0701, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.8020770920711005, |
|
"grad_norm": 0.6726951640382407, |
|
"learning_rate": 5.146060935782254e-07, |
|
"loss": 0.0681, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.8028759736369083, |
|
"grad_norm": 0.6771021941917784, |
|
"learning_rate": 5.105953986729196e-07, |
|
"loss": 0.0607, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8036748552027162, |
|
"grad_norm": 0.6801062661764998, |
|
"learning_rate": 5.065986157727434e-07, |
|
"loss": 0.0599, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8044737367685241, |
|
"grad_norm": 0.6720162232249413, |
|
"learning_rate": 5.026157728273967e-07, |
|
"loss": 0.0643, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.805272618334332, |
|
"grad_norm": 0.6921585034116611, |
|
"learning_rate": 4.986468976890993e-07, |
|
"loss": 0.0619, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8060714999001398, |
|
"grad_norm": 0.7276889117024663, |
|
"learning_rate": 4.946920181123904e-07, |
|
"loss": 0.0622, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.8068703814659477, |
|
"grad_norm": 0.694598262753411, |
|
"learning_rate": 4.90751161753941e-07, |
|
"loss": 0.0645, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8076692630317556, |
|
"grad_norm": 0.6632330746362429, |
|
"learning_rate": 4.868243561723535e-07, |
|
"loss": 0.0621, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.8084681445975634, |
|
"grad_norm": 0.6879543731629699, |
|
"learning_rate": 4.82911628827975e-07, |
|
"loss": 0.0631, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8092670261633713, |
|
"grad_norm": 0.7095362884934977, |
|
"learning_rate": 4.790130070827029e-07, |
|
"loss": 0.0785, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8100659077291792, |
|
"grad_norm": 0.6697569093165596, |
|
"learning_rate": 4.7512851819979196e-07, |
|
"loss": 0.0623, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.8108647892949871, |
|
"grad_norm": 0.7039158460244624, |
|
"learning_rate": 4.712581893436646e-07, |
|
"loss": 0.0634, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8116636708607949, |
|
"grad_norm": 0.7098347386238263, |
|
"learning_rate": 4.674020475797239e-07, |
|
"loss": 0.0657, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8124625524266027, |
|
"grad_norm": 0.7165736366942179, |
|
"learning_rate": 4.635601198741607e-07, |
|
"loss": 0.0673, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.8132614339924106, |
|
"grad_norm": 0.7064304631677444, |
|
"learning_rate": 4.597324330937661e-07, |
|
"loss": 0.068, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8140603155582185, |
|
"grad_norm": 0.6877118664218234, |
|
"learning_rate": 4.559190140057429e-07, |
|
"loss": 0.0687, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.8148591971240263, |
|
"grad_norm": 0.6944336714307168, |
|
"learning_rate": 4.5211988927752026e-07, |
|
"loss": 0.0638, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8156580786898342, |
|
"grad_norm": 0.6928572650124039, |
|
"learning_rate": 4.483350854765672e-07, |
|
"loss": 0.0653, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8164569602556421, |
|
"grad_norm": 0.7089184972604112, |
|
"learning_rate": 4.445646290702038e-07, |
|
"loss": 0.0691, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.81725584182145, |
|
"grad_norm": 0.7001438532239804, |
|
"learning_rate": 4.4080854642541833e-07, |
|
"loss": 0.0663, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.8180547233872578, |
|
"grad_norm": 0.6747722142421564, |
|
"learning_rate": 4.3706686380868336e-07, |
|
"loss": 0.0643, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.8188536049530657, |
|
"grad_norm": 0.6712519301600179, |
|
"learning_rate": 4.3333960738577236e-07, |
|
"loss": 0.0673, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8196524865188736, |
|
"grad_norm": 0.7052187515868353, |
|
"learning_rate": 4.2962680322157335e-07, |
|
"loss": 0.0654, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.8204513680846814, |
|
"grad_norm": 0.7134673191364518, |
|
"learning_rate": 4.259284772799099e-07, |
|
"loss": 0.067, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.8212502496504893, |
|
"grad_norm": 0.7595854332327621, |
|
"learning_rate": 4.222446554233597e-07, |
|
"loss": 0.0766, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.8220491312162972, |
|
"grad_norm": 0.6996209252612581, |
|
"learning_rate": 4.185753634130718e-07, |
|
"loss": 0.0666, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.8228480127821051, |
|
"grad_norm": 0.7310601605335255, |
|
"learning_rate": 4.1492062690858673e-07, |
|
"loss": 0.0621, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8236468943479129, |
|
"grad_norm": 0.7071417923105128, |
|
"learning_rate": 4.1128047146765936e-07, |
|
"loss": 0.0588, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.8244457759137208, |
|
"grad_norm": 0.6815499218511121, |
|
"learning_rate": 4.076549225460757e-07, |
|
"loss": 0.0632, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.8252446574795287, |
|
"grad_norm": 0.7333725564876801, |
|
"learning_rate": 4.040440054974815e-07, |
|
"loss": 0.0741, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.8260435390453366, |
|
"grad_norm": 0.6665570482826972, |
|
"learning_rate": 4.004477455731967e-07, |
|
"loss": 0.0654, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.8268424206111444, |
|
"grad_norm": 0.7246490725287029, |
|
"learning_rate": 3.9686616792204677e-07, |
|
"loss": 0.0714, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8276413021769523, |
|
"grad_norm": 0.7130119435912701, |
|
"learning_rate": 3.932992975901823e-07, |
|
"loss": 0.0647, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.8284401837427602, |
|
"grad_norm": 0.6612979017180838, |
|
"learning_rate": 3.89747159520904e-07, |
|
"loss": 0.0622, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.8292390653085681, |
|
"grad_norm": 0.7209814178475339, |
|
"learning_rate": 3.8620977855448937e-07, |
|
"loss": 0.0682, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.8300379468743758, |
|
"grad_norm": 0.665248664635542, |
|
"learning_rate": 3.8268717942801933e-07, |
|
"loss": 0.0613, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.8308368284401837, |
|
"grad_norm": 0.7005544280482982, |
|
"learning_rate": 3.791793867752053e-07, |
|
"loss": 0.0619, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8316357100059916, |
|
"grad_norm": 0.7061284542991632, |
|
"learning_rate": 3.756864251262143e-07, |
|
"loss": 0.062, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.8324345915717994, |
|
"grad_norm": 0.7190713395533443, |
|
"learning_rate": 3.722083189075007e-07, |
|
"loss": 0.0673, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.8332334731376073, |
|
"grad_norm": 0.7365262841063743, |
|
"learning_rate": 3.6874509244163414e-07, |
|
"loss": 0.071, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.8340323547034152, |
|
"grad_norm": 0.6987401896536586, |
|
"learning_rate": 3.652967699471299e-07, |
|
"loss": 0.0678, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.8348312362692231, |
|
"grad_norm": 0.6853720241515239, |
|
"learning_rate": 3.6186337553827747e-07, |
|
"loss": 0.0638, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8356301178350309, |
|
"grad_norm": 0.7344667190271366, |
|
"learning_rate": 3.5844493322497425e-07, |
|
"loss": 0.0738, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.8364289994008388, |
|
"grad_norm": 0.6920294795948277, |
|
"learning_rate": 3.5504146691255736e-07, |
|
"loss": 0.0596, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.8372278809666467, |
|
"grad_norm": 0.6869256839862279, |
|
"learning_rate": 3.5165300040163606e-07, |
|
"loss": 0.0601, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.8380267625324546, |
|
"grad_norm": 0.757378304566195, |
|
"learning_rate": 3.482795573879241e-07, |
|
"loss": 0.0642, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.8388256440982624, |
|
"grad_norm": 0.6437095281756511, |
|
"learning_rate": 3.4492116146207677e-07, |
|
"loss": 0.0563, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8396245256640703, |
|
"grad_norm": 0.6795702774739193, |
|
"learning_rate": 3.4157783610952263e-07, |
|
"loss": 0.0613, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.8404234072298782, |
|
"grad_norm": 0.7352355084174876, |
|
"learning_rate": 3.382496047103026e-07, |
|
"loss": 0.0684, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.8412222887956861, |
|
"grad_norm": 0.7290947348513277, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"loss": 0.0697, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.8420211703614939, |
|
"grad_norm": 0.698112148548848, |
|
"learning_rate": 3.316385167640976e-07, |
|
"loss": 0.0628, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.8428200519273018, |
|
"grad_norm": 0.7086436557888751, |
|
"learning_rate": 3.2835570644877854e-07, |
|
"loss": 0.0644, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8436189334931097, |
|
"grad_norm": 0.7101992176912245, |
|
"learning_rate": 3.250880825498026e-07, |
|
"loss": 0.0703, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.8444178150589176, |
|
"grad_norm": 0.7770285479833771, |
|
"learning_rate": 3.218356679178253e-07, |
|
"loss": 0.0705, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.8452166966247254, |
|
"grad_norm": 0.7644864247124296, |
|
"learning_rate": 3.1859848529714383e-07, |
|
"loss": 0.0609, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.8460155781905333, |
|
"grad_norm": 0.6867453662895092, |
|
"learning_rate": 3.153765573255377e-07, |
|
"loss": 0.0645, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.8468144597563412, |
|
"grad_norm": 0.734264697694199, |
|
"learning_rate": 3.1216990653410827e-07, |
|
"loss": 0.0683, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8476133413221489, |
|
"grad_norm": 0.6732599728013793, |
|
"learning_rate": 3.089785553471233e-07, |
|
"loss": 0.0644, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8484122228879568, |
|
"grad_norm": 0.7192281176674538, |
|
"learning_rate": 3.058025260818609e-07, |
|
"loss": 0.0616, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8492111044537647, |
|
"grad_norm": 0.6708308363843544, |
|
"learning_rate": 3.0264184094845135e-07, |
|
"loss": 0.058, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8500099860195726, |
|
"grad_norm": 0.7397694215736755, |
|
"learning_rate": 2.9949652204972257e-07, |
|
"loss": 0.0595, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.8508088675853804, |
|
"grad_norm": 0.6928595213205264, |
|
"learning_rate": 2.963665913810451e-07, |
|
"loss": 0.0666, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8516077491511883, |
|
"grad_norm": 0.6694794166827536, |
|
"learning_rate": 2.9325207083018e-07, |
|
"loss": 0.0611, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8524066307169962, |
|
"grad_norm": 0.666720861965053, |
|
"learning_rate": 2.9015298217712455e-07, |
|
"loss": 0.0617, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8532055122828041, |
|
"grad_norm": 0.6895017056366897, |
|
"learning_rate": 2.8706934709395893e-07, |
|
"loss": 0.0636, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8540043938486119, |
|
"grad_norm": 0.694641273220621, |
|
"learning_rate": 2.840011871446963e-07, |
|
"loss": 0.0615, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8548032754144198, |
|
"grad_norm": 0.6914158751921184, |
|
"learning_rate": 2.8094852378513144e-07, |
|
"loss": 0.064, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8556021569802277, |
|
"grad_norm": 0.6903052428146241, |
|
"learning_rate": 2.779113783626916e-07, |
|
"loss": 0.0662, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.8564010385460356, |
|
"grad_norm": 0.6825082699898156, |
|
"learning_rate": 2.748897721162841e-07, |
|
"loss": 0.0666, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8571999201118434, |
|
"grad_norm": 0.7049176701648293, |
|
"learning_rate": 2.718837261761528e-07, |
|
"loss": 0.0591, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8579988016776513, |
|
"grad_norm": 0.6985195968248852, |
|
"learning_rate": 2.688932615637252e-07, |
|
"loss": 0.055, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8587976832434592, |
|
"grad_norm": 0.6724955324579563, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"loss": 0.061, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.859596564809267, |
|
"grad_norm": 0.7032826868960578, |
|
"learning_rate": 2.6295915986274513e-07, |
|
"loss": 0.0583, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8603954463750749, |
|
"grad_norm": 0.7608983776246196, |
|
"learning_rate": 2.6001556427166064e-07, |
|
"loss": 0.0731, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8611943279408828, |
|
"grad_norm": 0.6894275105789678, |
|
"learning_rate": 2.570876330029254e-07, |
|
"loss": 0.0682, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8619932095066907, |
|
"grad_norm": 0.7200233498632493, |
|
"learning_rate": 2.541753865317076e-07, |
|
"loss": 0.0685, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8627920910724985, |
|
"grad_norm": 0.6774539347803369, |
|
"learning_rate": 2.512788452234921e-07, |
|
"loss": 0.0675, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8635909726383064, |
|
"grad_norm": 0.7581447101213228, |
|
"learning_rate": 2.483980293339361e-07, |
|
"loss": 0.0716, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8643898542041143, |
|
"grad_norm": 0.7053069803143726, |
|
"learning_rate": 2.4553295900872856e-07, |
|
"loss": 0.0619, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8651887357699221, |
|
"grad_norm": 0.6478546849970215, |
|
"learning_rate": 2.4268365428344737e-07, |
|
"loss": 0.0577, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8659876173357299, |
|
"grad_norm": 0.6662934203390073, |
|
"learning_rate": 2.3985013508342203e-07, |
|
"loss": 0.058, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.8667864989015378, |
|
"grad_norm": 0.6932899856482695, |
|
"learning_rate": 2.370324212235936e-07, |
|
"loss": 0.0604, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8675853804673457, |
|
"grad_norm": 0.7804082086146575, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"loss": 0.0801, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8683842620331536, |
|
"grad_norm": 0.6912072674130147, |
|
"learning_rate": 2.3144448823151394e-07, |
|
"loss": 0.0606, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8691831435989614, |
|
"grad_norm": 0.7051436324198396, |
|
"learning_rate": 2.2867430817595432e-07, |
|
"loss": 0.0632, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8699820251647693, |
|
"grad_norm": 0.7357501611930702, |
|
"learning_rate": 2.2592001161370392e-07, |
|
"loss": 0.0698, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8707809067305772, |
|
"grad_norm": 0.7499428063687636, |
|
"learning_rate": 2.2318161780569558e-07, |
|
"loss": 0.0697, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.871579788296385, |
|
"grad_norm": 0.6763629933404043, |
|
"learning_rate": 2.2045914590165252e-07, |
|
"loss": 0.066, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8723786698621929, |
|
"grad_norm": 0.6698488388739392, |
|
"learning_rate": 2.177526149399556e-07, |
|
"loss": 0.0626, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8731775514280008, |
|
"grad_norm": 0.6977284000368456, |
|
"learning_rate": 2.1506204384751067e-07, |
|
"loss": 0.0695, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8739764329938087, |
|
"grad_norm": 0.7417930261159534, |
|
"learning_rate": 2.1238745143961513e-07, |
|
"loss": 0.0697, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8747753145596165, |
|
"grad_norm": 0.7356179512760564, |
|
"learning_rate": 2.0972885641982605e-07, |
|
"loss": 0.0665, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8755741961254244, |
|
"grad_norm": 0.7922074218678701, |
|
"learning_rate": 2.0708627737983073e-07, |
|
"loss": 0.0724, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8763730776912323, |
|
"grad_norm": 0.6637572696928091, |
|
"learning_rate": 2.044597327993153e-07, |
|
"loss": 0.0613, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8771719592570402, |
|
"grad_norm": 0.6963583688371511, |
|
"learning_rate": 2.0184924104583615e-07, |
|
"loss": 0.0609, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.877970840822848, |
|
"grad_norm": 0.6363025487174039, |
|
"learning_rate": 1.992548203746919e-07, |
|
"loss": 0.0628, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8787697223886559, |
|
"grad_norm": 0.7030310308314632, |
|
"learning_rate": 1.9667648892879532e-07, |
|
"loss": 0.0578, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8795686039544638, |
|
"grad_norm": 0.6918141758573743, |
|
"learning_rate": 1.941142647385469e-07, |
|
"loss": 0.0679, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8803674855202717, |
|
"grad_norm": 0.7414881920940897, |
|
"learning_rate": 1.9156816572170582e-07, |
|
"loss": 0.0681, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8811663670860795, |
|
"grad_norm": 0.6771177978269848, |
|
"learning_rate": 1.8903820968326992e-07, |
|
"loss": 0.0643, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8819652486518874, |
|
"grad_norm": 0.6904228007725154, |
|
"learning_rate": 1.865244143153472e-07, |
|
"loss": 0.0657, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8827641302176952, |
|
"grad_norm": 0.717450106791312, |
|
"learning_rate": 1.840267971970344e-07, |
|
"loss": 0.0645, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.883563011783503, |
|
"grad_norm": 0.6979035372775874, |
|
"learning_rate": 1.8154537579429004e-07, |
|
"loss": 0.0658, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8843618933493109, |
|
"grad_norm": 0.6903449335982373, |
|
"learning_rate": 1.790801674598186e-07, |
|
"loss": 0.0649, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8851607749151188, |
|
"grad_norm": 0.7292202075227271, |
|
"learning_rate": 1.7663118943294367e-07, |
|
"loss": 0.062, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8859596564809267, |
|
"grad_norm": 0.698367465032237, |
|
"learning_rate": 1.74198458839491e-07, |
|
"loss": 0.0678, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8867585380467345, |
|
"grad_norm": 0.6677856153414419, |
|
"learning_rate": 1.7178199269166584e-07, |
|
"loss": 0.0596, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8875574196125424, |
|
"grad_norm": 0.7303795782927686, |
|
"learning_rate": 1.6938180788793557e-07, |
|
"loss": 0.0686, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8883563011783503, |
|
"grad_norm": 0.670118518726451, |
|
"learning_rate": 1.6699792121291248e-07, |
|
"loss": 0.0592, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8891551827441582, |
|
"grad_norm": 0.7462381511634325, |
|
"learning_rate": 1.6463034933723336e-07, |
|
"loss": 0.0768, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.889954064309966, |
|
"grad_norm": 0.6549933917225375, |
|
"learning_rate": 1.6227910881744634e-07, |
|
"loss": 0.0682, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8907529458757739, |
|
"grad_norm": 0.6615109907339011, |
|
"learning_rate": 1.5994421609589388e-07, |
|
"loss": 0.0623, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8915518274415818, |
|
"grad_norm": 0.6974460937921593, |
|
"learning_rate": 1.5762568750059604e-07, |
|
"loss": 0.0669, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8923507090073897, |
|
"grad_norm": 0.6961469220676079, |
|
"learning_rate": 1.553235392451377e-07, |
|
"loss": 0.0646, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8931495905731975, |
|
"grad_norm": 0.6885675903125427, |
|
"learning_rate": 1.5303778742855684e-07, |
|
"loss": 0.0621, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8939484721390054, |
|
"grad_norm": 0.7228656239844607, |
|
"learning_rate": 1.507684480352292e-07, |
|
"loss": 0.0661, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8947473537048133, |
|
"grad_norm": 0.7300348116191165, |
|
"learning_rate": 1.4851553693475768e-07, |
|
"loss": 0.0664, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8955462352706212, |
|
"grad_norm": 0.7416976198498724, |
|
"learning_rate": 1.4627906988186114e-07, |
|
"loss": 0.0676, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.896345116836429, |
|
"grad_norm": 0.6703200074841813, |
|
"learning_rate": 1.4405906251626496e-07, |
|
"loss": 0.0621, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8971439984022369, |
|
"grad_norm": 0.6213139962942775, |
|
"learning_rate": 1.4185553036259097e-07, |
|
"loss": 0.0604, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8979428799680448, |
|
"grad_norm": 0.686848444095792, |
|
"learning_rate": 1.3966848883024936e-07, |
|
"loss": 0.0678, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8987417615338527, |
|
"grad_norm": 0.7053960643095338, |
|
"learning_rate": 1.3749795321332887e-07, |
|
"loss": 0.0656, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8995406430996605, |
|
"grad_norm": 0.6970965606669406, |
|
"learning_rate": 1.3534393869049367e-07, |
|
"loss": 0.0654, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9003395246654683, |
|
"grad_norm": 0.6942277449574915, |
|
"learning_rate": 1.3320646032487394e-07, |
|
"loss": 0.0653, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9011384062312762, |
|
"grad_norm": 0.6744444320008497, |
|
"learning_rate": 1.3108553306396265e-07, |
|
"loss": 0.0666, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.901937287797084, |
|
"grad_norm": 0.6873741371219347, |
|
"learning_rate": 1.289811717395087e-07, |
|
"loss": 0.069, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9027361693628919, |
|
"grad_norm": 0.6982296762549293, |
|
"learning_rate": 1.2689339106741529e-07, |
|
"loss": 0.0637, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9035350509286998, |
|
"grad_norm": 0.6785495778136779, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"loss": 0.0643, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9043339324945077, |
|
"grad_norm": 0.734961547157777, |
|
"learning_rate": 1.227676299640751e-07, |
|
"loss": 0.0677, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9051328140603155, |
|
"grad_norm": 0.7425938420645702, |
|
"learning_rate": 1.2072967838448053e-07, |
|
"loss": 0.0721, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9059316956261234, |
|
"grad_norm": 0.6651013770128319, |
|
"learning_rate": 1.1870836516034878e-07, |
|
"loss": 0.0601, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9067305771919313, |
|
"grad_norm": 0.6809615376797185, |
|
"learning_rate": 1.1670370442682461e-07, |
|
"loss": 0.0627, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9075294587577392, |
|
"grad_norm": 0.7543516531209598, |
|
"learning_rate": 1.1471571020259919e-07, |
|
"loss": 0.0703, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.908328340323547, |
|
"grad_norm": 0.7413805857709801, |
|
"learning_rate": 1.1274439638981532e-07, |
|
"loss": 0.0616, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9091272218893549, |
|
"grad_norm": 0.7298273683632146, |
|
"learning_rate": 1.1078977677396824e-07, |
|
"loss": 0.0661, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9099261034551628, |
|
"grad_norm": 0.7212675180454935, |
|
"learning_rate": 1.0885186502381018e-07, |
|
"loss": 0.0635, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.9107249850209707, |
|
"grad_norm": 0.7198568372151669, |
|
"learning_rate": 1.0693067469125323e-07, |
|
"loss": 0.0783, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9115238665867785, |
|
"grad_norm": 0.6613205843381404, |
|
"learning_rate": 1.0502621921127776e-07, |
|
"loss": 0.0624, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9123227481525864, |
|
"grad_norm": 0.7217970551572686, |
|
"learning_rate": 1.031385119018355e-07, |
|
"loss": 0.0674, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9131216297183943, |
|
"grad_norm": 0.6811880251542707, |
|
"learning_rate": 1.0126756596375687e-07, |
|
"loss": 0.0694, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9139205112842022, |
|
"grad_norm": 0.6819091612837742, |
|
"learning_rate": 9.94133944806594e-08, |
|
"loss": 0.0622, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.91471939285001, |
|
"grad_norm": 0.6431918047898731, |
|
"learning_rate": 9.757601041885694e-08, |
|
"loss": 0.059, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9155182744158179, |
|
"grad_norm": 0.6570994880947287, |
|
"learning_rate": 9.575542662726756e-08, |
|
"loss": 0.0589, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9163171559816258, |
|
"grad_norm": 0.7107622134999424, |
|
"learning_rate": 9.395165583732379e-08, |
|
"loss": 0.069, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9171160375474335, |
|
"grad_norm": 0.7012113103311711, |
|
"learning_rate": 9.216471066288396e-08, |
|
"loss": 0.0657, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.9179149191132414, |
|
"grad_norm": 0.6983067783638615, |
|
"learning_rate": 9.03946036001449e-08, |
|
"loss": 0.0641, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9187138006790493, |
|
"grad_norm": 0.6830731940037315, |
|
"learning_rate": 8.864134702755294e-08, |
|
"loss": 0.0694, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9195126822448572, |
|
"grad_norm": 0.725735904073755, |
|
"learning_rate": 8.69049532057184e-08, |
|
"loss": 0.0687, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.920311563810665, |
|
"grad_norm": 0.6978361407432903, |
|
"learning_rate": 8.518543427732951e-08, |
|
"loss": 0.0635, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.9211104453764729, |
|
"grad_norm": 0.6789409308489616, |
|
"learning_rate": 8.348280226706723e-08, |
|
"loss": 0.0666, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.9219093269422808, |
|
"grad_norm": 0.664337510813219, |
|
"learning_rate": 8.179706908152202e-08, |
|
"loss": 0.0598, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.9227082085080887, |
|
"grad_norm": 0.66126807605164, |
|
"learning_rate": 8.012824650910938e-08, |
|
"loss": 0.0613, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9235070900738965, |
|
"grad_norm": 0.6873606113244728, |
|
"learning_rate": 7.84763462199889e-08, |
|
"loss": 0.0679, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.9243059716397044, |
|
"grad_norm": 0.7083619043314441, |
|
"learning_rate": 7.684137976598089e-08, |
|
"loss": 0.0663, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.9251048532055123, |
|
"grad_norm": 0.7168857530641378, |
|
"learning_rate": 7.522335858048707e-08, |
|
"loss": 0.0697, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9259037347713202, |
|
"grad_norm": 0.7075639541686788, |
|
"learning_rate": 7.362229397840981e-08, |
|
"loss": 0.0651, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.926702616337128, |
|
"grad_norm": 0.6669509766718363, |
|
"learning_rate": 7.203819715607352e-08, |
|
"loss": 0.0636, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9275014979029359, |
|
"grad_norm": 0.6946840882259397, |
|
"learning_rate": 7.047107919114588e-08, |
|
"loss": 0.0677, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9283003794687438, |
|
"grad_norm": 0.703242279454599, |
|
"learning_rate": 6.892095104256063e-08, |
|
"loss": 0.0728, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.9290992610345516, |
|
"grad_norm": 0.712084470546071, |
|
"learning_rate": 6.738782355044048e-08, |
|
"loss": 0.0555, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.9298981426003595, |
|
"grad_norm": 0.6960123967278037, |
|
"learning_rate": 6.587170743602239e-08, |
|
"loss": 0.0611, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9306970241661674, |
|
"grad_norm": 0.6634823802335459, |
|
"learning_rate": 6.437261330158206e-08, |
|
"loss": 0.0688, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9314959057319753, |
|
"grad_norm": 0.6768814293109752, |
|
"learning_rate": 6.289055163035851e-08, |
|
"loss": 0.0684, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.9322947872977831, |
|
"grad_norm": 0.6890187647538402, |
|
"learning_rate": 6.142553278648239e-08, |
|
"loss": 0.0658, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.933093668863591, |
|
"grad_norm": 0.6771130077993985, |
|
"learning_rate": 5.997756701490388e-08, |
|
"loss": 0.0605, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.9338925504293989, |
|
"grad_norm": 0.7082091564066306, |
|
"learning_rate": 5.8546664441319346e-08, |
|
"loss": 0.0659, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.9346914319952067, |
|
"grad_norm": 0.7072470961017815, |
|
"learning_rate": 5.7132835072101486e-08, |
|
"loss": 0.0629, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9354903135610145, |
|
"grad_norm": 0.7300223405209104, |
|
"learning_rate": 5.573608879422876e-08, |
|
"loss": 0.0697, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.9362891951268224, |
|
"grad_norm": 0.7374743059950585, |
|
"learning_rate": 5.435643537521767e-08, |
|
"loss": 0.0709, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.9370880766926303, |
|
"grad_norm": 0.703493024507941, |
|
"learning_rate": 5.2993884463053425e-08, |
|
"loss": 0.0663, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9378869582584382, |
|
"grad_norm": 0.7044199552247555, |
|
"learning_rate": 5.164844558612131e-08, |
|
"loss": 0.0653, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.938685839824246, |
|
"grad_norm": 0.6997176122396134, |
|
"learning_rate": 5.032012815314291e-08, |
|
"loss": 0.064, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9394847213900539, |
|
"grad_norm": 0.7051848938605311, |
|
"learning_rate": 4.9008941453107527e-08, |
|
"loss": 0.0672, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9402836029558618, |
|
"grad_norm": 0.6932942079850664, |
|
"learning_rate": 4.7714894655209174e-08, |
|
"loss": 0.0695, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.9410824845216696, |
|
"grad_norm": 0.6990251492223958, |
|
"learning_rate": 4.6437996808781086e-08, |
|
"loss": 0.0675, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.9418813660874775, |
|
"grad_norm": 0.6961097084977305, |
|
"learning_rate": 4.5178256843233235e-08, |
|
"loss": 0.0662, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9426802476532854, |
|
"grad_norm": 0.7074362157190855, |
|
"learning_rate": 4.393568356799022e-08, |
|
"loss": 0.0659, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9434791292190933, |
|
"grad_norm": 0.671938873807077, |
|
"learning_rate": 4.271028567242819e-08, |
|
"loss": 0.0574, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.9442780107849011, |
|
"grad_norm": 0.6666315152137079, |
|
"learning_rate": 4.1502071725815216e-08, |
|
"loss": 0.0641, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.945076892350709, |
|
"grad_norm": 0.6501129464536826, |
|
"learning_rate": 4.03110501772519e-08, |
|
"loss": 0.0581, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.9458757739165169, |
|
"grad_norm": 0.7468643186181524, |
|
"learning_rate": 3.91372293556111e-08, |
|
"loss": 0.0714, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.9466746554823248, |
|
"grad_norm": 0.7365552924599144, |
|
"learning_rate": 3.798061746947995e-08, |
|
"loss": 0.0738, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9474735370481326, |
|
"grad_norm": 0.7290239770751575, |
|
"learning_rate": 3.684122260710243e-08, |
|
"loss": 0.0691, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.9482724186139405, |
|
"grad_norm": 0.7436081143712903, |
|
"learning_rate": 3.571905273632381e-08, |
|
"loss": 0.0706, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9490713001797484, |
|
"grad_norm": 0.7186680096892166, |
|
"learning_rate": 3.461411570453377e-08, |
|
"loss": 0.0702, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9498701817455563, |
|
"grad_norm": 0.6811106939384963, |
|
"learning_rate": 3.352641923861144e-08, |
|
"loss": 0.0629, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.9506690633113641, |
|
"grad_norm": 0.6212175291097755, |
|
"learning_rate": 3.245597094487213e-08, |
|
"loss": 0.0574, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.951467944877172, |
|
"grad_norm": 0.692435690978003, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"loss": 0.069, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9522668264429798, |
|
"grad_norm": 0.6827344843106753, |
|
"learning_rate": 3.0366848696066207e-08, |
|
"loss": 0.0661, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9530657080087876, |
|
"grad_norm": 0.7180605472986129, |
|
"learning_rate": 2.934818935033501e-08, |
|
"loss": 0.0779, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.9538645895745955, |
|
"grad_norm": 0.7019886548940459, |
|
"learning_rate": 2.834680739535578e-08, |
|
"loss": 0.0692, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9546634711404034, |
|
"grad_norm": 0.6873247468863092, |
|
"learning_rate": 2.736270983384276e-08, |
|
"loss": 0.0678, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9554623527062113, |
|
"grad_norm": 0.6594613297149347, |
|
"learning_rate": 2.6395903547638825e-08, |
|
"loss": 0.0579, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9562612342720191, |
|
"grad_norm": 0.7097562956461467, |
|
"learning_rate": 2.544639529766829e-08, |
|
"loss": 0.0703, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.957060115837827, |
|
"grad_norm": 0.6788638450102409, |
|
"learning_rate": 2.451419172388947e-08, |
|
"loss": 0.0712, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9578589974036349, |
|
"grad_norm": 0.6941431715350456, |
|
"learning_rate": 2.3599299345248294e-08, |
|
"loss": 0.0649, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9586578789694428, |
|
"grad_norm": 0.6833715565456336, |
|
"learning_rate": 2.2701724559632542e-08, |
|
"loss": 0.0646, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9594567605352506, |
|
"grad_norm": 0.686174057700068, |
|
"learning_rate": 2.1821473643827142e-08, |
|
"loss": 0.069, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.9602556421010585, |
|
"grad_norm": 0.720252925574729, |
|
"learning_rate": 2.095855275347086e-08, |
|
"loss": 0.0664, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9610545236668664, |
|
"grad_norm": 0.6866213263988475, |
|
"learning_rate": 2.011296792301165e-08, |
|
"loss": 0.0649, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9618534052326743, |
|
"grad_norm": 0.6823240540115703, |
|
"learning_rate": 1.928472506566692e-08, |
|
"loss": 0.0575, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9626522867984821, |
|
"grad_norm": 0.6850391701055302, |
|
"learning_rate": 1.847382997337943e-08, |
|
"loss": 0.0608, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.96345116836429, |
|
"grad_norm": 0.6581036399279098, |
|
"learning_rate": 1.768028831677926e-08, |
|
"loss": 0.061, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9642500499300979, |
|
"grad_norm": 0.719520898830784, |
|
"learning_rate": 1.6904105645142443e-08, |
|
"loss": 0.0623, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9650489314959058, |
|
"grad_norm": 0.6445948484303641, |
|
"learning_rate": 1.6145287386353236e-08, |
|
"loss": 0.0603, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9658478130617136, |
|
"grad_norm": 0.6888857806767628, |
|
"learning_rate": 1.5403838846864694e-08, |
|
"loss": 0.0697, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.9666466946275215, |
|
"grad_norm": 0.7515695400681041, |
|
"learning_rate": 1.46797652116637e-08, |
|
"loss": 0.0622, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9674455761933294, |
|
"grad_norm": 0.6790673279650511, |
|
"learning_rate": 1.3973071544233219e-08, |
|
"loss": 0.057, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9682444577591373, |
|
"grad_norm": 0.6443579913417355, |
|
"learning_rate": 1.3283762786517051e-08, |
|
"loss": 0.0575, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9690433393249451, |
|
"grad_norm": 0.7049514598463905, |
|
"learning_rate": 1.2611843758885412e-08, |
|
"loss": 0.0662, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9698422208907529, |
|
"grad_norm": 0.6778898956156996, |
|
"learning_rate": 1.1957319160101621e-08, |
|
"loss": 0.06, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9706411024565608, |
|
"grad_norm": 0.7373312914103637, |
|
"learning_rate": 1.132019356728853e-08, |
|
"loss": 0.0657, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9714399840223686, |
|
"grad_norm": 0.6786552950388112, |
|
"learning_rate": 1.0700471435897142e-08, |
|
"loss": 0.0575, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9722388655881765, |
|
"grad_norm": 0.6651591444631889, |
|
"learning_rate": 1.0098157099674988e-08, |
|
"loss": 0.0634, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9730377471539844, |
|
"grad_norm": 0.6701675123122681, |
|
"learning_rate": 9.513254770636138e-09, |
|
"loss": 0.0611, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9738366287197923, |
|
"grad_norm": 0.7844222447677253, |
|
"learning_rate": 8.945768539031785e-09, |
|
"loss": 0.0678, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9746355102856001, |
|
"grad_norm": 0.6919149315269185, |
|
"learning_rate": 8.395702373321101e-09, |
|
"loss": 0.0669, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.975434391851408, |
|
"grad_norm": 0.7202289235797771, |
|
"learning_rate": 7.863060120144316e-09, |
|
"loss": 0.0671, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9762332734172159, |
|
"grad_norm": 0.7481230246681578, |
|
"learning_rate": 7.3478455042946814e-09, |
|
"loss": 0.0707, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9770321549830238, |
|
"grad_norm": 0.6638284936549793, |
|
"learning_rate": 6.850062128694046e-09, |
|
"loss": 0.0617, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9778310365488316, |
|
"grad_norm": 0.7160309081965106, |
|
"learning_rate": 6.369713474366213e-09, |
|
"loss": 0.0721, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9786299181146395, |
|
"grad_norm": 0.7047500452248895, |
|
"learning_rate": 5.906802900412789e-09, |
|
"loss": 0.0683, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9794287996804474, |
|
"grad_norm": 0.7197584007832565, |
|
"learning_rate": 5.461333643990985e-09, |
|
"loss": 0.066, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9802276812462553, |
|
"grad_norm": 0.7026888743515561, |
|
"learning_rate": 5.033308820289185e-09, |
|
"loss": 0.0666, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9810265628120631, |
|
"grad_norm": 0.6567740354974405, |
|
"learning_rate": 4.622731422505855e-09, |
|
"loss": 0.0635, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.981825444377871, |
|
"grad_norm": 0.7601422587361972, |
|
"learning_rate": 4.229604321829561e-09, |
|
"loss": 0.0673, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9826243259436789, |
|
"grad_norm": 0.6905698857901205, |
|
"learning_rate": 3.853930267417316e-09, |
|
"loss": 0.064, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9834232075094868, |
|
"grad_norm": 0.664904341590391, |
|
"learning_rate": 3.495711886376818e-09, |
|
"loss": 0.0573, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9842220890752946, |
|
"grad_norm": 0.6960109391146085, |
|
"learning_rate": 3.154951683746743e-09, |
|
"loss": 0.0645, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9850209706411025, |
|
"grad_norm": 0.6882272325779095, |
|
"learning_rate": 2.8316520424800933e-09, |
|
"loss": 0.0646, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9858198522069104, |
|
"grad_norm": 0.6524760001643685, |
|
"learning_rate": 2.5258152234272637e-09, |
|
"loss": 0.0637, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.9866187337727182, |
|
"grad_norm": 0.6953143167090238, |
|
"learning_rate": 2.237443365320502e-09, |
|
"loss": 0.0674, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.987417615338526, |
|
"grad_norm": 0.6605513970439745, |
|
"learning_rate": 1.9665384847583622e-09, |
|
"loss": 0.0643, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9882164969043339, |
|
"grad_norm": 0.7309814335935096, |
|
"learning_rate": 1.7131024761923854e-09, |
|
"loss": 0.0657, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9890153784701418, |
|
"grad_norm": 0.6825626492438529, |
|
"learning_rate": 1.4771371119126631e-09, |
|
"loss": 0.0681, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9898142600359496, |
|
"grad_norm": 0.7310094281209002, |
|
"learning_rate": 1.2586440420372936e-09, |
|
"loss": 0.073, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9906131416017575, |
|
"grad_norm": 0.7203040005251995, |
|
"learning_rate": 1.0576247944985018e-09, |
|
"loss": 0.0584, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9914120231675654, |
|
"grad_norm": 0.7002852230433981, |
|
"learning_rate": 8.740807750345914e-10, |
|
"loss": 0.0687, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9922109047333733, |
|
"grad_norm": 0.7406493969310309, |
|
"learning_rate": 7.080132671774542e-10, |
|
"loss": 0.0748, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9930097862991811, |
|
"grad_norm": 0.6384302004543542, |
|
"learning_rate": 5.59423432245354e-10, |
|
"loss": 0.0579, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.993808667864989, |
|
"grad_norm": 0.7209129050951093, |
|
"learning_rate": 4.2831230933487735e-10, |
|
"loss": 0.0721, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.9946075494307969, |
|
"grad_norm": 0.6783623313747392, |
|
"learning_rate": 3.146808153123293e-10, |
|
"loss": 0.0668, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9954064309966048, |
|
"grad_norm": 0.6718991145044905, |
|
"learning_rate": 2.1852974480846002e-10, |
|
"loss": 0.0649, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9962053125624126, |
|
"grad_norm": 0.698720262635623, |
|
"learning_rate": 1.398597702123583e-10, |
|
"loss": 0.0659, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9970041941282205, |
|
"grad_norm": 0.6882411009274608, |
|
"learning_rate": 7.867144166728846e-11, |
|
"loss": 0.0668, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9978030756940284, |
|
"grad_norm": 0.7154829911093414, |
|
"learning_rate": 3.496518706597174e-11, |
|
"loss": 0.0678, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9986019572598362, |
|
"grad_norm": 0.6425326575457246, |
|
"learning_rate": 8.74131204864348e-12, |
|
"loss": 0.0605, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9994008388256441, |
|
"grad_norm": 0.6736534858888602, |
|
"learning_rate": 0.0, |
|
"loss": 0.0646, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9994008388256441, |
|
"step": 1251, |
|
"total_flos": 161979572551680.0, |
|
"train_loss": 0.08634093818035152, |
|
"train_runtime": 5655.3143, |
|
"train_samples_per_second": 14.165, |
|
"train_steps_per_second": 0.221 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1251, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 161979572551680.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|