|
{ |
|
"best_global_step": 27454, |
|
"best_metric": 0.9930046909719364, |
|
"best_model_checkpoint": "./checkpoints/checkpoint-27454", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 41181, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003642456472645152, |
|
"grad_norm": 3.125025987625122, |
|
"learning_rate": 1.9976202617712054e-05, |
|
"loss": 0.4704, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.007284912945290304, |
|
"grad_norm": 5.938050270080566, |
|
"learning_rate": 1.9951919574561086e-05, |
|
"loss": 0.1739, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.010927369417935456, |
|
"grad_norm": 29.14204216003418, |
|
"learning_rate": 1.9927636531410118e-05, |
|
"loss": 0.1834, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.014569825890580607, |
|
"grad_norm": 0.31962281465530396, |
|
"learning_rate": 1.990335348825915e-05, |
|
"loss": 0.1686, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01821228236322576, |
|
"grad_norm": 11.937419891357422, |
|
"learning_rate": 1.9879070445108182e-05, |
|
"loss": 0.2263, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.021854738835870913, |
|
"grad_norm": 2.3129444122314453, |
|
"learning_rate": 1.9854787401957215e-05, |
|
"loss": 0.1258, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.025497195308516064, |
|
"grad_norm": 26.188411712646484, |
|
"learning_rate": 1.9830504358806247e-05, |
|
"loss": 0.1496, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.029139651781161215, |
|
"grad_norm": 0.12087996304035187, |
|
"learning_rate": 1.980622131565528e-05, |
|
"loss": 0.1263, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03278210825380637, |
|
"grad_norm": 0.7965516448020935, |
|
"learning_rate": 1.978193827250431e-05, |
|
"loss": 0.1308, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03642456472645152, |
|
"grad_norm": 0.20070864260196686, |
|
"learning_rate": 1.9757655229353346e-05, |
|
"loss": 0.1313, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04006702119909667, |
|
"grad_norm": 0.1732178032398224, |
|
"learning_rate": 1.973337218620238e-05, |
|
"loss": 0.0998, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.043709477671741825, |
|
"grad_norm": 18.084930419921875, |
|
"learning_rate": 1.970908914305141e-05, |
|
"loss": 0.1161, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04735193414438697, |
|
"grad_norm": 22.9597110748291, |
|
"learning_rate": 1.9684806099900443e-05, |
|
"loss": 0.1182, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05099439061703213, |
|
"grad_norm": 18.54186248779297, |
|
"learning_rate": 1.9660523056749475e-05, |
|
"loss": 0.1551, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05463684708967728, |
|
"grad_norm": 17.401439666748047, |
|
"learning_rate": 1.9636240013598507e-05, |
|
"loss": 0.0847, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05827930356232243, |
|
"grad_norm": 0.048819124698638916, |
|
"learning_rate": 1.961195697044754e-05, |
|
"loss": 0.0896, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.061921760034967584, |
|
"grad_norm": 9.85204792022705, |
|
"learning_rate": 1.958767392729657e-05, |
|
"loss": 0.1582, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06556421650761274, |
|
"grad_norm": 32.77293014526367, |
|
"learning_rate": 1.9563390884145603e-05, |
|
"loss": 0.094, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06920667298025789, |
|
"grad_norm": 0.10893120616674423, |
|
"learning_rate": 1.9539107840994636e-05, |
|
"loss": 0.0656, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07284912945290303, |
|
"grad_norm": 0.5901679396629333, |
|
"learning_rate": 1.9514824797843668e-05, |
|
"loss": 0.101, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0764915859255482, |
|
"grad_norm": 37.66738510131836, |
|
"learning_rate": 1.94905417546927e-05, |
|
"loss": 0.1199, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08013404239819334, |
|
"grad_norm": 59.68627166748047, |
|
"learning_rate": 1.9466258711541732e-05, |
|
"loss": 0.0967, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08377649887083849, |
|
"grad_norm": 19.41754150390625, |
|
"learning_rate": 1.9441975668390764e-05, |
|
"loss": 0.08, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08741895534348365, |
|
"grad_norm": 2.025850534439087, |
|
"learning_rate": 1.9417692625239796e-05, |
|
"loss": 0.1024, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0910614118161288, |
|
"grad_norm": 8.132311820983887, |
|
"learning_rate": 1.9393409582088828e-05, |
|
"loss": 0.1225, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.09470386828877395, |
|
"grad_norm": 18.516698837280273, |
|
"learning_rate": 1.936912653893786e-05, |
|
"loss": 0.0846, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09834632476141911, |
|
"grad_norm": 19.26643180847168, |
|
"learning_rate": 1.9344843495786893e-05, |
|
"loss": 0.0825, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.10198878123406425, |
|
"grad_norm": 3.29164719581604, |
|
"learning_rate": 1.9320560452635925e-05, |
|
"loss": 0.1053, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1056312377067094, |
|
"grad_norm": 53.27655792236328, |
|
"learning_rate": 1.9296277409484957e-05, |
|
"loss": 0.0704, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.10927369417935456, |
|
"grad_norm": 34.375694274902344, |
|
"learning_rate": 1.927199436633399e-05, |
|
"loss": 0.1131, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11291615065199971, |
|
"grad_norm": 0.2655383050441742, |
|
"learning_rate": 1.924771132318302e-05, |
|
"loss": 0.0903, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.11655860712464486, |
|
"grad_norm": 0.04522942379117012, |
|
"learning_rate": 1.9223428280032057e-05, |
|
"loss": 0.0687, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12020106359729, |
|
"grad_norm": 0.2897571921348572, |
|
"learning_rate": 1.919914523688109e-05, |
|
"loss": 0.0686, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.12384352006993517, |
|
"grad_norm": 0.14382310211658478, |
|
"learning_rate": 1.917486219373012e-05, |
|
"loss": 0.0723, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.12748597654258031, |
|
"grad_norm": 0.36935460567474365, |
|
"learning_rate": 1.9150579150579153e-05, |
|
"loss": 0.0854, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.13112843301522548, |
|
"grad_norm": 0.024039477109909058, |
|
"learning_rate": 1.9126296107428185e-05, |
|
"loss": 0.0577, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.1347708894878706, |
|
"grad_norm": 0.06740175932645798, |
|
"learning_rate": 1.9102013064277217e-05, |
|
"loss": 0.1057, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.13841334596051577, |
|
"grad_norm": 29.55316162109375, |
|
"learning_rate": 1.907773002112625e-05, |
|
"loss": 0.0804, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14205580243316093, |
|
"grad_norm": 0.047345120459795, |
|
"learning_rate": 1.905344697797528e-05, |
|
"loss": 0.0687, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.14569825890580607, |
|
"grad_norm": 55.5791130065918, |
|
"learning_rate": 1.9029163934824314e-05, |
|
"loss": 0.0773, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14934071537845123, |
|
"grad_norm": 0.500926673412323, |
|
"learning_rate": 1.9004880891673346e-05, |
|
"loss": 0.0583, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.1529831718510964, |
|
"grad_norm": 7.522885799407959, |
|
"learning_rate": 1.8980597848522378e-05, |
|
"loss": 0.1171, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15662562832374152, |
|
"grad_norm": 13.693501472473145, |
|
"learning_rate": 1.895631480537141e-05, |
|
"loss": 0.0528, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.16026808479638668, |
|
"grad_norm": 0.020046068355441093, |
|
"learning_rate": 1.8932031762220442e-05, |
|
"loss": 0.0541, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.16391054126903185, |
|
"grad_norm": 53.9521484375, |
|
"learning_rate": 1.8907748719069478e-05, |
|
"loss": 0.0746, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.16755299774167698, |
|
"grad_norm": 23.165634155273438, |
|
"learning_rate": 1.888346567591851e-05, |
|
"loss": 0.1079, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17119545421432214, |
|
"grad_norm": 0.4626220166683197, |
|
"learning_rate": 1.8859182632767542e-05, |
|
"loss": 0.0741, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.1748379106869673, |
|
"grad_norm": 0.025169799104332924, |
|
"learning_rate": 1.8834899589616574e-05, |
|
"loss": 0.0843, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17848036715961244, |
|
"grad_norm": 2.095708131790161, |
|
"learning_rate": 1.8810616546465606e-05, |
|
"loss": 0.0648, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.1821228236322576, |
|
"grad_norm": 0.03240898996591568, |
|
"learning_rate": 1.8786333503314638e-05, |
|
"loss": 0.0514, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18576528010490276, |
|
"grad_norm": 0.051138218492269516, |
|
"learning_rate": 1.876205046016367e-05, |
|
"loss": 0.0842, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.1894077365775479, |
|
"grad_norm": 2.507848024368286, |
|
"learning_rate": 1.8737767417012702e-05, |
|
"loss": 0.0662, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.19305019305019305, |
|
"grad_norm": 0.06640040874481201, |
|
"learning_rate": 1.8713484373861734e-05, |
|
"loss": 0.0521, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.19669264952283821, |
|
"grad_norm": 49.450050354003906, |
|
"learning_rate": 1.8689201330710767e-05, |
|
"loss": 0.0414, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.20033510599548335, |
|
"grad_norm": 0.41120338439941406, |
|
"learning_rate": 1.86649182875598e-05, |
|
"loss": 0.0775, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.2039775624681285, |
|
"grad_norm": 43.489501953125, |
|
"learning_rate": 1.864063524440883e-05, |
|
"loss": 0.0838, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.20762001894077367, |
|
"grad_norm": 23.450210571289062, |
|
"learning_rate": 1.8616352201257863e-05, |
|
"loss": 0.0529, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.2112624754134188, |
|
"grad_norm": 0.03386552631855011, |
|
"learning_rate": 1.8592069158106895e-05, |
|
"loss": 0.045, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.21490493188606397, |
|
"grad_norm": 0.12325847148895264, |
|
"learning_rate": 1.8567786114955927e-05, |
|
"loss": 0.0501, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.21854738835870913, |
|
"grad_norm": 0.049129586666822433, |
|
"learning_rate": 1.854350307180496e-05, |
|
"loss": 0.0803, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22218984483135426, |
|
"grad_norm": 42.7215690612793, |
|
"learning_rate": 1.851922002865399e-05, |
|
"loss": 0.0345, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.22583230130399942, |
|
"grad_norm": 2.0026121139526367, |
|
"learning_rate": 1.8494936985503024e-05, |
|
"loss": 0.0902, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.22947475777664456, |
|
"grad_norm": 0.048264894634485245, |
|
"learning_rate": 1.8470653942352056e-05, |
|
"loss": 0.0759, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.23311721424928972, |
|
"grad_norm": 0.8339343667030334, |
|
"learning_rate": 1.8446370899201088e-05, |
|
"loss": 0.0836, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.23675967072193488, |
|
"grad_norm": 0.0449523888528347, |
|
"learning_rate": 1.842208785605012e-05, |
|
"loss": 0.0636, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.24040212719458, |
|
"grad_norm": 0.08337419480085373, |
|
"learning_rate": 1.8397804812899152e-05, |
|
"loss": 0.0648, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.24404458366722517, |
|
"grad_norm": 2.0222795009613037, |
|
"learning_rate": 1.8373521769748188e-05, |
|
"loss": 0.0827, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.24768704013987033, |
|
"grad_norm": 0.028120465576648712, |
|
"learning_rate": 1.834923872659722e-05, |
|
"loss": 0.0607, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.25132949661251547, |
|
"grad_norm": 1.3402409553527832, |
|
"learning_rate": 1.8324955683446252e-05, |
|
"loss": 0.0651, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.25497195308516063, |
|
"grad_norm": 0.026899676769971848, |
|
"learning_rate": 1.8300672640295284e-05, |
|
"loss": 0.093, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2586144095578058, |
|
"grad_norm": 0.024266954511404037, |
|
"learning_rate": 1.8276389597144316e-05, |
|
"loss": 0.0465, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.26225686603045095, |
|
"grad_norm": 0.16311734914779663, |
|
"learning_rate": 1.8252106553993348e-05, |
|
"loss": 0.0911, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.2658993225030961, |
|
"grad_norm": 0.02006707340478897, |
|
"learning_rate": 1.822782351084238e-05, |
|
"loss": 0.0606, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.2695417789757412, |
|
"grad_norm": 0.0868283286690712, |
|
"learning_rate": 1.8203540467691412e-05, |
|
"loss": 0.0545, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.2731842354483864, |
|
"grad_norm": 0.014719155617058277, |
|
"learning_rate": 1.8179257424540445e-05, |
|
"loss": 0.0503, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.27682669192103154, |
|
"grad_norm": 14.499321937561035, |
|
"learning_rate": 1.8154974381389477e-05, |
|
"loss": 0.0677, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.2804691483936767, |
|
"grad_norm": 0.010821194387972355, |
|
"learning_rate": 1.813069133823851e-05, |
|
"loss": 0.0846, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.28411160486632187, |
|
"grad_norm": 0.015758154913783073, |
|
"learning_rate": 1.810640829508754e-05, |
|
"loss": 0.0617, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.287754061338967, |
|
"grad_norm": 0.0034217978827655315, |
|
"learning_rate": 1.8082125251936573e-05, |
|
"loss": 0.04, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.29139651781161213, |
|
"grad_norm": 0.09612055867910385, |
|
"learning_rate": 1.805784220878561e-05, |
|
"loss": 0.0835, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2950389742842573, |
|
"grad_norm": 0.019556934013962746, |
|
"learning_rate": 1.803355916563464e-05, |
|
"loss": 0.0492, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.29868143075690246, |
|
"grad_norm": 0.030556684359908104, |
|
"learning_rate": 1.8009276122483673e-05, |
|
"loss": 0.0711, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.3023238872295476, |
|
"grad_norm": 0.009000319987535477, |
|
"learning_rate": 1.7984993079332705e-05, |
|
"loss": 0.0401, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.3059663437021928, |
|
"grad_norm": 0.007333697285503149, |
|
"learning_rate": 1.7960710036181737e-05, |
|
"loss": 0.0588, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3096088001748379, |
|
"grad_norm": 1.0338785648345947, |
|
"learning_rate": 1.793642699303077e-05, |
|
"loss": 0.0344, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.31325125664748305, |
|
"grad_norm": 6.352263927459717, |
|
"learning_rate": 1.79121439498798e-05, |
|
"loss": 0.0718, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.3168937131201282, |
|
"grad_norm": 0.0021226832177489996, |
|
"learning_rate": 1.7887860906728833e-05, |
|
"loss": 0.033, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.32053616959277337, |
|
"grad_norm": 2.111945152282715, |
|
"learning_rate": 1.7863577863577866e-05, |
|
"loss": 0.0815, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.32417862606541853, |
|
"grad_norm": 0.0793221965432167, |
|
"learning_rate": 1.7839294820426898e-05, |
|
"loss": 0.0709, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.3278210825380637, |
|
"grad_norm": 26.51352310180664, |
|
"learning_rate": 1.781501177727593e-05, |
|
"loss": 0.0916, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3314635390107088, |
|
"grad_norm": 0.0313892588019371, |
|
"learning_rate": 1.7790728734124962e-05, |
|
"loss": 0.0509, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.33510599548335396, |
|
"grad_norm": 0.09035217761993408, |
|
"learning_rate": 1.7766445690973994e-05, |
|
"loss": 0.0473, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.3387484519559991, |
|
"grad_norm": 0.10966762155294418, |
|
"learning_rate": 1.7742162647823026e-05, |
|
"loss": 0.0612, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.3423909084286443, |
|
"grad_norm": 2.552945613861084, |
|
"learning_rate": 1.771787960467206e-05, |
|
"loss": 0.0219, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.34603336490128944, |
|
"grad_norm": 0.22484628856182098, |
|
"learning_rate": 1.769359656152109e-05, |
|
"loss": 0.1118, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.3496758213739346, |
|
"grad_norm": 0.08616355806589127, |
|
"learning_rate": 1.7669313518370123e-05, |
|
"loss": 0.0759, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3533182778465797, |
|
"grad_norm": 0.04104432463645935, |
|
"learning_rate": 1.7645030475219155e-05, |
|
"loss": 0.0524, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.35696073431922487, |
|
"grad_norm": 19.837488174438477, |
|
"learning_rate": 1.7620747432068187e-05, |
|
"loss": 0.0566, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.36060319079187003, |
|
"grad_norm": 3.257078170776367, |
|
"learning_rate": 1.759646438891722e-05, |
|
"loss": 0.0426, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.3642456472645152, |
|
"grad_norm": 0.013087151572108269, |
|
"learning_rate": 1.757218134576625e-05, |
|
"loss": 0.0271, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36788810373716035, |
|
"grad_norm": 0.11012174189090729, |
|
"learning_rate": 1.7547898302615287e-05, |
|
"loss": 0.0528, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.3715305602098055, |
|
"grad_norm": 3.975999355316162, |
|
"learning_rate": 1.752361525946432e-05, |
|
"loss": 0.0497, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.3751730166824506, |
|
"grad_norm": 26.727527618408203, |
|
"learning_rate": 1.749933221631335e-05, |
|
"loss": 0.0493, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.3788154731550958, |
|
"grad_norm": 0.2996189296245575, |
|
"learning_rate": 1.7475049173162383e-05, |
|
"loss": 0.0485, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.38245792962774094, |
|
"grad_norm": 0.047304268926382065, |
|
"learning_rate": 1.7450766130011415e-05, |
|
"loss": 0.0576, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.3861003861003861, |
|
"grad_norm": 16.381393432617188, |
|
"learning_rate": 1.7426483086860447e-05, |
|
"loss": 0.0399, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.38974284257303127, |
|
"grad_norm": 0.010812154039740562, |
|
"learning_rate": 1.740220004370948e-05, |
|
"loss": 0.0608, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.39338529904567643, |
|
"grad_norm": 0.04363109916448593, |
|
"learning_rate": 1.737791700055851e-05, |
|
"loss": 0.0537, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.39702775551832153, |
|
"grad_norm": 0.039104338735342026, |
|
"learning_rate": 1.7353633957407544e-05, |
|
"loss": 0.0377, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.4006702119909667, |
|
"grad_norm": 0.07699574530124664, |
|
"learning_rate": 1.7329350914256576e-05, |
|
"loss": 0.0581, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.40431266846361186, |
|
"grad_norm": 0.0066749039106070995, |
|
"learning_rate": 1.7305067871105608e-05, |
|
"loss": 0.0552, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.407955124936257, |
|
"grad_norm": 0.024033475667238235, |
|
"learning_rate": 1.728078482795464e-05, |
|
"loss": 0.0254, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.4115975814089022, |
|
"grad_norm": 10.949067115783691, |
|
"learning_rate": 1.7256501784803672e-05, |
|
"loss": 0.085, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.41524003788154734, |
|
"grad_norm": 0.05627704784274101, |
|
"learning_rate": 1.7232218741652704e-05, |
|
"loss": 0.0504, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.41888249435419245, |
|
"grad_norm": 16.499969482421875, |
|
"learning_rate": 1.720793569850174e-05, |
|
"loss": 0.0446, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.4225249508268376, |
|
"grad_norm": 0.5864537954330444, |
|
"learning_rate": 1.7183652655350772e-05, |
|
"loss": 0.0368, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.42616740729948277, |
|
"grad_norm": 26.27457618713379, |
|
"learning_rate": 1.7159369612199804e-05, |
|
"loss": 0.062, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.42980986377212793, |
|
"grad_norm": 0.012596135959029198, |
|
"learning_rate": 1.7135086569048836e-05, |
|
"loss": 0.0761, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.4334523202447731, |
|
"grad_norm": 0.012135145254433155, |
|
"learning_rate": 1.7110803525897868e-05, |
|
"loss": 0.0395, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.43709477671741825, |
|
"grad_norm": 0.01273541059345007, |
|
"learning_rate": 1.70865204827469e-05, |
|
"loss": 0.0587, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.44073723319006336, |
|
"grad_norm": 0.03514949977397919, |
|
"learning_rate": 1.7062237439595932e-05, |
|
"loss": 0.0167, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.4443796896627085, |
|
"grad_norm": 0.035132329910993576, |
|
"learning_rate": 1.7037954396444965e-05, |
|
"loss": 0.0779, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.4480221461353537, |
|
"grad_norm": 0.021125553175807, |
|
"learning_rate": 1.7013671353293997e-05, |
|
"loss": 0.0527, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.45166460260799884, |
|
"grad_norm": 34.94084930419922, |
|
"learning_rate": 1.698938831014303e-05, |
|
"loss": 0.0613, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.455307059080644, |
|
"grad_norm": 0.0016326317563652992, |
|
"learning_rate": 1.696510526699206e-05, |
|
"loss": 0.0316, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.4589495155532891, |
|
"grad_norm": 10.651571273803711, |
|
"learning_rate": 1.6940822223841093e-05, |
|
"loss": 0.0692, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.4625919720259343, |
|
"grad_norm": 4.641509056091309, |
|
"learning_rate": 1.6916539180690125e-05, |
|
"loss": 0.0375, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.46623442849857943, |
|
"grad_norm": 0.05808263644576073, |
|
"learning_rate": 1.6892256137539157e-05, |
|
"loss": 0.0445, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.4698768849712246, |
|
"grad_norm": 0.01535380445420742, |
|
"learning_rate": 1.686797309438819e-05, |
|
"loss": 0.0522, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.47351934144386976, |
|
"grad_norm": 42.68207931518555, |
|
"learning_rate": 1.684369005123722e-05, |
|
"loss": 0.0469, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.4771617979165149, |
|
"grad_norm": 0.13109305500984192, |
|
"learning_rate": 1.6819407008086254e-05, |
|
"loss": 0.0385, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.48080425438916, |
|
"grad_norm": 8.600894927978516, |
|
"learning_rate": 1.6795123964935286e-05, |
|
"loss": 0.0574, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4844467108618052, |
|
"grad_norm": 0.032386887818574905, |
|
"learning_rate": 1.6770840921784318e-05, |
|
"loss": 0.0386, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.48808916733445035, |
|
"grad_norm": 0.02153775282204151, |
|
"learning_rate": 1.674655787863335e-05, |
|
"loss": 0.0296, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.4917316238070955, |
|
"grad_norm": 0.008334655314683914, |
|
"learning_rate": 1.6722274835482386e-05, |
|
"loss": 0.069, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.49537408027974067, |
|
"grad_norm": 0.6017431616783142, |
|
"learning_rate": 1.6697991792331418e-05, |
|
"loss": 0.03, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.49901653675238583, |
|
"grad_norm": 39.31368637084961, |
|
"learning_rate": 1.667370874918045e-05, |
|
"loss": 0.0399, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.5026589932250309, |
|
"grad_norm": 0.12983806431293488, |
|
"learning_rate": 1.6649425706029482e-05, |
|
"loss": 0.0513, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5063014496976761, |
|
"grad_norm": 0.11955763399600983, |
|
"learning_rate": 1.6625142662878514e-05, |
|
"loss": 0.0481, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.5099439061703213, |
|
"grad_norm": 0.0851789265871048, |
|
"learning_rate": 1.6600859619727546e-05, |
|
"loss": 0.0426, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5135863626429664, |
|
"grad_norm": 0.004629646893590689, |
|
"learning_rate": 1.6576576576576578e-05, |
|
"loss": 0.0429, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.5172288191156116, |
|
"grad_norm": 0.035044196993112564, |
|
"learning_rate": 1.655229353342561e-05, |
|
"loss": 0.07, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5208712755882567, |
|
"grad_norm": 1.3505648374557495, |
|
"learning_rate": 1.6528010490274643e-05, |
|
"loss": 0.0542, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.5245137320609019, |
|
"grad_norm": 0.814805269241333, |
|
"learning_rate": 1.6503727447123675e-05, |
|
"loss": 0.059, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5281561885335471, |
|
"grad_norm": 22.16185188293457, |
|
"learning_rate": 1.6479444403972707e-05, |
|
"loss": 0.0455, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.5317986450061922, |
|
"grad_norm": 0.007489902898669243, |
|
"learning_rate": 1.645516136082174e-05, |
|
"loss": 0.0898, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.5354411014788373, |
|
"grad_norm": 0.06646380573511124, |
|
"learning_rate": 1.643087831767077e-05, |
|
"loss": 0.0428, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.5390835579514824, |
|
"grad_norm": 0.016664279624819756, |
|
"learning_rate": 1.6406595274519803e-05, |
|
"loss": 0.0358, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.5427260144241276, |
|
"grad_norm": 38.54388427734375, |
|
"learning_rate": 1.6382312231368835e-05, |
|
"loss": 0.045, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.5463684708967728, |
|
"grad_norm": 0.5691483616828918, |
|
"learning_rate": 1.635802918821787e-05, |
|
"loss": 0.0416, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5500109273694179, |
|
"grad_norm": 0.01708299107849598, |
|
"learning_rate": 1.6333746145066903e-05, |
|
"loss": 0.0408, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.5536533838420631, |
|
"grad_norm": 36.231014251708984, |
|
"learning_rate": 1.6309463101915935e-05, |
|
"loss": 0.0551, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.5572958403147082, |
|
"grad_norm": 11.069836616516113, |
|
"learning_rate": 1.6285180058764967e-05, |
|
"loss": 0.0465, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.5609382967873534, |
|
"grad_norm": 0.14477401971817017, |
|
"learning_rate": 1.6260897015614e-05, |
|
"loss": 0.042, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.5645807532599986, |
|
"grad_norm": 27.404245376586914, |
|
"learning_rate": 1.623661397246303e-05, |
|
"loss": 0.0833, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.5682232097326437, |
|
"grad_norm": 0.02485232800245285, |
|
"learning_rate": 1.6212330929312064e-05, |
|
"loss": 0.0348, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.5718656662052889, |
|
"grad_norm": 0.020260579884052277, |
|
"learning_rate": 1.6188047886161096e-05, |
|
"loss": 0.0549, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.575508122677934, |
|
"grad_norm": 0.002344829263165593, |
|
"learning_rate": 1.6163764843010128e-05, |
|
"loss": 0.0368, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.5791505791505791, |
|
"grad_norm": 0.017714491114020348, |
|
"learning_rate": 1.613948179985916e-05, |
|
"loss": 0.0315, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.5827930356232243, |
|
"grad_norm": 0.654873788356781, |
|
"learning_rate": 1.6115198756708192e-05, |
|
"loss": 0.0609, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5864354920958694, |
|
"grad_norm": 0.0007013605791144073, |
|
"learning_rate": 1.6090915713557224e-05, |
|
"loss": 0.0352, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.5900779485685146, |
|
"grad_norm": 0.005927626043558121, |
|
"learning_rate": 1.6066632670406256e-05, |
|
"loss": 0.0376, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5937204050411597, |
|
"grad_norm": 17.91414451599121, |
|
"learning_rate": 1.604234962725529e-05, |
|
"loss": 0.0662, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.5973628615138049, |
|
"grad_norm": 0.004944903310388327, |
|
"learning_rate": 1.601806658410432e-05, |
|
"loss": 0.0438, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6010053179864501, |
|
"grad_norm": 0.05835060402750969, |
|
"learning_rate": 1.5993783540953353e-05, |
|
"loss": 0.0342, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.6046477744590952, |
|
"grad_norm": 12.104302406311035, |
|
"learning_rate": 1.5969500497802385e-05, |
|
"loss": 0.0608, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6082902309317404, |
|
"grad_norm": 0.1027710810303688, |
|
"learning_rate": 1.5945217454651417e-05, |
|
"loss": 0.0361, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.6119326874043856, |
|
"grad_norm": 7.579742908477783, |
|
"learning_rate": 1.592093441150045e-05, |
|
"loss": 0.0292, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.6155751438770307, |
|
"grad_norm": 0.8469115495681763, |
|
"learning_rate": 1.589665136834948e-05, |
|
"loss": 0.0345, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.6192176003496758, |
|
"grad_norm": 0.005625431425869465, |
|
"learning_rate": 1.5872368325198517e-05, |
|
"loss": 0.0293, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6228600568223209, |
|
"grad_norm": 35.629051208496094, |
|
"learning_rate": 1.584808528204755e-05, |
|
"loss": 0.0605, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.6265025132949661, |
|
"grad_norm": 11.747818946838379, |
|
"learning_rate": 1.582380223889658e-05, |
|
"loss": 0.0568, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.6301449697676113, |
|
"grad_norm": 0.0023160045966506004, |
|
"learning_rate": 1.5799519195745613e-05, |
|
"loss": 0.0278, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.6337874262402564, |
|
"grad_norm": 0.1093410775065422, |
|
"learning_rate": 1.5775236152594645e-05, |
|
"loss": 0.0445, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.6374298827129016, |
|
"grad_norm": 0.2161708027124405, |
|
"learning_rate": 1.5750953109443677e-05, |
|
"loss": 0.0918, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.6410723391855467, |
|
"grad_norm": 0.005588351283222437, |
|
"learning_rate": 1.572667006629271e-05, |
|
"loss": 0.0561, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.6447147956581919, |
|
"grad_norm": 0.0424620620906353, |
|
"learning_rate": 1.570238702314174e-05, |
|
"loss": 0.0381, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.6483572521308371, |
|
"grad_norm": 50.92411804199219, |
|
"learning_rate": 1.5678103979990774e-05, |
|
"loss": 0.0467, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.6519997086034822, |
|
"grad_norm": 6.937325477600098, |
|
"learning_rate": 1.5653820936839806e-05, |
|
"loss": 0.0153, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.6556421650761274, |
|
"grad_norm": 0.7903178930282593, |
|
"learning_rate": 1.5629537893688838e-05, |
|
"loss": 0.0505, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6592846215487725, |
|
"grad_norm": 0.012516867369413376, |
|
"learning_rate": 1.560525485053787e-05, |
|
"loss": 0.0501, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.6629270780214176, |
|
"grad_norm": 0.20594048500061035, |
|
"learning_rate": 1.5580971807386902e-05, |
|
"loss": 0.025, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.6665695344940628, |
|
"grad_norm": 0.07625633478164673, |
|
"learning_rate": 1.5556688764235934e-05, |
|
"loss": 0.0672, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.6702119909667079, |
|
"grad_norm": 0.04546148329973221, |
|
"learning_rate": 1.5532405721084966e-05, |
|
"loss": 0.039, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.6738544474393531, |
|
"grad_norm": 14.172306060791016, |
|
"learning_rate": 1.5508122677934002e-05, |
|
"loss": 0.054, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.6774969039119982, |
|
"grad_norm": 3.454416513442993, |
|
"learning_rate": 1.5483839634783034e-05, |
|
"loss": 0.0422, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.6811393603846434, |
|
"grad_norm": 0.01983390562236309, |
|
"learning_rate": 1.5459556591632066e-05, |
|
"loss": 0.0579, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.6847818168572886, |
|
"grad_norm": 0.1887538880109787, |
|
"learning_rate": 1.5435273548481098e-05, |
|
"loss": 0.0357, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.6884242733299337, |
|
"grad_norm": 0.7766327857971191, |
|
"learning_rate": 1.541099050533013e-05, |
|
"loss": 0.0305, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.6920667298025789, |
|
"grad_norm": 0.036548394709825516, |
|
"learning_rate": 1.5386707462179162e-05, |
|
"loss": 0.0592, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.695709186275224, |
|
"grad_norm": 0.3470177948474884, |
|
"learning_rate": 1.5362424419028195e-05, |
|
"loss": 0.0483, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.6993516427478692, |
|
"grad_norm": 0.03801852837204933, |
|
"learning_rate": 1.5338141375877227e-05, |
|
"loss": 0.0489, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7029940992205144, |
|
"grad_norm": 6.873861789703369, |
|
"learning_rate": 1.531385833272626e-05, |
|
"loss": 0.0348, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.7066365556931594, |
|
"grad_norm": 13.36281967163086, |
|
"learning_rate": 1.528957528957529e-05, |
|
"loss": 0.0625, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.7102790121658046, |
|
"grad_norm": 5.6196770668029785, |
|
"learning_rate": 1.5265292246424323e-05, |
|
"loss": 0.0569, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.7139214686384497, |
|
"grad_norm": 0.023543642833828926, |
|
"learning_rate": 1.5241009203273355e-05, |
|
"loss": 0.0494, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.7175639251110949, |
|
"grad_norm": 0.03211181238293648, |
|
"learning_rate": 1.5216726160122387e-05, |
|
"loss": 0.0678, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.7212063815837401, |
|
"grad_norm": 0.07444865256547928, |
|
"learning_rate": 1.5192443116971421e-05, |
|
"loss": 0.0436, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.7248488380563852, |
|
"grad_norm": 0.03234946355223656, |
|
"learning_rate": 1.5168160073820453e-05, |
|
"loss": 0.0361, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.7284912945290304, |
|
"grad_norm": 0.039285432547330856, |
|
"learning_rate": 1.5143877030669485e-05, |
|
"loss": 0.0443, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7321337510016755, |
|
"grad_norm": 1.1601065397262573, |
|
"learning_rate": 1.5119593987518518e-05, |
|
"loss": 0.0481, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.7357762074743207, |
|
"grad_norm": 0.06844893097877502, |
|
"learning_rate": 1.509531094436755e-05, |
|
"loss": 0.0305, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.7394186639469659, |
|
"grad_norm": 0.2859903872013092, |
|
"learning_rate": 1.5071027901216582e-05, |
|
"loss": 0.0563, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.743061120419611, |
|
"grad_norm": 0.008284452371299267, |
|
"learning_rate": 1.5046744858065614e-05, |
|
"loss": 0.0296, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.7467035768922561, |
|
"grad_norm": 59.25529479980469, |
|
"learning_rate": 1.5022461814914646e-05, |
|
"loss": 0.0463, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.7503460333649012, |
|
"grad_norm": 0.005696968175470829, |
|
"learning_rate": 1.4998178771763678e-05, |
|
"loss": 0.0152, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.7539884898375464, |
|
"grad_norm": 0.008841242641210556, |
|
"learning_rate": 1.497389572861271e-05, |
|
"loss": 0.0402, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.7576309463101916, |
|
"grad_norm": 0.029259689152240753, |
|
"learning_rate": 1.4949612685461742e-05, |
|
"loss": 0.0416, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.7612734027828367, |
|
"grad_norm": 0.03281538188457489, |
|
"learning_rate": 1.4925329642310774e-05, |
|
"loss": 0.0475, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.7649158592554819, |
|
"grad_norm": 0.02436155639588833, |
|
"learning_rate": 1.4901046599159807e-05, |
|
"loss": 0.0253, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.768558315728127, |
|
"grad_norm": 0.003656016429886222, |
|
"learning_rate": 1.487676355600884e-05, |
|
"loss": 0.0626, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.7722007722007722, |
|
"grad_norm": 0.024915866553783417, |
|
"learning_rate": 1.4852480512857873e-05, |
|
"loss": 0.0306, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.7758432286734174, |
|
"grad_norm": 0.01940196007490158, |
|
"learning_rate": 1.4828197469706905e-05, |
|
"loss": 0.0152, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.7794856851460625, |
|
"grad_norm": 0.046725232154130936, |
|
"learning_rate": 1.4803914426555937e-05, |
|
"loss": 0.0471, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.7831281416187077, |
|
"grad_norm": 0.0040224576368927956, |
|
"learning_rate": 1.4779631383404969e-05, |
|
"loss": 0.0407, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.7867705980913529, |
|
"grad_norm": 0.22005479037761688, |
|
"learning_rate": 1.4755348340254001e-05, |
|
"loss": 0.0481, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.7904130545639979, |
|
"grad_norm": 0.03147716075181961, |
|
"learning_rate": 1.4731065297103033e-05, |
|
"loss": 0.0454, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.7940555110366431, |
|
"grad_norm": 0.01030073594301939, |
|
"learning_rate": 1.4706782253952065e-05, |
|
"loss": 0.0596, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.7976979675092882, |
|
"grad_norm": 1.0909548997879028, |
|
"learning_rate": 1.4682499210801097e-05, |
|
"loss": 0.0316, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.8013404239819334, |
|
"grad_norm": 0.0010297419503331184, |
|
"learning_rate": 1.4658216167650133e-05, |
|
"loss": 0.0159, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.8049828804545786, |
|
"grad_norm": 0.02742888033390045, |
|
"learning_rate": 1.4633933124499165e-05, |
|
"loss": 0.0593, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.8086253369272237, |
|
"grad_norm": 2.101260185241699, |
|
"learning_rate": 1.4609650081348197e-05, |
|
"loss": 0.0262, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.8122677933998689, |
|
"grad_norm": 0.04009443148970604, |
|
"learning_rate": 1.458536703819723e-05, |
|
"loss": 0.0741, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.815910249872514, |
|
"grad_norm": 30.814577102661133, |
|
"learning_rate": 1.4561083995046261e-05, |
|
"loss": 0.0338, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.8195527063451592, |
|
"grad_norm": 12.104594230651855, |
|
"learning_rate": 1.4536800951895294e-05, |
|
"loss": 0.0669, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.8231951628178044, |
|
"grad_norm": 2.798372983932495, |
|
"learning_rate": 1.4512517908744326e-05, |
|
"loss": 0.0474, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.8268376192904495, |
|
"grad_norm": 0.0300067700445652, |
|
"learning_rate": 1.4488234865593358e-05, |
|
"loss": 0.0606, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.8304800757630947, |
|
"grad_norm": 0.0063656955026090145, |
|
"learning_rate": 1.446395182244239e-05, |
|
"loss": 0.0323, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.8341225322357397, |
|
"grad_norm": 0.03575301915407181, |
|
"learning_rate": 1.4439668779291422e-05, |
|
"loss": 0.0597, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.8377649887083849, |
|
"grad_norm": 0.04434814676642418, |
|
"learning_rate": 1.4415385736140454e-05, |
|
"loss": 0.0165, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8414074451810301, |
|
"grad_norm": 0.005864867474883795, |
|
"learning_rate": 1.4391102692989486e-05, |
|
"loss": 0.0363, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.8450499016536752, |
|
"grad_norm": 0.04094311222434044, |
|
"learning_rate": 1.436681964983852e-05, |
|
"loss": 0.0302, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.8486923581263204, |
|
"grad_norm": 0.008224506862461567, |
|
"learning_rate": 1.4342536606687552e-05, |
|
"loss": 0.0513, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.8523348145989655, |
|
"grad_norm": 0.017345773056149483, |
|
"learning_rate": 1.4318253563536584e-05, |
|
"loss": 0.0307, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.8559772710716107, |
|
"grad_norm": 0.2638939321041107, |
|
"learning_rate": 1.4293970520385616e-05, |
|
"loss": 0.0479, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.8596197275442559, |
|
"grad_norm": 0.010684901848435402, |
|
"learning_rate": 1.4269687477234649e-05, |
|
"loss": 0.0235, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.863262184016901, |
|
"grad_norm": 0.06420441716909409, |
|
"learning_rate": 1.424540443408368e-05, |
|
"loss": 0.0196, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.8669046404895462, |
|
"grad_norm": 31.575441360473633, |
|
"learning_rate": 1.4221121390932713e-05, |
|
"loss": 0.034, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.8705470969621913, |
|
"grad_norm": 0.0035045845434069633, |
|
"learning_rate": 1.4196838347781745e-05, |
|
"loss": 0.0233, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.8741895534348365, |
|
"grad_norm": 33.677982330322266, |
|
"learning_rate": 1.4172555304630777e-05, |
|
"loss": 0.0445, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8778320099074816, |
|
"grad_norm": 0.03595130518078804, |
|
"learning_rate": 1.414827226147981e-05, |
|
"loss": 0.0216, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.8814744663801267, |
|
"grad_norm": 3.2487685680389404, |
|
"learning_rate": 1.4123989218328841e-05, |
|
"loss": 0.0491, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.8851169228527719, |
|
"grad_norm": 0.12294740229845047, |
|
"learning_rate": 1.4099706175177873e-05, |
|
"loss": 0.0249, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.888759379325417, |
|
"grad_norm": 0.22951656579971313, |
|
"learning_rate": 1.4075423132026906e-05, |
|
"loss": 0.0288, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.8924018357980622, |
|
"grad_norm": 0.08439470827579498, |
|
"learning_rate": 1.405114008887594e-05, |
|
"loss": 0.0147, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.8960442922707074, |
|
"grad_norm": 0.7205714583396912, |
|
"learning_rate": 1.4026857045724972e-05, |
|
"loss": 0.0689, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.8996867487433525, |
|
"grad_norm": 25.85487174987793, |
|
"learning_rate": 1.4002574002574004e-05, |
|
"loss": 0.0591, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.9033292052159977, |
|
"grad_norm": 0.02333252690732479, |
|
"learning_rate": 1.3978290959423036e-05, |
|
"loss": 0.0356, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.9069716616886428, |
|
"grad_norm": 0.0541374497115612, |
|
"learning_rate": 1.3954007916272068e-05, |
|
"loss": 0.0378, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.910614118161288, |
|
"grad_norm": 0.8333564400672913, |
|
"learning_rate": 1.39297248731211e-05, |
|
"loss": 0.0385, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.9142565746339332, |
|
"grad_norm": 0.015136740170419216, |
|
"learning_rate": 1.3905441829970132e-05, |
|
"loss": 0.0284, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.9178990311065782, |
|
"grad_norm": 0.18541988730430603, |
|
"learning_rate": 1.3881158786819164e-05, |
|
"loss": 0.0552, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.9215414875792234, |
|
"grad_norm": 0.009074806235730648, |
|
"learning_rate": 1.3856875743668196e-05, |
|
"loss": 0.0104, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.9251839440518685, |
|
"grad_norm": 0.044704094529151917, |
|
"learning_rate": 1.3832592700517229e-05, |
|
"loss": 0.0443, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.9288264005245137, |
|
"grad_norm": 0.00702561205253005, |
|
"learning_rate": 1.380830965736626e-05, |
|
"loss": 0.0235, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.9324688569971589, |
|
"grad_norm": 0.2690836191177368, |
|
"learning_rate": 1.3784026614215296e-05, |
|
"loss": 0.0195, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.936111313469804, |
|
"grad_norm": 13.314958572387695, |
|
"learning_rate": 1.3759743571064328e-05, |
|
"loss": 0.0651, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.9397537699424492, |
|
"grad_norm": 0.02814812958240509, |
|
"learning_rate": 1.373546052791336e-05, |
|
"loss": 0.0397, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.9433962264150944, |
|
"grad_norm": 0.09442479908466339, |
|
"learning_rate": 1.3711177484762393e-05, |
|
"loss": 0.0301, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.9470386828877395, |
|
"grad_norm": 0.011217266321182251, |
|
"learning_rate": 1.3686894441611425e-05, |
|
"loss": 0.0419, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.9506811393603847, |
|
"grad_norm": 0.020814361050724983, |
|
"learning_rate": 1.3662611398460457e-05, |
|
"loss": 0.0559, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.9543235958330298, |
|
"grad_norm": 0.2749929428100586, |
|
"learning_rate": 1.3638328355309489e-05, |
|
"loss": 0.0243, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.957966052305675, |
|
"grad_norm": 0.020869776606559753, |
|
"learning_rate": 1.3614045312158521e-05, |
|
"loss": 0.0486, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.96160850877832, |
|
"grad_norm": 0.002463869983330369, |
|
"learning_rate": 1.3589762269007553e-05, |
|
"loss": 0.0135, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.9652509652509652, |
|
"grad_norm": 0.05677419900894165, |
|
"learning_rate": 1.3565479225856585e-05, |
|
"loss": 0.0405, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.9688934217236104, |
|
"grad_norm": 0.05325382575392723, |
|
"learning_rate": 1.3541196182705617e-05, |
|
"loss": 0.0104, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.9725358781962555, |
|
"grad_norm": 0.020237185060977936, |
|
"learning_rate": 1.3516913139554651e-05, |
|
"loss": 0.0416, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.9761783346689007, |
|
"grad_norm": 0.04590833932161331, |
|
"learning_rate": 1.3492630096403683e-05, |
|
"loss": 0.0325, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.9798207911415459, |
|
"grad_norm": 0.010319061577320099, |
|
"learning_rate": 1.3468347053252715e-05, |
|
"loss": 0.0171, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.983463247614191, |
|
"grad_norm": 0.004205926321446896, |
|
"learning_rate": 1.3444064010101748e-05, |
|
"loss": 0.0372, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.9871057040868362, |
|
"grad_norm": 11.123391151428223, |
|
"learning_rate": 1.341978096695078e-05, |
|
"loss": 0.0215, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.9907481605594813, |
|
"grad_norm": 0.029029347002506256, |
|
"learning_rate": 1.3395497923799812e-05, |
|
"loss": 0.03, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.9943906170321265, |
|
"grad_norm": 0.16025957465171814, |
|
"learning_rate": 1.3371214880648844e-05, |
|
"loss": 0.0233, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.9980330735047717, |
|
"grad_norm": 0.030954424291849136, |
|
"learning_rate": 1.3346931837497876e-05, |
|
"loss": 0.0486, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9832404523848549, |
|
"eval_f1": 0.9829817334498399, |
|
"eval_loss": 0.09270735830068588, |
|
"eval_precision": 0.9996614759647935, |
|
"eval_recall": 0.9668494720471474, |
|
"eval_runtime": 84.554, |
|
"eval_samples_per_second": 288.62, |
|
"eval_steps_per_second": 18.048, |
|
"step": 13727 |
|
}, |
|
{ |
|
"epoch": 1.0016755299774167, |
|
"grad_norm": 29.91096305847168, |
|
"learning_rate": 1.3322648794346908e-05, |
|
"loss": 0.0151, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.0053179864500619, |
|
"grad_norm": 1.7452354431152344, |
|
"learning_rate": 1.329836575119594e-05, |
|
"loss": 0.0209, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.008960442922707, |
|
"grad_norm": 0.004070340655744076, |
|
"learning_rate": 1.3274082708044972e-05, |
|
"loss": 0.012, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.0126028993953522, |
|
"grad_norm": 0.003967987839132547, |
|
"learning_rate": 1.3249799664894005e-05, |
|
"loss": 0.0034, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.0162453558679974, |
|
"grad_norm": 0.02903752028942108, |
|
"learning_rate": 1.3225516621743038e-05, |
|
"loss": 0.0001, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.0198878123406425, |
|
"grad_norm": 0.2444075644016266, |
|
"learning_rate": 1.320123357859207e-05, |
|
"loss": 0.0206, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.0235302688132877, |
|
"grad_norm": 42.97541809082031, |
|
"learning_rate": 1.3176950535441103e-05, |
|
"loss": 0.0211, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.0271727252859328, |
|
"grad_norm": 0.019783716648817062, |
|
"learning_rate": 1.3152667492290135e-05, |
|
"loss": 0.0364, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.030815181758578, |
|
"grad_norm": 0.0014639782020822167, |
|
"learning_rate": 1.3128384449139167e-05, |
|
"loss": 0.0142, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.0344576382312232, |
|
"grad_norm": 0.005320954602211714, |
|
"learning_rate": 1.3104101405988199e-05, |
|
"loss": 0.0119, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.0381000947038683, |
|
"grad_norm": 11.11017894744873, |
|
"learning_rate": 1.3079818362837231e-05, |
|
"loss": 0.0344, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.0417425511765135, |
|
"grad_norm": 1.2536836862564087, |
|
"learning_rate": 1.3055535319686263e-05, |
|
"loss": 0.0253, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.0453850076491586, |
|
"grad_norm": 0.03707081824541092, |
|
"learning_rate": 1.3031252276535295e-05, |
|
"loss": 0.025, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.0490274641218038, |
|
"grad_norm": 10.528314590454102, |
|
"learning_rate": 1.3006969233384327e-05, |
|
"loss": 0.0444, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.052669920594449, |
|
"grad_norm": 0.0061897216364741325, |
|
"learning_rate": 1.298268619023336e-05, |
|
"loss": 0.0148, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.0563123770670941, |
|
"grad_norm": 0.004688130225986242, |
|
"learning_rate": 1.2958403147082392e-05, |
|
"loss": 0.0174, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.0599548335397393, |
|
"grad_norm": 0.003912646789103746, |
|
"learning_rate": 1.2934120103931427e-05, |
|
"loss": 0.0214, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 1.0635972900123845, |
|
"grad_norm": 53.494564056396484, |
|
"learning_rate": 1.290983706078046e-05, |
|
"loss": 0.0069, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.0672397464850296, |
|
"grad_norm": 0.0009451656369492412, |
|
"learning_rate": 1.2885554017629491e-05, |
|
"loss": 0.0118, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 1.0708822029576746, |
|
"grad_norm": 0.15352405607700348, |
|
"learning_rate": 1.2861270974478524e-05, |
|
"loss": 0.0527, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.0745246594303197, |
|
"grad_norm": 0.006014478392899036, |
|
"learning_rate": 1.2836987931327556e-05, |
|
"loss": 0.0005, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.0781671159029649, |
|
"grad_norm": 0.009621557779610157, |
|
"learning_rate": 1.2812704888176588e-05, |
|
"loss": 0.013, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.08180957237561, |
|
"grad_norm": 0.015051420778036118, |
|
"learning_rate": 1.278842184502562e-05, |
|
"loss": 0.0045, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 1.0854520288482552, |
|
"grad_norm": 0.21560555696487427, |
|
"learning_rate": 1.2764138801874652e-05, |
|
"loss": 0.0281, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.0890944853209004, |
|
"grad_norm": 72.65242767333984, |
|
"learning_rate": 1.2739855758723684e-05, |
|
"loss": 0.0123, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 1.0927369417935455, |
|
"grad_norm": 0.0037783372681587934, |
|
"learning_rate": 1.2715572715572716e-05, |
|
"loss": 0.0143, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.0963793982661907, |
|
"grad_norm": 0.011684559285640717, |
|
"learning_rate": 1.269128967242175e-05, |
|
"loss": 0.0432, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 1.1000218547388358, |
|
"grad_norm": 0.04993521422147751, |
|
"learning_rate": 1.2667006629270782e-05, |
|
"loss": 0.0242, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.103664311211481, |
|
"grad_norm": 0.0037215156480669975, |
|
"learning_rate": 1.2642723586119814e-05, |
|
"loss": 0.026, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 1.1073067676841262, |
|
"grad_norm": 10.802151679992676, |
|
"learning_rate": 1.2618440542968847e-05, |
|
"loss": 0.0349, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.1109492241567713, |
|
"grad_norm": 0.015542046166956425, |
|
"learning_rate": 1.2594157499817879e-05, |
|
"loss": 0.0026, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.1145916806294165, |
|
"grad_norm": 0.0019722287543118, |
|
"learning_rate": 1.256987445666691e-05, |
|
"loss": 0.0229, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.1182341371020617, |
|
"grad_norm": 0.2006807178258896, |
|
"learning_rate": 1.2545591413515943e-05, |
|
"loss": 0.0044, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 1.1218765935747068, |
|
"grad_norm": 0.008714217692613602, |
|
"learning_rate": 1.2521308370364975e-05, |
|
"loss": 0.0287, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.125519050047352, |
|
"grad_norm": 0.019259391352534294, |
|
"learning_rate": 1.2497025327214007e-05, |
|
"loss": 0.0191, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 1.1291615065199971, |
|
"grad_norm": 0.004549563396722078, |
|
"learning_rate": 1.247274228406304e-05, |
|
"loss": 0.0125, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.1328039629926423, |
|
"grad_norm": 30.12306022644043, |
|
"learning_rate": 1.2448459240912071e-05, |
|
"loss": 0.0457, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 1.1364464194652875, |
|
"grad_norm": 0.006514878943562508, |
|
"learning_rate": 1.2424176197761104e-05, |
|
"loss": 0.0349, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.1400888759379326, |
|
"grad_norm": 0.009289869107306004, |
|
"learning_rate": 1.2399893154610136e-05, |
|
"loss": 0.0132, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 1.1437313324105778, |
|
"grad_norm": 0.0011842186795547605, |
|
"learning_rate": 1.237561011145917e-05, |
|
"loss": 0.011, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.1473737888832227, |
|
"grad_norm": 0.0015764775453135371, |
|
"learning_rate": 1.2351327068308202e-05, |
|
"loss": 0.0188, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.1510162453558679, |
|
"grad_norm": 0.0017348515102639794, |
|
"learning_rate": 1.2327044025157234e-05, |
|
"loss": 0.0012, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.154658701828513, |
|
"grad_norm": 0.07631982862949371, |
|
"learning_rate": 1.2302760982006266e-05, |
|
"loss": 0.0362, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 1.1583011583011582, |
|
"grad_norm": 0.004188220016658306, |
|
"learning_rate": 1.2278477938855298e-05, |
|
"loss": 0.0156, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.1619436147738034, |
|
"grad_norm": 0.011502295732498169, |
|
"learning_rate": 1.225419489570433e-05, |
|
"loss": 0.0128, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 1.1655860712464485, |
|
"grad_norm": 0.06707991659641266, |
|
"learning_rate": 1.2229911852553362e-05, |
|
"loss": 0.0133, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.1692285277190937, |
|
"grad_norm": 0.0007951713050715625, |
|
"learning_rate": 1.2205628809402394e-05, |
|
"loss": 0.0277, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 1.1728709841917389, |
|
"grad_norm": 0.11289041489362717, |
|
"learning_rate": 1.2181345766251426e-05, |
|
"loss": 0.0457, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.176513440664384, |
|
"grad_norm": 0.0014637082349509, |
|
"learning_rate": 1.2157062723100459e-05, |
|
"loss": 0.0029, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 1.1801558971370292, |
|
"grad_norm": 0.006044124718755484, |
|
"learning_rate": 1.213277967994949e-05, |
|
"loss": 0.0174, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.1837983536096743, |
|
"grad_norm": 0.0031113906297832727, |
|
"learning_rate": 1.2108496636798523e-05, |
|
"loss": 0.0234, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.1874408100823195, |
|
"grad_norm": 0.014058534987270832, |
|
"learning_rate": 1.2084213593647558e-05, |
|
"loss": 0.0305, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.1910832665549647, |
|
"grad_norm": 0.005006034392863512, |
|
"learning_rate": 1.205993055049659e-05, |
|
"loss": 0.0364, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 1.1947257230276098, |
|
"grad_norm": 0.000297638209303841, |
|
"learning_rate": 1.2035647507345623e-05, |
|
"loss": 0.0021, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.198368179500255, |
|
"grad_norm": 0.1398065686225891, |
|
"learning_rate": 1.2011364464194655e-05, |
|
"loss": 0.0002, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 1.2020106359729001, |
|
"grad_norm": 0.0008121460559777915, |
|
"learning_rate": 1.1987081421043687e-05, |
|
"loss": 0.0187, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.2056530924455453, |
|
"grad_norm": 0.001451038639061153, |
|
"learning_rate": 1.1962798377892719e-05, |
|
"loss": 0.0196, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 1.2092955489181905, |
|
"grad_norm": 0.005557178519666195, |
|
"learning_rate": 1.1938515334741751e-05, |
|
"loss": 0.0194, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.2129380053908356, |
|
"grad_norm": 0.026935333386063576, |
|
"learning_rate": 1.1914232291590783e-05, |
|
"loss": 0.0047, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 1.2165804618634808, |
|
"grad_norm": 16.11721420288086, |
|
"learning_rate": 1.1889949248439815e-05, |
|
"loss": 0.0191, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.220222918336126, |
|
"grad_norm": 0.0013423125492408872, |
|
"learning_rate": 1.1865666205288849e-05, |
|
"loss": 0.0119, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.2238653748087711, |
|
"grad_norm": 0.023071033880114555, |
|
"learning_rate": 1.1841383162137881e-05, |
|
"loss": 0.0227, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.2275078312814163, |
|
"grad_norm": 0.004094852600246668, |
|
"learning_rate": 1.1817100118986913e-05, |
|
"loss": 0.0232, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 1.2311502877540614, |
|
"grad_norm": 0.004255661740899086, |
|
"learning_rate": 1.1792817075835945e-05, |
|
"loss": 0.0061, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.2347927442267066, |
|
"grad_norm": 0.016487043350934982, |
|
"learning_rate": 1.1768534032684978e-05, |
|
"loss": 0.0153, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 1.2384352006993518, |
|
"grad_norm": 0.024706464260816574, |
|
"learning_rate": 1.174425098953401e-05, |
|
"loss": 0.0309, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.242077657171997, |
|
"grad_norm": 0.08677077293395996, |
|
"learning_rate": 1.1719967946383042e-05, |
|
"loss": 0.0181, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 1.2457201136446419, |
|
"grad_norm": 0.002082308754324913, |
|
"learning_rate": 1.1695684903232074e-05, |
|
"loss": 0.0314, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.249362570117287, |
|
"grad_norm": 0.021184509620070457, |
|
"learning_rate": 1.1671401860081106e-05, |
|
"loss": 0.0278, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 1.2530050265899322, |
|
"grad_norm": 0.012889835983514786, |
|
"learning_rate": 1.1647118816930138e-05, |
|
"loss": 0.0142, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.2566474830625773, |
|
"grad_norm": 0.043863359838724136, |
|
"learning_rate": 1.162283577377917e-05, |
|
"loss": 0.0008, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.2602899395352225, |
|
"grad_norm": 0.016099713742733, |
|
"learning_rate": 1.1598552730628202e-05, |
|
"loss": 0.0179, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.2639323960078677, |
|
"grad_norm": 0.10825788229703903, |
|
"learning_rate": 1.1574269687477235e-05, |
|
"loss": 0.0212, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 1.2675748524805128, |
|
"grad_norm": 0.16304172575473785, |
|
"learning_rate": 1.1549986644326268e-05, |
|
"loss": 0.0182, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.271217308953158, |
|
"grad_norm": 69.01996612548828, |
|
"learning_rate": 1.15257036011753e-05, |
|
"loss": 0.014, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 1.2748597654258031, |
|
"grad_norm": 0.0009998659370467067, |
|
"learning_rate": 1.1501420558024333e-05, |
|
"loss": 0.0228, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.2785022218984483, |
|
"grad_norm": 0.027977541089057922, |
|
"learning_rate": 1.1477137514873365e-05, |
|
"loss": 0.0208, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 1.2821446783710935, |
|
"grad_norm": 0.07415014505386353, |
|
"learning_rate": 1.1452854471722397e-05, |
|
"loss": 0.0239, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.2857871348437386, |
|
"grad_norm": 0.011434576474130154, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.0143, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 1.2894295913163838, |
|
"grad_norm": 0.09037664532661438, |
|
"learning_rate": 1.1404288385420461e-05, |
|
"loss": 0.0171, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.293072047789029, |
|
"grad_norm": 0.1396547108888626, |
|
"learning_rate": 1.1380005342269493e-05, |
|
"loss": 0.0316, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.2967145042616741, |
|
"grad_norm": 51.16264343261719, |
|
"learning_rate": 1.1355722299118525e-05, |
|
"loss": 0.026, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.3003569607343193, |
|
"grad_norm": 0.002041841158643365, |
|
"learning_rate": 1.1331439255967558e-05, |
|
"loss": 0.0122, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 1.3039994172069644, |
|
"grad_norm": 0.001625225180760026, |
|
"learning_rate": 1.130715621281659e-05, |
|
"loss": 0.0216, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.3076418736796096, |
|
"grad_norm": 0.0011979085393249989, |
|
"learning_rate": 1.1282873169665622e-05, |
|
"loss": 0.0201, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 1.3112843301522548, |
|
"grad_norm": 0.02973480336368084, |
|
"learning_rate": 1.1258590126514654e-05, |
|
"loss": 0.0145, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.3149267866248997, |
|
"grad_norm": 0.007877219468355179, |
|
"learning_rate": 1.123430708336369e-05, |
|
"loss": 0.0427, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 1.3185692430975449, |
|
"grad_norm": 0.045869287103414536, |
|
"learning_rate": 1.1210024040212722e-05, |
|
"loss": 0.0114, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.32221169957019, |
|
"grad_norm": 0.04795412719249725, |
|
"learning_rate": 1.1185740997061754e-05, |
|
"loss": 0.0181, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 1.3258541560428352, |
|
"grad_norm": 0.00023576147214043885, |
|
"learning_rate": 1.1161457953910786e-05, |
|
"loss": 0.0004, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.3294966125154803, |
|
"grad_norm": 0.06866733729839325, |
|
"learning_rate": 1.1137174910759818e-05, |
|
"loss": 0.0259, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.3331390689881255, |
|
"grad_norm": 0.0028649719897657633, |
|
"learning_rate": 1.111289186760885e-05, |
|
"loss": 0.0217, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.3367815254607707, |
|
"grad_norm": 0.0042462581768631935, |
|
"learning_rate": 1.1088608824457882e-05, |
|
"loss": 0.0065, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 1.3404239819334158, |
|
"grad_norm": 10.272193908691406, |
|
"learning_rate": 1.1064325781306914e-05, |
|
"loss": 0.012, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.344066438406061, |
|
"grad_norm": 0.0003935337590519339, |
|
"learning_rate": 1.1040042738155946e-05, |
|
"loss": 0.0139, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.3477088948787062, |
|
"grad_norm": 0.01895037479698658, |
|
"learning_rate": 1.101575969500498e-05, |
|
"loss": 0.033, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.015048638917505741, |
|
"learning_rate": 1.0991476651854012e-05, |
|
"loss": 0.0257, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.3549938078239965, |
|
"grad_norm": 33.69883728027344, |
|
"learning_rate": 1.0967193608703044e-05, |
|
"loss": 0.0195, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.3586362642966416, |
|
"grad_norm": 0.01808111183345318, |
|
"learning_rate": 1.0942910565552077e-05, |
|
"loss": 0.0177, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.3622787207692868, |
|
"grad_norm": 0.00881391391158104, |
|
"learning_rate": 1.0918627522401109e-05, |
|
"loss": 0.0278, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.365921177241932, |
|
"grad_norm": 0.013070676475763321, |
|
"learning_rate": 1.089434447925014e-05, |
|
"loss": 0.0154, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.3695636337145771, |
|
"grad_norm": 0.01971716247498989, |
|
"learning_rate": 1.0870061436099173e-05, |
|
"loss": 0.0297, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.3732060901872223, |
|
"grad_norm": 0.02021178789436817, |
|
"learning_rate": 1.0845778392948205e-05, |
|
"loss": 0.0352, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.3768485466598674, |
|
"grad_norm": 68.66587829589844, |
|
"learning_rate": 1.0821495349797237e-05, |
|
"loss": 0.0339, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.3804910031325126, |
|
"grad_norm": 13.299994468688965, |
|
"learning_rate": 1.079721230664627e-05, |
|
"loss": 0.0446, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.3841334596051578, |
|
"grad_norm": 0.020304521545767784, |
|
"learning_rate": 1.0772929263495301e-05, |
|
"loss": 0.0216, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.387775916077803, |
|
"grad_norm": 0.03495263308286667, |
|
"learning_rate": 1.0748646220344334e-05, |
|
"loss": 0.0064, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 1.391418372550448, |
|
"grad_norm": 0.09123169630765915, |
|
"learning_rate": 1.0724363177193366e-05, |
|
"loss": 0.0393, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.3950608290230933, |
|
"grad_norm": 0.04099246487021446, |
|
"learning_rate": 1.07000801340424e-05, |
|
"loss": 0.02, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 1.3987032854957384, |
|
"grad_norm": 0.005794774275273085, |
|
"learning_rate": 1.0675797090891432e-05, |
|
"loss": 0.0152, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.4023457419683836, |
|
"grad_norm": 0.013122792355716228, |
|
"learning_rate": 1.0651514047740464e-05, |
|
"loss": 0.0163, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.4059881984410287, |
|
"grad_norm": 0.11310232430696487, |
|
"learning_rate": 1.0627231004589496e-05, |
|
"loss": 0.0098, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.409630654913674, |
|
"grad_norm": 0.001028825412504375, |
|
"learning_rate": 1.0602947961438528e-05, |
|
"loss": 0.0027, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 1.413273111386319, |
|
"grad_norm": 0.0020931228064000607, |
|
"learning_rate": 1.057866491828756e-05, |
|
"loss": 0.0071, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.4169155678589642, |
|
"grad_norm": 47.179866790771484, |
|
"learning_rate": 1.0554381875136592e-05, |
|
"loss": 0.0116, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 1.4205580243316092, |
|
"grad_norm": 0.00044919323408976197, |
|
"learning_rate": 1.0530098831985624e-05, |
|
"loss": 0.0445, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.4242004808042543, |
|
"grad_norm": 0.011017756536602974, |
|
"learning_rate": 1.0505815788834656e-05, |
|
"loss": 0.0199, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 1.4278429372768995, |
|
"grad_norm": 0.0025288155302405357, |
|
"learning_rate": 1.0481532745683689e-05, |
|
"loss": 0.0011, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.4314853937495446, |
|
"grad_norm": 12.777478218078613, |
|
"learning_rate": 1.045724970253272e-05, |
|
"loss": 0.0128, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 1.4351278502221898, |
|
"grad_norm": 0.001780256861820817, |
|
"learning_rate": 1.0432966659381753e-05, |
|
"loss": 0.0001, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.438770306694835, |
|
"grad_norm": 0.014524326659739017, |
|
"learning_rate": 1.0408683616230787e-05, |
|
"loss": 0.0001, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.4424127631674801, |
|
"grad_norm": 0.09238297492265701, |
|
"learning_rate": 1.038440057307982e-05, |
|
"loss": 0.0449, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.4460552196401253, |
|
"grad_norm": 0.03914946690201759, |
|
"learning_rate": 1.0360117529928853e-05, |
|
"loss": 0.011, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 1.4496976761127705, |
|
"grad_norm": 0.09558617323637009, |
|
"learning_rate": 1.0335834486777885e-05, |
|
"loss": 0.0085, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.4533401325854156, |
|
"grad_norm": 0.6379877924919128, |
|
"learning_rate": 1.0311551443626917e-05, |
|
"loss": 0.016, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 1.4569825890580608, |
|
"grad_norm": 0.0090743163600564, |
|
"learning_rate": 1.0287268400475949e-05, |
|
"loss": 0.0458, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.460625045530706, |
|
"grad_norm": 0.03226013854146004, |
|
"learning_rate": 1.0262985357324981e-05, |
|
"loss": 0.0341, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 1.464267502003351, |
|
"grad_norm": 0.005081718321889639, |
|
"learning_rate": 1.0238702314174013e-05, |
|
"loss": 0.025, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.4679099584759963, |
|
"grad_norm": 0.022451894357800484, |
|
"learning_rate": 1.0214419271023045e-05, |
|
"loss": 0.0101, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 1.4715524149486414, |
|
"grad_norm": 0.013083440251648426, |
|
"learning_rate": 1.019013622787208e-05, |
|
"loss": 0.0129, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.4751948714212866, |
|
"grad_norm": 0.0020569232292473316, |
|
"learning_rate": 1.0165853184721111e-05, |
|
"loss": 0.0152, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.4788373278939317, |
|
"grad_norm": 0.0031478700693696737, |
|
"learning_rate": 1.0141570141570143e-05, |
|
"loss": 0.0262, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.482479784366577, |
|
"grad_norm": 0.0022653560154139996, |
|
"learning_rate": 1.0117287098419176e-05, |
|
"loss": 0.0113, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 1.4861222408392218, |
|
"grad_norm": 0.004938697442412376, |
|
"learning_rate": 1.0093004055268208e-05, |
|
"loss": 0.0265, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.489764697311867, |
|
"grad_norm": 0.022760428488254547, |
|
"learning_rate": 1.006872101211724e-05, |
|
"loss": 0.0194, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 1.4934071537845122, |
|
"grad_norm": 0.0077137211337685585, |
|
"learning_rate": 1.0044437968966272e-05, |
|
"loss": 0.0224, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.4970496102571573, |
|
"grad_norm": 0.025409918278455734, |
|
"learning_rate": 1.0020154925815304e-05, |
|
"loss": 0.0333, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 1.5006920667298025, |
|
"grad_norm": 0.0072277626022696495, |
|
"learning_rate": 9.995871882664336e-06, |
|
"loss": 0.0214, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.5043345232024476, |
|
"grad_norm": 0.0006038291030563414, |
|
"learning_rate": 9.971588839513368e-06, |
|
"loss": 0.0042, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 1.5079769796750928, |
|
"grad_norm": 0.0002832627797033638, |
|
"learning_rate": 9.9473057963624e-06, |
|
"loss": 0.0017, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.511619436147738, |
|
"grad_norm": 0.0032675336115062237, |
|
"learning_rate": 9.923022753211433e-06, |
|
"loss": 0.0245, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.5152618926203831, |
|
"grad_norm": 0.0003623686789069325, |
|
"learning_rate": 9.898739710060465e-06, |
|
"loss": 0.0073, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.5189043490930283, |
|
"grad_norm": 0.05859663709998131, |
|
"learning_rate": 9.874456666909498e-06, |
|
"loss": 0.0344, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 1.5225468055656735, |
|
"grad_norm": 0.07969242334365845, |
|
"learning_rate": 9.85017362375853e-06, |
|
"loss": 0.0151, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.5261892620383186, |
|
"grad_norm": 31.178518295288086, |
|
"learning_rate": 9.825890580607563e-06, |
|
"loss": 0.0134, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 1.5298317185109638, |
|
"grad_norm": 0.14027103781700134, |
|
"learning_rate": 9.801607537456595e-06, |
|
"loss": 0.0006, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.533474174983609, |
|
"grad_norm": 0.000762454466894269, |
|
"learning_rate": 9.777324494305627e-06, |
|
"loss": 0.0083, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 1.537116631456254, |
|
"grad_norm": 0.0006950427778065205, |
|
"learning_rate": 9.753041451154659e-06, |
|
"loss": 0.0144, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.5407590879288993, |
|
"grad_norm": 0.03417884558439255, |
|
"learning_rate": 9.728758408003693e-06, |
|
"loss": 0.0391, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"grad_norm": 0.002283045556396246, |
|
"learning_rate": 9.704475364852725e-06, |
|
"loss": 0.0287, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.5480440008741896, |
|
"grad_norm": 15.828984260559082, |
|
"learning_rate": 9.680192321701757e-06, |
|
"loss": 0.0148, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.5516864573468347, |
|
"grad_norm": 0.20740853250026703, |
|
"learning_rate": 9.65590927855079e-06, |
|
"loss": 0.0192, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.55532891381948, |
|
"grad_norm": 26.139934539794922, |
|
"learning_rate": 9.631626235399821e-06, |
|
"loss": 0.0511, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 1.558971370292125, |
|
"grad_norm": 0.06346629559993744, |
|
"learning_rate": 9.607343192248854e-06, |
|
"loss": 0.0141, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.5626138267647702, |
|
"grad_norm": 13.111997604370117, |
|
"learning_rate": 9.583060149097886e-06, |
|
"loss": 0.0204, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 1.5662562832374154, |
|
"grad_norm": 0.045694172382354736, |
|
"learning_rate": 9.558777105946918e-06, |
|
"loss": 0.0211, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.5698987397100606, |
|
"grad_norm": 0.005245433654636145, |
|
"learning_rate": 9.53449406279595e-06, |
|
"loss": 0.0245, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 1.5735411961827057, |
|
"grad_norm": 0.0034423854667693377, |
|
"learning_rate": 9.510211019644982e-06, |
|
"loss": 0.0026, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.5771836526553509, |
|
"grad_norm": 0.012051784433424473, |
|
"learning_rate": 9.485927976494014e-06, |
|
"loss": 0.0132, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 1.580826109127996, |
|
"grad_norm": 0.012404332868754864, |
|
"learning_rate": 9.461644933343048e-06, |
|
"loss": 0.0157, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.5844685656006412, |
|
"grad_norm": 0.005301001016050577, |
|
"learning_rate": 9.43736189019208e-06, |
|
"loss": 0.0203, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.5881110220732864, |
|
"grad_norm": 0.04494945704936981, |
|
"learning_rate": 9.413078847041112e-06, |
|
"loss": 0.0481, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.5917534785459315, |
|
"grad_norm": 0.08373013883829117, |
|
"learning_rate": 9.388795803890144e-06, |
|
"loss": 0.0081, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 1.5953959350185767, |
|
"grad_norm": 0.042607564479112625, |
|
"learning_rate": 9.364512760739176e-06, |
|
"loss": 0.0293, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.5990383914912218, |
|
"grad_norm": 0.005291256587952375, |
|
"learning_rate": 9.340229717588209e-06, |
|
"loss": 0.0203, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 1.6026808479638668, |
|
"grad_norm": 0.0010515892645344138, |
|
"learning_rate": 9.315946674437242e-06, |
|
"loss": 0.0065, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.606323304436512, |
|
"grad_norm": 0.0007970785372890532, |
|
"learning_rate": 9.291663631286274e-06, |
|
"loss": 0.0002, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 1.609965760909157, |
|
"grad_norm": 0.005806170403957367, |
|
"learning_rate": 9.267380588135307e-06, |
|
"loss": 0.0262, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.6136082173818023, |
|
"grad_norm": 0.01859590783715248, |
|
"learning_rate": 9.243097544984339e-06, |
|
"loss": 0.0098, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 1.6172506738544474, |
|
"grad_norm": 0.096404068171978, |
|
"learning_rate": 9.218814501833371e-06, |
|
"loss": 0.0096, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.6208931303270926, |
|
"grad_norm": 80.43502044677734, |
|
"learning_rate": 9.194531458682403e-06, |
|
"loss": 0.0144, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.6245355867997378, |
|
"grad_norm": 0.0023734932765364647, |
|
"learning_rate": 9.170248415531435e-06, |
|
"loss": 0.0235, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.628178043272383, |
|
"grad_norm": 0.017849687486886978, |
|
"learning_rate": 9.145965372380467e-06, |
|
"loss": 0.0098, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 1.631820499745028, |
|
"grad_norm": 0.0729728415608406, |
|
"learning_rate": 9.1216823292295e-06, |
|
"loss": 0.0107, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.6354629562176732, |
|
"grad_norm": 0.011012891307473183, |
|
"learning_rate": 9.097399286078531e-06, |
|
"loss": 0.0122, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 1.6391054126903184, |
|
"grad_norm": 0.008630420081317425, |
|
"learning_rate": 9.073116242927564e-06, |
|
"loss": 0.0241, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.6427478691629633, |
|
"grad_norm": 0.05083305388689041, |
|
"learning_rate": 9.048833199776597e-06, |
|
"loss": 0.0169, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 1.6463903256356085, |
|
"grad_norm": 0.007316832430660725, |
|
"learning_rate": 9.02455015662563e-06, |
|
"loss": 0.0214, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.6500327821082537, |
|
"grad_norm": 0.45286983251571655, |
|
"learning_rate": 9.000267113474662e-06, |
|
"loss": 0.0012, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 1.6536752385808988, |
|
"grad_norm": 0.017553621903061867, |
|
"learning_rate": 8.975984070323694e-06, |
|
"loss": 0.0107, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.657317695053544, |
|
"grad_norm": 0.0002845645940396935, |
|
"learning_rate": 8.951701027172726e-06, |
|
"loss": 0.006, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 1.6609601515261891, |
|
"grad_norm": 0.031060708686709404, |
|
"learning_rate": 8.927417984021758e-06, |
|
"loss": 0.0443, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.6646026079988343, |
|
"grad_norm": 0.0005244467174634337, |
|
"learning_rate": 8.90313494087079e-06, |
|
"loss": 0.0218, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 1.6682450644714795, |
|
"grad_norm": 0.0013898806646466255, |
|
"learning_rate": 8.878851897719824e-06, |
|
"loss": 0.0117, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.6718875209441246, |
|
"grad_norm": 18.29558563232422, |
|
"learning_rate": 8.854568854568856e-06, |
|
"loss": 0.0293, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 1.6755299774167698, |
|
"grad_norm": 0.004693304654210806, |
|
"learning_rate": 8.830285811417888e-06, |
|
"loss": 0.0204, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.679172433889415, |
|
"grad_norm": 0.0028336478862911463, |
|
"learning_rate": 8.80600276826692e-06, |
|
"loss": 0.0149, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 1.6828148903620601, |
|
"grad_norm": 0.005626322701573372, |
|
"learning_rate": 8.781719725115952e-06, |
|
"loss": 0.0245, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.6864573468347053, |
|
"grad_norm": 0.09887745976448059, |
|
"learning_rate": 8.757436681964985e-06, |
|
"loss": 0.0248, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 1.6900998033073504, |
|
"grad_norm": 0.024264780804514885, |
|
"learning_rate": 8.733153638814017e-06, |
|
"loss": 0.0006, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.6937422597799956, |
|
"grad_norm": 0.001150890369899571, |
|
"learning_rate": 8.708870595663049e-06, |
|
"loss": 0.0054, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 1.6973847162526408, |
|
"grad_norm": 0.025417355820536613, |
|
"learning_rate": 8.684587552512081e-06, |
|
"loss": 0.0284, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.701027172725286, |
|
"grad_norm": 0.0008267374942079186, |
|
"learning_rate": 8.660304509361113e-06, |
|
"loss": 0.011, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 1.704669629197931, |
|
"grad_norm": 0.02794003114104271, |
|
"learning_rate": 8.636021466210145e-06, |
|
"loss": 0.0253, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.7083120856705762, |
|
"grad_norm": 0.002588023664429784, |
|
"learning_rate": 8.611738423059179e-06, |
|
"loss": 0.0061, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 1.7119545421432214, |
|
"grad_norm": 0.010049792937934399, |
|
"learning_rate": 8.587455379908211e-06, |
|
"loss": 0.0187, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.7155969986158666, |
|
"grad_norm": 0.017932184040546417, |
|
"learning_rate": 8.563172336757243e-06, |
|
"loss": 0.0112, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 1.7192394550885117, |
|
"grad_norm": 0.0008630743832327425, |
|
"learning_rate": 8.538889293606275e-06, |
|
"loss": 0.0275, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.7228819115611569, |
|
"grad_norm": 0.011171421967446804, |
|
"learning_rate": 8.514606250455308e-06, |
|
"loss": 0.0209, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 1.726524368033802, |
|
"grad_norm": 0.0002150117652490735, |
|
"learning_rate": 8.49032320730434e-06, |
|
"loss": 0.0092, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.7301668245064472, |
|
"grad_norm": 0.002661017468199134, |
|
"learning_rate": 8.466040164153373e-06, |
|
"loss": 0.0071, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 1.7338092809790924, |
|
"grad_norm": 0.012950947508215904, |
|
"learning_rate": 8.441757121002406e-06, |
|
"loss": 0.0218, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.7374517374517375, |
|
"grad_norm": 0.02281985618174076, |
|
"learning_rate": 8.417474077851438e-06, |
|
"loss": 0.033, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 1.7410941939243827, |
|
"grad_norm": 0.002427387284114957, |
|
"learning_rate": 8.39319103470047e-06, |
|
"loss": 0.0003, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.7447366503970279, |
|
"grad_norm": 0.008654593490064144, |
|
"learning_rate": 8.368907991549502e-06, |
|
"loss": 0.0216, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 1.748379106869673, |
|
"grad_norm": 0.021408561617136, |
|
"learning_rate": 8.344624948398534e-06, |
|
"loss": 0.01, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.7520215633423182, |
|
"grad_norm": 0.005177498795092106, |
|
"learning_rate": 8.320341905247566e-06, |
|
"loss": 0.017, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 1.7556640198149633, |
|
"grad_norm": 0.00880460161715746, |
|
"learning_rate": 8.296058862096598e-06, |
|
"loss": 0.0221, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.7593064762876085, |
|
"grad_norm": 0.25018319487571716, |
|
"learning_rate": 8.27177581894563e-06, |
|
"loss": 0.0339, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 1.7629489327602537, |
|
"grad_norm": 0.10559019446372986, |
|
"learning_rate": 8.247492775794663e-06, |
|
"loss": 0.0104, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.7665913892328988, |
|
"grad_norm": 0.008422456681728363, |
|
"learning_rate": 8.223209732643695e-06, |
|
"loss": 0.0127, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 1.770233845705544, |
|
"grad_norm": 2.1540021896362305, |
|
"learning_rate": 8.198926689492728e-06, |
|
"loss": 0.0127, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.773876302178189, |
|
"grad_norm": 0.02384728379547596, |
|
"learning_rate": 8.17464364634176e-06, |
|
"loss": 0.0407, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 1.777518758650834, |
|
"grad_norm": 0.023053938522934914, |
|
"learning_rate": 8.150360603190793e-06, |
|
"loss": 0.0335, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.7811612151234792, |
|
"grad_norm": 0.006100701633840799, |
|
"learning_rate": 8.126077560039825e-06, |
|
"loss": 0.0156, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 1.7848036715961244, |
|
"grad_norm": 0.054067742079496384, |
|
"learning_rate": 8.101794516888857e-06, |
|
"loss": 0.0138, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.7884461280687696, |
|
"grad_norm": 0.006488916464149952, |
|
"learning_rate": 8.077511473737889e-06, |
|
"loss": 0.0281, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 1.7920885845414147, |
|
"grad_norm": 0.0012505652848631144, |
|
"learning_rate": 8.053228430586921e-06, |
|
"loss": 0.0099, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.79573104101406, |
|
"grad_norm": 0.0013478820910677314, |
|
"learning_rate": 8.028945387435955e-06, |
|
"loss": 0.0087, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 1.799373497486705, |
|
"grad_norm": 0.004789427388459444, |
|
"learning_rate": 8.004662344284987e-06, |
|
"loss": 0.0116, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.8030159539593502, |
|
"grad_norm": 0.04335150867700577, |
|
"learning_rate": 7.98037930113402e-06, |
|
"loss": 0.0119, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 1.8066584104319954, |
|
"grad_norm": 0.027323294430971146, |
|
"learning_rate": 7.956096257983051e-06, |
|
"loss": 0.0158, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.8103008669046405, |
|
"grad_norm": 0.0031347721815109253, |
|
"learning_rate": 7.931813214832084e-06, |
|
"loss": 0.0068, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 1.8139433233772855, |
|
"grad_norm": 0.009408103302121162, |
|
"learning_rate": 7.907530171681116e-06, |
|
"loss": 0.0179, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.8175857798499306, |
|
"grad_norm": 0.0207370575517416, |
|
"learning_rate": 7.883247128530148e-06, |
|
"loss": 0.0212, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 1.8212282363225758, |
|
"grad_norm": 0.0027329414151608944, |
|
"learning_rate": 7.85896408537918e-06, |
|
"loss": 0.0203, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.824870692795221, |
|
"grad_norm": 0.002518197288736701, |
|
"learning_rate": 7.834681042228212e-06, |
|
"loss": 0.0113, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 1.8285131492678661, |
|
"grad_norm": 41.489559173583984, |
|
"learning_rate": 7.810397999077244e-06, |
|
"loss": 0.0062, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.8321556057405113, |
|
"grad_norm": 0.0005144953029230237, |
|
"learning_rate": 7.786114955926278e-06, |
|
"loss": 0.0136, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 1.8357980622131564, |
|
"grad_norm": 0.002726512961089611, |
|
"learning_rate": 7.76183191277531e-06, |
|
"loss": 0.0129, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.8394405186858016, |
|
"grad_norm": 0.0023834258317947388, |
|
"learning_rate": 7.737548869624342e-06, |
|
"loss": 0.0089, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 1.8430829751584468, |
|
"grad_norm": 0.003907207865267992, |
|
"learning_rate": 7.713265826473374e-06, |
|
"loss": 0.0307, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.846725431631092, |
|
"grad_norm": 0.039799224585294724, |
|
"learning_rate": 7.688982783322406e-06, |
|
"loss": 0.0099, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 1.850367888103737, |
|
"grad_norm": 0.001281356206163764, |
|
"learning_rate": 7.664699740171439e-06, |
|
"loss": 0.0084, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.8540103445763823, |
|
"grad_norm": 0.007907239720225334, |
|
"learning_rate": 7.64041669702047e-06, |
|
"loss": 0.0038, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 1.8576528010490274, |
|
"grad_norm": 0.0005870074382983148, |
|
"learning_rate": 7.616133653869504e-06, |
|
"loss": 0.0063, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.8612952575216726, |
|
"grad_norm": 0.0031829806976020336, |
|
"learning_rate": 7.591850610718537e-06, |
|
"loss": 0.0185, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 1.8649377139943177, |
|
"grad_norm": 0.014370903372764587, |
|
"learning_rate": 7.567567567567569e-06, |
|
"loss": 0.0154, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.868580170466963, |
|
"grad_norm": 0.005995270796120167, |
|
"learning_rate": 7.543284524416601e-06, |
|
"loss": 0.0077, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 1.872222626939608, |
|
"grad_norm": 0.0004111470771022141, |
|
"learning_rate": 7.519001481265633e-06, |
|
"loss": 0.031, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.8758650834122532, |
|
"grad_norm": 0.005618993658572435, |
|
"learning_rate": 7.494718438114665e-06, |
|
"loss": 0.0431, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 1.8795075398848984, |
|
"grad_norm": 0.1132090836763382, |
|
"learning_rate": 7.470435394963697e-06, |
|
"loss": 0.017, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.8831499963575435, |
|
"grad_norm": 0.0009725656709633768, |
|
"learning_rate": 7.44615235181273e-06, |
|
"loss": 0.0142, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 1.8867924528301887, |
|
"grad_norm": 0.00332416663877666, |
|
"learning_rate": 7.421869308661762e-06, |
|
"loss": 0.0236, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.8904349093028339, |
|
"grad_norm": 0.00507039949297905, |
|
"learning_rate": 7.3975862655107945e-06, |
|
"loss": 0.0159, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 1.894077365775479, |
|
"grad_norm": 0.00497475266456604, |
|
"learning_rate": 7.373303222359827e-06, |
|
"loss": 0.0179, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.8977198222481242, |
|
"grad_norm": 0.021785583347082138, |
|
"learning_rate": 7.349020179208859e-06, |
|
"loss": 0.0187, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 1.9013622787207693, |
|
"grad_norm": 0.007950580678880215, |
|
"learning_rate": 7.324737136057891e-06, |
|
"loss": 0.0143, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.9050047351934145, |
|
"grad_norm": 0.20120207965373993, |
|
"learning_rate": 7.300454092906924e-06, |
|
"loss": 0.0295, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 1.9086471916660597, |
|
"grad_norm": 2.0345733165740967, |
|
"learning_rate": 7.276171049755956e-06, |
|
"loss": 0.0086, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.9122896481387048, |
|
"grad_norm": 0.06333048641681671, |
|
"learning_rate": 7.251888006604988e-06, |
|
"loss": 0.0144, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 1.91593210461135, |
|
"grad_norm": 0.0013306562323123217, |
|
"learning_rate": 7.22760496345402e-06, |
|
"loss": 0.0156, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.9195745610839952, |
|
"grad_norm": 0.003600737079977989, |
|
"learning_rate": 7.203321920303052e-06, |
|
"loss": 0.0064, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 1.9232170175566403, |
|
"grad_norm": 0.00611618859693408, |
|
"learning_rate": 7.179038877152086e-06, |
|
"loss": 0.0121, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.9268594740292855, |
|
"grad_norm": 0.003872451838105917, |
|
"learning_rate": 7.154755834001118e-06, |
|
"loss": 0.0245, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 1.9305019305019306, |
|
"grad_norm": 0.05790552869439125, |
|
"learning_rate": 7.13047279085015e-06, |
|
"loss": 0.0092, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.9341443869745758, |
|
"grad_norm": 0.0005884923157282174, |
|
"learning_rate": 7.1061897476991825e-06, |
|
"loss": 0.0001, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 1.937786843447221, |
|
"grad_norm": 0.001672230544500053, |
|
"learning_rate": 7.081906704548215e-06, |
|
"loss": 0.0235, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.941429299919866, |
|
"grad_norm": 0.0062836394645273685, |
|
"learning_rate": 7.057623661397247e-06, |
|
"loss": 0.0084, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 1.945071756392511, |
|
"grad_norm": 0.0007952909800224006, |
|
"learning_rate": 7.03334061824628e-06, |
|
"loss": 0.0005, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.9487142128651562, |
|
"grad_norm": 0.01589018851518631, |
|
"learning_rate": 7.009057575095312e-06, |
|
"loss": 0.0158, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 1.9523566693378014, |
|
"grad_norm": 0.05278225615620613, |
|
"learning_rate": 6.984774531944344e-06, |
|
"loss": 0.0376, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.9559991258104465, |
|
"grad_norm": 0.0019168544095009565, |
|
"learning_rate": 6.960491488793376e-06, |
|
"loss": 0.0304, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 1.9596415822830917, |
|
"grad_norm": 0.05056021362543106, |
|
"learning_rate": 6.936208445642408e-06, |
|
"loss": 0.0206, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.9632840387557369, |
|
"grad_norm": 0.05166146531701088, |
|
"learning_rate": 6.91192540249144e-06, |
|
"loss": 0.0255, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 1.966926495228382, |
|
"grad_norm": 0.0012813842622563243, |
|
"learning_rate": 6.8876423593404725e-06, |
|
"loss": 0.0148, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.9705689517010272, |
|
"grad_norm": 0.0026433581952005625, |
|
"learning_rate": 6.8633593161895054e-06, |
|
"loss": 0.0148, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 1.9742114081736724, |
|
"grad_norm": 0.25513923168182373, |
|
"learning_rate": 6.8390762730385376e-06, |
|
"loss": 0.0003, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.9778538646463175, |
|
"grad_norm": 0.0009773409692570567, |
|
"learning_rate": 6.81479322988757e-06, |
|
"loss": 0.014, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 1.9814963211189627, |
|
"grad_norm": 0.000380076642613858, |
|
"learning_rate": 6.790510186736602e-06, |
|
"loss": 0.0089, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.9851387775916076, |
|
"grad_norm": 15.812542915344238, |
|
"learning_rate": 6.766227143585634e-06, |
|
"loss": 0.0146, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 1.9887812340642528, |
|
"grad_norm": 0.0008573850500397384, |
|
"learning_rate": 6.741944100434668e-06, |
|
"loss": 0.004, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.992423690536898, |
|
"grad_norm": 0.001578275696374476, |
|
"learning_rate": 6.7176610572837e-06, |
|
"loss": 0.0301, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 1.996066147009543, |
|
"grad_norm": 0.0006643402157351375, |
|
"learning_rate": 6.693378014132732e-06, |
|
"loss": 0.0004, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.9997086034821883, |
|
"grad_norm": 0.062037862837314606, |
|
"learning_rate": 6.669094970981764e-06, |
|
"loss": 0.0337, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9930339288641207, |
|
"eval_f1": 0.9930046909719364, |
|
"eval_loss": 0.04103320837020874, |
|
"eval_precision": 0.9984278030616467, |
|
"eval_recall": 0.9876401735286895, |
|
"eval_runtime": 83.7488, |
|
"eval_samples_per_second": 291.395, |
|
"eval_steps_per_second": 18.221, |
|
"step": 27454 |
|
}, |
|
{ |
|
"epoch": 2.0033510599548334, |
|
"grad_norm": 0.04546406865119934, |
|
"learning_rate": 6.644811927830796e-06, |
|
"loss": 0.0038, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.0069935164274786, |
|
"grad_norm": 0.16789191961288452, |
|
"learning_rate": 6.620528884679829e-06, |
|
"loss": 0.0035, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 2.0106359729001237, |
|
"grad_norm": 0.00099283701274544, |
|
"learning_rate": 6.596245841528861e-06, |
|
"loss": 0.0076, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 2.014278429372769, |
|
"grad_norm": 0.0031957163009792566, |
|
"learning_rate": 6.5719627983778935e-06, |
|
"loss": 0.0127, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 2.017920885845414, |
|
"grad_norm": 0.014724645763635635, |
|
"learning_rate": 6.5476797552269256e-06, |
|
"loss": 0.0215, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 2.0215633423180592, |
|
"grad_norm": 0.0008338313200511038, |
|
"learning_rate": 6.523396712075958e-06, |
|
"loss": 0.0001, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 2.0252057987907044, |
|
"grad_norm": 0.00033126515336334705, |
|
"learning_rate": 6.49911366892499e-06, |
|
"loss": 0.0001, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 2.0288482552633496, |
|
"grad_norm": 0.0007063829689286649, |
|
"learning_rate": 6.474830625774022e-06, |
|
"loss": 0.0035, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 2.0324907117359947, |
|
"grad_norm": 0.0002829464210662991, |
|
"learning_rate": 6.450547582623055e-06, |
|
"loss": 0.0062, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 2.03613316820864, |
|
"grad_norm": 0.0023985933512449265, |
|
"learning_rate": 6.426264539472087e-06, |
|
"loss": 0.0118, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 2.039775624681285, |
|
"grad_norm": 0.0006219320930540562, |
|
"learning_rate": 6.401981496321119e-06, |
|
"loss": 0.0005, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.04341808115393, |
|
"grad_norm": 0.0012573492713272572, |
|
"learning_rate": 6.377698453170151e-06, |
|
"loss": 0.0222, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 2.0470605376265754, |
|
"grad_norm": 0.0023939076345413923, |
|
"learning_rate": 6.353415410019183e-06, |
|
"loss": 0.0187, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 2.0507029940992205, |
|
"grad_norm": 0.004897281993180513, |
|
"learning_rate": 6.329132366868217e-06, |
|
"loss": 0.008, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 2.0543454505718657, |
|
"grad_norm": 0.00587815698236227, |
|
"learning_rate": 6.304849323717249e-06, |
|
"loss": 0.0131, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 2.057987907044511, |
|
"grad_norm": 0.2962318956851959, |
|
"learning_rate": 6.2805662805662815e-06, |
|
"loss": 0.0059, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 2.061630363517156, |
|
"grad_norm": 0.08883453905582428, |
|
"learning_rate": 6.256283237415314e-06, |
|
"loss": 0.0, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 2.065272819989801, |
|
"grad_norm": 0.00024352317268494517, |
|
"learning_rate": 6.232000194264346e-06, |
|
"loss": 0.0001, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 2.0689152764624463, |
|
"grad_norm": 0.033922385424375534, |
|
"learning_rate": 6.207717151113378e-06, |
|
"loss": 0.0001, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 2.0725577329350915, |
|
"grad_norm": 0.00033414611243642867, |
|
"learning_rate": 6.183434107962411e-06, |
|
"loss": 0.0106, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 2.0762001894077367, |
|
"grad_norm": 0.000590184354223311, |
|
"learning_rate": 6.159151064811443e-06, |
|
"loss": 0.005, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.079842645880382, |
|
"grad_norm": 0.008075700141489506, |
|
"learning_rate": 6.134868021660475e-06, |
|
"loss": 0.0001, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 2.083485102353027, |
|
"grad_norm": 0.0005011840257793665, |
|
"learning_rate": 6.110584978509507e-06, |
|
"loss": 0.0093, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 2.087127558825672, |
|
"grad_norm": 0.001098008593544364, |
|
"learning_rate": 6.086301935358539e-06, |
|
"loss": 0.0119, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 2.0907700152983173, |
|
"grad_norm": 0.044817641377449036, |
|
"learning_rate": 6.0620188922075714e-06, |
|
"loss": 0.0001, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 2.0944124717709625, |
|
"grad_norm": 0.00016927978140302002, |
|
"learning_rate": 6.037735849056604e-06, |
|
"loss": 0.0001, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 2.0980549282436076, |
|
"grad_norm": 0.00031965531525202096, |
|
"learning_rate": 6.0134528059056365e-06, |
|
"loss": 0.0058, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 2.101697384716253, |
|
"grad_norm": 0.0006141673657111824, |
|
"learning_rate": 5.989169762754669e-06, |
|
"loss": 0.0075, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 2.105339841188898, |
|
"grad_norm": 0.0001372253173030913, |
|
"learning_rate": 5.964886719603701e-06, |
|
"loss": 0.0074, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 2.108982297661543, |
|
"grad_norm": 0.002249737735837698, |
|
"learning_rate": 5.940603676452733e-06, |
|
"loss": 0.0071, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 2.1126247541341883, |
|
"grad_norm": 0.002211878076195717, |
|
"learning_rate": 5.916320633301765e-06, |
|
"loss": 0.0109, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.1162672106068334, |
|
"grad_norm": 4.783966541290283, |
|
"learning_rate": 5.892037590150799e-06, |
|
"loss": 0.0058, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 2.1199096670794786, |
|
"grad_norm": 0.011490519158542156, |
|
"learning_rate": 5.867754546999831e-06, |
|
"loss": 0.0299, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 2.1235521235521237, |
|
"grad_norm": 0.0036625019274652004, |
|
"learning_rate": 5.843471503848863e-06, |
|
"loss": 0.0035, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 2.127194580024769, |
|
"grad_norm": 0.0005369313294067979, |
|
"learning_rate": 5.819188460697895e-06, |
|
"loss": 0.0003, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 2.1308370364974136, |
|
"grad_norm": 0.0003989477118011564, |
|
"learning_rate": 5.794905417546927e-06, |
|
"loss": 0.0001, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 2.1344794929700592, |
|
"grad_norm": 0.004685970023274422, |
|
"learning_rate": 5.77062237439596e-06, |
|
"loss": 0.0, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 2.138121949442704, |
|
"grad_norm": 0.00027460893034003675, |
|
"learning_rate": 5.746339331244992e-06, |
|
"loss": 0.004, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 2.141764405915349, |
|
"grad_norm": 0.00016678418614901602, |
|
"learning_rate": 5.7220562880940245e-06, |
|
"loss": 0.0001, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 2.1454068623879943, |
|
"grad_norm": 0.0007461053319275379, |
|
"learning_rate": 5.697773244943057e-06, |
|
"loss": 0.0035, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 2.1490493188606394, |
|
"grad_norm": 0.001619224320165813, |
|
"learning_rate": 5.673490201792089e-06, |
|
"loss": 0.0, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.1526917753332846, |
|
"grad_norm": 0.0001470306160626933, |
|
"learning_rate": 5.649207158641121e-06, |
|
"loss": 0.0, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 2.1563342318059298, |
|
"grad_norm": 0.042807288467884064, |
|
"learning_rate": 5.624924115490154e-06, |
|
"loss": 0.0176, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 2.159976688278575, |
|
"grad_norm": 0.0003236900956835598, |
|
"learning_rate": 5.600641072339186e-06, |
|
"loss": 0.0, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 2.16361914475122, |
|
"grad_norm": 0.0001237446558661759, |
|
"learning_rate": 5.576358029188218e-06, |
|
"loss": 0.0017, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 2.1672616012238652, |
|
"grad_norm": 0.0007688507903367281, |
|
"learning_rate": 5.55207498603725e-06, |
|
"loss": 0.0004, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 2.1709040576965104, |
|
"grad_norm": 0.00013287120964378119, |
|
"learning_rate": 5.527791942886282e-06, |
|
"loss": 0.0001, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 2.1745465141691556, |
|
"grad_norm": 0.04540300741791725, |
|
"learning_rate": 5.5035088997353145e-06, |
|
"loss": 0.0035, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 2.1781889706418007, |
|
"grad_norm": 0.01039667334407568, |
|
"learning_rate": 5.479225856584348e-06, |
|
"loss": 0.0122, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 2.181831427114446, |
|
"grad_norm": 0.00033356735366396606, |
|
"learning_rate": 5.4549428134333804e-06, |
|
"loss": 0.0, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 2.185473883587091, |
|
"grad_norm": 0.00016488686378579587, |
|
"learning_rate": 5.4306597702824126e-06, |
|
"loss": 0.0002, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.189116340059736, |
|
"grad_norm": 0.0007316088303923607, |
|
"learning_rate": 5.406376727131445e-06, |
|
"loss": 0.0161, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 2.1927587965323814, |
|
"grad_norm": 0.0005663663614541292, |
|
"learning_rate": 5.382093683980477e-06, |
|
"loss": 0.0052, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 2.1964012530050265, |
|
"grad_norm": 0.00010196124640060589, |
|
"learning_rate": 5.35781064082951e-06, |
|
"loss": 0.0105, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 2.2000437094776717, |
|
"grad_norm": 0.0002030348841799423, |
|
"learning_rate": 5.333527597678542e-06, |
|
"loss": 0.0209, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 2.203686165950317, |
|
"grad_norm": 0.0024572645779699087, |
|
"learning_rate": 5.309244554527574e-06, |
|
"loss": 0.0196, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 2.207328622422962, |
|
"grad_norm": 0.010307950899004936, |
|
"learning_rate": 5.284961511376606e-06, |
|
"loss": 0.0087, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 2.210971078895607, |
|
"grad_norm": 0.0012858156114816666, |
|
"learning_rate": 5.260678468225638e-06, |
|
"loss": 0.0001, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 2.2146135353682523, |
|
"grad_norm": 0.00050304492469877, |
|
"learning_rate": 5.23639542507467e-06, |
|
"loss": 0.0075, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 2.2182559918408975, |
|
"grad_norm": 0.0006547793745994568, |
|
"learning_rate": 5.212112381923703e-06, |
|
"loss": 0.0, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 2.2218984483135427, |
|
"grad_norm": 0.00042559910798445344, |
|
"learning_rate": 5.1878293387727355e-06, |
|
"loss": 0.0092, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.225540904786188, |
|
"grad_norm": 0.07011374086141586, |
|
"learning_rate": 5.163546295621768e-06, |
|
"loss": 0.0135, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 2.229183361258833, |
|
"grad_norm": 0.1466972380876541, |
|
"learning_rate": 5.1392632524708e-06, |
|
"loss": 0.0001, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 2.232825817731478, |
|
"grad_norm": 0.001599460723809898, |
|
"learning_rate": 5.114980209319832e-06, |
|
"loss": 0.0078, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 2.2364682742041233, |
|
"grad_norm": 0.0011943153804168105, |
|
"learning_rate": 5.090697166168864e-06, |
|
"loss": 0.0071, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 2.2401107306767685, |
|
"grad_norm": 0.0003815737727563828, |
|
"learning_rate": 5.066414123017896e-06, |
|
"loss": 0.0052, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 2.2437531871494136, |
|
"grad_norm": 0.0013002901105210185, |
|
"learning_rate": 5.04213107986693e-06, |
|
"loss": 0.0, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 2.247395643622059, |
|
"grad_norm": 0.002260852139443159, |
|
"learning_rate": 5.017848036715962e-06, |
|
"loss": 0.0198, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 2.251038100094704, |
|
"grad_norm": 0.020046114921569824, |
|
"learning_rate": 4.993564993564994e-06, |
|
"loss": 0.0065, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 2.254680556567349, |
|
"grad_norm": 0.26015377044677734, |
|
"learning_rate": 4.969281950414026e-06, |
|
"loss": 0.0076, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 2.2583230130399943, |
|
"grad_norm": 0.0006264203693717718, |
|
"learning_rate": 4.944998907263059e-06, |
|
"loss": 0.0003, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.2619654695126394, |
|
"grad_norm": 0.003784018801525235, |
|
"learning_rate": 4.920715864112091e-06, |
|
"loss": 0.0071, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 2.2656079259852846, |
|
"grad_norm": 0.0002754566667135805, |
|
"learning_rate": 4.8964328209611235e-06, |
|
"loss": 0.0002, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 2.2692503824579298, |
|
"grad_norm": 0.0013915307354182005, |
|
"learning_rate": 4.872149777810156e-06, |
|
"loss": 0.0173, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 2.272892838930575, |
|
"grad_norm": 0.006770916748791933, |
|
"learning_rate": 4.847866734659188e-06, |
|
"loss": 0.0112, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 2.27653529540322, |
|
"grad_norm": 0.001048597856424749, |
|
"learning_rate": 4.82358369150822e-06, |
|
"loss": 0.0062, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 2.2801777518758652, |
|
"grad_norm": 0.018874365836381912, |
|
"learning_rate": 4.799300648357252e-06, |
|
"loss": 0.0061, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 2.2838202083485104, |
|
"grad_norm": 0.0011454526102170348, |
|
"learning_rate": 4.775017605206285e-06, |
|
"loss": 0.0015, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 2.2874626648211556, |
|
"grad_norm": 0.015884971246123314, |
|
"learning_rate": 4.750734562055317e-06, |
|
"loss": 0.0078, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 2.2911051212938007, |
|
"grad_norm": 32.194305419921875, |
|
"learning_rate": 4.72645151890435e-06, |
|
"loss": 0.0112, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 2.2947475777664454, |
|
"grad_norm": 0.0007995204068720341, |
|
"learning_rate": 4.702168475753382e-06, |
|
"loss": 0.0091, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.298390034239091, |
|
"grad_norm": 0.0017488624434918165, |
|
"learning_rate": 4.677885432602414e-06, |
|
"loss": 0.0001, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 2.3020324907117358, |
|
"grad_norm": 0.0005560104036703706, |
|
"learning_rate": 4.653602389451446e-06, |
|
"loss": 0.0089, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 2.3056749471843814, |
|
"grad_norm": 0.0004513234307523817, |
|
"learning_rate": 4.6293193463004785e-06, |
|
"loss": 0.0001, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 2.309317403657026, |
|
"grad_norm": 0.0023345474619418383, |
|
"learning_rate": 4.605036303149511e-06, |
|
"loss": 0.0269, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 2.3129598601296717, |
|
"grad_norm": 0.0014278781600296497, |
|
"learning_rate": 4.580753259998544e-06, |
|
"loss": 0.0001, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 2.3166023166023164, |
|
"grad_norm": 0.017123771831393242, |
|
"learning_rate": 4.556470216847576e-06, |
|
"loss": 0.0053, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 2.3202447730749616, |
|
"grad_norm": 0.01515108346939087, |
|
"learning_rate": 4.532187173696608e-06, |
|
"loss": 0.0077, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 2.3238872295476067, |
|
"grad_norm": 0.0008318690815940499, |
|
"learning_rate": 4.507904130545641e-06, |
|
"loss": 0.0064, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 2.327529686020252, |
|
"grad_norm": 0.040002189576625824, |
|
"learning_rate": 4.483621087394673e-06, |
|
"loss": 0.0006, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 2.331172142492897, |
|
"grad_norm": 0.01249318104237318, |
|
"learning_rate": 4.459338044243705e-06, |
|
"loss": 0.0017, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.334814598965542, |
|
"grad_norm": 0.00012860352580901235, |
|
"learning_rate": 4.435055001092737e-06, |
|
"loss": 0.0, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 2.3384570554381874, |
|
"grad_norm": 0.0027027325704693794, |
|
"learning_rate": 4.410771957941769e-06, |
|
"loss": 0.0088, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 2.3420995119108325, |
|
"grad_norm": 0.006262780167162418, |
|
"learning_rate": 4.3864889147908015e-06, |
|
"loss": 0.0207, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 2.3457419683834777, |
|
"grad_norm": 0.11578983068466187, |
|
"learning_rate": 4.3622058716398344e-06, |
|
"loss": 0.0004, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 2.349384424856123, |
|
"grad_norm": 0.009140093810856342, |
|
"learning_rate": 4.3379228284888666e-06, |
|
"loss": 0.0002, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 2.353026881328768, |
|
"grad_norm": 0.012581353075802326, |
|
"learning_rate": 4.313639785337899e-06, |
|
"loss": 0.0072, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 2.356669337801413, |
|
"grad_norm": 0.0004677812976296991, |
|
"learning_rate": 4.289356742186932e-06, |
|
"loss": 0.0013, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 2.3603117942740584, |
|
"grad_norm": 0.0020102383568882942, |
|
"learning_rate": 4.265073699035964e-06, |
|
"loss": 0.0085, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 2.3639542507467035, |
|
"grad_norm": 0.00039819578523747623, |
|
"learning_rate": 4.240790655884996e-06, |
|
"loss": 0.0065, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 2.3675967072193487, |
|
"grad_norm": 0.00021630170522257686, |
|
"learning_rate": 4.216507612734028e-06, |
|
"loss": 0.0, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.371239163691994, |
|
"grad_norm": 1.0400899648666382, |
|
"learning_rate": 4.19222456958306e-06, |
|
"loss": 0.0376, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 2.374881620164639, |
|
"grad_norm": 0.00023875107581261545, |
|
"learning_rate": 4.167941526432093e-06, |
|
"loss": 0.0054, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 2.378524076637284, |
|
"grad_norm": 0.10375426709651947, |
|
"learning_rate": 4.143658483281125e-06, |
|
"loss": 0.005, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 2.3821665331099293, |
|
"grad_norm": 0.0004043069202452898, |
|
"learning_rate": 4.119375440130157e-06, |
|
"loss": 0.0269, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 2.3858089895825745, |
|
"grad_norm": 0.005885982885956764, |
|
"learning_rate": 4.09509239697919e-06, |
|
"loss": 0.016, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 2.3894514460552196, |
|
"grad_norm": 0.002424208912998438, |
|
"learning_rate": 4.0708093538282225e-06, |
|
"loss": 0.0001, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 2.393093902527865, |
|
"grad_norm": 0.003882485907524824, |
|
"learning_rate": 4.046526310677255e-06, |
|
"loss": 0.0158, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 2.39673635900051, |
|
"grad_norm": 0.0025231230538338423, |
|
"learning_rate": 4.022243267526287e-06, |
|
"loss": 0.0003, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 2.400378815473155, |
|
"grad_norm": 0.15265323221683502, |
|
"learning_rate": 3.997960224375319e-06, |
|
"loss": 0.0035, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 2.4040212719458003, |
|
"grad_norm": 0.03218410164117813, |
|
"learning_rate": 3.973677181224351e-06, |
|
"loss": 0.0059, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.4076637284184454, |
|
"grad_norm": 0.0010072742588818073, |
|
"learning_rate": 3.949394138073384e-06, |
|
"loss": 0.005, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 2.4113061848910906, |
|
"grad_norm": 0.002031374257057905, |
|
"learning_rate": 3.925111094922416e-06, |
|
"loss": 0.0002, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 2.4149486413637358, |
|
"grad_norm": 0.00022973962768446654, |
|
"learning_rate": 3.900828051771448e-06, |
|
"loss": 0.0063, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 2.418591097836381, |
|
"grad_norm": 0.001628385973162949, |
|
"learning_rate": 3.876545008620481e-06, |
|
"loss": 0.0, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 2.422233554309026, |
|
"grad_norm": 0.00710854260250926, |
|
"learning_rate": 3.852261965469513e-06, |
|
"loss": 0.0074, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 2.4258760107816713, |
|
"grad_norm": 0.05613460764288902, |
|
"learning_rate": 3.827978922318545e-06, |
|
"loss": 0.0255, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 2.4295184672543164, |
|
"grad_norm": 0.004103382583707571, |
|
"learning_rate": 3.8036958791675775e-06, |
|
"loss": 0.0002, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 2.4331609237269616, |
|
"grad_norm": 0.0012559541501104832, |
|
"learning_rate": 3.77941283601661e-06, |
|
"loss": 0.0058, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 2.4368033801996067, |
|
"grad_norm": 0.01285065058618784, |
|
"learning_rate": 3.755129792865642e-06, |
|
"loss": 0.0, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 2.440445836672252, |
|
"grad_norm": 0.00024041223514359444, |
|
"learning_rate": 3.7308467497146743e-06, |
|
"loss": 0.0061, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.444088293144897, |
|
"grad_norm": 0.0024315116461366415, |
|
"learning_rate": 3.706563706563707e-06, |
|
"loss": 0.0, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 2.4477307496175422, |
|
"grad_norm": 0.0014013125328347087, |
|
"learning_rate": 3.682280663412739e-06, |
|
"loss": 0.0061, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 2.4513732060901874, |
|
"grad_norm": 0.0061280145309865475, |
|
"learning_rate": 3.657997620261772e-06, |
|
"loss": 0.0244, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 2.4550156625628325, |
|
"grad_norm": 0.006391397211700678, |
|
"learning_rate": 3.633714577110804e-06, |
|
"loss": 0.0244, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 2.4586581190354777, |
|
"grad_norm": 0.00020790348935406655, |
|
"learning_rate": 3.609431533959836e-06, |
|
"loss": 0.0036, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 2.462300575508123, |
|
"grad_norm": 0.042637672275304794, |
|
"learning_rate": 3.5851484908088683e-06, |
|
"loss": 0.0003, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 2.4659430319807676, |
|
"grad_norm": 0.0025181127712130547, |
|
"learning_rate": 3.560865447657901e-06, |
|
"loss": 0.0131, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 2.469585488453413, |
|
"grad_norm": 0.012113599106669426, |
|
"learning_rate": 3.536582404506933e-06, |
|
"loss": 0.0069, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 2.473227944926058, |
|
"grad_norm": 0.0006052980315871537, |
|
"learning_rate": 3.512299361355965e-06, |
|
"loss": 0.0011, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 2.4768704013987035, |
|
"grad_norm": 0.0036468636244535446, |
|
"learning_rate": 3.4880163182049976e-06, |
|
"loss": 0.0035, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.4805128578713482, |
|
"grad_norm": 0.00024817034136503935, |
|
"learning_rate": 3.4637332750540298e-06, |
|
"loss": 0.0036, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 2.484155314343994, |
|
"grad_norm": 0.0015875960234552622, |
|
"learning_rate": 3.4394502319030627e-06, |
|
"loss": 0.0, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 2.4877977708166386, |
|
"grad_norm": 0.1749018430709839, |
|
"learning_rate": 3.415167188752095e-06, |
|
"loss": 0.0054, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 2.4914402272892837, |
|
"grad_norm": 0.0002046012959908694, |
|
"learning_rate": 3.390884145601127e-06, |
|
"loss": 0.0142, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 2.495082683761929, |
|
"grad_norm": 0.00013496189785655588, |
|
"learning_rate": 3.3666011024501595e-06, |
|
"loss": 0.0001, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 2.498725140234574, |
|
"grad_norm": 0.0008627015631645918, |
|
"learning_rate": 3.3423180592991917e-06, |
|
"loss": 0.0188, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 2.502367596707219, |
|
"grad_norm": 0.00022307100880425423, |
|
"learning_rate": 3.3180350161482238e-06, |
|
"loss": 0.0231, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 2.5060100531798644, |
|
"grad_norm": 0.0331818088889122, |
|
"learning_rate": 3.2937519729972563e-06, |
|
"loss": 0.0, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 2.5096525096525095, |
|
"grad_norm": 0.0004173066408839077, |
|
"learning_rate": 3.2694689298462885e-06, |
|
"loss": 0.0034, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 2.5132949661251547, |
|
"grad_norm": 0.0004441474738996476, |
|
"learning_rate": 3.245185886695321e-06, |
|
"loss": 0.0, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.5169374225978, |
|
"grad_norm": 0.0006797397509217262, |
|
"learning_rate": 3.2209028435443535e-06, |
|
"loss": 0.0, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 2.520579879070445, |
|
"grad_norm": 0.00027864333242177963, |
|
"learning_rate": 3.1966198003933857e-06, |
|
"loss": 0.0, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 2.52422233554309, |
|
"grad_norm": 0.00039275462040677667, |
|
"learning_rate": 3.172336757242418e-06, |
|
"loss": 0.0093, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 2.5278647920157353, |
|
"grad_norm": 0.7616935968399048, |
|
"learning_rate": 3.1480537140914503e-06, |
|
"loss": 0.0011, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 2.5315072484883805, |
|
"grad_norm": 0.00012195669114589691, |
|
"learning_rate": 3.1237706709404825e-06, |
|
"loss": 0.0001, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 2.5351497049610257, |
|
"grad_norm": 0.00019767122284974903, |
|
"learning_rate": 3.0994876277895146e-06, |
|
"loss": 0.0158, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 2.538792161433671, |
|
"grad_norm": 0.0014518082607537508, |
|
"learning_rate": 3.075204584638547e-06, |
|
"loss": 0.016, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 2.542434617906316, |
|
"grad_norm": 0.0005969086778350174, |
|
"learning_rate": 3.0509215414875793e-06, |
|
"loss": 0.0053, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 2.546077074378961, |
|
"grad_norm": 0.0010351201053708792, |
|
"learning_rate": 3.0266384983366122e-06, |
|
"loss": 0.0001, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 2.5497195308516063, |
|
"grad_norm": 0.001427893410436809, |
|
"learning_rate": 3.0023554551856443e-06, |
|
"loss": 0.0, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.5533619873242515, |
|
"grad_norm": 0.02131476067006588, |
|
"learning_rate": 2.9780724120346765e-06, |
|
"loss": 0.0001, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 2.5570044437968966, |
|
"grad_norm": 0.022580554708838463, |
|
"learning_rate": 2.953789368883709e-06, |
|
"loss": 0.0083, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 2.560646900269542, |
|
"grad_norm": 0.0006684429827146232, |
|
"learning_rate": 2.929506325732741e-06, |
|
"loss": 0.0061, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 2.564289356742187, |
|
"grad_norm": 0.003995757550001144, |
|
"learning_rate": 2.9052232825817733e-06, |
|
"loss": 0.0089, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 2.567931813214832, |
|
"grad_norm": 0.000656424555927515, |
|
"learning_rate": 2.8809402394308054e-06, |
|
"loss": 0.0168, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 2.5715742696874773, |
|
"grad_norm": 0.00873675849288702, |
|
"learning_rate": 2.856657196279838e-06, |
|
"loss": 0.0008, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 2.5752167261601224, |
|
"grad_norm": 0.0002116480318363756, |
|
"learning_rate": 2.83237415312887e-06, |
|
"loss": 0.0063, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 2.5788591826327676, |
|
"grad_norm": 0.003966080024838448, |
|
"learning_rate": 2.808091109977903e-06, |
|
"loss": 0.0063, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 2.5825016391054127, |
|
"grad_norm": 0.0007613358902744949, |
|
"learning_rate": 2.783808066826935e-06, |
|
"loss": 0.0, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 2.586144095578058, |
|
"grad_norm": 0.007331300992518663, |
|
"learning_rate": 2.7595250236759673e-06, |
|
"loss": 0.0, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.589786552050703, |
|
"grad_norm": 0.0009448974160477519, |
|
"learning_rate": 2.735241980525e-06, |
|
"loss": 0.0057, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 2.5934290085233482, |
|
"grad_norm": 0.010762249119579792, |
|
"learning_rate": 2.710958937374032e-06, |
|
"loss": 0.0001, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 2.5970714649959934, |
|
"grad_norm": 0.00015307983267121017, |
|
"learning_rate": 2.686675894223064e-06, |
|
"loss": 0.0, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 2.6007139214686386, |
|
"grad_norm": 0.00037176566547714174, |
|
"learning_rate": 2.6623928510720966e-06, |
|
"loss": 0.0, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 2.6043563779412837, |
|
"grad_norm": 0.04000074788928032, |
|
"learning_rate": 2.6381098079211287e-06, |
|
"loss": 0.0001, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 2.607998834413929, |
|
"grad_norm": 0.00212919432669878, |
|
"learning_rate": 2.613826764770161e-06, |
|
"loss": 0.0025, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.611641290886574, |
|
"grad_norm": 0.0005730296252295375, |
|
"learning_rate": 2.589543721619194e-06, |
|
"loss": 0.0085, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 2.615283747359219, |
|
"grad_norm": 0.00018936557171400636, |
|
"learning_rate": 2.565260678468226e-06, |
|
"loss": 0.0, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.6189262038318644, |
|
"grad_norm": 0.001116311876103282, |
|
"learning_rate": 2.540977635317258e-06, |
|
"loss": 0.0117, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 2.6225686603045095, |
|
"grad_norm": 0.0002378418721491471, |
|
"learning_rate": 2.5166945921662906e-06, |
|
"loss": 0.0, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.6262111167771547, |
|
"grad_norm": 0.0003085487405769527, |
|
"learning_rate": 2.4924115490153227e-06, |
|
"loss": 0.0124, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 2.6298535732497994, |
|
"grad_norm": 0.0005568100605159998, |
|
"learning_rate": 2.468128505864355e-06, |
|
"loss": 0.0184, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.633496029722445, |
|
"grad_norm": 0.000533478450961411, |
|
"learning_rate": 2.4438454627133874e-06, |
|
"loss": 0.0, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 2.6371384861950897, |
|
"grad_norm": 0.005373021587729454, |
|
"learning_rate": 2.41956241956242e-06, |
|
"loss": 0.006, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.6407809426677353, |
|
"grad_norm": 0.0018519052537158132, |
|
"learning_rate": 2.395279376411452e-06, |
|
"loss": 0.0165, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 2.64442339914038, |
|
"grad_norm": 0.0007277656113728881, |
|
"learning_rate": 2.370996333260484e-06, |
|
"loss": 0.0076, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.6480658556130257, |
|
"grad_norm": 0.035081762820482254, |
|
"learning_rate": 2.3467132901095168e-06, |
|
"loss": 0.0097, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 2.6517083120856704, |
|
"grad_norm": 0.00012679673091042787, |
|
"learning_rate": 2.3224302469585493e-06, |
|
"loss": 0.0073, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.655350768558316, |
|
"grad_norm": 0.0004883557558059692, |
|
"learning_rate": 2.2981472038075814e-06, |
|
"loss": 0.0063, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 2.6589932250309607, |
|
"grad_norm": 0.009050990454852581, |
|
"learning_rate": 2.2738641606566135e-06, |
|
"loss": 0.0001, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.6626356815036063, |
|
"grad_norm": 0.000246566953137517, |
|
"learning_rate": 2.249581117505646e-06, |
|
"loss": 0.0071, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 2.666278137976251, |
|
"grad_norm": 0.0010878010652959347, |
|
"learning_rate": 2.2252980743546786e-06, |
|
"loss": 0.0092, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.669920594448896, |
|
"grad_norm": 0.00010328316420782357, |
|
"learning_rate": 2.2010150312037108e-06, |
|
"loss": 0.0001, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 2.6735630509215413, |
|
"grad_norm": 0.00040322105633094907, |
|
"learning_rate": 2.176731988052743e-06, |
|
"loss": 0.0061, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.6772055073941865, |
|
"grad_norm": 1.994128704071045, |
|
"learning_rate": 2.152448944901775e-06, |
|
"loss": 0.0028, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 2.6808479638668317, |
|
"grad_norm": 0.01508715096861124, |
|
"learning_rate": 2.1281659017508076e-06, |
|
"loss": 0.005, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 2.684490420339477, |
|
"grad_norm": 0.0003937446163035929, |
|
"learning_rate": 2.10388285859984e-06, |
|
"loss": 0.007, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 2.688132876812122, |
|
"grad_norm": 0.00016946149116847664, |
|
"learning_rate": 2.0795998154488722e-06, |
|
"loss": 0.0001, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 2.691775333284767, |
|
"grad_norm": 0.047715552151203156, |
|
"learning_rate": 2.0553167722979043e-06, |
|
"loss": 0.012, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 2.6954177897574123, |
|
"grad_norm": 0.00020978396059945226, |
|
"learning_rate": 2.031033729146937e-06, |
|
"loss": 0.0, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.6990602462300575, |
|
"grad_norm": 0.0037919259630143642, |
|
"learning_rate": 2.0067506859959694e-06, |
|
"loss": 0.0033, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 0.00019998032075818628, |
|
"learning_rate": 1.9824676428450016e-06, |
|
"loss": 0.0, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 2.706345159175348, |
|
"grad_norm": 0.004953719209879637, |
|
"learning_rate": 1.9581845996940337e-06, |
|
"loss": 0.0, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 2.709987615647993, |
|
"grad_norm": 0.0006924102199263871, |
|
"learning_rate": 1.9339015565430662e-06, |
|
"loss": 0.009, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 2.713630072120638, |
|
"grad_norm": 0.002812350867316127, |
|
"learning_rate": 1.9096185133920988e-06, |
|
"loss": 0.0001, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 2.7172725285932833, |
|
"grad_norm": 0.0002599440049380064, |
|
"learning_rate": 1.885335470241131e-06, |
|
"loss": 0.0, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 2.7209149850659284, |
|
"grad_norm": 0.0006807326572015882, |
|
"learning_rate": 1.861052427090163e-06, |
|
"loss": 0.003, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 2.7245574415385736, |
|
"grad_norm": 0.000279091764241457, |
|
"learning_rate": 1.8367693839391954e-06, |
|
"loss": 0.0136, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 2.7281998980112188, |
|
"grad_norm": 0.0008971957140602171, |
|
"learning_rate": 1.8124863407882277e-06, |
|
"loss": 0.008, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 2.731842354483864, |
|
"grad_norm": 0.01421735342592001, |
|
"learning_rate": 1.7882032976372602e-06, |
|
"loss": 0.0, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.735484810956509, |
|
"grad_norm": 0.013042074628174305, |
|
"learning_rate": 1.7639202544862924e-06, |
|
"loss": 0.0069, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 2.7391272674291542, |
|
"grad_norm": 0.0022202201653271914, |
|
"learning_rate": 1.7396372113353247e-06, |
|
"loss": 0.0001, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 2.7427697239017994, |
|
"grad_norm": 0.0003710546006914228, |
|
"learning_rate": 1.7153541681843568e-06, |
|
"loss": 0.0294, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 2.7464121803744446, |
|
"grad_norm": 0.010792004875838757, |
|
"learning_rate": 1.6910711250333894e-06, |
|
"loss": 0.0001, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 2.7500546368470897, |
|
"grad_norm": 0.0077994223684072495, |
|
"learning_rate": 1.6667880818824217e-06, |
|
"loss": 0.0, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 2.753697093319735, |
|
"grad_norm": 0.00011445289419498295, |
|
"learning_rate": 1.642505038731454e-06, |
|
"loss": 0.0001, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 2.75733954979238, |
|
"grad_norm": 0.00014677188300993294, |
|
"learning_rate": 1.6182219955804862e-06, |
|
"loss": 0.0094, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 2.760982006265025, |
|
"grad_norm": 0.0009094238630495965, |
|
"learning_rate": 1.5939389524295185e-06, |
|
"loss": 0.0139, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 2.7646244627376704, |
|
"grad_norm": 0.0006197712500579655, |
|
"learning_rate": 1.569655909278551e-06, |
|
"loss": 0.0047, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 2.7682669192103155, |
|
"grad_norm": 0.0009232255397364497, |
|
"learning_rate": 1.5453728661275832e-06, |
|
"loss": 0.0056, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.7719093756829607, |
|
"grad_norm": 0.027552172541618347, |
|
"learning_rate": 1.5210898229766155e-06, |
|
"loss": 0.0111, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 2.775551832155606, |
|
"grad_norm": 0.001112232101149857, |
|
"learning_rate": 1.4968067798256478e-06, |
|
"loss": 0.0002, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 2.779194288628251, |
|
"grad_norm": 0.00015614053700119257, |
|
"learning_rate": 1.4725237366746804e-06, |
|
"loss": 0.002, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 2.782836745100896, |
|
"grad_norm": 0.0003652343002613634, |
|
"learning_rate": 1.4482406935237125e-06, |
|
"loss": 0.0057, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 2.7864792015735413, |
|
"grad_norm": 0.00010209327592747286, |
|
"learning_rate": 1.4239576503727448e-06, |
|
"loss": 0.0192, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 2.7901216580461865, |
|
"grad_norm": 0.00022443008492700756, |
|
"learning_rate": 1.399674607221777e-06, |
|
"loss": 0.0001, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 2.7937641145188317, |
|
"grad_norm": 0.0005925059085711837, |
|
"learning_rate": 1.3753915640708095e-06, |
|
"loss": 0.0263, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 2.797406570991477, |
|
"grad_norm": 0.00010681914136512205, |
|
"learning_rate": 1.3511085209198418e-06, |
|
"loss": 0.0001, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 2.8010490274641215, |
|
"grad_norm": 0.0002675318391993642, |
|
"learning_rate": 1.3268254777688742e-06, |
|
"loss": 0.0109, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 2.804691483936767, |
|
"grad_norm": 0.0004106853739358485, |
|
"learning_rate": 1.3025424346179063e-06, |
|
"loss": 0.0001, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.808333940409412, |
|
"grad_norm": 0.00042042654240503907, |
|
"learning_rate": 1.2782593914669386e-06, |
|
"loss": 0.0, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 2.8119763968820575, |
|
"grad_norm": 0.0010237845126539469, |
|
"learning_rate": 1.2539763483159712e-06, |
|
"loss": 0.0001, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 2.815618853354702, |
|
"grad_norm": 0.0004960985388606787, |
|
"learning_rate": 1.2296933051650033e-06, |
|
"loss": 0.0171, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 2.819261309827348, |
|
"grad_norm": 0.0009406881290487945, |
|
"learning_rate": 1.2054102620140356e-06, |
|
"loss": 0.015, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 2.8229037662999925, |
|
"grad_norm": 0.00037287117447704077, |
|
"learning_rate": 1.181127218863068e-06, |
|
"loss": 0.0072, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 2.826546222772638, |
|
"grad_norm": 0.00017823204689193517, |
|
"learning_rate": 1.1568441757121003e-06, |
|
"loss": 0.0065, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 2.830188679245283, |
|
"grad_norm": 0.0005524320295080543, |
|
"learning_rate": 1.1325611325611326e-06, |
|
"loss": 0.0001, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 2.8338311357179284, |
|
"grad_norm": 0.05926033854484558, |
|
"learning_rate": 1.108278089410165e-06, |
|
"loss": 0.0109, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 2.837473592190573, |
|
"grad_norm": 0.00016377720749005675, |
|
"learning_rate": 1.0839950462591973e-06, |
|
"loss": 0.0106, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 2.8411160486632183, |
|
"grad_norm": 0.023527516052126884, |
|
"learning_rate": 1.0597120031082297e-06, |
|
"loss": 0.0073, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.8447585051358635, |
|
"grad_norm": 0.0001331869134446606, |
|
"learning_rate": 1.0354289599572618e-06, |
|
"loss": 0.0055, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 2.8484009616085086, |
|
"grad_norm": 0.00012386612070258707, |
|
"learning_rate": 1.0111459168062943e-06, |
|
"loss": 0.0032, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 2.852043418081154, |
|
"grad_norm": 0.0007627196027897298, |
|
"learning_rate": 9.868628736553264e-07, |
|
"loss": 0.007, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 2.855685874553799, |
|
"grad_norm": 0.00017398077761754394, |
|
"learning_rate": 9.62579830504359e-07, |
|
"loss": 0.0, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 2.859328331026444, |
|
"grad_norm": 0.00020244729239493608, |
|
"learning_rate": 9.382967873533911e-07, |
|
"loss": 0.0054, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 2.8629707874990893, |
|
"grad_norm": 0.0012788856402039528, |
|
"learning_rate": 9.140137442024236e-07, |
|
"loss": 0.0029, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 2.8666132439717344, |
|
"grad_norm": 0.009543795138597488, |
|
"learning_rate": 8.897307010514558e-07, |
|
"loss": 0.0008, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 2.8702557004443796, |
|
"grad_norm": 0.0007355239940807223, |
|
"learning_rate": 8.654476579004882e-07, |
|
"loss": 0.0, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 2.8738981569170248, |
|
"grad_norm": 0.0350869782269001, |
|
"learning_rate": 8.411646147495205e-07, |
|
"loss": 0.0001, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 2.87754061338967, |
|
"grad_norm": 0.0003279308439232409, |
|
"learning_rate": 8.168815715985529e-07, |
|
"loss": 0.0141, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.881183069862315, |
|
"grad_norm": 0.00012197823525639251, |
|
"learning_rate": 7.925985284475851e-07, |
|
"loss": 0.0067, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 2.8848255263349603, |
|
"grad_norm": 0.000695875845849514, |
|
"learning_rate": 7.683154852966174e-07, |
|
"loss": 0.0009, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 2.8884679828076054, |
|
"grad_norm": 0.00016589129518251866, |
|
"learning_rate": 7.440324421456498e-07, |
|
"loss": 0.0001, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 2.8921104392802506, |
|
"grad_norm": 70.67613983154297, |
|
"learning_rate": 7.19749398994682e-07, |
|
"loss": 0.0014, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 2.8957528957528957, |
|
"grad_norm": 0.07578533887863159, |
|
"learning_rate": 6.954663558437145e-07, |
|
"loss": 0.0062, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 2.899395352225541, |
|
"grad_norm": 0.0014393499586731195, |
|
"learning_rate": 6.711833126927467e-07, |
|
"loss": 0.0, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 2.903037808698186, |
|
"grad_norm": 0.0001194909491459839, |
|
"learning_rate": 6.46900269541779e-07, |
|
"loss": 0.0089, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 2.9066802651708312, |
|
"grad_norm": 0.00014688068768009543, |
|
"learning_rate": 6.226172263908114e-07, |
|
"loss": 0.006, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 2.9103227216434764, |
|
"grad_norm": 0.0015761395916342735, |
|
"learning_rate": 5.983341832398436e-07, |
|
"loss": 0.0002, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 2.9139651781161215, |
|
"grad_norm": 0.0001682170113781467, |
|
"learning_rate": 5.740511400888759e-07, |
|
"loss": 0.0001, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.9176076345887667, |
|
"grad_norm": 0.002469463273882866, |
|
"learning_rate": 5.497680969379083e-07, |
|
"loss": 0.0013, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 2.921250091061412, |
|
"grad_norm": 0.003323483048006892, |
|
"learning_rate": 5.254850537869406e-07, |
|
"loss": 0.0, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 2.924892547534057, |
|
"grad_norm": 0.00013295111421030015, |
|
"learning_rate": 5.012020106359729e-07, |
|
"loss": 0.0, |
|
"step": 40150 |
|
}, |
|
{ |
|
"epoch": 2.928535004006702, |
|
"grad_norm": 0.00022801663726568222, |
|
"learning_rate": 4.769189674850053e-07, |
|
"loss": 0.0075, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 2.9321774604793474, |
|
"grad_norm": 0.0002737225731834769, |
|
"learning_rate": 4.526359243340376e-07, |
|
"loss": 0.0001, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 2.9358199169519925, |
|
"grad_norm": 0.00039664292125962675, |
|
"learning_rate": 4.2835288118306994e-07, |
|
"loss": 0.0017, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 2.9394623734246377, |
|
"grad_norm": 0.00020792327995877713, |
|
"learning_rate": 4.0406983803210227e-07, |
|
"loss": 0.0, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 2.943104829897283, |
|
"grad_norm": 0.00011373033339623362, |
|
"learning_rate": 3.7978679488113455e-07, |
|
"loss": 0.0065, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 2.946747286369928, |
|
"grad_norm": 9.335917275166139e-05, |
|
"learning_rate": 3.5550375173016683e-07, |
|
"loss": 0.0061, |
|
"step": 40450 |
|
}, |
|
{ |
|
"epoch": 2.950389742842573, |
|
"grad_norm": 0.00014541124983225018, |
|
"learning_rate": 3.3122070857919917e-07, |
|
"loss": 0.0002, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.9540321993152183, |
|
"grad_norm": 0.011188451200723648, |
|
"learning_rate": 3.0693766542823145e-07, |
|
"loss": 0.013, |
|
"step": 40550 |
|
}, |
|
{ |
|
"epoch": 2.9576746557878635, |
|
"grad_norm": 0.0014574270462617278, |
|
"learning_rate": 2.826546222772638e-07, |
|
"loss": 0.0064, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 2.9613171122605086, |
|
"grad_norm": 0.06211031973361969, |
|
"learning_rate": 2.583715791262961e-07, |
|
"loss": 0.0001, |
|
"step": 40650 |
|
}, |
|
{ |
|
"epoch": 2.964959568733154, |
|
"grad_norm": 0.00016836251597851515, |
|
"learning_rate": 2.3408853597532846e-07, |
|
"loss": 0.0014, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 2.968602025205799, |
|
"grad_norm": 0.00013589864829555154, |
|
"learning_rate": 2.0980549282436076e-07, |
|
"loss": 0.0, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 2.9722444816784437, |
|
"grad_norm": 0.00023493812477681786, |
|
"learning_rate": 1.855224496733931e-07, |
|
"loss": 0.0095, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 2.9758869381510893, |
|
"grad_norm": 0.0005600821459665895, |
|
"learning_rate": 1.6123940652242538e-07, |
|
"loss": 0.0, |
|
"step": 40850 |
|
}, |
|
{ |
|
"epoch": 2.979529394623734, |
|
"grad_norm": 8.820225048111752e-05, |
|
"learning_rate": 1.3695636337145772e-07, |
|
"loss": 0.0044, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 2.9831718510963796, |
|
"grad_norm": 0.00011790011922130361, |
|
"learning_rate": 1.1267332022049004e-07, |
|
"loss": 0.008, |
|
"step": 40950 |
|
}, |
|
{ |
|
"epoch": 2.9868143075690243, |
|
"grad_norm": 0.9950030446052551, |
|
"learning_rate": 8.839027706952236e-08, |
|
"loss": 0.0, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.99045676404167, |
|
"grad_norm": 0.003940385300666094, |
|
"learning_rate": 6.410723391855468e-08, |
|
"loss": 0.0162, |
|
"step": 41050 |
|
}, |
|
{ |
|
"epoch": 2.9940992205143147, |
|
"grad_norm": 0.0008378868806175888, |
|
"learning_rate": 3.9824190767586995e-08, |
|
"loss": 0.0038, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 2.9977416769869603, |
|
"grad_norm": 0.0003403791051823646, |
|
"learning_rate": 1.5541147616619317e-08, |
|
"loss": 0.002, |
|
"step": 41150 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9891821012948697, |
|
"eval_f1": 0.9890836916969897, |
|
"eval_loss": 0.080924853682518, |
|
"eval_precision": 0.9994150580763767, |
|
"eval_recall": 0.9789637390521405, |
|
"eval_runtime": 84.2105, |
|
"eval_samples_per_second": 289.798, |
|
"eval_steps_per_second": 18.121, |
|
"step": 41181 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 41181, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.36409208513239e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|