|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988901220865705, |
|
"eval_steps": 500, |
|
"global_step": 225, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004439511653718091, |
|
"grad_norm": 37.533348083496094, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 1.824, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008879023307436182, |
|
"grad_norm": 46.24335479736328, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 1.8786, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.013318534961154272, |
|
"grad_norm": 42.045166015625, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 1.7863, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.017758046614872364, |
|
"grad_norm": 8.661040306091309, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 1.2891, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.022197558268590455, |
|
"grad_norm": 3.85249400138855, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 1.2127, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.026637069922308545, |
|
"grad_norm": 7.926912307739258, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 1.2012, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03107658157602664, |
|
"grad_norm": 4.07999324798584, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2275, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03551609322974473, |
|
"grad_norm": 3.668304681777954, |
|
"learning_rate": 1.9998961636899736e-05, |
|
"loss": 1.2301, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03995560488346282, |
|
"grad_norm": 3.8191933631896973, |
|
"learning_rate": 1.9995846763238514e-05, |
|
"loss": 1.18, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04439511653718091, |
|
"grad_norm": 3.0913546085357666, |
|
"learning_rate": 1.9990656025890315e-05, |
|
"loss": 1.1692, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.048834628190899, |
|
"grad_norm": 2.7952189445495605, |
|
"learning_rate": 1.9983390502829168e-05, |
|
"loss": 1.2112, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05327413984461709, |
|
"grad_norm": 2.480602979660034, |
|
"learning_rate": 1.997405170290528e-05, |
|
"loss": 1.2294, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05771365149833518, |
|
"grad_norm": 2.486560344696045, |
|
"learning_rate": 1.9962641565531694e-05, |
|
"loss": 1.222, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06215316315205328, |
|
"grad_norm": 2.59287428855896, |
|
"learning_rate": 1.994916246028154e-05, |
|
"loss": 1.1607, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06659267480577137, |
|
"grad_norm": 2.5444483757019043, |
|
"learning_rate": 1.9933617186395917e-05, |
|
"loss": 1.2571, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07103218645948946, |
|
"grad_norm": 2.5133490562438965, |
|
"learning_rate": 1.9916008972202586e-05, |
|
"loss": 1.1338, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07547169811320754, |
|
"grad_norm": 2.2021095752716064, |
|
"learning_rate": 1.9896341474445526e-05, |
|
"loss": 1.2253, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07991120976692564, |
|
"grad_norm": 2.341919183731079, |
|
"learning_rate": 1.987461877752552e-05, |
|
"loss": 1.2575, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08435072142064373, |
|
"grad_norm": 2.27514386177063, |
|
"learning_rate": 1.985084539265195e-05, |
|
"loss": 1.1895, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08879023307436182, |
|
"grad_norm": 2.0287599563598633, |
|
"learning_rate": 1.982502625690595e-05, |
|
"loss": 1.122, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0932297447280799, |
|
"grad_norm": 2.33461594581604, |
|
"learning_rate": 1.9797166732215078e-05, |
|
"loss": 1.2319, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.097669256381798, |
|
"grad_norm": 2.374116897583008, |
|
"learning_rate": 1.9767272604239823e-05, |
|
"loss": 1.2025, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10210876803551609, |
|
"grad_norm": 2.2956087589263916, |
|
"learning_rate": 1.973535008117207e-05, |
|
"loss": 1.2078, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10654827968923418, |
|
"grad_norm": 2.1211025714874268, |
|
"learning_rate": 1.9701405792445815e-05, |
|
"loss": 1.1912, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11098779134295228, |
|
"grad_norm": 2.2014005184173584, |
|
"learning_rate": 1.9665446787360444e-05, |
|
"loss": 1.1932, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11542730299667037, |
|
"grad_norm": 2.1209704875946045, |
|
"learning_rate": 1.962748053361675e-05, |
|
"loss": 1.2348, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11986681465038845, |
|
"grad_norm": 2.0373430252075195, |
|
"learning_rate": 1.9587514915766124e-05, |
|
"loss": 1.2503, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12430632630410655, |
|
"grad_norm": 2.3817737102508545, |
|
"learning_rate": 1.9545558233573136e-05, |
|
"loss": 1.2131, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12874583795782463, |
|
"grad_norm": 2.1462056636810303, |
|
"learning_rate": 1.950161920029191e-05, |
|
"loss": 1.1736, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13318534961154274, |
|
"grad_norm": 2.1412036418914795, |
|
"learning_rate": 1.9455706940856602e-05, |
|
"loss": 1.1716, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13762486126526083, |
|
"grad_norm": 2.2089121341705322, |
|
"learning_rate": 1.940783098998643e-05, |
|
"loss": 1.169, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.14206437291897892, |
|
"grad_norm": 2.101334571838379, |
|
"learning_rate": 1.9358001290205542e-05, |
|
"loss": 1.2226, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.146503884572697, |
|
"grad_norm": 2.436990976333618, |
|
"learning_rate": 1.9306228189778255e-05, |
|
"loss": 1.192, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1509433962264151, |
|
"grad_norm": 2.069035768508911, |
|
"learning_rate": 1.925252244055998e-05, |
|
"loss": 1.2419, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15538290788013318, |
|
"grad_norm": 2.099047899246216, |
|
"learning_rate": 1.9196895195764363e-05, |
|
"loss": 1.2004, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1598224195338513, |
|
"grad_norm": 2.2311224937438965, |
|
"learning_rate": 1.9139358007647085e-05, |
|
"loss": 1.2065, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.16426193118756938, |
|
"grad_norm": 2.1131861209869385, |
|
"learning_rate": 1.907992282510675e-05, |
|
"loss": 1.2158, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16870144284128746, |
|
"grad_norm": 1.924771785736084, |
|
"learning_rate": 1.901860199120344e-05, |
|
"loss": 1.2714, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.17314095449500555, |
|
"grad_norm": 1.9812465906143188, |
|
"learning_rate": 1.8955408240595396e-05, |
|
"loss": 1.2216, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17758046614872364, |
|
"grad_norm": 1.9320334196090698, |
|
"learning_rate": 1.8890354696894374e-05, |
|
"loss": 1.2164, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18201997780244172, |
|
"grad_norm": 1.9847488403320312, |
|
"learning_rate": 1.8823454869940243e-05, |
|
"loss": 1.1705, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1864594894561598, |
|
"grad_norm": 1.973925232887268, |
|
"learning_rate": 1.8754722652995346e-05, |
|
"loss": 1.1984, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.19089900110987792, |
|
"grad_norm": 2.0115818977355957, |
|
"learning_rate": 1.8684172319859258e-05, |
|
"loss": 1.1659, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.195338512763596, |
|
"grad_norm": 2.3217947483062744, |
|
"learning_rate": 1.861181852190451e-05, |
|
"loss": 1.2463, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1997780244173141, |
|
"grad_norm": 2.1839258670806885, |
|
"learning_rate": 1.8537676285033886e-05, |
|
"loss": 1.1657, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.20421753607103219, |
|
"grad_norm": 1.9523669481277466, |
|
"learning_rate": 1.8461761006559982e-05, |
|
"loss": 1.1766, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.20865704772475027, |
|
"grad_norm": 2.1495866775512695, |
|
"learning_rate": 1.838408845200758e-05, |
|
"loss": 1.2138, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.21309655937846836, |
|
"grad_norm": 2.060826539993286, |
|
"learning_rate": 1.8304674751839583e-05, |
|
"loss": 1.1817, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.21753607103218647, |
|
"grad_norm": 2.0041329860687256, |
|
"learning_rate": 1.8223536398107177e-05, |
|
"loss": 1.2327, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.22197558268590456, |
|
"grad_norm": 2.427309989929199, |
|
"learning_rate": 1.8140690241024872e-05, |
|
"loss": 1.2012, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22641509433962265, |
|
"grad_norm": 2.0693812370300293, |
|
"learning_rate": 1.8056153485471167e-05, |
|
"loss": 1.1806, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.23085460599334073, |
|
"grad_norm": 2.1914312839508057, |
|
"learning_rate": 1.7969943687415575e-05, |
|
"loss": 1.1908, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 2.1551156044006348, |
|
"learning_rate": 1.788207875027274e-05, |
|
"loss": 1.2127, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2397336293007769, |
|
"grad_norm": 2.090184211730957, |
|
"learning_rate": 1.7792576921184374e-05, |
|
"loss": 1.1944, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.244173140954495, |
|
"grad_norm": 1.9871045351028442, |
|
"learning_rate": 1.7701456787229805e-05, |
|
"loss": 1.1736, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2486126526082131, |
|
"grad_norm": 2.1655359268188477, |
|
"learning_rate": 1.7608737271566004e-05, |
|
"loss": 1.2267, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.25305216426193117, |
|
"grad_norm": 2.2310895919799805, |
|
"learning_rate": 1.751443762949772e-05, |
|
"loss": 1.232, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.25749167591564925, |
|
"grad_norm": 2.07645583152771, |
|
"learning_rate": 1.741857744447869e-05, |
|
"loss": 1.1872, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2619311875693674, |
|
"grad_norm": 2.183539628982544, |
|
"learning_rate": 1.732117662404469e-05, |
|
"loss": 1.253, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2663706992230855, |
|
"grad_norm": 2.078212261199951, |
|
"learning_rate": 1.7222255395679298e-05, |
|
"loss": 1.2586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27081021087680357, |
|
"grad_norm": 1.9478346109390259, |
|
"learning_rate": 1.712183430261319e-05, |
|
"loss": 1.3174, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.27524972253052166, |
|
"grad_norm": 1.9654473066329956, |
|
"learning_rate": 1.7019934199557868e-05, |
|
"loss": 1.2072, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.27968923418423974, |
|
"grad_norm": 2.0590426921844482, |
|
"learning_rate": 1.691657624837472e-05, |
|
"loss": 1.2234, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.28412874583795783, |
|
"grad_norm": 1.9870823621749878, |
|
"learning_rate": 1.6811781913680273e-05, |
|
"loss": 1.2366, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.2885682574916759, |
|
"grad_norm": 2.0088582038879395, |
|
"learning_rate": 1.6705572958388576e-05, |
|
"loss": 1.1597, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.293007769145394, |
|
"grad_norm": 2.107692241668701, |
|
"learning_rate": 1.659797143919165e-05, |
|
"loss": 1.1738, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.2974472807991121, |
|
"grad_norm": 1.8893859386444092, |
|
"learning_rate": 1.6488999701978905e-05, |
|
"loss": 1.1214, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3018867924528302, |
|
"grad_norm": 1.8780573606491089, |
|
"learning_rate": 1.6378680377196526e-05, |
|
"loss": 1.1799, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.30632630410654826, |
|
"grad_norm": 2.0093393325805664, |
|
"learning_rate": 1.6267036375147728e-05, |
|
"loss": 1.187, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.31076581576026635, |
|
"grad_norm": 1.988927960395813, |
|
"learning_rate": 1.615409088123493e-05, |
|
"loss": 1.2668, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.31520532741398444, |
|
"grad_norm": 1.8668824434280396, |
|
"learning_rate": 1.6039867351144778e-05, |
|
"loss": 1.2241, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3196448390677026, |
|
"grad_norm": 2.02083420753479, |
|
"learning_rate": 1.5924389505977038e-05, |
|
"loss": 1.228, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.32408435072142067, |
|
"grad_norm": 2.080263137817383, |
|
"learning_rate": 1.5807681327318372e-05, |
|
"loss": 1.1584, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.32852386237513875, |
|
"grad_norm": 1.9211077690124512, |
|
"learning_rate": 1.5689767052262028e-05, |
|
"loss": 1.2324, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.33296337402885684, |
|
"grad_norm": 2.1998391151428223, |
|
"learning_rate": 1.557067116837444e-05, |
|
"loss": 1.1829, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3374028856825749, |
|
"grad_norm": 1.973344087600708, |
|
"learning_rate": 1.545041840860986e-05, |
|
"loss": 1.233, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.341842397336293, |
|
"grad_norm": 2.0612385272979736, |
|
"learning_rate": 1.5329033746173975e-05, |
|
"loss": 1.2338, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3462819089900111, |
|
"grad_norm": 2.0529823303222656, |
|
"learning_rate": 1.520654238933767e-05, |
|
"loss": 1.1973, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3507214206437292, |
|
"grad_norm": 1.8720424175262451, |
|
"learning_rate": 1.5082969776201948e-05, |
|
"loss": 1.2133, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3551609322974473, |
|
"grad_norm": 1.9517823457717896, |
|
"learning_rate": 1.4958341569415149e-05, |
|
"loss": 1.2102, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.35960044395116536, |
|
"grad_norm": 1.9612551927566528, |
|
"learning_rate": 1.483268365084351e-05, |
|
"loss": 1.1888, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.36403995560488345, |
|
"grad_norm": 2.1608946323394775, |
|
"learning_rate": 1.4706022116196208e-05, |
|
"loss": 1.2082, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.36847946725860153, |
|
"grad_norm": 1.951665997505188, |
|
"learning_rate": 1.4578383269606004e-05, |
|
"loss": 1.2041, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3729189789123196, |
|
"grad_norm": 1.9137777090072632, |
|
"learning_rate": 1.4449793618166594e-05, |
|
"loss": 1.2091, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 1.9383291006088257, |
|
"learning_rate": 1.4320279866427798e-05, |
|
"loss": 1.1901, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.38179800221975585, |
|
"grad_norm": 1.8352899551391602, |
|
"learning_rate": 1.4189868910849779e-05, |
|
"loss": 1.1741, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.38623751387347394, |
|
"grad_norm": 1.9220095872879028, |
|
"learning_rate": 1.4058587834217356e-05, |
|
"loss": 1.1934, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.390677025527192, |
|
"grad_norm": 1.7801581621170044, |
|
"learning_rate": 1.392646390001569e-05, |
|
"loss": 1.1439, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3951165371809101, |
|
"grad_norm": 1.8106623888015747, |
|
"learning_rate": 1.3793524546768358e-05, |
|
"loss": 1.1316, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.3995560488346282, |
|
"grad_norm": 1.822784423828125, |
|
"learning_rate": 1.3659797382339162e-05, |
|
"loss": 1.2326, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4039955604883463, |
|
"grad_norm": 2.061378240585327, |
|
"learning_rate": 1.3525310178198707e-05, |
|
"loss": 1.2022, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.40843507214206437, |
|
"grad_norm": 1.999387264251709, |
|
"learning_rate": 1.3390090863657048e-05, |
|
"loss": 1.2236, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.41287458379578246, |
|
"grad_norm": 1.7549680471420288, |
|
"learning_rate": 1.325416752006351e-05, |
|
"loss": 1.1421, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.41731409544950054, |
|
"grad_norm": 1.8363287448883057, |
|
"learning_rate": 1.311756837497499e-05, |
|
"loss": 1.1325, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.42175360710321863, |
|
"grad_norm": 1.942063808441162, |
|
"learning_rate": 1.2980321796293838e-05, |
|
"loss": 1.1475, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4261931187569367, |
|
"grad_norm": 2.029550790786743, |
|
"learning_rate": 1.284245628637665e-05, |
|
"loss": 1.1715, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4306326304106548, |
|
"grad_norm": 2.0929226875305176, |
|
"learning_rate": 1.2704000476115079e-05, |
|
"loss": 1.1595, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.43507214206437295, |
|
"grad_norm": 2.0529980659484863, |
|
"learning_rate": 1.256498311899001e-05, |
|
"loss": 1.193, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.43951165371809103, |
|
"grad_norm": 1.9034109115600586, |
|
"learning_rate": 1.2425433085100224e-05, |
|
"loss": 1.1566, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4439511653718091, |
|
"grad_norm": 2.2665998935699463, |
|
"learning_rate": 1.2285379355166893e-05, |
|
"loss": 1.1709, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4483906770255272, |
|
"grad_norm": 2.0044686794281006, |
|
"learning_rate": 1.2144851014515055e-05, |
|
"loss": 1.1688, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4528301886792453, |
|
"grad_norm": 1.9301621913909912, |
|
"learning_rate": 1.2003877247033411e-05, |
|
"loss": 1.1855, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4572697003329634, |
|
"grad_norm": 1.996848464012146, |
|
"learning_rate": 1.1862487329113606e-05, |
|
"loss": 1.1603, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.46170921198668147, |
|
"grad_norm": 2.0279271602630615, |
|
"learning_rate": 1.172071062357035e-05, |
|
"loss": 1.1789, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.46614872364039955, |
|
"grad_norm": 1.9275884628295898, |
|
"learning_rate": 1.1578576573543541e-05, |
|
"loss": 1.1339, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 1.9847296476364136, |
|
"learning_rate": 1.1436114696383749e-05, |
|
"loss": 1.186, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4750277469478357, |
|
"grad_norm": 1.9284783601760864, |
|
"learning_rate": 1.1293354577522264e-05, |
|
"loss": 1.1697, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4794672586015538, |
|
"grad_norm": 1.8462920188903809, |
|
"learning_rate": 1.1150325864327003e-05, |
|
"loss": 1.2233, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.4839067702552719, |
|
"grad_norm": 1.9206749200820923, |
|
"learning_rate": 1.1007058259945584e-05, |
|
"loss": 1.2148, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.48834628190899, |
|
"grad_norm": 1.9734727144241333, |
|
"learning_rate": 1.0863581517136776e-05, |
|
"loss": 1.1367, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.49278579356270813, |
|
"grad_norm": 1.83944833278656, |
|
"learning_rate": 1.0719925432091671e-05, |
|
"loss": 1.1834, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.4972253052164262, |
|
"grad_norm": 1.9524726867675781, |
|
"learning_rate": 1.0576119838245843e-05, |
|
"loss": 1.1046, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5016648168701443, |
|
"grad_norm": 1.8533103466033936, |
|
"learning_rate": 1.043219460008374e-05, |
|
"loss": 1.1694, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5061043285238623, |
|
"grad_norm": 2.0331192016601562, |
|
"learning_rate": 1.0288179606936666e-05, |
|
"loss": 1.2525, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5105438401775805, |
|
"grad_norm": 1.7501027584075928, |
|
"learning_rate": 1.0144104766775574e-05, |
|
"loss": 1.1787, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5149833518312985, |
|
"grad_norm": 1.8049185276031494, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1388, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5194228634850167, |
|
"grad_norm": 1.9093164205551147, |
|
"learning_rate": 9.855895233224431e-06, |
|
"loss": 1.1042, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5238623751387348, |
|
"grad_norm": 1.7633061408996582, |
|
"learning_rate": 9.711820393063337e-06, |
|
"loss": 1.1286, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5283018867924528, |
|
"grad_norm": 1.8089503049850464, |
|
"learning_rate": 9.56780539991626e-06, |
|
"loss": 1.1385, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.532741398446171, |
|
"grad_norm": 1.827928066253662, |
|
"learning_rate": 9.423880161754158e-06, |
|
"loss": 1.1864, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.537180910099889, |
|
"grad_norm": 1.9098917245864868, |
|
"learning_rate": 9.28007456790833e-06, |
|
"loss": 1.1412, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5416204217536071, |
|
"grad_norm": 1.7700555324554443, |
|
"learning_rate": 9.13641848286323e-06, |
|
"loss": 1.1733, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5460599334073252, |
|
"grad_norm": 1.829474687576294, |
|
"learning_rate": 8.992941740054418e-06, |
|
"loss": 1.1297, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5504994450610433, |
|
"grad_norm": 1.9246844053268433, |
|
"learning_rate": 8.849674135672999e-06, |
|
"loss": 1.2096, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5549389567147613, |
|
"grad_norm": 1.9404821395874023, |
|
"learning_rate": 8.706645422477739e-06, |
|
"loss": 1.1531, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5593784683684795, |
|
"grad_norm": 1.8768739700317383, |
|
"learning_rate": 8.563885303616253e-06, |
|
"loss": 1.1896, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5638179800221975, |
|
"grad_norm": 1.9277173280715942, |
|
"learning_rate": 8.42142342645646e-06, |
|
"loss": 1.112, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5682574916759157, |
|
"grad_norm": 1.9244784116744995, |
|
"learning_rate": 8.279289376429653e-06, |
|
"loss": 1.1457, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5726970033296337, |
|
"grad_norm": 1.856616497039795, |
|
"learning_rate": 8.137512670886397e-06, |
|
"loss": 1.139, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5771365149833518, |
|
"grad_norm": 1.7110472917556763, |
|
"learning_rate": 7.996122752966596e-06, |
|
"loss": 1.1181, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.58157602663707, |
|
"grad_norm": 1.7752193212509155, |
|
"learning_rate": 7.855148985484946e-06, |
|
"loss": 1.1517, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.586015538290788, |
|
"grad_norm": 1.789296269416809, |
|
"learning_rate": 7.71462064483311e-06, |
|
"loss": 1.0982, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5904550499445061, |
|
"grad_norm": 2.0241036415100098, |
|
"learning_rate": 7.574566914899779e-06, |
|
"loss": 1.1787, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5948945615982242, |
|
"grad_norm": 2.217411518096924, |
|
"learning_rate": 7.4350168810099955e-06, |
|
"loss": 1.1871, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5993340732519423, |
|
"grad_norm": 1.8291338682174683, |
|
"learning_rate": 7.295999523884921e-06, |
|
"loss": 1.1396, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6037735849056604, |
|
"grad_norm": 1.818039059638977, |
|
"learning_rate": 7.157543713623353e-06, |
|
"loss": 1.1719, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6082130965593785, |
|
"grad_norm": 1.8456182479858398, |
|
"learning_rate": 7.019678203706164e-06, |
|
"loss": 1.1714, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6126526082130965, |
|
"grad_norm": 1.9036660194396973, |
|
"learning_rate": 6.882431625025016e-06, |
|
"loss": 1.1439, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6170921198668147, |
|
"grad_norm": 1.7687510251998901, |
|
"learning_rate": 6.745832479936492e-06, |
|
"loss": 1.1511, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6215316315205327, |
|
"grad_norm": 1.9004747867584229, |
|
"learning_rate": 6.609909136342956e-06, |
|
"loss": 1.172, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6259711431742508, |
|
"grad_norm": 1.736116886138916, |
|
"learning_rate": 6.474689821801295e-06, |
|
"loss": 1.1422, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6304106548279689, |
|
"grad_norm": 2.1278252601623535, |
|
"learning_rate": 6.340202617660842e-06, |
|
"loss": 1.1553, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.634850166481687, |
|
"grad_norm": 1.7536156177520752, |
|
"learning_rate": 6.206475453231644e-06, |
|
"loss": 1.2252, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6392896781354052, |
|
"grad_norm": 1.6779848337173462, |
|
"learning_rate": 6.073536099984314e-06, |
|
"loss": 1.193, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6437291897891232, |
|
"grad_norm": 1.8742091655731201, |
|
"learning_rate": 5.941412165782645e-06, |
|
"loss": 1.1921, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6481687014428413, |
|
"grad_norm": 1.8728663921356201, |
|
"learning_rate": 5.810131089150228e-06, |
|
"loss": 1.1906, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6526082130965594, |
|
"grad_norm": 1.6480120420455933, |
|
"learning_rate": 5.6797201335722064e-06, |
|
"loss": 1.1297, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6570477247502775, |
|
"grad_norm": 1.6961437463760376, |
|
"learning_rate": 5.550206381833409e-06, |
|
"loss": 1.213, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6614872364039955, |
|
"grad_norm": 1.7690813541412354, |
|
"learning_rate": 5.421616730394e-06, |
|
"loss": 1.1643, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6659267480577137, |
|
"grad_norm": 1.621023178100586, |
|
"learning_rate": 5.293977883803797e-06, |
|
"loss": 1.1866, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6703662597114317, |
|
"grad_norm": 1.6755465269088745, |
|
"learning_rate": 5.167316349156495e-06, |
|
"loss": 1.1487, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.6748057713651499, |
|
"grad_norm": 1.739989995956421, |
|
"learning_rate": 5.041658430584852e-06, |
|
"loss": 1.148, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6792452830188679, |
|
"grad_norm": 1.6027244329452515, |
|
"learning_rate": 4.917030223798057e-06, |
|
"loss": 1.1462, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.683684794672586, |
|
"grad_norm": 1.7501620054244995, |
|
"learning_rate": 4.793457610662334e-06, |
|
"loss": 1.1887, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.6881243063263041, |
|
"grad_norm": 1.8939120769500732, |
|
"learning_rate": 4.670966253826027e-06, |
|
"loss": 1.0909, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6925638179800222, |
|
"grad_norm": 1.7964297533035278, |
|
"learning_rate": 4.549581591390142e-06, |
|
"loss": 1.1357, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6970033296337403, |
|
"grad_norm": 1.8703395128250122, |
|
"learning_rate": 4.429328831625565e-06, |
|
"loss": 1.1529, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7014428412874584, |
|
"grad_norm": 1.676789402961731, |
|
"learning_rate": 4.310232947737979e-06, |
|
"loss": 1.1435, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 1.8037596940994263, |
|
"learning_rate": 4.192318672681631e-06, |
|
"loss": 1.1878, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7103218645948945, |
|
"grad_norm": 1.6602219343185425, |
|
"learning_rate": 4.0756104940229645e-06, |
|
"loss": 1.1887, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7147613762486127, |
|
"grad_norm": 1.8436843156814575, |
|
"learning_rate": 3.960132648855226e-06, |
|
"loss": 1.1406, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7192008879023307, |
|
"grad_norm": 1.7904314994812012, |
|
"learning_rate": 3.845909118765073e-06, |
|
"loss": 1.149, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7236403995560489, |
|
"grad_norm": 1.7600526809692383, |
|
"learning_rate": 3.732963624852275e-06, |
|
"loss": 1.136, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7280799112097669, |
|
"grad_norm": 1.912719488143921, |
|
"learning_rate": 3.6213196228034796e-06, |
|
"loss": 1.1052, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.732519422863485, |
|
"grad_norm": 1.8316577672958374, |
|
"learning_rate": 3.511000298021098e-06, |
|
"loss": 1.115, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7369589345172031, |
|
"grad_norm": 1.9175002574920654, |
|
"learning_rate": 3.402028560808357e-06, |
|
"loss": 1.1997, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7413984461709212, |
|
"grad_norm": 1.7501415014266968, |
|
"learning_rate": 3.2944270416114256e-06, |
|
"loss": 1.1601, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7458379578246392, |
|
"grad_norm": 1.8293687105178833, |
|
"learning_rate": 3.1882180863197308e-06, |
|
"loss": 1.131, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7502774694783574, |
|
"grad_norm": 1.7824769020080566, |
|
"learning_rate": 3.0834237516252817e-06, |
|
"loss": 1.1655, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 1.672102689743042, |
|
"learning_rate": 2.980065800442137e-06, |
|
"loss": 1.1496, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7591564927857936, |
|
"grad_norm": 1.8063626289367676, |
|
"learning_rate": 2.878165697386812e-06, |
|
"loss": 1.1314, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7635960044395117, |
|
"grad_norm": 1.9365928173065186, |
|
"learning_rate": 2.777744604320706e-06, |
|
"loss": 1.2001, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7680355160932297, |
|
"grad_norm": 2.0963966846466064, |
|
"learning_rate": 2.678823375955314e-06, |
|
"loss": 1.1451, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7724750277469479, |
|
"grad_norm": 1.6925102472305298, |
|
"learning_rate": 2.581422555521316e-06, |
|
"loss": 1.1666, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7769145394006659, |
|
"grad_norm": 1.7280200719833374, |
|
"learning_rate": 2.485562370502279e-06, |
|
"loss": 1.1394, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.781354051054384, |
|
"grad_norm": 2.9482946395874023, |
|
"learning_rate": 2.391262728433995e-06, |
|
"loss": 1.1453, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.7857935627081021, |
|
"grad_norm": 1.7591135501861572, |
|
"learning_rate": 2.2985432127701945e-06, |
|
"loss": 1.1422, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7902330743618202, |
|
"grad_norm": 1.9076871871948242, |
|
"learning_rate": 2.2074230788156325e-06, |
|
"loss": 1.133, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7946725860155383, |
|
"grad_norm": 1.7988766431808472, |
|
"learning_rate": 2.1179212497272582e-06, |
|
"loss": 1.1535, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.7991120976692564, |
|
"grad_norm": 1.9537118673324585, |
|
"learning_rate": 2.030056312584424e-06, |
|
"loss": 1.1871, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8035516093229744, |
|
"grad_norm": 1.6785671710968018, |
|
"learning_rate": 1.9438465145288377e-06, |
|
"loss": 1.0986, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8079911209766926, |
|
"grad_norm": 1.8023606538772583, |
|
"learning_rate": 1.8593097589751318e-06, |
|
"loss": 1.118, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8124306326304107, |
|
"grad_norm": 1.6982240676879883, |
|
"learning_rate": 1.7764636018928249e-06, |
|
"loss": 1.1521, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8168701442841287, |
|
"grad_norm": 1.7196639776229858, |
|
"learning_rate": 1.6953252481604198e-06, |
|
"loss": 1.1243, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8213096559378469, |
|
"grad_norm": 1.6617742776870728, |
|
"learning_rate": 1.6159115479924259e-06, |
|
"loss": 1.1496, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8257491675915649, |
|
"grad_norm": 1.8764078617095947, |
|
"learning_rate": 1.5382389934400199e-06, |
|
"loss": 1.1417, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8301886792452831, |
|
"grad_norm": 1.743654489517212, |
|
"learning_rate": 1.462323714966114e-06, |
|
"loss": 1.1322, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8346281908990011, |
|
"grad_norm": 1.691676139831543, |
|
"learning_rate": 1.3881814780954916e-06, |
|
"loss": 1.104, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8390677025527192, |
|
"grad_norm": 1.6910086870193481, |
|
"learning_rate": 1.3158276801407432e-06, |
|
"loss": 1.181, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8435072142064373, |
|
"grad_norm": 1.7811787128448486, |
|
"learning_rate": 1.2452773470046543e-06, |
|
"loss": 1.2015, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8479467258601554, |
|
"grad_norm": 1.8758302927017212, |
|
"learning_rate": 1.1765451300597574e-06, |
|
"loss": 1.1697, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8523862375138734, |
|
"grad_norm": 1.7708605527877808, |
|
"learning_rate": 1.1096453031056265e-06, |
|
"loss": 1.1007, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8568257491675916, |
|
"grad_norm": 1.7193588018417358, |
|
"learning_rate": 1.0445917594046073e-06, |
|
"loss": 1.1135, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8612652608213096, |
|
"grad_norm": 1.6113449335098267, |
|
"learning_rate": 9.813980087965625e-07, |
|
"loss": 1.1501, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8657047724750278, |
|
"grad_norm": 1.726749062538147, |
|
"learning_rate": 9.200771748932513e-07, |
|
"loss": 1.0861, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8701442841287459, |
|
"grad_norm": 1.7274186611175537, |
|
"learning_rate": 8.606419923529175e-07, |
|
"loss": 1.1256, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8745837957824639, |
|
"grad_norm": 1.7909616231918335, |
|
"learning_rate": 8.031048042356393e-07, |
|
"loss": 1.1844, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.8790233074361821, |
|
"grad_norm": 1.769081473350525, |
|
"learning_rate": 7.474775594400252e-07, |
|
"loss": 1.1142, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.8834628190899001, |
|
"grad_norm": 1.5884616374969482, |
|
"learning_rate": 6.937718102217461e-07, |
|
"loss": 1.1735, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.8879023307436182, |
|
"grad_norm": 1.8353629112243652, |
|
"learning_rate": 6.41998709794458e-07, |
|
"loss": 1.1656, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8923418423973363, |
|
"grad_norm": 1.836683988571167, |
|
"learning_rate": 5.921690100135713e-07, |
|
"loss": 1.1665, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.8967813540510544, |
|
"grad_norm": 1.7683050632476807, |
|
"learning_rate": 5.442930591433992e-07, |
|
"loss": 1.0818, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9012208657047724, |
|
"grad_norm": 1.7694001197814941, |
|
"learning_rate": 4.983807997080925e-07, |
|
"loss": 1.1625, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9056603773584906, |
|
"grad_norm": 1.639148473739624, |
|
"learning_rate": 4.544417664268652e-07, |
|
"loss": 1.1001, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9100998890122086, |
|
"grad_norm": 1.682405710220337, |
|
"learning_rate": 4.124850842338779e-07, |
|
"loss": 1.1392, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.9145394006659268, |
|
"grad_norm": 1.705399751663208, |
|
"learning_rate": 3.725194663832521e-07, |
|
"loss": 1.1308, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.9189789123196448, |
|
"grad_norm": 1.77009117603302, |
|
"learning_rate": 3.345532126395579e-07, |
|
"loss": 1.1334, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.9234184239733629, |
|
"grad_norm": 1.6911001205444336, |
|
"learning_rate": 2.985942075541848e-07, |
|
"loss": 1.1135, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.9278579356270811, |
|
"grad_norm": 1.6834074258804321, |
|
"learning_rate": 2.646499188279328e-07, |
|
"loss": 1.1604, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.9322974472807991, |
|
"grad_norm": 1.6770280599594116, |
|
"learning_rate": 2.3272739576017945e-07, |
|
"loss": 1.1296, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9367369589345172, |
|
"grad_norm": 1.8066668510437012, |
|
"learning_rate": 2.028332677849254e-07, |
|
"loss": 1.1261, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 1.8163933753967285, |
|
"learning_rate": 1.7497374309405346e-07, |
|
"loss": 1.174, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9456159822419534, |
|
"grad_norm": 1.6583095788955688, |
|
"learning_rate": 1.49154607348051e-07, |
|
"loss": 1.1626, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9500554938956715, |
|
"grad_norm": 1.7477093935012817, |
|
"learning_rate": 1.2538122247448325e-07, |
|
"loss": 1.1512, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9544950055493896, |
|
"grad_norm": 1.6833035945892334, |
|
"learning_rate": 1.0365852555447642e-07, |
|
"loss": 1.116, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9589345172031076, |
|
"grad_norm": 1.6046544313430786, |
|
"learning_rate": 8.39910277974132e-08, |
|
"loss": 1.1263, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.9633740288568258, |
|
"grad_norm": 1.6821755170822144, |
|
"learning_rate": 6.638281360408339e-08, |
|
"loss": 1.1357, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.9678135405105438, |
|
"grad_norm": 1.7433803081512451, |
|
"learning_rate": 5.083753971846239e-08, |
|
"loss": 1.0744, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.9722530521642619, |
|
"grad_norm": 1.7408146858215332, |
|
"learning_rate": 3.735843446830867e-08, |
|
"loss": 1.1356, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.97669256381798, |
|
"grad_norm": 1.7871052026748657, |
|
"learning_rate": 2.5948297094724463e-08, |
|
"loss": 1.1227, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9811320754716981, |
|
"grad_norm": 1.5904314517974854, |
|
"learning_rate": 1.6609497170834154e-08, |
|
"loss": 1.0993, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.9855715871254163, |
|
"grad_norm": 1.8538672924041748, |
|
"learning_rate": 9.343974109685684e-09, |
|
"loss": 1.1632, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.9900110987791343, |
|
"grad_norm": 1.6794443130493164, |
|
"learning_rate": 4.153236761488266e-09, |
|
"loss": 1.0994, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.9944506104328524, |
|
"grad_norm": 1.6343417167663574, |
|
"learning_rate": 1.0383631002686133e-09, |
|
"loss": 1.1072, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.9988901220865705, |
|
"grad_norm": 1.9944053888320923, |
|
"learning_rate": 0.0, |
|
"loss": 1.1393, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9988901220865705, |
|
"step": 225, |
|
"total_flos": 8.515440547840655e+17, |
|
"train_loss": 1.1814873923195732, |
|
"train_runtime": 2241.1241, |
|
"train_samples_per_second": 25.717, |
|
"train_steps_per_second": 0.1 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 225, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.515440547840655e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|