ruizhe_simplier_dsNsy32kCln32k_QwQNsy32kCln32k_DSCln_QWQCln_Qwen7B_summarized_sft
/
trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 2.993417721518987, | |
"eval_steps": 500, | |
"global_step": 987, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.0030379746835443038, | |
"grad_norm": 0.7371621381648503, | |
"learning_rate": 1.0101010101010103e-07, | |
"loss": 1.2715, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.0060759493670886075, | |
"grad_norm": 0.7121243856267792, | |
"learning_rate": 2.0202020202020205e-07, | |
"loss": 1.2093, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.009113924050632912, | |
"grad_norm": 0.6895641068732327, | |
"learning_rate": 3.0303030303030305e-07, | |
"loss": 1.1612, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.012151898734177215, | |
"grad_norm": 0.7547606851238392, | |
"learning_rate": 4.040404040404041e-07, | |
"loss": 1.2536, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.015189873417721518, | |
"grad_norm": 0.7458825481381227, | |
"learning_rate": 5.05050505050505e-07, | |
"loss": 1.2661, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.018227848101265823, | |
"grad_norm": 0.714548498437573, | |
"learning_rate": 6.060606060606061e-07, | |
"loss": 1.2054, | |
"step": 6 | |
}, | |
{ | |
"epoch": 0.021265822784810127, | |
"grad_norm": 0.793863863197656, | |
"learning_rate": 7.070707070707071e-07, | |
"loss": 1.2581, | |
"step": 7 | |
}, | |
{ | |
"epoch": 0.02430379746835443, | |
"grad_norm": 0.7243675712297406, | |
"learning_rate": 8.080808080808082e-07, | |
"loss": 1.2837, | |
"step": 8 | |
}, | |
{ | |
"epoch": 0.027341772151898733, | |
"grad_norm": 0.6869719885583234, | |
"learning_rate": 9.090909090909091e-07, | |
"loss": 1.1936, | |
"step": 9 | |
}, | |
{ | |
"epoch": 0.030379746835443037, | |
"grad_norm": 0.7740032140637244, | |
"learning_rate": 1.01010101010101e-06, | |
"loss": 1.2754, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.033417721518987344, | |
"grad_norm": 0.6974671694839412, | |
"learning_rate": 1.111111111111111e-06, | |
"loss": 1.2239, | |
"step": 11 | |
}, | |
{ | |
"epoch": 0.03645569620253165, | |
"grad_norm": 0.7177684057673752, | |
"learning_rate": 1.2121212121212122e-06, | |
"loss": 1.2018, | |
"step": 12 | |
}, | |
{ | |
"epoch": 0.03949367088607595, | |
"grad_norm": 0.6643882501789584, | |
"learning_rate": 1.3131313131313134e-06, | |
"loss": 1.1589, | |
"step": 13 | |
}, | |
{ | |
"epoch": 0.042531645569620254, | |
"grad_norm": 0.5987712476363073, | |
"learning_rate": 1.4141414141414143e-06, | |
"loss": 1.161, | |
"step": 14 | |
}, | |
{ | |
"epoch": 0.04556962025316456, | |
"grad_norm": 0.4443403364468464, | |
"learning_rate": 1.5151515151515152e-06, | |
"loss": 1.0032, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.04860759493670886, | |
"grad_norm": 0.4588586719890799, | |
"learning_rate": 1.6161616161616164e-06, | |
"loss": 1.031, | |
"step": 16 | |
}, | |
{ | |
"epoch": 0.051645569620253164, | |
"grad_norm": 0.4586292669176633, | |
"learning_rate": 1.7171717171717173e-06, | |
"loss": 1.0449, | |
"step": 17 | |
}, | |
{ | |
"epoch": 0.05468354430379747, | |
"grad_norm": 0.45980959938717203, | |
"learning_rate": 1.8181818181818183e-06, | |
"loss": 1.0574, | |
"step": 18 | |
}, | |
{ | |
"epoch": 0.05772151898734177, | |
"grad_norm": 0.38993134077596253, | |
"learning_rate": 1.9191919191919192e-06, | |
"loss": 1.0204, | |
"step": 19 | |
}, | |
{ | |
"epoch": 0.060759493670886074, | |
"grad_norm": 0.28486435349906397, | |
"learning_rate": 2.02020202020202e-06, | |
"loss": 0.9535, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.06379746835443038, | |
"grad_norm": 0.3428358025619933, | |
"learning_rate": 2.1212121212121216e-06, | |
"loss": 0.9261, | |
"step": 21 | |
}, | |
{ | |
"epoch": 0.06683544303797469, | |
"grad_norm": 0.3956603548338995, | |
"learning_rate": 2.222222222222222e-06, | |
"loss": 0.9615, | |
"step": 22 | |
}, | |
{ | |
"epoch": 0.06987341772151899, | |
"grad_norm": 0.3420435076099625, | |
"learning_rate": 2.3232323232323234e-06, | |
"loss": 0.9009, | |
"step": 23 | |
}, | |
{ | |
"epoch": 0.0729113924050633, | |
"grad_norm": 0.344749396549744, | |
"learning_rate": 2.4242424242424244e-06, | |
"loss": 1.0114, | |
"step": 24 | |
}, | |
{ | |
"epoch": 0.0759493670886076, | |
"grad_norm": 0.3499048703598331, | |
"learning_rate": 2.5252525252525258e-06, | |
"loss": 0.8631, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.0789873417721519, | |
"grad_norm": 0.3509790956435934, | |
"learning_rate": 2.6262626262626267e-06, | |
"loss": 0.8845, | |
"step": 26 | |
}, | |
{ | |
"epoch": 0.0820253164556962, | |
"grad_norm": 0.32805162453584236, | |
"learning_rate": 2.7272727272727272e-06, | |
"loss": 0.9578, | |
"step": 27 | |
}, | |
{ | |
"epoch": 0.08506329113924051, | |
"grad_norm": 0.28390672295466896, | |
"learning_rate": 2.8282828282828286e-06, | |
"loss": 0.9361, | |
"step": 28 | |
}, | |
{ | |
"epoch": 0.08810126582278481, | |
"grad_norm": 0.3332420487428435, | |
"learning_rate": 2.9292929292929295e-06, | |
"loss": 0.9165, | |
"step": 29 | |
}, | |
{ | |
"epoch": 0.09113924050632911, | |
"grad_norm": 0.3403684512401233, | |
"learning_rate": 3.0303030303030305e-06, | |
"loss": 0.8838, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.09417721518987342, | |
"grad_norm": 0.3400158932791219, | |
"learning_rate": 3.131313131313132e-06, | |
"loss": 0.9255, | |
"step": 31 | |
}, | |
{ | |
"epoch": 0.09721518987341772, | |
"grad_norm": 0.3548904001791882, | |
"learning_rate": 3.232323232323233e-06, | |
"loss": 0.9086, | |
"step": 32 | |
}, | |
{ | |
"epoch": 0.10025316455696202, | |
"grad_norm": 0.3668768693049186, | |
"learning_rate": 3.3333333333333333e-06, | |
"loss": 0.9455, | |
"step": 33 | |
}, | |
{ | |
"epoch": 0.10329113924050633, | |
"grad_norm": 0.2927732263854858, | |
"learning_rate": 3.4343434343434347e-06, | |
"loss": 0.883, | |
"step": 34 | |
}, | |
{ | |
"epoch": 0.10632911392405063, | |
"grad_norm": 0.2973135953536413, | |
"learning_rate": 3.5353535353535356e-06, | |
"loss": 0.9051, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.10936708860759493, | |
"grad_norm": 0.2708836297442042, | |
"learning_rate": 3.6363636363636366e-06, | |
"loss": 0.8701, | |
"step": 36 | |
}, | |
{ | |
"epoch": 0.11240506329113924, | |
"grad_norm": 0.22016073179409937, | |
"learning_rate": 3.737373737373738e-06, | |
"loss": 0.8791, | |
"step": 37 | |
}, | |
{ | |
"epoch": 0.11544303797468354, | |
"grad_norm": 0.21138154937295342, | |
"learning_rate": 3.8383838383838385e-06, | |
"loss": 0.897, | |
"step": 38 | |
}, | |
{ | |
"epoch": 0.11848101265822784, | |
"grad_norm": 0.19856129432930908, | |
"learning_rate": 3.93939393939394e-06, | |
"loss": 0.8636, | |
"step": 39 | |
}, | |
{ | |
"epoch": 0.12151898734177215, | |
"grad_norm": 0.20801873378861047, | |
"learning_rate": 4.04040404040404e-06, | |
"loss": 0.8969, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.12455696202531645, | |
"grad_norm": 0.23001046959007862, | |
"learning_rate": 4.141414141414142e-06, | |
"loss": 0.7474, | |
"step": 41 | |
}, | |
{ | |
"epoch": 0.12759493670886077, | |
"grad_norm": 0.1876660825037611, | |
"learning_rate": 4.242424242424243e-06, | |
"loss": 0.8376, | |
"step": 42 | |
}, | |
{ | |
"epoch": 0.13063291139240507, | |
"grad_norm": 0.20820658861653177, | |
"learning_rate": 4.343434343434344e-06, | |
"loss": 0.8653, | |
"step": 43 | |
}, | |
{ | |
"epoch": 0.13367088607594937, | |
"grad_norm": 0.19688388810132487, | |
"learning_rate": 4.444444444444444e-06, | |
"loss": 0.7882, | |
"step": 44 | |
}, | |
{ | |
"epoch": 0.13670886075949368, | |
"grad_norm": 0.1976424340452257, | |
"learning_rate": 4.5454545454545455e-06, | |
"loss": 0.8823, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.13974683544303798, | |
"grad_norm": 0.21760560999512202, | |
"learning_rate": 4.646464646464647e-06, | |
"loss": 0.8675, | |
"step": 46 | |
}, | |
{ | |
"epoch": 0.14278481012658228, | |
"grad_norm": 0.19046598302412762, | |
"learning_rate": 4.747474747474748e-06, | |
"loss": 0.8358, | |
"step": 47 | |
}, | |
{ | |
"epoch": 0.1458227848101266, | |
"grad_norm": 0.17689714015085062, | |
"learning_rate": 4.848484848484849e-06, | |
"loss": 0.8077, | |
"step": 48 | |
}, | |
{ | |
"epoch": 0.1488607594936709, | |
"grad_norm": 0.18328478564193937, | |
"learning_rate": 4.94949494949495e-06, | |
"loss": 0.8297, | |
"step": 49 | |
}, | |
{ | |
"epoch": 0.1518987341772152, | |
"grad_norm": 0.1774577565120857, | |
"learning_rate": 5.0505050505050515e-06, | |
"loss": 0.823, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.1549367088607595, | |
"grad_norm": 0.1703091618116316, | |
"learning_rate": 5.151515151515152e-06, | |
"loss": 0.8012, | |
"step": 51 | |
}, | |
{ | |
"epoch": 0.1579746835443038, | |
"grad_norm": 0.18450831545374227, | |
"learning_rate": 5.252525252525253e-06, | |
"loss": 0.8383, | |
"step": 52 | |
}, | |
{ | |
"epoch": 0.1610126582278481, | |
"grad_norm": 0.17118317616212286, | |
"learning_rate": 5.353535353535354e-06, | |
"loss": 0.7941, | |
"step": 53 | |
}, | |
{ | |
"epoch": 0.1640506329113924, | |
"grad_norm": 0.16625903565725803, | |
"learning_rate": 5.4545454545454545e-06, | |
"loss": 0.8171, | |
"step": 54 | |
}, | |
{ | |
"epoch": 0.1670886075949367, | |
"grad_norm": 0.185571990325072, | |
"learning_rate": 5.555555555555557e-06, | |
"loss": 0.8695, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.17012658227848101, | |
"grad_norm": 0.16307355925570016, | |
"learning_rate": 5.656565656565657e-06, | |
"loss": 0.7919, | |
"step": 56 | |
}, | |
{ | |
"epoch": 0.17316455696202532, | |
"grad_norm": 0.1628781595142016, | |
"learning_rate": 5.7575757575757586e-06, | |
"loss": 0.8181, | |
"step": 57 | |
}, | |
{ | |
"epoch": 0.17620253164556962, | |
"grad_norm": 0.15474465013909197, | |
"learning_rate": 5.858585858585859e-06, | |
"loss": 0.8249, | |
"step": 58 | |
}, | |
{ | |
"epoch": 0.17924050632911392, | |
"grad_norm": 0.1639462614140778, | |
"learning_rate": 5.95959595959596e-06, | |
"loss": 0.834, | |
"step": 59 | |
}, | |
{ | |
"epoch": 0.18227848101265823, | |
"grad_norm": 0.18542685941142933, | |
"learning_rate": 6.060606060606061e-06, | |
"loss": 0.8308, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.18531645569620253, | |
"grad_norm": 0.15537864217455533, | |
"learning_rate": 6.1616161616161615e-06, | |
"loss": 0.773, | |
"step": 61 | |
}, | |
{ | |
"epoch": 0.18835443037974683, | |
"grad_norm": 0.17436631962110918, | |
"learning_rate": 6.262626262626264e-06, | |
"loss": 0.8021, | |
"step": 62 | |
}, | |
{ | |
"epoch": 0.19139240506329114, | |
"grad_norm": 0.1752248753112386, | |
"learning_rate": 6.363636363636364e-06, | |
"loss": 0.8753, | |
"step": 63 | |
}, | |
{ | |
"epoch": 0.19443037974683544, | |
"grad_norm": 0.1657769895142651, | |
"learning_rate": 6.464646464646466e-06, | |
"loss": 0.8567, | |
"step": 64 | |
}, | |
{ | |
"epoch": 0.19746835443037974, | |
"grad_norm": 0.15913039516725547, | |
"learning_rate": 6.565656565656566e-06, | |
"loss": 0.7868, | |
"step": 65 | |
}, | |
{ | |
"epoch": 0.20050632911392405, | |
"grad_norm": 0.17115941323518508, | |
"learning_rate": 6.666666666666667e-06, | |
"loss": 0.8183, | |
"step": 66 | |
}, | |
{ | |
"epoch": 0.20354430379746835, | |
"grad_norm": 0.16253020468901574, | |
"learning_rate": 6.767676767676769e-06, | |
"loss": 0.8366, | |
"step": 67 | |
}, | |
{ | |
"epoch": 0.20658227848101265, | |
"grad_norm": 0.15683183443144275, | |
"learning_rate": 6.868686868686869e-06, | |
"loss": 0.7998, | |
"step": 68 | |
}, | |
{ | |
"epoch": 0.20962025316455696, | |
"grad_norm": 0.14324369162375836, | |
"learning_rate": 6.969696969696971e-06, | |
"loss": 0.8049, | |
"step": 69 | |
}, | |
{ | |
"epoch": 0.21265822784810126, | |
"grad_norm": 0.15382265145278182, | |
"learning_rate": 7.070707070707071e-06, | |
"loss": 0.7915, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.21569620253164556, | |
"grad_norm": 0.15335613187813119, | |
"learning_rate": 7.171717171717172e-06, | |
"loss": 0.8056, | |
"step": 71 | |
}, | |
{ | |
"epoch": 0.21873417721518987, | |
"grad_norm": 0.1537382161073269, | |
"learning_rate": 7.272727272727273e-06, | |
"loss": 0.7986, | |
"step": 72 | |
}, | |
{ | |
"epoch": 0.22177215189873417, | |
"grad_norm": 0.17379425501074097, | |
"learning_rate": 7.373737373737374e-06, | |
"loss": 0.8681, | |
"step": 73 | |
}, | |
{ | |
"epoch": 0.22481012658227847, | |
"grad_norm": 0.15843515964361574, | |
"learning_rate": 7.474747474747476e-06, | |
"loss": 0.7744, | |
"step": 74 | |
}, | |
{ | |
"epoch": 0.22784810126582278, | |
"grad_norm": 0.1751604463972058, | |
"learning_rate": 7.5757575757575764e-06, | |
"loss": 0.7857, | |
"step": 75 | |
}, | |
{ | |
"epoch": 0.23088607594936708, | |
"grad_norm": 0.15053732125197122, | |
"learning_rate": 7.676767676767677e-06, | |
"loss": 0.8095, | |
"step": 76 | |
}, | |
{ | |
"epoch": 0.23392405063291138, | |
"grad_norm": 0.1532282564562081, | |
"learning_rate": 7.77777777777778e-06, | |
"loss": 0.7742, | |
"step": 77 | |
}, | |
{ | |
"epoch": 0.2369620253164557, | |
"grad_norm": 0.16579482428765188, | |
"learning_rate": 7.87878787878788e-06, | |
"loss": 0.7522, | |
"step": 78 | |
}, | |
{ | |
"epoch": 0.24, | |
"grad_norm": 0.15723235084773576, | |
"learning_rate": 7.97979797979798e-06, | |
"loss": 0.8157, | |
"step": 79 | |
}, | |
{ | |
"epoch": 0.2430379746835443, | |
"grad_norm": 0.14591068220696474, | |
"learning_rate": 8.08080808080808e-06, | |
"loss": 0.8117, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.2460759493670886, | |
"grad_norm": 0.1506364803840211, | |
"learning_rate": 8.181818181818183e-06, | |
"loss": 0.7671, | |
"step": 81 | |
}, | |
{ | |
"epoch": 0.2491139240506329, | |
"grad_norm": 0.172379076159448, | |
"learning_rate": 8.282828282828283e-06, | |
"loss": 0.8397, | |
"step": 82 | |
}, | |
{ | |
"epoch": 0.2521518987341772, | |
"grad_norm": 0.15429561913459985, | |
"learning_rate": 8.383838383838384e-06, | |
"loss": 0.7627, | |
"step": 83 | |
}, | |
{ | |
"epoch": 0.25518987341772154, | |
"grad_norm": 0.16409881360689876, | |
"learning_rate": 8.484848484848486e-06, | |
"loss": 0.8123, | |
"step": 84 | |
}, | |
{ | |
"epoch": 0.2582278481012658, | |
"grad_norm": 0.15262847761041756, | |
"learning_rate": 8.585858585858587e-06, | |
"loss": 0.7831, | |
"step": 85 | |
}, | |
{ | |
"epoch": 0.26126582278481014, | |
"grad_norm": 0.14775969810777986, | |
"learning_rate": 8.686868686868687e-06, | |
"loss": 0.7792, | |
"step": 86 | |
}, | |
{ | |
"epoch": 0.2643037974683544, | |
"grad_norm": 0.16227868746752167, | |
"learning_rate": 8.787878787878788e-06, | |
"loss": 0.7481, | |
"step": 87 | |
}, | |
{ | |
"epoch": 0.26734177215189875, | |
"grad_norm": 0.14781371340131963, | |
"learning_rate": 8.888888888888888e-06, | |
"loss": 0.7692, | |
"step": 88 | |
}, | |
{ | |
"epoch": 0.270379746835443, | |
"grad_norm": 0.15246159049852878, | |
"learning_rate": 8.98989898989899e-06, | |
"loss": 0.7777, | |
"step": 89 | |
}, | |
{ | |
"epoch": 0.27341772151898736, | |
"grad_norm": 0.15261383672120973, | |
"learning_rate": 9.090909090909091e-06, | |
"loss": 0.7863, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.27645569620253163, | |
"grad_norm": 0.1555370312321936, | |
"learning_rate": 9.191919191919193e-06, | |
"loss": 0.7787, | |
"step": 91 | |
}, | |
{ | |
"epoch": 0.27949367088607596, | |
"grad_norm": 0.15449424911977483, | |
"learning_rate": 9.292929292929294e-06, | |
"loss": 0.7489, | |
"step": 92 | |
}, | |
{ | |
"epoch": 0.28253164556962024, | |
"grad_norm": 0.1517952399280153, | |
"learning_rate": 9.393939393939396e-06, | |
"loss": 0.7824, | |
"step": 93 | |
}, | |
{ | |
"epoch": 0.28556962025316457, | |
"grad_norm": 0.14848789600098466, | |
"learning_rate": 9.494949494949497e-06, | |
"loss": 0.7366, | |
"step": 94 | |
}, | |
{ | |
"epoch": 0.28860759493670884, | |
"grad_norm": 0.1578591812892815, | |
"learning_rate": 9.595959595959597e-06, | |
"loss": 0.7596, | |
"step": 95 | |
}, | |
{ | |
"epoch": 0.2916455696202532, | |
"grad_norm": 0.15400695563654634, | |
"learning_rate": 9.696969696969698e-06, | |
"loss": 0.7449, | |
"step": 96 | |
}, | |
{ | |
"epoch": 0.29468354430379745, | |
"grad_norm": 0.1622217545636064, | |
"learning_rate": 9.797979797979798e-06, | |
"loss": 0.7852, | |
"step": 97 | |
}, | |
{ | |
"epoch": 0.2977215189873418, | |
"grad_norm": 0.1571414989310096, | |
"learning_rate": 9.8989898989899e-06, | |
"loss": 0.798, | |
"step": 98 | |
}, | |
{ | |
"epoch": 0.30075949367088606, | |
"grad_norm": 0.15369871304753976, | |
"learning_rate": 1e-05, | |
"loss": 0.8095, | |
"step": 99 | |
}, | |
{ | |
"epoch": 0.3037974683544304, | |
"grad_norm": 0.1560154961387438, | |
"learning_rate": 9.999968709437563e-06, | |
"loss": 0.7465, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.30683544303797466, | |
"grad_norm": 0.15573381688358523, | |
"learning_rate": 9.999874838141888e-06, | |
"loss": 0.774, | |
"step": 101 | |
}, | |
{ | |
"epoch": 0.309873417721519, | |
"grad_norm": 0.15879548729475618, | |
"learning_rate": 9.999718387287891e-06, | |
"loss": 0.7867, | |
"step": 102 | |
}, | |
{ | |
"epoch": 0.31291139240506327, | |
"grad_norm": 0.1362205521765718, | |
"learning_rate": 9.999499358833745e-06, | |
"loss": 0.7392, | |
"step": 103 | |
}, | |
{ | |
"epoch": 0.3159493670886076, | |
"grad_norm": 0.1554346438640984, | |
"learning_rate": 9.99921775552086e-06, | |
"loss": 0.7829, | |
"step": 104 | |
}, | |
{ | |
"epoch": 0.3189873417721519, | |
"grad_norm": 0.1504818128917585, | |
"learning_rate": 9.998873580873848e-06, | |
"loss": 0.7264, | |
"step": 105 | |
}, | |
{ | |
"epoch": 0.3220253164556962, | |
"grad_norm": 0.16944580979135326, | |
"learning_rate": 9.998466839200474e-06, | |
"loss": 0.7611, | |
"step": 106 | |
}, | |
{ | |
"epoch": 0.3250632911392405, | |
"grad_norm": 0.15892279236814086, | |
"learning_rate": 9.99799753559161e-06, | |
"loss": 0.805, | |
"step": 107 | |
}, | |
{ | |
"epoch": 0.3281012658227848, | |
"grad_norm": 0.14533324182381482, | |
"learning_rate": 9.997465675921163e-06, | |
"loss": 0.7302, | |
"step": 108 | |
}, | |
{ | |
"epoch": 0.3311392405063291, | |
"grad_norm": 0.1593407263801221, | |
"learning_rate": 9.99687126684601e-06, | |
"loss": 0.7783, | |
"step": 109 | |
}, | |
{ | |
"epoch": 0.3341772151898734, | |
"grad_norm": 0.15734266679164916, | |
"learning_rate": 9.99621431580591e-06, | |
"loss": 0.7519, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.3372151898734177, | |
"grad_norm": 0.15737748293585987, | |
"learning_rate": 9.99549483102341e-06, | |
"loss": 0.7683, | |
"step": 111 | |
}, | |
{ | |
"epoch": 0.34025316455696203, | |
"grad_norm": 0.16682595959306512, | |
"learning_rate": 9.994712821503737e-06, | |
"loss": 0.7963, | |
"step": 112 | |
}, | |
{ | |
"epoch": 0.3432911392405063, | |
"grad_norm": 0.1437250564900874, | |
"learning_rate": 9.993868297034709e-06, | |
"loss": 0.7697, | |
"step": 113 | |
}, | |
{ | |
"epoch": 0.34632911392405064, | |
"grad_norm": 0.15783291002114871, | |
"learning_rate": 9.992961268186575e-06, | |
"loss": 0.7735, | |
"step": 114 | |
}, | |
{ | |
"epoch": 0.3493670886075949, | |
"grad_norm": 0.1609814436777924, | |
"learning_rate": 9.991991746311916e-06, | |
"loss": 0.7612, | |
"step": 115 | |
}, | |
{ | |
"epoch": 0.35240506329113924, | |
"grad_norm": 0.15950583014151243, | |
"learning_rate": 9.990959743545487e-06, | |
"loss": 0.7478, | |
"step": 116 | |
}, | |
{ | |
"epoch": 0.3554430379746835, | |
"grad_norm": 0.15464679834142525, | |
"learning_rate": 9.989865272804064e-06, | |
"loss": 0.739, | |
"step": 117 | |
}, | |
{ | |
"epoch": 0.35848101265822785, | |
"grad_norm": 0.14458722845111444, | |
"learning_rate": 9.988708347786288e-06, | |
"loss": 0.7105, | |
"step": 118 | |
}, | |
{ | |
"epoch": 0.3615189873417721, | |
"grad_norm": 0.15076065245614487, | |
"learning_rate": 9.9874889829725e-06, | |
"loss": 0.7009, | |
"step": 119 | |
}, | |
{ | |
"epoch": 0.36455696202531646, | |
"grad_norm": 0.148555033903784, | |
"learning_rate": 9.986207193624537e-06, | |
"loss": 0.7433, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.3675949367088608, | |
"grad_norm": 0.16560295407333225, | |
"learning_rate": 9.984862995785564e-06, | |
"loss": 0.8137, | |
"step": 121 | |
}, | |
{ | |
"epoch": 0.37063291139240506, | |
"grad_norm": 0.14662078483089422, | |
"learning_rate": 9.983456406279866e-06, | |
"loss": 0.7226, | |
"step": 122 | |
}, | |
{ | |
"epoch": 0.3736708860759494, | |
"grad_norm": 0.1634791948015343, | |
"learning_rate": 9.981987442712634e-06, | |
"loss": 0.7661, | |
"step": 123 | |
}, | |
{ | |
"epoch": 0.37670886075949367, | |
"grad_norm": 0.14887460537724473, | |
"learning_rate": 9.980456123469743e-06, | |
"loss": 0.7335, | |
"step": 124 | |
}, | |
{ | |
"epoch": 0.379746835443038, | |
"grad_norm": 0.15631938007232998, | |
"learning_rate": 9.978862467717532e-06, | |
"loss": 0.798, | |
"step": 125 | |
}, | |
{ | |
"epoch": 0.3827848101265823, | |
"grad_norm": 0.15378166270358495, | |
"learning_rate": 9.977206495402554e-06, | |
"loss": 0.7716, | |
"step": 126 | |
}, | |
{ | |
"epoch": 0.3858227848101266, | |
"grad_norm": 0.15944325856222438, | |
"learning_rate": 9.97548822725133e-06, | |
"loss": 0.8056, | |
"step": 127 | |
}, | |
{ | |
"epoch": 0.3888607594936709, | |
"grad_norm": 0.15806585106656948, | |
"learning_rate": 9.973707684770095e-06, | |
"loss": 0.7598, | |
"step": 128 | |
}, | |
{ | |
"epoch": 0.3918987341772152, | |
"grad_norm": 0.15876456210482656, | |
"learning_rate": 9.971864890244514e-06, | |
"loss": 0.7618, | |
"step": 129 | |
}, | |
{ | |
"epoch": 0.3949367088607595, | |
"grad_norm": 0.15077324094201847, | |
"learning_rate": 9.96995986673942e-06, | |
"loss": 0.7586, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.3979746835443038, | |
"grad_norm": 0.16205966422080534, | |
"learning_rate": 9.967992638098517e-06, | |
"loss": 0.7411, | |
"step": 131 | |
}, | |
{ | |
"epoch": 0.4010126582278481, | |
"grad_norm": 0.15759028020464438, | |
"learning_rate": 9.965963228944077e-06, | |
"loss": 0.762, | |
"step": 132 | |
}, | |
{ | |
"epoch": 0.4040506329113924, | |
"grad_norm": 0.1609850388940067, | |
"learning_rate": 9.963871664676647e-06, | |
"loss": 0.7903, | |
"step": 133 | |
}, | |
{ | |
"epoch": 0.4070886075949367, | |
"grad_norm": 0.15140219730566665, | |
"learning_rate": 9.961717971474714e-06, | |
"loss": 0.7086, | |
"step": 134 | |
}, | |
{ | |
"epoch": 0.41012658227848103, | |
"grad_norm": 0.14653263161882255, | |
"learning_rate": 9.959502176294384e-06, | |
"loss": 0.7214, | |
"step": 135 | |
}, | |
{ | |
"epoch": 0.4131645569620253, | |
"grad_norm": 0.14811066816184854, | |
"learning_rate": 9.957224306869053e-06, | |
"loss": 0.7414, | |
"step": 136 | |
}, | |
{ | |
"epoch": 0.41620253164556964, | |
"grad_norm": 0.15595540327858165, | |
"learning_rate": 9.954884391709043e-06, | |
"loss": 0.7836, | |
"step": 137 | |
}, | |
{ | |
"epoch": 0.4192405063291139, | |
"grad_norm": 0.14108541773046018, | |
"learning_rate": 9.95248246010126e-06, | |
"loss": 0.7478, | |
"step": 138 | |
}, | |
{ | |
"epoch": 0.42227848101265825, | |
"grad_norm": 0.15906810703639698, | |
"learning_rate": 9.950018542108818e-06, | |
"loss": 0.7606, | |
"step": 139 | |
}, | |
{ | |
"epoch": 0.4253164556962025, | |
"grad_norm": 0.1528383149198257, | |
"learning_rate": 9.947492668570675e-06, | |
"loss": 0.725, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.42835443037974685, | |
"grad_norm": 0.1546681429241187, | |
"learning_rate": 9.944904871101227e-06, | |
"loss": 0.7558, | |
"step": 141 | |
}, | |
{ | |
"epoch": 0.43139240506329113, | |
"grad_norm": 0.15492591908933548, | |
"learning_rate": 9.94225518208993e-06, | |
"loss": 0.7417, | |
"step": 142 | |
}, | |
{ | |
"epoch": 0.43443037974683546, | |
"grad_norm": 0.14717883243941593, | |
"learning_rate": 9.939543634700891e-06, | |
"loss": 0.6828, | |
"step": 143 | |
}, | |
{ | |
"epoch": 0.43746835443037974, | |
"grad_norm": 0.14977763365685373, | |
"learning_rate": 9.936770262872444e-06, | |
"loss": 0.7945, | |
"step": 144 | |
}, | |
{ | |
"epoch": 0.44050632911392407, | |
"grad_norm": 0.15682104947747366, | |
"learning_rate": 9.933935101316735e-06, | |
"loss": 0.7532, | |
"step": 145 | |
}, | |
{ | |
"epoch": 0.44354430379746834, | |
"grad_norm": 0.14584023263645782, | |
"learning_rate": 9.931038185519285e-06, | |
"loss": 0.7403, | |
"step": 146 | |
}, | |
{ | |
"epoch": 0.4465822784810127, | |
"grad_norm": 0.14915701695822176, | |
"learning_rate": 9.928079551738542e-06, | |
"loss": 0.7232, | |
"step": 147 | |
}, | |
{ | |
"epoch": 0.44962025316455695, | |
"grad_norm": 0.1521889025235044, | |
"learning_rate": 9.925059237005437e-06, | |
"loss": 0.7788, | |
"step": 148 | |
}, | |
{ | |
"epoch": 0.4526582278481013, | |
"grad_norm": 0.14655442772334604, | |
"learning_rate": 9.9219772791229e-06, | |
"loss": 0.7531, | |
"step": 149 | |
}, | |
{ | |
"epoch": 0.45569620253164556, | |
"grad_norm": 0.15488348310624783, | |
"learning_rate": 9.91883371666542e-06, | |
"loss": 0.7865, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.4587341772151899, | |
"grad_norm": 0.15556737214012695, | |
"learning_rate": 9.915628588978522e-06, | |
"loss": 0.783, | |
"step": 151 | |
}, | |
{ | |
"epoch": 0.46177215189873416, | |
"grad_norm": 0.14407916946359384, | |
"learning_rate": 9.912361936178312e-06, | |
"loss": 0.7452, | |
"step": 152 | |
}, | |
{ | |
"epoch": 0.4648101265822785, | |
"grad_norm": 0.1515203148718222, | |
"learning_rate": 9.909033799150947e-06, | |
"loss": 0.74, | |
"step": 153 | |
}, | |
{ | |
"epoch": 0.46784810126582277, | |
"grad_norm": 0.161850312633236, | |
"learning_rate": 9.90564421955214e-06, | |
"loss": 0.733, | |
"step": 154 | |
}, | |
{ | |
"epoch": 0.4708860759493671, | |
"grad_norm": 0.1567309547943677, | |
"learning_rate": 9.902193239806634e-06, | |
"loss": 0.7717, | |
"step": 155 | |
}, | |
{ | |
"epoch": 0.4739240506329114, | |
"grad_norm": 0.14856948096609965, | |
"learning_rate": 9.898680903107668e-06, | |
"loss": 0.7391, | |
"step": 156 | |
}, | |
{ | |
"epoch": 0.4769620253164557, | |
"grad_norm": 0.15989687538267836, | |
"learning_rate": 9.895107253416434e-06, | |
"loss": 0.827, | |
"step": 157 | |
}, | |
{ | |
"epoch": 0.48, | |
"grad_norm": 0.1497020361061319, | |
"learning_rate": 9.891472335461537e-06, | |
"loss": 0.7261, | |
"step": 158 | |
}, | |
{ | |
"epoch": 0.4830379746835443, | |
"grad_norm": 0.14671522059197523, | |
"learning_rate": 9.887776194738433e-06, | |
"loss": 0.7388, | |
"step": 159 | |
}, | |
{ | |
"epoch": 0.4860759493670886, | |
"grad_norm": 0.15967144252922383, | |
"learning_rate": 9.884018877508844e-06, | |
"loss": 0.7581, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.4891139240506329, | |
"grad_norm": 0.14510079743012144, | |
"learning_rate": 9.8802004308002e-06, | |
"loss": 0.7631, | |
"step": 161 | |
}, | |
{ | |
"epoch": 0.4921518987341772, | |
"grad_norm": 0.14504835357709286, | |
"learning_rate": 9.876320902405041e-06, | |
"loss": 0.727, | |
"step": 162 | |
}, | |
{ | |
"epoch": 0.4951898734177215, | |
"grad_norm": 0.14744404256400437, | |
"learning_rate": 9.872380340880416e-06, | |
"loss": 0.7425, | |
"step": 163 | |
}, | |
{ | |
"epoch": 0.4982278481012658, | |
"grad_norm": 0.15465095712268317, | |
"learning_rate": 9.86837879554728e-06, | |
"loss": 0.784, | |
"step": 164 | |
}, | |
{ | |
"epoch": 0.5012658227848101, | |
"grad_norm": 0.15665467457584256, | |
"learning_rate": 9.864316316489873e-06, | |
"loss": 0.7657, | |
"step": 165 | |
}, | |
{ | |
"epoch": 0.5043037974683544, | |
"grad_norm": 0.15635361845956908, | |
"learning_rate": 9.860192954555099e-06, | |
"loss": 0.7587, | |
"step": 166 | |
}, | |
{ | |
"epoch": 0.5073417721518987, | |
"grad_norm": 0.15345265613234227, | |
"learning_rate": 9.856008761351882e-06, | |
"loss": 0.7701, | |
"step": 167 | |
}, | |
{ | |
"epoch": 0.5103797468354431, | |
"grad_norm": 0.1522326179620225, | |
"learning_rate": 9.851763789250526e-06, | |
"loss": 0.7705, | |
"step": 168 | |
}, | |
{ | |
"epoch": 0.5134177215189873, | |
"grad_norm": 0.1538498652648587, | |
"learning_rate": 9.847458091382057e-06, | |
"loss": 0.7369, | |
"step": 169 | |
}, | |
{ | |
"epoch": 0.5164556962025316, | |
"grad_norm": 0.14512401623774593, | |
"learning_rate": 9.843091721637559e-06, | |
"loss": 0.7332, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.5194936708860759, | |
"grad_norm": 0.15457558414252634, | |
"learning_rate": 9.838664734667496e-06, | |
"loss": 0.736, | |
"step": 171 | |
}, | |
{ | |
"epoch": 0.5225316455696203, | |
"grad_norm": 0.14212306520883133, | |
"learning_rate": 9.834177185881033e-06, | |
"loss": 0.7453, | |
"step": 172 | |
}, | |
{ | |
"epoch": 0.5255696202531646, | |
"grad_norm": 0.14603440656705588, | |
"learning_rate": 9.829629131445342e-06, | |
"loss": 0.7801, | |
"step": 173 | |
}, | |
{ | |
"epoch": 0.5286075949367088, | |
"grad_norm": 0.15306906645304788, | |
"learning_rate": 9.825020628284896e-06, | |
"loss": 0.7916, | |
"step": 174 | |
}, | |
{ | |
"epoch": 0.5316455696202531, | |
"grad_norm": 0.14991315878050276, | |
"learning_rate": 9.820351734080754e-06, | |
"loss": 0.7729, | |
"step": 175 | |
}, | |
{ | |
"epoch": 0.5346835443037975, | |
"grad_norm": 0.14695678972714699, | |
"learning_rate": 9.81562250726985e-06, | |
"loss": 0.7414, | |
"step": 176 | |
}, | |
{ | |
"epoch": 0.5377215189873418, | |
"grad_norm": 0.16546368468082698, | |
"learning_rate": 9.810833007044247e-06, | |
"loss": 0.76, | |
"step": 177 | |
}, | |
{ | |
"epoch": 0.540759493670886, | |
"grad_norm": 0.14445501453584003, | |
"learning_rate": 9.805983293350413e-06, | |
"loss": 0.7533, | |
"step": 178 | |
}, | |
{ | |
"epoch": 0.5437974683544303, | |
"grad_norm": 0.15241407553611241, | |
"learning_rate": 9.801073426888447e-06, | |
"loss": 0.7512, | |
"step": 179 | |
}, | |
{ | |
"epoch": 0.5468354430379747, | |
"grad_norm": 0.14637055604971663, | |
"learning_rate": 9.796103469111352e-06, | |
"loss": 0.7161, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.549873417721519, | |
"grad_norm": 0.1476130284956656, | |
"learning_rate": 9.791073482224229e-06, | |
"loss": 0.7283, | |
"step": 181 | |
}, | |
{ | |
"epoch": 0.5529113924050633, | |
"grad_norm": 0.14070704406258117, | |
"learning_rate": 9.785983529183533e-06, | |
"loss": 0.7019, | |
"step": 182 | |
}, | |
{ | |
"epoch": 0.5559493670886076, | |
"grad_norm": 0.14820788542442023, | |
"learning_rate": 9.780833673696255e-06, | |
"loss": 0.7223, | |
"step": 183 | |
}, | |
{ | |
"epoch": 0.5589873417721519, | |
"grad_norm": 0.1573533382049134, | |
"learning_rate": 9.775623980219149e-06, | |
"loss": 0.7753, | |
"step": 184 | |
}, | |
{ | |
"epoch": 0.5620253164556962, | |
"grad_norm": 0.1564179366024661, | |
"learning_rate": 9.77035451395791e-06, | |
"loss": 0.7879, | |
"step": 185 | |
}, | |
{ | |
"epoch": 0.5650632911392405, | |
"grad_norm": 0.1466584778846727, | |
"learning_rate": 9.76502534086636e-06, | |
"loss": 0.7154, | |
"step": 186 | |
}, | |
{ | |
"epoch": 0.5681012658227849, | |
"grad_norm": 0.14713416182841468, | |
"learning_rate": 9.759636527645633e-06, | |
"loss": 0.743, | |
"step": 187 | |
}, | |
{ | |
"epoch": 0.5711392405063291, | |
"grad_norm": 0.14751084971270478, | |
"learning_rate": 9.754188141743326e-06, | |
"loss": 0.722, | |
"step": 188 | |
}, | |
{ | |
"epoch": 0.5741772151898734, | |
"grad_norm": 0.1462856046848872, | |
"learning_rate": 9.74868025135266e-06, | |
"loss": 0.769, | |
"step": 189 | |
}, | |
{ | |
"epoch": 0.5772151898734177, | |
"grad_norm": 0.15188256888820945, | |
"learning_rate": 9.743112925411633e-06, | |
"loss": 0.7497, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.5802531645569621, | |
"grad_norm": 0.1412145269579806, | |
"learning_rate": 9.737486233602149e-06, | |
"loss": 0.7256, | |
"step": 191 | |
}, | |
{ | |
"epoch": 0.5832911392405064, | |
"grad_norm": 0.13894303122510548, | |
"learning_rate": 9.731800246349149e-06, | |
"loss": 0.7028, | |
"step": 192 | |
}, | |
{ | |
"epoch": 0.5863291139240506, | |
"grad_norm": 0.15164906741349582, | |
"learning_rate": 9.726055034819726e-06, | |
"loss": 0.75, | |
"step": 193 | |
}, | |
{ | |
"epoch": 0.5893670886075949, | |
"grad_norm": 0.16580118267141683, | |
"learning_rate": 9.720250670922242e-06, | |
"loss": 0.7142, | |
"step": 194 | |
}, | |
{ | |
"epoch": 0.5924050632911393, | |
"grad_norm": 0.14663787636584952, | |
"learning_rate": 9.714387227305422e-06, | |
"loss": 0.7843, | |
"step": 195 | |
}, | |
{ | |
"epoch": 0.5954430379746836, | |
"grad_norm": 0.14951469931311676, | |
"learning_rate": 9.708464777357444e-06, | |
"loss": 0.7794, | |
"step": 196 | |
}, | |
{ | |
"epoch": 0.5984810126582278, | |
"grad_norm": 0.14830789462210187, | |
"learning_rate": 9.702483395205023e-06, | |
"loss": 0.6941, | |
"step": 197 | |
}, | |
{ | |
"epoch": 0.6015189873417721, | |
"grad_norm": 0.15913553670944275, | |
"learning_rate": 9.696443155712488e-06, | |
"loss": 0.7937, | |
"step": 198 | |
}, | |
{ | |
"epoch": 0.6045569620253165, | |
"grad_norm": 0.15406874010366034, | |
"learning_rate": 9.69034413448083e-06, | |
"loss": 0.7306, | |
"step": 199 | |
}, | |
{ | |
"epoch": 0.6075949367088608, | |
"grad_norm": 0.15479127906595072, | |
"learning_rate": 9.684186407846774e-06, | |
"loss": 0.7444, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.610632911392405, | |
"grad_norm": 0.1473992261753897, | |
"learning_rate": 9.677970052881811e-06, | |
"loss": 0.7291, | |
"step": 201 | |
}, | |
{ | |
"epoch": 0.6136708860759493, | |
"grad_norm": 0.1627317899066151, | |
"learning_rate": 9.67169514739124e-06, | |
"loss": 0.7472, | |
"step": 202 | |
}, | |
{ | |
"epoch": 0.6167088607594937, | |
"grad_norm": 0.14415245040716593, | |
"learning_rate": 9.665361769913187e-06, | |
"loss": 0.6764, | |
"step": 203 | |
}, | |
{ | |
"epoch": 0.619746835443038, | |
"grad_norm": 0.1532282639172276, | |
"learning_rate": 9.658969999717631e-06, | |
"loss": 0.7589, | |
"step": 204 | |
}, | |
{ | |
"epoch": 0.6227848101265823, | |
"grad_norm": 0.14807691817541985, | |
"learning_rate": 9.652519916805406e-06, | |
"loss": 0.7312, | |
"step": 205 | |
}, | |
{ | |
"epoch": 0.6258227848101265, | |
"grad_norm": 0.14134727920980444, | |
"learning_rate": 9.6460116019072e-06, | |
"loss": 0.716, | |
"step": 206 | |
}, | |
{ | |
"epoch": 0.6288607594936709, | |
"grad_norm": 0.1520848085758245, | |
"learning_rate": 9.639445136482549e-06, | |
"loss": 0.726, | |
"step": 207 | |
}, | |
{ | |
"epoch": 0.6318987341772152, | |
"grad_norm": 0.15130523844782648, | |
"learning_rate": 9.632820602718806e-06, | |
"loss": 0.7231, | |
"step": 208 | |
}, | |
{ | |
"epoch": 0.6349367088607595, | |
"grad_norm": 0.15175606335359032, | |
"learning_rate": 9.62613808353013e-06, | |
"loss": 0.7642, | |
"step": 209 | |
}, | |
{ | |
"epoch": 0.6379746835443038, | |
"grad_norm": 0.15295189011746751, | |
"learning_rate": 9.619397662556434e-06, | |
"loss": 0.7533, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.6410126582278481, | |
"grad_norm": 0.14763127465770362, | |
"learning_rate": 9.612599424162344e-06, | |
"loss": 0.7357, | |
"step": 211 | |
}, | |
{ | |
"epoch": 0.6440506329113924, | |
"grad_norm": 0.15211730113967573, | |
"learning_rate": 9.60574345343614e-06, | |
"loss": 0.7379, | |
"step": 212 | |
}, | |
{ | |
"epoch": 0.6470886075949367, | |
"grad_norm": 0.15562838145344063, | |
"learning_rate": 9.598829836188694e-06, | |
"loss": 0.7353, | |
"step": 213 | |
}, | |
{ | |
"epoch": 0.650126582278481, | |
"grad_norm": 0.14905205328328655, | |
"learning_rate": 9.591858658952396e-06, | |
"loss": 0.7363, | |
"step": 214 | |
}, | |
{ | |
"epoch": 0.6531645569620254, | |
"grad_norm": 0.13994383951137745, | |
"learning_rate": 9.584830008980068e-06, | |
"loss": 0.7145, | |
"step": 215 | |
}, | |
{ | |
"epoch": 0.6562025316455696, | |
"grad_norm": 0.15269517601933946, | |
"learning_rate": 9.577743974243875e-06, | |
"loss": 0.7563, | |
"step": 216 | |
}, | |
{ | |
"epoch": 0.6592405063291139, | |
"grad_norm": 0.1449046463380699, | |
"learning_rate": 9.570600643434217e-06, | |
"loss": 0.7046, | |
"step": 217 | |
}, | |
{ | |
"epoch": 0.6622784810126582, | |
"grad_norm": 0.14626206784218754, | |
"learning_rate": 9.563400105958638e-06, | |
"loss": 0.7466, | |
"step": 218 | |
}, | |
{ | |
"epoch": 0.6653164556962026, | |
"grad_norm": 0.14925265069599608, | |
"learning_rate": 9.55614245194068e-06, | |
"loss": 0.7257, | |
"step": 219 | |
}, | |
{ | |
"epoch": 0.6683544303797468, | |
"grad_norm": 0.14900239365835394, | |
"learning_rate": 9.548827772218772e-06, | |
"loss": 0.7217, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.6713924050632911, | |
"grad_norm": 0.14241944963133804, | |
"learning_rate": 9.541456158345094e-06, | |
"loss": 0.7111, | |
"step": 221 | |
}, | |
{ | |
"epoch": 0.6744303797468354, | |
"grad_norm": 0.16303539816736368, | |
"learning_rate": 9.534027702584425e-06, | |
"loss": 0.7823, | |
"step": 222 | |
}, | |
{ | |
"epoch": 0.6774683544303798, | |
"grad_norm": 0.14757579145970603, | |
"learning_rate": 9.526542497912984e-06, | |
"loss": 0.6862, | |
"step": 223 | |
}, | |
{ | |
"epoch": 0.6805063291139241, | |
"grad_norm": 0.15177966078318864, | |
"learning_rate": 9.51900063801728e-06, | |
"loss": 0.7292, | |
"step": 224 | |
}, | |
{ | |
"epoch": 0.6835443037974683, | |
"grad_norm": 0.14879595092520143, | |
"learning_rate": 9.511402217292927e-06, | |
"loss": 0.7449, | |
"step": 225 | |
}, | |
{ | |
"epoch": 0.6865822784810126, | |
"grad_norm": 0.16778174898039588, | |
"learning_rate": 9.503747330843468e-06, | |
"loss": 0.7606, | |
"step": 226 | |
}, | |
{ | |
"epoch": 0.689620253164557, | |
"grad_norm": 0.14557534571840097, | |
"learning_rate": 9.496036074479184e-06, | |
"loss": 0.6848, | |
"step": 227 | |
}, | |
{ | |
"epoch": 0.6926582278481013, | |
"grad_norm": 0.14670373935654174, | |
"learning_rate": 9.488268544715897e-06, | |
"loss": 0.7409, | |
"step": 228 | |
}, | |
{ | |
"epoch": 0.6956962025316455, | |
"grad_norm": 0.15596118224861075, | |
"learning_rate": 9.480444838773753e-06, | |
"loss": 0.7307, | |
"step": 229 | |
}, | |
{ | |
"epoch": 0.6987341772151898, | |
"grad_norm": 0.15028654659712917, | |
"learning_rate": 9.472565054576017e-06, | |
"loss": 0.7461, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.7017721518987342, | |
"grad_norm": 0.14769647649955192, | |
"learning_rate": 9.464629290747844e-06, | |
"loss": 0.7447, | |
"step": 231 | |
}, | |
{ | |
"epoch": 0.7048101265822785, | |
"grad_norm": 0.15075594210150053, | |
"learning_rate": 9.456637646615035e-06, | |
"loss": 0.7116, | |
"step": 232 | |
}, | |
{ | |
"epoch": 0.7078481012658228, | |
"grad_norm": 0.1584095297303283, | |
"learning_rate": 9.448590222202808e-06, | |
"loss": 0.7762, | |
"step": 233 | |
}, | |
{ | |
"epoch": 0.710886075949367, | |
"grad_norm": 0.15185429153178392, | |
"learning_rate": 9.440487118234536e-06, | |
"loss": 0.7322, | |
"step": 234 | |
}, | |
{ | |
"epoch": 0.7139240506329114, | |
"grad_norm": 0.15536713995323584, | |
"learning_rate": 9.432328436130493e-06, | |
"loss": 0.7402, | |
"step": 235 | |
}, | |
{ | |
"epoch": 0.7169620253164557, | |
"grad_norm": 0.1367116030241254, | |
"learning_rate": 9.42411427800658e-06, | |
"loss": 0.7325, | |
"step": 236 | |
}, | |
{ | |
"epoch": 0.72, | |
"grad_norm": 0.1485314611100739, | |
"learning_rate": 9.415844746673047e-06, | |
"loss": 0.721, | |
"step": 237 | |
}, | |
{ | |
"epoch": 0.7230379746835442, | |
"grad_norm": 0.1513523719206825, | |
"learning_rate": 9.40751994563321e-06, | |
"loss": 0.7038, | |
"step": 238 | |
}, | |
{ | |
"epoch": 0.7260759493670886, | |
"grad_norm": 0.14790160472336378, | |
"learning_rate": 9.399139979082148e-06, | |
"loss": 0.7623, | |
"step": 239 | |
}, | |
{ | |
"epoch": 0.7291139240506329, | |
"grad_norm": 0.14658046245123366, | |
"learning_rate": 9.390704951905412e-06, | |
"loss": 0.7277, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.7321518987341772, | |
"grad_norm": 0.14490984139556115, | |
"learning_rate": 9.382214969677697e-06, | |
"loss": 0.7079, | |
"step": 241 | |
}, | |
{ | |
"epoch": 0.7351898734177216, | |
"grad_norm": 0.15673563004792315, | |
"learning_rate": 9.37367013866153e-06, | |
"loss": 0.7749, | |
"step": 242 | |
}, | |
{ | |
"epoch": 0.7382278481012658, | |
"grad_norm": 0.1532434622423629, | |
"learning_rate": 9.365070565805941e-06, | |
"loss": 0.7542, | |
"step": 243 | |
}, | |
{ | |
"epoch": 0.7412658227848101, | |
"grad_norm": 0.141142025643933, | |
"learning_rate": 9.356416358745119e-06, | |
"loss": 0.7461, | |
"step": 244 | |
}, | |
{ | |
"epoch": 0.7443037974683544, | |
"grad_norm": 0.15458993871915733, | |
"learning_rate": 9.347707625797062e-06, | |
"loss": 0.7564, | |
"step": 245 | |
}, | |
{ | |
"epoch": 0.7473417721518988, | |
"grad_norm": 0.1525332937728372, | |
"learning_rate": 9.338944475962236e-06, | |
"loss": 0.7221, | |
"step": 246 | |
}, | |
{ | |
"epoch": 0.7503797468354431, | |
"grad_norm": 0.15582017292256814, | |
"learning_rate": 9.330127018922195e-06, | |
"loss": 0.7577, | |
"step": 247 | |
}, | |
{ | |
"epoch": 0.7534177215189873, | |
"grad_norm": 0.14627498584216264, | |
"learning_rate": 9.32125536503821e-06, | |
"loss": 0.7229, | |
"step": 248 | |
}, | |
{ | |
"epoch": 0.7564556962025316, | |
"grad_norm": 0.13749633267698202, | |
"learning_rate": 9.312329625349903e-06, | |
"loss": 0.7112, | |
"step": 249 | |
}, | |
{ | |
"epoch": 0.759493670886076, | |
"grad_norm": 0.14519207545625734, | |
"learning_rate": 9.303349911573838e-06, | |
"loss": 0.7322, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.7625316455696203, | |
"grad_norm": 0.15127490797847287, | |
"learning_rate": 9.294316336102132e-06, | |
"loss": 0.7206, | |
"step": 251 | |
}, | |
{ | |
"epoch": 0.7655696202531646, | |
"grad_norm": 0.15385069737322865, | |
"learning_rate": 9.285229012001047e-06, | |
"loss": 0.7861, | |
"step": 252 | |
}, | |
{ | |
"epoch": 0.7686075949367088, | |
"grad_norm": 0.1392811216176761, | |
"learning_rate": 9.276088053009578e-06, | |
"loss": 0.715, | |
"step": 253 | |
}, | |
{ | |
"epoch": 0.7716455696202532, | |
"grad_norm": 0.14390845029461158, | |
"learning_rate": 9.266893573538023e-06, | |
"loss": 0.7131, | |
"step": 254 | |
}, | |
{ | |
"epoch": 0.7746835443037975, | |
"grad_norm": 0.14591475005039056, | |
"learning_rate": 9.257645688666557e-06, | |
"loss": 0.7029, | |
"step": 255 | |
}, | |
{ | |
"epoch": 0.7777215189873418, | |
"grad_norm": 0.15034474500103215, | |
"learning_rate": 9.248344514143786e-06, | |
"loss": 0.7794, | |
"step": 256 | |
}, | |
{ | |
"epoch": 0.780759493670886, | |
"grad_norm": 0.1329243293321845, | |
"learning_rate": 9.238990166385304e-06, | |
"loss": 0.6886, | |
"step": 257 | |
}, | |
{ | |
"epoch": 0.7837974683544304, | |
"grad_norm": 0.1449519021528343, | |
"learning_rate": 9.229582762472232e-06, | |
"loss": 0.6954, | |
"step": 258 | |
}, | |
{ | |
"epoch": 0.7868354430379747, | |
"grad_norm": 0.14011762932739064, | |
"learning_rate": 9.220122420149753e-06, | |
"loss": 0.7534, | |
"step": 259 | |
}, | |
{ | |
"epoch": 0.789873417721519, | |
"grad_norm": 0.1430832021135705, | |
"learning_rate": 9.21060925782564e-06, | |
"loss": 0.6782, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.7929113924050633, | |
"grad_norm": 0.146329792863665, | |
"learning_rate": 9.201043394568773e-06, | |
"loss": 0.7083, | |
"step": 261 | |
}, | |
{ | |
"epoch": 0.7959493670886076, | |
"grad_norm": 0.13966005655876726, | |
"learning_rate": 9.191424950107648e-06, | |
"loss": 0.753, | |
"step": 262 | |
}, | |
{ | |
"epoch": 0.7989873417721519, | |
"grad_norm": 0.1452187402666851, | |
"learning_rate": 9.181754044828882e-06, | |
"loss": 0.7087, | |
"step": 263 | |
}, | |
{ | |
"epoch": 0.8020253164556962, | |
"grad_norm": 0.14770290588916754, | |
"learning_rate": 9.172030799775698e-06, | |
"loss": 0.7521, | |
"step": 264 | |
}, | |
{ | |
"epoch": 0.8050632911392405, | |
"grad_norm": 0.14859373509481294, | |
"learning_rate": 9.162255336646422e-06, | |
"loss": 0.6864, | |
"step": 265 | |
}, | |
{ | |
"epoch": 0.8081012658227849, | |
"grad_norm": 0.13521731389755007, | |
"learning_rate": 9.152427777792947e-06, | |
"loss": 0.7083, | |
"step": 266 | |
}, | |
{ | |
"epoch": 0.8111392405063291, | |
"grad_norm": 0.15899678623809688, | |
"learning_rate": 9.142548246219212e-06, | |
"loss": 0.7191, | |
"step": 267 | |
}, | |
{ | |
"epoch": 0.8141772151898734, | |
"grad_norm": 0.1375227713494758, | |
"learning_rate": 9.132616865579655e-06, | |
"loss": 0.7276, | |
"step": 268 | |
}, | |
{ | |
"epoch": 0.8172151898734177, | |
"grad_norm": 0.15380324048740807, | |
"learning_rate": 9.122633760177674e-06, | |
"loss": 0.7517, | |
"step": 269 | |
}, | |
{ | |
"epoch": 0.8202531645569621, | |
"grad_norm": 0.1425907333848317, | |
"learning_rate": 9.112599054964058e-06, | |
"loss": 0.756, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.8232911392405063, | |
"grad_norm": 0.14836696714745884, | |
"learning_rate": 9.102512875535439e-06, | |
"loss": 0.6865, | |
"step": 271 | |
}, | |
{ | |
"epoch": 0.8263291139240506, | |
"grad_norm": 0.13262857577952253, | |
"learning_rate": 9.092375348132704e-06, | |
"loss": 0.6957, | |
"step": 272 | |
}, | |
{ | |
"epoch": 0.8293670886075949, | |
"grad_norm": 0.13994743933365547, | |
"learning_rate": 9.082186599639429e-06, | |
"loss": 0.6617, | |
"step": 273 | |
}, | |
{ | |
"epoch": 0.8324050632911393, | |
"grad_norm": 0.15723763933695586, | |
"learning_rate": 9.071946757580282e-06, | |
"loss": 0.7624, | |
"step": 274 | |
}, | |
{ | |
"epoch": 0.8354430379746836, | |
"grad_norm": 0.1486656829525549, | |
"learning_rate": 9.06165595011943e-06, | |
"loss": 0.7211, | |
"step": 275 | |
}, | |
{ | |
"epoch": 0.8384810126582278, | |
"grad_norm": 0.15779632813651678, | |
"learning_rate": 9.051314306058934e-06, | |
"loss": 0.7503, | |
"step": 276 | |
}, | |
{ | |
"epoch": 0.8415189873417721, | |
"grad_norm": 0.1585128088782449, | |
"learning_rate": 9.040921954837139e-06, | |
"loss": 0.759, | |
"step": 277 | |
}, | |
{ | |
"epoch": 0.8445569620253165, | |
"grad_norm": 0.14943760375413637, | |
"learning_rate": 9.030479026527048e-06, | |
"loss": 0.7452, | |
"step": 278 | |
}, | |
{ | |
"epoch": 0.8475949367088608, | |
"grad_norm": 0.1537621293162188, | |
"learning_rate": 9.019985651834703e-06, | |
"loss": 0.7302, | |
"step": 279 | |
}, | |
{ | |
"epoch": 0.850632911392405, | |
"grad_norm": 0.14310455157465138, | |
"learning_rate": 9.009441962097543e-06, | |
"loss": 0.727, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.8536708860759493, | |
"grad_norm": 0.15263194477343178, | |
"learning_rate": 8.99884808928276e-06, | |
"loss": 0.7591, | |
"step": 281 | |
}, | |
{ | |
"epoch": 0.8567088607594937, | |
"grad_norm": 0.14586809361609301, | |
"learning_rate": 8.98820416598565e-06, | |
"loss": 0.7439, | |
"step": 282 | |
}, | |
{ | |
"epoch": 0.859746835443038, | |
"grad_norm": 0.1504705926875278, | |
"learning_rate": 8.97751032542795e-06, | |
"loss": 0.7565, | |
"step": 283 | |
}, | |
{ | |
"epoch": 0.8627848101265823, | |
"grad_norm": 0.1441103904918681, | |
"learning_rate": 8.966766701456177e-06, | |
"loss": 0.7034, | |
"step": 284 | |
}, | |
{ | |
"epoch": 0.8658227848101265, | |
"grad_norm": 0.14391086855133448, | |
"learning_rate": 8.955973428539943e-06, | |
"loss": 0.724, | |
"step": 285 | |
}, | |
{ | |
"epoch": 0.8688607594936709, | |
"grad_norm": 0.15047599099834674, | |
"learning_rate": 8.945130641770281e-06, | |
"loss": 0.7179, | |
"step": 286 | |
}, | |
{ | |
"epoch": 0.8718987341772152, | |
"grad_norm": 0.15074435170430608, | |
"learning_rate": 8.93423847685795e-06, | |
"loss": 0.731, | |
"step": 287 | |
}, | |
{ | |
"epoch": 0.8749367088607595, | |
"grad_norm": 0.16108274909500533, | |
"learning_rate": 8.923297070131738e-06, | |
"loss": 0.7071, | |
"step": 288 | |
}, | |
{ | |
"epoch": 0.8779746835443037, | |
"grad_norm": 0.15524331185932833, | |
"learning_rate": 8.91230655853675e-06, | |
"loss": 0.7153, | |
"step": 289 | |
}, | |
{ | |
"epoch": 0.8810126582278481, | |
"grad_norm": 0.15110074800492984, | |
"learning_rate": 8.901267079632703e-06, | |
"loss": 0.74, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.8840506329113924, | |
"grad_norm": 0.14585560022656124, | |
"learning_rate": 8.890178771592198e-06, | |
"loss": 0.7491, | |
"step": 291 | |
}, | |
{ | |
"epoch": 0.8870886075949367, | |
"grad_norm": 0.14265142947205262, | |
"learning_rate": 8.879041773198996e-06, | |
"loss": 0.7055, | |
"step": 292 | |
}, | |
{ | |
"epoch": 0.890126582278481, | |
"grad_norm": 0.142780593111172, | |
"learning_rate": 8.86785622384627e-06, | |
"loss": 0.687, | |
"step": 293 | |
}, | |
{ | |
"epoch": 0.8931645569620253, | |
"grad_norm": 0.1454614837486431, | |
"learning_rate": 8.856622263534875e-06, | |
"loss": 0.745, | |
"step": 294 | |
}, | |
{ | |
"epoch": 0.8962025316455696, | |
"grad_norm": 0.14081739179379577, | |
"learning_rate": 8.845340032871584e-06, | |
"loss": 0.7713, | |
"step": 295 | |
}, | |
{ | |
"epoch": 0.8992405063291139, | |
"grad_norm": 0.14385443388022717, | |
"learning_rate": 8.834009673067337e-06, | |
"loss": 0.7472, | |
"step": 296 | |
}, | |
{ | |
"epoch": 0.9022784810126582, | |
"grad_norm": 0.1473963417078419, | |
"learning_rate": 8.822631325935463e-06, | |
"loss": 0.7249, | |
"step": 297 | |
}, | |
{ | |
"epoch": 0.9053164556962026, | |
"grad_norm": 0.1368896762497509, | |
"learning_rate": 8.811205133889917e-06, | |
"loss": 0.7491, | |
"step": 298 | |
}, | |
{ | |
"epoch": 0.9083544303797468, | |
"grad_norm": 0.14095409495460387, | |
"learning_rate": 8.799731239943488e-06, | |
"loss": 0.7172, | |
"step": 299 | |
}, | |
{ | |
"epoch": 0.9113924050632911, | |
"grad_norm": 0.15441223002212245, | |
"learning_rate": 8.788209787706014e-06, | |
"loss": 0.7573, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.9144303797468355, | |
"grad_norm": 0.14670952915562407, | |
"learning_rate": 8.776640921382585e-06, | |
"loss": 0.6943, | |
"step": 301 | |
}, | |
{ | |
"epoch": 0.9174683544303798, | |
"grad_norm": 0.14641743535819832, | |
"learning_rate": 8.765024785771732e-06, | |
"loss": 0.6878, | |
"step": 302 | |
}, | |
{ | |
"epoch": 0.920506329113924, | |
"grad_norm": 0.14616935890059882, | |
"learning_rate": 8.753361526263622e-06, | |
"loss": 0.7261, | |
"step": 303 | |
}, | |
{ | |
"epoch": 0.9235443037974683, | |
"grad_norm": 0.15240576293183697, | |
"learning_rate": 8.741651288838237e-06, | |
"loss": 0.7324, | |
"step": 304 | |
}, | |
{ | |
"epoch": 0.9265822784810127, | |
"grad_norm": 0.15933931646246116, | |
"learning_rate": 8.729894220063542e-06, | |
"loss": 0.6935, | |
"step": 305 | |
}, | |
{ | |
"epoch": 0.929620253164557, | |
"grad_norm": 0.14649711165417814, | |
"learning_rate": 8.718090467093654e-06, | |
"loss": 0.7483, | |
"step": 306 | |
}, | |
{ | |
"epoch": 0.9326582278481013, | |
"grad_norm": 0.14580689387632406, | |
"learning_rate": 8.706240177667003e-06, | |
"loss": 0.7327, | |
"step": 307 | |
}, | |
{ | |
"epoch": 0.9356962025316455, | |
"grad_norm": 0.14256585147606213, | |
"learning_rate": 8.694343500104474e-06, | |
"loss": 0.7143, | |
"step": 308 | |
}, | |
{ | |
"epoch": 0.9387341772151899, | |
"grad_norm": 0.15788812574603747, | |
"learning_rate": 8.682400583307562e-06, | |
"loss": 0.7508, | |
"step": 309 | |
}, | |
{ | |
"epoch": 0.9417721518987342, | |
"grad_norm": 0.13178792986163443, | |
"learning_rate": 8.670411576756502e-06, | |
"loss": 0.6685, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.9448101265822785, | |
"grad_norm": 0.14325453111842404, | |
"learning_rate": 8.658376630508391e-06, | |
"loss": 0.7384, | |
"step": 311 | |
}, | |
{ | |
"epoch": 0.9478481012658228, | |
"grad_norm": 0.14921601570309354, | |
"learning_rate": 8.646295895195334e-06, | |
"loss": 0.6916, | |
"step": 312 | |
}, | |
{ | |
"epoch": 0.9508860759493671, | |
"grad_norm": 0.14867911627025984, | |
"learning_rate": 8.634169522022522e-06, | |
"loss": 0.7618, | |
"step": 313 | |
}, | |
{ | |
"epoch": 0.9539240506329114, | |
"grad_norm": 0.13408458116847805, | |
"learning_rate": 8.621997662766378e-06, | |
"loss": 0.685, | |
"step": 314 | |
}, | |
{ | |
"epoch": 0.9569620253164557, | |
"grad_norm": 0.14778446535199555, | |
"learning_rate": 8.609780469772623e-06, | |
"loss": 0.7715, | |
"step": 315 | |
}, | |
{ | |
"epoch": 0.96, | |
"grad_norm": 0.1513493124946686, | |
"learning_rate": 8.597518095954399e-06, | |
"loss": 0.7225, | |
"step": 316 | |
}, | |
{ | |
"epoch": 0.9630379746835444, | |
"grad_norm": 0.1498242800163109, | |
"learning_rate": 8.585210694790333e-06, | |
"loss": 0.6919, | |
"step": 317 | |
}, | |
{ | |
"epoch": 0.9660759493670886, | |
"grad_norm": 0.14772376485550806, | |
"learning_rate": 8.572858420322626e-06, | |
"loss": 0.749, | |
"step": 318 | |
}, | |
{ | |
"epoch": 0.9691139240506329, | |
"grad_norm": 0.15452004706677017, | |
"learning_rate": 8.56046142715513e-06, | |
"loss": 0.7202, | |
"step": 319 | |
}, | |
{ | |
"epoch": 0.9721518987341772, | |
"grad_norm": 0.15340112248677157, | |
"learning_rate": 8.548019870451391e-06, | |
"loss": 0.7242, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.9751898734177216, | |
"grad_norm": 0.13675490142166902, | |
"learning_rate": 8.535533905932739e-06, | |
"loss": 0.699, | |
"step": 321 | |
}, | |
{ | |
"epoch": 0.9782278481012658, | |
"grad_norm": 0.14063114842689411, | |
"learning_rate": 8.523003689876312e-06, | |
"loss": 0.7342, | |
"step": 322 | |
}, | |
{ | |
"epoch": 0.9812658227848101, | |
"grad_norm": 0.14103508935438214, | |
"learning_rate": 8.510429379113114e-06, | |
"loss": 0.6986, | |
"step": 323 | |
}, | |
{ | |
"epoch": 0.9843037974683544, | |
"grad_norm": 0.14781753448940838, | |
"learning_rate": 8.497811131026046e-06, | |
"loss": 0.7206, | |
"step": 324 | |
}, | |
{ | |
"epoch": 0.9873417721518988, | |
"grad_norm": 0.13798681094672127, | |
"learning_rate": 8.485149103547943e-06, | |
"loss": 0.6682, | |
"step": 325 | |
}, | |
{ | |
"epoch": 0.990379746835443, | |
"grad_norm": 0.13956851609714183, | |
"learning_rate": 8.472443455159586e-06, | |
"loss": 0.7281, | |
"step": 326 | |
}, | |
{ | |
"epoch": 0.9934177215189873, | |
"grad_norm": 0.14795209764564385, | |
"learning_rate": 8.459694344887732e-06, | |
"loss": 0.7449, | |
"step": 327 | |
}, | |
{ | |
"epoch": 0.9964556962025316, | |
"grad_norm": 0.1340859424248503, | |
"learning_rate": 8.446901932303112e-06, | |
"loss": 0.6661, | |
"step": 328 | |
}, | |
{ | |
"epoch": 0.999493670886076, | |
"grad_norm": 0.13943100974007655, | |
"learning_rate": 8.434066377518437e-06, | |
"loss": 0.6925, | |
"step": 329 | |
}, | |
{ | |
"epoch": 1.0, | |
"grad_norm": 0.13943100974007655, | |
"learning_rate": 8.421187841186402e-06, | |
"loss": 0.7072, | |
"step": 330 | |
}, | |
{ | |
"epoch": 1.0030379746835443, | |
"grad_norm": 0.35195652682604184, | |
"learning_rate": 8.408266484497664e-06, | |
"loss": 0.5878, | |
"step": 331 | |
}, | |
{ | |
"epoch": 1.0060759493670886, | |
"grad_norm": 0.13790447422523408, | |
"learning_rate": 8.395302469178832e-06, | |
"loss": 0.5245, | |
"step": 332 | |
}, | |
{ | |
"epoch": 1.0091139240506328, | |
"grad_norm": 0.1335900726197855, | |
"learning_rate": 8.382295957490435e-06, | |
"loss": 0.5884, | |
"step": 333 | |
}, | |
{ | |
"epoch": 1.0121518987341773, | |
"grad_norm": 0.14393620775276017, | |
"learning_rate": 8.369247112224901e-06, | |
"loss": 0.5912, | |
"step": 334 | |
}, | |
{ | |
"epoch": 1.0151898734177216, | |
"grad_norm": 0.1707656785677369, | |
"learning_rate": 8.356156096704516e-06, | |
"loss": 0.6168, | |
"step": 335 | |
}, | |
{ | |
"epoch": 1.0182278481012659, | |
"grad_norm": 0.1408052023777488, | |
"learning_rate": 8.343023074779368e-06, | |
"loss": 0.5443, | |
"step": 336 | |
}, | |
{ | |
"epoch": 1.0212658227848102, | |
"grad_norm": 0.15435053976419325, | |
"learning_rate": 8.329848210825322e-06, | |
"loss": 0.5769, | |
"step": 337 | |
}, | |
{ | |
"epoch": 1.0243037974683544, | |
"grad_norm": 0.1474285624791992, | |
"learning_rate": 8.316631669741934e-06, | |
"loss": 0.5507, | |
"step": 338 | |
}, | |
{ | |
"epoch": 1.0273417721518987, | |
"grad_norm": 0.13958495408136215, | |
"learning_rate": 8.303373616950408e-06, | |
"loss": 0.5182, | |
"step": 339 | |
}, | |
{ | |
"epoch": 1.030379746835443, | |
"grad_norm": 0.13214638415892638, | |
"learning_rate": 8.290074218391515e-06, | |
"loss": 0.5391, | |
"step": 340 | |
}, | |
{ | |
"epoch": 1.0334177215189873, | |
"grad_norm": 0.14532819580539497, | |
"learning_rate": 8.27673364052352e-06, | |
"loss": 0.5657, | |
"step": 341 | |
}, | |
{ | |
"epoch": 1.0364556962025318, | |
"grad_norm": 0.15512970626334552, | |
"learning_rate": 8.263352050320094e-06, | |
"loss": 0.5656, | |
"step": 342 | |
}, | |
{ | |
"epoch": 1.039493670886076, | |
"grad_norm": 0.14663572118341192, | |
"learning_rate": 8.249929615268234e-06, | |
"loss": 0.5628, | |
"step": 343 | |
}, | |
{ | |
"epoch": 1.0425316455696203, | |
"grad_norm": 0.14225503452449526, | |
"learning_rate": 8.236466503366155e-06, | |
"loss": 0.5692, | |
"step": 344 | |
}, | |
{ | |
"epoch": 1.0455696202531646, | |
"grad_norm": 0.14716192192280636, | |
"learning_rate": 8.222962883121196e-06, | |
"loss": 0.5971, | |
"step": 345 | |
}, | |
{ | |
"epoch": 1.0486075949367089, | |
"grad_norm": 0.14798497203430436, | |
"learning_rate": 8.209418923547706e-06, | |
"loss": 0.5534, | |
"step": 346 | |
}, | |
{ | |
"epoch": 1.0516455696202531, | |
"grad_norm": 0.1428993625557661, | |
"learning_rate": 8.195834794164925e-06, | |
"loss": 0.5548, | |
"step": 347 | |
}, | |
{ | |
"epoch": 1.0546835443037974, | |
"grad_norm": 0.1489771128374146, | |
"learning_rate": 8.182210664994879e-06, | |
"loss": 0.5465, | |
"step": 348 | |
}, | |
{ | |
"epoch": 1.0577215189873417, | |
"grad_norm": 0.14404057693239417, | |
"learning_rate": 8.168546706560231e-06, | |
"loss": 0.566, | |
"step": 349 | |
}, | |
{ | |
"epoch": 1.0607594936708862, | |
"grad_norm": 0.14320281952903854, | |
"learning_rate": 8.154843089882159e-06, | |
"loss": 0.5618, | |
"step": 350 | |
}, | |
{ | |
"epoch": 1.0637974683544305, | |
"grad_norm": 0.14217364132694757, | |
"learning_rate": 8.141099986478212e-06, | |
"loss": 0.5758, | |
"step": 351 | |
}, | |
{ | |
"epoch": 1.0668354430379747, | |
"grad_norm": 0.14157409729112905, | |
"learning_rate": 8.127317568360164e-06, | |
"loss": 0.5528, | |
"step": 352 | |
}, | |
{ | |
"epoch": 1.069873417721519, | |
"grad_norm": 0.14493308862688992, | |
"learning_rate": 8.113496008031863e-06, | |
"loss": 0.5814, | |
"step": 353 | |
}, | |
{ | |
"epoch": 1.0729113924050633, | |
"grad_norm": 0.14532728499404404, | |
"learning_rate": 8.099635478487064e-06, | |
"loss": 0.5173, | |
"step": 354 | |
}, | |
{ | |
"epoch": 1.0759493670886076, | |
"grad_norm": 0.1492872905556815, | |
"learning_rate": 8.085736153207277e-06, | |
"loss": 0.562, | |
"step": 355 | |
}, | |
{ | |
"epoch": 1.0789873417721518, | |
"grad_norm": 0.13525206039788887, | |
"learning_rate": 8.07179820615958e-06, | |
"loss": 0.5522, | |
"step": 356 | |
}, | |
{ | |
"epoch": 1.082025316455696, | |
"grad_norm": 0.15253157854907176, | |
"learning_rate": 8.057821811794457e-06, | |
"loss": 0.6107, | |
"step": 357 | |
}, | |
{ | |
"epoch": 1.0850632911392406, | |
"grad_norm": 0.14063398627766024, | |
"learning_rate": 8.043807145043604e-06, | |
"loss": 0.5735, | |
"step": 358 | |
}, | |
{ | |
"epoch": 1.0881012658227849, | |
"grad_norm": 0.14389818718167977, | |
"learning_rate": 8.029754381317741e-06, | |
"loss": 0.5334, | |
"step": 359 | |
}, | |
{ | |
"epoch": 1.0911392405063292, | |
"grad_norm": 0.14167958700306557, | |
"learning_rate": 8.015663696504424e-06, | |
"loss": 0.5773, | |
"step": 360 | |
}, | |
{ | |
"epoch": 1.0941772151898734, | |
"grad_norm": 0.1401169247223358, | |
"learning_rate": 8.001535266965829e-06, | |
"loss": 0.5568, | |
"step": 361 | |
}, | |
{ | |
"epoch": 1.0972151898734177, | |
"grad_norm": 0.14447092652766144, | |
"learning_rate": 7.987369269536563e-06, | |
"loss": 0.5424, | |
"step": 362 | |
}, | |
{ | |
"epoch": 1.100253164556962, | |
"grad_norm": 0.13562624939969367, | |
"learning_rate": 7.973165881521435e-06, | |
"loss": 0.549, | |
"step": 363 | |
}, | |
{ | |
"epoch": 1.1032911392405063, | |
"grad_norm": 0.1449306113760785, | |
"learning_rate": 7.958925280693243e-06, | |
"loss": 0.5304, | |
"step": 364 | |
}, | |
{ | |
"epoch": 1.1063291139240505, | |
"grad_norm": 0.14540166205441302, | |
"learning_rate": 7.944647645290555e-06, | |
"loss": 0.571, | |
"step": 365 | |
}, | |
{ | |
"epoch": 1.109367088607595, | |
"grad_norm": 0.15144055477718635, | |
"learning_rate": 7.930333154015467e-06, | |
"loss": 0.5493, | |
"step": 366 | |
}, | |
{ | |
"epoch": 1.1124050632911393, | |
"grad_norm": 0.14154469089239974, | |
"learning_rate": 7.915981986031367e-06, | |
"loss": 0.606, | |
"step": 367 | |
}, | |
{ | |
"epoch": 1.1154430379746836, | |
"grad_norm": 0.13991802435102235, | |
"learning_rate": 7.901594320960709e-06, | |
"loss": 0.5572, | |
"step": 368 | |
}, | |
{ | |
"epoch": 1.1184810126582279, | |
"grad_norm": 0.13313075315830625, | |
"learning_rate": 7.887170338882742e-06, | |
"loss": 0.5733, | |
"step": 369 | |
}, | |
{ | |
"epoch": 1.1215189873417721, | |
"grad_norm": 0.14195483359233327, | |
"learning_rate": 7.872710220331271e-06, | |
"loss": 0.585, | |
"step": 370 | |
}, | |
{ | |
"epoch": 1.1245569620253164, | |
"grad_norm": 0.13931767214524954, | |
"learning_rate": 7.858214146292394e-06, | |
"loss": 0.5445, | |
"step": 371 | |
}, | |
{ | |
"epoch": 1.1275949367088607, | |
"grad_norm": 0.14365199015958316, | |
"learning_rate": 7.843682298202235e-06, | |
"loss": 0.5742, | |
"step": 372 | |
}, | |
{ | |
"epoch": 1.130632911392405, | |
"grad_norm": 0.13892352185640874, | |
"learning_rate": 7.829114857944672e-06, | |
"loss": 0.5644, | |
"step": 373 | |
}, | |
{ | |
"epoch": 1.1336708860759495, | |
"grad_norm": 0.14101201867485888, | |
"learning_rate": 7.814512007849069e-06, | |
"loss": 0.5616, | |
"step": 374 | |
}, | |
{ | |
"epoch": 1.1367088607594937, | |
"grad_norm": 0.1356625238961039, | |
"learning_rate": 7.799873930687979e-06, | |
"loss": 0.5645, | |
"step": 375 | |
}, | |
{ | |
"epoch": 1.139746835443038, | |
"grad_norm": 0.13564046321608458, | |
"learning_rate": 7.785200809674869e-06, | |
"loss": 0.5798, | |
"step": 376 | |
}, | |
{ | |
"epoch": 1.1427848101265823, | |
"grad_norm": 0.14799014090819052, | |
"learning_rate": 7.770492828461824e-06, | |
"loss": 0.5695, | |
"step": 377 | |
}, | |
{ | |
"epoch": 1.1458227848101266, | |
"grad_norm": 0.1347310269977235, | |
"learning_rate": 7.755750171137245e-06, | |
"loss": 0.5783, | |
"step": 378 | |
}, | |
{ | |
"epoch": 1.1488607594936708, | |
"grad_norm": 0.1405798639605091, | |
"learning_rate": 7.74097302222355e-06, | |
"loss": 0.5589, | |
"step": 379 | |
}, | |
{ | |
"epoch": 1.1518987341772151, | |
"grad_norm": 0.14596056892317547, | |
"learning_rate": 7.726161566674856e-06, | |
"loss": 0.5548, | |
"step": 380 | |
}, | |
{ | |
"epoch": 1.1549367088607596, | |
"grad_norm": 0.14401713097153115, | |
"learning_rate": 7.711315989874677e-06, | |
"loss": 0.5759, | |
"step": 381 | |
}, | |
{ | |
"epoch": 1.1579746835443039, | |
"grad_norm": 0.14374703144254186, | |
"learning_rate": 7.696436477633588e-06, | |
"loss": 0.5391, | |
"step": 382 | |
}, | |
{ | |
"epoch": 1.1610126582278482, | |
"grad_norm": 0.14766321403035243, | |
"learning_rate": 7.681523216186912e-06, | |
"loss": 0.5883, | |
"step": 383 | |
}, | |
{ | |
"epoch": 1.1640506329113924, | |
"grad_norm": 0.1375940357437819, | |
"learning_rate": 7.666576392192389e-06, | |
"loss": 0.5509, | |
"step": 384 | |
}, | |
{ | |
"epoch": 1.1670886075949367, | |
"grad_norm": 0.1381506679407275, | |
"learning_rate": 7.651596192727826e-06, | |
"loss": 0.529, | |
"step": 385 | |
}, | |
{ | |
"epoch": 1.170126582278481, | |
"grad_norm": 0.13807613640205604, | |
"learning_rate": 7.636582805288771e-06, | |
"loss": 0.5022, | |
"step": 386 | |
}, | |
{ | |
"epoch": 1.1731645569620253, | |
"grad_norm": 0.14272839656072073, | |
"learning_rate": 7.621536417786159e-06, | |
"loss": 0.5648, | |
"step": 387 | |
}, | |
{ | |
"epoch": 1.1762025316455695, | |
"grad_norm": 0.14465724664013882, | |
"learning_rate": 7.606457218543961e-06, | |
"loss": 0.5873, | |
"step": 388 | |
}, | |
{ | |
"epoch": 1.1792405063291138, | |
"grad_norm": 0.14449484147463795, | |
"learning_rate": 7.5913453962968296e-06, | |
"loss": 0.5474, | |
"step": 389 | |
}, | |
{ | |
"epoch": 1.1822784810126583, | |
"grad_norm": 0.14376467882067412, | |
"learning_rate": 7.576201140187727e-06, | |
"loss": 0.5545, | |
"step": 390 | |
}, | |
{ | |
"epoch": 1.1853164556962026, | |
"grad_norm": 0.14202851308765727, | |
"learning_rate": 7.5610246397655715e-06, | |
"loss": 0.5828, | |
"step": 391 | |
}, | |
{ | |
"epoch": 1.1883544303797469, | |
"grad_norm": 0.1462502151481694, | |
"learning_rate": 7.54581608498286e-06, | |
"loss": 0.5873, | |
"step": 392 | |
}, | |
{ | |
"epoch": 1.1913924050632911, | |
"grad_norm": 0.13990538383436923, | |
"learning_rate": 7.530575666193283e-06, | |
"loss": 0.5592, | |
"step": 393 | |
}, | |
{ | |
"epoch": 1.1944303797468354, | |
"grad_norm": 0.15276225499982546, | |
"learning_rate": 7.515303574149348e-06, | |
"loss": 0.5807, | |
"step": 394 | |
}, | |
{ | |
"epoch": 1.1974683544303797, | |
"grad_norm": 0.1379333410631157, | |
"learning_rate": 7.500000000000001e-06, | |
"loss": 0.5337, | |
"step": 395 | |
}, | |
{ | |
"epoch": 1.200506329113924, | |
"grad_norm": 0.13943056308693977, | |
"learning_rate": 7.484665135288214e-06, | |
"loss": 0.5438, | |
"step": 396 | |
}, | |
{ | |
"epoch": 1.2035443037974685, | |
"grad_norm": 0.13856734895253275, | |
"learning_rate": 7.469299171948608e-06, | |
"loss": 0.5476, | |
"step": 397 | |
}, | |
{ | |
"epoch": 1.2065822784810127, | |
"grad_norm": 0.13893892952476672, | |
"learning_rate": 7.453902302305032e-06, | |
"loss": 0.5564, | |
"step": 398 | |
}, | |
{ | |
"epoch": 1.209620253164557, | |
"grad_norm": 0.14952936713448245, | |
"learning_rate": 7.438474719068174e-06, | |
"loss": 0.5283, | |
"step": 399 | |
}, | |
{ | |
"epoch": 1.2126582278481013, | |
"grad_norm": 0.13855561393837743, | |
"learning_rate": 7.423016615333135e-06, | |
"loss": 0.5426, | |
"step": 400 | |
}, | |
{ | |
"epoch": 1.2156962025316456, | |
"grad_norm": 0.14582609657438356, | |
"learning_rate": 7.4075281845770196e-06, | |
"loss": 0.5555, | |
"step": 401 | |
}, | |
{ | |
"epoch": 1.2187341772151898, | |
"grad_norm": 0.14624318192875743, | |
"learning_rate": 7.392009620656513e-06, | |
"loss": 0.5727, | |
"step": 402 | |
}, | |
{ | |
"epoch": 1.2217721518987341, | |
"grad_norm": 0.14098369650646345, | |
"learning_rate": 7.37646111780545e-06, | |
"loss": 0.5825, | |
"step": 403 | |
}, | |
{ | |
"epoch": 1.2248101265822784, | |
"grad_norm": 0.14295467837134232, | |
"learning_rate": 7.360882870632393e-06, | |
"loss": 0.5587, | |
"step": 404 | |
}, | |
{ | |
"epoch": 1.2278481012658227, | |
"grad_norm": 0.13475859762040648, | |
"learning_rate": 7.3452750741181855e-06, | |
"loss": 0.5642, | |
"step": 405 | |
}, | |
{ | |
"epoch": 1.2308860759493672, | |
"grad_norm": 0.1397869789917611, | |
"learning_rate": 7.329637923613522e-06, | |
"loss": 0.5167, | |
"step": 406 | |
}, | |
{ | |
"epoch": 1.2339240506329114, | |
"grad_norm": 0.14008874268991642, | |
"learning_rate": 7.313971614836496e-06, | |
"loss": 0.5418, | |
"step": 407 | |
}, | |
{ | |
"epoch": 1.2369620253164557, | |
"grad_norm": 0.14927580619892877, | |
"learning_rate": 7.298276343870152e-06, | |
"loss": 0.5826, | |
"step": 408 | |
}, | |
{ | |
"epoch": 1.24, | |
"grad_norm": 0.14527571180153181, | |
"learning_rate": 7.282552307160033e-06, | |
"loss": 0.5518, | |
"step": 409 | |
}, | |
{ | |
"epoch": 1.2430379746835443, | |
"grad_norm": 0.14489892811821217, | |
"learning_rate": 7.26679970151172e-06, | |
"loss": 0.5499, | |
"step": 410 | |
}, | |
{ | |
"epoch": 1.2460759493670885, | |
"grad_norm": 0.1451134850602267, | |
"learning_rate": 7.251018724088367e-06, | |
"loss": 0.579, | |
"step": 411 | |
}, | |
{ | |
"epoch": 1.2491139240506328, | |
"grad_norm": 0.13242470106668042, | |
"learning_rate": 7.235209572408241e-06, | |
"loss": 0.5563, | |
"step": 412 | |
}, | |
{ | |
"epoch": 1.2521518987341773, | |
"grad_norm": 0.14805669523665416, | |
"learning_rate": 7.2193724443422405e-06, | |
"loss": 0.6147, | |
"step": 413 | |
}, | |
{ | |
"epoch": 1.2551898734177216, | |
"grad_norm": 0.1302889421614733, | |
"learning_rate": 7.203507538111423e-06, | |
"loss": 0.5567, | |
"step": 414 | |
}, | |
{ | |
"epoch": 1.2582278481012659, | |
"grad_norm": 0.14285991469932954, | |
"learning_rate": 7.187615052284522e-06, | |
"loss": 0.5718, | |
"step": 415 | |
}, | |
{ | |
"epoch": 1.2612658227848101, | |
"grad_norm": 0.15128709925990969, | |
"learning_rate": 7.171695185775468e-06, | |
"loss": 0.5581, | |
"step": 416 | |
}, | |
{ | |
"epoch": 1.2643037974683544, | |
"grad_norm": 0.1420485997443191, | |
"learning_rate": 7.155748137840892e-06, | |
"loss": 0.574, | |
"step": 417 | |
}, | |
{ | |
"epoch": 1.2673417721518987, | |
"grad_norm": 0.1399838719737584, | |
"learning_rate": 7.139774108077633e-06, | |
"loss": 0.569, | |
"step": 418 | |
}, | |
{ | |
"epoch": 1.270379746835443, | |
"grad_norm": 0.13965944387092968, | |
"learning_rate": 7.12377329642024e-06, | |
"loss": 0.5445, | |
"step": 419 | |
}, | |
{ | |
"epoch": 1.2734177215189875, | |
"grad_norm": 0.149148740952187, | |
"learning_rate": 7.107745903138472e-06, | |
"loss": 0.5923, | |
"step": 420 | |
}, | |
{ | |
"epoch": 1.2764556962025315, | |
"grad_norm": 0.1421914319854205, | |
"learning_rate": 7.09169212883479e-06, | |
"loss": 0.5732, | |
"step": 421 | |
}, | |
{ | |
"epoch": 1.279493670886076, | |
"grad_norm": 0.1498434614111481, | |
"learning_rate": 7.075612174441846e-06, | |
"loss": 0.5646, | |
"step": 422 | |
}, | |
{ | |
"epoch": 1.2825316455696203, | |
"grad_norm": 0.13979599958581326, | |
"learning_rate": 7.059506241219964e-06, | |
"loss": 0.584, | |
"step": 423 | |
}, | |
{ | |
"epoch": 1.2855696202531646, | |
"grad_norm": 0.15321777078459725, | |
"learning_rate": 7.04337453075463e-06, | |
"loss": 0.5634, | |
"step": 424 | |
}, | |
{ | |
"epoch": 1.2886075949367088, | |
"grad_norm": 0.13990758699960726, | |
"learning_rate": 7.027217244953958e-06, | |
"loss": 0.5434, | |
"step": 425 | |
}, | |
{ | |
"epoch": 1.2916455696202531, | |
"grad_norm": 0.14493433075593967, | |
"learning_rate": 7.011034586046177e-06, | |
"loss": 0.5495, | |
"step": 426 | |
}, | |
{ | |
"epoch": 1.2946835443037974, | |
"grad_norm": 0.15209369171144482, | |
"learning_rate": 6.994826756577082e-06, | |
"loss": 0.5894, | |
"step": 427 | |
}, | |
{ | |
"epoch": 1.2977215189873417, | |
"grad_norm": 0.14699225509678257, | |
"learning_rate": 6.978593959407516e-06, | |
"loss": 0.5752, | |
"step": 428 | |
}, | |
{ | |
"epoch": 1.3007594936708862, | |
"grad_norm": 0.1497901452958901, | |
"learning_rate": 6.962336397710819e-06, | |
"loss": 0.5811, | |
"step": 429 | |
}, | |
{ | |
"epoch": 1.3037974683544304, | |
"grad_norm": 0.15080869333540273, | |
"learning_rate": 6.946054274970292e-06, | |
"loss": 0.5838, | |
"step": 430 | |
}, | |
{ | |
"epoch": 1.3068354430379747, | |
"grad_norm": 0.14750628950150269, | |
"learning_rate": 6.9297477949766445e-06, | |
"loss": 0.5655, | |
"step": 431 | |
}, | |
{ | |
"epoch": 1.309873417721519, | |
"grad_norm": 0.1495028601788689, | |
"learning_rate": 6.913417161825449e-06, | |
"loss": 0.557, | |
"step": 432 | |
}, | |
{ | |
"epoch": 1.3129113924050633, | |
"grad_norm": 0.135568113963619, | |
"learning_rate": 6.897062579914587e-06, | |
"loss": 0.5473, | |
"step": 433 | |
}, | |
{ | |
"epoch": 1.3159493670886075, | |
"grad_norm": 0.1409983877447637, | |
"learning_rate": 6.88068425394168e-06, | |
"loss": 0.5412, | |
"step": 434 | |
}, | |
{ | |
"epoch": 1.3189873417721518, | |
"grad_norm": 0.14195644818160238, | |
"learning_rate": 6.864282388901544e-06, | |
"loss": 0.5467, | |
"step": 435 | |
}, | |
{ | |
"epoch": 1.3220253164556963, | |
"grad_norm": 0.14696388488045273, | |
"learning_rate": 6.847857190083611e-06, | |
"loss": 0.5622, | |
"step": 436 | |
}, | |
{ | |
"epoch": 1.3250632911392404, | |
"grad_norm": 0.13234163090116136, | |
"learning_rate": 6.831408863069364e-06, | |
"loss": 0.54, | |
"step": 437 | |
}, | |
{ | |
"epoch": 1.3281012658227849, | |
"grad_norm": 0.1533823957396425, | |
"learning_rate": 6.814937613729766e-06, | |
"loss": 0.5514, | |
"step": 438 | |
}, | |
{ | |
"epoch": 1.3311392405063291, | |
"grad_norm": 0.15116390246065511, | |
"learning_rate": 6.79844364822268e-06, | |
"loss": 0.5368, | |
"step": 439 | |
}, | |
{ | |
"epoch": 1.3341772151898734, | |
"grad_norm": 0.14590384620413216, | |
"learning_rate": 6.781927172990285e-06, | |
"loss": 0.5417, | |
"step": 440 | |
}, | |
{ | |
"epoch": 1.3372151898734177, | |
"grad_norm": 0.1418688933481456, | |
"learning_rate": 6.765388394756504e-06, | |
"loss": 0.5488, | |
"step": 441 | |
}, | |
{ | |
"epoch": 1.340253164556962, | |
"grad_norm": 0.13505338358252206, | |
"learning_rate": 6.748827520524406e-06, | |
"loss": 0.5328, | |
"step": 442 | |
}, | |
{ | |
"epoch": 1.3432911392405062, | |
"grad_norm": 0.14825841588326408, | |
"learning_rate": 6.732244757573619e-06, | |
"loss": 0.5539, | |
"step": 443 | |
}, | |
{ | |
"epoch": 1.3463291139240505, | |
"grad_norm": 0.14176422736895686, | |
"learning_rate": 6.715640313457733e-06, | |
"loss": 0.5239, | |
"step": 444 | |
}, | |
{ | |
"epoch": 1.349367088607595, | |
"grad_norm": 0.15025498831454065, | |
"learning_rate": 6.699014396001707e-06, | |
"loss": 0.5576, | |
"step": 445 | |
}, | |
{ | |
"epoch": 1.3524050632911393, | |
"grad_norm": 0.14849242454803074, | |
"learning_rate": 6.682367213299264e-06, | |
"loss": 0.5901, | |
"step": 446 | |
}, | |
{ | |
"epoch": 1.3554430379746836, | |
"grad_norm": 0.1337453174485716, | |
"learning_rate": 6.665698973710289e-06, | |
"loss": 0.543, | |
"step": 447 | |
}, | |
{ | |
"epoch": 1.3584810126582278, | |
"grad_norm": 0.16216307783943593, | |
"learning_rate": 6.6490098858582176e-06, | |
"loss": 0.6144, | |
"step": 448 | |
}, | |
{ | |
"epoch": 1.3615189873417721, | |
"grad_norm": 0.15076621656691633, | |
"learning_rate": 6.632300158627427e-06, | |
"loss": 0.5305, | |
"step": 449 | |
}, | |
{ | |
"epoch": 1.3645569620253164, | |
"grad_norm": 0.13592067804238195, | |
"learning_rate": 6.615570001160626e-06, | |
"loss": 0.5518, | |
"step": 450 | |
}, | |
{ | |
"epoch": 1.3675949367088607, | |
"grad_norm": 0.14079801039067172, | |
"learning_rate": 6.598819622856227e-06, | |
"loss": 0.5732, | |
"step": 451 | |
}, | |
{ | |
"epoch": 1.3706329113924052, | |
"grad_norm": 0.14457330915355807, | |
"learning_rate": 6.582049233365734e-06, | |
"loss": 0.5729, | |
"step": 452 | |
}, | |
{ | |
"epoch": 1.3736708860759494, | |
"grad_norm": 0.15094714189957395, | |
"learning_rate": 6.565259042591112e-06, | |
"loss": 0.5934, | |
"step": 453 | |
}, | |
{ | |
"epoch": 1.3767088607594937, | |
"grad_norm": 0.1466922947639145, | |
"learning_rate": 6.548449260682169e-06, | |
"loss": 0.5623, | |
"step": 454 | |
}, | |
{ | |
"epoch": 1.379746835443038, | |
"grad_norm": 0.1400123201876512, | |
"learning_rate": 6.531620098033919e-06, | |
"loss": 0.5273, | |
"step": 455 | |
}, | |
{ | |
"epoch": 1.3827848101265823, | |
"grad_norm": 0.14133958564834126, | |
"learning_rate": 6.514771765283942e-06, | |
"loss": 0.5396, | |
"step": 456 | |
}, | |
{ | |
"epoch": 1.3858227848101266, | |
"grad_norm": 0.14258031825522197, | |
"learning_rate": 6.497904473309766e-06, | |
"loss": 0.5625, | |
"step": 457 | |
}, | |
{ | |
"epoch": 1.3888607594936708, | |
"grad_norm": 0.14545898625012454, | |
"learning_rate": 6.481018433226212e-06, | |
"loss": 0.5442, | |
"step": 458 | |
}, | |
{ | |
"epoch": 1.3918987341772153, | |
"grad_norm": 0.1424592407904095, | |
"learning_rate": 6.464113856382752e-06, | |
"loss": 0.5796, | |
"step": 459 | |
}, | |
{ | |
"epoch": 1.3949367088607594, | |
"grad_norm": 0.14307497989204254, | |
"learning_rate": 6.447190954360878e-06, | |
"loss": 0.5323, | |
"step": 460 | |
}, | |
{ | |
"epoch": 1.3979746835443039, | |
"grad_norm": 0.1512754411710344, | |
"learning_rate": 6.430249938971438e-06, | |
"loss": 0.561, | |
"step": 461 | |
}, | |
{ | |
"epoch": 1.4010126582278482, | |
"grad_norm": 0.14687045523315304, | |
"learning_rate": 6.41329102225199e-06, | |
"loss": 0.569, | |
"step": 462 | |
}, | |
{ | |
"epoch": 1.4040506329113924, | |
"grad_norm": 0.14183379480187908, | |
"learning_rate": 6.396314416464151e-06, | |
"loss": 0.5528, | |
"step": 463 | |
}, | |
{ | |
"epoch": 1.4070886075949367, | |
"grad_norm": 0.14453430336620318, | |
"learning_rate": 6.37932033409094e-06, | |
"loss": 0.5445, | |
"step": 464 | |
}, | |
{ | |
"epoch": 1.410126582278481, | |
"grad_norm": 0.14993745856428103, | |
"learning_rate": 6.3623089878341146e-06, | |
"loss": 0.554, | |
"step": 465 | |
}, | |
{ | |
"epoch": 1.4131645569620253, | |
"grad_norm": 0.13473778660354907, | |
"learning_rate": 6.345280590611512e-06, | |
"loss": 0.5639, | |
"step": 466 | |
}, | |
{ | |
"epoch": 1.4162025316455695, | |
"grad_norm": 0.1467946635023631, | |
"learning_rate": 6.328235355554382e-06, | |
"loss": 0.5435, | |
"step": 467 | |
}, | |
{ | |
"epoch": 1.419240506329114, | |
"grad_norm": 0.14618662658256054, | |
"learning_rate": 6.311173496004723e-06, | |
"loss": 0.5966, | |
"step": 468 | |
}, | |
{ | |
"epoch": 1.4222784810126583, | |
"grad_norm": 0.13742911095501387, | |
"learning_rate": 6.294095225512604e-06, | |
"loss": 0.5519, | |
"step": 469 | |
}, | |
{ | |
"epoch": 1.4253164556962026, | |
"grad_norm": 0.14077085256802702, | |
"learning_rate": 6.2770007578335044e-06, | |
"loss": 0.5751, | |
"step": 470 | |
}, | |
{ | |
"epoch": 1.4283544303797469, | |
"grad_norm": 0.14400912403186725, | |
"learning_rate": 6.259890306925627e-06, | |
"loss": 0.5635, | |
"step": 471 | |
}, | |
{ | |
"epoch": 1.4313924050632911, | |
"grad_norm": 0.16384884804730898, | |
"learning_rate": 6.2427640869472235e-06, | |
"loss": 0.6224, | |
"step": 472 | |
}, | |
{ | |
"epoch": 1.4344303797468354, | |
"grad_norm": 0.14311619239647858, | |
"learning_rate": 6.225622312253916e-06, | |
"loss": 0.5079, | |
"step": 473 | |
}, | |
{ | |
"epoch": 1.4374683544303797, | |
"grad_norm": 0.14122193279872083, | |
"learning_rate": 6.208465197396013e-06, | |
"loss": 0.5707, | |
"step": 474 | |
}, | |
{ | |
"epoch": 1.4405063291139242, | |
"grad_norm": 0.1364437220835936, | |
"learning_rate": 6.191292957115825e-06, | |
"loss": 0.5401, | |
"step": 475 | |
}, | |
{ | |
"epoch": 1.4435443037974682, | |
"grad_norm": 0.15264933943847964, | |
"learning_rate": 6.174105806344975e-06, | |
"loss": 0.5493, | |
"step": 476 | |
}, | |
{ | |
"epoch": 1.4465822784810127, | |
"grad_norm": 0.16129908194882306, | |
"learning_rate": 6.156903960201709e-06, | |
"loss": 0.6069, | |
"step": 477 | |
}, | |
{ | |
"epoch": 1.449620253164557, | |
"grad_norm": 0.1618394903261924, | |
"learning_rate": 6.1396876339882e-06, | |
"loss": 0.5896, | |
"step": 478 | |
}, | |
{ | |
"epoch": 1.4526582278481013, | |
"grad_norm": 0.14479158934168623, | |
"learning_rate": 6.122457043187863e-06, | |
"loss": 0.6691, | |
"step": 479 | |
}, | |
{ | |
"epoch": 1.4556962025316456, | |
"grad_norm": 0.15826960180874897, | |
"learning_rate": 6.10521240346265e-06, | |
"loss": 0.5565, | |
"step": 480 | |
}, | |
{ | |
"epoch": 1.4587341772151898, | |
"grad_norm": 0.14313720461361779, | |
"learning_rate": 6.087953930650349e-06, | |
"loss": 0.5577, | |
"step": 481 | |
}, | |
{ | |
"epoch": 1.461772151898734, | |
"grad_norm": 0.14961067467424582, | |
"learning_rate": 6.070681840761889e-06, | |
"loss": 0.5598, | |
"step": 482 | |
}, | |
{ | |
"epoch": 1.4648101265822784, | |
"grad_norm": 0.14055235040717554, | |
"learning_rate": 6.053396349978632e-06, | |
"loss": 0.5529, | |
"step": 483 | |
}, | |
{ | |
"epoch": 1.4678481012658229, | |
"grad_norm": 0.13969576574961248, | |
"learning_rate": 6.036097674649672e-06, | |
"loss": 0.5227, | |
"step": 484 | |
}, | |
{ | |
"epoch": 1.4708860759493672, | |
"grad_norm": 0.14004461937572002, | |
"learning_rate": 6.018786031289119e-06, | |
"loss": 0.5596, | |
"step": 485 | |
}, | |
{ | |
"epoch": 1.4739240506329114, | |
"grad_norm": 0.13850250529404448, | |
"learning_rate": 6.001461636573397e-06, | |
"loss": 0.5724, | |
"step": 486 | |
}, | |
{ | |
"epoch": 1.4769620253164557, | |
"grad_norm": 0.1449191119243632, | |
"learning_rate": 5.984124707338528e-06, | |
"loss": 0.5728, | |
"step": 487 | |
}, | |
{ | |
"epoch": 1.48, | |
"grad_norm": 0.14942587196237087, | |
"learning_rate": 5.966775460577418e-06, | |
"loss": 0.5568, | |
"step": 488 | |
}, | |
{ | |
"epoch": 1.4830379746835443, | |
"grad_norm": 0.1440922653563269, | |
"learning_rate": 5.949414113437142e-06, | |
"loss": 0.5308, | |
"step": 489 | |
}, | |
{ | |
"epoch": 1.4860759493670885, | |
"grad_norm": 0.1458257665739855, | |
"learning_rate": 5.932040883216228e-06, | |
"loss": 0.5414, | |
"step": 490 | |
}, | |
{ | |
"epoch": 1.489113924050633, | |
"grad_norm": 0.13936168878725216, | |
"learning_rate": 5.914655987361934e-06, | |
"loss": 0.5689, | |
"step": 491 | |
}, | |
{ | |
"epoch": 1.492151898734177, | |
"grad_norm": 0.14337072371461196, | |
"learning_rate": 5.897259643467528e-06, | |
"loss": 0.5634, | |
"step": 492 | |
}, | |
{ | |
"epoch": 1.4951898734177216, | |
"grad_norm": 0.1487136821718494, | |
"learning_rate": 5.8798520692695605e-06, | |
"loss": 0.5603, | |
"step": 493 | |
}, | |
{ | |
"epoch": 1.4982278481012659, | |
"grad_norm": 0.15621189055198445, | |
"learning_rate": 5.862433482645151e-06, | |
"loss": 0.5822, | |
"step": 494 | |
}, | |
{ | |
"epoch": 1.5012658227848101, | |
"grad_norm": 0.1452785414833775, | |
"learning_rate": 5.8450041016092465e-06, | |
"loss": 0.5745, | |
"step": 495 | |
}, | |
{ | |
"epoch": 1.5043037974683544, | |
"grad_norm": 0.14691813312407356, | |
"learning_rate": 5.8275641443119015e-06, | |
"loss": 0.5651, | |
"step": 496 | |
}, | |
{ | |
"epoch": 1.5073417721518987, | |
"grad_norm": 0.1422962192244908, | |
"learning_rate": 5.810113829035544e-06, | |
"loss": 0.5694, | |
"step": 497 | |
}, | |
{ | |
"epoch": 1.5103797468354432, | |
"grad_norm": 0.13687442215416604, | |
"learning_rate": 5.792653374192245e-06, | |
"loss": 0.5199, | |
"step": 498 | |
}, | |
{ | |
"epoch": 1.5134177215189872, | |
"grad_norm": 0.15559785355845704, | |
"learning_rate": 5.77518299832099e-06, | |
"loss": 0.5667, | |
"step": 499 | |
}, | |
{ | |
"epoch": 1.5164556962025317, | |
"grad_norm": 0.1433021811228919, | |
"learning_rate": 5.757702920084931e-06, | |
"loss": 0.534, | |
"step": 500 | |
}, | |
{ | |
"epoch": 1.5194936708860758, | |
"grad_norm": 0.14391604046214512, | |
"learning_rate": 5.740213358268658e-06, | |
"loss": 0.5672, | |
"step": 501 | |
}, | |
{ | |
"epoch": 1.5225316455696203, | |
"grad_norm": 0.13734803599229806, | |
"learning_rate": 5.722714531775463e-06, | |
"loss": 0.5524, | |
"step": 502 | |
}, | |
{ | |
"epoch": 1.5255696202531646, | |
"grad_norm": 0.13897178367227836, | |
"learning_rate": 5.705206659624597e-06, | |
"loss": 0.5246, | |
"step": 503 | |
}, | |
{ | |
"epoch": 1.5286075949367088, | |
"grad_norm": 0.14789109970803385, | |
"learning_rate": 5.687689960948526e-06, | |
"loss": 0.5727, | |
"step": 504 | |
}, | |
{ | |
"epoch": 1.5316455696202531, | |
"grad_norm": 0.15653372973953597, | |
"learning_rate": 5.670164654990189e-06, | |
"loss": 0.5685, | |
"step": 505 | |
}, | |
{ | |
"epoch": 1.5346835443037974, | |
"grad_norm": 0.14160505984214866, | |
"learning_rate": 5.65263096110026e-06, | |
"loss": 0.5333, | |
"step": 506 | |
}, | |
{ | |
"epoch": 1.5377215189873419, | |
"grad_norm": 0.14372485623337306, | |
"learning_rate": 5.635089098734394e-06, | |
"loss": 0.5556, | |
"step": 507 | |
}, | |
{ | |
"epoch": 1.540759493670886, | |
"grad_norm": 0.14182995906457416, | |
"learning_rate": 5.617539287450492e-06, | |
"loss": 0.5597, | |
"step": 508 | |
}, | |
{ | |
"epoch": 1.5437974683544304, | |
"grad_norm": 0.14039262485132767, | |
"learning_rate": 5.599981746905935e-06, | |
"loss": 0.5309, | |
"step": 509 | |
}, | |
{ | |
"epoch": 1.5468354430379747, | |
"grad_norm": 0.14032897325734403, | |
"learning_rate": 5.582416696854853e-06, | |
"loss": 0.5665, | |
"step": 510 | |
}, | |
{ | |
"epoch": 1.549873417721519, | |
"grad_norm": 0.1434217675146914, | |
"learning_rate": 5.564844357145365e-06, | |
"loss": 0.5555, | |
"step": 511 | |
}, | |
{ | |
"epoch": 1.5529113924050633, | |
"grad_norm": 0.14771494262754153, | |
"learning_rate": 5.5472649477168264e-06, | |
"loss": 0.5812, | |
"step": 512 | |
}, | |
{ | |
"epoch": 1.5559493670886075, | |
"grad_norm": 0.14694457454874488, | |
"learning_rate": 5.529678688597081e-06, | |
"loss": 0.5556, | |
"step": 513 | |
}, | |
{ | |
"epoch": 1.558987341772152, | |
"grad_norm": 0.14005032417283586, | |
"learning_rate": 5.512085799899705e-06, | |
"loss": 0.5488, | |
"step": 514 | |
}, | |
{ | |
"epoch": 1.562025316455696, | |
"grad_norm": 0.1495348119581355, | |
"learning_rate": 5.49448650182125e-06, | |
"loss": 0.5535, | |
"step": 515 | |
}, | |
{ | |
"epoch": 1.5650632911392406, | |
"grad_norm": 0.1482663531985799, | |
"learning_rate": 5.476881014638491e-06, | |
"loss": 0.555, | |
"step": 516 | |
}, | |
{ | |
"epoch": 1.5681012658227849, | |
"grad_norm": 0.13579619142220437, | |
"learning_rate": 5.459269558705667e-06, | |
"loss": 0.5095, | |
"step": 517 | |
}, | |
{ | |
"epoch": 1.5711392405063291, | |
"grad_norm": 0.1349516715654807, | |
"learning_rate": 5.441652354451721e-06, | |
"loss": 0.551, | |
"step": 518 | |
}, | |
{ | |
"epoch": 1.5741772151898734, | |
"grad_norm": 0.14250308468757655, | |
"learning_rate": 5.4240296223775465e-06, | |
"loss": 0.5771, | |
"step": 519 | |
}, | |
{ | |
"epoch": 1.5772151898734177, | |
"grad_norm": 0.14898385307777995, | |
"learning_rate": 5.406401583053222e-06, | |
"loss": 0.5495, | |
"step": 520 | |
}, | |
{ | |
"epoch": 1.5802531645569622, | |
"grad_norm": 0.1526056074250204, | |
"learning_rate": 5.388768457115254e-06, | |
"loss": 0.551, | |
"step": 521 | |
}, | |
{ | |
"epoch": 1.5832911392405062, | |
"grad_norm": 0.13895662082873736, | |
"learning_rate": 5.371130465263813e-06, | |
"loss": 0.5661, | |
"step": 522 | |
}, | |
{ | |
"epoch": 1.5863291139240507, | |
"grad_norm": 0.13655961057510518, | |
"learning_rate": 5.353487828259973e-06, | |
"loss": 0.5387, | |
"step": 523 | |
}, | |
{ | |
"epoch": 1.5893670886075948, | |
"grad_norm": 0.13385305077105514, | |
"learning_rate": 5.33584076692295e-06, | |
"loss": 0.4979, | |
"step": 524 | |
}, | |
{ | |
"epoch": 1.5924050632911393, | |
"grad_norm": 0.14461178515510015, | |
"learning_rate": 5.318189502127332e-06, | |
"loss": 0.5621, | |
"step": 525 | |
}, | |
{ | |
"epoch": 1.5954430379746836, | |
"grad_norm": 0.13063832970849976, | |
"learning_rate": 5.300534254800321e-06, | |
"loss": 0.5512, | |
"step": 526 | |
}, | |
{ | |
"epoch": 1.5984810126582278, | |
"grad_norm": 0.1339988600058471, | |
"learning_rate": 5.282875245918963e-06, | |
"loss": 0.5628, | |
"step": 527 | |
}, | |
{ | |
"epoch": 1.6015189873417721, | |
"grad_norm": 0.14952996776146643, | |
"learning_rate": 5.265212696507387e-06, | |
"loss": 0.558, | |
"step": 528 | |
}, | |
{ | |
"epoch": 1.6045569620253164, | |
"grad_norm": 0.14584127089296017, | |
"learning_rate": 5.247546827634035e-06, | |
"loss": 0.5673, | |
"step": 529 | |
}, | |
{ | |
"epoch": 1.6075949367088609, | |
"grad_norm": 0.14976398397944954, | |
"learning_rate": 5.229877860408899e-06, | |
"loss": 0.527, | |
"step": 530 | |
}, | |
{ | |
"epoch": 1.610632911392405, | |
"grad_norm": 0.13993952677265098, | |
"learning_rate": 5.212206015980742e-06, | |
"loss": 0.5669, | |
"step": 531 | |
}, | |
{ | |
"epoch": 1.6136708860759494, | |
"grad_norm": 0.14125637957821963, | |
"learning_rate": 5.194531515534349e-06, | |
"loss": 0.5438, | |
"step": 532 | |
}, | |
{ | |
"epoch": 1.6167088607594937, | |
"grad_norm": 0.1441000126670552, | |
"learning_rate": 5.176854580287744e-06, | |
"loss": 0.5636, | |
"step": 533 | |
}, | |
{ | |
"epoch": 1.619746835443038, | |
"grad_norm": 0.1465477733257787, | |
"learning_rate": 5.159175431489424e-06, | |
"loss": 0.5622, | |
"step": 534 | |
}, | |
{ | |
"epoch": 1.6227848101265823, | |
"grad_norm": 0.1452280680581016, | |
"learning_rate": 5.141494290415592e-06, | |
"loss": 0.5752, | |
"step": 535 | |
}, | |
{ | |
"epoch": 1.6258227848101265, | |
"grad_norm": 0.1490460884264919, | |
"learning_rate": 5.123811378367387e-06, | |
"loss": 0.5956, | |
"step": 536 | |
}, | |
{ | |
"epoch": 1.628860759493671, | |
"grad_norm": 0.14509109070781032, | |
"learning_rate": 5.106126916668118e-06, | |
"loss": 0.5698, | |
"step": 537 | |
}, | |
{ | |
"epoch": 1.631898734177215, | |
"grad_norm": 0.13988927520941075, | |
"learning_rate": 5.088441126660484e-06, | |
"loss": 0.5576, | |
"step": 538 | |
}, | |
{ | |
"epoch": 1.6349367088607596, | |
"grad_norm": 0.15090631504584412, | |
"learning_rate": 5.070754229703811e-06, | |
"loss": 0.5859, | |
"step": 539 | |
}, | |
{ | |
"epoch": 1.6379746835443036, | |
"grad_norm": 0.14046022933676117, | |
"learning_rate": 5.053066447171282e-06, | |
"loss": 0.5364, | |
"step": 540 | |
}, | |
{ | |
"epoch": 1.6410126582278481, | |
"grad_norm": 0.15029782080460274, | |
"learning_rate": 5.0353780004471605e-06, | |
"loss": 0.5898, | |
"step": 541 | |
}, | |
{ | |
"epoch": 1.6440506329113924, | |
"grad_norm": 0.14468053902299152, | |
"learning_rate": 5.0176891109240265e-06, | |
"loss": 0.5559, | |
"step": 542 | |
}, | |
{ | |
"epoch": 1.6470886075949367, | |
"grad_norm": 0.13603753933538643, | |
"learning_rate": 5e-06, | |
"loss": 0.5641, | |
"step": 543 | |
}, | |
{ | |
"epoch": 1.650126582278481, | |
"grad_norm": 0.1312182710525947, | |
"learning_rate": 4.9823108890759735e-06, | |
"loss": 0.5781, | |
"step": 544 | |
}, | |
{ | |
"epoch": 1.6531645569620252, | |
"grad_norm": 0.13893741157551903, | |
"learning_rate": 4.964621999552841e-06, | |
"loss": 0.5382, | |
"step": 545 | |
}, | |
{ | |
"epoch": 1.6562025316455697, | |
"grad_norm": 0.1272389096785783, | |
"learning_rate": 4.94693355282872e-06, | |
"loss": 0.5537, | |
"step": 546 | |
}, | |
{ | |
"epoch": 1.6592405063291138, | |
"grad_norm": 0.1484697926598053, | |
"learning_rate": 4.929245770296191e-06, | |
"loss": 0.5835, | |
"step": 547 | |
}, | |
{ | |
"epoch": 1.6622784810126583, | |
"grad_norm": 0.13218383966226127, | |
"learning_rate": 4.911558873339517e-06, | |
"loss": 0.5722, | |
"step": 548 | |
}, | |
{ | |
"epoch": 1.6653164556962026, | |
"grad_norm": 0.13912939261057347, | |
"learning_rate": 4.8938730833318825e-06, | |
"loss": 0.5168, | |
"step": 549 | |
}, | |
{ | |
"epoch": 1.6683544303797468, | |
"grad_norm": 0.14285050385792192, | |
"learning_rate": 4.876188621632614e-06, | |
"loss": 0.5195, | |
"step": 550 | |
}, | |
{ | |
"epoch": 1.6713924050632911, | |
"grad_norm": 0.14335550085150423, | |
"learning_rate": 4.85850570958441e-06, | |
"loss": 0.5643, | |
"step": 551 | |
}, | |
{ | |
"epoch": 1.6744303797468354, | |
"grad_norm": 0.14417656814244476, | |
"learning_rate": 4.840824568510579e-06, | |
"loss": 0.5453, | |
"step": 552 | |
}, | |
{ | |
"epoch": 1.67746835443038, | |
"grad_norm": 0.14091818725429653, | |
"learning_rate": 4.8231454197122575e-06, | |
"loss": 0.5499, | |
"step": 553 | |
}, | |
{ | |
"epoch": 1.680506329113924, | |
"grad_norm": 0.13630949660068137, | |
"learning_rate": 4.805468484465651e-06, | |
"loss": 0.5529, | |
"step": 554 | |
}, | |
{ | |
"epoch": 1.6835443037974684, | |
"grad_norm": 0.13452973158282802, | |
"learning_rate": 4.78779398401926e-06, | |
"loss": 0.5073, | |
"step": 555 | |
}, | |
{ | |
"epoch": 1.6865822784810125, | |
"grad_norm": 0.13321837957700222, | |
"learning_rate": 4.770122139591103e-06, | |
"loss": 0.5437, | |
"step": 556 | |
}, | |
{ | |
"epoch": 1.689620253164557, | |
"grad_norm": 0.14526224786375885, | |
"learning_rate": 4.752453172365966e-06, | |
"loss": 0.6003, | |
"step": 557 | |
}, | |
{ | |
"epoch": 1.6926582278481013, | |
"grad_norm": 0.14880695833453583, | |
"learning_rate": 4.734787303492615e-06, | |
"loss": 0.5415, | |
"step": 558 | |
}, | |
{ | |
"epoch": 1.6956962025316455, | |
"grad_norm": 0.1354671471518635, | |
"learning_rate": 4.717124754081038e-06, | |
"loss": 0.5415, | |
"step": 559 | |
}, | |
{ | |
"epoch": 1.6987341772151898, | |
"grad_norm": 0.13901671698042392, | |
"learning_rate": 4.6994657451996815e-06, | |
"loss": 0.5451, | |
"step": 560 | |
}, | |
{ | |
"epoch": 1.701772151898734, | |
"grad_norm": 0.14366775666503703, | |
"learning_rate": 4.6818104978726685e-06, | |
"loss": 0.5399, | |
"step": 561 | |
}, | |
{ | |
"epoch": 1.7048101265822786, | |
"grad_norm": 0.137433696428405, | |
"learning_rate": 4.664159233077051e-06, | |
"loss": 0.5274, | |
"step": 562 | |
}, | |
{ | |
"epoch": 1.7078481012658226, | |
"grad_norm": 0.1486497515378418, | |
"learning_rate": 4.646512171740028e-06, | |
"loss": 0.5506, | |
"step": 563 | |
}, | |
{ | |
"epoch": 1.7108860759493671, | |
"grad_norm": 0.14299327049032062, | |
"learning_rate": 4.628869534736187e-06, | |
"loss": 0.5372, | |
"step": 564 | |
}, | |
{ | |
"epoch": 1.7139240506329114, | |
"grad_norm": 0.1432222649388412, | |
"learning_rate": 4.611231542884747e-06, | |
"loss": 0.5628, | |
"step": 565 | |
}, | |
{ | |
"epoch": 1.7169620253164557, | |
"grad_norm": 0.14210217228609906, | |
"learning_rate": 4.593598416946779e-06, | |
"loss": 0.554, | |
"step": 566 | |
}, | |
{ | |
"epoch": 1.72, | |
"grad_norm": 0.14838285283850924, | |
"learning_rate": 4.575970377622456e-06, | |
"loss": 0.5561, | |
"step": 567 | |
}, | |
{ | |
"epoch": 1.7230379746835442, | |
"grad_norm": 0.14626060214205217, | |
"learning_rate": 4.55834764554828e-06, | |
"loss": 0.5573, | |
"step": 568 | |
}, | |
{ | |
"epoch": 1.7260759493670887, | |
"grad_norm": 0.13822402701812242, | |
"learning_rate": 4.540730441294334e-06, | |
"loss": 0.5474, | |
"step": 569 | |
}, | |
{ | |
"epoch": 1.7291139240506328, | |
"grad_norm": 0.13990125190678557, | |
"learning_rate": 4.523118985361511e-06, | |
"loss": 0.5448, | |
"step": 570 | |
}, | |
{ | |
"epoch": 1.7321518987341773, | |
"grad_norm": 0.14647795581607545, | |
"learning_rate": 4.505513498178752e-06, | |
"loss": 0.6005, | |
"step": 571 | |
}, | |
{ | |
"epoch": 1.7351898734177216, | |
"grad_norm": 0.14114549792807873, | |
"learning_rate": 4.487914200100296e-06, | |
"loss": 0.5583, | |
"step": 572 | |
}, | |
{ | |
"epoch": 1.7382278481012658, | |
"grad_norm": 0.14147578201123018, | |
"learning_rate": 4.47032131140292e-06, | |
"loss": 0.5493, | |
"step": 573 | |
}, | |
{ | |
"epoch": 1.7412658227848101, | |
"grad_norm": 0.1461456067023322, | |
"learning_rate": 4.452735052283175e-06, | |
"loss": 0.5728, | |
"step": 574 | |
}, | |
{ | |
"epoch": 1.7443037974683544, | |
"grad_norm": 0.14701108635229268, | |
"learning_rate": 4.435155642854637e-06, | |
"loss": 0.5389, | |
"step": 575 | |
}, | |
{ | |
"epoch": 1.747341772151899, | |
"grad_norm": 0.14844626853283752, | |
"learning_rate": 4.4175833031451475e-06, | |
"loss": 0.5547, | |
"step": 576 | |
}, | |
{ | |
"epoch": 1.750379746835443, | |
"grad_norm": 0.14268537466486175, | |
"learning_rate": 4.400018253094065e-06, | |
"loss": 0.5064, | |
"step": 577 | |
}, | |
{ | |
"epoch": 1.7534177215189874, | |
"grad_norm": 0.1343337939680092, | |
"learning_rate": 4.38246071254951e-06, | |
"loss": 0.5081, | |
"step": 578 | |
}, | |
{ | |
"epoch": 1.7564556962025315, | |
"grad_norm": 0.14358103354354712, | |
"learning_rate": 4.364910901265607e-06, | |
"loss": 0.5263, | |
"step": 579 | |
}, | |
{ | |
"epoch": 1.759493670886076, | |
"grad_norm": 0.14075296925562727, | |
"learning_rate": 4.347369038899744e-06, | |
"loss": 0.5823, | |
"step": 580 | |
}, | |
{ | |
"epoch": 1.7625316455696203, | |
"grad_norm": 0.15086719736162787, | |
"learning_rate": 4.329835345009813e-06, | |
"loss": 0.5728, | |
"step": 581 | |
}, | |
{ | |
"epoch": 1.7655696202531646, | |
"grad_norm": 0.1482949289358636, | |
"learning_rate": 4.312310039051476e-06, | |
"loss": 0.5673, | |
"step": 582 | |
}, | |
{ | |
"epoch": 1.7686075949367088, | |
"grad_norm": 0.14907940397518046, | |
"learning_rate": 4.294793340375405e-06, | |
"loss": 0.5605, | |
"step": 583 | |
}, | |
{ | |
"epoch": 1.771645569620253, | |
"grad_norm": 0.13784144891569491, | |
"learning_rate": 4.2772854682245365e-06, | |
"loss": 0.5087, | |
"step": 584 | |
}, | |
{ | |
"epoch": 1.7746835443037976, | |
"grad_norm": 0.1286612878135622, | |
"learning_rate": 4.259786641731344e-06, | |
"loss": 0.4975, | |
"step": 585 | |
}, | |
{ | |
"epoch": 1.7777215189873417, | |
"grad_norm": 0.13947922425755688, | |
"learning_rate": 4.242297079915071e-06, | |
"loss": 0.5543, | |
"step": 586 | |
}, | |
{ | |
"epoch": 1.7807594936708862, | |
"grad_norm": 0.13628866858565816, | |
"learning_rate": 4.224817001679011e-06, | |
"loss": 0.5616, | |
"step": 587 | |
}, | |
{ | |
"epoch": 1.7837974683544304, | |
"grad_norm": 0.1449043124811266, | |
"learning_rate": 4.2073466258077564e-06, | |
"loss": 0.5507, | |
"step": 588 | |
}, | |
{ | |
"epoch": 1.7868354430379747, | |
"grad_norm": 0.15523180294835678, | |
"learning_rate": 4.189886170964458e-06, | |
"loss": 0.5869, | |
"step": 589 | |
}, | |
{ | |
"epoch": 1.789873417721519, | |
"grad_norm": 0.135928325073954, | |
"learning_rate": 4.172435855688101e-06, | |
"loss": 0.5294, | |
"step": 590 | |
}, | |
{ | |
"epoch": 1.7929113924050633, | |
"grad_norm": 0.13255462114158595, | |
"learning_rate": 4.154995898390756e-06, | |
"loss": 0.5597, | |
"step": 591 | |
}, | |
{ | |
"epoch": 1.7959493670886078, | |
"grad_norm": 0.14201226106350706, | |
"learning_rate": 4.13756651735485e-06, | |
"loss": 0.5635, | |
"step": 592 | |
}, | |
{ | |
"epoch": 1.7989873417721518, | |
"grad_norm": 0.13625044615447665, | |
"learning_rate": 4.12014793073044e-06, | |
"loss": 0.5496, | |
"step": 593 | |
}, | |
{ | |
"epoch": 1.8020253164556963, | |
"grad_norm": 0.13575015781553457, | |
"learning_rate": 4.102740356532474e-06, | |
"loss": 0.5426, | |
"step": 594 | |
}, | |
{ | |
"epoch": 1.8050632911392404, | |
"grad_norm": 0.14705140164593614, | |
"learning_rate": 4.085344012638067e-06, | |
"loss": 0.537, | |
"step": 595 | |
}, | |
{ | |
"epoch": 1.8081012658227849, | |
"grad_norm": 0.1409549906509916, | |
"learning_rate": 4.0679591167837725e-06, | |
"loss": 0.5631, | |
"step": 596 | |
}, | |
{ | |
"epoch": 1.8111392405063291, | |
"grad_norm": 0.14079198651273092, | |
"learning_rate": 4.050585886562858e-06, | |
"loss": 0.5743, | |
"step": 597 | |
}, | |
{ | |
"epoch": 1.8141772151898734, | |
"grad_norm": 0.1401708128735937, | |
"learning_rate": 4.033224539422584e-06, | |
"loss": 0.5572, | |
"step": 598 | |
}, | |
{ | |
"epoch": 1.8172151898734177, | |
"grad_norm": 0.14493156119176476, | |
"learning_rate": 4.015875292661474e-06, | |
"loss": 0.5403, | |
"step": 599 | |
}, | |
{ | |
"epoch": 1.820253164556962, | |
"grad_norm": 0.14715986914181306, | |
"learning_rate": 3.998538363426605e-06, | |
"loss": 0.548, | |
"step": 600 | |
}, | |
{ | |
"epoch": 1.8232911392405065, | |
"grad_norm": 0.13644438323976482, | |
"learning_rate": 3.981213968710882e-06, | |
"loss": 0.5266, | |
"step": 601 | |
}, | |
{ | |
"epoch": 1.8263291139240505, | |
"grad_norm": 0.1398253759857167, | |
"learning_rate": 3.96390232535033e-06, | |
"loss": 0.5456, | |
"step": 602 | |
}, | |
{ | |
"epoch": 1.829367088607595, | |
"grad_norm": 0.14760211922260238, | |
"learning_rate": 3.94660365002137e-06, | |
"loss": 0.5765, | |
"step": 603 | |
}, | |
{ | |
"epoch": 1.8324050632911393, | |
"grad_norm": 0.1418957483459648, | |
"learning_rate": 3.929318159238113e-06, | |
"loss": 0.542, | |
"step": 604 | |
}, | |
{ | |
"epoch": 1.8354430379746836, | |
"grad_norm": 0.14314371065917036, | |
"learning_rate": 3.912046069349654e-06, | |
"loss": 0.5754, | |
"step": 605 | |
}, | |
{ | |
"epoch": 1.8384810126582278, | |
"grad_norm": 0.13916767299258234, | |
"learning_rate": 3.894787596537352e-06, | |
"loss": 0.5126, | |
"step": 606 | |
}, | |
{ | |
"epoch": 1.841518987341772, | |
"grad_norm": 0.1396512977982268, | |
"learning_rate": 3.877542956812137e-06, | |
"loss": 0.5511, | |
"step": 607 | |
}, | |
{ | |
"epoch": 1.8445569620253166, | |
"grad_norm": 0.14749351134383998, | |
"learning_rate": 3.860312366011802e-06, | |
"loss": 0.5708, | |
"step": 608 | |
}, | |
{ | |
"epoch": 1.8475949367088607, | |
"grad_norm": 0.1411435574639553, | |
"learning_rate": 3.843096039798293e-06, | |
"loss": 0.5753, | |
"step": 609 | |
}, | |
{ | |
"epoch": 1.8506329113924052, | |
"grad_norm": 0.1428960506415565, | |
"learning_rate": 3.825894193655026e-06, | |
"loss": 0.5805, | |
"step": 610 | |
}, | |
{ | |
"epoch": 1.8536708860759492, | |
"grad_norm": 0.14096006506759617, | |
"learning_rate": 3.808707042884176e-06, | |
"loss": 0.5566, | |
"step": 611 | |
}, | |
{ | |
"epoch": 1.8567088607594937, | |
"grad_norm": 0.14385169997352637, | |
"learning_rate": 3.7915348026039877e-06, | |
"loss": 0.5354, | |
"step": 612 | |
}, | |
{ | |
"epoch": 1.859746835443038, | |
"grad_norm": 0.14384954479264117, | |
"learning_rate": 3.7743776877460864e-06, | |
"loss": 0.5761, | |
"step": 613 | |
}, | |
{ | |
"epoch": 1.8627848101265823, | |
"grad_norm": 0.14325377106229495, | |
"learning_rate": 3.757235913052778e-06, | |
"loss": 0.5609, | |
"step": 614 | |
}, | |
{ | |
"epoch": 1.8658227848101265, | |
"grad_norm": 0.14392767974295542, | |
"learning_rate": 3.7401096930743753e-06, | |
"loss": 0.5351, | |
"step": 615 | |
}, | |
{ | |
"epoch": 1.8688607594936708, | |
"grad_norm": 0.13860263164526795, | |
"learning_rate": 3.722999242166497e-06, | |
"loss": 0.5165, | |
"step": 616 | |
}, | |
{ | |
"epoch": 1.8718987341772153, | |
"grad_norm": 0.14552207830586328, | |
"learning_rate": 3.705904774487396e-06, | |
"loss": 0.5412, | |
"step": 617 | |
}, | |
{ | |
"epoch": 1.8749367088607594, | |
"grad_norm": 0.13798290117274228, | |
"learning_rate": 3.6888265039952796e-06, | |
"loss": 0.5509, | |
"step": 618 | |
}, | |
{ | |
"epoch": 1.8779746835443039, | |
"grad_norm": 0.1492939489068322, | |
"learning_rate": 3.6717646444456196e-06, | |
"loss": 0.5634, | |
"step": 619 | |
}, | |
{ | |
"epoch": 1.8810126582278481, | |
"grad_norm": 0.14346220201822088, | |
"learning_rate": 3.6547194093884907e-06, | |
"loss": 0.5306, | |
"step": 620 | |
}, | |
{ | |
"epoch": 1.8840506329113924, | |
"grad_norm": 0.1372529739765176, | |
"learning_rate": 3.6376910121658867e-06, | |
"loss": 0.5237, | |
"step": 621 | |
}, | |
{ | |
"epoch": 1.8870886075949367, | |
"grad_norm": 0.1476780967270864, | |
"learning_rate": 3.6206796659090605e-06, | |
"loss": 0.5516, | |
"step": 622 | |
}, | |
{ | |
"epoch": 1.890126582278481, | |
"grad_norm": 0.13891558687222758, | |
"learning_rate": 3.60368558353585e-06, | |
"loss": 0.5453, | |
"step": 623 | |
}, | |
{ | |
"epoch": 1.8931645569620255, | |
"grad_norm": 0.14645395153335597, | |
"learning_rate": 3.5867089777480124e-06, | |
"loss": 0.5599, | |
"step": 624 | |
}, | |
{ | |
"epoch": 1.8962025316455695, | |
"grad_norm": 0.14582671520351428, | |
"learning_rate": 3.569750061028565e-06, | |
"loss": 0.5685, | |
"step": 625 | |
}, | |
{ | |
"epoch": 1.899240506329114, | |
"grad_norm": 0.1427846190640779, | |
"learning_rate": 3.552809045639123e-06, | |
"loss": 0.5484, | |
"step": 626 | |
}, | |
{ | |
"epoch": 1.902278481012658, | |
"grad_norm": 0.13630123350371642, | |
"learning_rate": 3.5358861436172487e-06, | |
"loss": 0.5207, | |
"step": 627 | |
}, | |
{ | |
"epoch": 1.9053164556962026, | |
"grad_norm": 0.14311504681101872, | |
"learning_rate": 3.5189815667737916e-06, | |
"loss": 0.5644, | |
"step": 628 | |
}, | |
{ | |
"epoch": 1.9083544303797468, | |
"grad_norm": 0.13509459795763357, | |
"learning_rate": 3.5020955266902344e-06, | |
"loss": 0.5471, | |
"step": 629 | |
}, | |
{ | |
"epoch": 1.9113924050632911, | |
"grad_norm": 0.1450618320538534, | |
"learning_rate": 3.485228234716058e-06, | |
"loss": 0.5984, | |
"step": 630 | |
}, | |
{ | |
"epoch": 1.9144303797468356, | |
"grad_norm": 0.13825705276927988, | |
"learning_rate": 3.4683799019660834e-06, | |
"loss": 0.5328, | |
"step": 631 | |
}, | |
{ | |
"epoch": 1.9174683544303797, | |
"grad_norm": 0.14039425954035306, | |
"learning_rate": 3.4515507393178316e-06, | |
"loss": 0.5688, | |
"step": 632 | |
}, | |
{ | |
"epoch": 1.9205063291139242, | |
"grad_norm": 0.13288585149565324, | |
"learning_rate": 3.4347409574088896e-06, | |
"loss": 0.5796, | |
"step": 633 | |
}, | |
{ | |
"epoch": 1.9235443037974682, | |
"grad_norm": 0.148980738834977, | |
"learning_rate": 3.417950766634268e-06, | |
"loss": 0.5269, | |
"step": 634 | |
}, | |
{ | |
"epoch": 1.9265822784810127, | |
"grad_norm": 0.14609255957393458, | |
"learning_rate": 3.401180377143774e-06, | |
"loss": 0.5677, | |
"step": 635 | |
}, | |
{ | |
"epoch": 1.929620253164557, | |
"grad_norm": 0.1359215016501865, | |
"learning_rate": 3.3844299988393757e-06, | |
"loss": 0.5377, | |
"step": 636 | |
}, | |
{ | |
"epoch": 1.9326582278481013, | |
"grad_norm": 0.13812680406064673, | |
"learning_rate": 3.3676998413725726e-06, | |
"loss": 0.5523, | |
"step": 637 | |
}, | |
{ | |
"epoch": 1.9356962025316455, | |
"grad_norm": 0.1327702988731366, | |
"learning_rate": 3.3509901141417845e-06, | |
"loss": 0.5587, | |
"step": 638 | |
}, | |
{ | |
"epoch": 1.9387341772151898, | |
"grad_norm": 0.14199245720615827, | |
"learning_rate": 3.3343010262897125e-06, | |
"loss": 0.5467, | |
"step": 639 | |
}, | |
{ | |
"epoch": 1.9417721518987343, | |
"grad_norm": 0.14774749378135763, | |
"learning_rate": 3.3176327867007376e-06, | |
"loss": 0.5535, | |
"step": 640 | |
}, | |
{ | |
"epoch": 1.9448101265822784, | |
"grad_norm": 0.15288003903087136, | |
"learning_rate": 3.300985603998296e-06, | |
"loss": 0.5837, | |
"step": 641 | |
}, | |
{ | |
"epoch": 1.9478481012658229, | |
"grad_norm": 0.13722020959482253, | |
"learning_rate": 3.2843596865422687e-06, | |
"loss": 0.5248, | |
"step": 642 | |
}, | |
{ | |
"epoch": 1.9508860759493671, | |
"grad_norm": 0.14417467655168203, | |
"learning_rate": 3.2677552424263836e-06, | |
"loss": 0.5339, | |
"step": 643 | |
}, | |
{ | |
"epoch": 1.9539240506329114, | |
"grad_norm": 0.14278379325376636, | |
"learning_rate": 3.251172479475595e-06, | |
"loss": 0.5584, | |
"step": 644 | |
}, | |
{ | |
"epoch": 1.9569620253164557, | |
"grad_norm": 0.14142202059314476, | |
"learning_rate": 3.234611605243496e-06, | |
"loss": 0.5383, | |
"step": 645 | |
}, | |
{ | |
"epoch": 1.96, | |
"grad_norm": 0.14465300329222652, | |
"learning_rate": 3.2180728270097163e-06, | |
"loss": 0.5618, | |
"step": 646 | |
}, | |
{ | |
"epoch": 1.9630379746835445, | |
"grad_norm": 0.1464687761772569, | |
"learning_rate": 3.2015563517773214e-06, | |
"loss": 0.5597, | |
"step": 647 | |
}, | |
{ | |
"epoch": 1.9660759493670885, | |
"grad_norm": 0.1353865286420906, | |
"learning_rate": 3.1850623862702344e-06, | |
"loss": 0.5133, | |
"step": 648 | |
}, | |
{ | |
"epoch": 1.969113924050633, | |
"grad_norm": 0.1454261849576213, | |
"learning_rate": 3.1685911369306364e-06, | |
"loss": 0.5922, | |
"step": 649 | |
}, | |
{ | |
"epoch": 1.972151898734177, | |
"grad_norm": 0.14700261448719304, | |
"learning_rate": 3.1521428099163897e-06, | |
"loss": 0.5819, | |
"step": 650 | |
}, | |
{ | |
"epoch": 1.9751898734177216, | |
"grad_norm": 0.14221751125407076, | |
"learning_rate": 3.1357176110984578e-06, | |
"loss": 0.5379, | |
"step": 651 | |
}, | |
{ | |
"epoch": 1.9782278481012658, | |
"grad_norm": 0.14822785091064794, | |
"learning_rate": 3.1193157460583217e-06, | |
"loss": 0.582, | |
"step": 652 | |
}, | |
{ | |
"epoch": 1.9812658227848101, | |
"grad_norm": 0.1412575132749595, | |
"learning_rate": 3.1029374200854167e-06, | |
"loss": 0.5356, | |
"step": 653 | |
}, | |
{ | |
"epoch": 1.9843037974683544, | |
"grad_norm": 0.13340040841815526, | |
"learning_rate": 3.0865828381745515e-06, | |
"loss": 0.529, | |
"step": 654 | |
}, | |
{ | |
"epoch": 1.9873417721518987, | |
"grad_norm": 0.14471668869668178, | |
"learning_rate": 3.070252205023356e-06, | |
"loss": 0.5573, | |
"step": 655 | |
}, | |
{ | |
"epoch": 1.9903797468354432, | |
"grad_norm": 0.1343280166917402, | |
"learning_rate": 3.0539457250297095e-06, | |
"loss": 0.517, | |
"step": 656 | |
}, | |
{ | |
"epoch": 1.9934177215189872, | |
"grad_norm": 0.14472286407987167, | |
"learning_rate": 3.0376636022891813e-06, | |
"loss": 0.5618, | |
"step": 657 | |
}, | |
{ | |
"epoch": 1.9964556962025317, | |
"grad_norm": 0.14382602624595334, | |
"learning_rate": 3.0214060405924863e-06, | |
"loss": 0.5265, | |
"step": 658 | |
}, | |
{ | |
"epoch": 1.999493670886076, | |
"grad_norm": 0.14660808079965273, | |
"learning_rate": 3.0051732434229185e-06, | |
"loss": 0.5296, | |
"step": 659 | |
}, | |
{ | |
"epoch": 2.0, | |
"grad_norm": 0.14660808079965273, | |
"learning_rate": 2.988965413953825e-06, | |
"loss": 0.5292, | |
"step": 660 | |
}, | |
{ | |
"epoch": 2.0030379746835445, | |
"grad_norm": 0.3574667987342137, | |
"learning_rate": 2.972782755046043e-06, | |
"loss": 0.404, | |
"step": 661 | |
}, | |
{ | |
"epoch": 2.0060759493670886, | |
"grad_norm": 0.1386853846579846, | |
"learning_rate": 2.956625469245372e-06, | |
"loss": 0.3793, | |
"step": 662 | |
}, | |
{ | |
"epoch": 2.009113924050633, | |
"grad_norm": 0.14162490732848218, | |
"learning_rate": 2.9404937587800374e-06, | |
"loss": 0.3873, | |
"step": 663 | |
}, | |
{ | |
"epoch": 2.012151898734177, | |
"grad_norm": 0.14617377605368997, | |
"learning_rate": 2.924387825558155e-06, | |
"loss": 0.369, | |
"step": 664 | |
}, | |
{ | |
"epoch": 2.0151898734177216, | |
"grad_norm": 0.1366181219034659, | |
"learning_rate": 2.90830787116521e-06, | |
"loss": 0.3758, | |
"step": 665 | |
}, | |
{ | |
"epoch": 2.0182278481012657, | |
"grad_norm": 0.14211468580081227, | |
"learning_rate": 2.892254096861529e-06, | |
"loss": 0.3897, | |
"step": 666 | |
}, | |
{ | |
"epoch": 2.02126582278481, | |
"grad_norm": 0.14141985614727673, | |
"learning_rate": 2.8762267035797607e-06, | |
"loss": 0.3811, | |
"step": 667 | |
}, | |
{ | |
"epoch": 2.0243037974683546, | |
"grad_norm": 0.13901272165728304, | |
"learning_rate": 2.8602258919223703e-06, | |
"loss": 0.4199, | |
"step": 668 | |
}, | |
{ | |
"epoch": 2.0273417721518987, | |
"grad_norm": 0.1666829934946773, | |
"learning_rate": 2.8442518621591085e-06, | |
"loss": 0.3738, | |
"step": 669 | |
}, | |
{ | |
"epoch": 2.030379746835443, | |
"grad_norm": 0.15163462826568602, | |
"learning_rate": 2.828304814224532e-06, | |
"loss": 0.4043, | |
"step": 670 | |
}, | |
{ | |
"epoch": 2.0334177215189873, | |
"grad_norm": 0.1498595150029576, | |
"learning_rate": 2.8123849477154808e-06, | |
"loss": 0.3674, | |
"step": 671 | |
}, | |
{ | |
"epoch": 2.0364556962025318, | |
"grad_norm": 0.16298861392155772, | |
"learning_rate": 2.796492461888578e-06, | |
"loss": 0.3606, | |
"step": 672 | |
}, | |
{ | |
"epoch": 2.039493670886076, | |
"grad_norm": 0.1651702105333408, | |
"learning_rate": 2.7806275556577624e-06, | |
"loss": 0.3512, | |
"step": 673 | |
}, | |
{ | |
"epoch": 2.0425316455696203, | |
"grad_norm": 0.15287039406201183, | |
"learning_rate": 2.764790427591759e-06, | |
"loss": 0.3949, | |
"step": 674 | |
}, | |
{ | |
"epoch": 2.0455696202531644, | |
"grad_norm": 0.1475889681451995, | |
"learning_rate": 2.748981275911633e-06, | |
"loss": 0.3527, | |
"step": 675 | |
}, | |
{ | |
"epoch": 2.048607594936709, | |
"grad_norm": 0.15041610036768438, | |
"learning_rate": 2.733200298488284e-06, | |
"loss": 0.3996, | |
"step": 676 | |
}, | |
{ | |
"epoch": 2.0516455696202534, | |
"grad_norm": 0.14184260873433086, | |
"learning_rate": 2.7174476928399685e-06, | |
"loss": 0.3757, | |
"step": 677 | |
}, | |
{ | |
"epoch": 2.0546835443037974, | |
"grad_norm": 0.14445511127114966, | |
"learning_rate": 2.701723656129851e-06, | |
"loss": 0.3388, | |
"step": 678 | |
}, | |
{ | |
"epoch": 2.057721518987342, | |
"grad_norm": 0.13942685496204735, | |
"learning_rate": 2.6860283851635067e-06, | |
"loss": 0.3749, | |
"step": 679 | |
}, | |
{ | |
"epoch": 2.060759493670886, | |
"grad_norm": 0.1434478459905437, | |
"learning_rate": 2.670362076386478e-06, | |
"loss": 0.3681, | |
"step": 680 | |
}, | |
{ | |
"epoch": 2.0637974683544305, | |
"grad_norm": 0.1419461369196199, | |
"learning_rate": 2.6547249258818162e-06, | |
"loss": 0.3771, | |
"step": 681 | |
}, | |
{ | |
"epoch": 2.0668354430379745, | |
"grad_norm": 0.14415690241953952, | |
"learning_rate": 2.6391171293676077e-06, | |
"loss": 0.3895, | |
"step": 682 | |
}, | |
{ | |
"epoch": 2.069873417721519, | |
"grad_norm": 0.14211032871865661, | |
"learning_rate": 2.6235388821945497e-06, | |
"loss": 0.345, | |
"step": 683 | |
}, | |
{ | |
"epoch": 2.0729113924050635, | |
"grad_norm": 0.14068917338536288, | |
"learning_rate": 2.607990379343489e-06, | |
"loss": 0.3447, | |
"step": 684 | |
}, | |
{ | |
"epoch": 2.0759493670886076, | |
"grad_norm": 0.14055662228238847, | |
"learning_rate": 2.59247181542298e-06, | |
"loss": 0.3763, | |
"step": 685 | |
}, | |
{ | |
"epoch": 2.078987341772152, | |
"grad_norm": 0.14124406243142484, | |
"learning_rate": 2.576983384666867e-06, | |
"loss": 0.3521, | |
"step": 686 | |
}, | |
{ | |
"epoch": 2.082025316455696, | |
"grad_norm": 0.14291453835375909, | |
"learning_rate": 2.5615252809318287e-06, | |
"loss": 0.3582, | |
"step": 687 | |
}, | |
{ | |
"epoch": 2.0850632911392406, | |
"grad_norm": 0.1460732385329124, | |
"learning_rate": 2.5460976976949686e-06, | |
"loss": 0.3709, | |
"step": 688 | |
}, | |
{ | |
"epoch": 2.0881012658227847, | |
"grad_norm": 0.14485653716892255, | |
"learning_rate": 2.5307008280513956e-06, | |
"loss": 0.3588, | |
"step": 689 | |
}, | |
{ | |
"epoch": 2.091139240506329, | |
"grad_norm": 0.1465911852717234, | |
"learning_rate": 2.515334864711786e-06, | |
"loss": 0.3771, | |
"step": 690 | |
}, | |
{ | |
"epoch": 2.094177215189873, | |
"grad_norm": 0.13197544386108068, | |
"learning_rate": 2.5000000000000015e-06, | |
"loss": 0.3692, | |
"step": 691 | |
}, | |
{ | |
"epoch": 2.0972151898734177, | |
"grad_norm": 0.15871811724241183, | |
"learning_rate": 2.484696425850653e-06, | |
"loss": 0.3685, | |
"step": 692 | |
}, | |
{ | |
"epoch": 2.100253164556962, | |
"grad_norm": 0.13945520601609362, | |
"learning_rate": 2.469424333806718e-06, | |
"loss": 0.3482, | |
"step": 693 | |
}, | |
{ | |
"epoch": 2.1032911392405063, | |
"grad_norm": 0.1431696802686398, | |
"learning_rate": 2.454183915017142e-06, | |
"loss": 0.3681, | |
"step": 694 | |
}, | |
{ | |
"epoch": 2.1063291139240508, | |
"grad_norm": 0.14147201785437902, | |
"learning_rate": 2.4389753602344298e-06, | |
"loss": 0.3589, | |
"step": 695 | |
}, | |
{ | |
"epoch": 2.109367088607595, | |
"grad_norm": 0.1413089483297598, | |
"learning_rate": 2.423798859812275e-06, | |
"loss": 0.3657, | |
"step": 696 | |
}, | |
{ | |
"epoch": 2.1124050632911393, | |
"grad_norm": 0.14505692905921022, | |
"learning_rate": 2.4086546037031734e-06, | |
"loss": 0.3826, | |
"step": 697 | |
}, | |
{ | |
"epoch": 2.1154430379746834, | |
"grad_norm": 0.14175858850346926, | |
"learning_rate": 2.393542781456038e-06, | |
"loss": 0.3625, | |
"step": 698 | |
}, | |
{ | |
"epoch": 2.118481012658228, | |
"grad_norm": 0.14145320968168484, | |
"learning_rate": 2.3784635822138424e-06, | |
"loss": 0.367, | |
"step": 699 | |
}, | |
{ | |
"epoch": 2.1215189873417724, | |
"grad_norm": 0.14472698977269888, | |
"learning_rate": 2.3634171947112307e-06, | |
"loss": 0.3977, | |
"step": 700 | |
}, | |
{ | |
"epoch": 2.1245569620253164, | |
"grad_norm": 0.14060009486280914, | |
"learning_rate": 2.348403807272176e-06, | |
"loss": 0.3654, | |
"step": 701 | |
}, | |
{ | |
"epoch": 2.127594936708861, | |
"grad_norm": 0.1323369322662927, | |
"learning_rate": 2.3334236078076126e-06, | |
"loss": 0.374, | |
"step": 702 | |
}, | |
{ | |
"epoch": 2.130632911392405, | |
"grad_norm": 0.13871843025606753, | |
"learning_rate": 2.318476783813088e-06, | |
"loss": 0.3516, | |
"step": 703 | |
}, | |
{ | |
"epoch": 2.1336708860759495, | |
"grad_norm": 0.13665483983345827, | |
"learning_rate": 2.3035635223664136e-06, | |
"loss": 0.3788, | |
"step": 704 | |
}, | |
{ | |
"epoch": 2.1367088607594935, | |
"grad_norm": 0.14156145083764254, | |
"learning_rate": 2.288684010125325e-06, | |
"loss": 0.3719, | |
"step": 705 | |
}, | |
{ | |
"epoch": 2.139746835443038, | |
"grad_norm": 0.1416005014074393, | |
"learning_rate": 2.2738384333251447e-06, | |
"loss": 0.3458, | |
"step": 706 | |
}, | |
{ | |
"epoch": 2.1427848101265825, | |
"grad_norm": 0.1395435783001548, | |
"learning_rate": 2.2590269777764516e-06, | |
"loss": 0.3997, | |
"step": 707 | |
}, | |
{ | |
"epoch": 2.1458227848101266, | |
"grad_norm": 0.14129712405820286, | |
"learning_rate": 2.2442498288627555e-06, | |
"loss": 0.3772, | |
"step": 708 | |
}, | |
{ | |
"epoch": 2.148860759493671, | |
"grad_norm": 0.13423105798210677, | |
"learning_rate": 2.229507171538178e-06, | |
"loss": 0.3562, | |
"step": 709 | |
}, | |
{ | |
"epoch": 2.151898734177215, | |
"grad_norm": 0.1398912160788065, | |
"learning_rate": 2.214799190325133e-06, | |
"loss": 0.3465, | |
"step": 710 | |
}, | |
{ | |
"epoch": 2.1549367088607596, | |
"grad_norm": 0.13951688351084254, | |
"learning_rate": 2.2001260693120236e-06, | |
"loss": 0.3606, | |
"step": 711 | |
}, | |
{ | |
"epoch": 2.1579746835443037, | |
"grad_norm": 0.13894816262705856, | |
"learning_rate": 2.185487992150933e-06, | |
"loss": 0.3653, | |
"step": 712 | |
}, | |
{ | |
"epoch": 2.161012658227848, | |
"grad_norm": 0.14021673321330894, | |
"learning_rate": 2.1708851420553277e-06, | |
"loss": 0.3518, | |
"step": 713 | |
}, | |
{ | |
"epoch": 2.164050632911392, | |
"grad_norm": 0.14815706787540678, | |
"learning_rate": 2.156317701797766e-06, | |
"loss": 0.3618, | |
"step": 714 | |
}, | |
{ | |
"epoch": 2.1670886075949367, | |
"grad_norm": 0.1442615360609417, | |
"learning_rate": 2.141785853707607e-06, | |
"loss": 0.3814, | |
"step": 715 | |
}, | |
{ | |
"epoch": 2.170126582278481, | |
"grad_norm": 0.14693744202965667, | |
"learning_rate": 2.12728977966873e-06, | |
"loss": 0.3702, | |
"step": 716 | |
}, | |
{ | |
"epoch": 2.1731645569620253, | |
"grad_norm": 0.1417303529290372, | |
"learning_rate": 2.1128296611172593e-06, | |
"loss": 0.3511, | |
"step": 717 | |
}, | |
{ | |
"epoch": 2.1762025316455698, | |
"grad_norm": 0.14179184653741922, | |
"learning_rate": 2.0984056790392926e-06, | |
"loss": 0.3635, | |
"step": 718 | |
}, | |
{ | |
"epoch": 2.179240506329114, | |
"grad_norm": 0.13988953083591674, | |
"learning_rate": 2.0840180139686333e-06, | |
"loss": 0.3596, | |
"step": 719 | |
}, | |
{ | |
"epoch": 2.1822784810126583, | |
"grad_norm": 0.13593635557199768, | |
"learning_rate": 2.0696668459845354e-06, | |
"loss": 0.3688, | |
"step": 720 | |
}, | |
{ | |
"epoch": 2.1853164556962024, | |
"grad_norm": 0.1452918041182462, | |
"learning_rate": 2.0553523547094473e-06, | |
"loss": 0.4024, | |
"step": 721 | |
}, | |
{ | |
"epoch": 2.188354430379747, | |
"grad_norm": 0.13179141412839, | |
"learning_rate": 2.041074719306757e-06, | |
"loss": 0.3883, | |
"step": 722 | |
}, | |
{ | |
"epoch": 2.191392405063291, | |
"grad_norm": 0.14527521731366982, | |
"learning_rate": 2.0268341184785674e-06, | |
"loss": 0.3586, | |
"step": 723 | |
}, | |
{ | |
"epoch": 2.1944303797468354, | |
"grad_norm": 0.141640009335079, | |
"learning_rate": 2.0126307304634383e-06, | |
"loss": 0.3418, | |
"step": 724 | |
}, | |
{ | |
"epoch": 2.19746835443038, | |
"grad_norm": 0.13534271988208676, | |
"learning_rate": 1.998464733034172e-06, | |
"loss": 0.3542, | |
"step": 725 | |
}, | |
{ | |
"epoch": 2.200506329113924, | |
"grad_norm": 0.13937271639571533, | |
"learning_rate": 1.98433630349558e-06, | |
"loss": 0.3487, | |
"step": 726 | |
}, | |
{ | |
"epoch": 2.2035443037974685, | |
"grad_norm": 0.14219916038892946, | |
"learning_rate": 1.9702456186822595e-06, | |
"loss": 0.3484, | |
"step": 727 | |
}, | |
{ | |
"epoch": 2.2065822784810125, | |
"grad_norm": 0.13660123224575374, | |
"learning_rate": 1.956192854956397e-06, | |
"loss": 0.3503, | |
"step": 728 | |
}, | |
{ | |
"epoch": 2.209620253164557, | |
"grad_norm": 0.1409506648161819, | |
"learning_rate": 1.9421781882055447e-06, | |
"loss": 0.373, | |
"step": 729 | |
}, | |
{ | |
"epoch": 2.212658227848101, | |
"grad_norm": 0.14047713525681946, | |
"learning_rate": 1.9282017938404202e-06, | |
"loss": 0.3721, | |
"step": 730 | |
}, | |
{ | |
"epoch": 2.2156962025316456, | |
"grad_norm": 0.139451374909164, | |
"learning_rate": 1.9142638467927254e-06, | |
"loss": 0.372, | |
"step": 731 | |
}, | |
{ | |
"epoch": 2.21873417721519, | |
"grad_norm": 0.14095477401855694, | |
"learning_rate": 1.9003645215129356e-06, | |
"loss": 0.3914, | |
"step": 732 | |
}, | |
{ | |
"epoch": 2.221772151898734, | |
"grad_norm": 0.15446900783196774, | |
"learning_rate": 1.8865039919681377e-06, | |
"loss": 0.3807, | |
"step": 733 | |
}, | |
{ | |
"epoch": 2.2248101265822786, | |
"grad_norm": 0.14673994714173552, | |
"learning_rate": 1.8726824316398372e-06, | |
"loss": 0.3609, | |
"step": 734 | |
}, | |
{ | |
"epoch": 2.2278481012658227, | |
"grad_norm": 0.14758184686530929, | |
"learning_rate": 1.8589000135217882e-06, | |
"loss": 0.3395, | |
"step": 735 | |
}, | |
{ | |
"epoch": 2.230886075949367, | |
"grad_norm": 0.14211141269917668, | |
"learning_rate": 1.845156910117843e-06, | |
"loss": 0.3821, | |
"step": 736 | |
}, | |
{ | |
"epoch": 2.233924050632911, | |
"grad_norm": 0.14513257772004282, | |
"learning_rate": 1.831453293439771e-06, | |
"loss": 0.3899, | |
"step": 737 | |
}, | |
{ | |
"epoch": 2.2369620253164557, | |
"grad_norm": 0.14360399357832268, | |
"learning_rate": 1.8177893350051213e-06, | |
"loss": 0.3599, | |
"step": 738 | |
}, | |
{ | |
"epoch": 2.24, | |
"grad_norm": 0.1421345505264031, | |
"learning_rate": 1.8041652058350768e-06, | |
"loss": 0.3604, | |
"step": 739 | |
}, | |
{ | |
"epoch": 2.2430379746835443, | |
"grad_norm": 0.13312641259007316, | |
"learning_rate": 1.7905810764522963e-06, | |
"loss": 0.3654, | |
"step": 740 | |
}, | |
{ | |
"epoch": 2.2460759493670888, | |
"grad_norm": 0.14218988322941647, | |
"learning_rate": 1.7770371168788042e-06, | |
"loss": 0.3559, | |
"step": 741 | |
}, | |
{ | |
"epoch": 2.249113924050633, | |
"grad_norm": 0.14208321089497616, | |
"learning_rate": 1.7635334966338463e-06, | |
"loss": 0.3517, | |
"step": 742 | |
}, | |
{ | |
"epoch": 2.2521518987341773, | |
"grad_norm": 0.14127201594720978, | |
"learning_rate": 1.7500703847317663e-06, | |
"loss": 0.3995, | |
"step": 743 | |
}, | |
{ | |
"epoch": 2.2551898734177214, | |
"grad_norm": 0.14581924224244086, | |
"learning_rate": 1.7366479496799076e-06, | |
"loss": 0.3474, | |
"step": 744 | |
}, | |
{ | |
"epoch": 2.258227848101266, | |
"grad_norm": 0.1402244780578575, | |
"learning_rate": 1.723266359476483e-06, | |
"loss": 0.3411, | |
"step": 745 | |
}, | |
{ | |
"epoch": 2.26126582278481, | |
"grad_norm": 0.1384075219093103, | |
"learning_rate": 1.7099257816084851e-06, | |
"loss": 0.377, | |
"step": 746 | |
}, | |
{ | |
"epoch": 2.2643037974683544, | |
"grad_norm": 0.14542196888379869, | |
"learning_rate": 1.6966263830495939e-06, | |
"loss": 0.3733, | |
"step": 747 | |
}, | |
{ | |
"epoch": 2.267341772151899, | |
"grad_norm": 0.14061074341979693, | |
"learning_rate": 1.6833683302580661e-06, | |
"loss": 0.3611, | |
"step": 748 | |
}, | |
{ | |
"epoch": 2.270379746835443, | |
"grad_norm": 0.14151399997222402, | |
"learning_rate": 1.6701517891746805e-06, | |
"loss": 0.3675, | |
"step": 749 | |
}, | |
{ | |
"epoch": 2.2734177215189875, | |
"grad_norm": 0.14743297739700706, | |
"learning_rate": 1.656976925220633e-06, | |
"loss": 0.359, | |
"step": 750 | |
}, | |
{ | |
"epoch": 2.2764556962025315, | |
"grad_norm": 0.139782212285992, | |
"learning_rate": 1.6438439032954857e-06, | |
"loss": 0.3747, | |
"step": 751 | |
}, | |
{ | |
"epoch": 2.279493670886076, | |
"grad_norm": 0.13955135512445044, | |
"learning_rate": 1.6307528877751e-06, | |
"loss": 0.3526, | |
"step": 752 | |
}, | |
{ | |
"epoch": 2.28253164556962, | |
"grad_norm": 0.13638102492670431, | |
"learning_rate": 1.6177040425095664e-06, | |
"loss": 0.3791, | |
"step": 753 | |
}, | |
{ | |
"epoch": 2.2855696202531646, | |
"grad_norm": 0.13722953132884094, | |
"learning_rate": 1.6046975308211699e-06, | |
"loss": 0.3549, | |
"step": 754 | |
}, | |
{ | |
"epoch": 2.2886075949367086, | |
"grad_norm": 0.13730644404932044, | |
"learning_rate": 1.5917335155023368e-06, | |
"loss": 0.3688, | |
"step": 755 | |
}, | |
{ | |
"epoch": 2.291645569620253, | |
"grad_norm": 0.152856980626857, | |
"learning_rate": 1.5788121588135975e-06, | |
"loss": 0.3652, | |
"step": 756 | |
}, | |
{ | |
"epoch": 2.2946835443037976, | |
"grad_norm": 0.1393588461745144, | |
"learning_rate": 1.5659336224815642e-06, | |
"loss": 0.3606, | |
"step": 757 | |
}, | |
{ | |
"epoch": 2.2977215189873417, | |
"grad_norm": 0.1352559540583668, | |
"learning_rate": 1.553098067696891e-06, | |
"loss": 0.3598, | |
"step": 758 | |
}, | |
{ | |
"epoch": 2.300759493670886, | |
"grad_norm": 0.13975979693883497, | |
"learning_rate": 1.5403056551122697e-06, | |
"loss": 0.3562, | |
"step": 759 | |
}, | |
{ | |
"epoch": 2.3037974683544302, | |
"grad_norm": 0.14082699563333942, | |
"learning_rate": 1.5275565448404146e-06, | |
"loss": 0.3632, | |
"step": 760 | |
}, | |
{ | |
"epoch": 2.3068354430379747, | |
"grad_norm": 0.1444199782676381, | |
"learning_rate": 1.5148508964520586e-06, | |
"loss": 0.3676, | |
"step": 761 | |
}, | |
{ | |
"epoch": 2.309873417721519, | |
"grad_norm": 0.14939812391967536, | |
"learning_rate": 1.502188868973955e-06, | |
"loss": 0.39, | |
"step": 762 | |
}, | |
{ | |
"epoch": 2.3129113924050633, | |
"grad_norm": 0.14827888289725544, | |
"learning_rate": 1.4895706208868876e-06, | |
"loss": 0.3698, | |
"step": 763 | |
}, | |
{ | |
"epoch": 2.3159493670886078, | |
"grad_norm": 0.14193235148758998, | |
"learning_rate": 1.4769963101236894e-06, | |
"loss": 0.3477, | |
"step": 764 | |
}, | |
{ | |
"epoch": 2.318987341772152, | |
"grad_norm": 0.14253457252980234, | |
"learning_rate": 1.4644660940672628e-06, | |
"loss": 0.3658, | |
"step": 765 | |
}, | |
{ | |
"epoch": 2.3220253164556963, | |
"grad_norm": 0.14584547496813957, | |
"learning_rate": 1.4519801295486102e-06, | |
"loss": 0.381, | |
"step": 766 | |
}, | |
{ | |
"epoch": 2.3250632911392404, | |
"grad_norm": 0.1382275600719031, | |
"learning_rate": 1.439538572844873e-06, | |
"loss": 0.3445, | |
"step": 767 | |
}, | |
{ | |
"epoch": 2.328101265822785, | |
"grad_norm": 0.1390590172326061, | |
"learning_rate": 1.4271415796773742e-06, | |
"loss": 0.3788, | |
"step": 768 | |
}, | |
{ | |
"epoch": 2.331139240506329, | |
"grad_norm": 0.14109343766879234, | |
"learning_rate": 1.4147893052096684e-06, | |
"loss": 0.3591, | |
"step": 769 | |
}, | |
{ | |
"epoch": 2.3341772151898734, | |
"grad_norm": 0.14542614353397915, | |
"learning_rate": 1.4024819040456023e-06, | |
"loss": 0.3777, | |
"step": 770 | |
}, | |
{ | |
"epoch": 2.337215189873418, | |
"grad_norm": 0.13499097512948352, | |
"learning_rate": 1.390219530227378e-06, | |
"loss": 0.3611, | |
"step": 771 | |
}, | |
{ | |
"epoch": 2.340253164556962, | |
"grad_norm": 0.14731429626870976, | |
"learning_rate": 1.378002337233625e-06, | |
"loss": 0.3687, | |
"step": 772 | |
}, | |
{ | |
"epoch": 2.3432911392405065, | |
"grad_norm": 0.14898716888323726, | |
"learning_rate": 1.3658304779774784e-06, | |
"loss": 0.3948, | |
"step": 773 | |
}, | |
{ | |
"epoch": 2.3463291139240505, | |
"grad_norm": 0.14050750727274172, | |
"learning_rate": 1.3537041048046696e-06, | |
"loss": 0.3463, | |
"step": 774 | |
}, | |
{ | |
"epoch": 2.349367088607595, | |
"grad_norm": 0.14023730470046572, | |
"learning_rate": 1.3416233694916086e-06, | |
"loss": 0.369, | |
"step": 775 | |
}, | |
{ | |
"epoch": 2.352405063291139, | |
"grad_norm": 0.14551200257588823, | |
"learning_rate": 1.3295884232435008e-06, | |
"loss": 0.3436, | |
"step": 776 | |
}, | |
{ | |
"epoch": 2.3554430379746836, | |
"grad_norm": 0.14391540661810454, | |
"learning_rate": 1.3175994166924394e-06, | |
"loss": 0.3531, | |
"step": 777 | |
}, | |
{ | |
"epoch": 2.3584810126582276, | |
"grad_norm": 0.13062333222456327, | |
"learning_rate": 1.3056564998955274e-06, | |
"loss": 0.3323, | |
"step": 778 | |
}, | |
{ | |
"epoch": 2.361518987341772, | |
"grad_norm": 0.1385906330087921, | |
"learning_rate": 1.2937598223330006e-06, | |
"loss": 0.3611, | |
"step": 779 | |
}, | |
{ | |
"epoch": 2.3645569620253166, | |
"grad_norm": 0.14173852169646894, | |
"learning_rate": 1.2819095329063469e-06, | |
"loss": 0.3573, | |
"step": 780 | |
}, | |
{ | |
"epoch": 2.3675949367088607, | |
"grad_norm": 0.1395324012967561, | |
"learning_rate": 1.2701057799364591e-06, | |
"loss": 0.3744, | |
"step": 781 | |
}, | |
{ | |
"epoch": 2.370632911392405, | |
"grad_norm": 0.15051070985075915, | |
"learning_rate": 1.2583487111617647e-06, | |
"loss": 0.3641, | |
"step": 782 | |
}, | |
{ | |
"epoch": 2.3736708860759492, | |
"grad_norm": 0.14670986093390542, | |
"learning_rate": 1.246638473736378e-06, | |
"loss": 0.3439, | |
"step": 783 | |
}, | |
{ | |
"epoch": 2.3767088607594937, | |
"grad_norm": 0.1457775446421222, | |
"learning_rate": 1.2349752142282706e-06, | |
"loss": 0.3828, | |
"step": 784 | |
}, | |
{ | |
"epoch": 2.379746835443038, | |
"grad_norm": 0.14253862050319224, | |
"learning_rate": 1.223359078617416e-06, | |
"loss": 0.3667, | |
"step": 785 | |
}, | |
{ | |
"epoch": 2.3827848101265823, | |
"grad_norm": 0.14582245384466594, | |
"learning_rate": 1.2117902122939861e-06, | |
"loss": 0.3515, | |
"step": 786 | |
}, | |
{ | |
"epoch": 2.3858227848101268, | |
"grad_norm": 0.1396466440135159, | |
"learning_rate": 1.2002687600565138e-06, | |
"loss": 0.3392, | |
"step": 787 | |
}, | |
{ | |
"epoch": 2.388860759493671, | |
"grad_norm": 0.14497747521117874, | |
"learning_rate": 1.1887948661100833e-06, | |
"loss": 0.3679, | |
"step": 788 | |
}, | |
{ | |
"epoch": 2.3918987341772153, | |
"grad_norm": 0.1400434061550448, | |
"learning_rate": 1.1773686740645384e-06, | |
"loss": 0.3782, | |
"step": 789 | |
}, | |
{ | |
"epoch": 2.3949367088607594, | |
"grad_norm": 0.15068552522031847, | |
"learning_rate": 1.165990326932665e-06, | |
"loss": 0.3982, | |
"step": 790 | |
}, | |
{ | |
"epoch": 2.397974683544304, | |
"grad_norm": 0.14464648985072429, | |
"learning_rate": 1.1546599671284158e-06, | |
"loss": 0.382, | |
"step": 791 | |
}, | |
{ | |
"epoch": 2.401012658227848, | |
"grad_norm": 0.14150707244836175, | |
"learning_rate": 1.1433777364651272e-06, | |
"loss": 0.3692, | |
"step": 792 | |
}, | |
{ | |
"epoch": 2.4040506329113924, | |
"grad_norm": 0.14598665701127156, | |
"learning_rate": 1.1321437761537307e-06, | |
"loss": 0.3608, | |
"step": 793 | |
}, | |
{ | |
"epoch": 2.407088607594937, | |
"grad_norm": 0.13971211615116638, | |
"learning_rate": 1.1209582268010056e-06, | |
"loss": 0.3575, | |
"step": 794 | |
}, | |
{ | |
"epoch": 2.410126582278481, | |
"grad_norm": 0.14477123446370144, | |
"learning_rate": 1.1098212284078037e-06, | |
"loss": 0.3542, | |
"step": 795 | |
}, | |
{ | |
"epoch": 2.4131645569620255, | |
"grad_norm": 0.14076240365795126, | |
"learning_rate": 1.098732920367298e-06, | |
"loss": 0.3545, | |
"step": 796 | |
}, | |
{ | |
"epoch": 2.4162025316455695, | |
"grad_norm": 0.1405893390604257, | |
"learning_rate": 1.0876934414632523e-06, | |
"loss": 0.3741, | |
"step": 797 | |
}, | |
{ | |
"epoch": 2.419240506329114, | |
"grad_norm": 0.14279215153613262, | |
"learning_rate": 1.0767029298682642e-06, | |
"loss": 0.3448, | |
"step": 798 | |
}, | |
{ | |
"epoch": 2.422278481012658, | |
"grad_norm": 0.13748561916619084, | |
"learning_rate": 1.0657615231420492e-06, | |
"loss": 0.3458, | |
"step": 799 | |
}, | |
{ | |
"epoch": 2.4253164556962026, | |
"grad_norm": 0.14494068184127612, | |
"learning_rate": 1.0548693582297203e-06, | |
"loss": 0.353, | |
"step": 800 | |
}, | |
{ | |
"epoch": 2.4283544303797466, | |
"grad_norm": 0.14224661128262228, | |
"learning_rate": 1.0440265714600573e-06, | |
"loss": 0.3619, | |
"step": 801 | |
}, | |
{ | |
"epoch": 2.431392405063291, | |
"grad_norm": 0.1422301175985462, | |
"learning_rate": 1.0332332985438248e-06, | |
"loss": 0.3786, | |
"step": 802 | |
}, | |
{ | |
"epoch": 2.4344303797468356, | |
"grad_norm": 0.14197811091570336, | |
"learning_rate": 1.0224896745720513e-06, | |
"loss": 0.3655, | |
"step": 803 | |
}, | |
{ | |
"epoch": 2.4374683544303797, | |
"grad_norm": 0.1408337485003354, | |
"learning_rate": 1.0117958340143508e-06, | |
"loss": 0.3962, | |
"step": 804 | |
}, | |
{ | |
"epoch": 2.440506329113924, | |
"grad_norm": 0.14198169733129237, | |
"learning_rate": 1.0011519107172413e-06, | |
"loss": 0.3544, | |
"step": 805 | |
}, | |
{ | |
"epoch": 2.4435443037974682, | |
"grad_norm": 0.14765256400357799, | |
"learning_rate": 9.905580379024581e-07, | |
"loss": 0.3519, | |
"step": 806 | |
}, | |
{ | |
"epoch": 2.4465822784810127, | |
"grad_norm": 0.13305260404010058, | |
"learning_rate": 9.80014348165298e-07, | |
"loss": 0.346, | |
"step": 807 | |
}, | |
{ | |
"epoch": 2.449620253164557, | |
"grad_norm": 0.1435729151110055, | |
"learning_rate": 9.695209734729533e-07, | |
"loss": 0.3686, | |
"step": 808 | |
}, | |
{ | |
"epoch": 2.4526582278481013, | |
"grad_norm": 0.1493962374386908, | |
"learning_rate": 9.590780451628617e-07, | |
"loss": 0.3647, | |
"step": 809 | |
}, | |
{ | |
"epoch": 2.4556962025316453, | |
"grad_norm": 0.13608529098836616, | |
"learning_rate": 9.486856939410672e-07, | |
"loss": 0.3735, | |
"step": 810 | |
}, | |
{ | |
"epoch": 2.45873417721519, | |
"grad_norm": 0.14503694981671503, | |
"learning_rate": 9.383440498805712e-07, | |
"loss": 0.3489, | |
"step": 811 | |
}, | |
{ | |
"epoch": 2.4617721518987343, | |
"grad_norm": 0.14583750885660682, | |
"learning_rate": 9.280532424197192e-07, | |
"loss": 0.3675, | |
"step": 812 | |
}, | |
{ | |
"epoch": 2.4648101265822784, | |
"grad_norm": 0.1447589564816734, | |
"learning_rate": 9.178134003605721e-07, | |
"loss": 0.3639, | |
"step": 813 | |
}, | |
{ | |
"epoch": 2.467848101265823, | |
"grad_norm": 0.14148984313136934, | |
"learning_rate": 9.076246518672971e-07, | |
"loss": 0.3619, | |
"step": 814 | |
}, | |
{ | |
"epoch": 2.470886075949367, | |
"grad_norm": 0.14750236977694262, | |
"learning_rate": 8.974871244645628e-07, | |
"loss": 0.3552, | |
"step": 815 | |
}, | |
{ | |
"epoch": 2.4739240506329114, | |
"grad_norm": 0.14286668133865993, | |
"learning_rate": 8.874009450359428e-07, | |
"loss": 0.3898, | |
"step": 816 | |
}, | |
{ | |
"epoch": 2.476962025316456, | |
"grad_norm": 0.1484301811121413, | |
"learning_rate": 8.773662398223276e-07, | |
"loss": 0.3748, | |
"step": 817 | |
}, | |
{ | |
"epoch": 2.48, | |
"grad_norm": 0.14341606721481867, | |
"learning_rate": 8.673831344203454e-07, | |
"loss": 0.3498, | |
"step": 818 | |
}, | |
{ | |
"epoch": 2.4830379746835445, | |
"grad_norm": 0.14189771292479822, | |
"learning_rate": 8.574517537807897e-07, | |
"loss": 0.3587, | |
"step": 819 | |
}, | |
{ | |
"epoch": 2.4860759493670885, | |
"grad_norm": 0.14305942408590702, | |
"learning_rate": 8.475722222070542e-07, | |
"loss": 0.3649, | |
"step": 820 | |
}, | |
{ | |
"epoch": 2.489113924050633, | |
"grad_norm": 0.14404611768972017, | |
"learning_rate": 8.377446633535797e-07, | |
"loss": 0.354, | |
"step": 821 | |
}, | |
{ | |
"epoch": 2.492151898734177, | |
"grad_norm": 0.1406676353514383, | |
"learning_rate": 8.279692002243028e-07, | |
"loss": 0.3702, | |
"step": 822 | |
}, | |
{ | |
"epoch": 2.4951898734177216, | |
"grad_norm": 0.14167576764446896, | |
"learning_rate": 8.182459551711197e-07, | |
"loss": 0.3483, | |
"step": 823 | |
}, | |
{ | |
"epoch": 2.4982278481012656, | |
"grad_norm": 0.1413673019085962, | |
"learning_rate": 8.085750498923528e-07, | |
"loss": 0.3613, | |
"step": 824 | |
}, | |
{ | |
"epoch": 2.50126582278481, | |
"grad_norm": 0.13949858187507314, | |
"learning_rate": 7.989566054312286e-07, | |
"loss": 0.3588, | |
"step": 825 | |
}, | |
{ | |
"epoch": 2.5043037974683546, | |
"grad_norm": 0.13399060946873256, | |
"learning_rate": 7.893907421743613e-07, | |
"loss": 0.3508, | |
"step": 826 | |
}, | |
{ | |
"epoch": 2.5073417721518987, | |
"grad_norm": 0.14815186524448404, | |
"learning_rate": 7.798775798502484e-07, | |
"loss": 0.3753, | |
"step": 827 | |
}, | |
{ | |
"epoch": 2.510379746835443, | |
"grad_norm": 0.14107003221585707, | |
"learning_rate": 7.704172375277691e-07, | |
"loss": 0.353, | |
"step": 828 | |
}, | |
{ | |
"epoch": 2.5134177215189872, | |
"grad_norm": 0.12880402651650627, | |
"learning_rate": 7.610098336146965e-07, | |
"loss": 0.3426, | |
"step": 829 | |
}, | |
{ | |
"epoch": 2.5164556962025317, | |
"grad_norm": 0.13893882279621997, | |
"learning_rate": 7.516554858562142e-07, | |
"loss": 0.3742, | |
"step": 830 | |
}, | |
{ | |
"epoch": 2.519493670886076, | |
"grad_norm": 0.14133937833940632, | |
"learning_rate": 7.423543113334436e-07, | |
"loss": 0.353, | |
"step": 831 | |
}, | |
{ | |
"epoch": 2.5225316455696203, | |
"grad_norm": 0.1419575084368612, | |
"learning_rate": 7.331064264619786e-07, | |
"loss": 0.3679, | |
"step": 832 | |
}, | |
{ | |
"epoch": 2.5255696202531643, | |
"grad_norm": 0.14081615107573495, | |
"learning_rate": 7.239119469904227e-07, | |
"loss": 0.3463, | |
"step": 833 | |
}, | |
{ | |
"epoch": 2.528607594936709, | |
"grad_norm": 0.1346841225757085, | |
"learning_rate": 7.147709879989539e-07, | |
"loss": 0.3512, | |
"step": 834 | |
}, | |
{ | |
"epoch": 2.5316455696202533, | |
"grad_norm": 0.14935032059019177, | |
"learning_rate": 7.056836638978698e-07, | |
"loss": 0.3857, | |
"step": 835 | |
}, | |
{ | |
"epoch": 2.5346835443037974, | |
"grad_norm": 0.1439669767055388, | |
"learning_rate": 6.966500884261635e-07, | |
"loss": 0.3654, | |
"step": 836 | |
}, | |
{ | |
"epoch": 2.537721518987342, | |
"grad_norm": 0.1364128007511732, | |
"learning_rate": 6.876703746500984e-07, | |
"loss": 0.3241, | |
"step": 837 | |
}, | |
{ | |
"epoch": 2.540759493670886, | |
"grad_norm": 0.14254432013124188, | |
"learning_rate": 6.787446349617899e-07, | |
"loss": 0.3802, | |
"step": 838 | |
}, | |
{ | |
"epoch": 2.5437974683544304, | |
"grad_norm": 0.1348271565130609, | |
"learning_rate": 6.698729810778065e-07, | |
"loss": 0.3606, | |
"step": 839 | |
}, | |
{ | |
"epoch": 2.546835443037975, | |
"grad_norm": 0.1395010006488868, | |
"learning_rate": 6.610555240377653e-07, | |
"loss": 0.3776, | |
"step": 840 | |
}, | |
{ | |
"epoch": 2.549873417721519, | |
"grad_norm": 0.15111108270200227, | |
"learning_rate": 6.522923742029374e-07, | |
"loss": 0.3711, | |
"step": 841 | |
}, | |
{ | |
"epoch": 2.552911392405063, | |
"grad_norm": 0.13608547533374346, | |
"learning_rate": 6.435836412548835e-07, | |
"loss": 0.3516, | |
"step": 842 | |
}, | |
{ | |
"epoch": 2.5559493670886075, | |
"grad_norm": 0.13381246094662583, | |
"learning_rate": 6.349294341940593e-07, | |
"loss": 0.351, | |
"step": 843 | |
}, | |
{ | |
"epoch": 2.558987341772152, | |
"grad_norm": 0.14005213416771026, | |
"learning_rate": 6.263298613384705e-07, | |
"loss": 0.3508, | |
"step": 844 | |
}, | |
{ | |
"epoch": 2.562025316455696, | |
"grad_norm": 0.13085523859476353, | |
"learning_rate": 6.177850303223059e-07, | |
"loss": 0.3661, | |
"step": 845 | |
}, | |
{ | |
"epoch": 2.5650632911392406, | |
"grad_norm": 0.14060955175856465, | |
"learning_rate": 6.092950480945897e-07, | |
"loss": 0.3472, | |
"step": 846 | |
}, | |
{ | |
"epoch": 2.5681012658227846, | |
"grad_norm": 0.14813730552847526, | |
"learning_rate": 6.008600209178539e-07, | |
"loss": 0.3531, | |
"step": 847 | |
}, | |
{ | |
"epoch": 2.571139240506329, | |
"grad_norm": 0.15000846362334494, | |
"learning_rate": 5.92480054366793e-07, | |
"loss": 0.3681, | |
"step": 848 | |
}, | |
{ | |
"epoch": 2.5741772151898736, | |
"grad_norm": 0.14334858570822884, | |
"learning_rate": 5.841552533269534e-07, | |
"loss": 0.34, | |
"step": 849 | |
}, | |
{ | |
"epoch": 2.5772151898734177, | |
"grad_norm": 0.1314990905808941, | |
"learning_rate": 5.75885721993421e-07, | |
"loss": 0.357, | |
"step": 850 | |
}, | |
{ | |
"epoch": 2.580253164556962, | |
"grad_norm": 0.1461629388988376, | |
"learning_rate": 5.676715638695063e-07, | |
"loss": 0.331, | |
"step": 851 | |
}, | |
{ | |
"epoch": 2.5832911392405062, | |
"grad_norm": 0.137611905623909, | |
"learning_rate": 5.595128817654638e-07, | |
"loss": 0.3517, | |
"step": 852 | |
}, | |
{ | |
"epoch": 2.5863291139240507, | |
"grad_norm": 0.14882425269799035, | |
"learning_rate": 5.514097777971939e-07, | |
"loss": 0.3599, | |
"step": 853 | |
}, | |
{ | |
"epoch": 2.589367088607595, | |
"grad_norm": 0.13474588857180161, | |
"learning_rate": 5.433623533849658e-07, | |
"loss": 0.3518, | |
"step": 854 | |
}, | |
{ | |
"epoch": 2.5924050632911393, | |
"grad_norm": 0.13467344047962448, | |
"learning_rate": 5.353707092521581e-07, | |
"loss": 0.3463, | |
"step": 855 | |
}, | |
{ | |
"epoch": 2.5954430379746833, | |
"grad_norm": 0.13822903597871414, | |
"learning_rate": 5.274349454239836e-07, | |
"loss": 0.3651, | |
"step": 856 | |
}, | |
{ | |
"epoch": 2.598481012658228, | |
"grad_norm": 0.13473144890924296, | |
"learning_rate": 5.195551612262478e-07, | |
"loss": 0.3591, | |
"step": 857 | |
}, | |
{ | |
"epoch": 2.6015189873417723, | |
"grad_norm": 0.14778113657864514, | |
"learning_rate": 5.117314552841052e-07, | |
"loss": 0.3952, | |
"step": 858 | |
}, | |
{ | |
"epoch": 2.6045569620253164, | |
"grad_norm": 0.15168402166456194, | |
"learning_rate": 5.039639255208156e-07, | |
"loss": 0.3542, | |
"step": 859 | |
}, | |
{ | |
"epoch": 2.607594936708861, | |
"grad_norm": 0.14600363407620232, | |
"learning_rate": 4.962526691565333e-07, | |
"loss": 0.3357, | |
"step": 860 | |
}, | |
{ | |
"epoch": 2.610632911392405, | |
"grad_norm": 0.1282479097902379, | |
"learning_rate": 4.885977827070748e-07, | |
"loss": 0.3379, | |
"step": 861 | |
}, | |
{ | |
"epoch": 2.6136708860759494, | |
"grad_norm": 0.14419548611833952, | |
"learning_rate": 4.809993619827203e-07, | |
"loss": 0.3426, | |
"step": 862 | |
}, | |
{ | |
"epoch": 2.616708860759494, | |
"grad_norm": 0.13407436879878423, | |
"learning_rate": 4.734575020870169e-07, | |
"loss": 0.3805, | |
"step": 863 | |
}, | |
{ | |
"epoch": 2.619746835443038, | |
"grad_norm": 0.13430329562453588, | |
"learning_rate": 4.659722974155767e-07, | |
"loss": 0.345, | |
"step": 864 | |
}, | |
{ | |
"epoch": 2.622784810126582, | |
"grad_norm": 0.14431698089054304, | |
"learning_rate": 4.5854384165490596e-07, | |
"loss": 0.3616, | |
"step": 865 | |
}, | |
{ | |
"epoch": 2.6258227848101265, | |
"grad_norm": 0.14455208613547382, | |
"learning_rate": 4.511722277812286e-07, | |
"loss": 0.3448, | |
"step": 866 | |
}, | |
{ | |
"epoch": 2.628860759493671, | |
"grad_norm": 0.13429112067273102, | |
"learning_rate": 4.43857548059321e-07, | |
"loss": 0.3661, | |
"step": 867 | |
}, | |
{ | |
"epoch": 2.631898734177215, | |
"grad_norm": 0.14159181014988267, | |
"learning_rate": 4.365998940413629e-07, | |
"loss": 0.3455, | |
"step": 868 | |
}, | |
{ | |
"epoch": 2.6349367088607596, | |
"grad_norm": 0.136621586848235, | |
"learning_rate": 4.293993565657828e-07, | |
"loss": 0.3436, | |
"step": 869 | |
}, | |
{ | |
"epoch": 2.6379746835443036, | |
"grad_norm": 0.14360285658878558, | |
"learning_rate": 4.222560257561276e-07, | |
"loss": 0.3507, | |
"step": 870 | |
}, | |
{ | |
"epoch": 2.641012658227848, | |
"grad_norm": 0.14225730914449142, | |
"learning_rate": 4.151699910199336e-07, | |
"loss": 0.3528, | |
"step": 871 | |
}, | |
{ | |
"epoch": 2.6440506329113926, | |
"grad_norm": 0.14838260397402092, | |
"learning_rate": 4.0814134104760483e-07, | |
"loss": 0.3755, | |
"step": 872 | |
}, | |
{ | |
"epoch": 2.6470886075949367, | |
"grad_norm": 0.1598367499255771, | |
"learning_rate": 4.0117016381130636e-07, | |
"loss": 0.3891, | |
"step": 873 | |
}, | |
{ | |
"epoch": 2.6501265822784807, | |
"grad_norm": 0.14446123574545194, | |
"learning_rate": 3.9425654656386094e-07, | |
"loss": 0.3716, | |
"step": 874 | |
}, | |
{ | |
"epoch": 2.6531645569620252, | |
"grad_norm": 0.14517669624053445, | |
"learning_rate": 3.87400575837657e-07, | |
"loss": 0.3629, | |
"step": 875 | |
}, | |
{ | |
"epoch": 2.6562025316455697, | |
"grad_norm": 0.13863663951583624, | |
"learning_rate": 3.8060233744356634e-07, | |
"loss": 0.3628, | |
"step": 876 | |
}, | |
{ | |
"epoch": 2.659240506329114, | |
"grad_norm": 0.14367297665910245, | |
"learning_rate": 3.7386191646987094e-07, | |
"loss": 0.3759, | |
"step": 877 | |
}, | |
{ | |
"epoch": 2.6622784810126583, | |
"grad_norm": 0.13598047085266352, | |
"learning_rate": 3.671793972811954e-07, | |
"loss": 0.3703, | |
"step": 878 | |
}, | |
{ | |
"epoch": 2.6653164556962023, | |
"grad_norm": 0.14898777719114867, | |
"learning_rate": 3.6055486351745327e-07, | |
"loss": 0.3452, | |
"step": 879 | |
}, | |
{ | |
"epoch": 2.668354430379747, | |
"grad_norm": 0.13807909580315986, | |
"learning_rate": 3.539883980928005e-07, | |
"loss": 0.3687, | |
"step": 880 | |
}, | |
{ | |
"epoch": 2.6713924050632913, | |
"grad_norm": 0.14897676417163686, | |
"learning_rate": 3.4748008319459457e-07, | |
"loss": 0.3458, | |
"step": 881 | |
}, | |
{ | |
"epoch": 2.6744303797468354, | |
"grad_norm": 0.14839136336747957, | |
"learning_rate": 3.410300002823691e-07, | |
"loss": 0.3541, | |
"step": 882 | |
}, | |
{ | |
"epoch": 2.67746835443038, | |
"grad_norm": 0.1376194355310099, | |
"learning_rate": 3.346382300868134e-07, | |
"loss": 0.333, | |
"step": 883 | |
}, | |
{ | |
"epoch": 2.680506329113924, | |
"grad_norm": 0.14122555818361973, | |
"learning_rate": 3.2830485260876064e-07, | |
"loss": 0.3982, | |
"step": 884 | |
}, | |
{ | |
"epoch": 2.6835443037974684, | |
"grad_norm": 0.14968124197902433, | |
"learning_rate": 3.220299471181898e-07, | |
"loss": 0.3537, | |
"step": 885 | |
}, | |
{ | |
"epoch": 2.6865822784810125, | |
"grad_norm": 0.14116097579071474, | |
"learning_rate": 3.158135921532268e-07, | |
"loss": 0.3525, | |
"step": 886 | |
}, | |
{ | |
"epoch": 2.689620253164557, | |
"grad_norm": 0.1399951176094029, | |
"learning_rate": 3.096558655191706e-07, | |
"loss": 0.3301, | |
"step": 887 | |
}, | |
{ | |
"epoch": 2.692658227848101, | |
"grad_norm": 0.14786696698824425, | |
"learning_rate": 3.035568442875136e-07, | |
"loss": 0.3686, | |
"step": 888 | |
}, | |
{ | |
"epoch": 2.6956962025316455, | |
"grad_norm": 0.13574997006805542, | |
"learning_rate": 2.9751660479497737e-07, | |
"loss": 0.3742, | |
"step": 889 | |
}, | |
{ | |
"epoch": 2.69873417721519, | |
"grad_norm": 0.14162583087805647, | |
"learning_rate": 2.915352226425583e-07, | |
"loss": 0.3502, | |
"step": 890 | |
}, | |
{ | |
"epoch": 2.701772151898734, | |
"grad_norm": 0.1404375267715342, | |
"learning_rate": 2.85612772694579e-07, | |
"loss": 0.3567, | |
"step": 891 | |
}, | |
{ | |
"epoch": 2.7048101265822786, | |
"grad_norm": 0.1418915799031849, | |
"learning_rate": 2.7974932907775863e-07, | |
"loss": 0.349, | |
"step": 892 | |
}, | |
{ | |
"epoch": 2.7078481012658226, | |
"grad_norm": 0.14563236272596766, | |
"learning_rate": 2.739449651802756e-07, | |
"loss": 0.3903, | |
"step": 893 | |
}, | |
{ | |
"epoch": 2.710886075949367, | |
"grad_norm": 0.1416930939009952, | |
"learning_rate": 2.6819975365085237e-07, | |
"loss": 0.3742, | |
"step": 894 | |
}, | |
{ | |
"epoch": 2.7139240506329116, | |
"grad_norm": 0.14797637210969025, | |
"learning_rate": 2.6251376639785163e-07, | |
"loss": 0.3779, | |
"step": 895 | |
}, | |
{ | |
"epoch": 2.7169620253164557, | |
"grad_norm": 0.14863207599810754, | |
"learning_rate": 2.5688707458836724e-07, | |
"loss": 0.3827, | |
"step": 896 | |
}, | |
{ | |
"epoch": 2.7199999999999998, | |
"grad_norm": 0.14561232207790262, | |
"learning_rate": 2.5131974864734063e-07, | |
"loss": 0.3531, | |
"step": 897 | |
}, | |
{ | |
"epoch": 2.7230379746835442, | |
"grad_norm": 0.14766046853660647, | |
"learning_rate": 2.45811858256676e-07, | |
"loss": 0.3312, | |
"step": 898 | |
}, | |
{ | |
"epoch": 2.7260759493670887, | |
"grad_norm": 0.14177051091887588, | |
"learning_rate": 2.403634723543674e-07, | |
"loss": 0.3362, | |
"step": 899 | |
}, | |
{ | |
"epoch": 2.729113924050633, | |
"grad_norm": 0.14501857620086245, | |
"learning_rate": 2.3497465913364047e-07, | |
"loss": 0.354, | |
"step": 900 | |
}, | |
{ | |
"epoch": 2.7321518987341773, | |
"grad_norm": 0.14566914555210908, | |
"learning_rate": 2.2964548604209214e-07, | |
"loss": 0.367, | |
"step": 901 | |
}, | |
{ | |
"epoch": 2.7351898734177214, | |
"grad_norm": 0.14227637192680714, | |
"learning_rate": 2.2437601978085144e-07, | |
"loss": 0.3646, | |
"step": 902 | |
}, | |
{ | |
"epoch": 2.738227848101266, | |
"grad_norm": 0.13705012394654958, | |
"learning_rate": 2.1916632630374579e-07, | |
"loss": 0.3791, | |
"step": 903 | |
}, | |
{ | |
"epoch": 2.7412658227848103, | |
"grad_norm": 0.14332007524431387, | |
"learning_rate": 2.1401647081646825e-07, | |
"loss": 0.3759, | |
"step": 904 | |
}, | |
{ | |
"epoch": 2.7443037974683544, | |
"grad_norm": 0.14995105596897376, | |
"learning_rate": 2.0892651777577045e-07, | |
"loss": 0.3587, | |
"step": 905 | |
}, | |
{ | |
"epoch": 2.747341772151899, | |
"grad_norm": 0.1322865348126154, | |
"learning_rate": 2.0389653088865035e-07, | |
"loss": 0.3266, | |
"step": 906 | |
}, | |
{ | |
"epoch": 2.750379746835443, | |
"grad_norm": 0.13702889779019603, | |
"learning_rate": 1.989265731115525e-07, | |
"loss": 0.3754, | |
"step": 907 | |
}, | |
{ | |
"epoch": 2.7534177215189874, | |
"grad_norm": 0.13943316795835564, | |
"learning_rate": 1.940167066495896e-07, | |
"loss": 0.3391, | |
"step": 908 | |
}, | |
{ | |
"epoch": 2.7564556962025315, | |
"grad_norm": 0.13964099607089664, | |
"learning_rate": 1.8916699295575324e-07, | |
"loss": 0.3519, | |
"step": 909 | |
}, | |
{ | |
"epoch": 2.759493670886076, | |
"grad_norm": 0.13394426066716283, | |
"learning_rate": 1.8437749273015116e-07, | |
"loss": 0.3701, | |
"step": 910 | |
}, | |
{ | |
"epoch": 2.76253164556962, | |
"grad_norm": 0.1542873317357114, | |
"learning_rate": 1.7964826591924722e-07, | |
"loss": 0.3672, | |
"step": 911 | |
}, | |
{ | |
"epoch": 2.7655696202531646, | |
"grad_norm": 0.13601394591415503, | |
"learning_rate": 1.749793717151055e-07, | |
"loss": 0.3794, | |
"step": 912 | |
}, | |
{ | |
"epoch": 2.768607594936709, | |
"grad_norm": 0.149333961320097, | |
"learning_rate": 1.7037086855465902e-07, | |
"loss": 0.3517, | |
"step": 913 | |
}, | |
{ | |
"epoch": 2.771645569620253, | |
"grad_norm": 0.1439570881143369, | |
"learning_rate": 1.6582281411896827e-07, | |
"loss": 0.369, | |
"step": 914 | |
}, | |
{ | |
"epoch": 2.7746835443037976, | |
"grad_norm": 0.1338348522243044, | |
"learning_rate": 1.6133526533250566e-07, | |
"loss": 0.3306, | |
"step": 915 | |
}, | |
{ | |
"epoch": 2.7777215189873417, | |
"grad_norm": 0.14056655146374267, | |
"learning_rate": 1.5690827836244317e-07, | |
"loss": 0.3666, | |
"step": 916 | |
}, | |
{ | |
"epoch": 2.780759493670886, | |
"grad_norm": 0.14694930084022878, | |
"learning_rate": 1.5254190861794415e-07, | |
"loss": 0.3685, | |
"step": 917 | |
}, | |
{ | |
"epoch": 2.7837974683544306, | |
"grad_norm": 0.13942952495633854, | |
"learning_rate": 1.4823621074947503e-07, | |
"loss": 0.3679, | |
"step": 918 | |
}, | |
{ | |
"epoch": 2.7868354430379747, | |
"grad_norm": 0.15093468034751636, | |
"learning_rate": 1.4399123864811904e-07, | |
"loss": 0.3791, | |
"step": 919 | |
}, | |
{ | |
"epoch": 2.7898734177215188, | |
"grad_norm": 0.14746517550482235, | |
"learning_rate": 1.398070454449013e-07, | |
"loss": 0.3986, | |
"step": 920 | |
}, | |
{ | |
"epoch": 2.7929113924050633, | |
"grad_norm": 0.13795572431759273, | |
"learning_rate": 1.3568368351012718e-07, | |
"loss": 0.3442, | |
"step": 921 | |
}, | |
{ | |
"epoch": 2.7959493670886078, | |
"grad_norm": 0.14934161196335263, | |
"learning_rate": 1.3162120445272096e-07, | |
"loss": 0.3431, | |
"step": 922 | |
}, | |
{ | |
"epoch": 2.798987341772152, | |
"grad_norm": 0.13830591712832402, | |
"learning_rate": 1.2761965911958385e-07, | |
"loss": 0.3657, | |
"step": 923 | |
}, | |
{ | |
"epoch": 2.8020253164556963, | |
"grad_norm": 0.14185016126182032, | |
"learning_rate": 1.236790975949592e-07, | |
"loss": 0.3971, | |
"step": 924 | |
}, | |
{ | |
"epoch": 2.8050632911392404, | |
"grad_norm": 0.14893734927509886, | |
"learning_rate": 1.1979956919979996e-07, | |
"loss": 0.3496, | |
"step": 925 | |
}, | |
{ | |
"epoch": 2.808101265822785, | |
"grad_norm": 0.14717788380695743, | |
"learning_rate": 1.1598112249115723e-07, | |
"loss": 0.3702, | |
"step": 926 | |
}, | |
{ | |
"epoch": 2.8111392405063294, | |
"grad_norm": 0.15069631565588076, | |
"learning_rate": 1.1222380526156929e-07, | |
"loss": 0.3706, | |
"step": 927 | |
}, | |
{ | |
"epoch": 2.8141772151898734, | |
"grad_norm": 0.1333471123136446, | |
"learning_rate": 1.0852766453846308e-07, | |
"loss": 0.352, | |
"step": 928 | |
}, | |
{ | |
"epoch": 2.8172151898734175, | |
"grad_norm": 0.15098682346178977, | |
"learning_rate": 1.0489274658356808e-07, | |
"loss": 0.3641, | |
"step": 929 | |
}, | |
{ | |
"epoch": 2.820253164556962, | |
"grad_norm": 0.14501492746112243, | |
"learning_rate": 1.0131909689233444e-07, | |
"loss": 0.3647, | |
"step": 930 | |
}, | |
{ | |
"epoch": 2.8232911392405065, | |
"grad_norm": 0.1425701216690938, | |
"learning_rate": 9.780676019336632e-08, | |
"loss": 0.3441, | |
"step": 931 | |
}, | |
{ | |
"epoch": 2.8263291139240505, | |
"grad_norm": 0.13992220457173968, | |
"learning_rate": 9.435578044786009e-08, | |
"loss": 0.3446, | |
"step": 932 | |
}, | |
{ | |
"epoch": 2.829367088607595, | |
"grad_norm": 0.13753537015830755, | |
"learning_rate": 9.096620084905472e-08, | |
"loss": 0.3222, | |
"step": 933 | |
}, | |
{ | |
"epoch": 2.832405063291139, | |
"grad_norm": 0.1433818687736568, | |
"learning_rate": 8.763806382169005e-08, | |
"loss": 0.3723, | |
"step": 934 | |
}, | |
{ | |
"epoch": 2.8354430379746836, | |
"grad_norm": 0.14635658707240395, | |
"learning_rate": 8.437141102147883e-08, | |
"loss": 0.3519, | |
"step": 935 | |
}, | |
{ | |
"epoch": 2.838481012658228, | |
"grad_norm": 0.14341707235390178, | |
"learning_rate": 8.11662833345822e-08, | |
"loss": 0.3735, | |
"step": 936 | |
}, | |
{ | |
"epoch": 2.841518987341772, | |
"grad_norm": 0.14428097001730178, | |
"learning_rate": 7.802272087709951e-08, | |
"loss": 0.3435, | |
"step": 937 | |
}, | |
{ | |
"epoch": 2.8445569620253166, | |
"grad_norm": 0.13539837299939841, | |
"learning_rate": 7.494076299456531e-08, | |
"loss": 0.3514, | |
"step": 938 | |
}, | |
{ | |
"epoch": 2.8475949367088607, | |
"grad_norm": 0.13340280244063915, | |
"learning_rate": 7.192044826145772e-08, | |
"loss": 0.3121, | |
"step": 939 | |
}, | |
{ | |
"epoch": 2.850632911392405, | |
"grad_norm": 0.13775709740053435, | |
"learning_rate": 6.896181448071582e-08, | |
"loss": 0.3592, | |
"step": 940 | |
}, | |
{ | |
"epoch": 2.853670886075949, | |
"grad_norm": 0.13370052932506574, | |
"learning_rate": 6.606489868326571e-08, | |
"loss": 0.3928, | |
"step": 941 | |
}, | |
{ | |
"epoch": 2.8567088607594937, | |
"grad_norm": 0.13858250025209845, | |
"learning_rate": 6.322973712755698e-08, | |
"loss": 0.3548, | |
"step": 942 | |
}, | |
{ | |
"epoch": 2.8597468354430378, | |
"grad_norm": 0.14100466864912514, | |
"learning_rate": 6.045636529911025e-08, | |
"loss": 0.3814, | |
"step": 943 | |
}, | |
{ | |
"epoch": 2.8627848101265823, | |
"grad_norm": 0.13927977406099926, | |
"learning_rate": 5.7744817910069804e-08, | |
"loss": 0.3571, | |
"step": 944 | |
}, | |
{ | |
"epoch": 2.8658227848101268, | |
"grad_norm": 0.14225333857138436, | |
"learning_rate": 5.509512889877333e-08, | |
"loss": 0.3688, | |
"step": 945 | |
}, | |
{ | |
"epoch": 2.868860759493671, | |
"grad_norm": 0.14509348324717275, | |
"learning_rate": 5.250733142932562e-08, | |
"loss": 0.3615, | |
"step": 946 | |
}, | |
{ | |
"epoch": 2.8718987341772153, | |
"grad_norm": 0.1441834903168049, | |
"learning_rate": 4.998145789118114e-08, | |
"loss": 0.3745, | |
"step": 947 | |
}, | |
{ | |
"epoch": 2.8749367088607594, | |
"grad_norm": 0.1522154692938043, | |
"learning_rate": 4.751753989874153e-08, | |
"loss": 0.3734, | |
"step": 948 | |
}, | |
{ | |
"epoch": 2.877974683544304, | |
"grad_norm": 0.14299093831394943, | |
"learning_rate": 4.511560829095818e-08, | |
"loss": 0.3886, | |
"step": 949 | |
}, | |
{ | |
"epoch": 2.8810126582278484, | |
"grad_norm": 0.15119476391129122, | |
"learning_rate": 4.2775693130948094e-08, | |
"loss": 0.369, | |
"step": 950 | |
}, | |
{ | |
"epoch": 2.8840506329113924, | |
"grad_norm": 0.14903127814300893, | |
"learning_rate": 4.0497823705615836e-08, | |
"loss": 0.3722, | |
"step": 951 | |
}, | |
{ | |
"epoch": 2.8870886075949365, | |
"grad_norm": 0.15050927595878638, | |
"learning_rate": 3.828202852528717e-08, | |
"loss": 0.4036, | |
"step": 952 | |
}, | |
{ | |
"epoch": 2.890126582278481, | |
"grad_norm": 0.1462541853521983, | |
"learning_rate": 3.6128335323353804e-08, | |
"loss": 0.357, | |
"step": 953 | |
}, | |
{ | |
"epoch": 2.8931645569620255, | |
"grad_norm": 0.14359248091321075, | |
"learning_rate": 3.4036771055923066e-08, | |
"loss": 0.3395, | |
"step": 954 | |
}, | |
{ | |
"epoch": 2.8962025316455695, | |
"grad_norm": 0.14417757378782878, | |
"learning_rate": 3.2007361901485455e-08, | |
"loss": 0.3599, | |
"step": 955 | |
}, | |
{ | |
"epoch": 2.899240506329114, | |
"grad_norm": 0.14247102359863195, | |
"learning_rate": 3.004013326058153e-08, | |
"loss": 0.3442, | |
"step": 956 | |
}, | |
{ | |
"epoch": 2.902278481012658, | |
"grad_norm": 0.13709282238228418, | |
"learning_rate": 2.8135109755487723e-08, | |
"loss": 0.3578, | |
"step": 957 | |
}, | |
{ | |
"epoch": 2.9053164556962026, | |
"grad_norm": 0.14387482380283953, | |
"learning_rate": 2.629231522990716e-08, | |
"loss": 0.3592, | |
"step": 958 | |
}, | |
{ | |
"epoch": 2.908354430379747, | |
"grad_norm": 0.14514780847876588, | |
"learning_rate": 2.4511772748669894e-08, | |
"loss": 0.3794, | |
"step": 959 | |
}, | |
{ | |
"epoch": 2.911392405063291, | |
"grad_norm": 0.14241431855080486, | |
"learning_rate": 2.2793504597447003e-08, | |
"loss": 0.3675, | |
"step": 960 | |
}, | |
{ | |
"epoch": 2.9144303797468356, | |
"grad_norm": 0.13736916697690793, | |
"learning_rate": 2.1137532282469176e-08, | |
"loss": 0.3779, | |
"step": 961 | |
}, | |
{ | |
"epoch": 2.9174683544303797, | |
"grad_norm": 0.1448670614839553, | |
"learning_rate": 1.954387653025802e-08, | |
"loss": 0.3528, | |
"step": 962 | |
}, | |
{ | |
"epoch": 2.920506329113924, | |
"grad_norm": 0.13097811503034512, | |
"learning_rate": 1.8012557287367394e-08, | |
"loss": 0.3328, | |
"step": 963 | |
}, | |
{ | |
"epoch": 2.923544303797468, | |
"grad_norm": 0.13561749309077153, | |
"learning_rate": 1.6543593720134142e-08, | |
"loss": 0.3386, | |
"step": 964 | |
}, | |
{ | |
"epoch": 2.9265822784810127, | |
"grad_norm": 0.14165891512714415, | |
"learning_rate": 1.513700421443609e-08, | |
"loss": 0.3546, | |
"step": 965 | |
}, | |
{ | |
"epoch": 2.9296202531645568, | |
"grad_norm": 0.1433081917389733, | |
"learning_rate": 1.379280637546443e-08, | |
"loss": 0.3745, | |
"step": 966 | |
}, | |
{ | |
"epoch": 2.9326582278481013, | |
"grad_norm": 0.14293035691840986, | |
"learning_rate": 1.2511017027501682e-08, | |
"loss": 0.3385, | |
"step": 967 | |
}, | |
{ | |
"epoch": 2.9356962025316458, | |
"grad_norm": 0.1403000487949085, | |
"learning_rate": 1.1291652213710758e-08, | |
"loss": 0.3567, | |
"step": 968 | |
}, | |
{ | |
"epoch": 2.93873417721519, | |
"grad_norm": 0.14281092893489616, | |
"learning_rate": 1.0134727195937332e-08, | |
"loss": 0.3563, | |
"step": 969 | |
}, | |
{ | |
"epoch": 2.9417721518987343, | |
"grad_norm": 0.13034908498073247, | |
"learning_rate": 9.04025645451445e-09, | |
"loss": 0.3518, | |
"step": 970 | |
}, | |
{ | |
"epoch": 2.9448101265822784, | |
"grad_norm": 0.14582338155809607, | |
"learning_rate": 8.008253688084888e-09, | |
"loss": 0.3382, | |
"step": 971 | |
}, | |
{ | |
"epoch": 2.947848101265823, | |
"grad_norm": 0.199628396153666, | |
"learning_rate": 7.038731813426292e-09, | |
"loss": 0.343, | |
"step": 972 | |
}, | |
{ | |
"epoch": 2.9508860759493674, | |
"grad_norm": 0.14061669479319516, | |
"learning_rate": 6.1317029652929734e-09, | |
"loss": 0.3689, | |
"step": 973 | |
}, | |
{ | |
"epoch": 2.9539240506329114, | |
"grad_norm": 0.14376239008080066, | |
"learning_rate": 5.2871784962627015e-09, | |
"loss": 0.3632, | |
"step": 974 | |
}, | |
{ | |
"epoch": 2.9569620253164555, | |
"grad_norm": 0.13594297376826525, | |
"learning_rate": 4.505168976592922e-09, | |
"loss": 0.3553, | |
"step": 975 | |
}, | |
{ | |
"epoch": 2.96, | |
"grad_norm": 0.14081835098065112, | |
"learning_rate": 3.785684194090866e-09, | |
"loss": 0.3531, | |
"step": 976 | |
}, | |
{ | |
"epoch": 2.9630379746835445, | |
"grad_norm": 0.1424973642953699, | |
"learning_rate": 3.1287331539903155e-09, | |
"loss": 0.367, | |
"step": 977 | |
}, | |
{ | |
"epoch": 2.9660759493670885, | |
"grad_norm": 0.1397337473486439, | |
"learning_rate": 2.534324078837802e-09, | |
"loss": 0.3834, | |
"step": 978 | |
}, | |
{ | |
"epoch": 2.969113924050633, | |
"grad_norm": 0.14766955506816515, | |
"learning_rate": 2.002464408392135e-09, | |
"loss": 0.3392, | |
"step": 979 | |
}, | |
{ | |
"epoch": 2.972151898734177, | |
"grad_norm": 0.13454940932320703, | |
"learning_rate": 1.5331607995267006e-09, | |
"loss": 0.3583, | |
"step": 980 | |
}, | |
{ | |
"epoch": 2.9751898734177216, | |
"grad_norm": 0.13195240467317917, | |
"learning_rate": 1.1264191261528557e-09, | |
"loss": 0.3256, | |
"step": 981 | |
}, | |
{ | |
"epoch": 2.978227848101266, | |
"grad_norm": 0.1388110702511361, | |
"learning_rate": 7.82244479139993e-10, | |
"loss": 0.3765, | |
"step": 982 | |
}, | |
{ | |
"epoch": 2.98126582278481, | |
"grad_norm": 0.14580240061434538, | |
"learning_rate": 5.006411662555888e-10, | |
"loss": 0.368, | |
"step": 983 | |
}, | |
{ | |
"epoch": 2.984303797468354, | |
"grad_norm": 0.14285610722978442, | |
"learning_rate": 2.816127121102463e-10, | |
"loss": 0.3497, | |
"step": 984 | |
}, | |
{ | |
"epoch": 2.9873417721518987, | |
"grad_norm": 0.14237084088208307, | |
"learning_rate": 1.251618581127323e-10, | |
"loss": 0.3527, | |
"step": 985 | |
}, | |
{ | |
"epoch": 2.990379746835443, | |
"grad_norm": 0.13991973912714595, | |
"learning_rate": 3.129056243833528e-11, | |
"loss": 0.3807, | |
"step": 986 | |
}, | |
{ | |
"epoch": 2.993417721518987, | |
"grad_norm": 0.1445451641629666, | |
"learning_rate": 0.0, | |
"loss": 0.3581, | |
"step": 987 | |
}, | |
{ | |
"epoch": 2.993417721518987, | |
"step": 987, | |
"total_flos": 2.6892487337023898e+17, | |
"train_loss": 0.5692941358930071, | |
"train_runtime": 10205.646, | |
"train_samples_per_second": 4.644, | |
"train_steps_per_second": 0.097 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 987, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 3, | |
"save_steps": 400, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": true | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 2.6892487337023898e+17, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |