{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 17937, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 26.36898422241211, "learning_rate": 3.710575139146568e-08, "loss": 2.9094, "step": 1 }, { "epoch": 0.0, "grad_norm": 24.845380783081055, "learning_rate": 7.421150278293136e-08, "loss": 2.8363, "step": 2 }, { "epoch": 0.0, "grad_norm": 25.9056453704834, "learning_rate": 1.1131725417439704e-07, "loss": 2.8137, "step": 3 }, { "epoch": 0.0, "grad_norm": 25.595260620117188, "learning_rate": 1.484230055658627e-07, "loss": 2.8547, "step": 4 }, { "epoch": 0.0, "grad_norm": 25.397111892700195, "learning_rate": 1.855287569573284e-07, "loss": 2.8158, "step": 5 }, { "epoch": 0.0, "grad_norm": 25.379898071289062, "learning_rate": 2.2263450834879408e-07, "loss": 2.8278, "step": 6 }, { "epoch": 0.0, "grad_norm": 27.134302139282227, "learning_rate": 2.597402597402598e-07, "loss": 2.8172, "step": 7 }, { "epoch": 0.0, "grad_norm": 25.14109992980957, "learning_rate": 2.968460111317254e-07, "loss": 2.7191, "step": 8 }, { "epoch": 0.0, "grad_norm": 25.103017807006836, "learning_rate": 3.339517625231911e-07, "loss": 2.7424, "step": 9 }, { "epoch": 0.0, "grad_norm": 25.53138542175293, "learning_rate": 3.710575139146568e-07, "loss": 2.7803, "step": 10 }, { "epoch": 0.0, "grad_norm": 23.238914489746094, "learning_rate": 4.0816326530612243e-07, "loss": 2.6874, "step": 11 }, { "epoch": 0.0, "grad_norm": 25.84081268310547, "learning_rate": 4.4526901669758817e-07, "loss": 2.8388, "step": 12 }, { "epoch": 0.0, "grad_norm": 25.290952682495117, "learning_rate": 4.823747680890538e-07, "loss": 2.6801, "step": 13 }, { "epoch": 0.0, "grad_norm": 24.379024505615234, "learning_rate": 5.194805194805196e-07, "loss": 2.7311, "step": 14 }, { "epoch": 0.0, "grad_norm": 23.573135375976562, "learning_rate": 5.565862708719852e-07, "loss": 2.646, "step": 15 }, { "epoch": 0.0, "grad_norm": 25.4812068939209, "learning_rate": 5.936920222634509e-07, "loss": 2.7684, "step": 16 }, { "epoch": 0.0, "grad_norm": 24.524606704711914, "learning_rate": 6.307977736549166e-07, "loss": 2.6908, "step": 17 }, { "epoch": 0.0, "grad_norm": 22.298660278320312, "learning_rate": 6.679035250463822e-07, "loss": 2.5847, "step": 18 }, { "epoch": 0.0, "grad_norm": 21.074413299560547, "learning_rate": 7.050092764378479e-07, "loss": 2.496, "step": 19 }, { "epoch": 0.0, "grad_norm": 22.54572868347168, "learning_rate": 7.421150278293136e-07, "loss": 2.5362, "step": 20 }, { "epoch": 0.0, "grad_norm": 21.144603729248047, "learning_rate": 7.792207792207792e-07, "loss": 2.4789, "step": 21 }, { "epoch": 0.0, "grad_norm": 20.659284591674805, "learning_rate": 8.163265306122449e-07, "loss": 2.5046, "step": 22 }, { "epoch": 0.0, "grad_norm": 17.256105422973633, "learning_rate": 8.534322820037107e-07, "loss": 2.1384, "step": 23 }, { "epoch": 0.0, "grad_norm": 16.564172744750977, "learning_rate": 8.905380333951763e-07, "loss": 2.1986, "step": 24 }, { "epoch": 0.0, "grad_norm": 16.546611785888672, "learning_rate": 9.27643784786642e-07, "loss": 2.157, "step": 25 }, { "epoch": 0.0, "grad_norm": 14.055990219116211, "learning_rate": 9.647495361781077e-07, "loss": 2.1711, "step": 26 }, { "epoch": 0.0, "grad_norm": 14.929924964904785, "learning_rate": 1.0018552875695733e-06, "loss": 2.1697, "step": 27 }, { "epoch": 0.0, "grad_norm": 12.344754219055176, "learning_rate": 1.0389610389610392e-06, "loss": 2.0868, "step": 28 }, { "epoch": 0.0, "grad_norm": 12.70649242401123, "learning_rate": 1.0760667903525048e-06, "loss": 2.0683, "step": 29 }, { "epoch": 0.01, "grad_norm": 12.578877449035645, "learning_rate": 1.1131725417439704e-06, "loss": 2.0, "step": 30 }, { "epoch": 0.01, "grad_norm": 10.547303199768066, "learning_rate": 1.150278293135436e-06, "loss": 1.8951, "step": 31 }, { "epoch": 0.01, "grad_norm": 8.17114543914795, "learning_rate": 1.1873840445269017e-06, "loss": 1.7134, "step": 32 }, { "epoch": 0.01, "grad_norm": 7.669851303100586, "learning_rate": 1.2244897959183673e-06, "loss": 1.668, "step": 33 }, { "epoch": 0.01, "grad_norm": 8.15913200378418, "learning_rate": 1.2615955473098332e-06, "loss": 1.6547, "step": 34 }, { "epoch": 0.01, "grad_norm": 7.702418327331543, "learning_rate": 1.2987012987012986e-06, "loss": 1.6841, "step": 35 }, { "epoch": 0.01, "grad_norm": 7.635618686676025, "learning_rate": 1.3358070500927644e-06, "loss": 1.6283, "step": 36 }, { "epoch": 0.01, "grad_norm": 6.818296909332275, "learning_rate": 1.3729128014842303e-06, "loss": 1.5912, "step": 37 }, { "epoch": 0.01, "grad_norm": 6.805778503417969, "learning_rate": 1.4100185528756957e-06, "loss": 1.6128, "step": 38 }, { "epoch": 0.01, "grad_norm": 6.981280326843262, "learning_rate": 1.4471243042671615e-06, "loss": 1.652, "step": 39 }, { "epoch": 0.01, "grad_norm": 6.4375319480896, "learning_rate": 1.4842300556586272e-06, "loss": 1.5129, "step": 40 }, { "epoch": 0.01, "grad_norm": 6.047679424285889, "learning_rate": 1.5213358070500928e-06, "loss": 1.5329, "step": 41 }, { "epoch": 0.01, "grad_norm": 6.0779218673706055, "learning_rate": 1.5584415584415584e-06, "loss": 1.4907, "step": 42 }, { "epoch": 0.01, "grad_norm": 5.919124603271484, "learning_rate": 1.5955473098330243e-06, "loss": 1.4942, "step": 43 }, { "epoch": 0.01, "grad_norm": 5.191917419433594, "learning_rate": 1.6326530612244897e-06, "loss": 1.4102, "step": 44 }, { "epoch": 0.01, "grad_norm": 5.43426513671875, "learning_rate": 1.6697588126159556e-06, "loss": 1.4435, "step": 45 }, { "epoch": 0.01, "grad_norm": 5.035828113555908, "learning_rate": 1.7068645640074214e-06, "loss": 1.3202, "step": 46 }, { "epoch": 0.01, "grad_norm": 4.518633842468262, "learning_rate": 1.7439703153988868e-06, "loss": 1.2685, "step": 47 }, { "epoch": 0.01, "grad_norm": 4.953000068664551, "learning_rate": 1.7810760667903527e-06, "loss": 1.3069, "step": 48 }, { "epoch": 0.01, "grad_norm": 4.549315452575684, "learning_rate": 1.8181818181818183e-06, "loss": 1.2642, "step": 49 }, { "epoch": 0.01, "grad_norm": 4.134478569030762, "learning_rate": 1.855287569573284e-06, "loss": 1.2749, "step": 50 }, { "epoch": 0.01, "grad_norm": 4.211318492889404, "learning_rate": 1.8923933209647496e-06, "loss": 1.2538, "step": 51 }, { "epoch": 0.01, "grad_norm": 4.4607038497924805, "learning_rate": 1.9294990723562154e-06, "loss": 1.1883, "step": 52 }, { "epoch": 0.01, "grad_norm": 4.027714729309082, "learning_rate": 1.966604823747681e-06, "loss": 1.2141, "step": 53 }, { "epoch": 0.01, "grad_norm": 3.873331069946289, "learning_rate": 2.0037105751391467e-06, "loss": 1.204, "step": 54 }, { "epoch": 0.01, "grad_norm": 3.9548704624176025, "learning_rate": 2.0408163265306125e-06, "loss": 1.1591, "step": 55 }, { "epoch": 0.01, "grad_norm": 3.435513973236084, "learning_rate": 2.0779220779220784e-06, "loss": 1.1871, "step": 56 }, { "epoch": 0.01, "grad_norm": 3.7123570442199707, "learning_rate": 2.1150278293135438e-06, "loss": 1.1927, "step": 57 }, { "epoch": 0.01, "grad_norm": 3.7052199840545654, "learning_rate": 2.1521335807050096e-06, "loss": 1.1556, "step": 58 }, { "epoch": 0.01, "grad_norm": 3.5209603309631348, "learning_rate": 2.1892393320964755e-06, "loss": 1.1398, "step": 59 }, { "epoch": 0.01, "grad_norm": 3.456592559814453, "learning_rate": 2.226345083487941e-06, "loss": 1.1001, "step": 60 }, { "epoch": 0.01, "grad_norm": 3.473924160003662, "learning_rate": 2.2634508348794067e-06, "loss": 1.1132, "step": 61 }, { "epoch": 0.01, "grad_norm": 3.3541579246520996, "learning_rate": 2.300556586270872e-06, "loss": 1.0583, "step": 62 }, { "epoch": 0.01, "grad_norm": 3.559927225112915, "learning_rate": 2.337662337662338e-06, "loss": 1.0721, "step": 63 }, { "epoch": 0.01, "grad_norm": 3.690189838409424, "learning_rate": 2.3747680890538034e-06, "loss": 1.0521, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.9963531494140625, "learning_rate": 2.4118738404452692e-06, "loss": 1.0718, "step": 65 }, { "epoch": 0.01, "grad_norm": 3.310349225997925, "learning_rate": 2.4489795918367347e-06, "loss": 1.0635, "step": 66 }, { "epoch": 0.01, "grad_norm": 3.169955253601074, "learning_rate": 2.4860853432282005e-06, "loss": 1.0291, "step": 67 }, { "epoch": 0.01, "grad_norm": 3.138657331466675, "learning_rate": 2.5231910946196664e-06, "loss": 1.0628, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.9926767349243164, "learning_rate": 2.560296846011132e-06, "loss": 1.0402, "step": 69 }, { "epoch": 0.01, "grad_norm": 2.9660251140594482, "learning_rate": 2.597402597402597e-06, "loss": 1.0523, "step": 70 }, { "epoch": 0.01, "grad_norm": 3.006776809692383, "learning_rate": 2.634508348794063e-06, "loss": 0.9967, "step": 71 }, { "epoch": 0.01, "grad_norm": 3.004180669784546, "learning_rate": 2.671614100185529e-06, "loss": 1.0321, "step": 72 }, { "epoch": 0.01, "grad_norm": 3.4546661376953125, "learning_rate": 2.7087198515769947e-06, "loss": 0.9647, "step": 73 }, { "epoch": 0.01, "grad_norm": 3.1099963188171387, "learning_rate": 2.7458256029684606e-06, "loss": 0.9558, "step": 74 }, { "epoch": 0.01, "grad_norm": 3.3496196269989014, "learning_rate": 2.7829313543599264e-06, "loss": 0.9683, "step": 75 }, { "epoch": 0.01, "grad_norm": 3.3693552017211914, "learning_rate": 2.8200371057513914e-06, "loss": 0.9848, "step": 76 }, { "epoch": 0.01, "grad_norm": 3.1295881271362305, "learning_rate": 2.8571428571428573e-06, "loss": 0.9264, "step": 77 }, { "epoch": 0.01, "grad_norm": 3.064664840698242, "learning_rate": 2.894248608534323e-06, "loss": 0.8981, "step": 78 }, { "epoch": 0.01, "grad_norm": 3.1708879470825195, "learning_rate": 2.931354359925789e-06, "loss": 0.9724, "step": 79 }, { "epoch": 0.01, "grad_norm": 3.340420961380005, "learning_rate": 2.9684601113172544e-06, "loss": 0.9477, "step": 80 }, { "epoch": 0.01, "grad_norm": 3.2692129611968994, "learning_rate": 3.00556586270872e-06, "loss": 0.9545, "step": 81 }, { "epoch": 0.01, "grad_norm": 3.885650396347046, "learning_rate": 3.0426716141001856e-06, "loss": 0.9237, "step": 82 }, { "epoch": 0.01, "grad_norm": 3.1491830348968506, "learning_rate": 3.0797773654916515e-06, "loss": 0.9061, "step": 83 }, { "epoch": 0.01, "grad_norm": 3.478410243988037, "learning_rate": 3.116883116883117e-06, "loss": 0.857, "step": 84 }, { "epoch": 0.01, "grad_norm": 3.6959283351898193, "learning_rate": 3.1539888682745827e-06, "loss": 0.9253, "step": 85 }, { "epoch": 0.01, "grad_norm": 3.5496273040771484, "learning_rate": 3.1910946196660486e-06, "loss": 0.8963, "step": 86 }, { "epoch": 0.01, "grad_norm": 3.2896344661712646, "learning_rate": 3.2282003710575144e-06, "loss": 0.8684, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.9594266414642334, "learning_rate": 3.2653061224489794e-06, "loss": 0.8793, "step": 88 }, { "epoch": 0.01, "grad_norm": 3.226518154144287, "learning_rate": 3.3024118738404453e-06, "loss": 0.8872, "step": 89 }, { "epoch": 0.02, "grad_norm": 3.019383430480957, "learning_rate": 3.339517625231911e-06, "loss": 0.874, "step": 90 }, { "epoch": 0.02, "grad_norm": 3.057400941848755, "learning_rate": 3.376623376623377e-06, "loss": 0.8648, "step": 91 }, { "epoch": 0.02, "grad_norm": 3.389247417449951, "learning_rate": 3.413729128014843e-06, "loss": 0.8402, "step": 92 }, { "epoch": 0.02, "grad_norm": 2.8871963024139404, "learning_rate": 3.4508348794063086e-06, "loss": 0.8369, "step": 93 }, { "epoch": 0.02, "grad_norm": 3.092721700668335, "learning_rate": 3.4879406307977736e-06, "loss": 0.8547, "step": 94 }, { "epoch": 0.02, "grad_norm": 2.8793065547943115, "learning_rate": 3.5250463821892395e-06, "loss": 0.7818, "step": 95 }, { "epoch": 0.02, "grad_norm": 3.3874597549438477, "learning_rate": 3.5621521335807053e-06, "loss": 0.8669, "step": 96 }, { "epoch": 0.02, "grad_norm": 3.0211620330810547, "learning_rate": 3.599257884972171e-06, "loss": 0.8456, "step": 97 }, { "epoch": 0.02, "grad_norm": 2.873979091644287, "learning_rate": 3.6363636363636366e-06, "loss": 0.8344, "step": 98 }, { "epoch": 0.02, "grad_norm": 3.4776954650878906, "learning_rate": 3.6734693877551024e-06, "loss": 0.8374, "step": 99 }, { "epoch": 0.02, "grad_norm": 3.3004915714263916, "learning_rate": 3.710575139146568e-06, "loss": 0.8208, "step": 100 }, { "epoch": 0.02, "grad_norm": 3.0056676864624023, "learning_rate": 3.7476808905380337e-06, "loss": 0.8085, "step": 101 }, { "epoch": 0.02, "grad_norm": 3.637700080871582, "learning_rate": 3.784786641929499e-06, "loss": 0.8212, "step": 102 }, { "epoch": 0.02, "grad_norm": 3.0696957111358643, "learning_rate": 3.821892393320965e-06, "loss": 0.8165, "step": 103 }, { "epoch": 0.02, "grad_norm": 3.422537088394165, "learning_rate": 3.858998144712431e-06, "loss": 0.8195, "step": 104 }, { "epoch": 0.02, "grad_norm": 3.602252960205078, "learning_rate": 3.896103896103897e-06, "loss": 0.8368, "step": 105 }, { "epoch": 0.02, "grad_norm": 3.2943220138549805, "learning_rate": 3.933209647495362e-06, "loss": 0.8071, "step": 106 }, { "epoch": 0.02, "grad_norm": 3.089627742767334, "learning_rate": 3.9703153988868275e-06, "loss": 0.8014, "step": 107 }, { "epoch": 0.02, "grad_norm": 3.016984462738037, "learning_rate": 4.007421150278293e-06, "loss": 0.8044, "step": 108 }, { "epoch": 0.02, "grad_norm": 2.967430591583252, "learning_rate": 4.044526901669759e-06, "loss": 0.8099, "step": 109 }, { "epoch": 0.02, "grad_norm": 3.186495304107666, "learning_rate": 4.081632653061225e-06, "loss": 0.7774, "step": 110 }, { "epoch": 0.02, "grad_norm": 3.13470458984375, "learning_rate": 4.118738404452691e-06, "loss": 0.7415, "step": 111 }, { "epoch": 0.02, "grad_norm": 3.0605826377868652, "learning_rate": 4.155844155844157e-06, "loss": 0.7284, "step": 112 }, { "epoch": 0.02, "grad_norm": 3.25195574760437, "learning_rate": 4.192949907235622e-06, "loss": 0.7893, "step": 113 }, { "epoch": 0.02, "grad_norm": 3.232372522354126, "learning_rate": 4.2300556586270875e-06, "loss": 0.8151, "step": 114 }, { "epoch": 0.02, "grad_norm": 3.461376667022705, "learning_rate": 4.267161410018553e-06, "loss": 0.8207, "step": 115 }, { "epoch": 0.02, "grad_norm": 3.5259883403778076, "learning_rate": 4.304267161410019e-06, "loss": 0.7379, "step": 116 }, { "epoch": 0.02, "grad_norm": 3.5626771450042725, "learning_rate": 4.341372912801485e-06, "loss": 0.7461, "step": 117 }, { "epoch": 0.02, "grad_norm": 3.9845142364501953, "learning_rate": 4.378478664192951e-06, "loss": 0.7104, "step": 118 }, { "epoch": 0.02, "grad_norm": 3.7727012634277344, "learning_rate": 4.415584415584416e-06, "loss": 0.7838, "step": 119 }, { "epoch": 0.02, "grad_norm": 3.2760305404663086, "learning_rate": 4.452690166975882e-06, "loss": 0.7512, "step": 120 }, { "epoch": 0.02, "grad_norm": 3.433992385864258, "learning_rate": 4.489795918367348e-06, "loss": 0.7533, "step": 121 }, { "epoch": 0.02, "grad_norm": 3.4961535930633545, "learning_rate": 4.5269016697588134e-06, "loss": 0.6818, "step": 122 }, { "epoch": 0.02, "grad_norm": 3.2385573387145996, "learning_rate": 4.5640074211502784e-06, "loss": 0.785, "step": 123 }, { "epoch": 0.02, "grad_norm": 3.398406744003296, "learning_rate": 4.601113172541744e-06, "loss": 0.7565, "step": 124 }, { "epoch": 0.02, "grad_norm": 3.364015817642212, "learning_rate": 4.63821892393321e-06, "loss": 0.724, "step": 125 }, { "epoch": 0.02, "grad_norm": 3.583069324493408, "learning_rate": 4.675324675324676e-06, "loss": 0.7296, "step": 126 }, { "epoch": 0.02, "grad_norm": 3.5499331951141357, "learning_rate": 4.712430426716141e-06, "loss": 0.718, "step": 127 }, { "epoch": 0.02, "grad_norm": 3.4917266368865967, "learning_rate": 4.749536178107607e-06, "loss": 0.6848, "step": 128 }, { "epoch": 0.02, "grad_norm": 3.873094320297241, "learning_rate": 4.786641929499073e-06, "loss": 0.6975, "step": 129 }, { "epoch": 0.02, "grad_norm": 3.7955024242401123, "learning_rate": 4.8237476808905385e-06, "loss": 0.741, "step": 130 }, { "epoch": 0.02, "grad_norm": 3.571781635284424, "learning_rate": 4.8608534322820035e-06, "loss": 0.7562, "step": 131 }, { "epoch": 0.02, "grad_norm": 3.526510000228882, "learning_rate": 4.897959183673469e-06, "loss": 0.6569, "step": 132 }, { "epoch": 0.02, "grad_norm": 3.6155598163604736, "learning_rate": 4.935064935064935e-06, "loss": 0.7266, "step": 133 }, { "epoch": 0.02, "grad_norm": 3.302088975906372, "learning_rate": 4.972170686456401e-06, "loss": 0.6943, "step": 134 }, { "epoch": 0.02, "grad_norm": 3.564939022064209, "learning_rate": 5.009276437847866e-06, "loss": 0.6901, "step": 135 }, { "epoch": 0.02, "grad_norm": 3.6224265098571777, "learning_rate": 5.046382189239333e-06, "loss": 0.7162, "step": 136 }, { "epoch": 0.02, "grad_norm": 3.463250160217285, "learning_rate": 5.083487940630798e-06, "loss": 0.6869, "step": 137 }, { "epoch": 0.02, "grad_norm": 3.151534080505371, "learning_rate": 5.120593692022264e-06, "loss": 0.6971, "step": 138 }, { "epoch": 0.02, "grad_norm": 3.3097310066223145, "learning_rate": 5.157699443413729e-06, "loss": 0.6519, "step": 139 }, { "epoch": 0.02, "grad_norm": 3.831904649734497, "learning_rate": 5.194805194805194e-06, "loss": 0.717, "step": 140 }, { "epoch": 0.02, "grad_norm": 4.250148773193359, "learning_rate": 5.231910946196661e-06, "loss": 0.7328, "step": 141 }, { "epoch": 0.02, "grad_norm": 3.7305591106414795, "learning_rate": 5.269016697588126e-06, "loss": 0.6604, "step": 142 }, { "epoch": 0.02, "grad_norm": 3.300163507461548, "learning_rate": 5.306122448979593e-06, "loss": 0.635, "step": 143 }, { "epoch": 0.02, "grad_norm": 4.155348777770996, "learning_rate": 5.343228200371058e-06, "loss": 0.6484, "step": 144 }, { "epoch": 0.02, "grad_norm": 3.852877140045166, "learning_rate": 5.3803339517625245e-06, "loss": 0.6932, "step": 145 }, { "epoch": 0.02, "grad_norm": 3.8767776489257812, "learning_rate": 5.4174397031539895e-06, "loss": 0.7259, "step": 146 }, { "epoch": 0.02, "grad_norm": 3.9380528926849365, "learning_rate": 5.4545454545454545e-06, "loss": 0.6363, "step": 147 }, { "epoch": 0.02, "grad_norm": 3.83540415763855, "learning_rate": 5.491651205936921e-06, "loss": 0.6721, "step": 148 }, { "epoch": 0.02, "grad_norm": 3.592844247817993, "learning_rate": 5.528756957328386e-06, "loss": 0.6679, "step": 149 }, { "epoch": 0.03, "grad_norm": 3.279712200164795, "learning_rate": 5.565862708719853e-06, "loss": 0.6597, "step": 150 }, { "epoch": 0.03, "grad_norm": 3.4541358947753906, "learning_rate": 5.602968460111318e-06, "loss": 0.6294, "step": 151 }, { "epoch": 0.03, "grad_norm": 3.9233639240264893, "learning_rate": 5.640074211502783e-06, "loss": 0.6838, "step": 152 }, { "epoch": 0.03, "grad_norm": 3.9507896900177, "learning_rate": 5.6771799628942495e-06, "loss": 0.6927, "step": 153 }, { "epoch": 0.03, "grad_norm": 3.164034843444824, "learning_rate": 5.7142857142857145e-06, "loss": 0.6696, "step": 154 }, { "epoch": 0.03, "grad_norm": 3.766441822052002, "learning_rate": 5.75139146567718e-06, "loss": 0.652, "step": 155 }, { "epoch": 0.03, "grad_norm": 3.8711249828338623, "learning_rate": 5.788497217068646e-06, "loss": 0.651, "step": 156 }, { "epoch": 0.03, "grad_norm": 3.7884528636932373, "learning_rate": 5.825602968460112e-06, "loss": 0.6586, "step": 157 }, { "epoch": 0.03, "grad_norm": 3.780651807785034, "learning_rate": 5.862708719851578e-06, "loss": 0.6226, "step": 158 }, { "epoch": 0.03, "grad_norm": 3.3104965686798096, "learning_rate": 5.899814471243043e-06, "loss": 0.6358, "step": 159 }, { "epoch": 0.03, "grad_norm": 3.8461782932281494, "learning_rate": 5.936920222634509e-06, "loss": 0.6638, "step": 160 }, { "epoch": 0.03, "grad_norm": 4.39020299911499, "learning_rate": 5.9740259740259746e-06, "loss": 0.7141, "step": 161 }, { "epoch": 0.03, "grad_norm": 4.117135524749756, "learning_rate": 6.01113172541744e-06, "loss": 0.624, "step": 162 }, { "epoch": 0.03, "grad_norm": 3.75400447845459, "learning_rate": 6.048237476808905e-06, "loss": 0.6524, "step": 163 }, { "epoch": 0.03, "grad_norm": 4.3590922355651855, "learning_rate": 6.085343228200371e-06, "loss": 0.7081, "step": 164 }, { "epoch": 0.03, "grad_norm": 4.234223365783691, "learning_rate": 6.122448979591837e-06, "loss": 0.6704, "step": 165 }, { "epoch": 0.03, "grad_norm": 3.5579683780670166, "learning_rate": 6.159554730983303e-06, "loss": 0.6235, "step": 166 }, { "epoch": 0.03, "grad_norm": 3.7070491313934326, "learning_rate": 6.196660482374769e-06, "loss": 0.6331, "step": 167 }, { "epoch": 0.03, "grad_norm": 3.860041379928589, "learning_rate": 6.233766233766234e-06, "loss": 0.6304, "step": 168 }, { "epoch": 0.03, "grad_norm": 4.045529365539551, "learning_rate": 6.2708719851577005e-06, "loss": 0.5903, "step": 169 }, { "epoch": 0.03, "grad_norm": 3.6288185119628906, "learning_rate": 6.3079777365491655e-06, "loss": 0.6507, "step": 170 }, { "epoch": 0.03, "grad_norm": 4.324794292449951, "learning_rate": 6.345083487940631e-06, "loss": 0.6615, "step": 171 }, { "epoch": 0.03, "grad_norm": 3.594531536102295, "learning_rate": 6.382189239332097e-06, "loss": 0.6182, "step": 172 }, { "epoch": 0.03, "grad_norm": 4.320672035217285, "learning_rate": 6.419294990723562e-06, "loss": 0.6403, "step": 173 }, { "epoch": 0.03, "grad_norm": 3.416071653366089, "learning_rate": 6.456400742115029e-06, "loss": 0.6318, "step": 174 }, { "epoch": 0.03, "grad_norm": 3.714245319366455, "learning_rate": 6.493506493506494e-06, "loss": 0.5725, "step": 175 }, { "epoch": 0.03, "grad_norm": 3.1902523040771484, "learning_rate": 6.530612244897959e-06, "loss": 0.5802, "step": 176 }, { "epoch": 0.03, "grad_norm": 3.7266318798065186, "learning_rate": 6.5677179962894255e-06, "loss": 0.5989, "step": 177 }, { "epoch": 0.03, "grad_norm": 3.486886978149414, "learning_rate": 6.6048237476808905e-06, "loss": 0.5751, "step": 178 }, { "epoch": 0.03, "grad_norm": 3.5820086002349854, "learning_rate": 6.641929499072357e-06, "loss": 0.6121, "step": 179 }, { "epoch": 0.03, "grad_norm": 3.412262439727783, "learning_rate": 6.679035250463822e-06, "loss": 0.5851, "step": 180 }, { "epoch": 0.03, "grad_norm": 3.754873514175415, "learning_rate": 6.716141001855289e-06, "loss": 0.5766, "step": 181 }, { "epoch": 0.03, "grad_norm": 3.429985523223877, "learning_rate": 6.753246753246754e-06, "loss": 0.5882, "step": 182 }, { "epoch": 0.03, "grad_norm": 3.407564401626587, "learning_rate": 6.790352504638219e-06, "loss": 0.5857, "step": 183 }, { "epoch": 0.03, "grad_norm": 3.583744525909424, "learning_rate": 6.827458256029686e-06, "loss": 0.59, "step": 184 }, { "epoch": 0.03, "grad_norm": 3.9024229049682617, "learning_rate": 6.864564007421151e-06, "loss": 0.5531, "step": 185 }, { "epoch": 0.03, "grad_norm": 3.859156608581543, "learning_rate": 6.901669758812617e-06, "loss": 0.5965, "step": 186 }, { "epoch": 0.03, "grad_norm": 3.4908041954040527, "learning_rate": 6.938775510204082e-06, "loss": 0.5878, "step": 187 }, { "epoch": 0.03, "grad_norm": 3.9591829776763916, "learning_rate": 6.975881261595547e-06, "loss": 0.5753, "step": 188 }, { "epoch": 0.03, "grad_norm": 3.6867127418518066, "learning_rate": 7.012987012987014e-06, "loss": 0.571, "step": 189 }, { "epoch": 0.03, "grad_norm": 4.040102005004883, "learning_rate": 7.050092764378479e-06, "loss": 0.6605, "step": 190 }, { "epoch": 0.03, "grad_norm": 3.891875743865967, "learning_rate": 7.087198515769945e-06, "loss": 0.5555, "step": 191 }, { "epoch": 0.03, "grad_norm": 3.776078224182129, "learning_rate": 7.124304267161411e-06, "loss": 0.5986, "step": 192 }, { "epoch": 0.03, "grad_norm": 3.425426483154297, "learning_rate": 7.1614100185528765e-06, "loss": 0.5674, "step": 193 }, { "epoch": 0.03, "grad_norm": 3.983797788619995, "learning_rate": 7.198515769944342e-06, "loss": 0.6131, "step": 194 }, { "epoch": 0.03, "grad_norm": 3.9011480808258057, "learning_rate": 7.235621521335807e-06, "loss": 0.6162, "step": 195 }, { "epoch": 0.03, "grad_norm": 3.0688822269439697, "learning_rate": 7.272727272727273e-06, "loss": 0.5416, "step": 196 }, { "epoch": 0.03, "grad_norm": 4.23895263671875, "learning_rate": 7.309833024118739e-06, "loss": 0.5756, "step": 197 }, { "epoch": 0.03, "grad_norm": 3.7223408222198486, "learning_rate": 7.346938775510205e-06, "loss": 0.5816, "step": 198 }, { "epoch": 0.03, "grad_norm": 3.4912168979644775, "learning_rate": 7.384044526901671e-06, "loss": 0.5222, "step": 199 }, { "epoch": 0.03, "grad_norm": 3.1945841312408447, "learning_rate": 7.421150278293136e-06, "loss": 0.564, "step": 200 }, { "epoch": 0.03, "grad_norm": 3.1337621212005615, "learning_rate": 7.4582560296846015e-06, "loss": 0.528, "step": 201 }, { "epoch": 0.03, "grad_norm": 3.453765869140625, "learning_rate": 7.495361781076067e-06, "loss": 0.5472, "step": 202 }, { "epoch": 0.03, "grad_norm": 3.574963331222534, "learning_rate": 7.532467532467533e-06, "loss": 0.5531, "step": 203 }, { "epoch": 0.03, "grad_norm": 3.3074138164520264, "learning_rate": 7.569573283858998e-06, "loss": 0.5504, "step": 204 }, { "epoch": 0.03, "grad_norm": 4.012242317199707, "learning_rate": 7.606679035250465e-06, "loss": 0.5337, "step": 205 }, { "epoch": 0.03, "grad_norm": 3.7470946311950684, "learning_rate": 7.64378478664193e-06, "loss": 0.5132, "step": 206 }, { "epoch": 0.03, "grad_norm": 3.041616678237915, "learning_rate": 7.680890538033396e-06, "loss": 0.5023, "step": 207 }, { "epoch": 0.03, "grad_norm": 3.422178268432617, "learning_rate": 7.717996289424862e-06, "loss": 0.5323, "step": 208 }, { "epoch": 0.03, "grad_norm": 2.79860520362854, "learning_rate": 7.755102040816327e-06, "loss": 0.4838, "step": 209 }, { "epoch": 0.04, "grad_norm": 3.68025803565979, "learning_rate": 7.792207792207793e-06, "loss": 0.5557, "step": 210 }, { "epoch": 0.04, "grad_norm": 3.602224111557007, "learning_rate": 7.829313543599259e-06, "loss": 0.5245, "step": 211 }, { "epoch": 0.04, "grad_norm": 3.3612046241760254, "learning_rate": 7.866419294990723e-06, "loss": 0.5263, "step": 212 }, { "epoch": 0.04, "grad_norm": 3.1437251567840576, "learning_rate": 7.90352504638219e-06, "loss": 0.5206, "step": 213 }, { "epoch": 0.04, "grad_norm": 3.8981688022613525, "learning_rate": 7.940630797773655e-06, "loss": 0.5346, "step": 214 }, { "epoch": 0.04, "grad_norm": 3.5525870323181152, "learning_rate": 7.97773654916512e-06, "loss": 0.5068, "step": 215 }, { "epoch": 0.04, "grad_norm": 3.394861936569214, "learning_rate": 8.014842300556587e-06, "loss": 0.484, "step": 216 }, { "epoch": 0.04, "grad_norm": 3.482536792755127, "learning_rate": 8.051948051948052e-06, "loss": 0.5286, "step": 217 }, { "epoch": 0.04, "grad_norm": 3.539320707321167, "learning_rate": 8.089053803339518e-06, "loss": 0.5373, "step": 218 }, { "epoch": 0.04, "grad_norm": 4.05807638168335, "learning_rate": 8.126159554730984e-06, "loss": 0.5422, "step": 219 }, { "epoch": 0.04, "grad_norm": 3.7347044944763184, "learning_rate": 8.16326530612245e-06, "loss": 0.5574, "step": 220 }, { "epoch": 0.04, "grad_norm": 3.9466545581817627, "learning_rate": 8.200371057513916e-06, "loss": 0.5334, "step": 221 }, { "epoch": 0.04, "grad_norm": 3.778435230255127, "learning_rate": 8.237476808905382e-06, "loss": 0.49, "step": 222 }, { "epoch": 0.04, "grad_norm": 3.9007341861724854, "learning_rate": 8.274582560296846e-06, "loss": 0.4926, "step": 223 }, { "epoch": 0.04, "grad_norm": 3.200925588607788, "learning_rate": 8.311688311688313e-06, "loss": 0.5233, "step": 224 }, { "epoch": 0.04, "grad_norm": 3.220580577850342, "learning_rate": 8.348794063079778e-06, "loss": 0.4801, "step": 225 }, { "epoch": 0.04, "grad_norm": 3.3207619190216064, "learning_rate": 8.385899814471243e-06, "loss": 0.5097, "step": 226 }, { "epoch": 0.04, "grad_norm": 3.298804759979248, "learning_rate": 8.42300556586271e-06, "loss": 0.5027, "step": 227 }, { "epoch": 0.04, "grad_norm": 3.783217191696167, "learning_rate": 8.460111317254175e-06, "loss": 0.5672, "step": 228 }, { "epoch": 0.04, "grad_norm": 3.597342014312744, "learning_rate": 8.497217068645641e-06, "loss": 0.4934, "step": 229 }, { "epoch": 0.04, "grad_norm": 3.5125324726104736, "learning_rate": 8.534322820037107e-06, "loss": 0.4521, "step": 230 }, { "epoch": 0.04, "grad_norm": 3.4860289096832275, "learning_rate": 8.571428571428571e-06, "loss": 0.5064, "step": 231 }, { "epoch": 0.04, "grad_norm": 3.4228355884552, "learning_rate": 8.608534322820038e-06, "loss": 0.5261, "step": 232 }, { "epoch": 0.04, "grad_norm": 3.571007490158081, "learning_rate": 8.645640074211503e-06, "loss": 0.4904, "step": 233 }, { "epoch": 0.04, "grad_norm": 3.635704755783081, "learning_rate": 8.68274582560297e-06, "loss": 0.5082, "step": 234 }, { "epoch": 0.04, "grad_norm": 3.511043071746826, "learning_rate": 8.719851576994434e-06, "loss": 0.4948, "step": 235 }, { "epoch": 0.04, "grad_norm": 3.7647082805633545, "learning_rate": 8.756957328385902e-06, "loss": 0.4907, "step": 236 }, { "epoch": 0.04, "grad_norm": 2.9713709354400635, "learning_rate": 8.794063079777366e-06, "loss": 0.4463, "step": 237 }, { "epoch": 0.04, "grad_norm": 3.3980133533477783, "learning_rate": 8.831168831168832e-06, "loss": 0.5065, "step": 238 }, { "epoch": 0.04, "grad_norm": 3.788365364074707, "learning_rate": 8.868274582560298e-06, "loss": 0.5319, "step": 239 }, { "epoch": 0.04, "grad_norm": 3.5779755115509033, "learning_rate": 8.905380333951764e-06, "loss": 0.4977, "step": 240 }, { "epoch": 0.04, "grad_norm": 4.114449977874756, "learning_rate": 8.94248608534323e-06, "loss": 0.5366, "step": 241 }, { "epoch": 0.04, "grad_norm": 3.4340224266052246, "learning_rate": 8.979591836734695e-06, "loss": 0.5174, "step": 242 }, { "epoch": 0.04, "grad_norm": 3.2044870853424072, "learning_rate": 9.01669758812616e-06, "loss": 0.431, "step": 243 }, { "epoch": 0.04, "grad_norm": 3.3005502223968506, "learning_rate": 9.053803339517627e-06, "loss": 0.5189, "step": 244 }, { "epoch": 0.04, "grad_norm": 3.7972776889801025, "learning_rate": 9.090909090909091e-06, "loss": 0.5051, "step": 245 }, { "epoch": 0.04, "grad_norm": 3.482644557952881, "learning_rate": 9.128014842300557e-06, "loss": 0.5191, "step": 246 }, { "epoch": 0.04, "grad_norm": 3.772104263305664, "learning_rate": 9.165120593692023e-06, "loss": 0.5033, "step": 247 }, { "epoch": 0.04, "grad_norm": 4.047839641571045, "learning_rate": 9.202226345083489e-06, "loss": 0.5171, "step": 248 }, { "epoch": 0.04, "grad_norm": 4.193242073059082, "learning_rate": 9.239332096474954e-06, "loss": 0.5235, "step": 249 }, { "epoch": 0.04, "grad_norm": 3.4793667793273926, "learning_rate": 9.27643784786642e-06, "loss": 0.5071, "step": 250 }, { "epoch": 0.04, "grad_norm": 3.4483587741851807, "learning_rate": 9.313543599257886e-06, "loss": 0.5029, "step": 251 }, { "epoch": 0.04, "grad_norm": 3.0795135498046875, "learning_rate": 9.350649350649352e-06, "loss": 0.4392, "step": 252 }, { "epoch": 0.04, "grad_norm": 3.0993294715881348, "learning_rate": 9.387755102040818e-06, "loss": 0.4546, "step": 253 }, { "epoch": 0.04, "grad_norm": 3.1910321712493896, "learning_rate": 9.424860853432282e-06, "loss": 0.4669, "step": 254 }, { "epoch": 0.04, "grad_norm": 3.1376562118530273, "learning_rate": 9.461966604823748e-06, "loss": 0.4974, "step": 255 }, { "epoch": 0.04, "grad_norm": 2.974745273590088, "learning_rate": 9.499072356215214e-06, "loss": 0.4417, "step": 256 }, { "epoch": 0.04, "grad_norm": 3.2186813354492188, "learning_rate": 9.53617810760668e-06, "loss": 0.4445, "step": 257 }, { "epoch": 0.04, "grad_norm": 3.362898588180542, "learning_rate": 9.573283858998145e-06, "loss": 0.4752, "step": 258 }, { "epoch": 0.04, "grad_norm": 3.969586133956909, "learning_rate": 9.610389610389611e-06, "loss": 0.4946, "step": 259 }, { "epoch": 0.04, "grad_norm": 3.138932466506958, "learning_rate": 9.647495361781077e-06, "loss": 0.4887, "step": 260 }, { "epoch": 0.04, "grad_norm": 3.3861734867095947, "learning_rate": 9.684601113172543e-06, "loss": 0.4984, "step": 261 }, { "epoch": 0.04, "grad_norm": 3.3579657077789307, "learning_rate": 9.721706864564007e-06, "loss": 0.4384, "step": 262 }, { "epoch": 0.04, "grad_norm": 3.293699026107788, "learning_rate": 9.758812615955475e-06, "loss": 0.4698, "step": 263 }, { "epoch": 0.04, "grad_norm": 3.5232532024383545, "learning_rate": 9.795918367346939e-06, "loss": 0.457, "step": 264 }, { "epoch": 0.04, "grad_norm": 3.301365375518799, "learning_rate": 9.833024118738406e-06, "loss": 0.4639, "step": 265 }, { "epoch": 0.04, "grad_norm": 2.9535834789276123, "learning_rate": 9.87012987012987e-06, "loss": 0.4337, "step": 266 }, { "epoch": 0.04, "grad_norm": 3.0466232299804688, "learning_rate": 9.907235621521336e-06, "loss": 0.4458, "step": 267 }, { "epoch": 0.04, "grad_norm": 3.7308411598205566, "learning_rate": 9.944341372912802e-06, "loss": 0.4378, "step": 268 }, { "epoch": 0.04, "grad_norm": 3.1197099685668945, "learning_rate": 9.981447124304268e-06, "loss": 0.4582, "step": 269 }, { "epoch": 0.05, "grad_norm": 3.247676372528076, "learning_rate": 1.0018552875695732e-05, "loss": 0.5439, "step": 270 }, { "epoch": 0.05, "grad_norm": 2.764385938644409, "learning_rate": 1.00556586270872e-05, "loss": 0.4062, "step": 271 }, { "epoch": 0.05, "grad_norm": 3.7095327377319336, "learning_rate": 1.0092764378478665e-05, "loss": 0.462, "step": 272 }, { "epoch": 0.05, "grad_norm": 3.4303956031799316, "learning_rate": 1.012987012987013e-05, "loss": 0.5088, "step": 273 }, { "epoch": 0.05, "grad_norm": 3.083489418029785, "learning_rate": 1.0166975881261595e-05, "loss": 0.4403, "step": 274 }, { "epoch": 0.05, "grad_norm": 2.6982645988464355, "learning_rate": 1.0204081632653063e-05, "loss": 0.4275, "step": 275 }, { "epoch": 0.05, "grad_norm": 3.2609076499938965, "learning_rate": 1.0241187384044529e-05, "loss": 0.4419, "step": 276 }, { "epoch": 0.05, "grad_norm": 3.7205398082733154, "learning_rate": 1.0278293135435993e-05, "loss": 0.4428, "step": 277 }, { "epoch": 0.05, "grad_norm": 3.512974500656128, "learning_rate": 1.0315398886827459e-05, "loss": 0.4909, "step": 278 }, { "epoch": 0.05, "grad_norm": 2.820100784301758, "learning_rate": 1.0352504638218925e-05, "loss": 0.4227, "step": 279 }, { "epoch": 0.05, "grad_norm": 3.2768869400024414, "learning_rate": 1.0389610389610389e-05, "loss": 0.4421, "step": 280 }, { "epoch": 0.05, "grad_norm": 3.238290786743164, "learning_rate": 1.0426716141001856e-05, "loss": 0.4549, "step": 281 }, { "epoch": 0.05, "grad_norm": 3.1121633052825928, "learning_rate": 1.0463821892393322e-05, "loss": 0.4215, "step": 282 }, { "epoch": 0.05, "grad_norm": 2.9778265953063965, "learning_rate": 1.0500927643784788e-05, "loss": 0.413, "step": 283 }, { "epoch": 0.05, "grad_norm": 3.22340726852417, "learning_rate": 1.0538033395176252e-05, "loss": 0.3952, "step": 284 }, { "epoch": 0.05, "grad_norm": 3.212219476699829, "learning_rate": 1.057513914656772e-05, "loss": 0.4422, "step": 285 }, { "epoch": 0.05, "grad_norm": 3.142153024673462, "learning_rate": 1.0612244897959186e-05, "loss": 0.4657, "step": 286 }, { "epoch": 0.05, "grad_norm": 2.9514288902282715, "learning_rate": 1.064935064935065e-05, "loss": 0.3817, "step": 287 }, { "epoch": 0.05, "grad_norm": 3.0474562644958496, "learning_rate": 1.0686456400742116e-05, "loss": 0.4149, "step": 288 }, { "epoch": 0.05, "grad_norm": 3.2908480167388916, "learning_rate": 1.0723562152133581e-05, "loss": 0.423, "step": 289 }, { "epoch": 0.05, "grad_norm": 3.313931941986084, "learning_rate": 1.0760667903525049e-05, "loss": 0.4072, "step": 290 }, { "epoch": 0.05, "grad_norm": 3.409170627593994, "learning_rate": 1.0797773654916513e-05, "loss": 0.3903, "step": 291 }, { "epoch": 0.05, "grad_norm": 2.9801747798919678, "learning_rate": 1.0834879406307979e-05, "loss": 0.4337, "step": 292 }, { "epoch": 0.05, "grad_norm": 2.854769706726074, "learning_rate": 1.0871985157699445e-05, "loss": 0.486, "step": 293 }, { "epoch": 0.05, "grad_norm": 2.995771646499634, "learning_rate": 1.0909090909090909e-05, "loss": 0.4005, "step": 294 }, { "epoch": 0.05, "grad_norm": 3.627546548843384, "learning_rate": 1.0946196660482375e-05, "loss": 0.4461, "step": 295 }, { "epoch": 0.05, "grad_norm": 3.505272388458252, "learning_rate": 1.0983302411873842e-05, "loss": 0.4443, "step": 296 }, { "epoch": 0.05, "grad_norm": 3.624537706375122, "learning_rate": 1.1020408163265306e-05, "loss": 0.4631, "step": 297 }, { "epoch": 0.05, "grad_norm": 3.4530317783355713, "learning_rate": 1.1057513914656772e-05, "loss": 0.4604, "step": 298 }, { "epoch": 0.05, "grad_norm": 3.1622731685638428, "learning_rate": 1.1094619666048238e-05, "loss": 0.4307, "step": 299 }, { "epoch": 0.05, "grad_norm": 3.7942235469818115, "learning_rate": 1.1131725417439706e-05, "loss": 0.4132, "step": 300 }, { "epoch": 0.05, "grad_norm": 3.276893377304077, "learning_rate": 1.116883116883117e-05, "loss": 0.4388, "step": 301 }, { "epoch": 0.05, "grad_norm": 2.961852788925171, "learning_rate": 1.1205936920222636e-05, "loss": 0.4001, "step": 302 }, { "epoch": 0.05, "grad_norm": 2.8727002143859863, "learning_rate": 1.1243042671614101e-05, "loss": 0.4098, "step": 303 }, { "epoch": 0.05, "grad_norm": 3.053004503250122, "learning_rate": 1.1280148423005566e-05, "loss": 0.3796, "step": 304 }, { "epoch": 0.05, "grad_norm": 2.9752464294433594, "learning_rate": 1.1317254174397031e-05, "loss": 0.4157, "step": 305 }, { "epoch": 0.05, "grad_norm": 2.882772922515869, "learning_rate": 1.1354359925788499e-05, "loss": 0.4307, "step": 306 }, { "epoch": 0.05, "grad_norm": 3.0538830757141113, "learning_rate": 1.1391465677179965e-05, "loss": 0.4292, "step": 307 }, { "epoch": 0.05, "grad_norm": 3.2163302898406982, "learning_rate": 1.1428571428571429e-05, "loss": 0.4498, "step": 308 }, { "epoch": 0.05, "grad_norm": 3.0732085704803467, "learning_rate": 1.1465677179962895e-05, "loss": 0.4493, "step": 309 }, { "epoch": 0.05, "grad_norm": 3.217205762863159, "learning_rate": 1.150278293135436e-05, "loss": 0.4317, "step": 310 }, { "epoch": 0.05, "grad_norm": 3.0591070652008057, "learning_rate": 1.1539888682745825e-05, "loss": 0.4431, "step": 311 }, { "epoch": 0.05, "grad_norm": 3.1109700202941895, "learning_rate": 1.1576994434137292e-05, "loss": 0.3872, "step": 312 }, { "epoch": 0.05, "grad_norm": 3.03364896774292, "learning_rate": 1.1614100185528758e-05, "loss": 0.4277, "step": 313 }, { "epoch": 0.05, "grad_norm": 3.234057664871216, "learning_rate": 1.1651205936920224e-05, "loss": 0.4269, "step": 314 }, { "epoch": 0.05, "grad_norm": 2.8884172439575195, "learning_rate": 1.1688311688311688e-05, "loss": 0.4275, "step": 315 }, { "epoch": 0.05, "grad_norm": 3.1153557300567627, "learning_rate": 1.1725417439703156e-05, "loss": 0.4167, "step": 316 }, { "epoch": 0.05, "grad_norm": 2.8512983322143555, "learning_rate": 1.1762523191094622e-05, "loss": 0.4446, "step": 317 }, { "epoch": 0.05, "grad_norm": 2.7890186309814453, "learning_rate": 1.1799628942486086e-05, "loss": 0.4086, "step": 318 }, { "epoch": 0.05, "grad_norm": 2.918041944503784, "learning_rate": 1.1836734693877552e-05, "loss": 0.3994, "step": 319 }, { "epoch": 0.05, "grad_norm": 3.1047091484069824, "learning_rate": 1.1873840445269017e-05, "loss": 0.4245, "step": 320 }, { "epoch": 0.05, "grad_norm": 2.868516206741333, "learning_rate": 1.1910946196660482e-05, "loss": 0.3855, "step": 321 }, { "epoch": 0.05, "grad_norm": 3.348405361175537, "learning_rate": 1.1948051948051949e-05, "loss": 0.4772, "step": 322 }, { "epoch": 0.05, "grad_norm": 3.3484182357788086, "learning_rate": 1.1985157699443415e-05, "loss": 0.4093, "step": 323 }, { "epoch": 0.05, "grad_norm": 2.8536248207092285, "learning_rate": 1.202226345083488e-05, "loss": 0.3728, "step": 324 }, { "epoch": 0.05, "grad_norm": 2.9839985370635986, "learning_rate": 1.2059369202226345e-05, "loss": 0.4402, "step": 325 }, { "epoch": 0.05, "grad_norm": 3.0017004013061523, "learning_rate": 1.209647495361781e-05, "loss": 0.4093, "step": 326 }, { "epoch": 0.05, "grad_norm": 2.8062543869018555, "learning_rate": 1.2133580705009278e-05, "loss": 0.3623, "step": 327 }, { "epoch": 0.05, "grad_norm": 2.866265058517456, "learning_rate": 1.2170686456400743e-05, "loss": 0.4031, "step": 328 }, { "epoch": 0.06, "grad_norm": 3.5342464447021484, "learning_rate": 1.2207792207792208e-05, "loss": 0.4119, "step": 329 }, { "epoch": 0.06, "grad_norm": 3.2953803539276123, "learning_rate": 1.2244897959183674e-05, "loss": 0.4166, "step": 330 }, { "epoch": 0.06, "grad_norm": 3.152968168258667, "learning_rate": 1.2282003710575142e-05, "loss": 0.4614, "step": 331 }, { "epoch": 0.06, "grad_norm": 2.9092025756835938, "learning_rate": 1.2319109461966606e-05, "loss": 0.364, "step": 332 }, { "epoch": 0.06, "grad_norm": 2.9890778064727783, "learning_rate": 1.2356215213358072e-05, "loss": 0.3714, "step": 333 }, { "epoch": 0.06, "grad_norm": 3.4394853115081787, "learning_rate": 1.2393320964749538e-05, "loss": 0.3951, "step": 334 }, { "epoch": 0.06, "grad_norm": 2.871321201324463, "learning_rate": 1.2430426716141002e-05, "loss": 0.3615, "step": 335 }, { "epoch": 0.06, "grad_norm": 2.538503885269165, "learning_rate": 1.2467532467532468e-05, "loss": 0.3597, "step": 336 }, { "epoch": 0.06, "grad_norm": 3.042118787765503, "learning_rate": 1.2504638218923935e-05, "loss": 0.3821, "step": 337 }, { "epoch": 0.06, "grad_norm": 2.9602673053741455, "learning_rate": 1.2541743970315401e-05, "loss": 0.4219, "step": 338 }, { "epoch": 0.06, "grad_norm": 2.7855358123779297, "learning_rate": 1.2578849721706865e-05, "loss": 0.4235, "step": 339 }, { "epoch": 0.06, "grad_norm": 2.9826362133026123, "learning_rate": 1.2615955473098331e-05, "loss": 0.4073, "step": 340 }, { "epoch": 0.06, "grad_norm": 3.0339555740356445, "learning_rate": 1.2653061224489798e-05, "loss": 0.4342, "step": 341 }, { "epoch": 0.06, "grad_norm": 2.5553407669067383, "learning_rate": 1.2690166975881263e-05, "loss": 0.3813, "step": 342 }, { "epoch": 0.06, "grad_norm": 3.4479877948760986, "learning_rate": 1.2727272727272728e-05, "loss": 0.416, "step": 343 }, { "epoch": 0.06, "grad_norm": 3.048041582107544, "learning_rate": 1.2764378478664194e-05, "loss": 0.3791, "step": 344 }, { "epoch": 0.06, "grad_norm": 3.0559234619140625, "learning_rate": 1.280148423005566e-05, "loss": 0.4015, "step": 345 }, { "epoch": 0.06, "grad_norm": 3.343156099319458, "learning_rate": 1.2838589981447124e-05, "loss": 0.4114, "step": 346 }, { "epoch": 0.06, "grad_norm": 2.8725478649139404, "learning_rate": 1.2875695732838592e-05, "loss": 0.3795, "step": 347 }, { "epoch": 0.06, "grad_norm": 3.8578460216522217, "learning_rate": 1.2912801484230058e-05, "loss": 0.4171, "step": 348 }, { "epoch": 0.06, "grad_norm": 3.3619072437286377, "learning_rate": 1.2949907235621522e-05, "loss": 0.4232, "step": 349 }, { "epoch": 0.06, "grad_norm": 3.332642078399658, "learning_rate": 1.2987012987012988e-05, "loss": 0.3876, "step": 350 }, { "epoch": 0.06, "grad_norm": 2.9353437423706055, "learning_rate": 1.3024118738404454e-05, "loss": 0.3972, "step": 351 }, { "epoch": 0.06, "grad_norm": 2.979036331176758, "learning_rate": 1.3061224489795918e-05, "loss": 0.3742, "step": 352 }, { "epoch": 0.06, "grad_norm": 2.9515154361724854, "learning_rate": 1.3098330241187385e-05, "loss": 0.366, "step": 353 }, { "epoch": 0.06, "grad_norm": 3.86810302734375, "learning_rate": 1.3135435992578851e-05, "loss": 0.4071, "step": 354 }, { "epoch": 0.06, "grad_norm": 2.8007259368896484, "learning_rate": 1.3172541743970317e-05, "loss": 0.3786, "step": 355 }, { "epoch": 0.06, "grad_norm": 3.0594913959503174, "learning_rate": 1.3209647495361781e-05, "loss": 0.3777, "step": 356 }, { "epoch": 0.06, "grad_norm": 3.1129539012908936, "learning_rate": 1.3246753246753249e-05, "loss": 0.3838, "step": 357 }, { "epoch": 0.06, "grad_norm": 2.8762266635894775, "learning_rate": 1.3283858998144714e-05, "loss": 0.3709, "step": 358 }, { "epoch": 0.06, "grad_norm": 2.931156635284424, "learning_rate": 1.3320964749536179e-05, "loss": 0.4129, "step": 359 }, { "epoch": 0.06, "grad_norm": 3.707425594329834, "learning_rate": 1.3358070500927644e-05, "loss": 0.4209, "step": 360 }, { "epoch": 0.06, "grad_norm": 2.8026282787323, "learning_rate": 1.339517625231911e-05, "loss": 0.3899, "step": 361 }, { "epoch": 0.06, "grad_norm": 2.861218214035034, "learning_rate": 1.3432282003710578e-05, "loss": 0.3743, "step": 362 }, { "epoch": 0.06, "grad_norm": 2.9253275394439697, "learning_rate": 1.3469387755102042e-05, "loss": 0.3584, "step": 363 }, { "epoch": 0.06, "grad_norm": 2.786437749862671, "learning_rate": 1.3506493506493508e-05, "loss": 0.3634, "step": 364 }, { "epoch": 0.06, "grad_norm": 3.3298451900482178, "learning_rate": 1.3543599257884974e-05, "loss": 0.3853, "step": 365 }, { "epoch": 0.06, "grad_norm": 2.884493589401245, "learning_rate": 1.3580705009276438e-05, "loss": 0.391, "step": 366 }, { "epoch": 0.06, "grad_norm": 3.016658067703247, "learning_rate": 1.3617810760667904e-05, "loss": 0.3864, "step": 367 }, { "epoch": 0.06, "grad_norm": 3.251417636871338, "learning_rate": 1.3654916512059371e-05, "loss": 0.3856, "step": 368 }, { "epoch": 0.06, "grad_norm": 3.1370346546173096, "learning_rate": 1.3692022263450837e-05, "loss": 0.4021, "step": 369 }, { "epoch": 0.06, "grad_norm": 3.047682762145996, "learning_rate": 1.3729128014842301e-05, "loss": 0.3903, "step": 370 }, { "epoch": 0.06, "grad_norm": 2.815366744995117, "learning_rate": 1.3766233766233767e-05, "loss": 0.3896, "step": 371 }, { "epoch": 0.06, "grad_norm": 2.98842716217041, "learning_rate": 1.3803339517625235e-05, "loss": 0.3659, "step": 372 }, { "epoch": 0.06, "grad_norm": 2.5882840156555176, "learning_rate": 1.3840445269016699e-05, "loss": 0.3591, "step": 373 }, { "epoch": 0.06, "grad_norm": 2.4198734760284424, "learning_rate": 1.3877551020408165e-05, "loss": 0.3311, "step": 374 }, { "epoch": 0.06, "grad_norm": 2.798917770385742, "learning_rate": 1.391465677179963e-05, "loss": 0.3802, "step": 375 }, { "epoch": 0.06, "grad_norm": 3.0346758365631104, "learning_rate": 1.3951762523191095e-05, "loss": 0.3784, "step": 376 }, { "epoch": 0.06, "grad_norm": 2.994194269180298, "learning_rate": 1.398886827458256e-05, "loss": 0.3473, "step": 377 }, { "epoch": 0.06, "grad_norm": 3.083204746246338, "learning_rate": 1.4025974025974028e-05, "loss": 0.365, "step": 378 }, { "epoch": 0.06, "grad_norm": 2.9725446701049805, "learning_rate": 1.4063079777365494e-05, "loss": 0.3799, "step": 379 }, { "epoch": 0.06, "grad_norm": 2.6326100826263428, "learning_rate": 1.4100185528756958e-05, "loss": 0.3119, "step": 380 }, { "epoch": 0.06, "grad_norm": 2.8231468200683594, "learning_rate": 1.4137291280148424e-05, "loss": 0.3917, "step": 381 }, { "epoch": 0.06, "grad_norm": 2.9972012042999268, "learning_rate": 1.417439703153989e-05, "loss": 0.3729, "step": 382 }, { "epoch": 0.06, "grad_norm": 2.818228244781494, "learning_rate": 1.4211502782931354e-05, "loss": 0.3785, "step": 383 }, { "epoch": 0.06, "grad_norm": 3.154390335083008, "learning_rate": 1.4248608534322821e-05, "loss": 0.3925, "step": 384 }, { "epoch": 0.06, "grad_norm": 2.6806185245513916, "learning_rate": 1.4285714285714287e-05, "loss": 0.3747, "step": 385 }, { "epoch": 0.06, "grad_norm": 2.7996699810028076, "learning_rate": 1.4322820037105753e-05, "loss": 0.3724, "step": 386 }, { "epoch": 0.06, "grad_norm": 2.7865655422210693, "learning_rate": 1.4359925788497217e-05, "loss": 0.3419, "step": 387 }, { "epoch": 0.06, "grad_norm": 2.649108648300171, "learning_rate": 1.4397031539888685e-05, "loss": 0.3769, "step": 388 }, { "epoch": 0.07, "grad_norm": 2.607269525527954, "learning_rate": 1.443413729128015e-05, "loss": 0.3493, "step": 389 }, { "epoch": 0.07, "grad_norm": 2.8508338928222656, "learning_rate": 1.4471243042671615e-05, "loss": 0.3523, "step": 390 }, { "epoch": 0.07, "grad_norm": 3.0675015449523926, "learning_rate": 1.450834879406308e-05, "loss": 0.3731, "step": 391 }, { "epoch": 0.07, "grad_norm": 2.6927874088287354, "learning_rate": 1.4545454545454546e-05, "loss": 0.3425, "step": 392 }, { "epoch": 0.07, "grad_norm": 2.9176721572875977, "learning_rate": 1.4582560296846014e-05, "loss": 0.3587, "step": 393 }, { "epoch": 0.07, "grad_norm": 3.769188404083252, "learning_rate": 1.4619666048237478e-05, "loss": 0.4124, "step": 394 }, { "epoch": 0.07, "grad_norm": 3.2832295894622803, "learning_rate": 1.4656771799628944e-05, "loss": 0.4352, "step": 395 }, { "epoch": 0.07, "grad_norm": 3.0468082427978516, "learning_rate": 1.469387755102041e-05, "loss": 0.3264, "step": 396 }, { "epoch": 0.07, "grad_norm": 2.747523784637451, "learning_rate": 1.4730983302411874e-05, "loss": 0.366, "step": 397 }, { "epoch": 0.07, "grad_norm": 2.7829861640930176, "learning_rate": 1.4768089053803341e-05, "loss": 0.3341, "step": 398 }, { "epoch": 0.07, "grad_norm": 3.4408016204833984, "learning_rate": 1.4805194805194807e-05, "loss": 0.3607, "step": 399 }, { "epoch": 0.07, "grad_norm": 2.6883785724639893, "learning_rate": 1.4842300556586271e-05, "loss": 0.3653, "step": 400 }, { "epoch": 0.07, "grad_norm": 5.566004753112793, "learning_rate": 1.4879406307977737e-05, "loss": 0.3747, "step": 401 }, { "epoch": 0.07, "grad_norm": 2.8463735580444336, "learning_rate": 1.4916512059369203e-05, "loss": 0.3805, "step": 402 }, { "epoch": 0.07, "grad_norm": 4.380993843078613, "learning_rate": 1.495361781076067e-05, "loss": 0.382, "step": 403 }, { "epoch": 0.07, "grad_norm": 3.2086069583892822, "learning_rate": 1.4990723562152135e-05, "loss": 0.3291, "step": 404 }, { "epoch": 0.07, "grad_norm": 3.9451534748077393, "learning_rate": 1.50278293135436e-05, "loss": 0.3496, "step": 405 }, { "epoch": 0.07, "grad_norm": 2.7143611907958984, "learning_rate": 1.5064935064935066e-05, "loss": 0.3476, "step": 406 }, { "epoch": 0.07, "grad_norm": 2.900770902633667, "learning_rate": 1.510204081632653e-05, "loss": 0.3546, "step": 407 }, { "epoch": 0.07, "grad_norm": 2.1911163330078125, "learning_rate": 1.5139146567717996e-05, "loss": 0.3004, "step": 408 }, { "epoch": 0.07, "grad_norm": 3.195427894592285, "learning_rate": 1.5176252319109464e-05, "loss": 0.4016, "step": 409 }, { "epoch": 0.07, "grad_norm": 2.6113522052764893, "learning_rate": 1.521335807050093e-05, "loss": 0.3356, "step": 410 }, { "epoch": 0.07, "grad_norm": 2.733426809310913, "learning_rate": 1.5250463821892394e-05, "loss": 0.3331, "step": 411 }, { "epoch": 0.07, "grad_norm": 2.8701112270355225, "learning_rate": 1.528756957328386e-05, "loss": 0.3154, "step": 412 }, { "epoch": 0.07, "grad_norm": 3.3599023818969727, "learning_rate": 1.5324675324675326e-05, "loss": 0.3067, "step": 413 }, { "epoch": 0.07, "grad_norm": 2.9682343006134033, "learning_rate": 1.536178107606679e-05, "loss": 0.3273, "step": 414 }, { "epoch": 0.07, "grad_norm": 3.101578712463379, "learning_rate": 1.5398886827458257e-05, "loss": 0.3793, "step": 415 }, { "epoch": 0.07, "grad_norm": 3.325718402862549, "learning_rate": 1.5435992578849723e-05, "loss": 0.3606, "step": 416 }, { "epoch": 0.07, "grad_norm": 2.8653206825256348, "learning_rate": 1.547309833024119e-05, "loss": 0.3612, "step": 417 }, { "epoch": 0.07, "grad_norm": 2.789931297302246, "learning_rate": 1.5510204081632655e-05, "loss": 0.3369, "step": 418 }, { "epoch": 0.07, "grad_norm": 2.5312016010284424, "learning_rate": 1.554730983302412e-05, "loss": 0.342, "step": 419 }, { "epoch": 0.07, "grad_norm": 2.4059040546417236, "learning_rate": 1.5584415584415587e-05, "loss": 0.3379, "step": 420 }, { "epoch": 0.07, "grad_norm": 2.579974889755249, "learning_rate": 1.562152133580705e-05, "loss": 0.3421, "step": 421 }, { "epoch": 0.07, "grad_norm": 2.3502678871154785, "learning_rate": 1.5658627087198518e-05, "loss": 0.3074, "step": 422 }, { "epoch": 0.07, "grad_norm": 2.818793773651123, "learning_rate": 1.5695732838589984e-05, "loss": 0.3381, "step": 423 }, { "epoch": 0.07, "grad_norm": 2.764256477355957, "learning_rate": 1.5732838589981447e-05, "loss": 0.371, "step": 424 }, { "epoch": 0.07, "grad_norm": 2.9214913845062256, "learning_rate": 1.5769944341372912e-05, "loss": 0.375, "step": 425 }, { "epoch": 0.07, "grad_norm": 2.9290072917938232, "learning_rate": 1.580705009276438e-05, "loss": 0.3591, "step": 426 }, { "epoch": 0.07, "grad_norm": 2.674772262573242, "learning_rate": 1.5844155844155847e-05, "loss": 0.3645, "step": 427 }, { "epoch": 0.07, "grad_norm": 2.593703508377075, "learning_rate": 1.588126159554731e-05, "loss": 0.3212, "step": 428 }, { "epoch": 0.07, "grad_norm": 2.467851161956787, "learning_rate": 1.5918367346938776e-05, "loss": 0.3584, "step": 429 }, { "epoch": 0.07, "grad_norm": 2.6496379375457764, "learning_rate": 1.595547309833024e-05, "loss": 0.3533, "step": 430 }, { "epoch": 0.07, "grad_norm": 2.4475395679473877, "learning_rate": 1.5992578849721707e-05, "loss": 0.2984, "step": 431 }, { "epoch": 0.07, "grad_norm": 2.2161850929260254, "learning_rate": 1.6029684601113173e-05, "loss": 0.3177, "step": 432 }, { "epoch": 0.07, "grad_norm": 2.706730842590332, "learning_rate": 1.606679035250464e-05, "loss": 0.3517, "step": 433 }, { "epoch": 0.07, "grad_norm": 2.4614243507385254, "learning_rate": 1.6103896103896105e-05, "loss": 0.3042, "step": 434 }, { "epoch": 0.07, "grad_norm": 2.4244754314422607, "learning_rate": 1.614100185528757e-05, "loss": 0.3236, "step": 435 }, { "epoch": 0.07, "grad_norm": 2.470550537109375, "learning_rate": 1.6178107606679037e-05, "loss": 0.3132, "step": 436 }, { "epoch": 0.07, "grad_norm": 2.5792858600616455, "learning_rate": 1.6215213358070503e-05, "loss": 0.312, "step": 437 }, { "epoch": 0.07, "grad_norm": 2.6399548053741455, "learning_rate": 1.625231910946197e-05, "loss": 0.3196, "step": 438 }, { "epoch": 0.07, "grad_norm": 2.602245330810547, "learning_rate": 1.6289424860853434e-05, "loss": 0.3374, "step": 439 }, { "epoch": 0.07, "grad_norm": 2.811546802520752, "learning_rate": 1.63265306122449e-05, "loss": 0.319, "step": 440 }, { "epoch": 0.07, "grad_norm": 2.7016525268554688, "learning_rate": 1.6363636363636366e-05, "loss": 0.3384, "step": 441 }, { "epoch": 0.07, "grad_norm": 2.5632567405700684, "learning_rate": 1.6400742115027832e-05, "loss": 0.3233, "step": 442 }, { "epoch": 0.07, "grad_norm": 2.2743287086486816, "learning_rate": 1.6437847866419298e-05, "loss": 0.2807, "step": 443 }, { "epoch": 0.07, "grad_norm": 2.8323593139648438, "learning_rate": 1.6474953617810763e-05, "loss": 0.3428, "step": 444 }, { "epoch": 0.07, "grad_norm": 3.5116302967071533, "learning_rate": 1.6512059369202226e-05, "loss": 0.3327, "step": 445 }, { "epoch": 0.07, "grad_norm": 2.485853910446167, "learning_rate": 1.6549165120593692e-05, "loss": 0.2985, "step": 446 }, { "epoch": 0.07, "grad_norm": 2.820155382156372, "learning_rate": 1.658627087198516e-05, "loss": 0.3485, "step": 447 }, { "epoch": 0.07, "grad_norm": 2.7102506160736084, "learning_rate": 1.6623376623376627e-05, "loss": 0.3204, "step": 448 }, { "epoch": 0.08, "grad_norm": 2.4396016597747803, "learning_rate": 1.666048237476809e-05, "loss": 0.3116, "step": 449 }, { "epoch": 0.08, "grad_norm": 2.94903302192688, "learning_rate": 1.6697588126159555e-05, "loss": 0.3379, "step": 450 }, { "epoch": 0.08, "grad_norm": 3.1019287109375, "learning_rate": 1.673469387755102e-05, "loss": 0.344, "step": 451 }, { "epoch": 0.08, "grad_norm": 2.834010362625122, "learning_rate": 1.6771799628942487e-05, "loss": 0.3216, "step": 452 }, { "epoch": 0.08, "grad_norm": 4.980465888977051, "learning_rate": 1.6808905380333953e-05, "loss": 0.3313, "step": 453 }, { "epoch": 0.08, "grad_norm": 2.3497769832611084, "learning_rate": 1.684601113172542e-05, "loss": 0.3025, "step": 454 }, { "epoch": 0.08, "grad_norm": 2.6355321407318115, "learning_rate": 1.6883116883116884e-05, "loss": 0.3219, "step": 455 }, { "epoch": 0.08, "grad_norm": 2.384514093399048, "learning_rate": 1.692022263450835e-05, "loss": 0.3178, "step": 456 }, { "epoch": 0.08, "grad_norm": 2.9352357387542725, "learning_rate": 1.6957328385899816e-05, "loss": 0.335, "step": 457 }, { "epoch": 0.08, "grad_norm": 2.7103517055511475, "learning_rate": 1.6994434137291282e-05, "loss": 0.3701, "step": 458 }, { "epoch": 0.08, "grad_norm": 2.6486706733703613, "learning_rate": 1.7031539888682748e-05, "loss": 0.3254, "step": 459 }, { "epoch": 0.08, "grad_norm": 2.7901523113250732, "learning_rate": 1.7068645640074214e-05, "loss": 0.2935, "step": 460 }, { "epoch": 0.08, "grad_norm": 2.7922439575195312, "learning_rate": 1.710575139146568e-05, "loss": 0.3574, "step": 461 }, { "epoch": 0.08, "grad_norm": 2.697707176208496, "learning_rate": 1.7142857142857142e-05, "loss": 0.3145, "step": 462 }, { "epoch": 0.08, "grad_norm": 2.261587142944336, "learning_rate": 1.717996289424861e-05, "loss": 0.3068, "step": 463 }, { "epoch": 0.08, "grad_norm": 2.570329427719116, "learning_rate": 1.7217068645640077e-05, "loss": 0.3354, "step": 464 }, { "epoch": 0.08, "grad_norm": 2.232799768447876, "learning_rate": 1.7254174397031543e-05, "loss": 0.274, "step": 465 }, { "epoch": 0.08, "grad_norm": 2.79732084274292, "learning_rate": 1.7291280148423005e-05, "loss": 0.3429, "step": 466 }, { "epoch": 0.08, "grad_norm": 2.885777473449707, "learning_rate": 1.732838589981447e-05, "loss": 0.3296, "step": 467 }, { "epoch": 0.08, "grad_norm": 2.9824137687683105, "learning_rate": 1.736549165120594e-05, "loss": 0.3656, "step": 468 }, { "epoch": 0.08, "grad_norm": 2.523836851119995, "learning_rate": 1.7402597402597403e-05, "loss": 0.3037, "step": 469 }, { "epoch": 0.08, "grad_norm": 2.5984272956848145, "learning_rate": 1.743970315398887e-05, "loss": 0.2763, "step": 470 }, { "epoch": 0.08, "grad_norm": 2.8980095386505127, "learning_rate": 1.7476808905380334e-05, "loss": 0.3411, "step": 471 }, { "epoch": 0.08, "grad_norm": 3.202725648880005, "learning_rate": 1.7513914656771804e-05, "loss": 0.3034, "step": 472 }, { "epoch": 0.08, "grad_norm": 2.5406734943389893, "learning_rate": 1.7551020408163266e-05, "loss": 0.2946, "step": 473 }, { "epoch": 0.08, "grad_norm": 2.6110904216766357, "learning_rate": 1.7588126159554732e-05, "loss": 0.336, "step": 474 }, { "epoch": 0.08, "grad_norm": 2.4891889095306396, "learning_rate": 1.7625231910946198e-05, "loss": 0.2931, "step": 475 }, { "epoch": 0.08, "grad_norm": 2.735900402069092, "learning_rate": 1.7662337662337664e-05, "loss": 0.3085, "step": 476 }, { "epoch": 0.08, "grad_norm": 2.386859178543091, "learning_rate": 1.769944341372913e-05, "loss": 0.2937, "step": 477 }, { "epoch": 0.08, "grad_norm": 2.653621196746826, "learning_rate": 1.7736549165120595e-05, "loss": 0.3293, "step": 478 }, { "epoch": 0.08, "grad_norm": 2.4359588623046875, "learning_rate": 1.777365491651206e-05, "loss": 0.268, "step": 479 }, { "epoch": 0.08, "grad_norm": 2.8747832775115967, "learning_rate": 1.7810760667903527e-05, "loss": 0.3, "step": 480 }, { "epoch": 0.08, "grad_norm": 2.507591724395752, "learning_rate": 1.7847866419294993e-05, "loss": 0.3071, "step": 481 }, { "epoch": 0.08, "grad_norm": 2.686846971511841, "learning_rate": 1.788497217068646e-05, "loss": 0.3185, "step": 482 }, { "epoch": 0.08, "grad_norm": 2.9076011180877686, "learning_rate": 1.792207792207792e-05, "loss": 0.2981, "step": 483 }, { "epoch": 0.08, "grad_norm": 2.480295419692993, "learning_rate": 1.795918367346939e-05, "loss": 0.3107, "step": 484 }, { "epoch": 0.08, "grad_norm": 3.086437940597534, "learning_rate": 1.7996289424860856e-05, "loss": 0.3136, "step": 485 }, { "epoch": 0.08, "grad_norm": 2.3601903915405273, "learning_rate": 1.803339517625232e-05, "loss": 0.2905, "step": 486 }, { "epoch": 0.08, "grad_norm": 2.224418878555298, "learning_rate": 1.8070500927643785e-05, "loss": 0.291, "step": 487 }, { "epoch": 0.08, "grad_norm": 2.237659454345703, "learning_rate": 1.8107606679035254e-05, "loss": 0.2787, "step": 488 }, { "epoch": 0.08, "grad_norm": 2.523841142654419, "learning_rate": 1.814471243042672e-05, "loss": 0.2788, "step": 489 }, { "epoch": 0.08, "grad_norm": 2.7631421089172363, "learning_rate": 1.8181818181818182e-05, "loss": 0.2911, "step": 490 }, { "epoch": 0.08, "grad_norm": 1.9468494653701782, "learning_rate": 1.8218923933209648e-05, "loss": 0.2854, "step": 491 }, { "epoch": 0.08, "grad_norm": 3.184605360031128, "learning_rate": 1.8256029684601114e-05, "loss": 0.3356, "step": 492 }, { "epoch": 0.08, "grad_norm": 2.675031900405884, "learning_rate": 1.829313543599258e-05, "loss": 0.3041, "step": 493 }, { "epoch": 0.08, "grad_norm": 2.4095458984375, "learning_rate": 1.8330241187384045e-05, "loss": 0.2942, "step": 494 }, { "epoch": 0.08, "grad_norm": 2.188786745071411, "learning_rate": 1.836734693877551e-05, "loss": 0.2951, "step": 495 }, { "epoch": 0.08, "grad_norm": 2.593593120574951, "learning_rate": 1.8404452690166977e-05, "loss": 0.2787, "step": 496 }, { "epoch": 0.08, "grad_norm": 2.7704062461853027, "learning_rate": 1.8441558441558443e-05, "loss": 0.3109, "step": 497 }, { "epoch": 0.08, "grad_norm": 2.6687676906585693, "learning_rate": 1.847866419294991e-05, "loss": 0.2877, "step": 498 }, { "epoch": 0.08, "grad_norm": 2.7621114253997803, "learning_rate": 1.8515769944341375e-05, "loss": 0.3231, "step": 499 }, { "epoch": 0.08, "grad_norm": 2.482236862182617, "learning_rate": 1.855287569573284e-05, "loss": 0.3055, "step": 500 }, { "epoch": 0.08, "grad_norm": 2.3142497539520264, "learning_rate": 1.8589981447124306e-05, "loss": 0.2844, "step": 501 }, { "epoch": 0.08, "grad_norm": 2.506796360015869, "learning_rate": 1.8627087198515772e-05, "loss": 0.3102, "step": 502 }, { "epoch": 0.08, "grad_norm": 2.614366054534912, "learning_rate": 1.8664192949907235e-05, "loss": 0.3134, "step": 503 }, { "epoch": 0.08, "grad_norm": 2.5523979663848877, "learning_rate": 1.8701298701298704e-05, "loss": 0.3142, "step": 504 }, { "epoch": 0.08, "grad_norm": 2.3910086154937744, "learning_rate": 1.873840445269017e-05, "loss": 0.3105, "step": 505 }, { "epoch": 0.08, "grad_norm": 2.0425233840942383, "learning_rate": 1.8775510204081636e-05, "loss": 0.2453, "step": 506 }, { "epoch": 0.08, "grad_norm": 2.6860239505767822, "learning_rate": 1.8812615955473098e-05, "loss": 0.2833, "step": 507 }, { "epoch": 0.08, "grad_norm": 2.739924907684326, "learning_rate": 1.8849721706864564e-05, "loss": 0.2912, "step": 508 }, { "epoch": 0.09, "grad_norm": 2.6570563316345215, "learning_rate": 1.8886827458256033e-05, "loss": 0.2995, "step": 509 }, { "epoch": 0.09, "grad_norm": 2.273470163345337, "learning_rate": 1.8923933209647496e-05, "loss": 0.2951, "step": 510 }, { "epoch": 0.09, "grad_norm": 2.479226589202881, "learning_rate": 1.896103896103896e-05, "loss": 0.2813, "step": 511 }, { "epoch": 0.09, "grad_norm": 2.163621425628662, "learning_rate": 1.8998144712430427e-05, "loss": 0.2424, "step": 512 }, { "epoch": 0.09, "grad_norm": 2.646456003189087, "learning_rate": 1.9035250463821896e-05, "loss": 0.2914, "step": 513 }, { "epoch": 0.09, "grad_norm": 3.0016286373138428, "learning_rate": 1.907235621521336e-05, "loss": 0.2858, "step": 514 }, { "epoch": 0.09, "grad_norm": 2.6308486461639404, "learning_rate": 1.9109461966604825e-05, "loss": 0.2947, "step": 515 }, { "epoch": 0.09, "grad_norm": 2.247215986251831, "learning_rate": 1.914656771799629e-05, "loss": 0.2781, "step": 516 }, { "epoch": 0.09, "grad_norm": 2.4772439002990723, "learning_rate": 1.9183673469387756e-05, "loss": 0.2741, "step": 517 }, { "epoch": 0.09, "grad_norm": 2.5496835708618164, "learning_rate": 1.9220779220779222e-05, "loss": 0.2765, "step": 518 }, { "epoch": 0.09, "grad_norm": 2.166003465652466, "learning_rate": 1.9257884972170688e-05, "loss": 0.2645, "step": 519 }, { "epoch": 0.09, "grad_norm": 2.4519870281219482, "learning_rate": 1.9294990723562154e-05, "loss": 0.2684, "step": 520 }, { "epoch": 0.09, "grad_norm": 2.2446348667144775, "learning_rate": 1.933209647495362e-05, "loss": 0.2753, "step": 521 }, { "epoch": 0.09, "grad_norm": 2.161458730697632, "learning_rate": 1.9369202226345086e-05, "loss": 0.2542, "step": 522 }, { "epoch": 0.09, "grad_norm": 2.8233306407928467, "learning_rate": 1.940630797773655e-05, "loss": 0.3509, "step": 523 }, { "epoch": 0.09, "grad_norm": 1.8861123323440552, "learning_rate": 1.9443413729128014e-05, "loss": 0.2045, "step": 524 }, { "epoch": 0.09, "grad_norm": 2.3632588386535645, "learning_rate": 1.9480519480519483e-05, "loss": 0.2925, "step": 525 }, { "epoch": 0.09, "grad_norm": 2.6537864208221436, "learning_rate": 1.951762523191095e-05, "loss": 0.3166, "step": 526 }, { "epoch": 0.09, "grad_norm": 2.531964063644409, "learning_rate": 1.955473098330241e-05, "loss": 0.264, "step": 527 }, { "epoch": 0.09, "grad_norm": 2.277597188949585, "learning_rate": 1.9591836734693877e-05, "loss": 0.2883, "step": 528 }, { "epoch": 0.09, "grad_norm": 3.0391132831573486, "learning_rate": 1.9628942486085347e-05, "loss": 0.2935, "step": 529 }, { "epoch": 0.09, "grad_norm": 2.3614888191223145, "learning_rate": 1.9666048237476812e-05, "loss": 0.3167, "step": 530 }, { "epoch": 0.09, "grad_norm": 2.3834609985351562, "learning_rate": 1.9703153988868275e-05, "loss": 0.2754, "step": 531 }, { "epoch": 0.09, "grad_norm": 2.113098382949829, "learning_rate": 1.974025974025974e-05, "loss": 0.2609, "step": 532 }, { "epoch": 0.09, "grad_norm": 2.381103038787842, "learning_rate": 1.9777365491651207e-05, "loss": 0.2603, "step": 533 }, { "epoch": 0.09, "grad_norm": 2.3433029651641846, "learning_rate": 1.9814471243042672e-05, "loss": 0.2822, "step": 534 }, { "epoch": 0.09, "grad_norm": 2.4593772888183594, "learning_rate": 1.9851576994434138e-05, "loss": 0.2666, "step": 535 }, { "epoch": 0.09, "grad_norm": 2.3330159187316895, "learning_rate": 1.9888682745825604e-05, "loss": 0.2674, "step": 536 }, { "epoch": 0.09, "grad_norm": 2.3670289516448975, "learning_rate": 1.992578849721707e-05, "loss": 0.254, "step": 537 }, { "epoch": 0.09, "grad_norm": 2.957702398300171, "learning_rate": 1.9962894248608536e-05, "loss": 0.2841, "step": 538 }, { "epoch": 0.09, "grad_norm": 2.152226448059082, "learning_rate": 2e-05, "loss": 0.2615, "step": 539 }, { "epoch": 0.09, "grad_norm": 2.5797228813171387, "learning_rate": 1.9999999836968663e-05, "loss": 0.2994, "step": 540 }, { "epoch": 0.09, "grad_norm": 2.3963992595672607, "learning_rate": 1.999999934787465e-05, "loss": 0.2697, "step": 541 }, { "epoch": 0.09, "grad_norm": 2.4567320346832275, "learning_rate": 1.999999853271798e-05, "loss": 0.2634, "step": 542 }, { "epoch": 0.09, "grad_norm": 2.3412909507751465, "learning_rate": 1.9999997391498686e-05, "loss": 0.2627, "step": 543 }, { "epoch": 0.09, "grad_norm": 2.0276646614074707, "learning_rate": 1.9999995924216793e-05, "loss": 0.252, "step": 544 }, { "epoch": 0.09, "grad_norm": 2.534547805786133, "learning_rate": 1.9999994130872355e-05, "loss": 0.2865, "step": 545 }, { "epoch": 0.09, "grad_norm": 2.268409490585327, "learning_rate": 1.999999201146543e-05, "loss": 0.2636, "step": 546 }, { "epoch": 0.09, "grad_norm": 2.1879475116729736, "learning_rate": 1.999998956599609e-05, "loss": 0.2581, "step": 547 }, { "epoch": 0.09, "grad_norm": 2.2699263095855713, "learning_rate": 1.9999986794464412e-05, "loss": 0.2797, "step": 548 }, { "epoch": 0.09, "grad_norm": 1.892014503479004, "learning_rate": 1.9999983696870484e-05, "loss": 0.2484, "step": 549 }, { "epoch": 0.09, "grad_norm": 1.9955322742462158, "learning_rate": 1.9999980273214414e-05, "loss": 0.2217, "step": 550 }, { "epoch": 0.09, "grad_norm": 2.4266438484191895, "learning_rate": 1.9999976523496305e-05, "loss": 0.2776, "step": 551 }, { "epoch": 0.09, "grad_norm": 2.401709794998169, "learning_rate": 1.9999972447716285e-05, "loss": 0.2829, "step": 552 }, { "epoch": 0.09, "grad_norm": 1.8878871202468872, "learning_rate": 1.999996804587449e-05, "loss": 0.2283, "step": 553 }, { "epoch": 0.09, "grad_norm": 2.7802507877349854, "learning_rate": 1.999996331797105e-05, "loss": 0.3, "step": 554 }, { "epoch": 0.09, "grad_norm": 1.8729991912841797, "learning_rate": 1.9999958264006133e-05, "loss": 0.2395, "step": 555 }, { "epoch": 0.09, "grad_norm": 2.2916131019592285, "learning_rate": 1.9999952883979895e-05, "loss": 0.2578, "step": 556 }, { "epoch": 0.09, "grad_norm": 2.0629429817199707, "learning_rate": 1.999994717789252e-05, "loss": 0.2443, "step": 557 }, { "epoch": 0.09, "grad_norm": 2.249941825866699, "learning_rate": 1.9999941145744186e-05, "loss": 0.2668, "step": 558 }, { "epoch": 0.09, "grad_norm": 2.2284281253814697, "learning_rate": 1.9999934787535094e-05, "loss": 0.2586, "step": 559 }, { "epoch": 0.09, "grad_norm": 2.2318437099456787, "learning_rate": 1.9999928103265454e-05, "loss": 0.2613, "step": 560 }, { "epoch": 0.09, "grad_norm": 2.3887932300567627, "learning_rate": 1.9999921092935475e-05, "loss": 0.2929, "step": 561 }, { "epoch": 0.09, "grad_norm": 2.410754680633545, "learning_rate": 1.9999913756545394e-05, "loss": 0.2712, "step": 562 }, { "epoch": 0.09, "grad_norm": 2.2771358489990234, "learning_rate": 1.9999906094095448e-05, "loss": 0.2627, "step": 563 }, { "epoch": 0.09, "grad_norm": 2.947437047958374, "learning_rate": 1.999989810558588e-05, "loss": 0.3069, "step": 564 }, { "epoch": 0.09, "grad_norm": 1.8471972942352295, "learning_rate": 1.9999889791016962e-05, "loss": 0.237, "step": 565 }, { "epoch": 0.09, "grad_norm": 2.513638734817505, "learning_rate": 1.9999881150388958e-05, "loss": 0.2359, "step": 566 }, { "epoch": 0.09, "grad_norm": 2.0052802562713623, "learning_rate": 1.999987218370215e-05, "loss": 0.2453, "step": 567 }, { "epoch": 0.09, "grad_norm": 2.3171584606170654, "learning_rate": 1.9999862890956834e-05, "loss": 0.244, "step": 568 }, { "epoch": 0.1, "grad_norm": 2.082489252090454, "learning_rate": 1.999985327215331e-05, "loss": 0.2601, "step": 569 }, { "epoch": 0.1, "grad_norm": 2.0029397010803223, "learning_rate": 1.9999843327291886e-05, "loss": 0.2368, "step": 570 }, { "epoch": 0.1, "grad_norm": 1.9418349266052246, "learning_rate": 1.99998330563729e-05, "loss": 0.2599, "step": 571 }, { "epoch": 0.1, "grad_norm": 2.640922784805298, "learning_rate": 1.999982245939668e-05, "loss": 0.256, "step": 572 }, { "epoch": 0.1, "grad_norm": 2.1195366382598877, "learning_rate": 1.9999811536363568e-05, "loss": 0.259, "step": 573 }, { "epoch": 0.1, "grad_norm": 2.2690441608428955, "learning_rate": 1.9999800287273922e-05, "loss": 0.2608, "step": 574 }, { "epoch": 0.1, "grad_norm": 2.6030030250549316, "learning_rate": 1.999978871212811e-05, "loss": 0.2408, "step": 575 }, { "epoch": 0.1, "grad_norm": 2.439208984375, "learning_rate": 1.999977681092651e-05, "loss": 0.264, "step": 576 }, { "epoch": 0.1, "grad_norm": 2.1782174110412598, "learning_rate": 1.999976458366951e-05, "loss": 0.2466, "step": 577 }, { "epoch": 0.1, "grad_norm": 2.7103445529937744, "learning_rate": 1.9999752030357508e-05, "loss": 0.2521, "step": 578 }, { "epoch": 0.1, "grad_norm": 2.012444019317627, "learning_rate": 1.9999739150990912e-05, "loss": 0.2508, "step": 579 }, { "epoch": 0.1, "grad_norm": 2.1685705184936523, "learning_rate": 1.9999725945570146e-05, "loss": 0.2671, "step": 580 }, { "epoch": 0.1, "grad_norm": 2.273426055908203, "learning_rate": 1.9999712414095635e-05, "loss": 0.2384, "step": 581 }, { "epoch": 0.1, "grad_norm": 2.052302598953247, "learning_rate": 1.9999698556567825e-05, "loss": 0.2379, "step": 582 }, { "epoch": 0.1, "grad_norm": 2.355893135070801, "learning_rate": 1.9999684372987164e-05, "loss": 0.2663, "step": 583 }, { "epoch": 0.1, "grad_norm": 1.9274839162826538, "learning_rate": 1.9999669863354116e-05, "loss": 0.1991, "step": 584 }, { "epoch": 0.1, "grad_norm": 2.135192632675171, "learning_rate": 1.999965502766916e-05, "loss": 0.2416, "step": 585 }, { "epoch": 0.1, "grad_norm": 2.2974300384521484, "learning_rate": 1.9999639865932767e-05, "loss": 0.2579, "step": 586 }, { "epoch": 0.1, "grad_norm": 3.135650873184204, "learning_rate": 1.999962437814544e-05, "loss": 0.2955, "step": 587 }, { "epoch": 0.1, "grad_norm": 2.2493035793304443, "learning_rate": 1.9999608564307687e-05, "loss": 0.2696, "step": 588 }, { "epoch": 0.1, "grad_norm": 1.995464563369751, "learning_rate": 1.9999592424420016e-05, "loss": 0.2513, "step": 589 }, { "epoch": 0.1, "grad_norm": 2.6392292976379395, "learning_rate": 1.9999575958482956e-05, "loss": 0.2755, "step": 590 }, { "epoch": 0.1, "grad_norm": 2.5053787231445312, "learning_rate": 1.9999559166497047e-05, "loss": 0.2556, "step": 591 }, { "epoch": 0.1, "grad_norm": 2.112741231918335, "learning_rate": 1.999954204846283e-05, "loss": 0.2625, "step": 592 }, { "epoch": 0.1, "grad_norm": 2.319662094116211, "learning_rate": 1.999952460438087e-05, "loss": 0.2872, "step": 593 }, { "epoch": 0.1, "grad_norm": 2.348881721496582, "learning_rate": 1.9999506834251732e-05, "loss": 0.2946, "step": 594 }, { "epoch": 0.1, "grad_norm": 1.916921615600586, "learning_rate": 1.9999488738075997e-05, "loss": 0.2428, "step": 595 }, { "epoch": 0.1, "grad_norm": 2.0340993404388428, "learning_rate": 1.9999470315854254e-05, "loss": 0.2728, "step": 596 }, { "epoch": 0.1, "grad_norm": 2.232325315475464, "learning_rate": 1.9999451567587105e-05, "loss": 0.2383, "step": 597 }, { "epoch": 0.1, "grad_norm": 2.0882763862609863, "learning_rate": 1.9999432493275157e-05, "loss": 0.2335, "step": 598 }, { "epoch": 0.1, "grad_norm": 2.351099729537964, "learning_rate": 1.999941309291904e-05, "loss": 0.255, "step": 599 }, { "epoch": 0.1, "grad_norm": 2.437408447265625, "learning_rate": 1.9999393366519377e-05, "loss": 0.2793, "step": 600 }, { "epoch": 0.1, "grad_norm": 2.241910457611084, "learning_rate": 1.9999373314076818e-05, "loss": 0.2728, "step": 601 }, { "epoch": 0.1, "grad_norm": 2.268527030944824, "learning_rate": 1.9999352935592016e-05, "loss": 0.2877, "step": 602 }, { "epoch": 0.1, "grad_norm": 2.3726255893707275, "learning_rate": 1.9999332231065632e-05, "loss": 0.2898, "step": 603 }, { "epoch": 0.1, "grad_norm": 1.9385828971862793, "learning_rate": 1.9999311200498347e-05, "loss": 0.2532, "step": 604 }, { "epoch": 0.1, "grad_norm": 2.1560616493225098, "learning_rate": 1.9999289843890838e-05, "loss": 0.2782, "step": 605 }, { "epoch": 0.1, "grad_norm": 1.9772881269454956, "learning_rate": 1.999926816124381e-05, "loss": 0.2629, "step": 606 }, { "epoch": 0.1, "grad_norm": 2.1750640869140625, "learning_rate": 1.9999246152557968e-05, "loss": 0.2527, "step": 607 }, { "epoch": 0.1, "grad_norm": 2.3454642295837402, "learning_rate": 1.9999223817834027e-05, "loss": 0.2827, "step": 608 }, { "epoch": 0.1, "grad_norm": 2.1432626247406006, "learning_rate": 1.9999201157072715e-05, "loss": 0.2681, "step": 609 }, { "epoch": 0.1, "grad_norm": 2.132802724838257, "learning_rate": 1.999917817027477e-05, "loss": 0.2348, "step": 610 }, { "epoch": 0.1, "grad_norm": 1.9253604412078857, "learning_rate": 1.9999154857440947e-05, "loss": 0.2425, "step": 611 }, { "epoch": 0.1, "grad_norm": 2.3232932090759277, "learning_rate": 1.9999131218572005e-05, "loss": 0.2637, "step": 612 }, { "epoch": 0.1, "grad_norm": 2.0583879947662354, "learning_rate": 1.9999107253668708e-05, "loss": 0.2291, "step": 613 }, { "epoch": 0.1, "grad_norm": 1.914912462234497, "learning_rate": 1.9999082962731844e-05, "loss": 0.2295, "step": 614 }, { "epoch": 0.1, "grad_norm": 2.006992816925049, "learning_rate": 1.9999058345762205e-05, "loss": 0.211, "step": 615 }, { "epoch": 0.1, "grad_norm": 2.27290678024292, "learning_rate": 1.9999033402760588e-05, "loss": 0.2638, "step": 616 }, { "epoch": 0.1, "grad_norm": 1.8329657316207886, "learning_rate": 1.999900813372781e-05, "loss": 0.2175, "step": 617 }, { "epoch": 0.1, "grad_norm": 1.8565737009048462, "learning_rate": 1.99989825386647e-05, "loss": 0.2184, "step": 618 }, { "epoch": 0.1, "grad_norm": 2.167536973953247, "learning_rate": 1.9998956617572083e-05, "loss": 0.2491, "step": 619 }, { "epoch": 0.1, "grad_norm": 2.0348613262176514, "learning_rate": 1.9998930370450813e-05, "loss": 0.2371, "step": 620 }, { "epoch": 0.1, "grad_norm": 2.341879367828369, "learning_rate": 1.999890379730174e-05, "loss": 0.2638, "step": 621 }, { "epoch": 0.1, "grad_norm": 2.3196563720703125, "learning_rate": 1.999887689812573e-05, "loss": 0.2421, "step": 622 }, { "epoch": 0.1, "grad_norm": 2.1686997413635254, "learning_rate": 1.9998849672923664e-05, "loss": 0.2256, "step": 623 }, { "epoch": 0.1, "grad_norm": 2.081477165222168, "learning_rate": 1.9998822121696428e-05, "loss": 0.233, "step": 624 }, { "epoch": 0.1, "grad_norm": 1.9626315832138062, "learning_rate": 1.9998794244444916e-05, "loss": 0.2715, "step": 625 }, { "epoch": 0.1, "grad_norm": 2.3838391304016113, "learning_rate": 1.9998766041170046e-05, "loss": 0.2546, "step": 626 }, { "epoch": 0.1, "grad_norm": 1.6773241758346558, "learning_rate": 1.999873751187273e-05, "loss": 0.2252, "step": 627 }, { "epoch": 0.11, "grad_norm": 2.3076932430267334, "learning_rate": 1.9998708656553903e-05, "loss": 0.2458, "step": 628 }, { "epoch": 0.11, "grad_norm": 1.9882863759994507, "learning_rate": 1.9998679475214504e-05, "loss": 0.2247, "step": 629 }, { "epoch": 0.11, "grad_norm": 2.2090654373168945, "learning_rate": 1.9998649967855485e-05, "loss": 0.2302, "step": 630 }, { "epoch": 0.11, "grad_norm": 2.5276880264282227, "learning_rate": 1.9998620134477803e-05, "loss": 0.305, "step": 631 }, { "epoch": 0.11, "grad_norm": 2.13242769241333, "learning_rate": 1.999858997508244e-05, "loss": 0.2781, "step": 632 }, { "epoch": 0.11, "grad_norm": 2.031771183013916, "learning_rate": 1.9998559489670373e-05, "loss": 0.2577, "step": 633 }, { "epoch": 0.11, "grad_norm": 1.8006634712219238, "learning_rate": 1.9998528678242596e-05, "loss": 0.1929, "step": 634 }, { "epoch": 0.11, "grad_norm": 2.4958391189575195, "learning_rate": 1.999849754080012e-05, "loss": 0.3077, "step": 635 }, { "epoch": 0.11, "grad_norm": 1.9051016569137573, "learning_rate": 1.999846607734395e-05, "loss": 0.232, "step": 636 }, { "epoch": 0.11, "grad_norm": 1.680487036705017, "learning_rate": 1.999843428787512e-05, "loss": 0.1986, "step": 637 }, { "epoch": 0.11, "grad_norm": 1.8400346040725708, "learning_rate": 1.9998402172394664e-05, "loss": 0.1966, "step": 638 }, { "epoch": 0.11, "grad_norm": 2.377761125564575, "learning_rate": 1.999836973090363e-05, "loss": 0.2434, "step": 639 }, { "epoch": 0.11, "grad_norm": 1.7694636583328247, "learning_rate": 1.999833696340307e-05, "loss": 0.1995, "step": 640 }, { "epoch": 0.11, "grad_norm": 1.8253978490829468, "learning_rate": 1.999830386989406e-05, "loss": 0.2414, "step": 641 }, { "epoch": 0.11, "grad_norm": 2.266230821609497, "learning_rate": 1.999827045037768e-05, "loss": 0.2479, "step": 642 }, { "epoch": 0.11, "grad_norm": 1.8885369300842285, "learning_rate": 1.9998236704855017e-05, "loss": 0.2449, "step": 643 }, { "epoch": 0.11, "grad_norm": 1.882601022720337, "learning_rate": 1.9998202633327165e-05, "loss": 0.2255, "step": 644 }, { "epoch": 0.11, "grad_norm": 2.21075701713562, "learning_rate": 1.9998168235795243e-05, "loss": 0.2615, "step": 645 }, { "epoch": 0.11, "grad_norm": 2.298866033554077, "learning_rate": 1.999813351226037e-05, "loss": 0.2369, "step": 646 }, { "epoch": 0.11, "grad_norm": 1.9148322343826294, "learning_rate": 1.999809846272368e-05, "loss": 0.2214, "step": 647 }, { "epoch": 0.11, "grad_norm": 1.815319299697876, "learning_rate": 1.9998063087186307e-05, "loss": 0.2285, "step": 648 }, { "epoch": 0.11, "grad_norm": 1.9913952350616455, "learning_rate": 1.9998027385649416e-05, "loss": 0.2207, "step": 649 }, { "epoch": 0.11, "grad_norm": 1.7124074697494507, "learning_rate": 1.999799135811417e-05, "loss": 0.1993, "step": 650 }, { "epoch": 0.11, "grad_norm": 2.2923991680145264, "learning_rate": 1.9997955004581734e-05, "loss": 0.2601, "step": 651 }, { "epoch": 0.11, "grad_norm": 2.0293376445770264, "learning_rate": 1.9997918325053305e-05, "loss": 0.2279, "step": 652 }, { "epoch": 0.11, "grad_norm": 2.389392852783203, "learning_rate": 1.999788131953007e-05, "loss": 0.2607, "step": 653 }, { "epoch": 0.11, "grad_norm": 1.6654623746871948, "learning_rate": 1.999784398801324e-05, "loss": 0.2154, "step": 654 }, { "epoch": 0.11, "grad_norm": 2.275146961212158, "learning_rate": 1.9997806330504033e-05, "loss": 0.2598, "step": 655 }, { "epoch": 0.11, "grad_norm": 2.2667651176452637, "learning_rate": 1.9997768347003675e-05, "loss": 0.2543, "step": 656 }, { "epoch": 0.11, "grad_norm": 1.9322000741958618, "learning_rate": 1.9997730037513404e-05, "loss": 0.2293, "step": 657 }, { "epoch": 0.11, "grad_norm": 2.2284557819366455, "learning_rate": 1.999769140203447e-05, "loss": 0.2505, "step": 658 }, { "epoch": 0.11, "grad_norm": 1.9588760137557983, "learning_rate": 1.9997652440568133e-05, "loss": 0.2356, "step": 659 }, { "epoch": 0.11, "grad_norm": 2.9707415103912354, "learning_rate": 1.999761315311566e-05, "loss": 0.2304, "step": 660 }, { "epoch": 0.11, "grad_norm": 2.8289804458618164, "learning_rate": 1.999757353967834e-05, "loss": 0.2531, "step": 661 }, { "epoch": 0.11, "grad_norm": 1.8935105800628662, "learning_rate": 1.999753360025746e-05, "loss": 0.2309, "step": 662 }, { "epoch": 0.11, "grad_norm": 1.834031343460083, "learning_rate": 1.999749333485432e-05, "loss": 0.2082, "step": 663 }, { "epoch": 0.11, "grad_norm": 1.9279319047927856, "learning_rate": 1.9997452743470235e-05, "loss": 0.2222, "step": 664 }, { "epoch": 0.11, "grad_norm": 1.7792679071426392, "learning_rate": 1.9997411826106527e-05, "loss": 0.2395, "step": 665 }, { "epoch": 0.11, "grad_norm": 1.7022377252578735, "learning_rate": 1.9997370582764533e-05, "loss": 0.2066, "step": 666 }, { "epoch": 0.11, "grad_norm": 2.0346603393554688, "learning_rate": 1.99973290134456e-05, "loss": 0.224, "step": 667 }, { "epoch": 0.11, "grad_norm": 2.2481510639190674, "learning_rate": 1.9997287118151074e-05, "loss": 0.2654, "step": 668 }, { "epoch": 0.11, "grad_norm": 1.8547799587249756, "learning_rate": 1.999724489688233e-05, "loss": 0.2197, "step": 669 }, { "epoch": 0.11, "grad_norm": 2.5063624382019043, "learning_rate": 1.999720234964074e-05, "loss": 0.2278, "step": 670 }, { "epoch": 0.11, "grad_norm": 2.3884923458099365, "learning_rate": 1.9997159476427695e-05, "loss": 0.265, "step": 671 }, { "epoch": 0.11, "grad_norm": 1.9085172414779663, "learning_rate": 1.999711627724459e-05, "loss": 0.2305, "step": 672 }, { "epoch": 0.11, "grad_norm": 2.0539095401763916, "learning_rate": 1.9997072752092835e-05, "loss": 0.2491, "step": 673 }, { "epoch": 0.11, "grad_norm": 1.768384575843811, "learning_rate": 1.9997028900973843e-05, "loss": 0.2012, "step": 674 }, { "epoch": 0.11, "grad_norm": 2.4246835708618164, "learning_rate": 1.999698472388906e-05, "loss": 0.2342, "step": 675 }, { "epoch": 0.11, "grad_norm": 2.334948778152466, "learning_rate": 1.9996940220839908e-05, "loss": 0.2269, "step": 676 }, { "epoch": 0.11, "grad_norm": 1.9898351430892944, "learning_rate": 1.9996895391827847e-05, "loss": 0.2324, "step": 677 }, { "epoch": 0.11, "grad_norm": 2.140617609024048, "learning_rate": 1.999685023685434e-05, "loss": 0.2239, "step": 678 }, { "epoch": 0.11, "grad_norm": 2.014439344406128, "learning_rate": 1.9996804755920855e-05, "loss": 0.2278, "step": 679 }, { "epoch": 0.11, "grad_norm": 2.2067315578460693, "learning_rate": 1.9996758949028877e-05, "loss": 0.2093, "step": 680 }, { "epoch": 0.11, "grad_norm": 2.155200481414795, "learning_rate": 1.99967128161799e-05, "loss": 0.2468, "step": 681 }, { "epoch": 0.11, "grad_norm": 1.9685184955596924, "learning_rate": 1.9996666357375426e-05, "loss": 0.1763, "step": 682 }, { "epoch": 0.11, "grad_norm": 1.7606126070022583, "learning_rate": 1.9996619572616976e-05, "loss": 0.1963, "step": 683 }, { "epoch": 0.11, "grad_norm": 1.8424731492996216, "learning_rate": 1.9996572461906068e-05, "loss": 0.2251, "step": 684 }, { "epoch": 0.11, "grad_norm": 2.1942362785339355, "learning_rate": 1.999652502524424e-05, "loss": 0.2476, "step": 685 }, { "epoch": 0.11, "grad_norm": 1.8033486604690552, "learning_rate": 1.9996477262633043e-05, "loss": 0.2089, "step": 686 }, { "epoch": 0.11, "grad_norm": 1.740820050239563, "learning_rate": 1.9996429174074032e-05, "loss": 0.2063, "step": 687 }, { "epoch": 0.12, "grad_norm": 2.379868745803833, "learning_rate": 1.999638075956877e-05, "loss": 0.2541, "step": 688 }, { "epoch": 0.12, "grad_norm": 2.231586217880249, "learning_rate": 1.9996332019118846e-05, "loss": 0.2318, "step": 689 }, { "epoch": 0.12, "grad_norm": 2.213063955307007, "learning_rate": 1.9996282952725835e-05, "loss": 0.2152, "step": 690 }, { "epoch": 0.12, "grad_norm": 2.0243752002716064, "learning_rate": 1.999623356039135e-05, "loss": 0.2208, "step": 691 }, { "epoch": 0.12, "grad_norm": 2.165828227996826, "learning_rate": 1.9996183842116995e-05, "loss": 0.2318, "step": 692 }, { "epoch": 0.12, "grad_norm": 2.2762389183044434, "learning_rate": 1.9996133797904393e-05, "loss": 0.2672, "step": 693 }, { "epoch": 0.12, "grad_norm": 2.0697622299194336, "learning_rate": 1.999608342775518e-05, "loss": 0.2186, "step": 694 }, { "epoch": 0.12, "grad_norm": 2.107332706451416, "learning_rate": 1.999603273167099e-05, "loss": 0.2059, "step": 695 }, { "epoch": 0.12, "grad_norm": 2.012882709503174, "learning_rate": 1.9995981709653474e-05, "loss": 0.2322, "step": 696 }, { "epoch": 0.12, "grad_norm": 1.720105767250061, "learning_rate": 1.999593036170431e-05, "loss": 0.2119, "step": 697 }, { "epoch": 0.12, "grad_norm": 1.8552322387695312, "learning_rate": 1.999587868782516e-05, "loss": 0.2352, "step": 698 }, { "epoch": 0.12, "grad_norm": 1.8712966442108154, "learning_rate": 1.999582668801771e-05, "loss": 0.2065, "step": 699 }, { "epoch": 0.12, "grad_norm": 2.1163666248321533, "learning_rate": 1.9995774362283664e-05, "loss": 0.2772, "step": 700 }, { "epoch": 0.12, "grad_norm": 1.842442274093628, "learning_rate": 1.999572171062472e-05, "loss": 0.188, "step": 701 }, { "epoch": 0.12, "grad_norm": 2.043743848800659, "learning_rate": 1.9995668733042595e-05, "loss": 0.1882, "step": 702 }, { "epoch": 0.12, "grad_norm": 2.2205097675323486, "learning_rate": 1.999561542953902e-05, "loss": 0.2467, "step": 703 }, { "epoch": 0.12, "grad_norm": 1.8631600141525269, "learning_rate": 1.9995561800115733e-05, "loss": 0.223, "step": 704 }, { "epoch": 0.12, "grad_norm": 2.0603740215301514, "learning_rate": 1.9995507844774477e-05, "loss": 0.1888, "step": 705 }, { "epoch": 0.12, "grad_norm": 2.01115083694458, "learning_rate": 1.999545356351702e-05, "loss": 0.2256, "step": 706 }, { "epoch": 0.12, "grad_norm": 2.0326948165893555, "learning_rate": 1.9995398956345124e-05, "loss": 0.2381, "step": 707 }, { "epoch": 0.12, "grad_norm": 2.268101453781128, "learning_rate": 1.9995344023260574e-05, "loss": 0.1941, "step": 708 }, { "epoch": 0.12, "grad_norm": 2.7984964847564697, "learning_rate": 1.9995288764265162e-05, "loss": 0.2726, "step": 709 }, { "epoch": 0.12, "grad_norm": 1.8138753175735474, "learning_rate": 1.999523317936068e-05, "loss": 0.2049, "step": 710 }, { "epoch": 0.12, "grad_norm": 2.1650195121765137, "learning_rate": 1.999517726854896e-05, "loss": 0.2187, "step": 711 }, { "epoch": 0.12, "grad_norm": 2.0815906524658203, "learning_rate": 1.9995121031831805e-05, "loss": 0.2319, "step": 712 }, { "epoch": 0.12, "grad_norm": 1.9517096281051636, "learning_rate": 1.9995064469211058e-05, "loss": 0.2039, "step": 713 }, { "epoch": 0.12, "grad_norm": 1.9922479391098022, "learning_rate": 1.9995007580688567e-05, "loss": 0.2215, "step": 714 }, { "epoch": 0.12, "grad_norm": 1.538601279258728, "learning_rate": 1.9994950366266175e-05, "loss": 0.2022, "step": 715 }, { "epoch": 0.12, "grad_norm": 2.4934804439544678, "learning_rate": 1.999489282594576e-05, "loss": 0.2281, "step": 716 }, { "epoch": 0.12, "grad_norm": 2.600689172744751, "learning_rate": 1.999483495972919e-05, "loss": 0.229, "step": 717 }, { "epoch": 0.12, "grad_norm": 1.9764058589935303, "learning_rate": 1.9994776767618355e-05, "loss": 0.2202, "step": 718 }, { "epoch": 0.12, "grad_norm": 1.9552712440490723, "learning_rate": 1.9994718249615154e-05, "loss": 0.1978, "step": 719 }, { "epoch": 0.12, "grad_norm": 1.9844210147857666, "learning_rate": 1.9994659405721492e-05, "loss": 0.2217, "step": 720 }, { "epoch": 0.12, "grad_norm": 1.6604030132293701, "learning_rate": 1.999460023593929e-05, "loss": 0.1959, "step": 721 }, { "epoch": 0.12, "grad_norm": 1.9756522178649902, "learning_rate": 1.999454074027047e-05, "loss": 0.2272, "step": 722 }, { "epoch": 0.12, "grad_norm": 2.047459602355957, "learning_rate": 1.9994480918716985e-05, "loss": 0.2368, "step": 723 }, { "epoch": 0.12, "grad_norm": 1.6318038702011108, "learning_rate": 1.9994420771280776e-05, "loss": 0.2109, "step": 724 }, { "epoch": 0.12, "grad_norm": 2.058720588684082, "learning_rate": 1.999436029796381e-05, "loss": 0.2162, "step": 725 }, { "epoch": 0.12, "grad_norm": 2.0931291580200195, "learning_rate": 1.999429949876805e-05, "loss": 0.2073, "step": 726 }, { "epoch": 0.12, "grad_norm": 1.9505714178085327, "learning_rate": 1.9994238373695486e-05, "loss": 0.2298, "step": 727 }, { "epoch": 0.12, "grad_norm": 1.925500512123108, "learning_rate": 1.999417692274811e-05, "loss": 0.2144, "step": 728 }, { "epoch": 0.12, "grad_norm": 1.771780014038086, "learning_rate": 1.9994115145927924e-05, "loss": 0.1985, "step": 729 }, { "epoch": 0.12, "grad_norm": 1.8846279382705688, "learning_rate": 1.9994053043236946e-05, "loss": 0.2363, "step": 730 }, { "epoch": 0.12, "grad_norm": 1.876096248626709, "learning_rate": 1.9993990614677195e-05, "loss": 0.2223, "step": 731 }, { "epoch": 0.12, "grad_norm": 1.710587739944458, "learning_rate": 1.999392786025071e-05, "loss": 0.1987, "step": 732 }, { "epoch": 0.12, "grad_norm": 1.894545555114746, "learning_rate": 1.999386477995954e-05, "loss": 0.2265, "step": 733 }, { "epoch": 0.12, "grad_norm": 1.899539589881897, "learning_rate": 1.9993801373805734e-05, "loss": 0.2064, "step": 734 }, { "epoch": 0.12, "grad_norm": 1.6401880979537964, "learning_rate": 1.9993737641791364e-05, "loss": 0.1903, "step": 735 }, { "epoch": 0.12, "grad_norm": 3.497990369796753, "learning_rate": 1.999367358391851e-05, "loss": 0.2052, "step": 736 }, { "epoch": 0.12, "grad_norm": 2.2724833488464355, "learning_rate": 1.999360920018926e-05, "loss": 0.2235, "step": 737 }, { "epoch": 0.12, "grad_norm": 2.2518019676208496, "learning_rate": 1.999354449060571e-05, "loss": 0.1968, "step": 738 }, { "epoch": 0.12, "grad_norm": 2.745302677154541, "learning_rate": 1.9993479455169973e-05, "loss": 0.202, "step": 739 }, { "epoch": 0.12, "grad_norm": 1.767310380935669, "learning_rate": 1.999341409388417e-05, "loss": 0.1701, "step": 740 }, { "epoch": 0.12, "grad_norm": 2.169126510620117, "learning_rate": 1.9993348406750427e-05, "loss": 0.1941, "step": 741 }, { "epoch": 0.12, "grad_norm": 2.2102363109588623, "learning_rate": 1.9993282393770894e-05, "loss": 0.2218, "step": 742 }, { "epoch": 0.12, "grad_norm": 1.8917572498321533, "learning_rate": 1.9993216054947716e-05, "loss": 0.2018, "step": 743 }, { "epoch": 0.12, "grad_norm": 2.018293857574463, "learning_rate": 1.999314939028306e-05, "loss": 0.2203, "step": 744 }, { "epoch": 0.12, "grad_norm": 1.9122895002365112, "learning_rate": 1.9993082399779098e-05, "loss": 0.1852, "step": 745 }, { "epoch": 0.12, "grad_norm": 1.9497233629226685, "learning_rate": 1.9993015083438018e-05, "loss": 0.1878, "step": 746 }, { "epoch": 0.12, "grad_norm": 1.9802864789962769, "learning_rate": 1.999294744126201e-05, "loss": 0.1983, "step": 747 }, { "epoch": 0.13, "grad_norm": 1.683010220527649, "learning_rate": 1.9992879473253278e-05, "loss": 0.1893, "step": 748 }, { "epoch": 0.13, "grad_norm": 2.1425118446350098, "learning_rate": 1.9992811179414046e-05, "loss": 0.2447, "step": 749 }, { "epoch": 0.13, "grad_norm": 2.0798912048339844, "learning_rate": 1.9992742559746535e-05, "loss": 0.2524, "step": 750 }, { "epoch": 0.13, "grad_norm": 1.9746760129928589, "learning_rate": 1.9992673614252988e-05, "loss": 0.22, "step": 751 }, { "epoch": 0.13, "grad_norm": 1.9321357011795044, "learning_rate": 1.9992604342935644e-05, "loss": 0.2427, "step": 752 }, { "epoch": 0.13, "grad_norm": 1.8194551467895508, "learning_rate": 1.999253474579677e-05, "loss": 0.2145, "step": 753 }, { "epoch": 0.13, "grad_norm": 1.9640580415725708, "learning_rate": 1.999246482283863e-05, "loss": 0.2092, "step": 754 }, { "epoch": 0.13, "grad_norm": 1.843672752380371, "learning_rate": 1.9992394574063504e-05, "loss": 0.2171, "step": 755 }, { "epoch": 0.13, "grad_norm": 1.8140791654586792, "learning_rate": 1.9992323999473688e-05, "loss": 0.2199, "step": 756 }, { "epoch": 0.13, "grad_norm": 2.0000391006469727, "learning_rate": 1.999225309907148e-05, "loss": 0.1896, "step": 757 }, { "epoch": 0.13, "grad_norm": 1.7140395641326904, "learning_rate": 1.9992181872859188e-05, "loss": 0.1947, "step": 758 }, { "epoch": 0.13, "grad_norm": 1.9345495700836182, "learning_rate": 1.9992110320839142e-05, "loss": 0.1957, "step": 759 }, { "epoch": 0.13, "grad_norm": 1.9024704694747925, "learning_rate": 1.999203844301367e-05, "loss": 0.239, "step": 760 }, { "epoch": 0.13, "grad_norm": 1.6938022375106812, "learning_rate": 1.9991966239385116e-05, "loss": 0.2079, "step": 761 }, { "epoch": 0.13, "grad_norm": 1.6418708562850952, "learning_rate": 1.9991893709955835e-05, "loss": 0.1892, "step": 762 }, { "epoch": 0.13, "grad_norm": 2.3953230381011963, "learning_rate": 1.999182085472819e-05, "loss": 0.2074, "step": 763 }, { "epoch": 0.13, "grad_norm": 2.2123208045959473, "learning_rate": 1.9991747673704564e-05, "loss": 0.1912, "step": 764 }, { "epoch": 0.13, "grad_norm": 2.2175915241241455, "learning_rate": 1.999167416688733e-05, "loss": 0.2008, "step": 765 }, { "epoch": 0.13, "grad_norm": 1.8833523988723755, "learning_rate": 1.9991600334278896e-05, "loss": 0.2021, "step": 766 }, { "epoch": 0.13, "grad_norm": 2.1988229751586914, "learning_rate": 1.9991526175881666e-05, "loss": 0.2298, "step": 767 }, { "epoch": 0.13, "grad_norm": 1.8476165533065796, "learning_rate": 1.9991451691698058e-05, "loss": 0.1771, "step": 768 }, { "epoch": 0.13, "grad_norm": 2.028198003768921, "learning_rate": 1.99913768817305e-05, "loss": 0.2155, "step": 769 }, { "epoch": 0.13, "grad_norm": 2.0067577362060547, "learning_rate": 1.999130174598143e-05, "loss": 0.1887, "step": 770 }, { "epoch": 0.13, "grad_norm": 1.6877107620239258, "learning_rate": 1.9991226284453302e-05, "loss": 0.209, "step": 771 }, { "epoch": 0.13, "grad_norm": 2.102344036102295, "learning_rate": 1.9991150497148573e-05, "loss": 0.2018, "step": 772 }, { "epoch": 0.13, "grad_norm": 2.043442964553833, "learning_rate": 1.9991074384069714e-05, "loss": 0.2227, "step": 773 }, { "epoch": 0.13, "grad_norm": 1.8667933940887451, "learning_rate": 1.999099794521921e-05, "loss": 0.2074, "step": 774 }, { "epoch": 0.13, "grad_norm": 1.8886891603469849, "learning_rate": 1.999092118059955e-05, "loss": 0.1907, "step": 775 }, { "epoch": 0.13, "grad_norm": 1.7974013090133667, "learning_rate": 1.999084409021324e-05, "loss": 0.1852, "step": 776 }, { "epoch": 0.13, "grad_norm": 2.0094876289367676, "learning_rate": 1.999076667406279e-05, "loss": 0.2089, "step": 777 }, { "epoch": 0.13, "grad_norm": 2.0902976989746094, "learning_rate": 1.9990688932150724e-05, "loss": 0.2027, "step": 778 }, { "epoch": 0.13, "grad_norm": 1.5792292356491089, "learning_rate": 1.9990610864479585e-05, "loss": 0.1961, "step": 779 }, { "epoch": 0.13, "grad_norm": 1.7491447925567627, "learning_rate": 1.999053247105191e-05, "loss": 0.2113, "step": 780 }, { "epoch": 0.13, "grad_norm": 1.8974844217300415, "learning_rate": 1.999045375187026e-05, "loss": 0.1819, "step": 781 }, { "epoch": 0.13, "grad_norm": 2.1395986080169678, "learning_rate": 1.9990374706937197e-05, "loss": 0.1929, "step": 782 }, { "epoch": 0.13, "grad_norm": 2.069586753845215, "learning_rate": 1.99902953362553e-05, "loss": 0.1988, "step": 783 }, { "epoch": 0.13, "grad_norm": 1.7367976903915405, "learning_rate": 1.9990215639827158e-05, "loss": 0.1766, "step": 784 }, { "epoch": 0.13, "grad_norm": 1.795194149017334, "learning_rate": 1.999013561765537e-05, "loss": 0.1929, "step": 785 }, { "epoch": 0.13, "grad_norm": 1.960924744606018, "learning_rate": 1.9990055269742545e-05, "loss": 0.2182, "step": 786 }, { "epoch": 0.13, "grad_norm": 1.999053955078125, "learning_rate": 1.9989974596091304e-05, "loss": 0.1909, "step": 787 }, { "epoch": 0.13, "grad_norm": 2.2847208976745605, "learning_rate": 1.9989893596704276e-05, "loss": 0.2235, "step": 788 }, { "epoch": 0.13, "grad_norm": 1.7487608194351196, "learning_rate": 1.99898122715841e-05, "loss": 0.1843, "step": 789 }, { "epoch": 0.13, "grad_norm": 2.390150547027588, "learning_rate": 1.998973062073343e-05, "loss": 0.2054, "step": 790 }, { "epoch": 0.13, "grad_norm": 2.304899215698242, "learning_rate": 1.998964864415493e-05, "loss": 0.2282, "step": 791 }, { "epoch": 0.13, "grad_norm": 1.7362074851989746, "learning_rate": 1.998956634185127e-05, "loss": 0.2122, "step": 792 }, { "epoch": 0.13, "grad_norm": 1.825805425643921, "learning_rate": 1.9989483713825133e-05, "loss": 0.189, "step": 793 }, { "epoch": 0.13, "grad_norm": 1.7395515441894531, "learning_rate": 1.9989400760079217e-05, "loss": 0.2027, "step": 794 }, { "epoch": 0.13, "grad_norm": 1.517062783241272, "learning_rate": 1.998931748061622e-05, "loss": 0.1856, "step": 795 }, { "epoch": 0.13, "grad_norm": 2.387639045715332, "learning_rate": 1.9989233875438867e-05, "loss": 0.1958, "step": 796 }, { "epoch": 0.13, "grad_norm": 1.6599668264389038, "learning_rate": 1.998914994454988e-05, "loss": 0.1642, "step": 797 }, { "epoch": 0.13, "grad_norm": 1.3672136068344116, "learning_rate": 1.998906568795199e-05, "loss": 0.1877, "step": 798 }, { "epoch": 0.13, "grad_norm": 1.862993597984314, "learning_rate": 1.9988981105647952e-05, "loss": 0.2045, "step": 799 }, { "epoch": 0.13, "grad_norm": 1.9905496835708618, "learning_rate": 1.998889619764052e-05, "loss": 0.1995, "step": 800 }, { "epoch": 0.13, "grad_norm": 1.9726126194000244, "learning_rate": 1.9988810963932464e-05, "loss": 0.1947, "step": 801 }, { "epoch": 0.13, "grad_norm": 1.796278715133667, "learning_rate": 1.9988725404526563e-05, "loss": 0.1814, "step": 802 }, { "epoch": 0.13, "grad_norm": 1.5591543912887573, "learning_rate": 1.9988639519425605e-05, "loss": 0.1833, "step": 803 }, { "epoch": 0.13, "grad_norm": 1.5158156156539917, "learning_rate": 1.9988553308632394e-05, "loss": 0.1628, "step": 804 }, { "epoch": 0.13, "grad_norm": 1.950462818145752, "learning_rate": 1.998846677214974e-05, "loss": 0.2032, "step": 805 }, { "epoch": 0.13, "grad_norm": 1.867600679397583, "learning_rate": 1.998837990998046e-05, "loss": 0.216, "step": 806 }, { "epoch": 0.13, "grad_norm": 1.771317958831787, "learning_rate": 1.9988292722127393e-05, "loss": 0.1619, "step": 807 }, { "epoch": 0.14, "grad_norm": 2.2839255332946777, "learning_rate": 1.9988205208593373e-05, "loss": 0.2327, "step": 808 }, { "epoch": 0.14, "grad_norm": 1.9239012002944946, "learning_rate": 1.9988117369381266e-05, "loss": 0.1958, "step": 809 }, { "epoch": 0.14, "grad_norm": 1.9106416702270508, "learning_rate": 1.9988029204493925e-05, "loss": 0.2086, "step": 810 }, { "epoch": 0.14, "grad_norm": 1.707162857055664, "learning_rate": 1.998794071393423e-05, "loss": 0.1763, "step": 811 }, { "epoch": 0.14, "grad_norm": 2.373415946960449, "learning_rate": 1.9987851897705065e-05, "loss": 0.2039, "step": 812 }, { "epoch": 0.14, "grad_norm": 1.9987045526504517, "learning_rate": 1.9987762755809326e-05, "loss": 0.2334, "step": 813 }, { "epoch": 0.14, "grad_norm": 1.9556043148040771, "learning_rate": 1.998767328824992e-05, "loss": 0.2003, "step": 814 }, { "epoch": 0.14, "grad_norm": 1.6584006547927856, "learning_rate": 1.9987583495029766e-05, "loss": 0.1697, "step": 815 }, { "epoch": 0.14, "grad_norm": 1.4830445051193237, "learning_rate": 1.9987493376151786e-05, "loss": 0.1701, "step": 816 }, { "epoch": 0.14, "grad_norm": 1.6810964345932007, "learning_rate": 1.9987402931618928e-05, "loss": 0.1765, "step": 817 }, { "epoch": 0.14, "grad_norm": 1.5314356088638306, "learning_rate": 1.998731216143413e-05, "loss": 0.1678, "step": 818 }, { "epoch": 0.14, "grad_norm": 1.5594358444213867, "learning_rate": 1.998722106560036e-05, "loss": 0.1795, "step": 819 }, { "epoch": 0.14, "grad_norm": 1.416670560836792, "learning_rate": 1.9987129644120584e-05, "loss": 0.1849, "step": 820 }, { "epoch": 0.14, "grad_norm": 1.6649876832962036, "learning_rate": 1.9987037896997786e-05, "loss": 0.191, "step": 821 }, { "epoch": 0.14, "grad_norm": 1.50101637840271, "learning_rate": 1.9986945824234954e-05, "loss": 0.1694, "step": 822 }, { "epoch": 0.14, "grad_norm": 1.8268072605133057, "learning_rate": 1.998685342583509e-05, "loss": 0.2105, "step": 823 }, { "epoch": 0.14, "grad_norm": 2.0638976097106934, "learning_rate": 1.998676070180121e-05, "loss": 0.1767, "step": 824 }, { "epoch": 0.14, "grad_norm": 1.448097825050354, "learning_rate": 1.9986667652136337e-05, "loss": 0.1604, "step": 825 }, { "epoch": 0.14, "grad_norm": 1.8929044008255005, "learning_rate": 1.9986574276843505e-05, "loss": 0.1966, "step": 826 }, { "epoch": 0.14, "grad_norm": 1.7136833667755127, "learning_rate": 1.9986480575925756e-05, "loss": 0.1966, "step": 827 }, { "epoch": 0.14, "grad_norm": 1.7966196537017822, "learning_rate": 1.9986386549386146e-05, "loss": 0.2097, "step": 828 }, { "epoch": 0.14, "grad_norm": 1.671631932258606, "learning_rate": 1.9986292197227746e-05, "loss": 0.1608, "step": 829 }, { "epoch": 0.14, "grad_norm": 1.943843126296997, "learning_rate": 1.9986197519453624e-05, "loss": 0.2236, "step": 830 }, { "epoch": 0.14, "grad_norm": 1.498468041419983, "learning_rate": 1.9986102516066875e-05, "loss": 0.149, "step": 831 }, { "epoch": 0.14, "grad_norm": 1.7627896070480347, "learning_rate": 1.998600718707059e-05, "loss": 0.1568, "step": 832 }, { "epoch": 0.14, "grad_norm": 1.9144883155822754, "learning_rate": 1.998591153246788e-05, "loss": 0.2093, "step": 833 }, { "epoch": 0.14, "grad_norm": 1.5444616079330444, "learning_rate": 1.998581555226187e-05, "loss": 0.1663, "step": 834 }, { "epoch": 0.14, "grad_norm": 1.739457607269287, "learning_rate": 1.9985719246455677e-05, "loss": 0.1766, "step": 835 }, { "epoch": 0.14, "grad_norm": 1.619023084640503, "learning_rate": 1.998562261505245e-05, "loss": 0.2071, "step": 836 }, { "epoch": 0.14, "grad_norm": 1.6668028831481934, "learning_rate": 1.998552565805534e-05, "loss": 0.1791, "step": 837 }, { "epoch": 0.14, "grad_norm": 1.9226222038269043, "learning_rate": 1.9985428375467503e-05, "loss": 0.2002, "step": 838 }, { "epoch": 0.14, "grad_norm": 1.7928849458694458, "learning_rate": 1.9985330767292116e-05, "loss": 0.1643, "step": 839 }, { "epoch": 0.14, "grad_norm": 1.9671857357025146, "learning_rate": 1.998523283353236e-05, "loss": 0.2202, "step": 840 }, { "epoch": 0.14, "grad_norm": 1.6903302669525146, "learning_rate": 1.998513457419143e-05, "loss": 0.1878, "step": 841 }, { "epoch": 0.14, "grad_norm": 1.772770881652832, "learning_rate": 1.9985035989272525e-05, "loss": 0.2079, "step": 842 }, { "epoch": 0.14, "grad_norm": 1.889746904373169, "learning_rate": 1.9984937078778862e-05, "loss": 0.2096, "step": 843 }, { "epoch": 0.14, "grad_norm": 2.063230037689209, "learning_rate": 1.998483784271367e-05, "loss": 0.2041, "step": 844 }, { "epoch": 0.14, "grad_norm": 1.5244104862213135, "learning_rate": 1.998473828108018e-05, "loss": 0.1815, "step": 845 }, { "epoch": 0.14, "grad_norm": 1.5112650394439697, "learning_rate": 1.9984638393881636e-05, "loss": 0.1567, "step": 846 }, { "epoch": 0.14, "grad_norm": 1.4269752502441406, "learning_rate": 1.9984538181121305e-05, "loss": 0.1742, "step": 847 }, { "epoch": 0.14, "grad_norm": 1.8871135711669922, "learning_rate": 1.9984437642802447e-05, "loss": 0.1824, "step": 848 }, { "epoch": 0.14, "grad_norm": 1.7872345447540283, "learning_rate": 1.9984336778928333e-05, "loss": 0.1711, "step": 849 }, { "epoch": 0.14, "grad_norm": 1.7504702806472778, "learning_rate": 1.998423558950227e-05, "loss": 0.1842, "step": 850 }, { "epoch": 0.14, "grad_norm": 1.7778822183609009, "learning_rate": 1.9984134074527543e-05, "loss": 0.1778, "step": 851 }, { "epoch": 0.14, "grad_norm": 2.0388615131378174, "learning_rate": 1.998403223400747e-05, "loss": 0.2022, "step": 852 }, { "epoch": 0.14, "grad_norm": 1.4747766256332397, "learning_rate": 1.9983930067945363e-05, "loss": 0.1823, "step": 853 }, { "epoch": 0.14, "grad_norm": 2.096271276473999, "learning_rate": 1.9983827576344562e-05, "loss": 0.1877, "step": 854 }, { "epoch": 0.14, "grad_norm": 1.6088263988494873, "learning_rate": 1.9983724759208406e-05, "loss": 0.1801, "step": 855 }, { "epoch": 0.14, "grad_norm": 1.6034386157989502, "learning_rate": 1.9983621616540246e-05, "loss": 0.1629, "step": 856 }, { "epoch": 0.14, "grad_norm": 1.7672290802001953, "learning_rate": 1.9983518148343443e-05, "loss": 0.1651, "step": 857 }, { "epoch": 0.14, "grad_norm": 1.4867616891860962, "learning_rate": 1.998341435462138e-05, "loss": 0.1623, "step": 858 }, { "epoch": 0.14, "grad_norm": 2.0819649696350098, "learning_rate": 1.9983310235377428e-05, "loss": 0.2196, "step": 859 }, { "epoch": 0.14, "grad_norm": 1.887688398361206, "learning_rate": 1.9983205790614992e-05, "loss": 0.1706, "step": 860 }, { "epoch": 0.14, "grad_norm": 1.499706745147705, "learning_rate": 1.9983101020337476e-05, "loss": 0.1764, "step": 861 }, { "epoch": 0.14, "grad_norm": 1.5563530921936035, "learning_rate": 1.998299592454829e-05, "loss": 0.1672, "step": 862 }, { "epoch": 0.14, "grad_norm": 1.5810072422027588, "learning_rate": 1.998289050325087e-05, "loss": 0.1697, "step": 863 }, { "epoch": 0.14, "grad_norm": 1.8022490739822388, "learning_rate": 1.9982784756448646e-05, "loss": 0.1767, "step": 864 }, { "epoch": 0.14, "grad_norm": 1.5143241882324219, "learning_rate": 1.998267868414507e-05, "loss": 0.1836, "step": 865 }, { "epoch": 0.14, "grad_norm": 1.7564446926116943, "learning_rate": 1.99825722863436e-05, "loss": 0.1747, "step": 866 }, { "epoch": 0.15, "grad_norm": 1.6307483911514282, "learning_rate": 1.9982465563047704e-05, "loss": 0.1746, "step": 867 }, { "epoch": 0.15, "grad_norm": 1.6610974073410034, "learning_rate": 1.9982358514260864e-05, "loss": 0.1907, "step": 868 }, { "epoch": 0.15, "grad_norm": 1.8184317350387573, "learning_rate": 1.9982251139986566e-05, "loss": 0.1938, "step": 869 }, { "epoch": 0.15, "grad_norm": 1.6180486679077148, "learning_rate": 1.9982143440228316e-05, "loss": 0.1947, "step": 870 }, { "epoch": 0.15, "grad_norm": 1.5159823894500732, "learning_rate": 1.9982035414989625e-05, "loss": 0.1993, "step": 871 }, { "epoch": 0.15, "grad_norm": 1.8461369276046753, "learning_rate": 1.9981927064274012e-05, "loss": 0.1665, "step": 872 }, { "epoch": 0.15, "grad_norm": 1.5701488256454468, "learning_rate": 1.9981818388085013e-05, "loss": 0.1722, "step": 873 }, { "epoch": 0.15, "grad_norm": 1.6405246257781982, "learning_rate": 1.9981709386426168e-05, "loss": 0.1637, "step": 874 }, { "epoch": 0.15, "grad_norm": 1.8563497066497803, "learning_rate": 1.9981600059301036e-05, "loss": 0.1829, "step": 875 }, { "epoch": 0.15, "grad_norm": 1.4099154472351074, "learning_rate": 1.9981490406713177e-05, "loss": 0.1592, "step": 876 }, { "epoch": 0.15, "grad_norm": 1.631424069404602, "learning_rate": 1.9981380428666173e-05, "loss": 0.1812, "step": 877 }, { "epoch": 0.15, "grad_norm": 1.5882678031921387, "learning_rate": 1.9981270125163605e-05, "loss": 0.1955, "step": 878 }, { "epoch": 0.15, "grad_norm": 1.5886896848678589, "learning_rate": 1.9981159496209067e-05, "loss": 0.1566, "step": 879 }, { "epoch": 0.15, "grad_norm": 1.6939599514007568, "learning_rate": 1.998104854180617e-05, "loss": 0.1971, "step": 880 }, { "epoch": 0.15, "grad_norm": 1.5032715797424316, "learning_rate": 1.9980937261958534e-05, "loss": 0.1626, "step": 881 }, { "epoch": 0.15, "grad_norm": 1.4149800539016724, "learning_rate": 1.998082565666978e-05, "loss": 0.1651, "step": 882 }, { "epoch": 0.15, "grad_norm": 1.718247413635254, "learning_rate": 1.9980713725943555e-05, "loss": 0.1854, "step": 883 }, { "epoch": 0.15, "grad_norm": 2.3007819652557373, "learning_rate": 1.9980601469783506e-05, "loss": 0.1593, "step": 884 }, { "epoch": 0.15, "grad_norm": 1.728300929069519, "learning_rate": 1.9980488888193294e-05, "loss": 0.1619, "step": 885 }, { "epoch": 0.15, "grad_norm": 3.798227071762085, "learning_rate": 1.9980375981176584e-05, "loss": 0.1924, "step": 886 }, { "epoch": 0.15, "grad_norm": 2.1554319858551025, "learning_rate": 1.9980262748737066e-05, "loss": 0.1764, "step": 887 }, { "epoch": 0.15, "grad_norm": 2.9783706665039062, "learning_rate": 1.9980149190878423e-05, "loss": 0.1832, "step": 888 }, { "epoch": 0.15, "grad_norm": 1.7525566816329956, "learning_rate": 1.998003530760437e-05, "loss": 0.1891, "step": 889 }, { "epoch": 0.15, "grad_norm": 2.3387181758880615, "learning_rate": 1.9979921098918607e-05, "loss": 0.1845, "step": 890 }, { "epoch": 0.15, "grad_norm": 1.7149158716201782, "learning_rate": 1.9979806564824867e-05, "loss": 0.1933, "step": 891 }, { "epoch": 0.15, "grad_norm": 2.3143911361694336, "learning_rate": 1.997969170532688e-05, "loss": 0.1905, "step": 892 }, { "epoch": 0.15, "grad_norm": 1.9947466850280762, "learning_rate": 1.9979576520428395e-05, "loss": 0.2051, "step": 893 }, { "epoch": 0.15, "grad_norm": 1.7370920181274414, "learning_rate": 1.9979461010133164e-05, "loss": 0.1519, "step": 894 }, { "epoch": 0.15, "grad_norm": 1.8415586948394775, "learning_rate": 1.997934517444495e-05, "loss": 0.1656, "step": 895 }, { "epoch": 0.15, "grad_norm": 1.6934067010879517, "learning_rate": 1.997922901336754e-05, "loss": 0.1875, "step": 896 }, { "epoch": 0.15, "grad_norm": 1.6301627159118652, "learning_rate": 1.9979112526904718e-05, "loss": 0.1567, "step": 897 }, { "epoch": 0.15, "grad_norm": 2.0463550090789795, "learning_rate": 1.997899571506028e-05, "loss": 0.1643, "step": 898 }, { "epoch": 0.15, "grad_norm": 1.8283947706222534, "learning_rate": 1.9978878577838032e-05, "loss": 0.1496, "step": 899 }, { "epoch": 0.15, "grad_norm": 1.814814805984497, "learning_rate": 1.9978761115241796e-05, "loss": 0.2015, "step": 900 }, { "epoch": 0.15, "grad_norm": 1.8994437456130981, "learning_rate": 1.9978643327275407e-05, "loss": 0.1886, "step": 901 }, { "epoch": 0.15, "grad_norm": 2.0403027534484863, "learning_rate": 1.9978525213942698e-05, "loss": 0.1972, "step": 902 }, { "epoch": 0.15, "grad_norm": 1.5298444032669067, "learning_rate": 1.9978406775247526e-05, "loss": 0.1655, "step": 903 }, { "epoch": 0.15, "grad_norm": 1.675168752670288, "learning_rate": 1.9978288011193746e-05, "loss": 0.1799, "step": 904 }, { "epoch": 0.15, "grad_norm": 1.790996789932251, "learning_rate": 1.997816892178524e-05, "loss": 0.2048, "step": 905 }, { "epoch": 0.15, "grad_norm": 1.6028350591659546, "learning_rate": 1.9978049507025884e-05, "loss": 0.1708, "step": 906 }, { "epoch": 0.15, "grad_norm": 1.9137710332870483, "learning_rate": 1.9977929766919575e-05, "loss": 0.1677, "step": 907 }, { "epoch": 0.15, "grad_norm": 1.689160704612732, "learning_rate": 1.9977809701470215e-05, "loss": 0.1759, "step": 908 }, { "epoch": 0.15, "grad_norm": 1.5934016704559326, "learning_rate": 1.9977689310681717e-05, "loss": 0.1846, "step": 909 }, { "epoch": 0.15, "grad_norm": 1.6376065015792847, "learning_rate": 1.9977568594558012e-05, "loss": 0.1816, "step": 910 }, { "epoch": 0.15, "grad_norm": 1.7689697742462158, "learning_rate": 1.9977447553103033e-05, "loss": 0.1842, "step": 911 }, { "epoch": 0.15, "grad_norm": 1.8314931392669678, "learning_rate": 1.997732618632073e-05, "loss": 0.1739, "step": 912 }, { "epoch": 0.15, "grad_norm": 1.7139593362808228, "learning_rate": 1.997720449421505e-05, "loss": 0.1738, "step": 913 }, { "epoch": 0.15, "grad_norm": 1.424700379371643, "learning_rate": 1.9977082476789977e-05, "loss": 0.1509, "step": 914 }, { "epoch": 0.15, "grad_norm": 1.5039852857589722, "learning_rate": 1.9976960134049475e-05, "loss": 0.1628, "step": 915 }, { "epoch": 0.15, "grad_norm": 1.350103497505188, "learning_rate": 1.997683746599754e-05, "loss": 0.1781, "step": 916 }, { "epoch": 0.15, "grad_norm": 1.8777883052825928, "learning_rate": 1.9976714472638174e-05, "loss": 0.1949, "step": 917 }, { "epoch": 0.15, "grad_norm": 1.9400296211242676, "learning_rate": 1.997659115397538e-05, "loss": 0.2124, "step": 918 }, { "epoch": 0.15, "grad_norm": 1.6858959197998047, "learning_rate": 1.9976467510013185e-05, "loss": 0.1465, "step": 919 }, { "epoch": 0.15, "grad_norm": 1.727699875831604, "learning_rate": 1.997634354075562e-05, "loss": 0.176, "step": 920 }, { "epoch": 0.15, "grad_norm": 1.4383504390716553, "learning_rate": 1.9976219246206723e-05, "loss": 0.1527, "step": 921 }, { "epoch": 0.15, "grad_norm": 1.728615641593933, "learning_rate": 1.997609462637055e-05, "loss": 0.1778, "step": 922 }, { "epoch": 0.15, "grad_norm": 2.031722068786621, "learning_rate": 1.9975969681251166e-05, "loss": 0.1636, "step": 923 }, { "epoch": 0.15, "grad_norm": 1.6654503345489502, "learning_rate": 1.9975844410852643e-05, "loss": 0.1579, "step": 924 }, { "epoch": 0.15, "grad_norm": 1.416642665863037, "learning_rate": 1.9975718815179062e-05, "loss": 0.159, "step": 925 }, { "epoch": 0.15, "grad_norm": 1.861448884010315, "learning_rate": 1.9975592894234523e-05, "loss": 0.1924, "step": 926 }, { "epoch": 0.16, "grad_norm": 1.924565315246582, "learning_rate": 1.997546664802313e-05, "loss": 0.1898, "step": 927 }, { "epoch": 0.16, "grad_norm": 1.9061775207519531, "learning_rate": 1.9975340076549005e-05, "loss": 0.1965, "step": 928 }, { "epoch": 0.16, "grad_norm": 1.9110023975372314, "learning_rate": 1.9975213179816266e-05, "loss": 0.1843, "step": 929 }, { "epoch": 0.16, "grad_norm": 1.571029543876648, "learning_rate": 1.9975085957829058e-05, "loss": 0.1469, "step": 930 }, { "epoch": 0.16, "grad_norm": 1.7328565120697021, "learning_rate": 1.997495841059152e-05, "loss": 0.193, "step": 931 }, { "epoch": 0.16, "grad_norm": 1.6338576078414917, "learning_rate": 1.997483053810782e-05, "loss": 0.1825, "step": 932 }, { "epoch": 0.16, "grad_norm": 1.3774372339248657, "learning_rate": 1.9974702340382127e-05, "loss": 0.1417, "step": 933 }, { "epoch": 0.16, "grad_norm": 1.871026873588562, "learning_rate": 1.9974573817418615e-05, "loss": 0.2033, "step": 934 }, { "epoch": 0.16, "grad_norm": 1.537691354751587, "learning_rate": 1.997444496922148e-05, "loss": 0.1513, "step": 935 }, { "epoch": 0.16, "grad_norm": 1.637268304824829, "learning_rate": 1.997431579579492e-05, "loss": 0.1837, "step": 936 }, { "epoch": 0.16, "grad_norm": 1.2378193140029907, "learning_rate": 1.9974186297143143e-05, "loss": 0.1434, "step": 937 }, { "epoch": 0.16, "grad_norm": 3.4056806564331055, "learning_rate": 1.997405647327038e-05, "loss": 0.1848, "step": 938 }, { "epoch": 0.16, "grad_norm": 1.5415737628936768, "learning_rate": 1.9973926324180864e-05, "loss": 0.1738, "step": 939 }, { "epoch": 0.16, "grad_norm": 1.6102421283721924, "learning_rate": 1.9973795849878834e-05, "loss": 0.1592, "step": 940 }, { "epoch": 0.16, "grad_norm": 1.7992812395095825, "learning_rate": 1.9973665050368542e-05, "loss": 0.1764, "step": 941 }, { "epoch": 0.16, "grad_norm": 2.045175790786743, "learning_rate": 1.997353392565426e-05, "loss": 0.1924, "step": 942 }, { "epoch": 0.16, "grad_norm": 2.166032075881958, "learning_rate": 1.997340247574026e-05, "loss": 0.2027, "step": 943 }, { "epoch": 0.16, "grad_norm": 3.1668150424957275, "learning_rate": 1.9973270700630826e-05, "loss": 0.1719, "step": 944 }, { "epoch": 0.16, "grad_norm": 1.6855136156082153, "learning_rate": 1.9973138600330255e-05, "loss": 0.174, "step": 945 }, { "epoch": 0.16, "grad_norm": 1.8382781744003296, "learning_rate": 1.9973006174842857e-05, "loss": 0.1907, "step": 946 }, { "epoch": 0.16, "grad_norm": 1.7408987283706665, "learning_rate": 1.997287342417295e-05, "loss": 0.1649, "step": 947 }, { "epoch": 0.16, "grad_norm": 1.8008685111999512, "learning_rate": 1.9972740348324863e-05, "loss": 0.1511, "step": 948 }, { "epoch": 0.16, "grad_norm": 2.02412486076355, "learning_rate": 1.997260694730293e-05, "loss": 0.1664, "step": 949 }, { "epoch": 0.16, "grad_norm": 1.7416532039642334, "learning_rate": 1.997247322111151e-05, "loss": 0.1662, "step": 950 }, { "epoch": 0.16, "grad_norm": 1.3813085556030273, "learning_rate": 1.9972339169754952e-05, "loss": 0.1631, "step": 951 }, { "epoch": 0.16, "grad_norm": 1.6327953338623047, "learning_rate": 1.9972204793237633e-05, "loss": 0.148, "step": 952 }, { "epoch": 0.16, "grad_norm": 1.644267201423645, "learning_rate": 1.9972070091563936e-05, "loss": 0.1826, "step": 953 }, { "epoch": 0.16, "grad_norm": 1.4450504779815674, "learning_rate": 1.997193506473825e-05, "loss": 0.1612, "step": 954 }, { "epoch": 0.16, "grad_norm": 1.495896577835083, "learning_rate": 1.9971799712764976e-05, "loss": 0.1759, "step": 955 }, { "epoch": 0.16, "grad_norm": 3.244556188583374, "learning_rate": 1.9971664035648532e-05, "loss": 0.1922, "step": 956 }, { "epoch": 0.16, "grad_norm": 1.8305130004882812, "learning_rate": 1.9971528033393342e-05, "loss": 0.1626, "step": 957 }, { "epoch": 0.16, "grad_norm": 1.5343657732009888, "learning_rate": 1.9971391706003837e-05, "loss": 0.1586, "step": 958 }, { "epoch": 0.16, "grad_norm": 1.8675493001937866, "learning_rate": 1.9971255053484466e-05, "loss": 0.1879, "step": 959 }, { "epoch": 0.16, "grad_norm": 1.841935396194458, "learning_rate": 1.997111807583968e-05, "loss": 0.186, "step": 960 }, { "epoch": 0.16, "grad_norm": 1.4147239923477173, "learning_rate": 1.9970980773073948e-05, "loss": 0.15, "step": 961 }, { "epoch": 0.16, "grad_norm": 1.6201521158218384, "learning_rate": 1.9970843145191747e-05, "loss": 0.198, "step": 962 }, { "epoch": 0.16, "grad_norm": 1.6395546197891235, "learning_rate": 1.997070519219756e-05, "loss": 0.154, "step": 963 }, { "epoch": 0.16, "grad_norm": 1.7959508895874023, "learning_rate": 1.9970566914095895e-05, "loss": 0.172, "step": 964 }, { "epoch": 0.16, "grad_norm": 1.6547956466674805, "learning_rate": 1.9970428310891254e-05, "loss": 0.1539, "step": 965 }, { "epoch": 0.16, "grad_norm": 1.8250503540039062, "learning_rate": 1.9970289382588158e-05, "loss": 0.174, "step": 966 }, { "epoch": 0.16, "grad_norm": 1.588321328163147, "learning_rate": 1.9970150129191133e-05, "loss": 0.172, "step": 967 }, { "epoch": 0.16, "grad_norm": 1.9633665084838867, "learning_rate": 1.9970010550704726e-05, "loss": 0.1673, "step": 968 }, { "epoch": 0.16, "grad_norm": 1.596713662147522, "learning_rate": 1.9969870647133485e-05, "loss": 0.1433, "step": 969 }, { "epoch": 0.16, "grad_norm": 2.04687237739563, "learning_rate": 1.996973041848197e-05, "loss": 0.1508, "step": 970 }, { "epoch": 0.16, "grad_norm": 1.9895659685134888, "learning_rate": 1.9969589864754756e-05, "loss": 0.1534, "step": 971 }, { "epoch": 0.16, "grad_norm": 1.4974424839019775, "learning_rate": 1.996944898595642e-05, "loss": 0.1504, "step": 972 }, { "epoch": 0.16, "grad_norm": 1.640717625617981, "learning_rate": 1.9969307782091566e-05, "loss": 0.1918, "step": 973 }, { "epoch": 0.16, "grad_norm": 1.8573203086853027, "learning_rate": 1.9969166253164794e-05, "loss": 0.1697, "step": 974 }, { "epoch": 0.16, "grad_norm": 1.6898223161697388, "learning_rate": 1.9969024399180713e-05, "loss": 0.1477, "step": 975 }, { "epoch": 0.16, "grad_norm": 1.925371766090393, "learning_rate": 1.9968882220143956e-05, "loss": 0.2064, "step": 976 }, { "epoch": 0.16, "grad_norm": 1.4692976474761963, "learning_rate": 1.9968739716059154e-05, "loss": 0.1641, "step": 977 }, { "epoch": 0.16, "grad_norm": 1.7532256841659546, "learning_rate": 1.9968596886930957e-05, "loss": 0.1748, "step": 978 }, { "epoch": 0.16, "grad_norm": 1.5801012516021729, "learning_rate": 1.996845373276402e-05, "loss": 0.1617, "step": 979 }, { "epoch": 0.16, "grad_norm": 1.445465087890625, "learning_rate": 1.9968310253563006e-05, "loss": 0.1899, "step": 980 }, { "epoch": 0.16, "grad_norm": 1.3506580591201782, "learning_rate": 1.99681664493326e-05, "loss": 0.1572, "step": 981 }, { "epoch": 0.16, "grad_norm": 2.080568552017212, "learning_rate": 1.9968022320077494e-05, "loss": 0.1704, "step": 982 }, { "epoch": 0.16, "grad_norm": 1.4519479274749756, "learning_rate": 1.996787786580238e-05, "loss": 0.1686, "step": 983 }, { "epoch": 0.16, "grad_norm": 2.1469554901123047, "learning_rate": 1.9967733086511973e-05, "loss": 0.151, "step": 984 }, { "epoch": 0.16, "grad_norm": 1.6606882810592651, "learning_rate": 1.996758798221099e-05, "loss": 0.1879, "step": 985 }, { "epoch": 0.16, "grad_norm": 1.6063156127929688, "learning_rate": 1.996744255290416e-05, "loss": 0.1871, "step": 986 }, { "epoch": 0.17, "grad_norm": 1.650887370109558, "learning_rate": 1.9967296798596236e-05, "loss": 0.15, "step": 987 }, { "epoch": 0.17, "grad_norm": 1.847859263420105, "learning_rate": 1.996715071929196e-05, "loss": 0.1599, "step": 988 }, { "epoch": 0.17, "grad_norm": 1.7134329080581665, "learning_rate": 1.9967004314996098e-05, "loss": 0.193, "step": 989 }, { "epoch": 0.17, "grad_norm": 1.4075090885162354, "learning_rate": 1.9966857585713423e-05, "loss": 0.1529, "step": 990 }, { "epoch": 0.17, "grad_norm": 1.516053318977356, "learning_rate": 1.9966710531448722e-05, "loss": 0.1725, "step": 991 }, { "epoch": 0.17, "grad_norm": 1.513689398765564, "learning_rate": 1.9966563152206788e-05, "loss": 0.1657, "step": 992 }, { "epoch": 0.17, "grad_norm": 1.7570744752883911, "learning_rate": 1.9966415447992426e-05, "loss": 0.2083, "step": 993 }, { "epoch": 0.17, "grad_norm": 1.46107816696167, "learning_rate": 1.9966267418810452e-05, "loss": 0.1565, "step": 994 }, { "epoch": 0.17, "grad_norm": 1.4696142673492432, "learning_rate": 1.9966119064665697e-05, "loss": 0.174, "step": 995 }, { "epoch": 0.17, "grad_norm": 1.4802719354629517, "learning_rate": 1.996597038556299e-05, "loss": 0.1724, "step": 996 }, { "epoch": 0.17, "grad_norm": 1.5051852464675903, "learning_rate": 1.996582138150719e-05, "loss": 0.1678, "step": 997 }, { "epoch": 0.17, "grad_norm": 1.7146193981170654, "learning_rate": 1.9965672052503145e-05, "loss": 0.1992, "step": 998 }, { "epoch": 0.17, "grad_norm": 1.6834286451339722, "learning_rate": 1.9965522398555728e-05, "loss": 0.1787, "step": 999 }, { "epoch": 0.17, "grad_norm": 1.3666728734970093, "learning_rate": 1.9965372419669823e-05, "loss": 0.1627, "step": 1000 }, { "epoch": 0.17, "grad_norm": 1.4959510564804077, "learning_rate": 1.9965222115850312e-05, "loss": 0.1715, "step": 1001 }, { "epoch": 0.17, "grad_norm": 1.2491470575332642, "learning_rate": 1.99650714871021e-05, "loss": 0.1522, "step": 1002 }, { "epoch": 0.17, "grad_norm": 1.5381397008895874, "learning_rate": 1.99649205334301e-05, "loss": 0.1773, "step": 1003 }, { "epoch": 0.17, "grad_norm": 1.7411301136016846, "learning_rate": 1.9964769254839228e-05, "loss": 0.1744, "step": 1004 }, { "epoch": 0.17, "grad_norm": 1.670479416847229, "learning_rate": 1.9964617651334426e-05, "loss": 0.1698, "step": 1005 }, { "epoch": 0.17, "grad_norm": 1.692123532295227, "learning_rate": 1.996446572292063e-05, "loss": 0.1788, "step": 1006 }, { "epoch": 0.17, "grad_norm": 1.4558380842208862, "learning_rate": 1.99643134696028e-05, "loss": 0.1565, "step": 1007 }, { "epoch": 0.17, "grad_norm": 1.4925227165222168, "learning_rate": 1.9964160891385895e-05, "loss": 0.1763, "step": 1008 }, { "epoch": 0.17, "grad_norm": 1.436192512512207, "learning_rate": 1.9964007988274888e-05, "loss": 0.1452, "step": 1009 }, { "epoch": 0.17, "grad_norm": 1.7821743488311768, "learning_rate": 1.9963854760274772e-05, "loss": 0.1672, "step": 1010 }, { "epoch": 0.17, "grad_norm": 1.8964335918426514, "learning_rate": 1.9963701207390538e-05, "loss": 0.1809, "step": 1011 }, { "epoch": 0.17, "grad_norm": 1.3932524919509888, "learning_rate": 1.9963547329627193e-05, "loss": 0.1371, "step": 1012 }, { "epoch": 0.17, "grad_norm": 1.741665005683899, "learning_rate": 1.9963393126989755e-05, "loss": 0.1912, "step": 1013 }, { "epoch": 0.17, "grad_norm": 1.8559465408325195, "learning_rate": 1.9963238599483255e-05, "loss": 0.1652, "step": 1014 }, { "epoch": 0.17, "grad_norm": 1.3628016710281372, "learning_rate": 1.9963083747112728e-05, "loss": 0.1286, "step": 1015 }, { "epoch": 0.17, "grad_norm": 1.518949031829834, "learning_rate": 1.9962928569883223e-05, "loss": 0.1739, "step": 1016 }, { "epoch": 0.17, "grad_norm": 1.9377517700195312, "learning_rate": 1.9962773067799804e-05, "loss": 0.179, "step": 1017 }, { "epoch": 0.17, "grad_norm": 1.5343658924102783, "learning_rate": 1.9962617240867534e-05, "loss": 0.1737, "step": 1018 }, { "epoch": 0.17, "grad_norm": 1.6116617918014526, "learning_rate": 1.9962461089091502e-05, "loss": 0.1886, "step": 1019 }, { "epoch": 0.17, "grad_norm": 1.542884349822998, "learning_rate": 1.9962304612476796e-05, "loss": 0.1973, "step": 1020 }, { "epoch": 0.17, "grad_norm": 1.4227968454360962, "learning_rate": 1.9962147811028516e-05, "loss": 0.1524, "step": 1021 }, { "epoch": 0.17, "grad_norm": 1.768544316291809, "learning_rate": 1.9961990684751774e-05, "loss": 0.1738, "step": 1022 }, { "epoch": 0.17, "grad_norm": 1.317833423614502, "learning_rate": 1.99618332336517e-05, "loss": 0.1562, "step": 1023 }, { "epoch": 0.17, "grad_norm": 1.475756287574768, "learning_rate": 1.9961675457733424e-05, "loss": 0.1368, "step": 1024 }, { "epoch": 0.17, "grad_norm": 1.419943928718567, "learning_rate": 1.996151735700209e-05, "loss": 0.1529, "step": 1025 }, { "epoch": 0.17, "grad_norm": 1.7532223463058472, "learning_rate": 1.9961358931462847e-05, "loss": 0.1603, "step": 1026 }, { "epoch": 0.17, "grad_norm": 1.4777051210403442, "learning_rate": 1.9961200181120873e-05, "loss": 0.1731, "step": 1027 }, { "epoch": 0.17, "grad_norm": 1.5174425840377808, "learning_rate": 1.9961041105981338e-05, "loss": 0.1868, "step": 1028 }, { "epoch": 0.17, "grad_norm": 1.408332109451294, "learning_rate": 1.996088170604943e-05, "loss": 0.1528, "step": 1029 }, { "epoch": 0.17, "grad_norm": 1.4835340976715088, "learning_rate": 1.9960721981330344e-05, "loss": 0.1733, "step": 1030 }, { "epoch": 0.17, "grad_norm": 1.8868507146835327, "learning_rate": 1.9960561931829287e-05, "loss": 0.1734, "step": 1031 }, { "epoch": 0.17, "grad_norm": 1.6506141424179077, "learning_rate": 1.9960401557551484e-05, "loss": 0.15, "step": 1032 }, { "epoch": 0.17, "grad_norm": 1.710518479347229, "learning_rate": 1.996024085850216e-05, "loss": 0.1533, "step": 1033 }, { "epoch": 0.17, "grad_norm": 1.636086106300354, "learning_rate": 1.9960079834686556e-05, "loss": 0.1526, "step": 1034 }, { "epoch": 0.17, "grad_norm": 1.580087661743164, "learning_rate": 1.995991848610992e-05, "loss": 0.174, "step": 1035 }, { "epoch": 0.17, "grad_norm": 1.5077508687973022, "learning_rate": 1.9959756812777513e-05, "loss": 0.1704, "step": 1036 }, { "epoch": 0.17, "grad_norm": 1.5683104991912842, "learning_rate": 1.995959481469461e-05, "loss": 0.1326, "step": 1037 }, { "epoch": 0.17, "grad_norm": 2.0469772815704346, "learning_rate": 1.9959432491866488e-05, "loss": 0.1629, "step": 1038 }, { "epoch": 0.17, "grad_norm": 1.704354166984558, "learning_rate": 1.995926984429845e-05, "loss": 0.1611, "step": 1039 }, { "epoch": 0.17, "grad_norm": 1.54710054397583, "learning_rate": 1.9959106871995787e-05, "loss": 0.1421, "step": 1040 }, { "epoch": 0.17, "grad_norm": 1.6533246040344238, "learning_rate": 1.9958943574963817e-05, "loss": 0.1804, "step": 1041 }, { "epoch": 0.17, "grad_norm": 2.4107563495635986, "learning_rate": 1.995877995320787e-05, "loss": 0.1897, "step": 1042 }, { "epoch": 0.17, "grad_norm": 1.7400805950164795, "learning_rate": 1.9958616006733274e-05, "loss": 0.1659, "step": 1043 }, { "epoch": 0.17, "grad_norm": 1.208361029624939, "learning_rate": 1.9958451735545378e-05, "loss": 0.1386, "step": 1044 }, { "epoch": 0.17, "grad_norm": 1.6144503355026245, "learning_rate": 1.995828713964954e-05, "loss": 0.1328, "step": 1045 }, { "epoch": 0.17, "grad_norm": 1.5079095363616943, "learning_rate": 1.995812221905112e-05, "loss": 0.1658, "step": 1046 }, { "epoch": 0.18, "grad_norm": 1.3696887493133545, "learning_rate": 1.9957956973755504e-05, "loss": 0.1387, "step": 1047 }, { "epoch": 0.18, "grad_norm": 2.101588010787964, "learning_rate": 1.9957791403768076e-05, "loss": 0.2101, "step": 1048 }, { "epoch": 0.18, "grad_norm": 1.8812305927276611, "learning_rate": 1.9957625509094234e-05, "loss": 0.1873, "step": 1049 }, { "epoch": 0.18, "grad_norm": 1.9552996158599854, "learning_rate": 1.9957459289739388e-05, "loss": 0.2009, "step": 1050 }, { "epoch": 0.18, "grad_norm": 1.4650686979293823, "learning_rate": 1.995729274570896e-05, "loss": 0.1679, "step": 1051 }, { "epoch": 0.18, "grad_norm": 1.6061304807662964, "learning_rate": 1.9957125877008373e-05, "loss": 0.1711, "step": 1052 }, { "epoch": 0.18, "grad_norm": 1.991232991218567, "learning_rate": 1.9956958683643077e-05, "loss": 0.1461, "step": 1053 }, { "epoch": 0.18, "grad_norm": 1.9721736907958984, "learning_rate": 1.995679116561852e-05, "loss": 0.1403, "step": 1054 }, { "epoch": 0.18, "grad_norm": 1.7805908918380737, "learning_rate": 1.9956623322940162e-05, "loss": 0.1749, "step": 1055 }, { "epoch": 0.18, "grad_norm": 2.1731491088867188, "learning_rate": 1.9956455155613476e-05, "loss": 0.1728, "step": 1056 }, { "epoch": 0.18, "grad_norm": 1.5651805400848389, "learning_rate": 1.995628666364395e-05, "loss": 0.166, "step": 1057 }, { "epoch": 0.18, "grad_norm": 2.1028740406036377, "learning_rate": 1.9956117847037074e-05, "loss": 0.1441, "step": 1058 }, { "epoch": 0.18, "grad_norm": 1.535136342048645, "learning_rate": 1.9955948705798353e-05, "loss": 0.1795, "step": 1059 }, { "epoch": 0.18, "grad_norm": 1.4276670217514038, "learning_rate": 1.99557792399333e-05, "loss": 0.1441, "step": 1060 }, { "epoch": 0.18, "grad_norm": 1.5951694250106812, "learning_rate": 1.9955609449447446e-05, "loss": 0.1596, "step": 1061 }, { "epoch": 0.18, "grad_norm": 1.6827080249786377, "learning_rate": 1.995543933434632e-05, "loss": 0.1559, "step": 1062 }, { "epoch": 0.18, "grad_norm": 1.6288617849349976, "learning_rate": 1.9955268894635478e-05, "loss": 0.1772, "step": 1063 }, { "epoch": 0.18, "grad_norm": 1.5220812559127808, "learning_rate": 1.995509813032047e-05, "loss": 0.1605, "step": 1064 }, { "epoch": 0.18, "grad_norm": 1.568711519241333, "learning_rate": 1.9954927041406865e-05, "loss": 0.1638, "step": 1065 }, { "epoch": 0.18, "grad_norm": 1.8736315965652466, "learning_rate": 1.9954755627900246e-05, "loss": 0.1742, "step": 1066 }, { "epoch": 0.18, "grad_norm": 1.5726170539855957, "learning_rate": 1.9954583889806197e-05, "loss": 0.1633, "step": 1067 }, { "epoch": 0.18, "grad_norm": 1.4197813272476196, "learning_rate": 1.995441182713032e-05, "loss": 0.1783, "step": 1068 }, { "epoch": 0.18, "grad_norm": 1.771365761756897, "learning_rate": 1.9954239439878226e-05, "loss": 0.1809, "step": 1069 }, { "epoch": 0.18, "grad_norm": 1.8110594749450684, "learning_rate": 1.9954066728055538e-05, "loss": 0.1709, "step": 1070 }, { "epoch": 0.18, "grad_norm": 1.3651467561721802, "learning_rate": 1.9953893691667877e-05, "loss": 0.1409, "step": 1071 }, { "epoch": 0.18, "grad_norm": 1.4076831340789795, "learning_rate": 1.9953720330720897e-05, "loss": 0.1563, "step": 1072 }, { "epoch": 0.18, "grad_norm": 1.5080311298370361, "learning_rate": 1.9953546645220247e-05, "loss": 0.1421, "step": 1073 }, { "epoch": 0.18, "grad_norm": 1.5641695261001587, "learning_rate": 1.995337263517159e-05, "loss": 0.1424, "step": 1074 }, { "epoch": 0.18, "grad_norm": 1.4891395568847656, "learning_rate": 1.9953198300580597e-05, "loss": 0.1634, "step": 1075 }, { "epoch": 0.18, "grad_norm": 1.675120234489441, "learning_rate": 1.9953023641452955e-05, "loss": 0.1532, "step": 1076 }, { "epoch": 0.18, "grad_norm": 1.3900964260101318, "learning_rate": 1.995284865779436e-05, "loss": 0.1613, "step": 1077 }, { "epoch": 0.18, "grad_norm": 1.6070486307144165, "learning_rate": 1.9952673349610517e-05, "loss": 0.1521, "step": 1078 }, { "epoch": 0.18, "grad_norm": 1.8656409978866577, "learning_rate": 1.995249771690714e-05, "loss": 0.1508, "step": 1079 }, { "epoch": 0.18, "grad_norm": 1.5639102458953857, "learning_rate": 1.9952321759689955e-05, "loss": 0.1476, "step": 1080 }, { "epoch": 0.18, "grad_norm": 1.4167678356170654, "learning_rate": 1.9952145477964705e-05, "loss": 0.1336, "step": 1081 }, { "epoch": 0.18, "grad_norm": 1.7882224321365356, "learning_rate": 1.995196887173713e-05, "loss": 0.1589, "step": 1082 }, { "epoch": 0.18, "grad_norm": 1.6170654296875, "learning_rate": 1.9951791941012997e-05, "loss": 0.161, "step": 1083 }, { "epoch": 0.18, "grad_norm": 1.5231603384017944, "learning_rate": 1.995161468579807e-05, "loss": 0.1652, "step": 1084 }, { "epoch": 0.18, "grad_norm": 1.7340580224990845, "learning_rate": 1.9951437106098132e-05, "loss": 0.1739, "step": 1085 }, { "epoch": 0.18, "grad_norm": 1.4459396600723267, "learning_rate": 1.9951259201918964e-05, "loss": 0.1521, "step": 1086 }, { "epoch": 0.18, "grad_norm": 2.0289053916931152, "learning_rate": 1.9951080973266377e-05, "loss": 0.1884, "step": 1087 }, { "epoch": 0.18, "grad_norm": 1.3765407800674438, "learning_rate": 1.995090242014618e-05, "loss": 0.1658, "step": 1088 }, { "epoch": 0.18, "grad_norm": 1.3227051496505737, "learning_rate": 1.9950723542564195e-05, "loss": 0.1552, "step": 1089 }, { "epoch": 0.18, "grad_norm": 1.9666852951049805, "learning_rate": 1.9950544340526247e-05, "loss": 0.1921, "step": 1090 }, { "epoch": 0.18, "grad_norm": 1.3013354539871216, "learning_rate": 1.995036481403819e-05, "loss": 0.1659, "step": 1091 }, { "epoch": 0.18, "grad_norm": 1.231918454170227, "learning_rate": 1.9950184963105874e-05, "loss": 0.1431, "step": 1092 }, { "epoch": 0.18, "grad_norm": 1.6137375831604004, "learning_rate": 1.9950004787735164e-05, "loss": 0.1799, "step": 1093 }, { "epoch": 0.18, "grad_norm": 1.4516890048980713, "learning_rate": 1.994982428793193e-05, "loss": 0.1695, "step": 1094 }, { "epoch": 0.18, "grad_norm": 1.8553893566131592, "learning_rate": 1.994964346370206e-05, "loss": 0.1822, "step": 1095 }, { "epoch": 0.18, "grad_norm": 1.6379262208938599, "learning_rate": 1.994946231505145e-05, "loss": 0.1651, "step": 1096 }, { "epoch": 0.18, "grad_norm": 1.3875019550323486, "learning_rate": 1.994928084198601e-05, "loss": 0.1733, "step": 1097 }, { "epoch": 0.18, "grad_norm": 1.3028923273086548, "learning_rate": 1.994909904451165e-05, "loss": 0.1303, "step": 1098 }, { "epoch": 0.18, "grad_norm": 1.417746663093567, "learning_rate": 1.994891692263431e-05, "loss": 0.1696, "step": 1099 }, { "epoch": 0.18, "grad_norm": 2.0040969848632812, "learning_rate": 1.9948734476359916e-05, "loss": 0.1609, "step": 1100 }, { "epoch": 0.18, "grad_norm": 1.315317153930664, "learning_rate": 1.994855170569442e-05, "loss": 0.1314, "step": 1101 }, { "epoch": 0.18, "grad_norm": 1.6284143924713135, "learning_rate": 1.9948368610643785e-05, "loss": 0.1693, "step": 1102 }, { "epoch": 0.18, "grad_norm": 1.5845694541931152, "learning_rate": 1.9948185191213977e-05, "loss": 0.1542, "step": 1103 }, { "epoch": 0.18, "grad_norm": 1.6306982040405273, "learning_rate": 1.994800144741098e-05, "loss": 0.1619, "step": 1104 }, { "epoch": 0.18, "grad_norm": 1.395944356918335, "learning_rate": 1.9947817379240783e-05, "loss": 0.16, "step": 1105 }, { "epoch": 0.18, "grad_norm": 1.723220944404602, "learning_rate": 1.994763298670939e-05, "loss": 0.1664, "step": 1106 }, { "epoch": 0.19, "grad_norm": 1.5232261419296265, "learning_rate": 1.994744826982281e-05, "loss": 0.1562, "step": 1107 }, { "epoch": 0.19, "grad_norm": 1.9801889657974243, "learning_rate": 1.9947263228587067e-05, "loss": 0.1743, "step": 1108 }, { "epoch": 0.19, "grad_norm": 2.0467731952667236, "learning_rate": 1.9947077863008197e-05, "loss": 0.1522, "step": 1109 }, { "epoch": 0.19, "grad_norm": 1.5303490161895752, "learning_rate": 1.9946892173092242e-05, "loss": 0.1701, "step": 1110 }, { "epoch": 0.19, "grad_norm": 1.6939035654067993, "learning_rate": 1.994670615884526e-05, "loss": 0.1483, "step": 1111 }, { "epoch": 0.19, "grad_norm": 1.219974398612976, "learning_rate": 1.9946519820273308e-05, "loss": 0.1214, "step": 1112 }, { "epoch": 0.19, "grad_norm": 1.5573337078094482, "learning_rate": 1.9946333157382468e-05, "loss": 0.1277, "step": 1113 }, { "epoch": 0.19, "grad_norm": 1.6327489614486694, "learning_rate": 1.9946146170178827e-05, "loss": 0.1546, "step": 1114 }, { "epoch": 0.19, "grad_norm": 1.5438631772994995, "learning_rate": 1.994595885866848e-05, "loss": 0.1438, "step": 1115 }, { "epoch": 0.19, "grad_norm": 1.659044861793518, "learning_rate": 1.994577122285754e-05, "loss": 0.177, "step": 1116 }, { "epoch": 0.19, "grad_norm": 1.519830346107483, "learning_rate": 1.994558326275211e-05, "loss": 0.1713, "step": 1117 }, { "epoch": 0.19, "grad_norm": 1.3184986114501953, "learning_rate": 1.9945394978358335e-05, "loss": 0.1471, "step": 1118 }, { "epoch": 0.19, "grad_norm": 2.224078893661499, "learning_rate": 1.9945206369682346e-05, "loss": 0.1419, "step": 1119 }, { "epoch": 0.19, "grad_norm": 2.072798728942871, "learning_rate": 1.9945017436730295e-05, "loss": 0.1515, "step": 1120 }, { "epoch": 0.19, "grad_norm": 1.6016703844070435, "learning_rate": 1.994482817950834e-05, "loss": 0.1769, "step": 1121 }, { "epoch": 0.19, "grad_norm": 1.3575259447097778, "learning_rate": 1.9944638598022658e-05, "loss": 0.1478, "step": 1122 }, { "epoch": 0.19, "grad_norm": 1.4177204370498657, "learning_rate": 1.9944448692279424e-05, "loss": 0.1634, "step": 1123 }, { "epoch": 0.19, "grad_norm": 1.7674646377563477, "learning_rate": 1.9944258462284836e-05, "loss": 0.1953, "step": 1124 }, { "epoch": 0.19, "grad_norm": 1.5798730850219727, "learning_rate": 1.9944067908045094e-05, "loss": 0.1611, "step": 1125 }, { "epoch": 0.19, "grad_norm": 1.4827150106430054, "learning_rate": 1.9943877029566404e-05, "loss": 0.1407, "step": 1126 }, { "epoch": 0.19, "grad_norm": 1.9178358316421509, "learning_rate": 1.9943685826855002e-05, "loss": 0.1633, "step": 1127 }, { "epoch": 0.19, "grad_norm": 1.3591210842132568, "learning_rate": 1.9943494299917118e-05, "loss": 0.149, "step": 1128 }, { "epoch": 0.19, "grad_norm": 1.8860262632369995, "learning_rate": 1.9943302448758992e-05, "loss": 0.1353, "step": 1129 }, { "epoch": 0.19, "grad_norm": 1.6653883457183838, "learning_rate": 1.9943110273386887e-05, "loss": 0.1457, "step": 1130 }, { "epoch": 0.19, "grad_norm": 1.4775331020355225, "learning_rate": 1.9942917773807062e-05, "loss": 0.1592, "step": 1131 }, { "epoch": 0.19, "grad_norm": 1.5247011184692383, "learning_rate": 1.99427249500258e-05, "loss": 0.1306, "step": 1132 }, { "epoch": 0.19, "grad_norm": 1.5765807628631592, "learning_rate": 1.9942531802049385e-05, "loss": 0.1474, "step": 1133 }, { "epoch": 0.19, "grad_norm": 1.5848685503005981, "learning_rate": 1.9942338329884114e-05, "loss": 0.1561, "step": 1134 }, { "epoch": 0.19, "grad_norm": 2.300499439239502, "learning_rate": 1.99421445335363e-05, "loss": 0.1996, "step": 1135 }, { "epoch": 0.19, "grad_norm": 1.4587020874023438, "learning_rate": 1.9941950413012255e-05, "loss": 0.144, "step": 1136 }, { "epoch": 0.19, "grad_norm": 1.385151982307434, "learning_rate": 1.9941755968318314e-05, "loss": 0.1532, "step": 1137 }, { "epoch": 0.19, "grad_norm": 1.6278586387634277, "learning_rate": 1.9941561199460815e-05, "loss": 0.1596, "step": 1138 }, { "epoch": 0.19, "grad_norm": 1.4834437370300293, "learning_rate": 1.994136610644611e-05, "loss": 0.1579, "step": 1139 }, { "epoch": 0.19, "grad_norm": 1.6009408235549927, "learning_rate": 1.994117068928056e-05, "loss": 0.1477, "step": 1140 }, { "epoch": 0.19, "grad_norm": 1.4534316062927246, "learning_rate": 1.9940974947970536e-05, "loss": 0.178, "step": 1141 }, { "epoch": 0.19, "grad_norm": 1.6322578191757202, "learning_rate": 1.994077888252242e-05, "loss": 0.1794, "step": 1142 }, { "epoch": 0.19, "grad_norm": 2.2363481521606445, "learning_rate": 1.9940582492942605e-05, "loss": 0.1843, "step": 1143 }, { "epoch": 0.19, "grad_norm": 1.659375548362732, "learning_rate": 1.9940385779237496e-05, "loss": 0.1302, "step": 1144 }, { "epoch": 0.19, "grad_norm": 1.2971563339233398, "learning_rate": 1.9940188741413505e-05, "loss": 0.1297, "step": 1145 }, { "epoch": 0.19, "grad_norm": 1.3697905540466309, "learning_rate": 1.993999137947706e-05, "loss": 0.1555, "step": 1146 }, { "epoch": 0.19, "grad_norm": 1.8674474954605103, "learning_rate": 1.993979369343459e-05, "loss": 0.1581, "step": 1147 }, { "epoch": 0.19, "grad_norm": 1.4669685363769531, "learning_rate": 1.993959568329255e-05, "loss": 0.1268, "step": 1148 }, { "epoch": 0.19, "grad_norm": 1.2280173301696777, "learning_rate": 1.9939397349057388e-05, "loss": 0.1334, "step": 1149 }, { "epoch": 0.19, "grad_norm": 1.9641133546829224, "learning_rate": 1.9939198690735577e-05, "loss": 0.1623, "step": 1150 }, { "epoch": 0.19, "grad_norm": 1.877835750579834, "learning_rate": 1.993899970833359e-05, "loss": 0.1645, "step": 1151 }, { "epoch": 0.19, "grad_norm": 1.490617036819458, "learning_rate": 1.9938800401857915e-05, "loss": 0.1525, "step": 1152 }, { "epoch": 0.19, "grad_norm": 1.5312275886535645, "learning_rate": 1.9938600771315055e-05, "loss": 0.145, "step": 1153 }, { "epoch": 0.19, "grad_norm": 1.6605381965637207, "learning_rate": 1.9938400816711518e-05, "loss": 0.1462, "step": 1154 }, { "epoch": 0.19, "grad_norm": 1.5309524536132812, "learning_rate": 1.993820053805382e-05, "loss": 0.1523, "step": 1155 }, { "epoch": 0.19, "grad_norm": 1.6714123487472534, "learning_rate": 1.9937999935348494e-05, "loss": 0.1746, "step": 1156 }, { "epoch": 0.19, "grad_norm": 1.8115383386611938, "learning_rate": 1.993779900860208e-05, "loss": 0.1481, "step": 1157 }, { "epoch": 0.19, "grad_norm": 1.3487666845321655, "learning_rate": 1.9937597757821132e-05, "loss": 0.1551, "step": 1158 }, { "epoch": 0.19, "grad_norm": 1.659530520439148, "learning_rate": 1.9937396183012208e-05, "loss": 0.1662, "step": 1159 }, { "epoch": 0.19, "grad_norm": 1.5555901527404785, "learning_rate": 1.9937194284181885e-05, "loss": 0.1655, "step": 1160 }, { "epoch": 0.19, "grad_norm": 1.474568247795105, "learning_rate": 1.9936992061336743e-05, "loss": 0.1518, "step": 1161 }, { "epoch": 0.19, "grad_norm": 1.5927197933197021, "learning_rate": 1.9936789514483376e-05, "loss": 0.1634, "step": 1162 }, { "epoch": 0.19, "grad_norm": 1.6159560680389404, "learning_rate": 1.993658664362839e-05, "loss": 0.1527, "step": 1163 }, { "epoch": 0.19, "grad_norm": 1.3461894989013672, "learning_rate": 1.99363834487784e-05, "loss": 0.1274, "step": 1164 }, { "epoch": 0.19, "grad_norm": 1.3550236225128174, "learning_rate": 1.9936179929940027e-05, "loss": 0.1551, "step": 1165 }, { "epoch": 0.2, "grad_norm": 1.790219783782959, "learning_rate": 1.993597608711991e-05, "loss": 0.1516, "step": 1166 }, { "epoch": 0.2, "grad_norm": 1.1535733938217163, "learning_rate": 1.99357719203247e-05, "loss": 0.135, "step": 1167 }, { "epoch": 0.2, "grad_norm": 1.636282205581665, "learning_rate": 1.9935567429561045e-05, "loss": 0.1545, "step": 1168 }, { "epoch": 0.2, "grad_norm": 1.5581169128417969, "learning_rate": 1.9935362614835624e-05, "loss": 0.1682, "step": 1169 }, { "epoch": 0.2, "grad_norm": 1.4720131158828735, "learning_rate": 1.9935157476155103e-05, "loss": 0.1253, "step": 1170 }, { "epoch": 0.2, "grad_norm": 1.2834043502807617, "learning_rate": 1.993495201352618e-05, "loss": 0.1425, "step": 1171 }, { "epoch": 0.2, "grad_norm": 1.5259215831756592, "learning_rate": 1.9934746226955553e-05, "loss": 0.1517, "step": 1172 }, { "epoch": 0.2, "grad_norm": 1.475020408630371, "learning_rate": 1.9934540116449927e-05, "loss": 0.1601, "step": 1173 }, { "epoch": 0.2, "grad_norm": 1.379341721534729, "learning_rate": 1.993433368201602e-05, "loss": 0.1386, "step": 1174 }, { "epoch": 0.2, "grad_norm": 1.4557199478149414, "learning_rate": 1.9934126923660576e-05, "loss": 0.1425, "step": 1175 }, { "epoch": 0.2, "grad_norm": 1.5019620656967163, "learning_rate": 1.993391984139033e-05, "loss": 0.165, "step": 1176 }, { "epoch": 0.2, "grad_norm": 1.3336069583892822, "learning_rate": 1.9933712435212028e-05, "loss": 0.1446, "step": 1177 }, { "epoch": 0.2, "grad_norm": 1.5729424953460693, "learning_rate": 1.993350470513244e-05, "loss": 0.1367, "step": 1178 }, { "epoch": 0.2, "grad_norm": 1.4622029066085815, "learning_rate": 1.9933296651158337e-05, "loss": 0.1726, "step": 1179 }, { "epoch": 0.2, "grad_norm": 1.2801045179367065, "learning_rate": 1.9933088273296506e-05, "loss": 0.1556, "step": 1180 }, { "epoch": 0.2, "grad_norm": 1.5990269184112549, "learning_rate": 1.993287957155374e-05, "loss": 0.1518, "step": 1181 }, { "epoch": 0.2, "grad_norm": 1.2312610149383545, "learning_rate": 1.9932670545936835e-05, "loss": 0.1554, "step": 1182 }, { "epoch": 0.2, "grad_norm": 1.915045976638794, "learning_rate": 1.993246119645262e-05, "loss": 0.1989, "step": 1183 }, { "epoch": 0.2, "grad_norm": 1.1826263666152954, "learning_rate": 1.9932251523107916e-05, "loss": 0.1556, "step": 1184 }, { "epoch": 0.2, "grad_norm": 1.28877592086792, "learning_rate": 1.9932041525909557e-05, "loss": 0.1453, "step": 1185 }, { "epoch": 0.2, "grad_norm": 1.4926176071166992, "learning_rate": 1.993183120486439e-05, "loss": 0.1662, "step": 1186 }, { "epoch": 0.2, "grad_norm": 1.3897709846496582, "learning_rate": 1.9931620559979277e-05, "loss": 0.128, "step": 1187 }, { "epoch": 0.2, "grad_norm": 1.3372524976730347, "learning_rate": 1.9931409591261084e-05, "loss": 0.1486, "step": 1188 }, { "epoch": 0.2, "grad_norm": 1.503157138824463, "learning_rate": 1.993119829871669e-05, "loss": 0.134, "step": 1189 }, { "epoch": 0.2, "grad_norm": 1.4440542459487915, "learning_rate": 1.9930986682352986e-05, "loss": 0.156, "step": 1190 }, { "epoch": 0.2, "grad_norm": 1.2669135332107544, "learning_rate": 1.993077474217687e-05, "loss": 0.1378, "step": 1191 }, { "epoch": 0.2, "grad_norm": 1.148877739906311, "learning_rate": 1.9930562478195254e-05, "loss": 0.127, "step": 1192 }, { "epoch": 0.2, "grad_norm": 1.804958462715149, "learning_rate": 1.9930349890415056e-05, "loss": 0.1908, "step": 1193 }, { "epoch": 0.2, "grad_norm": 1.5851022005081177, "learning_rate": 1.993013697884321e-05, "loss": 0.1482, "step": 1194 }, { "epoch": 0.2, "grad_norm": 2.4448118209838867, "learning_rate": 1.9929923743486663e-05, "loss": 0.1545, "step": 1195 }, { "epoch": 0.2, "grad_norm": 1.4734019041061401, "learning_rate": 1.9929710184352358e-05, "loss": 0.1518, "step": 1196 }, { "epoch": 0.2, "grad_norm": 1.3371617794036865, "learning_rate": 1.9929496301447268e-05, "loss": 0.1419, "step": 1197 }, { "epoch": 0.2, "grad_norm": 1.8880445957183838, "learning_rate": 1.992928209477836e-05, "loss": 0.1494, "step": 1198 }, { "epoch": 0.2, "grad_norm": 1.59600830078125, "learning_rate": 1.9929067564352616e-05, "loss": 0.1928, "step": 1199 }, { "epoch": 0.2, "grad_norm": 1.526594877243042, "learning_rate": 1.992885271017704e-05, "loss": 0.1486, "step": 1200 }, { "epoch": 0.2, "grad_norm": 1.4546867609024048, "learning_rate": 1.9928637532258637e-05, "loss": 0.1493, "step": 1201 }, { "epoch": 0.2, "grad_norm": 1.1193697452545166, "learning_rate": 1.9928422030604416e-05, "loss": 0.1312, "step": 1202 }, { "epoch": 0.2, "grad_norm": 1.2094162702560425, "learning_rate": 1.992820620522141e-05, "loss": 0.1292, "step": 1203 }, { "epoch": 0.2, "grad_norm": 1.6842880249023438, "learning_rate": 1.9927990056116653e-05, "loss": 0.2083, "step": 1204 }, { "epoch": 0.2, "grad_norm": 1.5017935037612915, "learning_rate": 1.9927773583297193e-05, "loss": 0.1469, "step": 1205 }, { "epoch": 0.2, "grad_norm": 1.3293187618255615, "learning_rate": 1.992755678677009e-05, "loss": 0.1338, "step": 1206 }, { "epoch": 0.2, "grad_norm": 1.7964736223220825, "learning_rate": 1.9927339666542408e-05, "loss": 0.1264, "step": 1207 }, { "epoch": 0.2, "grad_norm": 1.4356396198272705, "learning_rate": 1.9927122222621232e-05, "loss": 0.1442, "step": 1208 }, { "epoch": 0.2, "grad_norm": 1.3952919244766235, "learning_rate": 1.992690445501365e-05, "loss": 0.1495, "step": 1209 }, { "epoch": 0.2, "grad_norm": 1.9118924140930176, "learning_rate": 1.9926686363726764e-05, "loss": 0.1372, "step": 1210 }, { "epoch": 0.2, "grad_norm": 1.9340282678604126, "learning_rate": 1.9926467948767682e-05, "loss": 0.1736, "step": 1211 }, { "epoch": 0.2, "grad_norm": 1.496254563331604, "learning_rate": 1.992624921014353e-05, "loss": 0.1443, "step": 1212 }, { "epoch": 0.2, "grad_norm": 1.86516273021698, "learning_rate": 1.992603014786144e-05, "loss": 0.1573, "step": 1213 }, { "epoch": 0.2, "grad_norm": 1.2455390691757202, "learning_rate": 1.9925810761928547e-05, "loss": 0.1255, "step": 1214 }, { "epoch": 0.2, "grad_norm": 1.2709070444107056, "learning_rate": 1.9925591052352016e-05, "loss": 0.1376, "step": 1215 }, { "epoch": 0.2, "grad_norm": 1.7767175436019897, "learning_rate": 1.9925371019139002e-05, "loss": 0.1482, "step": 1216 }, { "epoch": 0.2, "grad_norm": 1.4523648023605347, "learning_rate": 1.992515066229668e-05, "loss": 0.1484, "step": 1217 }, { "epoch": 0.2, "grad_norm": 1.4845569133758545, "learning_rate": 1.9924929981832243e-05, "loss": 0.1501, "step": 1218 }, { "epoch": 0.2, "grad_norm": 1.6276506185531616, "learning_rate": 1.992470897775288e-05, "loss": 0.1531, "step": 1219 }, { "epoch": 0.2, "grad_norm": 1.6598706245422363, "learning_rate": 1.9924487650065796e-05, "loss": 0.1822, "step": 1220 }, { "epoch": 0.2, "grad_norm": 1.6242096424102783, "learning_rate": 1.9924265998778208e-05, "loss": 0.158, "step": 1221 }, { "epoch": 0.2, "grad_norm": 1.428911566734314, "learning_rate": 1.9924044023897352e-05, "loss": 0.1477, "step": 1222 }, { "epoch": 0.2, "grad_norm": 1.5772165060043335, "learning_rate": 1.9923821725430452e-05, "loss": 0.1734, "step": 1223 }, { "epoch": 0.2, "grad_norm": 1.318467140197754, "learning_rate": 1.9923599103384766e-05, "loss": 0.1396, "step": 1224 }, { "epoch": 0.2, "grad_norm": 1.6271109580993652, "learning_rate": 1.992337615776755e-05, "loss": 0.1346, "step": 1225 }, { "epoch": 0.21, "grad_norm": 1.2585912942886353, "learning_rate": 1.9923152888586074e-05, "loss": 0.1443, "step": 1226 }, { "epoch": 0.21, "grad_norm": 1.2858521938323975, "learning_rate": 1.9922929295847617e-05, "loss": 0.1367, "step": 1227 }, { "epoch": 0.21, "grad_norm": 1.6517928838729858, "learning_rate": 1.992270537955947e-05, "loss": 0.1493, "step": 1228 }, { "epoch": 0.21, "grad_norm": 1.7115942239761353, "learning_rate": 1.9922481139728933e-05, "loss": 0.1366, "step": 1229 }, { "epoch": 0.21, "grad_norm": 1.5841681957244873, "learning_rate": 1.992225657636332e-05, "loss": 0.1544, "step": 1230 }, { "epoch": 0.21, "grad_norm": 1.3332716226577759, "learning_rate": 1.992203168946995e-05, "loss": 0.1381, "step": 1231 }, { "epoch": 0.21, "grad_norm": 1.2644309997558594, "learning_rate": 1.992180647905616e-05, "loss": 0.1224, "step": 1232 }, { "epoch": 0.21, "grad_norm": 1.5348005294799805, "learning_rate": 1.992158094512929e-05, "loss": 0.1581, "step": 1233 }, { "epoch": 0.21, "grad_norm": 1.500811219215393, "learning_rate": 1.9921355087696695e-05, "loss": 0.1195, "step": 1234 }, { "epoch": 0.21, "grad_norm": 1.58860182762146, "learning_rate": 1.9921128906765736e-05, "loss": 0.1402, "step": 1235 }, { "epoch": 0.21, "grad_norm": 1.4961986541748047, "learning_rate": 1.9920902402343794e-05, "loss": 0.187, "step": 1236 }, { "epoch": 0.21, "grad_norm": 1.464724063873291, "learning_rate": 1.9920675574438252e-05, "loss": 0.1372, "step": 1237 }, { "epoch": 0.21, "grad_norm": 1.490308165550232, "learning_rate": 1.9920448423056505e-05, "loss": 0.1368, "step": 1238 }, { "epoch": 0.21, "grad_norm": 1.7418569326400757, "learning_rate": 1.9920220948205958e-05, "loss": 0.1491, "step": 1239 }, { "epoch": 0.21, "grad_norm": 1.0660380125045776, "learning_rate": 1.991999314989403e-05, "loss": 0.1176, "step": 1240 }, { "epoch": 0.21, "grad_norm": 1.3879656791687012, "learning_rate": 1.9919765028128154e-05, "loss": 0.1432, "step": 1241 }, { "epoch": 0.21, "grad_norm": 1.5173072814941406, "learning_rate": 1.9919536582915758e-05, "loss": 0.1476, "step": 1242 }, { "epoch": 0.21, "grad_norm": 1.2158089876174927, "learning_rate": 1.9919307814264295e-05, "loss": 0.1097, "step": 1243 }, { "epoch": 0.21, "grad_norm": 1.4555342197418213, "learning_rate": 1.9919078722181226e-05, "loss": 0.1524, "step": 1244 }, { "epoch": 0.21, "grad_norm": 1.6600561141967773, "learning_rate": 1.9918849306674022e-05, "loss": 0.1394, "step": 1245 }, { "epoch": 0.21, "grad_norm": 1.1972624063491821, "learning_rate": 1.991861956775016e-05, "loss": 0.1359, "step": 1246 }, { "epoch": 0.21, "grad_norm": 1.2915196418762207, "learning_rate": 1.991838950541713e-05, "loss": 0.1072, "step": 1247 }, { "epoch": 0.21, "grad_norm": 1.6521369218826294, "learning_rate": 1.991815911968244e-05, "loss": 0.1553, "step": 1248 }, { "epoch": 0.21, "grad_norm": 1.4047077894210815, "learning_rate": 1.9917928410553594e-05, "loss": 0.1775, "step": 1249 }, { "epoch": 0.21, "grad_norm": 1.3453375101089478, "learning_rate": 1.9917697378038122e-05, "loss": 0.1604, "step": 1250 }, { "epoch": 0.21, "grad_norm": 1.7235307693481445, "learning_rate": 1.991746602214355e-05, "loss": 0.1824, "step": 1251 }, { "epoch": 0.21, "grad_norm": 1.6882572174072266, "learning_rate": 1.9917234342877422e-05, "loss": 0.1545, "step": 1252 }, { "epoch": 0.21, "grad_norm": 1.2414004802703857, "learning_rate": 1.9917002340247302e-05, "loss": 0.1244, "step": 1253 }, { "epoch": 0.21, "grad_norm": 1.0892831087112427, "learning_rate": 1.991677001426074e-05, "loss": 0.1232, "step": 1254 }, { "epoch": 0.21, "grad_norm": 1.3698798418045044, "learning_rate": 1.9916537364925327e-05, "loss": 0.1371, "step": 1255 }, { "epoch": 0.21, "grad_norm": 1.7849063873291016, "learning_rate": 1.991630439224864e-05, "loss": 0.1282, "step": 1256 }, { "epoch": 0.21, "grad_norm": 1.3773444890975952, "learning_rate": 1.9916071096238272e-05, "loss": 0.138, "step": 1257 }, { "epoch": 0.21, "grad_norm": 1.3638261556625366, "learning_rate": 1.9915837476901834e-05, "loss": 0.1554, "step": 1258 }, { "epoch": 0.21, "grad_norm": 1.261033058166504, "learning_rate": 1.9915603534246947e-05, "loss": 0.133, "step": 1259 }, { "epoch": 0.21, "grad_norm": 1.371886968612671, "learning_rate": 1.9915369268281238e-05, "loss": 0.1323, "step": 1260 }, { "epoch": 0.21, "grad_norm": 1.2897406816482544, "learning_rate": 1.991513467901234e-05, "loss": 0.1201, "step": 1261 }, { "epoch": 0.21, "grad_norm": 1.086983323097229, "learning_rate": 1.9914899766447905e-05, "loss": 0.1278, "step": 1262 }, { "epoch": 0.21, "grad_norm": 1.2082051038742065, "learning_rate": 1.9914664530595593e-05, "loss": 0.1278, "step": 1263 }, { "epoch": 0.21, "grad_norm": 1.388686180114746, "learning_rate": 1.9914428971463077e-05, "loss": 0.1199, "step": 1264 }, { "epoch": 0.21, "grad_norm": 1.058525562286377, "learning_rate": 1.991419308905803e-05, "loss": 0.1236, "step": 1265 }, { "epoch": 0.21, "grad_norm": 1.3793017864227295, "learning_rate": 1.9913956883388153e-05, "loss": 0.1232, "step": 1266 }, { "epoch": 0.21, "grad_norm": 1.3517729043960571, "learning_rate": 1.991372035446114e-05, "loss": 0.1569, "step": 1267 }, { "epoch": 0.21, "grad_norm": 1.521315574645996, "learning_rate": 1.991348350228471e-05, "loss": 0.154, "step": 1268 }, { "epoch": 0.21, "grad_norm": 1.408871054649353, "learning_rate": 1.9913246326866576e-05, "loss": 0.1596, "step": 1269 }, { "epoch": 0.21, "grad_norm": 1.518484115600586, "learning_rate": 1.991300882821448e-05, "loss": 0.1378, "step": 1270 }, { "epoch": 0.21, "grad_norm": 1.9219075441360474, "learning_rate": 1.9912771006336163e-05, "loss": 0.1474, "step": 1271 }, { "epoch": 0.21, "grad_norm": 1.3637950420379639, "learning_rate": 1.991253286123938e-05, "loss": 0.1475, "step": 1272 }, { "epoch": 0.21, "grad_norm": 1.0768096446990967, "learning_rate": 1.9912294392931895e-05, "loss": 0.1157, "step": 1273 }, { "epoch": 0.21, "grad_norm": 1.9275481700897217, "learning_rate": 1.9912055601421486e-05, "loss": 0.145, "step": 1274 }, { "epoch": 0.21, "grad_norm": 1.491003155708313, "learning_rate": 1.991181648671594e-05, "loss": 0.162, "step": 1275 }, { "epoch": 0.21, "grad_norm": 1.3400321006774902, "learning_rate": 1.9911577048823048e-05, "loss": 0.1798, "step": 1276 }, { "epoch": 0.21, "grad_norm": 1.2250605821609497, "learning_rate": 1.991133728775062e-05, "loss": 0.1446, "step": 1277 }, { "epoch": 0.21, "grad_norm": 1.3418763875961304, "learning_rate": 1.9911097203506474e-05, "loss": 0.1577, "step": 1278 }, { "epoch": 0.21, "grad_norm": 1.2442626953125, "learning_rate": 1.9910856796098443e-05, "loss": 0.1288, "step": 1279 }, { "epoch": 0.21, "grad_norm": 1.457796573638916, "learning_rate": 1.9910616065534356e-05, "loss": 0.1385, "step": 1280 }, { "epoch": 0.21, "grad_norm": 1.3415074348449707, "learning_rate": 1.991037501182207e-05, "loss": 0.156, "step": 1281 }, { "epoch": 0.21, "grad_norm": 1.5740916728973389, "learning_rate": 1.991013363496944e-05, "loss": 0.1536, "step": 1282 }, { "epoch": 0.21, "grad_norm": 1.4465585947036743, "learning_rate": 1.990989193498434e-05, "loss": 0.1518, "step": 1283 }, { "epoch": 0.21, "grad_norm": 1.2863688468933105, "learning_rate": 1.990964991187465e-05, "loss": 0.1309, "step": 1284 }, { "epoch": 0.21, "grad_norm": 1.6837395429611206, "learning_rate": 1.9909407565648262e-05, "loss": 0.1319, "step": 1285 }, { "epoch": 0.22, "grad_norm": 1.1879042387008667, "learning_rate": 1.9909164896313077e-05, "loss": 0.1243, "step": 1286 }, { "epoch": 0.22, "grad_norm": 1.489073395729065, "learning_rate": 1.9908921903877006e-05, "loss": 0.1463, "step": 1287 }, { "epoch": 0.22, "grad_norm": 1.155023455619812, "learning_rate": 1.9908678588347975e-05, "loss": 0.1154, "step": 1288 }, { "epoch": 0.22, "grad_norm": 1.6873313188552856, "learning_rate": 1.9908434949733917e-05, "loss": 0.1544, "step": 1289 }, { "epoch": 0.22, "grad_norm": 1.1973923444747925, "learning_rate": 1.9908190988042774e-05, "loss": 0.1139, "step": 1290 }, { "epoch": 0.22, "grad_norm": 1.7181837558746338, "learning_rate": 1.99079467032825e-05, "loss": 0.137, "step": 1291 }, { "epoch": 0.22, "grad_norm": 1.769284963607788, "learning_rate": 1.990770209546107e-05, "loss": 0.1442, "step": 1292 }, { "epoch": 0.22, "grad_norm": 1.2892777919769287, "learning_rate": 1.9907457164586445e-05, "loss": 0.1363, "step": 1293 }, { "epoch": 0.22, "grad_norm": 1.573953628540039, "learning_rate": 1.990721191066662e-05, "loss": 0.1387, "step": 1294 }, { "epoch": 0.22, "grad_norm": 1.4329125881195068, "learning_rate": 1.9906966333709593e-05, "loss": 0.1489, "step": 1295 }, { "epoch": 0.22, "grad_norm": 1.269195556640625, "learning_rate": 1.9906720433723365e-05, "loss": 0.139, "step": 1296 }, { "epoch": 0.22, "grad_norm": 1.4685947895050049, "learning_rate": 1.990647421071596e-05, "loss": 0.1363, "step": 1297 }, { "epoch": 0.22, "grad_norm": 1.6479895114898682, "learning_rate": 1.99062276646954e-05, "loss": 0.1629, "step": 1298 }, { "epoch": 0.22, "grad_norm": 1.1610164642333984, "learning_rate": 1.9905980795669734e-05, "loss": 0.1219, "step": 1299 }, { "epoch": 0.22, "grad_norm": 1.089794397354126, "learning_rate": 1.9905733603647e-05, "loss": 0.118, "step": 1300 }, { "epoch": 0.22, "grad_norm": 1.3625043630599976, "learning_rate": 1.9905486088635265e-05, "loss": 0.1127, "step": 1301 }, { "epoch": 0.22, "grad_norm": 1.087558388710022, "learning_rate": 1.99052382506426e-05, "loss": 0.117, "step": 1302 }, { "epoch": 0.22, "grad_norm": 1.9471583366394043, "learning_rate": 1.990499008967708e-05, "loss": 0.1611, "step": 1303 }, { "epoch": 0.22, "grad_norm": 1.6312255859375, "learning_rate": 1.9904741605746808e-05, "loss": 0.1729, "step": 1304 }, { "epoch": 0.22, "grad_norm": 1.4152237176895142, "learning_rate": 1.990449279885987e-05, "loss": 0.1211, "step": 1305 }, { "epoch": 0.22, "grad_norm": 1.3737465143203735, "learning_rate": 1.9904243669024392e-05, "loss": 0.1322, "step": 1306 }, { "epoch": 0.22, "grad_norm": 1.4814555644989014, "learning_rate": 1.990399421624849e-05, "loss": 0.1226, "step": 1307 }, { "epoch": 0.22, "grad_norm": 1.3718982934951782, "learning_rate": 1.9903744440540302e-05, "loss": 0.126, "step": 1308 }, { "epoch": 0.22, "grad_norm": 1.3404667377471924, "learning_rate": 1.9903494341907973e-05, "loss": 0.1161, "step": 1309 }, { "epoch": 0.22, "grad_norm": 1.3939135074615479, "learning_rate": 1.9903243920359648e-05, "loss": 0.1264, "step": 1310 }, { "epoch": 0.22, "grad_norm": 1.7750377655029297, "learning_rate": 1.9902993175903508e-05, "loss": 0.1663, "step": 1311 }, { "epoch": 0.22, "grad_norm": 1.4187027215957642, "learning_rate": 1.9902742108547716e-05, "loss": 0.1222, "step": 1312 }, { "epoch": 0.22, "grad_norm": 1.1220061779022217, "learning_rate": 1.9902490718300464e-05, "loss": 0.1273, "step": 1313 }, { "epoch": 0.22, "grad_norm": 1.374311089515686, "learning_rate": 1.9902239005169948e-05, "loss": 0.1113, "step": 1314 }, { "epoch": 0.22, "grad_norm": 1.5406163930892944, "learning_rate": 1.9901986969164373e-05, "loss": 0.1237, "step": 1315 }, { "epoch": 0.22, "grad_norm": 1.2317724227905273, "learning_rate": 1.9901734610291957e-05, "loss": 0.1275, "step": 1316 }, { "epoch": 0.22, "grad_norm": 1.6151227951049805, "learning_rate": 1.9901481928560936e-05, "loss": 0.1354, "step": 1317 }, { "epoch": 0.22, "grad_norm": 1.9007192850112915, "learning_rate": 1.9901228923979543e-05, "loss": 0.1497, "step": 1318 }, { "epoch": 0.22, "grad_norm": 1.3171449899673462, "learning_rate": 1.9900975596556024e-05, "loss": 0.1304, "step": 1319 }, { "epoch": 0.22, "grad_norm": 1.2800912857055664, "learning_rate": 1.9900721946298647e-05, "loss": 0.1379, "step": 1320 }, { "epoch": 0.22, "grad_norm": 1.5267913341522217, "learning_rate": 1.990046797321568e-05, "loss": 0.1354, "step": 1321 }, { "epoch": 0.22, "grad_norm": 1.4479620456695557, "learning_rate": 1.99002136773154e-05, "loss": 0.135, "step": 1322 }, { "epoch": 0.22, "grad_norm": 1.9463189840316772, "learning_rate": 1.98999590586061e-05, "loss": 0.1324, "step": 1323 }, { "epoch": 0.22, "grad_norm": 1.2035210132598877, "learning_rate": 1.9899704117096087e-05, "loss": 0.1316, "step": 1324 }, { "epoch": 0.22, "grad_norm": 1.4373916387557983, "learning_rate": 1.989944885279367e-05, "loss": 0.1304, "step": 1325 }, { "epoch": 0.22, "grad_norm": 1.3949118852615356, "learning_rate": 1.989919326570717e-05, "loss": 0.1204, "step": 1326 }, { "epoch": 0.22, "grad_norm": 1.2878997325897217, "learning_rate": 1.9898937355844926e-05, "loss": 0.1196, "step": 1327 }, { "epoch": 0.22, "grad_norm": 1.4383184909820557, "learning_rate": 1.989868112321528e-05, "loss": 0.1616, "step": 1328 }, { "epoch": 0.22, "grad_norm": 1.4655388593673706, "learning_rate": 1.9898424567826583e-05, "loss": 0.1399, "step": 1329 }, { "epoch": 0.22, "grad_norm": 1.2785239219665527, "learning_rate": 1.9898167689687207e-05, "loss": 0.1334, "step": 1330 }, { "epoch": 0.22, "grad_norm": 1.7935105562210083, "learning_rate": 1.9897910488805525e-05, "loss": 0.1675, "step": 1331 }, { "epoch": 0.22, "grad_norm": 1.3043699264526367, "learning_rate": 1.989765296518992e-05, "loss": 0.1519, "step": 1332 }, { "epoch": 0.22, "grad_norm": 1.4985613822937012, "learning_rate": 1.9897395118848792e-05, "loss": 0.1305, "step": 1333 }, { "epoch": 0.22, "grad_norm": 1.6501460075378418, "learning_rate": 1.989713694979055e-05, "loss": 0.1377, "step": 1334 }, { "epoch": 0.22, "grad_norm": 1.4656022787094116, "learning_rate": 1.989687845802361e-05, "loss": 0.1509, "step": 1335 }, { "epoch": 0.22, "grad_norm": 1.8197195529937744, "learning_rate": 1.98966196435564e-05, "loss": 0.1223, "step": 1336 }, { "epoch": 0.22, "grad_norm": 1.5003013610839844, "learning_rate": 1.989636050639736e-05, "loss": 0.1239, "step": 1337 }, { "epoch": 0.22, "grad_norm": 1.54204261302948, "learning_rate": 1.9896101046554937e-05, "loss": 0.1693, "step": 1338 }, { "epoch": 0.22, "grad_norm": 1.2383977174758911, "learning_rate": 1.9895841264037594e-05, "loss": 0.138, "step": 1339 }, { "epoch": 0.22, "grad_norm": 1.157487154006958, "learning_rate": 1.9895581158853803e-05, "loss": 0.1305, "step": 1340 }, { "epoch": 0.22, "grad_norm": 1.0812170505523682, "learning_rate": 1.989532073101204e-05, "loss": 0.116, "step": 1341 }, { "epoch": 0.22, "grad_norm": 1.3606220483779907, "learning_rate": 1.98950599805208e-05, "loss": 0.1463, "step": 1342 }, { "epoch": 0.22, "grad_norm": 1.4840128421783447, "learning_rate": 1.9894798907388587e-05, "loss": 0.15, "step": 1343 }, { "epoch": 0.22, "grad_norm": 1.4797475337982178, "learning_rate": 1.9894537511623908e-05, "loss": 0.1591, "step": 1344 }, { "epoch": 0.22, "grad_norm": 1.2073997259140015, "learning_rate": 1.9894275793235288e-05, "loss": 0.1129, "step": 1345 }, { "epoch": 0.23, "grad_norm": 1.2397546768188477, "learning_rate": 1.9894013752231263e-05, "loss": 0.1316, "step": 1346 }, { "epoch": 0.23, "grad_norm": 1.158272385597229, "learning_rate": 1.9893751388620378e-05, "loss": 0.1216, "step": 1347 }, { "epoch": 0.23, "grad_norm": 1.2871267795562744, "learning_rate": 1.989348870241118e-05, "loss": 0.1361, "step": 1348 }, { "epoch": 0.23, "grad_norm": 1.6427338123321533, "learning_rate": 1.9893225693612244e-05, "loss": 0.1438, "step": 1349 }, { "epoch": 0.23, "grad_norm": 1.4172765016555786, "learning_rate": 1.9892962362232146e-05, "loss": 0.1222, "step": 1350 }, { "epoch": 0.23, "grad_norm": 1.4799894094467163, "learning_rate": 1.989269870827946e-05, "loss": 0.1223, "step": 1351 }, { "epoch": 0.23, "grad_norm": 1.2638967037200928, "learning_rate": 1.9892434731762794e-05, "loss": 0.1257, "step": 1352 }, { "epoch": 0.23, "grad_norm": 1.279590368270874, "learning_rate": 1.9892170432690752e-05, "loss": 0.1265, "step": 1353 }, { "epoch": 0.23, "grad_norm": 1.2451680898666382, "learning_rate": 1.9891905811071952e-05, "loss": 0.1427, "step": 1354 }, { "epoch": 0.23, "grad_norm": 2.1635587215423584, "learning_rate": 1.9891640866915024e-05, "loss": 0.1799, "step": 1355 }, { "epoch": 0.23, "grad_norm": 1.2764101028442383, "learning_rate": 1.9891375600228603e-05, "loss": 0.1425, "step": 1356 }, { "epoch": 0.23, "grad_norm": 1.464367151260376, "learning_rate": 1.9891110011021336e-05, "loss": 0.1696, "step": 1357 }, { "epoch": 0.23, "grad_norm": 1.2587093114852905, "learning_rate": 1.989084409930189e-05, "loss": 0.1343, "step": 1358 }, { "epoch": 0.23, "grad_norm": 1.3489844799041748, "learning_rate": 1.9890577865078936e-05, "loss": 0.1406, "step": 1359 }, { "epoch": 0.23, "grad_norm": 1.2781219482421875, "learning_rate": 1.9890311308361147e-05, "loss": 0.138, "step": 1360 }, { "epoch": 0.23, "grad_norm": 1.3529354333877563, "learning_rate": 1.989004442915722e-05, "loss": 0.1395, "step": 1361 }, { "epoch": 0.23, "grad_norm": 1.3424283266067505, "learning_rate": 1.9889777227475856e-05, "loss": 0.1355, "step": 1362 }, { "epoch": 0.23, "grad_norm": 1.4559123516082764, "learning_rate": 1.988950970332577e-05, "loss": 0.1479, "step": 1363 }, { "epoch": 0.23, "grad_norm": 1.7153342962265015, "learning_rate": 1.988924185671568e-05, "loss": 0.1317, "step": 1364 }, { "epoch": 0.23, "grad_norm": 1.342157244682312, "learning_rate": 1.988897368765432e-05, "loss": 0.1434, "step": 1365 }, { "epoch": 0.23, "grad_norm": 1.7466325759887695, "learning_rate": 1.9888705196150437e-05, "loss": 0.1413, "step": 1366 }, { "epoch": 0.23, "grad_norm": 1.4646393060684204, "learning_rate": 1.9888436382212785e-05, "loss": 0.1278, "step": 1367 }, { "epoch": 0.23, "grad_norm": 1.378034234046936, "learning_rate": 1.9888167245850124e-05, "loss": 0.1301, "step": 1368 }, { "epoch": 0.23, "grad_norm": 1.3102632761001587, "learning_rate": 1.988789778707124e-05, "loss": 0.134, "step": 1369 }, { "epoch": 0.23, "grad_norm": 1.5847386121749878, "learning_rate": 1.988762800588491e-05, "loss": 0.1469, "step": 1370 }, { "epoch": 0.23, "grad_norm": 1.5439276695251465, "learning_rate": 1.9887357902299934e-05, "loss": 0.1382, "step": 1371 }, { "epoch": 0.23, "grad_norm": 1.0796749591827393, "learning_rate": 1.9887087476325115e-05, "loss": 0.1089, "step": 1372 }, { "epoch": 0.23, "grad_norm": 2.062164783477783, "learning_rate": 1.9886816727969278e-05, "loss": 0.1575, "step": 1373 }, { "epoch": 0.23, "grad_norm": 1.550486445426941, "learning_rate": 1.9886545657241246e-05, "loss": 0.1601, "step": 1374 }, { "epoch": 0.23, "grad_norm": 1.1531040668487549, "learning_rate": 1.9886274264149856e-05, "loss": 0.1206, "step": 1375 }, { "epoch": 0.23, "grad_norm": 1.2105939388275146, "learning_rate": 1.9886002548703963e-05, "loss": 0.1328, "step": 1376 }, { "epoch": 0.23, "grad_norm": 1.1561774015426636, "learning_rate": 1.9885730510912422e-05, "loss": 0.1167, "step": 1377 }, { "epoch": 0.23, "grad_norm": 1.1512311697006226, "learning_rate": 1.9885458150784106e-05, "loss": 0.1105, "step": 1378 }, { "epoch": 0.23, "grad_norm": 1.3101533651351929, "learning_rate": 1.988518546832789e-05, "loss": 0.1243, "step": 1379 }, { "epoch": 0.23, "grad_norm": 1.2000761032104492, "learning_rate": 1.988491246355267e-05, "loss": 0.1212, "step": 1380 }, { "epoch": 0.23, "grad_norm": 1.3059545755386353, "learning_rate": 1.988463913646735e-05, "loss": 0.1316, "step": 1381 }, { "epoch": 0.23, "grad_norm": 1.2259254455566406, "learning_rate": 1.988436548708084e-05, "loss": 0.1299, "step": 1382 }, { "epoch": 0.23, "grad_norm": 1.1838897466659546, "learning_rate": 1.9884091515402063e-05, "loss": 0.1403, "step": 1383 }, { "epoch": 0.23, "grad_norm": 1.3712334632873535, "learning_rate": 1.9883817221439946e-05, "loss": 0.1388, "step": 1384 }, { "epoch": 0.23, "grad_norm": 1.3660825490951538, "learning_rate": 1.988354260520344e-05, "loss": 0.1473, "step": 1385 }, { "epoch": 0.23, "grad_norm": 1.2814959287643433, "learning_rate": 1.98832676667015e-05, "loss": 0.1534, "step": 1386 }, { "epoch": 0.23, "grad_norm": 1.1212232112884521, "learning_rate": 1.9882992405943082e-05, "loss": 0.1225, "step": 1387 }, { "epoch": 0.23, "grad_norm": 1.4201487302780151, "learning_rate": 1.988271682293717e-05, "loss": 0.1295, "step": 1388 }, { "epoch": 0.23, "grad_norm": 1.227216124534607, "learning_rate": 1.9882440917692748e-05, "loss": 0.115, "step": 1389 }, { "epoch": 0.23, "grad_norm": 1.2210520505905151, "learning_rate": 1.9882164690218806e-05, "loss": 0.1365, "step": 1390 }, { "epoch": 0.23, "grad_norm": 1.4651676416397095, "learning_rate": 1.988188814052436e-05, "loss": 0.1377, "step": 1391 }, { "epoch": 0.23, "grad_norm": 2.0087435245513916, "learning_rate": 1.9881611268618423e-05, "loss": 0.124, "step": 1392 }, { "epoch": 0.23, "grad_norm": 1.5234131813049316, "learning_rate": 1.988133407451002e-05, "loss": 0.1438, "step": 1393 }, { "epoch": 0.23, "grad_norm": 1.1489578485488892, "learning_rate": 1.9881056558208195e-05, "loss": 0.1264, "step": 1394 }, { "epoch": 0.23, "grad_norm": 1.474602460861206, "learning_rate": 1.9880778719721992e-05, "loss": 0.1175, "step": 1395 }, { "epoch": 0.23, "grad_norm": 1.5383620262145996, "learning_rate": 1.988050055906047e-05, "loss": 0.1397, "step": 1396 }, { "epoch": 0.23, "grad_norm": 1.1231391429901123, "learning_rate": 1.98802220762327e-05, "loss": 0.1154, "step": 1397 }, { "epoch": 0.23, "grad_norm": 1.1307042837142944, "learning_rate": 1.9879943271247766e-05, "loss": 0.1104, "step": 1398 }, { "epoch": 0.23, "grad_norm": 1.694913625717163, "learning_rate": 1.987966414411476e-05, "loss": 0.1446, "step": 1399 }, { "epoch": 0.23, "grad_norm": 1.4440109729766846, "learning_rate": 1.9879384694842774e-05, "loss": 0.1219, "step": 1400 }, { "epoch": 0.23, "grad_norm": 1.2223206758499146, "learning_rate": 1.9879104923440925e-05, "loss": 0.1365, "step": 1401 }, { "epoch": 0.23, "grad_norm": 1.699090600013733, "learning_rate": 1.9878824829918337e-05, "loss": 0.1509, "step": 1402 }, { "epoch": 0.23, "grad_norm": 1.510446548461914, "learning_rate": 1.987854441428414e-05, "loss": 0.1296, "step": 1403 }, { "epoch": 0.23, "grad_norm": 1.3571650981903076, "learning_rate": 1.987826367654748e-05, "loss": 0.1461, "step": 1404 }, { "epoch": 0.23, "grad_norm": 1.246099829673767, "learning_rate": 1.9877982616717505e-05, "loss": 0.1285, "step": 1405 }, { "epoch": 0.24, "grad_norm": 1.4962331056594849, "learning_rate": 1.9877701234803385e-05, "loss": 0.1479, "step": 1406 }, { "epoch": 0.24, "grad_norm": 1.809283971786499, "learning_rate": 1.9877419530814293e-05, "loss": 0.1641, "step": 1407 }, { "epoch": 0.24, "grad_norm": 1.1923903226852417, "learning_rate": 1.9877137504759416e-05, "loss": 0.1204, "step": 1408 }, { "epoch": 0.24, "grad_norm": 1.200477957725525, "learning_rate": 1.987685515664795e-05, "loss": 0.1479, "step": 1409 }, { "epoch": 0.24, "grad_norm": 1.496009111404419, "learning_rate": 1.9876572486489096e-05, "loss": 0.1459, "step": 1410 }, { "epoch": 0.24, "grad_norm": 1.3970829248428345, "learning_rate": 1.9876289494292075e-05, "loss": 0.1558, "step": 1411 }, { "epoch": 0.24, "grad_norm": 1.2473360300064087, "learning_rate": 1.9876006180066113e-05, "loss": 0.1089, "step": 1412 }, { "epoch": 0.24, "grad_norm": 1.2401258945465088, "learning_rate": 1.9875722543820455e-05, "loss": 0.1178, "step": 1413 }, { "epoch": 0.24, "grad_norm": 1.2677607536315918, "learning_rate": 1.9875438585564336e-05, "loss": 0.1269, "step": 1414 }, { "epoch": 0.24, "grad_norm": 1.0995378494262695, "learning_rate": 1.9875154305307025e-05, "loss": 0.1394, "step": 1415 }, { "epoch": 0.24, "grad_norm": 1.4451069831848145, "learning_rate": 1.9874869703057787e-05, "loss": 0.1408, "step": 1416 }, { "epoch": 0.24, "grad_norm": 1.243667721748352, "learning_rate": 1.9874584778825904e-05, "loss": 0.1326, "step": 1417 }, { "epoch": 0.24, "grad_norm": 1.3816286325454712, "learning_rate": 1.9874299532620664e-05, "loss": 0.1461, "step": 1418 }, { "epoch": 0.24, "grad_norm": 1.2662957906723022, "learning_rate": 1.987401396445137e-05, "loss": 0.1091, "step": 1419 }, { "epoch": 0.24, "grad_norm": 1.3251125812530518, "learning_rate": 1.9873728074327333e-05, "loss": 0.1412, "step": 1420 }, { "epoch": 0.24, "grad_norm": 1.5212711095809937, "learning_rate": 1.9873441862257874e-05, "loss": 0.1891, "step": 1421 }, { "epoch": 0.24, "grad_norm": 1.7170490026474, "learning_rate": 1.9873155328252326e-05, "loss": 0.1442, "step": 1422 }, { "epoch": 0.24, "grad_norm": 1.458107829093933, "learning_rate": 1.987286847232003e-05, "loss": 0.1257, "step": 1423 }, { "epoch": 0.24, "grad_norm": 1.1826584339141846, "learning_rate": 1.9872581294470338e-05, "loss": 0.1055, "step": 1424 }, { "epoch": 0.24, "grad_norm": 1.4493964910507202, "learning_rate": 1.987229379471262e-05, "loss": 0.154, "step": 1425 }, { "epoch": 0.24, "grad_norm": 1.533242106437683, "learning_rate": 1.9872005973056246e-05, "loss": 0.1269, "step": 1426 }, { "epoch": 0.24, "grad_norm": 1.4521011114120483, "learning_rate": 1.98717178295106e-05, "loss": 0.1422, "step": 1427 }, { "epoch": 0.24, "grad_norm": 1.1310186386108398, "learning_rate": 1.987142936408508e-05, "loss": 0.1141, "step": 1428 }, { "epoch": 0.24, "grad_norm": 1.2889231443405151, "learning_rate": 1.9871140576789094e-05, "loss": 0.1353, "step": 1429 }, { "epoch": 0.24, "grad_norm": 1.6449631452560425, "learning_rate": 1.987085146763205e-05, "loss": 0.1497, "step": 1430 }, { "epoch": 0.24, "grad_norm": 1.0643737316131592, "learning_rate": 1.9870562036623382e-05, "loss": 0.1283, "step": 1431 }, { "epoch": 0.24, "grad_norm": 1.3955765962600708, "learning_rate": 1.9870272283772523e-05, "loss": 0.1365, "step": 1432 }, { "epoch": 0.24, "grad_norm": 1.2741856575012207, "learning_rate": 1.9869982209088926e-05, "loss": 0.1244, "step": 1433 }, { "epoch": 0.24, "grad_norm": 1.3695906400680542, "learning_rate": 1.9869691812582043e-05, "loss": 0.1135, "step": 1434 }, { "epoch": 0.24, "grad_norm": 1.2341820001602173, "learning_rate": 1.9869401094261346e-05, "loss": 0.132, "step": 1435 }, { "epoch": 0.24, "grad_norm": 1.2168805599212646, "learning_rate": 1.9869110054136315e-05, "loss": 0.1174, "step": 1436 }, { "epoch": 0.24, "grad_norm": 1.020599603652954, "learning_rate": 1.9868818692216437e-05, "loss": 0.1266, "step": 1437 }, { "epoch": 0.24, "grad_norm": 1.2302175760269165, "learning_rate": 1.9868527008511212e-05, "loss": 0.1256, "step": 1438 }, { "epoch": 0.24, "grad_norm": 4.581494331359863, "learning_rate": 1.9868235003030155e-05, "loss": 0.1486, "step": 1439 }, { "epoch": 0.24, "grad_norm": 1.448137879371643, "learning_rate": 1.9867942675782785e-05, "loss": 0.1381, "step": 1440 }, { "epoch": 0.24, "grad_norm": 1.6931146383285522, "learning_rate": 1.9867650026778634e-05, "loss": 0.1496, "step": 1441 }, { "epoch": 0.24, "grad_norm": 1.6737725734710693, "learning_rate": 1.9867357056027243e-05, "loss": 0.1277, "step": 1442 }, { "epoch": 0.24, "grad_norm": 1.3933697938919067, "learning_rate": 1.9867063763538166e-05, "loss": 0.1406, "step": 1443 }, { "epoch": 0.24, "grad_norm": 1.075040340423584, "learning_rate": 1.9866770149320964e-05, "loss": 0.1023, "step": 1444 }, { "epoch": 0.24, "grad_norm": 1.3425036668777466, "learning_rate": 1.9866476213385212e-05, "loss": 0.1202, "step": 1445 }, { "epoch": 0.24, "grad_norm": 1.3575776815414429, "learning_rate": 1.9866181955740496e-05, "loss": 0.1095, "step": 1446 }, { "epoch": 0.24, "grad_norm": 1.4175783395767212, "learning_rate": 1.986588737639641e-05, "loss": 0.1161, "step": 1447 }, { "epoch": 0.24, "grad_norm": 1.3659318685531616, "learning_rate": 1.9865592475362554e-05, "loss": 0.1334, "step": 1448 }, { "epoch": 0.24, "grad_norm": 1.543290138244629, "learning_rate": 1.986529725264855e-05, "loss": 0.1398, "step": 1449 }, { "epoch": 0.24, "grad_norm": 1.501489520072937, "learning_rate": 1.986500170826402e-05, "loss": 0.1367, "step": 1450 }, { "epoch": 0.24, "grad_norm": 1.228055477142334, "learning_rate": 1.9864705842218605e-05, "loss": 0.1413, "step": 1451 }, { "epoch": 0.24, "grad_norm": 1.547317624092102, "learning_rate": 1.986440965452195e-05, "loss": 0.1651, "step": 1452 }, { "epoch": 0.24, "grad_norm": 1.6755774021148682, "learning_rate": 1.986411314518371e-05, "loss": 0.1417, "step": 1453 }, { "epoch": 0.24, "grad_norm": 1.8588488101959229, "learning_rate": 1.9863816314213555e-05, "loss": 0.1478, "step": 1454 }, { "epoch": 0.24, "grad_norm": 1.4650073051452637, "learning_rate": 1.9863519161621167e-05, "loss": 0.1385, "step": 1455 }, { "epoch": 0.24, "grad_norm": 1.5118213891983032, "learning_rate": 1.9863221687416226e-05, "loss": 0.128, "step": 1456 }, { "epoch": 0.24, "grad_norm": 1.480783224105835, "learning_rate": 1.9862923891608444e-05, "loss": 0.1307, "step": 1457 }, { "epoch": 0.24, "grad_norm": 1.5085936784744263, "learning_rate": 1.9862625774207518e-05, "loss": 0.1659, "step": 1458 }, { "epoch": 0.24, "grad_norm": 1.4833455085754395, "learning_rate": 1.986232733522318e-05, "loss": 0.1379, "step": 1459 }, { "epoch": 0.24, "grad_norm": 1.3532541990280151, "learning_rate": 1.9862028574665153e-05, "loss": 0.1348, "step": 1460 }, { "epoch": 0.24, "grad_norm": 1.220845341682434, "learning_rate": 1.986172949254318e-05, "loss": 0.1233, "step": 1461 }, { "epoch": 0.24, "grad_norm": 1.120312213897705, "learning_rate": 1.9861430088867015e-05, "loss": 0.1179, "step": 1462 }, { "epoch": 0.24, "grad_norm": 1.17574942111969, "learning_rate": 1.986113036364642e-05, "loss": 0.1336, "step": 1463 }, { "epoch": 0.24, "grad_norm": 1.5652567148208618, "learning_rate": 1.986083031689117e-05, "loss": 0.1485, "step": 1464 }, { "epoch": 0.25, "grad_norm": 1.6964231729507446, "learning_rate": 1.9860529948611042e-05, "loss": 0.1321, "step": 1465 }, { "epoch": 0.25, "grad_norm": 1.3627533912658691, "learning_rate": 1.9860229258815836e-05, "loss": 0.1378, "step": 1466 }, { "epoch": 0.25, "grad_norm": 1.465545892715454, "learning_rate": 1.9859928247515356e-05, "loss": 0.1403, "step": 1467 }, { "epoch": 0.25, "grad_norm": 1.9405198097229004, "learning_rate": 1.985962691471941e-05, "loss": 0.1493, "step": 1468 }, { "epoch": 0.25, "grad_norm": 1.5882024765014648, "learning_rate": 1.9859325260437833e-05, "loss": 0.1213, "step": 1469 }, { "epoch": 0.25, "grad_norm": 1.8003721237182617, "learning_rate": 1.9859023284680452e-05, "loss": 0.1643, "step": 1470 }, { "epoch": 0.25, "grad_norm": 1.2915338277816772, "learning_rate": 1.9858720987457123e-05, "loss": 0.1324, "step": 1471 }, { "epoch": 0.25, "grad_norm": 2.2422051429748535, "learning_rate": 1.9858418368777693e-05, "loss": 0.1307, "step": 1472 }, { "epoch": 0.25, "grad_norm": 1.4172255992889404, "learning_rate": 1.9858115428652034e-05, "loss": 0.1031, "step": 1473 }, { "epoch": 0.25, "grad_norm": 1.2397961616516113, "learning_rate": 1.9857812167090023e-05, "loss": 0.1313, "step": 1474 }, { "epoch": 0.25, "grad_norm": 1.5670619010925293, "learning_rate": 1.985750858410155e-05, "loss": 0.1121, "step": 1475 }, { "epoch": 0.25, "grad_norm": 2.090210437774658, "learning_rate": 1.9857204679696514e-05, "loss": 0.1503, "step": 1476 }, { "epoch": 0.25, "grad_norm": 1.5480502843856812, "learning_rate": 1.985690045388482e-05, "loss": 0.1633, "step": 1477 }, { "epoch": 0.25, "grad_norm": 1.0867749452590942, "learning_rate": 1.985659590667639e-05, "loss": 0.1143, "step": 1478 }, { "epoch": 0.25, "grad_norm": 1.653401255607605, "learning_rate": 1.9856291038081155e-05, "loss": 0.1393, "step": 1479 }, { "epoch": 0.25, "grad_norm": 1.6006264686584473, "learning_rate": 1.9855985848109052e-05, "loss": 0.1581, "step": 1480 }, { "epoch": 0.25, "grad_norm": 1.4421557188034058, "learning_rate": 1.985568033677004e-05, "loss": 0.1382, "step": 1481 }, { "epoch": 0.25, "grad_norm": 1.332604169845581, "learning_rate": 1.985537450407407e-05, "loss": 0.151, "step": 1482 }, { "epoch": 0.25, "grad_norm": 1.4337561130523682, "learning_rate": 1.9855068350031122e-05, "loss": 0.1374, "step": 1483 }, { "epoch": 0.25, "grad_norm": 1.3606210947036743, "learning_rate": 1.985476187465118e-05, "loss": 0.1333, "step": 1484 }, { "epoch": 0.25, "grad_norm": 1.414477825164795, "learning_rate": 1.9854455077944227e-05, "loss": 0.144, "step": 1485 }, { "epoch": 0.25, "grad_norm": 1.1681773662567139, "learning_rate": 1.9854147959920276e-05, "loss": 0.1336, "step": 1486 }, { "epoch": 0.25, "grad_norm": 1.4040627479553223, "learning_rate": 1.985384052058934e-05, "loss": 0.1239, "step": 1487 }, { "epoch": 0.25, "grad_norm": 1.3969725370407104, "learning_rate": 1.9853532759961436e-05, "loss": 0.1165, "step": 1488 }, { "epoch": 0.25, "grad_norm": 1.5682164430618286, "learning_rate": 1.9853224678046605e-05, "loss": 0.1125, "step": 1489 }, { "epoch": 0.25, "grad_norm": 1.2245182991027832, "learning_rate": 1.9852916274854895e-05, "loss": 0.1188, "step": 1490 }, { "epoch": 0.25, "grad_norm": 1.526711344718933, "learning_rate": 1.9852607550396357e-05, "loss": 0.1183, "step": 1491 }, { "epoch": 0.25, "grad_norm": 1.352603554725647, "learning_rate": 1.9852298504681055e-05, "loss": 0.1398, "step": 1492 }, { "epoch": 0.25, "grad_norm": 1.252238392829895, "learning_rate": 1.9851989137719072e-05, "loss": 0.1381, "step": 1493 }, { "epoch": 0.25, "grad_norm": 1.3096855878829956, "learning_rate": 1.9851679449520494e-05, "loss": 0.1427, "step": 1494 }, { "epoch": 0.25, "grad_norm": 1.516997218132019, "learning_rate": 1.9851369440095415e-05, "loss": 0.1299, "step": 1495 }, { "epoch": 0.25, "grad_norm": 1.2372963428497314, "learning_rate": 1.9851059109453948e-05, "loss": 0.1042, "step": 1496 }, { "epoch": 0.25, "grad_norm": 1.3010203838348389, "learning_rate": 1.985074845760621e-05, "loss": 0.1236, "step": 1497 }, { "epoch": 0.25, "grad_norm": 1.254276990890503, "learning_rate": 1.9850437484562328e-05, "loss": 0.1313, "step": 1498 }, { "epoch": 0.25, "grad_norm": 1.476571798324585, "learning_rate": 1.9850126190332446e-05, "loss": 0.1183, "step": 1499 }, { "epoch": 0.25, "grad_norm": 1.5333552360534668, "learning_rate": 1.984981457492671e-05, "loss": 0.1634, "step": 1500 }, { "epoch": 0.25, "grad_norm": 1.2067900896072388, "learning_rate": 1.9849502638355277e-05, "loss": 0.1226, "step": 1501 }, { "epoch": 0.25, "grad_norm": 1.9026480913162231, "learning_rate": 1.984919038062833e-05, "loss": 0.116, "step": 1502 }, { "epoch": 0.25, "grad_norm": 1.4936244487762451, "learning_rate": 1.9848877801756044e-05, "loss": 0.1546, "step": 1503 }, { "epoch": 0.25, "grad_norm": 1.5204001665115356, "learning_rate": 1.9848564901748606e-05, "loss": 0.1226, "step": 1504 }, { "epoch": 0.25, "grad_norm": 1.188161849975586, "learning_rate": 1.9848251680616225e-05, "loss": 0.114, "step": 1505 }, { "epoch": 0.25, "grad_norm": 1.662381887435913, "learning_rate": 1.9847938138369118e-05, "loss": 0.1613, "step": 1506 }, { "epoch": 0.25, "grad_norm": 1.2301185131072998, "learning_rate": 1.9847624275017495e-05, "loss": 0.1385, "step": 1507 }, { "epoch": 0.25, "grad_norm": 1.03791344165802, "learning_rate": 1.98473100905716e-05, "loss": 0.1148, "step": 1508 }, { "epoch": 0.25, "grad_norm": 1.1682952642440796, "learning_rate": 1.9846995585041673e-05, "loss": 0.137, "step": 1509 }, { "epoch": 0.25, "grad_norm": 1.2774685621261597, "learning_rate": 1.9846680758437976e-05, "loss": 0.1242, "step": 1510 }, { "epoch": 0.25, "grad_norm": 1.0736972093582153, "learning_rate": 1.9846365610770767e-05, "loss": 0.103, "step": 1511 }, { "epoch": 0.25, "grad_norm": 1.0709556341171265, "learning_rate": 1.9846050142050325e-05, "loss": 0.1147, "step": 1512 }, { "epoch": 0.25, "grad_norm": 1.4669049978256226, "learning_rate": 1.9845734352286934e-05, "loss": 0.1612, "step": 1513 }, { "epoch": 0.25, "grad_norm": 1.089054822921753, "learning_rate": 1.9845418241490893e-05, "loss": 0.1264, "step": 1514 }, { "epoch": 0.25, "grad_norm": 1.2920211553573608, "learning_rate": 1.9845101809672506e-05, "loss": 0.1404, "step": 1515 }, { "epoch": 0.25, "grad_norm": 1.218360185623169, "learning_rate": 1.9844785056842097e-05, "loss": 0.121, "step": 1516 }, { "epoch": 0.25, "grad_norm": 1.2578343152999878, "learning_rate": 1.9844467983009985e-05, "loss": 0.1331, "step": 1517 }, { "epoch": 0.25, "grad_norm": 1.2545926570892334, "learning_rate": 1.9844150588186515e-05, "loss": 0.1136, "step": 1518 }, { "epoch": 0.25, "grad_norm": 1.3864612579345703, "learning_rate": 1.9843832872382033e-05, "loss": 0.1185, "step": 1519 }, { "epoch": 0.25, "grad_norm": 1.522984266281128, "learning_rate": 1.9843514835606905e-05, "loss": 0.1385, "step": 1520 }, { "epoch": 0.25, "grad_norm": 1.2174184322357178, "learning_rate": 1.9843196477871492e-05, "loss": 0.1253, "step": 1521 }, { "epoch": 0.25, "grad_norm": 1.180612564086914, "learning_rate": 1.984287779918618e-05, "loss": 0.1186, "step": 1522 }, { "epoch": 0.25, "grad_norm": 1.6094268560409546, "learning_rate": 1.984255879956136e-05, "loss": 0.1373, "step": 1523 }, { "epoch": 0.25, "grad_norm": 1.6472301483154297, "learning_rate": 1.984223947900743e-05, "loss": 0.1292, "step": 1524 }, { "epoch": 0.26, "grad_norm": 1.216903805732727, "learning_rate": 1.9841919837534803e-05, "loss": 0.1262, "step": 1525 }, { "epoch": 0.26, "grad_norm": 1.248081088066101, "learning_rate": 1.9841599875153906e-05, "loss": 0.1143, "step": 1526 }, { "epoch": 0.26, "grad_norm": 1.4139440059661865, "learning_rate": 1.9841279591875163e-05, "loss": 0.1437, "step": 1527 }, { "epoch": 0.26, "grad_norm": 1.0105540752410889, "learning_rate": 1.9840958987709027e-05, "loss": 0.1147, "step": 1528 }, { "epoch": 0.26, "grad_norm": 1.1480813026428223, "learning_rate": 1.9840638062665942e-05, "loss": 0.1216, "step": 1529 }, { "epoch": 0.26, "grad_norm": 1.2609546184539795, "learning_rate": 1.984031681675638e-05, "loss": 0.1257, "step": 1530 }, { "epoch": 0.26, "grad_norm": 1.3249448537826538, "learning_rate": 1.983999524999081e-05, "loss": 0.1273, "step": 1531 }, { "epoch": 0.26, "grad_norm": 1.9014718532562256, "learning_rate": 1.9839673362379725e-05, "loss": 0.1629, "step": 1532 }, { "epoch": 0.26, "grad_norm": 1.09477698802948, "learning_rate": 1.983935115393361e-05, "loss": 0.1337, "step": 1533 }, { "epoch": 0.26, "grad_norm": 1.3205361366271973, "learning_rate": 1.9839028624662978e-05, "loss": 0.1358, "step": 1534 }, { "epoch": 0.26, "grad_norm": 1.1120558977127075, "learning_rate": 1.9838705774578343e-05, "loss": 0.1203, "step": 1535 }, { "epoch": 0.26, "grad_norm": 1.0570582151412964, "learning_rate": 1.9838382603690235e-05, "loss": 0.1092, "step": 1536 }, { "epoch": 0.26, "grad_norm": 1.1054267883300781, "learning_rate": 1.9838059112009188e-05, "loss": 0.1272, "step": 1537 }, { "epoch": 0.26, "grad_norm": 1.4954413175582886, "learning_rate": 1.983773529954575e-05, "loss": 0.1308, "step": 1538 }, { "epoch": 0.26, "grad_norm": 1.3218755722045898, "learning_rate": 1.9837411166310478e-05, "loss": 0.1512, "step": 1539 }, { "epoch": 0.26, "grad_norm": 1.267777681350708, "learning_rate": 1.9837086712313948e-05, "loss": 0.1253, "step": 1540 }, { "epoch": 0.26, "grad_norm": 1.2816622257232666, "learning_rate": 1.9836761937566732e-05, "loss": 0.1257, "step": 1541 }, { "epoch": 0.26, "grad_norm": 1.2318223714828491, "learning_rate": 1.983643684207942e-05, "loss": 0.13, "step": 1542 }, { "epoch": 0.26, "grad_norm": 1.2638144493103027, "learning_rate": 1.9836111425862617e-05, "loss": 0.1033, "step": 1543 }, { "epoch": 0.26, "grad_norm": 1.407200574874878, "learning_rate": 1.983578568892693e-05, "loss": 0.1429, "step": 1544 }, { "epoch": 0.26, "grad_norm": 1.140051007270813, "learning_rate": 1.983545963128298e-05, "loss": 0.1464, "step": 1545 }, { "epoch": 0.26, "grad_norm": 1.3272147178649902, "learning_rate": 1.9835133252941402e-05, "loss": 0.1535, "step": 1546 }, { "epoch": 0.26, "grad_norm": 1.3999075889587402, "learning_rate": 1.983480655391283e-05, "loss": 0.1344, "step": 1547 }, { "epoch": 0.26, "grad_norm": 1.5647350549697876, "learning_rate": 1.9834479534207926e-05, "loss": 0.1246, "step": 1548 }, { "epoch": 0.26, "grad_norm": 1.155699372291565, "learning_rate": 1.9834152193837347e-05, "loss": 0.1324, "step": 1549 }, { "epoch": 0.26, "grad_norm": 1.3118945360183716, "learning_rate": 1.983382453281177e-05, "loss": 0.1642, "step": 1550 }, { "epoch": 0.26, "grad_norm": 1.2156939506530762, "learning_rate": 1.9833496551141874e-05, "loss": 0.1383, "step": 1551 }, { "epoch": 0.26, "grad_norm": 1.7515442371368408, "learning_rate": 1.9833168248838355e-05, "loss": 0.1565, "step": 1552 }, { "epoch": 0.26, "grad_norm": 1.2268283367156982, "learning_rate": 1.983283962591192e-05, "loss": 0.1208, "step": 1553 }, { "epoch": 0.26, "grad_norm": 1.3117225170135498, "learning_rate": 1.9832510682373284e-05, "loss": 0.1526, "step": 1554 }, { "epoch": 0.26, "grad_norm": 1.350454330444336, "learning_rate": 1.983218141823317e-05, "loss": 0.1351, "step": 1555 }, { "epoch": 0.26, "grad_norm": 1.0836466550827026, "learning_rate": 1.983185183350232e-05, "loss": 0.1245, "step": 1556 }, { "epoch": 0.26, "grad_norm": 1.277627944946289, "learning_rate": 1.9831521928191472e-05, "loss": 0.1433, "step": 1557 }, { "epoch": 0.26, "grad_norm": 1.3023402690887451, "learning_rate": 1.983119170231139e-05, "loss": 0.1145, "step": 1558 }, { "epoch": 0.26, "grad_norm": 1.1159416437149048, "learning_rate": 1.9830861155872836e-05, "loss": 0.1238, "step": 1559 }, { "epoch": 0.26, "grad_norm": 1.1428277492523193, "learning_rate": 1.9830530288886592e-05, "loss": 0.131, "step": 1560 }, { "epoch": 0.26, "grad_norm": 1.1037776470184326, "learning_rate": 1.9830199101363443e-05, "loss": 0.1329, "step": 1561 }, { "epoch": 0.26, "grad_norm": 1.2052630186080933, "learning_rate": 1.982986759331419e-05, "loss": 0.1409, "step": 1562 }, { "epoch": 0.26, "grad_norm": 1.377976417541504, "learning_rate": 1.9829535764749642e-05, "loss": 0.1266, "step": 1563 }, { "epoch": 0.26, "grad_norm": 1.4990792274475098, "learning_rate": 1.982920361568062e-05, "loss": 0.1329, "step": 1564 }, { "epoch": 0.26, "grad_norm": 1.2454713582992554, "learning_rate": 1.9828871146117947e-05, "loss": 0.107, "step": 1565 }, { "epoch": 0.26, "grad_norm": 0.9099381566047668, "learning_rate": 1.9828538356072477e-05, "loss": 0.1133, "step": 1566 }, { "epoch": 0.26, "grad_norm": 0.9627493619918823, "learning_rate": 1.982820524555505e-05, "loss": 0.0992, "step": 1567 }, { "epoch": 0.26, "grad_norm": 1.6229257583618164, "learning_rate": 1.9827871814576526e-05, "loss": 0.1186, "step": 1568 }, { "epoch": 0.26, "grad_norm": 1.340831995010376, "learning_rate": 1.982753806314779e-05, "loss": 0.1392, "step": 1569 }, { "epoch": 0.26, "grad_norm": 1.023209571838379, "learning_rate": 1.9827203991279707e-05, "loss": 0.131, "step": 1570 }, { "epoch": 0.26, "grad_norm": 1.2648009061813354, "learning_rate": 1.9826869598983187e-05, "loss": 0.1186, "step": 1571 }, { "epoch": 0.26, "grad_norm": 1.2257994413375854, "learning_rate": 1.9826534886269123e-05, "loss": 0.126, "step": 1572 }, { "epoch": 0.26, "grad_norm": 1.3023006916046143, "learning_rate": 1.9826199853148427e-05, "loss": 0.1118, "step": 1573 }, { "epoch": 0.26, "grad_norm": 1.3355525732040405, "learning_rate": 1.9825864499632034e-05, "loss": 0.0984, "step": 1574 }, { "epoch": 0.26, "grad_norm": 1.0563716888427734, "learning_rate": 1.9825528825730867e-05, "loss": 0.1423, "step": 1575 }, { "epoch": 0.26, "grad_norm": 1.3279615640640259, "learning_rate": 1.9825192831455875e-05, "loss": 0.1326, "step": 1576 }, { "epoch": 0.26, "grad_norm": 1.2060078382492065, "learning_rate": 1.9824856516818017e-05, "loss": 0.1263, "step": 1577 }, { "epoch": 0.26, "grad_norm": 1.2440919876098633, "learning_rate": 1.9824519881828256e-05, "loss": 0.1371, "step": 1578 }, { "epoch": 0.26, "grad_norm": 1.178646206855774, "learning_rate": 1.9824182926497567e-05, "loss": 0.1157, "step": 1579 }, { "epoch": 0.26, "grad_norm": 0.9778912663459778, "learning_rate": 1.982384565083694e-05, "loss": 0.1013, "step": 1580 }, { "epoch": 0.26, "grad_norm": 1.3783237934112549, "learning_rate": 1.982350805485737e-05, "loss": 0.1563, "step": 1581 }, { "epoch": 0.26, "grad_norm": 1.489173173904419, "learning_rate": 1.9823170138569865e-05, "loss": 0.152, "step": 1582 }, { "epoch": 0.26, "grad_norm": 1.2795743942260742, "learning_rate": 1.9822831901985444e-05, "loss": 0.1223, "step": 1583 }, { "epoch": 0.26, "grad_norm": 0.9983588457107544, "learning_rate": 1.9822493345115137e-05, "loss": 0.1266, "step": 1584 }, { "epoch": 0.27, "grad_norm": 1.4789466857910156, "learning_rate": 1.982215446796998e-05, "loss": 0.1688, "step": 1585 }, { "epoch": 0.27, "grad_norm": 1.2931121587753296, "learning_rate": 1.9821815270561027e-05, "loss": 0.1104, "step": 1586 }, { "epoch": 0.27, "grad_norm": 1.6864748001098633, "learning_rate": 1.982147575289933e-05, "loss": 0.1602, "step": 1587 }, { "epoch": 0.27, "grad_norm": 1.156613826751709, "learning_rate": 1.982113591499597e-05, "loss": 0.1228, "step": 1588 }, { "epoch": 0.27, "grad_norm": 1.1548506021499634, "learning_rate": 1.9820795756862016e-05, "loss": 0.1183, "step": 1589 }, { "epoch": 0.27, "grad_norm": 1.3408217430114746, "learning_rate": 1.982045527850857e-05, "loss": 0.144, "step": 1590 }, { "epoch": 0.27, "grad_norm": 1.417561650276184, "learning_rate": 1.9820114479946732e-05, "loss": 0.1129, "step": 1591 }, { "epoch": 0.27, "grad_norm": 1.1236374378204346, "learning_rate": 1.9819773361187604e-05, "loss": 0.1221, "step": 1592 }, { "epoch": 0.27, "grad_norm": 1.3551844358444214, "learning_rate": 1.981943192224232e-05, "loss": 0.136, "step": 1593 }, { "epoch": 0.27, "grad_norm": 1.1826781034469604, "learning_rate": 1.981909016312201e-05, "loss": 0.1082, "step": 1594 }, { "epoch": 0.27, "grad_norm": 1.2077240943908691, "learning_rate": 1.9818748083837812e-05, "loss": 0.1441, "step": 1595 }, { "epoch": 0.27, "grad_norm": 1.0792737007141113, "learning_rate": 1.981840568440089e-05, "loss": 0.1254, "step": 1596 }, { "epoch": 0.27, "grad_norm": 1.3479570150375366, "learning_rate": 1.98180629648224e-05, "loss": 0.1593, "step": 1597 }, { "epoch": 0.27, "grad_norm": 1.0492146015167236, "learning_rate": 1.9817719925113522e-05, "loss": 0.1552, "step": 1598 }, { "epoch": 0.27, "grad_norm": 1.2245776653289795, "learning_rate": 1.9817376565285437e-05, "loss": 0.1237, "step": 1599 }, { "epoch": 0.27, "grad_norm": 1.5208271741867065, "learning_rate": 1.9817032885349337e-05, "loss": 0.1174, "step": 1600 }, { "epoch": 0.27, "grad_norm": 1.2364557981491089, "learning_rate": 1.9816688885316444e-05, "loss": 0.1046, "step": 1601 }, { "epoch": 0.27, "grad_norm": 1.1926867961883545, "learning_rate": 1.981634456519796e-05, "loss": 0.1345, "step": 1602 }, { "epoch": 0.27, "grad_norm": 1.1950587034225464, "learning_rate": 1.9815999925005114e-05, "loss": 0.1431, "step": 1603 }, { "epoch": 0.27, "grad_norm": 1.1520657539367676, "learning_rate": 1.9815654964749146e-05, "loss": 0.1202, "step": 1604 }, { "epoch": 0.27, "grad_norm": 1.435591220855713, "learning_rate": 1.9815309684441305e-05, "loss": 0.1531, "step": 1605 }, { "epoch": 0.27, "grad_norm": 1.1532028913497925, "learning_rate": 1.981496408409285e-05, "loss": 0.1215, "step": 1606 }, { "epoch": 0.27, "grad_norm": 1.2328985929489136, "learning_rate": 1.9814618163715044e-05, "loss": 0.1298, "step": 1607 }, { "epoch": 0.27, "grad_norm": 1.1284425258636475, "learning_rate": 1.9814271923319172e-05, "loss": 0.1307, "step": 1608 }, { "epoch": 0.27, "grad_norm": 1.3301169872283936, "learning_rate": 1.9813925362916523e-05, "loss": 0.1017, "step": 1609 }, { "epoch": 0.27, "grad_norm": 1.526397705078125, "learning_rate": 1.9813578482518393e-05, "loss": 0.1543, "step": 1610 }, { "epoch": 0.27, "grad_norm": 1.344927191734314, "learning_rate": 1.9813231282136094e-05, "loss": 0.1472, "step": 1611 }, { "epoch": 0.27, "grad_norm": 1.0859791040420532, "learning_rate": 1.9812883761780946e-05, "loss": 0.1263, "step": 1612 }, { "epoch": 0.27, "grad_norm": 1.3574715852737427, "learning_rate": 1.9812535921464287e-05, "loss": 0.1345, "step": 1613 }, { "epoch": 0.27, "grad_norm": 1.1013859510421753, "learning_rate": 1.981218776119745e-05, "loss": 0.1219, "step": 1614 }, { "epoch": 0.27, "grad_norm": 1.32987380027771, "learning_rate": 1.9811839280991795e-05, "loss": 0.139, "step": 1615 }, { "epoch": 0.27, "grad_norm": 1.1285754442214966, "learning_rate": 1.9811490480858677e-05, "loss": 0.127, "step": 1616 }, { "epoch": 0.27, "grad_norm": 1.1616246700286865, "learning_rate": 1.9811141360809472e-05, "loss": 0.1122, "step": 1617 }, { "epoch": 0.27, "grad_norm": 1.061583161354065, "learning_rate": 1.981079192085557e-05, "loss": 0.1104, "step": 1618 }, { "epoch": 0.27, "grad_norm": 1.3321889638900757, "learning_rate": 1.9810442161008357e-05, "loss": 0.1592, "step": 1619 }, { "epoch": 0.27, "grad_norm": 1.415734052658081, "learning_rate": 1.981009208127924e-05, "loss": 0.1273, "step": 1620 }, { "epoch": 0.27, "grad_norm": 1.3009597063064575, "learning_rate": 1.980974168167963e-05, "loss": 0.1191, "step": 1621 }, { "epoch": 0.27, "grad_norm": 1.198444128036499, "learning_rate": 1.9809390962220957e-05, "loss": 0.119, "step": 1622 }, { "epoch": 0.27, "grad_norm": 1.1482032537460327, "learning_rate": 1.9809039922914658e-05, "loss": 0.1371, "step": 1623 }, { "epoch": 0.27, "grad_norm": 1.32538640499115, "learning_rate": 1.9808688563772173e-05, "loss": 0.1008, "step": 1624 }, { "epoch": 0.27, "grad_norm": 1.3438230752944946, "learning_rate": 1.9808336884804965e-05, "loss": 0.1397, "step": 1625 }, { "epoch": 0.27, "grad_norm": 1.391239047050476, "learning_rate": 1.9807984886024497e-05, "loss": 0.1227, "step": 1626 }, { "epoch": 0.27, "grad_norm": 1.37568998336792, "learning_rate": 1.9807632567442246e-05, "loss": 0.1272, "step": 1627 }, { "epoch": 0.27, "grad_norm": 1.1324665546417236, "learning_rate": 1.98072799290697e-05, "loss": 0.1265, "step": 1628 }, { "epoch": 0.27, "grad_norm": 1.183929204940796, "learning_rate": 1.980692697091836e-05, "loss": 0.1319, "step": 1629 }, { "epoch": 0.27, "grad_norm": 1.0816597938537598, "learning_rate": 1.9806573692999732e-05, "loss": 0.1134, "step": 1630 }, { "epoch": 0.27, "grad_norm": 1.4554367065429688, "learning_rate": 1.9806220095325337e-05, "loss": 0.1093, "step": 1631 }, { "epoch": 0.27, "grad_norm": 1.4028022289276123, "learning_rate": 1.9805866177906704e-05, "loss": 0.1421, "step": 1632 }, { "epoch": 0.27, "grad_norm": 1.2398052215576172, "learning_rate": 1.980551194075537e-05, "loss": 0.1434, "step": 1633 }, { "epoch": 0.27, "grad_norm": 1.491612434387207, "learning_rate": 1.9805157383882888e-05, "loss": 0.137, "step": 1634 }, { "epoch": 0.27, "grad_norm": 1.3302453756332397, "learning_rate": 1.980480250730082e-05, "loss": 0.1363, "step": 1635 }, { "epoch": 0.27, "grad_norm": 1.2633012533187866, "learning_rate": 1.9804447311020735e-05, "loss": 0.1174, "step": 1636 }, { "epoch": 0.27, "grad_norm": 1.3023335933685303, "learning_rate": 1.9804091795054216e-05, "loss": 0.0919, "step": 1637 }, { "epoch": 0.27, "grad_norm": 1.2881733179092407, "learning_rate": 1.9803735959412854e-05, "loss": 0.1029, "step": 1638 }, { "epoch": 0.27, "grad_norm": 2.233410120010376, "learning_rate": 1.980337980410825e-05, "loss": 0.1332, "step": 1639 }, { "epoch": 0.27, "grad_norm": 1.3593204021453857, "learning_rate": 1.9803023329152022e-05, "loss": 0.12, "step": 1640 }, { "epoch": 0.27, "grad_norm": 1.2722417116165161, "learning_rate": 1.9802666534555788e-05, "loss": 0.1301, "step": 1641 }, { "epoch": 0.27, "grad_norm": 1.0549464225769043, "learning_rate": 1.9802309420331186e-05, "loss": 0.1156, "step": 1642 }, { "epoch": 0.27, "grad_norm": 1.0860763788223267, "learning_rate": 1.9801951986489855e-05, "loss": 0.1209, "step": 1643 }, { "epoch": 0.27, "grad_norm": 1.53933846950531, "learning_rate": 1.9801594233043454e-05, "loss": 0.1317, "step": 1644 }, { "epoch": 0.28, "grad_norm": 1.288631796836853, "learning_rate": 1.9801236160003644e-05, "loss": 0.1076, "step": 1645 }, { "epoch": 0.28, "grad_norm": 1.3446502685546875, "learning_rate": 1.980087776738211e-05, "loss": 0.1361, "step": 1646 }, { "epoch": 0.28, "grad_norm": 1.1945114135742188, "learning_rate": 1.9800519055190522e-05, "loss": 0.1395, "step": 1647 }, { "epoch": 0.28, "grad_norm": 1.0845708847045898, "learning_rate": 1.9800160023440588e-05, "loss": 0.107, "step": 1648 }, { "epoch": 0.28, "grad_norm": 1.271594524383545, "learning_rate": 1.9799800672144015e-05, "loss": 0.1402, "step": 1649 }, { "epoch": 0.28, "grad_norm": 1.35713529586792, "learning_rate": 1.9799441001312516e-05, "loss": 0.1223, "step": 1650 }, { "epoch": 0.28, "grad_norm": 0.9640135765075684, "learning_rate": 1.9799081010957817e-05, "loss": 0.1113, "step": 1651 }, { "epoch": 0.28, "grad_norm": 1.4023321866989136, "learning_rate": 1.979872070109166e-05, "loss": 0.118, "step": 1652 }, { "epoch": 0.28, "grad_norm": 1.6834518909454346, "learning_rate": 1.9798360071725792e-05, "loss": 0.121, "step": 1653 }, { "epoch": 0.28, "grad_norm": 1.4060084819793701, "learning_rate": 1.9797999122871968e-05, "loss": 0.1452, "step": 1654 }, { "epoch": 0.28, "grad_norm": 1.1722168922424316, "learning_rate": 1.9797637854541965e-05, "loss": 0.1288, "step": 1655 }, { "epoch": 0.28, "grad_norm": 1.3323309421539307, "learning_rate": 1.9797276266747557e-05, "loss": 0.1376, "step": 1656 }, { "epoch": 0.28, "grad_norm": 0.9618147015571594, "learning_rate": 1.9796914359500534e-05, "loss": 0.1056, "step": 1657 }, { "epoch": 0.28, "grad_norm": 1.1456599235534668, "learning_rate": 1.9796552132812698e-05, "loss": 0.1263, "step": 1658 }, { "epoch": 0.28, "grad_norm": 1.1182653903961182, "learning_rate": 1.979618958669586e-05, "loss": 0.1442, "step": 1659 }, { "epoch": 0.28, "grad_norm": 1.3309327363967896, "learning_rate": 1.979582672116184e-05, "loss": 0.133, "step": 1660 }, { "epoch": 0.28, "grad_norm": 1.3544152975082397, "learning_rate": 1.9795463536222474e-05, "loss": 0.1273, "step": 1661 }, { "epoch": 0.28, "grad_norm": 1.0630214214324951, "learning_rate": 1.9795100031889597e-05, "loss": 0.1195, "step": 1662 }, { "epoch": 0.28, "grad_norm": 1.2697113752365112, "learning_rate": 1.9794736208175068e-05, "loss": 0.1133, "step": 1663 }, { "epoch": 0.28, "grad_norm": 1.5044777393341064, "learning_rate": 1.9794372065090743e-05, "loss": 0.1549, "step": 1664 }, { "epoch": 0.28, "grad_norm": 1.2532824277877808, "learning_rate": 1.9794007602648505e-05, "loss": 0.1212, "step": 1665 }, { "epoch": 0.28, "grad_norm": 1.1898916959762573, "learning_rate": 1.9793642820860227e-05, "loss": 0.1388, "step": 1666 }, { "epoch": 0.28, "grad_norm": 1.3032747507095337, "learning_rate": 1.979327771973781e-05, "loss": 0.1224, "step": 1667 }, { "epoch": 0.28, "grad_norm": 1.2567064762115479, "learning_rate": 1.979291229929316e-05, "loss": 0.1135, "step": 1668 }, { "epoch": 0.28, "grad_norm": 1.3815064430236816, "learning_rate": 1.9792546559538188e-05, "loss": 0.1345, "step": 1669 }, { "epoch": 0.28, "grad_norm": 1.1067605018615723, "learning_rate": 1.9792180500484818e-05, "loss": 0.1245, "step": 1670 }, { "epoch": 0.28, "grad_norm": 1.1690762042999268, "learning_rate": 1.9791814122144988e-05, "loss": 0.1298, "step": 1671 }, { "epoch": 0.28, "grad_norm": 1.1839570999145508, "learning_rate": 1.9791447424530647e-05, "loss": 0.1307, "step": 1672 }, { "epoch": 0.28, "grad_norm": 1.215511679649353, "learning_rate": 1.979108040765375e-05, "loss": 0.1074, "step": 1673 }, { "epoch": 0.28, "grad_norm": 1.2936179637908936, "learning_rate": 1.979071307152626e-05, "loss": 0.1199, "step": 1674 }, { "epoch": 0.28, "grad_norm": 1.7423700094223022, "learning_rate": 1.9790345416160157e-05, "loss": 0.1251, "step": 1675 }, { "epoch": 0.28, "grad_norm": 1.1848812103271484, "learning_rate": 1.9789977441567433e-05, "loss": 0.1255, "step": 1676 }, { "epoch": 0.28, "grad_norm": 1.3311995267868042, "learning_rate": 1.9789609147760082e-05, "loss": 0.1123, "step": 1677 }, { "epoch": 0.28, "grad_norm": 1.204314112663269, "learning_rate": 1.978924053475011e-05, "loss": 0.1175, "step": 1678 }, { "epoch": 0.28, "grad_norm": 1.421020269393921, "learning_rate": 1.9788871602549542e-05, "loss": 0.1331, "step": 1679 }, { "epoch": 0.28, "grad_norm": 1.2113568782806396, "learning_rate": 1.9788502351170408e-05, "loss": 0.1314, "step": 1680 }, { "epoch": 0.28, "grad_norm": 1.4113420248031616, "learning_rate": 1.978813278062474e-05, "loss": 0.1184, "step": 1681 }, { "epoch": 0.28, "grad_norm": 1.1474597454071045, "learning_rate": 1.9787762890924592e-05, "loss": 0.1168, "step": 1682 }, { "epoch": 0.28, "grad_norm": 1.0883076190948486, "learning_rate": 1.978739268208203e-05, "loss": 0.1093, "step": 1683 }, { "epoch": 0.28, "grad_norm": 1.3180131912231445, "learning_rate": 1.9787022154109123e-05, "loss": 0.128, "step": 1684 }, { "epoch": 0.28, "grad_norm": 0.996225118637085, "learning_rate": 1.9786651307017948e-05, "loss": 0.1144, "step": 1685 }, { "epoch": 0.28, "grad_norm": 1.084244966506958, "learning_rate": 1.97862801408206e-05, "loss": 0.1422, "step": 1686 }, { "epoch": 0.28, "grad_norm": 1.1036986112594604, "learning_rate": 1.978590865552918e-05, "loss": 0.1208, "step": 1687 }, { "epoch": 0.28, "grad_norm": 1.219120740890503, "learning_rate": 1.9785536851155803e-05, "loss": 0.0987, "step": 1688 }, { "epoch": 0.28, "grad_norm": 1.5819110870361328, "learning_rate": 1.9785164727712588e-05, "loss": 0.1423, "step": 1689 }, { "epoch": 0.28, "grad_norm": 1.2849671840667725, "learning_rate": 1.9784792285211672e-05, "loss": 0.1294, "step": 1690 }, { "epoch": 0.28, "grad_norm": 1.086565375328064, "learning_rate": 1.9784419523665203e-05, "loss": 0.1217, "step": 1691 }, { "epoch": 0.28, "grad_norm": 1.2961328029632568, "learning_rate": 1.9784046443085328e-05, "loss": 0.1378, "step": 1692 }, { "epoch": 0.28, "grad_norm": 1.2450405359268188, "learning_rate": 1.9783673043484213e-05, "loss": 0.1296, "step": 1693 }, { "epoch": 0.28, "grad_norm": 1.17038094997406, "learning_rate": 1.9783299324874033e-05, "loss": 0.1463, "step": 1694 }, { "epoch": 0.28, "grad_norm": 1.291407823562622, "learning_rate": 1.9782925287266977e-05, "loss": 0.121, "step": 1695 }, { "epoch": 0.28, "grad_norm": 1.0794833898544312, "learning_rate": 1.978255093067524e-05, "loss": 0.1267, "step": 1696 }, { "epoch": 0.28, "grad_norm": 1.0504279136657715, "learning_rate": 1.978217625511103e-05, "loss": 0.1024, "step": 1697 }, { "epoch": 0.28, "grad_norm": 1.0022412538528442, "learning_rate": 1.9781801260586556e-05, "loss": 0.1019, "step": 1698 }, { "epoch": 0.28, "grad_norm": 1.2013823986053467, "learning_rate": 1.9781425947114056e-05, "loss": 0.1323, "step": 1699 }, { "epoch": 0.28, "grad_norm": 0.9652023911476135, "learning_rate": 1.978105031470576e-05, "loss": 0.1, "step": 1700 }, { "epoch": 0.28, "grad_norm": 1.346266746520996, "learning_rate": 1.9780674363373916e-05, "loss": 0.1242, "step": 1701 }, { "epoch": 0.28, "grad_norm": 1.3206815719604492, "learning_rate": 1.9780298093130787e-05, "loss": 0.1124, "step": 1702 }, { "epoch": 0.28, "grad_norm": 1.1025573015213013, "learning_rate": 1.9779921503988636e-05, "loss": 0.1035, "step": 1703 }, { "epoch": 0.28, "grad_norm": 1.3647526502609253, "learning_rate": 1.9779544595959747e-05, "loss": 0.1369, "step": 1704 }, { "epoch": 0.29, "grad_norm": 1.1557033061981201, "learning_rate": 1.977916736905641e-05, "loss": 0.1163, "step": 1705 }, { "epoch": 0.29, "grad_norm": 1.4361646175384521, "learning_rate": 1.977878982329092e-05, "loss": 0.1289, "step": 1706 }, { "epoch": 0.29, "grad_norm": 1.1937642097473145, "learning_rate": 1.9778411958675593e-05, "loss": 0.1328, "step": 1707 }, { "epoch": 0.29, "grad_norm": 1.1695642471313477, "learning_rate": 1.9778033775222744e-05, "loss": 0.1385, "step": 1708 }, { "epoch": 0.29, "grad_norm": 1.2293859720230103, "learning_rate": 1.9777655272944708e-05, "loss": 0.1295, "step": 1709 }, { "epoch": 0.29, "grad_norm": 1.3245460987091064, "learning_rate": 1.9777276451853828e-05, "loss": 0.1154, "step": 1710 }, { "epoch": 0.29, "grad_norm": 1.6664546728134155, "learning_rate": 1.9776897311962453e-05, "loss": 0.1341, "step": 1711 }, { "epoch": 0.29, "grad_norm": 1.0759568214416504, "learning_rate": 1.9776517853282945e-05, "loss": 0.119, "step": 1712 }, { "epoch": 0.29, "grad_norm": 1.4392656087875366, "learning_rate": 1.9776138075827676e-05, "loss": 0.1616, "step": 1713 }, { "epoch": 0.29, "grad_norm": 1.2303451299667358, "learning_rate": 1.977575797960903e-05, "loss": 0.1169, "step": 1714 }, { "epoch": 0.29, "grad_norm": 1.0955573320388794, "learning_rate": 1.9775377564639407e-05, "loss": 0.0869, "step": 1715 }, { "epoch": 0.29, "grad_norm": 1.3447932004928589, "learning_rate": 1.9774996830931202e-05, "loss": 0.1251, "step": 1716 }, { "epoch": 0.29, "grad_norm": 1.1271616220474243, "learning_rate": 1.977461577849683e-05, "loss": 0.1294, "step": 1717 }, { "epoch": 0.29, "grad_norm": 1.0525027513504028, "learning_rate": 1.9774234407348723e-05, "loss": 0.1283, "step": 1718 }, { "epoch": 0.29, "grad_norm": 1.0948824882507324, "learning_rate": 1.9773852717499308e-05, "loss": 0.1136, "step": 1719 }, { "epoch": 0.29, "grad_norm": 1.2093974351882935, "learning_rate": 1.9773470708961035e-05, "loss": 0.1316, "step": 1720 }, { "epoch": 0.29, "grad_norm": 1.1153706312179565, "learning_rate": 1.9773088381746358e-05, "loss": 0.1174, "step": 1721 }, { "epoch": 0.29, "grad_norm": 1.2029297351837158, "learning_rate": 1.9772705735867745e-05, "loss": 0.1318, "step": 1722 }, { "epoch": 0.29, "grad_norm": 1.1420271396636963, "learning_rate": 1.9772322771337672e-05, "loss": 0.1083, "step": 1723 }, { "epoch": 0.29, "grad_norm": 1.1165138483047485, "learning_rate": 1.9771939488168624e-05, "loss": 0.11, "step": 1724 }, { "epoch": 0.29, "grad_norm": 1.0943313837051392, "learning_rate": 1.97715558863731e-05, "loss": 0.1041, "step": 1725 }, { "epoch": 0.29, "grad_norm": 1.1141517162322998, "learning_rate": 1.977117196596361e-05, "loss": 0.0988, "step": 1726 }, { "epoch": 0.29, "grad_norm": 1.2386780977249146, "learning_rate": 1.9770787726952666e-05, "loss": 0.1092, "step": 1727 }, { "epoch": 0.29, "grad_norm": 1.5753633975982666, "learning_rate": 1.9770403169352804e-05, "loss": 0.1662, "step": 1728 }, { "epoch": 0.29, "grad_norm": 1.0950673818588257, "learning_rate": 1.977001829317656e-05, "loss": 0.1043, "step": 1729 }, { "epoch": 0.29, "grad_norm": 1.0601305961608887, "learning_rate": 1.9769633098436482e-05, "loss": 0.1141, "step": 1730 }, { "epoch": 0.29, "grad_norm": 1.0816189050674438, "learning_rate": 1.9769247585145135e-05, "loss": 0.1146, "step": 1731 }, { "epoch": 0.29, "grad_norm": 1.287049412727356, "learning_rate": 1.976886175331508e-05, "loss": 0.129, "step": 1732 }, { "epoch": 0.29, "grad_norm": 1.3824020624160767, "learning_rate": 1.9768475602958903e-05, "loss": 0.1282, "step": 1733 }, { "epoch": 0.29, "grad_norm": 1.2148631811141968, "learning_rate": 1.9768089134089193e-05, "loss": 0.1239, "step": 1734 }, { "epoch": 0.29, "grad_norm": 1.3179481029510498, "learning_rate": 1.9767702346718557e-05, "loss": 0.1322, "step": 1735 }, { "epoch": 0.29, "grad_norm": 1.2898746728897095, "learning_rate": 1.9767315240859603e-05, "loss": 0.1594, "step": 1736 }, { "epoch": 0.29, "grad_norm": 1.6327557563781738, "learning_rate": 1.9766927816524948e-05, "loss": 0.118, "step": 1737 }, { "epoch": 0.29, "grad_norm": 1.0294053554534912, "learning_rate": 1.9766540073727232e-05, "loss": 0.1079, "step": 1738 }, { "epoch": 0.29, "grad_norm": 1.4039125442504883, "learning_rate": 1.9766152012479095e-05, "loss": 0.1501, "step": 1739 }, { "epoch": 0.29, "grad_norm": 1.1546962261199951, "learning_rate": 1.976576363279319e-05, "loss": 0.1274, "step": 1740 }, { "epoch": 0.29, "grad_norm": 1.5256305932998657, "learning_rate": 1.9765374934682176e-05, "loss": 0.1331, "step": 1741 }, { "epoch": 0.29, "grad_norm": 1.002063512802124, "learning_rate": 1.9764985918158738e-05, "loss": 0.1113, "step": 1742 }, { "epoch": 0.29, "grad_norm": 1.3159137964248657, "learning_rate": 1.976459658323555e-05, "loss": 0.1181, "step": 1743 }, { "epoch": 0.29, "grad_norm": 1.2047786712646484, "learning_rate": 1.9764206929925312e-05, "loss": 0.116, "step": 1744 }, { "epoch": 0.29, "grad_norm": 0.9595859050750732, "learning_rate": 1.976381695824073e-05, "loss": 0.0997, "step": 1745 }, { "epoch": 0.29, "grad_norm": 1.4202134609222412, "learning_rate": 1.9763426668194513e-05, "loss": 0.1359, "step": 1746 }, { "epoch": 0.29, "grad_norm": 1.2642661333084106, "learning_rate": 1.9763036059799393e-05, "loss": 0.1314, "step": 1747 }, { "epoch": 0.29, "grad_norm": 1.2811552286148071, "learning_rate": 1.976264513306811e-05, "loss": 0.111, "step": 1748 }, { "epoch": 0.29, "grad_norm": 1.1522492170333862, "learning_rate": 1.97622538880134e-05, "loss": 0.1165, "step": 1749 }, { "epoch": 0.29, "grad_norm": 1.2168022394180298, "learning_rate": 1.9761862324648024e-05, "loss": 0.1248, "step": 1750 }, { "epoch": 0.29, "grad_norm": 1.2084226608276367, "learning_rate": 1.9761470442984755e-05, "loss": 0.1287, "step": 1751 }, { "epoch": 0.29, "grad_norm": 1.3047962188720703, "learning_rate": 1.9761078243036364e-05, "loss": 0.1208, "step": 1752 }, { "epoch": 0.29, "grad_norm": 1.1299186944961548, "learning_rate": 1.9760685724815642e-05, "loss": 0.1376, "step": 1753 }, { "epoch": 0.29, "grad_norm": 1.4274547100067139, "learning_rate": 1.976029288833539e-05, "loss": 0.128, "step": 1754 }, { "epoch": 0.29, "grad_norm": 1.1585016250610352, "learning_rate": 1.9759899733608413e-05, "loss": 0.0958, "step": 1755 }, { "epoch": 0.29, "grad_norm": 1.1653286218643188, "learning_rate": 1.975950626064753e-05, "loss": 0.0998, "step": 1756 }, { "epoch": 0.29, "grad_norm": 1.213077425956726, "learning_rate": 1.975911246946557e-05, "loss": 0.1477, "step": 1757 }, { "epoch": 0.29, "grad_norm": 1.3462493419647217, "learning_rate": 1.9758718360075383e-05, "loss": 0.1287, "step": 1758 }, { "epoch": 0.29, "grad_norm": 1.2642985582351685, "learning_rate": 1.9758323932489806e-05, "loss": 0.1329, "step": 1759 }, { "epoch": 0.29, "grad_norm": 1.687368631362915, "learning_rate": 1.9757929186721703e-05, "loss": 0.153, "step": 1760 }, { "epoch": 0.29, "grad_norm": 1.2571134567260742, "learning_rate": 1.9757534122783953e-05, "loss": 0.1249, "step": 1761 }, { "epoch": 0.29, "grad_norm": 1.5332449674606323, "learning_rate": 1.975713874068943e-05, "loss": 0.1385, "step": 1762 }, { "epoch": 0.29, "grad_norm": 1.354209065437317, "learning_rate": 1.975674304045103e-05, "loss": 0.0777, "step": 1763 }, { "epoch": 0.3, "grad_norm": 1.1665831804275513, "learning_rate": 1.975634702208165e-05, "loss": 0.1155, "step": 1764 }, { "epoch": 0.3, "grad_norm": 1.044216513633728, "learning_rate": 1.975595068559421e-05, "loss": 0.1106, "step": 1765 }, { "epoch": 0.3, "grad_norm": 1.180978536605835, "learning_rate": 1.9755554031001628e-05, "loss": 0.1332, "step": 1766 }, { "epoch": 0.3, "grad_norm": 1.841177225112915, "learning_rate": 1.975515705831684e-05, "loss": 0.1157, "step": 1767 }, { "epoch": 0.3, "grad_norm": 1.2990822792053223, "learning_rate": 1.9754759767552783e-05, "loss": 0.11, "step": 1768 }, { "epoch": 0.3, "grad_norm": 1.2098814249038696, "learning_rate": 1.9754362158722422e-05, "loss": 0.1156, "step": 1769 }, { "epoch": 0.3, "grad_norm": 1.1715060472488403, "learning_rate": 1.9753964231838717e-05, "loss": 0.1232, "step": 1770 }, { "epoch": 0.3, "grad_norm": 1.208156943321228, "learning_rate": 1.975356598691464e-05, "loss": 0.1437, "step": 1771 }, { "epoch": 0.3, "grad_norm": 1.0013118982315063, "learning_rate": 1.9753167423963177e-05, "loss": 0.1134, "step": 1772 }, { "epoch": 0.3, "grad_norm": 0.9876515865325928, "learning_rate": 1.9752768542997326e-05, "loss": 0.0988, "step": 1773 }, { "epoch": 0.3, "grad_norm": 1.0306599140167236, "learning_rate": 1.975236934403009e-05, "loss": 0.1134, "step": 1774 }, { "epoch": 0.3, "grad_norm": 1.1725831031799316, "learning_rate": 1.9751969827074495e-05, "loss": 0.1272, "step": 1775 }, { "epoch": 0.3, "grad_norm": 0.9916427731513977, "learning_rate": 1.9751569992143553e-05, "loss": 0.1, "step": 1776 }, { "epoch": 0.3, "grad_norm": 1.13751220703125, "learning_rate": 1.975116983925031e-05, "loss": 0.1195, "step": 1777 }, { "epoch": 0.3, "grad_norm": 1.4058449268341064, "learning_rate": 1.975076936840781e-05, "loss": 0.1387, "step": 1778 }, { "epoch": 0.3, "grad_norm": 1.3164483308792114, "learning_rate": 1.9750368579629117e-05, "loss": 0.1363, "step": 1779 }, { "epoch": 0.3, "grad_norm": 1.3154593706130981, "learning_rate": 1.9749967472927292e-05, "loss": 0.1308, "step": 1780 }, { "epoch": 0.3, "grad_norm": 1.2054460048675537, "learning_rate": 1.974956604831542e-05, "loss": 0.1216, "step": 1781 }, { "epoch": 0.3, "grad_norm": 1.057810664176941, "learning_rate": 1.9749164305806584e-05, "loss": 0.1231, "step": 1782 }, { "epoch": 0.3, "grad_norm": 0.9785473346710205, "learning_rate": 1.9748762245413884e-05, "loss": 0.1037, "step": 1783 }, { "epoch": 0.3, "grad_norm": 1.1742660999298096, "learning_rate": 1.9748359867150434e-05, "loss": 0.1371, "step": 1784 }, { "epoch": 0.3, "grad_norm": 1.2060216665267944, "learning_rate": 1.974795717102935e-05, "loss": 0.145, "step": 1785 }, { "epoch": 0.3, "grad_norm": 1.114696741104126, "learning_rate": 1.9747554157063766e-05, "loss": 0.1237, "step": 1786 }, { "epoch": 0.3, "grad_norm": 0.9673375487327576, "learning_rate": 1.974715082526682e-05, "loss": 0.1103, "step": 1787 }, { "epoch": 0.3, "grad_norm": 1.08070707321167, "learning_rate": 1.9746747175651662e-05, "loss": 0.1132, "step": 1788 }, { "epoch": 0.3, "grad_norm": 1.116371512413025, "learning_rate": 1.9746343208231454e-05, "loss": 0.1036, "step": 1789 }, { "epoch": 0.3, "grad_norm": 1.0843442678451538, "learning_rate": 1.9745938923019376e-05, "loss": 0.1235, "step": 1790 }, { "epoch": 0.3, "grad_norm": 1.2778760194778442, "learning_rate": 1.9745534320028596e-05, "loss": 0.1102, "step": 1791 }, { "epoch": 0.3, "grad_norm": 1.2277052402496338, "learning_rate": 1.9745129399272318e-05, "loss": 0.1117, "step": 1792 }, { "epoch": 0.3, "grad_norm": 1.0244777202606201, "learning_rate": 1.974472416076374e-05, "loss": 0.0984, "step": 1793 }, { "epoch": 0.3, "grad_norm": 1.2154607772827148, "learning_rate": 1.9744318604516077e-05, "loss": 0.1003, "step": 1794 }, { "epoch": 0.3, "grad_norm": 0.9814278483390808, "learning_rate": 1.974391273054255e-05, "loss": 0.0947, "step": 1795 }, { "epoch": 0.3, "grad_norm": 1.3488789796829224, "learning_rate": 1.97435065388564e-05, "loss": 0.1264, "step": 1796 }, { "epoch": 0.3, "grad_norm": 1.2085773944854736, "learning_rate": 1.9743100029470862e-05, "loss": 0.1298, "step": 1797 }, { "epoch": 0.3, "grad_norm": 0.9385933876037598, "learning_rate": 1.9742693202399198e-05, "loss": 0.0871, "step": 1798 }, { "epoch": 0.3, "grad_norm": 1.232357382774353, "learning_rate": 1.974228605765467e-05, "loss": 0.1129, "step": 1799 }, { "epoch": 0.3, "grad_norm": 1.069156527519226, "learning_rate": 1.974187859525055e-05, "loss": 0.1067, "step": 1800 }, { "epoch": 0.3, "grad_norm": 1.2078063488006592, "learning_rate": 1.974147081520013e-05, "loss": 0.1293, "step": 1801 }, { "epoch": 0.3, "grad_norm": 1.0829449892044067, "learning_rate": 1.9741062717516706e-05, "loss": 0.1058, "step": 1802 }, { "epoch": 0.3, "grad_norm": 1.1769086122512817, "learning_rate": 1.9740654302213576e-05, "loss": 0.0987, "step": 1803 }, { "epoch": 0.3, "grad_norm": 1.0999709367752075, "learning_rate": 1.9740245569304072e-05, "loss": 0.1121, "step": 1804 }, { "epoch": 0.3, "grad_norm": 1.047881007194519, "learning_rate": 1.9739836518801506e-05, "loss": 0.1224, "step": 1805 }, { "epoch": 0.3, "grad_norm": 1.1270179748535156, "learning_rate": 1.9739427150719223e-05, "loss": 0.1202, "step": 1806 }, { "epoch": 0.3, "grad_norm": 1.1937873363494873, "learning_rate": 1.9739017465070573e-05, "loss": 0.0927, "step": 1807 }, { "epoch": 0.3, "grad_norm": 1.1940953731536865, "learning_rate": 1.973860746186891e-05, "loss": 0.134, "step": 1808 }, { "epoch": 0.3, "grad_norm": 1.1261656284332275, "learning_rate": 1.9738197141127604e-05, "loss": 0.1402, "step": 1809 }, { "epoch": 0.3, "grad_norm": 0.8387306928634644, "learning_rate": 1.9737786502860033e-05, "loss": 0.095, "step": 1810 }, { "epoch": 0.3, "grad_norm": 1.525800108909607, "learning_rate": 1.973737554707959e-05, "loss": 0.121, "step": 1811 }, { "epoch": 0.3, "grad_norm": 1.502030372619629, "learning_rate": 1.973696427379967e-05, "loss": 0.1376, "step": 1812 }, { "epoch": 0.3, "grad_norm": 0.9807289838790894, "learning_rate": 1.9736552683033686e-05, "loss": 0.1054, "step": 1813 }, { "epoch": 0.3, "grad_norm": 1.2885433435440063, "learning_rate": 1.9736140774795055e-05, "loss": 0.1293, "step": 1814 }, { "epoch": 0.3, "grad_norm": 0.9770984053611755, "learning_rate": 1.9735728549097214e-05, "loss": 0.1017, "step": 1815 }, { "epoch": 0.3, "grad_norm": 1.4077545404434204, "learning_rate": 1.9735316005953603e-05, "loss": 0.1421, "step": 1816 }, { "epoch": 0.3, "grad_norm": 1.2918424606323242, "learning_rate": 1.9734903145377665e-05, "loss": 0.1076, "step": 1817 }, { "epoch": 0.3, "grad_norm": 1.1071579456329346, "learning_rate": 1.973448996738287e-05, "loss": 0.1117, "step": 1818 }, { "epoch": 0.3, "grad_norm": 1.0870813131332397, "learning_rate": 1.973407647198269e-05, "loss": 0.102, "step": 1819 }, { "epoch": 0.3, "grad_norm": 1.153566598892212, "learning_rate": 1.9733662659190608e-05, "loss": 0.1094, "step": 1820 }, { "epoch": 0.3, "grad_norm": 1.0797970294952393, "learning_rate": 1.973324852902011e-05, "loss": 0.1332, "step": 1821 }, { "epoch": 0.3, "grad_norm": 1.2404201030731201, "learning_rate": 1.9732834081484703e-05, "loss": 0.1141, "step": 1822 }, { "epoch": 0.3, "grad_norm": 1.0028579235076904, "learning_rate": 1.9732419316597907e-05, "loss": 0.1007, "step": 1823 }, { "epoch": 0.31, "grad_norm": 1.779600977897644, "learning_rate": 1.9732004234373235e-05, "loss": 0.1753, "step": 1824 }, { "epoch": 0.31, "grad_norm": 1.0729693174362183, "learning_rate": 1.9731588834824227e-05, "loss": 0.1217, "step": 1825 }, { "epoch": 0.31, "grad_norm": 1.4855437278747559, "learning_rate": 1.9731173117964433e-05, "loss": 0.1077, "step": 1826 }, { "epoch": 0.31, "grad_norm": 1.174929141998291, "learning_rate": 1.9730757083807398e-05, "loss": 0.1263, "step": 1827 }, { "epoch": 0.31, "grad_norm": 1.0831739902496338, "learning_rate": 1.973034073236669e-05, "loss": 0.1189, "step": 1828 }, { "epoch": 0.31, "grad_norm": 1.0765920877456665, "learning_rate": 1.972992406365589e-05, "loss": 0.1077, "step": 1829 }, { "epoch": 0.31, "grad_norm": 1.350873351097107, "learning_rate": 1.972950707768858e-05, "loss": 0.1246, "step": 1830 }, { "epoch": 0.31, "grad_norm": 1.4115543365478516, "learning_rate": 1.9729089774478355e-05, "loss": 0.1407, "step": 1831 }, { "epoch": 0.31, "grad_norm": 1.38251531124115, "learning_rate": 1.9728672154038823e-05, "loss": 0.143, "step": 1832 }, { "epoch": 0.31, "grad_norm": 1.3684158325195312, "learning_rate": 1.9728254216383602e-05, "loss": 0.1335, "step": 1833 }, { "epoch": 0.31, "grad_norm": 0.9850631952285767, "learning_rate": 1.9727835961526316e-05, "loss": 0.0994, "step": 1834 }, { "epoch": 0.31, "grad_norm": 1.2355568408966064, "learning_rate": 1.972741738948061e-05, "loss": 0.1446, "step": 1835 }, { "epoch": 0.31, "grad_norm": 0.9740267395973206, "learning_rate": 1.9726998500260126e-05, "loss": 0.1006, "step": 1836 }, { "epoch": 0.31, "grad_norm": 1.235837697982788, "learning_rate": 1.9726579293878523e-05, "loss": 0.1052, "step": 1837 }, { "epoch": 0.31, "grad_norm": 1.0360194444656372, "learning_rate": 1.972615977034947e-05, "loss": 0.1283, "step": 1838 }, { "epoch": 0.31, "grad_norm": 1.287171721458435, "learning_rate": 1.972573992968665e-05, "loss": 0.1384, "step": 1839 }, { "epoch": 0.31, "grad_norm": 1.250017523765564, "learning_rate": 1.9725319771903748e-05, "loss": 0.1313, "step": 1840 }, { "epoch": 0.31, "grad_norm": 1.1624854803085327, "learning_rate": 1.9724899297014467e-05, "loss": 0.1347, "step": 1841 }, { "epoch": 0.31, "grad_norm": 0.97478848695755, "learning_rate": 1.972447850503251e-05, "loss": 0.1124, "step": 1842 }, { "epoch": 0.31, "grad_norm": 1.362168788909912, "learning_rate": 1.9724057395971605e-05, "loss": 0.1376, "step": 1843 }, { "epoch": 0.31, "grad_norm": 1.3048981428146362, "learning_rate": 1.9723635969845482e-05, "loss": 0.1207, "step": 1844 }, { "epoch": 0.31, "grad_norm": 0.9730905890464783, "learning_rate": 1.972321422666788e-05, "loss": 0.1025, "step": 1845 }, { "epoch": 0.31, "grad_norm": 0.9842174053192139, "learning_rate": 1.972279216645255e-05, "loss": 0.1029, "step": 1846 }, { "epoch": 0.31, "grad_norm": 1.4165838956832886, "learning_rate": 1.9722369789213256e-05, "loss": 0.135, "step": 1847 }, { "epoch": 0.31, "grad_norm": 1.1344883441925049, "learning_rate": 1.9721947094963768e-05, "loss": 0.1228, "step": 1848 }, { "epoch": 0.31, "grad_norm": 1.0218408107757568, "learning_rate": 1.9721524083717867e-05, "loss": 0.1129, "step": 1849 }, { "epoch": 0.31, "grad_norm": 1.0962525606155396, "learning_rate": 1.972110075548935e-05, "loss": 0.1281, "step": 1850 }, { "epoch": 0.31, "grad_norm": 1.1190141439437866, "learning_rate": 1.972067711029202e-05, "loss": 0.1187, "step": 1851 }, { "epoch": 0.31, "grad_norm": 1.3109456300735474, "learning_rate": 1.972025314813969e-05, "loss": 0.1136, "step": 1852 }, { "epoch": 0.31, "grad_norm": 0.9240089654922485, "learning_rate": 1.9719828869046182e-05, "loss": 0.1141, "step": 1853 }, { "epoch": 0.31, "grad_norm": 1.8650741577148438, "learning_rate": 1.971940427302533e-05, "loss": 0.1361, "step": 1854 }, { "epoch": 0.31, "grad_norm": 1.0619275569915771, "learning_rate": 1.9718979360090977e-05, "loss": 0.1308, "step": 1855 }, { "epoch": 0.31, "grad_norm": 1.6592979431152344, "learning_rate": 1.9718554130256983e-05, "loss": 0.1301, "step": 1856 }, { "epoch": 0.31, "grad_norm": 1.3501834869384766, "learning_rate": 1.971812858353721e-05, "loss": 0.1308, "step": 1857 }, { "epoch": 0.31, "grad_norm": 0.9118834733963013, "learning_rate": 1.9717702719945535e-05, "loss": 0.0902, "step": 1858 }, { "epoch": 0.31, "grad_norm": 1.0649876594543457, "learning_rate": 1.971727653949584e-05, "loss": 0.097, "step": 1859 }, { "epoch": 0.31, "grad_norm": 1.2893645763397217, "learning_rate": 1.9716850042202025e-05, "loss": 0.1182, "step": 1860 }, { "epoch": 0.31, "grad_norm": 1.3116084337234497, "learning_rate": 1.9716423228077996e-05, "loss": 0.1227, "step": 1861 }, { "epoch": 0.31, "grad_norm": 0.9560944437980652, "learning_rate": 1.971599609713767e-05, "loss": 0.0963, "step": 1862 }, { "epoch": 0.31, "grad_norm": 1.1994436979293823, "learning_rate": 1.971556864939497e-05, "loss": 0.1059, "step": 1863 }, { "epoch": 0.31, "grad_norm": 1.3919873237609863, "learning_rate": 1.971514088486384e-05, "loss": 0.1313, "step": 1864 }, { "epoch": 0.31, "grad_norm": 1.2442525625228882, "learning_rate": 1.971471280355822e-05, "loss": 0.1179, "step": 1865 }, { "epoch": 0.31, "grad_norm": 1.0329986810684204, "learning_rate": 1.9714284405492078e-05, "loss": 0.1227, "step": 1866 }, { "epoch": 0.31, "grad_norm": 0.9584802985191345, "learning_rate": 1.9713855690679374e-05, "loss": 0.1183, "step": 1867 }, { "epoch": 0.31, "grad_norm": 1.1848036050796509, "learning_rate": 1.9713426659134095e-05, "loss": 0.1061, "step": 1868 }, { "epoch": 0.31, "grad_norm": 1.552045226097107, "learning_rate": 1.971299731087022e-05, "loss": 0.1088, "step": 1869 }, { "epoch": 0.31, "grad_norm": 0.9001320004463196, "learning_rate": 1.9712567645901753e-05, "loss": 0.1086, "step": 1870 }, { "epoch": 0.31, "grad_norm": 1.4541015625, "learning_rate": 1.9712137664242704e-05, "loss": 0.1177, "step": 1871 }, { "epoch": 0.31, "grad_norm": 1.2059297561645508, "learning_rate": 1.97117073659071e-05, "loss": 0.1406, "step": 1872 }, { "epoch": 0.31, "grad_norm": 1.0634334087371826, "learning_rate": 1.9711276750908958e-05, "loss": 0.1081, "step": 1873 }, { "epoch": 0.31, "grad_norm": 0.9753596782684326, "learning_rate": 1.971084581926233e-05, "loss": 0.1164, "step": 1874 }, { "epoch": 0.31, "grad_norm": 1.103638768196106, "learning_rate": 1.971041457098126e-05, "loss": 0.1194, "step": 1875 }, { "epoch": 0.31, "grad_norm": 0.9173415899276733, "learning_rate": 1.9709983006079812e-05, "loss": 0.1026, "step": 1876 }, { "epoch": 0.31, "grad_norm": 1.2804985046386719, "learning_rate": 1.9709551124572056e-05, "loss": 0.1381, "step": 1877 }, { "epoch": 0.31, "grad_norm": 1.3276000022888184, "learning_rate": 1.9709118926472078e-05, "loss": 0.1213, "step": 1878 }, { "epoch": 0.31, "grad_norm": 1.2473543882369995, "learning_rate": 1.9708686411793966e-05, "loss": 0.1265, "step": 1879 }, { "epoch": 0.31, "grad_norm": 1.600506067276001, "learning_rate": 1.970825358055183e-05, "loss": 0.1766, "step": 1880 }, { "epoch": 0.31, "grad_norm": 1.2326993942260742, "learning_rate": 1.9707820432759774e-05, "loss": 0.1293, "step": 1881 }, { "epoch": 0.31, "grad_norm": 1.0852645635604858, "learning_rate": 1.9707386968431924e-05, "loss": 0.1094, "step": 1882 }, { "epoch": 0.31, "grad_norm": 1.5104924440383911, "learning_rate": 1.970695318758242e-05, "loss": 0.1169, "step": 1883 }, { "epoch": 0.32, "grad_norm": 1.0578899383544922, "learning_rate": 1.9706519090225398e-05, "loss": 0.0992, "step": 1884 }, { "epoch": 0.32, "grad_norm": 1.355041742324829, "learning_rate": 1.9706084676375013e-05, "loss": 0.1314, "step": 1885 }, { "epoch": 0.32, "grad_norm": 1.1574921607971191, "learning_rate": 1.9705649946045434e-05, "loss": 0.1028, "step": 1886 }, { "epoch": 0.32, "grad_norm": 1.086918592453003, "learning_rate": 1.970521489925083e-05, "loss": 0.1032, "step": 1887 }, { "epoch": 0.32, "grad_norm": 0.8779876232147217, "learning_rate": 1.9704779536005394e-05, "loss": 0.0878, "step": 1888 }, { "epoch": 0.32, "grad_norm": 1.2842248678207397, "learning_rate": 1.9704343856323314e-05, "loss": 0.1294, "step": 1889 }, { "epoch": 0.32, "grad_norm": 1.028414249420166, "learning_rate": 1.9703907860218806e-05, "loss": 0.0915, "step": 1890 }, { "epoch": 0.32, "grad_norm": 1.0253710746765137, "learning_rate": 1.9703471547706075e-05, "loss": 0.1158, "step": 1891 }, { "epoch": 0.32, "grad_norm": 1.2741401195526123, "learning_rate": 1.9703034918799353e-05, "loss": 0.1175, "step": 1892 }, { "epoch": 0.32, "grad_norm": 1.2969691753387451, "learning_rate": 1.9702597973512874e-05, "loss": 0.1469, "step": 1893 }, { "epoch": 0.32, "grad_norm": 2.0945451259613037, "learning_rate": 1.9702160711860888e-05, "loss": 0.1143, "step": 1894 }, { "epoch": 0.32, "grad_norm": 1.1777102947235107, "learning_rate": 1.9701723133857654e-05, "loss": 0.0981, "step": 1895 }, { "epoch": 0.32, "grad_norm": 1.3538469076156616, "learning_rate": 1.9701285239517434e-05, "loss": 0.1231, "step": 1896 }, { "epoch": 0.32, "grad_norm": 0.9015446305274963, "learning_rate": 1.970084702885451e-05, "loss": 0.1184, "step": 1897 }, { "epoch": 0.32, "grad_norm": 1.3353583812713623, "learning_rate": 1.9700408501883173e-05, "loss": 0.1318, "step": 1898 }, { "epoch": 0.32, "grad_norm": 4.592441082000732, "learning_rate": 1.9699969658617715e-05, "loss": 0.1091, "step": 1899 }, { "epoch": 0.32, "grad_norm": 1.6595102548599243, "learning_rate": 1.969953049907245e-05, "loss": 0.1315, "step": 1900 }, { "epoch": 0.32, "grad_norm": 1.3117107152938843, "learning_rate": 1.9699091023261698e-05, "loss": 0.1309, "step": 1901 }, { "epoch": 0.32, "grad_norm": 3.898205518722534, "learning_rate": 1.9698651231199785e-05, "loss": 0.1758, "step": 1902 }, { "epoch": 0.32, "grad_norm": 1.118736743927002, "learning_rate": 1.969821112290105e-05, "loss": 0.1326, "step": 1903 }, { "epoch": 0.32, "grad_norm": 0.9435914158821106, "learning_rate": 1.9697770698379845e-05, "loss": 0.117, "step": 1904 }, { "epoch": 0.32, "grad_norm": 1.2069499492645264, "learning_rate": 1.969732995765054e-05, "loss": 0.1082, "step": 1905 }, { "epoch": 0.32, "grad_norm": 0.859840989112854, "learning_rate": 1.969688890072749e-05, "loss": 0.0903, "step": 1906 }, { "epoch": 0.32, "grad_norm": 1.2644215822219849, "learning_rate": 1.9696447527625085e-05, "loss": 0.1244, "step": 1907 }, { "epoch": 0.32, "grad_norm": 1.6100475788116455, "learning_rate": 1.9696005838357717e-05, "loss": 0.1391, "step": 1908 }, { "epoch": 0.32, "grad_norm": 1.1112165451049805, "learning_rate": 1.9695563832939783e-05, "loss": 0.1041, "step": 1909 }, { "epoch": 0.32, "grad_norm": 1.7431442737579346, "learning_rate": 1.96951215113857e-05, "loss": 0.1125, "step": 1910 }, { "epoch": 0.32, "grad_norm": 0.8570444583892822, "learning_rate": 1.9694678873709887e-05, "loss": 0.0971, "step": 1911 }, { "epoch": 0.32, "grad_norm": 1.1828209161758423, "learning_rate": 1.969423591992678e-05, "loss": 0.1254, "step": 1912 }, { "epoch": 0.32, "grad_norm": 1.2176940441131592, "learning_rate": 1.9693792650050823e-05, "loss": 0.1245, "step": 1913 }, { "epoch": 0.32, "grad_norm": 1.4318225383758545, "learning_rate": 1.9693349064096464e-05, "loss": 0.0908, "step": 1914 }, { "epoch": 0.32, "grad_norm": 1.0764415264129639, "learning_rate": 1.9692905162078168e-05, "loss": 0.0901, "step": 1915 }, { "epoch": 0.32, "grad_norm": 1.1521341800689697, "learning_rate": 1.9692460944010416e-05, "loss": 0.1141, "step": 1916 }, { "epoch": 0.32, "grad_norm": 1.084122896194458, "learning_rate": 1.969201640990768e-05, "loss": 0.1218, "step": 1917 }, { "epoch": 0.32, "grad_norm": 1.2127201557159424, "learning_rate": 1.9691571559784465e-05, "loss": 0.1111, "step": 1918 }, { "epoch": 0.32, "grad_norm": 1.2671229839324951, "learning_rate": 1.9691126393655272e-05, "loss": 0.1445, "step": 1919 }, { "epoch": 0.32, "grad_norm": 1.126630425453186, "learning_rate": 1.9690680911534614e-05, "loss": 0.1259, "step": 1920 }, { "epoch": 0.32, "grad_norm": 1.1460950374603271, "learning_rate": 1.9690235113437023e-05, "loss": 0.1268, "step": 1921 }, { "epoch": 0.32, "grad_norm": 0.9794965982437134, "learning_rate": 1.968978899937703e-05, "loss": 0.0906, "step": 1922 }, { "epoch": 0.32, "grad_norm": 1.144216537475586, "learning_rate": 1.968934256936918e-05, "loss": 0.1182, "step": 1923 }, { "epoch": 0.32, "grad_norm": 1.2136269807815552, "learning_rate": 1.9688895823428033e-05, "loss": 0.1086, "step": 1924 }, { "epoch": 0.32, "grad_norm": 1.1526463031768799, "learning_rate": 1.9688448761568154e-05, "loss": 0.1088, "step": 1925 }, { "epoch": 0.32, "grad_norm": 1.1501779556274414, "learning_rate": 1.9688001383804118e-05, "loss": 0.1208, "step": 1926 }, { "epoch": 0.32, "grad_norm": 1.7440887689590454, "learning_rate": 1.9687553690150515e-05, "loss": 0.1111, "step": 1927 }, { "epoch": 0.32, "grad_norm": 1.9650113582611084, "learning_rate": 1.9687105680621944e-05, "loss": 0.1235, "step": 1928 }, { "epoch": 0.32, "grad_norm": 1.057064175605774, "learning_rate": 1.968665735523301e-05, "loss": 0.1081, "step": 1929 }, { "epoch": 0.32, "grad_norm": 1.0882327556610107, "learning_rate": 1.9686208713998332e-05, "loss": 0.1048, "step": 1930 }, { "epoch": 0.32, "grad_norm": 1.1477417945861816, "learning_rate": 1.968575975693254e-05, "loss": 0.1385, "step": 1931 }, { "epoch": 0.32, "grad_norm": 1.3224284648895264, "learning_rate": 1.968531048405027e-05, "loss": 0.1237, "step": 1932 }, { "epoch": 0.32, "grad_norm": 1.229731798171997, "learning_rate": 1.968486089536617e-05, "loss": 0.1219, "step": 1933 }, { "epoch": 0.32, "grad_norm": 1.2478466033935547, "learning_rate": 1.9684410990894905e-05, "loss": 0.1303, "step": 1934 }, { "epoch": 0.32, "grad_norm": 1.3177392482757568, "learning_rate": 1.9683960770651142e-05, "loss": 0.1218, "step": 1935 }, { "epoch": 0.32, "grad_norm": 1.2702897787094116, "learning_rate": 1.9683510234649563e-05, "loss": 0.118, "step": 1936 }, { "epoch": 0.32, "grad_norm": 1.3362572193145752, "learning_rate": 1.968305938290485e-05, "loss": 0.1315, "step": 1937 }, { "epoch": 0.32, "grad_norm": 1.5257049798965454, "learning_rate": 1.9682608215431712e-05, "loss": 0.1338, "step": 1938 }, { "epoch": 0.32, "grad_norm": 1.0114245414733887, "learning_rate": 1.968215673224486e-05, "loss": 0.0953, "step": 1939 }, { "epoch": 0.32, "grad_norm": 1.5500702857971191, "learning_rate": 1.968170493335901e-05, "loss": 0.125, "step": 1940 }, { "epoch": 0.32, "grad_norm": 1.0823172330856323, "learning_rate": 1.9681252818788897e-05, "loss": 0.1117, "step": 1941 }, { "epoch": 0.32, "grad_norm": 1.481134057044983, "learning_rate": 1.9680800388549263e-05, "loss": 0.1312, "step": 1942 }, { "epoch": 0.32, "grad_norm": 1.3038517236709595, "learning_rate": 1.9680347642654858e-05, "loss": 0.111, "step": 1943 }, { "epoch": 0.33, "grad_norm": 0.9199886322021484, "learning_rate": 1.9679894581120446e-05, "loss": 0.124, "step": 1944 }, { "epoch": 0.33, "grad_norm": 1.2088006734848022, "learning_rate": 1.9679441203960796e-05, "loss": 0.1089, "step": 1945 }, { "epoch": 0.33, "grad_norm": 1.2316902875900269, "learning_rate": 1.9678987511190696e-05, "loss": 0.1058, "step": 1946 }, { "epoch": 0.33, "grad_norm": 1.1670151948928833, "learning_rate": 1.967853350282494e-05, "loss": 0.1001, "step": 1947 }, { "epoch": 0.33, "grad_norm": 1.4297109842300415, "learning_rate": 1.9678079178878325e-05, "loss": 0.1138, "step": 1948 }, { "epoch": 0.33, "grad_norm": 0.8978703022003174, "learning_rate": 1.967762453936567e-05, "loss": 0.1014, "step": 1949 }, { "epoch": 0.33, "grad_norm": 0.9654186964035034, "learning_rate": 1.9677169584301796e-05, "loss": 0.1202, "step": 1950 }, { "epoch": 0.33, "grad_norm": 1.1100578308105469, "learning_rate": 1.967671431370154e-05, "loss": 0.133, "step": 1951 }, { "epoch": 0.33, "grad_norm": 1.12506103515625, "learning_rate": 1.9676258727579748e-05, "loss": 0.1005, "step": 1952 }, { "epoch": 0.33, "grad_norm": 0.970716655254364, "learning_rate": 1.967580282595127e-05, "loss": 0.0947, "step": 1953 }, { "epoch": 0.33, "grad_norm": 0.9671463370323181, "learning_rate": 1.9675346608830976e-05, "loss": 0.1056, "step": 1954 }, { "epoch": 0.33, "grad_norm": 1.1507047414779663, "learning_rate": 1.9674890076233734e-05, "loss": 0.1128, "step": 1955 }, { "epoch": 0.33, "grad_norm": 1.1170066595077515, "learning_rate": 1.967443322817444e-05, "loss": 0.1115, "step": 1956 }, { "epoch": 0.33, "grad_norm": 1.214965581893921, "learning_rate": 1.9673976064667983e-05, "loss": 0.1147, "step": 1957 }, { "epoch": 0.33, "grad_norm": 1.3391590118408203, "learning_rate": 1.9673518585729273e-05, "loss": 0.1182, "step": 1958 }, { "epoch": 0.33, "grad_norm": 1.5683863162994385, "learning_rate": 1.9673060791373223e-05, "loss": 0.1086, "step": 1959 }, { "epoch": 0.33, "grad_norm": 1.2999908924102783, "learning_rate": 1.967260268161477e-05, "loss": 0.1276, "step": 1960 }, { "epoch": 0.33, "grad_norm": 0.8788919448852539, "learning_rate": 1.9672144256468835e-05, "loss": 0.0936, "step": 1961 }, { "epoch": 0.33, "grad_norm": 1.4513198137283325, "learning_rate": 1.967168551595038e-05, "loss": 0.1003, "step": 1962 }, { "epoch": 0.33, "grad_norm": 1.0977519750595093, "learning_rate": 1.967122646007435e-05, "loss": 0.1146, "step": 1963 }, { "epoch": 0.33, "grad_norm": 1.1070767641067505, "learning_rate": 1.9670767088855726e-05, "loss": 0.1192, "step": 1964 }, { "epoch": 0.33, "grad_norm": 1.496550440788269, "learning_rate": 1.967030740230948e-05, "loss": 0.1331, "step": 1965 }, { "epoch": 0.33, "grad_norm": 1.048248291015625, "learning_rate": 1.9669847400450595e-05, "loss": 0.1086, "step": 1966 }, { "epoch": 0.33, "grad_norm": 1.4024308919906616, "learning_rate": 1.9669387083294086e-05, "loss": 0.1034, "step": 1967 }, { "epoch": 0.33, "grad_norm": 1.0060741901397705, "learning_rate": 1.9668926450854945e-05, "loss": 0.1106, "step": 1968 }, { "epoch": 0.33, "grad_norm": 1.0765349864959717, "learning_rate": 1.9668465503148203e-05, "loss": 0.1115, "step": 1969 }, { "epoch": 0.33, "grad_norm": 1.1374356746673584, "learning_rate": 1.9668004240188884e-05, "loss": 0.1142, "step": 1970 }, { "epoch": 0.33, "grad_norm": 1.1539125442504883, "learning_rate": 1.9667542661992026e-05, "loss": 0.1047, "step": 1971 }, { "epoch": 0.33, "grad_norm": 1.605175256729126, "learning_rate": 1.9667080768572687e-05, "loss": 0.1078, "step": 1972 }, { "epoch": 0.33, "grad_norm": 1.5017749071121216, "learning_rate": 1.9666618559945924e-05, "loss": 0.0929, "step": 1973 }, { "epoch": 0.33, "grad_norm": 1.06459641456604, "learning_rate": 1.9666156036126803e-05, "loss": 0.1186, "step": 1974 }, { "epoch": 0.33, "grad_norm": 1.6007568836212158, "learning_rate": 1.9665693197130413e-05, "loss": 0.105, "step": 1975 }, { "epoch": 0.33, "grad_norm": 1.0215113162994385, "learning_rate": 1.9665230042971843e-05, "loss": 0.0956, "step": 1976 }, { "epoch": 0.33, "grad_norm": 1.1332430839538574, "learning_rate": 1.966476657366619e-05, "loss": 0.1101, "step": 1977 }, { "epoch": 0.33, "grad_norm": 0.906672477722168, "learning_rate": 1.9664302789228576e-05, "loss": 0.0948, "step": 1978 }, { "epoch": 0.33, "grad_norm": 1.348665475845337, "learning_rate": 1.9663838689674114e-05, "loss": 0.1195, "step": 1979 }, { "epoch": 0.33, "grad_norm": 1.5114665031433105, "learning_rate": 1.9663374275017938e-05, "loss": 0.1233, "step": 1980 }, { "epoch": 0.33, "grad_norm": 1.422073245048523, "learning_rate": 1.9662909545275196e-05, "loss": 0.1167, "step": 1981 }, { "epoch": 0.33, "grad_norm": 1.2157524824142456, "learning_rate": 1.966244450046103e-05, "loss": 0.1213, "step": 1982 }, { "epoch": 0.33, "grad_norm": 1.2330719232559204, "learning_rate": 1.9661979140590622e-05, "loss": 0.1156, "step": 1983 }, { "epoch": 0.33, "grad_norm": 1.0495413541793823, "learning_rate": 1.9661513465679128e-05, "loss": 0.1155, "step": 1984 }, { "epoch": 0.33, "grad_norm": 1.2614631652832031, "learning_rate": 1.966104747574174e-05, "loss": 0.1076, "step": 1985 }, { "epoch": 0.33, "grad_norm": 1.3029292821884155, "learning_rate": 1.9660581170793647e-05, "loss": 0.1253, "step": 1986 }, { "epoch": 0.33, "grad_norm": 1.3243420124053955, "learning_rate": 1.9660114550850062e-05, "loss": 0.1123, "step": 1987 }, { "epoch": 0.33, "grad_norm": 1.1385270357131958, "learning_rate": 1.9659647615926193e-05, "loss": 0.1074, "step": 1988 }, { "epoch": 0.33, "grad_norm": 1.6689720153808594, "learning_rate": 1.965918036603727e-05, "loss": 0.1297, "step": 1989 }, { "epoch": 0.33, "grad_norm": 1.3420268297195435, "learning_rate": 1.9658712801198522e-05, "loss": 0.1321, "step": 1990 }, { "epoch": 0.33, "grad_norm": 0.869260847568512, "learning_rate": 1.96582449214252e-05, "loss": 0.1056, "step": 1991 }, { "epoch": 0.33, "grad_norm": 1.5156090259552002, "learning_rate": 1.9657776726732555e-05, "loss": 0.1078, "step": 1992 }, { "epoch": 0.33, "grad_norm": 1.0595839023590088, "learning_rate": 1.9657308217135857e-05, "loss": 0.1175, "step": 1993 }, { "epoch": 0.33, "grad_norm": 0.9008481502532959, "learning_rate": 1.965683939265038e-05, "loss": 0.1031, "step": 1994 }, { "epoch": 0.33, "grad_norm": 0.8385311961174011, "learning_rate": 1.965637025329141e-05, "loss": 0.0945, "step": 1995 }, { "epoch": 0.33, "grad_norm": 1.4474858045578003, "learning_rate": 1.965590079907425e-05, "loss": 0.12, "step": 1996 }, { "epoch": 0.33, "grad_norm": 1.0590314865112305, "learning_rate": 1.9655431030014202e-05, "loss": 0.1109, "step": 1997 }, { "epoch": 0.33, "grad_norm": 0.8972005248069763, "learning_rate": 1.965496094612658e-05, "loss": 0.1104, "step": 1998 }, { "epoch": 0.33, "grad_norm": 1.4628300666809082, "learning_rate": 1.965449054742672e-05, "loss": 0.1229, "step": 1999 }, { "epoch": 0.33, "grad_norm": 1.1288169622421265, "learning_rate": 1.9654019833929955e-05, "loss": 0.1211, "step": 2000 }, { "epoch": 0.33, "grad_norm": 1.1709829568862915, "learning_rate": 1.9653548805651632e-05, "loss": 0.1294, "step": 2001 }, { "epoch": 0.33, "grad_norm": 0.9732480645179749, "learning_rate": 1.9653077462607112e-05, "loss": 0.1417, "step": 2002 }, { "epoch": 0.34, "grad_norm": 2.1327836513519287, "learning_rate": 1.9652605804811762e-05, "loss": 0.0917, "step": 2003 }, { "epoch": 0.34, "grad_norm": 0.9579270482063293, "learning_rate": 1.965213383228097e-05, "loss": 0.107, "step": 2004 }, { "epoch": 0.34, "grad_norm": 1.1189020872116089, "learning_rate": 1.965166154503011e-05, "loss": 0.0945, "step": 2005 }, { "epoch": 0.34, "grad_norm": 1.2246469259262085, "learning_rate": 1.9651188943074587e-05, "loss": 0.1325, "step": 2006 }, { "epoch": 0.34, "grad_norm": 1.5678728818893433, "learning_rate": 1.965071602642982e-05, "loss": 0.117, "step": 2007 }, { "epoch": 0.34, "grad_norm": 0.9484965205192566, "learning_rate": 1.9650242795111215e-05, "loss": 0.0992, "step": 2008 }, { "epoch": 0.34, "grad_norm": 1.1861975193023682, "learning_rate": 1.964976924913421e-05, "loss": 0.1204, "step": 2009 }, { "epoch": 0.34, "grad_norm": 0.9735077619552612, "learning_rate": 1.9649295388514248e-05, "loss": 0.104, "step": 2010 }, { "epoch": 0.34, "grad_norm": 1.0925554037094116, "learning_rate": 1.9648821213266773e-05, "loss": 0.0935, "step": 2011 }, { "epoch": 0.34, "grad_norm": 1.2001944780349731, "learning_rate": 1.964834672340725e-05, "loss": 0.1326, "step": 2012 }, { "epoch": 0.34, "grad_norm": 1.3949856758117676, "learning_rate": 1.9647871918951153e-05, "loss": 0.1251, "step": 2013 }, { "epoch": 0.34, "grad_norm": 1.0859055519104004, "learning_rate": 1.9647396799913958e-05, "loss": 0.1177, "step": 2014 }, { "epoch": 0.34, "grad_norm": 1.141603946685791, "learning_rate": 1.964692136631116e-05, "loss": 0.1089, "step": 2015 }, { "epoch": 0.34, "grad_norm": 1.1826282739639282, "learning_rate": 1.964644561815826e-05, "loss": 0.0942, "step": 2016 }, { "epoch": 0.34, "grad_norm": 1.2337404489517212, "learning_rate": 1.9645969555470773e-05, "loss": 0.1226, "step": 2017 }, { "epoch": 0.34, "grad_norm": 1.243415117263794, "learning_rate": 1.9645493178264217e-05, "loss": 0.1254, "step": 2018 }, { "epoch": 0.34, "grad_norm": 1.369320034980774, "learning_rate": 1.9645016486554125e-05, "loss": 0.1381, "step": 2019 }, { "epoch": 0.34, "grad_norm": 1.4074312448501587, "learning_rate": 1.9644539480356045e-05, "loss": 0.1487, "step": 2020 }, { "epoch": 0.34, "grad_norm": 1.0854495763778687, "learning_rate": 1.964406215968553e-05, "loss": 0.1112, "step": 2021 }, { "epoch": 0.34, "grad_norm": 0.9104245901107788, "learning_rate": 1.9643584524558142e-05, "loss": 0.0874, "step": 2022 }, { "epoch": 0.34, "grad_norm": 1.2726188898086548, "learning_rate": 1.964310657498945e-05, "loss": 0.1096, "step": 2023 }, { "epoch": 0.34, "grad_norm": 1.356805682182312, "learning_rate": 1.9642628310995045e-05, "loss": 0.1283, "step": 2024 }, { "epoch": 0.34, "grad_norm": 1.0650312900543213, "learning_rate": 1.964214973259052e-05, "loss": 0.1136, "step": 2025 }, { "epoch": 0.34, "grad_norm": 0.953580379486084, "learning_rate": 1.9641670839791476e-05, "loss": 0.0912, "step": 2026 }, { "epoch": 0.34, "grad_norm": 1.2374937534332275, "learning_rate": 1.9641191632613532e-05, "loss": 0.1142, "step": 2027 }, { "epoch": 0.34, "grad_norm": 1.2166337966918945, "learning_rate": 1.964071211107231e-05, "loss": 0.1508, "step": 2028 }, { "epoch": 0.34, "grad_norm": 1.2056522369384766, "learning_rate": 1.964023227518345e-05, "loss": 0.1127, "step": 2029 }, { "epoch": 0.34, "grad_norm": 1.2379037141799927, "learning_rate": 1.9639752124962594e-05, "loss": 0.1099, "step": 2030 }, { "epoch": 0.34, "grad_norm": 1.1155279874801636, "learning_rate": 1.96392716604254e-05, "loss": 0.1151, "step": 2031 }, { "epoch": 0.34, "grad_norm": 1.3675971031188965, "learning_rate": 1.963879088158753e-05, "loss": 0.1319, "step": 2032 }, { "epoch": 0.34, "grad_norm": 1.0785452127456665, "learning_rate": 1.9638309788464664e-05, "loss": 0.0906, "step": 2033 }, { "epoch": 0.34, "grad_norm": 1.1898788213729858, "learning_rate": 1.9637828381072486e-05, "loss": 0.1285, "step": 2034 }, { "epoch": 0.34, "grad_norm": 1.0850744247436523, "learning_rate": 1.9637346659426698e-05, "loss": 0.1268, "step": 2035 }, { "epoch": 0.34, "grad_norm": 1.2164703607559204, "learning_rate": 1.9636864623543002e-05, "loss": 0.0989, "step": 2036 }, { "epoch": 0.34, "grad_norm": 1.1405783891677856, "learning_rate": 1.9636382273437117e-05, "loss": 0.0922, "step": 2037 }, { "epoch": 0.34, "grad_norm": 1.1919119358062744, "learning_rate": 1.9635899609124774e-05, "loss": 0.1235, "step": 2038 }, { "epoch": 0.34, "grad_norm": 1.2287206649780273, "learning_rate": 1.9635416630621703e-05, "loss": 0.0973, "step": 2039 }, { "epoch": 0.34, "grad_norm": 1.321906566619873, "learning_rate": 1.9634933337943663e-05, "loss": 0.1111, "step": 2040 }, { "epoch": 0.34, "grad_norm": 1.016701579093933, "learning_rate": 1.9634449731106402e-05, "loss": 0.1024, "step": 2041 }, { "epoch": 0.34, "grad_norm": 1.5235384702682495, "learning_rate": 1.9633965810125692e-05, "loss": 0.1022, "step": 2042 }, { "epoch": 0.34, "grad_norm": 1.5038756132125854, "learning_rate": 1.9633481575017317e-05, "loss": 0.1181, "step": 2043 }, { "epoch": 0.34, "grad_norm": 1.2411993741989136, "learning_rate": 1.9632997025797058e-05, "loss": 0.1231, "step": 2044 }, { "epoch": 0.34, "grad_norm": 1.683681845664978, "learning_rate": 1.963251216248072e-05, "loss": 0.1155, "step": 2045 }, { "epoch": 0.34, "grad_norm": 1.2345123291015625, "learning_rate": 1.963202698508411e-05, "loss": 0.1367, "step": 2046 }, { "epoch": 0.34, "grad_norm": 1.1128519773483276, "learning_rate": 1.9631541493623048e-05, "loss": 0.0736, "step": 2047 }, { "epoch": 0.34, "grad_norm": 0.7306832671165466, "learning_rate": 1.9631055688113367e-05, "loss": 0.08, "step": 2048 }, { "epoch": 0.34, "grad_norm": 1.172102689743042, "learning_rate": 1.9630569568570906e-05, "loss": 0.0866, "step": 2049 }, { "epoch": 0.34, "grad_norm": 1.1180634498596191, "learning_rate": 1.9630083135011513e-05, "loss": 0.0948, "step": 2050 }, { "epoch": 0.34, "grad_norm": 1.0725582838058472, "learning_rate": 1.9629596387451046e-05, "loss": 0.1261, "step": 2051 }, { "epoch": 0.34, "grad_norm": 1.0219628810882568, "learning_rate": 1.9629109325905386e-05, "loss": 0.119, "step": 2052 }, { "epoch": 0.34, "grad_norm": 1.3679832220077515, "learning_rate": 1.9628621950390406e-05, "loss": 0.108, "step": 2053 }, { "epoch": 0.34, "grad_norm": 1.0331714153289795, "learning_rate": 1.9628134260922e-05, "loss": 0.0895, "step": 2054 }, { "epoch": 0.34, "grad_norm": 1.925977110862732, "learning_rate": 1.9627646257516072e-05, "loss": 0.1252, "step": 2055 }, { "epoch": 0.34, "grad_norm": 1.1148287057876587, "learning_rate": 1.9627157940188533e-05, "loss": 0.1272, "step": 2056 }, { "epoch": 0.34, "grad_norm": 1.1925843954086304, "learning_rate": 1.96266693089553e-05, "loss": 0.1267, "step": 2057 }, { "epoch": 0.34, "grad_norm": 1.2679094076156616, "learning_rate": 1.9626180363832313e-05, "loss": 0.1236, "step": 2058 }, { "epoch": 0.34, "grad_norm": 2.256523370742798, "learning_rate": 1.9625691104835506e-05, "loss": 0.1492, "step": 2059 }, { "epoch": 0.34, "grad_norm": 0.9886878132820129, "learning_rate": 1.962520153198084e-05, "loss": 0.1033, "step": 2060 }, { "epoch": 0.34, "grad_norm": 1.1153314113616943, "learning_rate": 1.9624711645284277e-05, "loss": 0.107, "step": 2061 }, { "epoch": 0.34, "grad_norm": 1.0765113830566406, "learning_rate": 1.9624221444761786e-05, "loss": 0.0996, "step": 2062 }, { "epoch": 0.35, "grad_norm": 1.5440738201141357, "learning_rate": 1.9623730930429353e-05, "loss": 0.0887, "step": 2063 }, { "epoch": 0.35, "grad_norm": 1.049265742301941, "learning_rate": 1.9623240102302974e-05, "loss": 0.1448, "step": 2064 }, { "epoch": 0.35, "grad_norm": 1.0542086362838745, "learning_rate": 1.962274896039865e-05, "loss": 0.1305, "step": 2065 }, { "epoch": 0.35, "grad_norm": 0.8736131191253662, "learning_rate": 1.9622257504732397e-05, "loss": 0.0872, "step": 2066 }, { "epoch": 0.35, "grad_norm": 1.1669251918792725, "learning_rate": 1.962176573532024e-05, "loss": 0.1202, "step": 2067 }, { "epoch": 0.35, "grad_norm": 1.137358546257019, "learning_rate": 1.9621273652178213e-05, "loss": 0.1136, "step": 2068 }, { "epoch": 0.35, "grad_norm": 1.0289885997772217, "learning_rate": 1.9620781255322356e-05, "loss": 0.1093, "step": 2069 }, { "epoch": 0.35, "grad_norm": 1.2452912330627441, "learning_rate": 1.962028854476873e-05, "loss": 0.1208, "step": 2070 }, { "epoch": 0.35, "grad_norm": 1.1162927150726318, "learning_rate": 1.9619795520533406e-05, "loss": 0.1052, "step": 2071 }, { "epoch": 0.35, "grad_norm": 1.2705496549606323, "learning_rate": 1.9619302182632446e-05, "loss": 0.1358, "step": 2072 }, { "epoch": 0.35, "grad_norm": 1.1348950862884521, "learning_rate": 1.9618808531081944e-05, "loss": 0.1174, "step": 2073 }, { "epoch": 0.35, "grad_norm": 1.073390007019043, "learning_rate": 1.9618314565898e-05, "loss": 0.1049, "step": 2074 }, { "epoch": 0.35, "grad_norm": 1.2151122093200684, "learning_rate": 1.961782028709671e-05, "loss": 0.125, "step": 2075 }, { "epoch": 0.35, "grad_norm": 1.1301274299621582, "learning_rate": 1.9617325694694198e-05, "loss": 0.0948, "step": 2076 }, { "epoch": 0.35, "grad_norm": 1.2371044158935547, "learning_rate": 1.9616830788706588e-05, "loss": 0.095, "step": 2077 }, { "epoch": 0.35, "grad_norm": 1.3248707056045532, "learning_rate": 1.961633556915002e-05, "loss": 0.1208, "step": 2078 }, { "epoch": 0.35, "grad_norm": 1.07843017578125, "learning_rate": 1.9615840036040638e-05, "loss": 0.1175, "step": 2079 }, { "epoch": 0.35, "grad_norm": 1.1182438135147095, "learning_rate": 1.9615344189394597e-05, "loss": 0.1015, "step": 2080 }, { "epoch": 0.35, "grad_norm": 0.9059363007545471, "learning_rate": 1.9614848029228073e-05, "loss": 0.0867, "step": 2081 }, { "epoch": 0.35, "grad_norm": 1.0871798992156982, "learning_rate": 1.9614351555557237e-05, "loss": 0.0978, "step": 2082 }, { "epoch": 0.35, "grad_norm": 1.0112237930297852, "learning_rate": 1.961385476839828e-05, "loss": 0.1035, "step": 2083 }, { "epoch": 0.35, "grad_norm": 0.9184685349464417, "learning_rate": 1.9613357667767403e-05, "loss": 0.1032, "step": 2084 }, { "epoch": 0.35, "grad_norm": 1.2637630701065063, "learning_rate": 1.9612860253680805e-05, "loss": 0.1419, "step": 2085 }, { "epoch": 0.35, "grad_norm": 1.210715889930725, "learning_rate": 1.9612362526154717e-05, "loss": 0.119, "step": 2086 }, { "epoch": 0.35, "grad_norm": 0.7622205018997192, "learning_rate": 1.961186448520536e-05, "loss": 0.0879, "step": 2087 }, { "epoch": 0.35, "grad_norm": 1.1183840036392212, "learning_rate": 1.9611366130848976e-05, "loss": 0.115, "step": 2088 }, { "epoch": 0.35, "grad_norm": 1.1652560234069824, "learning_rate": 1.961086746310181e-05, "loss": 0.1075, "step": 2089 }, { "epoch": 0.35, "grad_norm": 0.9462792277336121, "learning_rate": 1.9610368481980135e-05, "loss": 0.1074, "step": 2090 }, { "epoch": 0.35, "grad_norm": 1.0486289262771606, "learning_rate": 1.9609869187500205e-05, "loss": 0.0969, "step": 2091 }, { "epoch": 0.35, "grad_norm": 0.8360891938209534, "learning_rate": 1.9609369579678305e-05, "loss": 0.1135, "step": 2092 }, { "epoch": 0.35, "grad_norm": 1.3842848539352417, "learning_rate": 1.960886965853073e-05, "loss": 0.1066, "step": 2093 }, { "epoch": 0.35, "grad_norm": 1.0897009372711182, "learning_rate": 1.9608369424073776e-05, "loss": 0.1107, "step": 2094 }, { "epoch": 0.35, "grad_norm": 3.2533557415008545, "learning_rate": 1.9607868876323758e-05, "loss": 0.1277, "step": 2095 }, { "epoch": 0.35, "grad_norm": 1.0987801551818848, "learning_rate": 1.9607368015296992e-05, "loss": 0.1161, "step": 2096 }, { "epoch": 0.35, "grad_norm": 1.2091773748397827, "learning_rate": 1.9606866841009812e-05, "loss": 0.1239, "step": 2097 }, { "epoch": 0.35, "grad_norm": 1.0883655548095703, "learning_rate": 1.9606365353478562e-05, "loss": 0.1377, "step": 2098 }, { "epoch": 0.35, "grad_norm": 1.386387586593628, "learning_rate": 1.960586355271959e-05, "loss": 0.1209, "step": 2099 }, { "epoch": 0.35, "grad_norm": 1.240748405456543, "learning_rate": 1.9605361438749254e-05, "loss": 0.1066, "step": 2100 }, { "epoch": 0.35, "grad_norm": 1.2967637777328491, "learning_rate": 1.960485901158393e-05, "loss": 0.1181, "step": 2101 }, { "epoch": 0.35, "grad_norm": 1.055092215538025, "learning_rate": 1.960435627124001e-05, "loss": 0.1179, "step": 2102 }, { "epoch": 0.35, "grad_norm": 1.716212511062622, "learning_rate": 1.9603853217733866e-05, "loss": 0.1261, "step": 2103 }, { "epoch": 0.35, "grad_norm": 1.0415444374084473, "learning_rate": 1.960334985108192e-05, "loss": 0.1243, "step": 2104 }, { "epoch": 0.35, "grad_norm": 1.226013422012329, "learning_rate": 1.9602846171300576e-05, "loss": 0.1231, "step": 2105 }, { "epoch": 0.35, "grad_norm": 0.8732147812843323, "learning_rate": 1.9602342178406255e-05, "loss": 0.0959, "step": 2106 }, { "epoch": 0.35, "grad_norm": 1.3779844045639038, "learning_rate": 1.9601837872415398e-05, "loss": 0.1335, "step": 2107 }, { "epoch": 0.35, "grad_norm": 1.67241632938385, "learning_rate": 1.960133325334444e-05, "loss": 0.1347, "step": 2108 }, { "epoch": 0.35, "grad_norm": 1.0992887020111084, "learning_rate": 1.960082832120984e-05, "loss": 0.1088, "step": 2109 }, { "epoch": 0.35, "grad_norm": 1.198822021484375, "learning_rate": 1.960032307602806e-05, "loss": 0.1474, "step": 2110 }, { "epoch": 0.35, "grad_norm": 1.173412799835205, "learning_rate": 1.9599817517815576e-05, "loss": 0.1037, "step": 2111 }, { "epoch": 0.35, "grad_norm": 1.609863042831421, "learning_rate": 1.959931164658887e-05, "loss": 0.0968, "step": 2112 }, { "epoch": 0.35, "grad_norm": 1.5154502391815186, "learning_rate": 1.9598805462364437e-05, "loss": 0.1161, "step": 2113 }, { "epoch": 0.35, "grad_norm": 1.3978608846664429, "learning_rate": 1.9598298965158783e-05, "loss": 0.1261, "step": 2114 }, { "epoch": 0.35, "grad_norm": 1.4309911727905273, "learning_rate": 1.959779215498842e-05, "loss": 0.1057, "step": 2115 }, { "epoch": 0.35, "grad_norm": 1.2351510524749756, "learning_rate": 1.959728503186988e-05, "loss": 0.1161, "step": 2116 }, { "epoch": 0.35, "grad_norm": 1.2774361371994019, "learning_rate": 1.9596777595819694e-05, "loss": 0.0804, "step": 2117 }, { "epoch": 0.35, "grad_norm": 1.1887671947479248, "learning_rate": 1.9596269846854406e-05, "loss": 0.1287, "step": 2118 }, { "epoch": 0.35, "grad_norm": 1.159158706665039, "learning_rate": 1.9595761784990575e-05, "loss": 0.0976, "step": 2119 }, { "epoch": 0.35, "grad_norm": 0.992876410484314, "learning_rate": 1.9595253410244765e-05, "loss": 0.0934, "step": 2120 }, { "epoch": 0.35, "grad_norm": 1.1997721195220947, "learning_rate": 1.959474472263355e-05, "loss": 0.1389, "step": 2121 }, { "epoch": 0.35, "grad_norm": 1.2186237573623657, "learning_rate": 1.9594235722173523e-05, "loss": 0.1063, "step": 2122 }, { "epoch": 0.36, "grad_norm": 1.5118651390075684, "learning_rate": 1.9593726408881272e-05, "loss": 0.1393, "step": 2123 }, { "epoch": 0.36, "grad_norm": 1.0178844928741455, "learning_rate": 1.959321678277341e-05, "loss": 0.1163, "step": 2124 }, { "epoch": 0.36, "grad_norm": 1.1516952514648438, "learning_rate": 1.9592706843866555e-05, "loss": 0.1004, "step": 2125 }, { "epoch": 0.36, "grad_norm": 1.0502275228500366, "learning_rate": 1.959219659217733e-05, "loss": 0.1127, "step": 2126 }, { "epoch": 0.36, "grad_norm": 1.241463303565979, "learning_rate": 1.9591686027722378e-05, "loss": 0.0946, "step": 2127 }, { "epoch": 0.36, "grad_norm": 1.0927480459213257, "learning_rate": 1.959117515051834e-05, "loss": 0.1195, "step": 2128 }, { "epoch": 0.36, "grad_norm": 1.1973216533660889, "learning_rate": 1.9590663960581872e-05, "loss": 0.1212, "step": 2129 }, { "epoch": 0.36, "grad_norm": 1.0103996992111206, "learning_rate": 1.9590152457929653e-05, "loss": 0.1144, "step": 2130 }, { "epoch": 0.36, "grad_norm": 1.053412914276123, "learning_rate": 1.9589640642578353e-05, "loss": 0.1293, "step": 2131 }, { "epoch": 0.36, "grad_norm": 1.0239264965057373, "learning_rate": 1.9589128514544657e-05, "loss": 0.1046, "step": 2132 }, { "epoch": 0.36, "grad_norm": 1.0438114404678345, "learning_rate": 1.9588616073845275e-05, "loss": 0.1228, "step": 2133 }, { "epoch": 0.36, "grad_norm": 1.0050355195999146, "learning_rate": 1.9588103320496906e-05, "loss": 0.1157, "step": 2134 }, { "epoch": 0.36, "grad_norm": 1.2537990808486938, "learning_rate": 1.9587590254516275e-05, "loss": 0.1113, "step": 2135 }, { "epoch": 0.36, "grad_norm": 0.9951109886169434, "learning_rate": 1.9587076875920107e-05, "loss": 0.1138, "step": 2136 }, { "epoch": 0.36, "grad_norm": 0.9243948459625244, "learning_rate": 1.958656318472514e-05, "loss": 0.1109, "step": 2137 }, { "epoch": 0.36, "grad_norm": 0.909635066986084, "learning_rate": 1.958604918094813e-05, "loss": 0.0806, "step": 2138 }, { "epoch": 0.36, "grad_norm": 1.050925850868225, "learning_rate": 1.9585534864605832e-05, "loss": 0.0952, "step": 2139 }, { "epoch": 0.36, "grad_norm": 1.279081106185913, "learning_rate": 1.958502023571502e-05, "loss": 0.1405, "step": 2140 }, { "epoch": 0.36, "grad_norm": 0.8839218020439148, "learning_rate": 1.958450529429247e-05, "loss": 0.0963, "step": 2141 }, { "epoch": 0.36, "grad_norm": 1.0740448236465454, "learning_rate": 1.958399004035497e-05, "loss": 0.0954, "step": 2142 }, { "epoch": 0.36, "grad_norm": 1.220015048980713, "learning_rate": 1.9583474473919327e-05, "loss": 0.1157, "step": 2143 }, { "epoch": 0.36, "grad_norm": 1.267430067062378, "learning_rate": 1.958295859500235e-05, "loss": 0.106, "step": 2144 }, { "epoch": 0.36, "grad_norm": 1.0503172874450684, "learning_rate": 1.9582442403620854e-05, "loss": 0.1395, "step": 2145 }, { "epoch": 0.36, "grad_norm": 1.1239848136901855, "learning_rate": 1.958192589979168e-05, "loss": 0.1007, "step": 2146 }, { "epoch": 0.36, "grad_norm": 1.0694332122802734, "learning_rate": 1.958140908353166e-05, "loss": 0.1114, "step": 2147 }, { "epoch": 0.36, "grad_norm": 0.9657241106033325, "learning_rate": 1.9580891954857652e-05, "loss": 0.0964, "step": 2148 }, { "epoch": 0.36, "grad_norm": 1.0040407180786133, "learning_rate": 1.9580374513786515e-05, "loss": 0.1128, "step": 2149 }, { "epoch": 0.36, "grad_norm": 1.1259753704071045, "learning_rate": 1.9579856760335122e-05, "loss": 0.1207, "step": 2150 }, { "epoch": 0.36, "grad_norm": 1.5105680227279663, "learning_rate": 1.9579338694520353e-05, "loss": 0.1117, "step": 2151 }, { "epoch": 0.36, "grad_norm": 1.6309853792190552, "learning_rate": 1.9578820316359102e-05, "loss": 0.1317, "step": 2152 }, { "epoch": 0.36, "grad_norm": 1.529719591140747, "learning_rate": 1.957830162586827e-05, "loss": 0.112, "step": 2153 }, { "epoch": 0.36, "grad_norm": 0.9335795640945435, "learning_rate": 1.957778262306477e-05, "loss": 0.0908, "step": 2154 }, { "epoch": 0.36, "grad_norm": 2.2236180305480957, "learning_rate": 1.957726330796552e-05, "loss": 0.1071, "step": 2155 }, { "epoch": 0.36, "grad_norm": 1.4151924848556519, "learning_rate": 1.9576743680587466e-05, "loss": 0.1332, "step": 2156 }, { "epoch": 0.36, "grad_norm": 1.0435909032821655, "learning_rate": 1.9576223740947536e-05, "loss": 0.0991, "step": 2157 }, { "epoch": 0.36, "grad_norm": 1.1233123540878296, "learning_rate": 1.9575703489062693e-05, "loss": 0.093, "step": 2158 }, { "epoch": 0.36, "grad_norm": 1.3204401731491089, "learning_rate": 1.95751829249499e-05, "loss": 0.1572, "step": 2159 }, { "epoch": 0.36, "grad_norm": 1.4622570276260376, "learning_rate": 1.9574662048626125e-05, "loss": 0.1218, "step": 2160 }, { "epoch": 0.36, "grad_norm": 1.5188353061676025, "learning_rate": 1.9574140860108353e-05, "loss": 0.1058, "step": 2161 }, { "epoch": 0.36, "grad_norm": 0.9504758715629578, "learning_rate": 1.9573619359413586e-05, "loss": 0.1183, "step": 2162 }, { "epoch": 0.36, "grad_norm": 1.3154962062835693, "learning_rate": 1.957309754655882e-05, "loss": 0.0993, "step": 2163 }, { "epoch": 0.36, "grad_norm": 1.3034954071044922, "learning_rate": 1.9572575421561072e-05, "loss": 0.1313, "step": 2164 }, { "epoch": 0.36, "grad_norm": 0.9431872367858887, "learning_rate": 1.9572052984437365e-05, "loss": 0.1161, "step": 2165 }, { "epoch": 0.36, "grad_norm": 1.0681959390640259, "learning_rate": 1.9571530235204735e-05, "loss": 0.0937, "step": 2166 }, { "epoch": 0.36, "grad_norm": 1.6450920104980469, "learning_rate": 1.9571007173880226e-05, "loss": 0.1265, "step": 2167 }, { "epoch": 0.36, "grad_norm": 1.249108910560608, "learning_rate": 1.9570483800480896e-05, "loss": 0.1493, "step": 2168 }, { "epoch": 0.36, "grad_norm": 1.1527005434036255, "learning_rate": 1.9569960115023806e-05, "loss": 0.1103, "step": 2169 }, { "epoch": 0.36, "grad_norm": 1.2446154356002808, "learning_rate": 1.9569436117526036e-05, "loss": 0.1354, "step": 2170 }, { "epoch": 0.36, "grad_norm": 1.2318649291992188, "learning_rate": 1.956891180800467e-05, "loss": 0.1209, "step": 2171 }, { "epoch": 0.36, "grad_norm": 1.0126399993896484, "learning_rate": 1.9568387186476802e-05, "loss": 0.0857, "step": 2172 }, { "epoch": 0.36, "grad_norm": 1.063870906829834, "learning_rate": 1.9567862252959538e-05, "loss": 0.0985, "step": 2173 }, { "epoch": 0.36, "grad_norm": 1.445108413696289, "learning_rate": 1.9567337007469996e-05, "loss": 0.1123, "step": 2174 }, { "epoch": 0.36, "grad_norm": 1.0198897123336792, "learning_rate": 1.95668114500253e-05, "loss": 0.0865, "step": 2175 }, { "epoch": 0.36, "grad_norm": 1.8106632232666016, "learning_rate": 1.9566285580642588e-05, "loss": 0.1462, "step": 2176 }, { "epoch": 0.36, "grad_norm": 1.1015123128890991, "learning_rate": 1.9565759399339005e-05, "loss": 0.107, "step": 2177 }, { "epoch": 0.36, "grad_norm": 1.475669264793396, "learning_rate": 1.956523290613171e-05, "loss": 0.1363, "step": 2178 }, { "epoch": 0.36, "grad_norm": 1.1341620683670044, "learning_rate": 1.9564706101037873e-05, "loss": 0.1077, "step": 2179 }, { "epoch": 0.36, "grad_norm": 1.178935170173645, "learning_rate": 1.9564178984074663e-05, "loss": 0.1098, "step": 2180 }, { "epoch": 0.36, "grad_norm": 1.1112136840820312, "learning_rate": 1.9563651555259275e-05, "loss": 0.1308, "step": 2181 }, { "epoch": 0.36, "grad_norm": 1.010291576385498, "learning_rate": 1.95631238146089e-05, "loss": 0.1196, "step": 2182 }, { "epoch": 0.37, "grad_norm": 0.9423213005065918, "learning_rate": 1.9562595762140755e-05, "loss": 0.1049, "step": 2183 }, { "epoch": 0.37, "grad_norm": 1.0106379985809326, "learning_rate": 1.9562067397872043e-05, "loss": 0.1111, "step": 2184 }, { "epoch": 0.37, "grad_norm": 0.8130627870559692, "learning_rate": 1.9561538721820007e-05, "loss": 0.0881, "step": 2185 }, { "epoch": 0.37, "grad_norm": 0.9898910522460938, "learning_rate": 1.9561009734001878e-05, "loss": 0.0971, "step": 2186 }, { "epoch": 0.37, "grad_norm": 1.0413293838500977, "learning_rate": 1.9560480434434903e-05, "loss": 0.1096, "step": 2187 }, { "epoch": 0.37, "grad_norm": 1.273383617401123, "learning_rate": 1.9559950823136344e-05, "loss": 0.1295, "step": 2188 }, { "epoch": 0.37, "grad_norm": 1.0298296213150024, "learning_rate": 1.955942090012347e-05, "loss": 0.1065, "step": 2189 }, { "epoch": 0.37, "grad_norm": 0.9509395360946655, "learning_rate": 1.9558890665413555e-05, "loss": 0.1072, "step": 2190 }, { "epoch": 0.37, "grad_norm": 1.2234241962432861, "learning_rate": 1.955836011902389e-05, "loss": 0.1066, "step": 2191 }, { "epoch": 0.37, "grad_norm": 0.8983230590820312, "learning_rate": 1.955782926097178e-05, "loss": 0.0903, "step": 2192 }, { "epoch": 0.37, "grad_norm": 1.4087201356887817, "learning_rate": 1.9557298091274527e-05, "loss": 0.1086, "step": 2193 }, { "epoch": 0.37, "grad_norm": 0.8804011344909668, "learning_rate": 1.955676660994945e-05, "loss": 0.114, "step": 2194 }, { "epoch": 0.37, "grad_norm": 1.0531777143478394, "learning_rate": 1.9556234817013887e-05, "loss": 0.0985, "step": 2195 }, { "epoch": 0.37, "grad_norm": 1.1012578010559082, "learning_rate": 1.955570271248517e-05, "loss": 0.1151, "step": 2196 }, { "epoch": 0.37, "grad_norm": 1.1930532455444336, "learning_rate": 1.955517029638065e-05, "loss": 0.1219, "step": 2197 }, { "epoch": 0.37, "grad_norm": 1.2473663091659546, "learning_rate": 1.955463756871769e-05, "loss": 0.1031, "step": 2198 }, { "epoch": 0.37, "grad_norm": 0.9306249618530273, "learning_rate": 1.955410452951366e-05, "loss": 0.1142, "step": 2199 }, { "epoch": 0.37, "grad_norm": 0.9861418604850769, "learning_rate": 1.9553571178785934e-05, "loss": 0.1098, "step": 2200 }, { "epoch": 0.37, "grad_norm": 0.981347382068634, "learning_rate": 1.9553037516551915e-05, "loss": 0.112, "step": 2201 }, { "epoch": 0.37, "grad_norm": 0.8601561188697815, "learning_rate": 1.955250354282899e-05, "loss": 0.0919, "step": 2202 }, { "epoch": 0.37, "grad_norm": 1.2721556425094604, "learning_rate": 1.955196925763458e-05, "loss": 0.112, "step": 2203 }, { "epoch": 0.37, "grad_norm": 1.1079756021499634, "learning_rate": 1.95514346609861e-05, "loss": 0.1119, "step": 2204 }, { "epoch": 0.37, "grad_norm": 1.3945643901824951, "learning_rate": 1.9550899752900983e-05, "loss": 0.1251, "step": 2205 }, { "epoch": 0.37, "grad_norm": 0.9950612187385559, "learning_rate": 1.9550364533396674e-05, "loss": 0.085, "step": 2206 }, { "epoch": 0.37, "grad_norm": 1.1719012260437012, "learning_rate": 1.954982900249062e-05, "loss": 0.1137, "step": 2207 }, { "epoch": 0.37, "grad_norm": 1.239694356918335, "learning_rate": 1.9549293160200283e-05, "loss": 0.1122, "step": 2208 }, { "epoch": 0.37, "grad_norm": 1.0892512798309326, "learning_rate": 1.9548757006543137e-05, "loss": 0.1043, "step": 2209 }, { "epoch": 0.37, "grad_norm": 0.9515162706375122, "learning_rate": 1.9548220541536662e-05, "loss": 0.1028, "step": 2210 }, { "epoch": 0.37, "grad_norm": 1.1555581092834473, "learning_rate": 1.954768376519835e-05, "loss": 0.1047, "step": 2211 }, { "epoch": 0.37, "grad_norm": 0.9956529140472412, "learning_rate": 1.9547146677545707e-05, "loss": 0.0994, "step": 2212 }, { "epoch": 0.37, "grad_norm": 1.195066213607788, "learning_rate": 1.9546609278596243e-05, "loss": 0.1036, "step": 2213 }, { "epoch": 0.37, "grad_norm": 0.9287089109420776, "learning_rate": 1.9546071568367476e-05, "loss": 0.0912, "step": 2214 }, { "epoch": 0.37, "grad_norm": 1.2118170261383057, "learning_rate": 1.9545533546876947e-05, "loss": 0.0962, "step": 2215 }, { "epoch": 0.37, "grad_norm": 1.0912235975265503, "learning_rate": 1.9544995214142194e-05, "loss": 0.1026, "step": 2216 }, { "epoch": 0.37, "grad_norm": 1.0469869375228882, "learning_rate": 1.9544456570180773e-05, "loss": 0.0816, "step": 2217 }, { "epoch": 0.37, "grad_norm": 1.0599253177642822, "learning_rate": 1.954391761501024e-05, "loss": 0.0928, "step": 2218 }, { "epoch": 0.37, "grad_norm": 1.3527055978775024, "learning_rate": 1.9543378348648176e-05, "loss": 0.1173, "step": 2219 }, { "epoch": 0.37, "grad_norm": 0.9453797340393066, "learning_rate": 1.954283877111216e-05, "loss": 0.0957, "step": 2220 }, { "epoch": 0.37, "grad_norm": 1.1252586841583252, "learning_rate": 1.954229888241979e-05, "loss": 0.1211, "step": 2221 }, { "epoch": 0.37, "grad_norm": 1.26348078250885, "learning_rate": 1.9541758682588668e-05, "loss": 0.1079, "step": 2222 }, { "epoch": 0.37, "grad_norm": 1.252737283706665, "learning_rate": 1.9541218171636405e-05, "loss": 0.1412, "step": 2223 }, { "epoch": 0.37, "grad_norm": 1.1548649072647095, "learning_rate": 1.954067734958063e-05, "loss": 0.1205, "step": 2224 }, { "epoch": 0.37, "grad_norm": 1.0459412336349487, "learning_rate": 1.9540136216438966e-05, "loss": 0.1375, "step": 2225 }, { "epoch": 0.37, "grad_norm": 0.9592022895812988, "learning_rate": 1.9539594772229074e-05, "loss": 0.0926, "step": 2226 }, { "epoch": 0.37, "grad_norm": 1.084649920463562, "learning_rate": 1.95390530169686e-05, "loss": 0.1184, "step": 2227 }, { "epoch": 0.37, "grad_norm": 1.194392204284668, "learning_rate": 1.9538510950675204e-05, "loss": 0.0928, "step": 2228 }, { "epoch": 0.37, "grad_norm": 1.6085373163223267, "learning_rate": 1.9537968573366567e-05, "loss": 0.1476, "step": 2229 }, { "epoch": 0.37, "grad_norm": 1.604964017868042, "learning_rate": 1.9537425885060375e-05, "loss": 0.1452, "step": 2230 }, { "epoch": 0.37, "grad_norm": 1.4375747442245483, "learning_rate": 1.9536882885774317e-05, "loss": 0.1109, "step": 2231 }, { "epoch": 0.37, "grad_norm": 1.078608751296997, "learning_rate": 1.9536339575526105e-05, "loss": 0.101, "step": 2232 }, { "epoch": 0.37, "grad_norm": 1.0903905630111694, "learning_rate": 1.953579595433345e-05, "loss": 0.1049, "step": 2233 }, { "epoch": 0.37, "grad_norm": 0.9881641268730164, "learning_rate": 1.9535252022214074e-05, "loss": 0.104, "step": 2234 }, { "epoch": 0.37, "grad_norm": 1.7423349618911743, "learning_rate": 1.9534707779185724e-05, "loss": 0.141, "step": 2235 }, { "epoch": 0.37, "grad_norm": 1.0314584970474243, "learning_rate": 1.9534163225266134e-05, "loss": 0.1154, "step": 2236 }, { "epoch": 0.37, "grad_norm": 0.942112147808075, "learning_rate": 1.9533618360473066e-05, "loss": 0.1046, "step": 2237 }, { "epoch": 0.37, "grad_norm": 1.3321887254714966, "learning_rate": 1.9533073184824284e-05, "loss": 0.1041, "step": 2238 }, { "epoch": 0.37, "grad_norm": 1.143500804901123, "learning_rate": 1.9532527698337564e-05, "loss": 0.1036, "step": 2239 }, { "epoch": 0.37, "grad_norm": 1.1114819049835205, "learning_rate": 1.9531981901030695e-05, "loss": 0.0792, "step": 2240 }, { "epoch": 0.37, "grad_norm": 1.0909775495529175, "learning_rate": 1.953143579292147e-05, "loss": 0.0867, "step": 2241 }, { "epoch": 0.37, "grad_norm": 1.5350240468978882, "learning_rate": 1.95308893740277e-05, "loss": 0.1401, "step": 2242 }, { "epoch": 0.38, "grad_norm": 1.0292288064956665, "learning_rate": 1.9530342644367196e-05, "loss": 0.115, "step": 2243 }, { "epoch": 0.38, "grad_norm": 1.2829030752182007, "learning_rate": 1.952979560395779e-05, "loss": 0.1202, "step": 2244 }, { "epoch": 0.38, "grad_norm": 1.2003440856933594, "learning_rate": 1.9529248252817313e-05, "loss": 0.1059, "step": 2245 }, { "epoch": 0.38, "grad_norm": 1.2013804912567139, "learning_rate": 1.9528700590963617e-05, "loss": 0.1109, "step": 2246 }, { "epoch": 0.38, "grad_norm": 0.9767107367515564, "learning_rate": 1.9528152618414556e-05, "loss": 0.0962, "step": 2247 }, { "epoch": 0.38, "grad_norm": 1.195831537246704, "learning_rate": 1.9527604335188003e-05, "loss": 0.1215, "step": 2248 }, { "epoch": 0.38, "grad_norm": 0.9746685028076172, "learning_rate": 1.9527055741301824e-05, "loss": 0.0881, "step": 2249 }, { "epoch": 0.38, "grad_norm": 1.0049817562103271, "learning_rate": 1.952650683677392e-05, "loss": 0.1198, "step": 2250 }, { "epoch": 0.38, "grad_norm": 1.1971089839935303, "learning_rate": 1.9525957621622184e-05, "loss": 0.1204, "step": 2251 }, { "epoch": 0.38, "grad_norm": 1.28564453125, "learning_rate": 1.9525408095864522e-05, "loss": 0.1014, "step": 2252 }, { "epoch": 0.38, "grad_norm": 1.113180160522461, "learning_rate": 1.9524858259518852e-05, "loss": 0.1032, "step": 2253 }, { "epoch": 0.38, "grad_norm": 1.0751804113388062, "learning_rate": 1.95243081126031e-05, "loss": 0.1132, "step": 2254 }, { "epoch": 0.38, "grad_norm": 0.8551537394523621, "learning_rate": 1.952375765513521e-05, "loss": 0.0897, "step": 2255 }, { "epoch": 0.38, "grad_norm": 0.923078715801239, "learning_rate": 1.9523206887133126e-05, "loss": 0.1102, "step": 2256 }, { "epoch": 0.38, "grad_norm": 1.5076959133148193, "learning_rate": 1.952265580861481e-05, "loss": 0.1075, "step": 2257 }, { "epoch": 0.38, "grad_norm": 0.9017732739448547, "learning_rate": 1.9522104419598226e-05, "loss": 0.0979, "step": 2258 }, { "epoch": 0.38, "grad_norm": 1.1716943979263306, "learning_rate": 1.952155272010136e-05, "loss": 0.1052, "step": 2259 }, { "epoch": 0.38, "grad_norm": 1.3723772764205933, "learning_rate": 1.952100071014219e-05, "loss": 0.1264, "step": 2260 }, { "epoch": 0.38, "grad_norm": 0.9346956014633179, "learning_rate": 1.9520448389738722e-05, "loss": 0.0999, "step": 2261 }, { "epoch": 0.38, "grad_norm": 1.5559451580047607, "learning_rate": 1.9519895758908966e-05, "loss": 0.1218, "step": 2262 }, { "epoch": 0.38, "grad_norm": 1.3246843814849854, "learning_rate": 1.951934281767094e-05, "loss": 0.109, "step": 2263 }, { "epoch": 0.38, "grad_norm": 1.2526553869247437, "learning_rate": 1.9518789566042673e-05, "loss": 0.1207, "step": 2264 }, { "epoch": 0.38, "grad_norm": 1.6102280616760254, "learning_rate": 1.95182360040422e-05, "loss": 0.1291, "step": 2265 }, { "epoch": 0.38, "grad_norm": 1.3087422847747803, "learning_rate": 1.9517682131687577e-05, "loss": 0.1047, "step": 2266 }, { "epoch": 0.38, "grad_norm": 1.3514022827148438, "learning_rate": 1.9517127948996866e-05, "loss": 0.109, "step": 2267 }, { "epoch": 0.38, "grad_norm": 1.179494023323059, "learning_rate": 1.9516573455988127e-05, "loss": 0.1082, "step": 2268 }, { "epoch": 0.38, "grad_norm": 1.208091139793396, "learning_rate": 1.951601865267945e-05, "loss": 0.111, "step": 2269 }, { "epoch": 0.38, "grad_norm": 1.1960140466690063, "learning_rate": 1.9515463539088917e-05, "loss": 0.1307, "step": 2270 }, { "epoch": 0.38, "grad_norm": 1.198843240737915, "learning_rate": 1.951490811523463e-05, "loss": 0.1023, "step": 2271 }, { "epoch": 0.38, "grad_norm": 1.1652371883392334, "learning_rate": 1.9514352381134705e-05, "loss": 0.1217, "step": 2272 }, { "epoch": 0.38, "grad_norm": 1.0056638717651367, "learning_rate": 1.9513796336807254e-05, "loss": 0.0985, "step": 2273 }, { "epoch": 0.38, "grad_norm": 1.4640936851501465, "learning_rate": 1.9513239982270415e-05, "loss": 0.1288, "step": 2274 }, { "epoch": 0.38, "grad_norm": 1.1409715414047241, "learning_rate": 1.9512683317542325e-05, "loss": 0.1189, "step": 2275 }, { "epoch": 0.38, "grad_norm": 1.0092005729675293, "learning_rate": 1.9512126342641138e-05, "loss": 0.1301, "step": 2276 }, { "epoch": 0.38, "grad_norm": 1.2657707929611206, "learning_rate": 1.9511569057585006e-05, "loss": 0.1299, "step": 2277 }, { "epoch": 0.38, "grad_norm": 1.4821230173110962, "learning_rate": 1.951101146239211e-05, "loss": 0.1083, "step": 2278 }, { "epoch": 0.38, "grad_norm": 0.9663313031196594, "learning_rate": 1.9510453557080627e-05, "loss": 0.1109, "step": 2279 }, { "epoch": 0.38, "grad_norm": 1.1661566495895386, "learning_rate": 1.9509895341668748e-05, "loss": 0.1322, "step": 2280 }, { "epoch": 0.38, "grad_norm": 1.0621907711029053, "learning_rate": 1.9509336816174675e-05, "loss": 0.1284, "step": 2281 }, { "epoch": 0.38, "grad_norm": 1.3349627256393433, "learning_rate": 1.950877798061662e-05, "loss": 0.1418, "step": 2282 }, { "epoch": 0.38, "grad_norm": 0.9515792727470398, "learning_rate": 1.95082188350128e-05, "loss": 0.1005, "step": 2283 }, { "epoch": 0.38, "grad_norm": 1.2717108726501465, "learning_rate": 1.9507659379381453e-05, "loss": 0.1069, "step": 2284 }, { "epoch": 0.38, "grad_norm": 0.9668344259262085, "learning_rate": 1.9507099613740818e-05, "loss": 0.0988, "step": 2285 }, { "epoch": 0.38, "grad_norm": 1.1814205646514893, "learning_rate": 1.9506539538109142e-05, "loss": 0.1352, "step": 2286 }, { "epoch": 0.38, "grad_norm": 1.723777413368225, "learning_rate": 1.95059791525047e-05, "loss": 0.1279, "step": 2287 }, { "epoch": 0.38, "grad_norm": 1.3418586254119873, "learning_rate": 1.9505418456945754e-05, "loss": 0.1314, "step": 2288 }, { "epoch": 0.38, "grad_norm": 1.3996977806091309, "learning_rate": 1.9504857451450586e-05, "loss": 0.1458, "step": 2289 }, { "epoch": 0.38, "grad_norm": 1.1941859722137451, "learning_rate": 1.950429613603749e-05, "loss": 0.1356, "step": 2290 }, { "epoch": 0.38, "grad_norm": 1.0145604610443115, "learning_rate": 1.950373451072477e-05, "loss": 0.124, "step": 2291 }, { "epoch": 0.38, "grad_norm": 1.1232752799987793, "learning_rate": 1.950317257553074e-05, "loss": 0.107, "step": 2292 }, { "epoch": 0.38, "grad_norm": 1.3707982301712036, "learning_rate": 1.9502610330473715e-05, "loss": 0.1367, "step": 2293 }, { "epoch": 0.38, "grad_norm": 1.083462119102478, "learning_rate": 1.9502047775572036e-05, "loss": 0.0939, "step": 2294 }, { "epoch": 0.38, "grad_norm": 1.097699522972107, "learning_rate": 1.950148491084404e-05, "loss": 0.1179, "step": 2295 }, { "epoch": 0.38, "grad_norm": 1.0450434684753418, "learning_rate": 1.9500921736308088e-05, "loss": 0.0838, "step": 2296 }, { "epoch": 0.38, "grad_norm": 0.9231737852096558, "learning_rate": 1.9500358251982533e-05, "loss": 0.118, "step": 2297 }, { "epoch": 0.38, "grad_norm": 1.007659673690796, "learning_rate": 1.9499794457885753e-05, "loss": 0.0838, "step": 2298 }, { "epoch": 0.38, "grad_norm": 0.8735063672065735, "learning_rate": 1.9499230354036133e-05, "loss": 0.101, "step": 2299 }, { "epoch": 0.38, "grad_norm": 0.9021313190460205, "learning_rate": 1.9498665940452063e-05, "loss": 0.0987, "step": 2300 }, { "epoch": 0.38, "grad_norm": 0.9397376179695129, "learning_rate": 1.9498101217151944e-05, "loss": 0.1021, "step": 2301 }, { "epoch": 0.39, "grad_norm": 1.1290913820266724, "learning_rate": 1.9497536184154198e-05, "loss": 0.1245, "step": 2302 }, { "epoch": 0.39, "grad_norm": 1.096154808998108, "learning_rate": 1.9496970841477242e-05, "loss": 0.1139, "step": 2303 }, { "epoch": 0.39, "grad_norm": 1.223215103149414, "learning_rate": 1.9496405189139512e-05, "loss": 0.1023, "step": 2304 }, { "epoch": 0.39, "grad_norm": 1.1627881526947021, "learning_rate": 1.9495839227159448e-05, "loss": 0.0963, "step": 2305 }, { "epoch": 0.39, "grad_norm": 1.1742075681686401, "learning_rate": 1.949527295555551e-05, "loss": 0.1298, "step": 2306 }, { "epoch": 0.39, "grad_norm": 1.2059510946273804, "learning_rate": 1.9494706374346158e-05, "loss": 0.1406, "step": 2307 }, { "epoch": 0.39, "grad_norm": 1.178207278251648, "learning_rate": 1.949413948354987e-05, "loss": 0.1175, "step": 2308 }, { "epoch": 0.39, "grad_norm": 1.352957844734192, "learning_rate": 1.9493572283185126e-05, "loss": 0.1448, "step": 2309 }, { "epoch": 0.39, "grad_norm": 1.0914967060089111, "learning_rate": 1.949300477327042e-05, "loss": 0.1153, "step": 2310 }, { "epoch": 0.39, "grad_norm": 0.9186986684799194, "learning_rate": 1.9492436953824258e-05, "loss": 0.1029, "step": 2311 }, { "epoch": 0.39, "grad_norm": 0.9614089727401733, "learning_rate": 1.9491868824865158e-05, "loss": 0.0913, "step": 2312 }, { "epoch": 0.39, "grad_norm": 0.9275227785110474, "learning_rate": 1.9491300386411637e-05, "loss": 0.1076, "step": 2313 }, { "epoch": 0.39, "grad_norm": 1.1457384824752808, "learning_rate": 1.9490731638482236e-05, "loss": 0.1287, "step": 2314 }, { "epoch": 0.39, "grad_norm": 1.1695383787155151, "learning_rate": 1.9490162581095496e-05, "loss": 0.1031, "step": 2315 }, { "epoch": 0.39, "grad_norm": 0.871924877166748, "learning_rate": 1.9489593214269976e-05, "loss": 0.1172, "step": 2316 }, { "epoch": 0.39, "grad_norm": 0.972500205039978, "learning_rate": 1.9489023538024238e-05, "loss": 0.1189, "step": 2317 }, { "epoch": 0.39, "grad_norm": 0.9562506079673767, "learning_rate": 1.9488453552376856e-05, "loss": 0.1113, "step": 2318 }, { "epoch": 0.39, "grad_norm": 0.9464185833930969, "learning_rate": 1.9487883257346414e-05, "loss": 0.1139, "step": 2319 }, { "epoch": 0.39, "grad_norm": 0.9131965637207031, "learning_rate": 1.9487312652951514e-05, "loss": 0.0932, "step": 2320 }, { "epoch": 0.39, "grad_norm": 0.966636061668396, "learning_rate": 1.948674173921076e-05, "loss": 0.0928, "step": 2321 }, { "epoch": 0.39, "grad_norm": 0.9823619723320007, "learning_rate": 1.9486170516142756e-05, "loss": 0.0979, "step": 2322 }, { "epoch": 0.39, "grad_norm": 0.9364995360374451, "learning_rate": 1.9485598983766142e-05, "loss": 0.1153, "step": 2323 }, { "epoch": 0.39, "grad_norm": 1.2613306045532227, "learning_rate": 1.9485027142099543e-05, "loss": 0.1222, "step": 2324 }, { "epoch": 0.39, "grad_norm": 1.2345689535140991, "learning_rate": 1.948445499116161e-05, "loss": 0.0879, "step": 2325 }, { "epoch": 0.39, "grad_norm": 0.9968721866607666, "learning_rate": 1.9483882530970998e-05, "loss": 0.0984, "step": 2326 }, { "epoch": 0.39, "grad_norm": 1.4659111499786377, "learning_rate": 1.9483309761546372e-05, "loss": 0.1049, "step": 2327 }, { "epoch": 0.39, "grad_norm": 0.8074511885643005, "learning_rate": 1.948273668290641e-05, "loss": 0.0923, "step": 2328 }, { "epoch": 0.39, "grad_norm": 0.8892139196395874, "learning_rate": 1.9482163295069795e-05, "loss": 0.1149, "step": 2329 }, { "epoch": 0.39, "grad_norm": 1.2292660474777222, "learning_rate": 1.9481589598055225e-05, "loss": 0.0971, "step": 2330 }, { "epoch": 0.39, "grad_norm": 1.0413563251495361, "learning_rate": 1.9481015591881402e-05, "loss": 0.1112, "step": 2331 }, { "epoch": 0.39, "grad_norm": 1.227150559425354, "learning_rate": 1.9480441276567048e-05, "loss": 0.1039, "step": 2332 }, { "epoch": 0.39, "grad_norm": 1.2848073244094849, "learning_rate": 1.9479866652130886e-05, "loss": 0.1129, "step": 2333 }, { "epoch": 0.39, "grad_norm": 1.0456105470657349, "learning_rate": 1.9479291718591657e-05, "loss": 0.0986, "step": 2334 }, { "epoch": 0.39, "grad_norm": 1.1782320737838745, "learning_rate": 1.94787164759681e-05, "loss": 0.1142, "step": 2335 }, { "epoch": 0.39, "grad_norm": 1.012959361076355, "learning_rate": 1.9478140924278976e-05, "loss": 0.115, "step": 2336 }, { "epoch": 0.39, "grad_norm": 1.0786932706832886, "learning_rate": 1.947756506354305e-05, "loss": 0.0788, "step": 2337 }, { "epoch": 0.39, "grad_norm": 1.3649516105651855, "learning_rate": 1.94769888937791e-05, "loss": 0.1346, "step": 2338 }, { "epoch": 0.39, "grad_norm": 0.9381360411643982, "learning_rate": 1.947641241500591e-05, "loss": 0.1185, "step": 2339 }, { "epoch": 0.39, "grad_norm": 1.0576369762420654, "learning_rate": 1.9475835627242287e-05, "loss": 0.1308, "step": 2340 }, { "epoch": 0.39, "grad_norm": 0.9286512136459351, "learning_rate": 1.9475258530507023e-05, "loss": 0.0928, "step": 2341 }, { "epoch": 0.39, "grad_norm": 0.9009131789207458, "learning_rate": 1.947468112481894e-05, "loss": 0.0927, "step": 2342 }, { "epoch": 0.39, "grad_norm": 1.1992948055267334, "learning_rate": 1.9474103410196872e-05, "loss": 0.1017, "step": 2343 }, { "epoch": 0.39, "grad_norm": 0.9999916553497314, "learning_rate": 1.947352538665965e-05, "loss": 0.1269, "step": 2344 }, { "epoch": 0.39, "grad_norm": 1.1086565256118774, "learning_rate": 1.9472947054226117e-05, "loss": 0.1154, "step": 2345 }, { "epoch": 0.39, "grad_norm": 0.9665881395339966, "learning_rate": 1.947236841291514e-05, "loss": 0.0991, "step": 2346 }, { "epoch": 0.39, "grad_norm": 1.0285240411758423, "learning_rate": 1.9471789462745584e-05, "loss": 0.1158, "step": 2347 }, { "epoch": 0.39, "grad_norm": 1.0331064462661743, "learning_rate": 1.9471210203736318e-05, "loss": 0.1265, "step": 2348 }, { "epoch": 0.39, "grad_norm": 1.0835005044937134, "learning_rate": 1.9470630635906243e-05, "loss": 0.1117, "step": 2349 }, { "epoch": 0.39, "grad_norm": 0.954266369342804, "learning_rate": 1.9470050759274244e-05, "loss": 0.0886, "step": 2350 }, { "epoch": 0.39, "grad_norm": 1.0058796405792236, "learning_rate": 1.9469470573859237e-05, "loss": 0.1097, "step": 2351 }, { "epoch": 0.39, "grad_norm": 1.2354040145874023, "learning_rate": 1.9468890079680134e-05, "loss": 0.1178, "step": 2352 }, { "epoch": 0.39, "grad_norm": 1.05037522315979, "learning_rate": 1.9468309276755865e-05, "loss": 0.112, "step": 2353 }, { "epoch": 0.39, "grad_norm": 1.100658655166626, "learning_rate": 1.946772816510537e-05, "loss": 0.106, "step": 2354 }, { "epoch": 0.39, "grad_norm": 1.0976718664169312, "learning_rate": 1.9467146744747594e-05, "loss": 0.1102, "step": 2355 }, { "epoch": 0.39, "grad_norm": 1.1870027780532837, "learning_rate": 1.9466565015701496e-05, "loss": 0.1178, "step": 2356 }, { "epoch": 0.39, "grad_norm": 0.9821544885635376, "learning_rate": 1.9465982977986044e-05, "loss": 0.1093, "step": 2357 }, { "epoch": 0.39, "grad_norm": 0.932974100112915, "learning_rate": 1.9465400631620216e-05, "loss": 0.0978, "step": 2358 }, { "epoch": 0.39, "grad_norm": 1.0429037809371948, "learning_rate": 1.9464817976623e-05, "loss": 0.1269, "step": 2359 }, { "epoch": 0.39, "grad_norm": 1.1596497297286987, "learning_rate": 1.9464235013013396e-05, "loss": 0.1051, "step": 2360 }, { "epoch": 0.39, "grad_norm": 1.228810429573059, "learning_rate": 1.946365174081041e-05, "loss": 0.1317, "step": 2361 }, { "epoch": 0.4, "grad_norm": 1.1782370805740356, "learning_rate": 1.9463068160033058e-05, "loss": 0.0984, "step": 2362 }, { "epoch": 0.4, "grad_norm": 1.2419679164886475, "learning_rate": 1.9462484270700375e-05, "loss": 0.1198, "step": 2363 }, { "epoch": 0.4, "grad_norm": 0.9205508828163147, "learning_rate": 1.9461900072831394e-05, "loss": 0.0944, "step": 2364 }, { "epoch": 0.4, "grad_norm": 0.9378589391708374, "learning_rate": 1.9461315566445163e-05, "loss": 0.0887, "step": 2365 }, { "epoch": 0.4, "grad_norm": 0.966231107711792, "learning_rate": 1.9460730751560747e-05, "loss": 0.0828, "step": 2366 }, { "epoch": 0.4, "grad_norm": 0.8629338145256042, "learning_rate": 1.9460145628197208e-05, "loss": 0.0878, "step": 2367 }, { "epoch": 0.4, "grad_norm": 1.102302074432373, "learning_rate": 1.9459560196373628e-05, "loss": 0.0996, "step": 2368 }, { "epoch": 0.4, "grad_norm": 1.6175767183303833, "learning_rate": 1.9458974456109095e-05, "loss": 0.1311, "step": 2369 }, { "epoch": 0.4, "grad_norm": 1.3180440664291382, "learning_rate": 1.945838840742271e-05, "loss": 0.1276, "step": 2370 }, { "epoch": 0.4, "grad_norm": 1.086190938949585, "learning_rate": 1.9457802050333575e-05, "loss": 0.0971, "step": 2371 }, { "epoch": 0.4, "grad_norm": 0.7101432085037231, "learning_rate": 1.9457215384860816e-05, "loss": 0.0824, "step": 2372 }, { "epoch": 0.4, "grad_norm": 1.1409294605255127, "learning_rate": 1.945662841102356e-05, "loss": 0.0913, "step": 2373 }, { "epoch": 0.4, "grad_norm": 0.8920586705207825, "learning_rate": 1.9456041128840942e-05, "loss": 0.0946, "step": 2374 }, { "epoch": 0.4, "grad_norm": 1.1295446157455444, "learning_rate": 1.945545353833212e-05, "loss": 0.1099, "step": 2375 }, { "epoch": 0.4, "grad_norm": 1.2273857593536377, "learning_rate": 1.9454865639516245e-05, "loss": 0.0983, "step": 2376 }, { "epoch": 0.4, "grad_norm": 1.996766209602356, "learning_rate": 1.9454277432412486e-05, "loss": 0.1039, "step": 2377 }, { "epoch": 0.4, "grad_norm": 1.0606064796447754, "learning_rate": 1.945368891704003e-05, "loss": 0.0909, "step": 2378 }, { "epoch": 0.4, "grad_norm": 1.0753746032714844, "learning_rate": 1.945310009341806e-05, "loss": 0.0926, "step": 2379 }, { "epoch": 0.4, "grad_norm": 1.075494647026062, "learning_rate": 1.9452510961565776e-05, "loss": 0.1036, "step": 2380 }, { "epoch": 0.4, "grad_norm": 1.331460952758789, "learning_rate": 1.9451921521502387e-05, "loss": 0.1022, "step": 2381 }, { "epoch": 0.4, "grad_norm": 1.417811632156372, "learning_rate": 1.9451331773247117e-05, "loss": 0.1181, "step": 2382 }, { "epoch": 0.4, "grad_norm": 0.877776563167572, "learning_rate": 1.945074171681919e-05, "loss": 0.1115, "step": 2383 }, { "epoch": 0.4, "grad_norm": 1.1935198307037354, "learning_rate": 1.945015135223785e-05, "loss": 0.0993, "step": 2384 }, { "epoch": 0.4, "grad_norm": 1.1247754096984863, "learning_rate": 1.944956067952234e-05, "loss": 0.1265, "step": 2385 }, { "epoch": 0.4, "grad_norm": 1.293923020362854, "learning_rate": 1.9448969698691926e-05, "loss": 0.1476, "step": 2386 }, { "epoch": 0.4, "grad_norm": 0.8268091678619385, "learning_rate": 1.9448378409765874e-05, "loss": 0.0723, "step": 2387 }, { "epoch": 0.4, "grad_norm": 1.3867753744125366, "learning_rate": 1.944778681276347e-05, "loss": 0.1028, "step": 2388 }, { "epoch": 0.4, "grad_norm": 0.8849698901176453, "learning_rate": 1.9447194907703996e-05, "loss": 0.1053, "step": 2389 }, { "epoch": 0.4, "grad_norm": 1.0875263214111328, "learning_rate": 1.9446602694606753e-05, "loss": 0.1153, "step": 2390 }, { "epoch": 0.4, "grad_norm": 0.9639552235603333, "learning_rate": 1.9446010173491054e-05, "loss": 0.1129, "step": 2391 }, { "epoch": 0.4, "grad_norm": 1.2718966007232666, "learning_rate": 1.944541734437622e-05, "loss": 0.1103, "step": 2392 }, { "epoch": 0.4, "grad_norm": 0.9688811302185059, "learning_rate": 1.9444824207281576e-05, "loss": 0.1062, "step": 2393 }, { "epoch": 0.4, "grad_norm": 0.9044179916381836, "learning_rate": 1.9444230762226465e-05, "loss": 0.0704, "step": 2394 }, { "epoch": 0.4, "grad_norm": 1.2274914979934692, "learning_rate": 1.9443637009230236e-05, "loss": 0.1236, "step": 2395 }, { "epoch": 0.4, "grad_norm": 1.0776029825210571, "learning_rate": 1.944304294831225e-05, "loss": 0.1152, "step": 2396 }, { "epoch": 0.4, "grad_norm": 1.0376290082931519, "learning_rate": 1.944244857949188e-05, "loss": 0.0969, "step": 2397 }, { "epoch": 0.4, "grad_norm": 0.8880900740623474, "learning_rate": 1.94418539027885e-05, "loss": 0.1075, "step": 2398 }, { "epoch": 0.4, "grad_norm": 0.8771215081214905, "learning_rate": 1.94412589182215e-05, "loss": 0.099, "step": 2399 }, { "epoch": 0.4, "grad_norm": 0.9528613090515137, "learning_rate": 1.9440663625810287e-05, "loss": 0.1031, "step": 2400 }, { "epoch": 0.4, "grad_norm": 0.7875416278839111, "learning_rate": 1.944006802557427e-05, "loss": 0.1023, "step": 2401 }, { "epoch": 0.4, "grad_norm": 1.0804429054260254, "learning_rate": 1.943947211753286e-05, "loss": 0.1153, "step": 2402 }, { "epoch": 0.4, "grad_norm": 1.0532742738723755, "learning_rate": 1.94388759017055e-05, "loss": 0.0895, "step": 2403 }, { "epoch": 0.4, "grad_norm": 0.8536878228187561, "learning_rate": 1.943827937811162e-05, "loss": 0.0874, "step": 2404 }, { "epoch": 0.4, "grad_norm": 1.0998928546905518, "learning_rate": 1.943768254677068e-05, "loss": 0.1239, "step": 2405 }, { "epoch": 0.4, "grad_norm": 0.8508163094520569, "learning_rate": 1.943708540770213e-05, "loss": 0.1159, "step": 2406 }, { "epoch": 0.4, "grad_norm": 1.0288952589035034, "learning_rate": 1.9436487960925452e-05, "loss": 0.1204, "step": 2407 }, { "epoch": 0.4, "grad_norm": 0.733452320098877, "learning_rate": 1.9435890206460118e-05, "loss": 0.0757, "step": 2408 }, { "epoch": 0.4, "grad_norm": 1.0291790962219238, "learning_rate": 1.943529214432562e-05, "loss": 0.0984, "step": 2409 }, { "epoch": 0.4, "grad_norm": 0.9966174960136414, "learning_rate": 1.9434693774541458e-05, "loss": 0.0939, "step": 2410 }, { "epoch": 0.4, "grad_norm": 0.9943555593490601, "learning_rate": 1.9434095097127148e-05, "loss": 0.1025, "step": 2411 }, { "epoch": 0.4, "grad_norm": 0.7611062526702881, "learning_rate": 1.9433496112102204e-05, "loss": 0.0889, "step": 2412 }, { "epoch": 0.4, "grad_norm": 0.9867941737174988, "learning_rate": 1.943289681948616e-05, "loss": 0.1078, "step": 2413 }, { "epoch": 0.4, "grad_norm": 0.9229453802108765, "learning_rate": 1.9432297219298557e-05, "loss": 0.1019, "step": 2414 }, { "epoch": 0.4, "grad_norm": 1.0797584056854248, "learning_rate": 1.9431697311558943e-05, "loss": 0.1039, "step": 2415 }, { "epoch": 0.4, "grad_norm": 1.1292802095413208, "learning_rate": 1.9431097096286883e-05, "loss": 0.0918, "step": 2416 }, { "epoch": 0.4, "grad_norm": 1.0059093236923218, "learning_rate": 1.9430496573501945e-05, "loss": 0.0935, "step": 2417 }, { "epoch": 0.4, "grad_norm": 1.051035761833191, "learning_rate": 1.942989574322371e-05, "loss": 0.1029, "step": 2418 }, { "epoch": 0.4, "grad_norm": 0.9065150022506714, "learning_rate": 1.942929460547177e-05, "loss": 0.0943, "step": 2419 }, { "epoch": 0.4, "grad_norm": 0.7238724231719971, "learning_rate": 1.942869316026572e-05, "loss": 0.0772, "step": 2420 }, { "epoch": 0.4, "grad_norm": 1.452017068862915, "learning_rate": 1.942809140762518e-05, "loss": 0.0868, "step": 2421 }, { "epoch": 0.41, "grad_norm": 0.870486319065094, "learning_rate": 1.9427489347569768e-05, "loss": 0.1168, "step": 2422 }, { "epoch": 0.41, "grad_norm": 1.1822645664215088, "learning_rate": 1.942688698011911e-05, "loss": 0.1028, "step": 2423 }, { "epoch": 0.41, "grad_norm": 1.130683183670044, "learning_rate": 1.9426284305292853e-05, "loss": 0.1078, "step": 2424 }, { "epoch": 0.41, "grad_norm": 1.1809061765670776, "learning_rate": 1.9425681323110646e-05, "loss": 0.1191, "step": 2425 }, { "epoch": 0.41, "grad_norm": 0.9338909983634949, "learning_rate": 1.9425078033592145e-05, "loss": 0.1039, "step": 2426 }, { "epoch": 0.41, "grad_norm": 0.9642696380615234, "learning_rate": 1.942447443675703e-05, "loss": 0.0751, "step": 2427 }, { "epoch": 0.41, "grad_norm": 0.972433865070343, "learning_rate": 1.9423870532624976e-05, "loss": 0.1165, "step": 2428 }, { "epoch": 0.41, "grad_norm": 1.2371480464935303, "learning_rate": 1.9423266321215678e-05, "loss": 0.0981, "step": 2429 }, { "epoch": 0.41, "grad_norm": 0.8314095139503479, "learning_rate": 1.9422661802548833e-05, "loss": 0.0842, "step": 2430 }, { "epoch": 0.41, "grad_norm": 1.2625842094421387, "learning_rate": 1.942205697664415e-05, "loss": 0.1039, "step": 2431 }, { "epoch": 0.41, "grad_norm": 1.2228786945343018, "learning_rate": 1.942145184352136e-05, "loss": 0.1261, "step": 2432 }, { "epoch": 0.41, "grad_norm": 1.0461673736572266, "learning_rate": 1.9420846403200186e-05, "loss": 0.1098, "step": 2433 }, { "epoch": 0.41, "grad_norm": 1.0543395280838013, "learning_rate": 1.942024065570037e-05, "loss": 0.1157, "step": 2434 }, { "epoch": 0.41, "grad_norm": 0.8245903253555298, "learning_rate": 1.9419634601041666e-05, "loss": 0.0694, "step": 2435 }, { "epoch": 0.41, "grad_norm": 1.0805035829544067, "learning_rate": 1.9419028239243834e-05, "loss": 0.1054, "step": 2436 }, { "epoch": 0.41, "grad_norm": 0.929450273513794, "learning_rate": 1.941842157032664e-05, "loss": 0.0896, "step": 2437 }, { "epoch": 0.41, "grad_norm": 0.9440580606460571, "learning_rate": 1.9417814594309877e-05, "loss": 0.1002, "step": 2438 }, { "epoch": 0.41, "grad_norm": 0.9024208784103394, "learning_rate": 1.9417207311213325e-05, "loss": 0.1132, "step": 2439 }, { "epoch": 0.41, "grad_norm": 0.9619565010070801, "learning_rate": 1.9416599721056793e-05, "loss": 0.0959, "step": 2440 }, { "epoch": 0.41, "grad_norm": 1.0057787895202637, "learning_rate": 1.9415991823860085e-05, "loss": 0.0956, "step": 2441 }, { "epoch": 0.41, "grad_norm": 0.8472241163253784, "learning_rate": 1.9415383619643026e-05, "loss": 0.0911, "step": 2442 }, { "epoch": 0.41, "grad_norm": 0.7676971554756165, "learning_rate": 1.941477510842545e-05, "loss": 0.0683, "step": 2443 }, { "epoch": 0.41, "grad_norm": 0.9763007164001465, "learning_rate": 1.941416629022719e-05, "loss": 0.1144, "step": 2444 }, { "epoch": 0.41, "grad_norm": 0.963310182094574, "learning_rate": 1.941355716506811e-05, "loss": 0.0936, "step": 2445 }, { "epoch": 0.41, "grad_norm": 0.969312310218811, "learning_rate": 1.941294773296806e-05, "loss": 0.0948, "step": 2446 }, { "epoch": 0.41, "grad_norm": 0.9091221690177917, "learning_rate": 1.9412337993946917e-05, "loss": 0.0948, "step": 2447 }, { "epoch": 0.41, "grad_norm": 1.0017939805984497, "learning_rate": 1.941172794802456e-05, "loss": 0.1044, "step": 2448 }, { "epoch": 0.41, "grad_norm": 0.9051522016525269, "learning_rate": 1.941111759522088e-05, "loss": 0.0817, "step": 2449 }, { "epoch": 0.41, "grad_norm": 0.7872950434684753, "learning_rate": 1.941050693555578e-05, "loss": 0.0864, "step": 2450 }, { "epoch": 0.41, "grad_norm": 1.0462921857833862, "learning_rate": 1.9409895969049173e-05, "loss": 0.1109, "step": 2451 }, { "epoch": 0.41, "grad_norm": 0.9335331916809082, "learning_rate": 1.9409284695720974e-05, "loss": 0.0877, "step": 2452 }, { "epoch": 0.41, "grad_norm": 0.8933117985725403, "learning_rate": 1.9408673115591122e-05, "loss": 0.0923, "step": 2453 }, { "epoch": 0.41, "grad_norm": 1.128307819366455, "learning_rate": 1.9408061228679554e-05, "loss": 0.1389, "step": 2454 }, { "epoch": 0.41, "grad_norm": 0.9151224493980408, "learning_rate": 1.940744903500622e-05, "loss": 0.095, "step": 2455 }, { "epoch": 0.41, "grad_norm": 0.9991167187690735, "learning_rate": 1.9406836534591086e-05, "loss": 0.0874, "step": 2456 }, { "epoch": 0.41, "grad_norm": 1.114050269126892, "learning_rate": 1.9406223727454118e-05, "loss": 0.115, "step": 2457 }, { "epoch": 0.41, "grad_norm": 0.8723911046981812, "learning_rate": 1.94056106136153e-05, "loss": 0.0892, "step": 2458 }, { "epoch": 0.41, "grad_norm": 1.153799295425415, "learning_rate": 1.9404997193094625e-05, "loss": 0.106, "step": 2459 }, { "epoch": 0.41, "grad_norm": 1.129396915435791, "learning_rate": 1.9404383465912096e-05, "loss": 0.0997, "step": 2460 }, { "epoch": 0.41, "grad_norm": 1.0448195934295654, "learning_rate": 1.9403769432087716e-05, "loss": 0.094, "step": 2461 }, { "epoch": 0.41, "grad_norm": 1.3573148250579834, "learning_rate": 1.9403155091641515e-05, "loss": 0.1187, "step": 2462 }, { "epoch": 0.41, "grad_norm": 0.9300227165222168, "learning_rate": 1.940254044459352e-05, "loss": 0.0942, "step": 2463 }, { "epoch": 0.41, "grad_norm": 1.1053088903427124, "learning_rate": 1.9401925490963772e-05, "loss": 0.0855, "step": 2464 }, { "epoch": 0.41, "grad_norm": 0.8572197556495667, "learning_rate": 1.9401310230772325e-05, "loss": 0.084, "step": 2465 }, { "epoch": 0.41, "grad_norm": 0.8794727921485901, "learning_rate": 1.9400694664039233e-05, "loss": 0.0731, "step": 2466 }, { "epoch": 0.41, "grad_norm": 0.7681010961532593, "learning_rate": 1.9400078790784583e-05, "loss": 0.0792, "step": 2467 }, { "epoch": 0.41, "grad_norm": 1.1381350755691528, "learning_rate": 1.939946261102844e-05, "loss": 0.1004, "step": 2468 }, { "epoch": 0.41, "grad_norm": 0.9967790246009827, "learning_rate": 1.9398846124790905e-05, "loss": 0.1177, "step": 2469 }, { "epoch": 0.41, "grad_norm": 1.2378945350646973, "learning_rate": 1.9398229332092073e-05, "loss": 0.114, "step": 2470 }, { "epoch": 0.41, "grad_norm": 1.2681782245635986, "learning_rate": 1.9397612232952063e-05, "loss": 0.0946, "step": 2471 }, { "epoch": 0.41, "grad_norm": 0.7456480264663696, "learning_rate": 1.9396994827390986e-05, "loss": 0.0804, "step": 2472 }, { "epoch": 0.41, "grad_norm": 0.9995819330215454, "learning_rate": 1.9396377115428985e-05, "loss": 0.1074, "step": 2473 }, { "epoch": 0.41, "grad_norm": 0.9532726407051086, "learning_rate": 1.9395759097086194e-05, "loss": 0.0855, "step": 2474 }, { "epoch": 0.41, "grad_norm": 0.9381608963012695, "learning_rate": 1.9395140772382764e-05, "loss": 0.1094, "step": 2475 }, { "epoch": 0.41, "grad_norm": 1.0211365222930908, "learning_rate": 1.9394522141338857e-05, "loss": 0.119, "step": 2476 }, { "epoch": 0.41, "grad_norm": 0.8730046153068542, "learning_rate": 1.9393903203974647e-05, "loss": 0.0838, "step": 2477 }, { "epoch": 0.41, "grad_norm": 0.9981149435043335, "learning_rate": 1.9393283960310315e-05, "loss": 0.1089, "step": 2478 }, { "epoch": 0.41, "grad_norm": 0.9302062392234802, "learning_rate": 1.9392664410366046e-05, "loss": 0.1067, "step": 2479 }, { "epoch": 0.41, "grad_norm": 1.1998631954193115, "learning_rate": 1.939204455416205e-05, "loss": 0.103, "step": 2480 }, { "epoch": 0.41, "grad_norm": 0.9328965544700623, "learning_rate": 1.9391424391718533e-05, "loss": 0.1053, "step": 2481 }, { "epoch": 0.42, "grad_norm": 0.8428508043289185, "learning_rate": 1.9390803923055715e-05, "loss": 0.0773, "step": 2482 }, { "epoch": 0.42, "grad_norm": 1.2776528596878052, "learning_rate": 1.939018314819383e-05, "loss": 0.1135, "step": 2483 }, { "epoch": 0.42, "grad_norm": 1.062146544456482, "learning_rate": 1.9389562067153122e-05, "loss": 0.0957, "step": 2484 }, { "epoch": 0.42, "grad_norm": 0.8293456435203552, "learning_rate": 1.9388940679953834e-05, "loss": 0.0963, "step": 2485 }, { "epoch": 0.42, "grad_norm": 1.0611175298690796, "learning_rate": 1.938831898661624e-05, "loss": 0.1264, "step": 2486 }, { "epoch": 0.42, "grad_norm": 0.9698078632354736, "learning_rate": 1.9387696987160596e-05, "loss": 0.1054, "step": 2487 }, { "epoch": 0.42, "grad_norm": 1.0969043970108032, "learning_rate": 1.938707468160719e-05, "loss": 0.1281, "step": 2488 }, { "epoch": 0.42, "grad_norm": 0.9606468081474304, "learning_rate": 1.9386452069976313e-05, "loss": 0.1318, "step": 2489 }, { "epoch": 0.42, "grad_norm": 1.0029828548431396, "learning_rate": 1.938582915228827e-05, "loss": 0.1147, "step": 2490 }, { "epoch": 0.42, "grad_norm": 0.9449719786643982, "learning_rate": 1.9385205928563364e-05, "loss": 0.0958, "step": 2491 }, { "epoch": 0.42, "grad_norm": 0.9374810457229614, "learning_rate": 1.9384582398821924e-05, "loss": 0.1184, "step": 2492 }, { "epoch": 0.42, "grad_norm": 1.2013075351715088, "learning_rate": 1.9383958563084272e-05, "loss": 0.1267, "step": 2493 }, { "epoch": 0.42, "grad_norm": 1.5252418518066406, "learning_rate": 1.9383334421370757e-05, "loss": 0.0948, "step": 2494 }, { "epoch": 0.42, "grad_norm": 1.2516343593597412, "learning_rate": 1.9382709973701725e-05, "loss": 0.1195, "step": 2495 }, { "epoch": 0.42, "grad_norm": 0.8994771838188171, "learning_rate": 1.938208522009754e-05, "loss": 0.0937, "step": 2496 }, { "epoch": 0.42, "grad_norm": 0.9665722846984863, "learning_rate": 1.9381460160578576e-05, "loss": 0.1026, "step": 2497 }, { "epoch": 0.42, "grad_norm": 0.940434455871582, "learning_rate": 1.9380834795165205e-05, "loss": 0.11, "step": 2498 }, { "epoch": 0.42, "grad_norm": 0.8519721031188965, "learning_rate": 1.9380209123877822e-05, "loss": 0.0888, "step": 2499 }, { "epoch": 0.42, "grad_norm": 1.0657001733779907, "learning_rate": 1.937958314673683e-05, "loss": 0.1127, "step": 2500 }, { "epoch": 0.42, "grad_norm": 1.1192578077316284, "learning_rate": 1.9378956863762636e-05, "loss": 0.1158, "step": 2501 }, { "epoch": 0.42, "grad_norm": 0.7872443199157715, "learning_rate": 1.9378330274975666e-05, "loss": 0.0995, "step": 2502 }, { "epoch": 0.42, "grad_norm": 1.0402932167053223, "learning_rate": 1.9377703380396348e-05, "loss": 0.1088, "step": 2503 }, { "epoch": 0.42, "grad_norm": 1.0192612409591675, "learning_rate": 1.937707618004512e-05, "loss": 0.1076, "step": 2504 }, { "epoch": 0.42, "grad_norm": 0.9048705697059631, "learning_rate": 1.9376448673942433e-05, "loss": 0.1034, "step": 2505 }, { "epoch": 0.42, "grad_norm": 0.9530667066574097, "learning_rate": 1.937582086210875e-05, "loss": 0.1003, "step": 2506 }, { "epoch": 0.42, "grad_norm": 1.015233039855957, "learning_rate": 1.9375192744564544e-05, "loss": 0.0797, "step": 2507 }, { "epoch": 0.42, "grad_norm": 1.2698184251785278, "learning_rate": 1.937456432133029e-05, "loss": 0.1107, "step": 2508 }, { "epoch": 0.42, "grad_norm": 0.9378464818000793, "learning_rate": 1.9373935592426485e-05, "loss": 0.1277, "step": 2509 }, { "epoch": 0.42, "grad_norm": 0.7534515857696533, "learning_rate": 1.9373306557873622e-05, "loss": 0.0932, "step": 2510 }, { "epoch": 0.42, "grad_norm": 1.2495365142822266, "learning_rate": 1.9372677217692216e-05, "loss": 0.0677, "step": 2511 }, { "epoch": 0.42, "grad_norm": 1.0883169174194336, "learning_rate": 1.9372047571902787e-05, "loss": 0.1064, "step": 2512 }, { "epoch": 0.42, "grad_norm": 0.8120844960212708, "learning_rate": 1.9371417620525867e-05, "loss": 0.106, "step": 2513 }, { "epoch": 0.42, "grad_norm": 0.8790150880813599, "learning_rate": 1.937078736358199e-05, "loss": 0.0864, "step": 2514 }, { "epoch": 0.42, "grad_norm": 1.0309810638427734, "learning_rate": 1.9370156801091716e-05, "loss": 0.1093, "step": 2515 }, { "epoch": 0.42, "grad_norm": 0.9341787099838257, "learning_rate": 1.9369525933075594e-05, "loss": 0.1058, "step": 2516 }, { "epoch": 0.42, "grad_norm": 0.8284798264503479, "learning_rate": 1.9368894759554204e-05, "loss": 0.0787, "step": 2517 }, { "epoch": 0.42, "grad_norm": 1.022367000579834, "learning_rate": 1.9368263280548125e-05, "loss": 0.1222, "step": 2518 }, { "epoch": 0.42, "grad_norm": 1.0608428716659546, "learning_rate": 1.936763149607794e-05, "loss": 0.0896, "step": 2519 }, { "epoch": 0.42, "grad_norm": 1.2850275039672852, "learning_rate": 1.936699940616425e-05, "loss": 0.0965, "step": 2520 }, { "epoch": 0.42, "grad_norm": 1.4118521213531494, "learning_rate": 1.9366367010827678e-05, "loss": 0.0981, "step": 2521 }, { "epoch": 0.42, "grad_norm": 1.474071741104126, "learning_rate": 1.936573431008883e-05, "loss": 0.1256, "step": 2522 }, { "epoch": 0.42, "grad_norm": 0.8595535159111023, "learning_rate": 1.9365101303968342e-05, "loss": 0.0912, "step": 2523 }, { "epoch": 0.42, "grad_norm": 0.9445375204086304, "learning_rate": 1.936446799248685e-05, "loss": 0.0923, "step": 2524 }, { "epoch": 0.42, "grad_norm": 1.5229960680007935, "learning_rate": 1.9363834375665013e-05, "loss": 0.093, "step": 2525 }, { "epoch": 0.42, "grad_norm": 0.9252650141716003, "learning_rate": 1.936320045352348e-05, "loss": 0.0932, "step": 2526 }, { "epoch": 0.42, "grad_norm": 1.0473453998565674, "learning_rate": 1.936256622608293e-05, "loss": 0.1154, "step": 2527 }, { "epoch": 0.42, "grad_norm": 1.3229806423187256, "learning_rate": 1.936193169336403e-05, "loss": 0.1249, "step": 2528 }, { "epoch": 0.42, "grad_norm": 1.3015000820159912, "learning_rate": 1.9361296855387483e-05, "loss": 0.1006, "step": 2529 }, { "epoch": 0.42, "grad_norm": 1.1279486417770386, "learning_rate": 1.9360661712173985e-05, "loss": 0.1019, "step": 2530 }, { "epoch": 0.42, "grad_norm": 1.081617832183838, "learning_rate": 1.9360026263744242e-05, "loss": 0.0901, "step": 2531 }, { "epoch": 0.42, "grad_norm": 1.2304298877716064, "learning_rate": 1.9359390510118978e-05, "loss": 0.0907, "step": 2532 }, { "epoch": 0.42, "grad_norm": 0.9169684648513794, "learning_rate": 1.9358754451318923e-05, "loss": 0.1101, "step": 2533 }, { "epoch": 0.42, "grad_norm": 1.0596221685409546, "learning_rate": 1.9358118087364807e-05, "loss": 0.0983, "step": 2534 }, { "epoch": 0.42, "grad_norm": 1.1363451480865479, "learning_rate": 1.9357481418277392e-05, "loss": 0.1015, "step": 2535 }, { "epoch": 0.42, "grad_norm": 0.9022520184516907, "learning_rate": 1.935684444407743e-05, "loss": 0.1011, "step": 2536 }, { "epoch": 0.42, "grad_norm": 1.4561445713043213, "learning_rate": 1.935620716478569e-05, "loss": 0.0874, "step": 2537 }, { "epoch": 0.42, "grad_norm": 0.9058523774147034, "learning_rate": 1.935556958042296e-05, "loss": 0.0972, "step": 2538 }, { "epoch": 0.42, "grad_norm": 0.958560585975647, "learning_rate": 1.9354931691010015e-05, "loss": 0.0959, "step": 2539 }, { "epoch": 0.42, "grad_norm": 0.8761704564094543, "learning_rate": 1.9354293496567668e-05, "loss": 0.0971, "step": 2540 }, { "epoch": 0.42, "grad_norm": 1.096932053565979, "learning_rate": 1.935365499711672e-05, "loss": 0.1413, "step": 2541 }, { "epoch": 0.43, "grad_norm": 0.7481234669685364, "learning_rate": 1.9353016192677993e-05, "loss": 0.0923, "step": 2542 }, { "epoch": 0.43, "grad_norm": 1.1428757905960083, "learning_rate": 1.9352377083272312e-05, "loss": 0.1325, "step": 2543 }, { "epoch": 0.43, "grad_norm": 1.0581527948379517, "learning_rate": 1.9351737668920524e-05, "loss": 0.1003, "step": 2544 }, { "epoch": 0.43, "grad_norm": 1.0357376337051392, "learning_rate": 1.935109794964347e-05, "loss": 0.0973, "step": 2545 }, { "epoch": 0.43, "grad_norm": 1.0911595821380615, "learning_rate": 1.935045792546201e-05, "loss": 0.1242, "step": 2546 }, { "epoch": 0.43, "grad_norm": 0.7833142876625061, "learning_rate": 1.9349817596397022e-05, "loss": 0.0912, "step": 2547 }, { "epoch": 0.43, "grad_norm": 0.8486307859420776, "learning_rate": 1.934917696246937e-05, "loss": 0.098, "step": 2548 }, { "epoch": 0.43, "grad_norm": 1.5874484777450562, "learning_rate": 1.9348536023699953e-05, "loss": 0.1157, "step": 2549 }, { "epoch": 0.43, "grad_norm": 1.1281782388687134, "learning_rate": 1.934789478010967e-05, "loss": 0.1214, "step": 2550 }, { "epoch": 0.43, "grad_norm": 0.8505154252052307, "learning_rate": 1.934725323171942e-05, "loss": 0.1045, "step": 2551 }, { "epoch": 0.43, "grad_norm": 1.065839171409607, "learning_rate": 1.9346611378550133e-05, "loss": 0.1173, "step": 2552 }, { "epoch": 0.43, "grad_norm": 1.2885082960128784, "learning_rate": 1.934596922062273e-05, "loss": 0.098, "step": 2553 }, { "epoch": 0.43, "grad_norm": 1.2225786447525024, "learning_rate": 1.9345326757958153e-05, "loss": 0.1102, "step": 2554 }, { "epoch": 0.43, "grad_norm": 1.246340274810791, "learning_rate": 1.934468399057735e-05, "loss": 0.1259, "step": 2555 }, { "epoch": 0.43, "grad_norm": 1.0602240562438965, "learning_rate": 1.9344040918501277e-05, "loss": 0.0936, "step": 2556 }, { "epoch": 0.43, "grad_norm": 1.0729037523269653, "learning_rate": 1.9343397541750903e-05, "loss": 0.0985, "step": 2557 }, { "epoch": 0.43, "grad_norm": 1.2264901399612427, "learning_rate": 1.9342753860347206e-05, "loss": 0.11, "step": 2558 }, { "epoch": 0.43, "grad_norm": 0.9693756103515625, "learning_rate": 1.9342109874311176e-05, "loss": 0.0983, "step": 2559 }, { "epoch": 0.43, "grad_norm": 1.0361034870147705, "learning_rate": 1.934146558366381e-05, "loss": 0.1165, "step": 2560 }, { "epoch": 0.43, "grad_norm": 0.927314817905426, "learning_rate": 1.934082098842611e-05, "loss": 0.1069, "step": 2561 }, { "epoch": 0.43, "grad_norm": 1.4093266725540161, "learning_rate": 1.9340176088619107e-05, "loss": 0.1419, "step": 2562 }, { "epoch": 0.43, "grad_norm": 0.7662842869758606, "learning_rate": 1.933953088426382e-05, "loss": 0.1011, "step": 2563 }, { "epoch": 0.43, "grad_norm": 0.8720845580101013, "learning_rate": 1.9338885375381283e-05, "loss": 0.0962, "step": 2564 }, { "epoch": 0.43, "grad_norm": 1.0309908390045166, "learning_rate": 1.9338239561992555e-05, "loss": 0.0715, "step": 2565 }, { "epoch": 0.43, "grad_norm": 1.0358697175979614, "learning_rate": 1.933759344411868e-05, "loss": 0.1033, "step": 2566 }, { "epoch": 0.43, "grad_norm": 0.9773366451263428, "learning_rate": 1.9336947021780737e-05, "loss": 0.0929, "step": 2567 }, { "epoch": 0.43, "grad_norm": 1.0096344947814941, "learning_rate": 1.93363002949998e-05, "loss": 0.0961, "step": 2568 }, { "epoch": 0.43, "grad_norm": 1.2620491981506348, "learning_rate": 1.9335653263796953e-05, "loss": 0.1109, "step": 2569 }, { "epoch": 0.43, "grad_norm": 0.9175177216529846, "learning_rate": 1.9335005928193294e-05, "loss": 0.1096, "step": 2570 }, { "epoch": 0.43, "grad_norm": 2.1110594272613525, "learning_rate": 1.9334358288209934e-05, "loss": 0.101, "step": 2571 }, { "epoch": 0.43, "grad_norm": 1.0146592855453491, "learning_rate": 1.9333710343867985e-05, "loss": 0.0962, "step": 2572 }, { "epoch": 0.43, "grad_norm": 1.1345497369766235, "learning_rate": 1.933306209518858e-05, "loss": 0.1433, "step": 2573 }, { "epoch": 0.43, "grad_norm": 0.8880932331085205, "learning_rate": 1.9332413542192854e-05, "loss": 0.0967, "step": 2574 }, { "epoch": 0.43, "grad_norm": 1.068053126335144, "learning_rate": 1.933176468490195e-05, "loss": 0.1236, "step": 2575 }, { "epoch": 0.43, "grad_norm": 2.7672009468078613, "learning_rate": 1.9331115523337026e-05, "loss": 0.113, "step": 2576 }, { "epoch": 0.43, "grad_norm": 0.8786324262619019, "learning_rate": 1.9330466057519254e-05, "loss": 0.0926, "step": 2577 }, { "epoch": 0.43, "grad_norm": 1.0856839418411255, "learning_rate": 1.9329816287469805e-05, "loss": 0.0928, "step": 2578 }, { "epoch": 0.43, "grad_norm": 0.9981300234794617, "learning_rate": 1.932916621320987e-05, "loss": 0.1102, "step": 2579 }, { "epoch": 0.43, "grad_norm": 0.9198726415634155, "learning_rate": 1.932851583476064e-05, "loss": 0.0838, "step": 2580 }, { "epoch": 0.43, "grad_norm": 1.0992530584335327, "learning_rate": 1.932786515214333e-05, "loss": 0.0977, "step": 2581 }, { "epoch": 0.43, "grad_norm": 1.0664438009262085, "learning_rate": 1.9327214165379146e-05, "loss": 0.102, "step": 2582 }, { "epoch": 0.43, "grad_norm": 1.0935091972351074, "learning_rate": 1.932656287448932e-05, "loss": 0.0923, "step": 2583 }, { "epoch": 0.43, "grad_norm": 1.0335203409194946, "learning_rate": 1.9325911279495086e-05, "loss": 0.1084, "step": 2584 }, { "epoch": 0.43, "grad_norm": 0.7521085143089294, "learning_rate": 1.9325259380417693e-05, "loss": 0.0764, "step": 2585 }, { "epoch": 0.43, "grad_norm": 1.5254247188568115, "learning_rate": 1.9324607177278392e-05, "loss": 0.1293, "step": 2586 }, { "epoch": 0.43, "grad_norm": 0.8344036340713501, "learning_rate": 1.932395467009846e-05, "loss": 0.0925, "step": 2587 }, { "epoch": 0.43, "grad_norm": 0.9107720851898193, "learning_rate": 1.932330185889916e-05, "loss": 0.1109, "step": 2588 }, { "epoch": 0.43, "grad_norm": 0.8284915685653687, "learning_rate": 1.932264874370178e-05, "loss": 0.0957, "step": 2589 }, { "epoch": 0.43, "grad_norm": 1.1394752264022827, "learning_rate": 1.932199532452762e-05, "loss": 0.0944, "step": 2590 }, { "epoch": 0.43, "grad_norm": 0.9157455563545227, "learning_rate": 1.9321341601397986e-05, "loss": 0.1046, "step": 2591 }, { "epoch": 0.43, "grad_norm": 1.0182198286056519, "learning_rate": 1.932068757433419e-05, "loss": 0.0876, "step": 2592 }, { "epoch": 0.43, "grad_norm": 1.228812575340271, "learning_rate": 1.932003324335756e-05, "loss": 0.137, "step": 2593 }, { "epoch": 0.43, "grad_norm": 1.2625627517700195, "learning_rate": 1.931937860848943e-05, "loss": 0.143, "step": 2594 }, { "epoch": 0.43, "grad_norm": 1.2496105432510376, "learning_rate": 1.931872366975115e-05, "loss": 0.1361, "step": 2595 }, { "epoch": 0.43, "grad_norm": 1.3516924381256104, "learning_rate": 1.931806842716406e-05, "loss": 0.1278, "step": 2596 }, { "epoch": 0.43, "grad_norm": 1.0847084522247314, "learning_rate": 1.9317412880749543e-05, "loss": 0.1067, "step": 2597 }, { "epoch": 0.43, "grad_norm": 0.8540021181106567, "learning_rate": 1.931675703052896e-05, "loss": 0.0767, "step": 2598 }, { "epoch": 0.43, "grad_norm": 0.8675567507743835, "learning_rate": 1.931610087652371e-05, "loss": 0.1042, "step": 2599 }, { "epoch": 0.43, "grad_norm": 1.3131687641143799, "learning_rate": 1.9315444418755174e-05, "loss": 0.1187, "step": 2600 }, { "epoch": 0.44, "grad_norm": 0.953150749206543, "learning_rate": 1.9314787657244768e-05, "loss": 0.0887, "step": 2601 }, { "epoch": 0.44, "grad_norm": 0.9507735371589661, "learning_rate": 1.93141305920139e-05, "loss": 0.108, "step": 2602 }, { "epoch": 0.44, "grad_norm": 1.0102264881134033, "learning_rate": 1.931347322308399e-05, "loss": 0.1008, "step": 2603 }, { "epoch": 0.44, "grad_norm": 0.8324961066246033, "learning_rate": 1.9312815550476484e-05, "loss": 0.098, "step": 2604 }, { "epoch": 0.44, "grad_norm": 0.9362568855285645, "learning_rate": 1.9312157574212817e-05, "loss": 0.0951, "step": 2605 }, { "epoch": 0.44, "grad_norm": 0.8031677007675171, "learning_rate": 1.9311499294314446e-05, "loss": 0.0941, "step": 2606 }, { "epoch": 0.44, "grad_norm": 1.1229705810546875, "learning_rate": 1.9310840710802837e-05, "loss": 0.1065, "step": 2607 }, { "epoch": 0.44, "grad_norm": 1.2594774961471558, "learning_rate": 1.931018182369946e-05, "loss": 0.0818, "step": 2608 }, { "epoch": 0.44, "grad_norm": 1.1488667726516724, "learning_rate": 1.9309522633025803e-05, "loss": 0.114, "step": 2609 }, { "epoch": 0.44, "grad_norm": 0.9158180356025696, "learning_rate": 1.9308863138803356e-05, "loss": 0.1104, "step": 2610 }, { "epoch": 0.44, "grad_norm": 0.848847508430481, "learning_rate": 1.930820334105363e-05, "loss": 0.0918, "step": 2611 }, { "epoch": 0.44, "grad_norm": 1.016348958015442, "learning_rate": 1.9307543239798126e-05, "loss": 0.0766, "step": 2612 }, { "epoch": 0.44, "grad_norm": 1.0807311534881592, "learning_rate": 1.9306882835058382e-05, "loss": 0.1001, "step": 2613 }, { "epoch": 0.44, "grad_norm": 1.1538751125335693, "learning_rate": 1.930622212685592e-05, "loss": 0.111, "step": 2614 }, { "epoch": 0.44, "grad_norm": 1.0930910110473633, "learning_rate": 1.9305561115212287e-05, "loss": 0.1009, "step": 2615 }, { "epoch": 0.44, "grad_norm": 1.1968451738357544, "learning_rate": 1.9304899800149036e-05, "loss": 0.0989, "step": 2616 }, { "epoch": 0.44, "grad_norm": 1.0428680181503296, "learning_rate": 1.9304238181687728e-05, "loss": 0.094, "step": 2617 }, { "epoch": 0.44, "grad_norm": 0.7807887196540833, "learning_rate": 1.9303576259849942e-05, "loss": 0.0803, "step": 2618 }, { "epoch": 0.44, "grad_norm": 0.8123558163642883, "learning_rate": 1.9302914034657254e-05, "loss": 0.0947, "step": 2619 }, { "epoch": 0.44, "grad_norm": 1.1662391424179077, "learning_rate": 1.9302251506131262e-05, "loss": 0.1092, "step": 2620 }, { "epoch": 0.44, "grad_norm": 1.026529312133789, "learning_rate": 1.9301588674293567e-05, "loss": 0.105, "step": 2621 }, { "epoch": 0.44, "grad_norm": 1.0233769416809082, "learning_rate": 1.9300925539165777e-05, "loss": 0.097, "step": 2622 }, { "epoch": 0.44, "grad_norm": 1.0210626125335693, "learning_rate": 1.930026210076952e-05, "loss": 0.1058, "step": 2623 }, { "epoch": 0.44, "grad_norm": 1.1123580932617188, "learning_rate": 1.9299598359126427e-05, "loss": 0.1197, "step": 2624 }, { "epoch": 0.44, "grad_norm": 0.8693264722824097, "learning_rate": 1.9298934314258136e-05, "loss": 0.1013, "step": 2625 }, { "epoch": 0.44, "grad_norm": 0.9082635045051575, "learning_rate": 1.929826996618631e-05, "loss": 0.0994, "step": 2626 }, { "epoch": 0.44, "grad_norm": 0.6775072813034058, "learning_rate": 1.9297605314932596e-05, "loss": 0.0885, "step": 2627 }, { "epoch": 0.44, "grad_norm": 0.8897063136100769, "learning_rate": 1.9296940360518677e-05, "loss": 0.0936, "step": 2628 }, { "epoch": 0.44, "grad_norm": 0.9246452450752258, "learning_rate": 1.9296275102966228e-05, "loss": 0.0875, "step": 2629 }, { "epoch": 0.44, "grad_norm": 0.9060840606689453, "learning_rate": 1.9295609542296947e-05, "loss": 0.0906, "step": 2630 }, { "epoch": 0.44, "grad_norm": 1.0943340063095093, "learning_rate": 1.929494367853253e-05, "loss": 0.1138, "step": 2631 }, { "epoch": 0.44, "grad_norm": 0.844508945941925, "learning_rate": 1.929427751169469e-05, "loss": 0.0858, "step": 2632 }, { "epoch": 0.44, "grad_norm": 1.2735308408737183, "learning_rate": 1.9293611041805146e-05, "loss": 0.123, "step": 2633 }, { "epoch": 0.44, "grad_norm": 1.0491048097610474, "learning_rate": 1.9292944268885635e-05, "loss": 0.1109, "step": 2634 }, { "epoch": 0.44, "grad_norm": 1.1838208436965942, "learning_rate": 1.9292277192957892e-05, "loss": 0.1069, "step": 2635 }, { "epoch": 0.44, "grad_norm": 1.0214301347732544, "learning_rate": 1.9291609814043675e-05, "loss": 0.1059, "step": 2636 }, { "epoch": 0.44, "grad_norm": 1.2635241746902466, "learning_rate": 1.9290942132164736e-05, "loss": 0.1036, "step": 2637 }, { "epoch": 0.44, "grad_norm": 0.7788549661636353, "learning_rate": 1.929027414734285e-05, "loss": 0.0975, "step": 2638 }, { "epoch": 0.44, "grad_norm": 0.9529968500137329, "learning_rate": 1.92896058595998e-05, "loss": 0.0929, "step": 2639 }, { "epoch": 0.44, "grad_norm": 1.1479238271713257, "learning_rate": 1.928893726895737e-05, "loss": 0.1209, "step": 2640 }, { "epoch": 0.44, "grad_norm": 0.9427185654640198, "learning_rate": 1.9288268375437368e-05, "loss": 0.1023, "step": 2641 }, { "epoch": 0.44, "grad_norm": 0.9194220900535583, "learning_rate": 1.9287599179061595e-05, "loss": 0.0957, "step": 2642 }, { "epoch": 0.44, "grad_norm": 1.134668231010437, "learning_rate": 1.928692967985188e-05, "loss": 0.1126, "step": 2643 }, { "epoch": 0.44, "grad_norm": 0.7450429201126099, "learning_rate": 1.928625987783005e-05, "loss": 0.0847, "step": 2644 }, { "epoch": 0.44, "grad_norm": 1.1444953680038452, "learning_rate": 1.9285589773017935e-05, "loss": 0.1081, "step": 2645 }, { "epoch": 0.44, "grad_norm": 1.3326133489608765, "learning_rate": 1.92849193654374e-05, "loss": 0.1439, "step": 2646 }, { "epoch": 0.44, "grad_norm": 0.8634976148605347, "learning_rate": 1.9284248655110296e-05, "loss": 0.0784, "step": 2647 }, { "epoch": 0.44, "grad_norm": 0.7229533195495605, "learning_rate": 1.9283577642058496e-05, "loss": 0.0825, "step": 2648 }, { "epoch": 0.44, "grad_norm": 0.7787070274353027, "learning_rate": 1.9282906326303876e-05, "loss": 0.0884, "step": 2649 }, { "epoch": 0.44, "grad_norm": 1.1610380411148071, "learning_rate": 1.9282234707868324e-05, "loss": 0.1176, "step": 2650 }, { "epoch": 0.44, "grad_norm": 0.9617300033569336, "learning_rate": 1.928156278677374e-05, "loss": 0.0867, "step": 2651 }, { "epoch": 0.44, "grad_norm": 1.5175751447677612, "learning_rate": 1.928089056304204e-05, "loss": 0.1259, "step": 2652 }, { "epoch": 0.44, "grad_norm": 1.033508062362671, "learning_rate": 1.9280218036695136e-05, "loss": 0.1069, "step": 2653 }, { "epoch": 0.44, "grad_norm": 1.0261211395263672, "learning_rate": 1.9279545207754953e-05, "loss": 0.0906, "step": 2654 }, { "epoch": 0.44, "grad_norm": 0.896335780620575, "learning_rate": 1.9278872076243437e-05, "loss": 0.0999, "step": 2655 }, { "epoch": 0.44, "grad_norm": 0.9919697046279907, "learning_rate": 1.927819864218253e-05, "loss": 0.0989, "step": 2656 }, { "epoch": 0.44, "grad_norm": 1.8559166193008423, "learning_rate": 1.92775249055942e-05, "loss": 0.1211, "step": 2657 }, { "epoch": 0.44, "grad_norm": 1.0072516202926636, "learning_rate": 1.9276850866500402e-05, "loss": 0.1249, "step": 2658 }, { "epoch": 0.44, "grad_norm": 0.935944139957428, "learning_rate": 1.9276176524923124e-05, "loss": 0.1179, "step": 2659 }, { "epoch": 0.44, "grad_norm": 0.857757568359375, "learning_rate": 1.927550188088435e-05, "loss": 0.1021, "step": 2660 }, { "epoch": 0.45, "grad_norm": 0.8818808794021606, "learning_rate": 1.927482693440607e-05, "loss": 0.1045, "step": 2661 }, { "epoch": 0.45, "grad_norm": 0.9448637366294861, "learning_rate": 1.927415168551031e-05, "loss": 0.1011, "step": 2662 }, { "epoch": 0.45, "grad_norm": 0.834963321685791, "learning_rate": 1.927347613421907e-05, "loss": 0.092, "step": 2663 }, { "epoch": 0.45, "grad_norm": 1.076650857925415, "learning_rate": 1.9272800280554388e-05, "loss": 0.1011, "step": 2664 }, { "epoch": 0.45, "grad_norm": 1.0996335744857788, "learning_rate": 1.9272124124538292e-05, "loss": 0.1205, "step": 2665 }, { "epoch": 0.45, "grad_norm": 1.0094093084335327, "learning_rate": 1.9271447666192834e-05, "loss": 0.0963, "step": 2666 }, { "epoch": 0.45, "grad_norm": 1.2501685619354248, "learning_rate": 1.9270770905540078e-05, "loss": 0.0888, "step": 2667 }, { "epoch": 0.45, "grad_norm": 0.8415953516960144, "learning_rate": 1.9270093842602075e-05, "loss": 0.0998, "step": 2668 }, { "epoch": 0.45, "grad_norm": 0.9692384004592896, "learning_rate": 1.9269416477400914e-05, "loss": 0.0878, "step": 2669 }, { "epoch": 0.45, "grad_norm": 1.03810453414917, "learning_rate": 1.9268738809958672e-05, "loss": 0.1103, "step": 2670 }, { "epoch": 0.45, "grad_norm": 1.0188028812408447, "learning_rate": 1.9268060840297456e-05, "loss": 0.0969, "step": 2671 }, { "epoch": 0.45, "grad_norm": 1.0793753862380981, "learning_rate": 1.9267382568439364e-05, "loss": 0.1113, "step": 2672 }, { "epoch": 0.45, "grad_norm": 0.9497972130775452, "learning_rate": 1.926670399440651e-05, "loss": 0.1028, "step": 2673 }, { "epoch": 0.45, "grad_norm": 1.087646484375, "learning_rate": 1.926602511822103e-05, "loss": 0.0931, "step": 2674 }, { "epoch": 0.45, "grad_norm": 0.8052651286125183, "learning_rate": 1.926534593990505e-05, "loss": 0.0928, "step": 2675 }, { "epoch": 0.45, "grad_norm": 0.9350137114524841, "learning_rate": 1.9264666459480722e-05, "loss": 0.1032, "step": 2676 }, { "epoch": 0.45, "grad_norm": 0.9946900010108948, "learning_rate": 1.9263986676970194e-05, "loss": 0.0988, "step": 2677 }, { "epoch": 0.45, "grad_norm": 1.0554323196411133, "learning_rate": 1.926330659239564e-05, "loss": 0.0956, "step": 2678 }, { "epoch": 0.45, "grad_norm": 0.8048049211502075, "learning_rate": 1.926262620577923e-05, "loss": 0.091, "step": 2679 }, { "epoch": 0.45, "grad_norm": 0.8904149532318115, "learning_rate": 1.9261945517143144e-05, "loss": 0.1018, "step": 2680 }, { "epoch": 0.45, "grad_norm": 1.15426766872406, "learning_rate": 1.9261264526509588e-05, "loss": 0.1046, "step": 2681 }, { "epoch": 0.45, "grad_norm": 1.1114633083343506, "learning_rate": 1.926058323390076e-05, "loss": 0.1147, "step": 2682 }, { "epoch": 0.45, "grad_norm": 0.8966839909553528, "learning_rate": 1.925990163933887e-05, "loss": 0.095, "step": 2683 }, { "epoch": 0.45, "grad_norm": 1.2078979015350342, "learning_rate": 1.9259219742846152e-05, "loss": 0.1135, "step": 2684 }, { "epoch": 0.45, "grad_norm": 0.8706923723220825, "learning_rate": 1.9258537544444836e-05, "loss": 0.09, "step": 2685 }, { "epoch": 0.45, "grad_norm": 0.9942439794540405, "learning_rate": 1.9257855044157163e-05, "loss": 0.0939, "step": 2686 }, { "epoch": 0.45, "grad_norm": 1.5748902559280396, "learning_rate": 1.925717224200539e-05, "loss": 0.107, "step": 2687 }, { "epoch": 0.45, "grad_norm": 0.9880238175392151, "learning_rate": 1.925648913801178e-05, "loss": 0.105, "step": 2688 }, { "epoch": 0.45, "grad_norm": 0.9351451396942139, "learning_rate": 1.9255805732198607e-05, "loss": 0.0902, "step": 2689 }, { "epoch": 0.45, "grad_norm": 0.7253570556640625, "learning_rate": 1.925512202458815e-05, "loss": 0.0804, "step": 2690 }, { "epoch": 0.45, "grad_norm": 0.8595664501190186, "learning_rate": 1.9254438015202707e-05, "loss": 0.0976, "step": 2691 }, { "epoch": 0.45, "grad_norm": 1.4160091876983643, "learning_rate": 1.9253753704064584e-05, "loss": 0.1072, "step": 2692 }, { "epoch": 0.45, "grad_norm": 0.8868973851203918, "learning_rate": 1.9253069091196086e-05, "loss": 0.0921, "step": 2693 }, { "epoch": 0.45, "grad_norm": 0.9271553158760071, "learning_rate": 1.9252384176619538e-05, "loss": 0.0896, "step": 2694 }, { "epoch": 0.45, "grad_norm": 0.8697153925895691, "learning_rate": 1.9251698960357277e-05, "loss": 0.0969, "step": 2695 }, { "epoch": 0.45, "grad_norm": 0.9460586309432983, "learning_rate": 1.9251013442431642e-05, "loss": 0.0993, "step": 2696 }, { "epoch": 0.45, "grad_norm": 1.0496610403060913, "learning_rate": 1.925032762286498e-05, "loss": 0.1052, "step": 2697 }, { "epoch": 0.45, "grad_norm": 0.8300247192382812, "learning_rate": 1.9249641501679663e-05, "loss": 0.0874, "step": 2698 }, { "epoch": 0.45, "grad_norm": 0.9741727113723755, "learning_rate": 1.9248955078898058e-05, "loss": 0.0914, "step": 2699 }, { "epoch": 0.45, "grad_norm": 0.7994710803031921, "learning_rate": 1.9248268354542546e-05, "loss": 0.0758, "step": 2700 }, { "epoch": 0.45, "grad_norm": 0.7762228846549988, "learning_rate": 1.924758132863552e-05, "loss": 0.0798, "step": 2701 }, { "epoch": 0.45, "grad_norm": 1.0539047718048096, "learning_rate": 1.924689400119938e-05, "loss": 0.1184, "step": 2702 }, { "epoch": 0.45, "grad_norm": 1.2923598289489746, "learning_rate": 1.924620637225654e-05, "loss": 0.112, "step": 2703 }, { "epoch": 0.45, "grad_norm": 0.789348304271698, "learning_rate": 1.924551844182941e-05, "loss": 0.0866, "step": 2704 }, { "epoch": 0.45, "grad_norm": 1.3427302837371826, "learning_rate": 1.924483020994044e-05, "loss": 0.0832, "step": 2705 }, { "epoch": 0.45, "grad_norm": 0.7722994089126587, "learning_rate": 1.9244141676612056e-05, "loss": 0.0984, "step": 2706 }, { "epoch": 0.45, "grad_norm": 1.174270749092102, "learning_rate": 1.9243452841866715e-05, "loss": 0.1137, "step": 2707 }, { "epoch": 0.45, "grad_norm": 1.088330864906311, "learning_rate": 1.9242763705726873e-05, "loss": 0.0973, "step": 2708 }, { "epoch": 0.45, "grad_norm": 1.0200384855270386, "learning_rate": 1.9242074268215e-05, "loss": 0.104, "step": 2709 }, { "epoch": 0.45, "grad_norm": 1.238937258720398, "learning_rate": 1.9241384529353584e-05, "loss": 0.1199, "step": 2710 }, { "epoch": 0.45, "grad_norm": 1.0336281061172485, "learning_rate": 1.9240694489165105e-05, "loss": 0.1107, "step": 2711 }, { "epoch": 0.45, "grad_norm": 1.1459720134735107, "learning_rate": 1.924000414767207e-05, "loss": 0.1087, "step": 2712 }, { "epoch": 0.45, "grad_norm": 1.1790168285369873, "learning_rate": 1.9239313504896983e-05, "loss": 0.0956, "step": 2713 }, { "epoch": 0.45, "grad_norm": 0.9954588413238525, "learning_rate": 1.9238622560862367e-05, "loss": 0.1161, "step": 2714 }, { "epoch": 0.45, "grad_norm": 0.9105089902877808, "learning_rate": 1.9237931315590746e-05, "loss": 0.091, "step": 2715 }, { "epoch": 0.45, "grad_norm": 0.8566074967384338, "learning_rate": 1.923723976910467e-05, "loss": 0.1014, "step": 2716 }, { "epoch": 0.45, "grad_norm": 0.6959720849990845, "learning_rate": 1.9236547921426672e-05, "loss": 0.0704, "step": 2717 }, { "epoch": 0.45, "grad_norm": 0.9083656668663025, "learning_rate": 1.9235855772579324e-05, "loss": 0.1074, "step": 2718 }, { "epoch": 0.45, "grad_norm": 0.9145970344543457, "learning_rate": 1.9235163322585187e-05, "loss": 0.099, "step": 2719 }, { "epoch": 0.45, "grad_norm": 0.8777256011962891, "learning_rate": 1.9234470571466843e-05, "loss": 0.0789, "step": 2720 }, { "epoch": 0.46, "grad_norm": 1.085461139678955, "learning_rate": 1.923377751924688e-05, "loss": 0.1122, "step": 2721 }, { "epoch": 0.46, "grad_norm": 0.9970821142196655, "learning_rate": 1.923308416594789e-05, "loss": 0.109, "step": 2722 }, { "epoch": 0.46, "grad_norm": 1.1904370784759521, "learning_rate": 1.9232390511592486e-05, "loss": 0.1129, "step": 2723 }, { "epoch": 0.46, "grad_norm": 0.909369707107544, "learning_rate": 1.9231696556203287e-05, "loss": 0.0998, "step": 2724 }, { "epoch": 0.46, "grad_norm": 0.7829056978225708, "learning_rate": 1.9231002299802915e-05, "loss": 0.0736, "step": 2725 }, { "epoch": 0.46, "grad_norm": 0.8227499127388, "learning_rate": 1.9230307742414014e-05, "loss": 0.1094, "step": 2726 }, { "epoch": 0.46, "grad_norm": 1.0483496189117432, "learning_rate": 1.9229612884059223e-05, "loss": 0.1132, "step": 2727 }, { "epoch": 0.46, "grad_norm": 0.9361321926116943, "learning_rate": 1.9228917724761205e-05, "loss": 0.0994, "step": 2728 }, { "epoch": 0.46, "grad_norm": 0.918032169342041, "learning_rate": 1.9228222264542623e-05, "loss": 0.1076, "step": 2729 }, { "epoch": 0.46, "grad_norm": 1.1061584949493408, "learning_rate": 1.9227526503426154e-05, "loss": 0.127, "step": 2730 }, { "epoch": 0.46, "grad_norm": 0.8928791284561157, "learning_rate": 1.922683044143449e-05, "loss": 0.1103, "step": 2731 }, { "epoch": 0.46, "grad_norm": 1.10958993434906, "learning_rate": 1.9226134078590314e-05, "loss": 0.0985, "step": 2732 }, { "epoch": 0.46, "grad_norm": 0.9226235151290894, "learning_rate": 1.922543741491634e-05, "loss": 0.0951, "step": 2733 }, { "epoch": 0.46, "grad_norm": 1.5340250730514526, "learning_rate": 1.9224740450435288e-05, "loss": 0.0907, "step": 2734 }, { "epoch": 0.46, "grad_norm": 1.086531639099121, "learning_rate": 1.9224043185169875e-05, "loss": 0.109, "step": 2735 }, { "epoch": 0.46, "grad_norm": 0.9949246048927307, "learning_rate": 1.9223345619142842e-05, "loss": 0.0974, "step": 2736 }, { "epoch": 0.46, "grad_norm": 1.1045252084732056, "learning_rate": 1.922264775237693e-05, "loss": 0.0898, "step": 2737 }, { "epoch": 0.46, "grad_norm": 0.8847085237503052, "learning_rate": 1.9221949584894895e-05, "loss": 0.1066, "step": 2738 }, { "epoch": 0.46, "grad_norm": 1.559290885925293, "learning_rate": 1.9221251116719505e-05, "loss": 0.0991, "step": 2739 }, { "epoch": 0.46, "grad_norm": 0.8365925550460815, "learning_rate": 1.9220552347873527e-05, "loss": 0.0731, "step": 2740 }, { "epoch": 0.46, "grad_norm": 0.8979273438453674, "learning_rate": 1.9219853278379753e-05, "loss": 0.0886, "step": 2741 }, { "epoch": 0.46, "grad_norm": 0.7704338431358337, "learning_rate": 1.9219153908260973e-05, "loss": 0.0996, "step": 2742 }, { "epoch": 0.46, "grad_norm": 1.0891473293304443, "learning_rate": 1.9218454237539995e-05, "loss": 0.0922, "step": 2743 }, { "epoch": 0.46, "grad_norm": 0.9408734440803528, "learning_rate": 1.9217754266239625e-05, "loss": 0.1059, "step": 2744 }, { "epoch": 0.46, "grad_norm": 1.3126897811889648, "learning_rate": 1.921705399438269e-05, "loss": 0.125, "step": 2745 }, { "epoch": 0.46, "grad_norm": 1.4030601978302002, "learning_rate": 1.921635342199203e-05, "loss": 0.1115, "step": 2746 }, { "epoch": 0.46, "grad_norm": 0.9543119072914124, "learning_rate": 1.9215652549090475e-05, "loss": 0.1131, "step": 2747 }, { "epoch": 0.46, "grad_norm": 1.57988440990448, "learning_rate": 1.921495137570089e-05, "loss": 0.1002, "step": 2748 }, { "epoch": 0.46, "grad_norm": 1.1320985555648804, "learning_rate": 1.921424990184613e-05, "loss": 0.1285, "step": 2749 }, { "epoch": 0.46, "grad_norm": 0.9171133637428284, "learning_rate": 1.9213548127549074e-05, "loss": 0.0987, "step": 2750 }, { "epoch": 0.46, "grad_norm": 1.0398648977279663, "learning_rate": 1.9212846052832595e-05, "loss": 0.1117, "step": 2751 }, { "epoch": 0.46, "grad_norm": 1.1039676666259766, "learning_rate": 1.9212143677719595e-05, "loss": 0.1147, "step": 2752 }, { "epoch": 0.46, "grad_norm": 0.8077039122581482, "learning_rate": 1.9211441002232968e-05, "loss": 0.0993, "step": 2753 }, { "epoch": 0.46, "grad_norm": 0.8384808897972107, "learning_rate": 1.9210738026395633e-05, "loss": 0.1275, "step": 2754 }, { "epoch": 0.46, "grad_norm": 0.954816460609436, "learning_rate": 1.9210034750230502e-05, "loss": 0.0838, "step": 2755 }, { "epoch": 0.46, "grad_norm": 0.9605753421783447, "learning_rate": 1.9209331173760516e-05, "loss": 0.0953, "step": 2756 }, { "epoch": 0.46, "grad_norm": 1.0345336198806763, "learning_rate": 1.9208627297008608e-05, "loss": 0.1167, "step": 2757 }, { "epoch": 0.46, "grad_norm": 1.148053526878357, "learning_rate": 1.9207923119997734e-05, "loss": 0.1005, "step": 2758 }, { "epoch": 0.46, "grad_norm": 0.8584249019622803, "learning_rate": 1.9207218642750854e-05, "loss": 0.1088, "step": 2759 }, { "epoch": 0.46, "grad_norm": 0.918571949005127, "learning_rate": 1.920651386529093e-05, "loss": 0.1083, "step": 2760 }, { "epoch": 0.46, "grad_norm": 0.8759031295776367, "learning_rate": 1.920580878764096e-05, "loss": 0.0922, "step": 2761 }, { "epoch": 0.46, "grad_norm": 0.7409655451774597, "learning_rate": 1.9205103409823917e-05, "loss": 0.0857, "step": 2762 }, { "epoch": 0.46, "grad_norm": 1.0358669757843018, "learning_rate": 1.920439773186281e-05, "loss": 0.0942, "step": 2763 }, { "epoch": 0.46, "grad_norm": 0.916300892829895, "learning_rate": 1.9203691753780643e-05, "loss": 0.0989, "step": 2764 }, { "epoch": 0.46, "grad_norm": 1.3329927921295166, "learning_rate": 1.920298547560044e-05, "loss": 0.1029, "step": 2765 }, { "epoch": 0.46, "grad_norm": 0.7961259484291077, "learning_rate": 1.9202278897345224e-05, "loss": 0.0751, "step": 2766 }, { "epoch": 0.46, "grad_norm": 0.986568033695221, "learning_rate": 1.920157201903804e-05, "loss": 0.1072, "step": 2767 }, { "epoch": 0.46, "grad_norm": 1.069855809211731, "learning_rate": 1.9200864840701938e-05, "loss": 0.1082, "step": 2768 }, { "epoch": 0.46, "grad_norm": 1.071987509727478, "learning_rate": 1.9200157362359972e-05, "loss": 0.0911, "step": 2769 }, { "epoch": 0.46, "grad_norm": 1.0490660667419434, "learning_rate": 1.919944958403521e-05, "loss": 0.093, "step": 2770 }, { "epoch": 0.46, "grad_norm": 0.8291184902191162, "learning_rate": 1.9198741505750727e-05, "loss": 0.0862, "step": 2771 }, { "epoch": 0.46, "grad_norm": 0.7650600671768188, "learning_rate": 1.9198033127529618e-05, "loss": 0.0817, "step": 2772 }, { "epoch": 0.46, "grad_norm": 1.1829209327697754, "learning_rate": 1.919732444939498e-05, "loss": 0.0927, "step": 2773 }, { "epoch": 0.46, "grad_norm": 0.8715455532073975, "learning_rate": 1.9196615471369915e-05, "loss": 0.0859, "step": 2774 }, { "epoch": 0.46, "grad_norm": 1.199561595916748, "learning_rate": 1.9195906193477545e-05, "loss": 0.1146, "step": 2775 }, { "epoch": 0.46, "grad_norm": 0.9113649725914001, "learning_rate": 1.9195196615740993e-05, "loss": 0.092, "step": 2776 }, { "epoch": 0.46, "grad_norm": 1.0541609525680542, "learning_rate": 1.91944867381834e-05, "loss": 0.1035, "step": 2777 }, { "epoch": 0.46, "grad_norm": 0.9699761271476746, "learning_rate": 1.9193776560827907e-05, "loss": 0.1015, "step": 2778 }, { "epoch": 0.46, "grad_norm": 1.6488746404647827, "learning_rate": 1.9193066083697678e-05, "loss": 0.1124, "step": 2779 }, { "epoch": 0.46, "grad_norm": 1.169474720954895, "learning_rate": 1.9192355306815872e-05, "loss": 0.0776, "step": 2780 }, { "epoch": 0.47, "grad_norm": 1.0550117492675781, "learning_rate": 1.9191644230205667e-05, "loss": 0.1081, "step": 2781 }, { "epoch": 0.47, "grad_norm": 1.2757929563522339, "learning_rate": 1.919093285389025e-05, "loss": 0.1217, "step": 2782 }, { "epoch": 0.47, "grad_norm": 1.5725783109664917, "learning_rate": 1.919022117789281e-05, "loss": 0.1003, "step": 2783 }, { "epoch": 0.47, "grad_norm": 1.0188770294189453, "learning_rate": 1.918950920223656e-05, "loss": 0.1139, "step": 2784 }, { "epoch": 0.47, "grad_norm": 0.9469914436340332, "learning_rate": 1.9188796926944713e-05, "loss": 0.0993, "step": 2785 }, { "epoch": 0.47, "grad_norm": 0.961247444152832, "learning_rate": 1.9188084352040493e-05, "loss": 0.1139, "step": 2786 }, { "epoch": 0.47, "grad_norm": 0.9384210109710693, "learning_rate": 1.9187371477547133e-05, "loss": 0.1203, "step": 2787 }, { "epoch": 0.47, "grad_norm": 0.9540103077888489, "learning_rate": 1.918665830348788e-05, "loss": 0.0989, "step": 2788 }, { "epoch": 0.47, "grad_norm": 1.1810011863708496, "learning_rate": 1.9185944829885984e-05, "loss": 0.1333, "step": 2789 }, { "epoch": 0.47, "grad_norm": 1.1016125679016113, "learning_rate": 1.918523105676471e-05, "loss": 0.0706, "step": 2790 }, { "epoch": 0.47, "grad_norm": 0.923664927482605, "learning_rate": 1.9184516984147333e-05, "loss": 0.0992, "step": 2791 }, { "epoch": 0.47, "grad_norm": 0.9561367034912109, "learning_rate": 1.9183802612057137e-05, "loss": 0.1142, "step": 2792 }, { "epoch": 0.47, "grad_norm": 1.1330760717391968, "learning_rate": 1.9183087940517413e-05, "loss": 0.0928, "step": 2793 }, { "epoch": 0.47, "grad_norm": 0.8411343097686768, "learning_rate": 1.9182372969551463e-05, "loss": 0.0826, "step": 2794 }, { "epoch": 0.47, "grad_norm": 0.9925259351730347, "learning_rate": 1.9181657699182603e-05, "loss": 0.1114, "step": 2795 }, { "epoch": 0.47, "grad_norm": 0.9903892874717712, "learning_rate": 1.9180942129434152e-05, "loss": 0.1175, "step": 2796 }, { "epoch": 0.47, "grad_norm": 0.7548855543136597, "learning_rate": 1.9180226260329445e-05, "loss": 0.0629, "step": 2797 }, { "epoch": 0.47, "grad_norm": 1.0616008043289185, "learning_rate": 1.917951009189182e-05, "loss": 0.1101, "step": 2798 }, { "epoch": 0.47, "grad_norm": 0.8926742672920227, "learning_rate": 1.917879362414463e-05, "loss": 0.0848, "step": 2799 }, { "epoch": 0.47, "grad_norm": 1.131813645362854, "learning_rate": 1.917807685711124e-05, "loss": 0.1144, "step": 2800 }, { "epoch": 0.47, "grad_norm": 1.0450266599655151, "learning_rate": 1.9177359790815013e-05, "loss": 0.1013, "step": 2801 }, { "epoch": 0.47, "grad_norm": 1.1581873893737793, "learning_rate": 1.9176642425279343e-05, "loss": 0.1136, "step": 2802 }, { "epoch": 0.47, "grad_norm": 0.9551581144332886, "learning_rate": 1.9175924760527603e-05, "loss": 0.0989, "step": 2803 }, { "epoch": 0.47, "grad_norm": 0.9485329985618591, "learning_rate": 1.917520679658321e-05, "loss": 0.0849, "step": 2804 }, { "epoch": 0.47, "grad_norm": 0.8796623349189758, "learning_rate": 1.9174488533469564e-05, "loss": 0.0887, "step": 2805 }, { "epoch": 0.47, "grad_norm": 0.8693220019340515, "learning_rate": 1.917376997121009e-05, "loss": 0.0817, "step": 2806 }, { "epoch": 0.47, "grad_norm": 1.0383890867233276, "learning_rate": 1.9173051109828213e-05, "loss": 0.1066, "step": 2807 }, { "epoch": 0.47, "grad_norm": 0.9347425699234009, "learning_rate": 1.9172331949347376e-05, "loss": 0.1161, "step": 2808 }, { "epoch": 0.47, "grad_norm": 0.8164317607879639, "learning_rate": 1.9171612489791026e-05, "loss": 0.0724, "step": 2809 }, { "epoch": 0.47, "grad_norm": 1.0430341958999634, "learning_rate": 1.9170892731182626e-05, "loss": 0.0956, "step": 2810 }, { "epoch": 0.47, "grad_norm": 0.9046928286552429, "learning_rate": 1.917017267354564e-05, "loss": 0.0951, "step": 2811 }, { "epoch": 0.47, "grad_norm": 1.1531801223754883, "learning_rate": 1.916945231690355e-05, "loss": 0.0964, "step": 2812 }, { "epoch": 0.47, "grad_norm": 0.8388567566871643, "learning_rate": 1.9168731661279838e-05, "loss": 0.0829, "step": 2813 }, { "epoch": 0.47, "grad_norm": 1.0621479749679565, "learning_rate": 1.9168010706698007e-05, "loss": 0.1054, "step": 2814 }, { "epoch": 0.47, "grad_norm": 0.737916111946106, "learning_rate": 1.9167289453181565e-05, "loss": 0.0947, "step": 2815 }, { "epoch": 0.47, "grad_norm": 1.2203110456466675, "learning_rate": 1.9166567900754027e-05, "loss": 0.1094, "step": 2816 }, { "epoch": 0.47, "grad_norm": 0.841580331325531, "learning_rate": 1.9165846049438922e-05, "loss": 0.0907, "step": 2817 }, { "epoch": 0.47, "grad_norm": 0.8049021363258362, "learning_rate": 1.9165123899259786e-05, "loss": 0.1099, "step": 2818 }, { "epoch": 0.47, "grad_norm": 1.6855182647705078, "learning_rate": 1.9164401450240165e-05, "loss": 0.1082, "step": 2819 }, { "epoch": 0.47, "grad_norm": 0.9045683145523071, "learning_rate": 1.9163678702403617e-05, "loss": 0.1013, "step": 2820 }, { "epoch": 0.47, "grad_norm": 0.9203464388847351, "learning_rate": 1.9162955655773707e-05, "loss": 0.0847, "step": 2821 }, { "epoch": 0.47, "grad_norm": 1.0215749740600586, "learning_rate": 1.916223231037401e-05, "loss": 0.0918, "step": 2822 }, { "epoch": 0.47, "grad_norm": 1.075278878211975, "learning_rate": 1.9161508666228115e-05, "loss": 0.1293, "step": 2823 }, { "epoch": 0.47, "grad_norm": 1.1931707859039307, "learning_rate": 1.9160784723359612e-05, "loss": 0.1524, "step": 2824 }, { "epoch": 0.47, "grad_norm": 0.9597657918930054, "learning_rate": 1.916006048179211e-05, "loss": 0.0974, "step": 2825 }, { "epoch": 0.47, "grad_norm": 0.8697646856307983, "learning_rate": 1.9159335941549228e-05, "loss": 0.1048, "step": 2826 }, { "epoch": 0.47, "grad_norm": 1.0181488990783691, "learning_rate": 1.915861110265458e-05, "loss": 0.1218, "step": 2827 }, { "epoch": 0.47, "grad_norm": 1.631185531616211, "learning_rate": 1.9157885965131804e-05, "loss": 0.1207, "step": 2828 }, { "epoch": 0.47, "grad_norm": 1.1632550954818726, "learning_rate": 1.9157160529004548e-05, "loss": 0.0975, "step": 2829 }, { "epoch": 0.47, "grad_norm": 0.7924829721450806, "learning_rate": 1.9156434794296464e-05, "loss": 0.0822, "step": 2830 }, { "epoch": 0.47, "grad_norm": 0.7512032985687256, "learning_rate": 1.9155708761031214e-05, "loss": 0.0766, "step": 2831 }, { "epoch": 0.47, "grad_norm": 0.8876598477363586, "learning_rate": 1.9154982429232475e-05, "loss": 0.0879, "step": 2832 }, { "epoch": 0.47, "grad_norm": 1.0431652069091797, "learning_rate": 1.9154255798923923e-05, "loss": 0.1021, "step": 2833 }, { "epoch": 0.47, "grad_norm": 2.9713382720947266, "learning_rate": 1.9153528870129258e-05, "loss": 0.0801, "step": 2834 }, { "epoch": 0.47, "grad_norm": 0.825635552406311, "learning_rate": 1.9152801642872176e-05, "loss": 0.1048, "step": 2835 }, { "epoch": 0.47, "grad_norm": 1.1727838516235352, "learning_rate": 1.9152074117176395e-05, "loss": 0.094, "step": 2836 }, { "epoch": 0.47, "grad_norm": 0.6911177635192871, "learning_rate": 1.9151346293065634e-05, "loss": 0.0681, "step": 2837 }, { "epoch": 0.47, "grad_norm": 1.3063609600067139, "learning_rate": 1.9150618170563625e-05, "loss": 0.0999, "step": 2838 }, { "epoch": 0.47, "grad_norm": 1.0328967571258545, "learning_rate": 1.9149889749694107e-05, "loss": 0.096, "step": 2839 }, { "epoch": 0.47, "grad_norm": 1.243003010749817, "learning_rate": 1.914916103048084e-05, "loss": 0.1388, "step": 2840 }, { "epoch": 0.48, "grad_norm": 1.0451725721359253, "learning_rate": 1.914843201294757e-05, "loss": 0.0801, "step": 2841 }, { "epoch": 0.48, "grad_norm": 1.034160852432251, "learning_rate": 1.9147702697118082e-05, "loss": 0.0823, "step": 2842 }, { "epoch": 0.48, "grad_norm": 0.888494610786438, "learning_rate": 1.914697308301615e-05, "loss": 0.0791, "step": 2843 }, { "epoch": 0.48, "grad_norm": 1.1537851095199585, "learning_rate": 1.9146243170665562e-05, "loss": 0.1147, "step": 2844 }, { "epoch": 0.48, "grad_norm": 0.7648332118988037, "learning_rate": 1.914551296009012e-05, "loss": 0.0831, "step": 2845 }, { "epoch": 0.48, "grad_norm": 0.8638182282447815, "learning_rate": 1.9144782451313633e-05, "loss": 0.1003, "step": 2846 }, { "epoch": 0.48, "grad_norm": 0.8959183692932129, "learning_rate": 1.914405164435992e-05, "loss": 0.1055, "step": 2847 }, { "epoch": 0.48, "grad_norm": 0.93858802318573, "learning_rate": 1.914332053925281e-05, "loss": 0.1034, "step": 2848 }, { "epoch": 0.48, "grad_norm": 1.0000245571136475, "learning_rate": 1.9142589136016143e-05, "loss": 0.1144, "step": 2849 }, { "epoch": 0.48, "grad_norm": 0.9786055684089661, "learning_rate": 1.9141857434673764e-05, "loss": 0.1077, "step": 2850 }, { "epoch": 0.48, "grad_norm": 0.9917508959770203, "learning_rate": 1.9141125435249534e-05, "loss": 0.096, "step": 2851 }, { "epoch": 0.48, "grad_norm": 1.2967808246612549, "learning_rate": 1.9140393137767325e-05, "loss": 0.101, "step": 2852 }, { "epoch": 0.48, "grad_norm": 1.003379464149475, "learning_rate": 1.9139660542251004e-05, "loss": 0.1192, "step": 2853 }, { "epoch": 0.48, "grad_norm": 1.0592671632766724, "learning_rate": 1.9138927648724467e-05, "loss": 0.1181, "step": 2854 }, { "epoch": 0.48, "grad_norm": 1.0009328126907349, "learning_rate": 1.9138194457211603e-05, "loss": 0.0919, "step": 2855 }, { "epoch": 0.48, "grad_norm": 1.207688331604004, "learning_rate": 1.9137460967736326e-05, "loss": 0.099, "step": 2856 }, { "epoch": 0.48, "grad_norm": 1.1319352388381958, "learning_rate": 1.913672718032255e-05, "loss": 0.1336, "step": 2857 }, { "epoch": 0.48, "grad_norm": 0.8009436726570129, "learning_rate": 1.91359930949942e-05, "loss": 0.1075, "step": 2858 }, { "epoch": 0.48, "grad_norm": 0.8808671236038208, "learning_rate": 1.9135258711775214e-05, "loss": 0.0954, "step": 2859 }, { "epoch": 0.48, "grad_norm": 0.8084663152694702, "learning_rate": 1.9134524030689535e-05, "loss": 0.1061, "step": 2860 }, { "epoch": 0.48, "grad_norm": 0.8790119290351868, "learning_rate": 1.9133789051761122e-05, "loss": 0.1004, "step": 2861 }, { "epoch": 0.48, "grad_norm": 0.9040923118591309, "learning_rate": 1.9133053775013933e-05, "loss": 0.1006, "step": 2862 }, { "epoch": 0.48, "grad_norm": 1.0613903999328613, "learning_rate": 1.913231820047195e-05, "loss": 0.0992, "step": 2863 }, { "epoch": 0.48, "grad_norm": 1.4827637672424316, "learning_rate": 1.913158232815915e-05, "loss": 0.1087, "step": 2864 }, { "epoch": 0.48, "grad_norm": 0.8070435523986816, "learning_rate": 1.9130846158099535e-05, "loss": 0.0734, "step": 2865 }, { "epoch": 0.48, "grad_norm": 0.9463756084442139, "learning_rate": 1.9130109690317103e-05, "loss": 0.093, "step": 2866 }, { "epoch": 0.48, "grad_norm": 1.0830066204071045, "learning_rate": 1.9129372924835868e-05, "loss": 0.0983, "step": 2867 }, { "epoch": 0.48, "grad_norm": 1.2170711755752563, "learning_rate": 1.912863586167986e-05, "loss": 0.0924, "step": 2868 }, { "epoch": 0.48, "grad_norm": 1.0764251947402954, "learning_rate": 1.91278985008731e-05, "loss": 0.1051, "step": 2869 }, { "epoch": 0.48, "grad_norm": 1.2048460245132446, "learning_rate": 1.912716084243964e-05, "loss": 0.1007, "step": 2870 }, { "epoch": 0.48, "grad_norm": 0.9061402678489685, "learning_rate": 1.912642288640353e-05, "loss": 0.0705, "step": 2871 }, { "epoch": 0.48, "grad_norm": 0.759423553943634, "learning_rate": 1.912568463278883e-05, "loss": 0.0779, "step": 2872 }, { "epoch": 0.48, "grad_norm": 0.8616987466812134, "learning_rate": 1.9124946081619613e-05, "loss": 0.0806, "step": 2873 }, { "epoch": 0.48, "grad_norm": 0.9702621698379517, "learning_rate": 1.912420723291996e-05, "loss": 0.1038, "step": 2874 }, { "epoch": 0.48, "grad_norm": 0.7841331362724304, "learning_rate": 1.9123468086713965e-05, "loss": 0.0896, "step": 2875 }, { "epoch": 0.48, "grad_norm": 1.1410051584243774, "learning_rate": 1.912272864302572e-05, "loss": 0.0757, "step": 2876 }, { "epoch": 0.48, "grad_norm": 0.7357247471809387, "learning_rate": 1.912198890187935e-05, "loss": 0.076, "step": 2877 }, { "epoch": 0.48, "grad_norm": 0.9000664949417114, "learning_rate": 1.9121248863298962e-05, "loss": 0.0924, "step": 2878 }, { "epoch": 0.48, "grad_norm": 0.7741978168487549, "learning_rate": 1.9120508527308692e-05, "loss": 0.0774, "step": 2879 }, { "epoch": 0.48, "grad_norm": 1.058538556098938, "learning_rate": 1.911976789393268e-05, "loss": 0.0872, "step": 2880 }, { "epoch": 0.48, "grad_norm": 0.7465929388999939, "learning_rate": 1.911902696319507e-05, "loss": 0.0972, "step": 2881 }, { "epoch": 0.48, "grad_norm": 0.9523395299911499, "learning_rate": 1.911828573512003e-05, "loss": 0.1011, "step": 2882 }, { "epoch": 0.48, "grad_norm": 0.8465768098831177, "learning_rate": 1.911754420973172e-05, "loss": 0.0745, "step": 2883 }, { "epoch": 0.48, "grad_norm": 0.8932531476020813, "learning_rate": 1.911680238705432e-05, "loss": 0.096, "step": 2884 }, { "epoch": 0.48, "grad_norm": 0.9936158061027527, "learning_rate": 1.911606026711202e-05, "loss": 0.1099, "step": 2885 }, { "epoch": 0.48, "grad_norm": 1.0234200954437256, "learning_rate": 1.911531784992902e-05, "loss": 0.0925, "step": 2886 }, { "epoch": 0.48, "grad_norm": 1.2001069784164429, "learning_rate": 1.9114575135529527e-05, "loss": 0.0941, "step": 2887 }, { "epoch": 0.48, "grad_norm": 0.9824643731117249, "learning_rate": 1.9113832123937753e-05, "loss": 0.1022, "step": 2888 }, { "epoch": 0.48, "grad_norm": 0.8818870186805725, "learning_rate": 1.911308881517793e-05, "loss": 0.1104, "step": 2889 }, { "epoch": 0.48, "grad_norm": 0.8673105835914612, "learning_rate": 1.911234520927429e-05, "loss": 0.0891, "step": 2890 }, { "epoch": 0.48, "grad_norm": 0.8947502970695496, "learning_rate": 1.9111601306251086e-05, "loss": 0.0842, "step": 2891 }, { "epoch": 0.48, "grad_norm": 0.9273698925971985, "learning_rate": 1.9110857106132564e-05, "loss": 0.0941, "step": 2892 }, { "epoch": 0.48, "grad_norm": 0.7238667607307434, "learning_rate": 1.9110112608943e-05, "loss": 0.0721, "step": 2893 }, { "epoch": 0.48, "grad_norm": 0.948512077331543, "learning_rate": 1.910936781470666e-05, "loss": 0.092, "step": 2894 }, { "epoch": 0.48, "grad_norm": 0.8347882628440857, "learning_rate": 1.9108622723447837e-05, "loss": 0.0993, "step": 2895 }, { "epoch": 0.48, "grad_norm": 0.9524086117744446, "learning_rate": 1.9107877335190823e-05, "loss": 0.0799, "step": 2896 }, { "epoch": 0.48, "grad_norm": 0.9186903834342957, "learning_rate": 1.910713164995992e-05, "loss": 0.0992, "step": 2897 }, { "epoch": 0.48, "grad_norm": 0.7868735194206238, "learning_rate": 1.9106385667779443e-05, "loss": 0.0705, "step": 2898 }, { "epoch": 0.48, "grad_norm": 0.8533288240432739, "learning_rate": 1.9105639388673718e-05, "loss": 0.097, "step": 2899 }, { "epoch": 0.49, "grad_norm": 0.9170148968696594, "learning_rate": 1.9104892812667074e-05, "loss": 0.0841, "step": 2900 }, { "epoch": 0.49, "grad_norm": 0.7956652045249939, "learning_rate": 1.9104145939783857e-05, "loss": 0.0949, "step": 2901 }, { "epoch": 0.49, "grad_norm": 0.8245270252227783, "learning_rate": 1.9103398770048422e-05, "loss": 0.1091, "step": 2902 }, { "epoch": 0.49, "grad_norm": 0.902617335319519, "learning_rate": 1.9102651303485127e-05, "loss": 0.0881, "step": 2903 }, { "epoch": 0.49, "grad_norm": 0.8114748001098633, "learning_rate": 1.9101903540118344e-05, "loss": 0.0853, "step": 2904 }, { "epoch": 0.49, "grad_norm": 0.8825427889823914, "learning_rate": 1.910115547997246e-05, "loss": 0.0905, "step": 2905 }, { "epoch": 0.49, "grad_norm": 0.7644917964935303, "learning_rate": 1.910040712307186e-05, "loss": 0.0829, "step": 2906 }, { "epoch": 0.49, "grad_norm": 1.1186211109161377, "learning_rate": 1.909965846944095e-05, "loss": 0.1424, "step": 2907 }, { "epoch": 0.49, "grad_norm": 0.9662491083145142, "learning_rate": 1.909890951910414e-05, "loss": 0.1174, "step": 2908 }, { "epoch": 0.49, "grad_norm": 2.757324457168579, "learning_rate": 1.909816027208585e-05, "loss": 0.1073, "step": 2909 }, { "epoch": 0.49, "grad_norm": 0.7854401469230652, "learning_rate": 1.9097410728410505e-05, "loss": 0.0916, "step": 2910 }, { "epoch": 0.49, "grad_norm": 0.8878661394119263, "learning_rate": 1.9096660888102553e-05, "loss": 0.0984, "step": 2911 }, { "epoch": 0.49, "grad_norm": 1.0182604789733887, "learning_rate": 1.909591075118644e-05, "loss": 0.0924, "step": 2912 }, { "epoch": 0.49, "grad_norm": 1.0613781213760376, "learning_rate": 1.9095160317686626e-05, "loss": 0.1154, "step": 2913 }, { "epoch": 0.49, "grad_norm": 1.1894972324371338, "learning_rate": 1.909440958762758e-05, "loss": 0.0933, "step": 2914 }, { "epoch": 0.49, "grad_norm": 1.1962475776672363, "learning_rate": 1.9093658561033775e-05, "loss": 0.0857, "step": 2915 }, { "epoch": 0.49, "grad_norm": 1.016413927078247, "learning_rate": 1.9092907237929706e-05, "loss": 0.0912, "step": 2916 }, { "epoch": 0.49, "grad_norm": 0.9326044917106628, "learning_rate": 1.9092155618339866e-05, "loss": 0.1014, "step": 2917 }, { "epoch": 0.49, "grad_norm": 0.9929134249687195, "learning_rate": 1.909140370228877e-05, "loss": 0.1025, "step": 2918 }, { "epoch": 0.49, "grad_norm": 1.1746569871902466, "learning_rate": 1.9090651489800928e-05, "loss": 0.1316, "step": 2919 }, { "epoch": 0.49, "grad_norm": 0.7652073502540588, "learning_rate": 1.9089898980900866e-05, "loss": 0.0944, "step": 2920 }, { "epoch": 0.49, "grad_norm": 1.1127278804779053, "learning_rate": 1.9089146175613128e-05, "loss": 0.0938, "step": 2921 }, { "epoch": 0.49, "grad_norm": 0.9481162428855896, "learning_rate": 1.9088393073962252e-05, "loss": 0.0667, "step": 2922 }, { "epoch": 0.49, "grad_norm": 1.3163388967514038, "learning_rate": 1.90876396759728e-05, "loss": 0.1196, "step": 2923 }, { "epoch": 0.49, "grad_norm": 1.0075286626815796, "learning_rate": 1.9086885981669333e-05, "loss": 0.1049, "step": 2924 }, { "epoch": 0.49, "grad_norm": 0.9560242891311646, "learning_rate": 1.9086131991076428e-05, "loss": 0.1123, "step": 2925 }, { "epoch": 0.49, "grad_norm": 1.2103466987609863, "learning_rate": 1.908537770421867e-05, "loss": 0.1048, "step": 2926 }, { "epoch": 0.49, "grad_norm": 0.9635504484176636, "learning_rate": 1.908462312112066e-05, "loss": 0.1063, "step": 2927 }, { "epoch": 0.49, "grad_norm": 0.9675124883651733, "learning_rate": 1.908386824180699e-05, "loss": 0.1039, "step": 2928 }, { "epoch": 0.49, "grad_norm": 1.096391201019287, "learning_rate": 1.908311306630228e-05, "loss": 0.111, "step": 2929 }, { "epoch": 0.49, "grad_norm": 0.9240862727165222, "learning_rate": 1.908235759463115e-05, "loss": 0.1188, "step": 2930 }, { "epoch": 0.49, "grad_norm": 0.9190300107002258, "learning_rate": 1.908160182681824e-05, "loss": 0.1141, "step": 2931 }, { "epoch": 0.49, "grad_norm": 0.9818776845932007, "learning_rate": 1.9080845762888188e-05, "loss": 0.1031, "step": 2932 }, { "epoch": 0.49, "grad_norm": 0.7695561051368713, "learning_rate": 1.9080089402865648e-05, "loss": 0.0907, "step": 2933 }, { "epoch": 0.49, "grad_norm": 1.620926022529602, "learning_rate": 1.9079332746775278e-05, "loss": 0.1021, "step": 2934 }, { "epoch": 0.49, "grad_norm": 0.8236085176467896, "learning_rate": 1.9078575794641756e-05, "loss": 0.0987, "step": 2935 }, { "epoch": 0.49, "grad_norm": 0.7482346892356873, "learning_rate": 1.907781854648976e-05, "loss": 0.0991, "step": 2936 }, { "epoch": 0.49, "grad_norm": 1.2130845785140991, "learning_rate": 1.9077061002343978e-05, "loss": 0.1022, "step": 2937 }, { "epoch": 0.49, "grad_norm": 0.9019642472267151, "learning_rate": 1.907630316222912e-05, "loss": 0.1163, "step": 2938 }, { "epoch": 0.49, "grad_norm": 0.8655699491500854, "learning_rate": 1.9075545026169886e-05, "loss": 0.1008, "step": 2939 }, { "epoch": 0.49, "grad_norm": 1.3332158327102661, "learning_rate": 1.9074786594191e-05, "loss": 0.1113, "step": 2940 }, { "epoch": 0.49, "grad_norm": 0.9732233881950378, "learning_rate": 1.9074027866317195e-05, "loss": 0.1003, "step": 2941 }, { "epoch": 0.49, "grad_norm": 1.0333153009414673, "learning_rate": 1.907326884257321e-05, "loss": 0.1094, "step": 2942 }, { "epoch": 0.49, "grad_norm": 1.04940664768219, "learning_rate": 1.9072509522983785e-05, "loss": 0.1078, "step": 2943 }, { "epoch": 0.49, "grad_norm": 0.9779009222984314, "learning_rate": 1.907174990757369e-05, "loss": 0.1078, "step": 2944 }, { "epoch": 0.49, "grad_norm": 0.9772958755493164, "learning_rate": 1.9070989996367682e-05, "loss": 0.0957, "step": 2945 }, { "epoch": 0.49, "grad_norm": 0.9680584669113159, "learning_rate": 1.9070229789390548e-05, "loss": 0.1134, "step": 2946 }, { "epoch": 0.49, "grad_norm": 0.9260647296905518, "learning_rate": 1.9069469286667072e-05, "loss": 0.0819, "step": 2947 }, { "epoch": 0.49, "grad_norm": 1.0880300998687744, "learning_rate": 1.9068708488222053e-05, "loss": 0.0887, "step": 2948 }, { "epoch": 0.49, "grad_norm": 0.7501139044761658, "learning_rate": 1.9067947394080296e-05, "loss": 0.0778, "step": 2949 }, { "epoch": 0.49, "grad_norm": 0.7186444401741028, "learning_rate": 1.9067186004266614e-05, "loss": 0.0876, "step": 2950 }, { "epoch": 0.49, "grad_norm": 0.8850650191307068, "learning_rate": 1.9066424318805844e-05, "loss": 0.0867, "step": 2951 }, { "epoch": 0.49, "grad_norm": 0.8685157299041748, "learning_rate": 1.906566233772281e-05, "loss": 0.114, "step": 2952 }, { "epoch": 0.49, "grad_norm": 0.9641095399856567, "learning_rate": 1.9064900061042362e-05, "loss": 0.0897, "step": 2953 }, { "epoch": 0.49, "grad_norm": 0.7166152596473694, "learning_rate": 1.9064137488789355e-05, "loss": 0.083, "step": 2954 }, { "epoch": 0.49, "grad_norm": 1.0751569271087646, "learning_rate": 1.9063374620988656e-05, "loss": 0.0751, "step": 2955 }, { "epoch": 0.49, "grad_norm": 0.8347177505493164, "learning_rate": 1.906261145766514e-05, "loss": 0.0823, "step": 2956 }, { "epoch": 0.49, "grad_norm": 1.2131943702697754, "learning_rate": 1.906184799884368e-05, "loss": 0.0934, "step": 2957 }, { "epoch": 0.49, "grad_norm": 1.0153748989105225, "learning_rate": 1.9061084244549182e-05, "loss": 0.0924, "step": 2958 }, { "epoch": 0.49, "grad_norm": 1.4745609760284424, "learning_rate": 1.9060320194806542e-05, "loss": 0.0957, "step": 2959 }, { "epoch": 0.5, "grad_norm": 0.8365800976753235, "learning_rate": 1.9059555849640677e-05, "loss": 0.1045, "step": 2960 }, { "epoch": 0.5, "grad_norm": 0.9551053643226624, "learning_rate": 1.9058791209076507e-05, "loss": 0.102, "step": 2961 }, { "epoch": 0.5, "grad_norm": 0.9767804741859436, "learning_rate": 1.9058026273138964e-05, "loss": 0.0967, "step": 2962 }, { "epoch": 0.5, "grad_norm": 1.0167980194091797, "learning_rate": 1.9057261041852994e-05, "loss": 0.1049, "step": 2963 }, { "epoch": 0.5, "grad_norm": 0.9033505320549011, "learning_rate": 1.9056495515243544e-05, "loss": 0.0803, "step": 2964 }, { "epoch": 0.5, "grad_norm": 0.9875251650810242, "learning_rate": 1.9055729693335574e-05, "loss": 0.0938, "step": 2965 }, { "epoch": 0.5, "grad_norm": 1.1324621438980103, "learning_rate": 1.905496357615406e-05, "loss": 0.1017, "step": 2966 }, { "epoch": 0.5, "grad_norm": 0.8910234570503235, "learning_rate": 1.9054197163723974e-05, "loss": 0.0894, "step": 2967 }, { "epoch": 0.5, "grad_norm": 0.9648171663284302, "learning_rate": 1.905343045607031e-05, "loss": 0.0904, "step": 2968 }, { "epoch": 0.5, "grad_norm": 0.8404733538627625, "learning_rate": 1.905266345321807e-05, "loss": 0.0937, "step": 2969 }, { "epoch": 0.5, "grad_norm": 1.1153781414031982, "learning_rate": 1.905189615519226e-05, "loss": 0.1022, "step": 2970 }, { "epoch": 0.5, "grad_norm": 1.0482194423675537, "learning_rate": 1.9051128562017905e-05, "loss": 0.0991, "step": 2971 }, { "epoch": 0.5, "grad_norm": 0.9775332808494568, "learning_rate": 1.9050360673720022e-05, "loss": 0.1022, "step": 2972 }, { "epoch": 0.5, "grad_norm": 0.7884889841079712, "learning_rate": 1.904959249032366e-05, "loss": 0.0962, "step": 2973 }, { "epoch": 0.5, "grad_norm": 1.0077706575393677, "learning_rate": 1.9048824011853855e-05, "loss": 0.1021, "step": 2974 }, { "epoch": 0.5, "grad_norm": 1.1463226079940796, "learning_rate": 1.904805523833568e-05, "loss": 0.0893, "step": 2975 }, { "epoch": 0.5, "grad_norm": 0.7813463807106018, "learning_rate": 1.9047286169794186e-05, "loss": 0.0907, "step": 2976 }, { "epoch": 0.5, "grad_norm": 0.7988889217376709, "learning_rate": 1.904651680625446e-05, "loss": 0.0695, "step": 2977 }, { "epoch": 0.5, "grad_norm": 0.8129236698150635, "learning_rate": 1.904574714774158e-05, "loss": 0.0971, "step": 2978 }, { "epoch": 0.5, "grad_norm": 0.7859541177749634, "learning_rate": 1.904497719428065e-05, "loss": 0.0916, "step": 2979 }, { "epoch": 0.5, "grad_norm": 1.1075290441513062, "learning_rate": 1.904420694589677e-05, "loss": 0.1112, "step": 2980 }, { "epoch": 0.5, "grad_norm": 0.9235920310020447, "learning_rate": 1.9043436402615058e-05, "loss": 0.1195, "step": 2981 }, { "epoch": 0.5, "grad_norm": 0.6988478302955627, "learning_rate": 1.9042665564460632e-05, "loss": 0.0961, "step": 2982 }, { "epoch": 0.5, "grad_norm": 0.926357090473175, "learning_rate": 1.9041894431458636e-05, "loss": 0.0889, "step": 2983 }, { "epoch": 0.5, "grad_norm": 0.8697753548622131, "learning_rate": 1.9041123003634208e-05, "loss": 0.091, "step": 2984 }, { "epoch": 0.5, "grad_norm": 0.9308438301086426, "learning_rate": 1.9040351281012502e-05, "loss": 0.1098, "step": 2985 }, { "epoch": 0.5, "grad_norm": 0.8741866946220398, "learning_rate": 1.903957926361868e-05, "loss": 0.1034, "step": 2986 }, { "epoch": 0.5, "grad_norm": 0.8752495050430298, "learning_rate": 1.9038806951477915e-05, "loss": 0.094, "step": 2987 }, { "epoch": 0.5, "grad_norm": 0.9231854677200317, "learning_rate": 1.9038034344615388e-05, "loss": 0.0856, "step": 2988 }, { "epoch": 0.5, "grad_norm": 1.137040138244629, "learning_rate": 1.9037261443056295e-05, "loss": 0.117, "step": 2989 }, { "epoch": 0.5, "grad_norm": 0.7948854565620422, "learning_rate": 1.9036488246825838e-05, "loss": 0.0809, "step": 2990 }, { "epoch": 0.5, "grad_norm": 0.755829930305481, "learning_rate": 1.903571475594922e-05, "loss": 0.0997, "step": 2991 }, { "epoch": 0.5, "grad_norm": 0.9511950016021729, "learning_rate": 1.903494097045167e-05, "loss": 0.0823, "step": 2992 }, { "epoch": 0.5, "grad_norm": 0.9422808885574341, "learning_rate": 1.9034166890358417e-05, "loss": 0.1078, "step": 2993 }, { "epoch": 0.5, "grad_norm": 0.6889848113059998, "learning_rate": 1.9033392515694696e-05, "loss": 0.0715, "step": 2994 }, { "epoch": 0.5, "grad_norm": 1.0377084016799927, "learning_rate": 1.9032617846485762e-05, "loss": 0.1067, "step": 2995 }, { "epoch": 0.5, "grad_norm": 0.7137747406959534, "learning_rate": 1.903184288275687e-05, "loss": 0.097, "step": 2996 }, { "epoch": 0.5, "grad_norm": 0.9940060973167419, "learning_rate": 1.9031067624533287e-05, "loss": 0.104, "step": 2997 }, { "epoch": 0.5, "grad_norm": 0.96693354845047, "learning_rate": 1.90302920718403e-05, "loss": 0.0779, "step": 2998 }, { "epoch": 0.5, "grad_norm": 0.9132030606269836, "learning_rate": 1.902951622470319e-05, "loss": 0.0801, "step": 2999 }, { "epoch": 0.5, "grad_norm": 0.8309427499771118, "learning_rate": 1.9028740083147254e-05, "loss": 0.0952, "step": 3000 }, { "epoch": 0.5, "grad_norm": 0.8072255253791809, "learning_rate": 1.90279636471978e-05, "loss": 0.1163, "step": 3001 }, { "epoch": 0.5, "grad_norm": 0.9553287625312805, "learning_rate": 1.9027186916880146e-05, "loss": 0.1049, "step": 3002 }, { "epoch": 0.5, "grad_norm": 0.9245548248291016, "learning_rate": 1.9026409892219623e-05, "loss": 0.1103, "step": 3003 }, { "epoch": 0.5, "grad_norm": 1.0496606826782227, "learning_rate": 1.902563257324156e-05, "loss": 0.0903, "step": 3004 }, { "epoch": 0.5, "grad_norm": 1.8811970949172974, "learning_rate": 1.9024854959971298e-05, "loss": 0.0933, "step": 3005 }, { "epoch": 0.5, "grad_norm": 0.8249226212501526, "learning_rate": 1.9024077052434203e-05, "loss": 0.0971, "step": 3006 }, { "epoch": 0.5, "grad_norm": 1.1939215660095215, "learning_rate": 1.9023298850655635e-05, "loss": 0.1018, "step": 3007 }, { "epoch": 0.5, "grad_norm": 0.8907766342163086, "learning_rate": 1.9022520354660966e-05, "loss": 0.0794, "step": 3008 }, { "epoch": 0.5, "grad_norm": 2.469278573989868, "learning_rate": 1.9021741564475584e-05, "loss": 0.1103, "step": 3009 }, { "epoch": 0.5, "grad_norm": 0.7768643498420715, "learning_rate": 1.902096248012488e-05, "loss": 0.0831, "step": 3010 }, { "epoch": 0.5, "grad_norm": 0.9687069058418274, "learning_rate": 1.9020183101634258e-05, "loss": 0.0998, "step": 3011 }, { "epoch": 0.5, "grad_norm": 0.7659558057785034, "learning_rate": 1.901940342902913e-05, "loss": 0.088, "step": 3012 }, { "epoch": 0.5, "grad_norm": 0.9721199870109558, "learning_rate": 1.9018623462334917e-05, "loss": 0.104, "step": 3013 }, { "epoch": 0.5, "grad_norm": 1.3016040325164795, "learning_rate": 1.9017843201577054e-05, "loss": 0.1302, "step": 3014 }, { "epoch": 0.5, "grad_norm": 1.1177150011062622, "learning_rate": 1.901706264678098e-05, "loss": 0.1017, "step": 3015 }, { "epoch": 0.5, "grad_norm": 1.0160597562789917, "learning_rate": 1.9016281797972146e-05, "loss": 0.0935, "step": 3016 }, { "epoch": 0.5, "grad_norm": 1.0772364139556885, "learning_rate": 1.9015500655176015e-05, "loss": 0.09, "step": 3017 }, { "epoch": 0.5, "grad_norm": 0.7239834666252136, "learning_rate": 1.9014719218418052e-05, "loss": 0.0778, "step": 3018 }, { "epoch": 0.5, "grad_norm": 0.953822910785675, "learning_rate": 1.9013937487723742e-05, "loss": 0.1087, "step": 3019 }, { "epoch": 0.51, "grad_norm": 0.9790655970573425, "learning_rate": 1.901315546311857e-05, "loss": 0.0927, "step": 3020 }, { "epoch": 0.51, "grad_norm": 0.9092608094215393, "learning_rate": 1.901237314462804e-05, "loss": 0.0882, "step": 3021 }, { "epoch": 0.51, "grad_norm": 1.2381983995437622, "learning_rate": 1.9011590532277658e-05, "loss": 0.0984, "step": 3022 }, { "epoch": 0.51, "grad_norm": 0.9374671578407288, "learning_rate": 1.901080762609294e-05, "loss": 0.0962, "step": 3023 }, { "epoch": 0.51, "grad_norm": 0.9715684056282043, "learning_rate": 1.9010024426099418e-05, "loss": 0.0886, "step": 3024 }, { "epoch": 0.51, "grad_norm": 1.7980945110321045, "learning_rate": 1.9009240932322623e-05, "loss": 0.0767, "step": 3025 }, { "epoch": 0.51, "grad_norm": 0.8746585845947266, "learning_rate": 1.9008457144788107e-05, "loss": 0.0799, "step": 3026 }, { "epoch": 0.51, "grad_norm": 0.8503754734992981, "learning_rate": 1.9007673063521427e-05, "loss": 0.0958, "step": 3027 }, { "epoch": 0.51, "grad_norm": 1.324777364730835, "learning_rate": 1.9006888688548146e-05, "loss": 0.0898, "step": 3028 }, { "epoch": 0.51, "grad_norm": 0.7647464275360107, "learning_rate": 1.9006104019893838e-05, "loss": 0.0856, "step": 3029 }, { "epoch": 0.51, "grad_norm": 1.19242525100708, "learning_rate": 1.9005319057584093e-05, "loss": 0.0862, "step": 3030 }, { "epoch": 0.51, "grad_norm": 1.9691845178604126, "learning_rate": 1.90045338016445e-05, "loss": 0.1198, "step": 3031 }, { "epoch": 0.51, "grad_norm": 0.8945536017417908, "learning_rate": 1.9003748252100668e-05, "loss": 0.0788, "step": 3032 }, { "epoch": 0.51, "grad_norm": 1.1524734497070312, "learning_rate": 1.900296240897821e-05, "loss": 0.0835, "step": 3033 }, { "epoch": 0.51, "grad_norm": 1.2556816339492798, "learning_rate": 1.900217627230275e-05, "loss": 0.1228, "step": 3034 }, { "epoch": 0.51, "grad_norm": 0.7693094611167908, "learning_rate": 1.9001389842099916e-05, "loss": 0.0925, "step": 3035 }, { "epoch": 0.51, "grad_norm": 1.1266478300094604, "learning_rate": 1.900060311839536e-05, "loss": 0.1042, "step": 3036 }, { "epoch": 0.51, "grad_norm": 0.9376640915870667, "learning_rate": 1.8999816101214722e-05, "loss": 0.0976, "step": 3037 }, { "epoch": 0.51, "grad_norm": 0.802176833152771, "learning_rate": 1.8999028790583672e-05, "loss": 0.1054, "step": 3038 }, { "epoch": 0.51, "grad_norm": 0.9870275259017944, "learning_rate": 1.899824118652788e-05, "loss": 0.0959, "step": 3039 }, { "epoch": 0.51, "grad_norm": 0.9944806694984436, "learning_rate": 1.8997453289073026e-05, "loss": 0.0852, "step": 3040 }, { "epoch": 0.51, "grad_norm": 1.0346572399139404, "learning_rate": 1.89966650982448e-05, "loss": 0.1152, "step": 3041 }, { "epoch": 0.51, "grad_norm": 1.8607044219970703, "learning_rate": 1.89958766140689e-05, "loss": 0.0856, "step": 3042 }, { "epoch": 0.51, "grad_norm": 1.1666909456253052, "learning_rate": 1.8995087836571045e-05, "loss": 0.0994, "step": 3043 }, { "epoch": 0.51, "grad_norm": 0.9106898307800293, "learning_rate": 1.899429876577694e-05, "loss": 0.0859, "step": 3044 }, { "epoch": 0.51, "grad_norm": 1.0287353992462158, "learning_rate": 1.8993509401712326e-05, "loss": 0.0784, "step": 3045 }, { "epoch": 0.51, "grad_norm": 1.3292690515518188, "learning_rate": 1.8992719744402933e-05, "loss": 0.102, "step": 3046 }, { "epoch": 0.51, "grad_norm": 1.253652811050415, "learning_rate": 1.8991929793874514e-05, "loss": 0.1141, "step": 3047 }, { "epoch": 0.51, "grad_norm": 1.2581772804260254, "learning_rate": 1.899113955015282e-05, "loss": 0.0859, "step": 3048 }, { "epoch": 0.51, "grad_norm": 1.0199780464172363, "learning_rate": 1.8990349013263625e-05, "loss": 0.1011, "step": 3049 }, { "epoch": 0.51, "grad_norm": 0.8999339938163757, "learning_rate": 1.8989558183232702e-05, "loss": 0.1127, "step": 3050 }, { "epoch": 0.51, "grad_norm": 1.148066759109497, "learning_rate": 1.8988767060085837e-05, "loss": 0.0878, "step": 3051 }, { "epoch": 0.51, "grad_norm": 1.0448564291000366, "learning_rate": 1.898797564384883e-05, "loss": 0.0931, "step": 3052 }, { "epoch": 0.51, "grad_norm": 1.0063081979751587, "learning_rate": 1.8987183934547477e-05, "loss": 0.1198, "step": 3053 }, { "epoch": 0.51, "grad_norm": 0.9574524164199829, "learning_rate": 1.8986391932207596e-05, "loss": 0.1147, "step": 3054 }, { "epoch": 0.51, "grad_norm": 1.0203630924224854, "learning_rate": 1.898559963685502e-05, "loss": 0.1112, "step": 3055 }, { "epoch": 0.51, "grad_norm": 1.0228270292282104, "learning_rate": 1.898480704851557e-05, "loss": 0.0964, "step": 3056 }, { "epoch": 0.51, "grad_norm": 0.7584741115570068, "learning_rate": 1.89840141672151e-05, "loss": 0.0743, "step": 3057 }, { "epoch": 0.51, "grad_norm": 1.2578188180923462, "learning_rate": 1.8983220992979454e-05, "loss": 0.0998, "step": 3058 }, { "epoch": 0.51, "grad_norm": 1.2324061393737793, "learning_rate": 1.89824275258345e-05, "loss": 0.0888, "step": 3059 }, { "epoch": 0.51, "grad_norm": 0.9310564994812012, "learning_rate": 1.8981633765806108e-05, "loss": 0.1062, "step": 3060 }, { "epoch": 0.51, "grad_norm": 1.0272109508514404, "learning_rate": 1.8980839712920164e-05, "loss": 0.0985, "step": 3061 }, { "epoch": 0.51, "grad_norm": 0.7956578731536865, "learning_rate": 1.898004536720255e-05, "loss": 0.0719, "step": 3062 }, { "epoch": 0.51, "grad_norm": 1.1519570350646973, "learning_rate": 1.8979250728679174e-05, "loss": 0.1033, "step": 3063 }, { "epoch": 0.51, "grad_norm": 0.9592735171318054, "learning_rate": 1.8978455797375944e-05, "loss": 0.1148, "step": 3064 }, { "epoch": 0.51, "grad_norm": 1.1011326313018799, "learning_rate": 1.897766057331878e-05, "loss": 0.0946, "step": 3065 }, { "epoch": 0.51, "grad_norm": 1.2045422792434692, "learning_rate": 1.897686505653361e-05, "loss": 0.123, "step": 3066 }, { "epoch": 0.51, "grad_norm": 1.36360502243042, "learning_rate": 1.8976069247046376e-05, "loss": 0.1021, "step": 3067 }, { "epoch": 0.51, "grad_norm": 0.7315461039543152, "learning_rate": 1.8975273144883024e-05, "loss": 0.0741, "step": 3068 }, { "epoch": 0.51, "grad_norm": 1.0383027791976929, "learning_rate": 1.897447675006951e-05, "loss": 0.1035, "step": 3069 }, { "epoch": 0.51, "grad_norm": 0.7000315189361572, "learning_rate": 1.8973680062631804e-05, "loss": 0.0646, "step": 3070 }, { "epoch": 0.51, "grad_norm": 0.9426497220993042, "learning_rate": 1.8972883082595886e-05, "loss": 0.0988, "step": 3071 }, { "epoch": 0.51, "grad_norm": 1.0148069858551025, "learning_rate": 1.8972085809987737e-05, "loss": 0.1166, "step": 3072 }, { "epoch": 0.51, "grad_norm": 0.9181277751922607, "learning_rate": 1.8971288244833355e-05, "loss": 0.0858, "step": 3073 }, { "epoch": 0.51, "grad_norm": 0.972951352596283, "learning_rate": 1.8970490387158747e-05, "loss": 0.0819, "step": 3074 }, { "epoch": 0.51, "grad_norm": 0.8150300979614258, "learning_rate": 1.8969692236989926e-05, "loss": 0.0957, "step": 3075 }, { "epoch": 0.51, "grad_norm": 0.9644772410392761, "learning_rate": 1.8968893794352917e-05, "loss": 0.11, "step": 3076 }, { "epoch": 0.51, "grad_norm": 0.9814803004264832, "learning_rate": 1.8968095059273758e-05, "loss": 0.1084, "step": 3077 }, { "epoch": 0.51, "grad_norm": 1.0361323356628418, "learning_rate": 1.8967296031778487e-05, "loss": 0.1039, "step": 3078 }, { "epoch": 0.51, "grad_norm": 1.2246119976043701, "learning_rate": 1.896649671189316e-05, "loss": 0.1073, "step": 3079 }, { "epoch": 0.52, "grad_norm": 0.8413578271865845, "learning_rate": 1.8965697099643843e-05, "loss": 0.096, "step": 3080 }, { "epoch": 0.52, "grad_norm": 0.8993083834648132, "learning_rate": 1.8964897195056604e-05, "loss": 0.0827, "step": 3081 }, { "epoch": 0.52, "grad_norm": 0.8152218461036682, "learning_rate": 1.8964096998157522e-05, "loss": 0.0972, "step": 3082 }, { "epoch": 0.52, "grad_norm": 0.7218074798583984, "learning_rate": 1.89632965089727e-05, "loss": 0.077, "step": 3083 }, { "epoch": 0.52, "grad_norm": 0.9672343134880066, "learning_rate": 1.8962495727528226e-05, "loss": 0.0959, "step": 3084 }, { "epoch": 0.52, "grad_norm": 0.8099420666694641, "learning_rate": 1.896169465385022e-05, "loss": 0.0921, "step": 3085 }, { "epoch": 0.52, "grad_norm": 0.8457155227661133, "learning_rate": 1.8960893287964796e-05, "loss": 0.0979, "step": 3086 }, { "epoch": 0.52, "grad_norm": 0.694741427898407, "learning_rate": 1.8960091629898088e-05, "loss": 0.0852, "step": 3087 }, { "epoch": 0.52, "grad_norm": 0.8260155320167542, "learning_rate": 1.8959289679676228e-05, "loss": 0.0924, "step": 3088 }, { "epoch": 0.52, "grad_norm": 0.8370951414108276, "learning_rate": 1.8958487437325373e-05, "loss": 0.0808, "step": 3089 }, { "epoch": 0.52, "grad_norm": 0.9096266031265259, "learning_rate": 1.895768490287168e-05, "loss": 0.1052, "step": 3090 }, { "epoch": 0.52, "grad_norm": 0.9617384672164917, "learning_rate": 1.8956882076341306e-05, "loss": 0.1059, "step": 3091 }, { "epoch": 0.52, "grad_norm": 0.8346560001373291, "learning_rate": 1.895607895776044e-05, "loss": 0.0873, "step": 3092 }, { "epoch": 0.52, "grad_norm": 0.721342921257019, "learning_rate": 1.895527554715527e-05, "loss": 0.0885, "step": 3093 }, { "epoch": 0.52, "grad_norm": 1.0122344493865967, "learning_rate": 1.895447184455198e-05, "loss": 0.1072, "step": 3094 }, { "epoch": 0.52, "grad_norm": 0.7592955827713013, "learning_rate": 1.8953667849976787e-05, "loss": 0.0866, "step": 3095 }, { "epoch": 0.52, "grad_norm": 0.9303037524223328, "learning_rate": 1.8952863563455903e-05, "loss": 0.1049, "step": 3096 }, { "epoch": 0.52, "grad_norm": 0.6367442011833191, "learning_rate": 1.8952058985015552e-05, "loss": 0.0702, "step": 3097 }, { "epoch": 0.52, "grad_norm": 0.8762998580932617, "learning_rate": 1.8951254114681966e-05, "loss": 0.0941, "step": 3098 }, { "epoch": 0.52, "grad_norm": 0.7621783018112183, "learning_rate": 1.8950448952481392e-05, "loss": 0.0796, "step": 3099 }, { "epoch": 0.52, "grad_norm": 1.068421483039856, "learning_rate": 1.8949643498440085e-05, "loss": 0.0867, "step": 3100 }, { "epoch": 0.52, "grad_norm": 1.0175583362579346, "learning_rate": 1.8948837752584303e-05, "loss": 0.114, "step": 3101 }, { "epoch": 0.52, "grad_norm": 1.0954780578613281, "learning_rate": 1.894803171494032e-05, "loss": 0.1259, "step": 3102 }, { "epoch": 0.52, "grad_norm": 0.6947394609451294, "learning_rate": 1.8947225385534417e-05, "loss": 0.0838, "step": 3103 }, { "epoch": 0.52, "grad_norm": 0.8303308486938477, "learning_rate": 1.894641876439289e-05, "loss": 0.0842, "step": 3104 }, { "epoch": 0.52, "grad_norm": 0.9715065360069275, "learning_rate": 1.8945611851542036e-05, "loss": 0.0881, "step": 3105 }, { "epoch": 0.52, "grad_norm": 1.1118782758712769, "learning_rate": 1.894480464700817e-05, "loss": 0.0992, "step": 3106 }, { "epoch": 0.52, "grad_norm": 0.6026630401611328, "learning_rate": 1.89439971508176e-05, "loss": 0.0993, "step": 3107 }, { "epoch": 0.52, "grad_norm": 0.7917866110801697, "learning_rate": 1.894318936299667e-05, "loss": 0.0937, "step": 3108 }, { "epoch": 0.52, "grad_norm": 0.7302008867263794, "learning_rate": 1.8942381283571714e-05, "loss": 0.0883, "step": 3109 }, { "epoch": 0.52, "grad_norm": 0.5620943307876587, "learning_rate": 1.8941572912569074e-05, "loss": 0.0863, "step": 3110 }, { "epoch": 0.52, "grad_norm": 1.0781564712524414, "learning_rate": 1.8940764250015115e-05, "loss": 0.0969, "step": 3111 }, { "epoch": 0.52, "grad_norm": 4.514431476593018, "learning_rate": 1.8939955295936204e-05, "loss": 0.111, "step": 3112 }, { "epoch": 0.52, "grad_norm": 0.757803738117218, "learning_rate": 1.8939146050358713e-05, "loss": 0.0937, "step": 3113 }, { "epoch": 0.52, "grad_norm": 0.9597312211990356, "learning_rate": 1.8938336513309033e-05, "loss": 0.0781, "step": 3114 }, { "epoch": 0.52, "grad_norm": 1.2388556003570557, "learning_rate": 1.8937526684813562e-05, "loss": 0.0927, "step": 3115 }, { "epoch": 0.52, "grad_norm": 0.7582470178604126, "learning_rate": 1.8936716564898702e-05, "loss": 0.0899, "step": 3116 }, { "epoch": 0.52, "grad_norm": 0.8131532073020935, "learning_rate": 1.8935906153590867e-05, "loss": 0.0805, "step": 3117 }, { "epoch": 0.52, "grad_norm": 1.4469523429870605, "learning_rate": 1.8935095450916482e-05, "loss": 0.1304, "step": 3118 }, { "epoch": 0.52, "grad_norm": 0.9690854549407959, "learning_rate": 1.8934284456901985e-05, "loss": 0.0971, "step": 3119 }, { "epoch": 0.52, "grad_norm": 0.9068394303321838, "learning_rate": 1.8933473171573812e-05, "loss": 0.1087, "step": 3120 }, { "epoch": 0.52, "grad_norm": 0.6986176371574402, "learning_rate": 1.893266159495842e-05, "loss": 0.0784, "step": 3121 }, { "epoch": 0.52, "grad_norm": 0.8177449703216553, "learning_rate": 1.8931849727082275e-05, "loss": 0.1092, "step": 3122 }, { "epoch": 0.52, "grad_norm": 0.783878743648529, "learning_rate": 1.8931037567971848e-05, "loss": 0.0758, "step": 3123 }, { "epoch": 0.52, "grad_norm": 0.962552547454834, "learning_rate": 1.8930225117653617e-05, "loss": 0.1176, "step": 3124 }, { "epoch": 0.52, "grad_norm": 1.0329111814498901, "learning_rate": 1.892941237615407e-05, "loss": 0.0996, "step": 3125 }, { "epoch": 0.52, "grad_norm": 0.8184923529624939, "learning_rate": 1.8928599343499715e-05, "loss": 0.1008, "step": 3126 }, { "epoch": 0.52, "grad_norm": 1.0126497745513916, "learning_rate": 1.8927786019717057e-05, "loss": 0.1117, "step": 3127 }, { "epoch": 0.52, "grad_norm": 0.9907206892967224, "learning_rate": 1.8926972404832618e-05, "loss": 0.0983, "step": 3128 }, { "epoch": 0.52, "grad_norm": 0.8892202973365784, "learning_rate": 1.8926158498872926e-05, "loss": 0.0988, "step": 3129 }, { "epoch": 0.52, "grad_norm": 1.1094825267791748, "learning_rate": 1.8925344301864518e-05, "loss": 0.0921, "step": 3130 }, { "epoch": 0.52, "grad_norm": 2.0094053745269775, "learning_rate": 1.8924529813833946e-05, "loss": 0.1228, "step": 3131 }, { "epoch": 0.52, "grad_norm": 1.1822395324707031, "learning_rate": 1.8923715034807766e-05, "loss": 0.1166, "step": 3132 }, { "epoch": 0.52, "grad_norm": 0.8094543814659119, "learning_rate": 1.8922899964812542e-05, "loss": 0.0842, "step": 3133 }, { "epoch": 0.52, "grad_norm": 0.8720975518226624, "learning_rate": 1.8922084603874853e-05, "loss": 0.0976, "step": 3134 }, { "epoch": 0.52, "grad_norm": 1.1788767576217651, "learning_rate": 1.8921268952021282e-05, "loss": 0.1243, "step": 3135 }, { "epoch": 0.52, "grad_norm": 1.038477897644043, "learning_rate": 1.8920453009278427e-05, "loss": 0.1097, "step": 3136 }, { "epoch": 0.52, "grad_norm": 1.5616157054901123, "learning_rate": 1.891963677567289e-05, "loss": 0.1505, "step": 3137 }, { "epoch": 0.52, "grad_norm": 1.6427091360092163, "learning_rate": 1.8918820251231294e-05, "loss": 0.0965, "step": 3138 }, { "epoch": 0.53, "grad_norm": 5.4560370445251465, "learning_rate": 1.891800343598025e-05, "loss": 0.131, "step": 3139 }, { "epoch": 0.53, "grad_norm": 1.264404535293579, "learning_rate": 1.8917186329946405e-05, "loss": 0.0996, "step": 3140 }, { "epoch": 0.53, "grad_norm": 1.4379521608352661, "learning_rate": 1.8916368933156387e-05, "loss": 0.0977, "step": 3141 }, { "epoch": 0.53, "grad_norm": 0.9226726293563843, "learning_rate": 1.891555124563686e-05, "loss": 0.1139, "step": 3142 }, { "epoch": 0.53, "grad_norm": 0.8663817644119263, "learning_rate": 1.891473326741448e-05, "loss": 0.0984, "step": 3143 }, { "epoch": 0.53, "grad_norm": 0.8503304123878479, "learning_rate": 1.891391499851592e-05, "loss": 0.083, "step": 3144 }, { "epoch": 0.53, "grad_norm": 1.0007539987564087, "learning_rate": 1.8913096438967858e-05, "loss": 0.1273, "step": 3145 }, { "epoch": 0.53, "grad_norm": 1.006790280342102, "learning_rate": 1.8912277588796992e-05, "loss": 0.1092, "step": 3146 }, { "epoch": 0.53, "grad_norm": 1.5847936868667603, "learning_rate": 1.8911458448030012e-05, "loss": 0.0928, "step": 3147 }, { "epoch": 0.53, "grad_norm": 0.8819052577018738, "learning_rate": 1.8910639016693632e-05, "loss": 0.1087, "step": 3148 }, { "epoch": 0.53, "grad_norm": 1.002963662147522, "learning_rate": 1.890981929481457e-05, "loss": 0.1116, "step": 3149 }, { "epoch": 0.53, "grad_norm": 0.9553590416908264, "learning_rate": 1.8908999282419557e-05, "loss": 0.1228, "step": 3150 }, { "epoch": 0.53, "grad_norm": 0.9600933790206909, "learning_rate": 1.8908178979535324e-05, "loss": 0.0801, "step": 3151 }, { "epoch": 0.53, "grad_norm": 1.2738326787948608, "learning_rate": 1.8907358386188622e-05, "loss": 0.1129, "step": 3152 }, { "epoch": 0.53, "grad_norm": 0.9524524211883545, "learning_rate": 1.8906537502406206e-05, "loss": 0.0932, "step": 3153 }, { "epoch": 0.53, "grad_norm": 0.945723295211792, "learning_rate": 1.8905716328214843e-05, "loss": 0.1038, "step": 3154 }, { "epoch": 0.53, "grad_norm": 0.9481092691421509, "learning_rate": 1.890489486364131e-05, "loss": 0.1001, "step": 3155 }, { "epoch": 0.53, "grad_norm": 1.415216326713562, "learning_rate": 1.8904073108712393e-05, "loss": 0.1437, "step": 3156 }, { "epoch": 0.53, "grad_norm": 1.0764232873916626, "learning_rate": 1.890325106345488e-05, "loss": 0.1076, "step": 3157 }, { "epoch": 0.53, "grad_norm": 0.9569740891456604, "learning_rate": 1.8902428727895578e-05, "loss": 0.1042, "step": 3158 }, { "epoch": 0.53, "grad_norm": 1.4213792085647583, "learning_rate": 1.8901606102061302e-05, "loss": 0.1146, "step": 3159 }, { "epoch": 0.53, "grad_norm": 1.2089265584945679, "learning_rate": 1.8900783185978874e-05, "loss": 0.1182, "step": 3160 }, { "epoch": 0.53, "grad_norm": 1.2453056573867798, "learning_rate": 1.8899959979675125e-05, "loss": 0.1197, "step": 3161 }, { "epoch": 0.53, "grad_norm": 1.351380705833435, "learning_rate": 1.88991364831769e-05, "loss": 0.098, "step": 3162 }, { "epoch": 0.53, "grad_norm": 0.9107605814933777, "learning_rate": 1.8898312696511043e-05, "loss": 0.1019, "step": 3163 }, { "epoch": 0.53, "grad_norm": 0.9570043683052063, "learning_rate": 1.8897488619704422e-05, "loss": 0.1153, "step": 3164 }, { "epoch": 0.53, "grad_norm": 0.9690867066383362, "learning_rate": 1.8896664252783905e-05, "loss": 0.0998, "step": 3165 }, { "epoch": 0.53, "grad_norm": 1.0559135675430298, "learning_rate": 1.889583959577637e-05, "loss": 0.1036, "step": 3166 }, { "epoch": 0.53, "grad_norm": 1.178540825843811, "learning_rate": 1.8895014648708703e-05, "loss": 0.0825, "step": 3167 }, { "epoch": 0.53, "grad_norm": 1.2266367673873901, "learning_rate": 1.8894189411607817e-05, "loss": 0.1071, "step": 3168 }, { "epoch": 0.53, "grad_norm": 0.8319258093833923, "learning_rate": 1.88933638845006e-05, "loss": 0.0789, "step": 3169 }, { "epoch": 0.53, "grad_norm": 0.9260448813438416, "learning_rate": 1.889253806741398e-05, "loss": 0.109, "step": 3170 }, { "epoch": 0.53, "grad_norm": 0.84940505027771, "learning_rate": 1.8891711960374884e-05, "loss": 0.1158, "step": 3171 }, { "epoch": 0.53, "grad_norm": 1.071062445640564, "learning_rate": 1.8890885563410244e-05, "loss": 0.1148, "step": 3172 }, { "epoch": 0.53, "grad_norm": 1.2131714820861816, "learning_rate": 1.889005887654701e-05, "loss": 0.0979, "step": 3173 }, { "epoch": 0.53, "grad_norm": 0.769110918045044, "learning_rate": 1.8889231899812136e-05, "loss": 0.107, "step": 3174 }, { "epoch": 0.53, "grad_norm": 0.7491472959518433, "learning_rate": 1.8888404633232583e-05, "loss": 0.0895, "step": 3175 }, { "epoch": 0.53, "grad_norm": 0.8068487644195557, "learning_rate": 1.8887577076835333e-05, "loss": 0.0777, "step": 3176 }, { "epoch": 0.53, "grad_norm": 0.944915235042572, "learning_rate": 1.888674923064736e-05, "loss": 0.1051, "step": 3177 }, { "epoch": 0.53, "grad_norm": 0.9057110548019409, "learning_rate": 1.8885921094695663e-05, "loss": 0.0866, "step": 3178 }, { "epoch": 0.53, "grad_norm": 1.0692086219787598, "learning_rate": 1.8885092669007247e-05, "loss": 0.1063, "step": 3179 }, { "epoch": 0.53, "grad_norm": 1.207872748374939, "learning_rate": 1.8884263953609116e-05, "loss": 0.0682, "step": 3180 }, { "epoch": 0.53, "grad_norm": 1.0771905183792114, "learning_rate": 1.8883434948528297e-05, "loss": 0.1197, "step": 3181 }, { "epoch": 0.53, "grad_norm": 0.9387573599815369, "learning_rate": 1.8882605653791818e-05, "loss": 0.0754, "step": 3182 }, { "epoch": 0.53, "grad_norm": 0.8457120656967163, "learning_rate": 1.888177606942672e-05, "loss": 0.1065, "step": 3183 }, { "epoch": 0.53, "grad_norm": 0.7570690512657166, "learning_rate": 1.8880946195460056e-05, "loss": 0.0863, "step": 3184 }, { "epoch": 0.53, "grad_norm": 0.8756694793701172, "learning_rate": 1.8880116031918878e-05, "loss": 0.0847, "step": 3185 }, { "epoch": 0.53, "grad_norm": 0.6685662269592285, "learning_rate": 1.887928557883026e-05, "loss": 0.0945, "step": 3186 }, { "epoch": 0.53, "grad_norm": 0.9920437932014465, "learning_rate": 1.887845483622128e-05, "loss": 0.1006, "step": 3187 }, { "epoch": 0.53, "grad_norm": 1.2993894815444946, "learning_rate": 1.8877623804119022e-05, "loss": 0.1091, "step": 3188 }, { "epoch": 0.53, "grad_norm": 1.1046059131622314, "learning_rate": 1.8876792482550584e-05, "loss": 0.1178, "step": 3189 }, { "epoch": 0.53, "grad_norm": 0.9850139617919922, "learning_rate": 1.8875960871543073e-05, "loss": 0.131, "step": 3190 }, { "epoch": 0.53, "grad_norm": 0.8701260685920715, "learning_rate": 1.8875128971123608e-05, "loss": 0.0982, "step": 3191 }, { "epoch": 0.53, "grad_norm": 0.9458629488945007, "learning_rate": 1.887429678131931e-05, "loss": 0.1024, "step": 3192 }, { "epoch": 0.53, "grad_norm": 0.7661186456680298, "learning_rate": 1.8873464302157313e-05, "loss": 0.0845, "step": 3193 }, { "epoch": 0.53, "grad_norm": 1.0247896909713745, "learning_rate": 1.8872631533664764e-05, "loss": 0.1256, "step": 3194 }, { "epoch": 0.53, "grad_norm": 0.7451407313346863, "learning_rate": 1.8871798475868814e-05, "loss": 0.0879, "step": 3195 }, { "epoch": 0.53, "grad_norm": 0.9130457639694214, "learning_rate": 1.887096512879663e-05, "loss": 0.0919, "step": 3196 }, { "epoch": 0.53, "grad_norm": 0.7024295330047607, "learning_rate": 1.8870131492475376e-05, "loss": 0.0785, "step": 3197 }, { "epoch": 0.53, "grad_norm": 1.0235915184020996, "learning_rate": 1.8869297566932244e-05, "loss": 0.0751, "step": 3198 }, { "epoch": 0.54, "grad_norm": 1.0101665258407593, "learning_rate": 1.886846335219442e-05, "loss": 0.086, "step": 3199 }, { "epoch": 0.54, "grad_norm": 0.8977973461151123, "learning_rate": 1.88676288482891e-05, "loss": 0.1106, "step": 3200 }, { "epoch": 0.54, "grad_norm": 0.7932583689689636, "learning_rate": 1.8866794055243503e-05, "loss": 0.089, "step": 3201 }, { "epoch": 0.54, "grad_norm": 0.8375905752182007, "learning_rate": 1.8865958973084843e-05, "loss": 0.1057, "step": 3202 }, { "epoch": 0.54, "grad_norm": 0.8691831231117249, "learning_rate": 1.886512360184035e-05, "loss": 0.1002, "step": 3203 }, { "epoch": 0.54, "grad_norm": 0.8178951144218445, "learning_rate": 1.8864287941537266e-05, "loss": 0.0882, "step": 3204 }, { "epoch": 0.54, "grad_norm": 0.9493398666381836, "learning_rate": 1.8863451992202833e-05, "loss": 0.1115, "step": 3205 }, { "epoch": 0.54, "grad_norm": 0.8496473431587219, "learning_rate": 1.886261575386431e-05, "loss": 0.1025, "step": 3206 }, { "epoch": 0.54, "grad_norm": 0.6971516609191895, "learning_rate": 1.8861779226548966e-05, "loss": 0.0933, "step": 3207 }, { "epoch": 0.54, "grad_norm": 0.6298024654388428, "learning_rate": 1.8860942410284077e-05, "loss": 0.0732, "step": 3208 }, { "epoch": 0.54, "grad_norm": 0.8113134503364563, "learning_rate": 1.886010530509692e-05, "loss": 0.103, "step": 3209 }, { "epoch": 0.54, "grad_norm": 1.214059829711914, "learning_rate": 1.8859267911014805e-05, "loss": 0.111, "step": 3210 }, { "epoch": 0.54, "grad_norm": 1.1921372413635254, "learning_rate": 1.885843022806502e-05, "loss": 0.0997, "step": 3211 }, { "epoch": 0.54, "grad_norm": 0.7584640979766846, "learning_rate": 1.8857592256274892e-05, "loss": 0.0942, "step": 3212 }, { "epoch": 0.54, "grad_norm": 0.8653737306594849, "learning_rate": 1.8856753995671736e-05, "loss": 0.0965, "step": 3213 }, { "epoch": 0.54, "grad_norm": 1.3382883071899414, "learning_rate": 1.885591544628289e-05, "loss": 0.0934, "step": 3214 }, { "epoch": 0.54, "grad_norm": 0.8734152317047119, "learning_rate": 1.8855076608135693e-05, "loss": 0.0897, "step": 3215 }, { "epoch": 0.54, "grad_norm": 1.127185344696045, "learning_rate": 1.8854237481257494e-05, "loss": 0.1067, "step": 3216 }, { "epoch": 0.54, "grad_norm": 0.7351331114768982, "learning_rate": 1.885339806567566e-05, "loss": 0.091, "step": 3217 }, { "epoch": 0.54, "grad_norm": 0.8034701943397522, "learning_rate": 1.8852558361417556e-05, "loss": 0.0882, "step": 3218 }, { "epoch": 0.54, "grad_norm": 0.9160712361335754, "learning_rate": 1.8851718368510564e-05, "loss": 0.0946, "step": 3219 }, { "epoch": 0.54, "grad_norm": 0.7924577593803406, "learning_rate": 1.8850878086982073e-05, "loss": 0.0777, "step": 3220 }, { "epoch": 0.54, "grad_norm": 0.9653427600860596, "learning_rate": 1.885003751685948e-05, "loss": 0.0913, "step": 3221 }, { "epoch": 0.54, "grad_norm": 1.2395414113998413, "learning_rate": 1.884919665817019e-05, "loss": 0.0881, "step": 3222 }, { "epoch": 0.54, "grad_norm": 0.851447582244873, "learning_rate": 1.884835551094163e-05, "loss": 0.1023, "step": 3223 }, { "epoch": 0.54, "grad_norm": 1.1622109413146973, "learning_rate": 1.8847514075201215e-05, "loss": 0.0943, "step": 3224 }, { "epoch": 0.54, "grad_norm": 0.9803044199943542, "learning_rate": 1.8846672350976393e-05, "loss": 0.1158, "step": 3225 }, { "epoch": 0.54, "grad_norm": 0.8341485857963562, "learning_rate": 1.88458303382946e-05, "loss": 0.0787, "step": 3226 }, { "epoch": 0.54, "grad_norm": 0.94700026512146, "learning_rate": 1.884498803718329e-05, "loss": 0.0955, "step": 3227 }, { "epoch": 0.54, "grad_norm": 0.669990062713623, "learning_rate": 1.8844145447669934e-05, "loss": 0.0671, "step": 3228 }, { "epoch": 0.54, "grad_norm": 1.1925711631774902, "learning_rate": 1.8843302569782006e-05, "loss": 0.0971, "step": 3229 }, { "epoch": 0.54, "grad_norm": 0.8624903559684753, "learning_rate": 1.8842459403546983e-05, "loss": 0.0809, "step": 3230 }, { "epoch": 0.54, "grad_norm": 0.8655558228492737, "learning_rate": 1.8841615948992363e-05, "loss": 0.1063, "step": 3231 }, { "epoch": 0.54, "grad_norm": 1.148368000984192, "learning_rate": 1.884077220614564e-05, "loss": 0.0887, "step": 3232 }, { "epoch": 0.54, "grad_norm": 0.9620258212089539, "learning_rate": 1.883992817503434e-05, "loss": 0.0857, "step": 3233 }, { "epoch": 0.54, "grad_norm": 1.3156629800796509, "learning_rate": 1.8839083855685966e-05, "loss": 0.1081, "step": 3234 }, { "epoch": 0.54, "grad_norm": 0.7962096333503723, "learning_rate": 1.883823924812806e-05, "loss": 0.0966, "step": 3235 }, { "epoch": 0.54, "grad_norm": 0.8648247122764587, "learning_rate": 1.883739435238816e-05, "loss": 0.0916, "step": 3236 }, { "epoch": 0.54, "grad_norm": 0.758833646774292, "learning_rate": 1.8836549168493816e-05, "loss": 0.0818, "step": 3237 }, { "epoch": 0.54, "grad_norm": 0.820844829082489, "learning_rate": 1.8835703696472575e-05, "loss": 0.0951, "step": 3238 }, { "epoch": 0.54, "grad_norm": 0.9088525772094727, "learning_rate": 1.8834857936352016e-05, "loss": 0.072, "step": 3239 }, { "epoch": 0.54, "grad_norm": 1.172876238822937, "learning_rate": 1.8834011888159717e-05, "loss": 0.1201, "step": 3240 }, { "epoch": 0.54, "grad_norm": 0.9004836678504944, "learning_rate": 1.883316555192326e-05, "loss": 0.1045, "step": 3241 }, { "epoch": 0.54, "grad_norm": 0.9804535508155823, "learning_rate": 1.883231892767024e-05, "loss": 0.1057, "step": 3242 }, { "epoch": 0.54, "grad_norm": 0.8106614351272583, "learning_rate": 1.883147201542826e-05, "loss": 0.1197, "step": 3243 }, { "epoch": 0.54, "grad_norm": 0.8499751687049866, "learning_rate": 1.883062481522494e-05, "loss": 0.0856, "step": 3244 }, { "epoch": 0.54, "grad_norm": 0.9068162441253662, "learning_rate": 1.8829777327087906e-05, "loss": 0.099, "step": 3245 }, { "epoch": 0.54, "grad_norm": 0.9658403396606445, "learning_rate": 1.8828929551044783e-05, "loss": 0.1288, "step": 3246 }, { "epoch": 0.54, "grad_norm": 1.174209713935852, "learning_rate": 1.8828081487123222e-05, "loss": 0.117, "step": 3247 }, { "epoch": 0.54, "grad_norm": 1.2133644819259644, "learning_rate": 1.882723313535087e-05, "loss": 0.112, "step": 3248 }, { "epoch": 0.54, "grad_norm": 0.782599151134491, "learning_rate": 1.8826384495755393e-05, "loss": 0.0915, "step": 3249 }, { "epoch": 0.54, "grad_norm": 1.1192668676376343, "learning_rate": 1.8825535568364458e-05, "loss": 0.0949, "step": 3250 }, { "epoch": 0.54, "grad_norm": 0.7891767024993896, "learning_rate": 1.8824686353205747e-05, "loss": 0.0757, "step": 3251 }, { "epoch": 0.54, "grad_norm": 1.2689988613128662, "learning_rate": 1.882383685030695e-05, "loss": 0.0952, "step": 3252 }, { "epoch": 0.54, "grad_norm": 1.3707879781723022, "learning_rate": 1.8822987059695762e-05, "loss": 0.099, "step": 3253 }, { "epoch": 0.54, "grad_norm": 1.1607552766799927, "learning_rate": 1.88221369813999e-05, "loss": 0.0968, "step": 3254 }, { "epoch": 0.54, "grad_norm": 0.8294552564620972, "learning_rate": 1.8821286615447076e-05, "loss": 0.0774, "step": 3255 }, { "epoch": 0.54, "grad_norm": 0.788109302520752, "learning_rate": 1.8820435961865017e-05, "loss": 0.0719, "step": 3256 }, { "epoch": 0.54, "grad_norm": 0.8235303163528442, "learning_rate": 1.881958502068146e-05, "loss": 0.0835, "step": 3257 }, { "epoch": 0.54, "grad_norm": 0.943698525428772, "learning_rate": 1.881873379192415e-05, "loss": 0.105, "step": 3258 }, { "epoch": 0.55, "grad_norm": 0.8368180394172668, "learning_rate": 1.881788227562085e-05, "loss": 0.0949, "step": 3259 }, { "epoch": 0.55, "grad_norm": 0.9568254947662354, "learning_rate": 1.8817030471799315e-05, "loss": 0.1243, "step": 3260 }, { "epoch": 0.55, "grad_norm": 0.838808536529541, "learning_rate": 1.8816178380487323e-05, "loss": 0.0846, "step": 3261 }, { "epoch": 0.55, "grad_norm": 1.1567186117172241, "learning_rate": 1.881532600171266e-05, "loss": 0.1274, "step": 3262 }, { "epoch": 0.55, "grad_norm": 0.7151160836219788, "learning_rate": 1.8814473335503117e-05, "loss": 0.079, "step": 3263 }, { "epoch": 0.55, "grad_norm": 0.9457135796546936, "learning_rate": 1.8813620381886495e-05, "loss": 0.1076, "step": 3264 }, { "epoch": 0.55, "grad_norm": 0.9562546014785767, "learning_rate": 1.8812767140890606e-05, "loss": 0.0962, "step": 3265 }, { "epoch": 0.55, "grad_norm": 0.730134904384613, "learning_rate": 1.881191361254327e-05, "loss": 0.0857, "step": 3266 }, { "epoch": 0.55, "grad_norm": 0.8450090289115906, "learning_rate": 1.881105979687232e-05, "loss": 0.0627, "step": 3267 }, { "epoch": 0.55, "grad_norm": 0.9725310802459717, "learning_rate": 1.8810205693905598e-05, "loss": 0.0769, "step": 3268 }, { "epoch": 0.55, "grad_norm": 1.1156851053237915, "learning_rate": 1.8809351303670947e-05, "loss": 0.1072, "step": 3269 }, { "epoch": 0.55, "grad_norm": 0.7541478872299194, "learning_rate": 1.8808496626196225e-05, "loss": 0.0794, "step": 3270 }, { "epoch": 0.55, "grad_norm": 0.7987709641456604, "learning_rate": 1.8807641661509308e-05, "loss": 0.0847, "step": 3271 }, { "epoch": 0.55, "grad_norm": 0.624410092830658, "learning_rate": 1.880678640963807e-05, "loss": 0.078, "step": 3272 }, { "epoch": 0.55, "grad_norm": 0.6454174518585205, "learning_rate": 1.8805930870610388e-05, "loss": 0.0978, "step": 3273 }, { "epoch": 0.55, "grad_norm": 0.8367000222206116, "learning_rate": 1.8805075044454172e-05, "loss": 0.1077, "step": 3274 }, { "epoch": 0.55, "grad_norm": 0.9645248055458069, "learning_rate": 1.8804218931197317e-05, "loss": 0.0828, "step": 3275 }, { "epoch": 0.55, "grad_norm": 1.0025252103805542, "learning_rate": 1.8803362530867745e-05, "loss": 0.0929, "step": 3276 }, { "epoch": 0.55, "grad_norm": 1.519675850868225, "learning_rate": 1.8802505843493375e-05, "loss": 0.1073, "step": 3277 }, { "epoch": 0.55, "grad_norm": 0.8375986218452454, "learning_rate": 1.8801648869102144e-05, "loss": 0.0914, "step": 3278 }, { "epoch": 0.55, "grad_norm": 0.9267567992210388, "learning_rate": 1.8800791607721992e-05, "loss": 0.075, "step": 3279 }, { "epoch": 0.55, "grad_norm": 0.9564904570579529, "learning_rate": 1.879993405938087e-05, "loss": 0.1025, "step": 3280 }, { "epoch": 0.55, "grad_norm": 0.6721853613853455, "learning_rate": 1.8799076224106746e-05, "loss": 0.0789, "step": 3281 }, { "epoch": 0.55, "grad_norm": 0.7497117519378662, "learning_rate": 1.8798218101927583e-05, "loss": 0.0917, "step": 3282 }, { "epoch": 0.55, "grad_norm": 0.61465984582901, "learning_rate": 1.8797359692871364e-05, "loss": 0.0611, "step": 3283 }, { "epoch": 0.55, "grad_norm": 0.7113951444625854, "learning_rate": 1.879650099696608e-05, "loss": 0.0938, "step": 3284 }, { "epoch": 0.55, "grad_norm": 1.6855084896087646, "learning_rate": 1.8795642014239727e-05, "loss": 0.1134, "step": 3285 }, { "epoch": 0.55, "grad_norm": 0.8112597465515137, "learning_rate": 1.8794782744720318e-05, "loss": 0.0864, "step": 3286 }, { "epoch": 0.55, "grad_norm": 1.2114869356155396, "learning_rate": 1.879392318843586e-05, "loss": 0.0779, "step": 3287 }, { "epoch": 0.55, "grad_norm": 0.9380178451538086, "learning_rate": 1.8793063345414398e-05, "loss": 0.0947, "step": 3288 }, { "epoch": 0.55, "grad_norm": 0.7157978415489197, "learning_rate": 1.8792203215683954e-05, "loss": 0.0856, "step": 3289 }, { "epoch": 0.55, "grad_norm": 0.8188259601593018, "learning_rate": 1.8791342799272573e-05, "loss": 0.0935, "step": 3290 }, { "epoch": 0.55, "grad_norm": 0.8394256830215454, "learning_rate": 1.879048209620832e-05, "loss": 0.0876, "step": 3291 }, { "epoch": 0.55, "grad_norm": 0.9890744090080261, "learning_rate": 1.8789621106519253e-05, "loss": 0.1118, "step": 3292 }, { "epoch": 0.55, "grad_norm": 1.2264548540115356, "learning_rate": 1.878875983023344e-05, "loss": 0.1164, "step": 3293 }, { "epoch": 0.55, "grad_norm": 0.891700267791748, "learning_rate": 1.8787898267378975e-05, "loss": 0.0759, "step": 3294 }, { "epoch": 0.55, "grad_norm": 0.8687661290168762, "learning_rate": 1.878703641798395e-05, "loss": 0.0784, "step": 3295 }, { "epoch": 0.55, "grad_norm": 1.0409613847732544, "learning_rate": 1.8786174282076458e-05, "loss": 0.1056, "step": 3296 }, { "epoch": 0.55, "grad_norm": 0.7214404940605164, "learning_rate": 1.8785311859684612e-05, "loss": 0.0809, "step": 3297 }, { "epoch": 0.55, "grad_norm": 0.9216775894165039, "learning_rate": 1.8784449150836537e-05, "loss": 0.1035, "step": 3298 }, { "epoch": 0.55, "grad_norm": 0.7151994705200195, "learning_rate": 1.878358615556036e-05, "loss": 0.0858, "step": 3299 }, { "epoch": 0.55, "grad_norm": 1.0208134651184082, "learning_rate": 1.878272287388422e-05, "loss": 0.0688, "step": 3300 }, { "epoch": 0.55, "grad_norm": 0.6665542125701904, "learning_rate": 1.878185930583627e-05, "loss": 0.0936, "step": 3301 }, { "epoch": 0.55, "grad_norm": 1.2296918630599976, "learning_rate": 1.878099545144466e-05, "loss": 0.0964, "step": 3302 }, { "epoch": 0.55, "grad_norm": 0.6891323924064636, "learning_rate": 1.8780131310737562e-05, "loss": 0.0577, "step": 3303 }, { "epoch": 0.55, "grad_norm": 1.2376471757888794, "learning_rate": 1.877926688374315e-05, "loss": 0.1155, "step": 3304 }, { "epoch": 0.55, "grad_norm": 1.020372748374939, "learning_rate": 1.877840217048961e-05, "loss": 0.1063, "step": 3305 }, { "epoch": 0.55, "grad_norm": 0.7780992388725281, "learning_rate": 1.877753717100514e-05, "loss": 0.0901, "step": 3306 }, { "epoch": 0.55, "grad_norm": 0.8379420042037964, "learning_rate": 1.8776671885317942e-05, "loss": 0.0791, "step": 3307 }, { "epoch": 0.55, "grad_norm": 0.8710950016975403, "learning_rate": 1.877580631345623e-05, "loss": 0.1085, "step": 3308 }, { "epoch": 0.55, "grad_norm": 0.8511636257171631, "learning_rate": 1.8774940455448228e-05, "loss": 0.0939, "step": 3309 }, { "epoch": 0.55, "grad_norm": 1.3045694828033447, "learning_rate": 1.8774074311322168e-05, "loss": 0.1212, "step": 3310 }, { "epoch": 0.55, "grad_norm": 0.9372842311859131, "learning_rate": 1.877320788110629e-05, "loss": 0.0746, "step": 3311 }, { "epoch": 0.55, "grad_norm": 0.8953707218170166, "learning_rate": 1.8772341164828846e-05, "loss": 0.107, "step": 3312 }, { "epoch": 0.55, "grad_norm": 0.9438858032226562, "learning_rate": 1.8771474162518094e-05, "loss": 0.0924, "step": 3313 }, { "epoch": 0.55, "grad_norm": 0.7929443120956421, "learning_rate": 1.8770606874202314e-05, "loss": 0.0963, "step": 3314 }, { "epoch": 0.55, "grad_norm": 0.8452234268188477, "learning_rate": 1.876973929990977e-05, "loss": 0.0712, "step": 3315 }, { "epoch": 0.55, "grad_norm": 0.7931906580924988, "learning_rate": 1.876887143966876e-05, "loss": 0.0903, "step": 3316 }, { "epoch": 0.55, "grad_norm": 1.0667616128921509, "learning_rate": 1.876800329350758e-05, "loss": 0.0779, "step": 3317 }, { "epoch": 0.55, "grad_norm": 1.1906567811965942, "learning_rate": 1.876713486145454e-05, "loss": 0.13, "step": 3318 }, { "epoch": 0.56, "grad_norm": 0.70582115650177, "learning_rate": 1.8766266143537946e-05, "loss": 0.0862, "step": 3319 }, { "epoch": 0.56, "grad_norm": 0.816718578338623, "learning_rate": 1.8765397139786135e-05, "loss": 0.0942, "step": 3320 }, { "epoch": 0.56, "grad_norm": 0.7870321273803711, "learning_rate": 1.8764527850227437e-05, "loss": 0.0871, "step": 3321 }, { "epoch": 0.56, "grad_norm": 0.7585605978965759, "learning_rate": 1.8763658274890194e-05, "loss": 0.1037, "step": 3322 }, { "epoch": 0.56, "grad_norm": 0.7653074860572815, "learning_rate": 1.8762788413802767e-05, "loss": 0.0835, "step": 3323 }, { "epoch": 0.56, "grad_norm": 0.8952004313468933, "learning_rate": 1.8761918266993513e-05, "loss": 0.1223, "step": 3324 }, { "epoch": 0.56, "grad_norm": 0.8915942311286926, "learning_rate": 1.8761047834490802e-05, "loss": 0.1002, "step": 3325 }, { "epoch": 0.56, "grad_norm": 0.8564304709434509, "learning_rate": 1.8760177116323025e-05, "loss": 0.0996, "step": 3326 }, { "epoch": 0.56, "grad_norm": 0.9028379917144775, "learning_rate": 1.8759306112518564e-05, "loss": 0.1004, "step": 3327 }, { "epoch": 0.56, "grad_norm": 1.0065523386001587, "learning_rate": 1.875843482310582e-05, "loss": 0.089, "step": 3328 }, { "epoch": 0.56, "grad_norm": 0.8679072260856628, "learning_rate": 1.8757563248113206e-05, "loss": 0.0936, "step": 3329 }, { "epoch": 0.56, "grad_norm": 1.1784367561340332, "learning_rate": 1.875669138756914e-05, "loss": 0.1078, "step": 3330 }, { "epoch": 0.56, "grad_norm": 0.9858102798461914, "learning_rate": 1.875581924150205e-05, "loss": 0.1009, "step": 3331 }, { "epoch": 0.56, "grad_norm": 1.107649326324463, "learning_rate": 1.875494680994037e-05, "loss": 0.0953, "step": 3332 }, { "epoch": 0.56, "grad_norm": 0.7288194894790649, "learning_rate": 1.8754074092912555e-05, "loss": 0.0824, "step": 3333 }, { "epoch": 0.56, "grad_norm": 0.8104130625724792, "learning_rate": 1.8753201090447047e-05, "loss": 0.111, "step": 3334 }, { "epoch": 0.56, "grad_norm": 0.9778519868850708, "learning_rate": 1.8752327802572327e-05, "loss": 0.1206, "step": 3335 }, { "epoch": 0.56, "grad_norm": 0.8313745856285095, "learning_rate": 1.8751454229316863e-05, "loss": 0.1029, "step": 3336 }, { "epoch": 0.56, "grad_norm": 0.8187837600708008, "learning_rate": 1.8750580370709138e-05, "loss": 0.1069, "step": 3337 }, { "epoch": 0.56, "grad_norm": 0.9885507822036743, "learning_rate": 1.8749706226777643e-05, "loss": 0.0868, "step": 3338 }, { "epoch": 0.56, "grad_norm": 0.8089391589164734, "learning_rate": 1.874883179755089e-05, "loss": 0.0882, "step": 3339 }, { "epoch": 0.56, "grad_norm": 0.8024974465370178, "learning_rate": 1.8747957083057377e-05, "loss": 0.0894, "step": 3340 }, { "epoch": 0.56, "grad_norm": 1.0052425861358643, "learning_rate": 1.8747082083325636e-05, "loss": 0.0993, "step": 3341 }, { "epoch": 0.56, "grad_norm": 0.8667216300964355, "learning_rate": 1.8746206798384196e-05, "loss": 0.069, "step": 3342 }, { "epoch": 0.56, "grad_norm": 0.668834924697876, "learning_rate": 1.8745331228261593e-05, "loss": 0.0894, "step": 3343 }, { "epoch": 0.56, "grad_norm": 0.9979814291000366, "learning_rate": 1.8744455372986374e-05, "loss": 0.0938, "step": 3344 }, { "epoch": 0.56, "grad_norm": 0.6998886466026306, "learning_rate": 1.874357923258711e-05, "loss": 0.0936, "step": 3345 }, { "epoch": 0.56, "grad_norm": 0.7894003391265869, "learning_rate": 1.8742702807092354e-05, "loss": 0.0943, "step": 3346 }, { "epoch": 0.56, "grad_norm": 0.977985680103302, "learning_rate": 1.874182609653069e-05, "loss": 0.0703, "step": 3347 }, { "epoch": 0.56, "grad_norm": 0.9190047979354858, "learning_rate": 1.87409491009307e-05, "loss": 0.1064, "step": 3348 }, { "epoch": 0.56, "grad_norm": 0.745234489440918, "learning_rate": 1.8740071820320985e-05, "loss": 0.0957, "step": 3349 }, { "epoch": 0.56, "grad_norm": 0.7634402513504028, "learning_rate": 1.873919425473015e-05, "loss": 0.103, "step": 3350 }, { "epoch": 0.56, "grad_norm": 0.8368374109268188, "learning_rate": 1.8738316404186806e-05, "loss": 0.0877, "step": 3351 }, { "epoch": 0.56, "grad_norm": 0.8574857711791992, "learning_rate": 1.8737438268719578e-05, "loss": 0.0987, "step": 3352 }, { "epoch": 0.56, "grad_norm": 0.6428007483482361, "learning_rate": 1.8736559848357094e-05, "loss": 0.0883, "step": 3353 }, { "epoch": 0.56, "grad_norm": 0.7864424586296082, "learning_rate": 1.8735681143128004e-05, "loss": 0.0919, "step": 3354 }, { "epoch": 0.56, "grad_norm": 0.9073980450630188, "learning_rate": 1.8734802153060952e-05, "loss": 0.0974, "step": 3355 }, { "epoch": 0.56, "grad_norm": 0.9945582747459412, "learning_rate": 1.8733922878184604e-05, "loss": 0.1235, "step": 3356 }, { "epoch": 0.56, "grad_norm": 0.7826347351074219, "learning_rate": 1.8733043318527627e-05, "loss": 0.0919, "step": 3357 }, { "epoch": 0.56, "grad_norm": 0.7930367588996887, "learning_rate": 1.8732163474118702e-05, "loss": 0.0994, "step": 3358 }, { "epoch": 0.56, "grad_norm": 0.7765018343925476, "learning_rate": 1.8731283344986516e-05, "loss": 0.0802, "step": 3359 }, { "epoch": 0.56, "grad_norm": 0.8866979479789734, "learning_rate": 1.8730402931159763e-05, "loss": 0.107, "step": 3360 }, { "epoch": 0.56, "grad_norm": 0.6133362650871277, "learning_rate": 1.8729522232667158e-05, "loss": 0.0744, "step": 3361 }, { "epoch": 0.56, "grad_norm": 0.6968606114387512, "learning_rate": 1.872864124953741e-05, "loss": 0.0878, "step": 3362 }, { "epoch": 0.56, "grad_norm": 0.7883791923522949, "learning_rate": 1.8727759981799255e-05, "loss": 0.073, "step": 3363 }, { "epoch": 0.56, "grad_norm": 0.883431613445282, "learning_rate": 1.8726878429481414e-05, "loss": 0.0943, "step": 3364 }, { "epoch": 0.56, "grad_norm": 0.8059903383255005, "learning_rate": 1.8725996592612638e-05, "loss": 0.1033, "step": 3365 }, { "epoch": 0.56, "grad_norm": 0.6424482464790344, "learning_rate": 1.8725114471221684e-05, "loss": 0.0797, "step": 3366 }, { "epoch": 0.56, "grad_norm": 0.760263979434967, "learning_rate": 1.8724232065337306e-05, "loss": 0.0948, "step": 3367 }, { "epoch": 0.56, "grad_norm": 1.6340962648391724, "learning_rate": 1.8723349374988284e-05, "loss": 0.0871, "step": 3368 }, { "epoch": 0.56, "grad_norm": 0.9221137166023254, "learning_rate": 1.87224664002034e-05, "loss": 0.1015, "step": 3369 }, { "epoch": 0.56, "grad_norm": 0.9002836346626282, "learning_rate": 1.8721583141011433e-05, "loss": 0.095, "step": 3370 }, { "epoch": 0.56, "grad_norm": 0.8897041082382202, "learning_rate": 1.8720699597441197e-05, "loss": 0.0851, "step": 3371 }, { "epoch": 0.56, "grad_norm": 1.0854768753051758, "learning_rate": 1.871981576952149e-05, "loss": 0.0983, "step": 3372 }, { "epoch": 0.56, "grad_norm": 1.2181411981582642, "learning_rate": 1.8718931657281137e-05, "loss": 0.0716, "step": 3373 }, { "epoch": 0.56, "grad_norm": 0.7974948883056641, "learning_rate": 1.871804726074896e-05, "loss": 0.0811, "step": 3374 }, { "epoch": 0.56, "grad_norm": 0.8803855180740356, "learning_rate": 1.8717162579953803e-05, "loss": 0.084, "step": 3375 }, { "epoch": 0.56, "grad_norm": 1.172805666923523, "learning_rate": 1.871627761492451e-05, "loss": 0.1091, "step": 3376 }, { "epoch": 0.56, "grad_norm": 0.9544684886932373, "learning_rate": 1.871539236568993e-05, "loss": 0.0949, "step": 3377 }, { "epoch": 0.56, "grad_norm": 1.0293591022491455, "learning_rate": 1.871450683227894e-05, "loss": 0.0816, "step": 3378 }, { "epoch": 0.57, "grad_norm": 1.4165512323379517, "learning_rate": 1.8713621014720397e-05, "loss": 0.1237, "step": 3379 }, { "epoch": 0.57, "grad_norm": 0.8515938520431519, "learning_rate": 1.87127349130432e-05, "loss": 0.0709, "step": 3380 }, { "epoch": 0.57, "grad_norm": 0.9870484471321106, "learning_rate": 1.871184852727623e-05, "loss": 0.1293, "step": 3381 }, { "epoch": 0.57, "grad_norm": 1.1118333339691162, "learning_rate": 1.8710961857448397e-05, "loss": 0.1195, "step": 3382 }, { "epoch": 0.57, "grad_norm": 0.7470322251319885, "learning_rate": 1.871007490358861e-05, "loss": 0.0825, "step": 3383 }, { "epoch": 0.57, "grad_norm": 0.7581586241722107, "learning_rate": 1.8709187665725787e-05, "loss": 0.0981, "step": 3384 }, { "epoch": 0.57, "grad_norm": 0.8887770175933838, "learning_rate": 1.870830014388886e-05, "loss": 0.0912, "step": 3385 }, { "epoch": 0.57, "grad_norm": 0.8818956613540649, "learning_rate": 1.8707412338106766e-05, "loss": 0.0913, "step": 3386 }, { "epoch": 0.57, "grad_norm": 0.8395074009895325, "learning_rate": 1.870652424840845e-05, "loss": 0.0854, "step": 3387 }, { "epoch": 0.57, "grad_norm": 0.9337259531021118, "learning_rate": 1.8705635874822877e-05, "loss": 0.0895, "step": 3388 }, { "epoch": 0.57, "grad_norm": 0.994430422782898, "learning_rate": 1.8704747217379004e-05, "loss": 0.0729, "step": 3389 }, { "epoch": 0.57, "grad_norm": 1.1536407470703125, "learning_rate": 1.8703858276105816e-05, "loss": 0.1036, "step": 3390 }, { "epoch": 0.57, "grad_norm": 0.9077914357185364, "learning_rate": 1.8702969051032297e-05, "loss": 0.0898, "step": 3391 }, { "epoch": 0.57, "grad_norm": 1.016816258430481, "learning_rate": 1.8702079542187434e-05, "loss": 0.0809, "step": 3392 }, { "epoch": 0.57, "grad_norm": 0.8706825375556946, "learning_rate": 1.8701189749600234e-05, "loss": 0.1163, "step": 3393 }, { "epoch": 0.57, "grad_norm": 0.7654616236686707, "learning_rate": 1.8700299673299712e-05, "loss": 0.0889, "step": 3394 }, { "epoch": 0.57, "grad_norm": 1.0631229877471924, "learning_rate": 1.8699409313314892e-05, "loss": 0.097, "step": 3395 }, { "epoch": 0.57, "grad_norm": 0.7399210929870605, "learning_rate": 1.86985186696748e-05, "loss": 0.0916, "step": 3396 }, { "epoch": 0.57, "grad_norm": 0.981745183467865, "learning_rate": 1.8697627742408477e-05, "loss": 0.1213, "step": 3397 }, { "epoch": 0.57, "grad_norm": 0.8557744026184082, "learning_rate": 1.8696736531544977e-05, "loss": 0.0952, "step": 3398 }, { "epoch": 0.57, "grad_norm": 1.1556475162506104, "learning_rate": 1.8695845037113356e-05, "loss": 0.1176, "step": 3399 }, { "epoch": 0.57, "grad_norm": 0.9424968957901001, "learning_rate": 1.8694953259142684e-05, "loss": 0.0822, "step": 3400 }, { "epoch": 0.57, "grad_norm": 0.9187909960746765, "learning_rate": 1.8694061197662034e-05, "loss": 0.0976, "step": 3401 }, { "epoch": 0.57, "grad_norm": 0.9534798860549927, "learning_rate": 1.8693168852700497e-05, "loss": 0.0956, "step": 3402 }, { "epoch": 0.57, "grad_norm": 0.7689226865768433, "learning_rate": 1.869227622428717e-05, "loss": 0.095, "step": 3403 }, { "epoch": 0.57, "grad_norm": 0.6834802031517029, "learning_rate": 1.8691383312451158e-05, "loss": 0.0761, "step": 3404 }, { "epoch": 0.57, "grad_norm": 0.6846498250961304, "learning_rate": 1.8690490117221573e-05, "loss": 0.0839, "step": 3405 }, { "epoch": 0.57, "grad_norm": 0.7854721546173096, "learning_rate": 1.8689596638627542e-05, "loss": 0.068, "step": 3406 }, { "epoch": 0.57, "grad_norm": 0.7312083840370178, "learning_rate": 1.8688702876698194e-05, "loss": 0.0774, "step": 3407 }, { "epoch": 0.57, "grad_norm": 0.8510854840278625, "learning_rate": 1.8687808831462672e-05, "loss": 0.0935, "step": 3408 }, { "epoch": 0.57, "grad_norm": 0.9815986752510071, "learning_rate": 1.8686914502950126e-05, "loss": 0.0849, "step": 3409 }, { "epoch": 0.57, "grad_norm": 0.8401256203651428, "learning_rate": 1.8686019891189726e-05, "loss": 0.1021, "step": 3410 }, { "epoch": 0.57, "grad_norm": 0.7275055050849915, "learning_rate": 1.8685124996210632e-05, "loss": 0.1013, "step": 3411 }, { "epoch": 0.57, "grad_norm": 0.8105292916297913, "learning_rate": 1.8684229818042024e-05, "loss": 0.0866, "step": 3412 }, { "epoch": 0.57, "grad_norm": 0.83952397108078, "learning_rate": 1.8683334356713097e-05, "loss": 0.0927, "step": 3413 }, { "epoch": 0.57, "grad_norm": 0.8448535799980164, "learning_rate": 1.8682438612253043e-05, "loss": 0.0733, "step": 3414 }, { "epoch": 0.57, "grad_norm": 0.9573808312416077, "learning_rate": 1.8681542584691067e-05, "loss": 0.0703, "step": 3415 }, { "epoch": 0.57, "grad_norm": 0.8479419350624084, "learning_rate": 1.8680646274056397e-05, "loss": 0.0884, "step": 3416 }, { "epoch": 0.57, "grad_norm": 0.6410831212997437, "learning_rate": 1.867974968037824e-05, "loss": 0.0921, "step": 3417 }, { "epoch": 0.57, "grad_norm": 1.023094892501831, "learning_rate": 1.8678852803685847e-05, "loss": 0.0978, "step": 3418 }, { "epoch": 0.57, "grad_norm": 0.851389467716217, "learning_rate": 1.8677955644008454e-05, "loss": 0.0822, "step": 3419 }, { "epoch": 0.57, "grad_norm": 0.9651231169700623, "learning_rate": 1.8677058201375313e-05, "loss": 0.1237, "step": 3420 }, { "epoch": 0.57, "grad_norm": 0.8666680455207825, "learning_rate": 1.867616047581569e-05, "loss": 0.0864, "step": 3421 }, { "epoch": 0.57, "grad_norm": 0.7107951045036316, "learning_rate": 1.8675262467358858e-05, "loss": 0.0852, "step": 3422 }, { "epoch": 0.57, "grad_norm": 0.9312927722930908, "learning_rate": 1.867436417603409e-05, "loss": 0.0789, "step": 3423 }, { "epoch": 0.57, "grad_norm": 0.8868876099586487, "learning_rate": 1.8673465601870683e-05, "loss": 0.0905, "step": 3424 }, { "epoch": 0.57, "grad_norm": 0.6733935475349426, "learning_rate": 1.8672566744897933e-05, "loss": 0.0701, "step": 3425 }, { "epoch": 0.57, "grad_norm": 1.1364469528198242, "learning_rate": 1.8671667605145148e-05, "loss": 0.0804, "step": 3426 }, { "epoch": 0.57, "grad_norm": 1.3824611902236938, "learning_rate": 1.867076818264165e-05, "loss": 0.0911, "step": 3427 }, { "epoch": 0.57, "grad_norm": 0.9884241819381714, "learning_rate": 1.8669868477416763e-05, "loss": 0.0895, "step": 3428 }, { "epoch": 0.57, "grad_norm": 1.2573093175888062, "learning_rate": 1.8668968489499818e-05, "loss": 0.104, "step": 3429 }, { "epoch": 0.57, "grad_norm": 1.0698856115341187, "learning_rate": 1.866806821892017e-05, "loss": 0.0798, "step": 3430 }, { "epoch": 0.57, "grad_norm": 0.7968253493309021, "learning_rate": 1.8667167665707164e-05, "loss": 0.0874, "step": 3431 }, { "epoch": 0.57, "grad_norm": 0.6973167061805725, "learning_rate": 1.866626682989017e-05, "loss": 0.0836, "step": 3432 }, { "epoch": 0.57, "grad_norm": 0.8749793171882629, "learning_rate": 1.866536571149856e-05, "loss": 0.1101, "step": 3433 }, { "epoch": 0.57, "grad_norm": 0.8135474920272827, "learning_rate": 1.8664464310561712e-05, "loss": 0.0699, "step": 3434 }, { "epoch": 0.57, "grad_norm": 0.9927347302436829, "learning_rate": 1.8663562627109023e-05, "loss": 0.0972, "step": 3435 }, { "epoch": 0.57, "grad_norm": 0.665706217288971, "learning_rate": 1.866266066116989e-05, "loss": 0.0885, "step": 3436 }, { "epoch": 0.57, "grad_norm": 0.8750110864639282, "learning_rate": 1.866175841277372e-05, "loss": 0.0893, "step": 3437 }, { "epoch": 0.58, "grad_norm": 0.7273557186126709, "learning_rate": 1.8660855881949942e-05, "loss": 0.076, "step": 3438 }, { "epoch": 0.58, "grad_norm": 0.8608626127243042, "learning_rate": 1.8659953068727974e-05, "loss": 0.0908, "step": 3439 }, { "epoch": 0.58, "grad_norm": 0.8460777997970581, "learning_rate": 1.8659049973137257e-05, "loss": 0.1128, "step": 3440 }, { "epoch": 0.58, "grad_norm": 0.6493711471557617, "learning_rate": 1.865814659520724e-05, "loss": 0.086, "step": 3441 }, { "epoch": 0.58, "grad_norm": 0.6913089752197266, "learning_rate": 1.865724293496737e-05, "loss": 0.0796, "step": 3442 }, { "epoch": 0.58, "grad_norm": 0.8117510080337524, "learning_rate": 1.8656338992447125e-05, "loss": 0.0852, "step": 3443 }, { "epoch": 0.58, "grad_norm": 0.7792556285858154, "learning_rate": 1.8655434767675973e-05, "loss": 0.0816, "step": 3444 }, { "epoch": 0.58, "grad_norm": 0.7351274490356445, "learning_rate": 1.8654530260683393e-05, "loss": 0.072, "step": 3445 }, { "epoch": 0.58, "grad_norm": 1.0568338632583618, "learning_rate": 1.8653625471498883e-05, "loss": 0.1296, "step": 3446 }, { "epoch": 0.58, "grad_norm": 1.1879901885986328, "learning_rate": 1.8652720400151944e-05, "loss": 0.0857, "step": 3447 }, { "epoch": 0.58, "grad_norm": 0.6847042441368103, "learning_rate": 1.865181504667209e-05, "loss": 0.0788, "step": 3448 }, { "epoch": 0.58, "grad_norm": 0.8578944206237793, "learning_rate": 1.8650909411088833e-05, "loss": 0.0922, "step": 3449 }, { "epoch": 0.58, "grad_norm": 0.7824242115020752, "learning_rate": 1.865000349343171e-05, "loss": 0.0978, "step": 3450 }, { "epoch": 0.58, "grad_norm": 0.971765398979187, "learning_rate": 1.864909729373025e-05, "loss": 0.0726, "step": 3451 }, { "epoch": 0.58, "grad_norm": 0.7692728638648987, "learning_rate": 1.864819081201401e-05, "loss": 0.0826, "step": 3452 }, { "epoch": 0.58, "grad_norm": 0.9945104718208313, "learning_rate": 1.864728404831255e-05, "loss": 0.1194, "step": 3453 }, { "epoch": 0.58, "grad_norm": 0.7897043824195862, "learning_rate": 1.8646377002655423e-05, "loss": 0.0982, "step": 3454 }, { "epoch": 0.58, "grad_norm": 0.8268508315086365, "learning_rate": 1.864546967507222e-05, "loss": 0.0882, "step": 3455 }, { "epoch": 0.58, "grad_norm": 0.7566133737564087, "learning_rate": 1.864456206559251e-05, "loss": 0.0919, "step": 3456 }, { "epoch": 0.58, "grad_norm": 0.7300699949264526, "learning_rate": 1.86436541742459e-05, "loss": 0.0827, "step": 3457 }, { "epoch": 0.58, "grad_norm": 1.0571948289871216, "learning_rate": 1.8642746001061982e-05, "loss": 0.0982, "step": 3458 }, { "epoch": 0.58, "grad_norm": 0.8153133392333984, "learning_rate": 1.8641837546070377e-05, "loss": 0.0887, "step": 3459 }, { "epoch": 0.58, "grad_norm": 0.7444422245025635, "learning_rate": 1.8640928809300702e-05, "loss": 0.0808, "step": 3460 }, { "epoch": 0.58, "grad_norm": 0.8601295351982117, "learning_rate": 1.8640019790782587e-05, "loss": 0.0931, "step": 3461 }, { "epoch": 0.58, "grad_norm": 0.8445085287094116, "learning_rate": 1.8639110490545677e-05, "loss": 0.0896, "step": 3462 }, { "epoch": 0.58, "grad_norm": 0.9021602869033813, "learning_rate": 1.863820090861961e-05, "loss": 0.1253, "step": 3463 }, { "epoch": 0.58, "grad_norm": 1.2118022441864014, "learning_rate": 1.8637291045034052e-05, "loss": 0.0985, "step": 3464 }, { "epoch": 0.58, "grad_norm": 0.8308842182159424, "learning_rate": 1.8636380899818674e-05, "loss": 0.1012, "step": 3465 }, { "epoch": 0.58, "grad_norm": 0.8661943674087524, "learning_rate": 1.8635470473003146e-05, "loss": 0.0884, "step": 3466 }, { "epoch": 0.58, "grad_norm": 0.7872759103775024, "learning_rate": 1.8634559764617154e-05, "loss": 0.1075, "step": 3467 }, { "epoch": 0.58, "grad_norm": 0.8914802074432373, "learning_rate": 1.8633648774690393e-05, "loss": 0.0983, "step": 3468 }, { "epoch": 0.58, "grad_norm": 0.8235772848129272, "learning_rate": 1.863273750325257e-05, "loss": 0.1153, "step": 3469 }, { "epoch": 0.58, "grad_norm": 0.7903813719749451, "learning_rate": 1.8631825950333394e-05, "loss": 0.1027, "step": 3470 }, { "epoch": 0.58, "grad_norm": 0.6878637671470642, "learning_rate": 1.863091411596259e-05, "loss": 0.0785, "step": 3471 }, { "epoch": 0.58, "grad_norm": 0.7393410801887512, "learning_rate": 1.863000200016989e-05, "loss": 0.0714, "step": 3472 }, { "epoch": 0.58, "grad_norm": 0.6270647048950195, "learning_rate": 1.8629089602985033e-05, "loss": 0.1016, "step": 3473 }, { "epoch": 0.58, "grad_norm": 0.8210551738739014, "learning_rate": 1.862817692443777e-05, "loss": 0.0849, "step": 3474 }, { "epoch": 0.58, "grad_norm": 0.8919277787208557, "learning_rate": 1.8627263964557856e-05, "loss": 0.0904, "step": 3475 }, { "epoch": 0.58, "grad_norm": 0.7900357246398926, "learning_rate": 1.862635072337507e-05, "loss": 0.07, "step": 3476 }, { "epoch": 0.58, "grad_norm": 0.8117693662643433, "learning_rate": 1.8625437200919177e-05, "loss": 0.0797, "step": 3477 }, { "epoch": 0.58, "grad_norm": 0.8978240489959717, "learning_rate": 1.862452339721997e-05, "loss": 0.109, "step": 3478 }, { "epoch": 0.58, "grad_norm": 1.0219117403030396, "learning_rate": 1.862360931230724e-05, "loss": 0.0768, "step": 3479 }, { "epoch": 0.58, "grad_norm": 0.6951485872268677, "learning_rate": 1.8622694946210802e-05, "loss": 0.0896, "step": 3480 }, { "epoch": 0.58, "grad_norm": 0.7024786472320557, "learning_rate": 1.862178029896046e-05, "loss": 0.1015, "step": 3481 }, { "epoch": 0.58, "grad_norm": 0.7280920743942261, "learning_rate": 1.862086537058604e-05, "loss": 0.0653, "step": 3482 }, { "epoch": 0.58, "grad_norm": 1.0666344165802002, "learning_rate": 1.8619950161117374e-05, "loss": 0.0788, "step": 3483 }, { "epoch": 0.58, "grad_norm": 1.061097264289856, "learning_rate": 1.8619034670584308e-05, "loss": 0.114, "step": 3484 }, { "epoch": 0.58, "grad_norm": 0.7636284232139587, "learning_rate": 1.8618118899016687e-05, "loss": 0.0894, "step": 3485 }, { "epoch": 0.58, "grad_norm": 0.8434049487113953, "learning_rate": 1.8617202846444372e-05, "loss": 0.0985, "step": 3486 }, { "epoch": 0.58, "grad_norm": 0.8927242159843445, "learning_rate": 1.861628651289724e-05, "loss": 0.0785, "step": 3487 }, { "epoch": 0.58, "grad_norm": 0.8282383680343628, "learning_rate": 1.8615369898405153e-05, "loss": 0.1033, "step": 3488 }, { "epoch": 0.58, "grad_norm": 0.9637783765792847, "learning_rate": 1.8614453002998013e-05, "loss": 0.1147, "step": 3489 }, { "epoch": 0.58, "grad_norm": 0.9489292502403259, "learning_rate": 1.861353582670571e-05, "loss": 0.1232, "step": 3490 }, { "epoch": 0.58, "grad_norm": 0.745072066783905, "learning_rate": 1.8612618369558152e-05, "loss": 0.0952, "step": 3491 }, { "epoch": 0.58, "grad_norm": 0.9995390772819519, "learning_rate": 1.8611700631585252e-05, "loss": 0.1034, "step": 3492 }, { "epoch": 0.58, "grad_norm": 0.8504295945167542, "learning_rate": 1.861078261281694e-05, "loss": 0.0853, "step": 3493 }, { "epoch": 0.58, "grad_norm": 0.867388904094696, "learning_rate": 1.8609864313283136e-05, "loss": 0.0953, "step": 3494 }, { "epoch": 0.58, "grad_norm": 0.5692546367645264, "learning_rate": 1.8608945733013795e-05, "loss": 0.0663, "step": 3495 }, { "epoch": 0.58, "grad_norm": 0.8766017556190491, "learning_rate": 1.8608026872038864e-05, "loss": 0.0795, "step": 3496 }, { "epoch": 0.58, "grad_norm": 0.7990301847457886, "learning_rate": 1.86071077303883e-05, "loss": 0.0894, "step": 3497 }, { "epoch": 0.59, "grad_norm": 0.8273128867149353, "learning_rate": 1.8606188308092082e-05, "loss": 0.1009, "step": 3498 }, { "epoch": 0.59, "grad_norm": 0.7697015404701233, "learning_rate": 1.860526860518018e-05, "loss": 0.0909, "step": 3499 }, { "epoch": 0.59, "grad_norm": 0.6747329235076904, "learning_rate": 1.8604348621682585e-05, "loss": 0.0849, "step": 3500 }, { "epoch": 0.59, "grad_norm": 0.8002321124076843, "learning_rate": 1.8603428357629293e-05, "loss": 0.0942, "step": 3501 }, { "epoch": 0.59, "grad_norm": 1.1008468866348267, "learning_rate": 1.8602507813050317e-05, "loss": 0.136, "step": 3502 }, { "epoch": 0.59, "grad_norm": 0.5696807503700256, "learning_rate": 1.8601586987975665e-05, "loss": 0.0614, "step": 3503 }, { "epoch": 0.59, "grad_norm": 0.6162496209144592, "learning_rate": 1.8600665882435364e-05, "loss": 0.0812, "step": 3504 }, { "epoch": 0.59, "grad_norm": 1.0420283079147339, "learning_rate": 1.859974449645945e-05, "loss": 0.0879, "step": 3505 }, { "epoch": 0.59, "grad_norm": 1.4474778175354004, "learning_rate": 1.859882283007796e-05, "loss": 0.0925, "step": 3506 }, { "epoch": 0.59, "grad_norm": 1.0950320959091187, "learning_rate": 1.859790088332095e-05, "loss": 0.1116, "step": 3507 }, { "epoch": 0.59, "grad_norm": 0.8764297962188721, "learning_rate": 1.8596978656218482e-05, "loss": 0.0899, "step": 3508 }, { "epoch": 0.59, "grad_norm": 1.2798519134521484, "learning_rate": 1.859605614880063e-05, "loss": 0.0784, "step": 3509 }, { "epoch": 0.59, "grad_norm": 2.5294084548950195, "learning_rate": 1.8595133361097465e-05, "loss": 0.0968, "step": 3510 }, { "epoch": 0.59, "grad_norm": 0.9927922487258911, "learning_rate": 1.8594210293139082e-05, "loss": 0.0923, "step": 3511 }, { "epoch": 0.59, "grad_norm": 0.9511128664016724, "learning_rate": 1.8593286944955572e-05, "loss": 0.0998, "step": 3512 }, { "epoch": 0.59, "grad_norm": 0.8548786044120789, "learning_rate": 1.859236331657705e-05, "loss": 0.0971, "step": 3513 }, { "epoch": 0.59, "grad_norm": 0.9456377625465393, "learning_rate": 1.859143940803363e-05, "loss": 0.1291, "step": 3514 }, { "epoch": 0.59, "grad_norm": 0.9358652234077454, "learning_rate": 1.8590515219355433e-05, "loss": 0.0853, "step": 3515 }, { "epoch": 0.59, "grad_norm": 0.7918145656585693, "learning_rate": 1.8589590750572596e-05, "loss": 0.0801, "step": 3516 }, { "epoch": 0.59, "grad_norm": 0.7303755879402161, "learning_rate": 1.8588666001715266e-05, "loss": 0.0707, "step": 3517 }, { "epoch": 0.59, "grad_norm": 0.9150583148002625, "learning_rate": 1.858774097281359e-05, "loss": 0.0788, "step": 3518 }, { "epoch": 0.59, "grad_norm": 0.8493038415908813, "learning_rate": 1.8586815663897733e-05, "loss": 0.0895, "step": 3519 }, { "epoch": 0.59, "grad_norm": 0.875869631767273, "learning_rate": 1.8585890074997866e-05, "loss": 0.1037, "step": 3520 }, { "epoch": 0.59, "grad_norm": 0.7293341159820557, "learning_rate": 1.8584964206144164e-05, "loss": 0.1014, "step": 3521 }, { "epoch": 0.59, "grad_norm": 0.9187565445899963, "learning_rate": 1.8584038057366825e-05, "loss": 0.1021, "step": 3522 }, { "epoch": 0.59, "grad_norm": 1.0928869247436523, "learning_rate": 1.8583111628696038e-05, "loss": 0.0873, "step": 3523 }, { "epoch": 0.59, "grad_norm": 0.7197282314300537, "learning_rate": 1.8582184920162017e-05, "loss": 0.0886, "step": 3524 }, { "epoch": 0.59, "grad_norm": 0.8113405704498291, "learning_rate": 1.8581257931794975e-05, "loss": 0.0912, "step": 3525 }, { "epoch": 0.59, "grad_norm": 0.7931607961654663, "learning_rate": 1.8580330663625138e-05, "loss": 0.0692, "step": 3526 }, { "epoch": 0.59, "grad_norm": 0.906768262386322, "learning_rate": 1.857940311568274e-05, "loss": 0.0964, "step": 3527 }, { "epoch": 0.59, "grad_norm": 0.9875622987747192, "learning_rate": 1.8578475287998033e-05, "loss": 0.1111, "step": 3528 }, { "epoch": 0.59, "grad_norm": 1.005822777748108, "learning_rate": 1.857754718060126e-05, "loss": 0.075, "step": 3529 }, { "epoch": 0.59, "grad_norm": 0.6809669137001038, "learning_rate": 1.857661879352268e-05, "loss": 0.0813, "step": 3530 }, { "epoch": 0.59, "grad_norm": 0.7220934629440308, "learning_rate": 1.857569012679258e-05, "loss": 0.0853, "step": 3531 }, { "epoch": 0.59, "grad_norm": 0.7304158210754395, "learning_rate": 1.857476118044123e-05, "loss": 0.0821, "step": 3532 }, { "epoch": 0.59, "grad_norm": 0.979410707950592, "learning_rate": 1.8573831954498917e-05, "loss": 0.0792, "step": 3533 }, { "epoch": 0.59, "grad_norm": 0.7275266051292419, "learning_rate": 1.8572902448995945e-05, "loss": 0.0786, "step": 3534 }, { "epoch": 0.59, "grad_norm": 0.8152807950973511, "learning_rate": 1.857197266396262e-05, "loss": 0.0793, "step": 3535 }, { "epoch": 0.59, "grad_norm": 0.8050700426101685, "learning_rate": 1.857104259942926e-05, "loss": 0.0906, "step": 3536 }, { "epoch": 0.59, "grad_norm": 1.1668096780776978, "learning_rate": 1.8570112255426183e-05, "loss": 0.1036, "step": 3537 }, { "epoch": 0.59, "grad_norm": 1.1457761526107788, "learning_rate": 1.856918163198374e-05, "loss": 0.097, "step": 3538 }, { "epoch": 0.59, "grad_norm": 0.7082080841064453, "learning_rate": 1.856825072913226e-05, "loss": 0.0767, "step": 3539 }, { "epoch": 0.59, "grad_norm": 0.9615572690963745, "learning_rate": 1.8567319546902106e-05, "loss": 0.0865, "step": 3540 }, { "epoch": 0.59, "grad_norm": 0.8410216569900513, "learning_rate": 1.856638808532363e-05, "loss": 0.0737, "step": 3541 }, { "epoch": 0.59, "grad_norm": 0.8334599733352661, "learning_rate": 1.856545634442722e-05, "loss": 0.0795, "step": 3542 }, { "epoch": 0.59, "grad_norm": 0.6560694575309753, "learning_rate": 1.8564524324243237e-05, "loss": 0.0828, "step": 3543 }, { "epoch": 0.59, "grad_norm": 0.7970775961875916, "learning_rate": 1.8563592024802085e-05, "loss": 0.0888, "step": 3544 }, { "epoch": 0.59, "grad_norm": 0.7756149172782898, "learning_rate": 1.8562659446134156e-05, "loss": 0.0787, "step": 3545 }, { "epoch": 0.59, "grad_norm": 0.770428478717804, "learning_rate": 1.856172658826986e-05, "loss": 0.1028, "step": 3546 }, { "epoch": 0.59, "grad_norm": 0.8781447410583496, "learning_rate": 1.8560793451239616e-05, "loss": 0.083, "step": 3547 }, { "epoch": 0.59, "grad_norm": 0.8659023642539978, "learning_rate": 1.8559860035073848e-05, "loss": 0.0728, "step": 3548 }, { "epoch": 0.59, "grad_norm": 1.0219563245773315, "learning_rate": 1.855892633980299e-05, "loss": 0.1355, "step": 3549 }, { "epoch": 0.59, "grad_norm": 1.138237714767456, "learning_rate": 1.8557992365457487e-05, "loss": 0.1082, "step": 3550 }, { "epoch": 0.59, "grad_norm": 0.9572103023529053, "learning_rate": 1.8557058112067796e-05, "loss": 0.0867, "step": 3551 }, { "epoch": 0.59, "grad_norm": 0.7849854826927185, "learning_rate": 1.8556123579664375e-05, "loss": 0.0756, "step": 3552 }, { "epoch": 0.59, "grad_norm": 0.7639734148979187, "learning_rate": 1.8555188768277696e-05, "loss": 0.0839, "step": 3553 }, { "epoch": 0.59, "grad_norm": 0.8711333274841309, "learning_rate": 1.8554253677938243e-05, "loss": 0.0688, "step": 3554 }, { "epoch": 0.59, "grad_norm": 0.709355890750885, "learning_rate": 1.8553318308676502e-05, "loss": 0.0883, "step": 3555 }, { "epoch": 0.59, "grad_norm": 0.8953630924224854, "learning_rate": 1.8552382660522973e-05, "loss": 0.0943, "step": 3556 }, { "epoch": 0.59, "grad_norm": 0.8687648177146912, "learning_rate": 1.8551446733508166e-05, "loss": 0.1036, "step": 3557 }, { "epoch": 0.6, "grad_norm": 0.9358887076377869, "learning_rate": 1.85505105276626e-05, "loss": 0.1023, "step": 3558 }, { "epoch": 0.6, "grad_norm": 0.833635151386261, "learning_rate": 1.854957404301679e-05, "loss": 0.0964, "step": 3559 }, { "epoch": 0.6, "grad_norm": 0.7627719640731812, "learning_rate": 1.854863727960129e-05, "loss": 0.0805, "step": 3560 }, { "epoch": 0.6, "grad_norm": 0.8561954498291016, "learning_rate": 1.854770023744662e-05, "loss": 0.0872, "step": 3561 }, { "epoch": 0.6, "grad_norm": 0.9312578439712524, "learning_rate": 1.8546762916583357e-05, "loss": 0.1129, "step": 3562 }, { "epoch": 0.6, "grad_norm": 0.6830171346664429, "learning_rate": 1.854582531704205e-05, "loss": 0.0858, "step": 3563 }, { "epoch": 0.6, "grad_norm": 0.6623774766921997, "learning_rate": 1.8544887438853276e-05, "loss": 0.083, "step": 3564 }, { "epoch": 0.6, "grad_norm": 0.6695672273635864, "learning_rate": 1.8543949282047614e-05, "loss": 0.0826, "step": 3565 }, { "epoch": 0.6, "grad_norm": 0.6322051882743835, "learning_rate": 1.8543010846655647e-05, "loss": 0.0745, "step": 3566 }, { "epoch": 0.6, "grad_norm": 0.788209080696106, "learning_rate": 1.854207213270799e-05, "loss": 0.0946, "step": 3567 }, { "epoch": 0.6, "grad_norm": 0.7210556864738464, "learning_rate": 1.8541133140235237e-05, "loss": 0.0842, "step": 3568 }, { "epoch": 0.6, "grad_norm": 0.6874264478683472, "learning_rate": 1.854019386926801e-05, "loss": 0.0755, "step": 3569 }, { "epoch": 0.6, "grad_norm": 0.641574501991272, "learning_rate": 1.8539254319836932e-05, "loss": 0.0744, "step": 3570 }, { "epoch": 0.6, "grad_norm": 0.8547300100326538, "learning_rate": 1.8538314491972643e-05, "loss": 0.0904, "step": 3571 }, { "epoch": 0.6, "grad_norm": 0.7820848226547241, "learning_rate": 1.8537374385705785e-05, "loss": 0.0756, "step": 3572 }, { "epoch": 0.6, "grad_norm": 1.0203678607940674, "learning_rate": 1.853643400106701e-05, "loss": 0.0946, "step": 3573 }, { "epoch": 0.6, "grad_norm": 0.9501619935035706, "learning_rate": 1.8535493338086987e-05, "loss": 0.1083, "step": 3574 }, { "epoch": 0.6, "grad_norm": 1.143368124961853, "learning_rate": 1.8534552396796378e-05, "loss": 0.1005, "step": 3575 }, { "epoch": 0.6, "grad_norm": 0.9406003952026367, "learning_rate": 1.853361117722587e-05, "loss": 0.103, "step": 3576 }, { "epoch": 0.6, "grad_norm": 0.601249098777771, "learning_rate": 1.853266967940615e-05, "loss": 0.0694, "step": 3577 }, { "epoch": 0.6, "grad_norm": 0.721443235874176, "learning_rate": 1.8531727903367914e-05, "loss": 0.0949, "step": 3578 }, { "epoch": 0.6, "grad_norm": 1.4332525730133057, "learning_rate": 1.8530785849141874e-05, "loss": 0.0862, "step": 3579 }, { "epoch": 0.6, "grad_norm": 0.8920552134513855, "learning_rate": 1.852984351675875e-05, "loss": 0.0833, "step": 3580 }, { "epoch": 0.6, "grad_norm": 0.7315024733543396, "learning_rate": 1.8528900906249263e-05, "loss": 0.1056, "step": 3581 }, { "epoch": 0.6, "grad_norm": 0.7238548398017883, "learning_rate": 1.8527958017644148e-05, "loss": 0.0832, "step": 3582 }, { "epoch": 0.6, "grad_norm": 0.8480680584907532, "learning_rate": 1.852701485097415e-05, "loss": 0.0743, "step": 3583 }, { "epoch": 0.6, "grad_norm": 0.8757266998291016, "learning_rate": 1.852607140627002e-05, "loss": 0.092, "step": 3584 }, { "epoch": 0.6, "grad_norm": 0.9962085485458374, "learning_rate": 1.8525127683562524e-05, "loss": 0.0819, "step": 3585 }, { "epoch": 0.6, "grad_norm": 0.7262884974479675, "learning_rate": 1.8524183682882432e-05, "loss": 0.0856, "step": 3586 }, { "epoch": 0.6, "grad_norm": 0.8675050139427185, "learning_rate": 1.8523239404260524e-05, "loss": 0.0785, "step": 3587 }, { "epoch": 0.6, "grad_norm": 0.6294103860855103, "learning_rate": 1.8522294847727593e-05, "loss": 0.0746, "step": 3588 }, { "epoch": 0.6, "grad_norm": 0.8238442540168762, "learning_rate": 1.852135001331443e-05, "loss": 0.0885, "step": 3589 }, { "epoch": 0.6, "grad_norm": 0.9088493585586548, "learning_rate": 1.8520404901051847e-05, "loss": 0.0959, "step": 3590 }, { "epoch": 0.6, "grad_norm": 0.6772658824920654, "learning_rate": 1.851945951097066e-05, "loss": 0.0892, "step": 3591 }, { "epoch": 0.6, "grad_norm": 0.7646352052688599, "learning_rate": 1.8518513843101696e-05, "loss": 0.0755, "step": 3592 }, { "epoch": 0.6, "grad_norm": 0.8195433616638184, "learning_rate": 1.8517567897475785e-05, "loss": 0.0698, "step": 3593 }, { "epoch": 0.6, "grad_norm": 1.0212652683258057, "learning_rate": 1.8516621674123777e-05, "loss": 0.1031, "step": 3594 }, { "epoch": 0.6, "grad_norm": 0.8840683102607727, "learning_rate": 1.851567517307652e-05, "loss": 0.1059, "step": 3595 }, { "epoch": 0.6, "grad_norm": 1.555864930152893, "learning_rate": 1.851472839436488e-05, "loss": 0.0971, "step": 3596 }, { "epoch": 0.6, "grad_norm": 0.8092486262321472, "learning_rate": 1.8513781338019723e-05, "loss": 0.0739, "step": 3597 }, { "epoch": 0.6, "grad_norm": 0.754857063293457, "learning_rate": 1.8512834004071934e-05, "loss": 0.0953, "step": 3598 }, { "epoch": 0.6, "grad_norm": 0.844136655330658, "learning_rate": 1.85118863925524e-05, "loss": 0.0926, "step": 3599 }, { "epoch": 0.6, "grad_norm": 0.6787381768226624, "learning_rate": 1.8510938503492017e-05, "loss": 0.0731, "step": 3600 }, { "epoch": 0.6, "grad_norm": 1.2827904224395752, "learning_rate": 1.8509990336921693e-05, "loss": 0.0827, "step": 3601 }, { "epoch": 0.6, "grad_norm": 0.7488519549369812, "learning_rate": 1.8509041892872348e-05, "loss": 0.0876, "step": 3602 }, { "epoch": 0.6, "grad_norm": 0.9144026041030884, "learning_rate": 1.8508093171374905e-05, "loss": 0.0963, "step": 3603 }, { "epoch": 0.6, "grad_norm": 0.711050271987915, "learning_rate": 1.8507144172460294e-05, "loss": 0.0812, "step": 3604 }, { "epoch": 0.6, "grad_norm": 0.8213106393814087, "learning_rate": 1.850619489615946e-05, "loss": 0.1035, "step": 3605 }, { "epoch": 0.6, "grad_norm": 0.8500064611434937, "learning_rate": 1.8505245342503363e-05, "loss": 0.0798, "step": 3606 }, { "epoch": 0.6, "grad_norm": 0.5667248964309692, "learning_rate": 1.8504295511522958e-05, "loss": 0.0679, "step": 3607 }, { "epoch": 0.6, "grad_norm": 0.8447193503379822, "learning_rate": 1.8503345403249214e-05, "loss": 0.0944, "step": 3608 }, { "epoch": 0.6, "grad_norm": 1.3247627019882202, "learning_rate": 1.8502395017713113e-05, "loss": 0.1082, "step": 3609 }, { "epoch": 0.6, "grad_norm": 0.9846928119659424, "learning_rate": 1.8501444354945642e-05, "loss": 0.1084, "step": 3610 }, { "epoch": 0.6, "grad_norm": 0.9223631024360657, "learning_rate": 1.8500493414977803e-05, "loss": 0.0878, "step": 3611 }, { "epoch": 0.6, "grad_norm": 0.9654775857925415, "learning_rate": 1.8499542197840594e-05, "loss": 0.0868, "step": 3612 }, { "epoch": 0.6, "grad_norm": 0.7466325163841248, "learning_rate": 1.849859070356504e-05, "loss": 0.0738, "step": 3613 }, { "epoch": 0.6, "grad_norm": 1.0591686964035034, "learning_rate": 1.8497638932182162e-05, "loss": 0.1137, "step": 3614 }, { "epoch": 0.6, "grad_norm": 0.9253994226455688, "learning_rate": 1.849668688372299e-05, "loss": 0.0931, "step": 3615 }, { "epoch": 0.6, "grad_norm": 0.8522130250930786, "learning_rate": 1.849573455821857e-05, "loss": 0.095, "step": 3616 }, { "epoch": 0.6, "grad_norm": 0.8040969967842102, "learning_rate": 1.8494781955699955e-05, "loss": 0.0696, "step": 3617 }, { "epoch": 0.61, "grad_norm": 0.848466694355011, "learning_rate": 1.8493829076198208e-05, "loss": 0.0958, "step": 3618 }, { "epoch": 0.61, "grad_norm": 0.7327965497970581, "learning_rate": 1.8492875919744392e-05, "loss": 0.1038, "step": 3619 }, { "epoch": 0.61, "grad_norm": 0.8759572505950928, "learning_rate": 1.849192248636959e-05, "loss": 0.1002, "step": 3620 }, { "epoch": 0.61, "grad_norm": 0.7912656664848328, "learning_rate": 1.8490968776104887e-05, "loss": 0.0905, "step": 3621 }, { "epoch": 0.61, "grad_norm": 0.7097240686416626, "learning_rate": 1.8490014788981384e-05, "loss": 0.0723, "step": 3622 }, { "epoch": 0.61, "grad_norm": 0.797880232334137, "learning_rate": 1.848906052503019e-05, "loss": 0.1028, "step": 3623 }, { "epoch": 0.61, "grad_norm": 0.7563408613204956, "learning_rate": 1.848810598428241e-05, "loss": 0.0973, "step": 3624 }, { "epoch": 0.61, "grad_norm": 0.7461533546447754, "learning_rate": 1.848715116676917e-05, "loss": 0.0996, "step": 3625 }, { "epoch": 0.61, "grad_norm": 0.9122897982597351, "learning_rate": 1.848619607252161e-05, "loss": 0.1058, "step": 3626 }, { "epoch": 0.61, "grad_norm": 0.8396524786949158, "learning_rate": 1.848524070157087e-05, "loss": 0.0896, "step": 3627 }, { "epoch": 0.61, "grad_norm": 0.9760554432868958, "learning_rate": 1.8484285053948095e-05, "loss": 0.1105, "step": 3628 }, { "epoch": 0.61, "grad_norm": 1.0361655950546265, "learning_rate": 1.8483329129684457e-05, "loss": 0.1057, "step": 3629 }, { "epoch": 0.61, "grad_norm": 0.704541027545929, "learning_rate": 1.848237292881111e-05, "loss": 0.0988, "step": 3630 }, { "epoch": 0.61, "grad_norm": 0.9286929965019226, "learning_rate": 1.848141645135924e-05, "loss": 0.0686, "step": 3631 }, { "epoch": 0.61, "grad_norm": 0.7676939964294434, "learning_rate": 1.8480459697360038e-05, "loss": 0.0992, "step": 3632 }, { "epoch": 0.61, "grad_norm": 0.5820310711860657, "learning_rate": 1.8479502666844693e-05, "loss": 0.0737, "step": 3633 }, { "epoch": 0.61, "grad_norm": 0.8414909243583679, "learning_rate": 1.8478545359844413e-05, "loss": 0.0764, "step": 3634 }, { "epoch": 0.61, "grad_norm": 0.6206024289131165, "learning_rate": 1.8477587776390415e-05, "loss": 0.0719, "step": 3635 }, { "epoch": 0.61, "grad_norm": 0.5634062886238098, "learning_rate": 1.847662991651392e-05, "loss": 0.0635, "step": 3636 }, { "epoch": 0.61, "grad_norm": 0.7914181351661682, "learning_rate": 1.8475671780246155e-05, "loss": 0.1021, "step": 3637 }, { "epoch": 0.61, "grad_norm": 0.9977076053619385, "learning_rate": 1.847471336761837e-05, "loss": 0.1009, "step": 3638 }, { "epoch": 0.61, "grad_norm": 0.7203056812286377, "learning_rate": 1.847375467866181e-05, "loss": 0.1081, "step": 3639 }, { "epoch": 0.61, "grad_norm": 1.013890027999878, "learning_rate": 1.847279571340774e-05, "loss": 0.0835, "step": 3640 }, { "epoch": 0.61, "grad_norm": 0.9209913015365601, "learning_rate": 1.8471836471887415e-05, "loss": 0.1134, "step": 3641 }, { "epoch": 0.61, "grad_norm": 0.9010593295097351, "learning_rate": 1.847087695413213e-05, "loss": 0.0976, "step": 3642 }, { "epoch": 0.61, "grad_norm": 0.6834477782249451, "learning_rate": 1.846991716017316e-05, "loss": 0.0786, "step": 3643 }, { "epoch": 0.61, "grad_norm": 0.6869216561317444, "learning_rate": 1.84689570900418e-05, "loss": 0.0834, "step": 3644 }, { "epoch": 0.61, "grad_norm": 0.8028872609138489, "learning_rate": 1.8467996743769357e-05, "loss": 0.1158, "step": 3645 }, { "epoch": 0.61, "grad_norm": 0.8938417434692383, "learning_rate": 1.8467036121387146e-05, "loss": 0.1077, "step": 3646 }, { "epoch": 0.61, "grad_norm": 0.7441613078117371, "learning_rate": 1.846607522292649e-05, "loss": 0.0766, "step": 3647 }, { "epoch": 0.61, "grad_norm": 1.1154812574386597, "learning_rate": 1.8465114048418716e-05, "loss": 0.0837, "step": 3648 }, { "epoch": 0.61, "grad_norm": 0.8065111637115479, "learning_rate": 1.8464152597895167e-05, "loss": 0.0796, "step": 3649 }, { "epoch": 0.61, "grad_norm": 0.8574355244636536, "learning_rate": 1.846319087138719e-05, "loss": 0.0931, "step": 3650 }, { "epoch": 0.61, "grad_norm": 0.7339103817939758, "learning_rate": 1.8462228868926148e-05, "loss": 0.0926, "step": 3651 }, { "epoch": 0.61, "grad_norm": 1.1339216232299805, "learning_rate": 1.84612665905434e-05, "loss": 0.0909, "step": 3652 }, { "epoch": 0.61, "grad_norm": 1.5844734907150269, "learning_rate": 1.846030403627033e-05, "loss": 0.0673, "step": 3653 }, { "epoch": 0.61, "grad_norm": 1.036219835281372, "learning_rate": 1.8459341206138322e-05, "loss": 0.1008, "step": 3654 }, { "epoch": 0.61, "grad_norm": 0.7156174182891846, "learning_rate": 1.845837810017877e-05, "loss": 0.0843, "step": 3655 }, { "epoch": 0.61, "grad_norm": 1.00363290309906, "learning_rate": 1.8457414718423075e-05, "loss": 0.099, "step": 3656 }, { "epoch": 0.61, "grad_norm": 0.7147552371025085, "learning_rate": 1.8456451060902653e-05, "loss": 0.0782, "step": 3657 }, { "epoch": 0.61, "grad_norm": 2.3184990882873535, "learning_rate": 1.845548712764892e-05, "loss": 0.0988, "step": 3658 }, { "epoch": 0.61, "grad_norm": 0.7040097713470459, "learning_rate": 1.8454522918693308e-05, "loss": 0.1037, "step": 3659 }, { "epoch": 0.61, "grad_norm": 1.2328763008117676, "learning_rate": 1.8453558434067262e-05, "loss": 0.1157, "step": 3660 }, { "epoch": 0.61, "grad_norm": 0.7940439581871033, "learning_rate": 1.8452593673802225e-05, "loss": 0.0799, "step": 3661 }, { "epoch": 0.61, "grad_norm": 0.6575446724891663, "learning_rate": 1.8451628637929652e-05, "loss": 0.0725, "step": 3662 }, { "epoch": 0.61, "grad_norm": 0.8717228770256042, "learning_rate": 1.8450663326481014e-05, "loss": 0.0958, "step": 3663 }, { "epoch": 0.61, "grad_norm": 0.9876289963722229, "learning_rate": 1.8449697739487785e-05, "loss": 0.0776, "step": 3664 }, { "epoch": 0.61, "grad_norm": 0.9157716035842896, "learning_rate": 1.844873187698145e-05, "loss": 0.0786, "step": 3665 }, { "epoch": 0.61, "grad_norm": 0.8293876647949219, "learning_rate": 1.84477657389935e-05, "loss": 0.0913, "step": 3666 }, { "epoch": 0.61, "grad_norm": 0.6668652296066284, "learning_rate": 1.8446799325555434e-05, "loss": 0.0743, "step": 3667 }, { "epoch": 0.61, "grad_norm": 0.7408357262611389, "learning_rate": 1.8445832636698772e-05, "loss": 0.1228, "step": 3668 }, { "epoch": 0.61, "grad_norm": 0.9362218379974365, "learning_rate": 1.8444865672455026e-05, "loss": 0.0694, "step": 3669 }, { "epoch": 0.61, "grad_norm": 1.2903176546096802, "learning_rate": 1.8443898432855733e-05, "loss": 0.0969, "step": 3670 }, { "epoch": 0.61, "grad_norm": 0.8609079718589783, "learning_rate": 1.844293091793242e-05, "loss": 0.0943, "step": 3671 }, { "epoch": 0.61, "grad_norm": 0.9615067839622498, "learning_rate": 1.8441963127716644e-05, "loss": 0.0898, "step": 3672 }, { "epoch": 0.61, "grad_norm": 0.67417973279953, "learning_rate": 1.8440995062239955e-05, "loss": 0.0779, "step": 3673 }, { "epoch": 0.61, "grad_norm": 0.7402226328849792, "learning_rate": 1.8440026721533924e-05, "loss": 0.0672, "step": 3674 }, { "epoch": 0.61, "grad_norm": 0.6333996653556824, "learning_rate": 1.843905810563012e-05, "loss": 0.0608, "step": 3675 }, { "epoch": 0.61, "grad_norm": 0.7735124230384827, "learning_rate": 1.843808921456013e-05, "loss": 0.083, "step": 3676 }, { "epoch": 0.61, "grad_norm": 0.839436948299408, "learning_rate": 1.843712004835554e-05, "loss": 0.0744, "step": 3677 }, { "epoch": 0.62, "grad_norm": 0.6000044941902161, "learning_rate": 1.8436150607047952e-05, "loss": 0.0653, "step": 3678 }, { "epoch": 0.62, "grad_norm": 0.7810673117637634, "learning_rate": 1.8435180890668986e-05, "loss": 0.0721, "step": 3679 }, { "epoch": 0.62, "grad_norm": 0.6759157776832581, "learning_rate": 1.8434210899250246e-05, "loss": 0.0851, "step": 3680 }, { "epoch": 0.62, "grad_norm": 1.8723087310791016, "learning_rate": 1.8433240632823368e-05, "loss": 0.0894, "step": 3681 }, { "epoch": 0.62, "grad_norm": 0.8769448399543762, "learning_rate": 1.843227009141999e-05, "loss": 0.0803, "step": 3682 }, { "epoch": 0.62, "grad_norm": 0.8589674830436707, "learning_rate": 1.843129927507175e-05, "loss": 0.0883, "step": 3683 }, { "epoch": 0.62, "grad_norm": 0.9038573503494263, "learning_rate": 1.8430328183810312e-05, "loss": 0.0857, "step": 3684 }, { "epoch": 0.62, "grad_norm": 0.8644160032272339, "learning_rate": 1.8429356817667338e-05, "loss": 0.1114, "step": 3685 }, { "epoch": 0.62, "grad_norm": 0.878746509552002, "learning_rate": 1.842838517667449e-05, "loss": 0.0881, "step": 3686 }, { "epoch": 0.62, "grad_norm": 0.7965055704116821, "learning_rate": 1.8427413260863466e-05, "loss": 0.0874, "step": 3687 }, { "epoch": 0.62, "grad_norm": 0.9367748498916626, "learning_rate": 1.8426441070265943e-05, "loss": 0.0746, "step": 3688 }, { "epoch": 0.62, "grad_norm": 0.8547359108924866, "learning_rate": 1.842546860491363e-05, "loss": 0.0924, "step": 3689 }, { "epoch": 0.62, "grad_norm": 0.7213195562362671, "learning_rate": 1.8424495864838226e-05, "loss": 0.0844, "step": 3690 }, { "epoch": 0.62, "grad_norm": 0.7445073127746582, "learning_rate": 1.842352285007146e-05, "loss": 0.0834, "step": 3691 }, { "epoch": 0.62, "grad_norm": 0.9400060176849365, "learning_rate": 1.842254956064505e-05, "loss": 0.0905, "step": 3692 }, { "epoch": 0.62, "grad_norm": 0.7610102295875549, "learning_rate": 1.842157599659073e-05, "loss": 0.0865, "step": 3693 }, { "epoch": 0.62, "grad_norm": 0.8046721816062927, "learning_rate": 1.842060215794025e-05, "loss": 0.0807, "step": 3694 }, { "epoch": 0.62, "grad_norm": 1.127744436264038, "learning_rate": 1.8419628044725365e-05, "loss": 0.0912, "step": 3695 }, { "epoch": 0.62, "grad_norm": 0.8524706363677979, "learning_rate": 1.8418653656977828e-05, "loss": 0.0869, "step": 3696 }, { "epoch": 0.62, "grad_norm": 0.7866064310073853, "learning_rate": 1.8417678994729416e-05, "loss": 0.0793, "step": 3697 }, { "epoch": 0.62, "grad_norm": 0.7201782464981079, "learning_rate": 1.8416704058011908e-05, "loss": 0.0873, "step": 3698 }, { "epoch": 0.62, "grad_norm": 0.8149520754814148, "learning_rate": 1.8415728846857094e-05, "loss": 0.0928, "step": 3699 }, { "epoch": 0.62, "grad_norm": 0.7716862559318542, "learning_rate": 1.8414753361296774e-05, "loss": 0.0938, "step": 3700 }, { "epoch": 0.62, "grad_norm": 0.6016415357589722, "learning_rate": 1.841377760136275e-05, "loss": 0.0784, "step": 3701 }, { "epoch": 0.62, "grad_norm": 0.5609806776046753, "learning_rate": 1.841280156708684e-05, "loss": 0.0648, "step": 3702 }, { "epoch": 0.62, "grad_norm": 0.710765540599823, "learning_rate": 1.841182525850087e-05, "loss": 0.0806, "step": 3703 }, { "epoch": 0.62, "grad_norm": 0.6245403289794922, "learning_rate": 1.8410848675636672e-05, "loss": 0.0576, "step": 3704 }, { "epoch": 0.62, "grad_norm": 0.5335845351219177, "learning_rate": 1.8409871818526093e-05, "loss": 0.0587, "step": 3705 }, { "epoch": 0.62, "grad_norm": 0.6472234129905701, "learning_rate": 1.8408894687200977e-05, "loss": 0.104, "step": 3706 }, { "epoch": 0.62, "grad_norm": 0.7251203656196594, "learning_rate": 1.840791728169319e-05, "loss": 0.0872, "step": 3707 }, { "epoch": 0.62, "grad_norm": 0.6338942646980286, "learning_rate": 1.8406939602034605e-05, "loss": 0.0701, "step": 3708 }, { "epoch": 0.62, "grad_norm": 0.5480802059173584, "learning_rate": 1.840596164825709e-05, "loss": 0.0543, "step": 3709 }, { "epoch": 0.62, "grad_norm": 0.7561771273612976, "learning_rate": 1.840498342039254e-05, "loss": 0.0932, "step": 3710 }, { "epoch": 0.62, "grad_norm": 0.8974176049232483, "learning_rate": 1.840400491847285e-05, "loss": 0.1116, "step": 3711 }, { "epoch": 0.62, "grad_norm": 0.6621408462524414, "learning_rate": 1.8403026142529927e-05, "loss": 0.0769, "step": 3712 }, { "epoch": 0.62, "grad_norm": 0.6463733315467834, "learning_rate": 1.840204709259568e-05, "loss": 0.075, "step": 3713 }, { "epoch": 0.62, "grad_norm": 0.7765437364578247, "learning_rate": 1.8401067768702036e-05, "loss": 0.106, "step": 3714 }, { "epoch": 0.62, "grad_norm": 0.7232966423034668, "learning_rate": 1.8400088170880926e-05, "loss": 0.0939, "step": 3715 }, { "epoch": 0.62, "grad_norm": 0.813217043876648, "learning_rate": 1.8399108299164295e-05, "loss": 0.0813, "step": 3716 }, { "epoch": 0.62, "grad_norm": 1.103716492652893, "learning_rate": 1.8398128153584088e-05, "loss": 0.11, "step": 3717 }, { "epoch": 0.62, "grad_norm": 0.6381298303604126, "learning_rate": 1.8397147734172264e-05, "loss": 0.0708, "step": 3718 }, { "epoch": 0.62, "grad_norm": 0.7625046372413635, "learning_rate": 1.8396167040960794e-05, "loss": 0.0798, "step": 3719 }, { "epoch": 0.62, "grad_norm": 1.2241026163101196, "learning_rate": 1.8395186073981648e-05, "loss": 0.0751, "step": 3720 }, { "epoch": 0.62, "grad_norm": 0.6782296895980835, "learning_rate": 1.839420483326682e-05, "loss": 0.0785, "step": 3721 }, { "epoch": 0.62, "grad_norm": 0.7795558571815491, "learning_rate": 1.8393223318848302e-05, "loss": 0.079, "step": 3722 }, { "epoch": 0.62, "grad_norm": 0.7019252777099609, "learning_rate": 1.8392241530758097e-05, "loss": 0.0802, "step": 3723 }, { "epoch": 0.62, "grad_norm": 0.7770284414291382, "learning_rate": 1.8391259469028217e-05, "loss": 0.0722, "step": 3724 }, { "epoch": 0.62, "grad_norm": 0.977691113948822, "learning_rate": 1.8390277133690682e-05, "loss": 0.0993, "step": 3725 }, { "epoch": 0.62, "grad_norm": 0.789608359336853, "learning_rate": 1.8389294524777525e-05, "loss": 0.1049, "step": 3726 }, { "epoch": 0.62, "grad_norm": 0.8047356605529785, "learning_rate": 1.8388311642320783e-05, "loss": 0.0987, "step": 3727 }, { "epoch": 0.62, "grad_norm": 0.6069962382316589, "learning_rate": 1.838732848635251e-05, "loss": 0.0808, "step": 3728 }, { "epoch": 0.62, "grad_norm": 0.643644392490387, "learning_rate": 1.8386345056904754e-05, "loss": 0.0795, "step": 3729 }, { "epoch": 0.62, "grad_norm": 0.755300760269165, "learning_rate": 1.8385361354009585e-05, "loss": 0.0814, "step": 3730 }, { "epoch": 0.62, "grad_norm": 1.0785472393035889, "learning_rate": 1.838437737769908e-05, "loss": 0.0876, "step": 3731 }, { "epoch": 0.62, "grad_norm": 0.8396201729774475, "learning_rate": 1.8383393128005318e-05, "loss": 0.1144, "step": 3732 }, { "epoch": 0.62, "grad_norm": 1.1801259517669678, "learning_rate": 1.8382408604960395e-05, "loss": 0.1004, "step": 3733 }, { "epoch": 0.62, "grad_norm": 1.1427782773971558, "learning_rate": 1.838142380859641e-05, "loss": 0.0984, "step": 3734 }, { "epoch": 0.62, "grad_norm": 0.7654452323913574, "learning_rate": 1.8380438738945484e-05, "loss": 0.0896, "step": 3735 }, { "epoch": 0.62, "grad_norm": 0.8996630311012268, "learning_rate": 1.837945339603972e-05, "loss": 0.093, "step": 3736 }, { "epoch": 0.63, "grad_norm": 0.5075281858444214, "learning_rate": 1.8378467779911258e-05, "loss": 0.0735, "step": 3737 }, { "epoch": 0.63, "grad_norm": 1.194849967956543, "learning_rate": 1.837748189059223e-05, "loss": 0.1037, "step": 3738 }, { "epoch": 0.63, "grad_norm": 0.632652997970581, "learning_rate": 1.8376495728114786e-05, "loss": 0.0645, "step": 3739 }, { "epoch": 0.63, "grad_norm": 0.6921085715293884, "learning_rate": 1.837550929251108e-05, "loss": 0.0696, "step": 3740 }, { "epoch": 0.63, "grad_norm": 0.8005045652389526, "learning_rate": 1.8374522583813273e-05, "loss": 0.073, "step": 3741 }, { "epoch": 0.63, "grad_norm": 1.0210094451904297, "learning_rate": 1.837353560205354e-05, "loss": 0.0967, "step": 3742 }, { "epoch": 0.63, "grad_norm": 1.0063326358795166, "learning_rate": 1.8372548347264066e-05, "loss": 0.0902, "step": 3743 }, { "epoch": 0.63, "grad_norm": 0.7671386003494263, "learning_rate": 1.8371560819477034e-05, "loss": 0.0905, "step": 3744 }, { "epoch": 0.63, "grad_norm": 0.803293764591217, "learning_rate": 1.837057301872465e-05, "loss": 0.0732, "step": 3745 }, { "epoch": 0.63, "grad_norm": 0.8806427121162415, "learning_rate": 1.836958494503912e-05, "loss": 0.0701, "step": 3746 }, { "epoch": 0.63, "grad_norm": 0.8458312749862671, "learning_rate": 1.836859659845266e-05, "loss": 0.0939, "step": 3747 }, { "epoch": 0.63, "grad_norm": 0.6831758618354797, "learning_rate": 1.8367607978997498e-05, "loss": 0.0785, "step": 3748 }, { "epoch": 0.63, "grad_norm": 0.6918340921401978, "learning_rate": 1.8366619086705874e-05, "loss": 0.0754, "step": 3749 }, { "epoch": 0.63, "grad_norm": 0.7676317095756531, "learning_rate": 1.8365629921610026e-05, "loss": 0.0931, "step": 3750 }, { "epoch": 0.63, "grad_norm": 0.6725614666938782, "learning_rate": 1.8364640483742207e-05, "loss": 0.0758, "step": 3751 }, { "epoch": 0.63, "grad_norm": 0.7220199108123779, "learning_rate": 1.8363650773134677e-05, "loss": 0.0836, "step": 3752 }, { "epoch": 0.63, "grad_norm": 0.8360840678215027, "learning_rate": 1.8362660789819714e-05, "loss": 0.0955, "step": 3753 }, { "epoch": 0.63, "grad_norm": 0.8107457756996155, "learning_rate": 1.8361670533829595e-05, "loss": 0.1072, "step": 3754 }, { "epoch": 0.63, "grad_norm": 0.5968965888023376, "learning_rate": 1.8360680005196605e-05, "loss": 0.0742, "step": 3755 }, { "epoch": 0.63, "grad_norm": 0.8006020784378052, "learning_rate": 1.8359689203953045e-05, "loss": 0.0806, "step": 3756 }, { "epoch": 0.63, "grad_norm": 1.8053902387619019, "learning_rate": 1.8358698130131222e-05, "loss": 0.0829, "step": 3757 }, { "epoch": 0.63, "grad_norm": 0.7043805122375488, "learning_rate": 1.8357706783763444e-05, "loss": 0.0948, "step": 3758 }, { "epoch": 0.63, "grad_norm": 0.8971559405326843, "learning_rate": 1.8356715164882045e-05, "loss": 0.0961, "step": 3759 }, { "epoch": 0.63, "grad_norm": 0.7577815055847168, "learning_rate": 1.835572327351935e-05, "loss": 0.0743, "step": 3760 }, { "epoch": 0.63, "grad_norm": 1.2316913604736328, "learning_rate": 1.8354731109707704e-05, "loss": 0.0921, "step": 3761 }, { "epoch": 0.63, "grad_norm": 0.8173249363899231, "learning_rate": 1.835373867347946e-05, "loss": 0.0805, "step": 3762 }, { "epoch": 0.63, "grad_norm": 0.7408024072647095, "learning_rate": 1.8352745964866975e-05, "loss": 0.0908, "step": 3763 }, { "epoch": 0.63, "grad_norm": 0.929329514503479, "learning_rate": 1.835175298390262e-05, "loss": 0.0813, "step": 3764 }, { "epoch": 0.63, "grad_norm": 0.7846735715866089, "learning_rate": 1.8350759730618767e-05, "loss": 0.099, "step": 3765 }, { "epoch": 0.63, "grad_norm": 0.7311637997627258, "learning_rate": 1.834976620504781e-05, "loss": 0.1078, "step": 3766 }, { "epoch": 0.63, "grad_norm": 0.720750093460083, "learning_rate": 1.8348772407222138e-05, "loss": 0.0627, "step": 3767 }, { "epoch": 0.63, "grad_norm": 0.7235113382339478, "learning_rate": 1.8347778337174157e-05, "loss": 0.0997, "step": 3768 }, { "epoch": 0.63, "grad_norm": 1.0346770286560059, "learning_rate": 1.834678399493628e-05, "loss": 0.0929, "step": 3769 }, { "epoch": 0.63, "grad_norm": 1.2319384813308716, "learning_rate": 1.834578938054093e-05, "loss": 0.0741, "step": 3770 }, { "epoch": 0.63, "grad_norm": 0.9416593909263611, "learning_rate": 1.8344794494020536e-05, "loss": 0.093, "step": 3771 }, { "epoch": 0.63, "grad_norm": 0.860831618309021, "learning_rate": 1.8343799335407537e-05, "loss": 0.0919, "step": 3772 }, { "epoch": 0.63, "grad_norm": 1.0175014734268188, "learning_rate": 1.834280390473438e-05, "loss": 0.0954, "step": 3773 }, { "epoch": 0.63, "grad_norm": 0.6852126717567444, "learning_rate": 1.834180820203353e-05, "loss": 0.0862, "step": 3774 }, { "epoch": 0.63, "grad_norm": 0.7656999826431274, "learning_rate": 1.8340812227337445e-05, "loss": 0.0829, "step": 3775 }, { "epoch": 0.63, "grad_norm": 0.8427731394767761, "learning_rate": 1.83398159806786e-05, "loss": 0.1047, "step": 3776 }, { "epoch": 0.63, "grad_norm": 0.7140036225318909, "learning_rate": 1.8338819462089487e-05, "loss": 0.0944, "step": 3777 }, { "epoch": 0.63, "grad_norm": 0.8182423710823059, "learning_rate": 1.833782267160259e-05, "loss": 0.0824, "step": 3778 }, { "epoch": 0.63, "grad_norm": 1.1374623775482178, "learning_rate": 1.8336825609250415e-05, "loss": 0.0851, "step": 3779 }, { "epoch": 0.63, "grad_norm": 0.9738556742668152, "learning_rate": 1.8335828275065474e-05, "loss": 0.0909, "step": 3780 }, { "epoch": 0.63, "grad_norm": 0.7447429299354553, "learning_rate": 1.8334830669080278e-05, "loss": 0.0877, "step": 3781 }, { "epoch": 0.63, "grad_norm": 1.030246615409851, "learning_rate": 1.8333832791327366e-05, "loss": 0.0987, "step": 3782 }, { "epoch": 0.63, "grad_norm": 1.1014885902404785, "learning_rate": 1.8332834641839268e-05, "loss": 0.1226, "step": 3783 }, { "epoch": 0.63, "grad_norm": 2.0659539699554443, "learning_rate": 1.8331836220648533e-05, "loss": 0.106, "step": 3784 }, { "epoch": 0.63, "grad_norm": 0.9881905317306519, "learning_rate": 1.8330837527787715e-05, "loss": 0.0796, "step": 3785 }, { "epoch": 0.63, "grad_norm": 0.8236458897590637, "learning_rate": 1.8329838563289377e-05, "loss": 0.0931, "step": 3786 }, { "epoch": 0.63, "grad_norm": 0.8349462151527405, "learning_rate": 1.8328839327186093e-05, "loss": 0.1031, "step": 3787 }, { "epoch": 0.63, "grad_norm": 0.8073337078094482, "learning_rate": 1.8327839819510442e-05, "loss": 0.0979, "step": 3788 }, { "epoch": 0.63, "grad_norm": 0.8047396540641785, "learning_rate": 1.832684004029502e-05, "loss": 0.0992, "step": 3789 }, { "epoch": 0.63, "grad_norm": 0.8555633425712585, "learning_rate": 1.8325839989572417e-05, "loss": 0.0702, "step": 3790 }, { "epoch": 0.63, "grad_norm": 1.4439531564712524, "learning_rate": 1.8324839667375248e-05, "loss": 0.0738, "step": 3791 }, { "epoch": 0.63, "grad_norm": 0.7730358242988586, "learning_rate": 1.8323839073736125e-05, "loss": 0.0979, "step": 3792 }, { "epoch": 0.63, "grad_norm": 0.8211524486541748, "learning_rate": 1.8322838208687675e-05, "loss": 0.0946, "step": 3793 }, { "epoch": 0.63, "grad_norm": 0.7828194499015808, "learning_rate": 1.8321837072262538e-05, "loss": 0.096, "step": 3794 }, { "epoch": 0.63, "grad_norm": 0.9990326166152954, "learning_rate": 1.832083566449335e-05, "loss": 0.1041, "step": 3795 }, { "epoch": 0.63, "grad_norm": 0.7734086513519287, "learning_rate": 1.8319833985412768e-05, "loss": 0.0988, "step": 3796 }, { "epoch": 0.64, "grad_norm": 0.8571088314056396, "learning_rate": 1.8318832035053448e-05, "loss": 0.0868, "step": 3797 }, { "epoch": 0.64, "grad_norm": 0.8310582637786865, "learning_rate": 1.8317829813448066e-05, "loss": 0.0898, "step": 3798 }, { "epoch": 0.64, "grad_norm": 0.7504344582557678, "learning_rate": 1.8316827320629294e-05, "loss": 0.0608, "step": 3799 }, { "epoch": 0.64, "grad_norm": 0.811832070350647, "learning_rate": 1.8315824556629823e-05, "loss": 0.097, "step": 3800 }, { "epoch": 0.64, "grad_norm": 0.8943349719047546, "learning_rate": 1.8314821521482353e-05, "loss": 0.0796, "step": 3801 }, { "epoch": 0.64, "grad_norm": 0.9468461275100708, "learning_rate": 1.8313818215219586e-05, "loss": 0.0915, "step": 3802 }, { "epoch": 0.64, "grad_norm": 0.7897821068763733, "learning_rate": 1.831281463787423e-05, "loss": 0.0789, "step": 3803 }, { "epoch": 0.64, "grad_norm": 1.1446256637573242, "learning_rate": 1.831181078947902e-05, "loss": 0.0852, "step": 3804 }, { "epoch": 0.64, "grad_norm": 0.6921563148498535, "learning_rate": 1.8310806670066678e-05, "loss": 0.0674, "step": 3805 }, { "epoch": 0.64, "grad_norm": 0.948154866695404, "learning_rate": 1.830980227966995e-05, "loss": 0.0973, "step": 3806 }, { "epoch": 0.64, "grad_norm": 0.6953455209732056, "learning_rate": 1.830879761832158e-05, "loss": 0.0751, "step": 3807 }, { "epoch": 0.64, "grad_norm": 0.7746891975402832, "learning_rate": 1.8307792686054335e-05, "loss": 0.0813, "step": 3808 }, { "epoch": 0.64, "grad_norm": 0.9564676880836487, "learning_rate": 1.8306787482900978e-05, "loss": 0.1152, "step": 3809 }, { "epoch": 0.64, "grad_norm": 0.8164478540420532, "learning_rate": 1.8305782008894277e-05, "loss": 0.0769, "step": 3810 }, { "epoch": 0.64, "grad_norm": 0.8402560353279114, "learning_rate": 1.8304776264067026e-05, "loss": 0.0899, "step": 3811 }, { "epoch": 0.64, "grad_norm": 0.7517692446708679, "learning_rate": 1.8303770248452016e-05, "loss": 0.0842, "step": 3812 }, { "epoch": 0.64, "grad_norm": 0.7304825186729431, "learning_rate": 1.830276396208205e-05, "loss": 0.0752, "step": 3813 }, { "epoch": 0.64, "grad_norm": 0.7077839970588684, "learning_rate": 1.830175740498994e-05, "loss": 0.073, "step": 3814 }, { "epoch": 0.64, "grad_norm": 0.8160470724105835, "learning_rate": 1.8300750577208504e-05, "loss": 0.0768, "step": 3815 }, { "epoch": 0.64, "grad_norm": 0.5893492698669434, "learning_rate": 1.8299743478770572e-05, "loss": 0.0819, "step": 3816 }, { "epoch": 0.64, "grad_norm": 0.7402725219726562, "learning_rate": 1.829873610970898e-05, "loss": 0.0859, "step": 3817 }, { "epoch": 0.64, "grad_norm": 0.7236104607582092, "learning_rate": 1.8297728470056576e-05, "loss": 0.0827, "step": 3818 }, { "epoch": 0.64, "grad_norm": 0.6035426259040833, "learning_rate": 1.829672055984622e-05, "loss": 0.0566, "step": 3819 }, { "epoch": 0.64, "grad_norm": 0.9143230319023132, "learning_rate": 1.8295712379110762e-05, "loss": 0.1038, "step": 3820 }, { "epoch": 0.64, "grad_norm": 0.6538226008415222, "learning_rate": 1.8294703927883094e-05, "loss": 0.0883, "step": 3821 }, { "epoch": 0.64, "grad_norm": 0.7978992462158203, "learning_rate": 1.8293695206196084e-05, "loss": 0.0985, "step": 3822 }, { "epoch": 0.64, "grad_norm": 0.7605432868003845, "learning_rate": 1.8292686214082628e-05, "loss": 0.0929, "step": 3823 }, { "epoch": 0.64, "grad_norm": 0.7729348540306091, "learning_rate": 1.8291676951575625e-05, "loss": 0.0799, "step": 3824 }, { "epoch": 0.64, "grad_norm": 0.7644819021224976, "learning_rate": 1.829066741870798e-05, "loss": 0.0863, "step": 3825 }, { "epoch": 0.64, "grad_norm": 0.765705406665802, "learning_rate": 1.8289657615512616e-05, "loss": 0.0942, "step": 3826 }, { "epoch": 0.64, "grad_norm": 0.7370056509971619, "learning_rate": 1.8288647542022456e-05, "loss": 0.0806, "step": 3827 }, { "epoch": 0.64, "grad_norm": 0.7560639977455139, "learning_rate": 1.8287637198270435e-05, "loss": 0.0721, "step": 3828 }, { "epoch": 0.64, "grad_norm": 0.7923449873924255, "learning_rate": 1.828662658428949e-05, "loss": 0.0984, "step": 3829 }, { "epoch": 0.64, "grad_norm": 0.8333002924919128, "learning_rate": 1.8285615700112583e-05, "loss": 0.074, "step": 3830 }, { "epoch": 0.64, "grad_norm": 0.7769981622695923, "learning_rate": 1.828460454577267e-05, "loss": 0.0728, "step": 3831 }, { "epoch": 0.64, "grad_norm": 0.7224574089050293, "learning_rate": 1.8283593121302728e-05, "loss": 0.0808, "step": 3832 }, { "epoch": 0.64, "grad_norm": 0.7787107229232788, "learning_rate": 1.8282581426735725e-05, "loss": 0.0811, "step": 3833 }, { "epoch": 0.64, "grad_norm": 0.7984852194786072, "learning_rate": 1.828156946210466e-05, "loss": 0.092, "step": 3834 }, { "epoch": 0.64, "grad_norm": 0.7185860872268677, "learning_rate": 1.8280557227442517e-05, "loss": 0.0964, "step": 3835 }, { "epoch": 0.64, "grad_norm": 1.0296610593795776, "learning_rate": 1.8279544722782308e-05, "loss": 0.1219, "step": 3836 }, { "epoch": 0.64, "grad_norm": 0.7726308107376099, "learning_rate": 1.8278531948157052e-05, "loss": 0.0784, "step": 3837 }, { "epoch": 0.64, "grad_norm": 0.7843214869499207, "learning_rate": 1.8277518903599764e-05, "loss": 0.086, "step": 3838 }, { "epoch": 0.64, "grad_norm": 0.7417286038398743, "learning_rate": 1.8276505589143474e-05, "loss": 0.0832, "step": 3839 }, { "epoch": 0.64, "grad_norm": 0.7731792330741882, "learning_rate": 1.827549200482123e-05, "loss": 0.0899, "step": 3840 }, { "epoch": 0.64, "grad_norm": 0.7972432971000671, "learning_rate": 1.8274478150666076e-05, "loss": 0.0828, "step": 3841 }, { "epoch": 0.64, "grad_norm": 1.154313087463379, "learning_rate": 1.827346402671107e-05, "loss": 0.1033, "step": 3842 }, { "epoch": 0.64, "grad_norm": 0.6914381384849548, "learning_rate": 1.8272449632989282e-05, "loss": 0.0855, "step": 3843 }, { "epoch": 0.64, "grad_norm": 0.6853053569793701, "learning_rate": 1.8271434969533785e-05, "loss": 0.0657, "step": 3844 }, { "epoch": 0.64, "grad_norm": 0.5894160866737366, "learning_rate": 1.8270420036377668e-05, "loss": 0.081, "step": 3845 }, { "epoch": 0.64, "grad_norm": 0.5612815022468567, "learning_rate": 1.8269404833554016e-05, "loss": 0.072, "step": 3846 }, { "epoch": 0.64, "grad_norm": 0.9558992385864258, "learning_rate": 1.8268389361095935e-05, "loss": 0.1026, "step": 3847 }, { "epoch": 0.64, "grad_norm": 0.886063277721405, "learning_rate": 1.826737361903654e-05, "loss": 0.0874, "step": 3848 }, { "epoch": 0.64, "grad_norm": 0.9970815181732178, "learning_rate": 1.8266357607408943e-05, "loss": 0.0957, "step": 3849 }, { "epoch": 0.64, "grad_norm": 0.742101788520813, "learning_rate": 1.826534132624628e-05, "loss": 0.0803, "step": 3850 }, { "epoch": 0.64, "grad_norm": 0.8619897961616516, "learning_rate": 1.8264324775581682e-05, "loss": 0.0855, "step": 3851 }, { "epoch": 0.64, "grad_norm": 0.7468652725219727, "learning_rate": 1.8263307955448297e-05, "loss": 0.0742, "step": 3852 }, { "epoch": 0.64, "grad_norm": 1.0059798955917358, "learning_rate": 1.826229086587928e-05, "loss": 0.099, "step": 3853 }, { "epoch": 0.64, "grad_norm": 0.8775355815887451, "learning_rate": 1.8261273506907793e-05, "loss": 0.0933, "step": 3854 }, { "epoch": 0.64, "grad_norm": 1.0701749324798584, "learning_rate": 1.8260255878567014e-05, "loss": 0.1065, "step": 3855 }, { "epoch": 0.64, "grad_norm": 0.7663626074790955, "learning_rate": 1.8259237980890114e-05, "loss": 0.0827, "step": 3856 }, { "epoch": 0.65, "grad_norm": 0.6858493089675903, "learning_rate": 1.8258219813910292e-05, "loss": 0.0814, "step": 3857 }, { "epoch": 0.65, "grad_norm": 0.8656685948371887, "learning_rate": 1.8257201377660744e-05, "loss": 0.0881, "step": 3858 }, { "epoch": 0.65, "grad_norm": 0.8875246047973633, "learning_rate": 1.8256182672174675e-05, "loss": 0.084, "step": 3859 }, { "epoch": 0.65, "grad_norm": 1.1823440790176392, "learning_rate": 1.8255163697485303e-05, "loss": 0.1055, "step": 3860 }, { "epoch": 0.65, "grad_norm": 0.8540641665458679, "learning_rate": 1.825414445362585e-05, "loss": 0.0977, "step": 3861 }, { "epoch": 0.65, "grad_norm": 0.8469303250312805, "learning_rate": 1.8253124940629558e-05, "loss": 0.1062, "step": 3862 }, { "epoch": 0.65, "grad_norm": 0.7623737454414368, "learning_rate": 1.825210515852966e-05, "loss": 0.0879, "step": 3863 }, { "epoch": 0.65, "grad_norm": 0.7595600485801697, "learning_rate": 1.8251085107359415e-05, "loss": 0.0721, "step": 3864 }, { "epoch": 0.65, "grad_norm": 0.6937364935874939, "learning_rate": 1.8250064787152077e-05, "loss": 0.0757, "step": 3865 }, { "epoch": 0.65, "grad_norm": 0.7113355994224548, "learning_rate": 1.8249044197940915e-05, "loss": 0.0874, "step": 3866 }, { "epoch": 0.65, "grad_norm": 0.8179939389228821, "learning_rate": 1.824802333975921e-05, "loss": 0.1085, "step": 3867 }, { "epoch": 0.65, "grad_norm": 0.5656367540359497, "learning_rate": 1.8247002212640248e-05, "loss": 0.0805, "step": 3868 }, { "epoch": 0.65, "grad_norm": 0.8156682848930359, "learning_rate": 1.8245980816617322e-05, "loss": 0.108, "step": 3869 }, { "epoch": 0.65, "grad_norm": 0.984734296798706, "learning_rate": 1.8244959151723737e-05, "loss": 0.0921, "step": 3870 }, { "epoch": 0.65, "grad_norm": 0.9731590151786804, "learning_rate": 1.824393721799281e-05, "loss": 0.0911, "step": 3871 }, { "epoch": 0.65, "grad_norm": 0.7312173247337341, "learning_rate": 1.8242915015457855e-05, "loss": 0.0693, "step": 3872 }, { "epoch": 0.65, "grad_norm": 1.1194839477539062, "learning_rate": 1.8241892544152207e-05, "loss": 0.1093, "step": 3873 }, { "epoch": 0.65, "grad_norm": 0.8114229440689087, "learning_rate": 1.82408698041092e-05, "loss": 0.0909, "step": 3874 }, { "epoch": 0.65, "grad_norm": 0.8069378137588501, "learning_rate": 1.823984679536219e-05, "loss": 0.0832, "step": 3875 }, { "epoch": 0.65, "grad_norm": 0.6739144325256348, "learning_rate": 1.8238823517944525e-05, "loss": 0.0874, "step": 3876 }, { "epoch": 0.65, "grad_norm": 1.0042239427566528, "learning_rate": 1.8237799971889575e-05, "loss": 0.0748, "step": 3877 }, { "epoch": 0.65, "grad_norm": 0.641140878200531, "learning_rate": 1.8236776157230713e-05, "loss": 0.0637, "step": 3878 }, { "epoch": 0.65, "grad_norm": 0.7374155521392822, "learning_rate": 1.8235752074001323e-05, "loss": 0.0882, "step": 3879 }, { "epoch": 0.65, "grad_norm": 0.8084949851036072, "learning_rate": 1.8234727722234796e-05, "loss": 0.0962, "step": 3880 }, { "epoch": 0.65, "grad_norm": 0.8955854773521423, "learning_rate": 1.8233703101964528e-05, "loss": 0.0916, "step": 3881 }, { "epoch": 0.65, "grad_norm": 1.0566610097885132, "learning_rate": 1.8232678213223935e-05, "loss": 0.0777, "step": 3882 }, { "epoch": 0.65, "grad_norm": 0.6558647155761719, "learning_rate": 1.823165305604643e-05, "loss": 0.0749, "step": 3883 }, { "epoch": 0.65, "grad_norm": 0.8650108575820923, "learning_rate": 1.8230627630465442e-05, "loss": 0.1033, "step": 3884 }, { "epoch": 0.65, "grad_norm": 0.6832753419876099, "learning_rate": 1.8229601936514404e-05, "loss": 0.0762, "step": 3885 }, { "epoch": 0.65, "grad_norm": 1.019864797592163, "learning_rate": 1.8228575974226764e-05, "loss": 0.0994, "step": 3886 }, { "epoch": 0.65, "grad_norm": 0.780698835849762, "learning_rate": 1.822754974363597e-05, "loss": 0.0922, "step": 3887 }, { "epoch": 0.65, "grad_norm": 0.7237594723701477, "learning_rate": 1.822652324477548e-05, "loss": 0.0835, "step": 3888 }, { "epoch": 0.65, "grad_norm": 0.8897885084152222, "learning_rate": 1.822549647767878e-05, "loss": 0.0751, "step": 3889 }, { "epoch": 0.65, "grad_norm": 1.0591081380844116, "learning_rate": 1.8224469442379335e-05, "loss": 0.0852, "step": 3890 }, { "epoch": 0.65, "grad_norm": 0.7284974455833435, "learning_rate": 1.8223442138910636e-05, "loss": 0.0908, "step": 3891 }, { "epoch": 0.65, "grad_norm": 1.0503400564193726, "learning_rate": 1.822241456730618e-05, "loss": 0.0912, "step": 3892 }, { "epoch": 0.65, "grad_norm": 0.986082136631012, "learning_rate": 1.8221386727599472e-05, "loss": 0.0831, "step": 3893 }, { "epoch": 0.65, "grad_norm": 0.8946693539619446, "learning_rate": 1.822035861982403e-05, "loss": 0.0883, "step": 3894 }, { "epoch": 0.65, "grad_norm": 0.6821070313453674, "learning_rate": 1.8219330244013372e-05, "loss": 0.0815, "step": 3895 }, { "epoch": 0.65, "grad_norm": 0.8165785074234009, "learning_rate": 1.821830160020103e-05, "loss": 0.0964, "step": 3896 }, { "epoch": 0.65, "grad_norm": 0.7458159923553467, "learning_rate": 1.8217272688420545e-05, "loss": 0.0765, "step": 3897 }, { "epoch": 0.65, "grad_norm": 0.7766512036323547, "learning_rate": 1.821624350870547e-05, "loss": 0.0741, "step": 3898 }, { "epoch": 0.65, "grad_norm": 0.8441063761711121, "learning_rate": 1.821521406108935e-05, "loss": 0.1012, "step": 3899 }, { "epoch": 0.65, "grad_norm": 0.821257472038269, "learning_rate": 1.821418434560577e-05, "loss": 0.0832, "step": 3900 }, { "epoch": 0.65, "grad_norm": 0.9051394462585449, "learning_rate": 1.821315436228829e-05, "loss": 0.0958, "step": 3901 }, { "epoch": 0.65, "grad_norm": 0.6244733929634094, "learning_rate": 1.82121241111705e-05, "loss": 0.0783, "step": 3902 }, { "epoch": 0.65, "grad_norm": 0.8191055059432983, "learning_rate": 1.8211093592285993e-05, "loss": 0.0921, "step": 3903 }, { "epoch": 0.65, "grad_norm": 0.8799372315406799, "learning_rate": 1.8210062805668367e-05, "loss": 0.1053, "step": 3904 }, { "epoch": 0.65, "grad_norm": 0.8845820426940918, "learning_rate": 1.8209031751351238e-05, "loss": 0.0837, "step": 3905 }, { "epoch": 0.65, "grad_norm": 1.2814576625823975, "learning_rate": 1.820800042936822e-05, "loss": 0.0587, "step": 3906 }, { "epoch": 0.65, "grad_norm": 0.8223716616630554, "learning_rate": 1.8206968839752942e-05, "loss": 0.0733, "step": 3907 }, { "epoch": 0.65, "grad_norm": 0.8582982420921326, "learning_rate": 1.820593698253904e-05, "loss": 0.0934, "step": 3908 }, { "epoch": 0.65, "grad_norm": 0.7065194845199585, "learning_rate": 1.820490485776016e-05, "loss": 0.0761, "step": 3909 }, { "epoch": 0.65, "grad_norm": 0.9238486289978027, "learning_rate": 1.820387246544995e-05, "loss": 0.0986, "step": 3910 }, { "epoch": 0.65, "grad_norm": 0.9940495491027832, "learning_rate": 1.8202839805642084e-05, "loss": 0.0953, "step": 3911 }, { "epoch": 0.65, "grad_norm": 1.376036286354065, "learning_rate": 1.8201806878370224e-05, "loss": 0.0948, "step": 3912 }, { "epoch": 0.65, "grad_norm": 2.223284959793091, "learning_rate": 1.820077368366805e-05, "loss": 0.0885, "step": 3913 }, { "epoch": 0.65, "grad_norm": 0.852709174156189, "learning_rate": 1.8199740221569254e-05, "loss": 0.1113, "step": 3914 }, { "epoch": 0.65, "grad_norm": 0.6827889680862427, "learning_rate": 1.8198706492107537e-05, "loss": 0.0756, "step": 3915 }, { "epoch": 0.65, "grad_norm": 1.216532826423645, "learning_rate": 1.8197672495316597e-05, "loss": 0.1102, "step": 3916 }, { "epoch": 0.66, "grad_norm": 1.793662428855896, "learning_rate": 1.819663823123015e-05, "loss": 0.0973, "step": 3917 }, { "epoch": 0.66, "grad_norm": 1.0668561458587646, "learning_rate": 1.819560369988193e-05, "loss": 0.0922, "step": 3918 }, { "epoch": 0.66, "grad_norm": 1.1659960746765137, "learning_rate": 1.819456890130565e-05, "loss": 0.0896, "step": 3919 }, { "epoch": 0.66, "grad_norm": 1.4788130521774292, "learning_rate": 1.819353383553507e-05, "loss": 0.0977, "step": 3920 }, { "epoch": 0.66, "grad_norm": 1.083796501159668, "learning_rate": 1.8192498502603927e-05, "loss": 0.075, "step": 3921 }, { "epoch": 0.66, "grad_norm": 1.0687204599380493, "learning_rate": 1.8191462902545985e-05, "loss": 0.1064, "step": 3922 }, { "epoch": 0.66, "grad_norm": 0.8772405982017517, "learning_rate": 1.819042703539501e-05, "loss": 0.1029, "step": 3923 }, { "epoch": 0.66, "grad_norm": 0.7584906220436096, "learning_rate": 1.818939090118478e-05, "loss": 0.0805, "step": 3924 }, { "epoch": 0.66, "grad_norm": 1.037634015083313, "learning_rate": 1.8188354499949072e-05, "loss": 0.0943, "step": 3925 }, { "epoch": 0.66, "grad_norm": 1.552292823791504, "learning_rate": 1.818731783172169e-05, "loss": 0.1088, "step": 3926 }, { "epoch": 0.66, "grad_norm": 1.2912627458572388, "learning_rate": 1.8186280896536427e-05, "loss": 0.0943, "step": 3927 }, { "epoch": 0.66, "grad_norm": 0.8613605499267578, "learning_rate": 1.8185243694427098e-05, "loss": 0.0882, "step": 3928 }, { "epoch": 0.66, "grad_norm": 0.7910138964653015, "learning_rate": 1.818420622542752e-05, "loss": 0.0879, "step": 3929 }, { "epoch": 0.66, "grad_norm": 1.3951104879379272, "learning_rate": 1.818316848957152e-05, "loss": 0.075, "step": 3930 }, { "epoch": 0.66, "grad_norm": 1.048453450202942, "learning_rate": 1.818213048689294e-05, "loss": 0.1193, "step": 3931 }, { "epoch": 0.66, "grad_norm": 1.117687463760376, "learning_rate": 1.818109221742562e-05, "loss": 0.0893, "step": 3932 }, { "epoch": 0.66, "grad_norm": 0.9273361563682556, "learning_rate": 1.818005368120342e-05, "loss": 0.0964, "step": 3933 }, { "epoch": 0.66, "grad_norm": 0.719890832901001, "learning_rate": 1.8179014878260197e-05, "loss": 0.08, "step": 3934 }, { "epoch": 0.66, "grad_norm": 0.6168434023857117, "learning_rate": 1.8177975808629825e-05, "loss": 0.0744, "step": 3935 }, { "epoch": 0.66, "grad_norm": 1.0397837162017822, "learning_rate": 1.8176936472346182e-05, "loss": 0.0997, "step": 3936 }, { "epoch": 0.66, "grad_norm": 1.0202291011810303, "learning_rate": 1.8175896869443158e-05, "loss": 0.0846, "step": 3937 }, { "epoch": 0.66, "grad_norm": 0.7829870581626892, "learning_rate": 1.8174856999954653e-05, "loss": 0.0723, "step": 3938 }, { "epoch": 0.66, "grad_norm": 0.8342706561088562, "learning_rate": 1.8173816863914574e-05, "loss": 0.096, "step": 3939 }, { "epoch": 0.66, "grad_norm": 0.7504724264144897, "learning_rate": 1.8172776461356828e-05, "loss": 0.0766, "step": 3940 }, { "epoch": 0.66, "grad_norm": 0.8964791297912598, "learning_rate": 1.8171735792315347e-05, "loss": 0.0901, "step": 3941 }, { "epoch": 0.66, "grad_norm": 0.6727593541145325, "learning_rate": 1.817069485682406e-05, "loss": 0.0582, "step": 3942 }, { "epoch": 0.66, "grad_norm": 0.8165672421455383, "learning_rate": 1.8169653654916904e-05, "loss": 0.0979, "step": 3943 }, { "epoch": 0.66, "grad_norm": 0.9242165088653564, "learning_rate": 1.816861218662784e-05, "loss": 0.0651, "step": 3944 }, { "epoch": 0.66, "grad_norm": 0.7292496562004089, "learning_rate": 1.8167570451990814e-05, "loss": 0.0687, "step": 3945 }, { "epoch": 0.66, "grad_norm": 0.9032397866249084, "learning_rate": 1.8166528451039803e-05, "loss": 0.1042, "step": 3946 }, { "epoch": 0.66, "grad_norm": 0.7810202240943909, "learning_rate": 1.8165486183808778e-05, "loss": 0.0918, "step": 3947 }, { "epoch": 0.66, "grad_norm": 0.8330039381980896, "learning_rate": 1.816444365033172e-05, "loss": 0.0647, "step": 3948 }, { "epoch": 0.66, "grad_norm": 1.1690572500228882, "learning_rate": 1.816340085064263e-05, "loss": 0.084, "step": 3949 }, { "epoch": 0.66, "grad_norm": 0.9948681592941284, "learning_rate": 1.8162357784775504e-05, "loss": 0.096, "step": 3950 }, { "epoch": 0.66, "grad_norm": 1.1270859241485596, "learning_rate": 1.8161314452764355e-05, "loss": 0.0973, "step": 3951 }, { "epoch": 0.66, "grad_norm": 0.8124518394470215, "learning_rate": 1.8160270854643197e-05, "loss": 0.0811, "step": 3952 }, { "epoch": 0.66, "grad_norm": 0.7817276120185852, "learning_rate": 1.8159226990446066e-05, "loss": 0.0872, "step": 3953 }, { "epoch": 0.66, "grad_norm": 0.7103145718574524, "learning_rate": 1.8158182860206996e-05, "loss": 0.0848, "step": 3954 }, { "epoch": 0.66, "grad_norm": 0.8702818751335144, "learning_rate": 1.8157138463960027e-05, "loss": 0.0982, "step": 3955 }, { "epoch": 0.66, "grad_norm": 0.6070399880409241, "learning_rate": 1.8156093801739217e-05, "loss": 0.0688, "step": 3956 }, { "epoch": 0.66, "grad_norm": 1.0392966270446777, "learning_rate": 1.815504887357863e-05, "loss": 0.0851, "step": 3957 }, { "epoch": 0.66, "grad_norm": 0.7411405444145203, "learning_rate": 1.815400367951234e-05, "loss": 0.0821, "step": 3958 }, { "epoch": 0.66, "grad_norm": 0.9608147740364075, "learning_rate": 1.8152958219574415e-05, "loss": 0.1132, "step": 3959 }, { "epoch": 0.66, "grad_norm": 0.799111545085907, "learning_rate": 1.8151912493798952e-05, "loss": 0.085, "step": 3960 }, { "epoch": 0.66, "grad_norm": 0.8872697353363037, "learning_rate": 1.815086650222005e-05, "loss": 0.1061, "step": 3961 }, { "epoch": 0.66, "grad_norm": 0.7999774813652039, "learning_rate": 1.814982024487181e-05, "loss": 0.1077, "step": 3962 }, { "epoch": 0.66, "grad_norm": 0.6896386742591858, "learning_rate": 1.814877372178835e-05, "loss": 0.0773, "step": 3963 }, { "epoch": 0.66, "grad_norm": 0.9608442187309265, "learning_rate": 1.814772693300379e-05, "loss": 0.111, "step": 3964 }, { "epoch": 0.66, "grad_norm": 0.6820970177650452, "learning_rate": 1.8146679878552266e-05, "loss": 0.067, "step": 3965 }, { "epoch": 0.66, "grad_norm": 0.8434610962867737, "learning_rate": 1.8145632558467916e-05, "loss": 0.0868, "step": 3966 }, { "epoch": 0.66, "grad_norm": 0.7545525431632996, "learning_rate": 1.814458497278489e-05, "loss": 0.0952, "step": 3967 }, { "epoch": 0.66, "grad_norm": 0.7859029173851013, "learning_rate": 1.814353712153734e-05, "loss": 0.0897, "step": 3968 }, { "epoch": 0.66, "grad_norm": 0.8155562281608582, "learning_rate": 1.8142489004759442e-05, "loss": 0.108, "step": 3969 }, { "epoch": 0.66, "grad_norm": 0.902388870716095, "learning_rate": 1.814144062248537e-05, "loss": 0.0979, "step": 3970 }, { "epoch": 0.66, "grad_norm": 0.778106689453125, "learning_rate": 1.8140391974749295e-05, "loss": 0.0856, "step": 3971 }, { "epoch": 0.66, "grad_norm": 0.8482239246368408, "learning_rate": 1.813934306158543e-05, "loss": 0.0815, "step": 3972 }, { "epoch": 0.66, "grad_norm": 1.1703675985336304, "learning_rate": 1.813829388302796e-05, "loss": 0.0991, "step": 3973 }, { "epoch": 0.66, "grad_norm": 1.1509946584701538, "learning_rate": 1.81372444391111e-05, "loss": 0.0998, "step": 3974 }, { "epoch": 0.66, "grad_norm": 0.9786803722381592, "learning_rate": 1.8136194729869068e-05, "loss": 0.0933, "step": 3975 }, { "epoch": 0.66, "grad_norm": 0.8089635372161865, "learning_rate": 1.8135144755336093e-05, "loss": 0.0898, "step": 3976 }, { "epoch": 0.67, "grad_norm": 0.8298758864402771, "learning_rate": 1.813409451554641e-05, "loss": 0.0867, "step": 3977 }, { "epoch": 0.67, "grad_norm": 0.7732557654380798, "learning_rate": 1.8133044010534262e-05, "loss": 0.1117, "step": 3978 }, { "epoch": 0.67, "grad_norm": 0.7436382174491882, "learning_rate": 1.8131993240333906e-05, "loss": 0.0921, "step": 3979 }, { "epoch": 0.67, "grad_norm": 0.8099895119667053, "learning_rate": 1.8130942204979596e-05, "loss": 0.0837, "step": 3980 }, { "epoch": 0.67, "grad_norm": 0.8153477907180786, "learning_rate": 1.812989090450561e-05, "loss": 0.0976, "step": 3981 }, { "epoch": 0.67, "grad_norm": 0.7950857877731323, "learning_rate": 1.8128839338946222e-05, "loss": 0.0776, "step": 3982 }, { "epoch": 0.67, "grad_norm": 0.7692915797233582, "learning_rate": 1.8127787508335724e-05, "loss": 0.1022, "step": 3983 }, { "epoch": 0.67, "grad_norm": 0.5362192392349243, "learning_rate": 1.8126735412708407e-05, "loss": 0.0702, "step": 3984 }, { "epoch": 0.67, "grad_norm": 0.958634614944458, "learning_rate": 1.8125683052098583e-05, "loss": 0.0889, "step": 3985 }, { "epoch": 0.67, "grad_norm": 0.6421512365341187, "learning_rate": 1.8124630426540555e-05, "loss": 0.0611, "step": 3986 }, { "epoch": 0.67, "grad_norm": 0.7800718545913696, "learning_rate": 1.8123577536068654e-05, "loss": 0.0742, "step": 3987 }, { "epoch": 0.67, "grad_norm": 0.6670809388160706, "learning_rate": 1.8122524380717214e-05, "loss": 0.0656, "step": 3988 }, { "epoch": 0.67, "grad_norm": 1.1518582105636597, "learning_rate": 1.8121470960520563e-05, "loss": 0.0981, "step": 3989 }, { "epoch": 0.67, "grad_norm": 0.823925256729126, "learning_rate": 1.8120417275513055e-05, "loss": 0.0923, "step": 3990 }, { "epoch": 0.67, "grad_norm": 0.5897912979125977, "learning_rate": 1.8119363325729048e-05, "loss": 0.0738, "step": 3991 }, { "epoch": 0.67, "grad_norm": 0.9968577027320862, "learning_rate": 1.8118309111202904e-05, "loss": 0.0958, "step": 3992 }, { "epoch": 0.67, "grad_norm": 1.035353422164917, "learning_rate": 1.8117254631969003e-05, "loss": 0.1019, "step": 3993 }, { "epoch": 0.67, "grad_norm": 1.048319935798645, "learning_rate": 1.811619988806172e-05, "loss": 0.0964, "step": 3994 }, { "epoch": 0.67, "grad_norm": 0.6234295964241028, "learning_rate": 1.8115144879515454e-05, "loss": 0.0833, "step": 3995 }, { "epoch": 0.67, "grad_norm": 0.7965455055236816, "learning_rate": 1.8114089606364595e-05, "loss": 0.0937, "step": 3996 }, { "epoch": 0.67, "grad_norm": 0.7397464513778687, "learning_rate": 1.811303406864356e-05, "loss": 0.0842, "step": 3997 }, { "epoch": 0.67, "grad_norm": 1.4968775510787964, "learning_rate": 1.8111978266386768e-05, "loss": 0.1187, "step": 3998 }, { "epoch": 0.67, "grad_norm": 0.8313891887664795, "learning_rate": 1.8110922199628637e-05, "loss": 0.0861, "step": 3999 }, { "epoch": 0.67, "grad_norm": 0.6796692609786987, "learning_rate": 1.81098658684036e-05, "loss": 0.0882, "step": 4000 }, { "epoch": 0.67, "grad_norm": 0.7622343301773071, "learning_rate": 1.810880927274611e-05, "loss": 0.0836, "step": 4001 }, { "epoch": 0.67, "grad_norm": 0.8164510130882263, "learning_rate": 1.8107752412690608e-05, "loss": 0.0895, "step": 4002 }, { "epoch": 0.67, "grad_norm": 0.8074567914009094, "learning_rate": 1.8106695288271567e-05, "loss": 0.0881, "step": 4003 }, { "epoch": 0.67, "grad_norm": 0.7804201245307922, "learning_rate": 1.8105637899523443e-05, "loss": 0.1118, "step": 4004 }, { "epoch": 0.67, "grad_norm": 0.8026860952377319, "learning_rate": 1.8104580246480724e-05, "loss": 0.0948, "step": 4005 }, { "epoch": 0.67, "grad_norm": 0.8205186128616333, "learning_rate": 1.8103522329177885e-05, "loss": 0.0673, "step": 4006 }, { "epoch": 0.67, "grad_norm": 0.8792144656181335, "learning_rate": 1.810246414764943e-05, "loss": 0.0991, "step": 4007 }, { "epoch": 0.67, "grad_norm": 0.5370821952819824, "learning_rate": 1.8101405701929857e-05, "loss": 0.0561, "step": 4008 }, { "epoch": 0.67, "grad_norm": 0.8125017285346985, "learning_rate": 1.8100346992053684e-05, "loss": 0.0856, "step": 4009 }, { "epoch": 0.67, "grad_norm": 0.8402009010314941, "learning_rate": 1.8099288018055424e-05, "loss": 0.0756, "step": 4010 }, { "epoch": 0.67, "grad_norm": 0.7933691143989563, "learning_rate": 1.809822877996961e-05, "loss": 0.086, "step": 4011 }, { "epoch": 0.67, "grad_norm": 1.0007051229476929, "learning_rate": 1.8097169277830783e-05, "loss": 0.1005, "step": 4012 }, { "epoch": 0.67, "grad_norm": 0.8818656206130981, "learning_rate": 1.8096109511673483e-05, "loss": 0.1027, "step": 4013 }, { "epoch": 0.67, "grad_norm": 0.6375101804733276, "learning_rate": 1.809504948153227e-05, "loss": 0.0736, "step": 4014 }, { "epoch": 0.67, "grad_norm": 0.6185370087623596, "learning_rate": 1.8093989187441706e-05, "loss": 0.0797, "step": 4015 }, { "epoch": 0.67, "grad_norm": 1.310560703277588, "learning_rate": 1.809292862943636e-05, "loss": 0.0804, "step": 4016 }, { "epoch": 0.67, "grad_norm": 0.7516772747039795, "learning_rate": 1.8091867807550816e-05, "loss": 0.0788, "step": 4017 }, { "epoch": 0.67, "grad_norm": 0.7938093543052673, "learning_rate": 1.8090806721819664e-05, "loss": 0.0718, "step": 4018 }, { "epoch": 0.67, "grad_norm": 0.8602066040039062, "learning_rate": 1.8089745372277506e-05, "loss": 0.0704, "step": 4019 }, { "epoch": 0.67, "grad_norm": 0.7467474937438965, "learning_rate": 1.8088683758958938e-05, "loss": 0.0852, "step": 4020 }, { "epoch": 0.67, "grad_norm": 0.8438509702682495, "learning_rate": 1.8087621881898582e-05, "loss": 0.1085, "step": 4021 }, { "epoch": 0.67, "grad_norm": 0.7189898490905762, "learning_rate": 1.8086559741131062e-05, "loss": 0.0923, "step": 4022 }, { "epoch": 0.67, "grad_norm": 0.8183339834213257, "learning_rate": 1.8085497336691012e-05, "loss": 0.0688, "step": 4023 }, { "epoch": 0.67, "grad_norm": 0.7906711101531982, "learning_rate": 1.8084434668613066e-05, "loss": 0.0834, "step": 4024 }, { "epoch": 0.67, "grad_norm": 1.3617950677871704, "learning_rate": 1.8083371736931882e-05, "loss": 0.0925, "step": 4025 }, { "epoch": 0.67, "grad_norm": 0.8401695489883423, "learning_rate": 1.8082308541682113e-05, "loss": 0.0862, "step": 4026 }, { "epoch": 0.67, "grad_norm": 0.6924389600753784, "learning_rate": 1.8081245082898425e-05, "loss": 0.0998, "step": 4027 }, { "epoch": 0.67, "grad_norm": 0.8352039456367493, "learning_rate": 1.80801813606155e-05, "loss": 0.0829, "step": 4028 }, { "epoch": 0.67, "grad_norm": 0.7426961064338684, "learning_rate": 1.8079117374868015e-05, "loss": 0.0958, "step": 4029 }, { "epoch": 0.67, "grad_norm": 0.7409701943397522, "learning_rate": 1.8078053125690664e-05, "loss": 0.0956, "step": 4030 }, { "epoch": 0.67, "grad_norm": 0.784232497215271, "learning_rate": 1.8076988613118152e-05, "loss": 0.1008, "step": 4031 }, { "epoch": 0.67, "grad_norm": 0.7588709592819214, "learning_rate": 1.8075923837185187e-05, "loss": 0.0874, "step": 4032 }, { "epoch": 0.67, "grad_norm": 0.8043075799942017, "learning_rate": 1.8074858797926484e-05, "loss": 0.0953, "step": 4033 }, { "epoch": 0.67, "grad_norm": 0.6946576833724976, "learning_rate": 1.8073793495376774e-05, "loss": 0.0875, "step": 4034 }, { "epoch": 0.67, "grad_norm": 0.6771055459976196, "learning_rate": 1.807272792957079e-05, "loss": 0.0991, "step": 4035 }, { "epoch": 0.68, "grad_norm": 0.7476613521575928, "learning_rate": 1.8071662100543276e-05, "loss": 0.0838, "step": 4036 }, { "epoch": 0.68, "grad_norm": 0.67589271068573, "learning_rate": 1.807059600832899e-05, "loss": 0.0714, "step": 4037 }, { "epoch": 0.68, "grad_norm": 0.6563923358917236, "learning_rate": 1.8069529652962686e-05, "loss": 0.0789, "step": 4038 }, { "epoch": 0.68, "grad_norm": 1.0242018699645996, "learning_rate": 1.8068463034479137e-05, "loss": 0.1166, "step": 4039 }, { "epoch": 0.68, "grad_norm": 0.7498611211776733, "learning_rate": 1.8067396152913125e-05, "loss": 0.0914, "step": 4040 }, { "epoch": 0.68, "grad_norm": 0.6280302405357361, "learning_rate": 1.8066329008299428e-05, "loss": 0.0839, "step": 4041 }, { "epoch": 0.68, "grad_norm": 0.7759609222412109, "learning_rate": 1.8065261600672853e-05, "loss": 0.0692, "step": 4042 }, { "epoch": 0.68, "grad_norm": 0.7989157438278198, "learning_rate": 1.8064193930068193e-05, "loss": 0.0909, "step": 4043 }, { "epoch": 0.68, "grad_norm": 0.7705819010734558, "learning_rate": 1.806312599652027e-05, "loss": 0.0877, "step": 4044 }, { "epoch": 0.68, "grad_norm": 0.6914607882499695, "learning_rate": 1.80620578000639e-05, "loss": 0.0775, "step": 4045 }, { "epoch": 0.68, "grad_norm": 1.0255913734436035, "learning_rate": 1.8060989340733913e-05, "loss": 0.0883, "step": 4046 }, { "epoch": 0.68, "grad_norm": 0.7652072310447693, "learning_rate": 1.8059920618565154e-05, "loss": 0.0951, "step": 4047 }, { "epoch": 0.68, "grad_norm": 0.5533939599990845, "learning_rate": 1.805885163359246e-05, "loss": 0.0632, "step": 4048 }, { "epoch": 0.68, "grad_norm": 0.816518247127533, "learning_rate": 1.8057782385850693e-05, "loss": 0.1117, "step": 4049 }, { "epoch": 0.68, "grad_norm": 0.7945713996887207, "learning_rate": 1.8056712875374713e-05, "loss": 0.0889, "step": 4050 }, { "epoch": 0.68, "grad_norm": 0.8261563777923584, "learning_rate": 1.80556431021994e-05, "loss": 0.0934, "step": 4051 }, { "epoch": 0.68, "grad_norm": 0.8679637908935547, "learning_rate": 1.8054573066359625e-05, "loss": 0.0873, "step": 4052 }, { "epoch": 0.68, "grad_norm": 0.8137367367744446, "learning_rate": 1.805350276789029e-05, "loss": 0.0685, "step": 4053 }, { "epoch": 0.68, "grad_norm": 0.7443971037864685, "learning_rate": 1.8052432206826283e-05, "loss": 0.0883, "step": 4054 }, { "epoch": 0.68, "grad_norm": 0.6741156578063965, "learning_rate": 1.8051361383202514e-05, "loss": 0.0847, "step": 4055 }, { "epoch": 0.68, "grad_norm": 0.7585614919662476, "learning_rate": 1.80502902970539e-05, "loss": 0.0875, "step": 4056 }, { "epoch": 0.68, "grad_norm": 0.8336877822875977, "learning_rate": 1.8049218948415367e-05, "loss": 0.0895, "step": 4057 }, { "epoch": 0.68, "grad_norm": 0.6989441514015198, "learning_rate": 1.8048147337321845e-05, "loss": 0.0785, "step": 4058 }, { "epoch": 0.68, "grad_norm": 0.6494542360305786, "learning_rate": 1.8047075463808276e-05, "loss": 0.0896, "step": 4059 }, { "epoch": 0.68, "grad_norm": 0.624304473400116, "learning_rate": 1.804600332790961e-05, "loss": 0.0596, "step": 4060 }, { "epoch": 0.68, "grad_norm": 0.6983996629714966, "learning_rate": 1.80449309296608e-05, "loss": 0.0958, "step": 4061 }, { "epoch": 0.68, "grad_norm": 0.7289878726005554, "learning_rate": 1.804385826909682e-05, "loss": 0.0879, "step": 4062 }, { "epoch": 0.68, "grad_norm": 1.2474002838134766, "learning_rate": 1.8042785346252647e-05, "loss": 0.061, "step": 4063 }, { "epoch": 0.68, "grad_norm": 0.7111255526542664, "learning_rate": 1.8041712161163255e-05, "loss": 0.0942, "step": 4064 }, { "epoch": 0.68, "grad_norm": 0.5864938497543335, "learning_rate": 1.804063871386365e-05, "loss": 0.0719, "step": 4065 }, { "epoch": 0.68, "grad_norm": 0.6571208238601685, "learning_rate": 1.803956500438882e-05, "loss": 0.0643, "step": 4066 }, { "epoch": 0.68, "grad_norm": 0.7146235704421997, "learning_rate": 1.803849103277378e-05, "loss": 0.0684, "step": 4067 }, { "epoch": 0.68, "grad_norm": 0.7498424053192139, "learning_rate": 1.8037416799053554e-05, "loss": 0.0801, "step": 4068 }, { "epoch": 0.68, "grad_norm": 0.9308764338493347, "learning_rate": 1.8036342303263156e-05, "loss": 0.0814, "step": 4069 }, { "epoch": 0.68, "grad_norm": 0.5944055914878845, "learning_rate": 1.8035267545437633e-05, "loss": 0.0676, "step": 4070 }, { "epoch": 0.68, "grad_norm": 0.760786235332489, "learning_rate": 1.8034192525612027e-05, "loss": 0.0835, "step": 4071 }, { "epoch": 0.68, "grad_norm": 0.7733598351478577, "learning_rate": 1.8033117243821383e-05, "loss": 0.0875, "step": 4072 }, { "epoch": 0.68, "grad_norm": 0.8085383772850037, "learning_rate": 1.803204170010077e-05, "loss": 0.0781, "step": 4073 }, { "epoch": 0.68, "grad_norm": 0.7380790710449219, "learning_rate": 1.8030965894485252e-05, "loss": 0.0931, "step": 4074 }, { "epoch": 0.68, "grad_norm": 0.972216010093689, "learning_rate": 1.8029889827009913e-05, "loss": 0.0856, "step": 4075 }, { "epoch": 0.68, "grad_norm": 0.548033595085144, "learning_rate": 1.8028813497709832e-05, "loss": 0.0545, "step": 4076 }, { "epoch": 0.68, "grad_norm": 1.1663440465927124, "learning_rate": 1.802773690662011e-05, "loss": 0.0827, "step": 4077 }, { "epoch": 0.68, "grad_norm": 0.6259416341781616, "learning_rate": 1.8026660053775847e-05, "loss": 0.0778, "step": 4078 }, { "epoch": 0.68, "grad_norm": 0.8766859173774719, "learning_rate": 1.8025582939212158e-05, "loss": 0.0959, "step": 4079 }, { "epoch": 0.68, "grad_norm": 0.7135632634162903, "learning_rate": 1.8024505562964162e-05, "loss": 0.067, "step": 4080 }, { "epoch": 0.68, "grad_norm": 0.7515193223953247, "learning_rate": 1.802342792506699e-05, "loss": 0.064, "step": 4081 }, { "epoch": 0.68, "grad_norm": 0.9533038139343262, "learning_rate": 1.8022350025555775e-05, "loss": 0.0907, "step": 4082 }, { "epoch": 0.68, "grad_norm": 0.8403437733650208, "learning_rate": 1.8021271864465672e-05, "loss": 0.0687, "step": 4083 }, { "epoch": 0.68, "grad_norm": 0.9335361123085022, "learning_rate": 1.8020193441831825e-05, "loss": 0.0919, "step": 4084 }, { "epoch": 0.68, "grad_norm": 0.6460625529289246, "learning_rate": 1.8019114757689403e-05, "loss": 0.0729, "step": 4085 }, { "epoch": 0.68, "grad_norm": 0.6540259122848511, "learning_rate": 1.8018035812073582e-05, "loss": 0.077, "step": 4086 }, { "epoch": 0.68, "grad_norm": 0.737317681312561, "learning_rate": 1.8016956605019534e-05, "loss": 0.0884, "step": 4087 }, { "epoch": 0.68, "grad_norm": 0.8322553038597107, "learning_rate": 1.8015877136562455e-05, "loss": 0.1016, "step": 4088 }, { "epoch": 0.68, "grad_norm": 0.9371144771575928, "learning_rate": 1.801479740673754e-05, "loss": 0.0983, "step": 4089 }, { "epoch": 0.68, "grad_norm": 0.651607096195221, "learning_rate": 1.801371741557999e-05, "loss": 0.0844, "step": 4090 }, { "epoch": 0.68, "grad_norm": 0.9068033695220947, "learning_rate": 1.801263716312503e-05, "loss": 0.0906, "step": 4091 }, { "epoch": 0.68, "grad_norm": 0.6433637738227844, "learning_rate": 1.801155664940787e-05, "loss": 0.0824, "step": 4092 }, { "epoch": 0.68, "grad_norm": 0.6785829067230225, "learning_rate": 1.801047587446375e-05, "loss": 0.0769, "step": 4093 }, { "epoch": 0.68, "grad_norm": 1.031304121017456, "learning_rate": 1.8009394838327913e-05, "loss": 0.0984, "step": 4094 }, { "epoch": 0.68, "grad_norm": 0.5048850774765015, "learning_rate": 1.80083135410356e-05, "loss": 0.0527, "step": 4095 }, { "epoch": 0.69, "grad_norm": 0.8553940057754517, "learning_rate": 1.8007231982622072e-05, "loss": 0.0964, "step": 4096 }, { "epoch": 0.69, "grad_norm": 0.9840506315231323, "learning_rate": 1.8006150163122595e-05, "loss": 0.1038, "step": 4097 }, { "epoch": 0.69, "grad_norm": 0.6881411671638489, "learning_rate": 1.8005068082572435e-05, "loss": 0.0673, "step": 4098 }, { "epoch": 0.69, "grad_norm": 0.7561087012290955, "learning_rate": 1.8003985741006887e-05, "loss": 0.0814, "step": 4099 }, { "epoch": 0.69, "grad_norm": 0.9930940866470337, "learning_rate": 1.8002903138461235e-05, "loss": 0.091, "step": 4100 }, { "epoch": 0.69, "grad_norm": 0.7394000887870789, "learning_rate": 1.800182027497078e-05, "loss": 0.0817, "step": 4101 }, { "epoch": 0.69, "grad_norm": 0.5826160311698914, "learning_rate": 1.800073715057083e-05, "loss": 0.0581, "step": 4102 }, { "epoch": 0.69, "grad_norm": 0.7085882425308228, "learning_rate": 1.7999653765296702e-05, "loss": 0.0755, "step": 4103 }, { "epoch": 0.69, "grad_norm": 1.4028154611587524, "learning_rate": 1.799857011918372e-05, "loss": 0.0666, "step": 4104 }, { "epoch": 0.69, "grad_norm": 0.6483830809593201, "learning_rate": 1.7997486212267222e-05, "loss": 0.0826, "step": 4105 }, { "epoch": 0.69, "grad_norm": 0.8738676905632019, "learning_rate": 1.7996402044582544e-05, "loss": 0.1025, "step": 4106 }, { "epoch": 0.69, "grad_norm": 1.0314745903015137, "learning_rate": 1.799531761616504e-05, "loss": 0.089, "step": 4107 }, { "epoch": 0.69, "grad_norm": 0.8754022121429443, "learning_rate": 1.7994232927050068e-05, "loss": 0.0982, "step": 4108 }, { "epoch": 0.69, "grad_norm": 0.8188311457633972, "learning_rate": 1.7993147977273e-05, "loss": 0.1002, "step": 4109 }, { "epoch": 0.69, "grad_norm": 0.8477310538291931, "learning_rate": 1.7992062766869206e-05, "loss": 0.0967, "step": 4110 }, { "epoch": 0.69, "grad_norm": 0.9408719539642334, "learning_rate": 1.799097729587407e-05, "loss": 0.0834, "step": 4111 }, { "epoch": 0.69, "grad_norm": 0.807221531867981, "learning_rate": 1.798989156432299e-05, "loss": 0.0923, "step": 4112 }, { "epoch": 0.69, "grad_norm": 1.2525336742401123, "learning_rate": 1.798880557225137e-05, "loss": 0.1143, "step": 4113 }, { "epoch": 0.69, "grad_norm": 0.7468522191047668, "learning_rate": 1.798771931969461e-05, "loss": 0.0785, "step": 4114 }, { "epoch": 0.69, "grad_norm": 0.8285285234451294, "learning_rate": 1.798663280668814e-05, "loss": 0.0871, "step": 4115 }, { "epoch": 0.69, "grad_norm": 0.7509372234344482, "learning_rate": 1.7985546033267377e-05, "loss": 0.1116, "step": 4116 }, { "epoch": 0.69, "grad_norm": 0.6006680727005005, "learning_rate": 1.7984458999467763e-05, "loss": 0.0833, "step": 4117 }, { "epoch": 0.69, "grad_norm": 0.6287937760353088, "learning_rate": 1.7983371705324744e-05, "loss": 0.0829, "step": 4118 }, { "epoch": 0.69, "grad_norm": 0.8767995238304138, "learning_rate": 1.7982284150873765e-05, "loss": 0.0882, "step": 4119 }, { "epoch": 0.69, "grad_norm": 0.9545342326164246, "learning_rate": 1.7981196336150296e-05, "loss": 0.0779, "step": 4120 }, { "epoch": 0.69, "grad_norm": 1.262569785118103, "learning_rate": 1.7980108261189798e-05, "loss": 0.0855, "step": 4121 }, { "epoch": 0.69, "grad_norm": 0.6055977940559387, "learning_rate": 1.7979019926027755e-05, "loss": 0.0857, "step": 4122 }, { "epoch": 0.69, "grad_norm": 0.6832489967346191, "learning_rate": 1.797793133069965e-05, "loss": 0.0752, "step": 4123 }, { "epoch": 0.69, "grad_norm": 0.557838499546051, "learning_rate": 1.797684247524098e-05, "loss": 0.0821, "step": 4124 }, { "epoch": 0.69, "grad_norm": 0.7486187815666199, "learning_rate": 1.797575335968725e-05, "loss": 0.0996, "step": 4125 }, { "epoch": 0.69, "grad_norm": 0.7834559679031372, "learning_rate": 1.7974663984073965e-05, "loss": 0.0615, "step": 4126 }, { "epoch": 0.69, "grad_norm": 0.6582287549972534, "learning_rate": 1.7973574348436655e-05, "loss": 0.074, "step": 4127 }, { "epoch": 0.69, "grad_norm": 0.9467206597328186, "learning_rate": 1.797248445281084e-05, "loss": 0.0892, "step": 4128 }, { "epoch": 0.69, "grad_norm": 0.6730304956436157, "learning_rate": 1.7971394297232066e-05, "loss": 0.0879, "step": 4129 }, { "epoch": 0.69, "grad_norm": 0.7554039359092712, "learning_rate": 1.7970303881735875e-05, "loss": 0.0763, "step": 4130 }, { "epoch": 0.69, "grad_norm": 1.645755648612976, "learning_rate": 1.796921320635782e-05, "loss": 0.0979, "step": 4131 }, { "epoch": 0.69, "grad_norm": 0.868879497051239, "learning_rate": 1.7968122271133463e-05, "loss": 0.1063, "step": 4132 }, { "epoch": 0.69, "grad_norm": 0.8926588892936707, "learning_rate": 1.796703107609838e-05, "loss": 0.0865, "step": 4133 }, { "epoch": 0.69, "grad_norm": 0.646823525428772, "learning_rate": 1.7965939621288147e-05, "loss": 0.0704, "step": 4134 }, { "epoch": 0.69, "grad_norm": 0.6958905458450317, "learning_rate": 1.7964847906738355e-05, "loss": 0.0529, "step": 4135 }, { "epoch": 0.69, "grad_norm": 0.8865640759468079, "learning_rate": 1.7963755932484598e-05, "loss": 0.0954, "step": 4136 }, { "epoch": 0.69, "grad_norm": 0.789632260799408, "learning_rate": 1.796266369856248e-05, "loss": 0.0911, "step": 4137 }, { "epoch": 0.69, "grad_norm": 0.6665111780166626, "learning_rate": 1.796157120500762e-05, "loss": 0.0744, "step": 4138 }, { "epoch": 0.69, "grad_norm": 0.7046594023704529, "learning_rate": 1.7960478451855636e-05, "loss": 0.0727, "step": 4139 }, { "epoch": 0.69, "grad_norm": 0.8717254996299744, "learning_rate": 1.795938543914216e-05, "loss": 0.0888, "step": 4140 }, { "epoch": 0.69, "grad_norm": 0.759017825126648, "learning_rate": 1.795829216690283e-05, "loss": 0.107, "step": 4141 }, { "epoch": 0.69, "grad_norm": 0.7660875916481018, "learning_rate": 1.7957198635173296e-05, "loss": 0.082, "step": 4142 }, { "epoch": 0.69, "grad_norm": 0.9315849542617798, "learning_rate": 1.795610484398921e-05, "loss": 0.0741, "step": 4143 }, { "epoch": 0.69, "grad_norm": 0.6751892566680908, "learning_rate": 1.795501079338624e-05, "loss": 0.0841, "step": 4144 }, { "epoch": 0.69, "grad_norm": 0.9404386281967163, "learning_rate": 1.795391648340006e-05, "loss": 0.0704, "step": 4145 }, { "epoch": 0.69, "grad_norm": 0.9127230048179626, "learning_rate": 1.7952821914066348e-05, "loss": 0.0975, "step": 4146 }, { "epoch": 0.69, "grad_norm": 0.8940105438232422, "learning_rate": 1.7951727085420793e-05, "loss": 0.0685, "step": 4147 }, { "epoch": 0.69, "grad_norm": 0.663806676864624, "learning_rate": 1.7950631997499096e-05, "loss": 0.0832, "step": 4148 }, { "epoch": 0.69, "grad_norm": 0.574532151222229, "learning_rate": 1.7949536650336962e-05, "loss": 0.0675, "step": 4149 }, { "epoch": 0.69, "grad_norm": 0.820823609828949, "learning_rate": 1.7948441043970112e-05, "loss": 0.1055, "step": 4150 }, { "epoch": 0.69, "grad_norm": 0.9909505248069763, "learning_rate": 1.7947345178434256e-05, "loss": 0.0784, "step": 4151 }, { "epoch": 0.69, "grad_norm": 0.5667752027511597, "learning_rate": 1.794624905376514e-05, "loss": 0.0542, "step": 4152 }, { "epoch": 0.69, "grad_norm": 0.7711182832717896, "learning_rate": 1.7945152669998502e-05, "loss": 0.0608, "step": 4153 }, { "epoch": 0.69, "grad_norm": 0.8573163151741028, "learning_rate": 1.7944056027170082e-05, "loss": 0.0853, "step": 4154 }, { "epoch": 0.69, "grad_norm": 0.7672711610794067, "learning_rate": 1.794295912531565e-05, "loss": 0.0855, "step": 4155 }, { "epoch": 0.7, "grad_norm": 0.9854814410209656, "learning_rate": 1.794186196447096e-05, "loss": 0.0938, "step": 4156 }, { "epoch": 0.7, "grad_norm": 0.8143845796585083, "learning_rate": 1.7940764544671795e-05, "loss": 0.0971, "step": 4157 }, { "epoch": 0.7, "grad_norm": 0.7606521844863892, "learning_rate": 1.7939666865953936e-05, "loss": 0.0667, "step": 4158 }, { "epoch": 0.7, "grad_norm": 1.3290523290634155, "learning_rate": 1.793856892835317e-05, "loss": 0.0778, "step": 4159 }, { "epoch": 0.7, "grad_norm": 0.671991229057312, "learning_rate": 1.79374707319053e-05, "loss": 0.0858, "step": 4160 }, { "epoch": 0.7, "grad_norm": 0.6735081672668457, "learning_rate": 1.7936372276646134e-05, "loss": 0.0689, "step": 4161 }, { "epoch": 0.7, "grad_norm": 0.8139294981956482, "learning_rate": 1.7935273562611488e-05, "loss": 0.0964, "step": 4162 }, { "epoch": 0.7, "grad_norm": 0.7484202980995178, "learning_rate": 1.793417458983719e-05, "loss": 0.0884, "step": 4163 }, { "epoch": 0.7, "grad_norm": 0.8097774386405945, "learning_rate": 1.793307535835907e-05, "loss": 0.0725, "step": 4164 }, { "epoch": 0.7, "grad_norm": 0.9018165469169617, "learning_rate": 1.7931975868212968e-05, "loss": 0.075, "step": 4165 }, { "epoch": 0.7, "grad_norm": 0.9040600657463074, "learning_rate": 1.7930876119434735e-05, "loss": 0.0922, "step": 4166 }, { "epoch": 0.7, "grad_norm": 0.7707933783531189, "learning_rate": 1.7929776112060233e-05, "loss": 0.0685, "step": 4167 }, { "epoch": 0.7, "grad_norm": 0.6910591125488281, "learning_rate": 1.792867584612533e-05, "loss": 0.065, "step": 4168 }, { "epoch": 0.7, "grad_norm": 0.9113003015518188, "learning_rate": 1.7927575321665894e-05, "loss": 0.0889, "step": 4169 }, { "epoch": 0.7, "grad_norm": 1.0570473670959473, "learning_rate": 1.7926474538717817e-05, "loss": 0.1076, "step": 4170 }, { "epoch": 0.7, "grad_norm": 0.6549849510192871, "learning_rate": 1.792537349731699e-05, "loss": 0.0617, "step": 4171 }, { "epoch": 0.7, "grad_norm": 0.8204537034034729, "learning_rate": 1.792427219749931e-05, "loss": 0.0749, "step": 4172 }, { "epoch": 0.7, "grad_norm": 0.6717638373374939, "learning_rate": 1.792317063930069e-05, "loss": 0.0757, "step": 4173 }, { "epoch": 0.7, "grad_norm": 0.6705218553543091, "learning_rate": 1.7922068822757046e-05, "loss": 0.0592, "step": 4174 }, { "epoch": 0.7, "grad_norm": 0.7073222398757935, "learning_rate": 1.7920966747904302e-05, "loss": 0.0768, "step": 4175 }, { "epoch": 0.7, "grad_norm": 0.762432336807251, "learning_rate": 1.79198644147784e-05, "loss": 0.1137, "step": 4176 }, { "epoch": 0.7, "grad_norm": 0.83664470911026, "learning_rate": 1.7918761823415272e-05, "loss": 0.0961, "step": 4177 }, { "epoch": 0.7, "grad_norm": 0.9251970052719116, "learning_rate": 1.791765897385088e-05, "loss": 0.1136, "step": 4178 }, { "epoch": 0.7, "grad_norm": 1.0511549711227417, "learning_rate": 1.7916555866121178e-05, "loss": 0.108, "step": 4179 }, { "epoch": 0.7, "grad_norm": 0.7342883944511414, "learning_rate": 1.7915452500262135e-05, "loss": 0.0784, "step": 4180 }, { "epoch": 0.7, "grad_norm": 0.6737179756164551, "learning_rate": 1.7914348876309726e-05, "loss": 0.0907, "step": 4181 }, { "epoch": 0.7, "grad_norm": 0.6534196734428406, "learning_rate": 1.791324499429994e-05, "loss": 0.0705, "step": 4182 }, { "epoch": 0.7, "grad_norm": 0.5756143927574158, "learning_rate": 1.791214085426877e-05, "loss": 0.0981, "step": 4183 }, { "epoch": 0.7, "grad_norm": 0.5905675888061523, "learning_rate": 1.791103645625221e-05, "loss": 0.0592, "step": 4184 }, { "epoch": 0.7, "grad_norm": 0.8146266937255859, "learning_rate": 1.7909931800286284e-05, "loss": 0.1029, "step": 4185 }, { "epoch": 0.7, "grad_norm": 0.88271164894104, "learning_rate": 1.7908826886407e-05, "loss": 0.0913, "step": 4186 }, { "epoch": 0.7, "grad_norm": 1.0114964246749878, "learning_rate": 1.790772171465039e-05, "loss": 0.1025, "step": 4187 }, { "epoch": 0.7, "grad_norm": 0.5984085202217102, "learning_rate": 1.790661628505249e-05, "loss": 0.0923, "step": 4188 }, { "epoch": 0.7, "grad_norm": 0.7391047477722168, "learning_rate": 1.7905510597649338e-05, "loss": 0.084, "step": 4189 }, { "epoch": 0.7, "grad_norm": 0.6331743001937866, "learning_rate": 1.790440465247699e-05, "loss": 0.0716, "step": 4190 }, { "epoch": 0.7, "grad_norm": 1.1032447814941406, "learning_rate": 1.790329844957151e-05, "loss": 0.1108, "step": 4191 }, { "epoch": 0.7, "grad_norm": 0.6802269816398621, "learning_rate": 1.7902191988968962e-05, "loss": 0.078, "step": 4192 }, { "epoch": 0.7, "grad_norm": 0.6672481894493103, "learning_rate": 1.7901085270705427e-05, "loss": 0.0786, "step": 4193 }, { "epoch": 0.7, "grad_norm": 0.8522356152534485, "learning_rate": 1.789997829481699e-05, "loss": 0.0955, "step": 4194 }, { "epoch": 0.7, "grad_norm": 0.7461534142494202, "learning_rate": 1.7898871061339744e-05, "loss": 0.0882, "step": 4195 }, { "epoch": 0.7, "grad_norm": 0.7267016768455505, "learning_rate": 1.7897763570309793e-05, "loss": 0.0766, "step": 4196 }, { "epoch": 0.7, "grad_norm": 0.7617211937904358, "learning_rate": 1.789665582176325e-05, "loss": 0.0851, "step": 4197 }, { "epoch": 0.7, "grad_norm": 0.7179238200187683, "learning_rate": 1.789554781573623e-05, "loss": 0.079, "step": 4198 }, { "epoch": 0.7, "grad_norm": 0.8141071200370789, "learning_rate": 1.7894439552264864e-05, "loss": 0.0874, "step": 4199 }, { "epoch": 0.7, "grad_norm": 0.8341511487960815, "learning_rate": 1.789333103138529e-05, "loss": 0.0802, "step": 4200 }, { "epoch": 0.7, "grad_norm": 0.7191738486289978, "learning_rate": 1.7892222253133646e-05, "loss": 0.0797, "step": 4201 }, { "epoch": 0.7, "grad_norm": 0.6800904870033264, "learning_rate": 1.7891113217546095e-05, "loss": 0.0667, "step": 4202 }, { "epoch": 0.7, "grad_norm": 0.8715111613273621, "learning_rate": 1.789000392465879e-05, "loss": 0.0837, "step": 4203 }, { "epoch": 0.7, "grad_norm": 1.0487924814224243, "learning_rate": 1.7888894374507904e-05, "loss": 0.0879, "step": 4204 }, { "epoch": 0.7, "grad_norm": 0.7772823572158813, "learning_rate": 1.7887784567129617e-05, "loss": 0.087, "step": 4205 }, { "epoch": 0.7, "grad_norm": 0.8611441254615784, "learning_rate": 1.788667450256011e-05, "loss": 0.0848, "step": 4206 }, { "epoch": 0.7, "grad_norm": 1.1960996389389038, "learning_rate": 1.7885564180835587e-05, "loss": 0.0932, "step": 4207 }, { "epoch": 0.7, "grad_norm": 0.6413004994392395, "learning_rate": 1.7884453601992244e-05, "loss": 0.0741, "step": 4208 }, { "epoch": 0.7, "grad_norm": 0.7163740396499634, "learning_rate": 1.7883342766066295e-05, "loss": 0.0815, "step": 4209 }, { "epoch": 0.7, "grad_norm": 0.9188684225082397, "learning_rate": 1.788223167309396e-05, "loss": 0.0978, "step": 4210 }, { "epoch": 0.7, "grad_norm": 0.7816643118858337, "learning_rate": 1.788112032311147e-05, "loss": 0.097, "step": 4211 }, { "epoch": 0.7, "grad_norm": 0.5490330457687378, "learning_rate": 1.788000871615506e-05, "loss": 0.0591, "step": 4212 }, { "epoch": 0.7, "grad_norm": 0.8989414572715759, "learning_rate": 1.7878896852260975e-05, "loss": 0.0878, "step": 4213 }, { "epoch": 0.7, "grad_norm": 0.7192882895469666, "learning_rate": 1.7877784731465473e-05, "loss": 0.0903, "step": 4214 }, { "epoch": 0.7, "grad_norm": 1.1332581043243408, "learning_rate": 1.787667235380481e-05, "loss": 0.1221, "step": 4215 }, { "epoch": 0.71, "grad_norm": 0.9195358157157898, "learning_rate": 1.7875559719315256e-05, "loss": 0.1022, "step": 4216 }, { "epoch": 0.71, "grad_norm": 0.7074797749519348, "learning_rate": 1.7874446828033096e-05, "loss": 0.0746, "step": 4217 }, { "epoch": 0.71, "grad_norm": 0.7290644645690918, "learning_rate": 1.7873333679994612e-05, "loss": 0.096, "step": 4218 }, { "epoch": 0.71, "grad_norm": 0.7451256513595581, "learning_rate": 1.78722202752361e-05, "loss": 0.0731, "step": 4219 }, { "epoch": 0.71, "grad_norm": 0.8790632486343384, "learning_rate": 1.7871106613793866e-05, "loss": 0.0734, "step": 4220 }, { "epoch": 0.71, "grad_norm": 0.7517185211181641, "learning_rate": 1.7869992695704224e-05, "loss": 0.0706, "step": 4221 }, { "epoch": 0.71, "grad_norm": 0.6577708721160889, "learning_rate": 1.7868878521003495e-05, "loss": 0.0819, "step": 4222 }, { "epoch": 0.71, "grad_norm": 1.4519531726837158, "learning_rate": 1.7867764089728003e-05, "loss": 0.1225, "step": 4223 }, { "epoch": 0.71, "grad_norm": 0.8022634387016296, "learning_rate": 1.7866649401914084e-05, "loss": 0.0909, "step": 4224 }, { "epoch": 0.71, "grad_norm": 0.6366205215454102, "learning_rate": 1.786553445759809e-05, "loss": 0.0676, "step": 4225 }, { "epoch": 0.71, "grad_norm": 1.0497602224349976, "learning_rate": 1.7864419256816378e-05, "loss": 0.1005, "step": 4226 }, { "epoch": 0.71, "grad_norm": 0.8521907329559326, "learning_rate": 1.7863303799605303e-05, "loss": 0.0856, "step": 4227 }, { "epoch": 0.71, "grad_norm": 0.6298814415931702, "learning_rate": 1.7862188086001238e-05, "loss": 0.0785, "step": 4228 }, { "epoch": 0.71, "grad_norm": 0.9203845262527466, "learning_rate": 1.786107211604056e-05, "loss": 0.0915, "step": 4229 }, { "epoch": 0.71, "grad_norm": 0.789274275302887, "learning_rate": 1.7859955889759663e-05, "loss": 0.0923, "step": 4230 }, { "epoch": 0.71, "grad_norm": 0.6439633965492249, "learning_rate": 1.7858839407194937e-05, "loss": 0.0952, "step": 4231 }, { "epoch": 0.71, "grad_norm": 0.6939886212348938, "learning_rate": 1.785772266838279e-05, "loss": 0.0693, "step": 4232 }, { "epoch": 0.71, "grad_norm": 0.8802188038825989, "learning_rate": 1.7856605673359634e-05, "loss": 0.0927, "step": 4233 }, { "epoch": 0.71, "grad_norm": 0.691624104976654, "learning_rate": 1.7855488422161888e-05, "loss": 0.0701, "step": 4234 }, { "epoch": 0.71, "grad_norm": 0.8836978077888489, "learning_rate": 1.7854370914825986e-05, "loss": 0.098, "step": 4235 }, { "epoch": 0.71, "grad_norm": 0.7101722955703735, "learning_rate": 1.7853253151388358e-05, "loss": 0.0735, "step": 4236 }, { "epoch": 0.71, "grad_norm": 0.6947230100631714, "learning_rate": 1.7852135131885456e-05, "loss": 0.0933, "step": 4237 }, { "epoch": 0.71, "grad_norm": 1.3817358016967773, "learning_rate": 1.7851016856353735e-05, "loss": 0.0907, "step": 4238 }, { "epoch": 0.71, "grad_norm": 0.9711581468582153, "learning_rate": 1.7849898324829652e-05, "loss": 0.0882, "step": 4239 }, { "epoch": 0.71, "grad_norm": 0.6445136070251465, "learning_rate": 1.7848779537349682e-05, "loss": 0.0678, "step": 4240 }, { "epoch": 0.71, "grad_norm": 1.121314525604248, "learning_rate": 1.7847660493950308e-05, "loss": 0.083, "step": 4241 }, { "epoch": 0.71, "grad_norm": 0.6954829096794128, "learning_rate": 1.7846541194668017e-05, "loss": 0.0758, "step": 4242 }, { "epoch": 0.71, "grad_norm": 0.7367213368415833, "learning_rate": 1.78454216395393e-05, "loss": 0.0688, "step": 4243 }, { "epoch": 0.71, "grad_norm": 1.0273762941360474, "learning_rate": 1.7844301828600657e-05, "loss": 0.0937, "step": 4244 }, { "epoch": 0.71, "grad_norm": 0.947221040725708, "learning_rate": 1.7843181761888616e-05, "loss": 0.0891, "step": 4245 }, { "epoch": 0.71, "grad_norm": 0.8315688967704773, "learning_rate": 1.7842061439439687e-05, "loss": 0.0983, "step": 4246 }, { "epoch": 0.71, "grad_norm": 0.6600057482719421, "learning_rate": 1.7840940861290402e-05, "loss": 0.0841, "step": 4247 }, { "epoch": 0.71, "grad_norm": 0.7594487071037292, "learning_rate": 1.78398200274773e-05, "loss": 0.0961, "step": 4248 }, { "epoch": 0.71, "grad_norm": 0.8832510709762573, "learning_rate": 1.7838698938036927e-05, "loss": 0.0782, "step": 4249 }, { "epoch": 0.71, "grad_norm": 0.6404479742050171, "learning_rate": 1.783757759300584e-05, "loss": 0.0824, "step": 4250 }, { "epoch": 0.71, "grad_norm": 0.6738224029541016, "learning_rate": 1.7836455992420597e-05, "loss": 0.0781, "step": 4251 }, { "epoch": 0.71, "grad_norm": 0.6429944038391113, "learning_rate": 1.7835334136317767e-05, "loss": 0.0774, "step": 4252 }, { "epoch": 0.71, "grad_norm": 0.7775359749794006, "learning_rate": 1.783421202473394e-05, "loss": 0.0776, "step": 4253 }, { "epoch": 0.71, "grad_norm": 1.1407397985458374, "learning_rate": 1.7833089657705695e-05, "loss": 0.081, "step": 4254 }, { "epoch": 0.71, "grad_norm": 0.687465250492096, "learning_rate": 1.783196703526963e-05, "loss": 0.0812, "step": 4255 }, { "epoch": 0.71, "grad_norm": 0.908755362033844, "learning_rate": 1.7830844157462353e-05, "loss": 0.084, "step": 4256 }, { "epoch": 0.71, "grad_norm": 0.8112311363220215, "learning_rate": 1.782972102432047e-05, "loss": 0.0717, "step": 4257 }, { "epoch": 0.71, "grad_norm": 0.8647748231887817, "learning_rate": 1.7828597635880613e-05, "loss": 0.0903, "step": 4258 }, { "epoch": 0.71, "grad_norm": 0.7624556422233582, "learning_rate": 1.78274739921794e-05, "loss": 0.0908, "step": 4259 }, { "epoch": 0.71, "grad_norm": 0.7439150810241699, "learning_rate": 1.7826350093253475e-05, "loss": 0.0894, "step": 4260 }, { "epoch": 0.71, "grad_norm": 0.7786937355995178, "learning_rate": 1.7825225939139485e-05, "loss": 0.0903, "step": 4261 }, { "epoch": 0.71, "grad_norm": 0.6679126024246216, "learning_rate": 1.7824101529874078e-05, "loss": 0.0782, "step": 4262 }, { "epoch": 0.71, "grad_norm": 0.9311540722846985, "learning_rate": 1.7822976865493925e-05, "loss": 0.0882, "step": 4263 }, { "epoch": 0.71, "grad_norm": 0.7756119966506958, "learning_rate": 1.7821851946035692e-05, "loss": 0.0904, "step": 4264 }, { "epoch": 0.71, "grad_norm": 0.854789674282074, "learning_rate": 1.782072677153606e-05, "loss": 0.0954, "step": 4265 }, { "epoch": 0.71, "grad_norm": 0.9039271473884583, "learning_rate": 1.7819601342031715e-05, "loss": 0.0782, "step": 4266 }, { "epoch": 0.71, "grad_norm": 0.9536340236663818, "learning_rate": 1.7818475657559355e-05, "loss": 0.0781, "step": 4267 }, { "epoch": 0.71, "grad_norm": 1.0733896493911743, "learning_rate": 1.7817349718155685e-05, "loss": 0.0992, "step": 4268 }, { "epoch": 0.71, "grad_norm": 1.1993911266326904, "learning_rate": 1.7816223523857416e-05, "loss": 0.1053, "step": 4269 }, { "epoch": 0.71, "grad_norm": 0.8556056618690491, "learning_rate": 1.781509707470127e-05, "loss": 0.0863, "step": 4270 }, { "epoch": 0.71, "grad_norm": 0.5967483520507812, "learning_rate": 1.7813970370723973e-05, "loss": 0.0586, "step": 4271 }, { "epoch": 0.71, "grad_norm": 0.9077247977256775, "learning_rate": 1.781284341196227e-05, "loss": 0.083, "step": 4272 }, { "epoch": 0.71, "grad_norm": 0.6172086000442505, "learning_rate": 1.78117161984529e-05, "loss": 0.0747, "step": 4273 }, { "epoch": 0.71, "grad_norm": 0.9361357092857361, "learning_rate": 1.7810588730232622e-05, "loss": 0.1018, "step": 4274 }, { "epoch": 0.72, "grad_norm": 0.5863590836524963, "learning_rate": 1.7809461007338195e-05, "loss": 0.0749, "step": 4275 }, { "epoch": 0.72, "grad_norm": 0.6474326848983765, "learning_rate": 1.780833302980639e-05, "loss": 0.0945, "step": 4276 }, { "epoch": 0.72, "grad_norm": 0.7312130331993103, "learning_rate": 1.7807204797673985e-05, "loss": 0.0806, "step": 4277 }, { "epoch": 0.72, "grad_norm": 0.6976248025894165, "learning_rate": 1.7806076310977772e-05, "loss": 0.0867, "step": 4278 }, { "epoch": 0.72, "grad_norm": 0.9573804140090942, "learning_rate": 1.7804947569754545e-05, "loss": 0.0879, "step": 4279 }, { "epoch": 0.72, "grad_norm": 0.7058680653572083, "learning_rate": 1.7803818574041106e-05, "loss": 0.08, "step": 4280 }, { "epoch": 0.72, "grad_norm": 0.6460515856742859, "learning_rate": 1.780268932387427e-05, "loss": 0.0698, "step": 4281 }, { "epoch": 0.72, "grad_norm": 0.7304143905639648, "learning_rate": 1.7801559819290853e-05, "loss": 0.0945, "step": 4282 }, { "epoch": 0.72, "grad_norm": 0.9926034808158875, "learning_rate": 1.780043006032769e-05, "loss": 0.0999, "step": 4283 }, { "epoch": 0.72, "grad_norm": 0.7030094265937805, "learning_rate": 1.779930004702162e-05, "loss": 0.0946, "step": 4284 }, { "epoch": 0.72, "grad_norm": 0.6013909578323364, "learning_rate": 1.7798169779409476e-05, "loss": 0.0696, "step": 4285 }, { "epoch": 0.72, "grad_norm": 0.8153520822525024, "learning_rate": 1.7797039257528122e-05, "loss": 0.0827, "step": 4286 }, { "epoch": 0.72, "grad_norm": 0.7505491971969604, "learning_rate": 1.779590848141442e-05, "loss": 0.0921, "step": 4287 }, { "epoch": 0.72, "grad_norm": 0.751103401184082, "learning_rate": 1.7794777451105236e-05, "loss": 0.0734, "step": 4288 }, { "epoch": 0.72, "grad_norm": 0.7288132309913635, "learning_rate": 1.7793646166637453e-05, "loss": 0.0856, "step": 4289 }, { "epoch": 0.72, "grad_norm": 0.7900828719139099, "learning_rate": 1.7792514628047955e-05, "loss": 0.0937, "step": 4290 }, { "epoch": 0.72, "grad_norm": 0.663625955581665, "learning_rate": 1.779138283537364e-05, "loss": 0.0713, "step": 4291 }, { "epoch": 0.72, "grad_norm": 0.6985719799995422, "learning_rate": 1.7790250788651406e-05, "loss": 0.0639, "step": 4292 }, { "epoch": 0.72, "grad_norm": 0.9676957130432129, "learning_rate": 1.7789118487918172e-05, "loss": 0.0797, "step": 4293 }, { "epoch": 0.72, "grad_norm": 0.8075591921806335, "learning_rate": 1.7787985933210854e-05, "loss": 0.0878, "step": 4294 }, { "epoch": 0.72, "grad_norm": 0.8140998482704163, "learning_rate": 1.778685312456638e-05, "loss": 0.0831, "step": 4295 }, { "epoch": 0.72, "grad_norm": 0.9153005480766296, "learning_rate": 1.7785720062021687e-05, "loss": 0.0948, "step": 4296 }, { "epoch": 0.72, "grad_norm": 0.5601939558982849, "learning_rate": 1.7784586745613722e-05, "loss": 0.0835, "step": 4297 }, { "epoch": 0.72, "grad_norm": 0.7195088267326355, "learning_rate": 1.7783453175379437e-05, "loss": 0.0892, "step": 4298 }, { "epoch": 0.72, "grad_norm": 0.9249626994132996, "learning_rate": 1.778231935135579e-05, "loss": 0.0836, "step": 4299 }, { "epoch": 0.72, "grad_norm": 1.1201139688491821, "learning_rate": 1.778118527357976e-05, "loss": 0.0878, "step": 4300 }, { "epoch": 0.72, "grad_norm": 0.7368614077568054, "learning_rate": 1.7780050942088315e-05, "loss": 0.0904, "step": 4301 }, { "epoch": 0.72, "grad_norm": 0.6277403235435486, "learning_rate": 1.7778916356918444e-05, "loss": 0.068, "step": 4302 }, { "epoch": 0.72, "grad_norm": 0.7740164995193481, "learning_rate": 1.777778151810715e-05, "loss": 0.0913, "step": 4303 }, { "epoch": 0.72, "grad_norm": 0.6039140820503235, "learning_rate": 1.777664642569142e-05, "loss": 0.0734, "step": 4304 }, { "epoch": 0.72, "grad_norm": 0.5967577695846558, "learning_rate": 1.777551107970828e-05, "loss": 0.0814, "step": 4305 }, { "epoch": 0.72, "grad_norm": 1.1092010736465454, "learning_rate": 1.7774375480194743e-05, "loss": 0.0888, "step": 4306 }, { "epoch": 0.72, "grad_norm": 0.793796181678772, "learning_rate": 1.7773239627187834e-05, "loss": 0.0922, "step": 4307 }, { "epoch": 0.72, "grad_norm": 1.8282010555267334, "learning_rate": 1.7772103520724594e-05, "loss": 0.1136, "step": 4308 }, { "epoch": 0.72, "grad_norm": 0.731647253036499, "learning_rate": 1.7770967160842066e-05, "loss": 0.0729, "step": 4309 }, { "epoch": 0.72, "grad_norm": 1.2548189163208008, "learning_rate": 1.77698305475773e-05, "loss": 0.0928, "step": 4310 }, { "epoch": 0.72, "grad_norm": 0.9154706597328186, "learning_rate": 1.776869368096736e-05, "loss": 0.1093, "step": 4311 }, { "epoch": 0.72, "grad_norm": 0.9578049778938293, "learning_rate": 1.7767556561049314e-05, "loss": 0.1059, "step": 4312 }, { "epoch": 0.72, "grad_norm": 0.7358598709106445, "learning_rate": 1.7766419187860234e-05, "loss": 0.0913, "step": 4313 }, { "epoch": 0.72, "grad_norm": 0.6467428207397461, "learning_rate": 1.7765281561437214e-05, "loss": 0.0639, "step": 4314 }, { "epoch": 0.72, "grad_norm": 0.5938075184822083, "learning_rate": 1.7764143681817344e-05, "loss": 0.0654, "step": 4315 }, { "epoch": 0.72, "grad_norm": 0.7362730503082275, "learning_rate": 1.7763005549037723e-05, "loss": 0.0771, "step": 4316 }, { "epoch": 0.72, "grad_norm": 0.9596080183982849, "learning_rate": 1.776186716313547e-05, "loss": 0.0718, "step": 4317 }, { "epoch": 0.72, "grad_norm": 0.6102712750434875, "learning_rate": 1.7760728524147692e-05, "loss": 0.0755, "step": 4318 }, { "epoch": 0.72, "grad_norm": 0.9331917762756348, "learning_rate": 1.7759589632111522e-05, "loss": 0.0722, "step": 4319 }, { "epoch": 0.72, "grad_norm": 0.9318839311599731, "learning_rate": 1.7758450487064097e-05, "loss": 0.0981, "step": 4320 }, { "epoch": 0.72, "grad_norm": 0.5146337747573853, "learning_rate": 1.7757311089042553e-05, "loss": 0.0708, "step": 4321 }, { "epoch": 0.72, "grad_norm": 0.9708269238471985, "learning_rate": 1.7756171438084053e-05, "loss": 0.0848, "step": 4322 }, { "epoch": 0.72, "grad_norm": 0.7749739289283752, "learning_rate": 1.7755031534225743e-05, "loss": 0.0723, "step": 4323 }, { "epoch": 0.72, "grad_norm": 1.077685832977295, "learning_rate": 1.7753891377504805e-05, "loss": 0.1033, "step": 4324 }, { "epoch": 0.72, "grad_norm": 0.8548460602760315, "learning_rate": 1.7752750967958404e-05, "loss": 0.0904, "step": 4325 }, { "epoch": 0.72, "grad_norm": 0.5973462462425232, "learning_rate": 1.775161030562373e-05, "loss": 0.0786, "step": 4326 }, { "epoch": 0.72, "grad_norm": 0.6002786755561829, "learning_rate": 1.775046939053797e-05, "loss": 0.0726, "step": 4327 }, { "epoch": 0.72, "grad_norm": 1.0907750129699707, "learning_rate": 1.7749328222738335e-05, "loss": 0.0937, "step": 4328 }, { "epoch": 0.72, "grad_norm": 0.7014174461364746, "learning_rate": 1.7748186802262027e-05, "loss": 0.0808, "step": 4329 }, { "epoch": 0.72, "grad_norm": 0.8986218571662903, "learning_rate": 1.7747045129146264e-05, "loss": 0.0944, "step": 4330 }, { "epoch": 0.72, "grad_norm": 0.7950147986412048, "learning_rate": 1.7745903203428275e-05, "loss": 0.0965, "step": 4331 }, { "epoch": 0.72, "grad_norm": 0.668360710144043, "learning_rate": 1.774476102514529e-05, "loss": 0.0753, "step": 4332 }, { "epoch": 0.72, "grad_norm": 0.6859085559844971, "learning_rate": 1.7743618594334555e-05, "loss": 0.0944, "step": 4333 }, { "epoch": 0.72, "grad_norm": 0.8289254307746887, "learning_rate": 1.7742475911033317e-05, "loss": 0.0875, "step": 4334 }, { "epoch": 0.73, "grad_norm": 0.7785412073135376, "learning_rate": 1.7741332975278835e-05, "loss": 0.0699, "step": 4335 }, { "epoch": 0.73, "grad_norm": 0.6298400163650513, "learning_rate": 1.7740189787108378e-05, "loss": 0.0671, "step": 4336 }, { "epoch": 0.73, "grad_norm": 0.7304459810256958, "learning_rate": 1.7739046346559217e-05, "loss": 0.0673, "step": 4337 }, { "epoch": 0.73, "grad_norm": 0.9086406230926514, "learning_rate": 1.773790265366864e-05, "loss": 0.0844, "step": 4338 }, { "epoch": 0.73, "grad_norm": 0.712120532989502, "learning_rate": 1.773675870847394e-05, "loss": 0.0848, "step": 4339 }, { "epoch": 0.73, "grad_norm": 0.6818220019340515, "learning_rate": 1.7735614511012414e-05, "loss": 0.0803, "step": 4340 }, { "epoch": 0.73, "grad_norm": 0.714678943157196, "learning_rate": 1.7734470061321366e-05, "loss": 0.0828, "step": 4341 }, { "epoch": 0.73, "grad_norm": 0.7829672694206238, "learning_rate": 1.7733325359438116e-05, "loss": 0.1078, "step": 4342 }, { "epoch": 0.73, "grad_norm": 0.9295474886894226, "learning_rate": 1.773218040539999e-05, "loss": 0.0905, "step": 4343 }, { "epoch": 0.73, "grad_norm": 0.7786834836006165, "learning_rate": 1.7731035199244317e-05, "loss": 0.0778, "step": 4344 }, { "epoch": 0.73, "grad_norm": 0.7519257664680481, "learning_rate": 1.7729889741008442e-05, "loss": 0.0817, "step": 4345 }, { "epoch": 0.73, "grad_norm": 0.7021132111549377, "learning_rate": 1.772874403072971e-05, "loss": 0.0884, "step": 4346 }, { "epoch": 0.73, "grad_norm": 1.0368558168411255, "learning_rate": 1.772759806844548e-05, "loss": 0.1115, "step": 4347 }, { "epoch": 0.73, "grad_norm": 0.767895519733429, "learning_rate": 1.7726451854193123e-05, "loss": 0.0656, "step": 4348 }, { "epoch": 0.73, "grad_norm": 0.7154362797737122, "learning_rate": 1.7725305388010003e-05, "loss": 0.0834, "step": 4349 }, { "epoch": 0.73, "grad_norm": 0.8615300059318542, "learning_rate": 1.7724158669933507e-05, "loss": 0.1018, "step": 4350 }, { "epoch": 0.73, "grad_norm": 0.7757440805435181, "learning_rate": 1.7723011700001028e-05, "loss": 0.09, "step": 4351 }, { "epoch": 0.73, "grad_norm": 0.7014903426170349, "learning_rate": 1.7721864478249956e-05, "loss": 0.0851, "step": 4352 }, { "epoch": 0.73, "grad_norm": 0.9040984511375427, "learning_rate": 1.7720717004717704e-05, "loss": 0.0676, "step": 4353 }, { "epoch": 0.73, "grad_norm": 0.9159500598907471, "learning_rate": 1.771956927944169e-05, "loss": 0.1119, "step": 4354 }, { "epoch": 0.73, "grad_norm": 0.6909576058387756, "learning_rate": 1.7718421302459328e-05, "loss": 0.0772, "step": 4355 }, { "epoch": 0.73, "grad_norm": 0.7586302757263184, "learning_rate": 1.7717273073808056e-05, "loss": 0.0768, "step": 4356 }, { "epoch": 0.73, "grad_norm": 0.7794085144996643, "learning_rate": 1.7716124593525314e-05, "loss": 0.0891, "step": 4357 }, { "epoch": 0.73, "grad_norm": 0.6560558080673218, "learning_rate": 1.7714975861648545e-05, "loss": 0.046, "step": 4358 }, { "epoch": 0.73, "grad_norm": 0.7158491611480713, "learning_rate": 1.771382687821521e-05, "loss": 0.084, "step": 4359 }, { "epoch": 0.73, "grad_norm": 0.9678618907928467, "learning_rate": 1.7712677643262764e-05, "loss": 0.102, "step": 4360 }, { "epoch": 0.73, "grad_norm": 0.7886837720870972, "learning_rate": 1.7711528156828687e-05, "loss": 0.1023, "step": 4361 }, { "epoch": 0.73, "grad_norm": 0.9796044826507568, "learning_rate": 1.771037841895046e-05, "loss": 0.0965, "step": 4362 }, { "epoch": 0.73, "grad_norm": 0.8188115954399109, "learning_rate": 1.7709228429665567e-05, "loss": 0.0699, "step": 4363 }, { "epoch": 0.73, "grad_norm": 0.8045567870140076, "learning_rate": 1.770807818901151e-05, "loss": 0.1118, "step": 4364 }, { "epoch": 0.73, "grad_norm": 0.6517372131347656, "learning_rate": 1.7706927697025786e-05, "loss": 0.0659, "step": 4365 }, { "epoch": 0.73, "grad_norm": 0.7345989942550659, "learning_rate": 1.7705776953745922e-05, "loss": 0.0826, "step": 4366 }, { "epoch": 0.73, "grad_norm": 0.8843322396278381, "learning_rate": 1.7704625959209422e-05, "loss": 0.0907, "step": 4367 }, { "epoch": 0.73, "grad_norm": 0.8799217939376831, "learning_rate": 1.770347471345383e-05, "loss": 0.0822, "step": 4368 }, { "epoch": 0.73, "grad_norm": 0.729115903377533, "learning_rate": 1.7702323216516676e-05, "loss": 0.078, "step": 4369 }, { "epoch": 0.73, "grad_norm": 0.7718684673309326, "learning_rate": 1.770117146843551e-05, "loss": 0.0854, "step": 4370 }, { "epoch": 0.73, "grad_norm": 1.0149112939834595, "learning_rate": 1.7700019469247886e-05, "loss": 0.0975, "step": 4371 }, { "epoch": 0.73, "grad_norm": 1.0850563049316406, "learning_rate": 1.7698867218991363e-05, "loss": 0.0929, "step": 4372 }, { "epoch": 0.73, "grad_norm": 0.7771019339561462, "learning_rate": 1.7697714717703515e-05, "loss": 0.0897, "step": 4373 }, { "epoch": 0.73, "grad_norm": 0.6219708323478699, "learning_rate": 1.7696561965421918e-05, "loss": 0.0813, "step": 4374 }, { "epoch": 0.73, "grad_norm": 1.1476033926010132, "learning_rate": 1.769540896218416e-05, "loss": 0.0973, "step": 4375 }, { "epoch": 0.73, "grad_norm": 0.7628770470619202, "learning_rate": 1.7694255708027842e-05, "loss": 0.0889, "step": 4376 }, { "epoch": 0.73, "grad_norm": 0.6883411407470703, "learning_rate": 1.7693102202990555e-05, "loss": 0.0886, "step": 4377 }, { "epoch": 0.73, "grad_norm": 0.9660937786102295, "learning_rate": 1.769194844710992e-05, "loss": 0.0962, "step": 4378 }, { "epoch": 0.73, "grad_norm": 0.7570127248764038, "learning_rate": 1.7690794440423557e-05, "loss": 0.0765, "step": 4379 }, { "epoch": 0.73, "grad_norm": 0.7544196844100952, "learning_rate": 1.7689640182969085e-05, "loss": 0.0755, "step": 4380 }, { "epoch": 0.73, "grad_norm": 0.570044219493866, "learning_rate": 1.768848567478415e-05, "loss": 0.0786, "step": 4381 }, { "epoch": 0.73, "grad_norm": 0.7929983139038086, "learning_rate": 1.768733091590639e-05, "loss": 0.1057, "step": 4382 }, { "epoch": 0.73, "grad_norm": 0.7747450470924377, "learning_rate": 1.7686175906373462e-05, "loss": 0.0762, "step": 4383 }, { "epoch": 0.73, "grad_norm": 0.8130630850791931, "learning_rate": 1.768502064622302e-05, "loss": 0.0924, "step": 4384 }, { "epoch": 0.73, "grad_norm": 0.6172561645507812, "learning_rate": 1.7683865135492738e-05, "loss": 0.0753, "step": 4385 }, { "epoch": 0.73, "grad_norm": 0.9116986989974976, "learning_rate": 1.7682709374220295e-05, "loss": 0.0982, "step": 4386 }, { "epoch": 0.73, "grad_norm": 0.7172421813011169, "learning_rate": 1.768155336244337e-05, "loss": 0.0729, "step": 4387 }, { "epoch": 0.73, "grad_norm": 1.0546363592147827, "learning_rate": 1.768039710019966e-05, "loss": 0.1101, "step": 4388 }, { "epoch": 0.73, "grad_norm": 0.9740580916404724, "learning_rate": 1.767924058752686e-05, "loss": 0.0974, "step": 4389 }, { "epoch": 0.73, "grad_norm": 0.6426469683647156, "learning_rate": 1.7678083824462693e-05, "loss": 0.0833, "step": 4390 }, { "epoch": 0.73, "grad_norm": 0.7721999883651733, "learning_rate": 1.7676926811044867e-05, "loss": 0.0868, "step": 4391 }, { "epoch": 0.73, "grad_norm": 0.8928671479225159, "learning_rate": 1.7675769547311104e-05, "loss": 0.0936, "step": 4392 }, { "epoch": 0.73, "grad_norm": 0.6817028522491455, "learning_rate": 1.7674612033299147e-05, "loss": 0.0813, "step": 4393 }, { "epoch": 0.73, "grad_norm": 0.5865314602851868, "learning_rate": 1.7673454269046738e-05, "loss": 0.0669, "step": 4394 }, { "epoch": 0.74, "grad_norm": 0.7453423142433167, "learning_rate": 1.7672296254591618e-05, "loss": 0.0827, "step": 4395 }, { "epoch": 0.74, "grad_norm": 1.0165435075759888, "learning_rate": 1.7671137989971555e-05, "loss": 0.0802, "step": 4396 }, { "epoch": 0.74, "grad_norm": 1.0204815864562988, "learning_rate": 1.7669979475224313e-05, "loss": 0.1038, "step": 4397 }, { "epoch": 0.74, "grad_norm": 0.8165515065193176, "learning_rate": 1.7668820710387665e-05, "loss": 0.0803, "step": 4398 }, { "epoch": 0.74, "grad_norm": 0.6106678247451782, "learning_rate": 1.7667661695499397e-05, "loss": 0.0815, "step": 4399 }, { "epoch": 0.74, "grad_norm": 0.8303773403167725, "learning_rate": 1.7666502430597296e-05, "loss": 0.1001, "step": 4400 }, { "epoch": 0.74, "grad_norm": 0.5349671244621277, "learning_rate": 1.7665342915719164e-05, "loss": 0.0725, "step": 4401 }, { "epoch": 0.74, "grad_norm": 0.614766538143158, "learning_rate": 1.766418315090281e-05, "loss": 0.0798, "step": 4402 }, { "epoch": 0.74, "grad_norm": 0.6322181224822998, "learning_rate": 1.7663023136186045e-05, "loss": 0.0785, "step": 4403 }, { "epoch": 0.74, "grad_norm": 0.9148258566856384, "learning_rate": 1.7661862871606695e-05, "loss": 0.0818, "step": 4404 }, { "epoch": 0.74, "grad_norm": 0.6957597136497498, "learning_rate": 1.7660702357202593e-05, "loss": 0.0725, "step": 4405 }, { "epoch": 0.74, "grad_norm": 0.7047164440155029, "learning_rate": 1.765954159301158e-05, "loss": 0.0766, "step": 4406 }, { "epoch": 0.74, "grad_norm": 0.5496972799301147, "learning_rate": 1.7658380579071505e-05, "loss": 0.075, "step": 4407 }, { "epoch": 0.74, "grad_norm": 0.8501333594322205, "learning_rate": 1.7657219315420216e-05, "loss": 0.1064, "step": 4408 }, { "epoch": 0.74, "grad_norm": 0.85395348072052, "learning_rate": 1.7656057802095584e-05, "loss": 0.0822, "step": 4409 }, { "epoch": 0.74, "grad_norm": 0.657055139541626, "learning_rate": 1.7654896039135485e-05, "loss": 0.0735, "step": 4410 }, { "epoch": 0.74, "grad_norm": 0.943601131439209, "learning_rate": 1.7653734026577793e-05, "loss": 0.0888, "step": 4411 }, { "epoch": 0.74, "grad_norm": 0.8359038233757019, "learning_rate": 1.7652571764460403e-05, "loss": 0.0766, "step": 4412 }, { "epoch": 0.74, "grad_norm": 0.8891600966453552, "learning_rate": 1.7651409252821205e-05, "loss": 0.0883, "step": 4413 }, { "epoch": 0.74, "grad_norm": 0.7927082777023315, "learning_rate": 1.7650246491698108e-05, "loss": 0.0804, "step": 4414 }, { "epoch": 0.74, "grad_norm": 0.5708109140396118, "learning_rate": 1.764908348112903e-05, "loss": 0.0615, "step": 4415 }, { "epoch": 0.74, "grad_norm": 0.7338800430297852, "learning_rate": 1.764792022115188e-05, "loss": 0.0872, "step": 4416 }, { "epoch": 0.74, "grad_norm": 0.7756050825119019, "learning_rate": 1.76467567118046e-05, "loss": 0.0769, "step": 4417 }, { "epoch": 0.74, "grad_norm": 0.6302144527435303, "learning_rate": 1.7645592953125122e-05, "loss": 0.0628, "step": 4418 }, { "epoch": 0.74, "grad_norm": 0.6780795454978943, "learning_rate": 1.764442894515139e-05, "loss": 0.0636, "step": 4419 }, { "epoch": 0.74, "grad_norm": 0.7259856462478638, "learning_rate": 1.7643264687921365e-05, "loss": 0.0811, "step": 4420 }, { "epoch": 0.74, "grad_norm": 0.6823182106018066, "learning_rate": 1.7642100181473002e-05, "loss": 0.0669, "step": 4421 }, { "epoch": 0.74, "grad_norm": 0.7429310083389282, "learning_rate": 1.764093542584427e-05, "loss": 0.0848, "step": 4422 }, { "epoch": 0.74, "grad_norm": 0.7719377279281616, "learning_rate": 1.7639770421073158e-05, "loss": 0.0948, "step": 4423 }, { "epoch": 0.74, "grad_norm": 1.1912118196487427, "learning_rate": 1.7638605167197644e-05, "loss": 0.1098, "step": 4424 }, { "epoch": 0.74, "grad_norm": 0.7879271507263184, "learning_rate": 1.7637439664255724e-05, "loss": 0.0706, "step": 4425 }, { "epoch": 0.74, "grad_norm": 1.1164125204086304, "learning_rate": 1.76362739122854e-05, "loss": 0.0763, "step": 4426 }, { "epoch": 0.74, "grad_norm": 0.822558581829071, "learning_rate": 1.763510791132469e-05, "loss": 0.1056, "step": 4427 }, { "epoch": 0.74, "grad_norm": 0.6038727164268494, "learning_rate": 1.7633941661411595e-05, "loss": 0.115, "step": 4428 }, { "epoch": 0.74, "grad_norm": 0.9353829026222229, "learning_rate": 1.7632775162584162e-05, "loss": 0.0942, "step": 4429 }, { "epoch": 0.74, "grad_norm": 0.7483006715774536, "learning_rate": 1.7631608414880418e-05, "loss": 0.0892, "step": 4430 }, { "epoch": 0.74, "grad_norm": 1.1017481088638306, "learning_rate": 1.76304414183384e-05, "loss": 0.0941, "step": 4431 }, { "epoch": 0.74, "grad_norm": 0.7382081151008606, "learning_rate": 1.7629274172996172e-05, "loss": 0.0958, "step": 4432 }, { "epoch": 0.74, "grad_norm": 0.6868336796760559, "learning_rate": 1.7628106678891782e-05, "loss": 0.0911, "step": 4433 }, { "epoch": 0.74, "grad_norm": 0.9098405838012695, "learning_rate": 1.762693893606331e-05, "loss": 0.0876, "step": 4434 }, { "epoch": 0.74, "grad_norm": 0.7076530456542969, "learning_rate": 1.7625770944548816e-05, "loss": 0.0788, "step": 4435 }, { "epoch": 0.74, "grad_norm": 0.6699444651603699, "learning_rate": 1.76246027043864e-05, "loss": 0.0821, "step": 4436 }, { "epoch": 0.74, "grad_norm": 0.6724746227264404, "learning_rate": 1.7623434215614142e-05, "loss": 0.0916, "step": 4437 }, { "epoch": 0.74, "grad_norm": 0.989687979221344, "learning_rate": 1.7622265478270147e-05, "loss": 0.0933, "step": 4438 }, { "epoch": 0.74, "grad_norm": 0.5016214847564697, "learning_rate": 1.7621096492392523e-05, "loss": 0.0558, "step": 4439 }, { "epoch": 0.74, "grad_norm": 0.7118891477584839, "learning_rate": 1.7619927258019386e-05, "loss": 0.0975, "step": 4440 }, { "epoch": 0.74, "grad_norm": 0.7530961036682129, "learning_rate": 1.761875777518886e-05, "loss": 0.0776, "step": 4441 }, { "epoch": 0.74, "grad_norm": 0.6473992466926575, "learning_rate": 1.7617588043939078e-05, "loss": 0.0632, "step": 4442 }, { "epoch": 0.74, "grad_norm": 0.6022979021072388, "learning_rate": 1.761641806430818e-05, "loss": 0.0674, "step": 4443 }, { "epoch": 0.74, "grad_norm": 0.7665995359420776, "learning_rate": 1.7615247836334314e-05, "loss": 0.0704, "step": 4444 }, { "epoch": 0.74, "grad_norm": 0.7859365344047546, "learning_rate": 1.761407736005564e-05, "loss": 0.0951, "step": 4445 }, { "epoch": 0.74, "grad_norm": 0.644217848777771, "learning_rate": 1.761290663551032e-05, "loss": 0.0752, "step": 4446 }, { "epoch": 0.74, "grad_norm": 0.7558369636535645, "learning_rate": 1.7611735662736523e-05, "loss": 0.0931, "step": 4447 }, { "epoch": 0.74, "grad_norm": 1.1352455615997314, "learning_rate": 1.761056444177244e-05, "loss": 0.076, "step": 4448 }, { "epoch": 0.74, "grad_norm": 0.9389371275901794, "learning_rate": 1.760939297265625e-05, "loss": 0.0787, "step": 4449 }, { "epoch": 0.74, "grad_norm": 0.6732544302940369, "learning_rate": 1.7608221255426163e-05, "loss": 0.0982, "step": 4450 }, { "epoch": 0.74, "grad_norm": 0.5599470734596252, "learning_rate": 1.760704929012037e-05, "loss": 0.0727, "step": 4451 }, { "epoch": 0.74, "grad_norm": 0.6041504144668579, "learning_rate": 1.760587707677709e-05, "loss": 0.0868, "step": 4452 }, { "epoch": 0.74, "grad_norm": 0.9957083463668823, "learning_rate": 1.7604704615434545e-05, "loss": 0.077, "step": 4453 }, { "epoch": 0.74, "grad_norm": 0.6838530898094177, "learning_rate": 1.7603531906130968e-05, "loss": 0.0899, "step": 4454 }, { "epoch": 0.75, "grad_norm": 0.7191842794418335, "learning_rate": 1.760235894890459e-05, "loss": 0.0825, "step": 4455 }, { "epoch": 0.75, "grad_norm": 0.9755266904830933, "learning_rate": 1.7601185743793664e-05, "loss": 0.0865, "step": 4456 }, { "epoch": 0.75, "grad_norm": 0.9032658338546753, "learning_rate": 1.7600012290836437e-05, "loss": 0.1187, "step": 4457 }, { "epoch": 0.75, "grad_norm": 0.5935686826705933, "learning_rate": 1.7598838590071173e-05, "loss": 0.0597, "step": 4458 }, { "epoch": 0.75, "grad_norm": 0.8909429907798767, "learning_rate": 1.7597664641536148e-05, "loss": 0.0875, "step": 4459 }, { "epoch": 0.75, "grad_norm": 0.9320024251937866, "learning_rate": 1.759649044526963e-05, "loss": 0.0821, "step": 4460 }, { "epoch": 0.75, "grad_norm": 0.8097462058067322, "learning_rate": 1.7595316001309914e-05, "loss": 0.0978, "step": 4461 }, { "epoch": 0.75, "grad_norm": 0.6535259485244751, "learning_rate": 1.7594141309695287e-05, "loss": 0.0635, "step": 4462 }, { "epoch": 0.75, "grad_norm": 0.7381717562675476, "learning_rate": 1.7592966370464055e-05, "loss": 0.093, "step": 4463 }, { "epoch": 0.75, "grad_norm": 0.7199825644493103, "learning_rate": 1.7591791183654533e-05, "loss": 0.0894, "step": 4464 }, { "epoch": 0.75, "grad_norm": 0.9278694987297058, "learning_rate": 1.759061574930503e-05, "loss": 0.0823, "step": 4465 }, { "epoch": 0.75, "grad_norm": 0.9921517968177795, "learning_rate": 1.7589440067453876e-05, "loss": 0.106, "step": 4466 }, { "epoch": 0.75, "grad_norm": 0.5620639324188232, "learning_rate": 1.758826413813941e-05, "loss": 0.0687, "step": 4467 }, { "epoch": 0.75, "grad_norm": 0.6675019264221191, "learning_rate": 1.758708796139997e-05, "loss": 0.0687, "step": 4468 }, { "epoch": 0.75, "grad_norm": 0.7257286906242371, "learning_rate": 1.7585911537273904e-05, "loss": 0.0931, "step": 4469 }, { "epoch": 0.75, "grad_norm": 0.7887291312217712, "learning_rate": 1.7584734865799583e-05, "loss": 0.0749, "step": 4470 }, { "epoch": 0.75, "grad_norm": 0.6785452365875244, "learning_rate": 1.7583557947015362e-05, "loss": 0.0629, "step": 4471 }, { "epoch": 0.75, "grad_norm": 1.5340231657028198, "learning_rate": 1.758238078095962e-05, "loss": 0.1066, "step": 4472 }, { "epoch": 0.75, "grad_norm": 1.1192057132720947, "learning_rate": 1.758120336767074e-05, "loss": 0.0735, "step": 4473 }, { "epoch": 0.75, "grad_norm": 0.7162492275238037, "learning_rate": 1.758002570718711e-05, "loss": 0.0775, "step": 4474 }, { "epoch": 0.75, "grad_norm": 1.0764490365982056, "learning_rate": 1.7578847799547132e-05, "loss": 0.1292, "step": 4475 }, { "epoch": 0.75, "grad_norm": 0.7619262933731079, "learning_rate": 1.7577669644789216e-05, "loss": 0.0859, "step": 4476 }, { "epoch": 0.75, "grad_norm": 0.775507926940918, "learning_rate": 1.7576491242951772e-05, "loss": 0.0716, "step": 4477 }, { "epoch": 0.75, "grad_norm": 0.9282331466674805, "learning_rate": 1.7575312594073226e-05, "loss": 0.0798, "step": 4478 }, { "epoch": 0.75, "grad_norm": 0.7295771837234497, "learning_rate": 1.7574133698192008e-05, "loss": 0.0758, "step": 4479 }, { "epoch": 0.75, "grad_norm": 0.788948118686676, "learning_rate": 1.757295455534656e-05, "loss": 0.0698, "step": 4480 }, { "epoch": 0.75, "grad_norm": 0.8801592588424683, "learning_rate": 1.757177516557533e-05, "loss": 0.0693, "step": 4481 }, { "epoch": 0.75, "grad_norm": 0.835060715675354, "learning_rate": 1.7570595528916765e-05, "loss": 0.0731, "step": 4482 }, { "epoch": 0.75, "grad_norm": 0.6147009134292603, "learning_rate": 1.756941564540934e-05, "loss": 0.0639, "step": 4483 }, { "epoch": 0.75, "grad_norm": 1.0489070415496826, "learning_rate": 1.7568235515091518e-05, "loss": 0.0649, "step": 4484 }, { "epoch": 0.75, "grad_norm": 0.8083577156066895, "learning_rate": 1.7567055138001783e-05, "loss": 0.0745, "step": 4485 }, { "epoch": 0.75, "grad_norm": 0.7400290369987488, "learning_rate": 1.7565874514178624e-05, "loss": 0.0857, "step": 4486 }, { "epoch": 0.75, "grad_norm": 0.9693394303321838, "learning_rate": 1.7564693643660532e-05, "loss": 0.0857, "step": 4487 }, { "epoch": 0.75, "grad_norm": 0.8439965844154358, "learning_rate": 1.7563512526486012e-05, "loss": 0.0957, "step": 4488 }, { "epoch": 0.75, "grad_norm": 0.8839462399482727, "learning_rate": 1.7562331162693583e-05, "loss": 0.0865, "step": 4489 }, { "epoch": 0.75, "grad_norm": 0.6047796010971069, "learning_rate": 1.7561149552321756e-05, "loss": 0.0736, "step": 4490 }, { "epoch": 0.75, "grad_norm": 0.809636116027832, "learning_rate": 1.7559967695409057e-05, "loss": 0.0861, "step": 4491 }, { "epoch": 0.75, "grad_norm": 0.5747431516647339, "learning_rate": 1.7558785591994028e-05, "loss": 0.0662, "step": 4492 }, { "epoch": 0.75, "grad_norm": 0.6539835929870605, "learning_rate": 1.7557603242115218e-05, "loss": 0.09, "step": 4493 }, { "epoch": 0.75, "grad_norm": 0.7985215187072754, "learning_rate": 1.7556420645811168e-05, "loss": 0.0933, "step": 4494 }, { "epoch": 0.75, "grad_norm": 0.6163797378540039, "learning_rate": 1.755523780312044e-05, "loss": 0.0668, "step": 4495 }, { "epoch": 0.75, "grad_norm": 0.7043247222900391, "learning_rate": 1.755405471408161e-05, "loss": 0.0923, "step": 4496 }, { "epoch": 0.75, "grad_norm": 0.7268713116645813, "learning_rate": 1.755287137873325e-05, "loss": 0.0887, "step": 4497 }, { "epoch": 0.75, "grad_norm": 0.7230585813522339, "learning_rate": 1.755168779711394e-05, "loss": 0.0788, "step": 4498 }, { "epoch": 0.75, "grad_norm": 0.6432814598083496, "learning_rate": 1.7550503969262274e-05, "loss": 0.0861, "step": 4499 }, { "epoch": 0.75, "grad_norm": 0.7902443408966064, "learning_rate": 1.7549319895216857e-05, "loss": 0.0961, "step": 4500 }, { "epoch": 0.75, "grad_norm": 0.8469744324684143, "learning_rate": 1.7548135575016293e-05, "loss": 0.0793, "step": 4501 }, { "epoch": 0.75, "grad_norm": 0.6135974526405334, "learning_rate": 1.7546951008699197e-05, "loss": 0.0689, "step": 4502 }, { "epoch": 0.75, "grad_norm": 0.9671135544776917, "learning_rate": 1.7545766196304196e-05, "loss": 0.1025, "step": 4503 }, { "epoch": 0.75, "grad_norm": 0.6600891351699829, "learning_rate": 1.7544581137869923e-05, "loss": 0.0772, "step": 4504 }, { "epoch": 0.75, "grad_norm": 0.6340506672859192, "learning_rate": 1.7543395833435014e-05, "loss": 0.0857, "step": 4505 }, { "epoch": 0.75, "grad_norm": 0.6206469535827637, "learning_rate": 1.7542210283038123e-05, "loss": 0.0773, "step": 4506 }, { "epoch": 0.75, "grad_norm": 0.6784083247184753, "learning_rate": 1.7541024486717904e-05, "loss": 0.068, "step": 4507 }, { "epoch": 0.75, "grad_norm": 0.7652648687362671, "learning_rate": 1.753983844451302e-05, "loss": 0.1004, "step": 4508 }, { "epoch": 0.75, "grad_norm": 0.7554531693458557, "learning_rate": 1.7538652156462145e-05, "loss": 0.0735, "step": 4509 }, { "epoch": 0.75, "grad_norm": 0.5886398553848267, "learning_rate": 1.753746562260396e-05, "loss": 0.0593, "step": 4510 }, { "epoch": 0.75, "grad_norm": 0.8836436867713928, "learning_rate": 1.7536278842977147e-05, "loss": 0.1063, "step": 4511 }, { "epoch": 0.75, "grad_norm": 0.5988697409629822, "learning_rate": 1.7535091817620412e-05, "loss": 0.0746, "step": 4512 }, { "epoch": 0.75, "grad_norm": 0.6306729912757874, "learning_rate": 1.7533904546572457e-05, "loss": 0.0654, "step": 4513 }, { "epoch": 0.75, "grad_norm": 0.6470886468887329, "learning_rate": 1.753271702987199e-05, "loss": 0.0585, "step": 4514 }, { "epoch": 0.76, "grad_norm": 0.5947723388671875, "learning_rate": 1.7531529267557737e-05, "loss": 0.0609, "step": 4515 }, { "epoch": 0.76, "grad_norm": 0.6916799545288086, "learning_rate": 1.753034125966842e-05, "loss": 0.0784, "step": 4516 }, { "epoch": 0.76, "grad_norm": 0.5129432678222656, "learning_rate": 1.752915300624278e-05, "loss": 0.0667, "step": 4517 }, { "epoch": 0.76, "grad_norm": 0.7340688705444336, "learning_rate": 1.7527964507319562e-05, "loss": 0.0768, "step": 4518 }, { "epoch": 0.76, "grad_norm": 0.8092746734619141, "learning_rate": 1.752677576293752e-05, "loss": 0.0978, "step": 4519 }, { "epoch": 0.76, "grad_norm": 0.7311174273490906, "learning_rate": 1.7525586773135406e-05, "loss": 0.0831, "step": 4520 }, { "epoch": 0.76, "grad_norm": 0.6540862917900085, "learning_rate": 1.7524397537951997e-05, "loss": 0.0884, "step": 4521 }, { "epoch": 0.76, "grad_norm": 0.9443502426147461, "learning_rate": 1.7523208057426066e-05, "loss": 0.0877, "step": 4522 }, { "epoch": 0.76, "grad_norm": 0.7009217143058777, "learning_rate": 1.75220183315964e-05, "loss": 0.0643, "step": 4523 }, { "epoch": 0.76, "grad_norm": 0.6729273200035095, "learning_rate": 1.752082836050179e-05, "loss": 0.0647, "step": 4524 }, { "epoch": 0.76, "grad_norm": 0.6501518487930298, "learning_rate": 1.751963814418103e-05, "loss": 0.093, "step": 4525 }, { "epoch": 0.76, "grad_norm": 0.7872136235237122, "learning_rate": 1.7518447682672946e-05, "loss": 0.0942, "step": 4526 }, { "epoch": 0.76, "grad_norm": 0.9195176362991333, "learning_rate": 1.7517256976016338e-05, "loss": 0.0812, "step": 4527 }, { "epoch": 0.76, "grad_norm": 1.3559472560882568, "learning_rate": 1.7516066024250035e-05, "loss": 0.085, "step": 4528 }, { "epoch": 0.76, "grad_norm": 0.9692866206169128, "learning_rate": 1.7514874827412872e-05, "loss": 0.0875, "step": 4529 }, { "epoch": 0.76, "grad_norm": 0.745376467704773, "learning_rate": 1.7513683385543687e-05, "loss": 0.0704, "step": 4530 }, { "epoch": 0.76, "grad_norm": 0.6012809872627258, "learning_rate": 1.7512491698681328e-05, "loss": 0.0786, "step": 4531 }, { "epoch": 0.76, "grad_norm": 0.9774410724639893, "learning_rate": 1.7511299766864654e-05, "loss": 0.0663, "step": 4532 }, { "epoch": 0.76, "grad_norm": 0.8167629837989807, "learning_rate": 1.751010759013253e-05, "loss": 0.0843, "step": 4533 }, { "epoch": 0.76, "grad_norm": 0.6265169382095337, "learning_rate": 1.7508915168523824e-05, "loss": 0.0908, "step": 4534 }, { "epoch": 0.76, "grad_norm": 1.3399982452392578, "learning_rate": 1.750772250207742e-05, "loss": 0.0809, "step": 4535 }, { "epoch": 0.76, "grad_norm": 0.6808735132217407, "learning_rate": 1.7506529590832205e-05, "loss": 0.0688, "step": 4536 }, { "epoch": 0.76, "grad_norm": 0.8433057069778442, "learning_rate": 1.7505336434827076e-05, "loss": 0.0936, "step": 4537 }, { "epoch": 0.76, "grad_norm": 0.6347018480300903, "learning_rate": 1.7504143034100938e-05, "loss": 0.0821, "step": 4538 }, { "epoch": 0.76, "grad_norm": 0.6716694235801697, "learning_rate": 1.75029493886927e-05, "loss": 0.0697, "step": 4539 }, { "epoch": 0.76, "grad_norm": 0.6816357374191284, "learning_rate": 1.750175549864129e-05, "loss": 0.0845, "step": 4540 }, { "epoch": 0.76, "grad_norm": 0.8180259466171265, "learning_rate": 1.7500561363985626e-05, "loss": 0.0788, "step": 4541 }, { "epoch": 0.76, "grad_norm": 1.165497899055481, "learning_rate": 1.7499366984764652e-05, "loss": 0.0798, "step": 4542 }, { "epoch": 0.76, "grad_norm": 0.5975250601768494, "learning_rate": 1.749817236101731e-05, "loss": 0.0774, "step": 4543 }, { "epoch": 0.76, "grad_norm": 0.6392019391059875, "learning_rate": 1.749697749278255e-05, "loss": 0.0693, "step": 4544 }, { "epoch": 0.76, "grad_norm": 0.6621900200843811, "learning_rate": 1.7495782380099337e-05, "loss": 0.0688, "step": 4545 }, { "epoch": 0.76, "grad_norm": 0.6323577165603638, "learning_rate": 1.7494587023006636e-05, "loss": 0.0652, "step": 4546 }, { "epoch": 0.76, "grad_norm": 0.6246105432510376, "learning_rate": 1.7493391421543422e-05, "loss": 0.0658, "step": 4547 }, { "epoch": 0.76, "grad_norm": 0.592894971370697, "learning_rate": 1.7492195575748682e-05, "loss": 0.0855, "step": 4548 }, { "epoch": 0.76, "grad_norm": 0.6157046556472778, "learning_rate": 1.7490999485661407e-05, "loss": 0.0784, "step": 4549 }, { "epoch": 0.76, "grad_norm": 0.8163313865661621, "learning_rate": 1.7489803151320594e-05, "loss": 0.0786, "step": 4550 }, { "epoch": 0.76, "grad_norm": 0.5196424126625061, "learning_rate": 1.7488606572765255e-05, "loss": 0.065, "step": 4551 }, { "epoch": 0.76, "grad_norm": 0.6159282922744751, "learning_rate": 1.7487409750034406e-05, "loss": 0.0631, "step": 4552 }, { "epoch": 0.76, "grad_norm": 0.8128244876861572, "learning_rate": 1.7486212683167066e-05, "loss": 0.0806, "step": 4553 }, { "epoch": 0.76, "grad_norm": 0.8273476362228394, "learning_rate": 1.7485015372202273e-05, "loss": 0.0805, "step": 4554 }, { "epoch": 0.76, "grad_norm": 0.6322569251060486, "learning_rate": 1.7483817817179064e-05, "loss": 0.0955, "step": 4555 }, { "epoch": 0.76, "grad_norm": 0.6860891580581665, "learning_rate": 1.7482620018136485e-05, "loss": 0.0765, "step": 4556 }, { "epoch": 0.76, "grad_norm": 0.6029111742973328, "learning_rate": 1.7481421975113598e-05, "loss": 0.0773, "step": 4557 }, { "epoch": 0.76, "grad_norm": 0.8185973763465881, "learning_rate": 1.748022368814946e-05, "loss": 0.0886, "step": 4558 }, { "epoch": 0.76, "grad_norm": 0.46237969398498535, "learning_rate": 1.7479025157283147e-05, "loss": 0.0535, "step": 4559 }, { "epoch": 0.76, "grad_norm": 0.6392209529876709, "learning_rate": 1.7477826382553735e-05, "loss": 0.0765, "step": 4560 }, { "epoch": 0.76, "grad_norm": 0.6512594819068909, "learning_rate": 1.7476627364000315e-05, "loss": 0.0815, "step": 4561 }, { "epoch": 0.76, "grad_norm": 0.8420239090919495, "learning_rate": 1.7475428101661977e-05, "loss": 0.099, "step": 4562 }, { "epoch": 0.76, "grad_norm": 1.097320556640625, "learning_rate": 1.747422859557783e-05, "loss": 0.0767, "step": 4563 }, { "epoch": 0.76, "grad_norm": 0.7031513452529907, "learning_rate": 1.747302884578699e-05, "loss": 0.067, "step": 4564 }, { "epoch": 0.76, "grad_norm": 0.927534282207489, "learning_rate": 1.747182885232856e-05, "loss": 0.0964, "step": 4565 }, { "epoch": 0.76, "grad_norm": 0.5328226685523987, "learning_rate": 1.747062861524168e-05, "loss": 0.0777, "step": 4566 }, { "epoch": 0.76, "grad_norm": 0.6644806265830994, "learning_rate": 1.7469428134565487e-05, "loss": 0.0684, "step": 4567 }, { "epoch": 0.76, "grad_norm": 0.7388453483581543, "learning_rate": 1.746822741033912e-05, "loss": 0.096, "step": 4568 }, { "epoch": 0.76, "grad_norm": 0.7685388326644897, "learning_rate": 1.746702644260173e-05, "loss": 0.0858, "step": 4569 }, { "epoch": 0.76, "grad_norm": 0.6247028708457947, "learning_rate": 1.7465825231392474e-05, "loss": 0.0667, "step": 4570 }, { "epoch": 0.76, "grad_norm": 1.0266934633255005, "learning_rate": 1.746462377675052e-05, "loss": 0.0955, "step": 4571 }, { "epoch": 0.76, "grad_norm": 0.7776997685432434, "learning_rate": 1.7463422078715046e-05, "loss": 0.0963, "step": 4572 }, { "epoch": 0.76, "grad_norm": 0.6955598592758179, "learning_rate": 1.7462220137325232e-05, "loss": 0.0757, "step": 4573 }, { "epoch": 0.77, "grad_norm": 0.7222100496292114, "learning_rate": 1.746101795262027e-05, "loss": 0.0771, "step": 4574 }, { "epoch": 0.77, "grad_norm": 0.6372802257537842, "learning_rate": 1.7459815524639356e-05, "loss": 0.0789, "step": 4575 }, { "epoch": 0.77, "grad_norm": 0.8827503323554993, "learning_rate": 1.7458612853421702e-05, "loss": 0.1053, "step": 4576 }, { "epoch": 0.77, "grad_norm": 0.6374872326850891, "learning_rate": 1.7457409939006524e-05, "loss": 0.0654, "step": 4577 }, { "epoch": 0.77, "grad_norm": 0.8127100467681885, "learning_rate": 1.7456206781433034e-05, "loss": 0.0831, "step": 4578 }, { "epoch": 0.77, "grad_norm": 0.953584611415863, "learning_rate": 1.745500338074047e-05, "loss": 0.0963, "step": 4579 }, { "epoch": 0.77, "grad_norm": 0.6025550365447998, "learning_rate": 1.7453799736968073e-05, "loss": 0.066, "step": 4580 }, { "epoch": 0.77, "grad_norm": 0.7273090481758118, "learning_rate": 1.745259585015508e-05, "loss": 0.0831, "step": 4581 }, { "epoch": 0.77, "grad_norm": 0.8064854741096497, "learning_rate": 1.7451391720340755e-05, "loss": 0.0773, "step": 4582 }, { "epoch": 0.77, "grad_norm": 0.7481212019920349, "learning_rate": 1.7450187347564353e-05, "loss": 0.077, "step": 4583 }, { "epoch": 0.77, "grad_norm": 0.7990534901618958, "learning_rate": 1.744898273186515e-05, "loss": 0.0709, "step": 4584 }, { "epoch": 0.77, "grad_norm": 0.6410264372825623, "learning_rate": 1.744777787328242e-05, "loss": 0.0668, "step": 4585 }, { "epoch": 0.77, "grad_norm": 0.7970716953277588, "learning_rate": 1.744657277185545e-05, "loss": 0.1076, "step": 4586 }, { "epoch": 0.77, "grad_norm": 0.69402676820755, "learning_rate": 1.7445367427623537e-05, "loss": 0.0739, "step": 4587 }, { "epoch": 0.77, "grad_norm": 0.7635341286659241, "learning_rate": 1.7444161840625975e-05, "loss": 0.0832, "step": 4588 }, { "epoch": 0.77, "grad_norm": 0.8410236835479736, "learning_rate": 1.744295601090208e-05, "loss": 0.1071, "step": 4589 }, { "epoch": 0.77, "grad_norm": 0.5460818409919739, "learning_rate": 1.744174993849117e-05, "loss": 0.0536, "step": 4590 }, { "epoch": 0.77, "grad_norm": 0.8805351853370667, "learning_rate": 1.7440543623432568e-05, "loss": 0.1, "step": 4591 }, { "epoch": 0.77, "grad_norm": 0.6660604476928711, "learning_rate": 1.7439337065765605e-05, "loss": 0.0733, "step": 4592 }, { "epoch": 0.77, "grad_norm": 0.7101987600326538, "learning_rate": 1.743813026552963e-05, "loss": 0.0766, "step": 4593 }, { "epoch": 0.77, "grad_norm": 1.9664108753204346, "learning_rate": 1.7436923222763985e-05, "loss": 0.0678, "step": 4594 }, { "epoch": 0.77, "grad_norm": 0.6876214146614075, "learning_rate": 1.743571593750803e-05, "loss": 0.0664, "step": 4595 }, { "epoch": 0.77, "grad_norm": 0.7186604738235474, "learning_rate": 1.743450840980113e-05, "loss": 0.0813, "step": 4596 }, { "epoch": 0.77, "grad_norm": 0.8427573442459106, "learning_rate": 1.7433300639682658e-05, "loss": 0.0884, "step": 4597 }, { "epoch": 0.77, "grad_norm": 0.8283159732818604, "learning_rate": 1.7432092627191994e-05, "loss": 0.0879, "step": 4598 }, { "epoch": 0.77, "grad_norm": 0.5950155854225159, "learning_rate": 1.7430884372368527e-05, "loss": 0.0708, "step": 4599 }, { "epoch": 0.77, "grad_norm": 0.884727418422699, "learning_rate": 1.742967587525166e-05, "loss": 0.0817, "step": 4600 }, { "epoch": 0.77, "grad_norm": 1.0448572635650635, "learning_rate": 1.7428467135880782e-05, "loss": 0.086, "step": 4601 }, { "epoch": 0.77, "grad_norm": 0.7812385559082031, "learning_rate": 1.7427258154295322e-05, "loss": 0.0822, "step": 4602 }, { "epoch": 0.77, "grad_norm": 0.8559831976890564, "learning_rate": 1.742604893053469e-05, "loss": 0.0928, "step": 4603 }, { "epoch": 0.77, "grad_norm": 0.830129086971283, "learning_rate": 1.742483946463832e-05, "loss": 0.1005, "step": 4604 }, { "epoch": 0.77, "grad_norm": 0.5760737061500549, "learning_rate": 1.7423629756645644e-05, "loss": 0.0669, "step": 4605 }, { "epoch": 0.77, "grad_norm": 0.6722663044929504, "learning_rate": 1.742241980659611e-05, "loss": 0.0797, "step": 4606 }, { "epoch": 0.77, "grad_norm": 0.741280198097229, "learning_rate": 1.742120961452917e-05, "loss": 0.0858, "step": 4607 }, { "epoch": 0.77, "grad_norm": 0.6628077030181885, "learning_rate": 1.7419999180484276e-05, "loss": 0.088, "step": 4608 }, { "epoch": 0.77, "grad_norm": 0.6982709169387817, "learning_rate": 1.7418788504500905e-05, "loss": 0.0858, "step": 4609 }, { "epoch": 0.77, "grad_norm": 0.7986642718315125, "learning_rate": 1.7417577586618527e-05, "loss": 0.0907, "step": 4610 }, { "epoch": 0.77, "grad_norm": 0.8568715453147888, "learning_rate": 1.741636642687663e-05, "loss": 0.1059, "step": 4611 }, { "epoch": 0.77, "grad_norm": 0.7031329274177551, "learning_rate": 1.7415155025314704e-05, "loss": 0.0808, "step": 4612 }, { "epoch": 0.77, "grad_norm": 0.8993245363235474, "learning_rate": 1.7413943381972246e-05, "loss": 0.0865, "step": 4613 }, { "epoch": 0.77, "grad_norm": 0.7840202450752258, "learning_rate": 1.7412731496888766e-05, "loss": 0.0905, "step": 4614 }, { "epoch": 0.77, "grad_norm": 0.8332347869873047, "learning_rate": 1.7411519370103778e-05, "loss": 0.073, "step": 4615 }, { "epoch": 0.77, "grad_norm": 0.6378310322761536, "learning_rate": 1.7410307001656804e-05, "loss": 0.0782, "step": 4616 }, { "epoch": 0.77, "grad_norm": 0.8326584100723267, "learning_rate": 1.7409094391587373e-05, "loss": 0.092, "step": 4617 }, { "epoch": 0.77, "grad_norm": 0.7578030824661255, "learning_rate": 1.740788153993503e-05, "loss": 0.0781, "step": 4618 }, { "epoch": 0.77, "grad_norm": 0.5422630906105042, "learning_rate": 1.7406668446739315e-05, "loss": 0.0733, "step": 4619 }, { "epoch": 0.77, "grad_norm": 0.6978214979171753, "learning_rate": 1.7405455112039784e-05, "loss": 0.0828, "step": 4620 }, { "epoch": 0.77, "grad_norm": 0.5941764116287231, "learning_rate": 1.7404241535876005e-05, "loss": 0.0828, "step": 4621 }, { "epoch": 0.77, "grad_norm": 0.8573561310768127, "learning_rate": 1.740302771828754e-05, "loss": 0.0745, "step": 4622 }, { "epoch": 0.77, "grad_norm": 0.6712387204170227, "learning_rate": 1.7401813659313972e-05, "loss": 0.0921, "step": 4623 }, { "epoch": 0.77, "grad_norm": 0.5871412754058838, "learning_rate": 1.7400599358994882e-05, "loss": 0.0676, "step": 4624 }, { "epoch": 0.77, "grad_norm": 0.8118022680282593, "learning_rate": 1.7399384817369875e-05, "loss": 0.097, "step": 4625 }, { "epoch": 0.77, "grad_norm": 0.7908214926719666, "learning_rate": 1.7398170034478537e-05, "loss": 0.0828, "step": 4626 }, { "epoch": 0.77, "grad_norm": 0.7594838738441467, "learning_rate": 1.739695501036049e-05, "loss": 0.0827, "step": 4627 }, { "epoch": 0.77, "grad_norm": 0.9810462594032288, "learning_rate": 1.7395739745055344e-05, "loss": 0.0721, "step": 4628 }, { "epoch": 0.77, "grad_norm": 0.6571326851844788, "learning_rate": 1.739452423860273e-05, "loss": 0.0607, "step": 4629 }, { "epoch": 0.77, "grad_norm": 0.9316761493682861, "learning_rate": 1.739330849104228e-05, "loss": 0.0841, "step": 4630 }, { "epoch": 0.77, "grad_norm": 0.6027587652206421, "learning_rate": 1.739209250241363e-05, "loss": 0.0618, "step": 4631 }, { "epoch": 0.77, "grad_norm": 0.6186361312866211, "learning_rate": 1.739087627275643e-05, "loss": 0.0788, "step": 4632 }, { "epoch": 0.77, "grad_norm": 0.6431012153625488, "learning_rate": 1.7389659802110347e-05, "loss": 0.0718, "step": 4633 }, { "epoch": 0.78, "grad_norm": 0.5859457850456238, "learning_rate": 1.738844309051503e-05, "loss": 0.0673, "step": 4634 }, { "epoch": 0.78, "grad_norm": 0.6784754395484924, "learning_rate": 1.738722613801016e-05, "loss": 0.0766, "step": 4635 }, { "epoch": 0.78, "grad_norm": 0.5952721238136292, "learning_rate": 1.738600894463542e-05, "loss": 0.0796, "step": 4636 }, { "epoch": 0.78, "grad_norm": 0.7445899844169617, "learning_rate": 1.7384791510430493e-05, "loss": 0.0639, "step": 4637 }, { "epoch": 0.78, "grad_norm": 0.8677686452865601, "learning_rate": 1.7383573835435074e-05, "loss": 0.0924, "step": 4638 }, { "epoch": 0.78, "grad_norm": 0.9845924377441406, "learning_rate": 1.738235591968887e-05, "loss": 0.0967, "step": 4639 }, { "epoch": 0.78, "grad_norm": 0.8246722221374512, "learning_rate": 1.7381137763231593e-05, "loss": 0.1002, "step": 4640 }, { "epoch": 0.78, "grad_norm": 0.7816400527954102, "learning_rate": 1.7379919366102962e-05, "loss": 0.0883, "step": 4641 }, { "epoch": 0.78, "grad_norm": 0.778856635093689, "learning_rate": 1.7378700728342703e-05, "loss": 0.0803, "step": 4642 }, { "epoch": 0.78, "grad_norm": 0.7238221168518066, "learning_rate": 1.737748184999055e-05, "loss": 0.077, "step": 4643 }, { "epoch": 0.78, "grad_norm": 1.0667520761489868, "learning_rate": 1.7376262731086254e-05, "loss": 0.0912, "step": 4644 }, { "epoch": 0.78, "grad_norm": 0.5922546982765198, "learning_rate": 1.7375043371669553e-05, "loss": 0.0818, "step": 4645 }, { "epoch": 0.78, "grad_norm": 0.6888355612754822, "learning_rate": 1.7373823771780215e-05, "loss": 0.0698, "step": 4646 }, { "epoch": 0.78, "grad_norm": 0.854392945766449, "learning_rate": 1.7372603931458004e-05, "loss": 0.078, "step": 4647 }, { "epoch": 0.78, "grad_norm": 0.9655362367630005, "learning_rate": 1.73713838507427e-05, "loss": 0.0692, "step": 4648 }, { "epoch": 0.78, "grad_norm": 0.6677196621894836, "learning_rate": 1.7370163529674077e-05, "loss": 0.0721, "step": 4649 }, { "epoch": 0.78, "grad_norm": 0.778610348701477, "learning_rate": 1.7368942968291925e-05, "loss": 0.0824, "step": 4650 }, { "epoch": 0.78, "grad_norm": 0.9334247708320618, "learning_rate": 1.7367722166636046e-05, "loss": 0.1077, "step": 4651 }, { "epoch": 0.78, "grad_norm": 0.6167022585868835, "learning_rate": 1.7366501124746245e-05, "loss": 0.0767, "step": 4652 }, { "epoch": 0.78, "grad_norm": 0.7862952351570129, "learning_rate": 1.736527984266234e-05, "loss": 0.0905, "step": 4653 }, { "epoch": 0.78, "grad_norm": 0.7384108304977417, "learning_rate": 1.736405832042414e-05, "loss": 0.091, "step": 4654 }, { "epoch": 0.78, "grad_norm": 0.699896514415741, "learning_rate": 1.736283655807149e-05, "loss": 0.076, "step": 4655 }, { "epoch": 0.78, "grad_norm": 0.6782553791999817, "learning_rate": 1.7361614555644216e-05, "loss": 0.0774, "step": 4656 }, { "epoch": 0.78, "grad_norm": 0.708238422870636, "learning_rate": 1.7360392313182167e-05, "loss": 0.0652, "step": 4657 }, { "epoch": 0.78, "grad_norm": 0.7641343474388123, "learning_rate": 1.7359169830725195e-05, "loss": 0.0694, "step": 4658 }, { "epoch": 0.78, "grad_norm": 0.6153444647789001, "learning_rate": 1.735794710831316e-05, "loss": 0.0523, "step": 4659 }, { "epoch": 0.78, "grad_norm": 0.7673454284667969, "learning_rate": 1.7356724145985934e-05, "loss": 0.0794, "step": 4660 }, { "epoch": 0.78, "grad_norm": 0.7499797940254211, "learning_rate": 1.735550094378339e-05, "loss": 0.0711, "step": 4661 }, { "epoch": 0.78, "grad_norm": 1.0247089862823486, "learning_rate": 1.735427750174541e-05, "loss": 0.0757, "step": 4662 }, { "epoch": 0.78, "grad_norm": 0.7775270938873291, "learning_rate": 1.7353053819911887e-05, "loss": 0.0825, "step": 4663 }, { "epoch": 0.78, "grad_norm": 0.6837779879570007, "learning_rate": 1.735182989832272e-05, "loss": 0.0713, "step": 4664 }, { "epoch": 0.78, "grad_norm": 0.6399427652359009, "learning_rate": 1.7350605737017824e-05, "loss": 0.0875, "step": 4665 }, { "epoch": 0.78, "grad_norm": 1.0809288024902344, "learning_rate": 1.734938133603711e-05, "loss": 0.0932, "step": 4666 }, { "epoch": 0.78, "grad_norm": 0.8230781555175781, "learning_rate": 1.7348156695420495e-05, "loss": 0.0893, "step": 4667 }, { "epoch": 0.78, "grad_norm": 0.686573326587677, "learning_rate": 1.7346931815207917e-05, "loss": 0.0896, "step": 4668 }, { "epoch": 0.78, "grad_norm": 0.7377808094024658, "learning_rate": 1.7345706695439314e-05, "loss": 0.0877, "step": 4669 }, { "epoch": 0.78, "grad_norm": 0.6799304485321045, "learning_rate": 1.7344481336154628e-05, "loss": 0.1043, "step": 4670 }, { "epoch": 0.78, "grad_norm": 1.1154818534851074, "learning_rate": 1.734325573739382e-05, "loss": 0.1107, "step": 4671 }, { "epoch": 0.78, "grad_norm": 0.6209505796432495, "learning_rate": 1.7342029899196845e-05, "loss": 0.101, "step": 4672 }, { "epoch": 0.78, "grad_norm": 0.6757534742355347, "learning_rate": 1.734080382160368e-05, "loss": 0.0735, "step": 4673 }, { "epoch": 0.78, "grad_norm": 0.8359424471855164, "learning_rate": 1.73395775046543e-05, "loss": 0.0998, "step": 4674 }, { "epoch": 0.78, "grad_norm": 0.7557624578475952, "learning_rate": 1.733835094838869e-05, "loss": 0.091, "step": 4675 }, { "epoch": 0.78, "grad_norm": 0.6534428000450134, "learning_rate": 1.7337124152846844e-05, "loss": 0.084, "step": 4676 }, { "epoch": 0.78, "grad_norm": 0.8873539566993713, "learning_rate": 1.7335897118068762e-05, "loss": 0.1008, "step": 4677 }, { "epoch": 0.78, "grad_norm": 0.9969508647918701, "learning_rate": 1.733466984409445e-05, "loss": 0.0808, "step": 4678 }, { "epoch": 0.78, "grad_norm": 0.6576361060142517, "learning_rate": 1.7333442330963935e-05, "loss": 0.0755, "step": 4679 }, { "epoch": 0.78, "grad_norm": 1.005613088607788, "learning_rate": 1.7332214578717234e-05, "loss": 0.0761, "step": 4680 }, { "epoch": 0.78, "grad_norm": 0.5513375401496887, "learning_rate": 1.733098658739438e-05, "loss": 0.0575, "step": 4681 }, { "epoch": 0.78, "grad_norm": 0.6344546675682068, "learning_rate": 1.7329758357035412e-05, "loss": 0.0728, "step": 4682 }, { "epoch": 0.78, "grad_norm": 0.9750669002532959, "learning_rate": 1.7328529887680386e-05, "loss": 0.0745, "step": 4683 }, { "epoch": 0.78, "grad_norm": 0.7043086290359497, "learning_rate": 1.7327301179369346e-05, "loss": 0.0687, "step": 4684 }, { "epoch": 0.78, "grad_norm": 0.8941947817802429, "learning_rate": 1.7326072232142363e-05, "loss": 0.081, "step": 4685 }, { "epoch": 0.78, "grad_norm": 0.6603742837905884, "learning_rate": 1.732484304603951e-05, "loss": 0.0991, "step": 4686 }, { "epoch": 0.78, "grad_norm": 0.6354779005050659, "learning_rate": 1.732361362110086e-05, "loss": 0.0637, "step": 4687 }, { "epoch": 0.78, "grad_norm": 0.9251957535743713, "learning_rate": 1.7322383957366506e-05, "loss": 0.0996, "step": 4688 }, { "epoch": 0.78, "grad_norm": 0.6424940824508667, "learning_rate": 1.7321154054876537e-05, "loss": 0.0953, "step": 4689 }, { "epoch": 0.78, "grad_norm": 0.9503957629203796, "learning_rate": 1.7319923913671055e-05, "loss": 0.1038, "step": 4690 }, { "epoch": 0.78, "grad_norm": 0.8442139029502869, "learning_rate": 1.731869353379018e-05, "loss": 0.0996, "step": 4691 }, { "epoch": 0.78, "grad_norm": 0.7730706334114075, "learning_rate": 1.731746291527402e-05, "loss": 0.0771, "step": 4692 }, { "epoch": 0.78, "grad_norm": 0.8718470335006714, "learning_rate": 1.7316232058162706e-05, "loss": 0.0996, "step": 4693 }, { "epoch": 0.79, "grad_norm": 0.6412806510925293, "learning_rate": 1.7315000962496368e-05, "loss": 0.0688, "step": 4694 }, { "epoch": 0.79, "grad_norm": 1.1452209949493408, "learning_rate": 1.731376962831515e-05, "loss": 0.0689, "step": 4695 }, { "epoch": 0.79, "grad_norm": 0.5076080560684204, "learning_rate": 1.7312538055659204e-05, "loss": 0.0536, "step": 4696 }, { "epoch": 0.79, "grad_norm": 0.664448618888855, "learning_rate": 1.731130624456868e-05, "loss": 0.0838, "step": 4697 }, { "epoch": 0.79, "grad_norm": 0.6964185237884521, "learning_rate": 1.7310074195083747e-05, "loss": 0.0613, "step": 4698 }, { "epoch": 0.79, "grad_norm": 0.714909017086029, "learning_rate": 1.7308841907244584e-05, "loss": 0.0715, "step": 4699 }, { "epoch": 0.79, "grad_norm": 0.8589877486228943, "learning_rate": 1.7307609381091356e-05, "loss": 0.074, "step": 4700 }, { "epoch": 0.79, "grad_norm": 0.8353623747825623, "learning_rate": 1.7306376616664263e-05, "loss": 0.105, "step": 4701 }, { "epoch": 0.79, "grad_norm": 0.814239501953125, "learning_rate": 1.7305143614003496e-05, "loss": 0.0956, "step": 4702 }, { "epoch": 0.79, "grad_norm": 0.6753386855125427, "learning_rate": 1.730391037314926e-05, "loss": 0.0705, "step": 4703 }, { "epoch": 0.79, "grad_norm": 0.7001227140426636, "learning_rate": 1.730267689414177e-05, "loss": 0.0883, "step": 4704 }, { "epoch": 0.79, "grad_norm": 0.8014772534370422, "learning_rate": 1.7301443177021237e-05, "loss": 0.1056, "step": 4705 }, { "epoch": 0.79, "grad_norm": 0.5827784538269043, "learning_rate": 1.7300209221827895e-05, "loss": 0.0629, "step": 4706 }, { "epoch": 0.79, "grad_norm": 0.7569044232368469, "learning_rate": 1.7298975028601975e-05, "loss": 0.0812, "step": 4707 }, { "epoch": 0.79, "grad_norm": 0.9055231809616089, "learning_rate": 1.729774059738372e-05, "loss": 0.0663, "step": 4708 }, { "epoch": 0.79, "grad_norm": 0.7475617527961731, "learning_rate": 1.7296505928213384e-05, "loss": 0.0939, "step": 4709 }, { "epoch": 0.79, "grad_norm": 0.8058241605758667, "learning_rate": 1.7295271021131216e-05, "loss": 0.0822, "step": 4710 }, { "epoch": 0.79, "grad_norm": 0.6429585814476013, "learning_rate": 1.7294035876177494e-05, "loss": 0.0813, "step": 4711 }, { "epoch": 0.79, "grad_norm": 0.8568418025970459, "learning_rate": 1.7292800493392482e-05, "loss": 0.0911, "step": 4712 }, { "epoch": 0.79, "grad_norm": 0.7622599005699158, "learning_rate": 1.729156487281646e-05, "loss": 0.0623, "step": 4713 }, { "epoch": 0.79, "grad_norm": 0.7178061008453369, "learning_rate": 1.7290329014489727e-05, "loss": 0.0697, "step": 4714 }, { "epoch": 0.79, "grad_norm": 0.5678706169128418, "learning_rate": 1.728909291845257e-05, "loss": 0.0724, "step": 4715 }, { "epoch": 0.79, "grad_norm": 0.5937590003013611, "learning_rate": 1.72878565847453e-05, "loss": 0.0723, "step": 4716 }, { "epoch": 0.79, "grad_norm": 0.7585147023200989, "learning_rate": 1.7286620013408223e-05, "loss": 0.0676, "step": 4717 }, { "epoch": 0.79, "grad_norm": 0.6815471053123474, "learning_rate": 1.7285383204481667e-05, "loss": 0.0703, "step": 4718 }, { "epoch": 0.79, "grad_norm": 0.5602837800979614, "learning_rate": 1.728414615800595e-05, "loss": 0.0721, "step": 4719 }, { "epoch": 0.79, "grad_norm": 0.7679678201675415, "learning_rate": 1.7282908874021415e-05, "loss": 0.0852, "step": 4720 }, { "epoch": 0.79, "grad_norm": 0.8482455611228943, "learning_rate": 1.7281671352568406e-05, "loss": 0.0803, "step": 4721 }, { "epoch": 0.79, "grad_norm": 0.6396328210830688, "learning_rate": 1.7280433593687268e-05, "loss": 0.0691, "step": 4722 }, { "epoch": 0.79, "grad_norm": 0.7588964700698853, "learning_rate": 1.727919559741836e-05, "loss": 0.0648, "step": 4723 }, { "epoch": 0.79, "grad_norm": 0.827343761920929, "learning_rate": 1.7277957363802052e-05, "loss": 0.088, "step": 4724 }, { "epoch": 0.79, "grad_norm": 0.7335355877876282, "learning_rate": 1.727671889287872e-05, "loss": 0.0838, "step": 4725 }, { "epoch": 0.79, "grad_norm": 0.9545019268989563, "learning_rate": 1.727548018468874e-05, "loss": 0.0855, "step": 4726 }, { "epoch": 0.79, "grad_norm": 0.9218949675559998, "learning_rate": 1.727424123927251e-05, "loss": 0.0889, "step": 4727 }, { "epoch": 0.79, "grad_norm": 0.8744357228279114, "learning_rate": 1.727300205667042e-05, "loss": 0.0865, "step": 4728 }, { "epoch": 0.79, "grad_norm": 0.7193215489387512, "learning_rate": 1.7271762636922873e-05, "loss": 0.0837, "step": 4729 }, { "epoch": 0.79, "grad_norm": 0.6286945939064026, "learning_rate": 1.727052298007029e-05, "loss": 0.0719, "step": 4730 }, { "epoch": 0.79, "grad_norm": 0.7882845401763916, "learning_rate": 1.7269283086153087e-05, "loss": 0.0864, "step": 4731 }, { "epoch": 0.79, "grad_norm": 0.8647074699401855, "learning_rate": 1.7268042955211692e-05, "loss": 0.095, "step": 4732 }, { "epoch": 0.79, "grad_norm": 1.043529987335205, "learning_rate": 1.726680258728654e-05, "loss": 0.1011, "step": 4733 }, { "epoch": 0.79, "grad_norm": 0.809201717376709, "learning_rate": 1.7265561982418082e-05, "loss": 0.1006, "step": 4734 }, { "epoch": 0.79, "grad_norm": 0.825715184211731, "learning_rate": 1.726432114064676e-05, "loss": 0.0864, "step": 4735 }, { "epoch": 0.79, "grad_norm": 0.6936352849006653, "learning_rate": 1.726308006201304e-05, "loss": 0.0649, "step": 4736 }, { "epoch": 0.79, "grad_norm": 0.828558623790741, "learning_rate": 1.7261838746557386e-05, "loss": 0.0609, "step": 4737 }, { "epoch": 0.79, "grad_norm": 0.6542869210243225, "learning_rate": 1.7260597194320272e-05, "loss": 0.068, "step": 4738 }, { "epoch": 0.79, "grad_norm": 0.9100255966186523, "learning_rate": 1.7259355405342182e-05, "loss": 0.1043, "step": 4739 }, { "epoch": 0.79, "grad_norm": 0.9501198530197144, "learning_rate": 1.7258113379663605e-05, "loss": 0.0867, "step": 4740 }, { "epoch": 0.79, "grad_norm": 0.687465488910675, "learning_rate": 1.725687111732504e-05, "loss": 0.0677, "step": 4741 }, { "epoch": 0.79, "grad_norm": 0.468048632144928, "learning_rate": 1.7255628618366992e-05, "loss": 0.064, "step": 4742 }, { "epoch": 0.79, "grad_norm": 0.6332291960716248, "learning_rate": 1.7254385882829973e-05, "loss": 0.0632, "step": 4743 }, { "epoch": 0.79, "grad_norm": 0.8668445348739624, "learning_rate": 1.7253142910754505e-05, "loss": 0.0775, "step": 4744 }, { "epoch": 0.79, "grad_norm": 0.6790481209754944, "learning_rate": 1.7251899702181122e-05, "loss": 0.0839, "step": 4745 }, { "epoch": 0.79, "grad_norm": 0.6324851512908936, "learning_rate": 1.725065625715035e-05, "loss": 0.0684, "step": 4746 }, { "epoch": 0.79, "grad_norm": 0.7866756916046143, "learning_rate": 1.7249412575702738e-05, "loss": 0.0735, "step": 4747 }, { "epoch": 0.79, "grad_norm": 0.6896975040435791, "learning_rate": 1.724816865787884e-05, "loss": 0.0707, "step": 4748 }, { "epoch": 0.79, "grad_norm": 0.7487788796424866, "learning_rate": 1.7246924503719213e-05, "loss": 0.0811, "step": 4749 }, { "epoch": 0.79, "grad_norm": 1.0957807302474976, "learning_rate": 1.7245680113264428e-05, "loss": 0.0976, "step": 4750 }, { "epoch": 0.79, "grad_norm": 0.8409658074378967, "learning_rate": 1.724443548655505e-05, "loss": 0.0995, "step": 4751 }, { "epoch": 0.79, "grad_norm": 0.5536360144615173, "learning_rate": 1.7243190623631677e-05, "loss": 0.0584, "step": 4752 }, { "epoch": 0.79, "grad_norm": 0.8164709210395813, "learning_rate": 1.7241945524534885e-05, "loss": 0.1114, "step": 4753 }, { "epoch": 0.8, "grad_norm": 0.7859660387039185, "learning_rate": 1.7240700189305276e-05, "loss": 0.0717, "step": 4754 }, { "epoch": 0.8, "grad_norm": 0.6181915402412415, "learning_rate": 1.7239454617983465e-05, "loss": 0.0886, "step": 4755 }, { "epoch": 0.8, "grad_norm": 0.7116958498954773, "learning_rate": 1.7238208810610052e-05, "loss": 0.0904, "step": 4756 }, { "epoch": 0.8, "grad_norm": 0.708199679851532, "learning_rate": 1.7236962767225666e-05, "loss": 0.0695, "step": 4757 }, { "epoch": 0.8, "grad_norm": 0.583010196685791, "learning_rate": 1.7235716487870932e-05, "loss": 0.0692, "step": 4758 }, { "epoch": 0.8, "grad_norm": 0.5840925574302673, "learning_rate": 1.723446997258649e-05, "loss": 0.0794, "step": 4759 }, { "epoch": 0.8, "grad_norm": 0.9680643677711487, "learning_rate": 1.7233223221412982e-05, "loss": 0.1144, "step": 4760 }, { "epoch": 0.8, "grad_norm": 0.7926366925239563, "learning_rate": 1.723197623439106e-05, "loss": 0.0702, "step": 4761 }, { "epoch": 0.8, "grad_norm": 0.6020233035087585, "learning_rate": 1.723072901156139e-05, "loss": 0.0715, "step": 4762 }, { "epoch": 0.8, "grad_norm": 0.75785893201828, "learning_rate": 1.7229481552964626e-05, "loss": 0.091, "step": 4763 }, { "epoch": 0.8, "grad_norm": 0.7709425687789917, "learning_rate": 1.7228233858641455e-05, "loss": 0.0784, "step": 4764 }, { "epoch": 0.8, "grad_norm": 0.7317882776260376, "learning_rate": 1.7226985928632553e-05, "loss": 0.0949, "step": 4765 }, { "epoch": 0.8, "grad_norm": 2.2346503734588623, "learning_rate": 1.7225737762978613e-05, "loss": 0.0769, "step": 4766 }, { "epoch": 0.8, "grad_norm": 0.8471499681472778, "learning_rate": 1.7224489361720332e-05, "loss": 0.0628, "step": 4767 }, { "epoch": 0.8, "grad_norm": 0.7051152586936951, "learning_rate": 1.7223240724898417e-05, "loss": 0.0616, "step": 4768 }, { "epoch": 0.8, "grad_norm": 0.5894469618797302, "learning_rate": 1.722199185255358e-05, "loss": 0.0645, "step": 4769 }, { "epoch": 0.8, "grad_norm": 0.7204229831695557, "learning_rate": 1.722074274472654e-05, "loss": 0.0993, "step": 4770 }, { "epoch": 0.8, "grad_norm": 0.6722187995910645, "learning_rate": 1.7219493401458032e-05, "loss": 0.0892, "step": 4771 }, { "epoch": 0.8, "grad_norm": 0.7817703485488892, "learning_rate": 1.7218243822788786e-05, "loss": 0.0865, "step": 4772 }, { "epoch": 0.8, "grad_norm": 0.8710528612136841, "learning_rate": 1.7216994008759552e-05, "loss": 0.0805, "step": 4773 }, { "epoch": 0.8, "grad_norm": 0.6388811469078064, "learning_rate": 1.7215743959411074e-05, "loss": 0.0732, "step": 4774 }, { "epoch": 0.8, "grad_norm": 0.630695641040802, "learning_rate": 1.7214493674784122e-05, "loss": 0.0876, "step": 4775 }, { "epoch": 0.8, "grad_norm": 0.7152488827705383, "learning_rate": 1.7213243154919454e-05, "loss": 0.076, "step": 4776 }, { "epoch": 0.8, "grad_norm": 0.9411439895629883, "learning_rate": 1.721199239985785e-05, "loss": 0.0932, "step": 4777 }, { "epoch": 0.8, "grad_norm": 0.7079170346260071, "learning_rate": 1.721074140964009e-05, "loss": 0.0761, "step": 4778 }, { "epoch": 0.8, "grad_norm": 0.7767558693885803, "learning_rate": 1.7209490184306957e-05, "loss": 0.0872, "step": 4779 }, { "epoch": 0.8, "grad_norm": 1.3285385370254517, "learning_rate": 1.7208238723899266e-05, "loss": 0.0892, "step": 4780 }, { "epoch": 0.8, "grad_norm": 0.6621845364570618, "learning_rate": 1.7206987028457807e-05, "loss": 0.078, "step": 4781 }, { "epoch": 0.8, "grad_norm": 0.7922054529190063, "learning_rate": 1.7205735098023406e-05, "loss": 0.0822, "step": 4782 }, { "epoch": 0.8, "grad_norm": 0.6751195788383484, "learning_rate": 1.720448293263687e-05, "loss": 0.0775, "step": 4783 }, { "epoch": 0.8, "grad_norm": 0.8248017430305481, "learning_rate": 1.7203230532339032e-05, "loss": 0.0658, "step": 4784 }, { "epoch": 0.8, "grad_norm": 0.5794811248779297, "learning_rate": 1.7201977897170738e-05, "loss": 0.0662, "step": 4785 }, { "epoch": 0.8, "grad_norm": 1.0319256782531738, "learning_rate": 1.7200725027172818e-05, "loss": 0.1082, "step": 4786 }, { "epoch": 0.8, "grad_norm": 2.048330068588257, "learning_rate": 1.719947192238613e-05, "loss": 0.1135, "step": 4787 }, { "epoch": 0.8, "grad_norm": 0.5082797408103943, "learning_rate": 1.7198218582851536e-05, "loss": 0.0842, "step": 4788 }, { "epoch": 0.8, "grad_norm": 0.655833899974823, "learning_rate": 1.7196965008609896e-05, "loss": 0.0827, "step": 4789 }, { "epoch": 0.8, "grad_norm": 0.6794308423995972, "learning_rate": 1.719571119970209e-05, "loss": 0.0909, "step": 4790 }, { "epoch": 0.8, "grad_norm": 1.1925657987594604, "learning_rate": 1.7194457156168994e-05, "loss": 0.0838, "step": 4791 }, { "epoch": 0.8, "grad_norm": 0.738767147064209, "learning_rate": 1.7193202878051506e-05, "loss": 0.1071, "step": 4792 }, { "epoch": 0.8, "grad_norm": 0.9004443287849426, "learning_rate": 1.7191948365390513e-05, "loss": 0.0911, "step": 4793 }, { "epoch": 0.8, "grad_norm": 0.7339502573013306, "learning_rate": 1.719069361822693e-05, "loss": 0.0872, "step": 4794 }, { "epoch": 0.8, "grad_norm": 0.7070730328559875, "learning_rate": 1.7189438636601665e-05, "loss": 0.0709, "step": 4795 }, { "epoch": 0.8, "grad_norm": 0.8577039837837219, "learning_rate": 1.718818342055564e-05, "loss": 0.0771, "step": 4796 }, { "epoch": 0.8, "grad_norm": 0.611293613910675, "learning_rate": 1.7186927970129776e-05, "loss": 0.0781, "step": 4797 }, { "epoch": 0.8, "grad_norm": 0.8219683170318604, "learning_rate": 1.7185672285365018e-05, "loss": 0.096, "step": 4798 }, { "epoch": 0.8, "grad_norm": 0.6594956517219543, "learning_rate": 1.7184416366302303e-05, "loss": 0.0927, "step": 4799 }, { "epoch": 0.8, "grad_norm": 0.6119162440299988, "learning_rate": 1.7183160212982585e-05, "loss": 0.0718, "step": 4800 }, { "epoch": 0.8, "grad_norm": 0.9542108774185181, "learning_rate": 1.718190382544682e-05, "loss": 0.0941, "step": 4801 }, { "epoch": 0.8, "grad_norm": 1.9971339702606201, "learning_rate": 1.718064720373598e-05, "loss": 0.106, "step": 4802 }, { "epoch": 0.8, "grad_norm": 0.5781491994857788, "learning_rate": 1.7179390347891027e-05, "loss": 0.0589, "step": 4803 }, { "epoch": 0.8, "grad_norm": 0.7485765814781189, "learning_rate": 1.7178133257952955e-05, "loss": 0.0814, "step": 4804 }, { "epoch": 0.8, "grad_norm": 0.8238335847854614, "learning_rate": 1.7176875933962746e-05, "loss": 0.126, "step": 4805 }, { "epoch": 0.8, "grad_norm": 0.6536183953285217, "learning_rate": 1.71756183759614e-05, "loss": 0.0743, "step": 4806 }, { "epoch": 0.8, "grad_norm": 1.0499049425125122, "learning_rate": 1.7174360583989918e-05, "loss": 0.0955, "step": 4807 }, { "epoch": 0.8, "grad_norm": 0.9250169992446899, "learning_rate": 1.7173102558089313e-05, "loss": 0.0751, "step": 4808 }, { "epoch": 0.8, "grad_norm": 0.5811768770217896, "learning_rate": 1.7171844298300607e-05, "loss": 0.0683, "step": 4809 }, { "epoch": 0.8, "grad_norm": 1.3719122409820557, "learning_rate": 1.717058580466482e-05, "loss": 0.0789, "step": 4810 }, { "epoch": 0.8, "grad_norm": 1.0852144956588745, "learning_rate": 1.7169327077223e-05, "loss": 0.0864, "step": 4811 }, { "epoch": 0.8, "grad_norm": 0.8965024948120117, "learning_rate": 1.7168068116016173e-05, "loss": 0.0688, "step": 4812 }, { "epoch": 0.8, "grad_norm": 0.8128953576087952, "learning_rate": 1.7166808921085404e-05, "loss": 0.0733, "step": 4813 }, { "epoch": 0.81, "grad_norm": 0.7745363712310791, "learning_rate": 1.716554949247174e-05, "loss": 0.1197, "step": 4814 }, { "epoch": 0.81, "grad_norm": 0.7039933204650879, "learning_rate": 1.716428983021625e-05, "loss": 0.0836, "step": 4815 }, { "epoch": 0.81, "grad_norm": 1.1160755157470703, "learning_rate": 1.7163029934360012e-05, "loss": 0.1078, "step": 4816 }, { "epoch": 0.81, "grad_norm": 0.6780837178230286, "learning_rate": 1.7161769804944098e-05, "loss": 0.078, "step": 4817 }, { "epoch": 0.81, "grad_norm": 0.9573500156402588, "learning_rate": 1.7160509442009602e-05, "loss": 0.0753, "step": 4818 }, { "epoch": 0.81, "grad_norm": 0.9869953393936157, "learning_rate": 1.7159248845597614e-05, "loss": 0.1119, "step": 4819 }, { "epoch": 0.81, "grad_norm": 0.9341111183166504, "learning_rate": 1.7157988015749244e-05, "loss": 0.0701, "step": 4820 }, { "epoch": 0.81, "grad_norm": 0.7274725437164307, "learning_rate": 1.7156726952505597e-05, "loss": 0.0788, "step": 4821 }, { "epoch": 0.81, "grad_norm": 0.6551872491836548, "learning_rate": 1.7155465655907796e-05, "loss": 0.0736, "step": 4822 }, { "epoch": 0.81, "grad_norm": 1.1834884881973267, "learning_rate": 1.7154204125996968e-05, "loss": 0.0751, "step": 4823 }, { "epoch": 0.81, "grad_norm": 0.7343172430992126, "learning_rate": 1.715294236281424e-05, "loss": 0.0894, "step": 4824 }, { "epoch": 0.81, "grad_norm": 0.5735587477684021, "learning_rate": 1.7151680366400763e-05, "loss": 0.0817, "step": 4825 }, { "epoch": 0.81, "grad_norm": 1.073356032371521, "learning_rate": 1.7150418136797675e-05, "loss": 0.0792, "step": 4826 }, { "epoch": 0.81, "grad_norm": 0.5802480578422546, "learning_rate": 1.714915567404614e-05, "loss": 0.0676, "step": 4827 }, { "epoch": 0.81, "grad_norm": 0.832974910736084, "learning_rate": 1.7147892978187323e-05, "loss": 0.0913, "step": 4828 }, { "epoch": 0.81, "grad_norm": 0.6904898881912231, "learning_rate": 1.714663004926239e-05, "loss": 0.0669, "step": 4829 }, { "epoch": 0.81, "grad_norm": 0.7186638116836548, "learning_rate": 1.7145366887312526e-05, "loss": 0.0559, "step": 4830 }, { "epoch": 0.81, "grad_norm": 1.9175952672958374, "learning_rate": 1.7144103492378916e-05, "loss": 0.1008, "step": 4831 }, { "epoch": 0.81, "grad_norm": 0.9409124851226807, "learning_rate": 1.7142839864502752e-05, "loss": 0.09, "step": 4832 }, { "epoch": 0.81, "grad_norm": 0.839548647403717, "learning_rate": 1.714157600372524e-05, "loss": 0.0862, "step": 4833 }, { "epoch": 0.81, "grad_norm": 1.009973168373108, "learning_rate": 1.7140311910087587e-05, "loss": 0.0856, "step": 4834 }, { "epoch": 0.81, "grad_norm": 0.7651566863059998, "learning_rate": 1.7139047583631015e-05, "loss": 0.0679, "step": 4835 }, { "epoch": 0.81, "grad_norm": 0.5945038795471191, "learning_rate": 1.7137783024396745e-05, "loss": 0.0701, "step": 4836 }, { "epoch": 0.81, "grad_norm": 0.8945736885070801, "learning_rate": 1.7136518232426007e-05, "loss": 0.0877, "step": 4837 }, { "epoch": 0.81, "grad_norm": 0.7737256288528442, "learning_rate": 1.7135253207760045e-05, "loss": 0.0742, "step": 4838 }, { "epoch": 0.81, "grad_norm": 0.7009397149085999, "learning_rate": 1.713398795044011e-05, "loss": 0.0647, "step": 4839 }, { "epoch": 0.81, "grad_norm": 0.9169483780860901, "learning_rate": 1.7132722460507446e-05, "loss": 0.075, "step": 4840 }, { "epoch": 0.81, "grad_norm": 0.7356328368186951, "learning_rate": 1.713145673800333e-05, "loss": 0.0703, "step": 4841 }, { "epoch": 0.81, "grad_norm": 1.6613500118255615, "learning_rate": 1.7130190782969023e-05, "loss": 0.0829, "step": 4842 }, { "epoch": 0.81, "grad_norm": 0.8309826850891113, "learning_rate": 1.712892459544581e-05, "loss": 0.0927, "step": 4843 }, { "epoch": 0.81, "grad_norm": 0.6232730150222778, "learning_rate": 1.7127658175474964e-05, "loss": 0.0623, "step": 4844 }, { "epoch": 0.81, "grad_norm": 0.8629278540611267, "learning_rate": 1.7126391523097795e-05, "loss": 0.0861, "step": 4845 }, { "epoch": 0.81, "grad_norm": 0.7614699006080627, "learning_rate": 1.712512463835559e-05, "loss": 0.0675, "step": 4846 }, { "epoch": 0.81, "grad_norm": 0.8237228989601135, "learning_rate": 1.7123857521289666e-05, "loss": 0.085, "step": 4847 }, { "epoch": 0.81, "grad_norm": 0.7711511850357056, "learning_rate": 1.7122590171941332e-05, "loss": 0.0843, "step": 4848 }, { "epoch": 0.81, "grad_norm": 1.2418122291564941, "learning_rate": 1.712132259035192e-05, "loss": 0.0829, "step": 4849 }, { "epoch": 0.81, "grad_norm": 1.1069241762161255, "learning_rate": 1.7120054776562757e-05, "loss": 0.0859, "step": 4850 }, { "epoch": 0.81, "grad_norm": 0.7769922018051147, "learning_rate": 1.7118786730615178e-05, "loss": 0.0809, "step": 4851 }, { "epoch": 0.81, "grad_norm": 0.6782005429267883, "learning_rate": 1.7117518452550533e-05, "loss": 0.0748, "step": 4852 }, { "epoch": 0.81, "grad_norm": 0.7379095554351807, "learning_rate": 1.7116249942410176e-05, "loss": 0.0794, "step": 4853 }, { "epoch": 0.81, "grad_norm": 0.8373317718505859, "learning_rate": 1.7114981200235464e-05, "loss": 0.0857, "step": 4854 }, { "epoch": 0.81, "grad_norm": 0.6394983530044556, "learning_rate": 1.7113712226067776e-05, "loss": 0.0704, "step": 4855 }, { "epoch": 0.81, "grad_norm": 0.7723267674446106, "learning_rate": 1.7112443019948476e-05, "loss": 0.064, "step": 4856 }, { "epoch": 0.81, "grad_norm": 0.6850736737251282, "learning_rate": 1.711117358191896e-05, "loss": 0.077, "step": 4857 }, { "epoch": 0.81, "grad_norm": 0.8339296579360962, "learning_rate": 1.710990391202061e-05, "loss": 0.1017, "step": 4858 }, { "epoch": 0.81, "grad_norm": 0.6939607262611389, "learning_rate": 1.710863401029483e-05, "loss": 0.0733, "step": 4859 }, { "epoch": 0.81, "grad_norm": 0.8052058219909668, "learning_rate": 1.7107363876783025e-05, "loss": 0.093, "step": 4860 }, { "epoch": 0.81, "grad_norm": 0.8344056606292725, "learning_rate": 1.710609351152661e-05, "loss": 0.0907, "step": 4861 }, { "epoch": 0.81, "grad_norm": 0.8617323637008667, "learning_rate": 1.710482291456701e-05, "loss": 0.0763, "step": 4862 }, { "epoch": 0.81, "grad_norm": 0.8826695680618286, "learning_rate": 1.7103552085945647e-05, "loss": 0.0942, "step": 4863 }, { "epoch": 0.81, "grad_norm": 1.0184119939804077, "learning_rate": 1.7102281025703966e-05, "loss": 0.0827, "step": 4864 }, { "epoch": 0.81, "grad_norm": 0.5755358338356018, "learning_rate": 1.7101009733883405e-05, "loss": 0.0723, "step": 4865 }, { "epoch": 0.81, "grad_norm": 0.890498697757721, "learning_rate": 1.709973821052542e-05, "loss": 0.0762, "step": 4866 }, { "epoch": 0.81, "grad_norm": 0.6628041863441467, "learning_rate": 1.709846645567147e-05, "loss": 0.0725, "step": 4867 }, { "epoch": 0.81, "grad_norm": 0.9385595321655273, "learning_rate": 1.709719446936302e-05, "loss": 0.0919, "step": 4868 }, { "epoch": 0.81, "grad_norm": 0.6051962971687317, "learning_rate": 1.709592225164155e-05, "loss": 0.0757, "step": 4869 }, { "epoch": 0.81, "grad_norm": 0.6625328063964844, "learning_rate": 1.7094649802548537e-05, "loss": 0.0948, "step": 4870 }, { "epoch": 0.81, "grad_norm": 0.8095815777778625, "learning_rate": 1.709337712212547e-05, "loss": 0.0684, "step": 4871 }, { "epoch": 0.81, "grad_norm": 0.8288064002990723, "learning_rate": 1.7092104210413853e-05, "loss": 0.0661, "step": 4872 }, { "epoch": 0.82, "grad_norm": 1.1875487565994263, "learning_rate": 1.7090831067455184e-05, "loss": 0.1024, "step": 4873 }, { "epoch": 0.82, "grad_norm": 0.660835862159729, "learning_rate": 1.7089557693290977e-05, "loss": 0.0832, "step": 4874 }, { "epoch": 0.82, "grad_norm": 0.7350305914878845, "learning_rate": 1.7088284087962755e-05, "loss": 0.0817, "step": 4875 }, { "epoch": 0.82, "grad_norm": 0.5710332989692688, "learning_rate": 1.7087010251512046e-05, "loss": 0.0812, "step": 4876 }, { "epoch": 0.82, "grad_norm": 0.7294913530349731, "learning_rate": 1.708573618398038e-05, "loss": 0.08, "step": 4877 }, { "epoch": 0.82, "grad_norm": 1.3333604335784912, "learning_rate": 1.7084461885409305e-05, "loss": 0.0622, "step": 4878 }, { "epoch": 0.82, "grad_norm": 1.080427646636963, "learning_rate": 1.7083187355840368e-05, "loss": 0.0854, "step": 4879 }, { "epoch": 0.82, "grad_norm": 0.8904407620429993, "learning_rate": 1.7081912595315124e-05, "loss": 0.1005, "step": 4880 }, { "epoch": 0.82, "grad_norm": 0.9657540321350098, "learning_rate": 1.7080637603875147e-05, "loss": 0.095, "step": 4881 }, { "epoch": 0.82, "grad_norm": 0.6772802472114563, "learning_rate": 1.7079362381562e-05, "loss": 0.0594, "step": 4882 }, { "epoch": 0.82, "grad_norm": 1.005447268486023, "learning_rate": 1.707808692841727e-05, "loss": 0.1109, "step": 4883 }, { "epoch": 0.82, "grad_norm": 0.8513481616973877, "learning_rate": 1.7076811244482537e-05, "loss": 0.0899, "step": 4884 }, { "epoch": 0.82, "grad_norm": 0.5737144351005554, "learning_rate": 1.7075535329799402e-05, "loss": 0.0589, "step": 4885 }, { "epoch": 0.82, "grad_norm": 0.7973966002464294, "learning_rate": 1.7074259184409472e-05, "loss": 0.1053, "step": 4886 }, { "epoch": 0.82, "grad_norm": 0.752548098564148, "learning_rate": 1.7072982808354348e-05, "loss": 0.088, "step": 4887 }, { "epoch": 0.82, "grad_norm": 0.7738847136497498, "learning_rate": 1.7071706201675656e-05, "loss": 0.083, "step": 4888 }, { "epoch": 0.82, "grad_norm": 0.5924791693687439, "learning_rate": 1.707042936441502e-05, "loss": 0.0752, "step": 4889 }, { "epoch": 0.82, "grad_norm": 0.6298345923423767, "learning_rate": 1.7069152296614063e-05, "loss": 0.0648, "step": 4890 }, { "epoch": 0.82, "grad_norm": 0.7893600463867188, "learning_rate": 1.7067874998314436e-05, "loss": 0.0935, "step": 4891 }, { "epoch": 0.82, "grad_norm": 0.9392815232276917, "learning_rate": 1.7066597469557785e-05, "loss": 0.0661, "step": 4892 }, { "epoch": 0.82, "grad_norm": 0.605273425579071, "learning_rate": 1.7065319710385763e-05, "loss": 0.0617, "step": 4893 }, { "epoch": 0.82, "grad_norm": 0.6099002957344055, "learning_rate": 1.7064041720840037e-05, "loss": 0.0791, "step": 4894 }, { "epoch": 0.82, "grad_norm": 0.7892453670501709, "learning_rate": 1.7062763500962272e-05, "loss": 0.1011, "step": 4895 }, { "epoch": 0.82, "grad_norm": 0.8511099815368652, "learning_rate": 1.706148505079415e-05, "loss": 0.0774, "step": 4896 }, { "epoch": 0.82, "grad_norm": 0.832580029964447, "learning_rate": 1.706020637037736e-05, "loss": 0.0705, "step": 4897 }, { "epoch": 0.82, "grad_norm": 0.6338300704956055, "learning_rate": 1.7058927459753584e-05, "loss": 0.082, "step": 4898 }, { "epoch": 0.82, "grad_norm": 0.6076798439025879, "learning_rate": 1.705764831896453e-05, "loss": 0.0659, "step": 4899 }, { "epoch": 0.82, "grad_norm": 0.600233793258667, "learning_rate": 1.7056368948051906e-05, "loss": 0.0699, "step": 4900 }, { "epoch": 0.82, "grad_norm": 0.8013719916343689, "learning_rate": 1.7055089347057427e-05, "loss": 0.1042, "step": 4901 }, { "epoch": 0.82, "grad_norm": 0.6319501399993896, "learning_rate": 1.7053809516022816e-05, "loss": 0.0838, "step": 4902 }, { "epoch": 0.82, "grad_norm": 0.7306362986564636, "learning_rate": 1.7052529454989805e-05, "loss": 0.0732, "step": 4903 }, { "epoch": 0.82, "grad_norm": 0.7539167404174805, "learning_rate": 1.705124916400012e-05, "loss": 0.0933, "step": 4904 }, { "epoch": 0.82, "grad_norm": 1.864492416381836, "learning_rate": 1.7049968643095525e-05, "loss": 0.079, "step": 4905 }, { "epoch": 0.82, "grad_norm": 0.9468814730644226, "learning_rate": 1.704868789231776e-05, "loss": 0.0825, "step": 4906 }, { "epoch": 0.82, "grad_norm": 1.150672197341919, "learning_rate": 1.7047406911708594e-05, "loss": 0.0872, "step": 4907 }, { "epoch": 0.82, "grad_norm": 0.5956262946128845, "learning_rate": 1.704612570130979e-05, "loss": 0.0993, "step": 4908 }, { "epoch": 0.82, "grad_norm": 0.7133823037147522, "learning_rate": 1.704484426116312e-05, "loss": 0.0735, "step": 4909 }, { "epoch": 0.82, "grad_norm": 0.7469522953033447, "learning_rate": 1.704356259131038e-05, "loss": 0.0919, "step": 4910 }, { "epoch": 0.82, "grad_norm": 0.7057955861091614, "learning_rate": 1.7042280691793346e-05, "loss": 0.0977, "step": 4911 }, { "epoch": 0.82, "grad_norm": 0.6296183466911316, "learning_rate": 1.7040998562653822e-05, "loss": 0.0712, "step": 4912 }, { "epoch": 0.82, "grad_norm": 0.7230401039123535, "learning_rate": 1.7039716203933613e-05, "loss": 0.0808, "step": 4913 }, { "epoch": 0.82, "grad_norm": 0.763335645198822, "learning_rate": 1.7038433615674534e-05, "loss": 0.0595, "step": 4914 }, { "epoch": 0.82, "grad_norm": 0.6885664463043213, "learning_rate": 1.7037150797918403e-05, "loss": 0.0655, "step": 4915 }, { "epoch": 0.82, "grad_norm": 0.9955548048019409, "learning_rate": 1.7035867750707046e-05, "loss": 0.0985, "step": 4916 }, { "epoch": 0.82, "grad_norm": 0.5638670325279236, "learning_rate": 1.7034584474082305e-05, "loss": 0.0623, "step": 4917 }, { "epoch": 0.82, "grad_norm": 0.9836651086807251, "learning_rate": 1.7033300968086016e-05, "loss": 0.0936, "step": 4918 }, { "epoch": 0.82, "grad_norm": 2.4712934494018555, "learning_rate": 1.703201723276003e-05, "loss": 0.0735, "step": 4919 }, { "epoch": 0.82, "grad_norm": 0.7953673601150513, "learning_rate": 1.7030733268146213e-05, "loss": 0.0825, "step": 4920 }, { "epoch": 0.82, "grad_norm": 1.2568249702453613, "learning_rate": 1.7029449074286423e-05, "loss": 0.0727, "step": 4921 }, { "epoch": 0.82, "grad_norm": 0.9773331880569458, "learning_rate": 1.702816465122253e-05, "loss": 0.0958, "step": 4922 }, { "epoch": 0.82, "grad_norm": 0.9031600952148438, "learning_rate": 1.7026879998996418e-05, "loss": 0.1111, "step": 4923 }, { "epoch": 0.82, "grad_norm": 0.5814361572265625, "learning_rate": 1.702559511764998e-05, "loss": 0.0935, "step": 4924 }, { "epoch": 0.82, "grad_norm": 0.6268376111984253, "learning_rate": 1.7024310007225106e-05, "loss": 0.0646, "step": 4925 }, { "epoch": 0.82, "grad_norm": 0.8028600811958313, "learning_rate": 1.702302466776369e-05, "loss": 0.079, "step": 4926 }, { "epoch": 0.82, "grad_norm": 1.0333071947097778, "learning_rate": 1.702173909930766e-05, "loss": 0.0972, "step": 4927 }, { "epoch": 0.82, "grad_norm": 0.7170370817184448, "learning_rate": 1.702045330189892e-05, "loss": 0.0734, "step": 4928 }, { "epoch": 0.82, "grad_norm": 0.8526982665061951, "learning_rate": 1.7019167275579402e-05, "loss": 0.0985, "step": 4929 }, { "epoch": 0.82, "grad_norm": 0.7702510356903076, "learning_rate": 1.7017881020391034e-05, "loss": 0.0797, "step": 4930 }, { "epoch": 0.82, "grad_norm": 0.6487959623336792, "learning_rate": 1.7016594536375763e-05, "loss": 0.0672, "step": 4931 }, { "epoch": 0.82, "grad_norm": 1.0793322324752808, "learning_rate": 1.7015307823575524e-05, "loss": 0.1009, "step": 4932 }, { "epoch": 0.83, "grad_norm": 0.9783657193183899, "learning_rate": 1.7014020882032282e-05, "loss": 0.0785, "step": 4933 }, { "epoch": 0.83, "grad_norm": 0.5525414347648621, "learning_rate": 1.7012733711787997e-05, "loss": 0.0674, "step": 4934 }, { "epoch": 0.83, "grad_norm": 0.7721003293991089, "learning_rate": 1.701144631288464e-05, "loss": 0.0715, "step": 4935 }, { "epoch": 0.83, "grad_norm": 0.6235769987106323, "learning_rate": 1.7010158685364187e-05, "loss": 0.0777, "step": 4936 }, { "epoch": 0.83, "grad_norm": 0.6463533043861389, "learning_rate": 1.7008870829268624e-05, "loss": 0.0598, "step": 4937 }, { "epoch": 0.83, "grad_norm": 0.7615863680839539, "learning_rate": 1.700758274463994e-05, "loss": 0.075, "step": 4938 }, { "epoch": 0.83, "grad_norm": 0.6583293080329895, "learning_rate": 1.7006294431520132e-05, "loss": 0.0702, "step": 4939 }, { "epoch": 0.83, "grad_norm": 0.8626818060874939, "learning_rate": 1.7005005889951217e-05, "loss": 0.0741, "step": 4940 }, { "epoch": 0.83, "grad_norm": 0.677710235118866, "learning_rate": 1.70037171199752e-05, "loss": 0.0592, "step": 4941 }, { "epoch": 0.83, "grad_norm": 0.7834033370018005, "learning_rate": 1.7002428121634112e-05, "loss": 0.0675, "step": 4942 }, { "epoch": 0.83, "grad_norm": 1.0343354940414429, "learning_rate": 1.7001138894969973e-05, "loss": 0.1032, "step": 4943 }, { "epoch": 0.83, "grad_norm": 0.7538549900054932, "learning_rate": 1.6999849440024824e-05, "loss": 0.0772, "step": 4944 }, { "epoch": 0.83, "grad_norm": 0.7126469016075134, "learning_rate": 1.6998559756840714e-05, "loss": 0.0896, "step": 4945 }, { "epoch": 0.83, "grad_norm": 0.6105203628540039, "learning_rate": 1.6997269845459688e-05, "loss": 0.073, "step": 4946 }, { "epoch": 0.83, "grad_norm": 0.7708622813224792, "learning_rate": 1.6995979705923808e-05, "loss": 0.0775, "step": 4947 }, { "epoch": 0.83, "grad_norm": 0.4742097854614258, "learning_rate": 1.6994689338275135e-05, "loss": 0.0608, "step": 4948 }, { "epoch": 0.83, "grad_norm": 0.6757234930992126, "learning_rate": 1.6993398742555757e-05, "loss": 0.0793, "step": 4949 }, { "epoch": 0.83, "grad_norm": 1.0091323852539062, "learning_rate": 1.699210791880774e-05, "loss": 0.084, "step": 4950 }, { "epoch": 0.83, "grad_norm": 0.6475536227226257, "learning_rate": 1.6990816867073185e-05, "loss": 0.0772, "step": 4951 }, { "epoch": 0.83, "grad_norm": 0.656696081161499, "learning_rate": 1.6989525587394176e-05, "loss": 0.0785, "step": 4952 }, { "epoch": 0.83, "grad_norm": 0.7718027830123901, "learning_rate": 1.6988234079812832e-05, "loss": 0.0748, "step": 4953 }, { "epoch": 0.83, "grad_norm": 1.4672067165374756, "learning_rate": 1.698694234437125e-05, "loss": 0.0612, "step": 4954 }, { "epoch": 0.83, "grad_norm": 0.7171961665153503, "learning_rate": 1.6985650381111557e-05, "loss": 0.0899, "step": 4955 }, { "epoch": 0.83, "grad_norm": 0.7939091324806213, "learning_rate": 1.6984358190075875e-05, "loss": 0.0725, "step": 4956 }, { "epoch": 0.83, "grad_norm": 0.6745585799217224, "learning_rate": 1.6983065771306343e-05, "loss": 0.0709, "step": 4957 }, { "epoch": 0.83, "grad_norm": 0.6167616248130798, "learning_rate": 1.6981773124845094e-05, "loss": 0.0762, "step": 4958 }, { "epoch": 0.83, "grad_norm": 0.670257031917572, "learning_rate": 1.6980480250734288e-05, "loss": 0.0732, "step": 4959 }, { "epoch": 0.83, "grad_norm": 0.7276984453201294, "learning_rate": 1.6979187149016066e-05, "loss": 0.0898, "step": 4960 }, { "epoch": 0.83, "grad_norm": 0.7980970740318298, "learning_rate": 1.6977893819732604e-05, "loss": 0.0776, "step": 4961 }, { "epoch": 0.83, "grad_norm": 0.7182255983352661, "learning_rate": 1.6976600262926063e-05, "loss": 0.0819, "step": 4962 }, { "epoch": 0.83, "grad_norm": 0.8599918484687805, "learning_rate": 1.697530647863863e-05, "loss": 0.1027, "step": 4963 }, { "epoch": 0.83, "grad_norm": 0.8404242992401123, "learning_rate": 1.6974012466912482e-05, "loss": 0.0769, "step": 4964 }, { "epoch": 0.83, "grad_norm": 0.7091131210327148, "learning_rate": 1.6972718227789816e-05, "loss": 0.0755, "step": 4965 }, { "epoch": 0.83, "grad_norm": 0.9049817323684692, "learning_rate": 1.6971423761312835e-05, "loss": 0.0923, "step": 4966 }, { "epoch": 0.83, "grad_norm": 0.8643588423728943, "learning_rate": 1.6970129067523743e-05, "loss": 0.0878, "step": 4967 }, { "epoch": 0.83, "grad_norm": 0.8089569211006165, "learning_rate": 1.6968834146464755e-05, "loss": 0.0814, "step": 4968 }, { "epoch": 0.83, "grad_norm": 0.7059643268585205, "learning_rate": 1.6967538998178096e-05, "loss": 0.0812, "step": 4969 }, { "epoch": 0.83, "grad_norm": 0.7924268245697021, "learning_rate": 1.6966243622705993e-05, "loss": 0.0906, "step": 4970 }, { "epoch": 0.83, "grad_norm": 0.6528181433677673, "learning_rate": 1.6964948020090685e-05, "loss": 0.0805, "step": 4971 }, { "epoch": 0.83, "grad_norm": 0.669798731803894, "learning_rate": 1.6963652190374416e-05, "loss": 0.0669, "step": 4972 }, { "epoch": 0.83, "grad_norm": 0.9183904528617859, "learning_rate": 1.696235613359944e-05, "loss": 0.0807, "step": 4973 }, { "epoch": 0.83, "grad_norm": 0.6287607550621033, "learning_rate": 1.6961059849808014e-05, "loss": 0.0833, "step": 4974 }, { "epoch": 0.83, "grad_norm": 0.9039124846458435, "learning_rate": 1.695976333904241e-05, "loss": 0.0828, "step": 4975 }, { "epoch": 0.83, "grad_norm": 0.8108989000320435, "learning_rate": 1.6958466601344896e-05, "loss": 0.0673, "step": 4976 }, { "epoch": 0.83, "grad_norm": 0.729198694229126, "learning_rate": 1.6957169636757756e-05, "loss": 0.0774, "step": 4977 }, { "epoch": 0.83, "grad_norm": 0.7502338290214539, "learning_rate": 1.695587244532328e-05, "loss": 0.0636, "step": 4978 }, { "epoch": 0.83, "grad_norm": 0.6722752451896667, "learning_rate": 1.6954575027083764e-05, "loss": 0.0699, "step": 4979 }, { "epoch": 0.83, "grad_norm": 0.8273719549179077, "learning_rate": 1.6953277382081515e-05, "loss": 0.066, "step": 4980 }, { "epoch": 0.83, "grad_norm": 0.4717636704444885, "learning_rate": 1.6951979510358837e-05, "loss": 0.0748, "step": 4981 }, { "epoch": 0.83, "grad_norm": 0.6976234912872314, "learning_rate": 1.6950681411958053e-05, "loss": 0.0651, "step": 4982 }, { "epoch": 0.83, "grad_norm": 0.7705563306808472, "learning_rate": 1.6949383086921495e-05, "loss": 0.065, "step": 4983 }, { "epoch": 0.83, "grad_norm": 0.6675629615783691, "learning_rate": 1.6948084535291488e-05, "loss": 0.0981, "step": 4984 }, { "epoch": 0.83, "grad_norm": 0.6185959577560425, "learning_rate": 1.694678575711038e-05, "loss": 0.067, "step": 4985 }, { "epoch": 0.83, "grad_norm": 0.7819051146507263, "learning_rate": 1.6945486752420506e-05, "loss": 0.0869, "step": 4986 }, { "epoch": 0.83, "grad_norm": 1.351302146911621, "learning_rate": 1.6944187521264236e-05, "loss": 0.0781, "step": 4987 }, { "epoch": 0.83, "grad_norm": 0.748506486415863, "learning_rate": 1.694288806368393e-05, "loss": 0.0818, "step": 4988 }, { "epoch": 0.83, "grad_norm": 0.8291387557983398, "learning_rate": 1.6941588379721958e-05, "loss": 0.0912, "step": 4989 }, { "epoch": 0.83, "grad_norm": 0.946360170841217, "learning_rate": 1.694028846942069e-05, "loss": 0.106, "step": 4990 }, { "epoch": 0.83, "grad_norm": 0.8454190492630005, "learning_rate": 1.693898833282252e-05, "loss": 0.0809, "step": 4991 }, { "epoch": 0.83, "grad_norm": 0.6140562295913696, "learning_rate": 1.693768796996984e-05, "loss": 0.066, "step": 4992 }, { "epoch": 0.84, "grad_norm": 0.9405034780502319, "learning_rate": 1.6936387380905045e-05, "loss": 0.088, "step": 4993 }, { "epoch": 0.84, "grad_norm": 0.636836588382721, "learning_rate": 1.693508656567055e-05, "loss": 0.0684, "step": 4994 }, { "epoch": 0.84, "grad_norm": 0.7581771016120911, "learning_rate": 1.693378552430876e-05, "loss": 0.083, "step": 4995 }, { "epoch": 0.84, "grad_norm": 0.701531708240509, "learning_rate": 1.6932484256862105e-05, "loss": 0.0953, "step": 4996 }, { "epoch": 0.84, "grad_norm": 0.8677437901496887, "learning_rate": 1.693118276337301e-05, "loss": 0.0788, "step": 4997 }, { "epoch": 0.84, "grad_norm": 0.6292547583580017, "learning_rate": 1.6929881043883916e-05, "loss": 0.0855, "step": 4998 }, { "epoch": 0.84, "grad_norm": 0.5446681380271912, "learning_rate": 1.6928579098437265e-05, "loss": 0.0957, "step": 4999 }, { "epoch": 0.84, "grad_norm": 0.5461327433586121, "learning_rate": 1.6927276927075508e-05, "loss": 0.0636, "step": 5000 }, { "epoch": 0.84, "grad_norm": 0.6385879516601562, "learning_rate": 1.6925974529841103e-05, "loss": 0.0731, "step": 5001 }, { "epoch": 0.84, "grad_norm": 0.6115297079086304, "learning_rate": 1.6924671906776517e-05, "loss": 0.0755, "step": 5002 }, { "epoch": 0.84, "grad_norm": 0.6377688646316528, "learning_rate": 1.6923369057924226e-05, "loss": 0.0615, "step": 5003 }, { "epoch": 0.84, "grad_norm": 0.571022093296051, "learning_rate": 1.692206598332671e-05, "loss": 0.0668, "step": 5004 }, { "epoch": 0.84, "grad_norm": 0.9217655062675476, "learning_rate": 1.6920762683026455e-05, "loss": 0.0707, "step": 5005 }, { "epoch": 0.84, "grad_norm": 0.6248024702072144, "learning_rate": 1.691945915706596e-05, "loss": 0.0525, "step": 5006 }, { "epoch": 0.84, "grad_norm": 0.9687361717224121, "learning_rate": 1.6918155405487727e-05, "loss": 0.0909, "step": 5007 }, { "epoch": 0.84, "grad_norm": 0.6684622764587402, "learning_rate": 1.6916851428334268e-05, "loss": 0.0609, "step": 5008 }, { "epoch": 0.84, "grad_norm": 0.5591751933097839, "learning_rate": 1.6915547225648096e-05, "loss": 0.0932, "step": 5009 }, { "epoch": 0.84, "grad_norm": 0.6284738779067993, "learning_rate": 1.6914242797471735e-05, "loss": 0.0758, "step": 5010 }, { "epoch": 0.84, "grad_norm": 0.8675960302352905, "learning_rate": 1.691293814384773e-05, "loss": 0.0942, "step": 5011 }, { "epoch": 0.84, "grad_norm": 0.5748511552810669, "learning_rate": 1.6911633264818606e-05, "loss": 0.0761, "step": 5012 }, { "epoch": 0.84, "grad_norm": 0.6479445099830627, "learning_rate": 1.6910328160426922e-05, "loss": 0.0684, "step": 5013 }, { "epoch": 0.84, "grad_norm": 0.8170279860496521, "learning_rate": 1.6909022830715224e-05, "loss": 0.0862, "step": 5014 }, { "epoch": 0.84, "grad_norm": 0.7864373922348022, "learning_rate": 1.6907717275726077e-05, "loss": 0.069, "step": 5015 }, { "epoch": 0.84, "grad_norm": 0.6659027338027954, "learning_rate": 1.6906411495502055e-05, "loss": 0.0801, "step": 5016 }, { "epoch": 0.84, "grad_norm": 2.007930278778076, "learning_rate": 1.6905105490085727e-05, "loss": 0.0913, "step": 5017 }, { "epoch": 0.84, "grad_norm": 0.6427311897277832, "learning_rate": 1.6903799259519677e-05, "loss": 0.0625, "step": 5018 }, { "epoch": 0.84, "grad_norm": 0.6065215468406677, "learning_rate": 1.6902492803846502e-05, "loss": 0.0728, "step": 5019 }, { "epoch": 0.84, "grad_norm": 0.4761725664138794, "learning_rate": 1.69011861231088e-05, "loss": 0.055, "step": 5020 }, { "epoch": 0.84, "grad_norm": 1.8554757833480835, "learning_rate": 1.6899879217349176e-05, "loss": 0.0923, "step": 5021 }, { "epoch": 0.84, "grad_norm": 0.5312740802764893, "learning_rate": 1.689857208661024e-05, "loss": 0.0519, "step": 5022 }, { "epoch": 0.84, "grad_norm": 0.6334168910980225, "learning_rate": 1.6897264730934617e-05, "loss": 0.0674, "step": 5023 }, { "epoch": 0.84, "grad_norm": 0.739372193813324, "learning_rate": 1.689595715036493e-05, "loss": 0.0806, "step": 5024 }, { "epoch": 0.84, "grad_norm": 0.8248491883277893, "learning_rate": 1.689464934494382e-05, "loss": 0.0844, "step": 5025 }, { "epoch": 0.84, "grad_norm": 0.7606717348098755, "learning_rate": 1.6893341314713928e-05, "loss": 0.0794, "step": 5026 }, { "epoch": 0.84, "grad_norm": 0.9433539509773254, "learning_rate": 1.6892033059717897e-05, "loss": 0.1122, "step": 5027 }, { "epoch": 0.84, "grad_norm": 1.039272665977478, "learning_rate": 1.6890724579998397e-05, "loss": 0.0805, "step": 5028 }, { "epoch": 0.84, "grad_norm": 0.7429170608520508, "learning_rate": 1.6889415875598082e-05, "loss": 0.0803, "step": 5029 }, { "epoch": 0.84, "grad_norm": 0.6463636755943298, "learning_rate": 1.6888106946559632e-05, "loss": 0.0769, "step": 5030 }, { "epoch": 0.84, "grad_norm": 1.1081771850585938, "learning_rate": 1.6886797792925717e-05, "loss": 0.0737, "step": 5031 }, { "epoch": 0.84, "grad_norm": 0.5968837141990662, "learning_rate": 1.688548841473903e-05, "loss": 0.0595, "step": 5032 }, { "epoch": 0.84, "grad_norm": 0.7008680701255798, "learning_rate": 1.688417881204227e-05, "loss": 0.0805, "step": 5033 }, { "epoch": 0.84, "grad_norm": 0.7782245874404907, "learning_rate": 1.6882868984878123e-05, "loss": 0.0711, "step": 5034 }, { "epoch": 0.84, "grad_norm": 0.7355011105537415, "learning_rate": 1.6881558933289314e-05, "loss": 0.0792, "step": 5035 }, { "epoch": 0.84, "grad_norm": 0.8226422071456909, "learning_rate": 1.688024865731855e-05, "loss": 0.0944, "step": 5036 }, { "epoch": 0.84, "grad_norm": 0.5832789540290833, "learning_rate": 1.6878938157008554e-05, "loss": 0.0821, "step": 5037 }, { "epoch": 0.84, "grad_norm": 0.833314836025238, "learning_rate": 1.6877627432402056e-05, "loss": 0.0559, "step": 5038 }, { "epoch": 0.84, "grad_norm": 0.8136623501777649, "learning_rate": 1.68763164835418e-05, "loss": 0.0822, "step": 5039 }, { "epoch": 0.84, "grad_norm": 0.9985839128494263, "learning_rate": 1.6875005310470525e-05, "loss": 0.1233, "step": 5040 }, { "epoch": 0.84, "grad_norm": 0.7453720569610596, "learning_rate": 1.6873693913230983e-05, "loss": 0.0718, "step": 5041 }, { "epoch": 0.84, "grad_norm": 0.6088612079620361, "learning_rate": 1.6872382291865943e-05, "loss": 0.0832, "step": 5042 }, { "epoch": 0.84, "grad_norm": 0.987188994884491, "learning_rate": 1.6871070446418164e-05, "loss": 0.0876, "step": 5043 }, { "epoch": 0.84, "grad_norm": 0.9203979969024658, "learning_rate": 1.6869758376930418e-05, "loss": 0.0954, "step": 5044 }, { "epoch": 0.84, "grad_norm": 0.6982521414756775, "learning_rate": 1.6868446083445493e-05, "loss": 0.0867, "step": 5045 }, { "epoch": 0.84, "grad_norm": 0.61861652135849, "learning_rate": 1.686713356600618e-05, "loss": 0.0835, "step": 5046 }, { "epoch": 0.84, "grad_norm": 0.7541865110397339, "learning_rate": 1.6865820824655264e-05, "loss": 0.0922, "step": 5047 }, { "epoch": 0.84, "grad_norm": 0.7203717231750488, "learning_rate": 1.6864507859435556e-05, "loss": 0.1012, "step": 5048 }, { "epoch": 0.84, "grad_norm": 0.5740472674369812, "learning_rate": 1.686319467038987e-05, "loss": 0.0836, "step": 5049 }, { "epoch": 0.84, "grad_norm": 0.6199268102645874, "learning_rate": 1.6861881257561018e-05, "loss": 0.0836, "step": 5050 }, { "epoch": 0.84, "grad_norm": 0.6343309879302979, "learning_rate": 1.686056762099183e-05, "loss": 0.0683, "step": 5051 }, { "epoch": 0.84, "grad_norm": 0.7008465528488159, "learning_rate": 1.6859253760725136e-05, "loss": 0.068, "step": 5052 }, { "epoch": 0.85, "grad_norm": 0.6372484564781189, "learning_rate": 1.6857939676803777e-05, "loss": 0.0915, "step": 5053 }, { "epoch": 0.85, "grad_norm": 0.6549018621444702, "learning_rate": 1.6856625369270602e-05, "loss": 0.0676, "step": 5054 }, { "epoch": 0.85, "grad_norm": 0.7325875759124756, "learning_rate": 1.685531083816846e-05, "loss": 0.0698, "step": 5055 }, { "epoch": 0.85, "grad_norm": 1.064322829246521, "learning_rate": 1.6853996083540217e-05, "loss": 0.0913, "step": 5056 }, { "epoch": 0.85, "grad_norm": 0.5129755139350891, "learning_rate": 1.6852681105428748e-05, "loss": 0.0738, "step": 5057 }, { "epoch": 0.85, "grad_norm": 0.43255987763404846, "learning_rate": 1.685136590387692e-05, "loss": 0.0619, "step": 5058 }, { "epoch": 0.85, "grad_norm": 0.6748932003974915, "learning_rate": 1.6850050478927617e-05, "loss": 0.0587, "step": 5059 }, { "epoch": 0.85, "grad_norm": 1.1032644510269165, "learning_rate": 1.6848734830623737e-05, "loss": 0.1022, "step": 5060 }, { "epoch": 0.85, "grad_norm": 0.7610539197921753, "learning_rate": 1.684741895900818e-05, "loss": 0.0853, "step": 5061 }, { "epoch": 0.85, "grad_norm": 1.2350904941558838, "learning_rate": 1.684610286412384e-05, "loss": 0.1033, "step": 5062 }, { "epoch": 0.85, "grad_norm": 0.7370057702064514, "learning_rate": 1.684478654601364e-05, "loss": 0.0725, "step": 5063 }, { "epoch": 0.85, "grad_norm": 0.6094563007354736, "learning_rate": 1.6843470004720494e-05, "loss": 0.0791, "step": 5064 }, { "epoch": 0.85, "grad_norm": 0.8152024149894714, "learning_rate": 1.6842153240287334e-05, "loss": 0.1044, "step": 5065 }, { "epoch": 0.85, "grad_norm": 0.7580138444900513, "learning_rate": 1.6840836252757093e-05, "loss": 0.0798, "step": 5066 }, { "epoch": 0.85, "grad_norm": 0.6665176749229431, "learning_rate": 1.683951904217271e-05, "loss": 0.0643, "step": 5067 }, { "epoch": 0.85, "grad_norm": 1.2512418031692505, "learning_rate": 1.6838201608577144e-05, "loss": 0.0741, "step": 5068 }, { "epoch": 0.85, "grad_norm": 0.7212514877319336, "learning_rate": 1.683688395201334e-05, "loss": 0.0859, "step": 5069 }, { "epoch": 0.85, "grad_norm": 0.5091173648834229, "learning_rate": 1.6835566072524268e-05, "loss": 0.0485, "step": 5070 }, { "epoch": 0.85, "grad_norm": 0.7759990692138672, "learning_rate": 1.68342479701529e-05, "loss": 0.0584, "step": 5071 }, { "epoch": 0.85, "grad_norm": 0.9053118824958801, "learning_rate": 1.6832929644942208e-05, "loss": 0.0647, "step": 5072 }, { "epoch": 0.85, "grad_norm": 0.6079359650611877, "learning_rate": 1.6831611096935186e-05, "loss": 0.0729, "step": 5073 }, { "epoch": 0.85, "grad_norm": 0.6493949890136719, "learning_rate": 1.6830292326174822e-05, "loss": 0.0723, "step": 5074 }, { "epoch": 0.85, "grad_norm": 0.862964391708374, "learning_rate": 1.6828973332704117e-05, "loss": 0.0797, "step": 5075 }, { "epoch": 0.85, "grad_norm": 0.7406294941902161, "learning_rate": 1.6827654116566078e-05, "loss": 0.0634, "step": 5076 }, { "epoch": 0.85, "grad_norm": 0.8306417465209961, "learning_rate": 1.682633467780372e-05, "loss": 0.0864, "step": 5077 }, { "epoch": 0.85, "grad_norm": 0.9434652924537659, "learning_rate": 1.682501501646007e-05, "loss": 0.1011, "step": 5078 }, { "epoch": 0.85, "grad_norm": 1.3816559314727783, "learning_rate": 1.682369513257815e-05, "loss": 0.0895, "step": 5079 }, { "epoch": 0.85, "grad_norm": 0.6208773255348206, "learning_rate": 1.6822375026200997e-05, "loss": 0.0676, "step": 5080 }, { "epoch": 0.85, "grad_norm": 0.797870397567749, "learning_rate": 1.6821054697371656e-05, "loss": 0.0735, "step": 5081 }, { "epoch": 0.85, "grad_norm": 0.6615751385688782, "learning_rate": 1.681973414613318e-05, "loss": 0.0763, "step": 5082 }, { "epoch": 0.85, "grad_norm": 0.9578794240951538, "learning_rate": 1.681841337252863e-05, "loss": 0.087, "step": 5083 }, { "epoch": 0.85, "grad_norm": 0.5426303148269653, "learning_rate": 1.6817092376601064e-05, "loss": 0.0722, "step": 5084 }, { "epoch": 0.85, "grad_norm": 0.7731760740280151, "learning_rate": 1.6815771158393562e-05, "loss": 0.0846, "step": 5085 }, { "epoch": 0.85, "grad_norm": 1.049599289894104, "learning_rate": 1.68144497179492e-05, "loss": 0.0826, "step": 5086 }, { "epoch": 0.85, "grad_norm": 0.660810112953186, "learning_rate": 1.6813128055311058e-05, "loss": 0.0786, "step": 5087 }, { "epoch": 0.85, "grad_norm": 0.8448794484138489, "learning_rate": 1.6811806170522244e-05, "loss": 0.0801, "step": 5088 }, { "epoch": 0.85, "grad_norm": 0.7957385182380676, "learning_rate": 1.6810484063625853e-05, "loss": 0.0886, "step": 5089 }, { "epoch": 0.85, "grad_norm": 2.1041347980499268, "learning_rate": 1.6809161734664992e-05, "loss": 0.0934, "step": 5090 }, { "epoch": 0.85, "grad_norm": 0.6458122134208679, "learning_rate": 1.6807839183682783e-05, "loss": 0.0728, "step": 5091 }, { "epoch": 0.85, "grad_norm": 0.8240588903427124, "learning_rate": 1.6806516410722345e-05, "loss": 0.064, "step": 5092 }, { "epoch": 0.85, "grad_norm": 0.6049485206604004, "learning_rate": 1.6805193415826814e-05, "loss": 0.064, "step": 5093 }, { "epoch": 0.85, "grad_norm": 0.8425576686859131, "learning_rate": 1.680387019903932e-05, "loss": 0.0688, "step": 5094 }, { "epoch": 0.85, "grad_norm": 1.0213189125061035, "learning_rate": 1.6802546760403015e-05, "loss": 0.1045, "step": 5095 }, { "epoch": 0.85, "grad_norm": 0.7916008830070496, "learning_rate": 1.6801223099961045e-05, "loss": 0.0798, "step": 5096 }, { "epoch": 0.85, "grad_norm": 0.7704256176948547, "learning_rate": 1.6799899217756577e-05, "loss": 0.0671, "step": 5097 }, { "epoch": 0.85, "grad_norm": 0.7569260001182556, "learning_rate": 1.679857511383277e-05, "loss": 0.0827, "step": 5098 }, { "epoch": 0.85, "grad_norm": 0.6632531881332397, "learning_rate": 1.6797250788232807e-05, "loss": 0.0682, "step": 5099 }, { "epoch": 0.85, "grad_norm": 0.7256683707237244, "learning_rate": 1.679592624099986e-05, "loss": 0.0868, "step": 5100 }, { "epoch": 0.85, "grad_norm": 0.6465203762054443, "learning_rate": 1.6794601472177127e-05, "loss": 0.0741, "step": 5101 }, { "epoch": 0.85, "grad_norm": 0.9729951024055481, "learning_rate": 1.6793276481807792e-05, "loss": 0.0792, "step": 5102 }, { "epoch": 0.85, "grad_norm": 0.8028579354286194, "learning_rate": 1.679195126993507e-05, "loss": 0.0749, "step": 5103 }, { "epoch": 0.85, "grad_norm": 0.6311137676239014, "learning_rate": 1.6790625836602163e-05, "loss": 0.0744, "step": 5104 }, { "epoch": 0.85, "grad_norm": 0.761193037033081, "learning_rate": 1.6789300181852294e-05, "loss": 0.0843, "step": 5105 }, { "epoch": 0.85, "grad_norm": 0.6346054077148438, "learning_rate": 1.6787974305728683e-05, "loss": 0.0681, "step": 5106 }, { "epoch": 0.85, "grad_norm": 0.6243395805358887, "learning_rate": 1.6786648208274564e-05, "loss": 0.0698, "step": 5107 }, { "epoch": 0.85, "grad_norm": 0.5181125402450562, "learning_rate": 1.678532188953318e-05, "loss": 0.071, "step": 5108 }, { "epoch": 0.85, "grad_norm": 0.747279942035675, "learning_rate": 1.6783995349547767e-05, "loss": 0.0988, "step": 5109 }, { "epoch": 0.85, "grad_norm": 1.0056769847869873, "learning_rate": 1.678266858836159e-05, "loss": 0.081, "step": 5110 }, { "epoch": 0.85, "grad_norm": 0.7114883065223694, "learning_rate": 1.67813416060179e-05, "loss": 0.0722, "step": 5111 }, { "epoch": 0.85, "grad_norm": 0.806652843952179, "learning_rate": 1.678001440255997e-05, "loss": 0.0913, "step": 5112 }, { "epoch": 0.86, "grad_norm": 0.7843213081359863, "learning_rate": 1.677868697803108e-05, "loss": 0.084, "step": 5113 }, { "epoch": 0.86, "grad_norm": 0.7369728684425354, "learning_rate": 1.6777359332474504e-05, "loss": 0.0808, "step": 5114 }, { "epoch": 0.86, "grad_norm": 1.1607847213745117, "learning_rate": 1.6776031465933537e-05, "loss": 0.0819, "step": 5115 }, { "epoch": 0.86, "grad_norm": 0.6181356310844421, "learning_rate": 1.6774703378451467e-05, "loss": 0.094, "step": 5116 }, { "epoch": 0.86, "grad_norm": 0.5890963077545166, "learning_rate": 1.677337507007161e-05, "loss": 0.0751, "step": 5117 }, { "epoch": 0.86, "grad_norm": 0.7908966541290283, "learning_rate": 1.677204654083727e-05, "loss": 0.0855, "step": 5118 }, { "epoch": 0.86, "grad_norm": 0.657237708568573, "learning_rate": 1.6770717790791767e-05, "loss": 0.0738, "step": 5119 }, { "epoch": 0.86, "grad_norm": 1.4056686162948608, "learning_rate": 1.6769388819978427e-05, "loss": 0.0729, "step": 5120 }, { "epoch": 0.86, "grad_norm": 0.7437232136726379, "learning_rate": 1.6768059628440582e-05, "loss": 0.1046, "step": 5121 }, { "epoch": 0.86, "grad_norm": 0.6886544227600098, "learning_rate": 1.676673021622157e-05, "loss": 0.0889, "step": 5122 }, { "epoch": 0.86, "grad_norm": 0.9712440967559814, "learning_rate": 1.676540058336474e-05, "loss": 0.0909, "step": 5123 }, { "epoch": 0.86, "grad_norm": 0.7285502552986145, "learning_rate": 1.676407072991345e-05, "loss": 0.078, "step": 5124 }, { "epoch": 0.86, "grad_norm": 0.6275630593299866, "learning_rate": 1.676274065591106e-05, "loss": 0.0651, "step": 5125 }, { "epoch": 0.86, "grad_norm": 0.6456653475761414, "learning_rate": 1.676141036140093e-05, "loss": 0.0595, "step": 5126 }, { "epoch": 0.86, "grad_norm": 0.8045451045036316, "learning_rate": 1.6760079846426453e-05, "loss": 0.0851, "step": 5127 }, { "epoch": 0.86, "grad_norm": 0.9414423108100891, "learning_rate": 1.6758749111030993e-05, "loss": 0.1013, "step": 5128 }, { "epoch": 0.86, "grad_norm": 0.805080235004425, "learning_rate": 1.6757418155257953e-05, "loss": 0.0687, "step": 5129 }, { "epoch": 0.86, "grad_norm": 0.7096648812294006, "learning_rate": 1.6756086979150726e-05, "loss": 0.0592, "step": 5130 }, { "epoch": 0.86, "grad_norm": 0.7634415030479431, "learning_rate": 1.675475558275272e-05, "loss": 0.0598, "step": 5131 }, { "epoch": 0.86, "grad_norm": 0.5825778841972351, "learning_rate": 1.6753423966107342e-05, "loss": 0.0674, "step": 5132 }, { "epoch": 0.86, "grad_norm": 0.7613228559494019, "learning_rate": 1.6752092129258014e-05, "loss": 0.0865, "step": 5133 }, { "epoch": 0.86, "grad_norm": 0.6737871170043945, "learning_rate": 1.6750760072248163e-05, "loss": 0.0921, "step": 5134 }, { "epoch": 0.86, "grad_norm": 0.8307148814201355, "learning_rate": 1.674942779512122e-05, "loss": 0.09, "step": 5135 }, { "epoch": 0.86, "grad_norm": 0.7108442187309265, "learning_rate": 1.674809529792063e-05, "loss": 0.0899, "step": 5136 }, { "epoch": 0.86, "grad_norm": 0.5735564827919006, "learning_rate": 1.6746762580689833e-05, "loss": 0.0561, "step": 5137 }, { "epoch": 0.86, "grad_norm": 0.9028509855270386, "learning_rate": 1.6745429643472292e-05, "loss": 0.1027, "step": 5138 }, { "epoch": 0.86, "grad_norm": 0.6887441277503967, "learning_rate": 1.6744096486311465e-05, "loss": 0.0794, "step": 5139 }, { "epoch": 0.86, "grad_norm": 0.9158569574356079, "learning_rate": 1.6742763109250818e-05, "loss": 0.0821, "step": 5140 }, { "epoch": 0.86, "grad_norm": 0.7594628930091858, "learning_rate": 1.6741429512333836e-05, "loss": 0.0848, "step": 5141 }, { "epoch": 0.86, "grad_norm": 0.770301103591919, "learning_rate": 1.6740095695603997e-05, "loss": 0.0776, "step": 5142 }, { "epoch": 0.86, "grad_norm": 0.6286722421646118, "learning_rate": 1.6738761659104793e-05, "loss": 0.0775, "step": 5143 }, { "epoch": 0.86, "grad_norm": 0.9456760287284851, "learning_rate": 1.673742740287972e-05, "loss": 0.1289, "step": 5144 }, { "epoch": 0.86, "grad_norm": 0.7417073249816895, "learning_rate": 1.6736092926972283e-05, "loss": 0.0833, "step": 5145 }, { "epoch": 0.86, "grad_norm": 0.664622962474823, "learning_rate": 1.6734758231426e-05, "loss": 0.0847, "step": 5146 }, { "epoch": 0.86, "grad_norm": 0.6791138052940369, "learning_rate": 1.6733423316284384e-05, "loss": 0.1018, "step": 5147 }, { "epoch": 0.86, "grad_norm": 1.1315269470214844, "learning_rate": 1.6732088181590968e-05, "loss": 0.0911, "step": 5148 }, { "epoch": 0.86, "grad_norm": 0.6868062019348145, "learning_rate": 1.673075282738928e-05, "loss": 0.0563, "step": 5149 }, { "epoch": 0.86, "grad_norm": 0.5245023369789124, "learning_rate": 1.672941725372286e-05, "loss": 0.0682, "step": 5150 }, { "epoch": 0.86, "grad_norm": 0.6250229477882385, "learning_rate": 1.6728081460635258e-05, "loss": 0.0708, "step": 5151 }, { "epoch": 0.86, "grad_norm": 0.8554136157035828, "learning_rate": 1.6726745448170035e-05, "loss": 0.0987, "step": 5152 }, { "epoch": 0.86, "grad_norm": 0.820161759853363, "learning_rate": 1.6725409216370743e-05, "loss": 0.0999, "step": 5153 }, { "epoch": 0.86, "grad_norm": 0.7396658658981323, "learning_rate": 1.6724072765280964e-05, "loss": 0.0833, "step": 5154 }, { "epoch": 0.86, "grad_norm": 0.6286609172821045, "learning_rate": 1.6722736094944262e-05, "loss": 0.0772, "step": 5155 }, { "epoch": 0.86, "grad_norm": 0.5349915623664856, "learning_rate": 1.672139920540423e-05, "loss": 0.0566, "step": 5156 }, { "epoch": 0.86, "grad_norm": 0.6997877955436707, "learning_rate": 1.6720062096704453e-05, "loss": 0.072, "step": 5157 }, { "epoch": 0.86, "grad_norm": 0.5458827614784241, "learning_rate": 1.6718724768888532e-05, "loss": 0.0589, "step": 5158 }, { "epoch": 0.86, "grad_norm": 0.6879476308822632, "learning_rate": 1.6717387222000073e-05, "loss": 0.0786, "step": 5159 }, { "epoch": 0.86, "grad_norm": 1.02435302734375, "learning_rate": 1.671604945608269e-05, "loss": 0.1089, "step": 5160 }, { "epoch": 0.86, "grad_norm": 0.6891131401062012, "learning_rate": 1.6714711471179995e-05, "loss": 0.0767, "step": 5161 }, { "epoch": 0.86, "grad_norm": 0.5627931356430054, "learning_rate": 1.671337326733562e-05, "loss": 0.0675, "step": 5162 }, { "epoch": 0.86, "grad_norm": 0.6486125588417053, "learning_rate": 1.6712034844593203e-05, "loss": 0.0667, "step": 5163 }, { "epoch": 0.86, "grad_norm": 0.7958359718322754, "learning_rate": 1.6710696202996374e-05, "loss": 0.0933, "step": 5164 }, { "epoch": 0.86, "grad_norm": 0.6467710733413696, "learning_rate": 1.670935734258879e-05, "loss": 0.0559, "step": 5165 }, { "epoch": 0.86, "grad_norm": 0.5999149680137634, "learning_rate": 1.6708018263414107e-05, "loss": 0.0726, "step": 5166 }, { "epoch": 0.86, "grad_norm": 0.6150429844856262, "learning_rate": 1.6706678965515977e-05, "loss": 0.0755, "step": 5167 }, { "epoch": 0.86, "grad_norm": 0.7917792201042175, "learning_rate": 1.670533944893808e-05, "loss": 0.0563, "step": 5168 }, { "epoch": 0.86, "grad_norm": 0.7547169327735901, "learning_rate": 1.6703999713724092e-05, "loss": 0.0807, "step": 5169 }, { "epoch": 0.86, "grad_norm": 1.069595217704773, "learning_rate": 1.6702659759917692e-05, "loss": 0.0684, "step": 5170 }, { "epoch": 0.86, "grad_norm": 0.8492429852485657, "learning_rate": 1.6701319587562572e-05, "loss": 0.0781, "step": 5171 }, { "epoch": 0.87, "grad_norm": 0.5698192119598389, "learning_rate": 1.669997919670243e-05, "loss": 0.0598, "step": 5172 }, { "epoch": 0.87, "grad_norm": 0.7488822340965271, "learning_rate": 1.6698638587380978e-05, "loss": 0.0745, "step": 5173 }, { "epoch": 0.87, "grad_norm": 0.9224265813827515, "learning_rate": 1.6697297759641917e-05, "loss": 0.1153, "step": 5174 }, { "epoch": 0.87, "grad_norm": 0.7674142122268677, "learning_rate": 1.669595671352897e-05, "loss": 0.089, "step": 5175 }, { "epoch": 0.87, "grad_norm": 0.58614581823349, "learning_rate": 1.669461544908587e-05, "loss": 0.0703, "step": 5176 }, { "epoch": 0.87, "grad_norm": 0.5719634890556335, "learning_rate": 1.6693273966356346e-05, "loss": 0.0674, "step": 5177 }, { "epoch": 0.87, "grad_norm": 0.7341902256011963, "learning_rate": 1.6691932265384133e-05, "loss": 0.0999, "step": 5178 }, { "epoch": 0.87, "grad_norm": 0.5999396443367004, "learning_rate": 1.669059034621299e-05, "loss": 0.0714, "step": 5179 }, { "epoch": 0.87, "grad_norm": 0.6094653010368347, "learning_rate": 1.6689248208886664e-05, "loss": 0.0546, "step": 5180 }, { "epoch": 0.87, "grad_norm": 0.6815565824508667, "learning_rate": 1.6687905853448917e-05, "loss": 0.098, "step": 5181 }, { "epoch": 0.87, "grad_norm": 1.0385371446609497, "learning_rate": 1.6686563279943528e-05, "loss": 0.1218, "step": 5182 }, { "epoch": 0.87, "grad_norm": 0.6512495279312134, "learning_rate": 1.668522048841426e-05, "loss": 0.0767, "step": 5183 }, { "epoch": 0.87, "grad_norm": 0.4828273355960846, "learning_rate": 1.6683877478904905e-05, "loss": 0.055, "step": 5184 }, { "epoch": 0.87, "grad_norm": 0.6118106842041016, "learning_rate": 1.668253425145925e-05, "loss": 0.0769, "step": 5185 }, { "epoch": 0.87, "grad_norm": 0.8605585694313049, "learning_rate": 1.668119080612109e-05, "loss": 0.0609, "step": 5186 }, { "epoch": 0.87, "grad_norm": 0.5788450241088867, "learning_rate": 1.667984714293424e-05, "loss": 0.0647, "step": 5187 }, { "epoch": 0.87, "grad_norm": 0.5647663474082947, "learning_rate": 1.66785032619425e-05, "loss": 0.0816, "step": 5188 }, { "epoch": 0.87, "grad_norm": 0.7931719422340393, "learning_rate": 1.6677159163189694e-05, "loss": 0.0891, "step": 5189 }, { "epoch": 0.87, "grad_norm": 0.7091003060340881, "learning_rate": 1.6675814846719652e-05, "loss": 0.0598, "step": 5190 }, { "epoch": 0.87, "grad_norm": 0.9944799542427063, "learning_rate": 1.6674470312576204e-05, "loss": 0.0871, "step": 5191 }, { "epoch": 0.87, "grad_norm": 0.6514475345611572, "learning_rate": 1.6673125560803184e-05, "loss": 0.0925, "step": 5192 }, { "epoch": 0.87, "grad_norm": 0.608214259147644, "learning_rate": 1.667178059144445e-05, "loss": 0.0689, "step": 5193 }, { "epoch": 0.87, "grad_norm": 0.5704972147941589, "learning_rate": 1.6670435404543848e-05, "loss": 0.0819, "step": 5194 }, { "epoch": 0.87, "grad_norm": 0.7816498875617981, "learning_rate": 1.6669090000145244e-05, "loss": 0.0812, "step": 5195 }, { "epoch": 0.87, "grad_norm": 0.8959964513778687, "learning_rate": 1.6667744378292504e-05, "loss": 0.1145, "step": 5196 }, { "epoch": 0.87, "grad_norm": 0.5938454866409302, "learning_rate": 1.666639853902951e-05, "loss": 0.0722, "step": 5197 }, { "epoch": 0.87, "grad_norm": 0.6920198202133179, "learning_rate": 1.6665052482400133e-05, "loss": 0.0795, "step": 5198 }, { "epoch": 0.87, "grad_norm": 0.5757547616958618, "learning_rate": 1.6663706208448273e-05, "loss": 0.0728, "step": 5199 }, { "epoch": 0.87, "grad_norm": 0.607605516910553, "learning_rate": 1.6662359717217826e-05, "loss": 0.073, "step": 5200 }, { "epoch": 0.87, "grad_norm": 0.6384401321411133, "learning_rate": 1.6661013008752694e-05, "loss": 0.0758, "step": 5201 }, { "epoch": 0.87, "grad_norm": 0.8493096828460693, "learning_rate": 1.6659666083096784e-05, "loss": 0.1011, "step": 5202 }, { "epoch": 0.87, "grad_norm": 0.6927019953727722, "learning_rate": 1.665831894029402e-05, "loss": 0.0952, "step": 5203 }, { "epoch": 0.87, "grad_norm": 0.7473151087760925, "learning_rate": 1.6656971580388325e-05, "loss": 0.0841, "step": 5204 }, { "epoch": 0.87, "grad_norm": 0.5269164443016052, "learning_rate": 1.6655624003423635e-05, "loss": 0.0741, "step": 5205 }, { "epoch": 0.87, "grad_norm": 0.6370682716369629, "learning_rate": 1.6654276209443884e-05, "loss": 0.0682, "step": 5206 }, { "epoch": 0.87, "grad_norm": 0.5831012725830078, "learning_rate": 1.6652928198493017e-05, "loss": 0.0675, "step": 5207 }, { "epoch": 0.87, "grad_norm": 0.559527575969696, "learning_rate": 1.6651579970615e-05, "loss": 0.0636, "step": 5208 }, { "epoch": 0.87, "grad_norm": 0.5727801322937012, "learning_rate": 1.6650231525853778e-05, "loss": 0.0732, "step": 5209 }, { "epoch": 0.87, "grad_norm": 0.712474524974823, "learning_rate": 1.664888286425333e-05, "loss": 0.0861, "step": 5210 }, { "epoch": 0.87, "grad_norm": 0.6271874904632568, "learning_rate": 1.6647533985857625e-05, "loss": 0.0658, "step": 5211 }, { "epoch": 0.87, "grad_norm": 0.5199757218360901, "learning_rate": 1.6646184890710648e-05, "loss": 0.0664, "step": 5212 }, { "epoch": 0.87, "grad_norm": 0.8349776864051819, "learning_rate": 1.6644835578856383e-05, "loss": 0.0871, "step": 5213 }, { "epoch": 0.87, "grad_norm": 0.5918932557106018, "learning_rate": 1.6643486050338835e-05, "loss": 0.0779, "step": 5214 }, { "epoch": 0.87, "grad_norm": 0.6304992437362671, "learning_rate": 1.6642136305202e-05, "loss": 0.0806, "step": 5215 }, { "epoch": 0.87, "grad_norm": 0.7023440003395081, "learning_rate": 1.664078634348989e-05, "loss": 0.0861, "step": 5216 }, { "epoch": 0.87, "grad_norm": 0.891193151473999, "learning_rate": 1.6639436165246516e-05, "loss": 0.0915, "step": 5217 }, { "epoch": 0.87, "grad_norm": 0.7736336588859558, "learning_rate": 1.6638085770515916e-05, "loss": 0.1016, "step": 5218 }, { "epoch": 0.87, "grad_norm": 0.720192551612854, "learning_rate": 1.663673515934211e-05, "loss": 0.068, "step": 5219 }, { "epoch": 0.87, "grad_norm": 0.6723824143409729, "learning_rate": 1.663538433176914e-05, "loss": 0.077, "step": 5220 }, { "epoch": 0.87, "grad_norm": 0.5165495276451111, "learning_rate": 1.663403328784105e-05, "loss": 0.0711, "step": 5221 }, { "epoch": 0.87, "grad_norm": 0.8761051297187805, "learning_rate": 1.66326820276019e-05, "loss": 0.0802, "step": 5222 }, { "epoch": 0.87, "grad_norm": 0.6440725922584534, "learning_rate": 1.6631330551095736e-05, "loss": 0.0665, "step": 5223 }, { "epoch": 0.87, "grad_norm": 0.6005043983459473, "learning_rate": 1.662997885836664e-05, "loss": 0.0708, "step": 5224 }, { "epoch": 0.87, "grad_norm": 0.6063329577445984, "learning_rate": 1.662862694945867e-05, "loss": 0.0794, "step": 5225 }, { "epoch": 0.87, "grad_norm": 0.9816546440124512, "learning_rate": 1.662727482441592e-05, "loss": 0.0738, "step": 5226 }, { "epoch": 0.87, "grad_norm": 0.5513136386871338, "learning_rate": 1.662592248328247e-05, "loss": 0.0549, "step": 5227 }, { "epoch": 0.87, "grad_norm": 0.5424852967262268, "learning_rate": 1.6624569926102418e-05, "loss": 0.0643, "step": 5228 }, { "epoch": 0.87, "grad_norm": 0.571164071559906, "learning_rate": 1.6623217152919866e-05, "loss": 0.0889, "step": 5229 }, { "epoch": 0.87, "grad_norm": 0.5678236484527588, "learning_rate": 1.662186416377892e-05, "loss": 0.0654, "step": 5230 }, { "epoch": 0.87, "grad_norm": 0.5959117412567139, "learning_rate": 1.6620510958723696e-05, "loss": 0.0955, "step": 5231 }, { "epoch": 0.88, "grad_norm": 0.6817308068275452, "learning_rate": 1.6619157537798322e-05, "loss": 0.0817, "step": 5232 }, { "epoch": 0.88, "grad_norm": 0.5822732448577881, "learning_rate": 1.6617803901046924e-05, "loss": 0.0673, "step": 5233 }, { "epoch": 0.88, "grad_norm": 0.8358700275421143, "learning_rate": 1.661645004851364e-05, "loss": 0.0913, "step": 5234 }, { "epoch": 0.88, "grad_norm": 0.6136894226074219, "learning_rate": 1.661509598024261e-05, "loss": 0.0792, "step": 5235 }, { "epoch": 0.88, "grad_norm": 0.8858882188796997, "learning_rate": 1.6613741696277995e-05, "loss": 0.0869, "step": 5236 }, { "epoch": 0.88, "grad_norm": 0.6229445934295654, "learning_rate": 1.6612387196663942e-05, "loss": 0.0788, "step": 5237 }, { "epoch": 0.88, "grad_norm": 0.820531964302063, "learning_rate": 1.661103248144462e-05, "loss": 0.082, "step": 5238 }, { "epoch": 0.88, "grad_norm": 0.6053138971328735, "learning_rate": 1.6609677550664208e-05, "loss": 0.0677, "step": 5239 }, { "epoch": 0.88, "grad_norm": 0.6391909122467041, "learning_rate": 1.660832240436688e-05, "loss": 0.0627, "step": 5240 }, { "epoch": 0.88, "grad_norm": 0.678412914276123, "learning_rate": 1.6606967042596818e-05, "loss": 0.0689, "step": 5241 }, { "epoch": 0.88, "grad_norm": 0.7361013293266296, "learning_rate": 1.660561146539822e-05, "loss": 0.0736, "step": 5242 }, { "epoch": 0.88, "grad_norm": 0.6705554723739624, "learning_rate": 1.6604255672815285e-05, "loss": 0.0681, "step": 5243 }, { "epoch": 0.88, "grad_norm": 0.6052228808403015, "learning_rate": 1.6602899664892224e-05, "loss": 0.081, "step": 5244 }, { "epoch": 0.88, "grad_norm": 0.704942524433136, "learning_rate": 1.6601543441673246e-05, "loss": 0.08, "step": 5245 }, { "epoch": 0.88, "grad_norm": 0.7229135036468506, "learning_rate": 1.6600187003202576e-05, "loss": 0.076, "step": 5246 }, { "epoch": 0.88, "grad_norm": 0.7214256525039673, "learning_rate": 1.659883034952444e-05, "loss": 0.0941, "step": 5247 }, { "epoch": 0.88, "grad_norm": 0.5938209295272827, "learning_rate": 1.6597473480683074e-05, "loss": 0.0575, "step": 5248 }, { "epoch": 0.88, "grad_norm": 0.7603389024734497, "learning_rate": 1.659611639672272e-05, "loss": 0.094, "step": 5249 }, { "epoch": 0.88, "grad_norm": 0.6017431616783142, "learning_rate": 1.6594759097687632e-05, "loss": 0.0735, "step": 5250 }, { "epoch": 0.88, "grad_norm": 0.9246655702590942, "learning_rate": 1.659340158362206e-05, "loss": 0.0887, "step": 5251 }, { "epoch": 0.88, "grad_norm": 0.5684270262718201, "learning_rate": 1.6592043854570267e-05, "loss": 0.0604, "step": 5252 }, { "epoch": 0.88, "grad_norm": 0.4875974953174591, "learning_rate": 1.659068591057653e-05, "loss": 0.0666, "step": 5253 }, { "epoch": 0.88, "grad_norm": 0.614639163017273, "learning_rate": 1.6589327751685126e-05, "loss": 0.088, "step": 5254 }, { "epoch": 0.88, "grad_norm": 0.48016026616096497, "learning_rate": 1.6587969377940334e-05, "loss": 0.0626, "step": 5255 }, { "epoch": 0.88, "grad_norm": 0.656789243221283, "learning_rate": 1.658661078938645e-05, "loss": 0.0835, "step": 5256 }, { "epoch": 0.88, "grad_norm": 0.5180070400238037, "learning_rate": 1.658525198606777e-05, "loss": 0.0558, "step": 5257 }, { "epoch": 0.88, "grad_norm": 0.939117431640625, "learning_rate": 1.6583892968028595e-05, "loss": 0.0688, "step": 5258 }, { "epoch": 0.88, "grad_norm": 1.0031861066818237, "learning_rate": 1.6582533735313248e-05, "loss": 0.0774, "step": 5259 }, { "epoch": 0.88, "grad_norm": 0.5747939348220825, "learning_rate": 1.658117428796604e-05, "loss": 0.052, "step": 5260 }, { "epoch": 0.88, "grad_norm": 0.5955926775932312, "learning_rate": 1.6579814626031306e-05, "loss": 0.0616, "step": 5261 }, { "epoch": 0.88, "grad_norm": 0.8173959851264954, "learning_rate": 1.657845474955337e-05, "loss": 0.0759, "step": 5262 }, { "epoch": 0.88, "grad_norm": 0.884200930595398, "learning_rate": 1.6577094658576574e-05, "loss": 0.1056, "step": 5263 }, { "epoch": 0.88, "grad_norm": 0.6030626893043518, "learning_rate": 1.6575734353145275e-05, "loss": 0.0751, "step": 5264 }, { "epoch": 0.88, "grad_norm": 1.2412095069885254, "learning_rate": 1.6574373833303815e-05, "loss": 0.0768, "step": 5265 }, { "epoch": 0.88, "grad_norm": 0.8370862007141113, "learning_rate": 1.6573013099096567e-05, "loss": 0.0952, "step": 5266 }, { "epoch": 0.88, "grad_norm": 0.9693639278411865, "learning_rate": 1.657165215056789e-05, "loss": 0.095, "step": 5267 }, { "epoch": 0.88, "grad_norm": 1.2461777925491333, "learning_rate": 1.6570290987762164e-05, "loss": 0.0897, "step": 5268 }, { "epoch": 0.88, "grad_norm": 0.4701043963432312, "learning_rate": 1.6568929610723768e-05, "loss": 0.0515, "step": 5269 }, { "epoch": 0.88, "grad_norm": 0.620661199092865, "learning_rate": 1.6567568019497095e-05, "loss": 0.0811, "step": 5270 }, { "epoch": 0.88, "grad_norm": 0.6117709875106812, "learning_rate": 1.6566206214126545e-05, "loss": 0.0894, "step": 5271 }, { "epoch": 0.88, "grad_norm": 0.6469999551773071, "learning_rate": 1.656484419465651e-05, "loss": 0.09, "step": 5272 }, { "epoch": 0.88, "grad_norm": 0.6445299983024597, "learning_rate": 1.656348196113141e-05, "loss": 0.0879, "step": 5273 }, { "epoch": 0.88, "grad_norm": 0.5938782095909119, "learning_rate": 1.6562119513595663e-05, "loss": 0.0883, "step": 5274 }, { "epoch": 0.88, "grad_norm": 0.6383653283119202, "learning_rate": 1.6560756852093683e-05, "loss": 0.0877, "step": 5275 }, { "epoch": 0.88, "grad_norm": 0.8604979515075684, "learning_rate": 1.655939397666991e-05, "loss": 0.0863, "step": 5276 }, { "epoch": 0.88, "grad_norm": 0.7270466685295105, "learning_rate": 1.6558030887368783e-05, "loss": 0.0981, "step": 5277 }, { "epoch": 0.88, "grad_norm": 0.6052990555763245, "learning_rate": 1.6556667584234745e-05, "loss": 0.0607, "step": 5278 }, { "epoch": 0.88, "grad_norm": 0.9569981098175049, "learning_rate": 1.6555304067312246e-05, "loss": 0.0983, "step": 5279 }, { "epoch": 0.88, "grad_norm": 0.8112359046936035, "learning_rate": 1.6553940336645747e-05, "loss": 0.0975, "step": 5280 }, { "epoch": 0.88, "grad_norm": 0.5200640559196472, "learning_rate": 1.6552576392279714e-05, "loss": 0.066, "step": 5281 }, { "epoch": 0.88, "grad_norm": 0.6858423948287964, "learning_rate": 1.655121223425862e-05, "loss": 0.0965, "step": 5282 }, { "epoch": 0.88, "grad_norm": 0.7325052618980408, "learning_rate": 1.6549847862626946e-05, "loss": 0.0849, "step": 5283 }, { "epoch": 0.88, "grad_norm": 0.7074587941169739, "learning_rate": 1.6548483277429178e-05, "loss": 0.0715, "step": 5284 }, { "epoch": 0.88, "grad_norm": 0.6132060885429382, "learning_rate": 1.6547118478709813e-05, "loss": 0.0835, "step": 5285 }, { "epoch": 0.88, "grad_norm": 0.9087380170822144, "learning_rate": 1.6545753466513348e-05, "loss": 0.0765, "step": 5286 }, { "epoch": 0.88, "grad_norm": 0.9728269577026367, "learning_rate": 1.654438824088429e-05, "loss": 0.1069, "step": 5287 }, { "epoch": 0.88, "grad_norm": 0.5076737403869629, "learning_rate": 1.6543022801867163e-05, "loss": 0.0575, "step": 5288 }, { "epoch": 0.88, "grad_norm": 0.9789949059486389, "learning_rate": 1.6541657149506476e-05, "loss": 0.1099, "step": 5289 }, { "epoch": 0.88, "grad_norm": 0.6523011922836304, "learning_rate": 1.654029128384677e-05, "loss": 0.0682, "step": 5290 }, { "epoch": 0.88, "grad_norm": 0.648041844367981, "learning_rate": 1.6538925204932564e-05, "loss": 0.0747, "step": 5291 }, { "epoch": 0.89, "grad_norm": 0.7423474192619324, "learning_rate": 1.6537558912808424e-05, "loss": 0.0893, "step": 5292 }, { "epoch": 0.89, "grad_norm": 0.6733304262161255, "learning_rate": 1.653619240751888e-05, "loss": 0.0963, "step": 5293 }, { "epoch": 0.89, "grad_norm": 0.6463368535041809, "learning_rate": 1.6534825689108497e-05, "loss": 0.0666, "step": 5294 }, { "epoch": 0.89, "grad_norm": 0.6126164793968201, "learning_rate": 1.6533458757621842e-05, "loss": 0.061, "step": 5295 }, { "epoch": 0.89, "grad_norm": 0.6641249656677246, "learning_rate": 1.6532091613103477e-05, "loss": 0.0732, "step": 5296 }, { "epoch": 0.89, "grad_norm": 0.8415684700012207, "learning_rate": 1.6530724255597982e-05, "loss": 0.0839, "step": 5297 }, { "epoch": 0.89, "grad_norm": 0.6714141368865967, "learning_rate": 1.6529356685149943e-05, "loss": 0.0837, "step": 5298 }, { "epoch": 0.89, "grad_norm": 0.6047099232673645, "learning_rate": 1.6527988901803957e-05, "loss": 0.0764, "step": 5299 }, { "epoch": 0.89, "grad_norm": 0.8716290593147278, "learning_rate": 1.6526620905604615e-05, "loss": 0.0838, "step": 5300 }, { "epoch": 0.89, "grad_norm": 0.652132511138916, "learning_rate": 1.652525269659652e-05, "loss": 0.0776, "step": 5301 }, { "epoch": 0.89, "grad_norm": 0.5665686726570129, "learning_rate": 1.6523884274824287e-05, "loss": 0.0603, "step": 5302 }, { "epoch": 0.89, "grad_norm": 0.48364579677581787, "learning_rate": 1.652251564033254e-05, "loss": 0.0652, "step": 5303 }, { "epoch": 0.89, "grad_norm": 0.8511363863945007, "learning_rate": 1.6521146793165903e-05, "loss": 0.0972, "step": 5304 }, { "epoch": 0.89, "grad_norm": 0.7667664885520935, "learning_rate": 1.6519777733369e-05, "loss": 0.0756, "step": 5305 }, { "epoch": 0.89, "grad_norm": 0.8230410814285278, "learning_rate": 1.6518408460986487e-05, "loss": 0.0794, "step": 5306 }, { "epoch": 0.89, "grad_norm": 0.7006301879882812, "learning_rate": 1.6517038976063e-05, "loss": 0.0663, "step": 5307 }, { "epoch": 0.89, "grad_norm": 0.7029027342796326, "learning_rate": 1.651566927864319e-05, "loss": 0.0712, "step": 5308 }, { "epoch": 0.89, "grad_norm": 0.5762102603912354, "learning_rate": 1.6514299368771727e-05, "loss": 0.0779, "step": 5309 }, { "epoch": 0.89, "grad_norm": 0.5045766830444336, "learning_rate": 1.651292924649327e-05, "loss": 0.0808, "step": 5310 }, { "epoch": 0.89, "grad_norm": 0.7157463431358337, "learning_rate": 1.6511558911852504e-05, "loss": 0.0929, "step": 5311 }, { "epoch": 0.89, "grad_norm": 0.6128275394439697, "learning_rate": 1.65101883648941e-05, "loss": 0.0756, "step": 5312 }, { "epoch": 0.89, "grad_norm": 0.7956307530403137, "learning_rate": 1.650881760566275e-05, "loss": 0.079, "step": 5313 }, { "epoch": 0.89, "grad_norm": 0.8615260720252991, "learning_rate": 1.6507446634203152e-05, "loss": 0.1007, "step": 5314 }, { "epoch": 0.89, "grad_norm": 0.671286940574646, "learning_rate": 1.6506075450560006e-05, "loss": 0.0727, "step": 5315 }, { "epoch": 0.89, "grad_norm": 0.5749092698097229, "learning_rate": 1.6504704054778026e-05, "loss": 0.0676, "step": 5316 }, { "epoch": 0.89, "grad_norm": 0.6448858976364136, "learning_rate": 1.6503332446901917e-05, "loss": 0.0895, "step": 5317 }, { "epoch": 0.89, "grad_norm": 0.7008416056632996, "learning_rate": 1.650196062697641e-05, "loss": 0.0852, "step": 5318 }, { "epoch": 0.89, "grad_norm": 0.8271311521530151, "learning_rate": 1.6500588595046232e-05, "loss": 0.0625, "step": 5319 }, { "epoch": 0.89, "grad_norm": 0.6853523254394531, "learning_rate": 1.6499216351156126e-05, "loss": 0.0802, "step": 5320 }, { "epoch": 0.89, "grad_norm": 0.7164683938026428, "learning_rate": 1.6497843895350828e-05, "loss": 0.0748, "step": 5321 }, { "epoch": 0.89, "grad_norm": 0.570732057094574, "learning_rate": 1.649647122767509e-05, "loss": 0.0653, "step": 5322 }, { "epoch": 0.89, "grad_norm": 0.8741536736488342, "learning_rate": 1.6495098348173678e-05, "loss": 0.0966, "step": 5323 }, { "epoch": 0.89, "grad_norm": 0.6492180228233337, "learning_rate": 1.6493725256891347e-05, "loss": 0.075, "step": 5324 }, { "epoch": 0.89, "grad_norm": 0.6155480742454529, "learning_rate": 1.6492351953872866e-05, "loss": 0.0786, "step": 5325 }, { "epoch": 0.89, "grad_norm": 0.7831256985664368, "learning_rate": 1.6490978439163027e-05, "loss": 0.088, "step": 5326 }, { "epoch": 0.89, "grad_norm": 0.5627887845039368, "learning_rate": 1.64896047128066e-05, "loss": 0.0796, "step": 5327 }, { "epoch": 0.89, "grad_norm": 0.6649146676063538, "learning_rate": 1.6488230774848387e-05, "loss": 0.0609, "step": 5328 }, { "epoch": 0.89, "grad_norm": 0.6841012835502625, "learning_rate": 1.6486856625333182e-05, "loss": 0.0775, "step": 5329 }, { "epoch": 0.89, "grad_norm": 0.6069280505180359, "learning_rate": 1.6485482264305793e-05, "loss": 0.0696, "step": 5330 }, { "epoch": 0.89, "grad_norm": 0.6440757513046265, "learning_rate": 1.6484107691811035e-05, "loss": 0.0803, "step": 5331 }, { "epoch": 0.89, "grad_norm": 0.6538782715797424, "learning_rate": 1.6482732907893723e-05, "loss": 0.0734, "step": 5332 }, { "epoch": 0.89, "grad_norm": 0.3976247310638428, "learning_rate": 1.6481357912598685e-05, "loss": 0.0559, "step": 5333 }, { "epoch": 0.89, "grad_norm": 0.5626494288444519, "learning_rate": 1.6479982705970756e-05, "loss": 0.0833, "step": 5334 }, { "epoch": 0.89, "grad_norm": 0.5056717991828918, "learning_rate": 1.6478607288054774e-05, "loss": 0.0801, "step": 5335 }, { "epoch": 0.89, "grad_norm": 0.6755471229553223, "learning_rate": 1.6477231658895593e-05, "loss": 0.0717, "step": 5336 }, { "epoch": 0.89, "grad_norm": 0.5700093507766724, "learning_rate": 1.6475855818538057e-05, "loss": 0.0748, "step": 5337 }, { "epoch": 0.89, "grad_norm": 0.6539471745491028, "learning_rate": 1.6474479767027035e-05, "loss": 0.0806, "step": 5338 }, { "epoch": 0.89, "grad_norm": 0.4860822558403015, "learning_rate": 1.647310350440739e-05, "loss": 0.0529, "step": 5339 }, { "epoch": 0.89, "grad_norm": 0.7336251139640808, "learning_rate": 1.6471727030723993e-05, "loss": 0.0678, "step": 5340 }, { "epoch": 0.89, "grad_norm": 0.485834538936615, "learning_rate": 1.6470350346021738e-05, "loss": 0.0642, "step": 5341 }, { "epoch": 0.89, "grad_norm": 0.5132704973220825, "learning_rate": 1.6468973450345508e-05, "loss": 0.0595, "step": 5342 }, { "epoch": 0.89, "grad_norm": 0.5160811543464661, "learning_rate": 1.6467596343740194e-05, "loss": 0.0543, "step": 5343 }, { "epoch": 0.89, "grad_norm": 0.6481834650039673, "learning_rate": 1.6466219026250703e-05, "loss": 0.0628, "step": 5344 }, { "epoch": 0.89, "grad_norm": 0.6072438359260559, "learning_rate": 1.6464841497921942e-05, "loss": 0.0598, "step": 5345 }, { "epoch": 0.89, "grad_norm": 0.9137881994247437, "learning_rate": 1.6463463758798827e-05, "loss": 0.1083, "step": 5346 }, { "epoch": 0.89, "grad_norm": 0.7186222672462463, "learning_rate": 1.6462085808926285e-05, "loss": 0.09, "step": 5347 }, { "epoch": 0.89, "grad_norm": 0.7547255158424377, "learning_rate": 1.6460707648349242e-05, "loss": 0.0647, "step": 5348 }, { "epoch": 0.89, "grad_norm": 0.8648916482925415, "learning_rate": 1.6459329277112632e-05, "loss": 0.0707, "step": 5349 }, { "epoch": 0.89, "grad_norm": 0.7544140219688416, "learning_rate": 1.64579506952614e-05, "loss": 0.072, "step": 5350 }, { "epoch": 0.89, "grad_norm": 0.7521888017654419, "learning_rate": 1.6456571902840507e-05, "loss": 0.1014, "step": 5351 }, { "epoch": 0.9, "grad_norm": 0.6298279166221619, "learning_rate": 1.6455192899894897e-05, "loss": 0.0874, "step": 5352 }, { "epoch": 0.9, "grad_norm": 0.6001271605491638, "learning_rate": 1.6453813686469535e-05, "loss": 0.086, "step": 5353 }, { "epoch": 0.9, "grad_norm": 0.6074095964431763, "learning_rate": 1.6452434262609397e-05, "loss": 0.0709, "step": 5354 }, { "epoch": 0.9, "grad_norm": 0.9869176745414734, "learning_rate": 1.6451054628359462e-05, "loss": 0.0754, "step": 5355 }, { "epoch": 0.9, "grad_norm": 0.7550625205039978, "learning_rate": 1.644967478376471e-05, "loss": 0.0892, "step": 5356 }, { "epoch": 0.9, "grad_norm": 0.8548815846443176, "learning_rate": 1.6448294728870137e-05, "loss": 0.0759, "step": 5357 }, { "epoch": 0.9, "grad_norm": 1.4222185611724854, "learning_rate": 1.644691446372074e-05, "loss": 0.0875, "step": 5358 }, { "epoch": 0.9, "grad_norm": 0.7671900391578674, "learning_rate": 1.644553398836152e-05, "loss": 0.0711, "step": 5359 }, { "epoch": 0.9, "grad_norm": 0.7522592544555664, "learning_rate": 1.6444153302837493e-05, "loss": 0.0831, "step": 5360 }, { "epoch": 0.9, "grad_norm": 0.7128918170928955, "learning_rate": 1.644277240719368e-05, "loss": 0.0734, "step": 5361 }, { "epoch": 0.9, "grad_norm": 0.59235018491745, "learning_rate": 1.6441391301475103e-05, "loss": 0.0866, "step": 5362 }, { "epoch": 0.9, "grad_norm": 0.7493088841438293, "learning_rate": 1.6440009985726796e-05, "loss": 0.0939, "step": 5363 }, { "epoch": 0.9, "grad_norm": 0.6910860538482666, "learning_rate": 1.6438628459993797e-05, "loss": 0.0803, "step": 5364 }, { "epoch": 0.9, "grad_norm": 0.7051317095756531, "learning_rate": 1.6437246724321158e-05, "loss": 0.0838, "step": 5365 }, { "epoch": 0.9, "grad_norm": 0.8047508001327515, "learning_rate": 1.6435864778753927e-05, "loss": 0.1086, "step": 5366 }, { "epoch": 0.9, "grad_norm": 0.6324968338012695, "learning_rate": 1.6434482623337164e-05, "loss": 0.0816, "step": 5367 }, { "epoch": 0.9, "grad_norm": 1.1572675704956055, "learning_rate": 1.643310025811594e-05, "loss": 0.1063, "step": 5368 }, { "epoch": 0.9, "grad_norm": 0.6817368865013123, "learning_rate": 1.643171768313532e-05, "loss": 0.0713, "step": 5369 }, { "epoch": 0.9, "grad_norm": 0.6642577648162842, "learning_rate": 1.6430334898440397e-05, "loss": 0.0902, "step": 5370 }, { "epoch": 0.9, "grad_norm": 0.6370989680290222, "learning_rate": 1.642895190407625e-05, "loss": 0.1071, "step": 5371 }, { "epoch": 0.9, "grad_norm": 0.6391515731811523, "learning_rate": 1.642756870008798e-05, "loss": 0.0898, "step": 5372 }, { "epoch": 0.9, "grad_norm": 0.7155295610427856, "learning_rate": 1.6426185286520676e-05, "loss": 0.0749, "step": 5373 }, { "epoch": 0.9, "grad_norm": 0.6724854111671448, "learning_rate": 1.6424801663419462e-05, "loss": 0.0681, "step": 5374 }, { "epoch": 0.9, "grad_norm": 0.54510498046875, "learning_rate": 1.6423417830829437e-05, "loss": 0.0588, "step": 5375 }, { "epoch": 0.9, "grad_norm": 0.6999828219413757, "learning_rate": 1.6422033788795736e-05, "loss": 0.0845, "step": 5376 }, { "epoch": 0.9, "grad_norm": 0.7760178446769714, "learning_rate": 1.6420649537363476e-05, "loss": 0.0891, "step": 5377 }, { "epoch": 0.9, "grad_norm": 0.5753107070922852, "learning_rate": 1.64192650765778e-05, "loss": 0.0668, "step": 5378 }, { "epoch": 0.9, "grad_norm": 0.6613947153091431, "learning_rate": 1.6417880406483847e-05, "loss": 0.091, "step": 5379 }, { "epoch": 0.9, "grad_norm": 0.6901583671569824, "learning_rate": 1.6416495527126768e-05, "loss": 0.092, "step": 5380 }, { "epoch": 0.9, "grad_norm": 0.7826330661773682, "learning_rate": 1.6415110438551717e-05, "loss": 0.0821, "step": 5381 }, { "epoch": 0.9, "grad_norm": 0.6721317768096924, "learning_rate": 1.6413725140803856e-05, "loss": 0.0681, "step": 5382 }, { "epoch": 0.9, "grad_norm": 0.7501860857009888, "learning_rate": 1.6412339633928357e-05, "loss": 0.0835, "step": 5383 }, { "epoch": 0.9, "grad_norm": 0.5428225994110107, "learning_rate": 1.6410953917970392e-05, "loss": 0.0821, "step": 5384 }, { "epoch": 0.9, "grad_norm": 0.8378201723098755, "learning_rate": 1.640956799297515e-05, "loss": 0.0859, "step": 5385 }, { "epoch": 0.9, "grad_norm": 1.542656421661377, "learning_rate": 1.6408181858987816e-05, "loss": 0.0717, "step": 5386 }, { "epoch": 0.9, "grad_norm": 0.6821023225784302, "learning_rate": 1.640679551605359e-05, "loss": 0.0755, "step": 5387 }, { "epoch": 0.9, "grad_norm": 0.9578588008880615, "learning_rate": 1.6405408964217674e-05, "loss": 0.0659, "step": 5388 }, { "epoch": 0.9, "grad_norm": 0.6151819825172424, "learning_rate": 1.6404022203525274e-05, "loss": 0.0716, "step": 5389 }, { "epoch": 0.9, "grad_norm": 0.5970706939697266, "learning_rate": 1.6402635234021614e-05, "loss": 0.0677, "step": 5390 }, { "epoch": 0.9, "grad_norm": 0.6769123673439026, "learning_rate": 1.640124805575191e-05, "loss": 0.0799, "step": 5391 }, { "epoch": 0.9, "grad_norm": 2.05405855178833, "learning_rate": 1.6399860668761405e-05, "loss": 0.0778, "step": 5392 }, { "epoch": 0.9, "grad_norm": 0.8225135207176208, "learning_rate": 1.6398473073095327e-05, "loss": 0.077, "step": 5393 }, { "epoch": 0.9, "grad_norm": 0.7560281157493591, "learning_rate": 1.639708526879892e-05, "loss": 0.0843, "step": 5394 }, { "epoch": 0.9, "grad_norm": 0.6747023463249207, "learning_rate": 1.639569725591744e-05, "loss": 0.0661, "step": 5395 }, { "epoch": 0.9, "grad_norm": 0.6363098621368408, "learning_rate": 1.639430903449614e-05, "loss": 0.0716, "step": 5396 }, { "epoch": 0.9, "grad_norm": 1.1518566608428955, "learning_rate": 1.6392920604580288e-05, "loss": 0.0755, "step": 5397 }, { "epoch": 0.9, "grad_norm": 0.4684644341468811, "learning_rate": 1.6391531966215154e-05, "loss": 0.0593, "step": 5398 }, { "epoch": 0.9, "grad_norm": 0.6479926109313965, "learning_rate": 1.639014311944602e-05, "loss": 0.0611, "step": 5399 }, { "epoch": 0.9, "grad_norm": 0.8094618320465088, "learning_rate": 1.638875406431817e-05, "loss": 0.0909, "step": 5400 }, { "epoch": 0.9, "grad_norm": 0.9396870136260986, "learning_rate": 1.6387364800876885e-05, "loss": 0.0658, "step": 5401 }, { "epoch": 0.9, "grad_norm": 0.5841017365455627, "learning_rate": 1.638597532916748e-05, "loss": 0.0601, "step": 5402 }, { "epoch": 0.9, "grad_norm": 0.653580367565155, "learning_rate": 1.6384585649235248e-05, "loss": 0.0692, "step": 5403 }, { "epoch": 0.9, "grad_norm": 0.7840298414230347, "learning_rate": 1.6383195761125513e-05, "loss": 0.0693, "step": 5404 }, { "epoch": 0.9, "grad_norm": 0.6562392711639404, "learning_rate": 1.6381805664883584e-05, "loss": 0.08, "step": 5405 }, { "epoch": 0.9, "grad_norm": 0.6280723810195923, "learning_rate": 1.638041536055479e-05, "loss": 0.0609, "step": 5406 }, { "epoch": 0.9, "grad_norm": 0.6835314035415649, "learning_rate": 1.6379024848184467e-05, "loss": 0.0631, "step": 5407 }, { "epoch": 0.9, "grad_norm": 0.5215386152267456, "learning_rate": 1.6377634127817953e-05, "loss": 0.0623, "step": 5408 }, { "epoch": 0.9, "grad_norm": 1.0790562629699707, "learning_rate": 1.6376243199500584e-05, "loss": 0.0789, "step": 5409 }, { "epoch": 0.9, "grad_norm": 0.7092482447624207, "learning_rate": 1.637485206327773e-05, "loss": 0.0779, "step": 5410 }, { "epoch": 0.91, "grad_norm": 0.5517691373825073, "learning_rate": 1.637346071919474e-05, "loss": 0.0682, "step": 5411 }, { "epoch": 0.91, "grad_norm": 0.5924370884895325, "learning_rate": 1.637206916729698e-05, "loss": 0.0691, "step": 5412 }, { "epoch": 0.91, "grad_norm": 0.6843705773353577, "learning_rate": 1.6370677407629828e-05, "loss": 0.0707, "step": 5413 }, { "epoch": 0.91, "grad_norm": 0.8984946012496948, "learning_rate": 1.6369285440238664e-05, "loss": 0.0826, "step": 5414 }, { "epoch": 0.91, "grad_norm": 0.5691632032394409, "learning_rate": 1.6367893265168872e-05, "loss": 0.0705, "step": 5415 }, { "epoch": 0.91, "grad_norm": 0.8385004997253418, "learning_rate": 1.6366500882465846e-05, "loss": 0.0789, "step": 5416 }, { "epoch": 0.91, "grad_norm": 0.6643189787864685, "learning_rate": 1.6365108292174988e-05, "loss": 0.0946, "step": 5417 }, { "epoch": 0.91, "grad_norm": 0.6555461287498474, "learning_rate": 1.6363715494341704e-05, "loss": 0.0853, "step": 5418 }, { "epoch": 0.91, "grad_norm": 0.6676856279373169, "learning_rate": 1.636232248901141e-05, "loss": 0.0634, "step": 5419 }, { "epoch": 0.91, "grad_norm": 0.7291398048400879, "learning_rate": 1.6360929276229523e-05, "loss": 0.0984, "step": 5420 }, { "epoch": 0.91, "grad_norm": 0.796563446521759, "learning_rate": 1.6359535856041472e-05, "loss": 0.0777, "step": 5421 }, { "epoch": 0.91, "grad_norm": 0.6675823330879211, "learning_rate": 1.6358142228492697e-05, "loss": 0.0952, "step": 5422 }, { "epoch": 0.91, "grad_norm": 0.7595288753509521, "learning_rate": 1.6356748393628628e-05, "loss": 0.0593, "step": 5423 }, { "epoch": 0.91, "grad_norm": 0.7418738007545471, "learning_rate": 1.6355354351494723e-05, "loss": 0.0684, "step": 5424 }, { "epoch": 0.91, "grad_norm": 0.7912361025810242, "learning_rate": 1.635396010213643e-05, "loss": 0.0752, "step": 5425 }, { "epoch": 0.91, "grad_norm": 0.6480037569999695, "learning_rate": 1.6352565645599213e-05, "loss": 0.0751, "step": 5426 }, { "epoch": 0.91, "grad_norm": 0.9808728098869324, "learning_rate": 1.6351170981928536e-05, "loss": 0.0676, "step": 5427 }, { "epoch": 0.91, "grad_norm": 0.6756396293640137, "learning_rate": 1.634977611116988e-05, "loss": 0.0804, "step": 5428 }, { "epoch": 0.91, "grad_norm": 0.749998927116394, "learning_rate": 1.6348381033368723e-05, "loss": 0.0825, "step": 5429 }, { "epoch": 0.91, "grad_norm": 0.847554087638855, "learning_rate": 1.634698574857055e-05, "loss": 0.0716, "step": 5430 }, { "epoch": 0.91, "grad_norm": 0.7626892924308777, "learning_rate": 1.6345590256820868e-05, "loss": 0.0896, "step": 5431 }, { "epoch": 0.91, "grad_norm": 0.7429717779159546, "learning_rate": 1.6344194558165166e-05, "loss": 0.076, "step": 5432 }, { "epoch": 0.91, "grad_norm": 0.7415067553520203, "learning_rate": 1.6342798652648955e-05, "loss": 0.059, "step": 5433 }, { "epoch": 0.91, "grad_norm": 0.7116036415100098, "learning_rate": 1.6341402540317758e-05, "loss": 0.0631, "step": 5434 }, { "epoch": 0.91, "grad_norm": 0.8530421853065491, "learning_rate": 1.6340006221217087e-05, "loss": 0.0758, "step": 5435 }, { "epoch": 0.91, "grad_norm": 0.8457171320915222, "learning_rate": 1.633860969539248e-05, "loss": 0.0995, "step": 5436 }, { "epoch": 0.91, "grad_norm": 0.668890118598938, "learning_rate": 1.6337212962889463e-05, "loss": 0.0756, "step": 5437 }, { "epoch": 0.91, "grad_norm": 0.6854732632637024, "learning_rate": 1.6335816023753586e-05, "loss": 0.0608, "step": 5438 }, { "epoch": 0.91, "grad_norm": 0.7074885964393616, "learning_rate": 1.6334418878030392e-05, "loss": 0.0887, "step": 5439 }, { "epoch": 0.91, "grad_norm": 0.6519723534584045, "learning_rate": 1.633302152576544e-05, "loss": 0.085, "step": 5440 }, { "epoch": 0.91, "grad_norm": 0.7013764381408691, "learning_rate": 1.6331623967004296e-05, "loss": 0.0748, "step": 5441 }, { "epoch": 0.91, "grad_norm": 0.836834192276001, "learning_rate": 1.6330226201792524e-05, "loss": 0.0764, "step": 5442 }, { "epoch": 0.91, "grad_norm": 0.8972595930099487, "learning_rate": 1.63288282301757e-05, "loss": 0.0756, "step": 5443 }, { "epoch": 0.91, "grad_norm": 0.5089814066886902, "learning_rate": 1.6327430052199406e-05, "loss": 0.0632, "step": 5444 }, { "epoch": 0.91, "grad_norm": 0.6981950998306274, "learning_rate": 1.6326031667909237e-05, "loss": 0.0815, "step": 5445 }, { "epoch": 0.91, "grad_norm": 0.7844945788383484, "learning_rate": 1.6324633077350784e-05, "loss": 0.0861, "step": 5446 }, { "epoch": 0.91, "grad_norm": 0.8148171901702881, "learning_rate": 1.632323428056965e-05, "loss": 0.0879, "step": 5447 }, { "epoch": 0.91, "grad_norm": 0.6085726022720337, "learning_rate": 1.6321835277611446e-05, "loss": 0.0617, "step": 5448 }, { "epoch": 0.91, "grad_norm": 0.5918945670127869, "learning_rate": 1.6320436068521786e-05, "loss": 0.061, "step": 5449 }, { "epoch": 0.91, "grad_norm": 0.6186773777008057, "learning_rate": 1.63190366533463e-05, "loss": 0.0722, "step": 5450 }, { "epoch": 0.91, "grad_norm": 0.5989558696746826, "learning_rate": 1.6317637032130608e-05, "loss": 0.069, "step": 5451 }, { "epoch": 0.91, "grad_norm": 0.5974867343902588, "learning_rate": 1.6316237204920353e-05, "loss": 0.0561, "step": 5452 }, { "epoch": 0.91, "grad_norm": 0.5913438200950623, "learning_rate": 1.6314837171761178e-05, "loss": 0.0645, "step": 5453 }, { "epoch": 0.91, "grad_norm": 0.6243845820426941, "learning_rate": 1.631343693269873e-05, "loss": 0.0669, "step": 5454 }, { "epoch": 0.91, "grad_norm": 0.6569207310676575, "learning_rate": 1.6312036487778665e-05, "loss": 0.0724, "step": 5455 }, { "epoch": 0.91, "grad_norm": 0.8811753392219543, "learning_rate": 1.6310635837046647e-05, "loss": 0.0704, "step": 5456 }, { "epoch": 0.91, "grad_norm": 0.7808831930160522, "learning_rate": 1.630923498054835e-05, "loss": 0.0756, "step": 5457 }, { "epoch": 0.91, "grad_norm": 0.5509744882583618, "learning_rate": 1.6307833918329445e-05, "loss": 0.078, "step": 5458 }, { "epoch": 0.91, "grad_norm": 0.7106332182884216, "learning_rate": 1.630643265043562e-05, "loss": 0.1, "step": 5459 }, { "epoch": 0.91, "grad_norm": 0.5257959961891174, "learning_rate": 1.6305031176912564e-05, "loss": 0.0839, "step": 5460 }, { "epoch": 0.91, "grad_norm": 0.5457006692886353, "learning_rate": 1.630362949780597e-05, "loss": 0.0678, "step": 5461 }, { "epoch": 0.91, "grad_norm": 0.8068917989730835, "learning_rate": 1.6302227613161545e-05, "loss": 0.0714, "step": 5462 }, { "epoch": 0.91, "grad_norm": 0.7487630844116211, "learning_rate": 1.6300825523025e-05, "loss": 0.0862, "step": 5463 }, { "epoch": 0.91, "grad_norm": 0.5353148579597473, "learning_rate": 1.629942322744205e-05, "loss": 0.0722, "step": 5464 }, { "epoch": 0.91, "grad_norm": 0.6426108479499817, "learning_rate": 1.6298020726458417e-05, "loss": 0.0704, "step": 5465 }, { "epoch": 0.91, "grad_norm": 0.5741441249847412, "learning_rate": 1.6296618020119836e-05, "loss": 0.0828, "step": 5466 }, { "epoch": 0.91, "grad_norm": 0.5514253973960876, "learning_rate": 1.6295215108472043e-05, "loss": 0.0546, "step": 5467 }, { "epoch": 0.91, "grad_norm": 0.697833776473999, "learning_rate": 1.6293811991560776e-05, "loss": 0.0565, "step": 5468 }, { "epoch": 0.91, "grad_norm": 0.5452154874801636, "learning_rate": 1.6292408669431794e-05, "loss": 0.0639, "step": 5469 }, { "epoch": 0.91, "grad_norm": 0.7306157946586609, "learning_rate": 1.6291005142130845e-05, "loss": 0.0882, "step": 5470 }, { "epoch": 0.92, "grad_norm": 0.6748140454292297, "learning_rate": 1.6289601409703704e-05, "loss": 0.0714, "step": 5471 }, { "epoch": 0.92, "grad_norm": 0.5933974981307983, "learning_rate": 1.628819747219613e-05, "loss": 0.0792, "step": 5472 }, { "epoch": 0.92, "grad_norm": 0.6855509281158447, "learning_rate": 1.628679332965391e-05, "loss": 0.0905, "step": 5473 }, { "epoch": 0.92, "grad_norm": 0.45067480206489563, "learning_rate": 1.628538898212282e-05, "loss": 0.0582, "step": 5474 }, { "epoch": 0.92, "grad_norm": 0.5697938203811646, "learning_rate": 1.628398442964865e-05, "loss": 0.0687, "step": 5475 }, { "epoch": 0.92, "grad_norm": 0.6671808362007141, "learning_rate": 1.6282579672277204e-05, "loss": 0.0655, "step": 5476 }, { "epoch": 0.92, "grad_norm": 0.6101135015487671, "learning_rate": 1.6281174710054287e-05, "loss": 0.0602, "step": 5477 }, { "epoch": 0.92, "grad_norm": 0.6710940599441528, "learning_rate": 1.6279769543025702e-05, "loss": 0.0518, "step": 5478 }, { "epoch": 0.92, "grad_norm": 0.4940458834171295, "learning_rate": 1.6278364171237266e-05, "loss": 0.0594, "step": 5479 }, { "epoch": 0.92, "grad_norm": 0.6821586489677429, "learning_rate": 1.627695859473481e-05, "loss": 0.0754, "step": 5480 }, { "epoch": 0.92, "grad_norm": 0.6075993776321411, "learning_rate": 1.627555281356416e-05, "loss": 0.0486, "step": 5481 }, { "epoch": 0.92, "grad_norm": 0.5976157784461975, "learning_rate": 1.6274146827771157e-05, "loss": 0.0545, "step": 5482 }, { "epoch": 0.92, "grad_norm": 0.7971425652503967, "learning_rate": 1.6272740637401638e-05, "loss": 0.084, "step": 5483 }, { "epoch": 0.92, "grad_norm": 0.8007932901382446, "learning_rate": 1.627133424250146e-05, "loss": 0.0748, "step": 5484 }, { "epoch": 0.92, "grad_norm": 0.7421713471412659, "learning_rate": 1.626992764311648e-05, "loss": 0.0913, "step": 5485 }, { "epoch": 0.92, "grad_norm": 0.7487137913703918, "learning_rate": 1.6268520839292557e-05, "loss": 0.0864, "step": 5486 }, { "epoch": 0.92, "grad_norm": 0.5872600674629211, "learning_rate": 1.6267113831075566e-05, "loss": 0.0724, "step": 5487 }, { "epoch": 0.92, "grad_norm": 0.6650011539459229, "learning_rate": 1.6265706618511385e-05, "loss": 0.0781, "step": 5488 }, { "epoch": 0.92, "grad_norm": 0.5565874576568604, "learning_rate": 1.6264299201645893e-05, "loss": 0.076, "step": 5489 }, { "epoch": 0.92, "grad_norm": 0.6557715535163879, "learning_rate": 1.6262891580524986e-05, "loss": 0.0827, "step": 5490 }, { "epoch": 0.92, "grad_norm": 0.49815359711647034, "learning_rate": 1.626148375519456e-05, "loss": 0.075, "step": 5491 }, { "epoch": 0.92, "grad_norm": 0.46244198083877563, "learning_rate": 1.6260075725700514e-05, "loss": 0.0598, "step": 5492 }, { "epoch": 0.92, "grad_norm": 0.6218968629837036, "learning_rate": 1.625866749208876e-05, "loss": 0.0672, "step": 5493 }, { "epoch": 0.92, "grad_norm": 0.6738855838775635, "learning_rate": 1.6257259054405223e-05, "loss": 0.0948, "step": 5494 }, { "epoch": 0.92, "grad_norm": 0.5482898354530334, "learning_rate": 1.6255850412695822e-05, "loss": 0.0585, "step": 5495 }, { "epoch": 0.92, "grad_norm": 0.6284372806549072, "learning_rate": 1.6254441567006484e-05, "loss": 0.0785, "step": 5496 }, { "epoch": 0.92, "grad_norm": 0.6117902994155884, "learning_rate": 1.6253032517383153e-05, "loss": 0.072, "step": 5497 }, { "epoch": 0.92, "grad_norm": 0.8591582179069519, "learning_rate": 1.6251623263871764e-05, "loss": 0.0863, "step": 5498 }, { "epoch": 0.92, "grad_norm": 0.6555208563804626, "learning_rate": 1.625021380651828e-05, "loss": 0.0761, "step": 5499 }, { "epoch": 0.92, "grad_norm": 0.5789096355438232, "learning_rate": 1.6248804145368645e-05, "loss": 0.0624, "step": 5500 }, { "epoch": 0.92, "grad_norm": 0.6686468124389648, "learning_rate": 1.6247394280468828e-05, "loss": 0.0718, "step": 5501 }, { "epoch": 0.92, "grad_norm": 0.6895235180854797, "learning_rate": 1.6245984211864803e-05, "loss": 0.1034, "step": 5502 }, { "epoch": 0.92, "grad_norm": 0.5454933643341064, "learning_rate": 1.6244573939602545e-05, "loss": 0.0678, "step": 5503 }, { "epoch": 0.92, "grad_norm": 0.6698014140129089, "learning_rate": 1.6243163463728034e-05, "loss": 0.0595, "step": 5504 }, { "epoch": 0.92, "grad_norm": 0.8441550731658936, "learning_rate": 1.624175278428726e-05, "loss": 0.0838, "step": 5505 }, { "epoch": 0.92, "grad_norm": 0.6321614980697632, "learning_rate": 1.624034190132623e-05, "loss": 0.084, "step": 5506 }, { "epoch": 0.92, "grad_norm": 0.5555621981620789, "learning_rate": 1.6238930814890937e-05, "loss": 0.0632, "step": 5507 }, { "epoch": 0.92, "grad_norm": 0.5339215993881226, "learning_rate": 1.6237519525027396e-05, "loss": 0.0662, "step": 5508 }, { "epoch": 0.92, "grad_norm": 0.5682430863380432, "learning_rate": 1.6236108031781625e-05, "loss": 0.0741, "step": 5509 }, { "epoch": 0.92, "grad_norm": 0.6986581087112427, "learning_rate": 1.6234696335199643e-05, "loss": 0.0846, "step": 5510 }, { "epoch": 0.92, "grad_norm": 0.6318466067314148, "learning_rate": 1.6233284435327483e-05, "loss": 0.0839, "step": 5511 }, { "epoch": 0.92, "grad_norm": 0.5917158722877502, "learning_rate": 1.623187233221118e-05, "loss": 0.0631, "step": 5512 }, { "epoch": 0.92, "grad_norm": 0.7014573216438293, "learning_rate": 1.6230460025896783e-05, "loss": 0.0823, "step": 5513 }, { "epoch": 0.92, "grad_norm": 0.6349965333938599, "learning_rate": 1.6229047516430335e-05, "loss": 0.0816, "step": 5514 }, { "epoch": 0.92, "grad_norm": 0.6582543253898621, "learning_rate": 1.6227634803857896e-05, "loss": 0.083, "step": 5515 }, { "epoch": 0.92, "grad_norm": 0.9097058773040771, "learning_rate": 1.6226221888225526e-05, "loss": 0.0778, "step": 5516 }, { "epoch": 0.92, "grad_norm": 0.5384747982025146, "learning_rate": 1.6224808769579302e-05, "loss": 0.0631, "step": 5517 }, { "epoch": 0.92, "grad_norm": 0.7437201738357544, "learning_rate": 1.6223395447965294e-05, "loss": 0.0906, "step": 5518 }, { "epoch": 0.92, "grad_norm": 0.4251421391963959, "learning_rate": 1.6221981923429586e-05, "loss": 0.059, "step": 5519 }, { "epoch": 0.92, "grad_norm": 0.7014287114143372, "learning_rate": 1.6220568196018272e-05, "loss": 0.0662, "step": 5520 }, { "epoch": 0.92, "grad_norm": 0.7878556847572327, "learning_rate": 1.6219154265777444e-05, "loss": 0.0892, "step": 5521 }, { "epoch": 0.92, "grad_norm": 0.608012318611145, "learning_rate": 1.621774013275321e-05, "loss": 0.0763, "step": 5522 }, { "epoch": 0.92, "grad_norm": 0.7472782135009766, "learning_rate": 1.621632579699167e-05, "loss": 0.0635, "step": 5523 }, { "epoch": 0.92, "grad_norm": 0.6435030698776245, "learning_rate": 1.621491125853895e-05, "loss": 0.0789, "step": 5524 }, { "epoch": 0.92, "grad_norm": 0.6166301965713501, "learning_rate": 1.621349651744117e-05, "loss": 0.0679, "step": 5525 }, { "epoch": 0.92, "grad_norm": 0.560799241065979, "learning_rate": 1.6212081573744453e-05, "loss": 0.0705, "step": 5526 }, { "epoch": 0.92, "grad_norm": 0.5358301401138306, "learning_rate": 1.6210666427494944e-05, "loss": 0.0676, "step": 5527 }, { "epoch": 0.92, "grad_norm": 0.5420883893966675, "learning_rate": 1.6209251078738787e-05, "loss": 0.0681, "step": 5528 }, { "epoch": 0.92, "grad_norm": 0.7087445259094238, "learning_rate": 1.620783552752212e-05, "loss": 0.0827, "step": 5529 }, { "epoch": 0.92, "grad_norm": 0.6340785622596741, "learning_rate": 1.620641977389111e-05, "loss": 0.1105, "step": 5530 }, { "epoch": 0.93, "grad_norm": 0.5141128897666931, "learning_rate": 1.620500381789191e-05, "loss": 0.0617, "step": 5531 }, { "epoch": 0.93, "grad_norm": 0.6401954293251038, "learning_rate": 1.6203587659570695e-05, "loss": 0.0609, "step": 5532 }, { "epoch": 0.93, "grad_norm": 0.6799548864364624, "learning_rate": 1.6202171298973644e-05, "loss": 0.075, "step": 5533 }, { "epoch": 0.93, "grad_norm": 0.8375356197357178, "learning_rate": 1.6200754736146932e-05, "loss": 0.0701, "step": 5534 }, { "epoch": 0.93, "grad_norm": 0.5865892767906189, "learning_rate": 1.619933797113675e-05, "loss": 0.0649, "step": 5535 }, { "epoch": 0.93, "grad_norm": 0.6700939536094666, "learning_rate": 1.6197921003989292e-05, "loss": 0.0771, "step": 5536 }, { "epoch": 0.93, "grad_norm": 0.7122863531112671, "learning_rate": 1.6196503834750764e-05, "loss": 0.0864, "step": 5537 }, { "epoch": 0.93, "grad_norm": 0.6802319288253784, "learning_rate": 1.619508646346737e-05, "loss": 0.0619, "step": 5538 }, { "epoch": 0.93, "grad_norm": 0.7124251127243042, "learning_rate": 1.6193668890185332e-05, "loss": 0.0831, "step": 5539 }, { "epoch": 0.93, "grad_norm": 0.5694634914398193, "learning_rate": 1.619225111495086e-05, "loss": 0.0605, "step": 5540 }, { "epoch": 0.93, "grad_norm": 0.9073671698570251, "learning_rate": 1.61908331378102e-05, "loss": 0.0888, "step": 5541 }, { "epoch": 0.93, "grad_norm": 0.8351063132286072, "learning_rate": 1.6189414958809573e-05, "loss": 0.0531, "step": 5542 }, { "epoch": 0.93, "grad_norm": 0.5533739328384399, "learning_rate": 1.6187996577995223e-05, "loss": 0.071, "step": 5543 }, { "epoch": 0.93, "grad_norm": 0.6166213154792786, "learning_rate": 1.61865779954134e-05, "loss": 0.0614, "step": 5544 }, { "epoch": 0.93, "grad_norm": 0.5327982902526855, "learning_rate": 1.6185159211110355e-05, "loss": 0.064, "step": 5545 }, { "epoch": 0.93, "grad_norm": 0.5315725803375244, "learning_rate": 1.6183740225132357e-05, "loss": 0.0739, "step": 5546 }, { "epoch": 0.93, "grad_norm": 0.5408925414085388, "learning_rate": 1.6182321037525667e-05, "loss": 0.0691, "step": 5547 }, { "epoch": 0.93, "grad_norm": 0.7902771234512329, "learning_rate": 1.618090164833656e-05, "loss": 0.0841, "step": 5548 }, { "epoch": 0.93, "grad_norm": 0.6060118079185486, "learning_rate": 1.617948205761132e-05, "loss": 0.065, "step": 5549 }, { "epoch": 0.93, "grad_norm": 0.61995929479599, "learning_rate": 1.6178062265396235e-05, "loss": 0.0903, "step": 5550 }, { "epoch": 0.93, "grad_norm": 0.633124053478241, "learning_rate": 1.6176642271737597e-05, "loss": 0.0708, "step": 5551 }, { "epoch": 0.93, "grad_norm": 0.6060678362846375, "learning_rate": 1.6175222076681704e-05, "loss": 0.083, "step": 5552 }, { "epoch": 0.93, "grad_norm": 0.6164645552635193, "learning_rate": 1.617380168027487e-05, "loss": 0.052, "step": 5553 }, { "epoch": 0.93, "grad_norm": 0.49478524923324585, "learning_rate": 1.61723810825634e-05, "loss": 0.062, "step": 5554 }, { "epoch": 0.93, "grad_norm": 0.5877234935760498, "learning_rate": 1.6170960283593626e-05, "loss": 0.0643, "step": 5555 }, { "epoch": 0.93, "grad_norm": 0.9488772749900818, "learning_rate": 1.6169539283411862e-05, "loss": 0.0868, "step": 5556 }, { "epoch": 0.93, "grad_norm": 0.5334867238998413, "learning_rate": 1.6168118082064453e-05, "loss": 0.0734, "step": 5557 }, { "epoch": 0.93, "grad_norm": 0.6162529587745667, "learning_rate": 1.6166696679597733e-05, "loss": 0.0842, "step": 5558 }, { "epoch": 0.93, "grad_norm": 0.5726556181907654, "learning_rate": 1.6165275076058047e-05, "loss": 0.0624, "step": 5559 }, { "epoch": 0.93, "grad_norm": 0.7699509859085083, "learning_rate": 1.6163853271491753e-05, "loss": 0.0729, "step": 5560 }, { "epoch": 0.93, "grad_norm": 0.7833166122436523, "learning_rate": 1.6162431265945206e-05, "loss": 0.0765, "step": 5561 }, { "epoch": 0.93, "grad_norm": 0.6165924072265625, "learning_rate": 1.6161009059464777e-05, "loss": 0.0793, "step": 5562 }, { "epoch": 0.93, "grad_norm": 0.7489335536956787, "learning_rate": 1.6159586652096838e-05, "loss": 0.0765, "step": 5563 }, { "epoch": 0.93, "grad_norm": 0.5735785365104675, "learning_rate": 1.6158164043887764e-05, "loss": 0.0663, "step": 5564 }, { "epoch": 0.93, "grad_norm": 0.7277443408966064, "learning_rate": 1.6156741234883947e-05, "loss": 0.0635, "step": 5565 }, { "epoch": 0.93, "grad_norm": 0.6914894580841064, "learning_rate": 1.6155318225131776e-05, "loss": 0.0915, "step": 5566 }, { "epoch": 0.93, "grad_norm": 0.9021314382553101, "learning_rate": 1.615389501467765e-05, "loss": 0.0865, "step": 5567 }, { "epoch": 0.93, "grad_norm": 0.6113263964653015, "learning_rate": 1.615247160356797e-05, "loss": 0.0516, "step": 5568 }, { "epoch": 0.93, "grad_norm": 0.6114548444747925, "learning_rate": 1.6151047991849157e-05, "loss": 0.0566, "step": 5569 }, { "epoch": 0.93, "grad_norm": 0.45552998781204224, "learning_rate": 1.6149624179567628e-05, "loss": 0.0483, "step": 5570 }, { "epoch": 0.93, "grad_norm": 0.43265071511268616, "learning_rate": 1.6148200166769805e-05, "loss": 0.0654, "step": 5571 }, { "epoch": 0.93, "grad_norm": 0.5738341212272644, "learning_rate": 1.6146775953502123e-05, "loss": 0.0786, "step": 5572 }, { "epoch": 0.93, "grad_norm": 0.5350082516670227, "learning_rate": 1.6145351539811013e-05, "loss": 0.0684, "step": 5573 }, { "epoch": 0.93, "grad_norm": 0.7451879978179932, "learning_rate": 1.614392692574293e-05, "loss": 0.0936, "step": 5574 }, { "epoch": 0.93, "grad_norm": 1.1074936389923096, "learning_rate": 1.6142502111344316e-05, "loss": 0.0637, "step": 5575 }, { "epoch": 0.93, "grad_norm": 0.6414697766304016, "learning_rate": 1.6141077096661635e-05, "loss": 0.0619, "step": 5576 }, { "epoch": 0.93, "grad_norm": 0.7767238020896912, "learning_rate": 1.6139651881741352e-05, "loss": 0.0703, "step": 5577 }, { "epoch": 0.93, "grad_norm": 0.6052998304367065, "learning_rate": 1.613822646662993e-05, "loss": 0.0681, "step": 5578 }, { "epoch": 0.93, "grad_norm": 0.5637183785438538, "learning_rate": 1.6136800851373857e-05, "loss": 0.0611, "step": 5579 }, { "epoch": 0.93, "grad_norm": 0.5225614905357361, "learning_rate": 1.613537503601961e-05, "loss": 0.0744, "step": 5580 }, { "epoch": 0.93, "grad_norm": 0.5302346348762512, "learning_rate": 1.6133949020613684e-05, "loss": 0.0644, "step": 5581 }, { "epoch": 0.93, "grad_norm": 1.0833138227462769, "learning_rate": 1.613252280520257e-05, "loss": 0.0733, "step": 5582 }, { "epoch": 0.93, "grad_norm": 0.4884500205516815, "learning_rate": 1.6131096389832776e-05, "loss": 0.0535, "step": 5583 }, { "epoch": 0.93, "grad_norm": 0.5998600125312805, "learning_rate": 1.612966977455081e-05, "loss": 0.0741, "step": 5584 }, { "epoch": 0.93, "grad_norm": 0.6962148547172546, "learning_rate": 1.612824295940319e-05, "loss": 0.0796, "step": 5585 }, { "epoch": 0.93, "grad_norm": 0.8508638739585876, "learning_rate": 1.6126815944436444e-05, "loss": 0.064, "step": 5586 }, { "epoch": 0.93, "grad_norm": 0.6144561767578125, "learning_rate": 1.6125388729697092e-05, "loss": 0.0635, "step": 5587 }, { "epoch": 0.93, "grad_norm": 0.6030434370040894, "learning_rate": 1.6123961315231677e-05, "loss": 0.0735, "step": 5588 }, { "epoch": 0.93, "grad_norm": 0.5665420889854431, "learning_rate": 1.6122533701086735e-05, "loss": 0.0662, "step": 5589 }, { "epoch": 0.93, "grad_norm": 0.6221441626548767, "learning_rate": 1.6121105887308823e-05, "loss": 0.0818, "step": 5590 }, { "epoch": 0.94, "grad_norm": 0.5267055034637451, "learning_rate": 1.6119677873944493e-05, "loss": 0.0584, "step": 5591 }, { "epoch": 0.94, "grad_norm": 0.6810109615325928, "learning_rate": 1.6118249661040306e-05, "loss": 0.0651, "step": 5592 }, { "epoch": 0.94, "grad_norm": 0.6313510537147522, "learning_rate": 1.6116821248642834e-05, "loss": 0.0682, "step": 5593 }, { "epoch": 0.94, "grad_norm": 0.508147656917572, "learning_rate": 1.611539263679865e-05, "loss": 0.056, "step": 5594 }, { "epoch": 0.94, "grad_norm": 0.7936069965362549, "learning_rate": 1.6113963825554334e-05, "loss": 0.1019, "step": 5595 }, { "epoch": 0.94, "grad_norm": 0.5681514739990234, "learning_rate": 1.611253481495648e-05, "loss": 0.0721, "step": 5596 }, { "epoch": 0.94, "grad_norm": 0.717097282409668, "learning_rate": 1.6111105605051673e-05, "loss": 0.09, "step": 5597 }, { "epoch": 0.94, "grad_norm": 0.6226915121078491, "learning_rate": 1.6109676195886527e-05, "loss": 0.0849, "step": 5598 }, { "epoch": 0.94, "grad_norm": 0.5906611084938049, "learning_rate": 1.6108246587507638e-05, "loss": 0.0551, "step": 5599 }, { "epoch": 0.94, "grad_norm": 1.6648428440093994, "learning_rate": 1.6106816779961626e-05, "loss": 0.0794, "step": 5600 }, { "epoch": 0.94, "grad_norm": 0.5910905599594116, "learning_rate": 1.610538677329511e-05, "loss": 0.0746, "step": 5601 }, { "epoch": 0.94, "grad_norm": 0.5721676349639893, "learning_rate": 1.6103956567554718e-05, "loss": 0.0733, "step": 5602 }, { "epoch": 0.94, "grad_norm": 0.8433046936988831, "learning_rate": 1.6102526162787083e-05, "loss": 0.0863, "step": 5603 }, { "epoch": 0.94, "grad_norm": 0.7073931694030762, "learning_rate": 1.610109555903885e-05, "loss": 0.0904, "step": 5604 }, { "epoch": 0.94, "grad_norm": 0.511052668094635, "learning_rate": 1.6099664756356657e-05, "loss": 0.0537, "step": 5605 }, { "epoch": 0.94, "grad_norm": 0.9550876021385193, "learning_rate": 1.609823375478716e-05, "loss": 0.111, "step": 5606 }, { "epoch": 0.94, "grad_norm": 0.708756685256958, "learning_rate": 1.6096802554377023e-05, "loss": 0.0575, "step": 5607 }, { "epoch": 0.94, "grad_norm": 0.7615904211997986, "learning_rate": 1.609537115517291e-05, "loss": 0.07, "step": 5608 }, { "epoch": 0.94, "grad_norm": 0.6852483749389648, "learning_rate": 1.609393955722149e-05, "loss": 0.0849, "step": 5609 }, { "epoch": 0.94, "grad_norm": 0.7774103879928589, "learning_rate": 1.6092507760569445e-05, "loss": 0.0793, "step": 5610 }, { "epoch": 0.94, "grad_norm": 0.7918062210083008, "learning_rate": 1.609107576526346e-05, "loss": 0.0647, "step": 5611 }, { "epoch": 0.94, "grad_norm": 0.8047553300857544, "learning_rate": 1.6089643571350232e-05, "loss": 0.0616, "step": 5612 }, { "epoch": 0.94, "grad_norm": 0.6575495004653931, "learning_rate": 1.608821117887645e-05, "loss": 0.0777, "step": 5613 }, { "epoch": 0.94, "grad_norm": 0.5564283132553101, "learning_rate": 1.608677858788883e-05, "loss": 0.0703, "step": 5614 }, { "epoch": 0.94, "grad_norm": 0.7214251756668091, "learning_rate": 1.6085345798434072e-05, "loss": 0.0713, "step": 5615 }, { "epoch": 0.94, "grad_norm": 0.6916906237602234, "learning_rate": 1.60839128105589e-05, "loss": 0.0685, "step": 5616 }, { "epoch": 0.94, "grad_norm": 0.6080299019813538, "learning_rate": 1.6082479624310038e-05, "loss": 0.0708, "step": 5617 }, { "epoch": 0.94, "grad_norm": 0.6468853950500488, "learning_rate": 1.6081046239734214e-05, "loss": 0.0948, "step": 5618 }, { "epoch": 0.94, "grad_norm": 0.6683017611503601, "learning_rate": 1.607961265687817e-05, "loss": 0.0842, "step": 5619 }, { "epoch": 0.94, "grad_norm": 0.7345103025436401, "learning_rate": 1.607817887578865e-05, "loss": 0.1098, "step": 5620 }, { "epoch": 0.94, "grad_norm": 0.7531156539916992, "learning_rate": 1.60767448965124e-05, "loss": 0.0919, "step": 5621 }, { "epoch": 0.94, "grad_norm": 0.7120552659034729, "learning_rate": 1.607531071909618e-05, "loss": 0.0737, "step": 5622 }, { "epoch": 0.94, "grad_norm": 0.8965209126472473, "learning_rate": 1.6073876343586748e-05, "loss": 0.0782, "step": 5623 }, { "epoch": 0.94, "grad_norm": 0.6642830967903137, "learning_rate": 1.6072441770030877e-05, "loss": 0.0805, "step": 5624 }, { "epoch": 0.94, "grad_norm": 0.4639524817466736, "learning_rate": 1.6071006998475346e-05, "loss": 0.0597, "step": 5625 }, { "epoch": 0.94, "grad_norm": 0.6054775714874268, "learning_rate": 1.6069572028966934e-05, "loss": 0.0863, "step": 5626 }, { "epoch": 0.94, "grad_norm": 0.4584541320800781, "learning_rate": 1.606813686155243e-05, "loss": 0.0669, "step": 5627 }, { "epoch": 0.94, "grad_norm": 0.8076491951942444, "learning_rate": 1.6066701496278633e-05, "loss": 0.0843, "step": 5628 }, { "epoch": 0.94, "grad_norm": 0.8318204879760742, "learning_rate": 1.6065265933192344e-05, "loss": 0.0839, "step": 5629 }, { "epoch": 0.94, "grad_norm": 0.5881622433662415, "learning_rate": 1.6063830172340365e-05, "loss": 0.0729, "step": 5630 }, { "epoch": 0.94, "grad_norm": 0.5902923345565796, "learning_rate": 1.6062394213769517e-05, "loss": 0.0723, "step": 5631 }, { "epoch": 0.94, "grad_norm": 0.6113845705986023, "learning_rate": 1.606095805752662e-05, "loss": 0.0904, "step": 5632 }, { "epoch": 0.94, "grad_norm": 0.8133909106254578, "learning_rate": 1.6059521703658504e-05, "loss": 0.0953, "step": 5633 }, { "epoch": 0.94, "grad_norm": 0.6688376069068909, "learning_rate": 1.6058085152211997e-05, "loss": 0.0915, "step": 5634 }, { "epoch": 0.94, "grad_norm": 0.5918256044387817, "learning_rate": 1.6056648403233944e-05, "loss": 0.0891, "step": 5635 }, { "epoch": 0.94, "grad_norm": 0.5143628716468811, "learning_rate": 1.6055211456771192e-05, "loss": 0.0623, "step": 5636 }, { "epoch": 0.94, "grad_norm": 1.220626711845398, "learning_rate": 1.6053774312870594e-05, "loss": 0.1, "step": 5637 }, { "epoch": 0.94, "grad_norm": 0.7467254400253296, "learning_rate": 1.6052336971579005e-05, "loss": 0.0666, "step": 5638 }, { "epoch": 0.94, "grad_norm": 0.5687249898910522, "learning_rate": 1.60508994329433e-05, "loss": 0.0764, "step": 5639 }, { "epoch": 0.94, "grad_norm": 0.4694334864616394, "learning_rate": 1.6049461697010347e-05, "loss": 0.0559, "step": 5640 }, { "epoch": 0.94, "grad_norm": 0.8160822987556458, "learning_rate": 1.6048023763827026e-05, "loss": 0.0876, "step": 5641 }, { "epoch": 0.94, "grad_norm": 0.6157384514808655, "learning_rate": 1.6046585633440222e-05, "loss": 0.082, "step": 5642 }, { "epoch": 0.94, "grad_norm": 0.6979514956474304, "learning_rate": 1.604514730589683e-05, "loss": 0.0789, "step": 5643 }, { "epoch": 0.94, "grad_norm": 0.5029453039169312, "learning_rate": 1.6043708781243745e-05, "loss": 0.0602, "step": 5644 }, { "epoch": 0.94, "grad_norm": 0.6307936310768127, "learning_rate": 1.604227005952787e-05, "loss": 0.0634, "step": 5645 }, { "epoch": 0.94, "grad_norm": 0.6395432949066162, "learning_rate": 1.6040831140796123e-05, "loss": 0.0787, "step": 5646 }, { "epoch": 0.94, "grad_norm": 0.6074640154838562, "learning_rate": 1.6039392025095417e-05, "loss": 0.1109, "step": 5647 }, { "epoch": 0.94, "grad_norm": 0.6109057068824768, "learning_rate": 1.6037952712472673e-05, "loss": 0.0791, "step": 5648 }, { "epoch": 0.94, "grad_norm": 0.685728132724762, "learning_rate": 1.603651320297483e-05, "loss": 0.0665, "step": 5649 }, { "epoch": 0.94, "grad_norm": 0.5377321839332581, "learning_rate": 1.6035073496648822e-05, "loss": 0.068, "step": 5650 }, { "epoch": 0.95, "grad_norm": 0.5852047801017761, "learning_rate": 1.603363359354159e-05, "loss": 0.0523, "step": 5651 }, { "epoch": 0.95, "grad_norm": 0.5116533041000366, "learning_rate": 1.6032193493700085e-05, "loss": 0.0642, "step": 5652 }, { "epoch": 0.95, "grad_norm": 0.558650016784668, "learning_rate": 1.6030753197171263e-05, "loss": 0.0625, "step": 5653 }, { "epoch": 0.95, "grad_norm": 0.7285194993019104, "learning_rate": 1.6029312704002088e-05, "loss": 0.0889, "step": 5654 }, { "epoch": 0.95, "grad_norm": 0.5521958470344543, "learning_rate": 1.6027872014239528e-05, "loss": 0.0666, "step": 5655 }, { "epoch": 0.95, "grad_norm": 0.7152537703514099, "learning_rate": 1.602643112793056e-05, "loss": 0.0663, "step": 5656 }, { "epoch": 0.95, "grad_norm": 0.7512460350990295, "learning_rate": 1.6024990045122163e-05, "loss": 0.0599, "step": 5657 }, { "epoch": 0.95, "grad_norm": 0.7463423609733582, "learning_rate": 1.6023548765861327e-05, "loss": 0.0636, "step": 5658 }, { "epoch": 0.95, "grad_norm": 0.6718271374702454, "learning_rate": 1.602210729019505e-05, "loss": 0.0893, "step": 5659 }, { "epoch": 0.95, "grad_norm": 0.5815229415893555, "learning_rate": 1.6020665618170325e-05, "loss": 0.0669, "step": 5660 }, { "epoch": 0.95, "grad_norm": 0.6869394779205322, "learning_rate": 1.6019223749834165e-05, "loss": 0.1181, "step": 5661 }, { "epoch": 0.95, "grad_norm": 0.677751898765564, "learning_rate": 1.6017781685233587e-05, "loss": 0.0646, "step": 5662 }, { "epoch": 0.95, "grad_norm": 0.5501713752746582, "learning_rate": 1.6016339424415607e-05, "loss": 0.0541, "step": 5663 }, { "epoch": 0.95, "grad_norm": 0.7050647139549255, "learning_rate": 1.601489696742725e-05, "loss": 0.0692, "step": 5664 }, { "epoch": 0.95, "grad_norm": 0.4599023163318634, "learning_rate": 1.6013454314315552e-05, "loss": 0.0531, "step": 5665 }, { "epoch": 0.95, "grad_norm": 0.6340625286102295, "learning_rate": 1.6012011465127555e-05, "loss": 0.0686, "step": 5666 }, { "epoch": 0.95, "grad_norm": 1.0489054918289185, "learning_rate": 1.6010568419910303e-05, "loss": 0.1106, "step": 5667 }, { "epoch": 0.95, "grad_norm": 0.6337026953697205, "learning_rate": 1.6009125178710844e-05, "loss": 0.0838, "step": 5668 }, { "epoch": 0.95, "grad_norm": 0.5620794296264648, "learning_rate": 1.600768174157624e-05, "loss": 0.0647, "step": 5669 }, { "epoch": 0.95, "grad_norm": 0.6562123894691467, "learning_rate": 1.600623810855356e-05, "loss": 0.0683, "step": 5670 }, { "epoch": 0.95, "grad_norm": 0.7345315217971802, "learning_rate": 1.600479427968987e-05, "loss": 0.0852, "step": 5671 }, { "epoch": 0.95, "grad_norm": 0.5332775115966797, "learning_rate": 1.6003350255032245e-05, "loss": 0.0661, "step": 5672 }, { "epoch": 0.95, "grad_norm": 0.7286118865013123, "learning_rate": 1.6001906034627777e-05, "loss": 0.0704, "step": 5673 }, { "epoch": 0.95, "grad_norm": 0.6318448781967163, "learning_rate": 1.6000461618523553e-05, "loss": 0.0591, "step": 5674 }, { "epoch": 0.95, "grad_norm": 0.6163068413734436, "learning_rate": 1.5999017006766675e-05, "loss": 0.07, "step": 5675 }, { "epoch": 0.95, "grad_norm": 0.6719121336936951, "learning_rate": 1.5997572199404235e-05, "loss": 0.0646, "step": 5676 }, { "epoch": 0.95, "grad_norm": 0.4942071735858917, "learning_rate": 1.5996127196483358e-05, "loss": 0.0595, "step": 5677 }, { "epoch": 0.95, "grad_norm": 0.8784149885177612, "learning_rate": 1.599468199805115e-05, "loss": 0.0712, "step": 5678 }, { "epoch": 0.95, "grad_norm": 0.6263311505317688, "learning_rate": 1.599323660415473e-05, "loss": 0.0691, "step": 5679 }, { "epoch": 0.95, "grad_norm": 0.608207643032074, "learning_rate": 1.5991791014841234e-05, "loss": 0.0638, "step": 5680 }, { "epoch": 0.95, "grad_norm": 0.38350534439086914, "learning_rate": 1.5990345230157796e-05, "loss": 0.05, "step": 5681 }, { "epoch": 0.95, "grad_norm": 1.109610915184021, "learning_rate": 1.598889925015156e-05, "loss": 0.08, "step": 5682 }, { "epoch": 0.95, "grad_norm": 0.7683792114257812, "learning_rate": 1.5987453074869666e-05, "loss": 0.0745, "step": 5683 }, { "epoch": 0.95, "grad_norm": 0.6500988602638245, "learning_rate": 1.598600670435928e-05, "loss": 0.0653, "step": 5684 }, { "epoch": 0.95, "grad_norm": 0.6305310130119324, "learning_rate": 1.598456013866755e-05, "loss": 0.0739, "step": 5685 }, { "epoch": 0.95, "grad_norm": 0.6635962724685669, "learning_rate": 1.5983113377841653e-05, "loss": 0.0863, "step": 5686 }, { "epoch": 0.95, "grad_norm": 0.5689963102340698, "learning_rate": 1.5981666421928758e-05, "loss": 0.0895, "step": 5687 }, { "epoch": 0.95, "grad_norm": 0.7513865232467651, "learning_rate": 1.5980219270976047e-05, "loss": 0.0607, "step": 5688 }, { "epoch": 0.95, "grad_norm": 0.6075102686882019, "learning_rate": 1.5978771925030703e-05, "loss": 0.0575, "step": 5689 }, { "epoch": 0.95, "grad_norm": 0.5365842580795288, "learning_rate": 1.597732438413992e-05, "loss": 0.0643, "step": 5690 }, { "epoch": 0.95, "grad_norm": 0.6887415051460266, "learning_rate": 1.59758766483509e-05, "loss": 0.0868, "step": 5691 }, { "epoch": 0.95, "grad_norm": 0.7356839179992676, "learning_rate": 1.5974428717710844e-05, "loss": 0.0744, "step": 5692 }, { "epoch": 0.95, "grad_norm": 0.6704765558242798, "learning_rate": 1.5972980592266965e-05, "loss": 0.0694, "step": 5693 }, { "epoch": 0.95, "grad_norm": 0.5592464208602905, "learning_rate": 1.597153227206648e-05, "loss": 0.0753, "step": 5694 }, { "epoch": 0.95, "grad_norm": 0.6039524674415588, "learning_rate": 1.597008375715662e-05, "loss": 0.0488, "step": 5695 }, { "epoch": 0.95, "grad_norm": 0.5617808103561401, "learning_rate": 1.5968635047584604e-05, "loss": 0.0669, "step": 5696 }, { "epoch": 0.95, "grad_norm": 0.8778820633888245, "learning_rate": 1.5967186143397677e-05, "loss": 0.0873, "step": 5697 }, { "epoch": 0.95, "grad_norm": 0.8697112202644348, "learning_rate": 1.596573704464308e-05, "loss": 0.0799, "step": 5698 }, { "epoch": 0.95, "grad_norm": 0.7132080793380737, "learning_rate": 1.5964287751368067e-05, "loss": 0.0847, "step": 5699 }, { "epoch": 0.95, "grad_norm": 0.6210633516311646, "learning_rate": 1.596283826361989e-05, "loss": 0.0658, "step": 5700 }, { "epoch": 0.95, "grad_norm": 0.6328871846199036, "learning_rate": 1.5961388581445807e-05, "loss": 0.0664, "step": 5701 }, { "epoch": 0.95, "grad_norm": 0.47974687814712524, "learning_rate": 1.5959938704893097e-05, "loss": 0.0498, "step": 5702 }, { "epoch": 0.95, "grad_norm": 0.6844127178192139, "learning_rate": 1.5958488634009027e-05, "loss": 0.061, "step": 5703 }, { "epoch": 0.95, "grad_norm": 0.866346538066864, "learning_rate": 1.5957038368840877e-05, "loss": 0.1104, "step": 5704 }, { "epoch": 0.95, "grad_norm": 0.4806691110134125, "learning_rate": 1.5955587909435943e-05, "loss": 0.0562, "step": 5705 }, { "epoch": 0.95, "grad_norm": 0.7528172135353088, "learning_rate": 1.5954137255841512e-05, "loss": 0.0534, "step": 5706 }, { "epoch": 0.95, "grad_norm": 0.5067823529243469, "learning_rate": 1.595268640810489e-05, "loss": 0.0529, "step": 5707 }, { "epoch": 0.95, "grad_norm": 0.6195541620254517, "learning_rate": 1.595123536627338e-05, "loss": 0.0777, "step": 5708 }, { "epoch": 0.95, "grad_norm": 0.5863826274871826, "learning_rate": 1.5949784130394296e-05, "loss": 0.0779, "step": 5709 }, { "epoch": 0.96, "grad_norm": 0.6907615661621094, "learning_rate": 1.5948332700514955e-05, "loss": 0.0892, "step": 5710 }, { "epoch": 0.96, "grad_norm": 0.6820560097694397, "learning_rate": 1.594688107668269e-05, "loss": 0.079, "step": 5711 }, { "epoch": 0.96, "grad_norm": 0.6794945001602173, "learning_rate": 1.5945429258944824e-05, "loss": 0.0781, "step": 5712 }, { "epoch": 0.96, "grad_norm": 0.6115612983703613, "learning_rate": 1.59439772473487e-05, "loss": 0.0602, "step": 5713 }, { "epoch": 0.96, "grad_norm": 0.45146846771240234, "learning_rate": 1.594252504194166e-05, "loss": 0.0566, "step": 5714 }, { "epoch": 0.96, "grad_norm": 0.6972431540489197, "learning_rate": 1.594107264277106e-05, "loss": 0.0915, "step": 5715 }, { "epoch": 0.96, "grad_norm": 0.7386440634727478, "learning_rate": 1.5939620049884258e-05, "loss": 0.0553, "step": 5716 }, { "epoch": 0.96, "grad_norm": 0.6306697726249695, "learning_rate": 1.5938167263328607e-05, "loss": 0.0851, "step": 5717 }, { "epoch": 0.96, "grad_norm": 0.6388913989067078, "learning_rate": 1.5936714283151484e-05, "loss": 0.0712, "step": 5718 }, { "epoch": 0.96, "grad_norm": 0.5134467482566833, "learning_rate": 1.593526110940027e-05, "loss": 0.0517, "step": 5719 }, { "epoch": 0.96, "grad_norm": 0.6747592687606812, "learning_rate": 1.5933807742122342e-05, "loss": 0.0866, "step": 5720 }, { "epoch": 0.96, "grad_norm": 0.6430930495262146, "learning_rate": 1.5932354181365086e-05, "loss": 0.0859, "step": 5721 }, { "epoch": 0.96, "grad_norm": 0.6116566061973572, "learning_rate": 1.5930900427175904e-05, "loss": 0.0712, "step": 5722 }, { "epoch": 0.96, "grad_norm": 0.5131745934486389, "learning_rate": 1.5929446479602194e-05, "loss": 0.0662, "step": 5723 }, { "epoch": 0.96, "grad_norm": 0.5444244146347046, "learning_rate": 1.5927992338691364e-05, "loss": 0.0742, "step": 5724 }, { "epoch": 0.96, "grad_norm": 0.6007295846939087, "learning_rate": 1.5926538004490828e-05, "loss": 0.0788, "step": 5725 }, { "epoch": 0.96, "grad_norm": 0.8777145147323608, "learning_rate": 1.5925083477048005e-05, "loss": 0.0997, "step": 5726 }, { "epoch": 0.96, "grad_norm": 0.9692882895469666, "learning_rate": 1.5923628756410326e-05, "loss": 0.0855, "step": 5727 }, { "epoch": 0.96, "grad_norm": 0.7052194476127625, "learning_rate": 1.5922173842625223e-05, "loss": 0.0796, "step": 5728 }, { "epoch": 0.96, "grad_norm": 0.7339844107627869, "learning_rate": 1.592071873574013e-05, "loss": 0.0649, "step": 5729 }, { "epoch": 0.96, "grad_norm": 0.6262183785438538, "learning_rate": 1.5919263435802497e-05, "loss": 0.0862, "step": 5730 }, { "epoch": 0.96, "grad_norm": 0.49046018719673157, "learning_rate": 1.5917807942859775e-05, "loss": 0.048, "step": 5731 }, { "epoch": 0.96, "grad_norm": 0.7279825806617737, "learning_rate": 1.5916352256959423e-05, "loss": 0.0791, "step": 5732 }, { "epoch": 0.96, "grad_norm": 0.5751273036003113, "learning_rate": 1.5914896378148906e-05, "loss": 0.0682, "step": 5733 }, { "epoch": 0.96, "grad_norm": 0.4846772849559784, "learning_rate": 1.5913440306475692e-05, "loss": 0.0779, "step": 5734 }, { "epoch": 0.96, "grad_norm": 0.6116702556610107, "learning_rate": 1.591198404198726e-05, "loss": 0.0763, "step": 5735 }, { "epoch": 0.96, "grad_norm": 0.9054170846939087, "learning_rate": 1.5910527584731092e-05, "loss": 0.0891, "step": 5736 }, { "epoch": 0.96, "grad_norm": 0.6492041945457458, "learning_rate": 1.5909070934754686e-05, "loss": 0.0685, "step": 5737 }, { "epoch": 0.96, "grad_norm": 0.5188426971435547, "learning_rate": 1.590761409210552e-05, "loss": 0.0703, "step": 5738 }, { "epoch": 0.96, "grad_norm": 0.5422227382659912, "learning_rate": 1.5906157056831112e-05, "loss": 0.0707, "step": 5739 }, { "epoch": 0.96, "grad_norm": 0.651280403137207, "learning_rate": 1.5904699828978966e-05, "loss": 0.0852, "step": 5740 }, { "epoch": 0.96, "grad_norm": 0.9433550238609314, "learning_rate": 1.5903242408596597e-05, "loss": 0.0793, "step": 5741 }, { "epoch": 0.96, "grad_norm": 0.49454307556152344, "learning_rate": 1.590178479573152e-05, "loss": 0.0829, "step": 5742 }, { "epoch": 0.96, "grad_norm": 0.6484120488166809, "learning_rate": 1.5900326990431274e-05, "loss": 0.0596, "step": 5743 }, { "epoch": 0.96, "grad_norm": 0.6527848839759827, "learning_rate": 1.589886899274338e-05, "loss": 0.0875, "step": 5744 }, { "epoch": 0.96, "grad_norm": 0.6016010046005249, "learning_rate": 1.5897410802715386e-05, "loss": 0.0549, "step": 5745 }, { "epoch": 0.96, "grad_norm": 0.6656273603439331, "learning_rate": 1.589595242039484e-05, "loss": 0.0672, "step": 5746 }, { "epoch": 0.96, "grad_norm": 0.5548568964004517, "learning_rate": 1.5894493845829286e-05, "loss": 0.056, "step": 5747 }, { "epoch": 0.96, "grad_norm": 0.766450822353363, "learning_rate": 1.5893035079066285e-05, "loss": 0.0783, "step": 5748 }, { "epoch": 0.96, "grad_norm": 0.5879281163215637, "learning_rate": 1.589157612015341e-05, "loss": 0.0818, "step": 5749 }, { "epoch": 0.96, "grad_norm": 0.629956066608429, "learning_rate": 1.589011696913822e-05, "loss": 0.0672, "step": 5750 }, { "epoch": 0.96, "grad_norm": 0.6413252949714661, "learning_rate": 1.5888657626068304e-05, "loss": 0.0846, "step": 5751 }, { "epoch": 0.96, "grad_norm": 0.6978940367698669, "learning_rate": 1.588719809099124e-05, "loss": 0.0875, "step": 5752 }, { "epoch": 0.96, "grad_norm": 0.8344597220420837, "learning_rate": 1.5885738363954618e-05, "loss": 0.1025, "step": 5753 }, { "epoch": 0.96, "grad_norm": 0.44249406456947327, "learning_rate": 1.588427844500603e-05, "loss": 0.0638, "step": 5754 }, { "epoch": 0.96, "grad_norm": 0.6301674842834473, "learning_rate": 1.5882818334193088e-05, "loss": 0.048, "step": 5755 }, { "epoch": 0.96, "grad_norm": 0.5765949487686157, "learning_rate": 1.5881358031563397e-05, "loss": 0.0563, "step": 5756 }, { "epoch": 0.96, "grad_norm": 0.540051281452179, "learning_rate": 1.5879897537164567e-05, "loss": 0.0757, "step": 5757 }, { "epoch": 0.96, "grad_norm": 0.5022540092468262, "learning_rate": 1.5878436851044226e-05, "loss": 0.0699, "step": 5758 }, { "epoch": 0.96, "grad_norm": 0.7261889576911926, "learning_rate": 1.5876975973249996e-05, "loss": 0.0808, "step": 5759 }, { "epoch": 0.96, "grad_norm": 0.8991665840148926, "learning_rate": 1.5875514903829517e-05, "loss": 0.0949, "step": 5760 }, { "epoch": 0.96, "grad_norm": 0.5932690501213074, "learning_rate": 1.5874053642830426e-05, "loss": 0.0747, "step": 5761 }, { "epoch": 0.96, "grad_norm": 0.7637264132499695, "learning_rate": 1.5872592190300364e-05, "loss": 0.0681, "step": 5762 }, { "epoch": 0.96, "grad_norm": 0.6056786179542542, "learning_rate": 1.587113054628699e-05, "loss": 0.0398, "step": 5763 }, { "epoch": 0.96, "grad_norm": 0.5586113333702087, "learning_rate": 1.586966871083796e-05, "loss": 0.0567, "step": 5764 }, { "epoch": 0.96, "grad_norm": 0.5557774305343628, "learning_rate": 1.5868206684000946e-05, "loss": 0.0631, "step": 5765 }, { "epoch": 0.96, "grad_norm": 0.714209794998169, "learning_rate": 1.586674446582361e-05, "loss": 0.0814, "step": 5766 }, { "epoch": 0.96, "grad_norm": 0.6299823522567749, "learning_rate": 1.586528205635363e-05, "loss": 0.0658, "step": 5767 }, { "epoch": 0.96, "grad_norm": 0.7655079960823059, "learning_rate": 1.5863819455638698e-05, "loss": 0.0714, "step": 5768 }, { "epoch": 0.96, "grad_norm": 0.7905228734016418, "learning_rate": 1.5862356663726495e-05, "loss": 0.0855, "step": 5769 }, { "epoch": 0.97, "grad_norm": 0.6406217217445374, "learning_rate": 1.586089368066472e-05, "loss": 0.0666, "step": 5770 }, { "epoch": 0.97, "grad_norm": 0.6570181250572205, "learning_rate": 1.5859430506501076e-05, "loss": 0.0829, "step": 5771 }, { "epoch": 0.97, "grad_norm": 0.5090401768684387, "learning_rate": 1.5857967141283276e-05, "loss": 0.0712, "step": 5772 }, { "epoch": 0.97, "grad_norm": 1.1763877868652344, "learning_rate": 1.5856503585059028e-05, "loss": 0.0728, "step": 5773 }, { "epoch": 0.97, "grad_norm": 0.6277832984924316, "learning_rate": 1.5855039837876053e-05, "loss": 0.0616, "step": 5774 }, { "epoch": 0.97, "grad_norm": 0.5494105219841003, "learning_rate": 1.585357589978208e-05, "loss": 0.0763, "step": 5775 }, { "epoch": 0.97, "grad_norm": 0.8017040491104126, "learning_rate": 1.585211177082485e-05, "loss": 0.0843, "step": 5776 }, { "epoch": 0.97, "grad_norm": 0.65751713514328, "learning_rate": 1.5850647451052094e-05, "loss": 0.0658, "step": 5777 }, { "epoch": 0.97, "grad_norm": 0.4824751317501068, "learning_rate": 1.584918294051156e-05, "loss": 0.0668, "step": 5778 }, { "epoch": 0.97, "grad_norm": 1.5234876871109009, "learning_rate": 1.5847718239251e-05, "loss": 0.102, "step": 5779 }, { "epoch": 0.97, "grad_norm": 0.6719114780426025, "learning_rate": 1.584625334731817e-05, "loss": 0.0927, "step": 5780 }, { "epoch": 0.97, "grad_norm": 0.7591649889945984, "learning_rate": 1.5844788264760844e-05, "loss": 0.0783, "step": 5781 }, { "epoch": 0.97, "grad_norm": 0.5899299383163452, "learning_rate": 1.5843322991626785e-05, "loss": 0.0643, "step": 5782 }, { "epoch": 0.97, "grad_norm": 0.715392529964447, "learning_rate": 1.5841857527963768e-05, "loss": 0.0675, "step": 5783 }, { "epoch": 0.97, "grad_norm": 0.5830076336860657, "learning_rate": 1.5840391873819582e-05, "loss": 0.071, "step": 5784 }, { "epoch": 0.97, "grad_norm": 0.614403486251831, "learning_rate": 1.5838926029242013e-05, "loss": 0.0666, "step": 5785 }, { "epoch": 0.97, "grad_norm": 0.4522773027420044, "learning_rate": 1.5837459994278864e-05, "loss": 0.0601, "step": 5786 }, { "epoch": 0.97, "grad_norm": 0.47208282351493835, "learning_rate": 1.5835993768977926e-05, "loss": 0.0804, "step": 5787 }, { "epoch": 0.97, "grad_norm": 0.4155557453632355, "learning_rate": 1.5834527353387014e-05, "loss": 0.0563, "step": 5788 }, { "epoch": 0.97, "grad_norm": 0.5996929407119751, "learning_rate": 1.583306074755394e-05, "loss": 0.0703, "step": 5789 }, { "epoch": 0.97, "grad_norm": 0.806179940700531, "learning_rate": 1.5831593951526523e-05, "loss": 0.1119, "step": 5790 }, { "epoch": 0.97, "grad_norm": 0.6375572085380554, "learning_rate": 1.5830126965352595e-05, "loss": 0.0679, "step": 5791 }, { "epoch": 0.97, "grad_norm": 0.5311498045921326, "learning_rate": 1.5828659789079984e-05, "loss": 0.093, "step": 5792 }, { "epoch": 0.97, "grad_norm": 0.7822182774543762, "learning_rate": 1.5827192422756534e-05, "loss": 0.0716, "step": 5793 }, { "epoch": 0.97, "grad_norm": 0.6317006945610046, "learning_rate": 1.5825724866430085e-05, "loss": 0.0721, "step": 5794 }, { "epoch": 0.97, "grad_norm": 0.6312989592552185, "learning_rate": 1.582425712014849e-05, "loss": 0.067, "step": 5795 }, { "epoch": 0.97, "grad_norm": 0.8593655228614807, "learning_rate": 1.582278918395961e-05, "loss": 0.1, "step": 5796 }, { "epoch": 0.97, "grad_norm": 0.7019772529602051, "learning_rate": 1.5821321057911305e-05, "loss": 0.0729, "step": 5797 }, { "epoch": 0.97, "grad_norm": 0.6334748268127441, "learning_rate": 1.5819852742051446e-05, "loss": 0.1179, "step": 5798 }, { "epoch": 0.97, "grad_norm": 0.9639840126037598, "learning_rate": 1.581838423642791e-05, "loss": 0.0598, "step": 5799 }, { "epoch": 0.97, "grad_norm": 0.9696192145347595, "learning_rate": 1.5816915541088582e-05, "loss": 0.0712, "step": 5800 }, { "epoch": 0.97, "grad_norm": 0.5699067115783691, "learning_rate": 1.5815446656081347e-05, "loss": 0.0726, "step": 5801 }, { "epoch": 0.97, "grad_norm": 0.659392774105072, "learning_rate": 1.5813977581454096e-05, "loss": 0.0736, "step": 5802 }, { "epoch": 0.97, "grad_norm": 0.6707895398139954, "learning_rate": 1.5812508317254744e-05, "loss": 0.0931, "step": 5803 }, { "epoch": 0.97, "grad_norm": 0.6227641701698303, "learning_rate": 1.5811038863531183e-05, "loss": 0.0565, "step": 5804 }, { "epoch": 0.97, "grad_norm": 0.8389121890068054, "learning_rate": 1.5809569220331332e-05, "loss": 0.0822, "step": 5805 }, { "epoch": 0.97, "grad_norm": 0.7246707081794739, "learning_rate": 1.5808099387703113e-05, "loss": 0.0614, "step": 5806 }, { "epoch": 0.97, "grad_norm": 0.5290038585662842, "learning_rate": 1.5806629365694452e-05, "loss": 0.0547, "step": 5807 }, { "epoch": 0.97, "grad_norm": 0.6175944805145264, "learning_rate": 1.5805159154353273e-05, "loss": 0.0693, "step": 5808 }, { "epoch": 0.97, "grad_norm": 0.5530907511711121, "learning_rate": 1.5803688753727528e-05, "loss": 0.057, "step": 5809 }, { "epoch": 0.97, "grad_norm": 0.6189440488815308, "learning_rate": 1.580221816386515e-05, "loss": 0.0818, "step": 5810 }, { "epoch": 0.97, "grad_norm": 0.5443224906921387, "learning_rate": 1.580074738481409e-05, "loss": 0.0492, "step": 5811 }, { "epoch": 0.97, "grad_norm": 0.7577109336853027, "learning_rate": 1.5799276416622307e-05, "loss": 0.0948, "step": 5812 }, { "epoch": 0.97, "grad_norm": 0.5595210194587708, "learning_rate": 1.5797805259337765e-05, "loss": 0.0671, "step": 5813 }, { "epoch": 0.97, "grad_norm": 0.7453737854957581, "learning_rate": 1.579633391300844e-05, "loss": 0.0936, "step": 5814 }, { "epoch": 0.97, "grad_norm": 0.6373938918113708, "learning_rate": 1.579486237768229e-05, "loss": 0.0764, "step": 5815 }, { "epoch": 0.97, "grad_norm": 0.6566736698150635, "learning_rate": 1.5793390653407304e-05, "loss": 0.0938, "step": 5816 }, { "epoch": 0.97, "grad_norm": 0.7210596799850464, "learning_rate": 1.5791918740231477e-05, "loss": 0.0996, "step": 5817 }, { "epoch": 0.97, "grad_norm": 0.5515421032905579, "learning_rate": 1.5790446638202795e-05, "loss": 0.0555, "step": 5818 }, { "epoch": 0.97, "grad_norm": 0.5397937893867493, "learning_rate": 1.5788974347369258e-05, "loss": 0.0677, "step": 5819 }, { "epoch": 0.97, "grad_norm": 0.5712437033653259, "learning_rate": 1.578750186777887e-05, "loss": 0.0812, "step": 5820 }, { "epoch": 0.97, "grad_norm": 0.9025179147720337, "learning_rate": 1.578602919947965e-05, "loss": 0.0561, "step": 5821 }, { "epoch": 0.97, "grad_norm": 0.5707396864891052, "learning_rate": 1.5784556342519613e-05, "loss": 0.0622, "step": 5822 }, { "epoch": 0.97, "grad_norm": 0.5388628244400024, "learning_rate": 1.578308329694678e-05, "loss": 0.0758, "step": 5823 }, { "epoch": 0.97, "grad_norm": 0.5675376653671265, "learning_rate": 1.578161006280919e-05, "loss": 0.0686, "step": 5824 }, { "epoch": 0.97, "grad_norm": 0.5949831604957581, "learning_rate": 1.578013664015487e-05, "loss": 0.0723, "step": 5825 }, { "epoch": 0.97, "grad_norm": 0.8239988684654236, "learning_rate": 1.5778663029031865e-05, "loss": 0.0663, "step": 5826 }, { "epoch": 0.97, "grad_norm": 0.4739333987236023, "learning_rate": 1.5777189229488227e-05, "loss": 0.0605, "step": 5827 }, { "epoch": 0.97, "grad_norm": 0.6627941727638245, "learning_rate": 1.577571524157201e-05, "loss": 0.0732, "step": 5828 }, { "epoch": 0.97, "grad_norm": 0.6716079115867615, "learning_rate": 1.577424106533128e-05, "loss": 0.0989, "step": 5829 }, { "epoch": 0.98, "grad_norm": 0.6329681873321533, "learning_rate": 1.577276670081409e-05, "loss": 0.0696, "step": 5830 }, { "epoch": 0.98, "grad_norm": 0.592426061630249, "learning_rate": 1.5771292148068533e-05, "loss": 0.0763, "step": 5831 }, { "epoch": 0.98, "grad_norm": 0.6668137311935425, "learning_rate": 1.5769817407142672e-05, "loss": 0.0664, "step": 5832 }, { "epoch": 0.98, "grad_norm": 0.7089515924453735, "learning_rate": 1.57683424780846e-05, "loss": 0.0725, "step": 5833 }, { "epoch": 0.98, "grad_norm": 0.6305603384971619, "learning_rate": 1.576686736094241e-05, "loss": 0.0655, "step": 5834 }, { "epoch": 0.98, "grad_norm": 0.6390029191970825, "learning_rate": 1.57653920557642e-05, "loss": 0.0851, "step": 5835 }, { "epoch": 0.98, "grad_norm": 0.7463380098342896, "learning_rate": 1.5763916562598068e-05, "loss": 0.1069, "step": 5836 }, { "epoch": 0.98, "grad_norm": 0.5970305800437927, "learning_rate": 1.576244088149213e-05, "loss": 0.0884, "step": 5837 }, { "epoch": 0.98, "grad_norm": 0.502345860004425, "learning_rate": 1.5760965012494503e-05, "loss": 0.0521, "step": 5838 }, { "epoch": 0.98, "grad_norm": 0.5561521053314209, "learning_rate": 1.575948895565331e-05, "loss": 0.0743, "step": 5839 }, { "epoch": 0.98, "grad_norm": 0.5818692445755005, "learning_rate": 1.5758012711016674e-05, "loss": 0.0614, "step": 5840 }, { "epoch": 0.98, "grad_norm": 0.5681845545768738, "learning_rate": 1.5756536278632734e-05, "loss": 0.0723, "step": 5841 }, { "epoch": 0.98, "grad_norm": 0.6218370795249939, "learning_rate": 1.5755059658549633e-05, "loss": 0.0688, "step": 5842 }, { "epoch": 0.98, "grad_norm": 0.7767592668533325, "learning_rate": 1.5753582850815514e-05, "loss": 0.0689, "step": 5843 }, { "epoch": 0.98, "grad_norm": 0.6649380326271057, "learning_rate": 1.575210585547853e-05, "loss": 0.0676, "step": 5844 }, { "epoch": 0.98, "grad_norm": 0.7410458326339722, "learning_rate": 1.575062867258685e-05, "loss": 0.0765, "step": 5845 }, { "epoch": 0.98, "grad_norm": 0.7977293133735657, "learning_rate": 1.5749151302188624e-05, "loss": 0.0827, "step": 5846 }, { "epoch": 0.98, "grad_norm": 1.0399188995361328, "learning_rate": 1.574767374433203e-05, "loss": 0.0707, "step": 5847 }, { "epoch": 0.98, "grad_norm": 0.7104618549346924, "learning_rate": 1.574619599906525e-05, "loss": 0.0659, "step": 5848 }, { "epoch": 0.98, "grad_norm": 0.5675511360168457, "learning_rate": 1.5744718066436465e-05, "loss": 0.0703, "step": 5849 }, { "epoch": 0.98, "grad_norm": 0.6494616866111755, "learning_rate": 1.5743239946493862e-05, "loss": 0.0693, "step": 5850 }, { "epoch": 0.98, "grad_norm": 0.9389921426773071, "learning_rate": 1.5741761639285637e-05, "loss": 0.0914, "step": 5851 }, { "epoch": 0.98, "grad_norm": 0.8023056387901306, "learning_rate": 1.574028314486e-05, "loss": 0.0759, "step": 5852 }, { "epoch": 0.98, "grad_norm": 0.7494672536849976, "learning_rate": 1.5738804463265148e-05, "loss": 0.0793, "step": 5853 }, { "epoch": 0.98, "grad_norm": 0.5038257837295532, "learning_rate": 1.5737325594549306e-05, "loss": 0.0719, "step": 5854 }, { "epoch": 0.98, "grad_norm": 0.5386136174201965, "learning_rate": 1.5735846538760685e-05, "loss": 0.067, "step": 5855 }, { "epoch": 0.98, "grad_norm": 0.6733835339546204, "learning_rate": 1.5734367295947517e-05, "loss": 0.0648, "step": 5856 }, { "epoch": 0.98, "grad_norm": 0.5555366277694702, "learning_rate": 1.5732887866158034e-05, "loss": 0.0875, "step": 5857 }, { "epoch": 0.98, "grad_norm": 0.9329566955566406, "learning_rate": 1.5731408249440475e-05, "loss": 0.0831, "step": 5858 }, { "epoch": 0.98, "grad_norm": 0.6559364199638367, "learning_rate": 1.5729928445843086e-05, "loss": 0.0822, "step": 5859 }, { "epoch": 0.98, "grad_norm": 0.6109301447868347, "learning_rate": 1.572844845541411e-05, "loss": 0.091, "step": 5860 }, { "epoch": 0.98, "grad_norm": 0.5861473679542542, "learning_rate": 1.572696827820181e-05, "loss": 0.0508, "step": 5861 }, { "epoch": 0.98, "grad_norm": 0.7112137675285339, "learning_rate": 1.5725487914254453e-05, "loss": 0.0886, "step": 5862 }, { "epoch": 0.98, "grad_norm": 0.6830494999885559, "learning_rate": 1.57240073636203e-05, "loss": 0.0633, "step": 5863 }, { "epoch": 0.98, "grad_norm": 1.0840091705322266, "learning_rate": 1.572252662634763e-05, "loss": 0.0874, "step": 5864 }, { "epoch": 0.98, "grad_norm": 0.8166729807853699, "learning_rate": 1.5721045702484726e-05, "loss": 0.073, "step": 5865 }, { "epoch": 0.98, "grad_norm": 0.5499355792999268, "learning_rate": 1.5719564592079875e-05, "loss": 0.0827, "step": 5866 }, { "epoch": 0.98, "grad_norm": 0.8795605897903442, "learning_rate": 1.5718083295181365e-05, "loss": 0.0781, "step": 5867 }, { "epoch": 0.98, "grad_norm": 0.964809000492096, "learning_rate": 1.57166018118375e-05, "loss": 0.0822, "step": 5868 }, { "epoch": 0.98, "grad_norm": 0.6240335702896118, "learning_rate": 1.5715120142096586e-05, "loss": 0.0609, "step": 5869 }, { "epoch": 0.98, "grad_norm": 0.9049032926559448, "learning_rate": 1.5713638286006938e-05, "loss": 0.0752, "step": 5870 }, { "epoch": 0.98, "grad_norm": 0.8130543231964111, "learning_rate": 1.571215624361687e-05, "loss": 0.088, "step": 5871 }, { "epoch": 0.98, "grad_norm": 1.0164166688919067, "learning_rate": 1.57106740149747e-05, "loss": 0.0692, "step": 5872 }, { "epoch": 0.98, "grad_norm": 0.702393114566803, "learning_rate": 1.570919160012877e-05, "loss": 0.0975, "step": 5873 }, { "epoch": 0.98, "grad_norm": 0.7291963696479797, "learning_rate": 1.5707708999127404e-05, "loss": 0.0861, "step": 5874 }, { "epoch": 0.98, "grad_norm": 0.6653562188148499, "learning_rate": 1.5706226212018953e-05, "loss": 0.0786, "step": 5875 }, { "epoch": 0.98, "grad_norm": 0.572929859161377, "learning_rate": 1.570474323885176e-05, "loss": 0.054, "step": 5876 }, { "epoch": 0.98, "grad_norm": 0.8761343955993652, "learning_rate": 1.5703260079674185e-05, "loss": 0.0804, "step": 5877 }, { "epoch": 0.98, "grad_norm": 0.5920180082321167, "learning_rate": 1.5701776734534582e-05, "loss": 0.0546, "step": 5878 }, { "epoch": 0.98, "grad_norm": 0.522061824798584, "learning_rate": 1.570029320348132e-05, "loss": 0.0584, "step": 5879 }, { "epoch": 0.98, "grad_norm": 0.7357649803161621, "learning_rate": 1.5698809486562773e-05, "loss": 0.0808, "step": 5880 }, { "epoch": 0.98, "grad_norm": 0.6645908951759338, "learning_rate": 1.5697325583827313e-05, "loss": 0.07, "step": 5881 }, { "epoch": 0.98, "grad_norm": 0.7199094891548157, "learning_rate": 1.569584149532333e-05, "loss": 0.069, "step": 5882 }, { "epoch": 0.98, "grad_norm": 0.7183393836021423, "learning_rate": 1.569435722109922e-05, "loss": 0.0844, "step": 5883 }, { "epoch": 0.98, "grad_norm": 0.5975900292396545, "learning_rate": 1.569287276120337e-05, "loss": 0.0643, "step": 5884 }, { "epoch": 0.98, "grad_norm": 0.7074434161186218, "learning_rate": 1.5691388115684186e-05, "loss": 0.0607, "step": 5885 }, { "epoch": 0.98, "grad_norm": 0.6249862313270569, "learning_rate": 1.5689903284590075e-05, "loss": 0.0784, "step": 5886 }, { "epoch": 0.98, "grad_norm": 0.7871512770652771, "learning_rate": 1.5688418267969457e-05, "loss": 0.08, "step": 5887 }, { "epoch": 0.98, "grad_norm": 0.6753396987915039, "learning_rate": 1.5686933065870748e-05, "loss": 0.0694, "step": 5888 }, { "epoch": 0.98, "grad_norm": 0.717950701713562, "learning_rate": 1.5685447678342374e-05, "loss": 0.0985, "step": 5889 }, { "epoch": 0.99, "grad_norm": 0.7866508960723877, "learning_rate": 1.5683962105432773e-05, "loss": 0.0878, "step": 5890 }, { "epoch": 0.99, "grad_norm": 0.5633660554885864, "learning_rate": 1.5682476347190384e-05, "loss": 0.0709, "step": 5891 }, { "epoch": 0.99, "grad_norm": 0.754514753818512, "learning_rate": 1.5680990403663645e-05, "loss": 0.0885, "step": 5892 }, { "epoch": 0.99, "grad_norm": 0.650505542755127, "learning_rate": 1.5679504274901015e-05, "loss": 0.0688, "step": 5893 }, { "epoch": 0.99, "grad_norm": 0.6400406956672668, "learning_rate": 1.5678017960950945e-05, "loss": 0.0624, "step": 5894 }, { "epoch": 0.99, "grad_norm": 0.5851660966873169, "learning_rate": 1.5676531461861902e-05, "loss": 0.0706, "step": 5895 }, { "epoch": 0.99, "grad_norm": 0.6219439506530762, "learning_rate": 1.5675044777682354e-05, "loss": 0.0692, "step": 5896 }, { "epoch": 0.99, "grad_norm": 0.5969982743263245, "learning_rate": 1.5673557908460774e-05, "loss": 0.0722, "step": 5897 }, { "epoch": 0.99, "grad_norm": 0.6311523914337158, "learning_rate": 1.5672070854245646e-05, "loss": 0.0667, "step": 5898 }, { "epoch": 0.99, "grad_norm": 0.9843969941139221, "learning_rate": 1.567058361508546e-05, "loss": 0.0727, "step": 5899 }, { "epoch": 0.99, "grad_norm": 0.7922382354736328, "learning_rate": 1.5669096191028702e-05, "loss": 0.0638, "step": 5900 }, { "epoch": 0.99, "grad_norm": 1.5023307800292969, "learning_rate": 1.5667608582123878e-05, "loss": 0.0688, "step": 5901 }, { "epoch": 0.99, "grad_norm": 0.9352686405181885, "learning_rate": 1.566612078841949e-05, "loss": 0.1075, "step": 5902 }, { "epoch": 0.99, "grad_norm": 0.6338685154914856, "learning_rate": 1.566463280996405e-05, "loss": 0.0671, "step": 5903 }, { "epoch": 0.99, "grad_norm": 0.8654293417930603, "learning_rate": 1.5663144646806078e-05, "loss": 0.0581, "step": 5904 }, { "epoch": 0.99, "grad_norm": 0.6171883344650269, "learning_rate": 1.566165629899409e-05, "loss": 0.0591, "step": 5905 }, { "epoch": 0.99, "grad_norm": 0.8520505428314209, "learning_rate": 1.5660167766576628e-05, "loss": 0.0881, "step": 5906 }, { "epoch": 0.99, "grad_norm": 0.5190403461456299, "learning_rate": 1.565867904960221e-05, "loss": 0.065, "step": 5907 }, { "epoch": 0.99, "grad_norm": 0.9662407040596008, "learning_rate": 1.5657190148119397e-05, "loss": 0.1085, "step": 5908 }, { "epoch": 0.99, "grad_norm": 0.5467478632926941, "learning_rate": 1.5655701062176724e-05, "loss": 0.0478, "step": 5909 }, { "epoch": 0.99, "grad_norm": 0.7500823736190796, "learning_rate": 1.5654211791822744e-05, "loss": 0.0637, "step": 5910 }, { "epoch": 0.99, "grad_norm": 0.6786803603172302, "learning_rate": 1.5652722337106025e-05, "loss": 0.0913, "step": 5911 }, { "epoch": 0.99, "grad_norm": 1.0168099403381348, "learning_rate": 1.5651232698075123e-05, "loss": 0.0809, "step": 5912 }, { "epoch": 0.99, "grad_norm": 0.44653749465942383, "learning_rate": 1.5649742874778618e-05, "loss": 0.0493, "step": 5913 }, { "epoch": 0.99, "grad_norm": 0.8227493762969971, "learning_rate": 1.564825286726508e-05, "loss": 0.0713, "step": 5914 }, { "epoch": 0.99, "grad_norm": 0.48058146238327026, "learning_rate": 1.5646762675583098e-05, "loss": 0.0543, "step": 5915 }, { "epoch": 0.99, "grad_norm": 0.5345425009727478, "learning_rate": 1.564527229978126e-05, "loss": 0.0557, "step": 5916 }, { "epoch": 0.99, "grad_norm": 0.4924672544002533, "learning_rate": 1.5643781739908168e-05, "loss": 0.0673, "step": 5917 }, { "epoch": 0.99, "grad_norm": 0.7888213992118835, "learning_rate": 1.564229099601241e-05, "loss": 0.1012, "step": 5918 }, { "epoch": 0.99, "grad_norm": 0.6454260945320129, "learning_rate": 1.5640800068142602e-05, "loss": 0.0558, "step": 5919 }, { "epoch": 0.99, "grad_norm": 1.480852484703064, "learning_rate": 1.5639308956347353e-05, "loss": 0.0801, "step": 5920 }, { "epoch": 0.99, "grad_norm": 0.6351490020751953, "learning_rate": 1.563781766067529e-05, "loss": 0.0682, "step": 5921 }, { "epoch": 0.99, "grad_norm": 0.6340401768684387, "learning_rate": 1.5636326181175034e-05, "loss": 0.0753, "step": 5922 }, { "epoch": 0.99, "grad_norm": 0.6524255871772766, "learning_rate": 1.563483451789522e-05, "loss": 0.0644, "step": 5923 }, { "epoch": 0.99, "grad_norm": 0.6863978505134583, "learning_rate": 1.5633342670884477e-05, "loss": 0.0633, "step": 5924 }, { "epoch": 0.99, "grad_norm": 0.4937788248062134, "learning_rate": 1.5631850640191457e-05, "loss": 0.0632, "step": 5925 }, { "epoch": 0.99, "grad_norm": 0.5342184901237488, "learning_rate": 1.563035842586481e-05, "loss": 0.0647, "step": 5926 }, { "epoch": 0.99, "grad_norm": 0.6803490519523621, "learning_rate": 1.5628866027953184e-05, "loss": 0.0811, "step": 5927 }, { "epoch": 0.99, "grad_norm": 0.8669490218162537, "learning_rate": 1.5627373446505244e-05, "loss": 0.0546, "step": 5928 }, { "epoch": 0.99, "grad_norm": 0.6698249578475952, "learning_rate": 1.562588068156966e-05, "loss": 0.082, "step": 5929 }, { "epoch": 0.99, "grad_norm": 0.731353759765625, "learning_rate": 1.5624387733195104e-05, "loss": 0.0827, "step": 5930 }, { "epoch": 0.99, "grad_norm": 0.5834861993789673, "learning_rate": 1.5622894601430258e-05, "loss": 0.0768, "step": 5931 }, { "epoch": 0.99, "grad_norm": 0.6963982582092285, "learning_rate": 1.5621401286323803e-05, "loss": 0.0886, "step": 5932 }, { "epoch": 0.99, "grad_norm": 0.7662914395332336, "learning_rate": 1.5619907787924432e-05, "loss": 0.0898, "step": 5933 }, { "epoch": 0.99, "grad_norm": 0.9150437116622925, "learning_rate": 1.561841410628084e-05, "loss": 0.0611, "step": 5934 }, { "epoch": 0.99, "grad_norm": 0.6148656010627747, "learning_rate": 1.5616920241441735e-05, "loss": 0.0734, "step": 5935 }, { "epoch": 0.99, "grad_norm": 0.8787457346916199, "learning_rate": 1.5615426193455827e-05, "loss": 0.0833, "step": 5936 }, { "epoch": 0.99, "grad_norm": 0.6139631867408752, "learning_rate": 1.5613931962371826e-05, "loss": 0.0709, "step": 5937 }, { "epoch": 0.99, "grad_norm": 0.6588608622550964, "learning_rate": 1.5612437548238454e-05, "loss": 0.0693, "step": 5938 }, { "epoch": 0.99, "grad_norm": 0.7933009266853333, "learning_rate": 1.5610942951104445e-05, "loss": 0.0898, "step": 5939 }, { "epoch": 0.99, "grad_norm": 0.5713435411453247, "learning_rate": 1.5609448171018525e-05, "loss": 0.0631, "step": 5940 }, { "epoch": 0.99, "grad_norm": 0.6175626516342163, "learning_rate": 1.5607953208029437e-05, "loss": 0.0738, "step": 5941 }, { "epoch": 0.99, "grad_norm": 0.4390566945075989, "learning_rate": 1.5606458062185922e-05, "loss": 0.0784, "step": 5942 }, { "epoch": 0.99, "grad_norm": 2.9137158393859863, "learning_rate": 1.5604962733536736e-05, "loss": 0.0777, "step": 5943 }, { "epoch": 0.99, "grad_norm": 0.6806554198265076, "learning_rate": 1.5603467222130633e-05, "loss": 0.0659, "step": 5944 }, { "epoch": 0.99, "grad_norm": 1.7960071563720703, "learning_rate": 1.5601971528016376e-05, "loss": 0.0784, "step": 5945 }, { "epoch": 0.99, "grad_norm": 0.857782781124115, "learning_rate": 1.560047565124274e-05, "loss": 0.1062, "step": 5946 }, { "epoch": 0.99, "grad_norm": 0.8004257082939148, "learning_rate": 1.5598979591858495e-05, "loss": 0.0984, "step": 5947 }, { "epoch": 0.99, "grad_norm": 0.670010507106781, "learning_rate": 1.5597483349912415e-05, "loss": 0.0699, "step": 5948 }, { "epoch": 0.99, "grad_norm": 0.9563024044036865, "learning_rate": 1.55959869254533e-05, "loss": 0.0656, "step": 5949 }, { "epoch": 1.0, "grad_norm": 0.6677714586257935, "learning_rate": 1.5594490318529932e-05, "loss": 0.076, "step": 5950 }, { "epoch": 1.0, "grad_norm": 0.5623463988304138, "learning_rate": 1.559299352919112e-05, "loss": 0.0862, "step": 5951 }, { "epoch": 1.0, "grad_norm": 0.549026370048523, "learning_rate": 1.5591496557485658e-05, "loss": 0.0477, "step": 5952 }, { "epoch": 1.0, "grad_norm": 0.5958946347236633, "learning_rate": 1.5589999403462364e-05, "loss": 0.0879, "step": 5953 }, { "epoch": 1.0, "grad_norm": 0.7559699416160583, "learning_rate": 1.5588502067170055e-05, "loss": 0.0742, "step": 5954 }, { "epoch": 1.0, "grad_norm": 2.700312852859497, "learning_rate": 1.558700454865755e-05, "loss": 0.0742, "step": 5955 }, { "epoch": 1.0, "grad_norm": 0.6799514889717102, "learning_rate": 1.5585506847973677e-05, "loss": 0.0618, "step": 5956 }, { "epoch": 1.0, "grad_norm": 0.7416157722473145, "learning_rate": 1.558400896516727e-05, "loss": 0.0854, "step": 5957 }, { "epoch": 1.0, "grad_norm": 0.8921558856964111, "learning_rate": 1.5582510900287172e-05, "loss": 0.0927, "step": 5958 }, { "epoch": 1.0, "grad_norm": 0.9567750692367554, "learning_rate": 1.5581012653382233e-05, "loss": 0.0858, "step": 5959 }, { "epoch": 1.0, "grad_norm": 0.9834759831428528, "learning_rate": 1.5579514224501296e-05, "loss": 0.0732, "step": 5960 }, { "epoch": 1.0, "grad_norm": 0.675346314907074, "learning_rate": 1.5578015613693226e-05, "loss": 0.081, "step": 5961 }, { "epoch": 1.0, "grad_norm": 5.862879753112793, "learning_rate": 1.5576516821006884e-05, "loss": 0.0835, "step": 5962 }, { "epoch": 1.0, "grad_norm": 0.7739394307136536, "learning_rate": 1.557501784649114e-05, "loss": 0.0964, "step": 5963 }, { "epoch": 1.0, "grad_norm": 0.6819384098052979, "learning_rate": 1.557351869019487e-05, "loss": 0.0787, "step": 5964 }, { "epoch": 1.0, "grad_norm": 0.6685717701911926, "learning_rate": 1.5572019352166966e-05, "loss": 0.0762, "step": 5965 }, { "epoch": 1.0, "grad_norm": 1.1287680864334106, "learning_rate": 1.5570519832456298e-05, "loss": 0.0799, "step": 5966 }, { "epoch": 1.0, "grad_norm": 0.792479395866394, "learning_rate": 1.556902013111177e-05, "loss": 0.0706, "step": 5967 }, { "epoch": 1.0, "grad_norm": 1.111613154411316, "learning_rate": 1.5567520248182284e-05, "loss": 0.0959, "step": 5968 }, { "epoch": 1.0, "grad_norm": 0.6857154369354248, "learning_rate": 1.5566020183716738e-05, "loss": 0.0657, "step": 5969 }, { "epoch": 1.0, "grad_norm": 0.6535655856132507, "learning_rate": 1.556451993776405e-05, "loss": 0.0766, "step": 5970 }, { "epoch": 1.0, "grad_norm": 0.5126081705093384, "learning_rate": 1.5563019510373135e-05, "loss": 0.0684, "step": 5971 }, { "epoch": 1.0, "grad_norm": 0.7328999638557434, "learning_rate": 1.556151890159291e-05, "loss": 0.1014, "step": 5972 }, { "epoch": 1.0, "grad_norm": 0.7252584099769592, "learning_rate": 1.5560018111472317e-05, "loss": 0.1082, "step": 5973 }, { "epoch": 1.0, "grad_norm": 0.6289843320846558, "learning_rate": 1.5558517140060283e-05, "loss": 0.075, "step": 5974 }, { "epoch": 1.0, "grad_norm": 0.7050564289093018, "learning_rate": 1.555701598740575e-05, "loss": 0.0849, "step": 5975 }, { "epoch": 1.0, "grad_norm": 0.6851904988288879, "learning_rate": 1.5555514653557665e-05, "loss": 0.0817, "step": 5976 }, { "epoch": 1.0, "grad_norm": 0.6632027626037598, "learning_rate": 1.555401313856498e-05, "loss": 0.0662, "step": 5977 }, { "epoch": 1.0, "grad_norm": 0.6834226846694946, "learning_rate": 1.5552511442476656e-05, "loss": 0.0993, "step": 5978 }, { "epoch": 1.0, "grad_norm": 0.5052722096443176, "learning_rate": 1.555100956534166e-05, "loss": 0.0603, "step": 5979 }, { "epoch": 1.0, "grad_norm": 0.4716073274612427, "learning_rate": 1.5549507507208956e-05, "loss": 0.0529, "step": 5980 }, { "epoch": 1.0, "grad_norm": 0.4965132176876068, "learning_rate": 1.5548005268127525e-05, "loss": 0.0626, "step": 5981 }, { "epoch": 1.0, "grad_norm": 0.7624384164810181, "learning_rate": 1.5546502848146348e-05, "loss": 0.0596, "step": 5982 }, { "epoch": 1.0, "grad_norm": 0.6292052865028381, "learning_rate": 1.5545000247314414e-05, "loss": 0.0673, "step": 5983 }, { "epoch": 1.0, "grad_norm": 0.5310641527175903, "learning_rate": 1.554349746568072e-05, "loss": 0.0626, "step": 5984 }, { "epoch": 1.0, "grad_norm": 0.5551413297653198, "learning_rate": 1.554199450329426e-05, "loss": 0.0529, "step": 5985 }, { "epoch": 1.0, "grad_norm": 0.5342599153518677, "learning_rate": 1.5540491360204044e-05, "loss": 0.0711, "step": 5986 }, { "epoch": 1.0, "grad_norm": 0.5151304602622986, "learning_rate": 1.5538988036459082e-05, "loss": 0.0425, "step": 5987 }, { "epoch": 1.0, "grad_norm": 0.488037109375, "learning_rate": 1.5537484532108392e-05, "loss": 0.0651, "step": 5988 }, { "epoch": 1.0, "grad_norm": 0.6195737719535828, "learning_rate": 1.5535980847201002e-05, "loss": 0.06, "step": 5989 }, { "epoch": 1.0, "grad_norm": 0.5624719262123108, "learning_rate": 1.5534476981785934e-05, "loss": 0.0734, "step": 5990 }, { "epoch": 1.0, "grad_norm": 0.9992918372154236, "learning_rate": 1.5532972935912228e-05, "loss": 0.0744, "step": 5991 }, { "epoch": 1.0, "grad_norm": 0.5587053894996643, "learning_rate": 1.5531468709628924e-05, "loss": 0.0677, "step": 5992 }, { "epoch": 1.0, "grad_norm": 0.8242866396903992, "learning_rate": 1.5529964302985075e-05, "loss": 0.073, "step": 5993 }, { "epoch": 1.0, "grad_norm": 0.6976316571235657, "learning_rate": 1.5528459716029724e-05, "loss": 0.0541, "step": 5994 }, { "epoch": 1.0, "grad_norm": 0.5413699150085449, "learning_rate": 1.5526954948811938e-05, "loss": 0.0647, "step": 5995 }, { "epoch": 1.0, "grad_norm": 0.5086904168128967, "learning_rate": 1.5525450001380778e-05, "loss": 0.0602, "step": 5996 }, { "epoch": 1.0, "grad_norm": 0.7132719159126282, "learning_rate": 1.5523944873785313e-05, "loss": 0.0596, "step": 5997 }, { "epoch": 1.0, "grad_norm": 0.7404279708862305, "learning_rate": 1.5522439566074625e-05, "loss": 0.0521, "step": 5998 }, { "epoch": 1.0, "grad_norm": 0.6494187116622925, "learning_rate": 1.552093407829779e-05, "loss": 0.0686, "step": 5999 }, { "epoch": 1.0, "grad_norm": 0.5464925765991211, "learning_rate": 1.5519428410503905e-05, "loss": 0.071, "step": 6000 }, { "epoch": 1.0, "grad_norm": 0.7718977332115173, "learning_rate": 1.551792256274206e-05, "loss": 0.0791, "step": 6001 }, { "epoch": 1.0, "grad_norm": 0.7380101680755615, "learning_rate": 1.5516416535061348e-05, "loss": 0.0782, "step": 6002 }, { "epoch": 1.0, "grad_norm": 0.5183768272399902, "learning_rate": 1.5514910327510883e-05, "loss": 0.0513, "step": 6003 }, { "epoch": 1.0, "grad_norm": 0.7668967247009277, "learning_rate": 1.551340394013978e-05, "loss": 0.0606, "step": 6004 }, { "epoch": 1.0, "grad_norm": 0.7040594220161438, "learning_rate": 1.5511897372997143e-05, "loss": 0.0609, "step": 6005 }, { "epoch": 1.0, "grad_norm": 0.38976773619651794, "learning_rate": 1.551039062613211e-05, "loss": 0.0485, "step": 6006 }, { "epoch": 1.0, "grad_norm": 0.7273548245429993, "learning_rate": 1.55088836995938e-05, "loss": 0.0646, "step": 6007 }, { "epoch": 1.0, "grad_norm": 0.6833207607269287, "learning_rate": 1.550737659343136e-05, "loss": 0.0637, "step": 6008 }, { "epoch": 1.01, "grad_norm": 0.8060728311538696, "learning_rate": 1.550586930769392e-05, "loss": 0.0837, "step": 6009 }, { "epoch": 1.01, "grad_norm": 0.5942651033401489, "learning_rate": 1.550436184243063e-05, "loss": 0.0779, "step": 6010 }, { "epoch": 1.01, "grad_norm": 0.5563105344772339, "learning_rate": 1.5502854197690644e-05, "loss": 0.0555, "step": 6011 }, { "epoch": 1.01, "grad_norm": 0.6615878343582153, "learning_rate": 1.550134637352312e-05, "loss": 0.0696, "step": 6012 }, { "epoch": 1.01, "grad_norm": 0.8796133399009705, "learning_rate": 1.5499838369977226e-05, "loss": 0.0938, "step": 6013 }, { "epoch": 1.01, "grad_norm": 0.8011976480484009, "learning_rate": 1.5498330187102127e-05, "loss": 0.0637, "step": 6014 }, { "epoch": 1.01, "grad_norm": 0.9810433387756348, "learning_rate": 1.5496821824946998e-05, "loss": 0.0792, "step": 6015 }, { "epoch": 1.01, "grad_norm": 0.8463547229766846, "learning_rate": 1.549531328356103e-05, "loss": 0.0713, "step": 6016 }, { "epoch": 1.01, "grad_norm": 0.4879130423069, "learning_rate": 1.5493804562993403e-05, "loss": 0.057, "step": 6017 }, { "epoch": 1.01, "grad_norm": 0.44328808784484863, "learning_rate": 1.5492295663293314e-05, "loss": 0.0663, "step": 6018 }, { "epoch": 1.01, "grad_norm": 0.3443668782711029, "learning_rate": 1.5490786584509962e-05, "loss": 0.0526, "step": 6019 }, { "epoch": 1.01, "grad_norm": 0.6923150420188904, "learning_rate": 1.5489277326692552e-05, "loss": 0.0771, "step": 6020 }, { "epoch": 1.01, "grad_norm": 0.6164368987083435, "learning_rate": 1.5487767889890294e-05, "loss": 0.0635, "step": 6021 }, { "epoch": 1.01, "grad_norm": 2.099747896194458, "learning_rate": 1.5486258274152408e-05, "loss": 0.0676, "step": 6022 }, { "epoch": 1.01, "grad_norm": 0.41181638836860657, "learning_rate": 1.5484748479528112e-05, "loss": 0.0542, "step": 6023 }, { "epoch": 1.01, "grad_norm": 0.8523330092430115, "learning_rate": 1.5483238506066643e-05, "loss": 0.0642, "step": 6024 }, { "epoch": 1.01, "grad_norm": 2.044379711151123, "learning_rate": 1.5481728353817227e-05, "loss": 0.0733, "step": 6025 }, { "epoch": 1.01, "grad_norm": 0.6934373378753662, "learning_rate": 1.548021802282911e-05, "loss": 0.0795, "step": 6026 }, { "epoch": 1.01, "grad_norm": 0.9055825471878052, "learning_rate": 1.5478707513151537e-05, "loss": 0.0805, "step": 6027 }, { "epoch": 1.01, "grad_norm": 0.6448729634284973, "learning_rate": 1.547719682483376e-05, "loss": 0.0673, "step": 6028 }, { "epoch": 1.01, "grad_norm": 2.095736265182495, "learning_rate": 1.5475685957925034e-05, "loss": 0.0627, "step": 6029 }, { "epoch": 1.01, "grad_norm": 0.6883023381233215, "learning_rate": 1.547417491247463e-05, "loss": 0.0711, "step": 6030 }, { "epoch": 1.01, "grad_norm": 2.0335662364959717, "learning_rate": 1.547266368853181e-05, "loss": 0.0645, "step": 6031 }, { "epoch": 1.01, "grad_norm": 0.4570254385471344, "learning_rate": 1.547115228614585e-05, "loss": 0.0619, "step": 6032 }, { "epoch": 1.01, "grad_norm": 0.5225634574890137, "learning_rate": 1.5469640705366034e-05, "loss": 0.0802, "step": 6033 }, { "epoch": 1.01, "grad_norm": 0.47782960534095764, "learning_rate": 1.5468128946241653e-05, "loss": 0.0458, "step": 6034 }, { "epoch": 1.01, "grad_norm": 0.5456507802009583, "learning_rate": 1.5466617008821992e-05, "loss": 0.0663, "step": 6035 }, { "epoch": 1.01, "grad_norm": 0.5292290449142456, "learning_rate": 1.5465104893156354e-05, "loss": 0.0486, "step": 6036 }, { "epoch": 1.01, "grad_norm": 2.0350613594055176, "learning_rate": 1.546359259929404e-05, "loss": 0.0584, "step": 6037 }, { "epoch": 1.01, "grad_norm": 0.7377671003341675, "learning_rate": 1.5462080127284364e-05, "loss": 0.0793, "step": 6038 }, { "epoch": 1.01, "grad_norm": 0.5649918913841248, "learning_rate": 1.546056747717664e-05, "loss": 0.0669, "step": 6039 }, { "epoch": 1.01, "grad_norm": 0.6655089259147644, "learning_rate": 1.545905464902019e-05, "loss": 0.0721, "step": 6040 }, { "epoch": 1.01, "grad_norm": 0.5808119773864746, "learning_rate": 1.5457541642864344e-05, "loss": 0.0565, "step": 6041 }, { "epoch": 1.01, "grad_norm": 0.9675660729408264, "learning_rate": 1.545602845875843e-05, "loss": 0.0542, "step": 6042 }, { "epoch": 1.01, "grad_norm": 0.633693277835846, "learning_rate": 1.5454515096751793e-05, "loss": 0.0502, "step": 6043 }, { "epoch": 1.01, "grad_norm": 0.6711612939834595, "learning_rate": 1.545300155689378e-05, "loss": 0.0577, "step": 6044 }, { "epoch": 1.01, "grad_norm": 1.3559905290603638, "learning_rate": 1.5451487839233733e-05, "loss": 0.081, "step": 6045 }, { "epoch": 1.01, "grad_norm": 0.7240831255912781, "learning_rate": 1.5449973943821016e-05, "loss": 0.0679, "step": 6046 }, { "epoch": 1.01, "grad_norm": 0.4357483685016632, "learning_rate": 1.5448459870704988e-05, "loss": 0.0557, "step": 6047 }, { "epoch": 1.01, "grad_norm": 0.6422097682952881, "learning_rate": 1.5446945619935018e-05, "loss": 0.0678, "step": 6048 }, { "epoch": 1.01, "grad_norm": 0.7206681966781616, "learning_rate": 1.544543119156048e-05, "loss": 0.0772, "step": 6049 }, { "epoch": 1.01, "grad_norm": 0.9080162048339844, "learning_rate": 1.5443916585630757e-05, "loss": 0.0626, "step": 6050 }, { "epoch": 1.01, "grad_norm": 0.46069321036338806, "learning_rate": 1.5442401802195227e-05, "loss": 0.0595, "step": 6051 }, { "epoch": 1.01, "grad_norm": 0.5340871214866638, "learning_rate": 1.544088684130329e-05, "loss": 0.0736, "step": 6052 }, { "epoch": 1.01, "grad_norm": 0.9170456528663635, "learning_rate": 1.543937170300434e-05, "loss": 0.0635, "step": 6053 }, { "epoch": 1.01, "grad_norm": 0.5988587737083435, "learning_rate": 1.5437856387347777e-05, "loss": 0.0696, "step": 6054 }, { "epoch": 1.01, "grad_norm": 0.5467358827590942, "learning_rate": 1.5436340894383013e-05, "loss": 0.0591, "step": 6055 }, { "epoch": 1.01, "grad_norm": 0.5734603404998779, "learning_rate": 1.5434825224159465e-05, "loss": 0.0565, "step": 6056 }, { "epoch": 1.01, "grad_norm": 0.5805233120918274, "learning_rate": 1.5433309376726548e-05, "loss": 0.0647, "step": 6057 }, { "epoch": 1.01, "grad_norm": 0.5438962578773499, "learning_rate": 1.543179335213369e-05, "loss": 0.0608, "step": 6058 }, { "epoch": 1.01, "grad_norm": 0.6758489608764648, "learning_rate": 1.5430277150430325e-05, "loss": 0.0731, "step": 6059 }, { "epoch": 1.01, "grad_norm": 0.792716383934021, "learning_rate": 1.5428760771665884e-05, "loss": 0.0918, "step": 6060 }, { "epoch": 1.01, "grad_norm": 0.5839347839355469, "learning_rate": 1.5427244215889824e-05, "loss": 0.062, "step": 6061 }, { "epoch": 1.01, "grad_norm": 0.5221823453903198, "learning_rate": 1.542572748315158e-05, "loss": 0.0698, "step": 6062 }, { "epoch": 1.01, "grad_norm": 0.6048769950866699, "learning_rate": 1.5424210573500612e-05, "loss": 0.0647, "step": 6063 }, { "epoch": 1.01, "grad_norm": 0.6377294063568115, "learning_rate": 1.5422693486986382e-05, "loss": 0.083, "step": 6064 }, { "epoch": 1.01, "grad_norm": 0.8179551959037781, "learning_rate": 1.5421176223658357e-05, "loss": 0.0552, "step": 6065 }, { "epoch": 1.01, "grad_norm": 0.6093735098838806, "learning_rate": 1.5419658783566008e-05, "loss": 0.0674, "step": 6066 }, { "epoch": 1.01, "grad_norm": 0.7479444146156311, "learning_rate": 1.5418141166758815e-05, "loss": 0.0809, "step": 6067 }, { "epoch": 1.01, "grad_norm": 0.7590732574462891, "learning_rate": 1.5416623373286258e-05, "loss": 0.0796, "step": 6068 }, { "epoch": 1.02, "grad_norm": 0.6763361692428589, "learning_rate": 1.5415105403197828e-05, "loss": 0.0667, "step": 6069 }, { "epoch": 1.02, "grad_norm": 0.5546638369560242, "learning_rate": 1.5413587256543022e-05, "loss": 0.0624, "step": 6070 }, { "epoch": 1.02, "grad_norm": 0.4547567665576935, "learning_rate": 1.541206893337134e-05, "loss": 0.0379, "step": 6071 }, { "epoch": 1.02, "grad_norm": 0.49714046716690063, "learning_rate": 1.5410550433732292e-05, "loss": 0.0527, "step": 6072 }, { "epoch": 1.02, "grad_norm": 0.5171791911125183, "learning_rate": 1.5409031757675385e-05, "loss": 0.0755, "step": 6073 }, { "epoch": 1.02, "grad_norm": 0.4707823395729065, "learning_rate": 1.540751290525014e-05, "loss": 0.0567, "step": 6074 }, { "epoch": 1.02, "grad_norm": 0.3862881362438202, "learning_rate": 1.540599387650608e-05, "loss": 0.048, "step": 6075 }, { "epoch": 1.02, "grad_norm": 0.5231046676635742, "learning_rate": 1.540447467149274e-05, "loss": 0.0503, "step": 6076 }, { "epoch": 1.02, "grad_norm": 0.475399911403656, "learning_rate": 1.5402955290259646e-05, "loss": 0.0624, "step": 6077 }, { "epoch": 1.02, "grad_norm": 0.5368184447288513, "learning_rate": 1.5401435732856345e-05, "loss": 0.0595, "step": 6078 }, { "epoch": 1.02, "grad_norm": 0.48988038301467896, "learning_rate": 1.5399915999332387e-05, "loss": 0.0592, "step": 6079 }, { "epoch": 1.02, "grad_norm": 0.5901340246200562, "learning_rate": 1.5398396089737324e-05, "loss": 0.0548, "step": 6080 }, { "epoch": 1.02, "grad_norm": 0.643502414226532, "learning_rate": 1.539687600412071e-05, "loss": 0.0545, "step": 6081 }, { "epoch": 1.02, "grad_norm": 0.8628395199775696, "learning_rate": 1.539535574253211e-05, "loss": 0.1042, "step": 6082 }, { "epoch": 1.02, "grad_norm": 0.4814077615737915, "learning_rate": 1.5393835305021095e-05, "loss": 0.0423, "step": 6083 }, { "epoch": 1.02, "grad_norm": 0.36801716685295105, "learning_rate": 1.5392314691637245e-05, "loss": 0.0396, "step": 6084 }, { "epoch": 1.02, "grad_norm": 0.6147252321243286, "learning_rate": 1.5390793902430138e-05, "loss": 0.0964, "step": 6085 }, { "epoch": 1.02, "grad_norm": 0.3987635374069214, "learning_rate": 1.538927293744936e-05, "loss": 0.0748, "step": 6086 }, { "epoch": 1.02, "grad_norm": 0.5704227089881897, "learning_rate": 1.5387751796744504e-05, "loss": 0.0493, "step": 6087 }, { "epoch": 1.02, "grad_norm": 0.6749137043952942, "learning_rate": 1.5386230480365172e-05, "loss": 0.0531, "step": 6088 }, { "epoch": 1.02, "grad_norm": 1.040346622467041, "learning_rate": 1.5384708988360964e-05, "loss": 0.058, "step": 6089 }, { "epoch": 1.02, "grad_norm": 0.570380687713623, "learning_rate": 1.5383187320781494e-05, "loss": 0.0697, "step": 6090 }, { "epoch": 1.02, "grad_norm": 0.6299099326133728, "learning_rate": 1.538166547767638e-05, "loss": 0.0577, "step": 6091 }, { "epoch": 1.02, "grad_norm": 0.6870939135551453, "learning_rate": 1.538014345909524e-05, "loss": 0.0633, "step": 6092 }, { "epoch": 1.02, "grad_norm": 0.5606485605239868, "learning_rate": 1.5378621265087695e-05, "loss": 0.0597, "step": 6093 }, { "epoch": 1.02, "grad_norm": 0.47288593649864197, "learning_rate": 1.537709889570339e-05, "loss": 0.05, "step": 6094 }, { "epoch": 1.02, "grad_norm": 0.5687072277069092, "learning_rate": 1.5375576350991958e-05, "loss": 0.0598, "step": 6095 }, { "epoch": 1.02, "grad_norm": 0.6207560300827026, "learning_rate": 1.5374053631003042e-05, "loss": 0.064, "step": 6096 }, { "epoch": 1.02, "grad_norm": 0.5120599269866943, "learning_rate": 1.5372530735786294e-05, "loss": 0.042, "step": 6097 }, { "epoch": 1.02, "grad_norm": 0.9036623239517212, "learning_rate": 1.537100766539137e-05, "loss": 0.0776, "step": 6098 }, { "epoch": 1.02, "grad_norm": 0.6247778534889221, "learning_rate": 1.5369484419867932e-05, "loss": 0.0497, "step": 6099 }, { "epoch": 1.02, "grad_norm": 0.47725415229797363, "learning_rate": 1.536796099926565e-05, "loss": 0.059, "step": 6100 }, { "epoch": 1.02, "grad_norm": 0.7028553485870361, "learning_rate": 1.5366437403634188e-05, "loss": 0.0698, "step": 6101 }, { "epoch": 1.02, "grad_norm": 0.6489059925079346, "learning_rate": 1.5364913633023237e-05, "loss": 0.0567, "step": 6102 }, { "epoch": 1.02, "grad_norm": 0.5921006798744202, "learning_rate": 1.5363389687482473e-05, "loss": 0.0541, "step": 6103 }, { "epoch": 1.02, "grad_norm": 0.8906194567680359, "learning_rate": 1.536186556706159e-05, "loss": 0.0698, "step": 6104 }, { "epoch": 1.02, "grad_norm": 1.1231331825256348, "learning_rate": 1.536034127181028e-05, "loss": 0.1054, "step": 6105 }, { "epoch": 1.02, "grad_norm": 0.6950773596763611, "learning_rate": 1.5358816801778247e-05, "loss": 0.0738, "step": 6106 }, { "epoch": 1.02, "grad_norm": 0.7770678400993347, "learning_rate": 1.53572921570152e-05, "loss": 0.0654, "step": 6107 }, { "epoch": 1.02, "grad_norm": 0.5235402584075928, "learning_rate": 1.5355767337570852e-05, "loss": 0.0621, "step": 6108 }, { "epoch": 1.02, "grad_norm": 0.5030210614204407, "learning_rate": 1.5354242343494917e-05, "loss": 0.0421, "step": 6109 }, { "epoch": 1.02, "grad_norm": 0.5299075841903687, "learning_rate": 1.535271717483712e-05, "loss": 0.0605, "step": 6110 }, { "epoch": 1.02, "grad_norm": 0.5560789108276367, "learning_rate": 1.5351191831647195e-05, "loss": 0.0639, "step": 6111 }, { "epoch": 1.02, "grad_norm": 0.37417516112327576, "learning_rate": 1.534966631397488e-05, "loss": 0.061, "step": 6112 }, { "epoch": 1.02, "grad_norm": 0.5111253261566162, "learning_rate": 1.5348140621869908e-05, "loss": 0.0746, "step": 6113 }, { "epoch": 1.02, "grad_norm": 0.5328138470649719, "learning_rate": 1.534661475538203e-05, "loss": 0.0814, "step": 6114 }, { "epoch": 1.02, "grad_norm": 0.6169489026069641, "learning_rate": 1.5345088714561006e-05, "loss": 0.0587, "step": 6115 }, { "epoch": 1.02, "grad_norm": 0.7641744613647461, "learning_rate": 1.534356249945658e-05, "loss": 0.0693, "step": 6116 }, { "epoch": 1.02, "grad_norm": 0.7406914234161377, "learning_rate": 1.534203611011853e-05, "loss": 0.0594, "step": 6117 }, { "epoch": 1.02, "grad_norm": 0.4289681911468506, "learning_rate": 1.5340509546596613e-05, "loss": 0.0691, "step": 6118 }, { "epoch": 1.02, "grad_norm": 0.5748094320297241, "learning_rate": 1.5338982808940623e-05, "loss": 0.0634, "step": 6119 }, { "epoch": 1.02, "grad_norm": 0.8370270729064941, "learning_rate": 1.533745589720032e-05, "loss": 0.0523, "step": 6120 }, { "epoch": 1.02, "grad_norm": 0.7851932644844055, "learning_rate": 1.53359288114255e-05, "loss": 0.08, "step": 6121 }, { "epoch": 1.02, "grad_norm": 0.5790007710456848, "learning_rate": 1.533440155166596e-05, "loss": 0.0549, "step": 6122 }, { "epoch": 1.02, "grad_norm": 0.6152228713035583, "learning_rate": 1.5332874117971495e-05, "loss": 0.0593, "step": 6123 }, { "epoch": 1.02, "grad_norm": 0.5309690833091736, "learning_rate": 1.533134651039191e-05, "loss": 0.0686, "step": 6124 }, { "epoch": 1.02, "grad_norm": 0.5664148330688477, "learning_rate": 1.5329818728977008e-05, "loss": 0.0603, "step": 6125 }, { "epoch": 1.02, "grad_norm": 0.6942049860954285, "learning_rate": 1.5328290773776615e-05, "loss": 0.0808, "step": 6126 }, { "epoch": 1.02, "grad_norm": 0.5541733503341675, "learning_rate": 1.532676264484054e-05, "loss": 0.0689, "step": 6127 }, { "epoch": 1.02, "grad_norm": 0.47012341022491455, "learning_rate": 1.532523434221862e-05, "loss": 0.0494, "step": 6128 }, { "epoch": 1.03, "grad_norm": 0.5693872570991516, "learning_rate": 1.5323705865960682e-05, "loss": 0.0591, "step": 6129 }, { "epoch": 1.03, "grad_norm": 0.5751878023147583, "learning_rate": 1.5322177216116564e-05, "loss": 0.0547, "step": 6130 }, { "epoch": 1.03, "grad_norm": 0.8822405934333801, "learning_rate": 1.5320648392736108e-05, "loss": 0.0677, "step": 6131 }, { "epoch": 1.03, "grad_norm": 0.5182796716690063, "learning_rate": 1.5319119395869168e-05, "loss": 0.0588, "step": 6132 }, { "epoch": 1.03, "grad_norm": 0.6879094839096069, "learning_rate": 1.53175902255656e-05, "loss": 0.0603, "step": 6133 }, { "epoch": 1.03, "grad_norm": 0.5162172913551331, "learning_rate": 1.5316060881875256e-05, "loss": 0.053, "step": 6134 }, { "epoch": 1.03, "grad_norm": 0.5445594191551208, "learning_rate": 1.531453136484801e-05, "loss": 0.0506, "step": 6135 }, { "epoch": 1.03, "grad_norm": 0.5337517261505127, "learning_rate": 1.531300167453373e-05, "loss": 0.0548, "step": 6136 }, { "epoch": 1.03, "grad_norm": 0.5463461875915527, "learning_rate": 1.531147181098229e-05, "loss": 0.0607, "step": 6137 }, { "epoch": 1.03, "grad_norm": 0.6463030576705933, "learning_rate": 1.5309941774243584e-05, "loss": 0.0523, "step": 6138 }, { "epoch": 1.03, "grad_norm": 0.9144941568374634, "learning_rate": 1.5308411564367493e-05, "loss": 0.0711, "step": 6139 }, { "epoch": 1.03, "grad_norm": 1.0683718919754028, "learning_rate": 1.5306881181403915e-05, "loss": 0.0685, "step": 6140 }, { "epoch": 1.03, "grad_norm": 0.7420088648796082, "learning_rate": 1.5305350625402745e-05, "loss": 0.0898, "step": 6141 }, { "epoch": 1.03, "grad_norm": 0.5765920877456665, "learning_rate": 1.5303819896413893e-05, "loss": 0.0572, "step": 6142 }, { "epoch": 1.03, "grad_norm": 0.7757882475852966, "learning_rate": 1.530228899448727e-05, "loss": 0.0972, "step": 6143 }, { "epoch": 1.03, "grad_norm": 0.9727186560630798, "learning_rate": 1.530075791967279e-05, "loss": 0.0584, "step": 6144 }, { "epoch": 1.03, "grad_norm": 0.6250975728034973, "learning_rate": 1.5299226672020377e-05, "loss": 0.0626, "step": 6145 }, { "epoch": 1.03, "grad_norm": 0.965112030506134, "learning_rate": 1.5297695251579957e-05, "loss": 0.0677, "step": 6146 }, { "epoch": 1.03, "grad_norm": 0.598099410533905, "learning_rate": 1.5296163658401474e-05, "loss": 0.0614, "step": 6147 }, { "epoch": 1.03, "grad_norm": 0.6492077708244324, "learning_rate": 1.5294631892534858e-05, "loss": 0.0608, "step": 6148 }, { "epoch": 1.03, "grad_norm": 0.6593387126922607, "learning_rate": 1.5293099954030056e-05, "loss": 0.0636, "step": 6149 }, { "epoch": 1.03, "grad_norm": 0.6383630633354187, "learning_rate": 1.5291567842937022e-05, "loss": 0.0614, "step": 6150 }, { "epoch": 1.03, "grad_norm": 0.6115183234214783, "learning_rate": 1.5290035559305704e-05, "loss": 0.0581, "step": 6151 }, { "epoch": 1.03, "grad_norm": 0.5652276277542114, "learning_rate": 1.528850310318607e-05, "loss": 0.0564, "step": 6152 }, { "epoch": 1.03, "grad_norm": 0.7041302919387817, "learning_rate": 1.5286970474628094e-05, "loss": 0.0774, "step": 6153 }, { "epoch": 1.03, "grad_norm": 0.4518250823020935, "learning_rate": 1.528543767368174e-05, "loss": 0.052, "step": 6154 }, { "epoch": 1.03, "grad_norm": 0.8649611473083496, "learning_rate": 1.5283904700396987e-05, "loss": 0.0638, "step": 6155 }, { "epoch": 1.03, "grad_norm": 0.47689446806907654, "learning_rate": 1.5282371554823825e-05, "loss": 0.0625, "step": 6156 }, { "epoch": 1.03, "grad_norm": 0.9440860152244568, "learning_rate": 1.5280838237012245e-05, "loss": 0.0743, "step": 6157 }, { "epoch": 1.03, "grad_norm": 0.695263683795929, "learning_rate": 1.5279304747012233e-05, "loss": 0.0623, "step": 6158 }, { "epoch": 1.03, "grad_norm": 0.6364914774894714, "learning_rate": 1.5277771084873798e-05, "loss": 0.0741, "step": 6159 }, { "epoch": 1.03, "grad_norm": 0.534458339214325, "learning_rate": 1.5276237250646948e-05, "loss": 0.0525, "step": 6160 }, { "epoch": 1.03, "grad_norm": 0.6025232672691345, "learning_rate": 1.5274703244381693e-05, "loss": 0.0564, "step": 6161 }, { "epoch": 1.03, "grad_norm": 0.5942635536193848, "learning_rate": 1.527316906612805e-05, "loss": 0.0649, "step": 6162 }, { "epoch": 1.03, "grad_norm": 0.7242276668548584, "learning_rate": 1.5271634715936043e-05, "loss": 0.095, "step": 6163 }, { "epoch": 1.03, "grad_norm": 0.6238572001457214, "learning_rate": 1.5270100193855702e-05, "loss": 0.0699, "step": 6164 }, { "epoch": 1.03, "grad_norm": 0.5881960391998291, "learning_rate": 1.5268565499937066e-05, "loss": 0.0564, "step": 6165 }, { "epoch": 1.03, "grad_norm": 0.5710307359695435, "learning_rate": 1.526703063423017e-05, "loss": 0.0575, "step": 6166 }, { "epoch": 1.03, "grad_norm": 0.5761308073997498, "learning_rate": 1.5265495596785063e-05, "loss": 0.0629, "step": 6167 }, { "epoch": 1.03, "grad_norm": 0.6028927564620972, "learning_rate": 1.5263960387651803e-05, "loss": 0.0932, "step": 6168 }, { "epoch": 1.03, "grad_norm": 0.5440261363983154, "learning_rate": 1.526242500688043e-05, "loss": 0.0781, "step": 6169 }, { "epoch": 1.03, "grad_norm": 0.8245655298233032, "learning_rate": 1.5260889454521025e-05, "loss": 0.0821, "step": 6170 }, { "epoch": 1.03, "grad_norm": 0.6297270059585571, "learning_rate": 1.5259353730623647e-05, "loss": 0.0676, "step": 6171 }, { "epoch": 1.03, "grad_norm": 0.5012305974960327, "learning_rate": 1.5257817835238374e-05, "loss": 0.0615, "step": 6172 }, { "epoch": 1.03, "grad_norm": 0.8118022084236145, "learning_rate": 1.5256281768415284e-05, "loss": 0.0746, "step": 6173 }, { "epoch": 1.03, "grad_norm": 0.5445385575294495, "learning_rate": 1.525474553020446e-05, "loss": 0.062, "step": 6174 }, { "epoch": 1.03, "grad_norm": 0.7917400002479553, "learning_rate": 1.5253209120656002e-05, "loss": 0.099, "step": 6175 }, { "epoch": 1.03, "grad_norm": 0.5624645948410034, "learning_rate": 1.5251672539819998e-05, "loss": 0.0605, "step": 6176 }, { "epoch": 1.03, "grad_norm": 0.5983802080154419, "learning_rate": 1.5250135787746552e-05, "loss": 0.069, "step": 6177 }, { "epoch": 1.03, "grad_norm": 0.7527289986610413, "learning_rate": 1.5248598864485773e-05, "loss": 0.0538, "step": 6178 }, { "epoch": 1.03, "grad_norm": 0.5940254330635071, "learning_rate": 1.5247061770087771e-05, "loss": 0.0547, "step": 6179 }, { "epoch": 1.03, "grad_norm": 0.4378305673599243, "learning_rate": 1.5245524504602674e-05, "loss": 0.0601, "step": 6180 }, { "epoch": 1.03, "grad_norm": 0.5593688488006592, "learning_rate": 1.5243987068080595e-05, "loss": 0.061, "step": 6181 }, { "epoch": 1.03, "grad_norm": 0.5434738397598267, "learning_rate": 1.524244946057167e-05, "loss": 0.0774, "step": 6182 }, { "epoch": 1.03, "grad_norm": 0.620991051197052, "learning_rate": 1.5240911682126036e-05, "loss": 0.0746, "step": 6183 }, { "epoch": 1.03, "grad_norm": 0.4471074044704437, "learning_rate": 1.5239373732793832e-05, "loss": 0.0603, "step": 6184 }, { "epoch": 1.03, "grad_norm": 1.0818241834640503, "learning_rate": 1.5237835612625204e-05, "loss": 0.0531, "step": 6185 }, { "epoch": 1.03, "grad_norm": 0.6625862121582031, "learning_rate": 1.5236297321670303e-05, "loss": 0.0756, "step": 6186 }, { "epoch": 1.03, "grad_norm": 0.8905460238456726, "learning_rate": 1.5234758859979292e-05, "loss": 0.0675, "step": 6187 }, { "epoch": 1.03, "grad_norm": 0.6161551475524902, "learning_rate": 1.5233220227602332e-05, "loss": 0.0581, "step": 6188 }, { "epoch": 1.04, "grad_norm": 0.8032914400100708, "learning_rate": 1.5231681424589593e-05, "loss": 0.0938, "step": 6189 }, { "epoch": 1.04, "grad_norm": 0.5647671222686768, "learning_rate": 1.5230142450991246e-05, "loss": 0.0687, "step": 6190 }, { "epoch": 1.04, "grad_norm": 0.5812419652938843, "learning_rate": 1.5228603306857472e-05, "loss": 0.0744, "step": 6191 }, { "epoch": 1.04, "grad_norm": 0.5157350301742554, "learning_rate": 1.5227063992238464e-05, "loss": 0.0491, "step": 6192 }, { "epoch": 1.04, "grad_norm": 0.47042760252952576, "learning_rate": 1.5225524507184402e-05, "loss": 0.0532, "step": 6193 }, { "epoch": 1.04, "grad_norm": 0.6455239653587341, "learning_rate": 1.522398485174549e-05, "loss": 0.0756, "step": 6194 }, { "epoch": 1.04, "grad_norm": 0.49101951718330383, "learning_rate": 1.522244502597193e-05, "loss": 0.0566, "step": 6195 }, { "epoch": 1.04, "grad_norm": 0.5818213224411011, "learning_rate": 1.522090502991393e-05, "loss": 0.0643, "step": 6196 }, { "epoch": 1.04, "grad_norm": 0.5656633377075195, "learning_rate": 1.52193648636217e-05, "loss": 0.0569, "step": 6197 }, { "epoch": 1.04, "grad_norm": 1.2772982120513916, "learning_rate": 1.521782452714546e-05, "loss": 0.0747, "step": 6198 }, { "epoch": 1.04, "grad_norm": 0.45470738410949707, "learning_rate": 1.5216284020535441e-05, "loss": 0.0587, "step": 6199 }, { "epoch": 1.04, "grad_norm": 0.6244461536407471, "learning_rate": 1.5214743343841867e-05, "loss": 0.0647, "step": 6200 }, { "epoch": 1.04, "grad_norm": 0.6111029982566833, "learning_rate": 1.5213202497114972e-05, "loss": 0.066, "step": 6201 }, { "epoch": 1.04, "grad_norm": 0.4981629550457001, "learning_rate": 1.5211661480405e-05, "loss": 0.0568, "step": 6202 }, { "epoch": 1.04, "grad_norm": 0.5424197912216187, "learning_rate": 1.52101202937622e-05, "loss": 0.0691, "step": 6203 }, { "epoch": 1.04, "grad_norm": 0.6027401089668274, "learning_rate": 1.5208578937236825e-05, "loss": 0.08, "step": 6204 }, { "epoch": 1.04, "grad_norm": 0.6408120393753052, "learning_rate": 1.5207037410879126e-05, "loss": 0.0779, "step": 6205 }, { "epoch": 1.04, "grad_norm": 0.5015915632247925, "learning_rate": 1.5205495714739374e-05, "loss": 0.0571, "step": 6206 }, { "epoch": 1.04, "grad_norm": 0.7221513390541077, "learning_rate": 1.5203953848867833e-05, "loss": 0.098, "step": 6207 }, { "epoch": 1.04, "grad_norm": 0.5248286128044128, "learning_rate": 1.5202411813314779e-05, "loss": 0.0676, "step": 6208 }, { "epoch": 1.04, "grad_norm": 0.5828608870506287, "learning_rate": 1.5200869608130493e-05, "loss": 0.0887, "step": 6209 }, { "epoch": 1.04, "grad_norm": 0.5152732133865356, "learning_rate": 1.5199327233365257e-05, "loss": 0.0652, "step": 6210 }, { "epoch": 1.04, "grad_norm": 0.5022909641265869, "learning_rate": 1.519778468906937e-05, "loss": 0.0598, "step": 6211 }, { "epoch": 1.04, "grad_norm": 0.5904338359832764, "learning_rate": 1.519624197529312e-05, "loss": 0.0593, "step": 6212 }, { "epoch": 1.04, "grad_norm": 1.019932508468628, "learning_rate": 1.5194699092086812e-05, "loss": 0.0625, "step": 6213 }, { "epoch": 1.04, "grad_norm": 0.4163816273212433, "learning_rate": 1.5193156039500757e-05, "loss": 0.0682, "step": 6214 }, { "epoch": 1.04, "grad_norm": 0.6075500845909119, "learning_rate": 1.5191612817585264e-05, "loss": 0.0663, "step": 6215 }, { "epoch": 1.04, "grad_norm": 0.6076999306678772, "learning_rate": 1.5190069426390652e-05, "loss": 0.0785, "step": 6216 }, { "epoch": 1.04, "grad_norm": 0.5128253102302551, "learning_rate": 1.518852586596725e-05, "loss": 0.0667, "step": 6217 }, { "epoch": 1.04, "grad_norm": 0.4913051128387451, "learning_rate": 1.5186982136365379e-05, "loss": 0.0587, "step": 6218 }, { "epoch": 1.04, "grad_norm": 0.6503951549530029, "learning_rate": 1.5185438237635382e-05, "loss": 0.062, "step": 6219 }, { "epoch": 1.04, "grad_norm": 0.5681101679801941, "learning_rate": 1.5183894169827598e-05, "loss": 0.0712, "step": 6220 }, { "epoch": 1.04, "grad_norm": 0.5131732821464539, "learning_rate": 1.518234993299237e-05, "loss": 0.0561, "step": 6221 }, { "epoch": 1.04, "grad_norm": 0.6384491920471191, "learning_rate": 1.5180805527180051e-05, "loss": 0.064, "step": 6222 }, { "epoch": 1.04, "grad_norm": 0.689741313457489, "learning_rate": 1.5179260952441005e-05, "loss": 0.0641, "step": 6223 }, { "epoch": 1.04, "grad_norm": 0.6672943830490112, "learning_rate": 1.5177716208825586e-05, "loss": 0.0838, "step": 6224 }, { "epoch": 1.04, "grad_norm": 0.5396650433540344, "learning_rate": 1.5176171296384166e-05, "loss": 0.0657, "step": 6225 }, { "epoch": 1.04, "grad_norm": 0.7308437824249268, "learning_rate": 1.5174626215167116e-05, "loss": 0.0842, "step": 6226 }, { "epoch": 1.04, "grad_norm": 0.4827263355255127, "learning_rate": 1.5173080965224825e-05, "loss": 0.0437, "step": 6227 }, { "epoch": 1.04, "grad_norm": 0.6619675755500793, "learning_rate": 1.5171535546607665e-05, "loss": 0.0794, "step": 6228 }, { "epoch": 1.04, "grad_norm": 0.5313024520874023, "learning_rate": 1.5169989959366034e-05, "loss": 0.0479, "step": 6229 }, { "epoch": 1.04, "grad_norm": 0.6586967706680298, "learning_rate": 1.5168444203550324e-05, "loss": 0.0755, "step": 6230 }, { "epoch": 1.04, "grad_norm": 0.5288307666778564, "learning_rate": 1.516689827921094e-05, "loss": 0.0571, "step": 6231 }, { "epoch": 1.04, "grad_norm": 0.5358598828315735, "learning_rate": 1.5165352186398288e-05, "loss": 0.0594, "step": 6232 }, { "epoch": 1.04, "grad_norm": 0.5190076231956482, "learning_rate": 1.5163805925162776e-05, "loss": 0.061, "step": 6233 }, { "epoch": 1.04, "grad_norm": 0.4857363998889923, "learning_rate": 1.5162259495554827e-05, "loss": 0.0598, "step": 6234 }, { "epoch": 1.04, "grad_norm": 0.5982822775840759, "learning_rate": 1.5160712897624863e-05, "loss": 0.0777, "step": 6235 }, { "epoch": 1.04, "grad_norm": 0.7138565182685852, "learning_rate": 1.515916613142331e-05, "loss": 0.0753, "step": 6236 }, { "epoch": 1.04, "grad_norm": 0.5959367156028748, "learning_rate": 1.5157619197000606e-05, "loss": 0.083, "step": 6237 }, { "epoch": 1.04, "grad_norm": 0.6169390082359314, "learning_rate": 1.5156072094407188e-05, "loss": 0.0657, "step": 6238 }, { "epoch": 1.04, "grad_norm": 0.5043836236000061, "learning_rate": 1.5154524823693504e-05, "loss": 0.0523, "step": 6239 }, { "epoch": 1.04, "grad_norm": 0.6893633008003235, "learning_rate": 1.5152977384910004e-05, "loss": 0.0649, "step": 6240 }, { "epoch": 1.04, "grad_norm": 0.46612218022346497, "learning_rate": 1.5151429778107143e-05, "loss": 0.0553, "step": 6241 }, { "epoch": 1.04, "grad_norm": 0.7422820329666138, "learning_rate": 1.5149882003335381e-05, "loss": 0.0865, "step": 6242 }, { "epoch": 1.04, "grad_norm": 0.47975870966911316, "learning_rate": 1.5148334060645189e-05, "loss": 0.054, "step": 6243 }, { "epoch": 1.04, "grad_norm": 0.7654434442520142, "learning_rate": 1.5146785950087036e-05, "loss": 0.0564, "step": 6244 }, { "epoch": 1.04, "grad_norm": 0.44178348779678345, "learning_rate": 1.5145237671711406e-05, "loss": 0.0574, "step": 6245 }, { "epoch": 1.04, "grad_norm": 0.5488196015357971, "learning_rate": 1.5143689225568774e-05, "loss": 0.0587, "step": 6246 }, { "epoch": 1.04, "grad_norm": 0.5118618011474609, "learning_rate": 1.5142140611709635e-05, "loss": 0.0596, "step": 6247 }, { "epoch": 1.04, "grad_norm": 0.6365330219268799, "learning_rate": 1.5140591830184484e-05, "loss": 0.0577, "step": 6248 }, { "epoch": 1.05, "grad_norm": 0.5284585952758789, "learning_rate": 1.5139042881043817e-05, "loss": 0.0592, "step": 6249 }, { "epoch": 1.05, "grad_norm": 0.80629563331604, "learning_rate": 1.513749376433814e-05, "loss": 0.0917, "step": 6250 }, { "epoch": 1.05, "grad_norm": 0.5338879823684692, "learning_rate": 1.513594448011797e-05, "loss": 0.0646, "step": 6251 }, { "epoch": 1.05, "grad_norm": 0.4645514190196991, "learning_rate": 1.5134395028433817e-05, "loss": 0.0533, "step": 6252 }, { "epoch": 1.05, "grad_norm": 0.49059176445007324, "learning_rate": 1.5132845409336204e-05, "loss": 0.0615, "step": 6253 }, { "epoch": 1.05, "grad_norm": 0.6020627617835999, "learning_rate": 1.5131295622875657e-05, "loss": 0.0779, "step": 6254 }, { "epoch": 1.05, "grad_norm": 0.5657055377960205, "learning_rate": 1.5129745669102716e-05, "loss": 0.0616, "step": 6255 }, { "epoch": 1.05, "grad_norm": 0.6890016198158264, "learning_rate": 1.5128195548067912e-05, "loss": 0.0691, "step": 6256 }, { "epoch": 1.05, "grad_norm": 0.6511141657829285, "learning_rate": 1.5126645259821788e-05, "loss": 0.058, "step": 6257 }, { "epoch": 1.05, "grad_norm": 0.6774379014968872, "learning_rate": 1.5125094804414895e-05, "loss": 0.0726, "step": 6258 }, { "epoch": 1.05, "grad_norm": 0.8353446125984192, "learning_rate": 1.5123544181897793e-05, "loss": 0.0722, "step": 6259 }, { "epoch": 1.05, "grad_norm": 0.3207301199436188, "learning_rate": 1.5121993392321036e-05, "loss": 0.0496, "step": 6260 }, { "epoch": 1.05, "grad_norm": 0.4853333830833435, "learning_rate": 1.5120442435735188e-05, "loss": 0.0548, "step": 6261 }, { "epoch": 1.05, "grad_norm": 0.6549435257911682, "learning_rate": 1.5118891312190824e-05, "loss": 0.0682, "step": 6262 }, { "epoch": 1.05, "grad_norm": 0.48640936613082886, "learning_rate": 1.511734002173852e-05, "loss": 0.0673, "step": 6263 }, { "epoch": 1.05, "grad_norm": 0.4367714822292328, "learning_rate": 1.5115788564428857e-05, "loss": 0.0437, "step": 6264 }, { "epoch": 1.05, "grad_norm": 0.49088209867477417, "learning_rate": 1.5114236940312422e-05, "loss": 0.0519, "step": 6265 }, { "epoch": 1.05, "grad_norm": 0.5122116208076477, "learning_rate": 1.5112685149439808e-05, "loss": 0.0594, "step": 6266 }, { "epoch": 1.05, "grad_norm": 0.4741511642932892, "learning_rate": 1.5111133191861611e-05, "loss": 0.0584, "step": 6267 }, { "epoch": 1.05, "grad_norm": 0.5959044098854065, "learning_rate": 1.5109581067628437e-05, "loss": 0.0805, "step": 6268 }, { "epoch": 1.05, "grad_norm": 0.7130835056304932, "learning_rate": 1.5108028776790895e-05, "loss": 0.0635, "step": 6269 }, { "epoch": 1.05, "grad_norm": 0.47756877541542053, "learning_rate": 1.5106476319399597e-05, "loss": 0.0397, "step": 6270 }, { "epoch": 1.05, "grad_norm": 0.5750952959060669, "learning_rate": 1.5104923695505166e-05, "loss": 0.0674, "step": 6271 }, { "epoch": 1.05, "grad_norm": 0.5623418688774109, "learning_rate": 1.5103370905158225e-05, "loss": 0.0543, "step": 6272 }, { "epoch": 1.05, "grad_norm": 0.4678594172000885, "learning_rate": 1.510181794840941e-05, "loss": 0.0487, "step": 6273 }, { "epoch": 1.05, "grad_norm": 0.5684288740158081, "learning_rate": 1.5100264825309346e-05, "loss": 0.0644, "step": 6274 }, { "epoch": 1.05, "grad_norm": 0.5628042221069336, "learning_rate": 1.5098711535908683e-05, "loss": 0.0628, "step": 6275 }, { "epoch": 1.05, "grad_norm": 0.803001880645752, "learning_rate": 1.509715808025807e-05, "loss": 0.0524, "step": 6276 }, { "epoch": 1.05, "grad_norm": 0.710621178150177, "learning_rate": 1.5095604458408154e-05, "loss": 0.0761, "step": 6277 }, { "epoch": 1.05, "grad_norm": 0.5259975790977478, "learning_rate": 1.5094050670409593e-05, "loss": 0.056, "step": 6278 }, { "epoch": 1.05, "grad_norm": 0.6413924098014832, "learning_rate": 1.509249671631305e-05, "loss": 0.0658, "step": 6279 }, { "epoch": 1.05, "grad_norm": 0.5303467512130737, "learning_rate": 1.5090942596169198e-05, "loss": 0.0485, "step": 6280 }, { "epoch": 1.05, "grad_norm": 0.438740998506546, "learning_rate": 1.5089388310028708e-05, "loss": 0.0553, "step": 6281 }, { "epoch": 1.05, "grad_norm": 0.44286271929740906, "learning_rate": 1.5087833857942257e-05, "loss": 0.0505, "step": 6282 }, { "epoch": 1.05, "grad_norm": 0.6851491928100586, "learning_rate": 1.5086279239960537e-05, "loss": 0.0725, "step": 6283 }, { "epoch": 1.05, "grad_norm": 0.5457538962364197, "learning_rate": 1.5084724456134234e-05, "loss": 0.0477, "step": 6284 }, { "epoch": 1.05, "grad_norm": 0.5929224491119385, "learning_rate": 1.5083169506514042e-05, "loss": 0.0543, "step": 6285 }, { "epoch": 1.05, "grad_norm": 0.46678388118743896, "learning_rate": 1.508161439115066e-05, "loss": 0.0533, "step": 6286 }, { "epoch": 1.05, "grad_norm": 1.417391061782837, "learning_rate": 1.5080059110094804e-05, "loss": 0.0584, "step": 6287 }, { "epoch": 1.05, "grad_norm": 0.7594224214553833, "learning_rate": 1.507850366339718e-05, "loss": 0.0725, "step": 6288 }, { "epoch": 1.05, "grad_norm": 0.5748514533042908, "learning_rate": 1.5076948051108504e-05, "loss": 0.0651, "step": 6289 }, { "epoch": 1.05, "grad_norm": 0.6259363293647766, "learning_rate": 1.50753922732795e-05, "loss": 0.0614, "step": 6290 }, { "epoch": 1.05, "grad_norm": 0.3823643922805786, "learning_rate": 1.5073836329960893e-05, "loss": 0.0492, "step": 6291 }, { "epoch": 1.05, "grad_norm": 0.5281460881233215, "learning_rate": 1.5072280221203427e-05, "loss": 0.0567, "step": 6292 }, { "epoch": 1.05, "grad_norm": 0.5648703575134277, "learning_rate": 1.5070723947057829e-05, "loss": 0.0621, "step": 6293 }, { "epoch": 1.05, "grad_norm": 0.6958845257759094, "learning_rate": 1.5069167507574847e-05, "loss": 0.0671, "step": 6294 }, { "epoch": 1.05, "grad_norm": 0.6386035680770874, "learning_rate": 1.5067610902805234e-05, "loss": 0.0564, "step": 6295 }, { "epoch": 1.05, "grad_norm": 0.568186342716217, "learning_rate": 1.5066054132799744e-05, "loss": 0.062, "step": 6296 }, { "epoch": 1.05, "grad_norm": 0.5076964497566223, "learning_rate": 1.5064497197609136e-05, "loss": 0.0597, "step": 6297 }, { "epoch": 1.05, "grad_norm": 0.5989150404930115, "learning_rate": 1.506294009728417e-05, "loss": 0.0574, "step": 6298 }, { "epoch": 1.05, "grad_norm": 0.4952438473701477, "learning_rate": 1.5061382831875628e-05, "loss": 0.0538, "step": 6299 }, { "epoch": 1.05, "grad_norm": 0.48128730058670044, "learning_rate": 1.5059825401434283e-05, "loss": 0.0528, "step": 6300 }, { "epoch": 1.05, "grad_norm": 0.6436426639556885, "learning_rate": 1.5058267806010917e-05, "loss": 0.0562, "step": 6301 }, { "epoch": 1.05, "grad_norm": 0.5717819929122925, "learning_rate": 1.5056710045656311e-05, "loss": 0.0632, "step": 6302 }, { "epoch": 1.05, "grad_norm": 0.757167637348175, "learning_rate": 1.5055152120421266e-05, "loss": 0.0696, "step": 6303 }, { "epoch": 1.05, "grad_norm": 1.0040837526321411, "learning_rate": 1.5053594030356581e-05, "loss": 0.0976, "step": 6304 }, { "epoch": 1.05, "grad_norm": 0.7275223731994629, "learning_rate": 1.505203577551305e-05, "loss": 0.0689, "step": 6305 }, { "epoch": 1.05, "grad_norm": 0.5845154523849487, "learning_rate": 1.505047735594149e-05, "loss": 0.0542, "step": 6306 }, { "epoch": 1.05, "grad_norm": 0.5393111109733582, "learning_rate": 1.504891877169271e-05, "loss": 0.0598, "step": 6307 }, { "epoch": 1.06, "grad_norm": 0.5834580659866333, "learning_rate": 1.504736002281754e-05, "loss": 0.0677, "step": 6308 }, { "epoch": 1.06, "grad_norm": 0.43778231739997864, "learning_rate": 1.5045801109366789e-05, "loss": 0.0594, "step": 6309 }, { "epoch": 1.06, "grad_norm": 0.6227732300758362, "learning_rate": 1.5044242031391299e-05, "loss": 0.0727, "step": 6310 }, { "epoch": 1.06, "grad_norm": 0.4789315164089203, "learning_rate": 1.5042682788941903e-05, "loss": 0.0542, "step": 6311 }, { "epoch": 1.06, "grad_norm": 0.7353882193565369, "learning_rate": 1.504112338206944e-05, "loss": 0.0792, "step": 6312 }, { "epoch": 1.06, "grad_norm": 0.41471585631370544, "learning_rate": 1.503956381082476e-05, "loss": 0.0416, "step": 6313 }, { "epoch": 1.06, "grad_norm": 0.647646963596344, "learning_rate": 1.503800407525871e-05, "loss": 0.0608, "step": 6314 }, { "epoch": 1.06, "grad_norm": 0.8634386658668518, "learning_rate": 1.5036444175422154e-05, "loss": 0.0596, "step": 6315 }, { "epoch": 1.06, "grad_norm": 0.5575586557388306, "learning_rate": 1.5034884111365946e-05, "loss": 0.0591, "step": 6316 }, { "epoch": 1.06, "grad_norm": 0.9526451826095581, "learning_rate": 1.503332388314096e-05, "loss": 0.0746, "step": 6317 }, { "epoch": 1.06, "grad_norm": 0.4792839288711548, "learning_rate": 1.5031763490798069e-05, "loss": 0.0544, "step": 6318 }, { "epoch": 1.06, "grad_norm": 0.7537522912025452, "learning_rate": 1.5030202934388149e-05, "loss": 0.0816, "step": 6319 }, { "epoch": 1.06, "grad_norm": 0.6446675062179565, "learning_rate": 1.5028642213962083e-05, "loss": 0.0585, "step": 6320 }, { "epoch": 1.06, "grad_norm": 0.47891584038734436, "learning_rate": 1.5027081329570767e-05, "loss": 0.0524, "step": 6321 }, { "epoch": 1.06, "grad_norm": 0.4962327480316162, "learning_rate": 1.502552028126509e-05, "loss": 0.0685, "step": 6322 }, { "epoch": 1.06, "grad_norm": 0.45479443669319153, "learning_rate": 1.5023959069095949e-05, "loss": 0.0482, "step": 6323 }, { "epoch": 1.06, "grad_norm": 0.6330968141555786, "learning_rate": 1.5022397693114254e-05, "loss": 0.0656, "step": 6324 }, { "epoch": 1.06, "grad_norm": 0.6085960865020752, "learning_rate": 1.502083615337092e-05, "loss": 0.07, "step": 6325 }, { "epoch": 1.06, "grad_norm": 0.6481524705886841, "learning_rate": 1.5019274449916852e-05, "loss": 0.0582, "step": 6326 }, { "epoch": 1.06, "grad_norm": 0.5228093862533569, "learning_rate": 1.5017712582802977e-05, "loss": 0.0619, "step": 6327 }, { "epoch": 1.06, "grad_norm": 0.6713574528694153, "learning_rate": 1.5016150552080224e-05, "loss": 0.0783, "step": 6328 }, { "epoch": 1.06, "grad_norm": 0.5655743479728699, "learning_rate": 1.5014588357799525e-05, "loss": 0.0554, "step": 6329 }, { "epoch": 1.06, "grad_norm": 0.6583895683288574, "learning_rate": 1.501302600001181e-05, "loss": 0.0608, "step": 6330 }, { "epoch": 1.06, "grad_norm": 0.5240795016288757, "learning_rate": 1.5011463478768029e-05, "loss": 0.0745, "step": 6331 }, { "epoch": 1.06, "grad_norm": 0.4738641679286957, "learning_rate": 1.500990079411913e-05, "loss": 0.0522, "step": 6332 }, { "epoch": 1.06, "grad_norm": 0.5032605528831482, "learning_rate": 1.500833794611606e-05, "loss": 0.0603, "step": 6333 }, { "epoch": 1.06, "grad_norm": 0.6536141037940979, "learning_rate": 1.5006774934809785e-05, "loss": 0.0673, "step": 6334 }, { "epoch": 1.06, "grad_norm": 0.5377628207206726, "learning_rate": 1.500521176025126e-05, "loss": 0.0724, "step": 6335 }, { "epoch": 1.06, "grad_norm": 0.4219966232776642, "learning_rate": 1.5003648422491468e-05, "loss": 0.0474, "step": 6336 }, { "epoch": 1.06, "grad_norm": 0.42141956090927124, "learning_rate": 1.5002084921581372e-05, "loss": 0.0388, "step": 6337 }, { "epoch": 1.06, "grad_norm": 0.5635663866996765, "learning_rate": 1.5000521257571954e-05, "loss": 0.0546, "step": 6338 }, { "epoch": 1.06, "grad_norm": 0.5048403739929199, "learning_rate": 1.4998957430514205e-05, "loss": 0.0563, "step": 6339 }, { "epoch": 1.06, "grad_norm": 0.5791006684303284, "learning_rate": 1.4997393440459107e-05, "loss": 0.0648, "step": 6340 }, { "epoch": 1.06, "grad_norm": 0.5207422971725464, "learning_rate": 1.4995829287457662e-05, "loss": 0.0619, "step": 6341 }, { "epoch": 1.06, "grad_norm": 0.557653546333313, "learning_rate": 1.4994264971560868e-05, "loss": 0.0526, "step": 6342 }, { "epoch": 1.06, "grad_norm": 0.47755876183509827, "learning_rate": 1.4992700492819733e-05, "loss": 0.0771, "step": 6343 }, { "epoch": 1.06, "grad_norm": 0.48552292585372925, "learning_rate": 1.499113585128527e-05, "loss": 0.0508, "step": 6344 }, { "epoch": 1.06, "grad_norm": 0.4417704641819, "learning_rate": 1.4989571047008492e-05, "loss": 0.0528, "step": 6345 }, { "epoch": 1.06, "grad_norm": 0.5064375400543213, "learning_rate": 1.4988006080040428e-05, "loss": 0.0548, "step": 6346 }, { "epoch": 1.06, "grad_norm": 0.8966624736785889, "learning_rate": 1.49864409504321e-05, "loss": 0.066, "step": 6347 }, { "epoch": 1.06, "grad_norm": 0.4861469864845276, "learning_rate": 1.4984875658234544e-05, "loss": 0.0457, "step": 6348 }, { "epoch": 1.06, "grad_norm": 0.9650934934616089, "learning_rate": 1.4983310203498795e-05, "loss": 0.0705, "step": 6349 }, { "epoch": 1.06, "grad_norm": 0.7009086608886719, "learning_rate": 1.4981744586275902e-05, "loss": 0.0795, "step": 6350 }, { "epoch": 1.06, "grad_norm": 0.5940930843353271, "learning_rate": 1.498017880661691e-05, "loss": 0.0649, "step": 6351 }, { "epoch": 1.06, "grad_norm": 0.5662358999252319, "learning_rate": 1.4978612864572872e-05, "loss": 0.0748, "step": 6352 }, { "epoch": 1.06, "grad_norm": 0.6724101901054382, "learning_rate": 1.4977046760194853e-05, "loss": 0.0718, "step": 6353 }, { "epoch": 1.06, "grad_norm": 0.7467091679573059, "learning_rate": 1.4975480493533912e-05, "loss": 0.0558, "step": 6354 }, { "epoch": 1.06, "grad_norm": 0.6259014010429382, "learning_rate": 1.4973914064641122e-05, "loss": 0.0822, "step": 6355 }, { "epoch": 1.06, "grad_norm": 0.764363706111908, "learning_rate": 1.4972347473567558e-05, "loss": 0.105, "step": 6356 }, { "epoch": 1.06, "grad_norm": 0.4389934241771698, "learning_rate": 1.4970780720364304e-05, "loss": 0.0643, "step": 6357 }, { "epoch": 1.06, "grad_norm": 0.5500853657722473, "learning_rate": 1.4969213805082437e-05, "loss": 0.0591, "step": 6358 }, { "epoch": 1.06, "grad_norm": 0.528954029083252, "learning_rate": 1.4967646727773056e-05, "loss": 0.0563, "step": 6359 }, { "epoch": 1.06, "grad_norm": 0.5163760781288147, "learning_rate": 1.4966079488487258e-05, "loss": 0.0612, "step": 6360 }, { "epoch": 1.06, "grad_norm": 0.6305266618728638, "learning_rate": 1.496451208727614e-05, "loss": 0.0608, "step": 6361 }, { "epoch": 1.06, "grad_norm": 0.5263510942459106, "learning_rate": 1.496294452419081e-05, "loss": 0.0667, "step": 6362 }, { "epoch": 1.06, "grad_norm": 0.74884033203125, "learning_rate": 1.4961376799282383e-05, "loss": 0.0884, "step": 6363 }, { "epoch": 1.06, "grad_norm": 0.581109881401062, "learning_rate": 1.4959808912601977e-05, "loss": 0.0681, "step": 6364 }, { "epoch": 1.06, "grad_norm": 0.7190799117088318, "learning_rate": 1.4958240864200712e-05, "loss": 0.0628, "step": 6365 }, { "epoch": 1.06, "grad_norm": 0.5470770597457886, "learning_rate": 1.4956672654129715e-05, "loss": 0.0786, "step": 6366 }, { "epoch": 1.06, "grad_norm": 0.5750861763954163, "learning_rate": 1.4955104282440127e-05, "loss": 0.0765, "step": 6367 }, { "epoch": 1.07, "grad_norm": 0.5767456293106079, "learning_rate": 1.4953535749183079e-05, "loss": 0.0795, "step": 6368 }, { "epoch": 1.07, "grad_norm": 0.5078576803207397, "learning_rate": 1.495196705440972e-05, "loss": 0.0527, "step": 6369 }, { "epoch": 1.07, "grad_norm": 0.48765167593955994, "learning_rate": 1.4950398198171194e-05, "loss": 0.0446, "step": 6370 }, { "epoch": 1.07, "grad_norm": 0.7561877965927124, "learning_rate": 1.494882918051866e-05, "loss": 0.0666, "step": 6371 }, { "epoch": 1.07, "grad_norm": 0.5108757019042969, "learning_rate": 1.4947260001503276e-05, "loss": 0.0461, "step": 6372 }, { "epoch": 1.07, "grad_norm": 0.5834140777587891, "learning_rate": 1.4945690661176206e-05, "loss": 0.0677, "step": 6373 }, { "epoch": 1.07, "grad_norm": 0.43709424138069153, "learning_rate": 1.4944121159588624e-05, "loss": 0.0434, "step": 6374 }, { "epoch": 1.07, "grad_norm": 0.6157812476158142, "learning_rate": 1.4942551496791704e-05, "loss": 0.0583, "step": 6375 }, { "epoch": 1.07, "grad_norm": 0.5108286738395691, "learning_rate": 1.4940981672836625e-05, "loss": 0.0498, "step": 6376 }, { "epoch": 1.07, "grad_norm": 0.5259588956832886, "learning_rate": 1.4939411687774575e-05, "loss": 0.0682, "step": 6377 }, { "epoch": 1.07, "grad_norm": 0.8781396150588989, "learning_rate": 1.4937841541656743e-05, "loss": 0.0796, "step": 6378 }, { "epoch": 1.07, "grad_norm": 0.6402780413627625, "learning_rate": 1.493627123453433e-05, "loss": 0.0685, "step": 6379 }, { "epoch": 1.07, "grad_norm": 0.47601979970932007, "learning_rate": 1.4934700766458534e-05, "loss": 0.0703, "step": 6380 }, { "epoch": 1.07, "grad_norm": 0.4363439977169037, "learning_rate": 1.4933130137480566e-05, "loss": 0.0359, "step": 6381 }, { "epoch": 1.07, "grad_norm": 0.5256175398826599, "learning_rate": 1.493155934765163e-05, "loss": 0.0544, "step": 6382 }, { "epoch": 1.07, "grad_norm": 0.5901049971580505, "learning_rate": 1.4929988397022952e-05, "loss": 0.0596, "step": 6383 }, { "epoch": 1.07, "grad_norm": 0.5277729630470276, "learning_rate": 1.4928417285645753e-05, "loss": 0.0643, "step": 6384 }, { "epoch": 1.07, "grad_norm": 0.5536746382713318, "learning_rate": 1.4926846013571262e-05, "loss": 0.0671, "step": 6385 }, { "epoch": 1.07, "grad_norm": 0.5642574429512024, "learning_rate": 1.4925274580850704e-05, "loss": 0.0742, "step": 6386 }, { "epoch": 1.07, "grad_norm": 0.6568228006362915, "learning_rate": 1.4923702987535326e-05, "loss": 0.056, "step": 6387 }, { "epoch": 1.07, "grad_norm": 0.681633710861206, "learning_rate": 1.4922131233676375e-05, "loss": 0.0528, "step": 6388 }, { "epoch": 1.07, "grad_norm": 0.5778408646583557, "learning_rate": 1.4920559319325092e-05, "loss": 0.0678, "step": 6389 }, { "epoch": 1.07, "grad_norm": 0.5361059904098511, "learning_rate": 1.491898724453273e-05, "loss": 0.0698, "step": 6390 }, { "epoch": 1.07, "grad_norm": 0.5784854888916016, "learning_rate": 1.4917415009350556e-05, "loss": 0.0724, "step": 6391 }, { "epoch": 1.07, "grad_norm": 0.5577854514122009, "learning_rate": 1.4915842613829834e-05, "loss": 0.0596, "step": 6392 }, { "epoch": 1.07, "grad_norm": 0.7458483576774597, "learning_rate": 1.491427005802183e-05, "loss": 0.0623, "step": 6393 }, { "epoch": 1.07, "grad_norm": 0.6161456108093262, "learning_rate": 1.4912697341977815e-05, "loss": 0.0597, "step": 6394 }, { "epoch": 1.07, "grad_norm": 0.7624074220657349, "learning_rate": 1.491112446574908e-05, "loss": 0.0658, "step": 6395 }, { "epoch": 1.07, "grad_norm": 0.5490350723266602, "learning_rate": 1.4909551429386906e-05, "loss": 0.0623, "step": 6396 }, { "epoch": 1.07, "grad_norm": 0.6830354332923889, "learning_rate": 1.4907978232942582e-05, "loss": 0.06, "step": 6397 }, { "epoch": 1.07, "grad_norm": 0.7748963236808777, "learning_rate": 1.4906404876467404e-05, "loss": 0.0684, "step": 6398 }, { "epoch": 1.07, "grad_norm": 0.5047609210014343, "learning_rate": 1.4904831360012675e-05, "loss": 0.0777, "step": 6399 }, { "epoch": 1.07, "grad_norm": 0.738258421421051, "learning_rate": 1.4903257683629701e-05, "loss": 0.0626, "step": 6400 }, { "epoch": 1.07, "grad_norm": 0.5337563157081604, "learning_rate": 1.4901683847369794e-05, "loss": 0.0658, "step": 6401 }, { "epoch": 1.07, "grad_norm": 0.5121847987174988, "learning_rate": 1.4900109851284273e-05, "loss": 0.0633, "step": 6402 }, { "epoch": 1.07, "grad_norm": 0.6145153641700745, "learning_rate": 1.4898535695424457e-05, "loss": 0.068, "step": 6403 }, { "epoch": 1.07, "grad_norm": 0.5326908230781555, "learning_rate": 1.4896961379841673e-05, "loss": 0.058, "step": 6404 }, { "epoch": 1.07, "grad_norm": 0.5830137133598328, "learning_rate": 1.4895386904587255e-05, "loss": 0.0563, "step": 6405 }, { "epoch": 1.07, "grad_norm": 0.5882829427719116, "learning_rate": 1.4893812269712542e-05, "loss": 0.0617, "step": 6406 }, { "epoch": 1.07, "grad_norm": 0.5051535964012146, "learning_rate": 1.4892237475268873e-05, "loss": 0.0469, "step": 6407 }, { "epoch": 1.07, "grad_norm": 0.38808178901672363, "learning_rate": 1.4890662521307602e-05, "loss": 0.0449, "step": 6408 }, { "epoch": 1.07, "grad_norm": 0.5654789805412292, "learning_rate": 1.4889087407880078e-05, "loss": 0.0694, "step": 6409 }, { "epoch": 1.07, "grad_norm": 0.3789428770542145, "learning_rate": 1.4887512135037659e-05, "loss": 0.0459, "step": 6410 }, { "epoch": 1.07, "grad_norm": 0.4363519251346588, "learning_rate": 1.4885936702831711e-05, "loss": 0.0511, "step": 6411 }, { "epoch": 1.07, "grad_norm": 0.594976544380188, "learning_rate": 1.4884361111313603e-05, "loss": 0.0825, "step": 6412 }, { "epoch": 1.07, "grad_norm": 0.5328496098518372, "learning_rate": 1.488278536053471e-05, "loss": 0.097, "step": 6413 }, { "epoch": 1.07, "grad_norm": 0.43411192297935486, "learning_rate": 1.4881209450546405e-05, "loss": 0.055, "step": 6414 }, { "epoch": 1.07, "grad_norm": 0.608135461807251, "learning_rate": 1.4879633381400082e-05, "loss": 0.0692, "step": 6415 }, { "epoch": 1.07, "grad_norm": 0.5295547842979431, "learning_rate": 1.4878057153147128e-05, "loss": 0.0556, "step": 6416 }, { "epoch": 1.07, "grad_norm": 0.5411741137504578, "learning_rate": 1.4876480765838931e-05, "loss": 0.0567, "step": 6417 }, { "epoch": 1.07, "grad_norm": 0.7387092113494873, "learning_rate": 1.4874904219526898e-05, "loss": 0.0769, "step": 6418 }, { "epoch": 1.07, "grad_norm": 0.5627980828285217, "learning_rate": 1.4873327514262433e-05, "loss": 0.0539, "step": 6419 }, { "epoch": 1.07, "grad_norm": 0.5280675292015076, "learning_rate": 1.4871750650096947e-05, "loss": 0.057, "step": 6420 }, { "epoch": 1.07, "grad_norm": 0.6454041004180908, "learning_rate": 1.4870173627081852e-05, "loss": 0.0631, "step": 6421 }, { "epoch": 1.07, "grad_norm": 0.7350188493728638, "learning_rate": 1.4868596445268572e-05, "loss": 0.0479, "step": 6422 }, { "epoch": 1.07, "grad_norm": 0.5988546013832092, "learning_rate": 1.4867019104708536e-05, "loss": 0.0783, "step": 6423 }, { "epoch": 1.07, "grad_norm": 0.37030160427093506, "learning_rate": 1.4865441605453168e-05, "loss": 0.0403, "step": 6424 }, { "epoch": 1.07, "grad_norm": 0.6828325390815735, "learning_rate": 1.4863863947553909e-05, "loss": 0.0704, "step": 6425 }, { "epoch": 1.07, "grad_norm": 0.6333096027374268, "learning_rate": 1.48622861310622e-05, "loss": 0.1129, "step": 6426 }, { "epoch": 1.07, "grad_norm": 0.8109713792800903, "learning_rate": 1.4860708156029486e-05, "loss": 0.0733, "step": 6427 }, { "epoch": 1.08, "grad_norm": 0.611821174621582, "learning_rate": 1.4859130022507222e-05, "loss": 0.0806, "step": 6428 }, { "epoch": 1.08, "grad_norm": 0.5389736890792847, "learning_rate": 1.4857551730546861e-05, "loss": 0.0655, "step": 6429 }, { "epoch": 1.08, "grad_norm": 0.7468608021736145, "learning_rate": 1.4855973280199869e-05, "loss": 0.0758, "step": 6430 }, { "epoch": 1.08, "grad_norm": 0.6092444062232971, "learning_rate": 1.4854394671517712e-05, "loss": 0.0477, "step": 6431 }, { "epoch": 1.08, "grad_norm": 0.581369936466217, "learning_rate": 1.4852815904551864e-05, "loss": 0.0684, "step": 6432 }, { "epoch": 1.08, "grad_norm": 0.3502764105796814, "learning_rate": 1.4851236979353796e-05, "loss": 0.0571, "step": 6433 }, { "epoch": 1.08, "grad_norm": 0.496573269367218, "learning_rate": 1.4849657895974999e-05, "loss": 0.0537, "step": 6434 }, { "epoch": 1.08, "grad_norm": 0.46215948462486267, "learning_rate": 1.4848078654466958e-05, "loss": 0.069, "step": 6435 }, { "epoch": 1.08, "grad_norm": 0.6174614429473877, "learning_rate": 1.4846499254881165e-05, "loss": 0.0677, "step": 6436 }, { "epoch": 1.08, "grad_norm": 0.5434517860412598, "learning_rate": 1.484491969726912e-05, "loss": 0.0747, "step": 6437 }, { "epoch": 1.08, "grad_norm": 0.4019106328487396, "learning_rate": 1.4843339981682325e-05, "loss": 0.0477, "step": 6438 }, { "epoch": 1.08, "grad_norm": 0.5321158766746521, "learning_rate": 1.4841760108172292e-05, "loss": 0.0556, "step": 6439 }, { "epoch": 1.08, "grad_norm": 0.4444676637649536, "learning_rate": 1.4840180076790529e-05, "loss": 0.0546, "step": 6440 }, { "epoch": 1.08, "grad_norm": 0.49815618991851807, "learning_rate": 1.4838599887588562e-05, "loss": 0.056, "step": 6441 }, { "epoch": 1.08, "grad_norm": 0.44098037481307983, "learning_rate": 1.4837019540617907e-05, "loss": 0.0594, "step": 6442 }, { "epoch": 1.08, "grad_norm": 0.5506470203399658, "learning_rate": 1.48354390359301e-05, "loss": 0.0478, "step": 6443 }, { "epoch": 1.08, "grad_norm": 0.7220464944839478, "learning_rate": 1.4833858373576674e-05, "loss": 0.0782, "step": 6444 }, { "epoch": 1.08, "grad_norm": 0.5736932754516602, "learning_rate": 1.4832277553609165e-05, "loss": 0.0697, "step": 6445 }, { "epoch": 1.08, "grad_norm": 0.562815248966217, "learning_rate": 1.483069657607912e-05, "loss": 0.0556, "step": 6446 }, { "epoch": 1.08, "grad_norm": 0.7744805812835693, "learning_rate": 1.4829115441038088e-05, "loss": 0.0353, "step": 6447 }, { "epoch": 1.08, "grad_norm": 0.5459378957748413, "learning_rate": 1.482753414853763e-05, "loss": 0.0501, "step": 6448 }, { "epoch": 1.08, "grad_norm": 0.4851686358451843, "learning_rate": 1.4825952698629296e-05, "loss": 0.0579, "step": 6449 }, { "epoch": 1.08, "grad_norm": 0.6227426528930664, "learning_rate": 1.4824371091364657e-05, "loss": 0.0659, "step": 6450 }, { "epoch": 1.08, "grad_norm": 0.5242767930030823, "learning_rate": 1.4822789326795279e-05, "loss": 0.0631, "step": 6451 }, { "epoch": 1.08, "grad_norm": 0.5165908932685852, "learning_rate": 1.4821207404972746e-05, "loss": 0.0589, "step": 6452 }, { "epoch": 1.08, "grad_norm": 0.5924929976463318, "learning_rate": 1.4819625325948632e-05, "loss": 0.084, "step": 6453 }, { "epoch": 1.08, "grad_norm": 0.564139187335968, "learning_rate": 1.481804308977452e-05, "loss": 0.0584, "step": 6454 }, { "epoch": 1.08, "grad_norm": 0.3647044003009796, "learning_rate": 1.481646069650201e-05, "loss": 0.0587, "step": 6455 }, { "epoch": 1.08, "grad_norm": 0.4914833903312683, "learning_rate": 1.481487814618269e-05, "loss": 0.0719, "step": 6456 }, { "epoch": 1.08, "grad_norm": 0.5290237665176392, "learning_rate": 1.4813295438868162e-05, "loss": 0.0536, "step": 6457 }, { "epoch": 1.08, "grad_norm": 0.522239089012146, "learning_rate": 1.4811712574610038e-05, "loss": 0.0497, "step": 6458 }, { "epoch": 1.08, "grad_norm": 0.5843082070350647, "learning_rate": 1.4810129553459922e-05, "loss": 0.0594, "step": 6459 }, { "epoch": 1.08, "grad_norm": 0.45158523321151733, "learning_rate": 1.4808546375469434e-05, "loss": 0.0714, "step": 6460 }, { "epoch": 1.08, "grad_norm": 0.4069320857524872, "learning_rate": 1.4806963040690197e-05, "loss": 0.0565, "step": 6461 }, { "epoch": 1.08, "grad_norm": 0.5252458453178406, "learning_rate": 1.4805379549173835e-05, "loss": 0.0454, "step": 6462 }, { "epoch": 1.08, "grad_norm": 0.9852659702301025, "learning_rate": 1.480379590097198e-05, "loss": 0.0716, "step": 6463 }, { "epoch": 1.08, "grad_norm": 0.5280677676200867, "learning_rate": 1.4802212096136267e-05, "loss": 0.0519, "step": 6464 }, { "epoch": 1.08, "grad_norm": 0.7514767646789551, "learning_rate": 1.4800628134718344e-05, "loss": 0.0466, "step": 6465 }, { "epoch": 1.08, "grad_norm": 0.5423937439918518, "learning_rate": 1.4799044016769855e-05, "loss": 0.0497, "step": 6466 }, { "epoch": 1.08, "grad_norm": 0.9591596126556396, "learning_rate": 1.4797459742342448e-05, "loss": 0.0642, "step": 6467 }, { "epoch": 1.08, "grad_norm": 0.45357125997543335, "learning_rate": 1.4795875311487786e-05, "loss": 0.0538, "step": 6468 }, { "epoch": 1.08, "grad_norm": 0.699203372001648, "learning_rate": 1.4794290724257532e-05, "loss": 0.0579, "step": 6469 }, { "epoch": 1.08, "grad_norm": 0.581081748008728, "learning_rate": 1.4792705980703346e-05, "loss": 0.0535, "step": 6470 }, { "epoch": 1.08, "grad_norm": 0.82928466796875, "learning_rate": 1.4791121080876907e-05, "loss": 0.0658, "step": 6471 }, { "epoch": 1.08, "grad_norm": 0.8885686993598938, "learning_rate": 1.478953602482989e-05, "loss": 0.0661, "step": 6472 }, { "epoch": 1.08, "grad_norm": 0.7550974488258362, "learning_rate": 1.478795081261398e-05, "loss": 0.0567, "step": 6473 }, { "epoch": 1.08, "grad_norm": 0.6951205730438232, "learning_rate": 1.4786365444280865e-05, "loss": 0.0791, "step": 6474 }, { "epoch": 1.08, "grad_norm": 0.6685310006141663, "learning_rate": 1.4784779919882233e-05, "loss": 0.0732, "step": 6475 }, { "epoch": 1.08, "grad_norm": 0.7986636161804199, "learning_rate": 1.4783194239469788e-05, "loss": 0.0592, "step": 6476 }, { "epoch": 1.08, "grad_norm": 0.5208551287651062, "learning_rate": 1.478160840309523e-05, "loss": 0.0702, "step": 6477 }, { "epoch": 1.08, "grad_norm": 0.6882641911506653, "learning_rate": 1.4780022410810269e-05, "loss": 0.054, "step": 6478 }, { "epoch": 1.08, "grad_norm": 0.571802020072937, "learning_rate": 1.4778436262666614e-05, "loss": 0.0798, "step": 6479 }, { "epoch": 1.08, "grad_norm": 0.4974498450756073, "learning_rate": 1.4776849958715991e-05, "loss": 0.0705, "step": 6480 }, { "epoch": 1.08, "grad_norm": 0.8379408717155457, "learning_rate": 1.4775263499010116e-05, "loss": 0.0682, "step": 6481 }, { "epoch": 1.08, "grad_norm": 0.6040840148925781, "learning_rate": 1.477367688360072e-05, "loss": 0.0699, "step": 6482 }, { "epoch": 1.08, "grad_norm": 0.49351322650909424, "learning_rate": 1.4772090112539538e-05, "loss": 0.0586, "step": 6483 }, { "epoch": 1.08, "grad_norm": 0.7681826949119568, "learning_rate": 1.4770503185878309e-05, "loss": 0.0892, "step": 6484 }, { "epoch": 1.08, "grad_norm": 0.6554493308067322, "learning_rate": 1.4768916103668773e-05, "loss": 0.061, "step": 6485 }, { "epoch": 1.08, "grad_norm": 0.5871302485466003, "learning_rate": 1.4767328865962682e-05, "loss": 0.0579, "step": 6486 }, { "epoch": 1.08, "grad_norm": 0.457584023475647, "learning_rate": 1.4765741472811793e-05, "loss": 0.0567, "step": 6487 }, { "epoch": 1.09, "grad_norm": 0.4983829855918884, "learning_rate": 1.4764153924267857e-05, "loss": 0.0895, "step": 6488 }, { "epoch": 1.09, "grad_norm": 0.5284901857376099, "learning_rate": 1.4762566220382644e-05, "loss": 0.0692, "step": 6489 }, { "epoch": 1.09, "grad_norm": 0.48328110575675964, "learning_rate": 1.476097836120792e-05, "loss": 0.0486, "step": 6490 }, { "epoch": 1.09, "grad_norm": 0.46287503838539124, "learning_rate": 1.4759390346795464e-05, "loss": 0.0539, "step": 6491 }, { "epoch": 1.09, "grad_norm": 0.5464030504226685, "learning_rate": 1.475780217719705e-05, "loss": 0.0693, "step": 6492 }, { "epoch": 1.09, "grad_norm": 0.3907022774219513, "learning_rate": 1.4756213852464463e-05, "loss": 0.0589, "step": 6493 }, { "epoch": 1.09, "grad_norm": 0.42757466435432434, "learning_rate": 1.4754625372649493e-05, "loss": 0.0485, "step": 6494 }, { "epoch": 1.09, "grad_norm": 0.4709450602531433, "learning_rate": 1.4753036737803937e-05, "loss": 0.0551, "step": 6495 }, { "epoch": 1.09, "grad_norm": 0.9453620314598083, "learning_rate": 1.4751447947979589e-05, "loss": 0.0731, "step": 6496 }, { "epoch": 1.09, "grad_norm": 0.5801973938941956, "learning_rate": 1.4749859003228261e-05, "loss": 0.0525, "step": 6497 }, { "epoch": 1.09, "grad_norm": 0.6835130453109741, "learning_rate": 1.4748269903601756e-05, "loss": 0.0744, "step": 6498 }, { "epoch": 1.09, "grad_norm": 0.6389941573143005, "learning_rate": 1.474668064915189e-05, "loss": 0.0821, "step": 6499 }, { "epoch": 1.09, "grad_norm": 0.5233198404312134, "learning_rate": 1.4745091239930482e-05, "loss": 0.0649, "step": 6500 }, { "epoch": 1.09, "grad_norm": 0.730840802192688, "learning_rate": 1.4743501675989365e-05, "loss": 0.0636, "step": 6501 }, { "epoch": 1.09, "grad_norm": 0.6519699096679688, "learning_rate": 1.4741911957380356e-05, "loss": 0.0575, "step": 6502 }, { "epoch": 1.09, "grad_norm": 0.5838014483451843, "learning_rate": 1.4740322084155297e-05, "loss": 0.0566, "step": 6503 }, { "epoch": 1.09, "grad_norm": 0.56868577003479, "learning_rate": 1.473873205636603e-05, "loss": 0.0587, "step": 6504 }, { "epoch": 1.09, "grad_norm": 0.6315398216247559, "learning_rate": 1.4737141874064393e-05, "loss": 0.0496, "step": 6505 }, { "epoch": 1.09, "grad_norm": 0.6309517621994019, "learning_rate": 1.4735551537302239e-05, "loss": 0.0666, "step": 6506 }, { "epoch": 1.09, "grad_norm": 0.5729247331619263, "learning_rate": 1.4733961046131425e-05, "loss": 0.058, "step": 6507 }, { "epoch": 1.09, "grad_norm": 0.44133225083351135, "learning_rate": 1.473237040060381e-05, "loss": 0.0407, "step": 6508 }, { "epoch": 1.09, "grad_norm": 0.5485915541648865, "learning_rate": 1.4730779600771256e-05, "loss": 0.0439, "step": 6509 }, { "epoch": 1.09, "grad_norm": 0.6177430152893066, "learning_rate": 1.4729188646685638e-05, "loss": 0.0527, "step": 6510 }, { "epoch": 1.09, "grad_norm": 0.7012985348701477, "learning_rate": 1.4727597538398828e-05, "loss": 0.0722, "step": 6511 }, { "epoch": 1.09, "grad_norm": 0.5208997130393982, "learning_rate": 1.4726006275962705e-05, "loss": 0.0452, "step": 6512 }, { "epoch": 1.09, "grad_norm": 0.409238338470459, "learning_rate": 1.4724414859429157e-05, "loss": 0.0485, "step": 6513 }, { "epoch": 1.09, "grad_norm": 0.4553813338279724, "learning_rate": 1.4722823288850072e-05, "loss": 0.0574, "step": 6514 }, { "epoch": 1.09, "grad_norm": 0.5235239863395691, "learning_rate": 1.4721231564277348e-05, "loss": 0.0647, "step": 6515 }, { "epoch": 1.09, "grad_norm": 0.7424911856651306, "learning_rate": 1.471963968576288e-05, "loss": 0.0721, "step": 6516 }, { "epoch": 1.09, "grad_norm": 0.5323860049247742, "learning_rate": 1.4718047653358578e-05, "loss": 0.0554, "step": 6517 }, { "epoch": 1.09, "grad_norm": 0.5023689270019531, "learning_rate": 1.4716455467116353e-05, "loss": 0.0485, "step": 6518 }, { "epoch": 1.09, "grad_norm": 0.6007881760597229, "learning_rate": 1.4714863127088114e-05, "loss": 0.0552, "step": 6519 }, { "epoch": 1.09, "grad_norm": 0.4976586103439331, "learning_rate": 1.4713270633325789e-05, "loss": 0.0543, "step": 6520 }, { "epoch": 1.09, "grad_norm": 0.5238742828369141, "learning_rate": 1.4711677985881296e-05, "loss": 0.0567, "step": 6521 }, { "epoch": 1.09, "grad_norm": 0.7987158298492432, "learning_rate": 1.471008518480657e-05, "loss": 0.0688, "step": 6522 }, { "epoch": 1.09, "grad_norm": 0.7505457401275635, "learning_rate": 1.4708492230153546e-05, "loss": 0.0646, "step": 6523 }, { "epoch": 1.09, "grad_norm": 0.44576799869537354, "learning_rate": 1.4706899121974163e-05, "loss": 0.0486, "step": 6524 }, { "epoch": 1.09, "grad_norm": 0.4545970857143402, "learning_rate": 1.4705305860320368e-05, "loss": 0.0492, "step": 6525 }, { "epoch": 1.09, "grad_norm": 0.49117588996887207, "learning_rate": 1.4703712445244108e-05, "loss": 0.0618, "step": 6526 }, { "epoch": 1.09, "grad_norm": 0.6089938282966614, "learning_rate": 1.4702118876797341e-05, "loss": 0.0655, "step": 6527 }, { "epoch": 1.09, "grad_norm": 0.6848088502883911, "learning_rate": 1.4700525155032026e-05, "loss": 0.0596, "step": 6528 }, { "epoch": 1.09, "grad_norm": 0.4124598503112793, "learning_rate": 1.4698931280000133e-05, "loss": 0.0428, "step": 6529 }, { "epoch": 1.09, "grad_norm": 0.5475935935974121, "learning_rate": 1.4697337251753623e-05, "loss": 0.0588, "step": 6530 }, { "epoch": 1.09, "grad_norm": 0.5346702337265015, "learning_rate": 1.4695743070344477e-05, "loss": 0.0555, "step": 6531 }, { "epoch": 1.09, "grad_norm": 0.5896667242050171, "learning_rate": 1.4694148735824679e-05, "loss": 0.0781, "step": 6532 }, { "epoch": 1.09, "grad_norm": 0.5748724341392517, "learning_rate": 1.4692554248246207e-05, "loss": 0.0563, "step": 6533 }, { "epoch": 1.09, "grad_norm": 0.42430976033210754, "learning_rate": 1.4690959607661057e-05, "loss": 0.0493, "step": 6534 }, { "epoch": 1.09, "grad_norm": 0.8151285648345947, "learning_rate": 1.4689364814121216e-05, "loss": 0.055, "step": 6535 }, { "epoch": 1.09, "grad_norm": 0.4801153242588043, "learning_rate": 1.4687769867678696e-05, "loss": 0.0571, "step": 6536 }, { "epoch": 1.09, "grad_norm": 0.9147272706031799, "learning_rate": 1.4686174768385495e-05, "loss": 0.0626, "step": 6537 }, { "epoch": 1.09, "grad_norm": 0.6013635993003845, "learning_rate": 1.4684579516293625e-05, "loss": 0.0534, "step": 6538 }, { "epoch": 1.09, "grad_norm": 0.6308104991912842, "learning_rate": 1.4682984111455098e-05, "loss": 0.0721, "step": 6539 }, { "epoch": 1.09, "grad_norm": 0.5389953255653381, "learning_rate": 1.4681388553921938e-05, "loss": 0.0525, "step": 6540 }, { "epoch": 1.09, "grad_norm": 0.5314630270004272, "learning_rate": 1.467979284374617e-05, "loss": 0.0491, "step": 6541 }, { "epoch": 1.09, "grad_norm": 0.45454347133636475, "learning_rate": 1.4678196980979824e-05, "loss": 0.0564, "step": 6542 }, { "epoch": 1.09, "grad_norm": 0.5082696676254272, "learning_rate": 1.4676600965674932e-05, "loss": 0.0667, "step": 6543 }, { "epoch": 1.09, "grad_norm": 0.5683205127716064, "learning_rate": 1.4675004797883539e-05, "loss": 0.0646, "step": 6544 }, { "epoch": 1.09, "grad_norm": 0.5524827837944031, "learning_rate": 1.4673408477657687e-05, "loss": 0.0367, "step": 6545 }, { "epoch": 1.09, "grad_norm": 0.549462080001831, "learning_rate": 1.4671812005049426e-05, "loss": 0.066, "step": 6546 }, { "epoch": 1.09, "grad_norm": 0.7146230936050415, "learning_rate": 1.467021538011081e-05, "loss": 0.0732, "step": 6547 }, { "epoch": 1.1, "grad_norm": 0.7033975124359131, "learning_rate": 1.4668618602893903e-05, "loss": 0.061, "step": 6548 }, { "epoch": 1.1, "grad_norm": 0.5667228698730469, "learning_rate": 1.4667021673450768e-05, "loss": 0.0521, "step": 6549 }, { "epoch": 1.1, "grad_norm": 0.7490537166595459, "learning_rate": 1.4665424591833473e-05, "loss": 0.0894, "step": 6550 }, { "epoch": 1.1, "grad_norm": 0.588016152381897, "learning_rate": 1.4663827358094094e-05, "loss": 0.0522, "step": 6551 }, { "epoch": 1.1, "grad_norm": 0.783646285533905, "learning_rate": 1.4662229972284711e-05, "loss": 0.0629, "step": 6552 }, { "epoch": 1.1, "grad_norm": 0.7941831350326538, "learning_rate": 1.4660632434457411e-05, "loss": 0.0514, "step": 6553 }, { "epoch": 1.1, "grad_norm": 0.43682998418807983, "learning_rate": 1.4659034744664282e-05, "loss": 0.0632, "step": 6554 }, { "epoch": 1.1, "grad_norm": 0.6487892866134644, "learning_rate": 1.4657436902957413e-05, "loss": 0.0422, "step": 6555 }, { "epoch": 1.1, "grad_norm": 0.5880622863769531, "learning_rate": 1.4655838909388916e-05, "loss": 0.0507, "step": 6556 }, { "epoch": 1.1, "grad_norm": 0.5927279591560364, "learning_rate": 1.4654240764010888e-05, "loss": 0.0548, "step": 6557 }, { "epoch": 1.1, "grad_norm": 0.5904607176780701, "learning_rate": 1.4652642466875435e-05, "loss": 0.0603, "step": 6558 }, { "epoch": 1.1, "grad_norm": 0.6750385165214539, "learning_rate": 1.4651044018034678e-05, "loss": 0.0564, "step": 6559 }, { "epoch": 1.1, "grad_norm": 0.4586631655693054, "learning_rate": 1.4649445417540733e-05, "loss": 0.0568, "step": 6560 }, { "epoch": 1.1, "grad_norm": 0.8355165123939514, "learning_rate": 1.4647846665445726e-05, "loss": 0.0805, "step": 6561 }, { "epoch": 1.1, "grad_norm": 0.7013655304908752, "learning_rate": 1.4646247761801786e-05, "loss": 0.0596, "step": 6562 }, { "epoch": 1.1, "grad_norm": 0.8243400454521179, "learning_rate": 1.4644648706661046e-05, "loss": 0.0724, "step": 6563 }, { "epoch": 1.1, "grad_norm": 0.4562893509864807, "learning_rate": 1.4643049500075651e-05, "loss": 0.056, "step": 6564 }, { "epoch": 1.1, "grad_norm": 0.39828163385391235, "learning_rate": 1.4641450142097738e-05, "loss": 0.0382, "step": 6565 }, { "epoch": 1.1, "grad_norm": 0.6860331892967224, "learning_rate": 1.4639850632779458e-05, "loss": 0.0705, "step": 6566 }, { "epoch": 1.1, "grad_norm": 0.9031692743301392, "learning_rate": 1.4638250972172965e-05, "loss": 0.0675, "step": 6567 }, { "epoch": 1.1, "grad_norm": 0.6169330477714539, "learning_rate": 1.4636651160330419e-05, "loss": 0.0604, "step": 6568 }, { "epoch": 1.1, "grad_norm": 0.478708416223526, "learning_rate": 1.4635051197303985e-05, "loss": 0.0759, "step": 6569 }, { "epoch": 1.1, "grad_norm": 0.5263132452964783, "learning_rate": 1.4633451083145829e-05, "loss": 0.0504, "step": 6570 }, { "epoch": 1.1, "grad_norm": 0.6098876595497131, "learning_rate": 1.4631850817908127e-05, "loss": 0.0858, "step": 6571 }, { "epoch": 1.1, "grad_norm": 0.552978515625, "learning_rate": 1.4630250401643055e-05, "loss": 0.0632, "step": 6572 }, { "epoch": 1.1, "grad_norm": 0.7508718967437744, "learning_rate": 1.4628649834402802e-05, "loss": 0.0622, "step": 6573 }, { "epoch": 1.1, "grad_norm": 0.672836184501648, "learning_rate": 1.4627049116239549e-05, "loss": 0.0629, "step": 6574 }, { "epoch": 1.1, "grad_norm": 0.45945975184440613, "learning_rate": 1.4625448247205495e-05, "loss": 0.0539, "step": 6575 }, { "epoch": 1.1, "grad_norm": 0.5896488428115845, "learning_rate": 1.462384722735284e-05, "loss": 0.0605, "step": 6576 }, { "epoch": 1.1, "grad_norm": 0.7127854228019714, "learning_rate": 1.4622246056733778e-05, "loss": 0.0631, "step": 6577 }, { "epoch": 1.1, "grad_norm": 0.581519603729248, "learning_rate": 1.4620644735400526e-05, "loss": 0.0483, "step": 6578 }, { "epoch": 1.1, "grad_norm": 0.6886776089668274, "learning_rate": 1.4619043263405294e-05, "loss": 0.0734, "step": 6579 }, { "epoch": 1.1, "grad_norm": 0.6592018008232117, "learning_rate": 1.46174416408003e-05, "loss": 0.0742, "step": 6580 }, { "epoch": 1.1, "grad_norm": 0.5109686851501465, "learning_rate": 1.461583986763777e-05, "loss": 0.0588, "step": 6581 }, { "epoch": 1.1, "grad_norm": 0.5075156092643738, "learning_rate": 1.461423794396993e-05, "loss": 0.0486, "step": 6582 }, { "epoch": 1.1, "grad_norm": 0.5685518980026245, "learning_rate": 1.4612635869849005e-05, "loss": 0.0638, "step": 6583 }, { "epoch": 1.1, "grad_norm": 0.552010178565979, "learning_rate": 1.4611033645327246e-05, "loss": 0.0626, "step": 6584 }, { "epoch": 1.1, "grad_norm": 0.5530622005462646, "learning_rate": 1.4609431270456886e-05, "loss": 0.0591, "step": 6585 }, { "epoch": 1.1, "grad_norm": 0.4463420808315277, "learning_rate": 1.4607828745290178e-05, "loss": 0.0518, "step": 6586 }, { "epoch": 1.1, "grad_norm": 0.40601497888565063, "learning_rate": 1.4606226069879368e-05, "loss": 0.0411, "step": 6587 }, { "epoch": 1.1, "grad_norm": 0.5356107950210571, "learning_rate": 1.4604623244276721e-05, "loss": 0.0469, "step": 6588 }, { "epoch": 1.1, "grad_norm": 0.9542414546012878, "learning_rate": 1.4603020268534493e-05, "loss": 0.0575, "step": 6589 }, { "epoch": 1.1, "grad_norm": 0.6577091217041016, "learning_rate": 1.4601417142704955e-05, "loss": 0.0724, "step": 6590 }, { "epoch": 1.1, "grad_norm": 0.5518650412559509, "learning_rate": 1.4599813866840375e-05, "loss": 0.0658, "step": 6591 }, { "epoch": 1.1, "grad_norm": 0.5795169472694397, "learning_rate": 1.4598210440993038e-05, "loss": 0.0764, "step": 6592 }, { "epoch": 1.1, "grad_norm": 0.6288238167762756, "learning_rate": 1.4596606865215214e-05, "loss": 0.0633, "step": 6593 }, { "epoch": 1.1, "grad_norm": 0.649140477180481, "learning_rate": 1.4595003139559198e-05, "loss": 0.0661, "step": 6594 }, { "epoch": 1.1, "grad_norm": 0.7073063850402832, "learning_rate": 1.4593399264077279e-05, "loss": 0.0596, "step": 6595 }, { "epoch": 1.1, "grad_norm": 0.6756022572517395, "learning_rate": 1.4591795238821754e-05, "loss": 0.0548, "step": 6596 }, { "epoch": 1.1, "grad_norm": 0.5264790058135986, "learning_rate": 1.459019106384492e-05, "loss": 0.0513, "step": 6597 }, { "epoch": 1.1, "grad_norm": 0.4738011658191681, "learning_rate": 1.4588586739199092e-05, "loss": 0.0716, "step": 6598 }, { "epoch": 1.1, "grad_norm": 0.5553362965583801, "learning_rate": 1.4586982264936571e-05, "loss": 0.0601, "step": 6599 }, { "epoch": 1.1, "grad_norm": 0.7035349011421204, "learning_rate": 1.4585377641109681e-05, "loss": 0.0699, "step": 6600 }, { "epoch": 1.1, "grad_norm": 0.6087133288383484, "learning_rate": 1.4583772867770737e-05, "loss": 0.0643, "step": 6601 }, { "epoch": 1.1, "grad_norm": 0.5272920727729797, "learning_rate": 1.4582167944972072e-05, "loss": 0.0541, "step": 6602 }, { "epoch": 1.1, "grad_norm": 0.5768681168556213, "learning_rate": 1.4580562872766008e-05, "loss": 0.0655, "step": 6603 }, { "epoch": 1.1, "grad_norm": 0.8351712226867676, "learning_rate": 1.4578957651204886e-05, "loss": 0.0685, "step": 6604 }, { "epoch": 1.1, "grad_norm": 0.5542676448822021, "learning_rate": 1.4577352280341044e-05, "loss": 0.0626, "step": 6605 }, { "epoch": 1.1, "grad_norm": 0.772632896900177, "learning_rate": 1.457574676022683e-05, "loss": 0.0692, "step": 6606 }, { "epoch": 1.11, "grad_norm": 0.47676628828048706, "learning_rate": 1.4574141090914586e-05, "loss": 0.0481, "step": 6607 }, { "epoch": 1.11, "grad_norm": 0.8131025433540344, "learning_rate": 1.4572535272456678e-05, "loss": 0.0737, "step": 6608 }, { "epoch": 1.11, "grad_norm": 1.0443518161773682, "learning_rate": 1.457092930490546e-05, "loss": 0.0787, "step": 6609 }, { "epoch": 1.11, "grad_norm": 0.42997223138809204, "learning_rate": 1.4569323188313296e-05, "loss": 0.0398, "step": 6610 }, { "epoch": 1.11, "grad_norm": 0.6192270517349243, "learning_rate": 1.4567716922732553e-05, "loss": 0.0703, "step": 6611 }, { "epoch": 1.11, "grad_norm": 0.4489087760448456, "learning_rate": 1.4566110508215611e-05, "loss": 0.0545, "step": 6612 }, { "epoch": 1.11, "grad_norm": 0.6284188032150269, "learning_rate": 1.4564503944814852e-05, "loss": 0.0539, "step": 6613 }, { "epoch": 1.11, "grad_norm": 0.42456451058387756, "learning_rate": 1.456289723258265e-05, "loss": 0.05, "step": 6614 }, { "epoch": 1.11, "grad_norm": 0.6440200209617615, "learning_rate": 1.4561290371571397e-05, "loss": 0.0634, "step": 6615 }, { "epoch": 1.11, "grad_norm": 0.4987553656101227, "learning_rate": 1.4559683361833492e-05, "loss": 0.0557, "step": 6616 }, { "epoch": 1.11, "grad_norm": 0.8207200765609741, "learning_rate": 1.455807620342133e-05, "loss": 0.0632, "step": 6617 }, { "epoch": 1.11, "grad_norm": 0.799513578414917, "learning_rate": 1.4556468896387315e-05, "loss": 0.0689, "step": 6618 }, { "epoch": 1.11, "grad_norm": 0.7442229390144348, "learning_rate": 1.4554861440783852e-05, "loss": 0.0613, "step": 6619 }, { "epoch": 1.11, "grad_norm": 0.6474069952964783, "learning_rate": 1.455325383666336e-05, "loss": 0.0747, "step": 6620 }, { "epoch": 1.11, "grad_norm": 0.5692200064659119, "learning_rate": 1.4551646084078253e-05, "loss": 0.058, "step": 6621 }, { "epoch": 1.11, "grad_norm": 0.6014395356178284, "learning_rate": 1.4550038183080954e-05, "loss": 0.061, "step": 6622 }, { "epoch": 1.11, "grad_norm": 0.6241312623023987, "learning_rate": 1.4548430133723895e-05, "loss": 0.0585, "step": 6623 }, { "epoch": 1.11, "grad_norm": 0.4747311770915985, "learning_rate": 1.4546821936059502e-05, "loss": 0.0504, "step": 6624 }, { "epoch": 1.11, "grad_norm": 0.6848382949829102, "learning_rate": 1.4545213590140214e-05, "loss": 0.0533, "step": 6625 }, { "epoch": 1.11, "grad_norm": 0.5313546061515808, "learning_rate": 1.4543605096018475e-05, "loss": 0.0487, "step": 6626 }, { "epoch": 1.11, "grad_norm": 0.7710586786270142, "learning_rate": 1.4541996453746734e-05, "loss": 0.0584, "step": 6627 }, { "epoch": 1.11, "grad_norm": 0.6722971796989441, "learning_rate": 1.4540387663377437e-05, "loss": 0.0565, "step": 6628 }, { "epoch": 1.11, "grad_norm": 0.6848035454750061, "learning_rate": 1.4538778724963047e-05, "loss": 0.0482, "step": 6629 }, { "epoch": 1.11, "grad_norm": 0.5903974771499634, "learning_rate": 1.4537169638556022e-05, "loss": 0.0593, "step": 6630 }, { "epoch": 1.11, "grad_norm": 0.4925454258918762, "learning_rate": 1.4535560404208826e-05, "loss": 0.0697, "step": 6631 }, { "epoch": 1.11, "grad_norm": 0.5552824139595032, "learning_rate": 1.4533951021973935e-05, "loss": 0.07, "step": 6632 }, { "epoch": 1.11, "grad_norm": 0.6506112217903137, "learning_rate": 1.4532341491903824e-05, "loss": 0.0638, "step": 6633 }, { "epoch": 1.11, "grad_norm": 0.7267345190048218, "learning_rate": 1.4530731814050971e-05, "loss": 0.0652, "step": 6634 }, { "epoch": 1.11, "grad_norm": 0.49970176815986633, "learning_rate": 1.4529121988467863e-05, "loss": 0.0538, "step": 6635 }, { "epoch": 1.11, "grad_norm": 0.8480633497238159, "learning_rate": 1.452751201520699e-05, "loss": 0.0606, "step": 6636 }, { "epoch": 1.11, "grad_norm": 0.4190010130405426, "learning_rate": 1.452590189432085e-05, "loss": 0.0356, "step": 6637 }, { "epoch": 1.11, "grad_norm": 0.46293920278549194, "learning_rate": 1.452429162586194e-05, "loss": 0.0608, "step": 6638 }, { "epoch": 1.11, "grad_norm": 0.7270894050598145, "learning_rate": 1.452268120988276e-05, "loss": 0.061, "step": 6639 }, { "epoch": 1.11, "grad_norm": 0.7357636094093323, "learning_rate": 1.4521070646435832e-05, "loss": 0.0556, "step": 6640 }, { "epoch": 1.11, "grad_norm": 0.603412389755249, "learning_rate": 1.4519459935573664e-05, "loss": 0.0488, "step": 6641 }, { "epoch": 1.11, "grad_norm": 0.560819685459137, "learning_rate": 1.4517849077348771e-05, "loss": 0.0681, "step": 6642 }, { "epoch": 1.11, "grad_norm": 0.6350914239883423, "learning_rate": 1.4516238071813682e-05, "loss": 0.0547, "step": 6643 }, { "epoch": 1.11, "grad_norm": 0.6660561561584473, "learning_rate": 1.4514626919020928e-05, "loss": 0.0573, "step": 6644 }, { "epoch": 1.11, "grad_norm": 1.0826306343078613, "learning_rate": 1.4513015619023036e-05, "loss": 0.0662, "step": 6645 }, { "epoch": 1.11, "grad_norm": 0.7468818426132202, "learning_rate": 1.4511404171872552e-05, "loss": 0.0572, "step": 6646 }, { "epoch": 1.11, "grad_norm": 0.6611214280128479, "learning_rate": 1.450979257762201e-05, "loss": 0.0671, "step": 6647 }, { "epoch": 1.11, "grad_norm": 0.5470431447029114, "learning_rate": 1.4508180836323969e-05, "loss": 0.0575, "step": 6648 }, { "epoch": 1.11, "grad_norm": 0.6943677663803101, "learning_rate": 1.4506568948030972e-05, "loss": 0.0617, "step": 6649 }, { "epoch": 1.11, "grad_norm": 0.424564927816391, "learning_rate": 1.4504956912795583e-05, "loss": 0.0457, "step": 6650 }, { "epoch": 1.11, "grad_norm": 0.5629873871803284, "learning_rate": 1.4503344730670364e-05, "loss": 0.0742, "step": 6651 }, { "epoch": 1.11, "grad_norm": 0.7270671129226685, "learning_rate": 1.4501732401707877e-05, "loss": 0.0765, "step": 6652 }, { "epoch": 1.11, "grad_norm": 0.9027171730995178, "learning_rate": 1.4500119925960701e-05, "loss": 0.0501, "step": 6653 }, { "epoch": 1.11, "grad_norm": 0.46847233176231384, "learning_rate": 1.4498507303481408e-05, "loss": 0.0627, "step": 6654 }, { "epoch": 1.11, "grad_norm": 0.625691294670105, "learning_rate": 1.4496894534322581e-05, "loss": 0.067, "step": 6655 }, { "epoch": 1.11, "grad_norm": 0.4732916057109833, "learning_rate": 1.4495281618536807e-05, "loss": 0.0602, "step": 6656 }, { "epoch": 1.11, "grad_norm": 0.5036839842796326, "learning_rate": 1.4493668556176677e-05, "loss": 0.0598, "step": 6657 }, { "epoch": 1.11, "grad_norm": 0.5753771066665649, "learning_rate": 1.4492055347294789e-05, "loss": 0.0524, "step": 6658 }, { "epoch": 1.11, "grad_norm": 0.5251813530921936, "learning_rate": 1.4490441991943735e-05, "loss": 0.0642, "step": 6659 }, { "epoch": 1.11, "grad_norm": 0.5302148461341858, "learning_rate": 1.4488828490176133e-05, "loss": 0.0619, "step": 6660 }, { "epoch": 1.11, "grad_norm": 0.6159641742706299, "learning_rate": 1.4487214842044583e-05, "loss": 0.068, "step": 6661 }, { "epoch": 1.11, "grad_norm": 0.5589789152145386, "learning_rate": 1.4485601047601707e-05, "loss": 0.0712, "step": 6662 }, { "epoch": 1.11, "grad_norm": 0.5554517507553101, "learning_rate": 1.4483987106900118e-05, "loss": 0.0638, "step": 6663 }, { "epoch": 1.11, "grad_norm": 0.6196103692054749, "learning_rate": 1.4482373019992449e-05, "loss": 0.0672, "step": 6664 }, { "epoch": 1.11, "grad_norm": 1.1324669122695923, "learning_rate": 1.4480758786931325e-05, "loss": 0.0515, "step": 6665 }, { "epoch": 1.11, "grad_norm": 0.45957115292549133, "learning_rate": 1.447914440776938e-05, "loss": 0.0539, "step": 6666 }, { "epoch": 1.12, "grad_norm": 0.8943498134613037, "learning_rate": 1.4477529882559246e-05, "loss": 0.0888, "step": 6667 }, { "epoch": 1.12, "grad_norm": 0.5594621300697327, "learning_rate": 1.4475915211353581e-05, "loss": 0.0529, "step": 6668 }, { "epoch": 1.12, "grad_norm": 0.7203047275543213, "learning_rate": 1.4474300394205025e-05, "loss": 0.0906, "step": 6669 }, { "epoch": 1.12, "grad_norm": 0.8309109210968018, "learning_rate": 1.4472685431166232e-05, "loss": 0.0597, "step": 6670 }, { "epoch": 1.12, "grad_norm": 0.4509604275226593, "learning_rate": 1.4471070322289855e-05, "loss": 0.0597, "step": 6671 }, { "epoch": 1.12, "grad_norm": 0.4772603511810303, "learning_rate": 1.4469455067628569e-05, "loss": 0.0466, "step": 6672 }, { "epoch": 1.12, "grad_norm": 0.5070797801017761, "learning_rate": 1.4467839667235031e-05, "loss": 0.0604, "step": 6673 }, { "epoch": 1.12, "grad_norm": 0.6447916030883789, "learning_rate": 1.4466224121161917e-05, "loss": 0.0637, "step": 6674 }, { "epoch": 1.12, "grad_norm": 0.6092429161071777, "learning_rate": 1.44646084294619e-05, "loss": 0.0577, "step": 6675 }, { "epoch": 1.12, "grad_norm": 0.5234747529029846, "learning_rate": 1.4462992592187667e-05, "loss": 0.0599, "step": 6676 }, { "epoch": 1.12, "grad_norm": 0.5230962038040161, "learning_rate": 1.4461376609391904e-05, "loss": 0.0615, "step": 6677 }, { "epoch": 1.12, "grad_norm": 0.5198740363121033, "learning_rate": 1.4459760481127298e-05, "loss": 0.0738, "step": 6678 }, { "epoch": 1.12, "grad_norm": 0.47287195920944214, "learning_rate": 1.4458144207446548e-05, "loss": 0.0474, "step": 6679 }, { "epoch": 1.12, "grad_norm": 0.5251888632774353, "learning_rate": 1.4456527788402355e-05, "loss": 0.0845, "step": 6680 }, { "epoch": 1.12, "grad_norm": 0.6904727220535278, "learning_rate": 1.4454911224047424e-05, "loss": 0.0767, "step": 6681 }, { "epoch": 1.12, "grad_norm": 0.5413066148757935, "learning_rate": 1.4453294514434462e-05, "loss": 0.0616, "step": 6682 }, { "epoch": 1.12, "grad_norm": 0.5039079785346985, "learning_rate": 1.4451677659616187e-05, "loss": 0.059, "step": 6683 }, { "epoch": 1.12, "grad_norm": 0.6982630491256714, "learning_rate": 1.445006065964532e-05, "loss": 0.0624, "step": 6684 }, { "epoch": 1.12, "grad_norm": 0.5306962132453918, "learning_rate": 1.4448443514574582e-05, "loss": 0.0565, "step": 6685 }, { "epoch": 1.12, "grad_norm": 0.5584259033203125, "learning_rate": 1.4446826224456707e-05, "loss": 0.0486, "step": 6686 }, { "epoch": 1.12, "grad_norm": 0.5584192276000977, "learning_rate": 1.444520878934442e-05, "loss": 0.0678, "step": 6687 }, { "epoch": 1.12, "grad_norm": 0.6554821729660034, "learning_rate": 1.4443591209290466e-05, "loss": 0.0516, "step": 6688 }, { "epoch": 1.12, "grad_norm": 0.5301254391670227, "learning_rate": 1.444197348434759e-05, "loss": 0.057, "step": 6689 }, { "epoch": 1.12, "grad_norm": 0.8512237071990967, "learning_rate": 1.4440355614568536e-05, "loss": 0.0495, "step": 6690 }, { "epoch": 1.12, "grad_norm": 0.6612192392349243, "learning_rate": 1.4438737600006055e-05, "loss": 0.0752, "step": 6691 }, { "epoch": 1.12, "grad_norm": 0.5364535450935364, "learning_rate": 1.4437119440712907e-05, "loss": 0.061, "step": 6692 }, { "epoch": 1.12, "grad_norm": 0.7395131587982178, "learning_rate": 1.4435501136741859e-05, "loss": 0.0646, "step": 6693 }, { "epoch": 1.12, "grad_norm": 0.5024904012680054, "learning_rate": 1.4433882688145668e-05, "loss": 0.0649, "step": 6694 }, { "epoch": 1.12, "grad_norm": 0.50212162733078, "learning_rate": 1.4432264094977114e-05, "loss": 0.0654, "step": 6695 }, { "epoch": 1.12, "grad_norm": 0.39205682277679443, "learning_rate": 1.4430645357288967e-05, "loss": 0.0415, "step": 6696 }, { "epoch": 1.12, "grad_norm": 0.6981784701347351, "learning_rate": 1.4429026475134014e-05, "loss": 0.0427, "step": 6697 }, { "epoch": 1.12, "grad_norm": 0.43535906076431274, "learning_rate": 1.4427407448565036e-05, "loss": 0.0555, "step": 6698 }, { "epoch": 1.12, "grad_norm": 0.512384295463562, "learning_rate": 1.4425788277634824e-05, "loss": 0.0618, "step": 6699 }, { "epoch": 1.12, "grad_norm": 0.5883331298828125, "learning_rate": 1.4424168962396179e-05, "loss": 0.061, "step": 6700 }, { "epoch": 1.12, "grad_norm": 0.6871927976608276, "learning_rate": 1.442254950290189e-05, "loss": 0.0422, "step": 6701 }, { "epoch": 1.12, "grad_norm": 0.6523765921592712, "learning_rate": 1.4420929899204771e-05, "loss": 0.0542, "step": 6702 }, { "epoch": 1.12, "grad_norm": 0.5730792284011841, "learning_rate": 1.4419310151357623e-05, "loss": 0.0743, "step": 6703 }, { "epoch": 1.12, "grad_norm": 0.5094913840293884, "learning_rate": 1.4417690259413273e-05, "loss": 0.0662, "step": 6704 }, { "epoch": 1.12, "grad_norm": 0.5501196384429932, "learning_rate": 1.4416070223424527e-05, "loss": 0.0508, "step": 6705 }, { "epoch": 1.12, "grad_norm": 0.49763989448547363, "learning_rate": 1.4414450043444212e-05, "loss": 0.0586, "step": 6706 }, { "epoch": 1.12, "grad_norm": 0.4048677980899811, "learning_rate": 1.4412829719525157e-05, "loss": 0.0518, "step": 6707 }, { "epoch": 1.12, "grad_norm": 0.6404078602790833, "learning_rate": 1.4411209251720196e-05, "loss": 0.058, "step": 6708 }, { "epoch": 1.12, "grad_norm": 0.7289262413978577, "learning_rate": 1.4409588640082162e-05, "loss": 0.0625, "step": 6709 }, { "epoch": 1.12, "grad_norm": 0.4273766279220581, "learning_rate": 1.44079678846639e-05, "loss": 0.049, "step": 6710 }, { "epoch": 1.12, "grad_norm": 0.6476860642433167, "learning_rate": 1.440634698551826e-05, "loss": 0.0577, "step": 6711 }, { "epoch": 1.12, "grad_norm": 0.5420938730239868, "learning_rate": 1.4404725942698088e-05, "loss": 0.0774, "step": 6712 }, { "epoch": 1.12, "grad_norm": 0.46613216400146484, "learning_rate": 1.440310475625624e-05, "loss": 0.0636, "step": 6713 }, { "epoch": 1.12, "grad_norm": 0.5561772584915161, "learning_rate": 1.4401483426245584e-05, "loss": 0.0726, "step": 6714 }, { "epoch": 1.12, "grad_norm": 0.6501024961471558, "learning_rate": 1.4399861952718977e-05, "loss": 0.0655, "step": 6715 }, { "epoch": 1.12, "grad_norm": 0.5836576819419861, "learning_rate": 1.4398240335729294e-05, "loss": 0.0607, "step": 6716 }, { "epoch": 1.12, "grad_norm": 0.787457287311554, "learning_rate": 1.4396618575329409e-05, "loss": 0.056, "step": 6717 }, { "epoch": 1.12, "grad_norm": 0.41724351048469543, "learning_rate": 1.4394996671572203e-05, "loss": 0.0809, "step": 6718 }, { "epoch": 1.12, "grad_norm": 0.8782228827476501, "learning_rate": 1.4393374624510554e-05, "loss": 0.1017, "step": 6719 }, { "epoch": 1.12, "grad_norm": 0.5496443510055542, "learning_rate": 1.439175243419736e-05, "loss": 0.0588, "step": 6720 }, { "epoch": 1.12, "grad_norm": 0.48620375990867615, "learning_rate": 1.4390130100685509e-05, "loss": 0.0498, "step": 6721 }, { "epoch": 1.12, "grad_norm": 0.39393675327301025, "learning_rate": 1.4388507624027899e-05, "loss": 0.0481, "step": 6722 }, { "epoch": 1.12, "grad_norm": 0.6780629754066467, "learning_rate": 1.4386885004277433e-05, "loss": 0.0567, "step": 6723 }, { "epoch": 1.12, "grad_norm": 0.65293288230896, "learning_rate": 1.4385262241487022e-05, "loss": 0.0864, "step": 6724 }, { "epoch": 1.12, "grad_norm": 0.4402940571308136, "learning_rate": 1.4383639335709576e-05, "loss": 0.0645, "step": 6725 }, { "epoch": 1.12, "grad_norm": 0.5673407912254333, "learning_rate": 1.4382016286998011e-05, "loss": 0.0701, "step": 6726 }, { "epoch": 1.13, "grad_norm": 0.4979539215564728, "learning_rate": 1.4380393095405247e-05, "loss": 0.0483, "step": 6727 }, { "epoch": 1.13, "grad_norm": 0.4776197671890259, "learning_rate": 1.4378769760984219e-05, "loss": 0.0717, "step": 6728 }, { "epoch": 1.13, "grad_norm": 0.48712557554244995, "learning_rate": 1.4377146283787848e-05, "loss": 0.0603, "step": 6729 }, { "epoch": 1.13, "grad_norm": 0.593605637550354, "learning_rate": 1.4375522663869072e-05, "loss": 0.0767, "step": 6730 }, { "epoch": 1.13, "grad_norm": 1.2227449417114258, "learning_rate": 1.4373898901280831e-05, "loss": 0.0977, "step": 6731 }, { "epoch": 1.13, "grad_norm": 0.5539222955703735, "learning_rate": 1.4372274996076074e-05, "loss": 0.0506, "step": 6732 }, { "epoch": 1.13, "grad_norm": 0.5312293767929077, "learning_rate": 1.4370650948307746e-05, "loss": 0.0545, "step": 6733 }, { "epoch": 1.13, "grad_norm": 0.4922032356262207, "learning_rate": 1.4369026758028806e-05, "loss": 0.0526, "step": 6734 }, { "epoch": 1.13, "grad_norm": 0.5873792767524719, "learning_rate": 1.4367402425292207e-05, "loss": 0.0664, "step": 6735 }, { "epoch": 1.13, "grad_norm": 0.5797178745269775, "learning_rate": 1.4365777950150914e-05, "loss": 0.0692, "step": 6736 }, { "epoch": 1.13, "grad_norm": 0.639094352722168, "learning_rate": 1.4364153332657896e-05, "loss": 0.0612, "step": 6737 }, { "epoch": 1.13, "grad_norm": 0.38986843824386597, "learning_rate": 1.4362528572866126e-05, "loss": 0.0576, "step": 6738 }, { "epoch": 1.13, "grad_norm": 0.469247967004776, "learning_rate": 1.4360903670828583e-05, "loss": 0.0493, "step": 6739 }, { "epoch": 1.13, "grad_norm": 0.6261418461799622, "learning_rate": 1.4359278626598245e-05, "loss": 0.0728, "step": 6740 }, { "epoch": 1.13, "grad_norm": 0.5081225037574768, "learning_rate": 1.4357653440228101e-05, "loss": 0.0538, "step": 6741 }, { "epoch": 1.13, "grad_norm": 0.7901596426963806, "learning_rate": 1.4356028111771143e-05, "loss": 0.0599, "step": 6742 }, { "epoch": 1.13, "grad_norm": 0.5618782043457031, "learning_rate": 1.4354402641280365e-05, "loss": 0.0632, "step": 6743 }, { "epoch": 1.13, "grad_norm": 0.4739299416542053, "learning_rate": 1.435277702880877e-05, "loss": 0.0521, "step": 6744 }, { "epoch": 1.13, "grad_norm": 0.7167664170265198, "learning_rate": 1.4351151274409359e-05, "loss": 0.078, "step": 6745 }, { "epoch": 1.13, "grad_norm": 0.6620738506317139, "learning_rate": 1.4349525378135146e-05, "loss": 0.0651, "step": 6746 }, { "epoch": 1.13, "grad_norm": 0.4989062249660492, "learning_rate": 1.434789934003914e-05, "loss": 0.0718, "step": 6747 }, { "epoch": 1.13, "grad_norm": 0.46663612127304077, "learning_rate": 1.4346273160174367e-05, "loss": 0.0571, "step": 6748 }, { "epoch": 1.13, "grad_norm": 0.4334481954574585, "learning_rate": 1.434464683859385e-05, "loss": 0.0451, "step": 6749 }, { "epoch": 1.13, "grad_norm": 0.5088945627212524, "learning_rate": 1.4343020375350612e-05, "loss": 0.0583, "step": 6750 }, { "epoch": 1.13, "grad_norm": 0.4874979853630066, "learning_rate": 1.4341393770497685e-05, "loss": 0.0563, "step": 6751 }, { "epoch": 1.13, "grad_norm": 0.6869795918464661, "learning_rate": 1.4339767024088114e-05, "loss": 0.0572, "step": 6752 }, { "epoch": 1.13, "grad_norm": 0.3887101411819458, "learning_rate": 1.4338140136174938e-05, "loss": 0.0597, "step": 6753 }, { "epoch": 1.13, "grad_norm": 0.8268141150474548, "learning_rate": 1.43365131068112e-05, "loss": 0.0544, "step": 6754 }, { "epoch": 1.13, "grad_norm": 0.4957191050052643, "learning_rate": 1.4334885936049957e-05, "loss": 0.0602, "step": 6755 }, { "epoch": 1.13, "grad_norm": 0.5430400967597961, "learning_rate": 1.4333258623944261e-05, "loss": 0.0615, "step": 6756 }, { "epoch": 1.13, "grad_norm": 0.4899424910545349, "learning_rate": 1.4331631170547173e-05, "loss": 0.0486, "step": 6757 }, { "epoch": 1.13, "grad_norm": 0.4615474343299866, "learning_rate": 1.4330003575911761e-05, "loss": 0.0787, "step": 6758 }, { "epoch": 1.13, "grad_norm": 0.6863395571708679, "learning_rate": 1.4328375840091089e-05, "loss": 0.0727, "step": 6759 }, { "epoch": 1.13, "grad_norm": 0.43017420172691345, "learning_rate": 1.4326747963138242e-05, "loss": 0.047, "step": 6760 }, { "epoch": 1.13, "grad_norm": 0.47195860743522644, "learning_rate": 1.4325119945106287e-05, "loss": 0.0494, "step": 6761 }, { "epoch": 1.13, "grad_norm": 0.6612123250961304, "learning_rate": 1.4323491786048313e-05, "loss": 0.0657, "step": 6762 }, { "epoch": 1.13, "grad_norm": 0.6632274985313416, "learning_rate": 1.4321863486017408e-05, "loss": 0.0787, "step": 6763 }, { "epoch": 1.13, "grad_norm": 0.7310505509376526, "learning_rate": 1.4320235045066669e-05, "loss": 0.0588, "step": 6764 }, { "epoch": 1.13, "grad_norm": 0.6695038676261902, "learning_rate": 1.4318606463249185e-05, "loss": 0.0876, "step": 6765 }, { "epoch": 1.13, "grad_norm": 0.6989794969558716, "learning_rate": 1.4316977740618061e-05, "loss": 0.0741, "step": 6766 }, { "epoch": 1.13, "grad_norm": 0.7541155815124512, "learning_rate": 1.4315348877226407e-05, "loss": 0.0833, "step": 6767 }, { "epoch": 1.13, "grad_norm": 0.5685365796089172, "learning_rate": 1.4313719873127332e-05, "loss": 0.0706, "step": 6768 }, { "epoch": 1.13, "grad_norm": 0.4896821677684784, "learning_rate": 1.431209072837395e-05, "loss": 0.0606, "step": 6769 }, { "epoch": 1.13, "grad_norm": 0.4242500364780426, "learning_rate": 1.4310461443019388e-05, "loss": 0.0447, "step": 6770 }, { "epoch": 1.13, "grad_norm": 0.4041525423526764, "learning_rate": 1.4308832017116759e-05, "loss": 0.0453, "step": 6771 }, { "epoch": 1.13, "grad_norm": 0.6354022026062012, "learning_rate": 1.4307202450719202e-05, "loss": 0.0503, "step": 6772 }, { "epoch": 1.13, "grad_norm": 0.5629956126213074, "learning_rate": 1.4305572743879848e-05, "loss": 0.0649, "step": 6773 }, { "epoch": 1.13, "grad_norm": 0.5884623527526855, "learning_rate": 1.430394289665184e-05, "loss": 0.0638, "step": 6774 }, { "epoch": 1.13, "grad_norm": 0.600109875202179, "learning_rate": 1.4302312909088312e-05, "loss": 0.0623, "step": 6775 }, { "epoch": 1.13, "grad_norm": 0.5240516662597656, "learning_rate": 1.4300682781242419e-05, "loss": 0.0632, "step": 6776 }, { "epoch": 1.13, "grad_norm": 0.42611631751060486, "learning_rate": 1.4299052513167314e-05, "loss": 0.0442, "step": 6777 }, { "epoch": 1.13, "grad_norm": 0.4319426715373993, "learning_rate": 1.4297422104916147e-05, "loss": 0.0521, "step": 6778 }, { "epoch": 1.13, "grad_norm": 0.5359275341033936, "learning_rate": 1.4295791556542085e-05, "loss": 0.0624, "step": 6779 }, { "epoch": 1.13, "grad_norm": 0.515360951423645, "learning_rate": 1.4294160868098294e-05, "loss": 0.0541, "step": 6780 }, { "epoch": 1.13, "grad_norm": 0.5468793511390686, "learning_rate": 1.4292530039637945e-05, "loss": 0.0489, "step": 6781 }, { "epoch": 1.13, "grad_norm": 0.5473506450653076, "learning_rate": 1.429089907121421e-05, "loss": 0.0704, "step": 6782 }, { "epoch": 1.13, "grad_norm": 0.4678751230239868, "learning_rate": 1.4289267962880272e-05, "loss": 0.0558, "step": 6783 }, { "epoch": 1.13, "grad_norm": 0.507336437702179, "learning_rate": 1.4287636714689315e-05, "loss": 0.0497, "step": 6784 }, { "epoch": 1.13, "grad_norm": 0.5841175317764282, "learning_rate": 1.4286005326694525e-05, "loss": 0.0669, "step": 6785 }, { "epoch": 1.13, "grad_norm": 0.48813509941101074, "learning_rate": 1.4284373798949099e-05, "loss": 0.0541, "step": 6786 }, { "epoch": 1.14, "grad_norm": 0.6100643873214722, "learning_rate": 1.428274213150623e-05, "loss": 0.0743, "step": 6787 }, { "epoch": 1.14, "grad_norm": 0.4658578038215637, "learning_rate": 1.428111032441913e-05, "loss": 0.0574, "step": 6788 }, { "epoch": 1.14, "grad_norm": 0.5748389959335327, "learning_rate": 1.4279478377740996e-05, "loss": 0.0671, "step": 6789 }, { "epoch": 1.14, "grad_norm": 0.5695223212242126, "learning_rate": 1.4277846291525045e-05, "loss": 0.0499, "step": 6790 }, { "epoch": 1.14, "grad_norm": 0.5137593150138855, "learning_rate": 1.4276214065824491e-05, "loss": 0.0746, "step": 6791 }, { "epoch": 1.14, "grad_norm": 0.5509438514709473, "learning_rate": 1.4274581700692558e-05, "loss": 0.0525, "step": 6792 }, { "epoch": 1.14, "grad_norm": 0.3863549828529358, "learning_rate": 1.4272949196182469e-05, "loss": 0.0536, "step": 6793 }, { "epoch": 1.14, "grad_norm": 0.42985036969184875, "learning_rate": 1.4271316552347455e-05, "loss": 0.0549, "step": 6794 }, { "epoch": 1.14, "grad_norm": 0.9208225607872009, "learning_rate": 1.4269683769240747e-05, "loss": 0.0675, "step": 6795 }, { "epoch": 1.14, "grad_norm": 0.6528892517089844, "learning_rate": 1.4268050846915589e-05, "loss": 0.0448, "step": 6796 }, { "epoch": 1.14, "grad_norm": 0.6289474964141846, "learning_rate": 1.4266417785425221e-05, "loss": 0.0662, "step": 6797 }, { "epoch": 1.14, "grad_norm": 0.9513221979141235, "learning_rate": 1.4264784584822894e-05, "loss": 0.0584, "step": 6798 }, { "epoch": 1.14, "grad_norm": 0.46766215562820435, "learning_rate": 1.4263151245161855e-05, "loss": 0.0525, "step": 6799 }, { "epoch": 1.14, "grad_norm": 0.600067138671875, "learning_rate": 1.4261517766495368e-05, "loss": 0.0693, "step": 6800 }, { "epoch": 1.14, "grad_norm": 0.4428669810295105, "learning_rate": 1.4259884148876688e-05, "loss": 0.0664, "step": 6801 }, { "epoch": 1.14, "grad_norm": 0.5740749835968018, "learning_rate": 1.425825039235909e-05, "loss": 0.0652, "step": 6802 }, { "epoch": 1.14, "grad_norm": 0.4975683093070984, "learning_rate": 1.4256616496995833e-05, "loss": 0.0532, "step": 6803 }, { "epoch": 1.14, "grad_norm": 0.6107968688011169, "learning_rate": 1.4254982462840204e-05, "loss": 0.0639, "step": 6804 }, { "epoch": 1.14, "grad_norm": 0.6258834600448608, "learning_rate": 1.4253348289945477e-05, "loss": 0.0913, "step": 6805 }, { "epoch": 1.14, "grad_norm": 1.295201301574707, "learning_rate": 1.4251713978364933e-05, "loss": 0.0518, "step": 6806 }, { "epoch": 1.14, "grad_norm": 0.5581960082054138, "learning_rate": 1.4250079528151863e-05, "loss": 0.0676, "step": 6807 }, { "epoch": 1.14, "grad_norm": 0.5688300728797913, "learning_rate": 1.4248444939359567e-05, "loss": 0.0538, "step": 6808 }, { "epoch": 1.14, "grad_norm": 0.5719844102859497, "learning_rate": 1.4246810212041337e-05, "loss": 0.0485, "step": 6809 }, { "epoch": 1.14, "grad_norm": 0.6640146374702454, "learning_rate": 1.4245175346250472e-05, "loss": 0.0533, "step": 6810 }, { "epoch": 1.14, "grad_norm": 0.6216385364532471, "learning_rate": 1.4243540342040286e-05, "loss": 0.0605, "step": 6811 }, { "epoch": 1.14, "grad_norm": 0.4308044910430908, "learning_rate": 1.4241905199464088e-05, "loss": 0.0526, "step": 6812 }, { "epoch": 1.14, "grad_norm": 0.705818772315979, "learning_rate": 1.424026991857519e-05, "loss": 0.0642, "step": 6813 }, { "epoch": 1.14, "grad_norm": 1.0780422687530518, "learning_rate": 1.4238634499426918e-05, "loss": 0.0718, "step": 6814 }, { "epoch": 1.14, "grad_norm": 0.620692789554596, "learning_rate": 1.4236998942072592e-05, "loss": 0.0575, "step": 6815 }, { "epoch": 1.14, "grad_norm": 0.7255431413650513, "learning_rate": 1.4235363246565546e-05, "loss": 0.0877, "step": 6816 }, { "epoch": 1.14, "grad_norm": 0.7027849555015564, "learning_rate": 1.423372741295911e-05, "loss": 0.0697, "step": 6817 }, { "epoch": 1.14, "grad_norm": 0.5362290740013123, "learning_rate": 1.4232091441306626e-05, "loss": 0.0625, "step": 6818 }, { "epoch": 1.14, "grad_norm": 0.5980563163757324, "learning_rate": 1.4230455331661433e-05, "loss": 0.0637, "step": 6819 }, { "epoch": 1.14, "grad_norm": 0.5998128652572632, "learning_rate": 1.4228819084076882e-05, "loss": 0.0704, "step": 6820 }, { "epoch": 1.14, "grad_norm": 0.8166725635528564, "learning_rate": 1.4227182698606323e-05, "loss": 0.0818, "step": 6821 }, { "epoch": 1.14, "grad_norm": 0.7770459651947021, "learning_rate": 1.4225546175303113e-05, "loss": 0.0654, "step": 6822 }, { "epoch": 1.14, "grad_norm": 0.4527991712093353, "learning_rate": 1.4223909514220613e-05, "loss": 0.0395, "step": 6823 }, { "epoch": 1.14, "grad_norm": 0.5214488506317139, "learning_rate": 1.4222272715412187e-05, "loss": 0.0642, "step": 6824 }, { "epoch": 1.14, "grad_norm": 0.638821005821228, "learning_rate": 1.4220635778931207e-05, "loss": 0.0779, "step": 6825 }, { "epoch": 1.14, "grad_norm": 0.7045202255249023, "learning_rate": 1.4218998704831049e-05, "loss": 0.0709, "step": 6826 }, { "epoch": 1.14, "grad_norm": 0.9409793615341187, "learning_rate": 1.4217361493165083e-05, "loss": 0.0595, "step": 6827 }, { "epoch": 1.14, "grad_norm": 0.5441760420799255, "learning_rate": 1.4215724143986705e-05, "loss": 0.0625, "step": 6828 }, { "epoch": 1.14, "grad_norm": 0.5398804545402527, "learning_rate": 1.4214086657349294e-05, "loss": 0.0569, "step": 6829 }, { "epoch": 1.14, "grad_norm": 0.6290777325630188, "learning_rate": 1.4212449033306246e-05, "loss": 0.0691, "step": 6830 }, { "epoch": 1.14, "grad_norm": 0.7492220401763916, "learning_rate": 1.4210811271910954e-05, "loss": 0.0797, "step": 6831 }, { "epoch": 1.14, "grad_norm": 0.7707051038742065, "learning_rate": 1.4209173373216822e-05, "loss": 0.0735, "step": 6832 }, { "epoch": 1.14, "grad_norm": 0.45694950222969055, "learning_rate": 1.420753533727726e-05, "loss": 0.0579, "step": 6833 }, { "epoch": 1.14, "grad_norm": 0.36507654190063477, "learning_rate": 1.4205897164145672e-05, "loss": 0.0458, "step": 6834 }, { "epoch": 1.14, "grad_norm": 0.42471829056739807, "learning_rate": 1.4204258853875471e-05, "loss": 0.0512, "step": 6835 }, { "epoch": 1.14, "grad_norm": 0.5164762139320374, "learning_rate": 1.4202620406520085e-05, "loss": 0.0631, "step": 6836 }, { "epoch": 1.14, "grad_norm": 0.4903658330440521, "learning_rate": 1.4200981822132931e-05, "loss": 0.0613, "step": 6837 }, { "epoch": 1.14, "grad_norm": 0.5459798574447632, "learning_rate": 1.419934310076744e-05, "loss": 0.0832, "step": 6838 }, { "epoch": 1.14, "grad_norm": 0.5879098773002625, "learning_rate": 1.419770424247704e-05, "loss": 0.0691, "step": 6839 }, { "epoch": 1.14, "grad_norm": 0.7299372553825378, "learning_rate": 1.4196065247315174e-05, "loss": 0.0839, "step": 6840 }, { "epoch": 1.14, "grad_norm": 0.5087147355079651, "learning_rate": 1.4194426115335281e-05, "loss": 0.0767, "step": 6841 }, { "epoch": 1.14, "grad_norm": 0.5077140927314758, "learning_rate": 1.4192786846590805e-05, "loss": 0.0529, "step": 6842 }, { "epoch": 1.14, "grad_norm": 1.3962814807891846, "learning_rate": 1.4191147441135198e-05, "loss": 0.0661, "step": 6843 }, { "epoch": 1.14, "grad_norm": 0.6627390384674072, "learning_rate": 1.4189507899021915e-05, "loss": 0.059, "step": 6844 }, { "epoch": 1.14, "grad_norm": 0.5585137605667114, "learning_rate": 1.4187868220304417e-05, "loss": 0.0668, "step": 6845 }, { "epoch": 1.15, "grad_norm": 0.4900992512702942, "learning_rate": 1.4186228405036166e-05, "loss": 0.0559, "step": 6846 }, { "epoch": 1.15, "grad_norm": 0.5296323299407959, "learning_rate": 1.4184588453270629e-05, "loss": 0.0496, "step": 6847 }, { "epoch": 1.15, "grad_norm": 0.32937994599342346, "learning_rate": 1.4182948365061282e-05, "loss": 0.0458, "step": 6848 }, { "epoch": 1.15, "grad_norm": 0.5024797916412354, "learning_rate": 1.4181308140461601e-05, "loss": 0.0626, "step": 6849 }, { "epoch": 1.15, "grad_norm": 0.6518442630767822, "learning_rate": 1.4179667779525064e-05, "loss": 0.0591, "step": 6850 }, { "epoch": 1.15, "grad_norm": 0.5198506712913513, "learning_rate": 1.4178027282305162e-05, "loss": 0.0562, "step": 6851 }, { "epoch": 1.15, "grad_norm": 0.565609335899353, "learning_rate": 1.4176386648855385e-05, "loss": 0.064, "step": 6852 }, { "epoch": 1.15, "grad_norm": 0.40105047821998596, "learning_rate": 1.4174745879229223e-05, "loss": 0.0584, "step": 6853 }, { "epoch": 1.15, "grad_norm": 0.6000922322273254, "learning_rate": 1.4173104973480185e-05, "loss": 0.0704, "step": 6854 }, { "epoch": 1.15, "grad_norm": 0.4950525164604187, "learning_rate": 1.4171463931661763e-05, "loss": 0.0717, "step": 6855 }, { "epoch": 1.15, "grad_norm": 0.603092610836029, "learning_rate": 1.416982275382747e-05, "loss": 0.0493, "step": 6856 }, { "epoch": 1.15, "grad_norm": 0.6172778010368347, "learning_rate": 1.4168181440030822e-05, "loss": 0.0717, "step": 6857 }, { "epoch": 1.15, "grad_norm": 0.7524190545082092, "learning_rate": 1.4166539990325337e-05, "loss": 0.0704, "step": 6858 }, { "epoch": 1.15, "grad_norm": 0.5521812438964844, "learning_rate": 1.4164898404764526e-05, "loss": 0.0649, "step": 6859 }, { "epoch": 1.15, "grad_norm": 0.5915570855140686, "learning_rate": 1.4163256683401927e-05, "loss": 0.0578, "step": 6860 }, { "epoch": 1.15, "grad_norm": 0.5788470506668091, "learning_rate": 1.4161614826291069e-05, "loss": 0.0508, "step": 6861 }, { "epoch": 1.15, "grad_norm": 0.7502289414405823, "learning_rate": 1.4159972833485479e-05, "loss": 0.0508, "step": 6862 }, { "epoch": 1.15, "grad_norm": 1.244128704071045, "learning_rate": 1.4158330705038697e-05, "loss": 0.0583, "step": 6863 }, { "epoch": 1.15, "grad_norm": 0.7949538230895996, "learning_rate": 1.4156688441004275e-05, "loss": 0.0644, "step": 6864 }, { "epoch": 1.15, "grad_norm": 0.5437282919883728, "learning_rate": 1.4155046041435757e-05, "loss": 0.0746, "step": 6865 }, { "epoch": 1.15, "grad_norm": 0.3993186950683594, "learning_rate": 1.4153403506386693e-05, "loss": 0.0493, "step": 6866 }, { "epoch": 1.15, "grad_norm": 0.8066571354866028, "learning_rate": 1.4151760835910644e-05, "loss": 0.0669, "step": 6867 }, { "epoch": 1.15, "grad_norm": 0.6313014626502991, "learning_rate": 1.4150118030061168e-05, "loss": 0.0483, "step": 6868 }, { "epoch": 1.15, "grad_norm": 0.48561984300613403, "learning_rate": 1.4148475088891834e-05, "loss": 0.0582, "step": 6869 }, { "epoch": 1.15, "grad_norm": 0.6385293006896973, "learning_rate": 1.4146832012456208e-05, "loss": 0.0627, "step": 6870 }, { "epoch": 1.15, "grad_norm": 0.3968343734741211, "learning_rate": 1.4145188800807868e-05, "loss": 0.0403, "step": 6871 }, { "epoch": 1.15, "grad_norm": 0.5025259852409363, "learning_rate": 1.414354545400039e-05, "loss": 0.0675, "step": 6872 }, { "epoch": 1.15, "grad_norm": 0.46979182958602905, "learning_rate": 1.4141901972087362e-05, "loss": 0.0571, "step": 6873 }, { "epoch": 1.15, "grad_norm": 0.8208833932876587, "learning_rate": 1.4140258355122367e-05, "loss": 0.0539, "step": 6874 }, { "epoch": 1.15, "grad_norm": 0.4838447868824005, "learning_rate": 1.4138614603159e-05, "loss": 0.048, "step": 6875 }, { "epoch": 1.15, "grad_norm": 0.5274282097816467, "learning_rate": 1.4136970716250857e-05, "loss": 0.0493, "step": 6876 }, { "epoch": 1.15, "grad_norm": 0.6079912781715393, "learning_rate": 1.4135326694451539e-05, "loss": 0.0723, "step": 6877 }, { "epoch": 1.15, "grad_norm": 0.5567020773887634, "learning_rate": 1.4133682537814653e-05, "loss": 0.0718, "step": 6878 }, { "epoch": 1.15, "grad_norm": 0.7307789325714111, "learning_rate": 1.4132038246393804e-05, "loss": 0.0599, "step": 6879 }, { "epoch": 1.15, "grad_norm": 0.5060835480690002, "learning_rate": 1.4130393820242612e-05, "loss": 0.0496, "step": 6880 }, { "epoch": 1.15, "grad_norm": 0.45540323853492737, "learning_rate": 1.4128749259414692e-05, "loss": 0.0569, "step": 6881 }, { "epoch": 1.15, "grad_norm": 0.7702581286430359, "learning_rate": 1.4127104563963667e-05, "loss": 0.0695, "step": 6882 }, { "epoch": 1.15, "grad_norm": 0.5413158535957336, "learning_rate": 1.4125459733943166e-05, "loss": 0.0636, "step": 6883 }, { "epoch": 1.15, "grad_norm": 0.619073212146759, "learning_rate": 1.4123814769406821e-05, "loss": 0.0715, "step": 6884 }, { "epoch": 1.15, "grad_norm": 0.6524409055709839, "learning_rate": 1.4122169670408265e-05, "loss": 0.0968, "step": 6885 }, { "epoch": 1.15, "grad_norm": 0.4987274408340454, "learning_rate": 1.4120524437001144e-05, "loss": 0.0476, "step": 6886 }, { "epoch": 1.15, "grad_norm": 1.021979808807373, "learning_rate": 1.4118879069239096e-05, "loss": 0.0737, "step": 6887 }, { "epoch": 1.15, "grad_norm": 0.5180621147155762, "learning_rate": 1.4117233567175777e-05, "loss": 0.0585, "step": 6888 }, { "epoch": 1.15, "grad_norm": 0.5371472835540771, "learning_rate": 1.4115587930864838e-05, "loss": 0.0813, "step": 6889 }, { "epoch": 1.15, "grad_norm": 0.5991894602775574, "learning_rate": 1.4113942160359934e-05, "loss": 0.0604, "step": 6890 }, { "epoch": 1.15, "grad_norm": 0.5049927830696106, "learning_rate": 1.411229625571473e-05, "loss": 0.0517, "step": 6891 }, { "epoch": 1.15, "grad_norm": 0.6662359833717346, "learning_rate": 1.4110650216982895e-05, "loss": 0.0617, "step": 6892 }, { "epoch": 1.15, "grad_norm": 0.495417982339859, "learning_rate": 1.41090040442181e-05, "loss": 0.0632, "step": 6893 }, { "epoch": 1.15, "grad_norm": 0.41635027527809143, "learning_rate": 1.4107357737474017e-05, "loss": 0.0403, "step": 6894 }, { "epoch": 1.15, "grad_norm": 0.6418130397796631, "learning_rate": 1.4105711296804327e-05, "loss": 0.0707, "step": 6895 }, { "epoch": 1.15, "grad_norm": 0.520110547542572, "learning_rate": 1.4104064722262717e-05, "loss": 0.0508, "step": 6896 }, { "epoch": 1.15, "grad_norm": 0.7534917593002319, "learning_rate": 1.410241801390287e-05, "loss": 0.0763, "step": 6897 }, { "epoch": 1.15, "grad_norm": 0.4368368685245514, "learning_rate": 1.4100771171778485e-05, "loss": 0.055, "step": 6898 }, { "epoch": 1.15, "grad_norm": 0.45782431960105896, "learning_rate": 1.4099124195943255e-05, "loss": 0.0577, "step": 6899 }, { "epoch": 1.15, "grad_norm": 0.5848463773727417, "learning_rate": 1.4097477086450883e-05, "loss": 0.0707, "step": 6900 }, { "epoch": 1.15, "grad_norm": 0.44990894198417664, "learning_rate": 1.409582984335508e-05, "loss": 0.0612, "step": 6901 }, { "epoch": 1.15, "grad_norm": 0.45948079228401184, "learning_rate": 1.4094182466709549e-05, "loss": 0.0469, "step": 6902 }, { "epoch": 1.15, "grad_norm": 0.6576551795005798, "learning_rate": 1.409253495656801e-05, "loss": 0.0578, "step": 6903 }, { "epoch": 1.15, "grad_norm": 0.45506367087364197, "learning_rate": 1.4090887312984178e-05, "loss": 0.0531, "step": 6904 }, { "epoch": 1.15, "grad_norm": 0.6931139230728149, "learning_rate": 1.408923953601178e-05, "loss": 0.0499, "step": 6905 }, { "epoch": 1.16, "grad_norm": 0.7376765012741089, "learning_rate": 1.4087591625704543e-05, "loss": 0.0631, "step": 6906 }, { "epoch": 1.16, "grad_norm": 0.5005943775177002, "learning_rate": 1.40859435821162e-05, "loss": 0.0531, "step": 6907 }, { "epoch": 1.16, "grad_norm": 0.41077321767807007, "learning_rate": 1.4084295405300483e-05, "loss": 0.0535, "step": 6908 }, { "epoch": 1.16, "grad_norm": 0.6043238639831543, "learning_rate": 1.4082647095311139e-05, "loss": 0.0711, "step": 6909 }, { "epoch": 1.16, "grad_norm": 0.8470011949539185, "learning_rate": 1.408099865220191e-05, "loss": 0.0584, "step": 6910 }, { "epoch": 1.16, "grad_norm": 0.4545920789241791, "learning_rate": 1.4079350076026543e-05, "loss": 0.0345, "step": 6911 }, { "epoch": 1.16, "grad_norm": 0.6273968815803528, "learning_rate": 1.4077701366838799e-05, "loss": 0.0632, "step": 6912 }, { "epoch": 1.16, "grad_norm": 0.5932420492172241, "learning_rate": 1.4076052524692429e-05, "loss": 0.0652, "step": 6913 }, { "epoch": 1.16, "grad_norm": 0.6391251087188721, "learning_rate": 1.4074403549641203e-05, "loss": 0.0522, "step": 6914 }, { "epoch": 1.16, "grad_norm": 0.5699265599250793, "learning_rate": 1.4072754441738879e-05, "loss": 0.0559, "step": 6915 }, { "epoch": 1.16, "grad_norm": 0.5237109065055847, "learning_rate": 1.4071105201039235e-05, "loss": 0.0711, "step": 6916 }, { "epoch": 1.16, "grad_norm": 0.5694307684898376, "learning_rate": 1.4069455827596048e-05, "loss": 0.0572, "step": 6917 }, { "epoch": 1.16, "grad_norm": 0.5117085576057434, "learning_rate": 1.406780632146309e-05, "loss": 0.0751, "step": 6918 }, { "epoch": 1.16, "grad_norm": 0.5959608554840088, "learning_rate": 1.4066156682694151e-05, "loss": 0.0634, "step": 6919 }, { "epoch": 1.16, "grad_norm": 0.5686455368995667, "learning_rate": 1.4064506911343016e-05, "loss": 0.082, "step": 6920 }, { "epoch": 1.16, "grad_norm": 0.5511669516563416, "learning_rate": 1.4062857007463485e-05, "loss": 0.0547, "step": 6921 }, { "epoch": 1.16, "grad_norm": 0.5880168676376343, "learning_rate": 1.4061206971109347e-05, "loss": 0.0598, "step": 6922 }, { "epoch": 1.16, "grad_norm": 0.6498198509216309, "learning_rate": 1.4059556802334408e-05, "loss": 0.0638, "step": 6923 }, { "epoch": 1.16, "grad_norm": 0.5258195996284485, "learning_rate": 1.405790650119247e-05, "loss": 0.0686, "step": 6924 }, { "epoch": 1.16, "grad_norm": 0.5980705618858337, "learning_rate": 1.4056256067737351e-05, "loss": 0.0485, "step": 6925 }, { "epoch": 1.16, "grad_norm": 0.5114454030990601, "learning_rate": 1.4054605502022858e-05, "loss": 0.0699, "step": 6926 }, { "epoch": 1.16, "grad_norm": 0.8242676854133606, "learning_rate": 1.4052954804102812e-05, "loss": 0.0678, "step": 6927 }, { "epoch": 1.16, "grad_norm": 0.46584776043891907, "learning_rate": 1.4051303974031033e-05, "loss": 0.0576, "step": 6928 }, { "epoch": 1.16, "grad_norm": 0.5292734503746033, "learning_rate": 1.4049653011861355e-05, "loss": 0.0694, "step": 6929 }, { "epoch": 1.16, "grad_norm": 1.03223717212677, "learning_rate": 1.4048001917647607e-05, "loss": 0.0554, "step": 6930 }, { "epoch": 1.16, "grad_norm": 0.5530077219009399, "learning_rate": 1.4046350691443625e-05, "loss": 0.0712, "step": 6931 }, { "epoch": 1.16, "grad_norm": 0.4113480746746063, "learning_rate": 1.4044699333303247e-05, "loss": 0.0578, "step": 6932 }, { "epoch": 1.16, "grad_norm": 0.5185456275939941, "learning_rate": 1.4043047843280318e-05, "loss": 0.0525, "step": 6933 }, { "epoch": 1.16, "grad_norm": 0.523583173751831, "learning_rate": 1.4041396221428692e-05, "loss": 0.0639, "step": 6934 }, { "epoch": 1.16, "grad_norm": 0.5171438455581665, "learning_rate": 1.4039744467802215e-05, "loss": 0.0578, "step": 6935 }, { "epoch": 1.16, "grad_norm": 0.9369146227836609, "learning_rate": 1.4038092582454747e-05, "loss": 0.0602, "step": 6936 }, { "epoch": 1.16, "grad_norm": 0.7922234535217285, "learning_rate": 1.4036440565440155e-05, "loss": 0.0705, "step": 6937 }, { "epoch": 1.16, "grad_norm": 0.7028577923774719, "learning_rate": 1.40347884168123e-05, "loss": 0.0706, "step": 6938 }, { "epoch": 1.16, "grad_norm": 0.7022638320922852, "learning_rate": 1.4033136136625052e-05, "loss": 0.0564, "step": 6939 }, { "epoch": 1.16, "grad_norm": 0.6595310568809509, "learning_rate": 1.4031483724932287e-05, "loss": 0.0441, "step": 6940 }, { "epoch": 1.16, "grad_norm": 0.5366339683532715, "learning_rate": 1.4029831181787886e-05, "loss": 0.0719, "step": 6941 }, { "epoch": 1.16, "grad_norm": 0.46483051776885986, "learning_rate": 1.4028178507245728e-05, "loss": 0.0552, "step": 6942 }, { "epoch": 1.16, "grad_norm": 0.7722336649894714, "learning_rate": 1.4026525701359703e-05, "loss": 0.0651, "step": 6943 }, { "epoch": 1.16, "grad_norm": 0.6010501980781555, "learning_rate": 1.4024872764183703e-05, "loss": 0.0686, "step": 6944 }, { "epoch": 1.16, "grad_norm": 0.8788253664970398, "learning_rate": 1.4023219695771623e-05, "loss": 0.0581, "step": 6945 }, { "epoch": 1.16, "grad_norm": 0.9339020848274231, "learning_rate": 1.4021566496177367e-05, "loss": 0.0654, "step": 6946 }, { "epoch": 1.16, "grad_norm": 0.4855813682079315, "learning_rate": 1.4019913165454837e-05, "loss": 0.0489, "step": 6947 }, { "epoch": 1.16, "grad_norm": 0.48636704683303833, "learning_rate": 1.4018259703657937e-05, "loss": 0.0681, "step": 6948 }, { "epoch": 1.16, "grad_norm": 0.6096869111061096, "learning_rate": 1.4016606110840592e-05, "loss": 0.0647, "step": 6949 }, { "epoch": 1.16, "grad_norm": 0.5472757816314697, "learning_rate": 1.4014952387056705e-05, "loss": 0.0592, "step": 6950 }, { "epoch": 1.16, "grad_norm": 0.7103046178817749, "learning_rate": 1.401329853236021e-05, "loss": 0.0452, "step": 6951 }, { "epoch": 1.16, "grad_norm": 0.552445113658905, "learning_rate": 1.4011644546805026e-05, "loss": 0.0792, "step": 6952 }, { "epoch": 1.16, "grad_norm": 0.5386016964912415, "learning_rate": 1.4009990430445089e-05, "loss": 0.0582, "step": 6953 }, { "epoch": 1.16, "grad_norm": 0.5307090282440186, "learning_rate": 1.4008336183334326e-05, "loss": 0.0622, "step": 6954 }, { "epoch": 1.16, "grad_norm": 0.6267282962799072, "learning_rate": 1.4006681805526683e-05, "loss": 0.068, "step": 6955 }, { "epoch": 1.16, "grad_norm": 0.4381280839443207, "learning_rate": 1.4005027297076099e-05, "loss": 0.0496, "step": 6956 }, { "epoch": 1.16, "grad_norm": 0.5798733830451965, "learning_rate": 1.4003372658036521e-05, "loss": 0.0711, "step": 6957 }, { "epoch": 1.16, "grad_norm": 0.6675382256507874, "learning_rate": 1.4001717888461903e-05, "loss": 0.0481, "step": 6958 }, { "epoch": 1.16, "grad_norm": 0.6135886907577515, "learning_rate": 1.40000629884062e-05, "loss": 0.0697, "step": 6959 }, { "epoch": 1.16, "grad_norm": 0.46960482001304626, "learning_rate": 1.3998407957923372e-05, "loss": 0.0591, "step": 6960 }, { "epoch": 1.16, "grad_norm": 0.9406466484069824, "learning_rate": 1.3996752797067383e-05, "loss": 0.0604, "step": 6961 }, { "epoch": 1.16, "grad_norm": 0.5244860649108887, "learning_rate": 1.3995097505892204e-05, "loss": 0.0683, "step": 6962 }, { "epoch": 1.16, "grad_norm": 0.7340640425682068, "learning_rate": 1.3993442084451803e-05, "loss": 0.0998, "step": 6963 }, { "epoch": 1.16, "grad_norm": 0.5314042568206787, "learning_rate": 1.399178653280016e-05, "loss": 0.0479, "step": 6964 }, { "epoch": 1.16, "grad_norm": 0.5786492824554443, "learning_rate": 1.3990130850991256e-05, "loss": 0.0701, "step": 6965 }, { "epoch": 1.17, "grad_norm": 0.5011036396026611, "learning_rate": 1.398847503907908e-05, "loss": 0.07, "step": 6966 }, { "epoch": 1.17, "grad_norm": 0.7315990924835205, "learning_rate": 1.3986819097117616e-05, "loss": 0.0581, "step": 6967 }, { "epoch": 1.17, "grad_norm": 0.41751500964164734, "learning_rate": 1.3985163025160861e-05, "loss": 0.0434, "step": 6968 }, { "epoch": 1.17, "grad_norm": 0.6718026995658875, "learning_rate": 1.3983506823262816e-05, "loss": 0.0743, "step": 6969 }, { "epoch": 1.17, "grad_norm": 0.4642338752746582, "learning_rate": 1.398185049147748e-05, "loss": 0.0582, "step": 6970 }, { "epoch": 1.17, "grad_norm": 0.5374246835708618, "learning_rate": 1.3980194029858856e-05, "loss": 0.0478, "step": 6971 }, { "epoch": 1.17, "grad_norm": 0.5810381174087524, "learning_rate": 1.3978537438460964e-05, "loss": 0.0682, "step": 6972 }, { "epoch": 1.17, "grad_norm": 0.5259922742843628, "learning_rate": 1.3976880717337812e-05, "loss": 0.0609, "step": 6973 }, { "epoch": 1.17, "grad_norm": 0.6026848554611206, "learning_rate": 1.3975223866543425e-05, "loss": 0.0547, "step": 6974 }, { "epoch": 1.17, "grad_norm": 0.5677543878555298, "learning_rate": 1.3973566886131824e-05, "loss": 0.0686, "step": 6975 }, { "epoch": 1.17, "grad_norm": 0.49492576718330383, "learning_rate": 1.3971909776157034e-05, "loss": 0.064, "step": 6976 }, { "epoch": 1.17, "grad_norm": 0.5848442316055298, "learning_rate": 1.3970252536673095e-05, "loss": 0.0884, "step": 6977 }, { "epoch": 1.17, "grad_norm": 0.44615212082862854, "learning_rate": 1.3968595167734033e-05, "loss": 0.0414, "step": 6978 }, { "epoch": 1.17, "grad_norm": 0.5923396944999695, "learning_rate": 1.3966937669393898e-05, "loss": 0.0624, "step": 6979 }, { "epoch": 1.17, "grad_norm": 0.4463047683238983, "learning_rate": 1.3965280041706729e-05, "loss": 0.0505, "step": 6980 }, { "epoch": 1.17, "grad_norm": 0.4786266088485718, "learning_rate": 1.396362228472658e-05, "loss": 0.0529, "step": 6981 }, { "epoch": 1.17, "grad_norm": 0.4934811294078827, "learning_rate": 1.3961964398507497e-05, "loss": 0.0699, "step": 6982 }, { "epoch": 1.17, "grad_norm": 0.43072035908699036, "learning_rate": 1.3960306383103543e-05, "loss": 0.0635, "step": 6983 }, { "epoch": 1.17, "grad_norm": 0.3982582688331604, "learning_rate": 1.395864823856878e-05, "loss": 0.0402, "step": 6984 }, { "epoch": 1.17, "grad_norm": 0.5667015314102173, "learning_rate": 1.3956989964957273e-05, "loss": 0.066, "step": 6985 }, { "epoch": 1.17, "grad_norm": 0.572878360748291, "learning_rate": 1.395533156232309e-05, "loss": 0.0757, "step": 6986 }, { "epoch": 1.17, "grad_norm": 0.6554169058799744, "learning_rate": 1.3953673030720306e-05, "loss": 0.0461, "step": 6987 }, { "epoch": 1.17, "grad_norm": 0.6541467308998108, "learning_rate": 1.3952014370203001e-05, "loss": 0.073, "step": 6988 }, { "epoch": 1.17, "grad_norm": 0.527193009853363, "learning_rate": 1.3950355580825259e-05, "loss": 0.059, "step": 6989 }, { "epoch": 1.17, "grad_norm": 0.718885600566864, "learning_rate": 1.3948696662641164e-05, "loss": 0.0691, "step": 6990 }, { "epoch": 1.17, "grad_norm": 0.4082808792591095, "learning_rate": 1.3947037615704805e-05, "loss": 0.0661, "step": 6991 }, { "epoch": 1.17, "grad_norm": 0.7133969068527222, "learning_rate": 1.3945378440070286e-05, "loss": 0.0722, "step": 6992 }, { "epoch": 1.17, "grad_norm": 0.3988467752933502, "learning_rate": 1.3943719135791696e-05, "loss": 0.0384, "step": 6993 }, { "epoch": 1.17, "grad_norm": 0.49706918001174927, "learning_rate": 1.3942059702923145e-05, "loss": 0.0684, "step": 6994 }, { "epoch": 1.17, "grad_norm": 0.6794837117195129, "learning_rate": 1.3940400141518742e-05, "loss": 0.0852, "step": 6995 }, { "epoch": 1.17, "grad_norm": 0.7827320694923401, "learning_rate": 1.3938740451632596e-05, "loss": 0.0677, "step": 6996 }, { "epoch": 1.17, "grad_norm": 0.9324159622192383, "learning_rate": 1.3937080633318823e-05, "loss": 0.0653, "step": 6997 }, { "epoch": 1.17, "grad_norm": 0.396016389131546, "learning_rate": 1.3935420686631545e-05, "loss": 0.061, "step": 6998 }, { "epoch": 1.17, "grad_norm": 0.5057376027107239, "learning_rate": 1.3933760611624887e-05, "loss": 0.0528, "step": 6999 }, { "epoch": 1.17, "grad_norm": 0.4594126343727112, "learning_rate": 1.3932100408352973e-05, "loss": 0.0582, "step": 7000 }, { "epoch": 1.17, "grad_norm": 0.43981844186782837, "learning_rate": 1.3930440076869945e-05, "loss": 0.0552, "step": 7001 }, { "epoch": 1.17, "grad_norm": 0.43720096349716187, "learning_rate": 1.3928779617229933e-05, "loss": 0.041, "step": 7002 }, { "epoch": 1.17, "grad_norm": 0.7691702842712402, "learning_rate": 1.3927119029487082e-05, "loss": 0.0573, "step": 7003 }, { "epoch": 1.17, "grad_norm": 0.7299690246582031, "learning_rate": 1.3925458313695532e-05, "loss": 0.0858, "step": 7004 }, { "epoch": 1.17, "grad_norm": 0.6052428483963013, "learning_rate": 1.3923797469909443e-05, "loss": 0.0683, "step": 7005 }, { "epoch": 1.17, "grad_norm": 0.6010617613792419, "learning_rate": 1.3922136498182958e-05, "loss": 0.061, "step": 7006 }, { "epoch": 1.17, "grad_norm": 0.604261577129364, "learning_rate": 1.3920475398570244e-05, "loss": 0.0521, "step": 7007 }, { "epoch": 1.17, "grad_norm": 0.6045283675193787, "learning_rate": 1.3918814171125454e-05, "loss": 0.0748, "step": 7008 }, { "epoch": 1.17, "grad_norm": 0.4368745684623718, "learning_rate": 1.3917152815902765e-05, "loss": 0.0471, "step": 7009 }, { "epoch": 1.17, "grad_norm": 0.6050583124160767, "learning_rate": 1.3915491332956342e-05, "loss": 0.0622, "step": 7010 }, { "epoch": 1.17, "grad_norm": 0.4754351079463959, "learning_rate": 1.3913829722340358e-05, "loss": 0.0643, "step": 7011 }, { "epoch": 1.17, "grad_norm": 0.6375389099121094, "learning_rate": 1.3912167984108992e-05, "loss": 0.0786, "step": 7012 }, { "epoch": 1.17, "grad_norm": 0.5425060391426086, "learning_rate": 1.3910506118316432e-05, "loss": 0.066, "step": 7013 }, { "epoch": 1.17, "grad_norm": 0.4533184766769409, "learning_rate": 1.390884412501686e-05, "loss": 0.053, "step": 7014 }, { "epoch": 1.17, "grad_norm": 0.5555402636528015, "learning_rate": 1.3907182004264472e-05, "loss": 0.0676, "step": 7015 }, { "epoch": 1.17, "grad_norm": 0.5968599319458008, "learning_rate": 1.3905519756113459e-05, "loss": 0.0679, "step": 7016 }, { "epoch": 1.17, "grad_norm": 0.46004557609558105, "learning_rate": 1.3903857380618027e-05, "loss": 0.0537, "step": 7017 }, { "epoch": 1.17, "grad_norm": 0.5998705625534058, "learning_rate": 1.390219487783237e-05, "loss": 0.0586, "step": 7018 }, { "epoch": 1.17, "grad_norm": 0.7401391267776489, "learning_rate": 1.3900532247810708e-05, "loss": 0.0652, "step": 7019 }, { "epoch": 1.17, "grad_norm": 0.4760119915008545, "learning_rate": 1.3898869490607241e-05, "loss": 0.0512, "step": 7020 }, { "epoch": 1.17, "grad_norm": 0.6131419539451599, "learning_rate": 1.3897206606276195e-05, "loss": 0.0819, "step": 7021 }, { "epoch": 1.17, "grad_norm": 0.7455340623855591, "learning_rate": 1.3895543594871785e-05, "loss": 0.054, "step": 7022 }, { "epoch": 1.17, "grad_norm": 0.5345681309700012, "learning_rate": 1.3893880456448238e-05, "loss": 0.0686, "step": 7023 }, { "epoch": 1.17, "grad_norm": 0.51787930727005, "learning_rate": 1.3892217191059783e-05, "loss": 0.0548, "step": 7024 }, { "epoch": 1.17, "grad_norm": 0.4920821785926819, "learning_rate": 1.3890553798760652e-05, "loss": 0.0527, "step": 7025 }, { "epoch": 1.18, "grad_norm": 0.47886762022972107, "learning_rate": 1.388889027960508e-05, "loss": 0.0527, "step": 7026 }, { "epoch": 1.18, "grad_norm": 0.4857846200466156, "learning_rate": 1.3887226633647312e-05, "loss": 0.0454, "step": 7027 }, { "epoch": 1.18, "grad_norm": 0.477698415517807, "learning_rate": 1.3885562860941588e-05, "loss": 0.0573, "step": 7028 }, { "epoch": 1.18, "grad_norm": 0.8091458082199097, "learning_rate": 1.3883898961542163e-05, "loss": 0.0758, "step": 7029 }, { "epoch": 1.18, "grad_norm": 0.6355077624320984, "learning_rate": 1.3882234935503291e-05, "loss": 0.0669, "step": 7030 }, { "epoch": 1.18, "grad_norm": 0.4622464179992676, "learning_rate": 1.3880570782879224e-05, "loss": 0.0468, "step": 7031 }, { "epoch": 1.18, "grad_norm": 0.510016143321991, "learning_rate": 1.3878906503724226e-05, "loss": 0.0618, "step": 7032 }, { "epoch": 1.18, "grad_norm": 0.8485610485076904, "learning_rate": 1.3877242098092566e-05, "loss": 0.0869, "step": 7033 }, { "epoch": 1.18, "grad_norm": 0.6371856331825256, "learning_rate": 1.3875577566038512e-05, "loss": 0.0695, "step": 7034 }, { "epoch": 1.18, "grad_norm": 0.4697269797325134, "learning_rate": 1.3873912907616336e-05, "loss": 0.0511, "step": 7035 }, { "epoch": 1.18, "grad_norm": 0.5988136529922485, "learning_rate": 1.3872248122880319e-05, "loss": 0.0529, "step": 7036 }, { "epoch": 1.18, "grad_norm": 0.45853447914123535, "learning_rate": 1.3870583211884742e-05, "loss": 0.0671, "step": 7037 }, { "epoch": 1.18, "grad_norm": 0.6130564212799072, "learning_rate": 1.3868918174683894e-05, "loss": 0.0678, "step": 7038 }, { "epoch": 1.18, "grad_norm": 0.6704992055892944, "learning_rate": 1.3867253011332062e-05, "loss": 0.068, "step": 7039 }, { "epoch": 1.18, "grad_norm": 0.7239909768104553, "learning_rate": 1.3865587721883544e-05, "loss": 0.0694, "step": 7040 }, { "epoch": 1.18, "grad_norm": 0.5211614966392517, "learning_rate": 1.3863922306392638e-05, "loss": 0.0558, "step": 7041 }, { "epoch": 1.18, "grad_norm": 0.49139806628227234, "learning_rate": 1.3862256764913646e-05, "loss": 0.0698, "step": 7042 }, { "epoch": 1.18, "grad_norm": 0.6391714215278625, "learning_rate": 1.3860591097500874e-05, "loss": 0.0653, "step": 7043 }, { "epoch": 1.18, "grad_norm": 0.43268582224845886, "learning_rate": 1.3858925304208638e-05, "loss": 0.0554, "step": 7044 }, { "epoch": 1.18, "grad_norm": 0.5825664401054382, "learning_rate": 1.3857259385091246e-05, "loss": 0.0677, "step": 7045 }, { "epoch": 1.18, "grad_norm": 0.6812184453010559, "learning_rate": 1.3855593340203024e-05, "loss": 0.0478, "step": 7046 }, { "epoch": 1.18, "grad_norm": 0.5334511995315552, "learning_rate": 1.3853927169598293e-05, "loss": 0.0486, "step": 7047 }, { "epoch": 1.18, "grad_norm": 0.7048026323318481, "learning_rate": 1.385226087333138e-05, "loss": 0.0529, "step": 7048 }, { "epoch": 1.18, "grad_norm": 0.5077288150787354, "learning_rate": 1.3850594451456619e-05, "loss": 0.0525, "step": 7049 }, { "epoch": 1.18, "grad_norm": 0.4563788175582886, "learning_rate": 1.3848927904028342e-05, "loss": 0.0538, "step": 7050 }, { "epoch": 1.18, "grad_norm": 0.49750226736068726, "learning_rate": 1.3847261231100895e-05, "loss": 0.0782, "step": 7051 }, { "epoch": 1.18, "grad_norm": 0.7243830561637878, "learning_rate": 1.3845594432728611e-05, "loss": 0.0752, "step": 7052 }, { "epoch": 1.18, "grad_norm": 0.7874423265457153, "learning_rate": 1.3843927508965849e-05, "loss": 0.0494, "step": 7053 }, { "epoch": 1.18, "grad_norm": 0.5057847499847412, "learning_rate": 1.3842260459866959e-05, "loss": 0.0767, "step": 7054 }, { "epoch": 1.18, "grad_norm": 0.5488975048065186, "learning_rate": 1.3840593285486295e-05, "loss": 0.0675, "step": 7055 }, { "epoch": 1.18, "grad_norm": 0.5619189739227295, "learning_rate": 1.3838925985878213e-05, "loss": 0.0635, "step": 7056 }, { "epoch": 1.18, "grad_norm": 0.5344574451446533, "learning_rate": 1.3837258561097085e-05, "loss": 0.0526, "step": 7057 }, { "epoch": 1.18, "grad_norm": 0.4182761609554291, "learning_rate": 1.383559101119728e-05, "loss": 0.0519, "step": 7058 }, { "epoch": 1.18, "grad_norm": 0.46779391169548035, "learning_rate": 1.3833923336233163e-05, "loss": 0.0552, "step": 7059 }, { "epoch": 1.18, "grad_norm": 0.43731555342674255, "learning_rate": 1.3832255536259115e-05, "loss": 0.0565, "step": 7060 }, { "epoch": 1.18, "grad_norm": 0.5616949200630188, "learning_rate": 1.383058761132952e-05, "loss": 0.0544, "step": 7061 }, { "epoch": 1.18, "grad_norm": 0.47480565309524536, "learning_rate": 1.3828919561498757e-05, "loss": 0.0513, "step": 7062 }, { "epoch": 1.18, "grad_norm": 0.7782621383666992, "learning_rate": 1.3827251386821219e-05, "loss": 0.0514, "step": 7063 }, { "epoch": 1.18, "grad_norm": 0.41884106397628784, "learning_rate": 1.3825583087351295e-05, "loss": 0.0522, "step": 7064 }, { "epoch": 1.18, "grad_norm": 0.5451740622520447, "learning_rate": 1.3823914663143387e-05, "loss": 0.0653, "step": 7065 }, { "epoch": 1.18, "grad_norm": 0.4758048951625824, "learning_rate": 1.3822246114251892e-05, "loss": 0.0494, "step": 7066 }, { "epoch": 1.18, "grad_norm": 0.5231012105941772, "learning_rate": 1.3820577440731217e-05, "loss": 0.047, "step": 7067 }, { "epoch": 1.18, "grad_norm": 0.7278181314468384, "learning_rate": 1.3818908642635772e-05, "loss": 0.0595, "step": 7068 }, { "epoch": 1.18, "grad_norm": 0.6453313827514648, "learning_rate": 1.3817239720019966e-05, "loss": 0.0685, "step": 7069 }, { "epoch": 1.18, "grad_norm": 0.6634548902511597, "learning_rate": 1.3815570672938222e-05, "loss": 0.0823, "step": 7070 }, { "epoch": 1.18, "grad_norm": 0.6681334972381592, "learning_rate": 1.3813901501444958e-05, "loss": 0.0673, "step": 7071 }, { "epoch": 1.18, "grad_norm": 0.4596587121486664, "learning_rate": 1.38122322055946e-05, "loss": 0.0667, "step": 7072 }, { "epoch": 1.18, "grad_norm": 0.484734445810318, "learning_rate": 1.3810562785441577e-05, "loss": 0.0734, "step": 7073 }, { "epoch": 1.18, "grad_norm": 0.4996456801891327, "learning_rate": 1.3808893241040323e-05, "loss": 0.0572, "step": 7074 }, { "epoch": 1.18, "grad_norm": 0.41609129309654236, "learning_rate": 1.380722357244528e-05, "loss": 0.0458, "step": 7075 }, { "epoch": 1.18, "grad_norm": 0.7437493801116943, "learning_rate": 1.380555377971088e-05, "loss": 0.0454, "step": 7076 }, { "epoch": 1.18, "grad_norm": 0.5710305571556091, "learning_rate": 1.3803883862891578e-05, "loss": 0.0587, "step": 7077 }, { "epoch": 1.18, "grad_norm": 0.6212573051452637, "learning_rate": 1.3802213822041818e-05, "loss": 0.0701, "step": 7078 }, { "epoch": 1.18, "grad_norm": 0.5600652098655701, "learning_rate": 1.3800543657216061e-05, "loss": 0.083, "step": 7079 }, { "epoch": 1.18, "grad_norm": 0.4727037250995636, "learning_rate": 1.3798873368468755e-05, "loss": 0.0492, "step": 7080 }, { "epoch": 1.18, "grad_norm": 0.5229247212409973, "learning_rate": 1.3797202955854368e-05, "loss": 0.0474, "step": 7081 }, { "epoch": 1.18, "grad_norm": 0.5868343710899353, "learning_rate": 1.3795532419427364e-05, "loss": 0.0526, "step": 7082 }, { "epoch": 1.18, "grad_norm": 0.44293344020843506, "learning_rate": 1.3793861759242213e-05, "loss": 0.0682, "step": 7083 }, { "epoch": 1.18, "grad_norm": 0.6857480406761169, "learning_rate": 1.379219097535339e-05, "loss": 0.0667, "step": 7084 }, { "epoch": 1.18, "grad_norm": 0.5287569165229797, "learning_rate": 1.3790520067815371e-05, "loss": 0.0624, "step": 7085 }, { "epoch": 1.19, "grad_norm": 0.70472651720047, "learning_rate": 1.3788849036682644e-05, "loss": 0.061, "step": 7086 }, { "epoch": 1.19, "grad_norm": 0.47247210144996643, "learning_rate": 1.3787177882009687e-05, "loss": 0.0633, "step": 7087 }, { "epoch": 1.19, "grad_norm": 0.3541755676269531, "learning_rate": 1.3785506603850993e-05, "loss": 0.0302, "step": 7088 }, { "epoch": 1.19, "grad_norm": 0.5558890104293823, "learning_rate": 1.378383520226106e-05, "loss": 0.0534, "step": 7089 }, { "epoch": 1.19, "grad_norm": 0.5864507555961609, "learning_rate": 1.378216367729438e-05, "loss": 0.0703, "step": 7090 }, { "epoch": 1.19, "grad_norm": 0.3589339256286621, "learning_rate": 1.3780492029005462e-05, "loss": 0.0471, "step": 7091 }, { "epoch": 1.19, "grad_norm": 0.513892412185669, "learning_rate": 1.3778820257448802e-05, "loss": 0.0553, "step": 7092 }, { "epoch": 1.19, "grad_norm": 0.6086449027061462, "learning_rate": 1.3777148362678922e-05, "loss": 0.0846, "step": 7093 }, { "epoch": 1.19, "grad_norm": 0.7244274020195007, "learning_rate": 1.377547634475033e-05, "loss": 0.0827, "step": 7094 }, { "epoch": 1.19, "grad_norm": 0.7221723198890686, "learning_rate": 1.3773804203717544e-05, "loss": 0.0758, "step": 7095 }, { "epoch": 1.19, "grad_norm": 0.6107105612754822, "learning_rate": 1.377213193963509e-05, "loss": 0.0488, "step": 7096 }, { "epoch": 1.19, "grad_norm": 0.6182087659835815, "learning_rate": 1.3770459552557492e-05, "loss": 0.0607, "step": 7097 }, { "epoch": 1.19, "grad_norm": 0.5508935451507568, "learning_rate": 1.3768787042539278e-05, "loss": 0.0522, "step": 7098 }, { "epoch": 1.19, "grad_norm": 0.5223382115364075, "learning_rate": 1.3767114409634983e-05, "loss": 0.0672, "step": 7099 }, { "epoch": 1.19, "grad_norm": 0.6172325611114502, "learning_rate": 1.3765441653899148e-05, "loss": 0.078, "step": 7100 }, { "epoch": 1.19, "grad_norm": 0.5382634997367859, "learning_rate": 1.3763768775386314e-05, "loss": 0.0683, "step": 7101 }, { "epoch": 1.19, "grad_norm": 0.4518408179283142, "learning_rate": 1.376209577415103e-05, "loss": 0.0482, "step": 7102 }, { "epoch": 1.19, "grad_norm": 0.5175284743309021, "learning_rate": 1.376042265024784e-05, "loss": 0.0575, "step": 7103 }, { "epoch": 1.19, "grad_norm": 0.348470002412796, "learning_rate": 1.3758749403731304e-05, "loss": 0.0479, "step": 7104 }, { "epoch": 1.19, "grad_norm": 0.48668280243873596, "learning_rate": 1.3757076034655979e-05, "loss": 0.0543, "step": 7105 }, { "epoch": 1.19, "grad_norm": 0.4135076701641083, "learning_rate": 1.3755402543076424e-05, "loss": 0.046, "step": 7106 }, { "epoch": 1.19, "grad_norm": 0.5275532007217407, "learning_rate": 1.3753728929047212e-05, "loss": 0.0536, "step": 7107 }, { "epoch": 1.19, "grad_norm": 0.5093510150909424, "learning_rate": 1.3752055192622904e-05, "loss": 0.0748, "step": 7108 }, { "epoch": 1.19, "grad_norm": 0.56502366065979, "learning_rate": 1.3750381333858083e-05, "loss": 0.0618, "step": 7109 }, { "epoch": 1.19, "grad_norm": 0.6143286228179932, "learning_rate": 1.3748707352807324e-05, "loss": 0.0748, "step": 7110 }, { "epoch": 1.19, "grad_norm": 0.5846658945083618, "learning_rate": 1.3747033249525211e-05, "loss": 0.0675, "step": 7111 }, { "epoch": 1.19, "grad_norm": 0.527683675289154, "learning_rate": 1.3745359024066322e-05, "loss": 0.073, "step": 7112 }, { "epoch": 1.19, "grad_norm": 0.442981094121933, "learning_rate": 1.3743684676485259e-05, "loss": 0.0464, "step": 7113 }, { "epoch": 1.19, "grad_norm": 0.9544013738632202, "learning_rate": 1.3742010206836608e-05, "loss": 0.058, "step": 7114 }, { "epoch": 1.19, "grad_norm": 0.4898110032081604, "learning_rate": 1.3740335615174973e-05, "loss": 0.0646, "step": 7115 }, { "epoch": 1.19, "grad_norm": 0.6120734810829163, "learning_rate": 1.373866090155495e-05, "loss": 0.0738, "step": 7116 }, { "epoch": 1.19, "grad_norm": 0.4467930495738983, "learning_rate": 1.373698606603115e-05, "loss": 0.0458, "step": 7117 }, { "epoch": 1.19, "grad_norm": 0.5490432381629944, "learning_rate": 1.3735311108658183e-05, "loss": 0.0567, "step": 7118 }, { "epoch": 1.19, "grad_norm": 0.43415367603302, "learning_rate": 1.373363602949066e-05, "loss": 0.063, "step": 7119 }, { "epoch": 1.19, "grad_norm": 0.39471009373664856, "learning_rate": 1.37319608285832e-05, "loss": 0.0738, "step": 7120 }, { "epoch": 1.19, "grad_norm": 1.6288255453109741, "learning_rate": 1.3730285505990429e-05, "loss": 0.0807, "step": 7121 }, { "epoch": 1.19, "grad_norm": 0.6398922801017761, "learning_rate": 1.3728610061766967e-05, "loss": 0.0623, "step": 7122 }, { "epoch": 1.19, "grad_norm": 0.5688582062721252, "learning_rate": 1.3726934495967447e-05, "loss": 0.0639, "step": 7123 }, { "epoch": 1.19, "grad_norm": 0.5665708780288696, "learning_rate": 1.3725258808646502e-05, "loss": 0.081, "step": 7124 }, { "epoch": 1.19, "grad_norm": 0.47236135601997375, "learning_rate": 1.3723582999858773e-05, "loss": 0.0505, "step": 7125 }, { "epoch": 1.19, "grad_norm": 0.46845731139183044, "learning_rate": 1.3721907069658898e-05, "loss": 0.0407, "step": 7126 }, { "epoch": 1.19, "grad_norm": 0.5151424407958984, "learning_rate": 1.3720231018101524e-05, "loss": 0.0531, "step": 7127 }, { "epoch": 1.19, "grad_norm": 0.4586057960987091, "learning_rate": 1.3718554845241303e-05, "loss": 0.0696, "step": 7128 }, { "epoch": 1.19, "grad_norm": 0.5814123749732971, "learning_rate": 1.3716878551132884e-05, "loss": 0.0546, "step": 7129 }, { "epoch": 1.19, "grad_norm": 0.3903161287307739, "learning_rate": 1.371520213583093e-05, "loss": 0.0613, "step": 7130 }, { "epoch": 1.19, "grad_norm": 0.5038506984710693, "learning_rate": 1.3713525599390102e-05, "loss": 0.0579, "step": 7131 }, { "epoch": 1.19, "grad_norm": 0.4777204394340515, "learning_rate": 1.3711848941865058e-05, "loss": 0.0558, "step": 7132 }, { "epoch": 1.19, "grad_norm": 0.5628743767738342, "learning_rate": 1.3710172163310478e-05, "loss": 0.0572, "step": 7133 }, { "epoch": 1.19, "grad_norm": 0.7984967827796936, "learning_rate": 1.3708495263781027e-05, "loss": 0.0584, "step": 7134 }, { "epoch": 1.19, "grad_norm": 0.6038984656333923, "learning_rate": 1.3706818243331393e-05, "loss": 0.0793, "step": 7135 }, { "epoch": 1.19, "grad_norm": 0.566864013671875, "learning_rate": 1.3705141102016244e-05, "loss": 0.0694, "step": 7136 }, { "epoch": 1.19, "grad_norm": 0.5115327835083008, "learning_rate": 1.3703463839890275e-05, "loss": 0.0552, "step": 7137 }, { "epoch": 1.19, "grad_norm": 0.5709474086761475, "learning_rate": 1.3701786457008173e-05, "loss": 0.0686, "step": 7138 }, { "epoch": 1.19, "grad_norm": 0.8728247880935669, "learning_rate": 1.3700108953424629e-05, "loss": 0.0781, "step": 7139 }, { "epoch": 1.19, "grad_norm": 0.4894762933254242, "learning_rate": 1.369843132919434e-05, "loss": 0.0556, "step": 7140 }, { "epoch": 1.19, "grad_norm": 0.5438471436500549, "learning_rate": 1.3696753584372008e-05, "loss": 0.0547, "step": 7141 }, { "epoch": 1.19, "grad_norm": 1.011273741722107, "learning_rate": 1.3695075719012346e-05, "loss": 0.0765, "step": 7142 }, { "epoch": 1.19, "grad_norm": 0.5732934474945068, "learning_rate": 1.3693397733170049e-05, "loss": 0.0479, "step": 7143 }, { "epoch": 1.19, "grad_norm": 1.2778762578964233, "learning_rate": 1.3691719626899835e-05, "loss": 0.0864, "step": 7144 }, { "epoch": 1.2, "grad_norm": 1.3076646327972412, "learning_rate": 1.3690041400256427e-05, "loss": 0.0671, "step": 7145 }, { "epoch": 1.2, "grad_norm": 0.40504124760627747, "learning_rate": 1.368836305329454e-05, "loss": 0.0525, "step": 7146 }, { "epoch": 1.2, "grad_norm": 0.5029780864715576, "learning_rate": 1.36866845860689e-05, "loss": 0.0642, "step": 7147 }, { "epoch": 1.2, "grad_norm": 0.5643284320831299, "learning_rate": 1.3685005998634229e-05, "loss": 0.0705, "step": 7148 }, { "epoch": 1.2, "grad_norm": 1.109907865524292, "learning_rate": 1.3683327291045271e-05, "loss": 0.0677, "step": 7149 }, { "epoch": 1.2, "grad_norm": 0.5060971975326538, "learning_rate": 1.3681648463356756e-05, "loss": 0.0455, "step": 7150 }, { "epoch": 1.2, "grad_norm": 0.5469726324081421, "learning_rate": 1.3679969515623426e-05, "loss": 0.0605, "step": 7151 }, { "epoch": 1.2, "grad_norm": 0.470101922750473, "learning_rate": 1.3678290447900025e-05, "loss": 0.0586, "step": 7152 }, { "epoch": 1.2, "grad_norm": 0.6901474595069885, "learning_rate": 1.3676611260241299e-05, "loss": 0.0676, "step": 7153 }, { "epoch": 1.2, "grad_norm": 0.6141456961631775, "learning_rate": 1.3674931952702001e-05, "loss": 0.067, "step": 7154 }, { "epoch": 1.2, "grad_norm": 0.49603021144866943, "learning_rate": 1.3673252525336888e-05, "loss": 0.0591, "step": 7155 }, { "epoch": 1.2, "grad_norm": 0.40467017889022827, "learning_rate": 1.367157297820072e-05, "loss": 0.0601, "step": 7156 }, { "epoch": 1.2, "grad_norm": 0.7230486869812012, "learning_rate": 1.3669893311348261e-05, "loss": 0.043, "step": 7157 }, { "epoch": 1.2, "grad_norm": 0.5616985559463501, "learning_rate": 1.3668213524834277e-05, "loss": 0.0419, "step": 7158 }, { "epoch": 1.2, "grad_norm": 0.4845753610134125, "learning_rate": 1.3666533618713542e-05, "loss": 0.0632, "step": 7159 }, { "epoch": 1.2, "grad_norm": 0.47008490562438965, "learning_rate": 1.3664853593040828e-05, "loss": 0.0685, "step": 7160 }, { "epoch": 1.2, "grad_norm": 0.5842297077178955, "learning_rate": 1.3663173447870915e-05, "loss": 0.0775, "step": 7161 }, { "epoch": 1.2, "grad_norm": 0.453085720539093, "learning_rate": 1.3661493183258589e-05, "loss": 0.0516, "step": 7162 }, { "epoch": 1.2, "grad_norm": 0.5201780200004578, "learning_rate": 1.3659812799258638e-05, "loss": 0.0697, "step": 7163 }, { "epoch": 1.2, "grad_norm": 0.48130038380622864, "learning_rate": 1.3658132295925847e-05, "loss": 0.0422, "step": 7164 }, { "epoch": 1.2, "grad_norm": 0.5695622563362122, "learning_rate": 1.3656451673315016e-05, "loss": 0.0597, "step": 7165 }, { "epoch": 1.2, "grad_norm": 0.743382453918457, "learning_rate": 1.3654770931480946e-05, "loss": 0.0883, "step": 7166 }, { "epoch": 1.2, "grad_norm": 0.6939098238945007, "learning_rate": 1.3653090070478434e-05, "loss": 0.0672, "step": 7167 }, { "epoch": 1.2, "grad_norm": 0.586527943611145, "learning_rate": 1.3651409090362284e-05, "loss": 0.0543, "step": 7168 }, { "epoch": 1.2, "grad_norm": 0.9500270485877991, "learning_rate": 1.3649727991187316e-05, "loss": 0.071, "step": 7169 }, { "epoch": 1.2, "grad_norm": 0.5572781562805176, "learning_rate": 1.364804677300834e-05, "loss": 0.0666, "step": 7170 }, { "epoch": 1.2, "grad_norm": 0.5711447596549988, "learning_rate": 1.3646365435880176e-05, "loss": 0.0717, "step": 7171 }, { "epoch": 1.2, "grad_norm": 0.6859536170959473, "learning_rate": 1.3644683979857637e-05, "loss": 0.0752, "step": 7172 }, { "epoch": 1.2, "grad_norm": 0.49565479159355164, "learning_rate": 1.3643002404995565e-05, "loss": 0.0538, "step": 7173 }, { "epoch": 1.2, "grad_norm": 0.736235499382019, "learning_rate": 1.3641320711348773e-05, "loss": 0.0719, "step": 7174 }, { "epoch": 1.2, "grad_norm": 1.1017403602600098, "learning_rate": 1.3639638898972108e-05, "loss": 0.0942, "step": 7175 }, { "epoch": 1.2, "grad_norm": 0.5771804451942444, "learning_rate": 1.3637956967920399e-05, "loss": 0.0724, "step": 7176 }, { "epoch": 1.2, "grad_norm": 0.43854597210884094, "learning_rate": 1.3636274918248493e-05, "loss": 0.0531, "step": 7177 }, { "epoch": 1.2, "grad_norm": 0.4034566879272461, "learning_rate": 1.3634592750011233e-05, "loss": 0.0532, "step": 7178 }, { "epoch": 1.2, "grad_norm": 0.5132377743721008, "learning_rate": 1.3632910463263466e-05, "loss": 0.0706, "step": 7179 }, { "epoch": 1.2, "grad_norm": 0.6273715496063232, "learning_rate": 1.363122805806005e-05, "loss": 0.0483, "step": 7180 }, { "epoch": 1.2, "grad_norm": 0.4784606099128723, "learning_rate": 1.362954553445584e-05, "loss": 0.054, "step": 7181 }, { "epoch": 1.2, "grad_norm": 0.6264678835868835, "learning_rate": 1.3627862892505694e-05, "loss": 0.0753, "step": 7182 }, { "epoch": 1.2, "grad_norm": 0.542599618434906, "learning_rate": 1.362618013226448e-05, "loss": 0.0542, "step": 7183 }, { "epoch": 1.2, "grad_norm": 0.6404092311859131, "learning_rate": 1.3624497253787066e-05, "loss": 0.0616, "step": 7184 }, { "epoch": 1.2, "grad_norm": 0.3977701663970947, "learning_rate": 1.3622814257128324e-05, "loss": 0.0545, "step": 7185 }, { "epoch": 1.2, "grad_norm": 0.5629672408103943, "learning_rate": 1.3621131142343128e-05, "loss": 0.0739, "step": 7186 }, { "epoch": 1.2, "grad_norm": 0.4171040654182434, "learning_rate": 1.3619447909486366e-05, "loss": 0.0468, "step": 7187 }, { "epoch": 1.2, "grad_norm": 0.3953682780265808, "learning_rate": 1.361776455861291e-05, "loss": 0.0526, "step": 7188 }, { "epoch": 1.2, "grad_norm": 1.1228001117706299, "learning_rate": 1.3616081089777655e-05, "loss": 0.0814, "step": 7189 }, { "epoch": 1.2, "grad_norm": 0.528322160243988, "learning_rate": 1.3614397503035493e-05, "loss": 0.0594, "step": 7190 }, { "epoch": 1.2, "grad_norm": 0.49103474617004395, "learning_rate": 1.361271379844132e-05, "loss": 0.0757, "step": 7191 }, { "epoch": 1.2, "grad_norm": 0.7277998924255371, "learning_rate": 1.361102997605003e-05, "loss": 0.0801, "step": 7192 }, { "epoch": 1.2, "grad_norm": 0.695375382900238, "learning_rate": 1.3609346035916529e-05, "loss": 0.0825, "step": 7193 }, { "epoch": 1.2, "grad_norm": 0.5973742604255676, "learning_rate": 1.3607661978095728e-05, "loss": 0.053, "step": 7194 }, { "epoch": 1.2, "grad_norm": 0.6117118000984192, "learning_rate": 1.360597780264253e-05, "loss": 0.0677, "step": 7195 }, { "epoch": 1.2, "grad_norm": 0.46764543652534485, "learning_rate": 1.3604293509611857e-05, "loss": 0.0655, "step": 7196 }, { "epoch": 1.2, "grad_norm": 0.648838996887207, "learning_rate": 1.3602609099058625e-05, "loss": 0.0724, "step": 7197 }, { "epoch": 1.2, "grad_norm": 0.6076167225837708, "learning_rate": 1.3600924571037757e-05, "loss": 0.067, "step": 7198 }, { "epoch": 1.2, "grad_norm": 0.4297662377357483, "learning_rate": 1.3599239925604176e-05, "loss": 0.0486, "step": 7199 }, { "epoch": 1.2, "grad_norm": 0.47179701924324036, "learning_rate": 1.3597555162812813e-05, "loss": 0.0634, "step": 7200 }, { "epoch": 1.2, "grad_norm": 0.47886520624160767, "learning_rate": 1.3595870282718606e-05, "loss": 0.0581, "step": 7201 }, { "epoch": 1.2, "grad_norm": 0.46289610862731934, "learning_rate": 1.3594185285376486e-05, "loss": 0.0454, "step": 7202 }, { "epoch": 1.2, "grad_norm": 0.5752596259117126, "learning_rate": 1.3592500170841401e-05, "loss": 0.0573, "step": 7203 }, { "epoch": 1.2, "grad_norm": 0.6655195355415344, "learning_rate": 1.359081493916829e-05, "loss": 0.0668, "step": 7204 }, { "epoch": 1.21, "grad_norm": 0.3750603199005127, "learning_rate": 1.3589129590412111e-05, "loss": 0.063, "step": 7205 }, { "epoch": 1.21, "grad_norm": 0.6437928676605225, "learning_rate": 1.3587444124627807e-05, "loss": 0.074, "step": 7206 }, { "epoch": 1.21, "grad_norm": 0.4567089378833771, "learning_rate": 1.3585758541870339e-05, "loss": 0.0619, "step": 7207 }, { "epoch": 1.21, "grad_norm": 0.47778043150901794, "learning_rate": 1.3584072842194669e-05, "loss": 0.0507, "step": 7208 }, { "epoch": 1.21, "grad_norm": 0.538777768611908, "learning_rate": 1.358238702565576e-05, "loss": 0.0642, "step": 7209 }, { "epoch": 1.21, "grad_norm": 0.43190324306488037, "learning_rate": 1.3580701092308581e-05, "loss": 0.0454, "step": 7210 }, { "epoch": 1.21, "grad_norm": 0.5615764856338501, "learning_rate": 1.3579015042208103e-05, "loss": 0.0453, "step": 7211 }, { "epoch": 1.21, "grad_norm": 0.42492544651031494, "learning_rate": 1.3577328875409301e-05, "loss": 0.0637, "step": 7212 }, { "epoch": 1.21, "grad_norm": 0.43236175179481506, "learning_rate": 1.3575642591967157e-05, "loss": 0.0442, "step": 7213 }, { "epoch": 1.21, "grad_norm": 0.5174235701560974, "learning_rate": 1.3573956191936653e-05, "loss": 0.0807, "step": 7214 }, { "epoch": 1.21, "grad_norm": 0.8328017592430115, "learning_rate": 1.3572269675372778e-05, "loss": 0.0652, "step": 7215 }, { "epoch": 1.21, "grad_norm": 0.6378893852233887, "learning_rate": 1.3570583042330516e-05, "loss": 0.062, "step": 7216 }, { "epoch": 1.21, "grad_norm": 0.47929683327674866, "learning_rate": 1.3568896292864871e-05, "loss": 0.0525, "step": 7217 }, { "epoch": 1.21, "grad_norm": 0.8289587497711182, "learning_rate": 1.3567209427030834e-05, "loss": 0.0711, "step": 7218 }, { "epoch": 1.21, "grad_norm": 0.5237821340560913, "learning_rate": 1.3565522444883416e-05, "loss": 0.0606, "step": 7219 }, { "epoch": 1.21, "grad_norm": 0.45884791016578674, "learning_rate": 1.3563835346477617e-05, "loss": 0.0424, "step": 7220 }, { "epoch": 1.21, "grad_norm": 0.5331241488456726, "learning_rate": 1.3562148131868447e-05, "loss": 0.0671, "step": 7221 }, { "epoch": 1.21, "grad_norm": 4.06370210647583, "learning_rate": 1.3560460801110921e-05, "loss": 0.0768, "step": 7222 }, { "epoch": 1.21, "grad_norm": 0.5540679693222046, "learning_rate": 1.3558773354260057e-05, "loss": 0.0482, "step": 7223 }, { "epoch": 1.21, "grad_norm": 0.49087727069854736, "learning_rate": 1.3557085791370877e-05, "loss": 0.0587, "step": 7224 }, { "epoch": 1.21, "grad_norm": 0.6605756878852844, "learning_rate": 1.3555398112498403e-05, "loss": 0.0742, "step": 7225 }, { "epoch": 1.21, "grad_norm": 0.5419835448265076, "learning_rate": 1.3553710317697668e-05, "loss": 0.0776, "step": 7226 }, { "epoch": 1.21, "grad_norm": 0.6106476783752441, "learning_rate": 1.3552022407023704e-05, "loss": 0.0658, "step": 7227 }, { "epoch": 1.21, "grad_norm": 0.574709951877594, "learning_rate": 1.355033438053154e-05, "loss": 0.0576, "step": 7228 }, { "epoch": 1.21, "grad_norm": 0.8067757487297058, "learning_rate": 1.3548646238276228e-05, "loss": 0.0852, "step": 7229 }, { "epoch": 1.21, "grad_norm": 0.701336681842804, "learning_rate": 1.3546957980312807e-05, "loss": 0.0478, "step": 7230 }, { "epoch": 1.21, "grad_norm": 0.5885974764823914, "learning_rate": 1.3545269606696321e-05, "loss": 0.0833, "step": 7231 }, { "epoch": 1.21, "grad_norm": 0.48480522632598877, "learning_rate": 1.3543581117481823e-05, "loss": 0.0683, "step": 7232 }, { "epoch": 1.21, "grad_norm": 0.5103471279144287, "learning_rate": 1.3541892512724376e-05, "loss": 0.0704, "step": 7233 }, { "epoch": 1.21, "grad_norm": 1.0369457006454468, "learning_rate": 1.3540203792479029e-05, "loss": 0.0757, "step": 7234 }, { "epoch": 1.21, "grad_norm": 0.531528115272522, "learning_rate": 1.3538514956800852e-05, "loss": 0.0664, "step": 7235 }, { "epoch": 1.21, "grad_norm": 0.667655885219574, "learning_rate": 1.3536826005744904e-05, "loss": 0.0606, "step": 7236 }, { "epoch": 1.21, "grad_norm": 0.5134852528572083, "learning_rate": 1.3535136939366264e-05, "loss": 0.0529, "step": 7237 }, { "epoch": 1.21, "grad_norm": 0.4338104724884033, "learning_rate": 1.3533447757720001e-05, "loss": 0.0468, "step": 7238 }, { "epoch": 1.21, "grad_norm": 0.9752128720283508, "learning_rate": 1.3531758460861196e-05, "loss": 0.0595, "step": 7239 }, { "epoch": 1.21, "grad_norm": 0.5871821045875549, "learning_rate": 1.3530069048844927e-05, "loss": 0.0611, "step": 7240 }, { "epoch": 1.21, "grad_norm": 1.738200306892395, "learning_rate": 1.352837952172628e-05, "loss": 0.066, "step": 7241 }, { "epoch": 1.21, "grad_norm": 0.48896217346191406, "learning_rate": 1.3526689879560348e-05, "loss": 0.0534, "step": 7242 }, { "epoch": 1.21, "grad_norm": 0.7982845306396484, "learning_rate": 1.3525000122402222e-05, "loss": 0.0712, "step": 7243 }, { "epoch": 1.21, "grad_norm": 0.37968680262565613, "learning_rate": 1.3523310250306993e-05, "loss": 0.0461, "step": 7244 }, { "epoch": 1.21, "grad_norm": 0.8402169942855835, "learning_rate": 1.3521620263329771e-05, "loss": 0.0606, "step": 7245 }, { "epoch": 1.21, "grad_norm": 0.4238729774951935, "learning_rate": 1.3519930161525654e-05, "loss": 0.0555, "step": 7246 }, { "epoch": 1.21, "grad_norm": 0.4004814624786377, "learning_rate": 1.3518239944949753e-05, "loss": 0.0497, "step": 7247 }, { "epoch": 1.21, "grad_norm": 0.6947763562202454, "learning_rate": 1.3516549613657175e-05, "loss": 0.0838, "step": 7248 }, { "epoch": 1.21, "grad_norm": 0.49964916706085205, "learning_rate": 1.3514859167703041e-05, "loss": 0.0473, "step": 7249 }, { "epoch": 1.21, "grad_norm": 0.6927536725997925, "learning_rate": 1.3513168607142468e-05, "loss": 0.0536, "step": 7250 }, { "epoch": 1.21, "grad_norm": 0.5359566807746887, "learning_rate": 1.3511477932030577e-05, "loss": 0.0544, "step": 7251 }, { "epoch": 1.21, "grad_norm": 0.816051721572876, "learning_rate": 1.3509787142422495e-05, "loss": 0.0851, "step": 7252 }, { "epoch": 1.21, "grad_norm": 1.0646663904190063, "learning_rate": 1.3508096238373356e-05, "loss": 0.0759, "step": 7253 }, { "epoch": 1.21, "grad_norm": 0.6170501708984375, "learning_rate": 1.3506405219938292e-05, "loss": 0.0742, "step": 7254 }, { "epoch": 1.21, "grad_norm": 0.5257692933082581, "learning_rate": 1.350471408717244e-05, "loss": 0.0542, "step": 7255 }, { "epoch": 1.21, "grad_norm": 0.46791279315948486, "learning_rate": 1.350302284013094e-05, "loss": 0.0444, "step": 7256 }, { "epoch": 1.21, "grad_norm": 0.41955193877220154, "learning_rate": 1.350133147886894e-05, "loss": 0.0424, "step": 7257 }, { "epoch": 1.21, "grad_norm": 0.7819185853004456, "learning_rate": 1.349964000344159e-05, "loss": 0.0747, "step": 7258 }, { "epoch": 1.21, "grad_norm": 0.6260104179382324, "learning_rate": 1.3497948413904038e-05, "loss": 0.0633, "step": 7259 }, { "epoch": 1.21, "grad_norm": 0.6407154202461243, "learning_rate": 1.3496256710311446e-05, "loss": 0.0472, "step": 7260 }, { "epoch": 1.21, "grad_norm": 0.5727518796920776, "learning_rate": 1.3494564892718968e-05, "loss": 0.0583, "step": 7261 }, { "epoch": 1.21, "grad_norm": 0.7254419922828674, "learning_rate": 1.3492872961181773e-05, "loss": 0.0643, "step": 7262 }, { "epoch": 1.21, "grad_norm": 0.49101361632347107, "learning_rate": 1.3491180915755027e-05, "loss": 0.0573, "step": 7263 }, { "epoch": 1.21, "grad_norm": 0.5192291140556335, "learning_rate": 1.3489488756493903e-05, "loss": 0.0507, "step": 7264 }, { "epoch": 1.22, "grad_norm": 0.561813473701477, "learning_rate": 1.348779648345357e-05, "loss": 0.067, "step": 7265 }, { "epoch": 1.22, "grad_norm": 0.5707076191902161, "learning_rate": 1.3486104096689215e-05, "loss": 0.0689, "step": 7266 }, { "epoch": 1.22, "grad_norm": 0.6514265537261963, "learning_rate": 1.3484411596256013e-05, "loss": 0.0584, "step": 7267 }, { "epoch": 1.22, "grad_norm": 0.5625335574150085, "learning_rate": 1.3482718982209155e-05, "loss": 0.0572, "step": 7268 }, { "epoch": 1.22, "grad_norm": 0.6211228370666504, "learning_rate": 1.3481026254603828e-05, "loss": 0.0548, "step": 7269 }, { "epoch": 1.22, "grad_norm": 0.4404193162918091, "learning_rate": 1.3479333413495229e-05, "loss": 0.0389, "step": 7270 }, { "epoch": 1.22, "grad_norm": 0.6507841348648071, "learning_rate": 1.3477640458938552e-05, "loss": 0.0633, "step": 7271 }, { "epoch": 1.22, "grad_norm": 0.6619618535041809, "learning_rate": 1.3475947390988997e-05, "loss": 0.0752, "step": 7272 }, { "epoch": 1.22, "grad_norm": 0.5527700781822205, "learning_rate": 1.3474254209701771e-05, "loss": 0.0701, "step": 7273 }, { "epoch": 1.22, "grad_norm": 0.5809959769248962, "learning_rate": 1.3472560915132083e-05, "loss": 0.0666, "step": 7274 }, { "epoch": 1.22, "grad_norm": 0.6609983444213867, "learning_rate": 1.3470867507335146e-05, "loss": 0.0467, "step": 7275 }, { "epoch": 1.22, "grad_norm": 0.6323490738868713, "learning_rate": 1.3469173986366168e-05, "loss": 0.0669, "step": 7276 }, { "epoch": 1.22, "grad_norm": 0.5401042103767395, "learning_rate": 1.3467480352280377e-05, "loss": 0.0689, "step": 7277 }, { "epoch": 1.22, "grad_norm": 0.6461739540100098, "learning_rate": 1.3465786605132996e-05, "loss": 0.0593, "step": 7278 }, { "epoch": 1.22, "grad_norm": 0.7885899543762207, "learning_rate": 1.3464092744979245e-05, "loss": 0.0705, "step": 7279 }, { "epoch": 1.22, "grad_norm": 0.5419579148292542, "learning_rate": 1.3462398771874356e-05, "loss": 0.0524, "step": 7280 }, { "epoch": 1.22, "grad_norm": 0.6724129319190979, "learning_rate": 1.346070468587357e-05, "loss": 0.0596, "step": 7281 }, { "epoch": 1.22, "grad_norm": 0.42521217465400696, "learning_rate": 1.345901048703212e-05, "loss": 0.0421, "step": 7282 }, { "epoch": 1.22, "grad_norm": 0.4298825263977051, "learning_rate": 1.3457316175405246e-05, "loss": 0.0433, "step": 7283 }, { "epoch": 1.22, "grad_norm": 0.40783992409706116, "learning_rate": 1.3455621751048193e-05, "loss": 0.0353, "step": 7284 }, { "epoch": 1.22, "grad_norm": 0.5248305201530457, "learning_rate": 1.3453927214016215e-05, "loss": 0.0496, "step": 7285 }, { "epoch": 1.22, "grad_norm": 0.6241267919540405, "learning_rate": 1.3452232564364562e-05, "loss": 0.0693, "step": 7286 }, { "epoch": 1.22, "grad_norm": 0.5834805369377136, "learning_rate": 1.3450537802148489e-05, "loss": 0.0572, "step": 7287 }, { "epoch": 1.22, "grad_norm": 0.5325966477394104, "learning_rate": 1.3448842927423254e-05, "loss": 0.0658, "step": 7288 }, { "epoch": 1.22, "grad_norm": 0.5573566555976868, "learning_rate": 1.3447147940244123e-05, "loss": 0.0675, "step": 7289 }, { "epoch": 1.22, "grad_norm": 0.4527299404144287, "learning_rate": 1.3445452840666367e-05, "loss": 0.0574, "step": 7290 }, { "epoch": 1.22, "grad_norm": 0.5555104613304138, "learning_rate": 1.344375762874525e-05, "loss": 0.059, "step": 7291 }, { "epoch": 1.22, "grad_norm": 0.4628346264362335, "learning_rate": 1.3442062304536048e-05, "loss": 0.0667, "step": 7292 }, { "epoch": 1.22, "grad_norm": 1.241153597831726, "learning_rate": 1.3440366868094042e-05, "loss": 0.0819, "step": 7293 }, { "epoch": 1.22, "grad_norm": 0.468680739402771, "learning_rate": 1.3438671319474512e-05, "loss": 0.0503, "step": 7294 }, { "epoch": 1.22, "grad_norm": 0.6051858067512512, "learning_rate": 1.3436975658732748e-05, "loss": 0.0612, "step": 7295 }, { "epoch": 1.22, "grad_norm": 0.4606018662452698, "learning_rate": 1.343527988592403e-05, "loss": 0.0564, "step": 7296 }, { "epoch": 1.22, "grad_norm": 0.38100239634513855, "learning_rate": 1.343358400110366e-05, "loss": 0.0488, "step": 7297 }, { "epoch": 1.22, "grad_norm": 0.6112974882125854, "learning_rate": 1.3431888004326927e-05, "loss": 0.0768, "step": 7298 }, { "epoch": 1.22, "grad_norm": 0.5659736394882202, "learning_rate": 1.3430191895649141e-05, "loss": 0.0526, "step": 7299 }, { "epoch": 1.22, "grad_norm": 0.4701548218727112, "learning_rate": 1.342849567512559e-05, "loss": 0.0614, "step": 7300 }, { "epoch": 1.22, "grad_norm": 0.42626193165779114, "learning_rate": 1.3426799342811597e-05, "loss": 0.0578, "step": 7301 }, { "epoch": 1.22, "grad_norm": 0.41328129172325134, "learning_rate": 1.3425102898762466e-05, "loss": 0.0433, "step": 7302 }, { "epoch": 1.22, "grad_norm": 0.6084409952163696, "learning_rate": 1.3423406343033514e-05, "loss": 0.0627, "step": 7303 }, { "epoch": 1.22, "grad_norm": 0.5156788229942322, "learning_rate": 1.3421709675680054e-05, "loss": 0.0539, "step": 7304 }, { "epoch": 1.22, "grad_norm": 0.6405453085899353, "learning_rate": 1.3420012896757415e-05, "loss": 0.0593, "step": 7305 }, { "epoch": 1.22, "grad_norm": 0.42801719903945923, "learning_rate": 1.3418316006320921e-05, "loss": 0.0466, "step": 7306 }, { "epoch": 1.22, "grad_norm": 0.5451996326446533, "learning_rate": 1.3416619004425898e-05, "loss": 0.0717, "step": 7307 }, { "epoch": 1.22, "grad_norm": 0.5794710516929626, "learning_rate": 1.3414921891127677e-05, "loss": 0.0664, "step": 7308 }, { "epoch": 1.22, "grad_norm": 0.6910960078239441, "learning_rate": 1.3413224666481601e-05, "loss": 0.0829, "step": 7309 }, { "epoch": 1.22, "grad_norm": 0.6789909601211548, "learning_rate": 1.3411527330543011e-05, "loss": 0.0573, "step": 7310 }, { "epoch": 1.22, "grad_norm": 0.4672732949256897, "learning_rate": 1.3409829883367245e-05, "loss": 0.0472, "step": 7311 }, { "epoch": 1.22, "grad_norm": 0.5127456784248352, "learning_rate": 1.3408132325009653e-05, "loss": 0.054, "step": 7312 }, { "epoch": 1.22, "grad_norm": 0.5846840739250183, "learning_rate": 1.3406434655525585e-05, "loss": 0.0733, "step": 7313 }, { "epoch": 1.22, "grad_norm": 0.47568729519844055, "learning_rate": 1.3404736874970397e-05, "loss": 0.0551, "step": 7314 }, { "epoch": 1.22, "grad_norm": 0.6331685781478882, "learning_rate": 1.3403038983399444e-05, "loss": 0.0777, "step": 7315 }, { "epoch": 1.22, "grad_norm": 0.3491227328777313, "learning_rate": 1.3401340980868094e-05, "loss": 0.0477, "step": 7316 }, { "epoch": 1.22, "grad_norm": 0.5407036542892456, "learning_rate": 1.3399642867431706e-05, "loss": 0.0537, "step": 7317 }, { "epoch": 1.22, "grad_norm": 0.44694048166275024, "learning_rate": 1.3397944643145655e-05, "loss": 0.0521, "step": 7318 }, { "epoch": 1.22, "grad_norm": 0.5119076371192932, "learning_rate": 1.3396246308065308e-05, "loss": 0.0541, "step": 7319 }, { "epoch": 1.22, "grad_norm": 0.6979897618293762, "learning_rate": 1.3394547862246046e-05, "loss": 0.0774, "step": 7320 }, { "epoch": 1.22, "grad_norm": 0.3895935118198395, "learning_rate": 1.3392849305743248e-05, "loss": 0.0461, "step": 7321 }, { "epoch": 1.22, "grad_norm": 0.565854549407959, "learning_rate": 1.3391150638612296e-05, "loss": 0.0469, "step": 7322 }, { "epoch": 1.22, "grad_norm": 0.4374460279941559, "learning_rate": 1.3389451860908577e-05, "loss": 0.0457, "step": 7323 }, { "epoch": 1.22, "grad_norm": 0.4350065588951111, "learning_rate": 1.3387752972687486e-05, "loss": 0.0688, "step": 7324 }, { "epoch": 1.23, "grad_norm": 0.4342929720878601, "learning_rate": 1.3386053974004414e-05, "loss": 0.0662, "step": 7325 }, { "epoch": 1.23, "grad_norm": 0.5036056637763977, "learning_rate": 1.3384354864914757e-05, "loss": 0.072, "step": 7326 }, { "epoch": 1.23, "grad_norm": 0.5169892311096191, "learning_rate": 1.3382655645473924e-05, "loss": 0.0568, "step": 7327 }, { "epoch": 1.23, "grad_norm": 0.4779205918312073, "learning_rate": 1.3380956315737309e-05, "loss": 0.0662, "step": 7328 }, { "epoch": 1.23, "grad_norm": 0.3937470018863678, "learning_rate": 1.3379256875760328e-05, "loss": 0.0633, "step": 7329 }, { "epoch": 1.23, "grad_norm": 0.5595902800559998, "learning_rate": 1.3377557325598392e-05, "loss": 0.0596, "step": 7330 }, { "epoch": 1.23, "grad_norm": 0.6185951232910156, "learning_rate": 1.3375857665306922e-05, "loss": 0.0514, "step": 7331 }, { "epoch": 1.23, "grad_norm": 0.6730614900588989, "learning_rate": 1.3374157894941326e-05, "loss": 0.0494, "step": 7332 }, { "epoch": 1.23, "grad_norm": 0.5520564913749695, "learning_rate": 1.337245801455704e-05, "loss": 0.0597, "step": 7333 }, { "epoch": 1.23, "grad_norm": 0.5071619153022766, "learning_rate": 1.3370758024209483e-05, "loss": 0.0665, "step": 7334 }, { "epoch": 1.23, "grad_norm": 0.4144783616065979, "learning_rate": 1.3369057923954087e-05, "loss": 0.0528, "step": 7335 }, { "epoch": 1.23, "grad_norm": 0.4706340432167053, "learning_rate": 1.3367357713846284e-05, "loss": 0.0509, "step": 7336 }, { "epoch": 1.23, "grad_norm": 0.4808157980442047, "learning_rate": 1.3365657393941514e-05, "loss": 0.0755, "step": 7337 }, { "epoch": 1.23, "grad_norm": 0.35910072922706604, "learning_rate": 1.3363956964295222e-05, "loss": 0.0549, "step": 7338 }, { "epoch": 1.23, "grad_norm": 0.5934259295463562, "learning_rate": 1.3362256424962845e-05, "loss": 0.0685, "step": 7339 }, { "epoch": 1.23, "grad_norm": 0.6900262832641602, "learning_rate": 1.3360555775999835e-05, "loss": 0.0631, "step": 7340 }, { "epoch": 1.23, "grad_norm": 0.4596231281757355, "learning_rate": 1.3358855017461642e-05, "loss": 0.0532, "step": 7341 }, { "epoch": 1.23, "grad_norm": 0.8194257616996765, "learning_rate": 1.3357154149403724e-05, "loss": 0.0799, "step": 7342 }, { "epoch": 1.23, "grad_norm": 0.6128559708595276, "learning_rate": 1.3355453171881541e-05, "loss": 0.0615, "step": 7343 }, { "epoch": 1.23, "grad_norm": 0.6732853055000305, "learning_rate": 1.3353752084950552e-05, "loss": 0.0628, "step": 7344 }, { "epoch": 1.23, "grad_norm": 0.6414886116981506, "learning_rate": 1.3352050888666221e-05, "loss": 0.065, "step": 7345 }, { "epoch": 1.23, "grad_norm": 0.4199504852294922, "learning_rate": 1.3350349583084024e-05, "loss": 0.048, "step": 7346 }, { "epoch": 1.23, "grad_norm": 0.522854745388031, "learning_rate": 1.334864816825943e-05, "loss": 0.0651, "step": 7347 }, { "epoch": 1.23, "grad_norm": 0.5415330529212952, "learning_rate": 1.3346946644247918e-05, "loss": 0.0663, "step": 7348 }, { "epoch": 1.23, "grad_norm": 0.43826955556869507, "learning_rate": 1.3345245011104968e-05, "loss": 0.0486, "step": 7349 }, { "epoch": 1.23, "grad_norm": 0.5592243075370789, "learning_rate": 1.3343543268886063e-05, "loss": 0.065, "step": 7350 }, { "epoch": 1.23, "grad_norm": 0.458609402179718, "learning_rate": 1.334184141764669e-05, "loss": 0.0582, "step": 7351 }, { "epoch": 1.23, "grad_norm": 0.5691761374473572, "learning_rate": 1.334013945744234e-05, "loss": 0.0751, "step": 7352 }, { "epoch": 1.23, "grad_norm": 0.4932524859905243, "learning_rate": 1.333843738832851e-05, "loss": 0.0659, "step": 7353 }, { "epoch": 1.23, "grad_norm": 0.44450056552886963, "learning_rate": 1.3336735210360696e-05, "loss": 0.0504, "step": 7354 }, { "epoch": 1.23, "grad_norm": 0.6853430867195129, "learning_rate": 1.3335032923594401e-05, "loss": 0.0848, "step": 7355 }, { "epoch": 1.23, "grad_norm": 0.5600168108940125, "learning_rate": 1.3333330528085129e-05, "loss": 0.0609, "step": 7356 }, { "epoch": 1.23, "grad_norm": 0.457927942276001, "learning_rate": 1.3331628023888389e-05, "loss": 0.0429, "step": 7357 }, { "epoch": 1.23, "grad_norm": 0.8729307055473328, "learning_rate": 1.332992541105969e-05, "loss": 0.0679, "step": 7358 }, { "epoch": 1.23, "grad_norm": 0.5529678463935852, "learning_rate": 1.3328222689654557e-05, "loss": 0.0518, "step": 7359 }, { "epoch": 1.23, "grad_norm": 0.47548842430114746, "learning_rate": 1.33265198597285e-05, "loss": 0.0642, "step": 7360 }, { "epoch": 1.23, "grad_norm": 0.4406042695045471, "learning_rate": 1.3324816921337048e-05, "loss": 0.0545, "step": 7361 }, { "epoch": 1.23, "grad_norm": 0.41975605487823486, "learning_rate": 1.3323113874535723e-05, "loss": 0.039, "step": 7362 }, { "epoch": 1.23, "grad_norm": 0.6732774972915649, "learning_rate": 1.3321410719380059e-05, "loss": 0.0847, "step": 7363 }, { "epoch": 1.23, "grad_norm": 0.6232834458351135, "learning_rate": 1.3319707455925584e-05, "loss": 0.0663, "step": 7364 }, { "epoch": 1.23, "grad_norm": 0.4211186468601227, "learning_rate": 1.331800408422784e-05, "loss": 0.0589, "step": 7365 }, { "epoch": 1.23, "grad_norm": 0.5168102979660034, "learning_rate": 1.331630060434237e-05, "loss": 0.056, "step": 7366 }, { "epoch": 1.23, "grad_norm": 0.5591400265693665, "learning_rate": 1.3314597016324712e-05, "loss": 0.0469, "step": 7367 }, { "epoch": 1.23, "grad_norm": 0.5135408043861389, "learning_rate": 1.3312893320230416e-05, "loss": 0.0636, "step": 7368 }, { "epoch": 1.23, "grad_norm": 0.6445445418357849, "learning_rate": 1.331118951611503e-05, "loss": 0.0569, "step": 7369 }, { "epoch": 1.23, "grad_norm": 0.5850862264633179, "learning_rate": 1.3309485604034115e-05, "loss": 0.082, "step": 7370 }, { "epoch": 1.23, "grad_norm": 0.5081186890602112, "learning_rate": 1.3307781584043227e-05, "loss": 0.0768, "step": 7371 }, { "epoch": 1.23, "grad_norm": 0.4147856533527374, "learning_rate": 1.3306077456197925e-05, "loss": 0.0586, "step": 7372 }, { "epoch": 1.23, "grad_norm": 0.49492743611335754, "learning_rate": 1.3304373220553776e-05, "loss": 0.0714, "step": 7373 }, { "epoch": 1.23, "grad_norm": 0.4742443561553955, "learning_rate": 1.330266887716635e-05, "loss": 0.0414, "step": 7374 }, { "epoch": 1.23, "grad_norm": 0.4059319496154785, "learning_rate": 1.3300964426091218e-05, "loss": 0.073, "step": 7375 }, { "epoch": 1.23, "grad_norm": 0.4513067603111267, "learning_rate": 1.3299259867383955e-05, "loss": 0.0621, "step": 7376 }, { "epoch": 1.23, "grad_norm": 0.5565862059593201, "learning_rate": 1.3297555201100143e-05, "loss": 0.0634, "step": 7377 }, { "epoch": 1.23, "grad_norm": 0.3722238838672638, "learning_rate": 1.3295850427295363e-05, "loss": 0.0493, "step": 7378 }, { "epoch": 1.23, "grad_norm": 0.6436963677406311, "learning_rate": 1.3294145546025199e-05, "loss": 0.0655, "step": 7379 }, { "epoch": 1.23, "grad_norm": 0.5297313332557678, "learning_rate": 1.3292440557345243e-05, "loss": 0.0638, "step": 7380 }, { "epoch": 1.23, "grad_norm": 0.5470246076583862, "learning_rate": 1.3290735461311091e-05, "loss": 0.0568, "step": 7381 }, { "epoch": 1.23, "grad_norm": 0.4891255497932434, "learning_rate": 1.3289030257978338e-05, "loss": 0.0636, "step": 7382 }, { "epoch": 1.23, "grad_norm": 0.7458849549293518, "learning_rate": 1.3287324947402582e-05, "loss": 0.0734, "step": 7383 }, { "epoch": 1.23, "grad_norm": 0.5199945569038391, "learning_rate": 1.3285619529639426e-05, "loss": 0.0575, "step": 7384 }, { "epoch": 1.24, "grad_norm": 0.4970249533653259, "learning_rate": 1.3283914004744482e-05, "loss": 0.0679, "step": 7385 }, { "epoch": 1.24, "grad_norm": 0.562528133392334, "learning_rate": 1.3282208372773357e-05, "loss": 0.0636, "step": 7386 }, { "epoch": 1.24, "grad_norm": 0.4519347846508026, "learning_rate": 1.328050263378167e-05, "loss": 0.0549, "step": 7387 }, { "epoch": 1.24, "grad_norm": 0.522493302822113, "learning_rate": 1.3278796787825032e-05, "loss": 0.051, "step": 7388 }, { "epoch": 1.24, "grad_norm": 0.4660842716693878, "learning_rate": 1.327709083495907e-05, "loss": 0.0575, "step": 7389 }, { "epoch": 1.24, "grad_norm": 0.5335837006568909, "learning_rate": 1.3275384775239408e-05, "loss": 0.0473, "step": 7390 }, { "epoch": 1.24, "grad_norm": 0.5527163743972778, "learning_rate": 1.3273678608721672e-05, "loss": 0.0603, "step": 7391 }, { "epoch": 1.24, "grad_norm": 0.5378372073173523, "learning_rate": 1.3271972335461491e-05, "loss": 0.0732, "step": 7392 }, { "epoch": 1.24, "grad_norm": 0.39197221398353577, "learning_rate": 1.3270265955514505e-05, "loss": 0.0496, "step": 7393 }, { "epoch": 1.24, "grad_norm": 0.5604230165481567, "learning_rate": 1.3268559468936355e-05, "loss": 0.062, "step": 7394 }, { "epoch": 1.24, "grad_norm": 0.6290794610977173, "learning_rate": 1.3266852875782679e-05, "loss": 0.0646, "step": 7395 }, { "epoch": 1.24, "grad_norm": 0.4695087671279907, "learning_rate": 1.326514617610912e-05, "loss": 0.062, "step": 7396 }, { "epoch": 1.24, "grad_norm": 0.5900958776473999, "learning_rate": 1.3263439369971334e-05, "loss": 0.0744, "step": 7397 }, { "epoch": 1.24, "grad_norm": 0.37485384941101074, "learning_rate": 1.3261732457424967e-05, "loss": 0.0541, "step": 7398 }, { "epoch": 1.24, "grad_norm": 1.0103923082351685, "learning_rate": 1.3260025438525679e-05, "loss": 0.0693, "step": 7399 }, { "epoch": 1.24, "grad_norm": 0.4728626310825348, "learning_rate": 1.3258318313329128e-05, "loss": 0.0535, "step": 7400 }, { "epoch": 1.24, "grad_norm": 0.45194441080093384, "learning_rate": 1.325661108189098e-05, "loss": 0.0562, "step": 7401 }, { "epoch": 1.24, "grad_norm": 0.6499832272529602, "learning_rate": 1.3254903744266896e-05, "loss": 0.0662, "step": 7402 }, { "epoch": 1.24, "grad_norm": 0.5929144620895386, "learning_rate": 1.3253196300512549e-05, "loss": 0.0624, "step": 7403 }, { "epoch": 1.24, "grad_norm": 0.446404367685318, "learning_rate": 1.3251488750683614e-05, "loss": 0.0461, "step": 7404 }, { "epoch": 1.24, "grad_norm": 0.7159644961357117, "learning_rate": 1.3249781094835765e-05, "loss": 0.0772, "step": 7405 }, { "epoch": 1.24, "grad_norm": 0.8847960233688354, "learning_rate": 1.3248073333024682e-05, "loss": 0.0672, "step": 7406 }, { "epoch": 1.24, "grad_norm": 0.5280494689941406, "learning_rate": 1.3246365465306052e-05, "loss": 0.0672, "step": 7407 }, { "epoch": 1.24, "grad_norm": 0.6731426119804382, "learning_rate": 1.324465749173556e-05, "loss": 0.0701, "step": 7408 }, { "epoch": 1.24, "grad_norm": 0.6736473441123962, "learning_rate": 1.3242949412368897e-05, "loss": 0.056, "step": 7409 }, { "epoch": 1.24, "grad_norm": 0.596412181854248, "learning_rate": 1.3241241227261757e-05, "loss": 0.0642, "step": 7410 }, { "epoch": 1.24, "grad_norm": 0.5640788674354553, "learning_rate": 1.3239532936469838e-05, "loss": 0.0568, "step": 7411 }, { "epoch": 1.24, "grad_norm": 0.52156662940979, "learning_rate": 1.3237824540048838e-05, "loss": 0.0659, "step": 7412 }, { "epoch": 1.24, "grad_norm": 0.42711424827575684, "learning_rate": 1.3236116038054465e-05, "loss": 0.0565, "step": 7413 }, { "epoch": 1.24, "grad_norm": 0.5711327791213989, "learning_rate": 1.3234407430542426e-05, "loss": 0.0562, "step": 7414 }, { "epoch": 1.24, "grad_norm": 0.5813998579978943, "learning_rate": 1.3232698717568434e-05, "loss": 0.0667, "step": 7415 }, { "epoch": 1.24, "grad_norm": 0.46757009625434875, "learning_rate": 1.32309898991882e-05, "loss": 0.0723, "step": 7416 }, { "epoch": 1.24, "grad_norm": 0.5380082726478577, "learning_rate": 1.3229280975457444e-05, "loss": 0.0521, "step": 7417 }, { "epoch": 1.24, "grad_norm": 0.5379630327224731, "learning_rate": 1.322757194643189e-05, "loss": 0.0511, "step": 7418 }, { "epoch": 1.24, "grad_norm": 0.6007052063941956, "learning_rate": 1.3225862812167258e-05, "loss": 0.0485, "step": 7419 }, { "epoch": 1.24, "grad_norm": 0.7095043063163757, "learning_rate": 1.322415357271928e-05, "loss": 0.0763, "step": 7420 }, { "epoch": 1.24, "grad_norm": 0.3705938160419464, "learning_rate": 1.3222444228143687e-05, "loss": 0.0424, "step": 7421 }, { "epoch": 1.24, "grad_norm": 0.37290945649147034, "learning_rate": 1.3220734778496218e-05, "loss": 0.0542, "step": 7422 }, { "epoch": 1.24, "grad_norm": 0.5241116881370544, "learning_rate": 1.3219025223832605e-05, "loss": 0.0495, "step": 7423 }, { "epoch": 1.24, "grad_norm": 0.46601802110671997, "learning_rate": 1.3217315564208593e-05, "loss": 0.0565, "step": 7424 }, { "epoch": 1.24, "grad_norm": 0.42040905356407166, "learning_rate": 1.3215605799679929e-05, "loss": 0.0438, "step": 7425 }, { "epoch": 1.24, "grad_norm": 0.5947714447975159, "learning_rate": 1.3213895930302361e-05, "loss": 0.0679, "step": 7426 }, { "epoch": 1.24, "grad_norm": 0.41570064425468445, "learning_rate": 1.321218595613164e-05, "loss": 0.0465, "step": 7427 }, { "epoch": 1.24, "grad_norm": 0.4730706512928009, "learning_rate": 1.3210475877223526e-05, "loss": 0.0709, "step": 7428 }, { "epoch": 1.24, "grad_norm": 0.5348201990127563, "learning_rate": 1.3208765693633777e-05, "loss": 0.0712, "step": 7429 }, { "epoch": 1.24, "grad_norm": 0.4380843937397003, "learning_rate": 1.320705540541815e-05, "loss": 0.0612, "step": 7430 }, { "epoch": 1.24, "grad_norm": 0.5449928045272827, "learning_rate": 1.3205345012632417e-05, "loss": 0.0565, "step": 7431 }, { "epoch": 1.24, "grad_norm": 0.4488467276096344, "learning_rate": 1.3203634515332348e-05, "loss": 0.0651, "step": 7432 }, { "epoch": 1.24, "grad_norm": 0.6449933648109436, "learning_rate": 1.3201923913573714e-05, "loss": 0.0589, "step": 7433 }, { "epoch": 1.24, "grad_norm": 0.7256874442100525, "learning_rate": 1.3200213207412289e-05, "loss": 0.0632, "step": 7434 }, { "epoch": 1.24, "grad_norm": 0.6255992650985718, "learning_rate": 1.3198502396903857e-05, "loss": 0.0571, "step": 7435 }, { "epoch": 1.24, "grad_norm": 0.5897459387779236, "learning_rate": 1.3196791482104198e-05, "loss": 0.0797, "step": 7436 }, { "epoch": 1.24, "grad_norm": 0.6272991895675659, "learning_rate": 1.3195080463069101e-05, "loss": 0.0668, "step": 7437 }, { "epoch": 1.24, "grad_norm": 0.7265517711639404, "learning_rate": 1.3193369339854353e-05, "loss": 0.0762, "step": 7438 }, { "epoch": 1.24, "grad_norm": 0.7366159558296204, "learning_rate": 1.3191658112515753e-05, "loss": 0.064, "step": 7439 }, { "epoch": 1.24, "grad_norm": 0.5985684394836426, "learning_rate": 1.3189946781109091e-05, "loss": 0.082, "step": 7440 }, { "epoch": 1.24, "grad_norm": 0.5760094523429871, "learning_rate": 1.3188235345690171e-05, "loss": 0.0602, "step": 7441 }, { "epoch": 1.24, "grad_norm": 0.6359381675720215, "learning_rate": 1.3186523806314792e-05, "loss": 0.0465, "step": 7442 }, { "epoch": 1.24, "grad_norm": 0.4539739191532135, "learning_rate": 1.318481216303877e-05, "loss": 0.041, "step": 7443 }, { "epoch": 1.25, "grad_norm": 0.5976839065551758, "learning_rate": 1.3183100415917908e-05, "loss": 0.0548, "step": 7444 }, { "epoch": 1.25, "grad_norm": 0.4415128231048584, "learning_rate": 1.3181388565008018e-05, "loss": 0.0488, "step": 7445 }, { "epoch": 1.25, "grad_norm": 0.5171676278114319, "learning_rate": 1.3179676610364925e-05, "loss": 0.0686, "step": 7446 }, { "epoch": 1.25, "grad_norm": 0.4854256510734558, "learning_rate": 1.3177964552044441e-05, "loss": 0.0714, "step": 7447 }, { "epoch": 1.25, "grad_norm": 0.4193466007709503, "learning_rate": 1.3176252390102398e-05, "loss": 0.0542, "step": 7448 }, { "epoch": 1.25, "grad_norm": 0.44523897767066956, "learning_rate": 1.3174540124594613e-05, "loss": 0.058, "step": 7449 }, { "epoch": 1.25, "grad_norm": 0.4483901262283325, "learning_rate": 1.3172827755576929e-05, "loss": 0.056, "step": 7450 }, { "epoch": 1.25, "grad_norm": 0.5312241911888123, "learning_rate": 1.317111528310517e-05, "loss": 0.0821, "step": 7451 }, { "epoch": 1.25, "grad_norm": 0.4513856768608093, "learning_rate": 1.3169402707235177e-05, "loss": 0.0557, "step": 7452 }, { "epoch": 1.25, "grad_norm": 0.6003806591033936, "learning_rate": 1.3167690028022788e-05, "loss": 0.0492, "step": 7453 }, { "epoch": 1.25, "grad_norm": 0.5756558179855347, "learning_rate": 1.3165977245523856e-05, "loss": 0.0684, "step": 7454 }, { "epoch": 1.25, "grad_norm": 0.643236517906189, "learning_rate": 1.3164264359794219e-05, "loss": 0.0838, "step": 7455 }, { "epoch": 1.25, "grad_norm": 0.49302035570144653, "learning_rate": 1.316255137088973e-05, "loss": 0.0591, "step": 7456 }, { "epoch": 1.25, "grad_norm": 0.48447781801223755, "learning_rate": 1.3160838278866243e-05, "loss": 0.0547, "step": 7457 }, { "epoch": 1.25, "grad_norm": 0.47309523820877075, "learning_rate": 1.315912508377962e-05, "loss": 0.0413, "step": 7458 }, { "epoch": 1.25, "grad_norm": 0.7400217652320862, "learning_rate": 1.3157411785685716e-05, "loss": 0.0601, "step": 7459 }, { "epoch": 1.25, "grad_norm": 0.37129876017570496, "learning_rate": 1.3155698384640399e-05, "loss": 0.0485, "step": 7460 }, { "epoch": 1.25, "grad_norm": 0.40084972977638245, "learning_rate": 1.3153984880699533e-05, "loss": 0.0584, "step": 7461 }, { "epoch": 1.25, "grad_norm": 0.5048534870147705, "learning_rate": 1.3152271273918995e-05, "loss": 0.0519, "step": 7462 }, { "epoch": 1.25, "grad_norm": 0.5583838224411011, "learning_rate": 1.315055756435465e-05, "loss": 0.0519, "step": 7463 }, { "epoch": 1.25, "grad_norm": 0.5510520339012146, "learning_rate": 1.3148843752062388e-05, "loss": 0.0646, "step": 7464 }, { "epoch": 1.25, "grad_norm": 0.44410449266433716, "learning_rate": 1.314712983709808e-05, "loss": 0.0505, "step": 7465 }, { "epoch": 1.25, "grad_norm": 0.40613552927970886, "learning_rate": 1.3145415819517613e-05, "loss": 0.0514, "step": 7466 }, { "epoch": 1.25, "grad_norm": 0.5680087208747864, "learning_rate": 1.3143701699376878e-05, "loss": 0.0609, "step": 7467 }, { "epoch": 1.25, "grad_norm": 0.4582246243953705, "learning_rate": 1.314198747673176e-05, "loss": 0.0593, "step": 7468 }, { "epoch": 1.25, "grad_norm": 0.5105573534965515, "learning_rate": 1.314027315163816e-05, "loss": 0.0657, "step": 7469 }, { "epoch": 1.25, "grad_norm": 0.646385133266449, "learning_rate": 1.3138558724151973e-05, "loss": 0.0769, "step": 7470 }, { "epoch": 1.25, "grad_norm": 0.6158478856086731, "learning_rate": 1.3136844194329099e-05, "loss": 0.0569, "step": 7471 }, { "epoch": 1.25, "grad_norm": 0.5542858242988586, "learning_rate": 1.3135129562225444e-05, "loss": 0.0546, "step": 7472 }, { "epoch": 1.25, "grad_norm": 0.4129881262779236, "learning_rate": 1.3133414827896911e-05, "loss": 0.0474, "step": 7473 }, { "epoch": 1.25, "grad_norm": 0.5328545570373535, "learning_rate": 1.3131699991399415e-05, "loss": 0.063, "step": 7474 }, { "epoch": 1.25, "grad_norm": 0.514453113079071, "learning_rate": 1.3129985052788876e-05, "loss": 0.0671, "step": 7475 }, { "epoch": 1.25, "grad_norm": 0.3411675691604614, "learning_rate": 1.3128270012121203e-05, "loss": 0.0482, "step": 7476 }, { "epoch": 1.25, "grad_norm": 0.4176224172115326, "learning_rate": 1.3126554869452317e-05, "loss": 0.0511, "step": 7477 }, { "epoch": 1.25, "grad_norm": 0.45240986347198486, "learning_rate": 1.3124839624838153e-05, "loss": 0.0547, "step": 7478 }, { "epoch": 1.25, "grad_norm": 0.4959847331047058, "learning_rate": 1.3123124278334627e-05, "loss": 0.0497, "step": 7479 }, { "epoch": 1.25, "grad_norm": 0.5898889899253845, "learning_rate": 1.3121408829997674e-05, "loss": 0.0572, "step": 7480 }, { "epoch": 1.25, "grad_norm": 0.3930796682834625, "learning_rate": 1.3119693279883228e-05, "loss": 0.0483, "step": 7481 }, { "epoch": 1.25, "grad_norm": 0.4629996716976166, "learning_rate": 1.311797762804723e-05, "loss": 0.0477, "step": 7482 }, { "epoch": 1.25, "grad_norm": 0.5123606324195862, "learning_rate": 1.3116261874545618e-05, "loss": 0.0513, "step": 7483 }, { "epoch": 1.25, "grad_norm": 0.48408252000808716, "learning_rate": 1.3114546019434336e-05, "loss": 0.063, "step": 7484 }, { "epoch": 1.25, "grad_norm": 0.6589263677597046, "learning_rate": 1.311283006276933e-05, "loss": 0.0664, "step": 7485 }, { "epoch": 1.25, "grad_norm": 0.569564700126648, "learning_rate": 1.3111114004606557e-05, "loss": 0.0533, "step": 7486 }, { "epoch": 1.25, "grad_norm": 0.5178940296173096, "learning_rate": 1.3109397845001966e-05, "loss": 0.0745, "step": 7487 }, { "epoch": 1.25, "grad_norm": 0.5864221453666687, "learning_rate": 1.3107681584011515e-05, "loss": 0.0658, "step": 7488 }, { "epoch": 1.25, "grad_norm": 0.6831367015838623, "learning_rate": 1.3105965221691167e-05, "loss": 0.0706, "step": 7489 }, { "epoch": 1.25, "grad_norm": 0.4364023208618164, "learning_rate": 1.3104248758096887e-05, "loss": 0.0472, "step": 7490 }, { "epoch": 1.25, "grad_norm": 0.499379962682724, "learning_rate": 1.3102532193284641e-05, "loss": 0.0595, "step": 7491 }, { "epoch": 1.25, "grad_norm": 0.5454065203666687, "learning_rate": 1.3100815527310396e-05, "loss": 0.0831, "step": 7492 }, { "epoch": 1.25, "grad_norm": 0.6548221707344055, "learning_rate": 1.3099098760230131e-05, "loss": 0.0636, "step": 7493 }, { "epoch": 1.25, "grad_norm": 0.5449154376983643, "learning_rate": 1.3097381892099824e-05, "loss": 0.0533, "step": 7494 }, { "epoch": 1.25, "grad_norm": 0.5378542542457581, "learning_rate": 1.309566492297545e-05, "loss": 0.0628, "step": 7495 }, { "epoch": 1.25, "grad_norm": 0.6555647850036621, "learning_rate": 1.3093947852912998e-05, "loss": 0.0465, "step": 7496 }, { "epoch": 1.25, "grad_norm": 0.48893916606903076, "learning_rate": 1.3092230681968454e-05, "loss": 0.0642, "step": 7497 }, { "epoch": 1.25, "grad_norm": 0.6023610234260559, "learning_rate": 1.3090513410197809e-05, "loss": 0.047, "step": 7498 }, { "epoch": 1.25, "grad_norm": 0.34924235939979553, "learning_rate": 1.3088796037657057e-05, "loss": 0.0464, "step": 7499 }, { "epoch": 1.25, "grad_norm": 0.5683792233467102, "learning_rate": 1.3087078564402193e-05, "loss": 0.0616, "step": 7500 }, { "epoch": 1.25, "grad_norm": 0.6065294146537781, "learning_rate": 1.3085360990489216e-05, "loss": 0.0578, "step": 7501 }, { "epoch": 1.25, "grad_norm": 0.6028105616569519, "learning_rate": 1.3083643315974135e-05, "loss": 0.0647, "step": 7502 }, { "epoch": 1.25, "grad_norm": 0.6425749659538269, "learning_rate": 1.3081925540912954e-05, "loss": 0.0598, "step": 7503 }, { "epoch": 1.26, "grad_norm": 0.48373734951019287, "learning_rate": 1.3080207665361685e-05, "loss": 0.0471, "step": 7504 }, { "epoch": 1.26, "grad_norm": 0.5918623208999634, "learning_rate": 1.3078489689376335e-05, "loss": 0.074, "step": 7505 }, { "epoch": 1.26, "grad_norm": 0.4160202741622925, "learning_rate": 1.307677161301293e-05, "loss": 0.0509, "step": 7506 }, { "epoch": 1.26, "grad_norm": 0.4278768002986908, "learning_rate": 1.3075053436327485e-05, "loss": 0.0541, "step": 7507 }, { "epoch": 1.26, "grad_norm": 0.6101424098014832, "learning_rate": 1.3073335159376023e-05, "loss": 0.0573, "step": 7508 }, { "epoch": 1.26, "grad_norm": 0.7130849957466125, "learning_rate": 1.3071616782214572e-05, "loss": 0.062, "step": 7509 }, { "epoch": 1.26, "grad_norm": 0.813059389591217, "learning_rate": 1.3069898304899162e-05, "loss": 0.0583, "step": 7510 }, { "epoch": 1.26, "grad_norm": 0.859765350818634, "learning_rate": 1.3068179727485828e-05, "loss": 0.0897, "step": 7511 }, { "epoch": 1.26, "grad_norm": 0.6134911179542542, "learning_rate": 1.3066461050030602e-05, "loss": 0.0437, "step": 7512 }, { "epoch": 1.26, "grad_norm": 0.7631625533103943, "learning_rate": 1.3064742272589526e-05, "loss": 0.0626, "step": 7513 }, { "epoch": 1.26, "grad_norm": 0.6849133372306824, "learning_rate": 1.306302339521864e-05, "loss": 0.0583, "step": 7514 }, { "epoch": 1.26, "grad_norm": 0.6339712738990784, "learning_rate": 1.3061304417973997e-05, "loss": 0.0685, "step": 7515 }, { "epoch": 1.26, "grad_norm": 0.6529701948165894, "learning_rate": 1.305958534091164e-05, "loss": 0.0765, "step": 7516 }, { "epoch": 1.26, "grad_norm": 0.5686367154121399, "learning_rate": 1.3057866164087625e-05, "loss": 0.0565, "step": 7517 }, { "epoch": 1.26, "grad_norm": 0.5022217035293579, "learning_rate": 1.3056146887558008e-05, "loss": 0.0503, "step": 7518 }, { "epoch": 1.26, "grad_norm": 0.6475876569747925, "learning_rate": 1.3054427511378844e-05, "loss": 0.0792, "step": 7519 }, { "epoch": 1.26, "grad_norm": 0.4838431179523468, "learning_rate": 1.30527080356062e-05, "loss": 0.0689, "step": 7520 }, { "epoch": 1.26, "grad_norm": 0.5492844581604004, "learning_rate": 1.3050988460296141e-05, "loss": 0.067, "step": 7521 }, { "epoch": 1.26, "grad_norm": 0.5508440732955933, "learning_rate": 1.3049268785504736e-05, "loss": 0.0767, "step": 7522 }, { "epoch": 1.26, "grad_norm": 0.535625159740448, "learning_rate": 1.3047549011288055e-05, "loss": 0.057, "step": 7523 }, { "epoch": 1.26, "grad_norm": 0.43082743883132935, "learning_rate": 1.3045829137702177e-05, "loss": 0.0622, "step": 7524 }, { "epoch": 1.26, "grad_norm": 0.6291815638542175, "learning_rate": 1.3044109164803174e-05, "loss": 0.0611, "step": 7525 }, { "epoch": 1.26, "grad_norm": 0.46929293870925903, "learning_rate": 1.3042389092647136e-05, "loss": 0.0574, "step": 7526 }, { "epoch": 1.26, "grad_norm": 0.5003916621208191, "learning_rate": 1.3040668921290145e-05, "loss": 0.0598, "step": 7527 }, { "epoch": 1.26, "grad_norm": 0.519425630569458, "learning_rate": 1.3038948650788288e-05, "loss": 0.0719, "step": 7528 }, { "epoch": 1.26, "grad_norm": 1.1925753355026245, "learning_rate": 1.3037228281197653e-05, "loss": 0.073, "step": 7529 }, { "epoch": 1.26, "grad_norm": 0.7235745191574097, "learning_rate": 1.3035507812574344e-05, "loss": 0.0693, "step": 7530 }, { "epoch": 1.26, "grad_norm": 0.5081969499588013, "learning_rate": 1.3033787244974454e-05, "loss": 0.0582, "step": 7531 }, { "epoch": 1.26, "grad_norm": 0.48501503467559814, "learning_rate": 1.3032066578454084e-05, "loss": 0.0575, "step": 7532 }, { "epoch": 1.26, "grad_norm": 0.6685150265693665, "learning_rate": 1.3030345813069336e-05, "loss": 0.0726, "step": 7533 }, { "epoch": 1.26, "grad_norm": 1.1432359218597412, "learning_rate": 1.3028624948876328e-05, "loss": 0.054, "step": 7534 }, { "epoch": 1.26, "grad_norm": 0.5690010786056519, "learning_rate": 1.3026903985931159e-05, "loss": 0.0669, "step": 7535 }, { "epoch": 1.26, "grad_norm": 0.5149436593055725, "learning_rate": 1.302518292428995e-05, "loss": 0.0587, "step": 7536 }, { "epoch": 1.26, "grad_norm": 0.37634921073913574, "learning_rate": 1.302346176400881e-05, "loss": 0.0484, "step": 7537 }, { "epoch": 1.26, "grad_norm": 0.6309657096862793, "learning_rate": 1.3021740505143875e-05, "loss": 0.0466, "step": 7538 }, { "epoch": 1.26, "grad_norm": 0.5404759645462036, "learning_rate": 1.3020019147751256e-05, "loss": 0.0516, "step": 7539 }, { "epoch": 1.26, "grad_norm": 0.5225015878677368, "learning_rate": 1.3018297691887085e-05, "loss": 0.0559, "step": 7540 }, { "epoch": 1.26, "grad_norm": 1.1197623014450073, "learning_rate": 1.3016576137607491e-05, "loss": 0.0741, "step": 7541 }, { "epoch": 1.26, "grad_norm": 0.4999673664569855, "learning_rate": 1.3014854484968606e-05, "loss": 0.0605, "step": 7542 }, { "epoch": 1.26, "grad_norm": 0.5563116669654846, "learning_rate": 1.3013132734026568e-05, "loss": 0.059, "step": 7543 }, { "epoch": 1.26, "grad_norm": 0.6739795804023743, "learning_rate": 1.3011410884837518e-05, "loss": 0.0556, "step": 7544 }, { "epoch": 1.26, "grad_norm": 0.6923438906669617, "learning_rate": 1.3009688937457598e-05, "loss": 0.08, "step": 7545 }, { "epoch": 1.26, "grad_norm": 0.6051527261734009, "learning_rate": 1.3007966891942956e-05, "loss": 0.0583, "step": 7546 }, { "epoch": 1.26, "grad_norm": 0.5152933597564697, "learning_rate": 1.300624474834974e-05, "loss": 0.0822, "step": 7547 }, { "epoch": 1.26, "grad_norm": 0.5978091359138489, "learning_rate": 1.30045225067341e-05, "loss": 0.0565, "step": 7548 }, { "epoch": 1.26, "grad_norm": 0.5653769373893738, "learning_rate": 1.3002800167152196e-05, "loss": 0.0476, "step": 7549 }, { "epoch": 1.26, "grad_norm": 0.5044312477111816, "learning_rate": 1.3001077729660185e-05, "loss": 0.0609, "step": 7550 }, { "epoch": 1.26, "grad_norm": 0.5809903144836426, "learning_rate": 1.2999355194314232e-05, "loss": 0.0642, "step": 7551 }, { "epoch": 1.26, "grad_norm": 0.5901470184326172, "learning_rate": 1.29976325611705e-05, "loss": 0.0601, "step": 7552 }, { "epoch": 1.26, "grad_norm": 0.5664169788360596, "learning_rate": 1.2995909830285154e-05, "loss": 0.0748, "step": 7553 }, { "epoch": 1.26, "grad_norm": 0.3435176610946655, "learning_rate": 1.2994187001714374e-05, "loss": 0.0384, "step": 7554 }, { "epoch": 1.26, "grad_norm": 0.6286701560020447, "learning_rate": 1.299246407551433e-05, "loss": 0.0692, "step": 7555 }, { "epoch": 1.26, "grad_norm": 0.7475573420524597, "learning_rate": 1.29907410517412e-05, "loss": 0.0752, "step": 7556 }, { "epoch": 1.26, "grad_norm": 0.7143480181694031, "learning_rate": 1.2989017930451165e-05, "loss": 0.0834, "step": 7557 }, { "epoch": 1.26, "grad_norm": 0.5777961611747742, "learning_rate": 1.2987294711700412e-05, "loss": 0.066, "step": 7558 }, { "epoch": 1.26, "grad_norm": 0.6834333539009094, "learning_rate": 1.298557139554513e-05, "loss": 0.0753, "step": 7559 }, { "epoch": 1.26, "grad_norm": 0.40881913900375366, "learning_rate": 1.2983847982041506e-05, "loss": 0.052, "step": 7560 }, { "epoch": 1.26, "grad_norm": 0.483687162399292, "learning_rate": 1.2982124471245735e-05, "loss": 0.0422, "step": 7561 }, { "epoch": 1.26, "grad_norm": 0.5834571719169617, "learning_rate": 1.2980400863214016e-05, "loss": 0.0677, "step": 7562 }, { "epoch": 1.26, "grad_norm": 0.42007941007614136, "learning_rate": 1.2978677158002546e-05, "loss": 0.0463, "step": 7563 }, { "epoch": 1.27, "grad_norm": 0.38086986541748047, "learning_rate": 1.2976953355667531e-05, "loss": 0.0446, "step": 7564 }, { "epoch": 1.27, "grad_norm": 0.5780743360519409, "learning_rate": 1.297522945626518e-05, "loss": 0.0457, "step": 7565 }, { "epoch": 1.27, "grad_norm": 0.3593830466270447, "learning_rate": 1.2973505459851701e-05, "loss": 0.0499, "step": 7566 }, { "epoch": 1.27, "grad_norm": 0.6968755125999451, "learning_rate": 1.2971781366483306e-05, "loss": 0.082, "step": 7567 }, { "epoch": 1.27, "grad_norm": 0.49535536766052246, "learning_rate": 1.2970057176216208e-05, "loss": 0.0497, "step": 7568 }, { "epoch": 1.27, "grad_norm": 0.5556076169013977, "learning_rate": 1.2968332889106634e-05, "loss": 0.0599, "step": 7569 }, { "epoch": 1.27, "grad_norm": 0.36815279722213745, "learning_rate": 1.2966608505210803e-05, "loss": 0.0512, "step": 7570 }, { "epoch": 1.27, "grad_norm": 0.9102540016174316, "learning_rate": 1.2964884024584938e-05, "loss": 0.0653, "step": 7571 }, { "epoch": 1.27, "grad_norm": 0.6064486503601074, "learning_rate": 1.2963159447285275e-05, "loss": 0.0626, "step": 7572 }, { "epoch": 1.27, "grad_norm": 0.5403134822845459, "learning_rate": 1.2961434773368037e-05, "loss": 0.0446, "step": 7573 }, { "epoch": 1.27, "grad_norm": 0.45877158641815186, "learning_rate": 1.2959710002889464e-05, "loss": 0.056, "step": 7574 }, { "epoch": 1.27, "grad_norm": 0.49401766061782837, "learning_rate": 1.2957985135905797e-05, "loss": 0.0645, "step": 7575 }, { "epoch": 1.27, "grad_norm": 0.38285475969314575, "learning_rate": 1.2956260172473276e-05, "loss": 0.06, "step": 7576 }, { "epoch": 1.27, "grad_norm": 0.4687710404396057, "learning_rate": 1.2954535112648138e-05, "loss": 0.0601, "step": 7577 }, { "epoch": 1.27, "grad_norm": 0.6427228450775146, "learning_rate": 1.295280995648664e-05, "loss": 0.0546, "step": 7578 }, { "epoch": 1.27, "grad_norm": 0.4876479208469391, "learning_rate": 1.295108470404503e-05, "loss": 0.0597, "step": 7579 }, { "epoch": 1.27, "grad_norm": 0.4728674590587616, "learning_rate": 1.2949359355379566e-05, "loss": 0.0408, "step": 7580 }, { "epoch": 1.27, "grad_norm": 0.5068570971488953, "learning_rate": 1.2947633910546493e-05, "loss": 0.0619, "step": 7581 }, { "epoch": 1.27, "grad_norm": 0.4483289420604706, "learning_rate": 1.2945908369602082e-05, "loss": 0.0582, "step": 7582 }, { "epoch": 1.27, "grad_norm": 0.4680456221103668, "learning_rate": 1.2944182732602597e-05, "loss": 0.0534, "step": 7583 }, { "epoch": 1.27, "grad_norm": 0.4784029722213745, "learning_rate": 1.2942456999604301e-05, "loss": 0.0481, "step": 7584 }, { "epoch": 1.27, "grad_norm": 0.5990853309631348, "learning_rate": 1.294073117066346e-05, "loss": 0.0573, "step": 7585 }, { "epoch": 1.27, "grad_norm": 0.48237308859825134, "learning_rate": 1.2939005245836354e-05, "loss": 0.0504, "step": 7586 }, { "epoch": 1.27, "grad_norm": 0.459577351808548, "learning_rate": 1.2937279225179255e-05, "loss": 0.0464, "step": 7587 }, { "epoch": 1.27, "grad_norm": 0.7713369727134705, "learning_rate": 1.2935553108748444e-05, "loss": 0.0646, "step": 7588 }, { "epoch": 1.27, "grad_norm": 0.5324008464813232, "learning_rate": 1.29338268966002e-05, "loss": 0.049, "step": 7589 }, { "epoch": 1.27, "grad_norm": 0.5532823204994202, "learning_rate": 1.2932100588790812e-05, "loss": 0.0736, "step": 7590 }, { "epoch": 1.27, "grad_norm": 0.5156381726264954, "learning_rate": 1.2930374185376567e-05, "loss": 0.057, "step": 7591 }, { "epoch": 1.27, "grad_norm": 0.4876042604446411, "learning_rate": 1.2928647686413758e-05, "loss": 0.0665, "step": 7592 }, { "epoch": 1.27, "grad_norm": 0.41544896364212036, "learning_rate": 1.2926921091958673e-05, "loss": 0.041, "step": 7593 }, { "epoch": 1.27, "grad_norm": 0.4858100414276123, "learning_rate": 1.292519440206762e-05, "loss": 0.0563, "step": 7594 }, { "epoch": 1.27, "grad_norm": 0.4916757643222809, "learning_rate": 1.2923467616796894e-05, "loss": 0.0542, "step": 7595 }, { "epoch": 1.27, "grad_norm": 0.5854223370552063, "learning_rate": 1.2921740736202799e-05, "loss": 0.0593, "step": 7596 }, { "epoch": 1.27, "grad_norm": 0.6676183342933655, "learning_rate": 1.2920013760341643e-05, "loss": 0.0583, "step": 7597 }, { "epoch": 1.27, "grad_norm": 0.4178782105445862, "learning_rate": 1.2918286689269737e-05, "loss": 0.0486, "step": 7598 }, { "epoch": 1.27, "grad_norm": 0.5138430595397949, "learning_rate": 1.2916559523043394e-05, "loss": 0.0592, "step": 7599 }, { "epoch": 1.27, "grad_norm": 0.48897692561149597, "learning_rate": 1.291483226171893e-05, "loss": 0.0498, "step": 7600 }, { "epoch": 1.27, "grad_norm": 0.6706858277320862, "learning_rate": 1.2913104905352664e-05, "loss": 0.0733, "step": 7601 }, { "epoch": 1.27, "grad_norm": 0.3611277639865875, "learning_rate": 1.291137745400092e-05, "loss": 0.0389, "step": 7602 }, { "epoch": 1.27, "grad_norm": 0.4858829081058502, "learning_rate": 1.2909649907720024e-05, "loss": 0.0694, "step": 7603 }, { "epoch": 1.27, "grad_norm": 0.40906888246536255, "learning_rate": 1.2907922266566304e-05, "loss": 0.0629, "step": 7604 }, { "epoch": 1.27, "grad_norm": 0.6617217063903809, "learning_rate": 1.2906194530596087e-05, "loss": 0.0687, "step": 7605 }, { "epoch": 1.27, "grad_norm": 0.5037770867347717, "learning_rate": 1.2904466699865718e-05, "loss": 0.047, "step": 7606 }, { "epoch": 1.27, "grad_norm": 0.4635170102119446, "learning_rate": 1.2902738774431529e-05, "loss": 0.0636, "step": 7607 }, { "epoch": 1.27, "grad_norm": 0.4178662896156311, "learning_rate": 1.2901010754349863e-05, "loss": 0.0526, "step": 7608 }, { "epoch": 1.27, "grad_norm": 0.4325566291809082, "learning_rate": 1.289928263967706e-05, "loss": 0.0456, "step": 7609 }, { "epoch": 1.27, "grad_norm": 0.672701895236969, "learning_rate": 1.2897554430469471e-05, "loss": 0.0692, "step": 7610 }, { "epoch": 1.27, "grad_norm": 0.4687395691871643, "learning_rate": 1.289582612678345e-05, "loss": 0.0656, "step": 7611 }, { "epoch": 1.27, "grad_norm": 0.6580632328987122, "learning_rate": 1.2894097728675345e-05, "loss": 0.0789, "step": 7612 }, { "epoch": 1.27, "grad_norm": 0.42587095499038696, "learning_rate": 1.2892369236201511e-05, "loss": 0.047, "step": 7613 }, { "epoch": 1.27, "grad_norm": 0.5397363305091858, "learning_rate": 1.2890640649418315e-05, "loss": 0.057, "step": 7614 }, { "epoch": 1.27, "grad_norm": 0.4749283194541931, "learning_rate": 1.288891196838212e-05, "loss": 0.0671, "step": 7615 }, { "epoch": 1.27, "grad_norm": 0.5161576867103577, "learning_rate": 1.2887183193149281e-05, "loss": 0.0684, "step": 7616 }, { "epoch": 1.27, "grad_norm": 0.5440675616264343, "learning_rate": 1.2885454323776174e-05, "loss": 0.0545, "step": 7617 }, { "epoch": 1.27, "grad_norm": 0.4582787752151489, "learning_rate": 1.2883725360319173e-05, "loss": 0.0558, "step": 7618 }, { "epoch": 1.27, "grad_norm": 0.40511059761047363, "learning_rate": 1.288199630283465e-05, "loss": 0.0574, "step": 7619 }, { "epoch": 1.27, "grad_norm": 0.5471593737602234, "learning_rate": 1.2880267151378983e-05, "loss": 0.0596, "step": 7620 }, { "epoch": 1.27, "grad_norm": 0.5582206845283508, "learning_rate": 1.2878537906008553e-05, "loss": 0.0665, "step": 7621 }, { "epoch": 1.27, "grad_norm": 0.3682096600532532, "learning_rate": 1.2876808566779749e-05, "loss": 0.0337, "step": 7622 }, { "epoch": 1.27, "grad_norm": 0.45928794145584106, "learning_rate": 1.2875079133748951e-05, "loss": 0.0492, "step": 7623 }, { "epoch": 1.28, "grad_norm": 0.43022316694259644, "learning_rate": 1.2873349606972555e-05, "loss": 0.06, "step": 7624 }, { "epoch": 1.28, "grad_norm": 0.5256895422935486, "learning_rate": 1.2871619986506948e-05, "loss": 0.069, "step": 7625 }, { "epoch": 1.28, "grad_norm": 0.8006190657615662, "learning_rate": 1.2869890272408535e-05, "loss": 0.0771, "step": 7626 }, { "epoch": 1.28, "grad_norm": 0.6077194809913635, "learning_rate": 1.286816046473371e-05, "loss": 0.0762, "step": 7627 }, { "epoch": 1.28, "grad_norm": 0.6019928455352783, "learning_rate": 1.2866430563538877e-05, "loss": 0.0645, "step": 7628 }, { "epoch": 1.28, "grad_norm": 0.5063906908035278, "learning_rate": 1.2864700568880442e-05, "loss": 0.0615, "step": 7629 }, { "epoch": 1.28, "grad_norm": 0.7338858842849731, "learning_rate": 1.2862970480814812e-05, "loss": 0.0631, "step": 7630 }, { "epoch": 1.28, "grad_norm": 0.570659875869751, "learning_rate": 1.2861240299398399e-05, "loss": 0.0679, "step": 7631 }, { "epoch": 1.28, "grad_norm": 0.7721654772758484, "learning_rate": 1.285951002468762e-05, "loss": 0.0758, "step": 7632 }, { "epoch": 1.28, "grad_norm": 0.3504045307636261, "learning_rate": 1.285777965673889e-05, "loss": 0.0379, "step": 7633 }, { "epoch": 1.28, "grad_norm": 0.6340557932853699, "learning_rate": 1.2856049195608633e-05, "loss": 0.0637, "step": 7634 }, { "epoch": 1.28, "grad_norm": 0.4320738911628723, "learning_rate": 1.2854318641353269e-05, "loss": 0.0451, "step": 7635 }, { "epoch": 1.28, "grad_norm": 0.497223824262619, "learning_rate": 1.2852587994029231e-05, "loss": 0.0549, "step": 7636 }, { "epoch": 1.28, "grad_norm": 0.6086704730987549, "learning_rate": 1.2850857253692939e-05, "loss": 0.0716, "step": 7637 }, { "epoch": 1.28, "grad_norm": 0.4809199869632721, "learning_rate": 1.2849126420400836e-05, "loss": 0.0569, "step": 7638 }, { "epoch": 1.28, "grad_norm": 0.4122658967971802, "learning_rate": 1.2847395494209353e-05, "loss": 0.0667, "step": 7639 }, { "epoch": 1.28, "grad_norm": 0.49861061573028564, "learning_rate": 1.284566447517493e-05, "loss": 0.0536, "step": 7640 }, { "epoch": 1.28, "grad_norm": 0.521878719329834, "learning_rate": 1.2843933363354006e-05, "loss": 0.0644, "step": 7641 }, { "epoch": 1.28, "grad_norm": 0.4123518764972687, "learning_rate": 1.2842202158803034e-05, "loss": 0.0588, "step": 7642 }, { "epoch": 1.28, "grad_norm": 0.6320269703865051, "learning_rate": 1.2840470861578456e-05, "loss": 0.0844, "step": 7643 }, { "epoch": 1.28, "grad_norm": 0.4572911858558655, "learning_rate": 1.2838739471736722e-05, "loss": 0.0557, "step": 7644 }, { "epoch": 1.28, "grad_norm": 0.4245533049106598, "learning_rate": 1.283700798933429e-05, "loss": 0.0438, "step": 7645 }, { "epoch": 1.28, "grad_norm": 0.5050250887870789, "learning_rate": 1.283527641442762e-05, "loss": 0.0443, "step": 7646 }, { "epoch": 1.28, "grad_norm": 0.6124039888381958, "learning_rate": 1.2833544747073163e-05, "loss": 0.0529, "step": 7647 }, { "epoch": 1.28, "grad_norm": 1.1454603672027588, "learning_rate": 1.2831812987327388e-05, "loss": 0.054, "step": 7648 }, { "epoch": 1.28, "grad_norm": 0.523140013217926, "learning_rate": 1.2830081135246758e-05, "loss": 0.0667, "step": 7649 }, { "epoch": 1.28, "grad_norm": 0.8365749716758728, "learning_rate": 1.2828349190887748e-05, "loss": 0.0654, "step": 7650 }, { "epoch": 1.28, "grad_norm": 0.4929184913635254, "learning_rate": 1.2826617154306826e-05, "loss": 0.0516, "step": 7651 }, { "epoch": 1.28, "grad_norm": 0.6378390789031982, "learning_rate": 1.282488502556047e-05, "loss": 0.0641, "step": 7652 }, { "epoch": 1.28, "grad_norm": 0.5125830173492432, "learning_rate": 1.2823152804705152e-05, "loss": 0.0799, "step": 7653 }, { "epoch": 1.28, "grad_norm": 0.4624755382537842, "learning_rate": 1.282142049179736e-05, "loss": 0.0582, "step": 7654 }, { "epoch": 1.28, "grad_norm": 0.5348767638206482, "learning_rate": 1.2819688086893575e-05, "loss": 0.0815, "step": 7655 }, { "epoch": 1.28, "grad_norm": 0.4877147972583771, "learning_rate": 1.2817955590050288e-05, "loss": 0.0532, "step": 7656 }, { "epoch": 1.28, "grad_norm": 0.4224388599395752, "learning_rate": 1.2816223001323984e-05, "loss": 0.0556, "step": 7657 }, { "epoch": 1.28, "grad_norm": 0.6109165549278259, "learning_rate": 1.2814490320771159e-05, "loss": 0.0922, "step": 7658 }, { "epoch": 1.28, "grad_norm": 0.7045662999153137, "learning_rate": 1.2812757548448307e-05, "loss": 0.0896, "step": 7659 }, { "epoch": 1.28, "grad_norm": 0.518150806427002, "learning_rate": 1.2811024684411934e-05, "loss": 0.0478, "step": 7660 }, { "epoch": 1.28, "grad_norm": 0.566704273223877, "learning_rate": 1.2809291728718531e-05, "loss": 0.0447, "step": 7661 }, { "epoch": 1.28, "grad_norm": 0.6851274371147156, "learning_rate": 1.2807558681424614e-05, "loss": 0.0723, "step": 7662 }, { "epoch": 1.28, "grad_norm": 0.6815763711929321, "learning_rate": 1.2805825542586685e-05, "loss": 0.0623, "step": 7663 }, { "epoch": 1.28, "grad_norm": 0.5966147780418396, "learning_rate": 1.280409231226126e-05, "loss": 0.0804, "step": 7664 }, { "epoch": 1.28, "grad_norm": 0.5697107911109924, "learning_rate": 1.2802358990504846e-05, "loss": 0.0539, "step": 7665 }, { "epoch": 1.28, "grad_norm": 0.5776801705360413, "learning_rate": 1.2800625577373965e-05, "loss": 0.0678, "step": 7666 }, { "epoch": 1.28, "grad_norm": 0.4667714536190033, "learning_rate": 1.2798892072925139e-05, "loss": 0.0429, "step": 7667 }, { "epoch": 1.28, "grad_norm": 0.44119346141815186, "learning_rate": 1.2797158477214886e-05, "loss": 0.053, "step": 7668 }, { "epoch": 1.28, "grad_norm": 0.44481730461120605, "learning_rate": 1.2795424790299733e-05, "loss": 0.0424, "step": 7669 }, { "epoch": 1.28, "grad_norm": 0.672797679901123, "learning_rate": 1.2793691012236214e-05, "loss": 0.0483, "step": 7670 }, { "epoch": 1.28, "grad_norm": 0.5897740721702576, "learning_rate": 1.279195714308086e-05, "loss": 0.0716, "step": 7671 }, { "epoch": 1.28, "grad_norm": 0.6260491609573364, "learning_rate": 1.27902231828902e-05, "loss": 0.0472, "step": 7672 }, { "epoch": 1.28, "grad_norm": 0.4547666609287262, "learning_rate": 1.2788489131720773e-05, "loss": 0.0627, "step": 7673 }, { "epoch": 1.28, "grad_norm": 0.411746084690094, "learning_rate": 1.2786754989629128e-05, "loss": 0.0492, "step": 7674 }, { "epoch": 1.28, "grad_norm": 0.5228908658027649, "learning_rate": 1.2785020756671803e-05, "loss": 0.0509, "step": 7675 }, { "epoch": 1.28, "grad_norm": 0.3733367323875427, "learning_rate": 1.2783286432905343e-05, "loss": 0.0569, "step": 7676 }, { "epoch": 1.28, "grad_norm": 0.6637449264526367, "learning_rate": 1.2781552018386299e-05, "loss": 0.0714, "step": 7677 }, { "epoch": 1.28, "grad_norm": 0.6378380656242371, "learning_rate": 1.2779817513171229e-05, "loss": 0.0586, "step": 7678 }, { "epoch": 1.28, "grad_norm": 0.4806610941886902, "learning_rate": 1.2778082917316681e-05, "loss": 0.064, "step": 7679 }, { "epoch": 1.28, "grad_norm": 0.649725079536438, "learning_rate": 1.2776348230879219e-05, "loss": 0.0748, "step": 7680 }, { "epoch": 1.28, "grad_norm": 1.4965300559997559, "learning_rate": 1.2774613453915404e-05, "loss": 0.0841, "step": 7681 }, { "epoch": 1.28, "grad_norm": 0.6268951296806335, "learning_rate": 1.2772878586481797e-05, "loss": 0.0856, "step": 7682 }, { "epoch": 1.28, "grad_norm": 0.4309549033641815, "learning_rate": 1.277114362863497e-05, "loss": 0.0359, "step": 7683 }, { "epoch": 1.29, "grad_norm": 0.4031537175178528, "learning_rate": 1.2769408580431492e-05, "loss": 0.0393, "step": 7684 }, { "epoch": 1.29, "grad_norm": 0.6913952827453613, "learning_rate": 1.2767673441927935e-05, "loss": 0.0673, "step": 7685 }, { "epoch": 1.29, "grad_norm": 0.5622918009757996, "learning_rate": 1.2765938213180878e-05, "loss": 0.0706, "step": 7686 }, { "epoch": 1.29, "grad_norm": 0.5012783408164978, "learning_rate": 1.2764202894246897e-05, "loss": 0.0644, "step": 7687 }, { "epoch": 1.29, "grad_norm": 0.6250683069229126, "learning_rate": 1.2762467485182581e-05, "loss": 0.0681, "step": 7688 }, { "epoch": 1.29, "grad_norm": 0.39394038915634155, "learning_rate": 1.2760731986044503e-05, "loss": 0.0491, "step": 7689 }, { "epoch": 1.29, "grad_norm": 0.45179927349090576, "learning_rate": 1.2758996396889263e-05, "loss": 0.0573, "step": 7690 }, { "epoch": 1.29, "grad_norm": 0.5414849519729614, "learning_rate": 1.2757260717773447e-05, "loss": 0.0642, "step": 7691 }, { "epoch": 1.29, "grad_norm": 0.4677949547767639, "learning_rate": 1.2755524948753653e-05, "loss": 0.0531, "step": 7692 }, { "epoch": 1.29, "grad_norm": 0.493691623210907, "learning_rate": 1.2753789089886469e-05, "loss": 0.0512, "step": 7693 }, { "epoch": 1.29, "grad_norm": 0.4890846312046051, "learning_rate": 1.2752053141228503e-05, "loss": 0.0644, "step": 7694 }, { "epoch": 1.29, "grad_norm": 0.5074313879013062, "learning_rate": 1.2750317102836356e-05, "loss": 0.0677, "step": 7695 }, { "epoch": 1.29, "grad_norm": 0.5528688430786133, "learning_rate": 1.2748580974766631e-05, "loss": 0.0475, "step": 7696 }, { "epoch": 1.29, "grad_norm": 0.47119730710983276, "learning_rate": 1.2746844757075937e-05, "loss": 0.0505, "step": 7697 }, { "epoch": 1.29, "grad_norm": 0.49141812324523926, "learning_rate": 1.2745108449820892e-05, "loss": 0.0569, "step": 7698 }, { "epoch": 1.29, "grad_norm": 0.72337406873703, "learning_rate": 1.2743372053058102e-05, "loss": 0.0796, "step": 7699 }, { "epoch": 1.29, "grad_norm": 0.592301070690155, "learning_rate": 1.2741635566844193e-05, "loss": 0.0647, "step": 7700 }, { "epoch": 1.29, "grad_norm": 0.35299959778785706, "learning_rate": 1.2739898991235773e-05, "loss": 0.0579, "step": 7701 }, { "epoch": 1.29, "grad_norm": 0.5126475095748901, "learning_rate": 1.2738162326289479e-05, "loss": 0.0591, "step": 7702 }, { "epoch": 1.29, "grad_norm": 0.7823171019554138, "learning_rate": 1.273642557206193e-05, "loss": 0.0708, "step": 7703 }, { "epoch": 1.29, "grad_norm": 0.5180416703224182, "learning_rate": 1.2734688728609754e-05, "loss": 0.0487, "step": 7704 }, { "epoch": 1.29, "grad_norm": 0.5222670435905457, "learning_rate": 1.2732951795989586e-05, "loss": 0.0627, "step": 7705 }, { "epoch": 1.29, "grad_norm": 0.5897996425628662, "learning_rate": 1.2731214774258058e-05, "loss": 0.0586, "step": 7706 }, { "epoch": 1.29, "grad_norm": 0.4154928922653198, "learning_rate": 1.2729477663471812e-05, "loss": 0.0596, "step": 7707 }, { "epoch": 1.29, "grad_norm": 0.6898928284645081, "learning_rate": 1.2727740463687485e-05, "loss": 0.0686, "step": 7708 }, { "epoch": 1.29, "grad_norm": 0.4429704546928406, "learning_rate": 1.2726003174961723e-05, "loss": 0.0632, "step": 7709 }, { "epoch": 1.29, "grad_norm": 0.6140855550765991, "learning_rate": 1.2724265797351172e-05, "loss": 0.0564, "step": 7710 }, { "epoch": 1.29, "grad_norm": 0.6432837247848511, "learning_rate": 1.2722528330912478e-05, "loss": 0.0627, "step": 7711 }, { "epoch": 1.29, "grad_norm": 0.568675696849823, "learning_rate": 1.2720790775702298e-05, "loss": 0.0444, "step": 7712 }, { "epoch": 1.29, "grad_norm": 0.49013209342956543, "learning_rate": 1.2719053131777286e-05, "loss": 0.061, "step": 7713 }, { "epoch": 1.29, "grad_norm": 0.36357346177101135, "learning_rate": 1.2717315399194099e-05, "loss": 0.043, "step": 7714 }, { "epoch": 1.29, "grad_norm": 0.4347198009490967, "learning_rate": 1.2715577578009397e-05, "loss": 0.051, "step": 7715 }, { "epoch": 1.29, "grad_norm": 0.478471040725708, "learning_rate": 1.2713839668279847e-05, "loss": 0.0574, "step": 7716 }, { "epoch": 1.29, "grad_norm": 0.4034862220287323, "learning_rate": 1.2712101670062114e-05, "loss": 0.0712, "step": 7717 }, { "epoch": 1.29, "grad_norm": 0.4540654420852661, "learning_rate": 1.2710363583412865e-05, "loss": 0.0415, "step": 7718 }, { "epoch": 1.29, "grad_norm": 0.6077934503555298, "learning_rate": 1.2708625408388778e-05, "loss": 0.0541, "step": 7719 }, { "epoch": 1.29, "grad_norm": 0.47396910190582275, "learning_rate": 1.2706887145046526e-05, "loss": 0.053, "step": 7720 }, { "epoch": 1.29, "grad_norm": 0.40444159507751465, "learning_rate": 1.2705148793442783e-05, "loss": 0.0505, "step": 7721 }, { "epoch": 1.29, "grad_norm": 0.5304534435272217, "learning_rate": 1.2703410353634238e-05, "loss": 0.068, "step": 7722 }, { "epoch": 1.29, "grad_norm": 0.5196669101715088, "learning_rate": 1.2701671825677571e-05, "loss": 0.0634, "step": 7723 }, { "epoch": 1.29, "grad_norm": 0.3764400780200958, "learning_rate": 1.2699933209629467e-05, "loss": 0.0474, "step": 7724 }, { "epoch": 1.29, "grad_norm": 0.6246241927146912, "learning_rate": 1.2698194505546617e-05, "loss": 0.0629, "step": 7725 }, { "epoch": 1.29, "grad_norm": 0.48527762293815613, "learning_rate": 1.2696455713485714e-05, "loss": 0.057, "step": 7726 }, { "epoch": 1.29, "grad_norm": 0.39025646448135376, "learning_rate": 1.2694716833503457e-05, "loss": 0.0499, "step": 7727 }, { "epoch": 1.29, "grad_norm": 0.4348011016845703, "learning_rate": 1.2692977865656541e-05, "loss": 0.0474, "step": 7728 }, { "epoch": 1.29, "grad_norm": 0.4248596131801605, "learning_rate": 1.2691238810001665e-05, "loss": 0.0534, "step": 7729 }, { "epoch": 1.29, "grad_norm": 0.415626585483551, "learning_rate": 1.2689499666595538e-05, "loss": 0.0615, "step": 7730 }, { "epoch": 1.29, "grad_norm": 0.44255688786506653, "learning_rate": 1.2687760435494863e-05, "loss": 0.0478, "step": 7731 }, { "epoch": 1.29, "grad_norm": 0.49685293436050415, "learning_rate": 1.2686021116756352e-05, "loss": 0.0564, "step": 7732 }, { "epoch": 1.29, "grad_norm": 0.41712698340415955, "learning_rate": 1.2684281710436717e-05, "loss": 0.0501, "step": 7733 }, { "epoch": 1.29, "grad_norm": 0.6968013644218445, "learning_rate": 1.2682542216592675e-05, "loss": 0.0615, "step": 7734 }, { "epoch": 1.29, "grad_norm": 0.4600709080696106, "learning_rate": 1.268080263528094e-05, "loss": 0.0526, "step": 7735 }, { "epoch": 1.29, "grad_norm": 0.47196024656295776, "learning_rate": 1.267906296655824e-05, "loss": 0.0468, "step": 7736 }, { "epoch": 1.29, "grad_norm": 0.42369645833969116, "learning_rate": 1.2677323210481292e-05, "loss": 0.0704, "step": 7737 }, { "epoch": 1.29, "grad_norm": 0.4290551543235779, "learning_rate": 1.2675583367106827e-05, "loss": 0.0511, "step": 7738 }, { "epoch": 1.29, "grad_norm": 0.4174307882785797, "learning_rate": 1.2673843436491578e-05, "loss": 0.0537, "step": 7739 }, { "epoch": 1.29, "grad_norm": 0.7822008728981018, "learning_rate": 1.267210341869227e-05, "loss": 0.0488, "step": 7740 }, { "epoch": 1.29, "grad_norm": 0.4818059504032135, "learning_rate": 1.2670363313765644e-05, "loss": 0.0485, "step": 7741 }, { "epoch": 1.29, "grad_norm": 0.7978203892707825, "learning_rate": 1.2668623121768436e-05, "loss": 0.0577, "step": 7742 }, { "epoch": 1.3, "grad_norm": 0.4431880712509155, "learning_rate": 1.2666882842757387e-05, "loss": 0.0557, "step": 7743 }, { "epoch": 1.3, "grad_norm": 0.4714873135089874, "learning_rate": 1.2665142476789245e-05, "loss": 0.0659, "step": 7744 }, { "epoch": 1.3, "grad_norm": 0.6776067614555359, "learning_rate": 1.2663402023920748e-05, "loss": 0.0669, "step": 7745 }, { "epoch": 1.3, "grad_norm": 0.3131354749202728, "learning_rate": 1.2661661484208655e-05, "loss": 0.046, "step": 7746 }, { "epoch": 1.3, "grad_norm": 0.5983402729034424, "learning_rate": 1.2659920857709715e-05, "loss": 0.0667, "step": 7747 }, { "epoch": 1.3, "grad_norm": 0.6004834771156311, "learning_rate": 1.2658180144480685e-05, "loss": 0.0714, "step": 7748 }, { "epoch": 1.3, "grad_norm": 0.8000513315200806, "learning_rate": 1.2656439344578317e-05, "loss": 0.0643, "step": 7749 }, { "epoch": 1.3, "grad_norm": 0.5489891171455383, "learning_rate": 1.2654698458059377e-05, "loss": 0.0663, "step": 7750 }, { "epoch": 1.3, "grad_norm": 0.8830916881561279, "learning_rate": 1.2652957484980633e-05, "loss": 0.0704, "step": 7751 }, { "epoch": 1.3, "grad_norm": 0.43864864110946655, "learning_rate": 1.2651216425398842e-05, "loss": 0.0598, "step": 7752 }, { "epoch": 1.3, "grad_norm": 0.46474015712738037, "learning_rate": 1.264947527937078e-05, "loss": 0.0689, "step": 7753 }, { "epoch": 1.3, "grad_norm": 0.6001889705657959, "learning_rate": 1.2647734046953217e-05, "loss": 0.0569, "step": 7754 }, { "epoch": 1.3, "grad_norm": 0.7771832942962646, "learning_rate": 1.264599272820293e-05, "loss": 0.0504, "step": 7755 }, { "epoch": 1.3, "grad_norm": 0.5420470833778381, "learning_rate": 1.2644251323176694e-05, "loss": 0.05, "step": 7756 }, { "epoch": 1.3, "grad_norm": 0.6769436001777649, "learning_rate": 1.264250983193129e-05, "loss": 0.066, "step": 7757 }, { "epoch": 1.3, "grad_norm": 0.5183398127555847, "learning_rate": 1.2640768254523506e-05, "loss": 0.0564, "step": 7758 }, { "epoch": 1.3, "grad_norm": 0.4131152927875519, "learning_rate": 1.2639026591010122e-05, "loss": 0.0443, "step": 7759 }, { "epoch": 1.3, "grad_norm": 0.5551892518997192, "learning_rate": 1.2637284841447932e-05, "loss": 0.0746, "step": 7760 }, { "epoch": 1.3, "grad_norm": 0.5246230363845825, "learning_rate": 1.2635543005893727e-05, "loss": 0.0674, "step": 7761 }, { "epoch": 1.3, "grad_norm": 0.45449352264404297, "learning_rate": 1.2633801084404297e-05, "loss": 0.0565, "step": 7762 }, { "epoch": 1.3, "grad_norm": 0.5532888770103455, "learning_rate": 1.2632059077036447e-05, "loss": 0.0513, "step": 7763 }, { "epoch": 1.3, "grad_norm": 0.5133354663848877, "learning_rate": 1.2630316983846973e-05, "loss": 0.0448, "step": 7764 }, { "epoch": 1.3, "grad_norm": 0.6189525723457336, "learning_rate": 1.2628574804892679e-05, "loss": 0.0526, "step": 7765 }, { "epoch": 1.3, "grad_norm": 0.42121127247810364, "learning_rate": 1.2626832540230372e-05, "loss": 0.0519, "step": 7766 }, { "epoch": 1.3, "grad_norm": 0.5887957215309143, "learning_rate": 1.2625090189916858e-05, "loss": 0.0558, "step": 7767 }, { "epoch": 1.3, "grad_norm": 0.3748084306716919, "learning_rate": 1.262334775400895e-05, "loss": 0.0486, "step": 7768 }, { "epoch": 1.3, "grad_norm": 0.4392770230770111, "learning_rate": 1.2621605232563463e-05, "loss": 0.0626, "step": 7769 }, { "epoch": 1.3, "grad_norm": 0.47843366861343384, "learning_rate": 1.2619862625637214e-05, "loss": 0.0519, "step": 7770 }, { "epoch": 1.3, "grad_norm": 0.5273039937019348, "learning_rate": 1.2618119933287023e-05, "loss": 0.0678, "step": 7771 }, { "epoch": 1.3, "grad_norm": 0.6600030660629272, "learning_rate": 1.2616377155569714e-05, "loss": 0.0486, "step": 7772 }, { "epoch": 1.3, "grad_norm": 0.5152909755706787, "learning_rate": 1.2614634292542108e-05, "loss": 0.0589, "step": 7773 }, { "epoch": 1.3, "grad_norm": 0.5000320672988892, "learning_rate": 1.2612891344261039e-05, "loss": 0.0655, "step": 7774 }, { "epoch": 1.3, "grad_norm": 0.4556547999382019, "learning_rate": 1.261114831078333e-05, "loss": 0.0469, "step": 7775 }, { "epoch": 1.3, "grad_norm": 0.4014377295970917, "learning_rate": 1.2609405192165826e-05, "loss": 0.0588, "step": 7776 }, { "epoch": 1.3, "grad_norm": 0.5478550791740417, "learning_rate": 1.2607661988465352e-05, "loss": 0.0623, "step": 7777 }, { "epoch": 1.3, "grad_norm": 0.7688347101211548, "learning_rate": 1.2605918699738754e-05, "loss": 0.0634, "step": 7778 }, { "epoch": 1.3, "grad_norm": 0.40327826142311096, "learning_rate": 1.2604175326042879e-05, "loss": 0.0412, "step": 7779 }, { "epoch": 1.3, "grad_norm": 0.4557494819164276, "learning_rate": 1.2602431867434559e-05, "loss": 0.0439, "step": 7780 }, { "epoch": 1.3, "grad_norm": 0.8193127512931824, "learning_rate": 1.2600688323970651e-05, "loss": 0.0588, "step": 7781 }, { "epoch": 1.3, "grad_norm": 0.48869413137435913, "learning_rate": 1.2598944695708002e-05, "loss": 0.0594, "step": 7782 }, { "epoch": 1.3, "grad_norm": 0.7515615820884705, "learning_rate": 1.2597200982703468e-05, "loss": 0.0774, "step": 7783 }, { "epoch": 1.3, "grad_norm": 0.871547281742096, "learning_rate": 1.2595457185013904e-05, "loss": 0.0759, "step": 7784 }, { "epoch": 1.3, "grad_norm": 0.4957602918148041, "learning_rate": 1.2593713302696165e-05, "loss": 0.0503, "step": 7785 }, { "epoch": 1.3, "grad_norm": 0.5724624991416931, "learning_rate": 1.2591969335807118e-05, "loss": 0.0548, "step": 7786 }, { "epoch": 1.3, "grad_norm": 0.506576657295227, "learning_rate": 1.2590225284403623e-05, "loss": 0.0702, "step": 7787 }, { "epoch": 1.3, "grad_norm": 0.34222477674484253, "learning_rate": 1.2588481148542548e-05, "loss": 0.0282, "step": 7788 }, { "epoch": 1.3, "grad_norm": 0.6656916737556458, "learning_rate": 1.2586736928280765e-05, "loss": 0.0611, "step": 7789 }, { "epoch": 1.3, "grad_norm": 0.5078103542327881, "learning_rate": 1.2584992623675145e-05, "loss": 0.0731, "step": 7790 }, { "epoch": 1.3, "grad_norm": 0.6193680763244629, "learning_rate": 1.2583248234782564e-05, "loss": 0.0668, "step": 7791 }, { "epoch": 1.3, "grad_norm": 0.41912466287612915, "learning_rate": 1.2581503761659896e-05, "loss": 0.0426, "step": 7792 }, { "epoch": 1.3, "grad_norm": 0.5997406244277954, "learning_rate": 1.2579759204364028e-05, "loss": 0.0594, "step": 7793 }, { "epoch": 1.3, "grad_norm": 0.5348281264305115, "learning_rate": 1.257801456295184e-05, "loss": 0.0402, "step": 7794 }, { "epoch": 1.3, "grad_norm": 0.5550340414047241, "learning_rate": 1.2576269837480217e-05, "loss": 0.0614, "step": 7795 }, { "epoch": 1.3, "grad_norm": 0.4672113358974457, "learning_rate": 1.2574525028006053e-05, "loss": 0.0509, "step": 7796 }, { "epoch": 1.3, "grad_norm": 0.5006173253059387, "learning_rate": 1.2572780134586233e-05, "loss": 0.0674, "step": 7797 }, { "epoch": 1.3, "grad_norm": 0.655039370059967, "learning_rate": 1.2571035157277658e-05, "loss": 0.0786, "step": 7798 }, { "epoch": 1.3, "grad_norm": 0.5632697343826294, "learning_rate": 1.2569290096137219e-05, "loss": 0.0533, "step": 7799 }, { "epoch": 1.3, "grad_norm": 0.4030951261520386, "learning_rate": 1.2567544951221823e-05, "loss": 0.0459, "step": 7800 }, { "epoch": 1.3, "grad_norm": 0.7332925796508789, "learning_rate": 1.2565799722588366e-05, "loss": 0.066, "step": 7801 }, { "epoch": 1.3, "grad_norm": 0.5863486528396606, "learning_rate": 1.2564054410293755e-05, "loss": 0.061, "step": 7802 }, { "epoch": 1.31, "grad_norm": 0.5061216950416565, "learning_rate": 1.2562309014394903e-05, "loss": 0.0568, "step": 7803 }, { "epoch": 1.31, "grad_norm": 0.6454516649246216, "learning_rate": 1.2560563534948715e-05, "loss": 0.0691, "step": 7804 }, { "epoch": 1.31, "grad_norm": 0.375744104385376, "learning_rate": 1.2558817972012105e-05, "loss": 0.0484, "step": 7805 }, { "epoch": 1.31, "grad_norm": 0.573937177658081, "learning_rate": 1.2557072325641992e-05, "loss": 0.0635, "step": 7806 }, { "epoch": 1.31, "grad_norm": 0.43851038813591003, "learning_rate": 1.2555326595895296e-05, "loss": 0.0574, "step": 7807 }, { "epoch": 1.31, "grad_norm": 0.47960758209228516, "learning_rate": 1.2553580782828936e-05, "loss": 0.0415, "step": 7808 }, { "epoch": 1.31, "grad_norm": 0.5971444249153137, "learning_rate": 1.2551834886499835e-05, "loss": 0.0456, "step": 7809 }, { "epoch": 1.31, "grad_norm": 0.6535817980766296, "learning_rate": 1.2550088906964924e-05, "loss": 0.0745, "step": 7810 }, { "epoch": 1.31, "grad_norm": 0.480947345495224, "learning_rate": 1.2548342844281132e-05, "loss": 0.0563, "step": 7811 }, { "epoch": 1.31, "grad_norm": 0.5554484724998474, "learning_rate": 1.2546596698505388e-05, "loss": 0.0479, "step": 7812 }, { "epoch": 1.31, "grad_norm": 1.011070728302002, "learning_rate": 1.2544850469694632e-05, "loss": 0.08, "step": 7813 }, { "epoch": 1.31, "grad_norm": 0.6536530256271362, "learning_rate": 1.2543104157905801e-05, "loss": 0.0548, "step": 7814 }, { "epoch": 1.31, "grad_norm": 0.4269697666168213, "learning_rate": 1.2541357763195832e-05, "loss": 0.0587, "step": 7815 }, { "epoch": 1.31, "grad_norm": 0.41948968172073364, "learning_rate": 1.2539611285621673e-05, "loss": 0.0531, "step": 7816 }, { "epoch": 1.31, "grad_norm": 0.42851489782333374, "learning_rate": 1.2537864725240267e-05, "loss": 0.0478, "step": 7817 }, { "epoch": 1.31, "grad_norm": 0.6635278463363647, "learning_rate": 1.2536118082108566e-05, "loss": 0.0641, "step": 7818 }, { "epoch": 1.31, "grad_norm": 0.5230586528778076, "learning_rate": 1.2534371356283519e-05, "loss": 0.0719, "step": 7819 }, { "epoch": 1.31, "grad_norm": 0.46433475613594055, "learning_rate": 1.2532624547822079e-05, "loss": 0.0574, "step": 7820 }, { "epoch": 1.31, "grad_norm": 0.641426146030426, "learning_rate": 1.2530877656781208e-05, "loss": 0.0531, "step": 7821 }, { "epoch": 1.31, "grad_norm": 0.6352847218513489, "learning_rate": 1.252913068321786e-05, "loss": 0.0641, "step": 7822 }, { "epoch": 1.31, "grad_norm": 0.47421813011169434, "learning_rate": 1.2527383627189e-05, "loss": 0.0628, "step": 7823 }, { "epoch": 1.31, "grad_norm": 0.6351252198219299, "learning_rate": 1.2525636488751593e-05, "loss": 0.0701, "step": 7824 }, { "epoch": 1.31, "grad_norm": 0.7829893827438354, "learning_rate": 1.2523889267962605e-05, "loss": 0.0679, "step": 7825 }, { "epoch": 1.31, "grad_norm": 0.640990138053894, "learning_rate": 1.2522141964879009e-05, "loss": 0.0647, "step": 7826 }, { "epoch": 1.31, "grad_norm": 0.5593612790107727, "learning_rate": 1.2520394579557777e-05, "loss": 0.066, "step": 7827 }, { "epoch": 1.31, "grad_norm": 0.5679575204849243, "learning_rate": 1.2518647112055887e-05, "loss": 0.0664, "step": 7828 }, { "epoch": 1.31, "grad_norm": 0.47070398926734924, "learning_rate": 1.251689956243031e-05, "loss": 0.0555, "step": 7829 }, { "epoch": 1.31, "grad_norm": 0.5666206479072571, "learning_rate": 1.2515151930738032e-05, "loss": 0.0553, "step": 7830 }, { "epoch": 1.31, "grad_norm": 0.6375483274459839, "learning_rate": 1.2513404217036039e-05, "loss": 0.0967, "step": 7831 }, { "epoch": 1.31, "grad_norm": 0.6646023988723755, "learning_rate": 1.2511656421381316e-05, "loss": 0.0606, "step": 7832 }, { "epoch": 1.31, "grad_norm": 0.44719961285591125, "learning_rate": 1.2509908543830847e-05, "loss": 0.0446, "step": 7833 }, { "epoch": 1.31, "grad_norm": 0.8869576454162598, "learning_rate": 1.250816058444163e-05, "loss": 0.0647, "step": 7834 }, { "epoch": 1.31, "grad_norm": 0.7105469703674316, "learning_rate": 1.2506412543270658e-05, "loss": 0.0826, "step": 7835 }, { "epoch": 1.31, "grad_norm": 0.4586513638496399, "learning_rate": 1.2504664420374926e-05, "loss": 0.0578, "step": 7836 }, { "epoch": 1.31, "grad_norm": 0.609607994556427, "learning_rate": 1.2502916215811436e-05, "loss": 0.0676, "step": 7837 }, { "epoch": 1.31, "grad_norm": 0.5683492422103882, "learning_rate": 1.2501167929637185e-05, "loss": 0.0646, "step": 7838 }, { "epoch": 1.31, "grad_norm": 0.588958740234375, "learning_rate": 1.2499419561909192e-05, "loss": 0.0561, "step": 7839 }, { "epoch": 1.31, "grad_norm": 0.40593674778938293, "learning_rate": 1.249767111268445e-05, "loss": 0.0444, "step": 7840 }, { "epoch": 1.31, "grad_norm": 0.7060976624488831, "learning_rate": 1.2495922582019974e-05, "loss": 0.0535, "step": 7841 }, { "epoch": 1.31, "grad_norm": 0.501052975654602, "learning_rate": 1.249417396997278e-05, "loss": 0.0521, "step": 7842 }, { "epoch": 1.31, "grad_norm": 0.3840858042240143, "learning_rate": 1.249242527659988e-05, "loss": 0.0544, "step": 7843 }, { "epoch": 1.31, "grad_norm": 0.6532110571861267, "learning_rate": 1.2490676501958294e-05, "loss": 0.0801, "step": 7844 }, { "epoch": 1.31, "grad_norm": 0.46444952487945557, "learning_rate": 1.2488927646105045e-05, "loss": 0.0485, "step": 7845 }, { "epoch": 1.31, "grad_norm": 0.503166913986206, "learning_rate": 1.2487178709097152e-05, "loss": 0.0695, "step": 7846 }, { "epoch": 1.31, "grad_norm": 0.4234065115451813, "learning_rate": 1.2485429690991647e-05, "loss": 0.0409, "step": 7847 }, { "epoch": 1.31, "grad_norm": 0.7010741233825684, "learning_rate": 1.2483680591845553e-05, "loss": 0.0844, "step": 7848 }, { "epoch": 1.31, "grad_norm": 0.6197695136070251, "learning_rate": 1.2481931411715907e-05, "loss": 0.0645, "step": 7849 }, { "epoch": 1.31, "grad_norm": 0.5752769112586975, "learning_rate": 1.2480182150659736e-05, "loss": 0.0625, "step": 7850 }, { "epoch": 1.31, "grad_norm": 0.49058830738067627, "learning_rate": 1.2478432808734086e-05, "loss": 0.0553, "step": 7851 }, { "epoch": 1.31, "grad_norm": 0.5244954228401184, "learning_rate": 1.2476683385995991e-05, "loss": 0.069, "step": 7852 }, { "epoch": 1.31, "grad_norm": 0.4149129092693329, "learning_rate": 1.2474933882502494e-05, "loss": 0.0679, "step": 7853 }, { "epoch": 1.31, "grad_norm": 0.42682787775993347, "learning_rate": 1.247318429831064e-05, "loss": 0.0558, "step": 7854 }, { "epoch": 1.31, "grad_norm": 0.47705069184303284, "learning_rate": 1.2471434633477478e-05, "loss": 0.0646, "step": 7855 }, { "epoch": 1.31, "grad_norm": 0.556651771068573, "learning_rate": 1.2469684888060057e-05, "loss": 0.0724, "step": 7856 }, { "epoch": 1.31, "grad_norm": 0.4305509626865387, "learning_rate": 1.2467935062115423e-05, "loss": 0.0515, "step": 7857 }, { "epoch": 1.31, "grad_norm": 0.5127379298210144, "learning_rate": 1.2466185155700643e-05, "loss": 0.0596, "step": 7858 }, { "epoch": 1.31, "grad_norm": 0.58568274974823, "learning_rate": 1.2464435168872768e-05, "loss": 0.0591, "step": 7859 }, { "epoch": 1.31, "grad_norm": 0.5157971978187561, "learning_rate": 1.2462685101688862e-05, "loss": 0.0586, "step": 7860 }, { "epoch": 1.31, "grad_norm": 0.44500288367271423, "learning_rate": 1.2460934954205984e-05, "loss": 0.0501, "step": 7861 }, { "epoch": 1.31, "grad_norm": 0.5990158915519714, "learning_rate": 1.24591847264812e-05, "loss": 0.0747, "step": 7862 }, { "epoch": 1.32, "grad_norm": 0.5592418313026428, "learning_rate": 1.2457434418571585e-05, "loss": 0.0479, "step": 7863 }, { "epoch": 1.32, "grad_norm": 0.647800862789154, "learning_rate": 1.2455684030534202e-05, "loss": 0.054, "step": 7864 }, { "epoch": 1.32, "grad_norm": 0.4472559690475464, "learning_rate": 1.2453933562426128e-05, "loss": 0.0517, "step": 7865 }, { "epoch": 1.32, "grad_norm": 0.39516302943229675, "learning_rate": 1.2452183014304438e-05, "loss": 0.0442, "step": 7866 }, { "epoch": 1.32, "grad_norm": 0.5408539175987244, "learning_rate": 1.2450432386226215e-05, "loss": 0.0678, "step": 7867 }, { "epoch": 1.32, "grad_norm": 0.3965802788734436, "learning_rate": 1.2448681678248535e-05, "loss": 0.0607, "step": 7868 }, { "epoch": 1.32, "grad_norm": 0.7062634825706482, "learning_rate": 1.2446930890428486e-05, "loss": 0.067, "step": 7869 }, { "epoch": 1.32, "grad_norm": 0.5702236890792847, "learning_rate": 1.2445180022823151e-05, "loss": 0.051, "step": 7870 }, { "epoch": 1.32, "grad_norm": 0.5602123141288757, "learning_rate": 1.2443429075489624e-05, "loss": 0.0687, "step": 7871 }, { "epoch": 1.32, "grad_norm": 0.6130992770195007, "learning_rate": 1.244167804848499e-05, "loss": 0.0645, "step": 7872 }, { "epoch": 1.32, "grad_norm": 0.49167194962501526, "learning_rate": 1.2439926941866352e-05, "loss": 0.0522, "step": 7873 }, { "epoch": 1.32, "grad_norm": 0.43383491039276123, "learning_rate": 1.2438175755690802e-05, "loss": 0.0505, "step": 7874 }, { "epoch": 1.32, "grad_norm": 0.611499011516571, "learning_rate": 1.243642449001544e-05, "loss": 0.0675, "step": 7875 }, { "epoch": 1.32, "grad_norm": 0.5448439121246338, "learning_rate": 1.2434673144897367e-05, "loss": 0.069, "step": 7876 }, { "epoch": 1.32, "grad_norm": 0.8596929311752319, "learning_rate": 1.243292172039369e-05, "loss": 0.0645, "step": 7877 }, { "epoch": 1.32, "grad_norm": 0.5107598304748535, "learning_rate": 1.2431170216561517e-05, "loss": 0.0598, "step": 7878 }, { "epoch": 1.32, "grad_norm": 0.5836235880851746, "learning_rate": 1.2429418633457954e-05, "loss": 0.0576, "step": 7879 }, { "epoch": 1.32, "grad_norm": 0.468282550573349, "learning_rate": 1.242766697114012e-05, "loss": 0.0624, "step": 7880 }, { "epoch": 1.32, "grad_norm": 0.5112684965133667, "learning_rate": 1.2425915229665127e-05, "loss": 0.0666, "step": 7881 }, { "epoch": 1.32, "grad_norm": 0.5440428853034973, "learning_rate": 1.2424163409090089e-05, "loss": 0.0596, "step": 7882 }, { "epoch": 1.32, "grad_norm": 0.6900200843811035, "learning_rate": 1.2422411509472132e-05, "loss": 0.0664, "step": 7883 }, { "epoch": 1.32, "grad_norm": 0.6959503293037415, "learning_rate": 1.2420659530868378e-05, "loss": 0.0752, "step": 7884 }, { "epoch": 1.32, "grad_norm": 0.5737928152084351, "learning_rate": 1.2418907473335946e-05, "loss": 0.0624, "step": 7885 }, { "epoch": 1.32, "grad_norm": 0.7215805649757385, "learning_rate": 1.2417155336931972e-05, "loss": 0.0679, "step": 7886 }, { "epoch": 1.32, "grad_norm": 0.472846657037735, "learning_rate": 1.2415403121713582e-05, "loss": 0.073, "step": 7887 }, { "epoch": 1.32, "grad_norm": 0.5044549107551575, "learning_rate": 1.2413650827737917e-05, "loss": 0.0497, "step": 7888 }, { "epoch": 1.32, "grad_norm": 0.45433783531188965, "learning_rate": 1.2411898455062102e-05, "loss": 0.054, "step": 7889 }, { "epoch": 1.32, "grad_norm": 0.4243088960647583, "learning_rate": 1.241014600374328e-05, "loss": 0.0641, "step": 7890 }, { "epoch": 1.32, "grad_norm": 0.5981335639953613, "learning_rate": 1.2408393473838593e-05, "loss": 0.0575, "step": 7891 }, { "epoch": 1.32, "grad_norm": 0.49934831261634827, "learning_rate": 1.2406640865405184e-05, "loss": 0.0559, "step": 7892 }, { "epoch": 1.32, "grad_norm": 0.5307656526565552, "learning_rate": 1.2404888178500196e-05, "loss": 0.0586, "step": 7893 }, { "epoch": 1.32, "grad_norm": 0.6095361709594727, "learning_rate": 1.240313541318078e-05, "loss": 0.0613, "step": 7894 }, { "epoch": 1.32, "grad_norm": 0.4839010238647461, "learning_rate": 1.240138256950409e-05, "loss": 0.0489, "step": 7895 }, { "epoch": 1.32, "grad_norm": 0.41693004965782166, "learning_rate": 1.2399629647527276e-05, "loss": 0.0434, "step": 7896 }, { "epoch": 1.32, "grad_norm": 0.49121683835983276, "learning_rate": 1.2397876647307494e-05, "loss": 0.057, "step": 7897 }, { "epoch": 1.32, "grad_norm": 0.42881590127944946, "learning_rate": 1.2396123568901906e-05, "loss": 0.0502, "step": 7898 }, { "epoch": 1.32, "grad_norm": 0.5201939940452576, "learning_rate": 1.2394370412367667e-05, "loss": 0.0765, "step": 7899 }, { "epoch": 1.32, "grad_norm": 0.4610244631767273, "learning_rate": 1.2392617177761948e-05, "loss": 0.0525, "step": 7900 }, { "epoch": 1.32, "grad_norm": 0.6589928269386292, "learning_rate": 1.2390863865141913e-05, "loss": 0.0645, "step": 7901 }, { "epoch": 1.32, "grad_norm": 0.6788647770881653, "learning_rate": 1.2389110474564729e-05, "loss": 0.0549, "step": 7902 }, { "epoch": 1.32, "grad_norm": 0.47897136211395264, "learning_rate": 1.238735700608757e-05, "loss": 0.0548, "step": 7903 }, { "epoch": 1.32, "grad_norm": 0.5041751861572266, "learning_rate": 1.238560345976761e-05, "loss": 0.0533, "step": 7904 }, { "epoch": 1.32, "grad_norm": 0.4733237624168396, "learning_rate": 1.2383849835662024e-05, "loss": 0.0612, "step": 7905 }, { "epoch": 1.32, "grad_norm": 0.5424948334693909, "learning_rate": 1.238209613382799e-05, "loss": 0.0508, "step": 7906 }, { "epoch": 1.32, "grad_norm": 0.5625592470169067, "learning_rate": 1.2380342354322692e-05, "loss": 0.0644, "step": 7907 }, { "epoch": 1.32, "grad_norm": 0.39498627185821533, "learning_rate": 1.2378588497203316e-05, "loss": 0.0389, "step": 7908 }, { "epoch": 1.32, "grad_norm": 0.6271566152572632, "learning_rate": 1.2376834562527045e-05, "loss": 0.0597, "step": 7909 }, { "epoch": 1.32, "grad_norm": 0.5980409979820251, "learning_rate": 1.237508055035107e-05, "loss": 0.0663, "step": 7910 }, { "epoch": 1.32, "grad_norm": 0.44787752628326416, "learning_rate": 1.2373326460732582e-05, "loss": 0.0496, "step": 7911 }, { "epoch": 1.32, "grad_norm": 0.6412588953971863, "learning_rate": 1.237157229372878e-05, "loss": 0.0718, "step": 7912 }, { "epoch": 1.32, "grad_norm": 0.46913912892341614, "learning_rate": 1.236981804939685e-05, "loss": 0.0567, "step": 7913 }, { "epoch": 1.32, "grad_norm": 0.430040568113327, "learning_rate": 1.2368063727794e-05, "loss": 0.0481, "step": 7914 }, { "epoch": 1.32, "grad_norm": 0.7304078936576843, "learning_rate": 1.236630932897743e-05, "loss": 0.084, "step": 7915 }, { "epoch": 1.32, "grad_norm": 0.4589430093765259, "learning_rate": 1.236455485300435e-05, "loss": 0.0572, "step": 7916 }, { "epoch": 1.32, "grad_norm": 0.8149488568305969, "learning_rate": 1.2362800299931956e-05, "loss": 0.0714, "step": 7917 }, { "epoch": 1.32, "grad_norm": 0.48521727323532104, "learning_rate": 1.236104566981746e-05, "loss": 0.0671, "step": 7918 }, { "epoch": 1.32, "grad_norm": 0.6093348860740662, "learning_rate": 1.2359290962718083e-05, "loss": 0.0862, "step": 7919 }, { "epoch": 1.32, "grad_norm": 0.5571469068527222, "learning_rate": 1.235753617869103e-05, "loss": 0.0666, "step": 7920 }, { "epoch": 1.32, "grad_norm": 0.6274174451828003, "learning_rate": 1.235578131779352e-05, "loss": 0.074, "step": 7921 }, { "epoch": 1.32, "grad_norm": 0.48890313506126404, "learning_rate": 1.235402638008277e-05, "loss": 0.056, "step": 7922 }, { "epoch": 1.33, "grad_norm": 0.4543735980987549, "learning_rate": 1.2352271365616013e-05, "loss": 0.0632, "step": 7923 }, { "epoch": 1.33, "grad_norm": 0.379839688539505, "learning_rate": 1.2350516274450464e-05, "loss": 0.0504, "step": 7924 }, { "epoch": 1.33, "grad_norm": 0.4342459440231323, "learning_rate": 1.234876110664335e-05, "loss": 0.0457, "step": 7925 }, { "epoch": 1.33, "grad_norm": 0.5511451363563538, "learning_rate": 1.2347005862251903e-05, "loss": 0.0637, "step": 7926 }, { "epoch": 1.33, "grad_norm": 0.491300106048584, "learning_rate": 1.2345250541333353e-05, "loss": 0.0667, "step": 7927 }, { "epoch": 1.33, "grad_norm": 0.7094665765762329, "learning_rate": 1.2343495143944938e-05, "loss": 0.0663, "step": 7928 }, { "epoch": 1.33, "grad_norm": 0.5518612861633301, "learning_rate": 1.2341739670143891e-05, "loss": 0.0666, "step": 7929 }, { "epoch": 1.33, "grad_norm": 0.5723085999488831, "learning_rate": 1.2339984119987453e-05, "loss": 0.0653, "step": 7930 }, { "epoch": 1.33, "grad_norm": 0.6212442517280579, "learning_rate": 1.2338228493532869e-05, "loss": 0.0589, "step": 7931 }, { "epoch": 1.33, "grad_norm": 0.5926452875137329, "learning_rate": 1.2336472790837378e-05, "loss": 0.0776, "step": 7932 }, { "epoch": 1.33, "grad_norm": 0.49752727150917053, "learning_rate": 1.233471701195823e-05, "loss": 0.0642, "step": 7933 }, { "epoch": 1.33, "grad_norm": 0.7093541622161865, "learning_rate": 1.2332961156952675e-05, "loss": 0.0842, "step": 7934 }, { "epoch": 1.33, "grad_norm": 0.4052996039390564, "learning_rate": 1.233120522587796e-05, "loss": 0.0636, "step": 7935 }, { "epoch": 1.33, "grad_norm": 0.5876361131668091, "learning_rate": 1.2329449218791348e-05, "loss": 0.0688, "step": 7936 }, { "epoch": 1.33, "grad_norm": 0.494057297706604, "learning_rate": 1.2327693135750087e-05, "loss": 0.0734, "step": 7937 }, { "epoch": 1.33, "grad_norm": 0.6556054949760437, "learning_rate": 1.2325936976811441e-05, "loss": 0.0678, "step": 7938 }, { "epoch": 1.33, "grad_norm": 0.4269294738769531, "learning_rate": 1.2324180742032671e-05, "loss": 0.055, "step": 7939 }, { "epoch": 1.33, "grad_norm": 0.42970040440559387, "learning_rate": 1.2322424431471048e-05, "loss": 0.0636, "step": 7940 }, { "epoch": 1.33, "grad_norm": 0.4763333797454834, "learning_rate": 1.2320668045183824e-05, "loss": 0.052, "step": 7941 }, { "epoch": 1.33, "grad_norm": 0.45944422483444214, "learning_rate": 1.2318911583228276e-05, "loss": 0.0451, "step": 7942 }, { "epoch": 1.33, "grad_norm": 0.3345688283443451, "learning_rate": 1.2317155045661679e-05, "loss": 0.0431, "step": 7943 }, { "epoch": 1.33, "grad_norm": 0.6248340010643005, "learning_rate": 1.2315398432541305e-05, "loss": 0.0534, "step": 7944 }, { "epoch": 1.33, "grad_norm": 0.37212544679641724, "learning_rate": 1.2313641743924429e-05, "loss": 0.0408, "step": 7945 }, { "epoch": 1.33, "grad_norm": 0.4913914203643799, "learning_rate": 1.2311884979868328e-05, "loss": 0.0482, "step": 7946 }, { "epoch": 1.33, "grad_norm": 0.5531773567199707, "learning_rate": 1.2310128140430288e-05, "loss": 0.0719, "step": 7947 }, { "epoch": 1.33, "grad_norm": 0.4799037575721741, "learning_rate": 1.2308371225667593e-05, "loss": 0.0599, "step": 7948 }, { "epoch": 1.33, "grad_norm": 0.4575914740562439, "learning_rate": 1.2306614235637525e-05, "loss": 0.0509, "step": 7949 }, { "epoch": 1.33, "grad_norm": 0.5049802660942078, "learning_rate": 1.2304857170397374e-05, "loss": 0.0509, "step": 7950 }, { "epoch": 1.33, "grad_norm": 0.5688210129737854, "learning_rate": 1.2303100030004437e-05, "loss": 0.0604, "step": 7951 }, { "epoch": 1.33, "grad_norm": 0.48285770416259766, "learning_rate": 1.2301342814516005e-05, "loss": 0.0743, "step": 7952 }, { "epoch": 1.33, "grad_norm": 0.7672193050384521, "learning_rate": 1.2299585523989368e-05, "loss": 0.0655, "step": 7953 }, { "epoch": 1.33, "grad_norm": 0.46642056107521057, "learning_rate": 1.229782815848183e-05, "loss": 0.0438, "step": 7954 }, { "epoch": 1.33, "grad_norm": 0.5032579898834229, "learning_rate": 1.2296070718050694e-05, "loss": 0.0685, "step": 7955 }, { "epoch": 1.33, "grad_norm": 0.41523969173431396, "learning_rate": 1.2294313202753261e-05, "loss": 0.0617, "step": 7956 }, { "epoch": 1.33, "grad_norm": 0.5265172123908997, "learning_rate": 1.2292555612646835e-05, "loss": 0.0521, "step": 7957 }, { "epoch": 1.33, "grad_norm": 0.782727837562561, "learning_rate": 1.2290797947788729e-05, "loss": 0.0788, "step": 7958 }, { "epoch": 1.33, "grad_norm": 0.4426330029964447, "learning_rate": 1.2289040208236252e-05, "loss": 0.0512, "step": 7959 }, { "epoch": 1.33, "grad_norm": 0.5215047597885132, "learning_rate": 1.2287282394046716e-05, "loss": 0.0645, "step": 7960 }, { "epoch": 1.33, "grad_norm": 0.3748752176761627, "learning_rate": 1.2285524505277437e-05, "loss": 0.0423, "step": 7961 }, { "epoch": 1.33, "grad_norm": 0.5477082133293152, "learning_rate": 1.2283766541985733e-05, "loss": 0.0418, "step": 7962 }, { "epoch": 1.33, "grad_norm": 0.6348974704742432, "learning_rate": 1.2282008504228929e-05, "loss": 0.075, "step": 7963 }, { "epoch": 1.33, "grad_norm": 0.48207053542137146, "learning_rate": 1.2280250392064341e-05, "loss": 0.0472, "step": 7964 }, { "epoch": 1.33, "grad_norm": 0.40832147002220154, "learning_rate": 1.22784922055493e-05, "loss": 0.047, "step": 7965 }, { "epoch": 1.33, "grad_norm": 0.4409593939781189, "learning_rate": 1.2276733944741133e-05, "loss": 0.0667, "step": 7966 }, { "epoch": 1.33, "grad_norm": 0.44629210233688354, "learning_rate": 1.2274975609697167e-05, "loss": 0.0657, "step": 7967 }, { "epoch": 1.33, "grad_norm": 0.5582334995269775, "learning_rate": 1.227321720047474e-05, "loss": 0.0496, "step": 7968 }, { "epoch": 1.33, "grad_norm": 0.40211549401283264, "learning_rate": 1.2271458717131181e-05, "loss": 0.0483, "step": 7969 }, { "epoch": 1.33, "grad_norm": 0.6844498515129089, "learning_rate": 1.2269700159723833e-05, "loss": 0.0589, "step": 7970 }, { "epoch": 1.33, "grad_norm": 0.65604168176651, "learning_rate": 1.226794152831003e-05, "loss": 0.0608, "step": 7971 }, { "epoch": 1.33, "grad_norm": 0.4629172384738922, "learning_rate": 1.2266182822947124e-05, "loss": 0.0612, "step": 7972 }, { "epoch": 1.33, "grad_norm": 0.5285162925720215, "learning_rate": 1.226442404369245e-05, "loss": 0.0696, "step": 7973 }, { "epoch": 1.33, "grad_norm": 0.6710754632949829, "learning_rate": 1.2262665190603358e-05, "loss": 0.0963, "step": 7974 }, { "epoch": 1.33, "grad_norm": 0.5255330801010132, "learning_rate": 1.2260906263737201e-05, "loss": 0.0735, "step": 7975 }, { "epoch": 1.33, "grad_norm": 0.6407688856124878, "learning_rate": 1.225914726315133e-05, "loss": 0.0653, "step": 7976 }, { "epoch": 1.33, "grad_norm": 0.4701845347881317, "learning_rate": 1.2257388188903097e-05, "loss": 0.047, "step": 7977 }, { "epoch": 1.33, "grad_norm": 0.4535968601703644, "learning_rate": 1.2255629041049859e-05, "loss": 0.0503, "step": 7978 }, { "epoch": 1.33, "grad_norm": 0.41022756695747375, "learning_rate": 1.225386981964898e-05, "loss": 0.0495, "step": 7979 }, { "epoch": 1.33, "grad_norm": 0.48056164383888245, "learning_rate": 1.2252110524757814e-05, "loss": 0.0664, "step": 7980 }, { "epoch": 1.33, "grad_norm": 0.47323012351989746, "learning_rate": 1.2250351156433732e-05, "loss": 0.0561, "step": 7981 }, { "epoch": 1.34, "grad_norm": 0.4735782742500305, "learning_rate": 1.2248591714734096e-05, "loss": 0.0489, "step": 7982 }, { "epoch": 1.34, "grad_norm": 0.5080779790878296, "learning_rate": 1.2246832199716276e-05, "loss": 0.0458, "step": 7983 }, { "epoch": 1.34, "grad_norm": 0.5907402634620667, "learning_rate": 1.2245072611437646e-05, "loss": 0.0795, "step": 7984 }, { "epoch": 1.34, "grad_norm": 0.6348797678947449, "learning_rate": 1.2243312949955575e-05, "loss": 0.0662, "step": 7985 }, { "epoch": 1.34, "grad_norm": 0.4829604923725128, "learning_rate": 1.2241553215327442e-05, "loss": 0.0504, "step": 7986 }, { "epoch": 1.34, "grad_norm": 0.6010751128196716, "learning_rate": 1.2239793407610625e-05, "loss": 0.06, "step": 7987 }, { "epoch": 1.34, "grad_norm": 0.4299497604370117, "learning_rate": 1.2238033526862503e-05, "loss": 0.059, "step": 7988 }, { "epoch": 1.34, "grad_norm": 0.42466622591018677, "learning_rate": 1.223627357314046e-05, "loss": 0.0378, "step": 7989 }, { "epoch": 1.34, "grad_norm": 0.5393142104148865, "learning_rate": 1.2234513546501883e-05, "loss": 0.0605, "step": 7990 }, { "epoch": 1.34, "grad_norm": 0.5109707117080688, "learning_rate": 1.223275344700416e-05, "loss": 0.0527, "step": 7991 }, { "epoch": 1.34, "grad_norm": 0.5549370050430298, "learning_rate": 1.2230993274704678e-05, "loss": 0.0746, "step": 7992 }, { "epoch": 1.34, "grad_norm": 0.38686829805374146, "learning_rate": 1.2229233029660834e-05, "loss": 0.045, "step": 7993 }, { "epoch": 1.34, "grad_norm": 0.5913988351821899, "learning_rate": 1.222747271193002e-05, "loss": 0.0461, "step": 7994 }, { "epoch": 1.34, "grad_norm": 0.487540066242218, "learning_rate": 1.2225712321569635e-05, "loss": 0.0539, "step": 7995 }, { "epoch": 1.34, "grad_norm": 0.6577216982841492, "learning_rate": 1.2223951858637076e-05, "loss": 0.064, "step": 7996 }, { "epoch": 1.34, "grad_norm": 0.49292922019958496, "learning_rate": 1.222219132318975e-05, "loss": 0.0515, "step": 7997 }, { "epoch": 1.34, "grad_norm": 0.6102376580238342, "learning_rate": 1.2220430715285054e-05, "loss": 0.0658, "step": 7998 }, { "epoch": 1.34, "grad_norm": 0.4327258765697479, "learning_rate": 1.2218670034980402e-05, "loss": 0.0495, "step": 7999 }, { "epoch": 1.34, "grad_norm": 0.44873735308647156, "learning_rate": 1.2216909282333204e-05, "loss": 0.0619, "step": 8000 }, { "epoch": 1.34, "grad_norm": 0.4852392375469208, "learning_rate": 1.2215148457400865e-05, "loss": 0.069, "step": 8001 }, { "epoch": 1.34, "grad_norm": 0.4165891408920288, "learning_rate": 1.2213387560240798e-05, "loss": 0.0406, "step": 8002 }, { "epoch": 1.34, "grad_norm": 0.5309861898422241, "learning_rate": 1.2211626590910431e-05, "loss": 0.0585, "step": 8003 }, { "epoch": 1.34, "grad_norm": 0.49452635645866394, "learning_rate": 1.2209865549467172e-05, "loss": 0.0405, "step": 8004 }, { "epoch": 1.34, "grad_norm": 0.4833468496799469, "learning_rate": 1.2208104435968446e-05, "loss": 0.045, "step": 8005 }, { "epoch": 1.34, "grad_norm": 0.5994383096694946, "learning_rate": 1.2206343250471672e-05, "loss": 0.0816, "step": 8006 }, { "epoch": 1.34, "grad_norm": 0.574188232421875, "learning_rate": 1.2204581993034283e-05, "loss": 0.0512, "step": 8007 }, { "epoch": 1.34, "grad_norm": 0.5002033710479736, "learning_rate": 1.2202820663713702e-05, "loss": 0.0483, "step": 8008 }, { "epoch": 1.34, "grad_norm": 0.5068668723106384, "learning_rate": 1.2201059262567361e-05, "loss": 0.0497, "step": 8009 }, { "epoch": 1.34, "grad_norm": 0.5409654378890991, "learning_rate": 1.219929778965269e-05, "loss": 0.0577, "step": 8010 }, { "epoch": 1.34, "grad_norm": 0.7359579205513, "learning_rate": 1.2197536245027128e-05, "loss": 0.0758, "step": 8011 }, { "epoch": 1.34, "grad_norm": 0.43252110481262207, "learning_rate": 1.2195774628748111e-05, "loss": 0.0457, "step": 8012 }, { "epoch": 1.34, "grad_norm": 0.5647807121276855, "learning_rate": 1.2194012940873076e-05, "loss": 0.0578, "step": 8013 }, { "epoch": 1.34, "grad_norm": 0.512211263179779, "learning_rate": 1.2192251181459471e-05, "loss": 0.0618, "step": 8014 }, { "epoch": 1.34, "grad_norm": 0.4682868421077728, "learning_rate": 1.2190489350564736e-05, "loss": 0.0638, "step": 8015 }, { "epoch": 1.34, "grad_norm": 0.6308727264404297, "learning_rate": 1.2188727448246315e-05, "loss": 0.0649, "step": 8016 }, { "epoch": 1.34, "grad_norm": 0.9111846685409546, "learning_rate": 1.2186965474561665e-05, "loss": 0.0467, "step": 8017 }, { "epoch": 1.34, "grad_norm": 0.563866376876831, "learning_rate": 1.218520342956823e-05, "loss": 0.0589, "step": 8018 }, { "epoch": 1.34, "grad_norm": 0.5718457698822021, "learning_rate": 1.218344131332347e-05, "loss": 0.0595, "step": 8019 }, { "epoch": 1.34, "grad_norm": 0.36263224482536316, "learning_rate": 1.2181679125884835e-05, "loss": 0.0415, "step": 8020 }, { "epoch": 1.34, "grad_norm": 0.4766882061958313, "learning_rate": 1.2179916867309788e-05, "loss": 0.0558, "step": 8021 }, { "epoch": 1.34, "grad_norm": 0.5792388319969177, "learning_rate": 1.2178154537655787e-05, "loss": 0.0473, "step": 8022 }, { "epoch": 1.34, "grad_norm": 0.39839303493499756, "learning_rate": 1.2176392136980295e-05, "loss": 0.041, "step": 8023 }, { "epoch": 1.34, "grad_norm": 0.5953978896141052, "learning_rate": 1.217462966534078e-05, "loss": 0.0682, "step": 8024 }, { "epoch": 1.34, "grad_norm": 0.3742224872112274, "learning_rate": 1.217286712279471e-05, "loss": 0.0527, "step": 8025 }, { "epoch": 1.34, "grad_norm": 0.5408855676651001, "learning_rate": 1.2171104509399547e-05, "loss": 0.0559, "step": 8026 }, { "epoch": 1.34, "grad_norm": 0.6514071822166443, "learning_rate": 1.216934182521277e-05, "loss": 0.0496, "step": 8027 }, { "epoch": 1.34, "grad_norm": 0.3613702356815338, "learning_rate": 1.2167579070291857e-05, "loss": 0.0375, "step": 8028 }, { "epoch": 1.34, "grad_norm": 0.5377563238143921, "learning_rate": 1.2165816244694278e-05, "loss": 0.0516, "step": 8029 }, { "epoch": 1.34, "grad_norm": 0.37521278858184814, "learning_rate": 1.2164053348477509e-05, "loss": 0.0381, "step": 8030 }, { "epoch": 1.34, "grad_norm": 0.4442596435546875, "learning_rate": 1.2162290381699043e-05, "loss": 0.0559, "step": 8031 }, { "epoch": 1.34, "grad_norm": 0.4878586232662201, "learning_rate": 1.216052734441636e-05, "loss": 0.0597, "step": 8032 }, { "epoch": 1.34, "grad_norm": 0.5676252841949463, "learning_rate": 1.215876423668694e-05, "loss": 0.0596, "step": 8033 }, { "epoch": 1.34, "grad_norm": 0.4837912619113922, "learning_rate": 1.2157001058568271e-05, "loss": 0.0532, "step": 8034 }, { "epoch": 1.34, "grad_norm": 0.39695194363594055, "learning_rate": 1.2155237810117857e-05, "loss": 0.0591, "step": 8035 }, { "epoch": 1.34, "grad_norm": 0.662670373916626, "learning_rate": 1.2153474491393176e-05, "loss": 0.0627, "step": 8036 }, { "epoch": 1.34, "grad_norm": 0.5758670568466187, "learning_rate": 1.215171110245173e-05, "loss": 0.0402, "step": 8037 }, { "epoch": 1.34, "grad_norm": 0.5117998719215393, "learning_rate": 1.2149947643351014e-05, "loss": 0.0572, "step": 8038 }, { "epoch": 1.34, "grad_norm": 0.4271024167537689, "learning_rate": 1.2148184114148533e-05, "loss": 0.0748, "step": 8039 }, { "epoch": 1.34, "grad_norm": 0.4635563790798187, "learning_rate": 1.2146420514901782e-05, "loss": 0.0562, "step": 8040 }, { "epoch": 1.34, "grad_norm": 0.49835777282714844, "learning_rate": 1.2144656845668273e-05, "loss": 0.0661, "step": 8041 }, { "epoch": 1.35, "grad_norm": 0.43940040469169617, "learning_rate": 1.2142893106505504e-05, "loss": 0.0577, "step": 8042 }, { "epoch": 1.35, "grad_norm": 0.5007457137107849, "learning_rate": 1.214112929747099e-05, "loss": 0.0422, "step": 8043 }, { "epoch": 1.35, "grad_norm": 0.551441490650177, "learning_rate": 1.2139365418622241e-05, "loss": 0.0725, "step": 8044 }, { "epoch": 1.35, "grad_norm": 0.4646163880825043, "learning_rate": 1.2137601470016771e-05, "loss": 0.0589, "step": 8045 }, { "epoch": 1.35, "grad_norm": 0.4473355710506439, "learning_rate": 1.2135837451712096e-05, "loss": 0.0571, "step": 8046 }, { "epoch": 1.35, "grad_norm": 0.5607967376708984, "learning_rate": 1.213407336376573e-05, "loss": 0.0623, "step": 8047 }, { "epoch": 1.35, "grad_norm": 0.5217573046684265, "learning_rate": 1.2132309206235198e-05, "loss": 0.0614, "step": 8048 }, { "epoch": 1.35, "grad_norm": 0.6121222376823425, "learning_rate": 1.2130544979178023e-05, "loss": 0.0611, "step": 8049 }, { "epoch": 1.35, "grad_norm": 0.5509546399116516, "learning_rate": 1.2128780682651725e-05, "loss": 0.0733, "step": 8050 }, { "epoch": 1.35, "grad_norm": 0.4920177161693573, "learning_rate": 1.2127016316713833e-05, "loss": 0.0595, "step": 8051 }, { "epoch": 1.35, "grad_norm": 0.5279003381729126, "learning_rate": 1.212525188142188e-05, "loss": 0.0594, "step": 8052 }, { "epoch": 1.35, "grad_norm": 0.4811278283596039, "learning_rate": 1.2123487376833396e-05, "loss": 0.0531, "step": 8053 }, { "epoch": 1.35, "grad_norm": 0.4484371542930603, "learning_rate": 1.2121722803005909e-05, "loss": 0.052, "step": 8054 }, { "epoch": 1.35, "grad_norm": 0.4833744168281555, "learning_rate": 1.2119958159996962e-05, "loss": 0.0348, "step": 8055 }, { "epoch": 1.35, "grad_norm": 0.6160186529159546, "learning_rate": 1.2118193447864095e-05, "loss": 0.0686, "step": 8056 }, { "epoch": 1.35, "grad_norm": 0.5581498146057129, "learning_rate": 1.2116428666664843e-05, "loss": 0.0751, "step": 8057 }, { "epoch": 1.35, "grad_norm": 0.5505594611167908, "learning_rate": 1.2114663816456748e-05, "loss": 0.0712, "step": 8058 }, { "epoch": 1.35, "grad_norm": 0.40163445472717285, "learning_rate": 1.2112898897297363e-05, "loss": 0.0389, "step": 8059 }, { "epoch": 1.35, "grad_norm": 0.4864220917224884, "learning_rate": 1.2111133909244231e-05, "loss": 0.0546, "step": 8060 }, { "epoch": 1.35, "grad_norm": 0.32080456614494324, "learning_rate": 1.21093688523549e-05, "loss": 0.0365, "step": 8061 }, { "epoch": 1.35, "grad_norm": 0.44897785782814026, "learning_rate": 1.2107603726686918e-05, "loss": 0.0462, "step": 8062 }, { "epoch": 1.35, "grad_norm": 0.5315614938735962, "learning_rate": 1.2105838532297853e-05, "loss": 0.0682, "step": 8063 }, { "epoch": 1.35, "grad_norm": 0.6696727275848389, "learning_rate": 1.210407326924525e-05, "loss": 0.0357, "step": 8064 }, { "epoch": 1.35, "grad_norm": 0.3805040717124939, "learning_rate": 1.2102307937586668e-05, "loss": 0.0535, "step": 8065 }, { "epoch": 1.35, "grad_norm": 0.6264687776565552, "learning_rate": 1.2100542537379669e-05, "loss": 0.069, "step": 8066 }, { "epoch": 1.35, "grad_norm": 0.6234757900238037, "learning_rate": 1.2098777068681821e-05, "loss": 0.0608, "step": 8067 }, { "epoch": 1.35, "grad_norm": 0.6910066604614258, "learning_rate": 1.2097011531550686e-05, "loss": 0.0803, "step": 8068 }, { "epoch": 1.35, "grad_norm": 0.36710605025291443, "learning_rate": 1.209524592604383e-05, "loss": 0.0522, "step": 8069 }, { "epoch": 1.35, "grad_norm": 0.5262662768363953, "learning_rate": 1.2093480252218826e-05, "loss": 0.0534, "step": 8070 }, { "epoch": 1.35, "grad_norm": 0.4897880256175995, "learning_rate": 1.2091714510133241e-05, "loss": 0.0469, "step": 8071 }, { "epoch": 1.35, "grad_norm": 0.5688501596450806, "learning_rate": 1.2089948699844656e-05, "loss": 0.0726, "step": 8072 }, { "epoch": 1.35, "grad_norm": 0.4306562542915344, "learning_rate": 1.2088182821410644e-05, "loss": 0.0539, "step": 8073 }, { "epoch": 1.35, "grad_norm": 0.4276820719242096, "learning_rate": 1.208641687488878e-05, "loss": 0.0531, "step": 8074 }, { "epoch": 1.35, "grad_norm": 0.5418925881385803, "learning_rate": 1.2084650860336651e-05, "loss": 0.0671, "step": 8075 }, { "epoch": 1.35, "grad_norm": 0.6547116637229919, "learning_rate": 1.2082884777811838e-05, "loss": 0.0571, "step": 8076 }, { "epoch": 1.35, "grad_norm": 0.3916967511177063, "learning_rate": 1.2081118627371928e-05, "loss": 0.0555, "step": 8077 }, { "epoch": 1.35, "grad_norm": 0.5824996829032898, "learning_rate": 1.2079352409074501e-05, "loss": 0.0476, "step": 8078 }, { "epoch": 1.35, "grad_norm": 0.5639327168464661, "learning_rate": 1.2077586122977155e-05, "loss": 0.0539, "step": 8079 }, { "epoch": 1.35, "grad_norm": 0.4703729748725891, "learning_rate": 1.2075819769137482e-05, "loss": 0.0528, "step": 8080 }, { "epoch": 1.35, "grad_norm": 0.4068840444087982, "learning_rate": 1.2074053347613075e-05, "loss": 0.0614, "step": 8081 }, { "epoch": 1.35, "grad_norm": 0.5691633820533752, "learning_rate": 1.2072286858461524e-05, "loss": 0.0606, "step": 8082 }, { "epoch": 1.35, "grad_norm": 0.5258880853652954, "learning_rate": 1.2070520301740434e-05, "loss": 0.062, "step": 8083 }, { "epoch": 1.35, "grad_norm": 0.40072405338287354, "learning_rate": 1.2068753677507407e-05, "loss": 0.0491, "step": 8084 }, { "epoch": 1.35, "grad_norm": 0.541908860206604, "learning_rate": 1.2066986985820044e-05, "loss": 0.0594, "step": 8085 }, { "epoch": 1.35, "grad_norm": 0.5559009313583374, "learning_rate": 1.2065220226735945e-05, "loss": 0.0651, "step": 8086 }, { "epoch": 1.35, "grad_norm": 0.5348414778709412, "learning_rate": 1.2063453400312723e-05, "loss": 0.054, "step": 8087 }, { "epoch": 1.35, "grad_norm": 0.5417068004608154, "learning_rate": 1.2061686506607992e-05, "loss": 0.0645, "step": 8088 }, { "epoch": 1.35, "grad_norm": 0.5351317524909973, "learning_rate": 1.2059919545679356e-05, "loss": 0.0518, "step": 8089 }, { "epoch": 1.35, "grad_norm": 0.5818567276000977, "learning_rate": 1.2058152517584429e-05, "loss": 0.0657, "step": 8090 }, { "epoch": 1.35, "grad_norm": 0.40380072593688965, "learning_rate": 1.2056385422380834e-05, "loss": 0.0487, "step": 8091 }, { "epoch": 1.35, "grad_norm": 0.9034416675567627, "learning_rate": 1.2054618260126184e-05, "loss": 0.0678, "step": 8092 }, { "epoch": 1.35, "grad_norm": 0.3733631670475006, "learning_rate": 1.2052851030878099e-05, "loss": 0.0514, "step": 8093 }, { "epoch": 1.35, "grad_norm": 0.5893561840057373, "learning_rate": 1.2051083734694202e-05, "loss": 0.0579, "step": 8094 }, { "epoch": 1.35, "grad_norm": 0.37514960765838623, "learning_rate": 1.2049316371632125e-05, "loss": 0.0425, "step": 8095 }, { "epoch": 1.35, "grad_norm": 1.020420789718628, "learning_rate": 1.2047548941749487e-05, "loss": 0.0761, "step": 8096 }, { "epoch": 1.35, "grad_norm": 0.5623622536659241, "learning_rate": 1.2045781445103919e-05, "loss": 0.067, "step": 8097 }, { "epoch": 1.35, "grad_norm": 0.4942074418067932, "learning_rate": 1.2044013881753054e-05, "loss": 0.0453, "step": 8098 }, { "epoch": 1.35, "grad_norm": 0.4910315275192261, "learning_rate": 1.2042246251754523e-05, "loss": 0.0408, "step": 8099 }, { "epoch": 1.35, "grad_norm": 0.4811505377292633, "learning_rate": 1.2040478555165967e-05, "loss": 0.0527, "step": 8100 }, { "epoch": 1.35, "grad_norm": 0.5983974933624268, "learning_rate": 1.2038710792045018e-05, "loss": 0.0466, "step": 8101 }, { "epoch": 1.36, "grad_norm": 0.46415457129478455, "learning_rate": 1.2036942962449321e-05, "loss": 0.0563, "step": 8102 }, { "epoch": 1.36, "grad_norm": 0.6624227166175842, "learning_rate": 1.2035175066436515e-05, "loss": 0.0472, "step": 8103 }, { "epoch": 1.36, "grad_norm": 0.49807819724082947, "learning_rate": 1.2033407104064247e-05, "loss": 0.0638, "step": 8104 }, { "epoch": 1.36, "grad_norm": 0.6189529895782471, "learning_rate": 1.2031639075390164e-05, "loss": 0.0547, "step": 8105 }, { "epoch": 1.36, "grad_norm": 0.42727112770080566, "learning_rate": 1.202987098047191e-05, "loss": 0.0484, "step": 8106 }, { "epoch": 1.36, "grad_norm": 0.6611759066581726, "learning_rate": 1.202810281936714e-05, "loss": 0.0474, "step": 8107 }, { "epoch": 1.36, "grad_norm": 0.6921817660331726, "learning_rate": 1.2026334592133506e-05, "loss": 0.0836, "step": 8108 }, { "epoch": 1.36, "grad_norm": 0.48313775658607483, "learning_rate": 1.2024566298828665e-05, "loss": 0.0607, "step": 8109 }, { "epoch": 1.36, "grad_norm": 0.6219015121459961, "learning_rate": 1.2022797939510271e-05, "loss": 0.0692, "step": 8110 }, { "epoch": 1.36, "grad_norm": 0.385355144739151, "learning_rate": 1.2021029514235988e-05, "loss": 0.0488, "step": 8111 }, { "epoch": 1.36, "grad_norm": 0.4186384379863739, "learning_rate": 1.2019261023063476e-05, "loss": 0.0419, "step": 8112 }, { "epoch": 1.36, "grad_norm": 0.5424379706382751, "learning_rate": 1.2017492466050395e-05, "loss": 0.0655, "step": 8113 }, { "epoch": 1.36, "grad_norm": 0.544806957244873, "learning_rate": 1.2015723843254415e-05, "loss": 0.0589, "step": 8114 }, { "epoch": 1.36, "grad_norm": 0.39789554476737976, "learning_rate": 1.2013955154733203e-05, "loss": 0.0602, "step": 8115 }, { "epoch": 1.36, "grad_norm": 0.4485962986946106, "learning_rate": 1.2012186400544435e-05, "loss": 0.0565, "step": 8116 }, { "epoch": 1.36, "grad_norm": 0.44480472803115845, "learning_rate": 1.2010417580745774e-05, "loss": 0.0801, "step": 8117 }, { "epoch": 1.36, "grad_norm": 0.4577893912792206, "learning_rate": 1.2008648695394898e-05, "loss": 0.0585, "step": 8118 }, { "epoch": 1.36, "grad_norm": 0.3681655824184418, "learning_rate": 1.2006879744549487e-05, "loss": 0.0386, "step": 8119 }, { "epoch": 1.36, "grad_norm": 0.3054830729961395, "learning_rate": 1.2005110728267217e-05, "loss": 0.0382, "step": 8120 }, { "epoch": 1.36, "grad_norm": 0.6260152459144592, "learning_rate": 1.2003341646605771e-05, "loss": 0.0587, "step": 8121 }, { "epoch": 1.36, "grad_norm": 0.5440188646316528, "learning_rate": 1.2001572499622827e-05, "loss": 0.0687, "step": 8122 }, { "epoch": 1.36, "grad_norm": 0.44764840602874756, "learning_rate": 1.1999803287376078e-05, "loss": 0.0388, "step": 8123 }, { "epoch": 1.36, "grad_norm": 0.5004682540893555, "learning_rate": 1.1998034009923205e-05, "loss": 0.0638, "step": 8124 }, { "epoch": 1.36, "grad_norm": 0.7702969312667847, "learning_rate": 1.1996264667321903e-05, "loss": 0.0691, "step": 8125 }, { "epoch": 1.36, "grad_norm": 0.5362379550933838, "learning_rate": 1.1994495259629857e-05, "loss": 0.0558, "step": 8126 }, { "epoch": 1.36, "grad_norm": 0.6176888346672058, "learning_rate": 1.1992725786904766e-05, "loss": 0.0589, "step": 8127 }, { "epoch": 1.36, "grad_norm": 0.4917254149913788, "learning_rate": 1.1990956249204324e-05, "loss": 0.0592, "step": 8128 }, { "epoch": 1.36, "grad_norm": 0.4838366210460663, "learning_rate": 1.1989186646586231e-05, "loss": 0.0533, "step": 8129 }, { "epoch": 1.36, "grad_norm": 0.6662773489952087, "learning_rate": 1.1987416979108184e-05, "loss": 0.0818, "step": 8130 }, { "epoch": 1.36, "grad_norm": 0.6839183568954468, "learning_rate": 1.1985647246827887e-05, "loss": 0.0559, "step": 8131 }, { "epoch": 1.36, "grad_norm": 0.5393132567405701, "learning_rate": 1.1983877449803043e-05, "loss": 0.0627, "step": 8132 }, { "epoch": 1.36, "grad_norm": 0.33880332112312317, "learning_rate": 1.1982107588091363e-05, "loss": 0.0625, "step": 8133 }, { "epoch": 1.36, "grad_norm": 0.3609122335910797, "learning_rate": 1.1980337661750547e-05, "loss": 0.042, "step": 8134 }, { "epoch": 1.36, "grad_norm": 0.5887267589569092, "learning_rate": 1.1978567670838314e-05, "loss": 0.0449, "step": 8135 }, { "epoch": 1.36, "grad_norm": 0.4793126881122589, "learning_rate": 1.1976797615412374e-05, "loss": 0.0536, "step": 8136 }, { "epoch": 1.36, "grad_norm": 0.5097075700759888, "learning_rate": 1.1975027495530443e-05, "loss": 0.0551, "step": 8137 }, { "epoch": 1.36, "grad_norm": 0.6200281977653503, "learning_rate": 1.1973257311250232e-05, "loss": 0.0639, "step": 8138 }, { "epoch": 1.36, "grad_norm": 0.4770514667034149, "learning_rate": 1.1971487062629467e-05, "loss": 0.0495, "step": 8139 }, { "epoch": 1.36, "grad_norm": 0.3862498700618744, "learning_rate": 1.1969716749725869e-05, "loss": 0.044, "step": 8140 }, { "epoch": 1.36, "grad_norm": 0.6358771324157715, "learning_rate": 1.1967946372597156e-05, "loss": 0.0687, "step": 8141 }, { "epoch": 1.36, "grad_norm": 0.5721862316131592, "learning_rate": 1.1966175931301056e-05, "loss": 0.0641, "step": 8142 }, { "epoch": 1.36, "grad_norm": 0.47638148069381714, "learning_rate": 1.1964405425895298e-05, "loss": 0.0665, "step": 8143 }, { "epoch": 1.36, "grad_norm": 0.6641896367073059, "learning_rate": 1.1962634856437614e-05, "loss": 0.0617, "step": 8144 }, { "epoch": 1.36, "grad_norm": 0.6310769319534302, "learning_rate": 1.196086422298573e-05, "loss": 0.0699, "step": 8145 }, { "epoch": 1.36, "grad_norm": 0.43082645535469055, "learning_rate": 1.1959093525597379e-05, "loss": 0.0492, "step": 8146 }, { "epoch": 1.36, "grad_norm": 0.5937941670417786, "learning_rate": 1.1957322764330305e-05, "loss": 0.0634, "step": 8147 }, { "epoch": 1.36, "grad_norm": 0.627724289894104, "learning_rate": 1.1955551939242238e-05, "loss": 0.0419, "step": 8148 }, { "epoch": 1.36, "grad_norm": 0.48327621817588806, "learning_rate": 1.195378105039092e-05, "loss": 0.0646, "step": 8149 }, { "epoch": 1.36, "grad_norm": 0.5445162057876587, "learning_rate": 1.1952010097834097e-05, "loss": 0.0687, "step": 8150 }, { "epoch": 1.36, "grad_norm": 0.4402396082878113, "learning_rate": 1.1950239081629507e-05, "loss": 0.0663, "step": 8151 }, { "epoch": 1.36, "grad_norm": 0.5402502417564392, "learning_rate": 1.1948468001834899e-05, "loss": 0.0455, "step": 8152 }, { "epoch": 1.36, "grad_norm": 0.4769269824028015, "learning_rate": 1.1946696858508022e-05, "loss": 0.0479, "step": 8153 }, { "epoch": 1.36, "grad_norm": 0.4827192425727844, "learning_rate": 1.1944925651706628e-05, "loss": 0.0594, "step": 8154 }, { "epoch": 1.36, "grad_norm": 0.45750996470451355, "learning_rate": 1.1943154381488466e-05, "loss": 0.063, "step": 8155 }, { "epoch": 1.36, "grad_norm": 0.5534437298774719, "learning_rate": 1.194138304791129e-05, "loss": 0.0695, "step": 8156 }, { "epoch": 1.36, "grad_norm": 0.6598028540611267, "learning_rate": 1.193961165103286e-05, "loss": 0.0638, "step": 8157 }, { "epoch": 1.36, "grad_norm": 0.40632346272468567, "learning_rate": 1.1937840190910934e-05, "loss": 0.0611, "step": 8158 }, { "epoch": 1.36, "grad_norm": 0.4594615697860718, "learning_rate": 1.1936068667603271e-05, "loss": 0.0625, "step": 8159 }, { "epoch": 1.36, "grad_norm": 0.509734034538269, "learning_rate": 1.1934297081167634e-05, "loss": 0.0461, "step": 8160 }, { "epoch": 1.36, "grad_norm": 0.5562077760696411, "learning_rate": 1.1932525431661793e-05, "loss": 0.065, "step": 8161 }, { "epoch": 1.37, "grad_norm": 0.4979715645313263, "learning_rate": 1.1930753719143502e-05, "loss": 0.0707, "step": 8162 }, { "epoch": 1.37, "grad_norm": 0.4604324996471405, "learning_rate": 1.1928981943670543e-05, "loss": 0.0448, "step": 8163 }, { "epoch": 1.37, "grad_norm": 0.5153425335884094, "learning_rate": 1.1927210105300682e-05, "loss": 0.0482, "step": 8164 }, { "epoch": 1.37, "grad_norm": 0.4884813725948334, "learning_rate": 1.1925438204091692e-05, "loss": 0.065, "step": 8165 }, { "epoch": 1.37, "grad_norm": 0.6363540291786194, "learning_rate": 1.1923666240101346e-05, "loss": 0.0592, "step": 8166 }, { "epoch": 1.37, "grad_norm": 0.6800013184547424, "learning_rate": 1.1921894213387425e-05, "loss": 0.0435, "step": 8167 }, { "epoch": 1.37, "grad_norm": 0.47257745265960693, "learning_rate": 1.1920122124007709e-05, "loss": 0.0444, "step": 8168 }, { "epoch": 1.37, "grad_norm": 0.6549305319786072, "learning_rate": 1.1918349972019975e-05, "loss": 0.0566, "step": 8169 }, { "epoch": 1.37, "grad_norm": 0.6006363034248352, "learning_rate": 1.1916577757482006e-05, "loss": 0.0718, "step": 8170 }, { "epoch": 1.37, "grad_norm": 0.687518835067749, "learning_rate": 1.191480548045159e-05, "loss": 0.0589, "step": 8171 }, { "epoch": 1.37, "grad_norm": 0.5412899851799011, "learning_rate": 1.1913033140986518e-05, "loss": 0.0439, "step": 8172 }, { "epoch": 1.37, "grad_norm": 0.4071716368198395, "learning_rate": 1.1911260739144573e-05, "loss": 0.0484, "step": 8173 }, { "epoch": 1.37, "grad_norm": 0.5636833906173706, "learning_rate": 1.1909488274983545e-05, "loss": 0.0398, "step": 8174 }, { "epoch": 1.37, "grad_norm": 0.5807887315750122, "learning_rate": 1.1907715748561234e-05, "loss": 0.0544, "step": 8175 }, { "epoch": 1.37, "grad_norm": 0.6513386368751526, "learning_rate": 1.1905943159935433e-05, "loss": 0.0606, "step": 8176 }, { "epoch": 1.37, "grad_norm": 0.44619491696357727, "learning_rate": 1.1904170509163937e-05, "loss": 0.0505, "step": 8177 }, { "epoch": 1.37, "grad_norm": 0.7088197469711304, "learning_rate": 1.1902397796304547e-05, "loss": 0.0679, "step": 8178 }, { "epoch": 1.37, "grad_norm": 0.7441220283508301, "learning_rate": 1.1900625021415066e-05, "loss": 0.046, "step": 8179 }, { "epoch": 1.37, "grad_norm": 0.4691925346851349, "learning_rate": 1.1898852184553298e-05, "loss": 0.0574, "step": 8180 }, { "epoch": 1.37, "grad_norm": 0.611449658870697, "learning_rate": 1.1897079285777047e-05, "loss": 0.0704, "step": 8181 }, { "epoch": 1.37, "grad_norm": 0.5787796378135681, "learning_rate": 1.1895306325144118e-05, "loss": 0.0584, "step": 8182 }, { "epoch": 1.37, "grad_norm": 0.43344423174858093, "learning_rate": 1.1893533302712326e-05, "loss": 0.0501, "step": 8183 }, { "epoch": 1.37, "grad_norm": 0.6083495616912842, "learning_rate": 1.1891760218539478e-05, "loss": 0.0549, "step": 8184 }, { "epoch": 1.37, "grad_norm": 0.6256555318832397, "learning_rate": 1.1889987072683391e-05, "loss": 0.0704, "step": 8185 }, { "epoch": 1.37, "grad_norm": 0.46139976382255554, "learning_rate": 1.1888213865201878e-05, "loss": 0.0701, "step": 8186 }, { "epoch": 1.37, "grad_norm": 0.6062437891960144, "learning_rate": 1.188644059615276e-05, "loss": 0.0772, "step": 8187 }, { "epoch": 1.37, "grad_norm": 0.9986203908920288, "learning_rate": 1.1884667265593855e-05, "loss": 0.0478, "step": 8188 }, { "epoch": 1.37, "grad_norm": 0.515709400177002, "learning_rate": 1.1882893873582983e-05, "loss": 0.0883, "step": 8189 }, { "epoch": 1.37, "grad_norm": 0.4607941210269928, "learning_rate": 1.1881120420177968e-05, "loss": 0.0527, "step": 8190 }, { "epoch": 1.37, "grad_norm": 0.5074267387390137, "learning_rate": 1.1879346905436639e-05, "loss": 0.0499, "step": 8191 }, { "epoch": 1.37, "grad_norm": 0.5742107033729553, "learning_rate": 1.1877573329416821e-05, "loss": 0.0627, "step": 8192 }, { "epoch": 1.37, "grad_norm": 0.45789170265197754, "learning_rate": 1.1875799692176346e-05, "loss": 0.0526, "step": 8193 }, { "epoch": 1.37, "grad_norm": 0.6471379995346069, "learning_rate": 1.187402599377304e-05, "loss": 0.0568, "step": 8194 }, { "epoch": 1.37, "grad_norm": 0.5575209856033325, "learning_rate": 1.1872252234264741e-05, "loss": 0.058, "step": 8195 }, { "epoch": 1.37, "grad_norm": 0.450554221868515, "learning_rate": 1.1870478413709288e-05, "loss": 0.0442, "step": 8196 }, { "epoch": 1.37, "grad_norm": 0.5748203992843628, "learning_rate": 1.1868704532164512e-05, "loss": 0.0647, "step": 8197 }, { "epoch": 1.37, "grad_norm": 0.5169352293014526, "learning_rate": 1.1866930589688255e-05, "loss": 0.0592, "step": 8198 }, { "epoch": 1.37, "grad_norm": 0.6451796293258667, "learning_rate": 1.186515658633836e-05, "loss": 0.0505, "step": 8199 }, { "epoch": 1.37, "grad_norm": 0.7956293821334839, "learning_rate": 1.1863382522172672e-05, "loss": 0.0625, "step": 8200 }, { "epoch": 1.37, "grad_norm": 0.39584800601005554, "learning_rate": 1.1861608397249034e-05, "loss": 0.0472, "step": 8201 }, { "epoch": 1.37, "grad_norm": 0.560854971408844, "learning_rate": 1.1859834211625289e-05, "loss": 0.0576, "step": 8202 }, { "epoch": 1.37, "grad_norm": 0.36952558159828186, "learning_rate": 1.1858059965359298e-05, "loss": 0.0597, "step": 8203 }, { "epoch": 1.37, "grad_norm": 0.5456534028053284, "learning_rate": 1.1856285658508903e-05, "loss": 0.0582, "step": 8204 }, { "epoch": 1.37, "grad_norm": 0.6223370432853699, "learning_rate": 1.1854511291131963e-05, "loss": 0.069, "step": 8205 }, { "epoch": 1.37, "grad_norm": 0.6346473693847656, "learning_rate": 1.1852736863286326e-05, "loss": 0.0586, "step": 8206 }, { "epoch": 1.37, "grad_norm": 0.5823270082473755, "learning_rate": 1.185096237502986e-05, "loss": 0.0694, "step": 8207 }, { "epoch": 1.37, "grad_norm": 0.4045690894126892, "learning_rate": 1.1849187826420417e-05, "loss": 0.0518, "step": 8208 }, { "epoch": 1.37, "grad_norm": 0.6438201665878296, "learning_rate": 1.1847413217515862e-05, "loss": 0.0758, "step": 8209 }, { "epoch": 1.37, "grad_norm": 0.3462023138999939, "learning_rate": 1.1845638548374054e-05, "loss": 0.0488, "step": 8210 }, { "epoch": 1.37, "grad_norm": 0.8612385392189026, "learning_rate": 1.1843863819052863e-05, "loss": 0.0832, "step": 8211 }, { "epoch": 1.37, "grad_norm": 0.5761834383010864, "learning_rate": 1.1842089029610154e-05, "loss": 0.0668, "step": 8212 }, { "epoch": 1.37, "grad_norm": 0.5099660754203796, "learning_rate": 1.1840314180103798e-05, "loss": 0.0525, "step": 8213 }, { "epoch": 1.37, "grad_norm": 0.42729654908180237, "learning_rate": 1.1838539270591665e-05, "loss": 0.046, "step": 8214 }, { "epoch": 1.37, "grad_norm": 0.5565998554229736, "learning_rate": 1.1836764301131627e-05, "loss": 0.0535, "step": 8215 }, { "epoch": 1.37, "grad_norm": 0.4496612250804901, "learning_rate": 1.1834989271781559e-05, "loss": 0.056, "step": 8216 }, { "epoch": 1.37, "grad_norm": 0.5342094898223877, "learning_rate": 1.1833214182599345e-05, "loss": 0.0625, "step": 8217 }, { "epoch": 1.37, "grad_norm": 0.4535287320613861, "learning_rate": 1.1831439033642851e-05, "loss": 0.0476, "step": 8218 }, { "epoch": 1.37, "grad_norm": 0.6846669316291809, "learning_rate": 1.1829663824969968e-05, "loss": 0.0616, "step": 8219 }, { "epoch": 1.37, "grad_norm": 0.5428475141525269, "learning_rate": 1.1827888556638578e-05, "loss": 0.0668, "step": 8220 }, { "epoch": 1.37, "grad_norm": 0.5258626341819763, "learning_rate": 1.1826113228706566e-05, "loss": 0.045, "step": 8221 }, { "epoch": 1.38, "grad_norm": 0.6150916814804077, "learning_rate": 1.1824337841231813e-05, "loss": 0.0779, "step": 8222 }, { "epoch": 1.38, "grad_norm": 0.5651768445968628, "learning_rate": 1.182256239427221e-05, "loss": 0.0609, "step": 8223 }, { "epoch": 1.38, "grad_norm": 0.5592424273490906, "learning_rate": 1.1820786887885655e-05, "loss": 0.0659, "step": 8224 }, { "epoch": 1.38, "grad_norm": 0.515805184841156, "learning_rate": 1.1819011322130033e-05, "loss": 0.0551, "step": 8225 }, { "epoch": 1.38, "grad_norm": 0.4791255593299866, "learning_rate": 1.1817235697063238e-05, "loss": 0.0607, "step": 8226 }, { "epoch": 1.38, "grad_norm": 0.4858383238315582, "learning_rate": 1.1815460012743171e-05, "loss": 0.0463, "step": 8227 }, { "epoch": 1.38, "grad_norm": 0.6230878233909607, "learning_rate": 1.1813684269227731e-05, "loss": 0.0667, "step": 8228 }, { "epoch": 1.38, "grad_norm": 0.5983055233955383, "learning_rate": 1.1811908466574817e-05, "loss": 0.0523, "step": 8229 }, { "epoch": 1.38, "grad_norm": 0.44970545172691345, "learning_rate": 1.1810132604842326e-05, "loss": 0.0617, "step": 8230 }, { "epoch": 1.38, "grad_norm": 0.5826897025108337, "learning_rate": 1.1808356684088169e-05, "loss": 0.0552, "step": 8231 }, { "epoch": 1.38, "grad_norm": 0.430200457572937, "learning_rate": 1.180658070437025e-05, "loss": 0.0599, "step": 8232 }, { "epoch": 1.38, "grad_norm": 0.4886140525341034, "learning_rate": 1.1804804665746474e-05, "loss": 0.0574, "step": 8233 }, { "epoch": 1.38, "grad_norm": 0.5129945278167725, "learning_rate": 1.1803028568274755e-05, "loss": 0.0441, "step": 8234 }, { "epoch": 1.38, "grad_norm": 0.366468220949173, "learning_rate": 1.1801252412013004e-05, "loss": 0.0515, "step": 8235 }, { "epoch": 1.38, "grad_norm": 0.5203572511672974, "learning_rate": 1.1799476197019136e-05, "loss": 0.0435, "step": 8236 }, { "epoch": 1.38, "grad_norm": 0.8567966222763062, "learning_rate": 1.1797699923351063e-05, "loss": 0.0557, "step": 8237 }, { "epoch": 1.38, "grad_norm": 0.5172783136367798, "learning_rate": 1.1795923591066705e-05, "loss": 0.0527, "step": 8238 }, { "epoch": 1.38, "grad_norm": 0.448647141456604, "learning_rate": 1.1794147200223982e-05, "loss": 0.0476, "step": 8239 }, { "epoch": 1.38, "grad_norm": 0.4601183235645294, "learning_rate": 1.1792370750880815e-05, "loss": 0.0612, "step": 8240 }, { "epoch": 1.38, "grad_norm": 0.514451801776886, "learning_rate": 1.1790594243095126e-05, "loss": 0.0519, "step": 8241 }, { "epoch": 1.38, "grad_norm": 0.4128921926021576, "learning_rate": 1.1788817676924845e-05, "loss": 0.0666, "step": 8242 }, { "epoch": 1.38, "grad_norm": 0.5929149985313416, "learning_rate": 1.1787041052427892e-05, "loss": 0.0582, "step": 8243 }, { "epoch": 1.38, "grad_norm": 0.7273010015487671, "learning_rate": 1.1785264369662202e-05, "loss": 0.079, "step": 8244 }, { "epoch": 1.38, "grad_norm": 0.48453962802886963, "learning_rate": 1.1783487628685703e-05, "loss": 0.0557, "step": 8245 }, { "epoch": 1.38, "grad_norm": 0.4672878086566925, "learning_rate": 1.178171082955633e-05, "loss": 0.0395, "step": 8246 }, { "epoch": 1.38, "grad_norm": 0.4062466323375702, "learning_rate": 1.1779933972332015e-05, "loss": 0.0442, "step": 8247 }, { "epoch": 1.38, "grad_norm": 0.47501787543296814, "learning_rate": 1.1778157057070696e-05, "loss": 0.0598, "step": 8248 }, { "epoch": 1.38, "grad_norm": 0.48676007986068726, "learning_rate": 1.1776380083830314e-05, "loss": 0.0582, "step": 8249 }, { "epoch": 1.38, "grad_norm": 0.5614207983016968, "learning_rate": 1.1774603052668804e-05, "loss": 0.0562, "step": 8250 }, { "epoch": 1.38, "grad_norm": 0.4732528030872345, "learning_rate": 1.1772825963644114e-05, "loss": 0.0598, "step": 8251 }, { "epoch": 1.38, "grad_norm": 0.586371123790741, "learning_rate": 1.1771048816814187e-05, "loss": 0.058, "step": 8252 }, { "epoch": 1.38, "grad_norm": 0.658732533454895, "learning_rate": 1.1769271612236966e-05, "loss": 0.0697, "step": 8253 }, { "epoch": 1.38, "grad_norm": 0.3919438123703003, "learning_rate": 1.17674943499704e-05, "loss": 0.0507, "step": 8254 }, { "epoch": 1.38, "grad_norm": 0.591327965259552, "learning_rate": 1.176571703007244e-05, "loss": 0.0464, "step": 8255 }, { "epoch": 1.38, "grad_norm": 1.0331417322158813, "learning_rate": 1.1763939652601041e-05, "loss": 0.0579, "step": 8256 }, { "epoch": 1.38, "grad_norm": 0.5009512901306152, "learning_rate": 1.1762162217614153e-05, "loss": 0.07, "step": 8257 }, { "epoch": 1.38, "grad_norm": 0.5142578482627869, "learning_rate": 1.1760384725169729e-05, "loss": 0.0596, "step": 8258 }, { "epoch": 1.38, "grad_norm": 0.49694105982780457, "learning_rate": 1.1758607175325729e-05, "loss": 0.0564, "step": 8259 }, { "epoch": 1.38, "grad_norm": 0.6640741229057312, "learning_rate": 1.1756829568140114e-05, "loss": 0.0818, "step": 8260 }, { "epoch": 1.38, "grad_norm": 0.8264404535293579, "learning_rate": 1.1755051903670845e-05, "loss": 0.0588, "step": 8261 }, { "epoch": 1.38, "grad_norm": 0.4236903488636017, "learning_rate": 1.175327418197588e-05, "loss": 0.0579, "step": 8262 }, { "epoch": 1.38, "grad_norm": 0.40835922956466675, "learning_rate": 1.175149640311319e-05, "loss": 0.0545, "step": 8263 }, { "epoch": 1.38, "grad_norm": 0.5332040190696716, "learning_rate": 1.1749718567140738e-05, "loss": 0.0686, "step": 8264 }, { "epoch": 1.38, "grad_norm": 0.46007785201072693, "learning_rate": 1.1747940674116493e-05, "loss": 0.0693, "step": 8265 }, { "epoch": 1.38, "grad_norm": 0.4789241850376129, "learning_rate": 1.174616272409843e-05, "loss": 0.0723, "step": 8266 }, { "epoch": 1.38, "grad_norm": 0.33555418252944946, "learning_rate": 1.1744384717144514e-05, "loss": 0.0505, "step": 8267 }, { "epoch": 1.38, "grad_norm": 0.29467710852622986, "learning_rate": 1.1742606653312724e-05, "loss": 0.0439, "step": 8268 }, { "epoch": 1.38, "grad_norm": 0.4565297067165375, "learning_rate": 1.1740828532661036e-05, "loss": 0.0519, "step": 8269 }, { "epoch": 1.38, "grad_norm": 0.43717190623283386, "learning_rate": 1.1739050355247426e-05, "loss": 0.0466, "step": 8270 }, { "epoch": 1.38, "grad_norm": 0.43173328042030334, "learning_rate": 1.1737272121129872e-05, "loss": 0.0449, "step": 8271 }, { "epoch": 1.38, "grad_norm": 0.39027515053749084, "learning_rate": 1.173549383036636e-05, "loss": 0.0509, "step": 8272 }, { "epoch": 1.38, "grad_norm": 0.5141762495040894, "learning_rate": 1.1733715483014873e-05, "loss": 0.0693, "step": 8273 }, { "epoch": 1.38, "grad_norm": 0.48363441228866577, "learning_rate": 1.173193707913339e-05, "loss": 0.0535, "step": 8274 }, { "epoch": 1.38, "grad_norm": 0.45642027258872986, "learning_rate": 1.1730158618779908e-05, "loss": 0.0518, "step": 8275 }, { "epoch": 1.38, "grad_norm": 0.5043975710868835, "learning_rate": 1.1728380102012406e-05, "loss": 0.0541, "step": 8276 }, { "epoch": 1.38, "grad_norm": 0.39963552355766296, "learning_rate": 1.1726601528888885e-05, "loss": 0.0508, "step": 8277 }, { "epoch": 1.38, "grad_norm": 0.504227876663208, "learning_rate": 1.1724822899467328e-05, "loss": 0.0747, "step": 8278 }, { "epoch": 1.38, "grad_norm": 0.40748143196105957, "learning_rate": 1.1723044213805734e-05, "loss": 0.047, "step": 8279 }, { "epoch": 1.38, "grad_norm": 0.483859121799469, "learning_rate": 1.1721265471962103e-05, "loss": 0.0516, "step": 8280 }, { "epoch": 1.39, "grad_norm": 0.4775525629520416, "learning_rate": 1.1719486673994425e-05, "loss": 0.0594, "step": 8281 }, { "epoch": 1.39, "grad_norm": 0.5023345947265625, "learning_rate": 1.1717707819960704e-05, "loss": 0.0626, "step": 8282 }, { "epoch": 1.39, "grad_norm": 0.45593908429145813, "learning_rate": 1.171592890991894e-05, "loss": 0.0643, "step": 8283 }, { "epoch": 1.39, "grad_norm": 0.5482690334320068, "learning_rate": 1.1714149943927144e-05, "loss": 0.0621, "step": 8284 }, { "epoch": 1.39, "grad_norm": 0.6993181109428406, "learning_rate": 1.1712370922043313e-05, "loss": 0.0698, "step": 8285 }, { "epoch": 1.39, "grad_norm": 0.7516646385192871, "learning_rate": 1.1710591844325457e-05, "loss": 0.0493, "step": 8286 }, { "epoch": 1.39, "grad_norm": 0.4608824849128723, "learning_rate": 1.1708812710831586e-05, "loss": 0.0624, "step": 8287 }, { "epoch": 1.39, "grad_norm": 0.4798426032066345, "learning_rate": 1.1707033521619708e-05, "loss": 0.0585, "step": 8288 }, { "epoch": 1.39, "grad_norm": 0.7631617784500122, "learning_rate": 1.170525427674784e-05, "loss": 0.0693, "step": 8289 }, { "epoch": 1.39, "grad_norm": 0.5708633065223694, "learning_rate": 1.1703474976273995e-05, "loss": 0.0659, "step": 8290 }, { "epoch": 1.39, "grad_norm": 0.6850212216377258, "learning_rate": 1.1701695620256186e-05, "loss": 0.0719, "step": 8291 }, { "epoch": 1.39, "grad_norm": 0.4587934613227844, "learning_rate": 1.1699916208752435e-05, "loss": 0.0579, "step": 8292 }, { "epoch": 1.39, "grad_norm": 0.41582179069519043, "learning_rate": 1.169813674182076e-05, "loss": 0.0392, "step": 8293 }, { "epoch": 1.39, "grad_norm": 0.46969854831695557, "learning_rate": 1.1696357219519186e-05, "loss": 0.0648, "step": 8294 }, { "epoch": 1.39, "grad_norm": 0.5005156993865967, "learning_rate": 1.1694577641905732e-05, "loss": 0.0479, "step": 8295 }, { "epoch": 1.39, "grad_norm": 0.5654756426811218, "learning_rate": 1.1692798009038426e-05, "loss": 0.0528, "step": 8296 }, { "epoch": 1.39, "grad_norm": 0.5351980328559875, "learning_rate": 1.1691018320975297e-05, "loss": 0.0572, "step": 8297 }, { "epoch": 1.39, "grad_norm": 0.7578015327453613, "learning_rate": 1.1689238577774369e-05, "loss": 0.0534, "step": 8298 }, { "epoch": 1.39, "grad_norm": 0.3886774480342865, "learning_rate": 1.1687458779493677e-05, "loss": 0.0439, "step": 8299 }, { "epoch": 1.39, "grad_norm": 0.4380963444709778, "learning_rate": 1.1685678926191251e-05, "loss": 0.047, "step": 8300 }, { "epoch": 1.39, "grad_norm": 0.5922439098358154, "learning_rate": 1.1683899017925129e-05, "loss": 0.0581, "step": 8301 }, { "epoch": 1.39, "grad_norm": 0.4370439350605011, "learning_rate": 1.168211905475334e-05, "loss": 0.0497, "step": 8302 }, { "epoch": 1.39, "grad_norm": 0.7822383046150208, "learning_rate": 1.1680339036733929e-05, "loss": 0.0803, "step": 8303 }, { "epoch": 1.39, "grad_norm": 0.39698290824890137, "learning_rate": 1.1678558963924936e-05, "loss": 0.0414, "step": 8304 }, { "epoch": 1.39, "grad_norm": 1.0106078386306763, "learning_rate": 1.16767788363844e-05, "loss": 0.0703, "step": 8305 }, { "epoch": 1.39, "grad_norm": 0.5964436531066895, "learning_rate": 1.1674998654170359e-05, "loss": 0.0595, "step": 8306 }, { "epoch": 1.39, "grad_norm": 0.6653978228569031, "learning_rate": 1.1673218417340866e-05, "loss": 0.0527, "step": 8307 }, { "epoch": 1.39, "grad_norm": 0.569520115852356, "learning_rate": 1.1671438125953969e-05, "loss": 0.042, "step": 8308 }, { "epoch": 1.39, "grad_norm": 0.6503156423568726, "learning_rate": 1.1669657780067708e-05, "loss": 0.0629, "step": 8309 }, { "epoch": 1.39, "grad_norm": 0.47206878662109375, "learning_rate": 1.166787737974014e-05, "loss": 0.0493, "step": 8310 }, { "epoch": 1.39, "grad_norm": 0.6858813166618347, "learning_rate": 1.1666096925029314e-05, "loss": 0.0748, "step": 8311 }, { "epoch": 1.39, "grad_norm": 0.4572012424468994, "learning_rate": 1.1664316415993286e-05, "loss": 0.0546, "step": 8312 }, { "epoch": 1.39, "grad_norm": 0.4689471423625946, "learning_rate": 1.1662535852690114e-05, "loss": 0.0417, "step": 8313 }, { "epoch": 1.39, "grad_norm": 1.1822888851165771, "learning_rate": 1.166075523517785e-05, "loss": 0.0636, "step": 8314 }, { "epoch": 1.39, "grad_norm": 0.4817841649055481, "learning_rate": 1.1658974563514555e-05, "loss": 0.0516, "step": 8315 }, { "epoch": 1.39, "grad_norm": 0.3980713188648224, "learning_rate": 1.1657193837758294e-05, "loss": 0.0395, "step": 8316 }, { "epoch": 1.39, "grad_norm": 0.4895782470703125, "learning_rate": 1.1655413057967126e-05, "loss": 0.0432, "step": 8317 }, { "epoch": 1.39, "grad_norm": 0.5295345187187195, "learning_rate": 1.1653632224199117e-05, "loss": 0.0611, "step": 8318 }, { "epoch": 1.39, "grad_norm": 0.4511200487613678, "learning_rate": 1.165185133651233e-05, "loss": 0.0562, "step": 8319 }, { "epoch": 1.39, "grad_norm": 0.5556789040565491, "learning_rate": 1.1650070394964838e-05, "loss": 0.0755, "step": 8320 }, { "epoch": 1.39, "grad_norm": 0.6220173239707947, "learning_rate": 1.164828939961471e-05, "loss": 0.0657, "step": 8321 }, { "epoch": 1.39, "grad_norm": 0.559202253818512, "learning_rate": 1.1646508350520014e-05, "loss": 0.0694, "step": 8322 }, { "epoch": 1.39, "grad_norm": 0.558639645576477, "learning_rate": 1.1644727247738827e-05, "loss": 0.0769, "step": 8323 }, { "epoch": 1.39, "grad_norm": 0.4876613914966583, "learning_rate": 1.1642946091329224e-05, "loss": 0.0703, "step": 8324 }, { "epoch": 1.39, "grad_norm": 0.45511752367019653, "learning_rate": 1.1641164881349279e-05, "loss": 0.0452, "step": 8325 }, { "epoch": 1.39, "grad_norm": 0.6123755574226379, "learning_rate": 1.1639383617857075e-05, "loss": 0.0535, "step": 8326 }, { "epoch": 1.39, "grad_norm": 0.49198251962661743, "learning_rate": 1.1637602300910686e-05, "loss": 0.0445, "step": 8327 }, { "epoch": 1.39, "grad_norm": 0.5127339959144592, "learning_rate": 1.16358209305682e-05, "loss": 0.075, "step": 8328 }, { "epoch": 1.39, "grad_norm": 0.6543902158737183, "learning_rate": 1.16340395068877e-05, "loss": 0.08, "step": 8329 }, { "epoch": 1.39, "grad_norm": 0.4263554513454437, "learning_rate": 1.1632258029927265e-05, "loss": 0.0465, "step": 8330 }, { "epoch": 1.39, "grad_norm": 0.7058126330375671, "learning_rate": 1.1630476499744991e-05, "loss": 0.0564, "step": 8331 }, { "epoch": 1.39, "grad_norm": 0.5505353808403015, "learning_rate": 1.1628694916398963e-05, "loss": 0.0695, "step": 8332 }, { "epoch": 1.39, "grad_norm": 0.41787970066070557, "learning_rate": 1.1626913279947277e-05, "loss": 0.0614, "step": 8333 }, { "epoch": 1.39, "grad_norm": 0.5449123382568359, "learning_rate": 1.1625131590448016e-05, "loss": 0.0588, "step": 8334 }, { "epoch": 1.39, "grad_norm": 0.5940191745758057, "learning_rate": 1.1623349847959278e-05, "loss": 0.078, "step": 8335 }, { "epoch": 1.39, "grad_norm": 0.5133163332939148, "learning_rate": 1.1621568052539165e-05, "loss": 0.0447, "step": 8336 }, { "epoch": 1.39, "grad_norm": 0.5188626646995544, "learning_rate": 1.1619786204245766e-05, "loss": 0.0435, "step": 8337 }, { "epoch": 1.39, "grad_norm": 0.6157768964767456, "learning_rate": 1.1618004303137185e-05, "loss": 0.0494, "step": 8338 }, { "epoch": 1.39, "grad_norm": 0.4879556894302368, "learning_rate": 1.1616222349271523e-05, "loss": 0.0578, "step": 8339 }, { "epoch": 1.39, "grad_norm": 0.6360810995101929, "learning_rate": 1.1614440342706884e-05, "loss": 0.0656, "step": 8340 }, { "epoch": 1.4, "grad_norm": 0.5387732982635498, "learning_rate": 1.1612658283501368e-05, "loss": 0.0459, "step": 8341 }, { "epoch": 1.4, "grad_norm": 0.595295250415802, "learning_rate": 1.1610876171713084e-05, "loss": 0.0574, "step": 8342 }, { "epoch": 1.4, "grad_norm": 0.5930383205413818, "learning_rate": 1.1609094007400141e-05, "loss": 0.0571, "step": 8343 }, { "epoch": 1.4, "grad_norm": 0.5511655211448669, "learning_rate": 1.1607311790620648e-05, "loss": 0.0491, "step": 8344 }, { "epoch": 1.4, "grad_norm": 0.5581170916557312, "learning_rate": 1.1605529521432715e-05, "loss": 0.0646, "step": 8345 }, { "epoch": 1.4, "grad_norm": 0.3697885274887085, "learning_rate": 1.1603747199894457e-05, "loss": 0.0509, "step": 8346 }, { "epoch": 1.4, "grad_norm": 0.5427976250648499, "learning_rate": 1.160196482606399e-05, "loss": 0.0517, "step": 8347 }, { "epoch": 1.4, "grad_norm": 0.3606695234775543, "learning_rate": 1.1600182399999427e-05, "loss": 0.0595, "step": 8348 }, { "epoch": 1.4, "grad_norm": 0.4019721448421478, "learning_rate": 1.159839992175889e-05, "loss": 0.0395, "step": 8349 }, { "epoch": 1.4, "grad_norm": 0.3560360074043274, "learning_rate": 1.1596617391400496e-05, "loss": 0.0488, "step": 8350 }, { "epoch": 1.4, "grad_norm": 0.4296626150608063, "learning_rate": 1.1594834808982366e-05, "loss": 0.0475, "step": 8351 }, { "epoch": 1.4, "grad_norm": 0.44835591316223145, "learning_rate": 1.159305217456263e-05, "loss": 0.0494, "step": 8352 }, { "epoch": 1.4, "grad_norm": 0.4914228320121765, "learning_rate": 1.1591269488199403e-05, "loss": 0.0647, "step": 8353 }, { "epoch": 1.4, "grad_norm": 0.6254087686538696, "learning_rate": 1.1589486749950818e-05, "loss": 0.0531, "step": 8354 }, { "epoch": 1.4, "grad_norm": 0.37693795561790466, "learning_rate": 1.1587703959875006e-05, "loss": 0.0482, "step": 8355 }, { "epoch": 1.4, "grad_norm": 0.4047714173793793, "learning_rate": 1.1585921118030088e-05, "loss": 0.0655, "step": 8356 }, { "epoch": 1.4, "grad_norm": 0.6859800219535828, "learning_rate": 1.158413822447421e-05, "loss": 0.0639, "step": 8357 }, { "epoch": 1.4, "grad_norm": 0.6555088758468628, "learning_rate": 1.1582355279265487e-05, "loss": 0.0548, "step": 8358 }, { "epoch": 1.4, "grad_norm": 0.49493709206581116, "learning_rate": 1.1580572282462068e-05, "loss": 0.0427, "step": 8359 }, { "epoch": 1.4, "grad_norm": 0.4810265600681305, "learning_rate": 1.1578789234122085e-05, "loss": 0.0639, "step": 8360 }, { "epoch": 1.4, "grad_norm": 0.36067211627960205, "learning_rate": 1.1577006134303682e-05, "loss": 0.0502, "step": 8361 }, { "epoch": 1.4, "grad_norm": 0.5625926852226257, "learning_rate": 1.157522298306499e-05, "loss": 0.0552, "step": 8362 }, { "epoch": 1.4, "grad_norm": 0.6101846694946289, "learning_rate": 1.1573439780464154e-05, "loss": 0.045, "step": 8363 }, { "epoch": 1.4, "grad_norm": 0.3770866096019745, "learning_rate": 1.1571656526559324e-05, "loss": 0.0427, "step": 8364 }, { "epoch": 1.4, "grad_norm": 0.5656566023826599, "learning_rate": 1.1569873221408638e-05, "loss": 0.0569, "step": 8365 }, { "epoch": 1.4, "grad_norm": 0.38381558656692505, "learning_rate": 1.1568089865070245e-05, "loss": 0.05, "step": 8366 }, { "epoch": 1.4, "grad_norm": 0.4491310715675354, "learning_rate": 1.1566306457602292e-05, "loss": 0.0449, "step": 8367 }, { "epoch": 1.4, "grad_norm": 0.5546620488166809, "learning_rate": 1.1564522999062935e-05, "loss": 0.0548, "step": 8368 }, { "epoch": 1.4, "grad_norm": 0.4066273868083954, "learning_rate": 1.1562739489510322e-05, "loss": 0.0506, "step": 8369 }, { "epoch": 1.4, "grad_norm": 0.5261754989624023, "learning_rate": 1.1560955929002604e-05, "loss": 0.0511, "step": 8370 }, { "epoch": 1.4, "grad_norm": 0.6652519106864929, "learning_rate": 1.155917231759794e-05, "loss": 0.0633, "step": 8371 }, { "epoch": 1.4, "grad_norm": 0.5918216109275818, "learning_rate": 1.1557388655354486e-05, "loss": 0.0688, "step": 8372 }, { "epoch": 1.4, "grad_norm": 0.3805326223373413, "learning_rate": 1.15556049423304e-05, "loss": 0.0447, "step": 8373 }, { "epoch": 1.4, "grad_norm": 0.4357803761959076, "learning_rate": 1.1553821178583844e-05, "loss": 0.0562, "step": 8374 }, { "epoch": 1.4, "grad_norm": 0.3867184817790985, "learning_rate": 1.1552037364172976e-05, "loss": 0.0424, "step": 8375 }, { "epoch": 1.4, "grad_norm": 0.700564444065094, "learning_rate": 1.1550253499155965e-05, "loss": 0.0587, "step": 8376 }, { "epoch": 1.4, "grad_norm": 0.5582382082939148, "learning_rate": 1.154846958359097e-05, "loss": 0.0557, "step": 8377 }, { "epoch": 1.4, "grad_norm": 0.6643750071525574, "learning_rate": 1.1546685617536167e-05, "loss": 0.068, "step": 8378 }, { "epoch": 1.4, "grad_norm": 0.4048599898815155, "learning_rate": 1.1544901601049712e-05, "loss": 0.0574, "step": 8379 }, { "epoch": 1.4, "grad_norm": 0.5175735950469971, "learning_rate": 1.1543117534189784e-05, "loss": 0.0607, "step": 8380 }, { "epoch": 1.4, "grad_norm": 0.4018561840057373, "learning_rate": 1.1541333417014553e-05, "loss": 0.0682, "step": 8381 }, { "epoch": 1.4, "grad_norm": 0.602207601070404, "learning_rate": 1.153954924958219e-05, "loss": 0.0694, "step": 8382 }, { "epoch": 1.4, "grad_norm": 0.404840886592865, "learning_rate": 1.1537765031950876e-05, "loss": 0.063, "step": 8383 }, { "epoch": 1.4, "grad_norm": 0.37440791726112366, "learning_rate": 1.1535980764178781e-05, "loss": 0.0398, "step": 8384 }, { "epoch": 1.4, "grad_norm": 0.510757565498352, "learning_rate": 1.1534196446324089e-05, "loss": 0.0604, "step": 8385 }, { "epoch": 1.4, "grad_norm": 0.40005752444267273, "learning_rate": 1.1532412078444971e-05, "loss": 0.05, "step": 8386 }, { "epoch": 1.4, "grad_norm": 0.4405694603919983, "learning_rate": 1.1530627660599616e-05, "loss": 0.0483, "step": 8387 }, { "epoch": 1.4, "grad_norm": 0.6367645263671875, "learning_rate": 1.1528843192846207e-05, "loss": 0.0745, "step": 8388 }, { "epoch": 1.4, "grad_norm": 0.8833972811698914, "learning_rate": 1.1527058675242928e-05, "loss": 0.0559, "step": 8389 }, { "epoch": 1.4, "grad_norm": 0.36018022894859314, "learning_rate": 1.1525274107847965e-05, "loss": 0.0467, "step": 8390 }, { "epoch": 1.4, "grad_norm": 0.4458964765071869, "learning_rate": 1.1523489490719502e-05, "loss": 0.0615, "step": 8391 }, { "epoch": 1.4, "grad_norm": 0.9232444763183594, "learning_rate": 1.1521704823915739e-05, "loss": 0.0664, "step": 8392 }, { "epoch": 1.4, "grad_norm": 0.5274572372436523, "learning_rate": 1.1519920107494857e-05, "loss": 0.044, "step": 8393 }, { "epoch": 1.4, "grad_norm": 0.6005691289901733, "learning_rate": 1.1518135341515053e-05, "loss": 0.0488, "step": 8394 }, { "epoch": 1.4, "grad_norm": 0.6400679349899292, "learning_rate": 1.1516350526034522e-05, "loss": 0.0689, "step": 8395 }, { "epoch": 1.4, "grad_norm": 0.41198447346687317, "learning_rate": 1.151456566111146e-05, "loss": 0.0459, "step": 8396 }, { "epoch": 1.4, "grad_norm": 0.41815465688705444, "learning_rate": 1.1512780746804065e-05, "loss": 0.062, "step": 8397 }, { "epoch": 1.4, "grad_norm": 0.4652553200721741, "learning_rate": 1.1510995783170534e-05, "loss": 0.0607, "step": 8398 }, { "epoch": 1.4, "grad_norm": 0.45815685391426086, "learning_rate": 1.150921077026907e-05, "loss": 0.0534, "step": 8399 }, { "epoch": 1.4, "grad_norm": 0.3637983798980713, "learning_rate": 1.1507425708157878e-05, "loss": 0.0449, "step": 8400 }, { "epoch": 1.41, "grad_norm": 0.47139182686805725, "learning_rate": 1.1505640596895156e-05, "loss": 0.058, "step": 8401 }, { "epoch": 1.41, "grad_norm": 0.5911071300506592, "learning_rate": 1.1503855436539116e-05, "loss": 0.0726, "step": 8402 }, { "epoch": 1.41, "grad_norm": 0.4349595010280609, "learning_rate": 1.1502070227147964e-05, "loss": 0.0631, "step": 8403 }, { "epoch": 1.41, "grad_norm": 0.461442768573761, "learning_rate": 1.1500284968779905e-05, "loss": 0.0523, "step": 8404 }, { "epoch": 1.41, "grad_norm": 0.5363150238990784, "learning_rate": 1.1498499661493152e-05, "loss": 0.0545, "step": 8405 }, { "epoch": 1.41, "grad_norm": 0.6080751419067383, "learning_rate": 1.1496714305345919e-05, "loss": 0.0632, "step": 8406 }, { "epoch": 1.41, "grad_norm": 0.39932766556739807, "learning_rate": 1.149492890039642e-05, "loss": 0.0541, "step": 8407 }, { "epoch": 1.41, "grad_norm": 1.5474075078964233, "learning_rate": 1.1493143446702868e-05, "loss": 0.0595, "step": 8408 }, { "epoch": 1.41, "grad_norm": 0.5969498753547668, "learning_rate": 1.1491357944323482e-05, "loss": 0.0793, "step": 8409 }, { "epoch": 1.41, "grad_norm": 0.5013144612312317, "learning_rate": 1.1489572393316477e-05, "loss": 0.06, "step": 8410 }, { "epoch": 1.41, "grad_norm": 0.812039852142334, "learning_rate": 1.1487786793740077e-05, "loss": 0.0765, "step": 8411 }, { "epoch": 1.41, "grad_norm": 0.4416777491569519, "learning_rate": 1.1486001145652503e-05, "loss": 0.056, "step": 8412 }, { "epoch": 1.41, "grad_norm": 0.5094749331474304, "learning_rate": 1.148421544911198e-05, "loss": 0.0572, "step": 8413 }, { "epoch": 1.41, "grad_norm": 0.7070565819740295, "learning_rate": 1.1482429704176727e-05, "loss": 0.0686, "step": 8414 }, { "epoch": 1.41, "grad_norm": 0.3699231743812561, "learning_rate": 1.1480643910904972e-05, "loss": 0.0428, "step": 8415 }, { "epoch": 1.41, "grad_norm": 0.3905069828033447, "learning_rate": 1.1478858069354947e-05, "loss": 0.0469, "step": 8416 }, { "epoch": 1.41, "grad_norm": 0.5850476026535034, "learning_rate": 1.1477072179584882e-05, "loss": 0.0655, "step": 8417 }, { "epoch": 1.41, "grad_norm": 0.44012051820755005, "learning_rate": 1.1475286241653006e-05, "loss": 0.0503, "step": 8418 }, { "epoch": 1.41, "grad_norm": 0.46478426456451416, "learning_rate": 1.1473500255617546e-05, "loss": 0.0654, "step": 8419 }, { "epoch": 1.41, "grad_norm": 0.5757770538330078, "learning_rate": 1.147171422153675e-05, "loss": 0.0756, "step": 8420 }, { "epoch": 1.41, "grad_norm": 0.43075984716415405, "learning_rate": 1.1469928139468842e-05, "loss": 0.0543, "step": 8421 }, { "epoch": 1.41, "grad_norm": 0.5493122339248657, "learning_rate": 1.1468142009472062e-05, "loss": 0.0491, "step": 8422 }, { "epoch": 1.41, "grad_norm": 0.47959649562835693, "learning_rate": 1.1466355831604652e-05, "loss": 0.0624, "step": 8423 }, { "epoch": 1.41, "grad_norm": 0.8128238320350647, "learning_rate": 1.1464569605924853e-05, "loss": 0.084, "step": 8424 }, { "epoch": 1.41, "grad_norm": 0.5764593482017517, "learning_rate": 1.1462783332490903e-05, "loss": 0.0522, "step": 8425 }, { "epoch": 1.41, "grad_norm": 0.47014808654785156, "learning_rate": 1.1460997011361047e-05, "loss": 0.0679, "step": 8426 }, { "epoch": 1.41, "grad_norm": 0.493651807308197, "learning_rate": 1.1459210642593535e-05, "loss": 0.0671, "step": 8427 }, { "epoch": 1.41, "grad_norm": 0.44533461332321167, "learning_rate": 1.1457424226246607e-05, "loss": 0.0589, "step": 8428 }, { "epoch": 1.41, "grad_norm": 0.7861815690994263, "learning_rate": 1.1455637762378515e-05, "loss": 0.0687, "step": 8429 }, { "epoch": 1.41, "grad_norm": 0.43113380670547485, "learning_rate": 1.1453851251047508e-05, "loss": 0.046, "step": 8430 }, { "epoch": 1.41, "grad_norm": 0.6121916174888611, "learning_rate": 1.1452064692311837e-05, "loss": 0.0468, "step": 8431 }, { "epoch": 1.41, "grad_norm": 0.46586939692497253, "learning_rate": 1.1450278086229757e-05, "loss": 0.0642, "step": 8432 }, { "epoch": 1.41, "grad_norm": 0.5278680324554443, "learning_rate": 1.144849143285952e-05, "loss": 0.0317, "step": 8433 }, { "epoch": 1.41, "grad_norm": 0.473063200712204, "learning_rate": 1.1446704732259386e-05, "loss": 0.0545, "step": 8434 }, { "epoch": 1.41, "grad_norm": 0.3674374222755432, "learning_rate": 1.1444917984487608e-05, "loss": 0.0498, "step": 8435 }, { "epoch": 1.41, "grad_norm": 0.46792155504226685, "learning_rate": 1.1443131189602447e-05, "loss": 0.0605, "step": 8436 }, { "epoch": 1.41, "grad_norm": 0.49394717812538147, "learning_rate": 1.1441344347662166e-05, "loss": 0.0707, "step": 8437 }, { "epoch": 1.41, "grad_norm": 0.47601422667503357, "learning_rate": 1.1439557458725023e-05, "loss": 0.0553, "step": 8438 }, { "epoch": 1.41, "grad_norm": 0.3450040817260742, "learning_rate": 1.1437770522849285e-05, "loss": 0.0497, "step": 8439 }, { "epoch": 1.41, "grad_norm": 0.6363344192504883, "learning_rate": 1.1435983540093215e-05, "loss": 0.087, "step": 8440 }, { "epoch": 1.41, "grad_norm": 0.45454612374305725, "learning_rate": 1.1434196510515086e-05, "loss": 0.0465, "step": 8441 }, { "epoch": 1.41, "grad_norm": 0.8764200806617737, "learning_rate": 1.1432409434173157e-05, "loss": 0.0584, "step": 8442 }, { "epoch": 1.41, "grad_norm": 0.4613218605518341, "learning_rate": 1.14306223111257e-05, "loss": 0.0511, "step": 8443 }, { "epoch": 1.41, "grad_norm": 0.43558043241500854, "learning_rate": 1.1428835141430994e-05, "loss": 0.0543, "step": 8444 }, { "epoch": 1.41, "grad_norm": 0.5557766556739807, "learning_rate": 1.1427047925147308e-05, "loss": 0.056, "step": 8445 }, { "epoch": 1.41, "grad_norm": 0.4977416694164276, "learning_rate": 1.1425260662332911e-05, "loss": 0.0448, "step": 8446 }, { "epoch": 1.41, "grad_norm": 0.47521987557411194, "learning_rate": 1.1423473353046081e-05, "loss": 0.0448, "step": 8447 }, { "epoch": 1.41, "grad_norm": 0.7181467413902283, "learning_rate": 1.1421685997345105e-05, "loss": 0.0716, "step": 8448 }, { "epoch": 1.41, "grad_norm": 0.680800199508667, "learning_rate": 1.1419898595288249e-05, "loss": 0.0669, "step": 8449 }, { "epoch": 1.41, "grad_norm": 0.5231813788414001, "learning_rate": 1.14181111469338e-05, "loss": 0.047, "step": 8450 }, { "epoch": 1.41, "grad_norm": 0.6386248469352722, "learning_rate": 1.1416323652340038e-05, "loss": 0.0594, "step": 8451 }, { "epoch": 1.41, "grad_norm": 0.526239275932312, "learning_rate": 1.1414536111565251e-05, "loss": 0.0528, "step": 8452 }, { "epoch": 1.41, "grad_norm": 0.5043694972991943, "learning_rate": 1.141274852466772e-05, "loss": 0.065, "step": 8453 }, { "epoch": 1.41, "grad_norm": 0.9123210906982422, "learning_rate": 1.1410960891705731e-05, "loss": 0.0612, "step": 8454 }, { "epoch": 1.41, "grad_norm": 0.9611122608184814, "learning_rate": 1.1409173212737575e-05, "loss": 0.0657, "step": 8455 }, { "epoch": 1.41, "grad_norm": 0.3831668198108673, "learning_rate": 1.1407385487821535e-05, "loss": 0.0535, "step": 8456 }, { "epoch": 1.41, "grad_norm": 0.5207782983779907, "learning_rate": 1.1405597717015912e-05, "loss": 0.0536, "step": 8457 }, { "epoch": 1.41, "grad_norm": 0.4789681136608124, "learning_rate": 1.1403809900378993e-05, "loss": 0.0506, "step": 8458 }, { "epoch": 1.41, "grad_norm": 0.4030703604221344, "learning_rate": 1.1402022037969069e-05, "loss": 0.0552, "step": 8459 }, { "epoch": 1.41, "grad_norm": 0.5483132004737854, "learning_rate": 1.1400234129844443e-05, "loss": 0.0918, "step": 8460 }, { "epoch": 1.42, "grad_norm": 0.4488079249858856, "learning_rate": 1.1398446176063404e-05, "loss": 0.0318, "step": 8461 }, { "epoch": 1.42, "grad_norm": 0.6528756022453308, "learning_rate": 1.1396658176684258e-05, "loss": 0.0744, "step": 8462 }, { "epoch": 1.42, "grad_norm": 0.5643875002861023, "learning_rate": 1.13948701317653e-05, "loss": 0.0542, "step": 8463 }, { "epoch": 1.42, "grad_norm": 0.6297383904457092, "learning_rate": 1.1393082041364834e-05, "loss": 0.0625, "step": 8464 }, { "epoch": 1.42, "grad_norm": 0.5585431456565857, "learning_rate": 1.1391293905541162e-05, "loss": 0.0593, "step": 8465 }, { "epoch": 1.42, "grad_norm": 0.5421984791755676, "learning_rate": 1.1389505724352587e-05, "loss": 0.0562, "step": 8466 }, { "epoch": 1.42, "grad_norm": 0.562972903251648, "learning_rate": 1.1387717497857416e-05, "loss": 0.057, "step": 8467 }, { "epoch": 1.42, "grad_norm": 0.34928154945373535, "learning_rate": 1.1385929226113956e-05, "loss": 0.0344, "step": 8468 }, { "epoch": 1.42, "grad_norm": 0.5234887599945068, "learning_rate": 1.1384140909180521e-05, "loss": 0.0553, "step": 8469 }, { "epoch": 1.42, "grad_norm": 0.5904953479766846, "learning_rate": 1.1382352547115411e-05, "loss": 0.0718, "step": 8470 }, { "epoch": 1.42, "grad_norm": 0.38269343972206116, "learning_rate": 1.1380564139976947e-05, "loss": 0.0524, "step": 8471 }, { "epoch": 1.42, "grad_norm": 0.3587368130683899, "learning_rate": 1.1378775687823435e-05, "loss": 0.0587, "step": 8472 }, { "epoch": 1.42, "grad_norm": 0.5113865733146667, "learning_rate": 1.1376987190713201e-05, "loss": 0.0571, "step": 8473 }, { "epoch": 1.42, "grad_norm": 0.4139021337032318, "learning_rate": 1.1375198648704549e-05, "loss": 0.0422, "step": 8474 }, { "epoch": 1.42, "grad_norm": 0.3819267749786377, "learning_rate": 1.13734100618558e-05, "loss": 0.0445, "step": 8475 }, { "epoch": 1.42, "grad_norm": 0.4103502035140991, "learning_rate": 1.1371621430225277e-05, "loss": 0.0522, "step": 8476 }, { "epoch": 1.42, "grad_norm": 0.4901667833328247, "learning_rate": 1.13698327538713e-05, "loss": 0.0587, "step": 8477 }, { "epoch": 1.42, "grad_norm": 0.40828534960746765, "learning_rate": 1.1368044032852189e-05, "loss": 0.0646, "step": 8478 }, { "epoch": 1.42, "grad_norm": 0.8559368252754211, "learning_rate": 1.1366255267226262e-05, "loss": 0.0755, "step": 8479 }, { "epoch": 1.42, "grad_norm": 0.4313977062702179, "learning_rate": 1.1364466457051858e-05, "loss": 0.049, "step": 8480 }, { "epoch": 1.42, "grad_norm": 0.37847745418548584, "learning_rate": 1.136267760238729e-05, "loss": 0.0607, "step": 8481 }, { "epoch": 1.42, "grad_norm": 0.6749129295349121, "learning_rate": 1.1360888703290896e-05, "loss": 0.067, "step": 8482 }, { "epoch": 1.42, "grad_norm": 0.42380207777023315, "learning_rate": 1.1359099759820993e-05, "loss": 0.052, "step": 8483 }, { "epoch": 1.42, "grad_norm": 0.497633159160614, "learning_rate": 1.1357310772035928e-05, "loss": 0.047, "step": 8484 }, { "epoch": 1.42, "grad_norm": 0.4203217625617981, "learning_rate": 1.1355521739994018e-05, "loss": 0.0435, "step": 8485 }, { "epoch": 1.42, "grad_norm": 0.3563951551914215, "learning_rate": 1.1353732663753605e-05, "loss": 0.0454, "step": 8486 }, { "epoch": 1.42, "grad_norm": 0.6430805325508118, "learning_rate": 1.1351943543373023e-05, "loss": 0.0727, "step": 8487 }, { "epoch": 1.42, "grad_norm": 0.4631800651550293, "learning_rate": 1.1350154378910607e-05, "loss": 0.0608, "step": 8488 }, { "epoch": 1.42, "grad_norm": 0.5521069169044495, "learning_rate": 1.1348365170424695e-05, "loss": 0.069, "step": 8489 }, { "epoch": 1.42, "grad_norm": 0.5076723694801331, "learning_rate": 1.134657591797363e-05, "loss": 0.0574, "step": 8490 }, { "epoch": 1.42, "grad_norm": 0.5046278238296509, "learning_rate": 1.1344786621615747e-05, "loss": 0.06, "step": 8491 }, { "epoch": 1.42, "grad_norm": 0.4256805181503296, "learning_rate": 1.1342997281409393e-05, "loss": 0.0451, "step": 8492 }, { "epoch": 1.42, "grad_norm": 0.4475812613964081, "learning_rate": 1.1341207897412908e-05, "loss": 0.0502, "step": 8493 }, { "epoch": 1.42, "grad_norm": 0.5516064167022705, "learning_rate": 1.1339418469684644e-05, "loss": 0.0568, "step": 8494 }, { "epoch": 1.42, "grad_norm": 0.41840478777885437, "learning_rate": 1.1337628998282936e-05, "loss": 0.0312, "step": 8495 }, { "epoch": 1.42, "grad_norm": 0.9776256680488586, "learning_rate": 1.1335839483266143e-05, "loss": 0.0614, "step": 8496 }, { "epoch": 1.42, "grad_norm": 0.4787636399269104, "learning_rate": 1.133404992469261e-05, "loss": 0.0561, "step": 8497 }, { "epoch": 1.42, "grad_norm": 0.37953823804855347, "learning_rate": 1.1332260322620688e-05, "loss": 0.0516, "step": 8498 }, { "epoch": 1.42, "grad_norm": 0.44390031695365906, "learning_rate": 1.1330470677108725e-05, "loss": 0.0683, "step": 8499 }, { "epoch": 1.42, "grad_norm": 0.5175818800926208, "learning_rate": 1.1328680988215082e-05, "loss": 0.079, "step": 8500 }, { "epoch": 1.42, "grad_norm": 0.7217516303062439, "learning_rate": 1.1326891255998114e-05, "loss": 0.051, "step": 8501 }, { "epoch": 1.42, "grad_norm": 0.5659946203231812, "learning_rate": 1.1325101480516169e-05, "loss": 0.0791, "step": 8502 }, { "epoch": 1.42, "grad_norm": 0.725080132484436, "learning_rate": 1.1323311661827612e-05, "loss": 0.0631, "step": 8503 }, { "epoch": 1.42, "grad_norm": 0.501294732093811, "learning_rate": 1.13215217999908e-05, "loss": 0.0673, "step": 8504 }, { "epoch": 1.42, "grad_norm": 0.7880034446716309, "learning_rate": 1.1319731895064097e-05, "loss": 0.0663, "step": 8505 }, { "epoch": 1.42, "grad_norm": 0.4225107729434967, "learning_rate": 1.131794194710586e-05, "loss": 0.0538, "step": 8506 }, { "epoch": 1.42, "grad_norm": 0.5299925208091736, "learning_rate": 1.1316151956174455e-05, "loss": 0.0688, "step": 8507 }, { "epoch": 1.42, "grad_norm": 0.4214261770248413, "learning_rate": 1.1314361922328248e-05, "loss": 0.0528, "step": 8508 }, { "epoch": 1.42, "grad_norm": 0.6407500505447388, "learning_rate": 1.1312571845625603e-05, "loss": 0.089, "step": 8509 }, { "epoch": 1.42, "grad_norm": 0.4185516834259033, "learning_rate": 1.1310781726124891e-05, "loss": 0.0412, "step": 8510 }, { "epoch": 1.42, "grad_norm": 0.43915095925331116, "learning_rate": 1.1308991563884475e-05, "loss": 0.0623, "step": 8511 }, { "epoch": 1.42, "grad_norm": 0.6232606768608093, "learning_rate": 1.1307201358962734e-05, "loss": 0.0706, "step": 8512 }, { "epoch": 1.42, "grad_norm": 1.349120855331421, "learning_rate": 1.1305411111418031e-05, "loss": 0.0669, "step": 8513 }, { "epoch": 1.42, "grad_norm": 0.5262176394462585, "learning_rate": 1.1303620821308747e-05, "loss": 0.0529, "step": 8514 }, { "epoch": 1.42, "grad_norm": 0.32089048624038696, "learning_rate": 1.1301830488693253e-05, "loss": 0.0492, "step": 8515 }, { "epoch": 1.42, "grad_norm": 0.396761029958725, "learning_rate": 1.1300040113629924e-05, "loss": 0.0436, "step": 8516 }, { "epoch": 1.42, "grad_norm": 0.3922102153301239, "learning_rate": 1.1298249696177139e-05, "loss": 0.0438, "step": 8517 }, { "epoch": 1.42, "grad_norm": 0.43571555614471436, "learning_rate": 1.1296459236393278e-05, "loss": 0.0641, "step": 8518 }, { "epoch": 1.42, "grad_norm": 0.5228754878044128, "learning_rate": 1.129466873433672e-05, "loss": 0.0722, "step": 8519 }, { "epoch": 1.42, "grad_norm": 0.7746258974075317, "learning_rate": 1.1292878190065846e-05, "loss": 0.0662, "step": 8520 }, { "epoch": 1.43, "grad_norm": 0.42392498254776, "learning_rate": 1.129108760363904e-05, "loss": 0.0602, "step": 8521 }, { "epoch": 1.43, "grad_norm": 0.5871928930282593, "learning_rate": 1.1289296975114688e-05, "loss": 0.049, "step": 8522 }, { "epoch": 1.43, "grad_norm": 0.47438281774520874, "learning_rate": 1.1287506304551168e-05, "loss": 0.059, "step": 8523 }, { "epoch": 1.43, "grad_norm": 0.783552885055542, "learning_rate": 1.1285715592006874e-05, "loss": 0.0534, "step": 8524 }, { "epoch": 1.43, "grad_norm": 0.4259325861930847, "learning_rate": 1.1283924837540194e-05, "loss": 0.0588, "step": 8525 }, { "epoch": 1.43, "grad_norm": 0.5098695158958435, "learning_rate": 1.128213404120952e-05, "loss": 0.05, "step": 8526 }, { "epoch": 1.43, "grad_norm": 0.7230070233345032, "learning_rate": 1.1280343203073236e-05, "loss": 0.0824, "step": 8527 }, { "epoch": 1.43, "grad_norm": 0.41386494040489197, "learning_rate": 1.127855232318974e-05, "loss": 0.0596, "step": 8528 }, { "epoch": 1.43, "grad_norm": 0.5723603367805481, "learning_rate": 1.1276761401617426e-05, "loss": 0.0501, "step": 8529 }, { "epoch": 1.43, "grad_norm": 0.37821894884109497, "learning_rate": 1.1274970438414688e-05, "loss": 0.058, "step": 8530 }, { "epoch": 1.43, "grad_norm": 1.2023332118988037, "learning_rate": 1.1273179433639918e-05, "loss": 0.0691, "step": 8531 }, { "epoch": 1.43, "grad_norm": 0.6081916689872742, "learning_rate": 1.1271388387351523e-05, "loss": 0.0669, "step": 8532 }, { "epoch": 1.43, "grad_norm": 0.4625062346458435, "learning_rate": 1.1269597299607898e-05, "loss": 0.0368, "step": 8533 }, { "epoch": 1.43, "grad_norm": 0.47215569019317627, "learning_rate": 1.1267806170467441e-05, "loss": 0.0571, "step": 8534 }, { "epoch": 1.43, "grad_norm": 0.6177505850791931, "learning_rate": 1.1266014999988554e-05, "loss": 0.0685, "step": 8535 }, { "epoch": 1.43, "grad_norm": 0.6253588795661926, "learning_rate": 1.126422378822965e-05, "loss": 0.0542, "step": 8536 }, { "epoch": 1.43, "grad_norm": 0.5028029084205627, "learning_rate": 1.1262432535249122e-05, "loss": 0.061, "step": 8537 }, { "epoch": 1.43, "grad_norm": 0.5331460237503052, "learning_rate": 1.1260641241105381e-05, "loss": 0.0649, "step": 8538 }, { "epoch": 1.43, "grad_norm": 0.4612743556499481, "learning_rate": 1.1258849905856833e-05, "loss": 0.0846, "step": 8539 }, { "epoch": 1.43, "grad_norm": 0.7462925910949707, "learning_rate": 1.1257058529561891e-05, "loss": 0.0881, "step": 8540 }, { "epoch": 1.43, "grad_norm": 0.7882060408592224, "learning_rate": 1.125526711227896e-05, "loss": 0.0536, "step": 8541 }, { "epoch": 1.43, "grad_norm": 0.6061484813690186, "learning_rate": 1.1253475654066453e-05, "loss": 0.0705, "step": 8542 }, { "epoch": 1.43, "grad_norm": 0.4727845788002014, "learning_rate": 1.1251684154982783e-05, "loss": 0.05, "step": 8543 }, { "epoch": 1.43, "grad_norm": 0.572805643081665, "learning_rate": 1.1249892615086365e-05, "loss": 0.0531, "step": 8544 }, { "epoch": 1.43, "grad_norm": 0.607697069644928, "learning_rate": 1.1248101034435614e-05, "loss": 0.0629, "step": 8545 }, { "epoch": 1.43, "grad_norm": 0.3457000255584717, "learning_rate": 1.1246309413088945e-05, "loss": 0.0296, "step": 8546 }, { "epoch": 1.43, "grad_norm": 0.40430331230163574, "learning_rate": 1.1244517751104779e-05, "loss": 0.0546, "step": 8547 }, { "epoch": 1.43, "grad_norm": 0.667521595954895, "learning_rate": 1.1242726048541533e-05, "loss": 0.0555, "step": 8548 }, { "epoch": 1.43, "grad_norm": 0.41019728779792786, "learning_rate": 1.1240934305457629e-05, "loss": 0.07, "step": 8549 }, { "epoch": 1.43, "grad_norm": 0.5404382348060608, "learning_rate": 1.123914252191149e-05, "loss": 0.0511, "step": 8550 }, { "epoch": 1.43, "grad_norm": 0.6185874938964844, "learning_rate": 1.1237350697961535e-05, "loss": 0.0572, "step": 8551 }, { "epoch": 1.43, "grad_norm": 0.4830528497695923, "learning_rate": 1.1235558833666193e-05, "loss": 0.0674, "step": 8552 }, { "epoch": 1.43, "grad_norm": 0.4494953453540802, "learning_rate": 1.1233766929083892e-05, "loss": 0.0485, "step": 8553 }, { "epoch": 1.43, "grad_norm": 0.4571671485900879, "learning_rate": 1.1231974984273055e-05, "loss": 0.0622, "step": 8554 }, { "epoch": 1.43, "grad_norm": 0.37896183133125305, "learning_rate": 1.1230182999292107e-05, "loss": 0.0412, "step": 8555 }, { "epoch": 1.43, "grad_norm": 0.4124957025051117, "learning_rate": 1.1228390974199486e-05, "loss": 0.0506, "step": 8556 }, { "epoch": 1.43, "grad_norm": 0.7371788620948792, "learning_rate": 1.1226598909053623e-05, "loss": 0.0564, "step": 8557 }, { "epoch": 1.43, "grad_norm": 0.6315762400627136, "learning_rate": 1.1224806803912944e-05, "loss": 0.0539, "step": 8558 }, { "epoch": 1.43, "grad_norm": 0.6006453037261963, "learning_rate": 1.1223014658835885e-05, "loss": 0.0663, "step": 8559 }, { "epoch": 1.43, "grad_norm": 0.4286692142486572, "learning_rate": 1.1221222473880884e-05, "loss": 0.0447, "step": 8560 }, { "epoch": 1.43, "grad_norm": 0.6201434135437012, "learning_rate": 1.1219430249106378e-05, "loss": 0.0861, "step": 8561 }, { "epoch": 1.43, "grad_norm": 0.6784946322441101, "learning_rate": 1.1217637984570802e-05, "loss": 0.0403, "step": 8562 }, { "epoch": 1.43, "grad_norm": 0.6043176054954529, "learning_rate": 1.121584568033259e-05, "loss": 0.0578, "step": 8563 }, { "epoch": 1.43, "grad_norm": 0.6614445447921753, "learning_rate": 1.1214053336450197e-05, "loss": 0.0637, "step": 8564 }, { "epoch": 1.43, "grad_norm": 0.496764212846756, "learning_rate": 1.1212260952982051e-05, "loss": 0.04, "step": 8565 }, { "epoch": 1.43, "grad_norm": 0.5666031837463379, "learning_rate": 1.12104685299866e-05, "loss": 0.0907, "step": 8566 }, { "epoch": 1.43, "grad_norm": 0.5079988241195679, "learning_rate": 1.1208676067522286e-05, "loss": 0.0544, "step": 8567 }, { "epoch": 1.43, "grad_norm": 0.5452287197113037, "learning_rate": 1.1206883565647562e-05, "loss": 0.0673, "step": 8568 }, { "epoch": 1.43, "grad_norm": 0.4511188566684723, "learning_rate": 1.1205091024420864e-05, "loss": 0.0498, "step": 8569 }, { "epoch": 1.43, "grad_norm": 0.45661216974258423, "learning_rate": 1.1203298443900648e-05, "loss": 0.0609, "step": 8570 }, { "epoch": 1.43, "grad_norm": 0.6200494170188904, "learning_rate": 1.120150582414536e-05, "loss": 0.0641, "step": 8571 }, { "epoch": 1.43, "grad_norm": 0.3863900601863861, "learning_rate": 1.119971316521345e-05, "loss": 0.0512, "step": 8572 }, { "epoch": 1.43, "grad_norm": 0.41288959980010986, "learning_rate": 1.1197920467163375e-05, "loss": 0.0583, "step": 8573 }, { "epoch": 1.43, "grad_norm": 0.5457158088684082, "learning_rate": 1.119612773005358e-05, "loss": 0.0554, "step": 8574 }, { "epoch": 1.43, "grad_norm": 0.3552955687046051, "learning_rate": 1.1194334953942527e-05, "loss": 0.0555, "step": 8575 }, { "epoch": 1.43, "grad_norm": 0.48895734548568726, "learning_rate": 1.1192542138888666e-05, "loss": 0.0471, "step": 8576 }, { "epoch": 1.43, "grad_norm": 0.3362249433994293, "learning_rate": 1.1190749284950459e-05, "loss": 0.0509, "step": 8577 }, { "epoch": 1.43, "grad_norm": 0.7527135610580444, "learning_rate": 1.1188956392186363e-05, "loss": 0.0591, "step": 8578 }, { "epoch": 1.43, "grad_norm": 0.42420127987861633, "learning_rate": 1.1187163460654834e-05, "loss": 0.0587, "step": 8579 }, { "epoch": 1.44, "grad_norm": 0.714560329914093, "learning_rate": 1.1185370490414335e-05, "loss": 0.0643, "step": 8580 }, { "epoch": 1.44, "grad_norm": 0.3387232720851898, "learning_rate": 1.1183577481523329e-05, "loss": 0.044, "step": 8581 }, { "epoch": 1.44, "grad_norm": 0.6624349355697632, "learning_rate": 1.1181784434040282e-05, "loss": 0.059, "step": 8582 }, { "epoch": 1.44, "grad_norm": 0.44183841347694397, "learning_rate": 1.117999134802365e-05, "loss": 0.0631, "step": 8583 }, { "epoch": 1.44, "grad_norm": 0.39642906188964844, "learning_rate": 1.1178198223531904e-05, "loss": 0.0485, "step": 8584 }, { "epoch": 1.44, "grad_norm": 0.5191687941551208, "learning_rate": 1.1176405060623516e-05, "loss": 0.0712, "step": 8585 }, { "epoch": 1.44, "grad_norm": 0.5917227268218994, "learning_rate": 1.1174611859356948e-05, "loss": 0.0565, "step": 8586 }, { "epoch": 1.44, "grad_norm": 0.5000686049461365, "learning_rate": 1.1172818619790669e-05, "loss": 0.0588, "step": 8587 }, { "epoch": 1.44, "grad_norm": 0.383973628282547, "learning_rate": 1.1171025341983153e-05, "loss": 0.0467, "step": 8588 }, { "epoch": 1.44, "grad_norm": 0.4569745361804962, "learning_rate": 1.1169232025992874e-05, "loss": 0.0587, "step": 8589 }, { "epoch": 1.44, "grad_norm": 0.6045196056365967, "learning_rate": 1.1167438671878299e-05, "loss": 0.0659, "step": 8590 }, { "epoch": 1.44, "grad_norm": 0.8949030637741089, "learning_rate": 1.1165645279697903e-05, "loss": 0.0649, "step": 8591 }, { "epoch": 1.44, "grad_norm": 0.6215848326683044, "learning_rate": 1.1163851849510172e-05, "loss": 0.057, "step": 8592 }, { "epoch": 1.44, "grad_norm": 0.6288299560546875, "learning_rate": 1.1162058381373572e-05, "loss": 0.0665, "step": 8593 }, { "epoch": 1.44, "grad_norm": 0.4030887186527252, "learning_rate": 1.1160264875346584e-05, "loss": 0.0556, "step": 8594 }, { "epoch": 1.44, "grad_norm": 0.410274475812912, "learning_rate": 1.115847133148769e-05, "loss": 0.0644, "step": 8595 }, { "epoch": 1.44, "grad_norm": 0.6338700652122498, "learning_rate": 1.1156677749855371e-05, "loss": 0.0598, "step": 8596 }, { "epoch": 1.44, "grad_norm": 0.505921483039856, "learning_rate": 1.1154884130508107e-05, "loss": 0.0537, "step": 8597 }, { "epoch": 1.44, "grad_norm": 0.6471299529075623, "learning_rate": 1.1153090473504381e-05, "loss": 0.0479, "step": 8598 }, { "epoch": 1.44, "grad_norm": 0.4479062259197235, "learning_rate": 1.1151296778902679e-05, "loss": 0.0465, "step": 8599 }, { "epoch": 1.44, "grad_norm": 0.3050229549407959, "learning_rate": 1.1149503046761484e-05, "loss": 0.0435, "step": 8600 }, { "epoch": 1.44, "grad_norm": 0.4853416085243225, "learning_rate": 1.1147709277139287e-05, "loss": 0.0601, "step": 8601 }, { "epoch": 1.44, "grad_norm": 0.5010019540786743, "learning_rate": 1.1145915470094572e-05, "loss": 0.0523, "step": 8602 }, { "epoch": 1.44, "grad_norm": 0.5145965814590454, "learning_rate": 1.1144121625685833e-05, "loss": 0.061, "step": 8603 }, { "epoch": 1.44, "grad_norm": 0.6187939047813416, "learning_rate": 1.1142327743971557e-05, "loss": 0.0458, "step": 8604 }, { "epoch": 1.44, "grad_norm": 0.6866157054901123, "learning_rate": 1.1140533825010236e-05, "loss": 0.0558, "step": 8605 }, { "epoch": 1.44, "grad_norm": 0.47162926197052, "learning_rate": 1.1138739868860364e-05, "loss": 0.0671, "step": 8606 }, { "epoch": 1.44, "grad_norm": 0.5645763874053955, "learning_rate": 1.1136945875580434e-05, "loss": 0.06, "step": 8607 }, { "epoch": 1.44, "grad_norm": 0.442311555147171, "learning_rate": 1.1135151845228942e-05, "loss": 0.0598, "step": 8608 }, { "epoch": 1.44, "grad_norm": 0.6016161441802979, "learning_rate": 1.1133357777864386e-05, "loss": 0.0513, "step": 8609 }, { "epoch": 1.44, "grad_norm": 0.4789820611476898, "learning_rate": 1.1131563673545264e-05, "loss": 0.0615, "step": 8610 }, { "epoch": 1.44, "grad_norm": 0.5250211358070374, "learning_rate": 1.112976953233007e-05, "loss": 0.0451, "step": 8611 }, { "epoch": 1.44, "grad_norm": 0.49254393577575684, "learning_rate": 1.1127975354277311e-05, "loss": 0.0583, "step": 8612 }, { "epoch": 1.44, "grad_norm": 0.44359204173088074, "learning_rate": 1.1126181139445487e-05, "loss": 0.052, "step": 8613 }, { "epoch": 1.44, "grad_norm": 0.7423868179321289, "learning_rate": 1.1124386887893096e-05, "loss": 0.0536, "step": 8614 }, { "epoch": 1.44, "grad_norm": 0.4005304276943207, "learning_rate": 1.1122592599678646e-05, "loss": 0.0365, "step": 8615 }, { "epoch": 1.44, "grad_norm": 0.5426118969917297, "learning_rate": 1.112079827486064e-05, "loss": 0.0584, "step": 8616 }, { "epoch": 1.44, "grad_norm": 0.4183894395828247, "learning_rate": 1.1119003913497588e-05, "loss": 0.0615, "step": 8617 }, { "epoch": 1.44, "grad_norm": 0.47538599371910095, "learning_rate": 1.1117209515647994e-05, "loss": 0.0486, "step": 8618 }, { "epoch": 1.44, "grad_norm": 0.7839111685752869, "learning_rate": 1.1115415081370364e-05, "loss": 0.0538, "step": 8619 }, { "epoch": 1.44, "grad_norm": 0.4495302438735962, "learning_rate": 1.1113620610723218e-05, "loss": 0.0316, "step": 8620 }, { "epoch": 1.44, "grad_norm": 1.0747179985046387, "learning_rate": 1.1111826103765053e-05, "loss": 0.0587, "step": 8621 }, { "epoch": 1.44, "grad_norm": 0.45500436425209045, "learning_rate": 1.1110031560554394e-05, "loss": 0.0486, "step": 8622 }, { "epoch": 1.44, "grad_norm": 0.3956497013568878, "learning_rate": 1.1108236981149745e-05, "loss": 0.0556, "step": 8623 }, { "epoch": 1.44, "grad_norm": 0.5415120720863342, "learning_rate": 1.1106442365609624e-05, "loss": 0.0479, "step": 8624 }, { "epoch": 1.44, "grad_norm": 0.6268133521080017, "learning_rate": 1.1104647713992547e-05, "loss": 0.0708, "step": 8625 }, { "epoch": 1.44, "grad_norm": 0.4415498375892639, "learning_rate": 1.1102853026357033e-05, "loss": 0.054, "step": 8626 }, { "epoch": 1.44, "grad_norm": 0.547017514705658, "learning_rate": 1.1101058302761596e-05, "loss": 0.0706, "step": 8627 }, { "epoch": 1.44, "grad_norm": 0.5348935723304749, "learning_rate": 1.1099263543264758e-05, "loss": 0.0602, "step": 8628 }, { "epoch": 1.44, "grad_norm": 0.35887041687965393, "learning_rate": 1.1097468747925038e-05, "loss": 0.0397, "step": 8629 }, { "epoch": 1.44, "grad_norm": 0.42500147223472595, "learning_rate": 1.1095673916800958e-05, "loss": 0.037, "step": 8630 }, { "epoch": 1.44, "grad_norm": 0.3938947319984436, "learning_rate": 1.1093879049951042e-05, "loss": 0.0533, "step": 8631 }, { "epoch": 1.44, "grad_norm": 0.5399072766304016, "learning_rate": 1.1092084147433812e-05, "loss": 0.0601, "step": 8632 }, { "epoch": 1.44, "grad_norm": 0.47521787881851196, "learning_rate": 1.1090289209307794e-05, "loss": 0.0489, "step": 8633 }, { "epoch": 1.44, "grad_norm": 0.44352883100509644, "learning_rate": 1.1088494235631516e-05, "loss": 0.0454, "step": 8634 }, { "epoch": 1.44, "grad_norm": 0.778303861618042, "learning_rate": 1.1086699226463497e-05, "loss": 0.0691, "step": 8635 }, { "epoch": 1.44, "grad_norm": 0.8202863931655884, "learning_rate": 1.1084904181862277e-05, "loss": 0.0641, "step": 8636 }, { "epoch": 1.44, "grad_norm": 0.474489688873291, "learning_rate": 1.108310910188638e-05, "loss": 0.0535, "step": 8637 }, { "epoch": 1.44, "grad_norm": 0.5287292003631592, "learning_rate": 1.108131398659434e-05, "loss": 0.0455, "step": 8638 }, { "epoch": 1.44, "grad_norm": 0.4198032021522522, "learning_rate": 1.1079518836044682e-05, "loss": 0.0461, "step": 8639 }, { "epoch": 1.45, "grad_norm": 0.7580660581588745, "learning_rate": 1.1077723650295946e-05, "loss": 0.0738, "step": 8640 }, { "epoch": 1.45, "grad_norm": 0.5072652101516724, "learning_rate": 1.1075928429406666e-05, "loss": 0.0611, "step": 8641 }, { "epoch": 1.45, "grad_norm": 0.3932251036167145, "learning_rate": 1.1074133173435373e-05, "loss": 0.0425, "step": 8642 }, { "epoch": 1.45, "grad_norm": 0.3830731213092804, "learning_rate": 1.1072337882440605e-05, "loss": 0.0509, "step": 8643 }, { "epoch": 1.45, "grad_norm": 0.37951961159706116, "learning_rate": 1.10705425564809e-05, "loss": 0.0476, "step": 8644 }, { "epoch": 1.45, "grad_norm": 0.44914501905441284, "learning_rate": 1.1068747195614803e-05, "loss": 0.0371, "step": 8645 }, { "epoch": 1.45, "grad_norm": 0.4340413212776184, "learning_rate": 1.1066951799900846e-05, "loss": 0.0525, "step": 8646 }, { "epoch": 1.45, "grad_norm": 0.40839579701423645, "learning_rate": 1.1065156369397572e-05, "loss": 0.0414, "step": 8647 }, { "epoch": 1.45, "grad_norm": 0.4237518310546875, "learning_rate": 1.1063360904163528e-05, "loss": 0.0355, "step": 8648 }, { "epoch": 1.45, "grad_norm": 0.3651272654533386, "learning_rate": 1.1061565404257249e-05, "loss": 0.0426, "step": 8649 }, { "epoch": 1.45, "grad_norm": 0.5281249284744263, "learning_rate": 1.1059769869737285e-05, "loss": 0.0704, "step": 8650 }, { "epoch": 1.45, "grad_norm": 0.57901930809021, "learning_rate": 1.1057974300662182e-05, "loss": 0.0626, "step": 8651 }, { "epoch": 1.45, "grad_norm": 0.5097842216491699, "learning_rate": 1.1056178697090485e-05, "loss": 0.0546, "step": 8652 }, { "epoch": 1.45, "grad_norm": 0.6791254281997681, "learning_rate": 1.1054383059080742e-05, "loss": 0.0616, "step": 8653 }, { "epoch": 1.45, "grad_norm": 0.5628568530082703, "learning_rate": 1.1052587386691502e-05, "loss": 0.0737, "step": 8654 }, { "epoch": 1.45, "grad_norm": 0.5155136585235596, "learning_rate": 1.1050791679981319e-05, "loss": 0.0624, "step": 8655 }, { "epoch": 1.45, "grad_norm": 0.6997764706611633, "learning_rate": 1.1048995939008738e-05, "loss": 0.0488, "step": 8656 }, { "epoch": 1.45, "grad_norm": 0.5417163968086243, "learning_rate": 1.1047200163832315e-05, "loss": 0.0613, "step": 8657 }, { "epoch": 1.45, "grad_norm": 0.4961176812648773, "learning_rate": 1.1045404354510602e-05, "loss": 0.0695, "step": 8658 }, { "epoch": 1.45, "grad_norm": 0.4398749768733978, "learning_rate": 1.1043608511102158e-05, "loss": 0.0468, "step": 8659 }, { "epoch": 1.45, "grad_norm": 0.6026003360748291, "learning_rate": 1.1041812633665534e-05, "loss": 0.0723, "step": 8660 }, { "epoch": 1.45, "grad_norm": 0.4084489941596985, "learning_rate": 1.1040016722259288e-05, "loss": 0.0425, "step": 8661 }, { "epoch": 1.45, "grad_norm": 0.4275166094303131, "learning_rate": 1.1038220776941979e-05, "loss": 0.0555, "step": 8662 }, { "epoch": 1.45, "grad_norm": 0.5195651054382324, "learning_rate": 1.1036424797772163e-05, "loss": 0.034, "step": 8663 }, { "epoch": 1.45, "grad_norm": 0.498635858297348, "learning_rate": 1.1034628784808404e-05, "loss": 0.0561, "step": 8664 }, { "epoch": 1.45, "grad_norm": 0.4537792205810547, "learning_rate": 1.103283273810926e-05, "loss": 0.0448, "step": 8665 }, { "epoch": 1.45, "grad_norm": 0.47035324573516846, "learning_rate": 1.1031036657733301e-05, "loss": 0.0512, "step": 8666 }, { "epoch": 1.45, "grad_norm": 0.45432454347610474, "learning_rate": 1.1029240543739077e-05, "loss": 0.0429, "step": 8667 }, { "epoch": 1.45, "grad_norm": 0.5714919567108154, "learning_rate": 1.1027444396185162e-05, "loss": 0.0605, "step": 8668 }, { "epoch": 1.45, "grad_norm": 0.5936264395713806, "learning_rate": 1.1025648215130125e-05, "loss": 0.0612, "step": 8669 }, { "epoch": 1.45, "grad_norm": 0.5839818120002747, "learning_rate": 1.1023852000632525e-05, "loss": 0.0762, "step": 8670 }, { "epoch": 1.45, "grad_norm": 0.5030234456062317, "learning_rate": 1.1022055752750929e-05, "loss": 0.0639, "step": 8671 }, { "epoch": 1.45, "grad_norm": 0.7204459309577942, "learning_rate": 1.1020259471543912e-05, "loss": 0.0827, "step": 8672 }, { "epoch": 1.45, "grad_norm": 0.5708585977554321, "learning_rate": 1.1018463157070043e-05, "loss": 0.0585, "step": 8673 }, { "epoch": 1.45, "grad_norm": 0.47288811206817627, "learning_rate": 1.101666680938789e-05, "loss": 0.0502, "step": 8674 }, { "epoch": 1.45, "grad_norm": 0.7375094890594482, "learning_rate": 1.1014870428556026e-05, "loss": 0.0665, "step": 8675 }, { "epoch": 1.45, "grad_norm": 0.5585927963256836, "learning_rate": 1.1013074014633028e-05, "loss": 0.0562, "step": 8676 }, { "epoch": 1.45, "grad_norm": 0.46652230620384216, "learning_rate": 1.1011277567677462e-05, "loss": 0.0676, "step": 8677 }, { "epoch": 1.45, "grad_norm": 0.4097021520137787, "learning_rate": 1.1009481087747912e-05, "loss": 0.0452, "step": 8678 }, { "epoch": 1.45, "grad_norm": 0.4632185995578766, "learning_rate": 1.1007684574902953e-05, "loss": 0.0541, "step": 8679 }, { "epoch": 1.45, "grad_norm": 0.3864467144012451, "learning_rate": 1.1005888029201159e-05, "loss": 0.0454, "step": 8680 }, { "epoch": 1.45, "grad_norm": 0.4936130940914154, "learning_rate": 1.100409145070111e-05, "loss": 0.061, "step": 8681 }, { "epoch": 1.45, "grad_norm": 0.5374462008476257, "learning_rate": 1.1002294839461387e-05, "loss": 0.0592, "step": 8682 }, { "epoch": 1.45, "grad_norm": 0.4827526807785034, "learning_rate": 1.100049819554057e-05, "loss": 0.0525, "step": 8683 }, { "epoch": 1.45, "grad_norm": 0.5096871852874756, "learning_rate": 1.099870151899724e-05, "loss": 0.0624, "step": 8684 }, { "epoch": 1.45, "grad_norm": 0.49392253160476685, "learning_rate": 1.0996904809889984e-05, "loss": 0.0508, "step": 8685 }, { "epoch": 1.45, "grad_norm": 0.599960446357727, "learning_rate": 1.0995108068277379e-05, "loss": 0.0676, "step": 8686 }, { "epoch": 1.45, "grad_norm": 0.43951302766799927, "learning_rate": 1.0993311294218016e-05, "loss": 0.0533, "step": 8687 }, { "epoch": 1.45, "grad_norm": 0.31609711050987244, "learning_rate": 1.0991514487770478e-05, "loss": 0.0454, "step": 8688 }, { "epoch": 1.45, "grad_norm": 0.5124462246894836, "learning_rate": 1.0989717648993352e-05, "loss": 0.0694, "step": 8689 }, { "epoch": 1.45, "grad_norm": 0.4355957508087158, "learning_rate": 1.0987920777945232e-05, "loss": 0.0582, "step": 8690 }, { "epoch": 1.45, "grad_norm": 0.6317915320396423, "learning_rate": 1.0986123874684696e-05, "loss": 0.0736, "step": 8691 }, { "epoch": 1.45, "grad_norm": 0.5804389715194702, "learning_rate": 1.0984326939270345e-05, "loss": 0.061, "step": 8692 }, { "epoch": 1.45, "grad_norm": 0.5643161535263062, "learning_rate": 1.0982529971760764e-05, "loss": 0.0677, "step": 8693 }, { "epoch": 1.45, "grad_norm": 0.6890256404876709, "learning_rate": 1.0980732972214551e-05, "loss": 0.0576, "step": 8694 }, { "epoch": 1.45, "grad_norm": 0.4348710775375366, "learning_rate": 1.0978935940690292e-05, "loss": 0.0659, "step": 8695 }, { "epoch": 1.45, "grad_norm": 0.4093548059463501, "learning_rate": 1.0977138877246586e-05, "loss": 0.0578, "step": 8696 }, { "epoch": 1.45, "grad_norm": 0.8338037729263306, "learning_rate": 1.0975341781942033e-05, "loss": 0.0524, "step": 8697 }, { "epoch": 1.45, "grad_norm": 0.48161590099334717, "learning_rate": 1.0973544654835222e-05, "loss": 0.0553, "step": 8698 }, { "epoch": 1.45, "grad_norm": 0.5383610725402832, "learning_rate": 1.097174749598475e-05, "loss": 0.0576, "step": 8699 }, { "epoch": 1.46, "grad_norm": 0.556522786617279, "learning_rate": 1.0969950305449222e-05, "loss": 0.0497, "step": 8700 }, { "epoch": 1.46, "grad_norm": 0.47419992089271545, "learning_rate": 1.0968153083287236e-05, "loss": 0.0527, "step": 8701 }, { "epoch": 1.46, "grad_norm": 0.5034763813018799, "learning_rate": 1.096635582955739e-05, "loss": 0.0646, "step": 8702 }, { "epoch": 1.46, "grad_norm": 0.4155277907848358, "learning_rate": 1.0964558544318285e-05, "loss": 0.0489, "step": 8703 }, { "epoch": 1.46, "grad_norm": 0.41877827048301697, "learning_rate": 1.0962761227628527e-05, "loss": 0.0544, "step": 8704 }, { "epoch": 1.46, "grad_norm": 0.6846053600311279, "learning_rate": 1.0960963879546718e-05, "loss": 0.0613, "step": 8705 }, { "epoch": 1.46, "grad_norm": 0.5033707022666931, "learning_rate": 1.0959166500131465e-05, "loss": 0.0529, "step": 8706 }, { "epoch": 1.46, "grad_norm": 0.4487583339214325, "learning_rate": 1.095736908944137e-05, "loss": 0.0525, "step": 8707 }, { "epoch": 1.46, "grad_norm": 0.5802544951438904, "learning_rate": 1.0955571647535044e-05, "loss": 0.0833, "step": 8708 }, { "epoch": 1.46, "grad_norm": 0.5545439720153809, "learning_rate": 1.0953774174471092e-05, "loss": 0.0617, "step": 8709 }, { "epoch": 1.46, "grad_norm": 0.38949164748191833, "learning_rate": 1.0951976670308124e-05, "loss": 0.0534, "step": 8710 }, { "epoch": 1.46, "grad_norm": 0.49526432156562805, "learning_rate": 1.095017913510475e-05, "loss": 0.0684, "step": 8711 }, { "epoch": 1.46, "grad_norm": 0.4689379930496216, "learning_rate": 1.0948381568919582e-05, "loss": 0.0367, "step": 8712 }, { "epoch": 1.46, "grad_norm": 0.36031606793403625, "learning_rate": 1.0946583971811229e-05, "loss": 0.0492, "step": 8713 }, { "epoch": 1.46, "grad_norm": 0.5135157704353333, "learning_rate": 1.0944786343838306e-05, "loss": 0.0659, "step": 8714 }, { "epoch": 1.46, "grad_norm": 0.5498506426811218, "learning_rate": 1.0942988685059423e-05, "loss": 0.0573, "step": 8715 }, { "epoch": 1.46, "grad_norm": 0.49460357427597046, "learning_rate": 1.0941190995533204e-05, "loss": 0.0512, "step": 8716 }, { "epoch": 1.46, "grad_norm": 0.4578867554664612, "learning_rate": 1.0939393275318256e-05, "loss": 0.0522, "step": 8717 }, { "epoch": 1.46, "grad_norm": 0.32428330183029175, "learning_rate": 1.0937595524473202e-05, "loss": 0.0403, "step": 8718 }, { "epoch": 1.46, "grad_norm": 0.504311203956604, "learning_rate": 1.0935797743056652e-05, "loss": 0.054, "step": 8719 }, { "epoch": 1.46, "grad_norm": 0.5380313396453857, "learning_rate": 1.0933999931127235e-05, "loss": 0.0694, "step": 8720 }, { "epoch": 1.46, "grad_norm": 0.6196826100349426, "learning_rate": 1.0932202088743565e-05, "loss": 0.0728, "step": 8721 }, { "epoch": 1.46, "grad_norm": 0.41314706206321716, "learning_rate": 1.0930404215964266e-05, "loss": 0.0599, "step": 8722 }, { "epoch": 1.46, "grad_norm": 0.39283186197280884, "learning_rate": 1.0928606312847954e-05, "loss": 0.0418, "step": 8723 }, { "epoch": 1.46, "grad_norm": 0.5177405476570129, "learning_rate": 1.0926808379453258e-05, "loss": 0.0583, "step": 8724 }, { "epoch": 1.46, "grad_norm": 0.3335961401462555, "learning_rate": 1.09250104158388e-05, "loss": 0.0477, "step": 8725 }, { "epoch": 1.46, "grad_norm": 0.4819789528846741, "learning_rate": 1.0923212422063202e-05, "loss": 0.0472, "step": 8726 }, { "epoch": 1.46, "grad_norm": 0.3735417127609253, "learning_rate": 1.0921414398185098e-05, "loss": 0.0533, "step": 8727 }, { "epoch": 1.46, "grad_norm": 0.45424124598503113, "learning_rate": 1.0919616344263102e-05, "loss": 0.0831, "step": 8728 }, { "epoch": 1.46, "grad_norm": 0.5203502178192139, "learning_rate": 1.0917818260355854e-05, "loss": 0.0523, "step": 8729 }, { "epoch": 1.46, "grad_norm": 0.6677400469779968, "learning_rate": 1.091602014652198e-05, "loss": 0.0591, "step": 8730 }, { "epoch": 1.46, "grad_norm": 0.43713462352752686, "learning_rate": 1.0914222002820103e-05, "loss": 0.0564, "step": 8731 }, { "epoch": 1.46, "grad_norm": 0.5205512642860413, "learning_rate": 1.091242382930886e-05, "loss": 0.0526, "step": 8732 }, { "epoch": 1.46, "grad_norm": 0.4144623279571533, "learning_rate": 1.0910625626046883e-05, "loss": 0.0393, "step": 8733 }, { "epoch": 1.46, "grad_norm": 0.5938518643379211, "learning_rate": 1.0908827393092802e-05, "loss": 0.0539, "step": 8734 }, { "epoch": 1.46, "grad_norm": 0.4609096050262451, "learning_rate": 1.0907029130505251e-05, "loss": 0.0729, "step": 8735 }, { "epoch": 1.46, "grad_norm": 0.5110997557640076, "learning_rate": 1.0905230838342864e-05, "loss": 0.0436, "step": 8736 }, { "epoch": 1.46, "grad_norm": 0.36238399147987366, "learning_rate": 1.090343251666428e-05, "loss": 0.0535, "step": 8737 }, { "epoch": 1.46, "grad_norm": 0.6682121157646179, "learning_rate": 1.0901634165528134e-05, "loss": 0.0601, "step": 8738 }, { "epoch": 1.46, "grad_norm": 0.4004818797111511, "learning_rate": 1.089983578499306e-05, "loss": 0.0564, "step": 8739 }, { "epoch": 1.46, "grad_norm": 0.4430740475654602, "learning_rate": 1.0898037375117703e-05, "loss": 0.0519, "step": 8740 }, { "epoch": 1.46, "grad_norm": 0.4707989990711212, "learning_rate": 1.0896238935960697e-05, "loss": 0.0436, "step": 8741 }, { "epoch": 1.46, "grad_norm": 0.4844517111778259, "learning_rate": 1.0894440467580684e-05, "loss": 0.062, "step": 8742 }, { "epoch": 1.46, "grad_norm": 0.4295162260532379, "learning_rate": 1.0892641970036306e-05, "loss": 0.0602, "step": 8743 }, { "epoch": 1.46, "grad_norm": 0.6737536191940308, "learning_rate": 1.0890843443386205e-05, "loss": 0.0503, "step": 8744 }, { "epoch": 1.46, "grad_norm": 0.5946932435035706, "learning_rate": 1.0889044887689025e-05, "loss": 0.0575, "step": 8745 }, { "epoch": 1.46, "grad_norm": 0.49870893359184265, "learning_rate": 1.088724630300341e-05, "loss": 0.0492, "step": 8746 }, { "epoch": 1.46, "grad_norm": 0.5647797584533691, "learning_rate": 1.0885447689388001e-05, "loss": 0.0538, "step": 8747 }, { "epoch": 1.46, "grad_norm": 0.5116159319877625, "learning_rate": 1.0883649046901449e-05, "loss": 0.0554, "step": 8748 }, { "epoch": 1.46, "grad_norm": 0.5532553195953369, "learning_rate": 1.0881850375602398e-05, "loss": 0.0554, "step": 8749 }, { "epoch": 1.46, "grad_norm": 0.4999710023403168, "learning_rate": 1.0880051675549501e-05, "loss": 0.0673, "step": 8750 }, { "epoch": 1.46, "grad_norm": 0.5771549940109253, "learning_rate": 1.08782529468014e-05, "loss": 0.0692, "step": 8751 }, { "epoch": 1.46, "grad_norm": 0.5033147931098938, "learning_rate": 1.087645418941675e-05, "loss": 0.0539, "step": 8752 }, { "epoch": 1.46, "grad_norm": 0.4522938132286072, "learning_rate": 1.0874655403454201e-05, "loss": 0.0497, "step": 8753 }, { "epoch": 1.46, "grad_norm": 0.4245832562446594, "learning_rate": 1.0872856588972402e-05, "loss": 0.0582, "step": 8754 }, { "epoch": 1.46, "grad_norm": 0.4161209464073181, "learning_rate": 1.0871057746030008e-05, "loss": 0.0541, "step": 8755 }, { "epoch": 1.46, "grad_norm": 0.5879026055335999, "learning_rate": 1.086925887468567e-05, "loss": 0.064, "step": 8756 }, { "epoch": 1.46, "grad_norm": 0.417708158493042, "learning_rate": 1.0867459974998047e-05, "loss": 0.0418, "step": 8757 }, { "epoch": 1.46, "grad_norm": 0.3377462923526764, "learning_rate": 1.086566104702579e-05, "loss": 0.0413, "step": 8758 }, { "epoch": 1.46, "grad_norm": 1.6133917570114136, "learning_rate": 1.0863862090827559e-05, "loss": 0.0662, "step": 8759 }, { "epoch": 1.47, "grad_norm": 0.4521791934967041, "learning_rate": 1.0862063106462009e-05, "loss": 0.0535, "step": 8760 }, { "epoch": 1.47, "grad_norm": 0.5210585594177246, "learning_rate": 1.0860264093987797e-05, "loss": 0.0711, "step": 8761 }, { "epoch": 1.47, "grad_norm": 0.49973514676094055, "learning_rate": 1.0858465053463585e-05, "loss": 0.0561, "step": 8762 }, { "epoch": 1.47, "grad_norm": 0.4150986671447754, "learning_rate": 1.0856665984948029e-05, "loss": 0.0403, "step": 8763 }, { "epoch": 1.47, "grad_norm": 0.37883737683296204, "learning_rate": 1.0854866888499796e-05, "loss": 0.0556, "step": 8764 }, { "epoch": 1.47, "grad_norm": 0.3622884154319763, "learning_rate": 1.085306776417754e-05, "loss": 0.0384, "step": 8765 }, { "epoch": 1.47, "grad_norm": 0.40460097789764404, "learning_rate": 1.085126861203993e-05, "loss": 0.063, "step": 8766 }, { "epoch": 1.47, "grad_norm": 0.5126140117645264, "learning_rate": 1.0849469432145629e-05, "loss": 0.0629, "step": 8767 }, { "epoch": 1.47, "grad_norm": 0.5483717918395996, "learning_rate": 1.0847670224553299e-05, "loss": 0.0586, "step": 8768 }, { "epoch": 1.47, "grad_norm": 0.4362376034259796, "learning_rate": 1.0845870989321608e-05, "loss": 0.0501, "step": 8769 }, { "epoch": 1.47, "grad_norm": 0.4882054030895233, "learning_rate": 1.084407172650922e-05, "loss": 0.0775, "step": 8770 }, { "epoch": 1.47, "grad_norm": 0.4939269721508026, "learning_rate": 1.0842272436174803e-05, "loss": 0.0556, "step": 8771 }, { "epoch": 1.47, "grad_norm": 0.6482648849487305, "learning_rate": 1.0840473118377027e-05, "loss": 0.0686, "step": 8772 }, { "epoch": 1.47, "grad_norm": 0.4869150221347809, "learning_rate": 1.0838673773174557e-05, "loss": 0.0558, "step": 8773 }, { "epoch": 1.47, "grad_norm": 0.49908706545829773, "learning_rate": 1.0836874400626069e-05, "loss": 0.0556, "step": 8774 }, { "epoch": 1.47, "grad_norm": 0.6194527745246887, "learning_rate": 1.0835075000790225e-05, "loss": 0.0517, "step": 8775 }, { "epoch": 1.47, "grad_norm": 0.4078448712825775, "learning_rate": 1.0833275573725705e-05, "loss": 0.0573, "step": 8776 }, { "epoch": 1.47, "grad_norm": 0.6546052098274231, "learning_rate": 1.0831476119491177e-05, "loss": 0.0461, "step": 8777 }, { "epoch": 1.47, "grad_norm": 0.47858208417892456, "learning_rate": 1.0829676638145319e-05, "loss": 0.0665, "step": 8778 }, { "epoch": 1.47, "grad_norm": 0.538181722164154, "learning_rate": 1.08278771297468e-05, "loss": 0.0705, "step": 8779 }, { "epoch": 1.47, "grad_norm": 0.43255189061164856, "learning_rate": 1.0826077594354296e-05, "loss": 0.0568, "step": 8780 }, { "epoch": 1.47, "grad_norm": 0.456794798374176, "learning_rate": 1.0824278032026489e-05, "loss": 0.0474, "step": 8781 }, { "epoch": 1.47, "grad_norm": 0.4192041754722595, "learning_rate": 1.082247844282205e-05, "loss": 0.0436, "step": 8782 }, { "epoch": 1.47, "grad_norm": 0.5017374157905579, "learning_rate": 1.0820678826799657e-05, "loss": 0.0449, "step": 8783 }, { "epoch": 1.47, "grad_norm": 0.4414384365081787, "learning_rate": 1.0818879184017989e-05, "loss": 0.0561, "step": 8784 }, { "epoch": 1.47, "grad_norm": 0.4262790083885193, "learning_rate": 1.0817079514535733e-05, "loss": 0.0312, "step": 8785 }, { "epoch": 1.47, "grad_norm": 0.4616053104400635, "learning_rate": 1.0815279818411559e-05, "loss": 0.0559, "step": 8786 }, { "epoch": 1.47, "grad_norm": 0.45426425337791443, "learning_rate": 1.0813480095704155e-05, "loss": 0.0607, "step": 8787 }, { "epoch": 1.47, "grad_norm": 0.5374237298965454, "learning_rate": 1.08116803464722e-05, "loss": 0.0551, "step": 8788 }, { "epoch": 1.47, "grad_norm": 0.43423986434936523, "learning_rate": 1.0809880570774378e-05, "loss": 0.0434, "step": 8789 }, { "epoch": 1.47, "grad_norm": 0.4480728805065155, "learning_rate": 1.0808080768669373e-05, "loss": 0.0564, "step": 8790 }, { "epoch": 1.47, "grad_norm": 0.635328471660614, "learning_rate": 1.0806280940215869e-05, "loss": 0.0702, "step": 8791 }, { "epoch": 1.47, "grad_norm": 0.4769193232059479, "learning_rate": 1.0804481085472556e-05, "loss": 0.0574, "step": 8792 }, { "epoch": 1.47, "grad_norm": 0.4474189579486847, "learning_rate": 1.0802681204498116e-05, "loss": 0.044, "step": 8793 }, { "epoch": 1.47, "grad_norm": 0.4380635619163513, "learning_rate": 1.0800881297351236e-05, "loss": 0.0404, "step": 8794 }, { "epoch": 1.47, "grad_norm": 0.5357339978218079, "learning_rate": 1.0799081364090607e-05, "loss": 0.0548, "step": 8795 }, { "epoch": 1.47, "grad_norm": 0.49915462732315063, "learning_rate": 1.0797281404774918e-05, "loss": 0.0617, "step": 8796 }, { "epoch": 1.47, "grad_norm": 0.5865271687507629, "learning_rate": 1.0795481419462857e-05, "loss": 0.0573, "step": 8797 }, { "epoch": 1.47, "grad_norm": 0.6423400044441223, "learning_rate": 1.0793681408213117e-05, "loss": 0.0635, "step": 8798 }, { "epoch": 1.47, "grad_norm": 0.5197361707687378, "learning_rate": 1.0791881371084388e-05, "loss": 0.0668, "step": 8799 }, { "epoch": 1.47, "grad_norm": 0.4541756510734558, "learning_rate": 1.079008130813536e-05, "loss": 0.0593, "step": 8800 }, { "epoch": 1.47, "grad_norm": 0.30759134888648987, "learning_rate": 1.0788281219424731e-05, "loss": 0.0593, "step": 8801 }, { "epoch": 1.47, "grad_norm": 0.48654288053512573, "learning_rate": 1.0786481105011196e-05, "loss": 0.0582, "step": 8802 }, { "epoch": 1.47, "grad_norm": 0.5094423294067383, "learning_rate": 1.0784680964953444e-05, "loss": 0.0725, "step": 8803 }, { "epoch": 1.47, "grad_norm": 0.5492438673973083, "learning_rate": 1.0782880799310178e-05, "loss": 0.0609, "step": 8804 }, { "epoch": 1.47, "grad_norm": 0.412130206823349, "learning_rate": 1.0781080608140089e-05, "loss": 0.0374, "step": 8805 }, { "epoch": 1.47, "grad_norm": 0.7680668830871582, "learning_rate": 1.0779280391501878e-05, "loss": 0.0669, "step": 8806 }, { "epoch": 1.47, "grad_norm": 0.9364926815032959, "learning_rate": 1.077748014945424e-05, "loss": 0.0677, "step": 8807 }, { "epoch": 1.47, "grad_norm": 0.37999847531318665, "learning_rate": 1.0775679882055874e-05, "loss": 0.038, "step": 8808 }, { "epoch": 1.47, "grad_norm": 0.4682212769985199, "learning_rate": 1.0773879589365489e-05, "loss": 0.0537, "step": 8809 }, { "epoch": 1.47, "grad_norm": 0.5151818990707397, "learning_rate": 1.0772079271441771e-05, "loss": 0.0548, "step": 8810 }, { "epoch": 1.47, "grad_norm": 0.6086146831512451, "learning_rate": 1.0770278928343434e-05, "loss": 0.0674, "step": 8811 }, { "epoch": 1.47, "grad_norm": 0.44858071208000183, "learning_rate": 1.0768478560129171e-05, "loss": 0.0609, "step": 8812 }, { "epoch": 1.47, "grad_norm": 0.6291012763977051, "learning_rate": 1.0766678166857697e-05, "loss": 0.0552, "step": 8813 }, { "epoch": 1.47, "grad_norm": 0.5112938284873962, "learning_rate": 1.0764877748587708e-05, "loss": 0.0577, "step": 8814 }, { "epoch": 1.47, "grad_norm": 0.41348299384117126, "learning_rate": 1.0763077305377908e-05, "loss": 0.0511, "step": 8815 }, { "epoch": 1.47, "grad_norm": 0.5921801328659058, "learning_rate": 1.0761276837287004e-05, "loss": 0.0642, "step": 8816 }, { "epoch": 1.47, "grad_norm": 0.6264888644218445, "learning_rate": 1.0759476344373707e-05, "loss": 0.0639, "step": 8817 }, { "epoch": 1.47, "grad_norm": 0.5515425205230713, "learning_rate": 1.0757675826696718e-05, "loss": 0.0568, "step": 8818 }, { "epoch": 1.47, "grad_norm": 0.4832203686237335, "learning_rate": 1.075587528431475e-05, "loss": 0.0522, "step": 8819 }, { "epoch": 1.48, "grad_norm": 0.4473632276058197, "learning_rate": 1.0754074717286509e-05, "loss": 0.0592, "step": 8820 }, { "epoch": 1.48, "grad_norm": 0.5119449496269226, "learning_rate": 1.0752274125670708e-05, "loss": 0.0585, "step": 8821 }, { "epoch": 1.48, "grad_norm": 0.7995581030845642, "learning_rate": 1.0750473509526052e-05, "loss": 0.0515, "step": 8822 }, { "epoch": 1.48, "grad_norm": 0.5984193682670593, "learning_rate": 1.0748672868911259e-05, "loss": 0.0528, "step": 8823 }, { "epoch": 1.48, "grad_norm": 0.331707239151001, "learning_rate": 1.0746872203885036e-05, "loss": 0.0469, "step": 8824 }, { "epoch": 1.48, "grad_norm": 0.4981057941913605, "learning_rate": 1.0745071514506095e-05, "loss": 0.0414, "step": 8825 }, { "epoch": 1.48, "grad_norm": 0.5598576068878174, "learning_rate": 1.0743270800833159e-05, "loss": 0.0614, "step": 8826 }, { "epoch": 1.48, "grad_norm": 0.6247496604919434, "learning_rate": 1.0741470062924932e-05, "loss": 0.0656, "step": 8827 }, { "epoch": 1.48, "grad_norm": 0.5694398283958435, "learning_rate": 1.0739669300840135e-05, "loss": 0.0529, "step": 8828 }, { "epoch": 1.48, "grad_norm": 0.5368900299072266, "learning_rate": 1.0737868514637481e-05, "loss": 0.055, "step": 8829 }, { "epoch": 1.48, "grad_norm": 0.45958155393600464, "learning_rate": 1.0736067704375692e-05, "loss": 0.0472, "step": 8830 }, { "epoch": 1.48, "grad_norm": 0.39280423521995544, "learning_rate": 1.0734266870113479e-05, "loss": 0.0421, "step": 8831 }, { "epoch": 1.48, "grad_norm": 0.4395327866077423, "learning_rate": 1.0732466011909564e-05, "loss": 0.0516, "step": 8832 }, { "epoch": 1.48, "grad_norm": 0.6455875635147095, "learning_rate": 1.0730665129822668e-05, "loss": 0.0563, "step": 8833 }, { "epoch": 1.48, "grad_norm": 0.5050426125526428, "learning_rate": 1.0728864223911511e-05, "loss": 0.0512, "step": 8834 }, { "epoch": 1.48, "grad_norm": 0.4226098954677582, "learning_rate": 1.0727063294234807e-05, "loss": 0.0616, "step": 8835 }, { "epoch": 1.48, "grad_norm": 0.4291001856327057, "learning_rate": 1.0725262340851284e-05, "loss": 0.0506, "step": 8836 }, { "epoch": 1.48, "grad_norm": 0.6138811707496643, "learning_rate": 1.0723461363819665e-05, "loss": 0.0428, "step": 8837 }, { "epoch": 1.48, "grad_norm": 0.39034780859947205, "learning_rate": 1.072166036319867e-05, "loss": 0.0392, "step": 8838 }, { "epoch": 1.48, "grad_norm": 0.5369425415992737, "learning_rate": 1.0719859339047023e-05, "loss": 0.0509, "step": 8839 }, { "epoch": 1.48, "grad_norm": 0.43629494309425354, "learning_rate": 1.0718058291423447e-05, "loss": 0.0404, "step": 8840 }, { "epoch": 1.48, "grad_norm": 0.7850555777549744, "learning_rate": 1.0716257220386677e-05, "loss": 0.0616, "step": 8841 }, { "epoch": 1.48, "grad_norm": 0.5197024941444397, "learning_rate": 1.0714456125995428e-05, "loss": 0.0512, "step": 8842 }, { "epoch": 1.48, "grad_norm": 0.5878269076347351, "learning_rate": 1.0712655008308433e-05, "loss": 0.063, "step": 8843 }, { "epoch": 1.48, "grad_norm": 0.48223865032196045, "learning_rate": 1.0710853867384414e-05, "loss": 0.0508, "step": 8844 }, { "epoch": 1.48, "grad_norm": 0.5741778016090393, "learning_rate": 1.0709052703282107e-05, "loss": 0.0512, "step": 8845 }, { "epoch": 1.48, "grad_norm": 0.7123188376426697, "learning_rate": 1.0707251516060238e-05, "loss": 0.0678, "step": 8846 }, { "epoch": 1.48, "grad_norm": 0.5419285893440247, "learning_rate": 1.0705450305777535e-05, "loss": 0.0605, "step": 8847 }, { "epoch": 1.48, "grad_norm": 0.4864952862262726, "learning_rate": 1.070364907249273e-05, "loss": 0.0492, "step": 8848 }, { "epoch": 1.48, "grad_norm": 0.42464184761047363, "learning_rate": 1.0701847816264555e-05, "loss": 0.0492, "step": 8849 }, { "epoch": 1.48, "grad_norm": 0.6274709105491638, "learning_rate": 1.0700046537151742e-05, "loss": 0.0818, "step": 8850 }, { "epoch": 1.48, "grad_norm": 0.45998361706733704, "learning_rate": 1.0698245235213025e-05, "loss": 0.054, "step": 8851 }, { "epoch": 1.48, "grad_norm": 0.5523056983947754, "learning_rate": 1.0696443910507137e-05, "loss": 0.0593, "step": 8852 }, { "epoch": 1.48, "grad_norm": 0.562841534614563, "learning_rate": 1.0694642563092812e-05, "loss": 0.0573, "step": 8853 }, { "epoch": 1.48, "grad_norm": 0.6556398868560791, "learning_rate": 1.0692841193028787e-05, "loss": 0.0386, "step": 8854 }, { "epoch": 1.48, "grad_norm": 0.5749450922012329, "learning_rate": 1.0691039800373795e-05, "loss": 0.0755, "step": 8855 }, { "epoch": 1.48, "grad_norm": 0.623073935508728, "learning_rate": 1.0689238385186574e-05, "loss": 0.0728, "step": 8856 }, { "epoch": 1.48, "grad_norm": 0.42912209033966064, "learning_rate": 1.0687436947525862e-05, "loss": 0.0454, "step": 8857 }, { "epoch": 1.48, "grad_norm": 0.41968029737472534, "learning_rate": 1.0685635487450399e-05, "loss": 0.0401, "step": 8858 }, { "epoch": 1.48, "grad_norm": 0.4646010398864746, "learning_rate": 1.068383400501892e-05, "loss": 0.0639, "step": 8859 }, { "epoch": 1.48, "grad_norm": 0.5882210731506348, "learning_rate": 1.0682032500290162e-05, "loss": 0.0442, "step": 8860 }, { "epoch": 1.48, "grad_norm": 0.5462551712989807, "learning_rate": 1.0680230973322873e-05, "loss": 0.0662, "step": 8861 }, { "epoch": 1.48, "grad_norm": 0.40345892310142517, "learning_rate": 1.0678429424175796e-05, "loss": 0.0579, "step": 8862 }, { "epoch": 1.48, "grad_norm": 0.5805839896202087, "learning_rate": 1.0676627852907662e-05, "loss": 0.0486, "step": 8863 }, { "epoch": 1.48, "grad_norm": 0.7001888751983643, "learning_rate": 1.0674826259577219e-05, "loss": 0.0628, "step": 8864 }, { "epoch": 1.48, "grad_norm": 0.5429767966270447, "learning_rate": 1.067302464424321e-05, "loss": 0.0811, "step": 8865 }, { "epoch": 1.48, "grad_norm": 0.43897542357444763, "learning_rate": 1.0671223006964381e-05, "loss": 0.0399, "step": 8866 }, { "epoch": 1.48, "grad_norm": 0.49870944023132324, "learning_rate": 1.0669421347799474e-05, "loss": 0.064, "step": 8867 }, { "epoch": 1.48, "grad_norm": 0.3417198061943054, "learning_rate": 1.0667619666807234e-05, "loss": 0.0543, "step": 8868 }, { "epoch": 1.48, "grad_norm": 0.6475801467895508, "learning_rate": 1.0665817964046412e-05, "loss": 0.0841, "step": 8869 }, { "epoch": 1.48, "grad_norm": 0.42777255177497864, "learning_rate": 1.0664016239575748e-05, "loss": 0.055, "step": 8870 }, { "epoch": 1.48, "grad_norm": 0.4572276771068573, "learning_rate": 1.0662214493453993e-05, "loss": 0.0578, "step": 8871 }, { "epoch": 1.48, "grad_norm": 0.5374535322189331, "learning_rate": 1.0660412725739898e-05, "loss": 0.0563, "step": 8872 }, { "epoch": 1.48, "grad_norm": 0.4607873558998108, "learning_rate": 1.0658610936492204e-05, "loss": 0.0533, "step": 8873 }, { "epoch": 1.48, "grad_norm": 0.42908620834350586, "learning_rate": 1.0656809125769667e-05, "loss": 0.0602, "step": 8874 }, { "epoch": 1.48, "grad_norm": 0.4992946982383728, "learning_rate": 1.0655007293631037e-05, "loss": 0.0648, "step": 8875 }, { "epoch": 1.48, "grad_norm": 0.5164917707443237, "learning_rate": 1.0653205440135064e-05, "loss": 0.0796, "step": 8876 }, { "epoch": 1.48, "grad_norm": 0.3276183009147644, "learning_rate": 1.0651403565340498e-05, "loss": 0.0502, "step": 8877 }, { "epoch": 1.48, "grad_norm": 0.758143961429596, "learning_rate": 1.0649601669306094e-05, "loss": 0.0541, "step": 8878 }, { "epoch": 1.49, "grad_norm": 0.4428302049636841, "learning_rate": 1.0647799752090604e-05, "loss": 0.0348, "step": 8879 }, { "epoch": 1.49, "grad_norm": 0.5912840366363525, "learning_rate": 1.064599781375278e-05, "loss": 0.0633, "step": 8880 }, { "epoch": 1.49, "grad_norm": 0.4440383017063141, "learning_rate": 1.064419585435138e-05, "loss": 0.0496, "step": 8881 }, { "epoch": 1.49, "grad_norm": 0.6034644246101379, "learning_rate": 1.0642393873945157e-05, "loss": 0.0502, "step": 8882 }, { "epoch": 1.49, "grad_norm": 0.5061523914337158, "learning_rate": 1.0640591872592868e-05, "loss": 0.047, "step": 8883 }, { "epoch": 1.49, "grad_norm": 0.7540358304977417, "learning_rate": 1.0638789850353268e-05, "loss": 0.0716, "step": 8884 }, { "epoch": 1.49, "grad_norm": 0.46886077523231506, "learning_rate": 1.0636987807285116e-05, "loss": 0.0608, "step": 8885 }, { "epoch": 1.49, "grad_norm": 0.4679115116596222, "learning_rate": 1.0635185743447169e-05, "loss": 0.0446, "step": 8886 }, { "epoch": 1.49, "grad_norm": 0.4733160734176636, "learning_rate": 1.0633383658898184e-05, "loss": 0.0628, "step": 8887 }, { "epoch": 1.49, "grad_norm": 0.6550703048706055, "learning_rate": 1.0631581553696921e-05, "loss": 0.0753, "step": 8888 }, { "epoch": 1.49, "grad_norm": 0.46179911494255066, "learning_rate": 1.0629779427902143e-05, "loss": 0.055, "step": 8889 }, { "epoch": 1.49, "grad_norm": 0.5033962726593018, "learning_rate": 1.062797728157261e-05, "loss": 0.0476, "step": 8890 }, { "epoch": 1.49, "grad_norm": 0.47380927205085754, "learning_rate": 1.0626175114767079e-05, "loss": 0.0558, "step": 8891 }, { "epoch": 1.49, "grad_norm": 0.758438229560852, "learning_rate": 1.0624372927544313e-05, "loss": 0.062, "step": 8892 }, { "epoch": 1.49, "grad_norm": 0.594257116317749, "learning_rate": 1.0622570719963079e-05, "loss": 0.0677, "step": 8893 }, { "epoch": 1.49, "grad_norm": 0.5023083686828613, "learning_rate": 1.0620768492082136e-05, "loss": 0.0512, "step": 8894 }, { "epoch": 1.49, "grad_norm": 1.9036332368850708, "learning_rate": 1.0618966243960249e-05, "loss": 0.0413, "step": 8895 }, { "epoch": 1.49, "grad_norm": 0.5259807705879211, "learning_rate": 1.0617163975656182e-05, "loss": 0.0672, "step": 8896 }, { "epoch": 1.49, "grad_norm": 0.5377373695373535, "learning_rate": 1.0615361687228704e-05, "loss": 0.0551, "step": 8897 }, { "epoch": 1.49, "grad_norm": 0.5930311679840088, "learning_rate": 1.0613559378736578e-05, "loss": 0.0473, "step": 8898 }, { "epoch": 1.49, "grad_norm": 0.408983439207077, "learning_rate": 1.0611757050238569e-05, "loss": 0.0591, "step": 8899 }, { "epoch": 1.49, "grad_norm": 0.7699291110038757, "learning_rate": 1.0609954701793445e-05, "loss": 0.0644, "step": 8900 }, { "epoch": 1.49, "grad_norm": 0.43527305126190186, "learning_rate": 1.0608152333459976e-05, "loss": 0.0433, "step": 8901 }, { "epoch": 1.49, "grad_norm": 0.5622298121452332, "learning_rate": 1.060634994529693e-05, "loss": 0.0687, "step": 8902 }, { "epoch": 1.49, "grad_norm": 0.5407382845878601, "learning_rate": 1.0604547537363072e-05, "loss": 0.0333, "step": 8903 }, { "epoch": 1.49, "grad_norm": 0.8155989050865173, "learning_rate": 1.0602745109717178e-05, "loss": 0.0846, "step": 8904 }, { "epoch": 1.49, "grad_norm": 0.5785768032073975, "learning_rate": 1.0600942662418014e-05, "loss": 0.0537, "step": 8905 }, { "epoch": 1.49, "grad_norm": 0.5013877153396606, "learning_rate": 1.0599140195524353e-05, "loss": 0.0667, "step": 8906 }, { "epoch": 1.49, "grad_norm": 0.44645851850509644, "learning_rate": 1.0597337709094967e-05, "loss": 0.0544, "step": 8907 }, { "epoch": 1.49, "grad_norm": 0.8240917325019836, "learning_rate": 1.0595535203188628e-05, "loss": 0.0588, "step": 8908 }, { "epoch": 1.49, "grad_norm": 0.5613591074943542, "learning_rate": 1.0593732677864109e-05, "loss": 0.0572, "step": 8909 }, { "epoch": 1.49, "grad_norm": 1.2145144939422607, "learning_rate": 1.0591930133180181e-05, "loss": 0.0532, "step": 8910 }, { "epoch": 1.49, "grad_norm": 0.5079084634780884, "learning_rate": 1.0590127569195623e-05, "loss": 0.0537, "step": 8911 }, { "epoch": 1.49, "grad_norm": 0.8013147115707397, "learning_rate": 1.0588324985969206e-05, "loss": 0.07, "step": 8912 }, { "epoch": 1.49, "grad_norm": 0.4231787919998169, "learning_rate": 1.0586522383559708e-05, "loss": 0.0431, "step": 8913 }, { "epoch": 1.49, "grad_norm": 0.38525745272636414, "learning_rate": 1.0584719762025904e-05, "loss": 0.0544, "step": 8914 }, { "epoch": 1.49, "grad_norm": 0.6201332211494446, "learning_rate": 1.058291712142657e-05, "loss": 0.0596, "step": 8915 }, { "epoch": 1.49, "grad_norm": 0.7634186744689941, "learning_rate": 1.0581114461820484e-05, "loss": 0.0578, "step": 8916 }, { "epoch": 1.49, "grad_norm": 0.48387598991394043, "learning_rate": 1.0579311783266424e-05, "loss": 0.0729, "step": 8917 }, { "epoch": 1.49, "grad_norm": 0.46804895997047424, "learning_rate": 1.0577509085823172e-05, "loss": 0.0738, "step": 8918 }, { "epoch": 1.49, "grad_norm": 0.5609332919120789, "learning_rate": 1.0575706369549502e-05, "loss": 0.0536, "step": 8919 }, { "epoch": 1.49, "grad_norm": 0.7148993015289307, "learning_rate": 1.0573903634504192e-05, "loss": 0.0552, "step": 8920 }, { "epoch": 1.49, "grad_norm": 0.3489130139350891, "learning_rate": 1.0572100880746033e-05, "loss": 0.0469, "step": 8921 }, { "epoch": 1.49, "grad_norm": 0.37362316250801086, "learning_rate": 1.0570298108333797e-05, "loss": 0.0499, "step": 8922 }, { "epoch": 1.49, "grad_norm": 0.41047555208206177, "learning_rate": 1.0568495317326266e-05, "loss": 0.0453, "step": 8923 }, { "epoch": 1.49, "grad_norm": 0.753538966178894, "learning_rate": 1.0566692507782223e-05, "loss": 0.0731, "step": 8924 }, { "epoch": 1.49, "grad_norm": 0.5018448829650879, "learning_rate": 1.056488967976046e-05, "loss": 0.0504, "step": 8925 }, { "epoch": 1.49, "grad_norm": 0.5109763741493225, "learning_rate": 1.0563086833319747e-05, "loss": 0.0545, "step": 8926 }, { "epoch": 1.49, "grad_norm": 0.4193556308746338, "learning_rate": 1.0561283968518873e-05, "loss": 0.0376, "step": 8927 }, { "epoch": 1.49, "grad_norm": 0.6117927432060242, "learning_rate": 1.0559481085416628e-05, "loss": 0.0859, "step": 8928 }, { "epoch": 1.49, "grad_norm": 0.6774471402168274, "learning_rate": 1.055767818407179e-05, "loss": 0.0532, "step": 8929 }, { "epoch": 1.49, "grad_norm": 0.43415480852127075, "learning_rate": 1.0555875264543148e-05, "loss": 0.0544, "step": 8930 }, { "epoch": 1.49, "grad_norm": 0.5397109389305115, "learning_rate": 1.0554072326889489e-05, "loss": 0.0702, "step": 8931 }, { "epoch": 1.49, "grad_norm": 0.5359331369400024, "learning_rate": 1.05522693711696e-05, "loss": 0.0413, "step": 8932 }, { "epoch": 1.49, "grad_norm": 0.6089327335357666, "learning_rate": 1.0550466397442266e-05, "loss": 0.0736, "step": 8933 }, { "epoch": 1.49, "grad_norm": 0.711589515209198, "learning_rate": 1.0548663405766278e-05, "loss": 0.0704, "step": 8934 }, { "epoch": 1.49, "grad_norm": 0.516838788986206, "learning_rate": 1.0546860396200425e-05, "loss": 0.062, "step": 8935 }, { "epoch": 1.49, "grad_norm": 0.5940632820129395, "learning_rate": 1.0545057368803492e-05, "loss": 0.0385, "step": 8936 }, { "epoch": 1.49, "grad_norm": 0.5461083650588989, "learning_rate": 1.0543254323634278e-05, "loss": 0.0414, "step": 8937 }, { "epoch": 1.49, "grad_norm": 0.4320499300956726, "learning_rate": 1.0541451260751563e-05, "loss": 0.0494, "step": 8938 }, { "epoch": 1.5, "grad_norm": 0.5665191411972046, "learning_rate": 1.0539648180214148e-05, "loss": 0.0755, "step": 8939 }, { "epoch": 1.5, "grad_norm": 0.432661235332489, "learning_rate": 1.0537845082080815e-05, "loss": 0.046, "step": 8940 }, { "epoch": 1.5, "grad_norm": 0.4695607125759125, "learning_rate": 1.0536041966410363e-05, "loss": 0.0589, "step": 8941 }, { "epoch": 1.5, "grad_norm": 0.5266635417938232, "learning_rate": 1.0534238833261588e-05, "loss": 0.06, "step": 8942 }, { "epoch": 1.5, "grad_norm": 0.7332221269607544, "learning_rate": 1.0532435682693273e-05, "loss": 0.0676, "step": 8943 }, { "epoch": 1.5, "grad_norm": 0.472836434841156, "learning_rate": 1.0530632514764216e-05, "loss": 0.0584, "step": 8944 }, { "epoch": 1.5, "grad_norm": 0.47795894742012024, "learning_rate": 1.0528829329533217e-05, "loss": 0.0463, "step": 8945 }, { "epoch": 1.5, "grad_norm": 0.524174153804779, "learning_rate": 1.0527026127059068e-05, "loss": 0.0589, "step": 8946 }, { "epoch": 1.5, "grad_norm": 0.45325198769569397, "learning_rate": 1.052522290740056e-05, "loss": 0.0451, "step": 8947 }, { "epoch": 1.5, "grad_norm": 0.49213120341300964, "learning_rate": 1.0523419670616495e-05, "loss": 0.0395, "step": 8948 }, { "epoch": 1.5, "grad_norm": 0.6898466944694519, "learning_rate": 1.052161641676567e-05, "loss": 0.0639, "step": 8949 }, { "epoch": 1.5, "grad_norm": 0.5656096339225769, "learning_rate": 1.051981314590688e-05, "loss": 0.0656, "step": 8950 }, { "epoch": 1.5, "grad_norm": 0.36773380637168884, "learning_rate": 1.051800985809892e-05, "loss": 0.0534, "step": 8951 }, { "epoch": 1.5, "grad_norm": 0.5301379561424255, "learning_rate": 1.0516206553400591e-05, "loss": 0.0573, "step": 8952 }, { "epoch": 1.5, "grad_norm": 0.42207881808280945, "learning_rate": 1.05144032318707e-05, "loss": 0.0488, "step": 8953 }, { "epoch": 1.5, "grad_norm": 0.5919144153594971, "learning_rate": 1.0512599893568033e-05, "loss": 0.0594, "step": 8954 }, { "epoch": 1.5, "grad_norm": 0.6653299331665039, "learning_rate": 1.05107965385514e-05, "loss": 0.064, "step": 8955 }, { "epoch": 1.5, "grad_norm": 0.663520872592926, "learning_rate": 1.0508993166879597e-05, "loss": 0.0589, "step": 8956 }, { "epoch": 1.5, "grad_norm": 0.45770159363746643, "learning_rate": 1.0507189778611426e-05, "loss": 0.0592, "step": 8957 }, { "epoch": 1.5, "grad_norm": 0.440000981092453, "learning_rate": 1.050538637380569e-05, "loss": 0.0553, "step": 8958 }, { "epoch": 1.5, "grad_norm": 0.7102193236351013, "learning_rate": 1.0503582952521192e-05, "loss": 0.0701, "step": 8959 }, { "epoch": 1.5, "grad_norm": 0.34901461005210876, "learning_rate": 1.0501779514816733e-05, "loss": 0.0524, "step": 8960 }, { "epoch": 1.5, "grad_norm": 0.5469920635223389, "learning_rate": 1.0499976060751114e-05, "loss": 0.0574, "step": 8961 }, { "epoch": 1.5, "grad_norm": 0.38045910000801086, "learning_rate": 1.0498172590383142e-05, "loss": 0.0453, "step": 8962 }, { "epoch": 1.5, "grad_norm": 0.502859890460968, "learning_rate": 1.0496369103771624e-05, "loss": 0.0576, "step": 8963 }, { "epoch": 1.5, "grad_norm": 0.49541473388671875, "learning_rate": 1.049456560097536e-05, "loss": 0.0692, "step": 8964 }, { "epoch": 1.5, "grad_norm": 0.4253721237182617, "learning_rate": 1.0492762082053159e-05, "loss": 0.0508, "step": 8965 }, { "epoch": 1.5, "grad_norm": 0.4818035066127777, "learning_rate": 1.0490958547063824e-05, "loss": 0.0528, "step": 8966 }, { "epoch": 1.5, "grad_norm": 0.4730151295661926, "learning_rate": 1.0489154996066164e-05, "loss": 0.054, "step": 8967 }, { "epoch": 1.5, "grad_norm": 0.3439958989620209, "learning_rate": 1.0487351429118983e-05, "loss": 0.0436, "step": 8968 }, { "epoch": 1.5, "grad_norm": 0.3812822103500366, "learning_rate": 1.0485547846281094e-05, "loss": 0.0605, "step": 8969 }, { "epoch": 1.5, "grad_norm": 0.4541195034980774, "learning_rate": 1.0483744247611303e-05, "loss": 0.0491, "step": 8970 }, { "epoch": 1.5, "grad_norm": 0.6098740100860596, "learning_rate": 1.0481940633168415e-05, "loss": 0.0638, "step": 8971 }, { "epoch": 1.5, "grad_norm": 0.865271270275116, "learning_rate": 1.0480137003011239e-05, "loss": 0.0389, "step": 8972 }, { "epoch": 1.5, "grad_norm": 0.4725019931793213, "learning_rate": 1.047833335719859e-05, "loss": 0.041, "step": 8973 }, { "epoch": 1.5, "grad_norm": 0.4505653977394104, "learning_rate": 1.0476529695789277e-05, "loss": 0.0493, "step": 8974 }, { "epoch": 1.5, "grad_norm": 0.449160099029541, "learning_rate": 1.0474726018842107e-05, "loss": 0.0648, "step": 8975 }, { "epoch": 1.5, "grad_norm": 0.423696905374527, "learning_rate": 1.0472922326415892e-05, "loss": 0.0745, "step": 8976 }, { "epoch": 1.5, "grad_norm": 0.41113948822021484, "learning_rate": 1.0471118618569448e-05, "loss": 0.0365, "step": 8977 }, { "epoch": 1.5, "grad_norm": 0.5600639581680298, "learning_rate": 1.0469314895361584e-05, "loss": 0.0498, "step": 8978 }, { "epoch": 1.5, "grad_norm": 0.47518834471702576, "learning_rate": 1.0467511156851113e-05, "loss": 0.0717, "step": 8979 }, { "epoch": 1.5, "grad_norm": 0.6453464031219482, "learning_rate": 1.0465707403096845e-05, "loss": 0.0644, "step": 8980 }, { "epoch": 1.5, "grad_norm": 0.6506378054618835, "learning_rate": 1.0463903634157599e-05, "loss": 0.0914, "step": 8981 }, { "epoch": 1.5, "grad_norm": 0.39667195081710815, "learning_rate": 1.046209985009219e-05, "loss": 0.0611, "step": 8982 }, { "epoch": 1.5, "grad_norm": 0.3548412322998047, "learning_rate": 1.0460296050959424e-05, "loss": 0.0473, "step": 8983 }, { "epoch": 1.5, "grad_norm": 0.5523231029510498, "learning_rate": 1.0458492236818121e-05, "loss": 0.0548, "step": 8984 }, { "epoch": 1.5, "grad_norm": 0.6991123557090759, "learning_rate": 1.0456688407727102e-05, "loss": 0.0869, "step": 8985 }, { "epoch": 1.5, "grad_norm": 0.38285911083221436, "learning_rate": 1.0454884563745179e-05, "loss": 0.0537, "step": 8986 }, { "epoch": 1.5, "grad_norm": 0.4153299331665039, "learning_rate": 1.0453080704931165e-05, "loss": 0.0594, "step": 8987 }, { "epoch": 1.5, "grad_norm": 0.6707654595375061, "learning_rate": 1.045127683134388e-05, "loss": 0.0639, "step": 8988 }, { "epoch": 1.5, "grad_norm": 0.5027385354042053, "learning_rate": 1.0449472943042142e-05, "loss": 0.0422, "step": 8989 }, { "epoch": 1.5, "grad_norm": 0.5114653706550598, "learning_rate": 1.0447669040084769e-05, "loss": 0.0585, "step": 8990 }, { "epoch": 1.5, "grad_norm": 0.5001872181892395, "learning_rate": 1.044586512253058e-05, "loss": 0.0535, "step": 8991 }, { "epoch": 1.5, "grad_norm": 0.5151638984680176, "learning_rate": 1.0444061190438393e-05, "loss": 0.0476, "step": 8992 }, { "epoch": 1.5, "grad_norm": 0.5822786092758179, "learning_rate": 1.0442257243867029e-05, "loss": 0.0609, "step": 8993 }, { "epoch": 1.5, "grad_norm": 0.47639670968055725, "learning_rate": 1.0440453282875305e-05, "loss": 0.0507, "step": 8994 }, { "epoch": 1.5, "grad_norm": 0.45799121260643005, "learning_rate": 1.0438649307522046e-05, "loss": 0.0511, "step": 8995 }, { "epoch": 1.5, "grad_norm": 0.39804303646087646, "learning_rate": 1.0436845317866067e-05, "loss": 0.0578, "step": 8996 }, { "epoch": 1.5, "grad_norm": 0.44174230098724365, "learning_rate": 1.0435041313966194e-05, "loss": 0.0464, "step": 8997 }, { "epoch": 1.5, "grad_norm": 0.5490466952323914, "learning_rate": 1.043323729588125e-05, "loss": 0.0638, "step": 8998 }, { "epoch": 1.51, "grad_norm": 0.5111703872680664, "learning_rate": 1.0431433263670052e-05, "loss": 0.0613, "step": 8999 }, { "epoch": 1.51, "grad_norm": 0.4551801085472107, "learning_rate": 1.0429629217391425e-05, "loss": 0.0512, "step": 9000 }, { "epoch": 1.51, "grad_norm": 0.4993394911289215, "learning_rate": 1.0427825157104192e-05, "loss": 0.0429, "step": 9001 }, { "epoch": 1.51, "grad_norm": 0.5328559279441833, "learning_rate": 1.0426021082867182e-05, "loss": 0.0674, "step": 9002 }, { "epoch": 1.51, "grad_norm": 0.5019882321357727, "learning_rate": 1.0424216994739211e-05, "loss": 0.0712, "step": 9003 }, { "epoch": 1.51, "grad_norm": 0.46271809935569763, "learning_rate": 1.0422412892779103e-05, "loss": 0.0462, "step": 9004 }, { "epoch": 1.51, "grad_norm": 0.5012098550796509, "learning_rate": 1.0420608777045693e-05, "loss": 0.0495, "step": 9005 }, { "epoch": 1.51, "grad_norm": 0.567834198474884, "learning_rate": 1.04188046475978e-05, "loss": 0.0629, "step": 9006 }, { "epoch": 1.51, "grad_norm": 0.4914572834968567, "learning_rate": 1.041700050449425e-05, "loss": 0.053, "step": 9007 }, { "epoch": 1.51, "grad_norm": 0.45177778601646423, "learning_rate": 1.0415196347793868e-05, "loss": 0.0605, "step": 9008 }, { "epoch": 1.51, "grad_norm": 0.6020941138267517, "learning_rate": 1.0413392177555486e-05, "loss": 0.0535, "step": 9009 }, { "epoch": 1.51, "grad_norm": 0.3749665319919586, "learning_rate": 1.0411587993837926e-05, "loss": 0.0466, "step": 9010 }, { "epoch": 1.51, "grad_norm": 0.49928414821624756, "learning_rate": 1.0409783796700017e-05, "loss": 0.045, "step": 9011 }, { "epoch": 1.51, "grad_norm": 0.6055489182472229, "learning_rate": 1.0407979586200588e-05, "loss": 0.0533, "step": 9012 }, { "epoch": 1.51, "grad_norm": 0.3314122259616852, "learning_rate": 1.040617536239847e-05, "loss": 0.0461, "step": 9013 }, { "epoch": 1.51, "grad_norm": 0.5597983002662659, "learning_rate": 1.0404371125352486e-05, "loss": 0.0568, "step": 9014 }, { "epoch": 1.51, "grad_norm": 0.3488178849220276, "learning_rate": 1.0402566875121469e-05, "loss": 0.0466, "step": 9015 }, { "epoch": 1.51, "grad_norm": 0.5628492832183838, "learning_rate": 1.040076261176425e-05, "loss": 0.06, "step": 9016 }, { "epoch": 1.51, "grad_norm": 0.4437450170516968, "learning_rate": 1.0398958335339658e-05, "loss": 0.0381, "step": 9017 }, { "epoch": 1.51, "grad_norm": 0.57485032081604, "learning_rate": 1.0397154045906523e-05, "loss": 0.0637, "step": 9018 }, { "epoch": 1.51, "grad_norm": 0.39379605650901794, "learning_rate": 1.0395349743523677e-05, "loss": 0.0312, "step": 9019 }, { "epoch": 1.51, "grad_norm": 0.336690217256546, "learning_rate": 1.039354542824995e-05, "loss": 0.0426, "step": 9020 }, { "epoch": 1.51, "grad_norm": 0.3900896906852722, "learning_rate": 1.0391741100144176e-05, "loss": 0.0525, "step": 9021 }, { "epoch": 1.51, "grad_norm": 0.5064721703529358, "learning_rate": 1.038993675926519e-05, "loss": 0.0641, "step": 9022 }, { "epoch": 1.51, "grad_norm": 0.4009602963924408, "learning_rate": 1.0388132405671821e-05, "loss": 0.0568, "step": 9023 }, { "epoch": 1.51, "grad_norm": 0.3380042314529419, "learning_rate": 1.03863280394229e-05, "loss": 0.0394, "step": 9024 }, { "epoch": 1.51, "grad_norm": 0.5085774064064026, "learning_rate": 1.0384523660577263e-05, "loss": 0.051, "step": 9025 }, { "epoch": 1.51, "grad_norm": 0.6054047346115112, "learning_rate": 1.0382719269193747e-05, "loss": 0.0565, "step": 9026 }, { "epoch": 1.51, "grad_norm": 0.7253029942512512, "learning_rate": 1.0380914865331184e-05, "loss": 0.0652, "step": 9027 }, { "epoch": 1.51, "grad_norm": 0.4645111858844757, "learning_rate": 1.0379110449048406e-05, "loss": 0.0542, "step": 9028 }, { "epoch": 1.51, "grad_norm": 0.3609713613986969, "learning_rate": 1.0377306020404254e-05, "loss": 0.049, "step": 9029 }, { "epoch": 1.51, "grad_norm": 0.547885000705719, "learning_rate": 1.037550157945756e-05, "loss": 0.045, "step": 9030 }, { "epoch": 1.51, "grad_norm": 0.42176851630210876, "learning_rate": 1.0373697126267161e-05, "loss": 0.0399, "step": 9031 }, { "epoch": 1.51, "grad_norm": 0.6856069564819336, "learning_rate": 1.0371892660891892e-05, "loss": 0.0669, "step": 9032 }, { "epoch": 1.51, "grad_norm": 0.44178301095962524, "learning_rate": 1.0370088183390592e-05, "loss": 0.0495, "step": 9033 }, { "epoch": 1.51, "grad_norm": 0.4726022481918335, "learning_rate": 1.0368283693822101e-05, "loss": 0.0402, "step": 9034 }, { "epoch": 1.51, "grad_norm": 0.36154624819755554, "learning_rate": 1.0366479192245251e-05, "loss": 0.0423, "step": 9035 }, { "epoch": 1.51, "grad_norm": 0.6320942044258118, "learning_rate": 1.036467467871888e-05, "loss": 0.0635, "step": 9036 }, { "epoch": 1.51, "grad_norm": 0.49979448318481445, "learning_rate": 1.0362870153301832e-05, "loss": 0.0496, "step": 9037 }, { "epoch": 1.51, "grad_norm": 0.37121346592903137, "learning_rate": 1.0361065616052942e-05, "loss": 0.0518, "step": 9038 }, { "epoch": 1.51, "grad_norm": 0.4725041389465332, "learning_rate": 1.0359261067031048e-05, "loss": 0.0367, "step": 9039 }, { "epoch": 1.51, "grad_norm": 0.47896769642829895, "learning_rate": 1.035745650629499e-05, "loss": 0.0743, "step": 9040 }, { "epoch": 1.51, "grad_norm": 0.386930376291275, "learning_rate": 1.0355651933903614e-05, "loss": 0.0435, "step": 9041 }, { "epoch": 1.51, "grad_norm": 0.3721997141838074, "learning_rate": 1.0353847349915754e-05, "loss": 0.0381, "step": 9042 }, { "epoch": 1.51, "grad_norm": 0.4571433663368225, "learning_rate": 1.0352042754390252e-05, "loss": 0.0597, "step": 9043 }, { "epoch": 1.51, "grad_norm": 0.4579467475414276, "learning_rate": 1.0350238147385949e-05, "loss": 0.0571, "step": 9044 }, { "epoch": 1.51, "grad_norm": 0.562371015548706, "learning_rate": 1.034843352896169e-05, "loss": 0.0641, "step": 9045 }, { "epoch": 1.51, "grad_norm": 0.5333788394927979, "learning_rate": 1.0346628899176313e-05, "loss": 0.0556, "step": 9046 }, { "epoch": 1.51, "grad_norm": 0.482103168964386, "learning_rate": 1.0344824258088659e-05, "loss": 0.0739, "step": 9047 }, { "epoch": 1.51, "grad_norm": 0.4065210819244385, "learning_rate": 1.0343019605757573e-05, "loss": 0.0439, "step": 9048 }, { "epoch": 1.51, "grad_norm": 0.43741658329963684, "learning_rate": 1.03412149422419e-05, "loss": 0.0602, "step": 9049 }, { "epoch": 1.51, "grad_norm": 0.4412800669670105, "learning_rate": 1.033941026760048e-05, "loss": 0.0574, "step": 9050 }, { "epoch": 1.51, "grad_norm": 0.7652655839920044, "learning_rate": 1.0337605581892161e-05, "loss": 0.0515, "step": 9051 }, { "epoch": 1.51, "grad_norm": 0.7632470726966858, "learning_rate": 1.033580088517578e-05, "loss": 0.0704, "step": 9052 }, { "epoch": 1.51, "grad_norm": 0.4768002927303314, "learning_rate": 1.0333996177510186e-05, "loss": 0.0545, "step": 9053 }, { "epoch": 1.51, "grad_norm": 0.4435991048812866, "learning_rate": 1.0332191458954224e-05, "loss": 0.0581, "step": 9054 }, { "epoch": 1.51, "grad_norm": 0.3113102614879608, "learning_rate": 1.0330386729566739e-05, "loss": 0.0474, "step": 9055 }, { "epoch": 1.51, "grad_norm": 0.44444000720977783, "learning_rate": 1.0328581989406574e-05, "loss": 0.0719, "step": 9056 }, { "epoch": 1.51, "grad_norm": 0.4477778673171997, "learning_rate": 1.0326777238532577e-05, "loss": 0.0483, "step": 9057 }, { "epoch": 1.51, "grad_norm": 0.30165186524391174, "learning_rate": 1.0324972477003596e-05, "loss": 0.047, "step": 9058 }, { "epoch": 1.52, "grad_norm": 0.5645326375961304, "learning_rate": 1.0323167704878473e-05, "loss": 0.0579, "step": 9059 }, { "epoch": 1.52, "grad_norm": 0.44035813212394714, "learning_rate": 1.0321362922216058e-05, "loss": 0.0547, "step": 9060 }, { "epoch": 1.52, "grad_norm": 0.5068355798721313, "learning_rate": 1.0319558129075197e-05, "loss": 0.0437, "step": 9061 }, { "epoch": 1.52, "grad_norm": 0.6015343070030212, "learning_rate": 1.031775332551474e-05, "loss": 0.0805, "step": 9062 }, { "epoch": 1.52, "grad_norm": 0.5928253531455994, "learning_rate": 1.0315948511593531e-05, "loss": 0.0375, "step": 9063 }, { "epoch": 1.52, "grad_norm": 0.4710269272327423, "learning_rate": 1.031414368737042e-05, "loss": 0.0491, "step": 9064 }, { "epoch": 1.52, "grad_norm": 0.33490845561027527, "learning_rate": 1.031233885290426e-05, "loss": 0.0414, "step": 9065 }, { "epoch": 1.52, "grad_norm": 0.4757707715034485, "learning_rate": 1.0310534008253893e-05, "loss": 0.0678, "step": 9066 }, { "epoch": 1.52, "grad_norm": 0.4563058018684387, "learning_rate": 1.030872915347817e-05, "loss": 0.0523, "step": 9067 }, { "epoch": 1.52, "grad_norm": 0.4786447286605835, "learning_rate": 1.030692428863594e-05, "loss": 0.0504, "step": 9068 }, { "epoch": 1.52, "grad_norm": 0.5658118724822998, "learning_rate": 1.0305119413786058e-05, "loss": 0.0605, "step": 9069 }, { "epoch": 1.52, "grad_norm": 0.7548795938491821, "learning_rate": 1.030331452898737e-05, "loss": 0.0624, "step": 9070 }, { "epoch": 1.52, "grad_norm": 0.709697425365448, "learning_rate": 1.0301509634298726e-05, "loss": 0.0662, "step": 9071 }, { "epoch": 1.52, "grad_norm": 0.5365030765533447, "learning_rate": 1.0299704729778976e-05, "loss": 0.0422, "step": 9072 }, { "epoch": 1.52, "grad_norm": 0.6034022569656372, "learning_rate": 1.0297899815486976e-05, "loss": 0.0432, "step": 9073 }, { "epoch": 1.52, "grad_norm": 0.46596819162368774, "learning_rate": 1.0296094891481577e-05, "loss": 0.0466, "step": 9074 }, { "epoch": 1.52, "grad_norm": 0.49705106019973755, "learning_rate": 1.0294289957821623e-05, "loss": 0.0518, "step": 9075 }, { "epoch": 1.52, "grad_norm": 0.4448603689670563, "learning_rate": 1.0292485014565976e-05, "loss": 0.0532, "step": 9076 }, { "epoch": 1.52, "grad_norm": 0.554812490940094, "learning_rate": 1.0290680061773484e-05, "loss": 0.0408, "step": 9077 }, { "epoch": 1.52, "grad_norm": 0.4216548204421997, "learning_rate": 1.0288875099502999e-05, "loss": 0.0438, "step": 9078 }, { "epoch": 1.52, "grad_norm": 0.49704694747924805, "learning_rate": 1.0287070127813379e-05, "loss": 0.0469, "step": 9079 }, { "epoch": 1.52, "grad_norm": 0.4579862654209137, "learning_rate": 1.0285265146763468e-05, "loss": 0.0589, "step": 9080 }, { "epoch": 1.52, "grad_norm": 0.31926289200782776, "learning_rate": 1.0283460156412126e-05, "loss": 0.0526, "step": 9081 }, { "epoch": 1.52, "grad_norm": 0.7163176536560059, "learning_rate": 1.0281655156818208e-05, "loss": 0.0446, "step": 9082 }, { "epoch": 1.52, "grad_norm": 0.7506973743438721, "learning_rate": 1.0279850148040569e-05, "loss": 0.0764, "step": 9083 }, { "epoch": 1.52, "grad_norm": 0.45441386103630066, "learning_rate": 1.0278045130138054e-05, "loss": 0.0515, "step": 9084 }, { "epoch": 1.52, "grad_norm": 0.6408712863922119, "learning_rate": 1.027624010316953e-05, "loss": 0.0547, "step": 9085 }, { "epoch": 1.52, "grad_norm": 0.6045015454292297, "learning_rate": 1.0274435067193848e-05, "loss": 0.0701, "step": 9086 }, { "epoch": 1.52, "grad_norm": 0.3984444737434387, "learning_rate": 1.0272630022269863e-05, "loss": 0.0513, "step": 9087 }, { "epoch": 1.52, "grad_norm": 0.635952889919281, "learning_rate": 1.0270824968456425e-05, "loss": 0.0562, "step": 9088 }, { "epoch": 1.52, "grad_norm": 0.571636974811554, "learning_rate": 1.0269019905812399e-05, "loss": 0.0573, "step": 9089 }, { "epoch": 1.52, "grad_norm": 0.5684022903442383, "learning_rate": 1.0267214834396641e-05, "loss": 0.056, "step": 9090 }, { "epoch": 1.52, "grad_norm": 0.4847278594970703, "learning_rate": 1.0265409754268e-05, "loss": 0.0536, "step": 9091 }, { "epoch": 1.52, "grad_norm": 0.4218672215938568, "learning_rate": 1.0263604665485339e-05, "loss": 0.0509, "step": 9092 }, { "epoch": 1.52, "grad_norm": 0.5850771069526672, "learning_rate": 1.0261799568107516e-05, "loss": 0.0492, "step": 9093 }, { "epoch": 1.52, "grad_norm": 0.9670163989067078, "learning_rate": 1.0259994462193384e-05, "loss": 0.0423, "step": 9094 }, { "epoch": 1.52, "grad_norm": 0.5422879457473755, "learning_rate": 1.0258189347801802e-05, "loss": 0.05, "step": 9095 }, { "epoch": 1.52, "grad_norm": 0.37462079524993896, "learning_rate": 1.0256384224991631e-05, "loss": 0.0495, "step": 9096 }, { "epoch": 1.52, "grad_norm": 0.5371469855308533, "learning_rate": 1.0254579093821726e-05, "loss": 0.0469, "step": 9097 }, { "epoch": 1.52, "grad_norm": 0.3329100012779236, "learning_rate": 1.0252773954350948e-05, "loss": 0.0434, "step": 9098 }, { "epoch": 1.52, "grad_norm": 0.9156681895256042, "learning_rate": 1.0250968806638152e-05, "loss": 0.0452, "step": 9099 }, { "epoch": 1.52, "grad_norm": 0.6013639569282532, "learning_rate": 1.0249163650742202e-05, "loss": 0.0552, "step": 9100 }, { "epoch": 1.52, "grad_norm": 0.39102619886398315, "learning_rate": 1.0247358486721955e-05, "loss": 0.0455, "step": 9101 }, { "epoch": 1.52, "grad_norm": 0.8247934579849243, "learning_rate": 1.0245553314636272e-05, "loss": 0.0768, "step": 9102 }, { "epoch": 1.52, "grad_norm": 0.3732903003692627, "learning_rate": 1.0243748134544009e-05, "loss": 0.0424, "step": 9103 }, { "epoch": 1.52, "grad_norm": 0.5342268943786621, "learning_rate": 1.024194294650403e-05, "loss": 0.0511, "step": 9104 }, { "epoch": 1.52, "grad_norm": 0.5752056241035461, "learning_rate": 1.0240137750575197e-05, "loss": 0.0604, "step": 9105 }, { "epoch": 1.52, "grad_norm": 0.46293458342552185, "learning_rate": 1.0238332546816367e-05, "loss": 0.0468, "step": 9106 }, { "epoch": 1.52, "grad_norm": 0.3420451283454895, "learning_rate": 1.0236527335286403e-05, "loss": 0.054, "step": 9107 }, { "epoch": 1.52, "grad_norm": 0.4708235561847687, "learning_rate": 1.0234722116044162e-05, "loss": 0.063, "step": 9108 }, { "epoch": 1.52, "grad_norm": 0.6930934190750122, "learning_rate": 1.0232916889148512e-05, "loss": 0.0448, "step": 9109 }, { "epoch": 1.52, "grad_norm": 0.4255485534667969, "learning_rate": 1.023111165465831e-05, "loss": 0.065, "step": 9110 }, { "epoch": 1.52, "grad_norm": 0.529840350151062, "learning_rate": 1.022930641263242e-05, "loss": 0.0586, "step": 9111 }, { "epoch": 1.52, "grad_norm": 0.6690221428871155, "learning_rate": 1.0227501163129702e-05, "loss": 0.059, "step": 9112 }, { "epoch": 1.52, "grad_norm": 0.47944819927215576, "learning_rate": 1.0225695906209023e-05, "loss": 0.0616, "step": 9113 }, { "epoch": 1.52, "grad_norm": 0.604350209236145, "learning_rate": 1.0223890641929243e-05, "loss": 0.0615, "step": 9114 }, { "epoch": 1.52, "grad_norm": 0.5785848498344421, "learning_rate": 1.0222085370349224e-05, "loss": 0.063, "step": 9115 }, { "epoch": 1.52, "grad_norm": 0.5033850073814392, "learning_rate": 1.0220280091527827e-05, "loss": 0.0437, "step": 9116 }, { "epoch": 1.52, "grad_norm": 0.4969930648803711, "learning_rate": 1.0218474805523921e-05, "loss": 0.051, "step": 9117 }, { "epoch": 1.53, "grad_norm": 0.40126901865005493, "learning_rate": 1.021666951239637e-05, "loss": 0.0484, "step": 9118 }, { "epoch": 1.53, "grad_norm": 0.3497813045978546, "learning_rate": 1.021486421220403e-05, "loss": 0.0444, "step": 9119 }, { "epoch": 1.53, "grad_norm": 0.3543228209018707, "learning_rate": 1.021305890500577e-05, "loss": 0.0466, "step": 9120 }, { "epoch": 1.53, "grad_norm": 0.36421772837638855, "learning_rate": 1.0211253590860456e-05, "loss": 0.0558, "step": 9121 }, { "epoch": 1.53, "grad_norm": 0.545254647731781, "learning_rate": 1.0209448269826952e-05, "loss": 0.0472, "step": 9122 }, { "epoch": 1.53, "grad_norm": 0.539897620677948, "learning_rate": 1.020764294196412e-05, "loss": 0.0556, "step": 9123 }, { "epoch": 1.53, "grad_norm": 0.6103752851486206, "learning_rate": 1.0205837607330827e-05, "loss": 0.0542, "step": 9124 }, { "epoch": 1.53, "grad_norm": 0.895085871219635, "learning_rate": 1.0204032265985937e-05, "loss": 0.0446, "step": 9125 }, { "epoch": 1.53, "grad_norm": 0.5867260694503784, "learning_rate": 1.0202226917988316e-05, "loss": 0.0526, "step": 9126 }, { "epoch": 1.53, "grad_norm": 0.41223713755607605, "learning_rate": 1.0200421563396832e-05, "loss": 0.0424, "step": 9127 }, { "epoch": 1.53, "grad_norm": 0.6233325600624084, "learning_rate": 1.0198616202270348e-05, "loss": 0.0485, "step": 9128 }, { "epoch": 1.53, "grad_norm": 0.4220145344734192, "learning_rate": 1.019681083466773e-05, "loss": 0.0507, "step": 9129 }, { "epoch": 1.53, "grad_norm": 0.6104238629341125, "learning_rate": 1.0195005460647847e-05, "loss": 0.0602, "step": 9130 }, { "epoch": 1.53, "grad_norm": 0.5463777184486389, "learning_rate": 1.0193200080269563e-05, "loss": 0.0425, "step": 9131 }, { "epoch": 1.53, "grad_norm": 0.4885269105434418, "learning_rate": 1.0191394693591744e-05, "loss": 0.0516, "step": 9132 }, { "epoch": 1.53, "grad_norm": 0.4566049873828888, "learning_rate": 1.0189589300673259e-05, "loss": 0.0663, "step": 9133 }, { "epoch": 1.53, "grad_norm": 0.5036664605140686, "learning_rate": 1.0187783901572976e-05, "loss": 0.0469, "step": 9134 }, { "epoch": 1.53, "grad_norm": 0.47547516226768494, "learning_rate": 1.0185978496349762e-05, "loss": 0.0594, "step": 9135 }, { "epoch": 1.53, "grad_norm": 0.4894367456436157, "learning_rate": 1.0184173085062478e-05, "loss": 0.0635, "step": 9136 }, { "epoch": 1.53, "grad_norm": 0.46424925327301025, "learning_rate": 1.0182367667770002e-05, "loss": 0.0664, "step": 9137 }, { "epoch": 1.53, "grad_norm": 0.45102062821388245, "learning_rate": 1.0180562244531193e-05, "loss": 0.0501, "step": 9138 }, { "epoch": 1.53, "grad_norm": 0.7336665987968445, "learning_rate": 1.017875681540493e-05, "loss": 0.069, "step": 9139 }, { "epoch": 1.53, "grad_norm": 0.3028568923473358, "learning_rate": 1.0176951380450065e-05, "loss": 0.0325, "step": 9140 }, { "epoch": 1.53, "grad_norm": 0.4942625164985657, "learning_rate": 1.0175145939725482e-05, "loss": 0.0517, "step": 9141 }, { "epoch": 1.53, "grad_norm": 0.41078460216522217, "learning_rate": 1.0173340493290043e-05, "loss": 0.0414, "step": 9142 }, { "epoch": 1.53, "grad_norm": 0.7193931341171265, "learning_rate": 1.0171535041202617e-05, "loss": 0.073, "step": 9143 }, { "epoch": 1.53, "grad_norm": 0.33032435178756714, "learning_rate": 1.016972958352207e-05, "loss": 0.0497, "step": 9144 }, { "epoch": 1.53, "grad_norm": 0.5463448762893677, "learning_rate": 1.0167924120307278e-05, "loss": 0.0619, "step": 9145 }, { "epoch": 1.53, "grad_norm": 0.38087567687034607, "learning_rate": 1.016611865161711e-05, "loss": 0.0347, "step": 9146 }, { "epoch": 1.53, "grad_norm": 0.4278956651687622, "learning_rate": 1.016431317751043e-05, "loss": 0.0423, "step": 9147 }, { "epoch": 1.53, "grad_norm": 0.62490314245224, "learning_rate": 1.0162507698046109e-05, "loss": 0.0792, "step": 9148 }, { "epoch": 1.53, "grad_norm": 0.5308923125267029, "learning_rate": 1.016070221328302e-05, "loss": 0.0524, "step": 9149 }, { "epoch": 1.53, "grad_norm": 0.48962631821632385, "learning_rate": 1.0158896723280032e-05, "loss": 0.0596, "step": 9150 }, { "epoch": 1.53, "grad_norm": 0.8006799817085266, "learning_rate": 1.0157091228096015e-05, "loss": 0.0923, "step": 9151 }, { "epoch": 1.53, "grad_norm": 0.727472722530365, "learning_rate": 1.0155285727789836e-05, "loss": 0.054, "step": 9152 }, { "epoch": 1.53, "grad_norm": 0.5206012725830078, "learning_rate": 1.0153480222420373e-05, "loss": 0.0607, "step": 9153 }, { "epoch": 1.53, "grad_norm": 0.5669044256210327, "learning_rate": 1.015167471204649e-05, "loss": 0.0885, "step": 9154 }, { "epoch": 1.53, "grad_norm": 0.5943374037742615, "learning_rate": 1.0149869196727062e-05, "loss": 0.07, "step": 9155 }, { "epoch": 1.53, "grad_norm": 0.5127378106117249, "learning_rate": 1.014806367652096e-05, "loss": 0.0613, "step": 9156 }, { "epoch": 1.53, "grad_norm": 0.5700677037239075, "learning_rate": 1.0146258151487053e-05, "loss": 0.0624, "step": 9157 }, { "epoch": 1.53, "grad_norm": 0.37022867798805237, "learning_rate": 1.0144452621684214e-05, "loss": 0.0446, "step": 9158 }, { "epoch": 1.53, "grad_norm": 0.35684219002723694, "learning_rate": 1.0142647087171314e-05, "loss": 0.0402, "step": 9159 }, { "epoch": 1.53, "grad_norm": 0.44040656089782715, "learning_rate": 1.0140841548007223e-05, "loss": 0.0713, "step": 9160 }, { "epoch": 1.53, "grad_norm": 0.39254632592201233, "learning_rate": 1.0139036004250817e-05, "loss": 0.0595, "step": 9161 }, { "epoch": 1.53, "grad_norm": 1.0247673988342285, "learning_rate": 1.0137230455960965e-05, "loss": 0.0774, "step": 9162 }, { "epoch": 1.53, "grad_norm": 0.43334269523620605, "learning_rate": 1.0135424903196543e-05, "loss": 0.0462, "step": 9163 }, { "epoch": 1.53, "grad_norm": 0.5194206833839417, "learning_rate": 1.0133619346016414e-05, "loss": 0.0594, "step": 9164 }, { "epoch": 1.53, "grad_norm": 0.7571325302124023, "learning_rate": 1.0131813784479461e-05, "loss": 0.0611, "step": 9165 }, { "epoch": 1.53, "grad_norm": 0.48402756452560425, "learning_rate": 1.0130008218644552e-05, "loss": 0.0541, "step": 9166 }, { "epoch": 1.53, "grad_norm": 0.6784209609031677, "learning_rate": 1.0128202648570562e-05, "loss": 0.043, "step": 9167 }, { "epoch": 1.53, "grad_norm": 0.5947457551956177, "learning_rate": 1.0126397074316359e-05, "loss": 0.0517, "step": 9168 }, { "epoch": 1.53, "grad_norm": 0.45762863755226135, "learning_rate": 1.0124591495940818e-05, "loss": 0.0544, "step": 9169 }, { "epoch": 1.53, "grad_norm": 0.43358200788497925, "learning_rate": 1.0122785913502819e-05, "loss": 0.0494, "step": 9170 }, { "epoch": 1.53, "grad_norm": 0.5290437340736389, "learning_rate": 1.0120980327061226e-05, "loss": 0.0563, "step": 9171 }, { "epoch": 1.53, "grad_norm": 0.46751418709754944, "learning_rate": 1.0119174736674912e-05, "loss": 0.0553, "step": 9172 }, { "epoch": 1.53, "grad_norm": 0.5604039430618286, "learning_rate": 1.011736914240276e-05, "loss": 0.0606, "step": 9173 }, { "epoch": 1.53, "grad_norm": 0.4564060866832733, "learning_rate": 1.0115563544303639e-05, "loss": 0.0578, "step": 9174 }, { "epoch": 1.53, "grad_norm": 0.38995933532714844, "learning_rate": 1.0113757942436422e-05, "loss": 0.0381, "step": 9175 }, { "epoch": 1.53, "grad_norm": 0.44222480058670044, "learning_rate": 1.011195233685998e-05, "loss": 0.0399, "step": 9176 }, { "epoch": 1.53, "grad_norm": 0.5905422568321228, "learning_rate": 1.0110146727633192e-05, "loss": 0.0505, "step": 9177 }, { "epoch": 1.54, "grad_norm": 0.3603697121143341, "learning_rate": 1.010834111481493e-05, "loss": 0.0497, "step": 9178 }, { "epoch": 1.54, "grad_norm": 0.4017309248447418, "learning_rate": 1.0106535498464068e-05, "loss": 0.0635, "step": 9179 }, { "epoch": 1.54, "grad_norm": 0.5299516320228577, "learning_rate": 1.0104729878639481e-05, "loss": 0.0686, "step": 9180 }, { "epoch": 1.54, "grad_norm": 0.4692378342151642, "learning_rate": 1.0102924255400044e-05, "loss": 0.0749, "step": 9181 }, { "epoch": 1.54, "grad_norm": 0.4462094008922577, "learning_rate": 1.0101118628804632e-05, "loss": 0.0454, "step": 9182 }, { "epoch": 1.54, "grad_norm": 0.45737412571907043, "learning_rate": 1.0099312998912116e-05, "loss": 0.057, "step": 9183 }, { "epoch": 1.54, "grad_norm": 0.42245879769325256, "learning_rate": 1.0097507365781377e-05, "loss": 0.0664, "step": 9184 }, { "epoch": 1.54, "grad_norm": 0.6482017636299133, "learning_rate": 1.0095701729471286e-05, "loss": 0.0354, "step": 9185 }, { "epoch": 1.54, "grad_norm": 0.34615492820739746, "learning_rate": 1.0093896090040718e-05, "loss": 0.0386, "step": 9186 }, { "epoch": 1.54, "grad_norm": 0.4471536874771118, "learning_rate": 1.0092090447548549e-05, "loss": 0.0423, "step": 9187 }, { "epoch": 1.54, "grad_norm": 0.49027690291404724, "learning_rate": 1.0090284802053653e-05, "loss": 0.054, "step": 9188 }, { "epoch": 1.54, "grad_norm": 0.38482826948165894, "learning_rate": 1.0088479153614908e-05, "loss": 0.0655, "step": 9189 }, { "epoch": 1.54, "grad_norm": 0.8792689442634583, "learning_rate": 1.0086673502291189e-05, "loss": 0.0416, "step": 9190 }, { "epoch": 1.54, "grad_norm": 0.6432280540466309, "learning_rate": 1.0084867848141372e-05, "loss": 0.0649, "step": 9191 }, { "epoch": 1.54, "grad_norm": 0.7658731341362, "learning_rate": 1.0083062191224326e-05, "loss": 0.064, "step": 9192 }, { "epoch": 1.54, "grad_norm": 0.4728694558143616, "learning_rate": 1.0081256531598932e-05, "loss": 0.0489, "step": 9193 }, { "epoch": 1.54, "grad_norm": 0.3933185338973999, "learning_rate": 1.007945086932407e-05, "loss": 0.0514, "step": 9194 }, { "epoch": 1.54, "grad_norm": 0.4965636432170868, "learning_rate": 1.0077645204458613e-05, "loss": 0.0465, "step": 9195 }, { "epoch": 1.54, "grad_norm": 0.4815843105316162, "learning_rate": 1.0075839537061431e-05, "loss": 0.0606, "step": 9196 }, { "epoch": 1.54, "grad_norm": 0.28235098719596863, "learning_rate": 1.0074033867191405e-05, "loss": 0.0407, "step": 9197 }, { "epoch": 1.54, "grad_norm": 0.24889948964118958, "learning_rate": 1.0072228194907416e-05, "loss": 0.0442, "step": 9198 }, { "epoch": 1.54, "grad_norm": 0.38823702931404114, "learning_rate": 1.007042252026833e-05, "loss": 0.0474, "step": 9199 }, { "epoch": 1.54, "grad_norm": 0.674262523651123, "learning_rate": 1.006861684333303e-05, "loss": 0.0718, "step": 9200 }, { "epoch": 1.54, "grad_norm": 0.4130094051361084, "learning_rate": 1.0066811164160391e-05, "loss": 0.0475, "step": 9201 }, { "epoch": 1.54, "grad_norm": 0.5511410236358643, "learning_rate": 1.006500548280929e-05, "loss": 0.0708, "step": 9202 }, { "epoch": 1.54, "grad_norm": 1.2811241149902344, "learning_rate": 1.0063199799338602e-05, "loss": 0.0482, "step": 9203 }, { "epoch": 1.54, "grad_norm": 0.5495415925979614, "learning_rate": 1.0061394113807203e-05, "loss": 0.0508, "step": 9204 }, { "epoch": 1.54, "grad_norm": 0.5195115804672241, "learning_rate": 1.0059588426273974e-05, "loss": 0.0539, "step": 9205 }, { "epoch": 1.54, "grad_norm": 0.39174726605415344, "learning_rate": 1.0057782736797787e-05, "loss": 0.0367, "step": 9206 }, { "epoch": 1.54, "grad_norm": 0.39257243275642395, "learning_rate": 1.0055977045437521e-05, "loss": 0.065, "step": 9207 }, { "epoch": 1.54, "grad_norm": 0.4483726918697357, "learning_rate": 1.0054171352252051e-05, "loss": 0.0524, "step": 9208 }, { "epoch": 1.54, "grad_norm": 0.5330719947814941, "learning_rate": 1.0052365657300259e-05, "loss": 0.0438, "step": 9209 }, { "epoch": 1.54, "grad_norm": 0.49358657002449036, "learning_rate": 1.0050559960641015e-05, "loss": 0.0544, "step": 9210 }, { "epoch": 1.54, "grad_norm": 0.3380764424800873, "learning_rate": 1.00487542623332e-05, "loss": 0.0513, "step": 9211 }, { "epoch": 1.54, "grad_norm": 0.6284328103065491, "learning_rate": 1.0046948562435693e-05, "loss": 0.0581, "step": 9212 }, { "epoch": 1.54, "grad_norm": 0.42070579528808594, "learning_rate": 1.0045142861007364e-05, "loss": 0.0537, "step": 9213 }, { "epoch": 1.54, "grad_norm": 0.5117006301879883, "learning_rate": 1.0043337158107098e-05, "loss": 0.0601, "step": 9214 }, { "epoch": 1.54, "grad_norm": 0.43637773394584656, "learning_rate": 1.004153145379377e-05, "loss": 0.0405, "step": 9215 }, { "epoch": 1.54, "grad_norm": 0.41124647855758667, "learning_rate": 1.0039725748126252e-05, "loss": 0.0404, "step": 9216 }, { "epoch": 1.54, "grad_norm": 0.518182635307312, "learning_rate": 1.0037920041163428e-05, "loss": 0.0669, "step": 9217 }, { "epoch": 1.54, "grad_norm": 0.45483195781707764, "learning_rate": 1.0036114332964174e-05, "loss": 0.0528, "step": 9218 }, { "epoch": 1.54, "grad_norm": 0.4582020342350006, "learning_rate": 1.0034308623587368e-05, "loss": 0.0566, "step": 9219 }, { "epoch": 1.54, "grad_norm": 0.5080099701881409, "learning_rate": 1.0032502913091881e-05, "loss": 0.0473, "step": 9220 }, { "epoch": 1.54, "grad_norm": 1.0771061182022095, "learning_rate": 1.00306972015366e-05, "loss": 0.0539, "step": 9221 }, { "epoch": 1.54, "grad_norm": 0.3837953209877014, "learning_rate": 1.0028891488980394e-05, "loss": 0.0429, "step": 9222 }, { "epoch": 1.54, "grad_norm": 0.5642789006233215, "learning_rate": 1.0027085775482148e-05, "loss": 0.0637, "step": 9223 }, { "epoch": 1.54, "grad_norm": 0.5365238785743713, "learning_rate": 1.002528006110073e-05, "loss": 0.0546, "step": 9224 }, { "epoch": 1.54, "grad_norm": 0.6153002381324768, "learning_rate": 1.002347434589503e-05, "loss": 0.0542, "step": 9225 }, { "epoch": 1.54, "grad_norm": 0.39969509840011597, "learning_rate": 1.0021668629923917e-05, "loss": 0.0497, "step": 9226 }, { "epoch": 1.54, "grad_norm": 0.4014571011066437, "learning_rate": 1.0019862913246271e-05, "loss": 0.0579, "step": 9227 }, { "epoch": 1.54, "grad_norm": 0.4048389494419098, "learning_rate": 1.001805719592097e-05, "loss": 0.0512, "step": 9228 }, { "epoch": 1.54, "grad_norm": 0.37853389978408813, "learning_rate": 1.0016251478006886e-05, "loss": 0.0435, "step": 9229 }, { "epoch": 1.54, "grad_norm": 0.34811800718307495, "learning_rate": 1.001444575956291e-05, "loss": 0.0531, "step": 9230 }, { "epoch": 1.54, "grad_norm": 0.4814123213291168, "learning_rate": 1.0012640040647907e-05, "loss": 0.0534, "step": 9231 }, { "epoch": 1.54, "grad_norm": 0.5338158011436462, "learning_rate": 1.0010834321320761e-05, "loss": 0.0441, "step": 9232 }, { "epoch": 1.54, "grad_norm": 0.5124381184577942, "learning_rate": 1.0009028601640347e-05, "loss": 0.0609, "step": 9233 }, { "epoch": 1.54, "grad_norm": 0.32925137877464294, "learning_rate": 1.0007222881665546e-05, "loss": 0.0392, "step": 9234 }, { "epoch": 1.54, "grad_norm": 0.5004536509513855, "learning_rate": 1.0005417161455231e-05, "loss": 0.0617, "step": 9235 }, { "epoch": 1.54, "grad_norm": 0.5478856563568115, "learning_rate": 1.0003611441068284e-05, "loss": 0.0678, "step": 9236 }, { "epoch": 1.54, "grad_norm": 0.3942163288593292, "learning_rate": 1.0001805720563583e-05, "loss": 0.0388, "step": 9237 }, { "epoch": 1.55, "grad_norm": 0.6293579339981079, "learning_rate": 1e-05, "loss": 0.0701, "step": 9238 }, { "epoch": 1.55, "grad_norm": 0.5832912921905518, "learning_rate": 9.998194279436422e-06, "loss": 0.0509, "step": 9239 }, { "epoch": 1.55, "grad_norm": 0.433255136013031, "learning_rate": 9.996388558931718e-06, "loss": 0.0655, "step": 9240 }, { "epoch": 1.55, "grad_norm": 0.4052141308784485, "learning_rate": 9.99458283854477e-06, "loss": 0.0556, "step": 9241 }, { "epoch": 1.55, "grad_norm": 0.45083343982696533, "learning_rate": 9.99277711833446e-06, "loss": 0.0378, "step": 9242 }, { "epoch": 1.55, "grad_norm": 0.6326855421066284, "learning_rate": 9.990971398359655e-06, "loss": 0.048, "step": 9243 }, { "epoch": 1.55, "grad_norm": 0.4108416438102722, "learning_rate": 9.989165678679242e-06, "loss": 0.0535, "step": 9244 }, { "epoch": 1.55, "grad_norm": 0.7044495344161987, "learning_rate": 9.987359959352093e-06, "loss": 0.0579, "step": 9245 }, { "epoch": 1.55, "grad_norm": 0.34958475828170776, "learning_rate": 9.985554240437092e-06, "loss": 0.0404, "step": 9246 }, { "epoch": 1.55, "grad_norm": 0.4251064658164978, "learning_rate": 9.983748521993116e-06, "loss": 0.0502, "step": 9247 }, { "epoch": 1.55, "grad_norm": 0.5869078636169434, "learning_rate": 9.981942804079034e-06, "loss": 0.0756, "step": 9248 }, { "epoch": 1.55, "grad_norm": 0.5113023519515991, "learning_rate": 9.980137086753732e-06, "loss": 0.06, "step": 9249 }, { "epoch": 1.55, "grad_norm": 0.4488084316253662, "learning_rate": 9.978331370076088e-06, "loss": 0.0441, "step": 9250 }, { "epoch": 1.55, "grad_norm": 0.6698867678642273, "learning_rate": 9.976525654104971e-06, "loss": 0.0617, "step": 9251 }, { "epoch": 1.55, "grad_norm": 0.5624598860740662, "learning_rate": 9.97471993889927e-06, "loss": 0.0487, "step": 9252 }, { "epoch": 1.55, "grad_norm": 0.4306494891643524, "learning_rate": 9.972914224517857e-06, "loss": 0.0543, "step": 9253 }, { "epoch": 1.55, "grad_norm": 0.7228264212608337, "learning_rate": 9.971108511019607e-06, "loss": 0.0454, "step": 9254 }, { "epoch": 1.55, "grad_norm": 0.4168665111064911, "learning_rate": 9.969302798463406e-06, "loss": 0.0529, "step": 9255 }, { "epoch": 1.55, "grad_norm": 0.5589596033096313, "learning_rate": 9.96749708690812e-06, "loss": 0.0737, "step": 9256 }, { "epoch": 1.55, "grad_norm": 1.5808959007263184, "learning_rate": 9.965691376412635e-06, "loss": 0.057, "step": 9257 }, { "epoch": 1.55, "grad_norm": 0.4220837950706482, "learning_rate": 9.963885667035829e-06, "loss": 0.0626, "step": 9258 }, { "epoch": 1.55, "grad_norm": 0.3877333402633667, "learning_rate": 9.962079958836571e-06, "loss": 0.0625, "step": 9259 }, { "epoch": 1.55, "grad_norm": 0.3825654983520508, "learning_rate": 9.96027425187375e-06, "loss": 0.0617, "step": 9260 }, { "epoch": 1.55, "grad_norm": 0.4438604712486267, "learning_rate": 9.958468546206235e-06, "loss": 0.0571, "step": 9261 }, { "epoch": 1.55, "grad_norm": 0.8472937941551208, "learning_rate": 9.956662841892902e-06, "loss": 0.0729, "step": 9262 }, { "epoch": 1.55, "grad_norm": 0.44460389018058777, "learning_rate": 9.954857138992639e-06, "loss": 0.0555, "step": 9263 }, { "epoch": 1.55, "grad_norm": 0.5946431159973145, "learning_rate": 9.953051437564312e-06, "loss": 0.061, "step": 9264 }, { "epoch": 1.55, "grad_norm": 2.1220922470092773, "learning_rate": 9.951245737666801e-06, "loss": 0.0589, "step": 9265 }, { "epoch": 1.55, "grad_norm": 0.6992831230163574, "learning_rate": 9.949440039358988e-06, "loss": 0.0773, "step": 9266 }, { "epoch": 1.55, "grad_norm": 0.5246556997299194, "learning_rate": 9.947634342699746e-06, "loss": 0.0617, "step": 9267 }, { "epoch": 1.55, "grad_norm": 0.4279453158378601, "learning_rate": 9.94582864774795e-06, "loss": 0.052, "step": 9268 }, { "epoch": 1.55, "grad_norm": 1.417881965637207, "learning_rate": 9.944022954562482e-06, "loss": 0.0416, "step": 9269 }, { "epoch": 1.55, "grad_norm": 0.4107729196548462, "learning_rate": 9.942217263202218e-06, "loss": 0.0355, "step": 9270 }, { "epoch": 1.55, "grad_norm": 0.42687687277793884, "learning_rate": 9.94041157372603e-06, "loss": 0.0625, "step": 9271 }, { "epoch": 1.55, "grad_norm": 0.44256386160850525, "learning_rate": 9.938605886192799e-06, "loss": 0.0513, "step": 9272 }, { "epoch": 1.55, "grad_norm": 0.7777152061462402, "learning_rate": 9.936800200661398e-06, "loss": 0.0496, "step": 9273 }, { "epoch": 1.55, "grad_norm": 2.593609094619751, "learning_rate": 9.934994517190712e-06, "loss": 0.0564, "step": 9274 }, { "epoch": 1.55, "grad_norm": 0.5338427424430847, "learning_rate": 9.933188835839614e-06, "loss": 0.0632, "step": 9275 }, { "epoch": 1.55, "grad_norm": 0.4820142984390259, "learning_rate": 9.931383156666971e-06, "loss": 0.0603, "step": 9276 }, { "epoch": 1.55, "grad_norm": 0.35647502541542053, "learning_rate": 9.929577479731671e-06, "loss": 0.0497, "step": 9277 }, { "epoch": 1.55, "grad_norm": 0.35004934668540955, "learning_rate": 9.92777180509259e-06, "loss": 0.0568, "step": 9278 }, { "epoch": 1.55, "grad_norm": 0.49833858013153076, "learning_rate": 9.925966132808594e-06, "loss": 0.0382, "step": 9279 }, { "epoch": 1.55, "grad_norm": 1.1265487670898438, "learning_rate": 9.924160462938572e-06, "loss": 0.0605, "step": 9280 }, { "epoch": 1.55, "grad_norm": 0.950295090675354, "learning_rate": 9.922354795541392e-06, "loss": 0.0617, "step": 9281 }, { "epoch": 1.55, "grad_norm": 1.1088061332702637, "learning_rate": 9.92054913067593e-06, "loss": 0.0688, "step": 9282 }, { "epoch": 1.55, "grad_norm": 0.5977994799613953, "learning_rate": 9.91874346840107e-06, "loss": 0.0424, "step": 9283 }, { "epoch": 1.55, "grad_norm": 0.5237595438957214, "learning_rate": 9.916937808775677e-06, "loss": 0.0488, "step": 9284 }, { "epoch": 1.55, "grad_norm": 1.9564682245254517, "learning_rate": 9.915132151858632e-06, "loss": 0.0554, "step": 9285 }, { "epoch": 1.55, "grad_norm": 0.5012400150299072, "learning_rate": 9.913326497708814e-06, "loss": 0.0484, "step": 9286 }, { "epoch": 1.55, "grad_norm": 0.49596887826919556, "learning_rate": 9.911520846385092e-06, "loss": 0.0572, "step": 9287 }, { "epoch": 1.55, "grad_norm": 0.4785348176956177, "learning_rate": 9.909715197946349e-06, "loss": 0.0558, "step": 9288 }, { "epoch": 1.55, "grad_norm": 1.4616343975067139, "learning_rate": 9.907909552451455e-06, "loss": 0.0715, "step": 9289 }, { "epoch": 1.55, "grad_norm": 0.9695257544517517, "learning_rate": 9.906103909959284e-06, "loss": 0.0381, "step": 9290 }, { "epoch": 1.55, "grad_norm": 0.6565049290657043, "learning_rate": 9.904298270528719e-06, "loss": 0.0546, "step": 9291 }, { "epoch": 1.55, "grad_norm": 0.979922354221344, "learning_rate": 9.902492634218627e-06, "loss": 0.0675, "step": 9292 }, { "epoch": 1.55, "grad_norm": 0.7444257736206055, "learning_rate": 9.900687001087884e-06, "loss": 0.0854, "step": 9293 }, { "epoch": 1.55, "grad_norm": 0.6043025255203247, "learning_rate": 9.898881371195373e-06, "loss": 0.059, "step": 9294 }, { "epoch": 1.55, "grad_norm": 0.4902268350124359, "learning_rate": 9.89707574459996e-06, "loss": 0.0657, "step": 9295 }, { "epoch": 1.55, "grad_norm": 0.5292961001396179, "learning_rate": 9.895270121360522e-06, "loss": 0.0361, "step": 9296 }, { "epoch": 1.55, "grad_norm": 0.48693346977233887, "learning_rate": 9.893464501535936e-06, "loss": 0.0506, "step": 9297 }, { "epoch": 1.56, "grad_norm": 0.5309969186782837, "learning_rate": 9.891658885185075e-06, "loss": 0.0568, "step": 9298 }, { "epoch": 1.56, "grad_norm": 0.3438083529472351, "learning_rate": 9.889853272366812e-06, "loss": 0.044, "step": 9299 }, { "epoch": 1.56, "grad_norm": 0.4835416376590729, "learning_rate": 9.888047663140022e-06, "loss": 0.0713, "step": 9300 }, { "epoch": 1.56, "grad_norm": 0.48659616708755493, "learning_rate": 9.88624205756358e-06, "loss": 0.058, "step": 9301 }, { "epoch": 1.56, "grad_norm": 0.4126259386539459, "learning_rate": 9.884436455696363e-06, "loss": 0.0668, "step": 9302 }, { "epoch": 1.56, "grad_norm": 0.8353232741355896, "learning_rate": 9.882630857597243e-06, "loss": 0.0501, "step": 9303 }, { "epoch": 1.56, "grad_norm": 0.5413306355476379, "learning_rate": 9.880825263325086e-06, "loss": 0.0777, "step": 9304 }, { "epoch": 1.56, "grad_norm": 0.4368867576122284, "learning_rate": 9.879019672938778e-06, "loss": 0.0411, "step": 9305 }, { "epoch": 1.56, "grad_norm": 0.36932605504989624, "learning_rate": 9.877214086497186e-06, "loss": 0.0279, "step": 9306 }, { "epoch": 1.56, "grad_norm": 0.6582365036010742, "learning_rate": 9.875408504059182e-06, "loss": 0.0844, "step": 9307 }, { "epoch": 1.56, "grad_norm": 0.5802915692329407, "learning_rate": 9.873602925683645e-06, "loss": 0.071, "step": 9308 }, { "epoch": 1.56, "grad_norm": 0.5412526726722717, "learning_rate": 9.871797351429443e-06, "loss": 0.0605, "step": 9309 }, { "epoch": 1.56, "grad_norm": 0.5644814372062683, "learning_rate": 9.86999178135545e-06, "loss": 0.0533, "step": 9310 }, { "epoch": 1.56, "grad_norm": 0.48691996932029724, "learning_rate": 9.868186215520542e-06, "loss": 0.0511, "step": 9311 }, { "epoch": 1.56, "grad_norm": 0.9237470030784607, "learning_rate": 9.866380653983587e-06, "loss": 0.0617, "step": 9312 }, { "epoch": 1.56, "grad_norm": 0.49433383345603943, "learning_rate": 9.86457509680346e-06, "loss": 0.0554, "step": 9313 }, { "epoch": 1.56, "grad_norm": 0.4326745569705963, "learning_rate": 9.862769544039038e-06, "loss": 0.0453, "step": 9314 }, { "epoch": 1.56, "grad_norm": 0.5913280844688416, "learning_rate": 9.860963995749183e-06, "loss": 0.0454, "step": 9315 }, { "epoch": 1.56, "grad_norm": 0.44062909483909607, "learning_rate": 9.85915845199278e-06, "loss": 0.0455, "step": 9316 }, { "epoch": 1.56, "grad_norm": 0.456646203994751, "learning_rate": 9.85735291282869e-06, "loss": 0.0571, "step": 9317 }, { "epoch": 1.56, "grad_norm": 0.6857669353485107, "learning_rate": 9.855547378315788e-06, "loss": 0.0798, "step": 9318 }, { "epoch": 1.56, "grad_norm": 0.2893902361392975, "learning_rate": 9.85374184851295e-06, "loss": 0.0433, "step": 9319 }, { "epoch": 1.56, "grad_norm": 0.42244064807891846, "learning_rate": 9.851936323479043e-06, "loss": 0.0582, "step": 9320 }, { "epoch": 1.56, "grad_norm": 0.5848799347877502, "learning_rate": 9.850130803272938e-06, "loss": 0.0685, "step": 9321 }, { "epoch": 1.56, "grad_norm": 0.4509039521217346, "learning_rate": 9.848325287953511e-06, "loss": 0.0537, "step": 9322 }, { "epoch": 1.56, "grad_norm": 0.434406042098999, "learning_rate": 9.846519777579634e-06, "loss": 0.0581, "step": 9323 }, { "epoch": 1.56, "grad_norm": 0.5482066869735718, "learning_rate": 9.844714272210166e-06, "loss": 0.0724, "step": 9324 }, { "epoch": 1.56, "grad_norm": 0.6001856327056885, "learning_rate": 9.84290877190399e-06, "loss": 0.0604, "step": 9325 }, { "epoch": 1.56, "grad_norm": 0.40536049008369446, "learning_rate": 9.841103276719973e-06, "loss": 0.0495, "step": 9326 }, { "epoch": 1.56, "grad_norm": 0.3714562952518463, "learning_rate": 9.839297786716982e-06, "loss": 0.0376, "step": 9327 }, { "epoch": 1.56, "grad_norm": 0.8430951833724976, "learning_rate": 9.837492301953893e-06, "loss": 0.0727, "step": 9328 }, { "epoch": 1.56, "grad_norm": 0.5443314909934998, "learning_rate": 9.835686822489574e-06, "loss": 0.059, "step": 9329 }, { "epoch": 1.56, "grad_norm": 0.6762691736221313, "learning_rate": 9.833881348382893e-06, "loss": 0.0462, "step": 9330 }, { "epoch": 1.56, "grad_norm": 0.4340983033180237, "learning_rate": 9.832075879692725e-06, "loss": 0.0421, "step": 9331 }, { "epoch": 1.56, "grad_norm": 0.35327914357185364, "learning_rate": 9.83027041647793e-06, "loss": 0.0464, "step": 9332 }, { "epoch": 1.56, "grad_norm": 0.4265393018722534, "learning_rate": 9.828464958797387e-06, "loss": 0.0599, "step": 9333 }, { "epoch": 1.56, "grad_norm": 0.4031113386154175, "learning_rate": 9.826659506709962e-06, "loss": 0.0378, "step": 9334 }, { "epoch": 1.56, "grad_norm": 0.4946022033691406, "learning_rate": 9.82485406027452e-06, "loss": 0.0729, "step": 9335 }, { "epoch": 1.56, "grad_norm": 0.3973275125026703, "learning_rate": 9.823048619549937e-06, "loss": 0.0413, "step": 9336 }, { "epoch": 1.56, "grad_norm": 0.4635304808616638, "learning_rate": 9.821243184595077e-06, "loss": 0.0514, "step": 9337 }, { "epoch": 1.56, "grad_norm": 0.37079721689224243, "learning_rate": 9.819437755468807e-06, "loss": 0.0376, "step": 9338 }, { "epoch": 1.56, "grad_norm": 0.3293260335922241, "learning_rate": 9.817632332230003e-06, "loss": 0.0384, "step": 9339 }, { "epoch": 1.56, "grad_norm": 0.40983080863952637, "learning_rate": 9.815826914937525e-06, "loss": 0.0424, "step": 9340 }, { "epoch": 1.56, "grad_norm": 0.3996489644050598, "learning_rate": 9.814021503650241e-06, "loss": 0.0464, "step": 9341 }, { "epoch": 1.56, "grad_norm": 0.4424811899662018, "learning_rate": 9.812216098427027e-06, "loss": 0.064, "step": 9342 }, { "epoch": 1.56, "grad_norm": 0.4682285189628601, "learning_rate": 9.810410699326741e-06, "loss": 0.0499, "step": 9343 }, { "epoch": 1.56, "grad_norm": 0.6113885641098022, "learning_rate": 9.808605306408258e-06, "loss": 0.0628, "step": 9344 }, { "epoch": 1.56, "grad_norm": 0.49545255303382874, "learning_rate": 9.80679991973044e-06, "loss": 0.0406, "step": 9345 }, { "epoch": 1.56, "grad_norm": 0.5117062926292419, "learning_rate": 9.804994539352155e-06, "loss": 0.0503, "step": 9346 }, { "epoch": 1.56, "grad_norm": 0.3840639591217041, "learning_rate": 9.803189165332272e-06, "loss": 0.0306, "step": 9347 }, { "epoch": 1.56, "grad_norm": 0.5146268606185913, "learning_rate": 9.801383797729656e-06, "loss": 0.0631, "step": 9348 }, { "epoch": 1.56, "grad_norm": 0.5174093842506409, "learning_rate": 9.799578436603168e-06, "loss": 0.0707, "step": 9349 }, { "epoch": 1.56, "grad_norm": 0.39090701937675476, "learning_rate": 9.797773082011686e-06, "loss": 0.0493, "step": 9350 }, { "epoch": 1.56, "grad_norm": 0.41770240664482117, "learning_rate": 9.795967734014068e-06, "loss": 0.0435, "step": 9351 }, { "epoch": 1.56, "grad_norm": 0.7561498284339905, "learning_rate": 9.794162392669176e-06, "loss": 0.0506, "step": 9352 }, { "epoch": 1.56, "grad_norm": 0.6614740490913391, "learning_rate": 9.792357058035882e-06, "loss": 0.0663, "step": 9353 }, { "epoch": 1.56, "grad_norm": 0.37124258279800415, "learning_rate": 9.790551730173053e-06, "loss": 0.0562, "step": 9354 }, { "epoch": 1.56, "grad_norm": 0.34402433037757874, "learning_rate": 9.788746409139545e-06, "loss": 0.0416, "step": 9355 }, { "epoch": 1.56, "grad_norm": 0.40142208337783813, "learning_rate": 9.786941094994232e-06, "loss": 0.0395, "step": 9356 }, { "epoch": 1.56, "grad_norm": 0.5596887469291687, "learning_rate": 9.785135787795972e-06, "loss": 0.085, "step": 9357 }, { "epoch": 1.57, "grad_norm": 0.4324740171432495, "learning_rate": 9.783330487603636e-06, "loss": 0.0576, "step": 9358 }, { "epoch": 1.57, "grad_norm": 0.5380675196647644, "learning_rate": 9.781525194476082e-06, "loss": 0.0606, "step": 9359 }, { "epoch": 1.57, "grad_norm": 0.45252782106399536, "learning_rate": 9.779719908472175e-06, "loss": 0.0432, "step": 9360 }, { "epoch": 1.57, "grad_norm": 0.4241744577884674, "learning_rate": 9.77791462965078e-06, "loss": 0.0607, "step": 9361 }, { "epoch": 1.57, "grad_norm": 0.6263060569763184, "learning_rate": 9.776109358070762e-06, "loss": 0.0519, "step": 9362 }, { "epoch": 1.57, "grad_norm": 0.33983114361763, "learning_rate": 9.774304093790979e-06, "loss": 0.0458, "step": 9363 }, { "epoch": 1.57, "grad_norm": 0.5355608463287354, "learning_rate": 9.7724988368703e-06, "loss": 0.0606, "step": 9364 }, { "epoch": 1.57, "grad_norm": 0.5087541937828064, "learning_rate": 9.770693587367583e-06, "loss": 0.0488, "step": 9365 }, { "epoch": 1.57, "grad_norm": 0.529559314250946, "learning_rate": 9.768888345341692e-06, "loss": 0.054, "step": 9366 }, { "epoch": 1.57, "grad_norm": 0.3571666479110718, "learning_rate": 9.767083110851492e-06, "loss": 0.0304, "step": 9367 }, { "epoch": 1.57, "grad_norm": 0.5390861630439758, "learning_rate": 9.76527788395584e-06, "loss": 0.0584, "step": 9368 }, { "epoch": 1.57, "grad_norm": 0.45902296900749207, "learning_rate": 9.763472664713599e-06, "loss": 0.0454, "step": 9369 }, { "epoch": 1.57, "grad_norm": 0.3901140093803406, "learning_rate": 9.761667453183636e-06, "loss": 0.0524, "step": 9370 }, { "epoch": 1.57, "grad_norm": 0.41331106424331665, "learning_rate": 9.759862249424805e-06, "loss": 0.0623, "step": 9371 }, { "epoch": 1.57, "grad_norm": 0.4980961084365845, "learning_rate": 9.758057053495971e-06, "loss": 0.0574, "step": 9372 }, { "epoch": 1.57, "grad_norm": 0.8679917454719543, "learning_rate": 9.756251865455993e-06, "loss": 0.0784, "step": 9373 }, { "epoch": 1.57, "grad_norm": 0.48882338404655457, "learning_rate": 9.75444668536373e-06, "loss": 0.0443, "step": 9374 }, { "epoch": 1.57, "grad_norm": 0.48784181475639343, "learning_rate": 9.752641513278048e-06, "loss": 0.0543, "step": 9375 }, { "epoch": 1.57, "grad_norm": 0.5349960327148438, "learning_rate": 9.7508363492578e-06, "loss": 0.0484, "step": 9376 }, { "epoch": 1.57, "grad_norm": 0.4030451476573944, "learning_rate": 9.74903119336185e-06, "loss": 0.0537, "step": 9377 }, { "epoch": 1.57, "grad_norm": 0.45107772946357727, "learning_rate": 9.747226045649057e-06, "loss": 0.0439, "step": 9378 }, { "epoch": 1.57, "grad_norm": 0.5652207136154175, "learning_rate": 9.74542090617828e-06, "loss": 0.0573, "step": 9379 }, { "epoch": 1.57, "grad_norm": 0.3652261197566986, "learning_rate": 9.743615775008372e-06, "loss": 0.0332, "step": 9380 }, { "epoch": 1.57, "grad_norm": 0.4912105202674866, "learning_rate": 9.7418106521982e-06, "loss": 0.0454, "step": 9381 }, { "epoch": 1.57, "grad_norm": 0.4358308017253876, "learning_rate": 9.740005537806621e-06, "loss": 0.0603, "step": 9382 }, { "epoch": 1.57, "grad_norm": 0.43896934390068054, "learning_rate": 9.738200431892487e-06, "loss": 0.0577, "step": 9383 }, { "epoch": 1.57, "grad_norm": 0.4052071273326874, "learning_rate": 9.736395334514665e-06, "loss": 0.0449, "step": 9384 }, { "epoch": 1.57, "grad_norm": 0.4211438000202179, "learning_rate": 9.734590245732e-06, "loss": 0.042, "step": 9385 }, { "epoch": 1.57, "grad_norm": 0.4641459584236145, "learning_rate": 9.732785165603362e-06, "loss": 0.0525, "step": 9386 }, { "epoch": 1.57, "grad_norm": 0.3058377504348755, "learning_rate": 9.730980094187603e-06, "loss": 0.0458, "step": 9387 }, { "epoch": 1.57, "grad_norm": 0.6253279447555542, "learning_rate": 9.729175031543575e-06, "loss": 0.0639, "step": 9388 }, { "epoch": 1.57, "grad_norm": 0.4162241220474243, "learning_rate": 9.72736997773014e-06, "loss": 0.055, "step": 9389 }, { "epoch": 1.57, "grad_norm": 0.5424242615699768, "learning_rate": 9.725564932806157e-06, "loss": 0.0477, "step": 9390 }, { "epoch": 1.57, "grad_norm": 0.5398575067520142, "learning_rate": 9.723759896830471e-06, "loss": 0.0606, "step": 9391 }, { "epoch": 1.57, "grad_norm": 0.44633105397224426, "learning_rate": 9.721954869861947e-06, "loss": 0.0611, "step": 9392 }, { "epoch": 1.57, "grad_norm": 0.43525850772857666, "learning_rate": 9.720149851959438e-06, "loss": 0.0495, "step": 9393 }, { "epoch": 1.57, "grad_norm": 0.4281361699104309, "learning_rate": 9.718344843181794e-06, "loss": 0.0392, "step": 9394 }, { "epoch": 1.57, "grad_norm": 0.5892727375030518, "learning_rate": 9.716539843587877e-06, "loss": 0.0928, "step": 9395 }, { "epoch": 1.57, "grad_norm": 0.4223078191280365, "learning_rate": 9.714734853236535e-06, "loss": 0.083, "step": 9396 }, { "epoch": 1.57, "grad_norm": 0.5007465481758118, "learning_rate": 9.712929872186625e-06, "loss": 0.0375, "step": 9397 }, { "epoch": 1.57, "grad_norm": 0.4693973958492279, "learning_rate": 9.711124900497003e-06, "loss": 0.074, "step": 9398 }, { "epoch": 1.57, "grad_norm": 0.39391419291496277, "learning_rate": 9.709319938226517e-06, "loss": 0.0383, "step": 9399 }, { "epoch": 1.57, "grad_norm": 0.41788649559020996, "learning_rate": 9.707514985434026e-06, "loss": 0.0561, "step": 9400 }, { "epoch": 1.57, "grad_norm": 0.49463173747062683, "learning_rate": 9.705710042178378e-06, "loss": 0.0392, "step": 9401 }, { "epoch": 1.57, "grad_norm": 0.6990969777107239, "learning_rate": 9.703905108518425e-06, "loss": 0.0504, "step": 9402 }, { "epoch": 1.57, "grad_norm": 0.41196534037590027, "learning_rate": 9.702100184513025e-06, "loss": 0.0566, "step": 9403 }, { "epoch": 1.57, "grad_norm": 0.49716416001319885, "learning_rate": 9.700295270221026e-06, "loss": 0.0386, "step": 9404 }, { "epoch": 1.57, "grad_norm": 0.37816867232322693, "learning_rate": 9.698490365701276e-06, "loss": 0.0521, "step": 9405 }, { "epoch": 1.57, "grad_norm": 0.47740066051483154, "learning_rate": 9.696685471012635e-06, "loss": 0.0528, "step": 9406 }, { "epoch": 1.57, "grad_norm": 0.3866393268108368, "learning_rate": 9.694880586213948e-06, "loss": 0.0583, "step": 9407 }, { "epoch": 1.57, "grad_norm": 0.5593414306640625, "learning_rate": 9.693075711364062e-06, "loss": 0.0624, "step": 9408 }, { "epoch": 1.57, "grad_norm": 0.4937732517719269, "learning_rate": 9.691270846521833e-06, "loss": 0.0458, "step": 9409 }, { "epoch": 1.57, "grad_norm": 0.3898709714412689, "learning_rate": 9.689465991746112e-06, "loss": 0.0529, "step": 9410 }, { "epoch": 1.57, "grad_norm": 0.3650752604007721, "learning_rate": 9.687661147095744e-06, "loss": 0.0482, "step": 9411 }, { "epoch": 1.57, "grad_norm": 0.3372499644756317, "learning_rate": 9.685856312629582e-06, "loss": 0.0452, "step": 9412 }, { "epoch": 1.57, "grad_norm": 0.34648773074150085, "learning_rate": 9.68405148840647e-06, "loss": 0.04, "step": 9413 }, { "epoch": 1.57, "grad_norm": 0.34452855587005615, "learning_rate": 9.682246674485261e-06, "loss": 0.0438, "step": 9414 }, { "epoch": 1.57, "grad_norm": 0.4756219983100891, "learning_rate": 9.680441870924806e-06, "loss": 0.0514, "step": 9415 }, { "epoch": 1.57, "grad_norm": 0.5104026198387146, "learning_rate": 9.678637077783944e-06, "loss": 0.0679, "step": 9416 }, { "epoch": 1.58, "grad_norm": 0.3103325664997101, "learning_rate": 9.676832295121528e-06, "loss": 0.0446, "step": 9417 }, { "epoch": 1.58, "grad_norm": 0.47610950469970703, "learning_rate": 9.675027522996409e-06, "loss": 0.0519, "step": 9418 }, { "epoch": 1.58, "grad_norm": 0.3364991247653961, "learning_rate": 9.673222761467423e-06, "loss": 0.0457, "step": 9419 }, { "epoch": 1.58, "grad_norm": 0.29641056060791016, "learning_rate": 9.671418010593429e-06, "loss": 0.0343, "step": 9420 }, { "epoch": 1.58, "grad_norm": 0.43378132581710815, "learning_rate": 9.669613270433266e-06, "loss": 0.0548, "step": 9421 }, { "epoch": 1.58, "grad_norm": 0.5681194067001343, "learning_rate": 9.667808541045777e-06, "loss": 0.067, "step": 9422 }, { "epoch": 1.58, "grad_norm": 0.816487193107605, "learning_rate": 9.666003822489817e-06, "loss": 0.0577, "step": 9423 }, { "epoch": 1.58, "grad_norm": 0.5081641674041748, "learning_rate": 9.664199114824224e-06, "loss": 0.0513, "step": 9424 }, { "epoch": 1.58, "grad_norm": 0.5093066096305847, "learning_rate": 9.662394418107842e-06, "loss": 0.0423, "step": 9425 }, { "epoch": 1.58, "grad_norm": 0.41409537196159363, "learning_rate": 9.660589732399522e-06, "loss": 0.0621, "step": 9426 }, { "epoch": 1.58, "grad_norm": 0.5029083490371704, "learning_rate": 9.658785057758101e-06, "loss": 0.0624, "step": 9427 }, { "epoch": 1.58, "grad_norm": 0.3656100630760193, "learning_rate": 9.656980394242428e-06, "loss": 0.0477, "step": 9428 }, { "epoch": 1.58, "grad_norm": 0.3913632333278656, "learning_rate": 9.655175741911344e-06, "loss": 0.0438, "step": 9429 }, { "epoch": 1.58, "grad_norm": 0.31908610463142395, "learning_rate": 9.653371100823689e-06, "loss": 0.0322, "step": 9430 }, { "epoch": 1.58, "grad_norm": 0.44650816917419434, "learning_rate": 9.651566471038313e-06, "loss": 0.0531, "step": 9431 }, { "epoch": 1.58, "grad_norm": 0.6258013844490051, "learning_rate": 9.649761852614053e-06, "loss": 0.0701, "step": 9432 }, { "epoch": 1.58, "grad_norm": 0.4248187839984894, "learning_rate": 9.647957245609748e-06, "loss": 0.0431, "step": 9433 }, { "epoch": 1.58, "grad_norm": 0.3944774568080902, "learning_rate": 9.646152650084248e-06, "loss": 0.0446, "step": 9434 }, { "epoch": 1.58, "grad_norm": 0.46123573184013367, "learning_rate": 9.644348066096391e-06, "loss": 0.051, "step": 9435 }, { "epoch": 1.58, "grad_norm": 0.6927733421325684, "learning_rate": 9.64254349370501e-06, "loss": 0.053, "step": 9436 }, { "epoch": 1.58, "grad_norm": 0.3694300055503845, "learning_rate": 9.640738932968954e-06, "loss": 0.0466, "step": 9437 }, { "epoch": 1.58, "grad_norm": 0.6510922312736511, "learning_rate": 9.638934383947063e-06, "loss": 0.0794, "step": 9438 }, { "epoch": 1.58, "grad_norm": 0.4514681398868561, "learning_rate": 9.637129846698172e-06, "loss": 0.0484, "step": 9439 }, { "epoch": 1.58, "grad_norm": 0.5196832418441772, "learning_rate": 9.635325321281124e-06, "loss": 0.0555, "step": 9440 }, { "epoch": 1.58, "grad_norm": 0.5713344216346741, "learning_rate": 9.633520807754752e-06, "loss": 0.0509, "step": 9441 }, { "epoch": 1.58, "grad_norm": 0.26257678866386414, "learning_rate": 9.631716306177902e-06, "loss": 0.0399, "step": 9442 }, { "epoch": 1.58, "grad_norm": 0.43259716033935547, "learning_rate": 9.62991181660941e-06, "loss": 0.0595, "step": 9443 }, { "epoch": 1.58, "grad_norm": 0.6664734482765198, "learning_rate": 9.628107339108108e-06, "loss": 0.0748, "step": 9444 }, { "epoch": 1.58, "grad_norm": 0.5984434485435486, "learning_rate": 9.62630287373284e-06, "loss": 0.0646, "step": 9445 }, { "epoch": 1.58, "grad_norm": 0.42311686277389526, "learning_rate": 9.624498420542445e-06, "loss": 0.0605, "step": 9446 }, { "epoch": 1.58, "grad_norm": 0.571718692779541, "learning_rate": 9.622693979595748e-06, "loss": 0.0456, "step": 9447 }, { "epoch": 1.58, "grad_norm": 0.43244045972824097, "learning_rate": 9.620889550951596e-06, "loss": 0.0503, "step": 9448 }, { "epoch": 1.58, "grad_norm": 0.5179217457771301, "learning_rate": 9.619085134668821e-06, "loss": 0.0691, "step": 9449 }, { "epoch": 1.58, "grad_norm": 0.3033130168914795, "learning_rate": 9.617280730806255e-06, "loss": 0.0375, "step": 9450 }, { "epoch": 1.58, "grad_norm": 0.4200727045536041, "learning_rate": 9.61547633942274e-06, "loss": 0.0582, "step": 9451 }, { "epoch": 1.58, "grad_norm": 0.46618813276290894, "learning_rate": 9.613671960577104e-06, "loss": 0.0491, "step": 9452 }, { "epoch": 1.58, "grad_norm": 0.5156457424163818, "learning_rate": 9.611867594328182e-06, "loss": 0.0406, "step": 9453 }, { "epoch": 1.58, "grad_norm": 0.4503972828388214, "learning_rate": 9.610063240734814e-06, "loss": 0.0602, "step": 9454 }, { "epoch": 1.58, "grad_norm": 0.40486764907836914, "learning_rate": 9.608258899855822e-06, "loss": 0.0599, "step": 9455 }, { "epoch": 1.58, "grad_norm": 0.6284778714179993, "learning_rate": 9.606454571750051e-06, "loss": 0.0596, "step": 9456 }, { "epoch": 1.58, "grad_norm": 0.5580898523330688, "learning_rate": 9.604650256476326e-06, "loss": 0.0587, "step": 9457 }, { "epoch": 1.58, "grad_norm": 0.37484851479530334, "learning_rate": 9.602845954093479e-06, "loss": 0.0569, "step": 9458 }, { "epoch": 1.58, "grad_norm": 0.7001559734344482, "learning_rate": 9.601041664660346e-06, "loss": 0.0663, "step": 9459 }, { "epoch": 1.58, "grad_norm": 0.42744094133377075, "learning_rate": 9.599237388235753e-06, "loss": 0.0585, "step": 9460 }, { "epoch": 1.58, "grad_norm": 0.36245784163475037, "learning_rate": 9.597433124878534e-06, "loss": 0.0415, "step": 9461 }, { "epoch": 1.58, "grad_norm": 1.0459076166152954, "learning_rate": 9.595628874647517e-06, "loss": 0.0635, "step": 9462 }, { "epoch": 1.58, "grad_norm": 0.46133264899253845, "learning_rate": 9.593824637601537e-06, "loss": 0.067, "step": 9463 }, { "epoch": 1.58, "grad_norm": 0.636339008808136, "learning_rate": 9.592020413799414e-06, "loss": 0.0712, "step": 9464 }, { "epoch": 1.58, "grad_norm": 0.42892658710479736, "learning_rate": 9.590216203299985e-06, "loss": 0.059, "step": 9465 }, { "epoch": 1.58, "grad_norm": 0.520767867565155, "learning_rate": 9.588412006162079e-06, "loss": 0.0596, "step": 9466 }, { "epoch": 1.58, "grad_norm": 0.44024908542633057, "learning_rate": 9.586607822444517e-06, "loss": 0.0501, "step": 9467 }, { "epoch": 1.58, "grad_norm": 0.4459201395511627, "learning_rate": 9.584803652206135e-06, "loss": 0.0542, "step": 9468 }, { "epoch": 1.58, "grad_norm": 0.45426589250564575, "learning_rate": 9.582999495505753e-06, "loss": 0.0481, "step": 9469 }, { "epoch": 1.58, "grad_norm": 0.32089442014694214, "learning_rate": 9.581195352402202e-06, "loss": 0.0486, "step": 9470 }, { "epoch": 1.58, "grad_norm": 0.48882150650024414, "learning_rate": 9.57939122295431e-06, "loss": 0.0468, "step": 9471 }, { "epoch": 1.58, "grad_norm": 0.6408484578132629, "learning_rate": 9.577587107220896e-06, "loss": 0.0625, "step": 9472 }, { "epoch": 1.58, "grad_norm": 0.40416765213012695, "learning_rate": 9.575783005260792e-06, "loss": 0.0391, "step": 9473 }, { "epoch": 1.58, "grad_norm": 0.4538668096065521, "learning_rate": 9.573978917132823e-06, "loss": 0.0718, "step": 9474 }, { "epoch": 1.58, "grad_norm": 0.3913402259349823, "learning_rate": 9.572174842895808e-06, "loss": 0.048, "step": 9475 }, { "epoch": 1.58, "grad_norm": 0.3870198130607605, "learning_rate": 9.570370782608579e-06, "loss": 0.0661, "step": 9476 }, { "epoch": 1.59, "grad_norm": 0.330549418926239, "learning_rate": 9.568566736329953e-06, "loss": 0.0473, "step": 9477 }, { "epoch": 1.59, "grad_norm": 0.6232270002365112, "learning_rate": 9.566762704118752e-06, "loss": 0.0661, "step": 9478 }, { "epoch": 1.59, "grad_norm": 0.42557284235954285, "learning_rate": 9.564958686033808e-06, "loss": 0.0324, "step": 9479 }, { "epoch": 1.59, "grad_norm": 0.5045092701911926, "learning_rate": 9.563154682133934e-06, "loss": 0.0489, "step": 9480 }, { "epoch": 1.59, "grad_norm": 0.5146514177322388, "learning_rate": 9.561350692477955e-06, "loss": 0.0644, "step": 9481 }, { "epoch": 1.59, "grad_norm": 0.43737441301345825, "learning_rate": 9.559546717124696e-06, "loss": 0.0499, "step": 9482 }, { "epoch": 1.59, "grad_norm": 0.5832018256187439, "learning_rate": 9.557742756132971e-06, "loss": 0.0529, "step": 9483 }, { "epoch": 1.59, "grad_norm": 0.43534737825393677, "learning_rate": 9.555938809561609e-06, "loss": 0.0488, "step": 9484 }, { "epoch": 1.59, "grad_norm": 0.7138282060623169, "learning_rate": 9.554134877469421e-06, "loss": 0.0613, "step": 9485 }, { "epoch": 1.59, "grad_norm": 0.48057863116264343, "learning_rate": 9.552330959915231e-06, "loss": 0.0782, "step": 9486 }, { "epoch": 1.59, "grad_norm": 0.4331916272640228, "learning_rate": 9.550527056957861e-06, "loss": 0.0522, "step": 9487 }, { "epoch": 1.59, "grad_norm": 0.4439723491668701, "learning_rate": 9.548723168656123e-06, "loss": 0.0412, "step": 9488 }, { "epoch": 1.59, "grad_norm": 0.38783541321754456, "learning_rate": 9.546919295068839e-06, "loss": 0.0615, "step": 9489 }, { "epoch": 1.59, "grad_norm": 0.4109331965446472, "learning_rate": 9.545115436254826e-06, "loss": 0.0481, "step": 9490 }, { "epoch": 1.59, "grad_norm": 0.36524322628974915, "learning_rate": 9.543311592272903e-06, "loss": 0.0501, "step": 9491 }, { "epoch": 1.59, "grad_norm": 0.4887196123600006, "learning_rate": 9.54150776318188e-06, "loss": 0.0537, "step": 9492 }, { "epoch": 1.59, "grad_norm": 0.4361768960952759, "learning_rate": 9.539703949040579e-06, "loss": 0.0592, "step": 9493 }, { "epoch": 1.59, "grad_norm": 0.5275826454162598, "learning_rate": 9.537900149907817e-06, "loss": 0.0698, "step": 9494 }, { "epoch": 1.59, "grad_norm": 0.49979475140571594, "learning_rate": 9.536096365842403e-06, "loss": 0.055, "step": 9495 }, { "epoch": 1.59, "grad_norm": 0.4757746756076813, "learning_rate": 9.534292596903158e-06, "loss": 0.0453, "step": 9496 }, { "epoch": 1.59, "grad_norm": 0.5544678568840027, "learning_rate": 9.53248884314889e-06, "loss": 0.0673, "step": 9497 }, { "epoch": 1.59, "grad_norm": 0.4113108515739441, "learning_rate": 9.530685104638417e-06, "loss": 0.0433, "step": 9498 }, { "epoch": 1.59, "grad_norm": 0.380772203207016, "learning_rate": 9.528881381430555e-06, "loss": 0.044, "step": 9499 }, { "epoch": 1.59, "grad_norm": 0.49748197197914124, "learning_rate": 9.527077673584108e-06, "loss": 0.0565, "step": 9500 }, { "epoch": 1.59, "grad_norm": 0.39661869406700134, "learning_rate": 9.525273981157895e-06, "loss": 0.0569, "step": 9501 }, { "epoch": 1.59, "grad_norm": 0.3591785430908203, "learning_rate": 9.523470304210728e-06, "loss": 0.0478, "step": 9502 }, { "epoch": 1.59, "grad_norm": 0.6520319581031799, "learning_rate": 9.52166664280141e-06, "loss": 0.0566, "step": 9503 }, { "epoch": 1.59, "grad_norm": 0.5648554563522339, "learning_rate": 9.519862996988763e-06, "loss": 0.0589, "step": 9504 }, { "epoch": 1.59, "grad_norm": 0.4782247841358185, "learning_rate": 9.518059366831591e-06, "loss": 0.054, "step": 9505 }, { "epoch": 1.59, "grad_norm": 0.652584433555603, "learning_rate": 9.5162557523887e-06, "loss": 0.0445, "step": 9506 }, { "epoch": 1.59, "grad_norm": 0.4024182856082916, "learning_rate": 9.51445215371891e-06, "loss": 0.0528, "step": 9507 }, { "epoch": 1.59, "grad_norm": 0.527053952217102, "learning_rate": 9.512648570881018e-06, "loss": 0.046, "step": 9508 }, { "epoch": 1.59, "grad_norm": 0.46412962675094604, "learning_rate": 9.510845003933838e-06, "loss": 0.041, "step": 9509 }, { "epoch": 1.59, "grad_norm": 0.47766298055648804, "learning_rate": 9.50904145293618e-06, "loss": 0.0614, "step": 9510 }, { "epoch": 1.59, "grad_norm": 0.4161405861377716, "learning_rate": 9.507237917946843e-06, "loss": 0.0393, "step": 9511 }, { "epoch": 1.59, "grad_norm": 0.36105817556381226, "learning_rate": 9.505434399024643e-06, "loss": 0.045, "step": 9512 }, { "epoch": 1.59, "grad_norm": 0.38489091396331787, "learning_rate": 9.503630896228379e-06, "loss": 0.05, "step": 9513 }, { "epoch": 1.59, "grad_norm": 0.5227742195129395, "learning_rate": 9.501827409616858e-06, "loss": 0.0711, "step": 9514 }, { "epoch": 1.59, "grad_norm": 0.5947520136833191, "learning_rate": 9.500023939248889e-06, "loss": 0.0524, "step": 9515 }, { "epoch": 1.59, "grad_norm": 0.5387385487556458, "learning_rate": 9.498220485183274e-06, "loss": 0.0514, "step": 9516 }, { "epoch": 1.59, "grad_norm": 0.3795281946659088, "learning_rate": 9.496417047478812e-06, "loss": 0.0522, "step": 9517 }, { "epoch": 1.59, "grad_norm": 0.4628092050552368, "learning_rate": 9.494613626194312e-06, "loss": 0.0625, "step": 9518 }, { "epoch": 1.59, "grad_norm": 0.4271946847438812, "learning_rate": 9.492810221388577e-06, "loss": 0.0598, "step": 9519 }, { "epoch": 1.59, "grad_norm": 0.6657258868217468, "learning_rate": 9.491006833120405e-06, "loss": 0.0646, "step": 9520 }, { "epoch": 1.59, "grad_norm": 0.2644072473049164, "learning_rate": 9.489203461448603e-06, "loss": 0.0436, "step": 9521 }, { "epoch": 1.59, "grad_norm": 0.4249265491962433, "learning_rate": 9.48740010643197e-06, "loss": 0.0612, "step": 9522 }, { "epoch": 1.59, "grad_norm": 0.40273144841194153, "learning_rate": 9.485596768129304e-06, "loss": 0.0596, "step": 9523 }, { "epoch": 1.59, "grad_norm": 0.487670361995697, "learning_rate": 9.48379344659941e-06, "loss": 0.0513, "step": 9524 }, { "epoch": 1.59, "grad_norm": 0.37501636147499084, "learning_rate": 9.481990141901081e-06, "loss": 0.0473, "step": 9525 }, { "epoch": 1.59, "grad_norm": 0.3830193281173706, "learning_rate": 9.480186854093124e-06, "loss": 0.0546, "step": 9526 }, { "epoch": 1.59, "grad_norm": 0.47520357370376587, "learning_rate": 9.478383583234335e-06, "loss": 0.0499, "step": 9527 }, { "epoch": 1.59, "grad_norm": 0.43039199709892273, "learning_rate": 9.476580329383505e-06, "loss": 0.0586, "step": 9528 }, { "epoch": 1.59, "grad_norm": 0.4471273422241211, "learning_rate": 9.47477709259944e-06, "loss": 0.0532, "step": 9529 }, { "epoch": 1.59, "grad_norm": 0.5996883511543274, "learning_rate": 9.472973872940937e-06, "loss": 0.0726, "step": 9530 }, { "epoch": 1.59, "grad_norm": 0.480404257774353, "learning_rate": 9.471170670466784e-06, "loss": 0.0511, "step": 9531 }, { "epoch": 1.59, "grad_norm": 0.4472023844718933, "learning_rate": 9.469367485235786e-06, "loss": 0.0512, "step": 9532 }, { "epoch": 1.59, "grad_norm": 0.4158194661140442, "learning_rate": 9.467564317306732e-06, "loss": 0.0556, "step": 9533 }, { "epoch": 1.59, "grad_norm": 0.3931235671043396, "learning_rate": 9.465761166738417e-06, "loss": 0.0447, "step": 9534 }, { "epoch": 1.59, "grad_norm": 0.4854978621006012, "learning_rate": 9.463958033589638e-06, "loss": 0.0709, "step": 9535 }, { "epoch": 1.59, "grad_norm": 0.46290814876556396, "learning_rate": 9.462154917919188e-06, "loss": 0.0388, "step": 9536 }, { "epoch": 1.6, "grad_norm": 0.4356047213077545, "learning_rate": 9.460351819785855e-06, "loss": 0.0407, "step": 9537 }, { "epoch": 1.6, "grad_norm": 0.47391024231910706, "learning_rate": 9.458548739248439e-06, "loss": 0.0572, "step": 9538 }, { "epoch": 1.6, "grad_norm": 0.5011035203933716, "learning_rate": 9.456745676365724e-06, "loss": 0.0464, "step": 9539 }, { "epoch": 1.6, "grad_norm": 0.3652069866657257, "learning_rate": 9.45494263119651e-06, "loss": 0.0563, "step": 9540 }, { "epoch": 1.6, "grad_norm": 0.37447476387023926, "learning_rate": 9.453139603799579e-06, "loss": 0.0496, "step": 9541 }, { "epoch": 1.6, "grad_norm": 0.7915332317352295, "learning_rate": 9.451336594233724e-06, "loss": 0.0631, "step": 9542 }, { "epoch": 1.6, "grad_norm": 0.41595637798309326, "learning_rate": 9.449533602557736e-06, "loss": 0.0558, "step": 9543 }, { "epoch": 1.6, "grad_norm": 0.4059832692146301, "learning_rate": 9.447730628830406e-06, "loss": 0.0395, "step": 9544 }, { "epoch": 1.6, "grad_norm": 0.43209898471832275, "learning_rate": 9.445927673110514e-06, "loss": 0.0448, "step": 9545 }, { "epoch": 1.6, "grad_norm": 0.6111231446266174, "learning_rate": 9.444124735456854e-06, "loss": 0.0489, "step": 9546 }, { "epoch": 1.6, "grad_norm": 0.46302375197410583, "learning_rate": 9.442321815928215e-06, "loss": 0.0607, "step": 9547 }, { "epoch": 1.6, "grad_norm": 0.38987022638320923, "learning_rate": 9.440518914583376e-06, "loss": 0.0597, "step": 9548 }, { "epoch": 1.6, "grad_norm": 0.5633686184883118, "learning_rate": 9.438716031481128e-06, "loss": 0.0602, "step": 9549 }, { "epoch": 1.6, "grad_norm": 0.44155794382095337, "learning_rate": 9.436913166680258e-06, "loss": 0.0417, "step": 9550 }, { "epoch": 1.6, "grad_norm": 0.5138351917266846, "learning_rate": 9.435110320239545e-06, "loss": 0.0398, "step": 9551 }, { "epoch": 1.6, "grad_norm": 0.6163389086723328, "learning_rate": 9.433307492217779e-06, "loss": 0.0544, "step": 9552 }, { "epoch": 1.6, "grad_norm": 0.6089514493942261, "learning_rate": 9.431504682673736e-06, "loss": 0.0526, "step": 9553 }, { "epoch": 1.6, "grad_norm": 0.38748475909233093, "learning_rate": 9.429701891666208e-06, "loss": 0.0735, "step": 9554 }, { "epoch": 1.6, "grad_norm": 0.53449946641922, "learning_rate": 9.427899119253973e-06, "loss": 0.0554, "step": 9555 }, { "epoch": 1.6, "grad_norm": 0.47851699590682983, "learning_rate": 9.426096365495808e-06, "loss": 0.0517, "step": 9556 }, { "epoch": 1.6, "grad_norm": 0.3715131878852844, "learning_rate": 9.424293630450502e-06, "loss": 0.0427, "step": 9557 }, { "epoch": 1.6, "grad_norm": 0.564909040927887, "learning_rate": 9.422490914176833e-06, "loss": 0.0455, "step": 9558 }, { "epoch": 1.6, "grad_norm": 0.4525262415409088, "learning_rate": 9.420688216733578e-06, "loss": 0.0518, "step": 9559 }, { "epoch": 1.6, "grad_norm": 0.4547633230686188, "learning_rate": 9.41888553817952e-06, "loss": 0.0495, "step": 9560 }, { "epoch": 1.6, "grad_norm": 0.5817776322364807, "learning_rate": 9.417082878573434e-06, "loss": 0.0616, "step": 9561 }, { "epoch": 1.6, "grad_norm": 0.3841192424297333, "learning_rate": 9.415280237974098e-06, "loss": 0.0419, "step": 9562 }, { "epoch": 1.6, "grad_norm": 0.5261709690093994, "learning_rate": 9.413477616440297e-06, "loss": 0.0588, "step": 9563 }, { "epoch": 1.6, "grad_norm": 0.5006425976753235, "learning_rate": 9.411675014030798e-06, "loss": 0.0521, "step": 9564 }, { "epoch": 1.6, "grad_norm": 0.48394691944122314, "learning_rate": 9.40987243080438e-06, "loss": 0.0657, "step": 9565 }, { "epoch": 1.6, "grad_norm": 0.5848997235298157, "learning_rate": 9.40806986681982e-06, "loss": 0.0835, "step": 9566 }, { "epoch": 1.6, "grad_norm": 0.5594567656517029, "learning_rate": 9.406267322135893e-06, "loss": 0.0501, "step": 9567 }, { "epoch": 1.6, "grad_norm": 0.5848796963691711, "learning_rate": 9.404464796811374e-06, "loss": 0.0466, "step": 9568 }, { "epoch": 1.6, "grad_norm": 0.4408417046070099, "learning_rate": 9.402662290905034e-06, "loss": 0.0617, "step": 9569 }, { "epoch": 1.6, "grad_norm": 0.4132789671421051, "learning_rate": 9.400859804475647e-06, "loss": 0.0465, "step": 9570 }, { "epoch": 1.6, "grad_norm": 0.44113126397132874, "learning_rate": 9.399057337581988e-06, "loss": 0.0492, "step": 9571 }, { "epoch": 1.6, "grad_norm": 0.47627273201942444, "learning_rate": 9.397254890282827e-06, "loss": 0.0611, "step": 9572 }, { "epoch": 1.6, "grad_norm": 0.4927760660648346, "learning_rate": 9.39545246263693e-06, "loss": 0.0354, "step": 9573 }, { "epoch": 1.6, "grad_norm": 0.4156437814235687, "learning_rate": 9.393650054703074e-06, "loss": 0.0541, "step": 9574 }, { "epoch": 1.6, "grad_norm": 0.5779348015785217, "learning_rate": 9.391847666540029e-06, "loss": 0.0766, "step": 9575 }, { "epoch": 1.6, "grad_norm": 0.317469984292984, "learning_rate": 9.390045298206556e-06, "loss": 0.0477, "step": 9576 }, { "epoch": 1.6, "grad_norm": 0.4192931056022644, "learning_rate": 9.388242949761434e-06, "loss": 0.0645, "step": 9577 }, { "epoch": 1.6, "grad_norm": 0.36008337140083313, "learning_rate": 9.386440621263427e-06, "loss": 0.051, "step": 9578 }, { "epoch": 1.6, "grad_norm": 0.38851287961006165, "learning_rate": 9.384638312771299e-06, "loss": 0.0519, "step": 9579 }, { "epoch": 1.6, "grad_norm": 0.4105260670185089, "learning_rate": 9.382836024343821e-06, "loss": 0.0455, "step": 9580 }, { "epoch": 1.6, "grad_norm": 0.4979625940322876, "learning_rate": 9.381033756039753e-06, "loss": 0.0505, "step": 9581 }, { "epoch": 1.6, "grad_norm": 0.5989803671836853, "learning_rate": 9.379231507917867e-06, "loss": 0.0649, "step": 9582 }, { "epoch": 1.6, "grad_norm": 0.4542672336101532, "learning_rate": 9.377429280036926e-06, "loss": 0.046, "step": 9583 }, { "epoch": 1.6, "grad_norm": 0.5481047630310059, "learning_rate": 9.375627072455689e-06, "loss": 0.0448, "step": 9584 }, { "epoch": 1.6, "grad_norm": 0.6203670501708984, "learning_rate": 9.373824885232924e-06, "loss": 0.0511, "step": 9585 }, { "epoch": 1.6, "grad_norm": 0.40635645389556885, "learning_rate": 9.372022718427396e-06, "loss": 0.0491, "step": 9586 }, { "epoch": 1.6, "grad_norm": 0.38261502981185913, "learning_rate": 9.370220572097857e-06, "loss": 0.0409, "step": 9587 }, { "epoch": 1.6, "grad_norm": 0.40389901399612427, "learning_rate": 9.368418446303082e-06, "loss": 0.0483, "step": 9588 }, { "epoch": 1.6, "grad_norm": 0.39149025082588196, "learning_rate": 9.36661634110182e-06, "loss": 0.0469, "step": 9589 }, { "epoch": 1.6, "grad_norm": 0.7170634269714355, "learning_rate": 9.364814256552833e-06, "loss": 0.0627, "step": 9590 }, { "epoch": 1.6, "grad_norm": 0.4708345830440521, "learning_rate": 9.363012192714887e-06, "loss": 0.0464, "step": 9591 }, { "epoch": 1.6, "grad_norm": 0.49351051449775696, "learning_rate": 9.361210149646736e-06, "loss": 0.0376, "step": 9592 }, { "epoch": 1.6, "grad_norm": 0.7528892159461975, "learning_rate": 9.359408127407136e-06, "loss": 0.0528, "step": 9593 }, { "epoch": 1.6, "grad_norm": 0.7547743320465088, "learning_rate": 9.357606126054845e-06, "loss": 0.0494, "step": 9594 }, { "epoch": 1.6, "grad_norm": 0.4634789526462555, "learning_rate": 9.35580414564862e-06, "loss": 0.0446, "step": 9595 }, { "epoch": 1.6, "grad_norm": 0.5855703949928284, "learning_rate": 9.354002186247223e-06, "loss": 0.0625, "step": 9596 }, { "epoch": 1.61, "grad_norm": 0.47316974401474, "learning_rate": 9.3522002479094e-06, "loss": 0.0507, "step": 9597 }, { "epoch": 1.61, "grad_norm": 0.3964739143848419, "learning_rate": 9.350398330693908e-06, "loss": 0.0431, "step": 9598 }, { "epoch": 1.61, "grad_norm": 0.5217533707618713, "learning_rate": 9.348596434659505e-06, "loss": 0.061, "step": 9599 }, { "epoch": 1.61, "grad_norm": 0.3868138790130615, "learning_rate": 9.346794559864941e-06, "loss": 0.041, "step": 9600 }, { "epoch": 1.61, "grad_norm": 0.6834481954574585, "learning_rate": 9.344992706368967e-06, "loss": 0.0854, "step": 9601 }, { "epoch": 1.61, "grad_norm": 0.49040138721466064, "learning_rate": 9.343190874230335e-06, "loss": 0.0493, "step": 9602 }, { "epoch": 1.61, "grad_norm": 0.53706294298172, "learning_rate": 9.341389063507801e-06, "loss": 0.0327, "step": 9603 }, { "epoch": 1.61, "grad_norm": 0.34610608220100403, "learning_rate": 9.339587274260107e-06, "loss": 0.0355, "step": 9604 }, { "epoch": 1.61, "grad_norm": 0.591620922088623, "learning_rate": 9.337785506546009e-06, "loss": 0.0602, "step": 9605 }, { "epoch": 1.61, "grad_norm": 0.3697526454925537, "learning_rate": 9.335983760424257e-06, "loss": 0.0435, "step": 9606 }, { "epoch": 1.61, "grad_norm": 0.4644109904766083, "learning_rate": 9.334182035953591e-06, "loss": 0.0559, "step": 9607 }, { "epoch": 1.61, "grad_norm": 0.4690864384174347, "learning_rate": 9.33238033319277e-06, "loss": 0.0463, "step": 9608 }, { "epoch": 1.61, "grad_norm": 0.46920645236968994, "learning_rate": 9.330578652200528e-06, "loss": 0.0531, "step": 9609 }, { "epoch": 1.61, "grad_norm": 0.5952560901641846, "learning_rate": 9.32877699303562e-06, "loss": 0.0744, "step": 9610 }, { "epoch": 1.61, "grad_norm": 0.5550530552864075, "learning_rate": 9.326975355756793e-06, "loss": 0.0623, "step": 9611 }, { "epoch": 1.61, "grad_norm": 0.42459771037101746, "learning_rate": 9.325173740422783e-06, "loss": 0.0533, "step": 9612 }, { "epoch": 1.61, "grad_norm": 0.5275505185127258, "learning_rate": 9.323372147092341e-06, "loss": 0.0613, "step": 9613 }, { "epoch": 1.61, "grad_norm": 0.5002591013908386, "learning_rate": 9.32157057582421e-06, "loss": 0.0525, "step": 9614 }, { "epoch": 1.61, "grad_norm": 0.40121859312057495, "learning_rate": 9.319769026677125e-06, "loss": 0.0469, "step": 9615 }, { "epoch": 1.61, "grad_norm": 0.29376211762428284, "learning_rate": 9.31796749970984e-06, "loss": 0.03, "step": 9616 }, { "epoch": 1.61, "grad_norm": 0.4900602698326111, "learning_rate": 9.316165994981085e-06, "loss": 0.0425, "step": 9617 }, { "epoch": 1.61, "grad_norm": 0.40320366621017456, "learning_rate": 9.314364512549604e-06, "loss": 0.0675, "step": 9618 }, { "epoch": 1.61, "grad_norm": 0.4197483956813812, "learning_rate": 9.31256305247414e-06, "loss": 0.0469, "step": 9619 }, { "epoch": 1.61, "grad_norm": 0.6452180743217468, "learning_rate": 9.310761614813431e-06, "loss": 0.0588, "step": 9620 }, { "epoch": 1.61, "grad_norm": 0.6624560952186584, "learning_rate": 9.308960199626208e-06, "loss": 0.058, "step": 9621 }, { "epoch": 1.61, "grad_norm": 0.4537433683872223, "learning_rate": 9.307158806971216e-06, "loss": 0.0459, "step": 9622 }, { "epoch": 1.61, "grad_norm": 0.5786276459693909, "learning_rate": 9.305357436907187e-06, "loss": 0.0521, "step": 9623 }, { "epoch": 1.61, "grad_norm": 0.42619404196739197, "learning_rate": 9.303556089492866e-06, "loss": 0.0468, "step": 9624 }, { "epoch": 1.61, "grad_norm": 0.4432051181793213, "learning_rate": 9.301754764786977e-06, "loss": 0.0583, "step": 9625 }, { "epoch": 1.61, "grad_norm": 0.3500039875507355, "learning_rate": 9.299953462848258e-06, "loss": 0.0432, "step": 9626 }, { "epoch": 1.61, "grad_norm": 0.7482709288597107, "learning_rate": 9.298152183735447e-06, "loss": 0.0668, "step": 9627 }, { "epoch": 1.61, "grad_norm": 0.41316211223602295, "learning_rate": 9.296350927507275e-06, "loss": 0.0575, "step": 9628 }, { "epoch": 1.61, "grad_norm": 0.4948353171348572, "learning_rate": 9.294549694222469e-06, "loss": 0.0511, "step": 9629 }, { "epoch": 1.61, "grad_norm": 0.5028508901596069, "learning_rate": 9.292748483939767e-06, "loss": 0.0491, "step": 9630 }, { "epoch": 1.61, "grad_norm": 0.5536815524101257, "learning_rate": 9.290947296717898e-06, "loss": 0.0563, "step": 9631 }, { "epoch": 1.61, "grad_norm": 0.46920669078826904, "learning_rate": 9.289146132615588e-06, "loss": 0.0569, "step": 9632 }, { "epoch": 1.61, "grad_norm": 0.44888150691986084, "learning_rate": 9.287344991691572e-06, "loss": 0.0531, "step": 9633 }, { "epoch": 1.61, "grad_norm": 0.4851363003253937, "learning_rate": 9.285543874004577e-06, "loss": 0.0545, "step": 9634 }, { "epoch": 1.61, "grad_norm": 0.4580692648887634, "learning_rate": 9.283742779613326e-06, "loss": 0.0589, "step": 9635 }, { "epoch": 1.61, "grad_norm": 0.4265034794807434, "learning_rate": 9.281941708576554e-06, "loss": 0.0482, "step": 9636 }, { "epoch": 1.61, "grad_norm": 0.43887442350387573, "learning_rate": 9.28014066095298e-06, "loss": 0.057, "step": 9637 }, { "epoch": 1.61, "grad_norm": 0.49723726511001587, "learning_rate": 9.278339636801334e-06, "loss": 0.0525, "step": 9638 }, { "epoch": 1.61, "grad_norm": 0.6275441646575928, "learning_rate": 9.27653863618034e-06, "loss": 0.0482, "step": 9639 }, { "epoch": 1.61, "grad_norm": 0.429712176322937, "learning_rate": 9.274737659148718e-06, "loss": 0.0585, "step": 9640 }, { "epoch": 1.61, "grad_norm": 0.6132498979568481, "learning_rate": 9.272936705765195e-06, "loss": 0.07, "step": 9641 }, { "epoch": 1.61, "grad_norm": 0.36877021193504333, "learning_rate": 9.271135776088494e-06, "loss": 0.0514, "step": 9642 }, { "epoch": 1.61, "grad_norm": 0.4010089337825775, "learning_rate": 9.269334870177334e-06, "loss": 0.055, "step": 9643 }, { "epoch": 1.61, "grad_norm": 0.4141693711280823, "learning_rate": 9.267533988090438e-06, "loss": 0.0611, "step": 9644 }, { "epoch": 1.61, "grad_norm": 0.4526095390319824, "learning_rate": 9.265733129886524e-06, "loss": 0.06, "step": 9645 }, { "epoch": 1.61, "grad_norm": 0.5425195097923279, "learning_rate": 9.263932295624311e-06, "loss": 0.0493, "step": 9646 }, { "epoch": 1.61, "grad_norm": 0.5048269033432007, "learning_rate": 9.26213148536252e-06, "loss": 0.057, "step": 9647 }, { "epoch": 1.61, "grad_norm": 0.5368417501449585, "learning_rate": 9.26033069915987e-06, "loss": 0.0338, "step": 9648 }, { "epoch": 1.61, "grad_norm": 0.585685670375824, "learning_rate": 9.258529937075071e-06, "loss": 0.0735, "step": 9649 }, { "epoch": 1.61, "grad_norm": 0.6265912652015686, "learning_rate": 9.256729199166845e-06, "loss": 0.063, "step": 9650 }, { "epoch": 1.61, "grad_norm": 0.476738840341568, "learning_rate": 9.254928485493903e-06, "loss": 0.0537, "step": 9651 }, { "epoch": 1.61, "grad_norm": 0.46981877088546753, "learning_rate": 9.253127796114968e-06, "loss": 0.0749, "step": 9652 }, { "epoch": 1.61, "grad_norm": 0.4336041212081909, "learning_rate": 9.251327131088745e-06, "loss": 0.055, "step": 9653 }, { "epoch": 1.61, "grad_norm": 0.43998149037361145, "learning_rate": 9.249526490473948e-06, "loss": 0.0543, "step": 9654 }, { "epoch": 1.61, "grad_norm": 0.4393426477909088, "learning_rate": 9.247725874329297e-06, "loss": 0.0472, "step": 9655 }, { "epoch": 1.61, "grad_norm": 0.5892032384872437, "learning_rate": 9.245925282713496e-06, "loss": 0.0772, "step": 9656 }, { "epoch": 1.62, "grad_norm": 0.3950594663619995, "learning_rate": 9.244124715685253e-06, "loss": 0.0421, "step": 9657 }, { "epoch": 1.62, "grad_norm": 0.47402119636535645, "learning_rate": 9.242324173303283e-06, "loss": 0.0624, "step": 9658 }, { "epoch": 1.62, "grad_norm": 0.3858669698238373, "learning_rate": 9.240523655626299e-06, "loss": 0.042, "step": 9659 }, { "epoch": 1.62, "grad_norm": 0.445939838886261, "learning_rate": 9.238723162712998e-06, "loss": 0.0563, "step": 9660 }, { "epoch": 1.62, "grad_norm": 0.48992323875427246, "learning_rate": 9.236922694622094e-06, "loss": 0.0574, "step": 9661 }, { "epoch": 1.62, "grad_norm": 0.5577414035797119, "learning_rate": 9.235122251412299e-06, "loss": 0.0596, "step": 9662 }, { "epoch": 1.62, "grad_norm": 0.3871500790119171, "learning_rate": 9.233321833142306e-06, "loss": 0.0542, "step": 9663 }, { "epoch": 1.62, "grad_norm": 0.39207297563552856, "learning_rate": 9.23152143987083e-06, "loss": 0.0679, "step": 9664 }, { "epoch": 1.62, "grad_norm": 0.45491424202919006, "learning_rate": 9.229721071656568e-06, "loss": 0.0518, "step": 9665 }, { "epoch": 1.62, "grad_norm": 0.3773749768733978, "learning_rate": 9.22792072855823e-06, "loss": 0.0686, "step": 9666 }, { "epoch": 1.62, "grad_norm": 0.409006267786026, "learning_rate": 9.226120410634518e-06, "loss": 0.0386, "step": 9667 }, { "epoch": 1.62, "grad_norm": 0.48327794671058655, "learning_rate": 9.224320117944127e-06, "loss": 0.0526, "step": 9668 }, { "epoch": 1.62, "grad_norm": 0.4885021150112152, "learning_rate": 9.222519850545763e-06, "loss": 0.0401, "step": 9669 }, { "epoch": 1.62, "grad_norm": 0.45932167768478394, "learning_rate": 9.220719608498127e-06, "loss": 0.041, "step": 9670 }, { "epoch": 1.62, "grad_norm": 0.7993336915969849, "learning_rate": 9.218919391859913e-06, "loss": 0.066, "step": 9671 }, { "epoch": 1.62, "grad_norm": 0.4417302906513214, "learning_rate": 9.217119200689826e-06, "loss": 0.0619, "step": 9672 }, { "epoch": 1.62, "grad_norm": 0.4675024449825287, "learning_rate": 9.215319035046557e-06, "loss": 0.0651, "step": 9673 }, { "epoch": 1.62, "grad_norm": 0.5434013605117798, "learning_rate": 9.213518894988805e-06, "loss": 0.0688, "step": 9674 }, { "epoch": 1.62, "grad_norm": 0.5381692051887512, "learning_rate": 9.21171878057527e-06, "loss": 0.0541, "step": 9675 }, { "epoch": 1.62, "grad_norm": 0.38510143756866455, "learning_rate": 9.209918691864645e-06, "loss": 0.0435, "step": 9676 }, { "epoch": 1.62, "grad_norm": 0.4001007676124573, "learning_rate": 9.208118628915617e-06, "loss": 0.0572, "step": 9677 }, { "epoch": 1.62, "grad_norm": 0.40727880597114563, "learning_rate": 9.206318591786886e-06, "loss": 0.0458, "step": 9678 }, { "epoch": 1.62, "grad_norm": 0.43060699105262756, "learning_rate": 9.204518580537143e-06, "loss": 0.0617, "step": 9679 }, { "epoch": 1.62, "grad_norm": 0.522485077381134, "learning_rate": 9.202718595225085e-06, "loss": 0.068, "step": 9680 }, { "epoch": 1.62, "grad_norm": 0.6052435636520386, "learning_rate": 9.200918635909395e-06, "loss": 0.0618, "step": 9681 }, { "epoch": 1.62, "grad_norm": 0.38549134135246277, "learning_rate": 9.199118702648764e-06, "loss": 0.0556, "step": 9682 }, { "epoch": 1.62, "grad_norm": 0.4841987192630768, "learning_rate": 9.197318795501887e-06, "loss": 0.0527, "step": 9683 }, { "epoch": 1.62, "grad_norm": 0.37723132967948914, "learning_rate": 9.19551891452745e-06, "loss": 0.0446, "step": 9684 }, { "epoch": 1.62, "grad_norm": 0.49199992418289185, "learning_rate": 9.193719059784133e-06, "loss": 0.0494, "step": 9685 }, { "epoch": 1.62, "grad_norm": 0.39910250902175903, "learning_rate": 9.19191923133063e-06, "loss": 0.0445, "step": 9686 }, { "epoch": 1.62, "grad_norm": 0.4803677499294281, "learning_rate": 9.190119429225627e-06, "loss": 0.0471, "step": 9687 }, { "epoch": 1.62, "grad_norm": 0.4614807367324829, "learning_rate": 9.188319653527805e-06, "loss": 0.057, "step": 9688 }, { "epoch": 1.62, "grad_norm": 0.48556211590766907, "learning_rate": 9.186519904295849e-06, "loss": 0.045, "step": 9689 }, { "epoch": 1.62, "grad_norm": 0.49814099073410034, "learning_rate": 9.184720181588446e-06, "loss": 0.0499, "step": 9690 }, { "epoch": 1.62, "grad_norm": 0.4101003110408783, "learning_rate": 9.18292048546427e-06, "loss": 0.057, "step": 9691 }, { "epoch": 1.62, "grad_norm": 0.42415547370910645, "learning_rate": 9.181120815982013e-06, "loss": 0.0444, "step": 9692 }, { "epoch": 1.62, "grad_norm": 0.5155892372131348, "learning_rate": 9.179321173200344e-06, "loss": 0.0708, "step": 9693 }, { "epoch": 1.62, "grad_norm": 0.49431830644607544, "learning_rate": 9.177521557177953e-06, "loss": 0.0485, "step": 9694 }, { "epoch": 1.62, "grad_norm": 0.6494408845901489, "learning_rate": 9.175721967973516e-06, "loss": 0.0688, "step": 9695 }, { "epoch": 1.62, "grad_norm": 0.3737463653087616, "learning_rate": 9.173922405645704e-06, "loss": 0.0531, "step": 9696 }, { "epoch": 1.62, "grad_norm": 0.5236090421676636, "learning_rate": 9.172122870253204e-06, "loss": 0.0541, "step": 9697 }, { "epoch": 1.62, "grad_norm": 0.30904531478881836, "learning_rate": 9.170323361854685e-06, "loss": 0.0414, "step": 9698 }, { "epoch": 1.62, "grad_norm": 0.5652279257774353, "learning_rate": 9.168523880508823e-06, "loss": 0.064, "step": 9699 }, { "epoch": 1.62, "grad_norm": 0.6323594450950623, "learning_rate": 9.166724426274298e-06, "loss": 0.0579, "step": 9700 }, { "epoch": 1.62, "grad_norm": 0.5568949580192566, "learning_rate": 9.164924999209779e-06, "loss": 0.0533, "step": 9701 }, { "epoch": 1.62, "grad_norm": 0.4991038739681244, "learning_rate": 9.163125599373934e-06, "loss": 0.0585, "step": 9702 }, { "epoch": 1.62, "grad_norm": 0.4358745217323303, "learning_rate": 9.161326226825446e-06, "loss": 0.0455, "step": 9703 }, { "epoch": 1.62, "grad_norm": 0.6130101680755615, "learning_rate": 9.159526881622978e-06, "loss": 0.0373, "step": 9704 }, { "epoch": 1.62, "grad_norm": 0.5567045211791992, "learning_rate": 9.157727563825199e-06, "loss": 0.0458, "step": 9705 }, { "epoch": 1.62, "grad_norm": 0.48193174600601196, "learning_rate": 9.155928273490782e-06, "loss": 0.0394, "step": 9706 }, { "epoch": 1.62, "grad_norm": 0.5758301019668579, "learning_rate": 9.154129010678392e-06, "loss": 0.0486, "step": 9707 }, { "epoch": 1.62, "grad_norm": 0.4581091105937958, "learning_rate": 9.152329775446703e-06, "loss": 0.0343, "step": 9708 }, { "epoch": 1.62, "grad_norm": 0.6804196238517761, "learning_rate": 9.150530567854373e-06, "loss": 0.0605, "step": 9709 }, { "epoch": 1.62, "grad_norm": 0.526584267616272, "learning_rate": 9.14873138796007e-06, "loss": 0.0584, "step": 9710 }, { "epoch": 1.62, "grad_norm": 0.37971216440200806, "learning_rate": 9.146932235822461e-06, "loss": 0.0459, "step": 9711 }, { "epoch": 1.62, "grad_norm": 0.3301261067390442, "learning_rate": 9.14513311150021e-06, "loss": 0.0607, "step": 9712 }, { "epoch": 1.62, "grad_norm": 0.3033420741558075, "learning_rate": 9.143334015051973e-06, "loss": 0.0328, "step": 9713 }, { "epoch": 1.62, "grad_norm": 0.452450156211853, "learning_rate": 9.141534946536418e-06, "loss": 0.0622, "step": 9714 }, { "epoch": 1.62, "grad_norm": 0.499528706073761, "learning_rate": 9.139735906012208e-06, "loss": 0.0609, "step": 9715 }, { "epoch": 1.63, "grad_norm": 0.4936205744743347, "learning_rate": 9.137936893537995e-06, "loss": 0.0739, "step": 9716 }, { "epoch": 1.63, "grad_norm": 0.6419824957847595, "learning_rate": 9.136137909172443e-06, "loss": 0.0546, "step": 9717 }, { "epoch": 1.63, "grad_norm": 0.5073099732398987, "learning_rate": 9.134338952974213e-06, "loss": 0.0563, "step": 9718 }, { "epoch": 1.63, "grad_norm": 0.3813021779060364, "learning_rate": 9.132540025001955e-06, "loss": 0.0391, "step": 9719 }, { "epoch": 1.63, "grad_norm": 0.5066360235214233, "learning_rate": 9.130741125314333e-06, "loss": 0.0608, "step": 9720 }, { "epoch": 1.63, "grad_norm": 0.37371766567230225, "learning_rate": 9.128942253969995e-06, "loss": 0.0397, "step": 9721 }, { "epoch": 1.63, "grad_norm": 0.4547412097454071, "learning_rate": 9.1271434110276e-06, "loss": 0.0632, "step": 9722 }, { "epoch": 1.63, "grad_norm": 0.4385627806186676, "learning_rate": 9.125344596545804e-06, "loss": 0.0468, "step": 9723 }, { "epoch": 1.63, "grad_norm": 0.4455172121524811, "learning_rate": 9.123545810583252e-06, "loss": 0.0534, "step": 9724 }, { "epoch": 1.63, "grad_norm": 0.35941630601882935, "learning_rate": 9.121747053198604e-06, "loss": 0.0525, "step": 9725 }, { "epoch": 1.63, "grad_norm": 0.4541183114051819, "learning_rate": 9.119948324450504e-06, "loss": 0.0371, "step": 9726 }, { "epoch": 1.63, "grad_norm": 0.5015833973884583, "learning_rate": 9.118149624397604e-06, "loss": 0.0509, "step": 9727 }, { "epoch": 1.63, "grad_norm": 0.49624738097190857, "learning_rate": 9.116350953098556e-06, "loss": 0.0453, "step": 9728 }, { "epoch": 1.63, "grad_norm": 0.39991307258605957, "learning_rate": 9.114552310612004e-06, "loss": 0.0494, "step": 9729 }, { "epoch": 1.63, "grad_norm": 0.3181477189064026, "learning_rate": 9.112753696996595e-06, "loss": 0.0364, "step": 9730 }, { "epoch": 1.63, "grad_norm": 0.5277513861656189, "learning_rate": 9.11095511231098e-06, "loss": 0.0561, "step": 9731 }, { "epoch": 1.63, "grad_norm": 0.4838131070137024, "learning_rate": 9.1091565566138e-06, "loss": 0.0679, "step": 9732 }, { "epoch": 1.63, "grad_norm": 0.5250962972640991, "learning_rate": 9.107358029963698e-06, "loss": 0.0633, "step": 9733 }, { "epoch": 1.63, "grad_norm": 0.49449872970581055, "learning_rate": 9.10555953241932e-06, "loss": 0.0581, "step": 9734 }, { "epoch": 1.63, "grad_norm": 0.4303324222564697, "learning_rate": 9.103761064039303e-06, "loss": 0.0589, "step": 9735 }, { "epoch": 1.63, "grad_norm": 0.4305972158908844, "learning_rate": 9.1019626248823e-06, "loss": 0.0486, "step": 9736 }, { "epoch": 1.63, "grad_norm": 0.3525146245956421, "learning_rate": 9.100164215006941e-06, "loss": 0.0431, "step": 9737 }, { "epoch": 1.63, "grad_norm": 0.46954283118247986, "learning_rate": 9.098365834471867e-06, "loss": 0.0553, "step": 9738 }, { "epoch": 1.63, "grad_norm": 0.3703591823577881, "learning_rate": 9.096567483335723e-06, "loss": 0.056, "step": 9739 }, { "epoch": 1.63, "grad_norm": 0.4772043526172638, "learning_rate": 9.09476916165714e-06, "loss": 0.0496, "step": 9740 }, { "epoch": 1.63, "grad_norm": 0.4478888213634491, "learning_rate": 9.092970869494752e-06, "loss": 0.0451, "step": 9741 }, { "epoch": 1.63, "grad_norm": 0.45346739888191223, "learning_rate": 9.091172606907201e-06, "loss": 0.073, "step": 9742 }, { "epoch": 1.63, "grad_norm": 0.3949640095233917, "learning_rate": 9.089374373953122e-06, "loss": 0.0469, "step": 9743 }, { "epoch": 1.63, "grad_norm": 0.3723064064979553, "learning_rate": 9.087576170691142e-06, "loss": 0.0543, "step": 9744 }, { "epoch": 1.63, "grad_norm": 0.6750535368919373, "learning_rate": 9.0857779971799e-06, "loss": 0.0678, "step": 9745 }, { "epoch": 1.63, "grad_norm": 0.541602611541748, "learning_rate": 9.083979853478027e-06, "loss": 0.061, "step": 9746 }, { "epoch": 1.63, "grad_norm": 0.4233205318450928, "learning_rate": 9.082181739644147e-06, "loss": 0.0434, "step": 9747 }, { "epoch": 1.63, "grad_norm": 0.5141949653625488, "learning_rate": 9.080383655736901e-06, "loss": 0.0662, "step": 9748 }, { "epoch": 1.63, "grad_norm": 0.4966531991958618, "learning_rate": 9.078585601814907e-06, "loss": 0.0497, "step": 9749 }, { "epoch": 1.63, "grad_norm": 0.49135828018188477, "learning_rate": 9.0767875779368e-06, "loss": 0.0588, "step": 9750 }, { "epoch": 1.63, "grad_norm": 0.32612791657447815, "learning_rate": 9.074989584161204e-06, "loss": 0.0443, "step": 9751 }, { "epoch": 1.63, "grad_norm": 0.489639014005661, "learning_rate": 9.073191620546744e-06, "loss": 0.0576, "step": 9752 }, { "epoch": 1.63, "grad_norm": 0.4586259424686432, "learning_rate": 9.07139368715205e-06, "loss": 0.0443, "step": 9753 }, { "epoch": 1.63, "grad_norm": 0.5198731422424316, "learning_rate": 9.06959578403574e-06, "loss": 0.0597, "step": 9754 }, { "epoch": 1.63, "grad_norm": 0.37762752175331116, "learning_rate": 9.067797911256437e-06, "loss": 0.0405, "step": 9755 }, { "epoch": 1.63, "grad_norm": 0.49880221486091614, "learning_rate": 9.066000068872768e-06, "loss": 0.0613, "step": 9756 }, { "epoch": 1.63, "grad_norm": 0.5034342408180237, "learning_rate": 9.06420225694335e-06, "loss": 0.0607, "step": 9757 }, { "epoch": 1.63, "grad_norm": 0.4184573292732239, "learning_rate": 9.0624044755268e-06, "loss": 0.0484, "step": 9758 }, { "epoch": 1.63, "grad_norm": 0.3909977972507477, "learning_rate": 9.060606724681748e-06, "loss": 0.0368, "step": 9759 }, { "epoch": 1.63, "grad_norm": 0.3652976453304291, "learning_rate": 9.058809004466798e-06, "loss": 0.0526, "step": 9760 }, { "epoch": 1.63, "grad_norm": 0.4812532365322113, "learning_rate": 9.057011314940578e-06, "loss": 0.044, "step": 9761 }, { "epoch": 1.63, "grad_norm": 0.41685009002685547, "learning_rate": 9.055213656161699e-06, "loss": 0.0312, "step": 9762 }, { "epoch": 1.63, "grad_norm": 0.4147661626338959, "learning_rate": 9.053416028188773e-06, "loss": 0.0438, "step": 9763 }, { "epoch": 1.63, "grad_norm": 0.39705950021743774, "learning_rate": 9.051618431080423e-06, "loss": 0.0564, "step": 9764 }, { "epoch": 1.63, "grad_norm": 0.4890036880970001, "learning_rate": 9.049820864895252e-06, "loss": 0.0589, "step": 9765 }, { "epoch": 1.63, "grad_norm": 0.37065577507019043, "learning_rate": 9.048023329691876e-06, "loss": 0.0398, "step": 9766 }, { "epoch": 1.63, "grad_norm": 0.526596188545227, "learning_rate": 9.04622582552891e-06, "loss": 0.0645, "step": 9767 }, { "epoch": 1.63, "grad_norm": 0.5372962355613708, "learning_rate": 9.04442835246496e-06, "loss": 0.0617, "step": 9768 }, { "epoch": 1.63, "grad_norm": 0.43980705738067627, "learning_rate": 9.042630910558632e-06, "loss": 0.0758, "step": 9769 }, { "epoch": 1.63, "grad_norm": 0.5705177783966064, "learning_rate": 9.040833499868539e-06, "loss": 0.0542, "step": 9770 }, { "epoch": 1.63, "grad_norm": 0.4549551010131836, "learning_rate": 9.039036120453285e-06, "loss": 0.0462, "step": 9771 }, { "epoch": 1.63, "grad_norm": 0.4259052574634552, "learning_rate": 9.037238772371477e-06, "loss": 0.0474, "step": 9772 }, { "epoch": 1.63, "grad_norm": 0.4527498483657837, "learning_rate": 9.035441455681718e-06, "loss": 0.055, "step": 9773 }, { "epoch": 1.63, "grad_norm": 0.3388196527957916, "learning_rate": 9.033644170442616e-06, "loss": 0.033, "step": 9774 }, { "epoch": 1.63, "grad_norm": 0.3633366525173187, "learning_rate": 9.031846916712767e-06, "loss": 0.0416, "step": 9775 }, { "epoch": 1.64, "grad_norm": 0.5103726387023926, "learning_rate": 9.030049694550781e-06, "loss": 0.056, "step": 9776 }, { "epoch": 1.64, "grad_norm": 0.33437618613243103, "learning_rate": 9.028252504015251e-06, "loss": 0.0293, "step": 9777 }, { "epoch": 1.64, "grad_norm": 0.39306971430778503, "learning_rate": 9.026455345164782e-06, "loss": 0.039, "step": 9778 }, { "epoch": 1.64, "grad_norm": 0.43980976939201355, "learning_rate": 9.024658218057972e-06, "loss": 0.0509, "step": 9779 }, { "epoch": 1.64, "grad_norm": 0.47631168365478516, "learning_rate": 9.022861122753412e-06, "loss": 0.0369, "step": 9780 }, { "epoch": 1.64, "grad_norm": 0.47691890597343445, "learning_rate": 9.021064059309712e-06, "loss": 0.0587, "step": 9781 }, { "epoch": 1.64, "grad_norm": 0.4568897485733032, "learning_rate": 9.019267027785454e-06, "loss": 0.0437, "step": 9782 }, { "epoch": 1.64, "grad_norm": 0.5752278566360474, "learning_rate": 9.017470028239236e-06, "loss": 0.0682, "step": 9783 }, { "epoch": 1.64, "grad_norm": 0.6711852550506592, "learning_rate": 9.015673060729659e-06, "loss": 0.0626, "step": 9784 }, { "epoch": 1.64, "grad_norm": 0.4870499074459076, "learning_rate": 9.013876125315305e-06, "loss": 0.0407, "step": 9785 }, { "epoch": 1.64, "grad_norm": 0.431100994348526, "learning_rate": 9.012079222054772e-06, "loss": 0.0483, "step": 9786 }, { "epoch": 1.64, "grad_norm": 0.5078464150428772, "learning_rate": 9.01028235100665e-06, "loss": 0.064, "step": 9787 }, { "epoch": 1.64, "grad_norm": 0.45783475041389465, "learning_rate": 9.008485512229523e-06, "loss": 0.0678, "step": 9788 }, { "epoch": 1.64, "grad_norm": 0.49189692735671997, "learning_rate": 9.006688705781987e-06, "loss": 0.0497, "step": 9789 }, { "epoch": 1.64, "grad_norm": 0.5040202736854553, "learning_rate": 9.004891931722623e-06, "loss": 0.0334, "step": 9790 }, { "epoch": 1.64, "grad_norm": 0.4253709316253662, "learning_rate": 9.003095190110018e-06, "loss": 0.0485, "step": 9791 }, { "epoch": 1.64, "grad_norm": 0.4099646210670471, "learning_rate": 9.001298481002761e-06, "loss": 0.0463, "step": 9792 }, { "epoch": 1.64, "grad_norm": 0.5090892910957336, "learning_rate": 8.999501804459433e-06, "loss": 0.0518, "step": 9793 }, { "epoch": 1.64, "grad_norm": 0.3765868842601776, "learning_rate": 8.997705160538614e-06, "loss": 0.0395, "step": 9794 }, { "epoch": 1.64, "grad_norm": 0.6116255521774292, "learning_rate": 8.995908549298893e-06, "loss": 0.083, "step": 9795 }, { "epoch": 1.64, "grad_norm": 0.35095295310020447, "learning_rate": 8.994111970798846e-06, "loss": 0.0329, "step": 9796 }, { "epoch": 1.64, "grad_norm": 0.4465120732784271, "learning_rate": 8.99231542509705e-06, "loss": 0.0384, "step": 9797 }, { "epoch": 1.64, "grad_norm": 0.39684441685676575, "learning_rate": 8.99051891225209e-06, "loss": 0.0598, "step": 9798 }, { "epoch": 1.64, "grad_norm": 0.39684078097343445, "learning_rate": 8.988722432322543e-06, "loss": 0.0541, "step": 9799 }, { "epoch": 1.64, "grad_norm": 0.5215551853179932, "learning_rate": 8.986925985366977e-06, "loss": 0.0722, "step": 9800 }, { "epoch": 1.64, "grad_norm": 0.47298532724380493, "learning_rate": 8.985129571443975e-06, "loss": 0.06, "step": 9801 }, { "epoch": 1.64, "grad_norm": 0.5650551915168762, "learning_rate": 8.983333190612113e-06, "loss": 0.0727, "step": 9802 }, { "epoch": 1.64, "grad_norm": 0.3918949365615845, "learning_rate": 8.981536842929959e-06, "loss": 0.0481, "step": 9803 }, { "epoch": 1.64, "grad_norm": 0.40459296107292175, "learning_rate": 8.979740528456091e-06, "loss": 0.0481, "step": 9804 }, { "epoch": 1.64, "grad_norm": 0.342378169298172, "learning_rate": 8.977944247249071e-06, "loss": 0.0393, "step": 9805 }, { "epoch": 1.64, "grad_norm": 0.3829145133495331, "learning_rate": 8.976147999367478e-06, "loss": 0.0623, "step": 9806 }, { "epoch": 1.64, "grad_norm": 0.3878953456878662, "learning_rate": 8.97435178486988e-06, "loss": 0.0503, "step": 9807 }, { "epoch": 1.64, "grad_norm": 0.9457377791404724, "learning_rate": 8.972555603814836e-06, "loss": 0.0663, "step": 9808 }, { "epoch": 1.64, "grad_norm": 0.42333555221557617, "learning_rate": 8.970759456260926e-06, "loss": 0.0518, "step": 9809 }, { "epoch": 1.64, "grad_norm": 0.5924695134162903, "learning_rate": 8.968963342266705e-06, "loss": 0.0566, "step": 9810 }, { "epoch": 1.64, "grad_norm": 0.42490023374557495, "learning_rate": 8.96716726189074e-06, "loss": 0.0675, "step": 9811 }, { "epoch": 1.64, "grad_norm": 0.3893589377403259, "learning_rate": 8.9653712151916e-06, "loss": 0.0401, "step": 9812 }, { "epoch": 1.64, "grad_norm": 0.4199916422367096, "learning_rate": 8.96357520222784e-06, "loss": 0.0501, "step": 9813 }, { "epoch": 1.64, "grad_norm": 0.453809916973114, "learning_rate": 8.961779223058024e-06, "loss": 0.066, "step": 9814 }, { "epoch": 1.64, "grad_norm": 0.9114823341369629, "learning_rate": 8.959983277740715e-06, "loss": 0.0864, "step": 9815 }, { "epoch": 1.64, "grad_norm": 0.46952563524246216, "learning_rate": 8.958187366334468e-06, "loss": 0.0552, "step": 9816 }, { "epoch": 1.64, "grad_norm": 0.4752243459224701, "learning_rate": 8.956391488897846e-06, "loss": 0.0588, "step": 9817 }, { "epoch": 1.64, "grad_norm": 0.407904714345932, "learning_rate": 8.9545956454894e-06, "loss": 0.0459, "step": 9818 }, { "epoch": 1.64, "grad_norm": 0.5565629601478577, "learning_rate": 8.952799836167685e-06, "loss": 0.0663, "step": 9819 }, { "epoch": 1.64, "grad_norm": 0.3332575559616089, "learning_rate": 8.951004060991265e-06, "loss": 0.046, "step": 9820 }, { "epoch": 1.64, "grad_norm": 0.49072906374931335, "learning_rate": 8.949208320018686e-06, "loss": 0.0516, "step": 9821 }, { "epoch": 1.64, "grad_norm": 0.47137024998664856, "learning_rate": 8.947412613308498e-06, "loss": 0.0558, "step": 9822 }, { "epoch": 1.64, "grad_norm": 0.4158594310283661, "learning_rate": 8.94561694091926e-06, "loss": 0.0614, "step": 9823 }, { "epoch": 1.64, "grad_norm": 0.5805002450942993, "learning_rate": 8.94382130290952e-06, "loss": 0.0765, "step": 9824 }, { "epoch": 1.64, "grad_norm": 0.48866915702819824, "learning_rate": 8.942025699337822e-06, "loss": 0.0548, "step": 9825 }, { "epoch": 1.64, "grad_norm": 0.3645532727241516, "learning_rate": 8.940230130262718e-06, "loss": 0.0342, "step": 9826 }, { "epoch": 1.64, "grad_norm": 0.34837737679481506, "learning_rate": 8.938434595742756e-06, "loss": 0.0435, "step": 9827 }, { "epoch": 1.64, "grad_norm": 0.5647616982460022, "learning_rate": 8.936639095836477e-06, "loss": 0.0525, "step": 9828 }, { "epoch": 1.64, "grad_norm": 0.5507311224937439, "learning_rate": 8.934843630602432e-06, "loss": 0.0564, "step": 9829 }, { "epoch": 1.64, "grad_norm": 0.4973065257072449, "learning_rate": 8.933048200099156e-06, "loss": 0.0553, "step": 9830 }, { "epoch": 1.64, "grad_norm": 0.4945051670074463, "learning_rate": 8.931252804385198e-06, "loss": 0.0419, "step": 9831 }, { "epoch": 1.64, "grad_norm": 0.45376408100128174, "learning_rate": 8.929457443519101e-06, "loss": 0.0682, "step": 9832 }, { "epoch": 1.64, "grad_norm": 0.48813673853874207, "learning_rate": 8.927662117559396e-06, "loss": 0.0536, "step": 9833 }, { "epoch": 1.64, "grad_norm": 0.4024374485015869, "learning_rate": 8.92586682656463e-06, "loss": 0.0525, "step": 9834 }, { "epoch": 1.64, "grad_norm": 0.5208032727241516, "learning_rate": 8.924071570593339e-06, "loss": 0.044, "step": 9835 }, { "epoch": 1.65, "grad_norm": 0.4563608169555664, "learning_rate": 8.922276349704054e-06, "loss": 0.0581, "step": 9836 }, { "epoch": 1.65, "grad_norm": 0.5316576957702637, "learning_rate": 8.920481163955321e-06, "loss": 0.0602, "step": 9837 }, { "epoch": 1.65, "grad_norm": 0.3080257773399353, "learning_rate": 8.918686013405665e-06, "loss": 0.0375, "step": 9838 }, { "epoch": 1.65, "grad_norm": 0.3577178120613098, "learning_rate": 8.91689089811362e-06, "loss": 0.0353, "step": 9839 }, { "epoch": 1.65, "grad_norm": 0.4054737091064453, "learning_rate": 8.915095818137726e-06, "loss": 0.0583, "step": 9840 }, { "epoch": 1.65, "grad_norm": 0.35426509380340576, "learning_rate": 8.913300773536505e-06, "loss": 0.069, "step": 9841 }, { "epoch": 1.65, "grad_norm": 0.4871712327003479, "learning_rate": 8.911505764368488e-06, "loss": 0.0604, "step": 9842 }, { "epoch": 1.65, "grad_norm": 0.49511319398880005, "learning_rate": 8.90971079069221e-06, "loss": 0.0626, "step": 9843 }, { "epoch": 1.65, "grad_norm": 0.5993711948394775, "learning_rate": 8.90791585256619e-06, "loss": 0.0546, "step": 9844 }, { "epoch": 1.65, "grad_norm": 0.4498025178909302, "learning_rate": 8.906120950048961e-06, "loss": 0.0415, "step": 9845 }, { "epoch": 1.65, "grad_norm": 0.5425281524658203, "learning_rate": 8.904326083199044e-06, "loss": 0.0582, "step": 9846 }, { "epoch": 1.65, "grad_norm": 0.5068477988243103, "learning_rate": 8.902531252074963e-06, "loss": 0.057, "step": 9847 }, { "epoch": 1.65, "grad_norm": 0.5025370121002197, "learning_rate": 8.900736456735245e-06, "loss": 0.0656, "step": 9848 }, { "epoch": 1.65, "grad_norm": 0.35632577538490295, "learning_rate": 8.898941697238407e-06, "loss": 0.0421, "step": 9849 }, { "epoch": 1.65, "grad_norm": 0.4713174104690552, "learning_rate": 8.897146973642969e-06, "loss": 0.0633, "step": 9850 }, { "epoch": 1.65, "grad_norm": 0.4241233468055725, "learning_rate": 8.895352286007454e-06, "loss": 0.0456, "step": 9851 }, { "epoch": 1.65, "grad_norm": 0.28871506452560425, "learning_rate": 8.893557634390382e-06, "loss": 0.0429, "step": 9852 }, { "epoch": 1.65, "grad_norm": 0.4303011894226074, "learning_rate": 8.89176301885026e-06, "loss": 0.0371, "step": 9853 }, { "epoch": 1.65, "grad_norm": 0.5596973299980164, "learning_rate": 8.889968439445611e-06, "loss": 0.0587, "step": 9854 }, { "epoch": 1.65, "grad_norm": 0.46084657311439514, "learning_rate": 8.88817389623495e-06, "loss": 0.0547, "step": 9855 }, { "epoch": 1.65, "grad_norm": 0.3513372242450714, "learning_rate": 8.886379389276787e-06, "loss": 0.0678, "step": 9856 }, { "epoch": 1.65, "grad_norm": 0.5139215588569641, "learning_rate": 8.884584918629638e-06, "loss": 0.066, "step": 9857 }, { "epoch": 1.65, "grad_norm": 0.4543490409851074, "learning_rate": 8.882790484352008e-06, "loss": 0.0574, "step": 9858 }, { "epoch": 1.65, "grad_norm": 0.32744744420051575, "learning_rate": 8.880996086502414e-06, "loss": 0.0463, "step": 9859 }, { "epoch": 1.65, "grad_norm": 0.41967830061912537, "learning_rate": 8.879201725139362e-06, "loss": 0.0475, "step": 9860 }, { "epoch": 1.65, "grad_norm": 0.2784578204154968, "learning_rate": 8.877407400321355e-06, "loss": 0.045, "step": 9861 }, { "epoch": 1.65, "grad_norm": 0.3437615633010864, "learning_rate": 8.875613112106905e-06, "loss": 0.0497, "step": 9862 }, { "epoch": 1.65, "grad_norm": 0.4962197244167328, "learning_rate": 8.873818860554518e-06, "loss": 0.061, "step": 9863 }, { "epoch": 1.65, "grad_norm": 0.3563396632671356, "learning_rate": 8.87202464572269e-06, "loss": 0.0386, "step": 9864 }, { "epoch": 1.65, "grad_norm": 0.3689951002597809, "learning_rate": 8.870230467669932e-06, "loss": 0.0513, "step": 9865 }, { "epoch": 1.65, "grad_norm": 0.4883955717086792, "learning_rate": 8.868436326454741e-06, "loss": 0.0467, "step": 9866 }, { "epoch": 1.65, "grad_norm": 0.4049631655216217, "learning_rate": 8.866642222135616e-06, "loss": 0.0513, "step": 9867 }, { "epoch": 1.65, "grad_norm": 0.322483092546463, "learning_rate": 8.864848154771061e-06, "loss": 0.051, "step": 9868 }, { "epoch": 1.65, "grad_norm": 0.39067763090133667, "learning_rate": 8.86305412441957e-06, "loss": 0.0437, "step": 9869 }, { "epoch": 1.65, "grad_norm": 0.42544129490852356, "learning_rate": 8.861260131139638e-06, "loss": 0.0475, "step": 9870 }, { "epoch": 1.65, "grad_norm": 0.3196133077144623, "learning_rate": 8.859466174989769e-06, "loss": 0.0358, "step": 9871 }, { "epoch": 1.65, "grad_norm": 0.3713817000389099, "learning_rate": 8.857672256028444e-06, "loss": 0.0458, "step": 9872 }, { "epoch": 1.65, "grad_norm": 0.39813992381095886, "learning_rate": 8.85587837431417e-06, "loss": 0.0483, "step": 9873 }, { "epoch": 1.65, "grad_norm": 0.47053253650665283, "learning_rate": 8.85408452990543e-06, "loss": 0.0492, "step": 9874 }, { "epoch": 1.65, "grad_norm": 0.3668985962867737, "learning_rate": 8.852290722860713e-06, "loss": 0.041, "step": 9875 }, { "epoch": 1.65, "grad_norm": 0.4938121736049652, "learning_rate": 8.850496953238517e-06, "loss": 0.0391, "step": 9876 }, { "epoch": 1.65, "grad_norm": 0.5808262825012207, "learning_rate": 8.848703221097324e-06, "loss": 0.0551, "step": 9877 }, { "epoch": 1.65, "grad_norm": 0.49420803785324097, "learning_rate": 8.84690952649562e-06, "loss": 0.0618, "step": 9878 }, { "epoch": 1.65, "grad_norm": 0.4442344903945923, "learning_rate": 8.845115869491896e-06, "loss": 0.0678, "step": 9879 }, { "epoch": 1.65, "grad_norm": 0.48765888810157776, "learning_rate": 8.843322250144634e-06, "loss": 0.0594, "step": 9880 }, { "epoch": 1.65, "grad_norm": 0.5084838271141052, "learning_rate": 8.841528668512312e-06, "loss": 0.0641, "step": 9881 }, { "epoch": 1.65, "grad_norm": 0.5184617638587952, "learning_rate": 8.839735124653417e-06, "loss": 0.0543, "step": 9882 }, { "epoch": 1.65, "grad_norm": 0.47111380100250244, "learning_rate": 8.837941618626433e-06, "loss": 0.0495, "step": 9883 }, { "epoch": 1.65, "grad_norm": 0.35886910557746887, "learning_rate": 8.836148150489831e-06, "loss": 0.0552, "step": 9884 }, { "epoch": 1.65, "grad_norm": 0.4424891769886017, "learning_rate": 8.834354720302099e-06, "loss": 0.0656, "step": 9885 }, { "epoch": 1.65, "grad_norm": 0.47342345118522644, "learning_rate": 8.832561328121704e-06, "loss": 0.0519, "step": 9886 }, { "epoch": 1.65, "grad_norm": 0.4414401650428772, "learning_rate": 8.83076797400713e-06, "loss": 0.0592, "step": 9887 }, { "epoch": 1.65, "grad_norm": 0.345663458108902, "learning_rate": 8.82897465801685e-06, "loss": 0.0461, "step": 9888 }, { "epoch": 1.65, "grad_norm": 0.5231429934501648, "learning_rate": 8.827181380209333e-06, "loss": 0.0686, "step": 9889 }, { "epoch": 1.65, "grad_norm": 0.5882371068000793, "learning_rate": 8.825388140643056e-06, "loss": 0.0668, "step": 9890 }, { "epoch": 1.65, "grad_norm": 0.3713085949420929, "learning_rate": 8.82359493937649e-06, "loss": 0.0449, "step": 9891 }, { "epoch": 1.65, "grad_norm": 0.498221755027771, "learning_rate": 8.821801776468095e-06, "loss": 0.0435, "step": 9892 }, { "epoch": 1.65, "grad_norm": 0.364717036485672, "learning_rate": 8.820008651976354e-06, "loss": 0.0392, "step": 9893 }, { "epoch": 1.65, "grad_norm": 1.8218716382980347, "learning_rate": 8.818215565959723e-06, "loss": 0.0414, "step": 9894 }, { "epoch": 1.65, "grad_norm": 0.4881344139575958, "learning_rate": 8.816422518476671e-06, "loss": 0.041, "step": 9895 }, { "epoch": 1.66, "grad_norm": 0.5535500049591064, "learning_rate": 8.814629509585668e-06, "loss": 0.0393, "step": 9896 }, { "epoch": 1.66, "grad_norm": 0.3731977343559265, "learning_rate": 8.81283653934517e-06, "loss": 0.0593, "step": 9897 }, { "epoch": 1.66, "grad_norm": 0.3796752691268921, "learning_rate": 8.81104360781364e-06, "loss": 0.0529, "step": 9898 }, { "epoch": 1.66, "grad_norm": 0.4745361804962158, "learning_rate": 8.809250715049543e-06, "loss": 0.0675, "step": 9899 }, { "epoch": 1.66, "grad_norm": 0.5747207999229431, "learning_rate": 8.807457861111333e-06, "loss": 0.0544, "step": 9900 }, { "epoch": 1.66, "grad_norm": 0.4535728394985199, "learning_rate": 8.805665046057476e-06, "loss": 0.047, "step": 9901 }, { "epoch": 1.66, "grad_norm": 0.32304906845092773, "learning_rate": 8.803872269946421e-06, "loss": 0.0389, "step": 9902 }, { "epoch": 1.66, "grad_norm": 0.357178658246994, "learning_rate": 8.802079532836627e-06, "loss": 0.0436, "step": 9903 }, { "epoch": 1.66, "grad_norm": 0.4031969904899597, "learning_rate": 8.800286834786551e-06, "loss": 0.0524, "step": 9904 }, { "epoch": 1.66, "grad_norm": 0.46954235434532166, "learning_rate": 8.798494175854642e-06, "loss": 0.0675, "step": 9905 }, { "epoch": 1.66, "grad_norm": 0.3924162685871124, "learning_rate": 8.796701556099355e-06, "loss": 0.0643, "step": 9906 }, { "epoch": 1.66, "grad_norm": 0.5692342519760132, "learning_rate": 8.794908975579138e-06, "loss": 0.0362, "step": 9907 }, { "epoch": 1.66, "grad_norm": 0.4964548945426941, "learning_rate": 8.793116434352445e-06, "loss": 0.0525, "step": 9908 }, { "epoch": 1.66, "grad_norm": 0.5575105547904968, "learning_rate": 8.791323932477714e-06, "loss": 0.0467, "step": 9909 }, { "epoch": 1.66, "grad_norm": 0.5142772793769836, "learning_rate": 8.789531470013403e-06, "loss": 0.0626, "step": 9910 }, { "epoch": 1.66, "grad_norm": 0.4905216097831726, "learning_rate": 8.787739047017954e-06, "loss": 0.0706, "step": 9911 }, { "epoch": 1.66, "grad_norm": 0.5914981961250305, "learning_rate": 8.785946663549808e-06, "loss": 0.0559, "step": 9912 }, { "epoch": 1.66, "grad_norm": 0.5125390887260437, "learning_rate": 8.784154319667413e-06, "loss": 0.0516, "step": 9913 }, { "epoch": 1.66, "grad_norm": 0.42066240310668945, "learning_rate": 8.782362015429202e-06, "loss": 0.0393, "step": 9914 }, { "epoch": 1.66, "grad_norm": 0.5232331156730652, "learning_rate": 8.780569750893626e-06, "loss": 0.0901, "step": 9915 }, { "epoch": 1.66, "grad_norm": 0.4759841561317444, "learning_rate": 8.77877752611912e-06, "loss": 0.0586, "step": 9916 }, { "epoch": 1.66, "grad_norm": 0.3958355784416199, "learning_rate": 8.776985341164117e-06, "loss": 0.0337, "step": 9917 }, { "epoch": 1.66, "grad_norm": 0.5701013803482056, "learning_rate": 8.77519319608706e-06, "loss": 0.0572, "step": 9918 }, { "epoch": 1.66, "grad_norm": 0.5650581121444702, "learning_rate": 8.773401090946382e-06, "loss": 0.0479, "step": 9919 }, { "epoch": 1.66, "grad_norm": 0.5947343707084656, "learning_rate": 8.771609025800514e-06, "loss": 0.0607, "step": 9920 }, { "epoch": 1.66, "grad_norm": 0.625277578830719, "learning_rate": 8.769817000707896e-06, "loss": 0.0642, "step": 9921 }, { "epoch": 1.66, "grad_norm": 0.3916262686252594, "learning_rate": 8.76802501572695e-06, "loss": 0.0344, "step": 9922 }, { "epoch": 1.66, "grad_norm": 0.37623557448387146, "learning_rate": 8.76623307091611e-06, "loss": 0.0512, "step": 9923 }, { "epoch": 1.66, "grad_norm": 0.4305518865585327, "learning_rate": 8.764441166333809e-06, "loss": 0.0537, "step": 9924 }, { "epoch": 1.66, "grad_norm": 0.4755187928676605, "learning_rate": 8.762649302038468e-06, "loss": 0.0651, "step": 9925 }, { "epoch": 1.66, "grad_norm": 0.2765856385231018, "learning_rate": 8.760857478088511e-06, "loss": 0.0351, "step": 9926 }, { "epoch": 1.66, "grad_norm": 0.5759363174438477, "learning_rate": 8.759065694542373e-06, "loss": 0.0778, "step": 9927 }, { "epoch": 1.66, "grad_norm": 0.34360069036483765, "learning_rate": 8.757273951458467e-06, "loss": 0.0441, "step": 9928 }, { "epoch": 1.66, "grad_norm": 0.5824199318885803, "learning_rate": 8.755482248895224e-06, "loss": 0.0757, "step": 9929 }, { "epoch": 1.66, "grad_norm": 0.5174678564071655, "learning_rate": 8.753690586911059e-06, "loss": 0.0427, "step": 9930 }, { "epoch": 1.66, "grad_norm": 0.4173775315284729, "learning_rate": 8.751898965564388e-06, "loss": 0.0468, "step": 9931 }, { "epoch": 1.66, "grad_norm": 0.33420926332473755, "learning_rate": 8.750107384913637e-06, "loss": 0.0493, "step": 9932 }, { "epoch": 1.66, "grad_norm": 0.6152409911155701, "learning_rate": 8.748315845017219e-06, "loss": 0.057, "step": 9933 }, { "epoch": 1.66, "grad_norm": 0.47809261083602905, "learning_rate": 8.74652434593355e-06, "loss": 0.061, "step": 9934 }, { "epoch": 1.66, "grad_norm": 0.3351997137069702, "learning_rate": 8.744732887721044e-06, "loss": 0.0441, "step": 9935 }, { "epoch": 1.66, "grad_norm": 0.6263273358345032, "learning_rate": 8.742941470438116e-06, "loss": 0.0459, "step": 9936 }, { "epoch": 1.66, "grad_norm": 0.40101319551467896, "learning_rate": 8.741150094143169e-06, "loss": 0.0593, "step": 9937 }, { "epoch": 1.66, "grad_norm": 0.4673190116882324, "learning_rate": 8.739358758894622e-06, "loss": 0.0464, "step": 9938 }, { "epoch": 1.66, "grad_norm": 0.4188482463359833, "learning_rate": 8.737567464750883e-06, "loss": 0.052, "step": 9939 }, { "epoch": 1.66, "grad_norm": 0.5794010758399963, "learning_rate": 8.735776211770354e-06, "loss": 0.0386, "step": 9940 }, { "epoch": 1.66, "grad_norm": 0.39244481921195984, "learning_rate": 8.733985000011447e-06, "loss": 0.0579, "step": 9941 }, { "epoch": 1.66, "grad_norm": 0.35142982006073, "learning_rate": 8.732193829532562e-06, "loss": 0.0472, "step": 9942 }, { "epoch": 1.66, "grad_norm": 0.4092019498348236, "learning_rate": 8.730402700392105e-06, "loss": 0.0388, "step": 9943 }, { "epoch": 1.66, "grad_norm": 0.5511279106140137, "learning_rate": 8.728611612648482e-06, "loss": 0.044, "step": 9944 }, { "epoch": 1.66, "grad_norm": 0.5833755135536194, "learning_rate": 8.726820566360082e-06, "loss": 0.0542, "step": 9945 }, { "epoch": 1.66, "grad_norm": 0.5371293425559998, "learning_rate": 8.725029561585315e-06, "loss": 0.0457, "step": 9946 }, { "epoch": 1.66, "grad_norm": 0.6041122674942017, "learning_rate": 8.723238598382577e-06, "loss": 0.0452, "step": 9947 }, { "epoch": 1.66, "grad_norm": 0.4380130171775818, "learning_rate": 8.72144767681026e-06, "loss": 0.0567, "step": 9948 }, { "epoch": 1.66, "grad_norm": 0.4433382451534271, "learning_rate": 8.719656796926766e-06, "loss": 0.0452, "step": 9949 }, { "epoch": 1.66, "grad_norm": 0.46672070026397705, "learning_rate": 8.717865958790484e-06, "loss": 0.0615, "step": 9950 }, { "epoch": 1.66, "grad_norm": 0.5519267320632935, "learning_rate": 8.716075162459806e-06, "loss": 0.0486, "step": 9951 }, { "epoch": 1.66, "grad_norm": 0.3576904237270355, "learning_rate": 8.714284407993128e-06, "loss": 0.0496, "step": 9952 }, { "epoch": 1.66, "grad_norm": 0.49364739656448364, "learning_rate": 8.712493695448837e-06, "loss": 0.0765, "step": 9953 }, { "epoch": 1.66, "grad_norm": 0.4311043918132782, "learning_rate": 8.710703024885316e-06, "loss": 0.0591, "step": 9954 }, { "epoch": 1.66, "grad_norm": 0.4442286193370819, "learning_rate": 8.708912396360963e-06, "loss": 0.0575, "step": 9955 }, { "epoch": 1.67, "grad_norm": 0.40627506375312805, "learning_rate": 8.707121809934155e-06, "loss": 0.062, "step": 9956 }, { "epoch": 1.67, "grad_norm": 0.31780150532722473, "learning_rate": 8.705331265663282e-06, "loss": 0.032, "step": 9957 }, { "epoch": 1.67, "grad_norm": 0.5029049515724182, "learning_rate": 8.703540763606724e-06, "loss": 0.069, "step": 9958 }, { "epoch": 1.67, "grad_norm": 0.4719029664993286, "learning_rate": 8.70175030382286e-06, "loss": 0.055, "step": 9959 }, { "epoch": 1.67, "grad_norm": 0.8999128937721252, "learning_rate": 8.699959886370078e-06, "loss": 0.0631, "step": 9960 }, { "epoch": 1.67, "grad_norm": 0.3377569317817688, "learning_rate": 8.698169511306752e-06, "loss": 0.0429, "step": 9961 }, { "epoch": 1.67, "grad_norm": 0.46175292134284973, "learning_rate": 8.696379178691256e-06, "loss": 0.0464, "step": 9962 }, { "epoch": 1.67, "grad_norm": 0.387256920337677, "learning_rate": 8.69458888858197e-06, "loss": 0.0306, "step": 9963 }, { "epoch": 1.67, "grad_norm": 0.5155298709869385, "learning_rate": 8.692798641037272e-06, "loss": 0.0564, "step": 9964 }, { "epoch": 1.67, "grad_norm": 0.3249005377292633, "learning_rate": 8.691008436115527e-06, "loss": 0.0339, "step": 9965 }, { "epoch": 1.67, "grad_norm": 0.5237707495689392, "learning_rate": 8.689218273875112e-06, "loss": 0.0765, "step": 9966 }, { "epoch": 1.67, "grad_norm": 0.4617743492126465, "learning_rate": 8.687428154374402e-06, "loss": 0.0493, "step": 9967 }, { "epoch": 1.67, "grad_norm": 0.40463948249816895, "learning_rate": 8.685638077671753e-06, "loss": 0.049, "step": 9968 }, { "epoch": 1.67, "grad_norm": 0.6576496362686157, "learning_rate": 8.683848043825548e-06, "loss": 0.0618, "step": 9969 }, { "epoch": 1.67, "grad_norm": 0.4679125249385834, "learning_rate": 8.682058052894143e-06, "loss": 0.0522, "step": 9970 }, { "epoch": 1.67, "grad_norm": 0.8613434433937073, "learning_rate": 8.680268104935905e-06, "loss": 0.0467, "step": 9971 }, { "epoch": 1.67, "grad_norm": 0.4724535048007965, "learning_rate": 8.678478200009204e-06, "loss": 0.0645, "step": 9972 }, { "epoch": 1.67, "grad_norm": 0.5321024060249329, "learning_rate": 8.67668833817239e-06, "loss": 0.0507, "step": 9973 }, { "epoch": 1.67, "grad_norm": 0.3585551679134369, "learning_rate": 8.674898519483833e-06, "loss": 0.0488, "step": 9974 }, { "epoch": 1.67, "grad_norm": 0.6038668751716614, "learning_rate": 8.673108744001893e-06, "loss": 0.0519, "step": 9975 }, { "epoch": 1.67, "grad_norm": 0.4696156084537506, "learning_rate": 8.671319011784918e-06, "loss": 0.0576, "step": 9976 }, { "epoch": 1.67, "grad_norm": 0.3916790783405304, "learning_rate": 8.669529322891277e-06, "loss": 0.0525, "step": 9977 }, { "epoch": 1.67, "grad_norm": 0.48304182291030884, "learning_rate": 8.667739677379317e-06, "loss": 0.0492, "step": 9978 }, { "epoch": 1.67, "grad_norm": 0.39025449752807617, "learning_rate": 8.665950075307392e-06, "loss": 0.031, "step": 9979 }, { "epoch": 1.67, "grad_norm": 0.2835172414779663, "learning_rate": 8.664160516733859e-06, "loss": 0.039, "step": 9980 }, { "epoch": 1.67, "grad_norm": 0.3837701380252838, "learning_rate": 8.662371001717065e-06, "loss": 0.0493, "step": 9981 }, { "epoch": 1.67, "grad_norm": 0.36005616188049316, "learning_rate": 8.660581530315359e-06, "loss": 0.048, "step": 9982 }, { "epoch": 1.67, "grad_norm": 1.0872026681900024, "learning_rate": 8.658792102587093e-06, "loss": 0.0583, "step": 9983 }, { "epoch": 1.67, "grad_norm": 0.3862382769584656, "learning_rate": 8.657002718590608e-06, "loss": 0.0497, "step": 9984 }, { "epoch": 1.67, "grad_norm": 0.38570424914360046, "learning_rate": 8.655213378384255e-06, "loss": 0.0464, "step": 9985 }, { "epoch": 1.67, "grad_norm": 0.5101261138916016, "learning_rate": 8.653424082026374e-06, "loss": 0.0647, "step": 9986 }, { "epoch": 1.67, "grad_norm": 0.31143131852149963, "learning_rate": 8.651634829575303e-06, "loss": 0.0551, "step": 9987 }, { "epoch": 1.67, "grad_norm": 0.4694998860359192, "learning_rate": 8.649845621089395e-06, "loss": 0.0543, "step": 9988 }, { "epoch": 1.67, "grad_norm": 0.5968002080917358, "learning_rate": 8.648056456626982e-06, "loss": 0.0536, "step": 9989 }, { "epoch": 1.67, "grad_norm": 0.44610193371772766, "learning_rate": 8.646267336246397e-06, "loss": 0.061, "step": 9990 }, { "epoch": 1.67, "grad_norm": 0.3640710115432739, "learning_rate": 8.644478260005984e-06, "loss": 0.0518, "step": 9991 }, { "epoch": 1.67, "grad_norm": 0.36198684573173523, "learning_rate": 8.642689227964079e-06, "loss": 0.0467, "step": 9992 }, { "epoch": 1.67, "grad_norm": 0.436349093914032, "learning_rate": 8.640900240179007e-06, "loss": 0.0534, "step": 9993 }, { "epoch": 1.67, "grad_norm": 0.36180463433265686, "learning_rate": 8.63911129670911e-06, "loss": 0.0394, "step": 9994 }, { "epoch": 1.67, "grad_norm": 0.45337799191474915, "learning_rate": 8.637322397612713e-06, "loss": 0.0449, "step": 9995 }, { "epoch": 1.67, "grad_norm": 0.4353838264942169, "learning_rate": 8.635533542948144e-06, "loss": 0.0531, "step": 9996 }, { "epoch": 1.67, "grad_norm": 0.3232569694519043, "learning_rate": 8.63374473277374e-06, "loss": 0.0469, "step": 9997 }, { "epoch": 1.67, "grad_norm": 0.6930415034294128, "learning_rate": 8.631955967147815e-06, "loss": 0.0613, "step": 9998 }, { "epoch": 1.67, "grad_norm": 0.3990422189235687, "learning_rate": 8.630167246128702e-06, "loss": 0.0528, "step": 9999 }, { "epoch": 1.67, "grad_norm": 0.37654009461402893, "learning_rate": 8.628378569774725e-06, "loss": 0.0628, "step": 10000 }, { "epoch": 1.67, "grad_norm": 0.3830237090587616, "learning_rate": 8.626589938144202e-06, "loss": 0.0457, "step": 10001 }, { "epoch": 1.67, "grad_norm": 0.5614100098609924, "learning_rate": 8.624801351295453e-06, "loss": 0.064, "step": 10002 }, { "epoch": 1.67, "grad_norm": 0.5392687320709229, "learning_rate": 8.623012809286804e-06, "loss": 0.0426, "step": 10003 }, { "epoch": 1.67, "grad_norm": 0.36004379391670227, "learning_rate": 8.621224312176563e-06, "loss": 0.0532, "step": 10004 }, { "epoch": 1.67, "grad_norm": 0.3884410560131073, "learning_rate": 8.619435860023058e-06, "loss": 0.0409, "step": 10005 }, { "epoch": 1.67, "grad_norm": 0.4980196952819824, "learning_rate": 8.617647452884592e-06, "loss": 0.0557, "step": 10006 }, { "epoch": 1.67, "grad_norm": 0.7820681929588318, "learning_rate": 8.615859090819484e-06, "loss": 0.0401, "step": 10007 }, { "epoch": 1.67, "grad_norm": 0.37143540382385254, "learning_rate": 8.614070773886046e-06, "loss": 0.0565, "step": 10008 }, { "epoch": 1.67, "grad_norm": 0.5175699591636658, "learning_rate": 8.612282502142588e-06, "loss": 0.0597, "step": 10009 }, { "epoch": 1.67, "grad_norm": 0.5421616435050964, "learning_rate": 8.610494275647415e-06, "loss": 0.0451, "step": 10010 }, { "epoch": 1.67, "grad_norm": 0.4462140202522278, "learning_rate": 8.608706094458843e-06, "loss": 0.0463, "step": 10011 }, { "epoch": 1.67, "grad_norm": 0.41285938024520874, "learning_rate": 8.606917958635168e-06, "loss": 0.0548, "step": 10012 }, { "epoch": 1.67, "grad_norm": 0.3953208327293396, "learning_rate": 8.605129868234703e-06, "loss": 0.0544, "step": 10013 }, { "epoch": 1.67, "grad_norm": 0.4250740110874176, "learning_rate": 8.603341823315745e-06, "loss": 0.0569, "step": 10014 }, { "epoch": 1.68, "grad_norm": 0.3429392874240875, "learning_rate": 8.601553823936596e-06, "loss": 0.0395, "step": 10015 }, { "epoch": 1.68, "grad_norm": 0.544262707233429, "learning_rate": 8.59976587015556e-06, "loss": 0.0608, "step": 10016 }, { "epoch": 1.68, "grad_norm": 0.48206111788749695, "learning_rate": 8.597977962030934e-06, "loss": 0.0506, "step": 10017 }, { "epoch": 1.68, "grad_norm": 0.5768939256668091, "learning_rate": 8.596190099621012e-06, "loss": 0.0484, "step": 10018 }, { "epoch": 1.68, "grad_norm": 0.5284419655799866, "learning_rate": 8.594402282984091e-06, "loss": 0.0549, "step": 10019 }, { "epoch": 1.68, "grad_norm": 0.47994518280029297, "learning_rate": 8.592614512178468e-06, "loss": 0.0522, "step": 10020 }, { "epoch": 1.68, "grad_norm": 0.3925308287143707, "learning_rate": 8.59082678726243e-06, "loss": 0.052, "step": 10021 }, { "epoch": 1.68, "grad_norm": 0.5028274655342102, "learning_rate": 8.589039108294272e-06, "loss": 0.0719, "step": 10022 }, { "epoch": 1.68, "grad_norm": 0.3672735095024109, "learning_rate": 8.587251475332285e-06, "loss": 0.0422, "step": 10023 }, { "epoch": 1.68, "grad_norm": 0.3367146849632263, "learning_rate": 8.585463888434752e-06, "loss": 0.0552, "step": 10024 }, { "epoch": 1.68, "grad_norm": 0.38806667923927307, "learning_rate": 8.583676347659965e-06, "loss": 0.043, "step": 10025 }, { "epoch": 1.68, "grad_norm": 0.5167466402053833, "learning_rate": 8.581888853066202e-06, "loss": 0.0672, "step": 10026 }, { "epoch": 1.68, "grad_norm": 0.5205001831054688, "learning_rate": 8.580101404711753e-06, "loss": 0.0537, "step": 10027 }, { "epoch": 1.68, "grad_norm": 0.6498207449913025, "learning_rate": 8.578314002654902e-06, "loss": 0.075, "step": 10028 }, { "epoch": 1.68, "grad_norm": 0.47592630982398987, "learning_rate": 8.576526646953919e-06, "loss": 0.0433, "step": 10029 }, { "epoch": 1.68, "grad_norm": 0.34922417998313904, "learning_rate": 8.574739337667094e-06, "loss": 0.0474, "step": 10030 }, { "epoch": 1.68, "grad_norm": 0.43924659490585327, "learning_rate": 8.572952074852699e-06, "loss": 0.0448, "step": 10031 }, { "epoch": 1.68, "grad_norm": 0.45697522163391113, "learning_rate": 8.571164858569006e-06, "loss": 0.056, "step": 10032 }, { "epoch": 1.68, "grad_norm": 0.620881199836731, "learning_rate": 8.569377688874301e-06, "loss": 0.0641, "step": 10033 }, { "epoch": 1.68, "grad_norm": 0.445450097322464, "learning_rate": 8.567590565826847e-06, "loss": 0.0664, "step": 10034 }, { "epoch": 1.68, "grad_norm": 0.4391205310821533, "learning_rate": 8.565803489484916e-06, "loss": 0.0406, "step": 10035 }, { "epoch": 1.68, "grad_norm": 0.420407235622406, "learning_rate": 8.564016459906786e-06, "loss": 0.0528, "step": 10036 }, { "epoch": 1.68, "grad_norm": 0.3828411400318146, "learning_rate": 8.562229477150717e-06, "loss": 0.0462, "step": 10037 }, { "epoch": 1.68, "grad_norm": 0.36651015281677246, "learning_rate": 8.560442541274979e-06, "loss": 0.0541, "step": 10038 }, { "epoch": 1.68, "grad_norm": 0.8891146779060364, "learning_rate": 8.558655652337837e-06, "loss": 0.0406, "step": 10039 }, { "epoch": 1.68, "grad_norm": 0.5905393362045288, "learning_rate": 8.556868810397553e-06, "loss": 0.0765, "step": 10040 }, { "epoch": 1.68, "grad_norm": 0.3412967324256897, "learning_rate": 8.555082015512395e-06, "loss": 0.0461, "step": 10041 }, { "epoch": 1.68, "grad_norm": 0.48408836126327515, "learning_rate": 8.553295267740617e-06, "loss": 0.0691, "step": 10042 }, { "epoch": 1.68, "grad_norm": 0.3855442404747009, "learning_rate": 8.55150856714048e-06, "loss": 0.0473, "step": 10043 }, { "epoch": 1.68, "grad_norm": 0.38819628953933716, "learning_rate": 8.549721913770246e-06, "loss": 0.0454, "step": 10044 }, { "epoch": 1.68, "grad_norm": 0.5940983295440674, "learning_rate": 8.547935307688168e-06, "loss": 0.0426, "step": 10045 }, { "epoch": 1.68, "grad_norm": 0.6307715773582458, "learning_rate": 8.546148748952496e-06, "loss": 0.0523, "step": 10046 }, { "epoch": 1.68, "grad_norm": 0.36117419600486755, "learning_rate": 8.544362237621488e-06, "loss": 0.0359, "step": 10047 }, { "epoch": 1.68, "grad_norm": 0.3570217192173004, "learning_rate": 8.542575773753398e-06, "loss": 0.0371, "step": 10048 }, { "epoch": 1.68, "grad_norm": 0.5324905514717102, "learning_rate": 8.54078935740647e-06, "loss": 0.0451, "step": 10049 }, { "epoch": 1.68, "grad_norm": 0.4939686954021454, "learning_rate": 8.539002988638954e-06, "loss": 0.0397, "step": 10050 }, { "epoch": 1.68, "grad_norm": 0.2853480279445648, "learning_rate": 8.537216667509102e-06, "loss": 0.0263, "step": 10051 }, { "epoch": 1.68, "grad_norm": 0.4623372554779053, "learning_rate": 8.53543039407515e-06, "loss": 0.0412, "step": 10052 }, { "epoch": 1.68, "grad_norm": 0.4976244568824768, "learning_rate": 8.53364416839535e-06, "loss": 0.0667, "step": 10053 }, { "epoch": 1.68, "grad_norm": 0.4605059325695038, "learning_rate": 8.53185799052794e-06, "loss": 0.0506, "step": 10054 }, { "epoch": 1.68, "grad_norm": 0.3728410303592682, "learning_rate": 8.53007186053116e-06, "loss": 0.0476, "step": 10055 }, { "epoch": 1.68, "grad_norm": 0.3378945589065552, "learning_rate": 8.528285778463255e-06, "loss": 0.0342, "step": 10056 }, { "epoch": 1.68, "grad_norm": 0.3673006594181061, "learning_rate": 8.526499744382452e-06, "loss": 0.0583, "step": 10057 }, { "epoch": 1.68, "grad_norm": 0.5263652205467224, "learning_rate": 8.524713758346997e-06, "loss": 0.0405, "step": 10058 }, { "epoch": 1.68, "grad_norm": 0.3431922495365143, "learning_rate": 8.522927820415121e-06, "loss": 0.0489, "step": 10059 }, { "epoch": 1.68, "grad_norm": 0.3577995300292969, "learning_rate": 8.521141930645054e-06, "loss": 0.0427, "step": 10060 }, { "epoch": 1.68, "grad_norm": 0.4252444803714752, "learning_rate": 8.519356089095032e-06, "loss": 0.0537, "step": 10061 }, { "epoch": 1.68, "grad_norm": 0.45777350664138794, "learning_rate": 8.517570295823278e-06, "loss": 0.0419, "step": 10062 }, { "epoch": 1.68, "grad_norm": 0.5485606789588928, "learning_rate": 8.515784550888025e-06, "loss": 0.0517, "step": 10063 }, { "epoch": 1.68, "grad_norm": 0.5149556994438171, "learning_rate": 8.5139988543475e-06, "loss": 0.0365, "step": 10064 }, { "epoch": 1.68, "grad_norm": 0.6803702116012573, "learning_rate": 8.512213206259924e-06, "loss": 0.0562, "step": 10065 }, { "epoch": 1.68, "grad_norm": 0.5082335472106934, "learning_rate": 8.510427606683526e-06, "loss": 0.0696, "step": 10066 }, { "epoch": 1.68, "grad_norm": 0.55165034532547, "learning_rate": 8.508642055676521e-06, "loss": 0.0545, "step": 10067 }, { "epoch": 1.68, "grad_norm": 0.3932003974914551, "learning_rate": 8.506856553297133e-06, "loss": 0.0678, "step": 10068 }, { "epoch": 1.68, "grad_norm": 0.5195083022117615, "learning_rate": 8.505071099603582e-06, "loss": 0.0443, "step": 10069 }, { "epoch": 1.68, "grad_norm": 0.4409500062465668, "learning_rate": 8.503285694654083e-06, "loss": 0.0635, "step": 10070 }, { "epoch": 1.68, "grad_norm": 0.411864310503006, "learning_rate": 8.501500338506848e-06, "loss": 0.0431, "step": 10071 }, { "epoch": 1.68, "grad_norm": 0.5229682922363281, "learning_rate": 8.499715031220098e-06, "loss": 0.0507, "step": 10072 }, { "epoch": 1.68, "grad_norm": 0.33830201625823975, "learning_rate": 8.497929772852043e-06, "loss": 0.0505, "step": 10073 }, { "epoch": 1.68, "grad_norm": 0.7927839159965515, "learning_rate": 8.496144563460886e-06, "loss": 0.0537, "step": 10074 }, { "epoch": 1.69, "grad_norm": 0.5224721431732178, "learning_rate": 8.494359403104845e-06, "loss": 0.0647, "step": 10075 }, { "epoch": 1.69, "grad_norm": 0.7151769995689392, "learning_rate": 8.492574291842128e-06, "loss": 0.0633, "step": 10076 }, { "epoch": 1.69, "grad_norm": 0.43719837069511414, "learning_rate": 8.490789229730933e-06, "loss": 0.0599, "step": 10077 }, { "epoch": 1.69, "grad_norm": 0.3405498266220093, "learning_rate": 8.48900421682947e-06, "loss": 0.0341, "step": 10078 }, { "epoch": 1.69, "grad_norm": 0.7614384293556213, "learning_rate": 8.487219253195942e-06, "loss": 0.0603, "step": 10079 }, { "epoch": 1.69, "grad_norm": 0.34091418981552124, "learning_rate": 8.485434338888543e-06, "loss": 0.0408, "step": 10080 }, { "epoch": 1.69, "grad_norm": 0.8390620946884155, "learning_rate": 8.483649473965483e-06, "loss": 0.042, "step": 10081 }, { "epoch": 1.69, "grad_norm": 0.6178953647613525, "learning_rate": 8.481864658484949e-06, "loss": 0.0421, "step": 10082 }, { "epoch": 1.69, "grad_norm": 0.47596102952957153, "learning_rate": 8.480079892505147e-06, "loss": 0.0466, "step": 10083 }, { "epoch": 1.69, "grad_norm": 0.4163086712360382, "learning_rate": 8.478295176084266e-06, "loss": 0.0555, "step": 10084 }, { "epoch": 1.69, "grad_norm": 0.5341538786888123, "learning_rate": 8.476510509280497e-06, "loss": 0.0488, "step": 10085 }, { "epoch": 1.69, "grad_norm": 0.4856302738189697, "learning_rate": 8.474725892152038e-06, "loss": 0.0559, "step": 10086 }, { "epoch": 1.69, "grad_norm": 0.5883336067199707, "learning_rate": 8.472941324757076e-06, "loss": 0.0655, "step": 10087 }, { "epoch": 1.69, "grad_norm": 0.6767590641975403, "learning_rate": 8.471156807153795e-06, "loss": 0.0751, "step": 10088 }, { "epoch": 1.69, "grad_norm": 0.48755744099617004, "learning_rate": 8.469372339400387e-06, "loss": 0.0686, "step": 10089 }, { "epoch": 1.69, "grad_norm": 0.40998801589012146, "learning_rate": 8.467587921555032e-06, "loss": 0.0401, "step": 10090 }, { "epoch": 1.69, "grad_norm": 0.5281583070755005, "learning_rate": 8.465803553675914e-06, "loss": 0.0622, "step": 10091 }, { "epoch": 1.69, "grad_norm": 0.47587278485298157, "learning_rate": 8.464019235821222e-06, "loss": 0.0446, "step": 10092 }, { "epoch": 1.69, "grad_norm": 0.3272113800048828, "learning_rate": 8.462234968049129e-06, "loss": 0.0425, "step": 10093 }, { "epoch": 1.69, "grad_norm": 0.38241124153137207, "learning_rate": 8.460450750417811e-06, "loss": 0.0545, "step": 10094 }, { "epoch": 1.69, "grad_norm": 0.4698350429534912, "learning_rate": 8.458666582985448e-06, "loss": 0.0423, "step": 10095 }, { "epoch": 1.69, "grad_norm": 0.37408387660980225, "learning_rate": 8.456882465810215e-06, "loss": 0.047, "step": 10096 }, { "epoch": 1.69, "grad_norm": 0.40586698055267334, "learning_rate": 8.45509839895029e-06, "loss": 0.0475, "step": 10097 }, { "epoch": 1.69, "grad_norm": 0.5286692380905151, "learning_rate": 8.453314382463837e-06, "loss": 0.0726, "step": 10098 }, { "epoch": 1.69, "grad_norm": 0.4053087830543518, "learning_rate": 8.451530416409028e-06, "loss": 0.0497, "step": 10099 }, { "epoch": 1.69, "grad_norm": 0.44309696555137634, "learning_rate": 8.449746500844036e-06, "loss": 0.0593, "step": 10100 }, { "epoch": 1.69, "grad_norm": 0.639477550983429, "learning_rate": 8.447962635827027e-06, "loss": 0.0497, "step": 10101 }, { "epoch": 1.69, "grad_norm": 0.4924042224884033, "learning_rate": 8.446178821416159e-06, "loss": 0.0501, "step": 10102 }, { "epoch": 1.69, "grad_norm": 0.36193162202835083, "learning_rate": 8.444395057669603e-06, "loss": 0.0403, "step": 10103 }, { "epoch": 1.69, "grad_norm": 0.47191277146339417, "learning_rate": 8.442611344645519e-06, "loss": 0.0504, "step": 10104 }, { "epoch": 1.69, "grad_norm": 0.3929983377456665, "learning_rate": 8.440827682402063e-06, "loss": 0.0467, "step": 10105 }, { "epoch": 1.69, "grad_norm": 0.4849517047405243, "learning_rate": 8.439044070997399e-06, "loss": 0.0609, "step": 10106 }, { "epoch": 1.69, "grad_norm": 0.5146969556808472, "learning_rate": 8.437260510489685e-06, "loss": 0.0554, "step": 10107 }, { "epoch": 1.69, "grad_norm": 0.48479339480400085, "learning_rate": 8.435477000937069e-06, "loss": 0.0325, "step": 10108 }, { "epoch": 1.69, "grad_norm": 0.37326502799987793, "learning_rate": 8.433693542397712e-06, "loss": 0.0516, "step": 10109 }, { "epoch": 1.69, "grad_norm": 0.3429102599620819, "learning_rate": 8.431910134929759e-06, "loss": 0.0534, "step": 10110 }, { "epoch": 1.69, "grad_norm": 0.7237803339958191, "learning_rate": 8.430126778591366e-06, "loss": 0.0527, "step": 10111 }, { "epoch": 1.69, "grad_norm": 0.5337991714477539, "learning_rate": 8.428343473440681e-06, "loss": 0.0549, "step": 10112 }, { "epoch": 1.69, "grad_norm": 0.3748515546321869, "learning_rate": 8.426560219535848e-06, "loss": 0.0343, "step": 10113 }, { "epoch": 1.69, "grad_norm": 0.5769405961036682, "learning_rate": 8.424777016935012e-06, "loss": 0.0483, "step": 10114 }, { "epoch": 1.69, "grad_norm": 0.4289049208164215, "learning_rate": 8.422993865696325e-06, "loss": 0.0539, "step": 10115 }, { "epoch": 1.69, "grad_norm": 0.47166141867637634, "learning_rate": 8.421210765877917e-06, "loss": 0.053, "step": 10116 }, { "epoch": 1.69, "grad_norm": 0.41869521141052246, "learning_rate": 8.419427717537935e-06, "loss": 0.0528, "step": 10117 }, { "epoch": 1.69, "grad_norm": 0.5102907419204712, "learning_rate": 8.417644720734517e-06, "loss": 0.0652, "step": 10118 }, { "epoch": 1.69, "grad_norm": 0.6648926734924316, "learning_rate": 8.415861775525796e-06, "loss": 0.0516, "step": 10119 }, { "epoch": 1.69, "grad_norm": 0.7847163081169128, "learning_rate": 8.414078881969913e-06, "loss": 0.0497, "step": 10120 }, { "epoch": 1.69, "grad_norm": 0.5073842406272888, "learning_rate": 8.412296040125e-06, "loss": 0.0389, "step": 10121 }, { "epoch": 1.69, "grad_norm": 0.48091569542884827, "learning_rate": 8.410513250049184e-06, "loss": 0.0604, "step": 10122 }, { "epoch": 1.69, "grad_norm": 0.46122756600379944, "learning_rate": 8.4087305118006e-06, "loss": 0.0467, "step": 10123 }, { "epoch": 1.69, "grad_norm": 0.41547927260398865, "learning_rate": 8.406947825437372e-06, "loss": 0.0365, "step": 10124 }, { "epoch": 1.69, "grad_norm": 0.49097520112991333, "learning_rate": 8.405165191017635e-06, "loss": 0.0664, "step": 10125 }, { "epoch": 1.69, "grad_norm": 0.38941043615341187, "learning_rate": 8.403382608599507e-06, "loss": 0.0442, "step": 10126 }, { "epoch": 1.69, "grad_norm": 0.41762158274650574, "learning_rate": 8.40160007824111e-06, "loss": 0.0474, "step": 10127 }, { "epoch": 1.69, "grad_norm": 0.42419686913490295, "learning_rate": 8.399817600000574e-06, "loss": 0.075, "step": 10128 }, { "epoch": 1.69, "grad_norm": 0.439974308013916, "learning_rate": 8.398035173936016e-06, "loss": 0.0643, "step": 10129 }, { "epoch": 1.69, "grad_norm": 0.45098066329956055, "learning_rate": 8.396252800105545e-06, "loss": 0.0522, "step": 10130 }, { "epoch": 1.69, "grad_norm": 0.5989642143249512, "learning_rate": 8.394470478567286e-06, "loss": 0.0644, "step": 10131 }, { "epoch": 1.69, "grad_norm": 0.5033578276634216, "learning_rate": 8.392688209379359e-06, "loss": 0.0488, "step": 10132 }, { "epoch": 1.69, "grad_norm": 0.33606070280075073, "learning_rate": 8.390905992599862e-06, "loss": 0.0431, "step": 10133 }, { "epoch": 1.69, "grad_norm": 1.3437918424606323, "learning_rate": 8.38912382828692e-06, "loss": 0.0417, "step": 10134 }, { "epoch": 1.7, "grad_norm": 0.26006752252578735, "learning_rate": 8.387341716498638e-06, "loss": 0.029, "step": 10135 }, { "epoch": 1.7, "grad_norm": 0.4165116250514984, "learning_rate": 8.385559657293121e-06, "loss": 0.0555, "step": 10136 }, { "epoch": 1.7, "grad_norm": 0.37595903873443604, "learning_rate": 8.383777650728482e-06, "loss": 0.0522, "step": 10137 }, { "epoch": 1.7, "grad_norm": 0.34587541222572327, "learning_rate": 8.381995696862817e-06, "loss": 0.0348, "step": 10138 }, { "epoch": 1.7, "grad_norm": 0.33634230494499207, "learning_rate": 8.380213795754235e-06, "loss": 0.0287, "step": 10139 }, { "epoch": 1.7, "grad_norm": 0.28563690185546875, "learning_rate": 8.37843194746084e-06, "loss": 0.0387, "step": 10140 }, { "epoch": 1.7, "grad_norm": 0.5081517696380615, "learning_rate": 8.376650152040722e-06, "loss": 0.0473, "step": 10141 }, { "epoch": 1.7, "grad_norm": 0.46327105164527893, "learning_rate": 8.374868409551988e-06, "loss": 0.0595, "step": 10142 }, { "epoch": 1.7, "grad_norm": 0.35638555884361267, "learning_rate": 8.373086720052728e-06, "loss": 0.0437, "step": 10143 }, { "epoch": 1.7, "grad_norm": 0.4605180025100708, "learning_rate": 8.371305083601036e-06, "loss": 0.0526, "step": 10144 }, { "epoch": 1.7, "grad_norm": 0.3426033854484558, "learning_rate": 8.36952350025501e-06, "loss": 0.0563, "step": 10145 }, { "epoch": 1.7, "grad_norm": 0.5167229771614075, "learning_rate": 8.367741970072736e-06, "loss": 0.0485, "step": 10146 }, { "epoch": 1.7, "grad_norm": 0.47955626249313354, "learning_rate": 8.365960493112304e-06, "loss": 0.0453, "step": 10147 }, { "epoch": 1.7, "grad_norm": 0.41266870498657227, "learning_rate": 8.364179069431804e-06, "loss": 0.0417, "step": 10148 }, { "epoch": 1.7, "grad_norm": 0.3431681990623474, "learning_rate": 8.36239769908932e-06, "loss": 0.0435, "step": 10149 }, { "epoch": 1.7, "grad_norm": 0.35796767473220825, "learning_rate": 8.36061638214293e-06, "loss": 0.0516, "step": 10150 }, { "epoch": 1.7, "grad_norm": 0.43951675295829773, "learning_rate": 8.358835118650723e-06, "loss": 0.0618, "step": 10151 }, { "epoch": 1.7, "grad_norm": 0.6359372138977051, "learning_rate": 8.357053908670777e-06, "loss": 0.058, "step": 10152 }, { "epoch": 1.7, "grad_norm": 0.5196036100387573, "learning_rate": 8.355272752261175e-06, "loss": 0.0434, "step": 10153 }, { "epoch": 1.7, "grad_norm": 0.4541948139667511, "learning_rate": 8.353491649479988e-06, "loss": 0.0547, "step": 10154 }, { "epoch": 1.7, "grad_norm": 0.37682417035102844, "learning_rate": 8.35171060038529e-06, "loss": 0.0374, "step": 10155 }, { "epoch": 1.7, "grad_norm": 0.37319839000701904, "learning_rate": 8.349929605035164e-06, "loss": 0.0412, "step": 10156 }, { "epoch": 1.7, "grad_norm": 0.468779593706131, "learning_rate": 8.348148663487674e-06, "loss": 0.0648, "step": 10157 }, { "epoch": 1.7, "grad_norm": 0.4459090530872345, "learning_rate": 8.346367775800887e-06, "loss": 0.0562, "step": 10158 }, { "epoch": 1.7, "grad_norm": 0.42288661003112793, "learning_rate": 8.344586942032878e-06, "loss": 0.0468, "step": 10159 }, { "epoch": 1.7, "grad_norm": 0.4735010266304016, "learning_rate": 8.342806162241711e-06, "loss": 0.0524, "step": 10160 }, { "epoch": 1.7, "grad_norm": 0.3323880136013031, "learning_rate": 8.341025436485446e-06, "loss": 0.0455, "step": 10161 }, { "epoch": 1.7, "grad_norm": 0.4490586221218109, "learning_rate": 8.339244764822154e-06, "loss": 0.0494, "step": 10162 }, { "epoch": 1.7, "grad_norm": 0.4480154812335968, "learning_rate": 8.337464147309891e-06, "loss": 0.0496, "step": 10163 }, { "epoch": 1.7, "grad_norm": 0.6318504214286804, "learning_rate": 8.335683584006716e-06, "loss": 0.0596, "step": 10164 }, { "epoch": 1.7, "grad_norm": 0.4120498597621918, "learning_rate": 8.333903074970691e-06, "loss": 0.055, "step": 10165 }, { "epoch": 1.7, "grad_norm": 0.46489372849464417, "learning_rate": 8.332122620259862e-06, "loss": 0.0576, "step": 10166 }, { "epoch": 1.7, "grad_norm": 0.5069397687911987, "learning_rate": 8.330342219932295e-06, "loss": 0.0375, "step": 10167 }, { "epoch": 1.7, "grad_norm": 0.44877949357032776, "learning_rate": 8.328561874046038e-06, "loss": 0.0517, "step": 10168 }, { "epoch": 1.7, "grad_norm": 0.4125937521457672, "learning_rate": 8.326781582659134e-06, "loss": 0.0436, "step": 10169 }, { "epoch": 1.7, "grad_norm": 0.8715916275978088, "learning_rate": 8.325001345829644e-06, "loss": 0.0633, "step": 10170 }, { "epoch": 1.7, "grad_norm": 0.41711172461509705, "learning_rate": 8.323221163615606e-06, "loss": 0.0605, "step": 10171 }, { "epoch": 1.7, "grad_norm": 0.4370345175266266, "learning_rate": 8.321441036075066e-06, "loss": 0.0694, "step": 10172 }, { "epoch": 1.7, "grad_norm": 0.5832127332687378, "learning_rate": 8.319660963266073e-06, "loss": 0.0658, "step": 10173 }, { "epoch": 1.7, "grad_norm": 0.4970449209213257, "learning_rate": 8.317880945246662e-06, "loss": 0.0511, "step": 10174 }, { "epoch": 1.7, "grad_norm": 0.49015629291534424, "learning_rate": 8.316100982074873e-06, "loss": 0.0525, "step": 10175 }, { "epoch": 1.7, "grad_norm": 0.3745203912258148, "learning_rate": 8.314321073808752e-06, "loss": 0.0464, "step": 10176 }, { "epoch": 1.7, "grad_norm": 0.3583826720714569, "learning_rate": 8.312541220506328e-06, "loss": 0.05, "step": 10177 }, { "epoch": 1.7, "grad_norm": 0.4882076680660248, "learning_rate": 8.310761422225634e-06, "loss": 0.0595, "step": 10178 }, { "epoch": 1.7, "grad_norm": 0.2848610281944275, "learning_rate": 8.308981679024707e-06, "loss": 0.0486, "step": 10179 }, { "epoch": 1.7, "grad_norm": 0.5594279170036316, "learning_rate": 8.307201990961573e-06, "loss": 0.0562, "step": 10180 }, { "epoch": 1.7, "grad_norm": 0.3970586955547333, "learning_rate": 8.30542235809427e-06, "loss": 0.0334, "step": 10181 }, { "epoch": 1.7, "grad_norm": 0.8930762410163879, "learning_rate": 8.303642780480818e-06, "loss": 0.0578, "step": 10182 }, { "epoch": 1.7, "grad_norm": 0.41372135281562805, "learning_rate": 8.30186325817924e-06, "loss": 0.0743, "step": 10183 }, { "epoch": 1.7, "grad_norm": 0.46606844663619995, "learning_rate": 8.300083791247568e-06, "loss": 0.0436, "step": 10184 }, { "epoch": 1.7, "grad_norm": 0.5762079358100891, "learning_rate": 8.29830437974382e-06, "loss": 0.0662, "step": 10185 }, { "epoch": 1.7, "grad_norm": 0.4185478091239929, "learning_rate": 8.29652502372601e-06, "loss": 0.0466, "step": 10186 }, { "epoch": 1.7, "grad_norm": 0.37092551589012146, "learning_rate": 8.294745723252164e-06, "loss": 0.0749, "step": 10187 }, { "epoch": 1.7, "grad_norm": 0.41341981291770935, "learning_rate": 8.292966478380297e-06, "loss": 0.0493, "step": 10188 }, { "epoch": 1.7, "grad_norm": 0.4824497103691101, "learning_rate": 8.291187289168418e-06, "loss": 0.0665, "step": 10189 }, { "epoch": 1.7, "grad_norm": 0.41128432750701904, "learning_rate": 8.289408155674545e-06, "loss": 0.0388, "step": 10190 }, { "epoch": 1.7, "grad_norm": 0.38804537057876587, "learning_rate": 8.287629077956692e-06, "loss": 0.0418, "step": 10191 }, { "epoch": 1.7, "grad_norm": 0.4462210237979889, "learning_rate": 8.285850056072859e-06, "loss": 0.0307, "step": 10192 }, { "epoch": 1.7, "grad_norm": 0.3688839077949524, "learning_rate": 8.284071090081061e-06, "loss": 0.0506, "step": 10193 }, { "epoch": 1.7, "grad_norm": 0.4635411202907562, "learning_rate": 8.2822921800393e-06, "loss": 0.0624, "step": 10194 }, { "epoch": 1.71, "grad_norm": 0.6583521366119385, "learning_rate": 8.280513326005579e-06, "loss": 0.0706, "step": 10195 }, { "epoch": 1.71, "grad_norm": 0.41112905740737915, "learning_rate": 8.278734528037902e-06, "loss": 0.0444, "step": 10196 }, { "epoch": 1.71, "grad_norm": 0.34487834572792053, "learning_rate": 8.276955786194266e-06, "loss": 0.0592, "step": 10197 }, { "epoch": 1.71, "grad_norm": 0.38878607749938965, "learning_rate": 8.275177100532676e-06, "loss": 0.0433, "step": 10198 }, { "epoch": 1.71, "grad_norm": 0.3775947690010071, "learning_rate": 8.27339847111112e-06, "loss": 0.0434, "step": 10199 }, { "epoch": 1.71, "grad_norm": 0.46034952998161316, "learning_rate": 8.271619897987592e-06, "loss": 0.0549, "step": 10200 }, { "epoch": 1.71, "grad_norm": 0.5854190587997437, "learning_rate": 8.269841381220096e-06, "loss": 0.0576, "step": 10201 }, { "epoch": 1.71, "grad_norm": 0.5711613297462463, "learning_rate": 8.268062920866611e-06, "loss": 0.0736, "step": 10202 }, { "epoch": 1.71, "grad_norm": 0.4303039312362671, "learning_rate": 8.26628451698513e-06, "loss": 0.0633, "step": 10203 }, { "epoch": 1.71, "grad_norm": 0.4442107081413269, "learning_rate": 8.264506169633643e-06, "loss": 0.0576, "step": 10204 }, { "epoch": 1.71, "grad_norm": 0.6058841347694397, "learning_rate": 8.262727878870133e-06, "loss": 0.074, "step": 10205 }, { "epoch": 1.71, "grad_norm": 0.46940138936042786, "learning_rate": 8.260949644752579e-06, "loss": 0.0522, "step": 10206 }, { "epoch": 1.71, "grad_norm": 0.5941447615623474, "learning_rate": 8.259171467338968e-06, "loss": 0.04, "step": 10207 }, { "epoch": 1.71, "grad_norm": 0.3334285020828247, "learning_rate": 8.257393346687276e-06, "loss": 0.0322, "step": 10208 }, { "epoch": 1.71, "grad_norm": 0.4142294228076935, "learning_rate": 8.255615282855488e-06, "loss": 0.0444, "step": 10209 }, { "epoch": 1.71, "grad_norm": 0.4777369797229767, "learning_rate": 8.253837275901574e-06, "loss": 0.0559, "step": 10210 }, { "epoch": 1.71, "grad_norm": 0.3998180627822876, "learning_rate": 8.252059325883505e-06, "loss": 0.0341, "step": 10211 }, { "epoch": 1.71, "grad_norm": 0.3970353305339813, "learning_rate": 8.250281432859263e-06, "loss": 0.0493, "step": 10212 }, { "epoch": 1.71, "grad_norm": 0.4668043255805969, "learning_rate": 8.248503596886815e-06, "loss": 0.0558, "step": 10213 }, { "epoch": 1.71, "grad_norm": 0.36066246032714844, "learning_rate": 8.246725818024122e-06, "loss": 0.0435, "step": 10214 }, { "epoch": 1.71, "grad_norm": 0.4944117069244385, "learning_rate": 8.24494809632916e-06, "loss": 0.0428, "step": 10215 }, { "epoch": 1.71, "grad_norm": 0.4824630916118622, "learning_rate": 8.243170431859891e-06, "loss": 0.0495, "step": 10216 }, { "epoch": 1.71, "grad_norm": 0.4428650736808777, "learning_rate": 8.241392824674274e-06, "loss": 0.066, "step": 10217 }, { "epoch": 1.71, "grad_norm": 0.3266679048538208, "learning_rate": 8.239615274830274e-06, "loss": 0.0429, "step": 10218 }, { "epoch": 1.71, "grad_norm": 0.3839609920978546, "learning_rate": 8.237837782385853e-06, "loss": 0.0408, "step": 10219 }, { "epoch": 1.71, "grad_norm": 0.3909125030040741, "learning_rate": 8.236060347398962e-06, "loss": 0.0475, "step": 10220 }, { "epoch": 1.71, "grad_norm": 0.4674508273601532, "learning_rate": 8.234282969927563e-06, "loss": 0.0378, "step": 10221 }, { "epoch": 1.71, "grad_norm": 0.4167369306087494, "learning_rate": 8.2325056500296e-06, "loss": 0.047, "step": 10222 }, { "epoch": 1.71, "grad_norm": 0.4961892366409302, "learning_rate": 8.230728387763036e-06, "loss": 0.0445, "step": 10223 }, { "epoch": 1.71, "grad_norm": 0.3823079764842987, "learning_rate": 8.228951183185818e-06, "loss": 0.0454, "step": 10224 }, { "epoch": 1.71, "grad_norm": 0.35112738609313965, "learning_rate": 8.227174036355888e-06, "loss": 0.035, "step": 10225 }, { "epoch": 1.71, "grad_norm": 0.3991802930831909, "learning_rate": 8.2253969473312e-06, "loss": 0.0431, "step": 10226 }, { "epoch": 1.71, "grad_norm": 0.3644249141216278, "learning_rate": 8.223619916169691e-06, "loss": 0.0573, "step": 10227 }, { "epoch": 1.71, "grad_norm": 0.6972977519035339, "learning_rate": 8.221842942929304e-06, "loss": 0.0582, "step": 10228 }, { "epoch": 1.71, "grad_norm": 0.4051038920879364, "learning_rate": 8.220066027667989e-06, "loss": 0.0446, "step": 10229 }, { "epoch": 1.71, "grad_norm": 0.4369581639766693, "learning_rate": 8.218289170443673e-06, "loss": 0.0483, "step": 10230 }, { "epoch": 1.71, "grad_norm": 0.4490572512149811, "learning_rate": 8.216512371314298e-06, "loss": 0.0588, "step": 10231 }, { "epoch": 1.71, "grad_norm": 0.37180015444755554, "learning_rate": 8.214735630337801e-06, "loss": 0.0373, "step": 10232 }, { "epoch": 1.71, "grad_norm": 0.6593812108039856, "learning_rate": 8.212958947572113e-06, "loss": 0.074, "step": 10233 }, { "epoch": 1.71, "grad_norm": 0.45314639806747437, "learning_rate": 8.21118232307516e-06, "loss": 0.0567, "step": 10234 }, { "epoch": 1.71, "grad_norm": 0.4524990916252136, "learning_rate": 8.209405756904876e-06, "loss": 0.0475, "step": 10235 }, { "epoch": 1.71, "grad_norm": 0.6112839579582214, "learning_rate": 8.207629249119185e-06, "loss": 0.0483, "step": 10236 }, { "epoch": 1.71, "grad_norm": 0.4289962649345398, "learning_rate": 8.205852799776021e-06, "loss": 0.0438, "step": 10237 }, { "epoch": 1.71, "grad_norm": 0.5796912908554077, "learning_rate": 8.204076408933298e-06, "loss": 0.0508, "step": 10238 }, { "epoch": 1.71, "grad_norm": 0.39010006189346313, "learning_rate": 8.202300076648937e-06, "loss": 0.0505, "step": 10239 }, { "epoch": 1.71, "grad_norm": 1.2014405727386475, "learning_rate": 8.200523802980868e-06, "loss": 0.05, "step": 10240 }, { "epoch": 1.71, "grad_norm": 0.5360472202301025, "learning_rate": 8.198747587987e-06, "loss": 0.0723, "step": 10241 }, { "epoch": 1.71, "grad_norm": 0.3651808798313141, "learning_rate": 8.196971431725246e-06, "loss": 0.0423, "step": 10242 }, { "epoch": 1.71, "grad_norm": 0.41993653774261475, "learning_rate": 8.195195334253528e-06, "loss": 0.0651, "step": 10243 }, { "epoch": 1.71, "grad_norm": 0.5268232822418213, "learning_rate": 8.193419295629756e-06, "loss": 0.0647, "step": 10244 }, { "epoch": 1.71, "grad_norm": 1.0266622304916382, "learning_rate": 8.191643315911833e-06, "loss": 0.0539, "step": 10245 }, { "epoch": 1.71, "grad_norm": 0.3786734640598297, "learning_rate": 8.189867395157676e-06, "loss": 0.0403, "step": 10246 }, { "epoch": 1.71, "grad_norm": 0.5652618408203125, "learning_rate": 8.18809153342519e-06, "loss": 0.0541, "step": 10247 }, { "epoch": 1.71, "grad_norm": 0.3096321225166321, "learning_rate": 8.18631573077227e-06, "loss": 0.036, "step": 10248 }, { "epoch": 1.71, "grad_norm": 0.39253756403923035, "learning_rate": 8.18453998725683e-06, "loss": 0.0506, "step": 10249 }, { "epoch": 1.71, "grad_norm": 0.4354551434516907, "learning_rate": 8.18276430293676e-06, "loss": 0.0607, "step": 10250 }, { "epoch": 1.71, "grad_norm": 0.39030399918556213, "learning_rate": 8.180988677869968e-06, "loss": 0.0465, "step": 10251 }, { "epoch": 1.71, "grad_norm": 0.49733632802963257, "learning_rate": 8.17921311211435e-06, "loss": 0.0596, "step": 10252 }, { "epoch": 1.71, "grad_norm": 0.5730996131896973, "learning_rate": 8.177437605727789e-06, "loss": 0.0506, "step": 10253 }, { "epoch": 1.72, "grad_norm": 0.36239245533943176, "learning_rate": 8.175662158768192e-06, "loss": 0.042, "step": 10254 }, { "epoch": 1.72, "grad_norm": 0.7182410955429077, "learning_rate": 8.17388677129344e-06, "loss": 0.046, "step": 10255 }, { "epoch": 1.72, "grad_norm": 0.33846956491470337, "learning_rate": 8.172111443361423e-06, "loss": 0.0477, "step": 10256 }, { "epoch": 1.72, "grad_norm": 0.46000349521636963, "learning_rate": 8.170336175030035e-06, "loss": 0.0459, "step": 10257 }, { "epoch": 1.72, "grad_norm": 0.3674146234989166, "learning_rate": 8.168560966357152e-06, "loss": 0.0537, "step": 10258 }, { "epoch": 1.72, "grad_norm": 0.7143633365631104, "learning_rate": 8.16678581740066e-06, "loss": 0.059, "step": 10259 }, { "epoch": 1.72, "grad_norm": 0.37706825137138367, "learning_rate": 8.165010728218443e-06, "loss": 0.0385, "step": 10260 }, { "epoch": 1.72, "grad_norm": 0.5078359246253967, "learning_rate": 8.16323569886838e-06, "loss": 0.0667, "step": 10261 }, { "epoch": 1.72, "grad_norm": 0.5124061107635498, "learning_rate": 8.16146072940834e-06, "loss": 0.0597, "step": 10262 }, { "epoch": 1.72, "grad_norm": 0.5795984268188477, "learning_rate": 8.159685819896205e-06, "loss": 0.0545, "step": 10263 }, { "epoch": 1.72, "grad_norm": 0.4301777780056, "learning_rate": 8.157910970389846e-06, "loss": 0.0521, "step": 10264 }, { "epoch": 1.72, "grad_norm": 0.3878152072429657, "learning_rate": 8.156136180947139e-06, "loss": 0.0475, "step": 10265 }, { "epoch": 1.72, "grad_norm": 0.330948144197464, "learning_rate": 8.154361451625949e-06, "loss": 0.0572, "step": 10266 }, { "epoch": 1.72, "grad_norm": 0.41048598289489746, "learning_rate": 8.15258678248414e-06, "loss": 0.0567, "step": 10267 }, { "epoch": 1.72, "grad_norm": 0.5388374924659729, "learning_rate": 8.150812173579585e-06, "loss": 0.0605, "step": 10268 }, { "epoch": 1.72, "grad_norm": 0.6374558210372925, "learning_rate": 8.149037624970145e-06, "loss": 0.0596, "step": 10269 }, { "epoch": 1.72, "grad_norm": 0.47264087200164795, "learning_rate": 8.147263136713675e-06, "loss": 0.0491, "step": 10270 }, { "epoch": 1.72, "grad_norm": 0.2826271653175354, "learning_rate": 8.14548870886804e-06, "loss": 0.0392, "step": 10271 }, { "epoch": 1.72, "grad_norm": 0.5414196252822876, "learning_rate": 8.143714341491102e-06, "loss": 0.05, "step": 10272 }, { "epoch": 1.72, "grad_norm": 0.3397830128669739, "learning_rate": 8.141940034640705e-06, "loss": 0.0556, "step": 10273 }, { "epoch": 1.72, "grad_norm": 0.37318721413612366, "learning_rate": 8.140165788374713e-06, "loss": 0.0363, "step": 10274 }, { "epoch": 1.72, "grad_norm": 0.607958197593689, "learning_rate": 8.13839160275097e-06, "loss": 0.0549, "step": 10275 }, { "epoch": 1.72, "grad_norm": 0.39414867758750916, "learning_rate": 8.136617477827331e-06, "loss": 0.0425, "step": 10276 }, { "epoch": 1.72, "grad_norm": 0.505832850933075, "learning_rate": 8.134843413661643e-06, "loss": 0.0834, "step": 10277 }, { "epoch": 1.72, "grad_norm": 0.3831441104412079, "learning_rate": 8.133069410311745e-06, "loss": 0.0468, "step": 10278 }, { "epoch": 1.72, "grad_norm": 0.4063005745410919, "learning_rate": 8.13129546783549e-06, "loss": 0.0465, "step": 10279 }, { "epoch": 1.72, "grad_norm": 0.379041850566864, "learning_rate": 8.129521586290717e-06, "loss": 0.0424, "step": 10280 }, { "epoch": 1.72, "grad_norm": 0.41466712951660156, "learning_rate": 8.127747765735259e-06, "loss": 0.049, "step": 10281 }, { "epoch": 1.72, "grad_norm": 0.5940467715263367, "learning_rate": 8.125974006226965e-06, "loss": 0.0645, "step": 10282 }, { "epoch": 1.72, "grad_norm": 0.49888020753860474, "learning_rate": 8.12420030782366e-06, "loss": 0.0363, "step": 10283 }, { "epoch": 1.72, "grad_norm": 0.43383532762527466, "learning_rate": 8.12242667058318e-06, "loss": 0.0489, "step": 10284 }, { "epoch": 1.72, "grad_norm": 0.38908836245536804, "learning_rate": 8.120653094563364e-06, "loss": 0.0491, "step": 10285 }, { "epoch": 1.72, "grad_norm": 0.6215780973434448, "learning_rate": 8.118879579822034e-06, "loss": 0.078, "step": 10286 }, { "epoch": 1.72, "grad_norm": 0.3331291973590851, "learning_rate": 8.117106126417018e-06, "loss": 0.0423, "step": 10287 }, { "epoch": 1.72, "grad_norm": 0.43798816204071045, "learning_rate": 8.11533273440615e-06, "loss": 0.0482, "step": 10288 }, { "epoch": 1.72, "grad_norm": 0.36444026231765747, "learning_rate": 8.113559403847241e-06, "loss": 0.0453, "step": 10289 }, { "epoch": 1.72, "grad_norm": 0.484896719455719, "learning_rate": 8.111786134798123e-06, "loss": 0.052, "step": 10290 }, { "epoch": 1.72, "grad_norm": 0.44036075472831726, "learning_rate": 8.110012927316612e-06, "loss": 0.0616, "step": 10291 }, { "epoch": 1.72, "grad_norm": 0.5995274186134338, "learning_rate": 8.108239781460523e-06, "loss": 0.062, "step": 10292 }, { "epoch": 1.72, "grad_norm": 0.5034188032150269, "learning_rate": 8.106466697287678e-06, "loss": 0.058, "step": 10293 }, { "epoch": 1.72, "grad_norm": 0.5472462177276611, "learning_rate": 8.104693674855884e-06, "loss": 0.0705, "step": 10294 }, { "epoch": 1.72, "grad_norm": 0.4854200780391693, "learning_rate": 8.102920714222955e-06, "loss": 0.0521, "step": 10295 }, { "epoch": 1.72, "grad_norm": 0.3508833944797516, "learning_rate": 8.101147815446705e-06, "loss": 0.0505, "step": 10296 }, { "epoch": 1.72, "grad_norm": 1.0309288501739502, "learning_rate": 8.099374978584938e-06, "loss": 0.0724, "step": 10297 }, { "epoch": 1.72, "grad_norm": 0.24377432465553284, "learning_rate": 8.097602203695455e-06, "loss": 0.027, "step": 10298 }, { "epoch": 1.72, "grad_norm": 0.4950641989707947, "learning_rate": 8.095829490836066e-06, "loss": 0.0548, "step": 10299 }, { "epoch": 1.72, "grad_norm": 0.42450636625289917, "learning_rate": 8.094056840064572e-06, "loss": 0.0412, "step": 10300 }, { "epoch": 1.72, "grad_norm": 0.37416040897369385, "learning_rate": 8.092284251438767e-06, "loss": 0.0428, "step": 10301 }, { "epoch": 1.72, "grad_norm": 0.4367883503437042, "learning_rate": 8.090511725016459e-06, "loss": 0.0675, "step": 10302 }, { "epoch": 1.72, "grad_norm": 0.2819461226463318, "learning_rate": 8.08873926085543e-06, "loss": 0.0403, "step": 10303 }, { "epoch": 1.72, "grad_norm": 0.4898485541343689, "learning_rate": 8.086966859013486e-06, "loss": 0.0426, "step": 10304 }, { "epoch": 1.72, "grad_norm": 0.3355306088924408, "learning_rate": 8.085194519548411e-06, "loss": 0.0468, "step": 10305 }, { "epoch": 1.72, "grad_norm": 0.40258947014808655, "learning_rate": 8.083422242517996e-06, "loss": 0.0447, "step": 10306 }, { "epoch": 1.72, "grad_norm": 0.26121020317077637, "learning_rate": 8.081650027980028e-06, "loss": 0.0276, "step": 10307 }, { "epoch": 1.72, "grad_norm": 0.5393244028091431, "learning_rate": 8.079877875992295e-06, "loss": 0.0737, "step": 10308 }, { "epoch": 1.72, "grad_norm": 0.5181951522827148, "learning_rate": 8.078105786612576e-06, "loss": 0.0679, "step": 10309 }, { "epoch": 1.72, "grad_norm": 0.4237247705459595, "learning_rate": 8.076333759898656e-06, "loss": 0.053, "step": 10310 }, { "epoch": 1.72, "grad_norm": 0.40286174416542053, "learning_rate": 8.074561795908313e-06, "loss": 0.06, "step": 10311 }, { "epoch": 1.72, "grad_norm": 0.5143263339996338, "learning_rate": 8.07278989469932e-06, "loss": 0.0514, "step": 10312 }, { "epoch": 1.72, "grad_norm": 0.41615843772888184, "learning_rate": 8.07101805632946e-06, "loss": 0.0491, "step": 10313 }, { "epoch": 1.73, "grad_norm": 0.3353087306022644, "learning_rate": 8.0692462808565e-06, "loss": 0.0547, "step": 10314 }, { "epoch": 1.73, "grad_norm": 0.45040804147720337, "learning_rate": 8.067474568338212e-06, "loss": 0.0537, "step": 10315 }, { "epoch": 1.73, "grad_norm": 0.5017960667610168, "learning_rate": 8.065702918832368e-06, "loss": 0.0349, "step": 10316 }, { "epoch": 1.73, "grad_norm": 0.5760892629623413, "learning_rate": 8.063931332396729e-06, "loss": 0.059, "step": 10317 }, { "epoch": 1.73, "grad_norm": 0.3405350148677826, "learning_rate": 8.062159809089068e-06, "loss": 0.0468, "step": 10318 }, { "epoch": 1.73, "grad_norm": 0.44780316948890686, "learning_rate": 8.060388348967143e-06, "loss": 0.0438, "step": 10319 }, { "epoch": 1.73, "grad_norm": 0.45121100544929504, "learning_rate": 8.058616952088709e-06, "loss": 0.0537, "step": 10320 }, { "epoch": 1.73, "grad_norm": 0.4719744026660919, "learning_rate": 8.056845618511537e-06, "loss": 0.0607, "step": 10321 }, { "epoch": 1.73, "grad_norm": 0.5205404758453369, "learning_rate": 8.055074348293374e-06, "loss": 0.0502, "step": 10322 }, { "epoch": 1.73, "grad_norm": 0.5281650424003601, "learning_rate": 8.053303141491978e-06, "loss": 0.0458, "step": 10323 }, { "epoch": 1.73, "grad_norm": 0.8394023180007935, "learning_rate": 8.051531998165103e-06, "loss": 0.0705, "step": 10324 }, { "epoch": 1.73, "grad_norm": 0.5855293273925781, "learning_rate": 8.049760918370498e-06, "loss": 0.0477, "step": 10325 }, { "epoch": 1.73, "grad_norm": 0.3975174129009247, "learning_rate": 8.047989902165907e-06, "loss": 0.0507, "step": 10326 }, { "epoch": 1.73, "grad_norm": 0.5318471789360046, "learning_rate": 8.046218949609081e-06, "loss": 0.0637, "step": 10327 }, { "epoch": 1.73, "grad_norm": 0.40955159068107605, "learning_rate": 8.044448060757767e-06, "loss": 0.0339, "step": 10328 }, { "epoch": 1.73, "grad_norm": 0.4554043412208557, "learning_rate": 8.042677235669698e-06, "loss": 0.042, "step": 10329 }, { "epoch": 1.73, "grad_norm": 0.34166231751441956, "learning_rate": 8.040906474402623e-06, "loss": 0.041, "step": 10330 }, { "epoch": 1.73, "grad_norm": 0.43060940504074097, "learning_rate": 8.039135777014273e-06, "loss": 0.0569, "step": 10331 }, { "epoch": 1.73, "grad_norm": 0.5423146486282349, "learning_rate": 8.037365143562387e-06, "loss": 0.0672, "step": 10332 }, { "epoch": 1.73, "grad_norm": 0.2828545868396759, "learning_rate": 8.035594574104703e-06, "loss": 0.0312, "step": 10333 }, { "epoch": 1.73, "grad_norm": 0.45152318477630615, "learning_rate": 8.033824068698944e-06, "loss": 0.0514, "step": 10334 }, { "epoch": 1.73, "grad_norm": 0.42446839809417725, "learning_rate": 8.032053627402846e-06, "loss": 0.0511, "step": 10335 }, { "epoch": 1.73, "grad_norm": 0.5292583107948303, "learning_rate": 8.030283250274136e-06, "loss": 0.0464, "step": 10336 }, { "epoch": 1.73, "grad_norm": 0.46974819898605347, "learning_rate": 8.028512937370535e-06, "loss": 0.0314, "step": 10337 }, { "epoch": 1.73, "grad_norm": 0.30206239223480225, "learning_rate": 8.026742688749771e-06, "loss": 0.0423, "step": 10338 }, { "epoch": 1.73, "grad_norm": 0.34712105989456177, "learning_rate": 8.024972504469562e-06, "loss": 0.0349, "step": 10339 }, { "epoch": 1.73, "grad_norm": 0.3977091312408447, "learning_rate": 8.023202384587627e-06, "loss": 0.0434, "step": 10340 }, { "epoch": 1.73, "grad_norm": 0.5758650898933411, "learning_rate": 8.021432329161689e-06, "loss": 0.0552, "step": 10341 }, { "epoch": 1.73, "grad_norm": 0.3913036584854126, "learning_rate": 8.019662338249455e-06, "loss": 0.0568, "step": 10342 }, { "epoch": 1.73, "grad_norm": 0.42013946175575256, "learning_rate": 8.01789241190864e-06, "loss": 0.0481, "step": 10343 }, { "epoch": 1.73, "grad_norm": 0.4182680547237396, "learning_rate": 8.016122550196959e-06, "loss": 0.0573, "step": 10344 }, { "epoch": 1.73, "grad_norm": 0.38422730565071106, "learning_rate": 8.014352753172114e-06, "loss": 0.0428, "step": 10345 }, { "epoch": 1.73, "grad_norm": 0.4330906271934509, "learning_rate": 8.01258302089182e-06, "loss": 0.0465, "step": 10346 }, { "epoch": 1.73, "grad_norm": 0.36360952258110046, "learning_rate": 8.010813353413772e-06, "loss": 0.0527, "step": 10347 }, { "epoch": 1.73, "grad_norm": 0.5059096217155457, "learning_rate": 8.009043750795676e-06, "loss": 0.0481, "step": 10348 }, { "epoch": 1.73, "grad_norm": 0.4229678511619568, "learning_rate": 8.007274213095237e-06, "loss": 0.0417, "step": 10349 }, { "epoch": 1.73, "grad_norm": 0.35248491168022156, "learning_rate": 8.005504740370145e-06, "loss": 0.0591, "step": 10350 }, { "epoch": 1.73, "grad_norm": 0.4427417516708374, "learning_rate": 8.0037353326781e-06, "loss": 0.0517, "step": 10351 }, { "epoch": 1.73, "grad_norm": 0.43029218912124634, "learning_rate": 8.001965990076797e-06, "loss": 0.0637, "step": 10352 }, { "epoch": 1.73, "grad_norm": 0.46257269382476807, "learning_rate": 8.000196712623927e-06, "loss": 0.0576, "step": 10353 }, { "epoch": 1.73, "grad_norm": 0.37852346897125244, "learning_rate": 7.998427500377173e-06, "loss": 0.0436, "step": 10354 }, { "epoch": 1.73, "grad_norm": 0.32959285378456116, "learning_rate": 7.996658353394232e-06, "loss": 0.0376, "step": 10355 }, { "epoch": 1.73, "grad_norm": 0.3872871696949005, "learning_rate": 7.994889271732788e-06, "loss": 0.044, "step": 10356 }, { "epoch": 1.73, "grad_norm": 0.3117964565753937, "learning_rate": 7.993120255450515e-06, "loss": 0.0384, "step": 10357 }, { "epoch": 1.73, "grad_norm": 0.453762948513031, "learning_rate": 7.991351304605107e-06, "loss": 0.0333, "step": 10358 }, { "epoch": 1.73, "grad_norm": 0.3580106794834137, "learning_rate": 7.989582419254229e-06, "loss": 0.0446, "step": 10359 }, { "epoch": 1.73, "grad_norm": 0.43843284249305725, "learning_rate": 7.98781359945557e-06, "loss": 0.0653, "step": 10360 }, { "epoch": 1.73, "grad_norm": 0.3752492368221283, "learning_rate": 7.986044845266799e-06, "loss": 0.0491, "step": 10361 }, { "epoch": 1.73, "grad_norm": 0.31371042132377625, "learning_rate": 7.984276156745587e-06, "loss": 0.0444, "step": 10362 }, { "epoch": 1.73, "grad_norm": 0.5151777267456055, "learning_rate": 7.982507533949606e-06, "loss": 0.0494, "step": 10363 }, { "epoch": 1.73, "grad_norm": 0.49080681800842285, "learning_rate": 7.980738976936531e-06, "loss": 0.0489, "step": 10364 }, { "epoch": 1.73, "grad_norm": 0.6783578395843506, "learning_rate": 7.978970485764014e-06, "loss": 0.0554, "step": 10365 }, { "epoch": 1.73, "grad_norm": 0.4482005834579468, "learning_rate": 7.977202060489732e-06, "loss": 0.0333, "step": 10366 }, { "epoch": 1.73, "grad_norm": 0.4491110146045685, "learning_rate": 7.97543370117134e-06, "loss": 0.0724, "step": 10367 }, { "epoch": 1.73, "grad_norm": 0.37470564246177673, "learning_rate": 7.973665407866495e-06, "loss": 0.0492, "step": 10368 }, { "epoch": 1.73, "grad_norm": 0.4033392369747162, "learning_rate": 7.971897180632863e-06, "loss": 0.0534, "step": 10369 }, { "epoch": 1.73, "grad_norm": 0.33480674028396606, "learning_rate": 7.970129019528095e-06, "loss": 0.0586, "step": 10370 }, { "epoch": 1.73, "grad_norm": 0.3884492516517639, "learning_rate": 7.968360924609839e-06, "loss": 0.0463, "step": 10371 }, { "epoch": 1.73, "grad_norm": 0.3966585099697113, "learning_rate": 7.966592895935755e-06, "loss": 0.0553, "step": 10372 }, { "epoch": 1.73, "grad_norm": 0.44911864399909973, "learning_rate": 7.964824933563485e-06, "loss": 0.0448, "step": 10373 }, { "epoch": 1.74, "grad_norm": 0.456033855676651, "learning_rate": 7.96305703755068e-06, "loss": 0.0499, "step": 10374 }, { "epoch": 1.74, "grad_norm": 0.44596022367477417, "learning_rate": 7.961289207954984e-06, "loss": 0.0512, "step": 10375 }, { "epoch": 1.74, "grad_norm": 0.3371656835079193, "learning_rate": 7.959521444834034e-06, "loss": 0.0611, "step": 10376 }, { "epoch": 1.74, "grad_norm": 0.4272902011871338, "learning_rate": 7.957753748245478e-06, "loss": 0.0432, "step": 10377 }, { "epoch": 1.74, "grad_norm": 0.4791509211063385, "learning_rate": 7.95598611824695e-06, "loss": 0.0602, "step": 10378 }, { "epoch": 1.74, "grad_norm": 0.322639524936676, "learning_rate": 7.954218554896083e-06, "loss": 0.0454, "step": 10379 }, { "epoch": 1.74, "grad_norm": 0.5223100781440735, "learning_rate": 7.952451058250517e-06, "loss": 0.0588, "step": 10380 }, { "epoch": 1.74, "grad_norm": 0.3791556656360626, "learning_rate": 7.95068362836788e-06, "loss": 0.037, "step": 10381 }, { "epoch": 1.74, "grad_norm": 0.45074892044067383, "learning_rate": 7.948916265305798e-06, "loss": 0.0485, "step": 10382 }, { "epoch": 1.74, "grad_norm": 0.3452140986919403, "learning_rate": 7.947148969121903e-06, "loss": 0.0495, "step": 10383 }, { "epoch": 1.74, "grad_norm": 0.7200257778167725, "learning_rate": 7.945381739873821e-06, "loss": 0.0543, "step": 10384 }, { "epoch": 1.74, "grad_norm": 0.4005977213382721, "learning_rate": 7.943614577619168e-06, "loss": 0.0485, "step": 10385 }, { "epoch": 1.74, "grad_norm": 0.38915011286735535, "learning_rate": 7.941847482415573e-06, "loss": 0.0446, "step": 10386 }, { "epoch": 1.74, "grad_norm": 0.3862038552761078, "learning_rate": 7.940080454320647e-06, "loss": 0.0365, "step": 10387 }, { "epoch": 1.74, "grad_norm": 0.4721531569957733, "learning_rate": 7.93831349339201e-06, "loss": 0.0497, "step": 10388 }, { "epoch": 1.74, "grad_norm": 0.48877981305122375, "learning_rate": 7.936546599687279e-06, "loss": 0.0363, "step": 10389 }, { "epoch": 1.74, "grad_norm": 0.35441645979881287, "learning_rate": 7.934779773264057e-06, "loss": 0.0448, "step": 10390 }, { "epoch": 1.74, "grad_norm": 0.3275962471961975, "learning_rate": 7.93301301417996e-06, "loss": 0.0402, "step": 10391 }, { "epoch": 1.74, "grad_norm": 0.2915169894695282, "learning_rate": 7.931246322492598e-06, "loss": 0.0364, "step": 10392 }, { "epoch": 1.74, "grad_norm": 0.5306097269058228, "learning_rate": 7.929479698259566e-06, "loss": 0.0579, "step": 10393 }, { "epoch": 1.74, "grad_norm": 0.4720141589641571, "learning_rate": 7.92771314153848e-06, "loss": 0.0627, "step": 10394 }, { "epoch": 1.74, "grad_norm": 0.3919355869293213, "learning_rate": 7.925946652386931e-06, "loss": 0.0457, "step": 10395 }, { "epoch": 1.74, "grad_norm": 0.41579335927963257, "learning_rate": 7.924180230862517e-06, "loss": 0.0512, "step": 10396 }, { "epoch": 1.74, "grad_norm": 0.4603945314884186, "learning_rate": 7.922413877022846e-06, "loss": 0.0429, "step": 10397 }, { "epoch": 1.74, "grad_norm": 0.46312960982322693, "learning_rate": 7.9206475909255e-06, "loss": 0.0514, "step": 10398 }, { "epoch": 1.74, "grad_norm": 0.32336360216140747, "learning_rate": 7.918881372628075e-06, "loss": 0.0534, "step": 10399 }, { "epoch": 1.74, "grad_norm": 0.3577274978160858, "learning_rate": 7.917115222188165e-06, "loss": 0.0479, "step": 10400 }, { "epoch": 1.74, "grad_norm": 0.3350439667701721, "learning_rate": 7.91534913966335e-06, "loss": 0.0342, "step": 10401 }, { "epoch": 1.74, "grad_norm": 0.40977931022644043, "learning_rate": 7.913583125111223e-06, "loss": 0.0639, "step": 10402 }, { "epoch": 1.74, "grad_norm": 0.4500278830528259, "learning_rate": 7.911817178589361e-06, "loss": 0.043, "step": 10403 }, { "epoch": 1.74, "grad_norm": 0.35112687945365906, "learning_rate": 7.910051300155344e-06, "loss": 0.0497, "step": 10404 }, { "epoch": 1.74, "grad_norm": 0.37655991315841675, "learning_rate": 7.90828548986676e-06, "loss": 0.0401, "step": 10405 }, { "epoch": 1.74, "grad_norm": 0.4799419939517975, "learning_rate": 7.906519747781179e-06, "loss": 0.0545, "step": 10406 }, { "epoch": 1.74, "grad_norm": 0.38543349504470825, "learning_rate": 7.904754073956173e-06, "loss": 0.0385, "step": 10407 }, { "epoch": 1.74, "grad_norm": 0.3450314700603485, "learning_rate": 7.902988468449316e-06, "loss": 0.0447, "step": 10408 }, { "epoch": 1.74, "grad_norm": 0.49464333057403564, "learning_rate": 7.901222931318182e-06, "loss": 0.0486, "step": 10409 }, { "epoch": 1.74, "grad_norm": 0.4724293649196625, "learning_rate": 7.899457462620333e-06, "loss": 0.0557, "step": 10410 }, { "epoch": 1.74, "grad_norm": 0.473270446062088, "learning_rate": 7.897692062413335e-06, "loss": 0.0588, "step": 10411 }, { "epoch": 1.74, "grad_norm": 0.3525463342666626, "learning_rate": 7.895926730754757e-06, "loss": 0.0437, "step": 10412 }, { "epoch": 1.74, "grad_norm": 0.3653983175754547, "learning_rate": 7.89416146770215e-06, "loss": 0.0411, "step": 10413 }, { "epoch": 1.74, "grad_norm": 0.37933194637298584, "learning_rate": 7.892396273313083e-06, "loss": 0.0538, "step": 10414 }, { "epoch": 1.74, "grad_norm": 0.43292921781539917, "learning_rate": 7.890631147645105e-06, "loss": 0.0507, "step": 10415 }, { "epoch": 1.74, "grad_norm": 0.5592200756072998, "learning_rate": 7.888866090755772e-06, "loss": 0.0535, "step": 10416 }, { "epoch": 1.74, "grad_norm": 0.3907563090324402, "learning_rate": 7.88710110270264e-06, "loss": 0.0439, "step": 10417 }, { "epoch": 1.74, "grad_norm": 0.3412497043609619, "learning_rate": 7.88533618354325e-06, "loss": 0.0484, "step": 10418 }, { "epoch": 1.74, "grad_norm": 0.42808955907821655, "learning_rate": 7.883571333335159e-06, "loss": 0.04, "step": 10419 }, { "epoch": 1.74, "grad_norm": 0.4364855885505676, "learning_rate": 7.881806552135909e-06, "loss": 0.038, "step": 10420 }, { "epoch": 1.74, "grad_norm": 0.5121949911117554, "learning_rate": 7.880041840003038e-06, "loss": 0.0463, "step": 10421 }, { "epoch": 1.74, "grad_norm": 0.3825794756412506, "learning_rate": 7.878277196994093e-06, "loss": 0.0511, "step": 10422 }, { "epoch": 1.74, "grad_norm": 0.4590268135070801, "learning_rate": 7.87651262316661e-06, "loss": 0.0508, "step": 10423 }, { "epoch": 1.74, "grad_norm": 0.4770374298095703, "learning_rate": 7.874748118578122e-06, "loss": 0.0649, "step": 10424 }, { "epoch": 1.74, "grad_norm": 0.412221223115921, "learning_rate": 7.87298368328617e-06, "loss": 0.0358, "step": 10425 }, { "epoch": 1.74, "grad_norm": 0.525238573551178, "learning_rate": 7.871219317348279e-06, "loss": 0.0505, "step": 10426 }, { "epoch": 1.74, "grad_norm": 0.8724735379219055, "learning_rate": 7.869455020821979e-06, "loss": 0.0613, "step": 10427 }, { "epoch": 1.74, "grad_norm": 0.35104233026504517, "learning_rate": 7.867690793764804e-06, "loss": 0.0518, "step": 10428 }, { "epoch": 1.74, "grad_norm": 0.5254251956939697, "learning_rate": 7.86592663623427e-06, "loss": 0.058, "step": 10429 }, { "epoch": 1.74, "grad_norm": 0.5133519768714905, "learning_rate": 7.864162548287907e-06, "loss": 0.0541, "step": 10430 }, { "epoch": 1.74, "grad_norm": 0.349729061126709, "learning_rate": 7.862398529983232e-06, "loss": 0.0442, "step": 10431 }, { "epoch": 1.74, "grad_norm": 0.49448102712631226, "learning_rate": 7.860634581377759e-06, "loss": 0.0457, "step": 10432 }, { "epoch": 1.74, "grad_norm": 0.35128703713417053, "learning_rate": 7.858870702529011e-06, "loss": 0.0397, "step": 10433 }, { "epoch": 1.75, "grad_norm": 0.3842068314552307, "learning_rate": 7.857106893494501e-06, "loss": 0.0514, "step": 10434 }, { "epoch": 1.75, "grad_norm": 0.4804052710533142, "learning_rate": 7.85534315433173e-06, "loss": 0.0739, "step": 10435 }, { "epoch": 1.75, "grad_norm": 0.2819896340370178, "learning_rate": 7.85357948509822e-06, "loss": 0.0333, "step": 10436 }, { "epoch": 1.75, "grad_norm": 0.4288022220134735, "learning_rate": 7.851815885851472e-06, "loss": 0.06, "step": 10437 }, { "epoch": 1.75, "grad_norm": 0.6596407294273376, "learning_rate": 7.850052356648987e-06, "loss": 0.0553, "step": 10438 }, { "epoch": 1.75, "grad_norm": 0.6931980848312378, "learning_rate": 7.848288897548272e-06, "loss": 0.0582, "step": 10439 }, { "epoch": 1.75, "grad_norm": 0.4173409938812256, "learning_rate": 7.846525508606829e-06, "loss": 0.0668, "step": 10440 }, { "epoch": 1.75, "grad_norm": 0.4402567446231842, "learning_rate": 7.844762189882146e-06, "loss": 0.0625, "step": 10441 }, { "epoch": 1.75, "grad_norm": 0.2993941903114319, "learning_rate": 7.84299894143173e-06, "loss": 0.0305, "step": 10442 }, { "epoch": 1.75, "grad_norm": 0.4210607707500458, "learning_rate": 7.841235763313064e-06, "loss": 0.0628, "step": 10443 }, { "epoch": 1.75, "grad_norm": 0.49558204412460327, "learning_rate": 7.839472655583643e-06, "loss": 0.0542, "step": 10444 }, { "epoch": 1.75, "grad_norm": 0.4395133852958679, "learning_rate": 7.837709618300959e-06, "loss": 0.0638, "step": 10445 }, { "epoch": 1.75, "grad_norm": 0.3920595347881317, "learning_rate": 7.83594665152249e-06, "loss": 0.0518, "step": 10446 }, { "epoch": 1.75, "grad_norm": 0.32809749245643616, "learning_rate": 7.834183755305727e-06, "loss": 0.046, "step": 10447 }, { "epoch": 1.75, "grad_norm": 0.5122544765472412, "learning_rate": 7.832420929708148e-06, "loss": 0.0838, "step": 10448 }, { "epoch": 1.75, "grad_norm": 0.3593229055404663, "learning_rate": 7.83065817478723e-06, "loss": 0.0357, "step": 10449 }, { "epoch": 1.75, "grad_norm": 0.4788842797279358, "learning_rate": 7.828895490600457e-06, "loss": 0.061, "step": 10450 }, { "epoch": 1.75, "grad_norm": 0.38786202669143677, "learning_rate": 7.827132877205297e-06, "loss": 0.0451, "step": 10451 }, { "epoch": 1.75, "grad_norm": 0.3352580666542053, "learning_rate": 7.825370334659222e-06, "loss": 0.0476, "step": 10452 }, { "epoch": 1.75, "grad_norm": 0.42114728689193726, "learning_rate": 7.823607863019706e-06, "loss": 0.0438, "step": 10453 }, { "epoch": 1.75, "grad_norm": 0.4944397211074829, "learning_rate": 7.821845462344215e-06, "loss": 0.0484, "step": 10454 }, { "epoch": 1.75, "grad_norm": 0.4155397415161133, "learning_rate": 7.820083132690213e-06, "loss": 0.0577, "step": 10455 }, { "epoch": 1.75, "grad_norm": 0.3520107865333557, "learning_rate": 7.818320874115168e-06, "loss": 0.0363, "step": 10456 }, { "epoch": 1.75, "grad_norm": 0.43264874815940857, "learning_rate": 7.816558686676531e-06, "loss": 0.0524, "step": 10457 }, { "epoch": 1.75, "grad_norm": 0.40406474471092224, "learning_rate": 7.814796570431773e-06, "loss": 0.0563, "step": 10458 }, { "epoch": 1.75, "grad_norm": 0.4991839826107025, "learning_rate": 7.813034525438338e-06, "loss": 0.056, "step": 10459 }, { "epoch": 1.75, "grad_norm": 0.49485647678375244, "learning_rate": 7.811272551753683e-06, "loss": 0.047, "step": 10460 }, { "epoch": 1.75, "grad_norm": 0.39467135071754456, "learning_rate": 7.809510649435268e-06, "loss": 0.0416, "step": 10461 }, { "epoch": 1.75, "grad_norm": 0.3589365482330322, "learning_rate": 7.807748818540535e-06, "loss": 0.0406, "step": 10462 }, { "epoch": 1.75, "grad_norm": 0.30201566219329834, "learning_rate": 7.805987059126925e-06, "loss": 0.055, "step": 10463 }, { "epoch": 1.75, "grad_norm": 0.3097573518753052, "learning_rate": 7.804225371251892e-06, "loss": 0.0418, "step": 10464 }, { "epoch": 1.75, "grad_norm": 0.44646143913269043, "learning_rate": 7.802463754972877e-06, "loss": 0.0396, "step": 10465 }, { "epoch": 1.75, "grad_norm": 0.39863288402557373, "learning_rate": 7.800702210347312e-06, "loss": 0.0456, "step": 10466 }, { "epoch": 1.75, "grad_norm": 0.5385872721672058, "learning_rate": 7.798940737432644e-06, "loss": 0.0575, "step": 10467 }, { "epoch": 1.75, "grad_norm": 0.397213876247406, "learning_rate": 7.797179336286303e-06, "loss": 0.0544, "step": 10468 }, { "epoch": 1.75, "grad_norm": 0.4666720926761627, "learning_rate": 7.795418006965719e-06, "loss": 0.0647, "step": 10469 }, { "epoch": 1.75, "grad_norm": 0.44184520840644836, "learning_rate": 7.793656749528331e-06, "loss": 0.0437, "step": 10470 }, { "epoch": 1.75, "grad_norm": 0.48613792657852173, "learning_rate": 7.791895564031557e-06, "loss": 0.0559, "step": 10471 }, { "epoch": 1.75, "grad_norm": 0.3820873498916626, "learning_rate": 7.79013445053283e-06, "loss": 0.055, "step": 10472 }, { "epoch": 1.75, "grad_norm": 0.38950109481811523, "learning_rate": 7.788373409089574e-06, "loss": 0.0621, "step": 10473 }, { "epoch": 1.75, "grad_norm": 0.4053553640842438, "learning_rate": 7.7866124397592e-06, "loss": 0.0391, "step": 10474 }, { "epoch": 1.75, "grad_norm": 0.44727030396461487, "learning_rate": 7.784851542599138e-06, "loss": 0.0761, "step": 10475 }, { "epoch": 1.75, "grad_norm": 0.5391811728477478, "learning_rate": 7.7830907176668e-06, "loss": 0.051, "step": 10476 }, { "epoch": 1.75, "grad_norm": 0.5376538634300232, "learning_rate": 7.781329965019597e-06, "loss": 0.0613, "step": 10477 }, { "epoch": 1.75, "grad_norm": 0.4185079336166382, "learning_rate": 7.77956928471495e-06, "loss": 0.0408, "step": 10478 }, { "epoch": 1.75, "grad_norm": 0.47571220993995667, "learning_rate": 7.777808676810254e-06, "loss": 0.0472, "step": 10479 }, { "epoch": 1.75, "grad_norm": 0.46741241216659546, "learning_rate": 7.776048141362925e-06, "loss": 0.0624, "step": 10480 }, { "epoch": 1.75, "grad_norm": 0.5463297367095947, "learning_rate": 7.774287678430369e-06, "loss": 0.0569, "step": 10481 }, { "epoch": 1.75, "grad_norm": 0.5917584300041199, "learning_rate": 7.772527288069982e-06, "loss": 0.06, "step": 10482 }, { "epoch": 1.75, "grad_norm": 0.4034130573272705, "learning_rate": 7.77076697033917e-06, "loss": 0.0474, "step": 10483 }, { "epoch": 1.75, "grad_norm": 0.5443742871284485, "learning_rate": 7.769006725295323e-06, "loss": 0.0656, "step": 10484 }, { "epoch": 1.75, "grad_norm": 0.42989206314086914, "learning_rate": 7.76724655299584e-06, "loss": 0.0384, "step": 10485 }, { "epoch": 1.75, "grad_norm": 0.5442795157432556, "learning_rate": 7.765486453498118e-06, "loss": 0.0736, "step": 10486 }, { "epoch": 1.75, "grad_norm": 0.4175681471824646, "learning_rate": 7.763726426859541e-06, "loss": 0.0637, "step": 10487 }, { "epoch": 1.75, "grad_norm": 0.4131995737552643, "learning_rate": 7.761966473137498e-06, "loss": 0.0537, "step": 10488 }, { "epoch": 1.75, "grad_norm": 0.7425475120544434, "learning_rate": 7.760206592389379e-06, "loss": 0.0545, "step": 10489 }, { "epoch": 1.75, "grad_norm": 0.5663549900054932, "learning_rate": 7.758446784672563e-06, "loss": 0.0824, "step": 10490 }, { "epoch": 1.75, "grad_norm": 0.453795462846756, "learning_rate": 7.756687050044429e-06, "loss": 0.0403, "step": 10491 }, { "epoch": 1.75, "grad_norm": 0.34251630306243896, "learning_rate": 7.754927388562356e-06, "loss": 0.0481, "step": 10492 }, { "epoch": 1.75, "grad_norm": 0.372079074382782, "learning_rate": 7.753167800283727e-06, "loss": 0.0542, "step": 10493 }, { "epoch": 1.76, "grad_norm": 0.3991876244544983, "learning_rate": 7.751408285265906e-06, "loss": 0.0465, "step": 10494 }, { "epoch": 1.76, "grad_norm": 0.6342317461967468, "learning_rate": 7.749648843566271e-06, "loss": 0.0503, "step": 10495 }, { "epoch": 1.76, "grad_norm": 0.3677560091018677, "learning_rate": 7.74788947524219e-06, "loss": 0.0588, "step": 10496 }, { "epoch": 1.76, "grad_norm": 0.5520308017730713, "learning_rate": 7.746130180351023e-06, "loss": 0.0581, "step": 10497 }, { "epoch": 1.76, "grad_norm": 0.38809555768966675, "learning_rate": 7.744370958950143e-06, "loss": 0.0455, "step": 10498 }, { "epoch": 1.76, "grad_norm": 0.6022321581840515, "learning_rate": 7.742611811096905e-06, "loss": 0.055, "step": 10499 }, { "epoch": 1.76, "grad_norm": 0.3492460548877716, "learning_rate": 7.740852736848671e-06, "loss": 0.0365, "step": 10500 }, { "epoch": 1.76, "grad_norm": 0.8020095825195312, "learning_rate": 7.7390937362628e-06, "loss": 0.0578, "step": 10501 }, { "epoch": 1.76, "grad_norm": 0.4496079683303833, "learning_rate": 7.737334809396643e-06, "loss": 0.0619, "step": 10502 }, { "epoch": 1.76, "grad_norm": 0.49612507224082947, "learning_rate": 7.735575956307552e-06, "loss": 0.0414, "step": 10503 }, { "epoch": 1.76, "grad_norm": 0.3651714026927948, "learning_rate": 7.733817177052881e-06, "loss": 0.0478, "step": 10504 }, { "epoch": 1.76, "grad_norm": 0.5163170099258423, "learning_rate": 7.73205847168997e-06, "loss": 0.07, "step": 10505 }, { "epoch": 1.76, "grad_norm": 0.47775909304618835, "learning_rate": 7.730299840276172e-06, "loss": 0.0457, "step": 10506 }, { "epoch": 1.76, "grad_norm": 0.42217710614204407, "learning_rate": 7.728541282868824e-06, "loss": 0.0462, "step": 10507 }, { "epoch": 1.76, "grad_norm": 0.5806643962860107, "learning_rate": 7.726782799525262e-06, "loss": 0.0881, "step": 10508 }, { "epoch": 1.76, "grad_norm": 0.376149982213974, "learning_rate": 7.725024390302835e-06, "loss": 0.0283, "step": 10509 }, { "epoch": 1.76, "grad_norm": 0.5571582317352295, "learning_rate": 7.72326605525887e-06, "loss": 0.0653, "step": 10510 }, { "epoch": 1.76, "grad_norm": 0.43094781041145325, "learning_rate": 7.721507794450702e-06, "loss": 0.053, "step": 10511 }, { "epoch": 1.76, "grad_norm": 0.41004034876823425, "learning_rate": 7.719749607935662e-06, "loss": 0.0396, "step": 10512 }, { "epoch": 1.76, "grad_norm": 0.566843569278717, "learning_rate": 7.717991495771073e-06, "loss": 0.049, "step": 10513 }, { "epoch": 1.76, "grad_norm": 0.4562298655509949, "learning_rate": 7.716233458014268e-06, "loss": 0.0613, "step": 10514 }, { "epoch": 1.76, "grad_norm": 0.42820483446121216, "learning_rate": 7.714475494722566e-06, "loss": 0.0597, "step": 10515 }, { "epoch": 1.76, "grad_norm": 0.42471975088119507, "learning_rate": 7.712717605953286e-06, "loss": 0.053, "step": 10516 }, { "epoch": 1.76, "grad_norm": 0.37735339999198914, "learning_rate": 7.710959791763752e-06, "loss": 0.0471, "step": 10517 }, { "epoch": 1.76, "grad_norm": 0.4178229570388794, "learning_rate": 7.709202052211275e-06, "loss": 0.0435, "step": 10518 }, { "epoch": 1.76, "grad_norm": 0.47129571437835693, "learning_rate": 7.707444387353166e-06, "loss": 0.0533, "step": 10519 }, { "epoch": 1.76, "grad_norm": 0.39613643288612366, "learning_rate": 7.705686797246742e-06, "loss": 0.0397, "step": 10520 }, { "epoch": 1.76, "grad_norm": 0.6055181622505188, "learning_rate": 7.703929281949311e-06, "loss": 0.0567, "step": 10521 }, { "epoch": 1.76, "grad_norm": 0.5482242107391357, "learning_rate": 7.702171841518171e-06, "loss": 0.0554, "step": 10522 }, { "epoch": 1.76, "grad_norm": 0.4571734368801117, "learning_rate": 7.700414476010634e-06, "loss": 0.0526, "step": 10523 }, { "epoch": 1.76, "grad_norm": 0.4946492612361908, "learning_rate": 7.698657185484002e-06, "loss": 0.039, "step": 10524 }, { "epoch": 1.76, "grad_norm": 0.5883460640907288, "learning_rate": 7.696899969995565e-06, "loss": 0.0696, "step": 10525 }, { "epoch": 1.76, "grad_norm": 0.5527397394180298, "learning_rate": 7.695142829602627e-06, "loss": 0.0606, "step": 10526 }, { "epoch": 1.76, "grad_norm": 0.4430240988731384, "learning_rate": 7.693385764362478e-06, "loss": 0.0657, "step": 10527 }, { "epoch": 1.76, "grad_norm": 0.3368101119995117, "learning_rate": 7.691628774332409e-06, "loss": 0.0391, "step": 10528 }, { "epoch": 1.76, "grad_norm": 0.5301900506019592, "learning_rate": 7.689871859569716e-06, "loss": 0.0427, "step": 10529 }, { "epoch": 1.76, "grad_norm": 0.23106805980205536, "learning_rate": 7.688115020131672e-06, "loss": 0.0276, "step": 10530 }, { "epoch": 1.76, "grad_norm": 0.5162220001220703, "learning_rate": 7.686358256075573e-06, "loss": 0.0498, "step": 10531 }, { "epoch": 1.76, "grad_norm": 0.3388768136501312, "learning_rate": 7.6846015674587e-06, "loss": 0.0508, "step": 10532 }, { "epoch": 1.76, "grad_norm": 0.3728204667568207, "learning_rate": 7.68284495433832e-06, "loss": 0.0461, "step": 10533 }, { "epoch": 1.76, "grad_norm": 0.38678690791130066, "learning_rate": 7.681088416771725e-06, "loss": 0.0408, "step": 10534 }, { "epoch": 1.76, "grad_norm": 0.5288563966751099, "learning_rate": 7.67933195481618e-06, "loss": 0.0523, "step": 10535 }, { "epoch": 1.76, "grad_norm": 0.37035319209098816, "learning_rate": 7.677575568528957e-06, "loss": 0.0505, "step": 10536 }, { "epoch": 1.76, "grad_norm": 0.6275476813316345, "learning_rate": 7.67581925796733e-06, "loss": 0.0598, "step": 10537 }, { "epoch": 1.76, "grad_norm": 0.3703543543815613, "learning_rate": 7.674063023188562e-06, "loss": 0.0427, "step": 10538 }, { "epoch": 1.76, "grad_norm": 0.43342283368110657, "learning_rate": 7.672306864249916e-06, "loss": 0.035, "step": 10539 }, { "epoch": 1.76, "grad_norm": 0.2880823314189911, "learning_rate": 7.670550781208657e-06, "loss": 0.0451, "step": 10540 }, { "epoch": 1.76, "grad_norm": 0.5215612053871155, "learning_rate": 7.668794774122039e-06, "loss": 0.0461, "step": 10541 }, { "epoch": 1.76, "grad_norm": 0.4630490839481354, "learning_rate": 7.66703884304733e-06, "loss": 0.0535, "step": 10542 }, { "epoch": 1.76, "grad_norm": 0.4086224436759949, "learning_rate": 7.665282988041773e-06, "loss": 0.0375, "step": 10543 }, { "epoch": 1.76, "grad_norm": 0.3435535430908203, "learning_rate": 7.663527209162624e-06, "loss": 0.0543, "step": 10544 }, { "epoch": 1.76, "grad_norm": 0.38883525133132935, "learning_rate": 7.661771506467134e-06, "loss": 0.0442, "step": 10545 }, { "epoch": 1.76, "grad_norm": 0.4248410761356354, "learning_rate": 7.660015880012552e-06, "loss": 0.0607, "step": 10546 }, { "epoch": 1.76, "grad_norm": 0.42518073320388794, "learning_rate": 7.658260329856112e-06, "loss": 0.031, "step": 10547 }, { "epoch": 1.76, "grad_norm": 0.3422352075576782, "learning_rate": 7.656504856055064e-06, "loss": 0.0449, "step": 10548 }, { "epoch": 1.76, "grad_norm": 0.49300503730773926, "learning_rate": 7.65474945866665e-06, "loss": 0.0612, "step": 10549 }, { "epoch": 1.76, "grad_norm": 0.33029353618621826, "learning_rate": 7.652994137748099e-06, "loss": 0.0432, "step": 10550 }, { "epoch": 1.76, "grad_norm": 0.5674318075180054, "learning_rate": 7.651238893356653e-06, "loss": 0.0606, "step": 10551 }, { "epoch": 1.76, "grad_norm": 0.42410728335380554, "learning_rate": 7.649483725549541e-06, "loss": 0.0505, "step": 10552 }, { "epoch": 1.77, "grad_norm": 0.3766670823097229, "learning_rate": 7.647728634383989e-06, "loss": 0.0471, "step": 10553 }, { "epoch": 1.77, "grad_norm": 0.3568820059299469, "learning_rate": 7.645973619917231e-06, "loss": 0.0496, "step": 10554 }, { "epoch": 1.77, "grad_norm": 0.47788187861442566, "learning_rate": 7.644218682206483e-06, "loss": 0.0511, "step": 10555 }, { "epoch": 1.77, "grad_norm": 0.4916534423828125, "learning_rate": 7.642463821308974e-06, "loss": 0.0564, "step": 10556 }, { "epoch": 1.77, "grad_norm": 0.36197829246520996, "learning_rate": 7.640709037281922e-06, "loss": 0.0407, "step": 10557 }, { "epoch": 1.77, "grad_norm": 0.4182039201259613, "learning_rate": 7.63895433018254e-06, "loss": 0.0432, "step": 10558 }, { "epoch": 1.77, "grad_norm": 0.37840384244918823, "learning_rate": 7.637199700068048e-06, "loss": 0.0391, "step": 10559 }, { "epoch": 1.77, "grad_norm": 0.6715785264968872, "learning_rate": 7.635445146995656e-06, "loss": 0.0361, "step": 10560 }, { "epoch": 1.77, "grad_norm": 0.27899301052093506, "learning_rate": 7.63369067102257e-06, "loss": 0.0479, "step": 10561 }, { "epoch": 1.77, "grad_norm": 0.3691193461418152, "learning_rate": 7.631936272206002e-06, "loss": 0.0402, "step": 10562 }, { "epoch": 1.77, "grad_norm": 0.5628451704978943, "learning_rate": 7.630181950603152e-06, "loss": 0.0527, "step": 10563 }, { "epoch": 1.77, "grad_norm": 0.4251108765602112, "learning_rate": 7.628427706271224e-06, "loss": 0.0586, "step": 10564 }, { "epoch": 1.77, "grad_norm": 0.3671960234642029, "learning_rate": 7.626673539267421e-06, "loss": 0.035, "step": 10565 }, { "epoch": 1.77, "grad_norm": 0.3303054869174957, "learning_rate": 7.624919449648935e-06, "loss": 0.0529, "step": 10566 }, { "epoch": 1.77, "grad_norm": 0.45192068815231323, "learning_rate": 7.623165437472958e-06, "loss": 0.0453, "step": 10567 }, { "epoch": 1.77, "grad_norm": 0.68699711561203, "learning_rate": 7.621411502796687e-06, "loss": 0.0547, "step": 10568 }, { "epoch": 1.77, "grad_norm": 0.33121752738952637, "learning_rate": 7.6196576456773075e-06, "loss": 0.0528, "step": 10569 }, { "epoch": 1.77, "grad_norm": 0.3338238000869751, "learning_rate": 7.617903866172013e-06, "loss": 0.0309, "step": 10570 }, { "epoch": 1.77, "grad_norm": 0.36402782797813416, "learning_rate": 7.61615016433798e-06, "loss": 0.0371, "step": 10571 }, { "epoch": 1.77, "grad_norm": 0.44553399085998535, "learning_rate": 7.614396540232392e-06, "loss": 0.0373, "step": 10572 }, { "epoch": 1.77, "grad_norm": 0.4226026237010956, "learning_rate": 7.6126429939124325e-06, "loss": 0.0608, "step": 10573 }, { "epoch": 1.77, "grad_norm": 0.3517332077026367, "learning_rate": 7.610889525435275e-06, "loss": 0.0441, "step": 10574 }, { "epoch": 1.77, "grad_norm": 0.42467084527015686, "learning_rate": 7.609136134858091e-06, "loss": 0.0629, "step": 10575 }, { "epoch": 1.77, "grad_norm": 0.44468969106674194, "learning_rate": 7.607382822238054e-06, "loss": 0.0571, "step": 10576 }, { "epoch": 1.77, "grad_norm": 0.3601744771003723, "learning_rate": 7.6056295876323376e-06, "loss": 0.0419, "step": 10577 }, { "epoch": 1.77, "grad_norm": 0.4546484351158142, "learning_rate": 7.603876431098098e-06, "loss": 0.0518, "step": 10578 }, { "epoch": 1.77, "grad_norm": 0.4653993248939514, "learning_rate": 7.602123352692508e-06, "loss": 0.0612, "step": 10579 }, { "epoch": 1.77, "grad_norm": 0.46859875321388245, "learning_rate": 7.600370352472729e-06, "loss": 0.0618, "step": 10580 }, { "epoch": 1.77, "grad_norm": 0.38179948925971985, "learning_rate": 7.598617430495913e-06, "loss": 0.0438, "step": 10581 }, { "epoch": 1.77, "grad_norm": 0.40675899386405945, "learning_rate": 7.596864586819223e-06, "loss": 0.0642, "step": 10582 }, { "epoch": 1.77, "grad_norm": 0.6835211515426636, "learning_rate": 7.595111821499806e-06, "loss": 0.0593, "step": 10583 }, { "epoch": 1.77, "grad_norm": 0.9653587341308594, "learning_rate": 7.59335913459482e-06, "loss": 0.0665, "step": 10584 }, { "epoch": 1.77, "grad_norm": 0.5110663771629333, "learning_rate": 7.591606526161413e-06, "loss": 0.0692, "step": 10585 }, { "epoch": 1.77, "grad_norm": 0.365583598613739, "learning_rate": 7.589853996256722e-06, "loss": 0.0521, "step": 10586 }, { "epoch": 1.77, "grad_norm": 0.3508915901184082, "learning_rate": 7.5881015449379e-06, "loss": 0.0357, "step": 10587 }, { "epoch": 1.77, "grad_norm": 0.36599746346473694, "learning_rate": 7.586349172262089e-06, "loss": 0.0526, "step": 10588 }, { "epoch": 1.77, "grad_norm": 0.4425981938838959, "learning_rate": 7.584596878286417e-06, "loss": 0.0449, "step": 10589 }, { "epoch": 1.77, "grad_norm": 0.4181814193725586, "learning_rate": 7.582844663068031e-06, "loss": 0.0392, "step": 10590 }, { "epoch": 1.77, "grad_norm": 0.4297732710838318, "learning_rate": 7.581092526664056e-06, "loss": 0.0498, "step": 10591 }, { "epoch": 1.77, "grad_norm": 0.40033701062202454, "learning_rate": 7.579340469131625e-06, "loss": 0.0451, "step": 10592 }, { "epoch": 1.77, "grad_norm": 0.5617408156394958, "learning_rate": 7.577588490527871e-06, "loss": 0.0662, "step": 10593 }, { "epoch": 1.77, "grad_norm": 0.38144662976264954, "learning_rate": 7.575836590909916e-06, "loss": 0.0456, "step": 10594 }, { "epoch": 1.77, "grad_norm": 0.6408211588859558, "learning_rate": 7.5740847703348775e-06, "loss": 0.0806, "step": 10595 }, { "epoch": 1.77, "grad_norm": 0.4235728979110718, "learning_rate": 7.572333028859883e-06, "loss": 0.042, "step": 10596 }, { "epoch": 1.77, "grad_norm": 0.3284604847431183, "learning_rate": 7.570581366542045e-06, "loss": 0.0387, "step": 10597 }, { "epoch": 1.77, "grad_norm": 0.3267419934272766, "learning_rate": 7.568829783438486e-06, "loss": 0.0422, "step": 10598 }, { "epoch": 1.77, "grad_norm": 0.4571381211280823, "learning_rate": 7.5670782796063126e-06, "loss": 0.0443, "step": 10599 }, { "epoch": 1.77, "grad_norm": 0.3921646177768707, "learning_rate": 7.565326855102634e-06, "loss": 0.0475, "step": 10600 }, { "epoch": 1.77, "grad_norm": 0.5485714077949524, "learning_rate": 7.563575509984564e-06, "loss": 0.0482, "step": 10601 }, { "epoch": 1.77, "grad_norm": 0.5337483882904053, "learning_rate": 7.561824244309204e-06, "loss": 0.0701, "step": 10602 }, { "epoch": 1.77, "grad_norm": 0.2786783277988434, "learning_rate": 7.56007305813365e-06, "loss": 0.0368, "step": 10603 }, { "epoch": 1.77, "grad_norm": 0.414950966835022, "learning_rate": 7.558321951515011e-06, "loss": 0.064, "step": 10604 }, { "epoch": 1.77, "grad_norm": 0.2847987711429596, "learning_rate": 7.556570924510381e-06, "loss": 0.0458, "step": 10605 }, { "epoch": 1.77, "grad_norm": 0.58151775598526, "learning_rate": 7.55481997717685e-06, "loss": 0.0658, "step": 10606 }, { "epoch": 1.77, "grad_norm": 0.31116172671318054, "learning_rate": 7.553069109571517e-06, "loss": 0.0503, "step": 10607 }, { "epoch": 1.77, "grad_norm": 0.35730496048927307, "learning_rate": 7.55131832175147e-06, "loss": 0.0482, "step": 10608 }, { "epoch": 1.77, "grad_norm": 0.5150800347328186, "learning_rate": 7.549567613773787e-06, "loss": 0.0594, "step": 10609 }, { "epoch": 1.77, "grad_norm": 0.42379525303840637, "learning_rate": 7.547816985695565e-06, "loss": 0.078, "step": 10610 }, { "epoch": 1.77, "grad_norm": 0.507623016834259, "learning_rate": 7.546066437573875e-06, "loss": 0.039, "step": 10611 }, { "epoch": 1.77, "grad_norm": 0.41148579120635986, "learning_rate": 7.544315969465801e-06, "loss": 0.0426, "step": 10612 }, { "epoch": 1.78, "grad_norm": 0.5932959318161011, "learning_rate": 7.542565581428421e-06, "loss": 0.0562, "step": 10613 }, { "epoch": 1.78, "grad_norm": 0.6266580820083618, "learning_rate": 7.5408152735188e-06, "loss": 0.0559, "step": 10614 }, { "epoch": 1.78, "grad_norm": 0.4504392147064209, "learning_rate": 7.539065045794021e-06, "loss": 0.0527, "step": 10615 }, { "epoch": 1.78, "grad_norm": 0.3956909775733948, "learning_rate": 7.537314898311142e-06, "loss": 0.0603, "step": 10616 }, { "epoch": 1.78, "grad_norm": 0.41949304938316345, "learning_rate": 7.535564831127232e-06, "loss": 0.045, "step": 10617 }, { "epoch": 1.78, "grad_norm": 0.39862504601478577, "learning_rate": 7.533814844299358e-06, "loss": 0.0449, "step": 10618 }, { "epoch": 1.78, "grad_norm": 0.5033011436462402, "learning_rate": 7.532064937884578e-06, "loss": 0.0595, "step": 10619 }, { "epoch": 1.78, "grad_norm": 0.4465441405773163, "learning_rate": 7.5303151119399465e-06, "loss": 0.0429, "step": 10620 }, { "epoch": 1.78, "grad_norm": 0.45185723900794983, "learning_rate": 7.5285653665225255e-06, "loss": 0.0625, "step": 10621 }, { "epoch": 1.78, "grad_norm": 0.41454458236694336, "learning_rate": 7.526815701689364e-06, "loss": 0.0464, "step": 10622 }, { "epoch": 1.78, "grad_norm": 0.3720284104347229, "learning_rate": 7.525066117497508e-06, "loss": 0.0451, "step": 10623 }, { "epoch": 1.78, "grad_norm": 0.5976992249488831, "learning_rate": 7.5233166140040115e-06, "loss": 0.05, "step": 10624 }, { "epoch": 1.78, "grad_norm": 0.3493548631668091, "learning_rate": 7.5215671912659135e-06, "loss": 0.0602, "step": 10625 }, { "epoch": 1.78, "grad_norm": 1.0700751543045044, "learning_rate": 7.519817849340265e-06, "loss": 0.0497, "step": 10626 }, { "epoch": 1.78, "grad_norm": 0.37510350346565247, "learning_rate": 7.5180685882840974e-06, "loss": 0.058, "step": 10627 }, { "epoch": 1.78, "grad_norm": 0.4849492013454437, "learning_rate": 7.516319408154448e-06, "loss": 0.0381, "step": 10628 }, { "epoch": 1.78, "grad_norm": 0.39852988719940186, "learning_rate": 7.514570309008357e-06, "loss": 0.0516, "step": 10629 }, { "epoch": 1.78, "grad_norm": 0.34803640842437744, "learning_rate": 7.512821290902852e-06, "loss": 0.0447, "step": 10630 }, { "epoch": 1.78, "grad_norm": 0.3809456527233124, "learning_rate": 7.511072353894958e-06, "loss": 0.0609, "step": 10631 }, { "epoch": 1.78, "grad_norm": 0.4187141954898834, "learning_rate": 7.509323498041708e-06, "loss": 0.0638, "step": 10632 }, { "epoch": 1.78, "grad_norm": 0.6506668329238892, "learning_rate": 7.507574723400124e-06, "loss": 0.041, "step": 10633 }, { "epoch": 1.78, "grad_norm": 0.3952021896839142, "learning_rate": 7.505826030027224e-06, "loss": 0.0556, "step": 10634 }, { "epoch": 1.78, "grad_norm": 0.4623918831348419, "learning_rate": 7.504077417980028e-06, "loss": 0.0483, "step": 10635 }, { "epoch": 1.78, "grad_norm": 0.40257877111434937, "learning_rate": 7.502328887315555e-06, "loss": 0.058, "step": 10636 }, { "epoch": 1.78, "grad_norm": 0.5523548126220703, "learning_rate": 7.500580438090812e-06, "loss": 0.0721, "step": 10637 }, { "epoch": 1.78, "grad_norm": 0.3526970148086548, "learning_rate": 7.498832070362816e-06, "loss": 0.0364, "step": 10638 }, { "epoch": 1.78, "grad_norm": 0.5039694905281067, "learning_rate": 7.497083784188567e-06, "loss": 0.0596, "step": 10639 }, { "epoch": 1.78, "grad_norm": 0.5200841426849365, "learning_rate": 7.495335579625076e-06, "loss": 0.0604, "step": 10640 }, { "epoch": 1.78, "grad_norm": 0.33436596393585205, "learning_rate": 7.493587456729348e-06, "loss": 0.0494, "step": 10641 }, { "epoch": 1.78, "grad_norm": 0.2868729531764984, "learning_rate": 7.491839415558373e-06, "loss": 0.0431, "step": 10642 }, { "epoch": 1.78, "grad_norm": 0.4036180377006531, "learning_rate": 7.4900914561691576e-06, "loss": 0.0593, "step": 10643 }, { "epoch": 1.78, "grad_norm": 0.3238099217414856, "learning_rate": 7.488343578618691e-06, "loss": 0.0497, "step": 10644 }, { "epoch": 1.78, "grad_norm": 0.5253553986549377, "learning_rate": 7.486595782963964e-06, "loss": 0.0632, "step": 10645 }, { "epoch": 1.78, "grad_norm": 0.40018218755722046, "learning_rate": 7.484848069261972e-06, "loss": 0.0396, "step": 10646 }, { "epoch": 1.78, "grad_norm": 0.8593461513519287, "learning_rate": 7.483100437569695e-06, "loss": 0.0461, "step": 10647 }, { "epoch": 1.78, "grad_norm": 0.37540802359580994, "learning_rate": 7.481352887944118e-06, "loss": 0.0484, "step": 10648 }, { "epoch": 1.78, "grad_norm": 0.2586019039154053, "learning_rate": 7.479605420442227e-06, "loss": 0.0307, "step": 10649 }, { "epoch": 1.78, "grad_norm": 0.33832135796546936, "learning_rate": 7.477858035120995e-06, "loss": 0.0386, "step": 10650 }, { "epoch": 1.78, "grad_norm": 0.433945894241333, "learning_rate": 7.476110732037397e-06, "loss": 0.0413, "step": 10651 }, { "epoch": 1.78, "grad_norm": 0.4537029266357422, "learning_rate": 7.4743635112484105e-06, "loss": 0.0427, "step": 10652 }, { "epoch": 1.78, "grad_norm": 0.5912003517150879, "learning_rate": 7.472616372811001e-06, "loss": 0.0391, "step": 10653 }, { "epoch": 1.78, "grad_norm": 0.47967252135276794, "learning_rate": 7.4708693167821434e-06, "loss": 0.0543, "step": 10654 }, { "epoch": 1.78, "grad_norm": 0.9841614365577698, "learning_rate": 7.469122343218795e-06, "loss": 0.0763, "step": 10655 }, { "epoch": 1.78, "grad_norm": 0.3427942097187042, "learning_rate": 7.46737545217792e-06, "loss": 0.0398, "step": 10656 }, { "epoch": 1.78, "grad_norm": 0.33045536279678345, "learning_rate": 7.465628643716485e-06, "loss": 0.0404, "step": 10657 }, { "epoch": 1.78, "grad_norm": 0.34554222226142883, "learning_rate": 7.463881917891439e-06, "loss": 0.0299, "step": 10658 }, { "epoch": 1.78, "grad_norm": 0.4358275532722473, "learning_rate": 7.462135274759734e-06, "loss": 0.0378, "step": 10659 }, { "epoch": 1.78, "grad_norm": 0.4122191369533539, "learning_rate": 7.460388714378331e-06, "loss": 0.0415, "step": 10660 }, { "epoch": 1.78, "grad_norm": 0.8998454213142395, "learning_rate": 7.458642236804173e-06, "loss": 0.0517, "step": 10661 }, { "epoch": 1.78, "grad_norm": 0.5563358068466187, "learning_rate": 7.4568958420942024e-06, "loss": 0.0618, "step": 10662 }, { "epoch": 1.78, "grad_norm": 0.536426305770874, "learning_rate": 7.45514953030537e-06, "loss": 0.0518, "step": 10663 }, { "epoch": 1.78, "grad_norm": 0.43603718280792236, "learning_rate": 7.453403301494616e-06, "loss": 0.0558, "step": 10664 }, { "epoch": 1.78, "grad_norm": 0.32890403270721436, "learning_rate": 7.4516571557188725e-06, "loss": 0.051, "step": 10665 }, { "epoch": 1.78, "grad_norm": 0.32524988055229187, "learning_rate": 7.449911093035081e-06, "loss": 0.0342, "step": 10666 }, { "epoch": 1.78, "grad_norm": 0.38737088441848755, "learning_rate": 7.448165113500168e-06, "loss": 0.0461, "step": 10667 }, { "epoch": 1.78, "grad_norm": 0.28022250533103943, "learning_rate": 7.446419217171067e-06, "loss": 0.0331, "step": 10668 }, { "epoch": 1.78, "grad_norm": 0.4227600395679474, "learning_rate": 7.444673404104707e-06, "loss": 0.0527, "step": 10669 }, { "epoch": 1.78, "grad_norm": 0.33437830209732056, "learning_rate": 7.442927674358009e-06, "loss": 0.0476, "step": 10670 }, { "epoch": 1.78, "grad_norm": 0.8342479467391968, "learning_rate": 7.441182027987899e-06, "loss": 0.079, "step": 10671 }, { "epoch": 1.78, "grad_norm": 0.3891429901123047, "learning_rate": 7.4394364650512894e-06, "loss": 0.0423, "step": 10672 }, { "epoch": 1.79, "grad_norm": 0.5205122232437134, "learning_rate": 7.4376909856051e-06, "loss": 0.0605, "step": 10673 }, { "epoch": 1.79, "grad_norm": 0.49822744727134705, "learning_rate": 7.435945589706247e-06, "loss": 0.0451, "step": 10674 }, { "epoch": 1.79, "grad_norm": 0.48092004656791687, "learning_rate": 7.4342002774116385e-06, "loss": 0.0465, "step": 10675 }, { "epoch": 1.79, "grad_norm": 0.4927607476711273, "learning_rate": 7.43245504877818e-06, "loss": 0.0445, "step": 10676 }, { "epoch": 1.79, "grad_norm": 0.5204039216041565, "learning_rate": 7.4307099038627826e-06, "loss": 0.0428, "step": 10677 }, { "epoch": 1.79, "grad_norm": 0.3314729928970337, "learning_rate": 7.4289648427223485e-06, "loss": 0.05, "step": 10678 }, { "epoch": 1.79, "grad_norm": 0.5134850144386292, "learning_rate": 7.427219865413768e-06, "loss": 0.0377, "step": 10679 }, { "epoch": 1.79, "grad_norm": 0.44283854961395264, "learning_rate": 7.425474971993951e-06, "loss": 0.0525, "step": 10680 }, { "epoch": 1.79, "grad_norm": 0.3899584710597992, "learning_rate": 7.423730162519782e-06, "loss": 0.0348, "step": 10681 }, { "epoch": 1.79, "grad_norm": 0.47034311294555664, "learning_rate": 7.421985437048162e-06, "loss": 0.0395, "step": 10682 }, { "epoch": 1.79, "grad_norm": 0.42887091636657715, "learning_rate": 7.420240795635975e-06, "loss": 0.0569, "step": 10683 }, { "epoch": 1.79, "grad_norm": 0.433137983083725, "learning_rate": 7.4184962383401035e-06, "loss": 0.0397, "step": 10684 }, { "epoch": 1.79, "grad_norm": 0.4626242220401764, "learning_rate": 7.41675176521744e-06, "loss": 0.0298, "step": 10685 }, { "epoch": 1.79, "grad_norm": 0.3477247357368469, "learning_rate": 7.41500737632486e-06, "loss": 0.037, "step": 10686 }, { "epoch": 1.79, "grad_norm": 0.4144537150859833, "learning_rate": 7.413263071719237e-06, "loss": 0.0486, "step": 10687 }, { "epoch": 1.79, "grad_norm": 0.3454781770706177, "learning_rate": 7.411518851457453e-06, "loss": 0.0385, "step": 10688 }, { "epoch": 1.79, "grad_norm": 0.48031142354011536, "learning_rate": 7.409774715596383e-06, "loss": 0.0359, "step": 10689 }, { "epoch": 1.79, "grad_norm": 0.4651027321815491, "learning_rate": 7.408030664192886e-06, "loss": 0.0492, "step": 10690 }, { "epoch": 1.79, "grad_norm": 0.30178672075271606, "learning_rate": 7.4062866973038376e-06, "loss": 0.0371, "step": 10691 }, { "epoch": 1.79, "grad_norm": 0.4597844183444977, "learning_rate": 7.404542814986102e-06, "loss": 0.0607, "step": 10692 }, { "epoch": 1.79, "grad_norm": 0.4861504137516022, "learning_rate": 7.402799017296533e-06, "loss": 0.0612, "step": 10693 }, { "epoch": 1.79, "grad_norm": 0.4014177620410919, "learning_rate": 7.401055304292001e-06, "loss": 0.0387, "step": 10694 }, { "epoch": 1.79, "grad_norm": 0.5893719792366028, "learning_rate": 7.399311676029351e-06, "loss": 0.0468, "step": 10695 }, { "epoch": 1.79, "grad_norm": 0.4437864124774933, "learning_rate": 7.397568132565442e-06, "loss": 0.0404, "step": 10696 }, { "epoch": 1.79, "grad_norm": 0.37658771872520447, "learning_rate": 7.395824673957127e-06, "loss": 0.0624, "step": 10697 }, { "epoch": 1.79, "grad_norm": 0.3141897916793823, "learning_rate": 7.394081300261244e-06, "loss": 0.0509, "step": 10698 }, { "epoch": 1.79, "grad_norm": 0.4231025278568268, "learning_rate": 7.39233801153465e-06, "loss": 0.0456, "step": 10699 }, { "epoch": 1.79, "grad_norm": 0.37659984827041626, "learning_rate": 7.3905948078341796e-06, "loss": 0.0431, "step": 10700 }, { "epoch": 1.79, "grad_norm": 0.5970999002456665, "learning_rate": 7.388851689216669e-06, "loss": 0.047, "step": 10701 }, { "epoch": 1.79, "grad_norm": 0.39369791746139526, "learning_rate": 7.387108655738966e-06, "loss": 0.036, "step": 10702 }, { "epoch": 1.79, "grad_norm": 0.48637816309928894, "learning_rate": 7.385365707457894e-06, "loss": 0.0346, "step": 10703 }, { "epoch": 1.79, "grad_norm": 0.3432106077671051, "learning_rate": 7.383622844430287e-06, "loss": 0.0343, "step": 10704 }, { "epoch": 1.79, "grad_norm": 0.45714712142944336, "learning_rate": 7.381880066712979e-06, "loss": 0.0484, "step": 10705 }, { "epoch": 1.79, "grad_norm": 0.42533963918685913, "learning_rate": 7.38013737436279e-06, "loss": 0.043, "step": 10706 }, { "epoch": 1.79, "grad_norm": 0.4699780344963074, "learning_rate": 7.3783947674365385e-06, "loss": 0.0456, "step": 10707 }, { "epoch": 1.79, "grad_norm": 0.3779003322124481, "learning_rate": 7.376652245991052e-06, "loss": 0.0475, "step": 10708 }, { "epoch": 1.79, "grad_norm": 0.4240582287311554, "learning_rate": 7.374909810083144e-06, "loss": 0.0404, "step": 10709 }, { "epoch": 1.79, "grad_norm": 0.3898545801639557, "learning_rate": 7.3731674597696326e-06, "loss": 0.0384, "step": 10710 }, { "epoch": 1.79, "grad_norm": 0.38613656163215637, "learning_rate": 7.3714251951073244e-06, "loss": 0.0521, "step": 10711 }, { "epoch": 1.79, "grad_norm": 0.43939995765686035, "learning_rate": 7.369683016153028e-06, "loss": 0.0557, "step": 10712 }, { "epoch": 1.79, "grad_norm": 0.4275764226913452, "learning_rate": 7.367940922963555e-06, "loss": 0.0469, "step": 10713 }, { "epoch": 1.79, "grad_norm": 0.3808768391609192, "learning_rate": 7.366198915595707e-06, "loss": 0.0588, "step": 10714 }, { "epoch": 1.79, "grad_norm": 0.3642866015434265, "learning_rate": 7.364456994106277e-06, "loss": 0.0426, "step": 10715 }, { "epoch": 1.79, "grad_norm": 0.3644343614578247, "learning_rate": 7.362715158552071e-06, "loss": 0.0389, "step": 10716 }, { "epoch": 1.79, "grad_norm": 0.514114260673523, "learning_rate": 7.360973408989882e-06, "loss": 0.0527, "step": 10717 }, { "epoch": 1.79, "grad_norm": 0.3374086022377014, "learning_rate": 7.359231745476498e-06, "loss": 0.0474, "step": 10718 }, { "epoch": 1.79, "grad_norm": 0.4876461625099182, "learning_rate": 7.357490168068714e-06, "loss": 0.0414, "step": 10719 }, { "epoch": 1.79, "grad_norm": 0.41478532552719116, "learning_rate": 7.3557486768233115e-06, "loss": 0.0437, "step": 10720 }, { "epoch": 1.79, "grad_norm": 0.463245689868927, "learning_rate": 7.3540072717970724e-06, "loss": 0.0406, "step": 10721 }, { "epoch": 1.79, "grad_norm": 0.43918946385383606, "learning_rate": 7.352265953046786e-06, "loss": 0.0533, "step": 10722 }, { "epoch": 1.79, "grad_norm": 0.5232739448547363, "learning_rate": 7.350524720629222e-06, "loss": 0.0605, "step": 10723 }, { "epoch": 1.79, "grad_norm": 0.5059155225753784, "learning_rate": 7.348783574601159e-06, "loss": 0.0501, "step": 10724 }, { "epoch": 1.79, "grad_norm": 0.32014700770378113, "learning_rate": 7.347042515019371e-06, "loss": 0.0386, "step": 10725 }, { "epoch": 1.79, "grad_norm": 0.6551592946052551, "learning_rate": 7.345301541940622e-06, "loss": 0.0509, "step": 10726 }, { "epoch": 1.79, "grad_norm": 0.5011731386184692, "learning_rate": 7.343560655421686e-06, "loss": 0.0491, "step": 10727 }, { "epoch": 1.79, "grad_norm": 0.5099555253982544, "learning_rate": 7.34181985551932e-06, "loss": 0.037, "step": 10728 }, { "epoch": 1.79, "grad_norm": 0.42106765508651733, "learning_rate": 7.340079142290285e-06, "loss": 0.0588, "step": 10729 }, { "epoch": 1.79, "grad_norm": 0.32344377040863037, "learning_rate": 7.338338515791347e-06, "loss": 0.0475, "step": 10730 }, { "epoch": 1.79, "grad_norm": 0.40803295373916626, "learning_rate": 7.336597976079254e-06, "loss": 0.0423, "step": 10731 }, { "epoch": 1.79, "grad_norm": 0.5606310963630676, "learning_rate": 7.334857523210758e-06, "loss": 0.0554, "step": 10732 }, { "epoch": 1.8, "grad_norm": 0.43771228194236755, "learning_rate": 7.333117157242615e-06, "loss": 0.0437, "step": 10733 }, { "epoch": 1.8, "grad_norm": 0.5417669415473938, "learning_rate": 7.3313768782315695e-06, "loss": 0.045, "step": 10734 }, { "epoch": 1.8, "grad_norm": 0.552352249622345, "learning_rate": 7.32963668623436e-06, "loss": 0.055, "step": 10735 }, { "epoch": 1.8, "grad_norm": 0.28881317377090454, "learning_rate": 7.327896581307733e-06, "loss": 0.0432, "step": 10736 }, { "epoch": 1.8, "grad_norm": 0.40840670466423035, "learning_rate": 7.326156563508424e-06, "loss": 0.0462, "step": 10737 }, { "epoch": 1.8, "grad_norm": 0.3342995047569275, "learning_rate": 7.324416632893174e-06, "loss": 0.0429, "step": 10738 }, { "epoch": 1.8, "grad_norm": 0.27244141697883606, "learning_rate": 7.32267678951871e-06, "loss": 0.0428, "step": 10739 }, { "epoch": 1.8, "grad_norm": 0.3338956832885742, "learning_rate": 7.320937033441762e-06, "loss": 0.0339, "step": 10740 }, { "epoch": 1.8, "grad_norm": 0.4408717453479767, "learning_rate": 7.319197364719062e-06, "loss": 0.0388, "step": 10741 }, { "epoch": 1.8, "grad_norm": 0.34460270404815674, "learning_rate": 7.317457783407331e-06, "loss": 0.0508, "step": 10742 }, { "epoch": 1.8, "grad_norm": 0.40509533882141113, "learning_rate": 7.315718289563285e-06, "loss": 0.0468, "step": 10743 }, { "epoch": 1.8, "grad_norm": 0.31771937012672424, "learning_rate": 7.313978883243651e-06, "loss": 0.0468, "step": 10744 }, { "epoch": 1.8, "grad_norm": 0.4821573793888092, "learning_rate": 7.312239564505142e-06, "loss": 0.0464, "step": 10745 }, { "epoch": 1.8, "grad_norm": 0.42297422885894775, "learning_rate": 7.310500333404464e-06, "loss": 0.0551, "step": 10746 }, { "epoch": 1.8, "grad_norm": 0.6929263472557068, "learning_rate": 7.308761189998339e-06, "loss": 0.0465, "step": 10747 }, { "epoch": 1.8, "grad_norm": 0.43925759196281433, "learning_rate": 7.307022134343465e-06, "loss": 0.051, "step": 10748 }, { "epoch": 1.8, "grad_norm": 0.40902185440063477, "learning_rate": 7.305283166496544e-06, "loss": 0.0455, "step": 10749 }, { "epoch": 1.8, "grad_norm": 0.49221619963645935, "learning_rate": 7.303544286514288e-06, "loss": 0.0518, "step": 10750 }, { "epoch": 1.8, "grad_norm": 0.4516788721084595, "learning_rate": 7.3018054944533845e-06, "loss": 0.0736, "step": 10751 }, { "epoch": 1.8, "grad_norm": 0.757279098033905, "learning_rate": 7.300066790370534e-06, "loss": 0.0434, "step": 10752 }, { "epoch": 1.8, "grad_norm": 0.2373601496219635, "learning_rate": 7.298328174322433e-06, "loss": 0.0395, "step": 10753 }, { "epoch": 1.8, "grad_norm": 0.45327621698379517, "learning_rate": 7.296589646365764e-06, "loss": 0.049, "step": 10754 }, { "epoch": 1.8, "grad_norm": 0.4044051766395569, "learning_rate": 7.2948512065572186e-06, "loss": 0.0529, "step": 10755 }, { "epoch": 1.8, "grad_norm": 0.43929389119148254, "learning_rate": 7.2931128549534795e-06, "loss": 0.0565, "step": 10756 }, { "epoch": 1.8, "grad_norm": 0.39439815282821655, "learning_rate": 7.291374591611222e-06, "loss": 0.0571, "step": 10757 }, { "epoch": 1.8, "grad_norm": 0.5344461798667908, "learning_rate": 7.2896364165871366e-06, "loss": 0.0713, "step": 10758 }, { "epoch": 1.8, "grad_norm": 0.4905840754508972, "learning_rate": 7.28789832993789e-06, "loss": 0.0522, "step": 10759 }, { "epoch": 1.8, "grad_norm": 0.493908554315567, "learning_rate": 7.286160331720153e-06, "loss": 0.0409, "step": 10760 }, { "epoch": 1.8, "grad_norm": 0.520934522151947, "learning_rate": 7.284422421990604e-06, "loss": 0.0415, "step": 10761 }, { "epoch": 1.8, "grad_norm": 0.32410627603530884, "learning_rate": 7.282684600805907e-06, "loss": 0.047, "step": 10762 }, { "epoch": 1.8, "grad_norm": 0.579044759273529, "learning_rate": 7.2809468682227165e-06, "loss": 0.0682, "step": 10763 }, { "epoch": 1.8, "grad_norm": 0.317892849445343, "learning_rate": 7.279209224297704e-06, "loss": 0.0505, "step": 10764 }, { "epoch": 1.8, "grad_norm": 0.6085790991783142, "learning_rate": 7.277471669087521e-06, "loss": 0.0754, "step": 10765 }, { "epoch": 1.8, "grad_norm": 0.43425649404525757, "learning_rate": 7.275734202648832e-06, "loss": 0.0435, "step": 10766 }, { "epoch": 1.8, "grad_norm": 0.4113733768463135, "learning_rate": 7.27399682503828e-06, "loss": 0.0438, "step": 10767 }, { "epoch": 1.8, "grad_norm": 0.41704103350639343, "learning_rate": 7.272259536312515e-06, "loss": 0.0484, "step": 10768 }, { "epoch": 1.8, "grad_norm": 0.35527893900871277, "learning_rate": 7.270522336528191e-06, "loss": 0.0385, "step": 10769 }, { "epoch": 1.8, "grad_norm": 0.33152422308921814, "learning_rate": 7.268785225741946e-06, "loss": 0.0329, "step": 10770 }, { "epoch": 1.8, "grad_norm": 0.4002625346183777, "learning_rate": 7.267048204010418e-06, "loss": 0.0464, "step": 10771 }, { "epoch": 1.8, "grad_norm": 0.3611619174480438, "learning_rate": 7.26531127139025e-06, "loss": 0.0333, "step": 10772 }, { "epoch": 1.8, "grad_norm": 0.34143269062042236, "learning_rate": 7.263574427938076e-06, "loss": 0.0413, "step": 10773 }, { "epoch": 1.8, "grad_norm": 0.4474986791610718, "learning_rate": 7.261837673710524e-06, "loss": 0.0504, "step": 10774 }, { "epoch": 1.8, "grad_norm": 0.47045814990997314, "learning_rate": 7.260101008764229e-06, "loss": 0.0404, "step": 10775 }, { "epoch": 1.8, "grad_norm": 0.40001988410949707, "learning_rate": 7.258364433155812e-06, "loss": 0.0378, "step": 10776 }, { "epoch": 1.8, "grad_norm": 0.34043261408805847, "learning_rate": 7.2566279469418986e-06, "loss": 0.0597, "step": 10777 }, { "epoch": 1.8, "grad_norm": 0.6152670383453369, "learning_rate": 7.2548915501791126e-06, "loss": 0.0569, "step": 10778 }, { "epoch": 1.8, "grad_norm": 0.43311962485313416, "learning_rate": 7.253155242924063e-06, "loss": 0.051, "step": 10779 }, { "epoch": 1.8, "grad_norm": 0.39089420437812805, "learning_rate": 7.2514190252333716e-06, "loss": 0.045, "step": 10780 }, { "epoch": 1.8, "grad_norm": 0.49795442819595337, "learning_rate": 7.2496828971636505e-06, "loss": 0.0445, "step": 10781 }, { "epoch": 1.8, "grad_norm": 0.5720736384391785, "learning_rate": 7.2479468587715e-06, "loss": 0.0598, "step": 10782 }, { "epoch": 1.8, "grad_norm": 0.4805295169353485, "learning_rate": 7.246210910113535e-06, "loss": 0.0661, "step": 10783 }, { "epoch": 1.8, "grad_norm": 0.37725338339805603, "learning_rate": 7.244475051246354e-06, "loss": 0.0561, "step": 10784 }, { "epoch": 1.8, "grad_norm": 0.37252840399742126, "learning_rate": 7.242739282226554e-06, "loss": 0.0412, "step": 10785 }, { "epoch": 1.8, "grad_norm": 0.3878538906574249, "learning_rate": 7.241003603110739e-06, "loss": 0.0413, "step": 10786 }, { "epoch": 1.8, "grad_norm": 0.40680307149887085, "learning_rate": 7.239268013955498e-06, "loss": 0.0438, "step": 10787 }, { "epoch": 1.8, "grad_norm": 0.47433215379714966, "learning_rate": 7.237532514817422e-06, "loss": 0.0516, "step": 10788 }, { "epoch": 1.8, "grad_norm": 0.400232195854187, "learning_rate": 7.235797105753104e-06, "loss": 0.04, "step": 10789 }, { "epoch": 1.8, "grad_norm": 0.39945268630981445, "learning_rate": 7.234061786819122e-06, "loss": 0.0449, "step": 10790 }, { "epoch": 1.8, "grad_norm": 0.4929516315460205, "learning_rate": 7.232326558072067e-06, "loss": 0.0546, "step": 10791 }, { "epoch": 1.8, "grad_norm": 0.35752224922180176, "learning_rate": 7.230591419568511e-06, "loss": 0.0542, "step": 10792 }, { "epoch": 1.81, "grad_norm": 0.5369531512260437, "learning_rate": 7.22885637136503e-06, "loss": 0.0432, "step": 10793 }, { "epoch": 1.81, "grad_norm": 0.3172285556793213, "learning_rate": 7.227121413518206e-06, "loss": 0.0395, "step": 10794 }, { "epoch": 1.81, "grad_norm": 0.5541665554046631, "learning_rate": 7.2253865460846e-06, "loss": 0.0622, "step": 10795 }, { "epoch": 1.81, "grad_norm": 0.3979601263999939, "learning_rate": 7.223651769120784e-06, "loss": 0.0447, "step": 10796 }, { "epoch": 1.81, "grad_norm": 0.7763032913208008, "learning_rate": 7.221917082683322e-06, "loss": 0.0445, "step": 10797 }, { "epoch": 1.81, "grad_norm": 0.3884010910987854, "learning_rate": 7.220182486828777e-06, "loss": 0.0402, "step": 10798 }, { "epoch": 1.81, "grad_norm": 0.41886183619499207, "learning_rate": 7.218447981613703e-06, "loss": 0.0441, "step": 10799 }, { "epoch": 1.81, "grad_norm": 0.37305521965026855, "learning_rate": 7.216713567094661e-06, "loss": 0.0535, "step": 10800 }, { "epoch": 1.81, "grad_norm": 0.4493541121482849, "learning_rate": 7.214979243328204e-06, "loss": 0.0648, "step": 10801 }, { "epoch": 1.81, "grad_norm": 0.576288640499115, "learning_rate": 7.213245010370875e-06, "loss": 0.0547, "step": 10802 }, { "epoch": 1.81, "grad_norm": 0.3878347873687744, "learning_rate": 7.21151086827923e-06, "loss": 0.0508, "step": 10803 }, { "epoch": 1.81, "grad_norm": 0.5001779198646545, "learning_rate": 7.2097768171098035e-06, "loss": 0.0496, "step": 10804 }, { "epoch": 1.81, "grad_norm": 0.3918384909629822, "learning_rate": 7.208042856919144e-06, "loss": 0.0616, "step": 10805 }, { "epoch": 1.81, "grad_norm": 0.3809737265110016, "learning_rate": 7.206308987763788e-06, "loss": 0.0404, "step": 10806 }, { "epoch": 1.81, "grad_norm": 0.35096269845962524, "learning_rate": 7.2045752097002665e-06, "loss": 0.0305, "step": 10807 }, { "epoch": 1.81, "grad_norm": 0.5240968465805054, "learning_rate": 7.202841522785115e-06, "loss": 0.0381, "step": 10808 }, { "epoch": 1.81, "grad_norm": 0.37568411231040955, "learning_rate": 7.201107927074866e-06, "loss": 0.0618, "step": 10809 }, { "epoch": 1.81, "grad_norm": 0.33044278621673584, "learning_rate": 7.199374422626035e-06, "loss": 0.0462, "step": 10810 }, { "epoch": 1.81, "grad_norm": 0.6547287702560425, "learning_rate": 7.197641009495158e-06, "loss": 0.0539, "step": 10811 }, { "epoch": 1.81, "grad_norm": 0.6041680574417114, "learning_rate": 7.195907687738745e-06, "loss": 0.0464, "step": 10812 }, { "epoch": 1.81, "grad_norm": 0.5256621241569519, "learning_rate": 7.194174457413315e-06, "loss": 0.0536, "step": 10813 }, { "epoch": 1.81, "grad_norm": 0.30905258655548096, "learning_rate": 7.19244131857539e-06, "loss": 0.0433, "step": 10814 }, { "epoch": 1.81, "grad_norm": 0.5107397437095642, "learning_rate": 7.19070827128147e-06, "loss": 0.0528, "step": 10815 }, { "epoch": 1.81, "grad_norm": 0.3439366817474365, "learning_rate": 7.188975315588069e-06, "loss": 0.0371, "step": 10816 }, { "epoch": 1.81, "grad_norm": 0.49279722571372986, "learning_rate": 7.187242451551695e-06, "loss": 0.0492, "step": 10817 }, { "epoch": 1.81, "grad_norm": 0.3205074071884155, "learning_rate": 7.185509679228842e-06, "loss": 0.0425, "step": 10818 }, { "epoch": 1.81, "grad_norm": 0.580074667930603, "learning_rate": 7.18377699867602e-06, "loss": 0.0436, "step": 10819 }, { "epoch": 1.81, "grad_norm": 0.4918557405471802, "learning_rate": 7.182044409949716e-06, "loss": 0.05, "step": 10820 }, { "epoch": 1.81, "grad_norm": 0.38183093070983887, "learning_rate": 7.180311913106424e-06, "loss": 0.0369, "step": 10821 }, { "epoch": 1.81, "grad_norm": 0.4270952641963959, "learning_rate": 7.1785795082026424e-06, "loss": 0.0501, "step": 10822 }, { "epoch": 1.81, "grad_norm": 0.40279293060302734, "learning_rate": 7.1768471952948495e-06, "loss": 0.0616, "step": 10823 }, { "epoch": 1.81, "grad_norm": 0.49517956376075745, "learning_rate": 7.175114974439534e-06, "loss": 0.0672, "step": 10824 }, { "epoch": 1.81, "grad_norm": 0.4916965365409851, "learning_rate": 7.173382845693176e-06, "loss": 0.0774, "step": 10825 }, { "epoch": 1.81, "grad_norm": 0.36621078848838806, "learning_rate": 7.1716508091122565e-06, "loss": 0.0399, "step": 10826 }, { "epoch": 1.81, "grad_norm": 0.3920920789241791, "learning_rate": 7.169918864753242e-06, "loss": 0.0365, "step": 10827 }, { "epoch": 1.81, "grad_norm": 0.43965259194374084, "learning_rate": 7.168187012672616e-06, "loss": 0.0447, "step": 10828 }, { "epoch": 1.81, "grad_norm": 0.36959099769592285, "learning_rate": 7.166455252926843e-06, "loss": 0.0401, "step": 10829 }, { "epoch": 1.81, "grad_norm": 0.4265506863594055, "learning_rate": 7.164723585572384e-06, "loss": 0.0393, "step": 10830 }, { "epoch": 1.81, "grad_norm": 0.37497255206108093, "learning_rate": 7.162992010665712e-06, "loss": 0.0388, "step": 10831 }, { "epoch": 1.81, "grad_norm": 0.3268148601055145, "learning_rate": 7.1612605282632785e-06, "loss": 0.0606, "step": 10832 }, { "epoch": 1.81, "grad_norm": 0.4043002426624298, "learning_rate": 7.159529138421546e-06, "loss": 0.041, "step": 10833 }, { "epoch": 1.81, "grad_norm": 0.329598993062973, "learning_rate": 7.1577978411969695e-06, "loss": 0.0449, "step": 10834 }, { "epoch": 1.81, "grad_norm": 0.5774082541465759, "learning_rate": 7.156066636645993e-06, "loss": 0.0511, "step": 10835 }, { "epoch": 1.81, "grad_norm": 0.40573766827583313, "learning_rate": 7.154335524825073e-06, "loss": 0.0481, "step": 10836 }, { "epoch": 1.81, "grad_norm": 0.37326493859291077, "learning_rate": 7.1526045057906515e-06, "loss": 0.0483, "step": 10837 }, { "epoch": 1.81, "grad_norm": 0.40832850337028503, "learning_rate": 7.150873579599166e-06, "loss": 0.0503, "step": 10838 }, { "epoch": 1.81, "grad_norm": 0.39979830384254456, "learning_rate": 7.1491427463070636e-06, "loss": 0.0273, "step": 10839 }, { "epoch": 1.81, "grad_norm": 0.40951719880104065, "learning_rate": 7.147412005970775e-06, "loss": 0.0607, "step": 10840 }, { "epoch": 1.81, "grad_norm": 0.42898356914520264, "learning_rate": 7.145681358646732e-06, "loss": 0.0563, "step": 10841 }, { "epoch": 1.81, "grad_norm": 0.5555890798568726, "learning_rate": 7.143950804391371e-06, "loss": 0.0541, "step": 10842 }, { "epoch": 1.81, "grad_norm": 0.5323765277862549, "learning_rate": 7.142220343261113e-06, "loss": 0.0519, "step": 10843 }, { "epoch": 1.81, "grad_norm": 0.6066566705703735, "learning_rate": 7.140489975312381e-06, "loss": 0.0531, "step": 10844 }, { "epoch": 1.81, "grad_norm": 0.45479822158813477, "learning_rate": 7.138759700601604e-06, "loss": 0.0365, "step": 10845 }, { "epoch": 1.81, "grad_norm": 0.4041678309440613, "learning_rate": 7.13702951918519e-06, "loss": 0.0406, "step": 10846 }, { "epoch": 1.81, "grad_norm": 0.8150380849838257, "learning_rate": 7.135299431119562e-06, "loss": 0.0688, "step": 10847 }, { "epoch": 1.81, "grad_norm": 0.48288655281066895, "learning_rate": 7.133569436461127e-06, "loss": 0.0537, "step": 10848 }, { "epoch": 1.81, "grad_norm": 0.2503868341445923, "learning_rate": 7.13183953526629e-06, "loss": 0.037, "step": 10849 }, { "epoch": 1.81, "grad_norm": 0.41179949045181274, "learning_rate": 7.130109727591468e-06, "loss": 0.0437, "step": 10850 }, { "epoch": 1.81, "grad_norm": 0.4157405495643616, "learning_rate": 7.128380013493055e-06, "loss": 0.0518, "step": 10851 }, { "epoch": 1.82, "grad_norm": 0.5162404775619507, "learning_rate": 7.126650393027449e-06, "loss": 0.0634, "step": 10852 }, { "epoch": 1.82, "grad_norm": 0.4965653419494629, "learning_rate": 7.1249208662510515e-06, "loss": 0.045, "step": 10853 }, { "epoch": 1.82, "grad_norm": 0.4352042078971863, "learning_rate": 7.123191433220257e-06, "loss": 0.0537, "step": 10854 }, { "epoch": 1.82, "grad_norm": 0.46401578187942505, "learning_rate": 7.121462093991448e-06, "loss": 0.0645, "step": 10855 }, { "epoch": 1.82, "grad_norm": 0.4568203091621399, "learning_rate": 7.1197328486210195e-06, "loss": 0.0473, "step": 10856 }, { "epoch": 1.82, "grad_norm": 0.38064849376678467, "learning_rate": 7.118003697165354e-06, "loss": 0.0517, "step": 10857 }, { "epoch": 1.82, "grad_norm": 0.31477460265159607, "learning_rate": 7.116274639680829e-06, "loss": 0.0421, "step": 10858 }, { "epoch": 1.82, "grad_norm": 0.35378366708755493, "learning_rate": 7.11454567622383e-06, "loss": 0.0329, "step": 10859 }, { "epoch": 1.82, "grad_norm": 0.3351137042045593, "learning_rate": 7.112816806850722e-06, "loss": 0.047, "step": 10860 }, { "epoch": 1.82, "grad_norm": 0.5550222396850586, "learning_rate": 7.111088031617885e-06, "loss": 0.0691, "step": 10861 }, { "epoch": 1.82, "grad_norm": 0.4408651292324066, "learning_rate": 7.109359350581687e-06, "loss": 0.0449, "step": 10862 }, { "epoch": 1.82, "grad_norm": 0.4079921245574951, "learning_rate": 7.107630763798487e-06, "loss": 0.0515, "step": 10863 }, { "epoch": 1.82, "grad_norm": 0.3874019384384155, "learning_rate": 7.105902271324657e-06, "loss": 0.0404, "step": 10864 }, { "epoch": 1.82, "grad_norm": 0.317371129989624, "learning_rate": 7.104173873216554e-06, "loss": 0.0312, "step": 10865 }, { "epoch": 1.82, "grad_norm": 0.419107049703598, "learning_rate": 7.102445569530529e-06, "loss": 0.0371, "step": 10866 }, { "epoch": 1.82, "grad_norm": 0.3919825553894043, "learning_rate": 7.100717360322944e-06, "loss": 0.0491, "step": 10867 }, { "epoch": 1.82, "grad_norm": 0.3033420741558075, "learning_rate": 7.098989245650143e-06, "loss": 0.0412, "step": 10868 }, { "epoch": 1.82, "grad_norm": 0.43370282649993896, "learning_rate": 7.097261225568473e-06, "loss": 0.0458, "step": 10869 }, { "epoch": 1.82, "grad_norm": 0.37103909254074097, "learning_rate": 7.095533300134285e-06, "loss": 0.0297, "step": 10870 }, { "epoch": 1.82, "grad_norm": 0.34195834398269653, "learning_rate": 7.093805469403915e-06, "loss": 0.0543, "step": 10871 }, { "epoch": 1.82, "grad_norm": 0.3319704234600067, "learning_rate": 7.0920777334336985e-06, "loss": 0.0424, "step": 10872 }, { "epoch": 1.82, "grad_norm": 0.38566139340400696, "learning_rate": 7.09035009227998e-06, "loss": 0.0419, "step": 10873 }, { "epoch": 1.82, "grad_norm": 0.4226670563220978, "learning_rate": 7.08862254599908e-06, "loss": 0.0602, "step": 10874 }, { "epoch": 1.82, "grad_norm": 0.4527400732040405, "learning_rate": 7.086895094647338e-06, "loss": 0.0536, "step": 10875 }, { "epoch": 1.82, "grad_norm": 0.3729041814804077, "learning_rate": 7.085167738281073e-06, "loss": 0.0463, "step": 10876 }, { "epoch": 1.82, "grad_norm": 0.6737346053123474, "learning_rate": 7.0834404769566065e-06, "loss": 0.068, "step": 10877 }, { "epoch": 1.82, "grad_norm": 0.46107736229896545, "learning_rate": 7.081713310730265e-06, "loss": 0.0567, "step": 10878 }, { "epoch": 1.82, "grad_norm": 0.44474560022354126, "learning_rate": 7.079986239658361e-06, "loss": 0.0644, "step": 10879 }, { "epoch": 1.82, "grad_norm": 0.3291827142238617, "learning_rate": 7.078259263797203e-06, "loss": 0.0496, "step": 10880 }, { "epoch": 1.82, "grad_norm": 0.36770185828208923, "learning_rate": 7.0765323832031085e-06, "loss": 0.0437, "step": 10881 }, { "epoch": 1.82, "grad_norm": 0.5374966859817505, "learning_rate": 7.074805597932384e-06, "loss": 0.0518, "step": 10882 }, { "epoch": 1.82, "grad_norm": 0.5969152450561523, "learning_rate": 7.073078908041328e-06, "loss": 0.046, "step": 10883 }, { "epoch": 1.82, "grad_norm": 0.42995303869247437, "learning_rate": 7.071352313586246e-06, "loss": 0.0423, "step": 10884 }, { "epoch": 1.82, "grad_norm": 0.380075603723526, "learning_rate": 7.069625814623437e-06, "loss": 0.0464, "step": 10885 }, { "epoch": 1.82, "grad_norm": 0.6175217032432556, "learning_rate": 7.067899411209189e-06, "loss": 0.057, "step": 10886 }, { "epoch": 1.82, "grad_norm": 0.4154508113861084, "learning_rate": 7.0661731033998035e-06, "loss": 0.049, "step": 10887 }, { "epoch": 1.82, "grad_norm": 0.3494015634059906, "learning_rate": 7.06444689125156e-06, "loss": 0.0552, "step": 10888 }, { "epoch": 1.82, "grad_norm": 0.3377006947994232, "learning_rate": 7.062720774820746e-06, "loss": 0.0337, "step": 10889 }, { "epoch": 1.82, "grad_norm": 0.3273703157901764, "learning_rate": 7.06099475416365e-06, "loss": 0.0332, "step": 10890 }, { "epoch": 1.82, "grad_norm": 0.39334821701049805, "learning_rate": 7.05926882933654e-06, "loss": 0.0451, "step": 10891 }, { "epoch": 1.82, "grad_norm": 0.31187424063682556, "learning_rate": 7.057543000395702e-06, "loss": 0.0287, "step": 10892 }, { "epoch": 1.82, "grad_norm": 0.3355986773967743, "learning_rate": 7.055817267397407e-06, "loss": 0.0474, "step": 10893 }, { "epoch": 1.82, "grad_norm": 0.41238468885421753, "learning_rate": 7.054091630397917e-06, "loss": 0.044, "step": 10894 }, { "epoch": 1.82, "grad_norm": 0.39273542165756226, "learning_rate": 7.05236608945351e-06, "loss": 0.0516, "step": 10895 }, { "epoch": 1.82, "grad_norm": 0.5642284750938416, "learning_rate": 7.05064064462044e-06, "loss": 0.0687, "step": 10896 }, { "epoch": 1.82, "grad_norm": 0.4614526033401489, "learning_rate": 7.04891529595497e-06, "loss": 0.0434, "step": 10897 }, { "epoch": 1.82, "grad_norm": 0.3955308198928833, "learning_rate": 7.047190043513361e-06, "loss": 0.049, "step": 10898 }, { "epoch": 1.82, "grad_norm": 0.3084244430065155, "learning_rate": 7.045464887351864e-06, "loss": 0.0503, "step": 10899 }, { "epoch": 1.82, "grad_norm": 0.5134535431861877, "learning_rate": 7.043739827526726e-06, "loss": 0.0614, "step": 10900 }, { "epoch": 1.82, "grad_norm": 0.4104922115802765, "learning_rate": 7.042014864094205e-06, "loss": 0.0475, "step": 10901 }, { "epoch": 1.82, "grad_norm": 0.4883228838443756, "learning_rate": 7.040289997110535e-06, "loss": 0.0557, "step": 10902 }, { "epoch": 1.82, "grad_norm": 0.4390174150466919, "learning_rate": 7.038565226631966e-06, "loss": 0.0751, "step": 10903 }, { "epoch": 1.82, "grad_norm": 0.5089315176010132, "learning_rate": 7.036840552714729e-06, "loss": 0.0393, "step": 10904 }, { "epoch": 1.82, "grad_norm": 0.2890416979789734, "learning_rate": 7.035115975415061e-06, "loss": 0.0393, "step": 10905 }, { "epoch": 1.82, "grad_norm": 0.3231913149356842, "learning_rate": 7.0333914947892e-06, "loss": 0.0426, "step": 10906 }, { "epoch": 1.82, "grad_norm": 0.5678675770759583, "learning_rate": 7.03166711089337e-06, "loss": 0.0694, "step": 10907 }, { "epoch": 1.82, "grad_norm": 0.5402624607086182, "learning_rate": 7.029942823783793e-06, "loss": 0.0709, "step": 10908 }, { "epoch": 1.82, "grad_norm": 0.2746868133544922, "learning_rate": 7.0282186335166995e-06, "loss": 0.0313, "step": 10909 }, { "epoch": 1.82, "grad_norm": 0.38929909467697144, "learning_rate": 7.026494540148305e-06, "loss": 0.0398, "step": 10910 }, { "epoch": 1.82, "grad_norm": 0.427072137594223, "learning_rate": 7.024770543734823e-06, "loss": 0.0414, "step": 10911 }, { "epoch": 1.83, "grad_norm": 0.37219083309173584, "learning_rate": 7.0230466443324695e-06, "loss": 0.0402, "step": 10912 }, { "epoch": 1.83, "grad_norm": 0.45281097292900085, "learning_rate": 7.021322841997458e-06, "loss": 0.0495, "step": 10913 }, { "epoch": 1.83, "grad_norm": 0.6318402886390686, "learning_rate": 7.019599136785988e-06, "loss": 0.0754, "step": 10914 }, { "epoch": 1.83, "grad_norm": 0.5193896889686584, "learning_rate": 7.01787552875427e-06, "loss": 0.0563, "step": 10915 }, { "epoch": 1.83, "grad_norm": 0.41928768157958984, "learning_rate": 7.016152017958497e-06, "loss": 0.0449, "step": 10916 }, { "epoch": 1.83, "grad_norm": 0.4542860686779022, "learning_rate": 7.014428604454873e-06, "loss": 0.0589, "step": 10917 }, { "epoch": 1.83, "grad_norm": 0.27400606870651245, "learning_rate": 7.012705288299591e-06, "loss": 0.037, "step": 10918 }, { "epoch": 1.83, "grad_norm": 0.5773294568061829, "learning_rate": 7.010982069548836e-06, "loss": 0.0678, "step": 10919 }, { "epoch": 1.83, "grad_norm": 0.38323578238487244, "learning_rate": 7.009258948258802e-06, "loss": 0.0518, "step": 10920 }, { "epoch": 1.83, "grad_norm": 0.4455193281173706, "learning_rate": 7.007535924485676e-06, "loss": 0.0526, "step": 10921 }, { "epoch": 1.83, "grad_norm": 0.32027164101600647, "learning_rate": 7.005812998285628e-06, "loss": 0.0433, "step": 10922 }, { "epoch": 1.83, "grad_norm": 0.5529314875602722, "learning_rate": 7.004090169714848e-06, "loss": 0.0634, "step": 10923 }, { "epoch": 1.83, "grad_norm": 0.5264379978179932, "learning_rate": 7.002367438829505e-06, "loss": 0.0439, "step": 10924 }, { "epoch": 1.83, "grad_norm": 0.36971646547317505, "learning_rate": 7.00064480568577e-06, "loss": 0.0495, "step": 10925 }, { "epoch": 1.83, "grad_norm": 0.44283708930015564, "learning_rate": 6.9989222703398175e-06, "loss": 0.0605, "step": 10926 }, { "epoch": 1.83, "grad_norm": 0.2656288743019104, "learning_rate": 6.997199832847807e-06, "loss": 0.0314, "step": 10927 }, { "epoch": 1.83, "grad_norm": 0.3715466260910034, "learning_rate": 6.995477493265903e-06, "loss": 0.0523, "step": 10928 }, { "epoch": 1.83, "grad_norm": 0.9451460838317871, "learning_rate": 6.993755251650265e-06, "loss": 0.0546, "step": 10929 }, { "epoch": 1.83, "grad_norm": 0.3812142014503479, "learning_rate": 6.9920331080570455e-06, "loss": 0.0456, "step": 10930 }, { "epoch": 1.83, "grad_norm": 0.4816693961620331, "learning_rate": 6.990311062542404e-06, "loss": 0.0526, "step": 10931 }, { "epoch": 1.83, "grad_norm": 0.34302884340286255, "learning_rate": 6.988589115162485e-06, "loss": 0.0441, "step": 10932 }, { "epoch": 1.83, "grad_norm": 0.4653284549713135, "learning_rate": 6.986867265973432e-06, "loss": 0.0528, "step": 10933 }, { "epoch": 1.83, "grad_norm": 0.40966618061065674, "learning_rate": 6.985145515031397e-06, "loss": 0.0504, "step": 10934 }, { "epoch": 1.83, "grad_norm": 0.4032178521156311, "learning_rate": 6.983423862392515e-06, "loss": 0.0596, "step": 10935 }, { "epoch": 1.83, "grad_norm": 0.369556725025177, "learning_rate": 6.981702308112919e-06, "loss": 0.0479, "step": 10936 }, { "epoch": 1.83, "grad_norm": 0.6632532477378845, "learning_rate": 6.979980852248747e-06, "loss": 0.0683, "step": 10937 }, { "epoch": 1.83, "grad_norm": 0.42189252376556396, "learning_rate": 6.97825949485613e-06, "loss": 0.0593, "step": 10938 }, { "epoch": 1.83, "grad_norm": 0.3932073414325714, "learning_rate": 6.9765382359911886e-06, "loss": 0.0548, "step": 10939 }, { "epoch": 1.83, "grad_norm": 0.415260910987854, "learning_rate": 6.974817075710054e-06, "loss": 0.0585, "step": 10940 }, { "epoch": 1.83, "grad_norm": 0.4397101402282715, "learning_rate": 6.9730960140688455e-06, "loss": 0.0415, "step": 10941 }, { "epoch": 1.83, "grad_norm": 0.5164549946784973, "learning_rate": 6.971375051123675e-06, "loss": 0.0591, "step": 10942 }, { "epoch": 1.83, "grad_norm": 0.5711939334869385, "learning_rate": 6.969654186930666e-06, "loss": 0.075, "step": 10943 }, { "epoch": 1.83, "grad_norm": 0.4006190598011017, "learning_rate": 6.9679334215459185e-06, "loss": 0.028, "step": 10944 }, { "epoch": 1.83, "grad_norm": 0.3691781163215637, "learning_rate": 6.9662127550255475e-06, "loss": 0.0631, "step": 10945 }, { "epoch": 1.83, "grad_norm": 0.3233773112297058, "learning_rate": 6.96449218742566e-06, "loss": 0.0434, "step": 10946 }, { "epoch": 1.83, "grad_norm": 0.6699310541152954, "learning_rate": 6.962771718802348e-06, "loss": 0.0404, "step": 10947 }, { "epoch": 1.83, "grad_norm": 0.3099687397480011, "learning_rate": 6.961051349211716e-06, "loss": 0.0426, "step": 10948 }, { "epoch": 1.83, "grad_norm": 0.38752099871635437, "learning_rate": 6.95933107870986e-06, "loss": 0.0482, "step": 10949 }, { "epoch": 1.83, "grad_norm": 0.4532293677330017, "learning_rate": 6.957610907352865e-06, "loss": 0.048, "step": 10950 }, { "epoch": 1.83, "grad_norm": 0.30040234327316284, "learning_rate": 6.955890835196828e-06, "loss": 0.0335, "step": 10951 }, { "epoch": 1.83, "grad_norm": 0.38185176253318787, "learning_rate": 6.954170862297828e-06, "loss": 0.0422, "step": 10952 }, { "epoch": 1.83, "grad_norm": 0.4464814364910126, "learning_rate": 6.952450988711945e-06, "loss": 0.0429, "step": 10953 }, { "epoch": 1.83, "grad_norm": 0.6600521802902222, "learning_rate": 6.950731214495268e-06, "loss": 0.0769, "step": 10954 }, { "epoch": 1.83, "grad_norm": 0.3753190040588379, "learning_rate": 6.94901153970386e-06, "loss": 0.049, "step": 10955 }, { "epoch": 1.83, "grad_norm": 0.3804576098918915, "learning_rate": 6.9472919643938005e-06, "loss": 0.0497, "step": 10956 }, { "epoch": 1.83, "grad_norm": 0.3629242181777954, "learning_rate": 6.945572488621157e-06, "loss": 0.0421, "step": 10957 }, { "epoch": 1.83, "grad_norm": 0.529037356376648, "learning_rate": 6.9438531124419935e-06, "loss": 0.0537, "step": 10958 }, { "epoch": 1.83, "grad_norm": 0.45308929681777954, "learning_rate": 6.9421338359123765e-06, "loss": 0.0548, "step": 10959 }, { "epoch": 1.83, "grad_norm": 0.4822899401187897, "learning_rate": 6.940414659088361e-06, "loss": 0.0473, "step": 10960 }, { "epoch": 1.83, "grad_norm": 0.5441577434539795, "learning_rate": 6.9386955820260035e-06, "loss": 0.0544, "step": 10961 }, { "epoch": 1.83, "grad_norm": 0.40121930837631226, "learning_rate": 6.93697660478136e-06, "loss": 0.0588, "step": 10962 }, { "epoch": 1.83, "grad_norm": 0.2567692995071411, "learning_rate": 6.935257727410481e-06, "loss": 0.0361, "step": 10963 }, { "epoch": 1.83, "grad_norm": 0.42176830768585205, "learning_rate": 6.933538949969401e-06, "loss": 0.051, "step": 10964 }, { "epoch": 1.83, "grad_norm": 0.38821905851364136, "learning_rate": 6.931820272514176e-06, "loss": 0.037, "step": 10965 }, { "epoch": 1.83, "grad_norm": 0.45537182688713074, "learning_rate": 6.930101695100842e-06, "loss": 0.0578, "step": 10966 }, { "epoch": 1.83, "grad_norm": 0.3836798369884491, "learning_rate": 6.928383217785429e-06, "loss": 0.0431, "step": 10967 }, { "epoch": 1.83, "grad_norm": 0.3623303174972534, "learning_rate": 6.926664840623979e-06, "loss": 0.0397, "step": 10968 }, { "epoch": 1.83, "grad_norm": 0.4152480661869049, "learning_rate": 6.924946563672519e-06, "loss": 0.0501, "step": 10969 }, { "epoch": 1.83, "grad_norm": 0.2823592722415924, "learning_rate": 6.923228386987072e-06, "loss": 0.0411, "step": 10970 }, { "epoch": 1.83, "grad_norm": 0.25051963329315186, "learning_rate": 6.921510310623668e-06, "loss": 0.0297, "step": 10971 }, { "epoch": 1.84, "grad_norm": 0.4915463924407959, "learning_rate": 6.919792334638319e-06, "loss": 0.0538, "step": 10972 }, { "epoch": 1.84, "grad_norm": 0.31393513083457947, "learning_rate": 6.9180744590870475e-06, "loss": 0.0336, "step": 10973 }, { "epoch": 1.84, "grad_norm": 0.5399960875511169, "learning_rate": 6.916356684025869e-06, "loss": 0.0631, "step": 10974 }, { "epoch": 1.84, "grad_norm": 0.4366251230239868, "learning_rate": 6.914639009510785e-06, "loss": 0.0285, "step": 10975 }, { "epoch": 1.84, "grad_norm": 0.3098936975002289, "learning_rate": 6.91292143559781e-06, "loss": 0.0401, "step": 10976 }, { "epoch": 1.84, "grad_norm": 0.3535127639770508, "learning_rate": 6.911203962342947e-06, "loss": 0.0445, "step": 10977 }, { "epoch": 1.84, "grad_norm": 0.419096976518631, "learning_rate": 6.909486589802193e-06, "loss": 0.0438, "step": 10978 }, { "epoch": 1.84, "grad_norm": 0.4100414216518402, "learning_rate": 6.907769318031548e-06, "loss": 0.0434, "step": 10979 }, { "epoch": 1.84, "grad_norm": 0.33494213223457336, "learning_rate": 6.906052147087004e-06, "loss": 0.0392, "step": 10980 }, { "epoch": 1.84, "grad_norm": 0.34675532579421997, "learning_rate": 6.90433507702455e-06, "loss": 0.0547, "step": 10981 }, { "epoch": 1.84, "grad_norm": 0.2895653247833252, "learning_rate": 6.9026181079001795e-06, "loss": 0.0372, "step": 10982 }, { "epoch": 1.84, "grad_norm": 0.5081000924110413, "learning_rate": 6.900901239769873e-06, "loss": 0.0522, "step": 10983 }, { "epoch": 1.84, "grad_norm": 0.3349155783653259, "learning_rate": 6.899184472689606e-06, "loss": 0.0379, "step": 10984 }, { "epoch": 1.84, "grad_norm": 0.4453447461128235, "learning_rate": 6.897467806715363e-06, "loss": 0.0399, "step": 10985 }, { "epoch": 1.84, "grad_norm": 0.2636212706565857, "learning_rate": 6.895751241903112e-06, "loss": 0.0465, "step": 10986 }, { "epoch": 1.84, "grad_norm": 0.3844616413116455, "learning_rate": 6.8940347783088334e-06, "loss": 0.0599, "step": 10987 }, { "epoch": 1.84, "grad_norm": 0.4617730975151062, "learning_rate": 6.892318415988486e-06, "loss": 0.0621, "step": 10988 }, { "epoch": 1.84, "grad_norm": 0.6945325136184692, "learning_rate": 6.890602154998034e-06, "loss": 0.0525, "step": 10989 }, { "epoch": 1.84, "grad_norm": 0.4139724373817444, "learning_rate": 6.888885995393446e-06, "loss": 0.0478, "step": 10990 }, { "epoch": 1.84, "grad_norm": 0.32944735884666443, "learning_rate": 6.887169937230673e-06, "loss": 0.0454, "step": 10991 }, { "epoch": 1.84, "grad_norm": 0.4438253939151764, "learning_rate": 6.885453980565668e-06, "loss": 0.0421, "step": 10992 }, { "epoch": 1.84, "grad_norm": 0.4831867218017578, "learning_rate": 6.8837381254543866e-06, "loss": 0.0757, "step": 10993 }, { "epoch": 1.84, "grad_norm": 0.3854965269565582, "learning_rate": 6.882022371952774e-06, "loss": 0.0547, "step": 10994 }, { "epoch": 1.84, "grad_norm": 0.302130788564682, "learning_rate": 6.880306720116774e-06, "loss": 0.0539, "step": 10995 }, { "epoch": 1.84, "grad_norm": 0.3431507647037506, "learning_rate": 6.878591170002329e-06, "loss": 0.0407, "step": 10996 }, { "epoch": 1.84, "grad_norm": 0.35907822847366333, "learning_rate": 6.87687572166538e-06, "loss": 0.0446, "step": 10997 }, { "epoch": 1.84, "grad_norm": 0.4574667811393738, "learning_rate": 6.875160375161851e-06, "loss": 0.0596, "step": 10998 }, { "epoch": 1.84, "grad_norm": 0.44509026408195496, "learning_rate": 6.873445130547685e-06, "loss": 0.0549, "step": 10999 }, { "epoch": 1.84, "grad_norm": 0.46487271785736084, "learning_rate": 6.8717299878787995e-06, "loss": 0.0414, "step": 11000 }, { "epoch": 1.84, "grad_norm": 0.37030908465385437, "learning_rate": 6.870014947211127e-06, "loss": 0.0363, "step": 11001 }, { "epoch": 1.84, "grad_norm": 0.5453066825866699, "learning_rate": 6.868300008600586e-06, "loss": 0.0456, "step": 11002 }, { "epoch": 1.84, "grad_norm": 0.38840267062187195, "learning_rate": 6.866585172103091e-06, "loss": 0.0296, "step": 11003 }, { "epoch": 1.84, "grad_norm": 0.3300120532512665, "learning_rate": 6.86487043777456e-06, "loss": 0.0477, "step": 11004 }, { "epoch": 1.84, "grad_norm": 0.3827008306980133, "learning_rate": 6.863155805670905e-06, "loss": 0.0598, "step": 11005 }, { "epoch": 1.84, "grad_norm": 0.5262092351913452, "learning_rate": 6.861441275848029e-06, "loss": 0.0526, "step": 11006 }, { "epoch": 1.84, "grad_norm": 0.46347615122795105, "learning_rate": 6.859726848361843e-06, "loss": 0.0552, "step": 11007 }, { "epoch": 1.84, "grad_norm": 0.37397196888923645, "learning_rate": 6.85801252326824e-06, "loss": 0.0533, "step": 11008 }, { "epoch": 1.84, "grad_norm": 0.546152651309967, "learning_rate": 6.856298300623123e-06, "loss": 0.0476, "step": 11009 }, { "epoch": 1.84, "grad_norm": 0.4839188754558563, "learning_rate": 6.854584180482389e-06, "loss": 0.0471, "step": 11010 }, { "epoch": 1.84, "grad_norm": 0.4189911484718323, "learning_rate": 6.852870162901926e-06, "loss": 0.0578, "step": 11011 }, { "epoch": 1.84, "grad_norm": 0.4749317169189453, "learning_rate": 6.851156247937616e-06, "loss": 0.0368, "step": 11012 }, { "epoch": 1.84, "grad_norm": 0.4225411117076874, "learning_rate": 6.84944243564535e-06, "loss": 0.0599, "step": 11013 }, { "epoch": 1.84, "grad_norm": 0.44564497470855713, "learning_rate": 6.847728726081007e-06, "loss": 0.0556, "step": 11014 }, { "epoch": 1.84, "grad_norm": 0.6629483103752136, "learning_rate": 6.8460151193004685e-06, "loss": 0.0672, "step": 11015 }, { "epoch": 1.84, "grad_norm": 0.26991185545921326, "learning_rate": 6.844301615359606e-06, "loss": 0.0275, "step": 11016 }, { "epoch": 1.84, "grad_norm": 0.46719837188720703, "learning_rate": 6.842588214314285e-06, "loss": 0.0425, "step": 11017 }, { "epoch": 1.84, "grad_norm": 0.3329980671405792, "learning_rate": 6.840874916220384e-06, "loss": 0.0386, "step": 11018 }, { "epoch": 1.84, "grad_norm": 0.3275858163833618, "learning_rate": 6.83916172113376e-06, "loss": 0.0388, "step": 11019 }, { "epoch": 1.84, "grad_norm": 0.4679654538631439, "learning_rate": 6.837448629110273e-06, "loss": 0.0503, "step": 11020 }, { "epoch": 1.84, "grad_norm": 0.39116349816322327, "learning_rate": 6.835735640205785e-06, "loss": 0.0553, "step": 11021 }, { "epoch": 1.84, "grad_norm": 0.25432077050209045, "learning_rate": 6.83402275447615e-06, "loss": 0.0343, "step": 11022 }, { "epoch": 1.84, "grad_norm": 0.4181434214115143, "learning_rate": 6.832309971977211e-06, "loss": 0.0401, "step": 11023 }, { "epoch": 1.84, "grad_norm": 0.4366934895515442, "learning_rate": 6.830597292764825e-06, "loss": 0.0562, "step": 11024 }, { "epoch": 1.84, "grad_norm": 0.3088929057121277, "learning_rate": 6.828884716894834e-06, "loss": 0.0501, "step": 11025 }, { "epoch": 1.84, "grad_norm": 0.3531472980976105, "learning_rate": 6.8271722444230745e-06, "loss": 0.0467, "step": 11026 }, { "epoch": 1.84, "grad_norm": 0.3301917612552643, "learning_rate": 6.8254598754053895e-06, "loss": 0.0456, "step": 11027 }, { "epoch": 1.84, "grad_norm": 0.3162558376789093, "learning_rate": 6.8237476098976065e-06, "loss": 0.0381, "step": 11028 }, { "epoch": 1.84, "grad_norm": 0.416698694229126, "learning_rate": 6.82203544795556e-06, "loss": 0.0494, "step": 11029 }, { "epoch": 1.84, "grad_norm": 0.47394484281539917, "learning_rate": 6.82032338963508e-06, "loss": 0.0337, "step": 11030 }, { "epoch": 1.84, "grad_norm": 0.3390566110610962, "learning_rate": 6.818611434991983e-06, "loss": 0.0488, "step": 11031 }, { "epoch": 1.85, "grad_norm": 0.43066859245300293, "learning_rate": 6.816899584082095e-06, "loss": 0.0623, "step": 11032 }, { "epoch": 1.85, "grad_norm": 0.319864958524704, "learning_rate": 6.815187836961235e-06, "loss": 0.0345, "step": 11033 }, { "epoch": 1.85, "grad_norm": 0.23792679607868195, "learning_rate": 6.813476193685207e-06, "loss": 0.0216, "step": 11034 }, { "epoch": 1.85, "grad_norm": 0.40282294154167175, "learning_rate": 6.811764654309833e-06, "loss": 0.0454, "step": 11035 }, { "epoch": 1.85, "grad_norm": 0.5106588006019592, "learning_rate": 6.810053218890913e-06, "loss": 0.0767, "step": 11036 }, { "epoch": 1.85, "grad_norm": 0.3956432342529297, "learning_rate": 6.8083418874842484e-06, "loss": 0.0451, "step": 11037 }, { "epoch": 1.85, "grad_norm": 0.4016047418117523, "learning_rate": 6.806630660145648e-06, "loss": 0.0332, "step": 11038 }, { "epoch": 1.85, "grad_norm": 0.3827522099018097, "learning_rate": 6.804919536930904e-06, "loss": 0.0591, "step": 11039 }, { "epoch": 1.85, "grad_norm": 0.4538068175315857, "learning_rate": 6.803208517895805e-06, "loss": 0.049, "step": 11040 }, { "epoch": 1.85, "grad_norm": 0.3047269880771637, "learning_rate": 6.801497603096146e-06, "loss": 0.0405, "step": 11041 }, { "epoch": 1.85, "grad_norm": 0.4435633420944214, "learning_rate": 6.799786792587711e-06, "loss": 0.0456, "step": 11042 }, { "epoch": 1.85, "grad_norm": 0.4091266691684723, "learning_rate": 6.7980760864262906e-06, "loss": 0.0373, "step": 11043 }, { "epoch": 1.85, "grad_norm": 0.3612283766269684, "learning_rate": 6.7963654846676555e-06, "loss": 0.0536, "step": 11044 }, { "epoch": 1.85, "grad_norm": 0.5816491842269897, "learning_rate": 6.794654987367584e-06, "loss": 0.0614, "step": 11045 }, { "epoch": 1.85, "grad_norm": 0.38338664174079895, "learning_rate": 6.792944594581853e-06, "loss": 0.0498, "step": 11046 }, { "epoch": 1.85, "grad_norm": 0.38650718331336975, "learning_rate": 6.79123430636623e-06, "loss": 0.0424, "step": 11047 }, { "epoch": 1.85, "grad_norm": 0.619666576385498, "learning_rate": 6.789524122776476e-06, "loss": 0.0437, "step": 11048 }, { "epoch": 1.85, "grad_norm": 0.3289806842803955, "learning_rate": 6.787814043868362e-06, "loss": 0.0313, "step": 11049 }, { "epoch": 1.85, "grad_norm": 0.49734675884246826, "learning_rate": 6.786104069697645e-06, "loss": 0.0595, "step": 11050 }, { "epoch": 1.85, "grad_norm": 0.3937987983226776, "learning_rate": 6.784394200320075e-06, "loss": 0.0511, "step": 11051 }, { "epoch": 1.85, "grad_norm": 0.31809744238853455, "learning_rate": 6.78268443579141e-06, "loss": 0.04, "step": 11052 }, { "epoch": 1.85, "grad_norm": 0.41372770071029663, "learning_rate": 6.780974776167402e-06, "loss": 0.0503, "step": 11053 }, { "epoch": 1.85, "grad_norm": 0.43380722403526306, "learning_rate": 6.779265221503786e-06, "loss": 0.0381, "step": 11054 }, { "epoch": 1.85, "grad_norm": 0.4140997529029846, "learning_rate": 6.7775557718563155e-06, "loss": 0.0587, "step": 11055 }, { "epoch": 1.85, "grad_norm": 0.3466641902923584, "learning_rate": 6.7758464272807215e-06, "loss": 0.0307, "step": 11056 }, { "epoch": 1.85, "grad_norm": 0.35687702894210815, "learning_rate": 6.774137187832743e-06, "loss": 0.0433, "step": 11057 }, { "epoch": 1.85, "grad_norm": 0.4034714996814728, "learning_rate": 6.772428053568114e-06, "loss": 0.0424, "step": 11058 }, { "epoch": 1.85, "grad_norm": 0.4032115936279297, "learning_rate": 6.770719024542556e-06, "loss": 0.0494, "step": 11059 }, { "epoch": 1.85, "grad_norm": 0.33754321932792664, "learning_rate": 6.769010100811804e-06, "loss": 0.0624, "step": 11060 }, { "epoch": 1.85, "grad_norm": 0.6148936748504639, "learning_rate": 6.767301282431569e-06, "loss": 0.054, "step": 11061 }, { "epoch": 1.85, "grad_norm": 0.38576725125312805, "learning_rate": 6.765592569457573e-06, "loss": 0.052, "step": 11062 }, { "epoch": 1.85, "grad_norm": 0.5770471096038818, "learning_rate": 6.763883961945536e-06, "loss": 0.0627, "step": 11063 }, { "epoch": 1.85, "grad_norm": 0.3374519646167755, "learning_rate": 6.762175459951165e-06, "loss": 0.039, "step": 11064 }, { "epoch": 1.85, "grad_norm": 0.3363606631755829, "learning_rate": 6.760467063530165e-06, "loss": 0.0388, "step": 11065 }, { "epoch": 1.85, "grad_norm": 0.4184144139289856, "learning_rate": 6.758758772738246e-06, "loss": 0.0613, "step": 11066 }, { "epoch": 1.85, "grad_norm": 0.36388927698135376, "learning_rate": 6.757050587631109e-06, "loss": 0.0513, "step": 11067 }, { "epoch": 1.85, "grad_norm": 0.4162799119949341, "learning_rate": 6.755342508264442e-06, "loss": 0.0416, "step": 11068 }, { "epoch": 1.85, "grad_norm": 0.5284202098846436, "learning_rate": 6.7536345346939505e-06, "loss": 0.061, "step": 11069 }, { "epoch": 1.85, "grad_norm": 0.3285086154937744, "learning_rate": 6.7519266669753166e-06, "loss": 0.046, "step": 11070 }, { "epoch": 1.85, "grad_norm": 0.46124711632728577, "learning_rate": 6.7502189051642385e-06, "loss": 0.0434, "step": 11071 }, { "epoch": 1.85, "grad_norm": 0.3983430564403534, "learning_rate": 6.74851124931639e-06, "loss": 0.0533, "step": 11072 }, { "epoch": 1.85, "grad_norm": 0.6670066118240356, "learning_rate": 6.746803699487451e-06, "loss": 0.056, "step": 11073 }, { "epoch": 1.85, "grad_norm": 0.3566749691963196, "learning_rate": 6.745096255733107e-06, "loss": 0.0411, "step": 11074 }, { "epoch": 1.85, "grad_norm": 0.43852880597114563, "learning_rate": 6.743388918109026e-06, "loss": 0.0531, "step": 11075 }, { "epoch": 1.85, "grad_norm": 0.5880138278007507, "learning_rate": 6.741681686670873e-06, "loss": 0.0406, "step": 11076 }, { "epoch": 1.85, "grad_norm": 0.4174428880214691, "learning_rate": 6.7399745614743226e-06, "loss": 0.0523, "step": 11077 }, { "epoch": 1.85, "grad_norm": 0.23720715939998627, "learning_rate": 6.738267542575038e-06, "loss": 0.023, "step": 11078 }, { "epoch": 1.85, "grad_norm": 0.5392698049545288, "learning_rate": 6.73656063002867e-06, "loss": 0.0564, "step": 11079 }, { "epoch": 1.85, "grad_norm": 0.37233662605285645, "learning_rate": 6.734853823890881e-06, "loss": 0.0596, "step": 11080 }, { "epoch": 1.85, "grad_norm": 0.5839314460754395, "learning_rate": 6.733147124217326e-06, "loss": 0.061, "step": 11081 }, { "epoch": 1.85, "grad_norm": 0.30357077717781067, "learning_rate": 6.731440531063646e-06, "loss": 0.0476, "step": 11082 }, { "epoch": 1.85, "grad_norm": 0.3186279535293579, "learning_rate": 6.729734044485497e-06, "loss": 0.0453, "step": 11083 }, { "epoch": 1.85, "grad_norm": 0.6301882266998291, "learning_rate": 6.72802766453851e-06, "loss": 0.0494, "step": 11084 }, { "epoch": 1.85, "grad_norm": 0.42469650506973267, "learning_rate": 6.726321391278332e-06, "loss": 0.0503, "step": 11085 }, { "epoch": 1.85, "grad_norm": 0.43023982644081116, "learning_rate": 6.724615224760597e-06, "loss": 0.0454, "step": 11086 }, { "epoch": 1.85, "grad_norm": 0.39687538146972656, "learning_rate": 6.722909165040932e-06, "loss": 0.0349, "step": 11087 }, { "epoch": 1.85, "grad_norm": 0.24447430670261383, "learning_rate": 6.72120321217497e-06, "loss": 0.0261, "step": 11088 }, { "epoch": 1.85, "grad_norm": 0.42896828055381775, "learning_rate": 6.719497366218335e-06, "loss": 0.0484, "step": 11089 }, { "epoch": 1.85, "grad_norm": 0.3829186260700226, "learning_rate": 6.717791627226643e-06, "loss": 0.0472, "step": 11090 }, { "epoch": 1.85, "grad_norm": 0.3711329400539398, "learning_rate": 6.716085995255521e-06, "loss": 0.0497, "step": 11091 }, { "epoch": 1.86, "grad_norm": 0.40391451120376587, "learning_rate": 6.714380470360578e-06, "loss": 0.0523, "step": 11092 }, { "epoch": 1.86, "grad_norm": 0.37405991554260254, "learning_rate": 6.7126750525974215e-06, "loss": 0.0344, "step": 11093 }, { "epoch": 1.86, "grad_norm": 0.47718581557273865, "learning_rate": 6.710969742021667e-06, "loss": 0.052, "step": 11094 }, { "epoch": 1.86, "grad_norm": 0.49613168835639954, "learning_rate": 6.709264538688914e-06, "loss": 0.0381, "step": 11095 }, { "epoch": 1.86, "grad_norm": 0.3130037486553192, "learning_rate": 6.707559442654758e-06, "loss": 0.0478, "step": 11096 }, { "epoch": 1.86, "grad_norm": 0.4392985701560974, "learning_rate": 6.705854453974804e-06, "loss": 0.0408, "step": 11097 }, { "epoch": 1.86, "grad_norm": 0.42889100313186646, "learning_rate": 6.704149572704639e-06, "loss": 0.0558, "step": 11098 }, { "epoch": 1.86, "grad_norm": 0.35586947202682495, "learning_rate": 6.7024447988998595e-06, "loss": 0.0468, "step": 11099 }, { "epoch": 1.86, "grad_norm": 0.3219049870967865, "learning_rate": 6.7007401326160474e-06, "loss": 0.0371, "step": 11100 }, { "epoch": 1.86, "grad_norm": 0.3755336105823517, "learning_rate": 6.699035573908783e-06, "loss": 0.0397, "step": 11101 }, { "epoch": 1.86, "grad_norm": 0.45879673957824707, "learning_rate": 6.697331122833653e-06, "loss": 0.0509, "step": 11102 }, { "epoch": 1.86, "grad_norm": 0.5319411754608154, "learning_rate": 6.695626779446227e-06, "loss": 0.0494, "step": 11103 }, { "epoch": 1.86, "grad_norm": 0.29637032747268677, "learning_rate": 6.693922543802077e-06, "loss": 0.0434, "step": 11104 }, { "epoch": 1.86, "grad_norm": 0.345211923122406, "learning_rate": 6.692218415956776e-06, "loss": 0.0418, "step": 11105 }, { "epoch": 1.86, "grad_norm": 0.4082459509372711, "learning_rate": 6.690514395965888e-06, "loss": 0.0673, "step": 11106 }, { "epoch": 1.86, "grad_norm": 0.4644765257835388, "learning_rate": 6.688810483884971e-06, "loss": 0.0361, "step": 11107 }, { "epoch": 1.86, "grad_norm": 0.518322765827179, "learning_rate": 6.687106679769588e-06, "loss": 0.0544, "step": 11108 }, { "epoch": 1.86, "grad_norm": 0.4878983795642853, "learning_rate": 6.6854029836752935e-06, "loss": 0.056, "step": 11109 }, { "epoch": 1.86, "grad_norm": 0.4784248471260071, "learning_rate": 6.6836993956576335e-06, "loss": 0.0773, "step": 11110 }, { "epoch": 1.86, "grad_norm": 0.45728909969329834, "learning_rate": 6.681995915772162e-06, "loss": 0.0617, "step": 11111 }, { "epoch": 1.86, "grad_norm": 0.5758048892021179, "learning_rate": 6.680292544074418e-06, "loss": 0.0517, "step": 11112 }, { "epoch": 1.86, "grad_norm": 0.27706804871559143, "learning_rate": 6.678589280619945e-06, "loss": 0.0321, "step": 11113 }, { "epoch": 1.86, "grad_norm": 0.3827618658542633, "learning_rate": 6.6768861254642816e-06, "loss": 0.0381, "step": 11114 }, { "epoch": 1.86, "grad_norm": 0.3233520984649658, "learning_rate": 6.675183078662955e-06, "loss": 0.0476, "step": 11115 }, { "epoch": 1.86, "grad_norm": 0.44181424379348755, "learning_rate": 6.673480140271503e-06, "loss": 0.0372, "step": 11116 }, { "epoch": 1.86, "grad_norm": 0.37003201246261597, "learning_rate": 6.6717773103454474e-06, "loss": 0.0384, "step": 11117 }, { "epoch": 1.86, "grad_norm": 0.4699283540248871, "learning_rate": 6.6700745889403095e-06, "loss": 0.0453, "step": 11118 }, { "epoch": 1.86, "grad_norm": 0.5077916979789734, "learning_rate": 6.6683719761116165e-06, "loss": 0.0498, "step": 11119 }, { "epoch": 1.86, "grad_norm": 0.38183510303497314, "learning_rate": 6.666669471914875e-06, "loss": 0.0453, "step": 11120 }, { "epoch": 1.86, "grad_norm": 0.40929874777793884, "learning_rate": 6.664967076405601e-06, "loss": 0.0642, "step": 11121 }, { "epoch": 1.86, "grad_norm": 0.3927317261695862, "learning_rate": 6.663264789639307e-06, "loss": 0.0495, "step": 11122 }, { "epoch": 1.86, "grad_norm": 0.3775467574596405, "learning_rate": 6.6615626116714946e-06, "loss": 0.0495, "step": 11123 }, { "epoch": 1.86, "grad_norm": 0.4746238589286804, "learning_rate": 6.659860542557661e-06, "loss": 0.0592, "step": 11124 }, { "epoch": 1.86, "grad_norm": 0.5720437169075012, "learning_rate": 6.6581585823533135e-06, "loss": 0.0742, "step": 11125 }, { "epoch": 1.86, "grad_norm": 0.5072489976882935, "learning_rate": 6.656456731113938e-06, "loss": 0.0699, "step": 11126 }, { "epoch": 1.86, "grad_norm": 0.6065875291824341, "learning_rate": 6.654754988895034e-06, "loss": 0.0469, "step": 11127 }, { "epoch": 1.86, "grad_norm": 0.41449418663978577, "learning_rate": 6.653053355752084e-06, "loss": 0.0534, "step": 11128 }, { "epoch": 1.86, "grad_norm": 0.45909714698791504, "learning_rate": 6.65135183174057e-06, "loss": 0.0467, "step": 11129 }, { "epoch": 1.86, "grad_norm": 0.5141977667808533, "learning_rate": 6.649650416915978e-06, "loss": 0.0544, "step": 11130 }, { "epoch": 1.86, "grad_norm": 0.35403820872306824, "learning_rate": 6.647949111333784e-06, "loss": 0.0385, "step": 11131 }, { "epoch": 1.86, "grad_norm": 0.4352714419364929, "learning_rate": 6.646247915049453e-06, "loss": 0.0469, "step": 11132 }, { "epoch": 1.86, "grad_norm": 0.45556509494781494, "learning_rate": 6.644546828118463e-06, "loss": 0.0431, "step": 11133 }, { "epoch": 1.86, "grad_norm": 0.4258548617362976, "learning_rate": 6.64284585059628e-06, "loss": 0.0696, "step": 11134 }, { "epoch": 1.86, "grad_norm": 0.5488852262496948, "learning_rate": 6.64114498253836e-06, "loss": 0.0444, "step": 11135 }, { "epoch": 1.86, "grad_norm": 0.3902425765991211, "learning_rate": 6.639444224000169e-06, "loss": 0.036, "step": 11136 }, { "epoch": 1.86, "grad_norm": 0.5176177024841309, "learning_rate": 6.637743575037161e-06, "loss": 0.0427, "step": 11137 }, { "epoch": 1.86, "grad_norm": 0.33780911564826965, "learning_rate": 6.636043035704782e-06, "loss": 0.0379, "step": 11138 }, { "epoch": 1.86, "grad_norm": 0.38270804286003113, "learning_rate": 6.634342606058488e-06, "loss": 0.0492, "step": 11139 }, { "epoch": 1.86, "grad_norm": 0.3554099500179291, "learning_rate": 6.632642286153718e-06, "loss": 0.0506, "step": 11140 }, { "epoch": 1.86, "grad_norm": 0.463523805141449, "learning_rate": 6.630942076045916e-06, "loss": 0.0621, "step": 11141 }, { "epoch": 1.86, "grad_norm": 0.5973471999168396, "learning_rate": 6.629241975790523e-06, "loss": 0.065, "step": 11142 }, { "epoch": 1.86, "grad_norm": 0.32568708062171936, "learning_rate": 6.6275419854429625e-06, "loss": 0.0419, "step": 11143 }, { "epoch": 1.86, "grad_norm": 0.44940465688705444, "learning_rate": 6.625842105058676e-06, "loss": 0.0574, "step": 11144 }, { "epoch": 1.86, "grad_norm": 0.32155486941337585, "learning_rate": 6.624142334693084e-06, "loss": 0.0414, "step": 11145 }, { "epoch": 1.86, "grad_norm": 0.5488505959510803, "learning_rate": 6.622442674401608e-06, "loss": 0.0474, "step": 11146 }, { "epoch": 1.86, "grad_norm": 0.6024429202079773, "learning_rate": 6.620743124239676e-06, "loss": 0.0491, "step": 11147 }, { "epoch": 1.86, "grad_norm": 0.3795081377029419, "learning_rate": 6.6190436842626955e-06, "loss": 0.0414, "step": 11148 }, { "epoch": 1.86, "grad_norm": 0.4340977072715759, "learning_rate": 6.617344354526081e-06, "loss": 0.0534, "step": 11149 }, { "epoch": 1.86, "grad_norm": 0.36735689640045166, "learning_rate": 6.615645135085246e-06, "loss": 0.0418, "step": 11150 }, { "epoch": 1.87, "grad_norm": 0.34298238158226013, "learning_rate": 6.613946025995592e-06, "loss": 0.0525, "step": 11151 }, { "epoch": 1.87, "grad_norm": 0.5064518451690674, "learning_rate": 6.6122470273125175e-06, "loss": 0.052, "step": 11152 }, { "epoch": 1.87, "grad_norm": 0.3674223721027374, "learning_rate": 6.6105481390914235e-06, "loss": 0.0522, "step": 11153 }, { "epoch": 1.87, "grad_norm": 0.3289492726325989, "learning_rate": 6.608849361387704e-06, "loss": 0.0441, "step": 11154 }, { "epoch": 1.87, "grad_norm": 0.42927759885787964, "learning_rate": 6.6071506942567544e-06, "loss": 0.0533, "step": 11155 }, { "epoch": 1.87, "grad_norm": 0.4243925213813782, "learning_rate": 6.605452137753956e-06, "loss": 0.0452, "step": 11156 }, { "epoch": 1.87, "grad_norm": 0.40040016174316406, "learning_rate": 6.603753691934692e-06, "loss": 0.0407, "step": 11157 }, { "epoch": 1.87, "grad_norm": 0.4796661138534546, "learning_rate": 6.602055356854349e-06, "loss": 0.0578, "step": 11158 }, { "epoch": 1.87, "grad_norm": 0.3833586573600769, "learning_rate": 6.600357132568298e-06, "loss": 0.0595, "step": 11159 }, { "epoch": 1.87, "grad_norm": 0.3749602437019348, "learning_rate": 6.59865901913191e-06, "loss": 0.0446, "step": 11160 }, { "epoch": 1.87, "grad_norm": 0.3562239110469818, "learning_rate": 6.596961016600558e-06, "loss": 0.0321, "step": 11161 }, { "epoch": 1.87, "grad_norm": 0.3839608430862427, "learning_rate": 6.59526312502961e-06, "loss": 0.0559, "step": 11162 }, { "epoch": 1.87, "grad_norm": 0.41879257559776306, "learning_rate": 6.593565344474418e-06, "loss": 0.04, "step": 11163 }, { "epoch": 1.87, "grad_norm": 0.36430656909942627, "learning_rate": 6.591867674990351e-06, "loss": 0.0464, "step": 11164 }, { "epoch": 1.87, "grad_norm": 0.2893822491168976, "learning_rate": 6.5901701166327595e-06, "loss": 0.0491, "step": 11165 }, { "epoch": 1.87, "grad_norm": 0.6426834464073181, "learning_rate": 6.588472669456991e-06, "loss": 0.0605, "step": 11166 }, { "epoch": 1.87, "grad_norm": 0.6200670599937439, "learning_rate": 6.586775333518401e-06, "loss": 0.0557, "step": 11167 }, { "epoch": 1.87, "grad_norm": 0.46606799960136414, "learning_rate": 6.5850781088723224e-06, "loss": 0.0377, "step": 11168 }, { "epoch": 1.87, "grad_norm": 0.3495882749557495, "learning_rate": 6.5833809955741055e-06, "loss": 0.0541, "step": 11169 }, { "epoch": 1.87, "grad_norm": 0.31042659282684326, "learning_rate": 6.581683993679084e-06, "loss": 0.0301, "step": 11170 }, { "epoch": 1.87, "grad_norm": 0.3858557939529419, "learning_rate": 6.579987103242587e-06, "loss": 0.0409, "step": 11171 }, { "epoch": 1.87, "grad_norm": 0.28559568524360657, "learning_rate": 6.578290324319949e-06, "loss": 0.0344, "step": 11172 }, { "epoch": 1.87, "grad_norm": 0.34930065274238586, "learning_rate": 6.57659365696649e-06, "loss": 0.0435, "step": 11173 }, { "epoch": 1.87, "grad_norm": 0.7212622165679932, "learning_rate": 6.574897101237535e-06, "loss": 0.0615, "step": 11174 }, { "epoch": 1.87, "grad_norm": 0.4295841455459595, "learning_rate": 6.573200657188406e-06, "loss": 0.0813, "step": 11175 }, { "epoch": 1.87, "grad_norm": 0.3741861581802368, "learning_rate": 6.571504324874411e-06, "loss": 0.0457, "step": 11176 }, { "epoch": 1.87, "grad_norm": 0.43910136818885803, "learning_rate": 6.569808104350863e-06, "loss": 0.0607, "step": 11177 }, { "epoch": 1.87, "grad_norm": 0.40875428915023804, "learning_rate": 6.568111995673075e-06, "loss": 0.0633, "step": 11178 }, { "epoch": 1.87, "grad_norm": 0.6395532488822937, "learning_rate": 6.566415998896346e-06, "loss": 0.0615, "step": 11179 }, { "epoch": 1.87, "grad_norm": 0.3799339830875397, "learning_rate": 6.564720114075972e-06, "loss": 0.0658, "step": 11180 }, { "epoch": 1.87, "grad_norm": 0.5146073698997498, "learning_rate": 6.5630243412672565e-06, "loss": 0.0739, "step": 11181 }, { "epoch": 1.87, "grad_norm": 0.3435891568660736, "learning_rate": 6.561328680525487e-06, "loss": 0.044, "step": 11182 }, { "epoch": 1.87, "grad_norm": 0.3797685205936432, "learning_rate": 6.55963313190596e-06, "loss": 0.0418, "step": 11183 }, { "epoch": 1.87, "grad_norm": 0.41761934757232666, "learning_rate": 6.557937695463954e-06, "loss": 0.0373, "step": 11184 }, { "epoch": 1.87, "grad_norm": 0.37316012382507324, "learning_rate": 6.556242371254753e-06, "loss": 0.0517, "step": 11185 }, { "epoch": 1.87, "grad_norm": 0.48339521884918213, "learning_rate": 6.554547159333637e-06, "loss": 0.0587, "step": 11186 }, { "epoch": 1.87, "grad_norm": 0.4894671142101288, "learning_rate": 6.552852059755881e-06, "loss": 0.0547, "step": 11187 }, { "epoch": 1.87, "grad_norm": 0.39907336235046387, "learning_rate": 6.551157072576748e-06, "loss": 0.0516, "step": 11188 }, { "epoch": 1.87, "grad_norm": 0.4311957359313965, "learning_rate": 6.549462197851515e-06, "loss": 0.0444, "step": 11189 }, { "epoch": 1.87, "grad_norm": 0.3449633717536926, "learning_rate": 6.547767435635442e-06, "loss": 0.0357, "step": 11190 }, { "epoch": 1.87, "grad_norm": 0.5673086643218994, "learning_rate": 6.546072785983786e-06, "loss": 0.0511, "step": 11191 }, { "epoch": 1.87, "grad_norm": 0.5071749687194824, "learning_rate": 6.544378248951808e-06, "loss": 0.0493, "step": 11192 }, { "epoch": 1.87, "grad_norm": 0.5142666101455688, "learning_rate": 6.5426838245947576e-06, "loss": 0.0491, "step": 11193 }, { "epoch": 1.87, "grad_norm": 0.7065287828445435, "learning_rate": 6.540989512967882e-06, "loss": 0.0517, "step": 11194 }, { "epoch": 1.87, "grad_norm": 0.3832065463066101, "learning_rate": 6.539295314126432e-06, "loss": 0.0497, "step": 11195 }, { "epoch": 1.87, "grad_norm": 0.526035487651825, "learning_rate": 6.537601228125643e-06, "loss": 0.0535, "step": 11196 }, { "epoch": 1.87, "grad_norm": 0.5153618454933167, "learning_rate": 6.535907255020757e-06, "loss": 0.0655, "step": 11197 }, { "epoch": 1.87, "grad_norm": 0.6046105623245239, "learning_rate": 6.5342133948670095e-06, "loss": 0.0673, "step": 11198 }, { "epoch": 1.87, "grad_norm": 0.3933302164077759, "learning_rate": 6.532519647719624e-06, "loss": 0.0534, "step": 11199 }, { "epoch": 1.87, "grad_norm": 0.4646513760089874, "learning_rate": 6.530826013633834e-06, "loss": 0.0566, "step": 11200 }, { "epoch": 1.87, "grad_norm": 0.5199809074401855, "learning_rate": 6.5291324926648595e-06, "loss": 0.0505, "step": 11201 }, { "epoch": 1.87, "grad_norm": 0.27388525009155273, "learning_rate": 6.527439084867917e-06, "loss": 0.0305, "step": 11202 }, { "epoch": 1.87, "grad_norm": 0.5837263464927673, "learning_rate": 6.525745790298231e-06, "loss": 0.0679, "step": 11203 }, { "epoch": 1.87, "grad_norm": 0.42821216583251953, "learning_rate": 6.524052609011007e-06, "loss": 0.045, "step": 11204 }, { "epoch": 1.87, "grad_norm": 0.2694281339645386, "learning_rate": 6.522359541061451e-06, "loss": 0.0307, "step": 11205 }, { "epoch": 1.87, "grad_norm": 0.38700586557388306, "learning_rate": 6.520666586504775e-06, "loss": 0.0419, "step": 11206 }, { "epoch": 1.87, "grad_norm": 0.42614778876304626, "learning_rate": 6.518973745396176e-06, "loss": 0.0361, "step": 11207 }, { "epoch": 1.87, "grad_norm": 0.30678629875183105, "learning_rate": 6.517281017790849e-06, "loss": 0.0406, "step": 11208 }, { "epoch": 1.87, "grad_norm": 0.3464311361312866, "learning_rate": 6.515588403743988e-06, "loss": 0.0282, "step": 11209 }, { "epoch": 1.87, "grad_norm": 0.364872545003891, "learning_rate": 6.513895903310787e-06, "loss": 0.0427, "step": 11210 }, { "epoch": 1.88, "grad_norm": 0.5782365202903748, "learning_rate": 6.512203516546432e-06, "loss": 0.0592, "step": 11211 }, { "epoch": 1.88, "grad_norm": 0.5576833486557007, "learning_rate": 6.510511243506101e-06, "loss": 0.0535, "step": 11212 }, { "epoch": 1.88, "grad_norm": 0.32986316084861755, "learning_rate": 6.5088190842449725e-06, "loss": 0.0535, "step": 11213 }, { "epoch": 1.88, "grad_norm": 0.38679924607276917, "learning_rate": 6.507127038818229e-06, "loss": 0.0478, "step": 11214 }, { "epoch": 1.88, "grad_norm": 0.498339980840683, "learning_rate": 6.505435107281036e-06, "loss": 0.0462, "step": 11215 }, { "epoch": 1.88, "grad_norm": 0.4564286172389984, "learning_rate": 6.503743289688558e-06, "loss": 0.0412, "step": 11216 }, { "epoch": 1.88, "grad_norm": 0.332961767911911, "learning_rate": 6.502051586095964e-06, "loss": 0.039, "step": 11217 }, { "epoch": 1.88, "grad_norm": 0.4792552590370178, "learning_rate": 6.5003599965584156e-06, "loss": 0.0577, "step": 11218 }, { "epoch": 1.88, "grad_norm": 0.37188389897346497, "learning_rate": 6.498668521131062e-06, "loss": 0.0436, "step": 11219 }, { "epoch": 1.88, "grad_norm": 0.294233113527298, "learning_rate": 6.496977159869065e-06, "loss": 0.0436, "step": 11220 }, { "epoch": 1.88, "grad_norm": 0.5198498368263245, "learning_rate": 6.495285912827565e-06, "loss": 0.0488, "step": 11221 }, { "epoch": 1.88, "grad_norm": 0.45721733570098877, "learning_rate": 6.493594780061709e-06, "loss": 0.0327, "step": 11222 }, { "epoch": 1.88, "grad_norm": 0.3576944172382355, "learning_rate": 6.491903761626648e-06, "loss": 0.0465, "step": 11223 }, { "epoch": 1.88, "grad_norm": 0.5929362773895264, "learning_rate": 6.490212857577505e-06, "loss": 0.0512, "step": 11224 }, { "epoch": 1.88, "grad_norm": 0.6056092977523804, "learning_rate": 6.488522067969425e-06, "loss": 0.0596, "step": 11225 }, { "epoch": 1.88, "grad_norm": 0.2535780370235443, "learning_rate": 6.486831392857537e-06, "loss": 0.0256, "step": 11226 }, { "epoch": 1.88, "grad_norm": 0.3655233681201935, "learning_rate": 6.485140832296961e-06, "loss": 0.0462, "step": 11227 }, { "epoch": 1.88, "grad_norm": 0.6029447317123413, "learning_rate": 6.483450386342827e-06, "loss": 0.0668, "step": 11228 }, { "epoch": 1.88, "grad_norm": 0.44377779960632324, "learning_rate": 6.481760055050253e-06, "loss": 0.0491, "step": 11229 }, { "epoch": 1.88, "grad_norm": 0.4045116603374481, "learning_rate": 6.480069838474348e-06, "loss": 0.0405, "step": 11230 }, { "epoch": 1.88, "grad_norm": 0.40843114256858826, "learning_rate": 6.4783797366702325e-06, "loss": 0.0385, "step": 11231 }, { "epoch": 1.88, "grad_norm": 0.47376394271850586, "learning_rate": 6.4766897496930095e-06, "loss": 0.0514, "step": 11232 }, { "epoch": 1.88, "grad_norm": 0.3415290415287018, "learning_rate": 6.474999877597781e-06, "loss": 0.0583, "step": 11233 }, { "epoch": 1.88, "grad_norm": 0.3440561890602112, "learning_rate": 6.473310120439656e-06, "loss": 0.0433, "step": 11234 }, { "epoch": 1.88, "grad_norm": 0.4434778392314911, "learning_rate": 6.471620478273724e-06, "loss": 0.0472, "step": 11235 }, { "epoch": 1.88, "grad_norm": 0.5346097946166992, "learning_rate": 6.469930951155077e-06, "loss": 0.047, "step": 11236 }, { "epoch": 1.88, "grad_norm": 0.691338300704956, "learning_rate": 6.468241539138808e-06, "loss": 0.05, "step": 11237 }, { "epoch": 1.88, "grad_norm": 0.46422871947288513, "learning_rate": 6.466552242279999e-06, "loss": 0.0563, "step": 11238 }, { "epoch": 1.88, "grad_norm": 0.4170847535133362, "learning_rate": 6.464863060633738e-06, "loss": 0.0541, "step": 11239 }, { "epoch": 1.88, "grad_norm": 0.5105420351028442, "learning_rate": 6.463173994255096e-06, "loss": 0.0442, "step": 11240 }, { "epoch": 1.88, "grad_norm": 0.39191851019859314, "learning_rate": 6.461485043199149e-06, "loss": 0.045, "step": 11241 }, { "epoch": 1.88, "grad_norm": 0.3075186014175415, "learning_rate": 6.459796207520973e-06, "loss": 0.0436, "step": 11242 }, { "epoch": 1.88, "grad_norm": 0.3894895911216736, "learning_rate": 6.4581074872756285e-06, "loss": 0.0489, "step": 11243 }, { "epoch": 1.88, "grad_norm": 0.39398959279060364, "learning_rate": 6.456418882518176e-06, "loss": 0.0638, "step": 11244 }, { "epoch": 1.88, "grad_norm": 0.3706938624382019, "learning_rate": 6.454730393303681e-06, "loss": 0.0478, "step": 11245 }, { "epoch": 1.88, "grad_norm": 0.47882089018821716, "learning_rate": 6.453042019687199e-06, "loss": 0.055, "step": 11246 }, { "epoch": 1.88, "grad_norm": 0.2975010275840759, "learning_rate": 6.451353761723773e-06, "loss": 0.0392, "step": 11247 }, { "epoch": 1.88, "grad_norm": 0.3233420252799988, "learning_rate": 6.449665619468461e-06, "loss": 0.0439, "step": 11248 }, { "epoch": 1.88, "grad_norm": 0.7176980972290039, "learning_rate": 6.447977592976302e-06, "loss": 0.0541, "step": 11249 }, { "epoch": 1.88, "grad_norm": 0.708710253238678, "learning_rate": 6.446289682302333e-06, "loss": 0.0696, "step": 11250 }, { "epoch": 1.88, "grad_norm": 0.4374886453151703, "learning_rate": 6.4446018875016e-06, "loss": 0.056, "step": 11251 }, { "epoch": 1.88, "grad_norm": 0.3589118421077728, "learning_rate": 6.442914208629125e-06, "loss": 0.0471, "step": 11252 }, { "epoch": 1.88, "grad_norm": 0.3078635036945343, "learning_rate": 6.4412266457399445e-06, "loss": 0.0287, "step": 11253 }, { "epoch": 1.88, "grad_norm": 0.36252978444099426, "learning_rate": 6.439539198889082e-06, "loss": 0.0487, "step": 11254 }, { "epoch": 1.88, "grad_norm": 0.34069639444351196, "learning_rate": 6.437851868131555e-06, "loss": 0.0442, "step": 11255 }, { "epoch": 1.88, "grad_norm": 0.4050588309764862, "learning_rate": 6.436164653522388e-06, "loss": 0.0542, "step": 11256 }, { "epoch": 1.88, "grad_norm": 0.40725645422935486, "learning_rate": 6.434477555116587e-06, "loss": 0.053, "step": 11257 }, { "epoch": 1.88, "grad_norm": 0.39870086312294006, "learning_rate": 6.4327905729691655e-06, "loss": 0.0448, "step": 11258 }, { "epoch": 1.88, "grad_norm": 0.5149367451667786, "learning_rate": 6.431103707135133e-06, "loss": 0.0723, "step": 11259 }, { "epoch": 1.88, "grad_norm": 0.4271264970302582, "learning_rate": 6.4294169576694874e-06, "loss": 0.0476, "step": 11260 }, { "epoch": 1.88, "grad_norm": 0.2995608448982239, "learning_rate": 6.427730324627226e-06, "loss": 0.0349, "step": 11261 }, { "epoch": 1.88, "grad_norm": 0.5285981297492981, "learning_rate": 6.426043808063352e-06, "loss": 0.061, "step": 11262 }, { "epoch": 1.88, "grad_norm": 0.35376280546188354, "learning_rate": 6.424357408032848e-06, "loss": 0.0567, "step": 11263 }, { "epoch": 1.88, "grad_norm": 0.7134750485420227, "learning_rate": 6.4226711245907015e-06, "loss": 0.0607, "step": 11264 }, { "epoch": 1.88, "grad_norm": 0.40847447514533997, "learning_rate": 6.4209849577919e-06, "loss": 0.045, "step": 11265 }, { "epoch": 1.88, "grad_norm": 0.30971086025238037, "learning_rate": 6.4192989076914195e-06, "loss": 0.0458, "step": 11266 }, { "epoch": 1.88, "grad_norm": 0.30492913722991943, "learning_rate": 6.417612974344242e-06, "loss": 0.0345, "step": 11267 }, { "epoch": 1.88, "grad_norm": 0.35256969928741455, "learning_rate": 6.415927157805333e-06, "loss": 0.056, "step": 11268 }, { "epoch": 1.88, "grad_norm": 0.40884047746658325, "learning_rate": 6.414241458129662e-06, "loss": 0.0475, "step": 11269 }, { "epoch": 1.88, "grad_norm": 0.40194493532180786, "learning_rate": 6.412555875372195e-06, "loss": 0.0464, "step": 11270 }, { "epoch": 1.89, "grad_norm": 0.4403323531150818, "learning_rate": 6.410870409587895e-06, "loss": 0.0445, "step": 11271 }, { "epoch": 1.89, "grad_norm": 0.24520248174667358, "learning_rate": 6.409185060831711e-06, "loss": 0.0302, "step": 11272 }, { "epoch": 1.89, "grad_norm": 0.4227312207221985, "learning_rate": 6.407499829158601e-06, "loss": 0.0311, "step": 11273 }, { "epoch": 1.89, "grad_norm": 0.5528237819671631, "learning_rate": 6.405814714623517e-06, "loss": 0.05, "step": 11274 }, { "epoch": 1.89, "grad_norm": 0.4365263283252716, "learning_rate": 6.404129717281397e-06, "loss": 0.0412, "step": 11275 }, { "epoch": 1.89, "grad_norm": 0.4117507338523865, "learning_rate": 6.402444837187191e-06, "loss": 0.0494, "step": 11276 }, { "epoch": 1.89, "grad_norm": 0.41957810521125793, "learning_rate": 6.400760074395829e-06, "loss": 0.0367, "step": 11277 }, { "epoch": 1.89, "grad_norm": 0.6018229126930237, "learning_rate": 6.399075428962246e-06, "loss": 0.0572, "step": 11278 }, { "epoch": 1.89, "grad_norm": 0.43058133125305176, "learning_rate": 6.397390900941378e-06, "loss": 0.0526, "step": 11279 }, { "epoch": 1.89, "grad_norm": 0.38210126757621765, "learning_rate": 6.395706490388143e-06, "loss": 0.0565, "step": 11280 }, { "epoch": 1.89, "grad_norm": 0.44983914494514465, "learning_rate": 6.39402219735747e-06, "loss": 0.0714, "step": 11281 }, { "epoch": 1.89, "grad_norm": 0.4067235589027405, "learning_rate": 6.392338021904277e-06, "loss": 0.0504, "step": 11282 }, { "epoch": 1.89, "grad_norm": 0.3890562355518341, "learning_rate": 6.390653964083472e-06, "loss": 0.0412, "step": 11283 }, { "epoch": 1.89, "grad_norm": 0.3339889347553253, "learning_rate": 6.388970023949974e-06, "loss": 0.0512, "step": 11284 }, { "epoch": 1.89, "grad_norm": 0.5527865886688232, "learning_rate": 6.387286201558685e-06, "loss": 0.0544, "step": 11285 }, { "epoch": 1.89, "grad_norm": 0.44014236330986023, "learning_rate": 6.385602496964508e-06, "loss": 0.0382, "step": 11286 }, { "epoch": 1.89, "grad_norm": 0.4462641775608063, "learning_rate": 6.383918910222347e-06, "loss": 0.0508, "step": 11287 }, { "epoch": 1.89, "grad_norm": 0.4044007956981659, "learning_rate": 6.382235441387094e-06, "loss": 0.0323, "step": 11288 }, { "epoch": 1.89, "grad_norm": 0.2989087402820587, "learning_rate": 6.380552090513637e-06, "loss": 0.0446, "step": 11289 }, { "epoch": 1.89, "grad_norm": 0.36711153388023376, "learning_rate": 6.378868857656873e-06, "loss": 0.0521, "step": 11290 }, { "epoch": 1.89, "grad_norm": 0.862827479839325, "learning_rate": 6.377185742871676e-06, "loss": 0.0708, "step": 11291 }, { "epoch": 1.89, "grad_norm": 0.35593804717063904, "learning_rate": 6.375502746212936e-06, "loss": 0.0531, "step": 11292 }, { "epoch": 1.89, "grad_norm": 0.3772146701812744, "learning_rate": 6.373819867735522e-06, "loss": 0.032, "step": 11293 }, { "epoch": 1.89, "grad_norm": 0.3612079620361328, "learning_rate": 6.372137107494306e-06, "loss": 0.0378, "step": 11294 }, { "epoch": 1.89, "grad_norm": 0.402922660112381, "learning_rate": 6.370454465544163e-06, "loss": 0.0431, "step": 11295 }, { "epoch": 1.89, "grad_norm": 0.6324074268341064, "learning_rate": 6.368771941939954e-06, "loss": 0.0509, "step": 11296 }, { "epoch": 1.89, "grad_norm": 0.3879038393497467, "learning_rate": 6.367089536736535e-06, "loss": 0.0548, "step": 11297 }, { "epoch": 1.89, "grad_norm": 0.41846245527267456, "learning_rate": 6.365407249988771e-06, "loss": 0.0588, "step": 11298 }, { "epoch": 1.89, "grad_norm": 0.41285353899002075, "learning_rate": 6.363725081751512e-06, "loss": 0.0695, "step": 11299 }, { "epoch": 1.89, "grad_norm": 0.3968147933483124, "learning_rate": 6.362043032079604e-06, "loss": 0.0453, "step": 11300 }, { "epoch": 1.89, "grad_norm": 0.3963061571121216, "learning_rate": 6.360361101027896e-06, "loss": 0.0527, "step": 11301 }, { "epoch": 1.89, "grad_norm": 0.464665025472641, "learning_rate": 6.35867928865123e-06, "loss": 0.0405, "step": 11302 }, { "epoch": 1.89, "grad_norm": 0.9720841646194458, "learning_rate": 6.3569975950044394e-06, "loss": 0.0801, "step": 11303 }, { "epoch": 1.89, "grad_norm": 0.35721826553344727, "learning_rate": 6.355316020142365e-06, "loss": 0.0362, "step": 11304 }, { "epoch": 1.89, "grad_norm": 0.34386321902275085, "learning_rate": 6.353634564119828e-06, "loss": 0.0316, "step": 11305 }, { "epoch": 1.89, "grad_norm": 0.30411437153816223, "learning_rate": 6.35195322699166e-06, "loss": 0.0426, "step": 11306 }, { "epoch": 1.89, "grad_norm": 0.44022318720817566, "learning_rate": 6.350272008812686e-06, "loss": 0.0291, "step": 11307 }, { "epoch": 1.89, "grad_norm": 0.4070375859737396, "learning_rate": 6.3485909096377155e-06, "loss": 0.0543, "step": 11308 }, { "epoch": 1.89, "grad_norm": 0.5567353963851929, "learning_rate": 6.34690992952157e-06, "loss": 0.0659, "step": 11309 }, { "epoch": 1.89, "grad_norm": 0.36950817704200745, "learning_rate": 6.345229068519059e-06, "loss": 0.037, "step": 11310 }, { "epoch": 1.89, "grad_norm": 0.3787512183189392, "learning_rate": 6.3435483266849825e-06, "loss": 0.0364, "step": 11311 }, { "epoch": 1.89, "grad_norm": 0.3553471267223358, "learning_rate": 6.341867704074155e-06, "loss": 0.0376, "step": 11312 }, { "epoch": 1.89, "grad_norm": 0.37904614210128784, "learning_rate": 6.340187200741366e-06, "loss": 0.0471, "step": 11313 }, { "epoch": 1.89, "grad_norm": 0.4534050226211548, "learning_rate": 6.338506816741411e-06, "loss": 0.0603, "step": 11314 }, { "epoch": 1.89, "grad_norm": 0.3914039433002472, "learning_rate": 6.336826552129088e-06, "loss": 0.0609, "step": 11315 }, { "epoch": 1.89, "grad_norm": 0.4595789611339569, "learning_rate": 6.335146406959176e-06, "loss": 0.0517, "step": 11316 }, { "epoch": 1.89, "grad_norm": 0.5568411350250244, "learning_rate": 6.333466381286459e-06, "loss": 0.0491, "step": 11317 }, { "epoch": 1.89, "grad_norm": 0.4228285253047943, "learning_rate": 6.3317864751657265e-06, "loss": 0.0556, "step": 11318 }, { "epoch": 1.89, "grad_norm": 0.3659948408603668, "learning_rate": 6.3301066886517405e-06, "loss": 0.033, "step": 11319 }, { "epoch": 1.89, "grad_norm": 0.38674789667129517, "learning_rate": 6.328427021799281e-06, "loss": 0.0511, "step": 11320 }, { "epoch": 1.89, "grad_norm": 0.49680429697036743, "learning_rate": 6.326747474663114e-06, "loss": 0.042, "step": 11321 }, { "epoch": 1.89, "grad_norm": 0.4480942487716675, "learning_rate": 6.325068047297999e-06, "loss": 0.0495, "step": 11322 }, { "epoch": 1.89, "grad_norm": 0.8756257891654968, "learning_rate": 6.3233887397587046e-06, "loss": 0.0443, "step": 11323 }, { "epoch": 1.89, "grad_norm": 0.32139700651168823, "learning_rate": 6.321709552099982e-06, "loss": 0.0367, "step": 11324 }, { "epoch": 1.89, "grad_norm": 0.4836667776107788, "learning_rate": 6.320030484376576e-06, "loss": 0.0658, "step": 11325 }, { "epoch": 1.89, "grad_norm": 0.3967192471027374, "learning_rate": 6.318351536643246e-06, "loss": 0.0483, "step": 11326 }, { "epoch": 1.89, "grad_norm": 0.5174278020858765, "learning_rate": 6.316672708954732e-06, "loss": 0.0751, "step": 11327 }, { "epoch": 1.89, "grad_norm": 0.6268552541732788, "learning_rate": 6.314994001365772e-06, "loss": 0.0677, "step": 11328 }, { "epoch": 1.89, "grad_norm": 0.5012263059616089, "learning_rate": 6.3133154139311044e-06, "loss": 0.0481, "step": 11329 }, { "epoch": 1.89, "grad_norm": 0.41366830468177795, "learning_rate": 6.3116369467054665e-06, "loss": 0.0491, "step": 11330 }, { "epoch": 1.9, "grad_norm": 0.5306727886199951, "learning_rate": 6.3099585997435755e-06, "loss": 0.0829, "step": 11331 }, { "epoch": 1.9, "grad_norm": 0.2971252202987671, "learning_rate": 6.308280373100168e-06, "loss": 0.0348, "step": 11332 }, { "epoch": 1.9, "grad_norm": 0.36441221833229065, "learning_rate": 6.3066022668299545e-06, "loss": 0.0441, "step": 11333 }, { "epoch": 1.9, "grad_norm": 0.3982226848602295, "learning_rate": 6.304924280987659e-06, "loss": 0.057, "step": 11334 }, { "epoch": 1.9, "grad_norm": 0.3461727797985077, "learning_rate": 6.303246415627992e-06, "loss": 0.0572, "step": 11335 }, { "epoch": 1.9, "grad_norm": 0.4837115705013275, "learning_rate": 6.301568670805662e-06, "loss": 0.057, "step": 11336 }, { "epoch": 1.9, "grad_norm": 0.3098103404045105, "learning_rate": 6.299891046575375e-06, "loss": 0.0423, "step": 11337 }, { "epoch": 1.9, "grad_norm": 0.288472443819046, "learning_rate": 6.298213542991833e-06, "loss": 0.0401, "step": 11338 }, { "epoch": 1.9, "grad_norm": 0.48058223724365234, "learning_rate": 6.296536160109728e-06, "loss": 0.0526, "step": 11339 }, { "epoch": 1.9, "grad_norm": 0.30087465047836304, "learning_rate": 6.294858897983758e-06, "loss": 0.0387, "step": 11340 }, { "epoch": 1.9, "grad_norm": 0.32935482263565063, "learning_rate": 6.293181756668613e-06, "loss": 0.0355, "step": 11341 }, { "epoch": 1.9, "grad_norm": 0.5339785218238831, "learning_rate": 6.291504736218972e-06, "loss": 0.0385, "step": 11342 }, { "epoch": 1.9, "grad_norm": 0.578453540802002, "learning_rate": 6.289827836689526e-06, "loss": 0.0531, "step": 11343 }, { "epoch": 1.9, "grad_norm": 0.5925120711326599, "learning_rate": 6.288151058134944e-06, "loss": 0.0595, "step": 11344 }, { "epoch": 1.9, "grad_norm": 0.6994328498840332, "learning_rate": 6.286474400609901e-06, "loss": 0.0517, "step": 11345 }, { "epoch": 1.9, "grad_norm": 0.448920339345932, "learning_rate": 6.284797864169072e-06, "loss": 0.0568, "step": 11346 }, { "epoch": 1.9, "grad_norm": 0.3468863368034363, "learning_rate": 6.283121448867115e-06, "loss": 0.0548, "step": 11347 }, { "epoch": 1.9, "grad_norm": 0.5663973689079285, "learning_rate": 6.281445154758701e-06, "loss": 0.0505, "step": 11348 }, { "epoch": 1.9, "grad_norm": 0.391477108001709, "learning_rate": 6.279768981898477e-06, "loss": 0.0323, "step": 11349 }, { "epoch": 1.9, "grad_norm": 0.4160030782222748, "learning_rate": 6.278092930341102e-06, "loss": 0.0495, "step": 11350 }, { "epoch": 1.9, "grad_norm": 0.7854399085044861, "learning_rate": 6.2764170001412305e-06, "loss": 0.0585, "step": 11351 }, { "epoch": 1.9, "grad_norm": 0.5169529914855957, "learning_rate": 6.274741191353502e-06, "loss": 0.0529, "step": 11352 }, { "epoch": 1.9, "grad_norm": 0.3324657082557678, "learning_rate": 6.273065504032556e-06, "loss": 0.0305, "step": 11353 }, { "epoch": 1.9, "grad_norm": 0.4691881835460663, "learning_rate": 6.271389938233037e-06, "loss": 0.0588, "step": 11354 }, { "epoch": 1.9, "grad_norm": 0.453145295381546, "learning_rate": 6.269714494009576e-06, "loss": 0.0412, "step": 11355 }, { "epoch": 1.9, "grad_norm": 0.3553142547607422, "learning_rate": 6.268039171416802e-06, "loss": 0.0407, "step": 11356 }, { "epoch": 1.9, "grad_norm": 0.4188506603240967, "learning_rate": 6.266363970509342e-06, "loss": 0.05, "step": 11357 }, { "epoch": 1.9, "grad_norm": 0.2655514180660248, "learning_rate": 6.264688891341821e-06, "loss": 0.0325, "step": 11358 }, { "epoch": 1.9, "grad_norm": 0.44271111488342285, "learning_rate": 6.263013933968851e-06, "loss": 0.0451, "step": 11359 }, { "epoch": 1.9, "grad_norm": 0.4082748591899872, "learning_rate": 6.261339098445054e-06, "loss": 0.0424, "step": 11360 }, { "epoch": 1.9, "grad_norm": 0.4518758952617645, "learning_rate": 6.25966438482503e-06, "loss": 0.0508, "step": 11361 }, { "epoch": 1.9, "grad_norm": 0.42116525769233704, "learning_rate": 6.257989793163392e-06, "loss": 0.0658, "step": 11362 }, { "epoch": 1.9, "grad_norm": 0.5071281790733337, "learning_rate": 6.256315323514746e-06, "loss": 0.0662, "step": 11363 }, { "epoch": 1.9, "grad_norm": 0.4048440754413605, "learning_rate": 6.254640975933679e-06, "loss": 0.0434, "step": 11364 }, { "epoch": 1.9, "grad_norm": 0.40427324175834656, "learning_rate": 6.252966750474794e-06, "loss": 0.0388, "step": 11365 }, { "epoch": 1.9, "grad_norm": 0.30315351486206055, "learning_rate": 6.2512926471926795e-06, "loss": 0.0353, "step": 11366 }, { "epoch": 1.9, "grad_norm": 0.3236589729785919, "learning_rate": 6.249618666141918e-06, "loss": 0.0559, "step": 11367 }, { "epoch": 1.9, "grad_norm": 0.3561532199382782, "learning_rate": 6.247944807377098e-06, "loss": 0.0511, "step": 11368 }, { "epoch": 1.9, "grad_norm": 0.445187509059906, "learning_rate": 6.246271070952793e-06, "loss": 0.0489, "step": 11369 }, { "epoch": 1.9, "grad_norm": 0.4204038083553314, "learning_rate": 6.2445974569235755e-06, "loss": 0.0609, "step": 11370 }, { "epoch": 1.9, "grad_norm": 0.3709227442741394, "learning_rate": 6.242923965344025e-06, "loss": 0.0506, "step": 11371 }, { "epoch": 1.9, "grad_norm": 0.5635256171226501, "learning_rate": 6.241250596268699e-06, "loss": 0.061, "step": 11372 }, { "epoch": 1.9, "grad_norm": 0.37245607376098633, "learning_rate": 6.239577349752161e-06, "loss": 0.0537, "step": 11373 }, { "epoch": 1.9, "grad_norm": 0.6697105169296265, "learning_rate": 6.237904225848974e-06, "loss": 0.0833, "step": 11374 }, { "epoch": 1.9, "grad_norm": 0.6387196779251099, "learning_rate": 6.236231224613686e-06, "loss": 0.0379, "step": 11375 }, { "epoch": 1.9, "grad_norm": 0.37634143233299255, "learning_rate": 6.234558346100854e-06, "loss": 0.0526, "step": 11376 }, { "epoch": 1.9, "grad_norm": 0.39564648270606995, "learning_rate": 6.232885590365018e-06, "loss": 0.0496, "step": 11377 }, { "epoch": 1.9, "grad_norm": 0.5427507758140564, "learning_rate": 6.231212957460724e-06, "loss": 0.0417, "step": 11378 }, { "epoch": 1.9, "grad_norm": 0.5835080742835999, "learning_rate": 6.229540447442512e-06, "loss": 0.0564, "step": 11379 }, { "epoch": 1.9, "grad_norm": 0.2802681624889374, "learning_rate": 6.227868060364914e-06, "loss": 0.0406, "step": 11380 }, { "epoch": 1.9, "grad_norm": 0.7372761368751526, "learning_rate": 6.226195796282457e-06, "loss": 0.0599, "step": 11381 }, { "epoch": 1.9, "grad_norm": 0.4897146224975586, "learning_rate": 6.2245236552496725e-06, "loss": 0.0432, "step": 11382 }, { "epoch": 1.9, "grad_norm": 0.7027702331542969, "learning_rate": 6.222851637321081e-06, "loss": 0.0568, "step": 11383 }, { "epoch": 1.9, "grad_norm": 0.3363119661808014, "learning_rate": 6.221179742551198e-06, "loss": 0.0376, "step": 11384 }, { "epoch": 1.9, "grad_norm": 0.2996387481689453, "learning_rate": 6.219507970994543e-06, "loss": 0.0466, "step": 11385 }, { "epoch": 1.9, "grad_norm": 0.4152733087539673, "learning_rate": 6.217836322705624e-06, "loss": 0.0448, "step": 11386 }, { "epoch": 1.9, "grad_norm": 0.4099527895450592, "learning_rate": 6.216164797738944e-06, "loss": 0.0515, "step": 11387 }, { "epoch": 1.9, "grad_norm": 0.3752261996269226, "learning_rate": 6.214493396149011e-06, "loss": 0.0423, "step": 11388 }, { "epoch": 1.9, "grad_norm": 0.38377976417541504, "learning_rate": 6.212822117990316e-06, "loss": 0.0366, "step": 11389 }, { "epoch": 1.91, "grad_norm": 0.39946120977401733, "learning_rate": 6.2111509633173596e-06, "loss": 0.055, "step": 11390 }, { "epoch": 1.91, "grad_norm": 0.38909807801246643, "learning_rate": 6.209479932184632e-06, "loss": 0.0471, "step": 11391 }, { "epoch": 1.91, "grad_norm": 0.6424933671951294, "learning_rate": 6.207809024646611e-06, "loss": 0.0574, "step": 11392 }, { "epoch": 1.91, "grad_norm": 0.5043240189552307, "learning_rate": 6.206138240757788e-06, "loss": 0.0574, "step": 11393 }, { "epoch": 1.91, "grad_norm": 0.33544042706489563, "learning_rate": 6.204467580572639e-06, "loss": 0.0294, "step": 11394 }, { "epoch": 1.91, "grad_norm": 0.3987874984741211, "learning_rate": 6.202797044145635e-06, "loss": 0.048, "step": 11395 }, { "epoch": 1.91, "grad_norm": 0.39849552512168884, "learning_rate": 6.201126631531249e-06, "loss": 0.0452, "step": 11396 }, { "epoch": 1.91, "grad_norm": 0.32226404547691345, "learning_rate": 6.1994563427839425e-06, "loss": 0.0358, "step": 11397 }, { "epoch": 1.91, "grad_norm": 0.3512280583381653, "learning_rate": 6.19778617795818e-06, "loss": 0.0312, "step": 11398 }, { "epoch": 1.91, "grad_norm": 0.36017465591430664, "learning_rate": 6.196116137108423e-06, "loss": 0.0472, "step": 11399 }, { "epoch": 1.91, "grad_norm": 0.5528204441070557, "learning_rate": 6.194446220289121e-06, "loss": 0.0453, "step": 11400 }, { "epoch": 1.91, "grad_norm": 0.39365851879119873, "learning_rate": 6.192776427554723e-06, "loss": 0.0432, "step": 11401 }, { "epoch": 1.91, "grad_norm": 0.37802058458328247, "learning_rate": 6.191106758959677e-06, "loss": 0.0443, "step": 11402 }, { "epoch": 1.91, "grad_norm": 0.34216922521591187, "learning_rate": 6.189437214558423e-06, "loss": 0.055, "step": 11403 }, { "epoch": 1.91, "grad_norm": 0.5759690999984741, "learning_rate": 6.187767794405404e-06, "loss": 0.069, "step": 11404 }, { "epoch": 1.91, "grad_norm": 0.27613404393196106, "learning_rate": 6.186098498555046e-06, "loss": 0.0306, "step": 11405 }, { "epoch": 1.91, "grad_norm": 0.35724228620529175, "learning_rate": 6.184429327061778e-06, "loss": 0.0485, "step": 11406 }, { "epoch": 1.91, "grad_norm": 0.3718552589416504, "learning_rate": 6.182760279980036e-06, "loss": 0.0355, "step": 11407 }, { "epoch": 1.91, "grad_norm": 0.47175365686416626, "learning_rate": 6.181091357364234e-06, "loss": 0.0411, "step": 11408 }, { "epoch": 1.91, "grad_norm": 0.33230534195899963, "learning_rate": 6.179422559268786e-06, "loss": 0.0394, "step": 11409 }, { "epoch": 1.91, "grad_norm": 0.3981129229068756, "learning_rate": 6.17775388574811e-06, "loss": 0.043, "step": 11410 }, { "epoch": 1.91, "grad_norm": 0.4586496353149414, "learning_rate": 6.176085336856617e-06, "loss": 0.0628, "step": 11411 }, { "epoch": 1.91, "grad_norm": 0.3853919208049774, "learning_rate": 6.174416912648706e-06, "loss": 0.0443, "step": 11412 }, { "epoch": 1.91, "grad_norm": 0.34002912044525146, "learning_rate": 6.172748613178783e-06, "loss": 0.0395, "step": 11413 }, { "epoch": 1.91, "grad_norm": 0.35169798135757446, "learning_rate": 6.171080438501247e-06, "loss": 0.0484, "step": 11414 }, { "epoch": 1.91, "grad_norm": 0.4750198423862457, "learning_rate": 6.169412388670482e-06, "loss": 0.0463, "step": 11415 }, { "epoch": 1.91, "grad_norm": 0.35229939222335815, "learning_rate": 6.167744463740888e-06, "loss": 0.0411, "step": 11416 }, { "epoch": 1.91, "grad_norm": 0.32765331864356995, "learning_rate": 6.166076663766839e-06, "loss": 0.0315, "step": 11417 }, { "epoch": 1.91, "grad_norm": 0.46069246530532837, "learning_rate": 6.164408988802724e-06, "loss": 0.0402, "step": 11418 }, { "epoch": 1.91, "grad_norm": 0.428226500749588, "learning_rate": 6.162741438902917e-06, "loss": 0.0558, "step": 11419 }, { "epoch": 1.91, "grad_norm": 0.4599003493785858, "learning_rate": 6.161074014121788e-06, "loss": 0.0429, "step": 11420 }, { "epoch": 1.91, "grad_norm": 0.27903327345848083, "learning_rate": 6.15940671451371e-06, "loss": 0.0305, "step": 11421 }, { "epoch": 1.91, "grad_norm": 0.3755987584590912, "learning_rate": 6.157739540133045e-06, "loss": 0.0481, "step": 11422 }, { "epoch": 1.91, "grad_norm": 0.2936878502368927, "learning_rate": 6.156072491034152e-06, "loss": 0.029, "step": 11423 }, { "epoch": 1.91, "grad_norm": 0.3578287661075592, "learning_rate": 6.154405567271391e-06, "loss": 0.0367, "step": 11424 }, { "epoch": 1.91, "grad_norm": 0.5022977590560913, "learning_rate": 6.1527387688991115e-06, "loss": 0.0574, "step": 11425 }, { "epoch": 1.91, "grad_norm": 0.38469815254211426, "learning_rate": 6.15107209597166e-06, "loss": 0.0531, "step": 11426 }, { "epoch": 1.91, "grad_norm": 0.459180623292923, "learning_rate": 6.1494055485433845e-06, "loss": 0.0392, "step": 11427 }, { "epoch": 1.91, "grad_norm": 0.46669262647628784, "learning_rate": 6.1477391266686235e-06, "loss": 0.0422, "step": 11428 }, { "epoch": 1.91, "grad_norm": 0.33862143754959106, "learning_rate": 6.146072830401709e-06, "loss": 0.0422, "step": 11429 }, { "epoch": 1.91, "grad_norm": 0.39487579464912415, "learning_rate": 6.144406659796977e-06, "loss": 0.0434, "step": 11430 }, { "epoch": 1.91, "grad_norm": 0.2542151212692261, "learning_rate": 6.1427406149087535e-06, "loss": 0.0296, "step": 11431 }, { "epoch": 1.91, "grad_norm": 0.4004251956939697, "learning_rate": 6.141074695791366e-06, "loss": 0.0445, "step": 11432 }, { "epoch": 1.91, "grad_norm": 0.9476401805877686, "learning_rate": 6.139408902499128e-06, "loss": 0.0605, "step": 11433 }, { "epoch": 1.91, "grad_norm": 0.4428013265132904, "learning_rate": 6.137743235086355e-06, "loss": 0.0467, "step": 11434 }, { "epoch": 1.91, "grad_norm": 0.4163190424442291, "learning_rate": 6.136077693607365e-06, "loss": 0.0469, "step": 11435 }, { "epoch": 1.91, "grad_norm": 0.5204823017120361, "learning_rate": 6.134412278116459e-06, "loss": 0.0613, "step": 11436 }, { "epoch": 1.91, "grad_norm": 0.3852832317352295, "learning_rate": 6.132746988667939e-06, "loss": 0.0545, "step": 11437 }, { "epoch": 1.91, "grad_norm": 0.5981705188751221, "learning_rate": 6.1310818253161085e-06, "loss": 0.0545, "step": 11438 }, { "epoch": 1.91, "grad_norm": 0.49961555004119873, "learning_rate": 6.129416788115261e-06, "loss": 0.0353, "step": 11439 }, { "epoch": 1.91, "grad_norm": 0.38714590668678284, "learning_rate": 6.127751877119683e-06, "loss": 0.0537, "step": 11440 }, { "epoch": 1.91, "grad_norm": 0.4062727093696594, "learning_rate": 6.126087092383666e-06, "loss": 0.0724, "step": 11441 }, { "epoch": 1.91, "grad_norm": 0.3629657030105591, "learning_rate": 6.124422433961493e-06, "loss": 0.0363, "step": 11442 }, { "epoch": 1.91, "grad_norm": 0.47947606444358826, "learning_rate": 6.122757901907437e-06, "loss": 0.055, "step": 11443 }, { "epoch": 1.91, "grad_norm": 1.0529468059539795, "learning_rate": 6.121093496275778e-06, "loss": 0.0471, "step": 11444 }, { "epoch": 1.91, "grad_norm": 0.4982805550098419, "learning_rate": 6.119429217120778e-06, "loss": 0.0466, "step": 11445 }, { "epoch": 1.91, "grad_norm": 0.5275492072105408, "learning_rate": 6.117765064496712e-06, "loss": 0.0426, "step": 11446 }, { "epoch": 1.91, "grad_norm": 0.44520723819732666, "learning_rate": 6.11610103845784e-06, "loss": 0.0429, "step": 11447 }, { "epoch": 1.91, "grad_norm": 0.45958149433135986, "learning_rate": 6.114437139058413e-06, "loss": 0.049, "step": 11448 }, { "epoch": 1.91, "grad_norm": 0.5136246681213379, "learning_rate": 6.112773366352691e-06, "loss": 0.0432, "step": 11449 }, { "epoch": 1.92, "grad_norm": 0.43158790469169617, "learning_rate": 6.1111097203949235e-06, "loss": 0.0342, "step": 11450 }, { "epoch": 1.92, "grad_norm": 0.4999171197414398, "learning_rate": 6.109446201239351e-06, "loss": 0.0391, "step": 11451 }, { "epoch": 1.92, "grad_norm": 0.350913941860199, "learning_rate": 6.107782808940221e-06, "loss": 0.0348, "step": 11452 }, { "epoch": 1.92, "grad_norm": 0.44328540563583374, "learning_rate": 6.106119543551765e-06, "loss": 0.0546, "step": 11453 }, { "epoch": 1.92, "grad_norm": 0.5398753881454468, "learning_rate": 6.104456405128215e-06, "loss": 0.0413, "step": 11454 }, { "epoch": 1.92, "grad_norm": 0.4259454011917114, "learning_rate": 6.102793393723809e-06, "loss": 0.0579, "step": 11455 }, { "epoch": 1.92, "grad_norm": 0.5747295022010803, "learning_rate": 6.101130509392763e-06, "loss": 0.0407, "step": 11456 }, { "epoch": 1.92, "grad_norm": 0.4457160532474518, "learning_rate": 6.099467752189296e-06, "loss": 0.0793, "step": 11457 }, { "epoch": 1.92, "grad_norm": 0.3049772381782532, "learning_rate": 6.09780512216763e-06, "loss": 0.0301, "step": 11458 }, { "epoch": 1.92, "grad_norm": 0.5321768522262573, "learning_rate": 6.096142619381975e-06, "loss": 0.0485, "step": 11459 }, { "epoch": 1.92, "grad_norm": 0.4440154731273651, "learning_rate": 6.094480243886542e-06, "loss": 0.0535, "step": 11460 }, { "epoch": 1.92, "grad_norm": 0.28670161962509155, "learning_rate": 6.09281799573553e-06, "loss": 0.038, "step": 11461 }, { "epoch": 1.92, "grad_norm": 0.39534229040145874, "learning_rate": 6.091155874983139e-06, "loss": 0.0392, "step": 11462 }, { "epoch": 1.92, "grad_norm": 0.4378248155117035, "learning_rate": 6.08949388168357e-06, "loss": 0.0449, "step": 11463 }, { "epoch": 1.92, "grad_norm": 0.39076802134513855, "learning_rate": 6.087832015891012e-06, "loss": 0.0624, "step": 11464 }, { "epoch": 1.92, "grad_norm": 0.38336893916130066, "learning_rate": 6.086170277659646e-06, "loss": 0.0529, "step": 11465 }, { "epoch": 1.92, "grad_norm": 0.4684666395187378, "learning_rate": 6.084508667043663e-06, "loss": 0.0586, "step": 11466 }, { "epoch": 1.92, "grad_norm": 0.238493412733078, "learning_rate": 6.082847184097239e-06, "loss": 0.0222, "step": 11467 }, { "epoch": 1.92, "grad_norm": 0.35106807947158813, "learning_rate": 6.0811858288745464e-06, "loss": 0.0609, "step": 11468 }, { "epoch": 1.92, "grad_norm": 0.24127978086471558, "learning_rate": 6.07952460142976e-06, "loss": 0.0239, "step": 11469 }, { "epoch": 1.92, "grad_norm": 0.4049666225910187, "learning_rate": 6.077863501817045e-06, "loss": 0.0338, "step": 11470 }, { "epoch": 1.92, "grad_norm": 0.4229965806007385, "learning_rate": 6.0762025300905605e-06, "loss": 0.0614, "step": 11471 }, { "epoch": 1.92, "grad_norm": 0.4113517105579376, "learning_rate": 6.07454168630447e-06, "loss": 0.0519, "step": 11472 }, { "epoch": 1.92, "grad_norm": 0.37034958600997925, "learning_rate": 6.072880970512922e-06, "loss": 0.029, "step": 11473 }, { "epoch": 1.92, "grad_norm": 0.2496698796749115, "learning_rate": 6.071220382770069e-06, "loss": 0.0232, "step": 11474 }, { "epoch": 1.92, "grad_norm": 0.32402488589286804, "learning_rate": 6.069559923130059e-06, "loss": 0.0308, "step": 11475 }, { "epoch": 1.92, "grad_norm": 0.4235630929470062, "learning_rate": 6.067899591647027e-06, "loss": 0.0445, "step": 11476 }, { "epoch": 1.92, "grad_norm": 0.4281734824180603, "learning_rate": 6.066239388375117e-06, "loss": 0.0531, "step": 11477 }, { "epoch": 1.92, "grad_norm": 0.6596173644065857, "learning_rate": 6.0645793133684595e-06, "loss": 0.0538, "step": 11478 }, { "epoch": 1.92, "grad_norm": 0.26606643199920654, "learning_rate": 6.062919366681179e-06, "loss": 0.0376, "step": 11479 }, { "epoch": 1.92, "grad_norm": 0.4147627055644989, "learning_rate": 6.061259548367408e-06, "loss": 0.0366, "step": 11480 }, { "epoch": 1.92, "grad_norm": 0.510979175567627, "learning_rate": 6.059599858481261e-06, "loss": 0.0579, "step": 11481 }, { "epoch": 1.92, "grad_norm": 0.46476224064826965, "learning_rate": 6.057940297076854e-06, "loss": 0.046, "step": 11482 }, { "epoch": 1.92, "grad_norm": 0.28567472100257874, "learning_rate": 6.056280864208306e-06, "loss": 0.0377, "step": 11483 }, { "epoch": 1.92, "grad_norm": 0.3605048954486847, "learning_rate": 6.054621559929721e-06, "loss": 0.0437, "step": 11484 }, { "epoch": 1.92, "grad_norm": 0.35587164759635925, "learning_rate": 6.052962384295197e-06, "loss": 0.0352, "step": 11485 }, { "epoch": 1.92, "grad_norm": 0.3529518246650696, "learning_rate": 6.05130333735884e-06, "loss": 0.0613, "step": 11486 }, { "epoch": 1.92, "grad_norm": 0.532304048538208, "learning_rate": 6.049644419174743e-06, "loss": 0.0491, "step": 11487 }, { "epoch": 1.92, "grad_norm": 0.33676159381866455, "learning_rate": 6.047985629797002e-06, "loss": 0.0538, "step": 11488 }, { "epoch": 1.92, "grad_norm": 0.27182185649871826, "learning_rate": 6.046326969279697e-06, "loss": 0.0311, "step": 11489 }, { "epoch": 1.92, "grad_norm": 0.6020445227622986, "learning_rate": 6.044668437676912e-06, "loss": 0.0604, "step": 11490 }, { "epoch": 1.92, "grad_norm": 0.45949310064315796, "learning_rate": 6.043010035042732e-06, "loss": 0.057, "step": 11491 }, { "epoch": 1.92, "grad_norm": 0.4302138090133667, "learning_rate": 6.041351761431225e-06, "loss": 0.0496, "step": 11492 }, { "epoch": 1.92, "grad_norm": 0.573747992515564, "learning_rate": 6.039693616896458e-06, "loss": 0.0666, "step": 11493 }, { "epoch": 1.92, "grad_norm": 0.38402754068374634, "learning_rate": 6.038035601492504e-06, "loss": 0.0392, "step": 11494 }, { "epoch": 1.92, "grad_norm": 0.41062483191490173, "learning_rate": 6.036377715273427e-06, "loss": 0.0418, "step": 11495 }, { "epoch": 1.92, "grad_norm": 0.4721364676952362, "learning_rate": 6.034719958293274e-06, "loss": 0.0509, "step": 11496 }, { "epoch": 1.92, "grad_norm": 0.43469128012657166, "learning_rate": 6.033062330606106e-06, "loss": 0.0629, "step": 11497 }, { "epoch": 1.92, "grad_norm": 0.2988303303718567, "learning_rate": 6.03140483226597e-06, "loss": 0.0327, "step": 11498 }, { "epoch": 1.92, "grad_norm": 0.5513801574707031, "learning_rate": 6.029747463326909e-06, "loss": 0.0557, "step": 11499 }, { "epoch": 1.92, "grad_norm": 0.44110211730003357, "learning_rate": 6.028090223842969e-06, "loss": 0.0585, "step": 11500 }, { "epoch": 1.92, "grad_norm": 0.35752949118614197, "learning_rate": 6.026433113868179e-06, "loss": 0.0421, "step": 11501 }, { "epoch": 1.92, "grad_norm": 0.3964191675186157, "learning_rate": 6.024776133456575e-06, "loss": 0.0672, "step": 11502 }, { "epoch": 1.92, "grad_norm": 0.46281516551971436, "learning_rate": 6.023119282662191e-06, "loss": 0.0599, "step": 11503 }, { "epoch": 1.92, "grad_norm": 0.6688307523727417, "learning_rate": 6.021462561539037e-06, "loss": 0.0472, "step": 11504 }, { "epoch": 1.92, "grad_norm": 0.583602786064148, "learning_rate": 6.019805970141146e-06, "loss": 0.064, "step": 11505 }, { "epoch": 1.92, "grad_norm": 0.3924373686313629, "learning_rate": 6.018149508522526e-06, "loss": 0.0418, "step": 11506 }, { "epoch": 1.92, "grad_norm": 0.34481608867645264, "learning_rate": 6.016493176737186e-06, "loss": 0.0387, "step": 11507 }, { "epoch": 1.92, "grad_norm": 0.2954784333705902, "learning_rate": 6.014836974839141e-06, "loss": 0.034, "step": 11508 }, { "epoch": 1.92, "grad_norm": 0.4948148727416992, "learning_rate": 6.013180902882387e-06, "loss": 0.0592, "step": 11509 }, { "epoch": 1.93, "grad_norm": 0.27044591307640076, "learning_rate": 6.011524960920921e-06, "loss": 0.0264, "step": 11510 }, { "epoch": 1.93, "grad_norm": 0.4431900978088379, "learning_rate": 6.009869149008746e-06, "loss": 0.0599, "step": 11511 }, { "epoch": 1.93, "grad_norm": 0.25856277346611023, "learning_rate": 6.008213467199845e-06, "loss": 0.039, "step": 11512 }, { "epoch": 1.93, "grad_norm": 0.37847238779067993, "learning_rate": 6.0065579155482016e-06, "loss": 0.0353, "step": 11513 }, { "epoch": 1.93, "grad_norm": 0.35746127367019653, "learning_rate": 6.004902494107801e-06, "loss": 0.0526, "step": 11514 }, { "epoch": 1.93, "grad_norm": 0.40063512325286865, "learning_rate": 6.003247202932618e-06, "loss": 0.0326, "step": 11515 }, { "epoch": 1.93, "grad_norm": 0.5642064809799194, "learning_rate": 6.001592042076632e-06, "loss": 0.0597, "step": 11516 }, { "epoch": 1.93, "grad_norm": 0.4353989064693451, "learning_rate": 5.999937011593803e-06, "loss": 0.0416, "step": 11517 }, { "epoch": 1.93, "grad_norm": 0.2972186803817749, "learning_rate": 5.998282111538097e-06, "loss": 0.0408, "step": 11518 }, { "epoch": 1.93, "grad_norm": 0.3494437038898468, "learning_rate": 5.996627341963482e-06, "loss": 0.033, "step": 11519 }, { "epoch": 1.93, "grad_norm": 0.453529417514801, "learning_rate": 5.994972702923907e-06, "loss": 0.0477, "step": 11520 }, { "epoch": 1.93, "grad_norm": 0.5363940000534058, "learning_rate": 5.993318194473321e-06, "loss": 0.058, "step": 11521 }, { "epoch": 1.93, "grad_norm": 0.3658984899520874, "learning_rate": 5.991663816665676e-06, "loss": 0.0403, "step": 11522 }, { "epoch": 1.93, "grad_norm": 0.4189228117465973, "learning_rate": 5.990009569554916e-06, "loss": 0.0537, "step": 11523 }, { "epoch": 1.93, "grad_norm": 0.5029861330986023, "learning_rate": 5.988355453194975e-06, "loss": 0.0463, "step": 11524 }, { "epoch": 1.93, "grad_norm": 0.5079760551452637, "learning_rate": 5.986701467639792e-06, "loss": 0.0638, "step": 11525 }, { "epoch": 1.93, "grad_norm": 0.4202505946159363, "learning_rate": 5.985047612943298e-06, "loss": 0.05, "step": 11526 }, { "epoch": 1.93, "grad_norm": 0.3328855633735657, "learning_rate": 5.983393889159412e-06, "loss": 0.0374, "step": 11527 }, { "epoch": 1.93, "grad_norm": 0.32084938883781433, "learning_rate": 5.981740296342064e-06, "loss": 0.0297, "step": 11528 }, { "epoch": 1.93, "grad_norm": 0.41720813512802124, "learning_rate": 5.980086834545166e-06, "loss": 0.0509, "step": 11529 }, { "epoch": 1.93, "grad_norm": 0.4085710048675537, "learning_rate": 5.978433503822634e-06, "loss": 0.0647, "step": 11530 }, { "epoch": 1.93, "grad_norm": 0.39224132895469666, "learning_rate": 5.976780304228378e-06, "loss": 0.0616, "step": 11531 }, { "epoch": 1.93, "grad_norm": 0.3748408555984497, "learning_rate": 5.975127235816297e-06, "loss": 0.0538, "step": 11532 }, { "epoch": 1.93, "grad_norm": 0.7083471417427063, "learning_rate": 5.9734742986402995e-06, "loss": 0.0608, "step": 11533 }, { "epoch": 1.93, "grad_norm": 0.5746471881866455, "learning_rate": 5.971821492754275e-06, "loss": 0.0484, "step": 11534 }, { "epoch": 1.93, "grad_norm": 0.3705388605594635, "learning_rate": 5.970168818212117e-06, "loss": 0.0488, "step": 11535 }, { "epoch": 1.93, "grad_norm": 0.4671468138694763, "learning_rate": 5.968516275067715e-06, "loss": 0.0561, "step": 11536 }, { "epoch": 1.93, "grad_norm": 0.5674586296081543, "learning_rate": 5.966863863374951e-06, "loss": 0.0509, "step": 11537 }, { "epoch": 1.93, "grad_norm": 0.3689892888069153, "learning_rate": 5.965211583187702e-06, "loss": 0.0475, "step": 11538 }, { "epoch": 1.93, "grad_norm": 0.43221449851989746, "learning_rate": 5.963559434559847e-06, "loss": 0.0568, "step": 11539 }, { "epoch": 1.93, "grad_norm": 0.25024306774139404, "learning_rate": 5.961907417545256e-06, "loss": 0.0318, "step": 11540 }, { "epoch": 1.93, "grad_norm": 0.3812447190284729, "learning_rate": 5.9602555321977885e-06, "loss": 0.0493, "step": 11541 }, { "epoch": 1.93, "grad_norm": 0.31159502267837524, "learning_rate": 5.958603778571313e-06, "loss": 0.0446, "step": 11542 }, { "epoch": 1.93, "grad_norm": 0.46474453806877136, "learning_rate": 5.956952156719682e-06, "loss": 0.0436, "step": 11543 }, { "epoch": 1.93, "grad_norm": 0.5726184248924255, "learning_rate": 5.9553006666967575e-06, "loss": 0.055, "step": 11544 }, { "epoch": 1.93, "grad_norm": 0.38669583201408386, "learning_rate": 5.953649308556379e-06, "loss": 0.0373, "step": 11545 }, { "epoch": 1.93, "grad_norm": 0.6310974955558777, "learning_rate": 5.951998082352392e-06, "loss": 0.0413, "step": 11546 }, { "epoch": 1.93, "grad_norm": 0.4770919382572174, "learning_rate": 5.950346988138647e-06, "loss": 0.0564, "step": 11547 }, { "epoch": 1.93, "grad_norm": 0.3950529396533966, "learning_rate": 5.94869602596897e-06, "loss": 0.0448, "step": 11548 }, { "epoch": 1.93, "grad_norm": 0.3594442307949066, "learning_rate": 5.947045195897192e-06, "loss": 0.051, "step": 11549 }, { "epoch": 1.93, "grad_norm": 0.5839982032775879, "learning_rate": 5.945394497977146e-06, "loss": 0.0668, "step": 11550 }, { "epoch": 1.93, "grad_norm": 0.34303969144821167, "learning_rate": 5.943743932262655e-06, "loss": 0.048, "step": 11551 }, { "epoch": 1.93, "grad_norm": 0.35346296429634094, "learning_rate": 5.942093498807531e-06, "loss": 0.0569, "step": 11552 }, { "epoch": 1.93, "grad_norm": 0.3846338391304016, "learning_rate": 5.940443197665595e-06, "loss": 0.0421, "step": 11553 }, { "epoch": 1.93, "grad_norm": 0.3830457031726837, "learning_rate": 5.938793028890657e-06, "loss": 0.0356, "step": 11554 }, { "epoch": 1.93, "grad_norm": 0.3570122718811035, "learning_rate": 5.937142992536518e-06, "loss": 0.0468, "step": 11555 }, { "epoch": 1.93, "grad_norm": 0.41530969738960266, "learning_rate": 5.935493088656986e-06, "loss": 0.048, "step": 11556 }, { "epoch": 1.93, "grad_norm": 0.33879736065864563, "learning_rate": 5.933843317305852e-06, "loss": 0.0402, "step": 11557 }, { "epoch": 1.93, "grad_norm": 0.3945908546447754, "learning_rate": 5.932193678536913e-06, "loss": 0.0376, "step": 11558 }, { "epoch": 1.93, "grad_norm": 0.4470430910587311, "learning_rate": 5.9305441724039584e-06, "loss": 0.0457, "step": 11559 }, { "epoch": 1.93, "grad_norm": 0.3332245349884033, "learning_rate": 5.928894798960765e-06, "loss": 0.0382, "step": 11560 }, { "epoch": 1.93, "grad_norm": 0.31032413244247437, "learning_rate": 5.927245558261123e-06, "loss": 0.0426, "step": 11561 }, { "epoch": 1.93, "grad_norm": 0.40687525272369385, "learning_rate": 5.9255964503588016e-06, "loss": 0.0486, "step": 11562 }, { "epoch": 1.93, "grad_norm": 0.4049188196659088, "learning_rate": 5.92394747530757e-06, "loss": 0.0418, "step": 11563 }, { "epoch": 1.93, "grad_norm": 0.4562644064426422, "learning_rate": 5.922298633161204e-06, "loss": 0.0531, "step": 11564 }, { "epoch": 1.93, "grad_norm": 0.3857635259628296, "learning_rate": 5.920649923973458e-06, "loss": 0.0375, "step": 11565 }, { "epoch": 1.93, "grad_norm": 0.31653130054473877, "learning_rate": 5.9190013477980926e-06, "loss": 0.044, "step": 11566 }, { "epoch": 1.93, "grad_norm": 0.35567688941955566, "learning_rate": 5.9173529046888645e-06, "loss": 0.0413, "step": 11567 }, { "epoch": 1.93, "grad_norm": 0.44749271869659424, "learning_rate": 5.915704594699521e-06, "loss": 0.0493, "step": 11568 }, { "epoch": 1.93, "grad_norm": 0.4933024048805237, "learning_rate": 5.914056417883805e-06, "loss": 0.0475, "step": 11569 }, { "epoch": 1.94, "grad_norm": 0.450569748878479, "learning_rate": 5.91240837429546e-06, "loss": 0.0489, "step": 11570 }, { "epoch": 1.94, "grad_norm": 0.4628632068634033, "learning_rate": 5.91076046398822e-06, "loss": 0.0512, "step": 11571 }, { "epoch": 1.94, "grad_norm": 0.3020384907722473, "learning_rate": 5.909112687015823e-06, "loss": 0.0376, "step": 11572 }, { "epoch": 1.94, "grad_norm": 0.29158639907836914, "learning_rate": 5.907465043431993e-06, "loss": 0.0432, "step": 11573 }, { "epoch": 1.94, "grad_norm": 0.30920034646987915, "learning_rate": 5.905817533290451e-06, "loss": 0.0425, "step": 11574 }, { "epoch": 1.94, "grad_norm": 0.45031654834747314, "learning_rate": 5.904170156644924e-06, "loss": 0.0538, "step": 11575 }, { "epoch": 1.94, "grad_norm": 0.6118711829185486, "learning_rate": 5.90252291354912e-06, "loss": 0.076, "step": 11576 }, { "epoch": 1.94, "grad_norm": 0.4796842038631439, "learning_rate": 5.900875804056747e-06, "loss": 0.0506, "step": 11577 }, { "epoch": 1.94, "grad_norm": 0.40464726090431213, "learning_rate": 5.89922882822152e-06, "loss": 0.0346, "step": 11578 }, { "epoch": 1.94, "grad_norm": 0.5615671277046204, "learning_rate": 5.897581986097135e-06, "loss": 0.0698, "step": 11579 }, { "epoch": 1.94, "grad_norm": 0.4140171408653259, "learning_rate": 5.895935277737288e-06, "loss": 0.0382, "step": 11580 }, { "epoch": 1.94, "grad_norm": 0.4146965742111206, "learning_rate": 5.894288703195676e-06, "loss": 0.0634, "step": 11581 }, { "epoch": 1.94, "grad_norm": 0.337727427482605, "learning_rate": 5.892642262525988e-06, "loss": 0.0451, "step": 11582 }, { "epoch": 1.94, "grad_norm": 0.2699609100818634, "learning_rate": 5.890995955781902e-06, "loss": 0.0293, "step": 11583 }, { "epoch": 1.94, "grad_norm": 0.32964450120925903, "learning_rate": 5.889349783017108e-06, "loss": 0.0452, "step": 11584 }, { "epoch": 1.94, "grad_norm": 0.3939700126647949, "learning_rate": 5.88770374428527e-06, "loss": 0.0611, "step": 11585 }, { "epoch": 1.94, "grad_norm": 0.4255341589450836, "learning_rate": 5.886057839640067e-06, "loss": 0.0624, "step": 11586 }, { "epoch": 1.94, "grad_norm": 0.29671555757522583, "learning_rate": 5.8844120691351665e-06, "loss": 0.0376, "step": 11587 }, { "epoch": 1.94, "grad_norm": 0.2813664972782135, "learning_rate": 5.882766432824225e-06, "loss": 0.0374, "step": 11588 }, { "epoch": 1.94, "grad_norm": 0.3854723572731018, "learning_rate": 5.8811209307609064e-06, "loss": 0.042, "step": 11589 }, { "epoch": 1.94, "grad_norm": 0.3503306806087494, "learning_rate": 5.87947556299886e-06, "loss": 0.0302, "step": 11590 }, { "epoch": 1.94, "grad_norm": 0.4955701529979706, "learning_rate": 5.8778303295917365e-06, "loss": 0.0569, "step": 11591 }, { "epoch": 1.94, "grad_norm": 0.485324889421463, "learning_rate": 5.876185230593183e-06, "loss": 0.0512, "step": 11592 }, { "epoch": 1.94, "grad_norm": 0.360526978969574, "learning_rate": 5.874540266056837e-06, "loss": 0.0501, "step": 11593 }, { "epoch": 1.94, "grad_norm": 0.38126200437545776, "learning_rate": 5.872895436036334e-06, "loss": 0.0494, "step": 11594 }, { "epoch": 1.94, "grad_norm": 0.2691904306411743, "learning_rate": 5.871250740585313e-06, "loss": 0.031, "step": 11595 }, { "epoch": 1.94, "grad_norm": 0.4382089674472809, "learning_rate": 5.869606179757394e-06, "loss": 0.0403, "step": 11596 }, { "epoch": 1.94, "grad_norm": 0.394872784614563, "learning_rate": 5.867961753606199e-06, "loss": 0.067, "step": 11597 }, { "epoch": 1.94, "grad_norm": 0.35709622502326965, "learning_rate": 5.866317462185351e-06, "loss": 0.0572, "step": 11598 }, { "epoch": 1.94, "grad_norm": 0.35124146938323975, "learning_rate": 5.864673305548461e-06, "loss": 0.047, "step": 11599 }, { "epoch": 1.94, "grad_norm": 0.31845179200172424, "learning_rate": 5.863029283749146e-06, "loss": 0.0544, "step": 11600 }, { "epoch": 1.94, "grad_norm": 0.3649311065673828, "learning_rate": 5.8613853968410015e-06, "loss": 0.0488, "step": 11601 }, { "epoch": 1.94, "grad_norm": 0.3075445592403412, "learning_rate": 5.8597416448776336e-06, "loss": 0.0416, "step": 11602 }, { "epoch": 1.94, "grad_norm": 0.4945560693740845, "learning_rate": 5.858098027912641e-06, "loss": 0.0528, "step": 11603 }, { "epoch": 1.94, "grad_norm": 0.42756009101867676, "learning_rate": 5.8564545459996136e-06, "loss": 0.0534, "step": 11604 }, { "epoch": 1.94, "grad_norm": 0.37365275621414185, "learning_rate": 5.854811199192135e-06, "loss": 0.0456, "step": 11605 }, { "epoch": 1.94, "grad_norm": 0.39958810806274414, "learning_rate": 5.853167987543795e-06, "loss": 0.0607, "step": 11606 }, { "epoch": 1.94, "grad_norm": 0.3262251615524292, "learning_rate": 5.85152491110817e-06, "loss": 0.0298, "step": 11607 }, { "epoch": 1.94, "grad_norm": 0.364655077457428, "learning_rate": 5.849881969938833e-06, "loss": 0.0559, "step": 11608 }, { "epoch": 1.94, "grad_norm": 0.4691629111766815, "learning_rate": 5.848239164089357e-06, "loss": 0.0634, "step": 11609 }, { "epoch": 1.94, "grad_norm": 0.37780481576919556, "learning_rate": 5.84659649361331e-06, "loss": 0.0589, "step": 11610 }, { "epoch": 1.94, "grad_norm": 0.3380526006221771, "learning_rate": 5.844953958564246e-06, "loss": 0.0546, "step": 11611 }, { "epoch": 1.94, "grad_norm": 0.4647276699542999, "learning_rate": 5.843311558995728e-06, "loss": 0.0489, "step": 11612 }, { "epoch": 1.94, "grad_norm": 0.4243430495262146, "learning_rate": 5.841669294961303e-06, "loss": 0.0492, "step": 11613 }, { "epoch": 1.94, "grad_norm": 0.45195019245147705, "learning_rate": 5.8400271665145256e-06, "loss": 0.0499, "step": 11614 }, { "epoch": 1.94, "grad_norm": 0.31871429085731506, "learning_rate": 5.838385173708936e-06, "loss": 0.0499, "step": 11615 }, { "epoch": 1.94, "grad_norm": 0.45663511753082275, "learning_rate": 5.836743316598072e-06, "loss": 0.0428, "step": 11616 }, { "epoch": 1.94, "grad_norm": 0.4086327850818634, "learning_rate": 5.835101595235474e-06, "loss": 0.0396, "step": 11617 }, { "epoch": 1.94, "grad_norm": 0.41871631145477295, "learning_rate": 5.833460009674669e-06, "loss": 0.0441, "step": 11618 }, { "epoch": 1.94, "grad_norm": 0.30357131361961365, "learning_rate": 5.8318185599691775e-06, "loss": 0.0269, "step": 11619 }, { "epoch": 1.94, "grad_norm": 0.4159089922904968, "learning_rate": 5.8301772461725325e-06, "loss": 0.0389, "step": 11620 }, { "epoch": 1.94, "grad_norm": 0.379118412733078, "learning_rate": 5.828536068338244e-06, "loss": 0.0485, "step": 11621 }, { "epoch": 1.94, "grad_norm": 0.3305143415927887, "learning_rate": 5.826895026519821e-06, "loss": 0.0402, "step": 11622 }, { "epoch": 1.94, "grad_norm": 0.42875057458877563, "learning_rate": 5.82525412077078e-06, "loss": 0.0582, "step": 11623 }, { "epoch": 1.94, "grad_norm": 0.45542240142822266, "learning_rate": 5.823613351144619e-06, "loss": 0.0382, "step": 11624 }, { "epoch": 1.94, "grad_norm": 0.3848361670970917, "learning_rate": 5.8219727176948384e-06, "loss": 0.0565, "step": 11625 }, { "epoch": 1.94, "grad_norm": 0.42689046263694763, "learning_rate": 5.820332220474938e-06, "loss": 0.0476, "step": 11626 }, { "epoch": 1.94, "grad_norm": 0.45398885011672974, "learning_rate": 5.818691859538401e-06, "loss": 0.0494, "step": 11627 }, { "epoch": 1.94, "grad_norm": 0.6455947160720825, "learning_rate": 5.81705163493872e-06, "loss": 0.0448, "step": 11628 }, { "epoch": 1.94, "grad_norm": 0.4182993769645691, "learning_rate": 5.8154115467293746e-06, "loss": 0.0573, "step": 11629 }, { "epoch": 1.95, "grad_norm": 0.3444395661354065, "learning_rate": 5.813771594963837e-06, "loss": 0.0316, "step": 11630 }, { "epoch": 1.95, "grad_norm": 0.373579204082489, "learning_rate": 5.812131779695588e-06, "loss": 0.0544, "step": 11631 }, { "epoch": 1.95, "grad_norm": 0.3686559498310089, "learning_rate": 5.810492100978087e-06, "loss": 0.0417, "step": 11632 }, { "epoch": 1.95, "grad_norm": 0.30891507863998413, "learning_rate": 5.808852558864803e-06, "loss": 0.0398, "step": 11633 }, { "epoch": 1.95, "grad_norm": 0.49137020111083984, "learning_rate": 5.807213153409199e-06, "loss": 0.0434, "step": 11634 }, { "epoch": 1.95, "grad_norm": 0.332379013299942, "learning_rate": 5.805573884664726e-06, "loss": 0.0428, "step": 11635 }, { "epoch": 1.95, "grad_norm": 0.4104706645011902, "learning_rate": 5.80393475268483e-06, "loss": 0.0542, "step": 11636 }, { "epoch": 1.95, "grad_norm": 0.3286203444004059, "learning_rate": 5.802295757522964e-06, "loss": 0.0407, "step": 11637 }, { "epoch": 1.95, "grad_norm": 0.31824791431427, "learning_rate": 5.800656899232569e-06, "loss": 0.0294, "step": 11638 }, { "epoch": 1.95, "grad_norm": 0.363491952419281, "learning_rate": 5.799018177867073e-06, "loss": 0.06, "step": 11639 }, { "epoch": 1.95, "grad_norm": 0.5168530941009521, "learning_rate": 5.797379593479917e-06, "loss": 0.0424, "step": 11640 }, { "epoch": 1.95, "grad_norm": 0.37446144223213196, "learning_rate": 5.795741146124528e-06, "loss": 0.0385, "step": 11641 }, { "epoch": 1.95, "grad_norm": 0.5510578155517578, "learning_rate": 5.794102835854331e-06, "loss": 0.0474, "step": 11642 }, { "epoch": 1.95, "grad_norm": 0.5027610659599304, "learning_rate": 5.792464662722744e-06, "loss": 0.0475, "step": 11643 }, { "epoch": 1.95, "grad_norm": 0.440243661403656, "learning_rate": 5.790826626783178e-06, "loss": 0.0435, "step": 11644 }, { "epoch": 1.95, "grad_norm": 0.2904650568962097, "learning_rate": 5.7891887280890504e-06, "loss": 0.0512, "step": 11645 }, { "epoch": 1.95, "grad_norm": 0.3408734202384949, "learning_rate": 5.787550966693761e-06, "loss": 0.0464, "step": 11646 }, { "epoch": 1.95, "grad_norm": 0.49460819363594055, "learning_rate": 5.7859133426507105e-06, "loss": 0.0768, "step": 11647 }, { "epoch": 1.95, "grad_norm": 0.5107102394104004, "learning_rate": 5.784275856013297e-06, "loss": 0.0426, "step": 11648 }, { "epoch": 1.95, "grad_norm": 0.3053567409515381, "learning_rate": 5.782638506834919e-06, "loss": 0.0284, "step": 11649 }, { "epoch": 1.95, "grad_norm": 0.482442706823349, "learning_rate": 5.7810012951689555e-06, "loss": 0.0582, "step": 11650 }, { "epoch": 1.95, "grad_norm": 0.40921279788017273, "learning_rate": 5.779364221068796e-06, "loss": 0.0486, "step": 11651 }, { "epoch": 1.95, "grad_norm": 0.5364067554473877, "learning_rate": 5.777727284587817e-06, "loss": 0.0407, "step": 11652 }, { "epoch": 1.95, "grad_norm": 0.34840285778045654, "learning_rate": 5.776090485779391e-06, "loss": 0.0417, "step": 11653 }, { "epoch": 1.95, "grad_norm": 0.39237672090530396, "learning_rate": 5.774453824696893e-06, "loss": 0.0477, "step": 11654 }, { "epoch": 1.95, "grad_norm": 0.41336265206336975, "learning_rate": 5.772817301393681e-06, "loss": 0.0393, "step": 11655 }, { "epoch": 1.95, "grad_norm": 0.3233669102191925, "learning_rate": 5.771180915923119e-06, "loss": 0.0364, "step": 11656 }, { "epoch": 1.95, "grad_norm": 0.3790445625782013, "learning_rate": 5.7695446683385695e-06, "loss": 0.0469, "step": 11657 }, { "epoch": 1.95, "grad_norm": 0.3170677721500397, "learning_rate": 5.7679085586933754e-06, "loss": 0.0355, "step": 11658 }, { "epoch": 1.95, "grad_norm": 0.36772873997688293, "learning_rate": 5.766272587040893e-06, "loss": 0.0366, "step": 11659 }, { "epoch": 1.95, "grad_norm": 0.40764689445495605, "learning_rate": 5.76463675343446e-06, "loss": 0.0481, "step": 11660 }, { "epoch": 1.95, "grad_norm": 0.37048596143722534, "learning_rate": 5.763001057927411e-06, "loss": 0.0436, "step": 11661 }, { "epoch": 1.95, "grad_norm": 0.687940239906311, "learning_rate": 5.7613655005730875e-06, "loss": 0.07, "step": 11662 }, { "epoch": 1.95, "grad_norm": 0.5472993850708008, "learning_rate": 5.759730081424813e-06, "loss": 0.0644, "step": 11663 }, { "epoch": 1.95, "grad_norm": 0.6319555640220642, "learning_rate": 5.758094800535914e-06, "loss": 0.0536, "step": 11664 }, { "epoch": 1.95, "grad_norm": 0.4008525609970093, "learning_rate": 5.756459657959717e-06, "loss": 0.0437, "step": 11665 }, { "epoch": 1.95, "grad_norm": 0.6341353058815002, "learning_rate": 5.754824653749531e-06, "loss": 0.0456, "step": 11666 }, { "epoch": 1.95, "grad_norm": 0.6384096145629883, "learning_rate": 5.7531897879586665e-06, "loss": 0.0659, "step": 11667 }, { "epoch": 1.95, "grad_norm": 0.48025941848754883, "learning_rate": 5.751555060640437e-06, "loss": 0.0428, "step": 11668 }, { "epoch": 1.95, "grad_norm": 0.38952749967575073, "learning_rate": 5.749920471848136e-06, "loss": 0.0404, "step": 11669 }, { "epoch": 1.95, "grad_norm": 0.6856414079666138, "learning_rate": 5.748286021635071e-06, "loss": 0.0637, "step": 11670 }, { "epoch": 1.95, "grad_norm": 0.28968602418899536, "learning_rate": 5.746651710054527e-06, "loss": 0.0406, "step": 11671 }, { "epoch": 1.95, "grad_norm": 0.3997468948364258, "learning_rate": 5.745017537159796e-06, "loss": 0.053, "step": 11672 }, { "epoch": 1.95, "grad_norm": 0.3924644887447357, "learning_rate": 5.743383503004167e-06, "loss": 0.0457, "step": 11673 }, { "epoch": 1.95, "grad_norm": 0.6016021966934204, "learning_rate": 5.741749607640915e-06, "loss": 0.0347, "step": 11674 }, { "epoch": 1.95, "grad_norm": 0.28943654894828796, "learning_rate": 5.740115851123311e-06, "loss": 0.0347, "step": 11675 }, { "epoch": 1.95, "grad_norm": 0.3053095042705536, "learning_rate": 5.738482233504636e-06, "loss": 0.0376, "step": 11676 }, { "epoch": 1.95, "grad_norm": 0.5158257484436035, "learning_rate": 5.736848754838149e-06, "loss": 0.062, "step": 11677 }, { "epoch": 1.95, "grad_norm": 0.5035228133201599, "learning_rate": 5.7352154151771115e-06, "loss": 0.0376, "step": 11678 }, { "epoch": 1.95, "grad_norm": 0.3338380455970764, "learning_rate": 5.733582214574784e-06, "loss": 0.0499, "step": 11679 }, { "epoch": 1.95, "grad_norm": 0.40073326230049133, "learning_rate": 5.731949153084414e-06, "loss": 0.0449, "step": 11680 }, { "epoch": 1.95, "grad_norm": 0.43437227606773376, "learning_rate": 5.730316230759253e-06, "loss": 0.0354, "step": 11681 }, { "epoch": 1.95, "grad_norm": 0.35034650564193726, "learning_rate": 5.728683447652548e-06, "loss": 0.0451, "step": 11682 }, { "epoch": 1.95, "grad_norm": 0.5782214403152466, "learning_rate": 5.727050803817531e-06, "loss": 0.0667, "step": 11683 }, { "epoch": 1.95, "grad_norm": 0.4015040993690491, "learning_rate": 5.725418299307444e-06, "loss": 0.0422, "step": 11684 }, { "epoch": 1.95, "grad_norm": 0.44321802258491516, "learning_rate": 5.723785934175512e-06, "loss": 0.0565, "step": 11685 }, { "epoch": 1.95, "grad_norm": 0.4037536382675171, "learning_rate": 5.722153708474958e-06, "loss": 0.0402, "step": 11686 }, { "epoch": 1.95, "grad_norm": 0.39536604285240173, "learning_rate": 5.720521622259009e-06, "loss": 0.0595, "step": 11687 }, { "epoch": 1.95, "grad_norm": 0.4217844307422638, "learning_rate": 5.718889675580874e-06, "loss": 0.0523, "step": 11688 }, { "epoch": 1.96, "grad_norm": 0.36218371987342834, "learning_rate": 5.717257868493769e-06, "loss": 0.0385, "step": 11689 }, { "epoch": 1.96, "grad_norm": 0.3137848973274231, "learning_rate": 5.715626201050905e-06, "loss": 0.033, "step": 11690 }, { "epoch": 1.96, "grad_norm": 0.6293001770973206, "learning_rate": 5.713994673305479e-06, "loss": 0.053, "step": 11691 }, { "epoch": 1.96, "grad_norm": 0.36763057112693787, "learning_rate": 5.712363285310689e-06, "loss": 0.0293, "step": 11692 }, { "epoch": 1.96, "grad_norm": 0.4233997166156769, "learning_rate": 5.710732037119733e-06, "loss": 0.0497, "step": 11693 }, { "epoch": 1.96, "grad_norm": 0.31350353360176086, "learning_rate": 5.709100928785796e-06, "loss": 0.0373, "step": 11694 }, { "epoch": 1.96, "grad_norm": 0.2798713743686676, "learning_rate": 5.7074699603620594e-06, "loss": 0.0358, "step": 11695 }, { "epoch": 1.96, "grad_norm": 0.3105376362800598, "learning_rate": 5.705839131901707e-06, "loss": 0.0444, "step": 11696 }, { "epoch": 1.96, "grad_norm": 0.4065113663673401, "learning_rate": 5.704208443457915e-06, "loss": 0.0401, "step": 11697 }, { "epoch": 1.96, "grad_norm": 0.5308026671409607, "learning_rate": 5.702577895083855e-06, "loss": 0.0487, "step": 11698 }, { "epoch": 1.96, "grad_norm": 0.6035728454589844, "learning_rate": 5.700947486832692e-06, "loss": 0.0471, "step": 11699 }, { "epoch": 1.96, "grad_norm": 0.275729775428772, "learning_rate": 5.699317218757583e-06, "loss": 0.0359, "step": 11700 }, { "epoch": 1.96, "grad_norm": 0.5130888819694519, "learning_rate": 5.697687090911692e-06, "loss": 0.0634, "step": 11701 }, { "epoch": 1.96, "grad_norm": 0.4077734351158142, "learning_rate": 5.696057103348167e-06, "loss": 0.0445, "step": 11702 }, { "epoch": 1.96, "grad_norm": 0.40054330229759216, "learning_rate": 5.694427256120154e-06, "loss": 0.0552, "step": 11703 }, { "epoch": 1.96, "grad_norm": 0.3327775001525879, "learning_rate": 5.692797549280799e-06, "loss": 0.0342, "step": 11704 }, { "epoch": 1.96, "grad_norm": 0.36191919445991516, "learning_rate": 5.6911679828832435e-06, "loss": 0.045, "step": 11705 }, { "epoch": 1.96, "grad_norm": 0.39859676361083984, "learning_rate": 5.689538556980616e-06, "loss": 0.0367, "step": 11706 }, { "epoch": 1.96, "grad_norm": 0.37410926818847656, "learning_rate": 5.687909271626052e-06, "loss": 0.0429, "step": 11707 }, { "epoch": 1.96, "grad_norm": 0.39587971568107605, "learning_rate": 5.686280126872673e-06, "loss": 0.045, "step": 11708 }, { "epoch": 1.96, "grad_norm": 0.3060726821422577, "learning_rate": 5.684651122773595e-06, "loss": 0.0262, "step": 11709 }, { "epoch": 1.96, "grad_norm": 0.4767071306705475, "learning_rate": 5.683022259381943e-06, "loss": 0.0489, "step": 11710 }, { "epoch": 1.96, "grad_norm": 0.3459970951080322, "learning_rate": 5.681393536750819e-06, "loss": 0.0448, "step": 11711 }, { "epoch": 1.96, "grad_norm": 0.3703313171863556, "learning_rate": 5.679764954933334e-06, "loss": 0.0601, "step": 11712 }, { "epoch": 1.96, "grad_norm": 0.46762174367904663, "learning_rate": 5.678136513982593e-06, "loss": 0.0435, "step": 11713 }, { "epoch": 1.96, "grad_norm": 0.38166868686676025, "learning_rate": 5.676508213951688e-06, "loss": 0.0395, "step": 11714 }, { "epoch": 1.96, "grad_norm": 0.40573030710220337, "learning_rate": 5.674880054893717e-06, "loss": 0.0481, "step": 11715 }, { "epoch": 1.96, "grad_norm": 0.38777586817741394, "learning_rate": 5.673252036861765e-06, "loss": 0.0529, "step": 11716 }, { "epoch": 1.96, "grad_norm": 0.3492090702056885, "learning_rate": 5.6716241599089105e-06, "loss": 0.0335, "step": 11717 }, { "epoch": 1.96, "grad_norm": 0.372759073972702, "learning_rate": 5.669996424088244e-06, "loss": 0.0362, "step": 11718 }, { "epoch": 1.96, "grad_norm": 0.30541378259658813, "learning_rate": 5.668368829452828e-06, "loss": 0.0435, "step": 11719 }, { "epoch": 1.96, "grad_norm": 0.4788108170032501, "learning_rate": 5.66674137605574e-06, "loss": 0.0496, "step": 11720 }, { "epoch": 1.96, "grad_norm": 0.4286693036556244, "learning_rate": 5.665114063950046e-06, "loss": 0.0441, "step": 11721 }, { "epoch": 1.96, "grad_norm": 0.362890362739563, "learning_rate": 5.6634868931888035e-06, "loss": 0.0464, "step": 11722 }, { "epoch": 1.96, "grad_norm": 0.41783058643341064, "learning_rate": 5.661859863825066e-06, "loss": 0.0555, "step": 11723 }, { "epoch": 1.96, "grad_norm": 0.40435028076171875, "learning_rate": 5.66023297591189e-06, "loss": 0.0475, "step": 11724 }, { "epoch": 1.96, "grad_norm": 0.4152211546897888, "learning_rate": 5.658606229502316e-06, "loss": 0.0488, "step": 11725 }, { "epoch": 1.96, "grad_norm": 0.3368225395679474, "learning_rate": 5.656979624649395e-06, "loss": 0.0403, "step": 11726 }, { "epoch": 1.96, "grad_norm": 0.4042131006717682, "learning_rate": 5.655353161406154e-06, "loss": 0.046, "step": 11727 }, { "epoch": 1.96, "grad_norm": 0.38749590516090393, "learning_rate": 5.6537268398256354e-06, "loss": 0.0461, "step": 11728 }, { "epoch": 1.96, "grad_norm": 0.39877933263778687, "learning_rate": 5.652100659960859e-06, "loss": 0.041, "step": 11729 }, { "epoch": 1.96, "grad_norm": 0.4774450361728668, "learning_rate": 5.6504746218648584e-06, "loss": 0.0669, "step": 11730 }, { "epoch": 1.96, "grad_norm": 0.3907947838306427, "learning_rate": 5.648848725590643e-06, "loss": 0.0439, "step": 11731 }, { "epoch": 1.96, "grad_norm": 0.35574549436569214, "learning_rate": 5.647222971191235e-06, "loss": 0.0359, "step": 11732 }, { "epoch": 1.96, "grad_norm": 0.39208582043647766, "learning_rate": 5.6455973587196415e-06, "loss": 0.0371, "step": 11733 }, { "epoch": 1.96, "grad_norm": 0.4151773452758789, "learning_rate": 5.643971888228861e-06, "loss": 0.0543, "step": 11734 }, { "epoch": 1.96, "grad_norm": 0.4286917746067047, "learning_rate": 5.642346559771903e-06, "loss": 0.037, "step": 11735 }, { "epoch": 1.96, "grad_norm": 0.3762088119983673, "learning_rate": 5.640721373401758e-06, "loss": 0.0413, "step": 11736 }, { "epoch": 1.96, "grad_norm": 0.4111921787261963, "learning_rate": 5.639096329171419e-06, "loss": 0.0546, "step": 11737 }, { "epoch": 1.96, "grad_norm": 0.3296346962451935, "learning_rate": 5.637471427133876e-06, "loss": 0.0401, "step": 11738 }, { "epoch": 1.96, "grad_norm": 0.4394698143005371, "learning_rate": 5.635846667342104e-06, "loss": 0.0381, "step": 11739 }, { "epoch": 1.96, "grad_norm": 0.3330809772014618, "learning_rate": 5.6342220498490896e-06, "loss": 0.0381, "step": 11740 }, { "epoch": 1.96, "grad_norm": 0.42491868138313293, "learning_rate": 5.6325975747077985e-06, "loss": 0.0563, "step": 11741 }, { "epoch": 1.96, "grad_norm": 0.2927854657173157, "learning_rate": 5.6309732419711984e-06, "loss": 0.0337, "step": 11742 }, { "epoch": 1.96, "grad_norm": 0.4329022467136383, "learning_rate": 5.629349051692257e-06, "loss": 0.0538, "step": 11743 }, { "epoch": 1.96, "grad_norm": 0.4577164947986603, "learning_rate": 5.627725003923927e-06, "loss": 0.0514, "step": 11744 }, { "epoch": 1.96, "grad_norm": 0.4483272135257721, "learning_rate": 5.626101098719168e-06, "loss": 0.0342, "step": 11745 }, { "epoch": 1.96, "grad_norm": 0.37049102783203125, "learning_rate": 5.624477336130931e-06, "loss": 0.0529, "step": 11746 }, { "epoch": 1.96, "grad_norm": 0.4740085005760193, "learning_rate": 5.622853716212157e-06, "loss": 0.0473, "step": 11747 }, { "epoch": 1.96, "grad_norm": 0.371360719203949, "learning_rate": 5.621230239015785e-06, "loss": 0.0496, "step": 11748 }, { "epoch": 1.97, "grad_norm": 0.5254412889480591, "learning_rate": 5.619606904594754e-06, "loss": 0.0356, "step": 11749 }, { "epoch": 1.97, "grad_norm": 0.3855000138282776, "learning_rate": 5.6179837130019955e-06, "loss": 0.0552, "step": 11750 }, { "epoch": 1.97, "grad_norm": 0.46803537011146545, "learning_rate": 5.616360664290428e-06, "loss": 0.0522, "step": 11751 }, { "epoch": 1.97, "grad_norm": 0.47977587580680847, "learning_rate": 5.614737758512979e-06, "loss": 0.0366, "step": 11752 }, { "epoch": 1.97, "grad_norm": 0.4161687195301056, "learning_rate": 5.6131149957225665e-06, "loss": 0.0385, "step": 11753 }, { "epoch": 1.97, "grad_norm": 0.4760567247867584, "learning_rate": 5.611492375972103e-06, "loss": 0.0508, "step": 11754 }, { "epoch": 1.97, "grad_norm": 0.7705959677696228, "learning_rate": 5.609869899314495e-06, "loss": 0.0441, "step": 11755 }, { "epoch": 1.97, "grad_norm": 0.3378046751022339, "learning_rate": 5.608247565802641e-06, "loss": 0.0291, "step": 11756 }, { "epoch": 1.97, "grad_norm": 0.6278876066207886, "learning_rate": 5.606625375489447e-06, "loss": 0.0756, "step": 11757 }, { "epoch": 1.97, "grad_norm": 0.3367939293384552, "learning_rate": 5.605003328427802e-06, "loss": 0.0372, "step": 11758 }, { "epoch": 1.97, "grad_norm": 0.4006475508213043, "learning_rate": 5.603381424670593e-06, "loss": 0.0468, "step": 11759 }, { "epoch": 1.97, "grad_norm": 0.4935356378555298, "learning_rate": 5.601759664270706e-06, "loss": 0.0544, "step": 11760 }, { "epoch": 1.97, "grad_norm": 0.43527641892433167, "learning_rate": 5.600138047281025e-06, "loss": 0.0535, "step": 11761 }, { "epoch": 1.97, "grad_norm": 0.4352355897426605, "learning_rate": 5.598516573754418e-06, "loss": 0.0515, "step": 11762 }, { "epoch": 1.97, "grad_norm": 0.6749693155288696, "learning_rate": 5.5968952437437605e-06, "loss": 0.0775, "step": 11763 }, { "epoch": 1.97, "grad_norm": 0.42648541927337646, "learning_rate": 5.595274057301918e-06, "loss": 0.0516, "step": 11764 }, { "epoch": 1.97, "grad_norm": 0.4653416872024536, "learning_rate": 5.5936530144817436e-06, "loss": 0.0573, "step": 11765 }, { "epoch": 1.97, "grad_norm": 0.4239139258861542, "learning_rate": 5.592032115336103e-06, "loss": 0.0464, "step": 11766 }, { "epoch": 1.97, "grad_norm": 0.2901897132396698, "learning_rate": 5.590411359917841e-06, "loss": 0.0475, "step": 11767 }, { "epoch": 1.97, "grad_norm": 0.3011198937892914, "learning_rate": 5.588790748279807e-06, "loss": 0.0259, "step": 11768 }, { "epoch": 1.97, "grad_norm": 0.4311584532260895, "learning_rate": 5.587170280474845e-06, "loss": 0.0388, "step": 11769 }, { "epoch": 1.97, "grad_norm": 0.3041323125362396, "learning_rate": 5.585549956555789e-06, "loss": 0.0363, "step": 11770 }, { "epoch": 1.97, "grad_norm": 0.3115639090538025, "learning_rate": 5.583929776575476e-06, "loss": 0.0382, "step": 11771 }, { "epoch": 1.97, "grad_norm": 0.475847452878952, "learning_rate": 5.582309740586732e-06, "loss": 0.0597, "step": 11772 }, { "epoch": 1.97, "grad_norm": 0.291775643825531, "learning_rate": 5.580689848642376e-06, "loss": 0.0361, "step": 11773 }, { "epoch": 1.97, "grad_norm": 0.38053959608078003, "learning_rate": 5.579070100795234e-06, "loss": 0.0279, "step": 11774 }, { "epoch": 1.97, "grad_norm": 0.39880114793777466, "learning_rate": 5.577450497098112e-06, "loss": 0.0497, "step": 11775 }, { "epoch": 1.97, "grad_norm": 0.40652206540107727, "learning_rate": 5.5758310376038226e-06, "loss": 0.0653, "step": 11776 }, { "epoch": 1.97, "grad_norm": 0.368550181388855, "learning_rate": 5.574211722365177e-06, "loss": 0.0381, "step": 11777 }, { "epoch": 1.97, "grad_norm": 0.25179997086524963, "learning_rate": 5.572592551434968e-06, "loss": 0.0249, "step": 11778 }, { "epoch": 1.97, "grad_norm": 0.46882688999176025, "learning_rate": 5.570973524865988e-06, "loss": 0.0436, "step": 11779 }, { "epoch": 1.97, "grad_norm": 0.3298976421356201, "learning_rate": 5.569354642711036e-06, "loss": 0.0497, "step": 11780 }, { "epoch": 1.97, "grad_norm": 0.3401932418346405, "learning_rate": 5.567735905022889e-06, "loss": 0.0404, "step": 11781 }, { "epoch": 1.97, "grad_norm": 0.4632452130317688, "learning_rate": 5.566117311854336e-06, "loss": 0.0507, "step": 11782 }, { "epoch": 1.97, "grad_norm": 0.48043033480644226, "learning_rate": 5.564498863258147e-06, "loss": 0.0435, "step": 11783 }, { "epoch": 1.97, "grad_norm": 0.3214874863624573, "learning_rate": 5.562880559287095e-06, "loss": 0.034, "step": 11784 }, { "epoch": 1.97, "grad_norm": 0.32497742772102356, "learning_rate": 5.5612623999939475e-06, "loss": 0.0516, "step": 11785 }, { "epoch": 1.97, "grad_norm": 0.4754118025302887, "learning_rate": 5.559644385431469e-06, "loss": 0.0517, "step": 11786 }, { "epoch": 1.97, "grad_norm": 0.3568088710308075, "learning_rate": 5.5580265156524124e-06, "loss": 0.049, "step": 11787 }, { "epoch": 1.97, "grad_norm": 0.4052369296550751, "learning_rate": 5.5564087907095356e-06, "loss": 0.0574, "step": 11788 }, { "epoch": 1.97, "grad_norm": 0.5016438961029053, "learning_rate": 5.5547912106555855e-06, "loss": 0.0461, "step": 11789 }, { "epoch": 1.97, "grad_norm": 0.4193515479564667, "learning_rate": 5.553173775543298e-06, "loss": 0.0477, "step": 11790 }, { "epoch": 1.97, "grad_norm": 0.37766560912132263, "learning_rate": 5.551556485425422e-06, "loss": 0.0472, "step": 11791 }, { "epoch": 1.97, "grad_norm": 0.4321877360343933, "learning_rate": 5.549939340354682e-06, "loss": 0.0561, "step": 11792 }, { "epoch": 1.97, "grad_norm": 0.39812466502189636, "learning_rate": 5.548322340383813e-06, "loss": 0.0402, "step": 11793 }, { "epoch": 1.97, "grad_norm": 0.45072534680366516, "learning_rate": 5.54670548556554e-06, "loss": 0.0604, "step": 11794 }, { "epoch": 1.97, "grad_norm": 0.3824763894081116, "learning_rate": 5.5450887759525784e-06, "loss": 0.0341, "step": 11795 }, { "epoch": 1.97, "grad_norm": 0.5643881559371948, "learning_rate": 5.543472211597648e-06, "loss": 0.074, "step": 11796 }, { "epoch": 1.97, "grad_norm": 0.8129421472549438, "learning_rate": 5.541855792553456e-06, "loss": 0.0545, "step": 11797 }, { "epoch": 1.97, "grad_norm": 0.36116620898246765, "learning_rate": 5.540239518872705e-06, "loss": 0.0451, "step": 11798 }, { "epoch": 1.97, "grad_norm": 0.46320974826812744, "learning_rate": 5.538623390608102e-06, "loss": 0.0539, "step": 11799 }, { "epoch": 1.97, "grad_norm": 0.5365185141563416, "learning_rate": 5.537007407812336e-06, "loss": 0.0726, "step": 11800 }, { "epoch": 1.97, "grad_norm": 0.29051995277404785, "learning_rate": 5.5353915705381e-06, "loss": 0.0324, "step": 11801 }, { "epoch": 1.97, "grad_norm": 0.48425930738449097, "learning_rate": 5.533775878838088e-06, "loss": 0.0694, "step": 11802 }, { "epoch": 1.97, "grad_norm": 0.4528002142906189, "learning_rate": 5.532160332764976e-06, "loss": 0.0452, "step": 11803 }, { "epoch": 1.97, "grad_norm": 0.6351098418235779, "learning_rate": 5.530544932371435e-06, "loss": 0.0497, "step": 11804 }, { "epoch": 1.97, "grad_norm": 0.33892229199409485, "learning_rate": 5.528929677710146e-06, "loss": 0.0322, "step": 11805 }, { "epoch": 1.97, "grad_norm": 0.5496717691421509, "learning_rate": 5.527314568833772e-06, "loss": 0.0588, "step": 11806 }, { "epoch": 1.97, "grad_norm": 0.32749247550964355, "learning_rate": 5.52569960579498e-06, "loss": 0.0443, "step": 11807 }, { "epoch": 1.97, "grad_norm": 0.4411076605319977, "learning_rate": 5.52408478864642e-06, "loss": 0.0426, "step": 11808 }, { "epoch": 1.98, "grad_norm": 0.4817837178707123, "learning_rate": 5.52247011744075e-06, "loss": 0.039, "step": 11809 }, { "epoch": 1.98, "grad_norm": 0.6876884698867798, "learning_rate": 5.520855592230624e-06, "loss": 0.0414, "step": 11810 }, { "epoch": 1.98, "grad_norm": 0.3199208080768585, "learning_rate": 5.51924121306868e-06, "loss": 0.0366, "step": 11811 }, { "epoch": 1.98, "grad_norm": 0.3667616844177246, "learning_rate": 5.517626980007551e-06, "loss": 0.042, "step": 11812 }, { "epoch": 1.98, "grad_norm": 0.40320098400115967, "learning_rate": 5.516012893099882e-06, "loss": 0.0446, "step": 11813 }, { "epoch": 1.98, "grad_norm": 0.4480692148208618, "learning_rate": 5.514398952398299e-06, "loss": 0.0511, "step": 11814 }, { "epoch": 1.98, "grad_norm": 0.3376815617084503, "learning_rate": 5.51278515795542e-06, "loss": 0.0329, "step": 11815 }, { "epoch": 1.98, "grad_norm": 0.42866259813308716, "learning_rate": 5.51117150982387e-06, "loss": 0.0575, "step": 11816 }, { "epoch": 1.98, "grad_norm": 0.4858306050300598, "learning_rate": 5.509558008056266e-06, "loss": 0.0729, "step": 11817 }, { "epoch": 1.98, "grad_norm": 0.31920838356018066, "learning_rate": 5.507944652705215e-06, "loss": 0.0424, "step": 11818 }, { "epoch": 1.98, "grad_norm": 0.44493499398231506, "learning_rate": 5.506331443823325e-06, "loss": 0.0618, "step": 11819 }, { "epoch": 1.98, "grad_norm": 0.3991638422012329, "learning_rate": 5.504718381463193e-06, "loss": 0.053, "step": 11820 }, { "epoch": 1.98, "grad_norm": 0.3076997995376587, "learning_rate": 5.503105465677421e-06, "loss": 0.0535, "step": 11821 }, { "epoch": 1.98, "grad_norm": 0.3417651951313019, "learning_rate": 5.501492696518596e-06, "loss": 0.0423, "step": 11822 }, { "epoch": 1.98, "grad_norm": 0.5430517792701721, "learning_rate": 5.499880074039302e-06, "loss": 0.0382, "step": 11823 }, { "epoch": 1.98, "grad_norm": 0.2661082148551941, "learning_rate": 5.498267598292122e-06, "loss": 0.0324, "step": 11824 }, { "epoch": 1.98, "grad_norm": 0.4650922417640686, "learning_rate": 5.4966552693296395e-06, "loss": 0.0567, "step": 11825 }, { "epoch": 1.98, "grad_norm": 0.7936928868293762, "learning_rate": 5.495043087204417e-06, "loss": 0.0561, "step": 11826 }, { "epoch": 1.98, "grad_norm": 0.42579925060272217, "learning_rate": 5.493431051969029e-06, "loss": 0.0436, "step": 11827 }, { "epoch": 1.98, "grad_norm": 0.22698993980884552, "learning_rate": 5.491819163676036e-06, "loss": 0.038, "step": 11828 }, { "epoch": 1.98, "grad_norm": 0.402506560087204, "learning_rate": 5.490207422377991e-06, "loss": 0.0464, "step": 11829 }, { "epoch": 1.98, "grad_norm": 0.33367499709129333, "learning_rate": 5.488595828127455e-06, "loss": 0.0442, "step": 11830 }, { "epoch": 1.98, "grad_norm": 0.36311614513397217, "learning_rate": 5.486984380976965e-06, "loss": 0.0416, "step": 11831 }, { "epoch": 1.98, "grad_norm": 0.5620391368865967, "learning_rate": 5.4853730809790775e-06, "loss": 0.0682, "step": 11832 }, { "epoch": 1.98, "grad_norm": 0.3637911081314087, "learning_rate": 5.483761928186318e-06, "loss": 0.0406, "step": 11833 }, { "epoch": 1.98, "grad_norm": 0.3930487036705017, "learning_rate": 5.482150922651228e-06, "loss": 0.0557, "step": 11834 }, { "epoch": 1.98, "grad_norm": 0.46022722125053406, "learning_rate": 5.4805400644263396e-06, "loss": 0.0708, "step": 11835 }, { "epoch": 1.98, "grad_norm": 0.6207284927368164, "learning_rate": 5.478929353564171e-06, "loss": 0.0563, "step": 11836 }, { "epoch": 1.98, "grad_norm": 0.4040147364139557, "learning_rate": 5.4773187901172385e-06, "loss": 0.0426, "step": 11837 }, { "epoch": 1.98, "grad_norm": 0.2860751450061798, "learning_rate": 5.4757083741380656e-06, "loss": 0.0374, "step": 11838 }, { "epoch": 1.98, "grad_norm": 0.4085577428340912, "learning_rate": 5.474098105679158e-06, "loss": 0.0523, "step": 11839 }, { "epoch": 1.98, "grad_norm": 0.3990868031978607, "learning_rate": 5.4724879847930136e-06, "loss": 0.0408, "step": 11840 }, { "epoch": 1.98, "grad_norm": 0.5137540102005005, "learning_rate": 5.4708780115321395e-06, "loss": 0.055, "step": 11841 }, { "epoch": 1.98, "grad_norm": 0.4157160222530365, "learning_rate": 5.469268185949032e-06, "loss": 0.0311, "step": 11842 }, { "epoch": 1.98, "grad_norm": 0.38787466287612915, "learning_rate": 5.467658508096179e-06, "loss": 0.0343, "step": 11843 }, { "epoch": 1.98, "grad_norm": 0.37037375569343567, "learning_rate": 5.466048978026067e-06, "loss": 0.0391, "step": 11844 }, { "epoch": 1.98, "grad_norm": 0.4470231235027313, "learning_rate": 5.464439595791177e-06, "loss": 0.0586, "step": 11845 }, { "epoch": 1.98, "grad_norm": 0.46987184882164, "learning_rate": 5.462830361443982e-06, "loss": 0.0643, "step": 11846 }, { "epoch": 1.98, "grad_norm": 0.35513198375701904, "learning_rate": 5.4612212750369574e-06, "loss": 0.0346, "step": 11847 }, { "epoch": 1.98, "grad_norm": 0.48318323493003845, "learning_rate": 5.459612336622564e-06, "loss": 0.0457, "step": 11848 }, { "epoch": 1.98, "grad_norm": 0.4497106969356537, "learning_rate": 5.458003546253268e-06, "loss": 0.058, "step": 11849 }, { "epoch": 1.98, "grad_norm": 0.31792062520980835, "learning_rate": 5.456394903981526e-06, "loss": 0.0524, "step": 11850 }, { "epoch": 1.98, "grad_norm": 0.33370891213417053, "learning_rate": 5.454786409859787e-06, "loss": 0.0504, "step": 11851 }, { "epoch": 1.98, "grad_norm": 0.346540242433548, "learning_rate": 5.453178063940502e-06, "loss": 0.0504, "step": 11852 }, { "epoch": 1.98, "grad_norm": 0.31006619334220886, "learning_rate": 5.451569866276112e-06, "loss": 0.0349, "step": 11853 }, { "epoch": 1.98, "grad_norm": 0.24314390122890472, "learning_rate": 5.449961816919047e-06, "loss": 0.0294, "step": 11854 }, { "epoch": 1.98, "grad_norm": 0.41222071647644043, "learning_rate": 5.4483539159217514e-06, "loss": 0.049, "step": 11855 }, { "epoch": 1.98, "grad_norm": 0.24123990535736084, "learning_rate": 5.446746163336643e-06, "loss": 0.03, "step": 11856 }, { "epoch": 1.98, "grad_norm": 0.49735647439956665, "learning_rate": 5.445138559216148e-06, "loss": 0.0547, "step": 11857 }, { "epoch": 1.98, "grad_norm": 0.44714778661727905, "learning_rate": 5.443531103612688e-06, "loss": 0.0607, "step": 11858 }, { "epoch": 1.98, "grad_norm": 0.5316362380981445, "learning_rate": 5.441923796578674e-06, "loss": 0.0551, "step": 11859 }, { "epoch": 1.98, "grad_norm": 0.5752307772636414, "learning_rate": 5.4403166381665096e-06, "loss": 0.0488, "step": 11860 }, { "epoch": 1.98, "grad_norm": 0.43436726927757263, "learning_rate": 5.438709628428605e-06, "loss": 0.0421, "step": 11861 }, { "epoch": 1.98, "grad_norm": 0.30069881677627563, "learning_rate": 5.437102767417353e-06, "loss": 0.0219, "step": 11862 }, { "epoch": 1.98, "grad_norm": 0.6925665140151978, "learning_rate": 5.435496055185154e-06, "loss": 0.0416, "step": 11863 }, { "epoch": 1.98, "grad_norm": 0.5625143051147461, "learning_rate": 5.433889491784389e-06, "loss": 0.0559, "step": 11864 }, { "epoch": 1.98, "grad_norm": 0.38235360383987427, "learning_rate": 5.432283077267445e-06, "loss": 0.0523, "step": 11865 }, { "epoch": 1.98, "grad_norm": 0.4732441008090973, "learning_rate": 5.430676811686707e-06, "loss": 0.0388, "step": 11866 }, { "epoch": 1.98, "grad_norm": 0.4561871588230133, "learning_rate": 5.4290706950945445e-06, "loss": 0.0452, "step": 11867 }, { "epoch": 1.98, "grad_norm": 0.27771425247192383, "learning_rate": 5.427464727543323e-06, "loss": 0.0406, "step": 11868 }, { "epoch": 1.99, "grad_norm": 0.4029599726200104, "learning_rate": 5.4258589090854156e-06, "loss": 0.0418, "step": 11869 }, { "epoch": 1.99, "grad_norm": 0.2781202793121338, "learning_rate": 5.424253239773176e-06, "loss": 0.0295, "step": 11870 }, { "epoch": 1.99, "grad_norm": 0.30805888772010803, "learning_rate": 5.422647719658958e-06, "loss": 0.0515, "step": 11871 }, { "epoch": 1.99, "grad_norm": 0.3216233551502228, "learning_rate": 5.421042348795115e-06, "loss": 0.0396, "step": 11872 }, { "epoch": 1.99, "grad_norm": 0.4294070899486542, "learning_rate": 5.419437127233994e-06, "loss": 0.0597, "step": 11873 }, { "epoch": 1.99, "grad_norm": 0.4069328010082245, "learning_rate": 5.417832055027929e-06, "loss": 0.0269, "step": 11874 }, { "epoch": 1.99, "grad_norm": 0.37100860476493835, "learning_rate": 5.4162271322292635e-06, "loss": 0.0427, "step": 11875 }, { "epoch": 1.99, "grad_norm": 0.2576196789741516, "learning_rate": 5.41462235889032e-06, "loss": 0.04, "step": 11876 }, { "epoch": 1.99, "grad_norm": 0.4520133435726166, "learning_rate": 5.41301773506343e-06, "loss": 0.0623, "step": 11877 }, { "epoch": 1.99, "grad_norm": 0.3542119562625885, "learning_rate": 5.411413260800915e-06, "loss": 0.0329, "step": 11878 }, { "epoch": 1.99, "grad_norm": 0.473832368850708, "learning_rate": 5.409808936155082e-06, "loss": 0.0414, "step": 11879 }, { "epoch": 1.99, "grad_norm": 0.35200831294059753, "learning_rate": 5.408204761178249e-06, "loss": 0.0449, "step": 11880 }, { "epoch": 1.99, "grad_norm": 0.38485878705978394, "learning_rate": 5.4066007359227245e-06, "loss": 0.0471, "step": 11881 }, { "epoch": 1.99, "grad_norm": 0.36971578001976013, "learning_rate": 5.4049968604408035e-06, "loss": 0.0375, "step": 11882 }, { "epoch": 1.99, "grad_norm": 0.43748006224632263, "learning_rate": 5.403393134784789e-06, "loss": 0.0563, "step": 11883 }, { "epoch": 1.99, "grad_norm": 0.3507135510444641, "learning_rate": 5.401789559006969e-06, "loss": 0.0468, "step": 11884 }, { "epoch": 1.99, "grad_norm": 0.4141651690006256, "learning_rate": 5.400186133159625e-06, "loss": 0.0486, "step": 11885 }, { "epoch": 1.99, "grad_norm": 0.2805064916610718, "learning_rate": 5.39858285729505e-06, "loss": 0.045, "step": 11886 }, { "epoch": 1.99, "grad_norm": 0.4507763683795929, "learning_rate": 5.396979731465513e-06, "loss": 0.0522, "step": 11887 }, { "epoch": 1.99, "grad_norm": 0.46975281834602356, "learning_rate": 5.395376755723283e-06, "loss": 0.0363, "step": 11888 }, { "epoch": 1.99, "grad_norm": 0.700558066368103, "learning_rate": 5.393773930120633e-06, "loss": 0.0369, "step": 11889 }, { "epoch": 1.99, "grad_norm": 0.7706477046012878, "learning_rate": 5.392171254709823e-06, "loss": 0.0609, "step": 11890 }, { "epoch": 1.99, "grad_norm": 0.33372360467910767, "learning_rate": 5.390568729543115e-06, "loss": 0.0329, "step": 11891 }, { "epoch": 1.99, "grad_norm": 0.40763425827026367, "learning_rate": 5.388966354672758e-06, "loss": 0.0388, "step": 11892 }, { "epoch": 1.99, "grad_norm": 0.34103941917419434, "learning_rate": 5.387364130150994e-06, "loss": 0.041, "step": 11893 }, { "epoch": 1.99, "grad_norm": 0.42638659477233887, "learning_rate": 5.385762056030075e-06, "loss": 0.0435, "step": 11894 }, { "epoch": 1.99, "grad_norm": 0.5964129567146301, "learning_rate": 5.384160132362235e-06, "loss": 0.0574, "step": 11895 }, { "epoch": 1.99, "grad_norm": 0.44356390833854675, "learning_rate": 5.382558359199701e-06, "loss": 0.0705, "step": 11896 }, { "epoch": 1.99, "grad_norm": 0.322185218334198, "learning_rate": 5.380956736594706e-06, "loss": 0.0407, "step": 11897 }, { "epoch": 1.99, "grad_norm": 0.369539737701416, "learning_rate": 5.379355264599477e-06, "loss": 0.0354, "step": 11898 }, { "epoch": 1.99, "grad_norm": 0.46198463439941406, "learning_rate": 5.3777539432662215e-06, "loss": 0.0478, "step": 11899 }, { "epoch": 1.99, "grad_norm": 0.5733581185340881, "learning_rate": 5.376152772647165e-06, "loss": 0.0407, "step": 11900 }, { "epoch": 1.99, "grad_norm": 0.47436457872390747, "learning_rate": 5.374551752794509e-06, "loss": 0.0582, "step": 11901 }, { "epoch": 1.99, "grad_norm": 0.3528585731983185, "learning_rate": 5.3729508837604525e-06, "loss": 0.0365, "step": 11902 }, { "epoch": 1.99, "grad_norm": 0.5576057434082031, "learning_rate": 5.371350165597204e-06, "loss": 0.0429, "step": 11903 }, { "epoch": 1.99, "grad_norm": 0.41231369972229004, "learning_rate": 5.369749598356946e-06, "loss": 0.0442, "step": 11904 }, { "epoch": 1.99, "grad_norm": 0.4545991122722626, "learning_rate": 5.3681491820918745e-06, "loss": 0.053, "step": 11905 }, { "epoch": 1.99, "grad_norm": 0.47531214356422424, "learning_rate": 5.366548916854173e-06, "loss": 0.0379, "step": 11906 }, { "epoch": 1.99, "grad_norm": 0.3914117217063904, "learning_rate": 5.364948802696016e-06, "loss": 0.0522, "step": 11907 }, { "epoch": 1.99, "grad_norm": 0.46476176381111145, "learning_rate": 5.3633488396695836e-06, "loss": 0.0599, "step": 11908 }, { "epoch": 1.99, "grad_norm": 0.34798118472099304, "learning_rate": 5.361749027827039e-06, "loss": 0.0676, "step": 11909 }, { "epoch": 1.99, "grad_norm": 0.33450445532798767, "learning_rate": 5.360149367220546e-06, "loss": 0.0396, "step": 11910 }, { "epoch": 1.99, "grad_norm": 0.2923891842365265, "learning_rate": 5.3585498579022675e-06, "loss": 0.0368, "step": 11911 }, { "epoch": 1.99, "grad_norm": 0.3923605680465698, "learning_rate": 5.356950499924352e-06, "loss": 0.0466, "step": 11912 }, { "epoch": 1.99, "grad_norm": 0.35622352361679077, "learning_rate": 5.355351293338953e-06, "loss": 0.0499, "step": 11913 }, { "epoch": 1.99, "grad_norm": 0.6391239762306213, "learning_rate": 5.353752238198215e-06, "loss": 0.0325, "step": 11914 }, { "epoch": 1.99, "grad_norm": 0.4319297969341278, "learning_rate": 5.352153334554278e-06, "loss": 0.0464, "step": 11915 }, { "epoch": 1.99, "grad_norm": 0.5467872619628906, "learning_rate": 5.350554582459269e-06, "loss": 0.0666, "step": 11916 }, { "epoch": 1.99, "grad_norm": 1.3000727891921997, "learning_rate": 5.348955981965327e-06, "loss": 0.0536, "step": 11917 }, { "epoch": 1.99, "grad_norm": 0.5032472014427185, "learning_rate": 5.347357533124568e-06, "loss": 0.0363, "step": 11918 }, { "epoch": 1.99, "grad_norm": 0.3353784382343292, "learning_rate": 5.345759235989119e-06, "loss": 0.0367, "step": 11919 }, { "epoch": 1.99, "grad_norm": 0.40309765934944153, "learning_rate": 5.3441610906110865e-06, "loss": 0.0502, "step": 11920 }, { "epoch": 1.99, "grad_norm": 0.34180697798728943, "learning_rate": 5.3425630970425835e-06, "loss": 0.0539, "step": 11921 }, { "epoch": 1.99, "grad_norm": 0.3450258672237396, "learning_rate": 5.3409652553357216e-06, "loss": 0.0445, "step": 11922 }, { "epoch": 1.99, "grad_norm": 0.3247779607772827, "learning_rate": 5.339367565542592e-06, "loss": 0.0417, "step": 11923 }, { "epoch": 1.99, "grad_norm": 0.42668190598487854, "learning_rate": 5.33777002771529e-06, "loss": 0.051, "step": 11924 }, { "epoch": 1.99, "grad_norm": 0.4439351260662079, "learning_rate": 5.336172641905909e-06, "loss": 0.0518, "step": 11925 }, { "epoch": 1.99, "grad_norm": 0.36661916971206665, "learning_rate": 5.334575408166532e-06, "loss": 0.0451, "step": 11926 }, { "epoch": 1.99, "grad_norm": 0.3576135039329529, "learning_rate": 5.3329783265492364e-06, "loss": 0.0514, "step": 11927 }, { "epoch": 1.99, "grad_norm": 0.4266339838504791, "learning_rate": 5.331381397106098e-06, "loss": 0.0522, "step": 11928 }, { "epoch": 2.0, "grad_norm": 0.3503502607345581, "learning_rate": 5.329784619889192e-06, "loss": 0.0292, "step": 11929 }, { "epoch": 2.0, "grad_norm": 0.394496887922287, "learning_rate": 5.3281879949505755e-06, "loss": 0.0542, "step": 11930 }, { "epoch": 2.0, "grad_norm": 0.36082953214645386, "learning_rate": 5.326591522342317e-06, "loss": 0.0401, "step": 11931 }, { "epoch": 2.0, "grad_norm": 0.3669590651988983, "learning_rate": 5.324995202116462e-06, "loss": 0.0516, "step": 11932 }, { "epoch": 2.0, "grad_norm": 0.4189838767051697, "learning_rate": 5.3233990343250695e-06, "loss": 0.0405, "step": 11933 }, { "epoch": 2.0, "grad_norm": 0.5937958359718323, "learning_rate": 5.321803019020182e-06, "loss": 0.0636, "step": 11934 }, { "epoch": 2.0, "grad_norm": 0.3763503432273865, "learning_rate": 5.320207156253832e-06, "loss": 0.0473, "step": 11935 }, { "epoch": 2.0, "grad_norm": 0.425586462020874, "learning_rate": 5.318611446078061e-06, "loss": 0.0492, "step": 11936 }, { "epoch": 2.0, "grad_norm": 0.45150527358055115, "learning_rate": 5.317015888544904e-06, "loss": 0.0428, "step": 11937 }, { "epoch": 2.0, "grad_norm": 0.3801744878292084, "learning_rate": 5.315420483706377e-06, "loss": 0.0475, "step": 11938 }, { "epoch": 2.0, "grad_norm": 0.39095407724380493, "learning_rate": 5.3138252316145076e-06, "loss": 0.0463, "step": 11939 }, { "epoch": 2.0, "grad_norm": 0.5753963589668274, "learning_rate": 5.312230132321308e-06, "loss": 0.0646, "step": 11940 }, { "epoch": 2.0, "grad_norm": 0.43304261565208435, "learning_rate": 5.3106351858787845e-06, "loss": 0.0488, "step": 11941 }, { "epoch": 2.0, "grad_norm": 0.3965052664279938, "learning_rate": 5.309040392338949e-06, "loss": 0.0459, "step": 11942 }, { "epoch": 2.0, "grad_norm": 0.47978848218917847, "learning_rate": 5.307445751753799e-06, "loss": 0.0615, "step": 11943 }, { "epoch": 2.0, "grad_norm": 0.2853028476238251, "learning_rate": 5.305851264175325e-06, "loss": 0.0463, "step": 11944 }, { "epoch": 2.0, "grad_norm": 0.46933913230895996, "learning_rate": 5.304256929655522e-06, "loss": 0.0693, "step": 11945 }, { "epoch": 2.0, "grad_norm": 0.3569500744342804, "learning_rate": 5.3026627482463765e-06, "loss": 0.0473, "step": 11946 }, { "epoch": 2.0, "grad_norm": 0.31959372758865356, "learning_rate": 5.301068719999871e-06, "loss": 0.0403, "step": 11947 }, { "epoch": 2.0, "grad_norm": 0.5700734853744507, "learning_rate": 5.299474844967975e-06, "loss": 0.0556, "step": 11948 }, { "epoch": 2.0, "grad_norm": 0.36632201075553894, "learning_rate": 5.297881123202659e-06, "loss": 0.0556, "step": 11949 }, { "epoch": 2.0, "grad_norm": 0.3273521065711975, "learning_rate": 5.296287554755895e-06, "loss": 0.042, "step": 11950 }, { "epoch": 2.0, "grad_norm": 0.4572867751121521, "learning_rate": 5.294694139679637e-06, "loss": 0.0435, "step": 11951 }, { "epoch": 2.0, "grad_norm": 0.3938116431236267, "learning_rate": 5.293100878025839e-06, "loss": 0.0537, "step": 11952 }, { "epoch": 2.0, "grad_norm": 0.3829382359981537, "learning_rate": 5.291507769846453e-06, "loss": 0.0399, "step": 11953 }, { "epoch": 2.0, "grad_norm": 0.35543254017829895, "learning_rate": 5.289914815193431e-06, "loss": 0.0314, "step": 11954 }, { "epoch": 2.0, "grad_norm": 0.36060166358947754, "learning_rate": 5.288322014118704e-06, "loss": 0.0316, "step": 11955 }, { "epoch": 2.0, "grad_norm": 0.46314069628715515, "learning_rate": 5.286729366674215e-06, "loss": 0.052, "step": 11956 }, { "epoch": 2.0, "grad_norm": 0.44960883259773254, "learning_rate": 5.285136872911889e-06, "loss": 0.0571, "step": 11957 }, { "epoch": 2.0, "grad_norm": 0.3540039360523224, "learning_rate": 5.283544532883651e-06, "loss": 0.0455, "step": 11958 }, { "epoch": 2.0, "grad_norm": 0.25281450152397156, "learning_rate": 5.281952346641426e-06, "loss": 0.0308, "step": 11959 }, { "epoch": 2.0, "grad_norm": 0.20724472403526306, "learning_rate": 5.280360314237122e-06, "loss": 0.0317, "step": 11960 }, { "epoch": 2.0, "grad_norm": 0.2264138162136078, "learning_rate": 5.278768435722654e-06, "loss": 0.027, "step": 11961 }, { "epoch": 2.0, "grad_norm": 0.32601261138916016, "learning_rate": 5.277176711149931e-06, "loss": 0.0469, "step": 11962 }, { "epoch": 2.0, "grad_norm": 0.44747862219810486, "learning_rate": 5.275585140570845e-06, "loss": 0.0744, "step": 11963 }, { "epoch": 2.0, "grad_norm": 0.3781138062477112, "learning_rate": 5.273993724037298e-06, "loss": 0.0461, "step": 11964 }, { "epoch": 2.0, "grad_norm": 0.29513147473335266, "learning_rate": 5.2724024616011774e-06, "loss": 0.0438, "step": 11965 }, { "epoch": 2.0, "grad_norm": 0.30308184027671814, "learning_rate": 5.270811353314365e-06, "loss": 0.0432, "step": 11966 }, { "epoch": 2.0, "grad_norm": 0.29473045468330383, "learning_rate": 5.2692203992287475e-06, "loss": 0.0357, "step": 11967 }, { "epoch": 2.0, "grad_norm": 0.40581560134887695, "learning_rate": 5.267629599396193e-06, "loss": 0.0424, "step": 11968 }, { "epoch": 2.0, "grad_norm": 0.301522821187973, "learning_rate": 5.266038953868575e-06, "loss": 0.038, "step": 11969 }, { "epoch": 2.0, "grad_norm": 0.2420951873064041, "learning_rate": 5.264448462697762e-06, "loss": 0.0323, "step": 11970 }, { "epoch": 2.0, "grad_norm": 0.353003591299057, "learning_rate": 5.262858125935612e-06, "loss": 0.0425, "step": 11971 }, { "epoch": 2.0, "grad_norm": 0.31729856133461, "learning_rate": 5.261267943633974e-06, "loss": 0.0443, "step": 11972 }, { "epoch": 2.0, "grad_norm": 0.340335875749588, "learning_rate": 5.2596779158447056e-06, "loss": 0.0426, "step": 11973 }, { "epoch": 2.0, "grad_norm": 0.39458462595939636, "learning_rate": 5.258088042619645e-06, "loss": 0.0445, "step": 11974 }, { "epoch": 2.0, "grad_norm": 0.30584412813186646, "learning_rate": 5.25649832401064e-06, "loss": 0.0352, "step": 11975 }, { "epoch": 2.0, "grad_norm": 0.25751379132270813, "learning_rate": 5.254908760069517e-06, "loss": 0.0243, "step": 11976 }, { "epoch": 2.0, "grad_norm": 0.36633023619651794, "learning_rate": 5.25331935084811e-06, "loss": 0.0379, "step": 11977 }, { "epoch": 2.0, "grad_norm": 0.26722511649131775, "learning_rate": 5.251730096398247e-06, "loss": 0.0297, "step": 11978 }, { "epoch": 2.0, "grad_norm": 0.35308754444122314, "learning_rate": 5.250140996771744e-06, "loss": 0.033, "step": 11979 }, { "epoch": 2.0, "grad_norm": 0.3520025610923767, "learning_rate": 5.248552052020411e-06, "loss": 0.0441, "step": 11980 }, { "epoch": 2.0, "grad_norm": 0.19724611937999725, "learning_rate": 5.246963262196068e-06, "loss": 0.0204, "step": 11981 }, { "epoch": 2.0, "grad_norm": 0.291422039270401, "learning_rate": 5.245374627350513e-06, "loss": 0.0367, "step": 11982 }, { "epoch": 2.0, "grad_norm": 0.25234338641166687, "learning_rate": 5.243786147535541e-06, "loss": 0.0337, "step": 11983 }, { "epoch": 2.0, "grad_norm": 0.35820817947387695, "learning_rate": 5.242197822802953e-06, "loss": 0.0304, "step": 11984 }, { "epoch": 2.0, "grad_norm": 0.4309839606285095, "learning_rate": 5.24060965320454e-06, "loss": 0.0359, "step": 11985 }, { "epoch": 2.0, "grad_norm": 0.28545546531677246, "learning_rate": 5.239021638792079e-06, "loss": 0.0408, "step": 11986 }, { "epoch": 2.0, "grad_norm": 0.5515168905258179, "learning_rate": 5.237433779617358e-06, "loss": 0.0429, "step": 11987 }, { "epoch": 2.01, "grad_norm": 0.3751046061515808, "learning_rate": 5.2358460757321425e-06, "loss": 0.0396, "step": 11988 }, { "epoch": 2.01, "grad_norm": 0.4748985767364502, "learning_rate": 5.234258527188211e-06, "loss": 0.032, "step": 11989 }, { "epoch": 2.01, "grad_norm": 0.28017279505729675, "learning_rate": 5.2326711340373215e-06, "loss": 0.0339, "step": 11990 }, { "epoch": 2.01, "grad_norm": 0.27826836705207825, "learning_rate": 5.231083896331229e-06, "loss": 0.0438, "step": 11991 }, { "epoch": 2.01, "grad_norm": 0.3207124173641205, "learning_rate": 5.229496814121696e-06, "loss": 0.034, "step": 11992 }, { "epoch": 2.01, "grad_norm": 0.37272369861602783, "learning_rate": 5.227909887460465e-06, "loss": 0.0411, "step": 11993 }, { "epoch": 2.01, "grad_norm": 0.39845001697540283, "learning_rate": 5.226323116399281e-06, "loss": 0.0438, "step": 11994 }, { "epoch": 2.01, "grad_norm": 0.25478455424308777, "learning_rate": 5.2247365009898876e-06, "loss": 0.0358, "step": 11995 }, { "epoch": 2.01, "grad_norm": 0.34589579701423645, "learning_rate": 5.223150041284015e-06, "loss": 0.0455, "step": 11996 }, { "epoch": 2.01, "grad_norm": 0.284070760011673, "learning_rate": 5.221563737333388e-06, "loss": 0.0301, "step": 11997 }, { "epoch": 2.01, "grad_norm": 0.47863897681236267, "learning_rate": 5.219977589189736e-06, "loss": 0.0359, "step": 11998 }, { "epoch": 2.01, "grad_norm": 0.31318286061286926, "learning_rate": 5.218391596904776e-06, "loss": 0.0385, "step": 11999 }, { "epoch": 2.01, "grad_norm": 0.6156413555145264, "learning_rate": 5.216805760530217e-06, "loss": 0.0324, "step": 12000 }, { "epoch": 2.01, "grad_norm": 0.3554092049598694, "learning_rate": 5.2152200801177686e-06, "loss": 0.0315, "step": 12001 }, { "epoch": 2.01, "grad_norm": 0.4787932336330414, "learning_rate": 5.213634555719136e-06, "loss": 0.0355, "step": 12002 }, { "epoch": 2.01, "grad_norm": 0.31801551580429077, "learning_rate": 5.212049187386021e-06, "loss": 0.0321, "step": 12003 }, { "epoch": 2.01, "grad_norm": 0.3837835192680359, "learning_rate": 5.210463975170113e-06, "loss": 0.0614, "step": 12004 }, { "epoch": 2.01, "grad_norm": 0.49527984857559204, "learning_rate": 5.208878919123095e-06, "loss": 0.0362, "step": 12005 }, { "epoch": 2.01, "grad_norm": 0.3273029029369354, "learning_rate": 5.207294019296658e-06, "loss": 0.0428, "step": 12006 }, { "epoch": 2.01, "grad_norm": 0.304050475358963, "learning_rate": 5.205709275742475e-06, "loss": 0.0313, "step": 12007 }, { "epoch": 2.01, "grad_norm": 0.40160778164863586, "learning_rate": 5.204124688512216e-06, "loss": 0.0399, "step": 12008 }, { "epoch": 2.01, "grad_norm": 0.37588658928871155, "learning_rate": 5.202540257657551e-06, "loss": 0.0323, "step": 12009 }, { "epoch": 2.01, "grad_norm": 0.34264975786209106, "learning_rate": 5.200955983230149e-06, "loss": 0.0407, "step": 12010 }, { "epoch": 2.01, "grad_norm": 0.6745773553848267, "learning_rate": 5.199371865281656e-06, "loss": 0.0315, "step": 12011 }, { "epoch": 2.01, "grad_norm": 0.31644752621650696, "learning_rate": 5.197787903863733e-06, "loss": 0.0474, "step": 12012 }, { "epoch": 2.01, "grad_norm": 0.39917707443237305, "learning_rate": 5.196204099028025e-06, "loss": 0.0383, "step": 12013 }, { "epoch": 2.01, "grad_norm": 0.40155866742134094, "learning_rate": 5.1946204508261685e-06, "loss": 0.0395, "step": 12014 }, { "epoch": 2.01, "grad_norm": 0.3477165699005127, "learning_rate": 5.193036959309807e-06, "loss": 0.0312, "step": 12015 }, { "epoch": 2.01, "grad_norm": 0.30373337864875793, "learning_rate": 5.191453624530567e-06, "loss": 0.0347, "step": 12016 }, { "epoch": 2.01, "grad_norm": 0.3301970958709717, "learning_rate": 5.189870446540077e-06, "loss": 0.0219, "step": 12017 }, { "epoch": 2.01, "grad_norm": 0.23260721564292908, "learning_rate": 5.188287425389965e-06, "loss": 0.0218, "step": 12018 }, { "epoch": 2.01, "grad_norm": 0.32399943470954895, "learning_rate": 5.186704561131837e-06, "loss": 0.0243, "step": 12019 }, { "epoch": 2.01, "grad_norm": 0.507024884223938, "learning_rate": 5.185121853817312e-06, "loss": 0.0358, "step": 12020 }, { "epoch": 2.01, "grad_norm": 0.4416751265525818, "learning_rate": 5.183539303497996e-06, "loss": 0.0389, "step": 12021 }, { "epoch": 2.01, "grad_norm": 0.5301170349121094, "learning_rate": 5.1819569102254805e-06, "loss": 0.0461, "step": 12022 }, { "epoch": 2.01, "grad_norm": 0.29424041509628296, "learning_rate": 5.180374674051374e-06, "loss": 0.028, "step": 12023 }, { "epoch": 2.01, "grad_norm": 0.4463200569152832, "learning_rate": 5.178792595027256e-06, "loss": 0.0422, "step": 12024 }, { "epoch": 2.01, "grad_norm": 0.35782065987586975, "learning_rate": 5.177210673204719e-06, "loss": 0.0358, "step": 12025 }, { "epoch": 2.01, "grad_norm": 0.340427964925766, "learning_rate": 5.175628908635346e-06, "loss": 0.0388, "step": 12026 }, { "epoch": 2.01, "grad_norm": 0.28299757838249207, "learning_rate": 5.1740473013707085e-06, "loss": 0.0267, "step": 12027 }, { "epoch": 2.01, "grad_norm": 0.4392702579498291, "learning_rate": 5.172465851462374e-06, "loss": 0.0535, "step": 12028 }, { "epoch": 2.01, "grad_norm": 0.2943980097770691, "learning_rate": 5.170884558961913e-06, "loss": 0.0315, "step": 12029 }, { "epoch": 2.01, "grad_norm": 0.23220017552375793, "learning_rate": 5.1693034239208815e-06, "loss": 0.0254, "step": 12030 }, { "epoch": 2.01, "grad_norm": 0.34781157970428467, "learning_rate": 5.16772244639084e-06, "loss": 0.0425, "step": 12031 }, { "epoch": 2.01, "grad_norm": 0.3802693486213684, "learning_rate": 5.166141626423329e-06, "loss": 0.0502, "step": 12032 }, { "epoch": 2.01, "grad_norm": 0.31090256571769714, "learning_rate": 5.164560964069899e-06, "loss": 0.0471, "step": 12033 }, { "epoch": 2.01, "grad_norm": 0.3128950297832489, "learning_rate": 5.162980459382094e-06, "loss": 0.0345, "step": 12034 }, { "epoch": 2.01, "grad_norm": 0.41248416900634766, "learning_rate": 5.1614001124114435e-06, "loss": 0.0507, "step": 12035 }, { "epoch": 2.01, "grad_norm": 0.37863093614578247, "learning_rate": 5.159819923209472e-06, "loss": 0.0432, "step": 12036 }, { "epoch": 2.01, "grad_norm": 0.30676451325416565, "learning_rate": 5.158239891827712e-06, "loss": 0.039, "step": 12037 }, { "epoch": 2.01, "grad_norm": 0.4122432768344879, "learning_rate": 5.1566600183176786e-06, "loss": 0.0301, "step": 12038 }, { "epoch": 2.01, "grad_norm": 0.36246341466903687, "learning_rate": 5.1550803027308835e-06, "loss": 0.0381, "step": 12039 }, { "epoch": 2.01, "grad_norm": 0.48743516206741333, "learning_rate": 5.153500745118836e-06, "loss": 0.034, "step": 12040 }, { "epoch": 2.01, "grad_norm": 0.38340994715690613, "learning_rate": 5.151921345533045e-06, "loss": 0.0477, "step": 12041 }, { "epoch": 2.01, "grad_norm": 0.41618242859840393, "learning_rate": 5.150342104025001e-06, "loss": 0.0301, "step": 12042 }, { "epoch": 2.01, "grad_norm": 0.3474111258983612, "learning_rate": 5.1487630206462055e-06, "loss": 0.0517, "step": 12043 }, { "epoch": 2.01, "grad_norm": 0.3451026678085327, "learning_rate": 5.147184095448139e-06, "loss": 0.0327, "step": 12044 }, { "epoch": 2.01, "grad_norm": 0.3501226007938385, "learning_rate": 5.145605328482291e-06, "loss": 0.0462, "step": 12045 }, { "epoch": 2.01, "grad_norm": 0.24091637134552002, "learning_rate": 5.144026719800135e-06, "loss": 0.0263, "step": 12046 }, { "epoch": 2.01, "grad_norm": 0.26967036724090576, "learning_rate": 5.142448269453141e-06, "loss": 0.0286, "step": 12047 }, { "epoch": 2.02, "grad_norm": 0.2875455319881439, "learning_rate": 5.140869977492784e-06, "loss": 0.0346, "step": 12048 }, { "epoch": 2.02, "grad_norm": 0.3314979076385498, "learning_rate": 5.139291843970516e-06, "loss": 0.0539, "step": 12049 }, { "epoch": 2.02, "grad_norm": 0.27355608344078064, "learning_rate": 5.137713868937802e-06, "loss": 0.0225, "step": 12050 }, { "epoch": 2.02, "grad_norm": 0.3354854881763458, "learning_rate": 5.136136052446094e-06, "loss": 0.0262, "step": 12051 }, { "epoch": 2.02, "grad_norm": 0.26545166969299316, "learning_rate": 5.1345583945468356e-06, "loss": 0.0296, "step": 12052 }, { "epoch": 2.02, "grad_norm": 0.33790552616119385, "learning_rate": 5.132980895291469e-06, "loss": 0.0396, "step": 12053 }, { "epoch": 2.02, "grad_norm": 0.37602874636650085, "learning_rate": 5.131403554731431e-06, "loss": 0.055, "step": 12054 }, { "epoch": 2.02, "grad_norm": 0.2531079947948456, "learning_rate": 5.129826372918154e-06, "loss": 0.0251, "step": 12055 }, { "epoch": 2.02, "grad_norm": 0.22003144025802612, "learning_rate": 5.128249349903059e-06, "loss": 0.0235, "step": 12056 }, { "epoch": 2.02, "grad_norm": 0.3159431517124176, "learning_rate": 5.12667248573757e-06, "loss": 0.0391, "step": 12057 }, { "epoch": 2.02, "grad_norm": 0.42030149698257446, "learning_rate": 5.1250957804731016e-06, "loss": 0.0383, "step": 12058 }, { "epoch": 2.02, "grad_norm": 0.2699201703071594, "learning_rate": 5.12351923416107e-06, "loss": 0.0257, "step": 12059 }, { "epoch": 2.02, "grad_norm": 0.3077528774738312, "learning_rate": 5.121942846852877e-06, "loss": 0.0425, "step": 12060 }, { "epoch": 2.02, "grad_norm": 0.40820786356925964, "learning_rate": 5.120366618599919e-06, "loss": 0.0381, "step": 12061 }, { "epoch": 2.02, "grad_norm": 0.46312394738197327, "learning_rate": 5.118790549453596e-06, "loss": 0.0573, "step": 12062 }, { "epoch": 2.02, "grad_norm": 0.372894287109375, "learning_rate": 5.1172146394652965e-06, "loss": 0.0326, "step": 12063 }, { "epoch": 2.02, "grad_norm": 0.5090510249137878, "learning_rate": 5.1156388886864e-06, "loss": 0.0668, "step": 12064 }, { "epoch": 2.02, "grad_norm": 0.3309004306793213, "learning_rate": 5.11406329716829e-06, "loss": 0.0294, "step": 12065 }, { "epoch": 2.02, "grad_norm": 0.37662217020988464, "learning_rate": 5.112487864962344e-06, "loss": 0.0326, "step": 12066 }, { "epoch": 2.02, "grad_norm": 0.3141787648200989, "learning_rate": 5.110912592119924e-06, "loss": 0.0289, "step": 12067 }, { "epoch": 2.02, "grad_norm": 0.3143918216228485, "learning_rate": 5.109337478692402e-06, "loss": 0.0401, "step": 12068 }, { "epoch": 2.02, "grad_norm": 0.34091734886169434, "learning_rate": 5.1077625247311305e-06, "loss": 0.0446, "step": 12069 }, { "epoch": 2.02, "grad_norm": 0.33754217624664307, "learning_rate": 5.1061877302874615e-06, "loss": 0.0223, "step": 12070 }, { "epoch": 2.02, "grad_norm": 0.38313013315200806, "learning_rate": 5.104613095412749e-06, "loss": 0.0407, "step": 12071 }, { "epoch": 2.02, "grad_norm": 0.2840522825717926, "learning_rate": 5.103038620158329e-06, "loss": 0.043, "step": 12072 }, { "epoch": 2.02, "grad_norm": 0.3148443400859833, "learning_rate": 5.101464304575545e-06, "loss": 0.0462, "step": 12073 }, { "epoch": 2.02, "grad_norm": 0.31967589259147644, "learning_rate": 5.09989014871573e-06, "loss": 0.0305, "step": 12074 }, { "epoch": 2.02, "grad_norm": 0.45882782340049744, "learning_rate": 5.098316152630205e-06, "loss": 0.0566, "step": 12075 }, { "epoch": 2.02, "grad_norm": 0.32026687264442444, "learning_rate": 5.0967423163703e-06, "loss": 0.0377, "step": 12076 }, { "epoch": 2.02, "grad_norm": 0.25410377979278564, "learning_rate": 5.095168639987329e-06, "loss": 0.0348, "step": 12077 }, { "epoch": 2.02, "grad_norm": 0.361313134431839, "learning_rate": 5.093595123532598e-06, "loss": 0.0304, "step": 12078 }, { "epoch": 2.02, "grad_norm": 0.39634209871292114, "learning_rate": 5.092021767057423e-06, "loss": 0.0529, "step": 12079 }, { "epoch": 2.02, "grad_norm": 0.3645096719264984, "learning_rate": 5.090448570613098e-06, "loss": 0.0304, "step": 12080 }, { "epoch": 2.02, "grad_norm": 0.4871627390384674, "learning_rate": 5.088875534250919e-06, "loss": 0.037, "step": 12081 }, { "epoch": 2.02, "grad_norm": 0.34665539860725403, "learning_rate": 5.087302658022185e-06, "loss": 0.0346, "step": 12082 }, { "epoch": 2.02, "grad_norm": 0.3520629405975342, "learning_rate": 5.085729941978177e-06, "loss": 0.039, "step": 12083 }, { "epoch": 2.02, "grad_norm": 0.43779727816581726, "learning_rate": 5.084157386170169e-06, "loss": 0.0372, "step": 12084 }, { "epoch": 2.02, "grad_norm": 0.4060889184474945, "learning_rate": 5.082584990649446e-06, "loss": 0.0315, "step": 12085 }, { "epoch": 2.02, "grad_norm": 0.46857908368110657, "learning_rate": 5.081012755467269e-06, "loss": 0.0411, "step": 12086 }, { "epoch": 2.02, "grad_norm": 0.33254900574684143, "learning_rate": 5.079440680674914e-06, "loss": 0.0297, "step": 12087 }, { "epoch": 2.02, "grad_norm": 0.2950780689716339, "learning_rate": 5.077868766323628e-06, "loss": 0.0225, "step": 12088 }, { "epoch": 2.02, "grad_norm": 0.31909748911857605, "learning_rate": 5.07629701246467e-06, "loss": 0.0282, "step": 12089 }, { "epoch": 2.02, "grad_norm": 0.4400133788585663, "learning_rate": 5.074725419149297e-06, "loss": 0.0426, "step": 12090 }, { "epoch": 2.02, "grad_norm": 0.4237997233867645, "learning_rate": 5.073153986428745e-06, "loss": 0.0351, "step": 12091 }, { "epoch": 2.02, "grad_norm": 0.49297088384628296, "learning_rate": 5.071582714354248e-06, "loss": 0.0328, "step": 12092 }, { "epoch": 2.02, "grad_norm": 0.36552637815475464, "learning_rate": 5.070011602977051e-06, "loss": 0.0301, "step": 12093 }, { "epoch": 2.02, "grad_norm": 0.4521867334842682, "learning_rate": 5.068440652348375e-06, "loss": 0.0275, "step": 12094 }, { "epoch": 2.02, "grad_norm": 0.3594190776348114, "learning_rate": 5.0668698625194394e-06, "loss": 0.0412, "step": 12095 }, { "epoch": 2.02, "grad_norm": 0.3872361183166504, "learning_rate": 5.06529923354147e-06, "loss": 0.0383, "step": 12096 }, { "epoch": 2.02, "grad_norm": 0.2960900664329529, "learning_rate": 5.063728765465672e-06, "loss": 0.0263, "step": 12097 }, { "epoch": 2.02, "grad_norm": 0.6351105570793152, "learning_rate": 5.062158458343256e-06, "loss": 0.0482, "step": 12098 }, { "epoch": 2.02, "grad_norm": 0.28438493609428406, "learning_rate": 5.060588312225427e-06, "loss": 0.0273, "step": 12099 }, { "epoch": 2.02, "grad_norm": 0.34640324115753174, "learning_rate": 5.059018327163375e-06, "loss": 0.032, "step": 12100 }, { "epoch": 2.02, "grad_norm": 0.39862218499183655, "learning_rate": 5.057448503208298e-06, "loss": 0.057, "step": 12101 }, { "epoch": 2.02, "grad_norm": 0.39752012491226196, "learning_rate": 5.055878840411379e-06, "loss": 0.0209, "step": 12102 }, { "epoch": 2.02, "grad_norm": 0.3894170820713043, "learning_rate": 5.054309338823795e-06, "loss": 0.0457, "step": 12103 }, { "epoch": 2.02, "grad_norm": 0.3234926164150238, "learning_rate": 5.0527399984967285e-06, "loss": 0.0254, "step": 12104 }, { "epoch": 2.02, "grad_norm": 0.43284252285957336, "learning_rate": 5.0511708194813436e-06, "loss": 0.054, "step": 12105 }, { "epoch": 2.02, "grad_norm": 0.6488990783691406, "learning_rate": 5.049601801828807e-06, "loss": 0.0458, "step": 12106 }, { "epoch": 2.02, "grad_norm": 0.35480445623397827, "learning_rate": 5.048032945590285e-06, "loss": 0.0418, "step": 12107 }, { "epoch": 2.03, "grad_norm": 0.37674710154533386, "learning_rate": 5.046464250816925e-06, "loss": 0.0331, "step": 12108 }, { "epoch": 2.03, "grad_norm": 0.30848026275634766, "learning_rate": 5.0448957175598754e-06, "loss": 0.0322, "step": 12109 }, { "epoch": 2.03, "grad_norm": 0.46129119396209717, "learning_rate": 5.043327345870287e-06, "loss": 0.0512, "step": 12110 }, { "epoch": 2.03, "grad_norm": 0.409602552652359, "learning_rate": 5.0417591357992944e-06, "loss": 0.0351, "step": 12111 }, { "epoch": 2.03, "grad_norm": 0.4258653521537781, "learning_rate": 5.0401910873980276e-06, "loss": 0.0465, "step": 12112 }, { "epoch": 2.03, "grad_norm": 0.4166046380996704, "learning_rate": 5.038623200717618e-06, "loss": 0.0487, "step": 12113 }, { "epoch": 2.03, "grad_norm": 0.308601975440979, "learning_rate": 5.037055475809189e-06, "loss": 0.0351, "step": 12114 }, { "epoch": 2.03, "grad_norm": 0.35672861337661743, "learning_rate": 5.035487912723862e-06, "loss": 0.0404, "step": 12115 }, { "epoch": 2.03, "grad_norm": 0.4022156298160553, "learning_rate": 5.033920511512747e-06, "loss": 0.0357, "step": 12116 }, { "epoch": 2.03, "grad_norm": 0.3892561197280884, "learning_rate": 5.032353272226944e-06, "loss": 0.0362, "step": 12117 }, { "epoch": 2.03, "grad_norm": 0.3784707486629486, "learning_rate": 5.030786194917566e-06, "loss": 0.0418, "step": 12118 }, { "epoch": 2.03, "grad_norm": 0.5308259725570679, "learning_rate": 5.029219279635704e-06, "loss": 0.0339, "step": 12119 }, { "epoch": 2.03, "grad_norm": 0.3736359775066376, "learning_rate": 5.027652526432445e-06, "loss": 0.0218, "step": 12120 }, { "epoch": 2.03, "grad_norm": 0.407684862613678, "learning_rate": 5.026085935358879e-06, "loss": 0.0454, "step": 12121 }, { "epoch": 2.03, "grad_norm": 0.3212091624736786, "learning_rate": 5.024519506466091e-06, "loss": 0.0312, "step": 12122 }, { "epoch": 2.03, "grad_norm": 0.36348316073417664, "learning_rate": 5.022953239805148e-06, "loss": 0.034, "step": 12123 }, { "epoch": 2.03, "grad_norm": 0.49262627959251404, "learning_rate": 5.021387135427129e-06, "loss": 0.0368, "step": 12124 }, { "epoch": 2.03, "grad_norm": 0.28005287051200867, "learning_rate": 5.019821193383095e-06, "loss": 0.0437, "step": 12125 }, { "epoch": 2.03, "grad_norm": 0.43178480863571167, "learning_rate": 5.0182554137241e-06, "loss": 0.0379, "step": 12126 }, { "epoch": 2.03, "grad_norm": 0.3703816533088684, "learning_rate": 5.0166897965012085e-06, "loss": 0.035, "step": 12127 }, { "epoch": 2.03, "grad_norm": 0.4494788944721222, "learning_rate": 5.015124341765458e-06, "loss": 0.0351, "step": 12128 }, { "epoch": 2.03, "grad_norm": 0.3386806845664978, "learning_rate": 5.0135590495679e-06, "loss": 0.0398, "step": 12129 }, { "epoch": 2.03, "grad_norm": 0.2922993302345276, "learning_rate": 5.011993919959575e-06, "loss": 0.0303, "step": 12130 }, { "epoch": 2.03, "grad_norm": 0.2925836443901062, "learning_rate": 5.010428952991507e-06, "loss": 0.0333, "step": 12131 }, { "epoch": 2.03, "grad_norm": 0.6011928915977478, "learning_rate": 5.008864148714733e-06, "loss": 0.0252, "step": 12132 }, { "epoch": 2.03, "grad_norm": 0.3997958302497864, "learning_rate": 5.007299507180271e-06, "loss": 0.0317, "step": 12133 }, { "epoch": 2.03, "grad_norm": 0.515523374080658, "learning_rate": 5.005735028439134e-06, "loss": 0.0322, "step": 12134 }, { "epoch": 2.03, "grad_norm": 0.2799578309059143, "learning_rate": 5.004170712542343e-06, "loss": 0.0349, "step": 12135 }, { "epoch": 2.03, "grad_norm": 0.35552623867988586, "learning_rate": 5.002606559540896e-06, "loss": 0.052, "step": 12136 }, { "epoch": 2.03, "grad_norm": 0.3580741286277771, "learning_rate": 5.001042569485797e-06, "loss": 0.0238, "step": 12137 }, { "epoch": 2.03, "grad_norm": 0.2518600821495056, "learning_rate": 4.999478742428047e-06, "loss": 0.0426, "step": 12138 }, { "epoch": 2.03, "grad_norm": 0.4783209562301636, "learning_rate": 4.997915078418632e-06, "loss": 0.0351, "step": 12139 }, { "epoch": 2.03, "grad_norm": 0.48621776700019836, "learning_rate": 4.996351577508533e-06, "loss": 0.0436, "step": 12140 }, { "epoch": 2.03, "grad_norm": 0.29150569438934326, "learning_rate": 4.99478823974874e-06, "loss": 0.0224, "step": 12141 }, { "epoch": 2.03, "grad_norm": 0.5459529757499695, "learning_rate": 4.993225065190217e-06, "loss": 0.0534, "step": 12142 }, { "epoch": 2.03, "grad_norm": 0.2835095226764679, "learning_rate": 4.9916620538839436e-06, "loss": 0.0266, "step": 12143 }, { "epoch": 2.03, "grad_norm": 0.28901544213294983, "learning_rate": 4.990099205880873e-06, "loss": 0.0296, "step": 12144 }, { "epoch": 2.03, "grad_norm": 0.48213937878608704, "learning_rate": 4.988536521231974e-06, "loss": 0.0459, "step": 12145 }, { "epoch": 2.03, "grad_norm": 0.46116194128990173, "learning_rate": 4.986973999988192e-06, "loss": 0.0315, "step": 12146 }, { "epoch": 2.03, "grad_norm": 0.7038301825523376, "learning_rate": 4.98541164220048e-06, "loss": 0.0452, "step": 12147 }, { "epoch": 2.03, "grad_norm": 0.38690778613090515, "learning_rate": 4.983849447919778e-06, "loss": 0.0421, "step": 12148 }, { "epoch": 2.03, "grad_norm": 0.2893332540988922, "learning_rate": 4.982287417197026e-06, "loss": 0.0306, "step": 12149 }, { "epoch": 2.03, "grad_norm": 0.2615146040916443, "learning_rate": 4.980725550083154e-06, "loss": 0.0254, "step": 12150 }, { "epoch": 2.03, "grad_norm": 0.4805338382720947, "learning_rate": 4.979163846629086e-06, "loss": 0.0483, "step": 12151 }, { "epoch": 2.03, "grad_norm": 0.29371729493141174, "learning_rate": 4.97760230688575e-06, "loss": 0.0431, "step": 12152 }, { "epoch": 2.03, "grad_norm": 0.40865564346313477, "learning_rate": 4.976040930904055e-06, "loss": 0.0322, "step": 12153 }, { "epoch": 2.03, "grad_norm": 0.30689501762390137, "learning_rate": 4.974479718734914e-06, "loss": 0.0208, "step": 12154 }, { "epoch": 2.03, "grad_norm": 0.43544989824295044, "learning_rate": 4.972918670429236e-06, "loss": 0.0318, "step": 12155 }, { "epoch": 2.03, "grad_norm": 0.28103044629096985, "learning_rate": 4.971357786037916e-06, "loss": 0.0241, "step": 12156 }, { "epoch": 2.03, "grad_norm": 0.274616003036499, "learning_rate": 4.9697970656118535e-06, "loss": 0.022, "step": 12157 }, { "epoch": 2.03, "grad_norm": 0.3460030257701874, "learning_rate": 4.9682365092019355e-06, "loss": 0.0376, "step": 12158 }, { "epoch": 2.03, "grad_norm": 0.8500440716743469, "learning_rate": 4.966676116859042e-06, "loss": 0.0491, "step": 12159 }, { "epoch": 2.03, "grad_norm": 0.3180966377258301, "learning_rate": 4.965115888634058e-06, "loss": 0.0207, "step": 12160 }, { "epoch": 2.03, "grad_norm": 0.29276517033576965, "learning_rate": 4.963555824577849e-06, "loss": 0.0245, "step": 12161 }, { "epoch": 2.03, "grad_norm": 0.4082048833370209, "learning_rate": 4.9619959247412895e-06, "loss": 0.0546, "step": 12162 }, { "epoch": 2.03, "grad_norm": 0.32010871171951294, "learning_rate": 4.9604361891752425e-06, "loss": 0.0307, "step": 12163 }, { "epoch": 2.03, "grad_norm": 0.3713158071041107, "learning_rate": 4.958876617930563e-06, "loss": 0.0343, "step": 12164 }, { "epoch": 2.03, "grad_norm": 0.5900370478630066, "learning_rate": 4.9573172110580995e-06, "loss": 0.0577, "step": 12165 }, { "epoch": 2.03, "grad_norm": 0.290894478559494, "learning_rate": 4.955757968608704e-06, "loss": 0.038, "step": 12166 }, { "epoch": 2.03, "grad_norm": 0.37488558888435364, "learning_rate": 4.954198890633216e-06, "loss": 0.0287, "step": 12167 }, { "epoch": 2.04, "grad_norm": 0.3453111946582794, "learning_rate": 4.9526399771824664e-06, "loss": 0.03, "step": 12168 }, { "epoch": 2.04, "grad_norm": 0.30487996339797974, "learning_rate": 4.951081228307289e-06, "loss": 0.0304, "step": 12169 }, { "epoch": 2.04, "grad_norm": 0.3955042362213135, "learning_rate": 4.94952264405851e-06, "loss": 0.0332, "step": 12170 }, { "epoch": 2.04, "grad_norm": 0.3406691253185272, "learning_rate": 4.947964224486952e-06, "loss": 0.0316, "step": 12171 }, { "epoch": 2.04, "grad_norm": 0.39426276087760925, "learning_rate": 4.946405969643424e-06, "loss": 0.0329, "step": 12172 }, { "epoch": 2.04, "grad_norm": 0.3085435628890991, "learning_rate": 4.944847879578734e-06, "loss": 0.0292, "step": 12173 }, { "epoch": 2.04, "grad_norm": 0.31332141160964966, "learning_rate": 4.943289954343691e-06, "loss": 0.031, "step": 12174 }, { "epoch": 2.04, "grad_norm": 0.4131505489349365, "learning_rate": 4.9417321939890895e-06, "loss": 0.0421, "step": 12175 }, { "epoch": 2.04, "grad_norm": 0.3970800042152405, "learning_rate": 4.940174598565719e-06, "loss": 0.0295, "step": 12176 }, { "epoch": 2.04, "grad_norm": 0.34714818000793457, "learning_rate": 4.938617168124371e-06, "loss": 0.0225, "step": 12177 }, { "epoch": 2.04, "grad_norm": 0.23761671781539917, "learning_rate": 4.937059902715832e-06, "loss": 0.0212, "step": 12178 }, { "epoch": 2.04, "grad_norm": 0.32100778818130493, "learning_rate": 4.935502802390868e-06, "loss": 0.0332, "step": 12179 }, { "epoch": 2.04, "grad_norm": 0.3354133665561676, "learning_rate": 4.9339458672002605e-06, "loss": 0.0366, "step": 12180 }, { "epoch": 2.04, "grad_norm": 0.4323275089263916, "learning_rate": 4.932389097194771e-06, "loss": 0.0509, "step": 12181 }, { "epoch": 2.04, "grad_norm": 0.3233020007610321, "learning_rate": 4.930832492425155e-06, "loss": 0.035, "step": 12182 }, { "epoch": 2.04, "grad_norm": 0.4437680244445801, "learning_rate": 4.929276052942177e-06, "loss": 0.0423, "step": 12183 }, { "epoch": 2.04, "grad_norm": 0.5195522904396057, "learning_rate": 4.927719778796578e-06, "loss": 0.054, "step": 12184 }, { "epoch": 2.04, "grad_norm": 0.8468700647354126, "learning_rate": 4.926163670039106e-06, "loss": 0.0371, "step": 12185 }, { "epoch": 2.04, "grad_norm": 0.38396385312080383, "learning_rate": 4.924607726720504e-06, "loss": 0.0433, "step": 12186 }, { "epoch": 2.04, "grad_norm": 0.5633082985877991, "learning_rate": 4.923051948891497e-06, "loss": 0.0478, "step": 12187 }, { "epoch": 2.04, "grad_norm": 0.35783782601356506, "learning_rate": 4.921496336602824e-06, "loss": 0.0286, "step": 12188 }, { "epoch": 2.04, "grad_norm": 0.3319382965564728, "learning_rate": 4.919940889905199e-06, "loss": 0.0271, "step": 12189 }, { "epoch": 2.04, "grad_norm": 0.42278483510017395, "learning_rate": 4.91838560884934e-06, "loss": 0.0306, "step": 12190 }, { "epoch": 2.04, "grad_norm": 0.3047567307949066, "learning_rate": 4.916830493485964e-06, "loss": 0.0327, "step": 12191 }, { "epoch": 2.04, "grad_norm": 0.2890889048576355, "learning_rate": 4.915275543865769e-06, "loss": 0.0214, "step": 12192 }, { "epoch": 2.04, "grad_norm": 0.32760438323020935, "learning_rate": 4.913720760039462e-06, "loss": 0.0376, "step": 12193 }, { "epoch": 2.04, "grad_norm": 0.34975892305374146, "learning_rate": 4.912166142057743e-06, "loss": 0.0226, "step": 12194 }, { "epoch": 2.04, "grad_norm": 0.30574488639831543, "learning_rate": 4.9106116899712965e-06, "loss": 0.0328, "step": 12195 }, { "epoch": 2.04, "grad_norm": 0.4509451389312744, "learning_rate": 4.909057403830804e-06, "loss": 0.0573, "step": 12196 }, { "epoch": 2.04, "grad_norm": 0.2644067704677582, "learning_rate": 4.907503283686953e-06, "loss": 0.026, "step": 12197 }, { "epoch": 2.04, "grad_norm": 0.34353116154670715, "learning_rate": 4.90594932959041e-06, "loss": 0.038, "step": 12198 }, { "epoch": 2.04, "grad_norm": 0.32784026861190796, "learning_rate": 4.9043955415918505e-06, "loss": 0.0346, "step": 12199 }, { "epoch": 2.04, "grad_norm": 0.5841544270515442, "learning_rate": 4.9028419197419365e-06, "loss": 0.0434, "step": 12200 }, { "epoch": 2.04, "grad_norm": 0.4489782154560089, "learning_rate": 4.901288464091318e-06, "loss": 0.0428, "step": 12201 }, { "epoch": 2.04, "grad_norm": 0.3199974298477173, "learning_rate": 4.899735174690655e-06, "loss": 0.0347, "step": 12202 }, { "epoch": 2.04, "grad_norm": 0.2183668166399002, "learning_rate": 4.8981820515905964e-06, "loss": 0.0203, "step": 12203 }, { "epoch": 2.04, "grad_norm": 0.38742002844810486, "learning_rate": 4.896629094841775e-06, "loss": 0.038, "step": 12204 }, { "epoch": 2.04, "grad_norm": 0.43362656235694885, "learning_rate": 4.895076304494837e-06, "loss": 0.0353, "step": 12205 }, { "epoch": 2.04, "grad_norm": 0.5638031959533691, "learning_rate": 4.8935236806004085e-06, "loss": 0.0372, "step": 12206 }, { "epoch": 2.04, "grad_norm": 0.2843368351459503, "learning_rate": 4.891971223209109e-06, "loss": 0.0245, "step": 12207 }, { "epoch": 2.04, "grad_norm": 0.324420303106308, "learning_rate": 4.890418932371569e-06, "loss": 0.0293, "step": 12208 }, { "epoch": 2.04, "grad_norm": 0.41501227021217346, "learning_rate": 4.888866808138393e-06, "loss": 0.0381, "step": 12209 }, { "epoch": 2.04, "grad_norm": 0.2714623212814331, "learning_rate": 4.887314850560194e-06, "loss": 0.0207, "step": 12210 }, { "epoch": 2.04, "grad_norm": 0.3159717619419098, "learning_rate": 4.885763059687581e-06, "loss": 0.0337, "step": 12211 }, { "epoch": 2.04, "grad_norm": 0.28547796607017517, "learning_rate": 4.884211435571143e-06, "loss": 0.03, "step": 12212 }, { "epoch": 2.04, "grad_norm": 0.3955635130405426, "learning_rate": 4.882659978261483e-06, "loss": 0.0392, "step": 12213 }, { "epoch": 2.04, "grad_norm": 0.31103742122650146, "learning_rate": 4.8811086878091795e-06, "loss": 0.0238, "step": 12214 }, { "epoch": 2.04, "grad_norm": 0.3665067255496979, "learning_rate": 4.879557564264814e-06, "loss": 0.037, "step": 12215 }, { "epoch": 2.04, "grad_norm": 0.38385817408561707, "learning_rate": 4.8780066076789686e-06, "loss": 0.0341, "step": 12216 }, { "epoch": 2.04, "grad_norm": 0.3739049434661865, "learning_rate": 4.876455818102209e-06, "loss": 0.0429, "step": 12217 }, { "epoch": 2.04, "grad_norm": 0.4371930658817291, "learning_rate": 4.874905195585103e-06, "loss": 0.0391, "step": 12218 }, { "epoch": 2.04, "grad_norm": 0.5416250824928284, "learning_rate": 4.873354740178214e-06, "loss": 0.0422, "step": 12219 }, { "epoch": 2.04, "grad_norm": 0.37583988904953003, "learning_rate": 4.871804451932094e-06, "loss": 0.0305, "step": 12220 }, { "epoch": 2.04, "grad_norm": 0.39759495854377747, "learning_rate": 4.870254330897287e-06, "loss": 0.0232, "step": 12221 }, { "epoch": 2.04, "grad_norm": 0.4273086190223694, "learning_rate": 4.868704377124345e-06, "loss": 0.0263, "step": 12222 }, { "epoch": 2.04, "grad_norm": 0.45024731755256653, "learning_rate": 4.8671545906638015e-06, "loss": 0.0371, "step": 12223 }, { "epoch": 2.04, "grad_norm": 0.39778846502304077, "learning_rate": 4.865604971566188e-06, "loss": 0.0437, "step": 12224 }, { "epoch": 2.04, "grad_norm": 0.418468713760376, "learning_rate": 4.864055519882033e-06, "loss": 0.0398, "step": 12225 }, { "epoch": 2.04, "grad_norm": 0.3159545063972473, "learning_rate": 4.862506235661858e-06, "loss": 0.0426, "step": 12226 }, { "epoch": 2.04, "grad_norm": 0.3671706020832062, "learning_rate": 4.860957118956185e-06, "loss": 0.0373, "step": 12227 }, { "epoch": 2.05, "grad_norm": 0.3110661804676056, "learning_rate": 4.859408169815521e-06, "loss": 0.0331, "step": 12228 }, { "epoch": 2.05, "grad_norm": 0.41518786549568176, "learning_rate": 4.8578593882903665e-06, "loss": 0.0519, "step": 12229 }, { "epoch": 2.05, "grad_norm": 0.5113163590431213, "learning_rate": 4.85631077443123e-06, "loss": 0.0411, "step": 12230 }, { "epoch": 2.05, "grad_norm": 0.3897691071033478, "learning_rate": 4.854762328288601e-06, "loss": 0.0339, "step": 12231 }, { "epoch": 2.05, "grad_norm": 0.40435680747032166, "learning_rate": 4.853214049912967e-06, "loss": 0.0393, "step": 12232 }, { "epoch": 2.05, "grad_norm": 0.30266842246055603, "learning_rate": 4.8516659393548135e-06, "loss": 0.0323, "step": 12233 }, { "epoch": 2.05, "grad_norm": 0.35090184211730957, "learning_rate": 4.850117996664623e-06, "loss": 0.0308, "step": 12234 }, { "epoch": 2.05, "grad_norm": 0.43869587779045105, "learning_rate": 4.84857022189286e-06, "loss": 0.0378, "step": 12235 }, { "epoch": 2.05, "grad_norm": 0.26001477241516113, "learning_rate": 4.8470226150900005e-06, "loss": 0.0288, "step": 12236 }, { "epoch": 2.05, "grad_norm": 0.34189948439598083, "learning_rate": 4.845475176306501e-06, "loss": 0.0385, "step": 12237 }, { "epoch": 2.05, "grad_norm": 0.5406850576400757, "learning_rate": 4.843927905592815e-06, "loss": 0.0597, "step": 12238 }, { "epoch": 2.05, "grad_norm": 0.36273038387298584, "learning_rate": 4.842380802999399e-06, "loss": 0.0292, "step": 12239 }, { "epoch": 2.05, "grad_norm": 0.5773473978042603, "learning_rate": 4.840833868576693e-06, "loss": 0.0307, "step": 12240 }, { "epoch": 2.05, "grad_norm": 0.4009620249271393, "learning_rate": 4.8392871023751395e-06, "loss": 0.0477, "step": 12241 }, { "epoch": 2.05, "grad_norm": 0.32038456201553345, "learning_rate": 4.837740504445177e-06, "loss": 0.0325, "step": 12242 }, { "epoch": 2.05, "grad_norm": 0.9232609868049622, "learning_rate": 4.836194074837225e-06, "loss": 0.0666, "step": 12243 }, { "epoch": 2.05, "grad_norm": 0.4559464156627655, "learning_rate": 4.834647813601716e-06, "loss": 0.0397, "step": 12244 }, { "epoch": 2.05, "grad_norm": 0.3366975486278534, "learning_rate": 4.833101720789065e-06, "loss": 0.0362, "step": 12245 }, { "epoch": 2.05, "grad_norm": 0.34997034072875977, "learning_rate": 4.8315557964496786e-06, "loss": 0.0297, "step": 12246 }, { "epoch": 2.05, "grad_norm": 0.3575722277164459, "learning_rate": 4.8300100406339715e-06, "loss": 0.0243, "step": 12247 }, { "epoch": 2.05, "grad_norm": 0.29089418053627014, "learning_rate": 4.828464453392337e-06, "loss": 0.033, "step": 12248 }, { "epoch": 2.05, "grad_norm": 0.2877453565597534, "learning_rate": 4.826919034775176e-06, "loss": 0.0313, "step": 12249 }, { "epoch": 2.05, "grad_norm": 0.5908889174461365, "learning_rate": 4.825373784832883e-06, "loss": 0.0467, "step": 12250 }, { "epoch": 2.05, "grad_norm": 0.45320239663124084, "learning_rate": 4.823828703615837e-06, "loss": 0.0454, "step": 12251 }, { "epoch": 2.05, "grad_norm": 0.3968615233898163, "learning_rate": 4.822283791174416e-06, "loss": 0.0369, "step": 12252 }, { "epoch": 2.05, "grad_norm": 0.4159245193004608, "learning_rate": 4.820739047558999e-06, "loss": 0.0345, "step": 12253 }, { "epoch": 2.05, "grad_norm": 0.5154927372932434, "learning_rate": 4.819194472819948e-06, "loss": 0.0257, "step": 12254 }, { "epoch": 2.05, "grad_norm": 0.4070700705051422, "learning_rate": 4.817650067007634e-06, "loss": 0.0253, "step": 12255 }, { "epoch": 2.05, "grad_norm": 0.3207207918167114, "learning_rate": 4.81610583017241e-06, "loss": 0.0317, "step": 12256 }, { "epoch": 2.05, "grad_norm": 0.3938439190387726, "learning_rate": 4.814561762364623e-06, "loss": 0.0604, "step": 12257 }, { "epoch": 2.05, "grad_norm": 0.7462937831878662, "learning_rate": 4.813017863634623e-06, "loss": 0.0393, "step": 12258 }, { "epoch": 2.05, "grad_norm": 0.37674057483673096, "learning_rate": 4.8114741340327564e-06, "loss": 0.0335, "step": 12259 }, { "epoch": 2.05, "grad_norm": 0.5032551288604736, "learning_rate": 4.80993057360935e-06, "loss": 0.0494, "step": 12260 }, { "epoch": 2.05, "grad_norm": 0.45002424716949463, "learning_rate": 4.80838718241474e-06, "loss": 0.0349, "step": 12261 }, { "epoch": 2.05, "grad_norm": 0.41474565863609314, "learning_rate": 4.806843960499249e-06, "loss": 0.0272, "step": 12262 }, { "epoch": 2.05, "grad_norm": 0.48177671432495117, "learning_rate": 4.805300907913191e-06, "loss": 0.0398, "step": 12263 }, { "epoch": 2.05, "grad_norm": 0.2722049653530121, "learning_rate": 4.803758024706885e-06, "loss": 0.0316, "step": 12264 }, { "epoch": 2.05, "grad_norm": 0.46035289764404297, "learning_rate": 4.802215310930634e-06, "loss": 0.0355, "step": 12265 }, { "epoch": 2.05, "grad_norm": 0.37443187832832336, "learning_rate": 4.800672766634742e-06, "loss": 0.0436, "step": 12266 }, { "epoch": 2.05, "grad_norm": 0.29940861463546753, "learning_rate": 4.79913039186951e-06, "loss": 0.0222, "step": 12267 }, { "epoch": 2.05, "grad_norm": 0.37306931614875793, "learning_rate": 4.7975881866852214e-06, "loss": 0.0481, "step": 12268 }, { "epoch": 2.05, "grad_norm": 0.39072495698928833, "learning_rate": 4.79604615113217e-06, "loss": 0.0423, "step": 12269 }, { "epoch": 2.05, "grad_norm": 0.45983046293258667, "learning_rate": 4.79450428526063e-06, "loss": 0.0418, "step": 12270 }, { "epoch": 2.05, "grad_norm": 0.3185281753540039, "learning_rate": 4.792962589120875e-06, "loss": 0.0203, "step": 12271 }, { "epoch": 2.05, "grad_norm": 0.4593449831008911, "learning_rate": 4.79142106276318e-06, "loss": 0.0321, "step": 12272 }, { "epoch": 2.05, "grad_norm": 0.4890862703323364, "learning_rate": 4.7898797062378015e-06, "loss": 0.0375, "step": 12273 }, { "epoch": 2.05, "grad_norm": 0.39336803555488586, "learning_rate": 4.788338519594998e-06, "loss": 0.038, "step": 12274 }, { "epoch": 2.05, "grad_norm": 0.23074790835380554, "learning_rate": 4.786797502885031e-06, "loss": 0.024, "step": 12275 }, { "epoch": 2.05, "grad_norm": 0.4199422299861908, "learning_rate": 4.785256656158139e-06, "loss": 0.0278, "step": 12276 }, { "epoch": 2.05, "grad_norm": 0.31925326585769653, "learning_rate": 4.783715979464561e-06, "loss": 0.0334, "step": 12277 }, { "epoch": 2.05, "grad_norm": 0.48374220728874207, "learning_rate": 4.782175472854542e-06, "loss": 0.0446, "step": 12278 }, { "epoch": 2.05, "grad_norm": 0.4223320484161377, "learning_rate": 4.780635136378307e-06, "loss": 0.0467, "step": 12279 }, { "epoch": 2.05, "grad_norm": 0.48150497674942017, "learning_rate": 4.779094970086075e-06, "loss": 0.0451, "step": 12280 }, { "epoch": 2.05, "grad_norm": 0.40025925636291504, "learning_rate": 4.777554974028072e-06, "loss": 0.0447, "step": 12281 }, { "epoch": 2.05, "grad_norm": 0.4868529140949249, "learning_rate": 4.776015148254509e-06, "loss": 0.0345, "step": 12282 }, { "epoch": 2.05, "grad_norm": 0.4452162981033325, "learning_rate": 4.7744754928156e-06, "loss": 0.0453, "step": 12283 }, { "epoch": 2.05, "grad_norm": 0.3415737748146057, "learning_rate": 4.772936007761541e-06, "loss": 0.0486, "step": 12284 }, { "epoch": 2.05, "grad_norm": 0.3908722400665283, "learning_rate": 4.771396693142528e-06, "loss": 0.0362, "step": 12285 }, { "epoch": 2.05, "grad_norm": 0.40820327401161194, "learning_rate": 4.769857549008758e-06, "loss": 0.0326, "step": 12286 }, { "epoch": 2.06, "grad_norm": 0.3134908974170685, "learning_rate": 4.768318575410414e-06, "loss": 0.0327, "step": 12287 }, { "epoch": 2.06, "grad_norm": 0.4559648334980011, "learning_rate": 4.766779772397671e-06, "loss": 0.0338, "step": 12288 }, { "epoch": 2.06, "grad_norm": 0.35121655464172363, "learning_rate": 4.765241140020708e-06, "loss": 0.0258, "step": 12289 }, { "epoch": 2.06, "grad_norm": 0.4053500294685364, "learning_rate": 4.763702678329698e-06, "loss": 0.0355, "step": 12290 }, { "epoch": 2.06, "grad_norm": 0.3019281029701233, "learning_rate": 4.762164387374798e-06, "loss": 0.023, "step": 12291 }, { "epoch": 2.06, "grad_norm": 0.3579005300998688, "learning_rate": 4.760626267206171e-06, "loss": 0.0299, "step": 12292 }, { "epoch": 2.06, "grad_norm": 0.45743754506111145, "learning_rate": 4.7590883178739675e-06, "loss": 0.04, "step": 12293 }, { "epoch": 2.06, "grad_norm": 0.4776463210582733, "learning_rate": 4.757550539428331e-06, "loss": 0.0431, "step": 12294 }, { "epoch": 2.06, "grad_norm": 0.34100061655044556, "learning_rate": 4.75601293191941e-06, "loss": 0.0379, "step": 12295 }, { "epoch": 2.06, "grad_norm": 0.32148823142051697, "learning_rate": 4.754475495397329e-06, "loss": 0.0339, "step": 12296 }, { "epoch": 2.06, "grad_norm": 0.4050237536430359, "learning_rate": 4.752938229912228e-06, "loss": 0.0343, "step": 12297 }, { "epoch": 2.06, "grad_norm": 0.40893545746803284, "learning_rate": 4.75140113551423e-06, "loss": 0.0468, "step": 12298 }, { "epoch": 2.06, "grad_norm": 0.3709043860435486, "learning_rate": 4.749864212253449e-06, "loss": 0.0334, "step": 12299 }, { "epoch": 2.06, "grad_norm": 0.330700546503067, "learning_rate": 4.748327460180005e-06, "loss": 0.0329, "step": 12300 }, { "epoch": 2.06, "grad_norm": 0.29249307513237, "learning_rate": 4.746790879344002e-06, "loss": 0.0223, "step": 12301 }, { "epoch": 2.06, "grad_norm": 0.4864712059497833, "learning_rate": 4.745254469795539e-06, "loss": 0.0396, "step": 12302 }, { "epoch": 2.06, "grad_norm": 0.36599501967430115, "learning_rate": 4.743718231584721e-06, "loss": 0.0374, "step": 12303 }, { "epoch": 2.06, "grad_norm": 0.4205547273159027, "learning_rate": 4.7421821647616285e-06, "loss": 0.0512, "step": 12304 }, { "epoch": 2.06, "grad_norm": 0.3463348150253296, "learning_rate": 4.740646269376356e-06, "loss": 0.0299, "step": 12305 }, { "epoch": 2.06, "grad_norm": 0.3475785553455353, "learning_rate": 4.739110545478977e-06, "loss": 0.044, "step": 12306 }, { "epoch": 2.06, "grad_norm": 0.2722513675689697, "learning_rate": 4.737574993119573e-06, "loss": 0.0259, "step": 12307 }, { "epoch": 2.06, "grad_norm": 0.2913767099380493, "learning_rate": 4.736039612348202e-06, "loss": 0.0288, "step": 12308 }, { "epoch": 2.06, "grad_norm": 0.390336811542511, "learning_rate": 4.734504403214938e-06, "loss": 0.022, "step": 12309 }, { "epoch": 2.06, "grad_norm": 0.4271499514579773, "learning_rate": 4.732969365769831e-06, "loss": 0.0271, "step": 12310 }, { "epoch": 2.06, "grad_norm": 0.4039663076400757, "learning_rate": 4.731434500062937e-06, "loss": 0.034, "step": 12311 }, { "epoch": 2.06, "grad_norm": 0.41263845562934875, "learning_rate": 4.729899806144301e-06, "loss": 0.0474, "step": 12312 }, { "epoch": 2.06, "grad_norm": 0.4342392086982727, "learning_rate": 4.7283652840639605e-06, "loss": 0.0471, "step": 12313 }, { "epoch": 2.06, "grad_norm": 0.40418484807014465, "learning_rate": 4.726830933871953e-06, "loss": 0.0452, "step": 12314 }, { "epoch": 2.06, "grad_norm": 0.5914288759231567, "learning_rate": 4.7252967556183125e-06, "loss": 0.0425, "step": 12315 }, { "epoch": 2.06, "grad_norm": 0.32823801040649414, "learning_rate": 4.723762749353053e-06, "loss": 0.0471, "step": 12316 }, { "epoch": 2.06, "grad_norm": 0.7348656058311462, "learning_rate": 4.722228915126205e-06, "loss": 0.0498, "step": 12317 }, { "epoch": 2.06, "grad_norm": 0.4760761857032776, "learning_rate": 4.720695252987771e-06, "loss": 0.049, "step": 12318 }, { "epoch": 2.06, "grad_norm": 0.20554699003696442, "learning_rate": 4.71916176298776e-06, "loss": 0.019, "step": 12319 }, { "epoch": 2.06, "grad_norm": 0.5338960289955139, "learning_rate": 4.717628445176178e-06, "loss": 0.034, "step": 12320 }, { "epoch": 2.06, "grad_norm": 0.3887602984905243, "learning_rate": 4.716095299603014e-06, "loss": 0.0422, "step": 12321 }, { "epoch": 2.06, "grad_norm": 0.3868742287158966, "learning_rate": 4.714562326318261e-06, "loss": 0.0311, "step": 12322 }, { "epoch": 2.06, "grad_norm": 0.4703293442726135, "learning_rate": 4.713029525371908e-06, "loss": 0.0271, "step": 12323 }, { "epoch": 2.06, "grad_norm": 0.3398931324481964, "learning_rate": 4.711496896813928e-06, "loss": 0.027, "step": 12324 }, { "epoch": 2.06, "grad_norm": 0.3969636857509613, "learning_rate": 4.709964440694298e-06, "loss": 0.0449, "step": 12325 }, { "epoch": 2.06, "grad_norm": 0.41200414299964905, "learning_rate": 4.7084321570629855e-06, "loss": 0.0327, "step": 12326 }, { "epoch": 2.06, "grad_norm": 0.4007716774940491, "learning_rate": 4.706900045969947e-06, "loss": 0.0317, "step": 12327 }, { "epoch": 2.06, "grad_norm": 0.3239833116531372, "learning_rate": 4.705368107465147e-06, "loss": 0.0407, "step": 12328 }, { "epoch": 2.06, "grad_norm": 0.4356258511543274, "learning_rate": 4.703836341598529e-06, "loss": 0.0472, "step": 12329 }, { "epoch": 2.06, "grad_norm": 0.27780187129974365, "learning_rate": 4.70230474842004e-06, "loss": 0.017, "step": 12330 }, { "epoch": 2.06, "grad_norm": 0.5583133101463318, "learning_rate": 4.700773327979626e-06, "loss": 0.0425, "step": 12331 }, { "epoch": 2.06, "grad_norm": 0.3341600298881531, "learning_rate": 4.6992420803272165e-06, "loss": 0.0288, "step": 12332 }, { "epoch": 2.06, "grad_norm": 0.4228939116001129, "learning_rate": 4.697711005512735e-06, "loss": 0.0281, "step": 12333 }, { "epoch": 2.06, "grad_norm": 0.8400689363479614, "learning_rate": 4.6961801035861125e-06, "loss": 0.0352, "step": 12334 }, { "epoch": 2.06, "grad_norm": 0.5071368217468262, "learning_rate": 4.694649374597258e-06, "loss": 0.0518, "step": 12335 }, { "epoch": 2.06, "grad_norm": 0.5730555057525635, "learning_rate": 4.69311881859609e-06, "loss": 0.0505, "step": 12336 }, { "epoch": 2.06, "grad_norm": 0.5454450249671936, "learning_rate": 4.691588435632508e-06, "loss": 0.0347, "step": 12337 }, { "epoch": 2.06, "grad_norm": 0.36611053347587585, "learning_rate": 4.690058225756415e-06, "loss": 0.0306, "step": 12338 }, { "epoch": 2.06, "grad_norm": 0.3429776430130005, "learning_rate": 4.6885281890177095e-06, "loss": 0.0465, "step": 12339 }, { "epoch": 2.06, "grad_norm": 0.32390132546424866, "learning_rate": 4.6869983254662755e-06, "loss": 0.042, "step": 12340 }, { "epoch": 2.06, "grad_norm": 0.7545566558837891, "learning_rate": 4.6854686351519926e-06, "loss": 0.0456, "step": 12341 }, { "epoch": 2.06, "grad_norm": 0.3923165798187256, "learning_rate": 4.683939118124748e-06, "loss": 0.03, "step": 12342 }, { "epoch": 2.06, "grad_norm": 0.4393158555030823, "learning_rate": 4.682409774434408e-06, "loss": 0.0414, "step": 12343 }, { "epoch": 2.06, "grad_norm": 0.36253267526626587, "learning_rate": 4.680880604130834e-06, "loss": 0.0283, "step": 12344 }, { "epoch": 2.06, "grad_norm": 0.391357421875, "learning_rate": 4.679351607263892e-06, "loss": 0.0548, "step": 12345 }, { "epoch": 2.06, "grad_norm": 0.3245016634464264, "learning_rate": 4.67782278388344e-06, "loss": 0.0366, "step": 12346 }, { "epoch": 2.07, "grad_norm": 0.4435620605945587, "learning_rate": 4.676294134039321e-06, "loss": 0.0417, "step": 12347 }, { "epoch": 2.07, "grad_norm": 0.4040791988372803, "learning_rate": 4.674765657781384e-06, "loss": 0.0423, "step": 12348 }, { "epoch": 2.07, "grad_norm": 0.31425151228904724, "learning_rate": 4.67323735515946e-06, "loss": 0.0271, "step": 12349 }, { "epoch": 2.07, "grad_norm": 0.2571758031845093, "learning_rate": 4.67170922622339e-06, "loss": 0.0262, "step": 12350 }, { "epoch": 2.07, "grad_norm": 0.3961485028266907, "learning_rate": 4.670181271022995e-06, "loss": 0.055, "step": 12351 }, { "epoch": 2.07, "grad_norm": 0.3787684142589569, "learning_rate": 4.6686534896080935e-06, "loss": 0.0445, "step": 12352 }, { "epoch": 2.07, "grad_norm": 0.352367639541626, "learning_rate": 4.667125882028504e-06, "loss": 0.0223, "step": 12353 }, { "epoch": 2.07, "grad_norm": 0.3703785836696625, "learning_rate": 4.66559844833404e-06, "loss": 0.0347, "step": 12354 }, { "epoch": 2.07, "grad_norm": 0.4443211853504181, "learning_rate": 4.664071188574497e-06, "loss": 0.0412, "step": 12355 }, { "epoch": 2.07, "grad_norm": 0.25377973914146423, "learning_rate": 4.662544102799683e-06, "loss": 0.0272, "step": 12356 }, { "epoch": 2.07, "grad_norm": 0.25643691420555115, "learning_rate": 4.661017191059385e-06, "loss": 0.0218, "step": 12357 }, { "epoch": 2.07, "grad_norm": 0.41489896178245544, "learning_rate": 4.659490453403386e-06, "loss": 0.0401, "step": 12358 }, { "epoch": 2.07, "grad_norm": 0.646363377571106, "learning_rate": 4.657963889881475e-06, "loss": 0.02, "step": 12359 }, { "epoch": 2.07, "grad_norm": 0.23459038138389587, "learning_rate": 4.656437500543425e-06, "loss": 0.0197, "step": 12360 }, { "epoch": 2.07, "grad_norm": 0.2997809946537018, "learning_rate": 4.654911285439e-06, "loss": 0.0202, "step": 12361 }, { "epoch": 2.07, "grad_norm": 0.2969019412994385, "learning_rate": 4.6533852446179694e-06, "loss": 0.0282, "step": 12362 }, { "epoch": 2.07, "grad_norm": 0.7778173685073853, "learning_rate": 4.6518593781300925e-06, "loss": 0.0364, "step": 12363 }, { "epoch": 2.07, "grad_norm": 0.5406718254089355, "learning_rate": 4.650333686025124e-06, "loss": 0.0321, "step": 12364 }, { "epoch": 2.07, "grad_norm": 0.29266443848609924, "learning_rate": 4.648808168352807e-06, "loss": 0.0253, "step": 12365 }, { "epoch": 2.07, "grad_norm": 0.6814978122711182, "learning_rate": 4.647282825162881e-06, "loss": 0.0437, "step": 12366 }, { "epoch": 2.07, "grad_norm": 0.3467807173728943, "learning_rate": 4.6457576565050885e-06, "loss": 0.0383, "step": 12367 }, { "epoch": 2.07, "grad_norm": 0.5393187403678894, "learning_rate": 4.644232662429156e-06, "loss": 0.0278, "step": 12368 }, { "epoch": 2.07, "grad_norm": 0.33281803131103516, "learning_rate": 4.642707842984803e-06, "loss": 0.0273, "step": 12369 }, { "epoch": 2.07, "grad_norm": 0.44856882095336914, "learning_rate": 4.641183198221754e-06, "loss": 0.0319, "step": 12370 }, { "epoch": 2.07, "grad_norm": 0.48069995641708374, "learning_rate": 4.6396587281897235e-06, "loss": 0.0398, "step": 12371 }, { "epoch": 2.07, "grad_norm": 0.3915947675704956, "learning_rate": 4.638134432938413e-06, "loss": 0.0244, "step": 12372 }, { "epoch": 2.07, "grad_norm": 0.4093247354030609, "learning_rate": 4.63661031251753e-06, "loss": 0.0268, "step": 12373 }, { "epoch": 2.07, "grad_norm": 0.4731747508049011, "learning_rate": 4.635086366976768e-06, "loss": 0.0415, "step": 12374 }, { "epoch": 2.07, "grad_norm": 0.5207217335700989, "learning_rate": 4.633562596365813e-06, "loss": 0.0379, "step": 12375 }, { "epoch": 2.07, "grad_norm": 0.5398063063621521, "learning_rate": 4.6320390007343554e-06, "loss": 0.0317, "step": 12376 }, { "epoch": 2.07, "grad_norm": 0.31171128153800964, "learning_rate": 4.63051558013207e-06, "loss": 0.0311, "step": 12377 }, { "epoch": 2.07, "grad_norm": 0.2912282943725586, "learning_rate": 4.628992334608631e-06, "loss": 0.0316, "step": 12378 }, { "epoch": 2.07, "grad_norm": 0.41748014092445374, "learning_rate": 4.627469264213709e-06, "loss": 0.0281, "step": 12379 }, { "epoch": 2.07, "grad_norm": 0.25270557403564453, "learning_rate": 4.62594636899696e-06, "loss": 0.0275, "step": 12380 }, { "epoch": 2.07, "grad_norm": 0.4192970395088196, "learning_rate": 4.624423649008047e-06, "loss": 0.0499, "step": 12381 }, { "epoch": 2.07, "grad_norm": 0.395567387342453, "learning_rate": 4.622901104296616e-06, "loss": 0.0215, "step": 12382 }, { "epoch": 2.07, "grad_norm": 0.33986517786979675, "learning_rate": 4.6213787349123075e-06, "loss": 0.0336, "step": 12383 }, { "epoch": 2.07, "grad_norm": 0.3572891354560852, "learning_rate": 4.619856540904768e-06, "loss": 0.0301, "step": 12384 }, { "epoch": 2.07, "grad_norm": 0.42210620641708374, "learning_rate": 4.618334522323623e-06, "loss": 0.0395, "step": 12385 }, { "epoch": 2.07, "grad_norm": 0.44515350461006165, "learning_rate": 4.616812679218504e-06, "loss": 0.0241, "step": 12386 }, { "epoch": 2.07, "grad_norm": 0.8171916007995605, "learning_rate": 4.6152910116390374e-06, "loss": 0.0483, "step": 12387 }, { "epoch": 2.07, "grad_norm": 0.4160102903842926, "learning_rate": 4.613769519634832e-06, "loss": 0.0367, "step": 12388 }, { "epoch": 2.07, "grad_norm": 0.31456857919692993, "learning_rate": 4.612248203255498e-06, "loss": 0.0322, "step": 12389 }, { "epoch": 2.07, "grad_norm": 0.3548038601875305, "learning_rate": 4.610727062550645e-06, "loss": 0.0416, "step": 12390 }, { "epoch": 2.07, "grad_norm": 0.3777643144130707, "learning_rate": 4.6092060975698645e-06, "loss": 0.0432, "step": 12391 }, { "epoch": 2.07, "grad_norm": 0.42149415612220764, "learning_rate": 4.607685308362758e-06, "loss": 0.0291, "step": 12392 }, { "epoch": 2.07, "grad_norm": 0.5391337275505066, "learning_rate": 4.606164694978905e-06, "loss": 0.0293, "step": 12393 }, { "epoch": 2.07, "grad_norm": 0.39763370156288147, "learning_rate": 4.604644257467891e-06, "loss": 0.0433, "step": 12394 }, { "epoch": 2.07, "grad_norm": 0.46461281180381775, "learning_rate": 4.603123995879293e-06, "loss": 0.0448, "step": 12395 }, { "epoch": 2.07, "grad_norm": 0.4213506579399109, "learning_rate": 4.601603910262681e-06, "loss": 0.0341, "step": 12396 }, { "epoch": 2.07, "grad_norm": 0.467960000038147, "learning_rate": 4.6000840006676125e-06, "loss": 0.0242, "step": 12397 }, { "epoch": 2.07, "grad_norm": 0.35084268450737, "learning_rate": 4.5985642671436555e-06, "loss": 0.0412, "step": 12398 }, { "epoch": 2.07, "grad_norm": 0.36581993103027344, "learning_rate": 4.597044709740359e-06, "loss": 0.0391, "step": 12399 }, { "epoch": 2.07, "grad_norm": 0.28903868794441223, "learning_rate": 4.595525328507265e-06, "loss": 0.0355, "step": 12400 }, { "epoch": 2.07, "grad_norm": 0.49046388268470764, "learning_rate": 4.59400612349392e-06, "loss": 0.0382, "step": 12401 }, { "epoch": 2.07, "grad_norm": 0.5652106404304504, "learning_rate": 4.5924870947498635e-06, "loss": 0.0419, "step": 12402 }, { "epoch": 2.07, "grad_norm": 0.3933541476726532, "learning_rate": 4.5909682423246164e-06, "loss": 0.0407, "step": 12403 }, { "epoch": 2.07, "grad_norm": 0.46511325240135193, "learning_rate": 4.589449566267711e-06, "loss": 0.0469, "step": 12404 }, { "epoch": 2.07, "grad_norm": 0.3589981496334076, "learning_rate": 4.587931066628659e-06, "loss": 0.0326, "step": 12405 }, { "epoch": 2.07, "grad_norm": 0.4167463183403015, "learning_rate": 4.586412743456979e-06, "loss": 0.0296, "step": 12406 }, { "epoch": 2.08, "grad_norm": 0.4651930034160614, "learning_rate": 4.584894596802175e-06, "loss": 0.0539, "step": 12407 }, { "epoch": 2.08, "grad_norm": 0.3787457346916199, "learning_rate": 4.583376626713744e-06, "loss": 0.0388, "step": 12408 }, { "epoch": 2.08, "grad_norm": 0.4089643955230713, "learning_rate": 4.5818588332411894e-06, "loss": 0.0426, "step": 12409 }, { "epoch": 2.08, "grad_norm": 0.6708919405937195, "learning_rate": 4.580341216433993e-06, "loss": 0.0391, "step": 12410 }, { "epoch": 2.08, "grad_norm": 0.4876813292503357, "learning_rate": 4.578823776341642e-06, "loss": 0.0598, "step": 12411 }, { "epoch": 2.08, "grad_norm": 0.2948412299156189, "learning_rate": 4.577306513013618e-06, "loss": 0.0157, "step": 12412 }, { "epoch": 2.08, "grad_norm": 0.23433303833007812, "learning_rate": 4.575789426499392e-06, "loss": 0.0184, "step": 12413 }, { "epoch": 2.08, "grad_norm": 0.38874316215515137, "learning_rate": 4.574272516848424e-06, "loss": 0.0388, "step": 12414 }, { "epoch": 2.08, "grad_norm": 0.4174356758594513, "learning_rate": 4.572755784110182e-06, "loss": 0.053, "step": 12415 }, { "epoch": 2.08, "grad_norm": 0.520408570766449, "learning_rate": 4.571239228334119e-06, "loss": 0.0487, "step": 12416 }, { "epoch": 2.08, "grad_norm": 0.3565307855606079, "learning_rate": 4.56972284956968e-06, "loss": 0.0264, "step": 12417 }, { "epoch": 2.08, "grad_norm": 0.35774746537208557, "learning_rate": 4.568206647866311e-06, "loss": 0.0442, "step": 12418 }, { "epoch": 2.08, "grad_norm": 0.49028676748275757, "learning_rate": 4.566690623273452e-06, "loss": 0.0463, "step": 12419 }, { "epoch": 2.08, "grad_norm": 0.3673200309276581, "learning_rate": 4.5651747758405375e-06, "loss": 0.0335, "step": 12420 }, { "epoch": 2.08, "grad_norm": 0.30568528175354004, "learning_rate": 4.563659105616989e-06, "loss": 0.021, "step": 12421 }, { "epoch": 2.08, "grad_norm": 0.5236226320266724, "learning_rate": 4.562143612652223e-06, "loss": 0.0436, "step": 12422 }, { "epoch": 2.08, "grad_norm": 0.454591304063797, "learning_rate": 4.560628296995664e-06, "loss": 0.0377, "step": 12423 }, { "epoch": 2.08, "grad_norm": 0.3904912769794464, "learning_rate": 4.559113158696714e-06, "loss": 0.034, "step": 12424 }, { "epoch": 2.08, "grad_norm": 0.47318175435066223, "learning_rate": 4.557598197804775e-06, "loss": 0.0316, "step": 12425 }, { "epoch": 2.08, "grad_norm": 0.2969059944152832, "learning_rate": 4.556083414369246e-06, "loss": 0.0284, "step": 12426 }, { "epoch": 2.08, "grad_norm": 0.43147680163383484, "learning_rate": 4.554568808439522e-06, "loss": 0.038, "step": 12427 }, { "epoch": 2.08, "grad_norm": 0.3080653250217438, "learning_rate": 4.553054380064984e-06, "loss": 0.0281, "step": 12428 }, { "epoch": 2.08, "grad_norm": 0.3293994963169098, "learning_rate": 4.551540129295015e-06, "loss": 0.0401, "step": 12429 }, { "epoch": 2.08, "grad_norm": 0.6516090631484985, "learning_rate": 4.55002605617899e-06, "loss": 0.0385, "step": 12430 }, { "epoch": 2.08, "grad_norm": 0.446994811296463, "learning_rate": 4.54851216076627e-06, "loss": 0.0477, "step": 12431 }, { "epoch": 2.08, "grad_norm": 0.3267582654953003, "learning_rate": 4.546998443106227e-06, "loss": 0.0424, "step": 12432 }, { "epoch": 2.08, "grad_norm": 0.32350507378578186, "learning_rate": 4.545484903248208e-06, "loss": 0.0271, "step": 12433 }, { "epoch": 2.08, "grad_norm": 0.30086150765419006, "learning_rate": 4.54397154124157e-06, "loss": 0.0361, "step": 12434 }, { "epoch": 2.08, "grad_norm": 0.3310225307941437, "learning_rate": 4.54245835713566e-06, "loss": 0.0354, "step": 12435 }, { "epoch": 2.08, "grad_norm": 0.3285965323448181, "learning_rate": 4.540945350979811e-06, "loss": 0.0337, "step": 12436 }, { "epoch": 2.08, "grad_norm": 0.5450702905654907, "learning_rate": 4.539432522823363e-06, "loss": 0.0476, "step": 12437 }, { "epoch": 2.08, "grad_norm": 0.533289909362793, "learning_rate": 4.5379198727156415e-06, "loss": 0.0323, "step": 12438 }, { "epoch": 2.08, "grad_norm": 0.3121199905872345, "learning_rate": 4.536407400705963e-06, "loss": 0.0235, "step": 12439 }, { "epoch": 2.08, "grad_norm": 0.36667606234550476, "learning_rate": 4.534895106843652e-06, "loss": 0.0325, "step": 12440 }, { "epoch": 2.08, "grad_norm": 0.4245084822177887, "learning_rate": 4.53338299117801e-06, "loss": 0.0241, "step": 12441 }, { "epoch": 2.08, "grad_norm": 0.3367106318473816, "learning_rate": 4.5318710537583474e-06, "loss": 0.0265, "step": 12442 }, { "epoch": 2.08, "grad_norm": 0.27861884236335754, "learning_rate": 4.530359294633966e-06, "loss": 0.029, "step": 12443 }, { "epoch": 2.08, "grad_norm": 0.32084301114082336, "learning_rate": 4.5288477138541534e-06, "loss": 0.0294, "step": 12444 }, { "epoch": 2.08, "grad_norm": 0.37174728512763977, "learning_rate": 4.527336311468192e-06, "loss": 0.0457, "step": 12445 }, { "epoch": 2.08, "grad_norm": 0.36401262879371643, "learning_rate": 4.525825087525374e-06, "loss": 0.0428, "step": 12446 }, { "epoch": 2.08, "grad_norm": 0.39380210638046265, "learning_rate": 4.524314042074966e-06, "loss": 0.0437, "step": 12447 }, { "epoch": 2.08, "grad_norm": 0.45527952909469604, "learning_rate": 4.522803175166244e-06, "loss": 0.0525, "step": 12448 }, { "epoch": 2.08, "grad_norm": 0.3677934408187866, "learning_rate": 4.521292486848464e-06, "loss": 0.0457, "step": 12449 }, { "epoch": 2.08, "grad_norm": 0.5581904649734497, "learning_rate": 4.5197819771708885e-06, "loss": 0.0418, "step": 12450 }, { "epoch": 2.08, "grad_norm": 0.29638436436653137, "learning_rate": 4.518271646182774e-06, "loss": 0.0203, "step": 12451 }, { "epoch": 2.08, "grad_norm": 0.3114467263221741, "learning_rate": 4.516761493933361e-06, "loss": 0.0388, "step": 12452 }, { "epoch": 2.08, "grad_norm": 0.4781543016433716, "learning_rate": 4.515251520471888e-06, "loss": 0.0434, "step": 12453 }, { "epoch": 2.08, "grad_norm": 0.33732590079307556, "learning_rate": 4.513741725847597e-06, "loss": 0.0371, "step": 12454 }, { "epoch": 2.08, "grad_norm": 0.43540579080581665, "learning_rate": 4.512232110109712e-06, "loss": 0.0448, "step": 12455 }, { "epoch": 2.08, "grad_norm": 0.6350048184394836, "learning_rate": 4.510722673307453e-06, "loss": 0.0405, "step": 12456 }, { "epoch": 2.08, "grad_norm": 0.43484827876091003, "learning_rate": 4.50921341549004e-06, "loss": 0.0305, "step": 12457 }, { "epoch": 2.08, "grad_norm": 0.40446826815605164, "learning_rate": 4.5077043367066894e-06, "loss": 0.038, "step": 12458 }, { "epoch": 2.08, "grad_norm": 0.3059696555137634, "learning_rate": 4.506195437006599e-06, "loss": 0.0378, "step": 12459 }, { "epoch": 2.08, "grad_norm": 0.4048979878425598, "learning_rate": 4.504686716438973e-06, "loss": 0.0337, "step": 12460 }, { "epoch": 2.08, "grad_norm": 0.33697786927223206, "learning_rate": 4.503178175053001e-06, "loss": 0.0277, "step": 12461 }, { "epoch": 2.08, "grad_norm": 0.34892576932907104, "learning_rate": 4.501669812897878e-06, "loss": 0.0346, "step": 12462 }, { "epoch": 2.08, "grad_norm": 0.48219001293182373, "learning_rate": 4.50016163002278e-06, "loss": 0.0453, "step": 12463 }, { "epoch": 2.08, "grad_norm": 0.316923588514328, "learning_rate": 4.498653626476882e-06, "loss": 0.0277, "step": 12464 }, { "epoch": 2.08, "grad_norm": 0.4256490468978882, "learning_rate": 4.497145802309361e-06, "loss": 0.043, "step": 12465 }, { "epoch": 2.08, "grad_norm": 0.4817015528678894, "learning_rate": 4.495638157569373e-06, "loss": 0.0382, "step": 12466 }, { "epoch": 2.09, "grad_norm": 0.381997287273407, "learning_rate": 4.494130692306081e-06, "loss": 0.0331, "step": 12467 }, { "epoch": 2.09, "grad_norm": 0.2827851474285126, "learning_rate": 4.492623406568643e-06, "loss": 0.0251, "step": 12468 }, { "epoch": 2.09, "grad_norm": 0.3765946924686432, "learning_rate": 4.491116300406201e-06, "loss": 0.0398, "step": 12469 }, { "epoch": 2.09, "grad_norm": 0.4151614308357239, "learning_rate": 4.489609373867892e-06, "loss": 0.0326, "step": 12470 }, { "epoch": 2.09, "grad_norm": 0.43399667739868164, "learning_rate": 4.48810262700286e-06, "loss": 0.0378, "step": 12471 }, { "epoch": 2.09, "grad_norm": 0.32611560821533203, "learning_rate": 4.4865960598602295e-06, "loss": 0.0445, "step": 12472 }, { "epoch": 2.09, "grad_norm": 0.42677539587020874, "learning_rate": 4.48508967248912e-06, "loss": 0.0549, "step": 12473 }, { "epoch": 2.09, "grad_norm": 0.2531546652317047, "learning_rate": 4.483583464938655e-06, "loss": 0.0303, "step": 12474 }, { "epoch": 2.09, "grad_norm": 0.28206267952919006, "learning_rate": 4.482077437257944e-06, "loss": 0.0261, "step": 12475 }, { "epoch": 2.09, "grad_norm": 0.3540247976779938, "learning_rate": 4.480571589496096e-06, "loss": 0.0434, "step": 12476 }, { "epoch": 2.09, "grad_norm": 0.43173739314079285, "learning_rate": 4.479065921702211e-06, "loss": 0.0375, "step": 12477 }, { "epoch": 2.09, "grad_norm": 0.2538284957408905, "learning_rate": 4.477560433925377e-06, "loss": 0.0196, "step": 12478 }, { "epoch": 2.09, "grad_norm": 0.27610456943511963, "learning_rate": 4.476055126214689e-06, "loss": 0.0229, "step": 12479 }, { "epoch": 2.09, "grad_norm": 0.5430706143379211, "learning_rate": 4.474549998619227e-06, "loss": 0.0407, "step": 12480 }, { "epoch": 2.09, "grad_norm": 0.2907786965370178, "learning_rate": 4.473045051188065e-06, "loss": 0.0233, "step": 12481 }, { "epoch": 2.09, "grad_norm": 0.35757315158843994, "learning_rate": 4.471540283970276e-06, "loss": 0.0338, "step": 12482 }, { "epoch": 2.09, "grad_norm": 0.3415205180644989, "learning_rate": 4.470035697014929e-06, "loss": 0.0285, "step": 12483 }, { "epoch": 2.09, "grad_norm": 0.3226851224899292, "learning_rate": 4.468531290371074e-06, "loss": 0.0255, "step": 12484 }, { "epoch": 2.09, "grad_norm": 0.5278657674789429, "learning_rate": 4.4670270640877735e-06, "loss": 0.0297, "step": 12485 }, { "epoch": 2.09, "grad_norm": 0.42027294635772705, "learning_rate": 4.465523018214071e-06, "loss": 0.0404, "step": 12486 }, { "epoch": 2.09, "grad_norm": 0.4175092875957489, "learning_rate": 4.464019152799002e-06, "loss": 0.0404, "step": 12487 }, { "epoch": 2.09, "grad_norm": 0.37402257323265076, "learning_rate": 4.462515467891612e-06, "loss": 0.0266, "step": 12488 }, { "epoch": 2.09, "grad_norm": 0.6122322082519531, "learning_rate": 4.461011963540921e-06, "loss": 0.029, "step": 12489 }, { "epoch": 2.09, "grad_norm": 0.2714267373085022, "learning_rate": 4.459508639795958e-06, "loss": 0.0244, "step": 12490 }, { "epoch": 2.09, "grad_norm": 0.402147114276886, "learning_rate": 4.458005496705743e-06, "loss": 0.0214, "step": 12491 }, { "epoch": 2.09, "grad_norm": 0.5357303023338318, "learning_rate": 4.456502534319281e-06, "loss": 0.0688, "step": 12492 }, { "epoch": 2.09, "grad_norm": 0.22426675260066986, "learning_rate": 4.454999752685587e-06, "loss": 0.0145, "step": 12493 }, { "epoch": 2.09, "grad_norm": 0.32371827960014343, "learning_rate": 4.453497151853655e-06, "loss": 0.0212, "step": 12494 }, { "epoch": 2.09, "grad_norm": 0.4027084708213806, "learning_rate": 4.4519947318724765e-06, "loss": 0.0445, "step": 12495 }, { "epoch": 2.09, "grad_norm": 0.32274553179740906, "learning_rate": 4.450492492791048e-06, "loss": 0.0317, "step": 12496 }, { "epoch": 2.09, "grad_norm": 0.3992861807346344, "learning_rate": 4.4489904346583436e-06, "loss": 0.0391, "step": 12497 }, { "epoch": 2.09, "grad_norm": 0.47119465470314026, "learning_rate": 4.447488557523343e-06, "loss": 0.0344, "step": 12498 }, { "epoch": 2.09, "grad_norm": 0.36666160821914673, "learning_rate": 4.445986861435021e-06, "loss": 0.0379, "step": 12499 }, { "epoch": 2.09, "grad_norm": 0.2736980617046356, "learning_rate": 4.44448534644234e-06, "loss": 0.0307, "step": 12500 }, { "epoch": 2.09, "grad_norm": 0.46274206042289734, "learning_rate": 4.4429840125942526e-06, "loss": 0.0294, "step": 12501 }, { "epoch": 2.09, "grad_norm": 0.4439374804496765, "learning_rate": 4.441482859939721e-06, "loss": 0.0516, "step": 12502 }, { "epoch": 2.09, "grad_norm": 0.36757123470306396, "learning_rate": 4.439981888527685e-06, "loss": 0.0362, "step": 12503 }, { "epoch": 2.09, "grad_norm": 0.43651697039604187, "learning_rate": 4.4384810984070914e-06, "loss": 0.032, "step": 12504 }, { "epoch": 2.09, "grad_norm": 0.3728230893611908, "learning_rate": 4.436980489626869e-06, "loss": 0.0309, "step": 12505 }, { "epoch": 2.09, "grad_norm": 0.37052640318870544, "learning_rate": 4.4354800622359495e-06, "loss": 0.048, "step": 12506 }, { "epoch": 2.09, "grad_norm": 0.4011768102645874, "learning_rate": 4.433979816283261e-06, "loss": 0.0362, "step": 12507 }, { "epoch": 2.09, "grad_norm": 1.4288066625595093, "learning_rate": 4.432479751817719e-06, "loss": 0.0269, "step": 12508 }, { "epoch": 2.09, "grad_norm": 0.3729745149612427, "learning_rate": 4.430979868888229e-06, "loss": 0.0355, "step": 12509 }, { "epoch": 2.09, "grad_norm": 0.3209972381591797, "learning_rate": 4.429480167543704e-06, "loss": 0.0245, "step": 12510 }, { "epoch": 2.09, "grad_norm": 0.46053531765937805, "learning_rate": 4.42798064783304e-06, "loss": 0.0347, "step": 12511 }, { "epoch": 2.09, "grad_norm": 0.527155876159668, "learning_rate": 4.426481309805128e-06, "loss": 0.0395, "step": 12512 }, { "epoch": 2.09, "grad_norm": 0.44697147607803345, "learning_rate": 4.424982153508859e-06, "loss": 0.04, "step": 12513 }, { "epoch": 2.09, "grad_norm": 0.31395116448402405, "learning_rate": 4.423483178993119e-06, "loss": 0.0241, "step": 12514 }, { "epoch": 2.09, "grad_norm": 0.3371422290802002, "learning_rate": 4.421984386306776e-06, "loss": 0.0339, "step": 12515 }, { "epoch": 2.09, "grad_norm": 0.2954978048801422, "learning_rate": 4.420485775498706e-06, "loss": 0.0228, "step": 12516 }, { "epoch": 2.09, "grad_norm": 0.46804535388946533, "learning_rate": 4.418987346617769e-06, "loss": 0.0414, "step": 12517 }, { "epoch": 2.09, "grad_norm": 0.33579766750335693, "learning_rate": 4.417489099712829e-06, "loss": 0.0229, "step": 12518 }, { "epoch": 2.09, "grad_norm": 0.4594792425632477, "learning_rate": 4.415991034832734e-06, "loss": 0.045, "step": 12519 }, { "epoch": 2.09, "grad_norm": 1.18145751953125, "learning_rate": 4.414493152026327e-06, "loss": 0.0391, "step": 12520 }, { "epoch": 2.09, "grad_norm": 0.4379243552684784, "learning_rate": 4.412995451342456e-06, "loss": 0.0459, "step": 12521 }, { "epoch": 2.09, "grad_norm": 0.4032596945762634, "learning_rate": 4.411497932829948e-06, "loss": 0.0462, "step": 12522 }, { "epoch": 2.09, "grad_norm": 0.5619880557060242, "learning_rate": 4.410000596537635e-06, "loss": 0.0342, "step": 12523 }, { "epoch": 2.09, "grad_norm": 0.44460150599479675, "learning_rate": 4.408503442514343e-06, "loss": 0.0343, "step": 12524 }, { "epoch": 2.09, "grad_norm": 0.4045359194278717, "learning_rate": 4.4070064708088846e-06, "loss": 0.0317, "step": 12525 }, { "epoch": 2.09, "grad_norm": 0.40179407596588135, "learning_rate": 4.405509681470068e-06, "loss": 0.0299, "step": 12526 }, { "epoch": 2.1, "grad_norm": 0.3616257607936859, "learning_rate": 4.404013074546705e-06, "loss": 0.0517, "step": 12527 }, { "epoch": 2.1, "grad_norm": 0.3812173306941986, "learning_rate": 4.402516650087589e-06, "loss": 0.0461, "step": 12528 }, { "epoch": 2.1, "grad_norm": 0.5249478816986084, "learning_rate": 4.401020408141512e-06, "loss": 0.0297, "step": 12529 }, { "epoch": 2.1, "grad_norm": 0.4202893376350403, "learning_rate": 4.399524348757262e-06, "loss": 0.0365, "step": 12530 }, { "epoch": 2.1, "grad_norm": 0.4185156524181366, "learning_rate": 4.398028471983621e-06, "loss": 0.0372, "step": 12531 }, { "epoch": 2.1, "grad_norm": 0.3814438581466675, "learning_rate": 4.3965327778693675e-06, "loss": 0.0273, "step": 12532 }, { "epoch": 2.1, "grad_norm": 0.38259926438331604, "learning_rate": 4.395037266463268e-06, "loss": 0.0363, "step": 12533 }, { "epoch": 2.1, "grad_norm": 0.3261410892009735, "learning_rate": 4.3935419378140795e-06, "loss": 0.0276, "step": 12534 }, { "epoch": 2.1, "grad_norm": 0.3371397852897644, "learning_rate": 4.392046791970568e-06, "loss": 0.0284, "step": 12535 }, { "epoch": 2.1, "grad_norm": 0.4551430940628052, "learning_rate": 4.390551828981481e-06, "loss": 0.044, "step": 12536 }, { "epoch": 2.1, "grad_norm": 0.3201614022254944, "learning_rate": 4.389057048895559e-06, "loss": 0.0326, "step": 12537 }, { "epoch": 2.1, "grad_norm": 0.4243576228618622, "learning_rate": 4.387562451761546e-06, "loss": 0.0353, "step": 12538 }, { "epoch": 2.1, "grad_norm": 0.3277882933616638, "learning_rate": 4.386068037628178e-06, "loss": 0.0265, "step": 12539 }, { "epoch": 2.1, "grad_norm": 0.30306509137153625, "learning_rate": 4.384573806544176e-06, "loss": 0.0295, "step": 12540 }, { "epoch": 2.1, "grad_norm": 0.33160272240638733, "learning_rate": 4.383079758558266e-06, "loss": 0.0351, "step": 12541 }, { "epoch": 2.1, "grad_norm": 0.2939319908618927, "learning_rate": 4.381585893719163e-06, "loss": 0.0269, "step": 12542 }, { "epoch": 2.1, "grad_norm": 0.3710024952888489, "learning_rate": 4.380092212075572e-06, "loss": 0.0246, "step": 12543 }, { "epoch": 2.1, "grad_norm": 0.4551927447319031, "learning_rate": 4.378598713676203e-06, "loss": 0.0383, "step": 12544 }, { "epoch": 2.1, "grad_norm": 0.38657447695732117, "learning_rate": 4.377105398569745e-06, "loss": 0.061, "step": 12545 }, { "epoch": 2.1, "grad_norm": 0.4126521944999695, "learning_rate": 4.375612266804895e-06, "loss": 0.0222, "step": 12546 }, { "epoch": 2.1, "grad_norm": 0.22390957176685333, "learning_rate": 4.374119318430341e-06, "loss": 0.0298, "step": 12547 }, { "epoch": 2.1, "grad_norm": 0.4264097511768341, "learning_rate": 4.372626553494756e-06, "loss": 0.0385, "step": 12548 }, { "epoch": 2.1, "grad_norm": 0.5700833797454834, "learning_rate": 4.371133972046819e-06, "loss": 0.0333, "step": 12549 }, { "epoch": 2.1, "grad_norm": 0.3038690388202667, "learning_rate": 4.369641574135196e-06, "loss": 0.0393, "step": 12550 }, { "epoch": 2.1, "grad_norm": 0.27214381098747253, "learning_rate": 4.368149359808544e-06, "loss": 0.0272, "step": 12551 }, { "epoch": 2.1, "grad_norm": 0.2636524438858032, "learning_rate": 4.366657329115526e-06, "loss": 0.0312, "step": 12552 }, { "epoch": 2.1, "grad_norm": 0.4166004955768585, "learning_rate": 4.365165482104784e-06, "loss": 0.0415, "step": 12553 }, { "epoch": 2.1, "grad_norm": 0.5092612504959106, "learning_rate": 4.363673818824965e-06, "loss": 0.0537, "step": 12554 }, { "epoch": 2.1, "grad_norm": 0.3155291676521301, "learning_rate": 4.362182339324711e-06, "loss": 0.0347, "step": 12555 }, { "epoch": 2.1, "grad_norm": 0.38064879179000854, "learning_rate": 4.360691043652649e-06, "loss": 0.0301, "step": 12556 }, { "epoch": 2.1, "grad_norm": 0.35396334528923035, "learning_rate": 4.359199931857402e-06, "loss": 0.0381, "step": 12557 }, { "epoch": 2.1, "grad_norm": 0.36684975028038025, "learning_rate": 4.357709003987596e-06, "loss": 0.0415, "step": 12558 }, { "epoch": 2.1, "grad_norm": 0.4001772999763489, "learning_rate": 4.356218260091837e-06, "loss": 0.0425, "step": 12559 }, { "epoch": 2.1, "grad_norm": 0.4185701012611389, "learning_rate": 4.354727700218741e-06, "loss": 0.0373, "step": 12560 }, { "epoch": 2.1, "grad_norm": 0.35491839051246643, "learning_rate": 4.353237324416902e-06, "loss": 0.0362, "step": 12561 }, { "epoch": 2.1, "grad_norm": 0.37426748871803284, "learning_rate": 4.351747132734919e-06, "loss": 0.0404, "step": 12562 }, { "epoch": 2.1, "grad_norm": 0.4131465256214142, "learning_rate": 4.3502571252213855e-06, "loss": 0.0362, "step": 12563 }, { "epoch": 2.1, "grad_norm": 0.3081696331501007, "learning_rate": 4.34876730192488e-06, "loss": 0.0265, "step": 12564 }, { "epoch": 2.1, "grad_norm": 0.3320322036743164, "learning_rate": 4.347277662893978e-06, "loss": 0.0322, "step": 12565 }, { "epoch": 2.1, "grad_norm": 0.37039998173713684, "learning_rate": 4.345788208177258e-06, "loss": 0.0382, "step": 12566 }, { "epoch": 2.1, "grad_norm": 0.43676286935806274, "learning_rate": 4.344298937823284e-06, "loss": 0.0428, "step": 12567 }, { "epoch": 2.1, "grad_norm": 0.4300127923488617, "learning_rate": 4.342809851880607e-06, "loss": 0.0388, "step": 12568 }, { "epoch": 2.1, "grad_norm": 0.2932812571525574, "learning_rate": 4.3413209503977915e-06, "loss": 0.0313, "step": 12569 }, { "epoch": 2.1, "grad_norm": 0.3961953818798065, "learning_rate": 4.3398322334233774e-06, "loss": 0.0277, "step": 12570 }, { "epoch": 2.1, "grad_norm": 0.3047643005847931, "learning_rate": 4.338343701005909e-06, "loss": 0.0257, "step": 12571 }, { "epoch": 2.1, "grad_norm": 0.5069851875305176, "learning_rate": 4.336855353193926e-06, "loss": 0.0392, "step": 12572 }, { "epoch": 2.1, "grad_norm": 0.33519700169563293, "learning_rate": 4.33536719003595e-06, "loss": 0.0306, "step": 12573 }, { "epoch": 2.1, "grad_norm": 0.33853065967559814, "learning_rate": 4.333879211580512e-06, "loss": 0.031, "step": 12574 }, { "epoch": 2.1, "grad_norm": 0.37202757596969604, "learning_rate": 4.332391417876126e-06, "loss": 0.0392, "step": 12575 }, { "epoch": 2.1, "grad_norm": 0.5543196797370911, "learning_rate": 4.330903808971299e-06, "loss": 0.0235, "step": 12576 }, { "epoch": 2.1, "grad_norm": 0.49482929706573486, "learning_rate": 4.3294163849145454e-06, "loss": 0.0338, "step": 12577 }, { "epoch": 2.1, "grad_norm": 0.42499107122421265, "learning_rate": 4.327929145754355e-06, "loss": 0.0331, "step": 12578 }, { "epoch": 2.1, "grad_norm": 0.3953401446342468, "learning_rate": 4.326442091539227e-06, "loss": 0.0516, "step": 12579 }, { "epoch": 2.1, "grad_norm": 0.44088035821914673, "learning_rate": 4.3249552223176506e-06, "loss": 0.0413, "step": 12580 }, { "epoch": 2.1, "grad_norm": 0.3492352366447449, "learning_rate": 4.323468538138104e-06, "loss": 0.0257, "step": 12581 }, { "epoch": 2.1, "grad_norm": 0.3808352053165436, "learning_rate": 4.3219820390490585e-06, "loss": 0.0237, "step": 12582 }, { "epoch": 2.1, "grad_norm": 0.5879310369491577, "learning_rate": 4.320495725098991e-06, "loss": 0.0386, "step": 12583 }, { "epoch": 2.1, "grad_norm": 0.3579651117324829, "learning_rate": 4.319009596336361e-06, "loss": 0.035, "step": 12584 }, { "epoch": 2.1, "grad_norm": 0.5325177311897278, "learning_rate": 4.3175236528096225e-06, "loss": 0.0399, "step": 12585 }, { "epoch": 2.11, "grad_norm": 0.47906243801116943, "learning_rate": 4.316037894567228e-06, "loss": 0.0239, "step": 12586 }, { "epoch": 2.11, "grad_norm": 0.352488249540329, "learning_rate": 4.314552321657625e-06, "loss": 0.035, "step": 12587 }, { "epoch": 2.11, "grad_norm": 0.3172299861907959, "learning_rate": 4.3130669341292555e-06, "loss": 0.0294, "step": 12588 }, { "epoch": 2.11, "grad_norm": 0.4078058898448944, "learning_rate": 4.311581732030547e-06, "loss": 0.0412, "step": 12589 }, { "epoch": 2.11, "grad_norm": 0.39137041568756104, "learning_rate": 4.310096715409926e-06, "loss": 0.0319, "step": 12590 }, { "epoch": 2.11, "grad_norm": 0.42285749316215515, "learning_rate": 4.308611884315819e-06, "loss": 0.0498, "step": 12591 }, { "epoch": 2.11, "grad_norm": 0.4925994575023651, "learning_rate": 4.307127238796637e-06, "loss": 0.0481, "step": 12592 }, { "epoch": 2.11, "grad_norm": 0.5146403312683105, "learning_rate": 4.305642778900784e-06, "loss": 0.04, "step": 12593 }, { "epoch": 2.11, "grad_norm": 0.5118091702461243, "learning_rate": 4.304158504676669e-06, "loss": 0.0352, "step": 12594 }, { "epoch": 2.11, "grad_norm": 0.2987699508666992, "learning_rate": 4.3026744161726895e-06, "loss": 0.0285, "step": 12595 }, { "epoch": 2.11, "grad_norm": 0.5704124569892883, "learning_rate": 4.30119051343723e-06, "loss": 0.04, "step": 12596 }, { "epoch": 2.11, "grad_norm": 0.36340537667274475, "learning_rate": 4.299706796518683e-06, "loss": 0.0364, "step": 12597 }, { "epoch": 2.11, "grad_norm": 0.5341596007347107, "learning_rate": 4.298223265465423e-06, "loss": 0.0512, "step": 12598 }, { "epoch": 2.11, "grad_norm": 0.3882480263710022, "learning_rate": 4.296739920325819e-06, "loss": 0.0276, "step": 12599 }, { "epoch": 2.11, "grad_norm": 0.3520906865596771, "learning_rate": 4.295256761148243e-06, "loss": 0.0341, "step": 12600 }, { "epoch": 2.11, "grad_norm": 0.3140115439891815, "learning_rate": 4.293773787981049e-06, "loss": 0.0344, "step": 12601 }, { "epoch": 2.11, "grad_norm": 0.340533584356308, "learning_rate": 4.292291000872596e-06, "loss": 0.0365, "step": 12602 }, { "epoch": 2.11, "grad_norm": 0.366039514541626, "learning_rate": 4.2908083998712345e-06, "loss": 0.0327, "step": 12603 }, { "epoch": 2.11, "grad_norm": 0.26922115683555603, "learning_rate": 4.2893259850253e-06, "loss": 0.0239, "step": 12604 }, { "epoch": 2.11, "grad_norm": 0.28011012077331543, "learning_rate": 4.287843756383134e-06, "loss": 0.0221, "step": 12605 }, { "epoch": 2.11, "grad_norm": 0.3884657025337219, "learning_rate": 4.286361713993066e-06, "loss": 0.0323, "step": 12606 }, { "epoch": 2.11, "grad_norm": 0.4146502614021301, "learning_rate": 4.2848798579034135e-06, "loss": 0.0371, "step": 12607 }, { "epoch": 2.11, "grad_norm": 0.5832114815711975, "learning_rate": 4.283398188162502e-06, "loss": 0.041, "step": 12608 }, { "epoch": 2.11, "grad_norm": 0.3430965542793274, "learning_rate": 4.281916704818637e-06, "loss": 0.0363, "step": 12609 }, { "epoch": 2.11, "grad_norm": 0.3578946888446808, "learning_rate": 4.2804354079201284e-06, "loss": 0.0369, "step": 12610 }, { "epoch": 2.11, "grad_norm": 0.4509378969669342, "learning_rate": 4.278954297515276e-06, "loss": 0.0443, "step": 12611 }, { "epoch": 2.11, "grad_norm": 0.38570132851600647, "learning_rate": 4.277473373652373e-06, "loss": 0.0334, "step": 12612 }, { "epoch": 2.11, "grad_norm": 0.3776579797267914, "learning_rate": 4.275992636379703e-06, "loss": 0.0566, "step": 12613 }, { "epoch": 2.11, "grad_norm": 0.33383458852767944, "learning_rate": 4.2745120857455525e-06, "loss": 0.0253, "step": 12614 }, { "epoch": 2.11, "grad_norm": 0.6373018622398376, "learning_rate": 4.273031721798191e-06, "loss": 0.0445, "step": 12615 }, { "epoch": 2.11, "grad_norm": 0.32601284980773926, "learning_rate": 4.271551544585895e-06, "loss": 0.0341, "step": 12616 }, { "epoch": 2.11, "grad_norm": 0.3565891981124878, "learning_rate": 4.270071554156918e-06, "loss": 0.0506, "step": 12617 }, { "epoch": 2.11, "grad_norm": 0.42093032598495483, "learning_rate": 4.268591750559528e-06, "loss": 0.0409, "step": 12618 }, { "epoch": 2.11, "grad_norm": 0.6404981017112732, "learning_rate": 4.267112133841965e-06, "loss": 0.048, "step": 12619 }, { "epoch": 2.11, "grad_norm": 0.4280252456665039, "learning_rate": 4.265632704052483e-06, "loss": 0.0402, "step": 12620 }, { "epoch": 2.11, "grad_norm": 0.3607185482978821, "learning_rate": 4.264153461239314e-06, "loss": 0.0312, "step": 12621 }, { "epoch": 2.11, "grad_norm": 0.3790472447872162, "learning_rate": 4.262674405450698e-06, "loss": 0.0293, "step": 12622 }, { "epoch": 2.11, "grad_norm": 0.4015868902206421, "learning_rate": 4.261195536734854e-06, "loss": 0.0434, "step": 12623 }, { "epoch": 2.11, "grad_norm": 0.372550368309021, "learning_rate": 4.2597168551400034e-06, "loss": 0.0337, "step": 12624 }, { "epoch": 2.11, "grad_norm": 0.2900930643081665, "learning_rate": 4.258238360714366e-06, "loss": 0.0297, "step": 12625 }, { "epoch": 2.11, "grad_norm": 2.629093885421753, "learning_rate": 4.2567600535061415e-06, "loss": 0.0449, "step": 12626 }, { "epoch": 2.11, "grad_norm": 0.27874982357025146, "learning_rate": 4.255281933563538e-06, "loss": 0.0308, "step": 12627 }, { "epoch": 2.11, "grad_norm": 0.5321431159973145, "learning_rate": 4.2538040009347525e-06, "loss": 0.0508, "step": 12628 }, { "epoch": 2.11, "grad_norm": 0.5087595582008362, "learning_rate": 4.25232625566797e-06, "loss": 0.0387, "step": 12629 }, { "epoch": 2.11, "grad_norm": 0.3668071925640106, "learning_rate": 4.2508486978113804e-06, "loss": 0.021, "step": 12630 }, { "epoch": 2.11, "grad_norm": 0.48036178946495056, "learning_rate": 4.2493713274131575e-06, "loss": 0.04, "step": 12631 }, { "epoch": 2.11, "grad_norm": 0.48961141705513, "learning_rate": 4.2478941445214694e-06, "loss": 0.0452, "step": 12632 }, { "epoch": 2.11, "grad_norm": 0.5669052600860596, "learning_rate": 4.24641714918449e-06, "loss": 0.044, "step": 12633 }, { "epoch": 2.11, "grad_norm": 0.399772047996521, "learning_rate": 4.244940341450369e-06, "loss": 0.0347, "step": 12634 }, { "epoch": 2.11, "grad_norm": 0.37771785259246826, "learning_rate": 4.243463721367264e-06, "loss": 0.0251, "step": 12635 }, { "epoch": 2.11, "grad_norm": 0.3250974118709564, "learning_rate": 4.241987288983327e-06, "loss": 0.0395, "step": 12636 }, { "epoch": 2.11, "grad_norm": 0.32415124773979187, "learning_rate": 4.240511044346695e-06, "loss": 0.0323, "step": 12637 }, { "epoch": 2.11, "grad_norm": 0.2716148793697357, "learning_rate": 4.239034987505497e-06, "loss": 0.0251, "step": 12638 }, { "epoch": 2.11, "grad_norm": 0.4125087559223175, "learning_rate": 4.2375591185078725e-06, "loss": 0.0441, "step": 12639 }, { "epoch": 2.11, "grad_norm": 0.4727611243724823, "learning_rate": 4.236083437401937e-06, "loss": 0.0349, "step": 12640 }, { "epoch": 2.11, "grad_norm": 0.34314775466918945, "learning_rate": 4.234607944235806e-06, "loss": 0.0419, "step": 12641 }, { "epoch": 2.11, "grad_norm": 0.441346675157547, "learning_rate": 4.233132639057592e-06, "loss": 0.0332, "step": 12642 }, { "epoch": 2.11, "grad_norm": 0.37791380286216736, "learning_rate": 4.2316575219153996e-06, "loss": 0.022, "step": 12643 }, { "epoch": 2.11, "grad_norm": 0.28264302015304565, "learning_rate": 4.2301825928573305e-06, "loss": 0.0289, "step": 12644 }, { "epoch": 2.11, "grad_norm": 0.35339581966400146, "learning_rate": 4.2287078519314724e-06, "loss": 0.0269, "step": 12645 }, { "epoch": 2.12, "grad_norm": 0.4233379662036896, "learning_rate": 4.227233299185909e-06, "loss": 0.0288, "step": 12646 }, { "epoch": 2.12, "grad_norm": 0.675482451915741, "learning_rate": 4.2257589346687255e-06, "loss": 0.041, "step": 12647 }, { "epoch": 2.12, "grad_norm": 0.5184721350669861, "learning_rate": 4.224284758427993e-06, "loss": 0.0519, "step": 12648 }, { "epoch": 2.12, "grad_norm": 0.3126879632472992, "learning_rate": 4.222810770511775e-06, "loss": 0.0247, "step": 12649 }, { "epoch": 2.12, "grad_norm": 0.4319204092025757, "learning_rate": 4.221336970968135e-06, "loss": 0.0368, "step": 12650 }, { "epoch": 2.12, "grad_norm": 0.3579087257385254, "learning_rate": 4.2198633598451345e-06, "loss": 0.0515, "step": 12651 }, { "epoch": 2.12, "grad_norm": 0.28109654784202576, "learning_rate": 4.218389937190812e-06, "loss": 0.0282, "step": 12652 }, { "epoch": 2.12, "grad_norm": 0.34129735827445984, "learning_rate": 4.21691670305322e-06, "loss": 0.0259, "step": 12653 }, { "epoch": 2.12, "grad_norm": 0.4708234369754791, "learning_rate": 4.21544365748039e-06, "loss": 0.0497, "step": 12654 }, { "epoch": 2.12, "grad_norm": 0.4985644817352295, "learning_rate": 4.213970800520351e-06, "loss": 0.0421, "step": 12655 }, { "epoch": 2.12, "grad_norm": 0.39525845646858215, "learning_rate": 4.212498132221132e-06, "loss": 0.0292, "step": 12656 }, { "epoch": 2.12, "grad_norm": 0.3768177628517151, "learning_rate": 4.211025652630746e-06, "loss": 0.0337, "step": 12657 }, { "epoch": 2.12, "grad_norm": 0.44484424591064453, "learning_rate": 4.209553361797207e-06, "loss": 0.0233, "step": 12658 }, { "epoch": 2.12, "grad_norm": 0.3937125504016876, "learning_rate": 4.208081259768526e-06, "loss": 0.0286, "step": 12659 }, { "epoch": 2.12, "grad_norm": 0.27212271094322205, "learning_rate": 4.206609346592695e-06, "loss": 0.0342, "step": 12660 }, { "epoch": 2.12, "grad_norm": 0.3279687166213989, "learning_rate": 4.2051376223177145e-06, "loss": 0.0243, "step": 12661 }, { "epoch": 2.12, "grad_norm": 0.43667638301849365, "learning_rate": 4.203666086991569e-06, "loss": 0.0298, "step": 12662 }, { "epoch": 2.12, "grad_norm": 0.35795891284942627, "learning_rate": 4.202194740662235e-06, "loss": 0.0291, "step": 12663 }, { "epoch": 2.12, "grad_norm": 0.5378718376159668, "learning_rate": 4.200723583377696e-06, "loss": 0.0484, "step": 12664 }, { "epoch": 2.12, "grad_norm": 0.34320834279060364, "learning_rate": 4.199252615185914e-06, "loss": 0.0246, "step": 12665 }, { "epoch": 2.12, "grad_norm": 0.4681837558746338, "learning_rate": 4.197781836134853e-06, "loss": 0.0272, "step": 12666 }, { "epoch": 2.12, "grad_norm": 0.25317612290382385, "learning_rate": 4.196311246272476e-06, "loss": 0.0247, "step": 12667 }, { "epoch": 2.12, "grad_norm": 0.3615880012512207, "learning_rate": 4.194840845646728e-06, "loss": 0.0373, "step": 12668 }, { "epoch": 2.12, "grad_norm": 0.33840006589889526, "learning_rate": 4.193370634305551e-06, "loss": 0.0262, "step": 12669 }, { "epoch": 2.12, "grad_norm": 0.3411741256713867, "learning_rate": 4.191900612296891e-06, "loss": 0.0389, "step": 12670 }, { "epoch": 2.12, "grad_norm": 0.3508620262145996, "learning_rate": 4.1904307796686695e-06, "loss": 0.0385, "step": 12671 }, { "epoch": 2.12, "grad_norm": 0.45614513754844666, "learning_rate": 4.188961136468822e-06, "loss": 0.0552, "step": 12672 }, { "epoch": 2.12, "grad_norm": 0.34962552785873413, "learning_rate": 4.187491682745264e-06, "loss": 0.0353, "step": 12673 }, { "epoch": 2.12, "grad_norm": 0.4450107514858246, "learning_rate": 4.186022418545905e-06, "loss": 0.0328, "step": 12674 }, { "epoch": 2.12, "grad_norm": 0.46403712034225464, "learning_rate": 4.184553343918657e-06, "loss": 0.0384, "step": 12675 }, { "epoch": 2.12, "grad_norm": 0.5987973809242249, "learning_rate": 4.183084458911423e-06, "loss": 0.0354, "step": 12676 }, { "epoch": 2.12, "grad_norm": 0.7610551118850708, "learning_rate": 4.181615763572091e-06, "loss": 0.0386, "step": 12677 }, { "epoch": 2.12, "grad_norm": 0.34331589937210083, "learning_rate": 4.180147257948557e-06, "loss": 0.0331, "step": 12678 }, { "epoch": 2.12, "grad_norm": 0.8944577574729919, "learning_rate": 4.178678942088701e-06, "loss": 0.0494, "step": 12679 }, { "epoch": 2.12, "grad_norm": 0.6484714150428772, "learning_rate": 4.177210816040393e-06, "loss": 0.0336, "step": 12680 }, { "epoch": 2.12, "grad_norm": 0.6635839343070984, "learning_rate": 4.1757428798515145e-06, "loss": 0.0452, "step": 12681 }, { "epoch": 2.12, "grad_norm": 0.36872267723083496, "learning_rate": 4.174275133569918e-06, "loss": 0.0327, "step": 12682 }, { "epoch": 2.12, "grad_norm": 0.34358611702919006, "learning_rate": 4.172807577243468e-06, "loss": 0.0337, "step": 12683 }, { "epoch": 2.12, "grad_norm": 0.480743408203125, "learning_rate": 4.171340210920018e-06, "loss": 0.0277, "step": 12684 }, { "epoch": 2.12, "grad_norm": 0.3671349585056305, "learning_rate": 4.169873034647406e-06, "loss": 0.0358, "step": 12685 }, { "epoch": 2.12, "grad_norm": 0.28935280442237854, "learning_rate": 4.1684060484734775e-06, "loss": 0.0341, "step": 12686 }, { "epoch": 2.12, "grad_norm": 0.5458329319953918, "learning_rate": 4.166939252446065e-06, "loss": 0.0392, "step": 12687 }, { "epoch": 2.12, "grad_norm": 0.4941886067390442, "learning_rate": 4.165472646612989e-06, "loss": 0.0418, "step": 12688 }, { "epoch": 2.12, "grad_norm": 0.4070477783679962, "learning_rate": 4.1640062310220785e-06, "loss": 0.0308, "step": 12689 }, { "epoch": 2.12, "grad_norm": 0.36243343353271484, "learning_rate": 4.16254000572114e-06, "loss": 0.0227, "step": 12690 }, { "epoch": 2.12, "grad_norm": 0.29133522510528564, "learning_rate": 4.161073970757985e-06, "loss": 0.0404, "step": 12691 }, { "epoch": 2.12, "grad_norm": 0.5277355909347534, "learning_rate": 4.159608126180419e-06, "loss": 0.0452, "step": 12692 }, { "epoch": 2.12, "grad_norm": 0.5281229019165039, "learning_rate": 4.158142472036235e-06, "loss": 0.0375, "step": 12693 }, { "epoch": 2.12, "grad_norm": 0.44238677620887756, "learning_rate": 4.156677008373219e-06, "loss": 0.0401, "step": 12694 }, { "epoch": 2.12, "grad_norm": 0.6792963743209839, "learning_rate": 4.155211735239159e-06, "loss": 0.0425, "step": 12695 }, { "epoch": 2.12, "grad_norm": 0.34503045678138733, "learning_rate": 4.153746652681833e-06, "loss": 0.0325, "step": 12696 }, { "epoch": 2.12, "grad_norm": 0.2971682548522949, "learning_rate": 4.152281760749004e-06, "loss": 0.0263, "step": 12697 }, { "epoch": 2.12, "grad_norm": 1.0746190547943115, "learning_rate": 4.150817059488443e-06, "loss": 0.0372, "step": 12698 }, { "epoch": 2.12, "grad_norm": 0.306241899728775, "learning_rate": 4.149352548947907e-06, "loss": 0.0238, "step": 12699 }, { "epoch": 2.12, "grad_norm": 0.4167138338088989, "learning_rate": 4.147888229175152e-06, "loss": 0.0435, "step": 12700 }, { "epoch": 2.12, "grad_norm": 0.32997187972068787, "learning_rate": 4.146424100217921e-06, "loss": 0.0299, "step": 12701 }, { "epoch": 2.12, "grad_norm": 0.35756951570510864, "learning_rate": 4.144960162123949e-06, "loss": 0.0369, "step": 12702 }, { "epoch": 2.12, "grad_norm": 0.34867745637893677, "learning_rate": 4.1434964149409775e-06, "loss": 0.0222, "step": 12703 }, { "epoch": 2.12, "grad_norm": 0.5162965059280396, "learning_rate": 4.142032858716731e-06, "loss": 0.0498, "step": 12704 }, { "epoch": 2.12, "grad_norm": 0.6424620151519775, "learning_rate": 4.140569493498926e-06, "loss": 0.0449, "step": 12705 }, { "epoch": 2.13, "grad_norm": 0.33257627487182617, "learning_rate": 4.139106319335281e-06, "loss": 0.0348, "step": 12706 }, { "epoch": 2.13, "grad_norm": 0.38223862648010254, "learning_rate": 4.137643336273509e-06, "loss": 0.0326, "step": 12707 }, { "epoch": 2.13, "grad_norm": 0.49973392486572266, "learning_rate": 4.1361805443613045e-06, "loss": 0.036, "step": 12708 }, { "epoch": 2.13, "grad_norm": 0.4026535153388977, "learning_rate": 4.134717943646372e-06, "loss": 0.0467, "step": 12709 }, { "epoch": 2.13, "grad_norm": 0.39541926980018616, "learning_rate": 4.133255534176396e-06, "loss": 0.0401, "step": 12710 }, { "epoch": 2.13, "grad_norm": 0.4232175052165985, "learning_rate": 4.131793315999057e-06, "loss": 0.0493, "step": 12711 }, { "epoch": 2.13, "grad_norm": 0.36076366901397705, "learning_rate": 4.130331289162042e-06, "loss": 0.0357, "step": 12712 }, { "epoch": 2.13, "grad_norm": 0.6619784235954285, "learning_rate": 4.128869453713012e-06, "loss": 0.0347, "step": 12713 }, { "epoch": 2.13, "grad_norm": 0.3939143121242523, "learning_rate": 4.127407809699638e-06, "loss": 0.042, "step": 12714 }, { "epoch": 2.13, "grad_norm": 0.36503762006759644, "learning_rate": 4.125946357169579e-06, "loss": 0.0389, "step": 12715 }, { "epoch": 2.13, "grad_norm": 0.469840407371521, "learning_rate": 4.124485096170484e-06, "loss": 0.0378, "step": 12716 }, { "epoch": 2.13, "grad_norm": 0.3143690526485443, "learning_rate": 4.123024026750005e-06, "loss": 0.0401, "step": 12717 }, { "epoch": 2.13, "grad_norm": 0.39761948585510254, "learning_rate": 4.121563148955779e-06, "loss": 0.0377, "step": 12718 }, { "epoch": 2.13, "grad_norm": 0.2772758901119232, "learning_rate": 4.120102462835435e-06, "loss": 0.0247, "step": 12719 }, { "epoch": 2.13, "grad_norm": 0.330342561006546, "learning_rate": 4.118641968436609e-06, "loss": 0.0278, "step": 12720 }, { "epoch": 2.13, "grad_norm": 0.43555840849876404, "learning_rate": 4.117181665806913e-06, "loss": 0.058, "step": 12721 }, { "epoch": 2.13, "grad_norm": 0.404242604970932, "learning_rate": 4.115721554993972e-06, "loss": 0.0416, "step": 12722 }, { "epoch": 2.13, "grad_norm": 0.36593711376190186, "learning_rate": 4.114261636045386e-06, "loss": 0.0441, "step": 12723 }, { "epoch": 2.13, "grad_norm": 0.4959506392478943, "learning_rate": 4.112801909008764e-06, "loss": 0.0459, "step": 12724 }, { "epoch": 2.13, "grad_norm": 0.35794344544410706, "learning_rate": 4.1113423739316975e-06, "loss": 0.031, "step": 12725 }, { "epoch": 2.13, "grad_norm": 0.31071799993515015, "learning_rate": 4.109883030861781e-06, "loss": 0.0345, "step": 12726 }, { "epoch": 2.13, "grad_norm": 0.31471753120422363, "learning_rate": 4.108423879846593e-06, "loss": 0.0301, "step": 12727 }, { "epoch": 2.13, "grad_norm": 0.330681174993515, "learning_rate": 4.106964920933716e-06, "loss": 0.0239, "step": 12728 }, { "epoch": 2.13, "grad_norm": 0.4186002016067505, "learning_rate": 4.1055061541707195e-06, "loss": 0.0357, "step": 12729 }, { "epoch": 2.13, "grad_norm": 0.40771228075027466, "learning_rate": 4.104047579605165e-06, "loss": 0.0328, "step": 12730 }, { "epoch": 2.13, "grad_norm": 0.6131537556648254, "learning_rate": 4.102589197284614e-06, "loss": 0.038, "step": 12731 }, { "epoch": 2.13, "grad_norm": 0.4188036322593689, "learning_rate": 4.101131007256622e-06, "loss": 0.0406, "step": 12732 }, { "epoch": 2.13, "grad_norm": 0.4841107726097107, "learning_rate": 4.099673009568729e-06, "loss": 0.0527, "step": 12733 }, { "epoch": 2.13, "grad_norm": 0.3217317461967468, "learning_rate": 4.098215204268482e-06, "loss": 0.0301, "step": 12734 }, { "epoch": 2.13, "grad_norm": 0.5766127109527588, "learning_rate": 4.09675759140341e-06, "loss": 0.0283, "step": 12735 }, { "epoch": 2.13, "grad_norm": 0.29897987842559814, "learning_rate": 4.095300171021036e-06, "loss": 0.0242, "step": 12736 }, { "epoch": 2.13, "grad_norm": 0.3555789887905121, "learning_rate": 4.093842943168891e-06, "loss": 0.0332, "step": 12737 }, { "epoch": 2.13, "grad_norm": 0.38599514961242676, "learning_rate": 4.092385907894482e-06, "loss": 0.0363, "step": 12738 }, { "epoch": 2.13, "grad_norm": 0.3937508761882782, "learning_rate": 4.0909290652453185e-06, "loss": 0.0259, "step": 12739 }, { "epoch": 2.13, "grad_norm": 0.3880040645599365, "learning_rate": 4.089472415268908e-06, "loss": 0.0515, "step": 12740 }, { "epoch": 2.13, "grad_norm": 0.3330058157444, "learning_rate": 4.0880159580127395e-06, "loss": 0.0353, "step": 12741 }, { "epoch": 2.13, "grad_norm": 0.4927162528038025, "learning_rate": 4.08655969352431e-06, "loss": 0.035, "step": 12742 }, { "epoch": 2.13, "grad_norm": 0.4169805943965912, "learning_rate": 4.085103621851098e-06, "loss": 0.0413, "step": 12743 }, { "epoch": 2.13, "grad_norm": 0.45081424713134766, "learning_rate": 4.083647743040578e-06, "loss": 0.0405, "step": 12744 }, { "epoch": 2.13, "grad_norm": 0.262533038854599, "learning_rate": 4.082192057140229e-06, "loss": 0.0243, "step": 12745 }, { "epoch": 2.13, "grad_norm": 0.43800288438796997, "learning_rate": 4.080736564197505e-06, "loss": 0.0427, "step": 12746 }, { "epoch": 2.13, "grad_norm": 0.5428812503814697, "learning_rate": 4.079281264259871e-06, "loss": 0.0294, "step": 12747 }, { "epoch": 2.13, "grad_norm": 0.3964173495769501, "learning_rate": 4.077826157374782e-06, "loss": 0.0271, "step": 12748 }, { "epoch": 2.13, "grad_norm": 0.3676155209541321, "learning_rate": 4.076371243589678e-06, "loss": 0.0342, "step": 12749 }, { "epoch": 2.13, "grad_norm": 0.3222988247871399, "learning_rate": 4.074916522951995e-06, "loss": 0.0212, "step": 12750 }, { "epoch": 2.13, "grad_norm": 0.3918341100215912, "learning_rate": 4.073461995509176e-06, "loss": 0.0385, "step": 12751 }, { "epoch": 2.13, "grad_norm": 0.35133659839630127, "learning_rate": 4.072007661308641e-06, "loss": 0.0282, "step": 12752 }, { "epoch": 2.13, "grad_norm": 0.3733215034008026, "learning_rate": 4.0705535203978094e-06, "loss": 0.0338, "step": 12753 }, { "epoch": 2.13, "grad_norm": 0.27580177783966064, "learning_rate": 4.0690995728240964e-06, "loss": 0.0336, "step": 12754 }, { "epoch": 2.13, "grad_norm": 0.3134874105453491, "learning_rate": 4.067645818634912e-06, "loss": 0.0255, "step": 12755 }, { "epoch": 2.13, "grad_norm": 0.41822579503059387, "learning_rate": 4.06619225787766e-06, "loss": 0.037, "step": 12756 }, { "epoch": 2.13, "grad_norm": 0.46490219235420227, "learning_rate": 4.064738890599733e-06, "loss": 0.0473, "step": 12757 }, { "epoch": 2.13, "grad_norm": 0.33628350496292114, "learning_rate": 4.063285716848515e-06, "loss": 0.0315, "step": 12758 }, { "epoch": 2.13, "grad_norm": 0.27911850810050964, "learning_rate": 4.061832736671396e-06, "loss": 0.0293, "step": 12759 }, { "epoch": 2.13, "grad_norm": 0.5597044825553894, "learning_rate": 4.06037995011575e-06, "loss": 0.0568, "step": 12760 }, { "epoch": 2.13, "grad_norm": 0.3462388813495636, "learning_rate": 4.058927357228942e-06, "loss": 0.0401, "step": 12761 }, { "epoch": 2.13, "grad_norm": 0.44480443000793457, "learning_rate": 4.057474958058339e-06, "loss": 0.0329, "step": 12762 }, { "epoch": 2.13, "grad_norm": 0.2964884340763092, "learning_rate": 4.056022752651303e-06, "loss": 0.0349, "step": 12763 }, { "epoch": 2.13, "grad_norm": 0.37344497442245483, "learning_rate": 4.054570741055178e-06, "loss": 0.0285, "step": 12764 }, { "epoch": 2.13, "grad_norm": 0.4884093701839447, "learning_rate": 4.053118923317314e-06, "loss": 0.0327, "step": 12765 }, { "epoch": 2.14, "grad_norm": 0.471635639667511, "learning_rate": 4.051667299485048e-06, "loss": 0.0383, "step": 12766 }, { "epoch": 2.14, "grad_norm": 0.43343958258628845, "learning_rate": 4.050215869605707e-06, "loss": 0.0285, "step": 12767 }, { "epoch": 2.14, "grad_norm": 0.4957146644592285, "learning_rate": 4.048764633726624e-06, "loss": 0.026, "step": 12768 }, { "epoch": 2.14, "grad_norm": 0.36085245013237, "learning_rate": 4.047313591895112e-06, "loss": 0.0333, "step": 12769 }, { "epoch": 2.14, "grad_norm": 0.47552812099456787, "learning_rate": 4.0458627441584875e-06, "loss": 0.0289, "step": 12770 }, { "epoch": 2.14, "grad_norm": 0.4295575022697449, "learning_rate": 4.04441209056406e-06, "loss": 0.0217, "step": 12771 }, { "epoch": 2.14, "grad_norm": 0.4881533086299896, "learning_rate": 4.042961631159122e-06, "loss": 0.0545, "step": 12772 }, { "epoch": 2.14, "grad_norm": 0.4532015919685364, "learning_rate": 4.0415113659909775e-06, "loss": 0.0382, "step": 12773 }, { "epoch": 2.14, "grad_norm": 0.4618473947048187, "learning_rate": 4.0400612951069095e-06, "loss": 0.0441, "step": 12774 }, { "epoch": 2.14, "grad_norm": 0.5909515619277954, "learning_rate": 4.0386114185541945e-06, "loss": 0.0492, "step": 12775 }, { "epoch": 2.14, "grad_norm": 0.4321126937866211, "learning_rate": 4.037161736380116e-06, "loss": 0.0286, "step": 12776 }, { "epoch": 2.14, "grad_norm": 0.2823264002799988, "learning_rate": 4.035712248631939e-06, "loss": 0.0191, "step": 12777 }, { "epoch": 2.14, "grad_norm": 0.33523857593536377, "learning_rate": 4.034262955356921e-06, "loss": 0.039, "step": 12778 }, { "epoch": 2.14, "grad_norm": 0.3803865611553192, "learning_rate": 4.0328138566023235e-06, "loss": 0.033, "step": 12779 }, { "epoch": 2.14, "grad_norm": 0.31144678592681885, "learning_rate": 4.031364952415399e-06, "loss": 0.0223, "step": 12780 }, { "epoch": 2.14, "grad_norm": 0.43153631687164307, "learning_rate": 4.0299162428433835e-06, "loss": 0.0458, "step": 12781 }, { "epoch": 2.14, "grad_norm": 0.42796504497528076, "learning_rate": 4.028467727933522e-06, "loss": 0.0369, "step": 12782 }, { "epoch": 2.14, "grad_norm": 0.35362571477890015, "learning_rate": 4.027019407733037e-06, "loss": 0.0318, "step": 12783 }, { "epoch": 2.14, "grad_norm": 0.31463417410850525, "learning_rate": 4.02557128228916e-06, "loss": 0.0282, "step": 12784 }, { "epoch": 2.14, "grad_norm": 0.45414257049560547, "learning_rate": 4.024123351649107e-06, "loss": 0.0371, "step": 12785 }, { "epoch": 2.14, "grad_norm": 0.41795939207077026, "learning_rate": 4.022675615860083e-06, "loss": 0.0378, "step": 12786 }, { "epoch": 2.14, "grad_norm": 0.3684004247188568, "learning_rate": 4.021228074969299e-06, "loss": 0.0336, "step": 12787 }, { "epoch": 2.14, "grad_norm": 0.4219796061515808, "learning_rate": 4.019780729023957e-06, "loss": 0.024, "step": 12788 }, { "epoch": 2.14, "grad_norm": 0.5409844517707825, "learning_rate": 4.018333578071244e-06, "loss": 0.0313, "step": 12789 }, { "epoch": 2.14, "grad_norm": 0.3776715397834778, "learning_rate": 4.01688662215835e-06, "loss": 0.0312, "step": 12790 }, { "epoch": 2.14, "grad_norm": 0.33934274315834045, "learning_rate": 4.015439861332454e-06, "loss": 0.0253, "step": 12791 }, { "epoch": 2.14, "grad_norm": 0.5292930603027344, "learning_rate": 4.013993295640725e-06, "loss": 0.0349, "step": 12792 }, { "epoch": 2.14, "grad_norm": 0.46616512537002563, "learning_rate": 4.012546925130337e-06, "loss": 0.0376, "step": 12793 }, { "epoch": 2.14, "grad_norm": 0.769846498966217, "learning_rate": 4.011100749848445e-06, "loss": 0.0629, "step": 12794 }, { "epoch": 2.14, "grad_norm": 0.47804856300354004, "learning_rate": 4.009654769842204e-06, "loss": 0.0446, "step": 12795 }, { "epoch": 2.14, "grad_norm": 0.4881977140903473, "learning_rate": 4.0082089851587694e-06, "loss": 0.0391, "step": 12796 }, { "epoch": 2.14, "grad_norm": 0.3894604742527008, "learning_rate": 4.006763395845271e-06, "loss": 0.0434, "step": 12797 }, { "epoch": 2.14, "grad_norm": 0.4519277811050415, "learning_rate": 4.005318001948856e-06, "loss": 0.0343, "step": 12798 }, { "epoch": 2.14, "grad_norm": 0.3689176142215729, "learning_rate": 4.003872803516647e-06, "loss": 0.032, "step": 12799 }, { "epoch": 2.14, "grad_norm": 0.34186649322509766, "learning_rate": 4.002427800595764e-06, "loss": 0.0307, "step": 12800 }, { "epoch": 2.14, "grad_norm": 0.3998628854751587, "learning_rate": 4.000982993233329e-06, "loss": 0.0346, "step": 12801 }, { "epoch": 2.14, "grad_norm": 0.43405720591545105, "learning_rate": 3.999538381476446e-06, "loss": 0.0404, "step": 12802 }, { "epoch": 2.14, "grad_norm": 0.401039183139801, "learning_rate": 3.998093965372222e-06, "loss": 0.0278, "step": 12803 }, { "epoch": 2.14, "grad_norm": 0.3426079750061035, "learning_rate": 3.9966497449677555e-06, "loss": 0.0324, "step": 12804 }, { "epoch": 2.14, "grad_norm": 0.33546963334083557, "learning_rate": 3.995205720310137e-06, "loss": 0.0389, "step": 12805 }, { "epoch": 2.14, "grad_norm": 0.35808834433555603, "learning_rate": 3.993761891446444e-06, "loss": 0.0338, "step": 12806 }, { "epoch": 2.14, "grad_norm": 0.38931363821029663, "learning_rate": 3.992318258423763e-06, "loss": 0.0293, "step": 12807 }, { "epoch": 2.14, "grad_norm": 0.42100754380226135, "learning_rate": 3.990874821289162e-06, "loss": 0.042, "step": 12808 }, { "epoch": 2.14, "grad_norm": 0.28020796179771423, "learning_rate": 3.989431580089702e-06, "loss": 0.0244, "step": 12809 }, { "epoch": 2.14, "grad_norm": 0.38092970848083496, "learning_rate": 3.987988534872446e-06, "loss": 0.0447, "step": 12810 }, { "epoch": 2.14, "grad_norm": 0.3858203887939453, "learning_rate": 3.986545685684446e-06, "loss": 0.0349, "step": 12811 }, { "epoch": 2.14, "grad_norm": 0.27138790488243103, "learning_rate": 3.985103032572751e-06, "loss": 0.0312, "step": 12812 }, { "epoch": 2.14, "grad_norm": 0.3842178285121918, "learning_rate": 3.983660575584397e-06, "loss": 0.0238, "step": 12813 }, { "epoch": 2.14, "grad_norm": 0.5098624229431152, "learning_rate": 3.982218314766415e-06, "loss": 0.0561, "step": 12814 }, { "epoch": 2.14, "grad_norm": 0.2644045948982239, "learning_rate": 3.9807762501658364e-06, "loss": 0.0286, "step": 12815 }, { "epoch": 2.14, "grad_norm": 0.25912198424339294, "learning_rate": 3.97933438182968e-06, "loss": 0.019, "step": 12816 }, { "epoch": 2.14, "grad_norm": 0.32513508200645447, "learning_rate": 3.977892709804956e-06, "loss": 0.0229, "step": 12817 }, { "epoch": 2.14, "grad_norm": 0.3550693690776825, "learning_rate": 3.976451234138674e-06, "loss": 0.0476, "step": 12818 }, { "epoch": 2.14, "grad_norm": 0.38194337487220764, "learning_rate": 3.975009954877841e-06, "loss": 0.0327, "step": 12819 }, { "epoch": 2.14, "grad_norm": 0.4778670072555542, "learning_rate": 3.973568872069442e-06, "loss": 0.0434, "step": 12820 }, { "epoch": 2.14, "grad_norm": 0.3714812397956848, "learning_rate": 3.972127985760475e-06, "loss": 0.0287, "step": 12821 }, { "epoch": 2.14, "grad_norm": 0.3138274550437927, "learning_rate": 3.970687295997913e-06, "loss": 0.022, "step": 12822 }, { "epoch": 2.14, "grad_norm": 0.5017386078834534, "learning_rate": 3.96924680282874e-06, "loss": 0.0348, "step": 12823 }, { "epoch": 2.14, "grad_norm": 0.4073861241340637, "learning_rate": 3.967806506299919e-06, "loss": 0.0323, "step": 12824 }, { "epoch": 2.15, "grad_norm": 0.41774657368659973, "learning_rate": 3.966366406458413e-06, "loss": 0.0426, "step": 12825 }, { "epoch": 2.15, "grad_norm": 0.32497698068618774, "learning_rate": 3.964926503351178e-06, "loss": 0.0231, "step": 12826 }, { "epoch": 2.15, "grad_norm": 0.4202239513397217, "learning_rate": 3.963486797025171e-06, "loss": 0.043, "step": 12827 }, { "epoch": 2.15, "grad_norm": 0.32267066836357117, "learning_rate": 3.962047287527325e-06, "loss": 0.0317, "step": 12828 }, { "epoch": 2.15, "grad_norm": 0.33896833658218384, "learning_rate": 3.9606079749045865e-06, "loss": 0.0268, "step": 12829 }, { "epoch": 2.15, "grad_norm": 0.3485028147697449, "learning_rate": 3.9591688592038825e-06, "loss": 0.0382, "step": 12830 }, { "epoch": 2.15, "grad_norm": 0.35398104786872864, "learning_rate": 3.957729940472131e-06, "loss": 0.0356, "step": 12831 }, { "epoch": 2.15, "grad_norm": 0.42912009358406067, "learning_rate": 3.95629121875626e-06, "loss": 0.0343, "step": 12832 }, { "epoch": 2.15, "grad_norm": 0.3714001774787903, "learning_rate": 3.954852694103176e-06, "loss": 0.0281, "step": 12833 }, { "epoch": 2.15, "grad_norm": 0.42370906472206116, "learning_rate": 3.95341436655978e-06, "loss": 0.0356, "step": 12834 }, { "epoch": 2.15, "grad_norm": 0.4092184007167816, "learning_rate": 3.9519762361729745e-06, "loss": 0.0319, "step": 12835 }, { "epoch": 2.15, "grad_norm": 0.24576975405216217, "learning_rate": 3.950538302989651e-06, "loss": 0.0292, "step": 12836 }, { "epoch": 2.15, "grad_norm": 0.35344648361206055, "learning_rate": 3.9491005670567004e-06, "loss": 0.0294, "step": 12837 }, { "epoch": 2.15, "grad_norm": 0.46046391129493713, "learning_rate": 3.947663028420997e-06, "loss": 0.0342, "step": 12838 }, { "epoch": 2.15, "grad_norm": 0.34945493936538696, "learning_rate": 3.946225687129409e-06, "loss": 0.0232, "step": 12839 }, { "epoch": 2.15, "grad_norm": 0.45472124218940735, "learning_rate": 3.944788543228813e-06, "loss": 0.0209, "step": 12840 }, { "epoch": 2.15, "grad_norm": 0.5738368034362793, "learning_rate": 3.943351596766062e-06, "loss": 0.0533, "step": 12841 }, { "epoch": 2.15, "grad_norm": 0.3661687672138214, "learning_rate": 3.941914847788006e-06, "loss": 0.0271, "step": 12842 }, { "epoch": 2.15, "grad_norm": 0.3761058747768402, "learning_rate": 3.940478296341499e-06, "loss": 0.0339, "step": 12843 }, { "epoch": 2.15, "grad_norm": 0.39641526341438293, "learning_rate": 3.939041942473383e-06, "loss": 0.0426, "step": 12844 }, { "epoch": 2.15, "grad_norm": 0.34501051902770996, "learning_rate": 3.937605786230484e-06, "loss": 0.0329, "step": 12845 }, { "epoch": 2.15, "grad_norm": 0.24410602450370789, "learning_rate": 3.936169827659638e-06, "loss": 0.0207, "step": 12846 }, { "epoch": 2.15, "grad_norm": 0.3329639434814453, "learning_rate": 3.934734066807663e-06, "loss": 0.0276, "step": 12847 }, { "epoch": 2.15, "grad_norm": 0.291042685508728, "learning_rate": 3.933298503721369e-06, "loss": 0.0334, "step": 12848 }, { "epoch": 2.15, "grad_norm": 0.3310850262641907, "learning_rate": 3.9318631384475725e-06, "loss": 0.0282, "step": 12849 }, { "epoch": 2.15, "grad_norm": 0.4293077886104584, "learning_rate": 3.930427971033069e-06, "loss": 0.0418, "step": 12850 }, { "epoch": 2.15, "grad_norm": 0.4481232464313507, "learning_rate": 3.928993001524655e-06, "loss": 0.0347, "step": 12851 }, { "epoch": 2.15, "grad_norm": 0.4344418942928314, "learning_rate": 3.927558229969125e-06, "loss": 0.0421, "step": 12852 }, { "epoch": 2.15, "grad_norm": 0.2432127743959427, "learning_rate": 3.9261236564132545e-06, "loss": 0.0291, "step": 12853 }, { "epoch": 2.15, "grad_norm": 0.3526299297809601, "learning_rate": 3.924689280903825e-06, "loss": 0.0311, "step": 12854 }, { "epoch": 2.15, "grad_norm": 0.4237864911556244, "learning_rate": 3.923255103487605e-06, "loss": 0.0357, "step": 12855 }, { "epoch": 2.15, "grad_norm": 0.4350223243236542, "learning_rate": 3.921821124211353e-06, "loss": 0.036, "step": 12856 }, { "epoch": 2.15, "grad_norm": 0.2494925558567047, "learning_rate": 3.920387343121832e-06, "loss": 0.0169, "step": 12857 }, { "epoch": 2.15, "grad_norm": 0.4358607530593872, "learning_rate": 3.918953760265787e-06, "loss": 0.0294, "step": 12858 }, { "epoch": 2.15, "grad_norm": 0.450484037399292, "learning_rate": 3.917520375689963e-06, "loss": 0.0312, "step": 12859 }, { "epoch": 2.15, "grad_norm": 0.6748276352882385, "learning_rate": 3.916087189441102e-06, "loss": 0.0379, "step": 12860 }, { "epoch": 2.15, "grad_norm": 0.46984249353408813, "learning_rate": 3.9146542015659325e-06, "loss": 0.0499, "step": 12861 }, { "epoch": 2.15, "grad_norm": 0.403203547000885, "learning_rate": 3.913221412111173e-06, "loss": 0.0351, "step": 12862 }, { "epoch": 2.15, "grad_norm": 0.31852856278419495, "learning_rate": 3.911788821123551e-06, "loss": 0.0213, "step": 12863 }, { "epoch": 2.15, "grad_norm": 0.44610869884490967, "learning_rate": 3.91035642864977e-06, "loss": 0.0458, "step": 12864 }, { "epoch": 2.15, "grad_norm": 0.5501106977462769, "learning_rate": 3.90892423473654e-06, "loss": 0.0395, "step": 12865 }, { "epoch": 2.15, "grad_norm": 0.3042137920856476, "learning_rate": 3.907492239430555e-06, "loss": 0.0285, "step": 12866 }, { "epoch": 2.15, "grad_norm": 0.36012017726898193, "learning_rate": 3.906060442778509e-06, "loss": 0.0293, "step": 12867 }, { "epoch": 2.15, "grad_norm": 0.4878154993057251, "learning_rate": 3.904628844827092e-06, "loss": 0.0309, "step": 12868 }, { "epoch": 2.15, "grad_norm": 0.3801709711551666, "learning_rate": 3.903197445622979e-06, "loss": 0.0278, "step": 12869 }, { "epoch": 2.15, "grad_norm": 0.3437133729457855, "learning_rate": 3.90176624521284e-06, "loss": 0.0387, "step": 12870 }, { "epoch": 2.15, "grad_norm": 0.35583364963531494, "learning_rate": 3.9003352436433476e-06, "loss": 0.0303, "step": 12871 }, { "epoch": 2.15, "grad_norm": 0.2364487498998642, "learning_rate": 3.898904440961158e-06, "loss": 0.0194, "step": 12872 }, { "epoch": 2.15, "grad_norm": 0.41051188111305237, "learning_rate": 3.89747383721292e-06, "loss": 0.0443, "step": 12873 }, { "epoch": 2.15, "grad_norm": 0.26867204904556274, "learning_rate": 3.896043432445283e-06, "loss": 0.0267, "step": 12874 }, { "epoch": 2.15, "grad_norm": 0.2847478985786438, "learning_rate": 3.8946132267048935e-06, "loss": 0.0315, "step": 12875 }, { "epoch": 2.15, "grad_norm": 0.34005850553512573, "learning_rate": 3.8931832200383765e-06, "loss": 0.0295, "step": 12876 }, { "epoch": 2.15, "grad_norm": 0.31224140524864197, "learning_rate": 3.891753412492366e-06, "loss": 0.0315, "step": 12877 }, { "epoch": 2.15, "grad_norm": 0.28786975145339966, "learning_rate": 3.890323804113476e-06, "loss": 0.0305, "step": 12878 }, { "epoch": 2.15, "grad_norm": 0.2175283432006836, "learning_rate": 3.888894394948328e-06, "loss": 0.0233, "step": 12879 }, { "epoch": 2.15, "grad_norm": 0.3025265336036682, "learning_rate": 3.887465185043526e-06, "loss": 0.0321, "step": 12880 }, { "epoch": 2.15, "grad_norm": 0.42242756485939026, "learning_rate": 3.886036174445668e-06, "loss": 0.0363, "step": 12881 }, { "epoch": 2.15, "grad_norm": 0.48690006136894226, "learning_rate": 3.884607363201355e-06, "loss": 0.0468, "step": 12882 }, { "epoch": 2.15, "grad_norm": 0.35855552554130554, "learning_rate": 3.8831787513571684e-06, "loss": 0.0316, "step": 12883 }, { "epoch": 2.15, "grad_norm": 0.3935631215572357, "learning_rate": 3.881750338959693e-06, "loss": 0.0393, "step": 12884 }, { "epoch": 2.16, "grad_norm": 0.37925928831100464, "learning_rate": 3.880322126055509e-06, "loss": 0.0347, "step": 12885 }, { "epoch": 2.16, "grad_norm": 0.35792219638824463, "learning_rate": 3.878894112691181e-06, "loss": 0.0273, "step": 12886 }, { "epoch": 2.16, "grad_norm": 0.38460367918014526, "learning_rate": 3.877466298913266e-06, "loss": 0.0335, "step": 12887 }, { "epoch": 2.16, "grad_norm": 0.4122034013271332, "learning_rate": 3.876038684768328e-06, "loss": 0.0405, "step": 12888 }, { "epoch": 2.16, "grad_norm": 0.3947676122188568, "learning_rate": 3.874611270302914e-06, "loss": 0.0386, "step": 12889 }, { "epoch": 2.16, "grad_norm": 0.40848857164382935, "learning_rate": 3.8731840555635606e-06, "loss": 0.03, "step": 12890 }, { "epoch": 2.16, "grad_norm": 0.29508543014526367, "learning_rate": 3.871757040596808e-06, "loss": 0.0332, "step": 12891 }, { "epoch": 2.16, "grad_norm": 0.3404502868652344, "learning_rate": 3.870330225449188e-06, "loss": 0.0213, "step": 12892 }, { "epoch": 2.16, "grad_norm": 0.5684270262718201, "learning_rate": 3.868903610167226e-06, "loss": 0.0486, "step": 12893 }, { "epoch": 2.16, "grad_norm": 0.41342681646347046, "learning_rate": 3.867477194797433e-06, "loss": 0.0173, "step": 12894 }, { "epoch": 2.16, "grad_norm": 0.45142242312431335, "learning_rate": 3.8660509793863185e-06, "loss": 0.0351, "step": 12895 }, { "epoch": 2.16, "grad_norm": 0.32211488485336304, "learning_rate": 3.864624963980393e-06, "loss": 0.0339, "step": 12896 }, { "epoch": 2.16, "grad_norm": 0.3057439923286438, "learning_rate": 3.8631991486261475e-06, "loss": 0.0375, "step": 12897 }, { "epoch": 2.16, "grad_norm": 0.30334892868995667, "learning_rate": 3.861773533370071e-06, "loss": 0.037, "step": 12898 }, { "epoch": 2.16, "grad_norm": 0.3236851096153259, "learning_rate": 3.860348118258651e-06, "loss": 0.0286, "step": 12899 }, { "epoch": 2.16, "grad_norm": 0.36919426918029785, "learning_rate": 3.858922903338367e-06, "loss": 0.0329, "step": 12900 }, { "epoch": 2.16, "grad_norm": 0.3606436252593994, "learning_rate": 3.857497888655685e-06, "loss": 0.025, "step": 12901 }, { "epoch": 2.16, "grad_norm": 0.4185332953929901, "learning_rate": 3.856073074257074e-06, "loss": 0.0454, "step": 12902 }, { "epoch": 2.16, "grad_norm": 0.21378621459007263, "learning_rate": 3.85464846018899e-06, "loss": 0.0198, "step": 12903 }, { "epoch": 2.16, "grad_norm": 0.4176883101463318, "learning_rate": 3.853224046497881e-06, "loss": 0.0364, "step": 12904 }, { "epoch": 2.16, "grad_norm": 0.24664300680160522, "learning_rate": 3.851799833230199e-06, "loss": 0.0188, "step": 12905 }, { "epoch": 2.16, "grad_norm": 0.2928014099597931, "learning_rate": 3.8503758204323734e-06, "loss": 0.0363, "step": 12906 }, { "epoch": 2.16, "grad_norm": 0.35656434297561646, "learning_rate": 3.848952008150842e-06, "loss": 0.0366, "step": 12907 }, { "epoch": 2.16, "grad_norm": 0.37845176458358765, "learning_rate": 3.847528396432031e-06, "loss": 0.0459, "step": 12908 }, { "epoch": 2.16, "grad_norm": 0.391733318567276, "learning_rate": 3.846104985322353e-06, "loss": 0.0372, "step": 12909 }, { "epoch": 2.16, "grad_norm": 0.3309672772884369, "learning_rate": 3.844681774868228e-06, "loss": 0.0386, "step": 12910 }, { "epoch": 2.16, "grad_norm": 0.4679971933364868, "learning_rate": 3.843258765116058e-06, "loss": 0.0388, "step": 12911 }, { "epoch": 2.16, "grad_norm": 0.3003740608692169, "learning_rate": 3.8418359561122376e-06, "loss": 0.028, "step": 12912 }, { "epoch": 2.16, "grad_norm": 0.41010335087776184, "learning_rate": 3.840413347903167e-06, "loss": 0.0422, "step": 12913 }, { "epoch": 2.16, "grad_norm": 0.26482853293418884, "learning_rate": 3.838990940535225e-06, "loss": 0.0276, "step": 12914 }, { "epoch": 2.16, "grad_norm": 0.4098838269710541, "learning_rate": 3.837568734054793e-06, "loss": 0.0381, "step": 12915 }, { "epoch": 2.16, "grad_norm": 0.3218212425708771, "learning_rate": 3.83614672850825e-06, "loss": 0.0246, "step": 12916 }, { "epoch": 2.16, "grad_norm": 0.2500348389148712, "learning_rate": 3.834724923941956e-06, "loss": 0.0231, "step": 12917 }, { "epoch": 2.16, "grad_norm": 0.44338980317115784, "learning_rate": 3.83330332040227e-06, "loss": 0.0365, "step": 12918 }, { "epoch": 2.16, "grad_norm": 0.4858759045600891, "learning_rate": 3.8318819179355505e-06, "loss": 0.0448, "step": 12919 }, { "epoch": 2.16, "grad_norm": 0.44326338171958923, "learning_rate": 3.830460716588138e-06, "loss": 0.0361, "step": 12920 }, { "epoch": 2.16, "grad_norm": 0.3292124569416046, "learning_rate": 3.829039716406378e-06, "loss": 0.0281, "step": 12921 }, { "epoch": 2.16, "grad_norm": 0.357375830411911, "learning_rate": 3.827618917436599e-06, "loss": 0.0314, "step": 12922 }, { "epoch": 2.16, "grad_norm": 0.47470223903656006, "learning_rate": 3.826198319725131e-06, "loss": 0.0389, "step": 12923 }, { "epoch": 2.16, "grad_norm": 0.3383335769176483, "learning_rate": 3.824777923318296e-06, "loss": 0.0258, "step": 12924 }, { "epoch": 2.16, "grad_norm": 0.4600413143634796, "learning_rate": 3.823357728262407e-06, "loss": 0.0443, "step": 12925 }, { "epoch": 2.16, "grad_norm": 0.39306312799453735, "learning_rate": 3.821937734603766e-06, "loss": 0.0383, "step": 12926 }, { "epoch": 2.16, "grad_norm": 0.3809921145439148, "learning_rate": 3.8205179423886816e-06, "loss": 0.0276, "step": 12927 }, { "epoch": 2.16, "grad_norm": 0.30749237537384033, "learning_rate": 3.819098351663444e-06, "loss": 0.0355, "step": 12928 }, { "epoch": 2.16, "grad_norm": 0.415178507566452, "learning_rate": 3.817678962474336e-06, "loss": 0.0384, "step": 12929 }, { "epoch": 2.16, "grad_norm": 0.4050486385822296, "learning_rate": 3.816259774867646e-06, "loss": 0.0316, "step": 12930 }, { "epoch": 2.16, "grad_norm": 0.19134043157100677, "learning_rate": 3.814840788889647e-06, "loss": 0.0129, "step": 12931 }, { "epoch": 2.16, "grad_norm": 0.645576000213623, "learning_rate": 3.8134220045866033e-06, "loss": 0.0415, "step": 12932 }, { "epoch": 2.16, "grad_norm": 0.3866867423057556, "learning_rate": 3.8120034220047807e-06, "loss": 0.0313, "step": 12933 }, { "epoch": 2.16, "grad_norm": 0.37673231959342957, "learning_rate": 3.8105850411904298e-06, "loss": 0.0271, "step": 12934 }, { "epoch": 2.16, "grad_norm": 0.38444823026657104, "learning_rate": 3.8091668621898037e-06, "loss": 0.0376, "step": 12935 }, { "epoch": 2.16, "grad_norm": 0.4411930739879608, "learning_rate": 3.8077488850491395e-06, "loss": 0.0572, "step": 12936 }, { "epoch": 2.16, "grad_norm": 0.4497844874858856, "learning_rate": 3.8063311098146716e-06, "loss": 0.0334, "step": 12937 }, { "epoch": 2.16, "grad_norm": 0.5189823508262634, "learning_rate": 3.8049135365326327e-06, "loss": 0.0473, "step": 12938 }, { "epoch": 2.16, "grad_norm": 0.27114319801330566, "learning_rate": 3.8034961652492395e-06, "loss": 0.0171, "step": 12939 }, { "epoch": 2.16, "grad_norm": 0.4347774386405945, "learning_rate": 3.8020789960107095e-06, "loss": 0.035, "step": 12940 }, { "epoch": 2.16, "grad_norm": 0.5129401683807373, "learning_rate": 3.8006620288632547e-06, "loss": 0.047, "step": 12941 }, { "epoch": 2.16, "grad_norm": 0.3745807111263275, "learning_rate": 3.799245263853074e-06, "loss": 0.029, "step": 12942 }, { "epoch": 2.16, "grad_norm": 0.3543749749660492, "learning_rate": 3.7978287010263594e-06, "loss": 0.0401, "step": 12943 }, { "epoch": 2.16, "grad_norm": 0.38541075587272644, "learning_rate": 3.7964123404293064e-06, "loss": 0.0363, "step": 12944 }, { "epoch": 2.17, "grad_norm": 0.553571879863739, "learning_rate": 3.794996182108095e-06, "loss": 0.0378, "step": 12945 }, { "epoch": 2.17, "grad_norm": 0.3210340142250061, "learning_rate": 3.793580226108895e-06, "loss": 0.0237, "step": 12946 }, { "epoch": 2.17, "grad_norm": 0.29259955883026123, "learning_rate": 3.7921644724778805e-06, "loss": 0.0171, "step": 12947 }, { "epoch": 2.17, "grad_norm": 0.37557855248451233, "learning_rate": 3.7907489212612147e-06, "loss": 0.0331, "step": 12948 }, { "epoch": 2.17, "grad_norm": 0.4398707449436188, "learning_rate": 3.789333572505055e-06, "loss": 0.0315, "step": 12949 }, { "epoch": 2.17, "grad_norm": 0.3602905571460724, "learning_rate": 3.787918426255548e-06, "loss": 0.0302, "step": 12950 }, { "epoch": 2.17, "grad_norm": 0.2976253628730774, "learning_rate": 3.786503482558833e-06, "loss": 0.0279, "step": 12951 }, { "epoch": 2.17, "grad_norm": 0.29519644379615784, "learning_rate": 3.7850887414610537e-06, "loss": 0.0269, "step": 12952 }, { "epoch": 2.17, "grad_norm": 0.27614283561706543, "learning_rate": 3.7836742030083352e-06, "loss": 0.0215, "step": 12953 }, { "epoch": 2.17, "grad_norm": 0.3189312517642975, "learning_rate": 3.7822598672467957e-06, "loss": 0.0303, "step": 12954 }, { "epoch": 2.17, "grad_norm": 0.3158501088619232, "learning_rate": 3.780845734222557e-06, "loss": 0.0217, "step": 12955 }, { "epoch": 2.17, "grad_norm": 0.43337780237197876, "learning_rate": 3.779431803981731e-06, "loss": 0.0371, "step": 12956 }, { "epoch": 2.17, "grad_norm": 0.4520297646522522, "learning_rate": 3.7780180765704144e-06, "loss": 0.031, "step": 12957 }, { "epoch": 2.17, "grad_norm": 0.3431090712547302, "learning_rate": 3.7766045520347093e-06, "loss": 0.044, "step": 12958 }, { "epoch": 2.17, "grad_norm": 0.4200020134449005, "learning_rate": 3.775191230420704e-06, "loss": 0.0268, "step": 12959 }, { "epoch": 2.17, "grad_norm": 0.36930468678474426, "learning_rate": 3.7737781117744766e-06, "loss": 0.033, "step": 12960 }, { "epoch": 2.17, "grad_norm": 0.7841464877128601, "learning_rate": 3.7723651961421106e-06, "loss": 0.022, "step": 12961 }, { "epoch": 2.17, "grad_norm": 0.37083595991134644, "learning_rate": 3.7709524835696687e-06, "loss": 0.041, "step": 12962 }, { "epoch": 2.17, "grad_norm": 0.39165541529655457, "learning_rate": 3.769539974103219e-06, "loss": 0.0528, "step": 12963 }, { "epoch": 2.17, "grad_norm": 0.3367438316345215, "learning_rate": 3.768127667788821e-06, "loss": 0.0247, "step": 12964 }, { "epoch": 2.17, "grad_norm": 0.3662204444408417, "learning_rate": 3.7667155646725183e-06, "loss": 0.0321, "step": 12965 }, { "epoch": 2.17, "grad_norm": 0.2875080108642578, "learning_rate": 3.7653036648003592e-06, "loss": 0.0267, "step": 12966 }, { "epoch": 2.17, "grad_norm": 0.2551427185535431, "learning_rate": 3.7638919682183792e-06, "loss": 0.0223, "step": 12967 }, { "epoch": 2.17, "grad_norm": 0.3730628192424774, "learning_rate": 3.7624804749726052e-06, "loss": 0.0401, "step": 12968 }, { "epoch": 2.17, "grad_norm": 0.23168469965457916, "learning_rate": 3.7610691851090654e-06, "loss": 0.0195, "step": 12969 }, { "epoch": 2.17, "grad_norm": 0.24578756093978882, "learning_rate": 3.759658098673772e-06, "loss": 0.0319, "step": 12970 }, { "epoch": 2.17, "grad_norm": 0.7731054425239563, "learning_rate": 3.758247215712737e-06, "loss": 0.0357, "step": 12971 }, { "epoch": 2.17, "grad_norm": 0.325839102268219, "learning_rate": 3.7568365362719685e-06, "loss": 0.0356, "step": 12972 }, { "epoch": 2.17, "grad_norm": 0.3653513789176941, "learning_rate": 3.75542606039746e-06, "loss": 0.0347, "step": 12973 }, { "epoch": 2.17, "grad_norm": 0.2711147964000702, "learning_rate": 3.7540157881351992e-06, "loss": 0.0294, "step": 12974 }, { "epoch": 2.17, "grad_norm": 0.5472782850265503, "learning_rate": 3.752605719531174e-06, "loss": 0.0418, "step": 12975 }, { "epoch": 2.17, "grad_norm": 0.40172287821769714, "learning_rate": 3.7511958546313578e-06, "loss": 0.0305, "step": 12976 }, { "epoch": 2.17, "grad_norm": 0.32714229822158813, "learning_rate": 3.7497861934817257e-06, "loss": 0.0356, "step": 12977 }, { "epoch": 2.17, "grad_norm": 0.28652089834213257, "learning_rate": 3.748376736128235e-06, "loss": 0.0182, "step": 12978 }, { "epoch": 2.17, "grad_norm": 0.5283606052398682, "learning_rate": 3.7469674826168465e-06, "loss": 0.05, "step": 12979 }, { "epoch": 2.17, "grad_norm": 0.3889736831188202, "learning_rate": 3.7455584329935156e-06, "loss": 0.0402, "step": 12980 }, { "epoch": 2.17, "grad_norm": 0.33798834681510925, "learning_rate": 3.7441495873041812e-06, "loss": 0.0258, "step": 12981 }, { "epoch": 2.17, "grad_norm": 0.40341880917549133, "learning_rate": 3.742740945594776e-06, "loss": 0.0334, "step": 12982 }, { "epoch": 2.17, "grad_norm": 0.3571704030036926, "learning_rate": 3.7413325079112397e-06, "loss": 0.0293, "step": 12983 }, { "epoch": 2.17, "grad_norm": 0.2606879770755768, "learning_rate": 3.739924274299491e-06, "loss": 0.0263, "step": 12984 }, { "epoch": 2.17, "grad_norm": 0.4131448268890381, "learning_rate": 3.7385162448054457e-06, "loss": 0.0294, "step": 12985 }, { "epoch": 2.17, "grad_norm": 0.2908252477645874, "learning_rate": 3.7371084194750185e-06, "loss": 0.0226, "step": 12986 }, { "epoch": 2.17, "grad_norm": 0.4288886487483978, "learning_rate": 3.7357007983541095e-06, "loss": 0.0265, "step": 12987 }, { "epoch": 2.17, "grad_norm": 0.43822258710861206, "learning_rate": 3.734293381488616e-06, "loss": 0.0412, "step": 12988 }, { "epoch": 2.17, "grad_norm": 0.39532503485679626, "learning_rate": 3.7328861689244358e-06, "loss": 0.062, "step": 12989 }, { "epoch": 2.17, "grad_norm": 0.41828975081443787, "learning_rate": 3.731479160707443e-06, "loss": 0.0343, "step": 12990 }, { "epoch": 2.17, "grad_norm": 0.35560983419418335, "learning_rate": 3.730072356883523e-06, "loss": 0.0285, "step": 12991 }, { "epoch": 2.17, "grad_norm": 0.49975237250328064, "learning_rate": 3.728665757498543e-06, "loss": 0.0354, "step": 12992 }, { "epoch": 2.17, "grad_norm": 0.3459714949131012, "learning_rate": 3.727259362598363e-06, "loss": 0.0284, "step": 12993 }, { "epoch": 2.17, "grad_norm": 0.2903735041618347, "learning_rate": 3.725853172228848e-06, "loss": 0.0355, "step": 12994 }, { "epoch": 2.17, "grad_norm": 0.4933835566043854, "learning_rate": 3.724447186435841e-06, "loss": 0.046, "step": 12995 }, { "epoch": 2.17, "grad_norm": 0.2977646291255951, "learning_rate": 3.72304140526519e-06, "loss": 0.0284, "step": 12996 }, { "epoch": 2.17, "grad_norm": 0.3405614495277405, "learning_rate": 3.7216358287627354e-06, "loss": 0.0404, "step": 12997 }, { "epoch": 2.17, "grad_norm": 0.2929682433605194, "learning_rate": 3.7202304569743042e-06, "loss": 0.0268, "step": 12998 }, { "epoch": 2.17, "grad_norm": 0.3941137492656708, "learning_rate": 3.7188252899457165e-06, "loss": 0.0287, "step": 12999 }, { "epoch": 2.17, "grad_norm": 0.3926072120666504, "learning_rate": 3.7174203277227973e-06, "loss": 0.0393, "step": 13000 }, { "epoch": 2.17, "grad_norm": 0.34344854950904846, "learning_rate": 3.7160155703513535e-06, "loss": 0.0405, "step": 13001 }, { "epoch": 2.17, "grad_norm": 0.4598824679851532, "learning_rate": 3.7146110178771857e-06, "loss": 0.0299, "step": 13002 }, { "epoch": 2.17, "grad_norm": 0.47100991010665894, "learning_rate": 3.7132066703460936e-06, "loss": 0.0327, "step": 13003 }, { "epoch": 2.17, "grad_norm": 0.6845499277114868, "learning_rate": 3.7118025278038683e-06, "loss": 0.0375, "step": 13004 }, { "epoch": 2.18, "grad_norm": 0.35989442467689514, "learning_rate": 3.710398590296298e-06, "loss": 0.0333, "step": 13005 }, { "epoch": 2.18, "grad_norm": 0.38381919264793396, "learning_rate": 3.708994857869156e-06, "loss": 0.0407, "step": 13006 }, { "epoch": 2.18, "grad_norm": 0.3643704950809479, "learning_rate": 3.7075913305682075e-06, "loss": 0.0437, "step": 13007 }, { "epoch": 2.18, "grad_norm": 0.42566022276878357, "learning_rate": 3.7061880084392254e-06, "loss": 0.0466, "step": 13008 }, { "epoch": 2.18, "grad_norm": 0.2675628662109375, "learning_rate": 3.704784891527963e-06, "loss": 0.0189, "step": 13009 }, { "epoch": 2.18, "grad_norm": 0.27157023549079895, "learning_rate": 3.7033819798801663e-06, "loss": 0.0216, "step": 13010 }, { "epoch": 2.18, "grad_norm": 0.43455883860588074, "learning_rate": 3.701979273541584e-06, "loss": 0.0347, "step": 13011 }, { "epoch": 2.18, "grad_norm": 0.5307254791259766, "learning_rate": 3.7005767725579544e-06, "loss": 0.0335, "step": 13012 }, { "epoch": 2.18, "grad_norm": 0.5412499308586121, "learning_rate": 3.6991744769750025e-06, "loss": 0.0428, "step": 13013 }, { "epoch": 2.18, "grad_norm": 0.2959679067134857, "learning_rate": 3.6977723868384584e-06, "loss": 0.0278, "step": 13014 }, { "epoch": 2.18, "grad_norm": 0.45409706234931946, "learning_rate": 3.6963705021940356e-06, "loss": 0.041, "step": 13015 }, { "epoch": 2.18, "grad_norm": 0.306312620639801, "learning_rate": 3.6949688230874416e-06, "loss": 0.0266, "step": 13016 }, { "epoch": 2.18, "grad_norm": 0.47477784752845764, "learning_rate": 3.693567349564384e-06, "loss": 0.0425, "step": 13017 }, { "epoch": 2.18, "grad_norm": 0.43279245495796204, "learning_rate": 3.6921660816705575e-06, "loss": 0.0364, "step": 13018 }, { "epoch": 2.18, "grad_norm": 0.3913934826850891, "learning_rate": 3.690765019451651e-06, "loss": 0.036, "step": 13019 }, { "epoch": 2.18, "grad_norm": 0.49251335859298706, "learning_rate": 3.689364162953355e-06, "loss": 0.0424, "step": 13020 }, { "epoch": 2.18, "grad_norm": 0.4682667851448059, "learning_rate": 3.6879635122213364e-06, "loss": 0.0299, "step": 13021 }, { "epoch": 2.18, "grad_norm": 0.5222976803779602, "learning_rate": 3.6865630673012743e-06, "loss": 0.0352, "step": 13022 }, { "epoch": 2.18, "grad_norm": 0.3936719000339508, "learning_rate": 3.685162828238826e-06, "loss": 0.026, "step": 13023 }, { "epoch": 2.18, "grad_norm": 0.38908687233924866, "learning_rate": 3.6837627950796474e-06, "loss": 0.0433, "step": 13024 }, { "epoch": 2.18, "grad_norm": 0.4889468550682068, "learning_rate": 3.682362967869394e-06, "loss": 0.0277, "step": 13025 }, { "epoch": 2.18, "grad_norm": 0.4832307696342468, "learning_rate": 3.680963346653702e-06, "loss": 0.0417, "step": 13026 }, { "epoch": 2.18, "grad_norm": 0.33209866285324097, "learning_rate": 3.6795639314782115e-06, "loss": 0.0204, "step": 13027 }, { "epoch": 2.18, "grad_norm": 0.3763743042945862, "learning_rate": 3.678164722388555e-06, "loss": 0.0406, "step": 13028 }, { "epoch": 2.18, "grad_norm": 0.3785911798477173, "learning_rate": 3.676765719430354e-06, "loss": 0.0371, "step": 13029 }, { "epoch": 2.18, "grad_norm": 0.4179963171482086, "learning_rate": 3.6753669226492182e-06, "loss": 0.038, "step": 13030 }, { "epoch": 2.18, "grad_norm": 0.45143356919288635, "learning_rate": 3.673968332090767e-06, "loss": 0.0321, "step": 13031 }, { "epoch": 2.18, "grad_norm": 0.25205421447753906, "learning_rate": 3.6725699478005937e-06, "loss": 0.0187, "step": 13032 }, { "epoch": 2.18, "grad_norm": 0.4632260799407959, "learning_rate": 3.671171769824304e-06, "loss": 0.0244, "step": 13033 }, { "epoch": 2.18, "grad_norm": 0.47674521803855896, "learning_rate": 3.6697737982074786e-06, "loss": 0.0489, "step": 13034 }, { "epoch": 2.18, "grad_norm": 0.38524845242500305, "learning_rate": 3.668376032995704e-06, "loss": 0.034, "step": 13035 }, { "epoch": 2.18, "grad_norm": 0.3632614016532898, "learning_rate": 3.6669784742345594e-06, "loss": 0.038, "step": 13036 }, { "epoch": 2.18, "grad_norm": 0.39397916197776794, "learning_rate": 3.66558112196961e-06, "loss": 0.0439, "step": 13037 }, { "epoch": 2.18, "grad_norm": 0.39022794365882874, "learning_rate": 3.664183976246416e-06, "loss": 0.045, "step": 13038 }, { "epoch": 2.18, "grad_norm": 0.27861517667770386, "learning_rate": 3.66278703711054e-06, "loss": 0.0282, "step": 13039 }, { "epoch": 2.18, "grad_norm": 0.7210575342178345, "learning_rate": 3.661390304607526e-06, "loss": 0.025, "step": 13040 }, { "epoch": 2.18, "grad_norm": 0.2992255985736847, "learning_rate": 3.6599937787829144e-06, "loss": 0.02, "step": 13041 }, { "epoch": 2.18, "grad_norm": 0.2885396480560303, "learning_rate": 3.658597459682247e-06, "loss": 0.0181, "step": 13042 }, { "epoch": 2.18, "grad_norm": 0.268889844417572, "learning_rate": 3.657201347351046e-06, "loss": 0.0206, "step": 13043 }, { "epoch": 2.18, "grad_norm": 0.357365220785141, "learning_rate": 3.6558054418348356e-06, "loss": 0.0281, "step": 13044 }, { "epoch": 2.18, "grad_norm": 0.4061760902404785, "learning_rate": 3.654409743179136e-06, "loss": 0.0462, "step": 13045 }, { "epoch": 2.18, "grad_norm": 0.5442349910736084, "learning_rate": 3.6530142514294475e-06, "loss": 0.0383, "step": 13046 }, { "epoch": 2.18, "grad_norm": 0.33304911851882935, "learning_rate": 3.651618966631281e-06, "loss": 0.0292, "step": 13047 }, { "epoch": 2.18, "grad_norm": 0.35369911789894104, "learning_rate": 3.6502238888301254e-06, "loss": 0.039, "step": 13048 }, { "epoch": 2.18, "grad_norm": 0.3964010179042816, "learning_rate": 3.648829018071467e-06, "loss": 0.0265, "step": 13049 }, { "epoch": 2.18, "grad_norm": 0.44036999344825745, "learning_rate": 3.647434354400794e-06, "loss": 0.0356, "step": 13050 }, { "epoch": 2.18, "grad_norm": 0.3374806344509125, "learning_rate": 3.6460398978635735e-06, "loss": 0.0374, "step": 13051 }, { "epoch": 2.18, "grad_norm": 0.3905958831310272, "learning_rate": 3.6446456485052782e-06, "loss": 0.0358, "step": 13052 }, { "epoch": 2.18, "grad_norm": 0.42270293831825256, "learning_rate": 3.643251606371373e-06, "loss": 0.0569, "step": 13053 }, { "epoch": 2.18, "grad_norm": 0.4306570589542389, "learning_rate": 3.641857771507308e-06, "loss": 0.045, "step": 13054 }, { "epoch": 2.18, "grad_norm": 0.3409149646759033, "learning_rate": 3.6404641439585277e-06, "loss": 0.0338, "step": 13055 }, { "epoch": 2.18, "grad_norm": 0.4102611541748047, "learning_rate": 3.639070723770479e-06, "loss": 0.0346, "step": 13056 }, { "epoch": 2.18, "grad_norm": 0.4224343001842499, "learning_rate": 3.6376775109885953e-06, "loss": 0.0339, "step": 13057 }, { "epoch": 2.18, "grad_norm": 0.3522360920906067, "learning_rate": 3.6362845056582986e-06, "loss": 0.0262, "step": 13058 }, { "epoch": 2.18, "grad_norm": 0.3909018039703369, "learning_rate": 3.6348917078250133e-06, "loss": 0.0308, "step": 13059 }, { "epoch": 2.18, "grad_norm": 0.4172477126121521, "learning_rate": 3.633499117534154e-06, "loss": 0.0463, "step": 13060 }, { "epoch": 2.18, "grad_norm": 0.294546902179718, "learning_rate": 3.63210673483113e-06, "loss": 0.0205, "step": 13061 }, { "epoch": 2.18, "grad_norm": 0.34855708479881287, "learning_rate": 3.63071455976134e-06, "loss": 0.0315, "step": 13062 }, { "epoch": 2.18, "grad_norm": 0.5703759789466858, "learning_rate": 3.6293225923701724e-06, "loss": 0.0303, "step": 13063 }, { "epoch": 2.18, "grad_norm": 0.320605605840683, "learning_rate": 3.6279308327030226e-06, "loss": 0.0254, "step": 13064 }, { "epoch": 2.19, "grad_norm": 0.421853631734848, "learning_rate": 3.6265392808052667e-06, "loss": 0.0468, "step": 13065 }, { "epoch": 2.19, "grad_norm": 0.45073559880256653, "learning_rate": 3.6251479367222744e-06, "loss": 0.0392, "step": 13066 }, { "epoch": 2.19, "grad_norm": 0.25570887327194214, "learning_rate": 3.623756800499415e-06, "loss": 0.0301, "step": 13067 }, { "epoch": 2.19, "grad_norm": 0.34889674186706543, "learning_rate": 3.6223658721820533e-06, "loss": 0.04, "step": 13068 }, { "epoch": 2.19, "grad_norm": 0.33418890833854675, "learning_rate": 3.6209751518155334e-06, "loss": 0.0395, "step": 13069 }, { "epoch": 2.19, "grad_norm": 0.24253228306770325, "learning_rate": 3.6195846394452106e-06, "loss": 0.0244, "step": 13070 }, { "epoch": 2.19, "grad_norm": 0.3777640461921692, "learning_rate": 3.6181943351164194e-06, "loss": 0.0294, "step": 13071 }, { "epoch": 2.19, "grad_norm": 0.2750855088233948, "learning_rate": 3.6168042388744884e-06, "loss": 0.0226, "step": 13072 }, { "epoch": 2.19, "grad_norm": 0.40512847900390625, "learning_rate": 3.615414350764752e-06, "loss": 0.0277, "step": 13073 }, { "epoch": 2.19, "grad_norm": 0.4594651460647583, "learning_rate": 3.6140246708325224e-06, "loss": 0.0328, "step": 13074 }, { "epoch": 2.19, "grad_norm": 0.3749309778213501, "learning_rate": 3.6126351991231135e-06, "loss": 0.0288, "step": 13075 }, { "epoch": 2.19, "grad_norm": 0.30618342757225037, "learning_rate": 3.6112459356818354e-06, "loss": 0.0306, "step": 13076 }, { "epoch": 2.19, "grad_norm": 0.39737918972969055, "learning_rate": 3.60985688055398e-06, "loss": 0.0511, "step": 13077 }, { "epoch": 2.19, "grad_norm": 0.389059841632843, "learning_rate": 3.6084680337848453e-06, "loss": 0.0366, "step": 13078 }, { "epoch": 2.19, "grad_norm": 0.3495492935180664, "learning_rate": 3.6070793954197146e-06, "loss": 0.0175, "step": 13079 }, { "epoch": 2.19, "grad_norm": 0.35104504227638245, "learning_rate": 3.605690965503862e-06, "loss": 0.0237, "step": 13080 }, { "epoch": 2.19, "grad_norm": 0.4438391923904419, "learning_rate": 3.604302744082564e-06, "loss": 0.032, "step": 13081 }, { "epoch": 2.19, "grad_norm": 0.45452675223350525, "learning_rate": 3.6029147312010814e-06, "loss": 0.0286, "step": 13082 }, { "epoch": 2.19, "grad_norm": 0.5799261331558228, "learning_rate": 3.6015269269046738e-06, "loss": 0.0398, "step": 13083 }, { "epoch": 2.19, "grad_norm": 0.4412339925765991, "learning_rate": 3.600139331238597e-06, "loss": 0.0443, "step": 13084 }, { "epoch": 2.19, "grad_norm": 0.3458337187767029, "learning_rate": 3.59875194424809e-06, "loss": 0.0259, "step": 13085 }, { "epoch": 2.19, "grad_norm": 0.5437342524528503, "learning_rate": 3.5973647659783883e-06, "loss": 0.0369, "step": 13086 }, { "epoch": 2.19, "grad_norm": 0.43886351585388184, "learning_rate": 3.595977796474729e-06, "loss": 0.0373, "step": 13087 }, { "epoch": 2.19, "grad_norm": 0.6630999445915222, "learning_rate": 3.594591035782329e-06, "loss": 0.0315, "step": 13088 }, { "epoch": 2.19, "grad_norm": 0.43455323576927185, "learning_rate": 3.5932044839464132e-06, "loss": 0.0333, "step": 13089 }, { "epoch": 2.19, "grad_norm": 0.4082489311695099, "learning_rate": 3.5918181410121834e-06, "loss": 0.0376, "step": 13090 }, { "epoch": 2.19, "grad_norm": 0.4297187626361847, "learning_rate": 3.590432007024852e-06, "loss": 0.036, "step": 13091 }, { "epoch": 2.19, "grad_norm": 0.3637223541736603, "learning_rate": 3.5890460820296067e-06, "loss": 0.0364, "step": 13092 }, { "epoch": 2.19, "grad_norm": 0.7219606637954712, "learning_rate": 3.587660366071646e-06, "loss": 0.0377, "step": 13093 }, { "epoch": 2.19, "grad_norm": 0.34098753333091736, "learning_rate": 3.586274859196144e-06, "loss": 0.0356, "step": 13094 }, { "epoch": 2.19, "grad_norm": 0.43701937794685364, "learning_rate": 3.5848895614482857e-06, "loss": 0.0413, "step": 13095 }, { "epoch": 2.19, "grad_norm": 0.5390465259552002, "learning_rate": 3.583504472873237e-06, "loss": 0.0408, "step": 13096 }, { "epoch": 2.19, "grad_norm": 0.400884747505188, "learning_rate": 3.582119593516156e-06, "loss": 0.0378, "step": 13097 }, { "epoch": 2.19, "grad_norm": 0.31439217925071716, "learning_rate": 3.5807349234222044e-06, "loss": 0.0239, "step": 13098 }, { "epoch": 2.19, "grad_norm": 0.5160370469093323, "learning_rate": 3.579350462636527e-06, "loss": 0.035, "step": 13099 }, { "epoch": 2.19, "grad_norm": 0.3189578652381897, "learning_rate": 3.5779662112042677e-06, "loss": 0.0326, "step": 13100 }, { "epoch": 2.19, "grad_norm": 0.393449604511261, "learning_rate": 3.5765821691705647e-06, "loss": 0.0391, "step": 13101 }, { "epoch": 2.19, "grad_norm": 0.4313792884349823, "learning_rate": 3.575198336580541e-06, "loss": 0.0329, "step": 13102 }, { "epoch": 2.19, "grad_norm": 0.369358092546463, "learning_rate": 3.5738147134793244e-06, "loss": 0.0476, "step": 13103 }, { "epoch": 2.19, "grad_norm": 0.45207202434539795, "learning_rate": 3.572431299912026e-06, "loss": 0.0247, "step": 13104 }, { "epoch": 2.19, "grad_norm": 0.4879920780658722, "learning_rate": 3.5710480959237503e-06, "loss": 0.0473, "step": 13105 }, { "epoch": 2.19, "grad_norm": 0.44252657890319824, "learning_rate": 3.5696651015596053e-06, "loss": 0.031, "step": 13106 }, { "epoch": 2.19, "grad_norm": 0.39536410570144653, "learning_rate": 3.5682823168646796e-06, "loss": 0.0355, "step": 13107 }, { "epoch": 2.19, "grad_norm": 0.4084477424621582, "learning_rate": 3.566899741884062e-06, "loss": 0.0475, "step": 13108 }, { "epoch": 2.19, "grad_norm": 0.3575960099697113, "learning_rate": 3.5655173766628384e-06, "loss": 0.0328, "step": 13109 }, { "epoch": 2.19, "grad_norm": 0.4499127268791199, "learning_rate": 3.564135221246078e-06, "loss": 0.0504, "step": 13110 }, { "epoch": 2.19, "grad_norm": 0.21152764558792114, "learning_rate": 3.5627532756788453e-06, "loss": 0.0137, "step": 13111 }, { "epoch": 2.19, "grad_norm": 0.5173113942146301, "learning_rate": 3.5613715400062054e-06, "loss": 0.0384, "step": 13112 }, { "epoch": 2.19, "grad_norm": 0.3501094877719879, "learning_rate": 3.5599900142732103e-06, "loss": 0.0395, "step": 13113 }, { "epoch": 2.19, "grad_norm": 0.23173829913139343, "learning_rate": 3.5586086985249026e-06, "loss": 0.018, "step": 13114 }, { "epoch": 2.19, "grad_norm": 0.2480684518814087, "learning_rate": 3.557227592806324e-06, "loss": 0.0301, "step": 13115 }, { "epoch": 2.19, "grad_norm": 0.29294997453689575, "learning_rate": 3.5558466971625073e-06, "loss": 0.0275, "step": 13116 }, { "epoch": 2.19, "grad_norm": 0.3234996795654297, "learning_rate": 3.5544660116384833e-06, "loss": 0.0279, "step": 13117 }, { "epoch": 2.19, "grad_norm": 0.3104937672615051, "learning_rate": 3.553085536279266e-06, "loss": 0.0348, "step": 13118 }, { "epoch": 2.19, "grad_norm": 0.37770482897758484, "learning_rate": 3.551705271129865e-06, "loss": 0.0273, "step": 13119 }, { "epoch": 2.19, "grad_norm": 0.3233700096607208, "learning_rate": 3.550325216235292e-06, "loss": 0.0402, "step": 13120 }, { "epoch": 2.19, "grad_norm": 0.33836445212364197, "learning_rate": 3.5489453716405432e-06, "loss": 0.0382, "step": 13121 }, { "epoch": 2.19, "grad_norm": 0.46880805492401123, "learning_rate": 3.547565737390605e-06, "loss": 0.0409, "step": 13122 }, { "epoch": 2.19, "grad_norm": 0.2527824938297272, "learning_rate": 3.5461863135304665e-06, "loss": 0.0204, "step": 13123 }, { "epoch": 2.2, "grad_norm": 0.3249039053916931, "learning_rate": 3.5448071001051086e-06, "loss": 0.0344, "step": 13124 }, { "epoch": 2.2, "grad_norm": 0.4074352979660034, "learning_rate": 3.543428097159496e-06, "loss": 0.0441, "step": 13125 }, { "epoch": 2.2, "grad_norm": 0.35719728469848633, "learning_rate": 3.5420493047385994e-06, "loss": 0.0405, "step": 13126 }, { "epoch": 2.2, "grad_norm": 0.3073057532310486, "learning_rate": 3.5406707228873726e-06, "loss": 0.0313, "step": 13127 }, { "epoch": 2.2, "grad_norm": 0.27837681770324707, "learning_rate": 3.5392923516507615e-06, "loss": 0.0281, "step": 13128 }, { "epoch": 2.2, "grad_norm": 0.34952935576438904, "learning_rate": 3.537914191073719e-06, "loss": 0.0386, "step": 13129 }, { "epoch": 2.2, "grad_norm": 0.3497219681739807, "learning_rate": 3.5365362412011737e-06, "loss": 0.025, "step": 13130 }, { "epoch": 2.2, "grad_norm": 0.4637031555175781, "learning_rate": 3.5351585020780576e-06, "loss": 0.0296, "step": 13131 }, { "epoch": 2.2, "grad_norm": 0.45804619789123535, "learning_rate": 3.5337809737492988e-06, "loss": 0.0409, "step": 13132 }, { "epoch": 2.2, "grad_norm": 0.2749222218990326, "learning_rate": 3.5324036562598053e-06, "loss": 0.0368, "step": 13133 }, { "epoch": 2.2, "grad_norm": 0.28857624530792236, "learning_rate": 3.531026549654495e-06, "loss": 0.024, "step": 13134 }, { "epoch": 2.2, "grad_norm": 0.35868051648139954, "learning_rate": 3.5296496539782643e-06, "loss": 0.0346, "step": 13135 }, { "epoch": 2.2, "grad_norm": 0.7494688034057617, "learning_rate": 3.5282729692760064e-06, "loss": 0.0369, "step": 13136 }, { "epoch": 2.2, "grad_norm": 0.44622036814689636, "learning_rate": 3.5268964955926166e-06, "loss": 0.0363, "step": 13137 }, { "epoch": 2.2, "grad_norm": 0.31595107913017273, "learning_rate": 3.52552023297297e-06, "loss": 0.033, "step": 13138 }, { "epoch": 2.2, "grad_norm": 0.4474353790283203, "learning_rate": 3.5241441814619437e-06, "loss": 0.0399, "step": 13139 }, { "epoch": 2.2, "grad_norm": 0.396535724401474, "learning_rate": 3.522768341104411e-06, "loss": 0.046, "step": 13140 }, { "epoch": 2.2, "grad_norm": 0.31737402081489563, "learning_rate": 3.5213927119452284e-06, "loss": 0.0254, "step": 13141 }, { "epoch": 2.2, "grad_norm": 0.32842177152633667, "learning_rate": 3.5200172940292455e-06, "loss": 0.0229, "step": 13142 }, { "epoch": 2.2, "grad_norm": 0.33987680077552795, "learning_rate": 3.5186420874013182e-06, "loss": 0.0316, "step": 13143 }, { "epoch": 2.2, "grad_norm": 0.34515780210494995, "learning_rate": 3.51726709210628e-06, "loss": 0.0279, "step": 13144 }, { "epoch": 2.2, "grad_norm": 0.5185719728469849, "learning_rate": 3.5158923081889694e-06, "loss": 0.055, "step": 13145 }, { "epoch": 2.2, "grad_norm": 0.3930386006832123, "learning_rate": 3.5145177356942117e-06, "loss": 0.044, "step": 13146 }, { "epoch": 2.2, "grad_norm": 0.3884861469268799, "learning_rate": 3.513143374666821e-06, "loss": 0.0361, "step": 13147 }, { "epoch": 2.2, "grad_norm": 0.30880287289619446, "learning_rate": 3.5117692251516156e-06, "loss": 0.026, "step": 13148 }, { "epoch": 2.2, "grad_norm": 0.24710360169410706, "learning_rate": 3.5103952871934033e-06, "loss": 0.0273, "step": 13149 }, { "epoch": 2.2, "grad_norm": 0.4146278500556946, "learning_rate": 3.509021560836977e-06, "loss": 0.0353, "step": 13150 }, { "epoch": 2.2, "grad_norm": 0.4578562378883362, "learning_rate": 3.5076480461271347e-06, "loss": 0.0414, "step": 13151 }, { "epoch": 2.2, "grad_norm": 0.3807855546474457, "learning_rate": 3.5062747431086597e-06, "loss": 0.0401, "step": 13152 }, { "epoch": 2.2, "grad_norm": 0.48762884736061096, "learning_rate": 3.504901651826326e-06, "loss": 0.03, "step": 13153 }, { "epoch": 2.2, "grad_norm": 0.43161940574645996, "learning_rate": 3.503528772324911e-06, "loss": 0.027, "step": 13154 }, { "epoch": 2.2, "grad_norm": 0.36809736490249634, "learning_rate": 3.5021561046491746e-06, "loss": 0.0365, "step": 13155 }, { "epoch": 2.2, "grad_norm": 0.3727644681930542, "learning_rate": 3.500783648843875e-06, "loss": 0.0323, "step": 13156 }, { "epoch": 2.2, "grad_norm": 0.38153818249702454, "learning_rate": 3.4994114049537686e-06, "loss": 0.0368, "step": 13157 }, { "epoch": 2.2, "grad_norm": 0.3235362470149994, "learning_rate": 3.4980393730235918e-06, "loss": 0.0303, "step": 13158 }, { "epoch": 2.2, "grad_norm": 0.29910317063331604, "learning_rate": 3.4966675530980867e-06, "loss": 0.0315, "step": 13159 }, { "epoch": 2.2, "grad_norm": 0.40566954016685486, "learning_rate": 3.4952959452219813e-06, "loss": 0.0273, "step": 13160 }, { "epoch": 2.2, "grad_norm": 0.369076669216156, "learning_rate": 3.493924549439995e-06, "loss": 0.0393, "step": 13161 }, { "epoch": 2.2, "grad_norm": 0.3745875358581543, "learning_rate": 3.4925533657968513e-06, "loss": 0.039, "step": 13162 }, { "epoch": 2.2, "grad_norm": 0.3311748802661896, "learning_rate": 3.4911823943372513e-06, "loss": 0.0256, "step": 13163 }, { "epoch": 2.2, "grad_norm": 0.338986337184906, "learning_rate": 3.4898116351059007e-06, "loss": 0.034, "step": 13164 }, { "epoch": 2.2, "grad_norm": 0.3668580949306488, "learning_rate": 3.4884410881474997e-06, "loss": 0.029, "step": 13165 }, { "epoch": 2.2, "grad_norm": 0.37885037064552307, "learning_rate": 3.4870707535067317e-06, "loss": 0.0274, "step": 13166 }, { "epoch": 2.2, "grad_norm": 0.42001867294311523, "learning_rate": 3.485700631228276e-06, "loss": 0.0421, "step": 13167 }, { "epoch": 2.2, "grad_norm": 0.34803780913352966, "learning_rate": 3.4843307213568124e-06, "loss": 0.0459, "step": 13168 }, { "epoch": 2.2, "grad_norm": 0.39186325669288635, "learning_rate": 3.482961023937007e-06, "loss": 0.0282, "step": 13169 }, { "epoch": 2.2, "grad_norm": 0.47907736897468567, "learning_rate": 3.4815915390135167e-06, "loss": 0.0404, "step": 13170 }, { "epoch": 2.2, "grad_norm": 0.47216418385505676, "learning_rate": 3.4802222666309983e-06, "loss": 0.0226, "step": 13171 }, { "epoch": 2.2, "grad_norm": 0.3393002450466156, "learning_rate": 3.4788532068340985e-06, "loss": 0.0342, "step": 13172 }, { "epoch": 2.2, "grad_norm": 0.3346673250198364, "learning_rate": 3.47748435966746e-06, "loss": 0.0341, "step": 13173 }, { "epoch": 2.2, "grad_norm": 0.3432080149650574, "learning_rate": 3.476115725175714e-06, "loss": 0.039, "step": 13174 }, { "epoch": 2.2, "grad_norm": 0.3372790515422821, "learning_rate": 3.474747303403483e-06, "loss": 0.038, "step": 13175 }, { "epoch": 2.2, "grad_norm": 0.2200685441493988, "learning_rate": 3.4733790943953914e-06, "loss": 0.0161, "step": 13176 }, { "epoch": 2.2, "grad_norm": 0.33842554688453674, "learning_rate": 3.4720110981960485e-06, "loss": 0.0191, "step": 13177 }, { "epoch": 2.2, "grad_norm": 0.5335679650306702, "learning_rate": 3.4706433148500573e-06, "loss": 0.0301, "step": 13178 }, { "epoch": 2.2, "grad_norm": 0.45745062828063965, "learning_rate": 3.469275744402019e-06, "loss": 0.0388, "step": 13179 }, { "epoch": 2.2, "grad_norm": 0.33809328079223633, "learning_rate": 3.467908386896528e-06, "loss": 0.0321, "step": 13180 }, { "epoch": 2.2, "grad_norm": 0.42461225390434265, "learning_rate": 3.466541242378161e-06, "loss": 0.0357, "step": 13181 }, { "epoch": 2.2, "grad_norm": 0.31130000948905945, "learning_rate": 3.4651743108915048e-06, "loss": 0.0308, "step": 13182 }, { "epoch": 2.2, "grad_norm": 0.33197855949401855, "learning_rate": 3.4638075924811242e-06, "loss": 0.0293, "step": 13183 }, { "epoch": 2.21, "grad_norm": 0.29772695899009705, "learning_rate": 3.4624410871915804e-06, "loss": 0.0322, "step": 13184 }, { "epoch": 2.21, "grad_norm": 0.4156115651130676, "learning_rate": 3.461074795067436e-06, "loss": 0.0365, "step": 13185 }, { "epoch": 2.21, "grad_norm": 0.3833240866661072, "learning_rate": 3.4597087161532363e-06, "loss": 0.0349, "step": 13186 }, { "epoch": 2.21, "grad_norm": 0.4396897256374359, "learning_rate": 3.4583428504935244e-06, "loss": 0.0535, "step": 13187 }, { "epoch": 2.21, "grad_norm": 0.3845185339450836, "learning_rate": 3.4569771981328416e-06, "loss": 0.0495, "step": 13188 }, { "epoch": 2.21, "grad_norm": 0.39231008291244507, "learning_rate": 3.455611759115708e-06, "loss": 0.0314, "step": 13189 }, { "epoch": 2.21, "grad_norm": 0.41295596957206726, "learning_rate": 3.454246533486654e-06, "loss": 0.038, "step": 13190 }, { "epoch": 2.21, "grad_norm": 0.2511783540248871, "learning_rate": 3.45288152129019e-06, "loss": 0.0297, "step": 13191 }, { "epoch": 2.21, "grad_norm": 0.3708506226539612, "learning_rate": 3.4515167225708225e-06, "loss": 0.0377, "step": 13192 }, { "epoch": 2.21, "grad_norm": 0.3267876207828522, "learning_rate": 3.450152137373056e-06, "loss": 0.0275, "step": 13193 }, { "epoch": 2.21, "grad_norm": 0.4577147662639618, "learning_rate": 3.4487877657413816e-06, "loss": 0.0481, "step": 13194 }, { "epoch": 2.21, "grad_norm": 0.44336506724357605, "learning_rate": 3.4474236077202893e-06, "loss": 0.0393, "step": 13195 }, { "epoch": 2.21, "grad_norm": 0.47233885526657104, "learning_rate": 3.4460596633542553e-06, "loss": 0.0469, "step": 13196 }, { "epoch": 2.21, "grad_norm": 0.3100983500480652, "learning_rate": 3.4446959326877584e-06, "loss": 0.0295, "step": 13197 }, { "epoch": 2.21, "grad_norm": 0.4348354637622833, "learning_rate": 3.443332415765257e-06, "loss": 0.0251, "step": 13198 }, { "epoch": 2.21, "grad_norm": 0.4413018822669983, "learning_rate": 3.4419691126312196e-06, "loss": 0.0423, "step": 13199 }, { "epoch": 2.21, "grad_norm": 0.35679513216018677, "learning_rate": 3.4406060233300897e-06, "loss": 0.0361, "step": 13200 }, { "epoch": 2.21, "grad_norm": 0.5304316878318787, "learning_rate": 3.4392431479063193e-06, "loss": 0.0211, "step": 13201 }, { "epoch": 2.21, "grad_norm": 0.3947893977165222, "learning_rate": 3.4378804864043446e-06, "loss": 0.0384, "step": 13202 }, { "epoch": 2.21, "grad_norm": 0.30085158348083496, "learning_rate": 3.4365180388685917e-06, "loss": 0.0349, "step": 13203 }, { "epoch": 2.21, "grad_norm": 0.3904368281364441, "learning_rate": 3.4351558053434907e-06, "loss": 0.0396, "step": 13204 }, { "epoch": 2.21, "grad_norm": 0.4896251857280731, "learning_rate": 3.4337937858734594e-06, "loss": 0.0376, "step": 13205 }, { "epoch": 2.21, "grad_norm": 0.3302382230758667, "learning_rate": 3.4324319805029037e-06, "loss": 0.0318, "step": 13206 }, { "epoch": 2.21, "grad_norm": 0.4593391716480255, "learning_rate": 3.4310703892762332e-06, "loss": 0.0523, "step": 13207 }, { "epoch": 2.21, "grad_norm": 0.35970407724380493, "learning_rate": 3.4297090122378405e-06, "loss": 0.0288, "step": 13208 }, { "epoch": 2.21, "grad_norm": 0.5008670091629028, "learning_rate": 3.428347849432113e-06, "loss": 0.0453, "step": 13209 }, { "epoch": 2.21, "grad_norm": 0.3751241862773895, "learning_rate": 3.426986900903437e-06, "loss": 0.0538, "step": 13210 }, { "epoch": 2.21, "grad_norm": 0.38716331124305725, "learning_rate": 3.425626166696184e-06, "loss": 0.0383, "step": 13211 }, { "epoch": 2.21, "grad_norm": 0.5107439160346985, "learning_rate": 3.4242656468547254e-06, "loss": 0.0314, "step": 13212 }, { "epoch": 2.21, "grad_norm": 0.3382229208946228, "learning_rate": 3.4229053414234247e-06, "loss": 0.0254, "step": 13213 }, { "epoch": 2.21, "grad_norm": 0.846328854560852, "learning_rate": 3.4215452504466305e-06, "loss": 0.0385, "step": 13214 }, { "epoch": 2.21, "grad_norm": 0.5028828382492065, "learning_rate": 3.4201853739686975e-06, "loss": 0.0488, "step": 13215 }, { "epoch": 2.21, "grad_norm": 0.3267923593521118, "learning_rate": 3.4188257120339617e-06, "loss": 0.0416, "step": 13216 }, { "epoch": 2.21, "grad_norm": 0.2523716390132904, "learning_rate": 3.417466264686754e-06, "loss": 0.0341, "step": 13217 }, { "epoch": 2.21, "grad_norm": 0.3892573118209839, "learning_rate": 3.4161070319714074e-06, "loss": 0.0406, "step": 13218 }, { "epoch": 2.21, "grad_norm": 0.46371084451675415, "learning_rate": 3.4147480139322352e-06, "loss": 0.0421, "step": 13219 }, { "epoch": 2.21, "grad_norm": 0.3868016004562378, "learning_rate": 3.4133892106135524e-06, "loss": 0.0302, "step": 13220 }, { "epoch": 2.21, "grad_norm": 0.5108206868171692, "learning_rate": 3.412030622059669e-06, "loss": 0.0394, "step": 13221 }, { "epoch": 2.21, "grad_norm": 0.4651615619659424, "learning_rate": 3.410672248314878e-06, "loss": 0.043, "step": 13222 }, { "epoch": 2.21, "grad_norm": 0.3971417546272278, "learning_rate": 3.4093140894234698e-06, "loss": 0.0319, "step": 13223 }, { "epoch": 2.21, "grad_norm": 0.36308056116104126, "learning_rate": 3.407956145429735e-06, "loss": 0.0423, "step": 13224 }, { "epoch": 2.21, "grad_norm": 0.2568303346633911, "learning_rate": 3.4065984163779464e-06, "loss": 0.0222, "step": 13225 }, { "epoch": 2.21, "grad_norm": 0.29667767882347107, "learning_rate": 3.4052409023123734e-06, "loss": 0.029, "step": 13226 }, { "epoch": 2.21, "grad_norm": 0.39568838477134705, "learning_rate": 3.4038836032772814e-06, "loss": 0.037, "step": 13227 }, { "epoch": 2.21, "grad_norm": 0.3224189877510071, "learning_rate": 3.4025265193169265e-06, "loss": 0.0479, "step": 13228 }, { "epoch": 2.21, "grad_norm": 0.290036678314209, "learning_rate": 3.401169650475562e-06, "loss": 0.0252, "step": 13229 }, { "epoch": 2.21, "grad_norm": 0.326801598072052, "learning_rate": 3.399812996797428e-06, "loss": 0.0368, "step": 13230 }, { "epoch": 2.21, "grad_norm": 0.4244769811630249, "learning_rate": 3.3984565583267547e-06, "loss": 0.038, "step": 13231 }, { "epoch": 2.21, "grad_norm": 0.28913265466690063, "learning_rate": 3.397100335107779e-06, "loss": 0.0254, "step": 13232 }, { "epoch": 2.21, "grad_norm": 0.30187538266181946, "learning_rate": 3.395744327184718e-06, "loss": 0.0256, "step": 13233 }, { "epoch": 2.21, "grad_norm": 0.39735180139541626, "learning_rate": 3.3943885346017824e-06, "loss": 0.0198, "step": 13234 }, { "epoch": 2.21, "grad_norm": 0.28643998503685, "learning_rate": 3.393032957403183e-06, "loss": 0.029, "step": 13235 }, { "epoch": 2.21, "grad_norm": 0.3444961607456207, "learning_rate": 3.391677595633124e-06, "loss": 0.0311, "step": 13236 }, { "epoch": 2.21, "grad_norm": 0.2943688929080963, "learning_rate": 3.390322449335791e-06, "loss": 0.0471, "step": 13237 }, { "epoch": 2.21, "grad_norm": 0.33021822571754456, "learning_rate": 3.388967518555378e-06, "loss": 0.0307, "step": 13238 }, { "epoch": 2.21, "grad_norm": 0.3099278211593628, "learning_rate": 3.387612803336061e-06, "loss": 0.0248, "step": 13239 }, { "epoch": 2.21, "grad_norm": 0.3571147620677948, "learning_rate": 3.386258303722009e-06, "loss": 0.0256, "step": 13240 }, { "epoch": 2.21, "grad_norm": 0.30433395504951477, "learning_rate": 3.3849040197573926e-06, "loss": 0.0346, "step": 13241 }, { "epoch": 2.21, "grad_norm": 0.48949742317199707, "learning_rate": 3.3835499514863633e-06, "loss": 0.0276, "step": 13242 }, { "epoch": 2.21, "grad_norm": 0.40252238512039185, "learning_rate": 3.382196098953077e-06, "loss": 0.0316, "step": 13243 }, { "epoch": 2.22, "grad_norm": 0.290179580450058, "learning_rate": 3.38084246220168e-06, "loss": 0.0354, "step": 13244 }, { "epoch": 2.22, "grad_norm": 0.45966798067092896, "learning_rate": 3.379489041276304e-06, "loss": 0.0369, "step": 13245 }, { "epoch": 2.22, "grad_norm": 0.4314534366130829, "learning_rate": 3.3781358362210827e-06, "loss": 0.0491, "step": 13246 }, { "epoch": 2.22, "grad_norm": 0.49768656492233276, "learning_rate": 3.3767828470801388e-06, "loss": 0.0471, "step": 13247 }, { "epoch": 2.22, "grad_norm": 0.5273480415344238, "learning_rate": 3.375430073897583e-06, "loss": 0.027, "step": 13248 }, { "epoch": 2.22, "grad_norm": 0.38893967866897583, "learning_rate": 3.3740775167175332e-06, "loss": 0.0283, "step": 13249 }, { "epoch": 2.22, "grad_norm": 0.2984258234500885, "learning_rate": 3.372725175584085e-06, "loss": 0.0267, "step": 13250 }, { "epoch": 2.22, "grad_norm": 0.30040469765663147, "learning_rate": 3.371373050541331e-06, "loss": 0.0394, "step": 13251 }, { "epoch": 2.22, "grad_norm": 0.29366129636764526, "learning_rate": 3.370021141633364e-06, "loss": 0.0234, "step": 13252 }, { "epoch": 2.22, "grad_norm": 0.36683332920074463, "learning_rate": 3.368669448904265e-06, "loss": 0.0339, "step": 13253 }, { "epoch": 2.22, "grad_norm": 0.31578463315963745, "learning_rate": 3.3673179723981033e-06, "loss": 0.0331, "step": 13254 }, { "epoch": 2.22, "grad_norm": 0.3581545948982239, "learning_rate": 3.3659667121589512e-06, "loss": 0.0318, "step": 13255 }, { "epoch": 2.22, "grad_norm": 0.3880528211593628, "learning_rate": 3.364615668230862e-06, "loss": 0.0222, "step": 13256 }, { "epoch": 2.22, "grad_norm": 0.29742127656936646, "learning_rate": 3.363264840657894e-06, "loss": 0.0225, "step": 13257 }, { "epoch": 2.22, "grad_norm": 0.444884717464447, "learning_rate": 3.36191422948409e-06, "loss": 0.0463, "step": 13258 }, { "epoch": 2.22, "grad_norm": 0.38532179594039917, "learning_rate": 3.3605638347534853e-06, "loss": 0.042, "step": 13259 }, { "epoch": 2.22, "grad_norm": 0.5698049664497375, "learning_rate": 3.3592136565101154e-06, "loss": 0.0387, "step": 13260 }, { "epoch": 2.22, "grad_norm": 0.4539645314216614, "learning_rate": 3.357863694798005e-06, "loss": 0.0293, "step": 13261 }, { "epoch": 2.22, "grad_norm": 0.40380826592445374, "learning_rate": 3.3565139496611665e-06, "loss": 0.0437, "step": 13262 }, { "epoch": 2.22, "grad_norm": 0.41048499941825867, "learning_rate": 3.3551644211436175e-06, "loss": 0.0383, "step": 13263 }, { "epoch": 2.22, "grad_norm": 0.3849761188030243, "learning_rate": 3.3538151092893567e-06, "loss": 0.0289, "step": 13264 }, { "epoch": 2.22, "grad_norm": 0.3551122844219208, "learning_rate": 3.352466014142377e-06, "loss": 0.0304, "step": 13265 }, { "epoch": 2.22, "grad_norm": 0.2884523570537567, "learning_rate": 3.3511171357466744e-06, "loss": 0.0264, "step": 13266 }, { "epoch": 2.22, "grad_norm": 0.32023730874061584, "learning_rate": 3.3497684741462223e-06, "loss": 0.0369, "step": 13267 }, { "epoch": 2.22, "grad_norm": 0.3760873079299927, "learning_rate": 3.348420029385001e-06, "loss": 0.0317, "step": 13268 }, { "epoch": 2.22, "grad_norm": 0.3310163915157318, "learning_rate": 3.3470718015069816e-06, "loss": 0.024, "step": 13269 }, { "epoch": 2.22, "grad_norm": 0.37444740533828735, "learning_rate": 3.345723790556117e-06, "loss": 0.0281, "step": 13270 }, { "epoch": 2.22, "grad_norm": 0.3899671733379364, "learning_rate": 3.3443759965763677e-06, "loss": 0.0235, "step": 13271 }, { "epoch": 2.22, "grad_norm": 0.3683837354183197, "learning_rate": 3.343028419611677e-06, "loss": 0.0312, "step": 13272 }, { "epoch": 2.22, "grad_norm": 0.45671576261520386, "learning_rate": 3.3416810597059813e-06, "loss": 0.0393, "step": 13273 }, { "epoch": 2.22, "grad_norm": 0.36489030718803406, "learning_rate": 3.340333916903219e-06, "loss": 0.0461, "step": 13274 }, { "epoch": 2.22, "grad_norm": 0.4210062325000763, "learning_rate": 3.3389869912473095e-06, "loss": 0.0496, "step": 13275 }, { "epoch": 2.22, "grad_norm": 0.5594897866249084, "learning_rate": 3.337640282782174e-06, "loss": 0.0374, "step": 13276 }, { "epoch": 2.22, "grad_norm": 0.4668857753276825, "learning_rate": 3.336293791551727e-06, "loss": 0.0464, "step": 13277 }, { "epoch": 2.22, "grad_norm": 0.3099829852581024, "learning_rate": 3.334947517599869e-06, "loss": 0.0256, "step": 13278 }, { "epoch": 2.22, "grad_norm": 0.43315303325653076, "learning_rate": 3.3336014609704934e-06, "loss": 0.0396, "step": 13279 }, { "epoch": 2.22, "grad_norm": 0.40434184670448303, "learning_rate": 3.3322556217074975e-06, "loss": 0.0314, "step": 13280 }, { "epoch": 2.22, "grad_norm": 0.2898862063884735, "learning_rate": 3.330909999854761e-06, "loss": 0.0269, "step": 13281 }, { "epoch": 2.22, "grad_norm": 0.4121706783771515, "learning_rate": 3.329564595456155e-06, "loss": 0.0303, "step": 13282 }, { "epoch": 2.22, "grad_norm": 0.5134109854698181, "learning_rate": 3.328219408555553e-06, "loss": 0.0426, "step": 13283 }, { "epoch": 2.22, "grad_norm": 0.4047839939594269, "learning_rate": 3.3268744391968157e-06, "loss": 0.0418, "step": 13284 }, { "epoch": 2.22, "grad_norm": 0.4342377185821533, "learning_rate": 3.3255296874237998e-06, "loss": 0.0368, "step": 13285 }, { "epoch": 2.22, "grad_norm": 0.44554272294044495, "learning_rate": 3.3241851532803502e-06, "loss": 0.0287, "step": 13286 }, { "epoch": 2.22, "grad_norm": 0.5241796374320984, "learning_rate": 3.322840836810305e-06, "loss": 0.0396, "step": 13287 }, { "epoch": 2.22, "grad_norm": 0.40531501173973083, "learning_rate": 3.3214967380575026e-06, "loss": 0.0377, "step": 13288 }, { "epoch": 2.22, "grad_norm": 0.37799301743507385, "learning_rate": 3.320152857065766e-06, "loss": 0.0431, "step": 13289 }, { "epoch": 2.22, "grad_norm": 0.5189914107322693, "learning_rate": 3.318809193878911e-06, "loss": 0.0263, "step": 13290 }, { "epoch": 2.22, "grad_norm": 0.326520711183548, "learning_rate": 3.3174657485407537e-06, "loss": 0.0235, "step": 13291 }, { "epoch": 2.22, "grad_norm": 0.5572400689125061, "learning_rate": 3.3161225210951e-06, "loss": 0.049, "step": 13292 }, { "epoch": 2.22, "grad_norm": 0.2822543680667877, "learning_rate": 3.314779511585742e-06, "loss": 0.0184, "step": 13293 }, { "epoch": 2.22, "grad_norm": 0.4475215971469879, "learning_rate": 3.3134367200564765e-06, "loss": 0.0383, "step": 13294 }, { "epoch": 2.22, "grad_norm": 0.40367600321769714, "learning_rate": 3.312094146551085e-06, "loss": 0.0239, "step": 13295 }, { "epoch": 2.22, "grad_norm": 0.3302610218524933, "learning_rate": 3.310751791113339e-06, "loss": 0.0357, "step": 13296 }, { "epoch": 2.22, "grad_norm": 0.27282583713531494, "learning_rate": 3.3094096537870145e-06, "loss": 0.0292, "step": 13297 }, { "epoch": 2.22, "grad_norm": 0.32372602820396423, "learning_rate": 3.3080677346158673e-06, "loss": 0.0398, "step": 13298 }, { "epoch": 2.22, "grad_norm": 0.4913117587566376, "learning_rate": 3.3067260336436603e-06, "loss": 0.0339, "step": 13299 }, { "epoch": 2.22, "grad_norm": 0.36876609921455383, "learning_rate": 3.3053845509141313e-06, "loss": 0.0272, "step": 13300 }, { "epoch": 2.22, "grad_norm": 0.43834757804870605, "learning_rate": 3.304043286471028e-06, "loss": 0.029, "step": 13301 }, { "epoch": 2.22, "grad_norm": 0.40844762325286865, "learning_rate": 3.3027022403580855e-06, "loss": 0.0368, "step": 13302 }, { "epoch": 2.22, "grad_norm": 0.3136124610900879, "learning_rate": 3.3013614126190274e-06, "loss": 0.033, "step": 13303 }, { "epoch": 2.23, "grad_norm": 0.3327239751815796, "learning_rate": 3.300020803297569e-06, "loss": 0.0389, "step": 13304 }, { "epoch": 2.23, "grad_norm": 0.4638666808605194, "learning_rate": 3.2986804124374306e-06, "loss": 0.0393, "step": 13305 }, { "epoch": 2.23, "grad_norm": 0.33933043479919434, "learning_rate": 3.297340240082313e-06, "loss": 0.0331, "step": 13306 }, { "epoch": 2.23, "grad_norm": 0.3651224970817566, "learning_rate": 3.296000286275911e-06, "loss": 0.034, "step": 13307 }, { "epoch": 2.23, "grad_norm": 0.2457253336906433, "learning_rate": 3.294660551061919e-06, "loss": 0.019, "step": 13308 }, { "epoch": 2.23, "grad_norm": 0.2790326476097107, "learning_rate": 3.293321034484025e-06, "loss": 0.0242, "step": 13309 }, { "epoch": 2.23, "grad_norm": 0.4389893412590027, "learning_rate": 3.2919817365858976e-06, "loss": 0.0543, "step": 13310 }, { "epoch": 2.23, "grad_norm": 0.35422542691230774, "learning_rate": 3.290642657411213e-06, "loss": 0.0363, "step": 13311 }, { "epoch": 2.23, "grad_norm": 0.3181045651435852, "learning_rate": 3.289303797003628e-06, "loss": 0.0453, "step": 13312 }, { "epoch": 2.23, "grad_norm": 0.25063613057136536, "learning_rate": 3.2879651554068037e-06, "loss": 0.0239, "step": 13313 }, { "epoch": 2.23, "grad_norm": 0.30080175399780273, "learning_rate": 3.2866267326643843e-06, "loss": 0.0294, "step": 13314 }, { "epoch": 2.23, "grad_norm": 0.5016169548034668, "learning_rate": 3.285288528820009e-06, "loss": 0.0412, "step": 13315 }, { "epoch": 2.23, "grad_norm": 0.49500757455825806, "learning_rate": 3.2839505439173135e-06, "loss": 0.0321, "step": 13316 }, { "epoch": 2.23, "grad_norm": 0.3302246630191803, "learning_rate": 3.2826127779999294e-06, "loss": 0.0332, "step": 13317 }, { "epoch": 2.23, "grad_norm": 0.4394969642162323, "learning_rate": 3.281275231111468e-06, "loss": 0.0369, "step": 13318 }, { "epoch": 2.23, "grad_norm": 0.42408308386802673, "learning_rate": 3.2799379032955494e-06, "loss": 0.0573, "step": 13319 }, { "epoch": 2.23, "grad_norm": 0.42660751938819885, "learning_rate": 3.278600794595774e-06, "loss": 0.0393, "step": 13320 }, { "epoch": 2.23, "grad_norm": 0.3973727524280548, "learning_rate": 3.2772639050557396e-06, "loss": 0.0374, "step": 13321 }, { "epoch": 2.23, "grad_norm": 0.330287367105484, "learning_rate": 3.275927234719041e-06, "loss": 0.0293, "step": 13322 }, { "epoch": 2.23, "grad_norm": 0.5117772221565247, "learning_rate": 3.2745907836292566e-06, "loss": 0.0345, "step": 13323 }, { "epoch": 2.23, "grad_norm": 0.3822733759880066, "learning_rate": 3.273254551829966e-06, "loss": 0.0334, "step": 13324 }, { "epoch": 2.23, "grad_norm": 0.5336891412734985, "learning_rate": 3.2719185393647425e-06, "loss": 0.0254, "step": 13325 }, { "epoch": 2.23, "grad_norm": 0.4590584933757782, "learning_rate": 3.2705827462771424e-06, "loss": 0.0452, "step": 13326 }, { "epoch": 2.23, "grad_norm": 0.40933310985565186, "learning_rate": 3.2692471726107256e-06, "loss": 0.035, "step": 13327 }, { "epoch": 2.23, "grad_norm": 0.3841865658760071, "learning_rate": 3.2679118184090377e-06, "loss": 0.0226, "step": 13328 }, { "epoch": 2.23, "grad_norm": 0.33366087079048157, "learning_rate": 3.2665766837156175e-06, "loss": 0.029, "step": 13329 }, { "epoch": 2.23, "grad_norm": 0.29019394516944885, "learning_rate": 3.2652417685740046e-06, "loss": 0.0331, "step": 13330 }, { "epoch": 2.23, "grad_norm": 0.4008951783180237, "learning_rate": 3.263907073027718e-06, "loss": 0.0362, "step": 13331 }, { "epoch": 2.23, "grad_norm": 0.5005243420600891, "learning_rate": 3.2625725971202816e-06, "loss": 0.0359, "step": 13332 }, { "epoch": 2.23, "grad_norm": 0.3554490804672241, "learning_rate": 3.261238340895211e-06, "loss": 0.0366, "step": 13333 }, { "epoch": 2.23, "grad_norm": 0.3887389004230499, "learning_rate": 3.2599043043960077e-06, "loss": 0.0343, "step": 13334 }, { "epoch": 2.23, "grad_norm": 0.37602218985557556, "learning_rate": 3.2585704876661662e-06, "loss": 0.0301, "step": 13335 }, { "epoch": 2.23, "grad_norm": 0.3281455338001251, "learning_rate": 3.2572368907491834e-06, "loss": 0.0385, "step": 13336 }, { "epoch": 2.23, "grad_norm": 0.35703495144844055, "learning_rate": 3.2559035136885386e-06, "loss": 0.0267, "step": 13337 }, { "epoch": 2.23, "grad_norm": 0.39804038405418396, "learning_rate": 3.254570356527712e-06, "loss": 0.0273, "step": 13338 }, { "epoch": 2.23, "grad_norm": 0.4339723587036133, "learning_rate": 3.2532374193101677e-06, "loss": 0.0277, "step": 13339 }, { "epoch": 2.23, "grad_norm": 0.3797188401222229, "learning_rate": 3.2519047020793713e-06, "loss": 0.0387, "step": 13340 }, { "epoch": 2.23, "grad_norm": 0.27882176637649536, "learning_rate": 3.2505722048787804e-06, "loss": 0.0332, "step": 13341 }, { "epoch": 2.23, "grad_norm": 0.41861990094184875, "learning_rate": 3.2492399277518394e-06, "loss": 0.0339, "step": 13342 }, { "epoch": 2.23, "grad_norm": 0.658546507358551, "learning_rate": 3.2479078707419863e-06, "loss": 0.0298, "step": 13343 }, { "epoch": 2.23, "grad_norm": 0.23762981593608856, "learning_rate": 3.246576033892661e-06, "loss": 0.0278, "step": 13344 }, { "epoch": 2.23, "grad_norm": 0.43178457021713257, "learning_rate": 3.2452444172472853e-06, "loss": 0.0466, "step": 13345 }, { "epoch": 2.23, "grad_norm": 0.3839981257915497, "learning_rate": 3.2439130208492765e-06, "loss": 0.0437, "step": 13346 }, { "epoch": 2.23, "grad_norm": 0.4728862941265106, "learning_rate": 3.2425818447420486e-06, "loss": 0.0292, "step": 13347 }, { "epoch": 2.23, "grad_norm": 0.381274551153183, "learning_rate": 3.24125088896901e-06, "loss": 0.0438, "step": 13348 }, { "epoch": 2.23, "grad_norm": 0.5494613647460938, "learning_rate": 3.2399201535735513e-06, "loss": 0.0369, "step": 13349 }, { "epoch": 2.23, "grad_norm": 0.3128369152545929, "learning_rate": 3.2385896385990714e-06, "loss": 0.0277, "step": 13350 }, { "epoch": 2.23, "grad_norm": 0.4262567162513733, "learning_rate": 3.2372593440889434e-06, "loss": 0.0376, "step": 13351 }, { "epoch": 2.23, "grad_norm": 0.3371572494506836, "learning_rate": 3.235929270086552e-06, "loss": 0.0338, "step": 13352 }, { "epoch": 2.23, "grad_norm": 0.5082088708877563, "learning_rate": 3.2345994166352623e-06, "loss": 0.0486, "step": 13353 }, { "epoch": 2.23, "grad_norm": 0.3031024932861328, "learning_rate": 3.2332697837784332e-06, "loss": 0.0417, "step": 13354 }, { "epoch": 2.23, "grad_norm": 0.5767612457275391, "learning_rate": 3.2319403715594245e-06, "loss": 0.0278, "step": 13355 }, { "epoch": 2.23, "grad_norm": 0.28220078349113464, "learning_rate": 3.230611180021577e-06, "loss": 0.0252, "step": 13356 }, { "epoch": 2.23, "grad_norm": 0.45206254720687866, "learning_rate": 3.2292822092082345e-06, "loss": 0.0383, "step": 13357 }, { "epoch": 2.23, "grad_norm": 0.3488272428512573, "learning_rate": 3.2279534591627325e-06, "loss": 0.0283, "step": 13358 }, { "epoch": 2.23, "grad_norm": 0.41741499304771423, "learning_rate": 3.2266249299283937e-06, "loss": 0.0326, "step": 13359 }, { "epoch": 2.23, "grad_norm": 0.48959654569625854, "learning_rate": 3.2252966215485336e-06, "loss": 0.0473, "step": 13360 }, { "epoch": 2.23, "grad_norm": 0.40994635224342346, "learning_rate": 3.2239685340664683e-06, "loss": 0.0457, "step": 13361 }, { "epoch": 2.23, "grad_norm": 0.25283190608024597, "learning_rate": 3.2226406675255006e-06, "loss": 0.0203, "step": 13362 }, { "epoch": 2.23, "grad_norm": 0.4026670455932617, "learning_rate": 3.2213130219689228e-06, "loss": 0.0328, "step": 13363 }, { "epoch": 2.24, "grad_norm": 0.41148918867111206, "learning_rate": 3.2199855974400284e-06, "loss": 0.0328, "step": 13364 }, { "epoch": 2.24, "grad_norm": 0.44026193022727966, "learning_rate": 3.2186583939820982e-06, "loss": 0.0401, "step": 13365 }, { "epoch": 2.24, "grad_norm": 0.34769803285598755, "learning_rate": 3.2173314116384123e-06, "loss": 0.0384, "step": 13366 }, { "epoch": 2.24, "grad_norm": 0.31168508529663086, "learning_rate": 3.216004650452236e-06, "loss": 0.0248, "step": 13367 }, { "epoch": 2.24, "grad_norm": 0.35344555974006653, "learning_rate": 3.2146781104668235e-06, "loss": 0.043, "step": 13368 }, { "epoch": 2.24, "grad_norm": 0.4259534776210785, "learning_rate": 3.2133517917254386e-06, "loss": 0.0325, "step": 13369 }, { "epoch": 2.24, "grad_norm": 0.4446331560611725, "learning_rate": 3.2120256942713213e-06, "loss": 0.0374, "step": 13370 }, { "epoch": 2.24, "grad_norm": 0.3868882954120636, "learning_rate": 3.21069981814771e-06, "loss": 0.0426, "step": 13371 }, { "epoch": 2.24, "grad_norm": 0.41046586632728577, "learning_rate": 3.209374163397838e-06, "loss": 0.0433, "step": 13372 }, { "epoch": 2.24, "grad_norm": 0.3733026385307312, "learning_rate": 3.208048730064933e-06, "loss": 0.032, "step": 13373 }, { "epoch": 2.24, "grad_norm": 0.3573446273803711, "learning_rate": 3.2067235181922084e-06, "loss": 0.0333, "step": 13374 }, { "epoch": 2.24, "grad_norm": 0.3546675741672516, "learning_rate": 3.2053985278228782e-06, "loss": 0.0419, "step": 13375 }, { "epoch": 2.24, "grad_norm": 0.44440317153930664, "learning_rate": 3.2040737590001437e-06, "loss": 0.0395, "step": 13376 }, { "epoch": 2.24, "grad_norm": 0.50343918800354, "learning_rate": 3.2027492117671964e-06, "loss": 0.0356, "step": 13377 }, { "epoch": 2.24, "grad_norm": 0.251313716173172, "learning_rate": 3.2014248861672325e-06, "loss": 0.0187, "step": 13378 }, { "epoch": 2.24, "grad_norm": 0.36285388469696045, "learning_rate": 3.200100782243426e-06, "loss": 0.027, "step": 13379 }, { "epoch": 2.24, "grad_norm": 0.34737491607666016, "learning_rate": 3.1987769000389544e-06, "loss": 0.0307, "step": 13380 }, { "epoch": 2.24, "grad_norm": 0.3093402087688446, "learning_rate": 3.197453239596988e-06, "loss": 0.0301, "step": 13381 }, { "epoch": 2.24, "grad_norm": 0.2746019661426544, "learning_rate": 3.19612980096068e-06, "loss": 0.0191, "step": 13382 }, { "epoch": 2.24, "grad_norm": 0.4008459448814392, "learning_rate": 3.194806584173189e-06, "loss": 0.0388, "step": 13383 }, { "epoch": 2.24, "grad_norm": 0.5622719526290894, "learning_rate": 3.193483589277656e-06, "loss": 0.0474, "step": 13384 }, { "epoch": 2.24, "grad_norm": 0.4263226389884949, "learning_rate": 3.1921608163172167e-06, "loss": 0.0255, "step": 13385 }, { "epoch": 2.24, "grad_norm": 0.3377062678337097, "learning_rate": 3.190838265335009e-06, "loss": 0.0247, "step": 13386 }, { "epoch": 2.24, "grad_norm": 0.38484489917755127, "learning_rate": 3.189515936374149e-06, "loss": 0.0316, "step": 13387 }, { "epoch": 2.24, "grad_norm": 0.34618815779685974, "learning_rate": 3.188193829477757e-06, "loss": 0.0343, "step": 13388 }, { "epoch": 2.24, "grad_norm": 0.3089808523654938, "learning_rate": 3.186871944688943e-06, "loss": 0.0244, "step": 13389 }, { "epoch": 2.24, "grad_norm": 0.3745706379413605, "learning_rate": 3.185550282050808e-06, "loss": 0.038, "step": 13390 }, { "epoch": 2.24, "grad_norm": 0.3826966881752014, "learning_rate": 3.1842288416064416e-06, "loss": 0.0396, "step": 13391 }, { "epoch": 2.24, "grad_norm": 0.5093250274658203, "learning_rate": 3.182907623398939e-06, "loss": 0.0436, "step": 13392 }, { "epoch": 2.24, "grad_norm": 0.5677410364151001, "learning_rate": 3.1815866274713713e-06, "loss": 0.0307, "step": 13393 }, { "epoch": 2.24, "grad_norm": 0.43669673800468445, "learning_rate": 3.1802658538668196e-06, "loss": 0.0334, "step": 13394 }, { "epoch": 2.24, "grad_norm": 0.3313051462173462, "learning_rate": 3.1789453026283434e-06, "loss": 0.0256, "step": 13395 }, { "epoch": 2.24, "grad_norm": 0.30179429054260254, "learning_rate": 3.177624973799003e-06, "loss": 0.0327, "step": 13396 }, { "epoch": 2.24, "grad_norm": 0.3683057427406311, "learning_rate": 3.1763048674218533e-06, "loss": 0.0396, "step": 13397 }, { "epoch": 2.24, "grad_norm": 0.3531959056854248, "learning_rate": 3.174984983539934e-06, "loss": 0.039, "step": 13398 }, { "epoch": 2.24, "grad_norm": 0.464842826128006, "learning_rate": 3.1736653221962787e-06, "loss": 0.0588, "step": 13399 }, { "epoch": 2.24, "grad_norm": 0.5886073708534241, "learning_rate": 3.1723458834339238e-06, "loss": 0.0456, "step": 13400 }, { "epoch": 2.24, "grad_norm": 0.30740684270858765, "learning_rate": 3.1710266672958877e-06, "loss": 0.028, "step": 13401 }, { "epoch": 2.24, "grad_norm": 0.3982119560241699, "learning_rate": 3.169707673825181e-06, "loss": 0.0349, "step": 13402 }, { "epoch": 2.24, "grad_norm": 0.5987967252731323, "learning_rate": 3.168388903064815e-06, "loss": 0.0344, "step": 13403 }, { "epoch": 2.24, "grad_norm": 0.4391082525253296, "learning_rate": 3.167070355057794e-06, "loss": 0.0456, "step": 13404 }, { "epoch": 2.24, "grad_norm": 0.2876587510108948, "learning_rate": 3.1657520298471034e-06, "loss": 0.0255, "step": 13405 }, { "epoch": 2.24, "grad_norm": 0.38916927576065063, "learning_rate": 3.1644339274757353e-06, "loss": 0.0306, "step": 13406 }, { "epoch": 2.24, "grad_norm": 0.37308022379875183, "learning_rate": 3.163116047986662e-06, "loss": 0.0317, "step": 13407 }, { "epoch": 2.24, "grad_norm": 0.4387173056602478, "learning_rate": 3.16179839142286e-06, "loss": 0.0318, "step": 13408 }, { "epoch": 2.24, "grad_norm": 0.3084182143211365, "learning_rate": 3.1604809578272923e-06, "loss": 0.0391, "step": 13409 }, { "epoch": 2.24, "grad_norm": 0.3163505792617798, "learning_rate": 3.1591637472429105e-06, "loss": 0.0359, "step": 13410 }, { "epoch": 2.24, "grad_norm": 0.22984661161899567, "learning_rate": 3.1578467597126706e-06, "loss": 0.0181, "step": 13411 }, { "epoch": 2.24, "grad_norm": 0.38507279753685, "learning_rate": 3.1565299952795093e-06, "loss": 0.0299, "step": 13412 }, { "epoch": 2.24, "grad_norm": 0.36320069432258606, "learning_rate": 3.155213453986362e-06, "loss": 0.0259, "step": 13413 }, { "epoch": 2.24, "grad_norm": 0.43315190076828003, "learning_rate": 3.1538971358761627e-06, "loss": 0.0355, "step": 13414 }, { "epoch": 2.24, "grad_norm": 0.3788485825061798, "learning_rate": 3.1525810409918255e-06, "loss": 0.024, "step": 13415 }, { "epoch": 2.24, "grad_norm": 0.369890034198761, "learning_rate": 3.1512651693762617e-06, "loss": 0.0369, "step": 13416 }, { "epoch": 2.24, "grad_norm": 0.5704896450042725, "learning_rate": 3.149949521072384e-06, "loss": 0.0306, "step": 13417 }, { "epoch": 2.24, "grad_norm": 0.5578597187995911, "learning_rate": 3.1486340961230864e-06, "loss": 0.0457, "step": 13418 }, { "epoch": 2.24, "grad_norm": 0.28786081075668335, "learning_rate": 3.1473188945712564e-06, "loss": 0.0292, "step": 13419 }, { "epoch": 2.24, "grad_norm": 0.38106051087379456, "learning_rate": 3.146003916459782e-06, "loss": 0.0272, "step": 13420 }, { "epoch": 2.24, "grad_norm": 0.41772764921188354, "learning_rate": 3.14468916183154e-06, "loss": 0.0375, "step": 13421 }, { "epoch": 2.24, "grad_norm": 0.4366404414176941, "learning_rate": 3.143374630729402e-06, "loss": 0.0308, "step": 13422 }, { "epoch": 2.25, "grad_norm": 0.35064539313316345, "learning_rate": 3.1420603231962267e-06, "loss": 0.0379, "step": 13423 }, { "epoch": 2.25, "grad_norm": 0.4029953181743622, "learning_rate": 3.1407462392748656e-06, "loss": 0.0251, "step": 13424 }, { "epoch": 2.25, "grad_norm": 0.315759539604187, "learning_rate": 3.1394323790081737e-06, "loss": 0.0328, "step": 13425 }, { "epoch": 2.25, "grad_norm": 0.3453792929649353, "learning_rate": 3.138118742438986e-06, "loss": 0.0367, "step": 13426 }, { "epoch": 2.25, "grad_norm": 0.35239177942276, "learning_rate": 3.136805329610133e-06, "loss": 0.0408, "step": 13427 }, { "epoch": 2.25, "grad_norm": 0.34002646803855896, "learning_rate": 3.1354921405644447e-06, "loss": 0.0359, "step": 13428 }, { "epoch": 2.25, "grad_norm": 0.7020300626754761, "learning_rate": 3.1341791753447402e-06, "loss": 0.0352, "step": 13429 }, { "epoch": 2.25, "grad_norm": 0.315573126077652, "learning_rate": 3.1328664339938252e-06, "loss": 0.036, "step": 13430 }, { "epoch": 2.25, "grad_norm": 0.3523479700088501, "learning_rate": 3.1315539165545096e-06, "loss": 0.0374, "step": 13431 }, { "epoch": 2.25, "grad_norm": 0.32891133427619934, "learning_rate": 3.1302416230695855e-06, "loss": 0.0364, "step": 13432 }, { "epoch": 2.25, "grad_norm": 0.4230121672153473, "learning_rate": 3.128929553581841e-06, "loss": 0.0372, "step": 13433 }, { "epoch": 2.25, "grad_norm": 0.369840145111084, "learning_rate": 3.127617708134062e-06, "loss": 0.0321, "step": 13434 }, { "epoch": 2.25, "grad_norm": 0.4379889667034149, "learning_rate": 3.1263060867690164e-06, "loss": 0.0403, "step": 13435 }, { "epoch": 2.25, "grad_norm": 0.3955335319042206, "learning_rate": 3.1249946895294767e-06, "loss": 0.0473, "step": 13436 }, { "epoch": 2.25, "grad_norm": 0.39361822605133057, "learning_rate": 3.1236835164582035e-06, "loss": 0.0313, "step": 13437 }, { "epoch": 2.25, "grad_norm": 0.6114875078201294, "learning_rate": 3.1223725675979443e-06, "loss": 0.043, "step": 13438 }, { "epoch": 2.25, "grad_norm": 0.38815852999687195, "learning_rate": 3.1210618429914507e-06, "loss": 0.027, "step": 13439 }, { "epoch": 2.25, "grad_norm": 0.34039267897605896, "learning_rate": 3.1197513426814563e-06, "loss": 0.0344, "step": 13440 }, { "epoch": 2.25, "grad_norm": 0.28668418526649475, "learning_rate": 3.118441066710689e-06, "loss": 0.0299, "step": 13441 }, { "epoch": 2.25, "grad_norm": 0.41113150119781494, "learning_rate": 3.1171310151218793e-06, "loss": 0.0352, "step": 13442 }, { "epoch": 2.25, "grad_norm": 0.4461744427680969, "learning_rate": 3.115821187957735e-06, "loss": 0.0304, "step": 13443 }, { "epoch": 2.25, "grad_norm": 0.3471422791481018, "learning_rate": 3.1145115852609687e-06, "loss": 0.0342, "step": 13444 }, { "epoch": 2.25, "grad_norm": 0.44081899523735046, "learning_rate": 3.113202207074284e-06, "loss": 0.0353, "step": 13445 }, { "epoch": 2.25, "grad_norm": 0.3998417258262634, "learning_rate": 3.1118930534403736e-06, "loss": 0.0272, "step": 13446 }, { "epoch": 2.25, "grad_norm": 0.4080335199832916, "learning_rate": 3.1105841244019196e-06, "loss": 0.0342, "step": 13447 }, { "epoch": 2.25, "grad_norm": 0.6176578998565674, "learning_rate": 3.1092754200016075e-06, "loss": 0.0245, "step": 13448 }, { "epoch": 2.25, "grad_norm": 0.4607607126235962, "learning_rate": 3.107966940282103e-06, "loss": 0.0372, "step": 13449 }, { "epoch": 2.25, "grad_norm": 0.35070228576660156, "learning_rate": 3.106658685286078e-06, "loss": 0.0319, "step": 13450 }, { "epoch": 2.25, "grad_norm": 0.3600362539291382, "learning_rate": 3.1053506550561818e-06, "loss": 0.0362, "step": 13451 }, { "epoch": 2.25, "grad_norm": 0.35385310649871826, "learning_rate": 3.1040428496350695e-06, "loss": 0.0336, "step": 13452 }, { "epoch": 2.25, "grad_norm": 0.2999829649925232, "learning_rate": 3.1027352690653857e-06, "loss": 0.0312, "step": 13453 }, { "epoch": 2.25, "grad_norm": 0.31532010436058044, "learning_rate": 3.101427913389763e-06, "loss": 0.0368, "step": 13454 }, { "epoch": 2.25, "grad_norm": 0.25231513381004333, "learning_rate": 3.100120782650825e-06, "loss": 0.0189, "step": 13455 }, { "epoch": 2.25, "grad_norm": 0.3428060710430145, "learning_rate": 3.0988138768912013e-06, "loss": 0.0243, "step": 13456 }, { "epoch": 2.25, "grad_norm": 0.4279554784297943, "learning_rate": 3.0975071961534996e-06, "loss": 0.0332, "step": 13457 }, { "epoch": 2.25, "grad_norm": 0.4176177978515625, "learning_rate": 3.096200740480324e-06, "loss": 0.0374, "step": 13458 }, { "epoch": 2.25, "grad_norm": 0.37272021174430847, "learning_rate": 3.094894509914279e-06, "loss": 0.0318, "step": 13459 }, { "epoch": 2.25, "grad_norm": 0.3749074339866638, "learning_rate": 3.0935885044979484e-06, "loss": 0.0308, "step": 13460 }, { "epoch": 2.25, "grad_norm": 0.31199249625205994, "learning_rate": 3.092282724273923e-06, "loss": 0.0295, "step": 13461 }, { "epoch": 2.25, "grad_norm": 0.44091299176216125, "learning_rate": 3.0909771692847778e-06, "loss": 0.0367, "step": 13462 }, { "epoch": 2.25, "grad_norm": 0.7230521440505981, "learning_rate": 3.0896718395730795e-06, "loss": 0.0626, "step": 13463 }, { "epoch": 2.25, "grad_norm": 0.3376844525337219, "learning_rate": 3.088366735181395e-06, "loss": 0.0334, "step": 13464 }, { "epoch": 2.25, "grad_norm": 0.4808341860771179, "learning_rate": 3.0870618561522747e-06, "loss": 0.0372, "step": 13465 }, { "epoch": 2.25, "grad_norm": 0.3357461094856262, "learning_rate": 3.0857572025282645e-06, "loss": 0.0242, "step": 13466 }, { "epoch": 2.25, "grad_norm": 0.3409620523452759, "learning_rate": 3.0844527743519094e-06, "loss": 0.0347, "step": 13467 }, { "epoch": 2.25, "grad_norm": 0.4048672318458557, "learning_rate": 3.083148571665736e-06, "loss": 0.0276, "step": 13468 }, { "epoch": 2.25, "grad_norm": 0.5205522179603577, "learning_rate": 3.0818445945122733e-06, "loss": 0.0321, "step": 13469 }, { "epoch": 2.25, "grad_norm": 0.41710036993026733, "learning_rate": 3.080540842934041e-06, "loss": 0.0319, "step": 13470 }, { "epoch": 2.25, "grad_norm": 0.3910416066646576, "learning_rate": 3.0792373169735466e-06, "loss": 0.038, "step": 13471 }, { "epoch": 2.25, "grad_norm": 0.3338453769683838, "learning_rate": 3.077934016673292e-06, "loss": 0.0246, "step": 13472 }, { "epoch": 2.25, "grad_norm": 0.717231810092926, "learning_rate": 3.0766309420757757e-06, "loss": 0.035, "step": 13473 }, { "epoch": 2.25, "grad_norm": 0.2640993893146515, "learning_rate": 3.0753280932234864e-06, "loss": 0.0214, "step": 13474 }, { "epoch": 2.25, "grad_norm": 0.46189597249031067, "learning_rate": 3.0740254701589e-06, "loss": 0.058, "step": 13475 }, { "epoch": 2.25, "grad_norm": 0.45948612689971924, "learning_rate": 3.072723072924495e-06, "loss": 0.0276, "step": 13476 }, { "epoch": 2.25, "grad_norm": 0.5602495670318604, "learning_rate": 3.071420901562735e-06, "loss": 0.0427, "step": 13477 }, { "epoch": 2.25, "grad_norm": 0.28323787450790405, "learning_rate": 3.070118956116085e-06, "loss": 0.0254, "step": 13478 }, { "epoch": 2.25, "grad_norm": 0.5505783557891846, "learning_rate": 3.068817236626992e-06, "loss": 0.0279, "step": 13479 }, { "epoch": 2.25, "grad_norm": 0.40528643131256104, "learning_rate": 3.0675157431378964e-06, "loss": 0.0295, "step": 13480 }, { "epoch": 2.25, "grad_norm": 0.6153066158294678, "learning_rate": 3.0662144756912425e-06, "loss": 0.0256, "step": 13481 }, { "epoch": 2.25, "grad_norm": 0.4176537096500397, "learning_rate": 3.0649134343294563e-06, "loss": 0.0325, "step": 13482 }, { "epoch": 2.26, "grad_norm": 0.48297107219696045, "learning_rate": 3.063612619094958e-06, "loss": 0.0479, "step": 13483 }, { "epoch": 2.26, "grad_norm": 0.3590956926345825, "learning_rate": 3.062312030030162e-06, "loss": 0.036, "step": 13484 }, { "epoch": 2.26, "grad_norm": 0.45338255167007446, "learning_rate": 3.0610116671774827e-06, "loss": 0.0423, "step": 13485 }, { "epoch": 2.26, "grad_norm": 0.40133053064346313, "learning_rate": 3.059711530579311e-06, "loss": 0.0379, "step": 13486 }, { "epoch": 2.26, "grad_norm": 0.462645560503006, "learning_rate": 3.058411620278048e-06, "loss": 0.0409, "step": 13487 }, { "epoch": 2.26, "grad_norm": 0.3694174885749817, "learning_rate": 3.0571119363160738e-06, "loss": 0.0337, "step": 13488 }, { "epoch": 2.26, "grad_norm": 0.5172454118728638, "learning_rate": 3.0558124787357648e-06, "loss": 0.0564, "step": 13489 }, { "epoch": 2.26, "grad_norm": 0.5568253993988037, "learning_rate": 3.0545132475794968e-06, "loss": 0.0217, "step": 13490 }, { "epoch": 2.26, "grad_norm": 0.40117478370666504, "learning_rate": 3.053214242889626e-06, "loss": 0.0277, "step": 13491 }, { "epoch": 2.26, "grad_norm": 0.4070936441421509, "learning_rate": 3.0519154647085127e-06, "loss": 0.0519, "step": 13492 }, { "epoch": 2.26, "grad_norm": 0.59710693359375, "learning_rate": 3.0506169130785067e-06, "loss": 0.0407, "step": 13493 }, { "epoch": 2.26, "grad_norm": 0.3937097489833832, "learning_rate": 3.0493185880419453e-06, "loss": 0.0296, "step": 13494 }, { "epoch": 2.26, "grad_norm": 0.3644331693649292, "learning_rate": 3.0480204896411645e-06, "loss": 0.0384, "step": 13495 }, { "epoch": 2.26, "grad_norm": 0.19718414545059204, "learning_rate": 3.0467226179184905e-06, "loss": 0.0175, "step": 13496 }, { "epoch": 2.26, "grad_norm": 0.6172083020210266, "learning_rate": 3.0454249729162377e-06, "loss": 0.0476, "step": 13497 }, { "epoch": 2.26, "grad_norm": 0.42901143431663513, "learning_rate": 3.0441275546767234e-06, "loss": 0.0459, "step": 13498 }, { "epoch": 2.26, "grad_norm": 0.2893863618373871, "learning_rate": 3.0428303632422464e-06, "loss": 0.0244, "step": 13499 }, { "epoch": 2.26, "grad_norm": 0.43290624022483826, "learning_rate": 3.0415333986551053e-06, "loss": 0.0391, "step": 13500 }, { "epoch": 2.26, "grad_norm": 0.3670434355735779, "learning_rate": 3.040236660957593e-06, "loss": 0.0332, "step": 13501 }, { "epoch": 2.26, "grad_norm": 0.42279067635536194, "learning_rate": 3.038940150191988e-06, "loss": 0.0466, "step": 13502 }, { "epoch": 2.26, "grad_norm": 0.4010894000530243, "learning_rate": 3.0376438664005615e-06, "loss": 0.0319, "step": 13503 }, { "epoch": 2.26, "grad_norm": 0.5518633723258972, "learning_rate": 3.0363478096255873e-06, "loss": 0.0394, "step": 13504 }, { "epoch": 2.26, "grad_norm": 0.3773404359817505, "learning_rate": 3.035051979909317e-06, "loss": 0.0341, "step": 13505 }, { "epoch": 2.26, "grad_norm": 0.329031378030777, "learning_rate": 3.0337563772940116e-06, "loss": 0.0418, "step": 13506 }, { "epoch": 2.26, "grad_norm": 0.313138872385025, "learning_rate": 3.032461001821907e-06, "loss": 0.029, "step": 13507 }, { "epoch": 2.26, "grad_norm": 0.31744393706321716, "learning_rate": 3.0311658535352486e-06, "loss": 0.0297, "step": 13508 }, { "epoch": 2.26, "grad_norm": 0.34224480390548706, "learning_rate": 3.0298709324762587e-06, "loss": 0.0395, "step": 13509 }, { "epoch": 2.26, "grad_norm": 0.6020793318748474, "learning_rate": 3.0285762386871674e-06, "loss": 0.0378, "step": 13510 }, { "epoch": 2.26, "grad_norm": 0.3802100419998169, "learning_rate": 3.027281772210184e-06, "loss": 0.0454, "step": 13511 }, { "epoch": 2.26, "grad_norm": 0.32753920555114746, "learning_rate": 3.025987533087521e-06, "loss": 0.0354, "step": 13512 }, { "epoch": 2.26, "grad_norm": 0.38788414001464844, "learning_rate": 3.024693521361376e-06, "loss": 0.0294, "step": 13513 }, { "epoch": 2.26, "grad_norm": 0.2952594757080078, "learning_rate": 3.0233997370739387e-06, "loss": 0.0261, "step": 13514 }, { "epoch": 2.26, "grad_norm": 0.4153158366680145, "learning_rate": 3.0221061802674013e-06, "loss": 0.0331, "step": 13515 }, { "epoch": 2.26, "grad_norm": 0.34153103828430176, "learning_rate": 3.0208128509839353e-06, "loss": 0.027, "step": 13516 }, { "epoch": 2.26, "grad_norm": 0.4316774606704712, "learning_rate": 3.0195197492657147e-06, "loss": 0.0478, "step": 13517 }, { "epoch": 2.26, "grad_norm": 0.34818267822265625, "learning_rate": 3.018226875154906e-06, "loss": 0.024, "step": 13518 }, { "epoch": 2.26, "grad_norm": 0.40313732624053955, "learning_rate": 3.0169342286936577e-06, "loss": 0.031, "step": 13519 }, { "epoch": 2.26, "grad_norm": 0.5718851089477539, "learning_rate": 3.015641809924126e-06, "loss": 0.039, "step": 13520 }, { "epoch": 2.26, "grad_norm": 0.45290255546569824, "learning_rate": 3.014349618888447e-06, "loss": 0.0538, "step": 13521 }, { "epoch": 2.26, "grad_norm": 0.3242068886756897, "learning_rate": 3.0130576556287515e-06, "loss": 0.0339, "step": 13522 }, { "epoch": 2.26, "grad_norm": 0.34414082765579224, "learning_rate": 3.011765920187174e-06, "loss": 0.0264, "step": 13523 }, { "epoch": 2.26, "grad_norm": 0.3947323262691498, "learning_rate": 3.010474412605824e-06, "loss": 0.0383, "step": 13524 }, { "epoch": 2.26, "grad_norm": 0.5006511807441711, "learning_rate": 3.0091831329268172e-06, "loss": 0.0383, "step": 13525 }, { "epoch": 2.26, "grad_norm": 0.30654269456863403, "learning_rate": 3.0078920811922617e-06, "loss": 0.0226, "step": 13526 }, { "epoch": 2.26, "grad_norm": 0.5030139088630676, "learning_rate": 3.0066012574442484e-06, "loss": 0.0501, "step": 13527 }, { "epoch": 2.26, "grad_norm": 0.33727574348449707, "learning_rate": 3.005310661724864e-06, "loss": 0.0244, "step": 13528 }, { "epoch": 2.26, "grad_norm": 0.5530507564544678, "learning_rate": 3.0040202940761975e-06, "loss": 0.0486, "step": 13529 }, { "epoch": 2.26, "grad_norm": 0.34641462564468384, "learning_rate": 3.0027301545403176e-06, "loss": 0.0381, "step": 13530 }, { "epoch": 2.26, "grad_norm": 0.34487777948379517, "learning_rate": 3.00144024315929e-06, "loss": 0.0288, "step": 13531 }, { "epoch": 2.26, "grad_norm": 0.3491571545600891, "learning_rate": 3.0001505599751755e-06, "loss": 0.0367, "step": 13532 }, { "epoch": 2.26, "grad_norm": 0.5011155009269714, "learning_rate": 2.9988611050300265e-06, "loss": 0.047, "step": 13533 }, { "epoch": 2.26, "grad_norm": 0.3753563463687897, "learning_rate": 2.9975718783658913e-06, "loss": 0.0387, "step": 13534 }, { "epoch": 2.26, "grad_norm": 1.9066280126571655, "learning_rate": 2.9962828800248013e-06, "loss": 0.0283, "step": 13535 }, { "epoch": 2.26, "grad_norm": 0.5656232237815857, "learning_rate": 2.9949941100487854e-06, "loss": 0.0313, "step": 13536 }, { "epoch": 2.26, "grad_norm": 0.42747578024864197, "learning_rate": 2.99370556847987e-06, "loss": 0.0308, "step": 13537 }, { "epoch": 2.26, "grad_norm": 0.3101271986961365, "learning_rate": 2.9924172553600673e-06, "loss": 0.0263, "step": 13538 }, { "epoch": 2.26, "grad_norm": 0.40032899379730225, "learning_rate": 2.9911291707313805e-06, "loss": 0.0351, "step": 13539 }, { "epoch": 2.26, "grad_norm": 0.34512025117874146, "learning_rate": 2.989841314635814e-06, "loss": 0.0422, "step": 13540 }, { "epoch": 2.26, "grad_norm": 0.3360998034477234, "learning_rate": 2.988553687115362e-06, "loss": 0.0221, "step": 13541 }, { "epoch": 2.26, "grad_norm": 0.39817023277282715, "learning_rate": 2.9872662882120016e-06, "loss": 0.0328, "step": 13542 }, { "epoch": 2.27, "grad_norm": 0.3585142493247986, "learning_rate": 2.985979117967719e-06, "loss": 0.0352, "step": 13543 }, { "epoch": 2.27, "grad_norm": 0.5983827114105225, "learning_rate": 2.9846921764244795e-06, "loss": 0.0401, "step": 13544 }, { "epoch": 2.27, "grad_norm": 0.2915457487106323, "learning_rate": 2.9834054636242426e-06, "loss": 0.0187, "step": 13545 }, { "epoch": 2.27, "grad_norm": 0.323381245136261, "learning_rate": 2.982118979608969e-06, "loss": 0.0248, "step": 13546 }, { "epoch": 2.27, "grad_norm": 0.40575921535491943, "learning_rate": 2.9808327244206e-06, "loss": 0.0298, "step": 13547 }, { "epoch": 2.27, "grad_norm": 0.4072294235229492, "learning_rate": 2.97954669810108e-06, "loss": 0.0319, "step": 13548 }, { "epoch": 2.27, "grad_norm": 0.570066511631012, "learning_rate": 2.9782609006923423e-06, "loss": 0.0336, "step": 13549 }, { "epoch": 2.27, "grad_norm": 0.26603102684020996, "learning_rate": 2.976975332236308e-06, "loss": 0.0178, "step": 13550 }, { "epoch": 2.27, "grad_norm": 0.38179340958595276, "learning_rate": 2.9756899927748995e-06, "loss": 0.0391, "step": 13551 }, { "epoch": 2.27, "grad_norm": 0.2848934531211853, "learning_rate": 2.974404882350025e-06, "loss": 0.0318, "step": 13552 }, { "epoch": 2.27, "grad_norm": 0.36376240849494934, "learning_rate": 2.973120001003582e-06, "loss": 0.0375, "step": 13553 }, { "epoch": 2.27, "grad_norm": 0.41845494508743286, "learning_rate": 2.9718353487774733e-06, "loss": 0.0341, "step": 13554 }, { "epoch": 2.27, "grad_norm": 0.4254073202610016, "learning_rate": 2.9705509257135814e-06, "loss": 0.0373, "step": 13555 }, { "epoch": 2.27, "grad_norm": 0.46661922335624695, "learning_rate": 2.969266731853787e-06, "loss": 0.0466, "step": 13556 }, { "epoch": 2.27, "grad_norm": 0.2544555962085724, "learning_rate": 2.9679827672399687e-06, "loss": 0.0321, "step": 13557 }, { "epoch": 2.27, "grad_norm": 0.342924565076828, "learning_rate": 2.966699031913988e-06, "loss": 0.019, "step": 13558 }, { "epoch": 2.27, "grad_norm": 0.33332645893096924, "learning_rate": 2.9654155259176974e-06, "loss": 0.0394, "step": 13559 }, { "epoch": 2.27, "grad_norm": 0.2707979083061218, "learning_rate": 2.9641322492929558e-06, "loss": 0.031, "step": 13560 }, { "epoch": 2.27, "grad_norm": 0.4546140432357788, "learning_rate": 2.962849202081599e-06, "loss": 0.0418, "step": 13561 }, { "epoch": 2.27, "grad_norm": 0.3377261459827423, "learning_rate": 2.961566384325469e-06, "loss": 0.0372, "step": 13562 }, { "epoch": 2.27, "grad_norm": 0.30534419417381287, "learning_rate": 2.9602837960663912e-06, "loss": 0.0324, "step": 13563 }, { "epoch": 2.27, "grad_norm": 0.3097570538520813, "learning_rate": 2.959001437346182e-06, "loss": 0.0297, "step": 13564 }, { "epoch": 2.27, "grad_norm": 0.2992710471153259, "learning_rate": 2.957719308206657e-06, "loss": 0.0344, "step": 13565 }, { "epoch": 2.27, "grad_norm": 0.4242086112499237, "learning_rate": 2.956437408689624e-06, "loss": 0.0383, "step": 13566 }, { "epoch": 2.27, "grad_norm": 0.3715399205684662, "learning_rate": 2.9551557388368777e-06, "loss": 0.0296, "step": 13567 }, { "epoch": 2.27, "grad_norm": 0.567457914352417, "learning_rate": 2.9538742986902124e-06, "loss": 0.0298, "step": 13568 }, { "epoch": 2.27, "grad_norm": 0.3032873570919037, "learning_rate": 2.95259308829141e-06, "loss": 0.0271, "step": 13569 }, { "epoch": 2.27, "grad_norm": 0.5491082668304443, "learning_rate": 2.9513121076822406e-06, "loss": 0.0456, "step": 13570 }, { "epoch": 2.27, "grad_norm": 0.32788175344467163, "learning_rate": 2.950031356904479e-06, "loss": 0.0423, "step": 13571 }, { "epoch": 2.27, "grad_norm": 0.3187093436717987, "learning_rate": 2.9487508359998797e-06, "loss": 0.0207, "step": 13572 }, { "epoch": 2.27, "grad_norm": 0.2868861258029938, "learning_rate": 2.9474705450102003e-06, "loss": 0.0407, "step": 13573 }, { "epoch": 2.27, "grad_norm": 0.2968709468841553, "learning_rate": 2.946190483977187e-06, "loss": 0.0287, "step": 13574 }, { "epoch": 2.27, "grad_norm": 0.3533129394054413, "learning_rate": 2.9449106529425732e-06, "loss": 0.0381, "step": 13575 }, { "epoch": 2.27, "grad_norm": 0.41034001111984253, "learning_rate": 2.9436310519480947e-06, "loss": 0.0263, "step": 13576 }, { "epoch": 2.27, "grad_norm": 0.454314649105072, "learning_rate": 2.9423516810354724e-06, "loss": 0.0474, "step": 13577 }, { "epoch": 2.27, "grad_norm": 0.39993101358413696, "learning_rate": 2.941072540246418e-06, "loss": 0.0347, "step": 13578 }, { "epoch": 2.27, "grad_norm": 0.48137137293815613, "learning_rate": 2.939793629622645e-06, "loss": 0.0356, "step": 13579 }, { "epoch": 2.27, "grad_norm": 0.33119869232177734, "learning_rate": 2.9385149492058496e-06, "loss": 0.0269, "step": 13580 }, { "epoch": 2.27, "grad_norm": 0.4560886323451996, "learning_rate": 2.937236499037727e-06, "loss": 0.0493, "step": 13581 }, { "epoch": 2.27, "grad_norm": 0.6138084530830383, "learning_rate": 2.935958279159965e-06, "loss": 0.0653, "step": 13582 }, { "epoch": 2.27, "grad_norm": 0.435450941324234, "learning_rate": 2.934680289614239e-06, "loss": 0.0473, "step": 13583 }, { "epoch": 2.27, "grad_norm": 0.3627241551876068, "learning_rate": 2.9334025304422166e-06, "loss": 0.0325, "step": 13584 }, { "epoch": 2.27, "grad_norm": 0.2535378038883209, "learning_rate": 2.9321250016855675e-06, "loss": 0.021, "step": 13585 }, { "epoch": 2.27, "grad_norm": 0.34730392694473267, "learning_rate": 2.930847703385943e-06, "loss": 0.0338, "step": 13586 }, { "epoch": 2.27, "grad_norm": 0.25637733936309814, "learning_rate": 2.9295706355849874e-06, "loss": 0.031, "step": 13587 }, { "epoch": 2.27, "grad_norm": 0.41634470224380493, "learning_rate": 2.9282937983243464e-06, "loss": 0.0344, "step": 13588 }, { "epoch": 2.27, "grad_norm": 0.41043201088905334, "learning_rate": 2.927017191645651e-06, "loss": 0.029, "step": 13589 }, { "epoch": 2.27, "grad_norm": 0.2687108814716339, "learning_rate": 2.9257408155905308e-06, "loss": 0.0323, "step": 13590 }, { "epoch": 2.27, "grad_norm": 0.4198879599571228, "learning_rate": 2.9244646702005995e-06, "loss": 0.0301, "step": 13591 }, { "epoch": 2.27, "grad_norm": 0.2799323499202728, "learning_rate": 2.9231887555174653e-06, "loss": 0.0249, "step": 13592 }, { "epoch": 2.27, "grad_norm": 0.31044623255729675, "learning_rate": 2.921913071582737e-06, "loss": 0.0384, "step": 13593 }, { "epoch": 2.27, "grad_norm": 0.40721502900123596, "learning_rate": 2.9206376184380057e-06, "loss": 0.0374, "step": 13594 }, { "epoch": 2.27, "grad_norm": 0.48394107818603516, "learning_rate": 2.9193623961248585e-06, "loss": 0.0491, "step": 13595 }, { "epoch": 2.27, "grad_norm": 0.34706220030784607, "learning_rate": 2.9180874046848762e-06, "loss": 0.0304, "step": 13596 }, { "epoch": 2.27, "grad_norm": 0.4131108224391937, "learning_rate": 2.9168126441596355e-06, "loss": 0.0449, "step": 13597 }, { "epoch": 2.27, "grad_norm": 0.410823792219162, "learning_rate": 2.9155381145906956e-06, "loss": 0.0367, "step": 13598 }, { "epoch": 2.27, "grad_norm": 0.3368914723396301, "learning_rate": 2.9142638160196213e-06, "loss": 0.0232, "step": 13599 }, { "epoch": 2.27, "grad_norm": 0.38443291187286377, "learning_rate": 2.912989748487958e-06, "loss": 0.0446, "step": 13600 }, { "epoch": 2.27, "grad_norm": 0.3854362368583679, "learning_rate": 2.911715912037245e-06, "loss": 0.022, "step": 13601 }, { "epoch": 2.27, "grad_norm": 0.3769015371799469, "learning_rate": 2.9104423067090247e-06, "loss": 0.0394, "step": 13602 }, { "epoch": 2.28, "grad_norm": 0.37542492151260376, "learning_rate": 2.909168932544819e-06, "loss": 0.0387, "step": 13603 }, { "epoch": 2.28, "grad_norm": 0.33627602458000183, "learning_rate": 2.9078957895861494e-06, "loss": 0.031, "step": 13604 }, { "epoch": 2.28, "grad_norm": 0.3908438980579376, "learning_rate": 2.906622877874532e-06, "loss": 0.0501, "step": 13605 }, { "epoch": 2.28, "grad_norm": 0.33757686614990234, "learning_rate": 2.9053501974514654e-06, "loss": 0.0324, "step": 13606 }, { "epoch": 2.28, "grad_norm": 0.532674252986908, "learning_rate": 2.9040777483584538e-06, "loss": 0.0331, "step": 13607 }, { "epoch": 2.28, "grad_norm": 0.3970150351524353, "learning_rate": 2.902805530636983e-06, "loss": 0.0418, "step": 13608 }, { "epoch": 2.28, "grad_norm": 0.33952534198760986, "learning_rate": 2.901533544328533e-06, "loss": 0.0384, "step": 13609 }, { "epoch": 2.28, "grad_norm": 0.48077648878097534, "learning_rate": 2.9002617894745834e-06, "loss": 0.0237, "step": 13610 }, { "epoch": 2.28, "grad_norm": 0.36501607298851013, "learning_rate": 2.898990266116597e-06, "loss": 0.0332, "step": 13611 }, { "epoch": 2.28, "grad_norm": 0.3092080056667328, "learning_rate": 2.8977189742960353e-06, "loss": 0.0226, "step": 13612 }, { "epoch": 2.28, "grad_norm": 0.29769083857536316, "learning_rate": 2.8964479140543544e-06, "loss": 0.0361, "step": 13613 }, { "epoch": 2.28, "grad_norm": 0.32202771306037903, "learning_rate": 2.8951770854329952e-06, "loss": 0.0351, "step": 13614 }, { "epoch": 2.28, "grad_norm": 0.31988444924354553, "learning_rate": 2.8939064884733904e-06, "loss": 0.0239, "step": 13615 }, { "epoch": 2.28, "grad_norm": 0.35704725980758667, "learning_rate": 2.8926361232169776e-06, "loss": 0.0432, "step": 13616 }, { "epoch": 2.28, "grad_norm": 0.4235333502292633, "learning_rate": 2.8913659897051716e-06, "loss": 0.0383, "step": 13617 }, { "epoch": 2.28, "grad_norm": 0.36947333812713623, "learning_rate": 2.8900960879793937e-06, "loss": 0.0252, "step": 13618 }, { "epoch": 2.28, "grad_norm": 0.41674351692199707, "learning_rate": 2.8888264180810455e-06, "loss": 0.0405, "step": 13619 }, { "epoch": 2.28, "grad_norm": 0.4497377574443817, "learning_rate": 2.8875569800515245e-06, "loss": 0.0461, "step": 13620 }, { "epoch": 2.28, "grad_norm": 0.3691634237766266, "learning_rate": 2.886287773932226e-06, "loss": 0.0416, "step": 13621 }, { "epoch": 2.28, "grad_norm": 0.43200966715812683, "learning_rate": 2.885018799764536e-06, "loss": 0.0332, "step": 13622 }, { "epoch": 2.28, "grad_norm": 0.5679542422294617, "learning_rate": 2.8837500575898257e-06, "loss": 0.047, "step": 13623 }, { "epoch": 2.28, "grad_norm": 0.3521616458892822, "learning_rate": 2.8824815474494704e-06, "loss": 0.0273, "step": 13624 }, { "epoch": 2.28, "grad_norm": 0.39075493812561035, "learning_rate": 2.881213269384827e-06, "loss": 0.0229, "step": 13625 }, { "epoch": 2.28, "grad_norm": 0.3763718903064728, "learning_rate": 2.8799452234372473e-06, "loss": 0.03, "step": 13626 }, { "epoch": 2.28, "grad_norm": 0.3355522155761719, "learning_rate": 2.878677409648084e-06, "loss": 0.0277, "step": 13627 }, { "epoch": 2.28, "grad_norm": 0.3906518816947937, "learning_rate": 2.8774098280586683e-06, "loss": 0.0402, "step": 13628 }, { "epoch": 2.28, "grad_norm": 0.3087627589702606, "learning_rate": 2.8761424787103354e-06, "loss": 0.0245, "step": 13629 }, { "epoch": 2.28, "grad_norm": 0.2751711905002594, "learning_rate": 2.8748753616444126e-06, "loss": 0.0314, "step": 13630 }, { "epoch": 2.28, "grad_norm": 0.350850373506546, "learning_rate": 2.8736084769022076e-06, "loss": 0.032, "step": 13631 }, { "epoch": 2.28, "grad_norm": 0.29880064725875854, "learning_rate": 2.872341824525037e-06, "loss": 0.0265, "step": 13632 }, { "epoch": 2.28, "grad_norm": 0.6423968076705933, "learning_rate": 2.8710754045541968e-06, "loss": 0.0496, "step": 13633 }, { "epoch": 2.28, "grad_norm": 0.3342387080192566, "learning_rate": 2.869809217030979e-06, "loss": 0.0283, "step": 13634 }, { "epoch": 2.28, "grad_norm": 0.4166753888130188, "learning_rate": 2.8685432619966734e-06, "loss": 0.0317, "step": 13635 }, { "epoch": 2.28, "grad_norm": 0.4006916880607605, "learning_rate": 2.8672775394925543e-06, "loss": 0.0317, "step": 13636 }, { "epoch": 2.28, "grad_norm": 0.492801308631897, "learning_rate": 2.866012049559893e-06, "loss": 0.0406, "step": 13637 }, { "epoch": 2.28, "grad_norm": 0.420688271522522, "learning_rate": 2.864746792239956e-06, "loss": 0.0407, "step": 13638 }, { "epoch": 2.28, "grad_norm": 0.41106539964675903, "learning_rate": 2.8634817675739968e-06, "loss": 0.0304, "step": 13639 }, { "epoch": 2.28, "grad_norm": 0.3785075545310974, "learning_rate": 2.862216975603259e-06, "loss": 0.0405, "step": 13640 }, { "epoch": 2.28, "grad_norm": 0.4461756646633148, "learning_rate": 2.8609524163689883e-06, "loss": 0.0362, "step": 13641 }, { "epoch": 2.28, "grad_norm": 0.3205185830593109, "learning_rate": 2.8596880899124156e-06, "loss": 0.0258, "step": 13642 }, { "epoch": 2.28, "grad_norm": 0.38049808144569397, "learning_rate": 2.858423996274763e-06, "loss": 0.0233, "step": 13643 }, { "epoch": 2.28, "grad_norm": 0.44789373874664307, "learning_rate": 2.857160135497249e-06, "loss": 0.0339, "step": 13644 }, { "epoch": 2.28, "grad_norm": 0.2884335517883301, "learning_rate": 2.8558965076210854e-06, "loss": 0.0293, "step": 13645 }, { "epoch": 2.28, "grad_norm": 0.3548007309436798, "learning_rate": 2.854633112687475e-06, "loss": 0.0298, "step": 13646 }, { "epoch": 2.28, "grad_norm": 0.3465059697628021, "learning_rate": 2.853369950737612e-06, "loss": 0.0352, "step": 13647 }, { "epoch": 2.28, "grad_norm": 0.4446744918823242, "learning_rate": 2.852107021812679e-06, "loss": 0.0429, "step": 13648 }, { "epoch": 2.28, "grad_norm": 0.3712809085845947, "learning_rate": 2.850844325953862e-06, "loss": 0.0392, "step": 13649 }, { "epoch": 2.28, "grad_norm": 0.513687252998352, "learning_rate": 2.8495818632023297e-06, "loss": 0.0408, "step": 13650 }, { "epoch": 2.28, "grad_norm": 0.5053980350494385, "learning_rate": 2.8483196335992426e-06, "loss": 0.0474, "step": 13651 }, { "epoch": 2.28, "grad_norm": 0.2554517388343811, "learning_rate": 2.8470576371857604e-06, "loss": 0.0332, "step": 13652 }, { "epoch": 2.28, "grad_norm": 0.31291231513023376, "learning_rate": 2.8457958740030358e-06, "loss": 0.0417, "step": 13653 }, { "epoch": 2.28, "grad_norm": 0.29735955595970154, "learning_rate": 2.8445343440922035e-06, "loss": 0.0204, "step": 13654 }, { "epoch": 2.28, "grad_norm": 0.32264330983161926, "learning_rate": 2.8432730474944048e-06, "loss": 0.0293, "step": 13655 }, { "epoch": 2.28, "grad_norm": 0.31578394770622253, "learning_rate": 2.842011984250761e-06, "loss": 0.0266, "step": 13656 }, { "epoch": 2.28, "grad_norm": 0.3140721321105957, "learning_rate": 2.840751154402388e-06, "loss": 0.0244, "step": 13657 }, { "epoch": 2.28, "grad_norm": 0.31498172879219055, "learning_rate": 2.839490557990403e-06, "loss": 0.0273, "step": 13658 }, { "epoch": 2.28, "grad_norm": 0.40196090936660767, "learning_rate": 2.8382301950559043e-06, "loss": 0.0265, "step": 13659 }, { "epoch": 2.28, "grad_norm": 0.4018767476081848, "learning_rate": 2.836970065639989e-06, "loss": 0.0242, "step": 13660 }, { "epoch": 2.28, "grad_norm": 0.3624589741230011, "learning_rate": 2.8357101697837496e-06, "loss": 0.0363, "step": 13661 }, { "epoch": 2.28, "grad_norm": 0.3824867606163025, "learning_rate": 2.8344505075282604e-06, "loss": 0.0308, "step": 13662 }, { "epoch": 2.29, "grad_norm": 0.2914028465747833, "learning_rate": 2.833191078914599e-06, "loss": 0.0215, "step": 13663 }, { "epoch": 2.29, "grad_norm": 0.38712480664253235, "learning_rate": 2.8319318839838293e-06, "loss": 0.0335, "step": 13664 }, { "epoch": 2.29, "grad_norm": 0.5955736041069031, "learning_rate": 2.8306729227770046e-06, "loss": 0.0418, "step": 13665 }, { "epoch": 2.29, "grad_norm": 0.36130183935165405, "learning_rate": 2.829414195335182e-06, "loss": 0.0352, "step": 13666 }, { "epoch": 2.29, "grad_norm": 0.318207323551178, "learning_rate": 2.8281557016993964e-06, "loss": 0.0326, "step": 13667 }, { "epoch": 2.29, "grad_norm": 0.3504277169704437, "learning_rate": 2.8268974419106908e-06, "loss": 0.0403, "step": 13668 }, { "epoch": 2.29, "grad_norm": 0.37460824847221375, "learning_rate": 2.825639416010085e-06, "loss": 0.0327, "step": 13669 }, { "epoch": 2.29, "grad_norm": 0.32082799077033997, "learning_rate": 2.8243816240386046e-06, "loss": 0.0241, "step": 13670 }, { "epoch": 2.29, "grad_norm": 0.2642388641834259, "learning_rate": 2.8231240660372563e-06, "loss": 0.0257, "step": 13671 }, { "epoch": 2.29, "grad_norm": 0.3700656592845917, "learning_rate": 2.821866742047048e-06, "loss": 0.0278, "step": 13672 }, { "epoch": 2.29, "grad_norm": 0.2476128190755844, "learning_rate": 2.8206096521089733e-06, "loss": 0.0231, "step": 13673 }, { "epoch": 2.29, "grad_norm": 0.7397556304931641, "learning_rate": 2.8193527962640256e-06, "loss": 0.0473, "step": 13674 }, { "epoch": 2.29, "grad_norm": 0.3528035283088684, "learning_rate": 2.818096174553184e-06, "loss": 0.0302, "step": 13675 }, { "epoch": 2.29, "grad_norm": 0.37908482551574707, "learning_rate": 2.816839787017418e-06, "loss": 0.0333, "step": 13676 }, { "epoch": 2.29, "grad_norm": 0.4876285195350647, "learning_rate": 2.815583633697698e-06, "loss": 0.0585, "step": 13677 }, { "epoch": 2.29, "grad_norm": 0.4772820770740509, "learning_rate": 2.8143277146349857e-06, "loss": 0.031, "step": 13678 }, { "epoch": 2.29, "grad_norm": 0.45790594816207886, "learning_rate": 2.813072029870225e-06, "loss": 0.0398, "step": 13679 }, { "epoch": 2.29, "grad_norm": 0.3578329086303711, "learning_rate": 2.8118165794443654e-06, "loss": 0.0408, "step": 13680 }, { "epoch": 2.29, "grad_norm": 0.34968462586402893, "learning_rate": 2.810561363398339e-06, "loss": 0.0319, "step": 13681 }, { "epoch": 2.29, "grad_norm": 0.521988570690155, "learning_rate": 2.8093063817730714e-06, "loss": 0.0544, "step": 13682 }, { "epoch": 2.29, "grad_norm": 0.3769441843032837, "learning_rate": 2.808051634609489e-06, "loss": 0.0367, "step": 13683 }, { "epoch": 2.29, "grad_norm": 0.30156034231185913, "learning_rate": 2.806797121948497e-06, "loss": 0.0219, "step": 13684 }, { "epoch": 2.29, "grad_norm": 0.396085649728775, "learning_rate": 2.805542843831005e-06, "loss": 0.0395, "step": 13685 }, { "epoch": 2.29, "grad_norm": 0.3934134840965271, "learning_rate": 2.804288800297913e-06, "loss": 0.0382, "step": 13686 }, { "epoch": 2.29, "grad_norm": 0.2841309905052185, "learning_rate": 2.8030349913901032e-06, "loss": 0.0225, "step": 13687 }, { "epoch": 2.29, "grad_norm": 0.5064019560813904, "learning_rate": 2.801781417148467e-06, "loss": 0.05, "step": 13688 }, { "epoch": 2.29, "grad_norm": 0.37779080867767334, "learning_rate": 2.800528077613872e-06, "loss": 0.0369, "step": 13689 }, { "epoch": 2.29, "grad_norm": 0.33527135848999023, "learning_rate": 2.799274972827184e-06, "loss": 0.0266, "step": 13690 }, { "epoch": 2.29, "grad_norm": 0.4101256728172302, "learning_rate": 2.7980221028292678e-06, "loss": 0.0351, "step": 13691 }, { "epoch": 2.29, "grad_norm": 0.30712148547172546, "learning_rate": 2.796769467660968e-06, "loss": 0.0387, "step": 13692 }, { "epoch": 2.29, "grad_norm": 0.38423165678977966, "learning_rate": 2.7955170673631327e-06, "loss": 0.0392, "step": 13693 }, { "epoch": 2.29, "grad_norm": 0.4436837136745453, "learning_rate": 2.7942649019766e-06, "loss": 0.0505, "step": 13694 }, { "epoch": 2.29, "grad_norm": 0.34607449173927307, "learning_rate": 2.793012971542195e-06, "loss": 0.0398, "step": 13695 }, { "epoch": 2.29, "grad_norm": 0.38441282510757446, "learning_rate": 2.791761276100736e-06, "loss": 0.0238, "step": 13696 }, { "epoch": 2.29, "grad_norm": 0.27645301818847656, "learning_rate": 2.790509815693043e-06, "loss": 0.0203, "step": 13697 }, { "epoch": 2.29, "grad_norm": 0.3323662281036377, "learning_rate": 2.789258590359918e-06, "loss": 0.0316, "step": 13698 }, { "epoch": 2.29, "grad_norm": 0.2388271689414978, "learning_rate": 2.7880076001421553e-06, "loss": 0.0238, "step": 13699 }, { "epoch": 2.29, "grad_norm": 0.3535931408405304, "learning_rate": 2.7867568450805484e-06, "loss": 0.0238, "step": 13700 }, { "epoch": 2.29, "grad_norm": 0.3381025195121765, "learning_rate": 2.785506325215879e-06, "loss": 0.0357, "step": 13701 }, { "epoch": 2.29, "grad_norm": 0.37035882472991943, "learning_rate": 2.784256040588925e-06, "loss": 0.0304, "step": 13702 }, { "epoch": 2.29, "grad_norm": 0.3701113760471344, "learning_rate": 2.7830059912404516e-06, "loss": 0.0268, "step": 13703 }, { "epoch": 2.29, "grad_norm": 0.41488713026046753, "learning_rate": 2.7817561772112144e-06, "loss": 0.0373, "step": 13704 }, { "epoch": 2.29, "grad_norm": 0.34911251068115234, "learning_rate": 2.780506598541971e-06, "loss": 0.0318, "step": 13705 }, { "epoch": 2.29, "grad_norm": 0.27592721581459045, "learning_rate": 2.779257255273463e-06, "loss": 0.0312, "step": 13706 }, { "epoch": 2.29, "grad_norm": 0.3763345181941986, "learning_rate": 2.778008147446424e-06, "loss": 0.0474, "step": 13707 }, { "epoch": 2.29, "grad_norm": 0.2637970447540283, "learning_rate": 2.7767592751015847e-06, "loss": 0.0252, "step": 13708 }, { "epoch": 2.29, "grad_norm": 0.39977097511291504, "learning_rate": 2.7755106382796714e-06, "loss": 0.031, "step": 13709 }, { "epoch": 2.29, "grad_norm": 0.3417561948299408, "learning_rate": 2.774262237021389e-06, "loss": 0.0391, "step": 13710 }, { "epoch": 2.29, "grad_norm": 0.3789261281490326, "learning_rate": 2.7730140713674504e-06, "loss": 0.0482, "step": 13711 }, { "epoch": 2.29, "grad_norm": 0.800144374370575, "learning_rate": 2.7717661413585497e-06, "loss": 0.0442, "step": 13712 }, { "epoch": 2.29, "grad_norm": 0.3188803493976593, "learning_rate": 2.770518447035375e-06, "loss": 0.0248, "step": 13713 }, { "epoch": 2.29, "grad_norm": 0.3513447046279907, "learning_rate": 2.769270988438616e-06, "loss": 0.0306, "step": 13714 }, { "epoch": 2.29, "grad_norm": 0.5790645480155945, "learning_rate": 2.7680237656089393e-06, "loss": 0.058, "step": 13715 }, { "epoch": 2.29, "grad_norm": 0.3040928244590759, "learning_rate": 2.766776778587017e-06, "loss": 0.0235, "step": 13716 }, { "epoch": 2.29, "grad_norm": 0.2775227725505829, "learning_rate": 2.7655300274135124e-06, "loss": 0.0263, "step": 13717 }, { "epoch": 2.29, "grad_norm": 0.6017504334449768, "learning_rate": 2.7642835121290678e-06, "loss": 0.04, "step": 13718 }, { "epoch": 2.29, "grad_norm": 0.44827800989151, "learning_rate": 2.763037232774337e-06, "loss": 0.0353, "step": 13719 }, { "epoch": 2.29, "grad_norm": 0.4358038306236267, "learning_rate": 2.761791189389953e-06, "loss": 0.0401, "step": 13720 }, { "epoch": 2.29, "grad_norm": 0.36162716150283813, "learning_rate": 2.7605453820165393e-06, "loss": 0.0229, "step": 13721 }, { "epoch": 2.3, "grad_norm": 0.3064143657684326, "learning_rate": 2.7592998106947253e-06, "loss": 0.0283, "step": 13722 }, { "epoch": 2.3, "grad_norm": 0.37002843618392944, "learning_rate": 2.7580544754651207e-06, "loss": 0.038, "step": 13723 }, { "epoch": 2.3, "grad_norm": 0.45265957713127136, "learning_rate": 2.756809376368329e-06, "loss": 0.0397, "step": 13724 }, { "epoch": 2.3, "grad_norm": 0.2908236086368561, "learning_rate": 2.7555645134449492e-06, "loss": 0.0247, "step": 13725 }, { "epoch": 2.3, "grad_norm": 0.3603435754776001, "learning_rate": 2.754319886735577e-06, "loss": 0.0359, "step": 13726 }, { "epoch": 2.3, "grad_norm": 0.41364362835884094, "learning_rate": 2.753075496280788e-06, "loss": 0.039, "step": 13727 }, { "epoch": 2.3, "grad_norm": 0.3081285357475281, "learning_rate": 2.751831342121163e-06, "loss": 0.0277, "step": 13728 }, { "epoch": 2.3, "grad_norm": 0.3990839123725891, "learning_rate": 2.750587424297263e-06, "loss": 0.0259, "step": 13729 }, { "epoch": 2.3, "grad_norm": 0.4249800145626068, "learning_rate": 2.749343742849654e-06, "loss": 0.0471, "step": 13730 }, { "epoch": 2.3, "grad_norm": 0.3544106185436249, "learning_rate": 2.748100297818884e-06, "loss": 0.0329, "step": 13731 }, { "epoch": 2.3, "grad_norm": 0.699871838092804, "learning_rate": 2.746857089245496e-06, "loss": 0.042, "step": 13732 }, { "epoch": 2.3, "grad_norm": 0.26088812947273254, "learning_rate": 2.7456141171700278e-06, "loss": 0.0181, "step": 13733 }, { "epoch": 2.3, "grad_norm": 0.47404924035072327, "learning_rate": 2.7443713816330108e-06, "loss": 0.0321, "step": 13734 }, { "epoch": 2.3, "grad_norm": 0.5270100831985474, "learning_rate": 2.743128882674961e-06, "loss": 0.0368, "step": 13735 }, { "epoch": 2.3, "grad_norm": 0.31436246633529663, "learning_rate": 2.741886620336398e-06, "loss": 0.028, "step": 13736 }, { "epoch": 2.3, "grad_norm": 0.3345237970352173, "learning_rate": 2.740644594657823e-06, "loss": 0.0243, "step": 13737 }, { "epoch": 2.3, "grad_norm": 0.3761143684387207, "learning_rate": 2.7394028056797307e-06, "loss": 0.0395, "step": 13738 }, { "epoch": 2.3, "grad_norm": 0.6174100041389465, "learning_rate": 2.738161253442618e-06, "loss": 0.0398, "step": 13739 }, { "epoch": 2.3, "grad_norm": 0.4225795269012451, "learning_rate": 2.7369199379869626e-06, "loss": 0.0314, "step": 13740 }, { "epoch": 2.3, "grad_norm": 0.39121493697166443, "learning_rate": 2.7356788593532403e-06, "loss": 0.0282, "step": 13741 }, { "epoch": 2.3, "grad_norm": 0.22908589243888855, "learning_rate": 2.7344380175819207e-06, "loss": 0.0142, "step": 13742 }, { "epoch": 2.3, "grad_norm": 0.25064530968666077, "learning_rate": 2.7331974127134587e-06, "loss": 0.0211, "step": 13743 }, { "epoch": 2.3, "grad_norm": 0.40210556983947754, "learning_rate": 2.7319570447883106e-06, "loss": 0.0337, "step": 13744 }, { "epoch": 2.3, "grad_norm": 0.32450634241104126, "learning_rate": 2.730716913846917e-06, "loss": 0.0329, "step": 13745 }, { "epoch": 2.3, "grad_norm": 0.3729095458984375, "learning_rate": 2.7294770199297115e-06, "loss": 0.0385, "step": 13746 }, { "epoch": 2.3, "grad_norm": 0.31110984086990356, "learning_rate": 2.7282373630771296e-06, "loss": 0.025, "step": 13747 }, { "epoch": 2.3, "grad_norm": 0.3583407700061798, "learning_rate": 2.7269979433295836e-06, "loss": 0.0379, "step": 13748 }, { "epoch": 2.3, "grad_norm": 0.4349491596221924, "learning_rate": 2.7257587607274905e-06, "loss": 0.045, "step": 13749 }, { "epoch": 2.3, "grad_norm": 0.307328462600708, "learning_rate": 2.7245198153112583e-06, "loss": 0.0282, "step": 13750 }, { "epoch": 2.3, "grad_norm": 0.3605751693248749, "learning_rate": 2.7232811071212816e-06, "loss": 0.0341, "step": 13751 }, { "epoch": 2.3, "grad_norm": 0.4315990209579468, "learning_rate": 2.722042636197947e-06, "loss": 0.0437, "step": 13752 }, { "epoch": 2.3, "grad_norm": 0.29031839966773987, "learning_rate": 2.7208044025816414e-06, "loss": 0.0258, "step": 13753 }, { "epoch": 2.3, "grad_norm": 0.371995747089386, "learning_rate": 2.719566406312738e-06, "loss": 0.0375, "step": 13754 }, { "epoch": 2.3, "grad_norm": 0.4096451699733734, "learning_rate": 2.7183286474315986e-06, "loss": 0.0308, "step": 13755 }, { "epoch": 2.3, "grad_norm": 0.4369931221008301, "learning_rate": 2.717091125978585e-06, "loss": 0.0284, "step": 13756 }, { "epoch": 2.3, "grad_norm": 0.40642282366752625, "learning_rate": 2.7158538419940494e-06, "loss": 0.0356, "step": 13757 }, { "epoch": 2.3, "grad_norm": 0.45341962575912476, "learning_rate": 2.7146167955183366e-06, "loss": 0.0383, "step": 13758 }, { "epoch": 2.3, "grad_norm": 0.39173373579978943, "learning_rate": 2.713379986591779e-06, "loss": 0.0288, "step": 13759 }, { "epoch": 2.3, "grad_norm": 0.39297589659690857, "learning_rate": 2.712143415254703e-06, "loss": 0.0241, "step": 13760 }, { "epoch": 2.3, "grad_norm": 0.4072245955467224, "learning_rate": 2.7109070815474326e-06, "loss": 0.0329, "step": 13761 }, { "epoch": 2.3, "grad_norm": 0.3361347019672394, "learning_rate": 2.7096709855102787e-06, "loss": 0.0245, "step": 13762 }, { "epoch": 2.3, "grad_norm": 0.41376155614852905, "learning_rate": 2.7084351271835417e-06, "loss": 0.0308, "step": 13763 }, { "epoch": 2.3, "grad_norm": 0.24474172294139862, "learning_rate": 2.707199506607522e-06, "loss": 0.0139, "step": 13764 }, { "epoch": 2.3, "grad_norm": 0.3976849317550659, "learning_rate": 2.705964123822511e-06, "loss": 0.0357, "step": 13765 }, { "epoch": 2.3, "grad_norm": 0.35220471024513245, "learning_rate": 2.7047289788687835e-06, "loss": 0.0258, "step": 13766 }, { "epoch": 2.3, "grad_norm": 0.5218302011489868, "learning_rate": 2.70349407178662e-06, "loss": 0.0294, "step": 13767 }, { "epoch": 2.3, "grad_norm": 0.4524267613887787, "learning_rate": 2.702259402616283e-06, "loss": 0.038, "step": 13768 }, { "epoch": 2.3, "grad_norm": 0.2540960907936096, "learning_rate": 2.7010249713980272e-06, "loss": 0.017, "step": 13769 }, { "epoch": 2.3, "grad_norm": 0.4588586688041687, "learning_rate": 2.699790778172109e-06, "loss": 0.0443, "step": 13770 }, { "epoch": 2.3, "grad_norm": 0.4622437059879303, "learning_rate": 2.698556822978765e-06, "loss": 0.0344, "step": 13771 }, { "epoch": 2.3, "grad_norm": 0.5508694648742676, "learning_rate": 2.697323105858235e-06, "loss": 0.0449, "step": 13772 }, { "epoch": 2.3, "grad_norm": 0.44812968373298645, "learning_rate": 2.696089626850741e-06, "loss": 0.0406, "step": 13773 }, { "epoch": 2.3, "grad_norm": 0.43253639340400696, "learning_rate": 2.6948563859965037e-06, "loss": 0.0322, "step": 13774 }, { "epoch": 2.3, "grad_norm": 0.5320484638214111, "learning_rate": 2.6936233833357394e-06, "loss": 0.0293, "step": 13775 }, { "epoch": 2.3, "grad_norm": 0.42062675952911377, "learning_rate": 2.692390618908647e-06, "loss": 0.0306, "step": 13776 }, { "epoch": 2.3, "grad_norm": 0.39644888043403625, "learning_rate": 2.6911580927554206e-06, "loss": 0.0281, "step": 13777 }, { "epoch": 2.3, "grad_norm": 0.3073815703392029, "learning_rate": 2.6899258049162536e-06, "loss": 0.0336, "step": 13778 }, { "epoch": 2.3, "grad_norm": 0.37129995226860046, "learning_rate": 2.688693755431324e-06, "loss": 0.0383, "step": 13779 }, { "epoch": 2.3, "grad_norm": 0.31048381328582764, "learning_rate": 2.6874619443408e-06, "loss": 0.0322, "step": 13780 }, { "epoch": 2.3, "grad_norm": 0.31437063217163086, "learning_rate": 2.6862303716848502e-06, "loss": 0.0265, "step": 13781 }, { "epoch": 2.31, "grad_norm": 0.3108297288417816, "learning_rate": 2.684999037503635e-06, "loss": 0.0332, "step": 13782 }, { "epoch": 2.31, "grad_norm": 0.3073256313800812, "learning_rate": 2.6837679418372977e-06, "loss": 0.0282, "step": 13783 }, { "epoch": 2.31, "grad_norm": 0.23947329819202423, "learning_rate": 2.6825370847259835e-06, "loss": 0.024, "step": 13784 }, { "epoch": 2.31, "grad_norm": 0.3822278678417206, "learning_rate": 2.681306466209823e-06, "loss": 0.0451, "step": 13785 }, { "epoch": 2.31, "grad_norm": 0.41176238656044006, "learning_rate": 2.6800760863289463e-06, "loss": 0.0458, "step": 13786 }, { "epoch": 2.31, "grad_norm": 0.21561458706855774, "learning_rate": 2.6788459451234692e-06, "loss": 0.0248, "step": 13787 }, { "epoch": 2.31, "grad_norm": 0.2785087525844574, "learning_rate": 2.6776160426334997e-06, "loss": 0.024, "step": 13788 }, { "epoch": 2.31, "grad_norm": 0.3919012248516083, "learning_rate": 2.676386378899142e-06, "loss": 0.0347, "step": 13789 }, { "epoch": 2.31, "grad_norm": 0.3300182819366455, "learning_rate": 2.6751569539604937e-06, "loss": 0.0395, "step": 13790 }, { "epoch": 2.31, "grad_norm": 0.3346627652645111, "learning_rate": 2.673927767857636e-06, "loss": 0.0301, "step": 13791 }, { "epoch": 2.31, "grad_norm": 0.307893306016922, "learning_rate": 2.672698820630656e-06, "loss": 0.0252, "step": 13792 }, { "epoch": 2.31, "grad_norm": 0.472791850566864, "learning_rate": 2.6714701123196196e-06, "loss": 0.0381, "step": 13793 }, { "epoch": 2.31, "grad_norm": 0.3904300332069397, "learning_rate": 2.6702416429645883e-06, "loss": 0.0312, "step": 13794 }, { "epoch": 2.31, "grad_norm": 0.36432766914367676, "learning_rate": 2.669013412605623e-06, "loss": 0.0351, "step": 13795 }, { "epoch": 2.31, "grad_norm": 0.36023396253585815, "learning_rate": 2.667785421282768e-06, "loss": 0.0368, "step": 13796 }, { "epoch": 2.31, "grad_norm": 0.3191891014575958, "learning_rate": 2.666557669036065e-06, "loss": 0.0326, "step": 13797 }, { "epoch": 2.31, "grad_norm": 0.5092860460281372, "learning_rate": 2.665330155905549e-06, "loss": 0.0557, "step": 13798 }, { "epoch": 2.31, "grad_norm": 0.5451717972755432, "learning_rate": 2.6641028819312397e-06, "loss": 0.0298, "step": 13799 }, { "epoch": 2.31, "grad_norm": 0.3116438686847687, "learning_rate": 2.662875847153159e-06, "loss": 0.0392, "step": 13800 }, { "epoch": 2.31, "grad_norm": 0.3289319574832916, "learning_rate": 2.6616490516113136e-06, "loss": 0.0242, "step": 13801 }, { "epoch": 2.31, "grad_norm": 0.42611923813819885, "learning_rate": 2.6604224953457016e-06, "loss": 0.0383, "step": 13802 }, { "epoch": 2.31, "grad_norm": 0.3314290940761566, "learning_rate": 2.6591961783963227e-06, "loss": 0.0285, "step": 13803 }, { "epoch": 2.31, "grad_norm": 0.405516654253006, "learning_rate": 2.6579701008031555e-06, "loss": 0.0462, "step": 13804 }, { "epoch": 2.31, "grad_norm": 0.37384268641471863, "learning_rate": 2.6567442626061814e-06, "loss": 0.0366, "step": 13805 }, { "epoch": 2.31, "grad_norm": 0.3895816504955292, "learning_rate": 2.6555186638453735e-06, "loss": 0.0354, "step": 13806 }, { "epoch": 2.31, "grad_norm": 0.27611881494522095, "learning_rate": 2.65429330456069e-06, "loss": 0.0283, "step": 13807 }, { "epoch": 2.31, "grad_norm": 0.5694555044174194, "learning_rate": 2.653068184792085e-06, "loss": 0.0504, "step": 13808 }, { "epoch": 2.31, "grad_norm": 0.5385879874229431, "learning_rate": 2.651843304579508e-06, "loss": 0.0383, "step": 13809 }, { "epoch": 2.31, "grad_norm": 0.34414446353912354, "learning_rate": 2.650618663962896e-06, "loss": 0.0331, "step": 13810 }, { "epoch": 2.31, "grad_norm": 0.4786912202835083, "learning_rate": 2.649394262982178e-06, "loss": 0.0337, "step": 13811 }, { "epoch": 2.31, "grad_norm": 0.36420565843582153, "learning_rate": 2.648170101677279e-06, "loss": 0.0288, "step": 13812 }, { "epoch": 2.31, "grad_norm": 0.31380200386047363, "learning_rate": 2.6469461800881137e-06, "loss": 0.0198, "step": 13813 }, { "epoch": 2.31, "grad_norm": 0.4029386341571808, "learning_rate": 2.6457224982545936e-06, "loss": 0.036, "step": 13814 }, { "epoch": 2.31, "grad_norm": 0.297554075717926, "learning_rate": 2.644499056216616e-06, "loss": 0.0277, "step": 13815 }, { "epoch": 2.31, "grad_norm": 0.4983212351799011, "learning_rate": 2.643275854014068e-06, "loss": 0.0624, "step": 13816 }, { "epoch": 2.31, "grad_norm": 0.2520585358142853, "learning_rate": 2.6420528916868416e-06, "loss": 0.0179, "step": 13817 }, { "epoch": 2.31, "grad_norm": 0.3046827018260956, "learning_rate": 2.6408301692748082e-06, "loss": 0.0391, "step": 13818 }, { "epoch": 2.31, "grad_norm": 0.43609651923179626, "learning_rate": 2.6396076868178343e-06, "loss": 0.047, "step": 13819 }, { "epoch": 2.31, "grad_norm": 0.36390116810798645, "learning_rate": 2.6383854443557845e-06, "loss": 0.038, "step": 13820 }, { "epoch": 2.31, "grad_norm": 0.3081905245780945, "learning_rate": 2.6371634419285118e-06, "loss": 0.0259, "step": 13821 }, { "epoch": 2.31, "grad_norm": 0.2755395472049713, "learning_rate": 2.6359416795758576e-06, "loss": 0.0249, "step": 13822 }, { "epoch": 2.31, "grad_norm": 0.30412063002586365, "learning_rate": 2.6347201573376634e-06, "loss": 0.0263, "step": 13823 }, { "epoch": 2.31, "grad_norm": 0.4109128415584564, "learning_rate": 2.6334988752537572e-06, "loss": 0.0474, "step": 13824 }, { "epoch": 2.31, "grad_norm": 0.3492664694786072, "learning_rate": 2.6322778333639554e-06, "loss": 0.0312, "step": 13825 }, { "epoch": 2.31, "grad_norm": 0.41431906819343567, "learning_rate": 2.6310570317080795e-06, "loss": 0.0479, "step": 13826 }, { "epoch": 2.31, "grad_norm": 0.4243521988391876, "learning_rate": 2.6298364703259283e-06, "loss": 0.0447, "step": 13827 }, { "epoch": 2.31, "grad_norm": 0.4259565770626068, "learning_rate": 2.6286161492573046e-06, "loss": 0.036, "step": 13828 }, { "epoch": 2.31, "grad_norm": 0.43180811405181885, "learning_rate": 2.6273960685419954e-06, "loss": 0.0366, "step": 13829 }, { "epoch": 2.31, "grad_norm": 0.34483596682548523, "learning_rate": 2.6261762282197844e-06, "loss": 0.0223, "step": 13830 }, { "epoch": 2.31, "grad_norm": 0.36728474497795105, "learning_rate": 2.6249566283304484e-06, "loss": 0.0393, "step": 13831 }, { "epoch": 2.31, "grad_norm": 0.5796356201171875, "learning_rate": 2.623737268913752e-06, "loss": 0.045, "step": 13832 }, { "epoch": 2.31, "grad_norm": 0.386829674243927, "learning_rate": 2.6225181500094498e-06, "loss": 0.0283, "step": 13833 }, { "epoch": 2.31, "grad_norm": 0.48626822233200073, "learning_rate": 2.6212992716573003e-06, "loss": 0.0433, "step": 13834 }, { "epoch": 2.31, "grad_norm": 0.316696435213089, "learning_rate": 2.620080633897043e-06, "loss": 0.0247, "step": 13835 }, { "epoch": 2.31, "grad_norm": 0.5081489682197571, "learning_rate": 2.618862236768409e-06, "loss": 0.0348, "step": 13836 }, { "epoch": 2.31, "grad_norm": 0.3699674606323242, "learning_rate": 2.6176440803111304e-06, "loss": 0.0308, "step": 13837 }, { "epoch": 2.31, "grad_norm": 0.3025261461734772, "learning_rate": 2.616426164564928e-06, "loss": 0.0302, "step": 13838 }, { "epoch": 2.31, "grad_norm": 0.4015662372112274, "learning_rate": 2.6152084895695095e-06, "loss": 0.0305, "step": 13839 }, { "epoch": 2.31, "grad_norm": 0.7637363076210022, "learning_rate": 2.613991055364583e-06, "loss": 0.0326, "step": 13840 }, { "epoch": 2.31, "grad_norm": 0.3467998802661896, "learning_rate": 2.6127738619898403e-06, "loss": 0.0346, "step": 13841 }, { "epoch": 2.32, "grad_norm": 0.4811633229255676, "learning_rate": 2.6115569094849725e-06, "loss": 0.0315, "step": 13842 }, { "epoch": 2.32, "grad_norm": 0.3778878450393677, "learning_rate": 2.61034019788966e-06, "loss": 0.0292, "step": 13843 }, { "epoch": 2.32, "grad_norm": 0.34995245933532715, "learning_rate": 2.609123727243571e-06, "loss": 0.0409, "step": 13844 }, { "epoch": 2.32, "grad_norm": 0.3371657431125641, "learning_rate": 2.6079074975863726e-06, "loss": 0.0252, "step": 13845 }, { "epoch": 2.32, "grad_norm": 0.46572908759117126, "learning_rate": 2.606691508957725e-06, "loss": 0.031, "step": 13846 }, { "epoch": 2.32, "grad_norm": 0.24145326018333435, "learning_rate": 2.6054757613972715e-06, "loss": 0.0208, "step": 13847 }, { "epoch": 2.32, "grad_norm": 0.34714680910110474, "learning_rate": 2.604260254944657e-06, "loss": 0.0341, "step": 13848 }, { "epoch": 2.32, "grad_norm": 0.42029374837875366, "learning_rate": 2.603044989639515e-06, "loss": 0.0367, "step": 13849 }, { "epoch": 2.32, "grad_norm": 0.28356143832206726, "learning_rate": 2.6018299655214652e-06, "loss": 0.0277, "step": 13850 }, { "epoch": 2.32, "grad_norm": 0.4400355815887451, "learning_rate": 2.6006151826301327e-06, "loss": 0.0451, "step": 13851 }, { "epoch": 2.32, "grad_norm": 0.7081102728843689, "learning_rate": 2.5994006410051188e-06, "loss": 0.0384, "step": 13852 }, { "epoch": 2.32, "grad_norm": 0.3280567526817322, "learning_rate": 2.598186340686031e-06, "loss": 0.032, "step": 13853 }, { "epoch": 2.32, "grad_norm": 0.41702625155448914, "learning_rate": 2.596972281712463e-06, "loss": 0.0278, "step": 13854 }, { "epoch": 2.32, "grad_norm": 0.3231724798679352, "learning_rate": 2.5957584641239975e-06, "loss": 0.0205, "step": 13855 }, { "epoch": 2.32, "grad_norm": 0.5066417455673218, "learning_rate": 2.5945448879602162e-06, "loss": 0.051, "step": 13856 }, { "epoch": 2.32, "grad_norm": 0.3953763544559479, "learning_rate": 2.5933315532606894e-06, "loss": 0.0462, "step": 13857 }, { "epoch": 2.32, "grad_norm": 0.33214306831359863, "learning_rate": 2.592118460064973e-06, "loss": 0.0272, "step": 13858 }, { "epoch": 2.32, "grad_norm": 0.39031070470809937, "learning_rate": 2.590905608412629e-06, "loss": 0.0295, "step": 13859 }, { "epoch": 2.32, "grad_norm": 0.28886348009109497, "learning_rate": 2.5896929983431984e-06, "loss": 0.0217, "step": 13860 }, { "epoch": 2.32, "grad_norm": 0.4015265107154846, "learning_rate": 2.588480629896223e-06, "loss": 0.0368, "step": 13861 }, { "epoch": 2.32, "grad_norm": 0.3352143466472626, "learning_rate": 2.5872685031112345e-06, "loss": 0.0208, "step": 13862 }, { "epoch": 2.32, "grad_norm": 0.4508304297924042, "learning_rate": 2.5860566180277557e-06, "loss": 0.0293, "step": 13863 }, { "epoch": 2.32, "grad_norm": 0.2912904918193817, "learning_rate": 2.5848449746852968e-06, "loss": 0.0325, "step": 13864 }, { "epoch": 2.32, "grad_norm": 0.29096719622612, "learning_rate": 2.5836335731233706e-06, "loss": 0.0285, "step": 13865 }, { "epoch": 2.32, "grad_norm": 0.2574653923511505, "learning_rate": 2.5824224133814722e-06, "loss": 0.0325, "step": 13866 }, { "epoch": 2.32, "grad_norm": 0.3691270053386688, "learning_rate": 2.5812114954990975e-06, "loss": 0.0437, "step": 13867 }, { "epoch": 2.32, "grad_norm": 0.3384428322315216, "learning_rate": 2.5800008195157257e-06, "loss": 0.0286, "step": 13868 }, { "epoch": 2.32, "grad_norm": 0.33526280522346497, "learning_rate": 2.578790385470833e-06, "loss": 0.0375, "step": 13869 }, { "epoch": 2.32, "grad_norm": 0.45490404963493347, "learning_rate": 2.5775801934038913e-06, "loss": 0.0476, "step": 13870 }, { "epoch": 2.32, "grad_norm": 0.4478640556335449, "learning_rate": 2.576370243354359e-06, "loss": 0.0427, "step": 13871 }, { "epoch": 2.32, "grad_norm": 0.28855305910110474, "learning_rate": 2.5751605353616827e-06, "loss": 0.0168, "step": 13872 }, { "epoch": 2.32, "grad_norm": 0.37996265292167664, "learning_rate": 2.5739510694653134e-06, "loss": 0.0309, "step": 13873 }, { "epoch": 2.32, "grad_norm": 0.7523597478866577, "learning_rate": 2.5727418457046836e-06, "loss": 0.0415, "step": 13874 }, { "epoch": 2.32, "grad_norm": 0.39442354440689087, "learning_rate": 2.5715328641192204e-06, "loss": 0.0316, "step": 13875 }, { "epoch": 2.32, "grad_norm": 0.23053519427776337, "learning_rate": 2.5703241247483456e-06, "loss": 0.0217, "step": 13876 }, { "epoch": 2.32, "grad_norm": 0.5103558301925659, "learning_rate": 2.5691156276314742e-06, "loss": 0.0239, "step": 13877 }, { "epoch": 2.32, "grad_norm": 0.47048741579055786, "learning_rate": 2.5679073728080072e-06, "loss": 0.0379, "step": 13878 }, { "epoch": 2.32, "grad_norm": 0.4965469539165497, "learning_rate": 2.5666993603173453e-06, "loss": 0.0317, "step": 13879 }, { "epoch": 2.32, "grad_norm": 0.5015219449996948, "learning_rate": 2.5654915901988707e-06, "loss": 0.0351, "step": 13880 }, { "epoch": 2.32, "grad_norm": 0.40392938256263733, "learning_rate": 2.5642840624919717e-06, "loss": 0.0286, "step": 13881 }, { "epoch": 2.32, "grad_norm": 0.5237239003181458, "learning_rate": 2.563076777236019e-06, "loss": 0.0371, "step": 13882 }, { "epoch": 2.32, "grad_norm": 0.27613723278045654, "learning_rate": 2.5618697344703723e-06, "loss": 0.0277, "step": 13883 }, { "epoch": 2.32, "grad_norm": 0.4347204267978668, "learning_rate": 2.560662934234397e-06, "loss": 0.0322, "step": 13884 }, { "epoch": 2.32, "grad_norm": 0.45171263813972473, "learning_rate": 2.559456376567434e-06, "loss": 0.0327, "step": 13885 }, { "epoch": 2.32, "grad_norm": 0.44852155447006226, "learning_rate": 2.55825006150883e-06, "loss": 0.0428, "step": 13886 }, { "epoch": 2.32, "grad_norm": 0.420209676027298, "learning_rate": 2.5570439890979204e-06, "loss": 0.0327, "step": 13887 }, { "epoch": 2.32, "grad_norm": 0.37068137526512146, "learning_rate": 2.5558381593740267e-06, "loss": 0.0304, "step": 13888 }, { "epoch": 2.32, "grad_norm": 0.44595444202423096, "learning_rate": 2.554632572376465e-06, "loss": 0.0342, "step": 13889 }, { "epoch": 2.32, "grad_norm": 0.35159891843795776, "learning_rate": 2.5534272281445517e-06, "loss": 0.0312, "step": 13890 }, { "epoch": 2.32, "grad_norm": 0.32776138186454773, "learning_rate": 2.5522221267175838e-06, "loss": 0.0233, "step": 13891 }, { "epoch": 2.32, "grad_norm": 0.3504365384578705, "learning_rate": 2.551017268134852e-06, "loss": 0.0369, "step": 13892 }, { "epoch": 2.32, "grad_norm": 0.22293949127197266, "learning_rate": 2.549812652435647e-06, "loss": 0.0229, "step": 13893 }, { "epoch": 2.32, "grad_norm": 0.4839061200618744, "learning_rate": 2.548608279659245e-06, "loss": 0.0495, "step": 13894 }, { "epoch": 2.32, "grad_norm": 0.34189704060554504, "learning_rate": 2.547404149844921e-06, "loss": 0.0276, "step": 13895 }, { "epoch": 2.32, "grad_norm": 0.5151146650314331, "learning_rate": 2.5462002630319315e-06, "loss": 0.0416, "step": 13896 }, { "epoch": 2.32, "grad_norm": 0.351636677980423, "learning_rate": 2.5449966192595312e-06, "loss": 0.0241, "step": 13897 }, { "epoch": 2.32, "grad_norm": 0.5245115160942078, "learning_rate": 2.543793218566969e-06, "loss": 0.0435, "step": 13898 }, { "epoch": 2.32, "grad_norm": 0.5136939883232117, "learning_rate": 2.5425900609934828e-06, "loss": 0.0293, "step": 13899 }, { "epoch": 2.32, "grad_norm": 0.3762044310569763, "learning_rate": 2.541387146578299e-06, "loss": 0.035, "step": 13900 }, { "epoch": 2.32, "grad_norm": 0.3129660487174988, "learning_rate": 2.540184475360644e-06, "loss": 0.0301, "step": 13901 }, { "epoch": 2.33, "grad_norm": 0.3924582302570343, "learning_rate": 2.5389820473797333e-06, "loss": 0.0313, "step": 13902 }, { "epoch": 2.33, "grad_norm": 0.3697280287742615, "learning_rate": 2.5377798626747707e-06, "loss": 0.0369, "step": 13903 }, { "epoch": 2.33, "grad_norm": 0.3948284089565277, "learning_rate": 2.5365779212849582e-06, "loss": 0.0424, "step": 13904 }, { "epoch": 2.33, "grad_norm": 0.297397643327713, "learning_rate": 2.535376223249485e-06, "loss": 0.0213, "step": 13905 }, { "epoch": 2.33, "grad_norm": 0.27365511655807495, "learning_rate": 2.534174768607531e-06, "loss": 0.029, "step": 13906 }, { "epoch": 2.33, "grad_norm": 0.36437490582466125, "learning_rate": 2.532973557398276e-06, "loss": 0.0462, "step": 13907 }, { "epoch": 2.33, "grad_norm": 0.5959230065345764, "learning_rate": 2.5317725896608827e-06, "loss": 0.0495, "step": 13908 }, { "epoch": 2.33, "grad_norm": 0.3183708190917969, "learning_rate": 2.5305718654345125e-06, "loss": 0.0353, "step": 13909 }, { "epoch": 2.33, "grad_norm": 0.36850136518478394, "learning_rate": 2.529371384758319e-06, "loss": 0.0296, "step": 13910 }, { "epoch": 2.33, "grad_norm": 0.5202484726905823, "learning_rate": 2.52817114767144e-06, "loss": 0.0521, "step": 13911 }, { "epoch": 2.33, "grad_norm": 0.5552360415458679, "learning_rate": 2.526971154213016e-06, "loss": 0.0489, "step": 13912 }, { "epoch": 2.33, "grad_norm": 0.4567587673664093, "learning_rate": 2.5257714044221727e-06, "loss": 0.037, "step": 13913 }, { "epoch": 2.33, "grad_norm": 0.3195502758026123, "learning_rate": 2.524571898338024e-06, "loss": 0.0371, "step": 13914 }, { "epoch": 2.33, "grad_norm": 0.28135690093040466, "learning_rate": 2.52337263599969e-06, "loss": 0.024, "step": 13915 }, { "epoch": 2.33, "grad_norm": 0.4262450933456421, "learning_rate": 2.5221736174462674e-06, "loss": 0.0269, "step": 13916 }, { "epoch": 2.33, "grad_norm": 0.47386959195137024, "learning_rate": 2.5209748427168547e-06, "loss": 0.0381, "step": 13917 }, { "epoch": 2.33, "grad_norm": 0.38667625188827515, "learning_rate": 2.5197763118505415e-06, "loss": 0.0356, "step": 13918 }, { "epoch": 2.33, "grad_norm": 0.34526750445365906, "learning_rate": 2.5185780248864054e-06, "loss": 0.028, "step": 13919 }, { "epoch": 2.33, "grad_norm": 0.4319245219230652, "learning_rate": 2.517379981863515e-06, "loss": 0.0357, "step": 13920 }, { "epoch": 2.33, "grad_norm": 0.23243387043476105, "learning_rate": 2.516182182820939e-06, "loss": 0.0184, "step": 13921 }, { "epoch": 2.33, "grad_norm": 0.36366546154022217, "learning_rate": 2.5149846277977287e-06, "loss": 0.028, "step": 13922 }, { "epoch": 2.33, "grad_norm": 0.2990114390850067, "learning_rate": 2.5137873168329363e-06, "loss": 0.042, "step": 13923 }, { "epoch": 2.33, "grad_norm": 0.36636123061180115, "learning_rate": 2.5125902499655964e-06, "loss": 0.0327, "step": 13924 }, { "epoch": 2.33, "grad_norm": 0.31391018629074097, "learning_rate": 2.511393427234745e-06, "loss": 0.0271, "step": 13925 }, { "epoch": 2.33, "grad_norm": 0.3792341649532318, "learning_rate": 2.510196848679407e-06, "loss": 0.0257, "step": 13926 }, { "epoch": 2.33, "grad_norm": 0.5009569525718689, "learning_rate": 2.5090005143385976e-06, "loss": 0.0384, "step": 13927 }, { "epoch": 2.33, "grad_norm": 0.38046589493751526, "learning_rate": 2.507804424251319e-06, "loss": 0.0291, "step": 13928 }, { "epoch": 2.33, "grad_norm": 0.3050980269908905, "learning_rate": 2.50660857845658e-06, "loss": 0.0257, "step": 13929 }, { "epoch": 2.33, "grad_norm": 0.28947722911834717, "learning_rate": 2.505412976993368e-06, "loss": 0.0235, "step": 13930 }, { "epoch": 2.33, "grad_norm": 0.3214515149593353, "learning_rate": 2.504217619900664e-06, "loss": 0.026, "step": 13931 }, { "epoch": 2.33, "grad_norm": 0.5162748098373413, "learning_rate": 2.503022507217452e-06, "loss": 0.0418, "step": 13932 }, { "epoch": 2.33, "grad_norm": 0.48076966404914856, "learning_rate": 2.5018276389826912e-06, "loss": 0.0255, "step": 13933 }, { "epoch": 2.33, "grad_norm": 0.3897458612918854, "learning_rate": 2.5006330152353475e-06, "loss": 0.0445, "step": 13934 }, { "epoch": 2.33, "grad_norm": 0.4735361635684967, "learning_rate": 2.499438636014375e-06, "loss": 0.0431, "step": 13935 }, { "epoch": 2.33, "grad_norm": 0.3588773012161255, "learning_rate": 2.498244501358712e-06, "loss": 0.035, "step": 13936 }, { "epoch": 2.33, "grad_norm": 0.37270674109458923, "learning_rate": 2.4970506113073013e-06, "loss": 0.0339, "step": 13937 }, { "epoch": 2.33, "grad_norm": 0.22453255951404572, "learning_rate": 2.495856965899066e-06, "loss": 0.0231, "step": 13938 }, { "epoch": 2.33, "grad_norm": 0.3567117750644684, "learning_rate": 2.4946635651729267e-06, "loss": 0.0259, "step": 13939 }, { "epoch": 2.33, "grad_norm": 0.30859482288360596, "learning_rate": 2.4934704091678e-06, "loss": 0.0345, "step": 13940 }, { "epoch": 2.33, "grad_norm": 0.5005421042442322, "learning_rate": 2.492277497922584e-06, "loss": 0.0291, "step": 13941 }, { "epoch": 2.33, "grad_norm": 0.40413883328437805, "learning_rate": 2.491084831476178e-06, "loss": 0.0395, "step": 13942 }, { "epoch": 2.33, "grad_norm": 0.22346676886081696, "learning_rate": 2.4898924098674748e-06, "loss": 0.0204, "step": 13943 }, { "epoch": 2.33, "grad_norm": 0.4914659857749939, "learning_rate": 2.4887002331353507e-06, "loss": 0.0346, "step": 13944 }, { "epoch": 2.33, "grad_norm": 0.32455509901046753, "learning_rate": 2.487508301318674e-06, "loss": 0.0213, "step": 13945 }, { "epoch": 2.33, "grad_norm": 0.3902626931667328, "learning_rate": 2.486316614456318e-06, "loss": 0.0353, "step": 13946 }, { "epoch": 2.33, "grad_norm": 0.26476359367370605, "learning_rate": 2.485125172587134e-06, "loss": 0.0208, "step": 13947 }, { "epoch": 2.33, "grad_norm": 1.0052375793457031, "learning_rate": 2.483933975749969e-06, "loss": 0.0365, "step": 13948 }, { "epoch": 2.33, "grad_norm": 0.4626111388206482, "learning_rate": 2.4827430239836647e-06, "loss": 0.0432, "step": 13949 }, { "epoch": 2.33, "grad_norm": 0.3893199563026428, "learning_rate": 2.481552317327055e-06, "loss": 0.0203, "step": 13950 }, { "epoch": 2.33, "grad_norm": 0.4128621220588684, "learning_rate": 2.4803618558189667e-06, "loss": 0.0393, "step": 13951 }, { "epoch": 2.33, "grad_norm": 0.37719255685806274, "learning_rate": 2.4791716394982134e-06, "loss": 0.0359, "step": 13952 }, { "epoch": 2.33, "grad_norm": 0.3537455201148987, "learning_rate": 2.477981668403601e-06, "loss": 0.0255, "step": 13953 }, { "epoch": 2.33, "grad_norm": 0.44610700011253357, "learning_rate": 2.4767919425739363e-06, "loss": 0.0369, "step": 13954 }, { "epoch": 2.33, "grad_norm": 0.28400754928588867, "learning_rate": 2.4756024620480068e-06, "loss": 0.0231, "step": 13955 }, { "epoch": 2.33, "grad_norm": 0.3520360291004181, "learning_rate": 2.474413226864596e-06, "loss": 0.0252, "step": 13956 }, { "epoch": 2.33, "grad_norm": 0.411355584859848, "learning_rate": 2.4732242370624835e-06, "loss": 0.0279, "step": 13957 }, { "epoch": 2.33, "grad_norm": 0.2886212468147278, "learning_rate": 2.47203549268044e-06, "loss": 0.0294, "step": 13958 }, { "epoch": 2.33, "grad_norm": 0.37315377593040466, "learning_rate": 2.4708469937572197e-06, "loss": 0.0415, "step": 13959 }, { "epoch": 2.33, "grad_norm": 0.46755483746528625, "learning_rate": 2.469658740331583e-06, "loss": 0.0358, "step": 13960 }, { "epoch": 2.34, "grad_norm": 0.4978993237018585, "learning_rate": 2.4684707324422684e-06, "loss": 0.0372, "step": 13961 }, { "epoch": 2.34, "grad_norm": 0.37714412808418274, "learning_rate": 2.467282970128012e-06, "loss": 0.0264, "step": 13962 }, { "epoch": 2.34, "grad_norm": 0.40270939469337463, "learning_rate": 2.466095453427547e-06, "loss": 0.024, "step": 13963 }, { "epoch": 2.34, "grad_norm": 0.38871335983276367, "learning_rate": 2.4649081823795882e-06, "loss": 0.0361, "step": 13964 }, { "epoch": 2.34, "grad_norm": 0.47727110981941223, "learning_rate": 2.4637211570228515e-06, "loss": 0.0413, "step": 13965 }, { "epoch": 2.34, "grad_norm": 0.3718266785144806, "learning_rate": 2.462534377396044e-06, "loss": 0.0292, "step": 13966 }, { "epoch": 2.34, "grad_norm": 0.3577624261379242, "learning_rate": 2.461347843537856e-06, "loss": 0.0302, "step": 13967 }, { "epoch": 2.34, "grad_norm": 0.3735325336456299, "learning_rate": 2.460161555486982e-06, "loss": 0.0374, "step": 13968 }, { "epoch": 2.34, "grad_norm": 0.5923203229904175, "learning_rate": 2.4589755132821e-06, "loss": 0.0323, "step": 13969 }, { "epoch": 2.34, "grad_norm": 0.27835747599601746, "learning_rate": 2.457789716961878e-06, "loss": 0.0254, "step": 13970 }, { "epoch": 2.34, "grad_norm": 0.5557839870452881, "learning_rate": 2.4566041665649887e-06, "loss": 0.0638, "step": 13971 }, { "epoch": 2.34, "grad_norm": 0.40986108779907227, "learning_rate": 2.45541886213008e-06, "loss": 0.0268, "step": 13972 }, { "epoch": 2.34, "grad_norm": 0.23437272012233734, "learning_rate": 2.4542338036958046e-06, "loss": 0.0135, "step": 13973 }, { "epoch": 2.34, "grad_norm": 0.44956544041633606, "learning_rate": 2.4530489913008047e-06, "loss": 0.0267, "step": 13974 }, { "epoch": 2.34, "grad_norm": 0.42478621006011963, "learning_rate": 2.451864424983712e-06, "loss": 0.0293, "step": 13975 }, { "epoch": 2.34, "grad_norm": 0.3908308744430542, "learning_rate": 2.4506801047831454e-06, "loss": 0.0244, "step": 13976 }, { "epoch": 2.34, "grad_norm": 0.4708782732486725, "learning_rate": 2.4494960307377282e-06, "loss": 0.0428, "step": 13977 }, { "epoch": 2.34, "grad_norm": 0.37907034158706665, "learning_rate": 2.448312202886063e-06, "loss": 0.0341, "step": 13978 }, { "epoch": 2.34, "grad_norm": 0.35431739687919617, "learning_rate": 2.4471286212667546e-06, "loss": 0.0263, "step": 13979 }, { "epoch": 2.34, "grad_norm": 0.3142175078392029, "learning_rate": 2.445945285918391e-06, "loss": 0.0322, "step": 13980 }, { "epoch": 2.34, "grad_norm": 0.31192219257354736, "learning_rate": 2.44476219687956e-06, "loss": 0.0239, "step": 13981 }, { "epoch": 2.34, "grad_norm": 0.4151293933391571, "learning_rate": 2.443579354188834e-06, "loss": 0.0357, "step": 13982 }, { "epoch": 2.34, "grad_norm": 0.4405708909034729, "learning_rate": 2.442396757884786e-06, "loss": 0.0343, "step": 13983 }, { "epoch": 2.34, "grad_norm": 0.2392731010913849, "learning_rate": 2.441214408005971e-06, "loss": 0.0177, "step": 13984 }, { "epoch": 2.34, "grad_norm": 0.2542657256126404, "learning_rate": 2.440032304590946e-06, "loss": 0.0212, "step": 13985 }, { "epoch": 2.34, "grad_norm": 0.3141447901725769, "learning_rate": 2.4388504476782515e-06, "loss": 0.0277, "step": 13986 }, { "epoch": 2.34, "grad_norm": 0.4060138761997223, "learning_rate": 2.4376688373064217e-06, "loss": 0.0226, "step": 13987 }, { "epoch": 2.34, "grad_norm": 0.2465556412935257, "learning_rate": 2.43648747351399e-06, "loss": 0.025, "step": 13988 }, { "epoch": 2.34, "grad_norm": 0.3561219573020935, "learning_rate": 2.435306356339471e-06, "loss": 0.0358, "step": 13989 }, { "epoch": 2.34, "grad_norm": 0.7526623606681824, "learning_rate": 2.434125485821377e-06, "loss": 0.0386, "step": 13990 }, { "epoch": 2.34, "grad_norm": 0.5209562182426453, "learning_rate": 2.4329448619982178e-06, "loss": 0.0285, "step": 13991 }, { "epoch": 2.34, "grad_norm": 0.3832319974899292, "learning_rate": 2.431764484908482e-06, "loss": 0.0388, "step": 13992 }, { "epoch": 2.34, "grad_norm": 0.3330345153808594, "learning_rate": 2.4305843545906626e-06, "loss": 0.0247, "step": 13993 }, { "epoch": 2.34, "grad_norm": 0.372175008058548, "learning_rate": 2.4294044710832377e-06, "loss": 0.0306, "step": 13994 }, { "epoch": 2.34, "grad_norm": 0.3071402907371521, "learning_rate": 2.428224834424674e-06, "loss": 0.0284, "step": 13995 }, { "epoch": 2.34, "grad_norm": 0.4460650384426117, "learning_rate": 2.427045444653443e-06, "loss": 0.036, "step": 13996 }, { "epoch": 2.34, "grad_norm": 0.5162351131439209, "learning_rate": 2.425866301807993e-06, "loss": 0.0296, "step": 13997 }, { "epoch": 2.34, "grad_norm": 0.290002703666687, "learning_rate": 2.4246874059267745e-06, "loss": 0.03, "step": 13998 }, { "epoch": 2.34, "grad_norm": 0.6105276346206665, "learning_rate": 2.4235087570482306e-06, "loss": 0.0547, "step": 13999 }, { "epoch": 2.34, "grad_norm": 0.3533940017223358, "learning_rate": 2.422330355210789e-06, "loss": 0.0345, "step": 14000 }, { "epoch": 2.34, "grad_norm": 0.26441553235054016, "learning_rate": 2.42115220045287e-06, "loss": 0.0232, "step": 14001 }, { "epoch": 2.34, "grad_norm": 0.4182604253292084, "learning_rate": 2.4199742928128946e-06, "loss": 0.0481, "step": 14002 }, { "epoch": 2.34, "grad_norm": 0.33120647072792053, "learning_rate": 2.4187966323292676e-06, "loss": 0.0272, "step": 14003 }, { "epoch": 2.34, "grad_norm": 0.3973997235298157, "learning_rate": 2.4176192190403856e-06, "loss": 0.0333, "step": 14004 }, { "epoch": 2.34, "grad_norm": 0.39603033661842346, "learning_rate": 2.416442052984641e-06, "loss": 0.0358, "step": 14005 }, { "epoch": 2.34, "grad_norm": 0.3659129738807678, "learning_rate": 2.4152651342004185e-06, "loss": 0.0439, "step": 14006 }, { "epoch": 2.34, "grad_norm": 0.40514087677001953, "learning_rate": 2.414088462726095e-06, "loss": 0.0267, "step": 14007 }, { "epoch": 2.34, "grad_norm": 0.9407734274864197, "learning_rate": 2.412912038600034e-06, "loss": 0.0324, "step": 14008 }, { "epoch": 2.34, "grad_norm": 0.31077954173088074, "learning_rate": 2.4117358618605923e-06, "loss": 0.0266, "step": 14009 }, { "epoch": 2.34, "grad_norm": 0.590040385723114, "learning_rate": 2.4105599325461272e-06, "loss": 0.0222, "step": 14010 }, { "epoch": 2.34, "grad_norm": 0.3300999104976654, "learning_rate": 2.409384250694976e-06, "loss": 0.0259, "step": 14011 }, { "epoch": 2.34, "grad_norm": 0.31004780530929565, "learning_rate": 2.408208816345472e-06, "loss": 0.0242, "step": 14012 }, { "epoch": 2.34, "grad_norm": 0.3711647689342499, "learning_rate": 2.407033629535945e-06, "loss": 0.0386, "step": 14013 }, { "epoch": 2.34, "grad_norm": 0.2726840674877167, "learning_rate": 2.4058586903047154e-06, "loss": 0.0178, "step": 14014 }, { "epoch": 2.34, "grad_norm": 0.34628885984420776, "learning_rate": 2.404683998690088e-06, "loss": 0.0215, "step": 14015 }, { "epoch": 2.34, "grad_norm": 0.44669249653816223, "learning_rate": 2.403509554730372e-06, "loss": 0.0278, "step": 14016 }, { "epoch": 2.34, "grad_norm": 0.37266165018081665, "learning_rate": 2.4023353584638577e-06, "loss": 0.0316, "step": 14017 }, { "epoch": 2.34, "grad_norm": 0.30077826976776123, "learning_rate": 2.401161409928827e-06, "loss": 0.0271, "step": 14018 }, { "epoch": 2.34, "grad_norm": 0.3097047805786133, "learning_rate": 2.3999877091635672e-06, "loss": 0.0271, "step": 14019 }, { "epoch": 2.34, "grad_norm": 0.38967421650886536, "learning_rate": 2.39881425620634e-06, "loss": 0.0423, "step": 14020 }, { "epoch": 2.35, "grad_norm": 0.3090655207633972, "learning_rate": 2.3976410510954094e-06, "loss": 0.0301, "step": 14021 }, { "epoch": 2.35, "grad_norm": 0.5007396340370178, "learning_rate": 2.3964680938690355e-06, "loss": 0.0304, "step": 14022 }, { "epoch": 2.35, "grad_norm": 0.5040550231933594, "learning_rate": 2.3952953845654546e-06, "loss": 0.0368, "step": 14023 }, { "epoch": 2.35, "grad_norm": 0.4013548493385315, "learning_rate": 2.3941229232229136e-06, "loss": 0.0379, "step": 14024 }, { "epoch": 2.35, "grad_norm": 0.2687729299068451, "learning_rate": 2.3929507098796357e-06, "loss": 0.0255, "step": 14025 }, { "epoch": 2.35, "grad_norm": 0.43270474672317505, "learning_rate": 2.3917787445738416e-06, "loss": 0.0345, "step": 14026 }, { "epoch": 2.35, "grad_norm": 0.4440552294254303, "learning_rate": 2.3906070273437508e-06, "loss": 0.0482, "step": 14027 }, { "epoch": 2.35, "grad_norm": 0.3352055549621582, "learning_rate": 2.3894355582275617e-06, "loss": 0.0253, "step": 14028 }, { "epoch": 2.35, "grad_norm": 0.3580501973628998, "learning_rate": 2.388264337263476e-06, "loss": 0.0432, "step": 14029 }, { "epoch": 2.35, "grad_norm": 0.4320530891418457, "learning_rate": 2.3870933644896833e-06, "loss": 0.0475, "step": 14030 }, { "epoch": 2.35, "grad_norm": 0.3870382606983185, "learning_rate": 2.3859226399443638e-06, "loss": 0.0308, "step": 14031 }, { "epoch": 2.35, "grad_norm": 0.3271328806877136, "learning_rate": 2.384752163665687e-06, "loss": 0.0381, "step": 14032 }, { "epoch": 2.35, "grad_norm": 0.38106194138526917, "learning_rate": 2.383581935691823e-06, "loss": 0.0307, "step": 14033 }, { "epoch": 2.35, "grad_norm": 0.307149201631546, "learning_rate": 2.3824119560609237e-06, "loss": 0.0269, "step": 14034 }, { "epoch": 2.35, "grad_norm": 0.41396594047546387, "learning_rate": 2.381242224811142e-06, "loss": 0.0245, "step": 14035 }, { "epoch": 2.35, "grad_norm": 0.3661651313304901, "learning_rate": 2.380072741980618e-06, "loss": 0.0317, "step": 14036 }, { "epoch": 2.35, "grad_norm": 0.6948790550231934, "learning_rate": 2.3789035076074797e-06, "loss": 0.0409, "step": 14037 }, { "epoch": 2.35, "grad_norm": 0.3873138129711151, "learning_rate": 2.377734521729853e-06, "loss": 0.0231, "step": 14038 }, { "epoch": 2.35, "grad_norm": 0.3499312102794647, "learning_rate": 2.37656578438586e-06, "loss": 0.0312, "step": 14039 }, { "epoch": 2.35, "grad_norm": 0.36065319180488586, "learning_rate": 2.375397295613602e-06, "loss": 0.0348, "step": 14040 }, { "epoch": 2.35, "grad_norm": 0.38785913586616516, "learning_rate": 2.3742290554511837e-06, "loss": 0.0307, "step": 14041 }, { "epoch": 2.35, "grad_norm": 0.558233380317688, "learning_rate": 2.3730610639366957e-06, "loss": 0.0479, "step": 14042 }, { "epoch": 2.35, "grad_norm": 0.48878321051597595, "learning_rate": 2.3718933211082185e-06, "loss": 0.0345, "step": 14043 }, { "epoch": 2.35, "grad_norm": 0.3806706666946411, "learning_rate": 2.3707258270038326e-06, "loss": 0.0323, "step": 14044 }, { "epoch": 2.35, "grad_norm": 0.7621597647666931, "learning_rate": 2.3695585816616006e-06, "loss": 0.0439, "step": 14045 }, { "epoch": 2.35, "grad_norm": 0.3127971589565277, "learning_rate": 2.368391585119586e-06, "loss": 0.03, "step": 14046 }, { "epoch": 2.35, "grad_norm": 0.26095056533813477, "learning_rate": 2.3672248374158413e-06, "loss": 0.0248, "step": 14047 }, { "epoch": 2.35, "grad_norm": 0.34004509449005127, "learning_rate": 2.366058338588404e-06, "loss": 0.033, "step": 14048 }, { "epoch": 2.35, "grad_norm": 0.32021111249923706, "learning_rate": 2.364892088675317e-06, "loss": 0.0327, "step": 14049 }, { "epoch": 2.35, "grad_norm": 0.5862702131271362, "learning_rate": 2.363726087714603e-06, "loss": 0.0421, "step": 14050 }, { "epoch": 2.35, "grad_norm": 0.3270474076271057, "learning_rate": 2.3625603357442784e-06, "loss": 0.0278, "step": 14051 }, { "epoch": 2.35, "grad_norm": 0.4167689383029938, "learning_rate": 2.3613948328023594e-06, "loss": 0.0464, "step": 14052 }, { "epoch": 2.35, "grad_norm": 0.37745150923728943, "learning_rate": 2.3602295789268427e-06, "loss": 0.0262, "step": 14053 }, { "epoch": 2.35, "grad_norm": 0.323198527097702, "learning_rate": 2.359064574155727e-06, "loss": 0.0256, "step": 14054 }, { "epoch": 2.35, "grad_norm": 0.3607637584209442, "learning_rate": 2.357899818527001e-06, "loss": 0.027, "step": 14055 }, { "epoch": 2.35, "grad_norm": 0.47339561581611633, "learning_rate": 2.3567353120786394e-06, "loss": 0.0372, "step": 14056 }, { "epoch": 2.35, "grad_norm": 0.4765718877315521, "learning_rate": 2.355571054848611e-06, "loss": 0.0359, "step": 14057 }, { "epoch": 2.35, "grad_norm": 0.29008039832115173, "learning_rate": 2.3544070468748823e-06, "loss": 0.02, "step": 14058 }, { "epoch": 2.35, "grad_norm": 0.3864631652832031, "learning_rate": 2.353243288195406e-06, "loss": 0.0311, "step": 14059 }, { "epoch": 2.35, "grad_norm": 0.2744472324848175, "learning_rate": 2.352079778848123e-06, "loss": 0.0264, "step": 14060 }, { "epoch": 2.35, "grad_norm": 0.4047777056694031, "learning_rate": 2.350916518870975e-06, "loss": 0.0436, "step": 14061 }, { "epoch": 2.35, "grad_norm": 0.3876960873603821, "learning_rate": 2.3497535083018917e-06, "loss": 0.0201, "step": 14062 }, { "epoch": 2.35, "grad_norm": 0.47463932633399963, "learning_rate": 2.348590747178797e-06, "loss": 0.0253, "step": 14063 }, { "epoch": 2.35, "grad_norm": 0.4415890574455261, "learning_rate": 2.3474282355396017e-06, "loss": 0.0312, "step": 14064 }, { "epoch": 2.35, "grad_norm": 0.3360542058944702, "learning_rate": 2.3462659734222082e-06, "loss": 0.0293, "step": 14065 }, { "epoch": 2.35, "grad_norm": 0.3751499652862549, "learning_rate": 2.3451039608645176e-06, "loss": 0.0321, "step": 14066 }, { "epoch": 2.35, "grad_norm": 0.498046338558197, "learning_rate": 2.3439421979044188e-06, "loss": 0.0381, "step": 14067 }, { "epoch": 2.35, "grad_norm": 0.35657382011413574, "learning_rate": 2.3427806845797873e-06, "loss": 0.0292, "step": 14068 }, { "epoch": 2.35, "grad_norm": 0.35575294494628906, "learning_rate": 2.341619420928499e-06, "loss": 0.0326, "step": 14069 }, { "epoch": 2.35, "grad_norm": 0.46874335408210754, "learning_rate": 2.3404584069884218e-06, "loss": 0.0316, "step": 14070 }, { "epoch": 2.35, "grad_norm": 0.3876386880874634, "learning_rate": 2.339297642797407e-06, "loss": 0.0292, "step": 14071 }, { "epoch": 2.35, "grad_norm": 0.29528144001960754, "learning_rate": 2.3381371283933076e-06, "loss": 0.029, "step": 14072 }, { "epoch": 2.35, "grad_norm": 0.47678735852241516, "learning_rate": 2.33697686381396e-06, "loss": 0.0396, "step": 14073 }, { "epoch": 2.35, "grad_norm": 0.4318673312664032, "learning_rate": 2.3358168490971942e-06, "loss": 0.0255, "step": 14074 }, { "epoch": 2.35, "grad_norm": 0.44380253553390503, "learning_rate": 2.33465708428084e-06, "loss": 0.0429, "step": 14075 }, { "epoch": 2.35, "grad_norm": 0.2790668308734894, "learning_rate": 2.333497569402707e-06, "loss": 0.0227, "step": 14076 }, { "epoch": 2.35, "grad_norm": 0.3411802351474762, "learning_rate": 2.3323383045006055e-06, "loss": 0.0278, "step": 14077 }, { "epoch": 2.35, "grad_norm": 0.3712235987186432, "learning_rate": 2.3311792896123374e-06, "loss": 0.0212, "step": 14078 }, { "epoch": 2.35, "grad_norm": 0.37741348147392273, "learning_rate": 2.330020524775688e-06, "loss": 0.03, "step": 14079 }, { "epoch": 2.35, "grad_norm": 0.502672553062439, "learning_rate": 2.3288620100284466e-06, "loss": 0.0406, "step": 14080 }, { "epoch": 2.36, "grad_norm": 0.21365949511528015, "learning_rate": 2.327703745408384e-06, "loss": 0.0148, "step": 14081 }, { "epoch": 2.36, "grad_norm": 0.40084609389305115, "learning_rate": 2.326545730953266e-06, "loss": 0.0325, "step": 14082 }, { "epoch": 2.36, "grad_norm": 0.3011649250984192, "learning_rate": 2.325387966700855e-06, "loss": 0.0204, "step": 14083 }, { "epoch": 2.36, "grad_norm": 0.43669867515563965, "learning_rate": 2.3242304526888958e-06, "loss": 0.0456, "step": 14084 }, { "epoch": 2.36, "grad_norm": 0.45965299010276794, "learning_rate": 2.3230731889551385e-06, "loss": 0.0373, "step": 14085 }, { "epoch": 2.36, "grad_norm": 0.2729390561580658, "learning_rate": 2.3219161755373087e-06, "loss": 0.0233, "step": 14086 }, { "epoch": 2.36, "grad_norm": 0.4110453426837921, "learning_rate": 2.320759412473139e-06, "loss": 0.0363, "step": 14087 }, { "epoch": 2.36, "grad_norm": 0.30374208092689514, "learning_rate": 2.3196028998003426e-06, "loss": 0.0356, "step": 14088 }, { "epoch": 2.36, "grad_norm": 0.5261257290840149, "learning_rate": 2.318446637556633e-06, "loss": 0.0328, "step": 14089 }, { "epoch": 2.36, "grad_norm": 0.932375967502594, "learning_rate": 2.317290625779707e-06, "loss": 0.0322, "step": 14090 }, { "epoch": 2.36, "grad_norm": 0.24914531409740448, "learning_rate": 2.3161348645072635e-06, "loss": 0.0163, "step": 14091 }, { "epoch": 2.36, "grad_norm": 0.37183016538619995, "learning_rate": 2.314979353776984e-06, "loss": 0.0239, "step": 14092 }, { "epoch": 2.36, "grad_norm": 0.3602280616760254, "learning_rate": 2.313824093626542e-06, "loss": 0.0304, "step": 14093 }, { "epoch": 2.36, "grad_norm": 0.3607347011566162, "learning_rate": 2.3126690840936107e-06, "loss": 0.0337, "step": 14094 }, { "epoch": 2.36, "grad_norm": 0.6341458559036255, "learning_rate": 2.3115143252158532e-06, "loss": 0.0381, "step": 14095 }, { "epoch": 2.36, "grad_norm": 0.37994909286499023, "learning_rate": 2.3103598170309148e-06, "loss": 0.0346, "step": 14096 }, { "epoch": 2.36, "grad_norm": 0.34140247106552124, "learning_rate": 2.3092055595764472e-06, "loss": 0.0269, "step": 14097 }, { "epoch": 2.36, "grad_norm": 0.28629985451698303, "learning_rate": 2.3080515528900825e-06, "loss": 0.0224, "step": 14098 }, { "epoch": 2.36, "grad_norm": 0.301557719707489, "learning_rate": 2.3068977970094464e-06, "loss": 0.0183, "step": 14099 }, { "epoch": 2.36, "grad_norm": 0.35629281401634216, "learning_rate": 2.305744291972163e-06, "loss": 0.0401, "step": 14100 }, { "epoch": 2.36, "grad_norm": 0.24001342058181763, "learning_rate": 2.3045910378158396e-06, "loss": 0.0274, "step": 14101 }, { "epoch": 2.36, "grad_norm": 0.27760908007621765, "learning_rate": 2.3034380345780817e-06, "loss": 0.0188, "step": 14102 }, { "epoch": 2.36, "grad_norm": 0.47629615664482117, "learning_rate": 2.302285282296487e-06, "loss": 0.0259, "step": 14103 }, { "epoch": 2.36, "grad_norm": 0.32018011808395386, "learning_rate": 2.301132781008637e-06, "loss": 0.0249, "step": 14104 }, { "epoch": 2.36, "grad_norm": 0.4332045316696167, "learning_rate": 2.299980530752116e-06, "loss": 0.0264, "step": 14105 }, { "epoch": 2.36, "grad_norm": 0.3638642132282257, "learning_rate": 2.298828531564492e-06, "loss": 0.0484, "step": 14106 }, { "epoch": 2.36, "grad_norm": 0.4388439953327179, "learning_rate": 2.297676783483325e-06, "loss": 0.0441, "step": 14107 }, { "epoch": 2.36, "grad_norm": 0.3904760479927063, "learning_rate": 2.2965252865461728e-06, "loss": 0.0258, "step": 14108 }, { "epoch": 2.36, "grad_norm": 0.39253273606300354, "learning_rate": 2.295374040790579e-06, "loss": 0.0341, "step": 14109 }, { "epoch": 2.36, "grad_norm": 0.350016713142395, "learning_rate": 2.2942230462540815e-06, "loss": 0.0237, "step": 14110 }, { "epoch": 2.36, "grad_norm": 0.4947206676006317, "learning_rate": 2.2930723029742143e-06, "loss": 0.0335, "step": 14111 }, { "epoch": 2.36, "grad_norm": 0.33664119243621826, "learning_rate": 2.2919218109884957e-06, "loss": 0.0341, "step": 14112 }, { "epoch": 2.36, "grad_norm": 0.41770273447036743, "learning_rate": 2.2907715703344346e-06, "loss": 0.0392, "step": 14113 }, { "epoch": 2.36, "grad_norm": 0.22474277019500732, "learning_rate": 2.289621581049545e-06, "loss": 0.0147, "step": 14114 }, { "epoch": 2.36, "grad_norm": 0.4451809525489807, "learning_rate": 2.288471843171318e-06, "loss": 0.0168, "step": 14115 }, { "epoch": 2.36, "grad_norm": 0.33779338002204895, "learning_rate": 2.2873223567372404e-06, "loss": 0.0286, "step": 14116 }, { "epoch": 2.36, "grad_norm": 0.3754691779613495, "learning_rate": 2.286173121784796e-06, "loss": 0.0288, "step": 14117 }, { "epoch": 2.36, "grad_norm": 0.38615912199020386, "learning_rate": 2.285024138351456e-06, "loss": 0.0264, "step": 14118 }, { "epoch": 2.36, "grad_norm": 0.3805471956729889, "learning_rate": 2.283875406474688e-06, "loss": 0.0299, "step": 14119 }, { "epoch": 2.36, "grad_norm": 0.3744926452636719, "learning_rate": 2.282726926191945e-06, "loss": 0.0407, "step": 14120 }, { "epoch": 2.36, "grad_norm": 0.441457062959671, "learning_rate": 2.2815786975406717e-06, "loss": 0.0221, "step": 14121 }, { "epoch": 2.36, "grad_norm": 0.3588917553424835, "learning_rate": 2.280430720558313e-06, "loss": 0.0363, "step": 14122 }, { "epoch": 2.36, "grad_norm": 0.3902863562107086, "learning_rate": 2.279282995282298e-06, "loss": 0.0267, "step": 14123 }, { "epoch": 2.36, "grad_norm": 0.3028041422367096, "learning_rate": 2.2781355217500454e-06, "loss": 0.032, "step": 14124 }, { "epoch": 2.36, "grad_norm": 0.3217846751213074, "learning_rate": 2.276988299998976e-06, "loss": 0.0232, "step": 14125 }, { "epoch": 2.36, "grad_norm": 0.28330641984939575, "learning_rate": 2.275841330066495e-06, "loss": 0.0257, "step": 14126 }, { "epoch": 2.36, "grad_norm": 0.4440337121486664, "learning_rate": 2.274694611989998e-06, "loss": 0.0219, "step": 14127 }, { "epoch": 2.36, "grad_norm": 0.3242470920085907, "learning_rate": 2.273548145806881e-06, "loss": 0.0267, "step": 14128 }, { "epoch": 2.36, "grad_norm": 0.34109190106391907, "learning_rate": 2.272401931554522e-06, "loss": 0.0285, "step": 14129 }, { "epoch": 2.36, "grad_norm": 0.28423815965652466, "learning_rate": 2.2712559692702916e-06, "loss": 0.0248, "step": 14130 }, { "epoch": 2.36, "grad_norm": 0.5763154029846191, "learning_rate": 2.2701102589915624e-06, "loss": 0.0445, "step": 14131 }, { "epoch": 2.36, "grad_norm": 0.7619557976722717, "learning_rate": 2.268964800755685e-06, "loss": 0.0556, "step": 14132 }, { "epoch": 2.36, "grad_norm": 0.4677782654762268, "learning_rate": 2.2678195946000113e-06, "loss": 0.0382, "step": 14133 }, { "epoch": 2.36, "grad_norm": 0.3158945143222809, "learning_rate": 2.2666746405618867e-06, "loss": 0.0294, "step": 14134 }, { "epoch": 2.36, "grad_norm": 0.3603004813194275, "learning_rate": 2.2655299386786357e-06, "loss": 0.0381, "step": 14135 }, { "epoch": 2.36, "grad_norm": 0.2980758249759674, "learning_rate": 2.2643854889875906e-06, "loss": 0.029, "step": 14136 }, { "epoch": 2.36, "grad_norm": 0.31844428181648254, "learning_rate": 2.2632412915260627e-06, "loss": 0.0282, "step": 14137 }, { "epoch": 2.36, "grad_norm": 0.6277550458908081, "learning_rate": 2.262097346331359e-06, "loss": 0.026, "step": 14138 }, { "epoch": 2.36, "grad_norm": 0.3178611695766449, "learning_rate": 2.260953653440785e-06, "loss": 0.0154, "step": 14139 }, { "epoch": 2.36, "grad_norm": 0.3219034671783447, "learning_rate": 2.2598102128916277e-06, "loss": 0.0271, "step": 14140 }, { "epoch": 2.37, "grad_norm": 0.3642657995223999, "learning_rate": 2.2586670247211684e-06, "loss": 0.0391, "step": 14141 }, { "epoch": 2.37, "grad_norm": 0.3970181345939636, "learning_rate": 2.2575240889666862e-06, "loss": 0.0277, "step": 14142 }, { "epoch": 2.37, "grad_norm": 0.2927248179912567, "learning_rate": 2.2563814056654497e-06, "loss": 0.0297, "step": 14143 }, { "epoch": 2.37, "grad_norm": 0.43491339683532715, "learning_rate": 2.255238974854711e-06, "loss": 0.036, "step": 14144 }, { "epoch": 2.37, "grad_norm": 0.25779497623443604, "learning_rate": 2.2540967965717286e-06, "loss": 0.028, "step": 14145 }, { "epoch": 2.37, "grad_norm": 0.5015851855278015, "learning_rate": 2.252954870853736e-06, "loss": 0.0553, "step": 14146 }, { "epoch": 2.37, "grad_norm": 0.4817598760128021, "learning_rate": 2.2518131977379763e-06, "loss": 0.0459, "step": 14147 }, { "epoch": 2.37, "grad_norm": 0.4096478521823883, "learning_rate": 2.250671777261669e-06, "loss": 0.0426, "step": 14148 }, { "epoch": 2.37, "grad_norm": 0.37696507573127747, "learning_rate": 2.2495306094620304e-06, "loss": 0.0312, "step": 14149 }, { "epoch": 2.37, "grad_norm": 0.3545052409172058, "learning_rate": 2.248389694376273e-06, "loss": 0.0249, "step": 14150 }, { "epoch": 2.37, "grad_norm": 0.3859403729438782, "learning_rate": 2.2472490320415997e-06, "loss": 0.0291, "step": 14151 }, { "epoch": 2.37, "grad_norm": 0.4060702919960022, "learning_rate": 2.2461086224951976e-06, "loss": 0.0338, "step": 14152 }, { "epoch": 2.37, "grad_norm": 0.45532816648483276, "learning_rate": 2.2449684657742575e-06, "loss": 0.0345, "step": 14153 }, { "epoch": 2.37, "grad_norm": 0.7206263542175293, "learning_rate": 2.2438285619159515e-06, "loss": 0.0278, "step": 14154 }, { "epoch": 2.37, "grad_norm": 0.5737968683242798, "learning_rate": 2.2426889109574466e-06, "loss": 0.029, "step": 14155 }, { "epoch": 2.37, "grad_norm": 0.371898353099823, "learning_rate": 2.2415495129359075e-06, "loss": 0.0297, "step": 14156 }, { "epoch": 2.37, "grad_norm": 0.4336886703968048, "learning_rate": 2.2404103678884793e-06, "loss": 0.042, "step": 14157 }, { "epoch": 2.37, "grad_norm": 0.27983400225639343, "learning_rate": 2.2392714758523094e-06, "loss": 0.0194, "step": 14158 }, { "epoch": 2.37, "grad_norm": 0.2513919770717621, "learning_rate": 2.238132836864534e-06, "loss": 0.0278, "step": 14159 }, { "epoch": 2.37, "grad_norm": 0.4581053853034973, "learning_rate": 2.2369944509622755e-06, "loss": 0.0371, "step": 14160 }, { "epoch": 2.37, "grad_norm": 0.40817490220069885, "learning_rate": 2.2358563181826578e-06, "loss": 0.0349, "step": 14161 }, { "epoch": 2.37, "grad_norm": 0.3177635073661804, "learning_rate": 2.2347184385627883e-06, "loss": 0.0193, "step": 14162 }, { "epoch": 2.37, "grad_norm": 0.49768972396850586, "learning_rate": 2.2335808121397663e-06, "loss": 0.0342, "step": 14163 }, { "epoch": 2.37, "grad_norm": 0.3427460491657257, "learning_rate": 2.232443438950691e-06, "loss": 0.0257, "step": 14164 }, { "epoch": 2.37, "grad_norm": 0.35986706614494324, "learning_rate": 2.231306319032641e-06, "loss": 0.0475, "step": 14165 }, { "epoch": 2.37, "grad_norm": 0.5508844256401062, "learning_rate": 2.230169452422699e-06, "loss": 0.038, "step": 14166 }, { "epoch": 2.37, "grad_norm": 0.28966090083122253, "learning_rate": 2.229032839157935e-06, "loss": 0.0193, "step": 14167 }, { "epoch": 2.37, "grad_norm": 0.38308069109916687, "learning_rate": 2.2278964792754076e-06, "loss": 0.0232, "step": 14168 }, { "epoch": 2.37, "grad_norm": 0.3105992078781128, "learning_rate": 2.2267603728121668e-06, "loss": 0.0282, "step": 14169 }, { "epoch": 2.37, "grad_norm": 0.4389958083629608, "learning_rate": 2.2256245198052607e-06, "loss": 0.0387, "step": 14170 }, { "epoch": 2.37, "grad_norm": 0.4849025011062622, "learning_rate": 2.224488920291724e-06, "loss": 0.0332, "step": 14171 }, { "epoch": 2.37, "grad_norm": 0.5976356267929077, "learning_rate": 2.223353574308581e-06, "loss": 0.0458, "step": 14172 }, { "epoch": 2.37, "grad_norm": 0.28783828020095825, "learning_rate": 2.2222184818928537e-06, "loss": 0.028, "step": 14173 }, { "epoch": 2.37, "grad_norm": 0.40121757984161377, "learning_rate": 2.2210836430815543e-06, "loss": 0.0206, "step": 14174 }, { "epoch": 2.37, "grad_norm": 0.4454249143600464, "learning_rate": 2.2199490579116866e-06, "loss": 0.0388, "step": 14175 }, { "epoch": 2.37, "grad_norm": 0.2559739947319031, "learning_rate": 2.218814726420244e-06, "loss": 0.0249, "step": 14176 }, { "epoch": 2.37, "grad_norm": 0.292814701795578, "learning_rate": 2.2176806486442093e-06, "loss": 0.018, "step": 14177 }, { "epoch": 2.37, "grad_norm": 0.3724900484085083, "learning_rate": 2.2165468246205667e-06, "loss": 0.0271, "step": 14178 }, { "epoch": 2.37, "grad_norm": 0.32375669479370117, "learning_rate": 2.2154132543862818e-06, "loss": 0.036, "step": 14179 }, { "epoch": 2.37, "grad_norm": 0.261461079120636, "learning_rate": 2.2142799379783155e-06, "loss": 0.0363, "step": 14180 }, { "epoch": 2.37, "grad_norm": 0.3096582293510437, "learning_rate": 2.2131468754336215e-06, "loss": 0.0261, "step": 14181 }, { "epoch": 2.37, "grad_norm": 0.3359498977661133, "learning_rate": 2.2120140667891486e-06, "loss": 0.0199, "step": 14182 }, { "epoch": 2.37, "grad_norm": 0.36298975348472595, "learning_rate": 2.2108815120818295e-06, "loss": 0.0307, "step": 14183 }, { "epoch": 2.37, "grad_norm": 0.3193623423576355, "learning_rate": 2.2097492113485952e-06, "loss": 0.0319, "step": 14184 }, { "epoch": 2.37, "grad_norm": 0.45578017830848694, "learning_rate": 2.2086171646263643e-06, "loss": 0.0373, "step": 14185 }, { "epoch": 2.37, "grad_norm": 0.3055717349052429, "learning_rate": 2.207485371952046e-06, "loss": 0.0268, "step": 14186 }, { "epoch": 2.37, "grad_norm": 0.3808991014957428, "learning_rate": 2.20635383336255e-06, "loss": 0.0385, "step": 14187 }, { "epoch": 2.37, "grad_norm": 0.4105299115180969, "learning_rate": 2.2052225488947642e-06, "loss": 0.0478, "step": 14188 }, { "epoch": 2.37, "grad_norm": 0.298062264919281, "learning_rate": 2.2040915185855803e-06, "loss": 0.0407, "step": 14189 }, { "epoch": 2.37, "grad_norm": 0.279161274433136, "learning_rate": 2.202960742471879e-06, "loss": 0.0271, "step": 14190 }, { "epoch": 2.37, "grad_norm": 0.30449235439300537, "learning_rate": 2.201830220590524e-06, "loss": 0.0219, "step": 14191 }, { "epoch": 2.37, "grad_norm": 0.3419557213783264, "learning_rate": 2.2006999529783855e-06, "loss": 0.0195, "step": 14192 }, { "epoch": 2.37, "grad_norm": 0.4093391001224518, "learning_rate": 2.199569939672311e-06, "loss": 0.0406, "step": 14193 }, { "epoch": 2.37, "grad_norm": 0.25673189759254456, "learning_rate": 2.198440180709147e-06, "loss": 0.021, "step": 14194 }, { "epoch": 2.37, "grad_norm": 0.3975409269332886, "learning_rate": 2.1973106761257347e-06, "loss": 0.0347, "step": 14195 }, { "epoch": 2.37, "grad_norm": 0.43860840797424316, "learning_rate": 2.1961814259588986e-06, "loss": 0.0376, "step": 14196 }, { "epoch": 2.37, "grad_norm": 0.3151789605617523, "learning_rate": 2.195052430245459e-06, "loss": 0.038, "step": 14197 }, { "epoch": 2.37, "grad_norm": 0.2536177635192871, "learning_rate": 2.1939236890222293e-06, "loss": 0.0192, "step": 14198 }, { "epoch": 2.37, "grad_norm": 0.4039529860019684, "learning_rate": 2.192795202326018e-06, "loss": 0.0219, "step": 14199 }, { "epoch": 2.37, "grad_norm": 0.34397485852241516, "learning_rate": 2.1916669701936135e-06, "loss": 0.032, "step": 14200 }, { "epoch": 2.38, "grad_norm": 0.36753755807876587, "learning_rate": 2.19053899266181e-06, "loss": 0.0337, "step": 14201 }, { "epoch": 2.38, "grad_norm": 0.37927892804145813, "learning_rate": 2.1894112697673808e-06, "loss": 0.0292, "step": 14202 }, { "epoch": 2.38, "grad_norm": 0.3331730365753174, "learning_rate": 2.188283801547103e-06, "loss": 0.0253, "step": 14203 }, { "epoch": 2.38, "grad_norm": 0.6825222373008728, "learning_rate": 2.1871565880377345e-06, "loss": 0.0475, "step": 14204 }, { "epoch": 2.38, "grad_norm": 0.320577472448349, "learning_rate": 2.1860296292760274e-06, "loss": 0.0183, "step": 14205 }, { "epoch": 2.38, "grad_norm": 0.36686062812805176, "learning_rate": 2.184902925298732e-06, "loss": 0.0211, "step": 14206 }, { "epoch": 2.38, "grad_norm": 0.29855582118034363, "learning_rate": 2.1837764761425874e-06, "loss": 0.0271, "step": 14207 }, { "epoch": 2.38, "grad_norm": 0.5065099596977234, "learning_rate": 2.1826502818443163e-06, "loss": 0.046, "step": 14208 }, { "epoch": 2.38, "grad_norm": 0.3080383241176605, "learning_rate": 2.1815243424406475e-06, "loss": 0.0281, "step": 14209 }, { "epoch": 2.38, "grad_norm": 0.4572349786758423, "learning_rate": 2.180398657968289e-06, "loss": 0.0335, "step": 14210 }, { "epoch": 2.38, "grad_norm": 0.5941762924194336, "learning_rate": 2.179273228463944e-06, "loss": 0.041, "step": 14211 }, { "epoch": 2.38, "grad_norm": 0.3614310324192047, "learning_rate": 2.178148053964313e-06, "loss": 0.0285, "step": 14212 }, { "epoch": 2.38, "grad_norm": 0.3957254886627197, "learning_rate": 2.177023134506079e-06, "loss": 0.0423, "step": 14213 }, { "epoch": 2.38, "grad_norm": 0.5198783278465271, "learning_rate": 2.1758984701259224e-06, "loss": 0.0363, "step": 14214 }, { "epoch": 2.38, "grad_norm": 0.33286088705062866, "learning_rate": 2.1747740608605192e-06, "loss": 0.0408, "step": 14215 }, { "epoch": 2.38, "grad_norm": 0.25825321674346924, "learning_rate": 2.1736499067465255e-06, "loss": 0.0265, "step": 14216 }, { "epoch": 2.38, "grad_norm": 0.4209865927696228, "learning_rate": 2.1725260078206025e-06, "loss": 0.0357, "step": 14217 }, { "epoch": 2.38, "grad_norm": 0.3538222908973694, "learning_rate": 2.1714023641193916e-06, "loss": 0.0319, "step": 14218 }, { "epoch": 2.38, "grad_norm": 0.37181952595710754, "learning_rate": 2.1702789756795295e-06, "loss": 0.0275, "step": 14219 }, { "epoch": 2.38, "grad_norm": 0.3224646747112274, "learning_rate": 2.1691558425376512e-06, "loss": 0.0178, "step": 14220 }, { "epoch": 2.38, "grad_norm": 0.489278107881546, "learning_rate": 2.168032964730371e-06, "loss": 0.0345, "step": 14221 }, { "epoch": 2.38, "grad_norm": 0.37637269496917725, "learning_rate": 2.1669103422943062e-06, "loss": 0.028, "step": 14222 }, { "epoch": 2.38, "grad_norm": 0.2869423031806946, "learning_rate": 2.1657879752660626e-06, "loss": 0.022, "step": 14223 }, { "epoch": 2.38, "grad_norm": 0.37325385212898254, "learning_rate": 2.164665863682234e-06, "loss": 0.0427, "step": 14224 }, { "epoch": 2.38, "grad_norm": 0.32741275429725647, "learning_rate": 2.1635440075794067e-06, "loss": 0.0232, "step": 14225 }, { "epoch": 2.38, "grad_norm": 0.5701401829719543, "learning_rate": 2.1624224069941647e-06, "loss": 0.0502, "step": 14226 }, { "epoch": 2.38, "grad_norm": 0.36849188804626465, "learning_rate": 2.161301061963076e-06, "loss": 0.0251, "step": 14227 }, { "epoch": 2.38, "grad_norm": 0.5983116030693054, "learning_rate": 2.1601799725227013e-06, "loss": 0.0282, "step": 14228 }, { "epoch": 2.38, "grad_norm": 0.47812536358833313, "learning_rate": 2.1590591387095985e-06, "loss": 0.0279, "step": 14229 }, { "epoch": 2.38, "grad_norm": 0.4876771867275238, "learning_rate": 2.157938560560313e-06, "loss": 0.0397, "step": 14230 }, { "epoch": 2.38, "grad_norm": 0.3406703770160675, "learning_rate": 2.1568182381113854e-06, "loss": 0.0242, "step": 14231 }, { "epoch": 2.38, "grad_norm": 0.3735089600086212, "learning_rate": 2.155698171399344e-06, "loss": 0.0387, "step": 14232 }, { "epoch": 2.38, "grad_norm": 0.6195962429046631, "learning_rate": 2.154578360460705e-06, "loss": 0.0521, "step": 14233 }, { "epoch": 2.38, "grad_norm": 0.3192368745803833, "learning_rate": 2.1534588053319884e-06, "loss": 0.0266, "step": 14234 }, { "epoch": 2.38, "grad_norm": 0.3907635509967804, "learning_rate": 2.1523395060496942e-06, "loss": 0.021, "step": 14235 }, { "epoch": 2.38, "grad_norm": 0.2791209816932678, "learning_rate": 2.1512204626503173e-06, "loss": 0.0322, "step": 14236 }, { "epoch": 2.38, "grad_norm": 0.5263617038726807, "learning_rate": 2.1501016751703487e-06, "loss": 0.0326, "step": 14237 }, { "epoch": 2.38, "grad_norm": 0.25401726365089417, "learning_rate": 2.1489831436462692e-06, "loss": 0.0268, "step": 14238 }, { "epoch": 2.38, "grad_norm": 0.3377404808998108, "learning_rate": 2.1478648681145454e-06, "loss": 0.0306, "step": 14239 }, { "epoch": 2.38, "grad_norm": 0.258182555437088, "learning_rate": 2.146746848611645e-06, "loss": 0.0215, "step": 14240 }, { "epoch": 2.38, "grad_norm": 0.5187616348266602, "learning_rate": 2.14562908517402e-06, "loss": 0.0366, "step": 14241 }, { "epoch": 2.38, "grad_norm": 0.3797807991504669, "learning_rate": 2.1445115778381145e-06, "loss": 0.0297, "step": 14242 }, { "epoch": 2.38, "grad_norm": 0.35734814405441284, "learning_rate": 2.14339432664037e-06, "loss": 0.0232, "step": 14243 }, { "epoch": 2.38, "grad_norm": 0.384389191865921, "learning_rate": 2.142277331617212e-06, "loss": 0.0332, "step": 14244 }, { "epoch": 2.38, "grad_norm": 0.21636782586574554, "learning_rate": 2.141160592805066e-06, "loss": 0.0122, "step": 14245 }, { "epoch": 2.38, "grad_norm": 0.5380257368087769, "learning_rate": 2.1400441102403392e-06, "loss": 0.0366, "step": 14246 }, { "epoch": 2.38, "grad_norm": 0.4234059453010559, "learning_rate": 2.1389278839594397e-06, "loss": 0.0241, "step": 14247 }, { "epoch": 2.38, "grad_norm": 0.3633764684200287, "learning_rate": 2.1378119139987654e-06, "loss": 0.0344, "step": 14248 }, { "epoch": 2.38, "grad_norm": 0.3453535735607147, "learning_rate": 2.1366962003947013e-06, "loss": 0.0281, "step": 14249 }, { "epoch": 2.38, "grad_norm": 0.2418237030506134, "learning_rate": 2.135580743183624e-06, "loss": 0.019, "step": 14250 }, { "epoch": 2.38, "grad_norm": 0.4228596091270447, "learning_rate": 2.13446554240191e-06, "loss": 0.0494, "step": 14251 }, { "epoch": 2.38, "grad_norm": 0.29973065853118896, "learning_rate": 2.1333505980859183e-06, "loss": 0.0288, "step": 14252 }, { "epoch": 2.38, "grad_norm": 0.3957468569278717, "learning_rate": 2.1322359102720015e-06, "loss": 0.0301, "step": 14253 }, { "epoch": 2.38, "grad_norm": 0.8770973682403564, "learning_rate": 2.1311214789965084e-06, "loss": 0.0623, "step": 14254 }, { "epoch": 2.38, "grad_norm": 0.25679901242256165, "learning_rate": 2.130007304295777e-06, "loss": 0.0269, "step": 14255 }, { "epoch": 2.38, "grad_norm": 0.41572609543800354, "learning_rate": 2.1288933862061335e-06, "loss": 0.0419, "step": 14256 }, { "epoch": 2.38, "grad_norm": 0.3127303719520569, "learning_rate": 2.127779724763902e-06, "loss": 0.0253, "step": 14257 }, { "epoch": 2.38, "grad_norm": 0.4454253017902374, "learning_rate": 2.1266663200053906e-06, "loss": 0.0292, "step": 14258 }, { "epoch": 2.38, "grad_norm": 0.4194553792476654, "learning_rate": 2.1255531719669085e-06, "loss": 0.041, "step": 14259 }, { "epoch": 2.39, "grad_norm": 0.3215304911136627, "learning_rate": 2.1244402806847486e-06, "loss": 0.0374, "step": 14260 }, { "epoch": 2.39, "grad_norm": 0.3674751818180084, "learning_rate": 2.1233276461951947e-06, "loss": 0.0337, "step": 14261 }, { "epoch": 2.39, "grad_norm": 0.4575739800930023, "learning_rate": 2.1222152685345287e-06, "loss": 0.0327, "step": 14262 }, { "epoch": 2.39, "grad_norm": 0.4999241232872009, "learning_rate": 2.1211031477390252e-06, "loss": 0.0252, "step": 14263 }, { "epoch": 2.39, "grad_norm": 0.3946433961391449, "learning_rate": 2.119991283844939e-06, "loss": 0.0373, "step": 14264 }, { "epoch": 2.39, "grad_norm": 0.31159278750419617, "learning_rate": 2.11887967688853e-06, "loss": 0.028, "step": 14265 }, { "epoch": 2.39, "grad_norm": 0.6657072901725769, "learning_rate": 2.117768326906041e-06, "loss": 0.0427, "step": 14266 }, { "epoch": 2.39, "grad_norm": 0.38264262676239014, "learning_rate": 2.1166572339337066e-06, "loss": 0.0432, "step": 14267 }, { "epoch": 2.39, "grad_norm": 0.4662168323993683, "learning_rate": 2.1155463980077595e-06, "loss": 0.0362, "step": 14268 }, { "epoch": 2.39, "grad_norm": 0.36333346366882324, "learning_rate": 2.1144358191644163e-06, "loss": 0.0266, "step": 14269 }, { "epoch": 2.39, "grad_norm": 0.4258286952972412, "learning_rate": 2.11332549743989e-06, "loss": 0.0439, "step": 14270 }, { "epoch": 2.39, "grad_norm": 0.5449074506759644, "learning_rate": 2.112215432870387e-06, "loss": 0.0435, "step": 14271 }, { "epoch": 2.39, "grad_norm": 0.39083805680274963, "learning_rate": 2.111105625492098e-06, "loss": 0.0445, "step": 14272 }, { "epoch": 2.39, "grad_norm": 0.36016207933425903, "learning_rate": 2.109996075341213e-06, "loss": 0.0325, "step": 14273 }, { "epoch": 2.39, "grad_norm": 0.5258142948150635, "learning_rate": 2.1088867824539105e-06, "loss": 0.0379, "step": 14274 }, { "epoch": 2.39, "grad_norm": 0.34895023703575134, "learning_rate": 2.1077777468663554e-06, "loss": 0.0318, "step": 14275 }, { "epoch": 2.39, "grad_norm": 0.37286341190338135, "learning_rate": 2.1066689686147157e-06, "loss": 0.0386, "step": 14276 }, { "epoch": 2.39, "grad_norm": 0.42781201004981995, "learning_rate": 2.1055604477351378e-06, "loss": 0.0283, "step": 14277 }, { "epoch": 2.39, "grad_norm": 0.40022799372673035, "learning_rate": 2.1044521842637713e-06, "loss": 0.0384, "step": 14278 }, { "epoch": 2.39, "grad_norm": 0.4295688569545746, "learning_rate": 2.1033441782367535e-06, "loss": 0.0411, "step": 14279 }, { "epoch": 2.39, "grad_norm": 0.3911448121070862, "learning_rate": 2.1022364296902097e-06, "loss": 0.0298, "step": 14280 }, { "epoch": 2.39, "grad_norm": 0.3708069622516632, "learning_rate": 2.1011289386602586e-06, "loss": 0.0383, "step": 14281 }, { "epoch": 2.39, "grad_norm": 0.3585789203643799, "learning_rate": 2.1000217051830143e-06, "loss": 0.0282, "step": 14282 }, { "epoch": 2.39, "grad_norm": 0.4480268359184265, "learning_rate": 2.0989147292945776e-06, "loss": 0.028, "step": 14283 }, { "epoch": 2.39, "grad_norm": 0.3090890049934387, "learning_rate": 2.097808011031041e-06, "loss": 0.0281, "step": 14284 }, { "epoch": 2.39, "grad_norm": 0.3848972022533417, "learning_rate": 2.0967015504284925e-06, "loss": 0.0365, "step": 14285 }, { "epoch": 2.39, "grad_norm": 0.3121483027935028, "learning_rate": 2.0955953475230094e-06, "loss": 0.0218, "step": 14286 }, { "epoch": 2.39, "grad_norm": 0.3546673357486725, "learning_rate": 2.0944894023506648e-06, "loss": 0.0361, "step": 14287 }, { "epoch": 2.39, "grad_norm": 0.4051623046398163, "learning_rate": 2.0933837149475158e-06, "loss": 0.0232, "step": 14288 }, { "epoch": 2.39, "grad_norm": 0.5085070729255676, "learning_rate": 2.0922782853496113e-06, "loss": 0.0336, "step": 14289 }, { "epoch": 2.39, "grad_norm": 0.4754598140716553, "learning_rate": 2.0911731135930024e-06, "loss": 0.0341, "step": 14290 }, { "epoch": 2.39, "grad_norm": 0.3759213387966156, "learning_rate": 2.0900681997137206e-06, "loss": 0.0308, "step": 14291 }, { "epoch": 2.39, "grad_norm": 0.3707786500453949, "learning_rate": 2.0889635437477906e-06, "loss": 0.0241, "step": 14292 }, { "epoch": 2.39, "grad_norm": 0.49871906638145447, "learning_rate": 2.0878591457312337e-06, "loss": 0.0507, "step": 14293 }, { "epoch": 2.39, "grad_norm": 0.496476411819458, "learning_rate": 2.086755005700063e-06, "loss": 0.0315, "step": 14294 }, { "epoch": 2.39, "grad_norm": 0.4153392016887665, "learning_rate": 2.0856511236902756e-06, "loss": 0.0402, "step": 14295 }, { "epoch": 2.39, "grad_norm": 0.4247865378856659, "learning_rate": 2.0845474997378688e-06, "loss": 0.0434, "step": 14296 }, { "epoch": 2.39, "grad_norm": 0.5036489963531494, "learning_rate": 2.083444133878827e-06, "loss": 0.0368, "step": 14297 }, { "epoch": 2.39, "grad_norm": 0.47626906633377075, "learning_rate": 2.082341026149123e-06, "loss": 0.0603, "step": 14298 }, { "epoch": 2.39, "grad_norm": 0.5627762079238892, "learning_rate": 2.081238176584731e-06, "loss": 0.0406, "step": 14299 }, { "epoch": 2.39, "grad_norm": 0.44372987747192383, "learning_rate": 2.080135585221604e-06, "loss": 0.0251, "step": 14300 }, { "epoch": 2.39, "grad_norm": 0.3427900969982147, "learning_rate": 2.0790332520957e-06, "loss": 0.0361, "step": 14301 }, { "epoch": 2.39, "grad_norm": 0.34326061606407166, "learning_rate": 2.0779311772429565e-06, "loss": 0.0234, "step": 14302 }, { "epoch": 2.39, "grad_norm": 0.4675074517726898, "learning_rate": 2.0768293606993116e-06, "loss": 0.026, "step": 14303 }, { "epoch": 2.39, "grad_norm": 0.3820614218711853, "learning_rate": 2.0757278025006923e-06, "loss": 0.0397, "step": 14304 }, { "epoch": 2.39, "grad_norm": 0.705093502998352, "learning_rate": 2.0746265026830137e-06, "loss": 0.0551, "step": 14305 }, { "epoch": 2.39, "grad_norm": 0.5416328310966492, "learning_rate": 2.0735254612821844e-06, "loss": 0.0283, "step": 14306 }, { "epoch": 2.39, "grad_norm": 0.39070311188697815, "learning_rate": 2.0724246783341085e-06, "loss": 0.0239, "step": 14307 }, { "epoch": 2.39, "grad_norm": 0.3701799809932709, "learning_rate": 2.0713241538746765e-06, "loss": 0.0339, "step": 14308 }, { "epoch": 2.39, "grad_norm": 0.2999400496482849, "learning_rate": 2.0702238879397695e-06, "loss": 0.0278, "step": 14309 }, { "epoch": 2.39, "grad_norm": 0.42463216185569763, "learning_rate": 2.069123880565267e-06, "loss": 0.0347, "step": 14310 }, { "epoch": 2.39, "grad_norm": 3.0418202877044678, "learning_rate": 2.0680241317870375e-06, "loss": 0.0446, "step": 14311 }, { "epoch": 2.39, "grad_norm": 0.3280555009841919, "learning_rate": 2.0669246416409337e-06, "loss": 0.0208, "step": 14312 }, { "epoch": 2.39, "grad_norm": 0.3528289496898651, "learning_rate": 2.065825410162814e-06, "loss": 0.026, "step": 14313 }, { "epoch": 2.39, "grad_norm": 0.42733559012413025, "learning_rate": 2.0647264373885124e-06, "loss": 0.0359, "step": 14314 }, { "epoch": 2.39, "grad_norm": 0.5014718174934387, "learning_rate": 2.063627723353867e-06, "loss": 0.0162, "step": 14315 }, { "epoch": 2.39, "grad_norm": 0.8850020170211792, "learning_rate": 2.062529268094703e-06, "loss": 0.0355, "step": 14316 }, { "epoch": 2.39, "grad_norm": 0.397136390209198, "learning_rate": 2.061431071646832e-06, "loss": 0.0327, "step": 14317 }, { "epoch": 2.39, "grad_norm": 0.41155076026916504, "learning_rate": 2.0603331340460664e-06, "loss": 0.0285, "step": 14318 }, { "epoch": 2.39, "grad_norm": 0.5279954075813293, "learning_rate": 2.0592354553282067e-06, "loss": 0.0298, "step": 14319 }, { "epoch": 2.4, "grad_norm": 0.409296452999115, "learning_rate": 2.0581380355290393e-06, "loss": 0.0378, "step": 14320 }, { "epoch": 2.4, "grad_norm": 2.444572687149048, "learning_rate": 2.057040874684354e-06, "loss": 0.0441, "step": 14321 }, { "epoch": 2.4, "grad_norm": 0.45419901609420776, "learning_rate": 2.0559439728299203e-06, "loss": 0.0351, "step": 14322 }, { "epoch": 2.4, "grad_norm": 0.32588279247283936, "learning_rate": 2.0548473300015015e-06, "loss": 0.0258, "step": 14323 }, { "epoch": 2.4, "grad_norm": 0.47241201996803284, "learning_rate": 2.0537509462348613e-06, "loss": 0.0377, "step": 14324 }, { "epoch": 2.4, "grad_norm": 0.29683467745780945, "learning_rate": 2.0526548215657436e-06, "loss": 0.0168, "step": 14325 }, { "epoch": 2.4, "grad_norm": 0.34292587637901306, "learning_rate": 2.051558956029891e-06, "loss": 0.0254, "step": 14326 }, { "epoch": 2.4, "grad_norm": 0.3702471852302551, "learning_rate": 2.0504633496630388e-06, "loss": 0.0427, "step": 14327 }, { "epoch": 2.4, "grad_norm": 0.4152754247188568, "learning_rate": 2.0493680025009044e-06, "loss": 0.0311, "step": 14328 }, { "epoch": 2.4, "grad_norm": 0.39587071537971497, "learning_rate": 2.048272914579209e-06, "loss": 0.0285, "step": 14329 }, { "epoch": 2.4, "grad_norm": 0.3495006859302521, "learning_rate": 2.0471780859336566e-06, "loss": 0.0295, "step": 14330 }, { "epoch": 2.4, "grad_norm": 0.4136441946029663, "learning_rate": 2.0460835165999417e-06, "loss": 0.0288, "step": 14331 }, { "epoch": 2.4, "grad_norm": 0.24668926000595093, "learning_rate": 2.0449892066137613e-06, "loss": 0.0262, "step": 14332 }, { "epoch": 2.4, "grad_norm": 0.4420529305934906, "learning_rate": 2.04389515601079e-06, "loss": 0.037, "step": 14333 }, { "epoch": 2.4, "grad_norm": 0.4345841109752655, "learning_rate": 2.042801364826704e-06, "loss": 0.0336, "step": 14334 }, { "epoch": 2.4, "grad_norm": 0.33845120668411255, "learning_rate": 2.0417078330971706e-06, "loss": 0.0265, "step": 14335 }, { "epoch": 2.4, "grad_norm": 0.35993075370788574, "learning_rate": 2.040614560857843e-06, "loss": 0.0318, "step": 14336 }, { "epoch": 2.4, "grad_norm": 0.35639703273773193, "learning_rate": 2.0395215481443653e-06, "loss": 0.0244, "step": 14337 }, { "epoch": 2.4, "grad_norm": 0.4045165479183197, "learning_rate": 2.0384287949923833e-06, "loss": 0.0316, "step": 14338 }, { "epoch": 2.4, "grad_norm": 0.39689362049102783, "learning_rate": 2.0373363014375237e-06, "loss": 0.024, "step": 14339 }, { "epoch": 2.4, "grad_norm": 0.35443010926246643, "learning_rate": 2.036244067515406e-06, "loss": 0.0211, "step": 14340 }, { "epoch": 2.4, "grad_norm": 0.3926602303981781, "learning_rate": 2.0351520932616474e-06, "loss": 0.022, "step": 14341 }, { "epoch": 2.4, "grad_norm": 0.33439669013023376, "learning_rate": 2.034060378711853e-06, "loss": 0.0328, "step": 14342 }, { "epoch": 2.4, "grad_norm": 0.42634549736976624, "learning_rate": 2.0329689239016216e-06, "loss": 0.0401, "step": 14343 }, { "epoch": 2.4, "grad_norm": 0.3781021237373352, "learning_rate": 2.0318777288665393e-06, "loss": 0.0378, "step": 14344 }, { "epoch": 2.4, "grad_norm": 0.3049706816673279, "learning_rate": 2.0307867936421822e-06, "loss": 0.0322, "step": 14345 }, { "epoch": 2.4, "grad_norm": 0.39143645763397217, "learning_rate": 2.0296961182641285e-06, "loss": 0.04, "step": 14346 }, { "epoch": 2.4, "grad_norm": 0.3297724723815918, "learning_rate": 2.0286057027679375e-06, "loss": 0.0234, "step": 14347 }, { "epoch": 2.4, "grad_norm": 0.655479907989502, "learning_rate": 2.0275155471891605e-06, "loss": 0.0517, "step": 14348 }, { "epoch": 2.4, "grad_norm": 0.5985630750656128, "learning_rate": 2.0264256515633495e-06, "loss": 0.0307, "step": 14349 }, { "epoch": 2.4, "grad_norm": 0.7355442643165588, "learning_rate": 2.025336015926037e-06, "loss": 0.0292, "step": 14350 }, { "epoch": 2.4, "grad_norm": 0.4878320097923279, "learning_rate": 2.0242466403127536e-06, "loss": 0.035, "step": 14351 }, { "epoch": 2.4, "grad_norm": 0.4430195987224579, "learning_rate": 2.023157524759023e-06, "loss": 0.0394, "step": 14352 }, { "epoch": 2.4, "grad_norm": 0.43799716234207153, "learning_rate": 2.0220686693003543e-06, "loss": 0.0315, "step": 14353 }, { "epoch": 2.4, "grad_norm": 0.3531259000301361, "learning_rate": 2.0209800739722484e-06, "loss": 0.0317, "step": 14354 }, { "epoch": 2.4, "grad_norm": 0.3761296570301056, "learning_rate": 2.019891738810206e-06, "loss": 0.0215, "step": 14355 }, { "epoch": 2.4, "grad_norm": 0.2959437370300293, "learning_rate": 2.0188036638497078e-06, "loss": 0.0281, "step": 14356 }, { "epoch": 2.4, "grad_norm": 0.5930234789848328, "learning_rate": 2.0177158491262363e-06, "loss": 0.0454, "step": 14357 }, { "epoch": 2.4, "grad_norm": 0.5626826286315918, "learning_rate": 2.016628294675258e-06, "loss": 0.0436, "step": 14358 }, { "epoch": 2.4, "grad_norm": 0.2952974736690521, "learning_rate": 2.015541000532235e-06, "loss": 0.0276, "step": 14359 }, { "epoch": 2.4, "grad_norm": 0.3046240508556366, "learning_rate": 2.0144539667326234e-06, "loss": 0.0308, "step": 14360 }, { "epoch": 2.4, "grad_norm": 0.36198705434799194, "learning_rate": 2.013367193311865e-06, "loss": 0.0229, "step": 14361 }, { "epoch": 2.4, "grad_norm": 0.3431054651737213, "learning_rate": 2.0122806803053905e-06, "loss": 0.0188, "step": 14362 }, { "epoch": 2.4, "grad_norm": 0.46618619561195374, "learning_rate": 2.011194427748635e-06, "loss": 0.0296, "step": 14363 }, { "epoch": 2.4, "grad_norm": 0.4346228241920471, "learning_rate": 2.010108435677013e-06, "loss": 0.0448, "step": 14364 }, { "epoch": 2.4, "grad_norm": 0.4126468002796173, "learning_rate": 2.0090227041259326e-06, "loss": 0.0434, "step": 14365 }, { "epoch": 2.4, "grad_norm": 0.5757728219032288, "learning_rate": 2.0079372331307977e-06, "loss": 0.0427, "step": 14366 }, { "epoch": 2.4, "grad_norm": 0.33785781264305115, "learning_rate": 2.0068520227270016e-06, "loss": 0.0275, "step": 14367 }, { "epoch": 2.4, "grad_norm": 0.3608504831790924, "learning_rate": 2.0057670729499325e-06, "loss": 0.0354, "step": 14368 }, { "epoch": 2.4, "grad_norm": 0.3226589262485504, "learning_rate": 2.004682383834963e-06, "loss": 0.036, "step": 14369 }, { "epoch": 2.4, "grad_norm": 0.5953952670097351, "learning_rate": 2.0035979554174577e-06, "loss": 0.0315, "step": 14370 }, { "epoch": 2.4, "grad_norm": 0.5060319304466248, "learning_rate": 2.0025137877327818e-06, "loss": 0.0565, "step": 14371 }, { "epoch": 2.4, "grad_norm": 0.38630253076553345, "learning_rate": 2.0014298808162822e-06, "loss": 0.0262, "step": 14372 }, { "epoch": 2.4, "grad_norm": 0.45284128189086914, "learning_rate": 2.0003462347033e-06, "loss": 0.055, "step": 14373 }, { "epoch": 2.4, "grad_norm": 0.3638682961463928, "learning_rate": 1.999262849429171e-06, "loss": 0.0291, "step": 14374 }, { "epoch": 2.4, "grad_norm": 0.45584094524383545, "learning_rate": 1.9981797250292224e-06, "loss": 0.0361, "step": 14375 }, { "epoch": 2.4, "grad_norm": 0.27575254440307617, "learning_rate": 1.9970968615387666e-06, "loss": 0.024, "step": 14376 }, { "epoch": 2.4, "grad_norm": 0.35276877880096436, "learning_rate": 1.9960142589931164e-06, "loss": 0.0431, "step": 14377 }, { "epoch": 2.4, "grad_norm": 0.31501588225364685, "learning_rate": 1.9949319174275685e-06, "loss": 0.024, "step": 14378 }, { "epoch": 2.4, "grad_norm": 0.37452229857444763, "learning_rate": 1.9938498368774107e-06, "loss": 0.0353, "step": 14379 }, { "epoch": 2.41, "grad_norm": 0.3059874176979065, "learning_rate": 1.992768017377933e-06, "loss": 0.0317, "step": 14380 }, { "epoch": 2.41, "grad_norm": 0.4123339056968689, "learning_rate": 1.9916864589644026e-06, "loss": 0.0323, "step": 14381 }, { "epoch": 2.41, "grad_norm": 0.28940972685813904, "learning_rate": 1.9906051616720876e-06, "loss": 0.0238, "step": 14382 }, { "epoch": 2.41, "grad_norm": 0.378749281167984, "learning_rate": 1.9895241255362497e-06, "loss": 0.0336, "step": 14383 }, { "epoch": 2.41, "grad_norm": 0.34843677282333374, "learning_rate": 1.9884433505921296e-06, "loss": 0.028, "step": 14384 }, { "epoch": 2.41, "grad_norm": 0.22243383526802063, "learning_rate": 1.987362836874974e-06, "loss": 0.0205, "step": 14385 }, { "epoch": 2.41, "grad_norm": 0.39975103735923767, "learning_rate": 1.9862825844200117e-06, "loss": 0.0328, "step": 14386 }, { "epoch": 2.41, "grad_norm": 0.40252313017845154, "learning_rate": 1.985202593262463e-06, "loss": 0.0349, "step": 14387 }, { "epoch": 2.41, "grad_norm": 0.30318233370780945, "learning_rate": 1.9841228634375474e-06, "loss": 0.0301, "step": 14388 }, { "epoch": 2.41, "grad_norm": 0.3401474952697754, "learning_rate": 1.983043394980466e-06, "loss": 0.0205, "step": 14389 }, { "epoch": 2.41, "grad_norm": 0.3680316209793091, "learning_rate": 1.9819641879264186e-06, "loss": 0.0355, "step": 14390 }, { "epoch": 2.41, "grad_norm": 0.3718988299369812, "learning_rate": 1.9808852423105964e-06, "loss": 0.0422, "step": 14391 }, { "epoch": 2.41, "grad_norm": 0.45612311363220215, "learning_rate": 1.979806558168178e-06, "loss": 0.0305, "step": 14392 }, { "epoch": 2.41, "grad_norm": 0.8543562889099121, "learning_rate": 1.978728135534331e-06, "loss": 0.0355, "step": 14393 }, { "epoch": 2.41, "grad_norm": 0.5153994560241699, "learning_rate": 1.977649974444227e-06, "loss": 0.0355, "step": 14394 }, { "epoch": 2.41, "grad_norm": 0.40991389751434326, "learning_rate": 1.976572074933012e-06, "loss": 0.0563, "step": 14395 }, { "epoch": 2.41, "grad_norm": 0.2810526490211487, "learning_rate": 1.975494437035841e-06, "loss": 0.0264, "step": 14396 }, { "epoch": 2.41, "grad_norm": 0.2959635555744171, "learning_rate": 1.9744170607878433e-06, "loss": 0.0259, "step": 14397 }, { "epoch": 2.41, "grad_norm": 0.45180097222328186, "learning_rate": 1.973339946224153e-06, "loss": 0.0302, "step": 14398 }, { "epoch": 2.41, "grad_norm": 0.5404174327850342, "learning_rate": 1.9722630933798913e-06, "loss": 0.0379, "step": 14399 }, { "epoch": 2.41, "grad_norm": 0.4273156225681305, "learning_rate": 1.971186502290171e-06, "loss": 0.0273, "step": 14400 }, { "epoch": 2.41, "grad_norm": 0.41250646114349365, "learning_rate": 1.9701101729900897e-06, "loss": 0.0341, "step": 14401 }, { "epoch": 2.41, "grad_norm": 0.8078438639640808, "learning_rate": 1.96903410551475e-06, "loss": 0.0443, "step": 14402 }, { "epoch": 2.41, "grad_norm": 0.33146536350250244, "learning_rate": 1.967958299899234e-06, "loss": 0.0221, "step": 14403 }, { "epoch": 2.41, "grad_norm": 0.2952134609222412, "learning_rate": 1.9668827561786184e-06, "loss": 0.0213, "step": 14404 }, { "epoch": 2.41, "grad_norm": 0.26975518465042114, "learning_rate": 1.9658074743879785e-06, "loss": 0.0196, "step": 14405 }, { "epoch": 2.41, "grad_norm": 0.447528213262558, "learning_rate": 1.9647324545623683e-06, "loss": 0.0354, "step": 14406 }, { "epoch": 2.41, "grad_norm": 0.35955849289894104, "learning_rate": 1.9636576967368435e-06, "loss": 0.034, "step": 14407 }, { "epoch": 2.41, "grad_norm": 0.3510062098503113, "learning_rate": 1.9625832009464506e-06, "loss": 0.0216, "step": 14408 }, { "epoch": 2.41, "grad_norm": 0.30019524693489075, "learning_rate": 1.96150896722622e-06, "loss": 0.0187, "step": 14409 }, { "epoch": 2.41, "grad_norm": 0.3702513873577118, "learning_rate": 1.9604349956111833e-06, "loss": 0.0463, "step": 14410 }, { "epoch": 2.41, "grad_norm": 0.3698189854621887, "learning_rate": 1.959361286136355e-06, "loss": 0.0231, "step": 14411 }, { "epoch": 2.41, "grad_norm": 0.4315303862094879, "learning_rate": 1.9582878388367454e-06, "loss": 0.0298, "step": 14412 }, { "epoch": 2.41, "grad_norm": 0.47486281394958496, "learning_rate": 1.957214653747358e-06, "loss": 0.0381, "step": 14413 }, { "epoch": 2.41, "grad_norm": 0.4677719175815582, "learning_rate": 1.9561417309031804e-06, "loss": 0.0376, "step": 14414 }, { "epoch": 2.41, "grad_norm": 0.32026639580726624, "learning_rate": 1.9550690703391994e-06, "loss": 0.0343, "step": 14415 }, { "epoch": 2.41, "grad_norm": 0.458747535943985, "learning_rate": 1.9539966720903937e-06, "loss": 0.0386, "step": 14416 }, { "epoch": 2.41, "grad_norm": 0.24025969207286835, "learning_rate": 1.9529245361917272e-06, "loss": 0.025, "step": 14417 }, { "epoch": 2.41, "grad_norm": 0.39923205971717834, "learning_rate": 1.9518526626781565e-06, "loss": 0.0403, "step": 14418 }, { "epoch": 2.41, "grad_norm": 0.4450385272502899, "learning_rate": 1.950781051584635e-06, "loss": 0.0399, "step": 14419 }, { "epoch": 2.41, "grad_norm": 0.42106208205223083, "learning_rate": 1.949709702946102e-06, "loss": 0.0203, "step": 14420 }, { "epoch": 2.41, "grad_norm": 0.3021845817565918, "learning_rate": 1.948638616797488e-06, "loss": 0.0235, "step": 14421 }, { "epoch": 2.41, "grad_norm": 0.40233173966407776, "learning_rate": 1.947567793173719e-06, "loss": 0.0374, "step": 14422 }, { "epoch": 2.41, "grad_norm": 0.4381202757358551, "learning_rate": 1.946497232109712e-06, "loss": 0.0432, "step": 14423 }, { "epoch": 2.41, "grad_norm": 0.29932403564453125, "learning_rate": 1.9454269336403753e-06, "loss": 0.0284, "step": 14424 }, { "epoch": 2.41, "grad_norm": 0.3916126787662506, "learning_rate": 1.9443568978006043e-06, "loss": 0.0421, "step": 14425 }, { "epoch": 2.41, "grad_norm": 0.5606231093406677, "learning_rate": 1.9432871246252873e-06, "loss": 0.0278, "step": 14426 }, { "epoch": 2.41, "grad_norm": 0.39363229274749756, "learning_rate": 1.942217614149311e-06, "loss": 0.0396, "step": 14427 }, { "epoch": 2.41, "grad_norm": 0.38524818420410156, "learning_rate": 1.9411483664075447e-06, "loss": 0.0247, "step": 14428 }, { "epoch": 2.41, "grad_norm": 0.37432917952537537, "learning_rate": 1.9400793814348505e-06, "loss": 0.0317, "step": 14429 }, { "epoch": 2.41, "grad_norm": 0.4358862638473511, "learning_rate": 1.9390106592660863e-06, "loss": 0.0392, "step": 14430 }, { "epoch": 2.41, "grad_norm": 0.4195955991744995, "learning_rate": 1.9379421999361027e-06, "loss": 0.025, "step": 14431 }, { "epoch": 2.41, "grad_norm": 0.27992716431617737, "learning_rate": 1.9368740034797305e-06, "loss": 0.0308, "step": 14432 }, { "epoch": 2.41, "grad_norm": 0.3210475444793701, "learning_rate": 1.9358060699318083e-06, "loss": 0.0392, "step": 14433 }, { "epoch": 2.41, "grad_norm": 0.34242957830429077, "learning_rate": 1.9347383993271516e-06, "loss": 0.0335, "step": 14434 }, { "epoch": 2.41, "grad_norm": 0.5776249766349792, "learning_rate": 1.9336709917005726e-06, "loss": 0.0305, "step": 14435 }, { "epoch": 2.41, "grad_norm": 0.40437689423561096, "learning_rate": 1.93260384708688e-06, "loss": 0.0284, "step": 14436 }, { "epoch": 2.41, "grad_norm": 0.5444029569625854, "learning_rate": 1.9315369655208636e-06, "loss": 0.0455, "step": 14437 }, { "epoch": 2.41, "grad_norm": 0.3912840783596039, "learning_rate": 1.9304703470373153e-06, "loss": 0.0452, "step": 14438 }, { "epoch": 2.41, "grad_norm": 0.40549492835998535, "learning_rate": 1.9294039916710126e-06, "loss": 0.0331, "step": 14439 }, { "epoch": 2.42, "grad_norm": 0.3183946907520294, "learning_rate": 1.9283378994567237e-06, "loss": 0.0254, "step": 14440 }, { "epoch": 2.42, "grad_norm": 0.3859369158744812, "learning_rate": 1.9272720704292126e-06, "loss": 0.0344, "step": 14441 }, { "epoch": 2.42, "grad_norm": 0.30785760283470154, "learning_rate": 1.9262065046232305e-06, "loss": 0.0364, "step": 14442 }, { "epoch": 2.42, "grad_norm": 0.4506267011165619, "learning_rate": 1.9251412020735184e-06, "loss": 0.0291, "step": 14443 }, { "epoch": 2.42, "grad_norm": 0.5698063373565674, "learning_rate": 1.9240761628148174e-06, "loss": 0.0274, "step": 14444 }, { "epoch": 2.42, "grad_norm": 0.4647476077079773, "learning_rate": 1.923011386881849e-06, "loss": 0.0491, "step": 14445 }, { "epoch": 2.42, "grad_norm": 0.5520392060279846, "learning_rate": 1.921946874309335e-06, "loss": 0.0324, "step": 14446 }, { "epoch": 2.42, "grad_norm": 0.300735741853714, "learning_rate": 1.920882625131987e-06, "loss": 0.0235, "step": 14447 }, { "epoch": 2.42, "grad_norm": 0.3921831250190735, "learning_rate": 1.919818639384503e-06, "loss": 0.0363, "step": 14448 }, { "epoch": 2.42, "grad_norm": 0.3477928340435028, "learning_rate": 1.9187549171015753e-06, "loss": 0.0306, "step": 14449 }, { "epoch": 2.42, "grad_norm": 0.3669503629207611, "learning_rate": 1.9176914583178898e-06, "loss": 0.0362, "step": 14450 }, { "epoch": 2.42, "grad_norm": 0.5393096804618835, "learning_rate": 1.91662826306812e-06, "loss": 0.04, "step": 14451 }, { "epoch": 2.42, "grad_norm": 0.6060794591903687, "learning_rate": 1.915565331386935e-06, "loss": 0.0353, "step": 14452 }, { "epoch": 2.42, "grad_norm": 0.5053701400756836, "learning_rate": 1.91450266330899e-06, "loss": 0.0357, "step": 14453 }, { "epoch": 2.42, "grad_norm": 0.3352223336696625, "learning_rate": 1.9134402588689392e-06, "loss": 0.028, "step": 14454 }, { "epoch": 2.42, "grad_norm": 0.3017180562019348, "learning_rate": 1.912378118101418e-06, "loss": 0.0304, "step": 14455 }, { "epoch": 2.42, "grad_norm": 0.2826039791107178, "learning_rate": 1.9113162410410648e-06, "loss": 0.0175, "step": 14456 }, { "epoch": 2.42, "grad_norm": 0.4445384442806244, "learning_rate": 1.9102546277224967e-06, "loss": 0.042, "step": 14457 }, { "epoch": 2.42, "grad_norm": 0.5299040079116821, "learning_rate": 1.9091932781803357e-06, "loss": 0.0381, "step": 14458 }, { "epoch": 2.42, "grad_norm": 0.28786420822143555, "learning_rate": 1.9081321924491857e-06, "loss": 0.0241, "step": 14459 }, { "epoch": 2.42, "grad_norm": 0.4147081971168518, "learning_rate": 1.9070713705636422e-06, "loss": 0.0339, "step": 14460 }, { "epoch": 2.42, "grad_norm": 0.4814486801624298, "learning_rate": 1.9060108125582998e-06, "loss": 0.0375, "step": 14461 }, { "epoch": 2.42, "grad_norm": 0.3429684340953827, "learning_rate": 1.9049505184677331e-06, "loss": 0.0274, "step": 14462 }, { "epoch": 2.42, "grad_norm": 0.4419292211532593, "learning_rate": 1.9038904883265175e-06, "loss": 0.0333, "step": 14463 }, { "epoch": 2.42, "grad_norm": 0.2858295738697052, "learning_rate": 1.90283072216922e-06, "loss": 0.0196, "step": 14464 }, { "epoch": 2.42, "grad_norm": 0.36613643169403076, "learning_rate": 1.9017712200303896e-06, "loss": 0.0344, "step": 14465 }, { "epoch": 2.42, "grad_norm": 0.31773293018341064, "learning_rate": 1.9007119819445785e-06, "loss": 0.0296, "step": 14466 }, { "epoch": 2.42, "grad_norm": 0.3504420816898346, "learning_rate": 1.8996530079463206e-06, "loss": 0.0283, "step": 14467 }, { "epoch": 2.42, "grad_norm": 0.41184401512145996, "learning_rate": 1.8985942980701445e-06, "loss": 0.0447, "step": 14468 }, { "epoch": 2.42, "grad_norm": 0.3769877254962921, "learning_rate": 1.8975358523505739e-06, "loss": 0.0375, "step": 14469 }, { "epoch": 2.42, "grad_norm": 0.3521517217159271, "learning_rate": 1.8964776708221167e-06, "loss": 0.0299, "step": 14470 }, { "epoch": 2.42, "grad_norm": 0.41618308424949646, "learning_rate": 1.8954197535192798e-06, "loss": 0.0315, "step": 14471 }, { "epoch": 2.42, "grad_norm": 0.3771190047264099, "learning_rate": 1.8943621004765579e-06, "loss": 0.0291, "step": 14472 }, { "epoch": 2.42, "grad_norm": 0.3586772084236145, "learning_rate": 1.8933047117284375e-06, "loss": 0.0342, "step": 14473 }, { "epoch": 2.42, "grad_norm": 0.44292908906936646, "learning_rate": 1.8922475873093915e-06, "loss": 0.0396, "step": 14474 }, { "epoch": 2.42, "grad_norm": 0.3466840088367462, "learning_rate": 1.8911907272538942e-06, "loss": 0.0277, "step": 14475 }, { "epoch": 2.42, "grad_norm": 0.5114974975585938, "learning_rate": 1.8901341315964039e-06, "loss": 0.0356, "step": 14476 }, { "epoch": 2.42, "grad_norm": 0.36999914050102234, "learning_rate": 1.8890778003713695e-06, "loss": 0.0357, "step": 14477 }, { "epoch": 2.42, "grad_norm": 0.4653027653694153, "learning_rate": 1.8880217336132357e-06, "loss": 0.0257, "step": 14478 }, { "epoch": 2.42, "grad_norm": 0.48856592178344727, "learning_rate": 1.8869659313564381e-06, "loss": 0.0359, "step": 14479 }, { "epoch": 2.42, "grad_norm": 0.2809831500053406, "learning_rate": 1.885910393635405e-06, "loss": 0.0194, "step": 14480 }, { "epoch": 2.42, "grad_norm": 0.4984126687049866, "learning_rate": 1.88485512048455e-06, "loss": 0.0385, "step": 14481 }, { "epoch": 2.42, "grad_norm": 0.349495530128479, "learning_rate": 1.883800111938281e-06, "loss": 0.0314, "step": 14482 }, { "epoch": 2.42, "grad_norm": 0.3051571547985077, "learning_rate": 1.8827453680310004e-06, "loss": 0.0273, "step": 14483 }, { "epoch": 2.42, "grad_norm": 0.44694986939430237, "learning_rate": 1.8816908887970986e-06, "loss": 0.0417, "step": 14484 }, { "epoch": 2.42, "grad_norm": 0.3920663297176361, "learning_rate": 1.8806366742709547e-06, "loss": 0.0309, "step": 14485 }, { "epoch": 2.42, "grad_norm": 0.44501444697380066, "learning_rate": 1.8795827244869459e-06, "loss": 0.0391, "step": 14486 }, { "epoch": 2.42, "grad_norm": 0.3491392135620117, "learning_rate": 1.8785290394794409e-06, "loss": 0.0294, "step": 14487 }, { "epoch": 2.42, "grad_norm": 0.4340991973876953, "learning_rate": 1.8774756192827891e-06, "loss": 0.0351, "step": 14488 }, { "epoch": 2.42, "grad_norm": 0.4792834520339966, "learning_rate": 1.8764224639313456e-06, "loss": 0.0361, "step": 14489 }, { "epoch": 2.42, "grad_norm": 0.28309351205825806, "learning_rate": 1.875369573459447e-06, "loss": 0.0252, "step": 14490 }, { "epoch": 2.42, "grad_norm": 0.6495345234870911, "learning_rate": 1.874316947901421e-06, "loss": 0.0359, "step": 14491 }, { "epoch": 2.42, "grad_norm": 0.4701586961746216, "learning_rate": 1.8732645872915956e-06, "loss": 0.041, "step": 14492 }, { "epoch": 2.42, "grad_norm": 0.34901487827301025, "learning_rate": 1.8722124916642792e-06, "loss": 0.021, "step": 14493 }, { "epoch": 2.42, "grad_norm": 0.33466920256614685, "learning_rate": 1.871160661053778e-06, "loss": 0.0284, "step": 14494 }, { "epoch": 2.42, "grad_norm": 0.4072970449924469, "learning_rate": 1.8701090954943923e-06, "loss": 0.0351, "step": 14495 }, { "epoch": 2.42, "grad_norm": 0.4633820950984955, "learning_rate": 1.8690577950204047e-06, "loss": 0.0426, "step": 14496 }, { "epoch": 2.42, "grad_norm": 0.4888123869895935, "learning_rate": 1.8680067596660978e-06, "loss": 0.0361, "step": 14497 }, { "epoch": 2.42, "grad_norm": 0.544465184211731, "learning_rate": 1.8669559894657408e-06, "loss": 0.0357, "step": 14498 }, { "epoch": 2.42, "grad_norm": 0.5130533576011658, "learning_rate": 1.8659054844535918e-06, "loss": 0.0323, "step": 14499 }, { "epoch": 2.43, "grad_norm": 0.3062870502471924, "learning_rate": 1.8648552446639101e-06, "loss": 0.0215, "step": 14500 }, { "epoch": 2.43, "grad_norm": 0.4125468134880066, "learning_rate": 1.8638052701309338e-06, "loss": 0.0374, "step": 14501 }, { "epoch": 2.43, "grad_norm": 0.3254442512989044, "learning_rate": 1.8627555608889015e-06, "loss": 0.0328, "step": 14502 }, { "epoch": 2.43, "grad_norm": 0.3552209734916687, "learning_rate": 1.861706116972044e-06, "loss": 0.0235, "step": 14503 }, { "epoch": 2.43, "grad_norm": 0.38406142592430115, "learning_rate": 1.8606569384145757e-06, "loss": 0.0294, "step": 14504 }, { "epoch": 2.43, "grad_norm": 0.5468005537986755, "learning_rate": 1.8596080252507043e-06, "loss": 0.0349, "step": 14505 }, { "epoch": 2.43, "grad_norm": 0.503369152545929, "learning_rate": 1.858559377514636e-06, "loss": 0.049, "step": 14506 }, { "epoch": 2.43, "grad_norm": 0.35966572165489197, "learning_rate": 1.857510995240559e-06, "loss": 0.0294, "step": 14507 }, { "epoch": 2.43, "grad_norm": 0.32859957218170166, "learning_rate": 1.8564628784626614e-06, "loss": 0.0227, "step": 14508 }, { "epoch": 2.43, "grad_norm": 0.42731624841690063, "learning_rate": 1.8554150272151161e-06, "loss": 0.0351, "step": 14509 }, { "epoch": 2.43, "grad_norm": 0.42273473739624023, "learning_rate": 1.8543674415320878e-06, "loss": 0.0274, "step": 14510 }, { "epoch": 2.43, "grad_norm": 0.3569786846637726, "learning_rate": 1.8533201214477348e-06, "loss": 0.0228, "step": 14511 }, { "epoch": 2.43, "grad_norm": 0.34641939401626587, "learning_rate": 1.8522730669962109e-06, "loss": 0.0327, "step": 14512 }, { "epoch": 2.43, "grad_norm": 0.4168771505355835, "learning_rate": 1.8512262782116509e-06, "loss": 0.0345, "step": 14513 }, { "epoch": 2.43, "grad_norm": 0.32318738102912903, "learning_rate": 1.850179755128192e-06, "loss": 0.0304, "step": 14514 }, { "epoch": 2.43, "grad_norm": 0.4188676178455353, "learning_rate": 1.8491334977799535e-06, "loss": 0.0372, "step": 14515 }, { "epoch": 2.43, "grad_norm": 0.33120015263557434, "learning_rate": 1.8480875062010483e-06, "loss": 0.0263, "step": 14516 }, { "epoch": 2.43, "grad_norm": 0.3857148289680481, "learning_rate": 1.8470417804255892e-06, "loss": 0.0353, "step": 14517 }, { "epoch": 2.43, "grad_norm": 0.6700711250305176, "learning_rate": 1.8459963204876652e-06, "loss": 0.0432, "step": 14518 }, { "epoch": 2.43, "grad_norm": 0.3917219042778015, "learning_rate": 1.8449511264213693e-06, "loss": 0.032, "step": 14519 }, { "epoch": 2.43, "grad_norm": 0.430568128824234, "learning_rate": 1.843906198260783e-06, "loss": 0.0381, "step": 14520 }, { "epoch": 2.43, "grad_norm": 0.39470598101615906, "learning_rate": 1.8428615360399738e-06, "loss": 0.032, "step": 14521 }, { "epoch": 2.43, "grad_norm": 0.2858251631259918, "learning_rate": 1.8418171397930085e-06, "loss": 0.0234, "step": 14522 }, { "epoch": 2.43, "grad_norm": 0.3212665319442749, "learning_rate": 1.8407730095539367e-06, "loss": 0.027, "step": 14523 }, { "epoch": 2.43, "grad_norm": 0.2624427378177643, "learning_rate": 1.8397291453568045e-06, "loss": 0.0235, "step": 14524 }, { "epoch": 2.43, "grad_norm": 0.33468320965766907, "learning_rate": 1.8386855472356501e-06, "loss": 0.0357, "step": 14525 }, { "epoch": 2.43, "grad_norm": 0.37451791763305664, "learning_rate": 1.8376422152244988e-06, "loss": 0.031, "step": 14526 }, { "epoch": 2.43, "grad_norm": 0.30832046270370483, "learning_rate": 1.836599149357372e-06, "loss": 0.02, "step": 14527 }, { "epoch": 2.43, "grad_norm": 0.3753107786178589, "learning_rate": 1.8355563496682804e-06, "loss": 0.0291, "step": 14528 }, { "epoch": 2.43, "grad_norm": 0.4420349597930908, "learning_rate": 1.8345138161912256e-06, "loss": 0.0278, "step": 14529 }, { "epoch": 2.43, "grad_norm": 0.2866004407405853, "learning_rate": 1.8334715489601985e-06, "loss": 0.0163, "step": 14530 }, { "epoch": 2.43, "grad_norm": 0.34688249230384827, "learning_rate": 1.832429548009187e-06, "loss": 0.0243, "step": 14531 }, { "epoch": 2.43, "grad_norm": 0.40720194578170776, "learning_rate": 1.8313878133721641e-06, "loss": 0.0294, "step": 14532 }, { "epoch": 2.43, "grad_norm": 0.5131068229675293, "learning_rate": 1.830346345083096e-06, "loss": 0.0432, "step": 14533 }, { "epoch": 2.43, "grad_norm": 0.3947051167488098, "learning_rate": 1.8293051431759435e-06, "loss": 0.0449, "step": 14534 }, { "epoch": 2.43, "grad_norm": 0.4687630534172058, "learning_rate": 1.8282642076846546e-06, "loss": 0.0408, "step": 14535 }, { "epoch": 2.43, "grad_norm": 0.41209349036216736, "learning_rate": 1.8272235386431736e-06, "loss": 0.0309, "step": 14536 }, { "epoch": 2.43, "grad_norm": 0.3180084824562073, "learning_rate": 1.826183136085431e-06, "loss": 0.0384, "step": 14537 }, { "epoch": 2.43, "grad_norm": 0.4013671875, "learning_rate": 1.8251430000453485e-06, "loss": 0.0356, "step": 14538 }, { "epoch": 2.43, "grad_norm": 0.3894853889942169, "learning_rate": 1.8241031305568434e-06, "loss": 0.0359, "step": 14539 }, { "epoch": 2.43, "grad_norm": 0.7709190249443054, "learning_rate": 1.8230635276538223e-06, "loss": 0.0196, "step": 14540 }, { "epoch": 2.43, "grad_norm": 0.4668799638748169, "learning_rate": 1.8220241913701787e-06, "loss": 0.0262, "step": 14541 }, { "epoch": 2.43, "grad_norm": 0.2981237769126892, "learning_rate": 1.8209851217398044e-06, "loss": 0.032, "step": 14542 }, { "epoch": 2.43, "grad_norm": 0.5932178497314453, "learning_rate": 1.8199463187965826e-06, "loss": 0.034, "step": 14543 }, { "epoch": 2.43, "grad_norm": 0.36710333824157715, "learning_rate": 1.8189077825743785e-06, "loss": 0.0412, "step": 14544 }, { "epoch": 2.43, "grad_norm": 0.32887861132621765, "learning_rate": 1.8178695131070612e-06, "loss": 0.0299, "step": 14545 }, { "epoch": 2.43, "grad_norm": 0.4355252981185913, "learning_rate": 1.8168315104284816e-06, "loss": 0.0454, "step": 14546 }, { "epoch": 2.43, "grad_norm": 0.37168270349502563, "learning_rate": 1.815793774572483e-06, "loss": 0.0282, "step": 14547 }, { "epoch": 2.43, "grad_norm": 0.4465083181858063, "learning_rate": 1.8147563055729068e-06, "loss": 0.0354, "step": 14548 }, { "epoch": 2.43, "grad_norm": 0.3647499680519104, "learning_rate": 1.8137191034635759e-06, "loss": 0.0307, "step": 14549 }, { "epoch": 2.43, "grad_norm": 0.40166106820106506, "learning_rate": 1.8126821682783125e-06, "loss": 0.0353, "step": 14550 }, { "epoch": 2.43, "grad_norm": 0.44535890221595764, "learning_rate": 1.8116455000509292e-06, "loss": 0.0376, "step": 14551 }, { "epoch": 2.43, "grad_norm": 0.3037485182285309, "learning_rate": 1.8106090988152225e-06, "loss": 0.0241, "step": 14552 }, { "epoch": 2.43, "grad_norm": 0.35499292612075806, "learning_rate": 1.809572964604992e-06, "loss": 0.0286, "step": 14553 }, { "epoch": 2.43, "grad_norm": 0.35104402899742126, "learning_rate": 1.8085370974540185e-06, "loss": 0.0305, "step": 14554 }, { "epoch": 2.43, "grad_norm": 0.45723363757133484, "learning_rate": 1.8075014973960748e-06, "loss": 0.0381, "step": 14555 }, { "epoch": 2.43, "grad_norm": 0.4541851878166199, "learning_rate": 1.806466164464935e-06, "loss": 0.0425, "step": 14556 }, { "epoch": 2.43, "grad_norm": 0.5306214690208435, "learning_rate": 1.8054310986943502e-06, "loss": 0.0416, "step": 14557 }, { "epoch": 2.43, "grad_norm": 0.5290473699569702, "learning_rate": 1.8043963001180776e-06, "loss": 0.0292, "step": 14558 }, { "epoch": 2.44, "grad_norm": 0.35221144556999207, "learning_rate": 1.8033617687698502e-06, "loss": 0.0336, "step": 14559 }, { "epoch": 2.44, "grad_norm": 0.2973155975341797, "learning_rate": 1.8023275046834066e-06, "loss": 0.0231, "step": 14560 }, { "epoch": 2.44, "grad_norm": 0.3742266893386841, "learning_rate": 1.8012935078924653e-06, "loss": 0.0362, "step": 14561 }, { "epoch": 2.44, "grad_norm": 0.2513907849788666, "learning_rate": 1.8002597784307463e-06, "loss": 0.0263, "step": 14562 }, { "epoch": 2.44, "grad_norm": 0.2881128787994385, "learning_rate": 1.7992263163319501e-06, "loss": 0.0273, "step": 14563 }, { "epoch": 2.44, "grad_norm": 0.229773610830307, "learning_rate": 1.79819312162978e-06, "loss": 0.018, "step": 14564 }, { "epoch": 2.44, "grad_norm": 0.35375162959098816, "learning_rate": 1.797160194357921e-06, "loss": 0.0259, "step": 14565 }, { "epoch": 2.44, "grad_norm": 0.38168051838874817, "learning_rate": 1.7961275345500507e-06, "loss": 0.0361, "step": 14566 }, { "epoch": 2.44, "grad_norm": 0.47991088032722473, "learning_rate": 1.7950951422398432e-06, "loss": 0.0374, "step": 14567 }, { "epoch": 2.44, "grad_norm": 0.37157776951789856, "learning_rate": 1.794063017460963e-06, "loss": 0.0346, "step": 14568 }, { "epoch": 2.44, "grad_norm": 0.27483075857162476, "learning_rate": 1.7930311602470596e-06, "loss": 0.0243, "step": 14569 }, { "epoch": 2.44, "grad_norm": 0.35351231694221497, "learning_rate": 1.7919995706317827e-06, "loss": 0.0175, "step": 14570 }, { "epoch": 2.44, "grad_norm": 0.41498416662216187, "learning_rate": 1.7909682486487657e-06, "loss": 0.0199, "step": 14571 }, { "epoch": 2.44, "grad_norm": 0.2729455232620239, "learning_rate": 1.7899371943316336e-06, "loss": 0.0264, "step": 14572 }, { "epoch": 2.44, "grad_norm": 0.5479966998100281, "learning_rate": 1.7889064077140106e-06, "loss": 0.0388, "step": 14573 }, { "epoch": 2.44, "grad_norm": 0.810272216796875, "learning_rate": 1.7878758888295022e-06, "loss": 0.0389, "step": 14574 }, { "epoch": 2.44, "grad_norm": 0.41607666015625, "learning_rate": 1.7868456377117116e-06, "loss": 0.0353, "step": 14575 }, { "epoch": 2.44, "grad_norm": 0.3765343725681305, "learning_rate": 1.785815654394234e-06, "loss": 0.0357, "step": 14576 }, { "epoch": 2.44, "grad_norm": 0.3268129527568817, "learning_rate": 1.7847859389106482e-06, "loss": 0.0374, "step": 14577 }, { "epoch": 2.44, "grad_norm": 0.39676281809806824, "learning_rate": 1.783756491294535e-06, "loss": 0.0396, "step": 14578 }, { "epoch": 2.44, "grad_norm": 0.3245481848716736, "learning_rate": 1.7827273115794586e-06, "loss": 0.0296, "step": 14579 }, { "epoch": 2.44, "grad_norm": 0.547028124332428, "learning_rate": 1.7816983997989723e-06, "loss": 0.0445, "step": 14580 }, { "epoch": 2.44, "grad_norm": 0.33354485034942627, "learning_rate": 1.7806697559866327e-06, "loss": 0.0368, "step": 14581 }, { "epoch": 2.44, "grad_norm": 0.521562933921814, "learning_rate": 1.7796413801759726e-06, "loss": 0.0331, "step": 14582 }, { "epoch": 2.44, "grad_norm": 0.2607415020465851, "learning_rate": 1.7786132724005268e-06, "loss": 0.0272, "step": 14583 }, { "epoch": 2.44, "grad_norm": 0.3484896123409271, "learning_rate": 1.7775854326938225e-06, "loss": 0.0251, "step": 14584 }, { "epoch": 2.44, "grad_norm": 0.3445990979671478, "learning_rate": 1.7765578610893685e-06, "loss": 0.0422, "step": 14585 }, { "epoch": 2.44, "grad_norm": 0.3111194372177124, "learning_rate": 1.7755305576206682e-06, "loss": 0.0362, "step": 14586 }, { "epoch": 2.44, "grad_norm": 0.37535032629966736, "learning_rate": 1.7745035223212237e-06, "loss": 0.033, "step": 14587 }, { "epoch": 2.44, "grad_norm": 0.6367189288139343, "learning_rate": 1.7734767552245214e-06, "loss": 0.0379, "step": 14588 }, { "epoch": 2.44, "grad_norm": 0.2691950798034668, "learning_rate": 1.7724502563640356e-06, "loss": 0.0216, "step": 14589 }, { "epoch": 2.44, "grad_norm": 0.43226101994514465, "learning_rate": 1.7714240257732406e-06, "loss": 0.0396, "step": 14590 }, { "epoch": 2.44, "grad_norm": 0.41239824891090393, "learning_rate": 1.7703980634855967e-06, "loss": 0.0299, "step": 14591 }, { "epoch": 2.44, "grad_norm": 0.3256092071533203, "learning_rate": 1.76937236953456e-06, "loss": 0.0293, "step": 14592 }, { "epoch": 2.44, "grad_norm": 0.39442580938339233, "learning_rate": 1.7683469439535728e-06, "loss": 0.0416, "step": 14593 }, { "epoch": 2.44, "grad_norm": 0.5117161870002747, "learning_rate": 1.7673217867760662e-06, "loss": 0.0706, "step": 14594 }, { "epoch": 2.44, "grad_norm": 0.42681795358657837, "learning_rate": 1.7662968980354734e-06, "loss": 0.0335, "step": 14595 }, { "epoch": 2.44, "grad_norm": 0.2858867943286896, "learning_rate": 1.7652722777652087e-06, "loss": 0.0215, "step": 14596 }, { "epoch": 2.44, "grad_norm": 0.41392064094543457, "learning_rate": 1.7642479259986788e-06, "loss": 0.037, "step": 14597 }, { "epoch": 2.44, "grad_norm": 0.3926961421966553, "learning_rate": 1.7632238427692872e-06, "loss": 0.0222, "step": 14598 }, { "epoch": 2.44, "grad_norm": 0.5073891282081604, "learning_rate": 1.7622000281104268e-06, "loss": 0.0347, "step": 14599 }, { "epoch": 2.44, "grad_norm": 0.2960274815559387, "learning_rate": 1.7611764820554756e-06, "loss": 0.0304, "step": 14600 }, { "epoch": 2.44, "grad_norm": 0.3785446286201477, "learning_rate": 1.7601532046378135e-06, "loss": 0.0236, "step": 14601 }, { "epoch": 2.44, "grad_norm": 0.5072503089904785, "learning_rate": 1.7591301958908025e-06, "loss": 0.0473, "step": 14602 }, { "epoch": 2.44, "grad_norm": 0.29685989022254944, "learning_rate": 1.7581074558477961e-06, "loss": 0.0203, "step": 14603 }, { "epoch": 2.44, "grad_norm": 0.32097190618515015, "learning_rate": 1.757084984542149e-06, "loss": 0.0216, "step": 14604 }, { "epoch": 2.44, "grad_norm": 0.3477976322174072, "learning_rate": 1.756062782007193e-06, "loss": 0.0339, "step": 14605 }, { "epoch": 2.44, "grad_norm": 0.302607923746109, "learning_rate": 1.7550408482762616e-06, "loss": 0.0342, "step": 14606 }, { "epoch": 2.44, "grad_norm": 0.38317883014678955, "learning_rate": 1.7540191833826792e-06, "loss": 0.0492, "step": 14607 }, { "epoch": 2.44, "grad_norm": 0.3526923954486847, "learning_rate": 1.7529977873597526e-06, "loss": 0.039, "step": 14608 }, { "epoch": 2.44, "grad_norm": 0.30952194333076477, "learning_rate": 1.7519766602407917e-06, "loss": 0.0212, "step": 14609 }, { "epoch": 2.44, "grad_norm": 0.39590123295783997, "learning_rate": 1.7509558020590888e-06, "loss": 0.0246, "step": 14610 }, { "epoch": 2.44, "grad_norm": 0.36569005250930786, "learning_rate": 1.7499352128479274e-06, "loss": 0.0275, "step": 14611 }, { "epoch": 2.44, "grad_norm": 0.5615814924240112, "learning_rate": 1.7489148926405898e-06, "loss": 0.0424, "step": 14612 }, { "epoch": 2.44, "grad_norm": 0.4576185941696167, "learning_rate": 1.7478948414703435e-06, "loss": 0.0411, "step": 14613 }, { "epoch": 2.44, "grad_norm": 0.3655373454093933, "learning_rate": 1.7468750593704454e-06, "loss": 0.0247, "step": 14614 }, { "epoch": 2.44, "grad_norm": 0.2358165681362152, "learning_rate": 1.7458555463741488e-06, "loss": 0.0194, "step": 14615 }, { "epoch": 2.44, "grad_norm": 0.38961687684059143, "learning_rate": 1.7448363025146998e-06, "loss": 0.0349, "step": 14616 }, { "epoch": 2.44, "grad_norm": 0.4114549160003662, "learning_rate": 1.7438173278253267e-06, "loss": 0.0479, "step": 14617 }, { "epoch": 2.44, "grad_norm": 0.268973171710968, "learning_rate": 1.742798622339259e-06, "loss": 0.0222, "step": 14618 }, { "epoch": 2.45, "grad_norm": 0.41776153445243835, "learning_rate": 1.7417801860897087e-06, "loss": 0.0344, "step": 14619 }, { "epoch": 2.45, "grad_norm": 0.4886661171913147, "learning_rate": 1.7407620191098874e-06, "loss": 0.0258, "step": 14620 }, { "epoch": 2.45, "grad_norm": 0.3517794907093048, "learning_rate": 1.7397441214329913e-06, "loss": 0.0286, "step": 14621 }, { "epoch": 2.45, "grad_norm": 0.35447967052459717, "learning_rate": 1.7387264930922087e-06, "loss": 0.0254, "step": 14622 }, { "epoch": 2.45, "grad_norm": 0.3220924735069275, "learning_rate": 1.7377091341207219e-06, "loss": 0.0444, "step": 14623 }, { "epoch": 2.45, "grad_norm": 0.2624816596508026, "learning_rate": 1.7366920445517056e-06, "loss": 0.0252, "step": 14624 }, { "epoch": 2.45, "grad_norm": 0.4209095239639282, "learning_rate": 1.7356752244183196e-06, "loss": 0.0345, "step": 14625 }, { "epoch": 2.45, "grad_norm": 0.4029780328273773, "learning_rate": 1.734658673753723e-06, "loss": 0.0279, "step": 14626 }, { "epoch": 2.45, "grad_norm": 0.28787457942962646, "learning_rate": 1.7336423925910594e-06, "loss": 0.0274, "step": 14627 }, { "epoch": 2.45, "grad_norm": 0.4069507122039795, "learning_rate": 1.7326263809634637e-06, "loss": 0.0299, "step": 14628 }, { "epoch": 2.45, "grad_norm": 0.3202923834323883, "learning_rate": 1.7316106389040676e-06, "loss": 0.0373, "step": 14629 }, { "epoch": 2.45, "grad_norm": 0.603587806224823, "learning_rate": 1.7305951664459874e-06, "loss": 0.0546, "step": 14630 }, { "epoch": 2.45, "grad_norm": 0.35452771186828613, "learning_rate": 1.7295799636223354e-06, "loss": 0.039, "step": 14631 }, { "epoch": 2.45, "grad_norm": 0.4252570569515228, "learning_rate": 1.728565030466216e-06, "loss": 0.027, "step": 14632 }, { "epoch": 2.45, "grad_norm": 0.22021928429603577, "learning_rate": 1.7275503670107186e-06, "loss": 0.0185, "step": 14633 }, { "epoch": 2.45, "grad_norm": 0.3821668326854706, "learning_rate": 1.7265359732889319e-06, "loss": 0.0285, "step": 14634 }, { "epoch": 2.45, "grad_norm": 0.41260841488838196, "learning_rate": 1.7255218493339288e-06, "loss": 0.0265, "step": 14635 }, { "epoch": 2.45, "grad_norm": 0.30885499715805054, "learning_rate": 1.7245079951787736e-06, "loss": 0.0235, "step": 14636 }, { "epoch": 2.45, "grad_norm": 0.41800934076309204, "learning_rate": 1.7234944108565298e-06, "loss": 0.0384, "step": 14637 }, { "epoch": 2.45, "grad_norm": 0.35401448607444763, "learning_rate": 1.7224810964002403e-06, "loss": 0.0243, "step": 14638 }, { "epoch": 2.45, "grad_norm": 0.5295811295509338, "learning_rate": 1.7214680518429494e-06, "loss": 0.0265, "step": 14639 }, { "epoch": 2.45, "grad_norm": 0.6434028744697571, "learning_rate": 1.7204552772176908e-06, "loss": 0.0429, "step": 14640 }, { "epoch": 2.45, "grad_norm": 0.4362247884273529, "learning_rate": 1.7194427725574846e-06, "loss": 0.0442, "step": 14641 }, { "epoch": 2.45, "grad_norm": 0.25642701983451843, "learning_rate": 1.7184305378953437e-06, "loss": 0.0213, "step": 14642 }, { "epoch": 2.45, "grad_norm": 0.34472131729125977, "learning_rate": 1.7174185732642755e-06, "loss": 0.0235, "step": 14643 }, { "epoch": 2.45, "grad_norm": 0.35784444212913513, "learning_rate": 1.7164068786972754e-06, "loss": 0.0365, "step": 14644 }, { "epoch": 2.45, "grad_norm": 0.28956663608551025, "learning_rate": 1.7153954542273288e-06, "loss": 0.0254, "step": 14645 }, { "epoch": 2.45, "grad_norm": 0.402317076921463, "learning_rate": 1.714384299887417e-06, "loss": 0.0374, "step": 14646 }, { "epoch": 2.45, "grad_norm": 0.30263566970825195, "learning_rate": 1.7133734157105097e-06, "loss": 0.0246, "step": 14647 }, { "epoch": 2.45, "grad_norm": 0.4873334467411041, "learning_rate": 1.7123628017295702e-06, "loss": 0.0572, "step": 14648 }, { "epoch": 2.45, "grad_norm": 0.4151243269443512, "learning_rate": 1.7113524579775476e-06, "loss": 0.0504, "step": 14649 }, { "epoch": 2.45, "grad_norm": 0.3399263918399811, "learning_rate": 1.7103423844873857e-06, "loss": 0.0279, "step": 14650 }, { "epoch": 2.45, "grad_norm": 0.43874308466911316, "learning_rate": 1.7093325812920214e-06, "loss": 0.027, "step": 14651 }, { "epoch": 2.45, "grad_norm": 0.43416064977645874, "learning_rate": 1.7083230484243796e-06, "loss": 0.032, "step": 14652 }, { "epoch": 2.45, "grad_norm": 0.4922925531864166, "learning_rate": 1.7073137859173749e-06, "loss": 0.0336, "step": 14653 }, { "epoch": 2.45, "grad_norm": 0.3447464108467102, "learning_rate": 1.7063047938039168e-06, "loss": 0.044, "step": 14654 }, { "epoch": 2.45, "grad_norm": 0.34035900235176086, "learning_rate": 1.7052960721169087e-06, "loss": 0.0261, "step": 14655 }, { "epoch": 2.45, "grad_norm": 0.8429017663002014, "learning_rate": 1.7042876208892356e-06, "loss": 0.0555, "step": 14656 }, { "epoch": 2.45, "grad_norm": 0.44924041628837585, "learning_rate": 1.7032794401537844e-06, "loss": 0.0454, "step": 14657 }, { "epoch": 2.45, "grad_norm": 0.33558592200279236, "learning_rate": 1.7022715299434257e-06, "loss": 0.038, "step": 14658 }, { "epoch": 2.45, "grad_norm": 0.3199564814567566, "learning_rate": 1.7012638902910217e-06, "loss": 0.026, "step": 14659 }, { "epoch": 2.45, "grad_norm": 0.36969494819641113, "learning_rate": 1.700256521229432e-06, "loss": 0.029, "step": 14660 }, { "epoch": 2.45, "grad_norm": 0.3522758185863495, "learning_rate": 1.699249422791498e-06, "loss": 0.0291, "step": 14661 }, { "epoch": 2.45, "grad_norm": 0.2635214328765869, "learning_rate": 1.6982425950100623e-06, "loss": 0.0232, "step": 14662 }, { "epoch": 2.45, "grad_norm": 0.38989782333374023, "learning_rate": 1.697236037917951e-06, "loss": 0.0293, "step": 14663 }, { "epoch": 2.45, "grad_norm": 0.3094763457775116, "learning_rate": 1.696229751547983e-06, "loss": 0.0306, "step": 14664 }, { "epoch": 2.45, "grad_norm": 0.3469700813293457, "learning_rate": 1.695223735932975e-06, "loss": 0.0237, "step": 14665 }, { "epoch": 2.45, "grad_norm": 0.4915880858898163, "learning_rate": 1.6942179911057255e-06, "loss": 0.0431, "step": 14666 }, { "epoch": 2.45, "grad_norm": 0.3719446063041687, "learning_rate": 1.6932125170990265e-06, "loss": 0.0269, "step": 14667 }, { "epoch": 2.45, "grad_norm": 0.33385103940963745, "learning_rate": 1.6922073139456674e-06, "loss": 0.0183, "step": 14668 }, { "epoch": 2.45, "grad_norm": 0.49067243933677673, "learning_rate": 1.6912023816784207e-06, "loss": 0.0302, "step": 14669 }, { "epoch": 2.45, "grad_norm": 0.49121445417404175, "learning_rate": 1.6901977203300524e-06, "loss": 0.0271, "step": 14670 }, { "epoch": 2.45, "grad_norm": 0.4072421193122864, "learning_rate": 1.6891933299333218e-06, "loss": 0.0198, "step": 14671 }, { "epoch": 2.45, "grad_norm": 0.3716410994529724, "learning_rate": 1.6881892105209829e-06, "loss": 0.0406, "step": 14672 }, { "epoch": 2.45, "grad_norm": 0.3816707134246826, "learning_rate": 1.6871853621257695e-06, "loss": 0.0385, "step": 14673 }, { "epoch": 2.45, "grad_norm": 0.4910058081150055, "learning_rate": 1.6861817847804185e-06, "loss": 0.0383, "step": 14674 }, { "epoch": 2.45, "grad_norm": 0.27190160751342773, "learning_rate": 1.6851784785176483e-06, "loss": 0.0261, "step": 14675 }, { "epoch": 2.45, "grad_norm": 0.3897477090358734, "learning_rate": 1.6841754433701785e-06, "loss": 0.031, "step": 14676 }, { "epoch": 2.45, "grad_norm": 0.32950425148010254, "learning_rate": 1.6831726793707105e-06, "loss": 0.0298, "step": 14677 }, { "epoch": 2.45, "grad_norm": 0.3847734034061432, "learning_rate": 1.6821701865519392e-06, "loss": 0.0389, "step": 14678 }, { "epoch": 2.46, "grad_norm": 0.5776837468147278, "learning_rate": 1.681167964946554e-06, "loss": 0.0312, "step": 14679 }, { "epoch": 2.46, "grad_norm": 0.3857211470603943, "learning_rate": 1.6801660145872367e-06, "loss": 0.0426, "step": 14680 }, { "epoch": 2.46, "grad_norm": 0.3302541971206665, "learning_rate": 1.6791643355066522e-06, "loss": 0.0275, "step": 14681 }, { "epoch": 2.46, "grad_norm": 0.3838180601596832, "learning_rate": 1.6781629277374656e-06, "loss": 0.0349, "step": 14682 }, { "epoch": 2.46, "grad_norm": 0.3375135660171509, "learning_rate": 1.6771617913123273e-06, "loss": 0.0214, "step": 14683 }, { "epoch": 2.46, "grad_norm": 0.2905326783657074, "learning_rate": 1.6761609262638777e-06, "loss": 0.0288, "step": 14684 }, { "epoch": 2.46, "grad_norm": 0.29790034890174866, "learning_rate": 1.675160332624758e-06, "loss": 0.019, "step": 14685 }, { "epoch": 2.46, "grad_norm": 0.48353302478790283, "learning_rate": 1.6741600104275856e-06, "loss": 0.052, "step": 14686 }, { "epoch": 2.46, "grad_norm": 0.2890738844871521, "learning_rate": 1.673159959704983e-06, "loss": 0.0256, "step": 14687 }, { "epoch": 2.46, "grad_norm": 0.2746705114841461, "learning_rate": 1.6721601804895592e-06, "loss": 0.024, "step": 14688 }, { "epoch": 2.46, "grad_norm": 0.3500906825065613, "learning_rate": 1.6711606728139074e-06, "loss": 0.0325, "step": 14689 }, { "epoch": 2.46, "grad_norm": 0.31195762753486633, "learning_rate": 1.6701614367106244e-06, "loss": 0.029, "step": 14690 }, { "epoch": 2.46, "grad_norm": 0.40030962228775024, "learning_rate": 1.669162472212288e-06, "loss": 0.0354, "step": 14691 }, { "epoch": 2.46, "grad_norm": 0.3518904149532318, "learning_rate": 1.6681637793514682e-06, "loss": 0.0246, "step": 14692 }, { "epoch": 2.46, "grad_norm": 0.2759135365486145, "learning_rate": 1.6671653581607349e-06, "loss": 0.0256, "step": 14693 }, { "epoch": 2.46, "grad_norm": 0.4032053053379059, "learning_rate": 1.6661672086726366e-06, "loss": 0.0301, "step": 14694 }, { "epoch": 2.46, "grad_norm": 0.39055338501930237, "learning_rate": 1.6651693309197214e-06, "loss": 0.032, "step": 14695 }, { "epoch": 2.46, "grad_norm": 0.48499795794487, "learning_rate": 1.6641717249345302e-06, "loss": 0.0402, "step": 14696 }, { "epoch": 2.46, "grad_norm": 0.3734091520309448, "learning_rate": 1.6631743907495878e-06, "loss": 0.0275, "step": 14697 }, { "epoch": 2.46, "grad_norm": 0.3763171136379242, "learning_rate": 1.6621773283974118e-06, "loss": 0.0263, "step": 14698 }, { "epoch": 2.46, "grad_norm": 0.6184273958206177, "learning_rate": 1.6611805379105173e-06, "loss": 0.0414, "step": 14699 }, { "epoch": 2.46, "grad_norm": 0.3357696831226349, "learning_rate": 1.6601840193214026e-06, "loss": 0.0323, "step": 14700 }, { "epoch": 2.46, "grad_norm": 0.38400962948799133, "learning_rate": 1.6591877726625594e-06, "loss": 0.022, "step": 14701 }, { "epoch": 2.46, "grad_norm": 0.34656405448913574, "learning_rate": 1.658191797966473e-06, "loss": 0.022, "step": 14702 }, { "epoch": 2.46, "grad_norm": 0.33583593368530273, "learning_rate": 1.6571960952656197e-06, "loss": 0.0272, "step": 14703 }, { "epoch": 2.46, "grad_norm": 0.3279440104961395, "learning_rate": 1.6562006645924667e-06, "loss": 0.0289, "step": 14704 }, { "epoch": 2.46, "grad_norm": 0.24169108271598816, "learning_rate": 1.655205505979468e-06, "loss": 0.0241, "step": 14705 }, { "epoch": 2.46, "grad_norm": 0.39917874336242676, "learning_rate": 1.6542106194590724e-06, "loss": 0.0391, "step": 14706 }, { "epoch": 2.46, "grad_norm": 0.36369141936302185, "learning_rate": 1.6532160050637225e-06, "loss": 0.0299, "step": 14707 }, { "epoch": 2.46, "grad_norm": 0.36186760663986206, "learning_rate": 1.652221662825847e-06, "loss": 0.023, "step": 14708 }, { "epoch": 2.46, "grad_norm": 0.47867801785469055, "learning_rate": 1.6512275927778643e-06, "loss": 0.0509, "step": 14709 }, { "epoch": 2.46, "grad_norm": 0.4894867539405823, "learning_rate": 1.6502337949521917e-06, "loss": 0.0319, "step": 14710 }, { "epoch": 2.46, "grad_norm": 0.38580381870269775, "learning_rate": 1.6492402693812337e-06, "loss": 0.0315, "step": 14711 }, { "epoch": 2.46, "grad_norm": 0.3833808898925781, "learning_rate": 1.6482470160973817e-06, "loss": 0.0315, "step": 14712 }, { "epoch": 2.46, "grad_norm": 0.3200331926345825, "learning_rate": 1.6472540351330258e-06, "loss": 0.0264, "step": 14713 }, { "epoch": 2.46, "grad_norm": 0.3829513490200043, "learning_rate": 1.646261326520543e-06, "loss": 0.0246, "step": 14714 }, { "epoch": 2.46, "grad_norm": 0.3730746805667877, "learning_rate": 1.6452688902922975e-06, "loss": 0.0228, "step": 14715 }, { "epoch": 2.46, "grad_norm": 0.2734963595867157, "learning_rate": 1.6442767264806547e-06, "loss": 0.0255, "step": 14716 }, { "epoch": 2.46, "grad_norm": 0.43135327100753784, "learning_rate": 1.6432848351179599e-06, "loss": 0.0273, "step": 14717 }, { "epoch": 2.46, "grad_norm": 0.38199740648269653, "learning_rate": 1.64229321623656e-06, "loss": 0.0337, "step": 14718 }, { "epoch": 2.46, "grad_norm": 0.4362315535545349, "learning_rate": 1.6413018698687832e-06, "loss": 0.0397, "step": 14719 }, { "epoch": 2.46, "grad_norm": 0.4309002459049225, "learning_rate": 1.6403107960469567e-06, "loss": 0.0317, "step": 14720 }, { "epoch": 2.46, "grad_norm": 0.4051857590675354, "learning_rate": 1.6393199948033966e-06, "loss": 0.0281, "step": 14721 }, { "epoch": 2.46, "grad_norm": 0.38123053312301636, "learning_rate": 1.6383294661704086e-06, "loss": 0.032, "step": 14722 }, { "epoch": 2.46, "grad_norm": 0.367337703704834, "learning_rate": 1.6373392101802864e-06, "loss": 0.0222, "step": 14723 }, { "epoch": 2.46, "grad_norm": 0.27608823776245117, "learning_rate": 1.6363492268653248e-06, "loss": 0.0191, "step": 14724 }, { "epoch": 2.46, "grad_norm": 0.2778601050376892, "learning_rate": 1.6353595162577995e-06, "loss": 0.0228, "step": 14725 }, { "epoch": 2.46, "grad_norm": 0.3543635308742523, "learning_rate": 1.6343700783899785e-06, "loss": 0.0285, "step": 14726 }, { "epoch": 2.46, "grad_norm": 0.3887351155281067, "learning_rate": 1.633380913294128e-06, "loss": 0.0277, "step": 14727 }, { "epoch": 2.46, "grad_norm": 0.43972331285476685, "learning_rate": 1.6323920210025034e-06, "loss": 0.0345, "step": 14728 }, { "epoch": 2.46, "grad_norm": 0.35709288716316223, "learning_rate": 1.6314034015473412e-06, "loss": 0.0345, "step": 14729 }, { "epoch": 2.46, "grad_norm": 0.4036135673522949, "learning_rate": 1.6304150549608844e-06, "loss": 0.0325, "step": 14730 }, { "epoch": 2.46, "grad_norm": 0.3757403790950775, "learning_rate": 1.6294269812753528e-06, "loss": 0.0303, "step": 14731 }, { "epoch": 2.46, "grad_norm": 0.2983221411705017, "learning_rate": 1.6284391805229693e-06, "loss": 0.0255, "step": 14732 }, { "epoch": 2.46, "grad_norm": 0.48286810517311096, "learning_rate": 1.6274516527359408e-06, "loss": 0.0368, "step": 14733 }, { "epoch": 2.46, "grad_norm": 0.31279146671295166, "learning_rate": 1.6264643979464623e-06, "loss": 0.0294, "step": 14734 }, { "epoch": 2.46, "grad_norm": 0.43973031640052795, "learning_rate": 1.6254774161867282e-06, "loss": 0.0336, "step": 14735 }, { "epoch": 2.46, "grad_norm": 0.46362343430519104, "learning_rate": 1.6244907074889226e-06, "loss": 0.0352, "step": 14736 }, { "epoch": 2.46, "grad_norm": 0.2927022874355316, "learning_rate": 1.6235042718852135e-06, "loss": 0.0242, "step": 14737 }, { "epoch": 2.46, "grad_norm": 0.4473968744277954, "learning_rate": 1.6225181094077702e-06, "loss": 0.0417, "step": 14738 }, { "epoch": 2.47, "grad_norm": 0.446242094039917, "learning_rate": 1.6215322200887452e-06, "loss": 0.039, "step": 14739 }, { "epoch": 2.47, "grad_norm": 0.47780993580818176, "learning_rate": 1.6205466039602812e-06, "loss": 0.0377, "step": 14740 }, { "epoch": 2.47, "grad_norm": 0.2434873729944229, "learning_rate": 1.619561261054522e-06, "loss": 0.0307, "step": 14741 }, { "epoch": 2.47, "grad_norm": 0.3733077943325043, "learning_rate": 1.6185761914035892e-06, "loss": 0.0409, "step": 14742 }, { "epoch": 2.47, "grad_norm": 0.5159209370613098, "learning_rate": 1.617591395039606e-06, "loss": 0.0438, "step": 14743 }, { "epoch": 2.47, "grad_norm": 0.42181432247161865, "learning_rate": 1.616606871994686e-06, "loss": 0.0416, "step": 14744 }, { "epoch": 2.47, "grad_norm": 0.39308813214302063, "learning_rate": 1.615622622300923e-06, "loss": 0.0205, "step": 14745 }, { "epoch": 2.47, "grad_norm": 1.0901765823364258, "learning_rate": 1.6146386459904183e-06, "loss": 0.0494, "step": 14746 }, { "epoch": 2.47, "grad_norm": 0.31932786107063293, "learning_rate": 1.6136549430952508e-06, "loss": 0.0186, "step": 14747 }, { "epoch": 2.47, "grad_norm": 0.4344426393508911, "learning_rate": 1.6126715136474935e-06, "loss": 0.0424, "step": 14748 }, { "epoch": 2.47, "grad_norm": 0.3126676082611084, "learning_rate": 1.6116883576792185e-06, "loss": 0.0275, "step": 14749 }, { "epoch": 2.47, "grad_norm": 0.2628202438354492, "learning_rate": 1.610705475222476e-06, "loss": 0.0291, "step": 14750 }, { "epoch": 2.47, "grad_norm": 0.4690397381782532, "learning_rate": 1.6097228663093178e-06, "loss": 0.0399, "step": 14751 }, { "epoch": 2.47, "grad_norm": 0.30767932534217834, "learning_rate": 1.6087405309717852e-06, "loss": 0.0192, "step": 14752 }, { "epoch": 2.47, "grad_norm": 0.39104923605918884, "learning_rate": 1.6077584692419057e-06, "loss": 0.0367, "step": 14753 }, { "epoch": 2.47, "grad_norm": 0.3372194170951843, "learning_rate": 1.6067766811516983e-06, "loss": 0.0282, "step": 14754 }, { "epoch": 2.47, "grad_norm": 0.43271195888519287, "learning_rate": 1.6057951667331817e-06, "loss": 0.0297, "step": 14755 }, { "epoch": 2.47, "grad_norm": 0.5898579955101013, "learning_rate": 1.6048139260183548e-06, "loss": 0.0539, "step": 14756 }, { "epoch": 2.47, "grad_norm": 0.385688841342926, "learning_rate": 1.603832959039211e-06, "loss": 0.023, "step": 14757 }, { "epoch": 2.47, "grad_norm": 0.4388448894023895, "learning_rate": 1.602852265827739e-06, "loss": 0.0414, "step": 14758 }, { "epoch": 2.47, "grad_norm": 0.3991609513759613, "learning_rate": 1.6018718464159144e-06, "loss": 0.0344, "step": 14759 }, { "epoch": 2.47, "grad_norm": 0.3042050898075104, "learning_rate": 1.6008917008357072e-06, "loss": 0.0313, "step": 14760 }, { "epoch": 2.47, "grad_norm": 0.4103690981864929, "learning_rate": 1.5999118291190752e-06, "loss": 0.0305, "step": 14761 }, { "epoch": 2.47, "grad_norm": 0.44459617137908936, "learning_rate": 1.598932231297966e-06, "loss": 0.0451, "step": 14762 }, { "epoch": 2.47, "grad_norm": 0.39994508028030396, "learning_rate": 1.5979529074043233e-06, "loss": 0.0287, "step": 14763 }, { "epoch": 2.47, "grad_norm": 0.472721129655838, "learning_rate": 1.5969738574700788e-06, "loss": 0.0382, "step": 14764 }, { "epoch": 2.47, "grad_norm": 0.41099295020103455, "learning_rate": 1.595995081527153e-06, "loss": 0.0416, "step": 14765 }, { "epoch": 2.47, "grad_norm": 0.38251209259033203, "learning_rate": 1.5950165796074612e-06, "loss": 0.0328, "step": 14766 }, { "epoch": 2.47, "grad_norm": 0.438111275434494, "learning_rate": 1.5940383517429125e-06, "loss": 0.0366, "step": 14767 }, { "epoch": 2.47, "grad_norm": 0.37258607149124146, "learning_rate": 1.593060397965399e-06, "loss": 0.0196, "step": 14768 }, { "epoch": 2.47, "grad_norm": 0.33734753727912903, "learning_rate": 1.592082718306811e-06, "loss": 0.022, "step": 14769 }, { "epoch": 2.47, "grad_norm": 0.2551635205745697, "learning_rate": 1.5911053127990262e-06, "loss": 0.0187, "step": 14770 }, { "epoch": 2.47, "grad_norm": 0.5281091928482056, "learning_rate": 1.59012818147391e-06, "loss": 0.0463, "step": 14771 }, { "epoch": 2.47, "grad_norm": 0.42586782574653625, "learning_rate": 1.5891513243633295e-06, "loss": 0.0342, "step": 14772 }, { "epoch": 2.47, "grad_norm": 0.5302169919013977, "learning_rate": 1.5881747414991312e-06, "loss": 0.0406, "step": 14773 }, { "epoch": 2.47, "grad_norm": 0.3318691551685333, "learning_rate": 1.587198432913163e-06, "loss": 0.0297, "step": 14774 }, { "epoch": 2.47, "grad_norm": 0.3409164249897003, "learning_rate": 1.5862223986372516e-06, "loss": 0.04, "step": 14775 }, { "epoch": 2.47, "grad_norm": 0.3170585334300995, "learning_rate": 1.5852466387032274e-06, "loss": 0.0428, "step": 14776 }, { "epoch": 2.47, "grad_norm": 0.2918642461299896, "learning_rate": 1.5842711531429066e-06, "loss": 0.0245, "step": 14777 }, { "epoch": 2.47, "grad_norm": 0.357067734003067, "learning_rate": 1.5832959419880944e-06, "loss": 0.0321, "step": 14778 }, { "epoch": 2.47, "grad_norm": 0.4812273383140564, "learning_rate": 1.582321005270586e-06, "loss": 0.044, "step": 14779 }, { "epoch": 2.47, "grad_norm": 0.49140965938568115, "learning_rate": 1.5813463430221754e-06, "loss": 0.0452, "step": 14780 }, { "epoch": 2.47, "grad_norm": 0.37052488327026367, "learning_rate": 1.580371955274641e-06, "loss": 0.0297, "step": 14781 }, { "epoch": 2.47, "grad_norm": 0.2795991003513336, "learning_rate": 1.579397842059751e-06, "loss": 0.0285, "step": 14782 }, { "epoch": 2.47, "grad_norm": 0.38007092475891113, "learning_rate": 1.5784240034092702e-06, "loss": 0.0285, "step": 14783 }, { "epoch": 2.47, "grad_norm": 0.45233774185180664, "learning_rate": 1.5774504393549539e-06, "loss": 0.0416, "step": 14784 }, { "epoch": 2.47, "grad_norm": 0.25967150926589966, "learning_rate": 1.5764771499285426e-06, "loss": 0.0352, "step": 14785 }, { "epoch": 2.47, "grad_norm": 0.4054120182991028, "learning_rate": 1.5755041351617738e-06, "loss": 0.0337, "step": 14786 }, { "epoch": 2.47, "grad_norm": 0.4465443193912506, "learning_rate": 1.5745313950863717e-06, "loss": 0.0302, "step": 14787 }, { "epoch": 2.47, "grad_norm": 0.37233370542526245, "learning_rate": 1.573558929734058e-06, "loss": 0.0404, "step": 14788 }, { "epoch": 2.47, "grad_norm": 0.43089303374290466, "learning_rate": 1.572586739136539e-06, "loss": 0.0348, "step": 14789 }, { "epoch": 2.47, "grad_norm": 0.2538355886936188, "learning_rate": 1.57161482332551e-06, "loss": 0.0226, "step": 14790 }, { "epoch": 2.47, "grad_norm": 0.31122368574142456, "learning_rate": 1.5706431823326663e-06, "loss": 0.0276, "step": 14791 }, { "epoch": 2.47, "grad_norm": 0.5216047763824463, "learning_rate": 1.5696718161896895e-06, "loss": 0.0419, "step": 14792 }, { "epoch": 2.47, "grad_norm": 0.32948675751686096, "learning_rate": 1.5687007249282494e-06, "loss": 0.0332, "step": 14793 }, { "epoch": 2.47, "grad_norm": 0.299132376909256, "learning_rate": 1.5677299085800146e-06, "loss": 0.0215, "step": 14794 }, { "epoch": 2.47, "grad_norm": 0.47113797068595886, "learning_rate": 1.5667593671766357e-06, "loss": 0.0421, "step": 14795 }, { "epoch": 2.47, "grad_norm": 0.3354097604751587, "learning_rate": 1.5657891007497573e-06, "loss": 0.0267, "step": 14796 }, { "epoch": 2.47, "grad_norm": 0.37208840250968933, "learning_rate": 1.5648191093310205e-06, "loss": 0.0257, "step": 14797 }, { "epoch": 2.47, "grad_norm": 0.4342041611671448, "learning_rate": 1.5638493929520483e-06, "loss": 0.0334, "step": 14798 }, { "epoch": 2.48, "grad_norm": 0.3764127492904663, "learning_rate": 1.5628799516444615e-06, "loss": 0.0474, "step": 14799 }, { "epoch": 2.48, "grad_norm": 0.4007548391819, "learning_rate": 1.561910785439873e-06, "loss": 0.0391, "step": 14800 }, { "epoch": 2.48, "grad_norm": 0.31355202198028564, "learning_rate": 1.5609418943698795e-06, "loss": 0.0315, "step": 14801 }, { "epoch": 2.48, "grad_norm": 0.35579222440719604, "learning_rate": 1.559973278466077e-06, "loss": 0.0251, "step": 14802 }, { "epoch": 2.48, "grad_norm": 0.42878618836402893, "learning_rate": 1.5590049377600459e-06, "loss": 0.0351, "step": 14803 }, { "epoch": 2.48, "grad_norm": 0.5486866235733032, "learning_rate": 1.5580368722833572e-06, "loss": 0.0383, "step": 14804 }, { "epoch": 2.48, "grad_norm": 0.46873220801353455, "learning_rate": 1.5570690820675815e-06, "loss": 0.0282, "step": 14805 }, { "epoch": 2.48, "grad_norm": 0.4661944806575775, "learning_rate": 1.5561015671442714e-06, "loss": 0.0441, "step": 14806 }, { "epoch": 2.48, "grad_norm": 0.38297006487846375, "learning_rate": 1.5551343275449736e-06, "loss": 0.0273, "step": 14807 }, { "epoch": 2.48, "grad_norm": 0.33563050627708435, "learning_rate": 1.5541673633012299e-06, "loss": 0.0246, "step": 14808 }, { "epoch": 2.48, "grad_norm": 0.40470361709594727, "learning_rate": 1.553200674444567e-06, "loss": 0.045, "step": 14809 }, { "epoch": 2.48, "grad_norm": 0.42667511105537415, "learning_rate": 1.5522342610065033e-06, "loss": 0.0354, "step": 14810 }, { "epoch": 2.48, "grad_norm": 0.4739060401916504, "learning_rate": 1.5512681230185534e-06, "loss": 0.0421, "step": 14811 }, { "epoch": 2.48, "grad_norm": 0.3965274691581726, "learning_rate": 1.5503022605122175e-06, "loss": 0.0302, "step": 14812 }, { "epoch": 2.48, "grad_norm": 0.26732707023620605, "learning_rate": 1.5493366735189873e-06, "loss": 0.0203, "step": 14813 }, { "epoch": 2.48, "grad_norm": 0.3465043008327484, "learning_rate": 1.5483713620703488e-06, "loss": 0.0259, "step": 14814 }, { "epoch": 2.48, "grad_norm": 0.27934589982032776, "learning_rate": 1.5474063261977757e-06, "loss": 0.0222, "step": 14815 }, { "epoch": 2.48, "grad_norm": 0.6743441224098206, "learning_rate": 1.5464415659327392e-06, "loss": 0.028, "step": 14816 }, { "epoch": 2.48, "grad_norm": 0.29074835777282715, "learning_rate": 1.5454770813066923e-06, "loss": 0.0179, "step": 14817 }, { "epoch": 2.48, "grad_norm": 0.4355337619781494, "learning_rate": 1.5445128723510817e-06, "loss": 0.0429, "step": 14818 }, { "epoch": 2.48, "grad_norm": 0.3388345539569855, "learning_rate": 1.5435489390973513e-06, "loss": 0.0477, "step": 14819 }, { "epoch": 2.48, "grad_norm": 0.4991370439529419, "learning_rate": 1.5425852815769283e-06, "loss": 0.0408, "step": 14820 }, { "epoch": 2.48, "grad_norm": 0.38698020577430725, "learning_rate": 1.5416218998212329e-06, "loss": 0.0341, "step": 14821 }, { "epoch": 2.48, "grad_norm": 0.5974956750869751, "learning_rate": 1.54065879386168e-06, "loss": 0.0409, "step": 14822 }, { "epoch": 2.48, "grad_norm": 0.37309008836746216, "learning_rate": 1.5396959637296716e-06, "loss": 0.0195, "step": 14823 }, { "epoch": 2.48, "grad_norm": 0.3457516133785248, "learning_rate": 1.5387334094566009e-06, "loss": 0.026, "step": 14824 }, { "epoch": 2.48, "grad_norm": 0.4468846619129181, "learning_rate": 1.5377711310738574e-06, "loss": 0.039, "step": 14825 }, { "epoch": 2.48, "grad_norm": 0.3493439257144928, "learning_rate": 1.5368091286128139e-06, "loss": 0.038, "step": 14826 }, { "epoch": 2.48, "grad_norm": 0.5223392248153687, "learning_rate": 1.5358474021048364e-06, "loss": 0.0314, "step": 14827 }, { "epoch": 2.48, "grad_norm": 0.4636748135089874, "learning_rate": 1.5348859515812885e-06, "loss": 0.0369, "step": 14828 }, { "epoch": 2.48, "grad_norm": 0.322596937417984, "learning_rate": 1.5339247770735134e-06, "loss": 0.0245, "step": 14829 }, { "epoch": 2.48, "grad_norm": 0.377633273601532, "learning_rate": 1.5329638786128565e-06, "loss": 0.0334, "step": 14830 }, { "epoch": 2.48, "grad_norm": 0.36330854892730713, "learning_rate": 1.5320032562306442e-06, "loss": 0.0223, "step": 14831 }, { "epoch": 2.48, "grad_norm": 0.3869571089744568, "learning_rate": 1.5310429099582013e-06, "loss": 0.0403, "step": 14832 }, { "epoch": 2.48, "grad_norm": 0.3497388958930969, "learning_rate": 1.5300828398268442e-06, "loss": 0.0344, "step": 14833 }, { "epoch": 2.48, "grad_norm": 0.4522838890552521, "learning_rate": 1.5291230458678741e-06, "loss": 0.0247, "step": 14834 }, { "epoch": 2.48, "grad_norm": 0.3676460385322571, "learning_rate": 1.5281635281125839e-06, "loss": 0.0296, "step": 14835 }, { "epoch": 2.48, "grad_norm": 0.48529067635536194, "learning_rate": 1.5272042865922654e-06, "loss": 0.0225, "step": 14836 }, { "epoch": 2.48, "grad_norm": 0.3152408301830292, "learning_rate": 1.5262453213381934e-06, "loss": 0.0242, "step": 14837 }, { "epoch": 2.48, "grad_norm": 0.49517178535461426, "learning_rate": 1.5252866323816328e-06, "loss": 0.0229, "step": 14838 }, { "epoch": 2.48, "grad_norm": 0.41995376348495483, "learning_rate": 1.5243282197538455e-06, "loss": 0.0328, "step": 14839 }, { "epoch": 2.48, "grad_norm": 0.4281250834465027, "learning_rate": 1.5233700834860844e-06, "loss": 0.0436, "step": 14840 }, { "epoch": 2.48, "grad_norm": 0.3967210650444031, "learning_rate": 1.5224122236095861e-06, "loss": 0.04, "step": 14841 }, { "epoch": 2.48, "grad_norm": 0.5848618745803833, "learning_rate": 1.5214546401555885e-06, "loss": 0.0472, "step": 14842 }, { "epoch": 2.48, "grad_norm": 0.3267799913883209, "learning_rate": 1.5204973331553086e-06, "loss": 0.0295, "step": 14843 }, { "epoch": 2.48, "grad_norm": 0.369319349527359, "learning_rate": 1.5195403026399646e-06, "loss": 0.0356, "step": 14844 }, { "epoch": 2.48, "grad_norm": 0.43844541907310486, "learning_rate": 1.5185835486407618e-06, "loss": 0.039, "step": 14845 }, { "epoch": 2.48, "grad_norm": 0.44761624932289124, "learning_rate": 1.517627071188893e-06, "loss": 0.0344, "step": 14846 }, { "epoch": 2.48, "grad_norm": 0.44364479184150696, "learning_rate": 1.5166708703155475e-06, "loss": 0.0256, "step": 14847 }, { "epoch": 2.48, "grad_norm": 0.26455357670783997, "learning_rate": 1.5157149460519049e-06, "loss": 0.0262, "step": 14848 }, { "epoch": 2.48, "grad_norm": 0.46509575843811035, "learning_rate": 1.5147592984291315e-06, "loss": 0.0356, "step": 14849 }, { "epoch": 2.48, "grad_norm": 0.5551848411560059, "learning_rate": 1.513803927478391e-06, "loss": 0.0411, "step": 14850 }, { "epoch": 2.48, "grad_norm": 0.34578004479408264, "learning_rate": 1.512848833230831e-06, "loss": 0.0201, "step": 14851 }, { "epoch": 2.48, "grad_norm": 0.3520047962665558, "learning_rate": 1.5118940157175943e-06, "loss": 0.0272, "step": 14852 }, { "epoch": 2.48, "grad_norm": 0.3711426556110382, "learning_rate": 1.510939474969816e-06, "loss": 0.0296, "step": 14853 }, { "epoch": 2.48, "grad_norm": 0.40329137444496155, "learning_rate": 1.5099852110186165e-06, "loss": 0.0316, "step": 14854 }, { "epoch": 2.48, "grad_norm": 0.4073983132839203, "learning_rate": 1.5090312238951122e-06, "loss": 0.0267, "step": 14855 }, { "epoch": 2.48, "grad_norm": 0.3350594937801361, "learning_rate": 1.5080775136304126e-06, "loss": 0.0267, "step": 14856 }, { "epoch": 2.48, "grad_norm": 0.3334634602069855, "learning_rate": 1.5071240802556085e-06, "loss": 0.0253, "step": 14857 }, { "epoch": 2.49, "grad_norm": 0.5366578102111816, "learning_rate": 1.5061709238017952e-06, "loss": 0.0367, "step": 14858 }, { "epoch": 2.49, "grad_norm": 0.38802021741867065, "learning_rate": 1.5052180443000464e-06, "loss": 0.0274, "step": 14859 }, { "epoch": 2.49, "grad_norm": 0.3763461709022522, "learning_rate": 1.5042654417814307e-06, "loss": 0.0351, "step": 14860 }, { "epoch": 2.49, "grad_norm": 0.3265201151371002, "learning_rate": 1.5033131162770132e-06, "loss": 0.0392, "step": 14861 }, { "epoch": 2.49, "grad_norm": 0.4290786385536194, "learning_rate": 1.5023610678178414e-06, "loss": 0.0346, "step": 14862 }, { "epoch": 2.49, "grad_norm": 0.43018579483032227, "learning_rate": 1.5014092964349603e-06, "loss": 0.0325, "step": 14863 }, { "epoch": 2.49, "grad_norm": 0.4188724756240845, "learning_rate": 1.5004578021594062e-06, "loss": 0.0451, "step": 14864 }, { "epoch": 2.49, "grad_norm": 0.3645205497741699, "learning_rate": 1.4995065850222012e-06, "loss": 0.0258, "step": 14865 }, { "epoch": 2.49, "grad_norm": 0.3654624819755554, "learning_rate": 1.4985556450543593e-06, "loss": 0.0292, "step": 14866 }, { "epoch": 2.49, "grad_norm": 0.47454833984375, "learning_rate": 1.49760498228689e-06, "loss": 0.0569, "step": 14867 }, { "epoch": 2.49, "grad_norm": 0.34095215797424316, "learning_rate": 1.4966545967507885e-06, "loss": 0.0355, "step": 14868 }, { "epoch": 2.49, "grad_norm": 0.3309154808521271, "learning_rate": 1.4957044884770455e-06, "loss": 0.0232, "step": 14869 }, { "epoch": 2.49, "grad_norm": 0.39650240540504456, "learning_rate": 1.4947546574966377e-06, "loss": 0.0309, "step": 14870 }, { "epoch": 2.49, "grad_norm": 0.5565215349197388, "learning_rate": 1.4938051038405387e-06, "loss": 0.0442, "step": 14871 }, { "epoch": 2.49, "grad_norm": 0.3738171458244324, "learning_rate": 1.4928558275397077e-06, "loss": 0.0275, "step": 14872 }, { "epoch": 2.49, "grad_norm": 0.4679484963417053, "learning_rate": 1.4919068286250994e-06, "loss": 0.0299, "step": 14873 }, { "epoch": 2.49, "grad_norm": 0.4518106281757355, "learning_rate": 1.4909581071276525e-06, "loss": 0.0425, "step": 14874 }, { "epoch": 2.49, "grad_norm": 0.31562307476997375, "learning_rate": 1.490009663078308e-06, "loss": 0.0311, "step": 14875 }, { "epoch": 2.49, "grad_norm": 0.4472140371799469, "learning_rate": 1.4890614965079864e-06, "loss": 0.0458, "step": 14876 }, { "epoch": 2.49, "grad_norm": 0.27981308102607727, "learning_rate": 1.4881136074476032e-06, "loss": 0.0269, "step": 14877 }, { "epoch": 2.49, "grad_norm": 0.3410819470882416, "learning_rate": 1.4871659959280694e-06, "loss": 0.0361, "step": 14878 }, { "epoch": 2.49, "grad_norm": 0.28998813033103943, "learning_rate": 1.4862186619802777e-06, "loss": 0.0178, "step": 14879 }, { "epoch": 2.49, "grad_norm": 0.554577648639679, "learning_rate": 1.4852716056351212e-06, "loss": 0.06, "step": 14880 }, { "epoch": 2.49, "grad_norm": 0.32601198554039, "learning_rate": 1.484324826923481e-06, "loss": 0.0287, "step": 14881 }, { "epoch": 2.49, "grad_norm": 0.47961580753326416, "learning_rate": 1.4833783258762246e-06, "loss": 0.0306, "step": 14882 }, { "epoch": 2.49, "grad_norm": 0.2921766936779022, "learning_rate": 1.4824321025242162e-06, "loss": 0.0281, "step": 14883 }, { "epoch": 2.49, "grad_norm": 0.37213510274887085, "learning_rate": 1.4814861568983086e-06, "loss": 0.0221, "step": 14884 }, { "epoch": 2.49, "grad_norm": 0.36179319024086, "learning_rate": 1.4805404890293417e-06, "loss": 0.032, "step": 14885 }, { "epoch": 2.49, "grad_norm": 0.2905271053314209, "learning_rate": 1.4795950989481566e-06, "loss": 0.0245, "step": 14886 }, { "epoch": 2.49, "grad_norm": 0.428853303194046, "learning_rate": 1.478649986685572e-06, "loss": 0.0253, "step": 14887 }, { "epoch": 2.49, "grad_norm": 0.46943557262420654, "learning_rate": 1.4777051522724084e-06, "loss": 0.0389, "step": 14888 }, { "epoch": 2.49, "grad_norm": 0.396767258644104, "learning_rate": 1.476760595739476e-06, "loss": 0.0473, "step": 14889 }, { "epoch": 2.49, "grad_norm": 0.3694845139980316, "learning_rate": 1.4758163171175687e-06, "loss": 0.0255, "step": 14890 }, { "epoch": 2.49, "grad_norm": 0.3223719000816345, "learning_rate": 1.4748723164374756e-06, "loss": 0.0303, "step": 14891 }, { "epoch": 2.49, "grad_norm": 0.3454509377479553, "learning_rate": 1.473928593729982e-06, "loss": 0.032, "step": 14892 }, { "epoch": 2.49, "grad_norm": 0.40304604172706604, "learning_rate": 1.4729851490258552e-06, "loss": 0.034, "step": 14893 }, { "epoch": 2.49, "grad_norm": 0.40206509828567505, "learning_rate": 1.4720419823558562e-06, "loss": 0.0242, "step": 14894 }, { "epoch": 2.49, "grad_norm": 0.3259679079055786, "learning_rate": 1.4710990937507397e-06, "loss": 0.0189, "step": 14895 }, { "epoch": 2.49, "grad_norm": 0.28128117322921753, "learning_rate": 1.4701564832412508e-06, "loss": 0.0239, "step": 14896 }, { "epoch": 2.49, "grad_norm": 0.31188979744911194, "learning_rate": 1.4692141508581248e-06, "loss": 0.0264, "step": 14897 }, { "epoch": 2.49, "grad_norm": 0.37749427556991577, "learning_rate": 1.4682720966320884e-06, "loss": 0.0409, "step": 14898 }, { "epoch": 2.49, "grad_norm": 0.3332636058330536, "learning_rate": 1.4673303205938537e-06, "loss": 0.0394, "step": 14899 }, { "epoch": 2.49, "grad_norm": 0.3929603695869446, "learning_rate": 1.466388822774134e-06, "loss": 0.0371, "step": 14900 }, { "epoch": 2.49, "grad_norm": 0.41865333914756775, "learning_rate": 1.4654476032036258e-06, "loss": 0.0304, "step": 14901 }, { "epoch": 2.49, "grad_norm": 0.37435096502304077, "learning_rate": 1.4645066619130166e-06, "loss": 0.0273, "step": 14902 }, { "epoch": 2.49, "grad_norm": 0.2826060950756073, "learning_rate": 1.4635659989329887e-06, "loss": 0.026, "step": 14903 }, { "epoch": 2.49, "grad_norm": 0.4158487021923065, "learning_rate": 1.4626256142942164e-06, "loss": 0.0411, "step": 14904 }, { "epoch": 2.49, "grad_norm": 0.3922313153743744, "learning_rate": 1.4616855080273573e-06, "loss": 0.0381, "step": 14905 }, { "epoch": 2.49, "grad_norm": 0.45514222979545593, "learning_rate": 1.4607456801630692e-06, "loss": 0.0272, "step": 14906 }, { "epoch": 2.49, "grad_norm": 0.3168703615665436, "learning_rate": 1.4598061307319944e-06, "loss": 0.0321, "step": 14907 }, { "epoch": 2.49, "grad_norm": 0.4539748430252075, "learning_rate": 1.458866859764766e-06, "loss": 0.036, "step": 14908 }, { "epoch": 2.49, "grad_norm": 0.3721618354320526, "learning_rate": 1.457927867292015e-06, "loss": 0.0201, "step": 14909 }, { "epoch": 2.49, "grad_norm": 0.4563177227973938, "learning_rate": 1.4569891533443525e-06, "loss": 0.0308, "step": 14910 }, { "epoch": 2.49, "grad_norm": 0.33859410881996155, "learning_rate": 1.4560507179523886e-06, "loss": 0.0306, "step": 14911 }, { "epoch": 2.49, "grad_norm": 0.33669909834861755, "learning_rate": 1.4551125611467264e-06, "loss": 0.0295, "step": 14912 }, { "epoch": 2.49, "grad_norm": 0.35536760091781616, "learning_rate": 1.4541746829579507e-06, "loss": 0.0221, "step": 14913 }, { "epoch": 2.49, "grad_norm": 0.39678335189819336, "learning_rate": 1.4532370834166454e-06, "loss": 0.0416, "step": 14914 }, { "epoch": 2.49, "grad_norm": 0.47120562195777893, "learning_rate": 1.4522997625533796e-06, "loss": 0.0354, "step": 14915 }, { "epoch": 2.49, "grad_norm": 0.4428441524505615, "learning_rate": 1.4513627203987156e-06, "loss": 0.0326, "step": 14916 }, { "epoch": 2.49, "grad_norm": 0.3704741299152374, "learning_rate": 1.4504259569832113e-06, "loss": 0.0264, "step": 14917 }, { "epoch": 2.5, "grad_norm": 0.34202075004577637, "learning_rate": 1.4494894723374041e-06, "loss": 0.0339, "step": 14918 }, { "epoch": 2.5, "grad_norm": 0.5032377243041992, "learning_rate": 1.4485532664918334e-06, "loss": 0.0278, "step": 14919 }, { "epoch": 2.5, "grad_norm": 0.2971177101135254, "learning_rate": 1.4476173394770276e-06, "loss": 0.0223, "step": 14920 }, { "epoch": 2.5, "grad_norm": 0.27138450741767883, "learning_rate": 1.4466816913235016e-06, "loss": 0.0307, "step": 14921 }, { "epoch": 2.5, "grad_norm": 0.36935994029045105, "learning_rate": 1.4457463220617596e-06, "loss": 0.0447, "step": 14922 }, { "epoch": 2.5, "grad_norm": 0.44932401180267334, "learning_rate": 1.4448112317223062e-06, "loss": 0.0348, "step": 14923 }, { "epoch": 2.5, "grad_norm": 0.3521598279476166, "learning_rate": 1.443876420335627e-06, "loss": 0.03, "step": 14924 }, { "epoch": 2.5, "grad_norm": 0.28273192048072815, "learning_rate": 1.4429418879322076e-06, "loss": 0.0317, "step": 14925 }, { "epoch": 2.5, "grad_norm": 0.3830386698246002, "learning_rate": 1.442007634542516e-06, "loss": 0.0423, "step": 14926 }, { "epoch": 2.5, "grad_norm": 0.31050175428390503, "learning_rate": 1.4410736601970133e-06, "loss": 0.0246, "step": 14927 }, { "epoch": 2.5, "grad_norm": 0.40970027446746826, "learning_rate": 1.4401399649261538e-06, "loss": 0.0343, "step": 14928 }, { "epoch": 2.5, "grad_norm": 0.4696585536003113, "learning_rate": 1.4392065487603867e-06, "loss": 0.0241, "step": 14929 }, { "epoch": 2.5, "grad_norm": 0.36493757367134094, "learning_rate": 1.43827341173014e-06, "loss": 0.0319, "step": 14930 }, { "epoch": 2.5, "grad_norm": 0.3762569725513458, "learning_rate": 1.4373405538658468e-06, "loss": 0.0259, "step": 14931 }, { "epoch": 2.5, "grad_norm": 0.3579951822757721, "learning_rate": 1.4364079751979198e-06, "loss": 0.0386, "step": 14932 }, { "epoch": 2.5, "grad_norm": 0.3357880115509033, "learning_rate": 1.4354756757567655e-06, "loss": 0.0346, "step": 14933 }, { "epoch": 2.5, "grad_norm": 0.2724497616291046, "learning_rate": 1.4345436555727877e-06, "loss": 0.0303, "step": 14934 }, { "epoch": 2.5, "grad_norm": 0.468850314617157, "learning_rate": 1.4336119146763706e-06, "loss": 0.0391, "step": 14935 }, { "epoch": 2.5, "grad_norm": 0.5035123229026794, "learning_rate": 1.4326804530978967e-06, "loss": 0.04, "step": 14936 }, { "epoch": 2.5, "grad_norm": 0.3552170991897583, "learning_rate": 1.4317492708677416e-06, "loss": 0.0219, "step": 14937 }, { "epoch": 2.5, "grad_norm": 0.32916954159736633, "learning_rate": 1.4308183680162625e-06, "loss": 0.0315, "step": 14938 }, { "epoch": 2.5, "grad_norm": 0.4137866199016571, "learning_rate": 1.4298877445738158e-06, "loss": 0.0323, "step": 14939 }, { "epoch": 2.5, "grad_norm": 0.35909539461135864, "learning_rate": 1.4289574005707452e-06, "loss": 0.0406, "step": 14940 }, { "epoch": 2.5, "grad_norm": 0.4973074495792389, "learning_rate": 1.4280273360373819e-06, "loss": 0.0475, "step": 14941 }, { "epoch": 2.5, "grad_norm": 0.32302728295326233, "learning_rate": 1.4270975510040575e-06, "loss": 0.0305, "step": 14942 }, { "epoch": 2.5, "grad_norm": 0.3054032623767853, "learning_rate": 1.426168045501084e-06, "loss": 0.0275, "step": 14943 }, { "epoch": 2.5, "grad_norm": 0.34320637583732605, "learning_rate": 1.4252388195587718e-06, "loss": 0.0372, "step": 14944 }, { "epoch": 2.5, "grad_norm": 0.4351089298725128, "learning_rate": 1.4243098732074212e-06, "loss": 0.0369, "step": 14945 }, { "epoch": 2.5, "grad_norm": 0.4072329103946686, "learning_rate": 1.423381206477319e-06, "loss": 0.0258, "step": 14946 }, { "epoch": 2.5, "grad_norm": 0.41161707043647766, "learning_rate": 1.422452819398743e-06, "loss": 0.035, "step": 14947 }, { "epoch": 2.5, "grad_norm": 0.4265654683113098, "learning_rate": 1.4215247120019716e-06, "loss": 0.0562, "step": 14948 }, { "epoch": 2.5, "grad_norm": 0.3618951141834259, "learning_rate": 1.4205968843172612e-06, "loss": 0.0285, "step": 14949 }, { "epoch": 2.5, "grad_norm": 0.3067478537559509, "learning_rate": 1.4196693363748649e-06, "loss": 0.0239, "step": 14950 }, { "epoch": 2.5, "grad_norm": 1.0460765361785889, "learning_rate": 1.418742068205028e-06, "loss": 0.0412, "step": 14951 }, { "epoch": 2.5, "grad_norm": 0.2981930375099182, "learning_rate": 1.4178150798379843e-06, "loss": 0.0198, "step": 14952 }, { "epoch": 2.5, "grad_norm": 0.2333662211894989, "learning_rate": 1.416888371303964e-06, "loss": 0.0146, "step": 14953 }, { "epoch": 2.5, "grad_norm": 0.22507084906101227, "learning_rate": 1.4159619426331795e-06, "loss": 0.0212, "step": 14954 }, { "epoch": 2.5, "grad_norm": 0.39277389645576477, "learning_rate": 1.4150357938558367e-06, "loss": 0.0232, "step": 14955 }, { "epoch": 2.5, "grad_norm": 0.4743289053440094, "learning_rate": 1.4141099250021384e-06, "loss": 0.053, "step": 14956 }, { "epoch": 2.5, "grad_norm": 0.4791805148124695, "learning_rate": 1.4131843361022701e-06, "loss": 0.055, "step": 14957 }, { "epoch": 2.5, "grad_norm": 0.37553828954696655, "learning_rate": 1.4122590271864111e-06, "loss": 0.0279, "step": 14958 }, { "epoch": 2.5, "grad_norm": 0.41138169169425964, "learning_rate": 1.4113339982847352e-06, "loss": 0.0268, "step": 14959 }, { "epoch": 2.5, "grad_norm": 0.28980791568756104, "learning_rate": 1.4104092494274046e-06, "loss": 0.0264, "step": 14960 }, { "epoch": 2.5, "grad_norm": 0.419800341129303, "learning_rate": 1.4094847806445678e-06, "loss": 0.0423, "step": 14961 }, { "epoch": 2.5, "grad_norm": 0.5457625985145569, "learning_rate": 1.4085605919663725e-06, "loss": 0.0373, "step": 14962 }, { "epoch": 2.5, "grad_norm": 0.29757562279701233, "learning_rate": 1.4076366834229526e-06, "loss": 0.0289, "step": 14963 }, { "epoch": 2.5, "grad_norm": 0.3070128262042999, "learning_rate": 1.4067130550444286e-06, "loss": 0.0191, "step": 14964 }, { "epoch": 2.5, "grad_norm": 0.2725659906864166, "learning_rate": 1.405789706860924e-06, "loss": 0.0296, "step": 14965 }, { "epoch": 2.5, "grad_norm": 0.3088405132293701, "learning_rate": 1.4048666389025377e-06, "loss": 0.024, "step": 14966 }, { "epoch": 2.5, "grad_norm": 0.5172139406204224, "learning_rate": 1.403943851199372e-06, "loss": 0.0397, "step": 14967 }, { "epoch": 2.5, "grad_norm": 0.301845908164978, "learning_rate": 1.4030213437815177e-06, "loss": 0.0222, "step": 14968 }, { "epoch": 2.5, "grad_norm": 0.3121436834335327, "learning_rate": 1.4020991166790499e-06, "loss": 0.0336, "step": 14969 }, { "epoch": 2.5, "grad_norm": 0.28559213876724243, "learning_rate": 1.4011771699220433e-06, "loss": 0.0229, "step": 14970 }, { "epoch": 2.5, "grad_norm": 0.4825631082057953, "learning_rate": 1.4002555035405553e-06, "loss": 0.024, "step": 14971 }, { "epoch": 2.5, "grad_norm": 0.3313107490539551, "learning_rate": 1.399334117564638e-06, "loss": 0.0375, "step": 14972 }, { "epoch": 2.5, "grad_norm": 0.44989168643951416, "learning_rate": 1.3984130120243378e-06, "loss": 0.0323, "step": 14973 }, { "epoch": 2.5, "grad_norm": 0.3892832398414612, "learning_rate": 1.3974921869496849e-06, "loss": 0.0336, "step": 14974 }, { "epoch": 2.5, "grad_norm": 0.38295549154281616, "learning_rate": 1.3965716423707054e-06, "loss": 0.0363, "step": 14975 }, { "epoch": 2.5, "grad_norm": 0.3448692858219147, "learning_rate": 1.3956513783174163e-06, "loss": 0.0336, "step": 14976 }, { "epoch": 2.5, "grad_norm": 0.36463162302970886, "learning_rate": 1.3947313948198238e-06, "loss": 0.0267, "step": 14977 }, { "epoch": 2.51, "grad_norm": 0.34279242157936096, "learning_rate": 1.3938116919079203e-06, "loss": 0.0289, "step": 14978 }, { "epoch": 2.51, "grad_norm": 0.36355674266815186, "learning_rate": 1.3928922696117008e-06, "loss": 0.0312, "step": 14979 }, { "epoch": 2.51, "grad_norm": 0.34764009714126587, "learning_rate": 1.3919731279611382e-06, "loss": 0.0265, "step": 14980 }, { "epoch": 2.51, "grad_norm": 0.44141125679016113, "learning_rate": 1.391054266986208e-06, "loss": 0.0446, "step": 14981 }, { "epoch": 2.51, "grad_norm": 0.39219871163368225, "learning_rate": 1.390135686716867e-06, "loss": 0.0349, "step": 14982 }, { "epoch": 2.51, "grad_norm": 0.4010758399963379, "learning_rate": 1.389217387183066e-06, "loss": 0.0355, "step": 14983 }, { "epoch": 2.51, "grad_norm": 0.37778040766716003, "learning_rate": 1.3882993684147484e-06, "loss": 0.0427, "step": 14984 }, { "epoch": 2.51, "grad_norm": 0.39611008763313293, "learning_rate": 1.3873816304418508e-06, "loss": 0.0345, "step": 14985 }, { "epoch": 2.51, "grad_norm": 0.45473945140838623, "learning_rate": 1.3864641732942907e-06, "loss": 0.0315, "step": 14986 }, { "epoch": 2.51, "grad_norm": 0.28096258640289307, "learning_rate": 1.3855469970019897e-06, "loss": 0.0232, "step": 14987 }, { "epoch": 2.51, "grad_norm": 0.3628973066806793, "learning_rate": 1.3846301015948504e-06, "loss": 0.0186, "step": 14988 }, { "epoch": 2.51, "grad_norm": 0.31151726841926575, "learning_rate": 1.3837134871027668e-06, "loss": 0.0216, "step": 14989 }, { "epoch": 2.51, "grad_norm": 0.4001360535621643, "learning_rate": 1.3827971535556306e-06, "loss": 0.0281, "step": 14990 }, { "epoch": 2.51, "grad_norm": 0.32653185725212097, "learning_rate": 1.3818811009833166e-06, "loss": 0.0195, "step": 14991 }, { "epoch": 2.51, "grad_norm": 0.47289326786994934, "learning_rate": 1.3809653294156944e-06, "loss": 0.0403, "step": 14992 }, { "epoch": 2.51, "grad_norm": 0.39344313740730286, "learning_rate": 1.3800498388826277e-06, "loss": 0.023, "step": 14993 }, { "epoch": 2.51, "grad_norm": 0.34556400775909424, "learning_rate": 1.3791346294139617e-06, "loss": 0.0292, "step": 14994 }, { "epoch": 2.51, "grad_norm": 0.40717363357543945, "learning_rate": 1.3782197010395437e-06, "loss": 0.0394, "step": 14995 }, { "epoch": 2.51, "grad_norm": 0.3703621029853821, "learning_rate": 1.377305053789203e-06, "loss": 0.0264, "step": 14996 }, { "epoch": 2.51, "grad_norm": 0.30416566133499146, "learning_rate": 1.3763906876927601e-06, "loss": 0.0333, "step": 14997 }, { "epoch": 2.51, "grad_norm": 0.3631812632083893, "learning_rate": 1.3754766027800337e-06, "loss": 0.0343, "step": 14998 }, { "epoch": 2.51, "grad_norm": 0.3224882483482361, "learning_rate": 1.3745627990808251e-06, "loss": 0.0252, "step": 14999 }, { "epoch": 2.51, "grad_norm": 0.44846901297569275, "learning_rate": 1.3736492766249321e-06, "loss": 0.0263, "step": 15000 }, { "epoch": 2.51, "grad_norm": 0.573959231376648, "learning_rate": 1.3727360354421437e-06, "loss": 0.0383, "step": 15001 }, { "epoch": 2.51, "grad_norm": 0.439071387052536, "learning_rate": 1.371823075562234e-06, "loss": 0.0389, "step": 15002 }, { "epoch": 2.51, "grad_norm": 0.2514917254447937, "learning_rate": 1.3709103970149696e-06, "loss": 0.023, "step": 15003 }, { "epoch": 2.51, "grad_norm": 0.43410617113113403, "learning_rate": 1.3699979998301127e-06, "loss": 0.0202, "step": 15004 }, { "epoch": 2.51, "grad_norm": 0.3762338161468506, "learning_rate": 1.3690858840374132e-06, "loss": 0.0307, "step": 15005 }, { "epoch": 2.51, "grad_norm": 0.3432009518146515, "learning_rate": 1.3681740496666084e-06, "loss": 0.0324, "step": 15006 }, { "epoch": 2.51, "grad_norm": 0.31403112411499023, "learning_rate": 1.3672624967474324e-06, "loss": 0.0314, "step": 15007 }, { "epoch": 2.51, "grad_norm": 0.43322309851646423, "learning_rate": 1.3663512253096079e-06, "loss": 0.0348, "step": 15008 }, { "epoch": 2.51, "grad_norm": 0.43685799837112427, "learning_rate": 1.3654402353828488e-06, "loss": 0.0372, "step": 15009 }, { "epoch": 2.51, "grad_norm": 0.34669914841651917, "learning_rate": 1.3645295269968572e-06, "loss": 0.032, "step": 15010 }, { "epoch": 2.51, "grad_norm": 0.37191081047058105, "learning_rate": 1.363619100181327e-06, "loss": 0.0284, "step": 15011 }, { "epoch": 2.51, "grad_norm": 0.3653230369091034, "learning_rate": 1.3627089549659479e-06, "loss": 0.0326, "step": 15012 }, { "epoch": 2.51, "grad_norm": 0.3248732388019562, "learning_rate": 1.3617990913803936e-06, "loss": 0.0428, "step": 15013 }, { "epoch": 2.51, "grad_norm": 0.4574509859085083, "learning_rate": 1.3608895094543285e-06, "loss": 0.045, "step": 15014 }, { "epoch": 2.51, "grad_norm": 0.534285306930542, "learning_rate": 1.359980209217414e-06, "loss": 0.0324, "step": 15015 }, { "epoch": 2.51, "grad_norm": 0.3850996792316437, "learning_rate": 1.3590711906993004e-06, "loss": 0.041, "step": 15016 }, { "epoch": 2.51, "grad_norm": 0.29377007484436035, "learning_rate": 1.3581624539296246e-06, "loss": 0.0223, "step": 15017 }, { "epoch": 2.51, "grad_norm": 0.3816905915737152, "learning_rate": 1.3572539989380196e-06, "loss": 0.0376, "step": 15018 }, { "epoch": 2.51, "grad_norm": 0.45524004101753235, "learning_rate": 1.3563458257541051e-06, "loss": 0.0343, "step": 15019 }, { "epoch": 2.51, "grad_norm": 0.48602554202079773, "learning_rate": 1.3554379344074908e-06, "loss": 0.0387, "step": 15020 }, { "epoch": 2.51, "grad_norm": 0.2949419617652893, "learning_rate": 1.3545303249277853e-06, "loss": 0.018, "step": 15021 }, { "epoch": 2.51, "grad_norm": 0.38751086592674255, "learning_rate": 1.353622997344577e-06, "loss": 0.0278, "step": 15022 }, { "epoch": 2.51, "grad_norm": 0.29239320755004883, "learning_rate": 1.352715951687452e-06, "loss": 0.034, "step": 15023 }, { "epoch": 2.51, "grad_norm": 0.35618963837623596, "learning_rate": 1.3518091879859897e-06, "loss": 0.0358, "step": 15024 }, { "epoch": 2.51, "grad_norm": 0.2999419569969177, "learning_rate": 1.3509027062697511e-06, "loss": 0.028, "step": 15025 }, { "epoch": 2.51, "grad_norm": 0.3025417923927307, "learning_rate": 1.3499965065682962e-06, "loss": 0.0281, "step": 15026 }, { "epoch": 2.51, "grad_norm": 0.47471949458122253, "learning_rate": 1.3490905889111716e-06, "loss": 0.0326, "step": 15027 }, { "epoch": 2.51, "grad_norm": 0.34719398617744446, "learning_rate": 1.3481849533279146e-06, "loss": 0.033, "step": 15028 }, { "epoch": 2.51, "grad_norm": 0.47991955280303955, "learning_rate": 1.347279599848058e-06, "loss": 0.0244, "step": 15029 }, { "epoch": 2.51, "grad_norm": 0.3359382748603821, "learning_rate": 1.3463745285011177e-06, "loss": 0.0193, "step": 15030 }, { "epoch": 2.51, "grad_norm": 0.2864052951335907, "learning_rate": 1.3454697393166094e-06, "loss": 0.0233, "step": 15031 }, { "epoch": 2.51, "grad_norm": 0.4876675307750702, "learning_rate": 1.34456523232403e-06, "loss": 0.0512, "step": 15032 }, { "epoch": 2.51, "grad_norm": 0.8149318099021912, "learning_rate": 1.3436610075528767e-06, "loss": 0.0522, "step": 15033 }, { "epoch": 2.51, "grad_norm": 0.4584253132343292, "learning_rate": 1.3427570650326293e-06, "loss": 0.0435, "step": 15034 }, { "epoch": 2.51, "grad_norm": 0.2848413288593292, "learning_rate": 1.3418534047927646e-06, "loss": 0.0216, "step": 15035 }, { "epoch": 2.51, "grad_norm": 0.4137098789215088, "learning_rate": 1.3409500268627451e-06, "loss": 0.0331, "step": 15036 }, { "epoch": 2.51, "grad_norm": 0.32823434472084045, "learning_rate": 1.34004693127203e-06, "loss": 0.0303, "step": 15037 }, { "epoch": 2.52, "grad_norm": 0.25321778655052185, "learning_rate": 1.339144118050063e-06, "loss": 0.0171, "step": 15038 }, { "epoch": 2.52, "grad_norm": 0.47420161962509155, "learning_rate": 1.338241587226281e-06, "loss": 0.0477, "step": 15039 }, { "epoch": 2.52, "grad_norm": 0.37071484327316284, "learning_rate": 1.3373393388301137e-06, "loss": 0.0311, "step": 15040 }, { "epoch": 2.52, "grad_norm": 0.442212849855423, "learning_rate": 1.3364373728909807e-06, "loss": 0.0322, "step": 15041 }, { "epoch": 2.52, "grad_norm": 0.35120829939842224, "learning_rate": 1.3355356894382897e-06, "loss": 0.0345, "step": 15042 }, { "epoch": 2.52, "grad_norm": 0.44089528918266296, "learning_rate": 1.3346342885014441e-06, "loss": 0.0415, "step": 15043 }, { "epoch": 2.52, "grad_norm": 0.3381321132183075, "learning_rate": 1.3337331701098344e-06, "loss": 0.033, "step": 15044 }, { "epoch": 2.52, "grad_norm": 0.35477566719055176, "learning_rate": 1.3328323342928385e-06, "loss": 0.0329, "step": 15045 }, { "epoch": 2.52, "grad_norm": 0.6537870764732361, "learning_rate": 1.3319317810798349e-06, "loss": 0.0371, "step": 15046 }, { "epoch": 2.52, "grad_norm": 0.5474724769592285, "learning_rate": 1.3310315105001835e-06, "loss": 0.0425, "step": 15047 }, { "epoch": 2.52, "grad_norm": 0.347444623708725, "learning_rate": 1.3301315225832402e-06, "loss": 0.033, "step": 15048 }, { "epoch": 2.52, "grad_norm": 0.3201911449432373, "learning_rate": 1.329231817358352e-06, "loss": 0.0253, "step": 15049 }, { "epoch": 2.52, "grad_norm": 0.38491418957710266, "learning_rate": 1.3283323948548509e-06, "loss": 0.0272, "step": 15050 }, { "epoch": 2.52, "grad_norm": 0.31404629349708557, "learning_rate": 1.3274332551020686e-06, "loss": 0.0224, "step": 15051 }, { "epoch": 2.52, "grad_norm": 0.549272358417511, "learning_rate": 1.3265343981293199e-06, "loss": 0.0438, "step": 15052 }, { "epoch": 2.52, "grad_norm": 0.5097998976707458, "learning_rate": 1.3256358239659118e-06, "loss": 0.0424, "step": 15053 }, { "epoch": 2.52, "grad_norm": 0.4037477672100067, "learning_rate": 1.3247375326411472e-06, "loss": 0.0347, "step": 15054 }, { "epoch": 2.52, "grad_norm": 0.3869725465774536, "learning_rate": 1.3238395241843105e-06, "loss": 0.0369, "step": 15055 }, { "epoch": 2.52, "grad_norm": 0.34958401322364807, "learning_rate": 1.3229417986246873e-06, "loss": 0.0304, "step": 15056 }, { "epoch": 2.52, "grad_norm": 0.335610955953598, "learning_rate": 1.3220443559915485e-06, "loss": 0.0358, "step": 15057 }, { "epoch": 2.52, "grad_norm": 0.4013114273548126, "learning_rate": 1.3211471963141565e-06, "loss": 0.0317, "step": 15058 }, { "epoch": 2.52, "grad_norm": 0.3899608850479126, "learning_rate": 1.3202503196217598e-06, "loss": 0.028, "step": 15059 }, { "epoch": 2.52, "grad_norm": 0.39511606097221375, "learning_rate": 1.3193537259436084e-06, "loss": 0.0408, "step": 15060 }, { "epoch": 2.52, "grad_norm": 0.38467714190483093, "learning_rate": 1.3184574153089347e-06, "loss": 0.0269, "step": 15061 }, { "epoch": 2.52, "grad_norm": 0.48100361227989197, "learning_rate": 1.3175613877469606e-06, "loss": 0.0261, "step": 15062 }, { "epoch": 2.52, "grad_norm": 0.3212663233280182, "learning_rate": 1.3166656432869051e-06, "loss": 0.0247, "step": 15063 }, { "epoch": 2.52, "grad_norm": 0.3481212258338928, "learning_rate": 1.3157701819579749e-06, "loss": 0.0274, "step": 15064 }, { "epoch": 2.52, "grad_norm": 0.5312350988388062, "learning_rate": 1.314875003789371e-06, "loss": 0.0335, "step": 15065 }, { "epoch": 2.52, "grad_norm": 0.2950471341609955, "learning_rate": 1.3139801088102777e-06, "loss": 0.0287, "step": 15066 }, { "epoch": 2.52, "grad_norm": 0.35881584882736206, "learning_rate": 1.313085497049873e-06, "loss": 0.034, "step": 15067 }, { "epoch": 2.52, "grad_norm": 0.33967122435569763, "learning_rate": 1.3121911685373313e-06, "loss": 0.0281, "step": 15068 }, { "epoch": 2.52, "grad_norm": 0.3716702163219452, "learning_rate": 1.3112971233018113e-06, "loss": 0.0347, "step": 15069 }, { "epoch": 2.52, "grad_norm": 0.3541471064090729, "learning_rate": 1.310403361372462e-06, "loss": 0.0239, "step": 15070 }, { "epoch": 2.52, "grad_norm": 0.29761552810668945, "learning_rate": 1.3095098827784269e-06, "loss": 0.0234, "step": 15071 }, { "epoch": 2.52, "grad_norm": 0.3558177351951599, "learning_rate": 1.3086166875488425e-06, "loss": 0.0261, "step": 15072 }, { "epoch": 2.52, "grad_norm": 0.33134785294532776, "learning_rate": 1.3077237757128281e-06, "loss": 0.0343, "step": 15073 }, { "epoch": 2.52, "grad_norm": 0.4021149277687073, "learning_rate": 1.306831147299502e-06, "loss": 0.0288, "step": 15074 }, { "epoch": 2.52, "grad_norm": 0.5103119611740112, "learning_rate": 1.3059388023379683e-06, "loss": 0.0335, "step": 15075 }, { "epoch": 2.52, "grad_norm": 0.3258478343486786, "learning_rate": 1.3050467408573197e-06, "loss": 0.0267, "step": 15076 }, { "epoch": 2.52, "grad_norm": 0.31760329008102417, "learning_rate": 1.3041549628866468e-06, "loss": 0.0393, "step": 15077 }, { "epoch": 2.52, "grad_norm": 0.5749468207359314, "learning_rate": 1.303263468455025e-06, "loss": 0.0449, "step": 15078 }, { "epoch": 2.52, "grad_norm": 0.32569754123687744, "learning_rate": 1.3023722575915232e-06, "loss": 0.024, "step": 15079 }, { "epoch": 2.52, "grad_norm": 0.38877126574516296, "learning_rate": 1.3014813303252028e-06, "loss": 0.0227, "step": 15080 }, { "epoch": 2.52, "grad_norm": 0.40166175365448, "learning_rate": 1.3005906866851092e-06, "loss": 0.0386, "step": 15081 }, { "epoch": 2.52, "grad_norm": 0.602520227432251, "learning_rate": 1.2997003267002882e-06, "loss": 0.0494, "step": 15082 }, { "epoch": 2.52, "grad_norm": 0.37778031826019287, "learning_rate": 1.2988102503997679e-06, "loss": 0.0331, "step": 15083 }, { "epoch": 2.52, "grad_norm": 0.3731057047843933, "learning_rate": 1.2979204578125692e-06, "loss": 0.0252, "step": 15084 }, { "epoch": 2.52, "grad_norm": 0.4098179340362549, "learning_rate": 1.2970309489677079e-06, "loss": 0.0335, "step": 15085 }, { "epoch": 2.52, "grad_norm": 0.41513386368751526, "learning_rate": 1.2961417238941864e-06, "loss": 0.0484, "step": 15086 }, { "epoch": 2.52, "grad_norm": 0.25100696086883545, "learning_rate": 1.295252782620997e-06, "loss": 0.0141, "step": 15087 }, { "epoch": 2.52, "grad_norm": 0.311935156583786, "learning_rate": 1.2943641251771267e-06, "loss": 0.026, "step": 15088 }, { "epoch": 2.52, "grad_norm": 0.38953518867492676, "learning_rate": 1.293475751591553e-06, "loss": 0.0368, "step": 15089 }, { "epoch": 2.52, "grad_norm": 0.4169321358203888, "learning_rate": 1.2925876618932377e-06, "loss": 0.0308, "step": 15090 }, { "epoch": 2.52, "grad_norm": 0.4195014238357544, "learning_rate": 1.2916998561111438e-06, "loss": 0.0371, "step": 15091 }, { "epoch": 2.52, "grad_norm": 0.39170023798942566, "learning_rate": 1.290812334274214e-06, "loss": 0.0307, "step": 15092 }, { "epoch": 2.52, "grad_norm": 0.31152692437171936, "learning_rate": 1.289925096411392e-06, "loss": 0.0255, "step": 15093 }, { "epoch": 2.52, "grad_norm": 0.28983554244041443, "learning_rate": 1.2890381425516052e-06, "loss": 0.0271, "step": 15094 }, { "epoch": 2.52, "grad_norm": 0.29228347539901733, "learning_rate": 1.288151472723771e-06, "loss": 0.0169, "step": 15095 }, { "epoch": 2.52, "grad_norm": 0.6426756381988525, "learning_rate": 1.2872650869568026e-06, "loss": 0.049, "step": 15096 }, { "epoch": 2.53, "grad_norm": 0.38223642110824585, "learning_rate": 1.286378985279605e-06, "loss": 0.0341, "step": 15097 }, { "epoch": 2.53, "grad_norm": 0.3822762072086334, "learning_rate": 1.2854931677210657e-06, "loss": 0.0365, "step": 15098 }, { "epoch": 2.53, "grad_norm": 0.3175276815891266, "learning_rate": 1.2846076343100711e-06, "loss": 0.0236, "step": 15099 }, { "epoch": 2.53, "grad_norm": 0.4115498661994934, "learning_rate": 1.2837223850754942e-06, "loss": 0.0253, "step": 15100 }, { "epoch": 2.53, "grad_norm": 0.3213556706905365, "learning_rate": 1.2828374200461978e-06, "loss": 0.0245, "step": 15101 }, { "epoch": 2.53, "grad_norm": 0.41872426867485046, "learning_rate": 1.281952739251041e-06, "loss": 0.0384, "step": 15102 }, { "epoch": 2.53, "grad_norm": 0.42506730556488037, "learning_rate": 1.2810683427188663e-06, "loss": 0.0339, "step": 15103 }, { "epoch": 2.53, "grad_norm": 0.4455103874206543, "learning_rate": 1.2801842304785105e-06, "loss": 0.0337, "step": 15104 }, { "epoch": 2.53, "grad_norm": 0.3773629665374756, "learning_rate": 1.2793004025588062e-06, "loss": 0.0458, "step": 15105 }, { "epoch": 2.53, "grad_norm": 0.48290443420410156, "learning_rate": 1.2784168589885671e-06, "loss": 0.0244, "step": 15106 }, { "epoch": 2.53, "grad_norm": 0.3498488962650299, "learning_rate": 1.2775335997966043e-06, "loss": 0.0347, "step": 15107 }, { "epoch": 2.53, "grad_norm": 0.3255769908428192, "learning_rate": 1.2766506250117171e-06, "loss": 0.0237, "step": 15108 }, { "epoch": 2.53, "grad_norm": 0.4481496512889862, "learning_rate": 1.2757679346626939e-06, "loss": 0.0292, "step": 15109 }, { "epoch": 2.53, "grad_norm": 0.508409321308136, "learning_rate": 1.2748855287783202e-06, "loss": 0.0349, "step": 15110 }, { "epoch": 2.53, "grad_norm": 0.38245171308517456, "learning_rate": 1.274003407387363e-06, "loss": 0.0389, "step": 15111 }, { "epoch": 2.53, "grad_norm": 0.4525236189365387, "learning_rate": 1.2731215705185873e-06, "loss": 0.0348, "step": 15112 }, { "epoch": 2.53, "grad_norm": 0.345329225063324, "learning_rate": 1.272240018200749e-06, "loss": 0.0297, "step": 15113 }, { "epoch": 2.53, "grad_norm": 0.47010573744773865, "learning_rate": 1.2713587504625902e-06, "loss": 0.041, "step": 15114 }, { "epoch": 2.53, "grad_norm": 0.38950738310813904, "learning_rate": 1.2704777673328438e-06, "loss": 0.0392, "step": 15115 }, { "epoch": 2.53, "grad_norm": 0.33023208379745483, "learning_rate": 1.2695970688402392e-06, "loss": 0.0395, "step": 15116 }, { "epoch": 2.53, "grad_norm": 0.4733012318611145, "learning_rate": 1.26871665501349e-06, "loss": 0.0394, "step": 15117 }, { "epoch": 2.53, "grad_norm": 0.4045599400997162, "learning_rate": 1.267836525881302e-06, "loss": 0.0357, "step": 15118 }, { "epoch": 2.53, "grad_norm": 0.5210021734237671, "learning_rate": 1.2669566814723744e-06, "loss": 0.0361, "step": 15119 }, { "epoch": 2.53, "grad_norm": 0.27619725465774536, "learning_rate": 1.2660771218153966e-06, "loss": 0.0268, "step": 15120 }, { "epoch": 2.53, "grad_norm": 0.24675807356834412, "learning_rate": 1.265197846939049e-06, "loss": 0.0243, "step": 15121 }, { "epoch": 2.53, "grad_norm": 0.5205377340316772, "learning_rate": 1.264318856871999e-06, "loss": 0.0519, "step": 15122 }, { "epoch": 2.53, "grad_norm": 0.3177264630794525, "learning_rate": 1.2634401516429062e-06, "loss": 0.0244, "step": 15123 }, { "epoch": 2.53, "grad_norm": 0.44377678632736206, "learning_rate": 1.2625617312804261e-06, "loss": 0.0315, "step": 15124 }, { "epoch": 2.53, "grad_norm": 0.4395730197429657, "learning_rate": 1.2616835958131978e-06, "loss": 0.0367, "step": 15125 }, { "epoch": 2.53, "grad_norm": 0.43565064668655396, "learning_rate": 1.2608057452698518e-06, "loss": 0.036, "step": 15126 }, { "epoch": 2.53, "grad_norm": 0.39967480301856995, "learning_rate": 1.259928179679014e-06, "loss": 0.0343, "step": 15127 }, { "epoch": 2.53, "grad_norm": 0.37293440103530884, "learning_rate": 1.2590508990693007e-06, "loss": 0.0312, "step": 15128 }, { "epoch": 2.53, "grad_norm": 0.319319486618042, "learning_rate": 1.258173903469313e-06, "loss": 0.0206, "step": 15129 }, { "epoch": 2.53, "grad_norm": 0.4985223412513733, "learning_rate": 1.2572971929076504e-06, "loss": 0.0385, "step": 15130 }, { "epoch": 2.53, "grad_norm": 0.35060054063796997, "learning_rate": 1.256420767412896e-06, "loss": 0.0261, "step": 15131 }, { "epoch": 2.53, "grad_norm": 0.28926628828048706, "learning_rate": 1.2555446270136263e-06, "loss": 0.0255, "step": 15132 }, { "epoch": 2.53, "grad_norm": 0.36029595136642456, "learning_rate": 1.2546687717384121e-06, "loss": 0.0273, "step": 15133 }, { "epoch": 2.53, "grad_norm": 0.36908331513404846, "learning_rate": 1.2537932016158084e-06, "loss": 0.0422, "step": 15134 }, { "epoch": 2.53, "grad_norm": 0.3822193741798401, "learning_rate": 1.2529179166743666e-06, "loss": 0.031, "step": 15135 }, { "epoch": 2.53, "grad_norm": 0.3530217707157135, "learning_rate": 1.252042916942624e-06, "loss": 0.0377, "step": 15136 }, { "epoch": 2.53, "grad_norm": 0.46547698974609375, "learning_rate": 1.2511682024491134e-06, "loss": 0.0317, "step": 15137 }, { "epoch": 2.53, "grad_norm": 0.34648066759109497, "learning_rate": 1.2502937732223575e-06, "loss": 0.0345, "step": 15138 }, { "epoch": 2.53, "grad_norm": 0.3845216929912567, "learning_rate": 1.2494196292908656e-06, "loss": 0.0378, "step": 15139 }, { "epoch": 2.53, "grad_norm": 0.5214868187904358, "learning_rate": 1.2485457706831382e-06, "loss": 0.0369, "step": 15140 }, { "epoch": 2.53, "grad_norm": 0.4277329742908478, "learning_rate": 1.247672197427674e-06, "loss": 0.0334, "step": 15141 }, { "epoch": 2.53, "grad_norm": 0.45516467094421387, "learning_rate": 1.2467989095529542e-06, "loss": 0.0442, "step": 15142 }, { "epoch": 2.53, "grad_norm": 0.36199861764907837, "learning_rate": 1.2459259070874507e-06, "loss": 0.0313, "step": 15143 }, { "epoch": 2.53, "grad_norm": 0.3091278374195099, "learning_rate": 1.2450531900596307e-06, "loss": 0.0218, "step": 15144 }, { "epoch": 2.53, "grad_norm": 0.4173913896083832, "learning_rate": 1.2441807584979547e-06, "loss": 0.0328, "step": 15145 }, { "epoch": 2.53, "grad_norm": 0.3187507092952728, "learning_rate": 1.2433086124308625e-06, "loss": 0.0359, "step": 15146 }, { "epoch": 2.53, "grad_norm": 0.3133467733860016, "learning_rate": 1.2424367518867975e-06, "loss": 0.0231, "step": 15147 }, { "epoch": 2.53, "grad_norm": 0.3293249011039734, "learning_rate": 1.2415651768941828e-06, "loss": 0.0242, "step": 15148 }, { "epoch": 2.53, "grad_norm": 0.36615094542503357, "learning_rate": 1.2406938874814412e-06, "loss": 0.0487, "step": 15149 }, { "epoch": 2.53, "grad_norm": 0.380434513092041, "learning_rate": 1.23982288367698e-06, "loss": 0.0244, "step": 15150 }, { "epoch": 2.53, "grad_norm": 0.4060077667236328, "learning_rate": 1.2389521655091984e-06, "loss": 0.0304, "step": 15151 }, { "epoch": 2.53, "grad_norm": 0.37729671597480774, "learning_rate": 1.2380817330064887e-06, "loss": 0.032, "step": 15152 }, { "epoch": 2.53, "grad_norm": 0.345156729221344, "learning_rate": 1.2372115861972345e-06, "loss": 0.0257, "step": 15153 }, { "epoch": 2.53, "grad_norm": 0.334611177444458, "learning_rate": 1.2363417251098042e-06, "loss": 0.0304, "step": 15154 }, { "epoch": 2.53, "grad_norm": 0.5223187804222107, "learning_rate": 1.2354721497725651e-06, "loss": 0.0353, "step": 15155 }, { "epoch": 2.53, "grad_norm": 0.3395763635635376, "learning_rate": 1.2346028602138671e-06, "loss": 0.03, "step": 15156 }, { "epoch": 2.54, "grad_norm": 0.44394657015800476, "learning_rate": 1.2337338564620539e-06, "loss": 0.0273, "step": 15157 }, { "epoch": 2.54, "grad_norm": 0.35567402839660645, "learning_rate": 1.232865138545465e-06, "loss": 0.0334, "step": 15158 }, { "epoch": 2.54, "grad_norm": 0.35194212198257446, "learning_rate": 1.231996706492421e-06, "loss": 0.0362, "step": 15159 }, { "epoch": 2.54, "grad_norm": 0.392981618642807, "learning_rate": 1.2311285603312394e-06, "loss": 0.0503, "step": 15160 }, { "epoch": 2.54, "grad_norm": 0.39911559224128723, "learning_rate": 1.230260700090231e-06, "loss": 0.038, "step": 15161 }, { "epoch": 2.54, "grad_norm": 0.3741864860057831, "learning_rate": 1.2293931257976888e-06, "loss": 0.0255, "step": 15162 }, { "epoch": 2.54, "grad_norm": 0.34130391478538513, "learning_rate": 1.2285258374819054e-06, "loss": 0.0218, "step": 15163 }, { "epoch": 2.54, "grad_norm": 0.3197624981403351, "learning_rate": 1.2276588351711572e-06, "loss": 0.0226, "step": 15164 }, { "epoch": 2.54, "grad_norm": 0.5189191699028015, "learning_rate": 1.2267921188937126e-06, "loss": 0.0264, "step": 15165 }, { "epoch": 2.54, "grad_norm": 0.3664683997631073, "learning_rate": 1.225925688677836e-06, "loss": 0.0375, "step": 15166 }, { "epoch": 2.54, "grad_norm": 0.19840563833713531, "learning_rate": 1.2250595445517733e-06, "loss": 0.0164, "step": 15167 }, { "epoch": 2.54, "grad_norm": 0.3881469964981079, "learning_rate": 1.2241936865437698e-06, "loss": 0.0276, "step": 15168 }, { "epoch": 2.54, "grad_norm": 0.3961136043071747, "learning_rate": 1.2233281146820586e-06, "loss": 0.0324, "step": 15169 }, { "epoch": 2.54, "grad_norm": 0.42781558632850647, "learning_rate": 1.2224628289948614e-06, "loss": 0.0402, "step": 15170 }, { "epoch": 2.54, "grad_norm": 0.6017906069755554, "learning_rate": 1.2215978295103892e-06, "loss": 0.0458, "step": 15171 }, { "epoch": 2.54, "grad_norm": 0.42879432439804077, "learning_rate": 1.2207331162568524e-06, "loss": 0.0397, "step": 15172 }, { "epoch": 2.54, "grad_norm": 0.6916113495826721, "learning_rate": 1.2198686892624423e-06, "loss": 0.0459, "step": 15173 }, { "epoch": 2.54, "grad_norm": 0.5329427123069763, "learning_rate": 1.2190045485553426e-06, "loss": 0.0375, "step": 15174 }, { "epoch": 2.54, "grad_norm": 0.3706551790237427, "learning_rate": 1.218140694163732e-06, "loss": 0.0311, "step": 15175 }, { "epoch": 2.54, "grad_norm": 0.4885638952255249, "learning_rate": 1.2172771261157778e-06, "loss": 0.0277, "step": 15176 }, { "epoch": 2.54, "grad_norm": 0.5627382397651672, "learning_rate": 1.2164138444396401e-06, "loss": 0.0502, "step": 15177 }, { "epoch": 2.54, "grad_norm": 0.38034236431121826, "learning_rate": 1.2155508491634648e-06, "loss": 0.014, "step": 15178 }, { "epoch": 2.54, "grad_norm": 0.3653607964515686, "learning_rate": 1.21468814031539e-06, "loss": 0.0278, "step": 15179 }, { "epoch": 2.54, "grad_norm": 0.30451998114585876, "learning_rate": 1.2138257179235468e-06, "loss": 0.0193, "step": 15180 }, { "epoch": 2.54, "grad_norm": 0.6906750202178955, "learning_rate": 1.2129635820160556e-06, "loss": 0.0177, "step": 15181 }, { "epoch": 2.54, "grad_norm": 0.5935201644897461, "learning_rate": 1.212101732621026e-06, "loss": 0.047, "step": 15182 }, { "epoch": 2.54, "grad_norm": 0.4181387424468994, "learning_rate": 1.2112401697665598e-06, "loss": 0.0357, "step": 15183 }, { "epoch": 2.54, "grad_norm": 0.4028388559818268, "learning_rate": 1.2103788934807525e-06, "loss": 0.0286, "step": 15184 }, { "epoch": 2.54, "grad_norm": 0.35090091824531555, "learning_rate": 1.2095179037916826e-06, "loss": 0.0186, "step": 15185 }, { "epoch": 2.54, "grad_norm": 0.2956860661506653, "learning_rate": 1.2086572007274278e-06, "loss": 0.0224, "step": 15186 }, { "epoch": 2.54, "grad_norm": 0.3871343731880188, "learning_rate": 1.207796784316052e-06, "loss": 0.0362, "step": 15187 }, { "epoch": 2.54, "grad_norm": 0.5755636096000671, "learning_rate": 1.2069366545856054e-06, "loss": 0.0324, "step": 15188 }, { "epoch": 2.54, "grad_norm": 0.4733287990093231, "learning_rate": 1.2060768115641396e-06, "loss": 0.0364, "step": 15189 }, { "epoch": 2.54, "grad_norm": 0.30088427662849426, "learning_rate": 1.205217255279687e-06, "loss": 0.0272, "step": 15190 }, { "epoch": 2.54, "grad_norm": 0.3426758944988251, "learning_rate": 1.204357985760277e-06, "loss": 0.0231, "step": 15191 }, { "epoch": 2.54, "grad_norm": 0.4570142924785614, "learning_rate": 1.2034990030339244e-06, "loss": 0.0321, "step": 15192 }, { "epoch": 2.54, "grad_norm": 0.36959272623062134, "learning_rate": 1.2026403071286386e-06, "loss": 0.0294, "step": 15193 }, { "epoch": 2.54, "grad_norm": 0.3053632378578186, "learning_rate": 1.2017818980724216e-06, "loss": 0.0212, "step": 15194 }, { "epoch": 2.54, "grad_norm": 0.29626408219337463, "learning_rate": 1.200923775893259e-06, "loss": 0.0254, "step": 15195 }, { "epoch": 2.54, "grad_norm": 0.39718878269195557, "learning_rate": 1.2000659406191318e-06, "loss": 0.0232, "step": 15196 }, { "epoch": 2.54, "grad_norm": 0.488173246383667, "learning_rate": 1.1992083922780117e-06, "loss": 0.023, "step": 15197 }, { "epoch": 2.54, "grad_norm": 0.31051918864250183, "learning_rate": 1.1983511308978602e-06, "loss": 0.0269, "step": 15198 }, { "epoch": 2.54, "grad_norm": 0.4281766414642334, "learning_rate": 1.1974941565066277e-06, "loss": 0.0361, "step": 15199 }, { "epoch": 2.54, "grad_norm": 0.43489354848861694, "learning_rate": 1.196637469132257e-06, "loss": 0.0362, "step": 15200 }, { "epoch": 2.54, "grad_norm": 0.41994211077690125, "learning_rate": 1.195781068802685e-06, "loss": 0.0289, "step": 15201 }, { "epoch": 2.54, "grad_norm": 0.36179420351982117, "learning_rate": 1.194924955545831e-06, "loss": 0.0253, "step": 15202 }, { "epoch": 2.54, "grad_norm": 0.37563765048980713, "learning_rate": 1.1940691293896145e-06, "loss": 0.0473, "step": 15203 }, { "epoch": 2.54, "grad_norm": 0.3145941197872162, "learning_rate": 1.1932135903619357e-06, "loss": 0.032, "step": 15204 }, { "epoch": 2.54, "grad_norm": 0.38133394718170166, "learning_rate": 1.1923583384906945e-06, "loss": 0.0332, "step": 15205 }, { "epoch": 2.54, "grad_norm": 0.36886024475097656, "learning_rate": 1.1915033738037773e-06, "loss": 0.0322, "step": 15206 }, { "epoch": 2.54, "grad_norm": 0.3382458984851837, "learning_rate": 1.1906486963290576e-06, "loss": 0.027, "step": 15207 }, { "epoch": 2.54, "grad_norm": 0.4262816607952118, "learning_rate": 1.1897943060944051e-06, "loss": 0.0316, "step": 15208 }, { "epoch": 2.54, "grad_norm": 0.38337409496307373, "learning_rate": 1.188940203127681e-06, "loss": 0.0246, "step": 15209 }, { "epoch": 2.54, "grad_norm": 0.37882232666015625, "learning_rate": 1.1880863874567305e-06, "loss": 0.0333, "step": 15210 }, { "epoch": 2.54, "grad_norm": 0.3521277606487274, "learning_rate": 1.1872328591093974e-06, "loss": 0.0268, "step": 15211 }, { "epoch": 2.54, "grad_norm": 0.4155515730381012, "learning_rate": 1.186379618113509e-06, "loss": 0.0305, "step": 15212 }, { "epoch": 2.54, "grad_norm": 0.43698999285697937, "learning_rate": 1.1855266644968865e-06, "loss": 0.0289, "step": 15213 }, { "epoch": 2.54, "grad_norm": 2.199526071548462, "learning_rate": 1.1846739982873424e-06, "loss": 0.0357, "step": 15214 }, { "epoch": 2.54, "grad_norm": 0.5539392828941345, "learning_rate": 1.1838216195126783e-06, "loss": 0.0332, "step": 15215 }, { "epoch": 2.54, "grad_norm": 0.41150131821632385, "learning_rate": 1.182969528200686e-06, "loss": 0.0332, "step": 15216 }, { "epoch": 2.55, "grad_norm": 0.3041938245296478, "learning_rate": 1.1821177243791538e-06, "loss": 0.0327, "step": 15217 }, { "epoch": 2.55, "grad_norm": 0.3600238859653473, "learning_rate": 1.1812662080758497e-06, "loss": 0.0278, "step": 15218 }, { "epoch": 2.55, "grad_norm": 0.4236447215080261, "learning_rate": 1.1804149793185439e-06, "loss": 0.0246, "step": 15219 }, { "epoch": 2.55, "grad_norm": 0.33670052886009216, "learning_rate": 1.1795640381349881e-06, "loss": 0.0244, "step": 15220 }, { "epoch": 2.55, "grad_norm": 0.3996603488922119, "learning_rate": 1.1787133845529286e-06, "loss": 0.0445, "step": 15221 }, { "epoch": 2.55, "grad_norm": 0.4523179531097412, "learning_rate": 1.1778630186001039e-06, "loss": 0.0233, "step": 15222 }, { "epoch": 2.55, "grad_norm": 0.353359192609787, "learning_rate": 1.1770129403042386e-06, "loss": 0.0332, "step": 15223 }, { "epoch": 2.55, "grad_norm": 0.3398493528366089, "learning_rate": 1.1761631496930526e-06, "loss": 0.03, "step": 15224 }, { "epoch": 2.55, "grad_norm": 0.44830986857414246, "learning_rate": 1.1753136467942562e-06, "loss": 0.0323, "step": 15225 }, { "epoch": 2.55, "grad_norm": 0.5956506133079529, "learning_rate": 1.1744644316355447e-06, "loss": 0.051, "step": 15226 }, { "epoch": 2.55, "grad_norm": 0.4087086617946625, "learning_rate": 1.1736155042446085e-06, "loss": 0.0244, "step": 15227 }, { "epoch": 2.55, "grad_norm": 0.38259586691856384, "learning_rate": 1.1727668646491318e-06, "loss": 0.0409, "step": 15228 }, { "epoch": 2.55, "grad_norm": 0.4021347165107727, "learning_rate": 1.1719185128767818e-06, "loss": 0.0267, "step": 15229 }, { "epoch": 2.55, "grad_norm": 0.3711887300014496, "learning_rate": 1.1710704489552182e-06, "loss": 0.0346, "step": 15230 }, { "epoch": 2.55, "grad_norm": 0.8762123584747314, "learning_rate": 1.1702226729120968e-06, "loss": 0.0324, "step": 15231 }, { "epoch": 2.55, "grad_norm": 0.34819164872169495, "learning_rate": 1.16937518477506e-06, "loss": 0.0285, "step": 15232 }, { "epoch": 2.55, "grad_norm": 0.2871325612068176, "learning_rate": 1.1685279845717424e-06, "loss": 0.0224, "step": 15233 }, { "epoch": 2.55, "grad_norm": 0.3541069030761719, "learning_rate": 1.167681072329766e-06, "loss": 0.0259, "step": 15234 }, { "epoch": 2.55, "grad_norm": 0.31038710474967957, "learning_rate": 1.1668344480767446e-06, "loss": 0.0194, "step": 15235 }, { "epoch": 2.55, "grad_norm": 0.3448295295238495, "learning_rate": 1.165988111840286e-06, "loss": 0.027, "step": 15236 }, { "epoch": 2.55, "grad_norm": 0.27056097984313965, "learning_rate": 1.165142063647985e-06, "loss": 0.0208, "step": 15237 }, { "epoch": 2.55, "grad_norm": 0.5048871636390686, "learning_rate": 1.1642963035274257e-06, "loss": 0.0292, "step": 15238 }, { "epoch": 2.55, "grad_norm": 0.4869833290576935, "learning_rate": 1.1634508315061899e-06, "loss": 0.0276, "step": 15239 }, { "epoch": 2.55, "grad_norm": 0.5064007639884949, "learning_rate": 1.1626056476118408e-06, "loss": 0.042, "step": 15240 }, { "epoch": 2.55, "grad_norm": 0.438641220331192, "learning_rate": 1.1617607518719388e-06, "loss": 0.0403, "step": 15241 }, { "epoch": 2.55, "grad_norm": 0.5326732993125916, "learning_rate": 1.1609161443140337e-06, "loss": 0.038, "step": 15242 }, { "epoch": 2.55, "grad_norm": 0.3717680871486664, "learning_rate": 1.160071824965664e-06, "loss": 0.0313, "step": 15243 }, { "epoch": 2.55, "grad_norm": 0.3727893531322479, "learning_rate": 1.1592277938543583e-06, "loss": 0.0215, "step": 15244 }, { "epoch": 2.55, "grad_norm": 0.3833501636981964, "learning_rate": 1.1583840510076405e-06, "loss": 0.0363, "step": 15245 }, { "epoch": 2.55, "grad_norm": 0.3886473476886749, "learning_rate": 1.1575405964530184e-06, "loss": 0.0331, "step": 15246 }, { "epoch": 2.55, "grad_norm": 0.2577580213546753, "learning_rate": 1.1566974302179978e-06, "loss": 0.0189, "step": 15247 }, { "epoch": 2.55, "grad_norm": 0.45474714040756226, "learning_rate": 1.1558545523300668e-06, "loss": 0.0385, "step": 15248 }, { "epoch": 2.55, "grad_norm": 0.3115815222263336, "learning_rate": 1.15501196281671e-06, "loss": 0.0177, "step": 15249 }, { "epoch": 2.55, "grad_norm": 0.5355966687202454, "learning_rate": 1.1541696617054055e-06, "loss": 0.0332, "step": 15250 }, { "epoch": 2.55, "grad_norm": 0.3597548305988312, "learning_rate": 1.1533276490236122e-06, "loss": 0.0285, "step": 15251 }, { "epoch": 2.55, "grad_norm": 0.36794036626815796, "learning_rate": 1.1524859247987863e-06, "loss": 0.0358, "step": 15252 }, { "epoch": 2.55, "grad_norm": 0.37743163108825684, "learning_rate": 1.1516444890583744e-06, "loss": 0.0251, "step": 15253 }, { "epoch": 2.55, "grad_norm": 0.37581348419189453, "learning_rate": 1.1508033418298126e-06, "loss": 0.0291, "step": 15254 }, { "epoch": 2.55, "grad_norm": 0.47028225660324097, "learning_rate": 1.1499624831405242e-06, "loss": 0.0295, "step": 15255 }, { "epoch": 2.55, "grad_norm": 0.38885796070098877, "learning_rate": 1.14912191301793e-06, "loss": 0.0464, "step": 15256 }, { "epoch": 2.55, "grad_norm": 0.4297519624233246, "learning_rate": 1.1482816314894386e-06, "loss": 0.0358, "step": 15257 }, { "epoch": 2.55, "grad_norm": 0.33471372723579407, "learning_rate": 1.147441638582445e-06, "loss": 0.0356, "step": 15258 }, { "epoch": 2.55, "grad_norm": 0.265576034784317, "learning_rate": 1.1466019343243418e-06, "loss": 0.0235, "step": 15259 }, { "epoch": 2.55, "grad_norm": 0.2882997393608093, "learning_rate": 1.1457625187425058e-06, "loss": 0.0304, "step": 15260 }, { "epoch": 2.55, "grad_norm": 0.8151448369026184, "learning_rate": 1.14492339186431e-06, "loss": 0.0359, "step": 15261 }, { "epoch": 2.55, "grad_norm": 0.2978289723396301, "learning_rate": 1.144084553717113e-06, "loss": 0.0202, "step": 15262 }, { "epoch": 2.55, "grad_norm": 0.3870468735694885, "learning_rate": 1.1432460043282646e-06, "loss": 0.036, "step": 15263 }, { "epoch": 2.55, "grad_norm": 0.4195573627948761, "learning_rate": 1.142407743725109e-06, "loss": 0.0416, "step": 15264 }, { "epoch": 2.55, "grad_norm": 0.4178158938884735, "learning_rate": 1.1415697719349805e-06, "loss": 0.0399, "step": 15265 }, { "epoch": 2.55, "grad_norm": 0.5574538707733154, "learning_rate": 1.140732088985198e-06, "loss": 0.0575, "step": 15266 }, { "epoch": 2.55, "grad_norm": 0.34187307953834534, "learning_rate": 1.139894694903081e-06, "loss": 0.0228, "step": 15267 }, { "epoch": 2.55, "grad_norm": 0.3322651982307434, "learning_rate": 1.1390575897159284e-06, "loss": 0.0243, "step": 15268 }, { "epoch": 2.55, "grad_norm": 0.4786578416824341, "learning_rate": 1.1382207734510364e-06, "loss": 0.0241, "step": 15269 }, { "epoch": 2.55, "grad_norm": 0.44931116700172424, "learning_rate": 1.1373842461356931e-06, "loss": 0.0371, "step": 15270 }, { "epoch": 2.55, "grad_norm": 0.3577224910259247, "learning_rate": 1.1365480077971692e-06, "loss": 0.0198, "step": 15271 }, { "epoch": 2.55, "grad_norm": 0.2925190329551697, "learning_rate": 1.1357120584627357e-06, "loss": 0.0169, "step": 15272 }, { "epoch": 2.55, "grad_norm": 0.44081324338912964, "learning_rate": 1.1348763981596512e-06, "loss": 0.0485, "step": 15273 }, { "epoch": 2.55, "grad_norm": 0.36978840827941895, "learning_rate": 1.1340410269151581e-06, "loss": 0.0228, "step": 15274 }, { "epoch": 2.55, "grad_norm": 0.3436766266822815, "learning_rate": 1.133205944756499e-06, "loss": 0.0313, "step": 15275 }, { "epoch": 2.55, "grad_norm": 0.268456369638443, "learning_rate": 1.1323711517109027e-06, "loss": 0.0206, "step": 15276 }, { "epoch": 2.56, "grad_norm": 0.33541327714920044, "learning_rate": 1.1315366478055844e-06, "loss": 0.027, "step": 15277 }, { "epoch": 2.56, "grad_norm": 0.3154591917991638, "learning_rate": 1.1307024330677608e-06, "loss": 0.0209, "step": 15278 }, { "epoch": 2.56, "grad_norm": 0.8456084132194519, "learning_rate": 1.129868507524625e-06, "loss": 0.0338, "step": 15279 }, { "epoch": 2.56, "grad_norm": 0.4070282280445099, "learning_rate": 1.1290348712033727e-06, "loss": 0.0306, "step": 15280 }, { "epoch": 2.56, "grad_norm": 0.3929747939109802, "learning_rate": 1.1282015241311873e-06, "loss": 0.0331, "step": 15281 }, { "epoch": 2.56, "grad_norm": 0.5759568214416504, "learning_rate": 1.127368466335239e-06, "loss": 0.0438, "step": 15282 }, { "epoch": 2.56, "grad_norm": 0.38492918014526367, "learning_rate": 1.1265356978426889e-06, "loss": 0.0244, "step": 15283 }, { "epoch": 2.56, "grad_norm": 0.46413955092430115, "learning_rate": 1.1257032186806938e-06, "loss": 0.0585, "step": 15284 }, { "epoch": 2.56, "grad_norm": 0.36843574047088623, "learning_rate": 1.1248710288763953e-06, "loss": 0.0303, "step": 15285 }, { "epoch": 2.56, "grad_norm": 0.40844112634658813, "learning_rate": 1.1240391284569275e-06, "loss": 0.0403, "step": 15286 }, { "epoch": 2.56, "grad_norm": 0.43816110491752625, "learning_rate": 1.1232075174494172e-06, "loss": 0.0396, "step": 15287 }, { "epoch": 2.56, "grad_norm": 0.33513858914375305, "learning_rate": 1.1223761958809799e-06, "loss": 0.0262, "step": 15288 }, { "epoch": 2.56, "grad_norm": 0.34700441360473633, "learning_rate": 1.1215451637787234e-06, "loss": 0.027, "step": 15289 }, { "epoch": 2.56, "grad_norm": 0.2993122637271881, "learning_rate": 1.1207144211697429e-06, "loss": 0.0269, "step": 15290 }, { "epoch": 2.56, "grad_norm": 0.4492940306663513, "learning_rate": 1.1198839680811235e-06, "loss": 0.0382, "step": 15291 }, { "epoch": 2.56, "grad_norm": 0.49812746047973633, "learning_rate": 1.1190538045399479e-06, "loss": 0.0349, "step": 15292 }, { "epoch": 2.56, "grad_norm": 0.4038204252719879, "learning_rate": 1.1182239305732835e-06, "loss": 0.0307, "step": 15293 }, { "epoch": 2.56, "grad_norm": 0.6207565069198608, "learning_rate": 1.117394346208185e-06, "loss": 0.0386, "step": 15294 }, { "epoch": 2.56, "grad_norm": 0.23975254595279694, "learning_rate": 1.1165650514717076e-06, "loss": 0.0234, "step": 15295 }, { "epoch": 2.56, "grad_norm": 0.3306470811367035, "learning_rate": 1.115736046390886e-06, "loss": 0.0283, "step": 15296 }, { "epoch": 2.56, "grad_norm": 0.4143964648246765, "learning_rate": 1.114907330992755e-06, "loss": 0.042, "step": 15297 }, { "epoch": 2.56, "grad_norm": 0.4680563509464264, "learning_rate": 1.1140789053043378e-06, "loss": 0.0366, "step": 15298 }, { "epoch": 2.56, "grad_norm": 0.6300540566444397, "learning_rate": 1.1132507693526417e-06, "loss": 0.0499, "step": 15299 }, { "epoch": 2.56, "grad_norm": 0.5739495754241943, "learning_rate": 1.112422923164671e-06, "loss": 0.0417, "step": 15300 }, { "epoch": 2.56, "grad_norm": 0.3621385395526886, "learning_rate": 1.1115953667674195e-06, "loss": 0.0291, "step": 15301 }, { "epoch": 2.56, "grad_norm": 0.33461254835128784, "learning_rate": 1.1107681001878678e-06, "loss": 0.0294, "step": 15302 }, { "epoch": 2.56, "grad_norm": 0.7532358765602112, "learning_rate": 1.1099411234529934e-06, "loss": 0.052, "step": 15303 }, { "epoch": 2.56, "grad_norm": 0.30558088421821594, "learning_rate": 1.1091144365897587e-06, "loss": 0.0237, "step": 15304 }, { "epoch": 2.56, "grad_norm": 0.2729414105415344, "learning_rate": 1.1082880396251194e-06, "loss": 0.0214, "step": 15305 }, { "epoch": 2.56, "grad_norm": 0.4131448268890381, "learning_rate": 1.1074619325860225e-06, "loss": 0.0303, "step": 15306 }, { "epoch": 2.56, "grad_norm": 0.5050178170204163, "learning_rate": 1.1066361154994042e-06, "loss": 0.0414, "step": 15307 }, { "epoch": 2.56, "grad_norm": 0.32279059290885925, "learning_rate": 1.105810588392189e-06, "loss": 0.0283, "step": 15308 }, { "epoch": 2.56, "grad_norm": 0.35957056283950806, "learning_rate": 1.1049853512912967e-06, "loss": 0.0294, "step": 15309 }, { "epoch": 2.56, "grad_norm": 0.34593018889427185, "learning_rate": 1.1041604042236342e-06, "loss": 0.0248, "step": 15310 }, { "epoch": 2.56, "grad_norm": 0.3300807774066925, "learning_rate": 1.1033357472160976e-06, "loss": 0.0291, "step": 15311 }, { "epoch": 2.56, "grad_norm": 0.3712400794029236, "learning_rate": 1.102511380295579e-06, "loss": 0.0222, "step": 15312 }, { "epoch": 2.56, "grad_norm": 0.48267003893852234, "learning_rate": 1.1016873034889586e-06, "loss": 0.029, "step": 15313 }, { "epoch": 2.56, "grad_norm": 0.366098016500473, "learning_rate": 1.100863516823103e-06, "loss": 0.0328, "step": 15314 }, { "epoch": 2.56, "grad_norm": 0.4501081705093384, "learning_rate": 1.1000400203248773e-06, "loss": 0.0334, "step": 15315 }, { "epoch": 2.56, "grad_norm": 0.443726509809494, "learning_rate": 1.0992168140211267e-06, "loss": 0.0246, "step": 15316 }, { "epoch": 2.56, "grad_norm": 0.3947564661502838, "learning_rate": 1.0983938979386998e-06, "loss": 0.0301, "step": 15317 }, { "epoch": 2.56, "grad_norm": 0.23930028080940247, "learning_rate": 1.0975712721044252e-06, "loss": 0.0189, "step": 15318 }, { "epoch": 2.56, "grad_norm": 0.40651026368141174, "learning_rate": 1.0967489365451234e-06, "loss": 0.0346, "step": 15319 }, { "epoch": 2.56, "grad_norm": 0.5425399541854858, "learning_rate": 1.0959268912876098e-06, "loss": 0.0474, "step": 15320 }, { "epoch": 2.56, "grad_norm": 0.7469460964202881, "learning_rate": 1.0951051363586906e-06, "loss": 0.0489, "step": 15321 }, { "epoch": 2.56, "grad_norm": 0.268755704164505, "learning_rate": 1.0942836717851568e-06, "loss": 0.0153, "step": 15322 }, { "epoch": 2.56, "grad_norm": 0.3197877109050751, "learning_rate": 1.0934624975937957e-06, "loss": 0.0266, "step": 15323 }, { "epoch": 2.56, "grad_norm": 0.40631282329559326, "learning_rate": 1.0926416138113826e-06, "loss": 0.0362, "step": 15324 }, { "epoch": 2.56, "grad_norm": 0.6113772392272949, "learning_rate": 1.0918210204646796e-06, "loss": 0.0441, "step": 15325 }, { "epoch": 2.56, "grad_norm": 0.5458798408508301, "learning_rate": 1.0910007175804481e-06, "loss": 0.0296, "step": 15326 }, { "epoch": 2.56, "grad_norm": 0.47658035159111023, "learning_rate": 1.0901807051854318e-06, "loss": 0.0272, "step": 15327 }, { "epoch": 2.56, "grad_norm": 0.42465293407440186, "learning_rate": 1.089360983306369e-06, "loss": 0.0329, "step": 15328 }, { "epoch": 2.56, "grad_norm": 0.3820858597755432, "learning_rate": 1.0885415519699905e-06, "loss": 0.0197, "step": 15329 }, { "epoch": 2.56, "grad_norm": 0.41092735528945923, "learning_rate": 1.0877224112030106e-06, "loss": 0.0369, "step": 15330 }, { "epoch": 2.56, "grad_norm": 0.3511195182800293, "learning_rate": 1.0869035610321422e-06, "loss": 0.0258, "step": 15331 }, { "epoch": 2.56, "grad_norm": 0.2791078984737396, "learning_rate": 1.086085001484083e-06, "loss": 0.0162, "step": 15332 }, { "epoch": 2.56, "grad_norm": 0.3766315281391144, "learning_rate": 1.0852667325855226e-06, "loss": 0.0344, "step": 15333 }, { "epoch": 2.56, "grad_norm": 0.42459091544151306, "learning_rate": 1.0844487543631433e-06, "loss": 0.0285, "step": 15334 }, { "epoch": 2.56, "grad_norm": 0.27943453192710876, "learning_rate": 1.0836310668436134e-06, "loss": 0.0142, "step": 15335 }, { "epoch": 2.56, "grad_norm": 0.4325415790081024, "learning_rate": 1.0828136700535974e-06, "loss": 0.0393, "step": 15336 }, { "epoch": 2.57, "grad_norm": 0.3537237346172333, "learning_rate": 1.0819965640197494e-06, "loss": 0.0204, "step": 15337 }, { "epoch": 2.57, "grad_norm": 0.2819926142692566, "learning_rate": 1.0811797487687092e-06, "loss": 0.0231, "step": 15338 }, { "epoch": 2.57, "grad_norm": 0.3665594160556793, "learning_rate": 1.080363224327109e-06, "loss": 0.0331, "step": 15339 }, { "epoch": 2.57, "grad_norm": 0.5010690093040466, "learning_rate": 1.079546990721575e-06, "loss": 0.0343, "step": 15340 }, { "epoch": 2.57, "grad_norm": 0.3784995973110199, "learning_rate": 1.0787310479787216e-06, "loss": 0.0254, "step": 15341 }, { "epoch": 2.57, "grad_norm": 0.4508608281612396, "learning_rate": 1.0779153961251508e-06, "loss": 0.0263, "step": 15342 }, { "epoch": 2.57, "grad_norm": 0.39468470215797424, "learning_rate": 1.0771000351874595e-06, "loss": 0.0331, "step": 15343 }, { "epoch": 2.57, "grad_norm": 0.4704970419406891, "learning_rate": 1.0762849651922368e-06, "loss": 0.041, "step": 15344 }, { "epoch": 2.57, "grad_norm": 0.4116628170013428, "learning_rate": 1.0754701861660544e-06, "loss": 0.0431, "step": 15345 }, { "epoch": 2.57, "grad_norm": 0.37084293365478516, "learning_rate": 1.0746556981354828e-06, "loss": 0.0238, "step": 15346 }, { "epoch": 2.57, "grad_norm": 0.49188417196273804, "learning_rate": 1.0738415011270754e-06, "loss": 0.0357, "step": 15347 }, { "epoch": 2.57, "grad_norm": 0.38167232275009155, "learning_rate": 1.073027595167384e-06, "loss": 0.0232, "step": 15348 }, { "epoch": 2.57, "grad_norm": 0.3915490210056305, "learning_rate": 1.0722139802829467e-06, "loss": 0.0288, "step": 15349 }, { "epoch": 2.57, "grad_norm": 0.45474228262901306, "learning_rate": 1.0714006565002889e-06, "loss": 0.0281, "step": 15350 }, { "epoch": 2.57, "grad_norm": 0.33550551533699036, "learning_rate": 1.0705876238459334e-06, "loss": 0.0286, "step": 15351 }, { "epoch": 2.57, "grad_norm": 0.3259563148021698, "learning_rate": 1.0697748823463883e-06, "loss": 0.0342, "step": 15352 }, { "epoch": 2.57, "grad_norm": 0.3603207468986511, "learning_rate": 1.0689624320281545e-06, "loss": 0.0352, "step": 15353 }, { "epoch": 2.57, "grad_norm": 0.548439621925354, "learning_rate": 1.0681502729177261e-06, "loss": 0.0322, "step": 15354 }, { "epoch": 2.57, "grad_norm": 0.4944782257080078, "learning_rate": 1.0673384050415813e-06, "loss": 0.034, "step": 15355 }, { "epoch": 2.57, "grad_norm": 0.358185738325119, "learning_rate": 1.06652682842619e-06, "loss": 0.0301, "step": 15356 }, { "epoch": 2.57, "grad_norm": 0.387846976518631, "learning_rate": 1.0657155430980203e-06, "loss": 0.019, "step": 15357 }, { "epoch": 2.57, "grad_norm": 0.5325486660003662, "learning_rate": 1.0649045490835196e-06, "loss": 0.0436, "step": 15358 }, { "epoch": 2.57, "grad_norm": 0.35913851857185364, "learning_rate": 1.0640938464091366e-06, "loss": 0.0276, "step": 15359 }, { "epoch": 2.57, "grad_norm": 0.5240128040313721, "learning_rate": 1.0632834351013011e-06, "loss": 0.0326, "step": 15360 }, { "epoch": 2.57, "grad_norm": 0.31685373187065125, "learning_rate": 1.0624733151864375e-06, "loss": 0.0234, "step": 15361 }, { "epoch": 2.57, "grad_norm": 0.34808769822120667, "learning_rate": 1.0616634866909659e-06, "loss": 0.0335, "step": 15362 }, { "epoch": 2.57, "grad_norm": 0.3172799348831177, "learning_rate": 1.0608539496412884e-06, "loss": 0.0193, "step": 15363 }, { "epoch": 2.57, "grad_norm": 0.4512052834033966, "learning_rate": 1.0600447040637985e-06, "loss": 0.0299, "step": 15364 }, { "epoch": 2.57, "grad_norm": 0.44279566407203674, "learning_rate": 1.059235749984887e-06, "loss": 0.0329, "step": 15365 }, { "epoch": 2.57, "grad_norm": 0.3591715097427368, "learning_rate": 1.0584270874309289e-06, "loss": 0.0334, "step": 15366 }, { "epoch": 2.57, "grad_norm": 0.379170686006546, "learning_rate": 1.0576187164282903e-06, "loss": 0.0395, "step": 15367 }, { "epoch": 2.57, "grad_norm": 0.4602971374988556, "learning_rate": 1.0568106370033305e-06, "loss": 0.0442, "step": 15368 }, { "epoch": 2.57, "grad_norm": 0.5865728855133057, "learning_rate": 1.0560028491823992e-06, "loss": 0.0341, "step": 15369 }, { "epoch": 2.57, "grad_norm": 0.43435904383659363, "learning_rate": 1.0551953529918336e-06, "loss": 0.0358, "step": 15370 }, { "epoch": 2.57, "grad_norm": 0.4519045650959015, "learning_rate": 1.0543881484579655e-06, "loss": 0.0458, "step": 15371 }, { "epoch": 2.57, "grad_norm": 0.5253692269325256, "learning_rate": 1.0535812356071106e-06, "loss": 0.0338, "step": 15372 }, { "epoch": 2.57, "grad_norm": 0.40956422686576843, "learning_rate": 1.0527746144655837e-06, "loss": 0.0319, "step": 15373 }, { "epoch": 2.57, "grad_norm": 0.34584563970565796, "learning_rate": 1.0519682850596846e-06, "loss": 0.0216, "step": 15374 }, { "epoch": 2.57, "grad_norm": 0.5415627956390381, "learning_rate": 1.0511622474157013e-06, "loss": 0.0264, "step": 15375 }, { "epoch": 2.57, "grad_norm": 0.4213315546512604, "learning_rate": 1.0503565015599182e-06, "loss": 0.0247, "step": 15376 }, { "epoch": 2.57, "grad_norm": 0.46065646409988403, "learning_rate": 1.04955104751861e-06, "loss": 0.0392, "step": 15377 }, { "epoch": 2.57, "grad_norm": 0.39563822746276855, "learning_rate": 1.0487458853180354e-06, "loss": 0.0314, "step": 15378 }, { "epoch": 2.57, "grad_norm": 0.3564721941947937, "learning_rate": 1.0479410149844516e-06, "loss": 0.0319, "step": 15379 }, { "epoch": 2.57, "grad_norm": 0.3914089798927307, "learning_rate": 1.0471364365440994e-06, "loss": 0.0426, "step": 15380 }, { "epoch": 2.57, "grad_norm": 0.3611837923526764, "learning_rate": 1.0463321500232138e-06, "loss": 0.0257, "step": 15381 }, { "epoch": 2.57, "grad_norm": 0.40915513038635254, "learning_rate": 1.0455281554480213e-06, "loss": 0.0367, "step": 15382 }, { "epoch": 2.57, "grad_norm": 0.3367499113082886, "learning_rate": 1.0447244528447332e-06, "loss": 0.0219, "step": 15383 }, { "epoch": 2.57, "grad_norm": 0.37675023078918457, "learning_rate": 1.0439210422395585e-06, "loss": 0.0363, "step": 15384 }, { "epoch": 2.57, "grad_norm": 0.2710812985897064, "learning_rate": 1.043117923658694e-06, "loss": 0.0145, "step": 15385 }, { "epoch": 2.57, "grad_norm": 0.36243462562561035, "learning_rate": 1.0423150971283246e-06, "loss": 0.0336, "step": 15386 }, { "epoch": 2.57, "grad_norm": 0.4858196973800659, "learning_rate": 1.0415125626746293e-06, "loss": 0.026, "step": 15387 }, { "epoch": 2.57, "grad_norm": 0.33686310052871704, "learning_rate": 1.0407103203237745e-06, "loss": 0.0287, "step": 15388 }, { "epoch": 2.57, "grad_norm": 0.43343713879585266, "learning_rate": 1.0399083701019163e-06, "loss": 0.0394, "step": 15389 }, { "epoch": 2.57, "grad_norm": 0.3520599901676178, "learning_rate": 1.0391067120352073e-06, "loss": 0.0314, "step": 15390 }, { "epoch": 2.57, "grad_norm": 0.30400192737579346, "learning_rate": 1.0383053461497817e-06, "loss": 0.0211, "step": 15391 }, { "epoch": 2.57, "grad_norm": 0.3948081135749817, "learning_rate": 1.0375042724717733e-06, "loss": 0.032, "step": 15392 }, { "epoch": 2.57, "grad_norm": 0.3793010413646698, "learning_rate": 1.0367034910273022e-06, "loss": 0.0311, "step": 15393 }, { "epoch": 2.57, "grad_norm": 0.39855754375457764, "learning_rate": 1.0359030018424777e-06, "loss": 0.0453, "step": 15394 }, { "epoch": 2.57, "grad_norm": 0.36168166995048523, "learning_rate": 1.0351028049433987e-06, "loss": 0.0349, "step": 15395 }, { "epoch": 2.58, "grad_norm": 0.4044082462787628, "learning_rate": 1.03430290035616e-06, "loss": 0.0324, "step": 15396 }, { "epoch": 2.58, "grad_norm": 0.3751686215400696, "learning_rate": 1.0335032881068397e-06, "loss": 0.0273, "step": 15397 }, { "epoch": 2.58, "grad_norm": 0.3453427851200104, "learning_rate": 1.0327039682215157e-06, "loss": 0.0328, "step": 15398 }, { "epoch": 2.58, "grad_norm": 0.3753781318664551, "learning_rate": 1.031904940726246e-06, "loss": 0.0393, "step": 15399 }, { "epoch": 2.58, "grad_norm": 0.30098530650138855, "learning_rate": 1.0311062056470844e-06, "loss": 0.0326, "step": 15400 }, { "epoch": 2.58, "grad_norm": 0.2841911315917969, "learning_rate": 1.0303077630100755e-06, "loss": 0.0193, "step": 15401 }, { "epoch": 2.58, "grad_norm": 0.3644443154335022, "learning_rate": 1.0295096128412564e-06, "loss": 0.0315, "step": 15402 }, { "epoch": 2.58, "grad_norm": 0.3832226097583771, "learning_rate": 1.0287117551666459e-06, "loss": 0.0406, "step": 15403 }, { "epoch": 2.58, "grad_norm": 0.3546154201030731, "learning_rate": 1.0279141900122658e-06, "loss": 0.0273, "step": 15404 }, { "epoch": 2.58, "grad_norm": 0.38908132910728455, "learning_rate": 1.0271169174041172e-06, "loss": 0.0245, "step": 15405 }, { "epoch": 2.58, "grad_norm": 0.29416781663894653, "learning_rate": 1.0263199373681964e-06, "loss": 0.0292, "step": 15406 }, { "epoch": 2.58, "grad_norm": 0.3423738479614258, "learning_rate": 1.0255232499304923e-06, "loss": 0.0256, "step": 15407 }, { "epoch": 2.58, "grad_norm": 0.4814720153808594, "learning_rate": 1.0247268551169787e-06, "loss": 0.0483, "step": 15408 }, { "epoch": 2.58, "grad_norm": 0.28242582082748413, "learning_rate": 1.0239307529536246e-06, "loss": 0.0216, "step": 15409 }, { "epoch": 2.58, "grad_norm": 0.3836728632450104, "learning_rate": 1.0231349434663905e-06, "loss": 0.0297, "step": 15410 }, { "epoch": 2.58, "grad_norm": 0.3715931177139282, "learning_rate": 1.0223394266812214e-06, "loss": 0.0307, "step": 15411 }, { "epoch": 2.58, "grad_norm": 0.43557536602020264, "learning_rate": 1.0215442026240574e-06, "loss": 0.0325, "step": 15412 }, { "epoch": 2.58, "grad_norm": 0.36797580122947693, "learning_rate": 1.020749271320829e-06, "loss": 0.0294, "step": 15413 }, { "epoch": 2.58, "grad_norm": 0.20344416797161102, "learning_rate": 1.0199546327974518e-06, "loss": 0.0123, "step": 15414 }, { "epoch": 2.58, "grad_norm": 0.5479851961135864, "learning_rate": 1.019160287079841e-06, "loss": 0.0246, "step": 15415 }, { "epoch": 2.58, "grad_norm": 0.6081061959266663, "learning_rate": 1.0183662341938926e-06, "loss": 0.0289, "step": 15416 }, { "epoch": 2.58, "grad_norm": 0.267282098531723, "learning_rate": 1.0175724741655002e-06, "loss": 0.0163, "step": 15417 }, { "epoch": 2.58, "grad_norm": 0.3684316873550415, "learning_rate": 1.0167790070205475e-06, "loss": 0.0313, "step": 15418 }, { "epoch": 2.58, "grad_norm": 0.7453382015228271, "learning_rate": 1.0159858327849037e-06, "loss": 0.0385, "step": 15419 }, { "epoch": 2.58, "grad_norm": 0.6913937330245972, "learning_rate": 1.0151929514844306e-06, "loss": 0.0434, "step": 15420 }, { "epoch": 2.58, "grad_norm": 0.3774193525314331, "learning_rate": 1.014400363144984e-06, "loss": 0.0371, "step": 15421 }, { "epoch": 2.58, "grad_norm": 0.3803999423980713, "learning_rate": 1.0136080677924053e-06, "loss": 0.047, "step": 15422 }, { "epoch": 2.58, "grad_norm": 0.33363091945648193, "learning_rate": 1.0128160654525265e-06, "loss": 0.0281, "step": 15423 }, { "epoch": 2.58, "grad_norm": 0.30757468938827515, "learning_rate": 1.0120243561511734e-06, "loss": 0.0269, "step": 15424 }, { "epoch": 2.58, "grad_norm": 0.6813015937805176, "learning_rate": 1.011232939914162e-06, "loss": 0.0418, "step": 15425 }, { "epoch": 2.58, "grad_norm": 0.38152435421943665, "learning_rate": 1.010441816767298e-06, "loss": 0.0282, "step": 15426 }, { "epoch": 2.58, "grad_norm": 0.42811059951782227, "learning_rate": 1.0096509867363758e-06, "loss": 0.0327, "step": 15427 }, { "epoch": 2.58, "grad_norm": 0.3760440945625305, "learning_rate": 1.0088604498471799e-06, "loss": 0.0398, "step": 15428 }, { "epoch": 2.58, "grad_norm": 0.36294376850128174, "learning_rate": 1.0080702061254888e-06, "loss": 0.0271, "step": 15429 }, { "epoch": 2.58, "grad_norm": 0.36854368448257446, "learning_rate": 1.0072802555970696e-06, "loss": 0.0257, "step": 15430 }, { "epoch": 2.58, "grad_norm": 0.4213627874851227, "learning_rate": 1.0064905982876761e-06, "loss": 0.0319, "step": 15431 }, { "epoch": 2.58, "grad_norm": 0.401028573513031, "learning_rate": 1.0057012342230587e-06, "loss": 0.0307, "step": 15432 }, { "epoch": 2.58, "grad_norm": 0.4622785151004791, "learning_rate": 1.0049121634289583e-06, "loss": 0.0282, "step": 15433 }, { "epoch": 2.58, "grad_norm": 0.5001586079597473, "learning_rate": 1.0041233859310972e-06, "loss": 0.0303, "step": 15434 }, { "epoch": 2.58, "grad_norm": 0.40785959362983704, "learning_rate": 1.003334901755202e-06, "loss": 0.0388, "step": 15435 }, { "epoch": 2.58, "grad_norm": 0.45616626739501953, "learning_rate": 1.0025467109269772e-06, "loss": 0.0291, "step": 15436 }, { "epoch": 2.58, "grad_norm": 0.4228586256504059, "learning_rate": 1.0017588134721212e-06, "loss": 0.0194, "step": 15437 }, { "epoch": 2.58, "grad_norm": 0.34673911333084106, "learning_rate": 1.0009712094163303e-06, "loss": 0.0382, "step": 15438 }, { "epoch": 2.58, "grad_norm": 0.5076369047164917, "learning_rate": 1.0001838987852796e-06, "loss": 0.0363, "step": 15439 }, { "epoch": 2.58, "grad_norm": 0.39223021268844604, "learning_rate": 9.99396881604644e-07, "loss": 0.0279, "step": 15440 }, { "epoch": 2.58, "grad_norm": 0.36773020029067993, "learning_rate": 9.986101579000851e-07, "loss": 0.034, "step": 15441 }, { "epoch": 2.58, "grad_norm": 0.2950621247291565, "learning_rate": 9.978237276972513e-07, "loss": 0.0242, "step": 15442 }, { "epoch": 2.58, "grad_norm": 0.4717303514480591, "learning_rate": 9.97037591021791e-07, "loss": 0.0403, "step": 15443 }, { "epoch": 2.58, "grad_norm": 0.461899071931839, "learning_rate": 9.962517478993329e-07, "loss": 0.0261, "step": 15444 }, { "epoch": 2.58, "grad_norm": 0.3480944335460663, "learning_rate": 9.954661983555004e-07, "loss": 0.0289, "step": 15445 }, { "epoch": 2.58, "grad_norm": 0.4668971300125122, "learning_rate": 9.94680942415911e-07, "loss": 0.0365, "step": 15446 }, { "epoch": 2.58, "grad_norm": 0.6021308898925781, "learning_rate": 9.938959801061632e-07, "loss": 0.0336, "step": 15447 }, { "epoch": 2.58, "grad_norm": 0.35119032859802246, "learning_rate": 9.931113114518575e-07, "loss": 0.035, "step": 15448 }, { "epoch": 2.58, "grad_norm": 0.4255479574203491, "learning_rate": 9.923269364785748e-07, "loss": 0.0286, "step": 15449 }, { "epoch": 2.58, "grad_norm": 0.3693746328353882, "learning_rate": 9.915428552118932e-07, "loss": 0.0397, "step": 15450 }, { "epoch": 2.58, "grad_norm": 0.5272195935249329, "learning_rate": 9.90759067677377e-07, "loss": 0.0423, "step": 15451 }, { "epoch": 2.58, "grad_norm": 0.396187961101532, "learning_rate": 9.899755739005846e-07, "loss": 0.034, "step": 15452 }, { "epoch": 2.58, "grad_norm": 0.30990999937057495, "learning_rate": 9.891923739070596e-07, "loss": 0.0228, "step": 15453 }, { "epoch": 2.58, "grad_norm": 0.3884287178516388, "learning_rate": 9.884094677223432e-07, "loss": 0.0376, "step": 15454 }, { "epoch": 2.58, "grad_norm": 0.4325082004070282, "learning_rate": 9.876268553719614e-07, "loss": 0.0417, "step": 15455 }, { "epoch": 2.59, "grad_norm": 0.3920802175998688, "learning_rate": 9.868445368814295e-07, "loss": 0.0234, "step": 15456 }, { "epoch": 2.59, "grad_norm": 0.5162721872329712, "learning_rate": 9.86062512276259e-07, "loss": 0.0434, "step": 15457 }, { "epoch": 2.59, "grad_norm": 0.40887367725372314, "learning_rate": 9.8528078158195e-07, "loss": 0.0342, "step": 15458 }, { "epoch": 2.59, "grad_norm": 0.3978738784790039, "learning_rate": 9.844993448239882e-07, "loss": 0.0342, "step": 15459 }, { "epoch": 2.59, "grad_norm": 0.5894352197647095, "learning_rate": 9.837182020278557e-07, "loss": 0.0339, "step": 15460 }, { "epoch": 2.59, "grad_norm": 0.3961622416973114, "learning_rate": 9.829373532190235e-07, "loss": 0.0441, "step": 15461 }, { "epoch": 2.59, "grad_norm": 0.3775688111782074, "learning_rate": 9.821567984229486e-07, "loss": 0.0181, "step": 15462 }, { "epoch": 2.59, "grad_norm": 0.3234764635562897, "learning_rate": 9.813765376650851e-07, "loss": 0.0271, "step": 15463 }, { "epoch": 2.59, "grad_norm": 0.24877803027629852, "learning_rate": 9.805965709708732e-07, "loss": 0.0185, "step": 15464 }, { "epoch": 2.59, "grad_norm": 0.3384731411933899, "learning_rate": 9.79816898365743e-07, "loss": 0.0265, "step": 15465 }, { "epoch": 2.59, "grad_norm": 0.325594037771225, "learning_rate": 9.790375198751224e-07, "loss": 0.0264, "step": 15466 }, { "epoch": 2.59, "grad_norm": 0.3471592962741852, "learning_rate": 9.782584355244173e-07, "loss": 0.0406, "step": 15467 }, { "epoch": 2.59, "grad_norm": 0.3258639872074127, "learning_rate": 9.774796453390357e-07, "loss": 0.0221, "step": 15468 }, { "epoch": 2.59, "grad_norm": 0.25838178396224976, "learning_rate": 9.76701149344369e-07, "loss": 0.0231, "step": 15469 }, { "epoch": 2.59, "grad_norm": 0.47616222500801086, "learning_rate": 9.759229475657993e-07, "loss": 0.029, "step": 15470 }, { "epoch": 2.59, "grad_norm": 0.5214262008666992, "learning_rate": 9.751450400287044e-07, "loss": 0.0289, "step": 15471 }, { "epoch": 2.59, "grad_norm": 0.45628419518470764, "learning_rate": 9.743674267584457e-07, "loss": 0.0358, "step": 15472 }, { "epoch": 2.59, "grad_norm": 0.39901643991470337, "learning_rate": 9.7359010778038e-07, "loss": 0.0345, "step": 15473 }, { "epoch": 2.59, "grad_norm": 0.3982756733894348, "learning_rate": 9.728130831198534e-07, "loss": 0.0272, "step": 15474 }, { "epoch": 2.59, "grad_norm": 0.5308936834335327, "learning_rate": 9.720363528022026e-07, "loss": 0.0417, "step": 15475 }, { "epoch": 2.59, "grad_norm": 0.5897071957588196, "learning_rate": 9.71259916852748e-07, "loss": 0.0319, "step": 15476 }, { "epoch": 2.59, "grad_norm": 0.620502233505249, "learning_rate": 9.704837752968143e-07, "loss": 0.0585, "step": 15477 }, { "epoch": 2.59, "grad_norm": 0.36021777987480164, "learning_rate": 9.697079281597044e-07, "loss": 0.0321, "step": 15478 }, { "epoch": 2.59, "grad_norm": 0.25611910223960876, "learning_rate": 9.689323754667134e-07, "loss": 0.0196, "step": 15479 }, { "epoch": 2.59, "grad_norm": 0.33826860785484314, "learning_rate": 9.681571172431326e-07, "loss": 0.0274, "step": 15480 }, { "epoch": 2.59, "grad_norm": 0.3716384470462799, "learning_rate": 9.673821535142392e-07, "loss": 0.0322, "step": 15481 }, { "epoch": 2.59, "grad_norm": 0.4400418996810913, "learning_rate": 9.666074843053042e-07, "loss": 0.0355, "step": 15482 }, { "epoch": 2.59, "grad_norm": 0.37636908888816833, "learning_rate": 9.658331096415852e-07, "loss": 0.0339, "step": 15483 }, { "epoch": 2.59, "grad_norm": 0.36887553334236145, "learning_rate": 9.650590295483298e-07, "loss": 0.0306, "step": 15484 }, { "epoch": 2.59, "grad_norm": 0.5150485634803772, "learning_rate": 9.6428524405078e-07, "loss": 0.0445, "step": 15485 }, { "epoch": 2.59, "grad_norm": 0.3988066613674164, "learning_rate": 9.635117531741656e-07, "loss": 0.0423, "step": 15486 }, { "epoch": 2.59, "grad_norm": 0.294392466545105, "learning_rate": 9.627385569437052e-07, "loss": 0.0227, "step": 15487 }, { "epoch": 2.59, "grad_norm": 0.5386911034584045, "learning_rate": 9.619656553846113e-07, "loss": 0.035, "step": 15488 }, { "epoch": 2.59, "grad_norm": 0.3782098591327667, "learning_rate": 9.611930485220878e-07, "loss": 0.034, "step": 15489 }, { "epoch": 2.59, "grad_norm": 0.356650173664093, "learning_rate": 9.604207363813234e-07, "loss": 0.0288, "step": 15490 }, { "epoch": 2.59, "grad_norm": 0.4915953278541565, "learning_rate": 9.596487189875015e-07, "loss": 0.0334, "step": 15491 }, { "epoch": 2.59, "grad_norm": 0.4993799328804016, "learning_rate": 9.588769963657963e-07, "loss": 0.0248, "step": 15492 }, { "epoch": 2.59, "grad_norm": 0.4013919234275818, "learning_rate": 9.581055685413653e-07, "loss": 0.0369, "step": 15493 }, { "epoch": 2.59, "grad_norm": 0.31073251366615295, "learning_rate": 9.573344355393688e-07, "loss": 0.0207, "step": 15494 }, { "epoch": 2.59, "grad_norm": 0.476611852645874, "learning_rate": 9.565635973849452e-07, "loss": 0.0323, "step": 15495 }, { "epoch": 2.59, "grad_norm": 0.6056680679321289, "learning_rate": 9.5579305410323e-07, "loss": 0.0223, "step": 15496 }, { "epoch": 2.59, "grad_norm": 0.3628506660461426, "learning_rate": 9.550228057193512e-07, "loss": 0.0235, "step": 15497 }, { "epoch": 2.59, "grad_norm": 0.35990509390830994, "learning_rate": 9.542528522584194e-07, "loss": 0.0263, "step": 15498 }, { "epoch": 2.59, "grad_norm": 0.5400443077087402, "learning_rate": 9.534831937455424e-07, "loss": 0.0343, "step": 15499 }, { "epoch": 2.59, "grad_norm": 0.31361445784568787, "learning_rate": 9.527138302058159e-07, "loss": 0.0265, "step": 15500 }, { "epoch": 2.59, "grad_norm": 0.392273873090744, "learning_rate": 9.51944761664323e-07, "loss": 0.0354, "step": 15501 }, { "epoch": 2.59, "grad_norm": 0.4281000792980194, "learning_rate": 9.511759881461447e-07, "loss": 0.0316, "step": 15502 }, { "epoch": 2.59, "grad_norm": 0.3262111246585846, "learning_rate": 9.504075096763454e-07, "loss": 0.0347, "step": 15503 }, { "epoch": 2.59, "grad_norm": 0.5621047019958496, "learning_rate": 9.496393262799797e-07, "loss": 0.0334, "step": 15504 }, { "epoch": 2.59, "grad_norm": 0.2672678828239441, "learning_rate": 9.488714379820985e-07, "loss": 0.0231, "step": 15505 }, { "epoch": 2.59, "grad_norm": 0.6865957379341125, "learning_rate": 9.481038448077406e-07, "loss": 0.0243, "step": 15506 }, { "epoch": 2.59, "grad_norm": 0.38402730226516724, "learning_rate": 9.473365467819307e-07, "loss": 0.0363, "step": 15507 }, { "epoch": 2.59, "grad_norm": 0.2988795340061188, "learning_rate": 9.465695439296918e-07, "loss": 0.0259, "step": 15508 }, { "epoch": 2.59, "grad_norm": 0.2973836660385132, "learning_rate": 9.458028362760296e-07, "loss": 0.0272, "step": 15509 }, { "epoch": 2.59, "grad_norm": 0.373978853225708, "learning_rate": 9.450364238459464e-07, "loss": 0.037, "step": 15510 }, { "epoch": 2.59, "grad_norm": 0.27209508419036865, "learning_rate": 9.442703066644299e-07, "loss": 0.0202, "step": 15511 }, { "epoch": 2.59, "grad_norm": 0.4735189378261566, "learning_rate": 9.4350448475646e-07, "loss": 0.0341, "step": 15512 }, { "epoch": 2.59, "grad_norm": 0.3928588926792145, "learning_rate": 9.427389581470081e-07, "loss": 0.0398, "step": 15513 }, { "epoch": 2.59, "grad_norm": 0.3478665053844452, "learning_rate": 9.419737268610362e-07, "loss": 0.0287, "step": 15514 }, { "epoch": 2.59, "grad_norm": 0.3735261559486389, "learning_rate": 9.412087909234946e-07, "loss": 0.0263, "step": 15515 }, { "epoch": 2.6, "grad_norm": 0.3072350323200226, "learning_rate": 9.404441503593253e-07, "loss": 0.0375, "step": 15516 }, { "epoch": 2.6, "grad_norm": 0.3518178164958954, "learning_rate": 9.396798051934608e-07, "loss": 0.0213, "step": 15517 }, { "epoch": 2.6, "grad_norm": 0.36284321546554565, "learning_rate": 9.38915755450821e-07, "loss": 0.0292, "step": 15518 }, { "epoch": 2.6, "grad_norm": 0.3362252116203308, "learning_rate": 9.381520011563228e-07, "loss": 0.0269, "step": 15519 }, { "epoch": 2.6, "grad_norm": 0.3485075831413269, "learning_rate": 9.37388542334865e-07, "loss": 0.0392, "step": 15520 }, { "epoch": 2.6, "grad_norm": 0.5718468427658081, "learning_rate": 9.366253790113444e-07, "loss": 0.0429, "step": 15521 }, { "epoch": 2.6, "grad_norm": 0.2859799861907959, "learning_rate": 9.358625112106446e-07, "loss": 0.0307, "step": 15522 }, { "epoch": 2.6, "grad_norm": 0.49242347478866577, "learning_rate": 9.350999389576376e-07, "loss": 0.0426, "step": 15523 }, { "epoch": 2.6, "grad_norm": 0.24166211485862732, "learning_rate": 9.343376622771916e-07, "loss": 0.0216, "step": 15524 }, { "epoch": 2.6, "grad_norm": 0.41015157103538513, "learning_rate": 9.335756811941599e-07, "loss": 0.0283, "step": 15525 }, { "epoch": 2.6, "grad_norm": 0.3785734474658966, "learning_rate": 9.328139957333848e-07, "loss": 0.0328, "step": 15526 }, { "epoch": 2.6, "grad_norm": 0.5908449292182922, "learning_rate": 9.320526059197077e-07, "loss": 0.058, "step": 15527 }, { "epoch": 2.6, "grad_norm": 0.4513050317764282, "learning_rate": 9.312915117779498e-07, "loss": 0.0327, "step": 15528 }, { "epoch": 2.6, "grad_norm": 0.3870738446712494, "learning_rate": 9.305307133329289e-07, "loss": 0.0259, "step": 15529 }, { "epoch": 2.6, "grad_norm": 0.3889540731906891, "learning_rate": 9.297702106094542e-07, "loss": 0.0272, "step": 15530 }, { "epoch": 2.6, "grad_norm": 0.33651745319366455, "learning_rate": 9.290100036323213e-07, "loss": 0.0319, "step": 15531 }, { "epoch": 2.6, "grad_norm": 0.34359291195869446, "learning_rate": 9.28250092426316e-07, "loss": 0.0322, "step": 15532 }, { "epoch": 2.6, "grad_norm": 0.3170872628688812, "learning_rate": 9.274904770162185e-07, "loss": 0.0313, "step": 15533 }, { "epoch": 2.6, "grad_norm": 0.5465676188468933, "learning_rate": 9.267311574267967e-07, "loss": 0.0351, "step": 15534 }, { "epoch": 2.6, "grad_norm": 0.6029090881347656, "learning_rate": 9.259721336828064e-07, "loss": 0.0401, "step": 15535 }, { "epoch": 2.6, "grad_norm": 0.4589557647705078, "learning_rate": 9.252134058090001e-07, "loss": 0.0277, "step": 15536 }, { "epoch": 2.6, "grad_norm": 1.1712175607681274, "learning_rate": 9.244549738301146e-07, "loss": 0.0272, "step": 15537 }, { "epoch": 2.6, "grad_norm": 0.40154194831848145, "learning_rate": 9.236968377708822e-07, "loss": 0.0414, "step": 15538 }, { "epoch": 2.6, "grad_norm": 0.6384051442146301, "learning_rate": 9.229389976560221e-07, "loss": 0.031, "step": 15539 }, { "epoch": 2.6, "grad_norm": 0.44710206985473633, "learning_rate": 9.221814535102425e-07, "loss": 0.0269, "step": 15540 }, { "epoch": 2.6, "grad_norm": 0.28913432359695435, "learning_rate": 9.214242053582468e-07, "loss": 0.0217, "step": 15541 }, { "epoch": 2.6, "grad_norm": 0.5492559671401978, "learning_rate": 9.20667253224724e-07, "loss": 0.0284, "step": 15542 }, { "epoch": 2.6, "grad_norm": 0.29668140411376953, "learning_rate": 9.199105971343558e-07, "loss": 0.0223, "step": 15543 }, { "epoch": 2.6, "grad_norm": 0.3701206147670746, "learning_rate": 9.191542371118134e-07, "loss": 0.036, "step": 15544 }, { "epoch": 2.6, "grad_norm": 0.4529555141925812, "learning_rate": 9.183981731817615e-07, "loss": 0.0261, "step": 15545 }, { "epoch": 2.6, "grad_norm": 0.2868105471134186, "learning_rate": 9.176424053688493e-07, "loss": 0.021, "step": 15546 }, { "epoch": 2.6, "grad_norm": 0.3148660957813263, "learning_rate": 9.168869336977226e-07, "loss": 0.0235, "step": 15547 }, { "epoch": 2.6, "grad_norm": 0.4240541458129883, "learning_rate": 9.16131758193014e-07, "loss": 0.0513, "step": 15548 }, { "epoch": 2.6, "grad_norm": 0.496884822845459, "learning_rate": 9.153768788793438e-07, "loss": 0.0393, "step": 15549 }, { "epoch": 2.6, "grad_norm": 0.43831196427345276, "learning_rate": 9.146222957813289e-07, "loss": 0.0449, "step": 15550 }, { "epoch": 2.6, "grad_norm": 0.3537199795246124, "learning_rate": 9.138680089235718e-07, "loss": 0.0345, "step": 15551 }, { "epoch": 2.6, "grad_norm": 0.3484465777873993, "learning_rate": 9.131140183306675e-07, "loss": 0.0247, "step": 15552 }, { "epoch": 2.6, "grad_norm": 0.7190476655960083, "learning_rate": 9.123603240272028e-07, "loss": 0.0334, "step": 15553 }, { "epoch": 2.6, "grad_norm": 0.3675512969493866, "learning_rate": 9.11606926037748e-07, "loss": 0.0354, "step": 15554 }, { "epoch": 2.6, "grad_norm": 0.4360049068927765, "learning_rate": 9.108538243868747e-07, "loss": 0.0359, "step": 15555 }, { "epoch": 2.6, "grad_norm": 0.31581640243530273, "learning_rate": 9.101010190991355e-07, "loss": 0.0214, "step": 15556 }, { "epoch": 2.6, "grad_norm": 0.4520280957221985, "learning_rate": 9.093485101990751e-07, "loss": 0.0296, "step": 15557 }, { "epoch": 2.6, "grad_norm": 0.37159377336502075, "learning_rate": 9.08596297711234e-07, "loss": 0.0334, "step": 15558 }, { "epoch": 2.6, "grad_norm": 0.3978622555732727, "learning_rate": 9.078443816601356e-07, "loss": 0.0291, "step": 15559 }, { "epoch": 2.6, "grad_norm": 0.34908628463745117, "learning_rate": 9.070927620702973e-07, "loss": 0.0301, "step": 15560 }, { "epoch": 2.6, "grad_norm": 0.4320419728755951, "learning_rate": 9.063414389662273e-07, "loss": 0.0407, "step": 15561 }, { "epoch": 2.6, "grad_norm": 2.84555721282959, "learning_rate": 9.055904123724246e-07, "loss": 0.0367, "step": 15562 }, { "epoch": 2.6, "grad_norm": 0.6185131072998047, "learning_rate": 9.048396823133754e-07, "loss": 0.0421, "step": 15563 }, { "epoch": 2.6, "grad_norm": 0.6062467694282532, "learning_rate": 9.040892488135611e-07, "loss": 0.0513, "step": 15564 }, { "epoch": 2.6, "grad_norm": 0.5873470902442932, "learning_rate": 9.033391118974466e-07, "loss": 0.0415, "step": 15565 }, { "epoch": 2.6, "grad_norm": 0.3368387818336487, "learning_rate": 9.025892715894957e-07, "loss": 0.0364, "step": 15566 }, { "epoch": 2.6, "grad_norm": 0.5725759267807007, "learning_rate": 9.018397279141544e-07, "loss": 0.0193, "step": 15567 }, { "epoch": 2.6, "grad_norm": 0.43188363313674927, "learning_rate": 9.010904808958621e-07, "loss": 0.0379, "step": 15568 }, { "epoch": 2.6, "grad_norm": 0.46515780687332153, "learning_rate": 9.003415305590502e-07, "loss": 0.03, "step": 15569 }, { "epoch": 2.6, "grad_norm": 0.38960757851600647, "learning_rate": 8.995928769281403e-07, "loss": 0.0384, "step": 15570 }, { "epoch": 2.6, "grad_norm": 0.37120258808135986, "learning_rate": 8.988445200275409e-07, "loss": 0.0262, "step": 15571 }, { "epoch": 2.6, "grad_norm": 0.4345848262310028, "learning_rate": 8.980964598816566e-07, "loss": 0.0402, "step": 15572 }, { "epoch": 2.6, "grad_norm": 0.36324748396873474, "learning_rate": 8.97348696514877e-07, "loss": 0.0305, "step": 15573 }, { "epoch": 2.6, "grad_norm": 0.35394302010536194, "learning_rate": 8.966012299515802e-07, "loss": 0.0266, "step": 15574 }, { "epoch": 2.6, "grad_norm": 0.3341236710548401, "learning_rate": 8.958540602161448e-07, "loss": 0.0285, "step": 15575 }, { "epoch": 2.61, "grad_norm": 0.3534027636051178, "learning_rate": 8.951071873329276e-07, "loss": 0.0347, "step": 15576 }, { "epoch": 2.61, "grad_norm": 0.3658745288848877, "learning_rate": 8.943606113262837e-07, "loss": 0.0282, "step": 15577 }, { "epoch": 2.61, "grad_norm": 0.41943609714508057, "learning_rate": 8.936143322205592e-07, "loss": 0.0489, "step": 15578 }, { "epoch": 2.61, "grad_norm": 0.4654504060745239, "learning_rate": 8.928683500400814e-07, "loss": 0.0266, "step": 15579 }, { "epoch": 2.61, "grad_norm": 0.3894200325012207, "learning_rate": 8.921226648091796e-07, "loss": 0.0359, "step": 15580 }, { "epoch": 2.61, "grad_norm": 0.38937443494796753, "learning_rate": 8.913772765521645e-07, "loss": 0.0251, "step": 15581 }, { "epoch": 2.61, "grad_norm": 0.30920618772506714, "learning_rate": 8.906321852933398e-07, "loss": 0.0229, "step": 15582 }, { "epoch": 2.61, "grad_norm": 0.5077576041221619, "learning_rate": 8.898873910570039e-07, "loss": 0.0345, "step": 15583 }, { "epoch": 2.61, "grad_norm": 0.39731723070144653, "learning_rate": 8.891428938674374e-07, "loss": 0.035, "step": 15584 }, { "epoch": 2.61, "grad_norm": 0.4604630172252655, "learning_rate": 8.883986937489175e-07, "loss": 0.0316, "step": 15585 }, { "epoch": 2.61, "grad_norm": 0.8171789050102234, "learning_rate": 8.876547907257116e-07, "loss": 0.0368, "step": 15586 }, { "epoch": 2.61, "grad_norm": 0.45192664861679077, "learning_rate": 8.869111848220735e-07, "loss": 0.0317, "step": 15587 }, { "epoch": 2.61, "grad_norm": 0.37128227949142456, "learning_rate": 8.861678760622483e-07, "loss": 0.0377, "step": 15588 }, { "epoch": 2.61, "grad_norm": 0.4396957457065582, "learning_rate": 8.854248644704766e-07, "loss": 0.0314, "step": 15589 }, { "epoch": 2.61, "grad_norm": 0.40415653586387634, "learning_rate": 8.846821500709823e-07, "loss": 0.0271, "step": 15590 }, { "epoch": 2.61, "grad_norm": 0.39185482263565063, "learning_rate": 8.839397328879806e-07, "loss": 0.0363, "step": 15591 }, { "epoch": 2.61, "grad_norm": 0.4483111500740051, "learning_rate": 8.831976129456821e-07, "loss": 0.0264, "step": 15592 }, { "epoch": 2.61, "grad_norm": 0.4992732107639313, "learning_rate": 8.824557902682829e-07, "loss": 0.0382, "step": 15593 }, { "epoch": 2.61, "grad_norm": 0.32387492060661316, "learning_rate": 8.817142648799737e-07, "loss": 0.023, "step": 15594 }, { "epoch": 2.61, "grad_norm": 0.2780354917049408, "learning_rate": 8.809730368049318e-07, "loss": 0.0297, "step": 15595 }, { "epoch": 2.61, "grad_norm": 0.4580897390842438, "learning_rate": 8.802321060673224e-07, "loss": 0.0346, "step": 15596 }, { "epoch": 2.61, "grad_norm": 0.31690695881843567, "learning_rate": 8.794914726913106e-07, "loss": 0.0219, "step": 15597 }, { "epoch": 2.61, "grad_norm": 0.40825337171554565, "learning_rate": 8.787511367010415e-07, "loss": 0.0521, "step": 15598 }, { "epoch": 2.61, "grad_norm": 0.4626029133796692, "learning_rate": 8.780110981206535e-07, "loss": 0.0377, "step": 15599 }, { "epoch": 2.61, "grad_norm": 0.5223162174224854, "learning_rate": 8.772713569742786e-07, "loss": 0.0426, "step": 15600 }, { "epoch": 2.61, "grad_norm": 0.3531500995159149, "learning_rate": 8.765319132860384e-07, "loss": 0.0258, "step": 15601 }, { "epoch": 2.61, "grad_norm": 0.38396012783050537, "learning_rate": 8.757927670800403e-07, "loss": 0.0474, "step": 15602 }, { "epoch": 2.61, "grad_norm": 0.33292216062545776, "learning_rate": 8.750539183803897e-07, "loss": 0.0315, "step": 15603 }, { "epoch": 2.61, "grad_norm": 0.32111525535583496, "learning_rate": 8.743153672111737e-07, "loss": 0.0227, "step": 15604 }, { "epoch": 2.61, "grad_norm": 0.3112657368183136, "learning_rate": 8.735771135964733e-07, "loss": 0.0279, "step": 15605 }, { "epoch": 2.61, "grad_norm": 0.347904235124588, "learning_rate": 8.728391575603623e-07, "loss": 0.035, "step": 15606 }, { "epoch": 2.61, "grad_norm": 0.38645732402801514, "learning_rate": 8.721014991269016e-07, "loss": 0.034, "step": 15607 }, { "epoch": 2.61, "grad_norm": 0.4205847978591919, "learning_rate": 8.713641383201454e-07, "loss": 0.0297, "step": 15608 }, { "epoch": 2.61, "grad_norm": 0.38904231786727905, "learning_rate": 8.706270751641322e-07, "loss": 0.0266, "step": 15609 }, { "epoch": 2.61, "grad_norm": 0.47509124875068665, "learning_rate": 8.698903096828981e-07, "loss": 0.0335, "step": 15610 }, { "epoch": 2.61, "grad_norm": 0.34954264760017395, "learning_rate": 8.691538419004675e-07, "loss": 0.0341, "step": 15611 }, { "epoch": 2.61, "grad_norm": 0.3445585370063782, "learning_rate": 8.684176718408521e-07, "loss": 0.0236, "step": 15612 }, { "epoch": 2.61, "grad_norm": 0.41038793325424194, "learning_rate": 8.676817995280528e-07, "loss": 0.0362, "step": 15613 }, { "epoch": 2.61, "grad_norm": 0.47433894872665405, "learning_rate": 8.669462249860694e-07, "loss": 0.0351, "step": 15614 }, { "epoch": 2.61, "grad_norm": 0.32477647066116333, "learning_rate": 8.662109482388825e-07, "loss": 0.0261, "step": 15615 }, { "epoch": 2.61, "grad_norm": 0.5204440951347351, "learning_rate": 8.654759693104664e-07, "loss": 0.0333, "step": 15616 }, { "epoch": 2.61, "grad_norm": 0.33584484457969666, "learning_rate": 8.647412882247864e-07, "loss": 0.0312, "step": 15617 }, { "epoch": 2.61, "grad_norm": 0.36063048243522644, "learning_rate": 8.640069050057998e-07, "loss": 0.0274, "step": 15618 }, { "epoch": 2.61, "grad_norm": 0.4342125952243805, "learning_rate": 8.632728196774498e-07, "loss": 0.04, "step": 15619 }, { "epoch": 2.61, "grad_norm": 0.31666773557662964, "learning_rate": 8.62539032263675e-07, "loss": 0.0307, "step": 15620 }, { "epoch": 2.61, "grad_norm": 0.45280614495277405, "learning_rate": 8.618055427883976e-07, "loss": 0.0477, "step": 15621 }, { "epoch": 2.61, "grad_norm": 0.4487193822860718, "learning_rate": 8.61072351275537e-07, "loss": 0.0215, "step": 15622 }, { "epoch": 2.61, "grad_norm": 0.3539423942565918, "learning_rate": 8.603394577489998e-07, "loss": 0.0303, "step": 15623 }, { "epoch": 2.61, "grad_norm": 0.37792447209358215, "learning_rate": 8.596068622326792e-07, "loss": 0.028, "step": 15624 }, { "epoch": 2.61, "grad_norm": 0.4118404686450958, "learning_rate": 8.588745647504648e-07, "loss": 0.038, "step": 15625 }, { "epoch": 2.61, "grad_norm": 0.3372892737388611, "learning_rate": 8.581425653262376e-07, "loss": 0.0308, "step": 15626 }, { "epoch": 2.61, "grad_norm": 0.49695804715156555, "learning_rate": 8.574108639838596e-07, "loss": 0.0328, "step": 15627 }, { "epoch": 2.61, "grad_norm": 0.48486268520355225, "learning_rate": 8.566794607471929e-07, "loss": 0.0193, "step": 15628 }, { "epoch": 2.61, "grad_norm": 0.4079936742782593, "learning_rate": 8.559483556400838e-07, "loss": 0.0364, "step": 15629 }, { "epoch": 2.61, "grad_norm": 0.4376598596572876, "learning_rate": 8.552175486863712e-07, "loss": 0.0375, "step": 15630 }, { "epoch": 2.61, "grad_norm": 0.37130266427993774, "learning_rate": 8.544870399098848e-07, "loss": 0.0266, "step": 15631 }, { "epoch": 2.61, "grad_norm": 0.6012168526649475, "learning_rate": 8.537568293344412e-07, "loss": 0.035, "step": 15632 }, { "epoch": 2.61, "grad_norm": 0.45788201689720154, "learning_rate": 8.530269169838523e-07, "loss": 0.0397, "step": 15633 }, { "epoch": 2.61, "grad_norm": 0.36228978633880615, "learning_rate": 8.522973028819193e-07, "loss": 0.0345, "step": 15634 }, { "epoch": 2.61, "grad_norm": 0.4303137958049774, "learning_rate": 8.515679870524285e-07, "loss": 0.0396, "step": 15635 }, { "epoch": 2.62, "grad_norm": 0.38480085134506226, "learning_rate": 8.508389695191632e-07, "loss": 0.0289, "step": 15636 }, { "epoch": 2.62, "grad_norm": 0.35641616582870483, "learning_rate": 8.501102503058933e-07, "loss": 0.0268, "step": 15637 }, { "epoch": 2.62, "grad_norm": 0.4513654112815857, "learning_rate": 8.493818294363765e-07, "loss": 0.0318, "step": 15638 }, { "epoch": 2.62, "grad_norm": 0.2389204353094101, "learning_rate": 8.486537069343681e-07, "loss": 0.0153, "step": 15639 }, { "epoch": 2.62, "grad_norm": 0.38741335272789, "learning_rate": 8.479258828236059e-07, "loss": 0.0384, "step": 15640 }, { "epoch": 2.62, "grad_norm": 0.4774935245513916, "learning_rate": 8.471983571278241e-07, "loss": 0.0211, "step": 15641 }, { "epoch": 2.62, "grad_norm": 0.4893557131290436, "learning_rate": 8.464711298707451e-07, "loss": 0.0254, "step": 15642 }, { "epoch": 2.62, "grad_norm": 0.4075123965740204, "learning_rate": 8.457442010760797e-07, "loss": 0.0308, "step": 15643 }, { "epoch": 2.62, "grad_norm": 0.33897313475608826, "learning_rate": 8.45017570767529e-07, "loss": 0.0345, "step": 15644 }, { "epoch": 2.62, "grad_norm": 0.4379499852657318, "learning_rate": 8.442912389687885e-07, "loss": 0.036, "step": 15645 }, { "epoch": 2.62, "grad_norm": 0.35124024748802185, "learning_rate": 8.435652057035393e-07, "loss": 0.0268, "step": 15646 }, { "epoch": 2.62, "grad_norm": 0.3820507824420929, "learning_rate": 8.428394709954546e-07, "loss": 0.0184, "step": 15647 }, { "epoch": 2.62, "grad_norm": 0.40840378403663635, "learning_rate": 8.421140348681978e-07, "loss": 0.0377, "step": 15648 }, { "epoch": 2.62, "grad_norm": 0.3535662889480591, "learning_rate": 8.413888973454232e-07, "loss": 0.0287, "step": 15649 }, { "epoch": 2.62, "grad_norm": 0.42274847626686096, "learning_rate": 8.406640584507763e-07, "loss": 0.038, "step": 15650 }, { "epoch": 2.62, "grad_norm": 0.40490296483039856, "learning_rate": 8.399395182078907e-07, "loss": 0.0404, "step": 15651 }, { "epoch": 2.62, "grad_norm": 0.295960932970047, "learning_rate": 8.392152766403882e-07, "loss": 0.0216, "step": 15652 }, { "epoch": 2.62, "grad_norm": 0.3759973347187042, "learning_rate": 8.384913337718881e-07, "loss": 0.0425, "step": 15653 }, { "epoch": 2.62, "grad_norm": 0.443363755941391, "learning_rate": 8.377676896259923e-07, "loss": 0.0416, "step": 15654 }, { "epoch": 2.62, "grad_norm": 0.33771079778671265, "learning_rate": 8.370443442262955e-07, "loss": 0.0258, "step": 15655 }, { "epoch": 2.62, "grad_norm": 0.47503775358200073, "learning_rate": 8.363212975963842e-07, "loss": 0.0341, "step": 15656 }, { "epoch": 2.62, "grad_norm": 0.3559012711048126, "learning_rate": 8.355985497598362e-07, "loss": 0.0309, "step": 15657 }, { "epoch": 2.62, "grad_norm": 0.3303312659263611, "learning_rate": 8.348761007402151e-07, "loss": 0.0202, "step": 15658 }, { "epoch": 2.62, "grad_norm": 0.3672697842121124, "learning_rate": 8.341539505610796e-07, "loss": 0.0241, "step": 15659 }, { "epoch": 2.62, "grad_norm": 0.39552217721939087, "learning_rate": 8.334320992459754e-07, "loss": 0.0317, "step": 15660 }, { "epoch": 2.62, "grad_norm": 0.5340011715888977, "learning_rate": 8.327105468184371e-07, "loss": 0.0214, "step": 15661 }, { "epoch": 2.62, "grad_norm": 0.2835078835487366, "learning_rate": 8.319892933019958e-07, "loss": 0.0179, "step": 15662 }, { "epoch": 2.62, "grad_norm": 0.46700790524482727, "learning_rate": 8.312683387201648e-07, "loss": 0.0391, "step": 15663 }, { "epoch": 2.62, "grad_norm": 0.32512661814689636, "learning_rate": 8.305476830964565e-07, "loss": 0.0303, "step": 15664 }, { "epoch": 2.62, "grad_norm": 0.3621568977832794, "learning_rate": 8.298273264543633e-07, "loss": 0.0329, "step": 15665 }, { "epoch": 2.62, "grad_norm": 0.29575836658477783, "learning_rate": 8.291072688173752e-07, "loss": 0.0247, "step": 15666 }, { "epoch": 2.62, "grad_norm": 0.31013235449790955, "learning_rate": 8.283875102089745e-07, "loss": 0.0215, "step": 15667 }, { "epoch": 2.62, "grad_norm": 0.3342304825782776, "learning_rate": 8.276680506526269e-07, "loss": 0.0219, "step": 15668 }, { "epoch": 2.62, "grad_norm": 0.429048091173172, "learning_rate": 8.269488901717893e-07, "loss": 0.0313, "step": 15669 }, { "epoch": 2.62, "grad_norm": 0.3984256684780121, "learning_rate": 8.262300287899139e-07, "loss": 0.0322, "step": 15670 }, { "epoch": 2.62, "grad_norm": 0.29062339663505554, "learning_rate": 8.255114665304398e-07, "loss": 0.022, "step": 15671 }, { "epoch": 2.62, "grad_norm": 0.41509971022605896, "learning_rate": 8.247932034167927e-07, "loss": 0.0246, "step": 15672 }, { "epoch": 2.62, "grad_norm": 0.37593990564346313, "learning_rate": 8.240752394723972e-07, "loss": 0.0399, "step": 15673 }, { "epoch": 2.62, "grad_norm": 0.3056626617908478, "learning_rate": 8.233575747206624e-07, "loss": 0.0244, "step": 15674 }, { "epoch": 2.62, "grad_norm": 0.42914384603500366, "learning_rate": 8.226402091849873e-07, "loss": 0.0404, "step": 15675 }, { "epoch": 2.62, "grad_norm": 0.30475059151649475, "learning_rate": 8.219231428887641e-07, "loss": 0.0371, "step": 15676 }, { "epoch": 2.62, "grad_norm": 0.3115408718585968, "learning_rate": 8.212063758553713e-07, "loss": 0.0416, "step": 15677 }, { "epoch": 2.62, "grad_norm": 0.36013898253440857, "learning_rate": 8.20489908108183e-07, "loss": 0.0371, "step": 15678 }, { "epoch": 2.62, "grad_norm": 0.40505367517471313, "learning_rate": 8.197737396705597e-07, "loss": 0.0351, "step": 15679 }, { "epoch": 2.62, "grad_norm": 0.33974018692970276, "learning_rate": 8.190578705658503e-07, "loss": 0.0389, "step": 15680 }, { "epoch": 2.62, "grad_norm": 0.4832233488559723, "learning_rate": 8.183423008173985e-07, "loss": 0.0394, "step": 15681 }, { "epoch": 2.62, "grad_norm": 0.5515153408050537, "learning_rate": 8.176270304485378e-07, "loss": 0.042, "step": 15682 }, { "epoch": 2.62, "grad_norm": 0.41801461577415466, "learning_rate": 8.169120594825885e-07, "loss": 0.0398, "step": 15683 }, { "epoch": 2.62, "grad_norm": 0.5263172388076782, "learning_rate": 8.161973879428653e-07, "loss": 0.0384, "step": 15684 }, { "epoch": 2.62, "grad_norm": 0.2736414670944214, "learning_rate": 8.154830158526694e-07, "loss": 0.0214, "step": 15685 }, { "epoch": 2.62, "grad_norm": 0.3472745418548584, "learning_rate": 8.147689432352923e-07, "loss": 0.035, "step": 15686 }, { "epoch": 2.62, "grad_norm": 0.3584951162338257, "learning_rate": 8.140551701140209e-07, "loss": 0.0306, "step": 15687 }, { "epoch": 2.62, "grad_norm": 0.34838658571243286, "learning_rate": 8.133416965121243e-07, "loss": 0.0297, "step": 15688 }, { "epoch": 2.62, "grad_norm": 0.31311213970184326, "learning_rate": 8.126285224528685e-07, "loss": 0.0306, "step": 15689 }, { "epoch": 2.62, "grad_norm": 0.3765118420124054, "learning_rate": 8.119156479595092e-07, "loss": 0.036, "step": 15690 }, { "epoch": 2.62, "grad_norm": 0.34825724363327026, "learning_rate": 8.112030730552877e-07, "loss": 0.0233, "step": 15691 }, { "epoch": 2.62, "grad_norm": 0.3916831910610199, "learning_rate": 8.104907977634413e-07, "loss": 0.0327, "step": 15692 }, { "epoch": 2.62, "grad_norm": 0.39236509799957275, "learning_rate": 8.097788221071922e-07, "loss": 0.0304, "step": 15693 }, { "epoch": 2.62, "grad_norm": 0.3844032287597656, "learning_rate": 8.090671461097554e-07, "loss": 0.0332, "step": 15694 }, { "epoch": 2.63, "grad_norm": 0.4448874890804291, "learning_rate": 8.083557697943367e-07, "loss": 0.0371, "step": 15695 }, { "epoch": 2.63, "grad_norm": 0.3424181044101715, "learning_rate": 8.076446931841309e-07, "loss": 0.0276, "step": 15696 }, { "epoch": 2.63, "grad_norm": 0.33592379093170166, "learning_rate": 8.069339163023238e-07, "loss": 0.0206, "step": 15697 }, { "epoch": 2.63, "grad_norm": 0.3717554211616516, "learning_rate": 8.062234391720924e-07, "loss": 0.0274, "step": 15698 }, { "epoch": 2.63, "grad_norm": 0.34041011333465576, "learning_rate": 8.055132618166029e-07, "loss": 0.0269, "step": 15699 }, { "epoch": 2.63, "grad_norm": 0.32313501834869385, "learning_rate": 8.048033842590075e-07, "loss": 0.0327, "step": 15700 }, { "epoch": 2.63, "grad_norm": 0.28684887290000916, "learning_rate": 8.040938065224579e-07, "loss": 0.02, "step": 15701 }, { "epoch": 2.63, "grad_norm": 0.5405088067054749, "learning_rate": 8.033845286300879e-07, "loss": 0.0424, "step": 15702 }, { "epoch": 2.63, "grad_norm": 0.29343366622924805, "learning_rate": 8.026755506050232e-07, "loss": 0.022, "step": 15703 }, { "epoch": 2.63, "grad_norm": 0.5013257265090942, "learning_rate": 8.019668724703822e-07, "loss": 0.0328, "step": 15704 }, { "epoch": 2.63, "grad_norm": 0.439062237739563, "learning_rate": 8.01258494249273e-07, "loss": 0.0439, "step": 15705 }, { "epoch": 2.63, "grad_norm": 0.5472002029418945, "learning_rate": 8.005504159647948e-07, "loss": 0.0378, "step": 15706 }, { "epoch": 2.63, "grad_norm": 0.2809927463531494, "learning_rate": 7.998426376400325e-07, "loss": 0.0178, "step": 15707 }, { "epoch": 2.63, "grad_norm": 0.2424187809228897, "learning_rate": 7.991351592980645e-07, "loss": 0.0187, "step": 15708 }, { "epoch": 2.63, "grad_norm": 0.39647504687309265, "learning_rate": 7.98427980961961e-07, "loss": 0.0296, "step": 15709 }, { "epoch": 2.63, "grad_norm": 0.574754536151886, "learning_rate": 7.977211026547782e-07, "loss": 0.0381, "step": 15710 }, { "epoch": 2.63, "grad_norm": 0.35650888085365295, "learning_rate": 7.970145243995642e-07, "loss": 0.0321, "step": 15711 }, { "epoch": 2.63, "grad_norm": 0.31558147072792053, "learning_rate": 7.963082462193605e-07, "loss": 0.0243, "step": 15712 }, { "epoch": 2.63, "grad_norm": 0.6878998279571533, "learning_rate": 7.956022681371933e-07, "loss": 0.0341, "step": 15713 }, { "epoch": 2.63, "grad_norm": 0.4319176971912384, "learning_rate": 7.948965901760841e-07, "loss": 0.0431, "step": 15714 }, { "epoch": 2.63, "grad_norm": 0.2951239049434662, "learning_rate": 7.941912123590434e-07, "loss": 0.0194, "step": 15715 }, { "epoch": 2.63, "grad_norm": 0.34109073877334595, "learning_rate": 7.934861347090694e-07, "loss": 0.0263, "step": 15716 }, { "epoch": 2.63, "grad_norm": 0.4482451379299164, "learning_rate": 7.927813572491494e-07, "loss": 0.0341, "step": 15717 }, { "epoch": 2.63, "grad_norm": 0.533006489276886, "learning_rate": 7.920768800022693e-07, "loss": 0.0404, "step": 15718 }, { "epoch": 2.63, "grad_norm": 0.417818158864975, "learning_rate": 7.913727029913943e-07, "loss": 0.0297, "step": 15719 }, { "epoch": 2.63, "grad_norm": 0.2743805944919586, "learning_rate": 7.90668826239488e-07, "loss": 0.0318, "step": 15720 }, { "epoch": 2.63, "grad_norm": 0.40694689750671387, "learning_rate": 7.899652497694987e-07, "loss": 0.0293, "step": 15721 }, { "epoch": 2.63, "grad_norm": 0.47214505076408386, "learning_rate": 7.892619736043694e-07, "loss": 0.0305, "step": 15722 }, { "epoch": 2.63, "grad_norm": 0.4282938539981842, "learning_rate": 7.885589977670327e-07, "loss": 0.0259, "step": 15723 }, { "epoch": 2.63, "grad_norm": 0.6538618803024292, "learning_rate": 7.878563222804069e-07, "loss": 0.0459, "step": 15724 }, { "epoch": 2.63, "grad_norm": 0.26667505502700806, "learning_rate": 7.87153947167405e-07, "loss": 0.0215, "step": 15725 }, { "epoch": 2.63, "grad_norm": 0.3457329273223877, "learning_rate": 7.864518724509295e-07, "loss": 0.0314, "step": 15726 }, { "epoch": 2.63, "grad_norm": 0.3914930522441864, "learning_rate": 7.857500981538713e-07, "loss": 0.0305, "step": 15727 }, { "epoch": 2.63, "grad_norm": 0.7314395904541016, "learning_rate": 7.850486242991118e-07, "loss": 0.0214, "step": 15728 }, { "epoch": 2.63, "grad_norm": 0.364957332611084, "learning_rate": 7.84347450909525e-07, "loss": 0.0428, "step": 15729 }, { "epoch": 2.63, "grad_norm": 0.3604075312614441, "learning_rate": 7.836465780079738e-07, "loss": 0.0345, "step": 15730 }, { "epoch": 2.63, "grad_norm": 0.6031169891357422, "learning_rate": 7.8294600561731e-07, "loss": 0.0303, "step": 15731 }, { "epoch": 2.63, "grad_norm": 0.35931846499443054, "learning_rate": 7.822457337603784e-07, "loss": 0.0301, "step": 15732 }, { "epoch": 2.63, "grad_norm": 0.4724166691303253, "learning_rate": 7.815457624600087e-07, "loss": 0.0336, "step": 15733 }, { "epoch": 2.63, "grad_norm": 0.33255279064178467, "learning_rate": 7.808460917390281e-07, "loss": 0.0339, "step": 15734 }, { "epoch": 2.63, "grad_norm": 0.4546723961830139, "learning_rate": 7.801467216202496e-07, "loss": 0.029, "step": 15735 }, { "epoch": 2.63, "grad_norm": 0.31457334756851196, "learning_rate": 7.794476521264737e-07, "loss": 0.0346, "step": 15736 }, { "epoch": 2.63, "grad_norm": 0.30037859082221985, "learning_rate": 7.78748883280498e-07, "loss": 0.0252, "step": 15737 }, { "epoch": 2.63, "grad_norm": 0.36724644899368286, "learning_rate": 7.780504151051072e-07, "loss": 0.0334, "step": 15738 }, { "epoch": 2.63, "grad_norm": 0.31964215636253357, "learning_rate": 7.773522476230722e-07, "loss": 0.0222, "step": 15739 }, { "epoch": 2.63, "grad_norm": 0.46546995639801025, "learning_rate": 7.766543808571614e-07, "loss": 0.0334, "step": 15740 }, { "epoch": 2.63, "grad_norm": 0.2685891091823578, "learning_rate": 7.759568148301278e-07, "loss": 0.0252, "step": 15741 }, { "epoch": 2.63, "grad_norm": 0.3053707778453827, "learning_rate": 7.752595495647141e-07, "loss": 0.018, "step": 15742 }, { "epoch": 2.63, "grad_norm": 0.4321901202201843, "learning_rate": 7.745625850836613e-07, "loss": 0.0366, "step": 15743 }, { "epoch": 2.63, "grad_norm": 0.3219660222530365, "learning_rate": 7.738659214096888e-07, "loss": 0.0253, "step": 15744 }, { "epoch": 2.63, "grad_norm": 0.5420984625816345, "learning_rate": 7.731695585655141e-07, "loss": 0.0282, "step": 15745 }, { "epoch": 2.63, "grad_norm": 0.6983953714370728, "learning_rate": 7.724734965738467e-07, "loss": 0.0425, "step": 15746 }, { "epoch": 2.63, "grad_norm": 0.44498875737190247, "learning_rate": 7.717777354573774e-07, "loss": 0.0328, "step": 15747 }, { "epoch": 2.63, "grad_norm": 0.3573824465274811, "learning_rate": 7.71082275238797e-07, "loss": 0.0251, "step": 15748 }, { "epoch": 2.63, "grad_norm": 0.3185766339302063, "learning_rate": 7.703871159407783e-07, "loss": 0.032, "step": 15749 }, { "epoch": 2.63, "grad_norm": 0.37190377712249756, "learning_rate": 7.696922575859878e-07, "loss": 0.02, "step": 15750 }, { "epoch": 2.63, "grad_norm": 0.2901342809200287, "learning_rate": 7.68997700197085e-07, "loss": 0.0235, "step": 15751 }, { "epoch": 2.63, "grad_norm": 0.42388269305229187, "learning_rate": 7.68303443796714e-07, "loss": 0.0322, "step": 15752 }, { "epoch": 2.63, "grad_norm": 0.47361892461776733, "learning_rate": 7.676094884075136e-07, "loss": 0.0306, "step": 15753 }, { "epoch": 2.63, "grad_norm": 0.6065811514854431, "learning_rate": 7.66915834052111e-07, "loss": 0.032, "step": 15754 }, { "epoch": 2.64, "grad_norm": 0.6898012757301331, "learning_rate": 7.662224807531249e-07, "loss": 0.0361, "step": 15755 }, { "epoch": 2.64, "grad_norm": 0.4478609263896942, "learning_rate": 7.655294285331583e-07, "loss": 0.0342, "step": 15756 }, { "epoch": 2.64, "grad_norm": 0.5402349829673767, "learning_rate": 7.648366774148152e-07, "loss": 0.0322, "step": 15757 }, { "epoch": 2.64, "grad_norm": 0.38395532965660095, "learning_rate": 7.6414422742068e-07, "loss": 0.0319, "step": 15758 }, { "epoch": 2.64, "grad_norm": 0.3350609838962555, "learning_rate": 7.634520785733291e-07, "loss": 0.0239, "step": 15759 }, { "epoch": 2.64, "grad_norm": 0.3776242434978485, "learning_rate": 7.627602308953341e-07, "loss": 0.0276, "step": 15760 }, { "epoch": 2.64, "grad_norm": 0.29530537128448486, "learning_rate": 7.620686844092551e-07, "loss": 0.0266, "step": 15761 }, { "epoch": 2.64, "grad_norm": 0.3239336609840393, "learning_rate": 7.613774391376361e-07, "loss": 0.0295, "step": 15762 }, { "epoch": 2.64, "grad_norm": 1.4492446184158325, "learning_rate": 7.60686495103019e-07, "loss": 0.0229, "step": 15763 }, { "epoch": 2.64, "grad_norm": 0.4110698103904724, "learning_rate": 7.599958523279327e-07, "loss": 0.0237, "step": 15764 }, { "epoch": 2.64, "grad_norm": 0.9274379014968872, "learning_rate": 7.593055108348968e-07, "loss": 0.0429, "step": 15765 }, { "epoch": 2.64, "grad_norm": 0.456825315952301, "learning_rate": 7.586154706464199e-07, "loss": 0.0542, "step": 15766 }, { "epoch": 2.64, "grad_norm": 0.440133273601532, "learning_rate": 7.579257317850008e-07, "loss": 0.0386, "step": 15767 }, { "epoch": 2.64, "grad_norm": 0.39751118421554565, "learning_rate": 7.572362942731315e-07, "loss": 0.0376, "step": 15768 }, { "epoch": 2.64, "grad_norm": 0.5319316983222961, "learning_rate": 7.565471581332895e-07, "loss": 0.0315, "step": 15769 }, { "epoch": 2.64, "grad_norm": 0.3039828836917877, "learning_rate": 7.558583233879457e-07, "loss": 0.024, "step": 15770 }, { "epoch": 2.64, "grad_norm": 0.33044201135635376, "learning_rate": 7.551697900595633e-07, "loss": 0.0276, "step": 15771 }, { "epoch": 2.64, "grad_norm": 0.32917240262031555, "learning_rate": 7.5448155817059e-07, "loss": 0.0328, "step": 15772 }, { "epoch": 2.64, "grad_norm": 0.35252147912979126, "learning_rate": 7.537936277434665e-07, "loss": 0.0248, "step": 15773 }, { "epoch": 2.64, "grad_norm": 0.3763442933559418, "learning_rate": 7.531059988006251e-07, "loss": 0.0308, "step": 15774 }, { "epoch": 2.64, "grad_norm": 0.4263732135295868, "learning_rate": 7.524186713644832e-07, "loss": 0.0459, "step": 15775 }, { "epoch": 2.64, "grad_norm": 0.32534655928611755, "learning_rate": 7.517316454574575e-07, "loss": 0.0291, "step": 15776 }, { "epoch": 2.64, "grad_norm": 0.28841710090637207, "learning_rate": 7.510449211019444e-07, "loss": 0.0268, "step": 15777 }, { "epoch": 2.64, "grad_norm": 0.32833632826805115, "learning_rate": 7.503584983203371e-07, "loss": 0.0396, "step": 15778 }, { "epoch": 2.64, "grad_norm": 0.43650785088539124, "learning_rate": 7.496723771350201e-07, "loss": 0.041, "step": 15779 }, { "epoch": 2.64, "grad_norm": 0.26123887300491333, "learning_rate": 7.48986557568363e-07, "loss": 0.0258, "step": 15780 }, { "epoch": 2.64, "grad_norm": 0.2958959937095642, "learning_rate": 7.483010396427259e-07, "loss": 0.0252, "step": 15781 }, { "epoch": 2.64, "grad_norm": 0.3651992678642273, "learning_rate": 7.476158233804631e-07, "loss": 0.0314, "step": 15782 }, { "epoch": 2.64, "grad_norm": 0.3206574618816376, "learning_rate": 7.469309088039178e-07, "loss": 0.021, "step": 15783 }, { "epoch": 2.64, "grad_norm": 0.6658034920692444, "learning_rate": 7.462462959354199e-07, "loss": 0.0424, "step": 15784 }, { "epoch": 2.64, "grad_norm": 0.466243177652359, "learning_rate": 7.455619847972928e-07, "loss": 0.0436, "step": 15785 }, { "epoch": 2.64, "grad_norm": 0.3957021236419678, "learning_rate": 7.448779754118518e-07, "loss": 0.0315, "step": 15786 }, { "epoch": 2.64, "grad_norm": 0.40434029698371887, "learning_rate": 7.441942678013958e-07, "loss": 0.0301, "step": 15787 }, { "epoch": 2.64, "grad_norm": 0.3243143856525421, "learning_rate": 7.435108619882225e-07, "loss": 0.0346, "step": 15788 }, { "epoch": 2.64, "grad_norm": 0.26246464252471924, "learning_rate": 7.428277579946119e-07, "loss": 0.0206, "step": 15789 }, { "epoch": 2.64, "grad_norm": 0.28936636447906494, "learning_rate": 7.421449558428395e-07, "loss": 0.0329, "step": 15790 }, { "epoch": 2.64, "grad_norm": 0.3958342373371124, "learning_rate": 7.414624555551675e-07, "loss": 0.0353, "step": 15791 }, { "epoch": 2.64, "grad_norm": 0.3384217321872711, "learning_rate": 7.407802571538492e-07, "loss": 0.0308, "step": 15792 }, { "epoch": 2.64, "grad_norm": 0.36954963207244873, "learning_rate": 7.400983606611301e-07, "loss": 0.035, "step": 15793 }, { "epoch": 2.64, "grad_norm": 0.40229395031929016, "learning_rate": 7.394167660992435e-07, "loss": 0.037, "step": 15794 }, { "epoch": 2.64, "grad_norm": 0.45767688751220703, "learning_rate": 7.387354734904139e-07, "loss": 0.0316, "step": 15795 }, { "epoch": 2.64, "grad_norm": 0.4426352083683014, "learning_rate": 7.380544828568559e-07, "loss": 0.0316, "step": 15796 }, { "epoch": 2.64, "grad_norm": 0.29033443331718445, "learning_rate": 7.373737942207748e-07, "loss": 0.0305, "step": 15797 }, { "epoch": 2.64, "grad_norm": 0.3387262523174286, "learning_rate": 7.366934076043631e-07, "loss": 0.0209, "step": 15798 }, { "epoch": 2.64, "grad_norm": 0.4576069414615631, "learning_rate": 7.360133230298072e-07, "loss": 0.0415, "step": 15799 }, { "epoch": 2.64, "grad_norm": 0.45520275831222534, "learning_rate": 7.353335405192807e-07, "loss": 0.0351, "step": 15800 }, { "epoch": 2.64, "grad_norm": 0.3865240812301636, "learning_rate": 7.346540600949503e-07, "loss": 0.0301, "step": 15801 }, { "epoch": 2.64, "grad_norm": 0.4107756018638611, "learning_rate": 7.339748817789727e-07, "loss": 0.0301, "step": 15802 }, { "epoch": 2.64, "grad_norm": 0.4187960624694824, "learning_rate": 7.33296005593489e-07, "loss": 0.0266, "step": 15803 }, { "epoch": 2.64, "grad_norm": 0.29298990964889526, "learning_rate": 7.326174315606394e-07, "loss": 0.0296, "step": 15804 }, { "epoch": 2.64, "grad_norm": 0.3674432933330536, "learning_rate": 7.319391597025483e-07, "loss": 0.0264, "step": 15805 }, { "epoch": 2.64, "grad_norm": 0.3421429693698883, "learning_rate": 7.31261190041328e-07, "loss": 0.0288, "step": 15806 }, { "epoch": 2.64, "grad_norm": 0.35398226976394653, "learning_rate": 7.305835225990909e-07, "loss": 0.0223, "step": 15807 }, { "epoch": 2.64, "grad_norm": 0.33853670954704285, "learning_rate": 7.299061573979272e-07, "loss": 0.0253, "step": 15808 }, { "epoch": 2.64, "grad_norm": 0.39260560274124146, "learning_rate": 7.292290944599257e-07, "loss": 0.0349, "step": 15809 }, { "epoch": 2.64, "grad_norm": 0.7035801410675049, "learning_rate": 7.285523338071654e-07, "loss": 0.0387, "step": 15810 }, { "epoch": 2.64, "grad_norm": 0.29413843154907227, "learning_rate": 7.278758754617099e-07, "loss": 0.0215, "step": 15811 }, { "epoch": 2.64, "grad_norm": 0.41053783893585205, "learning_rate": 7.271997194456149e-07, "loss": 0.0256, "step": 15812 }, { "epoch": 2.64, "grad_norm": 0.40005433559417725, "learning_rate": 7.265238657809315e-07, "loss": 0.0307, "step": 15813 }, { "epoch": 2.64, "grad_norm": 0.3853940963745117, "learning_rate": 7.258483144896944e-07, "loss": 0.0302, "step": 15814 }, { "epoch": 2.65, "grad_norm": 0.34112995862960815, "learning_rate": 7.251730655939293e-07, "loss": 0.0228, "step": 15815 }, { "epoch": 2.65, "grad_norm": 0.38408318161964417, "learning_rate": 7.244981191156564e-07, "loss": 0.0268, "step": 15816 }, { "epoch": 2.65, "grad_norm": 0.36422550678253174, "learning_rate": 7.238234750768791e-07, "loss": 0.0278, "step": 15817 }, { "epoch": 2.65, "grad_norm": 0.40923064947128296, "learning_rate": 7.231491334995988e-07, "loss": 0.0367, "step": 15818 }, { "epoch": 2.65, "grad_norm": 0.3376981019973755, "learning_rate": 7.224750944058045e-07, "loss": 0.0309, "step": 15819 }, { "epoch": 2.65, "grad_norm": 0.35117077827453613, "learning_rate": 7.218013578174699e-07, "loss": 0.0154, "step": 15820 }, { "epoch": 2.65, "grad_norm": 0.2644009292125702, "learning_rate": 7.21127923756566e-07, "loss": 0.0241, "step": 15821 }, { "epoch": 2.65, "grad_norm": 0.2835657298564911, "learning_rate": 7.2045479224505e-07, "loss": 0.0282, "step": 15822 }, { "epoch": 2.65, "grad_norm": 0.5023253560066223, "learning_rate": 7.197819633048686e-07, "loss": 0.0389, "step": 15823 }, { "epoch": 2.65, "grad_norm": 0.4597950279712677, "learning_rate": 7.191094369579632e-07, "loss": 0.0236, "step": 15824 }, { "epoch": 2.65, "grad_norm": 0.4447743892669678, "learning_rate": 7.184372132262596e-07, "loss": 0.0465, "step": 15825 }, { "epoch": 2.65, "grad_norm": 0.41667190194129944, "learning_rate": 7.17765292131678e-07, "loss": 0.0432, "step": 15826 }, { "epoch": 2.65, "grad_norm": 0.40353211760520935, "learning_rate": 7.170936736961287e-07, "loss": 0.0355, "step": 15827 }, { "epoch": 2.65, "grad_norm": 0.5490584969520569, "learning_rate": 7.164223579415086e-07, "loss": 0.0238, "step": 15828 }, { "epoch": 2.65, "grad_norm": 0.259189248085022, "learning_rate": 7.157513448897057e-07, "loss": 0.0171, "step": 15829 }, { "epoch": 2.65, "grad_norm": 0.38622599840164185, "learning_rate": 7.150806345626038e-07, "loss": 0.0217, "step": 15830 }, { "epoch": 2.65, "grad_norm": 0.384207159280777, "learning_rate": 7.144102269820663e-07, "loss": 0.0305, "step": 15831 }, { "epoch": 2.65, "grad_norm": 0.3406845033168793, "learning_rate": 7.13740122169957e-07, "loss": 0.0215, "step": 15832 }, { "epoch": 2.65, "grad_norm": 0.42162320017814636, "learning_rate": 7.130703201481237e-07, "loss": 0.0303, "step": 15833 }, { "epoch": 2.65, "grad_norm": 0.47448158264160156, "learning_rate": 7.12400820938406e-07, "loss": 0.0211, "step": 15834 }, { "epoch": 2.65, "grad_norm": 0.48925745487213135, "learning_rate": 7.11731624562636e-07, "loss": 0.0324, "step": 15835 }, { "epoch": 2.65, "grad_norm": 0.31569555401802063, "learning_rate": 7.11062731042631e-07, "loss": 0.0299, "step": 15836 }, { "epoch": 2.65, "grad_norm": 0.4584606885910034, "learning_rate": 7.103941404002023e-07, "loss": 0.0291, "step": 15837 }, { "epoch": 2.65, "grad_norm": 0.4723081588745117, "learning_rate": 7.097258526571504e-07, "loss": 0.0326, "step": 15838 }, { "epoch": 2.65, "grad_norm": 0.35577231645584106, "learning_rate": 7.090578678352667e-07, "loss": 0.0234, "step": 15839 }, { "epoch": 2.65, "grad_norm": 0.2626838982105255, "learning_rate": 7.08390185956328e-07, "loss": 0.0264, "step": 15840 }, { "epoch": 2.65, "grad_norm": 0.4264835715293884, "learning_rate": 7.077228070421071e-07, "loss": 0.0511, "step": 15841 }, { "epoch": 2.65, "grad_norm": 0.2598065137863159, "learning_rate": 7.070557311143655e-07, "loss": 0.0236, "step": 15842 }, { "epoch": 2.65, "grad_norm": 0.3411293029785156, "learning_rate": 7.063889581948536e-07, "loss": 0.033, "step": 15843 }, { "epoch": 2.65, "grad_norm": 0.36606261134147644, "learning_rate": 7.057224883053126e-07, "loss": 0.0277, "step": 15844 }, { "epoch": 2.65, "grad_norm": 0.3508088290691376, "learning_rate": 7.050563214674722e-07, "loss": 0.035, "step": 15845 }, { "epoch": 2.65, "grad_norm": 0.40728217363357544, "learning_rate": 7.043904577030558e-07, "loss": 0.0318, "step": 15846 }, { "epoch": 2.65, "grad_norm": 0.33306846022605896, "learning_rate": 7.03724897033774e-07, "loss": 0.0228, "step": 15847 }, { "epoch": 2.65, "grad_norm": 0.36170366406440735, "learning_rate": 7.030596394813261e-07, "loss": 0.0241, "step": 15848 }, { "epoch": 2.65, "grad_norm": 0.4456290006637573, "learning_rate": 7.023946850674046e-07, "loss": 0.0474, "step": 15849 }, { "epoch": 2.65, "grad_norm": 0.3743203580379486, "learning_rate": 7.017300338136946e-07, "loss": 0.0283, "step": 15850 }, { "epoch": 2.65, "grad_norm": 0.44060075283050537, "learning_rate": 7.01065685741863e-07, "loss": 0.038, "step": 15851 }, { "epoch": 2.65, "grad_norm": 0.3624778687953949, "learning_rate": 7.004016408735747e-07, "loss": 0.0191, "step": 15852 }, { "epoch": 2.65, "grad_norm": 0.3602208197116852, "learning_rate": 6.997378992304826e-07, "loss": 0.0209, "step": 15853 }, { "epoch": 2.65, "grad_norm": 0.4082880914211273, "learning_rate": 6.990744608342249e-07, "loss": 0.0296, "step": 15854 }, { "epoch": 2.65, "grad_norm": 0.5754275918006897, "learning_rate": 6.984113257064373e-07, "loss": 0.0359, "step": 15855 }, { "epoch": 2.65, "grad_norm": 0.4525732100009918, "learning_rate": 6.977484938687396e-07, "loss": 0.0396, "step": 15856 }, { "epoch": 2.65, "grad_norm": 0.3475475609302521, "learning_rate": 6.970859653427465e-07, "loss": 0.0193, "step": 15857 }, { "epoch": 2.65, "grad_norm": 0.4488508999347687, "learning_rate": 6.964237401500606e-07, "loss": 0.0546, "step": 15858 }, { "epoch": 2.65, "grad_norm": 0.3377143442630768, "learning_rate": 6.957618183122717e-07, "loss": 0.035, "step": 15859 }, { "epoch": 2.65, "grad_norm": 0.41203486919403076, "learning_rate": 6.951001998509676e-07, "loss": 0.036, "step": 15860 }, { "epoch": 2.65, "grad_norm": 0.28287407755851746, "learning_rate": 6.94438884787717e-07, "loss": 0.0232, "step": 15861 }, { "epoch": 2.65, "grad_norm": 0.3877747058868408, "learning_rate": 6.937778731440836e-07, "loss": 0.0417, "step": 15862 }, { "epoch": 2.65, "grad_norm": 0.3227013349533081, "learning_rate": 6.931171649416224e-07, "loss": 0.0197, "step": 15863 }, { "epoch": 2.65, "grad_norm": 0.4301358163356781, "learning_rate": 6.924567602018728e-07, "loss": 0.0417, "step": 15864 }, { "epoch": 2.65, "grad_norm": 0.4032168984413147, "learning_rate": 6.917966589463721e-07, "loss": 0.0279, "step": 15865 }, { "epoch": 2.65, "grad_norm": 0.43345311284065247, "learning_rate": 6.91136861196643e-07, "loss": 0.0277, "step": 15866 }, { "epoch": 2.65, "grad_norm": 0.4446956217288971, "learning_rate": 6.904773669741982e-07, "loss": 0.037, "step": 15867 }, { "epoch": 2.65, "grad_norm": 0.3350394368171692, "learning_rate": 6.898181763005408e-07, "loss": 0.0311, "step": 15868 }, { "epoch": 2.65, "grad_norm": 0.38194358348846436, "learning_rate": 6.891592891971655e-07, "loss": 0.0378, "step": 15869 }, { "epoch": 2.65, "grad_norm": 0.46774786710739136, "learning_rate": 6.885007056855575e-07, "loss": 0.0387, "step": 15870 }, { "epoch": 2.65, "grad_norm": 0.3608029782772064, "learning_rate": 6.878424257871863e-07, "loss": 0.0212, "step": 15871 }, { "epoch": 2.65, "grad_norm": 0.4900086522102356, "learning_rate": 6.871844495235202e-07, "loss": 0.04, "step": 15872 }, { "epoch": 2.65, "grad_norm": 0.3563900887966156, "learning_rate": 6.865267769160111e-07, "loss": 0.0257, "step": 15873 }, { "epoch": 2.65, "grad_norm": 0.2824479937553406, "learning_rate": 6.85869407986104e-07, "loss": 0.0252, "step": 15874 }, { "epoch": 2.66, "grad_norm": 0.6079955697059631, "learning_rate": 6.852123427552348e-07, "loss": 0.0473, "step": 15875 }, { "epoch": 2.66, "grad_norm": 0.37274742126464844, "learning_rate": 6.845555812448257e-07, "loss": 0.024, "step": 15876 }, { "epoch": 2.66, "grad_norm": 0.5137974619865417, "learning_rate": 6.838991234762926e-07, "loss": 0.0242, "step": 15877 }, { "epoch": 2.66, "grad_norm": 0.31084343791007996, "learning_rate": 6.832429694710397e-07, "loss": 0.0207, "step": 15878 }, { "epoch": 2.66, "grad_norm": 0.38061001896858215, "learning_rate": 6.825871192504608e-07, "loss": 0.0264, "step": 15879 }, { "epoch": 2.66, "grad_norm": 0.30234718322753906, "learning_rate": 6.819315728359422e-07, "loss": 0.0279, "step": 15880 }, { "epoch": 2.66, "grad_norm": 0.3577732741832733, "learning_rate": 6.812763302488567e-07, "loss": 0.0294, "step": 15881 }, { "epoch": 2.66, "grad_norm": 0.5340105891227722, "learning_rate": 6.806213915105708e-07, "loss": 0.0349, "step": 15882 }, { "epoch": 2.66, "grad_norm": 0.2780887484550476, "learning_rate": 6.799667566424406e-07, "loss": 0.0247, "step": 15883 }, { "epoch": 2.66, "grad_norm": 0.4767302870750427, "learning_rate": 6.793124256658113e-07, "loss": 0.0365, "step": 15884 }, { "epoch": 2.66, "grad_norm": 0.3161657452583313, "learning_rate": 6.786583986020157e-07, "loss": 0.0325, "step": 15885 }, { "epoch": 2.66, "grad_norm": 0.36458107829093933, "learning_rate": 6.780046754723812e-07, "loss": 0.0275, "step": 15886 }, { "epoch": 2.66, "grad_norm": 0.3902710974216461, "learning_rate": 6.773512562982221e-07, "loss": 0.0219, "step": 15887 }, { "epoch": 2.66, "grad_norm": 0.2939426004886627, "learning_rate": 6.766981411008456e-07, "loss": 0.0235, "step": 15888 }, { "epoch": 2.66, "grad_norm": 0.4910654127597809, "learning_rate": 6.760453299015446e-07, "loss": 0.036, "step": 15889 }, { "epoch": 2.66, "grad_norm": 0.4936675429344177, "learning_rate": 6.753928227216067e-07, "loss": 0.0387, "step": 15890 }, { "epoch": 2.66, "grad_norm": 0.5138728618621826, "learning_rate": 6.747406195823091e-07, "loss": 0.0395, "step": 15891 }, { "epoch": 2.66, "grad_norm": 0.486266165971756, "learning_rate": 6.740887205049162e-07, "loss": 0.039, "step": 15892 }, { "epoch": 2.66, "grad_norm": 0.3748894929885864, "learning_rate": 6.734371255106831e-07, "loss": 0.0304, "step": 15893 }, { "epoch": 2.66, "grad_norm": 0.45669978857040405, "learning_rate": 6.727858346208572e-07, "loss": 0.0355, "step": 15894 }, { "epoch": 2.66, "grad_norm": 0.5695749521255493, "learning_rate": 6.721348478566747e-07, "loss": 0.0344, "step": 15895 }, { "epoch": 2.66, "grad_norm": 0.4615401327610016, "learning_rate": 6.7148416523936e-07, "loss": 0.0375, "step": 15896 }, { "epoch": 2.66, "grad_norm": 0.45754510164260864, "learning_rate": 6.708337867901316e-07, "loss": 0.0307, "step": 15897 }, { "epoch": 2.66, "grad_norm": 0.33121663331985474, "learning_rate": 6.701837125301936e-07, "loss": 0.0227, "step": 15898 }, { "epoch": 2.66, "grad_norm": 0.33079585433006287, "learning_rate": 6.695339424807468e-07, "loss": 0.0226, "step": 15899 }, { "epoch": 2.66, "grad_norm": 0.3196745812892914, "learning_rate": 6.688844766629743e-07, "loss": 0.0216, "step": 15900 }, { "epoch": 2.66, "grad_norm": 0.46119844913482666, "learning_rate": 6.682353150980514e-07, "loss": 0.0417, "step": 15901 }, { "epoch": 2.66, "grad_norm": 0.33900249004364014, "learning_rate": 6.675864578071501e-07, "loss": 0.0248, "step": 15902 }, { "epoch": 2.66, "grad_norm": 0.4123660922050476, "learning_rate": 6.669379048114222e-07, "loss": 0.0279, "step": 15903 }, { "epoch": 2.66, "grad_norm": 0.5272077322006226, "learning_rate": 6.662896561320153e-07, "loss": 0.0402, "step": 15904 }, { "epoch": 2.66, "grad_norm": 0.37929052114486694, "learning_rate": 6.656417117900682e-07, "loss": 0.0293, "step": 15905 }, { "epoch": 2.66, "grad_norm": 0.3947751522064209, "learning_rate": 6.649940718067083e-07, "loss": 0.0351, "step": 15906 }, { "epoch": 2.66, "grad_norm": 0.5024291276931763, "learning_rate": 6.643467362030509e-07, "loss": 0.0328, "step": 15907 }, { "epoch": 2.66, "grad_norm": 0.45771777629852295, "learning_rate": 6.636997050002048e-07, "loss": 0.0294, "step": 15908 }, { "epoch": 2.66, "grad_norm": 0.38252827525138855, "learning_rate": 6.630529782192663e-07, "loss": 0.0212, "step": 15909 }, { "epoch": 2.66, "grad_norm": 0.40122395753860474, "learning_rate": 6.624065558813209e-07, "loss": 0.0318, "step": 15910 }, { "epoch": 2.66, "grad_norm": 0.3519229292869568, "learning_rate": 6.617604380074505e-07, "loss": 0.0242, "step": 15911 }, { "epoch": 2.66, "grad_norm": 0.3608843684196472, "learning_rate": 6.611146246187172e-07, "loss": 0.0297, "step": 15912 }, { "epoch": 2.66, "grad_norm": 0.5636975169181824, "learning_rate": 6.604691157361831e-07, "loss": 0.0349, "step": 15913 }, { "epoch": 2.66, "grad_norm": 0.42604339122772217, "learning_rate": 6.598239113808947e-07, "loss": 0.0276, "step": 15914 }, { "epoch": 2.66, "grad_norm": 0.2935227155685425, "learning_rate": 6.591790115738872e-07, "loss": 0.0291, "step": 15915 }, { "epoch": 2.66, "grad_norm": 0.3629431426525116, "learning_rate": 6.585344163361928e-07, "loss": 0.0339, "step": 15916 }, { "epoch": 2.66, "grad_norm": 0.4334758520126343, "learning_rate": 6.57890125688827e-07, "loss": 0.0401, "step": 15917 }, { "epoch": 2.66, "grad_norm": 0.3311436176300049, "learning_rate": 6.572461396527952e-07, "loss": 0.0338, "step": 15918 }, { "epoch": 2.66, "grad_norm": 0.2812071740627289, "learning_rate": 6.566024582491004e-07, "loss": 0.0204, "step": 15919 }, { "epoch": 2.66, "grad_norm": 0.3542730212211609, "learning_rate": 6.559590814987249e-07, "loss": 0.0202, "step": 15920 }, { "epoch": 2.66, "grad_norm": 0.2997824549674988, "learning_rate": 6.55316009422653e-07, "loss": 0.0242, "step": 15921 }, { "epoch": 2.66, "grad_norm": 0.34042537212371826, "learning_rate": 6.546732420418477e-07, "loss": 0.0381, "step": 15922 }, { "epoch": 2.66, "grad_norm": 0.4056178033351898, "learning_rate": 6.540307793772715e-07, "loss": 0.0348, "step": 15923 }, { "epoch": 2.66, "grad_norm": 0.43447503447532654, "learning_rate": 6.533886214498686e-07, "loss": 0.0351, "step": 15924 }, { "epoch": 2.66, "grad_norm": 0.35841697454452515, "learning_rate": 6.527467682805799e-07, "loss": 0.0355, "step": 15925 }, { "epoch": 2.66, "grad_norm": 0.38954246044158936, "learning_rate": 6.521052198903332e-07, "loss": 0.0354, "step": 15926 }, { "epoch": 2.66, "grad_norm": 0.5705596804618835, "learning_rate": 6.514639763000485e-07, "loss": 0.033, "step": 15927 }, { "epoch": 2.66, "grad_norm": 0.3791019320487976, "learning_rate": 6.508230375306324e-07, "loss": 0.0342, "step": 15928 }, { "epoch": 2.66, "grad_norm": 0.48864027857780457, "learning_rate": 6.501824036029825e-07, "loss": 0.0355, "step": 15929 }, { "epoch": 2.66, "grad_norm": 0.3867160379886627, "learning_rate": 6.495420745379888e-07, "loss": 0.0341, "step": 15930 }, { "epoch": 2.66, "grad_norm": 0.49814656376838684, "learning_rate": 6.489020503565324e-07, "loss": 0.0365, "step": 15931 }, { "epoch": 2.66, "grad_norm": 0.40771394968032837, "learning_rate": 6.482623310794788e-07, "loss": 0.0297, "step": 15932 }, { "epoch": 2.66, "grad_norm": 0.32788893580436707, "learning_rate": 6.476229167276892e-07, "loss": 0.0394, "step": 15933 }, { "epoch": 2.66, "grad_norm": 0.3702605962753296, "learning_rate": 6.469838073220114e-07, "loss": 0.0324, "step": 15934 }, { "epoch": 2.67, "grad_norm": 0.46587836742401123, "learning_rate": 6.463450028832829e-07, "loss": 0.0433, "step": 15935 }, { "epoch": 2.67, "grad_norm": 0.4282068908214569, "learning_rate": 6.457065034323351e-07, "loss": 0.0301, "step": 15936 }, { "epoch": 2.67, "grad_norm": 0.44726094603538513, "learning_rate": 6.450683089899856e-07, "loss": 0.0227, "step": 15937 }, { "epoch": 2.67, "grad_norm": 0.4199095666408539, "learning_rate": 6.444304195770435e-07, "loss": 0.0399, "step": 15938 }, { "epoch": 2.67, "grad_norm": 0.33770468831062317, "learning_rate": 6.437928352143096e-07, "loss": 0.0286, "step": 15939 }, { "epoch": 2.67, "grad_norm": 0.4567873179912567, "learning_rate": 6.431555559225722e-07, "loss": 0.0364, "step": 15940 }, { "epoch": 2.67, "grad_norm": 0.31006738543510437, "learning_rate": 6.42518581722611e-07, "loss": 0.0232, "step": 15941 }, { "epoch": 2.67, "grad_norm": 0.3506876528263092, "learning_rate": 6.41881912635195e-07, "loss": 0.0288, "step": 15942 }, { "epoch": 2.67, "grad_norm": 0.3880263566970825, "learning_rate": 6.41245548681082e-07, "loss": 0.0248, "step": 15943 }, { "epoch": 2.67, "grad_norm": 0.337841659784317, "learning_rate": 6.406094898810244e-07, "loss": 0.0191, "step": 15944 }, { "epoch": 2.67, "grad_norm": 0.57002192735672, "learning_rate": 6.399737362557578e-07, "loss": 0.0413, "step": 15945 }, { "epoch": 2.67, "grad_norm": 0.37720975279808044, "learning_rate": 6.393382878260157e-07, "loss": 0.0287, "step": 15946 }, { "epoch": 2.67, "grad_norm": 0.3362385928630829, "learning_rate": 6.387031446125169e-07, "loss": 0.0219, "step": 15947 }, { "epoch": 2.67, "grad_norm": 0.5213716626167297, "learning_rate": 6.380683066359705e-07, "loss": 0.0353, "step": 15948 }, { "epoch": 2.67, "grad_norm": 0.3654617369174957, "learning_rate": 6.374337739170744e-07, "loss": 0.0277, "step": 15949 }, { "epoch": 2.67, "grad_norm": 0.507426917552948, "learning_rate": 6.36799546476522e-07, "loss": 0.0523, "step": 15950 }, { "epoch": 2.67, "grad_norm": 0.5127384066581726, "learning_rate": 6.361656243349912e-07, "loss": 0.0292, "step": 15951 }, { "epoch": 2.67, "grad_norm": 0.34560489654541016, "learning_rate": 6.355320075131499e-07, "loss": 0.0391, "step": 15952 }, { "epoch": 2.67, "grad_norm": 0.4164094924926758, "learning_rate": 6.348986960316594e-07, "loss": 0.0308, "step": 15953 }, { "epoch": 2.67, "grad_norm": 0.4850481152534485, "learning_rate": 6.342656899111699e-07, "loss": 0.0462, "step": 15954 }, { "epoch": 2.67, "grad_norm": 0.33312398195266724, "learning_rate": 6.336329891723236e-07, "loss": 0.0256, "step": 15955 }, { "epoch": 2.67, "grad_norm": 0.2987467646598816, "learning_rate": 6.330005938357486e-07, "loss": 0.0246, "step": 15956 }, { "epoch": 2.67, "grad_norm": 0.5602357983589172, "learning_rate": 6.323685039220629e-07, "loss": 0.0461, "step": 15957 }, { "epoch": 2.67, "grad_norm": 0.3139299154281616, "learning_rate": 6.317367194518797e-07, "loss": 0.0263, "step": 15958 }, { "epoch": 2.67, "grad_norm": 0.37535566091537476, "learning_rate": 6.311052404457985e-07, "loss": 0.0224, "step": 15959 }, { "epoch": 2.67, "grad_norm": 0.34710219502449036, "learning_rate": 6.30474066924407e-07, "loss": 0.0273, "step": 15960 }, { "epoch": 2.67, "grad_norm": 0.35561010241508484, "learning_rate": 6.298431989082876e-07, "loss": 0.0362, "step": 15961 }, { "epoch": 2.67, "grad_norm": 0.3706494867801666, "learning_rate": 6.292126364180107e-07, "loss": 0.0376, "step": 15962 }, { "epoch": 2.67, "grad_norm": 0.4468502402305603, "learning_rate": 6.285823794741353e-07, "loss": 0.042, "step": 15963 }, { "epoch": 2.67, "grad_norm": 0.36339741945266724, "learning_rate": 6.279524280972138e-07, "loss": 0.0311, "step": 15964 }, { "epoch": 2.67, "grad_norm": 0.41627687215805054, "learning_rate": 6.273227823077855e-07, "loss": 0.0309, "step": 15965 }, { "epoch": 2.67, "grad_norm": 0.5691763758659363, "learning_rate": 6.266934421263793e-07, "loss": 0.0472, "step": 15966 }, { "epoch": 2.67, "grad_norm": 0.49498239159584045, "learning_rate": 6.260644075735178e-07, "loss": 0.0499, "step": 15967 }, { "epoch": 2.67, "grad_norm": 0.419342577457428, "learning_rate": 6.254356786697102e-07, "loss": 0.0381, "step": 15968 }, { "epoch": 2.67, "grad_norm": 0.35773465037345886, "learning_rate": 6.248072554354568e-07, "loss": 0.0247, "step": 15969 }, { "epoch": 2.67, "grad_norm": 0.4709599018096924, "learning_rate": 6.241791378912499e-07, "loss": 0.0434, "step": 15970 }, { "epoch": 2.67, "grad_norm": 0.2812006175518036, "learning_rate": 6.235513260575676e-07, "loss": 0.0211, "step": 15971 }, { "epoch": 2.67, "grad_norm": 0.3651471436023712, "learning_rate": 6.229238199548826e-07, "loss": 0.0317, "step": 15972 }, { "epoch": 2.67, "grad_norm": 0.41581547260284424, "learning_rate": 6.222966196036562e-07, "loss": 0.0359, "step": 15973 }, { "epoch": 2.67, "grad_norm": 0.3099161982536316, "learning_rate": 6.216697250243354e-07, "loss": 0.0262, "step": 15974 }, { "epoch": 2.67, "grad_norm": 0.2964973747730255, "learning_rate": 6.210431362373647e-07, "loss": 0.0203, "step": 15975 }, { "epoch": 2.67, "grad_norm": 0.5336768627166748, "learning_rate": 6.204168532631727e-07, "loss": 0.0387, "step": 15976 }, { "epoch": 2.67, "grad_norm": 0.3901190757751465, "learning_rate": 6.197908761221793e-07, "loss": 0.0381, "step": 15977 }, { "epoch": 2.67, "grad_norm": 0.4313613474369049, "learning_rate": 6.191652048347974e-07, "loss": 0.0355, "step": 15978 }, { "epoch": 2.67, "grad_norm": 0.3768370747566223, "learning_rate": 6.185398394214282e-07, "loss": 0.0223, "step": 15979 }, { "epoch": 2.67, "grad_norm": 0.3649894893169403, "learning_rate": 6.179147799024587e-07, "loss": 0.0317, "step": 15980 }, { "epoch": 2.67, "grad_norm": 0.34228280186653137, "learning_rate": 6.172900262982751e-07, "loss": 0.0191, "step": 15981 }, { "epoch": 2.67, "grad_norm": 0.5295888781547546, "learning_rate": 6.166655786292442e-07, "loss": 0.0314, "step": 15982 }, { "epoch": 2.67, "grad_norm": 0.5202485918998718, "learning_rate": 6.160414369157297e-07, "loss": 0.027, "step": 15983 }, { "epoch": 2.67, "grad_norm": 0.5885874629020691, "learning_rate": 6.15417601178081e-07, "loss": 0.0263, "step": 15984 }, { "epoch": 2.67, "grad_norm": 0.37256336212158203, "learning_rate": 6.147940714366373e-07, "loss": 0.0328, "step": 15985 }, { "epoch": 2.67, "grad_norm": 0.38203656673431396, "learning_rate": 6.141708477117326e-07, "loss": 0.0319, "step": 15986 }, { "epoch": 2.67, "grad_norm": 0.3988436460494995, "learning_rate": 6.13547930023688e-07, "loss": 0.0428, "step": 15987 }, { "epoch": 2.67, "grad_norm": 0.2356383502483368, "learning_rate": 6.129253183928108e-07, "loss": 0.0168, "step": 15988 }, { "epoch": 2.67, "grad_norm": 0.3764304518699646, "learning_rate": 6.12303012839407e-07, "loss": 0.0362, "step": 15989 }, { "epoch": 2.67, "grad_norm": 0.41415566205978394, "learning_rate": 6.116810133837647e-07, "loss": 0.0324, "step": 15990 }, { "epoch": 2.67, "grad_norm": 0.466777503490448, "learning_rate": 6.110593200461646e-07, "loss": 0.0427, "step": 15991 }, { "epoch": 2.67, "grad_norm": 0.32571426033973694, "learning_rate": 6.104379328468801e-07, "loss": 0.0281, "step": 15992 }, { "epoch": 2.67, "grad_norm": 0.47365236282348633, "learning_rate": 6.098168518061687e-07, "loss": 0.0377, "step": 15993 }, { "epoch": 2.68, "grad_norm": 0.37910374999046326, "learning_rate": 6.09196076944284e-07, "loss": 0.0389, "step": 15994 }, { "epoch": 2.68, "grad_norm": 0.3198789358139038, "learning_rate": 6.085756082814686e-07, "loss": 0.0259, "step": 15995 }, { "epoch": 2.68, "grad_norm": 0.4295959174633026, "learning_rate": 6.079554458379511e-07, "loss": 0.0285, "step": 15996 }, { "epoch": 2.68, "grad_norm": 0.3050764799118042, "learning_rate": 6.073355896339539e-07, "loss": 0.0265, "step": 15997 }, { "epoch": 2.68, "grad_norm": 0.32515600323677063, "learning_rate": 6.067160396896887e-07, "loss": 0.0383, "step": 15998 }, { "epoch": 2.68, "grad_norm": 0.404824823141098, "learning_rate": 6.060967960253538e-07, "loss": 0.0323, "step": 15999 }, { "epoch": 2.68, "grad_norm": 0.38290101289749146, "learning_rate": 6.054778586611443e-07, "loss": 0.0263, "step": 16000 }, { "epoch": 2.68, "grad_norm": 0.4332994222640991, "learning_rate": 6.048592276172382e-07, "loss": 0.0309, "step": 16001 }, { "epoch": 2.68, "grad_norm": 0.3145316243171692, "learning_rate": 6.042409029138086e-07, "loss": 0.0253, "step": 16002 }, { "epoch": 2.68, "grad_norm": 0.4238015115261078, "learning_rate": 6.036228845710168e-07, "loss": 0.0226, "step": 16003 }, { "epoch": 2.68, "grad_norm": 0.4020916521549225, "learning_rate": 6.030051726090136e-07, "loss": 0.0409, "step": 16004 }, { "epoch": 2.68, "grad_norm": 0.3118334710597992, "learning_rate": 6.023877670479395e-07, "loss": 0.0312, "step": 16005 }, { "epoch": 2.68, "grad_norm": 0.4027152359485626, "learning_rate": 6.017706679079283e-07, "loss": 0.0269, "step": 16006 }, { "epoch": 2.68, "grad_norm": 0.5308322906494141, "learning_rate": 6.011538752090984e-07, "loss": 0.029, "step": 16007 }, { "epoch": 2.68, "grad_norm": 0.4445180594921112, "learning_rate": 6.005373889715615e-07, "loss": 0.0351, "step": 16008 }, { "epoch": 2.68, "grad_norm": 0.5247676372528076, "learning_rate": 5.999212092154205e-07, "loss": 0.0438, "step": 16009 }, { "epoch": 2.68, "grad_norm": 0.3935261368751526, "learning_rate": 5.993053359607648e-07, "loss": 0.0428, "step": 16010 }, { "epoch": 2.68, "grad_norm": 0.2719537317752838, "learning_rate": 5.986897692276783e-07, "loss": 0.0324, "step": 16011 }, { "epoch": 2.68, "grad_norm": 0.42164039611816406, "learning_rate": 5.980745090362316e-07, "loss": 0.0236, "step": 16012 }, { "epoch": 2.68, "grad_norm": 0.3592412769794464, "learning_rate": 5.974595554064833e-07, "loss": 0.0354, "step": 16013 }, { "epoch": 2.68, "grad_norm": 0.27843713760375977, "learning_rate": 5.968449083584882e-07, "loss": 0.0268, "step": 16014 }, { "epoch": 2.68, "grad_norm": 0.4512225389480591, "learning_rate": 5.96230567912287e-07, "loss": 0.0329, "step": 16015 }, { "epoch": 2.68, "grad_norm": 0.2847915589809418, "learning_rate": 5.956165340879083e-07, "loss": 0.0302, "step": 16016 }, { "epoch": 2.68, "grad_norm": 0.408210813999176, "learning_rate": 5.950028069053748e-07, "loss": 0.0352, "step": 16017 }, { "epoch": 2.68, "grad_norm": 0.5585629940032959, "learning_rate": 5.943893863847006e-07, "loss": 0.0383, "step": 16018 }, { "epoch": 2.68, "grad_norm": 1.458436369895935, "learning_rate": 5.93776272545884e-07, "loss": 0.0381, "step": 16019 }, { "epoch": 2.68, "grad_norm": 0.3823094964027405, "learning_rate": 5.93163465408918e-07, "loss": 0.0299, "step": 16020 }, { "epoch": 2.68, "grad_norm": 0.33180347084999084, "learning_rate": 5.925509649937833e-07, "loss": 0.0363, "step": 16021 }, { "epoch": 2.68, "grad_norm": 0.3423537611961365, "learning_rate": 5.919387713204494e-07, "loss": 0.0258, "step": 16022 }, { "epoch": 2.68, "grad_norm": 0.28590184450149536, "learning_rate": 5.913268844088815e-07, "loss": 0.0202, "step": 16023 }, { "epoch": 2.68, "grad_norm": 0.5731077790260315, "learning_rate": 5.90715304279027e-07, "loss": 0.0351, "step": 16024 }, { "epoch": 2.68, "grad_norm": 0.42070645093917847, "learning_rate": 5.90104030950831e-07, "loss": 0.0367, "step": 16025 }, { "epoch": 2.68, "grad_norm": 0.4908623993396759, "learning_rate": 5.89493064444221e-07, "loss": 0.0223, "step": 16026 }, { "epoch": 2.68, "grad_norm": 0.333827406167984, "learning_rate": 5.888824047791208e-07, "loss": 0.0368, "step": 16027 }, { "epoch": 2.68, "grad_norm": 0.4372502565383911, "learning_rate": 5.882720519754414e-07, "loss": 0.0339, "step": 16028 }, { "epoch": 2.68, "grad_norm": 0.26638004183769226, "learning_rate": 5.876620060530858e-07, "loss": 0.0196, "step": 16029 }, { "epoch": 2.68, "grad_norm": 0.4019891619682312, "learning_rate": 5.870522670319412e-07, "loss": 0.0305, "step": 16030 }, { "epoch": 2.68, "grad_norm": 0.3427082300186157, "learning_rate": 5.864428349318929e-07, "loss": 0.0255, "step": 16031 }, { "epoch": 2.68, "grad_norm": 0.3647365868091583, "learning_rate": 5.858337097728095e-07, "loss": 0.0302, "step": 16032 }, { "epoch": 2.68, "grad_norm": 0.39075493812561035, "learning_rate": 5.852248915745528e-07, "loss": 0.0317, "step": 16033 }, { "epoch": 2.68, "grad_norm": 0.47948697209358215, "learning_rate": 5.846163803569748e-07, "loss": 0.0361, "step": 16034 }, { "epoch": 2.68, "grad_norm": 0.45015960931777954, "learning_rate": 5.840081761399174e-07, "loss": 0.0388, "step": 16035 }, { "epoch": 2.68, "grad_norm": 0.32932010293006897, "learning_rate": 5.834002789432103e-07, "loss": 0.023, "step": 16036 }, { "epoch": 2.68, "grad_norm": 1.0554139614105225, "learning_rate": 5.827926887866763e-07, "loss": 0.0295, "step": 16037 }, { "epoch": 2.68, "grad_norm": 0.3520925045013428, "learning_rate": 5.821854056901243e-07, "loss": 0.0285, "step": 16038 }, { "epoch": 2.68, "grad_norm": 0.4417232275009155, "learning_rate": 5.815784296733584e-07, "loss": 0.0336, "step": 16039 }, { "epoch": 2.68, "grad_norm": 0.42902082204818726, "learning_rate": 5.809717607561694e-07, "loss": 0.043, "step": 16040 }, { "epoch": 2.68, "grad_norm": 0.45271944999694824, "learning_rate": 5.803653989583358e-07, "loss": 0.0361, "step": 16041 }, { "epoch": 2.68, "grad_norm": 0.46703046560287476, "learning_rate": 5.797593442996308e-07, "loss": 0.0294, "step": 16042 }, { "epoch": 2.68, "grad_norm": 0.42831143736839294, "learning_rate": 5.791535967998163e-07, "loss": 0.0308, "step": 16043 }, { "epoch": 2.68, "grad_norm": 0.3270467519760132, "learning_rate": 5.78548156478641e-07, "loss": 0.0254, "step": 16044 }, { "epoch": 2.68, "grad_norm": 0.4200412631034851, "learning_rate": 5.779430233558491e-07, "loss": 0.0473, "step": 16045 }, { "epoch": 2.68, "grad_norm": 0.5594862103462219, "learning_rate": 5.773381974511704e-07, "loss": 0.0393, "step": 16046 }, { "epoch": 2.68, "grad_norm": 0.32758834958076477, "learning_rate": 5.767336787843248e-07, "loss": 0.0398, "step": 16047 }, { "epoch": 2.68, "grad_norm": 0.34113940596580505, "learning_rate": 5.761294673750262e-07, "loss": 0.0249, "step": 16048 }, { "epoch": 2.68, "grad_norm": 0.4499426484107971, "learning_rate": 5.755255632429713e-07, "loss": 0.0344, "step": 16049 }, { "epoch": 2.68, "grad_norm": 0.411142498254776, "learning_rate": 5.749219664078543e-07, "loss": 0.0329, "step": 16050 }, { "epoch": 2.68, "grad_norm": 0.37931546568870544, "learning_rate": 5.743186768893583e-07, "loss": 0.0378, "step": 16051 }, { "epoch": 2.68, "grad_norm": 0.37899500131607056, "learning_rate": 5.737156947071487e-07, "loss": 0.0338, "step": 16052 }, { "epoch": 2.68, "grad_norm": 0.2655389904975891, "learning_rate": 5.731130198808921e-07, "loss": 0.0223, "step": 16053 }, { "epoch": 2.69, "grad_norm": 0.3410319685935974, "learning_rate": 5.72510652430236e-07, "loss": 0.0238, "step": 16054 }, { "epoch": 2.69, "grad_norm": 0.38087454438209534, "learning_rate": 5.719085923748213e-07, "loss": 0.0352, "step": 16055 }, { "epoch": 2.69, "grad_norm": 0.28236544132232666, "learning_rate": 5.713068397342814e-07, "loss": 0.0228, "step": 16056 }, { "epoch": 2.69, "grad_norm": 0.44867372512817383, "learning_rate": 5.707053945282338e-07, "loss": 0.0239, "step": 16057 }, { "epoch": 2.69, "grad_norm": 0.4400375485420227, "learning_rate": 5.701042567762915e-07, "loss": 0.0413, "step": 16058 }, { "epoch": 2.69, "grad_norm": 0.29934757947921753, "learning_rate": 5.69503426498057e-07, "loss": 0.0227, "step": 16059 }, { "epoch": 2.69, "grad_norm": 0.3821629285812378, "learning_rate": 5.689029037131189e-07, "loss": 0.0459, "step": 16060 }, { "epoch": 2.69, "grad_norm": 0.3307764530181885, "learning_rate": 5.68302688441057e-07, "loss": 0.023, "step": 16061 }, { "epoch": 2.69, "grad_norm": 0.4676326513290405, "learning_rate": 5.677027807014457e-07, "loss": 0.0254, "step": 16062 }, { "epoch": 2.69, "grad_norm": 0.43877163529396057, "learning_rate": 5.671031805138427e-07, "loss": 0.0376, "step": 16063 }, { "epoch": 2.69, "grad_norm": 0.3908366560935974, "learning_rate": 5.66503887897798e-07, "loss": 0.0287, "step": 16064 }, { "epoch": 2.69, "grad_norm": 0.3676859438419342, "learning_rate": 5.659049028728547e-07, "loss": 0.0281, "step": 16065 }, { "epoch": 2.69, "grad_norm": 0.3813382685184479, "learning_rate": 5.653062254585418e-07, "loss": 0.0267, "step": 16066 }, { "epoch": 2.69, "grad_norm": 0.48993539810180664, "learning_rate": 5.647078556743823e-07, "loss": 0.0344, "step": 16067 }, { "epoch": 2.69, "grad_norm": 0.41810086369514465, "learning_rate": 5.641097935398853e-07, "loss": 0.0281, "step": 16068 }, { "epoch": 2.69, "grad_norm": 0.41615238785743713, "learning_rate": 5.635120390745496e-07, "loss": 0.0277, "step": 16069 }, { "epoch": 2.69, "grad_norm": 0.3315734267234802, "learning_rate": 5.629145922978695e-07, "loss": 0.0265, "step": 16070 }, { "epoch": 2.69, "grad_norm": 0.39529600739479065, "learning_rate": 5.623174532293229e-07, "loss": 0.0282, "step": 16071 }, { "epoch": 2.69, "grad_norm": 0.6667559742927551, "learning_rate": 5.617206218883809e-07, "loss": 0.0447, "step": 16072 }, { "epoch": 2.69, "grad_norm": 0.29375776648521423, "learning_rate": 5.611240982945021e-07, "loss": 0.0279, "step": 16073 }, { "epoch": 2.69, "grad_norm": 0.40915167331695557, "learning_rate": 5.605278824671412e-07, "loss": 0.043, "step": 16074 }, { "epoch": 2.69, "grad_norm": 0.5063169002532959, "learning_rate": 5.599319744257337e-07, "loss": 0.0478, "step": 16075 }, { "epoch": 2.69, "grad_norm": 0.5316877961158752, "learning_rate": 5.593363741897151e-07, "loss": 0.029, "step": 16076 }, { "epoch": 2.69, "grad_norm": 0.49175673723220825, "learning_rate": 5.587410817785021e-07, "loss": 0.0304, "step": 16077 }, { "epoch": 2.69, "grad_norm": 0.40405088663101196, "learning_rate": 5.581460972115038e-07, "loss": 0.0367, "step": 16078 }, { "epoch": 2.69, "grad_norm": 0.4073364734649658, "learning_rate": 5.575514205081245e-07, "loss": 0.0332, "step": 16079 }, { "epoch": 2.69, "grad_norm": 0.312438428401947, "learning_rate": 5.569570516877509e-07, "loss": 0.0194, "step": 16080 }, { "epoch": 2.69, "grad_norm": 0.41205456852912903, "learning_rate": 5.563629907697665e-07, "loss": 0.026, "step": 16081 }, { "epoch": 2.69, "grad_norm": 0.4821542799472809, "learning_rate": 5.557692377735369e-07, "loss": 0.0364, "step": 16082 }, { "epoch": 2.69, "grad_norm": 0.22968919575214386, "learning_rate": 5.551757927184253e-07, "loss": 0.0246, "step": 16083 }, { "epoch": 2.69, "grad_norm": 0.2847362756729126, "learning_rate": 5.545826556237833e-07, "loss": 0.0316, "step": 16084 }, { "epoch": 2.69, "grad_norm": 0.4402781128883362, "learning_rate": 5.539898265089483e-07, "loss": 0.0326, "step": 16085 }, { "epoch": 2.69, "grad_norm": 0.41891229152679443, "learning_rate": 5.533973053932484e-07, "loss": 0.0337, "step": 16086 }, { "epoch": 2.69, "grad_norm": 0.4599109888076782, "learning_rate": 5.528050922960082e-07, "loss": 0.0453, "step": 16087 }, { "epoch": 2.69, "grad_norm": 0.5106506943702698, "learning_rate": 5.522131872365344e-07, "loss": 0.0422, "step": 16088 }, { "epoch": 2.69, "grad_norm": 0.35764214396476746, "learning_rate": 5.516215902341271e-07, "loss": 0.031, "step": 16089 }, { "epoch": 2.69, "grad_norm": 0.320096492767334, "learning_rate": 5.510303013080753e-07, "loss": 0.0265, "step": 16090 }, { "epoch": 2.69, "grad_norm": 0.24161791801452637, "learning_rate": 5.504393204776626e-07, "loss": 0.0162, "step": 16091 }, { "epoch": 2.69, "grad_norm": 0.3251500427722931, "learning_rate": 5.498486477621534e-07, "loss": 0.0272, "step": 16092 }, { "epoch": 2.69, "grad_norm": 0.49191635847091675, "learning_rate": 5.492582831808125e-07, "loss": 0.0425, "step": 16093 }, { "epoch": 2.69, "grad_norm": 0.4348588287830353, "learning_rate": 5.486682267528843e-07, "loss": 0.023, "step": 16094 }, { "epoch": 2.69, "grad_norm": 0.27386248111724854, "learning_rate": 5.480784784976134e-07, "loss": 0.0261, "step": 16095 }, { "epoch": 2.69, "grad_norm": 0.30018144845962524, "learning_rate": 5.474890384342269e-07, "loss": 0.0197, "step": 16096 }, { "epoch": 2.69, "grad_norm": 0.2658584713935852, "learning_rate": 5.468999065819424e-07, "loss": 0.0303, "step": 16097 }, { "epoch": 2.69, "grad_norm": 0.516128420829773, "learning_rate": 5.463110829599716e-07, "loss": 0.0414, "step": 16098 }, { "epoch": 2.69, "grad_norm": 0.30373138189315796, "learning_rate": 5.457225675875144e-07, "loss": 0.0316, "step": 16099 }, { "epoch": 2.69, "grad_norm": 0.44929468631744385, "learning_rate": 5.451343604837578e-07, "loss": 0.0369, "step": 16100 }, { "epoch": 2.69, "grad_norm": 0.3628849387168884, "learning_rate": 5.445464616678831e-07, "loss": 0.0247, "step": 16101 }, { "epoch": 2.69, "grad_norm": 0.4043034315109253, "learning_rate": 5.439588711590593e-07, "loss": 0.0263, "step": 16102 }, { "epoch": 2.69, "grad_norm": 0.38219213485717773, "learning_rate": 5.433715889764435e-07, "loss": 0.0338, "step": 16103 }, { "epoch": 2.69, "grad_norm": 0.40505701303482056, "learning_rate": 5.427846151391869e-07, "loss": 0.0416, "step": 16104 }, { "epoch": 2.69, "grad_norm": 0.36077290773391724, "learning_rate": 5.421979496664276e-07, "loss": 0.0364, "step": 16105 }, { "epoch": 2.69, "grad_norm": 0.422961950302124, "learning_rate": 5.416115925772936e-07, "loss": 0.0304, "step": 16106 }, { "epoch": 2.69, "grad_norm": 0.35164064168930054, "learning_rate": 5.410255438909073e-07, "loss": 0.0358, "step": 16107 }, { "epoch": 2.69, "grad_norm": 0.41460734605789185, "learning_rate": 5.404398036263736e-07, "loss": 0.0245, "step": 16108 }, { "epoch": 2.69, "grad_norm": 0.3447348475456238, "learning_rate": 5.398543718027938e-07, "loss": 0.0264, "step": 16109 }, { "epoch": 2.69, "grad_norm": 0.3514242470264435, "learning_rate": 5.39269248439257e-07, "loss": 0.0262, "step": 16110 }, { "epoch": 2.69, "grad_norm": 0.42008596658706665, "learning_rate": 5.38684433554838e-07, "loss": 0.0271, "step": 16111 }, { "epoch": 2.69, "grad_norm": 0.4119531810283661, "learning_rate": 5.380999271686105e-07, "loss": 0.0283, "step": 16112 }, { "epoch": 2.69, "grad_norm": 0.3750886917114258, "learning_rate": 5.375157292996291e-07, "loss": 0.0341, "step": 16113 }, { "epoch": 2.7, "grad_norm": 0.3296249210834503, "learning_rate": 5.369318399669433e-07, "loss": 0.0253, "step": 16114 }, { "epoch": 2.7, "grad_norm": 0.34193670749664307, "learning_rate": 5.363482591895941e-07, "loss": 0.03, "step": 16115 }, { "epoch": 2.7, "grad_norm": 0.4276120662689209, "learning_rate": 5.357649869866077e-07, "loss": 0.0408, "step": 16116 }, { "epoch": 2.7, "grad_norm": 0.3989274501800537, "learning_rate": 5.351820233770011e-07, "loss": 0.0308, "step": 16117 }, { "epoch": 2.7, "grad_norm": 0.4264039099216461, "learning_rate": 5.345993683797856e-07, "loss": 0.0369, "step": 16118 }, { "epoch": 2.7, "grad_norm": 0.32037055492401123, "learning_rate": 5.340170220139585e-07, "loss": 0.0321, "step": 16119 }, { "epoch": 2.7, "grad_norm": 0.37562480568885803, "learning_rate": 5.334349842985065e-07, "loss": 0.0268, "step": 16120 }, { "epoch": 2.7, "grad_norm": 0.4056777358055115, "learning_rate": 5.328532552524069e-07, "loss": 0.0372, "step": 16121 }, { "epoch": 2.7, "grad_norm": 0.32753369212150574, "learning_rate": 5.32271834894631e-07, "loss": 0.0214, "step": 16122 }, { "epoch": 2.7, "grad_norm": 0.3796357214450836, "learning_rate": 5.316907232441359e-07, "loss": 0.0363, "step": 16123 }, { "epoch": 2.7, "grad_norm": 0.4534607231616974, "learning_rate": 5.311099203198688e-07, "loss": 0.0396, "step": 16124 }, { "epoch": 2.7, "grad_norm": 0.34578245878219604, "learning_rate": 5.305294261407656e-07, "loss": 0.0338, "step": 16125 }, { "epoch": 2.7, "grad_norm": 0.2947586476802826, "learning_rate": 5.299492407257578e-07, "loss": 0.0209, "step": 16126 }, { "epoch": 2.7, "grad_norm": 0.22962413728237152, "learning_rate": 5.293693640937603e-07, "loss": 0.0196, "step": 16127 }, { "epoch": 2.7, "grad_norm": 0.38389360904693604, "learning_rate": 5.287897962636812e-07, "loss": 0.0236, "step": 16128 }, { "epoch": 2.7, "grad_norm": 0.45386508107185364, "learning_rate": 5.282105372544178e-07, "loss": 0.0352, "step": 16129 }, { "epoch": 2.7, "grad_norm": 0.3669476807117462, "learning_rate": 5.276315870848603e-07, "loss": 0.0288, "step": 16130 }, { "epoch": 2.7, "grad_norm": 0.4290362298488617, "learning_rate": 5.270529457738816e-07, "loss": 0.0348, "step": 16131 }, { "epoch": 2.7, "grad_norm": 0.4402174651622772, "learning_rate": 5.264746133403531e-07, "loss": 0.0275, "step": 16132 }, { "epoch": 2.7, "grad_norm": 0.2820174992084503, "learning_rate": 5.258965898031309e-07, "loss": 0.0321, "step": 16133 }, { "epoch": 2.7, "grad_norm": 0.31252437829971313, "learning_rate": 5.253188751810601e-07, "loss": 0.0241, "step": 16134 }, { "epoch": 2.7, "grad_norm": 0.41899776458740234, "learning_rate": 5.247414694929809e-07, "loss": 0.0446, "step": 16135 }, { "epoch": 2.7, "grad_norm": 0.44828709959983826, "learning_rate": 5.241643727577173e-07, "loss": 0.032, "step": 16136 }, { "epoch": 2.7, "grad_norm": 0.37237539887428284, "learning_rate": 5.235875849940896e-07, "loss": 0.0289, "step": 16137 }, { "epoch": 2.7, "grad_norm": 0.3491365909576416, "learning_rate": 5.230111062209009e-07, "loss": 0.0305, "step": 16138 }, { "epoch": 2.7, "grad_norm": 0.3854009807109833, "learning_rate": 5.224349364569503e-07, "loss": 0.0239, "step": 16139 }, { "epoch": 2.7, "grad_norm": 0.45416730642318726, "learning_rate": 5.218590757210262e-07, "loss": 0.0298, "step": 16140 }, { "epoch": 2.7, "grad_norm": 0.4122999310493469, "learning_rate": 5.212835240319025e-07, "loss": 0.0374, "step": 16141 }, { "epoch": 2.7, "grad_norm": 0.3550967574119568, "learning_rate": 5.207082814083453e-07, "loss": 0.0429, "step": 16142 }, { "epoch": 2.7, "grad_norm": 0.5060324668884277, "learning_rate": 5.20133347869114e-07, "loss": 0.03, "step": 16143 }, { "epoch": 2.7, "grad_norm": 0.42198359966278076, "learning_rate": 5.195587234329535e-07, "loss": 0.0255, "step": 16144 }, { "epoch": 2.7, "grad_norm": 0.3896615207195282, "learning_rate": 5.18984408118599e-07, "loss": 0.0322, "step": 16145 }, { "epoch": 2.7, "grad_norm": 0.46978309750556946, "learning_rate": 5.184104019447778e-07, "loss": 0.0321, "step": 16146 }, { "epoch": 2.7, "grad_norm": 0.47737523913383484, "learning_rate": 5.17836704930208e-07, "loss": 0.0332, "step": 16147 }, { "epoch": 2.7, "grad_norm": 0.3318828344345093, "learning_rate": 5.172633170935926e-07, "loss": 0.0247, "step": 16148 }, { "epoch": 2.7, "grad_norm": 0.40265026688575745, "learning_rate": 5.166902384536299e-07, "loss": 0.0246, "step": 16149 }, { "epoch": 2.7, "grad_norm": 0.42997080087661743, "learning_rate": 5.161174690290039e-07, "loss": 0.023, "step": 16150 }, { "epoch": 2.7, "grad_norm": 0.40443217754364014, "learning_rate": 5.155450088383917e-07, "loss": 0.0355, "step": 16151 }, { "epoch": 2.7, "grad_norm": 0.5807145833969116, "learning_rate": 5.149728579004609e-07, "loss": 0.0294, "step": 16152 }, { "epoch": 2.7, "grad_norm": 0.4050914943218231, "learning_rate": 5.14401016233862e-07, "loss": 0.0311, "step": 16153 }, { "epoch": 2.7, "grad_norm": 0.33407244086265564, "learning_rate": 5.138294838572455e-07, "loss": 0.0378, "step": 16154 }, { "epoch": 2.7, "grad_norm": 0.4619423747062683, "learning_rate": 5.132582607892455e-07, "loss": 0.0451, "step": 16155 }, { "epoch": 2.7, "grad_norm": 0.47199001908302307, "learning_rate": 5.126873470484861e-07, "loss": 0.036, "step": 16156 }, { "epoch": 2.7, "grad_norm": 0.32405197620391846, "learning_rate": 5.121167426535855e-07, "loss": 0.0296, "step": 16157 }, { "epoch": 2.7, "grad_norm": 0.4505775570869446, "learning_rate": 5.115464476231469e-07, "loss": 0.0304, "step": 16158 }, { "epoch": 2.7, "grad_norm": 0.3737231492996216, "learning_rate": 5.10976461975764e-07, "loss": 0.0368, "step": 16159 }, { "epoch": 2.7, "grad_norm": 0.35265812277793884, "learning_rate": 5.104067857300255e-07, "loss": 0.0185, "step": 16160 }, { "epoch": 2.7, "grad_norm": 0.39350008964538574, "learning_rate": 5.098374189045041e-07, "loss": 0.0417, "step": 16161 }, { "epoch": 2.7, "grad_norm": 0.3472262918949127, "learning_rate": 5.092683615177651e-07, "loss": 0.0245, "step": 16162 }, { "epoch": 2.7, "grad_norm": 0.3222516179084778, "learning_rate": 5.086996135883638e-07, "loss": 0.0248, "step": 16163 }, { "epoch": 2.7, "grad_norm": 0.438410222530365, "learning_rate": 5.08131175134845e-07, "loss": 0.0248, "step": 16164 }, { "epoch": 2.7, "grad_norm": 0.35304659605026245, "learning_rate": 5.07563046175743e-07, "loss": 0.0335, "step": 16165 }, { "epoch": 2.7, "grad_norm": 0.402182936668396, "learning_rate": 5.069952267295831e-07, "loss": 0.0407, "step": 16166 }, { "epoch": 2.7, "grad_norm": 0.36048588156700134, "learning_rate": 5.064277168148779e-07, "loss": 0.0238, "step": 16167 }, { "epoch": 2.7, "grad_norm": 0.4100050926208496, "learning_rate": 5.05860516450134e-07, "loss": 0.0279, "step": 16168 }, { "epoch": 2.7, "grad_norm": 0.5040249824523926, "learning_rate": 5.052936256538432e-07, "loss": 0.039, "step": 16169 }, { "epoch": 2.7, "grad_norm": 0.4226832687854767, "learning_rate": 5.047270444444908e-07, "loss": 0.032, "step": 16170 }, { "epoch": 2.7, "grad_norm": 0.33942514657974243, "learning_rate": 5.041607728405529e-07, "loss": 0.0317, "step": 16171 }, { "epoch": 2.7, "grad_norm": 0.47691506147384644, "learning_rate": 5.035948108604927e-07, "loss": 0.0435, "step": 16172 }, { "epoch": 2.7, "grad_norm": 0.4352850019931793, "learning_rate": 5.030291585227609e-07, "loss": 0.031, "step": 16173 }, { "epoch": 2.71, "grad_norm": 0.42745646834373474, "learning_rate": 5.02463815845805e-07, "loss": 0.0318, "step": 16174 }, { "epoch": 2.71, "grad_norm": 0.37452980875968933, "learning_rate": 5.018987828480581e-07, "loss": 0.0278, "step": 16175 }, { "epoch": 2.71, "grad_norm": 0.4552760720252991, "learning_rate": 5.013340595479421e-07, "loss": 0.0339, "step": 16176 }, { "epoch": 2.71, "grad_norm": 0.41095706820487976, "learning_rate": 5.0076964596387e-07, "loss": 0.0279, "step": 16177 }, { "epoch": 2.71, "grad_norm": 0.38021841645240784, "learning_rate": 5.002055421142482e-07, "loss": 0.0349, "step": 16178 }, { "epoch": 2.71, "grad_norm": 0.36049985885620117, "learning_rate": 4.996417480174687e-07, "loss": 0.0318, "step": 16179 }, { "epoch": 2.71, "grad_norm": 0.39104074239730835, "learning_rate": 4.990782636919156e-07, "loss": 0.0298, "step": 16180 }, { "epoch": 2.71, "grad_norm": 0.30059394240379333, "learning_rate": 4.985150891559598e-07, "loss": 0.0296, "step": 16181 }, { "epoch": 2.71, "grad_norm": 0.44536614418029785, "learning_rate": 4.979522244279656e-07, "loss": 0.0357, "step": 16182 }, { "epoch": 2.71, "grad_norm": 0.4517285227775574, "learning_rate": 4.97389669526287e-07, "loss": 0.0261, "step": 16183 }, { "epoch": 2.71, "grad_norm": 0.4342353940010071, "learning_rate": 4.968274244692639e-07, "loss": 0.0236, "step": 16184 }, { "epoch": 2.71, "grad_norm": 0.43590182065963745, "learning_rate": 4.962654892752317e-07, "loss": 0.0457, "step": 16185 }, { "epoch": 2.71, "grad_norm": 0.2556169629096985, "learning_rate": 4.957038639625112e-07, "loss": 0.021, "step": 16186 }, { "epoch": 2.71, "grad_norm": 0.5902800559997559, "learning_rate": 4.951425485494166e-07, "loss": 0.042, "step": 16187 }, { "epoch": 2.71, "grad_norm": 0.5213871002197266, "learning_rate": 4.9458154305425e-07, "loss": 0.0348, "step": 16188 }, { "epoch": 2.71, "grad_norm": 0.36685749888420105, "learning_rate": 4.940208474953023e-07, "loss": 0.0254, "step": 16189 }, { "epoch": 2.71, "grad_norm": 0.47905999422073364, "learning_rate": 4.934604618908567e-07, "loss": 0.0376, "step": 16190 }, { "epoch": 2.71, "grad_norm": 0.3544597029685974, "learning_rate": 4.929003862591864e-07, "loss": 0.033, "step": 16191 }, { "epoch": 2.71, "grad_norm": 0.30585867166519165, "learning_rate": 4.923406206185499e-07, "loss": 0.0227, "step": 16192 }, { "epoch": 2.71, "grad_norm": 0.3413200080394745, "learning_rate": 4.917811649872029e-07, "loss": 0.0349, "step": 16193 }, { "epoch": 2.71, "grad_norm": 0.35253241658210754, "learning_rate": 4.91222019383385e-07, "loss": 0.0335, "step": 16194 }, { "epoch": 2.71, "grad_norm": 0.32985273003578186, "learning_rate": 4.906631838253273e-07, "loss": 0.0415, "step": 16195 }, { "epoch": 2.71, "grad_norm": 0.3924436569213867, "learning_rate": 4.901046583312541e-07, "loss": 0.0334, "step": 16196 }, { "epoch": 2.71, "grad_norm": 0.5065590739250183, "learning_rate": 4.895464429193752e-07, "loss": 0.0346, "step": 16197 }, { "epoch": 2.71, "grad_norm": 0.4470362067222595, "learning_rate": 4.889885376078917e-07, "loss": 0.0358, "step": 16198 }, { "epoch": 2.71, "grad_norm": 0.3490317463874817, "learning_rate": 4.884309424149946e-07, "loss": 0.0187, "step": 16199 }, { "epoch": 2.71, "grad_norm": 0.4888622760772705, "learning_rate": 4.878736573588672e-07, "loss": 0.0343, "step": 16200 }, { "epoch": 2.71, "grad_norm": 0.5080068707466125, "learning_rate": 4.873166824576759e-07, "loss": 0.03, "step": 16201 }, { "epoch": 2.71, "grad_norm": 0.3076474368572235, "learning_rate": 4.86760017729585e-07, "loss": 0.0259, "step": 16202 }, { "epoch": 2.71, "grad_norm": 0.325661301612854, "learning_rate": 4.862036631927458e-07, "loss": 0.0393, "step": 16203 }, { "epoch": 2.71, "grad_norm": 0.4012129306793213, "learning_rate": 4.85647618865297e-07, "loss": 0.0251, "step": 16204 }, { "epoch": 2.71, "grad_norm": 0.44390586018562317, "learning_rate": 4.850918847653707e-07, "loss": 0.0319, "step": 16205 }, { "epoch": 2.71, "grad_norm": 0.3716781437397003, "learning_rate": 4.845364609110847e-07, "loss": 0.0259, "step": 16206 }, { "epoch": 2.71, "grad_norm": 0.44040775299072266, "learning_rate": 4.839813473205534e-07, "loss": 0.0484, "step": 16207 }, { "epoch": 2.71, "grad_norm": 0.4322923421859741, "learning_rate": 4.834265440118746e-07, "loss": 0.0482, "step": 16208 }, { "epoch": 2.71, "grad_norm": 0.3961075246334076, "learning_rate": 4.828720510031371e-07, "loss": 0.0209, "step": 16209 }, { "epoch": 2.71, "grad_norm": 0.35393184423446655, "learning_rate": 4.823178683124219e-07, "loss": 0.0257, "step": 16210 }, { "epoch": 2.71, "grad_norm": 0.3438399136066437, "learning_rate": 4.817639959578002e-07, "loss": 0.0224, "step": 16211 }, { "epoch": 2.71, "grad_norm": 0.4823223054409027, "learning_rate": 4.812104339573298e-07, "loss": 0.0433, "step": 16212 }, { "epoch": 2.71, "grad_norm": 0.38224658370018005, "learning_rate": 4.806571823290629e-07, "loss": 0.0332, "step": 16213 }, { "epoch": 2.71, "grad_norm": 0.3376676142215729, "learning_rate": 4.801042410910361e-07, "loss": 0.0234, "step": 16214 }, { "epoch": 2.71, "grad_norm": 0.3889021575450897, "learning_rate": 4.795516102612796e-07, "loss": 0.0369, "step": 16215 }, { "epoch": 2.71, "grad_norm": 0.4090011715888977, "learning_rate": 4.789992898578133e-07, "loss": 0.0277, "step": 16216 }, { "epoch": 2.71, "grad_norm": 0.3849618136882782, "learning_rate": 4.78447279898645e-07, "loss": 0.0367, "step": 16217 }, { "epoch": 2.71, "grad_norm": 0.498920202255249, "learning_rate": 4.778955804017749e-07, "loss": 0.0435, "step": 16218 }, { "epoch": 2.71, "grad_norm": 0.3116241693496704, "learning_rate": 4.773441913851929e-07, "loss": 0.0252, "step": 16219 }, { "epoch": 2.71, "grad_norm": 0.29130983352661133, "learning_rate": 4.7679311286687457e-07, "loss": 0.0197, "step": 16220 }, { "epoch": 2.71, "grad_norm": 0.31489208340644836, "learning_rate": 4.762423448647924e-07, "loss": 0.0251, "step": 16221 }, { "epoch": 2.71, "grad_norm": 0.36328285932540894, "learning_rate": 4.7569188739690186e-07, "loss": 0.0214, "step": 16222 }, { "epoch": 2.71, "grad_norm": 0.34910041093826294, "learning_rate": 4.75141740481152e-07, "loss": 0.0236, "step": 16223 }, { "epoch": 2.71, "grad_norm": 0.4288341999053955, "learning_rate": 4.7459190413548185e-07, "loss": 0.0295, "step": 16224 }, { "epoch": 2.71, "grad_norm": 0.30683407187461853, "learning_rate": 4.7404237837781807e-07, "loss": 0.0263, "step": 16225 }, { "epoch": 2.71, "grad_norm": 0.4574747085571289, "learning_rate": 4.734931632260786e-07, "loss": 0.039, "step": 16226 }, { "epoch": 2.71, "grad_norm": 0.4226122796535492, "learning_rate": 4.7294425869817473e-07, "loss": 0.0284, "step": 16227 }, { "epoch": 2.71, "grad_norm": 0.2798231542110443, "learning_rate": 4.7239566481200206e-07, "loss": 0.0282, "step": 16228 }, { "epoch": 2.71, "grad_norm": 0.49363642930984497, "learning_rate": 4.718473815854452e-07, "loss": 0.0517, "step": 16229 }, { "epoch": 2.71, "grad_norm": 0.3887825906276703, "learning_rate": 4.712994090363865e-07, "loss": 0.0352, "step": 16230 }, { "epoch": 2.71, "grad_norm": 0.35276705026626587, "learning_rate": 4.7075174718269165e-07, "loss": 0.0238, "step": 16231 }, { "epoch": 2.71, "grad_norm": 0.46568813920021057, "learning_rate": 4.7020439604221535e-07, "loss": 0.0462, "step": 16232 }, { "epoch": 2.72, "grad_norm": 0.31982576847076416, "learning_rate": 4.696573556328066e-07, "loss": 0.0295, "step": 16233 }, { "epoch": 2.72, "grad_norm": 0.4277704060077667, "learning_rate": 4.691106259723033e-07, "loss": 0.022, "step": 16234 }, { "epoch": 2.72, "grad_norm": 0.38042405247688293, "learning_rate": 4.6856420707853014e-07, "loss": 0.0297, "step": 16235 }, { "epoch": 2.72, "grad_norm": 0.45703068375587463, "learning_rate": 4.680180989693062e-07, "loss": 0.0335, "step": 16236 }, { "epoch": 2.72, "grad_norm": 0.4269281327724457, "learning_rate": 4.6747230166243606e-07, "loss": 0.0342, "step": 16237 }, { "epoch": 2.72, "grad_norm": 0.33122849464416504, "learning_rate": 4.6692681517571783e-07, "loss": 0.0248, "step": 16238 }, { "epoch": 2.72, "grad_norm": 0.4486284852027893, "learning_rate": 4.6638163952693717e-07, "loss": 0.0471, "step": 16239 }, { "epoch": 2.72, "grad_norm": 0.6693540811538696, "learning_rate": 4.658367747338688e-07, "loss": 0.0359, "step": 16240 }, { "epoch": 2.72, "grad_norm": 0.3563431203365326, "learning_rate": 4.6529222081428074e-07, "loss": 0.0281, "step": 16241 }, { "epoch": 2.72, "grad_norm": 0.5550795793533325, "learning_rate": 4.647479777859265e-07, "loss": 0.03, "step": 16242 }, { "epoch": 2.72, "grad_norm": 0.2677777409553528, "learning_rate": 4.642040456665531e-07, "loss": 0.0211, "step": 16243 }, { "epoch": 2.72, "grad_norm": 0.38090261816978455, "learning_rate": 4.636604244738985e-07, "loss": 0.0253, "step": 16244 }, { "epoch": 2.72, "grad_norm": 0.34885355830192566, "learning_rate": 4.6311711422568515e-07, "loss": 0.0357, "step": 16245 }, { "epoch": 2.72, "grad_norm": 0.5141147375106812, "learning_rate": 4.625741149396279e-07, "loss": 0.0418, "step": 16246 }, { "epoch": 2.72, "grad_norm": 0.4248979985713959, "learning_rate": 4.620314266334347e-07, "loss": 0.0384, "step": 16247 }, { "epoch": 2.72, "grad_norm": 0.30828195810317993, "learning_rate": 4.614890493247981e-07, "loss": 0.0392, "step": 16248 }, { "epoch": 2.72, "grad_norm": 0.5638342499732971, "learning_rate": 4.6094698303140504e-07, "loss": 0.0408, "step": 16249 }, { "epoch": 2.72, "grad_norm": 0.5635710954666138, "learning_rate": 4.604052277709281e-07, "loss": 0.0492, "step": 16250 }, { "epoch": 2.72, "grad_norm": 0.5177212953567505, "learning_rate": 4.59863783561032e-07, "loss": 0.0258, "step": 16251 }, { "epoch": 2.72, "grad_norm": 0.29283407330513, "learning_rate": 4.5932265041937484e-07, "loss": 0.0246, "step": 16252 }, { "epoch": 2.72, "grad_norm": 0.5489526987075806, "learning_rate": 4.587818283635981e-07, "loss": 0.043, "step": 16253 }, { "epoch": 2.72, "grad_norm": 0.2572725713253021, "learning_rate": 4.5824131741133425e-07, "loss": 0.021, "step": 16254 }, { "epoch": 2.72, "grad_norm": 0.4267481863498688, "learning_rate": 4.5770111758021154e-07, "loss": 0.0389, "step": 16255 }, { "epoch": 2.72, "grad_norm": 0.6423265933990479, "learning_rate": 4.571612288878413e-07, "loss": 0.0542, "step": 16256 }, { "epoch": 2.72, "grad_norm": 0.3496514856815338, "learning_rate": 4.5662165135182623e-07, "loss": 0.0377, "step": 16257 }, { "epoch": 2.72, "grad_norm": 0.37930819392204285, "learning_rate": 4.560823849897611e-07, "loss": 0.0302, "step": 16258 }, { "epoch": 2.72, "grad_norm": 0.3166276216506958, "learning_rate": 4.5554342981923186e-07, "loss": 0.0202, "step": 16259 }, { "epoch": 2.72, "grad_norm": 0.44420284032821655, "learning_rate": 4.550047858578077e-07, "loss": 0.0254, "step": 16260 }, { "epoch": 2.72, "grad_norm": 0.30367612838745117, "learning_rate": 4.544664531230547e-07, "loss": 0.0226, "step": 16261 }, { "epoch": 2.72, "grad_norm": 0.39858096837997437, "learning_rate": 4.539284316325232e-07, "loss": 0.0273, "step": 16262 }, { "epoch": 2.72, "grad_norm": 0.43445512652397156, "learning_rate": 4.5339072140376026e-07, "loss": 0.0287, "step": 16263 }, { "epoch": 2.72, "grad_norm": 0.321811705827713, "learning_rate": 4.5285332245429523e-07, "loss": 0.0366, "step": 16264 }, { "epoch": 2.72, "grad_norm": 0.6585462689399719, "learning_rate": 4.523162348016508e-07, "loss": 0.0449, "step": 16265 }, { "epoch": 2.72, "grad_norm": 0.3530430197715759, "learning_rate": 4.517794584633395e-07, "loss": 0.0346, "step": 16266 }, { "epoch": 2.72, "grad_norm": 0.365492045879364, "learning_rate": 4.512429934568652e-07, "loss": 0.0249, "step": 16267 }, { "epoch": 2.72, "grad_norm": 0.4859154224395752, "learning_rate": 4.507068397997183e-07, "loss": 0.032, "step": 16268 }, { "epoch": 2.72, "grad_norm": 0.3865460753440857, "learning_rate": 4.5017099750938267e-07, "loss": 0.0299, "step": 16269 }, { "epoch": 2.72, "grad_norm": 0.4143534004688263, "learning_rate": 4.496354666033298e-07, "loss": 0.0392, "step": 16270 }, { "epoch": 2.72, "grad_norm": 0.3297938108444214, "learning_rate": 4.49100247099018e-07, "loss": 0.029, "step": 16271 }, { "epoch": 2.72, "grad_norm": 0.38442885875701904, "learning_rate": 4.4856533901390334e-07, "loss": 0.0303, "step": 16272 }, { "epoch": 2.72, "grad_norm": 0.45411261916160583, "learning_rate": 4.48030742365424e-07, "loss": 0.0237, "step": 16273 }, { "epoch": 2.72, "grad_norm": 0.4413766860961914, "learning_rate": 4.4749645717101163e-07, "loss": 0.0382, "step": 16274 }, { "epoch": 2.72, "grad_norm": 0.42248329520225525, "learning_rate": 4.469624834480901e-07, "loss": 0.0423, "step": 16275 }, { "epoch": 2.72, "grad_norm": 0.37660473585128784, "learning_rate": 4.464288212140655e-07, "loss": 0.0284, "step": 16276 }, { "epoch": 2.72, "grad_norm": 0.4030592143535614, "learning_rate": 4.4589547048634384e-07, "loss": 0.0257, "step": 16277 }, { "epoch": 2.72, "grad_norm": 0.33788567781448364, "learning_rate": 4.4536243128231237e-07, "loss": 0.0287, "step": 16278 }, { "epoch": 2.72, "grad_norm": 0.47182705998420715, "learning_rate": 4.4482970361935163e-07, "loss": 0.0318, "step": 16279 }, { "epoch": 2.72, "grad_norm": 0.34486088156700134, "learning_rate": 4.442972875148333e-07, "loss": 0.0317, "step": 16280 }, { "epoch": 2.72, "grad_norm": 0.3106572926044464, "learning_rate": 4.437651829861156e-07, "loss": 0.0246, "step": 16281 }, { "epoch": 2.72, "grad_norm": 0.39487403631210327, "learning_rate": 4.4323339005054923e-07, "loss": 0.0363, "step": 16282 }, { "epoch": 2.72, "grad_norm": 0.37407171726226807, "learning_rate": 4.427019087254758e-07, "loss": 0.0314, "step": 16283 }, { "epoch": 2.72, "grad_norm": 0.4238935112953186, "learning_rate": 4.421707390282226e-07, "loss": 0.0256, "step": 16284 }, { "epoch": 2.72, "grad_norm": 0.4623579680919647, "learning_rate": 4.416398809761091e-07, "loss": 0.0335, "step": 16285 }, { "epoch": 2.72, "grad_norm": 0.5209847092628479, "learning_rate": 4.41109334586447e-07, "loss": 0.0287, "step": 16286 }, { "epoch": 2.72, "grad_norm": 0.3975418210029602, "learning_rate": 4.405790998765336e-07, "loss": 0.0291, "step": 16287 }, { "epoch": 2.72, "grad_norm": 0.4117403030395508, "learning_rate": 4.4004917686365724e-07, "loss": 0.0404, "step": 16288 }, { "epoch": 2.72, "grad_norm": 0.23740831017494202, "learning_rate": 4.395195655650986e-07, "loss": 0.0147, "step": 16289 }, { "epoch": 2.72, "grad_norm": 0.25090456008911133, "learning_rate": 4.3899026599812377e-07, "loss": 0.0177, "step": 16290 }, { "epoch": 2.72, "grad_norm": 0.3709392547607422, "learning_rate": 4.384612781799935e-07, "loss": 0.0236, "step": 16291 }, { "epoch": 2.72, "grad_norm": 0.3747105598449707, "learning_rate": 4.379326021279562e-07, "loss": 0.0336, "step": 16292 }, { "epoch": 2.73, "grad_norm": 0.37611255049705505, "learning_rate": 4.37404237859248e-07, "loss": 0.0203, "step": 16293 }, { "epoch": 2.73, "grad_norm": 0.4727378189563751, "learning_rate": 4.368761853910997e-07, "loss": 0.0276, "step": 16294 }, { "epoch": 2.73, "grad_norm": 0.4094296097755432, "learning_rate": 4.3634844474072733e-07, "loss": 0.0272, "step": 16295 }, { "epoch": 2.73, "grad_norm": 0.4250635504722595, "learning_rate": 4.358210159253373e-07, "loss": 0.0362, "step": 16296 }, { "epoch": 2.73, "grad_norm": 0.33051174879074097, "learning_rate": 4.3529389896212914e-07, "loss": 0.0232, "step": 16297 }, { "epoch": 2.73, "grad_norm": 0.513822078704834, "learning_rate": 4.347670938682891e-07, "loss": 0.0386, "step": 16298 }, { "epoch": 2.73, "grad_norm": 0.28482088446617126, "learning_rate": 4.342406006609956e-07, "loss": 0.0273, "step": 16299 }, { "epoch": 2.73, "grad_norm": 0.486605167388916, "learning_rate": 4.33714419357415e-07, "loss": 0.0364, "step": 16300 }, { "epoch": 2.73, "grad_norm": 0.3069838881492615, "learning_rate": 4.331885499747035e-07, "loss": 0.0296, "step": 16301 }, { "epoch": 2.73, "grad_norm": 0.5393485426902771, "learning_rate": 4.326629925300074e-07, "loss": 0.03, "step": 16302 }, { "epoch": 2.73, "grad_norm": 0.4337485730648041, "learning_rate": 4.321377470404664e-07, "loss": 0.0225, "step": 16303 }, { "epoch": 2.73, "grad_norm": 0.3706757426261902, "learning_rate": 4.3161281352320116e-07, "loss": 0.0283, "step": 16304 }, { "epoch": 2.73, "grad_norm": 0.4052448868751526, "learning_rate": 4.3108819199533357e-07, "loss": 0.0363, "step": 16305 }, { "epoch": 2.73, "grad_norm": 0.40311679244041443, "learning_rate": 4.305638824739655e-07, "loss": 0.021, "step": 16306 }, { "epoch": 2.73, "grad_norm": 0.44771409034729004, "learning_rate": 4.300398849761933e-07, "loss": 0.0399, "step": 16307 }, { "epoch": 2.73, "grad_norm": 0.33517852425575256, "learning_rate": 4.295161995191055e-07, "loss": 0.0276, "step": 16308 }, { "epoch": 2.73, "grad_norm": 0.4723924696445465, "learning_rate": 4.2899282611977513e-07, "loss": 0.038, "step": 16309 }, { "epoch": 2.73, "grad_norm": 0.4254777729511261, "learning_rate": 4.2846976479526734e-07, "loss": 0.0496, "step": 16310 }, { "epoch": 2.73, "grad_norm": 0.41199421882629395, "learning_rate": 4.279470155626375e-07, "loss": 0.0309, "step": 16311 }, { "epoch": 2.73, "grad_norm": 0.3950190842151642, "learning_rate": 4.274245784389319e-07, "loss": 0.017, "step": 16312 }, { "epoch": 2.73, "grad_norm": 0.4160279631614685, "learning_rate": 4.269024534411825e-07, "loss": 0.0305, "step": 16313 }, { "epoch": 2.73, "grad_norm": 0.45882448554039, "learning_rate": 4.263806405864157e-07, "loss": 0.0316, "step": 16314 }, { "epoch": 2.73, "grad_norm": 0.42726099491119385, "learning_rate": 4.2585913989164673e-07, "loss": 0.031, "step": 16315 }, { "epoch": 2.73, "grad_norm": 0.49094873666763306, "learning_rate": 4.253379513738765e-07, "loss": 0.0343, "step": 16316 }, { "epoch": 2.73, "grad_norm": 0.2766798138618469, "learning_rate": 4.248170750501035e-07, "loss": 0.035, "step": 16317 }, { "epoch": 2.73, "grad_norm": 0.3246956467628479, "learning_rate": 4.242965109373076e-07, "loss": 0.0213, "step": 16318 }, { "epoch": 2.73, "grad_norm": 0.4237457811832428, "learning_rate": 4.237762590524652e-07, "loss": 0.0216, "step": 16319 }, { "epoch": 2.73, "grad_norm": 0.5671192407608032, "learning_rate": 4.232563194125383e-07, "loss": 0.0402, "step": 16320 }, { "epoch": 2.73, "grad_norm": 0.3537232279777527, "learning_rate": 4.2273669203447996e-07, "loss": 0.0374, "step": 16321 }, { "epoch": 2.73, "grad_norm": 0.521166980266571, "learning_rate": 4.2221737693523337e-07, "loss": 0.0582, "step": 16322 }, { "epoch": 2.73, "grad_norm": 0.43110406398773193, "learning_rate": 4.2169837413173374e-07, "loss": 0.0332, "step": 16323 }, { "epoch": 2.73, "grad_norm": 0.3995318114757538, "learning_rate": 4.211796836408999e-07, "loss": 0.0451, "step": 16324 }, { "epoch": 2.73, "grad_norm": 0.35704270005226135, "learning_rate": 4.206613054796493e-07, "loss": 0.0323, "step": 16325 }, { "epoch": 2.73, "grad_norm": 0.3010474443435669, "learning_rate": 4.201432396648797e-07, "loss": 0.0264, "step": 16326 }, { "epoch": 2.73, "grad_norm": 0.30372154712677, "learning_rate": 4.196254862134852e-07, "loss": 0.0365, "step": 16327 }, { "epoch": 2.73, "grad_norm": 0.5032132863998413, "learning_rate": 4.19108045142349e-07, "loss": 0.0246, "step": 16328 }, { "epoch": 2.73, "grad_norm": 0.45527854561805725, "learning_rate": 4.1859091646833995e-07, "loss": 0.0408, "step": 16329 }, { "epoch": 2.73, "grad_norm": 0.3221859633922577, "learning_rate": 4.1807410020832106e-07, "loss": 0.0228, "step": 16330 }, { "epoch": 2.73, "grad_norm": 0.31346291303634644, "learning_rate": 4.175575963791456e-07, "loss": 0.0239, "step": 16331 }, { "epoch": 2.73, "grad_norm": 0.41240522265434265, "learning_rate": 4.1704140499765234e-07, "loss": 0.0331, "step": 16332 }, { "epoch": 2.73, "grad_norm": 0.4445585012435913, "learning_rate": 4.1652552608067444e-07, "loss": 0.0419, "step": 16333 }, { "epoch": 2.73, "grad_norm": 0.35824769735336304, "learning_rate": 4.160099596450318e-07, "loss": 0.033, "step": 16334 }, { "epoch": 2.73, "grad_norm": 0.4763824939727783, "learning_rate": 4.1549470570753424e-07, "loss": 0.038, "step": 16335 }, { "epoch": 2.73, "grad_norm": 0.4566405117511749, "learning_rate": 4.14979764284984e-07, "loss": 0.0283, "step": 16336 }, { "epoch": 2.73, "grad_norm": 0.3564503490924835, "learning_rate": 4.1446513539416865e-07, "loss": 0.0199, "step": 16337 }, { "epoch": 2.73, "grad_norm": 0.34603509306907654, "learning_rate": 4.1395081905187265e-07, "loss": 0.0353, "step": 16338 }, { "epoch": 2.73, "grad_norm": 0.30392855405807495, "learning_rate": 4.1343681527486136e-07, "loss": 0.0197, "step": 16339 }, { "epoch": 2.73, "grad_norm": 0.48715344071388245, "learning_rate": 4.12923124079897e-07, "loss": 0.0362, "step": 16340 }, { "epoch": 2.73, "grad_norm": 0.3946059048175812, "learning_rate": 4.1240974548372834e-07, "loss": 0.036, "step": 16341 }, { "epoch": 2.73, "grad_norm": 0.3633868098258972, "learning_rate": 4.1189667950309654e-07, "loss": 0.0293, "step": 16342 }, { "epoch": 2.73, "grad_norm": 0.44490689039230347, "learning_rate": 4.113839261547292e-07, "loss": 0.0369, "step": 16343 }, { "epoch": 2.73, "grad_norm": 0.5102862119674683, "learning_rate": 4.1087148545534417e-07, "loss": 0.0299, "step": 16344 }, { "epoch": 2.73, "grad_norm": 0.3048441410064697, "learning_rate": 4.1035935742165245e-07, "loss": 0.0258, "step": 16345 }, { "epoch": 2.73, "grad_norm": 0.4268339276313782, "learning_rate": 4.098475420703507e-07, "loss": 0.0192, "step": 16346 }, { "epoch": 2.73, "grad_norm": 0.28704673051834106, "learning_rate": 4.093360394181278e-07, "loss": 0.0211, "step": 16347 }, { "epoch": 2.73, "grad_norm": 0.31475746631622314, "learning_rate": 4.088248494816649e-07, "loss": 0.0226, "step": 16348 }, { "epoch": 2.73, "grad_norm": 0.35490429401397705, "learning_rate": 4.0831397227762526e-07, "loss": 0.0315, "step": 16349 }, { "epoch": 2.73, "grad_norm": 0.38765189051628113, "learning_rate": 4.0780340782267e-07, "loss": 0.0242, "step": 16350 }, { "epoch": 2.73, "grad_norm": 0.28040406107902527, "learning_rate": 4.072931561334459e-07, "loss": 0.0306, "step": 16351 }, { "epoch": 2.73, "grad_norm": 0.39989545941352844, "learning_rate": 4.0678321722658954e-07, "loss": 0.0313, "step": 16352 }, { "epoch": 2.74, "grad_norm": 0.39432117342948914, "learning_rate": 4.0627359111872877e-07, "loss": 0.0254, "step": 16353 }, { "epoch": 2.74, "grad_norm": 0.46598029136657715, "learning_rate": 4.0576427782648033e-07, "loss": 0.0289, "step": 16354 }, { "epoch": 2.74, "grad_norm": 0.2916600704193115, "learning_rate": 4.052552773664509e-07, "loss": 0.016, "step": 16355 }, { "epoch": 2.74, "grad_norm": 0.3886891007423401, "learning_rate": 4.047465897552383e-07, "loss": 0.0262, "step": 16356 }, { "epoch": 2.74, "grad_norm": 0.5245652794837952, "learning_rate": 4.042382150094282e-07, "loss": 0.0305, "step": 16357 }, { "epoch": 2.74, "grad_norm": 0.40777990221977234, "learning_rate": 4.0373015314559507e-07, "loss": 0.0337, "step": 16358 }, { "epoch": 2.74, "grad_norm": 0.40127307176589966, "learning_rate": 4.0322240418030787e-07, "loss": 0.0327, "step": 16359 }, { "epoch": 2.74, "grad_norm": 0.44327273964881897, "learning_rate": 4.027149681301201e-07, "loss": 0.0304, "step": 16360 }, { "epoch": 2.74, "grad_norm": 0.31963080167770386, "learning_rate": 4.022078450115796e-07, "loss": 0.0252, "step": 16361 }, { "epoch": 2.74, "grad_norm": 0.3896486163139343, "learning_rate": 4.017010348412187e-07, "loss": 0.0316, "step": 16362 }, { "epoch": 2.74, "grad_norm": 0.3975392282009125, "learning_rate": 4.0119453763556413e-07, "loss": 0.0324, "step": 16363 }, { "epoch": 2.74, "grad_norm": 0.3914758563041687, "learning_rate": 4.006883534111328e-07, "loss": 0.0242, "step": 16364 }, { "epoch": 2.74, "grad_norm": 0.28047141432762146, "learning_rate": 4.00182482184428e-07, "loss": 0.0295, "step": 16365 }, { "epoch": 2.74, "grad_norm": 0.37846341729164124, "learning_rate": 3.9967692397194225e-07, "loss": 0.0241, "step": 16366 }, { "epoch": 2.74, "grad_norm": 0.4434555470943451, "learning_rate": 3.991716787901634e-07, "loss": 0.0316, "step": 16367 }, { "epoch": 2.74, "grad_norm": 0.4018155634403229, "learning_rate": 3.986667466555638e-07, "loss": 0.0231, "step": 16368 }, { "epoch": 2.74, "grad_norm": 0.37415751814842224, "learning_rate": 3.9816212758460594e-07, "loss": 0.0264, "step": 16369 }, { "epoch": 2.74, "grad_norm": 0.3718344569206238, "learning_rate": 3.9765782159374544e-07, "loss": 0.0271, "step": 16370 }, { "epoch": 2.74, "grad_norm": 0.42179954051971436, "learning_rate": 3.9715382869942586e-07, "loss": 0.031, "step": 16371 }, { "epoch": 2.74, "grad_norm": 0.4823681712150574, "learning_rate": 3.9665014891808075e-07, "loss": 0.0314, "step": 16372 }, { "epoch": 2.74, "grad_norm": 0.3590700030326843, "learning_rate": 3.9614678226613244e-07, "loss": 0.0303, "step": 16373 }, { "epoch": 2.74, "grad_norm": 0.2765328288078308, "learning_rate": 3.9564372875999347e-07, "loss": 0.0209, "step": 16374 }, { "epoch": 2.74, "grad_norm": 0.3693596124649048, "learning_rate": 3.951409884160684e-07, "loss": 0.0188, "step": 16375 }, { "epoch": 2.74, "grad_norm": 0.5624860525131226, "learning_rate": 3.946385612507486e-07, "loss": 0.0345, "step": 16376 }, { "epoch": 2.74, "grad_norm": 0.5318848490715027, "learning_rate": 3.941364472804143e-07, "loss": 0.0388, "step": 16377 }, { "epoch": 2.74, "grad_norm": 0.3908933997154236, "learning_rate": 3.9363464652144024e-07, "loss": 0.0325, "step": 16378 }, { "epoch": 2.74, "grad_norm": 0.32873132824897766, "learning_rate": 3.9313315899018876e-07, "loss": 0.0196, "step": 16379 }, { "epoch": 2.74, "grad_norm": 0.2943274974822998, "learning_rate": 3.92631984703008e-07, "loss": 0.029, "step": 16380 }, { "epoch": 2.74, "grad_norm": 0.40724149346351624, "learning_rate": 3.9213112367624374e-07, "loss": 0.0377, "step": 16381 }, { "epoch": 2.74, "grad_norm": 0.3060458302497864, "learning_rate": 3.916305759262251e-07, "loss": 0.0309, "step": 16382 }, { "epoch": 2.74, "grad_norm": 0.3556514382362366, "learning_rate": 3.911303414692724e-07, "loss": 0.0369, "step": 16383 }, { "epoch": 2.74, "grad_norm": 0.45925965905189514, "learning_rate": 3.9063042032169706e-07, "loss": 0.0365, "step": 16384 }, { "epoch": 2.74, "grad_norm": 0.4021439552307129, "learning_rate": 3.9013081249979936e-07, "loss": 0.0255, "step": 16385 }, { "epoch": 2.74, "grad_norm": 0.3410540223121643, "learning_rate": 3.896315180198695e-07, "loss": 0.0417, "step": 16386 }, { "epoch": 2.74, "grad_norm": 0.4469206929206848, "learning_rate": 3.8913253689818906e-07, "loss": 0.0349, "step": 16387 }, { "epoch": 2.74, "grad_norm": 0.32727548480033875, "learning_rate": 3.886338691510261e-07, "loss": 0.0215, "step": 16388 }, { "epoch": 2.74, "grad_norm": 0.48305028676986694, "learning_rate": 3.8813551479464196e-07, "loss": 0.031, "step": 16389 }, { "epoch": 2.74, "grad_norm": 0.4683888256549835, "learning_rate": 3.8763747384528593e-07, "loss": 0.0346, "step": 16390 }, { "epoch": 2.74, "grad_norm": 0.3426143229007721, "learning_rate": 3.8713974631919504e-07, "loss": 0.0225, "step": 16391 }, { "epoch": 2.74, "grad_norm": 0.3910966217517853, "learning_rate": 3.866423322326007e-07, "loss": 0.0412, "step": 16392 }, { "epoch": 2.74, "grad_norm": 0.32815173268318176, "learning_rate": 3.861452316017211e-07, "loss": 0.0258, "step": 16393 }, { "epoch": 2.74, "grad_norm": 0.36867064237594604, "learning_rate": 3.856484444427644e-07, "loss": 0.0305, "step": 16394 }, { "epoch": 2.74, "grad_norm": 0.411596417427063, "learning_rate": 3.851519707719287e-07, "loss": 0.0352, "step": 16395 }, { "epoch": 2.74, "grad_norm": 0.41233789920806885, "learning_rate": 3.8465581060540436e-07, "loss": 0.0341, "step": 16396 }, { "epoch": 2.74, "grad_norm": 0.4620765149593353, "learning_rate": 3.841599639593652e-07, "loss": 0.0376, "step": 16397 }, { "epoch": 2.74, "grad_norm": 0.4586770236492157, "learning_rate": 3.836644308499837e-07, "loss": 0.0245, "step": 16398 }, { "epoch": 2.74, "grad_norm": 0.3364477753639221, "learning_rate": 3.8316921129341487e-07, "loss": 0.0227, "step": 16399 }, { "epoch": 2.74, "grad_norm": 0.5287188291549683, "learning_rate": 3.8267430530580465e-07, "loss": 0.0464, "step": 16400 }, { "epoch": 2.74, "grad_norm": 0.32857227325439453, "learning_rate": 3.8217971290329336e-07, "loss": 0.0243, "step": 16401 }, { "epoch": 2.74, "grad_norm": 0.26267579197883606, "learning_rate": 3.816854341020049e-07, "loss": 0.0176, "step": 16402 }, { "epoch": 2.74, "grad_norm": 0.41268298029899597, "learning_rate": 3.8119146891805626e-07, "loss": 0.0299, "step": 16403 }, { "epoch": 2.74, "grad_norm": 0.5232517719268799, "learning_rate": 3.8069781736755574e-07, "loss": 0.032, "step": 16404 }, { "epoch": 2.74, "grad_norm": 0.39868828654289246, "learning_rate": 3.802044794665971e-07, "loss": 0.0408, "step": 16405 }, { "epoch": 2.74, "grad_norm": 0.4473850727081299, "learning_rate": 3.7971145523126973e-07, "loss": 0.0383, "step": 16406 }, { "epoch": 2.74, "grad_norm": 0.42653635144233704, "learning_rate": 3.792187446776463e-07, "loss": 0.0312, "step": 16407 }, { "epoch": 2.74, "grad_norm": 0.44216904044151306, "learning_rate": 3.787263478217917e-07, "loss": 0.0339, "step": 16408 }, { "epoch": 2.74, "grad_norm": 0.4083125591278076, "learning_rate": 3.782342646797632e-07, "loss": 0.0269, "step": 16409 }, { "epoch": 2.74, "grad_norm": 0.49413320422172546, "learning_rate": 3.7774249526760454e-07, "loss": 0.0418, "step": 16410 }, { "epoch": 2.74, "grad_norm": 0.3270992636680603, "learning_rate": 3.772510396013496e-07, "loss": 0.02, "step": 16411 }, { "epoch": 2.74, "grad_norm": 0.3332764804363251, "learning_rate": 3.767598976970266e-07, "loss": 0.0287, "step": 16412 }, { "epoch": 2.75, "grad_norm": 0.342008501291275, "learning_rate": 3.7626906957064613e-07, "loss": 0.0227, "step": 16413 }, { "epoch": 2.75, "grad_norm": 0.5141952633857727, "learning_rate": 3.7577855523821536e-07, "loss": 0.0402, "step": 16414 }, { "epoch": 2.75, "grad_norm": 0.4450388252735138, "learning_rate": 3.75288354715726e-07, "loss": 0.0357, "step": 16415 }, { "epoch": 2.75, "grad_norm": 0.36388227343559265, "learning_rate": 3.747984680191608e-07, "loss": 0.0314, "step": 16416 }, { "epoch": 2.75, "grad_norm": 0.3078685700893402, "learning_rate": 3.7430889516449574e-07, "loss": 0.0211, "step": 16417 }, { "epoch": 2.75, "grad_norm": 0.4483046233654022, "learning_rate": 3.7381963616769045e-07, "loss": 0.0356, "step": 16418 }, { "epoch": 2.75, "grad_norm": 0.3999475836753845, "learning_rate": 3.73330691044701e-07, "loss": 0.023, "step": 16419 }, { "epoch": 2.75, "grad_norm": 0.30256223678588867, "learning_rate": 3.7284205981147016e-07, "loss": 0.0222, "step": 16420 }, { "epoch": 2.75, "grad_norm": 0.41294652223587036, "learning_rate": 3.7235374248392966e-07, "loss": 0.0423, "step": 16421 }, { "epoch": 2.75, "grad_norm": 0.46883174777030945, "learning_rate": 3.7186573907800004e-07, "loss": 0.0264, "step": 16422 }, { "epoch": 2.75, "grad_norm": 0.5257080793380737, "learning_rate": 3.713780496095953e-07, "loss": 0.0527, "step": 16423 }, { "epoch": 2.75, "grad_norm": 0.3383050262928009, "learning_rate": 3.708906740946161e-07, "loss": 0.034, "step": 16424 }, { "epoch": 2.75, "grad_norm": 0.39744600653648376, "learning_rate": 3.7040361254895407e-07, "loss": 0.0301, "step": 16425 }, { "epoch": 2.75, "grad_norm": 0.3500799834728241, "learning_rate": 3.699168649884899e-07, "loss": 0.0287, "step": 16426 }, { "epoch": 2.75, "grad_norm": 0.46379533410072327, "learning_rate": 3.694304314290964e-07, "loss": 0.0327, "step": 16427 }, { "epoch": 2.75, "grad_norm": 0.42856574058532715, "learning_rate": 3.6894431188663316e-07, "loss": 0.0416, "step": 16428 }, { "epoch": 2.75, "grad_norm": 0.30631232261657715, "learning_rate": 3.684585063769519e-07, "loss": 0.0419, "step": 16429 }, { "epoch": 2.75, "grad_norm": 0.4548218250274658, "learning_rate": 3.679730149158911e-07, "loss": 0.0504, "step": 16430 }, { "epoch": 2.75, "grad_norm": 0.4032362103462219, "learning_rate": 3.674878375192814e-07, "loss": 0.0312, "step": 16431 }, { "epoch": 2.75, "grad_norm": 0.6074825525283813, "learning_rate": 3.6700297420294463e-07, "loss": 0.0336, "step": 16432 }, { "epoch": 2.75, "grad_norm": 0.4224075973033905, "learning_rate": 3.665184249826859e-07, "loss": 0.0328, "step": 16433 }, { "epoch": 2.75, "grad_norm": 0.35064131021499634, "learning_rate": 3.6603418987430805e-07, "loss": 0.038, "step": 16434 }, { "epoch": 2.75, "grad_norm": 0.4552290439605713, "learning_rate": 3.655502688936008e-07, "loss": 0.0363, "step": 16435 }, { "epoch": 2.75, "grad_norm": 0.32665199041366577, "learning_rate": 3.6506666205634033e-07, "loss": 0.025, "step": 16436 }, { "epoch": 2.75, "grad_norm": 0.35715731978416443, "learning_rate": 3.645833693782974e-07, "loss": 0.031, "step": 16437 }, { "epoch": 2.75, "grad_norm": 0.27988678216934204, "learning_rate": 3.6410039087522943e-07, "loss": 0.0247, "step": 16438 }, { "epoch": 2.75, "grad_norm": 0.317690908908844, "learning_rate": 3.6361772656288376e-07, "loss": 0.0249, "step": 16439 }, { "epoch": 2.75, "grad_norm": 0.4046578109264374, "learning_rate": 3.63135376457e-07, "loss": 0.0396, "step": 16440 }, { "epoch": 2.75, "grad_norm": 0.46474507451057434, "learning_rate": 3.626533405733046e-07, "loss": 0.0301, "step": 16441 }, { "epoch": 2.75, "grad_norm": 0.3306955397129059, "learning_rate": 3.6217161892751374e-07, "loss": 0.0265, "step": 16442 }, { "epoch": 2.75, "grad_norm": 0.3569882810115814, "learning_rate": 3.6169021153533936e-07, "loss": 0.0397, "step": 16443 }, { "epoch": 2.75, "grad_norm": 0.5339707732200623, "learning_rate": 3.612091184124722e-07, "loss": 0.0456, "step": 16444 }, { "epoch": 2.75, "grad_norm": 0.370776504278183, "learning_rate": 3.6072833957460416e-07, "loss": 0.0201, "step": 16445 }, { "epoch": 2.75, "grad_norm": 0.5042348504066467, "learning_rate": 3.6024787503740834e-07, "loss": 0.044, "step": 16446 }, { "epoch": 2.75, "grad_norm": 0.39931607246398926, "learning_rate": 3.59767724816551e-07, "loss": 0.0291, "step": 16447 }, { "epoch": 2.75, "grad_norm": 0.43546828627586365, "learning_rate": 3.592878889276907e-07, "loss": 0.0319, "step": 16448 }, { "epoch": 2.75, "grad_norm": 0.4258362054824829, "learning_rate": 3.588083673864706e-07, "loss": 0.0276, "step": 16449 }, { "epoch": 2.75, "grad_norm": 0.36635082960128784, "learning_rate": 3.5832916020852594e-07, "loss": 0.0351, "step": 16450 }, { "epoch": 2.75, "grad_norm": 0.474918931722641, "learning_rate": 3.5785026740948194e-07, "loss": 0.0427, "step": 16451 }, { "epoch": 2.75, "grad_norm": 0.4296399652957916, "learning_rate": 3.573716890049561e-07, "loss": 0.0282, "step": 16452 }, { "epoch": 2.75, "grad_norm": 0.3739992082118988, "learning_rate": 3.568934250105505e-07, "loss": 0.0319, "step": 16453 }, { "epoch": 2.75, "grad_norm": 0.4923642873764038, "learning_rate": 3.564154754418614e-07, "loss": 0.0426, "step": 16454 }, { "epoch": 2.75, "grad_norm": 0.8635793924331665, "learning_rate": 3.559378403144709e-07, "loss": 0.0225, "step": 16455 }, { "epoch": 2.75, "grad_norm": 0.5187003016471863, "learning_rate": 3.554605196439542e-07, "loss": 0.0373, "step": 16456 }, { "epoch": 2.75, "grad_norm": 0.40583688020706177, "learning_rate": 3.5498351344587566e-07, "loss": 0.0375, "step": 16457 }, { "epoch": 2.75, "grad_norm": 0.41795825958251953, "learning_rate": 3.545068217357861e-07, "loss": 0.0254, "step": 16458 }, { "epoch": 2.75, "grad_norm": 0.32049596309661865, "learning_rate": 3.5403044452922976e-07, "loss": 0.0277, "step": 16459 }, { "epoch": 2.75, "grad_norm": 0.35608071088790894, "learning_rate": 3.5355438184174195e-07, "loss": 0.0341, "step": 16460 }, { "epoch": 2.75, "grad_norm": 0.30009642243385315, "learning_rate": 3.5307863368884144e-07, "loss": 0.0297, "step": 16461 }, { "epoch": 2.75, "grad_norm": 0.3124745190143585, "learning_rate": 3.5260320008604355e-07, "loss": 0.0279, "step": 16462 }, { "epoch": 2.75, "grad_norm": 0.33523136377334595, "learning_rate": 3.5212808104884923e-07, "loss": 0.0295, "step": 16463 }, { "epoch": 2.75, "grad_norm": 0.40345075726509094, "learning_rate": 3.516532765927505e-07, "loss": 0.0399, "step": 16464 }, { "epoch": 2.75, "grad_norm": 0.5135434865951538, "learning_rate": 3.5117878673322834e-07, "loss": 0.0506, "step": 16465 }, { "epoch": 2.75, "grad_norm": 0.39587849378585815, "learning_rate": 3.5070461148575485e-07, "loss": 0.0316, "step": 16466 }, { "epoch": 2.75, "grad_norm": 0.3453415334224701, "learning_rate": 3.5023075086578986e-07, "loss": 0.0371, "step": 16467 }, { "epoch": 2.75, "grad_norm": 0.34390124678611755, "learning_rate": 3.497572048887876e-07, "loss": 0.0278, "step": 16468 }, { "epoch": 2.75, "grad_norm": 0.3008449673652649, "learning_rate": 3.4928397357018476e-07, "loss": 0.0154, "step": 16469 }, { "epoch": 2.75, "grad_norm": 0.5966792702674866, "learning_rate": 3.488110569254133e-07, "loss": 0.0357, "step": 16470 }, { "epoch": 2.75, "grad_norm": 0.32535579800605774, "learning_rate": 3.483384549698943e-07, "loss": 0.0172, "step": 16471 }, { "epoch": 2.75, "grad_norm": 0.33440864086151123, "learning_rate": 3.478661677190343e-07, "loss": 0.0278, "step": 16472 }, { "epoch": 2.76, "grad_norm": 0.3519715666770935, "learning_rate": 3.4739419518823646e-07, "loss": 0.0199, "step": 16473 }, { "epoch": 2.76, "grad_norm": 0.5896803736686707, "learning_rate": 3.4692253739288753e-07, "loss": 0.0284, "step": 16474 }, { "epoch": 2.76, "grad_norm": 0.34986168146133423, "learning_rate": 3.4645119434836837e-07, "loss": 0.0235, "step": 16475 }, { "epoch": 2.76, "grad_norm": 0.4791768193244934, "learning_rate": 3.459801660700468e-07, "loss": 0.0286, "step": 16476 }, { "epoch": 2.76, "grad_norm": 0.4422743320465088, "learning_rate": 3.455094525732816e-07, "loss": 0.0301, "step": 16477 }, { "epoch": 2.76, "grad_norm": 0.3924882411956787, "learning_rate": 3.4503905387341943e-07, "loss": 0.0203, "step": 16478 }, { "epoch": 2.76, "grad_norm": 0.41897109150886536, "learning_rate": 3.445689699858001e-07, "loss": 0.0227, "step": 16479 }, { "epoch": 2.76, "grad_norm": 0.48731207847595215, "learning_rate": 3.4409920092575156e-07, "loss": 0.0273, "step": 16480 }, { "epoch": 2.76, "grad_norm": 0.3913418650627136, "learning_rate": 3.4362974670858916e-07, "loss": 0.0382, "step": 16481 }, { "epoch": 2.76, "grad_norm": 0.3890551030635834, "learning_rate": 3.431606073496208e-07, "loss": 0.0304, "step": 16482 }, { "epoch": 2.76, "grad_norm": 0.36152300238609314, "learning_rate": 3.426917828641441e-07, "loss": 0.0185, "step": 16483 }, { "epoch": 2.76, "grad_norm": 0.33727386593818665, "learning_rate": 3.4222327326744575e-07, "loss": 0.0255, "step": 16484 }, { "epoch": 2.76, "grad_norm": 0.6216849088668823, "learning_rate": 3.417550785748025e-07, "loss": 0.0554, "step": 16485 }, { "epoch": 2.76, "grad_norm": 0.4318508803844452, "learning_rate": 3.412871988014788e-07, "loss": 0.0354, "step": 16486 }, { "epoch": 2.76, "grad_norm": 0.5827562212944031, "learning_rate": 3.408196339627323e-07, "loss": 0.0365, "step": 16487 }, { "epoch": 2.76, "grad_norm": 0.36054378747940063, "learning_rate": 3.4035238407380766e-07, "loss": 0.0357, "step": 16488 }, { "epoch": 2.76, "grad_norm": 0.532529354095459, "learning_rate": 3.3988544914993814e-07, "loss": 0.0352, "step": 16489 }, { "epoch": 2.76, "grad_norm": 0.2996056377887726, "learning_rate": 3.394188292063516e-07, "loss": 0.0224, "step": 16490 }, { "epoch": 2.76, "grad_norm": 0.36425429582595825, "learning_rate": 3.389525242582625e-07, "loss": 0.0319, "step": 16491 }, { "epoch": 2.76, "grad_norm": 0.27881428599357605, "learning_rate": 3.384865343208732e-07, "loss": 0.0185, "step": 16492 }, { "epoch": 2.76, "grad_norm": 0.2946414053440094, "learning_rate": 3.3802085940938145e-07, "loss": 0.0214, "step": 16493 }, { "epoch": 2.76, "grad_norm": 0.567490816116333, "learning_rate": 3.375554995389685e-07, "loss": 0.0265, "step": 16494 }, { "epoch": 2.76, "grad_norm": 0.264911949634552, "learning_rate": 3.3709045472480664e-07, "loss": 0.0235, "step": 16495 }, { "epoch": 2.76, "grad_norm": 0.5367220640182495, "learning_rate": 3.366257249820637e-07, "loss": 0.0487, "step": 16496 }, { "epoch": 2.76, "grad_norm": 0.4530295133590698, "learning_rate": 3.3616131032588873e-07, "loss": 0.0436, "step": 16497 }, { "epoch": 2.76, "grad_norm": 0.32831698656082153, "learning_rate": 3.3569721077142627e-07, "loss": 0.0133, "step": 16498 }, { "epoch": 2.76, "grad_norm": 0.2920577824115753, "learning_rate": 3.352334263338086e-07, "loss": 0.0231, "step": 16499 }, { "epoch": 2.76, "grad_norm": 0.26226744055747986, "learning_rate": 3.3476995702815704e-07, "loss": 0.0206, "step": 16500 }, { "epoch": 2.76, "grad_norm": 0.41701188683509827, "learning_rate": 3.343068028695873e-07, "loss": 0.0328, "step": 16501 }, { "epoch": 2.76, "grad_norm": 0.3576332628726959, "learning_rate": 3.3384396387319717e-07, "loss": 0.0227, "step": 16502 }, { "epoch": 2.76, "grad_norm": 0.31166285276412964, "learning_rate": 3.3338144005407916e-07, "loss": 0.0257, "step": 16503 }, { "epoch": 2.76, "grad_norm": 0.4038373529911041, "learning_rate": 3.3291923142731553e-07, "loss": 0.0277, "step": 16504 }, { "epoch": 2.76, "grad_norm": 0.38727280497550964, "learning_rate": 3.3245733800797543e-07, "loss": 0.0295, "step": 16505 }, { "epoch": 2.76, "grad_norm": 0.4323361814022064, "learning_rate": 3.319957598111201e-07, "loss": 0.0331, "step": 16506 }, { "epoch": 2.76, "grad_norm": 0.27908188104629517, "learning_rate": 3.3153449685179974e-07, "loss": 0.0224, "step": 16507 }, { "epoch": 2.76, "grad_norm": 0.3249947130680084, "learning_rate": 3.3107354914505674e-07, "loss": 0.024, "step": 16508 }, { "epoch": 2.76, "grad_norm": 0.37189093232154846, "learning_rate": 3.306129167059169e-07, "loss": 0.0341, "step": 16509 }, { "epoch": 2.76, "grad_norm": 0.4582946300506592, "learning_rate": 3.301525995494037e-07, "loss": 0.0291, "step": 16510 }, { "epoch": 2.76, "grad_norm": 0.49053072929382324, "learning_rate": 3.29692597690523e-07, "loss": 0.0342, "step": 16511 }, { "epoch": 2.76, "grad_norm": 0.3102690875530243, "learning_rate": 3.2923291114427604e-07, "loss": 0.0268, "step": 16512 }, { "epoch": 2.76, "grad_norm": 0.3806132376194, "learning_rate": 3.2877353992565086e-07, "loss": 0.0336, "step": 16513 }, { "epoch": 2.76, "grad_norm": 0.4126010835170746, "learning_rate": 3.283144840496244e-07, "loss": 0.0318, "step": 16514 }, { "epoch": 2.76, "grad_norm": 0.44193920493125916, "learning_rate": 3.278557435311669e-07, "loss": 0.0358, "step": 16515 }, { "epoch": 2.76, "grad_norm": 0.44173330068588257, "learning_rate": 3.2739731838523524e-07, "loss": 0.0295, "step": 16516 }, { "epoch": 2.76, "grad_norm": 0.4354838728904724, "learning_rate": 3.269392086267764e-07, "loss": 0.027, "step": 16517 }, { "epoch": 2.76, "grad_norm": 0.30858170986175537, "learning_rate": 3.2648141427072845e-07, "loss": 0.029, "step": 16518 }, { "epoch": 2.76, "grad_norm": 0.35110801458358765, "learning_rate": 3.260239353320194e-07, "loss": 0.0208, "step": 16519 }, { "epoch": 2.76, "grad_norm": 0.33315351605415344, "learning_rate": 3.2556677182556284e-07, "loss": 0.0206, "step": 16520 }, { "epoch": 2.76, "grad_norm": 0.3424139618873596, "learning_rate": 3.2510992376626805e-07, "loss": 0.0386, "step": 16521 }, { "epoch": 2.76, "grad_norm": 0.4699510931968689, "learning_rate": 3.246533911690286e-07, "loss": 0.0381, "step": 16522 }, { "epoch": 2.76, "grad_norm": 0.40900862216949463, "learning_rate": 3.2419717404873263e-07, "loss": 0.0277, "step": 16523 }, { "epoch": 2.76, "grad_norm": 0.3527590334415436, "learning_rate": 3.237412724202549e-07, "loss": 0.0354, "step": 16524 }, { "epoch": 2.76, "grad_norm": 0.2424267828464508, "learning_rate": 3.2328568629846016e-07, "loss": 0.0167, "step": 16525 }, { "epoch": 2.76, "grad_norm": 0.47672557830810547, "learning_rate": 3.228304156982054e-07, "loss": 0.0262, "step": 16526 }, { "epoch": 2.76, "grad_norm": 0.4547524154186249, "learning_rate": 3.223754606343321e-07, "loss": 0.0501, "step": 16527 }, { "epoch": 2.76, "grad_norm": 0.4426771402359009, "learning_rate": 3.219208211216762e-07, "loss": 0.016, "step": 16528 }, { "epoch": 2.76, "grad_norm": 0.37140515446662903, "learning_rate": 3.2146649717506364e-07, "loss": 0.0385, "step": 16529 }, { "epoch": 2.76, "grad_norm": 0.5389032363891602, "learning_rate": 3.2101248880930357e-07, "loss": 0.0445, "step": 16530 }, { "epoch": 2.76, "grad_norm": 0.3922339379787445, "learning_rate": 3.2055879603920315e-07, "loss": 0.0265, "step": 16531 }, { "epoch": 2.77, "grad_norm": 0.45095574855804443, "learning_rate": 3.20105418879556e-07, "loss": 0.0376, "step": 16532 }, { "epoch": 2.77, "grad_norm": 0.42918044328689575, "learning_rate": 3.1965235734514486e-07, "loss": 0.0233, "step": 16533 }, { "epoch": 2.77, "grad_norm": 0.30183184146881104, "learning_rate": 3.1919961145073897e-07, "loss": 0.024, "step": 16534 }, { "epoch": 2.77, "grad_norm": 0.35791775584220886, "learning_rate": 3.187471812111043e-07, "loss": 0.0338, "step": 16535 }, { "epoch": 2.77, "grad_norm": 0.517983078956604, "learning_rate": 3.182950666409923e-07, "loss": 0.0231, "step": 16536 }, { "epoch": 2.77, "grad_norm": 0.3473062515258789, "learning_rate": 3.178432677551424e-07, "loss": 0.0189, "step": 16537 }, { "epoch": 2.77, "grad_norm": 0.48422130942344666, "learning_rate": 3.1739178456828835e-07, "loss": 0.0371, "step": 16538 }, { "epoch": 2.77, "grad_norm": 0.3023166060447693, "learning_rate": 3.1694061709515054e-07, "loss": 0.0234, "step": 16539 }, { "epoch": 2.77, "grad_norm": 0.3183042109012604, "learning_rate": 3.164897653504406e-07, "loss": 0.0198, "step": 16540 }, { "epoch": 2.77, "grad_norm": 0.407861590385437, "learning_rate": 3.1603922934886013e-07, "loss": 0.0363, "step": 16541 }, { "epoch": 2.77, "grad_norm": 0.3092286288738251, "learning_rate": 3.1558900910509613e-07, "loss": 0.0245, "step": 16542 }, { "epoch": 2.77, "grad_norm": 0.5481124520301819, "learning_rate": 3.1513910463383145e-07, "loss": 0.0316, "step": 16543 }, { "epoch": 2.77, "grad_norm": 0.368046373128891, "learning_rate": 3.1468951594973427e-07, "loss": 0.0338, "step": 16544 }, { "epoch": 2.77, "grad_norm": 0.49252620339393616, "learning_rate": 3.14240243067464e-07, "loss": 0.0425, "step": 16545 }, { "epoch": 2.77, "grad_norm": 0.28441280126571655, "learning_rate": 3.137912860016701e-07, "loss": 0.0191, "step": 16546 }, { "epoch": 2.77, "grad_norm": 0.33885639905929565, "learning_rate": 3.133426447669929e-07, "loss": 0.0184, "step": 16547 }, { "epoch": 2.77, "grad_norm": 0.4230020344257355, "learning_rate": 3.128943193780576e-07, "loss": 0.028, "step": 16548 }, { "epoch": 2.77, "grad_norm": 0.3551447093486786, "learning_rate": 3.1244630984948566e-07, "loss": 0.0323, "step": 16549 }, { "epoch": 2.77, "grad_norm": 0.488959938287735, "learning_rate": 3.1199861619588436e-07, "loss": 0.0493, "step": 16550 }, { "epoch": 2.77, "grad_norm": 0.42585182189941406, "learning_rate": 3.1155123843184974e-07, "loss": 0.0302, "step": 16551 }, { "epoch": 2.77, "grad_norm": 0.3799075484275818, "learning_rate": 3.1110417657197025e-07, "loss": 0.0208, "step": 16552 }, { "epoch": 2.77, "grad_norm": 0.3588826358318329, "learning_rate": 3.1065743063082186e-07, "loss": 0.0313, "step": 16553 }, { "epoch": 2.77, "grad_norm": 0.5541251301765442, "learning_rate": 3.1021100062297304e-07, "loss": 0.038, "step": 16554 }, { "epoch": 2.77, "grad_norm": 0.6223446130752563, "learning_rate": 3.097648865629788e-07, "loss": 0.0432, "step": 16555 }, { "epoch": 2.77, "grad_norm": 0.3724462389945984, "learning_rate": 3.0931908846538515e-07, "loss": 0.0322, "step": 16556 }, { "epoch": 2.77, "grad_norm": 0.39526885747909546, "learning_rate": 3.0887360634473065e-07, "loss": 0.0335, "step": 16557 }, { "epoch": 2.77, "grad_norm": 0.2609817087650299, "learning_rate": 3.08428440215538e-07, "loss": 0.0199, "step": 16558 }, { "epoch": 2.77, "grad_norm": 0.3979355990886688, "learning_rate": 3.0798359009232117e-07, "loss": 0.0244, "step": 16559 }, { "epoch": 2.77, "grad_norm": 0.40008673071861267, "learning_rate": 3.0753905598958857e-07, "loss": 0.0529, "step": 16560 }, { "epoch": 2.77, "grad_norm": 0.4865483045578003, "learning_rate": 3.0709483792183416e-07, "loss": 0.0406, "step": 16561 }, { "epoch": 2.77, "grad_norm": 0.5055465698242188, "learning_rate": 3.066509359035386e-07, "loss": 0.0256, "step": 16562 }, { "epoch": 2.77, "grad_norm": 0.5089150667190552, "learning_rate": 3.062073499491791e-07, "loss": 0.0496, "step": 16563 }, { "epoch": 2.77, "grad_norm": 0.4223317801952362, "learning_rate": 3.057640800732209e-07, "loss": 0.0339, "step": 16564 }, { "epoch": 2.77, "grad_norm": 0.3242781460285187, "learning_rate": 3.0532112629011344e-07, "loss": 0.0273, "step": 16565 }, { "epoch": 2.77, "grad_norm": 0.32324543595314026, "learning_rate": 3.048784886143019e-07, "loss": 0.0377, "step": 16566 }, { "epoch": 2.77, "grad_norm": 0.4819645285606384, "learning_rate": 3.0443616706021807e-07, "loss": 0.0436, "step": 16567 }, { "epoch": 2.77, "grad_norm": 0.4193398356437683, "learning_rate": 3.03994161642287e-07, "loss": 0.0337, "step": 16568 }, { "epoch": 2.77, "grad_norm": 0.35236623883247375, "learning_rate": 3.0355247237491834e-07, "loss": 0.0262, "step": 16569 }, { "epoch": 2.77, "grad_norm": 0.3164623975753784, "learning_rate": 3.031110992725128e-07, "loss": 0.0212, "step": 16570 }, { "epoch": 2.77, "grad_norm": 0.3243650794029236, "learning_rate": 3.026700423494644e-07, "loss": 0.0263, "step": 16571 }, { "epoch": 2.77, "grad_norm": 0.3366461992263794, "learning_rate": 3.0222930162015494e-07, "loss": 0.0263, "step": 16572 }, { "epoch": 2.77, "grad_norm": 0.2629958391189575, "learning_rate": 3.017888770989519e-07, "loss": 0.0165, "step": 16573 }, { "epoch": 2.77, "grad_norm": 0.45876428484916687, "learning_rate": 3.0134876880022036e-07, "loss": 0.0481, "step": 16574 }, { "epoch": 2.77, "grad_norm": 0.43604934215545654, "learning_rate": 3.0090897673830664e-07, "loss": 0.0237, "step": 16575 }, { "epoch": 2.77, "grad_norm": 0.3010055422782898, "learning_rate": 3.0046950092755267e-07, "loss": 0.0326, "step": 16576 }, { "epoch": 2.77, "grad_norm": 0.40169504284858704, "learning_rate": 3.0003034138228803e-07, "loss": 0.0367, "step": 16577 }, { "epoch": 2.77, "grad_norm": 0.37007802724838257, "learning_rate": 2.995914981168302e-07, "loss": 0.03, "step": 16578 }, { "epoch": 2.77, "grad_norm": 0.3513334393501282, "learning_rate": 2.9915297114548993e-07, "loss": 0.0247, "step": 16579 }, { "epoch": 2.77, "grad_norm": 0.4441080391407013, "learning_rate": 2.9871476048256797e-07, "loss": 0.0342, "step": 16580 }, { "epoch": 2.77, "grad_norm": 0.41920334100723267, "learning_rate": 2.982768661423485e-07, "loss": 0.0278, "step": 16581 }, { "epoch": 2.77, "grad_norm": 0.4958477318286896, "learning_rate": 2.9783928813911345e-07, "loss": 0.0417, "step": 16582 }, { "epoch": 2.77, "grad_norm": 0.38793855905532837, "learning_rate": 2.97402026487128e-07, "loss": 0.0264, "step": 16583 }, { "epoch": 2.77, "grad_norm": 0.35288527607917786, "learning_rate": 2.969650812006497e-07, "loss": 0.0344, "step": 16584 }, { "epoch": 2.77, "grad_norm": 0.32078787684440613, "learning_rate": 2.9652845229392823e-07, "loss": 0.0183, "step": 16585 }, { "epoch": 2.77, "grad_norm": 0.4972147047519684, "learning_rate": 2.9609213978119665e-07, "loss": 0.0291, "step": 16586 }, { "epoch": 2.77, "grad_norm": 0.2827618718147278, "learning_rate": 2.956561436766836e-07, "loss": 0.0158, "step": 16587 }, { "epoch": 2.77, "grad_norm": 0.5511142015457153, "learning_rate": 2.952204639946066e-07, "loss": 0.0225, "step": 16588 }, { "epoch": 2.77, "grad_norm": 0.44050702452659607, "learning_rate": 2.9478510074916975e-07, "loss": 0.0375, "step": 16589 }, { "epoch": 2.77, "grad_norm": 0.4218529462814331, "learning_rate": 2.943500539545685e-07, "loss": 0.0304, "step": 16590 }, { "epoch": 2.77, "grad_norm": 0.4377577602863312, "learning_rate": 2.939153236249892e-07, "loss": 0.0386, "step": 16591 }, { "epoch": 2.78, "grad_norm": 0.3348008692264557, "learning_rate": 2.934809097746061e-07, "loss": 0.0207, "step": 16592 }, { "epoch": 2.78, "grad_norm": 0.43444696068763733, "learning_rate": 2.9304681241758335e-07, "loss": 0.0441, "step": 16593 }, { "epoch": 2.78, "grad_norm": 0.29906001687049866, "learning_rate": 2.926130315680753e-07, "loss": 0.0256, "step": 16594 }, { "epoch": 2.78, "grad_norm": 0.40894246101379395, "learning_rate": 2.921795672402272e-07, "loss": 0.0406, "step": 16595 }, { "epoch": 2.78, "grad_norm": 0.42260676622390747, "learning_rate": 2.9174641944817226e-07, "loss": 0.0288, "step": 16596 }, { "epoch": 2.78, "grad_norm": 0.41391661763191223, "learning_rate": 2.913135882060336e-07, "loss": 0.031, "step": 16597 }, { "epoch": 2.78, "grad_norm": 0.34614089131355286, "learning_rate": 2.908810735279233e-07, "loss": 0.0365, "step": 16598 }, { "epoch": 2.78, "grad_norm": 0.32451847195625305, "learning_rate": 2.904488754279455e-07, "loss": 0.0266, "step": 16599 }, { "epoch": 2.78, "grad_norm": 0.29291072487831116, "learning_rate": 2.9001699392019245e-07, "loss": 0.0215, "step": 16600 }, { "epoch": 2.78, "grad_norm": 0.350242555141449, "learning_rate": 2.8958542901874386e-07, "loss": 0.0368, "step": 16601 }, { "epoch": 2.78, "grad_norm": 0.47527769207954407, "learning_rate": 2.891541807376741e-07, "loss": 0.0271, "step": 16602 }, { "epoch": 2.78, "grad_norm": 1.0414929389953613, "learning_rate": 2.887232490910441e-07, "loss": 0.0307, "step": 16603 }, { "epoch": 2.78, "grad_norm": 0.3697102963924408, "learning_rate": 2.8829263409290376e-07, "loss": 0.0228, "step": 16604 }, { "epoch": 2.78, "grad_norm": 0.3838648200035095, "learning_rate": 2.8786233575729514e-07, "loss": 0.0297, "step": 16605 }, { "epoch": 2.78, "grad_norm": 0.3162972927093506, "learning_rate": 2.874323540982482e-07, "loss": 0.0219, "step": 16606 }, { "epoch": 2.78, "grad_norm": 0.5425117015838623, "learning_rate": 2.870026891297828e-07, "loss": 0.0284, "step": 16607 }, { "epoch": 2.78, "grad_norm": 0.3604530990123749, "learning_rate": 2.865733408659088e-07, "loss": 0.036, "step": 16608 }, { "epoch": 2.78, "grad_norm": 0.35644015669822693, "learning_rate": 2.861443093206262e-07, "loss": 0.0247, "step": 16609 }, { "epoch": 2.78, "grad_norm": 0.5862096548080444, "learning_rate": 2.8571559450792373e-07, "loss": 0.0408, "step": 16610 }, { "epoch": 2.78, "grad_norm": 0.3466305732727051, "learning_rate": 2.8528719644177917e-07, "loss": 0.0333, "step": 16611 }, { "epoch": 2.78, "grad_norm": 0.4574461281299591, "learning_rate": 2.848591151361613e-07, "loss": 0.0236, "step": 16612 }, { "epoch": 2.78, "grad_norm": 0.32273346185684204, "learning_rate": 2.844313506050311e-07, "loss": 0.0274, "step": 16613 }, { "epoch": 2.78, "grad_norm": 0.3903461992740631, "learning_rate": 2.8400390286233314e-07, "loss": 0.0391, "step": 16614 }, { "epoch": 2.78, "grad_norm": 0.4514119327068329, "learning_rate": 2.8357677192200615e-07, "loss": 0.0257, "step": 16615 }, { "epoch": 2.78, "grad_norm": 0.4452175796031952, "learning_rate": 2.831499577979768e-07, "loss": 0.0241, "step": 16616 }, { "epoch": 2.78, "grad_norm": 0.486214816570282, "learning_rate": 2.827234605041629e-07, "loss": 0.0315, "step": 16617 }, { "epoch": 2.78, "grad_norm": 0.5377779006958008, "learning_rate": 2.822972800544688e-07, "loss": 0.0388, "step": 16618 }, { "epoch": 2.78, "grad_norm": 0.3293971121311188, "learning_rate": 2.8187141646279114e-07, "loss": 0.0357, "step": 16619 }, { "epoch": 2.78, "grad_norm": 0.4698498249053955, "learning_rate": 2.814458697430189e-07, "loss": 0.0273, "step": 16620 }, { "epoch": 2.78, "grad_norm": 0.4874623119831085, "learning_rate": 2.8102063990902427e-07, "loss": 0.0375, "step": 16621 }, { "epoch": 2.78, "grad_norm": 0.3759238123893738, "learning_rate": 2.8059572697467395e-07, "loss": 0.0245, "step": 16622 }, { "epoch": 2.78, "grad_norm": 0.25857263803482056, "learning_rate": 2.801711309538213e-07, "loss": 0.0147, "step": 16623 }, { "epoch": 2.78, "grad_norm": 0.3586973249912262, "learning_rate": 2.797468518603119e-07, "loss": 0.035, "step": 16624 }, { "epoch": 2.78, "grad_norm": 0.30911383032798767, "learning_rate": 2.7932288970798136e-07, "loss": 0.0179, "step": 16625 }, { "epoch": 2.78, "grad_norm": 0.4588395059108734, "learning_rate": 2.7889924451064975e-07, "loss": 0.0237, "step": 16626 }, { "epoch": 2.78, "grad_norm": 0.42540881037712097, "learning_rate": 2.7847591628213376e-07, "loss": 0.0179, "step": 16627 }, { "epoch": 2.78, "grad_norm": 0.3114054799079895, "learning_rate": 2.7805290503623573e-07, "loss": 0.0196, "step": 16628 }, { "epoch": 2.78, "grad_norm": 0.5057359933853149, "learning_rate": 2.776302107867468e-07, "loss": 0.0462, "step": 16629 }, { "epoch": 2.78, "grad_norm": 0.3664751946926117, "learning_rate": 2.7720783354745265e-07, "loss": 0.0299, "step": 16630 }, { "epoch": 2.78, "grad_norm": 0.5199498534202576, "learning_rate": 2.767857733321233e-07, "loss": 0.0294, "step": 16631 }, { "epoch": 2.78, "grad_norm": 0.28398722410202026, "learning_rate": 2.7636403015452007e-07, "loss": 0.0244, "step": 16632 }, { "epoch": 2.78, "grad_norm": 0.29837167263031006, "learning_rate": 2.7594260402839635e-07, "loss": 0.0193, "step": 16633 }, { "epoch": 2.78, "grad_norm": 0.3227272033691406, "learning_rate": 2.7552149496749224e-07, "loss": 0.0206, "step": 16634 }, { "epoch": 2.78, "grad_norm": 0.5244478583335876, "learning_rate": 2.7510070298553684e-07, "loss": 0.0402, "step": 16635 }, { "epoch": 2.78, "grad_norm": 0.26586905121803284, "learning_rate": 2.746802280962546e-07, "loss": 0.022, "step": 16636 }, { "epoch": 2.78, "grad_norm": 0.4540737569332123, "learning_rate": 2.7426007031335133e-07, "loss": 0.044, "step": 16637 }, { "epoch": 2.78, "grad_norm": 0.4424798786640167, "learning_rate": 2.7384022965053047e-07, "loss": 0.0173, "step": 16638 }, { "epoch": 2.78, "grad_norm": 0.31171298027038574, "learning_rate": 2.7342070612147886e-07, "loss": 0.0313, "step": 16639 }, { "epoch": 2.78, "grad_norm": 0.3715425133705139, "learning_rate": 2.7300149973987667e-07, "loss": 0.0303, "step": 16640 }, { "epoch": 2.78, "grad_norm": 0.3895719051361084, "learning_rate": 2.7258261051939294e-07, "loss": 0.0256, "step": 16641 }, { "epoch": 2.78, "grad_norm": 0.3071094751358032, "learning_rate": 2.7216403847368455e-07, "loss": 0.0261, "step": 16642 }, { "epoch": 2.78, "grad_norm": 0.41160231828689575, "learning_rate": 2.717457836164006e-07, "loss": 0.0346, "step": 16643 }, { "epoch": 2.78, "grad_norm": 0.30545827746391296, "learning_rate": 2.713278459611801e-07, "loss": 0.0344, "step": 16644 }, { "epoch": 2.78, "grad_norm": 0.2814647853374481, "learning_rate": 2.709102255216489e-07, "loss": 0.0254, "step": 16645 }, { "epoch": 2.78, "grad_norm": 0.4246366024017334, "learning_rate": 2.704929223114239e-07, "loss": 0.0436, "step": 16646 }, { "epoch": 2.78, "grad_norm": 0.4249502122402191, "learning_rate": 2.70075936344113e-07, "loss": 0.0321, "step": 16647 }, { "epoch": 2.78, "grad_norm": 0.605975866317749, "learning_rate": 2.6965926763331095e-07, "loss": 0.0454, "step": 16648 }, { "epoch": 2.78, "grad_norm": 0.37355491518974304, "learning_rate": 2.692429161926047e-07, "loss": 0.0285, "step": 16649 }, { "epoch": 2.78, "grad_norm": 0.36642327904701233, "learning_rate": 2.6882688203556996e-07, "loss": 0.026, "step": 16650 }, { "epoch": 2.78, "grad_norm": 0.38459399342536926, "learning_rate": 2.6841116517577146e-07, "loss": 0.0283, "step": 16651 }, { "epoch": 2.79, "grad_norm": 0.38648441433906555, "learning_rate": 2.679957656267662e-07, "loss": 0.0317, "step": 16652 }, { "epoch": 2.79, "grad_norm": 0.459312379360199, "learning_rate": 2.6758068340209666e-07, "loss": 0.0349, "step": 16653 }, { "epoch": 2.79, "grad_norm": 0.36546170711517334, "learning_rate": 2.671659185152975e-07, "loss": 0.0303, "step": 16654 }, { "epoch": 2.79, "grad_norm": 0.2986988127231598, "learning_rate": 2.667514709798924e-07, "loss": 0.0187, "step": 16655 }, { "epoch": 2.79, "grad_norm": 0.4601168930530548, "learning_rate": 2.6633734080939724e-07, "loss": 0.0387, "step": 16656 }, { "epoch": 2.79, "grad_norm": 0.49232718348503113, "learning_rate": 2.6592352801731116e-07, "loss": 0.0382, "step": 16657 }, { "epoch": 2.79, "grad_norm": 0.4825327694416046, "learning_rate": 2.6551003261713117e-07, "loss": 0.0234, "step": 16658 }, { "epoch": 2.79, "grad_norm": 0.2795734405517578, "learning_rate": 2.650968546223365e-07, "loss": 0.0188, "step": 16659 }, { "epoch": 2.79, "grad_norm": 0.471681147813797, "learning_rate": 2.646839940464008e-07, "loss": 0.0546, "step": 16660 }, { "epoch": 2.79, "grad_norm": 0.44078177213668823, "learning_rate": 2.642714509027866e-07, "loss": 0.0304, "step": 16661 }, { "epoch": 2.79, "grad_norm": 0.4288981854915619, "learning_rate": 2.638592252049454e-07, "loss": 0.0347, "step": 16662 }, { "epoch": 2.79, "grad_norm": 0.43453192710876465, "learning_rate": 2.634473169663165e-07, "loss": 0.0314, "step": 16663 }, { "epoch": 2.79, "grad_norm": 0.432391494512558, "learning_rate": 2.6303572620033246e-07, "loss": 0.0382, "step": 16664 }, { "epoch": 2.79, "grad_norm": 0.38002100586891174, "learning_rate": 2.626244529204125e-07, "loss": 0.0291, "step": 16665 }, { "epoch": 2.79, "grad_norm": 0.37406328320503235, "learning_rate": 2.622134971399692e-07, "loss": 0.027, "step": 16666 }, { "epoch": 2.79, "grad_norm": 0.41107550263404846, "learning_rate": 2.6180285887239864e-07, "loss": 0.0167, "step": 16667 }, { "epoch": 2.79, "grad_norm": 0.36700063943862915, "learning_rate": 2.613925381310911e-07, "loss": 0.0256, "step": 16668 }, { "epoch": 2.79, "grad_norm": 0.2678299844264984, "learning_rate": 2.6098253492942816e-07, "loss": 0.0199, "step": 16669 }, { "epoch": 2.79, "grad_norm": 0.3741644024848938, "learning_rate": 2.605728492807769e-07, "loss": 0.0474, "step": 16670 }, { "epoch": 2.79, "grad_norm": 0.42622581124305725, "learning_rate": 2.6016348119849544e-07, "loss": 0.043, "step": 16671 }, { "epoch": 2.79, "grad_norm": 0.3342115879058838, "learning_rate": 2.5975443069593207e-07, "loss": 0.0267, "step": 16672 }, { "epoch": 2.79, "grad_norm": 0.4017164707183838, "learning_rate": 2.5934569778642394e-07, "loss": 0.0365, "step": 16673 }, { "epoch": 2.79, "grad_norm": 0.2791367769241333, "learning_rate": 2.589372824832981e-07, "loss": 0.0167, "step": 16674 }, { "epoch": 2.79, "grad_norm": 0.4449083209037781, "learning_rate": 2.5852918479987167e-07, "loss": 0.0374, "step": 16675 }, { "epoch": 2.79, "grad_norm": 0.3621073067188263, "learning_rate": 2.581214047494518e-07, "loss": 0.0287, "step": 16676 }, { "epoch": 2.79, "grad_norm": 0.9931913018226624, "learning_rate": 2.5771394234533343e-07, "loss": 0.024, "step": 16677 }, { "epoch": 2.79, "grad_norm": 0.4690636396408081, "learning_rate": 2.573067976008048e-07, "loss": 0.0491, "step": 16678 }, { "epoch": 2.79, "grad_norm": 0.45761585235595703, "learning_rate": 2.568999705291397e-07, "loss": 0.0288, "step": 16679 }, { "epoch": 2.79, "grad_norm": 0.3063181936740875, "learning_rate": 2.564934611436032e-07, "loss": 0.0331, "step": 16680 }, { "epoch": 2.79, "grad_norm": 0.3622472882270813, "learning_rate": 2.560872694574501e-07, "loss": 0.019, "step": 16681 }, { "epoch": 2.79, "grad_norm": 0.5453461408615112, "learning_rate": 2.556813954839243e-07, "loss": 0.0366, "step": 16682 }, { "epoch": 2.79, "grad_norm": 0.3691161274909973, "learning_rate": 2.552758392362609e-07, "loss": 0.0327, "step": 16683 }, { "epoch": 2.79, "grad_norm": 0.34238603711128235, "learning_rate": 2.5487060072768353e-07, "loss": 0.0257, "step": 16684 }, { "epoch": 2.79, "grad_norm": 0.37222617864608765, "learning_rate": 2.5446567997140404e-07, "loss": 0.0242, "step": 16685 }, { "epoch": 2.79, "grad_norm": 1.082405924797058, "learning_rate": 2.5406107698062844e-07, "loss": 0.0276, "step": 16686 }, { "epoch": 2.79, "grad_norm": 0.427920937538147, "learning_rate": 2.536567917685462e-07, "loss": 0.0381, "step": 16687 }, { "epoch": 2.79, "grad_norm": 0.3323915898799896, "learning_rate": 2.532528243483412e-07, "loss": 0.0235, "step": 16688 }, { "epoch": 2.79, "grad_norm": 0.43453481793403625, "learning_rate": 2.5284917473318406e-07, "loss": 0.034, "step": 16689 }, { "epoch": 2.79, "grad_norm": 0.5170002579689026, "learning_rate": 2.5244584293623755e-07, "loss": 0.0435, "step": 16690 }, { "epoch": 2.79, "grad_norm": 0.4682281017303467, "learning_rate": 2.520428289706511e-07, "loss": 0.0405, "step": 16691 }, { "epoch": 2.79, "grad_norm": 0.3970397114753723, "learning_rate": 2.516401328495688e-07, "loss": 0.0208, "step": 16692 }, { "epoch": 2.79, "grad_norm": 0.41498008370399475, "learning_rate": 2.5123775458611665e-07, "loss": 0.0387, "step": 16693 }, { "epoch": 2.79, "grad_norm": 0.5690346956253052, "learning_rate": 2.508356941934187e-07, "loss": 0.0341, "step": 16694 }, { "epoch": 2.79, "grad_norm": 0.4325118064880371, "learning_rate": 2.504339516845833e-07, "loss": 0.0311, "step": 16695 }, { "epoch": 2.79, "grad_norm": 0.4329282343387604, "learning_rate": 2.5003252707270775e-07, "loss": 0.0365, "step": 16696 }, { "epoch": 2.79, "grad_norm": 0.5150248408317566, "learning_rate": 2.496314203708849e-07, "loss": 0.0316, "step": 16697 }, { "epoch": 2.79, "grad_norm": 0.2808643877506256, "learning_rate": 2.4923063159218887e-07, "loss": 0.0215, "step": 16698 }, { "epoch": 2.79, "grad_norm": 0.35080403089523315, "learning_rate": 2.488301607496912e-07, "loss": 0.0241, "step": 16699 }, { "epoch": 2.79, "grad_norm": 0.47040295600891113, "learning_rate": 2.4843000785644833e-07, "loss": 0.0306, "step": 16700 }, { "epoch": 2.79, "grad_norm": 0.45435163378715515, "learning_rate": 2.4803017292550967e-07, "loss": 0.0336, "step": 16701 }, { "epoch": 2.79, "grad_norm": 0.34985244274139404, "learning_rate": 2.476306559699093e-07, "loss": 0.0225, "step": 16702 }, { "epoch": 2.79, "grad_norm": 0.34193459153175354, "learning_rate": 2.472314570026757e-07, "loss": 0.0272, "step": 16703 }, { "epoch": 2.79, "grad_norm": 0.2612748444080353, "learning_rate": 2.468325760368251e-07, "loss": 0.0205, "step": 16704 }, { "epoch": 2.79, "grad_norm": 0.31503134965896606, "learning_rate": 2.4643401308536373e-07, "loss": 0.0292, "step": 16705 }, { "epoch": 2.79, "grad_norm": 0.40196457505226135, "learning_rate": 2.460357681612857e-07, "loss": 0.0328, "step": 16706 }, { "epoch": 2.79, "grad_norm": 0.4608812630176544, "learning_rate": 2.456378412775795e-07, "loss": 0.0365, "step": 16707 }, { "epoch": 2.79, "grad_norm": 0.3916362524032593, "learning_rate": 2.4524023244721586e-07, "loss": 0.0227, "step": 16708 }, { "epoch": 2.79, "grad_norm": 0.386776864528656, "learning_rate": 2.4484294168316324e-07, "loss": 0.034, "step": 16709 }, { "epoch": 2.79, "grad_norm": 0.4024820923805237, "learning_rate": 2.444459689983736e-07, "loss": 0.023, "step": 16710 }, { "epoch": 2.79, "grad_norm": 0.40577012300491333, "learning_rate": 2.4404931440579206e-07, "loss": 0.0312, "step": 16711 }, { "epoch": 2.8, "grad_norm": 0.3691498637199402, "learning_rate": 2.436529779183505e-07, "loss": 0.0267, "step": 16712 }, { "epoch": 2.8, "grad_norm": 0.2781829535961151, "learning_rate": 2.432569595489731e-07, "loss": 0.0206, "step": 16713 }, { "epoch": 2.8, "grad_norm": 0.8381217122077942, "learning_rate": 2.428612593105717e-07, "loss": 0.0372, "step": 16714 }, { "epoch": 2.8, "grad_norm": 0.35181018710136414, "learning_rate": 2.424658772160493e-07, "loss": 0.0266, "step": 16715 }, { "epoch": 2.8, "grad_norm": 0.5349253416061401, "learning_rate": 2.4207081327829673e-07, "loss": 0.0301, "step": 16716 }, { "epoch": 2.8, "grad_norm": 0.5251274108886719, "learning_rate": 2.416760675101981e-07, "loss": 0.0377, "step": 16717 }, { "epoch": 2.8, "grad_norm": 0.39180299639701843, "learning_rate": 2.412816399246221e-07, "loss": 0.0251, "step": 16718 }, { "epoch": 2.8, "grad_norm": 0.26740220189094543, "learning_rate": 2.4088753053442937e-07, "loss": 0.0212, "step": 16719 }, { "epoch": 2.8, "grad_norm": 0.345628559589386, "learning_rate": 2.4049373935247314e-07, "loss": 0.0227, "step": 16720 }, { "epoch": 2.8, "grad_norm": 0.3787858784198761, "learning_rate": 2.401002663915908e-07, "loss": 0.0352, "step": 16721 }, { "epoch": 2.8, "grad_norm": 0.3226146996021271, "learning_rate": 2.3970711166461325e-07, "loss": 0.0369, "step": 16722 }, { "epoch": 2.8, "grad_norm": 0.4646851718425751, "learning_rate": 2.3931427518435803e-07, "loss": 0.0388, "step": 16723 }, { "epoch": 2.8, "grad_norm": 0.31292417645454407, "learning_rate": 2.3892175696363597e-07, "loss": 0.0288, "step": 16724 }, { "epoch": 2.8, "grad_norm": 0.30597740411758423, "learning_rate": 2.3852955701524684e-07, "loss": 0.0218, "step": 16725 }, { "epoch": 2.8, "grad_norm": 0.447965145111084, "learning_rate": 2.3813767535197596e-07, "loss": 0.0368, "step": 16726 }, { "epoch": 2.8, "grad_norm": 0.2698724865913391, "learning_rate": 2.3774611198660203e-07, "loss": 0.0201, "step": 16727 }, { "epoch": 2.8, "grad_norm": 0.327935129404068, "learning_rate": 2.373548669318937e-07, "loss": 0.0231, "step": 16728 }, { "epoch": 2.8, "grad_norm": 0.465706467628479, "learning_rate": 2.3696394020060742e-07, "loss": 0.0411, "step": 16729 }, { "epoch": 2.8, "grad_norm": 0.4948611259460449, "learning_rate": 2.3657333180548748e-07, "loss": 0.0414, "step": 16730 }, { "epoch": 2.8, "grad_norm": 0.42874521017074585, "learning_rate": 2.3618304175927253e-07, "loss": 0.0373, "step": 16731 }, { "epoch": 2.8, "grad_norm": 0.318512499332428, "learning_rate": 2.357930700746891e-07, "loss": 0.0274, "step": 16732 }, { "epoch": 2.8, "grad_norm": 0.34779325127601624, "learning_rate": 2.3540341676445145e-07, "loss": 0.0239, "step": 16733 }, { "epoch": 2.8, "grad_norm": 0.3569834232330322, "learning_rate": 2.3501408184126496e-07, "loss": 0.021, "step": 16734 }, { "epoch": 2.8, "grad_norm": 0.6027056574821472, "learning_rate": 2.346250653178239e-07, "loss": 0.0251, "step": 16735 }, { "epoch": 2.8, "grad_norm": 0.5440763235092163, "learning_rate": 2.3423636720681375e-07, "loss": 0.0457, "step": 16736 }, { "epoch": 2.8, "grad_norm": 0.33319762349128723, "learning_rate": 2.3384798752090877e-07, "loss": 0.0136, "step": 16737 }, { "epoch": 2.8, "grad_norm": 0.4756872355937958, "learning_rate": 2.334599262727699e-07, "loss": 0.0215, "step": 16738 }, { "epoch": 2.8, "grad_norm": 0.38815438747406006, "learning_rate": 2.3307218347505267e-07, "loss": 0.0408, "step": 16739 }, { "epoch": 2.8, "grad_norm": 0.28284788131713867, "learning_rate": 2.3268475914040023e-07, "loss": 0.0275, "step": 16740 }, { "epoch": 2.8, "grad_norm": 0.45152705907821655, "learning_rate": 2.322976532814436e-07, "loss": 0.0353, "step": 16741 }, { "epoch": 2.8, "grad_norm": 0.30294686555862427, "learning_rate": 2.3191086591080604e-07, "loss": 0.023, "step": 16742 }, { "epoch": 2.8, "grad_norm": 0.3195946216583252, "learning_rate": 2.3152439704109853e-07, "loss": 0.0199, "step": 16743 }, { "epoch": 2.8, "grad_norm": 0.35397768020629883, "learning_rate": 2.3113824668492212e-07, "loss": 0.0231, "step": 16744 }, { "epoch": 2.8, "grad_norm": 0.41922903060913086, "learning_rate": 2.30752414854869e-07, "loss": 0.0325, "step": 16745 }, { "epoch": 2.8, "grad_norm": 0.3700084984302521, "learning_rate": 2.3036690156351794e-07, "loss": 0.0274, "step": 16746 }, { "epoch": 2.8, "grad_norm": 0.41516628861427307, "learning_rate": 2.2998170682344e-07, "loss": 0.0222, "step": 16747 }, { "epoch": 2.8, "grad_norm": 0.5207708477973938, "learning_rate": 2.2959683064719517e-07, "loss": 0.0308, "step": 16748 }, { "epoch": 2.8, "grad_norm": 0.3968560993671417, "learning_rate": 2.292122730473323e-07, "loss": 0.0357, "step": 16749 }, { "epoch": 2.8, "grad_norm": 0.46760472655296326, "learning_rate": 2.2882803403639243e-07, "loss": 0.0251, "step": 16750 }, { "epoch": 2.8, "grad_norm": 0.37759727239608765, "learning_rate": 2.2844411362690112e-07, "loss": 0.0236, "step": 16751 }, { "epoch": 2.8, "grad_norm": 0.4265376329421997, "learning_rate": 2.2806051183137723e-07, "loss": 0.0254, "step": 16752 }, { "epoch": 2.8, "grad_norm": 0.4778812527656555, "learning_rate": 2.2767722866233078e-07, "loss": 0.0341, "step": 16753 }, { "epoch": 2.8, "grad_norm": 0.49494239687919617, "learning_rate": 2.272942641322562e-07, "loss": 0.0265, "step": 16754 }, { "epoch": 2.8, "grad_norm": 0.39541274309158325, "learning_rate": 2.2691161825364238e-07, "loss": 0.0338, "step": 16755 }, { "epoch": 2.8, "grad_norm": 0.3416670560836792, "learning_rate": 2.2652929103896714e-07, "loss": 0.0294, "step": 16756 }, { "epoch": 2.8, "grad_norm": 0.4882238805294037, "learning_rate": 2.2614728250069384e-07, "loss": 0.0388, "step": 16757 }, { "epoch": 2.8, "grad_norm": 0.2865661382675171, "learning_rate": 2.2576559265127917e-07, "loss": 0.0194, "step": 16758 }, { "epoch": 2.8, "grad_norm": 0.3948828876018524, "learning_rate": 2.2538422150317096e-07, "loss": 0.0325, "step": 16759 }, { "epoch": 2.8, "grad_norm": 0.5084667205810547, "learning_rate": 2.250031690688015e-07, "loss": 0.0398, "step": 16760 }, { "epoch": 2.8, "grad_norm": 0.33227720856666565, "learning_rate": 2.2462243536059635e-07, "loss": 0.016, "step": 16761 }, { "epoch": 2.8, "grad_norm": 0.4137670397758484, "learning_rate": 2.242420203909701e-07, "loss": 0.0317, "step": 16762 }, { "epoch": 2.8, "grad_norm": 0.29763537645339966, "learning_rate": 2.238619241723261e-07, "loss": 0.0171, "step": 16763 }, { "epoch": 2.8, "grad_norm": 0.45299750566482544, "learning_rate": 2.2348214671705782e-07, "loss": 0.0363, "step": 16764 }, { "epoch": 2.8, "grad_norm": 0.38193613290786743, "learning_rate": 2.2310268803754975e-07, "loss": 0.0388, "step": 16765 }, { "epoch": 2.8, "grad_norm": 0.3760741949081421, "learning_rate": 2.2272354814617314e-07, "loss": 0.0212, "step": 16766 }, { "epoch": 2.8, "grad_norm": 0.36430230736732483, "learning_rate": 2.2234472705529252e-07, "loss": 0.0208, "step": 16767 }, { "epoch": 2.8, "grad_norm": 0.3125000298023224, "learning_rate": 2.2196622477725692e-07, "loss": 0.0272, "step": 16768 }, { "epoch": 2.8, "grad_norm": 0.2951005697250366, "learning_rate": 2.215880413244087e-07, "loss": 0.0215, "step": 16769 }, { "epoch": 2.8, "grad_norm": 0.40002086758613586, "learning_rate": 2.2121017670908128e-07, "loss": 0.0351, "step": 16770 }, { "epoch": 2.8, "grad_norm": 0.44478464126586914, "learning_rate": 2.2083263094359154e-07, "loss": 0.0334, "step": 16771 }, { "epoch": 2.81, "grad_norm": 0.645854651927948, "learning_rate": 2.204554040402529e-07, "loss": 0.0459, "step": 16772 }, { "epoch": 2.81, "grad_norm": 0.29998597502708435, "learning_rate": 2.2007849601136445e-07, "loss": 0.0263, "step": 16773 }, { "epoch": 2.81, "grad_norm": 0.4357847273349762, "learning_rate": 2.1970190686921634e-07, "loss": 0.0303, "step": 16774 }, { "epoch": 2.81, "grad_norm": 0.2880480885505676, "learning_rate": 2.1932563662608543e-07, "loss": 0.0197, "step": 16775 }, { "epoch": 2.81, "grad_norm": 0.36165472865104675, "learning_rate": 2.1894968529424297e-07, "loss": 0.0317, "step": 16776 }, { "epoch": 2.81, "grad_norm": 0.7174457907676697, "learning_rate": 2.1857405288594592e-07, "loss": 0.0419, "step": 16777 }, { "epoch": 2.81, "grad_norm": 0.33343738317489624, "learning_rate": 2.181987394134444e-07, "loss": 0.0354, "step": 16778 }, { "epoch": 2.81, "grad_norm": 0.35769209265708923, "learning_rate": 2.1782374488897196e-07, "loss": 0.0295, "step": 16779 }, { "epoch": 2.81, "grad_norm": 0.4351339638233185, "learning_rate": 2.1744906932475885e-07, "loss": 0.0368, "step": 16780 }, { "epoch": 2.81, "grad_norm": 0.5654183030128479, "learning_rate": 2.1707471273302304e-07, "loss": 0.0421, "step": 16781 }, { "epoch": 2.81, "grad_norm": 0.3696730136871338, "learning_rate": 2.1670067512596816e-07, "loss": 0.0439, "step": 16782 }, { "epoch": 2.81, "grad_norm": 0.41079145669937134, "learning_rate": 2.1632695651578994e-07, "loss": 0.0331, "step": 16783 }, { "epoch": 2.81, "grad_norm": 0.5408018231391907, "learning_rate": 2.1595355691467645e-07, "loss": 0.0484, "step": 16784 }, { "epoch": 2.81, "grad_norm": 0.45668646693229675, "learning_rate": 2.1558047633480127e-07, "loss": 0.0228, "step": 16785 }, { "epoch": 2.81, "grad_norm": 0.4388905167579651, "learning_rate": 2.1520771478832803e-07, "loss": 0.029, "step": 16786 }, { "epoch": 2.81, "grad_norm": 0.428642600774765, "learning_rate": 2.1483527228741364e-07, "loss": 0.0269, "step": 16787 }, { "epoch": 2.81, "grad_norm": 0.43286892771720886, "learning_rate": 2.1446314884420061e-07, "loss": 0.0425, "step": 16788 }, { "epoch": 2.81, "grad_norm": 0.5463335514068604, "learning_rate": 2.140913444708226e-07, "loss": 0.0367, "step": 16789 }, { "epoch": 2.81, "grad_norm": 0.2870953679084778, "learning_rate": 2.137198591794032e-07, "loss": 0.0234, "step": 16790 }, { "epoch": 2.81, "grad_norm": 0.37681880593299866, "learning_rate": 2.1334869298205385e-07, "loss": 0.0266, "step": 16791 }, { "epoch": 2.81, "grad_norm": 0.41476142406463623, "learning_rate": 2.1297784589087933e-07, "loss": 0.0347, "step": 16792 }, { "epoch": 2.81, "grad_norm": 0.34668537974357605, "learning_rate": 2.1260731791796995e-07, "loss": 0.0305, "step": 16793 }, { "epoch": 2.81, "grad_norm": 0.40317636728286743, "learning_rate": 2.1223710907540718e-07, "loss": 0.0233, "step": 16794 }, { "epoch": 2.81, "grad_norm": 0.4041910171508789, "learning_rate": 2.1186721937526133e-07, "loss": 0.0496, "step": 16795 }, { "epoch": 2.81, "grad_norm": 0.38742783665657043, "learning_rate": 2.114976488295961e-07, "loss": 0.0297, "step": 16796 }, { "epoch": 2.81, "grad_norm": 0.5018003582954407, "learning_rate": 2.1112839745045855e-07, "loss": 0.0332, "step": 16797 }, { "epoch": 2.81, "grad_norm": 0.5786698460578918, "learning_rate": 2.1075946524989122e-07, "loss": 0.0309, "step": 16798 }, { "epoch": 2.81, "grad_norm": 0.3728492856025696, "learning_rate": 2.1039085223992228e-07, "loss": 0.0322, "step": 16799 }, { "epoch": 2.81, "grad_norm": 0.36821478605270386, "learning_rate": 2.1002255843256992e-07, "loss": 0.0269, "step": 16800 }, { "epoch": 2.81, "grad_norm": 0.5116540193557739, "learning_rate": 2.096545838398445e-07, "loss": 0.0437, "step": 16801 }, { "epoch": 2.81, "grad_norm": 0.36037591099739075, "learning_rate": 2.09286928473742e-07, "loss": 0.039, "step": 16802 }, { "epoch": 2.81, "grad_norm": 0.40887948870658875, "learning_rate": 2.089195923462528e-07, "loss": 0.0317, "step": 16803 }, { "epoch": 2.81, "grad_norm": 0.3305712044239044, "learning_rate": 2.0855257546935403e-07, "loss": 0.0212, "step": 16804 }, { "epoch": 2.81, "grad_norm": 0.37446165084838867, "learning_rate": 2.0818587785501167e-07, "loss": 0.034, "step": 16805 }, { "epoch": 2.81, "grad_norm": 0.43862536549568176, "learning_rate": 2.078194995151839e-07, "loss": 0.0281, "step": 16806 }, { "epoch": 2.81, "grad_norm": 0.4035019278526306, "learning_rate": 2.074534404618156e-07, "loss": 0.0367, "step": 16807 }, { "epoch": 2.81, "grad_norm": 0.3469187021255493, "learning_rate": 2.070877007068417e-07, "loss": 0.0229, "step": 16808 }, { "epoch": 2.81, "grad_norm": 0.31879085302352905, "learning_rate": 2.0672228026219045e-07, "loss": 0.0247, "step": 16809 }, { "epoch": 2.81, "grad_norm": 0.31128907203674316, "learning_rate": 2.0635717913977337e-07, "loss": 0.0281, "step": 16810 }, { "epoch": 2.81, "grad_norm": 0.290103942155838, "learning_rate": 2.059923973514988e-07, "loss": 0.0245, "step": 16811 }, { "epoch": 2.81, "grad_norm": 0.41662663221359253, "learning_rate": 2.0562793490925714e-07, "loss": 0.0362, "step": 16812 }, { "epoch": 2.81, "grad_norm": 0.45734915137290955, "learning_rate": 2.0526379182493562e-07, "loss": 0.0252, "step": 16813 }, { "epoch": 2.81, "grad_norm": 0.4087533950805664, "learning_rate": 2.048999681104047e-07, "loss": 0.0336, "step": 16814 }, { "epoch": 2.81, "grad_norm": 0.4323267340660095, "learning_rate": 2.0453646377752933e-07, "loss": 0.0298, "step": 16815 }, { "epoch": 2.81, "grad_norm": 0.3782840967178345, "learning_rate": 2.041732788381612e-07, "loss": 0.0288, "step": 16816 }, { "epoch": 2.81, "grad_norm": 0.3357104957103729, "learning_rate": 2.0381041330414186e-07, "loss": 0.027, "step": 16817 }, { "epoch": 2.81, "grad_norm": 0.34508106112480164, "learning_rate": 2.0344786718730413e-07, "loss": 0.032, "step": 16818 }, { "epoch": 2.81, "grad_norm": 0.371013343334198, "learning_rate": 2.0308564049946856e-07, "loss": 0.0357, "step": 16819 }, { "epoch": 2.81, "grad_norm": 0.3030628263950348, "learning_rate": 2.0272373325244564e-07, "loss": 0.0243, "step": 16820 }, { "epoch": 2.81, "grad_norm": 0.3552708625793457, "learning_rate": 2.0236214545803822e-07, "loss": 0.0219, "step": 16821 }, { "epoch": 2.81, "grad_norm": 0.3006564974784851, "learning_rate": 2.020008771280324e-07, "loss": 0.025, "step": 16822 }, { "epoch": 2.81, "grad_norm": 0.31555402278900146, "learning_rate": 2.016399282742121e-07, "loss": 0.0254, "step": 16823 }, { "epoch": 2.81, "grad_norm": 0.41520172357559204, "learning_rate": 2.0127929890834342e-07, "loss": 0.0281, "step": 16824 }, { "epoch": 2.81, "grad_norm": 0.3751516044139862, "learning_rate": 2.0091898904218477e-07, "loss": 0.0278, "step": 16825 }, { "epoch": 2.81, "grad_norm": 0.4469178318977356, "learning_rate": 2.0055899868748784e-07, "loss": 0.0253, "step": 16826 }, { "epoch": 2.81, "grad_norm": 0.3516548275947571, "learning_rate": 2.0019932785598661e-07, "loss": 0.0235, "step": 16827 }, { "epoch": 2.81, "grad_norm": 0.4402792751789093, "learning_rate": 1.9983997655941167e-07, "loss": 0.0331, "step": 16828 }, { "epoch": 2.81, "grad_norm": 0.40873220562934875, "learning_rate": 1.994809448094792e-07, "loss": 0.0324, "step": 16829 }, { "epoch": 2.81, "grad_norm": 0.7065628170967102, "learning_rate": 1.9912223261789542e-07, "loss": 0.0302, "step": 16830 }, { "epoch": 2.82, "grad_norm": 0.3174642026424408, "learning_rate": 1.987638399963565e-07, "loss": 0.0294, "step": 16831 }, { "epoch": 2.82, "grad_norm": 0.4459076225757599, "learning_rate": 1.9840576695654868e-07, "loss": 0.0314, "step": 16832 }, { "epoch": 2.82, "grad_norm": 0.3314202129840851, "learning_rate": 1.9804801351014703e-07, "loss": 0.0218, "step": 16833 }, { "epoch": 2.82, "grad_norm": 0.42345428466796875, "learning_rate": 1.9769057966881777e-07, "loss": 0.0314, "step": 16834 }, { "epoch": 2.82, "grad_norm": 0.44431403279304504, "learning_rate": 1.9733346544421383e-07, "loss": 0.0421, "step": 16835 }, { "epoch": 2.82, "grad_norm": 0.33471500873565674, "learning_rate": 1.9697667084797921e-07, "loss": 0.0283, "step": 16836 }, { "epoch": 2.82, "grad_norm": 0.3649539649486542, "learning_rate": 1.966201958917502e-07, "loss": 0.0265, "step": 16837 }, { "epoch": 2.82, "grad_norm": 0.39603495597839355, "learning_rate": 1.9626404058714855e-07, "loss": 0.0269, "step": 16838 }, { "epoch": 2.82, "grad_norm": 0.3821844458580017, "learning_rate": 1.9590820494578612e-07, "loss": 0.0353, "step": 16839 }, { "epoch": 2.82, "grad_norm": 0.39213454723358154, "learning_rate": 1.9555268897926692e-07, "loss": 0.0348, "step": 16840 }, { "epoch": 2.82, "grad_norm": 0.4335923492908478, "learning_rate": 1.9519749269918177e-07, "loss": 0.0324, "step": 16841 }, { "epoch": 2.82, "grad_norm": 0.478101909160614, "learning_rate": 1.9484261611711242e-07, "loss": 0.0351, "step": 16842 }, { "epoch": 2.82, "grad_norm": 0.44195595383644104, "learning_rate": 1.9448805924463187e-07, "loss": 0.0343, "step": 16843 }, { "epoch": 2.82, "grad_norm": 0.5374156832695007, "learning_rate": 1.9413382209329867e-07, "loss": 0.0389, "step": 16844 }, { "epoch": 2.82, "grad_norm": 0.33855101466178894, "learning_rate": 1.9377990467466468e-07, "loss": 0.0237, "step": 16845 }, { "epoch": 2.82, "grad_norm": 0.31351086497306824, "learning_rate": 1.9342630700026954e-07, "loss": 0.025, "step": 16846 }, { "epoch": 2.82, "grad_norm": 0.28545311093330383, "learning_rate": 1.9307302908164182e-07, "loss": 0.0212, "step": 16847 }, { "epoch": 2.82, "grad_norm": 0.4562014937400818, "learning_rate": 1.927200709303012e-07, "loss": 0.0375, "step": 16848 }, { "epoch": 2.82, "grad_norm": 0.31147268414497375, "learning_rate": 1.9236743255775735e-07, "loss": 0.0369, "step": 16849 }, { "epoch": 2.82, "grad_norm": 0.31645646691322327, "learning_rate": 1.9201511397550665e-07, "loss": 0.0194, "step": 16850 }, { "epoch": 2.82, "grad_norm": 0.44172990322113037, "learning_rate": 1.9166311519503765e-07, "loss": 0.0317, "step": 16851 }, { "epoch": 2.82, "grad_norm": 0.5041183829307556, "learning_rate": 1.9131143622782789e-07, "loss": 0.0261, "step": 16852 }, { "epoch": 2.82, "grad_norm": 0.42709454894065857, "learning_rate": 1.909600770853437e-07, "loss": 0.0361, "step": 16853 }, { "epoch": 2.82, "grad_norm": 0.4080745577812195, "learning_rate": 1.9060903777904372e-07, "loss": 0.0304, "step": 16854 }, { "epoch": 2.82, "grad_norm": 0.6210430264472961, "learning_rate": 1.902583183203721e-07, "loss": 0.0487, "step": 16855 }, { "epoch": 2.82, "grad_norm": 0.4868123531341553, "learning_rate": 1.8990791872076307e-07, "loss": 0.0206, "step": 16856 }, { "epoch": 2.82, "grad_norm": 0.41372916102409363, "learning_rate": 1.8955783899164527e-07, "loss": 0.0207, "step": 16857 }, { "epoch": 2.82, "grad_norm": 0.44249144196510315, "learning_rate": 1.8920807914443172e-07, "loss": 0.0233, "step": 16858 }, { "epoch": 2.82, "grad_norm": 0.44326454401016235, "learning_rate": 1.888586391905256e-07, "loss": 0.0229, "step": 16859 }, { "epoch": 2.82, "grad_norm": 0.4093911349773407, "learning_rate": 1.8850951914132442e-07, "loss": 0.0435, "step": 16860 }, { "epoch": 2.82, "grad_norm": 0.3907586336135864, "learning_rate": 1.8816071900820688e-07, "loss": 0.0327, "step": 16861 }, { "epoch": 2.82, "grad_norm": 0.35391107201576233, "learning_rate": 1.8781223880255051e-07, "loss": 0.0257, "step": 16862 }, { "epoch": 2.82, "grad_norm": 0.3925389349460602, "learning_rate": 1.8746407853571624e-07, "loss": 0.0294, "step": 16863 }, { "epoch": 2.82, "grad_norm": 0.3267399072647095, "learning_rate": 1.871162382190539e-07, "loss": 0.0255, "step": 16864 }, { "epoch": 2.82, "grad_norm": 0.29882246255874634, "learning_rate": 1.867687178639088e-07, "loss": 0.0293, "step": 16865 }, { "epoch": 2.82, "grad_norm": 0.5022107362747192, "learning_rate": 1.8642151748161197e-07, "loss": 0.0258, "step": 16866 }, { "epoch": 2.82, "grad_norm": 0.599594235420227, "learning_rate": 1.8607463708348096e-07, "loss": 0.0362, "step": 16867 }, { "epoch": 2.82, "grad_norm": 0.36333170533180237, "learning_rate": 1.8572807668083004e-07, "loss": 0.0352, "step": 16868 }, { "epoch": 2.82, "grad_norm": 0.4350453317165375, "learning_rate": 1.8538183628495686e-07, "loss": 0.0376, "step": 16869 }, { "epoch": 2.82, "grad_norm": 0.38442325592041016, "learning_rate": 1.850359159071524e-07, "loss": 0.0277, "step": 16870 }, { "epoch": 2.82, "grad_norm": 0.31818315386772156, "learning_rate": 1.8469031555869544e-07, "loss": 0.0325, "step": 16871 }, { "epoch": 2.82, "grad_norm": 0.4426862895488739, "learning_rate": 1.8434503525085468e-07, "loss": 0.0283, "step": 16872 }, { "epoch": 2.82, "grad_norm": 0.44610631465911865, "learning_rate": 1.840000749948878e-07, "loss": 0.0357, "step": 16873 }, { "epoch": 2.82, "grad_norm": 0.2283787727355957, "learning_rate": 1.8365543480204472e-07, "loss": 0.0133, "step": 16874 }, { "epoch": 2.82, "grad_norm": 0.4198203980922699, "learning_rate": 1.8331111468355978e-07, "loss": 0.0277, "step": 16875 }, { "epoch": 2.82, "grad_norm": 0.38449808955192566, "learning_rate": 1.8296711465066174e-07, "loss": 0.0327, "step": 16876 }, { "epoch": 2.82, "grad_norm": 0.4645583927631378, "learning_rate": 1.8262343471456723e-07, "loss": 0.0368, "step": 16877 }, { "epoch": 2.82, "grad_norm": 0.3583105802536011, "learning_rate": 1.822800748864817e-07, "loss": 0.0242, "step": 16878 }, { "epoch": 2.82, "grad_norm": 0.3590085208415985, "learning_rate": 1.8193703517760287e-07, "loss": 0.0167, "step": 16879 }, { "epoch": 2.82, "grad_norm": 0.4950348734855652, "learning_rate": 1.815943155991129e-07, "loss": 0.0294, "step": 16880 }, { "epoch": 2.82, "grad_norm": 0.45640939474105835, "learning_rate": 1.8125191616218841e-07, "loss": 0.0402, "step": 16881 }, { "epoch": 2.82, "grad_norm": 0.37394803762435913, "learning_rate": 1.8090983687799379e-07, "loss": 0.0357, "step": 16882 }, { "epoch": 2.82, "grad_norm": 0.32901063561439514, "learning_rate": 1.8056807775768237e-07, "loss": 0.0373, "step": 16883 }, { "epoch": 2.82, "grad_norm": 0.7380484342575073, "learning_rate": 1.8022663881239743e-07, "loss": 0.0278, "step": 16884 }, { "epoch": 2.82, "grad_norm": 0.29631176590919495, "learning_rate": 1.7988552005327232e-07, "loss": 0.0199, "step": 16885 }, { "epoch": 2.82, "grad_norm": 0.3847375810146332, "learning_rate": 1.7954472149143142e-07, "loss": 0.0231, "step": 16886 }, { "epoch": 2.82, "grad_norm": 0.43685296177864075, "learning_rate": 1.7920424313798368e-07, "loss": 0.0272, "step": 16887 }, { "epoch": 2.82, "grad_norm": 0.36707451939582825, "learning_rate": 1.7886408500403351e-07, "loss": 0.0265, "step": 16888 }, { "epoch": 2.82, "grad_norm": 0.4919009208679199, "learning_rate": 1.7852424710066985e-07, "loss": 0.0351, "step": 16889 }, { "epoch": 2.82, "grad_norm": 0.285949170589447, "learning_rate": 1.7818472943897602e-07, "loss": 0.0171, "step": 16890 }, { "epoch": 2.83, "grad_norm": 0.5822529196739197, "learning_rate": 1.7784553203001987e-07, "loss": 0.0387, "step": 16891 }, { "epoch": 2.83, "grad_norm": 0.44263869524002075, "learning_rate": 1.7750665488486363e-07, "loss": 0.0345, "step": 16892 }, { "epoch": 2.83, "grad_norm": 0.3480839431285858, "learning_rate": 1.7716809801455626e-07, "loss": 0.0284, "step": 16893 }, { "epoch": 2.83, "grad_norm": 0.41866376996040344, "learning_rate": 1.768298614301367e-07, "loss": 0.0268, "step": 16894 }, { "epoch": 2.83, "grad_norm": 0.3020963668823242, "learning_rate": 1.764919451426317e-07, "loss": 0.0209, "step": 16895 }, { "epoch": 2.83, "grad_norm": 0.46287232637405396, "learning_rate": 1.761543491630624e-07, "loss": 0.033, "step": 16896 }, { "epoch": 2.83, "grad_norm": 0.41565340757369995, "learning_rate": 1.758170735024356e-07, "loss": 0.0214, "step": 16897 }, { "epoch": 2.83, "grad_norm": 0.34446626901626587, "learning_rate": 1.7548011817174693e-07, "loss": 0.0216, "step": 16898 }, { "epoch": 2.83, "grad_norm": 0.7370203733444214, "learning_rate": 1.7514348318198537e-07, "loss": 0.0344, "step": 16899 }, { "epoch": 2.83, "grad_norm": 0.4364195466041565, "learning_rate": 1.748071685441266e-07, "loss": 0.0292, "step": 16900 }, { "epoch": 2.83, "grad_norm": 0.37333500385284424, "learning_rate": 1.744711742691352e-07, "loss": 0.033, "step": 16901 }, { "epoch": 2.83, "grad_norm": 0.40377277135849, "learning_rate": 1.7413550036797012e-07, "loss": 0.0322, "step": 16902 }, { "epoch": 2.83, "grad_norm": 0.3234192132949829, "learning_rate": 1.7380014685157265e-07, "loss": 0.0236, "step": 16903 }, { "epoch": 2.83, "grad_norm": 0.714099645614624, "learning_rate": 1.734651137308796e-07, "loss": 0.0386, "step": 16904 }, { "epoch": 2.83, "grad_norm": 0.6399874091148376, "learning_rate": 1.7313040101681555e-07, "loss": 0.0399, "step": 16905 }, { "epoch": 2.83, "grad_norm": 0.38063880801200867, "learning_rate": 1.727960087202918e-07, "loss": 0.0314, "step": 16906 }, { "epoch": 2.83, "grad_norm": 0.6902650594711304, "learning_rate": 1.7246193685221402e-07, "loss": 0.0432, "step": 16907 }, { "epoch": 2.83, "grad_norm": 0.35218241810798645, "learning_rate": 1.7212818542347354e-07, "loss": 0.027, "step": 16908 }, { "epoch": 2.83, "grad_norm": 0.464197039604187, "learning_rate": 1.7179475444495386e-07, "loss": 0.038, "step": 16909 }, { "epoch": 2.83, "grad_norm": 0.2802610397338867, "learning_rate": 1.714616439275274e-07, "loss": 0.0191, "step": 16910 }, { "epoch": 2.83, "grad_norm": 0.3186512291431427, "learning_rate": 1.7112885388205325e-07, "loss": 0.0357, "step": 16911 }, { "epoch": 2.83, "grad_norm": 0.38924410939216614, "learning_rate": 1.7079638431938384e-07, "loss": 0.0317, "step": 16912 }, { "epoch": 2.83, "grad_norm": 0.38885554671287537, "learning_rate": 1.704642352503616e-07, "loss": 0.0303, "step": 16913 }, { "epoch": 2.83, "grad_norm": 0.2905014157295227, "learning_rate": 1.7013240668581232e-07, "loss": 0.0199, "step": 16914 }, { "epoch": 2.83, "grad_norm": 0.28296801447868347, "learning_rate": 1.698008986365607e-07, "loss": 0.0309, "step": 16915 }, { "epoch": 2.83, "grad_norm": 0.3463268280029297, "learning_rate": 1.6946971111341136e-07, "loss": 0.026, "step": 16916 }, { "epoch": 2.83, "grad_norm": 0.3630830943584442, "learning_rate": 1.6913884412716575e-07, "loss": 0.0298, "step": 16917 }, { "epoch": 2.83, "grad_norm": 0.4174242615699768, "learning_rate": 1.6880829768861296e-07, "loss": 0.0371, "step": 16918 }, { "epoch": 2.83, "grad_norm": 0.31582850217819214, "learning_rate": 1.6847807180852993e-07, "loss": 0.0344, "step": 16919 }, { "epoch": 2.83, "grad_norm": 0.369167298078537, "learning_rate": 1.681481664976814e-07, "loss": 0.0358, "step": 16920 }, { "epoch": 2.83, "grad_norm": 0.47697198390960693, "learning_rate": 1.6781858176682874e-07, "loss": 0.0253, "step": 16921 }, { "epoch": 2.83, "grad_norm": 0.4280813932418823, "learning_rate": 1.6748931762671672e-07, "loss": 0.0288, "step": 16922 }, { "epoch": 2.83, "grad_norm": 0.49339333176612854, "learning_rate": 1.6716037408808005e-07, "loss": 0.0406, "step": 16923 }, { "epoch": 2.83, "grad_norm": 0.5704135298728943, "learning_rate": 1.6683175116164575e-07, "loss": 0.0467, "step": 16924 }, { "epoch": 2.83, "grad_norm": 0.40863481163978577, "learning_rate": 1.6650344885812853e-07, "loss": 0.0309, "step": 16925 }, { "epoch": 2.83, "grad_norm": 0.5256161689758301, "learning_rate": 1.6617546718823318e-07, "loss": 0.0235, "step": 16926 }, { "epoch": 2.83, "grad_norm": 0.45039552450180054, "learning_rate": 1.658478061626556e-07, "loss": 0.0431, "step": 16927 }, { "epoch": 2.83, "grad_norm": 0.3258436322212219, "learning_rate": 1.6552046579207613e-07, "loss": 0.0275, "step": 16928 }, { "epoch": 2.83, "grad_norm": 0.4057222306728363, "learning_rate": 1.651934460871707e-07, "loss": 0.0212, "step": 16929 }, { "epoch": 2.83, "grad_norm": 0.3823463022708893, "learning_rate": 1.6486674705860295e-07, "loss": 0.0196, "step": 16930 }, { "epoch": 2.83, "grad_norm": 0.47474417090415955, "learning_rate": 1.6454036871702218e-07, "loss": 0.0433, "step": 16931 }, { "epoch": 2.83, "grad_norm": 0.3061448335647583, "learning_rate": 1.6421431107307206e-07, "loss": 0.0244, "step": 16932 }, { "epoch": 2.83, "grad_norm": 0.4063420593738556, "learning_rate": 1.6388857413738523e-07, "loss": 0.0467, "step": 16933 }, { "epoch": 2.83, "grad_norm": 0.3357156217098236, "learning_rate": 1.6356315792058098e-07, "loss": 0.0346, "step": 16934 }, { "epoch": 2.83, "grad_norm": 0.3831001818180084, "learning_rate": 1.6323806243327078e-07, "loss": 0.0284, "step": 16935 }, { "epoch": 2.83, "grad_norm": 0.5312135219573975, "learning_rate": 1.6291328768605508e-07, "loss": 0.036, "step": 16936 }, { "epoch": 2.83, "grad_norm": 0.4525306224822998, "learning_rate": 1.6258883368952204e-07, "loss": 0.0364, "step": 16937 }, { "epoch": 2.83, "grad_norm": 0.3986339867115021, "learning_rate": 1.6226470045425324e-07, "loss": 0.029, "step": 16938 }, { "epoch": 2.83, "grad_norm": 0.4435954689979553, "learning_rate": 1.6194088799081575e-07, "loss": 0.0362, "step": 16939 }, { "epoch": 2.83, "grad_norm": 0.4877103269100189, "learning_rate": 1.616173963097678e-07, "loss": 0.0265, "step": 16940 }, { "epoch": 2.83, "grad_norm": 0.4240696430206299, "learning_rate": 1.6129422542165873e-07, "loss": 0.0332, "step": 16941 }, { "epoch": 2.83, "grad_norm": 0.3100457787513733, "learning_rate": 1.6097137533702345e-07, "loss": 0.0285, "step": 16942 }, { "epoch": 2.83, "grad_norm": 0.3674861788749695, "learning_rate": 1.6064884606639242e-07, "loss": 0.0298, "step": 16943 }, { "epoch": 2.83, "grad_norm": 0.3646095395088196, "learning_rate": 1.6032663762027946e-07, "loss": 0.0311, "step": 16944 }, { "epoch": 2.83, "grad_norm": 0.3248507082462311, "learning_rate": 1.6000475000919057e-07, "loss": 0.0222, "step": 16945 }, { "epoch": 2.83, "grad_norm": 0.4254169762134552, "learning_rate": 1.5968318324362186e-07, "loss": 0.0324, "step": 16946 }, { "epoch": 2.83, "grad_norm": 0.35677653551101685, "learning_rate": 1.5936193733405937e-07, "loss": 0.0316, "step": 16947 }, { "epoch": 2.83, "grad_norm": 0.3805919885635376, "learning_rate": 1.590410122909758e-07, "loss": 0.0349, "step": 16948 }, { "epoch": 2.83, "grad_norm": 0.37499290704727173, "learning_rate": 1.5872040812483726e-07, "loss": 0.0332, "step": 16949 }, { "epoch": 2.83, "grad_norm": 0.48164424300193787, "learning_rate": 1.5840012484609757e-07, "loss": 0.0202, "step": 16950 }, { "epoch": 2.84, "grad_norm": 0.3897407054901123, "learning_rate": 1.580801624651973e-07, "loss": 0.0343, "step": 16951 }, { "epoch": 2.84, "grad_norm": 0.45336705446243286, "learning_rate": 1.5776052099257254e-07, "loss": 0.0417, "step": 16952 }, { "epoch": 2.84, "grad_norm": 0.5192499756813049, "learning_rate": 1.5744120043864276e-07, "loss": 0.0272, "step": 16953 }, { "epoch": 2.84, "grad_norm": 0.2364431768655777, "learning_rate": 1.571222008138218e-07, "loss": 0.0179, "step": 16954 }, { "epoch": 2.84, "grad_norm": 0.4748457670211792, "learning_rate": 1.5680352212850913e-07, "loss": 0.0308, "step": 16955 }, { "epoch": 2.84, "grad_norm": 0.7595564126968384, "learning_rate": 1.564851643930976e-07, "loss": 0.0317, "step": 16956 }, { "epoch": 2.84, "grad_norm": 0.340865820646286, "learning_rate": 1.561671276179666e-07, "loss": 0.0322, "step": 16957 }, { "epoch": 2.84, "grad_norm": 0.4862571060657501, "learning_rate": 1.558494118134879e-07, "loss": 0.0368, "step": 16958 }, { "epoch": 2.84, "grad_norm": 0.35303157567977905, "learning_rate": 1.5553201699001764e-07, "loss": 0.0242, "step": 16959 }, { "epoch": 2.84, "grad_norm": 0.3248273432254791, "learning_rate": 1.5521494315790863e-07, "loss": 0.0348, "step": 16960 }, { "epoch": 2.84, "grad_norm": 0.6062798500061035, "learning_rate": 1.548981903274971e-07, "loss": 0.0518, "step": 16961 }, { "epoch": 2.84, "grad_norm": 0.3802407681941986, "learning_rate": 1.545817585091103e-07, "loss": 0.0255, "step": 16962 }, { "epoch": 2.84, "grad_norm": 0.4285738170146942, "learning_rate": 1.5426564771306773e-07, "loss": 0.0218, "step": 16963 }, { "epoch": 2.84, "grad_norm": 0.42129337787628174, "learning_rate": 1.5394985794967787e-07, "loss": 0.0369, "step": 16964 }, { "epoch": 2.84, "grad_norm": 0.46608564257621765, "learning_rate": 1.5363438922923358e-07, "loss": 0.0269, "step": 16965 }, { "epoch": 2.84, "grad_norm": 0.37290045619010925, "learning_rate": 1.533192415620255e-07, "loss": 0.0323, "step": 16966 }, { "epoch": 2.84, "grad_norm": 0.3514188230037689, "learning_rate": 1.5300441495832652e-07, "loss": 0.0234, "step": 16967 }, { "epoch": 2.84, "grad_norm": 0.4296087324619293, "learning_rate": 1.5268990942840178e-07, "loss": 0.0272, "step": 16968 }, { "epoch": 2.84, "grad_norm": 0.4273790121078491, "learning_rate": 1.5237572498250752e-07, "loss": 0.0274, "step": 16969 }, { "epoch": 2.84, "grad_norm": 0.33987632393836975, "learning_rate": 1.5206186163088666e-07, "loss": 0.0187, "step": 16970 }, { "epoch": 2.84, "grad_norm": 0.41636621952056885, "learning_rate": 1.5174831938377544e-07, "loss": 0.0394, "step": 16971 }, { "epoch": 2.84, "grad_norm": 0.47019898891448975, "learning_rate": 1.5143509825139458e-07, "loss": 0.0366, "step": 16972 }, { "epoch": 2.84, "grad_norm": 0.3726765811443329, "learning_rate": 1.5112219824395925e-07, "loss": 0.0238, "step": 16973 }, { "epoch": 2.84, "grad_norm": 0.48030000925064087, "learning_rate": 1.5080961937167126e-07, "loss": 0.0331, "step": 16974 }, { "epoch": 2.84, "grad_norm": 0.41221436858177185, "learning_rate": 1.504973616447225e-07, "loss": 0.0255, "step": 16975 }, { "epoch": 2.84, "grad_norm": 0.4323124587535858, "learning_rate": 1.501854250732937e-07, "loss": 0.0285, "step": 16976 }, { "epoch": 2.84, "grad_norm": 0.6752203106880188, "learning_rate": 1.4987380966755781e-07, "loss": 0.0532, "step": 16977 }, { "epoch": 2.84, "grad_norm": 0.355499804019928, "learning_rate": 1.4956251543767454e-07, "loss": 0.0246, "step": 16978 }, { "epoch": 2.84, "grad_norm": 0.5249778032302856, "learning_rate": 1.4925154239379237e-07, "loss": 0.0267, "step": 16979 }, { "epoch": 2.84, "grad_norm": 0.4346354305744171, "learning_rate": 1.489408905460521e-07, "loss": 0.0394, "step": 16980 }, { "epoch": 2.84, "grad_norm": 0.3854617476463318, "learning_rate": 1.4863055990458562e-07, "loss": 0.0306, "step": 16981 }, { "epoch": 2.84, "grad_norm": 0.3525148332118988, "learning_rate": 1.483205504795071e-07, "loss": 0.0312, "step": 16982 }, { "epoch": 2.84, "grad_norm": 0.3663977384567261, "learning_rate": 1.4801086228092842e-07, "loss": 0.0413, "step": 16983 }, { "epoch": 2.84, "grad_norm": 0.2547835409641266, "learning_rate": 1.4770149531894594e-07, "loss": 0.0197, "step": 16984 }, { "epoch": 2.84, "grad_norm": 0.3581576347351074, "learning_rate": 1.4739244960364606e-07, "loss": 0.0386, "step": 16985 }, { "epoch": 2.84, "grad_norm": 0.3202550709247589, "learning_rate": 1.470837251451074e-07, "loss": 0.0253, "step": 16986 }, { "epoch": 2.84, "grad_norm": 0.30303695797920227, "learning_rate": 1.4677532195339515e-07, "loss": 0.0258, "step": 16987 }, { "epoch": 2.84, "grad_norm": 0.29699674248695374, "learning_rate": 1.4646724003856473e-07, "loss": 0.0199, "step": 16988 }, { "epoch": 2.84, "grad_norm": 0.3298749625682831, "learning_rate": 1.4615947941066354e-07, "loss": 0.0226, "step": 16989 }, { "epoch": 2.84, "grad_norm": 0.4415586590766907, "learning_rate": 1.4585204007972477e-07, "loss": 0.0368, "step": 16990 }, { "epoch": 2.84, "grad_norm": 0.3862360119819641, "learning_rate": 1.4554492205577363e-07, "loss": 0.0256, "step": 16991 }, { "epoch": 2.84, "grad_norm": 0.7368948459625244, "learning_rate": 1.452381253488233e-07, "loss": 0.0446, "step": 16992 }, { "epoch": 2.84, "grad_norm": 0.4395160675048828, "learning_rate": 1.44931649968878e-07, "loss": 0.0293, "step": 16993 }, { "epoch": 2.84, "grad_norm": 0.31787779927253723, "learning_rate": 1.446254959259308e-07, "loss": 0.0205, "step": 16994 }, { "epoch": 2.84, "grad_norm": 0.46083104610443115, "learning_rate": 1.443196632299637e-07, "loss": 0.0361, "step": 16995 }, { "epoch": 2.84, "grad_norm": 0.5116653442382812, "learning_rate": 1.440141518909488e-07, "loss": 0.0216, "step": 16996 }, { "epoch": 2.84, "grad_norm": 0.4545477628707886, "learning_rate": 1.4370896191884808e-07, "loss": 0.0359, "step": 16997 }, { "epoch": 2.84, "grad_norm": 0.347192645072937, "learning_rate": 1.4340409332361248e-07, "loss": 0.0296, "step": 16998 }, { "epoch": 2.84, "grad_norm": 0.3394934833049774, "learning_rate": 1.4309954611518295e-07, "loss": 0.0332, "step": 16999 }, { "epoch": 2.84, "grad_norm": 0.37546029686927795, "learning_rate": 1.427953203034893e-07, "loss": 0.0266, "step": 17000 }, { "epoch": 2.84, "grad_norm": 0.38827595114707947, "learning_rate": 1.424914158984514e-07, "loss": 0.0363, "step": 17001 }, { "epoch": 2.84, "grad_norm": 0.4222439229488373, "learning_rate": 1.4218783290997795e-07, "loss": 0.0357, "step": 17002 }, { "epoch": 2.84, "grad_norm": 0.43891412019729614, "learning_rate": 1.4188457134796773e-07, "loss": 0.0314, "step": 17003 }, { "epoch": 2.84, "grad_norm": 0.57561194896698, "learning_rate": 1.415816312223084e-07, "loss": 0.0477, "step": 17004 }, { "epoch": 2.84, "grad_norm": 0.3462217450141907, "learning_rate": 1.412790125428798e-07, "loss": 0.038, "step": 17005 }, { "epoch": 2.84, "grad_norm": 0.33625614643096924, "learning_rate": 1.4097671531954848e-07, "loss": 0.0242, "step": 17006 }, { "epoch": 2.84, "grad_norm": 0.63422030210495, "learning_rate": 1.4067473956216993e-07, "loss": 0.0435, "step": 17007 }, { "epoch": 2.84, "grad_norm": 0.5810681581497192, "learning_rate": 1.4037308528059067e-07, "loss": 0.0376, "step": 17008 }, { "epoch": 2.84, "grad_norm": 0.5055763125419617, "learning_rate": 1.4007175248464844e-07, "loss": 0.0459, "step": 17009 }, { "epoch": 2.84, "grad_norm": 0.3468896746635437, "learning_rate": 1.3977074118416535e-07, "loss": 0.0237, "step": 17010 }, { "epoch": 2.85, "grad_norm": 0.4566945731639862, "learning_rate": 1.3947005138895908e-07, "loss": 0.04, "step": 17011 }, { "epoch": 2.85, "grad_norm": 0.5259995460510254, "learning_rate": 1.3916968310883183e-07, "loss": 0.0393, "step": 17012 }, { "epoch": 2.85, "grad_norm": 0.6333020329475403, "learning_rate": 1.388696363535802e-07, "loss": 0.0438, "step": 17013 }, { "epoch": 2.85, "grad_norm": 0.7180020809173584, "learning_rate": 1.3856991113298634e-07, "loss": 0.0349, "step": 17014 }, { "epoch": 2.85, "grad_norm": 0.42019712924957275, "learning_rate": 1.3827050745682135e-07, "loss": 0.0319, "step": 17015 }, { "epoch": 2.85, "grad_norm": 0.4106714129447937, "learning_rate": 1.3797142533484964e-07, "loss": 0.0266, "step": 17016 }, { "epoch": 2.85, "grad_norm": 0.3470897972583771, "learning_rate": 1.3767266477682338e-07, "loss": 0.0221, "step": 17017 }, { "epoch": 2.85, "grad_norm": 0.3530905544757843, "learning_rate": 1.3737422579248261e-07, "loss": 0.0301, "step": 17018 }, { "epoch": 2.85, "grad_norm": 0.38888266682624817, "learning_rate": 1.3707610839155948e-07, "loss": 0.0252, "step": 17019 }, { "epoch": 2.85, "grad_norm": 0.4766579568386078, "learning_rate": 1.3677831258377405e-07, "loss": 0.042, "step": 17020 }, { "epoch": 2.85, "grad_norm": 0.4292801320552826, "learning_rate": 1.364808383788363e-07, "loss": 0.0365, "step": 17021 }, { "epoch": 2.85, "grad_norm": 0.31166326999664307, "learning_rate": 1.3618368578644626e-07, "loss": 0.0214, "step": 17022 }, { "epoch": 2.85, "grad_norm": 0.5320195555686951, "learning_rate": 1.3588685481629172e-07, "loss": 0.0448, "step": 17023 }, { "epoch": 2.85, "grad_norm": 0.5362128615379333, "learning_rate": 1.355903454780527e-07, "loss": 0.0392, "step": 17024 }, { "epoch": 2.85, "grad_norm": 0.23600561916828156, "learning_rate": 1.3529415778139597e-07, "loss": 0.016, "step": 17025 }, { "epoch": 2.85, "grad_norm": 0.42935654520988464, "learning_rate": 1.3499829173598044e-07, "loss": 0.027, "step": 17026 }, { "epoch": 2.85, "grad_norm": 0.26895466446876526, "learning_rate": 1.3470274735145173e-07, "loss": 0.0204, "step": 17027 }, { "epoch": 2.85, "grad_norm": 0.31318849325180054, "learning_rate": 1.3440752463744766e-07, "loss": 0.0318, "step": 17028 }, { "epoch": 2.85, "grad_norm": 0.5879932045936584, "learning_rate": 1.341126236035939e-07, "loss": 0.0431, "step": 17029 }, { "epoch": 2.85, "grad_norm": 0.3900046646595001, "learning_rate": 1.3381804425950605e-07, "loss": 0.0284, "step": 17030 }, { "epoch": 2.85, "grad_norm": 0.4034939110279083, "learning_rate": 1.3352378661478982e-07, "loss": 0.0283, "step": 17031 }, { "epoch": 2.85, "grad_norm": 0.36365634202957153, "learning_rate": 1.3322985067903748e-07, "loss": 0.0303, "step": 17032 }, { "epoch": 2.85, "grad_norm": 0.4679376780986786, "learning_rate": 1.3293623646183696e-07, "loss": 0.0382, "step": 17033 }, { "epoch": 2.85, "grad_norm": 0.4617100954055786, "learning_rate": 1.3264294397275946e-07, "loss": 0.0303, "step": 17034 }, { "epoch": 2.85, "grad_norm": 0.34656333923339844, "learning_rate": 1.3234997322136732e-07, "loss": 0.02, "step": 17035 }, { "epoch": 2.85, "grad_norm": 0.42173635959625244, "learning_rate": 1.3205732421721518e-07, "loss": 0.037, "step": 17036 }, { "epoch": 2.85, "grad_norm": 0.2824268043041229, "learning_rate": 1.3176499696984534e-07, "loss": 0.0242, "step": 17037 }, { "epoch": 2.85, "grad_norm": 0.2993510067462921, "learning_rate": 1.31472991488788e-07, "loss": 0.0311, "step": 17038 }, { "epoch": 2.85, "grad_norm": 0.4734426438808441, "learning_rate": 1.3118130778356553e-07, "loss": 0.0263, "step": 17039 }, { "epoch": 2.85, "grad_norm": 0.3525497019290924, "learning_rate": 1.308899458636881e-07, "loss": 0.026, "step": 17040 }, { "epoch": 2.85, "grad_norm": 0.35770222544670105, "learning_rate": 1.3059890573865698e-07, "loss": 0.0371, "step": 17041 }, { "epoch": 2.85, "grad_norm": 0.4161223769187927, "learning_rate": 1.3030818741796014e-07, "loss": 0.0246, "step": 17042 }, { "epoch": 2.85, "grad_norm": 0.3578408360481262, "learning_rate": 1.300177909110778e-07, "loss": 0.0245, "step": 17043 }, { "epoch": 2.85, "grad_norm": 0.29530608654022217, "learning_rate": 1.2972771622747793e-07, "loss": 0.0172, "step": 17044 }, { "epoch": 2.85, "grad_norm": 0.3715173304080963, "learning_rate": 1.2943796337661962e-07, "loss": 0.0457, "step": 17045 }, { "epoch": 2.85, "grad_norm": 0.38294467329978943, "learning_rate": 1.2914853236795088e-07, "loss": 0.0304, "step": 17046 }, { "epoch": 2.85, "grad_norm": 0.3587334454059601, "learning_rate": 1.288594232109086e-07, "loss": 0.032, "step": 17047 }, { "epoch": 2.85, "grad_norm": 0.42130792140960693, "learning_rate": 1.285706359149197e-07, "loss": 0.0343, "step": 17048 }, { "epoch": 2.85, "grad_norm": 0.3245980739593506, "learning_rate": 1.2828217048939996e-07, "loss": 0.0271, "step": 17049 }, { "epoch": 2.85, "grad_norm": 0.49937477707862854, "learning_rate": 1.2799402694375518e-07, "loss": 0.0438, "step": 17050 }, { "epoch": 2.85, "grad_norm": 0.3450313210487366, "learning_rate": 1.277062052873812e-07, "loss": 0.025, "step": 17051 }, { "epoch": 2.85, "grad_norm": 0.4062444865703583, "learning_rate": 1.274187055296616e-07, "loss": 0.031, "step": 17052 }, { "epoch": 2.85, "grad_norm": 0.4193091094493866, "learning_rate": 1.271315276799734e-07, "loss": 0.0464, "step": 17053 }, { "epoch": 2.85, "grad_norm": 0.3696199655532837, "learning_rate": 1.268446717476768e-07, "loss": 0.0303, "step": 17054 }, { "epoch": 2.85, "grad_norm": 0.45075029134750366, "learning_rate": 1.2655813774212766e-07, "loss": 0.0436, "step": 17055 }, { "epoch": 2.85, "grad_norm": 0.39645707607269287, "learning_rate": 1.2627192567266854e-07, "loss": 0.0236, "step": 17056 }, { "epoch": 2.85, "grad_norm": 0.5370770692825317, "learning_rate": 1.2598603554863086e-07, "loss": 0.0498, "step": 17057 }, { "epoch": 2.85, "grad_norm": 0.4013367295265198, "learning_rate": 1.2570046737933716e-07, "loss": 0.0303, "step": 17058 }, { "epoch": 2.85, "grad_norm": 0.2969774007797241, "learning_rate": 1.2541522117409777e-07, "loss": 0.0352, "step": 17059 }, { "epoch": 2.85, "grad_norm": 0.40889593958854675, "learning_rate": 1.25130296942213e-07, "loss": 0.0266, "step": 17060 }, { "epoch": 2.85, "grad_norm": 0.4435938894748688, "learning_rate": 1.248456946929766e-07, "loss": 0.0226, "step": 17061 }, { "epoch": 2.85, "grad_norm": 0.43912383913993835, "learning_rate": 1.2456141443566549e-07, "loss": 0.0362, "step": 17062 }, { "epoch": 2.85, "grad_norm": 0.31986090540885925, "learning_rate": 1.2427745617954788e-07, "loss": 0.0257, "step": 17063 }, { "epoch": 2.85, "grad_norm": 0.34305503964424133, "learning_rate": 1.2399381993388638e-07, "loss": 0.0241, "step": 17064 }, { "epoch": 2.85, "grad_norm": 0.25855982303619385, "learning_rate": 1.2371050570792686e-07, "loss": 0.0217, "step": 17065 }, { "epoch": 2.85, "grad_norm": 0.4057812988758087, "learning_rate": 1.2342751351090642e-07, "loss": 0.0236, "step": 17066 }, { "epoch": 2.85, "grad_norm": 0.6852716207504272, "learning_rate": 1.2314484335205323e-07, "loss": 0.0346, "step": 17067 }, { "epoch": 2.85, "grad_norm": 0.42565980553627014, "learning_rate": 1.2286249524058547e-07, "loss": 0.0301, "step": 17068 }, { "epoch": 2.85, "grad_norm": 0.34089070558547974, "learning_rate": 1.22580469185708e-07, "loss": 0.0358, "step": 17069 }, { "epoch": 2.85, "grad_norm": 0.3191874921321869, "learning_rate": 1.2229876519661786e-07, "loss": 0.0216, "step": 17070 }, { "epoch": 2.86, "grad_norm": 0.41074392199516296, "learning_rate": 1.2201738328249778e-07, "loss": 0.0312, "step": 17071 }, { "epoch": 2.86, "grad_norm": 0.40425530076026917, "learning_rate": 1.2173632345252484e-07, "loss": 0.0301, "step": 17072 }, { "epoch": 2.86, "grad_norm": 0.33889031410217285, "learning_rate": 1.2145558571586392e-07, "loss": 0.027, "step": 17073 }, { "epoch": 2.86, "grad_norm": 0.4447772800922394, "learning_rate": 1.2117517008166658e-07, "loss": 0.0399, "step": 17074 }, { "epoch": 2.86, "grad_norm": 0.4332542419433594, "learning_rate": 1.2089507655907773e-07, "loss": 0.0322, "step": 17075 }, { "epoch": 2.86, "grad_norm": 0.3683388829231262, "learning_rate": 1.2061530515722898e-07, "loss": 0.0271, "step": 17076 }, { "epoch": 2.86, "grad_norm": 0.3390031158924103, "learning_rate": 1.20335855885243e-07, "loss": 0.0198, "step": 17077 }, { "epoch": 2.86, "grad_norm": 0.5301856398582458, "learning_rate": 1.200567287522325e-07, "loss": 0.0313, "step": 17078 }, { "epoch": 2.86, "grad_norm": 0.2848810851573944, "learning_rate": 1.1977792376729913e-07, "loss": 0.0192, "step": 17079 }, { "epoch": 2.86, "grad_norm": 0.2643212378025055, "learning_rate": 1.1949944093953115e-07, "loss": 0.0207, "step": 17080 }, { "epoch": 2.86, "grad_norm": 0.42681148648262024, "learning_rate": 1.1922128027801126e-07, "loss": 0.0284, "step": 17081 }, { "epoch": 2.86, "grad_norm": 0.46402910351753235, "learning_rate": 1.1894344179180784e-07, "loss": 0.0374, "step": 17082 }, { "epoch": 2.86, "grad_norm": 1.1009992361068726, "learning_rate": 1.1866592548998135e-07, "loss": 0.0447, "step": 17083 }, { "epoch": 2.86, "grad_norm": 0.31425774097442627, "learning_rate": 1.1838873138157903e-07, "loss": 0.0219, "step": 17084 }, { "epoch": 2.86, "grad_norm": 0.40864259004592896, "learning_rate": 1.1811185947564141e-07, "loss": 0.0271, "step": 17085 }, { "epoch": 2.86, "grad_norm": 0.2735680341720581, "learning_rate": 1.1783530978119462e-07, "loss": 0.0194, "step": 17086 }, { "epoch": 2.86, "grad_norm": 0.40762653946876526, "learning_rate": 1.1755908230725587e-07, "loss": 0.0255, "step": 17087 }, { "epoch": 2.86, "grad_norm": 0.3196158707141876, "learning_rate": 1.1728317706283133e-07, "loss": 0.0294, "step": 17088 }, { "epoch": 2.86, "grad_norm": 0.3647189736366272, "learning_rate": 1.1700759405691931e-07, "loss": 0.0344, "step": 17089 }, { "epoch": 2.86, "grad_norm": 0.4031493663787842, "learning_rate": 1.1673233329850486e-07, "loss": 0.0389, "step": 17090 }, { "epoch": 2.86, "grad_norm": 0.3485957384109497, "learning_rate": 1.1645739479656193e-07, "loss": 0.019, "step": 17091 }, { "epoch": 2.86, "grad_norm": 0.42004629969596863, "learning_rate": 1.1618277856005556e-07, "loss": 0.0307, "step": 17092 }, { "epoch": 2.86, "grad_norm": 0.6533069014549255, "learning_rate": 1.159084845979408e-07, "loss": 0.0317, "step": 17093 }, { "epoch": 2.86, "grad_norm": 0.7473241686820984, "learning_rate": 1.156345129191605e-07, "loss": 0.0211, "step": 17094 }, { "epoch": 2.86, "grad_norm": 0.3238775432109833, "learning_rate": 1.1536086353264975e-07, "loss": 0.033, "step": 17095 }, { "epoch": 2.86, "grad_norm": 0.32120418548583984, "learning_rate": 1.1508753644732807e-07, "loss": 0.0204, "step": 17096 }, { "epoch": 2.86, "grad_norm": 0.30705320835113525, "learning_rate": 1.1481453167211054e-07, "loss": 0.0235, "step": 17097 }, { "epoch": 2.86, "grad_norm": 0.39791446924209595, "learning_rate": 1.1454184921589783e-07, "loss": 0.0366, "step": 17098 }, { "epoch": 2.86, "grad_norm": 0.4841434955596924, "learning_rate": 1.1426948908758062e-07, "loss": 0.0366, "step": 17099 }, { "epoch": 2.86, "grad_norm": 0.6330339908599854, "learning_rate": 1.1399745129603956e-07, "loss": 0.0302, "step": 17100 }, { "epoch": 2.86, "grad_norm": 0.3406081199645996, "learning_rate": 1.1372573585014534e-07, "loss": 0.0221, "step": 17101 }, { "epoch": 2.86, "grad_norm": 0.3733823001384735, "learning_rate": 1.1345434275875756e-07, "loss": 0.0295, "step": 17102 }, { "epoch": 2.86, "grad_norm": 0.5815313458442688, "learning_rate": 1.1318327203072466e-07, "loss": 0.0431, "step": 17103 }, { "epoch": 2.86, "grad_norm": 0.375352680683136, "learning_rate": 1.1291252367488625e-07, "loss": 0.029, "step": 17104 }, { "epoch": 2.86, "grad_norm": 0.38776060938835144, "learning_rate": 1.1264209770006973e-07, "loss": 0.0329, "step": 17105 }, { "epoch": 2.86, "grad_norm": 0.36586329340934753, "learning_rate": 1.1237199411509248e-07, "loss": 0.0263, "step": 17106 }, { "epoch": 2.86, "grad_norm": 0.28229913115501404, "learning_rate": 1.121022129287619e-07, "loss": 0.0255, "step": 17107 }, { "epoch": 2.86, "grad_norm": 0.7051013112068176, "learning_rate": 1.1183275414987537e-07, "loss": 0.0395, "step": 17108 }, { "epoch": 2.86, "grad_norm": 0.43917107582092285, "learning_rate": 1.1156361778721702e-07, "loss": 0.0367, "step": 17109 }, { "epoch": 2.86, "grad_norm": 0.42046117782592773, "learning_rate": 1.1129480384956426e-07, "loss": 0.0341, "step": 17110 }, { "epoch": 2.86, "grad_norm": 0.3845166265964508, "learning_rate": 1.1102631234568117e-07, "loss": 0.025, "step": 17111 }, { "epoch": 2.86, "grad_norm": 0.4544232189655304, "learning_rate": 1.1075814328432299e-07, "loss": 0.0404, "step": 17112 }, { "epoch": 2.86, "grad_norm": 0.3408166766166687, "learning_rate": 1.1049029667423273e-07, "loss": 0.0309, "step": 17113 }, { "epoch": 2.86, "grad_norm": 0.23129531741142273, "learning_rate": 1.1022277252414448e-07, "loss": 0.0228, "step": 17114 }, { "epoch": 2.86, "grad_norm": 0.4448709785938263, "learning_rate": 1.0995557084278019e-07, "loss": 0.0326, "step": 17115 }, { "epoch": 2.86, "grad_norm": 0.35394036769866943, "learning_rate": 1.0968869163885287e-07, "loss": 0.0201, "step": 17116 }, { "epoch": 2.86, "grad_norm": 0.40607818961143494, "learning_rate": 1.0942213492106668e-07, "loss": 0.04, "step": 17117 }, { "epoch": 2.86, "grad_norm": 0.22463196516036987, "learning_rate": 1.0915590069811022e-07, "loss": 0.0186, "step": 17118 }, { "epoch": 2.86, "grad_norm": 0.3233102262020111, "learning_rate": 1.0888998897866432e-07, "loss": 0.0327, "step": 17119 }, { "epoch": 2.86, "grad_norm": 0.4355977177619934, "learning_rate": 1.0862439977140094e-07, "loss": 0.0384, "step": 17120 }, { "epoch": 2.86, "grad_norm": 0.39943182468414307, "learning_rate": 1.083591330849798e-07, "loss": 0.0307, "step": 17121 }, { "epoch": 2.86, "grad_norm": 0.3938564658164978, "learning_rate": 1.0809418892804957e-07, "loss": 0.0247, "step": 17122 }, { "epoch": 2.86, "grad_norm": 0.30079713463783264, "learning_rate": 1.0782956730924887e-07, "loss": 0.0319, "step": 17123 }, { "epoch": 2.86, "grad_norm": 0.3609716296195984, "learning_rate": 1.0756526823720637e-07, "loss": 0.0282, "step": 17124 }, { "epoch": 2.86, "grad_norm": 0.41382336616516113, "learning_rate": 1.0730129172054071e-07, "loss": 0.0384, "step": 17125 }, { "epoch": 2.86, "grad_norm": 0.31491613388061523, "learning_rate": 1.0703763776785836e-07, "loss": 0.0269, "step": 17126 }, { "epoch": 2.86, "grad_norm": 0.44455486536026, "learning_rate": 1.0677430638775466e-07, "loss": 0.0493, "step": 17127 }, { "epoch": 2.86, "grad_norm": 0.3266680836677551, "learning_rate": 1.065112975888194e-07, "loss": 0.0234, "step": 17128 }, { "epoch": 2.86, "grad_norm": 0.44817647337913513, "learning_rate": 1.0624861137962572e-07, "loss": 0.0291, "step": 17129 }, { "epoch": 2.87, "grad_norm": 0.3218258023262024, "learning_rate": 1.059862477687379e-07, "loss": 0.0199, "step": 17130 }, { "epoch": 2.87, "grad_norm": 0.36038845777511597, "learning_rate": 1.0572420676471351e-07, "loss": 0.0299, "step": 17131 }, { "epoch": 2.87, "grad_norm": 0.3392331004142761, "learning_rate": 1.0546248837609574e-07, "loss": 0.0235, "step": 17132 }, { "epoch": 2.87, "grad_norm": 0.5287131667137146, "learning_rate": 1.0520109261141664e-07, "loss": 0.0447, "step": 17133 }, { "epoch": 2.87, "grad_norm": 0.31446823477745056, "learning_rate": 1.0494001947920163e-07, "loss": 0.0239, "step": 17134 }, { "epoch": 2.87, "grad_norm": 0.3920804262161255, "learning_rate": 1.0467926898796165e-07, "loss": 0.0316, "step": 17135 }, { "epoch": 2.87, "grad_norm": 0.2261582314968109, "learning_rate": 1.0441884114619993e-07, "loss": 0.0234, "step": 17136 }, { "epoch": 2.87, "grad_norm": 0.36804211139678955, "learning_rate": 1.0415873596240744e-07, "loss": 0.0271, "step": 17137 }, { "epoch": 2.87, "grad_norm": 0.755182683467865, "learning_rate": 1.0389895344506406e-07, "loss": 0.0398, "step": 17138 }, { "epoch": 2.87, "grad_norm": 0.3288363814353943, "learning_rate": 1.0363949360264303e-07, "loss": 0.0266, "step": 17139 }, { "epoch": 2.87, "grad_norm": 0.4235842525959015, "learning_rate": 1.03380356443602e-07, "loss": 0.0463, "step": 17140 }, { "epoch": 2.87, "grad_norm": 0.48519232869148254, "learning_rate": 1.0312154197639201e-07, "loss": 0.0393, "step": 17141 }, { "epoch": 2.87, "grad_norm": 0.46873509883880615, "learning_rate": 1.0286305020945187e-07, "loss": 0.0286, "step": 17142 }, { "epoch": 2.87, "grad_norm": 0.3556860387325287, "learning_rate": 1.0260488115120925e-07, "loss": 0.0331, "step": 17143 }, { "epoch": 2.87, "grad_norm": 0.3097267746925354, "learning_rate": 1.023470348100819e-07, "loss": 0.0251, "step": 17144 }, { "epoch": 2.87, "grad_norm": 0.38492351770401, "learning_rate": 1.0208951119447863e-07, "loss": 0.0321, "step": 17145 }, { "epoch": 2.87, "grad_norm": 0.3257310390472412, "learning_rate": 1.0183231031279494e-07, "loss": 0.0209, "step": 17146 }, { "epoch": 2.87, "grad_norm": 0.3810945451259613, "learning_rate": 1.0157543217341748e-07, "loss": 0.0325, "step": 17147 }, { "epoch": 2.87, "grad_norm": 0.39851659536361694, "learning_rate": 1.0131887678472286e-07, "loss": 0.0277, "step": 17148 }, { "epoch": 2.87, "grad_norm": 0.32377707958221436, "learning_rate": 1.010626441550755e-07, "loss": 0.0209, "step": 17149 }, { "epoch": 2.87, "grad_norm": 0.5572956800460815, "learning_rate": 1.0080673429283095e-07, "loss": 0.0288, "step": 17150 }, { "epoch": 2.87, "grad_norm": 0.3667268753051758, "learning_rate": 1.0055114720633252e-07, "loss": 0.0266, "step": 17151 }, { "epoch": 2.87, "grad_norm": 0.3300597667694092, "learning_rate": 1.0029588290391467e-07, "loss": 0.0329, "step": 17152 }, { "epoch": 2.87, "grad_norm": 0.6984124779701233, "learning_rate": 1.0004094139390075e-07, "loss": 0.0579, "step": 17153 }, { "epoch": 2.87, "grad_norm": 0.3704919219017029, "learning_rate": 9.978632268460298e-08, "loss": 0.0245, "step": 17154 }, { "epoch": 2.87, "grad_norm": 0.3359363377094269, "learning_rate": 9.95320267843225e-08, "loss": 0.0252, "step": 17155 }, { "epoch": 2.87, "grad_norm": 0.33425435423851013, "learning_rate": 9.927805370135379e-08, "loss": 0.0432, "step": 17156 }, { "epoch": 2.87, "grad_norm": 0.43410366773605347, "learning_rate": 9.90244034439758e-08, "loss": 0.0304, "step": 17157 }, { "epoch": 2.87, "grad_norm": 0.42814165353775024, "learning_rate": 9.877107602045855e-08, "loss": 0.0312, "step": 17158 }, { "epoch": 2.87, "grad_norm": 0.41863346099853516, "learning_rate": 9.851807143906433e-08, "loss": 0.0271, "step": 17159 }, { "epoch": 2.87, "grad_norm": 0.30208590626716614, "learning_rate": 9.826538970804211e-08, "loss": 0.0321, "step": 17160 }, { "epoch": 2.87, "grad_norm": 0.39709752798080444, "learning_rate": 9.801303083562864e-08, "loss": 0.037, "step": 17161 }, { "epoch": 2.87, "grad_norm": 0.30443328619003296, "learning_rate": 9.776099483005619e-08, "loss": 0.0251, "step": 17162 }, { "epoch": 2.87, "grad_norm": 0.44981202483177185, "learning_rate": 9.750928169953822e-08, "loss": 0.0407, "step": 17163 }, { "epoch": 2.87, "grad_norm": 0.36509352922439575, "learning_rate": 9.725789145228592e-08, "loss": 0.0386, "step": 17164 }, { "epoch": 2.87, "grad_norm": 0.6434407830238342, "learning_rate": 9.700682409649497e-08, "loss": 0.0298, "step": 17165 }, { "epoch": 2.87, "grad_norm": 0.40888655185699463, "learning_rate": 9.675607964034994e-08, "loss": 0.0368, "step": 17166 }, { "epoch": 2.87, "grad_norm": 0.41675814986228943, "learning_rate": 9.650565809202983e-08, "loss": 0.0246, "step": 17167 }, { "epoch": 2.87, "grad_norm": 0.3753698170185089, "learning_rate": 9.625555945969922e-08, "loss": 0.0314, "step": 17168 }, { "epoch": 2.87, "grad_norm": 0.36627331376075745, "learning_rate": 9.600578375151049e-08, "loss": 0.0257, "step": 17169 }, { "epoch": 2.87, "grad_norm": 0.6109201908111572, "learning_rate": 9.575633097561044e-08, "loss": 0.0351, "step": 17170 }, { "epoch": 2.87, "grad_norm": 0.4802263081073761, "learning_rate": 9.550720114013035e-08, "loss": 0.0311, "step": 17171 }, { "epoch": 2.87, "grad_norm": 0.4214994013309479, "learning_rate": 9.525839425319594e-08, "loss": 0.0392, "step": 17172 }, { "epoch": 2.87, "grad_norm": 0.36923155188560486, "learning_rate": 9.50099103229185e-08, "loss": 0.0325, "step": 17173 }, { "epoch": 2.87, "grad_norm": 0.4019806385040283, "learning_rate": 9.476174935740157e-08, "loss": 0.0505, "step": 17174 }, { "epoch": 2.87, "grad_norm": 0.2508014738559723, "learning_rate": 9.45139113647342e-08, "loss": 0.0167, "step": 17175 }, { "epoch": 2.87, "grad_norm": 0.39459460973739624, "learning_rate": 9.426639635300106e-08, "loss": 0.0314, "step": 17176 }, { "epoch": 2.87, "grad_norm": 0.5129762291908264, "learning_rate": 9.401920433026901e-08, "loss": 0.0428, "step": 17177 }, { "epoch": 2.87, "grad_norm": 0.4658876657485962, "learning_rate": 9.37723353045994e-08, "loss": 0.0359, "step": 17178 }, { "epoch": 2.87, "grad_norm": 0.3995603919029236, "learning_rate": 9.352578928404243e-08, "loss": 0.0268, "step": 17179 }, { "epoch": 2.87, "grad_norm": 0.4813423752784729, "learning_rate": 9.327956627663725e-08, "loss": 0.0255, "step": 17180 }, { "epoch": 2.87, "grad_norm": 0.8561946153640747, "learning_rate": 9.303366629040966e-08, "loss": 0.019, "step": 17181 }, { "epoch": 2.87, "grad_norm": 0.3932243585586548, "learning_rate": 9.278808933338101e-08, "loss": 0.0304, "step": 17182 }, { "epoch": 2.87, "grad_norm": 0.3771001100540161, "learning_rate": 9.254283541355713e-08, "loss": 0.0353, "step": 17183 }, { "epoch": 2.87, "grad_norm": 0.339644193649292, "learning_rate": 9.229790453893495e-08, "loss": 0.0261, "step": 17184 }, { "epoch": 2.87, "grad_norm": 0.33906474709510803, "learning_rate": 9.20532967175003e-08, "loss": 0.0208, "step": 17185 }, { "epoch": 2.87, "grad_norm": 0.26746127009391785, "learning_rate": 9.180901195722791e-08, "loss": 0.0206, "step": 17186 }, { "epoch": 2.87, "grad_norm": 0.5076931118965149, "learning_rate": 9.156505026608587e-08, "loss": 0.0468, "step": 17187 }, { "epoch": 2.87, "grad_norm": 0.4934651255607605, "learning_rate": 9.132141165202668e-08, "loss": 0.0313, "step": 17188 }, { "epoch": 2.87, "grad_norm": 0.3004818260669708, "learning_rate": 9.107809612299512e-08, "loss": 0.0261, "step": 17189 }, { "epoch": 2.88, "grad_norm": 0.33488479256629944, "learning_rate": 9.083510368692594e-08, "loss": 0.0393, "step": 17190 }, { "epoch": 2.88, "grad_norm": 0.34883931279182434, "learning_rate": 9.059243435173948e-08, "loss": 0.0306, "step": 17191 }, { "epoch": 2.88, "grad_norm": 0.29264765977859497, "learning_rate": 9.035008812535051e-08, "loss": 0.0276, "step": 17192 }, { "epoch": 2.88, "grad_norm": 0.2843247056007385, "learning_rate": 9.01080650156605e-08, "loss": 0.0197, "step": 17193 }, { "epoch": 2.88, "grad_norm": 0.3840123414993286, "learning_rate": 8.98663650305609e-08, "loss": 0.0327, "step": 17194 }, { "epoch": 2.88, "grad_norm": 0.41057172417640686, "learning_rate": 8.962498817793319e-08, "loss": 0.0351, "step": 17195 }, { "epoch": 2.88, "grad_norm": 0.3775862753391266, "learning_rate": 8.938393446564552e-08, "loss": 0.022, "step": 17196 }, { "epoch": 2.88, "grad_norm": 0.3117311894893646, "learning_rate": 8.914320390156051e-08, "loss": 0.0239, "step": 17197 }, { "epoch": 2.88, "grad_norm": 0.5460515022277832, "learning_rate": 8.890279649352629e-08, "loss": 0.0366, "step": 17198 }, { "epoch": 2.88, "grad_norm": 0.41201624274253845, "learning_rate": 8.866271224938105e-08, "loss": 0.0361, "step": 17199 }, { "epoch": 2.88, "grad_norm": 0.25431016087532043, "learning_rate": 8.84229511769541e-08, "loss": 0.0184, "step": 17200 }, { "epoch": 2.88, "grad_norm": 0.38917121291160583, "learning_rate": 8.81835132840636e-08, "loss": 0.0434, "step": 17201 }, { "epoch": 2.88, "grad_norm": 0.3559805452823639, "learning_rate": 8.794439857851556e-08, "loss": 0.0302, "step": 17202 }, { "epoch": 2.88, "grad_norm": 0.5154643058776855, "learning_rate": 8.770560706810593e-08, "loss": 0.0288, "step": 17203 }, { "epoch": 2.88, "grad_norm": 0.2929898798465729, "learning_rate": 8.746713876062184e-08, "loss": 0.0228, "step": 17204 }, { "epoch": 2.88, "grad_norm": 0.5945414900779724, "learning_rate": 8.72289936638393e-08, "loss": 0.0359, "step": 17205 }, { "epoch": 2.88, "grad_norm": 0.2450019270181656, "learning_rate": 8.699117178552319e-08, "loss": 0.016, "step": 17206 }, { "epoch": 2.88, "grad_norm": 0.33957886695861816, "learning_rate": 8.67536731334273e-08, "loss": 0.0201, "step": 17207 }, { "epoch": 2.88, "grad_norm": 0.24936293065547943, "learning_rate": 8.651649771529547e-08, "loss": 0.0224, "step": 17208 }, { "epoch": 2.88, "grad_norm": 0.4443868398666382, "learning_rate": 8.627964553886259e-08, "loss": 0.0333, "step": 17209 }, { "epoch": 2.88, "grad_norm": 0.4709964394569397, "learning_rate": 8.604311661185027e-08, "loss": 0.028, "step": 17210 }, { "epoch": 2.88, "grad_norm": 0.6215729713439941, "learning_rate": 8.580691094197125e-08, "loss": 0.0253, "step": 17211 }, { "epoch": 2.88, "grad_norm": 0.30270135402679443, "learning_rate": 8.557102853692601e-08, "loss": 0.0244, "step": 17212 }, { "epoch": 2.88, "grad_norm": 0.3799082934856415, "learning_rate": 8.53354694044084e-08, "loss": 0.0284, "step": 17213 }, { "epoch": 2.88, "grad_norm": 0.3227071762084961, "learning_rate": 8.510023355209674e-08, "loss": 0.0268, "step": 17214 }, { "epoch": 2.88, "grad_norm": 0.42468851804733276, "learning_rate": 8.486532098766265e-08, "loss": 0.0416, "step": 17215 }, { "epoch": 2.88, "grad_norm": 0.40105560421943665, "learning_rate": 8.463073171876446e-08, "loss": 0.0427, "step": 17216 }, { "epoch": 2.88, "grad_norm": 0.38279587030410767, "learning_rate": 8.439646575305271e-08, "loss": 0.03, "step": 17217 }, { "epoch": 2.88, "grad_norm": 0.3749893605709076, "learning_rate": 8.416252309816463e-08, "loss": 0.0418, "step": 17218 }, { "epoch": 2.88, "grad_norm": 0.3379230201244354, "learning_rate": 8.392890376172858e-08, "loss": 0.0338, "step": 17219 }, { "epoch": 2.88, "grad_norm": 0.3453470766544342, "learning_rate": 8.369560775136287e-08, "loss": 0.0276, "step": 17220 }, { "epoch": 2.88, "grad_norm": 0.4311673939228058, "learning_rate": 8.346263507467367e-08, "loss": 0.031, "step": 17221 }, { "epoch": 2.88, "grad_norm": 0.37429699301719666, "learning_rate": 8.322998573925822e-08, "loss": 0.0276, "step": 17222 }, { "epoch": 2.88, "grad_norm": 0.3717931807041168, "learning_rate": 8.299765975270047e-08, "loss": 0.0326, "step": 17223 }, { "epoch": 2.88, "grad_norm": 0.42771461606025696, "learning_rate": 8.276565712257767e-08, "loss": 0.0258, "step": 17224 }, { "epoch": 2.88, "grad_norm": 0.4808386266231537, "learning_rate": 8.253397785645267e-08, "loss": 0.0299, "step": 17225 }, { "epoch": 2.88, "grad_norm": 0.39608973264694214, "learning_rate": 8.230262196188166e-08, "loss": 0.0329, "step": 17226 }, { "epoch": 2.88, "grad_norm": 0.3951908349990845, "learning_rate": 8.207158944640747e-08, "loss": 0.0385, "step": 17227 }, { "epoch": 2.88, "grad_norm": 0.42275652289390564, "learning_rate": 8.184088031756188e-08, "loss": 0.0289, "step": 17228 }, { "epoch": 2.88, "grad_norm": 0.45907700061798096, "learning_rate": 8.161049458286995e-08, "loss": 0.0352, "step": 17229 }, { "epoch": 2.88, "grad_norm": 0.31548142433166504, "learning_rate": 8.138043224984238e-08, "loss": 0.0226, "step": 17230 }, { "epoch": 2.88, "grad_norm": 0.41842180490493774, "learning_rate": 8.11506933259798e-08, "loss": 0.0407, "step": 17231 }, { "epoch": 2.88, "grad_norm": 0.35632026195526123, "learning_rate": 8.092127781877401e-08, "loss": 0.033, "step": 17232 }, { "epoch": 2.88, "grad_norm": 0.3673422932624817, "learning_rate": 8.069218573570681e-08, "loss": 0.0248, "step": 17233 }, { "epoch": 2.88, "grad_norm": 0.3399295508861542, "learning_rate": 8.046341708424443e-08, "loss": 0.0226, "step": 17234 }, { "epoch": 2.88, "grad_norm": 0.45046716928482056, "learning_rate": 8.023497187184981e-08, "loss": 0.026, "step": 17235 }, { "epoch": 2.88, "grad_norm": 0.33078014850616455, "learning_rate": 8.000685010597031e-08, "loss": 0.0381, "step": 17236 }, { "epoch": 2.88, "grad_norm": 0.36334651708602905, "learning_rate": 7.977905179404332e-08, "loss": 0.03, "step": 17237 }, { "epoch": 2.88, "grad_norm": 0.27565762400627136, "learning_rate": 7.955157694349735e-08, "loss": 0.0264, "step": 17238 }, { "epoch": 2.88, "grad_norm": 0.31449347734451294, "learning_rate": 7.93244255617498e-08, "loss": 0.031, "step": 17239 }, { "epoch": 2.88, "grad_norm": 0.46584853529930115, "learning_rate": 7.909759765620695e-08, "loss": 0.0416, "step": 17240 }, { "epoch": 2.88, "grad_norm": 0.36222246289253235, "learning_rate": 7.887109323426512e-08, "loss": 0.0345, "step": 17241 }, { "epoch": 2.88, "grad_norm": 0.3843427002429962, "learning_rate": 7.86449123033084e-08, "loss": 0.0271, "step": 17242 }, { "epoch": 2.88, "grad_norm": 0.3654768764972687, "learning_rate": 7.841905487071311e-08, "loss": 0.0235, "step": 17243 }, { "epoch": 2.88, "grad_norm": 0.6750813722610474, "learning_rate": 7.819352094384225e-08, "loss": 0.0371, "step": 17244 }, { "epoch": 2.88, "grad_norm": 0.3169657289981842, "learning_rate": 7.796831053005105e-08, "loss": 0.0313, "step": 17245 }, { "epoch": 2.88, "grad_norm": 0.2989948093891144, "learning_rate": 7.774342363668253e-08, "loss": 0.0273, "step": 17246 }, { "epoch": 2.88, "grad_norm": 0.49492937326431274, "learning_rate": 7.75188602710697e-08, "loss": 0.0521, "step": 17247 }, { "epoch": 2.88, "grad_norm": 0.34563159942626953, "learning_rate": 7.729462044053227e-08, "loss": 0.0262, "step": 17248 }, { "epoch": 2.88, "grad_norm": 0.2980349659919739, "learning_rate": 7.707070415238549e-08, "loss": 0.0359, "step": 17249 }, { "epoch": 2.89, "grad_norm": 0.3983864188194275, "learning_rate": 7.684711141392797e-08, "loss": 0.0294, "step": 17250 }, { "epoch": 2.89, "grad_norm": 0.5433828830718994, "learning_rate": 7.662384223245167e-08, "loss": 0.0393, "step": 17251 }, { "epoch": 2.89, "grad_norm": 0.5957940816879272, "learning_rate": 7.64008966152352e-08, "loss": 0.027, "step": 17252 }, { "epoch": 2.89, "grad_norm": 0.2909191846847534, "learning_rate": 7.61782745695483e-08, "loss": 0.0177, "step": 17253 }, { "epoch": 2.89, "grad_norm": 0.27409908175468445, "learning_rate": 7.595597610265182e-08, "loss": 0.0252, "step": 17254 }, { "epoch": 2.89, "grad_norm": 0.3558036983013153, "learning_rate": 7.573400122179109e-08, "loss": 0.03, "step": 17255 }, { "epoch": 2.89, "grad_norm": 0.49240556359291077, "learning_rate": 7.551234993420586e-08, "loss": 0.0393, "step": 17256 }, { "epoch": 2.89, "grad_norm": 0.37233269214630127, "learning_rate": 7.529102224712259e-08, "loss": 0.0287, "step": 17257 }, { "epoch": 2.89, "grad_norm": 0.31899869441986084, "learning_rate": 7.507001816775883e-08, "loss": 0.0338, "step": 17258 }, { "epoch": 2.89, "grad_norm": 0.5780381560325623, "learning_rate": 7.484933770331881e-08, "loss": 0.0458, "step": 17259 }, { "epoch": 2.89, "grad_norm": 0.42869308590888977, "learning_rate": 7.462898086099902e-08, "loss": 0.0385, "step": 17260 }, { "epoch": 2.89, "grad_norm": 0.29978302121162415, "learning_rate": 7.440894764798589e-08, "loss": 0.029, "step": 17261 }, { "epoch": 2.89, "grad_norm": 0.4976155161857605, "learning_rate": 7.418923807145262e-08, "loss": 0.0379, "step": 17262 }, { "epoch": 2.89, "grad_norm": 0.38277098536491394, "learning_rate": 7.396985213856233e-08, "loss": 0.0326, "step": 17263 }, { "epoch": 2.89, "grad_norm": 0.3678005337715149, "learning_rate": 7.375078985646933e-08, "loss": 0.0302, "step": 17264 }, { "epoch": 2.89, "grad_norm": 0.5553054809570312, "learning_rate": 7.353205123231788e-08, "loss": 0.0486, "step": 17265 }, { "epoch": 2.89, "grad_norm": 0.40188783407211304, "learning_rate": 7.331363627323784e-08, "loss": 0.0266, "step": 17266 }, { "epoch": 2.89, "grad_norm": 0.2885868549346924, "learning_rate": 7.30955449863513e-08, "loss": 0.0225, "step": 17267 }, { "epoch": 2.89, "grad_norm": 0.40087223052978516, "learning_rate": 7.287777737876922e-08, "loss": 0.0406, "step": 17268 }, { "epoch": 2.89, "grad_norm": 0.39896565675735474, "learning_rate": 7.266033345759483e-08, "loss": 0.0435, "step": 17269 }, { "epoch": 2.89, "grad_norm": 0.39040273427963257, "learning_rate": 7.244321322991465e-08, "loss": 0.0371, "step": 17270 }, { "epoch": 2.89, "grad_norm": 0.4441247582435608, "learning_rate": 7.222641670280972e-08, "loss": 0.0364, "step": 17271 }, { "epoch": 2.89, "grad_norm": 0.31669172644615173, "learning_rate": 7.200994388334992e-08, "loss": 0.0207, "step": 17272 }, { "epoch": 2.89, "grad_norm": 0.36353328824043274, "learning_rate": 7.179379477859183e-08, "loss": 0.0327, "step": 17273 }, { "epoch": 2.89, "grad_norm": 0.5398257970809937, "learning_rate": 7.157796939558536e-08, "loss": 0.0312, "step": 17274 }, { "epoch": 2.89, "grad_norm": 0.5009375810623169, "learning_rate": 7.13624677413649e-08, "loss": 0.0269, "step": 17275 }, { "epoch": 2.89, "grad_norm": 0.3401746153831482, "learning_rate": 7.114728982295926e-08, "loss": 0.025, "step": 17276 }, { "epoch": 2.89, "grad_norm": 0.5452255606651306, "learning_rate": 7.093243564738395e-08, "loss": 0.0425, "step": 17277 }, { "epoch": 2.89, "grad_norm": 0.42824774980545044, "learning_rate": 7.071790522164446e-08, "loss": 0.027, "step": 17278 }, { "epoch": 2.89, "grad_norm": 0.37962645292282104, "learning_rate": 7.050369855273631e-08, "loss": 0.0283, "step": 17279 }, { "epoch": 2.89, "grad_norm": 0.3760376274585724, "learning_rate": 7.028981564764504e-08, "loss": 0.0231, "step": 17280 }, { "epoch": 2.89, "grad_norm": 0.286949098110199, "learning_rate": 7.007625651334171e-08, "loss": 0.0274, "step": 17281 }, { "epoch": 2.89, "grad_norm": 0.3814769387245178, "learning_rate": 6.986302115679189e-08, "loss": 0.0273, "step": 17282 }, { "epoch": 2.89, "grad_norm": 0.3078770339488983, "learning_rate": 6.965010958494666e-08, "loss": 0.0293, "step": 17283 }, { "epoch": 2.89, "grad_norm": 0.3766159415245056, "learning_rate": 6.943752180475049e-08, "loss": 0.0233, "step": 17284 }, { "epoch": 2.89, "grad_norm": 0.46414387226104736, "learning_rate": 6.922525782313339e-08, "loss": 0.0394, "step": 17285 }, { "epoch": 2.89, "grad_norm": 0.3999457061290741, "learning_rate": 6.901331764701646e-08, "loss": 0.0308, "step": 17286 }, { "epoch": 2.89, "grad_norm": 0.4064377546310425, "learning_rate": 6.880170128331198e-08, "loss": 0.0269, "step": 17287 }, { "epoch": 2.89, "grad_norm": 0.289029985666275, "learning_rate": 6.859040873891887e-08, "loss": 0.0206, "step": 17288 }, { "epoch": 2.89, "grad_norm": 0.3603372573852539, "learning_rate": 6.837944002072605e-08, "loss": 0.0252, "step": 17289 }, { "epoch": 2.89, "grad_norm": 0.3015379011631012, "learning_rate": 6.816879513561248e-08, "loss": 0.0217, "step": 17290 }, { "epoch": 2.89, "grad_norm": 0.4715745151042938, "learning_rate": 6.79584740904471e-08, "loss": 0.0294, "step": 17291 }, { "epoch": 2.89, "grad_norm": 0.44063758850097656, "learning_rate": 6.774847689208774e-08, "loss": 0.024, "step": 17292 }, { "epoch": 2.89, "grad_norm": 0.4746086001396179, "learning_rate": 6.753880354738118e-08, "loss": 0.0355, "step": 17293 }, { "epoch": 2.89, "grad_norm": 0.30463504791259766, "learning_rate": 6.732945406316527e-08, "loss": 0.0283, "step": 17294 }, { "epoch": 2.89, "grad_norm": 0.43596354126930237, "learning_rate": 6.712042844626453e-08, "loss": 0.0229, "step": 17295 }, { "epoch": 2.89, "grad_norm": 0.3173881769180298, "learning_rate": 6.691172670349577e-08, "loss": 0.0239, "step": 17296 }, { "epoch": 2.89, "grad_norm": 0.43945956230163574, "learning_rate": 6.67033488416624e-08, "loss": 0.0374, "step": 17297 }, { "epoch": 2.89, "grad_norm": 0.3533878028392792, "learning_rate": 6.649529486756012e-08, "loss": 0.0221, "step": 17298 }, { "epoch": 2.89, "grad_norm": 0.3789336681365967, "learning_rate": 6.628756478797349e-08, "loss": 0.0394, "step": 17299 }, { "epoch": 2.89, "grad_norm": 0.3562850058078766, "learning_rate": 6.608015860967376e-08, "loss": 0.0358, "step": 17300 }, { "epoch": 2.89, "grad_norm": 0.3847823441028595, "learning_rate": 6.587307633942441e-08, "loss": 0.03, "step": 17301 }, { "epoch": 2.89, "grad_norm": 0.3469952940940857, "learning_rate": 6.566631798397782e-08, "loss": 0.022, "step": 17302 }, { "epoch": 2.89, "grad_norm": 0.2760802209377289, "learning_rate": 6.545988355007748e-08, "loss": 0.0244, "step": 17303 }, { "epoch": 2.89, "grad_norm": 0.4585244059562683, "learning_rate": 6.525377304445024e-08, "loss": 0.0417, "step": 17304 }, { "epoch": 2.89, "grad_norm": 0.4405882954597473, "learning_rate": 6.504798647382072e-08, "loss": 0.0278, "step": 17305 }, { "epoch": 2.89, "grad_norm": 0.4689006209373474, "learning_rate": 6.484252384489686e-08, "loss": 0.0429, "step": 17306 }, { "epoch": 2.89, "grad_norm": 0.4498845934867859, "learning_rate": 6.463738516437779e-08, "loss": 0.0303, "step": 17307 }, { "epoch": 2.89, "grad_norm": 0.4664153754711151, "learning_rate": 6.443257043895368e-08, "loss": 0.0351, "step": 17308 }, { "epoch": 2.89, "grad_norm": 0.36631864309310913, "learning_rate": 6.422807967530143e-08, "loss": 0.0318, "step": 17309 }, { "epoch": 2.9, "grad_norm": 0.3742985725402832, "learning_rate": 6.402391288008902e-08, "loss": 0.0325, "step": 17310 }, { "epoch": 2.9, "grad_norm": 0.39056679606437683, "learning_rate": 6.382007005997449e-08, "loss": 0.0233, "step": 17311 }, { "epoch": 2.9, "grad_norm": 0.47590726613998413, "learning_rate": 6.361655122160249e-08, "loss": 0.0347, "step": 17312 }, { "epoch": 2.9, "grad_norm": 0.49008065462112427, "learning_rate": 6.341335637161217e-08, "loss": 0.0322, "step": 17313 }, { "epoch": 2.9, "grad_norm": 0.3033757507801056, "learning_rate": 6.321048551662601e-08, "loss": 0.0259, "step": 17314 }, { "epoch": 2.9, "grad_norm": 0.33967840671539307, "learning_rate": 6.300793866325871e-08, "loss": 0.0161, "step": 17315 }, { "epoch": 2.9, "grad_norm": 0.49403631687164307, "learning_rate": 6.280571581811723e-08, "loss": 0.0422, "step": 17316 }, { "epoch": 2.9, "grad_norm": 0.48411089181900024, "learning_rate": 6.260381698779294e-08, "loss": 0.0393, "step": 17317 }, { "epoch": 2.9, "grad_norm": 0.3972982168197632, "learning_rate": 6.240224217887059e-08, "loss": 0.0321, "step": 17318 }, { "epoch": 2.9, "grad_norm": 0.6861584186553955, "learning_rate": 6.220099139792157e-08, "loss": 0.0444, "step": 17319 }, { "epoch": 2.9, "grad_norm": 0.4112258851528168, "learning_rate": 6.200006465150843e-08, "loss": 0.0233, "step": 17320 }, { "epoch": 2.9, "grad_norm": 0.5141220688819885, "learning_rate": 6.179946194618258e-08, "loss": 0.0507, "step": 17321 }, { "epoch": 2.9, "grad_norm": 0.3577245771884918, "learning_rate": 6.159918328848547e-08, "loss": 0.0275, "step": 17322 }, { "epoch": 2.9, "grad_norm": 0.40863174200057983, "learning_rate": 6.13992286849463e-08, "loss": 0.0346, "step": 17323 }, { "epoch": 2.9, "grad_norm": 0.28872308135032654, "learning_rate": 6.119959814208543e-08, "loss": 0.0231, "step": 17324 }, { "epoch": 2.9, "grad_norm": 0.34557604789733887, "learning_rate": 6.100029166641208e-08, "loss": 0.0217, "step": 17325 }, { "epoch": 2.9, "grad_norm": 0.391366183757782, "learning_rate": 6.080130926442551e-08, "loss": 0.0209, "step": 17326 }, { "epoch": 2.9, "grad_norm": 0.42121559381484985, "learning_rate": 6.060265094261274e-08, "loss": 0.0279, "step": 17327 }, { "epoch": 2.9, "grad_norm": 0.2940305769443512, "learning_rate": 6.040431670745195e-08, "loss": 0.0207, "step": 17328 }, { "epoch": 2.9, "grad_norm": 0.44658905267715454, "learning_rate": 6.020630656541015e-08, "loss": 0.0409, "step": 17329 }, { "epoch": 2.9, "grad_norm": 0.46050846576690674, "learning_rate": 6.000862052294332e-08, "loss": 0.0286, "step": 17330 }, { "epoch": 2.9, "grad_norm": 0.3710532784461975, "learning_rate": 5.981125858649628e-08, "loss": 0.0195, "step": 17331 }, { "epoch": 2.9, "grad_norm": 0.4655921757221222, "learning_rate": 5.961422076250611e-08, "loss": 0.0349, "step": 17332 }, { "epoch": 2.9, "grad_norm": 0.30020779371261597, "learning_rate": 5.941750705739768e-08, "loss": 0.0259, "step": 17333 }, { "epoch": 2.9, "grad_norm": 0.4243110418319702, "learning_rate": 5.9221117477582526e-08, "loss": 0.0268, "step": 17334 }, { "epoch": 2.9, "grad_norm": 0.37669116258621216, "learning_rate": 5.9025052029466626e-08, "loss": 0.0159, "step": 17335 }, { "epoch": 2.9, "grad_norm": 0.355232834815979, "learning_rate": 5.8829310719442644e-08, "loss": 0.0238, "step": 17336 }, { "epoch": 2.9, "grad_norm": 0.33495181798934937, "learning_rate": 5.863389355389104e-08, "loss": 0.0378, "step": 17337 }, { "epoch": 2.9, "grad_norm": 0.41082316637039185, "learning_rate": 5.843880053918671e-08, "loss": 0.0325, "step": 17338 }, { "epoch": 2.9, "grad_norm": 0.2707940638065338, "learning_rate": 5.824403168168791e-08, "loss": 0.0224, "step": 17339 }, { "epoch": 2.9, "grad_norm": 0.3005116283893585, "learning_rate": 5.804958698774732e-08, "loss": 0.0253, "step": 17340 }, { "epoch": 2.9, "grad_norm": 0.42149868607521057, "learning_rate": 5.785546646370321e-08, "loss": 0.0356, "step": 17341 }, { "epoch": 2.9, "grad_norm": 0.32841745018959045, "learning_rate": 5.76616701158883e-08, "loss": 0.0207, "step": 17342 }, { "epoch": 2.9, "grad_norm": 0.31911006569862366, "learning_rate": 5.7468197950617535e-08, "loss": 0.0294, "step": 17343 }, { "epoch": 2.9, "grad_norm": 0.42964091897010803, "learning_rate": 5.7275049974202524e-08, "loss": 0.037, "step": 17344 }, { "epoch": 2.9, "grad_norm": 0.40472400188446045, "learning_rate": 5.708222619294046e-08, "loss": 0.0263, "step": 17345 }, { "epoch": 2.9, "grad_norm": 0.4623478353023529, "learning_rate": 5.6889726613116316e-08, "loss": 0.0447, "step": 17346 }, { "epoch": 2.9, "grad_norm": 0.5237316489219666, "learning_rate": 5.669755124101062e-08, "loss": 0.0337, "step": 17347 }, { "epoch": 2.9, "grad_norm": 0.28940343856811523, "learning_rate": 5.650570008288614e-08, "loss": 0.024, "step": 17348 }, { "epoch": 2.9, "grad_norm": 0.37908411026000977, "learning_rate": 5.631417314499899e-08, "loss": 0.0371, "step": 17349 }, { "epoch": 2.9, "grad_norm": 0.34011420607566833, "learning_rate": 5.612297043359638e-08, "loss": 0.0282, "step": 17350 }, { "epoch": 2.9, "grad_norm": 0.41515347361564636, "learning_rate": 5.5932091954910005e-08, "loss": 0.0365, "step": 17351 }, { "epoch": 2.9, "grad_norm": 0.392643541097641, "learning_rate": 5.5741537715165996e-08, "loss": 0.0304, "step": 17352 }, { "epoch": 2.9, "grad_norm": 0.3706985116004944, "learning_rate": 5.5551307720576044e-08, "loss": 0.025, "step": 17353 }, { "epoch": 2.9, "grad_norm": 0.4910586476325989, "learning_rate": 5.5361401977342965e-08, "loss": 0.0338, "step": 17354 }, { "epoch": 2.9, "grad_norm": 0.5295253396034241, "learning_rate": 5.517182049165959e-08, "loss": 0.0338, "step": 17355 }, { "epoch": 2.9, "grad_norm": 0.5775132775306702, "learning_rate": 5.4982563269706527e-08, "loss": 0.0365, "step": 17356 }, { "epoch": 2.9, "grad_norm": 0.3028523027896881, "learning_rate": 5.47936303176555e-08, "loss": 0.0269, "step": 17357 }, { "epoch": 2.9, "grad_norm": 0.48639002442359924, "learning_rate": 5.4605021641667144e-08, "loss": 0.0416, "step": 17358 }, { "epoch": 2.9, "grad_norm": 0.33993619680404663, "learning_rate": 5.4416737247890984e-08, "loss": 0.0224, "step": 17359 }, { "epoch": 2.9, "grad_norm": 0.3231564462184906, "learning_rate": 5.4228777142465436e-08, "loss": 0.0189, "step": 17360 }, { "epoch": 2.9, "grad_norm": 0.34719118475914, "learning_rate": 5.404114133152116e-08, "loss": 0.0401, "step": 17361 }, { "epoch": 2.9, "grad_norm": 0.35630810260772705, "learning_rate": 5.3853829821173265e-08, "loss": 0.0328, "step": 17362 }, { "epoch": 2.9, "grad_norm": 0.5523197650909424, "learning_rate": 5.366684261753241e-08, "loss": 0.0444, "step": 17363 }, { "epoch": 2.9, "grad_norm": 0.448790043592453, "learning_rate": 5.3480179726693726e-08, "loss": 0.0315, "step": 17364 }, { "epoch": 2.9, "grad_norm": 0.3746313452720642, "learning_rate": 5.3293841154743456e-08, "loss": 0.027, "step": 17365 }, { "epoch": 2.9, "grad_norm": 0.4001895487308502, "learning_rate": 5.3107826907758955e-08, "loss": 0.0267, "step": 17366 }, { "epoch": 2.9, "grad_norm": 0.32249927520751953, "learning_rate": 5.2922136991804264e-08, "loss": 0.0288, "step": 17367 }, { "epoch": 2.9, "grad_norm": 0.43301481008529663, "learning_rate": 5.2736771412933426e-08, "loss": 0.0264, "step": 17368 }, { "epoch": 2.91, "grad_norm": 0.4847514033317566, "learning_rate": 5.2551730177191616e-08, "loss": 0.0408, "step": 17369 }, { "epoch": 2.91, "grad_norm": 0.4143630564212799, "learning_rate": 5.236701329061289e-08, "loss": 0.0421, "step": 17370 }, { "epoch": 2.91, "grad_norm": 0.31445854902267456, "learning_rate": 5.218262075921798e-08, "loss": 0.0229, "step": 17371 }, { "epoch": 2.91, "grad_norm": 0.3749203383922577, "learning_rate": 5.199855258902098e-08, "loss": 0.0287, "step": 17372 }, { "epoch": 2.91, "grad_norm": 0.3481782078742981, "learning_rate": 5.181480878602374e-08, "loss": 0.0211, "step": 17373 }, { "epoch": 2.91, "grad_norm": 0.2679902911186218, "learning_rate": 5.1631389356217035e-08, "loss": 0.0182, "step": 17374 }, { "epoch": 2.91, "grad_norm": 0.4191719591617584, "learning_rate": 5.144829430558163e-08, "loss": 0.0398, "step": 17375 }, { "epoch": 2.91, "grad_norm": 0.4553335905075073, "learning_rate": 5.126552364008719e-08, "loss": 0.0369, "step": 17376 }, { "epoch": 2.91, "grad_norm": 0.28754162788391113, "learning_rate": 5.10830773656934e-08, "loss": 0.0241, "step": 17377 }, { "epoch": 2.91, "grad_norm": 0.4778279662132263, "learning_rate": 5.0900955488348835e-08, "loss": 0.0312, "step": 17378 }, { "epoch": 2.91, "grad_norm": 0.31040462851524353, "learning_rate": 5.0719158013992074e-08, "loss": 0.0373, "step": 17379 }, { "epoch": 2.91, "grad_norm": 0.3593173623085022, "learning_rate": 5.0537684948550605e-08, "loss": 0.0359, "step": 17380 }, { "epoch": 2.91, "grad_norm": 0.3239436149597168, "learning_rate": 5.0356536297943015e-08, "loss": 0.0278, "step": 17381 }, { "epoch": 2.91, "grad_norm": 0.35044166445732117, "learning_rate": 5.0175712068073477e-08, "loss": 0.0356, "step": 17382 }, { "epoch": 2.91, "grad_norm": 0.5173159241676331, "learning_rate": 4.999521226484061e-08, "loss": 0.0264, "step": 17383 }, { "epoch": 2.91, "grad_norm": 0.39460432529449463, "learning_rate": 4.981503689412748e-08, "loss": 0.0255, "step": 17384 }, { "epoch": 2.91, "grad_norm": 0.3557121753692627, "learning_rate": 4.9635185961810494e-08, "loss": 0.0287, "step": 17385 }, { "epoch": 2.91, "grad_norm": 0.45614033937454224, "learning_rate": 4.945565947375275e-08, "loss": 0.0303, "step": 17386 }, { "epoch": 2.91, "grad_norm": 0.4979287087917328, "learning_rate": 4.9276457435808444e-08, "loss": 0.0415, "step": 17387 }, { "epoch": 2.91, "grad_norm": 0.2722078859806061, "learning_rate": 4.90975798538218e-08, "loss": 0.0197, "step": 17388 }, { "epoch": 2.91, "grad_norm": 0.3600645363330841, "learning_rate": 4.89190267336237e-08, "loss": 0.0264, "step": 17389 }, { "epoch": 2.91, "grad_norm": 0.44502055644989014, "learning_rate": 4.874079808103615e-08, "loss": 0.0289, "step": 17390 }, { "epoch": 2.91, "grad_norm": 0.4718126952648163, "learning_rate": 4.85628939018723e-08, "loss": 0.0309, "step": 17391 }, { "epoch": 2.91, "grad_norm": 0.4328373074531555, "learning_rate": 4.8385314201931934e-08, "loss": 0.0179, "step": 17392 }, { "epoch": 2.91, "grad_norm": 0.5070306062698364, "learning_rate": 4.820805898700376e-08, "loss": 0.034, "step": 17393 }, { "epoch": 2.91, "grad_norm": 0.42385730147361755, "learning_rate": 4.803112826286982e-08, "loss": 0.0342, "step": 17394 }, { "epoch": 2.91, "grad_norm": 0.3755541741847992, "learning_rate": 4.785452203529772e-08, "loss": 0.0404, "step": 17395 }, { "epoch": 2.91, "grad_norm": 0.44853487610816956, "learning_rate": 4.7678240310046195e-08, "loss": 0.0312, "step": 17396 }, { "epoch": 2.91, "grad_norm": 0.4748450517654419, "learning_rate": 4.750228309286287e-08, "loss": 0.0377, "step": 17397 }, { "epoch": 2.91, "grad_norm": 0.36621102690696716, "learning_rate": 4.7326650389486473e-08, "loss": 0.0401, "step": 17398 }, { "epoch": 2.91, "grad_norm": 0.4300054609775543, "learning_rate": 4.715134220564133e-08, "loss": 0.039, "step": 17399 }, { "epoch": 2.91, "grad_norm": 0.7959626317024231, "learning_rate": 4.697635854704619e-08, "loss": 0.0387, "step": 17400 }, { "epoch": 2.91, "grad_norm": 0.42032697796821594, "learning_rate": 4.680169941940538e-08, "loss": 0.0348, "step": 17401 }, { "epoch": 2.91, "grad_norm": 0.3272258937358856, "learning_rate": 4.662736482841213e-08, "loss": 0.02, "step": 17402 }, { "epoch": 2.91, "grad_norm": 0.24922670423984528, "learning_rate": 4.64533547797541e-08, "loss": 0.0154, "step": 17403 }, { "epoch": 2.91, "grad_norm": 0.2917919158935547, "learning_rate": 4.6279669279103436e-08, "loss": 0.0203, "step": 17404 }, { "epoch": 2.91, "grad_norm": 0.4373817443847656, "learning_rate": 4.610630833212226e-08, "loss": 0.0365, "step": 17405 }, { "epoch": 2.91, "grad_norm": 0.37429454922676086, "learning_rate": 4.5933271944466064e-08, "loss": 0.0274, "step": 17406 }, { "epoch": 2.91, "grad_norm": 0.4159024655818939, "learning_rate": 4.5760560121774764e-08, "loss": 0.0287, "step": 17407 }, { "epoch": 2.91, "grad_norm": 0.35498347878456116, "learning_rate": 4.5588172869680537e-08, "loss": 0.0231, "step": 17408 }, { "epoch": 2.91, "grad_norm": 0.47179391980171204, "learning_rate": 4.541611019380443e-08, "loss": 0.0582, "step": 17409 }, { "epoch": 2.91, "grad_norm": 0.32148492336273193, "learning_rate": 4.524437209975529e-08, "loss": 0.026, "step": 17410 }, { "epoch": 2.91, "grad_norm": 0.44970381259918213, "learning_rate": 4.507295859313532e-08, "loss": 0.0451, "step": 17411 }, { "epoch": 2.91, "grad_norm": 0.2818271517753601, "learning_rate": 4.490186967953225e-08, "loss": 0.028, "step": 17412 }, { "epoch": 2.91, "grad_norm": 0.4361308217048645, "learning_rate": 4.4731105364523854e-08, "loss": 0.0489, "step": 17413 }, { "epoch": 2.91, "grad_norm": 0.4142003357410431, "learning_rate": 4.4560665653680115e-08, "loss": 0.0436, "step": 17414 }, { "epoch": 2.91, "grad_norm": 0.2697519361972809, "learning_rate": 4.43905505525577e-08, "loss": 0.0276, "step": 17415 }, { "epoch": 2.91, "grad_norm": 0.33306193351745605, "learning_rate": 4.4220760066702176e-08, "loss": 0.0314, "step": 17416 }, { "epoch": 2.91, "grad_norm": 0.4152349531650543, "learning_rate": 4.405129420165133e-08, "loss": 0.0234, "step": 17417 }, { "epoch": 2.91, "grad_norm": 0.33131206035614014, "learning_rate": 4.388215296292964e-08, "loss": 0.0228, "step": 17418 }, { "epoch": 2.91, "grad_norm": 0.30936935544013977, "learning_rate": 4.3713336356052685e-08, "loss": 0.021, "step": 17419 }, { "epoch": 2.91, "grad_norm": 0.46731036901474, "learning_rate": 4.354484438652606e-08, "loss": 0.0472, "step": 17420 }, { "epoch": 2.91, "grad_norm": 0.29880842566490173, "learning_rate": 4.3376677059840944e-08, "loss": 0.0226, "step": 17421 }, { "epoch": 2.91, "grad_norm": 0.3306666910648346, "learning_rate": 4.3208834381484045e-08, "loss": 0.0303, "step": 17422 }, { "epoch": 2.91, "grad_norm": 0.5611100792884827, "learning_rate": 4.304131635692543e-08, "loss": 0.0287, "step": 17423 }, { "epoch": 2.91, "grad_norm": 0.41792839765548706, "learning_rate": 4.287412299162852e-08, "loss": 0.0357, "step": 17424 }, { "epoch": 2.91, "grad_norm": 0.4750139117240906, "learning_rate": 4.27072542910445e-08, "loss": 0.0384, "step": 17425 }, { "epoch": 2.91, "grad_norm": 0.37135937809944153, "learning_rate": 4.254071026061457e-08, "loss": 0.0187, "step": 17426 }, { "epoch": 2.91, "grad_norm": 0.3668908476829529, "learning_rate": 4.237449090576773e-08, "loss": 0.0275, "step": 17427 }, { "epoch": 2.91, "grad_norm": 0.6364882588386536, "learning_rate": 4.220859623192519e-08, "loss": 0.036, "step": 17428 }, { "epoch": 2.92, "grad_norm": 0.4713037610054016, "learning_rate": 4.2043026244497076e-08, "loss": 0.0257, "step": 17429 }, { "epoch": 2.92, "grad_norm": 0.2590036690235138, "learning_rate": 4.187778094888018e-08, "loss": 0.0183, "step": 17430 }, { "epoch": 2.92, "grad_norm": 0.37495288252830505, "learning_rate": 4.1712860350463514e-08, "loss": 0.0316, "step": 17431 }, { "epoch": 2.92, "grad_norm": 0.3504815697669983, "learning_rate": 4.1548264454622787e-08, "loss": 0.0192, "step": 17432 }, { "epoch": 2.92, "grad_norm": 0.5831294059753418, "learning_rate": 4.138399326672815e-08, "loss": 0.0458, "step": 17433 }, { "epoch": 2.92, "grad_norm": 0.3599129617214203, "learning_rate": 4.122004679213309e-08, "loss": 0.0289, "step": 17434 }, { "epoch": 2.92, "grad_norm": 0.3968912959098816, "learning_rate": 4.105642503618334e-08, "loss": 0.0411, "step": 17435 }, { "epoch": 2.92, "grad_norm": 0.469870388507843, "learning_rate": 4.0893128004215745e-08, "loss": 0.0377, "step": 17436 }, { "epoch": 2.92, "grad_norm": 0.6568314433097839, "learning_rate": 4.073015570155381e-08, "loss": 0.0226, "step": 17437 }, { "epoch": 2.92, "grad_norm": 0.37632983922958374, "learning_rate": 4.056750813351218e-08, "loss": 0.0282, "step": 17438 }, { "epoch": 2.92, "grad_norm": 0.5233376026153564, "learning_rate": 4.040518530539328e-08, "loss": 0.0395, "step": 17439 }, { "epoch": 2.92, "grad_norm": 0.458693265914917, "learning_rate": 4.0243187222489544e-08, "loss": 0.0348, "step": 17440 }, { "epoch": 2.92, "grad_norm": 0.3670336902141571, "learning_rate": 4.008151389008341e-08, "loss": 0.0326, "step": 17441 }, { "epoch": 2.92, "grad_norm": 0.2757193446159363, "learning_rate": 3.9920165313447334e-08, "loss": 0.0274, "step": 17442 }, { "epoch": 2.92, "grad_norm": 0.286695271730423, "learning_rate": 3.9759141497841544e-08, "loss": 0.0274, "step": 17443 }, { "epoch": 2.92, "grad_norm": 0.41403433680534363, "learning_rate": 3.95984424485174e-08, "loss": 0.0277, "step": 17444 }, { "epoch": 2.92, "grad_norm": 0.4177120327949524, "learning_rate": 3.943806817071294e-08, "loss": 0.0285, "step": 17445 }, { "epoch": 2.92, "grad_norm": 0.45439136028289795, "learning_rate": 3.92780186696573e-08, "loss": 0.042, "step": 17446 }, { "epoch": 2.92, "grad_norm": 0.21624553203582764, "learning_rate": 3.911829395057187e-08, "loss": 0.0141, "step": 17447 }, { "epoch": 2.92, "grad_norm": 0.33432531356811523, "learning_rate": 3.895889401866248e-08, "loss": 0.0199, "step": 17448 }, { "epoch": 2.92, "grad_norm": 0.28447070717811584, "learning_rate": 3.87998188791272e-08, "loss": 0.0321, "step": 17449 }, { "epoch": 2.92, "grad_norm": 0.3627491295337677, "learning_rate": 3.864106853715188e-08, "loss": 0.0426, "step": 17450 }, { "epoch": 2.92, "grad_norm": 0.331282377243042, "learning_rate": 3.84826429979146e-08, "loss": 0.0225, "step": 17451 }, { "epoch": 2.92, "grad_norm": 0.4227629005908966, "learning_rate": 3.8324542266579004e-08, "loss": 0.0303, "step": 17452 }, { "epoch": 2.92, "grad_norm": 0.3937232196331024, "learning_rate": 3.8166766348300965e-08, "loss": 0.0375, "step": 17453 }, { "epoch": 2.92, "grad_norm": 0.33082515001296997, "learning_rate": 3.800931524822638e-08, "loss": 0.0307, "step": 17454 }, { "epoch": 2.92, "grad_norm": 0.424679160118103, "learning_rate": 3.785218897148668e-08, "loss": 0.0346, "step": 17455 }, { "epoch": 2.92, "grad_norm": 0.3794316053390503, "learning_rate": 3.7695387523206675e-08, "loss": 0.0301, "step": 17456 }, { "epoch": 2.92, "grad_norm": 0.38950347900390625, "learning_rate": 3.7538910908498926e-08, "loss": 0.0261, "step": 17457 }, { "epoch": 2.92, "grad_norm": 0.5257704257965088, "learning_rate": 3.738275913246603e-08, "loss": 0.0385, "step": 17458 }, { "epoch": 2.92, "grad_norm": 0.3160051703453064, "learning_rate": 3.722693220019835e-08, "loss": 0.017, "step": 17459 }, { "epoch": 2.92, "grad_norm": 0.39147669076919556, "learning_rate": 3.707143011677739e-08, "loss": 0.0273, "step": 17460 }, { "epoch": 2.92, "grad_norm": 0.36975380778312683, "learning_rate": 3.6916252887273516e-08, "loss": 0.0279, "step": 17461 }, { "epoch": 2.92, "grad_norm": 0.2750317454338074, "learning_rate": 3.676140051674604e-08, "loss": 0.0206, "step": 17462 }, { "epoch": 2.92, "grad_norm": 0.4562405049800873, "learning_rate": 3.660687301024535e-08, "loss": 0.0334, "step": 17463 }, { "epoch": 2.92, "grad_norm": 0.36479389667510986, "learning_rate": 3.645267037280853e-08, "loss": 0.0264, "step": 17464 }, { "epoch": 2.92, "grad_norm": 0.406482994556427, "learning_rate": 3.6298792609464895e-08, "loss": 0.0291, "step": 17465 }, { "epoch": 2.92, "grad_norm": 0.2740669548511505, "learning_rate": 3.614523972523043e-08, "loss": 0.0251, "step": 17466 }, { "epoch": 2.92, "grad_norm": 0.26686322689056396, "learning_rate": 3.599201172511335e-08, "loss": 0.0203, "step": 17467 }, { "epoch": 2.92, "grad_norm": 0.6133571267127991, "learning_rate": 3.583910861410855e-08, "loss": 0.0273, "step": 17468 }, { "epoch": 2.92, "grad_norm": 0.5539731979370117, "learning_rate": 3.568653039720205e-08, "loss": 0.0368, "step": 17469 }, { "epoch": 2.92, "grad_norm": 0.3934670686721802, "learning_rate": 3.553427707936985e-08, "loss": 0.0271, "step": 17470 }, { "epoch": 2.92, "grad_norm": 0.46905288100242615, "learning_rate": 3.5382348665574664e-08, "loss": 0.0568, "step": 17471 }, { "epoch": 2.92, "grad_norm": 0.42725297808647156, "learning_rate": 3.523074516077141e-08, "loss": 0.0298, "step": 17472 }, { "epoch": 2.92, "grad_norm": 0.47406426072120667, "learning_rate": 3.5079466569903906e-08, "loss": 0.0377, "step": 17473 }, { "epoch": 2.92, "grad_norm": 0.3657363951206207, "learning_rate": 3.492851289790267e-08, "loss": 0.0242, "step": 17474 }, { "epoch": 2.92, "grad_norm": 0.433257132768631, "learning_rate": 3.477788414969152e-08, "loss": 0.0334, "step": 17475 }, { "epoch": 2.92, "grad_norm": 0.4306716024875641, "learning_rate": 3.462758033018099e-08, "loss": 0.0307, "step": 17476 }, { "epoch": 2.92, "grad_norm": 0.5324764847755432, "learning_rate": 3.44776014442727e-08, "loss": 0.0372, "step": 17477 }, { "epoch": 2.92, "grad_norm": 0.38651221990585327, "learning_rate": 3.432794749685719e-08, "loss": 0.0392, "step": 17478 }, { "epoch": 2.92, "grad_norm": 0.34083303809165955, "learning_rate": 3.417861849281279e-08, "loss": 0.0378, "step": 17479 }, { "epoch": 2.92, "grad_norm": 0.29654353857040405, "learning_rate": 3.402961443701003e-08, "loss": 0.0138, "step": 17480 }, { "epoch": 2.92, "grad_norm": 0.3128919005393982, "learning_rate": 3.388093533430503e-08, "loss": 0.0193, "step": 17481 }, { "epoch": 2.92, "grad_norm": 0.35481154918670654, "learning_rate": 3.373258118954836e-08, "loss": 0.0308, "step": 17482 }, { "epoch": 2.92, "grad_norm": 0.340901643037796, "learning_rate": 3.3584552007576156e-08, "loss": 0.0222, "step": 17483 }, { "epoch": 2.92, "grad_norm": 0.4426548182964325, "learning_rate": 3.343684779321454e-08, "loss": 0.042, "step": 17484 }, { "epoch": 2.92, "grad_norm": 0.2880321145057678, "learning_rate": 3.328946855127968e-08, "loss": 0.0218, "step": 17485 }, { "epoch": 2.92, "grad_norm": 0.39882466197013855, "learning_rate": 3.314241428657883e-08, "loss": 0.0301, "step": 17486 }, { "epoch": 2.92, "grad_norm": 0.4142081141471863, "learning_rate": 3.299568500390482e-08, "loss": 0.0263, "step": 17487 }, { "epoch": 2.92, "grad_norm": 0.503655731678009, "learning_rate": 3.284928070804272e-08, "loss": 0.043, "step": 17488 }, { "epoch": 2.93, "grad_norm": 0.4698580503463745, "learning_rate": 3.2703201403766484e-08, "loss": 0.0259, "step": 17489 }, { "epoch": 2.93, "grad_norm": 0.44440269470214844, "learning_rate": 3.255744709583897e-08, "loss": 0.0351, "step": 17490 }, { "epoch": 2.93, "grad_norm": 0.3603111505508423, "learning_rate": 3.241201778901304e-08, "loss": 0.0305, "step": 17491 }, { "epoch": 2.93, "grad_norm": 0.48314061760902405, "learning_rate": 3.226691348802935e-08, "loss": 0.0381, "step": 17492 }, { "epoch": 2.93, "grad_norm": 0.40413379669189453, "learning_rate": 3.212213419762078e-08, "loss": 0.0226, "step": 17493 }, { "epoch": 2.93, "grad_norm": 0.38815629482269287, "learning_rate": 3.197767992250689e-08, "loss": 0.0275, "step": 17494 }, { "epoch": 2.93, "grad_norm": 0.34457772970199585, "learning_rate": 3.183355066739835e-08, "loss": 0.0256, "step": 17495 }, { "epoch": 2.93, "grad_norm": 0.3833189308643341, "learning_rate": 3.168974643699474e-08, "loss": 0.0232, "step": 17496 }, { "epoch": 2.93, "grad_norm": 0.4755384624004364, "learning_rate": 3.154626723598342e-08, "loss": 0.0278, "step": 17497 }, { "epoch": 2.93, "grad_norm": 0.32142025232315063, "learning_rate": 3.14031130690462e-08, "loss": 0.0218, "step": 17498 }, { "epoch": 2.93, "grad_norm": 0.517345666885376, "learning_rate": 3.1260283940847124e-08, "loss": 0.0333, "step": 17499 }, { "epoch": 2.93, "grad_norm": 0.5266538262367249, "learning_rate": 3.11177798560458e-08, "loss": 0.0576, "step": 17500 }, { "epoch": 2.93, "grad_norm": 0.38045528531074524, "learning_rate": 3.097560081928741e-08, "loss": 0.0379, "step": 17501 }, { "epoch": 2.93, "grad_norm": 0.40107855200767517, "learning_rate": 3.083374683520824e-08, "loss": 0.0381, "step": 17502 }, { "epoch": 2.93, "grad_norm": 0.2617616057395935, "learning_rate": 3.069221790843457e-08, "loss": 0.029, "step": 17503 }, { "epoch": 2.93, "grad_norm": 0.24774597585201263, "learning_rate": 3.0551014043579406e-08, "loss": 0.0275, "step": 17504 }, { "epoch": 2.93, "grad_norm": 0.25891774892807007, "learning_rate": 3.041013524524794e-08, "loss": 0.0162, "step": 17505 }, { "epoch": 2.93, "grad_norm": 0.37087416648864746, "learning_rate": 3.026958151803316e-08, "loss": 0.0199, "step": 17506 }, { "epoch": 2.93, "grad_norm": 0.4370904564857483, "learning_rate": 3.012935286651919e-08, "loss": 0.0334, "step": 17507 }, { "epoch": 2.93, "grad_norm": 0.327176958322525, "learning_rate": 2.998944929527681e-08, "loss": 0.0386, "step": 17508 }, { "epoch": 2.93, "grad_norm": 0.5105572938919067, "learning_rate": 2.984987080886792e-08, "loss": 0.0391, "step": 17509 }, { "epoch": 2.93, "grad_norm": 0.3611774146556854, "learning_rate": 2.9710617411845555e-08, "loss": 0.0243, "step": 17510 }, { "epoch": 2.93, "grad_norm": 0.3934159278869629, "learning_rate": 2.9571689108747194e-08, "loss": 0.0282, "step": 17511 }, { "epoch": 2.93, "grad_norm": 0.42073532938957214, "learning_rate": 2.9433085904105875e-08, "loss": 0.0436, "step": 17512 }, { "epoch": 2.93, "grad_norm": 0.24981248378753662, "learning_rate": 2.9294807802439094e-08, "loss": 0.0167, "step": 17513 }, { "epoch": 2.93, "grad_norm": 0.5089033246040344, "learning_rate": 2.9156854808255475e-08, "loss": 0.0437, "step": 17514 }, { "epoch": 2.93, "grad_norm": 0.35811904072761536, "learning_rate": 2.9019226926054745e-08, "loss": 0.0425, "step": 17515 }, { "epoch": 2.93, "grad_norm": 0.35998499393463135, "learning_rate": 2.8881924160322206e-08, "loss": 0.0282, "step": 17516 }, { "epoch": 2.93, "grad_norm": 0.4370171129703522, "learning_rate": 2.8744946515536498e-08, "loss": 0.0313, "step": 17517 }, { "epoch": 2.93, "grad_norm": 0.5704020857810974, "learning_rate": 2.860829399616405e-08, "loss": 0.0218, "step": 17518 }, { "epoch": 2.93, "grad_norm": 0.3362051844596863, "learning_rate": 2.8471966606659073e-08, "loss": 0.0254, "step": 17519 }, { "epoch": 2.93, "grad_norm": 0.4422036409378052, "learning_rate": 2.8335964351468015e-08, "loss": 0.0389, "step": 17520 }, { "epoch": 2.93, "grad_norm": 0.3926776051521301, "learning_rate": 2.8200287235023992e-08, "loss": 0.027, "step": 17521 }, { "epoch": 2.93, "grad_norm": 0.28613904118537903, "learning_rate": 2.806493526175347e-08, "loss": 0.0255, "step": 17522 }, { "epoch": 2.93, "grad_norm": 0.3826996684074402, "learning_rate": 2.7929908436067354e-08, "loss": 0.0276, "step": 17523 }, { "epoch": 2.93, "grad_norm": 0.44474807381629944, "learning_rate": 2.77952067623688e-08, "loss": 0.0337, "step": 17524 }, { "epoch": 2.93, "grad_norm": 0.5336955189704895, "learning_rate": 2.7660830245049842e-08, "loss": 0.039, "step": 17525 }, { "epoch": 2.93, "grad_norm": 0.30824363231658936, "learning_rate": 2.7526778888493643e-08, "loss": 0.0274, "step": 17526 }, { "epoch": 2.93, "grad_norm": 0.3210996389389038, "learning_rate": 2.7393052697070045e-08, "loss": 0.0256, "step": 17527 }, { "epoch": 2.93, "grad_norm": 0.28728193044662476, "learning_rate": 2.725965167513889e-08, "loss": 0.0169, "step": 17528 }, { "epoch": 2.93, "grad_norm": 0.44521182775497437, "learning_rate": 2.712657582705003e-08, "loss": 0.0329, "step": 17529 }, { "epoch": 2.93, "grad_norm": 0.3550163209438324, "learning_rate": 2.6993825157143326e-08, "loss": 0.0258, "step": 17530 }, { "epoch": 2.93, "grad_norm": 0.503825843334198, "learning_rate": 2.6861399669746436e-08, "loss": 0.0292, "step": 17531 }, { "epoch": 2.93, "grad_norm": 0.41870906949043274, "learning_rate": 2.6729299369177007e-08, "loss": 0.0457, "step": 17532 }, { "epoch": 2.93, "grad_norm": 0.5085027813911438, "learning_rate": 2.6597524259743823e-08, "loss": 0.0423, "step": 17533 }, { "epoch": 2.93, "grad_norm": 0.540642499923706, "learning_rate": 2.6466074345742333e-08, "loss": 0.0314, "step": 17534 }, { "epoch": 2.93, "grad_norm": 0.27212411165237427, "learning_rate": 2.6334949631459106e-08, "loss": 0.0182, "step": 17535 }, { "epoch": 2.93, "grad_norm": 0.4523791968822479, "learning_rate": 2.6204150121168502e-08, "loss": 0.0398, "step": 17536 }, { "epoch": 2.93, "grad_norm": 0.38955897092819214, "learning_rate": 2.607367581913822e-08, "loss": 0.0285, "step": 17537 }, { "epoch": 2.93, "grad_norm": 0.35853520035743713, "learning_rate": 2.5943526729619306e-08, "loss": 0.0246, "step": 17538 }, { "epoch": 2.93, "grad_norm": 0.352771520614624, "learning_rate": 2.581370285685725e-08, "loss": 0.0416, "step": 17539 }, { "epoch": 2.93, "grad_norm": 0.41470667719841003, "learning_rate": 2.568420420508422e-08, "loss": 0.02, "step": 17540 }, { "epoch": 2.93, "grad_norm": 0.36170080304145813, "learning_rate": 2.5555030778523506e-08, "loss": 0.0336, "step": 17541 }, { "epoch": 2.93, "grad_norm": 0.4358097314834595, "learning_rate": 2.5426182581387292e-08, "loss": 0.0229, "step": 17542 }, { "epoch": 2.93, "grad_norm": 0.42418089509010315, "learning_rate": 2.5297659617875558e-08, "loss": 0.0456, "step": 17543 }, { "epoch": 2.93, "grad_norm": 0.34948962926864624, "learning_rate": 2.51694618921805e-08, "loss": 0.0269, "step": 17544 }, { "epoch": 2.93, "grad_norm": 0.4027525782585144, "learning_rate": 2.5041589408479893e-08, "loss": 0.0249, "step": 17545 }, { "epoch": 2.93, "grad_norm": 0.3133285641670227, "learning_rate": 2.491404217094595e-08, "loss": 0.0297, "step": 17546 }, { "epoch": 2.93, "grad_norm": 0.4237750470638275, "learning_rate": 2.478682018373535e-08, "loss": 0.0413, "step": 17547 }, { "epoch": 2.93, "grad_norm": 0.3074001669883728, "learning_rate": 2.465992345099699e-08, "loss": 0.0318, "step": 17548 }, { "epoch": 2.94, "grad_norm": 0.37130486965179443, "learning_rate": 2.453335197686868e-08, "loss": 0.0385, "step": 17549 }, { "epoch": 2.94, "grad_norm": 0.3325464427471161, "learning_rate": 2.4407105765477113e-08, "loss": 0.0282, "step": 17550 }, { "epoch": 2.94, "grad_norm": 0.40969568490982056, "learning_rate": 2.4281184820938995e-08, "loss": 0.03, "step": 17551 }, { "epoch": 2.94, "grad_norm": 0.47830432653427124, "learning_rate": 2.4155589147361048e-08, "loss": 0.0353, "step": 17552 }, { "epoch": 2.94, "grad_norm": 0.5750932693481445, "learning_rate": 2.4030318748836655e-08, "loss": 0.0273, "step": 17553 }, { "epoch": 2.94, "grad_norm": 0.35470831394195557, "learning_rate": 2.3905373629451446e-08, "loss": 0.0345, "step": 17554 }, { "epoch": 2.94, "grad_norm": 0.46945491433143616, "learning_rate": 2.378075379327882e-08, "loss": 0.0443, "step": 17555 }, { "epoch": 2.94, "grad_norm": 0.32317420840263367, "learning_rate": 2.3656459244383313e-08, "loss": 0.0245, "step": 17556 }, { "epoch": 2.94, "grad_norm": 0.42246389389038086, "learning_rate": 2.353248998681612e-08, "loss": 0.032, "step": 17557 }, { "epoch": 2.94, "grad_norm": 0.32694509625434875, "learning_rate": 2.340884602462068e-08, "loss": 0.0286, "step": 17558 }, { "epoch": 2.94, "grad_norm": 0.3558557331562042, "learning_rate": 2.3285527361828208e-08, "loss": 0.0251, "step": 17559 }, { "epoch": 2.94, "grad_norm": 0.3596225380897522, "learning_rate": 2.3162534002459937e-08, "loss": 0.0274, "step": 17560 }, { "epoch": 2.94, "grad_norm": 0.2986152172088623, "learning_rate": 2.3039865950525987e-08, "loss": 0.0218, "step": 17561 }, { "epoch": 2.94, "grad_norm": 0.3680383861064911, "learning_rate": 2.2917523210025383e-08, "loss": 0.0374, "step": 17562 }, { "epoch": 2.94, "grad_norm": 0.5071936249732971, "learning_rate": 2.279550578494827e-08, "loss": 0.0443, "step": 17563 }, { "epoch": 2.94, "grad_norm": 0.3478736877441406, "learning_rate": 2.2673813679273683e-08, "loss": 0.0215, "step": 17564 }, { "epoch": 2.94, "grad_norm": 0.32596877217292786, "learning_rate": 2.255244689696734e-08, "loss": 0.0194, "step": 17565 }, { "epoch": 2.94, "grad_norm": 0.362409770488739, "learning_rate": 2.2431405441988298e-08, "loss": 0.0245, "step": 17566 }, { "epoch": 2.94, "grad_norm": 0.47189024090766907, "learning_rate": 2.2310689318283397e-08, "loss": 0.0285, "step": 17567 }, { "epoch": 2.94, "grad_norm": 0.34883224964141846, "learning_rate": 2.219029852978727e-08, "loss": 0.0247, "step": 17568 }, { "epoch": 2.94, "grad_norm": 0.5215546488761902, "learning_rate": 2.2070233080426774e-08, "loss": 0.039, "step": 17569 }, { "epoch": 2.94, "grad_norm": 0.3872082531452179, "learning_rate": 2.1950492974116555e-08, "loss": 0.0334, "step": 17570 }, { "epoch": 2.94, "grad_norm": 0.3776501715183258, "learning_rate": 2.183107821476127e-08, "loss": 0.0288, "step": 17571 }, { "epoch": 2.94, "grad_norm": 0.4529164135456085, "learning_rate": 2.171198880625336e-08, "loss": 0.0321, "step": 17572 }, { "epoch": 2.94, "grad_norm": 0.3506724238395691, "learning_rate": 2.1593224752476382e-08, "loss": 0.0219, "step": 17573 }, { "epoch": 2.94, "grad_norm": 0.3028780221939087, "learning_rate": 2.1474786057302798e-08, "loss": 0.021, "step": 17574 }, { "epoch": 2.94, "grad_norm": 0.31545689702033997, "learning_rate": 2.1356672724595074e-08, "loss": 0.027, "step": 17575 }, { "epoch": 2.94, "grad_norm": 0.5717720985412598, "learning_rate": 2.123888475820457e-08, "loss": 0.0357, "step": 17576 }, { "epoch": 2.94, "grad_norm": 0.5910969376564026, "learning_rate": 2.112142216197044e-08, "loss": 0.0332, "step": 17577 }, { "epoch": 2.94, "grad_norm": 0.352078378200531, "learning_rate": 2.1004284939724062e-08, "loss": 0.0207, "step": 17578 }, { "epoch": 2.94, "grad_norm": 0.41960689425468445, "learning_rate": 2.0887473095283496e-08, "loss": 0.0246, "step": 17579 }, { "epoch": 2.94, "grad_norm": 0.3494757115840912, "learning_rate": 2.0770986632459024e-08, "loss": 0.0381, "step": 17580 }, { "epoch": 2.94, "grad_norm": 0.38189634680747986, "learning_rate": 2.065482555504872e-08, "loss": 0.0243, "step": 17581 }, { "epoch": 2.94, "grad_norm": 0.32588666677474976, "learning_rate": 2.0538989866839555e-08, "loss": 0.0255, "step": 17582 }, { "epoch": 2.94, "grad_norm": 0.3664226830005646, "learning_rate": 2.0423479571607397e-08, "loss": 0.0254, "step": 17583 }, { "epoch": 2.94, "grad_norm": 0.3367242217063904, "learning_rate": 2.0308294673121453e-08, "loss": 0.0227, "step": 17584 }, { "epoch": 2.94, "grad_norm": 0.3442453444004059, "learning_rate": 2.0193435175135388e-08, "loss": 0.0266, "step": 17585 }, { "epoch": 2.94, "grad_norm": 0.31604117155075073, "learning_rate": 2.0078901081393986e-08, "loss": 0.0174, "step": 17586 }, { "epoch": 2.94, "grad_norm": 0.41943830251693726, "learning_rate": 1.9964692395633146e-08, "loss": 0.0467, "step": 17587 }, { "epoch": 2.94, "grad_norm": 0.38176456093788147, "learning_rate": 1.9850809121576554e-08, "loss": 0.0388, "step": 17588 }, { "epoch": 2.94, "grad_norm": 0.3583696782588959, "learning_rate": 1.9737251262936795e-08, "loss": 0.0324, "step": 17589 }, { "epoch": 2.94, "grad_norm": 0.44587984681129456, "learning_rate": 1.962401882341758e-08, "loss": 0.0335, "step": 17590 }, { "epoch": 2.94, "grad_norm": 0.3631362020969391, "learning_rate": 1.9511111806709283e-08, "loss": 0.0223, "step": 17591 }, { "epoch": 2.94, "grad_norm": 0.31512564420700073, "learning_rate": 1.9398530216495626e-08, "loss": 0.0267, "step": 17592 }, { "epoch": 2.94, "grad_norm": 0.4470865726470947, "learning_rate": 1.9286274056445898e-08, "loss": 0.0277, "step": 17593 }, { "epoch": 2.94, "grad_norm": 0.3403400182723999, "learning_rate": 1.9174343330220503e-08, "loss": 0.0336, "step": 17594 }, { "epoch": 2.94, "grad_norm": 0.22777314484119415, "learning_rate": 1.9062738041468744e-08, "loss": 0.0151, "step": 17595 }, { "epoch": 2.94, "grad_norm": 0.33990809321403503, "learning_rate": 1.8951458193831042e-08, "loss": 0.0264, "step": 17596 }, { "epoch": 2.94, "grad_norm": 0.3415079116821289, "learning_rate": 1.8840503790935606e-08, "loss": 0.0398, "step": 17597 }, { "epoch": 2.94, "grad_norm": 0.4586311876773834, "learning_rate": 1.872987483639843e-08, "loss": 0.0412, "step": 17598 }, { "epoch": 2.94, "grad_norm": 0.40463557839393616, "learning_rate": 1.8619571333828856e-08, "loss": 0.0241, "step": 17599 }, { "epoch": 2.94, "grad_norm": 0.3961680233478546, "learning_rate": 1.850959328682289e-08, "loss": 0.0309, "step": 17600 }, { "epoch": 2.94, "grad_norm": 0.35683226585388184, "learning_rate": 1.8399940698965447e-08, "loss": 0.0261, "step": 17601 }, { "epoch": 2.94, "grad_norm": 0.42748886346817017, "learning_rate": 1.829061357383366e-08, "loss": 0.0217, "step": 17602 }, { "epoch": 2.94, "grad_norm": 0.49549293518066406, "learning_rate": 1.8181611914990237e-08, "loss": 0.0282, "step": 17603 }, { "epoch": 2.94, "grad_norm": 0.37974813580513, "learning_rate": 1.8072935725990114e-08, "loss": 0.0296, "step": 17604 }, { "epoch": 2.94, "grad_norm": 0.43865135312080383, "learning_rate": 1.7964585010378234e-08, "loss": 0.0346, "step": 17605 }, { "epoch": 2.94, "grad_norm": 0.45096203684806824, "learning_rate": 1.78565597716851e-08, "loss": 0.0264, "step": 17606 }, { "epoch": 2.94, "grad_norm": 0.3370939791202545, "learning_rate": 1.7748860013434565e-08, "loss": 0.0372, "step": 17607 }, { "epoch": 2.94, "grad_norm": 0.3826325237751007, "learning_rate": 1.7641485739138266e-08, "loss": 0.0312, "step": 17608 }, { "epoch": 2.95, "grad_norm": 0.40969836711883545, "learning_rate": 1.753443695229673e-08, "loss": 0.0192, "step": 17609 }, { "epoch": 2.95, "grad_norm": 0.39056700468063354, "learning_rate": 1.742771365640161e-08, "loss": 0.0392, "step": 17610 }, { "epoch": 2.95, "grad_norm": 0.37434425950050354, "learning_rate": 1.732131585493013e-08, "loss": 0.0256, "step": 17611 }, { "epoch": 2.95, "grad_norm": 0.29092541337013245, "learning_rate": 1.7215243551355064e-08, "loss": 0.028, "step": 17612 }, { "epoch": 2.95, "grad_norm": 0.4326590299606323, "learning_rate": 1.7109496749131427e-08, "loss": 0.0234, "step": 17613 }, { "epoch": 2.95, "grad_norm": 0.31795600056648254, "learning_rate": 1.700407545170979e-08, "loss": 0.0281, "step": 17614 }, { "epoch": 2.95, "grad_norm": 0.30451348423957825, "learning_rate": 1.6898979662527404e-08, "loss": 0.022, "step": 17615 }, { "epoch": 2.95, "grad_norm": 0.4001516103744507, "learning_rate": 1.679420938500931e-08, "loss": 0.0426, "step": 17616 }, { "epoch": 2.95, "grad_norm": 0.3346642255783081, "learning_rate": 1.6689764622573878e-08, "loss": 0.0254, "step": 17617 }, { "epoch": 2.95, "grad_norm": 0.524412214756012, "learning_rate": 1.658564537862506e-08, "loss": 0.0377, "step": 17618 }, { "epoch": 2.95, "grad_norm": 0.44811466336250305, "learning_rate": 1.6481851656557913e-08, "loss": 0.0308, "step": 17619 }, { "epoch": 2.95, "grad_norm": 0.4569259285926819, "learning_rate": 1.637838345975751e-08, "loss": 0.0539, "step": 17620 }, { "epoch": 2.95, "grad_norm": 0.4978882670402527, "learning_rate": 1.62752407915967e-08, "loss": 0.0447, "step": 17621 }, { "epoch": 2.95, "grad_norm": 0.4605672061443329, "learning_rate": 1.6172423655439475e-08, "loss": 0.0322, "step": 17622 }, { "epoch": 2.95, "grad_norm": 0.40337687730789185, "learning_rate": 1.606993205463758e-08, "loss": 0.0361, "step": 17623 }, { "epoch": 2.95, "grad_norm": 0.5621756911277771, "learning_rate": 1.5967765992532802e-08, "loss": 0.0508, "step": 17624 }, { "epoch": 2.95, "grad_norm": 0.3442164361476898, "learning_rate": 1.586592547245802e-08, "loss": 0.0209, "step": 17625 }, { "epoch": 2.95, "grad_norm": 0.3432389199733734, "learning_rate": 1.5764410497731697e-08, "loss": 0.0272, "step": 17626 }, { "epoch": 2.95, "grad_norm": 0.9173879623413086, "learning_rate": 1.5663221071665625e-08, "loss": 0.0446, "step": 17627 }, { "epoch": 2.95, "grad_norm": 0.4588254988193512, "learning_rate": 1.5562357197557166e-08, "loss": 0.0363, "step": 17628 }, { "epoch": 2.95, "grad_norm": 0.29412147402763367, "learning_rate": 1.5461818878697022e-08, "loss": 0.0232, "step": 17629 }, { "epoch": 2.95, "grad_norm": 0.38160449266433716, "learning_rate": 1.536160611836368e-08, "loss": 0.0347, "step": 17630 }, { "epoch": 2.95, "grad_norm": 0.4771074950695038, "learning_rate": 1.526171891982231e-08, "loss": 0.0378, "step": 17631 }, { "epoch": 2.95, "grad_norm": 0.34908533096313477, "learning_rate": 1.5162157286332524e-08, "loss": 0.03, "step": 17632 }, { "epoch": 2.95, "grad_norm": 0.5727000832557678, "learning_rate": 1.5062921221138394e-08, "loss": 0.0365, "step": 17633 }, { "epoch": 2.95, "grad_norm": 0.5143211483955383, "learning_rate": 1.4964010727477328e-08, "loss": 0.0504, "step": 17634 }, { "epoch": 2.95, "grad_norm": 0.4237830638885498, "learning_rate": 1.4865425808573419e-08, "loss": 0.0277, "step": 17635 }, { "epoch": 2.95, "grad_norm": 0.45084789395332336, "learning_rate": 1.4767166467640758e-08, "loss": 0.0353, "step": 17636 }, { "epoch": 2.95, "grad_norm": 0.3170018792152405, "learning_rate": 1.4669232707884562e-08, "loss": 0.0262, "step": 17637 }, { "epoch": 2.95, "grad_norm": 0.35489189624786377, "learning_rate": 1.457162453249783e-08, "loss": 0.0305, "step": 17638 }, { "epoch": 2.95, "grad_norm": 0.5447407364845276, "learning_rate": 1.4474341944662462e-08, "loss": 0.0382, "step": 17639 }, { "epoch": 2.95, "grad_norm": 0.5023084282875061, "learning_rate": 1.4377384947551476e-08, "loss": 0.0519, "step": 17640 }, { "epoch": 2.95, "grad_norm": 0.30665963888168335, "learning_rate": 1.4280753544324566e-08, "loss": 0.0153, "step": 17641 }, { "epoch": 2.95, "grad_norm": 0.30625206232070923, "learning_rate": 1.4184447738134765e-08, "loss": 0.0219, "step": 17642 }, { "epoch": 2.95, "grad_norm": 0.43141964077949524, "learning_rate": 1.4088467532119566e-08, "loss": 0.0246, "step": 17643 }, { "epoch": 2.95, "grad_norm": 0.4989376664161682, "learning_rate": 1.3992812929410905e-08, "loss": 0.0446, "step": 17644 }, { "epoch": 2.95, "grad_norm": 0.3645942211151123, "learning_rate": 1.3897483933127398e-08, "loss": 0.0282, "step": 17645 }, { "epoch": 2.95, "grad_norm": 0.3413313627243042, "learning_rate": 1.3802480546376562e-08, "loss": 0.0265, "step": 17646 }, { "epoch": 2.95, "grad_norm": 0.39152026176452637, "learning_rate": 1.3707802772255918e-08, "loss": 0.0289, "step": 17647 }, { "epoch": 2.95, "grad_norm": 0.523151159286499, "learning_rate": 1.3613450613852996e-08, "loss": 0.0369, "step": 17648 }, { "epoch": 2.95, "grad_norm": 0.48334649205207825, "learning_rate": 1.3519424074245336e-08, "loss": 0.0278, "step": 17649 }, { "epoch": 2.95, "grad_norm": 0.36421647667884827, "learning_rate": 1.342572315649715e-08, "loss": 0.0247, "step": 17650 }, { "epoch": 2.95, "grad_norm": 0.29751309752464294, "learning_rate": 1.3332347863663775e-08, "loss": 0.032, "step": 17651 }, { "epoch": 2.95, "grad_norm": 0.5412673354148865, "learning_rate": 1.323929819879055e-08, "loss": 0.0385, "step": 17652 }, { "epoch": 2.95, "grad_norm": 0.46375441551208496, "learning_rate": 1.3146574164911718e-08, "loss": 0.0456, "step": 17653 }, { "epoch": 2.95, "grad_norm": 0.35098540782928467, "learning_rate": 1.3054175765049303e-08, "loss": 0.031, "step": 17654 }, { "epoch": 2.95, "grad_norm": 0.44461849331855774, "learning_rate": 1.2962103002217562e-08, "loss": 0.0328, "step": 17655 }, { "epoch": 2.95, "grad_norm": 0.3473033905029297, "learning_rate": 1.2870355879418538e-08, "loss": 0.0255, "step": 17656 }, { "epoch": 2.95, "grad_norm": 0.37457987666130066, "learning_rate": 1.2778934399643173e-08, "loss": 0.0338, "step": 17657 }, { "epoch": 2.95, "grad_norm": 0.48135387897491455, "learning_rate": 1.2687838565871303e-08, "loss": 0.0338, "step": 17658 }, { "epoch": 2.95, "grad_norm": 0.4768277406692505, "learning_rate": 1.2597068381076105e-08, "loss": 0.0404, "step": 17659 }, { "epoch": 2.95, "grad_norm": 0.2825770080089569, "learning_rate": 1.2506623848214106e-08, "loss": 0.021, "step": 17660 }, { "epoch": 2.95, "grad_norm": 0.8151912093162537, "learning_rate": 1.2416504970236275e-08, "loss": 0.0291, "step": 17661 }, { "epoch": 2.95, "grad_norm": 0.35576120018959045, "learning_rate": 1.2326711750080266e-08, "loss": 0.0301, "step": 17662 }, { "epoch": 2.95, "grad_norm": 0.4483277499675751, "learning_rate": 1.2237244190674846e-08, "loss": 0.0356, "step": 17663 }, { "epoch": 2.95, "grad_norm": 0.30164363980293274, "learning_rate": 1.214810229493657e-08, "loss": 0.025, "step": 17664 }, { "epoch": 2.95, "grad_norm": 0.5487667322158813, "learning_rate": 1.2059286065772002e-08, "loss": 0.0255, "step": 17665 }, { "epoch": 2.95, "grad_norm": 0.44709300994873047, "learning_rate": 1.1970795506076604e-08, "loss": 0.0368, "step": 17666 }, { "epoch": 2.95, "grad_norm": 0.27522042393684387, "learning_rate": 1.1882630618736957e-08, "loss": 0.0168, "step": 17667 }, { "epoch": 2.96, "grad_norm": 0.3946683406829834, "learning_rate": 1.1794791406626316e-08, "loss": 0.0364, "step": 17668 }, { "epoch": 2.96, "grad_norm": 0.5195184946060181, "learning_rate": 1.1707277872610168e-08, "loss": 0.0497, "step": 17669 }, { "epoch": 2.96, "grad_norm": 0.36519238352775574, "learning_rate": 1.1620090019541786e-08, "loss": 0.0198, "step": 17670 }, { "epoch": 2.96, "grad_norm": 0.37909191846847534, "learning_rate": 1.153322785026334e-08, "loss": 0.0362, "step": 17671 }, { "epoch": 2.96, "grad_norm": 0.4519786536693573, "learning_rate": 1.144669136760701e-08, "loss": 0.0302, "step": 17672 }, { "epoch": 2.96, "grad_norm": 0.4180445075035095, "learning_rate": 1.1360480574394983e-08, "loss": 0.041, "step": 17673 }, { "epoch": 2.96, "grad_norm": 0.48657509684562683, "learning_rate": 1.1274595473438344e-08, "loss": 0.0365, "step": 17674 }, { "epoch": 2.96, "grad_norm": 0.34606751799583435, "learning_rate": 1.1189036067537074e-08, "loss": 0.0252, "step": 17675 }, { "epoch": 2.96, "grad_norm": 0.38668161630630493, "learning_rate": 1.1103802359481164e-08, "loss": 0.0287, "step": 17676 }, { "epoch": 2.96, "grad_norm": 0.3954105079174042, "learning_rate": 1.1018894352049503e-08, "loss": 0.0303, "step": 17677 }, { "epoch": 2.96, "grad_norm": 0.34447261691093445, "learning_rate": 1.0934312048010986e-08, "loss": 0.0236, "step": 17678 }, { "epoch": 2.96, "grad_norm": 0.2647121548652649, "learning_rate": 1.0850055450123409e-08, "loss": 0.0204, "step": 17679 }, { "epoch": 2.96, "grad_norm": 0.2874297797679901, "learning_rate": 1.0766124561134572e-08, "loss": 0.0138, "step": 17680 }, { "epoch": 2.96, "grad_norm": 0.4786481261253357, "learning_rate": 1.0682519383780066e-08, "loss": 0.0567, "step": 17681 }, { "epoch": 2.96, "grad_norm": 0.35349830985069275, "learning_rate": 1.0599239920786597e-08, "loss": 0.0298, "step": 17682 }, { "epoch": 2.96, "grad_norm": 0.4168539047241211, "learning_rate": 1.0516286174869771e-08, "loss": 0.0384, "step": 17683 }, { "epoch": 2.96, "grad_norm": 0.26524415612220764, "learning_rate": 1.0433658148734093e-08, "loss": 0.0232, "step": 17684 }, { "epoch": 2.96, "grad_norm": 0.41033852100372314, "learning_rate": 1.0351355845072963e-08, "loss": 0.0219, "step": 17685 }, { "epoch": 2.96, "grad_norm": 0.4355068504810333, "learning_rate": 1.0269379266572011e-08, "loss": 0.028, "step": 17686 }, { "epoch": 2.96, "grad_norm": 0.3594883978366852, "learning_rate": 1.0187728415902431e-08, "loss": 0.0356, "step": 17687 }, { "epoch": 2.96, "grad_norm": 0.3504386246204376, "learning_rate": 1.0106403295726541e-08, "loss": 0.019, "step": 17688 }, { "epoch": 2.96, "grad_norm": 0.36517855525016785, "learning_rate": 1.0025403908697773e-08, "loss": 0.0273, "step": 17689 }, { "epoch": 2.96, "grad_norm": 0.32422906160354614, "learning_rate": 9.944730257455126e-09, "loss": 0.0332, "step": 17690 }, { "epoch": 2.96, "grad_norm": 0.5059548020362854, "learning_rate": 9.864382344629831e-09, "loss": 0.0312, "step": 17691 }, { "epoch": 2.96, "grad_norm": 0.48131895065307617, "learning_rate": 9.784360172843122e-09, "loss": 0.0415, "step": 17692 }, { "epoch": 2.96, "grad_norm": 0.4192798435688019, "learning_rate": 9.704663744701803e-09, "loss": 0.0171, "step": 17693 }, { "epoch": 2.96, "grad_norm": 0.5108615756034851, "learning_rate": 9.625293062806018e-09, "loss": 0.0518, "step": 17694 }, { "epoch": 2.96, "grad_norm": 0.2785068452358246, "learning_rate": 9.546248129742585e-09, "loss": 0.0126, "step": 17695 }, { "epoch": 2.96, "grad_norm": 0.30731818079948425, "learning_rate": 9.467528948090555e-09, "loss": 0.025, "step": 17696 }, { "epoch": 2.96, "grad_norm": 0.32830584049224854, "learning_rate": 9.389135520415649e-09, "loss": 0.0306, "step": 17697 }, { "epoch": 2.96, "grad_norm": 0.3082854449748993, "learning_rate": 9.311067849274714e-09, "loss": 0.0247, "step": 17698 }, { "epoch": 2.96, "grad_norm": 0.43194130063056946, "learning_rate": 9.233325937211268e-09, "loss": 0.0316, "step": 17699 }, { "epoch": 2.96, "grad_norm": 0.3830014765262604, "learning_rate": 9.155909786762174e-09, "loss": 0.0261, "step": 17700 }, { "epoch": 2.96, "grad_norm": 0.4175754487514496, "learning_rate": 9.078819400452077e-09, "loss": 0.024, "step": 17701 }, { "epoch": 2.96, "grad_norm": 0.5429412126541138, "learning_rate": 9.002054780792301e-09, "loss": 0.0315, "step": 17702 }, { "epoch": 2.96, "grad_norm": 0.3845936059951782, "learning_rate": 8.925615930287513e-09, "loss": 0.0331, "step": 17703 }, { "epoch": 2.96, "grad_norm": 0.2772296965122223, "learning_rate": 8.849502851429048e-09, "loss": 0.0284, "step": 17704 }, { "epoch": 2.96, "grad_norm": 0.4610792100429535, "learning_rate": 8.773715546700478e-09, "loss": 0.032, "step": 17705 }, { "epoch": 2.96, "grad_norm": 0.4172571003437042, "learning_rate": 8.69825401857205e-09, "loss": 0.026, "step": 17706 }, { "epoch": 2.96, "grad_norm": 0.38964784145355225, "learning_rate": 8.623118269502906e-09, "loss": 0.0214, "step": 17707 }, { "epoch": 2.96, "grad_norm": 0.38712623715400696, "learning_rate": 8.54830830194442e-09, "loss": 0.0259, "step": 17708 }, { "epoch": 2.96, "grad_norm": 0.48003092408180237, "learning_rate": 8.473824118335749e-09, "loss": 0.0317, "step": 17709 }, { "epoch": 2.96, "grad_norm": 0.42134174704551697, "learning_rate": 8.399665721104954e-09, "loss": 0.0397, "step": 17710 }, { "epoch": 2.96, "grad_norm": 0.3542309105396271, "learning_rate": 8.325833112671212e-09, "loss": 0.0223, "step": 17711 }, { "epoch": 2.96, "grad_norm": 0.3656175136566162, "learning_rate": 8.252326295440372e-09, "loss": 0.0239, "step": 17712 }, { "epoch": 2.96, "grad_norm": 0.4241820275783539, "learning_rate": 8.179145271810519e-09, "loss": 0.0367, "step": 17713 }, { "epoch": 2.96, "grad_norm": 0.3319440484046936, "learning_rate": 8.10629004416752e-09, "loss": 0.0259, "step": 17714 }, { "epoch": 2.96, "grad_norm": 0.5773342847824097, "learning_rate": 8.033760614886143e-09, "loss": 0.043, "step": 17715 }, { "epoch": 2.96, "grad_norm": 0.497014582157135, "learning_rate": 7.961556986332275e-09, "loss": 0.0435, "step": 17716 }, { "epoch": 2.96, "grad_norm": 0.2586817443370819, "learning_rate": 7.889679160859587e-09, "loss": 0.0219, "step": 17717 }, { "epoch": 2.96, "grad_norm": 0.646759569644928, "learning_rate": 7.81812714081176e-09, "loss": 0.0476, "step": 17718 }, { "epoch": 2.96, "grad_norm": 0.5066764950752258, "learning_rate": 7.746900928521372e-09, "loss": 0.0374, "step": 17719 }, { "epoch": 2.96, "grad_norm": 0.3715573251247406, "learning_rate": 7.676000526312121e-09, "loss": 0.0307, "step": 17720 }, { "epoch": 2.96, "grad_norm": 0.3803554177284241, "learning_rate": 7.605425936495493e-09, "loss": 0.0324, "step": 17721 }, { "epoch": 2.96, "grad_norm": 0.39914581179618835, "learning_rate": 7.535177161371864e-09, "loss": 0.0223, "step": 17722 }, { "epoch": 2.96, "grad_norm": 0.4267480671405792, "learning_rate": 7.465254203232742e-09, "loss": 0.0315, "step": 17723 }, { "epoch": 2.96, "grad_norm": 0.4939821660518646, "learning_rate": 7.395657064357409e-09, "loss": 0.0384, "step": 17724 }, { "epoch": 2.96, "grad_norm": 0.31958362460136414, "learning_rate": 7.326385747015164e-09, "loss": 0.0322, "step": 17725 }, { "epoch": 2.96, "grad_norm": 0.4640492796897888, "learning_rate": 7.257440253465309e-09, "loss": 0.0247, "step": 17726 }, { "epoch": 2.96, "grad_norm": 0.4003080725669861, "learning_rate": 7.188820585954936e-09, "loss": 0.0249, "step": 17727 }, { "epoch": 2.97, "grad_norm": 0.3317907452583313, "learning_rate": 7.120526746722256e-09, "loss": 0.0245, "step": 17728 }, { "epoch": 2.97, "grad_norm": 0.4684281349182129, "learning_rate": 7.0525587379932645e-09, "loss": 0.0447, "step": 17729 }, { "epoch": 2.97, "grad_norm": 0.2969779670238495, "learning_rate": 6.984916561986188e-09, "loss": 0.032, "step": 17730 }, { "epoch": 2.97, "grad_norm": 0.4409927725791931, "learning_rate": 6.917600220903709e-09, "loss": 0.0322, "step": 17731 }, { "epoch": 2.97, "grad_norm": 0.34664982557296753, "learning_rate": 6.850609716941848e-09, "loss": 0.0308, "step": 17732 }, { "epoch": 2.97, "grad_norm": 0.31225112080574036, "learning_rate": 6.783945052286634e-09, "loss": 0.0257, "step": 17733 }, { "epoch": 2.97, "grad_norm": 0.36091360449790955, "learning_rate": 6.717606229108553e-09, "loss": 0.029, "step": 17734 }, { "epoch": 2.97, "grad_norm": 0.4095914661884308, "learning_rate": 6.651593249573651e-09, "loss": 0.0344, "step": 17735 }, { "epoch": 2.97, "grad_norm": 0.36199963092803955, "learning_rate": 6.585906115833541e-09, "loss": 0.0398, "step": 17736 }, { "epoch": 2.97, "grad_norm": 0.33520928025245667, "learning_rate": 6.52054483002873e-09, "loss": 0.0237, "step": 17737 }, { "epoch": 2.97, "grad_norm": 0.4413280785083771, "learning_rate": 6.455509394290849e-09, "loss": 0.0326, "step": 17738 }, { "epoch": 2.97, "grad_norm": 0.24462851881980896, "learning_rate": 6.3907998107426424e-09, "loss": 0.0174, "step": 17739 }, { "epoch": 2.97, "grad_norm": 0.3908236026763916, "learning_rate": 6.326416081491315e-09, "loss": 0.0246, "step": 17740 }, { "epoch": 2.97, "grad_norm": 0.5008156299591064, "learning_rate": 6.262358208636299e-09, "loss": 0.0402, "step": 17741 }, { "epoch": 2.97, "grad_norm": 0.3899669945240021, "learning_rate": 6.198626194268143e-09, "loss": 0.0288, "step": 17742 }, { "epoch": 2.97, "grad_norm": 0.3559224009513855, "learning_rate": 6.135220040464074e-09, "loss": 0.031, "step": 17743 }, { "epoch": 2.97, "grad_norm": 0.481257826089859, "learning_rate": 6.072139749291328e-09, "loss": 0.0286, "step": 17744 }, { "epoch": 2.97, "grad_norm": 0.37558433413505554, "learning_rate": 6.0093853228071486e-09, "loss": 0.026, "step": 17745 }, { "epoch": 2.97, "grad_norm": 0.40890297293663025, "learning_rate": 5.946956763056566e-09, "loss": 0.0309, "step": 17746 }, { "epoch": 2.97, "grad_norm": 0.3220181465148926, "learning_rate": 5.88485407207684e-09, "loss": 0.0302, "step": 17747 }, { "epoch": 2.97, "grad_norm": 0.2893708050251007, "learning_rate": 5.823077251890796e-09, "loss": 0.026, "step": 17748 }, { "epoch": 2.97, "grad_norm": 0.43986907601356506, "learning_rate": 5.7616263045146e-09, "loss": 0.029, "step": 17749 }, { "epoch": 2.97, "grad_norm": 0.29246318340301514, "learning_rate": 5.700501231952204e-09, "loss": 0.0211, "step": 17750 }, { "epoch": 2.97, "grad_norm": 0.41139695048332214, "learning_rate": 5.639702036194239e-09, "loss": 0.0389, "step": 17751 }, { "epoch": 2.97, "grad_norm": 0.3445594906806946, "learning_rate": 5.579228719225782e-09, "loss": 0.0299, "step": 17752 }, { "epoch": 2.97, "grad_norm": 0.32397282123565674, "learning_rate": 5.51908128301748e-09, "loss": 0.0203, "step": 17753 }, { "epoch": 2.97, "grad_norm": 0.3688191771507263, "learning_rate": 5.459259729529987e-09, "loss": 0.0339, "step": 17754 }, { "epoch": 2.97, "grad_norm": 0.49275797605514526, "learning_rate": 5.399764060715074e-09, "loss": 0.0492, "step": 17755 }, { "epoch": 2.97, "grad_norm": 0.3770616054534912, "learning_rate": 5.340594278511191e-09, "loss": 0.0295, "step": 17756 }, { "epoch": 2.97, "grad_norm": 0.3301684260368347, "learning_rate": 5.281750384849016e-09, "loss": 0.0238, "step": 17757 }, { "epoch": 2.97, "grad_norm": 0.4037640392780304, "learning_rate": 5.223232381647014e-09, "loss": 0.0386, "step": 17758 }, { "epoch": 2.97, "grad_norm": 0.328763872385025, "learning_rate": 5.165040270812549e-09, "loss": 0.0274, "step": 17759 }, { "epoch": 2.97, "grad_norm": 0.37520045042037964, "learning_rate": 5.1071740542429914e-09, "loss": 0.0337, "step": 17760 }, { "epoch": 2.97, "grad_norm": 0.3734470009803772, "learning_rate": 5.049633733825721e-09, "loss": 0.023, "step": 17761 }, { "epoch": 2.97, "grad_norm": 0.4031265377998352, "learning_rate": 4.992419311437013e-09, "loss": 0.025, "step": 17762 }, { "epoch": 2.97, "grad_norm": 0.3297693431377411, "learning_rate": 4.935530788943155e-09, "loss": 0.0184, "step": 17763 }, { "epoch": 2.97, "grad_norm": 0.3928576111793518, "learning_rate": 4.8789681681971066e-09, "loss": 0.0319, "step": 17764 }, { "epoch": 2.97, "grad_norm": 1.0525686740875244, "learning_rate": 4.822731451044061e-09, "loss": 0.0396, "step": 17765 }, { "epoch": 2.97, "grad_norm": 0.45054492354393005, "learning_rate": 4.7668206393181036e-09, "loss": 0.0242, "step": 17766 }, { "epoch": 2.97, "grad_norm": 0.6162948608398438, "learning_rate": 4.711235734841113e-09, "loss": 0.0259, "step": 17767 }, { "epoch": 2.97, "grad_norm": 0.3977237939834595, "learning_rate": 4.655976739427193e-09, "loss": 0.0382, "step": 17768 }, { "epoch": 2.97, "grad_norm": 0.4230073392391205, "learning_rate": 4.601043654877124e-09, "loss": 0.0348, "step": 17769 }, { "epoch": 2.97, "grad_norm": 0.6232358813285828, "learning_rate": 4.546436482982808e-09, "loss": 0.038, "step": 17770 }, { "epoch": 2.97, "grad_norm": 0.43272313475608826, "learning_rate": 4.49215522552282e-09, "loss": 0.0406, "step": 17771 }, { "epoch": 2.97, "grad_norm": 0.3948621451854706, "learning_rate": 4.4381998842690784e-09, "loss": 0.0316, "step": 17772 }, { "epoch": 2.97, "grad_norm": 0.32049015164375305, "learning_rate": 4.384570460980175e-09, "loss": 0.026, "step": 17773 }, { "epoch": 2.97, "grad_norm": 0.7656598687171936, "learning_rate": 4.3312669574058216e-09, "loss": 0.0472, "step": 17774 }, { "epoch": 2.97, "grad_norm": 0.45829665660858154, "learning_rate": 4.278289375282407e-09, "loss": 0.051, "step": 17775 }, { "epoch": 2.97, "grad_norm": 0.43897950649261475, "learning_rate": 4.225637716337439e-09, "loss": 0.0439, "step": 17776 }, { "epoch": 2.97, "grad_norm": 0.2965250015258789, "learning_rate": 4.173311982288431e-09, "loss": 0.016, "step": 17777 }, { "epoch": 2.97, "grad_norm": 0.3098451495170593, "learning_rate": 4.121312174841796e-09, "loss": 0.0211, "step": 17778 }, { "epoch": 2.97, "grad_norm": 0.3583512306213379, "learning_rate": 4.0696382956917356e-09, "loss": 0.035, "step": 17779 }, { "epoch": 2.97, "grad_norm": 0.30019238591194153, "learning_rate": 4.018290346524678e-09, "loss": 0.0338, "step": 17780 }, { "epoch": 2.97, "grad_norm": 0.3303540349006653, "learning_rate": 3.967268329013729e-09, "loss": 0.0232, "step": 17781 }, { "epoch": 2.97, "grad_norm": 0.5781899094581604, "learning_rate": 3.916572244824224e-09, "loss": 0.0437, "step": 17782 }, { "epoch": 2.97, "grad_norm": 0.36706650257110596, "learning_rate": 3.866202095607063e-09, "loss": 0.0278, "step": 17783 }, { "epoch": 2.97, "grad_norm": 0.46815112233161926, "learning_rate": 3.816157883005378e-09, "loss": 0.0241, "step": 17784 }, { "epoch": 2.97, "grad_norm": 0.41024667024612427, "learning_rate": 3.766439608651196e-09, "loss": 0.0325, "step": 17785 }, { "epoch": 2.97, "grad_norm": 0.3745911419391632, "learning_rate": 3.717047274165442e-09, "loss": 0.0223, "step": 17786 }, { "epoch": 2.97, "grad_norm": 0.3985775411128998, "learning_rate": 3.66798088115905e-09, "loss": 0.0294, "step": 17787 }, { "epoch": 2.98, "grad_norm": 0.4065646231174469, "learning_rate": 3.619240431230742e-09, "loss": 0.0248, "step": 17788 }, { "epoch": 2.98, "grad_norm": 0.4276042580604553, "learning_rate": 3.5708259259703558e-09, "loss": 0.0305, "step": 17789 }, { "epoch": 2.98, "grad_norm": 0.5317956209182739, "learning_rate": 3.5227373669577404e-09, "loss": 0.0414, "step": 17790 }, { "epoch": 2.98, "grad_norm": 0.4721555709838867, "learning_rate": 3.4749747557594195e-09, "loss": 0.0344, "step": 17791 }, { "epoch": 2.98, "grad_norm": 0.5166428089141846, "learning_rate": 3.427538093933036e-09, "loss": 0.024, "step": 17792 }, { "epoch": 2.98, "grad_norm": 0.4573233127593994, "learning_rate": 3.380427383026241e-09, "loss": 0.0243, "step": 17793 }, { "epoch": 2.98, "grad_norm": 0.4045390486717224, "learning_rate": 3.3336426245733635e-09, "loss": 0.027, "step": 17794 }, { "epoch": 2.98, "grad_norm": 0.30811071395874023, "learning_rate": 3.287183820100959e-09, "loss": 0.027, "step": 17795 }, { "epoch": 2.98, "grad_norm": 0.3892064392566681, "learning_rate": 3.241050971124482e-09, "loss": 0.0269, "step": 17796 }, { "epoch": 2.98, "grad_norm": 0.37876319885253906, "learning_rate": 3.195244079147175e-09, "loss": 0.0231, "step": 17797 }, { "epoch": 2.98, "grad_norm": 0.3292156159877777, "learning_rate": 3.149763145662288e-09, "loss": 0.0278, "step": 17798 }, { "epoch": 2.98, "grad_norm": 0.4623505771160126, "learning_rate": 3.104608172154189e-09, "loss": 0.0286, "step": 17799 }, { "epoch": 2.98, "grad_norm": 0.3585739731788635, "learning_rate": 3.059779160093923e-09, "loss": 0.0279, "step": 17800 }, { "epoch": 2.98, "grad_norm": 0.5803201198577881, "learning_rate": 3.015276110944765e-09, "loss": 0.0412, "step": 17801 }, { "epoch": 2.98, "grad_norm": 0.3245786428451538, "learning_rate": 2.971099026155555e-09, "loss": 0.0258, "step": 17802 }, { "epoch": 2.98, "grad_norm": 0.2913137376308441, "learning_rate": 2.9272479071684733e-09, "loss": 0.0352, "step": 17803 }, { "epoch": 2.98, "grad_norm": 0.6943495273590088, "learning_rate": 2.883722755412377e-09, "loss": 0.0302, "step": 17804 }, { "epoch": 2.98, "grad_norm": 0.41654542088508606, "learning_rate": 2.8405235723072412e-09, "loss": 0.0402, "step": 17805 }, { "epoch": 2.98, "grad_norm": 0.40191981196403503, "learning_rate": 2.797650359260828e-09, "loss": 0.0221, "step": 17806 }, { "epoch": 2.98, "grad_norm": 0.2570289075374603, "learning_rate": 2.75510311767202e-09, "loss": 0.0221, "step": 17807 }, { "epoch": 2.98, "grad_norm": 0.4060062766075134, "learning_rate": 2.7128818489274842e-09, "loss": 0.0321, "step": 17808 }, { "epoch": 2.98, "grad_norm": 0.4673857092857361, "learning_rate": 2.6709865544038982e-09, "loss": 0.0343, "step": 17809 }, { "epoch": 2.98, "grad_norm": 0.3052349388599396, "learning_rate": 2.6294172354668355e-09, "loss": 0.0239, "step": 17810 }, { "epoch": 2.98, "grad_norm": 0.2896278500556946, "learning_rate": 2.5881738934740998e-09, "loss": 0.0223, "step": 17811 }, { "epoch": 2.98, "grad_norm": 0.39517199993133545, "learning_rate": 2.5472565297668395e-09, "loss": 0.024, "step": 17812 }, { "epoch": 2.98, "grad_norm": 0.4116764962673187, "learning_rate": 2.5066651456817637e-09, "loss": 0.0214, "step": 17813 }, { "epoch": 2.98, "grad_norm": 0.4575057923793793, "learning_rate": 2.4663997425422583e-09, "loss": 0.0252, "step": 17814 }, { "epoch": 2.98, "grad_norm": 0.34460365772247314, "learning_rate": 2.4264603216606065e-09, "loss": 0.0214, "step": 17815 }, { "epoch": 2.98, "grad_norm": 0.3374418616294861, "learning_rate": 2.38684688433799e-09, "loss": 0.0242, "step": 17816 }, { "epoch": 2.98, "grad_norm": 0.41158851981163025, "learning_rate": 2.3475594318678183e-09, "loss": 0.0409, "step": 17817 }, { "epoch": 2.98, "grad_norm": 0.3298953175544739, "learning_rate": 2.3085979655312894e-09, "loss": 0.0192, "step": 17818 }, { "epoch": 2.98, "grad_norm": 0.35252201557159424, "learning_rate": 2.269962486597388e-09, "loss": 0.0229, "step": 17819 }, { "epoch": 2.98, "grad_norm": 0.33887189626693726, "learning_rate": 2.2316529963262167e-09, "loss": 0.0349, "step": 17820 }, { "epoch": 2.98, "grad_norm": 0.3046943247318268, "learning_rate": 2.193669495967887e-09, "loss": 0.0259, "step": 17821 }, { "epoch": 2.98, "grad_norm": 0.34042155742645264, "learning_rate": 2.156011986760298e-09, "loss": 0.0118, "step": 17822 }, { "epoch": 2.98, "grad_norm": 0.396598219871521, "learning_rate": 2.1186804699302453e-09, "loss": 0.0303, "step": 17823 }, { "epoch": 2.98, "grad_norm": 0.5632432699203491, "learning_rate": 2.0816749466967546e-09, "loss": 0.0331, "step": 17824 }, { "epoch": 2.98, "grad_norm": 0.5188268423080444, "learning_rate": 2.0449954182655273e-09, "loss": 0.0367, "step": 17825 }, { "epoch": 2.98, "grad_norm": 0.46895384788513184, "learning_rate": 2.0086418858333843e-09, "loss": 0.0421, "step": 17826 }, { "epoch": 2.98, "grad_norm": 0.30409812927246094, "learning_rate": 1.9726143505838237e-09, "loss": 0.0197, "step": 17827 }, { "epoch": 2.98, "grad_norm": 0.30973926186561584, "learning_rate": 1.9369128136925706e-09, "loss": 0.0172, "step": 17828 }, { "epoch": 2.98, "grad_norm": 0.35702404379844666, "learning_rate": 1.90153727632425e-09, "loss": 0.0334, "step": 17829 }, { "epoch": 2.98, "grad_norm": 0.37481552362442017, "learning_rate": 1.8664877396323833e-09, "loss": 0.0332, "step": 17830 }, { "epoch": 2.98, "grad_norm": 0.2747703790664673, "learning_rate": 1.8317642047593898e-09, "loss": 0.0328, "step": 17831 }, { "epoch": 2.98, "grad_norm": 0.2854340970516205, "learning_rate": 1.7973666728365868e-09, "loss": 0.0175, "step": 17832 }, { "epoch": 2.98, "grad_norm": 0.44105979800224304, "learning_rate": 1.76329514498641e-09, "loss": 0.0439, "step": 17833 }, { "epoch": 2.98, "grad_norm": 0.3020128905773163, "learning_rate": 1.7295496223201925e-09, "loss": 0.0219, "step": 17834 }, { "epoch": 2.98, "grad_norm": 0.42062047123908997, "learning_rate": 1.6961301059381653e-09, "loss": 0.0332, "step": 17835 }, { "epoch": 2.98, "grad_norm": 0.4169602692127228, "learning_rate": 1.663036596929457e-09, "loss": 0.033, "step": 17836 }, { "epoch": 2.98, "grad_norm": 0.42359545826911926, "learning_rate": 1.6302690963732048e-09, "loss": 0.0267, "step": 17837 }, { "epoch": 2.98, "grad_norm": 0.4455876052379608, "learning_rate": 1.5978276053385533e-09, "loss": 0.0337, "step": 17838 }, { "epoch": 2.98, "grad_norm": 0.4366706311702728, "learning_rate": 1.5657121248824347e-09, "loss": 0.0355, "step": 17839 }, { "epoch": 2.98, "grad_norm": 0.39649152755737305, "learning_rate": 1.5339226560517895e-09, "loss": 0.0351, "step": 17840 }, { "epoch": 2.98, "grad_norm": 0.3733845055103302, "learning_rate": 1.5024591998835658e-09, "loss": 0.0289, "step": 17841 }, { "epoch": 2.98, "grad_norm": 0.577222466468811, "learning_rate": 1.47132175740472e-09, "loss": 0.0358, "step": 17842 }, { "epoch": 2.98, "grad_norm": 0.48228341341018677, "learning_rate": 1.4405103296288859e-09, "loss": 0.0368, "step": 17843 }, { "epoch": 2.98, "grad_norm": 0.41192343831062317, "learning_rate": 1.4100249175619251e-09, "loss": 0.0436, "step": 17844 }, { "epoch": 2.98, "grad_norm": 0.3187392055988312, "learning_rate": 1.3798655221974876e-09, "loss": 0.0291, "step": 17845 }, { "epoch": 2.98, "grad_norm": 0.5169587135314941, "learning_rate": 1.3500321445181208e-09, "loss": 0.0374, "step": 17846 }, { "epoch": 2.98, "grad_norm": 0.30105897784233093, "learning_rate": 1.32052478549749e-09, "loss": 0.0187, "step": 17847 }, { "epoch": 2.99, "grad_norm": 0.40005385875701904, "learning_rate": 1.291343446098159e-09, "loss": 0.0407, "step": 17848 }, { "epoch": 2.99, "grad_norm": 0.3597397804260254, "learning_rate": 1.262488127270478e-09, "loss": 0.0235, "step": 17849 }, { "epoch": 2.99, "grad_norm": 0.32333895564079285, "learning_rate": 1.233958829955917e-09, "loss": 0.0274, "step": 17850 }, { "epoch": 2.99, "grad_norm": 0.5280017256736755, "learning_rate": 1.205755555083732e-09, "loss": 0.0396, "step": 17851 }, { "epoch": 2.99, "grad_norm": 0.34923848509788513, "learning_rate": 1.1778783035754083e-09, "loss": 0.0283, "step": 17852 }, { "epoch": 2.99, "grad_norm": 0.35724157094955444, "learning_rate": 1.1503270763379981e-09, "loss": 0.019, "step": 17853 }, { "epoch": 2.99, "grad_norm": 0.4062645733356476, "learning_rate": 1.1231018742718924e-09, "loss": 0.0436, "step": 17854 }, { "epoch": 2.99, "grad_norm": 0.41251295804977417, "learning_rate": 1.0962026982630492e-09, "loss": 0.0326, "step": 17855 }, { "epoch": 2.99, "grad_norm": 0.454340398311615, "learning_rate": 1.0696295491896546e-09, "loss": 0.0481, "step": 17856 }, { "epoch": 2.99, "grad_norm": 0.5864945650100708, "learning_rate": 1.0433824279165728e-09, "loss": 0.0244, "step": 17857 }, { "epoch": 2.99, "grad_norm": 0.29087013006210327, "learning_rate": 1.0174613353008956e-09, "loss": 0.0185, "step": 17858 }, { "epoch": 2.99, "grad_norm": 0.24944321811199188, "learning_rate": 9.91866272188613e-10, "loss": 0.0158, "step": 17859 }, { "epoch": 2.99, "grad_norm": 0.3428092896938324, "learning_rate": 9.665972394123925e-10, "loss": 0.0269, "step": 17860 }, { "epoch": 2.99, "grad_norm": 0.4446142613887787, "learning_rate": 9.416542377982396e-10, "loss": 0.0371, "step": 17861 }, { "epoch": 2.99, "grad_norm": 0.5576842427253723, "learning_rate": 9.170372681566176e-10, "loss": 0.0341, "step": 17862 }, { "epoch": 2.99, "grad_norm": 0.33544573187828064, "learning_rate": 8.927463312935481e-10, "loss": 0.0186, "step": 17863 }, { "epoch": 2.99, "grad_norm": 0.6303685307502747, "learning_rate": 8.687814279983997e-10, "loss": 0.0252, "step": 17864 }, { "epoch": 2.99, "grad_norm": 0.5077300667762756, "learning_rate": 8.451425590538798e-10, "loss": 0.0374, "step": 17865 }, { "epoch": 2.99, "grad_norm": 0.3142698109149933, "learning_rate": 8.21829725230483e-10, "loss": 0.0264, "step": 17866 }, { "epoch": 2.99, "grad_norm": 0.4325321316719055, "learning_rate": 7.988429272876019e-10, "loss": 0.032, "step": 17867 }, { "epoch": 2.99, "grad_norm": 0.9100531339645386, "learning_rate": 7.761821659768576e-10, "loss": 0.0333, "step": 17868 }, { "epoch": 2.99, "grad_norm": 0.37165990471839905, "learning_rate": 7.538474420343278e-10, "loss": 0.0389, "step": 17869 }, { "epoch": 2.99, "grad_norm": 0.7606257200241089, "learning_rate": 7.318387561905393e-10, "loss": 0.0545, "step": 17870 }, { "epoch": 2.99, "grad_norm": 0.3421931266784668, "learning_rate": 7.101561091615861e-10, "loss": 0.0314, "step": 17871 }, { "epoch": 2.99, "grad_norm": 0.3379805386066437, "learning_rate": 6.887995016557903e-10, "loss": 0.0246, "step": 17872 }, { "epoch": 2.99, "grad_norm": 0.35860303044319153, "learning_rate": 6.677689343681514e-10, "loss": 0.0219, "step": 17873 }, { "epoch": 2.99, "grad_norm": 0.4155217111110687, "learning_rate": 6.470644079858979e-10, "loss": 0.0441, "step": 17874 }, { "epoch": 2.99, "grad_norm": 0.6620409488677979, "learning_rate": 6.266859231829347e-10, "loss": 0.0348, "step": 17875 }, { "epoch": 2.99, "grad_norm": 0.4556795656681061, "learning_rate": 6.066334806242857e-10, "loss": 0.0391, "step": 17876 }, { "epoch": 2.99, "grad_norm": 0.4505921006202698, "learning_rate": 5.869070809627619e-10, "loss": 0.0466, "step": 17877 }, { "epoch": 2.99, "grad_norm": 0.4419930875301361, "learning_rate": 5.67506724843403e-10, "loss": 0.0363, "step": 17878 }, { "epoch": 2.99, "grad_norm": 0.2931143045425415, "learning_rate": 5.484324128979257e-10, "loss": 0.016, "step": 17879 }, { "epoch": 2.99, "grad_norm": 0.3170875608921051, "learning_rate": 5.296841457469449e-10, "loss": 0.0354, "step": 17880 }, { "epoch": 2.99, "grad_norm": 0.38316571712493896, "learning_rate": 5.112619240044136e-10, "loss": 0.0376, "step": 17881 }, { "epoch": 2.99, "grad_norm": 0.36062806844711304, "learning_rate": 4.931657482687425e-10, "loss": 0.0236, "step": 17882 }, { "epoch": 2.99, "grad_norm": 0.5161061882972717, "learning_rate": 4.753956191305698e-10, "loss": 0.0391, "step": 17883 }, { "epoch": 2.99, "grad_norm": 0.4991001486778259, "learning_rate": 4.579515371705423e-10, "loss": 0.0384, "step": 17884 }, { "epoch": 2.99, "grad_norm": 0.7872998714447021, "learning_rate": 4.4083350295598404e-10, "loss": 0.0227, "step": 17885 }, { "epoch": 2.99, "grad_norm": 0.42590034008026123, "learning_rate": 4.2404151704533713e-10, "loss": 0.0274, "step": 17886 }, { "epoch": 2.99, "grad_norm": 0.35734695196151733, "learning_rate": 4.0757557998594156e-10, "loss": 0.0217, "step": 17887 }, { "epoch": 2.99, "grad_norm": 0.440387487411499, "learning_rate": 3.9143569231514523e-10, "loss": 0.0297, "step": 17888 }, { "epoch": 2.99, "grad_norm": 0.5197552442550659, "learning_rate": 3.7562185456030406e-10, "loss": 0.0363, "step": 17889 }, { "epoch": 2.99, "grad_norm": 0.4776335656642914, "learning_rate": 3.601340672343412e-10, "loss": 0.0326, "step": 17890 }, { "epoch": 2.99, "grad_norm": 0.39511409401893616, "learning_rate": 3.449723308446284e-10, "loss": 0.0377, "step": 17891 }, { "epoch": 2.99, "grad_norm": 0.26769959926605225, "learning_rate": 3.301366458852151e-10, "loss": 0.0219, "step": 17892 }, { "epoch": 2.99, "grad_norm": 0.47564637660980225, "learning_rate": 3.1562701283793797e-10, "loss": 0.0299, "step": 17893 }, { "epoch": 2.99, "grad_norm": 0.31777530908584595, "learning_rate": 3.0144343217797247e-10, "loss": 0.018, "step": 17894 }, { "epoch": 2.99, "grad_norm": 0.4078538417816162, "learning_rate": 2.8758590436717136e-10, "loss": 0.0274, "step": 17895 }, { "epoch": 2.99, "grad_norm": 0.3245237171649933, "learning_rate": 2.740544298573955e-10, "loss": 0.0285, "step": 17896 }, { "epoch": 2.99, "grad_norm": 0.39955881237983704, "learning_rate": 2.6084900908940336e-10, "loss": 0.0366, "step": 17897 }, { "epoch": 2.99, "grad_norm": 0.4726966917514801, "learning_rate": 2.479696424939615e-10, "loss": 0.0402, "step": 17898 }, { "epoch": 2.99, "grad_norm": 0.6989284157752991, "learning_rate": 2.354163304918444e-10, "loss": 0.0362, "step": 17899 }, { "epoch": 2.99, "grad_norm": 0.4324267506599426, "learning_rate": 2.2318907349161423e-10, "loss": 0.0177, "step": 17900 }, { "epoch": 2.99, "grad_norm": 0.3583243787288666, "learning_rate": 2.1128787189184098e-10, "loss": 0.0231, "step": 17901 }, { "epoch": 2.99, "grad_norm": 0.3546251952648163, "learning_rate": 1.997127260811027e-10, "loss": 0.0237, "step": 17902 }, { "epoch": 2.99, "grad_norm": 0.3608177602291107, "learning_rate": 1.8846363643576505e-10, "loss": 0.0259, "step": 17903 }, { "epoch": 2.99, "grad_norm": 0.4980717897415161, "learning_rate": 1.775406033233118e-10, "loss": 0.0391, "step": 17904 }, { "epoch": 2.99, "grad_norm": 0.4500148892402649, "learning_rate": 1.6694362710123478e-10, "loss": 0.0335, "step": 17905 }, { "epoch": 2.99, "grad_norm": 0.3719852864742279, "learning_rate": 1.5667270811259293e-10, "loss": 0.0373, "step": 17906 }, { "epoch": 2.99, "grad_norm": 0.40882059931755066, "learning_rate": 1.467278466937838e-10, "loss": 0.0376, "step": 17907 }, { "epoch": 3.0, "grad_norm": 0.502804160118103, "learning_rate": 1.3710904316899254e-10, "loss": 0.0287, "step": 17908 }, { "epoch": 3.0, "grad_norm": 0.2952256500720978, "learning_rate": 1.27816297851302e-10, "loss": 0.0209, "step": 17909 }, { "epoch": 3.0, "grad_norm": 0.5052370429039001, "learning_rate": 1.1884961104380311e-10, "loss": 0.0322, "step": 17910 }, { "epoch": 3.0, "grad_norm": 0.333963006734848, "learning_rate": 1.1020898303959471e-10, "loss": 0.0272, "step": 17911 }, { "epoch": 3.0, "grad_norm": 0.359017938375473, "learning_rate": 1.0189441411956324e-10, "loss": 0.0304, "step": 17912 }, { "epoch": 3.0, "grad_norm": 0.5268967747688293, "learning_rate": 9.390590455571335e-11, "loss": 0.0458, "step": 17913 }, { "epoch": 3.0, "grad_norm": 0.4260559678077698, "learning_rate": 8.624345460783723e-11, "loss": 0.0324, "step": 17914 }, { "epoch": 3.0, "grad_norm": 0.41543662548065186, "learning_rate": 7.890706452573505e-11, "loss": 0.0308, "step": 17915 }, { "epoch": 3.0, "grad_norm": 0.3257021903991699, "learning_rate": 7.189673454921498e-11, "loss": 0.0284, "step": 17916 }, { "epoch": 3.0, "grad_norm": 0.38448622822761536, "learning_rate": 6.521246490587273e-11, "loss": 0.0305, "step": 17917 }, { "epoch": 3.0, "grad_norm": 0.2536414563655853, "learning_rate": 5.885425581442229e-11, "loss": 0.0271, "step": 17918 }, { "epoch": 3.0, "grad_norm": 0.31296417117118835, "learning_rate": 5.282210748136507e-11, "loss": 0.0197, "step": 17919 }, { "epoch": 3.0, "grad_norm": 0.36413946747779846, "learning_rate": 4.7116020105431036e-11, "loss": 0.0263, "step": 17920 }, { "epoch": 3.0, "grad_norm": 0.41866207122802734, "learning_rate": 4.173599386980698e-11, "loss": 0.0217, "step": 17921 }, { "epoch": 3.0, "grad_norm": 0.29418686032295227, "learning_rate": 3.668202895212858e-11, "loss": 0.0253, "step": 17922 }, { "epoch": 3.0, "grad_norm": 0.42467719316482544, "learning_rate": 3.195412551559862e-11, "loss": 0.0328, "step": 17923 }, { "epoch": 3.0, "grad_norm": 0.4518546164035797, "learning_rate": 2.7552283716758554e-11, "loss": 0.0377, "step": 17924 }, { "epoch": 3.0, "grad_norm": 0.28338423371315, "learning_rate": 2.3476503696606702e-11, "loss": 0.0254, "step": 17925 }, { "epoch": 3.0, "grad_norm": 0.469907283782959, "learning_rate": 1.9726785588369823e-11, "loss": 0.0234, "step": 17926 }, { "epoch": 3.0, "grad_norm": 0.25858885049819946, "learning_rate": 1.6303129516392902e-11, "loss": 0.0271, "step": 17927 }, { "epoch": 3.0, "grad_norm": 0.43678906559944153, "learning_rate": 1.3205535589477791e-11, "loss": 0.03, "step": 17928 }, { "epoch": 3.0, "grad_norm": 0.40026530623435974, "learning_rate": 1.0434003910875235e-11, "loss": 0.0337, "step": 17929 }, { "epoch": 3.0, "grad_norm": 0.30089136958122253, "learning_rate": 7.988534569403073e-12, "loss": 0.0293, "step": 17930 }, { "epoch": 3.0, "grad_norm": 0.31213510036468506, "learning_rate": 5.8691276461075864e-12, "loss": 0.0333, "step": 17931 }, { "epoch": 3.0, "grad_norm": 0.4638673663139343, "learning_rate": 4.0757832098226034e-12, "loss": 0.0329, "step": 17932 }, { "epoch": 3.0, "grad_norm": 0.29059821367263794, "learning_rate": 2.6085013182797214e-12, "loss": 0.0223, "step": 17933 }, { "epoch": 3.0, "grad_norm": 0.4478525221347809, "learning_rate": 1.46728201921853e-12, "loss": 0.0214, "step": 17934 }, { "epoch": 3.0, "grad_norm": 0.5466391444206238, "learning_rate": 6.52125350386612e-13, "loss": 0.0377, "step": 17935 }, { "epoch": 3.0, "grad_norm": 0.4739135503768921, "learning_rate": 1.630313395395433e-13, "loss": 0.0486, "step": 17936 }, { "epoch": 3.0, "grad_norm": 0.2578465938568115, "learning_rate": 0.0, "loss": 0.0234, "step": 17937 }, { "epoch": 3.0, "step": 17937, "total_flos": 2.218668829637922e+19, "train_loss": 0.08152479821400815, "train_runtime": 50553.2448, "train_samples_per_second": 45.415, "train_steps_per_second": 0.355 } ], "logging_steps": 1.0, "max_steps": 17937, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.218668829637922e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }