{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 140.0, "eval_steps": 25.0, "global_step": 5880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023880597014925373, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 76.8913, "step": 1 }, { "epoch": 0.04776119402985075, "grad_norm": 395.8459777832031, "learning_rate": 1e-05, "loss": 77.3235, "step": 2 }, { "epoch": 0.07164179104477612, "grad_norm": 161.73968505859375, "learning_rate": 9.988095238095239e-06, "loss": 67.3668, "step": 3 }, { "epoch": 0.0955223880597015, "grad_norm": 145.1616973876953, "learning_rate": 9.976190476190477e-06, "loss": 62.8542, "step": 4 }, { "epoch": 0.11940298507462686, "grad_norm": 52.10374069213867, "learning_rate": 9.964285714285714e-06, "loss": 59.8627, "step": 5 }, { "epoch": 0.14328358208955225, "grad_norm": 31.430763244628906, "learning_rate": 9.952380952380954e-06, "loss": 59.2378, "step": 6 }, { "epoch": 0.16716417910447762, "grad_norm": 37.42692947387695, "learning_rate": 9.940476190476192e-06, "loss": 58.3114, "step": 7 }, { "epoch": 0.191044776119403, "grad_norm": 32.284332275390625, "learning_rate": 9.92857142857143e-06, "loss": 57.3408, "step": 8 }, { "epoch": 0.21492537313432836, "grad_norm": 28.520849227905273, "learning_rate": 9.916666666666668e-06, "loss": 56.9256, "step": 9 }, { "epoch": 0.23880597014925373, "grad_norm": 40.480167388916016, "learning_rate": 9.904761904761906e-06, "loss": 57.4842, "step": 10 }, { "epoch": 0.2626865671641791, "grad_norm": 20.3512020111084, "learning_rate": 9.892857142857143e-06, "loss": 56.7597, "step": 11 }, { "epoch": 0.2865671641791045, "grad_norm": 22.308382034301758, "learning_rate": 9.880952380952381e-06, "loss": 56.594, "step": 12 }, { "epoch": 0.31044776119402984, "grad_norm": 14.086284637451172, "learning_rate": 9.869047619047621e-06, "loss": 56.6327, "step": 13 }, { "epoch": 0.33432835820895523, "grad_norm": NaN, "learning_rate": 9.857142857142859e-06, "loss": 63.0791, "step": 14 }, { "epoch": 0.3582089552238806, "grad_norm": 17.055604934692383, "learning_rate": 9.857142857142859e-06, "loss": 56.1551, "step": 15 }, { "epoch": 0.382089552238806, "grad_norm": 18.52287483215332, "learning_rate": 9.845238095238097e-06, "loss": 54.9502, "step": 16 }, { "epoch": 0.4059701492537313, "grad_norm": 32.66905212402344, "learning_rate": 9.833333333333333e-06, "loss": 55.6494, "step": 17 }, { "epoch": 0.4298507462686567, "grad_norm": 24.075742721557617, "learning_rate": 9.821428571428573e-06, "loss": 55.4766, "step": 18 }, { "epoch": 0.4537313432835821, "grad_norm": 31.505783081054688, "learning_rate": 9.80952380952381e-06, "loss": 55.1481, "step": 19 }, { "epoch": 0.47761194029850745, "grad_norm": 30.53020477294922, "learning_rate": 9.797619047619048e-06, "loss": 55.158, "step": 20 }, { "epoch": 0.5014925373134328, "grad_norm": 14.44444751739502, "learning_rate": 9.785714285714286e-06, "loss": 54.8286, "step": 21 }, { "epoch": 0.5253731343283582, "grad_norm": 53.601078033447266, "learning_rate": 9.773809523809524e-06, "loss": 55.1466, "step": 22 }, { "epoch": 0.5492537313432836, "grad_norm": 37.156028747558594, "learning_rate": 9.761904761904762e-06, "loss": 54.629, "step": 23 }, { "epoch": 0.573134328358209, "grad_norm": 41.84994125366211, "learning_rate": 9.75e-06, "loss": 55.2766, "step": 24 }, { "epoch": 0.5970149253731343, "grad_norm": 50.61705017089844, "learning_rate": 9.73809523809524e-06, "loss": 54.3497, "step": 25 }, { "epoch": 0.6208955223880597, "grad_norm": 16.964982986450195, "learning_rate": 9.726190476190477e-06, "loss": 55.1673, "step": 26 }, { "epoch": 0.6447761194029851, "grad_norm": 22.71157455444336, "learning_rate": 9.714285714285715e-06, "loss": 54.8001, "step": 27 }, { "epoch": 0.6686567164179105, "grad_norm": 16.372802734375, "learning_rate": 9.702380952380953e-06, "loss": 54.4506, "step": 28 }, { "epoch": 0.6925373134328359, "grad_norm": 13.21664047241211, "learning_rate": 9.690476190476191e-06, "loss": 53.0488, "step": 29 }, { "epoch": 0.7164179104477612, "grad_norm": NaN, "learning_rate": 9.678571428571429e-06, "loss": 74.7707, "step": 30 }, { "epoch": 0.7402985074626866, "grad_norm": 23.045652389526367, "learning_rate": 9.678571428571429e-06, "loss": 54.1909, "step": 31 }, { "epoch": 0.764179104477612, "grad_norm": 15.034178733825684, "learning_rate": 9.666666666666667e-06, "loss": 53.3253, "step": 32 }, { "epoch": 0.7880597014925373, "grad_norm": 14.148232460021973, "learning_rate": 9.654761904761906e-06, "loss": 53.693, "step": 33 }, { "epoch": 0.8119402985074626, "grad_norm": NaN, "learning_rate": 9.642857142857144e-06, "loss": 61.3517, "step": 34 }, { "epoch": 0.835820895522388, "grad_norm": 14.757994651794434, "learning_rate": 9.642857142857144e-06, "loss": 53.3175, "step": 35 }, { "epoch": 0.8597014925373134, "grad_norm": 10.875706672668457, "learning_rate": 9.630952380952382e-06, "loss": 54.2592, "step": 36 }, { "epoch": 0.8835820895522388, "grad_norm": 9.926539421081543, "learning_rate": 9.61904761904762e-06, "loss": 53.8721, "step": 37 }, { "epoch": 0.9074626865671642, "grad_norm": 17.697235107421875, "learning_rate": 9.607142857142858e-06, "loss": 54.2901, "step": 38 }, { "epoch": 0.9313432835820895, "grad_norm": 29.19430160522461, "learning_rate": 9.595238095238096e-06, "loss": 53.1261, "step": 39 }, { "epoch": 0.9552238805970149, "grad_norm": 12.3985595703125, "learning_rate": 9.583333333333335e-06, "loss": 53.9815, "step": 40 }, { "epoch": 0.9791044776119403, "grad_norm": 17.109691619873047, "learning_rate": 9.571428571428573e-06, "loss": 52.5838, "step": 41 }, { "epoch": 1.0, "grad_norm": 14.529239654541016, "learning_rate": 9.559523809523811e-06, "loss": 46.1888, "step": 42 }, { "epoch": 1.0238805970149254, "grad_norm": 15.683514595031738, "learning_rate": 9.547619047619049e-06, "loss": 52.2043, "step": 43 }, { "epoch": 1.0477611940298508, "grad_norm": 26.219507217407227, "learning_rate": 9.535714285714287e-06, "loss": 53.09, "step": 44 }, { "epoch": 1.0716417910447762, "grad_norm": 19.859697341918945, "learning_rate": 9.523809523809525e-06, "loss": 52.5858, "step": 45 }, { "epoch": 1.0955223880597016, "grad_norm": 11.090332984924316, "learning_rate": 9.511904761904763e-06, "loss": 53.7674, "step": 46 }, { "epoch": 1.1194029850746268, "grad_norm": 15.586993217468262, "learning_rate": 9.5e-06, "loss": 53.4901, "step": 47 }, { "epoch": 1.1432835820895522, "grad_norm": 22.734928131103516, "learning_rate": 9.488095238095238e-06, "loss": 53.6667, "step": 48 }, { "epoch": 1.1671641791044776, "grad_norm": 16.382047653198242, "learning_rate": 9.476190476190476e-06, "loss": 53.2914, "step": 49 }, { "epoch": 1.191044776119403, "grad_norm": 15.916092872619629, "learning_rate": 9.464285714285714e-06, "loss": 51.729, "step": 50 }, { "epoch": 1.2149253731343284, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 66.3151, "step": 51 }, { "epoch": 1.2388059701492538, "grad_norm": 20.121395111083984, "learning_rate": 9.452380952380952e-06, "loss": 53.866, "step": 52 }, { "epoch": 1.2626865671641792, "grad_norm": NaN, "learning_rate": 9.440476190476192e-06, "loss": 67.3538, "step": 53 }, { "epoch": 1.2865671641791045, "grad_norm": 13.869222640991211, "learning_rate": 9.440476190476192e-06, "loss": 53.5431, "step": 54 }, { "epoch": 1.31044776119403, "grad_norm": 25.478107452392578, "learning_rate": 9.42857142857143e-06, "loss": 52.7126, "step": 55 }, { "epoch": 1.3343283582089551, "grad_norm": 35.76942825317383, "learning_rate": 9.416666666666667e-06, "loss": 53.8082, "step": 56 }, { "epoch": 1.3582089552238805, "grad_norm": 17.95109748840332, "learning_rate": 9.404761904761905e-06, "loss": 53.0275, "step": 57 }, { "epoch": 1.382089552238806, "grad_norm": 26.80129623413086, "learning_rate": 9.392857142857143e-06, "loss": 53.3295, "step": 58 }, { "epoch": 1.4059701492537313, "grad_norm": 25.803054809570312, "learning_rate": 9.380952380952381e-06, "loss": 52.3771, "step": 59 }, { "epoch": 1.4298507462686567, "grad_norm": 35.39850616455078, "learning_rate": 9.36904761904762e-06, "loss": 52.9467, "step": 60 }, { "epoch": 1.4537313432835821, "grad_norm": 27.43315887451172, "learning_rate": 9.357142857142859e-06, "loss": 53.1359, "step": 61 }, { "epoch": 1.4776119402985075, "grad_norm": 33.431400299072266, "learning_rate": 9.345238095238096e-06, "loss": 52.2006, "step": 62 }, { "epoch": 1.5014925373134327, "grad_norm": 33.08237075805664, "learning_rate": 9.333333333333334e-06, "loss": 52.6816, "step": 63 }, { "epoch": 1.5253731343283583, "grad_norm": 25.711997985839844, "learning_rate": 9.321428571428572e-06, "loss": 52.6113, "step": 64 }, { "epoch": 1.5492537313432835, "grad_norm": 32.688297271728516, "learning_rate": 9.30952380952381e-06, "loss": 51.9086, "step": 65 }, { "epoch": 1.573134328358209, "grad_norm": 31.856857299804688, "learning_rate": 9.297619047619048e-06, "loss": 53.1913, "step": 66 }, { "epoch": 1.5970149253731343, "grad_norm": 26.231773376464844, "learning_rate": 9.285714285714288e-06, "loss": 51.464, "step": 67 }, { "epoch": 1.6208955223880597, "grad_norm": 29.39109230041504, "learning_rate": 9.273809523809525e-06, "loss": 52.0572, "step": 68 }, { "epoch": 1.644776119402985, "grad_norm": 28.86277198791504, "learning_rate": 9.261904761904763e-06, "loss": 52.7115, "step": 69 }, { "epoch": 1.6686567164179105, "grad_norm": 28.82640266418457, "learning_rate": 9.250000000000001e-06, "loss": 53.1756, "step": 70 }, { "epoch": 1.6925373134328359, "grad_norm": 31.32577133178711, "learning_rate": 9.238095238095239e-06, "loss": 52.369, "step": 71 }, { "epoch": 1.716417910447761, "grad_norm": 18.739269256591797, "learning_rate": 9.226190476190477e-06, "loss": 52.6631, "step": 72 }, { "epoch": 1.7402985074626867, "grad_norm": 22.889320373535156, "learning_rate": 9.214285714285715e-06, "loss": 52.4786, "step": 73 }, { "epoch": 1.7641791044776118, "grad_norm": 24.175626754760742, "learning_rate": 9.202380952380953e-06, "loss": 52.1792, "step": 74 }, { "epoch": 1.7880597014925375, "grad_norm": 25.257095336914062, "learning_rate": 9.19047619047619e-06, "loss": 51.39, "step": 75 }, { "epoch": 1.8119402985074626, "grad_norm": 35.745208740234375, "learning_rate": 9.178571428571429e-06, "loss": 53.331, "step": 76 }, { "epoch": 1.835820895522388, "grad_norm": 23.815813064575195, "learning_rate": 9.166666666666666e-06, "loss": 52.7632, "step": 77 }, { "epoch": 1.8597014925373134, "grad_norm": 32.405757904052734, "learning_rate": 9.154761904761906e-06, "loss": 52.3165, "step": 78 }, { "epoch": 1.8835820895522388, "grad_norm": 38.95046615600586, "learning_rate": 9.142857142857144e-06, "loss": 52.0931, "step": 79 }, { "epoch": 1.9074626865671642, "grad_norm": 22.412342071533203, "learning_rate": 9.130952380952382e-06, "loss": 51.8732, "step": 80 }, { "epoch": 1.9313432835820894, "grad_norm": 35.088253021240234, "learning_rate": 9.11904761904762e-06, "loss": 52.0182, "step": 81 }, { "epoch": 1.955223880597015, "grad_norm": 20.136964797973633, "learning_rate": 9.107142857142858e-06, "loss": 52.7956, "step": 82 }, { "epoch": 1.9791044776119402, "grad_norm": 20.860034942626953, "learning_rate": 9.095238095238095e-06, "loss": 50.339, "step": 83 }, { "epoch": 2.0, "grad_norm": 20.38931655883789, "learning_rate": 9.083333333333333e-06, "loss": 45.3651, "step": 84 }, { "epoch": 2.023880597014925, "grad_norm": 21.519498825073242, "learning_rate": 9.071428571428573e-06, "loss": 52.1228, "step": 85 }, { "epoch": 2.047761194029851, "grad_norm": 30.47000503540039, "learning_rate": 9.05952380952381e-06, "loss": 51.1968, "step": 86 }, { "epoch": 2.071641791044776, "grad_norm": 19.728044509887695, "learning_rate": 9.047619047619049e-06, "loss": 52.1514, "step": 87 }, { "epoch": 2.0955223880597016, "grad_norm": 42.647281646728516, "learning_rate": 9.035714285714287e-06, "loss": 52.5597, "step": 88 }, { "epoch": 2.1194029850746268, "grad_norm": 55.45186233520508, "learning_rate": 9.023809523809524e-06, "loss": 53.7621, "step": 89 }, { "epoch": 2.1432835820895524, "grad_norm": 14.879026412963867, "learning_rate": 9.011904761904762e-06, "loss": 52.0743, "step": 90 }, { "epoch": 2.1671641791044776, "grad_norm": 48.55704116821289, "learning_rate": 9e-06, "loss": 52.0543, "step": 91 }, { "epoch": 2.191044776119403, "grad_norm": 37.96782302856445, "learning_rate": 8.98809523809524e-06, "loss": 52.4684, "step": 92 }, { "epoch": 2.2149253731343284, "grad_norm": 27.714475631713867, "learning_rate": 8.976190476190478e-06, "loss": 52.725, "step": 93 }, { "epoch": 2.2388059701492535, "grad_norm": 32.12433624267578, "learning_rate": 8.964285714285716e-06, "loss": 52.7161, "step": 94 }, { "epoch": 2.262686567164179, "grad_norm": 23.8153018951416, "learning_rate": 8.952380952380953e-06, "loss": 51.3124, "step": 95 }, { "epoch": 2.2865671641791043, "grad_norm": 31.269794464111328, "learning_rate": 8.940476190476191e-06, "loss": 51.9646, "step": 96 }, { "epoch": 2.31044776119403, "grad_norm": 16.611865997314453, "learning_rate": 8.92857142857143e-06, "loss": 51.8503, "step": 97 }, { "epoch": 2.334328358208955, "grad_norm": 26.69631004333496, "learning_rate": 8.916666666666667e-06, "loss": 52.4857, "step": 98 }, { "epoch": 2.3582089552238807, "grad_norm": 21.10638999938965, "learning_rate": 8.904761904761905e-06, "loss": 52.0022, "step": 99 }, { "epoch": 2.382089552238806, "grad_norm": 16.273351669311523, "learning_rate": 8.892857142857143e-06, "loss": 50.4367, "step": 100 }, { "epoch": 2.405970149253731, "grad_norm": 16.407167434692383, "learning_rate": 8.88095238095238e-06, "loss": 51.2079, "step": 101 }, { "epoch": 2.4298507462686567, "grad_norm": 16.22024154663086, "learning_rate": 8.869047619047619e-06, "loss": 50.4939, "step": 102 }, { "epoch": 2.4537313432835823, "grad_norm": 27.11235809326172, "learning_rate": 8.857142857142858e-06, "loss": 50.0872, "step": 103 }, { "epoch": 2.4776119402985075, "grad_norm": 18.912181854248047, "learning_rate": 8.845238095238096e-06, "loss": 51.8135, "step": 104 }, { "epoch": 2.5014925373134327, "grad_norm": 29.597028732299805, "learning_rate": 8.833333333333334e-06, "loss": 49.4789, "step": 105 }, { "epoch": 2.5253731343283583, "grad_norm": 30.51687240600586, "learning_rate": 8.821428571428572e-06, "loss": 52.5555, "step": 106 }, { "epoch": 2.5492537313432835, "grad_norm": 31.4583797454834, "learning_rate": 8.80952380952381e-06, "loss": 51.0073, "step": 107 }, { "epoch": 2.573134328358209, "grad_norm": 30.35653305053711, "learning_rate": 8.797619047619048e-06, "loss": 50.9501, "step": 108 }, { "epoch": 2.5970149253731343, "grad_norm": 24.041545867919922, "learning_rate": 8.785714285714286e-06, "loss": 49.5162, "step": 109 }, { "epoch": 2.62089552238806, "grad_norm": 23.52166175842285, "learning_rate": 8.773809523809525e-06, "loss": 52.9747, "step": 110 }, { "epoch": 2.644776119402985, "grad_norm": 28.871065139770508, "learning_rate": 8.761904761904763e-06, "loss": 50.2273, "step": 111 }, { "epoch": 2.6686567164179102, "grad_norm": 26.484140396118164, "learning_rate": 8.750000000000001e-06, "loss": 51.2286, "step": 112 }, { "epoch": 2.692537313432836, "grad_norm": 37.570743560791016, "learning_rate": 8.738095238095239e-06, "loss": 49.7131, "step": 113 }, { "epoch": 2.716417910447761, "grad_norm": 23.827178955078125, "learning_rate": 8.726190476190477e-06, "loss": 51.913, "step": 114 }, { "epoch": 2.7402985074626867, "grad_norm": 33.89924621582031, "learning_rate": 8.714285714285715e-06, "loss": 52.2382, "step": 115 }, { "epoch": 2.764179104477612, "grad_norm": 29.397851943969727, "learning_rate": 8.702380952380952e-06, "loss": 52.1548, "step": 116 }, { "epoch": 2.7880597014925375, "grad_norm": 28.73517417907715, "learning_rate": 8.690476190476192e-06, "loss": 51.2892, "step": 117 }, { "epoch": 2.8119402985074626, "grad_norm": 32.068138122558594, "learning_rate": 8.67857142857143e-06, "loss": 51.542, "step": 118 }, { "epoch": 2.835820895522388, "grad_norm": 22.76898956298828, "learning_rate": 8.666666666666668e-06, "loss": 50.373, "step": 119 }, { "epoch": 2.8597014925373134, "grad_norm": 33.528263092041016, "learning_rate": 8.654761904761906e-06, "loss": 51.3075, "step": 120 }, { "epoch": 2.883582089552239, "grad_norm": 21.655696868896484, "learning_rate": 8.642857142857144e-06, "loss": 51.1397, "step": 121 }, { "epoch": 2.9074626865671642, "grad_norm": 25.94880485534668, "learning_rate": 8.630952380952381e-06, "loss": 51.4326, "step": 122 }, { "epoch": 2.9313432835820894, "grad_norm": 36.14421844482422, "learning_rate": 8.61904761904762e-06, "loss": 50.9524, "step": 123 }, { "epoch": 2.955223880597015, "grad_norm": 28.361459732055664, "learning_rate": 8.607142857142859e-06, "loss": 51.3171, "step": 124 }, { "epoch": 2.97910447761194, "grad_norm": 30.784954071044922, "learning_rate": 8.595238095238097e-06, "loss": 49.9797, "step": 125 }, { "epoch": 3.0, "grad_norm": 22.98565101623535, "learning_rate": 8.583333333333333e-06, "loss": 44.3471, "step": 126 }, { "epoch": 3.023880597014925, "grad_norm": 25.601985931396484, "learning_rate": 8.571428571428571e-06, "loss": 51.6574, "step": 127 }, { "epoch": 3.047761194029851, "grad_norm": 27.648792266845703, "learning_rate": 8.55952380952381e-06, "loss": 51.3385, "step": 128 }, { "epoch": 3.071641791044776, "grad_norm": 18.773529052734375, "learning_rate": 8.547619047619048e-06, "loss": 51.1451, "step": 129 }, { "epoch": 3.0955223880597016, "grad_norm": 29.439353942871094, "learning_rate": 8.535714285714286e-06, "loss": 51.6092, "step": 130 }, { "epoch": 3.1194029850746268, "grad_norm": 32.41486740112305, "learning_rate": 8.523809523809524e-06, "loss": 50.9068, "step": 131 }, { "epoch": 3.1432835820895524, "grad_norm": 23.441896438598633, "learning_rate": 8.511904761904762e-06, "loss": 51.7453, "step": 132 }, { "epoch": 3.1671641791044776, "grad_norm": 29.218734741210938, "learning_rate": 8.5e-06, "loss": 49.9124, "step": 133 }, { "epoch": 3.191044776119403, "grad_norm": 20.988981246948242, "learning_rate": 8.488095238095238e-06, "loss": 50.9788, "step": 134 }, { "epoch": 3.2149253731343284, "grad_norm": 22.57052993774414, "learning_rate": 8.476190476190477e-06, "loss": 51.4228, "step": 135 }, { "epoch": 3.2388059701492535, "grad_norm": 26.112573623657227, "learning_rate": 8.464285714285715e-06, "loss": 50.3332, "step": 136 }, { "epoch": 3.262686567164179, "grad_norm": 23.8747615814209, "learning_rate": 8.452380952380953e-06, "loss": 51.1763, "step": 137 }, { "epoch": 3.2865671641791043, "grad_norm": 24.12811851501465, "learning_rate": 8.440476190476191e-06, "loss": 49.8539, "step": 138 }, { "epoch": 3.31044776119403, "grad_norm": 27.462984085083008, "learning_rate": 8.428571428571429e-06, "loss": 50.7766, "step": 139 }, { "epoch": 3.334328358208955, "grad_norm": 31.261472702026367, "learning_rate": 8.416666666666667e-06, "loss": 49.7599, "step": 140 }, { "epoch": 3.3582089552238807, "grad_norm": 21.049545288085938, "learning_rate": 8.404761904761905e-06, "loss": 49.6827, "step": 141 }, { "epoch": 3.382089552238806, "grad_norm": 30.103389739990234, "learning_rate": 8.392857142857144e-06, "loss": 49.3866, "step": 142 }, { "epoch": 3.405970149253731, "grad_norm": 31.348888397216797, "learning_rate": 8.380952380952382e-06, "loss": 51.4607, "step": 143 }, { "epoch": 3.4298507462686567, "grad_norm": 28.910200119018555, "learning_rate": 8.36904761904762e-06, "loss": 51.2337, "step": 144 }, { "epoch": 3.4537313432835823, "grad_norm": 21.00281524658203, "learning_rate": 8.357142857142858e-06, "loss": 50.9557, "step": 145 }, { "epoch": 3.4776119402985075, "grad_norm": 45.842002868652344, "learning_rate": 8.345238095238096e-06, "loss": 49.7377, "step": 146 }, { "epoch": 3.5014925373134327, "grad_norm": 30.77996253967285, "learning_rate": 8.333333333333334e-06, "loss": 51.1234, "step": 147 }, { "epoch": 3.5253731343283583, "grad_norm": 31.492767333984375, "learning_rate": 8.321428571428573e-06, "loss": 50.5733, "step": 148 }, { "epoch": 3.5492537313432835, "grad_norm": 36.57206344604492, "learning_rate": 8.309523809523811e-06, "loss": 50.6762, "step": 149 }, { "epoch": 3.573134328358209, "grad_norm": 33.86347198486328, "learning_rate": 8.297619047619049e-06, "loss": 50.8281, "step": 150 }, { "epoch": 3.5970149253731343, "grad_norm": 30.812152862548828, "learning_rate": 8.285714285714287e-06, "loss": 50.6509, "step": 151 }, { "epoch": 3.62089552238806, "grad_norm": 24.536882400512695, "learning_rate": 8.273809523809523e-06, "loss": 50.1112, "step": 152 }, { "epoch": 3.644776119402985, "grad_norm": 29.8430233001709, "learning_rate": 8.261904761904763e-06, "loss": 50.846, "step": 153 }, { "epoch": 3.6686567164179102, "grad_norm": 26.18596076965332, "learning_rate": 8.25e-06, "loss": 50.3806, "step": 154 }, { "epoch": 3.692537313432836, "grad_norm": 38.75019836425781, "learning_rate": 8.238095238095239e-06, "loss": 49.8915, "step": 155 }, { "epoch": 3.716417910447761, "grad_norm": 34.30149841308594, "learning_rate": 8.226190476190476e-06, "loss": 50.7886, "step": 156 }, { "epoch": 3.7402985074626867, "grad_norm": 33.179298400878906, "learning_rate": 8.214285714285714e-06, "loss": 50.8175, "step": 157 }, { "epoch": 3.764179104477612, "grad_norm": 34.90909957885742, "learning_rate": 8.202380952380952e-06, "loss": 50.3521, "step": 158 }, { "epoch": 3.7880597014925375, "grad_norm": 33.2717399597168, "learning_rate": 8.190476190476192e-06, "loss": 51.2006, "step": 159 }, { "epoch": 3.8119402985074626, "grad_norm": 33.082672119140625, "learning_rate": 8.17857142857143e-06, "loss": 49.5627, "step": 160 }, { "epoch": 3.835820895522388, "grad_norm": 23.65228843688965, "learning_rate": 8.166666666666668e-06, "loss": 49.9631, "step": 161 }, { "epoch": 3.8597014925373134, "grad_norm": 37.3172492980957, "learning_rate": 8.154761904761905e-06, "loss": 50.7175, "step": 162 }, { "epoch": 3.883582089552239, "grad_norm": 29.369930267333984, "learning_rate": 8.142857142857143e-06, "loss": 51.1435, "step": 163 }, { "epoch": 3.9074626865671642, "grad_norm": 28.807470321655273, "learning_rate": 8.130952380952381e-06, "loss": 50.3349, "step": 164 }, { "epoch": 3.9313432835820894, "grad_norm": 33.90628433227539, "learning_rate": 8.119047619047619e-06, "loss": 50.7241, "step": 165 }, { "epoch": 3.955223880597015, "grad_norm": 21.72952651977539, "learning_rate": 8.107142857142859e-06, "loss": 49.9013, "step": 166 }, { "epoch": 3.97910447761194, "grad_norm": 26.831520080566406, "learning_rate": 8.095238095238097e-06, "loss": 51.0161, "step": 167 }, { "epoch": 4.0, "grad_norm": 24.49069595336914, "learning_rate": 8.083333333333334e-06, "loss": 44.6758, "step": 168 }, { "epoch": 4.023880597014926, "grad_norm": 36.32711410522461, "learning_rate": 8.071428571428572e-06, "loss": 49.8601, "step": 169 }, { "epoch": 4.04776119402985, "grad_norm": 29.862812042236328, "learning_rate": 8.05952380952381e-06, "loss": 51.712, "step": 170 }, { "epoch": 4.071641791044776, "grad_norm": 40.245887756347656, "learning_rate": 8.047619047619048e-06, "loss": 50.3353, "step": 171 }, { "epoch": 4.095522388059702, "grad_norm": 34.22684097290039, "learning_rate": 8.035714285714286e-06, "loss": 50.6474, "step": 172 }, { "epoch": 4.119402985074627, "grad_norm": 36.754669189453125, "learning_rate": 8.023809523809526e-06, "loss": 50.1623, "step": 173 }, { "epoch": 4.143283582089552, "grad_norm": 35.76541519165039, "learning_rate": 8.011904761904763e-06, "loss": 50.2426, "step": 174 }, { "epoch": 4.167164179104478, "grad_norm": 25.851362228393555, "learning_rate": 8.000000000000001e-06, "loss": 49.9525, "step": 175 }, { "epoch": 4.191044776119403, "grad_norm": 24.48278045654297, "learning_rate": 7.98809523809524e-06, "loss": 49.1466, "step": 176 }, { "epoch": 4.214925373134328, "grad_norm": 28.79146385192871, "learning_rate": 7.976190476190477e-06, "loss": 49.9365, "step": 177 }, { "epoch": 4.2388059701492535, "grad_norm": 29.29482650756836, "learning_rate": 7.964285714285715e-06, "loss": 50.7427, "step": 178 }, { "epoch": 4.262686567164179, "grad_norm": 23.50571060180664, "learning_rate": 7.952380952380953e-06, "loss": 49.7287, "step": 179 }, { "epoch": 4.286567164179105, "grad_norm": 27.805828094482422, "learning_rate": 7.94047619047619e-06, "loss": 50.4316, "step": 180 }, { "epoch": 4.3104477611940295, "grad_norm": 28.323888778686523, "learning_rate": 7.928571428571429e-06, "loss": 50.0263, "step": 181 }, { "epoch": 4.334328358208955, "grad_norm": 25.43438148498535, "learning_rate": 7.916666666666667e-06, "loss": 49.426, "step": 182 }, { "epoch": 4.358208955223881, "grad_norm": 22.169496536254883, "learning_rate": 7.904761904761904e-06, "loss": 51.1048, "step": 183 }, { "epoch": 4.382089552238806, "grad_norm": 33.660545349121094, "learning_rate": 7.892857142857144e-06, "loss": 49.7654, "step": 184 }, { "epoch": 4.405970149253731, "grad_norm": 24.276273727416992, "learning_rate": 7.880952380952382e-06, "loss": 50.4976, "step": 185 }, { "epoch": 4.429850746268657, "grad_norm": 41.48741149902344, "learning_rate": 7.86904761904762e-06, "loss": 52.0386, "step": 186 }, { "epoch": 4.453731343283582, "grad_norm": 25.86789894104004, "learning_rate": 7.857142857142858e-06, "loss": 49.129, "step": 187 }, { "epoch": 4.477611940298507, "grad_norm": 26.607038497924805, "learning_rate": 7.845238095238096e-06, "loss": 49.3561, "step": 188 }, { "epoch": 4.501492537313433, "grad_norm": 43.54303741455078, "learning_rate": 7.833333333333333e-06, "loss": 50.1143, "step": 189 }, { "epoch": 4.525373134328358, "grad_norm": 45.6146354675293, "learning_rate": 7.821428571428571e-06, "loss": 49.3217, "step": 190 }, { "epoch": 4.549253731343284, "grad_norm": 24.00080680847168, "learning_rate": 7.809523809523811e-06, "loss": 50.484, "step": 191 }, { "epoch": 4.573134328358209, "grad_norm": 29.736740112304688, "learning_rate": 7.797619047619049e-06, "loss": 49.748, "step": 192 }, { "epoch": 4.597014925373134, "grad_norm": 33.08702850341797, "learning_rate": 7.785714285714287e-06, "loss": 50.2142, "step": 193 }, { "epoch": 4.62089552238806, "grad_norm": 19.16411018371582, "learning_rate": 7.773809523809525e-06, "loss": 50.1073, "step": 194 }, { "epoch": 4.6447761194029855, "grad_norm": 32.145721435546875, "learning_rate": 7.761904761904762e-06, "loss": 48.5769, "step": 195 }, { "epoch": 4.66865671641791, "grad_norm": 38.768341064453125, "learning_rate": 7.75e-06, "loss": 49.681, "step": 196 }, { "epoch": 4.692537313432836, "grad_norm": 26.108245849609375, "learning_rate": 7.738095238095238e-06, "loss": 49.9193, "step": 197 }, { "epoch": 4.7164179104477615, "grad_norm": 28.86294174194336, "learning_rate": 7.726190476190478e-06, "loss": 50.4584, "step": 198 }, { "epoch": 4.740298507462686, "grad_norm": 31.089380264282227, "learning_rate": 7.714285714285716e-06, "loss": 50.7873, "step": 199 }, { "epoch": 4.764179104477612, "grad_norm": 22.934032440185547, "learning_rate": 7.702380952380954e-06, "loss": 50.611, "step": 200 }, { "epoch": 4.7880597014925375, "grad_norm": 27.986371994018555, "learning_rate": 7.690476190476191e-06, "loss": 49.275, "step": 201 }, { "epoch": 4.811940298507462, "grad_norm": 23.44196319580078, "learning_rate": 7.67857142857143e-06, "loss": 50.2035, "step": 202 }, { "epoch": 4.835820895522388, "grad_norm": 22.05059242248535, "learning_rate": 7.666666666666667e-06, "loss": 48.9595, "step": 203 }, { "epoch": 4.859701492537313, "grad_norm": 29.709396362304688, "learning_rate": 7.654761904761905e-06, "loss": 50.5343, "step": 204 }, { "epoch": 4.883582089552239, "grad_norm": 23.702781677246094, "learning_rate": 7.642857142857143e-06, "loss": 50.2627, "step": 205 }, { "epoch": 4.907462686567165, "grad_norm": 20.144807815551758, "learning_rate": 7.630952380952381e-06, "loss": 51.0125, "step": 206 }, { "epoch": 4.931343283582089, "grad_norm": 28.83676528930664, "learning_rate": 7.61904761904762e-06, "loss": 50.5985, "step": 207 }, { "epoch": 4.955223880597015, "grad_norm": 34.40160369873047, "learning_rate": 7.6071428571428575e-06, "loss": 49.6469, "step": 208 }, { "epoch": 4.979104477611941, "grad_norm": 26.982925415039062, "learning_rate": 7.595238095238095e-06, "loss": 50.1666, "step": 209 }, { "epoch": 5.0, "grad_norm": 19.569746017456055, "learning_rate": 7.583333333333333e-06, "loss": 43.6715, "step": 210 }, { "epoch": 5.023880597014926, "grad_norm": 23.753328323364258, "learning_rate": 7.571428571428572e-06, "loss": 49.9273, "step": 211 }, { "epoch": 5.04776119402985, "grad_norm": 22.463659286499023, "learning_rate": 7.55952380952381e-06, "loss": 48.8499, "step": 212 }, { "epoch": 5.071641791044776, "grad_norm": 24.507875442504883, "learning_rate": 7.547619047619048e-06, "loss": 49.3275, "step": 213 }, { "epoch": 5.095522388059702, "grad_norm": 21.727603912353516, "learning_rate": 7.5357142857142865e-06, "loss": 49.1879, "step": 214 }, { "epoch": 5.119402985074627, "grad_norm": 26.122251510620117, "learning_rate": 7.523809523809524e-06, "loss": 50.1094, "step": 215 }, { "epoch": 5.143283582089552, "grad_norm": 24.142263412475586, "learning_rate": 7.511904761904762e-06, "loss": 50.2708, "step": 216 }, { "epoch": 5.167164179104478, "grad_norm": 22.762237548828125, "learning_rate": 7.500000000000001e-06, "loss": 50.441, "step": 217 }, { "epoch": 5.191044776119403, "grad_norm": 35.74570846557617, "learning_rate": 7.488095238095239e-06, "loss": 48.5121, "step": 218 }, { "epoch": 5.214925373134328, "grad_norm": 30.92180824279785, "learning_rate": 7.476190476190477e-06, "loss": 49.4257, "step": 219 }, { "epoch": 5.2388059701492535, "grad_norm": 26.90997314453125, "learning_rate": 7.464285714285715e-06, "loss": 50.9712, "step": 220 }, { "epoch": 5.262686567164179, "grad_norm": 35.544700622558594, "learning_rate": 7.4523809523809534e-06, "loss": 49.7908, "step": 221 }, { "epoch": 5.286567164179105, "grad_norm": 33.78145217895508, "learning_rate": 7.440476190476191e-06, "loss": 49.2105, "step": 222 }, { "epoch": 5.3104477611940295, "grad_norm": 32.16508102416992, "learning_rate": 7.428571428571429e-06, "loss": 49.8545, "step": 223 }, { "epoch": 5.334328358208955, "grad_norm": 30.4263973236084, "learning_rate": 7.416666666666668e-06, "loss": 50.0994, "step": 224 }, { "epoch": 5.358208955223881, "grad_norm": 25.801084518432617, "learning_rate": 7.404761904761906e-06, "loss": 49.6227, "step": 225 }, { "epoch": 5.382089552238806, "grad_norm": 27.16851234436035, "learning_rate": 7.392857142857144e-06, "loss": 50.0005, "step": 226 }, { "epoch": 5.405970149253731, "grad_norm": 30.102867126464844, "learning_rate": 7.380952380952382e-06, "loss": 50.7114, "step": 227 }, { "epoch": 5.429850746268657, "grad_norm": 26.032968521118164, "learning_rate": 7.36904761904762e-06, "loss": 48.99, "step": 228 }, { "epoch": 5.453731343283582, "grad_norm": 24.074424743652344, "learning_rate": 7.357142857142858e-06, "loss": 49.6965, "step": 229 }, { "epoch": 5.477611940298507, "grad_norm": 24.5870361328125, "learning_rate": 7.345238095238096e-06, "loss": 48.8593, "step": 230 }, { "epoch": 5.501492537313433, "grad_norm": 22.831932067871094, "learning_rate": 7.333333333333333e-06, "loss": 49.9975, "step": 231 }, { "epoch": 5.525373134328358, "grad_norm": 26.88197135925293, "learning_rate": 7.321428571428572e-06, "loss": 49.5131, "step": 232 }, { "epoch": 5.549253731343284, "grad_norm": 28.986154556274414, "learning_rate": 7.30952380952381e-06, "loss": 48.9042, "step": 233 }, { "epoch": 5.573134328358209, "grad_norm": 17.220605850219727, "learning_rate": 7.297619047619048e-06, "loss": 49.453, "step": 234 }, { "epoch": 5.597014925373134, "grad_norm": 25.110107421875, "learning_rate": 7.285714285714286e-06, "loss": 49.8196, "step": 235 }, { "epoch": 5.62089552238806, "grad_norm": 34.680870056152344, "learning_rate": 7.273809523809524e-06, "loss": 49.9709, "step": 236 }, { "epoch": 5.6447761194029855, "grad_norm": 24.10121726989746, "learning_rate": 7.261904761904762e-06, "loss": 49.4685, "step": 237 }, { "epoch": 5.66865671641791, "grad_norm": 28.65550422668457, "learning_rate": 7.25e-06, "loss": 50.3439, "step": 238 }, { "epoch": 5.692537313432836, "grad_norm": 27.49604606628418, "learning_rate": 7.238095238095239e-06, "loss": 50.0419, "step": 239 }, { "epoch": 5.7164179104477615, "grad_norm": 22.843509674072266, "learning_rate": 7.226190476190477e-06, "loss": 49.0357, "step": 240 }, { "epoch": 5.740298507462686, "grad_norm": 36.56801223754883, "learning_rate": 7.2142857142857145e-06, "loss": 49.4478, "step": 241 }, { "epoch": 5.764179104477612, "grad_norm": 38.233734130859375, "learning_rate": 7.202380952380953e-06, "loss": 50.4473, "step": 242 }, { "epoch": 5.7880597014925375, "grad_norm": 29.198333740234375, "learning_rate": 7.190476190476191e-06, "loss": 49.8598, "step": 243 }, { "epoch": 5.811940298507462, "grad_norm": 34.49404525756836, "learning_rate": 7.178571428571429e-06, "loss": 49.1441, "step": 244 }, { "epoch": 5.835820895522388, "grad_norm": 35.568359375, "learning_rate": 7.166666666666667e-06, "loss": 49.9402, "step": 245 }, { "epoch": 5.859701492537313, "grad_norm": 31.041446685791016, "learning_rate": 7.154761904761906e-06, "loss": 50.1265, "step": 246 }, { "epoch": 5.883582089552239, "grad_norm": 48.34186935424805, "learning_rate": 7.1428571428571436e-06, "loss": 50.7649, "step": 247 }, { "epoch": 5.907462686567165, "grad_norm": 39.171661376953125, "learning_rate": 7.1309523809523814e-06, "loss": 48.943, "step": 248 }, { "epoch": 5.931343283582089, "grad_norm": 28.724523544311523, "learning_rate": 7.11904761904762e-06, "loss": 50.8039, "step": 249 }, { "epoch": 5.955223880597015, "grad_norm": 36.57830810546875, "learning_rate": 7.107142857142858e-06, "loss": 50.2311, "step": 250 }, { "epoch": 5.979104477611941, "grad_norm": 39.91551971435547, "learning_rate": 7.095238095238096e-06, "loss": 49.1617, "step": 251 }, { "epoch": 6.0, "grad_norm": 17.104145050048828, "learning_rate": 7.083333333333335e-06, "loss": 42.8003, "step": 252 }, { "epoch": 6.023880597014926, "grad_norm": 33.03441619873047, "learning_rate": 7.0714285714285726e-06, "loss": 48.2896, "step": 253 }, { "epoch": 6.04776119402985, "grad_norm": 26.487470626831055, "learning_rate": 7.0595238095238105e-06, "loss": 49.205, "step": 254 }, { "epoch": 6.071641791044776, "grad_norm": 26.752981185913086, "learning_rate": 7.047619047619048e-06, "loss": 50.3943, "step": 255 }, { "epoch": 6.095522388059702, "grad_norm": 22.44376564025879, "learning_rate": 7.035714285714287e-06, "loss": 49.285, "step": 256 }, { "epoch": 6.119402985074627, "grad_norm": 31.066368103027344, "learning_rate": 7.023809523809524e-06, "loss": 49.3131, "step": 257 }, { "epoch": 6.143283582089552, "grad_norm": 28.67262840270996, "learning_rate": 7.011904761904762e-06, "loss": 50.6188, "step": 258 }, { "epoch": 6.167164179104478, "grad_norm": 24.013134002685547, "learning_rate": 7e-06, "loss": 50.4382, "step": 259 }, { "epoch": 6.191044776119403, "grad_norm": 26.5673828125, "learning_rate": 6.988095238095239e-06, "loss": 49.7058, "step": 260 }, { "epoch": 6.214925373134328, "grad_norm": 20.803695678710938, "learning_rate": 6.9761904761904765e-06, "loss": 48.9389, "step": 261 }, { "epoch": 6.2388059701492535, "grad_norm": 23.450183868408203, "learning_rate": 6.964285714285714e-06, "loss": 49.0091, "step": 262 }, { "epoch": 6.262686567164179, "grad_norm": 36.94446563720703, "learning_rate": 6.952380952380952e-06, "loss": 50.2589, "step": 263 }, { "epoch": 6.286567164179105, "grad_norm": 39.548095703125, "learning_rate": 6.940476190476191e-06, "loss": 49.3129, "step": 264 }, { "epoch": 6.3104477611940295, "grad_norm": 30.536083221435547, "learning_rate": 6.928571428571429e-06, "loss": 49.1838, "step": 265 }, { "epoch": 6.334328358208955, "grad_norm": 27.97296714782715, "learning_rate": 6.916666666666667e-06, "loss": 50.3184, "step": 266 }, { "epoch": 6.358208955223881, "grad_norm": 25.69655418395996, "learning_rate": 6.9047619047619055e-06, "loss": 49.2226, "step": 267 }, { "epoch": 6.382089552238806, "grad_norm": 22.114097595214844, "learning_rate": 6.892857142857143e-06, "loss": 49.4455, "step": 268 }, { "epoch": 6.405970149253731, "grad_norm": 30.47511100769043, "learning_rate": 6.880952380952381e-06, "loss": 49.7409, "step": 269 }, { "epoch": 6.429850746268657, "grad_norm": 26.32929039001465, "learning_rate": 6.86904761904762e-06, "loss": 50.3336, "step": 270 }, { "epoch": 6.453731343283582, "grad_norm": 28.09309196472168, "learning_rate": 6.857142857142858e-06, "loss": 49.6044, "step": 271 }, { "epoch": 6.477611940298507, "grad_norm": 25.840974807739258, "learning_rate": 6.845238095238096e-06, "loss": 49.9185, "step": 272 }, { "epoch": 6.501492537313433, "grad_norm": 31.89126205444336, "learning_rate": 6.833333333333334e-06, "loss": 48.2732, "step": 273 }, { "epoch": 6.525373134328358, "grad_norm": 24.013029098510742, "learning_rate": 6.8214285714285724e-06, "loss": 49.9752, "step": 274 }, { "epoch": 6.549253731343284, "grad_norm": 25.509836196899414, "learning_rate": 6.80952380952381e-06, "loss": 50.5493, "step": 275 }, { "epoch": 6.573134328358209, "grad_norm": 35.25442886352539, "learning_rate": 6.797619047619048e-06, "loss": 49.2553, "step": 276 }, { "epoch": 6.597014925373134, "grad_norm": 29.42585563659668, "learning_rate": 6.785714285714287e-06, "loss": 48.776, "step": 277 }, { "epoch": 6.62089552238806, "grad_norm": 25.90894889831543, "learning_rate": 6.773809523809525e-06, "loss": 49.1964, "step": 278 }, { "epoch": 6.6447761194029855, "grad_norm": 25.63600730895996, "learning_rate": 6.761904761904763e-06, "loss": 48.4528, "step": 279 }, { "epoch": 6.66865671641791, "grad_norm": 29.943740844726562, "learning_rate": 6.750000000000001e-06, "loss": 49.9026, "step": 280 }, { "epoch": 6.692537313432836, "grad_norm": 33.253910064697266, "learning_rate": 6.738095238095239e-06, "loss": 49.2364, "step": 281 }, { "epoch": 6.7164179104477615, "grad_norm": 23.465354919433594, "learning_rate": 6.726190476190477e-06, "loss": 49.2759, "step": 282 }, { "epoch": 6.740298507462686, "grad_norm": 31.023218154907227, "learning_rate": 6.714285714285714e-06, "loss": 49.3256, "step": 283 }, { "epoch": 6.764179104477612, "grad_norm": 32.376991271972656, "learning_rate": 6.702380952380952e-06, "loss": 47.7239, "step": 284 }, { "epoch": 6.7880597014925375, "grad_norm": 18.388896942138672, "learning_rate": 6.690476190476191e-06, "loss": 49.5751, "step": 285 }, { "epoch": 6.811940298507462, "grad_norm": 22.1639404296875, "learning_rate": 6.678571428571429e-06, "loss": 48.8153, "step": 286 }, { "epoch": 6.835820895522388, "grad_norm": 31.39455223083496, "learning_rate": 6.666666666666667e-06, "loss": 49.7872, "step": 287 }, { "epoch": 6.859701492537313, "grad_norm": 36.480533599853516, "learning_rate": 6.654761904761905e-06, "loss": 48.9679, "step": 288 }, { "epoch": 6.883582089552239, "grad_norm": 23.432872772216797, "learning_rate": 6.642857142857143e-06, "loss": 48.4627, "step": 289 }, { "epoch": 6.907462686567165, "grad_norm": 33.31097412109375, "learning_rate": 6.630952380952381e-06, "loss": 49.617, "step": 290 }, { "epoch": 6.931343283582089, "grad_norm": 34.07685852050781, "learning_rate": 6.619047619047619e-06, "loss": 49.3365, "step": 291 }, { "epoch": 6.955223880597015, "grad_norm": 25.68811798095703, "learning_rate": 6.607142857142858e-06, "loss": 49.7821, "step": 292 }, { "epoch": 6.979104477611941, "grad_norm": 24.179588317871094, "learning_rate": 6.595238095238096e-06, "loss": 48.5466, "step": 293 }, { "epoch": 7.0, "grad_norm": 26.158781051635742, "learning_rate": 6.5833333333333335e-06, "loss": 43.2838, "step": 294 }, { "epoch": 7.023880597014926, "grad_norm": 22.84689712524414, "learning_rate": 6.571428571428572e-06, "loss": 49.3253, "step": 295 }, { "epoch": 7.04776119402985, "grad_norm": NaN, "learning_rate": 6.55952380952381e-06, "loss": 75.2786, "step": 296 }, { "epoch": 7.071641791044776, "grad_norm": 31.13886070251465, "learning_rate": 6.55952380952381e-06, "loss": 49.984, "step": 297 }, { "epoch": 7.095522388059702, "grad_norm": 32.37982940673828, "learning_rate": 6.547619047619048e-06, "loss": 49.6632, "step": 298 }, { "epoch": 7.119402985074627, "grad_norm": 22.977916717529297, "learning_rate": 6.535714285714286e-06, "loss": 48.7802, "step": 299 }, { "epoch": 7.143283582089552, "grad_norm": NaN, "learning_rate": 6.523809523809525e-06, "loss": 60.3381, "step": 300 }, { "epoch": 7.167164179104478, "grad_norm": 32.18650817871094, "learning_rate": 6.523809523809525e-06, "loss": 49.2689, "step": 301 }, { "epoch": 7.191044776119403, "grad_norm": 30.0800724029541, "learning_rate": 6.5119047619047626e-06, "loss": 49.3891, "step": 302 }, { "epoch": 7.214925373134328, "grad_norm": 32.35110855102539, "learning_rate": 6.5000000000000004e-06, "loss": 48.4497, "step": 303 }, { "epoch": 7.2388059701492535, "grad_norm": 34.08786392211914, "learning_rate": 6.488095238095239e-06, "loss": 49.1321, "step": 304 }, { "epoch": 7.262686567164179, "grad_norm": 25.25969696044922, "learning_rate": 6.476190476190477e-06, "loss": 49.0524, "step": 305 }, { "epoch": 7.286567164179105, "grad_norm": 25.843929290771484, "learning_rate": 6.464285714285715e-06, "loss": 49.8077, "step": 306 }, { "epoch": 7.3104477611940295, "grad_norm": 34.57284927368164, "learning_rate": 6.452380952380954e-06, "loss": 49.5393, "step": 307 }, { "epoch": 7.334328358208955, "grad_norm": 33.44814682006836, "learning_rate": 6.4404761904761916e-06, "loss": 49.0375, "step": 308 }, { "epoch": 7.358208955223881, "grad_norm": 25.127429962158203, "learning_rate": 6.4285714285714295e-06, "loss": 48.8145, "step": 309 }, { "epoch": 7.382089552238806, "grad_norm": 31.81999969482422, "learning_rate": 6.416666666666667e-06, "loss": 49.6432, "step": 310 }, { "epoch": 7.405970149253731, "grad_norm": 22.428335189819336, "learning_rate": 6.404761904761904e-06, "loss": 47.6232, "step": 311 }, { "epoch": 7.429850746268657, "grad_norm": 45.87803268432617, "learning_rate": 6.392857142857143e-06, "loss": 48.3479, "step": 312 }, { "epoch": 7.453731343283582, "grad_norm": 37.441253662109375, "learning_rate": 6.380952380952381e-06, "loss": 48.593, "step": 313 }, { "epoch": 7.477611940298507, "grad_norm": 23.15785789489746, "learning_rate": 6.369047619047619e-06, "loss": 49.1204, "step": 314 }, { "epoch": 7.501492537313433, "grad_norm": 35.8905029296875, "learning_rate": 6.357142857142858e-06, "loss": 49.2918, "step": 315 }, { "epoch": 7.525373134328358, "grad_norm": 37.41954040527344, "learning_rate": 6.3452380952380955e-06, "loss": 47.7495, "step": 316 }, { "epoch": 7.549253731343284, "grad_norm": 31.173114776611328, "learning_rate": 6.333333333333333e-06, "loss": 49.539, "step": 317 }, { "epoch": 7.573134328358209, "grad_norm": 23.941965103149414, "learning_rate": 6.321428571428571e-06, "loss": 49.0958, "step": 318 }, { "epoch": 7.597014925373134, "grad_norm": 31.949769973754883, "learning_rate": 6.30952380952381e-06, "loss": 49.1945, "step": 319 }, { "epoch": 7.62089552238806, "grad_norm": 21.299409866333008, "learning_rate": 6.297619047619048e-06, "loss": 49.3823, "step": 320 }, { "epoch": 7.6447761194029855, "grad_norm": 34.93647766113281, "learning_rate": 6.285714285714286e-06, "loss": 48.8867, "step": 321 }, { "epoch": 7.66865671641791, "grad_norm": 30.189655303955078, "learning_rate": 6.2738095238095245e-06, "loss": 49.8644, "step": 322 }, { "epoch": 7.692537313432836, "grad_norm": 19.964523315429688, "learning_rate": 6.261904761904762e-06, "loss": 49.6489, "step": 323 }, { "epoch": 7.7164179104477615, "grad_norm": 22.253337860107422, "learning_rate": 6.25e-06, "loss": 48.0582, "step": 324 }, { "epoch": 7.740298507462686, "grad_norm": 26.631391525268555, "learning_rate": 6.238095238095239e-06, "loss": 48.5585, "step": 325 }, { "epoch": 7.764179104477612, "grad_norm": 26.0469913482666, "learning_rate": 6.226190476190477e-06, "loss": 49.4969, "step": 326 }, { "epoch": 7.7880597014925375, "grad_norm": 30.000507354736328, "learning_rate": 6.214285714285715e-06, "loss": 49.6044, "step": 327 }, { "epoch": 7.811940298507462, "grad_norm": 29.44800567626953, "learning_rate": 6.202380952380953e-06, "loss": 50.3622, "step": 328 }, { "epoch": 7.835820895522388, "grad_norm": 24.83717918395996, "learning_rate": 6.1904761904761914e-06, "loss": 50.0974, "step": 329 }, { "epoch": 7.859701492537313, "grad_norm": 30.0760555267334, "learning_rate": 6.178571428571429e-06, "loss": 48.9307, "step": 330 }, { "epoch": 7.883582089552239, "grad_norm": 21.087966918945312, "learning_rate": 6.166666666666667e-06, "loss": 49.3432, "step": 331 }, { "epoch": 7.907462686567165, "grad_norm": 23.193716049194336, "learning_rate": 6.154761904761906e-06, "loss": 48.6664, "step": 332 }, { "epoch": 7.931343283582089, "grad_norm": 22.764123916625977, "learning_rate": 6.142857142857144e-06, "loss": 49.3497, "step": 333 }, { "epoch": 7.955223880597015, "grad_norm": 22.411897659301758, "learning_rate": 6.130952380952382e-06, "loss": 49.4106, "step": 334 }, { "epoch": 7.979104477611941, "grad_norm": 29.535375595092773, "learning_rate": 6.11904761904762e-06, "loss": 49.0695, "step": 335 }, { "epoch": 8.0, "grad_norm": 21.094457626342773, "learning_rate": 6.107142857142858e-06, "loss": 42.1367, "step": 336 }, { "epoch": 8.023880597014925, "grad_norm": 33.74859619140625, "learning_rate": 6.095238095238096e-06, "loss": 47.0065, "step": 337 }, { "epoch": 8.047761194029851, "grad_norm": 32.539127349853516, "learning_rate": 6.083333333333333e-06, "loss": 47.9697, "step": 338 }, { "epoch": 8.071641791044776, "grad_norm": 19.168655395507812, "learning_rate": 6.071428571428571e-06, "loss": 49.4919, "step": 339 }, { "epoch": 8.0955223880597, "grad_norm": 30.041269302368164, "learning_rate": 6.05952380952381e-06, "loss": 48.7887, "step": 340 }, { "epoch": 8.119402985074627, "grad_norm": 21.070598602294922, "learning_rate": 6.047619047619048e-06, "loss": 48.5064, "step": 341 }, { "epoch": 8.143283582089552, "grad_norm": 29.560287475585938, "learning_rate": 6.035714285714286e-06, "loss": 47.7472, "step": 342 }, { "epoch": 8.167164179104478, "grad_norm": 24.256393432617188, "learning_rate": 6.023809523809524e-06, "loss": 48.8917, "step": 343 }, { "epoch": 8.191044776119403, "grad_norm": 29.970674514770508, "learning_rate": 6.011904761904762e-06, "loss": 48.3464, "step": 344 }, { "epoch": 8.214925373134328, "grad_norm": 25.274595260620117, "learning_rate": 6e-06, "loss": 49.1565, "step": 345 }, { "epoch": 8.238805970149254, "grad_norm": 24.000280380249023, "learning_rate": 5.988095238095238e-06, "loss": 49.3396, "step": 346 }, { "epoch": 8.26268656716418, "grad_norm": 25.110261917114258, "learning_rate": 5.976190476190477e-06, "loss": 49.786, "step": 347 }, { "epoch": 8.286567164179104, "grad_norm": 26.188514709472656, "learning_rate": 5.964285714285715e-06, "loss": 50.1652, "step": 348 }, { "epoch": 8.31044776119403, "grad_norm": 18.536714553833008, "learning_rate": 5.9523809523809525e-06, "loss": 49.7224, "step": 349 }, { "epoch": 8.334328358208955, "grad_norm": 33.79502868652344, "learning_rate": 5.940476190476191e-06, "loss": 48.2923, "step": 350 }, { "epoch": 8.35820895522388, "grad_norm": 33.03609085083008, "learning_rate": 5.928571428571429e-06, "loss": 49.128, "step": 351 }, { "epoch": 8.382089552238806, "grad_norm": 23.88555145263672, "learning_rate": 5.916666666666667e-06, "loss": 49.6072, "step": 352 }, { "epoch": 8.405970149253731, "grad_norm": 29.688135147094727, "learning_rate": 5.904761904761905e-06, "loss": 49.0984, "step": 353 }, { "epoch": 8.429850746268656, "grad_norm": 23.166162490844727, "learning_rate": 5.892857142857144e-06, "loss": 48.8104, "step": 354 }, { "epoch": 8.453731343283582, "grad_norm": 27.68876838684082, "learning_rate": 5.8809523809523816e-06, "loss": 48.7745, "step": 355 }, { "epoch": 8.477611940298507, "grad_norm": 26.520286560058594, "learning_rate": 5.8690476190476194e-06, "loss": 47.883, "step": 356 }, { "epoch": 8.501492537313434, "grad_norm": 28.830135345458984, "learning_rate": 5.857142857142858e-06, "loss": 49.1347, "step": 357 }, { "epoch": 8.525373134328358, "grad_norm": 27.387250900268555, "learning_rate": 5.845238095238096e-06, "loss": 48.2092, "step": 358 }, { "epoch": 8.549253731343283, "grad_norm": 23.53616714477539, "learning_rate": 5.833333333333334e-06, "loss": 48.437, "step": 359 }, { "epoch": 8.57313432835821, "grad_norm": 25.665664672851562, "learning_rate": 5.821428571428573e-06, "loss": 49.3006, "step": 360 }, { "epoch": 8.597014925373134, "grad_norm": 24.35331153869629, "learning_rate": 5.8095238095238106e-06, "loss": 49.5249, "step": 361 }, { "epoch": 8.620895522388059, "grad_norm": 28.612688064575195, "learning_rate": 5.7976190476190485e-06, "loss": 50.1344, "step": 362 }, { "epoch": 8.644776119402986, "grad_norm": 25.055545806884766, "learning_rate": 5.785714285714286e-06, "loss": 48.6014, "step": 363 }, { "epoch": 8.66865671641791, "grad_norm": 27.645490646362305, "learning_rate": 5.773809523809523e-06, "loss": 48.953, "step": 364 }, { "epoch": 8.692537313432837, "grad_norm": 26.791471481323242, "learning_rate": 5.761904761904762e-06, "loss": 49.5912, "step": 365 }, { "epoch": 8.716417910447761, "grad_norm": 27.57213592529297, "learning_rate": 5.75e-06, "loss": 48.9958, "step": 366 }, { "epoch": 8.740298507462686, "grad_norm": 20.936344146728516, "learning_rate": 5.738095238095238e-06, "loss": 48.3449, "step": 367 }, { "epoch": 8.764179104477613, "grad_norm": 31.695810317993164, "learning_rate": 5.726190476190477e-06, "loss": 49.1015, "step": 368 }, { "epoch": 8.788059701492537, "grad_norm": 31.584064483642578, "learning_rate": 5.7142857142857145e-06, "loss": 48.8249, "step": 369 }, { "epoch": 8.811940298507462, "grad_norm": 30.70412826538086, "learning_rate": 5.702380952380952e-06, "loss": 49.2984, "step": 370 }, { "epoch": 8.835820895522389, "grad_norm": 36.31315231323242, "learning_rate": 5.690476190476191e-06, "loss": 48.6769, "step": 371 }, { "epoch": 8.859701492537313, "grad_norm": 28.98838996887207, "learning_rate": 5.678571428571429e-06, "loss": 50.2101, "step": 372 }, { "epoch": 8.883582089552238, "grad_norm": 29.07052230834961, "learning_rate": 5.666666666666667e-06, "loss": 49.9206, "step": 373 }, { "epoch": 8.907462686567165, "grad_norm": 31.653087615966797, "learning_rate": 5.654761904761905e-06, "loss": 48.3035, "step": 374 }, { "epoch": 8.93134328358209, "grad_norm": 27.019704818725586, "learning_rate": 5.6428571428571435e-06, "loss": 48.6833, "step": 375 }, { "epoch": 8.955223880597014, "grad_norm": 30.919578552246094, "learning_rate": 5.630952380952381e-06, "loss": 47.7973, "step": 376 }, { "epoch": 8.97910447761194, "grad_norm": 28.002975463867188, "learning_rate": 5.619047619047619e-06, "loss": 49.5539, "step": 377 }, { "epoch": 9.0, "grad_norm": 27.587263107299805, "learning_rate": 5.607142857142858e-06, "loss": 42.9343, "step": 378 }, { "epoch": 9.023880597014925, "grad_norm": 31.024024963378906, "learning_rate": 5.595238095238096e-06, "loss": 48.6774, "step": 379 }, { "epoch": 9.047761194029851, "grad_norm": 27.262426376342773, "learning_rate": 5.583333333333334e-06, "loss": 47.8833, "step": 380 }, { "epoch": 9.071641791044776, "grad_norm": 29.223133087158203, "learning_rate": 5.571428571428572e-06, "loss": 47.1563, "step": 381 }, { "epoch": 9.0955223880597, "grad_norm": 21.004749298095703, "learning_rate": 5.5595238095238104e-06, "loss": 49.0407, "step": 382 }, { "epoch": 9.119402985074627, "grad_norm": 25.157907485961914, "learning_rate": 5.547619047619048e-06, "loss": 48.5233, "step": 383 }, { "epoch": 9.143283582089552, "grad_norm": 17.611478805541992, "learning_rate": 5.535714285714286e-06, "loss": 47.4846, "step": 384 }, { "epoch": 9.167164179104478, "grad_norm": 21.00395965576172, "learning_rate": 5.523809523809525e-06, "loss": 49.7533, "step": 385 }, { "epoch": 9.191044776119403, "grad_norm": 22.07697296142578, "learning_rate": 5.511904761904763e-06, "loss": 48.5003, "step": 386 }, { "epoch": 9.214925373134328, "grad_norm": 21.743778228759766, "learning_rate": 5.500000000000001e-06, "loss": 48.149, "step": 387 }, { "epoch": 9.238805970149254, "grad_norm": 23.499980926513672, "learning_rate": 5.4880952380952394e-06, "loss": 48.2213, "step": 388 }, { "epoch": 9.26268656716418, "grad_norm": 22.22580337524414, "learning_rate": 5.476190476190477e-06, "loss": 48.4671, "step": 389 }, { "epoch": 9.286567164179104, "grad_norm": 26.5915470123291, "learning_rate": 5.464285714285714e-06, "loss": 49.2343, "step": 390 }, { "epoch": 9.31044776119403, "grad_norm": 22.510892868041992, "learning_rate": 5.452380952380952e-06, "loss": 48.9363, "step": 391 }, { "epoch": 9.334328358208955, "grad_norm": 27.17405128479004, "learning_rate": 5.44047619047619e-06, "loss": 49.1814, "step": 392 }, { "epoch": 9.35820895522388, "grad_norm": 29.143529891967773, "learning_rate": 5.428571428571429e-06, "loss": 48.4786, "step": 393 }, { "epoch": 9.382089552238806, "grad_norm": 20.24784278869629, "learning_rate": 5.416666666666667e-06, "loss": 49.2987, "step": 394 }, { "epoch": 9.405970149253731, "grad_norm": 31.44426155090332, "learning_rate": 5.404761904761905e-06, "loss": 49.9466, "step": 395 }, { "epoch": 9.429850746268656, "grad_norm": 23.775951385498047, "learning_rate": 5.392857142857143e-06, "loss": 49.1681, "step": 396 }, { "epoch": 9.453731343283582, "grad_norm": 22.168636322021484, "learning_rate": 5.380952380952381e-06, "loss": 48.8523, "step": 397 }, { "epoch": 9.477611940298507, "grad_norm": 20.944936752319336, "learning_rate": 5.369047619047619e-06, "loss": 48.7369, "step": 398 }, { "epoch": 9.501492537313434, "grad_norm": 23.880292892456055, "learning_rate": 5.357142857142857e-06, "loss": 48.4703, "step": 399 }, { "epoch": 9.525373134328358, "grad_norm": 25.316978454589844, "learning_rate": 5.345238095238096e-06, "loss": 48.3752, "step": 400 }, { "epoch": 9.549253731343283, "grad_norm": 24.398311614990234, "learning_rate": 5.333333333333334e-06, "loss": 47.532, "step": 401 }, { "epoch": 9.57313432835821, "grad_norm": 23.157140731811523, "learning_rate": 5.3214285714285715e-06, "loss": 49.1824, "step": 402 }, { "epoch": 9.597014925373134, "grad_norm": 21.641061782836914, "learning_rate": 5.30952380952381e-06, "loss": 49.6601, "step": 403 }, { "epoch": 9.620895522388059, "grad_norm": 23.863712310791016, "learning_rate": 5.297619047619048e-06, "loss": 49.2146, "step": 404 }, { "epoch": 9.644776119402986, "grad_norm": 21.876007080078125, "learning_rate": 5.285714285714286e-06, "loss": 48.0027, "step": 405 }, { "epoch": 9.66865671641791, "grad_norm": 25.783042907714844, "learning_rate": 5.273809523809525e-06, "loss": 48.2702, "step": 406 }, { "epoch": 9.692537313432837, "grad_norm": 18.782087326049805, "learning_rate": 5.261904761904763e-06, "loss": 48.9365, "step": 407 }, { "epoch": 9.716417910447761, "grad_norm": 20.206588745117188, "learning_rate": 5.2500000000000006e-06, "loss": 49.4144, "step": 408 }, { "epoch": 9.740298507462686, "grad_norm": 20.98710823059082, "learning_rate": 5.2380952380952384e-06, "loss": 49.7442, "step": 409 }, { "epoch": 9.764179104477613, "grad_norm": 19.24452018737793, "learning_rate": 5.226190476190477e-06, "loss": 49.0249, "step": 410 }, { "epoch": 9.788059701492537, "grad_norm": 23.18075180053711, "learning_rate": 5.214285714285715e-06, "loss": 48.8795, "step": 411 }, { "epoch": 9.811940298507462, "grad_norm": 17.233261108398438, "learning_rate": 5.202380952380953e-06, "loss": 49.2985, "step": 412 }, { "epoch": 9.835820895522389, "grad_norm": 24.74007797241211, "learning_rate": 5.190476190476192e-06, "loss": 48.8793, "step": 413 }, { "epoch": 9.859701492537313, "grad_norm": 20.26863670349121, "learning_rate": 5.1785714285714296e-06, "loss": 49.6989, "step": 414 }, { "epoch": 9.883582089552238, "grad_norm": 26.168167114257812, "learning_rate": 5.1666666666666675e-06, "loss": 48.7413, "step": 415 }, { "epoch": 9.907462686567165, "grad_norm": 29.008501052856445, "learning_rate": 5.1547619047619045e-06, "loss": 48.7414, "step": 416 }, { "epoch": 9.93134328358209, "grad_norm": 18.459829330444336, "learning_rate": 5.142857142857142e-06, "loss": 47.7865, "step": 417 }, { "epoch": 9.955223880597014, "grad_norm": 20.898181915283203, "learning_rate": 5.130952380952381e-06, "loss": 47.7274, "step": 418 }, { "epoch": 9.97910447761194, "grad_norm": 23.5065860748291, "learning_rate": 5.119047619047619e-06, "loss": 48.471, "step": 419 }, { "epoch": 10.0, "grad_norm": 23.147043228149414, "learning_rate": 5.107142857142857e-06, "loss": 42.3971, "step": 420 }, { "epoch": 10.023880597014925, "grad_norm": 28.423707962036133, "learning_rate": 5.095238095238096e-06, "loss": 49.4977, "step": 421 }, { "epoch": 10.047761194029851, "grad_norm": 22.017820358276367, "learning_rate": 5.0833333333333335e-06, "loss": 47.0638, "step": 422 }, { "epoch": 10.071641791044776, "grad_norm": 18.173845291137695, "learning_rate": 5.071428571428571e-06, "loss": 48.338, "step": 423 }, { "epoch": 10.0955223880597, "grad_norm": 17.628551483154297, "learning_rate": 5.05952380952381e-06, "loss": 48.2847, "step": 424 }, { "epoch": 10.119402985074627, "grad_norm": 19.974040985107422, "learning_rate": 5.047619047619048e-06, "loss": 49.2284, "step": 425 }, { "epoch": 10.143283582089552, "grad_norm": 22.45549774169922, "learning_rate": 5.035714285714286e-06, "loss": 49.6345, "step": 426 }, { "epoch": 10.167164179104478, "grad_norm": 21.609479904174805, "learning_rate": 5.023809523809524e-06, "loss": 48.2098, "step": 427 }, { "epoch": 10.191044776119403, "grad_norm": 24.7137451171875, "learning_rate": 5.0119047619047625e-06, "loss": 47.9527, "step": 428 }, { "epoch": 10.214925373134328, "grad_norm": 22.888975143432617, "learning_rate": 5e-06, "loss": 49.781, "step": 429 }, { "epoch": 10.238805970149254, "grad_norm": 25.53217124938965, "learning_rate": 4.988095238095238e-06, "loss": 48.9902, "step": 430 }, { "epoch": 10.26268656716418, "grad_norm": 27.80384063720703, "learning_rate": 4.976190476190477e-06, "loss": 48.2545, "step": 431 }, { "epoch": 10.286567164179104, "grad_norm": 21.421342849731445, "learning_rate": 4.964285714285715e-06, "loss": 49.1483, "step": 432 }, { "epoch": 10.31044776119403, "grad_norm": 26.178152084350586, "learning_rate": 4.952380952380953e-06, "loss": 49.1129, "step": 433 }, { "epoch": 10.334328358208955, "grad_norm": 27.993371963500977, "learning_rate": 4.940476190476191e-06, "loss": 48.1783, "step": 434 }, { "epoch": 10.35820895522388, "grad_norm": 26.75821876525879, "learning_rate": 4.928571428571429e-06, "loss": 48.1773, "step": 435 }, { "epoch": 10.382089552238806, "grad_norm": 25.641353607177734, "learning_rate": 4.9166666666666665e-06, "loss": 48.9295, "step": 436 }, { "epoch": 10.405970149253731, "grad_norm": 23.26271629333496, "learning_rate": 4.904761904761905e-06, "loss": 49.5486, "step": 437 }, { "epoch": 10.429850746268656, "grad_norm": 23.637466430664062, "learning_rate": 4.892857142857143e-06, "loss": 48.1263, "step": 438 }, { "epoch": 10.453731343283582, "grad_norm": 29.285432815551758, "learning_rate": 4.880952380952381e-06, "loss": 48.2424, "step": 439 }, { "epoch": 10.477611940298507, "grad_norm": 29.91914939880371, "learning_rate": 4.86904761904762e-06, "loss": 48.3695, "step": 440 }, { "epoch": 10.501492537313434, "grad_norm": 25.249099731445312, "learning_rate": 4.857142857142858e-06, "loss": 48.3644, "step": 441 }, { "epoch": 10.525373134328358, "grad_norm": 22.37591552734375, "learning_rate": 4.8452380952380955e-06, "loss": 49.397, "step": 442 }, { "epoch": 10.549253731343283, "grad_norm": 22.805437088012695, "learning_rate": 4.833333333333333e-06, "loss": 48.6522, "step": 443 }, { "epoch": 10.57313432835821, "grad_norm": 21.229095458984375, "learning_rate": 4.821428571428572e-06, "loss": 47.6681, "step": 444 }, { "epoch": 10.597014925373134, "grad_norm": 23.359468460083008, "learning_rate": 4.80952380952381e-06, "loss": 48.602, "step": 445 }, { "epoch": 10.620895522388059, "grad_norm": 20.953310012817383, "learning_rate": 4.797619047619048e-06, "loss": 49.3366, "step": 446 }, { "epoch": 10.644776119402986, "grad_norm": 21.970388412475586, "learning_rate": 4.785714285714287e-06, "loss": 46.964, "step": 447 }, { "epoch": 10.66865671641791, "grad_norm": 24.282426834106445, "learning_rate": 4.7738095238095245e-06, "loss": 48.2676, "step": 448 }, { "epoch": 10.692537313432837, "grad_norm": 15.47967529296875, "learning_rate": 4.761904761904762e-06, "loss": 48.1993, "step": 449 }, { "epoch": 10.716417910447761, "grad_norm": 23.230947494506836, "learning_rate": 4.75e-06, "loss": 48.5229, "step": 450 }, { "epoch": 10.740298507462686, "grad_norm": 20.514225006103516, "learning_rate": 4.738095238095238e-06, "loss": 48.062, "step": 451 }, { "epoch": 10.764179104477613, "grad_norm": 19.060667037963867, "learning_rate": 4.726190476190476e-06, "loss": 48.3893, "step": 452 }, { "epoch": 10.788059701492537, "grad_norm": 29.78558349609375, "learning_rate": 4.714285714285715e-06, "loss": 48.9921, "step": 453 }, { "epoch": 10.811940298507462, "grad_norm": 23.262001037597656, "learning_rate": 4.702380952380953e-06, "loss": 48.5597, "step": 454 }, { "epoch": 10.835820895522389, "grad_norm": 25.83403778076172, "learning_rate": 4.6904761904761905e-06, "loss": 49.2911, "step": 455 }, { "epoch": 10.859701492537313, "grad_norm": 21.846391677856445, "learning_rate": 4.678571428571429e-06, "loss": 47.3256, "step": 456 }, { "epoch": 10.883582089552238, "grad_norm": 17.09532356262207, "learning_rate": 4.666666666666667e-06, "loss": 48.3647, "step": 457 }, { "epoch": 10.907462686567165, "grad_norm": 31.050525665283203, "learning_rate": 4.654761904761905e-06, "loss": 48.3605, "step": 458 }, { "epoch": 10.93134328358209, "grad_norm": 22.532379150390625, "learning_rate": 4.642857142857144e-06, "loss": 49.0826, "step": 459 }, { "epoch": 10.955223880597014, "grad_norm": 23.585033416748047, "learning_rate": 4.630952380952382e-06, "loss": 48.5111, "step": 460 }, { "epoch": 10.97910447761194, "grad_norm": NaN, "learning_rate": 4.6190476190476196e-06, "loss": 66.9717, "step": 461 }, { "epoch": 11.0, "grad_norm": 24.73590087890625, "learning_rate": 4.6190476190476196e-06, "loss": 41.9122, "step": 462 }, { "epoch": 11.023880597014925, "grad_norm": 27.4709415435791, "learning_rate": 4.6071428571428574e-06, "loss": 48.4682, "step": 463 }, { "epoch": 11.047761194029851, "grad_norm": 26.158245086669922, "learning_rate": 4.595238095238095e-06, "loss": 48.1845, "step": 464 }, { "epoch": 11.071641791044776, "grad_norm": 25.14693260192871, "learning_rate": 4.583333333333333e-06, "loss": 48.4229, "step": 465 }, { "epoch": 11.0955223880597, "grad_norm": 22.229764938354492, "learning_rate": 4.571428571428572e-06, "loss": 47.8876, "step": 466 }, { "epoch": 11.119402985074627, "grad_norm": 24.202686309814453, "learning_rate": 4.55952380952381e-06, "loss": 48.4304, "step": 467 }, { "epoch": 11.143283582089552, "grad_norm": 21.449726104736328, "learning_rate": 4.547619047619048e-06, "loss": 47.6457, "step": 468 }, { "epoch": 11.167164179104478, "grad_norm": 23.769763946533203, "learning_rate": 4.5357142857142865e-06, "loss": 49.1031, "step": 469 }, { "epoch": 11.191044776119403, "grad_norm": 21.20684814453125, "learning_rate": 4.523809523809524e-06, "loss": 47.6488, "step": 470 }, { "epoch": 11.214925373134328, "grad_norm": 17.992631912231445, "learning_rate": 4.511904761904762e-06, "loss": 47.9435, "step": 471 }, { "epoch": 11.238805970149254, "grad_norm": 22.017776489257812, "learning_rate": 4.5e-06, "loss": 48.5224, "step": 472 }, { "epoch": 11.26268656716418, "grad_norm": 22.98673439025879, "learning_rate": 4.488095238095239e-06, "loss": 47.9258, "step": 473 }, { "epoch": 11.286567164179104, "grad_norm": 16.146743774414062, "learning_rate": 4.476190476190477e-06, "loss": 48.3957, "step": 474 }, { "epoch": 11.31044776119403, "grad_norm": 23.30071258544922, "learning_rate": 4.464285714285715e-06, "loss": 48.5472, "step": 475 }, { "epoch": 11.334328358208955, "grad_norm": 24.949913024902344, "learning_rate": 4.4523809523809525e-06, "loss": 48.2387, "step": 476 }, { "epoch": 11.35820895522388, "grad_norm": 23.10662841796875, "learning_rate": 4.44047619047619e-06, "loss": 49.0681, "step": 477 }, { "epoch": 11.382089552238806, "grad_norm": 19.024614334106445, "learning_rate": 4.428571428571429e-06, "loss": 49.3255, "step": 478 }, { "epoch": 11.405970149253731, "grad_norm": 22.34437370300293, "learning_rate": 4.416666666666667e-06, "loss": 47.0069, "step": 479 }, { "epoch": 11.429850746268656, "grad_norm": 23.563596725463867, "learning_rate": 4.404761904761905e-06, "loss": 46.8188, "step": 480 }, { "epoch": 11.453731343283582, "grad_norm": 20.5488338470459, "learning_rate": 4.392857142857143e-06, "loss": 47.8277, "step": 481 }, { "epoch": 11.477611940298507, "grad_norm": 18.416519165039062, "learning_rate": 4.3809523809523815e-06, "loss": 48.2203, "step": 482 }, { "epoch": 11.501492537313434, "grad_norm": 28.21132469177246, "learning_rate": 4.369047619047619e-06, "loss": 48.0691, "step": 483 }, { "epoch": 11.525373134328358, "grad_norm": 21.36182975769043, "learning_rate": 4.357142857142857e-06, "loss": 48.273, "step": 484 }, { "epoch": 11.549253731343283, "grad_norm": 25.726530075073242, "learning_rate": 4.345238095238096e-06, "loss": 48.7529, "step": 485 }, { "epoch": 11.57313432835821, "grad_norm": 21.686412811279297, "learning_rate": 4.333333333333334e-06, "loss": 48.3005, "step": 486 }, { "epoch": 11.597014925373134, "grad_norm": 20.56638526916504, "learning_rate": 4.321428571428572e-06, "loss": 50.1248, "step": 487 }, { "epoch": 11.620895522388059, "grad_norm": 24.193323135375977, "learning_rate": 4.30952380952381e-06, "loss": 48.6031, "step": 488 }, { "epoch": 11.644776119402986, "grad_norm": 17.18548583984375, "learning_rate": 4.297619047619048e-06, "loss": 49.2039, "step": 489 }, { "epoch": 11.66865671641791, "grad_norm": 19.07050895690918, "learning_rate": 4.2857142857142855e-06, "loss": 48.0961, "step": 490 }, { "epoch": 11.692537313432837, "grad_norm": 19.831188201904297, "learning_rate": 4.273809523809524e-06, "loss": 48.5481, "step": 491 }, { "epoch": 11.716417910447761, "grad_norm": 23.408592224121094, "learning_rate": 4.261904761904762e-06, "loss": 48.583, "step": 492 }, { "epoch": 11.740298507462686, "grad_norm": 22.152788162231445, "learning_rate": 4.25e-06, "loss": 48.6684, "step": 493 }, { "epoch": 11.764179104477613, "grad_norm": NaN, "learning_rate": 4.238095238095239e-06, "loss": 54.7097, "step": 494 }, { "epoch": 11.788059701492537, "grad_norm": 23.1225528717041, "learning_rate": 4.238095238095239e-06, "loss": 48.5439, "step": 495 }, { "epoch": 11.811940298507462, "grad_norm": 24.673904418945312, "learning_rate": 4.226190476190477e-06, "loss": 48.2645, "step": 496 }, { "epoch": 11.835820895522389, "grad_norm": 23.318784713745117, "learning_rate": 4.2142857142857145e-06, "loss": 47.9159, "step": 497 }, { "epoch": 11.859701492537313, "grad_norm": 24.62889289855957, "learning_rate": 4.202380952380952e-06, "loss": 48.1392, "step": 498 }, { "epoch": 11.883582089552238, "grad_norm": 17.315168380737305, "learning_rate": 4.190476190476191e-06, "loss": 49.399, "step": 499 }, { "epoch": 11.907462686567165, "grad_norm": 24.458532333374023, "learning_rate": 4.178571428571429e-06, "loss": 49.2189, "step": 500 }, { "epoch": 11.93134328358209, "grad_norm": 28.294036865234375, "learning_rate": 4.166666666666667e-06, "loss": 48.4759, "step": 501 }, { "epoch": 11.955223880597014, "grad_norm": 22.393577575683594, "learning_rate": 4.154761904761906e-06, "loss": 48.9718, "step": 502 }, { "epoch": 11.97910447761194, "grad_norm": 20.199522018432617, "learning_rate": 4.1428571428571435e-06, "loss": 47.5364, "step": 503 }, { "epoch": 12.0, "grad_norm": 22.080204010009766, "learning_rate": 4.130952380952381e-06, "loss": 42.5308, "step": 504 }, { "epoch": 12.023880597014925, "grad_norm": 28.897024154663086, "learning_rate": 4.119047619047619e-06, "loss": 48.9022, "step": 505 }, { "epoch": 12.047761194029851, "grad_norm": 28.31342887878418, "learning_rate": 4.107142857142857e-06, "loss": 47.6489, "step": 506 }, { "epoch": 12.071641791044776, "grad_norm": 22.62079620361328, "learning_rate": 4.095238095238096e-06, "loss": 48.1606, "step": 507 }, { "epoch": 12.0955223880597, "grad_norm": 33.49858474731445, "learning_rate": 4.083333333333334e-06, "loss": 47.8462, "step": 508 }, { "epoch": 12.119402985074627, "grad_norm": 22.20858383178711, "learning_rate": 4.071428571428572e-06, "loss": 47.2505, "step": 509 }, { "epoch": 12.143283582089552, "grad_norm": 25.425495147705078, "learning_rate": 4.0595238095238095e-06, "loss": 48.6289, "step": 510 }, { "epoch": 12.167164179104478, "grad_norm": 29.32784652709961, "learning_rate": 4.047619047619048e-06, "loss": 47.7772, "step": 511 }, { "epoch": 12.191044776119403, "grad_norm": 20.661781311035156, "learning_rate": 4.035714285714286e-06, "loss": 47.1414, "step": 512 }, { "epoch": 12.214925373134328, "grad_norm": 31.4210205078125, "learning_rate": 4.023809523809524e-06, "loss": 47.4312, "step": 513 }, { "epoch": 12.238805970149254, "grad_norm": 32.390071868896484, "learning_rate": 4.011904761904763e-06, "loss": 49.9899, "step": 514 }, { "epoch": 12.26268656716418, "grad_norm": 17.431835174560547, "learning_rate": 4.000000000000001e-06, "loss": 48.8975, "step": 515 }, { "epoch": 12.286567164179104, "grad_norm": 29.32766342163086, "learning_rate": 3.9880952380952386e-06, "loss": 48.8764, "step": 516 }, { "epoch": 12.31044776119403, "grad_norm": 29.523069381713867, "learning_rate": 3.9761904761904764e-06, "loss": 48.2602, "step": 517 }, { "epoch": 12.334328358208955, "grad_norm": 23.866840362548828, "learning_rate": 3.964285714285714e-06, "loss": 47.4016, "step": 518 }, { "epoch": 12.35820895522388, "grad_norm": 27.464962005615234, "learning_rate": 3.952380952380952e-06, "loss": 48.2559, "step": 519 }, { "epoch": 12.382089552238806, "grad_norm": 19.796552658081055, "learning_rate": 3.940476190476191e-06, "loss": 48.7665, "step": 520 }, { "epoch": 12.405970149253731, "grad_norm": 18.637983322143555, "learning_rate": 3.928571428571429e-06, "loss": 48.1456, "step": 521 }, { "epoch": 12.429850746268656, "grad_norm": 22.065799713134766, "learning_rate": 3.916666666666667e-06, "loss": 48.7803, "step": 522 }, { "epoch": 12.453731343283582, "grad_norm": 22.648218154907227, "learning_rate": 3.9047619047619055e-06, "loss": 47.3376, "step": 523 }, { "epoch": 12.477611940298507, "grad_norm": 17.55946922302246, "learning_rate": 3.892857142857143e-06, "loss": 47.6002, "step": 524 }, { "epoch": 12.501492537313434, "grad_norm": 19.173139572143555, "learning_rate": 3.880952380952381e-06, "loss": 48.8976, "step": 525 }, { "epoch": 12.525373134328358, "grad_norm": 24.052696228027344, "learning_rate": 3.869047619047619e-06, "loss": 48.1851, "step": 526 }, { "epoch": 12.549253731343283, "grad_norm": 19.28683090209961, "learning_rate": 3.857142857142858e-06, "loss": 48.0342, "step": 527 }, { "epoch": 12.57313432835821, "grad_norm": 21.528470993041992, "learning_rate": 3.845238095238096e-06, "loss": 49.3597, "step": 528 }, { "epoch": 12.597014925373134, "grad_norm": 22.880159378051758, "learning_rate": 3.833333333333334e-06, "loss": 47.9594, "step": 529 }, { "epoch": 12.620895522388059, "grad_norm": 19.00438117980957, "learning_rate": 3.8214285714285715e-06, "loss": 47.2837, "step": 530 }, { "epoch": 12.644776119402986, "grad_norm": 22.21845054626465, "learning_rate": 3.80952380952381e-06, "loss": 47.1453, "step": 531 }, { "epoch": 12.66865671641791, "grad_norm": 18.551712036132812, "learning_rate": 3.7976190476190477e-06, "loss": 47.9594, "step": 532 }, { "epoch": 12.692537313432837, "grad_norm": 17.805360794067383, "learning_rate": 3.785714285714286e-06, "loss": 49.1036, "step": 533 }, { "epoch": 12.716417910447761, "grad_norm": 14.508918762207031, "learning_rate": 3.773809523809524e-06, "loss": 48.1203, "step": 534 }, { "epoch": 12.740298507462686, "grad_norm": 19.395994186401367, "learning_rate": 3.761904761904762e-06, "loss": 47.7891, "step": 535 }, { "epoch": 12.764179104477613, "grad_norm": 27.492908477783203, "learning_rate": 3.7500000000000005e-06, "loss": 48.9027, "step": 536 }, { "epoch": 12.788059701492537, "grad_norm": 21.751968383789062, "learning_rate": 3.7380952380952384e-06, "loss": 48.0929, "step": 537 }, { "epoch": 12.811940298507462, "grad_norm": 24.78274917602539, "learning_rate": 3.7261904761904767e-06, "loss": 48.1678, "step": 538 }, { "epoch": 12.835820895522389, "grad_norm": 26.319196701049805, "learning_rate": 3.7142857142857146e-06, "loss": 49.1874, "step": 539 }, { "epoch": 12.859701492537313, "grad_norm": 20.670148849487305, "learning_rate": 3.702380952380953e-06, "loss": 48.8441, "step": 540 }, { "epoch": 12.883582089552238, "grad_norm": 23.578706741333008, "learning_rate": 3.690476190476191e-06, "loss": 47.1627, "step": 541 }, { "epoch": 12.907462686567165, "grad_norm": 23.807973861694336, "learning_rate": 3.678571428571429e-06, "loss": 47.493, "step": 542 }, { "epoch": 12.93134328358209, "grad_norm": 20.977373123168945, "learning_rate": 3.6666666666666666e-06, "loss": 49.3489, "step": 543 }, { "epoch": 12.955223880597014, "grad_norm": 21.219995498657227, "learning_rate": 3.654761904761905e-06, "loss": 49.8562, "step": 544 }, { "epoch": 12.97910447761194, "grad_norm": 17.777210235595703, "learning_rate": 3.642857142857143e-06, "loss": 48.4018, "step": 545 }, { "epoch": 13.0, "grad_norm": 17.52475929260254, "learning_rate": 3.630952380952381e-06, "loss": 42.3621, "step": 546 }, { "epoch": 13.023880597014925, "grad_norm": 23.431884765625, "learning_rate": 3.6190476190476194e-06, "loss": 49.0982, "step": 547 }, { "epoch": 13.047761194029851, "grad_norm": 25.512338638305664, "learning_rate": 3.6071428571428573e-06, "loss": 47.5758, "step": 548 }, { "epoch": 13.071641791044776, "grad_norm": 25.41205406188965, "learning_rate": 3.5952380952380956e-06, "loss": 49.0519, "step": 549 }, { "epoch": 13.0955223880597, "grad_norm": 20.511945724487305, "learning_rate": 3.5833333333333335e-06, "loss": 48.9739, "step": 550 }, { "epoch": 13.119402985074627, "grad_norm": 18.88302993774414, "learning_rate": 3.5714285714285718e-06, "loss": 47.0551, "step": 551 }, { "epoch": 13.143283582089552, "grad_norm": 17.176782608032227, "learning_rate": 3.55952380952381e-06, "loss": 48.0771, "step": 552 }, { "epoch": 13.167164179104478, "grad_norm": 19.72154426574707, "learning_rate": 3.547619047619048e-06, "loss": 49.5084, "step": 553 }, { "epoch": 13.191044776119403, "grad_norm": 24.780994415283203, "learning_rate": 3.5357142857142863e-06, "loss": 46.5557, "step": 554 }, { "epoch": 13.214925373134328, "grad_norm": 20.380996704101562, "learning_rate": 3.523809523809524e-06, "loss": 48.841, "step": 555 }, { "epoch": 13.238805970149254, "grad_norm": 26.90860939025879, "learning_rate": 3.511904761904762e-06, "loss": 47.6185, "step": 556 }, { "epoch": 13.26268656716418, "grad_norm": 21.40388298034668, "learning_rate": 3.5e-06, "loss": 47.787, "step": 557 }, { "epoch": 13.286567164179104, "grad_norm": 24.708845138549805, "learning_rate": 3.4880952380952383e-06, "loss": 47.1974, "step": 558 }, { "epoch": 13.31044776119403, "grad_norm": 25.317148208618164, "learning_rate": 3.476190476190476e-06, "loss": 49.2282, "step": 559 }, { "epoch": 13.334328358208955, "grad_norm": 22.903011322021484, "learning_rate": 3.4642857142857145e-06, "loss": 47.0762, "step": 560 }, { "epoch": 13.35820895522388, "grad_norm": 23.626604080200195, "learning_rate": 3.4523809523809528e-06, "loss": 47.3622, "step": 561 }, { "epoch": 13.382089552238806, "grad_norm": 16.69061279296875, "learning_rate": 3.4404761904761907e-06, "loss": 48.5621, "step": 562 }, { "epoch": 13.405970149253731, "grad_norm": 20.52508544921875, "learning_rate": 3.428571428571429e-06, "loss": 47.6565, "step": 563 }, { "epoch": 13.429850746268656, "grad_norm": 25.125743865966797, "learning_rate": 3.416666666666667e-06, "loss": 48.1353, "step": 564 }, { "epoch": 13.453731343283582, "grad_norm": 20.697166442871094, "learning_rate": 3.404761904761905e-06, "loss": 47.9368, "step": 565 }, { "epoch": 13.477611940298507, "grad_norm": 22.396892547607422, "learning_rate": 3.3928571428571435e-06, "loss": 48.2956, "step": 566 }, { "epoch": 13.501492537313434, "grad_norm": 24.770437240600586, "learning_rate": 3.3809523809523814e-06, "loss": 48.4467, "step": 567 }, { "epoch": 13.525373134328358, "grad_norm": 19.44706153869629, "learning_rate": 3.3690476190476197e-06, "loss": 48.3155, "step": 568 }, { "epoch": 13.549253731343283, "grad_norm": 27.680660247802734, "learning_rate": 3.357142857142857e-06, "loss": 47.9039, "step": 569 }, { "epoch": 13.57313432835821, "grad_norm": 21.89419174194336, "learning_rate": 3.3452380952380954e-06, "loss": 47.9416, "step": 570 }, { "epoch": 13.597014925373134, "grad_norm": 19.10918426513672, "learning_rate": 3.3333333333333333e-06, "loss": 48.45, "step": 571 }, { "epoch": 13.620895522388059, "grad_norm": 29.83106231689453, "learning_rate": 3.3214285714285716e-06, "loss": 48.9583, "step": 572 }, { "epoch": 13.644776119402986, "grad_norm": 28.05882453918457, "learning_rate": 3.3095238095238095e-06, "loss": 49.108, "step": 573 }, { "epoch": 13.66865671641791, "grad_norm": 17.379384994506836, "learning_rate": 3.297619047619048e-06, "loss": 48.4707, "step": 574 }, { "epoch": 13.692537313432837, "grad_norm": 19.15117645263672, "learning_rate": 3.285714285714286e-06, "loss": 48.476, "step": 575 }, { "epoch": 13.716417910447761, "grad_norm": 23.892152786254883, "learning_rate": 3.273809523809524e-06, "loss": 48.0321, "step": 576 }, { "epoch": 13.740298507462686, "grad_norm": 18.658008575439453, "learning_rate": 3.2619047619047623e-06, "loss": 47.2192, "step": 577 }, { "epoch": 13.764179104477613, "grad_norm": 16.940099716186523, "learning_rate": 3.2500000000000002e-06, "loss": 49.2263, "step": 578 }, { "epoch": 13.788059701492537, "grad_norm": 25.7972412109375, "learning_rate": 3.2380952380952385e-06, "loss": 47.5039, "step": 579 }, { "epoch": 13.811940298507462, "grad_norm": 28.928129196166992, "learning_rate": 3.226190476190477e-06, "loss": 47.9264, "step": 580 }, { "epoch": 13.835820895522389, "grad_norm": 23.67597007751465, "learning_rate": 3.2142857142857147e-06, "loss": 49.1464, "step": 581 }, { "epoch": 13.859701492537313, "grad_norm": 18.345443725585938, "learning_rate": 3.202380952380952e-06, "loss": 47.888, "step": 582 }, { "epoch": 13.883582089552238, "grad_norm": 19.80716896057129, "learning_rate": 3.1904761904761905e-06, "loss": 47.4324, "step": 583 }, { "epoch": 13.907462686567165, "grad_norm": 20.488346099853516, "learning_rate": 3.178571428571429e-06, "loss": 48.3033, "step": 584 }, { "epoch": 13.93134328358209, "grad_norm": 22.3657283782959, "learning_rate": 3.1666666666666667e-06, "loss": 48.1474, "step": 585 }, { "epoch": 13.955223880597014, "grad_norm": 17.457408905029297, "learning_rate": 3.154761904761905e-06, "loss": 47.2418, "step": 586 }, { "epoch": 13.97910447761194, "grad_norm": NaN, "learning_rate": 3.142857142857143e-06, "loss": 54.1812, "step": 587 }, { "epoch": 14.0, "grad_norm": 17.137672424316406, "learning_rate": 3.142857142857143e-06, "loss": 42.3703, "step": 588 }, { "epoch": 14.023880597014925, "grad_norm": 20.55642318725586, "learning_rate": 3.130952380952381e-06, "loss": 49.4628, "step": 589 }, { "epoch": 14.047761194029851, "grad_norm": 19.925596237182617, "learning_rate": 3.1190476190476195e-06, "loss": 47.5266, "step": 590 }, { "epoch": 14.071641791044776, "grad_norm": 12.49276065826416, "learning_rate": 3.1071428571428574e-06, "loss": 47.8654, "step": 591 }, { "epoch": 14.0955223880597, "grad_norm": 17.266550064086914, "learning_rate": 3.0952380952380957e-06, "loss": 48.4362, "step": 592 }, { "epoch": 14.119402985074627, "grad_norm": 18.234397888183594, "learning_rate": 3.0833333333333336e-06, "loss": 48.9532, "step": 593 }, { "epoch": 14.143283582089552, "grad_norm": 19.880165100097656, "learning_rate": 3.071428571428572e-06, "loss": 48.0088, "step": 594 }, { "epoch": 14.167164179104478, "grad_norm": 23.04216766357422, "learning_rate": 3.05952380952381e-06, "loss": 48.0934, "step": 595 }, { "epoch": 14.191044776119403, "grad_norm": 19.199676513671875, "learning_rate": 3.047619047619048e-06, "loss": 48.3845, "step": 596 }, { "epoch": 14.214925373134328, "grad_norm": 20.758337020874023, "learning_rate": 3.0357142857142856e-06, "loss": 47.4652, "step": 597 }, { "epoch": 14.238805970149254, "grad_norm": 17.532787322998047, "learning_rate": 3.023809523809524e-06, "loss": 48.0212, "step": 598 }, { "epoch": 14.26268656716418, "grad_norm": 16.547094345092773, "learning_rate": 3.011904761904762e-06, "loss": 48.6113, "step": 599 }, { "epoch": 14.286567164179104, "grad_norm": 16.324464797973633, "learning_rate": 3e-06, "loss": 47.9735, "step": 600 }, { "epoch": 14.31044776119403, "grad_norm": 16.54167938232422, "learning_rate": 2.9880952380952384e-06, "loss": 47.4436, "step": 601 }, { "epoch": 14.334328358208955, "grad_norm": 23.455759048461914, "learning_rate": 2.9761904761904763e-06, "loss": 47.6631, "step": 602 }, { "epoch": 14.35820895522388, "grad_norm": 19.159008026123047, "learning_rate": 2.9642857142857146e-06, "loss": 48.0291, "step": 603 }, { "epoch": 14.382089552238806, "grad_norm": 18.66881561279297, "learning_rate": 2.9523809523809525e-06, "loss": 46.4582, "step": 604 }, { "epoch": 14.405970149253731, "grad_norm": 19.129064559936523, "learning_rate": 2.9404761904761908e-06, "loss": 49.4455, "step": 605 }, { "epoch": 14.429850746268656, "grad_norm": NaN, "learning_rate": 2.928571428571429e-06, "loss": 78.6564, "step": 606 }, { "epoch": 14.453731343283582, "grad_norm": 18.47364044189453, "learning_rate": 2.928571428571429e-06, "loss": 48.1748, "step": 607 }, { "epoch": 14.477611940298507, "grad_norm": 17.920883178710938, "learning_rate": 2.916666666666667e-06, "loss": 47.6447, "step": 608 }, { "epoch": 14.501492537313434, "grad_norm": 18.263038635253906, "learning_rate": 2.9047619047619053e-06, "loss": 48.7324, "step": 609 }, { "epoch": 14.525373134328358, "grad_norm": 24.323266983032227, "learning_rate": 2.892857142857143e-06, "loss": 48.6135, "step": 610 }, { "epoch": 14.549253731343283, "grad_norm": 21.56492042541504, "learning_rate": 2.880952380952381e-06, "loss": 47.0007, "step": 611 }, { "epoch": 14.57313432835821, "grad_norm": 17.741748809814453, "learning_rate": 2.869047619047619e-06, "loss": 46.3136, "step": 612 }, { "epoch": 14.597014925373134, "grad_norm": 17.218914031982422, "learning_rate": 2.8571428571428573e-06, "loss": 47.6417, "step": 613 }, { "epoch": 14.620895522388059, "grad_norm": 22.856996536254883, "learning_rate": 2.8452380952380956e-06, "loss": 47.6898, "step": 614 }, { "epoch": 14.644776119402986, "grad_norm": NaN, "learning_rate": 2.8333333333333335e-06, "loss": 53.529, "step": 615 }, { "epoch": 14.66865671641791, "grad_norm": 23.29751968383789, "learning_rate": 2.8333333333333335e-06, "loss": 48.164, "step": 616 }, { "epoch": 14.692537313432837, "grad_norm": 15.633321762084961, "learning_rate": 2.8214285714285718e-06, "loss": 46.9866, "step": 617 }, { "epoch": 14.716417910447761, "grad_norm": 21.713376998901367, "learning_rate": 2.8095238095238096e-06, "loss": 48.2856, "step": 618 }, { "epoch": 14.740298507462686, "grad_norm": 17.07369613647461, "learning_rate": 2.797619047619048e-06, "loss": 46.4404, "step": 619 }, { "epoch": 14.764179104477613, "grad_norm": 14.855449676513672, "learning_rate": 2.785714285714286e-06, "loss": 48.2668, "step": 620 }, { "epoch": 14.788059701492537, "grad_norm": 16.479616165161133, "learning_rate": 2.773809523809524e-06, "loss": 49.2661, "step": 621 }, { "epoch": 14.811940298507462, "grad_norm": 14.471490859985352, "learning_rate": 2.7619047619047625e-06, "loss": 47.0484, "step": 622 }, { "epoch": 14.835820895522389, "grad_norm": 19.018714904785156, "learning_rate": 2.7500000000000004e-06, "loss": 49.2253, "step": 623 }, { "epoch": 14.859701492537313, "grad_norm": 16.21799087524414, "learning_rate": 2.7380952380952387e-06, "loss": 49.0738, "step": 624 }, { "epoch": 14.883582089552238, "grad_norm": 20.86383628845215, "learning_rate": 2.726190476190476e-06, "loss": 48.4231, "step": 625 }, { "epoch": 14.907462686567165, "grad_norm": 20.60930633544922, "learning_rate": 2.7142857142857144e-06, "loss": 47.7464, "step": 626 }, { "epoch": 14.93134328358209, "grad_norm": 20.909135818481445, "learning_rate": 2.7023809523809523e-06, "loss": 48.519, "step": 627 }, { "epoch": 14.955223880597014, "grad_norm": 18.555694580078125, "learning_rate": 2.6904761904761906e-06, "loss": 48.1625, "step": 628 }, { "epoch": 14.97910447761194, "grad_norm": 18.154813766479492, "learning_rate": 2.6785714285714285e-06, "loss": 48.9444, "step": 629 }, { "epoch": 15.0, "grad_norm": 19.308523178100586, "learning_rate": 2.666666666666667e-06, "loss": 42.1936, "step": 630 }, { "epoch": 15.023880597014925, "grad_norm": 22.725357055664062, "learning_rate": 2.654761904761905e-06, "loss": 49.5597, "step": 631 }, { "epoch": 15.047761194029851, "grad_norm": 18.862451553344727, "learning_rate": 2.642857142857143e-06, "loss": 46.914, "step": 632 }, { "epoch": 15.071641791044776, "grad_norm": 19.017065048217773, "learning_rate": 2.6309523809523813e-06, "loss": 47.7233, "step": 633 }, { "epoch": 15.0955223880597, "grad_norm": 19.03627586364746, "learning_rate": 2.6190476190476192e-06, "loss": 48.0406, "step": 634 }, { "epoch": 15.119402985074627, "grad_norm": 18.53116798400879, "learning_rate": 2.6071428571428575e-06, "loss": 47.3259, "step": 635 }, { "epoch": 15.143283582089552, "grad_norm": 19.265275955200195, "learning_rate": 2.595238095238096e-06, "loss": 47.2465, "step": 636 }, { "epoch": 15.167164179104478, "grad_norm": 19.497289657592773, "learning_rate": 2.5833333333333337e-06, "loss": 48.5984, "step": 637 }, { "epoch": 15.191044776119403, "grad_norm": 20.183780670166016, "learning_rate": 2.571428571428571e-06, "loss": 46.6221, "step": 638 }, { "epoch": 15.214925373134328, "grad_norm": 22.911672592163086, "learning_rate": 2.5595238095238095e-06, "loss": 48.0178, "step": 639 }, { "epoch": 15.238805970149254, "grad_norm": 20.678709030151367, "learning_rate": 2.547619047619048e-06, "loss": 47.0322, "step": 640 }, { "epoch": 15.26268656716418, "grad_norm": 18.579042434692383, "learning_rate": 2.5357142857142857e-06, "loss": 48.1428, "step": 641 }, { "epoch": 15.286567164179104, "grad_norm": 23.61576271057129, "learning_rate": 2.523809523809524e-06, "loss": 48.444, "step": 642 }, { "epoch": 15.31044776119403, "grad_norm": 19.602746963500977, "learning_rate": 2.511904761904762e-06, "loss": 48.2582, "step": 643 }, { "epoch": 15.334328358208955, "grad_norm": 12.509607315063477, "learning_rate": 2.5e-06, "loss": 48.8531, "step": 644 }, { "epoch": 15.35820895522388, "grad_norm": 18.749767303466797, "learning_rate": 2.4880952380952385e-06, "loss": 47.6453, "step": 645 }, { "epoch": 15.382089552238806, "grad_norm": 20.612041473388672, "learning_rate": 2.4761904761904764e-06, "loss": 48.7038, "step": 646 }, { "epoch": 15.405970149253731, "grad_norm": 18.65719985961914, "learning_rate": 2.4642857142857147e-06, "loss": 47.7954, "step": 647 }, { "epoch": 15.429850746268656, "grad_norm": 22.636686325073242, "learning_rate": 2.4523809523809526e-06, "loss": 48.1164, "step": 648 }, { "epoch": 15.453731343283582, "grad_norm": 20.93446922302246, "learning_rate": 2.4404761904761905e-06, "loss": 48.5955, "step": 649 }, { "epoch": 15.477611940298507, "grad_norm": 20.77125358581543, "learning_rate": 2.428571428571429e-06, "loss": 48.4369, "step": 650 }, { "epoch": 15.501492537313434, "grad_norm": 17.003498077392578, "learning_rate": 2.4166666666666667e-06, "loss": 49.0355, "step": 651 }, { "epoch": 15.525373134328358, "grad_norm": 20.743436813354492, "learning_rate": 2.404761904761905e-06, "loss": 47.8368, "step": 652 }, { "epoch": 15.549253731343283, "grad_norm": NaN, "learning_rate": 2.3928571428571433e-06, "loss": 41.6371, "step": 653 }, { "epoch": 15.57313432835821, "grad_norm": 21.716781616210938, "learning_rate": 2.3928571428571433e-06, "loss": 48.5806, "step": 654 }, { "epoch": 15.597014925373134, "grad_norm": 18.8812198638916, "learning_rate": 2.380952380952381e-06, "loss": 49.0707, "step": 655 }, { "epoch": 15.620895522388059, "grad_norm": 22.305049896240234, "learning_rate": 2.369047619047619e-06, "loss": 47.7556, "step": 656 }, { "epoch": 15.644776119402986, "grad_norm": 20.51401710510254, "learning_rate": 2.3571428571428574e-06, "loss": 48.1588, "step": 657 }, { "epoch": 15.66865671641791, "grad_norm": 17.691770553588867, "learning_rate": 2.3452380952380953e-06, "loss": 47.5187, "step": 658 }, { "epoch": 15.692537313432837, "grad_norm": 22.343585968017578, "learning_rate": 2.3333333333333336e-06, "loss": 47.5725, "step": 659 }, { "epoch": 15.716417910447761, "grad_norm": 21.656587600708008, "learning_rate": 2.321428571428572e-06, "loss": 47.6903, "step": 660 }, { "epoch": 15.740298507462686, "grad_norm": 20.632055282592773, "learning_rate": 2.3095238095238098e-06, "loss": 47.4526, "step": 661 }, { "epoch": 15.764179104477613, "grad_norm": 22.324811935424805, "learning_rate": 2.2976190476190477e-06, "loss": 47.3316, "step": 662 }, { "epoch": 15.788059701492537, "grad_norm": 19.320737838745117, "learning_rate": 2.285714285714286e-06, "loss": 48.2315, "step": 663 }, { "epoch": 15.811940298507462, "grad_norm": 18.58050537109375, "learning_rate": 2.273809523809524e-06, "loss": 47.9147, "step": 664 }, { "epoch": 15.835820895522389, "grad_norm": 20.37384796142578, "learning_rate": 2.261904761904762e-06, "loss": 47.5874, "step": 665 }, { "epoch": 15.859701492537313, "grad_norm": 20.893856048583984, "learning_rate": 2.25e-06, "loss": 49.225, "step": 666 }, { "epoch": 15.883582089552238, "grad_norm": 18.4589786529541, "learning_rate": 2.2380952380952384e-06, "loss": 47.3042, "step": 667 }, { "epoch": 15.907462686567165, "grad_norm": 20.845996856689453, "learning_rate": 2.2261904761904763e-06, "loss": 47.3255, "step": 668 }, { "epoch": 15.93134328358209, "grad_norm": 20.149137496948242, "learning_rate": 2.2142857142857146e-06, "loss": 48.6543, "step": 669 }, { "epoch": 15.955223880597014, "grad_norm": 14.768882751464844, "learning_rate": 2.2023809523809525e-06, "loss": 46.8274, "step": 670 }, { "epoch": 15.97910447761194, "grad_norm": 26.926074981689453, "learning_rate": 2.1904761904761908e-06, "loss": 48.2035, "step": 671 }, { "epoch": 16.0, "grad_norm": 22.840618133544922, "learning_rate": 2.1785714285714286e-06, "loss": 42.8242, "step": 672 }, { "epoch": 16.023880597014927, "grad_norm": 16.183008193969727, "learning_rate": 2.166666666666667e-06, "loss": 47.8309, "step": 673 }, { "epoch": 16.04776119402985, "grad_norm": 20.603744506835938, "learning_rate": 2.154761904761905e-06, "loss": 48.5197, "step": 674 }, { "epoch": 16.071641791044776, "grad_norm": 26.492107391357422, "learning_rate": 2.1428571428571427e-06, "loss": 47.2312, "step": 675 }, { "epoch": 16.095522388059702, "grad_norm": 19.786901473999023, "learning_rate": 2.130952380952381e-06, "loss": 49.6201, "step": 676 }, { "epoch": 16.119402985074625, "grad_norm": 18.150909423828125, "learning_rate": 2.1190476190476194e-06, "loss": 48.7407, "step": 677 }, { "epoch": 16.143283582089552, "grad_norm": 18.797983169555664, "learning_rate": 2.1071428571428572e-06, "loss": 47.0801, "step": 678 }, { "epoch": 16.16716417910448, "grad_norm": 16.088953018188477, "learning_rate": 2.0952380952380955e-06, "loss": 47.6509, "step": 679 }, { "epoch": 16.1910447761194, "grad_norm": 20.359085083007812, "learning_rate": 2.0833333333333334e-06, "loss": 48.9226, "step": 680 }, { "epoch": 16.214925373134328, "grad_norm": 21.99265480041504, "learning_rate": 2.0714285714285717e-06, "loss": 47.3775, "step": 681 }, { "epoch": 16.238805970149254, "grad_norm": 18.616743087768555, "learning_rate": 2.0595238095238096e-06, "loss": 45.8448, "step": 682 }, { "epoch": 16.262686567164177, "grad_norm": 19.6337947845459, "learning_rate": 2.047619047619048e-06, "loss": 48.2077, "step": 683 }, { "epoch": 16.286567164179104, "grad_norm": 23.881439208984375, "learning_rate": 2.035714285714286e-06, "loss": 48.6796, "step": 684 }, { "epoch": 16.31044776119403, "grad_norm": 19.665023803710938, "learning_rate": 2.023809523809524e-06, "loss": 48.7275, "step": 685 }, { "epoch": 16.334328358208957, "grad_norm": 18.438793182373047, "learning_rate": 2.011904761904762e-06, "loss": 49.585, "step": 686 }, { "epoch": 16.35820895522388, "grad_norm": 17.073816299438477, "learning_rate": 2.0000000000000003e-06, "loss": 47.4548, "step": 687 }, { "epoch": 16.382089552238806, "grad_norm": 20.504276275634766, "learning_rate": 1.9880952380952382e-06, "loss": 47.3555, "step": 688 }, { "epoch": 16.405970149253733, "grad_norm": 21.564546585083008, "learning_rate": 1.976190476190476e-06, "loss": 47.6304, "step": 689 }, { "epoch": 16.429850746268656, "grad_norm": 16.773197174072266, "learning_rate": 1.9642857142857144e-06, "loss": 49.078, "step": 690 }, { "epoch": 16.453731343283582, "grad_norm": 22.77934455871582, "learning_rate": 1.9523809523809527e-06, "loss": 47.8289, "step": 691 }, { "epoch": 16.47761194029851, "grad_norm": 17.375993728637695, "learning_rate": 1.9404761904761906e-06, "loss": 48.4812, "step": 692 }, { "epoch": 16.501492537313432, "grad_norm": 21.407329559326172, "learning_rate": 1.928571428571429e-06, "loss": 48.2934, "step": 693 }, { "epoch": 16.52537313432836, "grad_norm": 15.673316955566406, "learning_rate": 1.916666666666667e-06, "loss": 46.7304, "step": 694 }, { "epoch": 16.549253731343285, "grad_norm": 24.577089309692383, "learning_rate": 1.904761904761905e-06, "loss": 47.9352, "step": 695 }, { "epoch": 16.573134328358208, "grad_norm": 24.46076774597168, "learning_rate": 1.892857142857143e-06, "loss": 48.8173, "step": 696 }, { "epoch": 16.597014925373134, "grad_norm": 14.248388290405273, "learning_rate": 1.880952380952381e-06, "loss": 48.5858, "step": 697 }, { "epoch": 16.62089552238806, "grad_norm": 16.925329208374023, "learning_rate": 1.8690476190476192e-06, "loss": 47.8278, "step": 698 }, { "epoch": 16.644776119402984, "grad_norm": 25.52614402770996, "learning_rate": 1.8571428571428573e-06, "loss": 48.1248, "step": 699 }, { "epoch": 16.66865671641791, "grad_norm": 21.011341094970703, "learning_rate": 1.8452380952380954e-06, "loss": 47.8154, "step": 700 }, { "epoch": 16.692537313432837, "grad_norm": 14.694896697998047, "learning_rate": 1.8333333333333333e-06, "loss": 47.9668, "step": 701 }, { "epoch": 16.71641791044776, "grad_norm": 22.32903480529785, "learning_rate": 1.8214285714285716e-06, "loss": 48.6784, "step": 702 }, { "epoch": 16.740298507462686, "grad_norm": 17.19482421875, "learning_rate": 1.8095238095238097e-06, "loss": 46.9973, "step": 703 }, { "epoch": 16.764179104477613, "grad_norm": 14.590733528137207, "learning_rate": 1.7976190476190478e-06, "loss": 47.2393, "step": 704 }, { "epoch": 16.788059701492536, "grad_norm": 17.131982803344727, "learning_rate": 1.7857142857142859e-06, "loss": 47.9412, "step": 705 }, { "epoch": 16.811940298507462, "grad_norm": 18.513992309570312, "learning_rate": 1.773809523809524e-06, "loss": 48.8777, "step": 706 }, { "epoch": 16.83582089552239, "grad_norm": 17.625539779663086, "learning_rate": 1.761904761904762e-06, "loss": 48.3885, "step": 707 }, { "epoch": 16.85970149253731, "grad_norm": 16.540056228637695, "learning_rate": 1.75e-06, "loss": 47.8561, "step": 708 }, { "epoch": 16.883582089552238, "grad_norm": 20.070533752441406, "learning_rate": 1.738095238095238e-06, "loss": 46.6418, "step": 709 }, { "epoch": 16.907462686567165, "grad_norm": 18.742460250854492, "learning_rate": 1.7261904761904764e-06, "loss": 46.7471, "step": 710 }, { "epoch": 16.93134328358209, "grad_norm": 17.491954803466797, "learning_rate": 1.7142857142857145e-06, "loss": 47.5558, "step": 711 }, { "epoch": 16.955223880597014, "grad_norm": 17.457130432128906, "learning_rate": 1.7023809523809526e-06, "loss": 47.4441, "step": 712 }, { "epoch": 16.97910447761194, "grad_norm": 21.053844451904297, "learning_rate": 1.6904761904761907e-06, "loss": 48.1931, "step": 713 }, { "epoch": 17.0, "grad_norm": 16.943801879882812, "learning_rate": 1.6785714285714286e-06, "loss": 41.9934, "step": 714 }, { "epoch": 17.023880597014927, "grad_norm": 21.56785011291504, "learning_rate": 1.6666666666666667e-06, "loss": 47.1652, "step": 715 }, { "epoch": 17.04776119402985, "grad_norm": 21.193382263183594, "learning_rate": 1.6547619047619048e-06, "loss": 47.6751, "step": 716 }, { "epoch": 17.071641791044776, "grad_norm": 16.245115280151367, "learning_rate": 1.642857142857143e-06, "loss": 47.4133, "step": 717 }, { "epoch": 17.095522388059702, "grad_norm": 18.834646224975586, "learning_rate": 1.6309523809523812e-06, "loss": 48.145, "step": 718 }, { "epoch": 17.119402985074625, "grad_norm": 15.769698143005371, "learning_rate": 1.6190476190476193e-06, "loss": 48.1181, "step": 719 }, { "epoch": 17.143283582089552, "grad_norm": 13.460511207580566, "learning_rate": 1.6071428571428574e-06, "loss": 49.1229, "step": 720 }, { "epoch": 17.16716417910448, "grad_norm": 18.58087158203125, "learning_rate": 1.5952380952380953e-06, "loss": 47.5095, "step": 721 }, { "epoch": 17.1910447761194, "grad_norm": 18.607332229614258, "learning_rate": 1.5833333333333333e-06, "loss": 49.1334, "step": 722 }, { "epoch": 17.214925373134328, "grad_norm": 15.046488761901855, "learning_rate": 1.5714285714285714e-06, "loss": 47.6151, "step": 723 }, { "epoch": 17.238805970149254, "grad_norm": 17.442358016967773, "learning_rate": 1.5595238095238098e-06, "loss": 47.3771, "step": 724 }, { "epoch": 17.262686567164177, "grad_norm": 11.690101623535156, "learning_rate": 1.5476190476190479e-06, "loss": 48.1095, "step": 725 }, { "epoch": 17.286567164179104, "grad_norm": 17.945192337036133, "learning_rate": 1.535714285714286e-06, "loss": 47.8941, "step": 726 }, { "epoch": 17.31044776119403, "grad_norm": 13.878116607666016, "learning_rate": 1.523809523809524e-06, "loss": 47.6422, "step": 727 }, { "epoch": 17.334328358208957, "grad_norm": 15.942928314208984, "learning_rate": 1.511904761904762e-06, "loss": 46.9964, "step": 728 }, { "epoch": 17.35820895522388, "grad_norm": 13.57482624053955, "learning_rate": 1.5e-06, "loss": 47.1832, "step": 729 }, { "epoch": 17.382089552238806, "grad_norm": 13.781617164611816, "learning_rate": 1.4880952380952381e-06, "loss": 48.9621, "step": 730 }, { "epoch": 17.405970149253733, "grad_norm": 14.26857852935791, "learning_rate": 1.4761904761904762e-06, "loss": 48.6631, "step": 731 }, { "epoch": 17.429850746268656, "grad_norm": 16.23444938659668, "learning_rate": 1.4642857142857145e-06, "loss": 46.84, "step": 732 }, { "epoch": 17.453731343283582, "grad_norm": 17.442630767822266, "learning_rate": 1.4523809523809526e-06, "loss": 48.2996, "step": 733 }, { "epoch": 17.47761194029851, "grad_norm": 14.329082489013672, "learning_rate": 1.4404761904761905e-06, "loss": 47.396, "step": 734 }, { "epoch": 17.501492537313432, "grad_norm": 14.772257804870605, "learning_rate": 1.4285714285714286e-06, "loss": 48.5733, "step": 735 }, { "epoch": 17.52537313432836, "grad_norm": 14.331324577331543, "learning_rate": 1.4166666666666667e-06, "loss": 48.2969, "step": 736 }, { "epoch": 17.549253731343285, "grad_norm": 17.498600006103516, "learning_rate": 1.4047619047619048e-06, "loss": 48.0221, "step": 737 }, { "epoch": 17.573134328358208, "grad_norm": 16.155025482177734, "learning_rate": 1.392857142857143e-06, "loss": 47.9848, "step": 738 }, { "epoch": 17.597014925373134, "grad_norm": 15.552813529968262, "learning_rate": 1.3809523809523812e-06, "loss": 48.4413, "step": 739 }, { "epoch": 17.62089552238806, "grad_norm": 15.887310981750488, "learning_rate": 1.3690476190476193e-06, "loss": 47.6463, "step": 740 }, { "epoch": 17.644776119402984, "grad_norm": 17.783411026000977, "learning_rate": 1.3571428571428572e-06, "loss": 47.8009, "step": 741 }, { "epoch": 17.66865671641791, "grad_norm": 17.108932495117188, "learning_rate": 1.3452380952380953e-06, "loss": 47.9888, "step": 742 }, { "epoch": 17.692537313432837, "grad_norm": 19.79203224182129, "learning_rate": 1.3333333333333334e-06, "loss": 48.5732, "step": 743 }, { "epoch": 17.71641791044776, "grad_norm": 17.06324005126953, "learning_rate": 1.3214285714285715e-06, "loss": 48.4815, "step": 744 }, { "epoch": 17.740298507462686, "grad_norm": 17.399097442626953, "learning_rate": 1.3095238095238096e-06, "loss": 47.5591, "step": 745 }, { "epoch": 17.764179104477613, "grad_norm": 15.836935997009277, "learning_rate": 1.297619047619048e-06, "loss": 47.994, "step": 746 }, { "epoch": 17.788059701492536, "grad_norm": 18.20856475830078, "learning_rate": 1.2857142857142856e-06, "loss": 47.9979, "step": 747 }, { "epoch": 17.811940298507462, "grad_norm": 19.10239601135254, "learning_rate": 1.273809523809524e-06, "loss": 48.1196, "step": 748 }, { "epoch": 17.83582089552239, "grad_norm": 17.21087646484375, "learning_rate": 1.261904761904762e-06, "loss": 47.8816, "step": 749 }, { "epoch": 17.85970149253731, "grad_norm": 14.792268753051758, "learning_rate": 1.25e-06, "loss": 47.8182, "step": 750 }, { "epoch": 17.883582089552238, "grad_norm": 13.695488929748535, "learning_rate": 1.2380952380952382e-06, "loss": 47.4298, "step": 751 }, { "epoch": 17.907462686567165, "grad_norm": 15.197646141052246, "learning_rate": 1.2261904761904763e-06, "loss": 47.7132, "step": 752 }, { "epoch": 17.93134328358209, "grad_norm": 19.13431739807129, "learning_rate": 1.2142857142857144e-06, "loss": 48.431, "step": 753 }, { "epoch": 17.955223880597014, "grad_norm": 15.690411567687988, "learning_rate": 1.2023809523809525e-06, "loss": 47.4529, "step": 754 }, { "epoch": 17.97910447761194, "grad_norm": 14.75414752960205, "learning_rate": 1.1904761904761906e-06, "loss": 47.9668, "step": 755 }, { "epoch": 18.0, "grad_norm": 11.497115135192871, "learning_rate": 1.1785714285714287e-06, "loss": 41.8653, "step": 756 }, { "epoch": 18.023880597014927, "grad_norm": 16.20159339904785, "learning_rate": 1.1666666666666668e-06, "loss": 47.2871, "step": 757 }, { "epoch": 18.04776119402985, "grad_norm": 15.400497436523438, "learning_rate": 1.1547619047619049e-06, "loss": 46.3673, "step": 758 }, { "epoch": 18.071641791044776, "grad_norm": 12.16773509979248, "learning_rate": 1.142857142857143e-06, "loss": 47.7463, "step": 759 }, { "epoch": 18.095522388059702, "grad_norm": 19.978351593017578, "learning_rate": 1.130952380952381e-06, "loss": 47.4632, "step": 760 }, { "epoch": 18.119402985074625, "grad_norm": 14.090561866760254, "learning_rate": 1.1190476190476192e-06, "loss": 48.9356, "step": 761 }, { "epoch": 18.143283582089552, "grad_norm": 13.143173217773438, "learning_rate": 1.1071428571428573e-06, "loss": 48.1129, "step": 762 }, { "epoch": 18.16716417910448, "grad_norm": 15.609000205993652, "learning_rate": 1.0952380952380954e-06, "loss": 48.8554, "step": 763 }, { "epoch": 18.1910447761194, "grad_norm": 14.012611389160156, "learning_rate": 1.0833333333333335e-06, "loss": 47.6785, "step": 764 }, { "epoch": 18.214925373134328, "grad_norm": 13.417494773864746, "learning_rate": 1.0714285714285714e-06, "loss": 48.2733, "step": 765 }, { "epoch": 18.238805970149254, "grad_norm": 15.562864303588867, "learning_rate": 1.0595238095238097e-06, "loss": 48.0488, "step": 766 }, { "epoch": 18.262686567164177, "grad_norm": 17.083723068237305, "learning_rate": 1.0476190476190478e-06, "loss": 49.3136, "step": 767 }, { "epoch": 18.286567164179104, "grad_norm": 16.564395904541016, "learning_rate": 1.0357142857142859e-06, "loss": 48.1835, "step": 768 }, { "epoch": 18.31044776119403, "grad_norm": 14.694929122924805, "learning_rate": 1.023809523809524e-06, "loss": 48.8632, "step": 769 }, { "epoch": 18.334328358208957, "grad_norm": 13.4928560256958, "learning_rate": 1.011904761904762e-06, "loss": 47.3333, "step": 770 }, { "epoch": 18.35820895522388, "grad_norm": 12.980204582214355, "learning_rate": 1.0000000000000002e-06, "loss": 47.7961, "step": 771 }, { "epoch": 18.382089552238806, "grad_norm": 14.19666862487793, "learning_rate": 9.88095238095238e-07, "loss": 47.7806, "step": 772 }, { "epoch": 18.405970149253733, "grad_norm": 13.2017183303833, "learning_rate": 9.761904761904764e-07, "loss": 46.1119, "step": 773 }, { "epoch": 18.429850746268656, "grad_norm": 15.064650535583496, "learning_rate": 9.642857142857145e-07, "loss": 47.8105, "step": 774 }, { "epoch": 18.453731343283582, "grad_norm": 14.713834762573242, "learning_rate": 9.523809523809525e-07, "loss": 47.5723, "step": 775 }, { "epoch": 18.47761194029851, "grad_norm": 13.394201278686523, "learning_rate": 9.404761904761906e-07, "loss": 48.1632, "step": 776 }, { "epoch": 18.501492537313432, "grad_norm": 10.742532730102539, "learning_rate": 9.285714285714287e-07, "loss": 48.6118, "step": 777 }, { "epoch": 18.52537313432836, "grad_norm": 12.402650833129883, "learning_rate": 9.166666666666666e-07, "loss": 48.6597, "step": 778 }, { "epoch": 18.549253731343285, "grad_norm": 15.73616886138916, "learning_rate": 9.047619047619048e-07, "loss": 47.9931, "step": 779 }, { "epoch": 18.573134328358208, "grad_norm": 14.188780784606934, "learning_rate": 8.928571428571429e-07, "loss": 47.0081, "step": 780 }, { "epoch": 18.597014925373134, "grad_norm": 12.516701698303223, "learning_rate": 8.80952380952381e-07, "loss": 49.0287, "step": 781 }, { "epoch": 18.62089552238806, "grad_norm": 15.069429397583008, "learning_rate": 8.69047619047619e-07, "loss": 47.8721, "step": 782 }, { "epoch": 18.644776119402984, "grad_norm": 13.091047286987305, "learning_rate": 8.571428571428572e-07, "loss": 48.1678, "step": 783 }, { "epoch": 18.66865671641791, "grad_norm": 15.017065048217773, "learning_rate": 8.452380952380953e-07, "loss": 47.1277, "step": 784 }, { "epoch": 18.692537313432837, "grad_norm": 12.091531753540039, "learning_rate": 8.333333333333333e-07, "loss": 47.4962, "step": 785 }, { "epoch": 18.71641791044776, "grad_norm": 15.20182991027832, "learning_rate": 8.214285714285715e-07, "loss": 48.6224, "step": 786 }, { "epoch": 18.740298507462686, "grad_norm": 11.17827320098877, "learning_rate": 8.095238095238096e-07, "loss": 47.9759, "step": 787 }, { "epoch": 18.764179104477613, "grad_norm": 14.884525299072266, "learning_rate": 7.976190476190476e-07, "loss": 47.9749, "step": 788 }, { "epoch": 18.788059701492536, "grad_norm": 14.360984802246094, "learning_rate": 7.857142857142857e-07, "loss": 48.9952, "step": 789 }, { "epoch": 18.811940298507462, "grad_norm": 11.265621185302734, "learning_rate": 7.738095238095239e-07, "loss": 47.4274, "step": 790 }, { "epoch": 18.83582089552239, "grad_norm": 18.072290420532227, "learning_rate": 7.61904761904762e-07, "loss": 47.8815, "step": 791 }, { "epoch": 18.85970149253731, "grad_norm": 15.310029029846191, "learning_rate": 7.5e-07, "loss": 47.3907, "step": 792 }, { "epoch": 18.883582089552238, "grad_norm": 14.032752990722656, "learning_rate": 7.380952380952381e-07, "loss": 48.0883, "step": 793 }, { "epoch": 18.907462686567165, "grad_norm": 12.853668212890625, "learning_rate": 7.261904761904763e-07, "loss": 47.1452, "step": 794 }, { "epoch": 18.93134328358209, "grad_norm": 15.6067476272583, "learning_rate": 7.142857142857143e-07, "loss": 46.6303, "step": 795 }, { "epoch": 18.955223880597014, "grad_norm": 12.828201293945312, "learning_rate": 7.023809523809524e-07, "loss": 47.9885, "step": 796 }, { "epoch": 18.97910447761194, "grad_norm": 13.336589813232422, "learning_rate": 6.904761904761906e-07, "loss": 48.2315, "step": 797 }, { "epoch": 19.0, "grad_norm": 13.629434585571289, "learning_rate": 6.785714285714286e-07, "loss": 41.9374, "step": 798 }, { "epoch": 19.023880597014927, "grad_norm": 13.237930297851562, "learning_rate": 6.666666666666667e-07, "loss": 46.6802, "step": 799 }, { "epoch": 19.04776119402985, "grad_norm": 13.715863227844238, "learning_rate": 6.547619047619048e-07, "loss": 49.0494, "step": 800 }, { "epoch": 19.071641791044776, "grad_norm": 13.439970016479492, "learning_rate": 6.428571428571428e-07, "loss": 46.3647, "step": 801 }, { "epoch": 19.095522388059702, "grad_norm": 15.468942642211914, "learning_rate": 6.30952380952381e-07, "loss": 48.4725, "step": 802 }, { "epoch": 19.119402985074625, "grad_norm": 14.160257339477539, "learning_rate": 6.190476190476191e-07, "loss": 47.4033, "step": 803 }, { "epoch": 19.143283582089552, "grad_norm": 13.667155265808105, "learning_rate": 6.071428571428572e-07, "loss": 48.4729, "step": 804 }, { "epoch": 19.16716417910448, "grad_norm": 12.428313255310059, "learning_rate": 5.952380952380953e-07, "loss": 48.8939, "step": 805 }, { "epoch": 19.1910447761194, "grad_norm": 12.985882759094238, "learning_rate": 5.833333333333334e-07, "loss": 47.0663, "step": 806 }, { "epoch": 19.214925373134328, "grad_norm": 12.827404975891113, "learning_rate": 5.714285714285715e-07, "loss": 47.5614, "step": 807 }, { "epoch": 19.238805970149254, "grad_norm": 11.078653335571289, "learning_rate": 5.595238095238096e-07, "loss": 48.564, "step": 808 }, { "epoch": 19.262686567164177, "grad_norm": 13.346016883850098, "learning_rate": 5.476190476190477e-07, "loss": 48.0823, "step": 809 }, { "epoch": 19.286567164179104, "grad_norm": 14.523963928222656, "learning_rate": 5.357142857142857e-07, "loss": 48.4225, "step": 810 }, { "epoch": 19.31044776119403, "grad_norm": 12.598445892333984, "learning_rate": 5.238095238095239e-07, "loss": 47.2514, "step": 811 }, { "epoch": 19.334328358208957, "grad_norm": 12.203497886657715, "learning_rate": 5.11904761904762e-07, "loss": 47.217, "step": 812 }, { "epoch": 19.35820895522388, "grad_norm": 12.144754409790039, "learning_rate": 5.000000000000001e-07, "loss": 47.191, "step": 813 }, { "epoch": 19.382089552238806, "grad_norm": 12.585047721862793, "learning_rate": 4.880952380952382e-07, "loss": 48.4947, "step": 814 }, { "epoch": 19.405970149253733, "grad_norm": 11.295561790466309, "learning_rate": 4.7619047619047623e-07, "loss": 46.9444, "step": 815 }, { "epoch": 19.429850746268656, "grad_norm": 13.055256843566895, "learning_rate": 4.642857142857143e-07, "loss": 48.4469, "step": 816 }, { "epoch": 19.453731343283582, "grad_norm": 12.051807403564453, "learning_rate": 4.523809523809524e-07, "loss": 48.0547, "step": 817 }, { "epoch": 19.47761194029851, "grad_norm": 13.44185733795166, "learning_rate": 4.404761904761905e-07, "loss": 48.3155, "step": 818 }, { "epoch": 19.501492537313432, "grad_norm": 12.405723571777344, "learning_rate": 4.285714285714286e-07, "loss": 48.3982, "step": 819 }, { "epoch": 19.52537313432836, "grad_norm": 14.900402069091797, "learning_rate": 4.1666666666666667e-07, "loss": 48.2653, "step": 820 }, { "epoch": 19.549253731343285, "grad_norm": 10.70801067352295, "learning_rate": 4.047619047619048e-07, "loss": 48.0384, "step": 821 }, { "epoch": 19.573134328358208, "grad_norm": 12.318074226379395, "learning_rate": 3.9285714285714286e-07, "loss": 47.554, "step": 822 }, { "epoch": 19.597014925373134, "grad_norm": 12.898431777954102, "learning_rate": 3.80952380952381e-07, "loss": 48.3586, "step": 823 }, { "epoch": 19.62089552238806, "grad_norm": 15.45779800415039, "learning_rate": 3.6904761904761906e-07, "loss": 48.4193, "step": 824 }, { "epoch": 19.644776119402984, "grad_norm": 11.230570793151855, "learning_rate": 3.5714285714285716e-07, "loss": 48.5294, "step": 825 }, { "epoch": 19.66865671641791, "grad_norm": 13.647272109985352, "learning_rate": 3.452380952380953e-07, "loss": 47.2569, "step": 826 }, { "epoch": 19.692537313432837, "grad_norm": 11.521178245544434, "learning_rate": 3.3333333333333335e-07, "loss": 47.2899, "step": 827 }, { "epoch": 19.71641791044776, "grad_norm": 11.537907600402832, "learning_rate": 3.214285714285714e-07, "loss": 46.6462, "step": 828 }, { "epoch": 19.740298507462686, "grad_norm": 11.670267105102539, "learning_rate": 3.0952380952380955e-07, "loss": 47.9797, "step": 829 }, { "epoch": 19.764179104477613, "grad_norm": 11.660557746887207, "learning_rate": 2.9761904761904765e-07, "loss": 47.9744, "step": 830 }, { "epoch": 19.788059701492536, "grad_norm": 12.332269668579102, "learning_rate": 2.8571428571428575e-07, "loss": 48.6015, "step": 831 }, { "epoch": 19.811940298507462, "grad_norm": 12.228848457336426, "learning_rate": 2.7380952380952385e-07, "loss": 47.3215, "step": 832 }, { "epoch": 19.83582089552239, "grad_norm": 13.780754089355469, "learning_rate": 2.6190476190476194e-07, "loss": 48.853, "step": 833 }, { "epoch": 19.85970149253731, "grad_norm": 11.639240264892578, "learning_rate": 2.5000000000000004e-07, "loss": 48.8199, "step": 834 }, { "epoch": 19.883582089552238, "grad_norm": 10.796862602233887, "learning_rate": 2.3809523809523811e-07, "loss": 47.5373, "step": 835 }, { "epoch": 19.907462686567165, "grad_norm": 13.573180198669434, "learning_rate": 2.261904761904762e-07, "loss": 47.8368, "step": 836 }, { "epoch": 19.93134328358209, "grad_norm": 11.497776985168457, "learning_rate": 2.142857142857143e-07, "loss": 47.8226, "step": 837 }, { "epoch": 19.955223880597014, "grad_norm": 10.777889251708984, "learning_rate": 2.023809523809524e-07, "loss": 47.6424, "step": 838 }, { "epoch": 19.97910447761194, "grad_norm": 10.77852725982666, "learning_rate": 1.904761904761905e-07, "loss": 46.379, "step": 839 }, { "epoch": 20.0, "grad_norm": 13.582564353942871, "learning_rate": 1.7857142857142858e-07, "loss": 42.5239, "step": 840 }, { "epoch": 20.0, "step": 840, "total_flos": 4.130470305428237e+16, "train_loss": 49.47331008002872, "train_runtime": 26137.3223, "train_samples_per_second": 4.095, "train_steps_per_second": 0.032 }, { "epoch": 20.023880597014927, "grad_norm": 21.18770408630371, "learning_rate": 1e-05, "loss": 48.1474, "step": 841 }, { "epoch": 20.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 60.2758, "step": 842 }, { "epoch": 20.071641791044776, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 61.7211, "step": 843 }, { "epoch": 20.095522388059702, "grad_norm": 504.4407958984375, "learning_rate": 9.99404761904762e-06, "loss": 60.8189, "step": 844 }, { "epoch": 20.119402985074625, "grad_norm": 221.96849060058594, "learning_rate": 9.988095238095239e-06, "loss": 54.4658, "step": 845 }, { "epoch": 20.143283582089552, "grad_norm": 110.4036865234375, "learning_rate": 9.982142857142858e-06, "loss": 52.4242, "step": 846 }, { "epoch": 20.16716417910448, "grad_norm": 82.75493621826172, "learning_rate": 9.976190476190477e-06, "loss": 50.3129, "step": 847 }, { "epoch": 20.1910447761194, "grad_norm": 62.56040573120117, "learning_rate": 9.970238095238096e-06, "loss": 49.7171, "step": 848 }, { "epoch": 20.214925373134328, "grad_norm": 70.04007720947266, "learning_rate": 9.964285714285714e-06, "loss": 48.185, "step": 849 }, { "epoch": 20.238805970149254, "grad_norm": 56.70342254638672, "learning_rate": 9.958333333333334e-06, "loss": 49.5787, "step": 850 }, { "epoch": 20.262686567164177, "grad_norm": 64.66405487060547, "learning_rate": 9.952380952380954e-06, "loss": 49.6106, "step": 851 }, { "epoch": 20.286567164179104, "grad_norm": 43.37612533569336, "learning_rate": 9.946428571428572e-06, "loss": 49.2966, "step": 852 }, { "epoch": 20.31044776119403, "grad_norm": 42.66206359863281, "learning_rate": 9.940476190476192e-06, "loss": 48.7073, "step": 853 }, { "epoch": 20.334328358208957, "grad_norm": 37.17741775512695, "learning_rate": 9.93452380952381e-06, "loss": 48.7592, "step": 854 }, { "epoch": 20.35820895522388, "grad_norm": 39.27332305908203, "learning_rate": 9.92857142857143e-06, "loss": 48.1181, "step": 855 }, { "epoch": 20.382089552238806, "grad_norm": 31.37261390686035, "learning_rate": 9.922619047619048e-06, "loss": 47.4873, "step": 856 }, { "epoch": 20.405970149253733, "grad_norm": 41.693809509277344, "learning_rate": 9.916666666666668e-06, "loss": 48.9428, "step": 857 }, { "epoch": 20.429850746268656, "grad_norm": 29.33939552307129, "learning_rate": 9.910714285714288e-06, "loss": 49.2928, "step": 858 }, { "epoch": 20.453731343283582, "grad_norm": 30.606157302856445, "learning_rate": 9.904761904761906e-06, "loss": 49.1506, "step": 859 }, { "epoch": 20.47761194029851, "grad_norm": 27.273784637451172, "learning_rate": 9.898809523809525e-06, "loss": 46.6136, "step": 860 }, { "epoch": 20.501492537313432, "grad_norm": 24.410682678222656, "learning_rate": 9.892857142857143e-06, "loss": 48.3989, "step": 861 }, { "epoch": 20.52537313432836, "grad_norm": 24.138607025146484, "learning_rate": 9.886904761904763e-06, "loss": 49.3858, "step": 862 }, { "epoch": 20.549253731343285, "grad_norm": 27.50669288635254, "learning_rate": 9.880952380952381e-06, "loss": 48.5058, "step": 863 }, { "epoch": 20.573134328358208, "grad_norm": 27.739347457885742, "learning_rate": 9.875000000000001e-06, "loss": 49.676, "step": 864 }, { "epoch": 20.597014925373134, "grad_norm": 22.63895034790039, "learning_rate": 9.869047619047621e-06, "loss": 47.6998, "step": 865 }, { "epoch": 20.62089552238806, "grad_norm": 26.80891990661621, "learning_rate": 9.863095238095239e-06, "loss": 47.9571, "step": 866 }, { "epoch": 20.644776119402984, "grad_norm": 26.259008407592773, "learning_rate": 9.857142857142859e-06, "loss": 48.8771, "step": 867 }, { "epoch": 20.66865671641791, "grad_norm": 23.716773986816406, "learning_rate": 9.851190476190477e-06, "loss": 47.1255, "step": 868 }, { "epoch": 20.692537313432837, "grad_norm": 26.96156120300293, "learning_rate": 9.845238095238097e-06, "loss": 47.2227, "step": 869 }, { "epoch": 20.71641791044776, "grad_norm": 25.1954345703125, "learning_rate": 9.839285714285715e-06, "loss": 47.6847, "step": 870 }, { "epoch": 20.740298507462686, "grad_norm": 21.56642723083496, "learning_rate": 9.833333333333333e-06, "loss": 47.7292, "step": 871 }, { "epoch": 20.764179104477613, "grad_norm": 25.091773986816406, "learning_rate": 9.827380952380953e-06, "loss": 46.6588, "step": 872 }, { "epoch": 20.788059701492536, "grad_norm": 26.45799446105957, "learning_rate": 9.821428571428573e-06, "loss": 47.3963, "step": 873 }, { "epoch": 20.811940298507462, "grad_norm": 25.865068435668945, "learning_rate": 9.81547619047619e-06, "loss": 48.8823, "step": 874 }, { "epoch": 20.83582089552239, "grad_norm": 27.056106567382812, "learning_rate": 9.80952380952381e-06, "loss": 47.2222, "step": 875 }, { "epoch": 20.85970149253731, "grad_norm": 27.02417755126953, "learning_rate": 9.803571428571428e-06, "loss": 47.4543, "step": 876 }, { "epoch": 20.883582089552238, "grad_norm": 23.681915283203125, "learning_rate": 9.797619047619048e-06, "loss": 47.7518, "step": 877 }, { "epoch": 20.907462686567165, "grad_norm": 20.77193260192871, "learning_rate": 9.791666666666666e-06, "loss": 46.3164, "step": 878 }, { "epoch": 20.93134328358209, "grad_norm": 24.61642837524414, "learning_rate": 9.785714285714286e-06, "loss": 48.6711, "step": 879 }, { "epoch": 20.955223880597014, "grad_norm": 20.59898567199707, "learning_rate": 9.779761904761906e-06, "loss": 49.114, "step": 880 }, { "epoch": 20.97910447761194, "grad_norm": 24.815736770629883, "learning_rate": 9.773809523809524e-06, "loss": 48.4315, "step": 881 }, { "epoch": 21.0, "grad_norm": 17.920352935791016, "learning_rate": 9.767857142857144e-06, "loss": 41.3634, "step": 882 }, { "epoch": 21.023880597014927, "grad_norm": 26.69571876525879, "learning_rate": 9.761904761904762e-06, "loss": 47.8968, "step": 883 }, { "epoch": 21.04776119402985, "grad_norm": 23.156524658203125, "learning_rate": 9.755952380952382e-06, "loss": 48.5914, "step": 884 }, { "epoch": 21.071641791044776, "grad_norm": 21.612483978271484, "learning_rate": 9.75e-06, "loss": 47.8711, "step": 885 }, { "epoch": 21.095522388059702, "grad_norm": 24.346399307250977, "learning_rate": 9.74404761904762e-06, "loss": 48.8689, "step": 886 }, { "epoch": 21.119402985074625, "grad_norm": 21.973896026611328, "learning_rate": 9.73809523809524e-06, "loss": 46.7465, "step": 887 }, { "epoch": 21.143283582089552, "grad_norm": 20.034557342529297, "learning_rate": 9.732142857142858e-06, "loss": 47.1505, "step": 888 }, { "epoch": 21.16716417910448, "grad_norm": 20.113008499145508, "learning_rate": 9.726190476190477e-06, "loss": 47.9234, "step": 889 }, { "epoch": 21.1910447761194, "grad_norm": 24.743249893188477, "learning_rate": 9.720238095238095e-06, "loss": 47.4432, "step": 890 }, { "epoch": 21.214925373134328, "grad_norm": 25.538530349731445, "learning_rate": 9.714285714285715e-06, "loss": 47.0015, "step": 891 }, { "epoch": 21.238805970149254, "grad_norm": 27.43077278137207, "learning_rate": 9.708333333333333e-06, "loss": 48.6757, "step": 892 }, { "epoch": 21.262686567164177, "grad_norm": 25.34470558166504, "learning_rate": 9.702380952380953e-06, "loss": 46.8118, "step": 893 }, { "epoch": 21.286567164179104, "grad_norm": 29.590490341186523, "learning_rate": 9.696428571428573e-06, "loss": 47.5079, "step": 894 }, { "epoch": 21.31044776119403, "grad_norm": 16.418222427368164, "learning_rate": 9.690476190476191e-06, "loss": 48.7679, "step": 895 }, { "epoch": 21.334328358208957, "grad_norm": 30.906719207763672, "learning_rate": 9.68452380952381e-06, "loss": 48.4926, "step": 896 }, { "epoch": 21.35820895522388, "grad_norm": 30.252347946166992, "learning_rate": 9.678571428571429e-06, "loss": 48.9318, "step": 897 }, { "epoch": 21.382089552238806, "grad_norm": 30.137592315673828, "learning_rate": 9.672619047619049e-06, "loss": 47.0388, "step": 898 }, { "epoch": 21.405970149253733, "grad_norm": 25.297151565551758, "learning_rate": 9.666666666666667e-06, "loss": 47.334, "step": 899 }, { "epoch": 21.429850746268656, "grad_norm": 31.72736358642578, "learning_rate": 9.660714285714287e-06, "loss": 47.8769, "step": 900 }, { "epoch": 21.453731343283582, "grad_norm": 24.4852294921875, "learning_rate": 9.654761904761906e-06, "loss": 47.4009, "step": 901 }, { "epoch": 21.47761194029851, "grad_norm": 31.223567962646484, "learning_rate": 9.648809523809524e-06, "loss": 48.4972, "step": 902 }, { "epoch": 21.501492537313432, "grad_norm": 24.1851806640625, "learning_rate": 9.642857142857144e-06, "loss": 46.1818, "step": 903 }, { "epoch": 21.52537313432836, "grad_norm": NaN, "learning_rate": 9.636904761904762e-06, "loss": 70.1176, "step": 904 }, { "epoch": 21.549253731343285, "grad_norm": 29.140161514282227, "learning_rate": 9.636904761904762e-06, "loss": 47.1614, "step": 905 }, { "epoch": 21.573134328358208, "grad_norm": 31.186546325683594, "learning_rate": 9.630952380952382e-06, "loss": 47.3643, "step": 906 }, { "epoch": 21.597014925373134, "grad_norm": 24.395353317260742, "learning_rate": 9.625e-06, "loss": 48.3591, "step": 907 }, { "epoch": 21.62089552238806, "grad_norm": 29.287492752075195, "learning_rate": 9.61904761904762e-06, "loss": 47.261, "step": 908 }, { "epoch": 21.644776119402984, "grad_norm": 26.76996612548828, "learning_rate": 9.61309523809524e-06, "loss": 48.7017, "step": 909 }, { "epoch": 21.66865671641791, "grad_norm": 29.820920944213867, "learning_rate": 9.607142857142858e-06, "loss": 48.5165, "step": 910 }, { "epoch": 21.692537313432837, "grad_norm": 30.011823654174805, "learning_rate": 9.601190476190478e-06, "loss": 46.5558, "step": 911 }, { "epoch": 21.71641791044776, "grad_norm": 32.796905517578125, "learning_rate": 9.595238095238096e-06, "loss": 47.276, "step": 912 }, { "epoch": 21.740298507462686, "grad_norm": 28.798233032226562, "learning_rate": 9.589285714285716e-06, "loss": 47.6033, "step": 913 }, { "epoch": 21.764179104477613, "grad_norm": 31.51072120666504, "learning_rate": 9.583333333333335e-06, "loss": 48.1236, "step": 914 }, { "epoch": 21.788059701492536, "grad_norm": 20.611305236816406, "learning_rate": 9.577380952380953e-06, "loss": 48.2839, "step": 915 }, { "epoch": 21.811940298507462, "grad_norm": 26.748571395874023, "learning_rate": 9.571428571428573e-06, "loss": 48.2225, "step": 916 }, { "epoch": 21.83582089552239, "grad_norm": 22.262859344482422, "learning_rate": 9.565476190476191e-06, "loss": 46.661, "step": 917 }, { "epoch": 21.85970149253731, "grad_norm": 34.15045166015625, "learning_rate": 9.559523809523811e-06, "loss": 47.3229, "step": 918 }, { "epoch": 21.883582089552238, "grad_norm": 24.26387596130371, "learning_rate": 9.55357142857143e-06, "loss": 47.4686, "step": 919 }, { "epoch": 21.907462686567165, "grad_norm": 29.463472366333008, "learning_rate": 9.547619047619049e-06, "loss": 47.6019, "step": 920 }, { "epoch": 21.93134328358209, "grad_norm": 31.184497833251953, "learning_rate": 9.541666666666669e-06, "loss": 47.3228, "step": 921 }, { "epoch": 21.955223880597014, "grad_norm": 26.506031036376953, "learning_rate": 9.535714285714287e-06, "loss": 47.9961, "step": 922 }, { "epoch": 21.97910447761194, "grad_norm": 30.547340393066406, "learning_rate": 9.529761904761905e-06, "loss": 47.8973, "step": 923 }, { "epoch": 22.0, "grad_norm": 22.91999053955078, "learning_rate": 9.523809523809525e-06, "loss": 41.3426, "step": 924 }, { "epoch": 22.023880597014927, "grad_norm": 28.242450714111328, "learning_rate": 9.517857142857143e-06, "loss": 47.2478, "step": 925 }, { "epoch": 22.04776119402985, "grad_norm": 33.07649612426758, "learning_rate": 9.511904761904763e-06, "loss": 47.6489, "step": 926 }, { "epoch": 22.071641791044776, "grad_norm": 28.14696502685547, "learning_rate": 9.50595238095238e-06, "loss": 46.521, "step": 927 }, { "epoch": 22.095522388059702, "grad_norm": 34.472206115722656, "learning_rate": 9.5e-06, "loss": 47.6476, "step": 928 }, { "epoch": 22.119402985074625, "grad_norm": 25.370718002319336, "learning_rate": 9.494047619047619e-06, "loss": 47.7215, "step": 929 }, { "epoch": 22.143283582089552, "grad_norm": 31.77129554748535, "learning_rate": 9.488095238095238e-06, "loss": 46.5566, "step": 930 }, { "epoch": 22.16716417910448, "grad_norm": 25.42667579650879, "learning_rate": 9.482142857142858e-06, "loss": 47.9832, "step": 931 }, { "epoch": 22.1910447761194, "grad_norm": 26.3134765625, "learning_rate": 9.476190476190476e-06, "loss": 47.9402, "step": 932 }, { "epoch": 22.214925373134328, "grad_norm": 31.683523178100586, "learning_rate": 9.470238095238096e-06, "loss": 47.4404, "step": 933 }, { "epoch": 22.238805970149254, "grad_norm": 31.90761375427246, "learning_rate": 9.464285714285714e-06, "loss": 47.7601, "step": 934 }, { "epoch": 22.262686567164177, "grad_norm": 24.635921478271484, "learning_rate": 9.458333333333334e-06, "loss": 46.2573, "step": 935 }, { "epoch": 22.286567164179104, "grad_norm": 25.32915496826172, "learning_rate": 9.452380952380952e-06, "loss": 48.4756, "step": 936 }, { "epoch": 22.31044776119403, "grad_norm": 28.117773056030273, "learning_rate": 9.446428571428572e-06, "loss": 48.6971, "step": 937 }, { "epoch": 22.334328358208957, "grad_norm": 22.504152297973633, "learning_rate": 9.440476190476192e-06, "loss": 47.4534, "step": 938 }, { "epoch": 22.35820895522388, "grad_norm": 31.765676498413086, "learning_rate": 9.43452380952381e-06, "loss": 48.0168, "step": 939 }, { "epoch": 22.382089552238806, "grad_norm": 27.647945404052734, "learning_rate": 9.42857142857143e-06, "loss": 48.0918, "step": 940 }, { "epoch": 22.405970149253733, "grad_norm": 33.35643005371094, "learning_rate": 9.422619047619048e-06, "loss": 48.295, "step": 941 }, { "epoch": 22.429850746268656, "grad_norm": 26.12603187561035, "learning_rate": 9.416666666666667e-06, "loss": 48.8921, "step": 942 }, { "epoch": 22.453731343283582, "grad_norm": 23.728809356689453, "learning_rate": 9.410714285714286e-06, "loss": 47.3206, "step": 943 }, { "epoch": 22.47761194029851, "grad_norm": 28.772401809692383, "learning_rate": 9.404761904761905e-06, "loss": 47.6536, "step": 944 }, { "epoch": 22.501492537313432, "grad_norm": 28.205202102661133, "learning_rate": 9.398809523809525e-06, "loss": 47.1952, "step": 945 }, { "epoch": 22.52537313432836, "grad_norm": 33.80730438232422, "learning_rate": 9.392857142857143e-06, "loss": 47.1336, "step": 946 }, { "epoch": 22.549253731343285, "grad_norm": 25.538846969604492, "learning_rate": 9.386904761904763e-06, "loss": 46.4229, "step": 947 }, { "epoch": 22.573134328358208, "grad_norm": 41.13503646850586, "learning_rate": 9.380952380952381e-06, "loss": 46.8325, "step": 948 }, { "epoch": 22.597014925373134, "grad_norm": 36.823001861572266, "learning_rate": 9.375000000000001e-06, "loss": 47.205, "step": 949 }, { "epoch": 22.62089552238806, "grad_norm": 29.992229461669922, "learning_rate": 9.36904761904762e-06, "loss": 46.683, "step": 950 }, { "epoch": 22.644776119402984, "grad_norm": 40.20172882080078, "learning_rate": 9.363095238095239e-06, "loss": 48.4859, "step": 951 }, { "epoch": 22.66865671641791, "grad_norm": 27.357097625732422, "learning_rate": 9.357142857142859e-06, "loss": 47.2987, "step": 952 }, { "epoch": 22.692537313432837, "grad_norm": 40.66689682006836, "learning_rate": 9.351190476190477e-06, "loss": 46.3579, "step": 953 }, { "epoch": 22.71641791044776, "grad_norm": 35.37788391113281, "learning_rate": 9.345238095238096e-06, "loss": 47.3369, "step": 954 }, { "epoch": 22.740298507462686, "grad_norm": 36.279151916503906, "learning_rate": 9.339285714285715e-06, "loss": 47.1137, "step": 955 }, { "epoch": 22.764179104477613, "grad_norm": 27.949628829956055, "learning_rate": 9.333333333333334e-06, "loss": 47.1438, "step": 956 }, { "epoch": 22.788059701492536, "grad_norm": 45.424556732177734, "learning_rate": 9.327380952380954e-06, "loss": 48.3171, "step": 957 }, { "epoch": 22.811940298507462, "grad_norm": 27.726537704467773, "learning_rate": 9.321428571428572e-06, "loss": 47.1718, "step": 958 }, { "epoch": 22.83582089552239, "grad_norm": 58.36731719970703, "learning_rate": 9.315476190476192e-06, "loss": 47.5895, "step": 959 }, { "epoch": 22.85970149253731, "grad_norm": 58.96028137207031, "learning_rate": 9.30952380952381e-06, "loss": 47.4109, "step": 960 }, { "epoch": 22.883582089552238, "grad_norm": 24.928117752075195, "learning_rate": 9.30357142857143e-06, "loss": 48.1841, "step": 961 }, { "epoch": 22.907462686567165, "grad_norm": 38.36846160888672, "learning_rate": 9.297619047619048e-06, "loss": 47.7438, "step": 962 }, { "epoch": 22.93134328358209, "grad_norm": 37.60481643676758, "learning_rate": 9.291666666666668e-06, "loss": 46.5067, "step": 963 }, { "epoch": 22.955223880597014, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 78.3124, "step": 964 }, { "epoch": 22.97910447761194, "grad_norm": 28.587425231933594, "learning_rate": 9.285714285714288e-06, "loss": 47.1599, "step": 965 }, { "epoch": 23.0, "grad_norm": 41.493404388427734, "learning_rate": 9.279761904761906e-06, "loss": 41.2983, "step": 966 }, { "epoch": 23.023880597014927, "grad_norm": 41.00606918334961, "learning_rate": 9.273809523809525e-06, "loss": 46.8696, "step": 967 }, { "epoch": 23.04776119402985, "grad_norm": 31.043148040771484, "learning_rate": 9.267857142857144e-06, "loss": 46.4614, "step": 968 }, { "epoch": 23.071641791044776, "grad_norm": 36.815940856933594, "learning_rate": 9.261904761904763e-06, "loss": 47.5987, "step": 969 }, { "epoch": 23.095522388059702, "grad_norm": 35.73536682128906, "learning_rate": 9.255952380952381e-06, "loss": 47.8339, "step": 970 }, { "epoch": 23.119402985074625, "grad_norm": 26.95656967163086, "learning_rate": 9.250000000000001e-06, "loss": 48.0632, "step": 971 }, { "epoch": 23.143283582089552, "grad_norm": 40.408348083496094, "learning_rate": 9.244047619047621e-06, "loss": 47.5458, "step": 972 }, { "epoch": 23.16716417910448, "grad_norm": 41.97018051147461, "learning_rate": 9.238095238095239e-06, "loss": 48.8528, "step": 973 }, { "epoch": 23.1910447761194, "grad_norm": 23.809162139892578, "learning_rate": 9.232142857142859e-06, "loss": 47.663, "step": 974 }, { "epoch": 23.214925373134328, "grad_norm": 36.0232048034668, "learning_rate": 9.226190476190477e-06, "loss": 47.496, "step": 975 }, { "epoch": 23.238805970149254, "grad_norm": 32.06623077392578, "learning_rate": 9.220238095238097e-06, "loss": 47.4472, "step": 976 }, { "epoch": 23.262686567164177, "grad_norm": 30.663307189941406, "learning_rate": 9.214285714285715e-06, "loss": 47.1342, "step": 977 }, { "epoch": 23.286567164179104, "grad_norm": 39.121437072753906, "learning_rate": 9.208333333333333e-06, "loss": 47.9977, "step": 978 }, { "epoch": 23.31044776119403, "grad_norm": 31.75649642944336, "learning_rate": 9.202380952380953e-06, "loss": 49.2196, "step": 979 }, { "epoch": 23.334328358208957, "grad_norm": 50.10381317138672, "learning_rate": 9.196428571428571e-06, "loss": 47.6487, "step": 980 }, { "epoch": 23.35820895522388, "grad_norm": 36.412906646728516, "learning_rate": 9.19047619047619e-06, "loss": 47.0012, "step": 981 }, { "epoch": 23.382089552238806, "grad_norm": 40.47570037841797, "learning_rate": 9.18452380952381e-06, "loss": 45.4449, "step": 982 }, { "epoch": 23.405970149253733, "grad_norm": 43.92324447631836, "learning_rate": 9.178571428571429e-06, "loss": 47.8727, "step": 983 }, { "epoch": 23.429850746268656, "grad_norm": 28.896121978759766, "learning_rate": 9.172619047619048e-06, "loss": 47.8489, "step": 984 }, { "epoch": 23.453731343283582, "grad_norm": 37.02536392211914, "learning_rate": 9.166666666666666e-06, "loss": 48.4484, "step": 985 }, { "epoch": 23.47761194029851, "grad_norm": 26.289518356323242, "learning_rate": 9.160714285714286e-06, "loss": 47.0221, "step": 986 }, { "epoch": 23.501492537313432, "grad_norm": 33.60945129394531, "learning_rate": 9.154761904761906e-06, "loss": 47.7454, "step": 987 }, { "epoch": 23.52537313432836, "grad_norm": 31.284311294555664, "learning_rate": 9.148809523809524e-06, "loss": 47.0558, "step": 988 }, { "epoch": 23.549253731343285, "grad_norm": 30.488906860351562, "learning_rate": 9.142857142857144e-06, "loss": 46.4408, "step": 989 }, { "epoch": 23.573134328358208, "grad_norm": 34.29289245605469, "learning_rate": 9.136904761904762e-06, "loss": 46.2796, "step": 990 }, { "epoch": 23.597014925373134, "grad_norm": 22.803457260131836, "learning_rate": 9.130952380952382e-06, "loss": 47.2684, "step": 991 }, { "epoch": 23.62089552238806, "grad_norm": 33.18730926513672, "learning_rate": 9.125e-06, "loss": 47.5, "step": 992 }, { "epoch": 23.644776119402984, "grad_norm": 28.421592712402344, "learning_rate": 9.11904761904762e-06, "loss": 46.7508, "step": 993 }, { "epoch": 23.66865671641791, "grad_norm": 23.492319107055664, "learning_rate": 9.11309523809524e-06, "loss": 46.6042, "step": 994 }, { "epoch": 23.692537313432837, "grad_norm": 30.10877227783203, "learning_rate": 9.107142857142858e-06, "loss": 46.2632, "step": 995 }, { "epoch": 23.71641791044776, "grad_norm": 23.64444351196289, "learning_rate": 9.101190476190477e-06, "loss": 47.2817, "step": 996 }, { "epoch": 23.740298507462686, "grad_norm": 28.243606567382812, "learning_rate": 9.095238095238095e-06, "loss": 47.1196, "step": 997 }, { "epoch": 23.764179104477613, "grad_norm": 26.84208869934082, "learning_rate": 9.089285714285715e-06, "loss": 46.6631, "step": 998 }, { "epoch": 23.788059701492536, "grad_norm": 29.558794021606445, "learning_rate": 9.083333333333333e-06, "loss": 45.8711, "step": 999 }, { "epoch": 23.811940298507462, "grad_norm": 25.105928421020508, "learning_rate": 9.077380952380953e-06, "loss": 47.8253, "step": 1000 }, { "epoch": 23.83582089552239, "grad_norm": NaN, "learning_rate": 9.071428571428573e-06, "loss": 82.5048, "step": 1001 }, { "epoch": 23.85970149253731, "grad_norm": 25.548643112182617, "learning_rate": 9.071428571428573e-06, "loss": 47.5042, "step": 1002 }, { "epoch": 23.883582089552238, "grad_norm": 28.8011531829834, "learning_rate": 9.065476190476191e-06, "loss": 47.0084, "step": 1003 }, { "epoch": 23.907462686567165, "grad_norm": 31.907651901245117, "learning_rate": 9.05952380952381e-06, "loss": 48.287, "step": 1004 }, { "epoch": 23.93134328358209, "grad_norm": 32.044986724853516, "learning_rate": 9.053571428571429e-06, "loss": 47.276, "step": 1005 }, { "epoch": 23.955223880597014, "grad_norm": 31.224260330200195, "learning_rate": 9.047619047619049e-06, "loss": 47.4774, "step": 1006 }, { "epoch": 23.97910447761194, "grad_norm": 29.830835342407227, "learning_rate": 9.041666666666667e-06, "loss": 47.7031, "step": 1007 }, { "epoch": 24.0, "grad_norm": 25.12934112548828, "learning_rate": 9.035714285714287e-06, "loss": 41.8156, "step": 1008 }, { "epoch": 24.023880597014927, "grad_norm": 31.172348022460938, "learning_rate": 9.029761904761906e-06, "loss": 48.0591, "step": 1009 }, { "epoch": 24.04776119402985, "grad_norm": 26.59412956237793, "learning_rate": 9.023809523809524e-06, "loss": 47.6291, "step": 1010 }, { "epoch": 24.071641791044776, "grad_norm": 29.16905975341797, "learning_rate": 9.017857142857144e-06, "loss": 47.4587, "step": 1011 }, { "epoch": 24.095522388059702, "grad_norm": 33.05836868286133, "learning_rate": 9.011904761904762e-06, "loss": 47.8748, "step": 1012 }, { "epoch": 24.119402985074625, "grad_norm": 26.13016700744629, "learning_rate": 9.005952380952382e-06, "loss": 48.0003, "step": 1013 }, { "epoch": 24.143283582089552, "grad_norm": 29.883411407470703, "learning_rate": 9e-06, "loss": 47.0188, "step": 1014 }, { "epoch": 24.16716417910448, "grad_norm": 29.039255142211914, "learning_rate": 8.99404761904762e-06, "loss": 46.8844, "step": 1015 }, { "epoch": 24.1910447761194, "grad_norm": 26.532760620117188, "learning_rate": 8.98809523809524e-06, "loss": 47.1817, "step": 1016 }, { "epoch": 24.214925373134328, "grad_norm": 30.146087646484375, "learning_rate": 8.982142857142858e-06, "loss": 46.4863, "step": 1017 }, { "epoch": 24.238805970149254, "grad_norm": 27.027935028076172, "learning_rate": 8.976190476190478e-06, "loss": 45.7162, "step": 1018 }, { "epoch": 24.262686567164177, "grad_norm": 27.315515518188477, "learning_rate": 8.970238095238096e-06, "loss": 46.6337, "step": 1019 }, { "epoch": 24.286567164179104, "grad_norm": 25.63303565979004, "learning_rate": 8.964285714285716e-06, "loss": 46.5452, "step": 1020 }, { "epoch": 24.31044776119403, "grad_norm": 22.407268524169922, "learning_rate": 8.958333333333334e-06, "loss": 47.3262, "step": 1021 }, { "epoch": 24.334328358208957, "grad_norm": 19.4051513671875, "learning_rate": 8.952380952380953e-06, "loss": 46.8407, "step": 1022 }, { "epoch": 24.35820895522388, "grad_norm": NaN, "learning_rate": 8.946428571428573e-06, "loss": 77.1735, "step": 1023 }, { "epoch": 24.382089552238806, "grad_norm": 26.870166778564453, "learning_rate": 8.946428571428573e-06, "loss": 46.9395, "step": 1024 }, { "epoch": 24.405970149253733, "grad_norm": 21.54165267944336, "learning_rate": 8.940476190476191e-06, "loss": 47.2505, "step": 1025 }, { "epoch": 24.429850746268656, "grad_norm": 29.317501068115234, "learning_rate": 8.934523809523811e-06, "loss": 48.0554, "step": 1026 }, { "epoch": 24.453731343283582, "grad_norm": 27.45809555053711, "learning_rate": 8.92857142857143e-06, "loss": 47.0495, "step": 1027 }, { "epoch": 24.47761194029851, "grad_norm": NaN, "learning_rate": 8.922619047619049e-06, "loss": 66.51, "step": 1028 }, { "epoch": 24.501492537313432, "grad_norm": 23.169204711914062, "learning_rate": 8.922619047619049e-06, "loss": 47.5902, "step": 1029 }, { "epoch": 24.52537313432836, "grad_norm": 31.986774444580078, "learning_rate": 8.916666666666667e-06, "loss": 47.4281, "step": 1030 }, { "epoch": 24.549253731343285, "grad_norm": 27.15190315246582, "learning_rate": 8.910714285714287e-06, "loss": 46.3638, "step": 1031 }, { "epoch": 24.573134328358208, "grad_norm": 26.88028335571289, "learning_rate": 8.904761904761905e-06, "loss": 45.0491, "step": 1032 }, { "epoch": 24.597014925373134, "grad_norm": 27.693952560424805, "learning_rate": 8.898809523809525e-06, "loss": 47.6471, "step": 1033 }, { "epoch": 24.62089552238806, "grad_norm": 33.45442581176758, "learning_rate": 8.892857142857143e-06, "loss": 47.1459, "step": 1034 }, { "epoch": 24.644776119402984, "grad_norm": 29.933320999145508, "learning_rate": 8.886904761904763e-06, "loss": 46.9218, "step": 1035 }, { "epoch": 24.66865671641791, "grad_norm": 26.401569366455078, "learning_rate": 8.88095238095238e-06, "loss": 47.7027, "step": 1036 }, { "epoch": 24.692537313432837, "grad_norm": 26.92498016357422, "learning_rate": 8.875e-06, "loss": 47.2302, "step": 1037 }, { "epoch": 24.71641791044776, "grad_norm": 28.368043899536133, "learning_rate": 8.869047619047619e-06, "loss": 47.4479, "step": 1038 }, { "epoch": 24.740298507462686, "grad_norm": 27.319650650024414, "learning_rate": 8.863095238095238e-06, "loss": 47.4652, "step": 1039 }, { "epoch": 24.764179104477613, "grad_norm": 37.10929870605469, "learning_rate": 8.857142857142858e-06, "loss": 47.3543, "step": 1040 }, { "epoch": 24.788059701492536, "grad_norm": 32.430416107177734, "learning_rate": 8.851190476190476e-06, "loss": 46.1406, "step": 1041 }, { "epoch": 24.811940298507462, "grad_norm": 33.29399108886719, "learning_rate": 8.845238095238096e-06, "loss": 47.1917, "step": 1042 }, { "epoch": 24.83582089552239, "grad_norm": 32.72507095336914, "learning_rate": 8.839285714285714e-06, "loss": 47.677, "step": 1043 }, { "epoch": 24.85970149253731, "grad_norm": 25.997148513793945, "learning_rate": 8.833333333333334e-06, "loss": 47.6851, "step": 1044 }, { "epoch": 24.883582089552238, "grad_norm": 33.00047302246094, "learning_rate": 8.827380952380952e-06, "loss": 47.5326, "step": 1045 }, { "epoch": 24.907462686567165, "grad_norm": 33.95719528198242, "learning_rate": 8.821428571428572e-06, "loss": 47.2836, "step": 1046 }, { "epoch": 24.93134328358209, "grad_norm": 31.353008270263672, "learning_rate": 8.815476190476192e-06, "loss": 47.8318, "step": 1047 }, { "epoch": 24.955223880597014, "grad_norm": 27.4250545501709, "learning_rate": 8.80952380952381e-06, "loss": 47.0066, "step": 1048 }, { "epoch": 24.97910447761194, "grad_norm": 30.22010612487793, "learning_rate": 8.80357142857143e-06, "loss": 46.6991, "step": 1049 }, { "epoch": 25.0, "grad_norm": 19.693180084228516, "learning_rate": 8.797619047619048e-06, "loss": 41.6055, "step": 1050 }, { "epoch": 25.023880597014927, "grad_norm": 24.590662002563477, "learning_rate": 8.791666666666667e-06, "loss": 46.3536, "step": 1051 }, { "epoch": 25.04776119402985, "grad_norm": 28.37199592590332, "learning_rate": 8.785714285714286e-06, "loss": 47.8334, "step": 1052 }, { "epoch": 25.071641791044776, "grad_norm": 26.38755226135254, "learning_rate": 8.779761904761905e-06, "loss": 47.6486, "step": 1053 }, { "epoch": 25.095522388059702, "grad_norm": 27.338485717773438, "learning_rate": 8.773809523809525e-06, "loss": 47.3044, "step": 1054 }, { "epoch": 25.119402985074625, "grad_norm": 25.308486938476562, "learning_rate": 8.767857142857143e-06, "loss": 45.6873, "step": 1055 }, { "epoch": 25.143283582089552, "grad_norm": 30.886962890625, "learning_rate": 8.761904761904763e-06, "loss": 46.8938, "step": 1056 }, { "epoch": 25.16716417910448, "grad_norm": 25.25688934326172, "learning_rate": 8.755952380952381e-06, "loss": 47.4858, "step": 1057 }, { "epoch": 25.1910447761194, "grad_norm": 30.462963104248047, "learning_rate": 8.750000000000001e-06, "loss": 46.6334, "step": 1058 }, { "epoch": 25.214925373134328, "grad_norm": 22.87471580505371, "learning_rate": 8.744047619047619e-06, "loss": 46.0966, "step": 1059 }, { "epoch": 25.238805970149254, "grad_norm": 23.413904190063477, "learning_rate": 8.738095238095239e-06, "loss": 46.8938, "step": 1060 }, { "epoch": 25.262686567164177, "grad_norm": 26.926279067993164, "learning_rate": 8.732142857142859e-06, "loss": 46.3773, "step": 1061 }, { "epoch": 25.286567164179104, "grad_norm": 27.595348358154297, "learning_rate": 8.726190476190477e-06, "loss": 48.0235, "step": 1062 }, { "epoch": 25.31044776119403, "grad_norm": 26.124523162841797, "learning_rate": 8.720238095238096e-06, "loss": 46.6863, "step": 1063 }, { "epoch": 25.334328358208957, "grad_norm": 28.308120727539062, "learning_rate": 8.714285714285715e-06, "loss": 47.7158, "step": 1064 }, { "epoch": 25.35820895522388, "grad_norm": 23.434846878051758, "learning_rate": 8.708333333333334e-06, "loss": 47.2951, "step": 1065 }, { "epoch": 25.382089552238806, "grad_norm": 26.917911529541016, "learning_rate": 8.702380952380952e-06, "loss": 45.7266, "step": 1066 }, { "epoch": 25.405970149253733, "grad_norm": 19.7725772857666, "learning_rate": 8.696428571428572e-06, "loss": 46.5458, "step": 1067 }, { "epoch": 25.429850746268656, "grad_norm": 27.18629264831543, "learning_rate": 8.690476190476192e-06, "loss": 46.3133, "step": 1068 }, { "epoch": 25.453731343283582, "grad_norm": 26.112865447998047, "learning_rate": 8.68452380952381e-06, "loss": 46.6383, "step": 1069 }, { "epoch": 25.47761194029851, "grad_norm": 19.385990142822266, "learning_rate": 8.67857142857143e-06, "loss": 46.5541, "step": 1070 }, { "epoch": 25.501492537313432, "grad_norm": 26.713350296020508, "learning_rate": 8.672619047619048e-06, "loss": 48.045, "step": 1071 }, { "epoch": 25.52537313432836, "grad_norm": 29.80147933959961, "learning_rate": 8.666666666666668e-06, "loss": 47.4443, "step": 1072 }, { "epoch": 25.549253731343285, "grad_norm": 23.674266815185547, "learning_rate": 8.660714285714286e-06, "loss": 46.6662, "step": 1073 }, { "epoch": 25.573134328358208, "grad_norm": 46.435401916503906, "learning_rate": 8.654761904761906e-06, "loss": 46.9276, "step": 1074 }, { "epoch": 25.597014925373134, "grad_norm": 35.016502380371094, "learning_rate": 8.648809523809526e-06, "loss": 47.6811, "step": 1075 }, { "epoch": 25.62089552238806, "grad_norm": 42.57990646362305, "learning_rate": 8.642857142857144e-06, "loss": 46.5684, "step": 1076 }, { "epoch": 25.644776119402984, "grad_norm": 36.2376708984375, "learning_rate": 8.636904761904763e-06, "loss": 46.1807, "step": 1077 }, { "epoch": 25.66865671641791, "grad_norm": 41.44023895263672, "learning_rate": 8.630952380952381e-06, "loss": 46.0823, "step": 1078 }, { "epoch": 25.692537313432837, "grad_norm": 43.62863540649414, "learning_rate": 8.625000000000001e-06, "loss": 47.9958, "step": 1079 }, { "epoch": 25.71641791044776, "grad_norm": 34.232120513916016, "learning_rate": 8.61904761904762e-06, "loss": 47.9585, "step": 1080 }, { "epoch": 25.740298507462686, "grad_norm": 38.023197174072266, "learning_rate": 8.61309523809524e-06, "loss": 47.7344, "step": 1081 }, { "epoch": 25.764179104477613, "grad_norm": 37.89833068847656, "learning_rate": 8.607142857142859e-06, "loss": 47.8956, "step": 1082 }, { "epoch": 25.788059701492536, "grad_norm": 33.03269958496094, "learning_rate": 8.601190476190477e-06, "loss": 47.7408, "step": 1083 }, { "epoch": 25.811940298507462, "grad_norm": 44.31171798706055, "learning_rate": 8.595238095238097e-06, "loss": 47.6232, "step": 1084 }, { "epoch": 25.83582089552239, "grad_norm": 42.54961395263672, "learning_rate": 8.589285714285715e-06, "loss": 47.9757, "step": 1085 }, { "epoch": 25.85970149253731, "grad_norm": 27.695526123046875, "learning_rate": 8.583333333333333e-06, "loss": 47.0934, "step": 1086 }, { "epoch": 25.883582089552238, "grad_norm": 32.62801742553711, "learning_rate": 8.577380952380953e-06, "loss": 47.1186, "step": 1087 }, { "epoch": 25.907462686567165, "grad_norm": 26.777305603027344, "learning_rate": 8.571428571428571e-06, "loss": 47.2931, "step": 1088 }, { "epoch": 25.93134328358209, "grad_norm": 24.382678985595703, "learning_rate": 8.56547619047619e-06, "loss": 46.4698, "step": 1089 }, { "epoch": 25.955223880597014, "grad_norm": 34.310150146484375, "learning_rate": 8.55952380952381e-06, "loss": 46.0509, "step": 1090 }, { "epoch": 25.97910447761194, "grad_norm": 27.468976974487305, "learning_rate": 8.553571428571429e-06, "loss": 46.9123, "step": 1091 }, { "epoch": 26.0, "grad_norm": 27.90901756286621, "learning_rate": 8.547619047619048e-06, "loss": 41.8265, "step": 1092 }, { "epoch": 26.023880597014927, "grad_norm": 28.853416442871094, "learning_rate": 8.541666666666666e-06, "loss": 47.9612, "step": 1093 }, { "epoch": 26.04776119402985, "grad_norm": 31.96144676208496, "learning_rate": 8.535714285714286e-06, "loss": 46.8167, "step": 1094 }, { "epoch": 26.071641791044776, "grad_norm": 33.179141998291016, "learning_rate": 8.529761904761904e-06, "loss": 48.0464, "step": 1095 }, { "epoch": 26.095522388059702, "grad_norm": 32.18705368041992, "learning_rate": 8.523809523809524e-06, "loss": 45.6743, "step": 1096 }, { "epoch": 26.119402985074625, "grad_norm": 26.125934600830078, "learning_rate": 8.517857142857144e-06, "loss": 46.4944, "step": 1097 }, { "epoch": 26.143283582089552, "grad_norm": 31.666461944580078, "learning_rate": 8.511904761904762e-06, "loss": 47.6152, "step": 1098 }, { "epoch": 26.16716417910448, "grad_norm": 29.90437889099121, "learning_rate": 8.505952380952382e-06, "loss": 46.4497, "step": 1099 }, { "epoch": 26.1910447761194, "grad_norm": 32.59938430786133, "learning_rate": 8.5e-06, "loss": 47.1877, "step": 1100 }, { "epoch": 26.214925373134328, "grad_norm": 28.368562698364258, "learning_rate": 8.49404761904762e-06, "loss": 46.4898, "step": 1101 }, { "epoch": 26.238805970149254, "grad_norm": 31.274070739746094, "learning_rate": 8.488095238095238e-06, "loss": 47.1507, "step": 1102 }, { "epoch": 26.262686567164177, "grad_norm": 24.63444709777832, "learning_rate": 8.482142857142858e-06, "loss": 47.3659, "step": 1103 }, { "epoch": 26.286567164179104, "grad_norm": 35.413970947265625, "learning_rate": 8.476190476190477e-06, "loss": 46.6459, "step": 1104 }, { "epoch": 26.31044776119403, "grad_norm": 27.774656295776367, "learning_rate": 8.470238095238095e-06, "loss": 47.4369, "step": 1105 }, { "epoch": 26.334328358208957, "grad_norm": 32.258155822753906, "learning_rate": 8.464285714285715e-06, "loss": 47.487, "step": 1106 }, { "epoch": 26.35820895522388, "grad_norm": 18.22418785095215, "learning_rate": 8.458333333333333e-06, "loss": 46.948, "step": 1107 }, { "epoch": 26.382089552238806, "grad_norm": 24.50945472717285, "learning_rate": 8.452380952380953e-06, "loss": 45.7291, "step": 1108 }, { "epoch": 26.405970149253733, "grad_norm": 23.20486831665039, "learning_rate": 8.446428571428571e-06, "loss": 46.8704, "step": 1109 }, { "epoch": 26.429850746268656, "grad_norm": 20.810514450073242, "learning_rate": 8.440476190476191e-06, "loss": 47.3419, "step": 1110 }, { "epoch": 26.453731343283582, "grad_norm": 27.68440818786621, "learning_rate": 8.434523809523811e-06, "loss": 47.0641, "step": 1111 }, { "epoch": 26.47761194029851, "grad_norm": 26.989046096801758, "learning_rate": 8.428571428571429e-06, "loss": 46.9324, "step": 1112 }, { "epoch": 26.501492537313432, "grad_norm": 25.986888885498047, "learning_rate": 8.422619047619049e-06, "loss": 48.6179, "step": 1113 }, { "epoch": 26.52537313432836, "grad_norm": 28.111356735229492, "learning_rate": 8.416666666666667e-06, "loss": 46.4251, "step": 1114 }, { "epoch": 26.549253731343285, "grad_norm": 24.306228637695312, "learning_rate": 8.410714285714287e-06, "loss": 46.4379, "step": 1115 }, { "epoch": 26.573134328358208, "grad_norm": 23.894895553588867, "learning_rate": 8.404761904761905e-06, "loss": 46.665, "step": 1116 }, { "epoch": 26.597014925373134, "grad_norm": 25.917400360107422, "learning_rate": 8.398809523809525e-06, "loss": 46.6619, "step": 1117 }, { "epoch": 26.62089552238806, "grad_norm": 21.423585891723633, "learning_rate": 8.392857142857144e-06, "loss": 46.3447, "step": 1118 }, { "epoch": 26.644776119402984, "grad_norm": 29.13437271118164, "learning_rate": 8.386904761904762e-06, "loss": 46.4292, "step": 1119 }, { "epoch": 26.66865671641791, "grad_norm": 25.711469650268555, "learning_rate": 8.380952380952382e-06, "loss": 46.6156, "step": 1120 }, { "epoch": 26.692537313432837, "grad_norm": 26.55695915222168, "learning_rate": 8.375e-06, "loss": 46.7429, "step": 1121 }, { "epoch": 26.71641791044776, "grad_norm": 27.66262435913086, "learning_rate": 8.36904761904762e-06, "loss": 47.1275, "step": 1122 }, { "epoch": 26.740298507462686, "grad_norm": 33.85395050048828, "learning_rate": 8.36309523809524e-06, "loss": 46.7244, "step": 1123 }, { "epoch": 26.764179104477613, "grad_norm": 29.51833152770996, "learning_rate": 8.357142857142858e-06, "loss": 47.2072, "step": 1124 }, { "epoch": 26.788059701492536, "grad_norm": 26.21416664123535, "learning_rate": 8.351190476190478e-06, "loss": 47.8298, "step": 1125 }, { "epoch": 26.811940298507462, "grad_norm": 31.24039649963379, "learning_rate": 8.345238095238096e-06, "loss": 46.8069, "step": 1126 }, { "epoch": 26.83582089552239, "grad_norm": 32.19520568847656, "learning_rate": 8.339285714285716e-06, "loss": 47.1494, "step": 1127 }, { "epoch": 26.85970149253731, "grad_norm": 29.194063186645508, "learning_rate": 8.333333333333334e-06, "loss": 46.1827, "step": 1128 }, { "epoch": 26.883582089552238, "grad_norm": 28.723541259765625, "learning_rate": 8.327380952380954e-06, "loss": 46.8513, "step": 1129 }, { "epoch": 26.907462686567165, "grad_norm": 26.91135597229004, "learning_rate": 8.321428571428573e-06, "loss": 46.561, "step": 1130 }, { "epoch": 26.93134328358209, "grad_norm": 25.60898208618164, "learning_rate": 8.315476190476191e-06, "loss": 46.4706, "step": 1131 }, { "epoch": 26.955223880597014, "grad_norm": 23.72539520263672, "learning_rate": 8.309523809523811e-06, "loss": 46.7871, "step": 1132 }, { "epoch": 26.97910447761194, "grad_norm": 25.030731201171875, "learning_rate": 8.30357142857143e-06, "loss": 46.2433, "step": 1133 }, { "epoch": 27.0, "grad_norm": 25.439281463623047, "learning_rate": 8.297619047619049e-06, "loss": 40.9101, "step": 1134 }, { "epoch": 27.023880597014927, "grad_norm": 29.52981948852539, "learning_rate": 8.291666666666667e-06, "loss": 46.9385, "step": 1135 }, { "epoch": 27.04776119402985, "grad_norm": 22.007299423217773, "learning_rate": 8.285714285714287e-06, "loss": 47.2751, "step": 1136 }, { "epoch": 27.071641791044776, "grad_norm": 31.965675354003906, "learning_rate": 8.279761904761905e-06, "loss": 47.7763, "step": 1137 }, { "epoch": 27.095522388059702, "grad_norm": 23.38637351989746, "learning_rate": 8.273809523809523e-06, "loss": 46.0412, "step": 1138 }, { "epoch": 27.119402985074625, "grad_norm": 24.295711517333984, "learning_rate": 8.267857142857143e-06, "loss": 46.8552, "step": 1139 }, { "epoch": 27.143283582089552, "grad_norm": 20.915624618530273, "learning_rate": 8.261904761904763e-06, "loss": 46.3041, "step": 1140 }, { "epoch": 27.16716417910448, "grad_norm": 28.25569725036621, "learning_rate": 8.25595238095238e-06, "loss": 45.8736, "step": 1141 }, { "epoch": 27.1910447761194, "grad_norm": 24.8399658203125, "learning_rate": 8.25e-06, "loss": 46.0629, "step": 1142 }, { "epoch": 27.214925373134328, "grad_norm": 21.237272262573242, "learning_rate": 8.244047619047619e-06, "loss": 47.1252, "step": 1143 }, { "epoch": 27.238805970149254, "grad_norm": 24.35887336730957, "learning_rate": 8.238095238095239e-06, "loss": 46.9687, "step": 1144 }, { "epoch": 27.262686567164177, "grad_norm": 26.583545684814453, "learning_rate": 8.232142857142857e-06, "loss": 47.1486, "step": 1145 }, { "epoch": 27.286567164179104, "grad_norm": 23.712989807128906, "learning_rate": 8.226190476190476e-06, "loss": 46.4769, "step": 1146 }, { "epoch": 27.31044776119403, "grad_norm": 18.466094970703125, "learning_rate": 8.220238095238096e-06, "loss": 46.6911, "step": 1147 }, { "epoch": 27.334328358208957, "grad_norm": 31.812236785888672, "learning_rate": 8.214285714285714e-06, "loss": 47.6261, "step": 1148 }, { "epoch": 27.35820895522388, "grad_norm": 23.437780380249023, "learning_rate": 8.208333333333334e-06, "loss": 46.1776, "step": 1149 }, { "epoch": 27.382089552238806, "grad_norm": 27.252187728881836, "learning_rate": 8.202380952380952e-06, "loss": 44.8499, "step": 1150 }, { "epoch": 27.405970149253733, "grad_norm": 24.425500869750977, "learning_rate": 8.196428571428572e-06, "loss": 46.5397, "step": 1151 }, { "epoch": 27.429850746268656, "grad_norm": 28.237712860107422, "learning_rate": 8.190476190476192e-06, "loss": 46.8922, "step": 1152 }, { "epoch": 27.453731343283582, "grad_norm": 23.262300491333008, "learning_rate": 8.18452380952381e-06, "loss": 47.4204, "step": 1153 }, { "epoch": 27.47761194029851, "grad_norm": 20.69318389892578, "learning_rate": 8.17857142857143e-06, "loss": 47.1507, "step": 1154 }, { "epoch": 27.501492537313432, "grad_norm": 30.182701110839844, "learning_rate": 8.172619047619048e-06, "loss": 46.198, "step": 1155 }, { "epoch": 27.52537313432836, "grad_norm": 28.804855346679688, "learning_rate": 8.166666666666668e-06, "loss": 46.1366, "step": 1156 }, { "epoch": 27.549253731343285, "grad_norm": 26.992097854614258, "learning_rate": 8.160714285714286e-06, "loss": 47.3639, "step": 1157 }, { "epoch": 27.573134328358208, "grad_norm": 27.22978401184082, "learning_rate": 8.154761904761905e-06, "loss": 46.7295, "step": 1158 }, { "epoch": 27.597014925373134, "grad_norm": 24.036380767822266, "learning_rate": 8.148809523809525e-06, "loss": 46.7824, "step": 1159 }, { "epoch": 27.62089552238806, "grad_norm": 28.67648696899414, "learning_rate": 8.142857142857143e-06, "loss": 46.9712, "step": 1160 }, { "epoch": 27.644776119402984, "grad_norm": 27.389991760253906, "learning_rate": 8.136904761904763e-06, "loss": 47.6263, "step": 1161 }, { "epoch": 27.66865671641791, "grad_norm": 28.887022018432617, "learning_rate": 8.130952380952381e-06, "loss": 47.7553, "step": 1162 }, { "epoch": 27.692537313432837, "grad_norm": 17.354753494262695, "learning_rate": 8.125000000000001e-06, "loss": 46.1359, "step": 1163 }, { "epoch": 27.71641791044776, "grad_norm": 26.72220230102539, "learning_rate": 8.119047619047619e-06, "loss": 46.3653, "step": 1164 }, { "epoch": 27.740298507462686, "grad_norm": 22.09579849243164, "learning_rate": 8.113095238095239e-06, "loss": 46.856, "step": 1165 }, { "epoch": 27.764179104477613, "grad_norm": 30.197189331054688, "learning_rate": 8.107142857142859e-06, "loss": 46.4435, "step": 1166 }, { "epoch": 27.788059701492536, "grad_norm": 28.597610473632812, "learning_rate": 8.101190476190477e-06, "loss": 48.3097, "step": 1167 }, { "epoch": 27.811940298507462, "grad_norm": 22.391801834106445, "learning_rate": 8.095238095238097e-06, "loss": 47.2598, "step": 1168 }, { "epoch": 27.83582089552239, "grad_norm": 28.523584365844727, "learning_rate": 8.089285714285715e-06, "loss": 46.3123, "step": 1169 }, { "epoch": 27.85970149253731, "grad_norm": 21.646997451782227, "learning_rate": 8.083333333333334e-06, "loss": 46.2853, "step": 1170 }, { "epoch": 27.883582089552238, "grad_norm": 22.68369483947754, "learning_rate": 8.077380952380953e-06, "loss": 46.1355, "step": 1171 }, { "epoch": 27.907462686567165, "grad_norm": 25.581987380981445, "learning_rate": 8.071428571428572e-06, "loss": 45.5431, "step": 1172 }, { "epoch": 27.93134328358209, "grad_norm": 26.512523651123047, "learning_rate": 8.065476190476192e-06, "loss": 47.1898, "step": 1173 }, { "epoch": 27.955223880597014, "grad_norm": 26.89809226989746, "learning_rate": 8.05952380952381e-06, "loss": 46.8108, "step": 1174 }, { "epoch": 27.97910447761194, "grad_norm": 23.638704299926758, "learning_rate": 8.05357142857143e-06, "loss": 47.1454, "step": 1175 }, { "epoch": 28.0, "grad_norm": 28.61042022705078, "learning_rate": 8.047619047619048e-06, "loss": 41.7161, "step": 1176 }, { "epoch": 28.023880597014927, "grad_norm": 31.2153377532959, "learning_rate": 8.041666666666668e-06, "loss": 46.8925, "step": 1177 }, { "epoch": 28.04776119402985, "grad_norm": 26.284482955932617, "learning_rate": 8.035714285714286e-06, "loss": 45.7065, "step": 1178 }, { "epoch": 28.071641791044776, "grad_norm": 30.96581268310547, "learning_rate": 8.029761904761906e-06, "loss": 46.5612, "step": 1179 }, { "epoch": 28.095522388059702, "grad_norm": 24.686336517333984, "learning_rate": 8.023809523809526e-06, "loss": 46.4376, "step": 1180 }, { "epoch": 28.119402985074625, "grad_norm": 27.783416748046875, "learning_rate": 8.017857142857144e-06, "loss": 47.247, "step": 1181 }, { "epoch": 28.143283582089552, "grad_norm": 33.3108024597168, "learning_rate": 8.011904761904763e-06, "loss": 47.3171, "step": 1182 }, { "epoch": 28.16716417910448, "grad_norm": 30.010536193847656, "learning_rate": 8.005952380952382e-06, "loss": 45.9761, "step": 1183 }, { "epoch": 28.1910447761194, "grad_norm": 29.399965286254883, "learning_rate": 8.000000000000001e-06, "loss": 47.3345, "step": 1184 }, { "epoch": 28.214925373134328, "grad_norm": 25.835142135620117, "learning_rate": 7.99404761904762e-06, "loss": 46.3395, "step": 1185 }, { "epoch": 28.238805970149254, "grad_norm": 26.06570053100586, "learning_rate": 7.98809523809524e-06, "loss": 45.842, "step": 1186 }, { "epoch": 28.262686567164177, "grad_norm": 28.64603042602539, "learning_rate": 7.982142857142859e-06, "loss": 46.4802, "step": 1187 }, { "epoch": 28.286567164179104, "grad_norm": 27.157583236694336, "learning_rate": 7.976190476190477e-06, "loss": 45.7376, "step": 1188 }, { "epoch": 28.31044776119403, "grad_norm": 22.073328018188477, "learning_rate": 7.970238095238097e-06, "loss": 47.0787, "step": 1189 }, { "epoch": 28.334328358208957, "grad_norm": 21.545568466186523, "learning_rate": 7.964285714285715e-06, "loss": 45.6691, "step": 1190 }, { "epoch": 28.35820895522388, "grad_norm": 26.17327880859375, "learning_rate": 7.958333333333333e-06, "loss": 46.2058, "step": 1191 }, { "epoch": 28.382089552238806, "grad_norm": 24.443920135498047, "learning_rate": 7.952380952380953e-06, "loss": 45.3531, "step": 1192 }, { "epoch": 28.405970149253733, "grad_norm": 27.207778930664062, "learning_rate": 7.946428571428571e-06, "loss": 46.5519, "step": 1193 }, { "epoch": 28.429850746268656, "grad_norm": 23.15156364440918, "learning_rate": 7.94047619047619e-06, "loss": 46.9284, "step": 1194 }, { "epoch": 28.453731343283582, "grad_norm": 27.96567153930664, "learning_rate": 7.93452380952381e-06, "loss": 46.3696, "step": 1195 }, { "epoch": 28.47761194029851, "grad_norm": 25.828689575195312, "learning_rate": 7.928571428571429e-06, "loss": 46.2933, "step": 1196 }, { "epoch": 28.501492537313432, "grad_norm": 30.69676971435547, "learning_rate": 7.922619047619048e-06, "loss": 46.7471, "step": 1197 }, { "epoch": 28.52537313432836, "grad_norm": 24.977018356323242, "learning_rate": 7.916666666666667e-06, "loss": 47.045, "step": 1198 }, { "epoch": 28.549253731343285, "grad_norm": 26.286821365356445, "learning_rate": 7.910714285714286e-06, "loss": 47.0185, "step": 1199 }, { "epoch": 28.573134328358208, "grad_norm": 25.324783325195312, "learning_rate": 7.904761904761904e-06, "loss": 46.4036, "step": 1200 }, { "epoch": 28.597014925373134, "grad_norm": 34.25847625732422, "learning_rate": 7.898809523809524e-06, "loss": 46.8307, "step": 1201 }, { "epoch": 28.62089552238806, "grad_norm": 24.739521026611328, "learning_rate": 7.892857142857144e-06, "loss": 46.515, "step": 1202 }, { "epoch": 28.644776119402984, "grad_norm": 36.694252014160156, "learning_rate": 7.886904761904762e-06, "loss": 45.7078, "step": 1203 }, { "epoch": 28.66865671641791, "grad_norm": 34.95314025878906, "learning_rate": 7.880952380952382e-06, "loss": 47.6302, "step": 1204 }, { "epoch": 28.692537313432837, "grad_norm": 26.474821090698242, "learning_rate": 7.875e-06, "loss": 47.2158, "step": 1205 }, { "epoch": 28.71641791044776, "grad_norm": 30.19892692565918, "learning_rate": 7.86904761904762e-06, "loss": 46.1515, "step": 1206 }, { "epoch": 28.740298507462686, "grad_norm": 32.16860580444336, "learning_rate": 7.863095238095238e-06, "loss": 46.3963, "step": 1207 }, { "epoch": 28.764179104477613, "grad_norm": 27.323444366455078, "learning_rate": 7.857142857142858e-06, "loss": 47.6511, "step": 1208 }, { "epoch": 28.788059701492536, "grad_norm": 24.073701858520508, "learning_rate": 7.851190476190477e-06, "loss": 46.7283, "step": 1209 }, { "epoch": 28.811940298507462, "grad_norm": 25.73206329345703, "learning_rate": 7.845238095238096e-06, "loss": 47.3957, "step": 1210 }, { "epoch": 28.83582089552239, "grad_norm": 23.368709564208984, "learning_rate": 7.839285714285715e-06, "loss": 46.1844, "step": 1211 }, { "epoch": 28.85970149253731, "grad_norm": 24.563371658325195, "learning_rate": 7.833333333333333e-06, "loss": 47.0752, "step": 1212 }, { "epoch": 28.883582089552238, "grad_norm": 20.747081756591797, "learning_rate": 7.827380952380953e-06, "loss": 45.9425, "step": 1213 }, { "epoch": 28.907462686567165, "grad_norm": 22.27573013305664, "learning_rate": 7.821428571428571e-06, "loss": 46.746, "step": 1214 }, { "epoch": 28.93134328358209, "grad_norm": 23.162179946899414, "learning_rate": 7.815476190476191e-06, "loss": 46.808, "step": 1215 }, { "epoch": 28.955223880597014, "grad_norm": 23.585325241088867, "learning_rate": 7.809523809523811e-06, "loss": 47.2584, "step": 1216 }, { "epoch": 28.97910447761194, "grad_norm": 29.979564666748047, "learning_rate": 7.803571428571429e-06, "loss": 47.3433, "step": 1217 }, { "epoch": 29.0, "grad_norm": 25.872072219848633, "learning_rate": 7.797619047619049e-06, "loss": 41.1844, "step": 1218 }, { "epoch": 29.023880597014927, "grad_norm": 25.673351287841797, "learning_rate": 7.791666666666667e-06, "loss": 46.4546, "step": 1219 }, { "epoch": 29.04776119402985, "grad_norm": 29.831058502197266, "learning_rate": 7.785714285714287e-06, "loss": 46.8122, "step": 1220 }, { "epoch": 29.071641791044776, "grad_norm": 27.548316955566406, "learning_rate": 7.779761904761905e-06, "loss": 47.748, "step": 1221 }, { "epoch": 29.095522388059702, "grad_norm": 26.399370193481445, "learning_rate": 7.773809523809525e-06, "loss": 46.736, "step": 1222 }, { "epoch": 29.119402985074625, "grad_norm": 25.127031326293945, "learning_rate": 7.767857142857144e-06, "loss": 46.8307, "step": 1223 }, { "epoch": 29.143283582089552, "grad_norm": 26.624732971191406, "learning_rate": 7.761904761904762e-06, "loss": 46.2401, "step": 1224 }, { "epoch": 29.16716417910448, "grad_norm": 30.770824432373047, "learning_rate": 7.755952380952382e-06, "loss": 46.7194, "step": 1225 }, { "epoch": 29.1910447761194, "grad_norm": 23.830007553100586, "learning_rate": 7.75e-06, "loss": 46.4737, "step": 1226 }, { "epoch": 29.214925373134328, "grad_norm": 32.90129470825195, "learning_rate": 7.74404761904762e-06, "loss": 47.4361, "step": 1227 }, { "epoch": 29.238805970149254, "grad_norm": 23.381397247314453, "learning_rate": 7.738095238095238e-06, "loss": 45.3297, "step": 1228 }, { "epoch": 29.262686567164177, "grad_norm": 32.836387634277344, "learning_rate": 7.732142857142858e-06, "loss": 46.0574, "step": 1229 }, { "epoch": 29.286567164179104, "grad_norm": 25.803264617919922, "learning_rate": 7.726190476190478e-06, "loss": 45.757, "step": 1230 }, { "epoch": 29.31044776119403, "grad_norm": 29.38982391357422, "learning_rate": 7.720238095238096e-06, "loss": 46.7099, "step": 1231 }, { "epoch": 29.334328358208957, "grad_norm": 26.39947509765625, "learning_rate": 7.714285714285716e-06, "loss": 47.5944, "step": 1232 }, { "epoch": 29.35820895522388, "grad_norm": 25.958354949951172, "learning_rate": 7.708333333333334e-06, "loss": 46.1395, "step": 1233 }, { "epoch": 29.382089552238806, "grad_norm": 28.697542190551758, "learning_rate": 7.702380952380954e-06, "loss": 46.1713, "step": 1234 }, { "epoch": 29.405970149253733, "grad_norm": 19.471586227416992, "learning_rate": 7.696428571428572e-06, "loss": 47.7724, "step": 1235 }, { "epoch": 29.429850746268656, "grad_norm": 29.924991607666016, "learning_rate": 7.690476190476191e-06, "loss": 47.2203, "step": 1236 }, { "epoch": 29.453731343283582, "grad_norm": 20.516891479492188, "learning_rate": 7.684523809523811e-06, "loss": 46.2945, "step": 1237 }, { "epoch": 29.47761194029851, "grad_norm": 30.605262756347656, "learning_rate": 7.67857142857143e-06, "loss": 47.1786, "step": 1238 }, { "epoch": 29.501492537313432, "grad_norm": 16.288013458251953, "learning_rate": 7.672619047619049e-06, "loss": 45.3413, "step": 1239 }, { "epoch": 29.52537313432836, "grad_norm": 23.54091453552246, "learning_rate": 7.666666666666667e-06, "loss": 45.9196, "step": 1240 }, { "epoch": 29.549253731343285, "grad_norm": 20.33724021911621, "learning_rate": 7.660714285714287e-06, "loss": 47.0275, "step": 1241 }, { "epoch": 29.573134328358208, "grad_norm": 27.460975646972656, "learning_rate": 7.654761904761905e-06, "loss": 44.8995, "step": 1242 }, { "epoch": 29.597014925373134, "grad_norm": 25.58623695373535, "learning_rate": 7.648809523809523e-06, "loss": 46.0706, "step": 1243 }, { "epoch": 29.62089552238806, "grad_norm": 27.997203826904297, "learning_rate": 7.642857142857143e-06, "loss": 47.2368, "step": 1244 }, { "epoch": 29.644776119402984, "grad_norm": 31.361181259155273, "learning_rate": 7.636904761904763e-06, "loss": 46.8056, "step": 1245 }, { "epoch": 29.66865671641791, "grad_norm": 29.266433715820312, "learning_rate": 7.630952380952381e-06, "loss": 45.323, "step": 1246 }, { "epoch": 29.692537313432837, "grad_norm": 24.066415786743164, "learning_rate": 7.625e-06, "loss": 46.9221, "step": 1247 }, { "epoch": 29.71641791044776, "grad_norm": 25.790491104125977, "learning_rate": 7.61904761904762e-06, "loss": 45.7051, "step": 1248 }, { "epoch": 29.740298507462686, "grad_norm": 24.202716827392578, "learning_rate": 7.6130952380952386e-06, "loss": 47.1067, "step": 1249 }, { "epoch": 29.764179104477613, "grad_norm": 27.302003860473633, "learning_rate": 7.6071428571428575e-06, "loss": 46.6039, "step": 1250 }, { "epoch": 29.788059701492536, "grad_norm": 22.75196075439453, "learning_rate": 7.6011904761904765e-06, "loss": 46.3265, "step": 1251 }, { "epoch": 29.811940298507462, "grad_norm": 30.963153839111328, "learning_rate": 7.595238095238095e-06, "loss": 46.3283, "step": 1252 }, { "epoch": 29.83582089552239, "grad_norm": 21.538162231445312, "learning_rate": 7.589285714285714e-06, "loss": 46.5345, "step": 1253 }, { "epoch": 29.85970149253731, "grad_norm": 28.09955596923828, "learning_rate": 7.583333333333333e-06, "loss": 47.1, "step": 1254 }, { "epoch": 29.883582089552238, "grad_norm": 20.88216781616211, "learning_rate": 7.577380952380953e-06, "loss": 45.3354, "step": 1255 }, { "epoch": 29.907462686567165, "grad_norm": 24.15240478515625, "learning_rate": 7.571428571428572e-06, "loss": 46.299, "step": 1256 }, { "epoch": 29.93134328358209, "grad_norm": 22.839298248291016, "learning_rate": 7.565476190476191e-06, "loss": 46.3436, "step": 1257 }, { "epoch": 29.955223880597014, "grad_norm": 26.582752227783203, "learning_rate": 7.55952380952381e-06, "loss": 45.9107, "step": 1258 }, { "epoch": 29.97910447761194, "grad_norm": 24.98562240600586, "learning_rate": 7.553571428571429e-06, "loss": 46.7134, "step": 1259 }, { "epoch": 30.0, "grad_norm": 23.327436447143555, "learning_rate": 7.547619047619048e-06, "loss": 41.2325, "step": 1260 }, { "epoch": 30.023880597014927, "grad_norm": 20.400623321533203, "learning_rate": 7.541666666666667e-06, "loss": 46.9564, "step": 1261 }, { "epoch": 30.04776119402985, "grad_norm": NaN, "learning_rate": 7.5357142857142865e-06, "loss": 68.2215, "step": 1262 }, { "epoch": 30.071641791044776, "grad_norm": 22.870811462402344, "learning_rate": 7.5357142857142865e-06, "loss": 46.5712, "step": 1263 }, { "epoch": 30.095522388059702, "grad_norm": 24.057098388671875, "learning_rate": 7.5297619047619055e-06, "loss": 46.8943, "step": 1264 }, { "epoch": 30.119402985074625, "grad_norm": 25.820720672607422, "learning_rate": 7.523809523809524e-06, "loss": 46.0747, "step": 1265 }, { "epoch": 30.143283582089552, "grad_norm": 28.460693359375, "learning_rate": 7.517857142857143e-06, "loss": 47.0127, "step": 1266 }, { "epoch": 30.16716417910448, "grad_norm": 21.60432243347168, "learning_rate": 7.511904761904762e-06, "loss": 45.8081, "step": 1267 }, { "epoch": 30.1910447761194, "grad_norm": 29.013648986816406, "learning_rate": 7.505952380952381e-06, "loss": 46.6712, "step": 1268 }, { "epoch": 30.214925373134328, "grad_norm": 24.865493774414062, "learning_rate": 7.500000000000001e-06, "loss": 46.6816, "step": 1269 }, { "epoch": 30.238805970149254, "grad_norm": 23.676206588745117, "learning_rate": 7.49404761904762e-06, "loss": 46.2663, "step": 1270 }, { "epoch": 30.262686567164177, "grad_norm": 27.889135360717773, "learning_rate": 7.488095238095239e-06, "loss": 45.7052, "step": 1271 }, { "epoch": 30.286567164179104, "grad_norm": 29.024211883544922, "learning_rate": 7.482142857142858e-06, "loss": 45.5005, "step": 1272 }, { "epoch": 30.31044776119403, "grad_norm": 25.8428955078125, "learning_rate": 7.476190476190477e-06, "loss": 46.788, "step": 1273 }, { "epoch": 30.334328358208957, "grad_norm": 26.765539169311523, "learning_rate": 7.470238095238096e-06, "loss": 46.454, "step": 1274 }, { "epoch": 30.35820895522388, "grad_norm": 34.80079650878906, "learning_rate": 7.464285714285715e-06, "loss": 47.6929, "step": 1275 }, { "epoch": 30.382089552238806, "grad_norm": 25.589618682861328, "learning_rate": 7.4583333333333345e-06, "loss": 46.0104, "step": 1276 }, { "epoch": 30.405970149253733, "grad_norm": 27.0733699798584, "learning_rate": 7.4523809523809534e-06, "loss": 45.4742, "step": 1277 }, { "epoch": 30.429850746268656, "grad_norm": 26.662338256835938, "learning_rate": 7.446428571428572e-06, "loss": 46.8066, "step": 1278 }, { "epoch": 30.453731343283582, "grad_norm": 28.389951705932617, "learning_rate": 7.440476190476191e-06, "loss": 46.9716, "step": 1279 }, { "epoch": 30.47761194029851, "grad_norm": NaN, "learning_rate": 7.43452380952381e-06, "loss": 52.2915, "step": 1280 }, { "epoch": 30.501492537313432, "grad_norm": 26.77708625793457, "learning_rate": 7.43452380952381e-06, "loss": 44.919, "step": 1281 }, { "epoch": 30.52537313432836, "grad_norm": 25.423444747924805, "learning_rate": 7.428571428571429e-06, "loss": 46.5057, "step": 1282 }, { "epoch": 30.549253731343285, "grad_norm": 24.04167366027832, "learning_rate": 7.422619047619048e-06, "loss": 46.3685, "step": 1283 }, { "epoch": 30.573134328358208, "grad_norm": 23.51607894897461, "learning_rate": 7.416666666666668e-06, "loss": 45.9694, "step": 1284 }, { "epoch": 30.597014925373134, "grad_norm": 26.216157913208008, "learning_rate": 7.410714285714287e-06, "loss": 47.2582, "step": 1285 }, { "epoch": 30.62089552238806, "grad_norm": 24.339780807495117, "learning_rate": 7.404761904761906e-06, "loss": 44.8052, "step": 1286 }, { "epoch": 30.644776119402984, "grad_norm": 19.203577041625977, "learning_rate": 7.398809523809525e-06, "loss": 47.0301, "step": 1287 }, { "epoch": 30.66865671641791, "grad_norm": 22.252805709838867, "learning_rate": 7.392857142857144e-06, "loss": 45.5993, "step": 1288 }, { "epoch": 30.692537313432837, "grad_norm": 25.316205978393555, "learning_rate": 7.386904761904763e-06, "loss": 46.1157, "step": 1289 }, { "epoch": 30.71641791044776, "grad_norm": 18.311643600463867, "learning_rate": 7.380952380952382e-06, "loss": 46.5986, "step": 1290 }, { "epoch": 30.740298507462686, "grad_norm": 31.84505271911621, "learning_rate": 7.375000000000001e-06, "loss": 46.9177, "step": 1291 }, { "epoch": 30.764179104477613, "grad_norm": 26.221525192260742, "learning_rate": 7.36904761904762e-06, "loss": 47.0897, "step": 1292 }, { "epoch": 30.788059701492536, "grad_norm": 27.029104232788086, "learning_rate": 7.363095238095239e-06, "loss": 45.3724, "step": 1293 }, { "epoch": 30.811940298507462, "grad_norm": 33.51012420654297, "learning_rate": 7.357142857142858e-06, "loss": 46.7046, "step": 1294 }, { "epoch": 30.83582089552239, "grad_norm": 26.42972183227539, "learning_rate": 7.351190476190477e-06, "loss": 46.7606, "step": 1295 }, { "epoch": 30.85970149253731, "grad_norm": 30.91115951538086, "learning_rate": 7.345238095238096e-06, "loss": 47.5485, "step": 1296 }, { "epoch": 30.883582089552238, "grad_norm": 28.296560287475586, "learning_rate": 7.339285714285714e-06, "loss": 46.4997, "step": 1297 }, { "epoch": 30.907462686567165, "grad_norm": 32.054561614990234, "learning_rate": 7.333333333333333e-06, "loss": 46.4953, "step": 1298 }, { "epoch": 30.93134328358209, "grad_norm": 31.635595321655273, "learning_rate": 7.327380952380952e-06, "loss": 46.5325, "step": 1299 }, { "epoch": 30.955223880597014, "grad_norm": 25.557523727416992, "learning_rate": 7.321428571428572e-06, "loss": 45.56, "step": 1300 }, { "epoch": 30.97910447761194, "grad_norm": 30.01810073852539, "learning_rate": 7.315476190476191e-06, "loss": 46.6149, "step": 1301 }, { "epoch": 31.0, "grad_norm": 24.6826114654541, "learning_rate": 7.30952380952381e-06, "loss": 40.8651, "step": 1302 }, { "epoch": 31.023880597014927, "grad_norm": 24.378164291381836, "learning_rate": 7.303571428571429e-06, "loss": 46.0721, "step": 1303 }, { "epoch": 31.04776119402985, "grad_norm": 20.247482299804688, "learning_rate": 7.297619047619048e-06, "loss": 45.8819, "step": 1304 }, { "epoch": 31.071641791044776, "grad_norm": 25.636112213134766, "learning_rate": 7.291666666666667e-06, "loss": 47.1987, "step": 1305 }, { "epoch": 31.095522388059702, "grad_norm": 30.428096771240234, "learning_rate": 7.285714285714286e-06, "loss": 46.6961, "step": 1306 }, { "epoch": 31.119402985074625, "grad_norm": 21.404991149902344, "learning_rate": 7.279761904761905e-06, "loss": 46.6841, "step": 1307 }, { "epoch": 31.143283582089552, "grad_norm": 31.655052185058594, "learning_rate": 7.273809523809524e-06, "loss": 47.7781, "step": 1308 }, { "epoch": 31.16716417910448, "grad_norm": 24.327327728271484, "learning_rate": 7.267857142857143e-06, "loss": 46.002, "step": 1309 }, { "epoch": 31.1910447761194, "grad_norm": 26.230745315551758, "learning_rate": 7.261904761904762e-06, "loss": 47.3903, "step": 1310 }, { "epoch": 31.214925373134328, "grad_norm": 27.337961196899414, "learning_rate": 7.255952380952381e-06, "loss": 46.0999, "step": 1311 }, { "epoch": 31.238805970149254, "grad_norm": 35.14864730834961, "learning_rate": 7.25e-06, "loss": 46.5187, "step": 1312 }, { "epoch": 31.262686567164177, "grad_norm": 26.60109519958496, "learning_rate": 7.24404761904762e-06, "loss": 44.5864, "step": 1313 }, { "epoch": 31.286567164179104, "grad_norm": 33.15165710449219, "learning_rate": 7.238095238095239e-06, "loss": 46.4779, "step": 1314 }, { "epoch": 31.31044776119403, "grad_norm": 26.3510684967041, "learning_rate": 7.232142857142858e-06, "loss": 47.0845, "step": 1315 }, { "epoch": 31.334328358208957, "grad_norm": 47.12569046020508, "learning_rate": 7.226190476190477e-06, "loss": 47.2947, "step": 1316 }, { "epoch": 31.35820895522388, "grad_norm": 40.15263748168945, "learning_rate": 7.220238095238096e-06, "loss": 45.8788, "step": 1317 }, { "epoch": 31.382089552238806, "grad_norm": 36.59072494506836, "learning_rate": 7.2142857142857145e-06, "loss": 45.991, "step": 1318 }, { "epoch": 31.405970149253733, "grad_norm": 36.895408630371094, "learning_rate": 7.2083333333333335e-06, "loss": 46.197, "step": 1319 }, { "epoch": 31.429850746268656, "grad_norm": NaN, "learning_rate": 7.202380952380953e-06, "loss": 38.9024, "step": 1320 }, { "epoch": 31.453731343283582, "grad_norm": 27.446247100830078, "learning_rate": 7.202380952380953e-06, "loss": 45.5293, "step": 1321 }, { "epoch": 31.47761194029851, "grad_norm": 27.48939323425293, "learning_rate": 7.196428571428572e-06, "loss": 46.8754, "step": 1322 }, { "epoch": 31.501492537313432, "grad_norm": 22.736833572387695, "learning_rate": 7.190476190476191e-06, "loss": 44.4905, "step": 1323 }, { "epoch": 31.52537313432836, "grad_norm": 23.413612365722656, "learning_rate": 7.18452380952381e-06, "loss": 47.0714, "step": 1324 }, { "epoch": 31.549253731343285, "grad_norm": 29.154848098754883, "learning_rate": 7.178571428571429e-06, "loss": 46.393, "step": 1325 }, { "epoch": 31.573134328358208, "grad_norm": 28.130638122558594, "learning_rate": 7.172619047619048e-06, "loss": 46.1857, "step": 1326 }, { "epoch": 31.597014925373134, "grad_norm": 19.745920181274414, "learning_rate": 7.166666666666667e-06, "loss": 45.2873, "step": 1327 }, { "epoch": 31.62089552238806, "grad_norm": 27.630279541015625, "learning_rate": 7.160714285714287e-06, "loss": 46.5475, "step": 1328 }, { "epoch": 31.644776119402984, "grad_norm": 20.568862915039062, "learning_rate": 7.154761904761906e-06, "loss": 46.231, "step": 1329 }, { "epoch": 31.66865671641791, "grad_norm": 17.769695281982422, "learning_rate": 7.148809523809525e-06, "loss": 46.8431, "step": 1330 }, { "epoch": 31.692537313432837, "grad_norm": 29.941057205200195, "learning_rate": 7.1428571428571436e-06, "loss": 44.842, "step": 1331 }, { "epoch": 31.71641791044776, "grad_norm": 21.054975509643555, "learning_rate": 7.1369047619047625e-06, "loss": 45.1147, "step": 1332 }, { "epoch": 31.740298507462686, "grad_norm": 23.80388069152832, "learning_rate": 7.1309523809523814e-06, "loss": 46.1839, "step": 1333 }, { "epoch": 31.764179104477613, "grad_norm": 30.561933517456055, "learning_rate": 7.125e-06, "loss": 46.2703, "step": 1334 }, { "epoch": 31.788059701492536, "grad_norm": 23.752151489257812, "learning_rate": 7.11904761904762e-06, "loss": 46.7347, "step": 1335 }, { "epoch": 31.811940298507462, "grad_norm": 32.00548553466797, "learning_rate": 7.113095238095239e-06, "loss": 46.1236, "step": 1336 }, { "epoch": 31.83582089552239, "grad_norm": 26.685504913330078, "learning_rate": 7.107142857142858e-06, "loss": 47.8881, "step": 1337 }, { "epoch": 31.85970149253731, "grad_norm": 26.5799503326416, "learning_rate": 7.101190476190477e-06, "loss": 46.1187, "step": 1338 }, { "epoch": 31.883582089552238, "grad_norm": 28.78062629699707, "learning_rate": 7.095238095238096e-06, "loss": 46.8058, "step": 1339 }, { "epoch": 31.907462686567165, "grad_norm": 26.98428726196289, "learning_rate": 7.089285714285715e-06, "loss": 46.3602, "step": 1340 }, { "epoch": 31.93134328358209, "grad_norm": 32.5291633605957, "learning_rate": 7.083333333333335e-06, "loss": 46.464, "step": 1341 }, { "epoch": 31.955223880597014, "grad_norm": 25.088685989379883, "learning_rate": 7.077380952380954e-06, "loss": 47.0542, "step": 1342 }, { "epoch": 31.97910447761194, "grad_norm": 32.58052444458008, "learning_rate": 7.0714285714285726e-06, "loss": 46.3364, "step": 1343 }, { "epoch": 32.0, "grad_norm": 22.65249252319336, "learning_rate": 7.0654761904761915e-06, "loss": 39.8201, "step": 1344 }, { "epoch": 32.02388059701492, "grad_norm": 27.03556251525879, "learning_rate": 7.0595238095238105e-06, "loss": 47.7819, "step": 1345 }, { "epoch": 32.04776119402985, "grad_norm": 25.712047576904297, "learning_rate": 7.053571428571429e-06, "loss": 46.1116, "step": 1346 }, { "epoch": 32.071641791044776, "grad_norm": 21.99336051940918, "learning_rate": 7.047619047619048e-06, "loss": 46.3745, "step": 1347 }, { "epoch": 32.0955223880597, "grad_norm": 28.53151512145996, "learning_rate": 7.041666666666668e-06, "loss": 46.5998, "step": 1348 }, { "epoch": 32.11940298507463, "grad_norm": 20.151912689208984, "learning_rate": 7.035714285714287e-06, "loss": 45.4197, "step": 1349 }, { "epoch": 32.14328358208955, "grad_norm": 21.491193771362305, "learning_rate": 7.029761904761905e-06, "loss": 46.0246, "step": 1350 }, { "epoch": 32.167164179104475, "grad_norm": 20.057588577270508, "learning_rate": 7.023809523809524e-06, "loss": 46.2149, "step": 1351 }, { "epoch": 32.191044776119405, "grad_norm": 16.675336837768555, "learning_rate": 7.017857142857143e-06, "loss": 46.5231, "step": 1352 }, { "epoch": 32.21492537313433, "grad_norm": 22.007305145263672, "learning_rate": 7.011904761904762e-06, "loss": 44.8665, "step": 1353 }, { "epoch": 32.23880597014925, "grad_norm": 22.947837829589844, "learning_rate": 7.005952380952381e-06, "loss": 45.0394, "step": 1354 }, { "epoch": 32.26268656716418, "grad_norm": 25.444522857666016, "learning_rate": 7e-06, "loss": 46.0367, "step": 1355 }, { "epoch": 32.286567164179104, "grad_norm": 22.319833755493164, "learning_rate": 6.994047619047619e-06, "loss": 47.0455, "step": 1356 }, { "epoch": 32.31044776119403, "grad_norm": 20.41710090637207, "learning_rate": 6.988095238095239e-06, "loss": 45.5119, "step": 1357 }, { "epoch": 32.33432835820896, "grad_norm": 29.03120994567871, "learning_rate": 6.9821428571428576e-06, "loss": 45.1962, "step": 1358 }, { "epoch": 32.35820895522388, "grad_norm": 22.10372543334961, "learning_rate": 6.9761904761904765e-06, "loss": 47.379, "step": 1359 }, { "epoch": 32.3820895522388, "grad_norm": 29.49492073059082, "learning_rate": 6.9702380952380955e-06, "loss": 48.2375, "step": 1360 }, { "epoch": 32.40597014925373, "grad_norm": 26.655149459838867, "learning_rate": 6.964285714285714e-06, "loss": 45.8468, "step": 1361 }, { "epoch": 32.429850746268656, "grad_norm": 27.994979858398438, "learning_rate": 6.958333333333333e-06, "loss": 46.4883, "step": 1362 }, { "epoch": 32.45373134328358, "grad_norm": 25.787900924682617, "learning_rate": 6.952380952380952e-06, "loss": 47.0159, "step": 1363 }, { "epoch": 32.47761194029851, "grad_norm": 29.429485321044922, "learning_rate": 6.946428571428572e-06, "loss": 45.182, "step": 1364 }, { "epoch": 32.50149253731343, "grad_norm": 21.825122833251953, "learning_rate": 6.940476190476191e-06, "loss": 47.4224, "step": 1365 }, { "epoch": 32.525373134328355, "grad_norm": 26.284622192382812, "learning_rate": 6.93452380952381e-06, "loss": 45.7025, "step": 1366 }, { "epoch": 32.549253731343285, "grad_norm": 21.384979248046875, "learning_rate": 6.928571428571429e-06, "loss": 45.6267, "step": 1367 }, { "epoch": 32.57313432835821, "grad_norm": 21.64442253112793, "learning_rate": 6.922619047619048e-06, "loss": 46.8577, "step": 1368 }, { "epoch": 32.59701492537313, "grad_norm": 22.377302169799805, "learning_rate": 6.916666666666667e-06, "loss": 46.5022, "step": 1369 }, { "epoch": 32.62089552238806, "grad_norm": 18.1933536529541, "learning_rate": 6.910714285714286e-06, "loss": 46.7098, "step": 1370 }, { "epoch": 32.644776119402984, "grad_norm": NaN, "learning_rate": 6.9047619047619055e-06, "loss": 59.6159, "step": 1371 }, { "epoch": 32.668656716417914, "grad_norm": 20.35690689086914, "learning_rate": 6.9047619047619055e-06, "loss": 47.4638, "step": 1372 }, { "epoch": 32.69253731343284, "grad_norm": 29.140775680541992, "learning_rate": 6.8988095238095245e-06, "loss": 46.242, "step": 1373 }, { "epoch": 32.71641791044776, "grad_norm": 25.27906608581543, "learning_rate": 6.892857142857143e-06, "loss": 45.7122, "step": 1374 }, { "epoch": 32.74029850746269, "grad_norm": 19.000076293945312, "learning_rate": 6.886904761904762e-06, "loss": 46.4813, "step": 1375 }, { "epoch": 32.76417910447761, "grad_norm": 25.048797607421875, "learning_rate": 6.880952380952381e-06, "loss": 45.5569, "step": 1376 }, { "epoch": 32.788059701492536, "grad_norm": 24.078060150146484, "learning_rate": 6.875e-06, "loss": 45.9708, "step": 1377 }, { "epoch": 32.811940298507466, "grad_norm": 23.822643280029297, "learning_rate": 6.86904761904762e-06, "loss": 47.5914, "step": 1378 }, { "epoch": 32.83582089552239, "grad_norm": 29.267864227294922, "learning_rate": 6.863095238095239e-06, "loss": 45.2741, "step": 1379 }, { "epoch": 32.85970149253731, "grad_norm": 19.477649688720703, "learning_rate": 6.857142857142858e-06, "loss": 46.3849, "step": 1380 }, { "epoch": 32.88358208955224, "grad_norm": 33.31391525268555, "learning_rate": 6.851190476190477e-06, "loss": 44.9609, "step": 1381 }, { "epoch": 32.907462686567165, "grad_norm": 23.064956665039062, "learning_rate": 6.845238095238096e-06, "loss": 45.8295, "step": 1382 }, { "epoch": 32.93134328358209, "grad_norm": 30.366653442382812, "learning_rate": 6.839285714285715e-06, "loss": 44.3142, "step": 1383 }, { "epoch": 32.95522388059702, "grad_norm": 25.059572219848633, "learning_rate": 6.833333333333334e-06, "loss": 46.5768, "step": 1384 }, { "epoch": 32.97910447761194, "grad_norm": 23.186697006225586, "learning_rate": 6.8273809523809535e-06, "loss": 45.185, "step": 1385 }, { "epoch": 33.0, "grad_norm": 21.550168991088867, "learning_rate": 6.8214285714285724e-06, "loss": 39.1732, "step": 1386 }, { "epoch": 33.02388059701492, "grad_norm": 22.417282104492188, "learning_rate": 6.815476190476191e-06, "loss": 47.6667, "step": 1387 }, { "epoch": 33.04776119402985, "grad_norm": 26.805702209472656, "learning_rate": 6.80952380952381e-06, "loss": 46.4091, "step": 1388 }, { "epoch": 33.071641791044776, "grad_norm": 23.723695755004883, "learning_rate": 6.803571428571429e-06, "loss": 46.3798, "step": 1389 }, { "epoch": 33.0955223880597, "grad_norm": 30.029897689819336, "learning_rate": 6.797619047619048e-06, "loss": 45.9736, "step": 1390 }, { "epoch": 33.11940298507463, "grad_norm": 19.387653350830078, "learning_rate": 6.791666666666667e-06, "loss": 45.1998, "step": 1391 }, { "epoch": 33.14328358208955, "grad_norm": 33.68477249145508, "learning_rate": 6.785714285714287e-06, "loss": 45.4435, "step": 1392 }, { "epoch": 33.167164179104475, "grad_norm": 26.001699447631836, "learning_rate": 6.779761904761906e-06, "loss": 45.6725, "step": 1393 }, { "epoch": 33.191044776119405, "grad_norm": 34.19535827636719, "learning_rate": 6.773809523809525e-06, "loss": 46.6387, "step": 1394 }, { "epoch": 33.21492537313433, "grad_norm": 24.243515014648438, "learning_rate": 6.767857142857144e-06, "loss": 46.4235, "step": 1395 }, { "epoch": 33.23880597014925, "grad_norm": 33.013675689697266, "learning_rate": 6.761904761904763e-06, "loss": 46.7151, "step": 1396 }, { "epoch": 33.26268656716418, "grad_norm": 30.15135955810547, "learning_rate": 6.755952380952382e-06, "loss": 46.3002, "step": 1397 }, { "epoch": 33.286567164179104, "grad_norm": 31.58100128173828, "learning_rate": 6.750000000000001e-06, "loss": 46.6084, "step": 1398 }, { "epoch": 33.31044776119403, "grad_norm": 26.23592185974121, "learning_rate": 6.74404761904762e-06, "loss": 45.5745, "step": 1399 }, { "epoch": 33.33432835820896, "grad_norm": 32.273311614990234, "learning_rate": 6.738095238095239e-06, "loss": 45.1131, "step": 1400 }, { "epoch": 33.35820895522388, "grad_norm": 29.7532958984375, "learning_rate": 6.732142857142858e-06, "loss": 45.9739, "step": 1401 }, { "epoch": 33.3820895522388, "grad_norm": 32.648704528808594, "learning_rate": 6.726190476190477e-06, "loss": 46.6293, "step": 1402 }, { "epoch": 33.40597014925373, "grad_norm": 26.455778121948242, "learning_rate": 6.720238095238096e-06, "loss": 46.5187, "step": 1403 }, { "epoch": 33.429850746268656, "grad_norm": 30.5809326171875, "learning_rate": 6.714285714285714e-06, "loss": 46.5477, "step": 1404 }, { "epoch": 33.45373134328358, "grad_norm": 29.604442596435547, "learning_rate": 6.708333333333333e-06, "loss": 45.462, "step": 1405 }, { "epoch": 33.47761194029851, "grad_norm": 36.19733428955078, "learning_rate": 6.702380952380952e-06, "loss": 46.7046, "step": 1406 }, { "epoch": 33.50149253731343, "grad_norm": 37.733619689941406, "learning_rate": 6.696428571428571e-06, "loss": 46.2156, "step": 1407 }, { "epoch": 33.525373134328355, "grad_norm": 26.49405288696289, "learning_rate": 6.690476190476191e-06, "loss": 45.373, "step": 1408 }, { "epoch": 33.549253731343285, "grad_norm": 30.09432601928711, "learning_rate": 6.68452380952381e-06, "loss": 46.3868, "step": 1409 }, { "epoch": 33.57313432835821, "grad_norm": 25.85702896118164, "learning_rate": 6.678571428571429e-06, "loss": 45.805, "step": 1410 }, { "epoch": 33.59701492537313, "grad_norm": 28.564380645751953, "learning_rate": 6.672619047619048e-06, "loss": 46.4158, "step": 1411 }, { "epoch": 33.62089552238806, "grad_norm": 19.878551483154297, "learning_rate": 6.666666666666667e-06, "loss": 46.5922, "step": 1412 }, { "epoch": 33.644776119402984, "grad_norm": 22.83441734313965, "learning_rate": 6.660714285714286e-06, "loss": 45.1216, "step": 1413 }, { "epoch": 33.668656716417914, "grad_norm": 31.372957229614258, "learning_rate": 6.654761904761905e-06, "loss": 47.111, "step": 1414 }, { "epoch": 33.69253731343284, "grad_norm": 23.98666763305664, "learning_rate": 6.648809523809524e-06, "loss": 47.1762, "step": 1415 }, { "epoch": 33.71641791044776, "grad_norm": 27.895401000976562, "learning_rate": 6.642857142857143e-06, "loss": 45.6151, "step": 1416 }, { "epoch": 33.74029850746269, "grad_norm": 21.776100158691406, "learning_rate": 6.636904761904762e-06, "loss": 45.7198, "step": 1417 }, { "epoch": 33.76417910447761, "grad_norm": 30.373878479003906, "learning_rate": 6.630952380952381e-06, "loss": 45.2212, "step": 1418 }, { "epoch": 33.788059701492536, "grad_norm": 26.604324340820312, "learning_rate": 6.625e-06, "loss": 45.2001, "step": 1419 }, { "epoch": 33.811940298507466, "grad_norm": 29.38104248046875, "learning_rate": 6.619047619047619e-06, "loss": 46.711, "step": 1420 }, { "epoch": 33.83582089552239, "grad_norm": 24.36806869506836, "learning_rate": 6.613095238095239e-06, "loss": 46.3608, "step": 1421 }, { "epoch": 33.85970149253731, "grad_norm": 33.40534210205078, "learning_rate": 6.607142857142858e-06, "loss": 45.5189, "step": 1422 }, { "epoch": 33.88358208955224, "grad_norm": 25.91522789001465, "learning_rate": 6.601190476190477e-06, "loss": 47.3604, "step": 1423 }, { "epoch": 33.907462686567165, "grad_norm": 25.26549530029297, "learning_rate": 6.595238095238096e-06, "loss": 46.483, "step": 1424 }, { "epoch": 33.93134328358209, "grad_norm": 26.101816177368164, "learning_rate": 6.589285714285715e-06, "loss": 45.7998, "step": 1425 }, { "epoch": 33.95522388059702, "grad_norm": 27.942903518676758, "learning_rate": 6.5833333333333335e-06, "loss": 46.4593, "step": 1426 }, { "epoch": 33.97910447761194, "grad_norm": 21.551429748535156, "learning_rate": 6.5773809523809525e-06, "loss": 45.458, "step": 1427 }, { "epoch": 34.0, "grad_norm": 32.26907730102539, "learning_rate": 6.571428571428572e-06, "loss": 38.5718, "step": 1428 }, { "epoch": 34.02388059701492, "grad_norm": 32.16934585571289, "learning_rate": 6.565476190476191e-06, "loss": 45.5812, "step": 1429 }, { "epoch": 34.04776119402985, "grad_norm": 19.646459579467773, "learning_rate": 6.55952380952381e-06, "loss": 44.9032, "step": 1430 }, { "epoch": 34.071641791044776, "grad_norm": 28.886430740356445, "learning_rate": 6.553571428571429e-06, "loss": 45.4187, "step": 1431 }, { "epoch": 34.0955223880597, "grad_norm": 22.722471237182617, "learning_rate": 6.547619047619048e-06, "loss": 45.468, "step": 1432 }, { "epoch": 34.11940298507463, "grad_norm": 25.334766387939453, "learning_rate": 6.541666666666667e-06, "loss": 47.3534, "step": 1433 }, { "epoch": 34.14328358208955, "grad_norm": 28.49740982055664, "learning_rate": 6.535714285714286e-06, "loss": 47.4733, "step": 1434 }, { "epoch": 34.167164179104475, "grad_norm": 27.773820877075195, "learning_rate": 6.529761904761906e-06, "loss": 45.3215, "step": 1435 }, { "epoch": 34.191044776119405, "grad_norm": 24.25234031677246, "learning_rate": 6.523809523809525e-06, "loss": 46.0011, "step": 1436 }, { "epoch": 34.21492537313433, "grad_norm": 28.666475296020508, "learning_rate": 6.517857142857144e-06, "loss": 45.9091, "step": 1437 }, { "epoch": 34.23880597014925, "grad_norm": 24.367712020874023, "learning_rate": 6.5119047619047626e-06, "loss": 46.5004, "step": 1438 }, { "epoch": 34.26268656716418, "grad_norm": 23.11983299255371, "learning_rate": 6.5059523809523815e-06, "loss": 47.3335, "step": 1439 }, { "epoch": 34.286567164179104, "grad_norm": 20.672304153442383, "learning_rate": 6.5000000000000004e-06, "loss": 47.1491, "step": 1440 }, { "epoch": 34.31044776119403, "grad_norm": 23.815290451049805, "learning_rate": 6.49404761904762e-06, "loss": 46.7084, "step": 1441 }, { "epoch": 34.33432835820896, "grad_norm": 20.582489013671875, "learning_rate": 6.488095238095239e-06, "loss": 46.9707, "step": 1442 }, { "epoch": 34.35820895522388, "grad_norm": 18.315673828125, "learning_rate": 6.482142857142858e-06, "loss": 47.5359, "step": 1443 }, { "epoch": 34.3820895522388, "grad_norm": 24.396499633789062, "learning_rate": 6.476190476190477e-06, "loss": 46.052, "step": 1444 }, { "epoch": 34.40597014925373, "grad_norm": 21.200523376464844, "learning_rate": 6.470238095238096e-06, "loss": 46.5843, "step": 1445 }, { "epoch": 34.429850746268656, "grad_norm": 17.59020233154297, "learning_rate": 6.464285714285715e-06, "loss": 46.0017, "step": 1446 }, { "epoch": 34.45373134328358, "grad_norm": 21.810382843017578, "learning_rate": 6.458333333333334e-06, "loss": 46.4232, "step": 1447 }, { "epoch": 34.47761194029851, "grad_norm": 27.78464126586914, "learning_rate": 6.452380952380954e-06, "loss": 46.0973, "step": 1448 }, { "epoch": 34.50149253731343, "grad_norm": 29.360275268554688, "learning_rate": 6.446428571428573e-06, "loss": 45.4821, "step": 1449 }, { "epoch": 34.525373134328355, "grad_norm": 26.914587020874023, "learning_rate": 6.4404761904761916e-06, "loss": 45.2982, "step": 1450 }, { "epoch": 34.549253731343285, "grad_norm": 22.19925880432129, "learning_rate": 6.4345238095238105e-06, "loss": 46.6693, "step": 1451 }, { "epoch": 34.57313432835821, "grad_norm": 25.39541244506836, "learning_rate": 6.4285714285714295e-06, "loss": 45.8936, "step": 1452 }, { "epoch": 34.59701492537313, "grad_norm": 20.633222579956055, "learning_rate": 6.422619047619048e-06, "loss": 44.6061, "step": 1453 }, { "epoch": 34.62089552238806, "grad_norm": 22.513790130615234, "learning_rate": 6.416666666666667e-06, "loss": 45.5503, "step": 1454 }, { "epoch": 34.644776119402984, "grad_norm": 25.715484619140625, "learning_rate": 6.410714285714287e-06, "loss": 45.7485, "step": 1455 }, { "epoch": 34.668656716417914, "grad_norm": 21.964609146118164, "learning_rate": 6.404761904761904e-06, "loss": 46.3223, "step": 1456 }, { "epoch": 34.69253731343284, "grad_norm": 20.32435417175293, "learning_rate": 6.398809523809524e-06, "loss": 45.1507, "step": 1457 }, { "epoch": 34.71641791044776, "grad_norm": 24.32924461364746, "learning_rate": 6.392857142857143e-06, "loss": 45.8221, "step": 1458 }, { "epoch": 34.74029850746269, "grad_norm": 19.200895309448242, "learning_rate": 6.386904761904762e-06, "loss": 45.0915, "step": 1459 }, { "epoch": 34.76417910447761, "grad_norm": 24.436569213867188, "learning_rate": 6.380952380952381e-06, "loss": 45.5892, "step": 1460 }, { "epoch": 34.788059701492536, "grad_norm": 24.381568908691406, "learning_rate": 6.375e-06, "loss": 45.5295, "step": 1461 }, { "epoch": 34.811940298507466, "grad_norm": 19.64159393310547, "learning_rate": 6.369047619047619e-06, "loss": 46.244, "step": 1462 }, { "epoch": 34.83582089552239, "grad_norm": 27.420351028442383, "learning_rate": 6.363095238095238e-06, "loss": 45.9723, "step": 1463 }, { "epoch": 34.85970149253731, "grad_norm": 18.136165618896484, "learning_rate": 6.357142857142858e-06, "loss": 45.5106, "step": 1464 }, { "epoch": 34.88358208955224, "grad_norm": 21.70622444152832, "learning_rate": 6.3511904761904766e-06, "loss": 46.4965, "step": 1465 }, { "epoch": 34.907462686567165, "grad_norm": 23.573131561279297, "learning_rate": 6.3452380952380955e-06, "loss": 46.0698, "step": 1466 }, { "epoch": 34.93134328358209, "grad_norm": 21.20003890991211, "learning_rate": 6.3392857142857145e-06, "loss": 45.6992, "step": 1467 }, { "epoch": 34.95522388059702, "grad_norm": 23.745859146118164, "learning_rate": 6.333333333333333e-06, "loss": 45.8431, "step": 1468 }, { "epoch": 34.97910447761194, "grad_norm": 21.26241683959961, "learning_rate": 6.327380952380952e-06, "loss": 45.6577, "step": 1469 }, { "epoch": 35.0, "grad_norm": 22.033447265625, "learning_rate": 6.321428571428571e-06, "loss": 39.8491, "step": 1470 }, { "epoch": 35.02388059701492, "grad_norm": NaN, "learning_rate": 6.315476190476191e-06, "loss": 68.4405, "step": 1471 }, { "epoch": 35.04776119402985, "grad_norm": 22.06501007080078, "learning_rate": 6.315476190476191e-06, "loss": 44.971, "step": 1472 }, { "epoch": 35.071641791044776, "grad_norm": 23.923011779785156, "learning_rate": 6.30952380952381e-06, "loss": 45.4865, "step": 1473 }, { "epoch": 35.0955223880597, "grad_norm": 18.272428512573242, "learning_rate": 6.303571428571429e-06, "loss": 46.6551, "step": 1474 }, { "epoch": 35.11940298507463, "grad_norm": 23.046764373779297, "learning_rate": 6.297619047619048e-06, "loss": 46.3486, "step": 1475 }, { "epoch": 35.14328358208955, "grad_norm": 23.790733337402344, "learning_rate": 6.291666666666667e-06, "loss": 46.7032, "step": 1476 }, { "epoch": 35.167164179104475, "grad_norm": 23.891183853149414, "learning_rate": 6.285714285714286e-06, "loss": 44.9916, "step": 1477 }, { "epoch": 35.191044776119405, "grad_norm": 25.107316970825195, "learning_rate": 6.279761904761906e-06, "loss": 46.2358, "step": 1478 }, { "epoch": 35.21492537313433, "grad_norm": 20.48590660095215, "learning_rate": 6.2738095238095245e-06, "loss": 46.0048, "step": 1479 }, { "epoch": 35.23880597014925, "grad_norm": 25.425119400024414, "learning_rate": 6.2678571428571435e-06, "loss": 44.0941, "step": 1480 }, { "epoch": 35.26268656716418, "grad_norm": 28.264352798461914, "learning_rate": 6.261904761904762e-06, "loss": 46.5301, "step": 1481 }, { "epoch": 35.286567164179104, "grad_norm": 23.869232177734375, "learning_rate": 6.255952380952381e-06, "loss": 45.681, "step": 1482 }, { "epoch": 35.31044776119403, "grad_norm": 28.840408325195312, "learning_rate": 6.25e-06, "loss": 43.7517, "step": 1483 }, { "epoch": 35.33432835820896, "grad_norm": 26.768037796020508, "learning_rate": 6.244047619047619e-06, "loss": 46.1423, "step": 1484 }, { "epoch": 35.35820895522388, "grad_norm": 23.532470703125, "learning_rate": 6.238095238095239e-06, "loss": 45.6669, "step": 1485 }, { "epoch": 35.3820895522388, "grad_norm": 25.94774055480957, "learning_rate": 6.232142857142858e-06, "loss": 45.7672, "step": 1486 }, { "epoch": 35.40597014925373, "grad_norm": 23.215801239013672, "learning_rate": 6.226190476190477e-06, "loss": 45.6991, "step": 1487 }, { "epoch": 35.429850746268656, "grad_norm": 22.13661003112793, "learning_rate": 6.220238095238096e-06, "loss": 44.5214, "step": 1488 }, { "epoch": 35.45373134328358, "grad_norm": 24.596481323242188, "learning_rate": 6.214285714285715e-06, "loss": 46.1515, "step": 1489 }, { "epoch": 35.47761194029851, "grad_norm": 19.416872024536133, "learning_rate": 6.208333333333334e-06, "loss": 45.7596, "step": 1490 }, { "epoch": 35.50149253731343, "grad_norm": 23.993833541870117, "learning_rate": 6.202380952380953e-06, "loss": 46.1668, "step": 1491 }, { "epoch": 35.525373134328355, "grad_norm": 21.481637954711914, "learning_rate": 6.1964285714285725e-06, "loss": 45.1812, "step": 1492 }, { "epoch": 35.549253731343285, "grad_norm": 19.26917839050293, "learning_rate": 6.1904761904761914e-06, "loss": 45.9316, "step": 1493 }, { "epoch": 35.57313432835821, "grad_norm": 22.80115509033203, "learning_rate": 6.18452380952381e-06, "loss": 45.9088, "step": 1494 }, { "epoch": 35.59701492537313, "grad_norm": 21.33648109436035, "learning_rate": 6.178571428571429e-06, "loss": 46.7602, "step": 1495 }, { "epoch": 35.62089552238806, "grad_norm": 28.059947967529297, "learning_rate": 6.172619047619048e-06, "loss": 46.1767, "step": 1496 }, { "epoch": 35.644776119402984, "grad_norm": 21.1577205657959, "learning_rate": 6.166666666666667e-06, "loss": 45.6847, "step": 1497 }, { "epoch": 35.668656716417914, "grad_norm": 23.277509689331055, "learning_rate": 6.160714285714286e-06, "loss": 45.6145, "step": 1498 }, { "epoch": 35.69253731343284, "grad_norm": 16.815677642822266, "learning_rate": 6.154761904761906e-06, "loss": 45.515, "step": 1499 }, { "epoch": 35.71641791044776, "grad_norm": 24.218280792236328, "learning_rate": 6.148809523809525e-06, "loss": 47.6329, "step": 1500 }, { "epoch": 35.74029850746269, "grad_norm": 20.943737030029297, "learning_rate": 6.142857142857144e-06, "loss": 45.7388, "step": 1501 }, { "epoch": 35.76417910447761, "grad_norm": 20.344369888305664, "learning_rate": 6.136904761904763e-06, "loss": 45.9404, "step": 1502 }, { "epoch": 35.788059701492536, "grad_norm": 25.980487823486328, "learning_rate": 6.130952380952382e-06, "loss": 46.6928, "step": 1503 }, { "epoch": 35.811940298507466, "grad_norm": 19.285552978515625, "learning_rate": 6.125000000000001e-06, "loss": 46.4614, "step": 1504 }, { "epoch": 35.83582089552239, "grad_norm": 27.701011657714844, "learning_rate": 6.11904761904762e-06, "loss": 45.258, "step": 1505 }, { "epoch": 35.85970149253731, "grad_norm": 24.963760375976562, "learning_rate": 6.113095238095239e-06, "loss": 47.0721, "step": 1506 }, { "epoch": 35.88358208955224, "grad_norm": 25.08616828918457, "learning_rate": 6.107142857142858e-06, "loss": 45.9668, "step": 1507 }, { "epoch": 35.907462686567165, "grad_norm": 18.00580406188965, "learning_rate": 6.101190476190477e-06, "loss": 46.1049, "step": 1508 }, { "epoch": 35.93134328358209, "grad_norm": 24.686004638671875, "learning_rate": 6.095238095238096e-06, "loss": 46.6996, "step": 1509 }, { "epoch": 35.95522388059702, "grad_norm": 18.304157257080078, "learning_rate": 6.089285714285714e-06, "loss": 46.694, "step": 1510 }, { "epoch": 35.97910447761194, "grad_norm": 23.10132598876953, "learning_rate": 6.083333333333333e-06, "loss": 46.3807, "step": 1511 }, { "epoch": 36.0, "grad_norm": 19.077655792236328, "learning_rate": 6.077380952380952e-06, "loss": 41.1702, "step": 1512 }, { "epoch": 36.02388059701492, "grad_norm": 26.49584197998047, "learning_rate": 6.071428571428571e-06, "loss": 45.382, "step": 1513 }, { "epoch": 36.04776119402985, "grad_norm": 24.438323974609375, "learning_rate": 6.065476190476191e-06, "loss": 45.9433, "step": 1514 }, { "epoch": 36.071641791044776, "grad_norm": 30.8107852935791, "learning_rate": 6.05952380952381e-06, "loss": 45.6688, "step": 1515 }, { "epoch": 36.0955223880597, "grad_norm": 31.754154205322266, "learning_rate": 6.053571428571429e-06, "loss": 45.9768, "step": 1516 }, { "epoch": 36.11940298507463, "grad_norm": 26.034778594970703, "learning_rate": 6.047619047619048e-06, "loss": 46.022, "step": 1517 }, { "epoch": 36.14328358208955, "grad_norm": 31.643035888671875, "learning_rate": 6.041666666666667e-06, "loss": 44.5987, "step": 1518 }, { "epoch": 36.167164179104475, "grad_norm": 24.322874069213867, "learning_rate": 6.035714285714286e-06, "loss": 45.3774, "step": 1519 }, { "epoch": 36.191044776119405, "grad_norm": 29.067466735839844, "learning_rate": 6.029761904761905e-06, "loss": 46.1784, "step": 1520 }, { "epoch": 36.21492537313433, "grad_norm": 30.415788650512695, "learning_rate": 6.023809523809524e-06, "loss": 46.7259, "step": 1521 }, { "epoch": 36.23880597014925, "grad_norm": 19.417943954467773, "learning_rate": 6.017857142857143e-06, "loss": 46.0544, "step": 1522 }, { "epoch": 36.26268656716418, "grad_norm": 27.239500045776367, "learning_rate": 6.011904761904762e-06, "loss": 46.9344, "step": 1523 }, { "epoch": 36.286567164179104, "grad_norm": 27.671018600463867, "learning_rate": 6.005952380952381e-06, "loss": 45.78, "step": 1524 }, { "epoch": 36.31044776119403, "grad_norm": 25.103811264038086, "learning_rate": 6e-06, "loss": 45.7153, "step": 1525 }, { "epoch": 36.33432835820896, "grad_norm": 26.25937843322754, "learning_rate": 5.994047619047619e-06, "loss": 45.3151, "step": 1526 }, { "epoch": 36.35820895522388, "grad_norm": 18.400033950805664, "learning_rate": 5.988095238095238e-06, "loss": 46.5614, "step": 1527 }, { "epoch": 36.3820895522388, "grad_norm": 35.505374908447266, "learning_rate": 5.982142857142858e-06, "loss": 45.8805, "step": 1528 }, { "epoch": 36.40597014925373, "grad_norm": 31.476438522338867, "learning_rate": 5.976190476190477e-06, "loss": 46.189, "step": 1529 }, { "epoch": 36.429850746268656, "grad_norm": 26.192047119140625, "learning_rate": 5.970238095238096e-06, "loss": 45.7026, "step": 1530 }, { "epoch": 36.45373134328358, "grad_norm": 29.712961196899414, "learning_rate": 5.964285714285715e-06, "loss": 44.86, "step": 1531 }, { "epoch": 36.47761194029851, "grad_norm": 28.22374153137207, "learning_rate": 5.958333333333334e-06, "loss": 45.7644, "step": 1532 }, { "epoch": 36.50149253731343, "grad_norm": 23.614940643310547, "learning_rate": 5.9523809523809525e-06, "loss": 45.0373, "step": 1533 }, { "epoch": 36.525373134328355, "grad_norm": 27.78896141052246, "learning_rate": 5.9464285714285715e-06, "loss": 46.9277, "step": 1534 }, { "epoch": 36.549253731343285, "grad_norm": 18.64702606201172, "learning_rate": 5.940476190476191e-06, "loss": 45.277, "step": 1535 }, { "epoch": 36.57313432835821, "grad_norm": 27.2061710357666, "learning_rate": 5.93452380952381e-06, "loss": 46.8394, "step": 1536 }, { "epoch": 36.59701492537313, "grad_norm": 26.296287536621094, "learning_rate": 5.928571428571429e-06, "loss": 44.8519, "step": 1537 }, { "epoch": 36.62089552238806, "grad_norm": 26.594314575195312, "learning_rate": 5.922619047619048e-06, "loss": 45.1743, "step": 1538 }, { "epoch": 36.644776119402984, "grad_norm": 24.076461791992188, "learning_rate": 5.916666666666667e-06, "loss": 45.4145, "step": 1539 }, { "epoch": 36.668656716417914, "grad_norm": 23.31978416442871, "learning_rate": 5.910714285714286e-06, "loss": 45.7526, "step": 1540 }, { "epoch": 36.69253731343284, "grad_norm": 22.630998611450195, "learning_rate": 5.904761904761905e-06, "loss": 46.4197, "step": 1541 }, { "epoch": 36.71641791044776, "grad_norm": 32.66592025756836, "learning_rate": 5.898809523809525e-06, "loss": 45.0123, "step": 1542 }, { "epoch": 36.74029850746269, "grad_norm": 24.478839874267578, "learning_rate": 5.892857142857144e-06, "loss": 46.1418, "step": 1543 }, { "epoch": 36.76417910447761, "grad_norm": 33.325775146484375, "learning_rate": 5.886904761904763e-06, "loss": 45.8228, "step": 1544 }, { "epoch": 36.788059701492536, "grad_norm": 29.264528274536133, "learning_rate": 5.8809523809523816e-06, "loss": 46.1921, "step": 1545 }, { "epoch": 36.811940298507466, "grad_norm": 31.78297233581543, "learning_rate": 5.8750000000000005e-06, "loss": 45.4564, "step": 1546 }, { "epoch": 36.83582089552239, "grad_norm": 27.223127365112305, "learning_rate": 5.8690476190476194e-06, "loss": 45.5277, "step": 1547 }, { "epoch": 36.85970149253731, "grad_norm": 26.29422950744629, "learning_rate": 5.863095238095239e-06, "loss": 46.2285, "step": 1548 }, { "epoch": 36.88358208955224, "grad_norm": 27.933652877807617, "learning_rate": 5.857142857142858e-06, "loss": 46.4441, "step": 1549 }, { "epoch": 36.907462686567165, "grad_norm": 25.306129455566406, "learning_rate": 5.851190476190477e-06, "loss": 45.9724, "step": 1550 }, { "epoch": 36.93134328358209, "grad_norm": 23.481304168701172, "learning_rate": 5.845238095238096e-06, "loss": 46.2544, "step": 1551 }, { "epoch": 36.95522388059702, "grad_norm": 20.86615562438965, "learning_rate": 5.839285714285715e-06, "loss": 47.4502, "step": 1552 }, { "epoch": 36.97910447761194, "grad_norm": 21.519290924072266, "learning_rate": 5.833333333333334e-06, "loss": 45.0165, "step": 1553 }, { "epoch": 37.0, "grad_norm": 22.031705856323242, "learning_rate": 5.827380952380953e-06, "loss": 40.6199, "step": 1554 }, { "epoch": 37.02388059701492, "grad_norm": 29.273820877075195, "learning_rate": 5.821428571428573e-06, "loss": 46.5836, "step": 1555 }, { "epoch": 37.04776119402985, "grad_norm": 24.417945861816406, "learning_rate": 5.815476190476192e-06, "loss": 44.9549, "step": 1556 }, { "epoch": 37.071641791044776, "grad_norm": 24.60706901550293, "learning_rate": 5.8095238095238106e-06, "loss": 44.8607, "step": 1557 }, { "epoch": 37.0955223880597, "grad_norm": 24.76397132873535, "learning_rate": 5.8035714285714295e-06, "loss": 44.9875, "step": 1558 }, { "epoch": 37.11940298507463, "grad_norm": 24.380352020263672, "learning_rate": 5.7976190476190485e-06, "loss": 45.4835, "step": 1559 }, { "epoch": 37.14328358208955, "grad_norm": 19.852746963500977, "learning_rate": 5.791666666666667e-06, "loss": 45.1303, "step": 1560 }, { "epoch": 37.167164179104475, "grad_norm": 23.550888061523438, "learning_rate": 5.785714285714286e-06, "loss": 46.1086, "step": 1561 }, { "epoch": 37.191044776119405, "grad_norm": 24.31315803527832, "learning_rate": 5.7797619047619044e-06, "loss": 45.8181, "step": 1562 }, { "epoch": 37.21492537313433, "grad_norm": 19.324602127075195, "learning_rate": 5.773809523809523e-06, "loss": 44.8606, "step": 1563 }, { "epoch": 37.23880597014925, "grad_norm": 26.747098922729492, "learning_rate": 5.767857142857143e-06, "loss": 45.753, "step": 1564 }, { "epoch": 37.26268656716418, "grad_norm": 22.472572326660156, "learning_rate": 5.761904761904762e-06, "loss": 46.0156, "step": 1565 }, { "epoch": 37.286567164179104, "grad_norm": 20.813426971435547, "learning_rate": 5.755952380952381e-06, "loss": 46.7466, "step": 1566 }, { "epoch": 37.31044776119403, "grad_norm": 27.869413375854492, "learning_rate": 5.75e-06, "loss": 46.287, "step": 1567 }, { "epoch": 37.33432835820896, "grad_norm": 23.257444381713867, "learning_rate": 5.744047619047619e-06, "loss": 45.9862, "step": 1568 }, { "epoch": 37.35820895522388, "grad_norm": 24.715946197509766, "learning_rate": 5.738095238095238e-06, "loss": 47.3128, "step": 1569 }, { "epoch": 37.3820895522388, "grad_norm": 21.670385360717773, "learning_rate": 5.732142857142857e-06, "loss": 46.121, "step": 1570 }, { "epoch": 37.40597014925373, "grad_norm": 24.53063201904297, "learning_rate": 5.726190476190477e-06, "loss": 46.5441, "step": 1571 }, { "epoch": 37.429850746268656, "grad_norm": 19.584630966186523, "learning_rate": 5.7202380952380956e-06, "loss": 46.0683, "step": 1572 }, { "epoch": 37.45373134328358, "grad_norm": 26.179149627685547, "learning_rate": 5.7142857142857145e-06, "loss": 46.3294, "step": 1573 }, { "epoch": 37.47761194029851, "grad_norm": 21.13595199584961, "learning_rate": 5.7083333333333335e-06, "loss": 45.7853, "step": 1574 }, { "epoch": 37.50149253731343, "grad_norm": 28.440006256103516, "learning_rate": 5.702380952380952e-06, "loss": 46.5029, "step": 1575 }, { "epoch": 37.525373134328355, "grad_norm": 27.941879272460938, "learning_rate": 5.696428571428571e-06, "loss": 45.6132, "step": 1576 }, { "epoch": 37.549253731343285, "grad_norm": 25.952688217163086, "learning_rate": 5.690476190476191e-06, "loss": 45.6803, "step": 1577 }, { "epoch": 37.57313432835821, "grad_norm": 23.551633834838867, "learning_rate": 5.68452380952381e-06, "loss": 45.1563, "step": 1578 }, { "epoch": 37.59701492537313, "grad_norm": 23.119415283203125, "learning_rate": 5.678571428571429e-06, "loss": 47.2717, "step": 1579 }, { "epoch": 37.62089552238806, "grad_norm": 27.995214462280273, "learning_rate": 5.672619047619048e-06, "loss": 46.1847, "step": 1580 }, { "epoch": 37.644776119402984, "grad_norm": 28.0698299407959, "learning_rate": 5.666666666666667e-06, "loss": 46.4639, "step": 1581 }, { "epoch": 37.668656716417914, "grad_norm": 23.09457015991211, "learning_rate": 5.660714285714286e-06, "loss": 45.0939, "step": 1582 }, { "epoch": 37.69253731343284, "grad_norm": 25.94692611694336, "learning_rate": 5.654761904761905e-06, "loss": 45.216, "step": 1583 }, { "epoch": 37.71641791044776, "grad_norm": 20.192176818847656, "learning_rate": 5.648809523809525e-06, "loss": 45.7997, "step": 1584 }, { "epoch": 37.74029850746269, "grad_norm": 26.115283966064453, "learning_rate": 5.6428571428571435e-06, "loss": 44.8405, "step": 1585 }, { "epoch": 37.76417910447761, "grad_norm": 24.431346893310547, "learning_rate": 5.6369047619047625e-06, "loss": 46.5067, "step": 1586 }, { "epoch": 37.788059701492536, "grad_norm": 25.838623046875, "learning_rate": 5.630952380952381e-06, "loss": 46.1806, "step": 1587 }, { "epoch": 37.811940298507466, "grad_norm": 20.44222640991211, "learning_rate": 5.625e-06, "loss": 45.7445, "step": 1588 }, { "epoch": 37.83582089552239, "grad_norm": 19.459331512451172, "learning_rate": 5.619047619047619e-06, "loss": 45.7875, "step": 1589 }, { "epoch": 37.85970149253731, "grad_norm": 17.49920082092285, "learning_rate": 5.613095238095238e-06, "loss": 44.2889, "step": 1590 }, { "epoch": 37.88358208955224, "grad_norm": 18.541828155517578, "learning_rate": 5.607142857142858e-06, "loss": 46.7668, "step": 1591 }, { "epoch": 37.907462686567165, "grad_norm": 16.22308349609375, "learning_rate": 5.601190476190477e-06, "loss": 45.0406, "step": 1592 }, { "epoch": 37.93134328358209, "grad_norm": 21.068069458007812, "learning_rate": 5.595238095238096e-06, "loss": 44.0997, "step": 1593 }, { "epoch": 37.95522388059702, "grad_norm": 18.877992630004883, "learning_rate": 5.589285714285715e-06, "loss": 46.5816, "step": 1594 }, { "epoch": 37.97910447761194, "grad_norm": 20.14031410217285, "learning_rate": 5.583333333333334e-06, "loss": 44.8537, "step": 1595 }, { "epoch": 38.0, "grad_norm": 19.989953994750977, "learning_rate": 5.577380952380953e-06, "loss": 39.8501, "step": 1596 }, { "epoch": 38.02388059701492, "grad_norm": 23.484283447265625, "learning_rate": 5.571428571428572e-06, "loss": 46.3864, "step": 1597 }, { "epoch": 38.04776119402985, "grad_norm": 20.579587936401367, "learning_rate": 5.5654761904761915e-06, "loss": 46.1473, "step": 1598 }, { "epoch": 38.071641791044776, "grad_norm": 19.48423194885254, "learning_rate": 5.5595238095238104e-06, "loss": 45.3255, "step": 1599 }, { "epoch": 38.0955223880597, "grad_norm": 23.766077041625977, "learning_rate": 5.553571428571429e-06, "loss": 45.4387, "step": 1600 }, { "epoch": 38.11940298507463, "grad_norm": 17.605247497558594, "learning_rate": 5.547619047619048e-06, "loss": 46.1065, "step": 1601 }, { "epoch": 38.14328358208955, "grad_norm": 20.179826736450195, "learning_rate": 5.541666666666667e-06, "loss": 45.974, "step": 1602 }, { "epoch": 38.167164179104475, "grad_norm": 28.50605583190918, "learning_rate": 5.535714285714286e-06, "loss": 46.0505, "step": 1603 }, { "epoch": 38.191044776119405, "grad_norm": 16.770771026611328, "learning_rate": 5.529761904761905e-06, "loss": 46.4403, "step": 1604 }, { "epoch": 38.21492537313433, "grad_norm": NaN, "learning_rate": 5.523809523809525e-06, "loss": 69.3153, "step": 1605 }, { "epoch": 38.23880597014925, "grad_norm": 25.01431655883789, "learning_rate": 5.523809523809525e-06, "loss": 46.8119, "step": 1606 }, { "epoch": 38.26268656716418, "grad_norm": 20.459747314453125, "learning_rate": 5.517857142857144e-06, "loss": 47.7687, "step": 1607 }, { "epoch": 38.286567164179104, "grad_norm": 21.603086471557617, "learning_rate": 5.511904761904763e-06, "loss": 44.6093, "step": 1608 }, { "epoch": 38.31044776119403, "grad_norm": 25.284805297851562, "learning_rate": 5.505952380952382e-06, "loss": 45.0834, "step": 1609 }, { "epoch": 38.33432835820896, "grad_norm": 21.638917922973633, "learning_rate": 5.500000000000001e-06, "loss": 45.3904, "step": 1610 }, { "epoch": 38.35820895522388, "grad_norm": 22.443374633789062, "learning_rate": 5.49404761904762e-06, "loss": 43.7163, "step": 1611 }, { "epoch": 38.3820895522388, "grad_norm": 23.427288055419922, "learning_rate": 5.4880952380952394e-06, "loss": 44.7692, "step": 1612 }, { "epoch": 38.40597014925373, "grad_norm": 22.346813201904297, "learning_rate": 5.482142857142858e-06, "loss": 45.0674, "step": 1613 }, { "epoch": 38.429850746268656, "grad_norm": 20.567325592041016, "learning_rate": 5.476190476190477e-06, "loss": 45.5367, "step": 1614 }, { "epoch": 38.45373134328358, "grad_norm": 23.872394561767578, "learning_rate": 5.470238095238096e-06, "loss": 46.2728, "step": 1615 }, { "epoch": 38.47761194029851, "grad_norm": 23.790176391601562, "learning_rate": 5.464285714285714e-06, "loss": 46.3734, "step": 1616 }, { "epoch": 38.50149253731343, "grad_norm": 22.707136154174805, "learning_rate": 5.458333333333333e-06, "loss": 44.577, "step": 1617 }, { "epoch": 38.525373134328355, "grad_norm": 26.203781127929688, "learning_rate": 5.452380952380952e-06, "loss": 45.6794, "step": 1618 }, { "epoch": 38.549253731343285, "grad_norm": 22.935991287231445, "learning_rate": 5.446428571428571e-06, "loss": 45.7815, "step": 1619 }, { "epoch": 38.57313432835821, "grad_norm": 28.275053024291992, "learning_rate": 5.44047619047619e-06, "loss": 45.0312, "step": 1620 }, { "epoch": 38.59701492537313, "grad_norm": 23.848264694213867, "learning_rate": 5.43452380952381e-06, "loss": 46.7093, "step": 1621 }, { "epoch": 38.62089552238806, "grad_norm": 25.240819931030273, "learning_rate": 5.428571428571429e-06, "loss": 46.6751, "step": 1622 }, { "epoch": 38.644776119402984, "grad_norm": 26.2618350982666, "learning_rate": 5.422619047619048e-06, "loss": 47.5501, "step": 1623 }, { "epoch": 38.668656716417914, "grad_norm": 23.986392974853516, "learning_rate": 5.416666666666667e-06, "loss": 45.6208, "step": 1624 }, { "epoch": 38.69253731343284, "grad_norm": 22.11539077758789, "learning_rate": 5.410714285714286e-06, "loss": 44.4163, "step": 1625 }, { "epoch": 38.71641791044776, "grad_norm": 22.9071044921875, "learning_rate": 5.404761904761905e-06, "loss": 45.5715, "step": 1626 }, { "epoch": 38.74029850746269, "grad_norm": 22.759733200073242, "learning_rate": 5.398809523809524e-06, "loss": 45.1706, "step": 1627 }, { "epoch": 38.76417910447761, "grad_norm": 23.66644287109375, "learning_rate": 5.392857142857143e-06, "loss": 45.4343, "step": 1628 }, { "epoch": 38.788059701492536, "grad_norm": 20.179203033447266, "learning_rate": 5.386904761904762e-06, "loss": 45.9163, "step": 1629 }, { "epoch": 38.811940298507466, "grad_norm": 22.327817916870117, "learning_rate": 5.380952380952381e-06, "loss": 44.1558, "step": 1630 }, { "epoch": 38.83582089552239, "grad_norm": 22.10496711730957, "learning_rate": 5.375e-06, "loss": 45.764, "step": 1631 }, { "epoch": 38.85970149253731, "grad_norm": 24.25627326965332, "learning_rate": 5.369047619047619e-06, "loss": 46.6394, "step": 1632 }, { "epoch": 38.88358208955224, "grad_norm": 20.797740936279297, "learning_rate": 5.363095238095238e-06, "loss": 45.6251, "step": 1633 }, { "epoch": 38.907462686567165, "grad_norm": 24.14659309387207, "learning_rate": 5.357142857142857e-06, "loss": 45.5603, "step": 1634 }, { "epoch": 38.93134328358209, "grad_norm": 23.259584426879883, "learning_rate": 5.351190476190477e-06, "loss": 46.055, "step": 1635 }, { "epoch": 38.95522388059702, "grad_norm": 23.72128677368164, "learning_rate": 5.345238095238096e-06, "loss": 45.9729, "step": 1636 }, { "epoch": 38.97910447761194, "grad_norm": 22.746183395385742, "learning_rate": 5.339285714285715e-06, "loss": 46.1893, "step": 1637 }, { "epoch": 39.0, "grad_norm": 22.067306518554688, "learning_rate": 5.333333333333334e-06, "loss": 39.8095, "step": 1638 }, { "epoch": 39.02388059701492, "grad_norm": 22.888097763061523, "learning_rate": 5.327380952380953e-06, "loss": 45.3095, "step": 1639 }, { "epoch": 39.04776119402985, "grad_norm": 23.86408233642578, "learning_rate": 5.3214285714285715e-06, "loss": 46.3774, "step": 1640 }, { "epoch": 39.071641791044776, "grad_norm": 21.418088912963867, "learning_rate": 5.3154761904761905e-06, "loss": 45.6404, "step": 1641 }, { "epoch": 39.0955223880597, "grad_norm": 21.521831512451172, "learning_rate": 5.30952380952381e-06, "loss": 46.4895, "step": 1642 }, { "epoch": 39.11940298507463, "grad_norm": 20.189105987548828, "learning_rate": 5.303571428571429e-06, "loss": 44.6538, "step": 1643 }, { "epoch": 39.14328358208955, "grad_norm": 19.73761558532715, "learning_rate": 5.297619047619048e-06, "loss": 45.5941, "step": 1644 }, { "epoch": 39.167164179104475, "grad_norm": 25.631227493286133, "learning_rate": 5.291666666666667e-06, "loss": 44.4105, "step": 1645 }, { "epoch": 39.191044776119405, "grad_norm": 19.47798728942871, "learning_rate": 5.285714285714286e-06, "loss": 46.3286, "step": 1646 }, { "epoch": 39.21492537313433, "grad_norm": 19.627609252929688, "learning_rate": 5.279761904761905e-06, "loss": 46.0707, "step": 1647 }, { "epoch": 39.23880597014925, "grad_norm": 19.668777465820312, "learning_rate": 5.273809523809525e-06, "loss": 44.8447, "step": 1648 }, { "epoch": 39.26268656716418, "grad_norm": 23.311546325683594, "learning_rate": 5.267857142857144e-06, "loss": 45.9081, "step": 1649 }, { "epoch": 39.286567164179104, "grad_norm": 21.426624298095703, "learning_rate": 5.261904761904763e-06, "loss": 45.4256, "step": 1650 }, { "epoch": 39.31044776119403, "grad_norm": 19.545969009399414, "learning_rate": 5.255952380952382e-06, "loss": 45.4264, "step": 1651 }, { "epoch": 39.33432835820896, "grad_norm": 22.78704833984375, "learning_rate": 5.2500000000000006e-06, "loss": 47.6822, "step": 1652 }, { "epoch": 39.35820895522388, "grad_norm": 18.759178161621094, "learning_rate": 5.2440476190476195e-06, "loss": 44.5254, "step": 1653 }, { "epoch": 39.3820895522388, "grad_norm": 19.855981826782227, "learning_rate": 5.2380952380952384e-06, "loss": 46.249, "step": 1654 }, { "epoch": 39.40597014925373, "grad_norm": 18.817089080810547, "learning_rate": 5.232142857142858e-06, "loss": 45.2813, "step": 1655 }, { "epoch": 39.429850746268656, "grad_norm": 19.587581634521484, "learning_rate": 5.226190476190477e-06, "loss": 45.0445, "step": 1656 }, { "epoch": 39.45373134328358, "grad_norm": 19.9105167388916, "learning_rate": 5.220238095238096e-06, "loss": 46.8658, "step": 1657 }, { "epoch": 39.47761194029851, "grad_norm": 19.529748916625977, "learning_rate": 5.214285714285715e-06, "loss": 46.6175, "step": 1658 }, { "epoch": 39.50149253731343, "grad_norm": 18.63764762878418, "learning_rate": 5.208333333333334e-06, "loss": 46.3122, "step": 1659 }, { "epoch": 39.525373134328355, "grad_norm": 19.58228874206543, "learning_rate": 5.202380952380953e-06, "loss": 44.6263, "step": 1660 }, { "epoch": 39.549253731343285, "grad_norm": 21.451528549194336, "learning_rate": 5.196428571428572e-06, "loss": 46.2707, "step": 1661 }, { "epoch": 39.57313432835821, "grad_norm": 22.756628036499023, "learning_rate": 5.190476190476192e-06, "loss": 45.0001, "step": 1662 }, { "epoch": 39.59701492537313, "grad_norm": 24.481945037841797, "learning_rate": 5.184523809523811e-06, "loss": 45.3038, "step": 1663 }, { "epoch": 39.62089552238806, "grad_norm": 19.3010196685791, "learning_rate": 5.1785714285714296e-06, "loss": 46.1894, "step": 1664 }, { "epoch": 39.644776119402984, "grad_norm": 24.840822219848633, "learning_rate": 5.1726190476190485e-06, "loss": 46.6593, "step": 1665 }, { "epoch": 39.668656716417914, "grad_norm": 20.712875366210938, "learning_rate": 5.1666666666666675e-06, "loss": 46.7594, "step": 1666 }, { "epoch": 39.69253731343284, "grad_norm": 20.431598663330078, "learning_rate": 5.160714285714286e-06, "loss": 46.4969, "step": 1667 }, { "epoch": 39.71641791044776, "grad_norm": 21.094484329223633, "learning_rate": 5.1547619047619045e-06, "loss": 44.5114, "step": 1668 }, { "epoch": 39.74029850746269, "grad_norm": 22.929946899414062, "learning_rate": 5.1488095238095234e-06, "loss": 44.5251, "step": 1669 }, { "epoch": 39.76417910447761, "grad_norm": 17.285877227783203, "learning_rate": 5.142857142857142e-06, "loss": 45.312, "step": 1670 }, { "epoch": 39.788059701492536, "grad_norm": 18.29960823059082, "learning_rate": 5.136904761904762e-06, "loss": 44.984, "step": 1671 }, { "epoch": 39.811940298507466, "grad_norm": 25.79044532775879, "learning_rate": 5.130952380952381e-06, "loss": 44.8192, "step": 1672 }, { "epoch": 39.83582089552239, "grad_norm": 21.014759063720703, "learning_rate": 5.125e-06, "loss": 46.9319, "step": 1673 }, { "epoch": 39.85970149253731, "grad_norm": 24.91911506652832, "learning_rate": 5.119047619047619e-06, "loss": 46.7778, "step": 1674 }, { "epoch": 39.88358208955224, "grad_norm": 25.174942016601562, "learning_rate": 5.113095238095238e-06, "loss": 44.948, "step": 1675 }, { "epoch": 39.907462686567165, "grad_norm": 22.642148971557617, "learning_rate": 5.107142857142857e-06, "loss": 45.5964, "step": 1676 }, { "epoch": 39.93134328358209, "grad_norm": 24.867389678955078, "learning_rate": 5.101190476190476e-06, "loss": 45.446, "step": 1677 }, { "epoch": 39.95522388059702, "grad_norm": 21.888269424438477, "learning_rate": 5.095238095238096e-06, "loss": 45.414, "step": 1678 }, { "epoch": 39.97910447761194, "grad_norm": 25.071487426757812, "learning_rate": 5.0892857142857146e-06, "loss": 44.9464, "step": 1679 }, { "epoch": 40.0, "grad_norm": 19.389556884765625, "learning_rate": 5.0833333333333335e-06, "loss": 39.5515, "step": 1680 }, { "epoch": 40.0, "step": 1680, "total_flos": 8.26172747445074e+16, "train_loss": 23.38366504396711, "train_runtime": 26137.4766, "train_samples_per_second": 8.191, "train_steps_per_second": 0.064 }, { "epoch": 40.02388059701492, "grad_norm": 18.99544334411621, "learning_rate": 1e-05, "loss": 46.1194, "step": 1681 }, { "epoch": 40.04776119402985, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.6718, "step": 1682 }, { "epoch": 40.071641791044776, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.4703, "step": 1683 }, { "epoch": 40.0955223880597, "grad_norm": 416.26324462890625, "learning_rate": 9.996031746031746e-06, "loss": 53.5676, "step": 1684 }, { "epoch": 40.11940298507463, "grad_norm": 147.0504608154297, "learning_rate": 9.992063492063493e-06, "loss": 50.2561, "step": 1685 }, { "epoch": 40.14328358208955, "grad_norm": 122.7557601928711, "learning_rate": 9.988095238095239e-06, "loss": 50.4153, "step": 1686 }, { "epoch": 40.167164179104475, "grad_norm": 97.062744140625, "learning_rate": 9.984126984126986e-06, "loss": 47.2739, "step": 1687 }, { "epoch": 40.191044776119405, "grad_norm": 73.37904357910156, "learning_rate": 9.980158730158731e-06, "loss": 48.0252, "step": 1688 }, { "epoch": 40.21492537313433, "grad_norm": 68.98373413085938, "learning_rate": 9.976190476190477e-06, "loss": 47.6782, "step": 1689 }, { "epoch": 40.23880597014925, "grad_norm": 56.258548736572266, "learning_rate": 9.972222222222224e-06, "loss": 47.5786, "step": 1690 }, { "epoch": 40.26268656716418, "grad_norm": 68.9515609741211, "learning_rate": 9.968253968253969e-06, "loss": 46.3938, "step": 1691 }, { "epoch": 40.286567164179104, "grad_norm": 39.17803955078125, "learning_rate": 9.964285714285714e-06, "loss": 45.9047, "step": 1692 }, { "epoch": 40.31044776119403, "grad_norm": 51.936981201171875, "learning_rate": 9.960317460317462e-06, "loss": 45.6047, "step": 1693 }, { "epoch": 40.33432835820896, "grad_norm": 43.64280700683594, "learning_rate": 9.956349206349207e-06, "loss": 46.6234, "step": 1694 }, { "epoch": 40.35820895522388, "grad_norm": 58.56443405151367, "learning_rate": 9.952380952380954e-06, "loss": 47.255, "step": 1695 }, { "epoch": 40.3820895522388, "grad_norm": 37.53863525390625, "learning_rate": 9.9484126984127e-06, "loss": 47.1183, "step": 1696 }, { "epoch": 40.40597014925373, "grad_norm": 35.800628662109375, "learning_rate": 9.944444444444445e-06, "loss": 46.3602, "step": 1697 }, { "epoch": 40.429850746268656, "grad_norm": 39.58418655395508, "learning_rate": 9.940476190476192e-06, "loss": 46.3082, "step": 1698 }, { "epoch": 40.45373134328358, "grad_norm": 30.6373233795166, "learning_rate": 9.936507936507937e-06, "loss": 45.2231, "step": 1699 }, { "epoch": 40.47761194029851, "grad_norm": 34.47962951660156, "learning_rate": 9.932539682539684e-06, "loss": 46.3243, "step": 1700 }, { "epoch": 40.50149253731343, "grad_norm": 23.599184036254883, "learning_rate": 9.92857142857143e-06, "loss": 46.3045, "step": 1701 }, { "epoch": 40.525373134328355, "grad_norm": 27.183767318725586, "learning_rate": 9.924603174603175e-06, "loss": 45.3216, "step": 1702 }, { "epoch": 40.549253731343285, "grad_norm": 27.263038635253906, "learning_rate": 9.920634920634922e-06, "loss": 46.8117, "step": 1703 }, { "epoch": 40.57313432835821, "grad_norm": 30.570518493652344, "learning_rate": 9.916666666666668e-06, "loss": 46.0951, "step": 1704 }, { "epoch": 40.59701492537313, "grad_norm": 23.30783462524414, "learning_rate": 9.912698412698413e-06, "loss": 45.5407, "step": 1705 }, { "epoch": 40.62089552238806, "grad_norm": 29.269088745117188, "learning_rate": 9.90873015873016e-06, "loss": 45.9624, "step": 1706 }, { "epoch": 40.644776119402984, "grad_norm": NaN, "learning_rate": 9.904761904761906e-06, "loss": 75.1575, "step": 1707 }, { "epoch": 40.668656716417914, "grad_norm": 25.785404205322266, "learning_rate": 9.904761904761906e-06, "loss": 45.9263, "step": 1708 }, { "epoch": 40.69253731343284, "grad_norm": 34.729549407958984, "learning_rate": 9.900793650793653e-06, "loss": 45.5276, "step": 1709 }, { "epoch": 40.71641791044776, "grad_norm": 28.62750816345215, "learning_rate": 9.896825396825398e-06, "loss": 46.2797, "step": 1710 }, { "epoch": 40.74029850746269, "grad_norm": 31.081378936767578, "learning_rate": 9.892857142857143e-06, "loss": 45.1643, "step": 1711 }, { "epoch": 40.76417910447761, "grad_norm": 28.92620086669922, "learning_rate": 9.88888888888889e-06, "loss": 46.3105, "step": 1712 }, { "epoch": 40.788059701492536, "grad_norm": 23.232866287231445, "learning_rate": 9.884920634920636e-06, "loss": 46.6131, "step": 1713 }, { "epoch": 40.811940298507466, "grad_norm": 25.97928810119629, "learning_rate": 9.880952380952381e-06, "loss": 45.5054, "step": 1714 }, { "epoch": 40.83582089552239, "grad_norm": 27.382034301757812, "learning_rate": 9.876984126984128e-06, "loss": 45.593, "step": 1715 }, { "epoch": 40.85970149253731, "grad_norm": 23.762460708618164, "learning_rate": 9.873015873015874e-06, "loss": 45.7414, "step": 1716 }, { "epoch": 40.88358208955224, "grad_norm": 29.6158390045166, "learning_rate": 9.869047619047621e-06, "loss": 45.0669, "step": 1717 }, { "epoch": 40.907462686567165, "grad_norm": 24.66147804260254, "learning_rate": 9.865079365079366e-06, "loss": 45.6125, "step": 1718 }, { "epoch": 40.93134328358209, "grad_norm": 28.167495727539062, "learning_rate": 9.861111111111112e-06, "loss": 46.099, "step": 1719 }, { "epoch": 40.95522388059702, "grad_norm": 27.325531005859375, "learning_rate": 9.857142857142859e-06, "loss": 45.1728, "step": 1720 }, { "epoch": 40.97910447761194, "grad_norm": 23.650911331176758, "learning_rate": 9.853174603174604e-06, "loss": 44.6743, "step": 1721 }, { "epoch": 41.0, "grad_norm": 22.53518295288086, "learning_rate": 9.849206349206351e-06, "loss": 39.1464, "step": 1722 }, { "epoch": 41.02388059701492, "grad_norm": 28.995275497436523, "learning_rate": 9.845238095238097e-06, "loss": 44.5823, "step": 1723 }, { "epoch": 41.04776119402985, "grad_norm": 28.680805206298828, "learning_rate": 9.841269841269842e-06, "loss": 44.7002, "step": 1724 }, { "epoch": 41.071641791044776, "grad_norm": 24.10047149658203, "learning_rate": 9.837301587301588e-06, "loss": 46.232, "step": 1725 }, { "epoch": 41.0955223880597, "grad_norm": 25.722291946411133, "learning_rate": 9.833333333333333e-06, "loss": 45.1447, "step": 1726 }, { "epoch": 41.11940298507463, "grad_norm": 22.944278717041016, "learning_rate": 9.82936507936508e-06, "loss": 46.57, "step": 1727 }, { "epoch": 41.14328358208955, "grad_norm": 25.734941482543945, "learning_rate": 9.825396825396825e-06, "loss": 45.8386, "step": 1728 }, { "epoch": 41.167164179104475, "grad_norm": 23.644197463989258, "learning_rate": 9.821428571428573e-06, "loss": 46.2608, "step": 1729 }, { "epoch": 41.191044776119405, "grad_norm": 22.163721084594727, "learning_rate": 9.817460317460318e-06, "loss": 45.2914, "step": 1730 }, { "epoch": 41.21492537313433, "grad_norm": 33.71270751953125, "learning_rate": 9.813492063492063e-06, "loss": 44.6372, "step": 1731 }, { "epoch": 41.23880597014925, "grad_norm": 28.478361129760742, "learning_rate": 9.80952380952381e-06, "loss": 45.472, "step": 1732 }, { "epoch": 41.26268656716418, "grad_norm": 27.120990753173828, "learning_rate": 9.805555555555556e-06, "loss": 46.445, "step": 1733 }, { "epoch": 41.286567164179104, "grad_norm": 25.342784881591797, "learning_rate": 9.801587301587301e-06, "loss": 45.8317, "step": 1734 }, { "epoch": 41.31044776119403, "grad_norm": 28.94765853881836, "learning_rate": 9.797619047619048e-06, "loss": 46.0677, "step": 1735 }, { "epoch": 41.33432835820896, "grad_norm": 22.983802795410156, "learning_rate": 9.793650793650794e-06, "loss": 45.8029, "step": 1736 }, { "epoch": 41.35820895522388, "grad_norm": 24.97469711303711, "learning_rate": 9.78968253968254e-06, "loss": 46.7215, "step": 1737 }, { "epoch": 41.3820895522388, "grad_norm": 26.136960983276367, "learning_rate": 9.785714285714286e-06, "loss": 45.8042, "step": 1738 }, { "epoch": 41.40597014925373, "grad_norm": 21.150083541870117, "learning_rate": 9.781746031746032e-06, "loss": 45.836, "step": 1739 }, { "epoch": 41.429850746268656, "grad_norm": 19.56538963317871, "learning_rate": 9.777777777777779e-06, "loss": 46.0126, "step": 1740 }, { "epoch": 41.45373134328358, "grad_norm": 26.608108520507812, "learning_rate": 9.773809523809524e-06, "loss": 45.3108, "step": 1741 }, { "epoch": 41.47761194029851, "grad_norm": 19.020097732543945, "learning_rate": 9.769841269841271e-06, "loss": 46.278, "step": 1742 }, { "epoch": 41.50149253731343, "grad_norm": 25.4818172454834, "learning_rate": 9.765873015873017e-06, "loss": 45.8142, "step": 1743 }, { "epoch": 41.525373134328355, "grad_norm": 21.7120304107666, "learning_rate": 9.761904761904762e-06, "loss": 46.0221, "step": 1744 }, { "epoch": 41.549253731343285, "grad_norm": 24.395984649658203, "learning_rate": 9.757936507936509e-06, "loss": 45.3654, "step": 1745 }, { "epoch": 41.57313432835821, "grad_norm": 26.8757381439209, "learning_rate": 9.753968253968254e-06, "loss": 46.0073, "step": 1746 }, { "epoch": 41.59701492537313, "grad_norm": 27.31254768371582, "learning_rate": 9.75e-06, "loss": 45.465, "step": 1747 }, { "epoch": 41.62089552238806, "grad_norm": 23.271629333496094, "learning_rate": 9.746031746031747e-06, "loss": 46.0739, "step": 1748 }, { "epoch": 41.644776119402984, "grad_norm": 24.240131378173828, "learning_rate": 9.742063492063492e-06, "loss": 45.6978, "step": 1749 }, { "epoch": 41.668656716417914, "grad_norm": 23.16962242126465, "learning_rate": 9.73809523809524e-06, "loss": 45.9961, "step": 1750 }, { "epoch": 41.69253731343284, "grad_norm": 29.63677406311035, "learning_rate": 9.734126984126985e-06, "loss": 45.0859, "step": 1751 }, { "epoch": 41.71641791044776, "grad_norm": 20.725126266479492, "learning_rate": 9.73015873015873e-06, "loss": 45.3546, "step": 1752 }, { "epoch": 41.74029850746269, "grad_norm": 23.172834396362305, "learning_rate": 9.726190476190477e-06, "loss": 45.3822, "step": 1753 }, { "epoch": 41.76417910447761, "grad_norm": 30.179182052612305, "learning_rate": 9.722222222222223e-06, "loss": 45.0901, "step": 1754 }, { "epoch": 41.788059701492536, "grad_norm": 17.276126861572266, "learning_rate": 9.71825396825397e-06, "loss": 45.4555, "step": 1755 }, { "epoch": 41.811940298507466, "grad_norm": 24.585174560546875, "learning_rate": 9.714285714285715e-06, "loss": 43.8513, "step": 1756 }, { "epoch": 41.83582089552239, "grad_norm": 23.242969512939453, "learning_rate": 9.71031746031746e-06, "loss": 45.7996, "step": 1757 }, { "epoch": 41.85970149253731, "grad_norm": 21.585342407226562, "learning_rate": 9.706349206349208e-06, "loss": 45.2616, "step": 1758 }, { "epoch": 41.88358208955224, "grad_norm": 28.802600860595703, "learning_rate": 9.702380952380953e-06, "loss": 45.6062, "step": 1759 }, { "epoch": 41.907462686567165, "grad_norm": 23.895822525024414, "learning_rate": 9.698412698412698e-06, "loss": 44.3029, "step": 1760 }, { "epoch": 41.93134328358209, "grad_norm": 26.175247192382812, "learning_rate": 9.694444444444446e-06, "loss": 45.6048, "step": 1761 }, { "epoch": 41.95522388059702, "grad_norm": 23.499914169311523, "learning_rate": 9.690476190476191e-06, "loss": 45.4891, "step": 1762 }, { "epoch": 41.97910447761194, "grad_norm": 22.244211196899414, "learning_rate": 9.686507936507938e-06, "loss": 44.1723, "step": 1763 }, { "epoch": 42.0, "grad_norm": 20.29228401184082, "learning_rate": 9.682539682539683e-06, "loss": 39.7896, "step": 1764 }, { "epoch": 42.02388059701492, "grad_norm": 27.773515701293945, "learning_rate": 9.678571428571429e-06, "loss": 45.7383, "step": 1765 }, { "epoch": 42.04776119402985, "grad_norm": 27.289716720581055, "learning_rate": 9.674603174603176e-06, "loss": 45.2073, "step": 1766 }, { "epoch": 42.071641791044776, "grad_norm": 21.16016387939453, "learning_rate": 9.670634920634921e-06, "loss": 45.3415, "step": 1767 }, { "epoch": 42.0955223880597, "grad_norm": 28.878597259521484, "learning_rate": 9.666666666666667e-06, "loss": 45.1139, "step": 1768 }, { "epoch": 42.11940298507463, "grad_norm": 29.504600524902344, "learning_rate": 9.662698412698414e-06, "loss": 46.185, "step": 1769 }, { "epoch": 42.14328358208955, "grad_norm": 20.372560501098633, "learning_rate": 9.65873015873016e-06, "loss": 46.4996, "step": 1770 }, { "epoch": 42.167164179104475, "grad_norm": 27.437274932861328, "learning_rate": 9.654761904761906e-06, "loss": 43.77, "step": 1771 }, { "epoch": 42.191044776119405, "grad_norm": 23.735233306884766, "learning_rate": 9.650793650793652e-06, "loss": 43.9415, "step": 1772 }, { "epoch": 42.21492537313433, "grad_norm": 26.434886932373047, "learning_rate": 9.646825396825397e-06, "loss": 46.6163, "step": 1773 }, { "epoch": 42.23880597014925, "grad_norm": 26.843782424926758, "learning_rate": 9.642857142857144e-06, "loss": 46.1987, "step": 1774 }, { "epoch": 42.26268656716418, "grad_norm": 25.86046600341797, "learning_rate": 9.63888888888889e-06, "loss": 46.635, "step": 1775 }, { "epoch": 42.286567164179104, "grad_norm": 25.95208740234375, "learning_rate": 9.634920634920637e-06, "loss": 44.6339, "step": 1776 }, { "epoch": 42.31044776119403, "grad_norm": 21.243392944335938, "learning_rate": 9.630952380952382e-06, "loss": 45.1151, "step": 1777 }, { "epoch": 42.33432835820896, "grad_norm": 22.445972442626953, "learning_rate": 9.626984126984127e-06, "loss": 45.1704, "step": 1778 }, { "epoch": 42.35820895522388, "grad_norm": 37.871681213378906, "learning_rate": 9.623015873015875e-06, "loss": 45.116, "step": 1779 }, { "epoch": 42.3820895522388, "grad_norm": 25.75882339477539, "learning_rate": 9.61904761904762e-06, "loss": 45.2748, "step": 1780 }, { "epoch": 42.40597014925373, "grad_norm": 32.44329071044922, "learning_rate": 9.615079365079365e-06, "loss": 45.0782, "step": 1781 }, { "epoch": 42.429850746268656, "grad_norm": 25.74696159362793, "learning_rate": 9.611111111111112e-06, "loss": 46.1405, "step": 1782 }, { "epoch": 42.45373134328358, "grad_norm": 44.88374710083008, "learning_rate": 9.607142857142858e-06, "loss": 45.7843, "step": 1783 }, { "epoch": 42.47761194029851, "grad_norm": 29.956615447998047, "learning_rate": 9.603174603174605e-06, "loss": 46.7361, "step": 1784 }, { "epoch": 42.50149253731343, "grad_norm": 41.191864013671875, "learning_rate": 9.59920634920635e-06, "loss": 45.7368, "step": 1785 }, { "epoch": 42.525373134328355, "grad_norm": 32.30370330810547, "learning_rate": 9.595238095238096e-06, "loss": 45.4091, "step": 1786 }, { "epoch": 42.549253731343285, "grad_norm": 32.65694046020508, "learning_rate": 9.591269841269843e-06, "loss": 44.8837, "step": 1787 }, { "epoch": 42.57313432835821, "grad_norm": 29.783634185791016, "learning_rate": 9.587301587301588e-06, "loss": 46.0239, "step": 1788 }, { "epoch": 42.59701492537313, "grad_norm": 32.415035247802734, "learning_rate": 9.583333333333335e-06, "loss": 44.7968, "step": 1789 }, { "epoch": 42.62089552238806, "grad_norm": 31.461589813232422, "learning_rate": 9.57936507936508e-06, "loss": 44.5408, "step": 1790 }, { "epoch": 42.644776119402984, "grad_norm": 27.083560943603516, "learning_rate": 9.575396825396826e-06, "loss": 44.9716, "step": 1791 }, { "epoch": 42.668656716417914, "grad_norm": 34.453102111816406, "learning_rate": 9.571428571428573e-06, "loss": 44.8527, "step": 1792 }, { "epoch": 42.69253731343284, "grad_norm": 24.403902053833008, "learning_rate": 9.567460317460319e-06, "loss": 44.6635, "step": 1793 }, { "epoch": 42.71641791044776, "grad_norm": 43.89455795288086, "learning_rate": 9.563492063492064e-06, "loss": 45.9798, "step": 1794 }, { "epoch": 42.74029850746269, "grad_norm": 33.704498291015625, "learning_rate": 9.559523809523811e-06, "loss": 45.8182, "step": 1795 }, { "epoch": 42.76417910447761, "grad_norm": 38.266357421875, "learning_rate": 9.555555555555556e-06, "loss": 44.8923, "step": 1796 }, { "epoch": 42.788059701492536, "grad_norm": 36.38774490356445, "learning_rate": 9.551587301587304e-06, "loss": 45.5987, "step": 1797 }, { "epoch": 42.811940298507466, "grad_norm": 33.449737548828125, "learning_rate": 9.547619047619049e-06, "loss": 46.2494, "step": 1798 }, { "epoch": 42.83582089552239, "grad_norm": 29.902509689331055, "learning_rate": 9.543650793650794e-06, "loss": 44.7438, "step": 1799 }, { "epoch": 42.85970149253731, "grad_norm": 35.025184631347656, "learning_rate": 9.539682539682541e-06, "loss": 44.7825, "step": 1800 }, { "epoch": 42.88358208955224, "grad_norm": 30.783037185668945, "learning_rate": 9.535714285714287e-06, "loss": 45.3493, "step": 1801 }, { "epoch": 42.907462686567165, "grad_norm": 28.61165428161621, "learning_rate": 9.531746031746032e-06, "loss": 46.5537, "step": 1802 }, { "epoch": 42.93134328358209, "grad_norm": 34.27008056640625, "learning_rate": 9.527777777777778e-06, "loss": 44.0439, "step": 1803 }, { "epoch": 42.95522388059702, "grad_norm": 31.05691146850586, "learning_rate": 9.523809523809525e-06, "loss": 46.1128, "step": 1804 }, { "epoch": 42.97910447761194, "grad_norm": 28.658565521240234, "learning_rate": 9.51984126984127e-06, "loss": 46.2442, "step": 1805 }, { "epoch": 43.0, "grad_norm": 20.02385139465332, "learning_rate": 9.515873015873016e-06, "loss": 39.8537, "step": 1806 }, { "epoch": 43.02388059701492, "grad_norm": 32.5422248840332, "learning_rate": 9.511904761904763e-06, "loss": 44.0765, "step": 1807 }, { "epoch": 43.04776119402985, "grad_norm": 22.364904403686523, "learning_rate": 9.507936507936508e-06, "loss": 45.6789, "step": 1808 }, { "epoch": 43.071641791044776, "grad_norm": 35.576072692871094, "learning_rate": 9.503968253968255e-06, "loss": 45.6707, "step": 1809 }, { "epoch": 43.0955223880597, "grad_norm": 27.892908096313477, "learning_rate": 9.5e-06, "loss": 46.348, "step": 1810 }, { "epoch": 43.11940298507463, "grad_norm": 22.283756256103516, "learning_rate": 9.496031746031746e-06, "loss": 44.8757, "step": 1811 }, { "epoch": 43.14328358208955, "grad_norm": 34.38758087158203, "learning_rate": 9.492063492063493e-06, "loss": 45.0544, "step": 1812 }, { "epoch": 43.167164179104475, "grad_norm": 26.720060348510742, "learning_rate": 9.488095238095238e-06, "loss": 46.2092, "step": 1813 }, { "epoch": 43.191044776119405, "grad_norm": 35.375362396240234, "learning_rate": 9.484126984126984e-06, "loss": 46.0173, "step": 1814 }, { "epoch": 43.21492537313433, "grad_norm": 24.92397117614746, "learning_rate": 9.480158730158731e-06, "loss": 45.5031, "step": 1815 }, { "epoch": 43.23880597014925, "grad_norm": 35.76795959472656, "learning_rate": 9.476190476190476e-06, "loss": 44.8149, "step": 1816 }, { "epoch": 43.26268656716418, "grad_norm": 29.861675262451172, "learning_rate": 9.472222222222223e-06, "loss": 45.4173, "step": 1817 }, { "epoch": 43.286567164179104, "grad_norm": 33.83314895629883, "learning_rate": 9.468253968253969e-06, "loss": 44.8036, "step": 1818 }, { "epoch": 43.31044776119403, "grad_norm": 32.994483947753906, "learning_rate": 9.464285714285714e-06, "loss": 46.5555, "step": 1819 }, { "epoch": 43.33432835820896, "grad_norm": 23.94085693359375, "learning_rate": 9.460317460317461e-06, "loss": 45.9566, "step": 1820 }, { "epoch": 43.35820895522388, "grad_norm": 34.10947799682617, "learning_rate": 9.456349206349207e-06, "loss": 45.2182, "step": 1821 }, { "epoch": 43.3820895522388, "grad_norm": 23.844850540161133, "learning_rate": 9.452380952380952e-06, "loss": 45.5904, "step": 1822 }, { "epoch": 43.40597014925373, "grad_norm": 46.643768310546875, "learning_rate": 9.4484126984127e-06, "loss": 46.6924, "step": 1823 }, { "epoch": 43.429850746268656, "grad_norm": 32.49457931518555, "learning_rate": 9.444444444444445e-06, "loss": 44.024, "step": 1824 }, { "epoch": 43.45373134328358, "grad_norm": 32.36979293823242, "learning_rate": 9.440476190476192e-06, "loss": 45.5752, "step": 1825 }, { "epoch": 43.47761194029851, "grad_norm": 28.21212387084961, "learning_rate": 9.436507936507937e-06, "loss": 43.3649, "step": 1826 }, { "epoch": 43.50149253731343, "grad_norm": 30.494169235229492, "learning_rate": 9.432539682539682e-06, "loss": 44.4726, "step": 1827 }, { "epoch": 43.525373134328355, "grad_norm": 29.817806243896484, "learning_rate": 9.42857142857143e-06, "loss": 45.4315, "step": 1828 }, { "epoch": 43.549253731343285, "grad_norm": 32.68490219116211, "learning_rate": 9.424603174603175e-06, "loss": 45.1985, "step": 1829 }, { "epoch": 43.57313432835821, "grad_norm": 28.48166275024414, "learning_rate": 9.420634920634922e-06, "loss": 45.6737, "step": 1830 }, { "epoch": 43.59701492537313, "grad_norm": 30.532995223999023, "learning_rate": 9.416666666666667e-06, "loss": 45.9931, "step": 1831 }, { "epoch": 43.62089552238806, "grad_norm": 24.953765869140625, "learning_rate": 9.412698412698413e-06, "loss": 44.4189, "step": 1832 }, { "epoch": 43.644776119402984, "grad_norm": 23.647258758544922, "learning_rate": 9.40873015873016e-06, "loss": 44.6757, "step": 1833 }, { "epoch": 43.668656716417914, "grad_norm": 28.41623878479004, "learning_rate": 9.404761904761905e-06, "loss": 45.5732, "step": 1834 }, { "epoch": 43.69253731343284, "grad_norm": 25.599082946777344, "learning_rate": 9.40079365079365e-06, "loss": 45.0382, "step": 1835 }, { "epoch": 43.71641791044776, "grad_norm": 39.584144592285156, "learning_rate": 9.396825396825398e-06, "loss": 45.1531, "step": 1836 }, { "epoch": 43.74029850746269, "grad_norm": 30.606550216674805, "learning_rate": 9.392857142857143e-06, "loss": 44.3696, "step": 1837 }, { "epoch": 43.76417910447761, "grad_norm": 37.25154495239258, "learning_rate": 9.38888888888889e-06, "loss": 45.4297, "step": 1838 }, { "epoch": 43.788059701492536, "grad_norm": 30.60915184020996, "learning_rate": 9.384920634920636e-06, "loss": 45.2441, "step": 1839 }, { "epoch": 43.811940298507466, "grad_norm": 32.886268615722656, "learning_rate": 9.380952380952381e-06, "loss": 45.3913, "step": 1840 }, { "epoch": 43.83582089552239, "grad_norm": 27.98761749267578, "learning_rate": 9.376984126984128e-06, "loss": 45.2191, "step": 1841 }, { "epoch": 43.85970149253731, "grad_norm": 33.787261962890625, "learning_rate": 9.373015873015874e-06, "loss": 45.0051, "step": 1842 }, { "epoch": 43.88358208955224, "grad_norm": 26.90253257751465, "learning_rate": 9.36904761904762e-06, "loss": 45.9333, "step": 1843 }, { "epoch": 43.907462686567165, "grad_norm": 29.78704261779785, "learning_rate": 9.365079365079366e-06, "loss": 45.3598, "step": 1844 }, { "epoch": 43.93134328358209, "grad_norm": 24.871315002441406, "learning_rate": 9.361111111111111e-06, "loss": 44.4159, "step": 1845 }, { "epoch": 43.95522388059702, "grad_norm": 22.998323440551758, "learning_rate": 9.357142857142859e-06, "loss": 44.9989, "step": 1846 }, { "epoch": 43.97910447761194, "grad_norm": 28.724388122558594, "learning_rate": 9.353174603174604e-06, "loss": 46.0748, "step": 1847 }, { "epoch": 44.0, "grad_norm": 19.870426177978516, "learning_rate": 9.34920634920635e-06, "loss": 41.7823, "step": 1848 }, { "epoch": 44.02388059701492, "grad_norm": 28.000728607177734, "learning_rate": 9.345238095238096e-06, "loss": 44.3686, "step": 1849 }, { "epoch": 44.04776119402985, "grad_norm": 25.330766677856445, "learning_rate": 9.341269841269842e-06, "loss": 44.8766, "step": 1850 }, { "epoch": 44.071641791044776, "grad_norm": 30.589149475097656, "learning_rate": 9.337301587301589e-06, "loss": 46.3777, "step": 1851 }, { "epoch": 44.0955223880597, "grad_norm": 27.803207397460938, "learning_rate": 9.333333333333334e-06, "loss": 45.7627, "step": 1852 }, { "epoch": 44.11940298507463, "grad_norm": 28.11823081970215, "learning_rate": 9.32936507936508e-06, "loss": 44.9612, "step": 1853 }, { "epoch": 44.14328358208955, "grad_norm": 24.24823570251465, "learning_rate": 9.325396825396827e-06, "loss": 45.6512, "step": 1854 }, { "epoch": 44.167164179104475, "grad_norm": 29.81229019165039, "learning_rate": 9.321428571428572e-06, "loss": 43.7322, "step": 1855 }, { "epoch": 44.191044776119405, "grad_norm": 24.88245964050293, "learning_rate": 9.317460317460318e-06, "loss": 45.288, "step": 1856 }, { "epoch": 44.21492537313433, "grad_norm": 31.246389389038086, "learning_rate": 9.313492063492065e-06, "loss": 44.6547, "step": 1857 }, { "epoch": 44.23880597014925, "grad_norm": 29.363845825195312, "learning_rate": 9.30952380952381e-06, "loss": 44.7851, "step": 1858 }, { "epoch": 44.26268656716418, "grad_norm": 32.35028839111328, "learning_rate": 9.305555555555557e-06, "loss": 44.5643, "step": 1859 }, { "epoch": 44.286567164179104, "grad_norm": 31.52218246459961, "learning_rate": 9.301587301587303e-06, "loss": 45.293, "step": 1860 }, { "epoch": 44.31044776119403, "grad_norm": 29.180295944213867, "learning_rate": 9.297619047619048e-06, "loss": 45.298, "step": 1861 }, { "epoch": 44.33432835820896, "grad_norm": 27.626508712768555, "learning_rate": 9.293650793650795e-06, "loss": 45.1187, "step": 1862 }, { "epoch": 44.35820895522388, "grad_norm": 28.44379425048828, "learning_rate": 9.28968253968254e-06, "loss": 45.0835, "step": 1863 }, { "epoch": 44.3820895522388, "grad_norm": 29.45343017578125, "learning_rate": 9.285714285714288e-06, "loss": 45.5642, "step": 1864 }, { "epoch": 44.40597014925373, "grad_norm": 21.64850425720215, "learning_rate": 9.281746031746033e-06, "loss": 45.6837, "step": 1865 }, { "epoch": 44.429850746268656, "grad_norm": 35.32088088989258, "learning_rate": 9.277777777777778e-06, "loss": 44.9266, "step": 1866 }, { "epoch": 44.45373134328358, "grad_norm": 28.638429641723633, "learning_rate": 9.273809523809525e-06, "loss": 45.9407, "step": 1867 }, { "epoch": 44.47761194029851, "grad_norm": 31.444725036621094, "learning_rate": 9.26984126984127e-06, "loss": 45.4442, "step": 1868 }, { "epoch": 44.50149253731343, "grad_norm": 26.114784240722656, "learning_rate": 9.265873015873016e-06, "loss": 45.1998, "step": 1869 }, { "epoch": 44.525373134328355, "grad_norm": 24.51571273803711, "learning_rate": 9.261904761904763e-06, "loss": 45.0705, "step": 1870 }, { "epoch": 44.549253731343285, "grad_norm": 24.52007293701172, "learning_rate": 9.257936507936509e-06, "loss": 43.9359, "step": 1871 }, { "epoch": 44.57313432835821, "grad_norm": 17.876834869384766, "learning_rate": 9.253968253968256e-06, "loss": 44.5254, "step": 1872 }, { "epoch": 44.59701492537313, "grad_norm": 21.0299015045166, "learning_rate": 9.250000000000001e-06, "loss": 46.0916, "step": 1873 }, { "epoch": 44.62089552238806, "grad_norm": 30.12071990966797, "learning_rate": 9.246031746031747e-06, "loss": 44.1769, "step": 1874 }, { "epoch": 44.644776119402984, "grad_norm": 23.94618797302246, "learning_rate": 9.242063492063494e-06, "loss": 45.628, "step": 1875 }, { "epoch": 44.668656716417914, "grad_norm": 29.615930557250977, "learning_rate": 9.238095238095239e-06, "loss": 45.2762, "step": 1876 }, { "epoch": 44.69253731343284, "grad_norm": 30.00957489013672, "learning_rate": 9.234126984126986e-06, "loss": 45.6399, "step": 1877 }, { "epoch": 44.71641791044776, "grad_norm": 26.414703369140625, "learning_rate": 9.230158730158732e-06, "loss": 44.6988, "step": 1878 }, { "epoch": 44.74029850746269, "grad_norm": 28.785755157470703, "learning_rate": 9.226190476190477e-06, "loss": 45.4551, "step": 1879 }, { "epoch": 44.76417910447761, "grad_norm": 23.4616756439209, "learning_rate": 9.222222222222224e-06, "loss": 44.8668, "step": 1880 }, { "epoch": 44.788059701492536, "grad_norm": 25.046113967895508, "learning_rate": 9.218253968253968e-06, "loss": 45.8905, "step": 1881 }, { "epoch": 44.811940298507466, "grad_norm": 31.216581344604492, "learning_rate": 9.214285714285715e-06, "loss": 46.6996, "step": 1882 }, { "epoch": 44.83582089552239, "grad_norm": 22.215465545654297, "learning_rate": 9.21031746031746e-06, "loss": 46.1791, "step": 1883 }, { "epoch": 44.85970149253731, "grad_norm": 33.831214904785156, "learning_rate": 9.206349206349207e-06, "loss": 45.3197, "step": 1884 }, { "epoch": 44.88358208955224, "grad_norm": 24.447084426879883, "learning_rate": 9.202380952380953e-06, "loss": 45.2949, "step": 1885 }, { "epoch": 44.907462686567165, "grad_norm": 31.735240936279297, "learning_rate": 9.198412698412698e-06, "loss": 46.4555, "step": 1886 }, { "epoch": 44.93134328358209, "grad_norm": 27.23394012451172, "learning_rate": 9.194444444444445e-06, "loss": 45.9441, "step": 1887 }, { "epoch": 44.95522388059702, "grad_norm": 27.79869270324707, "learning_rate": 9.19047619047619e-06, "loss": 45.387, "step": 1888 }, { "epoch": 44.97910447761194, "grad_norm": 24.329313278198242, "learning_rate": 9.186507936507936e-06, "loss": 44.2934, "step": 1889 }, { "epoch": 45.0, "grad_norm": 22.191181182861328, "learning_rate": 9.182539682539683e-06, "loss": 39.0195, "step": 1890 }, { "epoch": 45.02388059701492, "grad_norm": 32.1130256652832, "learning_rate": 9.178571428571429e-06, "loss": 45.9272, "step": 1891 }, { "epoch": 45.04776119402985, "grad_norm": 19.621145248413086, "learning_rate": 9.174603174603176e-06, "loss": 44.3447, "step": 1892 }, { "epoch": 45.071641791044776, "grad_norm": 39.10493087768555, "learning_rate": 9.170634920634921e-06, "loss": 46.001, "step": 1893 }, { "epoch": 45.0955223880597, "grad_norm": 23.7473201751709, "learning_rate": 9.166666666666666e-06, "loss": 45.5786, "step": 1894 }, { "epoch": 45.11940298507463, "grad_norm": 30.535781860351562, "learning_rate": 9.162698412698414e-06, "loss": 46.3373, "step": 1895 }, { "epoch": 45.14328358208955, "grad_norm": 26.53186798095703, "learning_rate": 9.158730158730159e-06, "loss": 44.6074, "step": 1896 }, { "epoch": 45.167164179104475, "grad_norm": 31.9615478515625, "learning_rate": 9.154761904761906e-06, "loss": 43.229, "step": 1897 }, { "epoch": 45.191044776119405, "grad_norm": 28.577655792236328, "learning_rate": 9.150793650793651e-06, "loss": 45.6407, "step": 1898 }, { "epoch": 45.21492537313433, "grad_norm": 18.897531509399414, "learning_rate": 9.146825396825397e-06, "loss": 45.4092, "step": 1899 }, { "epoch": 45.23880597014925, "grad_norm": 31.126819610595703, "learning_rate": 9.142857142857144e-06, "loss": 43.966, "step": 1900 }, { "epoch": 45.26268656716418, "grad_norm": 26.312490463256836, "learning_rate": 9.13888888888889e-06, "loss": 45.3819, "step": 1901 }, { "epoch": 45.286567164179104, "grad_norm": 28.255640029907227, "learning_rate": 9.134920634920635e-06, "loss": 43.9926, "step": 1902 }, { "epoch": 45.31044776119403, "grad_norm": 27.333642959594727, "learning_rate": 9.130952380952382e-06, "loss": 45.7229, "step": 1903 }, { "epoch": 45.33432835820896, "grad_norm": 27.024580001831055, "learning_rate": 9.126984126984127e-06, "loss": 45.24, "step": 1904 }, { "epoch": 45.35820895522388, "grad_norm": 31.131914138793945, "learning_rate": 9.123015873015874e-06, "loss": 44.4842, "step": 1905 }, { "epoch": 45.3820895522388, "grad_norm": 27.244861602783203, "learning_rate": 9.11904761904762e-06, "loss": 45.0392, "step": 1906 }, { "epoch": 45.40597014925373, "grad_norm": 30.606016159057617, "learning_rate": 9.115079365079365e-06, "loss": 44.4968, "step": 1907 }, { "epoch": 45.429850746268656, "grad_norm": 22.56324577331543, "learning_rate": 9.111111111111112e-06, "loss": 45.5149, "step": 1908 }, { "epoch": 45.45373134328358, "grad_norm": 31.586326599121094, "learning_rate": 9.107142857142858e-06, "loss": 45.9413, "step": 1909 }, { "epoch": 45.47761194029851, "grad_norm": 23.143661499023438, "learning_rate": 9.103174603174603e-06, "loss": 44.3301, "step": 1910 }, { "epoch": 45.50149253731343, "grad_norm": 33.158111572265625, "learning_rate": 9.09920634920635e-06, "loss": 45.3503, "step": 1911 }, { "epoch": 45.525373134328355, "grad_norm": 26.259010314941406, "learning_rate": 9.095238095238095e-06, "loss": 44.125, "step": 1912 }, { "epoch": 45.549253731343285, "grad_norm": 25.72600555419922, "learning_rate": 9.091269841269843e-06, "loss": 45.8252, "step": 1913 }, { "epoch": 45.57313432835821, "grad_norm": 29.651403427124023, "learning_rate": 9.087301587301588e-06, "loss": 44.7603, "step": 1914 }, { "epoch": 45.59701492537313, "grad_norm": 24.896892547607422, "learning_rate": 9.083333333333333e-06, "loss": 45.3582, "step": 1915 }, { "epoch": 45.62089552238806, "grad_norm": 26.172271728515625, "learning_rate": 9.07936507936508e-06, "loss": 45.418, "step": 1916 }, { "epoch": 45.644776119402984, "grad_norm": 31.333498001098633, "learning_rate": 9.075396825396826e-06, "loss": 45.5952, "step": 1917 }, { "epoch": 45.668656716417914, "grad_norm": 23.452194213867188, "learning_rate": 9.071428571428573e-06, "loss": 45.8141, "step": 1918 }, { "epoch": 45.69253731343284, "grad_norm": 30.300634384155273, "learning_rate": 9.067460317460318e-06, "loss": 46.1877, "step": 1919 }, { "epoch": 45.71641791044776, "grad_norm": 24.516042709350586, "learning_rate": 9.063492063492064e-06, "loss": 44.0542, "step": 1920 }, { "epoch": 45.74029850746269, "grad_norm": 26.41005516052246, "learning_rate": 9.05952380952381e-06, "loss": 44.2296, "step": 1921 }, { "epoch": 45.76417910447761, "grad_norm": 23.099822998046875, "learning_rate": 9.055555555555556e-06, "loss": 45.2567, "step": 1922 }, { "epoch": 45.788059701492536, "grad_norm": 18.7821044921875, "learning_rate": 9.051587301587302e-06, "loss": 44.5807, "step": 1923 }, { "epoch": 45.811940298507466, "grad_norm": 31.705181121826172, "learning_rate": 9.047619047619049e-06, "loss": 45.1571, "step": 1924 }, { "epoch": 45.83582089552239, "grad_norm": 25.712608337402344, "learning_rate": 9.043650793650794e-06, "loss": 44.9665, "step": 1925 }, { "epoch": 45.85970149253731, "grad_norm": 31.790864944458008, "learning_rate": 9.039682539682541e-06, "loss": 45.6095, "step": 1926 }, { "epoch": 45.88358208955224, "grad_norm": 27.735107421875, "learning_rate": 9.035714285714287e-06, "loss": 45.8988, "step": 1927 }, { "epoch": 45.907462686567165, "grad_norm": 30.94534683227539, "learning_rate": 9.031746031746032e-06, "loss": 45.8302, "step": 1928 }, { "epoch": 45.93134328358209, "grad_norm": 23.146005630493164, "learning_rate": 9.027777777777779e-06, "loss": 45.3911, "step": 1929 }, { "epoch": 45.95522388059702, "grad_norm": 24.59404945373535, "learning_rate": 9.023809523809524e-06, "loss": 45.1403, "step": 1930 }, { "epoch": 45.97910447761194, "grad_norm": 25.62955665588379, "learning_rate": 9.019841269841272e-06, "loss": 44.8934, "step": 1931 }, { "epoch": 46.0, "grad_norm": 20.037391662597656, "learning_rate": 9.015873015873017e-06, "loss": 39.4122, "step": 1932 }, { "epoch": 46.02388059701492, "grad_norm": 25.78251075744629, "learning_rate": 9.011904761904762e-06, "loss": 45.7163, "step": 1933 }, { "epoch": 46.04776119402985, "grad_norm": 28.0667781829834, "learning_rate": 9.00793650793651e-06, "loss": 44.6447, "step": 1934 }, { "epoch": 46.071641791044776, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 61.1269, "step": 1935 }, { "epoch": 46.0955223880597, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 57.8669, "step": 1936 }, { "epoch": 46.11940298507463, "grad_norm": 19.022104263305664, "learning_rate": 9.003968253968255e-06, "loss": 46.0546, "step": 1937 }, { "epoch": 46.14328358208955, "grad_norm": 28.844619750976562, "learning_rate": 9e-06, "loss": 44.5077, "step": 1938 }, { "epoch": 46.167164179104475, "grad_norm": 23.570850372314453, "learning_rate": 8.996031746031747e-06, "loss": 44.8965, "step": 1939 }, { "epoch": 46.191044776119405, "grad_norm": 27.71855354309082, "learning_rate": 8.992063492063493e-06, "loss": 45.3302, "step": 1940 }, { "epoch": 46.21492537313433, "grad_norm": 23.61193084716797, "learning_rate": 8.98809523809524e-06, "loss": 45.4048, "step": 1941 }, { "epoch": 46.23880597014925, "grad_norm": 27.16132926940918, "learning_rate": 8.984126984126985e-06, "loss": 44.535, "step": 1942 }, { "epoch": 46.26268656716418, "grad_norm": 25.254039764404297, "learning_rate": 8.98015873015873e-06, "loss": 45.2944, "step": 1943 }, { "epoch": 46.286567164179104, "grad_norm": 28.196325302124023, "learning_rate": 8.976190476190478e-06, "loss": 44.0106, "step": 1944 }, { "epoch": 46.31044776119403, "grad_norm": 24.75798988342285, "learning_rate": 8.972222222222223e-06, "loss": 44.949, "step": 1945 }, { "epoch": 46.33432835820896, "grad_norm": 30.992849349975586, "learning_rate": 8.968253968253968e-06, "loss": 44.6185, "step": 1946 }, { "epoch": 46.35820895522388, "grad_norm": 28.122825622558594, "learning_rate": 8.964285714285716e-06, "loss": 46.7498, "step": 1947 }, { "epoch": 46.3820895522388, "grad_norm": 25.130678176879883, "learning_rate": 8.960317460317461e-06, "loss": 45.7823, "step": 1948 }, { "epoch": 46.40597014925373, "grad_norm": 26.97332763671875, "learning_rate": 8.956349206349208e-06, "loss": 44.8217, "step": 1949 }, { "epoch": 46.429850746268656, "grad_norm": 21.403100967407227, "learning_rate": 8.952380952380953e-06, "loss": 45.4608, "step": 1950 }, { "epoch": 46.45373134328358, "grad_norm": 30.794330596923828, "learning_rate": 8.948412698412699e-06, "loss": 45.0327, "step": 1951 }, { "epoch": 46.47761194029851, "grad_norm": 26.035839080810547, "learning_rate": 8.944444444444446e-06, "loss": 44.6979, "step": 1952 }, { "epoch": 46.50149253731343, "grad_norm": 21.501266479492188, "learning_rate": 8.940476190476191e-06, "loss": 44.6421, "step": 1953 }, { "epoch": 46.525373134328355, "grad_norm": 27.67610740661621, "learning_rate": 8.936507936507938e-06, "loss": 44.5721, "step": 1954 }, { "epoch": 46.549253731343285, "grad_norm": 24.71251678466797, "learning_rate": 8.932539682539684e-06, "loss": 45.2891, "step": 1955 }, { "epoch": 46.57313432835821, "grad_norm": 32.72700500488281, "learning_rate": 8.92857142857143e-06, "loss": 45.0829, "step": 1956 }, { "epoch": 46.59701492537313, "grad_norm": 26.203643798828125, "learning_rate": 8.924603174603176e-06, "loss": 44.9264, "step": 1957 }, { "epoch": 46.62089552238806, "grad_norm": 25.362638473510742, "learning_rate": 8.920634920634922e-06, "loss": 45.1448, "step": 1958 }, { "epoch": 46.644776119402984, "grad_norm": 25.224456787109375, "learning_rate": 8.916666666666667e-06, "loss": 45.6017, "step": 1959 }, { "epoch": 46.668656716417914, "grad_norm": 29.02377700805664, "learning_rate": 8.912698412698414e-06, "loss": 45.5859, "step": 1960 }, { "epoch": 46.69253731343284, "grad_norm": 25.2493896484375, "learning_rate": 8.90873015873016e-06, "loss": 44.3262, "step": 1961 }, { "epoch": 46.71641791044776, "grad_norm": 24.432043075561523, "learning_rate": 8.904761904761905e-06, "loss": 44.0005, "step": 1962 }, { "epoch": 46.74029850746269, "grad_norm": 23.06245994567871, "learning_rate": 8.90079365079365e-06, "loss": 45.2406, "step": 1963 }, { "epoch": 46.76417910447761, "grad_norm": 27.603015899658203, "learning_rate": 8.896825396825398e-06, "loss": 45.2547, "step": 1964 }, { "epoch": 46.788059701492536, "grad_norm": 26.66181182861328, "learning_rate": 8.892857142857143e-06, "loss": 45.0288, "step": 1965 }, { "epoch": 46.811940298507466, "grad_norm": 19.665678024291992, "learning_rate": 8.888888888888888e-06, "loss": 45.1412, "step": 1966 }, { "epoch": 46.83582089552239, "grad_norm": 31.3046932220459, "learning_rate": 8.884920634920635e-06, "loss": 45.7144, "step": 1967 }, { "epoch": 46.85970149253731, "grad_norm": 24.661293029785156, "learning_rate": 8.88095238095238e-06, "loss": 43.9468, "step": 1968 }, { "epoch": 46.88358208955224, "grad_norm": 25.421525955200195, "learning_rate": 8.876984126984128e-06, "loss": 45.4404, "step": 1969 }, { "epoch": 46.907462686567165, "grad_norm": 30.11313247680664, "learning_rate": 8.873015873015873e-06, "loss": 44.4083, "step": 1970 }, { "epoch": 46.93134328358209, "grad_norm": 24.19677734375, "learning_rate": 8.869047619047619e-06, "loss": 45.5387, "step": 1971 }, { "epoch": 46.95522388059702, "grad_norm": 25.183414459228516, "learning_rate": 8.865079365079366e-06, "loss": 45.2725, "step": 1972 }, { "epoch": 46.97910447761194, "grad_norm": 22.570981979370117, "learning_rate": 8.861111111111111e-06, "loss": 44.1263, "step": 1973 }, { "epoch": 47.0, "grad_norm": 27.16869354248047, "learning_rate": 8.857142857142858e-06, "loss": 39.0382, "step": 1974 }, { "epoch": 47.02388059701492, "grad_norm": 27.326980590820312, "learning_rate": 8.853174603174604e-06, "loss": 45.0956, "step": 1975 }, { "epoch": 47.04776119402985, "grad_norm": 25.321685791015625, "learning_rate": 8.849206349206349e-06, "loss": 45.1531, "step": 1976 }, { "epoch": 47.071641791044776, "grad_norm": 29.480770111083984, "learning_rate": 8.845238095238096e-06, "loss": 44.7925, "step": 1977 }, { "epoch": 47.0955223880597, "grad_norm": 29.82880210876465, "learning_rate": 8.841269841269842e-06, "loss": 45.6435, "step": 1978 }, { "epoch": 47.11940298507463, "grad_norm": 31.852386474609375, "learning_rate": 8.837301587301587e-06, "loss": 45.0481, "step": 1979 }, { "epoch": 47.14328358208955, "grad_norm": 27.80265235900879, "learning_rate": 8.833333333333334e-06, "loss": 44.7472, "step": 1980 }, { "epoch": 47.167164179104475, "grad_norm": NaN, "learning_rate": 8.82936507936508e-06, "loss": 38.8619, "step": 1981 }, { "epoch": 47.191044776119405, "grad_norm": 24.525455474853516, "learning_rate": 8.82936507936508e-06, "loss": 44.8093, "step": 1982 }, { "epoch": 47.21492537313433, "grad_norm": 26.450302124023438, "learning_rate": 8.825396825396827e-06, "loss": 44.7615, "step": 1983 }, { "epoch": 47.23880597014925, "grad_norm": 22.493268966674805, "learning_rate": 8.821428571428572e-06, "loss": 44.5445, "step": 1984 }, { "epoch": 47.26268656716418, "grad_norm": 26.506013870239258, "learning_rate": 8.817460317460317e-06, "loss": 45.4412, "step": 1985 }, { "epoch": 47.286567164179104, "grad_norm": 23.09911346435547, "learning_rate": 8.813492063492064e-06, "loss": 44.8791, "step": 1986 }, { "epoch": 47.31044776119403, "grad_norm": 21.34832191467285, "learning_rate": 8.80952380952381e-06, "loss": 44.8867, "step": 1987 }, { "epoch": 47.33432835820896, "grad_norm": 25.69770050048828, "learning_rate": 8.805555555555557e-06, "loss": 45.0307, "step": 1988 }, { "epoch": 47.35820895522388, "grad_norm": 27.75917625427246, "learning_rate": 8.801587301587302e-06, "loss": 43.7733, "step": 1989 }, { "epoch": 47.3820895522388, "grad_norm": 24.314449310302734, "learning_rate": 8.797619047619048e-06, "loss": 44.8685, "step": 1990 }, { "epoch": 47.40597014925373, "grad_norm": 22.21106719970703, "learning_rate": 8.793650793650795e-06, "loss": 45.2589, "step": 1991 }, { "epoch": 47.429850746268656, "grad_norm": 28.61949920654297, "learning_rate": 8.78968253968254e-06, "loss": 45.7972, "step": 1992 }, { "epoch": 47.45373134328358, "grad_norm": 27.726839065551758, "learning_rate": 8.785714285714286e-06, "loss": 44.0989, "step": 1993 }, { "epoch": 47.47761194029851, "grad_norm": 24.9364013671875, "learning_rate": 8.781746031746033e-06, "loss": 44.9365, "step": 1994 }, { "epoch": 47.50149253731343, "grad_norm": 23.380905151367188, "learning_rate": 8.777777777777778e-06, "loss": 44.9662, "step": 1995 }, { "epoch": 47.525373134328355, "grad_norm": 22.02720832824707, "learning_rate": 8.773809523809525e-06, "loss": 45.1456, "step": 1996 }, { "epoch": 47.549253731343285, "grad_norm": NaN, "learning_rate": 8.76984126984127e-06, "loss": 60.0243, "step": 1997 }, { "epoch": 47.57313432835821, "grad_norm": 21.263904571533203, "learning_rate": 8.76984126984127e-06, "loss": 44.6697, "step": 1998 }, { "epoch": 47.59701492537313, "grad_norm": 25.381332397460938, "learning_rate": 8.765873015873016e-06, "loss": 44.9032, "step": 1999 }, { "epoch": 47.62089552238806, "grad_norm": 24.297027587890625, "learning_rate": 8.761904761904763e-06, "loss": 44.5833, "step": 2000 }, { "epoch": 47.644776119402984, "grad_norm": 26.303585052490234, "learning_rate": 8.757936507936508e-06, "loss": 45.252, "step": 2001 }, { "epoch": 47.668656716417914, "grad_norm": 23.310070037841797, "learning_rate": 8.753968253968254e-06, "loss": 45.0068, "step": 2002 }, { "epoch": 47.69253731343284, "grad_norm": 30.19032859802246, "learning_rate": 8.750000000000001e-06, "loss": 46.1286, "step": 2003 }, { "epoch": 47.71641791044776, "grad_norm": 27.43839454650879, "learning_rate": 8.746031746031746e-06, "loss": 46.5151, "step": 2004 }, { "epoch": 47.74029850746269, "grad_norm": 24.49736976623535, "learning_rate": 8.742063492063493e-06, "loss": 45.2309, "step": 2005 }, { "epoch": 47.76417910447761, "grad_norm": 32.9915885925293, "learning_rate": 8.738095238095239e-06, "loss": 44.221, "step": 2006 }, { "epoch": 47.788059701492536, "grad_norm": 27.080114364624023, "learning_rate": 8.734126984126984e-06, "loss": 44.4515, "step": 2007 }, { "epoch": 47.811940298507466, "grad_norm": 34.84925079345703, "learning_rate": 8.730158730158731e-06, "loss": 44.5223, "step": 2008 }, { "epoch": 47.83582089552239, "grad_norm": 28.061695098876953, "learning_rate": 8.726190476190477e-06, "loss": 45.6776, "step": 2009 }, { "epoch": 47.85970149253731, "grad_norm": 35.316009521484375, "learning_rate": 8.722222222222224e-06, "loss": 45.6784, "step": 2010 }, { "epoch": 47.88358208955224, "grad_norm": 29.395872116088867, "learning_rate": 8.71825396825397e-06, "loss": 46.054, "step": 2011 }, { "epoch": 47.907462686567165, "grad_norm": 31.359512329101562, "learning_rate": 8.714285714285715e-06, "loss": 44.6921, "step": 2012 }, { "epoch": 47.93134328358209, "grad_norm": 24.621870040893555, "learning_rate": 8.710317460317462e-06, "loss": 45.8119, "step": 2013 }, { "epoch": 47.95522388059702, "grad_norm": 30.466150283813477, "learning_rate": 8.706349206349207e-06, "loss": 44.5282, "step": 2014 }, { "epoch": 47.97910447761194, "grad_norm": 29.490886688232422, "learning_rate": 8.702380952380952e-06, "loss": 45.2275, "step": 2015 }, { "epoch": 48.0, "grad_norm": 18.86721420288086, "learning_rate": 8.6984126984127e-06, "loss": 38.1757, "step": 2016 }, { "epoch": 48.02388059701492, "grad_norm": 34.39149856567383, "learning_rate": 8.694444444444445e-06, "loss": 45.4931, "step": 2017 }, { "epoch": 48.04776119402985, "grad_norm": 28.87833023071289, "learning_rate": 8.690476190476192e-06, "loss": 45.3396, "step": 2018 }, { "epoch": 48.071641791044776, "grad_norm": 36.20280838012695, "learning_rate": 8.686507936507937e-06, "loss": 44.7758, "step": 2019 }, { "epoch": 48.0955223880597, "grad_norm": 30.76156234741211, "learning_rate": 8.682539682539683e-06, "loss": 44.2899, "step": 2020 }, { "epoch": 48.11940298507463, "grad_norm": 36.33967208862305, "learning_rate": 8.67857142857143e-06, "loss": 44.6879, "step": 2021 }, { "epoch": 48.14328358208955, "grad_norm": 30.22699737548828, "learning_rate": 8.674603174603175e-06, "loss": 45.8113, "step": 2022 }, { "epoch": 48.167164179104475, "grad_norm": 30.748640060424805, "learning_rate": 8.670634920634922e-06, "loss": 44.048, "step": 2023 }, { "epoch": 48.191044776119405, "grad_norm": 25.484418869018555, "learning_rate": 8.666666666666668e-06, "loss": 44.9645, "step": 2024 }, { "epoch": 48.21492537313433, "grad_norm": 33.34728240966797, "learning_rate": 8.662698412698413e-06, "loss": 44.2533, "step": 2025 }, { "epoch": 48.23880597014925, "grad_norm": 24.65802764892578, "learning_rate": 8.65873015873016e-06, "loss": 45.9453, "step": 2026 }, { "epoch": 48.26268656716418, "grad_norm": 30.4432373046875, "learning_rate": 8.654761904761906e-06, "loss": 45.8027, "step": 2027 }, { "epoch": 48.286567164179104, "grad_norm": 22.55684471130371, "learning_rate": 8.650793650793651e-06, "loss": 45.6855, "step": 2028 }, { "epoch": 48.31044776119403, "grad_norm": 22.167613983154297, "learning_rate": 8.646825396825398e-06, "loss": 44.3946, "step": 2029 }, { "epoch": 48.33432835820896, "grad_norm": 27.42496681213379, "learning_rate": 8.642857142857144e-06, "loss": 45.3506, "step": 2030 }, { "epoch": 48.35820895522388, "grad_norm": 24.647188186645508, "learning_rate": 8.63888888888889e-06, "loss": 44.3746, "step": 2031 }, { "epoch": 48.3820895522388, "grad_norm": 28.068981170654297, "learning_rate": 8.634920634920636e-06, "loss": 44.7821, "step": 2032 }, { "epoch": 48.40597014925373, "grad_norm": 22.093984603881836, "learning_rate": 8.630952380952381e-06, "loss": 43.8444, "step": 2033 }, { "epoch": 48.429850746268656, "grad_norm": 33.278778076171875, "learning_rate": 8.626984126984129e-06, "loss": 44.8849, "step": 2034 }, { "epoch": 48.45373134328358, "grad_norm": 23.357349395751953, "learning_rate": 8.623015873015874e-06, "loss": 44.8346, "step": 2035 }, { "epoch": 48.47761194029851, "grad_norm": 29.543947219848633, "learning_rate": 8.61904761904762e-06, "loss": 45.8072, "step": 2036 }, { "epoch": 48.50149253731343, "grad_norm": 24.81306266784668, "learning_rate": 8.615079365079366e-06, "loss": 43.6868, "step": 2037 }, { "epoch": 48.525373134328355, "grad_norm": 30.09635353088379, "learning_rate": 8.611111111111112e-06, "loss": 45.1631, "step": 2038 }, { "epoch": 48.549253731343285, "grad_norm": 26.751686096191406, "learning_rate": 8.607142857142859e-06, "loss": 44.5276, "step": 2039 }, { "epoch": 48.57313432835821, "grad_norm": 22.96086883544922, "learning_rate": 8.603174603174604e-06, "loss": 45.5322, "step": 2040 }, { "epoch": 48.59701492537313, "grad_norm": 30.90753173828125, "learning_rate": 8.59920634920635e-06, "loss": 44.5476, "step": 2041 }, { "epoch": 48.62089552238806, "grad_norm": 22.072256088256836, "learning_rate": 8.595238095238097e-06, "loss": 45.3412, "step": 2042 }, { "epoch": 48.644776119402984, "grad_norm": 37.27132034301758, "learning_rate": 8.591269841269842e-06, "loss": 43.9968, "step": 2043 }, { "epoch": 48.668656716417914, "grad_norm": 31.473464965820312, "learning_rate": 8.587301587301588e-06, "loss": 46.7003, "step": 2044 }, { "epoch": 48.69253731343284, "grad_norm": 41.3200798034668, "learning_rate": 8.583333333333333e-06, "loss": 44.9254, "step": 2045 }, { "epoch": 48.71641791044776, "grad_norm": 28.326889038085938, "learning_rate": 8.57936507936508e-06, "loss": 45.4611, "step": 2046 }, { "epoch": 48.74029850746269, "grad_norm": 42.016624450683594, "learning_rate": 8.575396825396826e-06, "loss": 45.9752, "step": 2047 }, { "epoch": 48.76417910447761, "grad_norm": 39.264827728271484, "learning_rate": 8.571428571428571e-06, "loss": 45.9133, "step": 2048 }, { "epoch": 48.788059701492536, "grad_norm": 36.876461029052734, "learning_rate": 8.567460317460318e-06, "loss": 44.052, "step": 2049 }, { "epoch": 48.811940298507466, "grad_norm": 33.36867141723633, "learning_rate": 8.563492063492063e-06, "loss": 44.8014, "step": 2050 }, { "epoch": 48.83582089552239, "grad_norm": 33.16298294067383, "learning_rate": 8.55952380952381e-06, "loss": 44.005, "step": 2051 }, { "epoch": 48.85970149253731, "grad_norm": 32.4409065246582, "learning_rate": 8.555555555555556e-06, "loss": 44.2993, "step": 2052 }, { "epoch": 48.88358208955224, "grad_norm": 32.56459426879883, "learning_rate": 8.551587301587301e-06, "loss": 45.2025, "step": 2053 }, { "epoch": 48.907462686567165, "grad_norm": 30.31665802001953, "learning_rate": 8.547619047619048e-06, "loss": 43.8506, "step": 2054 }, { "epoch": 48.93134328358209, "grad_norm": 29.07672119140625, "learning_rate": 8.543650793650794e-06, "loss": 44.2567, "step": 2055 }, { "epoch": 48.95522388059702, "grad_norm": 24.603849411010742, "learning_rate": 8.53968253968254e-06, "loss": 44.5072, "step": 2056 }, { "epoch": 48.97910447761194, "grad_norm": 26.305355072021484, "learning_rate": 8.535714285714286e-06, "loss": 45.2023, "step": 2057 }, { "epoch": 49.0, "grad_norm": 20.483905792236328, "learning_rate": 8.531746031746032e-06, "loss": 38.3416, "step": 2058 }, { "epoch": 49.02388059701492, "grad_norm": 18.845535278320312, "learning_rate": 8.527777777777779e-06, "loss": 44.0003, "step": 2059 }, { "epoch": 49.04776119402985, "grad_norm": 20.018390655517578, "learning_rate": 8.523809523809524e-06, "loss": 45.5951, "step": 2060 }, { "epoch": 49.071641791044776, "grad_norm": 18.276540756225586, "learning_rate": 8.51984126984127e-06, "loss": 45.4302, "step": 2061 }, { "epoch": 49.0955223880597, "grad_norm": 18.592966079711914, "learning_rate": 8.515873015873017e-06, "loss": 44.9415, "step": 2062 }, { "epoch": 49.11940298507463, "grad_norm": NaN, "learning_rate": 8.511904761904762e-06, "loss": 77.195, "step": 2063 }, { "epoch": 49.14328358208955, "grad_norm": 23.695045471191406, "learning_rate": 8.511904761904762e-06, "loss": 45.1853, "step": 2064 }, { "epoch": 49.167164179104475, "grad_norm": 16.90850830078125, "learning_rate": 8.507936507936509e-06, "loss": 44.0122, "step": 2065 }, { "epoch": 49.191044776119405, "grad_norm": 30.50786781311035, "learning_rate": 8.503968253968255e-06, "loss": 44.8398, "step": 2066 }, { "epoch": 49.21492537313433, "grad_norm": 24.35599136352539, "learning_rate": 8.5e-06, "loss": 43.4544, "step": 2067 }, { "epoch": 49.23880597014925, "grad_norm": 29.541887283325195, "learning_rate": 8.496031746031747e-06, "loss": 45.1471, "step": 2068 }, { "epoch": 49.26268656716418, "grad_norm": 20.277528762817383, "learning_rate": 8.492063492063492e-06, "loss": 45.1862, "step": 2069 }, { "epoch": 49.286567164179104, "grad_norm": 33.5463752746582, "learning_rate": 8.488095238095238e-06, "loss": 43.5467, "step": 2070 }, { "epoch": 49.31044776119403, "grad_norm": 23.218936920166016, "learning_rate": 8.484126984126985e-06, "loss": 44.6577, "step": 2071 }, { "epoch": 49.33432835820896, "grad_norm": 36.53571701049805, "learning_rate": 8.48015873015873e-06, "loss": 46.4774, "step": 2072 }, { "epoch": 49.35820895522388, "grad_norm": 32.15842819213867, "learning_rate": 8.476190476190477e-06, "loss": 45.3236, "step": 2073 }, { "epoch": 49.3820895522388, "grad_norm": 29.57740020751953, "learning_rate": 8.472222222222223e-06, "loss": 44.7034, "step": 2074 }, { "epoch": 49.40597014925373, "grad_norm": 28.12784194946289, "learning_rate": 8.468253968253968e-06, "loss": 43.741, "step": 2075 }, { "epoch": 49.429850746268656, "grad_norm": 28.08392906188965, "learning_rate": 8.464285714285715e-06, "loss": 45.326, "step": 2076 }, { "epoch": 49.45373134328358, "grad_norm": 24.909330368041992, "learning_rate": 8.46031746031746e-06, "loss": 45.979, "step": 2077 }, { "epoch": 49.47761194029851, "grad_norm": 26.343902587890625, "learning_rate": 8.456349206349208e-06, "loss": 44.1665, "step": 2078 }, { "epoch": 49.50149253731343, "grad_norm": 30.070533752441406, "learning_rate": 8.452380952380953e-06, "loss": 45.1331, "step": 2079 }, { "epoch": 49.525373134328355, "grad_norm": 26.733827590942383, "learning_rate": 8.448412698412699e-06, "loss": 43.9576, "step": 2080 }, { "epoch": 49.549253731343285, "grad_norm": 31.43610191345215, "learning_rate": 8.444444444444446e-06, "loss": 44.3933, "step": 2081 }, { "epoch": 49.57313432835821, "grad_norm": 24.856496810913086, "learning_rate": 8.440476190476191e-06, "loss": 44.561, "step": 2082 }, { "epoch": 49.59701492537313, "grad_norm": 30.097368240356445, "learning_rate": 8.436507936507936e-06, "loss": 44.617, "step": 2083 }, { "epoch": 49.62089552238806, "grad_norm": 26.63928985595703, "learning_rate": 8.432539682539684e-06, "loss": 45.1091, "step": 2084 }, { "epoch": 49.644776119402984, "grad_norm": 33.428932189941406, "learning_rate": 8.428571428571429e-06, "loss": 45.8576, "step": 2085 }, { "epoch": 49.668656716417914, "grad_norm": 26.33061408996582, "learning_rate": 8.424603174603176e-06, "loss": 46.6266, "step": 2086 }, { "epoch": 49.69253731343284, "grad_norm": 35.67467498779297, "learning_rate": 8.420634920634921e-06, "loss": 43.8886, "step": 2087 }, { "epoch": 49.71641791044776, "grad_norm": 33.62556076049805, "learning_rate": 8.416666666666667e-06, "loss": 44.819, "step": 2088 }, { "epoch": 49.74029850746269, "grad_norm": 29.146684646606445, "learning_rate": 8.412698412698414e-06, "loss": 45.1877, "step": 2089 }, { "epoch": 49.76417910447761, "grad_norm": 29.51055335998535, "learning_rate": 8.40873015873016e-06, "loss": 44.9054, "step": 2090 }, { "epoch": 49.788059701492536, "grad_norm": 31.709413528442383, "learning_rate": 8.404761904761905e-06, "loss": 44.8456, "step": 2091 }, { "epoch": 49.811940298507466, "grad_norm": 26.646390914916992, "learning_rate": 8.400793650793652e-06, "loss": 44.1815, "step": 2092 }, { "epoch": 49.83582089552239, "grad_norm": 35.582496643066406, "learning_rate": 8.396825396825397e-06, "loss": 44.9951, "step": 2093 }, { "epoch": 49.85970149253731, "grad_norm": 25.587371826171875, "learning_rate": 8.392857142857144e-06, "loss": 44.3349, "step": 2094 }, { "epoch": 49.88358208955224, "grad_norm": 29.13399887084961, "learning_rate": 8.38888888888889e-06, "loss": 45.28, "step": 2095 }, { "epoch": 49.907462686567165, "grad_norm": 21.462890625, "learning_rate": 8.384920634920635e-06, "loss": 44.4383, "step": 2096 }, { "epoch": 49.93134328358209, "grad_norm": 31.970626831054688, "learning_rate": 8.380952380952382e-06, "loss": 45.989, "step": 2097 }, { "epoch": 49.95522388059702, "grad_norm": 21.948705673217773, "learning_rate": 8.376984126984128e-06, "loss": 44.0871, "step": 2098 }, { "epoch": 49.97910447761194, "grad_norm": 35.07805252075195, "learning_rate": 8.373015873015875e-06, "loss": 44.709, "step": 2099 }, { "epoch": 50.0, "grad_norm": 21.554956436157227, "learning_rate": 8.36904761904762e-06, "loss": 38.6725, "step": 2100 }, { "epoch": 50.02388059701492, "grad_norm": 35.4162712097168, "learning_rate": 8.365079365079365e-06, "loss": 44.2866, "step": 2101 }, { "epoch": 50.04776119402985, "grad_norm": 31.357215881347656, "learning_rate": 8.361111111111113e-06, "loss": 44.9399, "step": 2102 }, { "epoch": 50.071641791044776, "grad_norm": 28.055850982666016, "learning_rate": 8.357142857142858e-06, "loss": 44.2145, "step": 2103 }, { "epoch": 50.0955223880597, "grad_norm": 27.62700080871582, "learning_rate": 8.353174603174603e-06, "loss": 44.715, "step": 2104 }, { "epoch": 50.11940298507463, "grad_norm": 32.586219787597656, "learning_rate": 8.34920634920635e-06, "loss": 45.6174, "step": 2105 }, { "epoch": 50.14328358208955, "grad_norm": 24.922584533691406, "learning_rate": 8.345238095238096e-06, "loss": 46.0653, "step": 2106 }, { "epoch": 50.167164179104475, "grad_norm": 29.282079696655273, "learning_rate": 8.341269841269843e-06, "loss": 44.8826, "step": 2107 }, { "epoch": 50.191044776119405, "grad_norm": 25.85003089904785, "learning_rate": 8.337301587301588e-06, "loss": 43.7337, "step": 2108 }, { "epoch": 50.21492537313433, "grad_norm": 26.331398010253906, "learning_rate": 8.333333333333334e-06, "loss": 44.9624, "step": 2109 }, { "epoch": 50.23880597014925, "grad_norm": 19.595951080322266, "learning_rate": 8.32936507936508e-06, "loss": 45.0561, "step": 2110 }, { "epoch": 50.26268656716418, "grad_norm": 18.431438446044922, "learning_rate": 8.325396825396826e-06, "loss": 44.6963, "step": 2111 }, { "epoch": 50.286567164179104, "grad_norm": 20.670730590820312, "learning_rate": 8.321428571428573e-06, "loss": 44.6057, "step": 2112 }, { "epoch": 50.31044776119403, "grad_norm": 20.497106552124023, "learning_rate": 8.317460317460319e-06, "loss": 45.6219, "step": 2113 }, { "epoch": 50.33432835820896, "grad_norm": 21.33808708190918, "learning_rate": 8.313492063492064e-06, "loss": 43.6802, "step": 2114 }, { "epoch": 50.35820895522388, "grad_norm": 17.015180587768555, "learning_rate": 8.309523809523811e-06, "loss": 45.6156, "step": 2115 }, { "epoch": 50.3820895522388, "grad_norm": 25.82108497619629, "learning_rate": 8.305555555555557e-06, "loss": 45.529, "step": 2116 }, { "epoch": 50.40597014925373, "grad_norm": 20.37699317932129, "learning_rate": 8.301587301587302e-06, "loss": 44.4007, "step": 2117 }, { "epoch": 50.429850746268656, "grad_norm": 24.1844482421875, "learning_rate": 8.297619047619049e-06, "loss": 45.0155, "step": 2118 }, { "epoch": 50.45373134328358, "grad_norm": 21.229581832885742, "learning_rate": 8.293650793650794e-06, "loss": 44.8109, "step": 2119 }, { "epoch": 50.47761194029851, "grad_norm": 23.752500534057617, "learning_rate": 8.289682539682542e-06, "loss": 45.1129, "step": 2120 }, { "epoch": 50.50149253731343, "grad_norm": 19.724092483520508, "learning_rate": 8.285714285714287e-06, "loss": 44.1519, "step": 2121 }, { "epoch": 50.525373134328355, "grad_norm": 21.154827117919922, "learning_rate": 8.281746031746032e-06, "loss": 43.8136, "step": 2122 }, { "epoch": 50.549253731343285, "grad_norm": 21.17751121520996, "learning_rate": 8.277777777777778e-06, "loss": 44.7593, "step": 2123 }, { "epoch": 50.57313432835821, "grad_norm": 24.729738235473633, "learning_rate": 8.273809523809523e-06, "loss": 44.7794, "step": 2124 }, { "epoch": 50.59701492537313, "grad_norm": 18.432241439819336, "learning_rate": 8.26984126984127e-06, "loss": 44.0237, "step": 2125 }, { "epoch": 50.62089552238806, "grad_norm": 26.357515335083008, "learning_rate": 8.265873015873016e-06, "loss": 45.2566, "step": 2126 }, { "epoch": 50.644776119402984, "grad_norm": 24.270259857177734, "learning_rate": 8.261904761904763e-06, "loss": 44.1182, "step": 2127 }, { "epoch": 50.668656716417914, "grad_norm": 20.756067276000977, "learning_rate": 8.257936507936508e-06, "loss": 46.2374, "step": 2128 }, { "epoch": 50.69253731343284, "grad_norm": 23.159393310546875, "learning_rate": 8.253968253968254e-06, "loss": 44.1878, "step": 2129 }, { "epoch": 50.71641791044776, "grad_norm": 22.44221305847168, "learning_rate": 8.25e-06, "loss": 45.3746, "step": 2130 }, { "epoch": 50.74029850746269, "grad_norm": 20.27827262878418, "learning_rate": 8.246031746031746e-06, "loss": 44.1278, "step": 2131 }, { "epoch": 50.76417910447761, "grad_norm": 21.407669067382812, "learning_rate": 8.242063492063493e-06, "loss": 44.8487, "step": 2132 }, { "epoch": 50.788059701492536, "grad_norm": 24.570688247680664, "learning_rate": 8.238095238095239e-06, "loss": 44.2913, "step": 2133 }, { "epoch": 50.811940298507466, "grad_norm": 23.73247528076172, "learning_rate": 8.234126984126984e-06, "loss": 45.4539, "step": 2134 }, { "epoch": 50.83582089552239, "grad_norm": 20.265886306762695, "learning_rate": 8.230158730158731e-06, "loss": 43.1901, "step": 2135 }, { "epoch": 50.85970149253731, "grad_norm": 16.51488494873047, "learning_rate": 8.226190476190476e-06, "loss": 45.0321, "step": 2136 }, { "epoch": 50.88358208955224, "grad_norm": 19.107425689697266, "learning_rate": 8.222222222222222e-06, "loss": 44.3746, "step": 2137 }, { "epoch": 50.907462686567165, "grad_norm": 19.300790786743164, "learning_rate": 8.218253968253969e-06, "loss": 45.1466, "step": 2138 }, { "epoch": 50.93134328358209, "grad_norm": 19.817272186279297, "learning_rate": 8.214285714285714e-06, "loss": 44.9703, "step": 2139 }, { "epoch": 50.95522388059702, "grad_norm": 22.794174194335938, "learning_rate": 8.210317460317461e-06, "loss": 43.917, "step": 2140 }, { "epoch": 50.97910447761194, "grad_norm": 18.948871612548828, "learning_rate": 8.206349206349207e-06, "loss": 44.4099, "step": 2141 }, { "epoch": 51.0, "grad_norm": 13.966577529907227, "learning_rate": 8.202380952380952e-06, "loss": 38.9733, "step": 2142 }, { "epoch": 51.02388059701492, "grad_norm": 29.5616397857666, "learning_rate": 8.1984126984127e-06, "loss": 44.8355, "step": 2143 }, { "epoch": 51.04776119402985, "grad_norm": 22.391014099121094, "learning_rate": 8.194444444444445e-06, "loss": 44.6835, "step": 2144 }, { "epoch": 51.071641791044776, "grad_norm": 28.830854415893555, "learning_rate": 8.190476190476192e-06, "loss": 43.3011, "step": 2145 }, { "epoch": 51.0955223880597, "grad_norm": 21.114011764526367, "learning_rate": 8.186507936507937e-06, "loss": 44.4223, "step": 2146 }, { "epoch": 51.11940298507463, "grad_norm": 28.902416229248047, "learning_rate": 8.182539682539683e-06, "loss": 44.0485, "step": 2147 }, { "epoch": 51.14328358208955, "grad_norm": 21.923168182373047, "learning_rate": 8.17857142857143e-06, "loss": 45.3272, "step": 2148 }, { "epoch": 51.167164179104475, "grad_norm": 28.772884368896484, "learning_rate": 8.174603174603175e-06, "loss": 45.6205, "step": 2149 }, { "epoch": 51.191044776119405, "grad_norm": 23.949098587036133, "learning_rate": 8.17063492063492e-06, "loss": 45.0204, "step": 2150 }, { "epoch": 51.21492537313433, "grad_norm": 26.735624313354492, "learning_rate": 8.166666666666668e-06, "loss": 45.6338, "step": 2151 }, { "epoch": 51.23880597014925, "grad_norm": 28.049888610839844, "learning_rate": 8.162698412698413e-06, "loss": 44.2502, "step": 2152 }, { "epoch": 51.26268656716418, "grad_norm": 23.256439208984375, "learning_rate": 8.15873015873016e-06, "loss": 44.1981, "step": 2153 }, { "epoch": 51.286567164179104, "grad_norm": 32.3640022277832, "learning_rate": 8.154761904761905e-06, "loss": 43.6928, "step": 2154 }, { "epoch": 51.31044776119403, "grad_norm": 23.900907516479492, "learning_rate": 8.15079365079365e-06, "loss": 45.3594, "step": 2155 }, { "epoch": 51.33432835820896, "grad_norm": 39.41314697265625, "learning_rate": 8.146825396825398e-06, "loss": 44.5862, "step": 2156 }, { "epoch": 51.35820895522388, "grad_norm": 31.826566696166992, "learning_rate": 8.142857142857143e-06, "loss": 44.6213, "step": 2157 }, { "epoch": 51.3820895522388, "grad_norm": 35.3351936340332, "learning_rate": 8.138888888888889e-06, "loss": 44.9952, "step": 2158 }, { "epoch": 51.40597014925373, "grad_norm": 33.0169677734375, "learning_rate": 8.134920634920636e-06, "loss": 44.7576, "step": 2159 }, { "epoch": 51.429850746268656, "grad_norm": 32.347251892089844, "learning_rate": 8.130952380952381e-06, "loss": 45.0997, "step": 2160 }, { "epoch": 51.45373134328358, "grad_norm": 25.79857635498047, "learning_rate": 8.126984126984128e-06, "loss": 45.8578, "step": 2161 }, { "epoch": 51.47761194029851, "grad_norm": 33.378108978271484, "learning_rate": 8.123015873015874e-06, "loss": 44.6084, "step": 2162 }, { "epoch": 51.50149253731343, "grad_norm": 27.625028610229492, "learning_rate": 8.119047619047619e-06, "loss": 45.1928, "step": 2163 }, { "epoch": 51.525373134328355, "grad_norm": 32.47718811035156, "learning_rate": 8.115079365079366e-06, "loss": 44.38, "step": 2164 }, { "epoch": 51.549253731343285, "grad_norm": 31.10133934020996, "learning_rate": 8.111111111111112e-06, "loss": 44.1878, "step": 2165 }, { "epoch": 51.57313432835821, "grad_norm": 33.062007904052734, "learning_rate": 8.107142857142859e-06, "loss": 44.6587, "step": 2166 }, { "epoch": 51.59701492537313, "grad_norm": 31.35774803161621, "learning_rate": 8.103174603174604e-06, "loss": 44.0408, "step": 2167 }, { "epoch": 51.62089552238806, "grad_norm": 35.262237548828125, "learning_rate": 8.09920634920635e-06, "loss": 45.3717, "step": 2168 }, { "epoch": 51.644776119402984, "grad_norm": 32.77524948120117, "learning_rate": 8.095238095238097e-06, "loss": 44.8105, "step": 2169 }, { "epoch": 51.668656716417914, "grad_norm": 28.838821411132812, "learning_rate": 8.091269841269842e-06, "loss": 44.3364, "step": 2170 }, { "epoch": 51.69253731343284, "grad_norm": 26.18807029724121, "learning_rate": 8.087301587301587e-06, "loss": 44.5054, "step": 2171 }, { "epoch": 51.71641791044776, "grad_norm": 31.639286041259766, "learning_rate": 8.083333333333334e-06, "loss": 45.4023, "step": 2172 }, { "epoch": 51.74029850746269, "grad_norm": 27.998628616333008, "learning_rate": 8.07936507936508e-06, "loss": 44.8306, "step": 2173 }, { "epoch": 51.76417910447761, "grad_norm": 30.69230079650879, "learning_rate": 8.075396825396827e-06, "loss": 45.1802, "step": 2174 }, { "epoch": 51.788059701492536, "grad_norm": 23.640962600708008, "learning_rate": 8.071428571428572e-06, "loss": 43.7667, "step": 2175 }, { "epoch": 51.811940298507466, "grad_norm": 29.017114639282227, "learning_rate": 8.067460317460318e-06, "loss": 43.9821, "step": 2176 }, { "epoch": 51.83582089552239, "grad_norm": 21.79175567626953, "learning_rate": 8.063492063492065e-06, "loss": 45.0959, "step": 2177 }, { "epoch": 51.85970149253731, "grad_norm": 25.505756378173828, "learning_rate": 8.05952380952381e-06, "loss": 44.1622, "step": 2178 }, { "epoch": 51.88358208955224, "grad_norm": 19.43979263305664, "learning_rate": 8.055555555555557e-06, "loss": 43.4959, "step": 2179 }, { "epoch": 51.907462686567165, "grad_norm": 32.855037689208984, "learning_rate": 8.051587301587303e-06, "loss": 44.3206, "step": 2180 }, { "epoch": 51.93134328358209, "grad_norm": 23.80797576904297, "learning_rate": 8.047619047619048e-06, "loss": 43.6716, "step": 2181 }, { "epoch": 51.95522388059702, "grad_norm": 37.09321594238281, "learning_rate": 8.043650793650795e-06, "loss": 45.3091, "step": 2182 }, { "epoch": 51.97910447761194, "grad_norm": 25.76487922668457, "learning_rate": 8.03968253968254e-06, "loss": 44.5829, "step": 2183 }, { "epoch": 52.0, "grad_norm": 24.34773063659668, "learning_rate": 8.035714285714286e-06, "loss": 39.637, "step": 2184 }, { "epoch": 52.02388059701492, "grad_norm": 24.28459358215332, "learning_rate": 8.031746031746033e-06, "loss": 42.8823, "step": 2185 }, { "epoch": 52.04776119402985, "grad_norm": 31.015172958374023, "learning_rate": 8.027777777777778e-06, "loss": 43.6859, "step": 2186 }, { "epoch": 52.071641791044776, "grad_norm": 27.413232803344727, "learning_rate": 8.023809523809526e-06, "loss": 44.0734, "step": 2187 }, { "epoch": 52.0955223880597, "grad_norm": 34.3042106628418, "learning_rate": 8.019841269841271e-06, "loss": 44.4303, "step": 2188 }, { "epoch": 52.11940298507463, "grad_norm": 25.737226486206055, "learning_rate": 8.015873015873016e-06, "loss": 45.6858, "step": 2189 }, { "epoch": 52.14328358208955, "grad_norm": 33.09044647216797, "learning_rate": 8.011904761904763e-06, "loss": 44.0591, "step": 2190 }, { "epoch": 52.167164179104475, "grad_norm": 26.903594970703125, "learning_rate": 8.007936507936509e-06, "loss": 44.4434, "step": 2191 }, { "epoch": 52.191044776119405, "grad_norm": 32.05507278442383, "learning_rate": 8.003968253968254e-06, "loss": 44.1334, "step": 2192 }, { "epoch": 52.21492537313433, "grad_norm": 23.954050064086914, "learning_rate": 8.000000000000001e-06, "loss": 45.4077, "step": 2193 }, { "epoch": 52.23880597014925, "grad_norm": 25.273069381713867, "learning_rate": 7.996031746031747e-06, "loss": 44.4704, "step": 2194 }, { "epoch": 52.26268656716418, "grad_norm": 24.762975692749023, "learning_rate": 7.992063492063494e-06, "loss": 44.9846, "step": 2195 }, { "epoch": 52.286567164179104, "grad_norm": 31.624853134155273, "learning_rate": 7.98809523809524e-06, "loss": 44.6678, "step": 2196 }, { "epoch": 52.31044776119403, "grad_norm": 20.407798767089844, "learning_rate": 7.984126984126985e-06, "loss": 44.5191, "step": 2197 }, { "epoch": 52.33432835820896, "grad_norm": 35.610721588134766, "learning_rate": 7.980158730158732e-06, "loss": 43.797, "step": 2198 }, { "epoch": 52.35820895522388, "grad_norm": 23.916271209716797, "learning_rate": 7.976190476190477e-06, "loss": 44.5035, "step": 2199 }, { "epoch": 52.3820895522388, "grad_norm": 30.07246971130371, "learning_rate": 7.972222222222224e-06, "loss": 44.8658, "step": 2200 }, { "epoch": 52.40597014925373, "grad_norm": 26.69670295715332, "learning_rate": 7.968253968253968e-06, "loss": 43.1086, "step": 2201 }, { "epoch": 52.429850746268656, "grad_norm": 35.99201583862305, "learning_rate": 7.964285714285715e-06, "loss": 43.8965, "step": 2202 }, { "epoch": 52.45373134328358, "grad_norm": 26.909433364868164, "learning_rate": 7.96031746031746e-06, "loss": 44.3023, "step": 2203 }, { "epoch": 52.47761194029851, "grad_norm": 31.2402286529541, "learning_rate": 7.956349206349206e-06, "loss": 43.8009, "step": 2204 }, { "epoch": 52.50149253731343, "grad_norm": 28.230714797973633, "learning_rate": 7.952380952380953e-06, "loss": 45.6781, "step": 2205 }, { "epoch": 52.525373134328355, "grad_norm": 32.47516632080078, "learning_rate": 7.948412698412698e-06, "loss": 46.0123, "step": 2206 }, { "epoch": 52.549253731343285, "grad_norm": 29.042253494262695, "learning_rate": 7.944444444444445e-06, "loss": 46.4036, "step": 2207 }, { "epoch": 52.57313432835821, "grad_norm": 24.23044776916504, "learning_rate": 7.94047619047619e-06, "loss": 44.0722, "step": 2208 }, { "epoch": 52.59701492537313, "grad_norm": 25.844972610473633, "learning_rate": 7.936507936507936e-06, "loss": 44.1403, "step": 2209 }, { "epoch": 52.62089552238806, "grad_norm": 25.40447235107422, "learning_rate": 7.932539682539683e-06, "loss": 43.699, "step": 2210 }, { "epoch": 52.644776119402984, "grad_norm": 24.027687072753906, "learning_rate": 7.928571428571429e-06, "loss": 45.1803, "step": 2211 }, { "epoch": 52.668656716417914, "grad_norm": 22.707393646240234, "learning_rate": 7.924603174603174e-06, "loss": 43.7808, "step": 2212 }, { "epoch": 52.69253731343284, "grad_norm": 17.410104751586914, "learning_rate": 7.920634920634921e-06, "loss": 44.7556, "step": 2213 }, { "epoch": 52.71641791044776, "grad_norm": 19.376863479614258, "learning_rate": 7.916666666666667e-06, "loss": 45.3176, "step": 2214 }, { "epoch": 52.74029850746269, "grad_norm": 21.29641342163086, "learning_rate": 7.912698412698414e-06, "loss": 44.8597, "step": 2215 }, { "epoch": 52.76417910447761, "grad_norm": 21.937013626098633, "learning_rate": 7.908730158730159e-06, "loss": 44.3548, "step": 2216 }, { "epoch": 52.788059701492536, "grad_norm": 27.38592529296875, "learning_rate": 7.904761904761904e-06, "loss": 45.204, "step": 2217 }, { "epoch": 52.811940298507466, "grad_norm": 21.232566833496094, "learning_rate": 7.900793650793652e-06, "loss": 43.9788, "step": 2218 }, { "epoch": 52.83582089552239, "grad_norm": 22.52651023864746, "learning_rate": 7.896825396825397e-06, "loss": 44.161, "step": 2219 }, { "epoch": 52.85970149253731, "grad_norm": 23.06977081298828, "learning_rate": 7.892857142857144e-06, "loss": 44.5394, "step": 2220 }, { "epoch": 52.88358208955224, "grad_norm": 19.71670150756836, "learning_rate": 7.88888888888889e-06, "loss": 44.4384, "step": 2221 }, { "epoch": 52.907462686567165, "grad_norm": 19.651142120361328, "learning_rate": 7.884920634920635e-06, "loss": 45.3143, "step": 2222 }, { "epoch": 52.93134328358209, "grad_norm": 23.386962890625, "learning_rate": 7.880952380952382e-06, "loss": 44.4246, "step": 2223 }, { "epoch": 52.95522388059702, "grad_norm": 19.763513565063477, "learning_rate": 7.876984126984127e-06, "loss": 45.6001, "step": 2224 }, { "epoch": 52.97910447761194, "grad_norm": 21.81069564819336, "learning_rate": 7.873015873015873e-06, "loss": 45.6176, "step": 2225 }, { "epoch": 53.0, "grad_norm": 18.15079689025879, "learning_rate": 7.86904761904762e-06, "loss": 39.4819, "step": 2226 }, { "epoch": 53.02388059701492, "grad_norm": 17.333694458007812, "learning_rate": 7.865079365079365e-06, "loss": 45.612, "step": 2227 }, { "epoch": 53.04776119402985, "grad_norm": 18.72818946838379, "learning_rate": 7.861111111111112e-06, "loss": 43.5423, "step": 2228 }, { "epoch": 53.071641791044776, "grad_norm": 18.34732437133789, "learning_rate": 7.857142857142858e-06, "loss": 43.972, "step": 2229 }, { "epoch": 53.0955223880597, "grad_norm": 21.568077087402344, "learning_rate": 7.853174603174603e-06, "loss": 44.8122, "step": 2230 }, { "epoch": 53.11940298507463, "grad_norm": 20.801836013793945, "learning_rate": 7.84920634920635e-06, "loss": 43.8729, "step": 2231 }, { "epoch": 53.14328358208955, "grad_norm": 23.20212745666504, "learning_rate": 7.845238095238096e-06, "loss": 45.3738, "step": 2232 }, { "epoch": 53.167164179104475, "grad_norm": 24.016311645507812, "learning_rate": 7.841269841269843e-06, "loss": 45.0862, "step": 2233 }, { "epoch": 53.191044776119405, "grad_norm": 18.800554275512695, "learning_rate": 7.837301587301588e-06, "loss": 43.2166, "step": 2234 }, { "epoch": 53.21492537313433, "grad_norm": 20.73765754699707, "learning_rate": 7.833333333333333e-06, "loss": 43.3917, "step": 2235 }, { "epoch": 53.23880597014925, "grad_norm": 23.1943302154541, "learning_rate": 7.82936507936508e-06, "loss": 44.0957, "step": 2236 }, { "epoch": 53.26268656716418, "grad_norm": 18.450380325317383, "learning_rate": 7.825396825396826e-06, "loss": 44.1782, "step": 2237 }, { "epoch": 53.286567164179104, "grad_norm": 24.16314697265625, "learning_rate": 7.821428571428571e-06, "loss": 45.0735, "step": 2238 }, { "epoch": 53.31044776119403, "grad_norm": 25.004743576049805, "learning_rate": 7.817460317460318e-06, "loss": 44.4628, "step": 2239 }, { "epoch": 53.33432835820896, "grad_norm": 17.29636573791504, "learning_rate": 7.813492063492064e-06, "loss": 45.2476, "step": 2240 }, { "epoch": 53.35820895522388, "grad_norm": 24.759471893310547, "learning_rate": 7.809523809523811e-06, "loss": 44.71, "step": 2241 }, { "epoch": 53.3820895522388, "grad_norm": 21.52720832824707, "learning_rate": 7.805555555555556e-06, "loss": 44.9929, "step": 2242 }, { "epoch": 53.40597014925373, "grad_norm": 22.760278701782227, "learning_rate": 7.801587301587302e-06, "loss": 43.6639, "step": 2243 }, { "epoch": 53.429850746268656, "grad_norm": 19.5325927734375, "learning_rate": 7.797619047619049e-06, "loss": 44.2974, "step": 2244 }, { "epoch": 53.45373134328358, "grad_norm": 25.756797790527344, "learning_rate": 7.793650793650794e-06, "loss": 45.401, "step": 2245 }, { "epoch": 53.47761194029851, "grad_norm": 19.75324058532715, "learning_rate": 7.78968253968254e-06, "loss": 44.6426, "step": 2246 }, { "epoch": 53.50149253731343, "grad_norm": 25.47930145263672, "learning_rate": 7.785714285714287e-06, "loss": 42.2875, "step": 2247 }, { "epoch": 53.525373134328355, "grad_norm": 21.61121368408203, "learning_rate": 7.781746031746032e-06, "loss": 45.7982, "step": 2248 }, { "epoch": 53.549253731343285, "grad_norm": 24.11342430114746, "learning_rate": 7.77777777777778e-06, "loss": 43.6397, "step": 2249 }, { "epoch": 53.57313432835821, "grad_norm": 25.151281356811523, "learning_rate": 7.773809523809525e-06, "loss": 44.0536, "step": 2250 }, { "epoch": 53.59701492537313, "grad_norm": 21.925559997558594, "learning_rate": 7.76984126984127e-06, "loss": 45.2035, "step": 2251 }, { "epoch": 53.62089552238806, "grad_norm": 22.38170623779297, "learning_rate": 7.765873015873017e-06, "loss": 44.3272, "step": 2252 }, { "epoch": 53.644776119402984, "grad_norm": 24.35360336303711, "learning_rate": 7.761904761904762e-06, "loss": 45.687, "step": 2253 }, { "epoch": 53.668656716417914, "grad_norm": 20.127119064331055, "learning_rate": 7.75793650793651e-06, "loss": 44.001, "step": 2254 }, { "epoch": 53.69253731343284, "grad_norm": 20.66204833984375, "learning_rate": 7.753968253968255e-06, "loss": 45.1368, "step": 2255 }, { "epoch": 53.71641791044776, "grad_norm": 22.565038681030273, "learning_rate": 7.75e-06, "loss": 43.7021, "step": 2256 }, { "epoch": 53.74029850746269, "grad_norm": 20.893674850463867, "learning_rate": 7.746031746031747e-06, "loss": 44.381, "step": 2257 }, { "epoch": 53.76417910447761, "grad_norm": 21.53620147705078, "learning_rate": 7.742063492063493e-06, "loss": 45.2511, "step": 2258 }, { "epoch": 53.788059701492536, "grad_norm": 20.66484832763672, "learning_rate": 7.738095238095238e-06, "loss": 45.167, "step": 2259 }, { "epoch": 53.811940298507466, "grad_norm": 24.964414596557617, "learning_rate": 7.734126984126985e-06, "loss": 44.6754, "step": 2260 }, { "epoch": 53.83582089552239, "grad_norm": 22.17997169494629, "learning_rate": 7.73015873015873e-06, "loss": 44.1696, "step": 2261 }, { "epoch": 53.85970149253731, "grad_norm": 19.715208053588867, "learning_rate": 7.726190476190478e-06, "loss": 43.8961, "step": 2262 }, { "epoch": 53.88358208955224, "grad_norm": 27.562166213989258, "learning_rate": 7.722222222222223e-06, "loss": 43.9035, "step": 2263 }, { "epoch": 53.907462686567165, "grad_norm": 21.021081924438477, "learning_rate": 7.718253968253969e-06, "loss": 45.5108, "step": 2264 }, { "epoch": 53.93134328358209, "grad_norm": 25.675813674926758, "learning_rate": 7.714285714285716e-06, "loss": 44.5437, "step": 2265 }, { "epoch": 53.95522388059702, "grad_norm": 25.80996322631836, "learning_rate": 7.710317460317461e-06, "loss": 44.7417, "step": 2266 }, { "epoch": 53.97910447761194, "grad_norm": 23.055217742919922, "learning_rate": 7.706349206349208e-06, "loss": 44.3122, "step": 2267 }, { "epoch": 54.0, "grad_norm": 14.958904266357422, "learning_rate": 7.702380952380954e-06, "loss": 40.1174, "step": 2268 }, { "epoch": 54.02388059701492, "grad_norm": 23.825021743774414, "learning_rate": 7.698412698412699e-06, "loss": 43.6857, "step": 2269 }, { "epoch": 54.04776119402985, "grad_norm": 20.29824447631836, "learning_rate": 7.694444444444446e-06, "loss": 43.9979, "step": 2270 }, { "epoch": 54.071641791044776, "grad_norm": 19.905017852783203, "learning_rate": 7.690476190476191e-06, "loss": 44.1637, "step": 2271 }, { "epoch": 54.0955223880597, "grad_norm": 16.342378616333008, "learning_rate": 7.686507936507937e-06, "loss": 43.9998, "step": 2272 }, { "epoch": 54.11940298507463, "grad_norm": 22.551780700683594, "learning_rate": 7.682539682539684e-06, "loss": 44.6528, "step": 2273 }, { "epoch": 54.14328358208955, "grad_norm": 16.87897491455078, "learning_rate": 7.67857142857143e-06, "loss": 44.4259, "step": 2274 }, { "epoch": 54.167164179104475, "grad_norm": 27.494592666625977, "learning_rate": 7.674603174603176e-06, "loss": 45.7648, "step": 2275 }, { "epoch": 54.191044776119405, "grad_norm": 22.326452255249023, "learning_rate": 7.670634920634922e-06, "loss": 44.1077, "step": 2276 }, { "epoch": 54.21492537313433, "grad_norm": 30.2500057220459, "learning_rate": 7.666666666666667e-06, "loss": 44.5322, "step": 2277 }, { "epoch": 54.23880597014925, "grad_norm": 28.212095260620117, "learning_rate": 7.662698412698414e-06, "loss": 43.6225, "step": 2278 }, { "epoch": 54.26268656716418, "grad_norm": 23.524145126342773, "learning_rate": 7.65873015873016e-06, "loss": 44.9014, "step": 2279 }, { "epoch": 54.286567164179104, "grad_norm": 29.799076080322266, "learning_rate": 7.654761904761905e-06, "loss": 44.6654, "step": 2280 }, { "epoch": 54.31044776119403, "grad_norm": 20.350683212280273, "learning_rate": 7.65079365079365e-06, "loss": 44.254, "step": 2281 }, { "epoch": 54.33432835820896, "grad_norm": 25.748899459838867, "learning_rate": 7.646825396825398e-06, "loss": 44.4278, "step": 2282 }, { "epoch": 54.35820895522388, "grad_norm": 25.086284637451172, "learning_rate": 7.642857142857143e-06, "loss": 43.8974, "step": 2283 }, { "epoch": 54.3820895522388, "grad_norm": 24.49972915649414, "learning_rate": 7.638888888888888e-06, "loss": 44.4423, "step": 2284 }, { "epoch": 54.40597014925373, "grad_norm": 18.78260612487793, "learning_rate": 7.634920634920635e-06, "loss": 44.5666, "step": 2285 }, { "epoch": 54.429850746268656, "grad_norm": 20.125263214111328, "learning_rate": 7.630952380952381e-06, "loss": 44.4853, "step": 2286 }, { "epoch": 54.45373134328358, "grad_norm": 20.763385772705078, "learning_rate": 7.626984126984127e-06, "loss": 43.6951, "step": 2287 }, { "epoch": 54.47761194029851, "grad_norm": 22.267620086669922, "learning_rate": 7.623015873015873e-06, "loss": 45.0328, "step": 2288 }, { "epoch": 54.50149253731343, "grad_norm": 24.786283493041992, "learning_rate": 7.61904761904762e-06, "loss": 45.0047, "step": 2289 }, { "epoch": 54.525373134328355, "grad_norm": 18.581987380981445, "learning_rate": 7.615079365079365e-06, "loss": 45.0172, "step": 2290 }, { "epoch": 54.549253731343285, "grad_norm": 27.262859344482422, "learning_rate": 7.611111111111111e-06, "loss": 44.1982, "step": 2291 }, { "epoch": 54.57313432835821, "grad_norm": 25.255537033081055, "learning_rate": 7.6071428571428575e-06, "loss": 43.8743, "step": 2292 }, { "epoch": 54.59701492537313, "grad_norm": 17.982698440551758, "learning_rate": 7.603174603174604e-06, "loss": 43.4799, "step": 2293 }, { "epoch": 54.62089552238806, "grad_norm": 25.1834716796875, "learning_rate": 7.599206349206349e-06, "loss": 44.8257, "step": 2294 }, { "epoch": 54.644776119402984, "grad_norm": 19.047700881958008, "learning_rate": 7.595238095238095e-06, "loss": 45.2678, "step": 2295 }, { "epoch": 54.668656716417914, "grad_norm": 26.404882431030273, "learning_rate": 7.591269841269842e-06, "loss": 43.3948, "step": 2296 }, { "epoch": 54.69253731343284, "grad_norm": 19.84337615966797, "learning_rate": 7.587301587301588e-06, "loss": 45.7629, "step": 2297 }, { "epoch": 54.71641791044776, "grad_norm": 19.9034481048584, "learning_rate": 7.583333333333333e-06, "loss": 44.4071, "step": 2298 }, { "epoch": 54.74029850746269, "grad_norm": NaN, "learning_rate": 7.5793650793650795e-06, "loss": 62.5737, "step": 2299 }, { "epoch": 54.76417910447761, "grad_norm": 21.210494995117188, "learning_rate": 7.5793650793650795e-06, "loss": 44.7093, "step": 2300 }, { "epoch": 54.788059701492536, "grad_norm": 21.403833389282227, "learning_rate": 7.575396825396826e-06, "loss": 44.6465, "step": 2301 }, { "epoch": 54.811940298507466, "grad_norm": 21.515085220336914, "learning_rate": 7.571428571428572e-06, "loss": 44.3846, "step": 2302 }, { "epoch": 54.83582089552239, "grad_norm": 25.024738311767578, "learning_rate": 7.567460317460317e-06, "loss": 44.7177, "step": 2303 }, { "epoch": 54.85970149253731, "grad_norm": 18.742982864379883, "learning_rate": 7.563492063492064e-06, "loss": 44.0076, "step": 2304 }, { "epoch": 54.88358208955224, "grad_norm": 20.333248138427734, "learning_rate": 7.55952380952381e-06, "loss": 43.6154, "step": 2305 }, { "epoch": 54.907462686567165, "grad_norm": 21.4791316986084, "learning_rate": 7.555555555555556e-06, "loss": 44.2151, "step": 2306 }, { "epoch": 54.93134328358209, "grad_norm": 26.272165298461914, "learning_rate": 7.551587301587302e-06, "loss": 45.5681, "step": 2307 }, { "epoch": 54.95522388059702, "grad_norm": 24.541637420654297, "learning_rate": 7.547619047619048e-06, "loss": 42.8248, "step": 2308 }, { "epoch": 54.97910447761194, "grad_norm": 23.563505172729492, "learning_rate": 7.543650793650794e-06, "loss": 45.1566, "step": 2309 }, { "epoch": 55.0, "grad_norm": 18.819583892822266, "learning_rate": 7.53968253968254e-06, "loss": 39.9674, "step": 2310 }, { "epoch": 55.02388059701492, "grad_norm": 21.50135040283203, "learning_rate": 7.5357142857142865e-06, "loss": 44.7008, "step": 2311 }, { "epoch": 55.04776119402985, "grad_norm": 20.28955078125, "learning_rate": 7.531746031746032e-06, "loss": 45.7705, "step": 2312 }, { "epoch": 55.071641791044776, "grad_norm": 19.94631004333496, "learning_rate": 7.527777777777778e-06, "loss": 44.3223, "step": 2313 }, { "epoch": 55.0955223880597, "grad_norm": 22.5699520111084, "learning_rate": 7.523809523809524e-06, "loss": 45.2447, "step": 2314 }, { "epoch": 55.11940298507463, "grad_norm": 17.17287254333496, "learning_rate": 7.519841269841271e-06, "loss": 44.4756, "step": 2315 }, { "epoch": 55.14328358208955, "grad_norm": 17.727617263793945, "learning_rate": 7.515873015873016e-06, "loss": 44.3907, "step": 2316 }, { "epoch": 55.167164179104475, "grad_norm": 18.320112228393555, "learning_rate": 7.511904761904762e-06, "loss": 44.3684, "step": 2317 }, { "epoch": 55.191044776119405, "grad_norm": 14.576784133911133, "learning_rate": 7.5079365079365085e-06, "loss": 43.8936, "step": 2318 }, { "epoch": 55.21492537313433, "grad_norm": 18.880218505859375, "learning_rate": 7.503968253968255e-06, "loss": 44.5556, "step": 2319 }, { "epoch": 55.23880597014925, "grad_norm": 19.764198303222656, "learning_rate": 7.500000000000001e-06, "loss": 43.8549, "step": 2320 }, { "epoch": 55.26268656716418, "grad_norm": NaN, "learning_rate": 7.4960317460317464e-06, "loss": 44.1908, "step": 2321 }, { "epoch": 55.286567164179104, "grad_norm": 19.977001190185547, "learning_rate": 7.4960317460317464e-06, "loss": 44.3724, "step": 2322 }, { "epoch": 55.31044776119403, "grad_norm": 21.85407066345215, "learning_rate": 7.492063492063493e-06, "loss": 44.2829, "step": 2323 }, { "epoch": 55.33432835820896, "grad_norm": 22.168100357055664, "learning_rate": 7.488095238095239e-06, "loss": 44.4898, "step": 2324 }, { "epoch": 55.35820895522388, "grad_norm": 18.234298706054688, "learning_rate": 7.484126984126985e-06, "loss": 44.3711, "step": 2325 }, { "epoch": 55.3820895522388, "grad_norm": 30.803691864013672, "learning_rate": 7.4801587301587306e-06, "loss": 43.825, "step": 2326 }, { "epoch": 55.40597014925373, "grad_norm": 22.18082046508789, "learning_rate": 7.476190476190477e-06, "loss": 44.8057, "step": 2327 }, { "epoch": 55.429850746268656, "grad_norm": 32.34336853027344, "learning_rate": 7.472222222222223e-06, "loss": 45.0128, "step": 2328 }, { "epoch": 55.45373134328358, "grad_norm": 28.040363311767578, "learning_rate": 7.468253968253969e-06, "loss": 45.2428, "step": 2329 }, { "epoch": 55.47761194029851, "grad_norm": 31.505037307739258, "learning_rate": 7.464285714285715e-06, "loss": 43.6846, "step": 2330 }, { "epoch": 55.50149253731343, "grad_norm": 21.62251853942871, "learning_rate": 7.460317460317461e-06, "loss": 44.467, "step": 2331 }, { "epoch": 55.525373134328355, "grad_norm": 29.035741806030273, "learning_rate": 7.456349206349207e-06, "loss": 43.7965, "step": 2332 }, { "epoch": 55.549253731343285, "grad_norm": 17.466440200805664, "learning_rate": 7.4523809523809534e-06, "loss": 44.8456, "step": 2333 }, { "epoch": 55.57313432835821, "grad_norm": 37.01939392089844, "learning_rate": 7.448412698412699e-06, "loss": 44.4359, "step": 2334 }, { "epoch": 55.59701492537313, "grad_norm": 24.612354278564453, "learning_rate": 7.444444444444445e-06, "loss": 43.7534, "step": 2335 }, { "epoch": 55.62089552238806, "grad_norm": 39.57061004638672, "learning_rate": 7.440476190476191e-06, "loss": 45.5826, "step": 2336 }, { "epoch": 55.644776119402984, "grad_norm": 33.687538146972656, "learning_rate": 7.4365079365079376e-06, "loss": 44.3013, "step": 2337 }, { "epoch": 55.668656716417914, "grad_norm": 38.39813995361328, "learning_rate": 7.432539682539684e-06, "loss": 44.7874, "step": 2338 }, { "epoch": 55.69253731343284, "grad_norm": 38.15765380859375, "learning_rate": 7.428571428571429e-06, "loss": 44.6177, "step": 2339 }, { "epoch": 55.71641791044776, "grad_norm": 27.5556640625, "learning_rate": 7.4246031746031754e-06, "loss": 43.1288, "step": 2340 }, { "epoch": 55.74029850746269, "grad_norm": 30.958349227905273, "learning_rate": 7.420634920634922e-06, "loss": 45.4904, "step": 2341 }, { "epoch": 55.76417910447761, "grad_norm": 21.71723747253418, "learning_rate": 7.416666666666668e-06, "loss": 43.0391, "step": 2342 }, { "epoch": 55.788059701492536, "grad_norm": 24.229736328125, "learning_rate": 7.412698412698413e-06, "loss": 44.0514, "step": 2343 }, { "epoch": 55.811940298507466, "grad_norm": 32.11771011352539, "learning_rate": 7.40873015873016e-06, "loss": 44.5521, "step": 2344 }, { "epoch": 55.83582089552239, "grad_norm": 21.694486618041992, "learning_rate": 7.404761904761906e-06, "loss": 43.9067, "step": 2345 }, { "epoch": 55.85970149253731, "grad_norm": 31.469402313232422, "learning_rate": 7.400793650793652e-06, "loss": 43.3506, "step": 2346 }, { "epoch": 55.88358208955224, "grad_norm": 28.431011199951172, "learning_rate": 7.3968253968253975e-06, "loss": 43.7056, "step": 2347 }, { "epoch": 55.907462686567165, "grad_norm": 34.616065979003906, "learning_rate": 7.392857142857144e-06, "loss": 44.0428, "step": 2348 }, { "epoch": 55.93134328358209, "grad_norm": 27.52882194519043, "learning_rate": 7.38888888888889e-06, "loss": 43.8619, "step": 2349 }, { "epoch": 55.95522388059702, "grad_norm": 36.93627166748047, "learning_rate": 7.384920634920636e-06, "loss": 44.1589, "step": 2350 }, { "epoch": 55.97910447761194, "grad_norm": 37.746578216552734, "learning_rate": 7.380952380952382e-06, "loss": 46.0174, "step": 2351 }, { "epoch": 56.0, "grad_norm": 20.20071029663086, "learning_rate": 7.376984126984128e-06, "loss": 37.7123, "step": 2352 }, { "epoch": 56.02388059701492, "grad_norm": 25.446529388427734, "learning_rate": 7.373015873015874e-06, "loss": 43.4657, "step": 2353 }, { "epoch": 56.04776119402985, "grad_norm": 25.78912353515625, "learning_rate": 7.36904761904762e-06, "loss": 44.7379, "step": 2354 }, { "epoch": 56.071641791044776, "grad_norm": 24.028154373168945, "learning_rate": 7.3650793650793666e-06, "loss": 43.4876, "step": 2355 }, { "epoch": 56.0955223880597, "grad_norm": 30.157793045043945, "learning_rate": 7.361111111111112e-06, "loss": 44.2346, "step": 2356 }, { "epoch": 56.11940298507463, "grad_norm": 29.836891174316406, "learning_rate": 7.357142857142858e-06, "loss": 44.0793, "step": 2357 }, { "epoch": 56.14328358208955, "grad_norm": 30.458818435668945, "learning_rate": 7.3531746031746045e-06, "loss": 44.7587, "step": 2358 }, { "epoch": 56.167164179104475, "grad_norm": 28.30854034423828, "learning_rate": 7.349206349206351e-06, "loss": 44.9153, "step": 2359 }, { "epoch": 56.191044776119405, "grad_norm": 29.497085571289062, "learning_rate": 7.345238095238096e-06, "loss": 44.194, "step": 2360 }, { "epoch": 56.21492537313433, "grad_norm": 21.790063858032227, "learning_rate": 7.3412698412698415e-06, "loss": 44.1507, "step": 2361 }, { "epoch": 56.23880597014925, "grad_norm": 27.489242553710938, "learning_rate": 7.337301587301588e-06, "loss": 44.3505, "step": 2362 }, { "epoch": 56.26268656716418, "grad_norm": 22.906452178955078, "learning_rate": 7.333333333333333e-06, "loss": 44.9329, "step": 2363 }, { "epoch": 56.286567164179104, "grad_norm": 33.46257400512695, "learning_rate": 7.329365079365079e-06, "loss": 43.1323, "step": 2364 }, { "epoch": 56.31044776119403, "grad_norm": 22.410470962524414, "learning_rate": 7.325396825396826e-06, "loss": 43.6305, "step": 2365 }, { "epoch": 56.33432835820896, "grad_norm": 32.74277114868164, "learning_rate": 7.321428571428572e-06, "loss": 43.6165, "step": 2366 }, { "epoch": 56.35820895522388, "grad_norm": 31.368419647216797, "learning_rate": 7.317460317460317e-06, "loss": 44.8837, "step": 2367 }, { "epoch": 56.3820895522388, "grad_norm": 30.941593170166016, "learning_rate": 7.3134920634920635e-06, "loss": 44.2282, "step": 2368 }, { "epoch": 56.40597014925373, "grad_norm": 34.12104415893555, "learning_rate": 7.30952380952381e-06, "loss": 45.2191, "step": 2369 }, { "epoch": 56.429850746268656, "grad_norm": 28.09603500366211, "learning_rate": 7.305555555555556e-06, "loss": 44.9848, "step": 2370 }, { "epoch": 56.45373134328358, "grad_norm": 29.32378578186035, "learning_rate": 7.301587301587301e-06, "loss": 44.2658, "step": 2371 }, { "epoch": 56.47761194029851, "grad_norm": 26.269071578979492, "learning_rate": 7.297619047619048e-06, "loss": 46.2933, "step": 2372 }, { "epoch": 56.50149253731343, "grad_norm": 25.23761558532715, "learning_rate": 7.293650793650794e-06, "loss": 44.4726, "step": 2373 }, { "epoch": 56.525373134328355, "grad_norm": 27.6646728515625, "learning_rate": 7.28968253968254e-06, "loss": 43.9823, "step": 2374 }, { "epoch": 56.549253731343285, "grad_norm": 23.01959228515625, "learning_rate": 7.285714285714286e-06, "loss": 44.5168, "step": 2375 }, { "epoch": 56.57313432835821, "grad_norm": 31.120128631591797, "learning_rate": 7.281746031746032e-06, "loss": 44.6706, "step": 2376 }, { "epoch": 56.59701492537313, "grad_norm": 24.57699203491211, "learning_rate": 7.277777777777778e-06, "loss": 45.1781, "step": 2377 }, { "epoch": 56.62089552238806, "grad_norm": 28.831064224243164, "learning_rate": 7.273809523809524e-06, "loss": 43.7437, "step": 2378 }, { "epoch": 56.644776119402984, "grad_norm": 26.15156364440918, "learning_rate": 7.2698412698412705e-06, "loss": 45.1624, "step": 2379 }, { "epoch": 56.668656716417914, "grad_norm": 28.825542449951172, "learning_rate": 7.265873015873016e-06, "loss": 44.66, "step": 2380 }, { "epoch": 56.69253731343284, "grad_norm": 26.763559341430664, "learning_rate": 7.261904761904762e-06, "loss": 43.6022, "step": 2381 }, { "epoch": 56.71641791044776, "grad_norm": 26.9444522857666, "learning_rate": 7.257936507936508e-06, "loss": 44.025, "step": 2382 }, { "epoch": 56.74029850746269, "grad_norm": 24.93570327758789, "learning_rate": 7.253968253968255e-06, "loss": 45.3596, "step": 2383 }, { "epoch": 56.76417910447761, "grad_norm": 29.15943717956543, "learning_rate": 7.25e-06, "loss": 44.2434, "step": 2384 }, { "epoch": 56.788059701492536, "grad_norm": 25.154356002807617, "learning_rate": 7.246031746031746e-06, "loss": 44.1319, "step": 2385 }, { "epoch": 56.811940298507466, "grad_norm": 24.86849021911621, "learning_rate": 7.2420634920634925e-06, "loss": 44.5886, "step": 2386 }, { "epoch": 56.83582089552239, "grad_norm": 22.996164321899414, "learning_rate": 7.238095238095239e-06, "loss": 43.9457, "step": 2387 }, { "epoch": 56.85970149253731, "grad_norm": 21.19574737548828, "learning_rate": 7.234126984126984e-06, "loss": 42.4004, "step": 2388 }, { "epoch": 56.88358208955224, "grad_norm": 22.6845645904541, "learning_rate": 7.23015873015873e-06, "loss": 43.7031, "step": 2389 }, { "epoch": 56.907462686567165, "grad_norm": 24.80902099609375, "learning_rate": 7.226190476190477e-06, "loss": 45.4586, "step": 2390 }, { "epoch": 56.93134328358209, "grad_norm": 23.157859802246094, "learning_rate": 7.222222222222223e-06, "loss": 43.4031, "step": 2391 }, { "epoch": 56.95522388059702, "grad_norm": 16.62670135498047, "learning_rate": 7.218253968253969e-06, "loss": 43.9552, "step": 2392 }, { "epoch": 56.97910447761194, "grad_norm": 31.313417434692383, "learning_rate": 7.2142857142857145e-06, "loss": 44.5037, "step": 2393 }, { "epoch": 57.0, "grad_norm": 17.58053970336914, "learning_rate": 7.210317460317461e-06, "loss": 38.2111, "step": 2394 }, { "epoch": 57.02388059701492, "grad_norm": 28.399444580078125, "learning_rate": 7.206349206349207e-06, "loss": 43.0462, "step": 2395 }, { "epoch": 57.04776119402985, "grad_norm": 28.965984344482422, "learning_rate": 7.202380952380953e-06, "loss": 45.5986, "step": 2396 }, { "epoch": 57.071641791044776, "grad_norm": 20.43558120727539, "learning_rate": 7.198412698412699e-06, "loss": 43.5525, "step": 2397 }, { "epoch": 57.0955223880597, "grad_norm": 29.135162353515625, "learning_rate": 7.194444444444445e-06, "loss": 44.4881, "step": 2398 }, { "epoch": 57.11940298507463, "grad_norm": 23.580699920654297, "learning_rate": 7.190476190476191e-06, "loss": 43.7271, "step": 2399 }, { "epoch": 57.14328358208955, "grad_norm": 22.67143440246582, "learning_rate": 7.186507936507937e-06, "loss": 42.7561, "step": 2400 }, { "epoch": 57.167164179104475, "grad_norm": 21.757719039916992, "learning_rate": 7.182539682539683e-06, "loss": 44.4426, "step": 2401 }, { "epoch": 57.191044776119405, "grad_norm": 22.878713607788086, "learning_rate": 7.178571428571429e-06, "loss": 43.9361, "step": 2402 }, { "epoch": 57.21492537313433, "grad_norm": 21.213376998901367, "learning_rate": 7.174603174603175e-06, "loss": 44.4992, "step": 2403 }, { "epoch": 57.23880597014925, "grad_norm": 26.92378807067871, "learning_rate": 7.1706349206349215e-06, "loss": 43.3883, "step": 2404 }, { "epoch": 57.26268656716418, "grad_norm": 21.269611358642578, "learning_rate": 7.166666666666667e-06, "loss": 45.1554, "step": 2405 }, { "epoch": 57.286567164179104, "grad_norm": 24.775962829589844, "learning_rate": 7.162698412698413e-06, "loss": 44.6757, "step": 2406 }, { "epoch": 57.31044776119403, "grad_norm": 23.0452880859375, "learning_rate": 7.1587301587301594e-06, "loss": 44.0688, "step": 2407 }, { "epoch": 57.33432835820896, "grad_norm": 25.50167465209961, "learning_rate": 7.154761904761906e-06, "loss": 44.7899, "step": 2408 }, { "epoch": 57.35820895522388, "grad_norm": 21.560483932495117, "learning_rate": 7.150793650793652e-06, "loss": 44.3325, "step": 2409 }, { "epoch": 57.3820895522388, "grad_norm": 22.5815486907959, "learning_rate": 7.146825396825397e-06, "loss": 44.1531, "step": 2410 }, { "epoch": 57.40597014925373, "grad_norm": 22.582660675048828, "learning_rate": 7.1428571428571436e-06, "loss": 45.738, "step": 2411 }, { "epoch": 57.429850746268656, "grad_norm": 21.388498306274414, "learning_rate": 7.13888888888889e-06, "loss": 44.2737, "step": 2412 }, { "epoch": 57.45373134328358, "grad_norm": 25.76162338256836, "learning_rate": 7.134920634920636e-06, "loss": 45.273, "step": 2413 }, { "epoch": 57.47761194029851, "grad_norm": 22.20672035217285, "learning_rate": 7.1309523809523814e-06, "loss": 44.8847, "step": 2414 }, { "epoch": 57.50149253731343, "grad_norm": 22.07602310180664, "learning_rate": 7.126984126984128e-06, "loss": 45.0162, "step": 2415 }, { "epoch": 57.525373134328355, "grad_norm": 23.339630126953125, "learning_rate": 7.123015873015874e-06, "loss": 45.1952, "step": 2416 }, { "epoch": 57.549253731343285, "grad_norm": 21.66541290283203, "learning_rate": 7.11904761904762e-06, "loss": 43.586, "step": 2417 }, { "epoch": 57.57313432835821, "grad_norm": 19.015748977661133, "learning_rate": 7.115079365079366e-06, "loss": 44.391, "step": 2418 }, { "epoch": 57.59701492537313, "grad_norm": 20.501789093017578, "learning_rate": 7.111111111111112e-06, "loss": 43.8462, "step": 2419 }, { "epoch": 57.62089552238806, "grad_norm": 20.447154998779297, "learning_rate": 7.107142857142858e-06, "loss": 44.0195, "step": 2420 }, { "epoch": 57.644776119402984, "grad_norm": 17.749000549316406, "learning_rate": 7.103174603174604e-06, "loss": 43.0026, "step": 2421 }, { "epoch": 57.668656716417914, "grad_norm": 22.757408142089844, "learning_rate": 7.09920634920635e-06, "loss": 44.1692, "step": 2422 }, { "epoch": 57.69253731343284, "grad_norm": 17.98983383178711, "learning_rate": 7.095238095238096e-06, "loss": 43.4101, "step": 2423 }, { "epoch": 57.71641791044776, "grad_norm": 24.20079803466797, "learning_rate": 7.091269841269842e-06, "loss": 44.8966, "step": 2424 }, { "epoch": 57.74029850746269, "grad_norm": 22.89764404296875, "learning_rate": 7.0873015873015884e-06, "loss": 43.2216, "step": 2425 }, { "epoch": 57.76417910447761, "grad_norm": 22.002994537353516, "learning_rate": 7.083333333333335e-06, "loss": 44.6504, "step": 2426 }, { "epoch": 57.788059701492536, "grad_norm": 21.76395606994629, "learning_rate": 7.07936507936508e-06, "loss": 44.2117, "step": 2427 }, { "epoch": 57.811940298507466, "grad_norm": 21.774457931518555, "learning_rate": 7.075396825396826e-06, "loss": 42.8879, "step": 2428 }, { "epoch": 57.83582089552239, "grad_norm": 20.503652572631836, "learning_rate": 7.0714285714285726e-06, "loss": 45.954, "step": 2429 }, { "epoch": 57.85970149253731, "grad_norm": 23.904399871826172, "learning_rate": 7.067460317460319e-06, "loss": 43.356, "step": 2430 }, { "epoch": 57.88358208955224, "grad_norm": 19.87941551208496, "learning_rate": 7.063492063492064e-06, "loss": 43.2895, "step": 2431 }, { "epoch": 57.907462686567165, "grad_norm": 21.451114654541016, "learning_rate": 7.0595238095238105e-06, "loss": 45.5597, "step": 2432 }, { "epoch": 57.93134328358209, "grad_norm": 19.936561584472656, "learning_rate": 7.055555555555557e-06, "loss": 44.9952, "step": 2433 }, { "epoch": 57.95522388059702, "grad_norm": 28.258575439453125, "learning_rate": 7.051587301587303e-06, "loss": 44.6988, "step": 2434 }, { "epoch": 57.97910447761194, "grad_norm": 17.210622787475586, "learning_rate": 7.047619047619048e-06, "loss": 43.6215, "step": 2435 }, { "epoch": 58.0, "grad_norm": 24.241130828857422, "learning_rate": 7.043650793650795e-06, "loss": 38.4149, "step": 2436 }, { "epoch": 58.02388059701492, "grad_norm": 16.801782608032227, "learning_rate": 7.039682539682541e-06, "loss": 43.5018, "step": 2437 }, { "epoch": 58.04776119402985, "grad_norm": 29.925647735595703, "learning_rate": 7.035714285714287e-06, "loss": 43.2125, "step": 2438 }, { "epoch": 58.071641791044776, "grad_norm": 18.95758056640625, "learning_rate": 7.0317460317460325e-06, "loss": 44.4259, "step": 2439 }, { "epoch": 58.0955223880597, "grad_norm": 23.302980422973633, "learning_rate": 7.027777777777778e-06, "loss": 44.0357, "step": 2440 }, { "epoch": 58.11940298507463, "grad_norm": 20.59381866455078, "learning_rate": 7.023809523809524e-06, "loss": 43.307, "step": 2441 }, { "epoch": 58.14328358208955, "grad_norm": 22.13384246826172, "learning_rate": 7.0198412698412695e-06, "loss": 44.4651, "step": 2442 }, { "epoch": 58.167164179104475, "grad_norm": 19.862943649291992, "learning_rate": 7.015873015873016e-06, "loss": 44.2492, "step": 2443 }, { "epoch": 58.191044776119405, "grad_norm": 17.979585647583008, "learning_rate": 7.011904761904762e-06, "loss": 44.8333, "step": 2444 }, { "epoch": 58.21492537313433, "grad_norm": 24.35641860961914, "learning_rate": 7.007936507936508e-06, "loss": 43.9583, "step": 2445 }, { "epoch": 58.23880597014925, "grad_norm": 20.068201065063477, "learning_rate": 7.0039682539682545e-06, "loss": 44.2379, "step": 2446 }, { "epoch": 58.26268656716418, "grad_norm": 24.976778030395508, "learning_rate": 7e-06, "loss": 45.3054, "step": 2447 }, { "epoch": 58.286567164179104, "grad_norm": 19.772478103637695, "learning_rate": 6.996031746031746e-06, "loss": 44.7523, "step": 2448 }, { "epoch": 58.31044776119403, "grad_norm": 24.084999084472656, "learning_rate": 6.992063492063492e-06, "loss": 43.7454, "step": 2449 }, { "epoch": 58.33432835820896, "grad_norm": 23.313718795776367, "learning_rate": 6.988095238095239e-06, "loss": 44.8838, "step": 2450 }, { "epoch": 58.35820895522388, "grad_norm": 22.206872940063477, "learning_rate": 6.984126984126984e-06, "loss": 45.3141, "step": 2451 }, { "epoch": 58.3820895522388, "grad_norm": 17.005720138549805, "learning_rate": 6.98015873015873e-06, "loss": 43.8783, "step": 2452 }, { "epoch": 58.40597014925373, "grad_norm": NaN, "learning_rate": 6.9761904761904765e-06, "loss": 43.2019, "step": 2453 }, { "epoch": 58.429850746268656, "grad_norm": 20.627288818359375, "learning_rate": 6.9761904761904765e-06, "loss": 44.6086, "step": 2454 }, { "epoch": 58.45373134328358, "grad_norm": 19.47835350036621, "learning_rate": 6.972222222222223e-06, "loss": 44.6011, "step": 2455 }, { "epoch": 58.47761194029851, "grad_norm": 23.258609771728516, "learning_rate": 6.968253968253968e-06, "loss": 44.0184, "step": 2456 }, { "epoch": 58.50149253731343, "grad_norm": 22.79289436340332, "learning_rate": 6.964285714285714e-06, "loss": 44.1053, "step": 2457 }, { "epoch": 58.525373134328355, "grad_norm": 20.517322540283203, "learning_rate": 6.960317460317461e-06, "loss": 43.8647, "step": 2458 }, { "epoch": 58.549253731343285, "grad_norm": 15.967523574829102, "learning_rate": 6.956349206349207e-06, "loss": 44.9457, "step": 2459 }, { "epoch": 58.57313432835821, "grad_norm": 26.704971313476562, "learning_rate": 6.952380952380952e-06, "loss": 42.5455, "step": 2460 }, { "epoch": 58.59701492537313, "grad_norm": 17.790040969848633, "learning_rate": 6.9484126984126985e-06, "loss": 45.0774, "step": 2461 }, { "epoch": 58.62089552238806, "grad_norm": 24.100841522216797, "learning_rate": 6.944444444444445e-06, "loss": 44.7616, "step": 2462 }, { "epoch": 58.644776119402984, "grad_norm": 17.18956184387207, "learning_rate": 6.940476190476191e-06, "loss": 44.6506, "step": 2463 }, { "epoch": 58.668656716417914, "grad_norm": 25.185026168823242, "learning_rate": 6.936507936507937e-06, "loss": 44.9502, "step": 2464 }, { "epoch": 58.69253731343284, "grad_norm": 22.515111923217773, "learning_rate": 6.932539682539683e-06, "loss": 43.802, "step": 2465 }, { "epoch": 58.71641791044776, "grad_norm": 26.777843475341797, "learning_rate": 6.928571428571429e-06, "loss": 44.0202, "step": 2466 }, { "epoch": 58.74029850746269, "grad_norm": 22.73821449279785, "learning_rate": 6.924603174603175e-06, "loss": 44.1536, "step": 2467 }, { "epoch": 58.76417910447761, "grad_norm": 20.890169143676758, "learning_rate": 6.920634920634921e-06, "loss": 43.6729, "step": 2468 }, { "epoch": 58.788059701492536, "grad_norm": 21.856304168701172, "learning_rate": 6.916666666666667e-06, "loss": 44.5367, "step": 2469 }, { "epoch": 58.811940298507466, "grad_norm": 23.25510025024414, "learning_rate": 6.912698412698413e-06, "loss": 44.8127, "step": 2470 }, { "epoch": 58.83582089552239, "grad_norm": 26.32556915283203, "learning_rate": 6.908730158730159e-06, "loss": 44.8174, "step": 2471 }, { "epoch": 58.85970149253731, "grad_norm": 17.11307716369629, "learning_rate": 6.9047619047619055e-06, "loss": 42.9014, "step": 2472 }, { "epoch": 58.88358208955224, "grad_norm": 22.380199432373047, "learning_rate": 6.900793650793651e-06, "loss": 43.9276, "step": 2473 }, { "epoch": 58.907462686567165, "grad_norm": 17.855484008789062, "learning_rate": 6.896825396825397e-06, "loss": 43.5554, "step": 2474 }, { "epoch": 58.93134328358209, "grad_norm": 18.112934112548828, "learning_rate": 6.892857142857143e-06, "loss": 43.1567, "step": 2475 }, { "epoch": 58.95522388059702, "grad_norm": 21.682844161987305, "learning_rate": 6.88888888888889e-06, "loss": 44.4182, "step": 2476 }, { "epoch": 58.97910447761194, "grad_norm": 22.78960609436035, "learning_rate": 6.884920634920635e-06, "loss": 44.5085, "step": 2477 }, { "epoch": 59.0, "grad_norm": 26.30175018310547, "learning_rate": 6.880952380952381e-06, "loss": 38.3404, "step": 2478 }, { "epoch": 59.02388059701492, "grad_norm": 23.714672088623047, "learning_rate": 6.8769841269841275e-06, "loss": 44.131, "step": 2479 }, { "epoch": 59.04776119402985, "grad_norm": 21.14238929748535, "learning_rate": 6.873015873015874e-06, "loss": 44.0208, "step": 2480 }, { "epoch": 59.071641791044776, "grad_norm": 25.2884464263916, "learning_rate": 6.86904761904762e-06, "loss": 43.4771, "step": 2481 }, { "epoch": 59.0955223880597, "grad_norm": 21.62645149230957, "learning_rate": 6.8650793650793654e-06, "loss": 43.3618, "step": 2482 }, { "epoch": 59.11940298507463, "grad_norm": 21.61750602722168, "learning_rate": 6.861111111111112e-06, "loss": 45.1006, "step": 2483 }, { "epoch": 59.14328358208955, "grad_norm": 25.124187469482422, "learning_rate": 6.857142857142858e-06, "loss": 44.2154, "step": 2484 }, { "epoch": 59.167164179104475, "grad_norm": 21.88067626953125, "learning_rate": 6.853174603174604e-06, "loss": 44.6725, "step": 2485 }, { "epoch": 59.191044776119405, "grad_norm": 26.95779037475586, "learning_rate": 6.8492063492063496e-06, "loss": 43.2419, "step": 2486 }, { "epoch": 59.21492537313433, "grad_norm": 18.35979461669922, "learning_rate": 6.845238095238096e-06, "loss": 42.7172, "step": 2487 }, { "epoch": 59.23880597014925, "grad_norm": 27.409034729003906, "learning_rate": 6.841269841269842e-06, "loss": 44.7929, "step": 2488 }, { "epoch": 59.26268656716418, "grad_norm": 23.257780075073242, "learning_rate": 6.837301587301588e-06, "loss": 45.0742, "step": 2489 }, { "epoch": 59.286567164179104, "grad_norm": 23.64436912536621, "learning_rate": 6.833333333333334e-06, "loss": 45.3608, "step": 2490 }, { "epoch": 59.31044776119403, "grad_norm": 19.836320877075195, "learning_rate": 6.82936507936508e-06, "loss": 43.3152, "step": 2491 }, { "epoch": 59.33432835820896, "grad_norm": 23.7291259765625, "learning_rate": 6.825396825396826e-06, "loss": 45.0224, "step": 2492 }, { "epoch": 59.35820895522388, "grad_norm": 22.776365280151367, "learning_rate": 6.8214285714285724e-06, "loss": 44.0495, "step": 2493 }, { "epoch": 59.3820895522388, "grad_norm": 19.556560516357422, "learning_rate": 6.817460317460318e-06, "loss": 43.0716, "step": 2494 }, { "epoch": 59.40597014925373, "grad_norm": 19.592493057250977, "learning_rate": 6.813492063492064e-06, "loss": 43.8102, "step": 2495 }, { "epoch": 59.429850746268656, "grad_norm": 18.4060001373291, "learning_rate": 6.80952380952381e-06, "loss": 42.5119, "step": 2496 }, { "epoch": 59.45373134328358, "grad_norm": 18.86701202392578, "learning_rate": 6.8055555555555566e-06, "loss": 42.7576, "step": 2497 }, { "epoch": 59.47761194029851, "grad_norm": 19.602235794067383, "learning_rate": 6.801587301587303e-06, "loss": 44.8432, "step": 2498 }, { "epoch": 59.50149253731343, "grad_norm": NaN, "learning_rate": 6.797619047619048e-06, "loss": 77.3141, "step": 2499 }, { "epoch": 59.525373134328355, "grad_norm": 18.09695816040039, "learning_rate": 6.797619047619048e-06, "loss": 44.5263, "step": 2500 }, { "epoch": 59.549253731343285, "grad_norm": 20.728633880615234, "learning_rate": 6.7936507936507944e-06, "loss": 45.2417, "step": 2501 }, { "epoch": 59.57313432835821, "grad_norm": 19.164405822753906, "learning_rate": 6.789682539682541e-06, "loss": 44.1673, "step": 2502 }, { "epoch": 59.59701492537313, "grad_norm": 17.986604690551758, "learning_rate": 6.785714285714287e-06, "loss": 44.9311, "step": 2503 }, { "epoch": 59.62089552238806, "grad_norm": 21.212223052978516, "learning_rate": 6.781746031746032e-06, "loss": 43.9186, "step": 2504 }, { "epoch": 59.644776119402984, "grad_norm": 19.34587287902832, "learning_rate": 6.777777777777779e-06, "loss": 43.9205, "step": 2505 }, { "epoch": 59.668656716417914, "grad_norm": 21.107257843017578, "learning_rate": 6.773809523809525e-06, "loss": 43.7522, "step": 2506 }, { "epoch": 59.69253731343284, "grad_norm": 19.897724151611328, "learning_rate": 6.769841269841271e-06, "loss": 43.7826, "step": 2507 }, { "epoch": 59.71641791044776, "grad_norm": 22.272457122802734, "learning_rate": 6.7658730158730165e-06, "loss": 44.4984, "step": 2508 }, { "epoch": 59.74029850746269, "grad_norm": 20.087844848632812, "learning_rate": 6.761904761904763e-06, "loss": 43.6682, "step": 2509 }, { "epoch": 59.76417910447761, "grad_norm": 22.083215713500977, "learning_rate": 6.757936507936509e-06, "loss": 43.1799, "step": 2510 }, { "epoch": 59.788059701492536, "grad_norm": 18.583234786987305, "learning_rate": 6.753968253968255e-06, "loss": 44.9017, "step": 2511 }, { "epoch": 59.811940298507466, "grad_norm": 20.20134735107422, "learning_rate": 6.750000000000001e-06, "loss": 44.1051, "step": 2512 }, { "epoch": 59.83582089552239, "grad_norm": 20.973419189453125, "learning_rate": 6.746031746031747e-06, "loss": 44.138, "step": 2513 }, { "epoch": 59.85970149253731, "grad_norm": 15.97662353515625, "learning_rate": 6.742063492063493e-06, "loss": 44.9675, "step": 2514 }, { "epoch": 59.88358208955224, "grad_norm": 23.126541137695312, "learning_rate": 6.738095238095239e-06, "loss": 44.4417, "step": 2515 }, { "epoch": 59.907462686567165, "grad_norm": 19.715782165527344, "learning_rate": 6.7341269841269856e-06, "loss": 44.768, "step": 2516 }, { "epoch": 59.93134328358209, "grad_norm": 27.000070571899414, "learning_rate": 6.730158730158731e-06, "loss": 45.0485, "step": 2517 }, { "epoch": 59.95522388059702, "grad_norm": 21.746152877807617, "learning_rate": 6.726190476190477e-06, "loss": 44.5848, "step": 2518 }, { "epoch": 59.97910447761194, "grad_norm": 17.86555290222168, "learning_rate": 6.7222222222222235e-06, "loss": 44.0213, "step": 2519 }, { "epoch": 60.0, "grad_norm": 20.369977951049805, "learning_rate": 6.718253968253968e-06, "loss": 38.2947, "step": 2520 }, { "epoch": 60.0, "step": 2520, "total_flos": 1.2389502240404026e+17, "train_loss": 14.973776844569615, "train_runtime": 26580.6785, "train_samples_per_second": 12.081, "train_steps_per_second": 0.095 }, { "epoch": 60.02388059701492, "grad_norm": 22.268356323242188, "learning_rate": 1e-05, "loss": 43.4551, "step": 2521 }, { "epoch": 60.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99702380952381e-06, "loss": 49.1255, "step": 2522 }, { "epoch": 60.071641791044776, "grad_norm": 202.42481994628906, "learning_rate": 9.99702380952381e-06, "loss": 48.2461, "step": 2523 }, { "epoch": 60.0955223880597, "grad_norm": 190.5668182373047, "learning_rate": 9.99404761904762e-06, "loss": 49.3017, "step": 2524 }, { "epoch": 60.11940298507463, "grad_norm": 69.69708251953125, "learning_rate": 9.99107142857143e-06, "loss": 47.4179, "step": 2525 }, { "epoch": 60.14328358208955, "grad_norm": 94.14574432373047, "learning_rate": 9.988095238095239e-06, "loss": 46.2633, "step": 2526 }, { "epoch": 60.167164179104475, "grad_norm": 59.666481018066406, "learning_rate": 9.985119047619048e-06, "loss": 45.9356, "step": 2527 }, { "epoch": 60.191044776119405, "grad_norm": 66.49242401123047, "learning_rate": 9.982142857142858e-06, "loss": 46.0376, "step": 2528 }, { "epoch": 60.21492537313433, "grad_norm": 46.52798080444336, "learning_rate": 9.979166666666668e-06, "loss": 42.4879, "step": 2529 }, { "epoch": 60.23880597014925, "grad_norm": 41.28635025024414, "learning_rate": 9.976190476190477e-06, "loss": 43.5567, "step": 2530 }, { "epoch": 60.26268656716418, "grad_norm": 48.749210357666016, "learning_rate": 9.973214285714287e-06, "loss": 44.6395, "step": 2531 }, { "epoch": 60.286567164179104, "grad_norm": 40.452842712402344, "learning_rate": 9.970238095238096e-06, "loss": 44.65, "step": 2532 }, { "epoch": 60.31044776119403, "grad_norm": 41.35492706298828, "learning_rate": 9.967261904761905e-06, "loss": 44.8214, "step": 2533 }, { "epoch": 60.33432835820896, "grad_norm": 32.109527587890625, "learning_rate": 9.964285714285714e-06, "loss": 46.1763, "step": 2534 }, { "epoch": 60.35820895522388, "grad_norm": 28.22223472595215, "learning_rate": 9.961309523809525e-06, "loss": 45.4326, "step": 2535 }, { "epoch": 60.3820895522388, "grad_norm": 42.460872650146484, "learning_rate": 9.958333333333334e-06, "loss": 44.2028, "step": 2536 }, { "epoch": 60.40597014925373, "grad_norm": 24.426565170288086, "learning_rate": 9.955357142857143e-06, "loss": 44.141, "step": 2537 }, { "epoch": 60.429850746268656, "grad_norm": 30.63353729248047, "learning_rate": 9.952380952380954e-06, "loss": 44.1465, "step": 2538 }, { "epoch": 60.45373134328358, "grad_norm": 24.15091323852539, "learning_rate": 9.949404761904763e-06, "loss": 44.4579, "step": 2539 }, { "epoch": 60.47761194029851, "grad_norm": 30.757854461669922, "learning_rate": 9.946428571428572e-06, "loss": 44.251, "step": 2540 }, { "epoch": 60.50149253731343, "grad_norm": 24.651243209838867, "learning_rate": 9.943452380952381e-06, "loss": 45.6042, "step": 2541 }, { "epoch": 60.525373134328355, "grad_norm": 23.414180755615234, "learning_rate": 9.940476190476192e-06, "loss": 43.8686, "step": 2542 }, { "epoch": 60.549253731343285, "grad_norm": 29.41202735900879, "learning_rate": 9.937500000000001e-06, "loss": 45.111, "step": 2543 }, { "epoch": 60.57313432835821, "grad_norm": 25.947559356689453, "learning_rate": 9.93452380952381e-06, "loss": 43.6131, "step": 2544 }, { "epoch": 60.59701492537313, "grad_norm": 30.613439559936523, "learning_rate": 9.93154761904762e-06, "loss": 44.9413, "step": 2545 }, { "epoch": 60.62089552238806, "grad_norm": 28.363725662231445, "learning_rate": 9.92857142857143e-06, "loss": 43.3202, "step": 2546 }, { "epoch": 60.644776119402984, "grad_norm": 24.05493927001953, "learning_rate": 9.925595238095239e-06, "loss": 45.434, "step": 2547 }, { "epoch": 60.668656716417914, "grad_norm": 30.285140991210938, "learning_rate": 9.922619047619048e-06, "loss": 44.2144, "step": 2548 }, { "epoch": 60.69253731343284, "grad_norm": 24.850799560546875, "learning_rate": 9.919642857142859e-06, "loss": 43.4151, "step": 2549 }, { "epoch": 60.71641791044776, "grad_norm": 23.70008087158203, "learning_rate": 9.916666666666668e-06, "loss": 44.0035, "step": 2550 }, { "epoch": 60.74029850746269, "grad_norm": 29.343976974487305, "learning_rate": 9.913690476190477e-06, "loss": 44.9651, "step": 2551 }, { "epoch": 60.76417910447761, "grad_norm": 19.914142608642578, "learning_rate": 9.910714285714288e-06, "loss": 44.3699, "step": 2552 }, { "epoch": 60.788059701492536, "grad_norm": 23.31308937072754, "learning_rate": 9.907738095238097e-06, "loss": 45.7724, "step": 2553 }, { "epoch": 60.811940298507466, "grad_norm": 26.089832305908203, "learning_rate": 9.904761904761906e-06, "loss": 44.9611, "step": 2554 }, { "epoch": 60.83582089552239, "grad_norm": 22.15082550048828, "learning_rate": 9.901785714285715e-06, "loss": 43.1386, "step": 2555 }, { "epoch": 60.85970149253731, "grad_norm": 30.4031925201416, "learning_rate": 9.898809523809525e-06, "loss": 44.324, "step": 2556 }, { "epoch": 60.88358208955224, "grad_norm": 22.16672134399414, "learning_rate": 9.895833333333334e-06, "loss": 43.6286, "step": 2557 }, { "epoch": 60.907462686567165, "grad_norm": 21.490468978881836, "learning_rate": 9.892857142857143e-06, "loss": 42.8494, "step": 2558 }, { "epoch": 60.93134328358209, "grad_norm": 28.823944091796875, "learning_rate": 9.889880952380954e-06, "loss": 42.9944, "step": 2559 }, { "epoch": 60.95522388059702, "grad_norm": 19.950031280517578, "learning_rate": 9.886904761904763e-06, "loss": 44.7105, "step": 2560 }, { "epoch": 60.97910447761194, "grad_norm": 31.069194793701172, "learning_rate": 9.883928571428572e-06, "loss": 43.6064, "step": 2561 }, { "epoch": 61.0, "grad_norm": 19.411388397216797, "learning_rate": 9.880952380952381e-06, "loss": 38.8114, "step": 2562 }, { "epoch": 61.02388059701492, "grad_norm": 26.66782569885254, "learning_rate": 9.877976190476192e-06, "loss": 43.0697, "step": 2563 }, { "epoch": 61.04776119402985, "grad_norm": 25.78309440612793, "learning_rate": 9.875000000000001e-06, "loss": 43.8682, "step": 2564 }, { "epoch": 61.071641791044776, "grad_norm": 19.312572479248047, "learning_rate": 9.87202380952381e-06, "loss": 44.7347, "step": 2565 }, { "epoch": 61.0955223880597, "grad_norm": 33.189598083496094, "learning_rate": 9.869047619047621e-06, "loss": 43.6253, "step": 2566 }, { "epoch": 61.11940298507463, "grad_norm": 27.351518630981445, "learning_rate": 9.86607142857143e-06, "loss": 43.6716, "step": 2567 }, { "epoch": 61.14328358208955, "grad_norm": 21.69624900817871, "learning_rate": 9.863095238095239e-06, "loss": 44.2277, "step": 2568 }, { "epoch": 61.167164179104475, "grad_norm": 27.58108901977539, "learning_rate": 9.860119047619048e-06, "loss": 44.3117, "step": 2569 }, { "epoch": 61.191044776119405, "grad_norm": 18.802303314208984, "learning_rate": 9.857142857142859e-06, "loss": 44.4119, "step": 2570 }, { "epoch": 61.21492537313433, "grad_norm": 21.36333656311035, "learning_rate": 9.854166666666668e-06, "loss": 45.0113, "step": 2571 }, { "epoch": 61.23880597014925, "grad_norm": 23.86919593811035, "learning_rate": 9.851190476190477e-06, "loss": 43.8577, "step": 2572 }, { "epoch": 61.26268656716418, "grad_norm": 20.259685516357422, "learning_rate": 9.848214285714288e-06, "loss": 43.921, "step": 2573 }, { "epoch": 61.286567164179104, "grad_norm": 29.262182235717773, "learning_rate": 9.845238095238097e-06, "loss": 44.1589, "step": 2574 }, { "epoch": 61.31044776119403, "grad_norm": 21.13313102722168, "learning_rate": 9.842261904761906e-06, "loss": 44.3854, "step": 2575 }, { "epoch": 61.33432835820896, "grad_norm": 23.83458137512207, "learning_rate": 9.839285714285715e-06, "loss": 43.5571, "step": 2576 }, { "epoch": 61.35820895522388, "grad_norm": 18.778934478759766, "learning_rate": 9.836309523809524e-06, "loss": 43.827, "step": 2577 }, { "epoch": 61.3820895522388, "grad_norm": 22.37734031677246, "learning_rate": 9.833333333333333e-06, "loss": 45.207, "step": 2578 }, { "epoch": 61.40597014925373, "grad_norm": 25.046817779541016, "learning_rate": 9.830357142857144e-06, "loss": 43.7649, "step": 2579 }, { "epoch": 61.429850746268656, "grad_norm": 21.867618560791016, "learning_rate": 9.827380952380953e-06, "loss": 43.7025, "step": 2580 }, { "epoch": 61.45373134328358, "grad_norm": 22.729969024658203, "learning_rate": 9.824404761904762e-06, "loss": 44.3454, "step": 2581 }, { "epoch": 61.47761194029851, "grad_norm": 23.03755760192871, "learning_rate": 9.821428571428573e-06, "loss": 43.6942, "step": 2582 }, { "epoch": 61.50149253731343, "grad_norm": 20.736244201660156, "learning_rate": 9.818452380952382e-06, "loss": 44.6758, "step": 2583 }, { "epoch": 61.525373134328355, "grad_norm": 17.63121795654297, "learning_rate": 9.81547619047619e-06, "loss": 43.3541, "step": 2584 }, { "epoch": 61.549253731343285, "grad_norm": 18.32825469970703, "learning_rate": 9.8125e-06, "loss": 43.8749, "step": 2585 }, { "epoch": 61.57313432835821, "grad_norm": 19.190811157226562, "learning_rate": 9.80952380952381e-06, "loss": 44.1191, "step": 2586 }, { "epoch": 61.59701492537313, "grad_norm": 17.29913330078125, "learning_rate": 9.80654761904762e-06, "loss": 44.5885, "step": 2587 }, { "epoch": 61.62089552238806, "grad_norm": 21.12825584411621, "learning_rate": 9.803571428571428e-06, "loss": 43.9206, "step": 2588 }, { "epoch": 61.644776119402984, "grad_norm": 26.83173942565918, "learning_rate": 9.80059523809524e-06, "loss": 44.7224, "step": 2589 }, { "epoch": 61.668656716417914, "grad_norm": 18.34907341003418, "learning_rate": 9.797619047619048e-06, "loss": 44.3625, "step": 2590 }, { "epoch": 61.69253731343284, "grad_norm": 18.709016799926758, "learning_rate": 9.794642857142857e-06, "loss": 43.2573, "step": 2591 }, { "epoch": 61.71641791044776, "grad_norm": 24.946210861206055, "learning_rate": 9.791666666666666e-06, "loss": 43.4754, "step": 2592 }, { "epoch": 61.74029850746269, "grad_norm": 22.253814697265625, "learning_rate": 9.788690476190477e-06, "loss": 43.7911, "step": 2593 }, { "epoch": 61.76417910447761, "grad_norm": 21.44365692138672, "learning_rate": 9.785714285714286e-06, "loss": 44.9724, "step": 2594 }, { "epoch": 61.788059701492536, "grad_norm": 19.002973556518555, "learning_rate": 9.782738095238095e-06, "loss": 44.5918, "step": 2595 }, { "epoch": 61.811940298507466, "grad_norm": 23.515329360961914, "learning_rate": 9.779761904761906e-06, "loss": 43.7888, "step": 2596 }, { "epoch": 61.83582089552239, "grad_norm": 22.531938552856445, "learning_rate": 9.776785714285715e-06, "loss": 43.3796, "step": 2597 }, { "epoch": 61.85970149253731, "grad_norm": 21.187646865844727, "learning_rate": 9.773809523809524e-06, "loss": 44.5443, "step": 2598 }, { "epoch": 61.88358208955224, "grad_norm": 16.221466064453125, "learning_rate": 9.770833333333333e-06, "loss": 43.3915, "step": 2599 }, { "epoch": 61.907462686567165, "grad_norm": 26.778657913208008, "learning_rate": 9.767857142857144e-06, "loss": 43.5025, "step": 2600 }, { "epoch": 61.93134328358209, "grad_norm": 22.768152236938477, "learning_rate": 9.764880952380953e-06, "loss": 44.5111, "step": 2601 }, { "epoch": 61.95522388059702, "grad_norm": 27.74826431274414, "learning_rate": 9.761904761904762e-06, "loss": 44.5974, "step": 2602 }, { "epoch": 61.97910447761194, "grad_norm": 22.05244255065918, "learning_rate": 9.758928571428573e-06, "loss": 43.5759, "step": 2603 }, { "epoch": 62.0, "grad_norm": 19.424118041992188, "learning_rate": 9.755952380952382e-06, "loss": 39.0078, "step": 2604 }, { "epoch": 62.02388059701492, "grad_norm": 26.683176040649414, "learning_rate": 9.75297619047619e-06, "loss": 43.6656, "step": 2605 }, { "epoch": 62.04776119402985, "grad_norm": 20.35538673400879, "learning_rate": 9.75e-06, "loss": 45.2448, "step": 2606 }, { "epoch": 62.071641791044776, "grad_norm": 20.601633071899414, "learning_rate": 9.74702380952381e-06, "loss": 45.3538, "step": 2607 }, { "epoch": 62.0955223880597, "grad_norm": 17.472143173217773, "learning_rate": 9.74404761904762e-06, "loss": 44.3077, "step": 2608 }, { "epoch": 62.11940298507463, "grad_norm": 21.365177154541016, "learning_rate": 9.741071428571429e-06, "loss": 43.7227, "step": 2609 }, { "epoch": 62.14328358208955, "grad_norm": 25.560060501098633, "learning_rate": 9.73809523809524e-06, "loss": 44.6434, "step": 2610 }, { "epoch": 62.167164179104475, "grad_norm": 20.327442169189453, "learning_rate": 9.735119047619048e-06, "loss": 43.5369, "step": 2611 }, { "epoch": 62.191044776119405, "grad_norm": 19.99593734741211, "learning_rate": 9.732142857142858e-06, "loss": 44.1446, "step": 2612 }, { "epoch": 62.21492537313433, "grad_norm": 19.648574829101562, "learning_rate": 9.729166666666667e-06, "loss": 43.4092, "step": 2613 }, { "epoch": 62.23880597014925, "grad_norm": 26.606019973754883, "learning_rate": 9.726190476190477e-06, "loss": 44.3531, "step": 2614 }, { "epoch": 62.26268656716418, "grad_norm": 27.989334106445312, "learning_rate": 9.723214285714286e-06, "loss": 44.3962, "step": 2615 }, { "epoch": 62.286567164179104, "grad_norm": 22.545223236083984, "learning_rate": 9.720238095238095e-06, "loss": 42.8718, "step": 2616 }, { "epoch": 62.31044776119403, "grad_norm": 26.62592315673828, "learning_rate": 9.717261904761906e-06, "loss": 43.004, "step": 2617 }, { "epoch": 62.33432835820896, "grad_norm": 26.52608299255371, "learning_rate": 9.714285714285715e-06, "loss": 43.2152, "step": 2618 }, { "epoch": 62.35820895522388, "grad_norm": 20.17901611328125, "learning_rate": 9.711309523809524e-06, "loss": 43.2228, "step": 2619 }, { "epoch": 62.3820895522388, "grad_norm": 30.335596084594727, "learning_rate": 9.708333333333333e-06, "loss": 45.0079, "step": 2620 }, { "epoch": 62.40597014925373, "grad_norm": 26.02729606628418, "learning_rate": 9.705357142857144e-06, "loss": 43.3155, "step": 2621 }, { "epoch": 62.429850746268656, "grad_norm": 25.07903480529785, "learning_rate": 9.702380952380953e-06, "loss": 45.2378, "step": 2622 }, { "epoch": 62.45373134328358, "grad_norm": 28.89750099182129, "learning_rate": 9.699404761904762e-06, "loss": 42.2074, "step": 2623 }, { "epoch": 62.47761194029851, "grad_norm": 20.475128173828125, "learning_rate": 9.696428571428573e-06, "loss": 43.8792, "step": 2624 }, { "epoch": 62.50149253731343, "grad_norm": 26.103612899780273, "learning_rate": 9.693452380952382e-06, "loss": 44.961, "step": 2625 }, { "epoch": 62.525373134328355, "grad_norm": 21.666906356811523, "learning_rate": 9.690476190476191e-06, "loss": 43.4776, "step": 2626 }, { "epoch": 62.549253731343285, "grad_norm": 22.16206932067871, "learning_rate": 9.6875e-06, "loss": 43.1076, "step": 2627 }, { "epoch": 62.57313432835821, "grad_norm": 18.17336082458496, "learning_rate": 9.68452380952381e-06, "loss": 43.7046, "step": 2628 }, { "epoch": 62.59701492537313, "grad_norm": 25.36472511291504, "learning_rate": 9.68154761904762e-06, "loss": 44.8569, "step": 2629 }, { "epoch": 62.62089552238806, "grad_norm": 18.750511169433594, "learning_rate": 9.678571428571429e-06, "loss": 43.73, "step": 2630 }, { "epoch": 62.644776119402984, "grad_norm": 18.174638748168945, "learning_rate": 9.67559523809524e-06, "loss": 44.4229, "step": 2631 }, { "epoch": 62.668656716417914, "grad_norm": 17.8627872467041, "learning_rate": 9.672619047619049e-06, "loss": 44.9343, "step": 2632 }, { "epoch": 62.69253731343284, "grad_norm": 28.161239624023438, "learning_rate": 9.669642857142858e-06, "loss": 43.7649, "step": 2633 }, { "epoch": 62.71641791044776, "grad_norm": 27.701793670654297, "learning_rate": 9.666666666666667e-06, "loss": 43.7122, "step": 2634 }, { "epoch": 62.74029850746269, "grad_norm": 18.659244537353516, "learning_rate": 9.663690476190477e-06, "loss": 43.5179, "step": 2635 }, { "epoch": 62.76417910447761, "grad_norm": 35.163169860839844, "learning_rate": 9.660714285714287e-06, "loss": 44.4568, "step": 2636 }, { "epoch": 62.788059701492536, "grad_norm": NaN, "learning_rate": 9.657738095238096e-06, "loss": 42.4459, "step": 2637 }, { "epoch": 62.811940298507466, "grad_norm": 26.87259292602539, "learning_rate": 9.657738095238096e-06, "loss": 42.7683, "step": 2638 }, { "epoch": 62.83582089552239, "grad_norm": 31.837942123413086, "learning_rate": 9.654761904761906e-06, "loss": 44.2405, "step": 2639 }, { "epoch": 62.85970149253731, "grad_norm": 24.40672492980957, "learning_rate": 9.651785714285715e-06, "loss": 44.4058, "step": 2640 }, { "epoch": 62.88358208955224, "grad_norm": 29.0338134765625, "learning_rate": 9.648809523809524e-06, "loss": 43.405, "step": 2641 }, { "epoch": 62.907462686567165, "grad_norm": 28.022174835205078, "learning_rate": 9.645833333333333e-06, "loss": 43.6733, "step": 2642 }, { "epoch": 62.93134328358209, "grad_norm": 19.517061233520508, "learning_rate": 9.642857142857144e-06, "loss": 44.2386, "step": 2643 }, { "epoch": 62.95522388059702, "grad_norm": 22.2393741607666, "learning_rate": 9.639880952380953e-06, "loss": 45.1874, "step": 2644 }, { "epoch": 62.97910447761194, "grad_norm": 27.02622413635254, "learning_rate": 9.636904761904762e-06, "loss": 43.8541, "step": 2645 }, { "epoch": 63.0, "grad_norm": 20.51211929321289, "learning_rate": 9.633928571428573e-06, "loss": 39.6487, "step": 2646 }, { "epoch": 63.02388059701492, "grad_norm": 24.02116584777832, "learning_rate": 9.630952380952382e-06, "loss": 44.5685, "step": 2647 }, { "epoch": 63.04776119402985, "grad_norm": 30.00434112548828, "learning_rate": 9.627976190476191e-06, "loss": 43.2549, "step": 2648 }, { "epoch": 63.071641791044776, "grad_norm": 23.16147804260254, "learning_rate": 9.625e-06, "loss": 44.1254, "step": 2649 }, { "epoch": 63.0955223880597, "grad_norm": 30.86275291442871, "learning_rate": 9.622023809523811e-06, "loss": 43.4804, "step": 2650 }, { "epoch": 63.11940298507463, "grad_norm": 27.942575454711914, "learning_rate": 9.61904761904762e-06, "loss": 44.4437, "step": 2651 }, { "epoch": 63.14328358208955, "grad_norm": 22.330169677734375, "learning_rate": 9.616071428571429e-06, "loss": 44.1067, "step": 2652 }, { "epoch": 63.167164179104475, "grad_norm": 27.878795623779297, "learning_rate": 9.61309523809524e-06, "loss": 42.5768, "step": 2653 }, { "epoch": 63.191044776119405, "grad_norm": 23.200098037719727, "learning_rate": 9.610119047619049e-06, "loss": 43.5906, "step": 2654 }, { "epoch": 63.21492537313433, "grad_norm": 23.872238159179688, "learning_rate": 9.607142857142858e-06, "loss": 43.8177, "step": 2655 }, { "epoch": 63.23880597014925, "grad_norm": 31.89397430419922, "learning_rate": 9.604166666666669e-06, "loss": 43.5719, "step": 2656 }, { "epoch": 63.26268656716418, "grad_norm": 24.745256423950195, "learning_rate": 9.601190476190478e-06, "loss": 43.1085, "step": 2657 }, { "epoch": 63.286567164179104, "grad_norm": 31.859682083129883, "learning_rate": 9.598214285714287e-06, "loss": 42.8871, "step": 2658 }, { "epoch": 63.31044776119403, "grad_norm": 25.792551040649414, "learning_rate": 9.595238095238096e-06, "loss": 42.7027, "step": 2659 }, { "epoch": 63.33432835820896, "grad_norm": 25.225967407226562, "learning_rate": 9.592261904761906e-06, "loss": 43.0075, "step": 2660 }, { "epoch": 63.35820895522388, "grad_norm": 27.146207809448242, "learning_rate": 9.589285714285716e-06, "loss": 44.3992, "step": 2661 }, { "epoch": 63.3820895522388, "grad_norm": 16.27069664001465, "learning_rate": 9.586309523809525e-06, "loss": 44.1708, "step": 2662 }, { "epoch": 63.40597014925373, "grad_norm": 34.79555892944336, "learning_rate": 9.583333333333335e-06, "loss": 44.4863, "step": 2663 }, { "epoch": 63.429850746268656, "grad_norm": 23.31925392150879, "learning_rate": 9.580357142857144e-06, "loss": 44.3615, "step": 2664 }, { "epoch": 63.45373134328358, "grad_norm": 28.239566802978516, "learning_rate": 9.577380952380953e-06, "loss": 45.042, "step": 2665 }, { "epoch": 63.47761194029851, "grad_norm": 25.107566833496094, "learning_rate": 9.574404761904762e-06, "loss": 44.4372, "step": 2666 }, { "epoch": 63.50149253731343, "grad_norm": 20.601322174072266, "learning_rate": 9.571428571428573e-06, "loss": 43.5807, "step": 2667 }, { "epoch": 63.525373134328355, "grad_norm": 34.94065475463867, "learning_rate": 9.568452380952382e-06, "loss": 42.8904, "step": 2668 }, { "epoch": 63.549253731343285, "grad_norm": 26.336591720581055, "learning_rate": 9.565476190476191e-06, "loss": 44.1117, "step": 2669 }, { "epoch": 63.57313432835821, "grad_norm": 35.81476974487305, "learning_rate": 9.562500000000002e-06, "loss": 44.847, "step": 2670 }, { "epoch": 63.59701492537313, "grad_norm": 21.011463165283203, "learning_rate": 9.559523809523811e-06, "loss": 43.9457, "step": 2671 }, { "epoch": 63.62089552238806, "grad_norm": 36.32665252685547, "learning_rate": 9.55654761904762e-06, "loss": 43.4279, "step": 2672 }, { "epoch": 63.644776119402984, "grad_norm": 21.384214401245117, "learning_rate": 9.55357142857143e-06, "loss": 45.3947, "step": 2673 }, { "epoch": 63.668656716417914, "grad_norm": 38.893680572509766, "learning_rate": 9.55059523809524e-06, "loss": 43.9397, "step": 2674 }, { "epoch": 63.69253731343284, "grad_norm": 26.085948944091797, "learning_rate": 9.547619047619049e-06, "loss": 43.118, "step": 2675 }, { "epoch": 63.71641791044776, "grad_norm": 35.67828369140625, "learning_rate": 9.544642857142858e-06, "loss": 44.8236, "step": 2676 }, { "epoch": 63.74029850746269, "grad_norm": 25.065685272216797, "learning_rate": 9.541666666666669e-06, "loss": 43.8344, "step": 2677 }, { "epoch": 63.76417910447761, "grad_norm": 26.32991600036621, "learning_rate": 9.538690476190478e-06, "loss": 45.5309, "step": 2678 }, { "epoch": 63.788059701492536, "grad_norm": 26.833250045776367, "learning_rate": 9.535714285714287e-06, "loss": 43.7626, "step": 2679 }, { "epoch": 63.811940298507466, "grad_norm": 23.64604949951172, "learning_rate": 9.532738095238096e-06, "loss": 43.7545, "step": 2680 }, { "epoch": 63.83582089552239, "grad_norm": 24.57122230529785, "learning_rate": 9.529761904761905e-06, "loss": 43.5666, "step": 2681 }, { "epoch": 63.85970149253731, "grad_norm": 21.429603576660156, "learning_rate": 9.526785714285714e-06, "loss": 44.8421, "step": 2682 }, { "epoch": 63.88358208955224, "grad_norm": NaN, "learning_rate": 9.523809523809525e-06, "loss": 77.6969, "step": 2683 }, { "epoch": 63.907462686567165, "grad_norm": 25.843442916870117, "learning_rate": 9.523809523809525e-06, "loss": 42.4353, "step": 2684 }, { "epoch": 63.93134328358209, "grad_norm": 26.74856185913086, "learning_rate": 9.520833333333334e-06, "loss": 45.0478, "step": 2685 }, { "epoch": 63.95522388059702, "grad_norm": 22.9956111907959, "learning_rate": 9.517857142857143e-06, "loss": 44.3383, "step": 2686 }, { "epoch": 63.97910447761194, "grad_norm": 17.62372589111328, "learning_rate": 9.514880952380952e-06, "loss": 43.2765, "step": 2687 }, { "epoch": 64.0, "grad_norm": 21.372318267822266, "learning_rate": 9.511904761904763e-06, "loss": 37.8049, "step": 2688 }, { "epoch": 64.02388059701492, "grad_norm": 23.412595748901367, "learning_rate": 9.508928571428572e-06, "loss": 45.446, "step": 2689 }, { "epoch": 64.04776119402985, "grad_norm": 21.825000762939453, "learning_rate": 9.50595238095238e-06, "loss": 42.871, "step": 2690 }, { "epoch": 64.07164179104478, "grad_norm": 18.50835418701172, "learning_rate": 9.502976190476191e-06, "loss": 43.1485, "step": 2691 }, { "epoch": 64.0955223880597, "grad_norm": 22.428272247314453, "learning_rate": 9.5e-06, "loss": 42.7172, "step": 2692 }, { "epoch": 64.11940298507463, "grad_norm": 19.58050537109375, "learning_rate": 9.49702380952381e-06, "loss": 43.4599, "step": 2693 }, { "epoch": 64.14328358208955, "grad_norm": 21.66231346130371, "learning_rate": 9.494047619047619e-06, "loss": 43.601, "step": 2694 }, { "epoch": 64.16716417910447, "grad_norm": 29.5888729095459, "learning_rate": 9.49107142857143e-06, "loss": 44.8395, "step": 2695 }, { "epoch": 64.1910447761194, "grad_norm": 16.3875675201416, "learning_rate": 9.488095238095238e-06, "loss": 43.8201, "step": 2696 }, { "epoch": 64.21492537313434, "grad_norm": 28.326553344726562, "learning_rate": 9.485119047619047e-06, "loss": 45.1189, "step": 2697 }, { "epoch": 64.23880597014926, "grad_norm": 20.549386978149414, "learning_rate": 9.482142857142858e-06, "loss": 44.0127, "step": 2698 }, { "epoch": 64.26268656716418, "grad_norm": 25.79012680053711, "learning_rate": 9.479166666666667e-06, "loss": 43.0571, "step": 2699 }, { "epoch": 64.2865671641791, "grad_norm": 31.000024795532227, "learning_rate": 9.476190476190476e-06, "loss": 42.4615, "step": 2700 }, { "epoch": 64.31044776119403, "grad_norm": 19.49623680114746, "learning_rate": 9.473214285714285e-06, "loss": 45.6714, "step": 2701 }, { "epoch": 64.33432835820895, "grad_norm": 23.13125991821289, "learning_rate": 9.470238095238096e-06, "loss": 44.1373, "step": 2702 }, { "epoch": 64.35820895522389, "grad_norm": 32.59320068359375, "learning_rate": 9.467261904761905e-06, "loss": 42.5976, "step": 2703 }, { "epoch": 64.38208955223881, "grad_norm": 19.396995544433594, "learning_rate": 9.464285714285714e-06, "loss": 43.9782, "step": 2704 }, { "epoch": 64.40597014925373, "grad_norm": 28.275136947631836, "learning_rate": 9.461309523809525e-06, "loss": 44.3116, "step": 2705 }, { "epoch": 64.42985074626866, "grad_norm": 25.157663345336914, "learning_rate": 9.458333333333334e-06, "loss": 45.07, "step": 2706 }, { "epoch": 64.45373134328358, "grad_norm": 22.684513092041016, "learning_rate": 9.455357142857143e-06, "loss": 44.1489, "step": 2707 }, { "epoch": 64.4776119402985, "grad_norm": 19.41883659362793, "learning_rate": 9.452380952380952e-06, "loss": 43.5031, "step": 2708 }, { "epoch": 64.50149253731344, "grad_norm": 27.202308654785156, "learning_rate": 9.449404761904763e-06, "loss": 44.4811, "step": 2709 }, { "epoch": 64.52537313432836, "grad_norm": 17.535995483398438, "learning_rate": 9.446428571428572e-06, "loss": 43.7648, "step": 2710 }, { "epoch": 64.54925373134328, "grad_norm": 21.47702980041504, "learning_rate": 9.443452380952381e-06, "loss": 42.7421, "step": 2711 }, { "epoch": 64.57313432835821, "grad_norm": 20.23499870300293, "learning_rate": 9.440476190476192e-06, "loss": 43.8339, "step": 2712 }, { "epoch": 64.59701492537313, "grad_norm": 19.41843032836914, "learning_rate": 9.4375e-06, "loss": 44.182, "step": 2713 }, { "epoch": 64.62089552238805, "grad_norm": 22.892518997192383, "learning_rate": 9.43452380952381e-06, "loss": 42.7459, "step": 2714 }, { "epoch": 64.64477611940299, "grad_norm": 25.601083755493164, "learning_rate": 9.431547619047619e-06, "loss": 44.316, "step": 2715 }, { "epoch": 64.66865671641791, "grad_norm": 20.23451042175293, "learning_rate": 9.42857142857143e-06, "loss": 44.6613, "step": 2716 }, { "epoch": 64.69253731343284, "grad_norm": 16.326499938964844, "learning_rate": 9.425595238095239e-06, "loss": 43.9092, "step": 2717 }, { "epoch": 64.71641791044776, "grad_norm": 29.170324325561523, "learning_rate": 9.422619047619048e-06, "loss": 42.957, "step": 2718 }, { "epoch": 64.74029850746268, "grad_norm": 24.257295608520508, "learning_rate": 9.419642857142858e-06, "loss": 44.119, "step": 2719 }, { "epoch": 64.7641791044776, "grad_norm": 21.303083419799805, "learning_rate": 9.416666666666667e-06, "loss": 43.4882, "step": 2720 }, { "epoch": 64.78805970149254, "grad_norm": 20.77082633972168, "learning_rate": 9.413690476190476e-06, "loss": 43.9079, "step": 2721 }, { "epoch": 64.81194029850747, "grad_norm": 24.470279693603516, "learning_rate": 9.410714285714286e-06, "loss": 45.0313, "step": 2722 }, { "epoch": 64.83582089552239, "grad_norm": 22.445308685302734, "learning_rate": 9.407738095238096e-06, "loss": 43.0798, "step": 2723 }, { "epoch": 64.85970149253731, "grad_norm": 27.02490234375, "learning_rate": 9.404761904761905e-06, "loss": 43.1318, "step": 2724 }, { "epoch": 64.88358208955223, "grad_norm": 22.678592681884766, "learning_rate": 9.401785714285714e-06, "loss": 44.1473, "step": 2725 }, { "epoch": 64.90746268656716, "grad_norm": 26.62460708618164, "learning_rate": 9.398809523809525e-06, "loss": 43.2439, "step": 2726 }, { "epoch": 64.9313432835821, "grad_norm": 24.331209182739258, "learning_rate": 9.395833333333334e-06, "loss": 43.517, "step": 2727 }, { "epoch": 64.95522388059702, "grad_norm": 20.00579071044922, "learning_rate": 9.392857142857143e-06, "loss": 44.7679, "step": 2728 }, { "epoch": 64.97910447761194, "grad_norm": 28.423246383666992, "learning_rate": 9.389880952380954e-06, "loss": 43.5171, "step": 2729 }, { "epoch": 65.0, "grad_norm": 22.855792999267578, "learning_rate": 9.386904761904763e-06, "loss": 38.3847, "step": 2730 }, { "epoch": 65.02388059701492, "grad_norm": 22.053749084472656, "learning_rate": 9.383928571428572e-06, "loss": 44.0032, "step": 2731 }, { "epoch": 65.04776119402985, "grad_norm": 24.45530891418457, "learning_rate": 9.380952380952381e-06, "loss": 43.4024, "step": 2732 }, { "epoch": 65.07164179104478, "grad_norm": 24.508438110351562, "learning_rate": 9.377976190476192e-06, "loss": 43.4435, "step": 2733 }, { "epoch": 65.0955223880597, "grad_norm": 22.03391456604004, "learning_rate": 9.375000000000001e-06, "loss": 43.413, "step": 2734 }, { "epoch": 65.11940298507463, "grad_norm": 23.95793342590332, "learning_rate": 9.37202380952381e-06, "loss": 44.1908, "step": 2735 }, { "epoch": 65.14328358208955, "grad_norm": 23.00299072265625, "learning_rate": 9.36904761904762e-06, "loss": 43.9526, "step": 2736 }, { "epoch": 65.16716417910447, "grad_norm": 21.019451141357422, "learning_rate": 9.36607142857143e-06, "loss": 44.114, "step": 2737 }, { "epoch": 65.1910447761194, "grad_norm": 21.974138259887695, "learning_rate": 9.363095238095239e-06, "loss": 43.8516, "step": 2738 }, { "epoch": 65.21492537313434, "grad_norm": 17.860519409179688, "learning_rate": 9.360119047619048e-06, "loss": 44.2829, "step": 2739 }, { "epoch": 65.23880597014926, "grad_norm": 18.76349639892578, "learning_rate": 9.357142857142859e-06, "loss": 42.7555, "step": 2740 }, { "epoch": 65.26268656716418, "grad_norm": 22.4278621673584, "learning_rate": 9.354166666666668e-06, "loss": 43.807, "step": 2741 }, { "epoch": 65.2865671641791, "grad_norm": 17.867431640625, "learning_rate": 9.351190476190477e-06, "loss": 43.474, "step": 2742 }, { "epoch": 65.31044776119403, "grad_norm": 16.060117721557617, "learning_rate": 9.348214285714287e-06, "loss": 44.2362, "step": 2743 }, { "epoch": 65.33432835820895, "grad_norm": 27.756179809570312, "learning_rate": 9.345238095238096e-06, "loss": 43.9566, "step": 2744 }, { "epoch": 65.35820895522389, "grad_norm": 18.937381744384766, "learning_rate": 9.342261904761905e-06, "loss": 42.2936, "step": 2745 }, { "epoch": 65.38208955223881, "grad_norm": 23.91965675354004, "learning_rate": 9.339285714285715e-06, "loss": 43.1194, "step": 2746 }, { "epoch": 65.40597014925373, "grad_norm": 28.585317611694336, "learning_rate": 9.336309523809525e-06, "loss": 43.7419, "step": 2747 }, { "epoch": 65.42985074626866, "grad_norm": 18.788578033447266, "learning_rate": 9.333333333333334e-06, "loss": 43.7811, "step": 2748 }, { "epoch": 65.45373134328358, "grad_norm": 24.84532928466797, "learning_rate": 9.330357142857143e-06, "loss": 44.4898, "step": 2749 }, { "epoch": 65.4776119402985, "grad_norm": 20.880659103393555, "learning_rate": 9.327380952380954e-06, "loss": 44.5627, "step": 2750 }, { "epoch": 65.50149253731344, "grad_norm": 18.502254486083984, "learning_rate": 9.324404761904763e-06, "loss": 43.621, "step": 2751 }, { "epoch": 65.52537313432836, "grad_norm": 23.150991439819336, "learning_rate": 9.321428571428572e-06, "loss": 43.9683, "step": 2752 }, { "epoch": 65.54925373134328, "grad_norm": 20.03653907775879, "learning_rate": 9.318452380952381e-06, "loss": 42.4545, "step": 2753 }, { "epoch": 65.57313432835821, "grad_norm": 24.8642635345459, "learning_rate": 9.315476190476192e-06, "loss": 43.1368, "step": 2754 }, { "epoch": 65.59701492537313, "grad_norm": 19.812273025512695, "learning_rate": 9.312500000000001e-06, "loss": 44.5991, "step": 2755 }, { "epoch": 65.62089552238805, "grad_norm": 20.746320724487305, "learning_rate": 9.30952380952381e-06, "loss": 42.3573, "step": 2756 }, { "epoch": 65.64477611940299, "grad_norm": 28.684810638427734, "learning_rate": 9.30654761904762e-06, "loss": 43.798, "step": 2757 }, { "epoch": 65.66865671641791, "grad_norm": 17.441326141357422, "learning_rate": 9.30357142857143e-06, "loss": 44.0526, "step": 2758 }, { "epoch": 65.69253731343284, "grad_norm": 27.091472625732422, "learning_rate": 9.300595238095239e-06, "loss": 43.5748, "step": 2759 }, { "epoch": 65.71641791044776, "grad_norm": 23.270544052124023, "learning_rate": 9.297619047619048e-06, "loss": 44.0027, "step": 2760 }, { "epoch": 65.74029850746268, "grad_norm": 28.322011947631836, "learning_rate": 9.294642857142859e-06, "loss": 44.4845, "step": 2761 }, { "epoch": 65.7641791044776, "grad_norm": 22.097503662109375, "learning_rate": 9.291666666666668e-06, "loss": 42.6655, "step": 2762 }, { "epoch": 65.78805970149254, "grad_norm": 28.492340087890625, "learning_rate": 9.288690476190477e-06, "loss": 44.669, "step": 2763 }, { "epoch": 65.81194029850747, "grad_norm": 18.208921432495117, "learning_rate": 9.285714285714288e-06, "loss": 44.1797, "step": 2764 }, { "epoch": 65.83582089552239, "grad_norm": 32.15492248535156, "learning_rate": 9.282738095238097e-06, "loss": 43.1981, "step": 2765 }, { "epoch": 65.85970149253731, "grad_norm": 22.335176467895508, "learning_rate": 9.279761904761906e-06, "loss": 43.0925, "step": 2766 }, { "epoch": 65.88358208955223, "grad_norm": 26.412460327148438, "learning_rate": 9.276785714285715e-06, "loss": 43.3175, "step": 2767 }, { "epoch": 65.90746268656716, "grad_norm": 21.380569458007812, "learning_rate": 9.273809523809525e-06, "loss": 45.7111, "step": 2768 }, { "epoch": 65.9313432835821, "grad_norm": 26.568763732910156, "learning_rate": 9.270833333333334e-06, "loss": 44.4841, "step": 2769 }, { "epoch": 65.95522388059702, "grad_norm": 22.947973251342773, "learning_rate": 9.267857142857144e-06, "loss": 44.0597, "step": 2770 }, { "epoch": 65.97910447761194, "grad_norm": 28.732847213745117, "learning_rate": 9.264880952380954e-06, "loss": 43.9232, "step": 2771 }, { "epoch": 66.0, "grad_norm": 19.51029396057129, "learning_rate": 9.261904761904763e-06, "loss": 38.3696, "step": 2772 }, { "epoch": 66.02388059701492, "grad_norm": 26.772396087646484, "learning_rate": 9.258928571428572e-06, "loss": 44.29, "step": 2773 }, { "epoch": 66.04776119402985, "grad_norm": 28.08762550354004, "learning_rate": 9.255952380952381e-06, "loss": 43.5123, "step": 2774 }, { "epoch": 66.07164179104478, "grad_norm": 23.839458465576172, "learning_rate": 9.252976190476192e-06, "loss": 42.9248, "step": 2775 }, { "epoch": 66.0955223880597, "grad_norm": 34.98361587524414, "learning_rate": 9.250000000000001e-06, "loss": 44.5183, "step": 2776 }, { "epoch": 66.11940298507463, "grad_norm": 18.406028747558594, "learning_rate": 9.24702380952381e-06, "loss": 43.6267, "step": 2777 }, { "epoch": 66.14328358208955, "grad_norm": 24.17736053466797, "learning_rate": 9.244047619047621e-06, "loss": 43.9814, "step": 2778 }, { "epoch": 66.16716417910447, "grad_norm": 25.904033660888672, "learning_rate": 9.24107142857143e-06, "loss": 44.2089, "step": 2779 }, { "epoch": 66.1910447761194, "grad_norm": 18.518312454223633, "learning_rate": 9.238095238095239e-06, "loss": 43.8829, "step": 2780 }, { "epoch": 66.21492537313434, "grad_norm": 13.93060302734375, "learning_rate": 9.235119047619048e-06, "loss": 43.0088, "step": 2781 }, { "epoch": 66.23880597014926, "grad_norm": 18.91266632080078, "learning_rate": 9.232142857142859e-06, "loss": 43.9835, "step": 2782 }, { "epoch": 66.26268656716418, "grad_norm": 22.63692283630371, "learning_rate": 9.229166666666668e-06, "loss": 43.8378, "step": 2783 }, { "epoch": 66.2865671641791, "grad_norm": 19.935054779052734, "learning_rate": 9.226190476190477e-06, "loss": 43.5139, "step": 2784 }, { "epoch": 66.31044776119403, "grad_norm": 20.09627342224121, "learning_rate": 9.223214285714288e-06, "loss": 42.9882, "step": 2785 }, { "epoch": 66.33432835820895, "grad_norm": 16.47371482849121, "learning_rate": 9.220238095238097e-06, "loss": 44.0665, "step": 2786 }, { "epoch": 66.35820895522389, "grad_norm": 25.363866806030273, "learning_rate": 9.217261904761904e-06, "loss": 44.696, "step": 2787 }, { "epoch": 66.38208955223881, "grad_norm": 19.95291519165039, "learning_rate": 9.214285714285715e-06, "loss": 44.1116, "step": 2788 }, { "epoch": 66.40597014925373, "grad_norm": NaN, "learning_rate": 9.211309523809524e-06, "loss": 76.4785, "step": 2789 }, { "epoch": 66.42985074626866, "grad_norm": 19.490074157714844, "learning_rate": 9.211309523809524e-06, "loss": 44.0432, "step": 2790 }, { "epoch": 66.45373134328358, "grad_norm": 17.4990234375, "learning_rate": 9.208333333333333e-06, "loss": 43.2972, "step": 2791 }, { "epoch": 66.4776119402985, "grad_norm": 18.9461727142334, "learning_rate": 9.205357142857144e-06, "loss": 43.6698, "step": 2792 }, { "epoch": 66.50149253731344, "grad_norm": 27.035369873046875, "learning_rate": 9.202380952380953e-06, "loss": 43.0748, "step": 2793 }, { "epoch": 66.52537313432836, "grad_norm": 18.747451782226562, "learning_rate": 9.199404761904762e-06, "loss": 43.4684, "step": 2794 }, { "epoch": 66.54925373134328, "grad_norm": 22.31947135925293, "learning_rate": 9.196428571428571e-06, "loss": 43.224, "step": 2795 }, { "epoch": 66.57313432835821, "grad_norm": 20.444355010986328, "learning_rate": 9.193452380952382e-06, "loss": 44.7154, "step": 2796 }, { "epoch": 66.59701492537313, "grad_norm": 20.574586868286133, "learning_rate": 9.19047619047619e-06, "loss": 42.8251, "step": 2797 }, { "epoch": 66.62089552238805, "grad_norm": 21.91870880126953, "learning_rate": 9.1875e-06, "loss": 42.1616, "step": 2798 }, { "epoch": 66.64477611940299, "grad_norm": 20.03777503967285, "learning_rate": 9.18452380952381e-06, "loss": 43.9713, "step": 2799 }, { "epoch": 66.66865671641791, "grad_norm": 25.128442764282227, "learning_rate": 9.18154761904762e-06, "loss": 43.1631, "step": 2800 }, { "epoch": 66.69253731343284, "grad_norm": 21.742931365966797, "learning_rate": 9.178571428571429e-06, "loss": 43.8442, "step": 2801 }, { "epoch": 66.71641791044776, "grad_norm": 25.366992950439453, "learning_rate": 9.17559523809524e-06, "loss": 42.6068, "step": 2802 }, { "epoch": 66.74029850746268, "grad_norm": 22.109886169433594, "learning_rate": 9.172619047619048e-06, "loss": 43.0879, "step": 2803 }, { "epoch": 66.7641791044776, "grad_norm": 26.36429786682129, "learning_rate": 9.169642857142857e-06, "loss": 43.9465, "step": 2804 }, { "epoch": 66.78805970149254, "grad_norm": 20.30027198791504, "learning_rate": 9.166666666666666e-06, "loss": 44.1096, "step": 2805 }, { "epoch": 66.81194029850747, "grad_norm": 25.123811721801758, "learning_rate": 9.163690476190477e-06, "loss": 44.2981, "step": 2806 }, { "epoch": 66.83582089552239, "grad_norm": 23.127016067504883, "learning_rate": 9.160714285714286e-06, "loss": 42.5751, "step": 2807 }, { "epoch": 66.85970149253731, "grad_norm": NaN, "learning_rate": 9.157738095238095e-06, "loss": 66.1901, "step": 2808 }, { "epoch": 66.88358208955223, "grad_norm": 26.10099220275879, "learning_rate": 9.157738095238095e-06, "loss": 44.763, "step": 2809 }, { "epoch": 66.90746268656716, "grad_norm": 23.104337692260742, "learning_rate": 9.154761904761906e-06, "loss": 43.0964, "step": 2810 }, { "epoch": 66.9313432835821, "grad_norm": 25.94508934020996, "learning_rate": 9.151785714285715e-06, "loss": 44.2004, "step": 2811 }, { "epoch": 66.95522388059702, "grad_norm": 19.599328994750977, "learning_rate": 9.148809523809524e-06, "loss": 43.9027, "step": 2812 }, { "epoch": 66.97910447761194, "grad_norm": NaN, "learning_rate": 9.145833333333333e-06, "loss": 53.695, "step": 2813 }, { "epoch": 67.0, "grad_norm": 26.630434036254883, "learning_rate": 9.145833333333333e-06, "loss": 39.2172, "step": 2814 }, { "epoch": 67.02388059701492, "grad_norm": 20.954557418823242, "learning_rate": 9.142857142857144e-06, "loss": 45.022, "step": 2815 }, { "epoch": 67.04776119402985, "grad_norm": 34.23554611206055, "learning_rate": 9.139880952380953e-06, "loss": 44.5962, "step": 2816 }, { "epoch": 67.07164179104478, "grad_norm": 23.212488174438477, "learning_rate": 9.136904761904762e-06, "loss": 43.3898, "step": 2817 }, { "epoch": 67.0955223880597, "grad_norm": 28.811594009399414, "learning_rate": 9.133928571428573e-06, "loss": 43.0525, "step": 2818 }, { "epoch": 67.11940298507463, "grad_norm": 25.612319946289062, "learning_rate": 9.130952380952382e-06, "loss": 45.0229, "step": 2819 }, { "epoch": 67.14328358208955, "grad_norm": 19.928409576416016, "learning_rate": 9.12797619047619e-06, "loss": 42.2313, "step": 2820 }, { "epoch": 67.16716417910447, "grad_norm": 21.425382614135742, "learning_rate": 9.125e-06, "loss": 43.8085, "step": 2821 }, { "epoch": 67.1910447761194, "grad_norm": 24.726892471313477, "learning_rate": 9.12202380952381e-06, "loss": 42.5952, "step": 2822 }, { "epoch": 67.21492537313434, "grad_norm": 21.010210037231445, "learning_rate": 9.11904761904762e-06, "loss": 44.5508, "step": 2823 }, { "epoch": 67.23880597014926, "grad_norm": 17.506437301635742, "learning_rate": 9.116071428571429e-06, "loss": 42.7668, "step": 2824 }, { "epoch": 67.26268656716418, "grad_norm": 20.494918823242188, "learning_rate": 9.11309523809524e-06, "loss": 42.8061, "step": 2825 }, { "epoch": 67.2865671641791, "grad_norm": 20.985994338989258, "learning_rate": 9.110119047619049e-06, "loss": 44.8666, "step": 2826 }, { "epoch": 67.31044776119403, "grad_norm": 22.91364097595215, "learning_rate": 9.107142857142858e-06, "loss": 44.1208, "step": 2827 }, { "epoch": 67.33432835820895, "grad_norm": 19.81299591064453, "learning_rate": 9.104166666666667e-06, "loss": 43.939, "step": 2828 }, { "epoch": 67.35820895522389, "grad_norm": 23.234989166259766, "learning_rate": 9.101190476190477e-06, "loss": 42.0411, "step": 2829 }, { "epoch": 67.38208955223881, "grad_norm": 22.17540168762207, "learning_rate": 9.098214285714286e-06, "loss": 43.5693, "step": 2830 }, { "epoch": 67.40597014925373, "grad_norm": 21.292728424072266, "learning_rate": 9.095238095238095e-06, "loss": 44.0742, "step": 2831 }, { "epoch": 67.42985074626866, "grad_norm": 28.952625274658203, "learning_rate": 9.092261904761906e-06, "loss": 42.8393, "step": 2832 }, { "epoch": 67.45373134328358, "grad_norm": 19.387126922607422, "learning_rate": 9.089285714285715e-06, "loss": 41.7117, "step": 2833 }, { "epoch": 67.4776119402985, "grad_norm": 23.430946350097656, "learning_rate": 9.086309523809524e-06, "loss": 42.7006, "step": 2834 }, { "epoch": 67.50149253731344, "grad_norm": 28.108659744262695, "learning_rate": 9.083333333333333e-06, "loss": 45.417, "step": 2835 }, { "epoch": 67.52537313432836, "grad_norm": 22.115737915039062, "learning_rate": 9.080357142857144e-06, "loss": 44.4444, "step": 2836 }, { "epoch": 67.54925373134328, "grad_norm": 29.82137107849121, "learning_rate": 9.077380952380953e-06, "loss": 43.4888, "step": 2837 }, { "epoch": 67.57313432835821, "grad_norm": 25.010299682617188, "learning_rate": 9.074404761904762e-06, "loss": 43.9609, "step": 2838 }, { "epoch": 67.59701492537313, "grad_norm": 21.027952194213867, "learning_rate": 9.071428571428573e-06, "loss": 44.2489, "step": 2839 }, { "epoch": 67.62089552238805, "grad_norm": 27.009239196777344, "learning_rate": 9.068452380952382e-06, "loss": 43.6564, "step": 2840 }, { "epoch": 67.64477611940299, "grad_norm": 19.743545532226562, "learning_rate": 9.065476190476191e-06, "loss": 43.9997, "step": 2841 }, { "epoch": 67.66865671641791, "grad_norm": 28.90030288696289, "learning_rate": 9.0625e-06, "loss": 42.6926, "step": 2842 }, { "epoch": 67.69253731343284, "grad_norm": 25.418079376220703, "learning_rate": 9.05952380952381e-06, "loss": 43.2036, "step": 2843 }, { "epoch": 67.71641791044776, "grad_norm": 18.400348663330078, "learning_rate": 9.05654761904762e-06, "loss": 44.4565, "step": 2844 }, { "epoch": 67.74029850746268, "grad_norm": 26.924072265625, "learning_rate": 9.053571428571429e-06, "loss": 44.4893, "step": 2845 }, { "epoch": 67.7641791044776, "grad_norm": 25.352108001708984, "learning_rate": 9.05059523809524e-06, "loss": 43.0946, "step": 2846 }, { "epoch": 67.78805970149254, "grad_norm": 19.23507308959961, "learning_rate": 9.047619047619049e-06, "loss": 43.0507, "step": 2847 }, { "epoch": 67.81194029850747, "grad_norm": 28.143768310546875, "learning_rate": 9.044642857142858e-06, "loss": 43.2408, "step": 2848 }, { "epoch": 67.83582089552239, "grad_norm": 20.298084259033203, "learning_rate": 9.041666666666667e-06, "loss": 44.288, "step": 2849 }, { "epoch": 67.85970149253731, "grad_norm": 21.65508460998535, "learning_rate": 9.038690476190478e-06, "loss": 43.4648, "step": 2850 }, { "epoch": 67.88358208955223, "grad_norm": 23.92845916748047, "learning_rate": 9.035714285714287e-06, "loss": 42.1811, "step": 2851 }, { "epoch": 67.90746268656716, "grad_norm": 20.545076370239258, "learning_rate": 9.032738095238096e-06, "loss": 42.9022, "step": 2852 }, { "epoch": 67.9313432835821, "grad_norm": 32.36678695678711, "learning_rate": 9.029761904761906e-06, "loss": 43.4491, "step": 2853 }, { "epoch": 67.95522388059702, "grad_norm": 23.14188003540039, "learning_rate": 9.026785714285715e-06, "loss": 44.4911, "step": 2854 }, { "epoch": 67.97910447761194, "grad_norm": 31.488239288330078, "learning_rate": 9.023809523809524e-06, "loss": 44.5784, "step": 2855 }, { "epoch": 68.0, "grad_norm": 24.96849250793457, "learning_rate": 9.020833333333334e-06, "loss": 38.8154, "step": 2856 }, { "epoch": 68.02388059701492, "grad_norm": 26.00245475769043, "learning_rate": 9.017857142857144e-06, "loss": 43.6339, "step": 2857 }, { "epoch": 68.04776119402985, "grad_norm": 33.977596282958984, "learning_rate": 9.014880952380953e-06, "loss": 43.7634, "step": 2858 }, { "epoch": 68.07164179104478, "grad_norm": 21.762340545654297, "learning_rate": 9.011904761904762e-06, "loss": 43.8865, "step": 2859 }, { "epoch": 68.0955223880597, "grad_norm": 38.268455505371094, "learning_rate": 9.008928571428573e-06, "loss": 43.8947, "step": 2860 }, { "epoch": 68.11940298507463, "grad_norm": 26.789215087890625, "learning_rate": 9.005952380952382e-06, "loss": 42.0072, "step": 2861 }, { "epoch": 68.14328358208955, "grad_norm": 44.15632629394531, "learning_rate": 9.002976190476191e-06, "loss": 43.1045, "step": 2862 }, { "epoch": 68.16716417910447, "grad_norm": 36.71260070800781, "learning_rate": 9e-06, "loss": 43.7232, "step": 2863 }, { "epoch": 68.1910447761194, "grad_norm": 38.94734191894531, "learning_rate": 8.997023809523811e-06, "loss": 43.4934, "step": 2864 }, { "epoch": 68.21492537313434, "grad_norm": 38.295501708984375, "learning_rate": 8.99404761904762e-06, "loss": 43.3372, "step": 2865 }, { "epoch": 68.23880597014926, "grad_norm": 30.496740341186523, "learning_rate": 8.991071428571429e-06, "loss": 43.4905, "step": 2866 }, { "epoch": 68.26268656716418, "grad_norm": 30.001113891601562, "learning_rate": 8.98809523809524e-06, "loss": 44.0486, "step": 2867 }, { "epoch": 68.2865671641791, "grad_norm": 39.6522216796875, "learning_rate": 8.985119047619049e-06, "loss": 43.4828, "step": 2868 }, { "epoch": 68.31044776119403, "grad_norm": 31.28143310546875, "learning_rate": 8.982142857142858e-06, "loss": 43.7613, "step": 2869 }, { "epoch": 68.33432835820895, "grad_norm": 31.950016021728516, "learning_rate": 8.979166666666667e-06, "loss": 43.4209, "step": 2870 }, { "epoch": 68.35820895522389, "grad_norm": 30.61543083190918, "learning_rate": 8.976190476190478e-06, "loss": 43.2437, "step": 2871 }, { "epoch": 68.38208955223881, "grad_norm": 39.3588752746582, "learning_rate": 8.973214285714287e-06, "loss": 43.1893, "step": 2872 }, { "epoch": 68.40597014925373, "grad_norm": 29.70042610168457, "learning_rate": 8.970238095238096e-06, "loss": 42.1193, "step": 2873 }, { "epoch": 68.42985074626866, "grad_norm": 40.6136474609375, "learning_rate": 8.967261904761907e-06, "loss": 41.7532, "step": 2874 }, { "epoch": 68.45373134328358, "grad_norm": 36.44509506225586, "learning_rate": 8.964285714285716e-06, "loss": 44.5191, "step": 2875 }, { "epoch": 68.4776119402985, "grad_norm": NaN, "learning_rate": 8.961309523809525e-06, "loss": 70.4286, "step": 2876 }, { "epoch": 68.50149253731344, "grad_norm": 27.74057960510254, "learning_rate": 8.961309523809525e-06, "loss": 43.254, "step": 2877 }, { "epoch": 68.52537313432836, "grad_norm": 29.346860885620117, "learning_rate": 8.958333333333334e-06, "loss": 43.5863, "step": 2878 }, { "epoch": 68.54925373134328, "grad_norm": 36.642398834228516, "learning_rate": 8.955357142857144e-06, "loss": 43.3733, "step": 2879 }, { "epoch": 68.57313432835821, "grad_norm": 33.670162200927734, "learning_rate": 8.952380952380953e-06, "loss": 43.7232, "step": 2880 }, { "epoch": 68.59701492537313, "grad_norm": 33.738712310791016, "learning_rate": 8.949404761904763e-06, "loss": 42.704, "step": 2881 }, { "epoch": 68.62089552238805, "grad_norm": 31.452713012695312, "learning_rate": 8.946428571428573e-06, "loss": 43.8197, "step": 2882 }, { "epoch": 68.64477611940299, "grad_norm": 35.86618423461914, "learning_rate": 8.943452380952382e-06, "loss": 44.9871, "step": 2883 }, { "epoch": 68.66865671641791, "grad_norm": 30.94584083557129, "learning_rate": 8.940476190476191e-06, "loss": 43.8781, "step": 2884 }, { "epoch": 68.69253731343284, "grad_norm": 30.81380271911621, "learning_rate": 8.9375e-06, "loss": 44.2209, "step": 2885 }, { "epoch": 68.71641791044776, "grad_norm": 28.51966094970703, "learning_rate": 8.934523809523811e-06, "loss": 44.16, "step": 2886 }, { "epoch": 68.74029850746268, "grad_norm": NaN, "learning_rate": 8.93154761904762e-06, "loss": 77.0854, "step": 2887 }, { "epoch": 68.7641791044776, "grad_norm": 34.56461715698242, "learning_rate": 8.93154761904762e-06, "loss": 42.7519, "step": 2888 }, { "epoch": 68.78805970149254, "grad_norm": 27.781518936157227, "learning_rate": 8.92857142857143e-06, "loss": 44.0645, "step": 2889 }, { "epoch": 68.81194029850747, "grad_norm": 33.2479133605957, "learning_rate": 8.92559523809524e-06, "loss": 44.109, "step": 2890 }, { "epoch": 68.83582089552239, "grad_norm": 30.329626083374023, "learning_rate": 8.922619047619049e-06, "loss": 42.8678, "step": 2891 }, { "epoch": 68.85970149253731, "grad_norm": 32.120269775390625, "learning_rate": 8.919642857142858e-06, "loss": 44.2325, "step": 2892 }, { "epoch": 68.88358208955223, "grad_norm": 27.283164978027344, "learning_rate": 8.916666666666667e-06, "loss": 43.7788, "step": 2893 }, { "epoch": 68.90746268656716, "grad_norm": 31.86570930480957, "learning_rate": 8.913690476190478e-06, "loss": 44.3469, "step": 2894 }, { "epoch": 68.9313432835821, "grad_norm": 22.55097007751465, "learning_rate": 8.910714285714287e-06, "loss": 43.85, "step": 2895 }, { "epoch": 68.95522388059702, "grad_norm": 34.648773193359375, "learning_rate": 8.907738095238096e-06, "loss": 44.526, "step": 2896 }, { "epoch": 68.97910447761194, "grad_norm": 26.3565731048584, "learning_rate": 8.904761904761905e-06, "loss": 44.2, "step": 2897 }, { "epoch": 69.0, "grad_norm": 34.48598098754883, "learning_rate": 8.901785714285714e-06, "loss": 37.5511, "step": 2898 }, { "epoch": 69.02388059701492, "grad_norm": 36.6775016784668, "learning_rate": 8.898809523809525e-06, "loss": 42.6231, "step": 2899 }, { "epoch": 69.04776119402985, "grad_norm": 33.529296875, "learning_rate": 8.895833333333334e-06, "loss": 43.2504, "step": 2900 }, { "epoch": 69.07164179104478, "grad_norm": 31.762542724609375, "learning_rate": 8.892857142857143e-06, "loss": 43.752, "step": 2901 }, { "epoch": 69.0955223880597, "grad_norm": 28.147245407104492, "learning_rate": 8.889880952380952e-06, "loss": 43.8319, "step": 2902 }, { "epoch": 69.11940298507463, "grad_norm": 27.896669387817383, "learning_rate": 8.886904761904763e-06, "loss": 42.1947, "step": 2903 }, { "epoch": 69.14328358208955, "grad_norm": 28.625850677490234, "learning_rate": 8.883928571428572e-06, "loss": 43.3085, "step": 2904 }, { "epoch": 69.16716417910447, "grad_norm": 28.545974731445312, "learning_rate": 8.88095238095238e-06, "loss": 42.9532, "step": 2905 }, { "epoch": 69.1910447761194, "grad_norm": 34.869781494140625, "learning_rate": 8.877976190476192e-06, "loss": 44.2308, "step": 2906 }, { "epoch": 69.21492537313434, "grad_norm": 30.4566650390625, "learning_rate": 8.875e-06, "loss": 43.3219, "step": 2907 }, { "epoch": 69.23880597014926, "grad_norm": 29.15296745300293, "learning_rate": 8.87202380952381e-06, "loss": 44.0736, "step": 2908 }, { "epoch": 69.26268656716418, "grad_norm": 29.97230339050293, "learning_rate": 8.869047619047619e-06, "loss": 43.3511, "step": 2909 }, { "epoch": 69.2865671641791, "grad_norm": 30.087413787841797, "learning_rate": 8.86607142857143e-06, "loss": 43.8738, "step": 2910 }, { "epoch": 69.31044776119403, "grad_norm": 23.660356521606445, "learning_rate": 8.863095238095238e-06, "loss": 43.5448, "step": 2911 }, { "epoch": 69.33432835820895, "grad_norm": 28.7690372467041, "learning_rate": 8.860119047619048e-06, "loss": 42.9617, "step": 2912 }, { "epoch": 69.35820895522389, "grad_norm": 25.60896110534668, "learning_rate": 8.857142857142858e-06, "loss": 45.3785, "step": 2913 }, { "epoch": 69.38208955223881, "grad_norm": 31.27063751220703, "learning_rate": 8.854166666666667e-06, "loss": 43.376, "step": 2914 }, { "epoch": 69.40597014925373, "grad_norm": 18.817829132080078, "learning_rate": 8.851190476190476e-06, "loss": 44.0999, "step": 2915 }, { "epoch": 69.42985074626866, "grad_norm": 36.125919342041016, "learning_rate": 8.848214285714285e-06, "loss": 43.8083, "step": 2916 }, { "epoch": 69.45373134328358, "grad_norm": 25.558866500854492, "learning_rate": 8.845238095238096e-06, "loss": 45.1369, "step": 2917 }, { "epoch": 69.4776119402985, "grad_norm": 38.35983657836914, "learning_rate": 8.842261904761905e-06, "loss": 43.587, "step": 2918 }, { "epoch": 69.50149253731344, "grad_norm": 30.0064754486084, "learning_rate": 8.839285714285714e-06, "loss": 43.1645, "step": 2919 }, { "epoch": 69.52537313432836, "grad_norm": 31.177242279052734, "learning_rate": 8.836309523809525e-06, "loss": 42.2305, "step": 2920 }, { "epoch": 69.54925373134328, "grad_norm": 29.04176139831543, "learning_rate": 8.833333333333334e-06, "loss": 45.1403, "step": 2921 }, { "epoch": 69.57313432835821, "grad_norm": 35.95783233642578, "learning_rate": 8.830357142857143e-06, "loss": 43.4081, "step": 2922 }, { "epoch": 69.59701492537313, "grad_norm": 27.838382720947266, "learning_rate": 8.827380952380952e-06, "loss": 44.7195, "step": 2923 }, { "epoch": 69.62089552238805, "grad_norm": 30.860624313354492, "learning_rate": 8.824404761904763e-06, "loss": 42.7175, "step": 2924 }, { "epoch": 69.64477611940299, "grad_norm": 21.701316833496094, "learning_rate": 8.821428571428572e-06, "loss": 43.0401, "step": 2925 }, { "epoch": 69.66865671641791, "grad_norm": 27.270732879638672, "learning_rate": 8.818452380952381e-06, "loss": 43.686, "step": 2926 }, { "epoch": 69.69253731343284, "grad_norm": 25.814538955688477, "learning_rate": 8.815476190476192e-06, "loss": 44.3424, "step": 2927 }, { "epoch": 69.71641791044776, "grad_norm": 26.155197143554688, "learning_rate": 8.8125e-06, "loss": 43.6455, "step": 2928 }, { "epoch": 69.74029850746268, "grad_norm": 20.438846588134766, "learning_rate": 8.80952380952381e-06, "loss": 44.3784, "step": 2929 }, { "epoch": 69.7641791044776, "grad_norm": 26.45317268371582, "learning_rate": 8.806547619047619e-06, "loss": 42.6501, "step": 2930 }, { "epoch": 69.78805970149254, "grad_norm": 22.06026840209961, "learning_rate": 8.80357142857143e-06, "loss": 42.4144, "step": 2931 }, { "epoch": 69.81194029850747, "grad_norm": 24.45191764831543, "learning_rate": 8.800595238095239e-06, "loss": 43.7415, "step": 2932 }, { "epoch": 69.83582089552239, "grad_norm": 26.77782440185547, "learning_rate": 8.797619047619048e-06, "loss": 43.2565, "step": 2933 }, { "epoch": 69.85970149253731, "grad_norm": 22.350242614746094, "learning_rate": 8.794642857142858e-06, "loss": 43.5869, "step": 2934 }, { "epoch": 69.88358208955223, "grad_norm": 23.063016891479492, "learning_rate": 8.791666666666667e-06, "loss": 43.3821, "step": 2935 }, { "epoch": 69.90746268656716, "grad_norm": 18.864139556884766, "learning_rate": 8.788690476190477e-06, "loss": 42.8421, "step": 2936 }, { "epoch": 69.9313432835821, "grad_norm": 19.763843536376953, "learning_rate": 8.785714285714286e-06, "loss": 43.3783, "step": 2937 }, { "epoch": 69.95522388059702, "grad_norm": 19.347801208496094, "learning_rate": 8.782738095238096e-06, "loss": 42.8249, "step": 2938 }, { "epoch": 69.97910447761194, "grad_norm": 16.3013858795166, "learning_rate": 8.779761904761905e-06, "loss": 42.9306, "step": 2939 }, { "epoch": 70.0, "grad_norm": 15.056166648864746, "learning_rate": 8.776785714285714e-06, "loss": 38.2541, "step": 2940 }, { "epoch": 70.02388059701492, "grad_norm": 14.691337585449219, "learning_rate": 8.773809523809525e-06, "loss": 42.8378, "step": 2941 }, { "epoch": 70.04776119402985, "grad_norm": 24.51978874206543, "learning_rate": 8.770833333333334e-06, "loss": 43.2568, "step": 2942 }, { "epoch": 70.07164179104478, "grad_norm": 16.552833557128906, "learning_rate": 8.767857142857143e-06, "loss": 44.9251, "step": 2943 }, { "epoch": 70.0955223880597, "grad_norm": 26.28189468383789, "learning_rate": 8.764880952380952e-06, "loss": 43.8141, "step": 2944 }, { "epoch": 70.11940298507463, "grad_norm": 21.29142951965332, "learning_rate": 8.761904761904763e-06, "loss": 43.8724, "step": 2945 }, { "epoch": 70.14328358208955, "grad_norm": 23.159542083740234, "learning_rate": 8.758928571428572e-06, "loss": 44.4016, "step": 2946 }, { "epoch": 70.16716417910447, "grad_norm": 21.08184051513672, "learning_rate": 8.755952380952381e-06, "loss": 44.1115, "step": 2947 }, { "epoch": 70.1910447761194, "grad_norm": 18.838504791259766, "learning_rate": 8.752976190476192e-06, "loss": 43.2276, "step": 2948 }, { "epoch": 70.21492537313434, "grad_norm": 21.613079071044922, "learning_rate": 8.750000000000001e-06, "loss": 42.679, "step": 2949 }, { "epoch": 70.23880597014926, "grad_norm": 21.29805564880371, "learning_rate": 8.74702380952381e-06, "loss": 43.1822, "step": 2950 }, { "epoch": 70.26268656716418, "grad_norm": NaN, "learning_rate": 8.744047619047619e-06, "loss": 43.3802, "step": 2951 }, { "epoch": 70.2865671641791, "grad_norm": 22.393659591674805, "learning_rate": 8.744047619047619e-06, "loss": 43.3569, "step": 2952 }, { "epoch": 70.31044776119403, "grad_norm": 17.94029998779297, "learning_rate": 8.74107142857143e-06, "loss": 42.4398, "step": 2953 }, { "epoch": 70.33432835820895, "grad_norm": 17.575550079345703, "learning_rate": 8.738095238095239e-06, "loss": 42.3488, "step": 2954 }, { "epoch": 70.35820895522389, "grad_norm": 18.268203735351562, "learning_rate": 8.735119047619048e-06, "loss": 42.6199, "step": 2955 }, { "epoch": 70.38208955223881, "grad_norm": 25.415603637695312, "learning_rate": 8.732142857142859e-06, "loss": 43.803, "step": 2956 }, { "epoch": 70.40597014925373, "grad_norm": 23.37176513671875, "learning_rate": 8.729166666666668e-06, "loss": 44.5072, "step": 2957 }, { "epoch": 70.42985074626866, "grad_norm": 24.91670036315918, "learning_rate": 8.726190476190477e-06, "loss": 44.1411, "step": 2958 }, { "epoch": 70.45373134328358, "grad_norm": 20.50780487060547, "learning_rate": 8.723214285714286e-06, "loss": 45.4114, "step": 2959 }, { "epoch": 70.4776119402985, "grad_norm": 21.885364532470703, "learning_rate": 8.720238095238096e-06, "loss": 43.1786, "step": 2960 }, { "epoch": 70.50149253731344, "grad_norm": 18.620540618896484, "learning_rate": 8.717261904761906e-06, "loss": 42.5272, "step": 2961 }, { "epoch": 70.52537313432836, "grad_norm": 27.28016471862793, "learning_rate": 8.714285714285715e-06, "loss": 44.0531, "step": 2962 }, { "epoch": 70.54925373134328, "grad_norm": 22.124799728393555, "learning_rate": 8.711309523809525e-06, "loss": 43.445, "step": 2963 }, { "epoch": 70.57313432835821, "grad_norm": 25.905492782592773, "learning_rate": 8.708333333333334e-06, "loss": 43.619, "step": 2964 }, { "epoch": 70.59701492537313, "grad_norm": 23.890172958374023, "learning_rate": 8.705357142857143e-06, "loss": 43.1365, "step": 2965 }, { "epoch": 70.62089552238805, "grad_norm": 20.158838272094727, "learning_rate": 8.702380952380952e-06, "loss": 41.9394, "step": 2966 }, { "epoch": 70.64477611940299, "grad_norm": 24.878849029541016, "learning_rate": 8.699404761904763e-06, "loss": 43.7568, "step": 2967 }, { "epoch": 70.66865671641791, "grad_norm": 20.08368492126465, "learning_rate": 8.696428571428572e-06, "loss": 43.7444, "step": 2968 }, { "epoch": 70.69253731343284, "grad_norm": 24.59374237060547, "learning_rate": 8.693452380952381e-06, "loss": 43.6659, "step": 2969 }, { "epoch": 70.71641791044776, "grad_norm": 21.96346664428711, "learning_rate": 8.690476190476192e-06, "loss": 42.8718, "step": 2970 }, { "epoch": 70.74029850746268, "grad_norm": 20.61510467529297, "learning_rate": 8.687500000000001e-06, "loss": 43.8264, "step": 2971 }, { "epoch": 70.7641791044776, "grad_norm": 25.367786407470703, "learning_rate": 8.68452380952381e-06, "loss": 42.2802, "step": 2972 }, { "epoch": 70.78805970149254, "grad_norm": 21.911298751831055, "learning_rate": 8.68154761904762e-06, "loss": 44.4695, "step": 2973 }, { "epoch": 70.81194029850747, "grad_norm": 26.7462100982666, "learning_rate": 8.67857142857143e-06, "loss": 43.4564, "step": 2974 }, { "epoch": 70.83582089552239, "grad_norm": 23.370485305786133, "learning_rate": 8.675595238095239e-06, "loss": 45.0502, "step": 2975 }, { "epoch": 70.85970149253731, "grad_norm": 26.052675247192383, "learning_rate": 8.672619047619048e-06, "loss": 42.6782, "step": 2976 }, { "epoch": 70.88358208955223, "grad_norm": 21.637617111206055, "learning_rate": 8.669642857142859e-06, "loss": 44.426, "step": 2977 }, { "epoch": 70.90746268656716, "grad_norm": 26.575313568115234, "learning_rate": 8.666666666666668e-06, "loss": 43.6968, "step": 2978 }, { "epoch": 70.9313432835821, "grad_norm": 23.814599990844727, "learning_rate": 8.663690476190477e-06, "loss": 43.3269, "step": 2979 }, { "epoch": 70.95522388059702, "grad_norm": 21.367717742919922, "learning_rate": 8.660714285714286e-06, "loss": 43.1399, "step": 2980 }, { "epoch": 70.97910447761194, "grad_norm": 19.98285484313965, "learning_rate": 8.657738095238097e-06, "loss": 42.9342, "step": 2981 }, { "epoch": 71.0, "grad_norm": 22.52842140197754, "learning_rate": 8.654761904761906e-06, "loss": 36.6415, "step": 2982 }, { "epoch": 71.02388059701492, "grad_norm": 22.04327392578125, "learning_rate": 8.651785714285715e-06, "loss": 43.0825, "step": 2983 }, { "epoch": 71.04776119402985, "grad_norm": 21.24346351623535, "learning_rate": 8.648809523809526e-06, "loss": 43.511, "step": 2984 }, { "epoch": 71.07164179104478, "grad_norm": 23.4123592376709, "learning_rate": 8.645833333333335e-06, "loss": 43.1464, "step": 2985 }, { "epoch": 71.0955223880597, "grad_norm": 23.918460845947266, "learning_rate": 8.642857142857144e-06, "loss": 44.1223, "step": 2986 }, { "epoch": 71.11940298507463, "grad_norm": 16.164955139160156, "learning_rate": 8.639880952380953e-06, "loss": 43.2759, "step": 2987 }, { "epoch": 71.14328358208955, "grad_norm": 22.15060043334961, "learning_rate": 8.636904761904763e-06, "loss": 43.1227, "step": 2988 }, { "epoch": 71.16716417910447, "grad_norm": 19.598203659057617, "learning_rate": 8.633928571428572e-06, "loss": 41.9802, "step": 2989 }, { "epoch": 71.1910447761194, "grad_norm": 16.25682830810547, "learning_rate": 8.630952380952381e-06, "loss": 42.1285, "step": 2990 }, { "epoch": 71.21492537313434, "grad_norm": 20.54530143737793, "learning_rate": 8.627976190476192e-06, "loss": 43.4601, "step": 2991 }, { "epoch": 71.23880597014926, "grad_norm": 25.911041259765625, "learning_rate": 8.625000000000001e-06, "loss": 42.6006, "step": 2992 }, { "epoch": 71.26268656716418, "grad_norm": 16.15741539001465, "learning_rate": 8.62202380952381e-06, "loss": 42.6621, "step": 2993 }, { "epoch": 71.2865671641791, "grad_norm": 30.334243774414062, "learning_rate": 8.61904761904762e-06, "loss": 43.4206, "step": 2994 }, { "epoch": 71.31044776119403, "grad_norm": 26.023889541625977, "learning_rate": 8.61607142857143e-06, "loss": 43.5777, "step": 2995 }, { "epoch": 71.33432835820895, "grad_norm": 21.3012638092041, "learning_rate": 8.61309523809524e-06, "loss": 42.4823, "step": 2996 }, { "epoch": 71.35820895522389, "grad_norm": 25.109596252441406, "learning_rate": 8.610119047619048e-06, "loss": 44.2666, "step": 2997 }, { "epoch": 71.38208955223881, "grad_norm": 22.26563835144043, "learning_rate": 8.607142857142859e-06, "loss": 40.9261, "step": 2998 }, { "epoch": 71.40597014925373, "grad_norm": 30.94297218322754, "learning_rate": 8.604166666666668e-06, "loss": 43.5651, "step": 2999 }, { "epoch": 71.42985074626866, "grad_norm": 24.670034408569336, "learning_rate": 8.601190476190477e-06, "loss": 43.6695, "step": 3000 }, { "epoch": 71.45373134328358, "grad_norm": 29.290430068969727, "learning_rate": 8.598214285714288e-06, "loss": 43.6725, "step": 3001 }, { "epoch": 71.4776119402985, "grad_norm": 23.058176040649414, "learning_rate": 8.595238095238097e-06, "loss": 43.695, "step": 3002 }, { "epoch": 71.50149253731344, "grad_norm": 21.41179084777832, "learning_rate": 8.592261904761904e-06, "loss": 43.1715, "step": 3003 }, { "epoch": 71.52537313432836, "grad_norm": 22.226594924926758, "learning_rate": 8.589285714285715e-06, "loss": 43.1411, "step": 3004 }, { "epoch": 71.54925373134328, "grad_norm": 19.892719268798828, "learning_rate": 8.586309523809524e-06, "loss": 44.4913, "step": 3005 }, { "epoch": 71.57313432835821, "grad_norm": 18.263708114624023, "learning_rate": 8.583333333333333e-06, "loss": 43.2348, "step": 3006 }, { "epoch": 71.59701492537313, "grad_norm": 22.065439224243164, "learning_rate": 8.580357142857144e-06, "loss": 44.3296, "step": 3007 }, { "epoch": 71.62089552238805, "grad_norm": 19.95087432861328, "learning_rate": 8.577380952380953e-06, "loss": 44.827, "step": 3008 }, { "epoch": 71.64477611940299, "grad_norm": 19.371231079101562, "learning_rate": 8.574404761904762e-06, "loss": 43.9034, "step": 3009 }, { "epoch": 71.66865671641791, "grad_norm": 22.265600204467773, "learning_rate": 8.571428571428571e-06, "loss": 42.659, "step": 3010 }, { "epoch": 71.69253731343284, "grad_norm": 18.449695587158203, "learning_rate": 8.568452380952382e-06, "loss": 43.674, "step": 3011 }, { "epoch": 71.71641791044776, "grad_norm": 25.14525604248047, "learning_rate": 8.56547619047619e-06, "loss": 43.5625, "step": 3012 }, { "epoch": 71.74029850746268, "grad_norm": 23.78099822998047, "learning_rate": 8.5625e-06, "loss": 44.3134, "step": 3013 }, { "epoch": 71.7641791044776, "grad_norm": 18.84084129333496, "learning_rate": 8.55952380952381e-06, "loss": 44.8436, "step": 3014 }, { "epoch": 71.78805970149254, "grad_norm": 28.59735107421875, "learning_rate": 8.55654761904762e-06, "loss": 43.3521, "step": 3015 }, { "epoch": 71.81194029850747, "grad_norm": 22.86484718322754, "learning_rate": 8.553571428571429e-06, "loss": 44.0742, "step": 3016 }, { "epoch": 71.83582089552239, "grad_norm": 17.34327507019043, "learning_rate": 8.550595238095238e-06, "loss": 43.5721, "step": 3017 }, { "epoch": 71.85970149253731, "grad_norm": 32.2520637512207, "learning_rate": 8.547619047619048e-06, "loss": 42.3465, "step": 3018 }, { "epoch": 71.88358208955223, "grad_norm": 23.380569458007812, "learning_rate": 8.544642857142857e-06, "loss": 43.2287, "step": 3019 }, { "epoch": 71.90746268656716, "grad_norm": 31.07112693786621, "learning_rate": 8.541666666666666e-06, "loss": 44.3177, "step": 3020 }, { "epoch": 71.9313432835821, "grad_norm": 24.860567092895508, "learning_rate": 8.538690476190477e-06, "loss": 43.6361, "step": 3021 }, { "epoch": 71.95522388059702, "grad_norm": 22.43517303466797, "learning_rate": 8.535714285714286e-06, "loss": 43.5824, "step": 3022 }, { "epoch": 71.97910447761194, "grad_norm": 27.975297927856445, "learning_rate": 8.532738095238095e-06, "loss": 43.4829, "step": 3023 }, { "epoch": 72.0, "grad_norm": 17.978660583496094, "learning_rate": 8.529761904761904e-06, "loss": 38.115, "step": 3024 }, { "epoch": 72.02388059701492, "grad_norm": 31.69437599182129, "learning_rate": 8.526785714285715e-06, "loss": 43.5237, "step": 3025 }, { "epoch": 72.04776119402985, "grad_norm": 27.577686309814453, "learning_rate": 8.523809523809524e-06, "loss": 43.1406, "step": 3026 }, { "epoch": 72.07164179104478, "grad_norm": 28.320255279541016, "learning_rate": 8.520833333333333e-06, "loss": 44.5784, "step": 3027 }, { "epoch": 72.0955223880597, "grad_norm": 26.59323501586914, "learning_rate": 8.517857142857144e-06, "loss": 42.2067, "step": 3028 }, { "epoch": 72.11940298507463, "grad_norm": 21.94460105895996, "learning_rate": 8.514880952380953e-06, "loss": 43.4262, "step": 3029 }, { "epoch": 72.14328358208955, "grad_norm": 23.56421661376953, "learning_rate": 8.511904761904762e-06, "loss": 41.1196, "step": 3030 }, { "epoch": 72.16716417910447, "grad_norm": 19.21329689025879, "learning_rate": 8.508928571428571e-06, "loss": 42.5441, "step": 3031 }, { "epoch": 72.1910447761194, "grad_norm": 23.377782821655273, "learning_rate": 8.505952380952382e-06, "loss": 43.0296, "step": 3032 }, { "epoch": 72.21492537313434, "grad_norm": 24.402435302734375, "learning_rate": 8.502976190476191e-06, "loss": 44.2474, "step": 3033 }, { "epoch": 72.23880597014926, "grad_norm": 18.61969566345215, "learning_rate": 8.5e-06, "loss": 43.6984, "step": 3034 }, { "epoch": 72.26268656716418, "grad_norm": 30.627338409423828, "learning_rate": 8.49702380952381e-06, "loss": 42.5441, "step": 3035 }, { "epoch": 72.2865671641791, "grad_norm": 26.115427017211914, "learning_rate": 8.49404761904762e-06, "loss": 41.8235, "step": 3036 }, { "epoch": 72.31044776119403, "grad_norm": 24.971904754638672, "learning_rate": 8.491071428571429e-06, "loss": 43.9344, "step": 3037 }, { "epoch": 72.33432835820895, "grad_norm": 26.42667007446289, "learning_rate": 8.488095238095238e-06, "loss": 43.2757, "step": 3038 }, { "epoch": 72.35820895522389, "grad_norm": 23.19200897216797, "learning_rate": 8.485119047619049e-06, "loss": 42.9536, "step": 3039 }, { "epoch": 72.38208955223881, "grad_norm": 31.263626098632812, "learning_rate": 8.482142857142858e-06, "loss": 42.8037, "step": 3040 }, { "epoch": 72.40597014925373, "grad_norm": 25.049270629882812, "learning_rate": 8.479166666666667e-06, "loss": 42.8005, "step": 3041 }, { "epoch": 72.42985074626866, "grad_norm": 20.71118927001953, "learning_rate": 8.476190476190477e-06, "loss": 43.106, "step": 3042 }, { "epoch": 72.45373134328358, "grad_norm": 22.156679153442383, "learning_rate": 8.473214285714286e-06, "loss": 42.6742, "step": 3043 }, { "epoch": 72.4776119402985, "grad_norm": 22.091957092285156, "learning_rate": 8.470238095238095e-06, "loss": 43.6855, "step": 3044 }, { "epoch": 72.50149253731344, "grad_norm": 21.12959861755371, "learning_rate": 8.467261904761905e-06, "loss": 42.9416, "step": 3045 }, { "epoch": 72.52537313432836, "grad_norm": 20.53251075744629, "learning_rate": 8.464285714285715e-06, "loss": 44.3919, "step": 3046 }, { "epoch": 72.54925373134328, "grad_norm": 19.188758850097656, "learning_rate": 8.461309523809524e-06, "loss": 43.3152, "step": 3047 }, { "epoch": 72.57313432835821, "grad_norm": 26.149826049804688, "learning_rate": 8.458333333333333e-06, "loss": 44.7382, "step": 3048 }, { "epoch": 72.59701492537313, "grad_norm": 18.40545082092285, "learning_rate": 8.455357142857144e-06, "loss": 44.0886, "step": 3049 }, { "epoch": 72.62089552238805, "grad_norm": 21.535911560058594, "learning_rate": 8.452380952380953e-06, "loss": 43.2625, "step": 3050 }, { "epoch": 72.64477611940299, "grad_norm": 17.798324584960938, "learning_rate": 8.449404761904762e-06, "loss": 43.5087, "step": 3051 }, { "epoch": 72.66865671641791, "grad_norm": 22.086271286010742, "learning_rate": 8.446428571428571e-06, "loss": 43.7427, "step": 3052 }, { "epoch": 72.69253731343284, "grad_norm": 20.795154571533203, "learning_rate": 8.443452380952382e-06, "loss": 43.6492, "step": 3053 }, { "epoch": 72.71641791044776, "grad_norm": 23.004671096801758, "learning_rate": 8.440476190476191e-06, "loss": 43.2841, "step": 3054 }, { "epoch": 72.74029850746268, "grad_norm": 19.808507919311523, "learning_rate": 8.4375e-06, "loss": 43.2447, "step": 3055 }, { "epoch": 72.7641791044776, "grad_norm": 25.06849479675293, "learning_rate": 8.434523809523811e-06, "loss": 42.7637, "step": 3056 }, { "epoch": 72.78805970149254, "grad_norm": 25.014245986938477, "learning_rate": 8.43154761904762e-06, "loss": 43.4822, "step": 3057 }, { "epoch": 72.81194029850747, "grad_norm": 22.324596405029297, "learning_rate": 8.428571428571429e-06, "loss": 43.3555, "step": 3058 }, { "epoch": 72.83582089552239, "grad_norm": 28.37264060974121, "learning_rate": 8.425595238095238e-06, "loss": 45.1914, "step": 3059 }, { "epoch": 72.85970149253731, "grad_norm": 20.218700408935547, "learning_rate": 8.422619047619049e-06, "loss": 43.7534, "step": 3060 }, { "epoch": 72.88358208955223, "grad_norm": 23.96106719970703, "learning_rate": 8.419642857142858e-06, "loss": 43.3726, "step": 3061 }, { "epoch": 72.90746268656716, "grad_norm": 24.620227813720703, "learning_rate": 8.416666666666667e-06, "loss": 43.2246, "step": 3062 }, { "epoch": 72.9313432835821, "grad_norm": 17.006282806396484, "learning_rate": 8.413690476190478e-06, "loss": 43.0239, "step": 3063 }, { "epoch": 72.95522388059702, "grad_norm": 32.321250915527344, "learning_rate": 8.410714285714287e-06, "loss": 43.8265, "step": 3064 }, { "epoch": 72.97910447761194, "grad_norm": 26.541305541992188, "learning_rate": 8.407738095238096e-06, "loss": 43.715, "step": 3065 }, { "epoch": 73.0, "grad_norm": 20.71360969543457, "learning_rate": 8.404761904761905e-06, "loss": 38.4916, "step": 3066 }, { "epoch": 73.02388059701492, "grad_norm": 25.500295639038086, "learning_rate": 8.401785714285715e-06, "loss": 43.3955, "step": 3067 }, { "epoch": 73.04776119402985, "grad_norm": 26.59987449645996, "learning_rate": 8.398809523809525e-06, "loss": 43.3811, "step": 3068 }, { "epoch": 73.07164179104478, "grad_norm": 22.731945037841797, "learning_rate": 8.395833333333334e-06, "loss": 43.2902, "step": 3069 }, { "epoch": 73.0955223880597, "grad_norm": 20.676626205444336, "learning_rate": 8.392857142857144e-06, "loss": 44.4288, "step": 3070 }, { "epoch": 73.11940298507463, "grad_norm": 24.257009506225586, "learning_rate": 8.389880952380953e-06, "loss": 42.6346, "step": 3071 }, { "epoch": 73.14328358208955, "grad_norm": 20.27753448486328, "learning_rate": 8.386904761904762e-06, "loss": 43.195, "step": 3072 }, { "epoch": 73.16716417910447, "grad_norm": 22.37655258178711, "learning_rate": 8.383928571428573e-06, "loss": 43.7297, "step": 3073 }, { "epoch": 73.1910447761194, "grad_norm": 22.078298568725586, "learning_rate": 8.380952380952382e-06, "loss": 45.0908, "step": 3074 }, { "epoch": 73.21492537313434, "grad_norm": 22.645662307739258, "learning_rate": 8.377976190476191e-06, "loss": 43.4577, "step": 3075 }, { "epoch": 73.23880597014926, "grad_norm": 18.159029006958008, "learning_rate": 8.375e-06, "loss": 42.8618, "step": 3076 }, { "epoch": 73.26268656716418, "grad_norm": 22.44676399230957, "learning_rate": 8.372023809523811e-06, "loss": 41.7892, "step": 3077 }, { "epoch": 73.2865671641791, "grad_norm": 21.480403900146484, "learning_rate": 8.36904761904762e-06, "loss": 44.0939, "step": 3078 }, { "epoch": 73.31044776119403, "grad_norm": 19.49287986755371, "learning_rate": 8.366071428571429e-06, "loss": 44.0851, "step": 3079 }, { "epoch": 73.33432835820895, "grad_norm": 18.453174591064453, "learning_rate": 8.36309523809524e-06, "loss": 42.5673, "step": 3080 }, { "epoch": 73.35820895522389, "grad_norm": NaN, "learning_rate": 8.360119047619049e-06, "loss": 71.053, "step": 3081 }, { "epoch": 73.38208955223881, "grad_norm": 20.119003295898438, "learning_rate": 8.360119047619049e-06, "loss": 42.981, "step": 3082 }, { "epoch": 73.40597014925373, "grad_norm": 18.897857666015625, "learning_rate": 8.357142857142858e-06, "loss": 42.5696, "step": 3083 }, { "epoch": 73.42985074626866, "grad_norm": 26.755035400390625, "learning_rate": 8.354166666666667e-06, "loss": 43.2951, "step": 3084 }, { "epoch": 73.45373134328358, "grad_norm": 19.104629516601562, "learning_rate": 8.351190476190478e-06, "loss": 42.5016, "step": 3085 }, { "epoch": 73.4776119402985, "grad_norm": 25.36631965637207, "learning_rate": 8.348214285714287e-06, "loss": 42.6552, "step": 3086 }, { "epoch": 73.50149253731344, "grad_norm": 27.23288345336914, "learning_rate": 8.345238095238096e-06, "loss": 42.6917, "step": 3087 }, { "epoch": 73.52537313432836, "grad_norm": 16.930316925048828, "learning_rate": 8.342261904761907e-06, "loss": 43.1315, "step": 3088 }, { "epoch": 73.54925373134328, "grad_norm": 26.30918312072754, "learning_rate": 8.339285714285716e-06, "loss": 42.7197, "step": 3089 }, { "epoch": 73.57313432835821, "grad_norm": 24.781511306762695, "learning_rate": 8.336309523809525e-06, "loss": 42.5099, "step": 3090 }, { "epoch": 73.59701492537313, "grad_norm": 19.516469955444336, "learning_rate": 8.333333333333334e-06, "loss": 43.0713, "step": 3091 }, { "epoch": 73.62089552238805, "grad_norm": 22.657184600830078, "learning_rate": 8.330357142857144e-06, "loss": 43.3808, "step": 3092 }, { "epoch": 73.64477611940299, "grad_norm": 18.468502044677734, "learning_rate": 8.327380952380954e-06, "loss": 43.7249, "step": 3093 }, { "epoch": 73.66865671641791, "grad_norm": 17.16704750061035, "learning_rate": 8.324404761904763e-06, "loss": 43.8457, "step": 3094 }, { "epoch": 73.69253731343284, "grad_norm": 21.254226684570312, "learning_rate": 8.321428571428573e-06, "loss": 43.5131, "step": 3095 }, { "epoch": 73.71641791044776, "grad_norm": 24.988006591796875, "learning_rate": 8.318452380952382e-06, "loss": 43.419, "step": 3096 }, { "epoch": 73.74029850746268, "grad_norm": 18.345117568969727, "learning_rate": 8.315476190476191e-06, "loss": 43.89, "step": 3097 }, { "epoch": 73.7641791044776, "grad_norm": 19.947589874267578, "learning_rate": 8.3125e-06, "loss": 41.9095, "step": 3098 }, { "epoch": 73.78805970149254, "grad_norm": 21.689882278442383, "learning_rate": 8.309523809523811e-06, "loss": 43.5629, "step": 3099 }, { "epoch": 73.81194029850747, "grad_norm": 18.021583557128906, "learning_rate": 8.30654761904762e-06, "loss": 45.2045, "step": 3100 }, { "epoch": 73.83582089552239, "grad_norm": 21.016939163208008, "learning_rate": 8.30357142857143e-06, "loss": 42.9508, "step": 3101 }, { "epoch": 73.85970149253731, "grad_norm": 19.921489715576172, "learning_rate": 8.30059523809524e-06, "loss": 45.0384, "step": 3102 }, { "epoch": 73.88358208955223, "grad_norm": 17.989734649658203, "learning_rate": 8.297619047619049e-06, "loss": 43.6752, "step": 3103 }, { "epoch": 73.90746268656716, "grad_norm": 19.126956939697266, "learning_rate": 8.294642857142858e-06, "loss": 42.4258, "step": 3104 }, { "epoch": 73.9313432835821, "grad_norm": 18.107421875, "learning_rate": 8.291666666666667e-06, "loss": 42.0089, "step": 3105 }, { "epoch": 73.95522388059702, "grad_norm": 22.599328994750977, "learning_rate": 8.288690476190478e-06, "loss": 43.1967, "step": 3106 }, { "epoch": 73.97910447761194, "grad_norm": 17.103744506835938, "learning_rate": 8.285714285714287e-06, "loss": 42.932, "step": 3107 }, { "epoch": 74.0, "grad_norm": 16.514545440673828, "learning_rate": 8.282738095238096e-06, "loss": 38.5601, "step": 3108 }, { "epoch": 74.02388059701492, "grad_norm": 19.938108444213867, "learning_rate": 8.279761904761905e-06, "loss": 43.1656, "step": 3109 }, { "epoch": 74.04776119402985, "grad_norm": 23.691556930541992, "learning_rate": 8.276785714285714e-06, "loss": 43.248, "step": 3110 }, { "epoch": 74.07164179104478, "grad_norm": 24.84130859375, "learning_rate": 8.273809523809523e-06, "loss": 43.0973, "step": 3111 }, { "epoch": 74.0955223880597, "grad_norm": 16.541378021240234, "learning_rate": 8.270833333333334e-06, "loss": 43.8453, "step": 3112 }, { "epoch": 74.11940298507463, "grad_norm": 34.161293029785156, "learning_rate": 8.267857142857143e-06, "loss": 41.7, "step": 3113 }, { "epoch": 74.14328358208955, "grad_norm": 26.104328155517578, "learning_rate": 8.264880952380952e-06, "loss": 43.6119, "step": 3114 }, { "epoch": 74.16716417910447, "grad_norm": 26.31689453125, "learning_rate": 8.261904761904763e-06, "loss": 41.5545, "step": 3115 }, { "epoch": 74.1910447761194, "grad_norm": 23.808761596679688, "learning_rate": 8.258928571428572e-06, "loss": 44.5862, "step": 3116 }, { "epoch": 74.21492537313434, "grad_norm": 24.158493041992188, "learning_rate": 8.25595238095238e-06, "loss": 42.9814, "step": 3117 }, { "epoch": 74.23880597014926, "grad_norm": 25.35089874267578, "learning_rate": 8.25297619047619e-06, "loss": 42.4484, "step": 3118 }, { "epoch": 74.26268656716418, "grad_norm": 24.48615264892578, "learning_rate": 8.25e-06, "loss": 42.7431, "step": 3119 }, { "epoch": 74.2865671641791, "grad_norm": 24.813716888427734, "learning_rate": 8.24702380952381e-06, "loss": 43.1515, "step": 3120 }, { "epoch": 74.31044776119403, "grad_norm": 18.43018341064453, "learning_rate": 8.244047619047619e-06, "loss": 43.5142, "step": 3121 }, { "epoch": 74.33432835820895, "grad_norm": 25.593732833862305, "learning_rate": 8.24107142857143e-06, "loss": 44.4342, "step": 3122 }, { "epoch": 74.35820895522389, "grad_norm": 21.224576950073242, "learning_rate": 8.238095238095239e-06, "loss": 44.1973, "step": 3123 }, { "epoch": 74.38208955223881, "grad_norm": 17.604145050048828, "learning_rate": 8.235119047619048e-06, "loss": 43.4662, "step": 3124 }, { "epoch": 74.40597014925373, "grad_norm": 30.535215377807617, "learning_rate": 8.232142857142857e-06, "loss": 42.8872, "step": 3125 }, { "epoch": 74.42985074626866, "grad_norm": 22.767736434936523, "learning_rate": 8.229166666666667e-06, "loss": 43.4187, "step": 3126 }, { "epoch": 74.45373134328358, "grad_norm": 33.97389221191406, "learning_rate": 8.226190476190476e-06, "loss": 43.8719, "step": 3127 }, { "epoch": 74.4776119402985, "grad_norm": 26.33451271057129, "learning_rate": 8.223214285714285e-06, "loss": 43.6458, "step": 3128 }, { "epoch": 74.50149253731344, "grad_norm": 35.393733978271484, "learning_rate": 8.220238095238096e-06, "loss": 44.2996, "step": 3129 }, { "epoch": 74.52537313432836, "grad_norm": 27.903955459594727, "learning_rate": 8.217261904761905e-06, "loss": 43.5218, "step": 3130 }, { "epoch": 74.54925373134328, "grad_norm": 27.946807861328125, "learning_rate": 8.214285714285714e-06, "loss": 43.8993, "step": 3131 }, { "epoch": 74.57313432835821, "grad_norm": 21.519737243652344, "learning_rate": 8.211309523809523e-06, "loss": 42.9369, "step": 3132 }, { "epoch": 74.59701492537313, "grad_norm": 27.311965942382812, "learning_rate": 8.208333333333334e-06, "loss": 43.4193, "step": 3133 }, { "epoch": 74.62089552238805, "grad_norm": 23.38337516784668, "learning_rate": 8.205357142857143e-06, "loss": 43.5582, "step": 3134 }, { "epoch": 74.64477611940299, "grad_norm": 19.415571212768555, "learning_rate": 8.202380952380952e-06, "loss": 42.8066, "step": 3135 }, { "epoch": 74.66865671641791, "grad_norm": 25.44513511657715, "learning_rate": 8.199404761904763e-06, "loss": 42.8859, "step": 3136 }, { "epoch": 74.69253731343284, "grad_norm": 23.1788330078125, "learning_rate": 8.196428571428572e-06, "loss": 42.1339, "step": 3137 }, { "epoch": 74.71641791044776, "grad_norm": 14.436179161071777, "learning_rate": 8.193452380952381e-06, "loss": 42.6687, "step": 3138 }, { "epoch": 74.74029850746268, "grad_norm": 30.928714752197266, "learning_rate": 8.190476190476192e-06, "loss": 44.5744, "step": 3139 }, { "epoch": 74.7641791044776, "grad_norm": 23.915878295898438, "learning_rate": 8.1875e-06, "loss": 44.3435, "step": 3140 }, { "epoch": 74.78805970149254, "grad_norm": 27.95979881286621, "learning_rate": 8.18452380952381e-06, "loss": 42.4667, "step": 3141 }, { "epoch": 74.81194029850747, "grad_norm": 22.4390811920166, "learning_rate": 8.181547619047619e-06, "loss": 42.6036, "step": 3142 }, { "epoch": 74.83582089552239, "grad_norm": 22.94829750061035, "learning_rate": 8.17857142857143e-06, "loss": 42.4304, "step": 3143 }, { "epoch": 74.85970149253731, "grad_norm": 20.711339950561523, "learning_rate": 8.175595238095239e-06, "loss": 43.1806, "step": 3144 }, { "epoch": 74.88358208955223, "grad_norm": 21.30629539489746, "learning_rate": 8.172619047619048e-06, "loss": 42.7325, "step": 3145 }, { "epoch": 74.90746268656716, "grad_norm": 20.381263732910156, "learning_rate": 8.169642857142858e-06, "loss": 43.0491, "step": 3146 }, { "epoch": 74.9313432835821, "grad_norm": 21.54926300048828, "learning_rate": 8.166666666666668e-06, "loss": 44.8298, "step": 3147 }, { "epoch": 74.95522388059702, "grad_norm": 15.518889427185059, "learning_rate": 8.163690476190477e-06, "loss": 42.6821, "step": 3148 }, { "epoch": 74.97910447761194, "grad_norm": 24.487192153930664, "learning_rate": 8.160714285714286e-06, "loss": 43.0891, "step": 3149 }, { "epoch": 75.0, "grad_norm": 15.607013702392578, "learning_rate": 8.157738095238096e-06, "loss": 37.212, "step": 3150 }, { "epoch": 75.02388059701492, "grad_norm": 31.642353057861328, "learning_rate": 8.154761904761905e-06, "loss": 43.9061, "step": 3151 }, { "epoch": 75.04776119402985, "grad_norm": 23.92624855041504, "learning_rate": 8.151785714285714e-06, "loss": 44.0244, "step": 3152 }, { "epoch": 75.07164179104478, "grad_norm": 23.756420135498047, "learning_rate": 8.148809523809525e-06, "loss": 44.7597, "step": 3153 }, { "epoch": 75.0955223880597, "grad_norm": 26.027414321899414, "learning_rate": 8.145833333333334e-06, "loss": 42.3933, "step": 3154 }, { "epoch": 75.11940298507463, "grad_norm": 18.252239227294922, "learning_rate": 8.142857142857143e-06, "loss": 43.1075, "step": 3155 }, { "epoch": 75.14328358208955, "grad_norm": 25.58303451538086, "learning_rate": 8.139880952380952e-06, "loss": 43.3715, "step": 3156 }, { "epoch": 75.16716417910447, "grad_norm": 24.198566436767578, "learning_rate": 8.136904761904763e-06, "loss": 42.042, "step": 3157 }, { "epoch": 75.1910447761194, "grad_norm": 21.632183074951172, "learning_rate": 8.133928571428572e-06, "loss": 42.4693, "step": 3158 }, { "epoch": 75.21492537313434, "grad_norm": 27.104801177978516, "learning_rate": 8.130952380952381e-06, "loss": 42.597, "step": 3159 }, { "epoch": 75.23880597014926, "grad_norm": 21.614917755126953, "learning_rate": 8.127976190476192e-06, "loss": 42.9729, "step": 3160 }, { "epoch": 75.26268656716418, "grad_norm": 27.62027359008789, "learning_rate": 8.125000000000001e-06, "loss": 43.3302, "step": 3161 }, { "epoch": 75.2865671641791, "grad_norm": 24.087974548339844, "learning_rate": 8.12202380952381e-06, "loss": 44.1364, "step": 3162 }, { "epoch": 75.31044776119403, "grad_norm": 21.590192794799805, "learning_rate": 8.119047619047619e-06, "loss": 42.7373, "step": 3163 }, { "epoch": 75.33432835820895, "grad_norm": 27.612075805664062, "learning_rate": 8.11607142857143e-06, "loss": 43.5758, "step": 3164 }, { "epoch": 75.35820895522389, "grad_norm": 18.209209442138672, "learning_rate": 8.113095238095239e-06, "loss": 43.309, "step": 3165 }, { "epoch": 75.38208955223881, "grad_norm": 28.845134735107422, "learning_rate": 8.110119047619048e-06, "loss": 43.3125, "step": 3166 }, { "epoch": 75.40597014925373, "grad_norm": 20.03913116455078, "learning_rate": 8.107142857142859e-06, "loss": 44.666, "step": 3167 }, { "epoch": 75.42985074626866, "grad_norm": 29.69953155517578, "learning_rate": 8.104166666666668e-06, "loss": 43.3558, "step": 3168 }, { "epoch": 75.45373134328358, "grad_norm": 22.189376831054688, "learning_rate": 8.101190476190477e-06, "loss": 43.6229, "step": 3169 }, { "epoch": 75.4776119402985, "grad_norm": 23.93678092956543, "learning_rate": 8.098214285714286e-06, "loss": 42.9279, "step": 3170 }, { "epoch": 75.50149253731344, "grad_norm": 21.489761352539062, "learning_rate": 8.095238095238097e-06, "loss": 43.4537, "step": 3171 }, { "epoch": 75.52537313432836, "grad_norm": 18.95380210876465, "learning_rate": 8.092261904761906e-06, "loss": 42.9752, "step": 3172 }, { "epoch": 75.54925373134328, "grad_norm": 26.20965576171875, "learning_rate": 8.089285714285715e-06, "loss": 42.7511, "step": 3173 }, { "epoch": 75.57313432835821, "grad_norm": 19.629926681518555, "learning_rate": 8.086309523809525e-06, "loss": 43.7784, "step": 3174 }, { "epoch": 75.59701492537313, "grad_norm": 25.866622924804688, "learning_rate": 8.083333333333334e-06, "loss": 42.7349, "step": 3175 }, { "epoch": 75.62089552238805, "grad_norm": 24.383323669433594, "learning_rate": 8.080357142857143e-06, "loss": 42.5395, "step": 3176 }, { "epoch": 75.64477611940299, "grad_norm": 19.74950408935547, "learning_rate": 8.077380952380953e-06, "loss": 43.1058, "step": 3177 }, { "epoch": 75.66865671641791, "grad_norm": 28.67831039428711, "learning_rate": 8.074404761904763e-06, "loss": 43.5871, "step": 3178 }, { "epoch": 75.69253731343284, "grad_norm": 23.102951049804688, "learning_rate": 8.071428571428572e-06, "loss": 42.76, "step": 3179 }, { "epoch": 75.71641791044776, "grad_norm": 35.02995681762695, "learning_rate": 8.068452380952381e-06, "loss": 43.8252, "step": 3180 }, { "epoch": 75.74029850746268, "grad_norm": 24.358551025390625, "learning_rate": 8.065476190476192e-06, "loss": 43.1074, "step": 3181 }, { "epoch": 75.7641791044776, "grad_norm": 30.14754295349121, "learning_rate": 8.062500000000001e-06, "loss": 43.8415, "step": 3182 }, { "epoch": 75.78805970149254, "grad_norm": 24.45053482055664, "learning_rate": 8.05952380952381e-06, "loss": 43.0215, "step": 3183 }, { "epoch": 75.81194029850747, "grad_norm": 37.40525436401367, "learning_rate": 8.05654761904762e-06, "loss": 42.961, "step": 3184 }, { "epoch": 75.83582089552239, "grad_norm": 24.555240631103516, "learning_rate": 8.05357142857143e-06, "loss": 44.2708, "step": 3185 }, { "epoch": 75.85970149253731, "grad_norm": 37.460670471191406, "learning_rate": 8.050595238095239e-06, "loss": 43.5956, "step": 3186 }, { "epoch": 75.88358208955223, "grad_norm": 32.54770278930664, "learning_rate": 8.047619047619048e-06, "loss": 42.3289, "step": 3187 }, { "epoch": 75.90746268656716, "grad_norm": 38.01876449584961, "learning_rate": 8.044642857142859e-06, "loss": 42.9972, "step": 3188 }, { "epoch": 75.9313432835821, "grad_norm": 30.63246726989746, "learning_rate": 8.041666666666668e-06, "loss": 42.5397, "step": 3189 }, { "epoch": 75.95522388059702, "grad_norm": 27.40627670288086, "learning_rate": 8.038690476190477e-06, "loss": 41.377, "step": 3190 }, { "epoch": 75.97910447761194, "grad_norm": 26.620893478393555, "learning_rate": 8.035714285714286e-06, "loss": 42.9367, "step": 3191 }, { "epoch": 76.0, "grad_norm": 31.36514663696289, "learning_rate": 8.032738095238097e-06, "loss": 37.8523, "step": 3192 }, { "epoch": 76.02388059701492, "grad_norm": 27.55282974243164, "learning_rate": 8.029761904761906e-06, "loss": 43.2614, "step": 3193 }, { "epoch": 76.04776119402985, "grad_norm": 36.373634338378906, "learning_rate": 8.026785714285715e-06, "loss": 42.1966, "step": 3194 }, { "epoch": 76.07164179104478, "grad_norm": 29.89250373840332, "learning_rate": 8.023809523809526e-06, "loss": 43.0278, "step": 3195 }, { "epoch": 76.0955223880597, "grad_norm": 28.84893226623535, "learning_rate": 8.020833333333335e-06, "loss": 44.8331, "step": 3196 }, { "epoch": 76.11940298507463, "grad_norm": 27.258445739746094, "learning_rate": 8.017857142857144e-06, "loss": 42.5176, "step": 3197 }, { "epoch": 76.14328358208955, "grad_norm": 30.8077449798584, "learning_rate": 8.014880952380953e-06, "loss": 43.3045, "step": 3198 }, { "epoch": 76.16716417910447, "grad_norm": 28.528837203979492, "learning_rate": 8.011904761904763e-06, "loss": 43.1302, "step": 3199 }, { "epoch": 76.1910447761194, "grad_norm": 30.751039505004883, "learning_rate": 8.008928571428572e-06, "loss": 43.7287, "step": 3200 }, { "epoch": 76.21492537313434, "grad_norm": 27.781261444091797, "learning_rate": 8.005952380952382e-06, "loss": 43.3939, "step": 3201 }, { "epoch": 76.23880597014926, "grad_norm": 27.45984649658203, "learning_rate": 8.002976190476192e-06, "loss": 43.6411, "step": 3202 }, { "epoch": 76.26268656716418, "grad_norm": 26.628419876098633, "learning_rate": 8.000000000000001e-06, "loss": 42.6454, "step": 3203 }, { "epoch": 76.2865671641791, "grad_norm": 36.02729034423828, "learning_rate": 7.99702380952381e-06, "loss": 43.2459, "step": 3204 }, { "epoch": 76.31044776119403, "grad_norm": 28.480478286743164, "learning_rate": 7.99404761904762e-06, "loss": 42.7675, "step": 3205 }, { "epoch": 76.33432835820895, "grad_norm": 31.36353874206543, "learning_rate": 7.99107142857143e-06, "loss": 42.8571, "step": 3206 }, { "epoch": 76.35820895522389, "grad_norm": 29.178728103637695, "learning_rate": 7.98809523809524e-06, "loss": 42.7477, "step": 3207 }, { "epoch": 76.38208955223881, "grad_norm": 28.539457321166992, "learning_rate": 7.985119047619048e-06, "loss": 44.1444, "step": 3208 }, { "epoch": 76.40597014925373, "grad_norm": 26.178895950317383, "learning_rate": 7.982142857142859e-06, "loss": 42.7187, "step": 3209 }, { "epoch": 76.42985074626866, "grad_norm": 30.825010299682617, "learning_rate": 7.979166666666668e-06, "loss": 43.478, "step": 3210 }, { "epoch": 76.45373134328358, "grad_norm": 27.317245483398438, "learning_rate": 7.976190476190477e-06, "loss": 43.479, "step": 3211 }, { "epoch": 76.4776119402985, "grad_norm": 31.42888641357422, "learning_rate": 7.973214285714286e-06, "loss": 43.7278, "step": 3212 }, { "epoch": 76.50149253731344, "grad_norm": 28.949392318725586, "learning_rate": 7.970238095238097e-06, "loss": 43.6134, "step": 3213 }, { "epoch": 76.52537313432836, "grad_norm": 36.61643981933594, "learning_rate": 7.967261904761904e-06, "loss": 44.1841, "step": 3214 }, { "epoch": 76.54925373134328, "grad_norm": 31.78457260131836, "learning_rate": 7.964285714285715e-06, "loss": 43.9995, "step": 3215 }, { "epoch": 76.57313432835821, "grad_norm": 29.883163452148438, "learning_rate": 7.961309523809524e-06, "loss": 42.596, "step": 3216 }, { "epoch": 76.59701492537313, "grad_norm": 27.458534240722656, "learning_rate": 7.958333333333333e-06, "loss": 43.7156, "step": 3217 }, { "epoch": 76.62089552238805, "grad_norm": 26.423311233520508, "learning_rate": 7.955357142857144e-06, "loss": 42.2925, "step": 3218 }, { "epoch": 76.64477611940299, "grad_norm": 22.850927352905273, "learning_rate": 7.952380952380953e-06, "loss": 43.3146, "step": 3219 }, { "epoch": 76.66865671641791, "grad_norm": 32.23415756225586, "learning_rate": 7.949404761904762e-06, "loss": 43.4622, "step": 3220 }, { "epoch": 76.69253731343284, "grad_norm": 25.596759796142578, "learning_rate": 7.946428571428571e-06, "loss": 42.6238, "step": 3221 }, { "epoch": 76.71641791044776, "grad_norm": 28.371593475341797, "learning_rate": 7.943452380952382e-06, "loss": 41.2267, "step": 3222 }, { "epoch": 76.74029850746268, "grad_norm": 24.369253158569336, "learning_rate": 7.94047619047619e-06, "loss": 43.24, "step": 3223 }, { "epoch": 76.7641791044776, "grad_norm": 34.42658996582031, "learning_rate": 7.9375e-06, "loss": 42.8095, "step": 3224 }, { "epoch": 76.78805970149254, "grad_norm": 26.35492515563965, "learning_rate": 7.93452380952381e-06, "loss": 42.0312, "step": 3225 }, { "epoch": 76.81194029850747, "grad_norm": 33.34773254394531, "learning_rate": 7.93154761904762e-06, "loss": 43.4483, "step": 3226 }, { "epoch": 76.83582089552239, "grad_norm": 31.470170974731445, "learning_rate": 7.928571428571429e-06, "loss": 43.9896, "step": 3227 }, { "epoch": 76.85970149253731, "grad_norm": 28.38050651550293, "learning_rate": 7.925595238095238e-06, "loss": 43.9711, "step": 3228 }, { "epoch": 76.88358208955223, "grad_norm": NaN, "learning_rate": 7.922619047619048e-06, "loss": 75.7577, "step": 3229 }, { "epoch": 76.90746268656716, "grad_norm": 21.927776336669922, "learning_rate": 7.922619047619048e-06, "loss": 42.1852, "step": 3230 }, { "epoch": 76.9313432835821, "grad_norm": 28.636518478393555, "learning_rate": 7.919642857142857e-06, "loss": 43.103, "step": 3231 }, { "epoch": 76.95522388059702, "grad_norm": 25.48936653137207, "learning_rate": 7.916666666666667e-06, "loss": 43.1688, "step": 3232 }, { "epoch": 76.97910447761194, "grad_norm": 29.641143798828125, "learning_rate": 7.913690476190477e-06, "loss": 41.7518, "step": 3233 }, { "epoch": 77.0, "grad_norm": 22.023099899291992, "learning_rate": 7.910714285714286e-06, "loss": 38.1447, "step": 3234 }, { "epoch": 77.02388059701492, "grad_norm": 35.88689041137695, "learning_rate": 7.907738095238095e-06, "loss": 43.1578, "step": 3235 }, { "epoch": 77.04776119402985, "grad_norm": 34.37343978881836, "learning_rate": 7.904761904761904e-06, "loss": 43.0582, "step": 3236 }, { "epoch": 77.07164179104478, "grad_norm": 18.577016830444336, "learning_rate": 7.901785714285715e-06, "loss": 42.1815, "step": 3237 }, { "epoch": 77.0955223880597, "grad_norm": 23.373125076293945, "learning_rate": 7.898809523809524e-06, "loss": 44.491, "step": 3238 }, { "epoch": 77.11940298507463, "grad_norm": 28.848159790039062, "learning_rate": 7.895833333333333e-06, "loss": 43.0114, "step": 3239 }, { "epoch": 77.14328358208955, "grad_norm": 19.10719108581543, "learning_rate": 7.892857142857144e-06, "loss": 42.862, "step": 3240 }, { "epoch": 77.16716417910447, "grad_norm": 34.79095458984375, "learning_rate": 7.889880952380953e-06, "loss": 43.7736, "step": 3241 }, { "epoch": 77.1910447761194, "grad_norm": 28.950021743774414, "learning_rate": 7.886904761904762e-06, "loss": 44.5221, "step": 3242 }, { "epoch": 77.21492537313434, "grad_norm": 29.437536239624023, "learning_rate": 7.883928571428571e-06, "loss": 43.0498, "step": 3243 }, { "epoch": 77.23880597014926, "grad_norm": 26.087984085083008, "learning_rate": 7.880952380952382e-06, "loss": 42.1991, "step": 3244 }, { "epoch": 77.26268656716418, "grad_norm": 30.868637084960938, "learning_rate": 7.877976190476191e-06, "loss": 43.1896, "step": 3245 }, { "epoch": 77.2865671641791, "grad_norm": 26.28648567199707, "learning_rate": 7.875e-06, "loss": 41.9695, "step": 3246 }, { "epoch": 77.31044776119403, "grad_norm": 27.738021850585938, "learning_rate": 7.87202380952381e-06, "loss": 42.9537, "step": 3247 }, { "epoch": 77.33432835820895, "grad_norm": 23.0654296875, "learning_rate": 7.86904761904762e-06, "loss": 43.1053, "step": 3248 }, { "epoch": 77.35820895522389, "grad_norm": 31.976926803588867, "learning_rate": 7.866071428571429e-06, "loss": 42.0648, "step": 3249 }, { "epoch": 77.38208955223881, "grad_norm": 28.690933227539062, "learning_rate": 7.863095238095238e-06, "loss": 43.0786, "step": 3250 }, { "epoch": 77.40597014925373, "grad_norm": 29.870180130004883, "learning_rate": 7.860119047619049e-06, "loss": 44.1362, "step": 3251 }, { "epoch": 77.42985074626866, "grad_norm": 29.524002075195312, "learning_rate": 7.857142857142858e-06, "loss": 42.635, "step": 3252 }, { "epoch": 77.45373134328358, "grad_norm": 24.833131790161133, "learning_rate": 7.854166666666667e-06, "loss": 43.1208, "step": 3253 }, { "epoch": 77.4776119402985, "grad_norm": 24.424755096435547, "learning_rate": 7.851190476190477e-06, "loss": 44.4682, "step": 3254 }, { "epoch": 77.50149253731344, "grad_norm": 30.417823791503906, "learning_rate": 7.848214285714287e-06, "loss": 45.1353, "step": 3255 }, { "epoch": 77.52537313432836, "grad_norm": 23.12209701538086, "learning_rate": 7.845238095238096e-06, "loss": 41.7736, "step": 3256 }, { "epoch": 77.54925373134328, "grad_norm": 30.454221725463867, "learning_rate": 7.842261904761905e-06, "loss": 42.6765, "step": 3257 }, { "epoch": 77.57313432835821, "grad_norm": 30.55715560913086, "learning_rate": 7.839285714285715e-06, "loss": 43.4168, "step": 3258 }, { "epoch": 77.59701492537313, "grad_norm": 26.72547149658203, "learning_rate": 7.836309523809524e-06, "loss": 42.5388, "step": 3259 }, { "epoch": 77.62089552238805, "grad_norm": 25.03418731689453, "learning_rate": 7.833333333333333e-06, "loss": 43.3748, "step": 3260 }, { "epoch": 77.64477611940299, "grad_norm": 28.706029891967773, "learning_rate": 7.830357142857144e-06, "loss": 43.7717, "step": 3261 }, { "epoch": 77.66865671641791, "grad_norm": 30.39940643310547, "learning_rate": 7.827380952380953e-06, "loss": 42.952, "step": 3262 }, { "epoch": 77.69253731343284, "grad_norm": 25.622882843017578, "learning_rate": 7.824404761904762e-06, "loss": 42.7133, "step": 3263 }, { "epoch": 77.71641791044776, "grad_norm": 25.120025634765625, "learning_rate": 7.821428571428571e-06, "loss": 42.2453, "step": 3264 }, { "epoch": 77.74029850746268, "grad_norm": 27.227832794189453, "learning_rate": 7.818452380952382e-06, "loss": 42.4094, "step": 3265 }, { "epoch": 77.7641791044776, "grad_norm": 23.663406372070312, "learning_rate": 7.815476190476191e-06, "loss": 43.7332, "step": 3266 }, { "epoch": 77.78805970149254, "grad_norm": 28.738086700439453, "learning_rate": 7.8125e-06, "loss": 43.7881, "step": 3267 }, { "epoch": 77.81194029850747, "grad_norm": 27.955598831176758, "learning_rate": 7.809523809523811e-06, "loss": 43.4782, "step": 3268 }, { "epoch": 77.83582089552239, "grad_norm": 24.79859161376953, "learning_rate": 7.80654761904762e-06, "loss": 41.0554, "step": 3269 }, { "epoch": 77.85970149253731, "grad_norm": 25.531471252441406, "learning_rate": 7.803571428571429e-06, "loss": 43.0072, "step": 3270 }, { "epoch": 77.88358208955223, "grad_norm": 27.746000289916992, "learning_rate": 7.800595238095238e-06, "loss": 43.8641, "step": 3271 }, { "epoch": 77.90746268656716, "grad_norm": 25.056262969970703, "learning_rate": 7.797619047619049e-06, "loss": 43.1316, "step": 3272 }, { "epoch": 77.9313432835821, "grad_norm": 30.888355255126953, "learning_rate": 7.794642857142858e-06, "loss": 43.482, "step": 3273 }, { "epoch": 77.95522388059702, "grad_norm": 22.501649856567383, "learning_rate": 7.791666666666667e-06, "loss": 43.4869, "step": 3274 }, { "epoch": 77.97910447761194, "grad_norm": 31.175397872924805, "learning_rate": 7.788690476190478e-06, "loss": 43.5349, "step": 3275 }, { "epoch": 78.0, "grad_norm": 20.901432037353516, "learning_rate": 7.785714285714287e-06, "loss": 36.2874, "step": 3276 }, { "epoch": 78.02388059701492, "grad_norm": 30.319852828979492, "learning_rate": 7.782738095238096e-06, "loss": 41.102, "step": 3277 }, { "epoch": 78.04776119402985, "grad_norm": 28.31625747680664, "learning_rate": 7.779761904761905e-06, "loss": 42.4304, "step": 3278 }, { "epoch": 78.07164179104478, "grad_norm": 26.445859909057617, "learning_rate": 7.776785714285716e-06, "loss": 42.8755, "step": 3279 }, { "epoch": 78.0955223880597, "grad_norm": 20.42568588256836, "learning_rate": 7.773809523809525e-06, "loss": 44.5072, "step": 3280 }, { "epoch": 78.11940298507463, "grad_norm": 28.535858154296875, "learning_rate": 7.770833333333334e-06, "loss": 42.325, "step": 3281 }, { "epoch": 78.14328358208955, "grad_norm": 21.800678253173828, "learning_rate": 7.767857142857144e-06, "loss": 44.3283, "step": 3282 }, { "epoch": 78.16716417910447, "grad_norm": 26.74295997619629, "learning_rate": 7.764880952380953e-06, "loss": 44.3208, "step": 3283 }, { "epoch": 78.1910447761194, "grad_norm": 28.9124755859375, "learning_rate": 7.761904761904762e-06, "loss": 42.8112, "step": 3284 }, { "epoch": 78.21492537313434, "grad_norm": 24.948265075683594, "learning_rate": 7.758928571428571e-06, "loss": 42.6617, "step": 3285 }, { "epoch": 78.23880597014926, "grad_norm": 25.038854598999023, "learning_rate": 7.755952380952382e-06, "loss": 42.6089, "step": 3286 }, { "epoch": 78.26268656716418, "grad_norm": 24.622905731201172, "learning_rate": 7.752976190476191e-06, "loss": 42.9201, "step": 3287 }, { "epoch": 78.2865671641791, "grad_norm": 22.999900817871094, "learning_rate": 7.75e-06, "loss": 43.2141, "step": 3288 }, { "epoch": 78.31044776119403, "grad_norm": 22.848161697387695, "learning_rate": 7.747023809523811e-06, "loss": 44.1053, "step": 3289 }, { "epoch": 78.33432835820895, "grad_norm": 16.15705108642578, "learning_rate": 7.74404761904762e-06, "loss": 43.7009, "step": 3290 }, { "epoch": 78.35820895522389, "grad_norm": 29.3355655670166, "learning_rate": 7.74107142857143e-06, "loss": 42.3037, "step": 3291 }, { "epoch": 78.38208955223881, "grad_norm": 19.516281127929688, "learning_rate": 7.738095238095238e-06, "loss": 42.6299, "step": 3292 }, { "epoch": 78.40597014925373, "grad_norm": 34.26980209350586, "learning_rate": 7.735119047619049e-06, "loss": 43.052, "step": 3293 }, { "epoch": 78.42985074626866, "grad_norm": 32.0604133605957, "learning_rate": 7.732142857142858e-06, "loss": 42.4497, "step": 3294 }, { "epoch": 78.45373134328358, "grad_norm": 23.038795471191406, "learning_rate": 7.729166666666667e-06, "loss": 42.5542, "step": 3295 }, { "epoch": 78.4776119402985, "grad_norm": 27.498064041137695, "learning_rate": 7.726190476190478e-06, "loss": 41.834, "step": 3296 }, { "epoch": 78.50149253731344, "grad_norm": 25.38565444946289, "learning_rate": 7.723214285714287e-06, "loss": 44.7325, "step": 3297 }, { "epoch": 78.52537313432836, "grad_norm": 21.209095001220703, "learning_rate": 7.720238095238096e-06, "loss": 44.6015, "step": 3298 }, { "epoch": 78.54925373134328, "grad_norm": 27.321908950805664, "learning_rate": 7.717261904761905e-06, "loss": 43.3014, "step": 3299 }, { "epoch": 78.57313432835821, "grad_norm": 20.742706298828125, "learning_rate": 7.714285714285716e-06, "loss": 44.1572, "step": 3300 }, { "epoch": 78.59701492537313, "grad_norm": 28.640583038330078, "learning_rate": 7.711309523809525e-06, "loss": 42.1555, "step": 3301 }, { "epoch": 78.62089552238805, "grad_norm": 30.252870559692383, "learning_rate": 7.708333333333334e-06, "loss": 43.4469, "step": 3302 }, { "epoch": 78.64477611940299, "grad_norm": 23.6368350982666, "learning_rate": 7.705357142857145e-06, "loss": 44.3375, "step": 3303 }, { "epoch": 78.66865671641791, "grad_norm": 22.434412002563477, "learning_rate": 7.702380952380954e-06, "loss": 42.8106, "step": 3304 }, { "epoch": 78.69253731343284, "grad_norm": 28.329635620117188, "learning_rate": 7.699404761904763e-06, "loss": 42.8968, "step": 3305 }, { "epoch": 78.71641791044776, "grad_norm": 21.02295684814453, "learning_rate": 7.696428571428572e-06, "loss": 42.1169, "step": 3306 }, { "epoch": 78.74029850746268, "grad_norm": 30.06182861328125, "learning_rate": 7.693452380952382e-06, "loss": 43.0741, "step": 3307 }, { "epoch": 78.7641791044776, "grad_norm": 22.40550994873047, "learning_rate": 7.690476190476191e-06, "loss": 42.8449, "step": 3308 }, { "epoch": 78.78805970149254, "grad_norm": 28.855802536010742, "learning_rate": 7.6875e-06, "loss": 43.0846, "step": 3309 }, { "epoch": 78.81194029850747, "grad_norm": 25.507308959960938, "learning_rate": 7.684523809523811e-06, "loss": 41.954, "step": 3310 }, { "epoch": 78.83582089552239, "grad_norm": 26.092424392700195, "learning_rate": 7.68154761904762e-06, "loss": 42.1684, "step": 3311 }, { "epoch": 78.85970149253731, "grad_norm": 24.099889755249023, "learning_rate": 7.67857142857143e-06, "loss": 43.9231, "step": 3312 }, { "epoch": 78.88358208955223, "grad_norm": 28.72806739807129, "learning_rate": 7.675595238095238e-06, "loss": 42.546, "step": 3313 }, { "epoch": 78.90746268656716, "grad_norm": 26.489227294921875, "learning_rate": 7.672619047619049e-06, "loss": 44.1023, "step": 3314 }, { "epoch": 78.9313432835821, "grad_norm": 29.59152603149414, "learning_rate": 7.669642857142858e-06, "loss": 43.7005, "step": 3315 }, { "epoch": 78.95522388059702, "grad_norm": 23.0878963470459, "learning_rate": 7.666666666666667e-06, "loss": 41.9249, "step": 3316 }, { "epoch": 78.97910447761194, "grad_norm": 29.851896286010742, "learning_rate": 7.663690476190478e-06, "loss": 42.2078, "step": 3317 }, { "epoch": 79.0, "grad_norm": 23.739883422851562, "learning_rate": 7.660714285714287e-06, "loss": 39.1357, "step": 3318 }, { "epoch": 79.02388059701492, "grad_norm": 23.394466400146484, "learning_rate": 7.657738095238096e-06, "loss": 43.7385, "step": 3319 }, { "epoch": 79.04776119402985, "grad_norm": 22.10674285888672, "learning_rate": 7.654761904761905e-06, "loss": 43.253, "step": 3320 }, { "epoch": 79.07164179104478, "grad_norm": 25.71041488647461, "learning_rate": 7.651785714285714e-06, "loss": 43.6012, "step": 3321 }, { "epoch": 79.0955223880597, "grad_norm": 18.054738998413086, "learning_rate": 7.648809523809523e-06, "loss": 42.5356, "step": 3322 }, { "epoch": 79.11940298507463, "grad_norm": 25.66161346435547, "learning_rate": 7.645833333333334e-06, "loss": 43.796, "step": 3323 }, { "epoch": 79.14328358208955, "grad_norm": 15.92872142791748, "learning_rate": 7.642857142857143e-06, "loss": 43.4924, "step": 3324 }, { "epoch": 79.16716417910447, "grad_norm": 26.33378791809082, "learning_rate": 7.639880952380952e-06, "loss": 41.9388, "step": 3325 }, { "epoch": 79.1910447761194, "grad_norm": 18.938690185546875, "learning_rate": 7.636904761904763e-06, "loss": 42.6458, "step": 3326 }, { "epoch": 79.21492537313434, "grad_norm": 21.968505859375, "learning_rate": 7.633928571428572e-06, "loss": 43.2856, "step": 3327 }, { "epoch": 79.23880597014926, "grad_norm": 21.652313232421875, "learning_rate": 7.630952380952381e-06, "loss": 43.1669, "step": 3328 }, { "epoch": 79.26268656716418, "grad_norm": 16.064531326293945, "learning_rate": 7.627976190476191e-06, "loss": 43.09, "step": 3329 }, { "epoch": 79.2865671641791, "grad_norm": 21.19333839416504, "learning_rate": 7.625e-06, "loss": 42.7371, "step": 3330 }, { "epoch": 79.31044776119403, "grad_norm": 19.381980895996094, "learning_rate": 7.62202380952381e-06, "loss": 43.129, "step": 3331 }, { "epoch": 79.33432835820895, "grad_norm": 17.10456085205078, "learning_rate": 7.61904761904762e-06, "loss": 42.795, "step": 3332 }, { "epoch": 79.35820895522389, "grad_norm": 18.57830810546875, "learning_rate": 7.616071428571429e-06, "loss": 43.1172, "step": 3333 }, { "epoch": 79.38208955223881, "grad_norm": 16.343597412109375, "learning_rate": 7.6130952380952386e-06, "loss": 44.1413, "step": 3334 }, { "epoch": 79.40597014925373, "grad_norm": 18.999656677246094, "learning_rate": 7.610119047619048e-06, "loss": 43.154, "step": 3335 }, { "epoch": 79.42985074626866, "grad_norm": 18.70110321044922, "learning_rate": 7.6071428571428575e-06, "loss": 43.2832, "step": 3336 }, { "epoch": 79.45373134328358, "grad_norm": 17.107995986938477, "learning_rate": 7.6041666666666666e-06, "loss": 42.6499, "step": 3337 }, { "epoch": 79.4776119402985, "grad_norm": 20.98540496826172, "learning_rate": 7.6011904761904765e-06, "loss": 42.6728, "step": 3338 }, { "epoch": 79.50149253731344, "grad_norm": 18.264223098754883, "learning_rate": 7.598214285714286e-06, "loss": 42.1924, "step": 3339 }, { "epoch": 79.52537313432836, "grad_norm": 22.478178024291992, "learning_rate": 7.595238095238095e-06, "loss": 43.1835, "step": 3340 }, { "epoch": 79.54925373134328, "grad_norm": 21.464313507080078, "learning_rate": 7.592261904761905e-06, "loss": 42.8992, "step": 3341 }, { "epoch": 79.57313432835821, "grad_norm": 23.627376556396484, "learning_rate": 7.589285714285714e-06, "loss": 43.3444, "step": 3342 }, { "epoch": 79.59701492537313, "grad_norm": 20.699804306030273, "learning_rate": 7.586309523809524e-06, "loss": 43.5294, "step": 3343 }, { "epoch": 79.62089552238805, "grad_norm": 27.1911678314209, "learning_rate": 7.583333333333333e-06, "loss": 42.3842, "step": 3344 }, { "epoch": 79.64477611940299, "grad_norm": 22.591445922851562, "learning_rate": 7.580357142857143e-06, "loss": 43.2132, "step": 3345 }, { "epoch": 79.66865671641791, "grad_norm": 23.79202651977539, "learning_rate": 7.577380952380953e-06, "loss": 42.7603, "step": 3346 }, { "epoch": 79.69253731343284, "grad_norm": 21.520214080810547, "learning_rate": 7.574404761904762e-06, "loss": 42.9868, "step": 3347 }, { "epoch": 79.71641791044776, "grad_norm": 21.92240333557129, "learning_rate": 7.571428571428572e-06, "loss": 42.5148, "step": 3348 }, { "epoch": 79.74029850746268, "grad_norm": 21.808698654174805, "learning_rate": 7.568452380952381e-06, "loss": 42.2734, "step": 3349 }, { "epoch": 79.7641791044776, "grad_norm": 21.703947067260742, "learning_rate": 7.565476190476191e-06, "loss": 43.9589, "step": 3350 }, { "epoch": 79.78805970149254, "grad_norm": 21.56643295288086, "learning_rate": 7.5625e-06, "loss": 42.249, "step": 3351 }, { "epoch": 79.81194029850747, "grad_norm": 20.325498580932617, "learning_rate": 7.55952380952381e-06, "loss": 42.5246, "step": 3352 }, { "epoch": 79.83582089552239, "grad_norm": 20.19651985168457, "learning_rate": 7.55654761904762e-06, "loss": 43.1353, "step": 3353 }, { "epoch": 79.85970149253731, "grad_norm": 15.062832832336426, "learning_rate": 7.553571428571429e-06, "loss": 42.7335, "step": 3354 }, { "epoch": 79.88358208955223, "grad_norm": 21.990650177001953, "learning_rate": 7.550595238095239e-06, "loss": 44.103, "step": 3355 }, { "epoch": 79.90746268656716, "grad_norm": 17.816457748413086, "learning_rate": 7.547619047619048e-06, "loss": 43.592, "step": 3356 }, { "epoch": 79.9313432835821, "grad_norm": 21.62665557861328, "learning_rate": 7.544642857142858e-06, "loss": 44.0372, "step": 3357 }, { "epoch": 79.95522388059702, "grad_norm": 20.444469451904297, "learning_rate": 7.541666666666667e-06, "loss": 42.7547, "step": 3358 }, { "epoch": 79.97910447761194, "grad_norm": 15.230064392089844, "learning_rate": 7.538690476190477e-06, "loss": 42.4287, "step": 3359 }, { "epoch": 80.0, "grad_norm": 18.977619171142578, "learning_rate": 7.5357142857142865e-06, "loss": 36.8674, "step": 3360 }, { "epoch": 80.0, "step": 3360, "total_flos": 1.6516474192825325e+17, "train_loss": 10.921977708453223, "train_runtime": 25778.6818, "train_samples_per_second": 16.609, "train_steps_per_second": 0.13 }, { "epoch": 80.02388059701492, "grad_norm": 20.951553344726562, "learning_rate": 1e-05, "loss": 42.8953, "step": 3361 }, { "epoch": 80.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997354497354498e-06, "loss": 49.2702, "step": 3362 }, { "epoch": 80.07164179104478, "grad_norm": 272.02093505859375, "learning_rate": 9.997354497354498e-06, "loss": 48.7639, "step": 3363 }, { "epoch": 80.0955223880597, "grad_norm": 136.40426635742188, "learning_rate": 9.994708994708996e-06, "loss": 48.2845, "step": 3364 }, { "epoch": 80.11940298507463, "grad_norm": 69.2103500366211, "learning_rate": 9.992063492063493e-06, "loss": 45.905, "step": 3365 }, { "epoch": 80.14328358208955, "grad_norm": 42.27269744873047, "learning_rate": 9.989417989417989e-06, "loss": 44.495, "step": 3366 }, { "epoch": 80.16716417910447, "grad_norm": 78.32905578613281, "learning_rate": 9.986772486772488e-06, "loss": 43.787, "step": 3367 }, { "epoch": 80.1910447761194, "grad_norm": 53.60576248168945, "learning_rate": 9.984126984126986e-06, "loss": 44.9412, "step": 3368 }, { "epoch": 80.21492537313434, "grad_norm": 43.58672332763672, "learning_rate": 9.981481481481482e-06, "loss": 43.5559, "step": 3369 }, { "epoch": 80.23880597014926, "grad_norm": 52.74037170410156, "learning_rate": 9.97883597883598e-06, "loss": 43.7715, "step": 3370 }, { "epoch": 80.26268656716418, "grad_norm": 36.5859260559082, "learning_rate": 9.976190476190477e-06, "loss": 44.8368, "step": 3371 }, { "epoch": 80.2865671641791, "grad_norm": 41.1060676574707, "learning_rate": 9.973544973544974e-06, "loss": 44.2442, "step": 3372 }, { "epoch": 80.31044776119403, "grad_norm": 29.22023582458496, "learning_rate": 9.970899470899472e-06, "loss": 44.9361, "step": 3373 }, { "epoch": 80.33432835820895, "grad_norm": 23.876710891723633, "learning_rate": 9.968253968253969e-06, "loss": 43.0819, "step": 3374 }, { "epoch": 80.35820895522389, "grad_norm": 29.575992584228516, "learning_rate": 9.965608465608467e-06, "loss": 43.4547, "step": 3375 }, { "epoch": 80.38208955223881, "grad_norm": 30.555126190185547, "learning_rate": 9.962962962962964e-06, "loss": 42.7816, "step": 3376 }, { "epoch": 80.40597014925373, "grad_norm": 22.153589248657227, "learning_rate": 9.960317460317462e-06, "loss": 43.225, "step": 3377 }, { "epoch": 80.42985074626866, "grad_norm": 22.4864501953125, "learning_rate": 9.957671957671959e-06, "loss": 44.3476, "step": 3378 }, { "epoch": 80.45373134328358, "grad_norm": 28.664342880249023, "learning_rate": 9.955026455026457e-06, "loss": 43.8263, "step": 3379 }, { "epoch": 80.4776119402985, "grad_norm": 20.183809280395508, "learning_rate": 9.952380952380954e-06, "loss": 43.0054, "step": 3380 }, { "epoch": 80.50149253731344, "grad_norm": 20.122495651245117, "learning_rate": 9.94973544973545e-06, "loss": 42.8467, "step": 3381 }, { "epoch": 80.52537313432836, "grad_norm": 18.21672821044922, "learning_rate": 9.947089947089947e-06, "loss": 43.1002, "step": 3382 }, { "epoch": 80.54925373134328, "grad_norm": 19.279260635375977, "learning_rate": 9.944444444444445e-06, "loss": 43.057, "step": 3383 }, { "epoch": 80.57313432835821, "grad_norm": 16.66730308532715, "learning_rate": 9.941798941798942e-06, "loss": 41.9396, "step": 3384 }, { "epoch": 80.59701492537313, "grad_norm": 23.94289779663086, "learning_rate": 9.93915343915344e-06, "loss": 41.9997, "step": 3385 }, { "epoch": 80.62089552238805, "grad_norm": 19.543209075927734, "learning_rate": 9.936507936507937e-06, "loss": 43.4446, "step": 3386 }, { "epoch": 80.64477611940299, "grad_norm": 16.7114315032959, "learning_rate": 9.933862433862435e-06, "loss": 42.8548, "step": 3387 }, { "epoch": 80.66865671641791, "grad_norm": 14.687740325927734, "learning_rate": 9.931216931216932e-06, "loss": 43.4851, "step": 3388 }, { "epoch": 80.69253731343284, "grad_norm": 20.930234909057617, "learning_rate": 9.92857142857143e-06, "loss": 43.216, "step": 3389 }, { "epoch": 80.71641791044776, "grad_norm": 18.500185012817383, "learning_rate": 9.925925925925927e-06, "loss": 43.32, "step": 3390 }, { "epoch": 80.74029850746268, "grad_norm": 17.255064010620117, "learning_rate": 9.923280423280423e-06, "loss": 41.8527, "step": 3391 }, { "epoch": 80.7641791044776, "grad_norm": 23.286033630371094, "learning_rate": 9.920634920634922e-06, "loss": 42.4732, "step": 3392 }, { "epoch": 80.78805970149254, "grad_norm": 21.66954803466797, "learning_rate": 9.917989417989418e-06, "loss": 43.0689, "step": 3393 }, { "epoch": 80.81194029850747, "grad_norm": 15.510072708129883, "learning_rate": 9.915343915343916e-06, "loss": 42.6028, "step": 3394 }, { "epoch": 80.83582089552239, "grad_norm": 17.338539123535156, "learning_rate": 9.912698412698413e-06, "loss": 43.066, "step": 3395 }, { "epoch": 80.85970149253731, "grad_norm": 28.546316146850586, "learning_rate": 9.91005291005291e-06, "loss": 42.7705, "step": 3396 }, { "epoch": 80.88358208955223, "grad_norm": 21.883974075317383, "learning_rate": 9.907407407407408e-06, "loss": 42.3245, "step": 3397 }, { "epoch": 80.90746268656716, "grad_norm": 23.212677001953125, "learning_rate": 9.904761904761906e-06, "loss": 43.1431, "step": 3398 }, { "epoch": 80.9313432835821, "grad_norm": 19.58159828186035, "learning_rate": 9.902116402116403e-06, "loss": 43.5287, "step": 3399 }, { "epoch": 80.95522388059702, "grad_norm": 26.139862060546875, "learning_rate": 9.8994708994709e-06, "loss": 42.9908, "step": 3400 }, { "epoch": 80.97910447761194, "grad_norm": 16.672977447509766, "learning_rate": 9.896825396825398e-06, "loss": 42.1315, "step": 3401 }, { "epoch": 81.0, "grad_norm": 24.852455139160156, "learning_rate": 9.894179894179896e-06, "loss": 36.8278, "step": 3402 }, { "epoch": 81.02388059701492, "grad_norm": 22.26006317138672, "learning_rate": 9.891534391534391e-06, "loss": 42.4729, "step": 3403 }, { "epoch": 81.04776119402985, "grad_norm": 16.017719268798828, "learning_rate": 9.88888888888889e-06, "loss": 42.9225, "step": 3404 }, { "epoch": 81.07164179104478, "grad_norm": 28.550519943237305, "learning_rate": 9.886243386243386e-06, "loss": 42.6745, "step": 3405 }, { "epoch": 81.0955223880597, "grad_norm": 23.507572174072266, "learning_rate": 9.883597883597884e-06, "loss": 42.0028, "step": 3406 }, { "epoch": 81.11940298507463, "grad_norm": 21.06671905517578, "learning_rate": 9.880952380952381e-06, "loss": 43.0596, "step": 3407 }, { "epoch": 81.14328358208955, "grad_norm": 30.52378273010254, "learning_rate": 9.878306878306879e-06, "loss": 42.6651, "step": 3408 }, { "epoch": 81.16716417910447, "grad_norm": 20.8646183013916, "learning_rate": 9.875661375661376e-06, "loss": 42.5492, "step": 3409 }, { "epoch": 81.1910447761194, "grad_norm": 24.76753044128418, "learning_rate": 9.873015873015874e-06, "loss": 44.1658, "step": 3410 }, { "epoch": 81.21492537313434, "grad_norm": 24.59670066833496, "learning_rate": 9.870370370370371e-06, "loss": 41.993, "step": 3411 }, { "epoch": 81.23880597014926, "grad_norm": 18.1619815826416, "learning_rate": 9.867724867724869e-06, "loss": 41.729, "step": 3412 }, { "epoch": 81.26268656716418, "grad_norm": 25.726171493530273, "learning_rate": 9.865079365079366e-06, "loss": 43.4774, "step": 3413 }, { "epoch": 81.2865671641791, "grad_norm": 19.582408905029297, "learning_rate": 9.862433862433864e-06, "loss": 44.2081, "step": 3414 }, { "epoch": 81.31044776119403, "grad_norm": 19.20425033569336, "learning_rate": 9.85978835978836e-06, "loss": 45.2273, "step": 3415 }, { "epoch": 81.33432835820895, "grad_norm": 24.18745994567871, "learning_rate": 9.857142857142859e-06, "loss": 43.2535, "step": 3416 }, { "epoch": 81.35820895522389, "grad_norm": 20.09618377685547, "learning_rate": 9.854497354497355e-06, "loss": 42.837, "step": 3417 }, { "epoch": 81.38208955223881, "grad_norm": 18.357542037963867, "learning_rate": 9.851851851851852e-06, "loss": 42.3722, "step": 3418 }, { "epoch": 81.40597014925373, "grad_norm": 21.53424644470215, "learning_rate": 9.849206349206351e-06, "loss": 42.6014, "step": 3419 }, { "epoch": 81.42985074626866, "grad_norm": 23.138153076171875, "learning_rate": 9.846560846560847e-06, "loss": 43.1802, "step": 3420 }, { "epoch": 81.45373134328358, "grad_norm": NaN, "learning_rate": 9.843915343915345e-06, "loss": 60.8525, "step": 3421 }, { "epoch": 81.4776119402985, "grad_norm": 16.697940826416016, "learning_rate": 9.843915343915345e-06, "loss": 42.6524, "step": 3422 }, { "epoch": 81.50149253731344, "grad_norm": 21.829591751098633, "learning_rate": 9.841269841269842e-06, "loss": 42.8111, "step": 3423 }, { "epoch": 81.52537313432836, "grad_norm": 24.891218185424805, "learning_rate": 9.83862433862434e-06, "loss": 43.6078, "step": 3424 }, { "epoch": 81.54925373134328, "grad_norm": 21.53104019165039, "learning_rate": 9.835978835978837e-06, "loss": 42.8522, "step": 3425 }, { "epoch": 81.57313432835821, "grad_norm": 24.85852813720703, "learning_rate": 9.833333333333333e-06, "loss": 42.5736, "step": 3426 }, { "epoch": 81.59701492537313, "grad_norm": 25.954561233520508, "learning_rate": 9.830687830687832e-06, "loss": 42.513, "step": 3427 }, { "epoch": 81.62089552238805, "grad_norm": 18.79954719543457, "learning_rate": 9.828042328042328e-06, "loss": 42.4569, "step": 3428 }, { "epoch": 81.64477611940299, "grad_norm": 21.777231216430664, "learning_rate": 9.825396825396825e-06, "loss": 41.9235, "step": 3429 }, { "epoch": 81.66865671641791, "grad_norm": 20.84613037109375, "learning_rate": 9.822751322751325e-06, "loss": 43.7221, "step": 3430 }, { "epoch": 81.69253731343284, "grad_norm": 25.095165252685547, "learning_rate": 9.82010582010582e-06, "loss": 43.7676, "step": 3431 }, { "epoch": 81.71641791044776, "grad_norm": 20.732393264770508, "learning_rate": 9.817460317460318e-06, "loss": 42.3845, "step": 3432 }, { "epoch": 81.74029850746268, "grad_norm": NaN, "learning_rate": 9.814814814814815e-06, "loss": 42.1237, "step": 3433 }, { "epoch": 81.7641791044776, "grad_norm": 33.96809768676758, "learning_rate": 9.814814814814815e-06, "loss": 43.6781, "step": 3434 }, { "epoch": 81.78805970149254, "grad_norm": 20.83742904663086, "learning_rate": 9.812169312169313e-06, "loss": 43.1676, "step": 3435 }, { "epoch": 81.81194029850747, "grad_norm": 37.817081451416016, "learning_rate": 9.80952380952381e-06, "loss": 42.5989, "step": 3436 }, { "epoch": 81.83582089552239, "grad_norm": 26.07498550415039, "learning_rate": 9.806878306878308e-06, "loss": 43.613, "step": 3437 }, { "epoch": 81.85970149253731, "grad_norm": 32.35169982910156, "learning_rate": 9.804232804232805e-06, "loss": 43.4166, "step": 3438 }, { "epoch": 81.88358208955223, "grad_norm": 23.49301528930664, "learning_rate": 9.801587301587301e-06, "loss": 40.9932, "step": 3439 }, { "epoch": 81.90746268656716, "grad_norm": 28.475976943969727, "learning_rate": 9.7989417989418e-06, "loss": 44.0779, "step": 3440 }, { "epoch": 81.9313432835821, "grad_norm": 20.77143669128418, "learning_rate": 9.796296296296298e-06, "loss": 43.0358, "step": 3441 }, { "epoch": 81.95522388059702, "grad_norm": 27.558744430541992, "learning_rate": 9.793650793650794e-06, "loss": 42.6501, "step": 3442 }, { "epoch": 81.97910447761194, "grad_norm": 17.57852554321289, "learning_rate": 9.791005291005293e-06, "loss": 43.0594, "step": 3443 }, { "epoch": 82.0, "grad_norm": 30.414134979248047, "learning_rate": 9.788359788359789e-06, "loss": 37.7772, "step": 3444 }, { "epoch": 82.02388059701492, "grad_norm": 29.184572219848633, "learning_rate": 9.785714285714286e-06, "loss": 43.0878, "step": 3445 }, { "epoch": 82.04776119402985, "grad_norm": 24.36541748046875, "learning_rate": 9.783068783068784e-06, "loss": 43.1851, "step": 3446 }, { "epoch": 82.07164179104478, "grad_norm": 24.232807159423828, "learning_rate": 9.780423280423281e-06, "loss": 43.4104, "step": 3447 }, { "epoch": 82.0955223880597, "grad_norm": 29.002002716064453, "learning_rate": 9.777777777777779e-06, "loss": 44.6274, "step": 3448 }, { "epoch": 82.11940298507463, "grad_norm": 22.997961044311523, "learning_rate": 9.775132275132276e-06, "loss": 43.2128, "step": 3449 }, { "epoch": 82.14328358208955, "grad_norm": 26.34942626953125, "learning_rate": 9.772486772486774e-06, "loss": 42.6116, "step": 3450 }, { "epoch": 82.16716417910447, "grad_norm": 19.555774688720703, "learning_rate": 9.769841269841271e-06, "loss": 42.9207, "step": 3451 }, { "epoch": 82.1910447761194, "grad_norm": 25.108083724975586, "learning_rate": 9.767195767195769e-06, "loss": 41.7188, "step": 3452 }, { "epoch": 82.21492537313434, "grad_norm": 20.387653350830078, "learning_rate": 9.764550264550266e-06, "loss": 42.8712, "step": 3453 }, { "epoch": 82.23880597014926, "grad_norm": 24.493921279907227, "learning_rate": 9.761904761904762e-06, "loss": 43.6475, "step": 3454 }, { "epoch": 82.26268656716418, "grad_norm": 23.366165161132812, "learning_rate": 9.759259259259261e-06, "loss": 42.5025, "step": 3455 }, { "epoch": 82.2865671641791, "grad_norm": 25.831466674804688, "learning_rate": 9.756613756613757e-06, "loss": 44.1183, "step": 3456 }, { "epoch": 82.31044776119403, "grad_norm": 20.5382137298584, "learning_rate": 9.753968253968254e-06, "loss": 42.0874, "step": 3457 }, { "epoch": 82.33432835820895, "grad_norm": 23.923063278198242, "learning_rate": 9.751322751322752e-06, "loss": 44.2198, "step": 3458 }, { "epoch": 82.35820895522389, "grad_norm": 21.77039909362793, "learning_rate": 9.74867724867725e-06, "loss": 42.8486, "step": 3459 }, { "epoch": 82.38208955223881, "grad_norm": 19.86173439025879, "learning_rate": 9.746031746031747e-06, "loss": 43.104, "step": 3460 }, { "epoch": 82.40597014925373, "grad_norm": 20.714754104614258, "learning_rate": 9.743386243386244e-06, "loss": 41.789, "step": 3461 }, { "epoch": 82.42985074626866, "grad_norm": 24.748607635498047, "learning_rate": 9.740740740740742e-06, "loss": 41.7835, "step": 3462 }, { "epoch": 82.45373134328358, "grad_norm": 19.247220993041992, "learning_rate": 9.73809523809524e-06, "loss": 42.3253, "step": 3463 }, { "epoch": 82.4776119402985, "grad_norm": 21.964488983154297, "learning_rate": 9.735449735449735e-06, "loss": 40.6579, "step": 3464 }, { "epoch": 82.50149253731344, "grad_norm": 19.75965118408203, "learning_rate": 9.732804232804234e-06, "loss": 42.2777, "step": 3465 }, { "epoch": 82.52537313432836, "grad_norm": 19.871715545654297, "learning_rate": 9.73015873015873e-06, "loss": 41.8654, "step": 3466 }, { "epoch": 82.54925373134328, "grad_norm": 17.353679656982422, "learning_rate": 9.727513227513228e-06, "loss": 43.1572, "step": 3467 }, { "epoch": 82.57313432835821, "grad_norm": 22.952226638793945, "learning_rate": 9.724867724867725e-06, "loss": 42.2348, "step": 3468 }, { "epoch": 82.59701492537313, "grad_norm": 19.62160873413086, "learning_rate": 9.722222222222223e-06, "loss": 43.7133, "step": 3469 }, { "epoch": 82.62089552238805, "grad_norm": NaN, "learning_rate": 9.71957671957672e-06, "loss": 44.3913, "step": 3470 }, { "epoch": 82.64477611940299, "grad_norm": 22.301387786865234, "learning_rate": 9.71957671957672e-06, "loss": 42.7776, "step": 3471 }, { "epoch": 82.66865671641791, "grad_norm": 23.42523956298828, "learning_rate": 9.716931216931218e-06, "loss": 43.9875, "step": 3472 }, { "epoch": 82.69253731343284, "grad_norm": 19.187870025634766, "learning_rate": 9.714285714285715e-06, "loss": 43.6333, "step": 3473 }, { "epoch": 82.71641791044776, "grad_norm": 17.408340454101562, "learning_rate": 9.711640211640213e-06, "loss": 42.3257, "step": 3474 }, { "epoch": 82.74029850746268, "grad_norm": 17.102418899536133, "learning_rate": 9.70899470899471e-06, "loss": 41.7486, "step": 3475 }, { "epoch": 82.7641791044776, "grad_norm": 17.715524673461914, "learning_rate": 9.706349206349208e-06, "loss": 43.9781, "step": 3476 }, { "epoch": 82.78805970149254, "grad_norm": 22.915067672729492, "learning_rate": 9.703703703703703e-06, "loss": 43.0049, "step": 3477 }, { "epoch": 82.81194029850747, "grad_norm": 18.104154586791992, "learning_rate": 9.701058201058203e-06, "loss": 43.0062, "step": 3478 }, { "epoch": 82.83582089552239, "grad_norm": 14.81946086883545, "learning_rate": 9.698412698412698e-06, "loss": 42.0968, "step": 3479 }, { "epoch": 82.85970149253731, "grad_norm": 19.58578872680664, "learning_rate": 9.695767195767196e-06, "loss": 43.6563, "step": 3480 }, { "epoch": 82.88358208955223, "grad_norm": 17.979524612426758, "learning_rate": 9.693121693121693e-06, "loss": 41.9954, "step": 3481 }, { "epoch": 82.90746268656716, "grad_norm": 17.92389488220215, "learning_rate": 9.690476190476191e-06, "loss": 42.0242, "step": 3482 }, { "epoch": 82.9313432835821, "grad_norm": 22.026195526123047, "learning_rate": 9.687830687830688e-06, "loss": 43.2985, "step": 3483 }, { "epoch": 82.95522388059702, "grad_norm": 15.080731391906738, "learning_rate": 9.685185185185186e-06, "loss": 42.8814, "step": 3484 }, { "epoch": 82.97910447761194, "grad_norm": 23.170284271240234, "learning_rate": 9.682539682539683e-06, "loss": 42.4875, "step": 3485 }, { "epoch": 83.0, "grad_norm": 15.19926929473877, "learning_rate": 9.679894179894181e-06, "loss": 38.3047, "step": 3486 }, { "epoch": 83.02388059701492, "grad_norm": 20.842618942260742, "learning_rate": 9.677248677248678e-06, "loss": 41.9214, "step": 3487 }, { "epoch": 83.04776119402985, "grad_norm": 19.11284637451172, "learning_rate": 9.674603174603176e-06, "loss": 43.2375, "step": 3488 }, { "epoch": 83.07164179104478, "grad_norm": 19.39193344116211, "learning_rate": 9.671957671957672e-06, "loss": 43.5418, "step": 3489 }, { "epoch": 83.0955223880597, "grad_norm": 19.154869079589844, "learning_rate": 9.669312169312171e-06, "loss": 42.4917, "step": 3490 }, { "epoch": 83.11940298507463, "grad_norm": 27.682418823242188, "learning_rate": 9.666666666666667e-06, "loss": 43.22, "step": 3491 }, { "epoch": 83.14328358208955, "grad_norm": 19.741304397583008, "learning_rate": 9.664021164021164e-06, "loss": 42.6503, "step": 3492 }, { "epoch": 83.16716417910447, "grad_norm": 23.25188446044922, "learning_rate": 9.661375661375663e-06, "loss": 42.7449, "step": 3493 }, { "epoch": 83.1910447761194, "grad_norm": 25.500925064086914, "learning_rate": 9.65873015873016e-06, "loss": 43.8239, "step": 3494 }, { "epoch": 83.21492537313434, "grad_norm": 22.653488159179688, "learning_rate": 9.656084656084657e-06, "loss": 42.4962, "step": 3495 }, { "epoch": 83.23880597014926, "grad_norm": 21.660871505737305, "learning_rate": 9.653439153439154e-06, "loss": 44.1403, "step": 3496 }, { "epoch": 83.26268656716418, "grad_norm": 24.922666549682617, "learning_rate": 9.650793650793652e-06, "loss": 42.4295, "step": 3497 }, { "epoch": 83.2865671641791, "grad_norm": 20.24859619140625, "learning_rate": 9.64814814814815e-06, "loss": 41.7125, "step": 3498 }, { "epoch": 83.31044776119403, "grad_norm": 16.770278930664062, "learning_rate": 9.645502645502647e-06, "loss": 43.0386, "step": 3499 }, { "epoch": 83.33432835820895, "grad_norm": 20.553585052490234, "learning_rate": 9.642857142857144e-06, "loss": 43.2005, "step": 3500 }, { "epoch": 83.35820895522389, "grad_norm": 22.309749603271484, "learning_rate": 9.64021164021164e-06, "loss": 43.9736, "step": 3501 }, { "epoch": 83.38208955223881, "grad_norm": 16.99924659729004, "learning_rate": 9.63756613756614e-06, "loss": 42.9804, "step": 3502 }, { "epoch": 83.40597014925373, "grad_norm": 17.541120529174805, "learning_rate": 9.634920634920637e-06, "loss": 41.9332, "step": 3503 }, { "epoch": 83.42985074626866, "grad_norm": 19.222923278808594, "learning_rate": 9.632275132275132e-06, "loss": 43.163, "step": 3504 }, { "epoch": 83.45373134328358, "grad_norm": 23.178749084472656, "learning_rate": 9.62962962962963e-06, "loss": 41.4791, "step": 3505 }, { "epoch": 83.4776119402985, "grad_norm": 24.103410720825195, "learning_rate": 9.626984126984127e-06, "loss": 43.5107, "step": 3506 }, { "epoch": 83.50149253731344, "grad_norm": 16.439075469970703, "learning_rate": 9.624338624338625e-06, "loss": 43.6286, "step": 3507 }, { "epoch": 83.52537313432836, "grad_norm": 29.148473739624023, "learning_rate": 9.621693121693122e-06, "loss": 44.0076, "step": 3508 }, { "epoch": 83.54925373134328, "grad_norm": 23.33673667907715, "learning_rate": 9.61904761904762e-06, "loss": 42.0299, "step": 3509 }, { "epoch": 83.57313432835821, "grad_norm": 20.69951820373535, "learning_rate": 9.616402116402117e-06, "loss": 41.9305, "step": 3510 }, { "epoch": 83.59701492537313, "grad_norm": 28.55817413330078, "learning_rate": 9.613756613756613e-06, "loss": 42.112, "step": 3511 }, { "epoch": 83.62089552238805, "grad_norm": 20.63089942932129, "learning_rate": 9.611111111111112e-06, "loss": 42.5737, "step": 3512 }, { "epoch": 83.64477611940299, "grad_norm": 18.186328887939453, "learning_rate": 9.60846560846561e-06, "loss": 42.6654, "step": 3513 }, { "epoch": 83.66865671641791, "grad_norm": 30.312583923339844, "learning_rate": 9.605820105820106e-06, "loss": 41.6198, "step": 3514 }, { "epoch": 83.69253731343284, "grad_norm": 22.397600173950195, "learning_rate": 9.603174603174605e-06, "loss": 43.7027, "step": 3515 }, { "epoch": 83.71641791044776, "grad_norm": 22.637603759765625, "learning_rate": 9.6005291005291e-06, "loss": 43.3998, "step": 3516 }, { "epoch": 83.74029850746268, "grad_norm": 24.366125106811523, "learning_rate": 9.597883597883598e-06, "loss": 42.6906, "step": 3517 }, { "epoch": 83.7641791044776, "grad_norm": 21.425613403320312, "learning_rate": 9.595238095238096e-06, "loss": 42.7173, "step": 3518 }, { "epoch": 83.78805970149254, "grad_norm": 18.075485229492188, "learning_rate": 9.592592592592593e-06, "loss": 42.9601, "step": 3519 }, { "epoch": 83.81194029850747, "grad_norm": 19.24690818786621, "learning_rate": 9.58994708994709e-06, "loss": 41.9579, "step": 3520 }, { "epoch": 83.83582089552239, "grad_norm": 21.10234832763672, "learning_rate": 9.587301587301588e-06, "loss": 42.6078, "step": 3521 }, { "epoch": 83.85970149253731, "grad_norm": 21.595741271972656, "learning_rate": 9.584656084656086e-06, "loss": 43.3926, "step": 3522 }, { "epoch": 83.88358208955223, "grad_norm": 14.618033409118652, "learning_rate": 9.582010582010583e-06, "loss": 42.7237, "step": 3523 }, { "epoch": 83.90746268656716, "grad_norm": 18.805774688720703, "learning_rate": 9.57936507936508e-06, "loss": 43.6884, "step": 3524 }, { "epoch": 83.9313432835821, "grad_norm": 17.666229248046875, "learning_rate": 9.576719576719578e-06, "loss": 43.3069, "step": 3525 }, { "epoch": 83.95522388059702, "grad_norm": NaN, "learning_rate": 9.574074074074074e-06, "loss": 47.9701, "step": 3526 }, { "epoch": 83.97910447761194, "grad_norm": 18.41876792907715, "learning_rate": 9.574074074074074e-06, "loss": 42.0814, "step": 3527 }, { "epoch": 84.0, "grad_norm": 14.344976425170898, "learning_rate": 9.571428571428573e-06, "loss": 36.702, "step": 3528 }, { "epoch": 84.02388059701492, "grad_norm": 19.47123146057129, "learning_rate": 9.568783068783069e-06, "loss": 43.0682, "step": 3529 }, { "epoch": 84.04776119402985, "grad_norm": 18.708087921142578, "learning_rate": 9.566137566137567e-06, "loss": 43.4093, "step": 3530 }, { "epoch": 84.07164179104478, "grad_norm": 21.98741340637207, "learning_rate": 9.563492063492064e-06, "loss": 42.619, "step": 3531 }, { "epoch": 84.0955223880597, "grad_norm": 21.4478816986084, "learning_rate": 9.560846560846561e-06, "loss": 43.3145, "step": 3532 }, { "epoch": 84.11940298507463, "grad_norm": 21.093963623046875, "learning_rate": 9.558201058201059e-06, "loss": 43.1162, "step": 3533 }, { "epoch": 84.14328358208955, "grad_norm": 18.37552833557129, "learning_rate": 9.555555555555556e-06, "loss": 42.4734, "step": 3534 }, { "epoch": 84.16716417910447, "grad_norm": 13.956351280212402, "learning_rate": 9.552910052910054e-06, "loss": 42.4351, "step": 3535 }, { "epoch": 84.1910447761194, "grad_norm": 20.104270935058594, "learning_rate": 9.550264550264551e-06, "loss": 43.2507, "step": 3536 }, { "epoch": 84.21492537313434, "grad_norm": 20.69384002685547, "learning_rate": 9.547619047619049e-06, "loss": 42.8764, "step": 3537 }, { "epoch": 84.23880597014926, "grad_norm": 26.53329086303711, "learning_rate": 9.544973544973546e-06, "loss": 42.4139, "step": 3538 }, { "epoch": 84.26268656716418, "grad_norm": 11.859530448913574, "learning_rate": 9.542328042328042e-06, "loss": 42.4525, "step": 3539 }, { "epoch": 84.2865671641791, "grad_norm": 22.784093856811523, "learning_rate": 9.539682539682541e-06, "loss": 42.6754, "step": 3540 }, { "epoch": 84.31044776119403, "grad_norm": 22.02342987060547, "learning_rate": 9.537037037037037e-06, "loss": 42.8119, "step": 3541 }, { "epoch": 84.33432835820895, "grad_norm": 16.383922576904297, "learning_rate": 9.534391534391535e-06, "loss": 41.9982, "step": 3542 }, { "epoch": 84.35820895522389, "grad_norm": 18.745128631591797, "learning_rate": 9.531746031746032e-06, "loss": 43.0496, "step": 3543 }, { "epoch": 84.38208955223881, "grad_norm": 33.664764404296875, "learning_rate": 9.52910052910053e-06, "loss": 42.5116, "step": 3544 }, { "epoch": 84.40597014925373, "grad_norm": 18.74268341064453, "learning_rate": 9.526455026455027e-06, "loss": 43.642, "step": 3545 }, { "epoch": 84.42985074626866, "grad_norm": 30.136598587036133, "learning_rate": 9.523809523809525e-06, "loss": 42.8695, "step": 3546 }, { "epoch": 84.45373134328358, "grad_norm": 22.268802642822266, "learning_rate": 9.521164021164022e-06, "loss": 42.6697, "step": 3547 }, { "epoch": 84.4776119402985, "grad_norm": 22.149734497070312, "learning_rate": 9.51851851851852e-06, "loss": 43.0171, "step": 3548 }, { "epoch": 84.50149253731344, "grad_norm": 33.4512825012207, "learning_rate": 9.515873015873016e-06, "loss": 43.2588, "step": 3549 }, { "epoch": 84.52537313432836, "grad_norm": 22.5905704498291, "learning_rate": 9.513227513227515e-06, "loss": 43.2581, "step": 3550 }, { "epoch": 84.54925373134328, "grad_norm": 38.85606384277344, "learning_rate": 9.51058201058201e-06, "loss": 42.2418, "step": 3551 }, { "epoch": 84.57313432835821, "grad_norm": 27.77952003479004, "learning_rate": 9.507936507936508e-06, "loss": 43.4077, "step": 3552 }, { "epoch": 84.59701492537313, "grad_norm": 46.536651611328125, "learning_rate": 9.505291005291006e-06, "loss": 42.1365, "step": 3553 }, { "epoch": 84.62089552238805, "grad_norm": 32.448482513427734, "learning_rate": 9.502645502645503e-06, "loss": 43.362, "step": 3554 }, { "epoch": 84.64477611940299, "grad_norm": 43.40568161010742, "learning_rate": 9.5e-06, "loss": 42.4134, "step": 3555 }, { "epoch": 84.66865671641791, "grad_norm": 44.625125885009766, "learning_rate": 9.497354497354498e-06, "loss": 42.3841, "step": 3556 }, { "epoch": 84.69253731343284, "grad_norm": 30.825876235961914, "learning_rate": 9.494708994708996e-06, "loss": 43.0973, "step": 3557 }, { "epoch": 84.71641791044776, "grad_norm": 32.886775970458984, "learning_rate": 9.492063492063493e-06, "loss": 42.6478, "step": 3558 }, { "epoch": 84.74029850746268, "grad_norm": 35.800621032714844, "learning_rate": 9.48941798941799e-06, "loss": 42.1319, "step": 3559 }, { "epoch": 84.7641791044776, "grad_norm": 27.23737907409668, "learning_rate": 9.486772486772488e-06, "loss": 42.0883, "step": 3560 }, { "epoch": 84.78805970149254, "grad_norm": 40.162166595458984, "learning_rate": 9.484126984126984e-06, "loss": 42.5786, "step": 3561 }, { "epoch": 84.81194029850747, "grad_norm": 32.1665153503418, "learning_rate": 9.481481481481483e-06, "loss": 41.7711, "step": 3562 }, { "epoch": 84.83582089552239, "grad_norm": 34.32803726196289, "learning_rate": 9.478835978835979e-06, "loss": 43.3193, "step": 3563 }, { "epoch": 84.85970149253731, "grad_norm": 34.155452728271484, "learning_rate": 9.476190476190476e-06, "loss": 43.2305, "step": 3564 }, { "epoch": 84.88358208955223, "grad_norm": 31.642534255981445, "learning_rate": 9.473544973544975e-06, "loss": 44.1911, "step": 3565 }, { "epoch": 84.90746268656716, "grad_norm": 33.413291931152344, "learning_rate": 9.470899470899471e-06, "loss": 41.0447, "step": 3566 }, { "epoch": 84.9313432835821, "grad_norm": 35.05025100708008, "learning_rate": 9.468253968253969e-06, "loss": 43.3446, "step": 3567 }, { "epoch": 84.95522388059702, "grad_norm": 28.946184158325195, "learning_rate": 9.465608465608466e-06, "loss": 42.4865, "step": 3568 }, { "epoch": 84.97910447761194, "grad_norm": 38.28304672241211, "learning_rate": 9.462962962962964e-06, "loss": 42.6287, "step": 3569 }, { "epoch": 85.0, "grad_norm": 27.42157745361328, "learning_rate": 9.460317460317461e-06, "loss": 37.3853, "step": 3570 }, { "epoch": 85.02388059701492, "grad_norm": 40.84228515625, "learning_rate": 9.457671957671959e-06, "loss": 43.8201, "step": 3571 }, { "epoch": 85.04776119402985, "grad_norm": 36.39906692504883, "learning_rate": 9.455026455026456e-06, "loss": 41.5254, "step": 3572 }, { "epoch": 85.07164179104478, "grad_norm": 29.795923233032227, "learning_rate": 9.452380952380952e-06, "loss": 42.4395, "step": 3573 }, { "epoch": 85.0955223880597, "grad_norm": 25.486753463745117, "learning_rate": 9.449735449735451e-06, "loss": 42.8189, "step": 3574 }, { "epoch": 85.11940298507463, "grad_norm": 37.790260314941406, "learning_rate": 9.447089947089949e-06, "loss": 42.8718, "step": 3575 }, { "epoch": 85.14328358208955, "grad_norm": 29.528602600097656, "learning_rate": 9.444444444444445e-06, "loss": 43.3714, "step": 3576 }, { "epoch": 85.16716417910447, "grad_norm": 37.71443176269531, "learning_rate": 9.441798941798944e-06, "loss": 42.4381, "step": 3577 }, { "epoch": 85.1910447761194, "grad_norm": 36.625, "learning_rate": 9.43915343915344e-06, "loss": 41.7229, "step": 3578 }, { "epoch": 85.21492537313434, "grad_norm": 30.335342407226562, "learning_rate": 9.436507936507937e-06, "loss": 41.8887, "step": 3579 }, { "epoch": 85.23880597014926, "grad_norm": 24.375329971313477, "learning_rate": 9.433862433862435e-06, "loss": 42.6894, "step": 3580 }, { "epoch": 85.26268656716418, "grad_norm": 34.27681350708008, "learning_rate": 9.431216931216932e-06, "loss": 42.4825, "step": 3581 }, { "epoch": 85.2865671641791, "grad_norm": 27.515474319458008, "learning_rate": 9.42857142857143e-06, "loss": 41.3011, "step": 3582 }, { "epoch": 85.31044776119403, "grad_norm": 39.355350494384766, "learning_rate": 9.425925925925925e-06, "loss": 42.1456, "step": 3583 }, { "epoch": 85.33432835820895, "grad_norm": 34.957523345947266, "learning_rate": 9.423280423280425e-06, "loss": 42.9466, "step": 3584 }, { "epoch": 85.35820895522389, "grad_norm": 30.264474868774414, "learning_rate": 9.420634920634922e-06, "loss": 42.5819, "step": 3585 }, { "epoch": 85.38208955223881, "grad_norm": 27.88845443725586, "learning_rate": 9.417989417989418e-06, "loss": 41.4037, "step": 3586 }, { "epoch": 85.40597014925373, "grad_norm": 30.240957260131836, "learning_rate": 9.415343915343917e-06, "loss": 44.7681, "step": 3587 }, { "epoch": 85.42985074626866, "grad_norm": 23.867399215698242, "learning_rate": 9.412698412698413e-06, "loss": 41.2699, "step": 3588 }, { "epoch": 85.45373134328358, "grad_norm": 39.2992057800293, "learning_rate": 9.41005291005291e-06, "loss": 42.2639, "step": 3589 }, { "epoch": 85.4776119402985, "grad_norm": 32.746673583984375, "learning_rate": 9.407407407407408e-06, "loss": 43.3612, "step": 3590 }, { "epoch": 85.50149253731344, "grad_norm": 33.791748046875, "learning_rate": 9.404761904761905e-06, "loss": 43.1554, "step": 3591 }, { "epoch": 85.52537313432836, "grad_norm": 35.11564254760742, "learning_rate": 9.402116402116403e-06, "loss": 43.4265, "step": 3592 }, { "epoch": 85.54925373134328, "grad_norm": 27.411352157592773, "learning_rate": 9.3994708994709e-06, "loss": 42.7959, "step": 3593 }, { "epoch": 85.57313432835821, "grad_norm": 27.369596481323242, "learning_rate": 9.396825396825398e-06, "loss": 44.1557, "step": 3594 }, { "epoch": 85.59701492537313, "grad_norm": 30.399707794189453, "learning_rate": 9.394179894179895e-06, "loss": 42.5034, "step": 3595 }, { "epoch": 85.62089552238805, "grad_norm": 24.180538177490234, "learning_rate": 9.391534391534393e-06, "loss": 42.256, "step": 3596 }, { "epoch": 85.64477611940299, "grad_norm": 35.2861328125, "learning_rate": 9.38888888888889e-06, "loss": 43.6244, "step": 3597 }, { "epoch": 85.66865671641791, "grad_norm": 28.855852127075195, "learning_rate": 9.386243386243386e-06, "loss": 43.046, "step": 3598 }, { "epoch": 85.69253731343284, "grad_norm": 33.648170471191406, "learning_rate": 9.383597883597885e-06, "loss": 42.0113, "step": 3599 }, { "epoch": 85.71641791044776, "grad_norm": 30.42345428466797, "learning_rate": 9.380952380952381e-06, "loss": 42.4, "step": 3600 }, { "epoch": 85.74029850746268, "grad_norm": 34.80357360839844, "learning_rate": 9.378306878306879e-06, "loss": 41.5381, "step": 3601 }, { "epoch": 85.7641791044776, "grad_norm": 29.07464027404785, "learning_rate": 9.375661375661376e-06, "loss": 43.5597, "step": 3602 }, { "epoch": 85.78805970149254, "grad_norm": 35.02674865722656, "learning_rate": 9.373015873015874e-06, "loss": 42.0479, "step": 3603 }, { "epoch": 85.81194029850747, "grad_norm": 29.9696044921875, "learning_rate": 9.370370370370371e-06, "loss": 42.6829, "step": 3604 }, { "epoch": 85.83582089552239, "grad_norm": 31.754671096801758, "learning_rate": 9.367724867724869e-06, "loss": 42.824, "step": 3605 }, { "epoch": 85.85970149253731, "grad_norm": 30.765913009643555, "learning_rate": 9.365079365079366e-06, "loss": 42.8912, "step": 3606 }, { "epoch": 85.88358208955223, "grad_norm": 33.485015869140625, "learning_rate": 9.362433862433864e-06, "loss": 42.7802, "step": 3607 }, { "epoch": 85.90746268656716, "grad_norm": 27.535614013671875, "learning_rate": 9.359788359788361e-06, "loss": 43.0182, "step": 3608 }, { "epoch": 85.9313432835821, "grad_norm": 28.8901309967041, "learning_rate": 9.357142857142859e-06, "loss": 43.1223, "step": 3609 }, { "epoch": 85.95522388059702, "grad_norm": 27.991809844970703, "learning_rate": 9.354497354497354e-06, "loss": 43.4701, "step": 3610 }, { "epoch": 85.97910447761194, "grad_norm": 33.3857536315918, "learning_rate": 9.351851851851854e-06, "loss": 43.115, "step": 3611 }, { "epoch": 86.0, "grad_norm": 25.59542465209961, "learning_rate": 9.34920634920635e-06, "loss": 37.2275, "step": 3612 }, { "epoch": 86.02388059701492, "grad_norm": 32.959251403808594, "learning_rate": 9.346560846560847e-06, "loss": 41.951, "step": 3613 }, { "epoch": 86.04776119402985, "grad_norm": 26.431245803833008, "learning_rate": 9.343915343915344e-06, "loss": 40.553, "step": 3614 }, { "epoch": 86.07164179104478, "grad_norm": 31.518957138061523, "learning_rate": 9.341269841269842e-06, "loss": 42.7644, "step": 3615 }, { "epoch": 86.0955223880597, "grad_norm": 30.063220977783203, "learning_rate": 9.33862433862434e-06, "loss": 41.5891, "step": 3616 }, { "epoch": 86.11940298507463, "grad_norm": 32.649227142333984, "learning_rate": 9.335978835978837e-06, "loss": 43.2927, "step": 3617 }, { "epoch": 86.14328358208955, "grad_norm": 28.61098861694336, "learning_rate": 9.333333333333334e-06, "loss": 42.4467, "step": 3618 }, { "epoch": 86.16716417910447, "grad_norm": 30.715784072875977, "learning_rate": 9.330687830687832e-06, "loss": 41.0933, "step": 3619 }, { "epoch": 86.1910447761194, "grad_norm": 27.99184799194336, "learning_rate": 9.32804232804233e-06, "loss": 42.218, "step": 3620 }, { "epoch": 86.21492537313434, "grad_norm": 32.13215255737305, "learning_rate": 9.325396825396827e-06, "loss": 42.4495, "step": 3621 }, { "epoch": 86.23880597014926, "grad_norm": 28.051559448242188, "learning_rate": 9.322751322751323e-06, "loss": 43.2451, "step": 3622 }, { "epoch": 86.26268656716418, "grad_norm": 33.464115142822266, "learning_rate": 9.32010582010582e-06, "loss": 43.6584, "step": 3623 }, { "epoch": 86.2865671641791, "grad_norm": 28.151817321777344, "learning_rate": 9.317460317460318e-06, "loss": 42.722, "step": 3624 }, { "epoch": 86.31044776119403, "grad_norm": NaN, "learning_rate": 9.314814814814815e-06, "loss": 37.1565, "step": 3625 }, { "epoch": 86.33432835820895, "grad_norm": 29.83131217956543, "learning_rate": 9.314814814814815e-06, "loss": 43.1632, "step": 3626 }, { "epoch": 86.35820895522389, "grad_norm": 24.83383560180664, "learning_rate": 9.312169312169313e-06, "loss": 43.5542, "step": 3627 }, { "epoch": 86.38208955223881, "grad_norm": 33.05693817138672, "learning_rate": 9.30952380952381e-06, "loss": 42.7432, "step": 3628 }, { "epoch": 86.40597014925373, "grad_norm": 24.293209075927734, "learning_rate": 9.306878306878308e-06, "loss": 42.9506, "step": 3629 }, { "epoch": 86.42985074626866, "grad_norm": 33.47346496582031, "learning_rate": 9.304232804232805e-06, "loss": 42.2723, "step": 3630 }, { "epoch": 86.45373134328358, "grad_norm": 28.47313117980957, "learning_rate": 9.301587301587303e-06, "loss": 43.7464, "step": 3631 }, { "epoch": 86.4776119402985, "grad_norm": 32.237510681152344, "learning_rate": 9.2989417989418e-06, "loss": 42.8704, "step": 3632 }, { "epoch": 86.50149253731344, "grad_norm": 25.842601776123047, "learning_rate": 9.296296296296296e-06, "loss": 41.6084, "step": 3633 }, { "epoch": 86.52537313432836, "grad_norm": 27.513307571411133, "learning_rate": 9.293650793650795e-06, "loss": 42.7922, "step": 3634 }, { "epoch": 86.54925373134328, "grad_norm": 25.676212310791016, "learning_rate": 9.291005291005291e-06, "loss": 42.0415, "step": 3635 }, { "epoch": 86.57313432835821, "grad_norm": 29.911081314086914, "learning_rate": 9.288359788359788e-06, "loss": 43.0526, "step": 3636 }, { "epoch": 86.59701492537313, "grad_norm": 21.788707733154297, "learning_rate": 9.285714285714288e-06, "loss": 42.2228, "step": 3637 }, { "epoch": 86.62089552238805, "grad_norm": 34.92530822753906, "learning_rate": 9.283068783068783e-06, "loss": 42.6756, "step": 3638 }, { "epoch": 86.64477611940299, "grad_norm": 31.41309928894043, "learning_rate": 9.280423280423281e-06, "loss": 43.3258, "step": 3639 }, { "epoch": 86.66865671641791, "grad_norm": 27.432342529296875, "learning_rate": 9.277777777777778e-06, "loss": 43.0612, "step": 3640 }, { "epoch": 86.69253731343284, "grad_norm": 25.92644691467285, "learning_rate": 9.275132275132276e-06, "loss": 41.7141, "step": 3641 }, { "epoch": 86.71641791044776, "grad_norm": 27.26793098449707, "learning_rate": 9.272486772486773e-06, "loss": 42.8127, "step": 3642 }, { "epoch": 86.74029850746268, "grad_norm": 22.45132827758789, "learning_rate": 9.26984126984127e-06, "loss": 43.4623, "step": 3643 }, { "epoch": 86.7641791044776, "grad_norm": 29.31770896911621, "learning_rate": 9.267195767195768e-06, "loss": 43.428, "step": 3644 }, { "epoch": 86.78805970149254, "grad_norm": 26.000110626220703, "learning_rate": 9.264550264550264e-06, "loss": 43.7773, "step": 3645 }, { "epoch": 86.81194029850747, "grad_norm": 30.51299476623535, "learning_rate": 9.261904761904763e-06, "loss": 43.2915, "step": 3646 }, { "epoch": 86.83582089552239, "grad_norm": 25.712812423706055, "learning_rate": 9.25925925925926e-06, "loss": 42.6203, "step": 3647 }, { "epoch": 86.85970149253731, "grad_norm": 32.85362243652344, "learning_rate": 9.256613756613757e-06, "loss": 42.1768, "step": 3648 }, { "epoch": 86.88358208955223, "grad_norm": 30.07919692993164, "learning_rate": 9.253968253968256e-06, "loss": 42.4139, "step": 3649 }, { "epoch": 86.90746268656716, "grad_norm": 31.38039779663086, "learning_rate": 9.251322751322752e-06, "loss": 42.23, "step": 3650 }, { "epoch": 86.9313432835821, "grad_norm": 26.601993560791016, "learning_rate": 9.248677248677249e-06, "loss": 42.2522, "step": 3651 }, { "epoch": 86.95522388059702, "grad_norm": 31.616823196411133, "learning_rate": 9.246031746031747e-06, "loss": 43.3183, "step": 3652 }, { "epoch": 86.97910447761194, "grad_norm": 25.606231689453125, "learning_rate": 9.243386243386244e-06, "loss": 42.7862, "step": 3653 }, { "epoch": 87.0, "grad_norm": 22.20980453491211, "learning_rate": 9.240740740740742e-06, "loss": 37.7077, "step": 3654 }, { "epoch": 87.02388059701492, "grad_norm": 24.519224166870117, "learning_rate": 9.238095238095239e-06, "loss": 42.4255, "step": 3655 }, { "epoch": 87.04776119402985, "grad_norm": 27.409582138061523, "learning_rate": 9.235449735449737e-06, "loss": 42.0198, "step": 3656 }, { "epoch": 87.07164179104478, "grad_norm": 20.307886123657227, "learning_rate": 9.232804232804234e-06, "loss": 41.6037, "step": 3657 }, { "epoch": 87.0955223880597, "grad_norm": 24.046375274658203, "learning_rate": 9.230158730158732e-06, "loss": 43.9297, "step": 3658 }, { "epoch": 87.11940298507463, "grad_norm": 23.58251953125, "learning_rate": 9.227513227513229e-06, "loss": 43.4211, "step": 3659 }, { "epoch": 87.14328358208955, "grad_norm": 20.67659568786621, "learning_rate": 9.224867724867725e-06, "loss": 42.7205, "step": 3660 }, { "epoch": 87.16716417910447, "grad_norm": 18.82547950744629, "learning_rate": 9.222222222222224e-06, "loss": 42.8921, "step": 3661 }, { "epoch": 87.1910447761194, "grad_norm": 21.20027732849121, "learning_rate": 9.21957671957672e-06, "loss": 41.0809, "step": 3662 }, { "epoch": 87.21492537313434, "grad_norm": 20.002410888671875, "learning_rate": 9.216931216931217e-06, "loss": 42.0559, "step": 3663 }, { "epoch": 87.23880597014926, "grad_norm": 16.792434692382812, "learning_rate": 9.214285714285715e-06, "loss": 40.659, "step": 3664 }, { "epoch": 87.26268656716418, "grad_norm": 20.209190368652344, "learning_rate": 9.211640211640212e-06, "loss": 42.387, "step": 3665 }, { "epoch": 87.2865671641791, "grad_norm": 17.87749481201172, "learning_rate": 9.20899470899471e-06, "loss": 41.6863, "step": 3666 }, { "epoch": 87.31044776119403, "grad_norm": 16.422809600830078, "learning_rate": 9.206349206349207e-06, "loss": 43.5165, "step": 3667 }, { "epoch": 87.33432835820895, "grad_norm": 17.762025833129883, "learning_rate": 9.203703703703705e-06, "loss": 41.3489, "step": 3668 }, { "epoch": 87.35820895522389, "grad_norm": 18.185434341430664, "learning_rate": 9.201058201058202e-06, "loss": 42.9896, "step": 3669 }, { "epoch": 87.38208955223881, "grad_norm": 15.573823928833008, "learning_rate": 9.198412698412698e-06, "loss": 42.5428, "step": 3670 }, { "epoch": 87.40597014925373, "grad_norm": 21.007041931152344, "learning_rate": 9.195767195767197e-06, "loss": 41.6825, "step": 3671 }, { "epoch": 87.42985074626866, "grad_norm": 21.610292434692383, "learning_rate": 9.193121693121693e-06, "loss": 42.8643, "step": 3672 }, { "epoch": 87.45373134328358, "grad_norm": 16.124156951904297, "learning_rate": 9.19047619047619e-06, "loss": 42.5377, "step": 3673 }, { "epoch": 87.4776119402985, "grad_norm": 22.14504051208496, "learning_rate": 9.187830687830688e-06, "loss": 42.878, "step": 3674 }, { "epoch": 87.50149253731344, "grad_norm": 17.33942222595215, "learning_rate": 9.185185185185186e-06, "loss": 44.3817, "step": 3675 }, { "epoch": 87.52537313432836, "grad_norm": 21.361644744873047, "learning_rate": 9.182539682539683e-06, "loss": 42.913, "step": 3676 }, { "epoch": 87.54925373134328, "grad_norm": 18.6135196685791, "learning_rate": 9.17989417989418e-06, "loss": 42.8328, "step": 3677 }, { "epoch": 87.57313432835821, "grad_norm": 23.618101119995117, "learning_rate": 9.177248677248678e-06, "loss": 42.4581, "step": 3678 }, { "epoch": 87.59701492537313, "grad_norm": 18.788637161254883, "learning_rate": 9.174603174603176e-06, "loss": 43.5344, "step": 3679 }, { "epoch": 87.62089552238805, "grad_norm": 17.69763946533203, "learning_rate": 9.171957671957673e-06, "loss": 42.8437, "step": 3680 }, { "epoch": 87.64477611940299, "grad_norm": 19.06989097595215, "learning_rate": 9.16931216931217e-06, "loss": 42.3788, "step": 3681 }, { "epoch": 87.66865671641791, "grad_norm": 18.462968826293945, "learning_rate": 9.166666666666666e-06, "loss": 42.759, "step": 3682 }, { "epoch": 87.69253731343284, "grad_norm": 21.524621963500977, "learning_rate": 9.164021164021166e-06, "loss": 43.1027, "step": 3683 }, { "epoch": 87.71641791044776, "grad_norm": 18.747453689575195, "learning_rate": 9.161375661375661e-06, "loss": 43.0803, "step": 3684 }, { "epoch": 87.74029850746268, "grad_norm": 21.170255661010742, "learning_rate": 9.158730158730159e-06, "loss": 42.641, "step": 3685 }, { "epoch": 87.7641791044776, "grad_norm": 19.89739990234375, "learning_rate": 9.156084656084656e-06, "loss": 42.5469, "step": 3686 }, { "epoch": 87.78805970149254, "grad_norm": 22.9807071685791, "learning_rate": 9.153439153439154e-06, "loss": 42.5137, "step": 3687 }, { "epoch": 87.81194029850747, "grad_norm": 19.036230087280273, "learning_rate": 9.150793650793651e-06, "loss": 42.8328, "step": 3688 }, { "epoch": 87.83582089552239, "grad_norm": 23.97933006286621, "learning_rate": 9.148148148148149e-06, "loss": 42.9784, "step": 3689 }, { "epoch": 87.85970149253731, "grad_norm": 18.06254768371582, "learning_rate": 9.145502645502646e-06, "loss": 41.7068, "step": 3690 }, { "epoch": 87.88358208955223, "grad_norm": 19.88326072692871, "learning_rate": 9.142857142857144e-06, "loss": 43.8, "step": 3691 }, { "epoch": 87.90746268656716, "grad_norm": 20.145050048828125, "learning_rate": 9.140211640211641e-06, "loss": 43.2459, "step": 3692 }, { "epoch": 87.9313432835821, "grad_norm": 16.824399948120117, "learning_rate": 9.137566137566139e-06, "loss": 42.4406, "step": 3693 }, { "epoch": 87.95522388059702, "grad_norm": 20.99275779724121, "learning_rate": 9.134920634920635e-06, "loss": 42.2506, "step": 3694 }, { "epoch": 87.97910447761194, "grad_norm": 23.64455223083496, "learning_rate": 9.132275132275134e-06, "loss": 43.1451, "step": 3695 }, { "epoch": 88.0, "grad_norm": 17.736629486083984, "learning_rate": 9.12962962962963e-06, "loss": 36.9082, "step": 3696 }, { "epoch": 88.02388059701492, "grad_norm": 20.58110809326172, "learning_rate": 9.126984126984127e-06, "loss": 41.6838, "step": 3697 }, { "epoch": 88.04776119402985, "grad_norm": 21.2742977142334, "learning_rate": 9.124338624338626e-06, "loss": 43.8259, "step": 3698 }, { "epoch": 88.07164179104478, "grad_norm": 18.40839958190918, "learning_rate": 9.121693121693122e-06, "loss": 41.6561, "step": 3699 }, { "epoch": 88.0955223880597, "grad_norm": 25.24982261657715, "learning_rate": 9.11904761904762e-06, "loss": 43.4407, "step": 3700 }, { "epoch": 88.11940298507463, "grad_norm": 16.522397994995117, "learning_rate": 9.116402116402117e-06, "loss": 42.3175, "step": 3701 }, { "epoch": 88.14328358208955, "grad_norm": 23.80354881286621, "learning_rate": 9.113756613756615e-06, "loss": 41.8656, "step": 3702 }, { "epoch": 88.16716417910447, "grad_norm": 17.915058135986328, "learning_rate": 9.111111111111112e-06, "loss": 43.4793, "step": 3703 }, { "epoch": 88.1910447761194, "grad_norm": 24.271337509155273, "learning_rate": 9.108465608465608e-06, "loss": 42.3917, "step": 3704 }, { "epoch": 88.21492537313434, "grad_norm": 21.696147918701172, "learning_rate": 9.105820105820107e-06, "loss": 42.3141, "step": 3705 }, { "epoch": 88.23880597014926, "grad_norm": 23.576507568359375, "learning_rate": 9.103174603174603e-06, "loss": 42.9454, "step": 3706 }, { "epoch": 88.26268656716418, "grad_norm": 25.030128479003906, "learning_rate": 9.1005291005291e-06, "loss": 42.8441, "step": 3707 }, { "epoch": 88.2865671641791, "grad_norm": 21.148405075073242, "learning_rate": 9.0978835978836e-06, "loss": 43.0314, "step": 3708 }, { "epoch": 88.31044776119403, "grad_norm": 25.8000431060791, "learning_rate": 9.095238095238095e-06, "loss": 42.5864, "step": 3709 }, { "epoch": 88.33432835820895, "grad_norm": 15.713743209838867, "learning_rate": 9.092592592592593e-06, "loss": 42.8121, "step": 3710 }, { "epoch": 88.35820895522389, "grad_norm": 23.208627700805664, "learning_rate": 9.08994708994709e-06, "loss": 42.9846, "step": 3711 }, { "epoch": 88.38208955223881, "grad_norm": 17.478639602661133, "learning_rate": 9.087301587301588e-06, "loss": 42.2004, "step": 3712 }, { "epoch": 88.40597014925373, "grad_norm": 21.487903594970703, "learning_rate": 9.084656084656085e-06, "loss": 41.7275, "step": 3713 }, { "epoch": 88.42985074626866, "grad_norm": 27.780941009521484, "learning_rate": 9.082010582010583e-06, "loss": 42.1269, "step": 3714 }, { "epoch": 88.45373134328358, "grad_norm": 14.19015884399414, "learning_rate": 9.07936507936508e-06, "loss": 43.512, "step": 3715 }, { "epoch": 88.4776119402985, "grad_norm": 27.63198471069336, "learning_rate": 9.076719576719576e-06, "loss": 42.4196, "step": 3716 }, { "epoch": 88.50149253731344, "grad_norm": 21.5277099609375, "learning_rate": 9.074074074074075e-06, "loss": 41.8393, "step": 3717 }, { "epoch": 88.52537313432836, "grad_norm": 20.19924545288086, "learning_rate": 9.071428571428573e-06, "loss": 41.6486, "step": 3718 }, { "epoch": 88.54925373134328, "grad_norm": 22.75286865234375, "learning_rate": 9.068783068783069e-06, "loss": 43.3116, "step": 3719 }, { "epoch": 88.57313432835821, "grad_norm": 16.763381958007812, "learning_rate": 9.066137566137568e-06, "loss": 43.0704, "step": 3720 }, { "epoch": 88.59701492537313, "grad_norm": 23.842023849487305, "learning_rate": 9.063492063492064e-06, "loss": 43.7468, "step": 3721 }, { "epoch": 88.62089552238805, "grad_norm": 20.88597297668457, "learning_rate": 9.060846560846561e-06, "loss": 42.2398, "step": 3722 }, { "epoch": 88.64477611940299, "grad_norm": 19.333271026611328, "learning_rate": 9.058201058201059e-06, "loss": 41.7667, "step": 3723 }, { "epoch": 88.66865671641791, "grad_norm": 22.313888549804688, "learning_rate": 9.055555555555556e-06, "loss": 42.3198, "step": 3724 }, { "epoch": 88.69253731343284, "grad_norm": 20.26089096069336, "learning_rate": 9.052910052910054e-06, "loss": 42.9191, "step": 3725 }, { "epoch": 88.71641791044776, "grad_norm": 17.900373458862305, "learning_rate": 9.050264550264551e-06, "loss": 41.7498, "step": 3726 }, { "epoch": 88.74029850746268, "grad_norm": 22.735700607299805, "learning_rate": 9.047619047619049e-06, "loss": 41.4744, "step": 3727 }, { "epoch": 88.7641791044776, "grad_norm": 22.933048248291016, "learning_rate": 9.044973544973546e-06, "loss": 43.4595, "step": 3728 }, { "epoch": 88.78805970149254, "grad_norm": 15.648778915405273, "learning_rate": 9.042328042328044e-06, "loss": 43.4811, "step": 3729 }, { "epoch": 88.81194029850747, "grad_norm": 35.44391632080078, "learning_rate": 9.039682539682541e-06, "loss": 42.0879, "step": 3730 }, { "epoch": 88.83582089552239, "grad_norm": 26.575231552124023, "learning_rate": 9.037037037037037e-06, "loss": 41.6883, "step": 3731 }, { "epoch": 88.85970149253731, "grad_norm": 33.38102340698242, "learning_rate": 9.034391534391536e-06, "loss": 43.2903, "step": 3732 }, { "epoch": 88.88358208955223, "grad_norm": 26.297910690307617, "learning_rate": 9.031746031746032e-06, "loss": 42.744, "step": 3733 }, { "epoch": 88.90746268656716, "grad_norm": 25.057889938354492, "learning_rate": 9.02910052910053e-06, "loss": 42.076, "step": 3734 }, { "epoch": 88.9313432835821, "grad_norm": 21.162078857421875, "learning_rate": 9.026455026455027e-06, "loss": 42.397, "step": 3735 }, { "epoch": 88.95522388059702, "grad_norm": 21.846647262573242, "learning_rate": 9.023809523809524e-06, "loss": 42.7379, "step": 3736 }, { "epoch": 88.97910447761194, "grad_norm": 19.74768829345703, "learning_rate": 9.021164021164022e-06, "loss": 42.0906, "step": 3737 }, { "epoch": 89.0, "grad_norm": 18.839765548706055, "learning_rate": 9.01851851851852e-06, "loss": 37.881, "step": 3738 }, { "epoch": 89.02388059701492, "grad_norm": 22.15633201599121, "learning_rate": 9.015873015873017e-06, "loss": 42.5544, "step": 3739 }, { "epoch": 89.04776119402985, "grad_norm": 18.709840774536133, "learning_rate": 9.013227513227514e-06, "loss": 43.2949, "step": 3740 }, { "epoch": 89.07164179104478, "grad_norm": 22.922399520874023, "learning_rate": 9.010582010582012e-06, "loss": 41.9215, "step": 3741 }, { "epoch": 89.0955223880597, "grad_norm": 18.445695877075195, "learning_rate": 9.00793650793651e-06, "loss": 42.994, "step": 3742 }, { "epoch": 89.11940298507463, "grad_norm": 22.694503784179688, "learning_rate": 9.005291005291005e-06, "loss": 42.4024, "step": 3743 }, { "epoch": 89.14328358208955, "grad_norm": 23.259532928466797, "learning_rate": 9.002645502645503e-06, "loss": 41.9366, "step": 3744 }, { "epoch": 89.16716417910447, "grad_norm": 24.131465911865234, "learning_rate": 9e-06, "loss": 42.9172, "step": 3745 }, { "epoch": 89.1910447761194, "grad_norm": 21.01772117614746, "learning_rate": 8.997354497354498e-06, "loss": 42.0505, "step": 3746 }, { "epoch": 89.21492537313434, "grad_norm": 20.675086975097656, "learning_rate": 8.994708994708995e-06, "loss": 42.7076, "step": 3747 }, { "epoch": 89.23880597014926, "grad_norm": 22.289649963378906, "learning_rate": 8.992063492063493e-06, "loss": 42.4533, "step": 3748 }, { "epoch": 89.26268656716418, "grad_norm": 22.76655387878418, "learning_rate": 8.98941798941799e-06, "loss": 42.0269, "step": 3749 }, { "epoch": 89.2865671641791, "grad_norm": 19.732887268066406, "learning_rate": 8.986772486772488e-06, "loss": 44.2783, "step": 3750 }, { "epoch": 89.31044776119403, "grad_norm": 22.45815658569336, "learning_rate": 8.984126984126985e-06, "loss": 40.1901, "step": 3751 }, { "epoch": 89.33432835820895, "grad_norm": 24.511625289916992, "learning_rate": 8.981481481481483e-06, "loss": 43.0842, "step": 3752 }, { "epoch": 89.35820895522389, "grad_norm": 19.739845275878906, "learning_rate": 8.978835978835979e-06, "loss": 43.7219, "step": 3753 }, { "epoch": 89.38208955223881, "grad_norm": 26.18813133239746, "learning_rate": 8.976190476190478e-06, "loss": 43.5427, "step": 3754 }, { "epoch": 89.40597014925373, "grad_norm": 21.95644760131836, "learning_rate": 8.973544973544973e-06, "loss": 42.9161, "step": 3755 }, { "epoch": 89.42985074626866, "grad_norm": 22.270849227905273, "learning_rate": 8.970899470899471e-06, "loss": 42.6121, "step": 3756 }, { "epoch": 89.45373134328358, "grad_norm": 18.48128318786621, "learning_rate": 8.968253968253968e-06, "loss": 42.044, "step": 3757 }, { "epoch": 89.4776119402985, "grad_norm": 22.865985870361328, "learning_rate": 8.965608465608466e-06, "loss": 42.1096, "step": 3758 }, { "epoch": 89.50149253731344, "grad_norm": 19.26102066040039, "learning_rate": 8.962962962962963e-06, "loss": 42.5147, "step": 3759 }, { "epoch": 89.52537313432836, "grad_norm": 27.352407455444336, "learning_rate": 8.960317460317461e-06, "loss": 41.7614, "step": 3760 }, { "epoch": 89.54925373134328, "grad_norm": 21.059770584106445, "learning_rate": 8.957671957671958e-06, "loss": 41.5053, "step": 3761 }, { "epoch": 89.57313432835821, "grad_norm": 23.909198760986328, "learning_rate": 8.955026455026456e-06, "loss": 43.4126, "step": 3762 }, { "epoch": 89.59701492537313, "grad_norm": 28.529970169067383, "learning_rate": 8.952380952380953e-06, "loss": 43.489, "step": 3763 }, { "epoch": 89.62089552238805, "grad_norm": 22.008472442626953, "learning_rate": 8.949735449735451e-06, "loss": 42.5781, "step": 3764 }, { "epoch": 89.64477611940299, "grad_norm": NaN, "learning_rate": 8.947089947089947e-06, "loss": 37.0211, "step": 3765 }, { "epoch": 89.66865671641791, "grad_norm": 29.881391525268555, "learning_rate": 8.947089947089947e-06, "loss": 42.6102, "step": 3766 }, { "epoch": 89.69253731343284, "grad_norm": 24.919992446899414, "learning_rate": 8.944444444444446e-06, "loss": 42.7878, "step": 3767 }, { "epoch": 89.71641791044776, "grad_norm": 29.473249435424805, "learning_rate": 8.941798941798942e-06, "loss": 41.9105, "step": 3768 }, { "epoch": 89.74029850746268, "grad_norm": 20.71428871154785, "learning_rate": 8.93915343915344e-06, "loss": 42.0715, "step": 3769 }, { "epoch": 89.7641791044776, "grad_norm": 29.31629180908203, "learning_rate": 8.936507936507938e-06, "loss": 41.3888, "step": 3770 }, { "epoch": 89.78805970149254, "grad_norm": 22.29326057434082, "learning_rate": 8.933862433862434e-06, "loss": 43.0029, "step": 3771 }, { "epoch": 89.81194029850747, "grad_norm": NaN, "learning_rate": 8.931216931216932e-06, "loss": 49.4483, "step": 3772 }, { "epoch": 89.83582089552239, "grad_norm": 23.31702423095703, "learning_rate": 8.931216931216932e-06, "loss": 42.8926, "step": 3773 }, { "epoch": 89.85970149253731, "grad_norm": 26.894012451171875, "learning_rate": 8.92857142857143e-06, "loss": 41.3476, "step": 3774 }, { "epoch": 89.88358208955223, "grad_norm": 19.226701736450195, "learning_rate": 8.925925925925927e-06, "loss": 42.9396, "step": 3775 }, { "epoch": 89.90746268656716, "grad_norm": 26.918243408203125, "learning_rate": 8.923280423280424e-06, "loss": 41.7109, "step": 3776 }, { "epoch": 89.9313432835821, "grad_norm": 22.435697555541992, "learning_rate": 8.920634920634922e-06, "loss": 42.5026, "step": 3777 }, { "epoch": 89.95522388059702, "grad_norm": 19.455547332763672, "learning_rate": 8.91798941798942e-06, "loss": 42.4964, "step": 3778 }, { "epoch": 89.97910447761194, "grad_norm": 24.792171478271484, "learning_rate": 8.915343915343915e-06, "loss": 41.6366, "step": 3779 }, { "epoch": 90.0, "grad_norm": 14.4516019821167, "learning_rate": 8.912698412698414e-06, "loss": 36.7873, "step": 3780 }, { "epoch": 90.0, "step": 3780, "total_flos": 1.857999472723437e+17, "train_loss": 4.747417533713043, "train_runtime": 12850.2933, "train_samples_per_second": 37.484, "train_steps_per_second": 0.294 }, { "epoch": 90.02388059701492, "grad_norm": 26.03937339782715, "learning_rate": 1e-05, "loss": 42.4337, "step": 3781 }, { "epoch": 90.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 51.1491, "step": 3782 }, { "epoch": 90.07164179104478, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 53.2051, "step": 3783 }, { "epoch": 90.0955223880597, "grad_norm": 446.1357421875, "learning_rate": 9.997835497835499e-06, "loss": 51.5745, "step": 3784 }, { "epoch": 90.11940298507463, "grad_norm": 229.35903930664062, "learning_rate": 9.995670995670996e-06, "loss": 49.5899, "step": 3785 }, { "epoch": 90.14328358208955, "grad_norm": 109.18777465820312, "learning_rate": 9.993506493506494e-06, "loss": 45.889, "step": 3786 }, { "epoch": 90.16716417910447, "grad_norm": 79.82958221435547, "learning_rate": 9.991341991341992e-06, "loss": 44.3638, "step": 3787 }, { "epoch": 90.1910447761194, "grad_norm": 69.46668243408203, "learning_rate": 9.98917748917749e-06, "loss": 43.6641, "step": 3788 }, { "epoch": 90.21492537313434, "grad_norm": 56.4055290222168, "learning_rate": 9.987012987012988e-06, "loss": 45.0336, "step": 3789 }, { "epoch": 90.23880597014926, "grad_norm": 53.48906326293945, "learning_rate": 9.984848484848485e-06, "loss": 42.9807, "step": 3790 }, { "epoch": 90.26268656716418, "grad_norm": 38.25556564331055, "learning_rate": 9.982683982683983e-06, "loss": 44.1306, "step": 3791 }, { "epoch": 90.2865671641791, "grad_norm": 41.42750549316406, "learning_rate": 9.980519480519481e-06, "loss": 42.1205, "step": 3792 }, { "epoch": 90.31044776119403, "grad_norm": 34.52850341796875, "learning_rate": 9.978354978354979e-06, "loss": 43.3744, "step": 3793 }, { "epoch": 90.33432835820895, "grad_norm": 28.61484146118164, "learning_rate": 9.976190476190477e-06, "loss": 43.487, "step": 3794 }, { "epoch": 90.35820895522389, "grad_norm": 27.961273193359375, "learning_rate": 9.974025974025974e-06, "loss": 43.9663, "step": 3795 }, { "epoch": 90.38208955223881, "grad_norm": 27.92458152770996, "learning_rate": 9.971861471861472e-06, "loss": 43.2716, "step": 3796 }, { "epoch": 90.40597014925373, "grad_norm": 21.93165397644043, "learning_rate": 9.96969696969697e-06, "loss": 43.3704, "step": 3797 }, { "epoch": 90.42985074626866, "grad_norm": 27.053754806518555, "learning_rate": 9.967532467532468e-06, "loss": 42.7038, "step": 3798 }, { "epoch": 90.45373134328358, "grad_norm": 31.030607223510742, "learning_rate": 9.965367965367966e-06, "loss": 43.1343, "step": 3799 }, { "epoch": 90.4776119402985, "grad_norm": 24.048316955566406, "learning_rate": 9.963203463203463e-06, "loss": 42.1113, "step": 3800 }, { "epoch": 90.50149253731344, "grad_norm": 17.98249053955078, "learning_rate": 9.961038961038963e-06, "loss": 42.6117, "step": 3801 }, { "epoch": 90.52537313432836, "grad_norm": 20.080669403076172, "learning_rate": 9.95887445887446e-06, "loss": 42.4281, "step": 3802 }, { "epoch": 90.54925373134328, "grad_norm": 19.842525482177734, "learning_rate": 9.956709956709958e-06, "loss": 40.8022, "step": 3803 }, { "epoch": 90.57313432835821, "grad_norm": 20.453306198120117, "learning_rate": 9.954545454545456e-06, "loss": 42.8288, "step": 3804 }, { "epoch": 90.59701492537313, "grad_norm": 19.955123901367188, "learning_rate": 9.952380952380954e-06, "loss": 40.2546, "step": 3805 }, { "epoch": 90.62089552238805, "grad_norm": 17.246713638305664, "learning_rate": 9.950216450216452e-06, "loss": 42.0433, "step": 3806 }, { "epoch": 90.64477611940299, "grad_norm": 20.76253890991211, "learning_rate": 9.94805194805195e-06, "loss": 42.7741, "step": 3807 }, { "epoch": 90.66865671641791, "grad_norm": 21.001201629638672, "learning_rate": 9.945887445887446e-06, "loss": 43.6741, "step": 3808 }, { "epoch": 90.69253731343284, "grad_norm": 20.765684127807617, "learning_rate": 9.943722943722944e-06, "loss": 41.8182, "step": 3809 }, { "epoch": 90.71641791044776, "grad_norm": 16.794981002807617, "learning_rate": 9.941558441558441e-06, "loss": 42.6478, "step": 3810 }, { "epoch": 90.74029850746268, "grad_norm": 23.377695083618164, "learning_rate": 9.939393939393939e-06, "loss": 42.0878, "step": 3811 }, { "epoch": 90.7641791044776, "grad_norm": 23.543071746826172, "learning_rate": 9.937229437229437e-06, "loss": 42.4977, "step": 3812 }, { "epoch": 90.78805970149254, "grad_norm": 18.546525955200195, "learning_rate": 9.935064935064936e-06, "loss": 42.4457, "step": 3813 }, { "epoch": 90.81194029850747, "grad_norm": 25.244186401367188, "learning_rate": 9.932900432900434e-06, "loss": 42.4906, "step": 3814 }, { "epoch": 90.83582089552239, "grad_norm": 21.267963409423828, "learning_rate": 9.930735930735932e-06, "loss": 41.7433, "step": 3815 }, { "epoch": 90.85970149253731, "grad_norm": 19.291160583496094, "learning_rate": 9.92857142857143e-06, "loss": 41.7054, "step": 3816 }, { "epoch": 90.88358208955223, "grad_norm": 21.301227569580078, "learning_rate": 9.926406926406928e-06, "loss": 42.5566, "step": 3817 }, { "epoch": 90.90746268656716, "grad_norm": 19.511821746826172, "learning_rate": 9.924242424242425e-06, "loss": 41.5064, "step": 3818 }, { "epoch": 90.9313432835821, "grad_norm": 18.419504165649414, "learning_rate": 9.922077922077923e-06, "loss": 41.4675, "step": 3819 }, { "epoch": 90.95522388059702, "grad_norm": 19.577409744262695, "learning_rate": 9.919913419913421e-06, "loss": 43.4705, "step": 3820 }, { "epoch": 90.97910447761194, "grad_norm": 23.015262603759766, "learning_rate": 9.917748917748919e-06, "loss": 42.0356, "step": 3821 }, { "epoch": 91.0, "grad_norm": 17.785385131835938, "learning_rate": 9.915584415584417e-06, "loss": 37.6509, "step": 3822 }, { "epoch": 91.02388059701492, "grad_norm": 16.111051559448242, "learning_rate": 9.913419913419914e-06, "loss": 41.7977, "step": 3823 }, { "epoch": 91.04776119402985, "grad_norm": 22.09601593017578, "learning_rate": 9.911255411255412e-06, "loss": 42.5569, "step": 3824 }, { "epoch": 91.07164179104478, "grad_norm": 18.80573081970215, "learning_rate": 9.90909090909091e-06, "loss": 41.773, "step": 3825 }, { "epoch": 91.0955223880597, "grad_norm": 14.442939758300781, "learning_rate": 9.906926406926408e-06, "loss": 42.0426, "step": 3826 }, { "epoch": 91.11940298507463, "grad_norm": 21.839468002319336, "learning_rate": 9.904761904761906e-06, "loss": 41.9993, "step": 3827 }, { "epoch": 91.14328358208955, "grad_norm": 17.792217254638672, "learning_rate": 9.902597402597403e-06, "loss": 42.1515, "step": 3828 }, { "epoch": 91.16716417910447, "grad_norm": 15.722336769104004, "learning_rate": 9.900432900432901e-06, "loss": 42.2694, "step": 3829 }, { "epoch": 91.1910447761194, "grad_norm": 20.94297218322754, "learning_rate": 9.898268398268399e-06, "loss": 42.7043, "step": 3830 }, { "epoch": 91.21492537313434, "grad_norm": 16.2196044921875, "learning_rate": 9.896103896103897e-06, "loss": 42.4405, "step": 3831 }, { "epoch": 91.23880597014926, "grad_norm": 20.381193161010742, "learning_rate": 9.893939393939395e-06, "loss": 43.424, "step": 3832 }, { "epoch": 91.26268656716418, "grad_norm": 14.948447227478027, "learning_rate": 9.891774891774892e-06, "loss": 42.7289, "step": 3833 }, { "epoch": 91.2865671641791, "grad_norm": 17.548126220703125, "learning_rate": 9.88961038961039e-06, "loss": 41.9656, "step": 3834 }, { "epoch": 91.31044776119403, "grad_norm": 20.301937103271484, "learning_rate": 9.887445887445888e-06, "loss": 42.9516, "step": 3835 }, { "epoch": 91.33432835820895, "grad_norm": 18.3472900390625, "learning_rate": 9.885281385281386e-06, "loss": 42.281, "step": 3836 }, { "epoch": 91.35820895522389, "grad_norm": 15.503434181213379, "learning_rate": 9.883116883116885e-06, "loss": 42.502, "step": 3837 }, { "epoch": 91.38208955223881, "grad_norm": 21.448226928710938, "learning_rate": 9.880952380952381e-06, "loss": 43.0384, "step": 3838 }, { "epoch": 91.40597014925373, "grad_norm": 16.685815811157227, "learning_rate": 9.87878787878788e-06, "loss": 41.798, "step": 3839 }, { "epoch": 91.42985074626866, "grad_norm": 18.722484588623047, "learning_rate": 9.876623376623377e-06, "loss": 43.4082, "step": 3840 }, { "epoch": 91.45373134328358, "grad_norm": 19.54647445678711, "learning_rate": 9.874458874458875e-06, "loss": 42.2679, "step": 3841 }, { "epoch": 91.4776119402985, "grad_norm": 18.793495178222656, "learning_rate": 9.872294372294373e-06, "loss": 42.2962, "step": 3842 }, { "epoch": 91.50149253731344, "grad_norm": 16.687400817871094, "learning_rate": 9.87012987012987e-06, "loss": 44.2949, "step": 3843 }, { "epoch": 91.52537313432836, "grad_norm": 16.13211441040039, "learning_rate": 9.867965367965368e-06, "loss": 42.602, "step": 3844 }, { "epoch": 91.54925373134328, "grad_norm": 16.72748565673828, "learning_rate": 9.865800865800866e-06, "loss": 42.3636, "step": 3845 }, { "epoch": 91.57313432835821, "grad_norm": 22.206905364990234, "learning_rate": 9.863636363636364e-06, "loss": 43.1925, "step": 3846 }, { "epoch": 91.59701492537313, "grad_norm": 19.21588134765625, "learning_rate": 9.861471861471862e-06, "loss": 43.1342, "step": 3847 }, { "epoch": 91.62089552238805, "grad_norm": 19.708059310913086, "learning_rate": 9.85930735930736e-06, "loss": 42.7964, "step": 3848 }, { "epoch": 91.64477611940299, "grad_norm": 22.789594650268555, "learning_rate": 9.857142857142859e-06, "loss": 42.7767, "step": 3849 }, { "epoch": 91.66865671641791, "grad_norm": 17.048229217529297, "learning_rate": 9.854978354978357e-06, "loss": 42.6642, "step": 3850 }, { "epoch": 91.69253731343284, "grad_norm": 21.39427375793457, "learning_rate": 9.852813852813854e-06, "loss": 42.8962, "step": 3851 }, { "epoch": 91.71641791044776, "grad_norm": 25.67850112915039, "learning_rate": 9.850649350649352e-06, "loss": 42.4072, "step": 3852 }, { "epoch": 91.74029850746268, "grad_norm": 20.17367935180664, "learning_rate": 9.84848484848485e-06, "loss": 42.3302, "step": 3853 }, { "epoch": 91.7641791044776, "grad_norm": 16.018030166625977, "learning_rate": 9.846320346320348e-06, "loss": 42.6877, "step": 3854 }, { "epoch": 91.78805970149254, "grad_norm": 18.5965576171875, "learning_rate": 9.844155844155846e-06, "loss": 41.4104, "step": 3855 }, { "epoch": 91.81194029850747, "grad_norm": 17.651378631591797, "learning_rate": 9.841991341991343e-06, "loss": 42.1591, "step": 3856 }, { "epoch": 91.83582089552239, "grad_norm": 15.912792205810547, "learning_rate": 9.839826839826841e-06, "loss": 41.0675, "step": 3857 }, { "epoch": 91.85970149253731, "grad_norm": 20.338071823120117, "learning_rate": 9.837662337662337e-06, "loss": 43.0971, "step": 3858 }, { "epoch": 91.88358208955223, "grad_norm": 19.422807693481445, "learning_rate": 9.835497835497835e-06, "loss": 41.022, "step": 3859 }, { "epoch": 91.90746268656716, "grad_norm": 18.216012954711914, "learning_rate": 9.833333333333333e-06, "loss": 42.0068, "step": 3860 }, { "epoch": 91.9313432835821, "grad_norm": 17.68181610107422, "learning_rate": 9.831168831168832e-06, "loss": 42.778, "step": 3861 }, { "epoch": 91.95522388059702, "grad_norm": 20.660480499267578, "learning_rate": 9.82900432900433e-06, "loss": 42.8923, "step": 3862 }, { "epoch": 91.97910447761194, "grad_norm": 22.78632926940918, "learning_rate": 9.826839826839828e-06, "loss": 41.5412, "step": 3863 }, { "epoch": 92.0, "grad_norm": 17.660106658935547, "learning_rate": 9.824675324675326e-06, "loss": 36.8816, "step": 3864 }, { "epoch": 92.02388059701492, "grad_norm": 19.257198333740234, "learning_rate": 9.822510822510824e-06, "loss": 41.3789, "step": 3865 }, { "epoch": 92.04776119402985, "grad_norm": 17.690038681030273, "learning_rate": 9.820346320346321e-06, "loss": 41.8596, "step": 3866 }, { "epoch": 92.07164179104478, "grad_norm": 25.88194465637207, "learning_rate": 9.81818181818182e-06, "loss": 42.1967, "step": 3867 }, { "epoch": 92.0955223880597, "grad_norm": 18.971637725830078, "learning_rate": 9.816017316017317e-06, "loss": 41.5025, "step": 3868 }, { "epoch": 92.11940298507463, "grad_norm": 18.14025115966797, "learning_rate": 9.813852813852815e-06, "loss": 42.7121, "step": 3869 }, { "epoch": 92.14328358208955, "grad_norm": 24.20391845703125, "learning_rate": 9.811688311688313e-06, "loss": 42.9952, "step": 3870 }, { "epoch": 92.16716417910447, "grad_norm": 18.484018325805664, "learning_rate": 9.80952380952381e-06, "loss": 44.174, "step": 3871 }, { "epoch": 92.1910447761194, "grad_norm": 24.238615036010742, "learning_rate": 9.807359307359308e-06, "loss": 42.933, "step": 3872 }, { "epoch": 92.21492537313434, "grad_norm": 21.95537757873535, "learning_rate": 9.805194805194806e-06, "loss": 42.5797, "step": 3873 }, { "epoch": 92.23880597014926, "grad_norm": 16.300167083740234, "learning_rate": 9.803030303030304e-06, "loss": 41.8871, "step": 3874 }, { "epoch": 92.26268656716418, "grad_norm": 31.398351669311523, "learning_rate": 9.800865800865802e-06, "loss": 42.8308, "step": 3875 }, { "epoch": 92.2865671641791, "grad_norm": 21.76424789428711, "learning_rate": 9.7987012987013e-06, "loss": 42.1119, "step": 3876 }, { "epoch": 92.31044776119403, "grad_norm": 26.037975311279297, "learning_rate": 9.796536796536797e-06, "loss": 42.0092, "step": 3877 }, { "epoch": 92.33432835820895, "grad_norm": 26.393800735473633, "learning_rate": 9.794372294372295e-06, "loss": 43.9124, "step": 3878 }, { "epoch": 92.35820895522389, "grad_norm": 21.763713836669922, "learning_rate": 9.792207792207793e-06, "loss": 42.6169, "step": 3879 }, { "epoch": 92.38208955223881, "grad_norm": 28.867443084716797, "learning_rate": 9.79004329004329e-06, "loss": 43.093, "step": 3880 }, { "epoch": 92.40597014925373, "grad_norm": 20.59787940979004, "learning_rate": 9.787878787878788e-06, "loss": 43.4976, "step": 3881 }, { "epoch": 92.42985074626866, "grad_norm": 32.58126449584961, "learning_rate": 9.785714285714286e-06, "loss": 42.2799, "step": 3882 }, { "epoch": 92.45373134328358, "grad_norm": 18.00343132019043, "learning_rate": 9.783549783549784e-06, "loss": 42.9497, "step": 3883 }, { "epoch": 92.4776119402985, "grad_norm": 31.740930557250977, "learning_rate": 9.781385281385282e-06, "loss": 42.7341, "step": 3884 }, { "epoch": 92.50149253731344, "grad_norm": 24.078405380249023, "learning_rate": 9.779220779220781e-06, "loss": 43.1077, "step": 3885 }, { "epoch": 92.52537313432836, "grad_norm": 21.194313049316406, "learning_rate": 9.777056277056279e-06, "loss": 41.9059, "step": 3886 }, { "epoch": 92.54925373134328, "grad_norm": 30.298595428466797, "learning_rate": 9.774891774891775e-06, "loss": 41.5753, "step": 3887 }, { "epoch": 92.57313432835821, "grad_norm": 21.55902099609375, "learning_rate": 9.772727272727273e-06, "loss": 41.659, "step": 3888 }, { "epoch": 92.59701492537313, "grad_norm": 27.879924774169922, "learning_rate": 9.77056277056277e-06, "loss": 42.4026, "step": 3889 }, { "epoch": 92.62089552238805, "grad_norm": 20.100893020629883, "learning_rate": 9.768398268398269e-06, "loss": 42.3196, "step": 3890 }, { "epoch": 92.64477611940299, "grad_norm": 24.352115631103516, "learning_rate": 9.766233766233766e-06, "loss": 42.4063, "step": 3891 }, { "epoch": 92.66865671641791, "grad_norm": 24.65276336669922, "learning_rate": 9.764069264069264e-06, "loss": 41.6774, "step": 3892 }, { "epoch": 92.69253731343284, "grad_norm": 18.95211410522461, "learning_rate": 9.761904761904762e-06, "loss": 40.4774, "step": 3893 }, { "epoch": 92.71641791044776, "grad_norm": 37.48885726928711, "learning_rate": 9.75974025974026e-06, "loss": 42.0188, "step": 3894 }, { "epoch": 92.74029850746268, "grad_norm": 27.999391555786133, "learning_rate": 9.757575757575758e-06, "loss": 41.9417, "step": 3895 }, { "epoch": 92.7641791044776, "grad_norm": 41.38749694824219, "learning_rate": 9.755411255411255e-06, "loss": 42.3823, "step": 3896 }, { "epoch": 92.78805970149254, "grad_norm": 30.16627311706543, "learning_rate": 9.753246753246755e-06, "loss": 42.6722, "step": 3897 }, { "epoch": 92.81194029850747, "grad_norm": 42.71925735473633, "learning_rate": 9.751082251082253e-06, "loss": 42.7932, "step": 3898 }, { "epoch": 92.83582089552239, "grad_norm": 42.11480712890625, "learning_rate": 9.74891774891775e-06, "loss": 42.3812, "step": 3899 }, { "epoch": 92.85970149253731, "grad_norm": 23.51568031311035, "learning_rate": 9.746753246753248e-06, "loss": 42.0872, "step": 3900 }, { "epoch": 92.88358208955223, "grad_norm": 29.64082145690918, "learning_rate": 9.744588744588746e-06, "loss": 42.7743, "step": 3901 }, { "epoch": 92.90746268656716, "grad_norm": 24.687829971313477, "learning_rate": 9.742424242424244e-06, "loss": 42.151, "step": 3902 }, { "epoch": 92.9313432835821, "grad_norm": 23.673076629638672, "learning_rate": 9.740259740259742e-06, "loss": 42.949, "step": 3903 }, { "epoch": 92.95522388059702, "grad_norm": 29.738771438598633, "learning_rate": 9.73809523809524e-06, "loss": 41.3754, "step": 3904 }, { "epoch": 92.97910447761194, "grad_norm": 23.26430320739746, "learning_rate": 9.735930735930737e-06, "loss": 42.2649, "step": 3905 }, { "epoch": 93.0, "grad_norm": 33.02578353881836, "learning_rate": 9.733766233766235e-06, "loss": 36.7133, "step": 3906 }, { "epoch": 93.02388059701492, "grad_norm": 29.762083053588867, "learning_rate": 9.731601731601731e-06, "loss": 42.1617, "step": 3907 }, { "epoch": 93.04776119402985, "grad_norm": 42.29904556274414, "learning_rate": 9.729437229437229e-06, "loss": 41.4727, "step": 3908 }, { "epoch": 93.07164179104478, "grad_norm": 35.2297477722168, "learning_rate": 9.727272727272728e-06, "loss": 41.8486, "step": 3909 }, { "epoch": 93.0955223880597, "grad_norm": 31.90110206604004, "learning_rate": 9.725108225108226e-06, "loss": 41.3951, "step": 3910 }, { "epoch": 93.11940298507463, "grad_norm": 33.118011474609375, "learning_rate": 9.722943722943724e-06, "loss": 42.8038, "step": 3911 }, { "epoch": 93.14328358208955, "grad_norm": 28.162616729736328, "learning_rate": 9.720779220779222e-06, "loss": 42.2424, "step": 3912 }, { "epoch": 93.16716417910447, "grad_norm": 26.799827575683594, "learning_rate": 9.71861471861472e-06, "loss": 41.9939, "step": 3913 }, { "epoch": 93.1910447761194, "grad_norm": 36.02149200439453, "learning_rate": 9.716450216450217e-06, "loss": 43.0555, "step": 3914 }, { "epoch": 93.21492537313434, "grad_norm": 30.073331832885742, "learning_rate": 9.714285714285715e-06, "loss": 40.7799, "step": 3915 }, { "epoch": 93.23880597014926, "grad_norm": 32.572547912597656, "learning_rate": 9.712121212121213e-06, "loss": 42.139, "step": 3916 }, { "epoch": 93.26268656716418, "grad_norm": 30.6304988861084, "learning_rate": 9.70995670995671e-06, "loss": 42.702, "step": 3917 }, { "epoch": 93.2865671641791, "grad_norm": 33.230812072753906, "learning_rate": 9.707792207792209e-06, "loss": 42.4281, "step": 3918 }, { "epoch": 93.31044776119403, "grad_norm": 29.524002075195312, "learning_rate": 9.705627705627706e-06, "loss": 42.5262, "step": 3919 }, { "epoch": 93.33432835820895, "grad_norm": 29.51606559753418, "learning_rate": 9.703463203463204e-06, "loss": 41.8173, "step": 3920 }, { "epoch": 93.35820895522389, "grad_norm": 22.32621192932129, "learning_rate": 9.701298701298702e-06, "loss": 43.059, "step": 3921 }, { "epoch": 93.38208955223881, "grad_norm": 36.80875778198242, "learning_rate": 9.6991341991342e-06, "loss": 41.8935, "step": 3922 }, { "epoch": 93.40597014925373, "grad_norm": 30.580604553222656, "learning_rate": 9.696969696969698e-06, "loss": 43.2128, "step": 3923 }, { "epoch": 93.42985074626866, "grad_norm": 29.170934677124023, "learning_rate": 9.694805194805195e-06, "loss": 41.6993, "step": 3924 }, { "epoch": 93.45373134328358, "grad_norm": 28.69053840637207, "learning_rate": 9.692640692640693e-06, "loss": 43.051, "step": 3925 }, { "epoch": 93.4776119402985, "grad_norm": 29.881338119506836, "learning_rate": 9.690476190476191e-06, "loss": 41.1923, "step": 3926 }, { "epoch": 93.50149253731344, "grad_norm": 25.122774124145508, "learning_rate": 9.688311688311689e-06, "loss": 42.4061, "step": 3927 }, { "epoch": 93.52537313432836, "grad_norm": 34.054847717285156, "learning_rate": 9.686147186147187e-06, "loss": 42.4354, "step": 3928 }, { "epoch": 93.54925373134328, "grad_norm": 29.546493530273438, "learning_rate": 9.683982683982684e-06, "loss": 41.9759, "step": 3929 }, { "epoch": 93.57313432835821, "grad_norm": 32.49911880493164, "learning_rate": 9.681818181818182e-06, "loss": 43.3769, "step": 3930 }, { "epoch": 93.59701492537313, "grad_norm": 28.943012237548828, "learning_rate": 9.67965367965368e-06, "loss": 41.6171, "step": 3931 }, { "epoch": 93.62089552238805, "grad_norm": 32.4178466796875, "learning_rate": 9.67748917748918e-06, "loss": 42.6111, "step": 3932 }, { "epoch": 93.64477611940299, "grad_norm": 30.295703887939453, "learning_rate": 9.675324675324677e-06, "loss": 41.6904, "step": 3933 }, { "epoch": 93.66865671641791, "grad_norm": 31.419668197631836, "learning_rate": 9.673160173160175e-06, "loss": 42.3374, "step": 3934 }, { "epoch": 93.69253731343284, "grad_norm": 29.994272232055664, "learning_rate": 9.670995670995673e-06, "loss": 42.1553, "step": 3935 }, { "epoch": 93.71641791044776, "grad_norm": 30.031116485595703, "learning_rate": 9.66883116883117e-06, "loss": 42.1101, "step": 3936 }, { "epoch": 93.74029850746268, "grad_norm": 28.21011734008789, "learning_rate": 9.666666666666667e-06, "loss": 42.0604, "step": 3937 }, { "epoch": 93.7641791044776, "grad_norm": 32.34469985961914, "learning_rate": 9.664502164502165e-06, "loss": 42.4025, "step": 3938 }, { "epoch": 93.78805970149254, "grad_norm": 25.2736759185791, "learning_rate": 9.662337662337662e-06, "loss": 42.7677, "step": 3939 }, { "epoch": 93.81194029850747, "grad_norm": 35.72128677368164, "learning_rate": 9.66017316017316e-06, "loss": 43.4687, "step": 3940 }, { "epoch": 93.83582089552239, "grad_norm": 30.39203453063965, "learning_rate": 9.658008658008658e-06, "loss": 41.7504, "step": 3941 }, { "epoch": 93.85970149253731, "grad_norm": 26.031253814697266, "learning_rate": 9.655844155844156e-06, "loss": 41.6092, "step": 3942 }, { "epoch": 93.88358208955223, "grad_norm": 23.05304718017578, "learning_rate": 9.653679653679654e-06, "loss": 42.4116, "step": 3943 }, { "epoch": 93.90746268656716, "grad_norm": 27.849210739135742, "learning_rate": 9.651515151515153e-06, "loss": 43.2295, "step": 3944 }, { "epoch": 93.9313432835821, "grad_norm": 25.089933395385742, "learning_rate": 9.64935064935065e-06, "loss": 42.6244, "step": 3945 }, { "epoch": 93.95522388059702, "grad_norm": 32.90645217895508, "learning_rate": 9.647186147186149e-06, "loss": 42.7992, "step": 3946 }, { "epoch": 93.97910447761194, "grad_norm": 28.58262825012207, "learning_rate": 9.645021645021646e-06, "loss": 43.0072, "step": 3947 }, { "epoch": 94.0, "grad_norm": 23.826631546020508, "learning_rate": 9.642857142857144e-06, "loss": 37.1225, "step": 3948 }, { "epoch": 94.02388059701492, "grad_norm": 28.149904251098633, "learning_rate": 9.640692640692642e-06, "loss": 42.374, "step": 3949 }, { "epoch": 94.04776119402985, "grad_norm": 28.40786361694336, "learning_rate": 9.63852813852814e-06, "loss": 41.6844, "step": 3950 }, { "epoch": 94.07164179104478, "grad_norm": 25.789466857910156, "learning_rate": 9.636363636363638e-06, "loss": 41.9359, "step": 3951 }, { "epoch": 94.0955223880597, "grad_norm": 31.53352928161621, "learning_rate": 9.634199134199135e-06, "loss": 41.4059, "step": 3952 }, { "epoch": 94.11940298507463, "grad_norm": 25.65757179260254, "learning_rate": 9.632034632034633e-06, "loss": 42.8445, "step": 3953 }, { "epoch": 94.14328358208955, "grad_norm": 35.67771911621094, "learning_rate": 9.629870129870131e-06, "loss": 43.0635, "step": 3954 }, { "epoch": 94.16716417910447, "grad_norm": 31.19240951538086, "learning_rate": 9.627705627705629e-06, "loss": 42.4725, "step": 3955 }, { "epoch": 94.1910447761194, "grad_norm": 31.1099853515625, "learning_rate": 9.625541125541127e-06, "loss": 42.6572, "step": 3956 }, { "epoch": 94.21492537313434, "grad_norm": 28.18238639831543, "learning_rate": 9.623376623376624e-06, "loss": 40.6298, "step": 3957 }, { "epoch": 94.23880597014926, "grad_norm": 25.916431427001953, "learning_rate": 9.621212121212122e-06, "loss": 42.1036, "step": 3958 }, { "epoch": 94.26268656716418, "grad_norm": 25.19932746887207, "learning_rate": 9.61904761904762e-06, "loss": 42.7877, "step": 3959 }, { "epoch": 94.2865671641791, "grad_norm": 31.23909568786621, "learning_rate": 9.616883116883118e-06, "loss": 42.3302, "step": 3960 }, { "epoch": 94.31044776119403, "grad_norm": 27.547996520996094, "learning_rate": 9.614718614718616e-06, "loss": 42.9115, "step": 3961 }, { "epoch": 94.33432835820895, "grad_norm": 33.331939697265625, "learning_rate": 9.612554112554113e-06, "loss": 42.9594, "step": 3962 }, { "epoch": 94.35820895522389, "grad_norm": 26.780292510986328, "learning_rate": 9.610389610389611e-06, "loss": 43.8544, "step": 3963 }, { "epoch": 94.38208955223881, "grad_norm": 25.683496475219727, "learning_rate": 9.608225108225109e-06, "loss": 41.3053, "step": 3964 }, { "epoch": 94.40597014925373, "grad_norm": 22.268705368041992, "learning_rate": 9.606060606060607e-06, "loss": 41.5663, "step": 3965 }, { "epoch": 94.42985074626866, "grad_norm": 26.915376663208008, "learning_rate": 9.603896103896105e-06, "loss": 42.8438, "step": 3966 }, { "epoch": 94.45373134328358, "grad_norm": 18.383493423461914, "learning_rate": 9.601731601731602e-06, "loss": 42.499, "step": 3967 }, { "epoch": 94.4776119402985, "grad_norm": 36.09028244018555, "learning_rate": 9.5995670995671e-06, "loss": 42.8744, "step": 3968 }, { "epoch": 94.50149253731344, "grad_norm": 27.188034057617188, "learning_rate": 9.597402597402598e-06, "loss": 41.8915, "step": 3969 }, { "epoch": 94.52537313432836, "grad_norm": 30.428661346435547, "learning_rate": 9.595238095238096e-06, "loss": 41.9762, "step": 3970 }, { "epoch": 94.54925373134328, "grad_norm": 25.777450561523438, "learning_rate": 9.593073593073594e-06, "loss": 43.0853, "step": 3971 }, { "epoch": 94.57313432835821, "grad_norm": 28.07237434387207, "learning_rate": 9.590909090909091e-06, "loss": 42.7039, "step": 3972 }, { "epoch": 94.59701492537313, "grad_norm": 22.956628799438477, "learning_rate": 9.588744588744589e-06, "loss": 42.8252, "step": 3973 }, { "epoch": 94.62089552238805, "grad_norm": 30.87279510498047, "learning_rate": 9.586580086580087e-06, "loss": 42.8992, "step": 3974 }, { "epoch": 94.64477611940299, "grad_norm": 24.29635238647461, "learning_rate": 9.584415584415585e-06, "loss": 41.1669, "step": 3975 }, { "epoch": 94.66865671641791, "grad_norm": 31.342975616455078, "learning_rate": 9.582251082251083e-06, "loss": 42.9206, "step": 3976 }, { "epoch": 94.69253731343284, "grad_norm": 32.55195999145508, "learning_rate": 9.58008658008658e-06, "loss": 41.6277, "step": 3977 }, { "epoch": 94.71641791044776, "grad_norm": 29.774578094482422, "learning_rate": 9.577922077922078e-06, "loss": 42.2551, "step": 3978 }, { "epoch": 94.74029850746268, "grad_norm": 25.456302642822266, "learning_rate": 9.575757575757576e-06, "loss": 40.1482, "step": 3979 }, { "epoch": 94.7641791044776, "grad_norm": 25.847124099731445, "learning_rate": 9.573593073593075e-06, "loss": 42.2201, "step": 3980 }, { "epoch": 94.78805970149254, "grad_norm": 27.12795066833496, "learning_rate": 9.571428571428573e-06, "loss": 41.8479, "step": 3981 }, { "epoch": 94.81194029850747, "grad_norm": 24.278888702392578, "learning_rate": 9.569264069264071e-06, "loss": 42.6692, "step": 3982 }, { "epoch": 94.83582089552239, "grad_norm": 22.567380905151367, "learning_rate": 9.567099567099569e-06, "loss": 42.3215, "step": 3983 }, { "epoch": 94.85970149253731, "grad_norm": 23.813114166259766, "learning_rate": 9.564935064935067e-06, "loss": 42.6284, "step": 3984 }, { "epoch": 94.88358208955223, "grad_norm": 19.152956008911133, "learning_rate": 9.562770562770564e-06, "loss": 41.7055, "step": 3985 }, { "epoch": 94.90746268656716, "grad_norm": 25.253353118896484, "learning_rate": 9.56060606060606e-06, "loss": 42.5487, "step": 3986 }, { "epoch": 94.9313432835821, "grad_norm": 21.04471206665039, "learning_rate": 9.558441558441558e-06, "loss": 44.019, "step": 3987 }, { "epoch": 94.95522388059702, "grad_norm": NaN, "learning_rate": 9.556277056277056e-06, "loss": 47.5805, "step": 3988 }, { "epoch": 94.97910447761194, "grad_norm": 20.38011932373047, "learning_rate": 9.556277056277056e-06, "loss": 40.8306, "step": 3989 }, { "epoch": 95.0, "grad_norm": 20.988080978393555, "learning_rate": 9.554112554112554e-06, "loss": 35.8475, "step": 3990 }, { "epoch": 95.02388059701492, "grad_norm": 25.182218551635742, "learning_rate": 9.551948051948052e-06, "loss": 42.7702, "step": 3991 }, { "epoch": 95.04776119402985, "grad_norm": 18.022729873657227, "learning_rate": 9.54978354978355e-06, "loss": 41.3642, "step": 3992 }, { "epoch": 95.07164179104478, "grad_norm": 28.234127044677734, "learning_rate": 9.547619047619049e-06, "loss": 41.819, "step": 3993 }, { "epoch": 95.0955223880597, "grad_norm": 22.71247100830078, "learning_rate": 9.545454545454547e-06, "loss": 43.0423, "step": 3994 }, { "epoch": 95.11940298507463, "grad_norm": 26.776891708374023, "learning_rate": 9.543290043290045e-06, "loss": 42.4988, "step": 3995 }, { "epoch": 95.14328358208955, "grad_norm": 21.445236206054688, "learning_rate": 9.541125541125542e-06, "loss": 41.4199, "step": 3996 }, { "epoch": 95.16716417910447, "grad_norm": 23.514680862426758, "learning_rate": 9.53896103896104e-06, "loss": 42.3048, "step": 3997 }, { "epoch": 95.1910447761194, "grad_norm": 19.648818969726562, "learning_rate": 9.536796536796538e-06, "loss": 41.8681, "step": 3998 }, { "epoch": 95.21492537313434, "grad_norm": 21.146074295043945, "learning_rate": 9.534632034632036e-06, "loss": 42.3901, "step": 3999 }, { "epoch": 95.23880597014926, "grad_norm": 17.257108688354492, "learning_rate": 9.532467532467534e-06, "loss": 42.5485, "step": 4000 }, { "epoch": 95.26268656716418, "grad_norm": 20.980907440185547, "learning_rate": 9.530303030303031e-06, "loss": 42.5298, "step": 4001 }, { "epoch": 95.2865671641791, "grad_norm": 22.18124771118164, "learning_rate": 9.52813852813853e-06, "loss": 42.7032, "step": 4002 }, { "epoch": 95.31044776119403, "grad_norm": 20.432281494140625, "learning_rate": 9.525974025974027e-06, "loss": 43.449, "step": 4003 }, { "epoch": 95.33432835820895, "grad_norm": 19.2701473236084, "learning_rate": 9.523809523809525e-06, "loss": 40.555, "step": 4004 }, { "epoch": 95.35820895522389, "grad_norm": 19.681455612182617, "learning_rate": 9.521645021645023e-06, "loss": 41.2141, "step": 4005 }, { "epoch": 95.38208955223881, "grad_norm": 18.39265251159668, "learning_rate": 9.51948051948052e-06, "loss": 42.6937, "step": 4006 }, { "epoch": 95.40597014925373, "grad_norm": 19.818313598632812, "learning_rate": 9.517316017316018e-06, "loss": 43.3448, "step": 4007 }, { "epoch": 95.42985074626866, "grad_norm": 22.540481567382812, "learning_rate": 9.515151515151516e-06, "loss": 42.9516, "step": 4008 }, { "epoch": 95.45373134328358, "grad_norm": 19.422515869140625, "learning_rate": 9.512987012987014e-06, "loss": 42.7121, "step": 4009 }, { "epoch": 95.4776119402985, "grad_norm": 22.789037704467773, "learning_rate": 9.510822510822512e-06, "loss": 42.5243, "step": 4010 }, { "epoch": 95.50149253731344, "grad_norm": 18.70187759399414, "learning_rate": 9.50865800865801e-06, "loss": 40.3263, "step": 4011 }, { "epoch": 95.52537313432836, "grad_norm": 24.231351852416992, "learning_rate": 9.506493506493507e-06, "loss": 42.1699, "step": 4012 }, { "epoch": 95.54925373134328, "grad_norm": 26.356748580932617, "learning_rate": 9.504329004329005e-06, "loss": 42.181, "step": 4013 }, { "epoch": 95.57313432835821, "grad_norm": 18.702556610107422, "learning_rate": 9.502164502164503e-06, "loss": 42.4881, "step": 4014 }, { "epoch": 95.59701492537313, "grad_norm": 27.878799438476562, "learning_rate": 9.5e-06, "loss": 42.2801, "step": 4015 }, { "epoch": 95.62089552238805, "grad_norm": 20.791034698486328, "learning_rate": 9.497835497835498e-06, "loss": 41.909, "step": 4016 }, { "epoch": 95.64477611940299, "grad_norm": 24.874574661254883, "learning_rate": 9.495670995670996e-06, "loss": 42.2108, "step": 4017 }, { "epoch": 95.66865671641791, "grad_norm": 18.562255859375, "learning_rate": 9.493506493506494e-06, "loss": 41.687, "step": 4018 }, { "epoch": 95.69253731343284, "grad_norm": 27.460060119628906, "learning_rate": 9.491341991341992e-06, "loss": 42.3688, "step": 4019 }, { "epoch": 95.71641791044776, "grad_norm": 21.485797882080078, "learning_rate": 9.48917748917749e-06, "loss": 42.6037, "step": 4020 }, { "epoch": 95.74029850746268, "grad_norm": 29.475221633911133, "learning_rate": 9.487012987012987e-06, "loss": 39.9582, "step": 4021 }, { "epoch": 95.7641791044776, "grad_norm": 24.83645248413086, "learning_rate": 9.484848484848485e-06, "loss": 42.7876, "step": 4022 }, { "epoch": 95.78805970149254, "grad_norm": 29.321386337280273, "learning_rate": 9.482683982683983e-06, "loss": 42.1032, "step": 4023 }, { "epoch": 95.81194029850747, "grad_norm": 26.891469955444336, "learning_rate": 9.48051948051948e-06, "loss": 42.557, "step": 4024 }, { "epoch": 95.83582089552239, "grad_norm": 27.05336570739746, "learning_rate": 9.478354978354978e-06, "loss": 42.9743, "step": 4025 }, { "epoch": 95.85970149253731, "grad_norm": 25.014963150024414, "learning_rate": 9.476190476190476e-06, "loss": 43.1592, "step": 4026 }, { "epoch": 95.88358208955223, "grad_norm": 25.66219711303711, "learning_rate": 9.474025974025974e-06, "loss": 41.8458, "step": 4027 }, { "epoch": 95.90746268656716, "grad_norm": 22.460660934448242, "learning_rate": 9.471861471861472e-06, "loss": 42.1439, "step": 4028 }, { "epoch": 95.9313432835821, "grad_norm": 19.01448631286621, "learning_rate": 9.469696969696971e-06, "loss": 42.2933, "step": 4029 }, { "epoch": 95.95522388059702, "grad_norm": 21.85147476196289, "learning_rate": 9.46753246753247e-06, "loss": 42.1108, "step": 4030 }, { "epoch": 95.97910447761194, "grad_norm": 18.99871826171875, "learning_rate": 9.465367965367967e-06, "loss": 42.5071, "step": 4031 }, { "epoch": 96.0, "grad_norm": 16.825069427490234, "learning_rate": 9.463203463203465e-06, "loss": 37.1366, "step": 4032 }, { "epoch": 96.02388059701492, "grad_norm": 19.010360717773438, "learning_rate": 9.461038961038963e-06, "loss": 42.3766, "step": 4033 }, { "epoch": 96.04776119402985, "grad_norm": 22.50554656982422, "learning_rate": 9.45887445887446e-06, "loss": 42.5748, "step": 4034 }, { "epoch": 96.07164179104478, "grad_norm": 16.554548263549805, "learning_rate": 9.456709956709958e-06, "loss": 41.9278, "step": 4035 }, { "epoch": 96.0955223880597, "grad_norm": 23.447858810424805, "learning_rate": 9.454545454545456e-06, "loss": 42.3679, "step": 4036 }, { "epoch": 96.11940298507463, "grad_norm": 23.394611358642578, "learning_rate": 9.452380952380952e-06, "loss": 42.4519, "step": 4037 }, { "epoch": 96.14328358208955, "grad_norm": 17.726774215698242, "learning_rate": 9.45021645021645e-06, "loss": 41.8001, "step": 4038 }, { "epoch": 96.16716417910447, "grad_norm": 19.8607177734375, "learning_rate": 9.448051948051948e-06, "loss": 42.2731, "step": 4039 }, { "epoch": 96.1910447761194, "grad_norm": 24.878158569335938, "learning_rate": 9.445887445887445e-06, "loss": 42.4626, "step": 4040 }, { "epoch": 96.21492537313434, "grad_norm": 18.564037322998047, "learning_rate": 9.443722943722945e-06, "loss": 42.4094, "step": 4041 }, { "epoch": 96.23880597014926, "grad_norm": 29.672882080078125, "learning_rate": 9.441558441558443e-06, "loss": 41.8399, "step": 4042 }, { "epoch": 96.26268656716418, "grad_norm": 21.15955924987793, "learning_rate": 9.43939393939394e-06, "loss": 41.8022, "step": 4043 }, { "epoch": 96.2865671641791, "grad_norm": 19.90737533569336, "learning_rate": 9.437229437229438e-06, "loss": 41.5356, "step": 4044 }, { "epoch": 96.31044776119403, "grad_norm": 27.035198211669922, "learning_rate": 9.435064935064936e-06, "loss": 42.3891, "step": 4045 }, { "epoch": 96.33432835820895, "grad_norm": 19.44938850402832, "learning_rate": 9.432900432900434e-06, "loss": 41.7612, "step": 4046 }, { "epoch": 96.35820895522389, "grad_norm": 32.34653091430664, "learning_rate": 9.430735930735932e-06, "loss": 42.9741, "step": 4047 }, { "epoch": 96.38208955223881, "grad_norm": 23.551259994506836, "learning_rate": 9.42857142857143e-06, "loss": 41.3423, "step": 4048 }, { "epoch": 96.40597014925373, "grad_norm": 36.44496536254883, "learning_rate": 9.426406926406927e-06, "loss": 42.8346, "step": 4049 }, { "epoch": 96.42985074626866, "grad_norm": 28.864904403686523, "learning_rate": 9.424242424242425e-06, "loss": 41.8315, "step": 4050 }, { "epoch": 96.45373134328358, "grad_norm": 35.26904296875, "learning_rate": 9.422077922077923e-06, "loss": 41.5353, "step": 4051 }, { "epoch": 96.4776119402985, "grad_norm": 32.65912628173828, "learning_rate": 9.41991341991342e-06, "loss": 41.5928, "step": 4052 }, { "epoch": 96.50149253731344, "grad_norm": 31.7542667388916, "learning_rate": 9.417748917748919e-06, "loss": 41.4377, "step": 4053 }, { "epoch": 96.52537313432836, "grad_norm": 31.60584259033203, "learning_rate": 9.415584415584416e-06, "loss": 42.9119, "step": 4054 }, { "epoch": 96.54925373134328, "grad_norm": 31.597043991088867, "learning_rate": 9.413419913419914e-06, "loss": 42.2946, "step": 4055 }, { "epoch": 96.57313432835821, "grad_norm": 25.871496200561523, "learning_rate": 9.411255411255412e-06, "loss": 42.3518, "step": 4056 }, { "epoch": 96.59701492537313, "grad_norm": 38.121971130371094, "learning_rate": 9.40909090909091e-06, "loss": 41.3268, "step": 4057 }, { "epoch": 96.62089552238805, "grad_norm": 31.4708309173584, "learning_rate": 9.406926406926408e-06, "loss": 42.001, "step": 4058 }, { "epoch": 96.64477611940299, "grad_norm": 32.240604400634766, "learning_rate": 9.404761904761905e-06, "loss": 43.7004, "step": 4059 }, { "epoch": 96.66865671641791, "grad_norm": 29.972900390625, "learning_rate": 9.402597402597403e-06, "loss": 40.8066, "step": 4060 }, { "epoch": 96.69253731343284, "grad_norm": 28.71061897277832, "learning_rate": 9.400432900432901e-06, "loss": 43.052, "step": 4061 }, { "epoch": 96.71641791044776, "grad_norm": 23.861024856567383, "learning_rate": 9.398268398268399e-06, "loss": 42.5682, "step": 4062 }, { "epoch": 96.74029850746268, "grad_norm": 34.21725845336914, "learning_rate": 9.396103896103896e-06, "loss": 42.418, "step": 4063 }, { "epoch": 96.7641791044776, "grad_norm": 22.93166732788086, "learning_rate": 9.393939393939396e-06, "loss": 42.3199, "step": 4064 }, { "epoch": 96.78805970149254, "grad_norm": 35.91544723510742, "learning_rate": 9.391774891774894e-06, "loss": 40.5579, "step": 4065 }, { "epoch": 96.81194029850747, "grad_norm": 29.065799713134766, "learning_rate": 9.38961038961039e-06, "loss": 40.6409, "step": 4066 }, { "epoch": 96.83582089552239, "grad_norm": 33.4009895324707, "learning_rate": 9.387445887445888e-06, "loss": 42.934, "step": 4067 }, { "epoch": 96.85970149253731, "grad_norm": 32.16798782348633, "learning_rate": 9.385281385281385e-06, "loss": 42.4209, "step": 4068 }, { "epoch": 96.88358208955223, "grad_norm": 27.158573150634766, "learning_rate": 9.383116883116883e-06, "loss": 42.3285, "step": 4069 }, { "epoch": 96.90746268656716, "grad_norm": 28.05286407470703, "learning_rate": 9.380952380952381e-06, "loss": 43.6253, "step": 4070 }, { "epoch": 96.9313432835821, "grad_norm": 31.17296028137207, "learning_rate": 9.378787878787879e-06, "loss": 43.8199, "step": 4071 }, { "epoch": 96.95522388059702, "grad_norm": 25.182817459106445, "learning_rate": 9.376623376623377e-06, "loss": 41.0505, "step": 4072 }, { "epoch": 96.97910447761194, "grad_norm": 35.5045166015625, "learning_rate": 9.374458874458874e-06, "loss": 42.9265, "step": 4073 }, { "epoch": 97.0, "grad_norm": 23.445880889892578, "learning_rate": 9.372294372294372e-06, "loss": 36.5814, "step": 4074 }, { "epoch": 97.02388059701492, "grad_norm": 28.6851806640625, "learning_rate": 9.37012987012987e-06, "loss": 41.6689, "step": 4075 }, { "epoch": 97.04776119402985, "grad_norm": 22.152568817138672, "learning_rate": 9.36796536796537e-06, "loss": 41.6459, "step": 4076 }, { "epoch": 97.07164179104478, "grad_norm": 35.39872360229492, "learning_rate": 9.365800865800867e-06, "loss": 41.9915, "step": 4077 }, { "epoch": 97.0955223880597, "grad_norm": 27.264184951782227, "learning_rate": 9.363636363636365e-06, "loss": 42.6117, "step": 4078 }, { "epoch": 97.11940298507463, "grad_norm": 36.01545715332031, "learning_rate": 9.361471861471863e-06, "loss": 43.7312, "step": 4079 }, { "epoch": 97.14328358208955, "grad_norm": 34.436134338378906, "learning_rate": 9.35930735930736e-06, "loss": 42.597, "step": 4080 }, { "epoch": 97.16716417910447, "grad_norm": 24.796520233154297, "learning_rate": 9.357142857142859e-06, "loss": 42.432, "step": 4081 }, { "epoch": 97.1910447761194, "grad_norm": 26.330299377441406, "learning_rate": 9.354978354978356e-06, "loss": 42.1124, "step": 4082 }, { "epoch": 97.21492537313434, "grad_norm": 27.518465042114258, "learning_rate": 9.352813852813854e-06, "loss": 41.3868, "step": 4083 }, { "epoch": 97.23880597014926, "grad_norm": 25.9599552154541, "learning_rate": 9.350649350649352e-06, "loss": 40.6964, "step": 4084 }, { "epoch": 97.26268656716418, "grad_norm": 33.074974060058594, "learning_rate": 9.34848484848485e-06, "loss": 42.1326, "step": 4085 }, { "epoch": 97.2865671641791, "grad_norm": 29.895139694213867, "learning_rate": 9.346320346320346e-06, "loss": 42.1873, "step": 4086 }, { "epoch": 97.31044776119403, "grad_norm": 32.33000946044922, "learning_rate": 9.344155844155844e-06, "loss": 42.5366, "step": 4087 }, { "epoch": 97.33432835820895, "grad_norm": 28.283353805541992, "learning_rate": 9.341991341991343e-06, "loss": 41.8857, "step": 4088 }, { "epoch": 97.35820895522389, "grad_norm": 27.200963973999023, "learning_rate": 9.339826839826841e-06, "loss": 41.4329, "step": 4089 }, { "epoch": 97.38208955223881, "grad_norm": 27.918405532836914, "learning_rate": 9.337662337662339e-06, "loss": 41.4236, "step": 4090 }, { "epoch": 97.40597014925373, "grad_norm": 24.885950088500977, "learning_rate": 9.335497835497837e-06, "loss": 41.8926, "step": 4091 }, { "epoch": 97.42985074626866, "grad_norm": 24.703994750976562, "learning_rate": 9.333333333333334e-06, "loss": 42.3685, "step": 4092 }, { "epoch": 97.45373134328358, "grad_norm": 32.68978500366211, "learning_rate": 9.331168831168832e-06, "loss": 41.5668, "step": 4093 }, { "epoch": 97.4776119402985, "grad_norm": 27.5683536529541, "learning_rate": 9.32900432900433e-06, "loss": 42.4125, "step": 4094 }, { "epoch": 97.50149253731344, "grad_norm": 30.541976928710938, "learning_rate": 9.326839826839828e-06, "loss": 40.7424, "step": 4095 }, { "epoch": 97.52537313432836, "grad_norm": 28.704875946044922, "learning_rate": 9.324675324675326e-06, "loss": 42.0617, "step": 4096 }, { "epoch": 97.54925373134328, "grad_norm": 29.45570945739746, "learning_rate": 9.322510822510823e-06, "loss": 42.2572, "step": 4097 }, { "epoch": 97.57313432835821, "grad_norm": 29.299041748046875, "learning_rate": 9.320346320346321e-06, "loss": 42.5461, "step": 4098 }, { "epoch": 97.59701492537313, "grad_norm": 28.30889320373535, "learning_rate": 9.318181818181819e-06, "loss": 41.9226, "step": 4099 }, { "epoch": 97.62089552238805, "grad_norm": 23.587907791137695, "learning_rate": 9.316017316017317e-06, "loss": 42.0195, "step": 4100 }, { "epoch": 97.64477611940299, "grad_norm": 31.324934005737305, "learning_rate": 9.313852813852815e-06, "loss": 41.731, "step": 4101 }, { "epoch": 97.66865671641791, "grad_norm": 25.146387100219727, "learning_rate": 9.311688311688312e-06, "loss": 41.8452, "step": 4102 }, { "epoch": 97.69253731343284, "grad_norm": NaN, "learning_rate": 9.30952380952381e-06, "loss": 73.1578, "step": 4103 }, { "epoch": 97.71641791044776, "grad_norm": 33.619197845458984, "learning_rate": 9.30952380952381e-06, "loss": 42.6151, "step": 4104 }, { "epoch": 97.74029850746268, "grad_norm": 30.636676788330078, "learning_rate": 9.307359307359308e-06, "loss": 43.1022, "step": 4105 }, { "epoch": 97.7641791044776, "grad_norm": 30.259347915649414, "learning_rate": 9.305194805194806e-06, "loss": 42.0399, "step": 4106 }, { "epoch": 97.78805970149254, "grad_norm": 28.927536010742188, "learning_rate": 9.303030303030303e-06, "loss": 42.5658, "step": 4107 }, { "epoch": 97.81194029850747, "grad_norm": 27.93010139465332, "learning_rate": 9.300865800865801e-06, "loss": 41.5662, "step": 4108 }, { "epoch": 97.83582089552239, "grad_norm": 25.34616470336914, "learning_rate": 9.298701298701299e-06, "loss": 43.0076, "step": 4109 }, { "epoch": 97.85970149253731, "grad_norm": 28.407508850097656, "learning_rate": 9.296536796536797e-06, "loss": 43.035, "step": 4110 }, { "epoch": 97.88358208955223, "grad_norm": 22.58799934387207, "learning_rate": 9.294372294372295e-06, "loss": 42.5904, "step": 4111 }, { "epoch": 97.90746268656716, "grad_norm": 30.51255989074707, "learning_rate": 9.292207792207792e-06, "loss": 40.6314, "step": 4112 }, { "epoch": 97.9313432835821, "grad_norm": NaN, "learning_rate": 9.290043290043292e-06, "loss": 47.9418, "step": 4113 }, { "epoch": 97.95522388059702, "grad_norm": 24.9912166595459, "learning_rate": 9.290043290043292e-06, "loss": 42.5057, "step": 4114 }, { "epoch": 97.97910447761194, "grad_norm": 29.492568969726562, "learning_rate": 9.28787878787879e-06, "loss": 42.4723, "step": 4115 }, { "epoch": 98.0, "grad_norm": 22.984312057495117, "learning_rate": 9.285714285714288e-06, "loss": 36.1324, "step": 4116 }, { "epoch": 98.02388059701492, "grad_norm": 26.956518173217773, "learning_rate": 9.283549783549785e-06, "loss": 42.6798, "step": 4117 }, { "epoch": 98.04776119402985, "grad_norm": 23.24462890625, "learning_rate": 9.281385281385281e-06, "loss": 42.5043, "step": 4118 }, { "epoch": 98.07164179104478, "grad_norm": 32.33470153808594, "learning_rate": 9.27922077922078e-06, "loss": 42.0607, "step": 4119 }, { "epoch": 98.0955223880597, "grad_norm": 30.606536865234375, "learning_rate": 9.277056277056277e-06, "loss": 42.3543, "step": 4120 }, { "epoch": 98.11940298507463, "grad_norm": 26.795475006103516, "learning_rate": 9.274891774891775e-06, "loss": 41.33, "step": 4121 }, { "epoch": 98.14328358208955, "grad_norm": 23.049283981323242, "learning_rate": 9.272727272727273e-06, "loss": 41.2262, "step": 4122 }, { "epoch": 98.16716417910447, "grad_norm": 30.961490631103516, "learning_rate": 9.27056277056277e-06, "loss": 42.3126, "step": 4123 }, { "epoch": 98.1910447761194, "grad_norm": 25.457870483398438, "learning_rate": 9.268398268398268e-06, "loss": 43.0498, "step": 4124 }, { "epoch": 98.21492537313434, "grad_norm": 28.787675857543945, "learning_rate": 9.266233766233766e-06, "loss": 41.5441, "step": 4125 }, { "epoch": 98.23880597014926, "grad_norm": 23.33895492553711, "learning_rate": 9.264069264069266e-06, "loss": 41.2298, "step": 4126 }, { "epoch": 98.26268656716418, "grad_norm": 28.43191146850586, "learning_rate": 9.261904761904763e-06, "loss": 43.8188, "step": 4127 }, { "epoch": 98.2865671641791, "grad_norm": 22.150148391723633, "learning_rate": 9.259740259740261e-06, "loss": 41.9418, "step": 4128 }, { "epoch": 98.31044776119403, "grad_norm": 32.84375762939453, "learning_rate": 9.257575757575759e-06, "loss": 42.181, "step": 4129 }, { "epoch": 98.33432835820895, "grad_norm": 27.58066177368164, "learning_rate": 9.255411255411257e-06, "loss": 41.9053, "step": 4130 }, { "epoch": 98.35820895522389, "grad_norm": 26.275638580322266, "learning_rate": 9.253246753246755e-06, "loss": 42.643, "step": 4131 }, { "epoch": 98.38208955223881, "grad_norm": 26.407045364379883, "learning_rate": 9.251082251082252e-06, "loss": 41.2759, "step": 4132 }, { "epoch": 98.40597014925373, "grad_norm": 28.262874603271484, "learning_rate": 9.24891774891775e-06, "loss": 41.2746, "step": 4133 }, { "epoch": 98.42985074626866, "grad_norm": 25.495405197143555, "learning_rate": 9.246753246753248e-06, "loss": 41.17, "step": 4134 }, { "epoch": 98.45373134328358, "grad_norm": 30.302942276000977, "learning_rate": 9.244588744588746e-06, "loss": 40.8692, "step": 4135 }, { "epoch": 98.4776119402985, "grad_norm": 26.874711990356445, "learning_rate": 9.242424242424244e-06, "loss": 42.9695, "step": 4136 }, { "epoch": 98.50149253731344, "grad_norm": 27.96731948852539, "learning_rate": 9.240259740259741e-06, "loss": 41.0995, "step": 4137 }, { "epoch": 98.52537313432836, "grad_norm": 26.49541664123535, "learning_rate": 9.238095238095239e-06, "loss": 42.3258, "step": 4138 }, { "epoch": 98.54925373134328, "grad_norm": 24.790346145629883, "learning_rate": 9.235930735930737e-06, "loss": 42.5989, "step": 4139 }, { "epoch": 98.57313432835821, "grad_norm": 22.83180809020996, "learning_rate": 9.233766233766235e-06, "loss": 41.4101, "step": 4140 }, { "epoch": 98.59701492537313, "grad_norm": 27.18695640563965, "learning_rate": 9.231601731601733e-06, "loss": 42.1914, "step": 4141 }, { "epoch": 98.62089552238805, "grad_norm": 23.35308074951172, "learning_rate": 9.22943722943723e-06, "loss": 42.3357, "step": 4142 }, { "epoch": 98.64477611940299, "grad_norm": 32.9411735534668, "learning_rate": 9.227272727272728e-06, "loss": 42.2151, "step": 4143 }, { "epoch": 98.66865671641791, "grad_norm": 28.968116760253906, "learning_rate": 9.225108225108226e-06, "loss": 42.5766, "step": 4144 }, { "epoch": 98.69253731343284, "grad_norm": 26.254579544067383, "learning_rate": 9.222943722943724e-06, "loss": 42.5968, "step": 4145 }, { "epoch": 98.71641791044776, "grad_norm": 27.665916442871094, "learning_rate": 9.220779220779221e-06, "loss": 41.0831, "step": 4146 }, { "epoch": 98.74029850746268, "grad_norm": 29.594675064086914, "learning_rate": 9.21861471861472e-06, "loss": 42.1963, "step": 4147 }, { "epoch": 98.7641791044776, "grad_norm": 23.506603240966797, "learning_rate": 9.216450216450217e-06, "loss": 41.9209, "step": 4148 }, { "epoch": 98.78805970149254, "grad_norm": 32.939395904541016, "learning_rate": 9.214285714285715e-06, "loss": 42.0637, "step": 4149 }, { "epoch": 98.81194029850747, "grad_norm": 27.35706901550293, "learning_rate": 9.212121212121213e-06, "loss": 42.4936, "step": 4150 }, { "epoch": 98.83582089552239, "grad_norm": 31.6049861907959, "learning_rate": 9.20995670995671e-06, "loss": 43.5351, "step": 4151 }, { "epoch": 98.85970149253731, "grad_norm": 26.57269287109375, "learning_rate": 9.207792207792208e-06, "loss": 42.2598, "step": 4152 }, { "epoch": 98.88358208955223, "grad_norm": 30.60957908630371, "learning_rate": 9.205627705627706e-06, "loss": 42.3751, "step": 4153 }, { "epoch": 98.90746268656716, "grad_norm": 28.574939727783203, "learning_rate": 9.203463203463204e-06, "loss": 41.8665, "step": 4154 }, { "epoch": 98.9313432835821, "grad_norm": 24.66292953491211, "learning_rate": 9.201298701298702e-06, "loss": 42.2066, "step": 4155 }, { "epoch": 98.95522388059702, "grad_norm": 23.727333068847656, "learning_rate": 9.1991341991342e-06, "loss": 41.3947, "step": 4156 }, { "epoch": 98.97910447761194, "grad_norm": 27.1662654876709, "learning_rate": 9.196969696969697e-06, "loss": 42.752, "step": 4157 }, { "epoch": 99.0, "grad_norm": 19.463891983032227, "learning_rate": 9.194805194805195e-06, "loss": 35.6173, "step": 4158 }, { "epoch": 99.02388059701492, "grad_norm": 31.107654571533203, "learning_rate": 9.192640692640693e-06, "loss": 42.7329, "step": 4159 }, { "epoch": 99.04776119402985, "grad_norm": 26.082523345947266, "learning_rate": 9.19047619047619e-06, "loss": 43.3724, "step": 4160 }, { "epoch": 99.07164179104478, "grad_norm": 23.824567794799805, "learning_rate": 9.188311688311688e-06, "loss": 42.6574, "step": 4161 }, { "epoch": 99.0955223880597, "grad_norm": 23.710350036621094, "learning_rate": 9.186147186147188e-06, "loss": 41.6831, "step": 4162 }, { "epoch": 99.11940298507463, "grad_norm": 28.668537139892578, "learning_rate": 9.183982683982686e-06, "loss": 41.099, "step": 4163 }, { "epoch": 99.14328358208955, "grad_norm": 21.060327529907227, "learning_rate": 9.181818181818184e-06, "loss": 43.0679, "step": 4164 }, { "epoch": 99.16716417910447, "grad_norm": 25.86065673828125, "learning_rate": 9.179653679653681e-06, "loss": 42.248, "step": 4165 }, { "epoch": 99.1910447761194, "grad_norm": 20.043672561645508, "learning_rate": 9.177489177489179e-06, "loss": 41.114, "step": 4166 }, { "epoch": 99.21492537313434, "grad_norm": 25.1352481842041, "learning_rate": 9.175324675324675e-06, "loss": 40.9968, "step": 4167 }, { "epoch": 99.23880597014926, "grad_norm": 20.042200088500977, "learning_rate": 9.173160173160173e-06, "loss": 41.9535, "step": 4168 }, { "epoch": 99.26268656716418, "grad_norm": 27.261369705200195, "learning_rate": 9.17099567099567e-06, "loss": 42.6293, "step": 4169 }, { "epoch": 99.2865671641791, "grad_norm": 23.163576126098633, "learning_rate": 9.168831168831169e-06, "loss": 41.9948, "step": 4170 }, { "epoch": 99.31044776119403, "grad_norm": 27.297080993652344, "learning_rate": 9.166666666666666e-06, "loss": 41.4716, "step": 4171 }, { "epoch": 99.33432835820895, "grad_norm": 22.44979476928711, "learning_rate": 9.164502164502164e-06, "loss": 42.406, "step": 4172 }, { "epoch": 99.35820895522389, "grad_norm": 23.482084274291992, "learning_rate": 9.162337662337664e-06, "loss": 41.5008, "step": 4173 }, { "epoch": 99.38208955223881, "grad_norm": 22.505319595336914, "learning_rate": 9.160173160173162e-06, "loss": 40.9368, "step": 4174 }, { "epoch": 99.40597014925373, "grad_norm": 24.250532150268555, "learning_rate": 9.15800865800866e-06, "loss": 40.7122, "step": 4175 }, { "epoch": 99.42985074626866, "grad_norm": 23.2113037109375, "learning_rate": 9.155844155844157e-06, "loss": 41.7559, "step": 4176 }, { "epoch": 99.45373134328358, "grad_norm": 18.7581787109375, "learning_rate": 9.153679653679655e-06, "loss": 41.661, "step": 4177 }, { "epoch": 99.4776119402985, "grad_norm": 17.8604793548584, "learning_rate": 9.151515151515153e-06, "loss": 41.51, "step": 4178 }, { "epoch": 99.50149253731344, "grad_norm": 16.258312225341797, "learning_rate": 9.14935064935065e-06, "loss": 41.2024, "step": 4179 }, { "epoch": 99.52537313432836, "grad_norm": 16.66613006591797, "learning_rate": 9.147186147186148e-06, "loss": 42.5017, "step": 4180 }, { "epoch": 99.54925373134328, "grad_norm": 15.366393089294434, "learning_rate": 9.145021645021646e-06, "loss": 41.6167, "step": 4181 }, { "epoch": 99.57313432835821, "grad_norm": 23.028663635253906, "learning_rate": 9.142857142857144e-06, "loss": 42.308, "step": 4182 }, { "epoch": 99.59701492537313, "grad_norm": 16.91287612915039, "learning_rate": 9.140692640692642e-06, "loss": 43.1037, "step": 4183 }, { "epoch": 99.62089552238805, "grad_norm": 19.781919479370117, "learning_rate": 9.13852813852814e-06, "loss": 42.3187, "step": 4184 }, { "epoch": 99.64477611940299, "grad_norm": 18.985305786132812, "learning_rate": 9.136363636363637e-06, "loss": 41.971, "step": 4185 }, { "epoch": 99.66865671641791, "grad_norm": 17.393688201904297, "learning_rate": 9.134199134199135e-06, "loss": 41.1467, "step": 4186 }, { "epoch": 99.69253731343284, "grad_norm": 19.685924530029297, "learning_rate": 9.132034632034633e-06, "loss": 41.822, "step": 4187 }, { "epoch": 99.71641791044776, "grad_norm": 19.761327743530273, "learning_rate": 9.12987012987013e-06, "loss": 42.1768, "step": 4188 }, { "epoch": 99.74029850746268, "grad_norm": 16.2159423828125, "learning_rate": 9.127705627705628e-06, "loss": 42.9327, "step": 4189 }, { "epoch": 99.7641791044776, "grad_norm": 21.257530212402344, "learning_rate": 9.125541125541126e-06, "loss": 42.556, "step": 4190 }, { "epoch": 99.78805970149254, "grad_norm": NaN, "learning_rate": 9.123376623376624e-06, "loss": 53.9793, "step": 4191 }, { "epoch": 99.81194029850747, "grad_norm": 19.869991302490234, "learning_rate": 9.123376623376624e-06, "loss": 41.4833, "step": 4192 }, { "epoch": 99.83582089552239, "grad_norm": 17.66855239868164, "learning_rate": 9.121212121212122e-06, "loss": 41.6514, "step": 4193 }, { "epoch": 99.85970149253731, "grad_norm": 19.992225646972656, "learning_rate": 9.11904761904762e-06, "loss": 43.4129, "step": 4194 }, { "epoch": 99.88358208955223, "grad_norm": 23.21436882019043, "learning_rate": 9.116883116883117e-06, "loss": 43.2426, "step": 4195 }, { "epoch": 99.90746268656716, "grad_norm": 18.16109848022461, "learning_rate": 9.114718614718615e-06, "loss": 41.9741, "step": 4196 }, { "epoch": 99.9313432835821, "grad_norm": 22.761810302734375, "learning_rate": 9.112554112554113e-06, "loss": 41.4668, "step": 4197 }, { "epoch": 99.95522388059702, "grad_norm": 21.3942928314209, "learning_rate": 9.110389610389611e-06, "loss": 41.6686, "step": 4198 }, { "epoch": 99.97910447761194, "grad_norm": 17.734172821044922, "learning_rate": 9.108225108225109e-06, "loss": 41.746, "step": 4199 }, { "epoch": 100.0, "grad_norm": 22.795557022094727, "learning_rate": 9.106060606060606e-06, "loss": 37.4113, "step": 4200 }, { "epoch": 100.02388059701492, "grad_norm": 18.693927764892578, "learning_rate": 9.103896103896104e-06, "loss": 41.1692, "step": 4201 }, { "epoch": 100.04776119402985, "grad_norm": 15.947311401367188, "learning_rate": 9.101731601731602e-06, "loss": 43.5011, "step": 4202 }, { "epoch": 100.07164179104478, "grad_norm": 24.349090576171875, "learning_rate": 9.0995670995671e-06, "loss": 41.954, "step": 4203 }, { "epoch": 100.0955223880597, "grad_norm": 18.305612564086914, "learning_rate": 9.097402597402598e-06, "loss": 41.7676, "step": 4204 }, { "epoch": 100.11940298507463, "grad_norm": 29.68235206604004, "learning_rate": 9.095238095238095e-06, "loss": 40.8579, "step": 4205 }, { "epoch": 100.14328358208955, "grad_norm": 24.512508392333984, "learning_rate": 9.093073593073593e-06, "loss": 40.7238, "step": 4206 }, { "epoch": 100.16716417910447, "grad_norm": 24.545705795288086, "learning_rate": 9.090909090909091e-06, "loss": 42.7197, "step": 4207 }, { "epoch": 100.1910447761194, "grad_norm": 18.792917251586914, "learning_rate": 9.088744588744589e-06, "loss": 40.8385, "step": 4208 }, { "epoch": 100.21492537313434, "grad_norm": 21.766145706176758, "learning_rate": 9.086580086580087e-06, "loss": 41.3234, "step": 4209 }, { "epoch": 100.23880597014926, "grad_norm": 17.32309341430664, "learning_rate": 9.084415584415586e-06, "loss": 40.6989, "step": 4210 }, { "epoch": 100.26268656716418, "grad_norm": 17.80112648010254, "learning_rate": 9.082251082251084e-06, "loss": 41.0043, "step": 4211 }, { "epoch": 100.2865671641791, "grad_norm": 15.762267112731934, "learning_rate": 9.080086580086582e-06, "loss": 42.5453, "step": 4212 }, { "epoch": 100.31044776119403, "grad_norm": 15.99219036102295, "learning_rate": 9.07792207792208e-06, "loss": 41.9223, "step": 4213 }, { "epoch": 100.33432835820895, "grad_norm": 21.16149139404297, "learning_rate": 9.075757575757577e-06, "loss": 41.7332, "step": 4214 }, { "epoch": 100.35820895522389, "grad_norm": 16.26340675354004, "learning_rate": 9.073593073593075e-06, "loss": 41.9333, "step": 4215 }, { "epoch": 100.38208955223881, "grad_norm": 22.789945602416992, "learning_rate": 9.071428571428573e-06, "loss": 41.5922, "step": 4216 }, { "epoch": 100.40597014925373, "grad_norm": 20.777421951293945, "learning_rate": 9.06926406926407e-06, "loss": 42.4934, "step": 4217 }, { "epoch": 100.42985074626866, "grad_norm": 20.417619705200195, "learning_rate": 9.067099567099567e-06, "loss": 42.0611, "step": 4218 }, { "epoch": 100.45373134328358, "grad_norm": 17.323135375976562, "learning_rate": 9.064935064935065e-06, "loss": 41.4595, "step": 4219 }, { "epoch": 100.4776119402985, "grad_norm": 17.62958335876465, "learning_rate": 9.062770562770562e-06, "loss": 42.1578, "step": 4220 }, { "epoch": 100.50149253731344, "grad_norm": 19.73848533630371, "learning_rate": 9.06060606060606e-06, "loss": 40.6611, "step": 4221 }, { "epoch": 100.52537313432836, "grad_norm": 15.945398330688477, "learning_rate": 9.05844155844156e-06, "loss": 41.9703, "step": 4222 }, { "epoch": 100.54925373134328, "grad_norm": 31.24019432067871, "learning_rate": 9.056277056277057e-06, "loss": 42.1433, "step": 4223 }, { "epoch": 100.57313432835821, "grad_norm": 21.933677673339844, "learning_rate": 9.054112554112555e-06, "loss": 41.873, "step": 4224 }, { "epoch": 100.59701492537313, "grad_norm": 31.41733741760254, "learning_rate": 9.051948051948053e-06, "loss": 42.7139, "step": 4225 }, { "epoch": 100.62089552238805, "grad_norm": 21.998600006103516, "learning_rate": 9.049783549783551e-06, "loss": 42.7483, "step": 4226 }, { "epoch": 100.64477611940299, "grad_norm": 34.37179183959961, "learning_rate": 9.047619047619049e-06, "loss": 41.3319, "step": 4227 }, { "epoch": 100.66865671641791, "grad_norm": 27.14617156982422, "learning_rate": 9.045454545454546e-06, "loss": 42.022, "step": 4228 }, { "epoch": 100.69253731343284, "grad_norm": 37.454708099365234, "learning_rate": 9.043290043290044e-06, "loss": 41.9875, "step": 4229 }, { "epoch": 100.71641791044776, "grad_norm": 32.32929229736328, "learning_rate": 9.041125541125542e-06, "loss": 43.1461, "step": 4230 }, { "epoch": 100.74029850746268, "grad_norm": 33.369842529296875, "learning_rate": 9.03896103896104e-06, "loss": 42.1309, "step": 4231 }, { "epoch": 100.7641791044776, "grad_norm": 26.55228042602539, "learning_rate": 9.036796536796538e-06, "loss": 42.6242, "step": 4232 }, { "epoch": 100.78805970149254, "grad_norm": 30.329452514648438, "learning_rate": 9.034632034632035e-06, "loss": 41.174, "step": 4233 }, { "epoch": 100.81194029850747, "grad_norm": 32.0432014465332, "learning_rate": 9.032467532467533e-06, "loss": 43.1256, "step": 4234 }, { "epoch": 100.83582089552239, "grad_norm": 29.122236251831055, "learning_rate": 9.030303030303031e-06, "loss": 41.3778, "step": 4235 }, { "epoch": 100.85970149253731, "grad_norm": 24.6899471282959, "learning_rate": 9.028138528138529e-06, "loss": 42.2167, "step": 4236 }, { "epoch": 100.88358208955223, "grad_norm": 31.051576614379883, "learning_rate": 9.025974025974027e-06, "loss": 42.5137, "step": 4237 }, { "epoch": 100.90746268656716, "grad_norm": 27.56793785095215, "learning_rate": 9.023809523809524e-06, "loss": 42.2763, "step": 4238 }, { "epoch": 100.9313432835821, "grad_norm": 35.045108795166016, "learning_rate": 9.021645021645022e-06, "loss": 43.3116, "step": 4239 }, { "epoch": 100.95522388059702, "grad_norm": 28.35376739501953, "learning_rate": 9.01948051948052e-06, "loss": 42.4737, "step": 4240 }, { "epoch": 100.97910447761194, "grad_norm": 29.537580490112305, "learning_rate": 9.017316017316018e-06, "loss": 42.2073, "step": 4241 }, { "epoch": 101.0, "grad_norm": 24.736759185791016, "learning_rate": 9.015151515151516e-06, "loss": 37.5375, "step": 4242 }, { "epoch": 101.02388059701492, "grad_norm": 27.93048667907715, "learning_rate": 9.012987012987013e-06, "loss": 42.1642, "step": 4243 }, { "epoch": 101.04776119402985, "grad_norm": 24.460664749145508, "learning_rate": 9.010822510822511e-06, "loss": 42.1769, "step": 4244 }, { "epoch": 101.07164179104478, "grad_norm": 22.52399253845215, "learning_rate": 9.008658008658009e-06, "loss": 41.99, "step": 4245 }, { "epoch": 101.0955223880597, "grad_norm": 19.33254623413086, "learning_rate": 9.006493506493509e-06, "loss": 40.422, "step": 4246 }, { "epoch": 101.11940298507463, "grad_norm": 22.645910263061523, "learning_rate": 9.004329004329005e-06, "loss": 42.8041, "step": 4247 }, { "epoch": 101.14328358208955, "grad_norm": 20.89433479309082, "learning_rate": 9.002164502164502e-06, "loss": 43.0258, "step": 4248 }, { "epoch": 101.16716417910447, "grad_norm": 19.612567901611328, "learning_rate": 9e-06, "loss": 41.4478, "step": 4249 }, { "epoch": 101.1910447761194, "grad_norm": 19.565265655517578, "learning_rate": 8.997835497835498e-06, "loss": 42.6328, "step": 4250 }, { "epoch": 101.21492537313434, "grad_norm": 20.93030548095703, "learning_rate": 8.995670995670996e-06, "loss": 42.7268, "step": 4251 }, { "epoch": 101.23880597014926, "grad_norm": 18.67580795288086, "learning_rate": 8.993506493506494e-06, "loss": 43.3658, "step": 4252 }, { "epoch": 101.26268656716418, "grad_norm": 26.36067008972168, "learning_rate": 8.991341991341991e-06, "loss": 42.2089, "step": 4253 }, { "epoch": 101.2865671641791, "grad_norm": 19.841224670410156, "learning_rate": 8.98917748917749e-06, "loss": 40.842, "step": 4254 }, { "epoch": 101.31044776119403, "grad_norm": 26.14617156982422, "learning_rate": 8.987012987012987e-06, "loss": 43.1673, "step": 4255 }, { "epoch": 101.33432835820895, "grad_norm": 21.286962509155273, "learning_rate": 8.984848484848485e-06, "loss": 42.0463, "step": 4256 }, { "epoch": 101.35820895522389, "grad_norm": 26.335676193237305, "learning_rate": 8.982683982683983e-06, "loss": 41.8856, "step": 4257 }, { "epoch": 101.38208955223881, "grad_norm": 23.881567001342773, "learning_rate": 8.980519480519482e-06, "loss": 41.6253, "step": 4258 }, { "epoch": 101.40597014925373, "grad_norm": 21.65298843383789, "learning_rate": 8.97835497835498e-06, "loss": 42.0994, "step": 4259 }, { "epoch": 101.42985074626866, "grad_norm": 27.039722442626953, "learning_rate": 8.976190476190478e-06, "loss": 41.8836, "step": 4260 }, { "epoch": 101.45373134328358, "grad_norm": 20.1751766204834, "learning_rate": 8.974025974025975e-06, "loss": 41.1007, "step": 4261 }, { "epoch": 101.4776119402985, "grad_norm": 31.58852767944336, "learning_rate": 8.971861471861473e-06, "loss": 41.9793, "step": 4262 }, { "epoch": 101.50149253731344, "grad_norm": 21.907556533813477, "learning_rate": 8.969696969696971e-06, "loss": 41.509, "step": 4263 }, { "epoch": 101.52537313432836, "grad_norm": 32.310272216796875, "learning_rate": 8.967532467532469e-06, "loss": 41.1805, "step": 4264 }, { "epoch": 101.54925373134328, "grad_norm": 25.363170623779297, "learning_rate": 8.965367965367967e-06, "loss": 42.3668, "step": 4265 }, { "epoch": 101.57313432835821, "grad_norm": 29.320520401000977, "learning_rate": 8.963203463203464e-06, "loss": 41.7248, "step": 4266 }, { "epoch": 101.59701492537313, "grad_norm": 24.637983322143555, "learning_rate": 8.96103896103896e-06, "loss": 40.1595, "step": 4267 }, { "epoch": 101.62089552238805, "grad_norm": 32.69458770751953, "learning_rate": 8.958874458874458e-06, "loss": 41.6096, "step": 4268 }, { "epoch": 101.64477611940299, "grad_norm": 24.87364959716797, "learning_rate": 8.956709956709956e-06, "loss": 41.3295, "step": 4269 }, { "epoch": 101.66865671641791, "grad_norm": 31.5223445892334, "learning_rate": 8.954545454545456e-06, "loss": 42.1731, "step": 4270 }, { "epoch": 101.69253731343284, "grad_norm": 29.047664642333984, "learning_rate": 8.952380952380953e-06, "loss": 41.8301, "step": 4271 }, { "epoch": 101.71641791044776, "grad_norm": 31.420434951782227, "learning_rate": 8.950216450216451e-06, "loss": 41.6502, "step": 4272 }, { "epoch": 101.74029850746268, "grad_norm": 28.40896224975586, "learning_rate": 8.948051948051949e-06, "loss": 41.7585, "step": 4273 }, { "epoch": 101.7641791044776, "grad_norm": 32.256263732910156, "learning_rate": 8.945887445887447e-06, "loss": 41.8508, "step": 4274 }, { "epoch": 101.78805970149254, "grad_norm": 30.496904373168945, "learning_rate": 8.943722943722945e-06, "loss": 41.7192, "step": 4275 }, { "epoch": 101.81194029850747, "grad_norm": 31.20074462890625, "learning_rate": 8.941558441558442e-06, "loss": 42.687, "step": 4276 }, { "epoch": 101.83582089552239, "grad_norm": 27.639835357666016, "learning_rate": 8.93939393939394e-06, "loss": 41.7068, "step": 4277 }, { "epoch": 101.85970149253731, "grad_norm": 31.692638397216797, "learning_rate": 8.937229437229438e-06, "loss": 42.8243, "step": 4278 }, { "epoch": 101.88358208955223, "grad_norm": 28.27922248840332, "learning_rate": 8.935064935064936e-06, "loss": 41.8772, "step": 4279 }, { "epoch": 101.90746268656716, "grad_norm": 28.70676040649414, "learning_rate": 8.932900432900434e-06, "loss": 41.682, "step": 4280 }, { "epoch": 101.9313432835821, "grad_norm": 27.140151977539062, "learning_rate": 8.930735930735931e-06, "loss": 42.078, "step": 4281 }, { "epoch": 101.95522388059702, "grad_norm": 25.135448455810547, "learning_rate": 8.92857142857143e-06, "loss": 42.2035, "step": 4282 }, { "epoch": 101.97910447761194, "grad_norm": 22.988903045654297, "learning_rate": 8.926406926406927e-06, "loss": 41.2573, "step": 4283 }, { "epoch": 102.0, "grad_norm": 25.694786071777344, "learning_rate": 8.924242424242425e-06, "loss": 36.1049, "step": 4284 }, { "epoch": 102.02388059701492, "grad_norm": 24.528118133544922, "learning_rate": 8.922077922077923e-06, "loss": 42.162, "step": 4285 }, { "epoch": 102.04776119402985, "grad_norm": 27.563627243041992, "learning_rate": 8.91991341991342e-06, "loss": 41.7018, "step": 4286 }, { "epoch": 102.07164179104478, "grad_norm": 23.374286651611328, "learning_rate": 8.917748917748918e-06, "loss": 42.4075, "step": 4287 }, { "epoch": 102.0955223880597, "grad_norm": 28.673614501953125, "learning_rate": 8.915584415584416e-06, "loss": 41.8272, "step": 4288 }, { "epoch": 102.11940298507463, "grad_norm": 24.432859420776367, "learning_rate": 8.913419913419914e-06, "loss": 41.7054, "step": 4289 }, { "epoch": 102.14328358208955, "grad_norm": 26.83321189880371, "learning_rate": 8.911255411255412e-06, "loss": 42.2169, "step": 4290 }, { "epoch": 102.16716417910447, "grad_norm": 21.222537994384766, "learning_rate": 8.90909090909091e-06, "loss": 42.164, "step": 4291 }, { "epoch": 102.1910447761194, "grad_norm": 32.05888748168945, "learning_rate": 8.906926406926407e-06, "loss": 42.0759, "step": 4292 }, { "epoch": 102.21492537313434, "grad_norm": 22.959369659423828, "learning_rate": 8.904761904761905e-06, "loss": 43.0785, "step": 4293 }, { "epoch": 102.23880597014926, "grad_norm": 37.53632736206055, "learning_rate": 8.902597402597405e-06, "loss": 42.1665, "step": 4294 }, { "epoch": 102.26268656716418, "grad_norm": 29.86913299560547, "learning_rate": 8.900432900432902e-06, "loss": 41.3932, "step": 4295 }, { "epoch": 102.2865671641791, "grad_norm": 31.11789894104004, "learning_rate": 8.8982683982684e-06, "loss": 43.0771, "step": 4296 }, { "epoch": 102.31044776119403, "grad_norm": 27.745323181152344, "learning_rate": 8.896103896103896e-06, "loss": 41.1395, "step": 4297 }, { "epoch": 102.33432835820895, "grad_norm": 25.368127822875977, "learning_rate": 8.893939393939394e-06, "loss": 42.7978, "step": 4298 }, { "epoch": 102.35820895522389, "grad_norm": 24.081409454345703, "learning_rate": 8.891774891774892e-06, "loss": 41.4698, "step": 4299 }, { "epoch": 102.38208955223881, "grad_norm": 24.39154815673828, "learning_rate": 8.88961038961039e-06, "loss": 41.6765, "step": 4300 }, { "epoch": 102.40597014925373, "grad_norm": 21.794816970825195, "learning_rate": 8.887445887445887e-06, "loss": 40.9793, "step": 4301 }, { "epoch": 102.42985074626866, "grad_norm": 24.50321388244629, "learning_rate": 8.885281385281385e-06, "loss": 41.3914, "step": 4302 }, { "epoch": 102.45373134328358, "grad_norm": 21.492965698242188, "learning_rate": 8.883116883116883e-06, "loss": 42.1772, "step": 4303 }, { "epoch": 102.4776119402985, "grad_norm": 25.231094360351562, "learning_rate": 8.88095238095238e-06, "loss": 41.6758, "step": 4304 }, { "epoch": 102.50149253731344, "grad_norm": 21.51530647277832, "learning_rate": 8.87878787878788e-06, "loss": 41.0819, "step": 4305 }, { "epoch": 102.52537313432836, "grad_norm": 21.023269653320312, "learning_rate": 8.876623376623378e-06, "loss": 41.446, "step": 4306 }, { "epoch": 102.54925373134328, "grad_norm": 25.81951904296875, "learning_rate": 8.874458874458876e-06, "loss": 41.3221, "step": 4307 }, { "epoch": 102.57313432835821, "grad_norm": 19.7045841217041, "learning_rate": 8.872294372294374e-06, "loss": 42.5273, "step": 4308 }, { "epoch": 102.59701492537313, "grad_norm": 30.536680221557617, "learning_rate": 8.870129870129871e-06, "loss": 40.7574, "step": 4309 }, { "epoch": 102.62089552238805, "grad_norm": 22.61910629272461, "learning_rate": 8.86796536796537e-06, "loss": 42.1551, "step": 4310 }, { "epoch": 102.64477611940299, "grad_norm": 31.215150833129883, "learning_rate": 8.865800865800867e-06, "loss": 42.3013, "step": 4311 }, { "epoch": 102.66865671641791, "grad_norm": 29.22039794921875, "learning_rate": 8.863636363636365e-06, "loss": 42.3447, "step": 4312 }, { "epoch": 102.69253731343284, "grad_norm": 31.03571128845215, "learning_rate": 8.861471861471863e-06, "loss": 41.9643, "step": 4313 }, { "epoch": 102.71641791044776, "grad_norm": 26.90915298461914, "learning_rate": 8.85930735930736e-06, "loss": 42.8879, "step": 4314 }, { "epoch": 102.74029850746268, "grad_norm": 31.34430503845215, "learning_rate": 8.857142857142858e-06, "loss": 41.6856, "step": 4315 }, { "epoch": 102.7641791044776, "grad_norm": 26.868675231933594, "learning_rate": 8.854978354978356e-06, "loss": 41.1538, "step": 4316 }, { "epoch": 102.78805970149254, "grad_norm": 26.82084846496582, "learning_rate": 8.852813852813854e-06, "loss": 42.6873, "step": 4317 }, { "epoch": 102.81194029850747, "grad_norm": 24.742094039916992, "learning_rate": 8.850649350649352e-06, "loss": 43.168, "step": 4318 }, { "epoch": 102.83582089552239, "grad_norm": 23.871686935424805, "learning_rate": 8.84848484848485e-06, "loss": 42.0424, "step": 4319 }, { "epoch": 102.85970149253731, "grad_norm": 21.681507110595703, "learning_rate": 8.846320346320347e-06, "loss": 42.1547, "step": 4320 }, { "epoch": 102.88358208955223, "grad_norm": 28.63477325439453, "learning_rate": 8.844155844155845e-06, "loss": 41.2654, "step": 4321 }, { "epoch": 102.90746268656716, "grad_norm": 19.495147705078125, "learning_rate": 8.841991341991343e-06, "loss": 41.5641, "step": 4322 }, { "epoch": 102.9313432835821, "grad_norm": 33.34874725341797, "learning_rate": 8.83982683982684e-06, "loss": 41.7787, "step": 4323 }, { "epoch": 102.95522388059702, "grad_norm": 27.586767196655273, "learning_rate": 8.837662337662338e-06, "loss": 40.4204, "step": 4324 }, { "epoch": 102.97910447761194, "grad_norm": 28.708871841430664, "learning_rate": 8.835497835497836e-06, "loss": 41.7225, "step": 4325 }, { "epoch": 103.0, "grad_norm": 22.439306259155273, "learning_rate": 8.833333333333334e-06, "loss": 35.3291, "step": 4326 }, { "epoch": 103.02388059701492, "grad_norm": 25.760793685913086, "learning_rate": 8.831168831168832e-06, "loss": 42.0465, "step": 4327 }, { "epoch": 103.04776119402985, "grad_norm": 22.56456756591797, "learning_rate": 8.82900432900433e-06, "loss": 41.6094, "step": 4328 }, { "epoch": 103.07164179104478, "grad_norm": 30.912078857421875, "learning_rate": 8.826839826839827e-06, "loss": 43.0196, "step": 4329 }, { "epoch": 103.0955223880597, "grad_norm": 23.01909065246582, "learning_rate": 8.824675324675325e-06, "loss": 42.7305, "step": 4330 }, { "epoch": 103.11940298507463, "grad_norm": 29.197927474975586, "learning_rate": 8.822510822510823e-06, "loss": 41.0641, "step": 4331 }, { "epoch": 103.14328358208955, "grad_norm": 27.894495010375977, "learning_rate": 8.82034632034632e-06, "loss": 40.9656, "step": 4332 }, { "epoch": 103.16716417910447, "grad_norm": 27.135541915893555, "learning_rate": 8.818181818181819e-06, "loss": 41.7715, "step": 4333 }, { "epoch": 103.1910447761194, "grad_norm": 24.774351119995117, "learning_rate": 8.816017316017316e-06, "loss": 40.5809, "step": 4334 }, { "epoch": 103.21492537313434, "grad_norm": 27.74059295654297, "learning_rate": 8.813852813852814e-06, "loss": 40.9501, "step": 4335 }, { "epoch": 103.23880597014926, "grad_norm": 24.502626419067383, "learning_rate": 8.811688311688312e-06, "loss": 41.3341, "step": 4336 }, { "epoch": 103.26268656716418, "grad_norm": 29.406909942626953, "learning_rate": 8.80952380952381e-06, "loss": 42.8791, "step": 4337 }, { "epoch": 103.2865671641791, "grad_norm": 24.162965774536133, "learning_rate": 8.807359307359308e-06, "loss": 41.3314, "step": 4338 }, { "epoch": 103.31044776119403, "grad_norm": 27.782527923583984, "learning_rate": 8.805194805194805e-06, "loss": 41.0151, "step": 4339 }, { "epoch": 103.33432835820895, "grad_norm": 25.89789390563965, "learning_rate": 8.803030303030303e-06, "loss": 41.0182, "step": 4340 }, { "epoch": 103.35820895522389, "grad_norm": 31.413692474365234, "learning_rate": 8.800865800865803e-06, "loss": 41.1635, "step": 4341 }, { "epoch": 103.38208955223881, "grad_norm": 23.838945388793945, "learning_rate": 8.7987012987013e-06, "loss": 42.2695, "step": 4342 }, { "epoch": 103.40597014925373, "grad_norm": 27.55811309814453, "learning_rate": 8.796536796536798e-06, "loss": 42.6491, "step": 4343 }, { "epoch": 103.42985074626866, "grad_norm": 24.99410629272461, "learning_rate": 8.794372294372296e-06, "loss": 40.49, "step": 4344 }, { "epoch": 103.45373134328358, "grad_norm": 32.69471740722656, "learning_rate": 8.792207792207794e-06, "loss": 41.0334, "step": 4345 }, { "epoch": 103.4776119402985, "grad_norm": 25.661212921142578, "learning_rate": 8.79004329004329e-06, "loss": 42.0111, "step": 4346 }, { "epoch": 103.50149253731344, "grad_norm": 32.33528518676758, "learning_rate": 8.787878787878788e-06, "loss": 41.451, "step": 4347 }, { "epoch": 103.52537313432836, "grad_norm": 30.863183975219727, "learning_rate": 8.785714285714286e-06, "loss": 43.0101, "step": 4348 }, { "epoch": 103.54925373134328, "grad_norm": 27.80331802368164, "learning_rate": 8.783549783549783e-06, "loss": 41.8201, "step": 4349 }, { "epoch": 103.57313432835821, "grad_norm": 25.65656089782715, "learning_rate": 8.781385281385281e-06, "loss": 42.3188, "step": 4350 }, { "epoch": 103.59701492537313, "grad_norm": 27.477493286132812, "learning_rate": 8.779220779220779e-06, "loss": 42.4443, "step": 4351 }, { "epoch": 103.62089552238805, "grad_norm": 19.195556640625, "learning_rate": 8.777056277056277e-06, "loss": 41.6902, "step": 4352 }, { "epoch": 103.64477611940299, "grad_norm": 31.54138946533203, "learning_rate": 8.774891774891776e-06, "loss": 41.7891, "step": 4353 }, { "epoch": 103.66865671641791, "grad_norm": 24.392765045166016, "learning_rate": 8.772727272727274e-06, "loss": 43.201, "step": 4354 }, { "epoch": 103.69253731343284, "grad_norm": 31.868196487426758, "learning_rate": 8.770562770562772e-06, "loss": 42.0864, "step": 4355 }, { "epoch": 103.71641791044776, "grad_norm": 28.33005142211914, "learning_rate": 8.76839826839827e-06, "loss": 40.8061, "step": 4356 }, { "epoch": 103.74029850746268, "grad_norm": 29.663543701171875, "learning_rate": 8.766233766233767e-06, "loss": 41.195, "step": 4357 }, { "epoch": 103.7641791044776, "grad_norm": 24.99871826171875, "learning_rate": 8.764069264069265e-06, "loss": 42.0865, "step": 4358 }, { "epoch": 103.78805970149254, "grad_norm": 26.281768798828125, "learning_rate": 8.761904761904763e-06, "loss": 42.2214, "step": 4359 }, { "epoch": 103.81194029850747, "grad_norm": 25.848814010620117, "learning_rate": 8.75974025974026e-06, "loss": 41.752, "step": 4360 }, { "epoch": 103.83582089552239, "grad_norm": 25.99828338623047, "learning_rate": 8.757575757575759e-06, "loss": 41.3675, "step": 4361 }, { "epoch": 103.85970149253731, "grad_norm": 24.577255249023438, "learning_rate": 8.755411255411256e-06, "loss": 41.3633, "step": 4362 }, { "epoch": 103.88358208955223, "grad_norm": 28.189889907836914, "learning_rate": 8.753246753246754e-06, "loss": 41.1397, "step": 4363 }, { "epoch": 103.90746268656716, "grad_norm": 21.285263061523438, "learning_rate": 8.751082251082252e-06, "loss": 42.9034, "step": 4364 }, { "epoch": 103.9313432835821, "grad_norm": 26.459442138671875, "learning_rate": 8.74891774891775e-06, "loss": 42.1868, "step": 4365 }, { "epoch": 103.95522388059702, "grad_norm": 23.833219528198242, "learning_rate": 8.746753246753248e-06, "loss": 43.8222, "step": 4366 }, { "epoch": 103.97910447761194, "grad_norm": 28.269039154052734, "learning_rate": 8.744588744588745e-06, "loss": 41.7916, "step": 4367 }, { "epoch": 104.0, "grad_norm": 21.251577377319336, "learning_rate": 8.742424242424243e-06, "loss": 36.7322, "step": 4368 }, { "epoch": 104.02388059701492, "grad_norm": 24.385892868041992, "learning_rate": 8.740259740259741e-06, "loss": 41.1758, "step": 4369 }, { "epoch": 104.04776119402985, "grad_norm": 24.85951805114746, "learning_rate": 8.738095238095239e-06, "loss": 41.1797, "step": 4370 }, { "epoch": 104.07164179104478, "grad_norm": 22.94902229309082, "learning_rate": 8.735930735930737e-06, "loss": 42.0245, "step": 4371 }, { "epoch": 104.0955223880597, "grad_norm": 22.89316749572754, "learning_rate": 8.733766233766234e-06, "loss": 40.8802, "step": 4372 }, { "epoch": 104.11940298507463, "grad_norm": 17.931550979614258, "learning_rate": 8.731601731601732e-06, "loss": 41.7585, "step": 4373 }, { "epoch": 104.14328358208955, "grad_norm": 25.272066116333008, "learning_rate": 8.72943722943723e-06, "loss": 41.9595, "step": 4374 }, { "epoch": 104.16716417910447, "grad_norm": 18.83379364013672, "learning_rate": 8.727272727272728e-06, "loss": 42.0377, "step": 4375 }, { "epoch": 104.1910447761194, "grad_norm": 26.816553115844727, "learning_rate": 8.725108225108226e-06, "loss": 42.3945, "step": 4376 }, { "epoch": 104.21492537313434, "grad_norm": 21.217594146728516, "learning_rate": 8.722943722943723e-06, "loss": 41.0879, "step": 4377 }, { "epoch": 104.23880597014926, "grad_norm": 26.040369033813477, "learning_rate": 8.720779220779221e-06, "loss": 41.9009, "step": 4378 }, { "epoch": 104.26268656716418, "grad_norm": 21.120927810668945, "learning_rate": 8.718614718614719e-06, "loss": 41.5876, "step": 4379 }, { "epoch": 104.2865671641791, "grad_norm": 24.789485931396484, "learning_rate": 8.716450216450217e-06, "loss": 42.4683, "step": 4380 }, { "epoch": 104.31044776119403, "grad_norm": 20.2288761138916, "learning_rate": 8.714285714285715e-06, "loss": 41.7415, "step": 4381 }, { "epoch": 104.33432835820895, "grad_norm": 23.13172721862793, "learning_rate": 8.712121212121212e-06, "loss": 41.8696, "step": 4382 }, { "epoch": 104.35820895522389, "grad_norm": 21.838037490844727, "learning_rate": 8.70995670995671e-06, "loss": 43.1081, "step": 4383 }, { "epoch": 104.38208955223881, "grad_norm": 18.31660270690918, "learning_rate": 8.707792207792208e-06, "loss": 41.6609, "step": 4384 }, { "epoch": 104.40597014925373, "grad_norm": 20.596466064453125, "learning_rate": 8.705627705627706e-06, "loss": 41.9226, "step": 4385 }, { "epoch": 104.42985074626866, "grad_norm": 19.209354400634766, "learning_rate": 8.703463203463204e-06, "loss": 41.8937, "step": 4386 }, { "epoch": 104.45373134328358, "grad_norm": 21.35397720336914, "learning_rate": 8.701298701298701e-06, "loss": 41.222, "step": 4387 }, { "epoch": 104.4776119402985, "grad_norm": 16.040178298950195, "learning_rate": 8.6991341991342e-06, "loss": 40.8326, "step": 4388 }, { "epoch": 104.50149253731344, "grad_norm": 26.846803665161133, "learning_rate": 8.696969696969699e-06, "loss": 42.0748, "step": 4389 }, { "epoch": 104.52537313432836, "grad_norm": 19.368515014648438, "learning_rate": 8.694805194805196e-06, "loss": 41.4322, "step": 4390 }, { "epoch": 104.54925373134328, "grad_norm": 30.950580596923828, "learning_rate": 8.692640692640694e-06, "loss": 41.2695, "step": 4391 }, { "epoch": 104.57313432835821, "grad_norm": 23.07410430908203, "learning_rate": 8.690476190476192e-06, "loss": 41.8303, "step": 4392 }, { "epoch": 104.59701492537313, "grad_norm": 27.158117294311523, "learning_rate": 8.68831168831169e-06, "loss": 42.4952, "step": 4393 }, { "epoch": 104.62089552238805, "grad_norm": 25.001056671142578, "learning_rate": 8.686147186147188e-06, "loss": 41.4797, "step": 4394 }, { "epoch": 104.64477611940299, "grad_norm": 27.168846130371094, "learning_rate": 8.683982683982685e-06, "loss": 41.8096, "step": 4395 }, { "epoch": 104.66865671641791, "grad_norm": 21.596757888793945, "learning_rate": 8.681818181818182e-06, "loss": 41.9243, "step": 4396 }, { "epoch": 104.69253731343284, "grad_norm": 27.944332122802734, "learning_rate": 8.67965367965368e-06, "loss": 42.5102, "step": 4397 }, { "epoch": 104.71641791044776, "grad_norm": 24.295595169067383, "learning_rate": 8.677489177489177e-06, "loss": 42.5514, "step": 4398 }, { "epoch": 104.74029850746268, "grad_norm": 27.505474090576172, "learning_rate": 8.675324675324675e-06, "loss": 42.017, "step": 4399 }, { "epoch": 104.7641791044776, "grad_norm": 24.030363082885742, "learning_rate": 8.673160173160173e-06, "loss": 42.5318, "step": 4400 }, { "epoch": 104.78805970149254, "grad_norm": 26.74481964111328, "learning_rate": 8.670995670995672e-06, "loss": 42.4153, "step": 4401 }, { "epoch": 104.81194029850747, "grad_norm": 25.275205612182617, "learning_rate": 8.66883116883117e-06, "loss": 40.9114, "step": 4402 }, { "epoch": 104.83582089552239, "grad_norm": 19.21797752380371, "learning_rate": 8.666666666666668e-06, "loss": 41.4621, "step": 4403 }, { "epoch": 104.85970149253731, "grad_norm": 21.647167205810547, "learning_rate": 8.664502164502166e-06, "loss": 42.0579, "step": 4404 }, { "epoch": 104.88358208955223, "grad_norm": 18.133159637451172, "learning_rate": 8.662337662337663e-06, "loss": 41.3995, "step": 4405 }, { "epoch": 104.90746268656716, "grad_norm": 17.7130069732666, "learning_rate": 8.660173160173161e-06, "loss": 42.2021, "step": 4406 }, { "epoch": 104.9313432835821, "grad_norm": 17.646291732788086, "learning_rate": 8.658008658008659e-06, "loss": 41.4231, "step": 4407 }, { "epoch": 104.95522388059702, "grad_norm": 20.67991065979004, "learning_rate": 8.655844155844157e-06, "loss": 40.8638, "step": 4408 }, { "epoch": 104.97910447761194, "grad_norm": 19.140832901000977, "learning_rate": 8.653679653679655e-06, "loss": 42.5387, "step": 4409 }, { "epoch": 105.0, "grad_norm": 13.847710609436035, "learning_rate": 8.651515151515152e-06, "loss": 35.4038, "step": 4410 }, { "epoch": 105.02388059701492, "grad_norm": 16.923620223999023, "learning_rate": 8.64935064935065e-06, "loss": 43.0403, "step": 4411 }, { "epoch": 105.04776119402985, "grad_norm": 17.983060836791992, "learning_rate": 8.647186147186148e-06, "loss": 42.2899, "step": 4412 }, { "epoch": 105.07164179104478, "grad_norm": 16.440452575683594, "learning_rate": 8.645021645021646e-06, "loss": 41.3221, "step": 4413 }, { "epoch": 105.0955223880597, "grad_norm": 20.931194305419922, "learning_rate": 8.642857142857144e-06, "loss": 41.6118, "step": 4414 }, { "epoch": 105.11940298507463, "grad_norm": 16.63971710205078, "learning_rate": 8.640692640692641e-06, "loss": 41.56, "step": 4415 }, { "epoch": 105.14328358208955, "grad_norm": 19.395835876464844, "learning_rate": 8.63852813852814e-06, "loss": 40.3512, "step": 4416 }, { "epoch": 105.16716417910447, "grad_norm": 23.419681549072266, "learning_rate": 8.636363636363637e-06, "loss": 41.588, "step": 4417 }, { "epoch": 105.1910447761194, "grad_norm": 18.330759048461914, "learning_rate": 8.634199134199135e-06, "loss": 41.5903, "step": 4418 }, { "epoch": 105.21492537313434, "grad_norm": 32.92748260498047, "learning_rate": 8.632034632034633e-06, "loss": 41.7201, "step": 4419 }, { "epoch": 105.23880597014926, "grad_norm": 23.43516731262207, "learning_rate": 8.62987012987013e-06, "loss": 42.0367, "step": 4420 }, { "epoch": 105.26268656716418, "grad_norm": 31.077037811279297, "learning_rate": 8.627705627705628e-06, "loss": 41.8229, "step": 4421 }, { "epoch": 105.2865671641791, "grad_norm": 24.310850143432617, "learning_rate": 8.625541125541126e-06, "loss": 41.869, "step": 4422 }, { "epoch": 105.31044776119403, "grad_norm": 29.064128875732422, "learning_rate": 8.623376623376624e-06, "loss": 41.3312, "step": 4423 }, { "epoch": 105.33432835820895, "grad_norm": 27.2437686920166, "learning_rate": 8.621212121212122e-06, "loss": 41.4347, "step": 4424 }, { "epoch": 105.35820895522389, "grad_norm": 26.48787498474121, "learning_rate": 8.61904761904762e-06, "loss": 41.9868, "step": 4425 }, { "epoch": 105.38208955223881, "grad_norm": 23.06917953491211, "learning_rate": 8.616883116883117e-06, "loss": 40.7182, "step": 4426 }, { "epoch": 105.40597014925373, "grad_norm": 25.888072967529297, "learning_rate": 8.614718614718615e-06, "loss": 43.4227, "step": 4427 }, { "epoch": 105.42985074626866, "grad_norm": 21.196561813354492, "learning_rate": 8.612554112554113e-06, "loss": 42.1541, "step": 4428 }, { "epoch": 105.45373134328358, "grad_norm": 23.897281646728516, "learning_rate": 8.61038961038961e-06, "loss": 42.3009, "step": 4429 }, { "epoch": 105.4776119402985, "grad_norm": 21.39472770690918, "learning_rate": 8.608225108225108e-06, "loss": 41.9873, "step": 4430 }, { "epoch": 105.50149253731344, "grad_norm": 22.932235717773438, "learning_rate": 8.606060606060606e-06, "loss": 42.9503, "step": 4431 }, { "epoch": 105.52537313432836, "grad_norm": 19.643224716186523, "learning_rate": 8.603896103896104e-06, "loss": 41.1197, "step": 4432 }, { "epoch": 105.54925373134328, "grad_norm": 22.474496841430664, "learning_rate": 8.601731601731602e-06, "loss": 41.472, "step": 4433 }, { "epoch": 105.57313432835821, "grad_norm": 18.618505477905273, "learning_rate": 8.5995670995671e-06, "loss": 42.0385, "step": 4434 }, { "epoch": 105.59701492537313, "grad_norm": 22.780241012573242, "learning_rate": 8.597402597402597e-06, "loss": 42.2941, "step": 4435 }, { "epoch": 105.62089552238805, "grad_norm": 18.00736427307129, "learning_rate": 8.595238095238097e-06, "loss": 41.086, "step": 4436 }, { "epoch": 105.64477611940299, "grad_norm": 26.372411727905273, "learning_rate": 8.593073593073595e-06, "loss": 43.5092, "step": 4437 }, { "epoch": 105.66865671641791, "grad_norm": 20.315715789794922, "learning_rate": 8.590909090909092e-06, "loss": 42.6546, "step": 4438 }, { "epoch": 105.69253731343284, "grad_norm": 25.5256404876709, "learning_rate": 8.58874458874459e-06, "loss": 41.5332, "step": 4439 }, { "epoch": 105.71641791044776, "grad_norm": 23.848834991455078, "learning_rate": 8.586580086580088e-06, "loss": 41.1996, "step": 4440 }, { "epoch": 105.74029850746268, "grad_norm": 22.64993667602539, "learning_rate": 8.584415584415586e-06, "loss": 41.0973, "step": 4441 }, { "epoch": 105.7641791044776, "grad_norm": 25.26251792907715, "learning_rate": 8.582251082251084e-06, "loss": 40.5223, "step": 4442 }, { "epoch": 105.78805970149254, "grad_norm": 18.45581817626953, "learning_rate": 8.580086580086581e-06, "loss": 40.3545, "step": 4443 }, { "epoch": 105.81194029850747, "grad_norm": 20.561473846435547, "learning_rate": 8.57792207792208e-06, "loss": 41.3425, "step": 4444 }, { "epoch": 105.83582089552239, "grad_norm": 19.369930267333984, "learning_rate": 8.575757575757575e-06, "loss": 41.9595, "step": 4445 }, { "epoch": 105.85970149253731, "grad_norm": 16.14900779724121, "learning_rate": 8.573593073593073e-06, "loss": 41.1797, "step": 4446 }, { "epoch": 105.88358208955223, "grad_norm": 21.74477195739746, "learning_rate": 8.571428571428571e-06, "loss": 40.7879, "step": 4447 }, { "epoch": 105.90746268656716, "grad_norm": 16.549848556518555, "learning_rate": 8.56926406926407e-06, "loss": 41.5383, "step": 4448 }, { "epoch": 105.9313432835821, "grad_norm": 18.291797637939453, "learning_rate": 8.567099567099568e-06, "loss": 42.818, "step": 4449 }, { "epoch": 105.95522388059702, "grad_norm": 21.389198303222656, "learning_rate": 8.564935064935066e-06, "loss": 41.8181, "step": 4450 }, { "epoch": 105.97910447761194, "grad_norm": 18.4671630859375, "learning_rate": 8.562770562770564e-06, "loss": 40.5471, "step": 4451 }, { "epoch": 106.0, "grad_norm": 12.387261390686035, "learning_rate": 8.560606060606062e-06, "loss": 37.1723, "step": 4452 }, { "epoch": 106.02388059701492, "grad_norm": 25.35000991821289, "learning_rate": 8.55844155844156e-06, "loss": 41.526, "step": 4453 }, { "epoch": 106.04776119402985, "grad_norm": 18.196853637695312, "learning_rate": 8.556277056277057e-06, "loss": 42.4372, "step": 4454 }, { "epoch": 106.07164179104478, "grad_norm": 17.911649703979492, "learning_rate": 8.554112554112555e-06, "loss": 41.6807, "step": 4455 }, { "epoch": 106.0955223880597, "grad_norm": 18.82575798034668, "learning_rate": 8.551948051948053e-06, "loss": 41.5713, "step": 4456 }, { "epoch": 106.11940298507463, "grad_norm": 17.8409423828125, "learning_rate": 8.54978354978355e-06, "loss": 41.243, "step": 4457 }, { "epoch": 106.14328358208955, "grad_norm": 14.669032096862793, "learning_rate": 8.547619047619048e-06, "loss": 41.3578, "step": 4458 }, { "epoch": 106.16716417910447, "grad_norm": 18.624805450439453, "learning_rate": 8.545454545454546e-06, "loss": 42.5552, "step": 4459 }, { "epoch": 106.1910447761194, "grad_norm": 15.485766410827637, "learning_rate": 8.543290043290044e-06, "loss": 42.2293, "step": 4460 }, { "epoch": 106.21492537313434, "grad_norm": 19.794565200805664, "learning_rate": 8.541125541125542e-06, "loss": 41.1415, "step": 4461 }, { "epoch": 106.23880597014926, "grad_norm": 18.35716438293457, "learning_rate": 8.53896103896104e-06, "loss": 41.6452, "step": 4462 }, { "epoch": 106.26268656716418, "grad_norm": 20.6253719329834, "learning_rate": 8.536796536796537e-06, "loss": 41.2028, "step": 4463 }, { "epoch": 106.2865671641791, "grad_norm": 17.438785552978516, "learning_rate": 8.534632034632035e-06, "loss": 42.4732, "step": 4464 }, { "epoch": 106.31044776119403, "grad_norm": 22.83930778503418, "learning_rate": 8.532467532467533e-06, "loss": 40.1875, "step": 4465 }, { "epoch": 106.33432835820895, "grad_norm": 19.77629852294922, "learning_rate": 8.53030303030303e-06, "loss": 42.7191, "step": 4466 }, { "epoch": 106.35820895522389, "grad_norm": 24.823516845703125, "learning_rate": 8.528138528138529e-06, "loss": 41.8532, "step": 4467 }, { "epoch": 106.38208955223881, "grad_norm": 25.804109573364258, "learning_rate": 8.525974025974026e-06, "loss": 41.3039, "step": 4468 }, { "epoch": 106.40597014925373, "grad_norm": 18.37181854248047, "learning_rate": 8.523809523809524e-06, "loss": 42.5937, "step": 4469 }, { "epoch": 106.42985074626866, "grad_norm": 21.761140823364258, "learning_rate": 8.521645021645022e-06, "loss": 41.5739, "step": 4470 }, { "epoch": 106.45373134328358, "grad_norm": 18.34234619140625, "learning_rate": 8.51948051948052e-06, "loss": 42.0672, "step": 4471 }, { "epoch": 106.4776119402985, "grad_norm": 21.598434448242188, "learning_rate": 8.51731601731602e-06, "loss": 43.3303, "step": 4472 }, { "epoch": 106.50149253731344, "grad_norm": 19.549448013305664, "learning_rate": 8.515151515151517e-06, "loss": 41.4708, "step": 4473 }, { "epoch": 106.52537313432836, "grad_norm": 20.763225555419922, "learning_rate": 8.512987012987015e-06, "loss": 41.8263, "step": 4474 }, { "epoch": 106.54925373134328, "grad_norm": 19.644168853759766, "learning_rate": 8.510822510822511e-06, "loss": 42.1677, "step": 4475 }, { "epoch": 106.57313432835821, "grad_norm": 23.9834041595459, "learning_rate": 8.508658008658009e-06, "loss": 42.5743, "step": 4476 }, { "epoch": 106.59701492537313, "grad_norm": 19.93153953552246, "learning_rate": 8.506493506493507e-06, "loss": 41.2167, "step": 4477 }, { "epoch": 106.62089552238805, "grad_norm": 22.8863525390625, "learning_rate": 8.504329004329004e-06, "loss": 41.1281, "step": 4478 }, { "epoch": 106.64477611940299, "grad_norm": 20.42034149169922, "learning_rate": 8.502164502164502e-06, "loss": 41.0462, "step": 4479 }, { "epoch": 106.66865671641791, "grad_norm": 21.096284866333008, "learning_rate": 8.5e-06, "loss": 41.1694, "step": 4480 }, { "epoch": 106.69253731343284, "grad_norm": 21.80982780456543, "learning_rate": 8.497835497835498e-06, "loss": 41.3229, "step": 4481 }, { "epoch": 106.71641791044776, "grad_norm": 19.85307502746582, "learning_rate": 8.495670995670996e-06, "loss": 41.6649, "step": 4482 }, { "epoch": 106.74029850746268, "grad_norm": 15.509448051452637, "learning_rate": 8.493506493506493e-06, "loss": 41.1427, "step": 4483 }, { "epoch": 106.7641791044776, "grad_norm": 19.124879837036133, "learning_rate": 8.491341991341993e-06, "loss": 41.2628, "step": 4484 }, { "epoch": 106.78805970149254, "grad_norm": 16.123470306396484, "learning_rate": 8.48917748917749e-06, "loss": 41.9344, "step": 4485 }, { "epoch": 106.81194029850747, "grad_norm": 16.704002380371094, "learning_rate": 8.487012987012988e-06, "loss": 41.3242, "step": 4486 }, { "epoch": 106.83582089552239, "grad_norm": 17.679168701171875, "learning_rate": 8.484848484848486e-06, "loss": 42.4921, "step": 4487 }, { "epoch": 106.85970149253731, "grad_norm": 16.334306716918945, "learning_rate": 8.482683982683984e-06, "loss": 41.7613, "step": 4488 }, { "epoch": 106.88358208955223, "grad_norm": 21.279388427734375, "learning_rate": 8.480519480519482e-06, "loss": 40.7258, "step": 4489 }, { "epoch": 106.90746268656716, "grad_norm": 18.096824645996094, "learning_rate": 8.47835497835498e-06, "loss": 41.4365, "step": 4490 }, { "epoch": 106.9313432835821, "grad_norm": 14.148079872131348, "learning_rate": 8.476190476190477e-06, "loss": 41.9096, "step": 4491 }, { "epoch": 106.95522388059702, "grad_norm": 18.99448013305664, "learning_rate": 8.474025974025975e-06, "loss": 41.1249, "step": 4492 }, { "epoch": 106.97910447761194, "grad_norm": 19.877487182617188, "learning_rate": 8.471861471861473e-06, "loss": 41.6588, "step": 4493 }, { "epoch": 107.0, "grad_norm": 17.858646392822266, "learning_rate": 8.46969696969697e-06, "loss": 35.8561, "step": 4494 }, { "epoch": 107.02388059701492, "grad_norm": 15.608851432800293, "learning_rate": 8.467532467532467e-06, "loss": 41.4418, "step": 4495 }, { "epoch": 107.04776119402985, "grad_norm": 22.582759857177734, "learning_rate": 8.465367965367966e-06, "loss": 41.0498, "step": 4496 }, { "epoch": 107.07164179104478, "grad_norm": 21.779876708984375, "learning_rate": 8.463203463203464e-06, "loss": 41.6588, "step": 4497 }, { "epoch": 107.0955223880597, "grad_norm": 20.698528289794922, "learning_rate": 8.461038961038962e-06, "loss": 43.0142, "step": 4498 }, { "epoch": 107.11940298507463, "grad_norm": 16.091886520385742, "learning_rate": 8.45887445887446e-06, "loss": 41.3033, "step": 4499 }, { "epoch": 107.14328358208955, "grad_norm": 28.291919708251953, "learning_rate": 8.456709956709958e-06, "loss": 41.3949, "step": 4500 }, { "epoch": 107.16716417910447, "grad_norm": 19.51844596862793, "learning_rate": 8.454545454545455e-06, "loss": 42.2322, "step": 4501 }, { "epoch": 107.1910447761194, "grad_norm": 27.817554473876953, "learning_rate": 8.452380952380953e-06, "loss": 41.1951, "step": 4502 }, { "epoch": 107.21492537313434, "grad_norm": 20.286903381347656, "learning_rate": 8.450216450216451e-06, "loss": 42.1971, "step": 4503 }, { "epoch": 107.23880597014926, "grad_norm": 26.34720230102539, "learning_rate": 8.448051948051949e-06, "loss": 40.7248, "step": 4504 }, { "epoch": 107.26268656716418, "grad_norm": 21.989835739135742, "learning_rate": 8.445887445887447e-06, "loss": 42.4462, "step": 4505 }, { "epoch": 107.2865671641791, "grad_norm": 22.28291893005371, "learning_rate": 8.443722943722944e-06, "loss": 41.103, "step": 4506 }, { "epoch": 107.31044776119403, "grad_norm": 21.63711166381836, "learning_rate": 8.441558441558442e-06, "loss": 42.685, "step": 4507 }, { "epoch": 107.33432835820895, "grad_norm": 16.82655143737793, "learning_rate": 8.43939393939394e-06, "loss": 42.0045, "step": 4508 }, { "epoch": 107.35820895522389, "grad_norm": 24.85128402709961, "learning_rate": 8.437229437229438e-06, "loss": 41.6018, "step": 4509 }, { "epoch": 107.38208955223881, "grad_norm": 18.015731811523438, "learning_rate": 8.435064935064936e-06, "loss": 40.7281, "step": 4510 }, { "epoch": 107.40597014925373, "grad_norm": 26.402570724487305, "learning_rate": 8.432900432900433e-06, "loss": 42.5324, "step": 4511 }, { "epoch": 107.42985074626866, "grad_norm": 21.223861694335938, "learning_rate": 8.430735930735931e-06, "loss": 40.7112, "step": 4512 }, { "epoch": 107.45373134328358, "grad_norm": 19.461315155029297, "learning_rate": 8.428571428571429e-06, "loss": 40.8781, "step": 4513 }, { "epoch": 107.4776119402985, "grad_norm": 23.075971603393555, "learning_rate": 8.426406926406927e-06, "loss": 41.2487, "step": 4514 }, { "epoch": 107.50149253731344, "grad_norm": 22.154701232910156, "learning_rate": 8.424242424242425e-06, "loss": 41.175, "step": 4515 }, { "epoch": 107.52537313432836, "grad_norm": 29.775875091552734, "learning_rate": 8.422077922077922e-06, "loss": 42.0601, "step": 4516 }, { "epoch": 107.54925373134328, "grad_norm": 23.231462478637695, "learning_rate": 8.41991341991342e-06, "loss": 41.6765, "step": 4517 }, { "epoch": 107.57313432835821, "grad_norm": 28.446731567382812, "learning_rate": 8.417748917748918e-06, "loss": 43.3939, "step": 4518 }, { "epoch": 107.59701492537313, "grad_norm": 17.26323890686035, "learning_rate": 8.415584415584416e-06, "loss": 41.2887, "step": 4519 }, { "epoch": 107.62089552238805, "grad_norm": 20.49373435974121, "learning_rate": 8.413419913419915e-06, "loss": 41.6722, "step": 4520 }, { "epoch": 107.64477611940299, "grad_norm": 16.85104751586914, "learning_rate": 8.411255411255413e-06, "loss": 40.5418, "step": 4521 }, { "epoch": 107.66865671641791, "grad_norm": 16.146242141723633, "learning_rate": 8.40909090909091e-06, "loss": 40.9689, "step": 4522 }, { "epoch": 107.69253731343284, "grad_norm": 17.153108596801758, "learning_rate": 8.406926406926409e-06, "loss": 41.6464, "step": 4523 }, { "epoch": 107.71641791044776, "grad_norm": 20.580894470214844, "learning_rate": 8.404761904761905e-06, "loss": 42.4474, "step": 4524 }, { "epoch": 107.74029850746268, "grad_norm": 15.058161735534668, "learning_rate": 8.402597402597403e-06, "loss": 41.7216, "step": 4525 }, { "epoch": 107.7641791044776, "grad_norm": 20.474285125732422, "learning_rate": 8.4004329004329e-06, "loss": 41.7553, "step": 4526 }, { "epoch": 107.78805970149254, "grad_norm": 20.75484275817871, "learning_rate": 8.398268398268398e-06, "loss": 41.6698, "step": 4527 }, { "epoch": 107.81194029850747, "grad_norm": 14.600532531738281, "learning_rate": 8.396103896103896e-06, "loss": 41.6121, "step": 4528 }, { "epoch": 107.83582089552239, "grad_norm": 18.704586029052734, "learning_rate": 8.393939393939394e-06, "loss": 40.5424, "step": 4529 }, { "epoch": 107.85970149253731, "grad_norm": 15.7553129196167, "learning_rate": 8.391774891774892e-06, "loss": 41.0951, "step": 4530 }, { "epoch": 107.88358208955223, "grad_norm": 20.587574005126953, "learning_rate": 8.38961038961039e-06, "loss": 42.1185, "step": 4531 }, { "epoch": 107.90746268656716, "grad_norm": 16.545307159423828, "learning_rate": 8.387445887445889e-06, "loss": 40.7692, "step": 4532 }, { "epoch": 107.9313432835821, "grad_norm": 19.46141815185547, "learning_rate": 8.385281385281387e-06, "loss": 41.6013, "step": 4533 }, { "epoch": 107.95522388059702, "grad_norm": 16.87144660949707, "learning_rate": 8.383116883116884e-06, "loss": 41.6794, "step": 4534 }, { "epoch": 107.97910447761194, "grad_norm": 16.42438316345215, "learning_rate": 8.380952380952382e-06, "loss": 42.3932, "step": 4535 }, { "epoch": 108.0, "grad_norm": 24.72748374938965, "learning_rate": 8.37878787878788e-06, "loss": 36.3065, "step": 4536 }, { "epoch": 108.02388059701492, "grad_norm": 21.551437377929688, "learning_rate": 8.376623376623378e-06, "loss": 41.1883, "step": 4537 }, { "epoch": 108.04776119402985, "grad_norm": 31.447101593017578, "learning_rate": 8.374458874458876e-06, "loss": 41.2846, "step": 4538 }, { "epoch": 108.07164179104478, "grad_norm": 23.090343475341797, "learning_rate": 8.372294372294373e-06, "loss": 41.7488, "step": 4539 }, { "epoch": 108.0955223880597, "grad_norm": 29.949562072753906, "learning_rate": 8.370129870129871e-06, "loss": 42.462, "step": 4540 }, { "epoch": 108.11940298507463, "grad_norm": 21.09743309020996, "learning_rate": 8.367965367965369e-06, "loss": 41.4602, "step": 4541 }, { "epoch": 108.14328358208955, "grad_norm": 34.469139099121094, "learning_rate": 8.365800865800867e-06, "loss": 41.5088, "step": 4542 }, { "epoch": 108.16716417910447, "grad_norm": 25.73923110961914, "learning_rate": 8.363636363636365e-06, "loss": 42.9585, "step": 4543 }, { "epoch": 108.1910447761194, "grad_norm": 30.073488235473633, "learning_rate": 8.361471861471862e-06, "loss": 41.5492, "step": 4544 }, { "epoch": 108.21492537313434, "grad_norm": 26.56512451171875, "learning_rate": 8.35930735930736e-06, "loss": 41.2259, "step": 4545 }, { "epoch": 108.23880597014926, "grad_norm": 32.05238723754883, "learning_rate": 8.357142857142858e-06, "loss": 41.8249, "step": 4546 }, { "epoch": 108.26268656716418, "grad_norm": 25.487403869628906, "learning_rate": 8.354978354978356e-06, "loss": 41.2949, "step": 4547 }, { "epoch": 108.2865671641791, "grad_norm": 26.391586303710938, "learning_rate": 8.352813852813854e-06, "loss": 41.9133, "step": 4548 }, { "epoch": 108.31044776119403, "grad_norm": 23.400354385375977, "learning_rate": 8.350649350649351e-06, "loss": 41.2916, "step": 4549 }, { "epoch": 108.33432835820895, "grad_norm": 32.927467346191406, "learning_rate": 8.348484848484849e-06, "loss": 41.0151, "step": 4550 }, { "epoch": 108.35820895522389, "grad_norm": NaN, "learning_rate": 8.346320346320347e-06, "loss": 41.9067, "step": 4551 }, { "epoch": 108.38208955223881, "grad_norm": 27.199121475219727, "learning_rate": 8.346320346320347e-06, "loss": 41.8715, "step": 4552 }, { "epoch": 108.40597014925373, "grad_norm": 30.17380142211914, "learning_rate": 8.344155844155845e-06, "loss": 41.9553, "step": 4553 }, { "epoch": 108.42985074626866, "grad_norm": 28.72991943359375, "learning_rate": 8.341991341991343e-06, "loss": 41.9683, "step": 4554 }, { "epoch": 108.45373134328358, "grad_norm": 26.323143005371094, "learning_rate": 8.33982683982684e-06, "loss": 41.1456, "step": 4555 }, { "epoch": 108.4776119402985, "grad_norm": 23.400619506835938, "learning_rate": 8.337662337662338e-06, "loss": 41.7313, "step": 4556 }, { "epoch": 108.50149253731344, "grad_norm": 23.805021286010742, "learning_rate": 8.335497835497836e-06, "loss": 40.5904, "step": 4557 }, { "epoch": 108.52537313432836, "grad_norm": 21.17874526977539, "learning_rate": 8.333333333333334e-06, "loss": 41.5941, "step": 4558 }, { "epoch": 108.54925373134328, "grad_norm": 26.89427375793457, "learning_rate": 8.331168831168832e-06, "loss": 40.6515, "step": 4559 }, { "epoch": 108.57313432835821, "grad_norm": 22.102890014648438, "learning_rate": 8.32900432900433e-06, "loss": 41.6195, "step": 4560 }, { "epoch": 108.59701492537313, "grad_norm": 28.349239349365234, "learning_rate": 8.326839826839827e-06, "loss": 41.7613, "step": 4561 }, { "epoch": 108.62089552238805, "grad_norm": 24.95227813720703, "learning_rate": 8.324675324675325e-06, "loss": 42.2593, "step": 4562 }, { "epoch": 108.64477611940299, "grad_norm": 29.643531799316406, "learning_rate": 8.322510822510823e-06, "loss": 42.5247, "step": 4563 }, { "epoch": 108.66865671641791, "grad_norm": 24.321622848510742, "learning_rate": 8.32034632034632e-06, "loss": 41.5149, "step": 4564 }, { "epoch": 108.69253731343284, "grad_norm": 27.7292537689209, "learning_rate": 8.318181818181818e-06, "loss": 41.6252, "step": 4565 }, { "epoch": 108.71641791044776, "grad_norm": 23.14917755126953, "learning_rate": 8.316017316017316e-06, "loss": 41.8188, "step": 4566 }, { "epoch": 108.74029850746268, "grad_norm": 31.897857666015625, "learning_rate": 8.313852813852814e-06, "loss": 41.2639, "step": 4567 }, { "epoch": 108.7641791044776, "grad_norm": 22.20448112487793, "learning_rate": 8.311688311688313e-06, "loss": 40.8366, "step": 4568 }, { "epoch": 108.78805970149254, "grad_norm": 33.260982513427734, "learning_rate": 8.309523809523811e-06, "loss": 42.1547, "step": 4569 }, { "epoch": 108.81194029850747, "grad_norm": 28.327970504760742, "learning_rate": 8.307359307359309e-06, "loss": 41.0261, "step": 4570 }, { "epoch": 108.83582089552239, "grad_norm": 30.421405792236328, "learning_rate": 8.305194805194807e-06, "loss": 42.6333, "step": 4571 }, { "epoch": 108.85970149253731, "grad_norm": 27.54227066040039, "learning_rate": 8.303030303030305e-06, "loss": 41.5392, "step": 4572 }, { "epoch": 108.88358208955223, "grad_norm": 27.083431243896484, "learning_rate": 8.300865800865802e-06, "loss": 40.5557, "step": 4573 }, { "epoch": 108.90746268656716, "grad_norm": 22.203136444091797, "learning_rate": 8.2987012987013e-06, "loss": 40.2079, "step": 4574 }, { "epoch": 108.9313432835821, "grad_norm": 30.871158599853516, "learning_rate": 8.296536796536796e-06, "loss": 40.74, "step": 4575 }, { "epoch": 108.95522388059702, "grad_norm": 26.300838470458984, "learning_rate": 8.294372294372294e-06, "loss": 41.3107, "step": 4576 }, { "epoch": 108.97910447761194, "grad_norm": 31.482698440551758, "learning_rate": 8.292207792207792e-06, "loss": 42.54, "step": 4577 }, { "epoch": 109.0, "grad_norm": 21.353776931762695, "learning_rate": 8.29004329004329e-06, "loss": 36.6851, "step": 4578 }, { "epoch": 109.02388059701492, "grad_norm": 27.117504119873047, "learning_rate": 8.287878787878787e-06, "loss": 41.3884, "step": 4579 }, { "epoch": 109.04776119402985, "grad_norm": 23.497106552124023, "learning_rate": 8.285714285714287e-06, "loss": 41.0846, "step": 4580 }, { "epoch": 109.07164179104478, "grad_norm": 27.996051788330078, "learning_rate": 8.283549783549785e-06, "loss": 41.214, "step": 4581 }, { "epoch": 109.0955223880597, "grad_norm": 24.364675521850586, "learning_rate": 8.281385281385283e-06, "loss": 42.1255, "step": 4582 }, { "epoch": 109.11940298507463, "grad_norm": 31.155681610107422, "learning_rate": 8.27922077922078e-06, "loss": 42.1774, "step": 4583 }, { "epoch": 109.14328358208955, "grad_norm": 27.193376541137695, "learning_rate": 8.277056277056278e-06, "loss": 42.4321, "step": 4584 }, { "epoch": 109.16716417910447, "grad_norm": 30.398059844970703, "learning_rate": 8.274891774891776e-06, "loss": 42.0286, "step": 4585 }, { "epoch": 109.1910447761194, "grad_norm": 27.1219425201416, "learning_rate": 8.272727272727274e-06, "loss": 40.9508, "step": 4586 }, { "epoch": 109.21492537313434, "grad_norm": 29.481327056884766, "learning_rate": 8.270562770562772e-06, "loss": 42.1899, "step": 4587 }, { "epoch": 109.23880597014926, "grad_norm": 29.547292709350586, "learning_rate": 8.26839826839827e-06, "loss": 40.2919, "step": 4588 }, { "epoch": 109.26268656716418, "grad_norm": 26.99224281311035, "learning_rate": 8.266233766233767e-06, "loss": 41.1843, "step": 4589 }, { "epoch": 109.2865671641791, "grad_norm": 25.45054054260254, "learning_rate": 8.264069264069265e-06, "loss": 41.6843, "step": 4590 }, { "epoch": 109.31044776119403, "grad_norm": 27.529739379882812, "learning_rate": 8.261904761904763e-06, "loss": 39.7442, "step": 4591 }, { "epoch": 109.33432835820895, "grad_norm": 23.54625129699707, "learning_rate": 8.25974025974026e-06, "loss": 40.9662, "step": 4592 }, { "epoch": 109.35820895522389, "grad_norm": 26.74515151977539, "learning_rate": 8.257575757575758e-06, "loss": 42.1445, "step": 4593 }, { "epoch": 109.38208955223881, "grad_norm": 24.591623306274414, "learning_rate": 8.255411255411256e-06, "loss": 41.4631, "step": 4594 }, { "epoch": 109.40597014925373, "grad_norm": 27.32378578186035, "learning_rate": 8.253246753246754e-06, "loss": 43.223, "step": 4595 }, { "epoch": 109.42985074626866, "grad_norm": 24.74321937561035, "learning_rate": 8.251082251082252e-06, "loss": 42.1151, "step": 4596 }, { "epoch": 109.45373134328358, "grad_norm": 24.764156341552734, "learning_rate": 8.24891774891775e-06, "loss": 42.0353, "step": 4597 }, { "epoch": 109.4776119402985, "grad_norm": 22.001508712768555, "learning_rate": 8.246753246753247e-06, "loss": 41.5189, "step": 4598 }, { "epoch": 109.50149253731344, "grad_norm": 27.916759490966797, "learning_rate": 8.244588744588745e-06, "loss": 42.8372, "step": 4599 }, { "epoch": 109.52537313432836, "grad_norm": 23.65235137939453, "learning_rate": 8.242424242424243e-06, "loss": 40.9737, "step": 4600 }, { "epoch": 109.54925373134328, "grad_norm": 25.150957107543945, "learning_rate": 8.24025974025974e-06, "loss": 41.279, "step": 4601 }, { "epoch": 109.57313432835821, "grad_norm": 24.193187713623047, "learning_rate": 8.238095238095239e-06, "loss": 42.029, "step": 4602 }, { "epoch": 109.59701492537313, "grad_norm": 26.186813354492188, "learning_rate": 8.235930735930736e-06, "loss": 40.3791, "step": 4603 }, { "epoch": 109.62089552238805, "grad_norm": 24.97614097595215, "learning_rate": 8.233766233766236e-06, "loss": 40.8347, "step": 4604 }, { "epoch": 109.64477611940299, "grad_norm": 27.51297950744629, "learning_rate": 8.231601731601732e-06, "loss": 40.6329, "step": 4605 }, { "epoch": 109.66865671641791, "grad_norm": 24.866369247436523, "learning_rate": 8.22943722943723e-06, "loss": 41.5968, "step": 4606 }, { "epoch": 109.69253731343284, "grad_norm": 28.864290237426758, "learning_rate": 8.227272727272728e-06, "loss": 41.3671, "step": 4607 }, { "epoch": 109.71641791044776, "grad_norm": 29.503835678100586, "learning_rate": 8.225108225108225e-06, "loss": 41.2315, "step": 4608 }, { "epoch": 109.74029850746268, "grad_norm": 26.039966583251953, "learning_rate": 8.222943722943723e-06, "loss": 41.0179, "step": 4609 }, { "epoch": 109.7641791044776, "grad_norm": 22.42831039428711, "learning_rate": 8.220779220779221e-06, "loss": 41.3331, "step": 4610 }, { "epoch": 109.78805970149254, "grad_norm": 25.328296661376953, "learning_rate": 8.218614718614719e-06, "loss": 39.9773, "step": 4611 }, { "epoch": 109.81194029850747, "grad_norm": 18.524499893188477, "learning_rate": 8.216450216450216e-06, "loss": 42.5481, "step": 4612 }, { "epoch": 109.83582089552239, "grad_norm": 26.1571102142334, "learning_rate": 8.214285714285714e-06, "loss": 41.759, "step": 4613 }, { "epoch": 109.85970149253731, "grad_norm": 22.46668243408203, "learning_rate": 8.212121212121212e-06, "loss": 42.0987, "step": 4614 }, { "epoch": 109.88358208955223, "grad_norm": 29.418230056762695, "learning_rate": 8.20995670995671e-06, "loss": 42.8083, "step": 4615 }, { "epoch": 109.90746268656716, "grad_norm": 23.00196647644043, "learning_rate": 8.20779220779221e-06, "loss": 40.488, "step": 4616 }, { "epoch": 109.9313432835821, "grad_norm": 27.977956771850586, "learning_rate": 8.205627705627707e-06, "loss": 41.4731, "step": 4617 }, { "epoch": 109.95522388059702, "grad_norm": 24.776628494262695, "learning_rate": 8.203463203463205e-06, "loss": 42.6836, "step": 4618 }, { "epoch": 109.97910447761194, "grad_norm": 27.11109733581543, "learning_rate": 8.201298701298703e-06, "loss": 40.7662, "step": 4619 }, { "epoch": 110.0, "grad_norm": 20.246700286865234, "learning_rate": 8.1991341991342e-06, "loss": 36.1303, "step": 4620 }, { "epoch": 110.0, "step": 4620, "total_flos": 2.2713564637226506e+17, "train_loss": 7.6340307194433175, "train_runtime": 25635.1587, "train_samples_per_second": 22.965, "train_steps_per_second": 0.18 }, { "epoch": 110.02388059701492, "grad_norm": 21.282230377197266, "learning_rate": 1e-05, "loss": 42.4757, "step": 4621 }, { "epoch": 110.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 48.912, "step": 4622 }, { "epoch": 110.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 49.0673, "step": 4623 }, { "epoch": 110.0955223880597, "grad_norm": 445.232177734375, "learning_rate": 9.998015873015874e-06, "loss": 48.7345, "step": 4624 }, { "epoch": 110.11940298507463, "grad_norm": 224.98858642578125, "learning_rate": 9.996031746031746e-06, "loss": 45.5376, "step": 4625 }, { "epoch": 110.14328358208955, "grad_norm": 92.86235046386719, "learning_rate": 9.99404761904762e-06, "loss": 44.0537, "step": 4626 }, { "epoch": 110.16716417910447, "grad_norm": 67.79006958007812, "learning_rate": 9.992063492063493e-06, "loss": 42.4137, "step": 4627 }, { "epoch": 110.1910447761194, "grad_norm": 52.0079345703125, "learning_rate": 9.990079365079366e-06, "loss": 42.7133, "step": 4628 }, { "epoch": 110.21492537313434, "grad_norm": 40.780120849609375, "learning_rate": 9.988095238095239e-06, "loss": 42.7036, "step": 4629 }, { "epoch": 110.23880597014926, "grad_norm": 45.80341339111328, "learning_rate": 9.986111111111111e-06, "loss": 42.5543, "step": 4630 }, { "epoch": 110.26268656716418, "grad_norm": NaN, "learning_rate": 9.984126984126986e-06, "loss": 67.9585, "step": 4631 }, { "epoch": 110.2865671641791, "grad_norm": 37.52305603027344, "learning_rate": 9.984126984126986e-06, "loss": 42.0859, "step": 4632 }, { "epoch": 110.31044776119403, "grad_norm": 37.10969543457031, "learning_rate": 9.982142857142858e-06, "loss": 42.8517, "step": 4633 }, { "epoch": 110.33432835820895, "grad_norm": 33.601905822753906, "learning_rate": 9.980158730158731e-06, "loss": 41.4451, "step": 4634 }, { "epoch": 110.35820895522389, "grad_norm": 25.348403930664062, "learning_rate": 9.978174603174604e-06, "loss": 41.7698, "step": 4635 }, { "epoch": 110.38208955223881, "grad_norm": 28.69048309326172, "learning_rate": 9.976190476190477e-06, "loss": 41.8287, "step": 4636 }, { "epoch": 110.40597014925373, "grad_norm": 30.578548431396484, "learning_rate": 9.97420634920635e-06, "loss": 42.4165, "step": 4637 }, { "epoch": 110.42985074626866, "grad_norm": 19.63727569580078, "learning_rate": 9.972222222222224e-06, "loss": 42.37, "step": 4638 }, { "epoch": 110.45373134328358, "grad_norm": 34.42063522338867, "learning_rate": 9.970238095238096e-06, "loss": 42.7996, "step": 4639 }, { "epoch": 110.4776119402985, "grad_norm": NaN, "learning_rate": 9.968253968253969e-06, "loss": 62.9577, "step": 4640 }, { "epoch": 110.50149253731344, "grad_norm": 23.131494522094727, "learning_rate": 9.968253968253969e-06, "loss": 40.8106, "step": 4641 }, { "epoch": 110.52537313432836, "grad_norm": 22.294376373291016, "learning_rate": 9.966269841269842e-06, "loss": 42.7557, "step": 4642 }, { "epoch": 110.54925373134328, "grad_norm": 30.476016998291016, "learning_rate": 9.964285714285714e-06, "loss": 42.1925, "step": 4643 }, { "epoch": 110.57313432835821, "grad_norm": 20.84010887145996, "learning_rate": 9.962301587301589e-06, "loss": 41.9241, "step": 4644 }, { "epoch": 110.59701492537313, "grad_norm": 23.944196701049805, "learning_rate": 9.960317460317462e-06, "loss": 41.8911, "step": 4645 }, { "epoch": 110.62089552238805, "grad_norm": 28.773279190063477, "learning_rate": 9.958333333333334e-06, "loss": 41.5711, "step": 4646 }, { "epoch": 110.64477611940299, "grad_norm": 22.82482147216797, "learning_rate": 9.956349206349207e-06, "loss": 42.1915, "step": 4647 }, { "epoch": 110.66865671641791, "grad_norm": 24.0530948638916, "learning_rate": 9.95436507936508e-06, "loss": 40.6648, "step": 4648 }, { "epoch": 110.69253731343284, "grad_norm": 21.640682220458984, "learning_rate": 9.952380952380954e-06, "loss": 41.5682, "step": 4649 }, { "epoch": 110.71641791044776, "grad_norm": 21.377979278564453, "learning_rate": 9.950396825396827e-06, "loss": 41.6034, "step": 4650 }, { "epoch": 110.74029850746268, "grad_norm": 19.04741668701172, "learning_rate": 9.9484126984127e-06, "loss": 41.8165, "step": 4651 }, { "epoch": 110.7641791044776, "grad_norm": 22.74652099609375, "learning_rate": 9.946428571428572e-06, "loss": 40.4093, "step": 4652 }, { "epoch": 110.78805970149254, "grad_norm": 16.828824996948242, "learning_rate": 9.944444444444445e-06, "loss": 42.181, "step": 4653 }, { "epoch": 110.81194029850747, "grad_norm": 20.226478576660156, "learning_rate": 9.94246031746032e-06, "loss": 41.774, "step": 4654 }, { "epoch": 110.83582089552239, "grad_norm": 19.935068130493164, "learning_rate": 9.940476190476192e-06, "loss": 41.6547, "step": 4655 }, { "epoch": 110.85970149253731, "grad_norm": 18.150102615356445, "learning_rate": 9.938492063492065e-06, "loss": 39.962, "step": 4656 }, { "epoch": 110.88358208955223, "grad_norm": 27.12464141845703, "learning_rate": 9.936507936507937e-06, "loss": 41.2807, "step": 4657 }, { "epoch": 110.90746268656716, "grad_norm": 18.194360733032227, "learning_rate": 9.93452380952381e-06, "loss": 40.8381, "step": 4658 }, { "epoch": 110.9313432835821, "grad_norm": 25.638107299804688, "learning_rate": 9.932539682539684e-06, "loss": 41.2385, "step": 4659 }, { "epoch": 110.95522388059702, "grad_norm": 21.1163387298584, "learning_rate": 9.930555555555557e-06, "loss": 41.0065, "step": 4660 }, { "epoch": 110.97910447761194, "grad_norm": 17.089710235595703, "learning_rate": 9.92857142857143e-06, "loss": 41.8835, "step": 4661 }, { "epoch": 111.0, "grad_norm": 19.484764099121094, "learning_rate": 9.926587301587303e-06, "loss": 37.1289, "step": 4662 }, { "epoch": 111.02388059701492, "grad_norm": 20.73271942138672, "learning_rate": 9.924603174603175e-06, "loss": 40.8035, "step": 4663 }, { "epoch": 111.04776119402985, "grad_norm": 14.759368896484375, "learning_rate": 9.922619047619048e-06, "loss": 42.164, "step": 4664 }, { "epoch": 111.07164179104478, "grad_norm": 20.654579162597656, "learning_rate": 9.920634920634922e-06, "loss": 41.7302, "step": 4665 }, { "epoch": 111.0955223880597, "grad_norm": 17.05702781677246, "learning_rate": 9.918650793650795e-06, "loss": 41.3278, "step": 4666 }, { "epoch": 111.11940298507463, "grad_norm": 15.701156616210938, "learning_rate": 9.916666666666668e-06, "loss": 40.7933, "step": 4667 }, { "epoch": 111.14328358208955, "grad_norm": 17.04022216796875, "learning_rate": 9.91468253968254e-06, "loss": 41.58, "step": 4668 }, { "epoch": 111.16716417910447, "grad_norm": 16.614116668701172, "learning_rate": 9.912698412698413e-06, "loss": 40.8149, "step": 4669 }, { "epoch": 111.1910447761194, "grad_norm": 17.664091110229492, "learning_rate": 9.910714285714288e-06, "loss": 40.459, "step": 4670 }, { "epoch": 111.21492537313434, "grad_norm": 17.730188369750977, "learning_rate": 9.90873015873016e-06, "loss": 42.3291, "step": 4671 }, { "epoch": 111.23880597014926, "grad_norm": 14.860199928283691, "learning_rate": 9.906746031746033e-06, "loss": 42.314, "step": 4672 }, { "epoch": 111.26268656716418, "grad_norm": 18.45416259765625, "learning_rate": 9.904761904761906e-06, "loss": 41.2486, "step": 4673 }, { "epoch": 111.2865671641791, "grad_norm": 15.178065299987793, "learning_rate": 9.902777777777778e-06, "loss": 42.0394, "step": 4674 }, { "epoch": 111.31044776119403, "grad_norm": 16.214420318603516, "learning_rate": 9.900793650793653e-06, "loss": 41.6074, "step": 4675 }, { "epoch": 111.33432835820895, "grad_norm": 19.241151809692383, "learning_rate": 9.898809523809525e-06, "loss": 42.0125, "step": 4676 }, { "epoch": 111.35820895522389, "grad_norm": 16.019407272338867, "learning_rate": 9.896825396825398e-06, "loss": 41.5867, "step": 4677 }, { "epoch": 111.38208955223881, "grad_norm": 18.017990112304688, "learning_rate": 9.89484126984127e-06, "loss": 40.4534, "step": 4678 }, { "epoch": 111.40597014925373, "grad_norm": 18.37062644958496, "learning_rate": 9.892857142857143e-06, "loss": 41.4307, "step": 4679 }, { "epoch": 111.42985074626866, "grad_norm": 18.12076187133789, "learning_rate": 9.890873015873018e-06, "loss": 42.1702, "step": 4680 }, { "epoch": 111.45373134328358, "grad_norm": 21.935218811035156, "learning_rate": 9.88888888888889e-06, "loss": 40.5205, "step": 4681 }, { "epoch": 111.4776119402985, "grad_norm": 19.747133255004883, "learning_rate": 9.886904761904763e-06, "loss": 41.7721, "step": 4682 }, { "epoch": 111.50149253731344, "grad_norm": 17.159732818603516, "learning_rate": 9.884920634920636e-06, "loss": 41.285, "step": 4683 }, { "epoch": 111.52537313432836, "grad_norm": 15.736952781677246, "learning_rate": 9.882936507936509e-06, "loss": 40.8216, "step": 4684 }, { "epoch": 111.54925373134328, "grad_norm": 17.591854095458984, "learning_rate": 9.880952380952381e-06, "loss": 40.0516, "step": 4685 }, { "epoch": 111.57313432835821, "grad_norm": 17.530582427978516, "learning_rate": 9.878968253968256e-06, "loss": 41.4235, "step": 4686 }, { "epoch": 111.59701492537313, "grad_norm": 18.394372940063477, "learning_rate": 9.876984126984128e-06, "loss": 41.7204, "step": 4687 }, { "epoch": 111.62089552238805, "grad_norm": 17.80558967590332, "learning_rate": 9.875000000000001e-06, "loss": 41.6861, "step": 4688 }, { "epoch": 111.64477611940299, "grad_norm": 19.939964294433594, "learning_rate": 9.873015873015874e-06, "loss": 41.3087, "step": 4689 }, { "epoch": 111.66865671641791, "grad_norm": 14.58205509185791, "learning_rate": 9.871031746031747e-06, "loss": 41.7955, "step": 4690 }, { "epoch": 111.69253731343284, "grad_norm": 19.98933982849121, "learning_rate": 9.869047619047621e-06, "loss": 42.3174, "step": 4691 }, { "epoch": 111.71641791044776, "grad_norm": 20.377466201782227, "learning_rate": 9.867063492063494e-06, "loss": 42.1654, "step": 4692 }, { "epoch": 111.74029850746268, "grad_norm": 19.26752471923828, "learning_rate": 9.865079365079366e-06, "loss": 41.0597, "step": 4693 }, { "epoch": 111.7641791044776, "grad_norm": 16.435440063476562, "learning_rate": 9.863095238095239e-06, "loss": 42.1122, "step": 4694 }, { "epoch": 111.78805970149254, "grad_norm": 17.955474853515625, "learning_rate": 9.861111111111112e-06, "loss": 41.0326, "step": 4695 }, { "epoch": 111.81194029850747, "grad_norm": 21.791505813598633, "learning_rate": 9.859126984126986e-06, "loss": 42.4256, "step": 4696 }, { "epoch": 111.83582089552239, "grad_norm": 17.081600189208984, "learning_rate": 9.857142857142859e-06, "loss": 41.7548, "step": 4697 }, { "epoch": 111.85970149253731, "grad_norm": 21.21491241455078, "learning_rate": 9.855158730158732e-06, "loss": 41.1434, "step": 4698 }, { "epoch": 111.88358208955223, "grad_norm": 25.082992553710938, "learning_rate": 9.853174603174604e-06, "loss": 41.2857, "step": 4699 }, { "epoch": 111.90746268656716, "grad_norm": 19.19919204711914, "learning_rate": 9.851190476190477e-06, "loss": 41.5529, "step": 4700 }, { "epoch": 111.9313432835821, "grad_norm": 32.29753494262695, "learning_rate": 9.849206349206351e-06, "loss": 42.4376, "step": 4701 }, { "epoch": 111.95522388059702, "grad_norm": 20.654430389404297, "learning_rate": 9.847222222222224e-06, "loss": 41.3052, "step": 4702 }, { "epoch": 111.97910447761194, "grad_norm": 32.98462677001953, "learning_rate": 9.845238095238097e-06, "loss": 41.1561, "step": 4703 }, { "epoch": 112.0, "grad_norm": 18.214174270629883, "learning_rate": 9.843253968253968e-06, "loss": 35.3902, "step": 4704 }, { "epoch": 112.02388059701492, "grad_norm": 25.639781951904297, "learning_rate": 9.841269841269842e-06, "loss": 40.7291, "step": 4705 }, { "epoch": 112.04776119402985, "grad_norm": 19.745450973510742, "learning_rate": 9.839285714285715e-06, "loss": 41.6564, "step": 4706 }, { "epoch": 112.07164179104478, "grad_norm": 24.907617568969727, "learning_rate": 9.837301587301588e-06, "loss": 41.4856, "step": 4707 }, { "epoch": 112.0955223880597, "grad_norm": 24.20347023010254, "learning_rate": 9.83531746031746e-06, "loss": 40.6423, "step": 4708 }, { "epoch": 112.11940298507463, "grad_norm": 16.246206283569336, "learning_rate": 9.833333333333333e-06, "loss": 40.5309, "step": 4709 }, { "epoch": 112.14328358208955, "grad_norm": 28.89447784423828, "learning_rate": 9.831349206349207e-06, "loss": 41.173, "step": 4710 }, { "epoch": 112.16716417910447, "grad_norm": 18.989233016967773, "learning_rate": 9.82936507936508e-06, "loss": 42.2629, "step": 4711 }, { "epoch": 112.1910447761194, "grad_norm": 22.261035919189453, "learning_rate": 9.827380952380953e-06, "loss": 41.9901, "step": 4712 }, { "epoch": 112.21492537313434, "grad_norm": 21.082855224609375, "learning_rate": 9.825396825396825e-06, "loss": 40.9817, "step": 4713 }, { "epoch": 112.23880597014926, "grad_norm": 15.739337921142578, "learning_rate": 9.823412698412698e-06, "loss": 42.0745, "step": 4714 }, { "epoch": 112.26268656716418, "grad_norm": 25.604066848754883, "learning_rate": 9.821428571428573e-06, "loss": 40.9371, "step": 4715 }, { "epoch": 112.2865671641791, "grad_norm": 17.916481018066406, "learning_rate": 9.819444444444445e-06, "loss": 40.9361, "step": 4716 }, { "epoch": 112.31044776119403, "grad_norm": 21.53338050842285, "learning_rate": 9.817460317460318e-06, "loss": 40.2245, "step": 4717 }, { "epoch": 112.33432835820895, "grad_norm": 21.370702743530273, "learning_rate": 9.81547619047619e-06, "loss": 40.7986, "step": 4718 }, { "epoch": 112.35820895522389, "grad_norm": 18.217588424682617, "learning_rate": 9.813492063492063e-06, "loss": 41.5072, "step": 4719 }, { "epoch": 112.38208955223881, "grad_norm": 18.874122619628906, "learning_rate": 9.811507936507938e-06, "loss": 39.7088, "step": 4720 }, { "epoch": 112.40597014925373, "grad_norm": 17.31776237487793, "learning_rate": 9.80952380952381e-06, "loss": 41.6839, "step": 4721 }, { "epoch": 112.42985074626866, "grad_norm": 23.88166046142578, "learning_rate": 9.807539682539683e-06, "loss": 41.7857, "step": 4722 }, { "epoch": 112.45373134328358, "grad_norm": 17.09743881225586, "learning_rate": 9.805555555555556e-06, "loss": 42.2407, "step": 4723 }, { "epoch": 112.4776119402985, "grad_norm": 20.519947052001953, "learning_rate": 9.803571428571428e-06, "loss": 41.8095, "step": 4724 }, { "epoch": 112.50149253731344, "grad_norm": 23.761943817138672, "learning_rate": 9.801587301587301e-06, "loss": 41.371, "step": 4725 }, { "epoch": 112.52537313432836, "grad_norm": 17.033470153808594, "learning_rate": 9.799603174603176e-06, "loss": 41.5687, "step": 4726 }, { "epoch": 112.54925373134328, "grad_norm": 18.175559997558594, "learning_rate": 9.797619047619048e-06, "loss": 42.2144, "step": 4727 }, { "epoch": 112.57313432835821, "grad_norm": 19.10957145690918, "learning_rate": 9.795634920634921e-06, "loss": 40.2305, "step": 4728 }, { "epoch": 112.59701492537313, "grad_norm": 20.52096176147461, "learning_rate": 9.793650793650794e-06, "loss": 42.5612, "step": 4729 }, { "epoch": 112.62089552238805, "grad_norm": 17.42753791809082, "learning_rate": 9.791666666666666e-06, "loss": 43.286, "step": 4730 }, { "epoch": 112.64477611940299, "grad_norm": 25.452363967895508, "learning_rate": 9.78968253968254e-06, "loss": 41.0071, "step": 4731 }, { "epoch": 112.66865671641791, "grad_norm": 21.480247497558594, "learning_rate": 9.787698412698413e-06, "loss": 41.7063, "step": 4732 }, { "epoch": 112.69253731343284, "grad_norm": 18.553220748901367, "learning_rate": 9.785714285714286e-06, "loss": 41.4099, "step": 4733 }, { "epoch": 112.71641791044776, "grad_norm": 25.513225555419922, "learning_rate": 9.783730158730159e-06, "loss": 41.5696, "step": 4734 }, { "epoch": 112.74029850746268, "grad_norm": 16.76629638671875, "learning_rate": 9.781746031746032e-06, "loss": 41.6305, "step": 4735 }, { "epoch": 112.7641791044776, "grad_norm": 19.330625534057617, "learning_rate": 9.779761904761906e-06, "loss": 40.7885, "step": 4736 }, { "epoch": 112.78805970149254, "grad_norm": 24.649667739868164, "learning_rate": 9.777777777777779e-06, "loss": 41.5939, "step": 4737 }, { "epoch": 112.81194029850747, "grad_norm": 15.628157615661621, "learning_rate": 9.775793650793651e-06, "loss": 40.9676, "step": 4738 }, { "epoch": 112.83582089552239, "grad_norm": 18.18578338623047, "learning_rate": 9.773809523809524e-06, "loss": 40.0681, "step": 4739 }, { "epoch": 112.85970149253731, "grad_norm": 16.768980026245117, "learning_rate": 9.771825396825397e-06, "loss": 42.2564, "step": 4740 }, { "epoch": 112.88358208955223, "grad_norm": 18.52190399169922, "learning_rate": 9.769841269841271e-06, "loss": 42.4806, "step": 4741 }, { "epoch": 112.90746268656716, "grad_norm": 20.884937286376953, "learning_rate": 9.767857142857144e-06, "loss": 41.2333, "step": 4742 }, { "epoch": 112.9313432835821, "grad_norm": 20.760377883911133, "learning_rate": 9.765873015873017e-06, "loss": 41.3071, "step": 4743 }, { "epoch": 112.95522388059702, "grad_norm": 19.27536392211914, "learning_rate": 9.76388888888889e-06, "loss": 42.3135, "step": 4744 }, { "epoch": 112.97910447761194, "grad_norm": 16.836727142333984, "learning_rate": 9.761904761904762e-06, "loss": 40.9553, "step": 4745 }, { "epoch": 113.0, "grad_norm": 15.910188674926758, "learning_rate": 9.759920634920635e-06, "loss": 35.1574, "step": 4746 }, { "epoch": 113.02388059701492, "grad_norm": 25.05491828918457, "learning_rate": 9.757936507936509e-06, "loss": 40.585, "step": 4747 }, { "epoch": 113.04776119402985, "grad_norm": NaN, "learning_rate": 9.755952380952382e-06, "loss": 62.2866, "step": 4748 }, { "epoch": 113.07164179104478, "grad_norm": 15.88016414642334, "learning_rate": 9.755952380952382e-06, "loss": 41.1309, "step": 4749 }, { "epoch": 113.0955223880597, "grad_norm": NaN, "learning_rate": 9.753968253968254e-06, "loss": 48.2293, "step": 4750 }, { "epoch": 113.11940298507463, "grad_norm": 24.244104385375977, "learning_rate": 9.753968253968254e-06, "loss": 42.1546, "step": 4751 }, { "epoch": 113.14328358208955, "grad_norm": 24.652694702148438, "learning_rate": 9.751984126984127e-06, "loss": 41.6784, "step": 4752 }, { "epoch": 113.16716417910447, "grad_norm": 17.30400276184082, "learning_rate": 9.75e-06, "loss": 41.3338, "step": 4753 }, { "epoch": 113.1910447761194, "grad_norm": 22.837020874023438, "learning_rate": 9.748015873015874e-06, "loss": 39.9112, "step": 4754 }, { "epoch": 113.21492537313434, "grad_norm": NaN, "learning_rate": 9.746031746031747e-06, "loss": 51.4889, "step": 4755 }, { "epoch": 113.23880597014926, "grad_norm": 19.977386474609375, "learning_rate": 9.746031746031747e-06, "loss": 40.8136, "step": 4756 }, { "epoch": 113.26268656716418, "grad_norm": 17.338441848754883, "learning_rate": 9.74404761904762e-06, "loss": 41.41, "step": 4757 }, { "epoch": 113.2865671641791, "grad_norm": 17.25606346130371, "learning_rate": 9.742063492063492e-06, "loss": 42.0376, "step": 4758 }, { "epoch": 113.31044776119403, "grad_norm": 18.690338134765625, "learning_rate": 9.740079365079365e-06, "loss": 39.8714, "step": 4759 }, { "epoch": 113.33432835820895, "grad_norm": 20.5388240814209, "learning_rate": 9.73809523809524e-06, "loss": 40.7767, "step": 4760 }, { "epoch": 113.35820895522389, "grad_norm": 20.36353302001953, "learning_rate": 9.736111111111112e-06, "loss": 42.7652, "step": 4761 }, { "epoch": 113.38208955223881, "grad_norm": 17.473264694213867, "learning_rate": 9.734126984126985e-06, "loss": 41.3501, "step": 4762 }, { "epoch": 113.40597014925373, "grad_norm": 16.665048599243164, "learning_rate": 9.732142857142858e-06, "loss": 40.8948, "step": 4763 }, { "epoch": 113.42985074626866, "grad_norm": 18.917985916137695, "learning_rate": 9.73015873015873e-06, "loss": 42.4226, "step": 4764 }, { "epoch": 113.45373134328358, "grad_norm": 15.448834419250488, "learning_rate": 9.728174603174605e-06, "loss": 41.5246, "step": 4765 }, { "epoch": 113.4776119402985, "grad_norm": 16.92607879638672, "learning_rate": 9.726190476190477e-06, "loss": 41.1058, "step": 4766 }, { "epoch": 113.50149253731344, "grad_norm": 16.17359161376953, "learning_rate": 9.72420634920635e-06, "loss": 41.4232, "step": 4767 }, { "epoch": 113.52537313432836, "grad_norm": 16.6822452545166, "learning_rate": 9.722222222222223e-06, "loss": 41.9703, "step": 4768 }, { "epoch": 113.54925373134328, "grad_norm": 16.724811553955078, "learning_rate": 9.720238095238095e-06, "loss": 41.4117, "step": 4769 }, { "epoch": 113.57313432835821, "grad_norm": 16.85785484313965, "learning_rate": 9.71825396825397e-06, "loss": 41.4467, "step": 4770 }, { "epoch": 113.59701492537313, "grad_norm": 19.173654556274414, "learning_rate": 9.716269841269842e-06, "loss": 40.871, "step": 4771 }, { "epoch": 113.62089552238805, "grad_norm": 16.131881713867188, "learning_rate": 9.714285714285715e-06, "loss": 42.595, "step": 4772 }, { "epoch": 113.64477611940299, "grad_norm": 15.41543960571289, "learning_rate": 9.712301587301588e-06, "loss": 41.7077, "step": 4773 }, { "epoch": 113.66865671641791, "grad_norm": 19.808330535888672, "learning_rate": 9.71031746031746e-06, "loss": 40.8761, "step": 4774 }, { "epoch": 113.69253731343284, "grad_norm": 16.406370162963867, "learning_rate": 9.708333333333333e-06, "loss": 41.1769, "step": 4775 }, { "epoch": 113.71641791044776, "grad_norm": 20.239530563354492, "learning_rate": 9.706349206349208e-06, "loss": 40.8274, "step": 4776 }, { "epoch": 113.74029850746268, "grad_norm": 18.771743774414062, "learning_rate": 9.70436507936508e-06, "loss": 41.4099, "step": 4777 }, { "epoch": 113.7641791044776, "grad_norm": 18.418540954589844, "learning_rate": 9.702380952380953e-06, "loss": 39.6443, "step": 4778 }, { "epoch": 113.78805970149254, "grad_norm": 21.50214958190918, "learning_rate": 9.700396825396826e-06, "loss": 41.6937, "step": 4779 }, { "epoch": 113.81194029850747, "grad_norm": 22.449935913085938, "learning_rate": 9.698412698412698e-06, "loss": 41.7069, "step": 4780 }, { "epoch": 113.83582089552239, "grad_norm": 15.33384895324707, "learning_rate": 9.696428571428573e-06, "loss": 40.6666, "step": 4781 }, { "epoch": 113.85970149253731, "grad_norm": 21.013437271118164, "learning_rate": 9.694444444444446e-06, "loss": 40.5768, "step": 4782 }, { "epoch": 113.88358208955223, "grad_norm": 19.128190994262695, "learning_rate": 9.692460317460318e-06, "loss": 41.4668, "step": 4783 }, { "epoch": 113.90746268656716, "grad_norm": 23.851394653320312, "learning_rate": 9.690476190476191e-06, "loss": 41.1051, "step": 4784 }, { "epoch": 113.9313432835821, "grad_norm": 21.990671157836914, "learning_rate": 9.688492063492064e-06, "loss": 41.6264, "step": 4785 }, { "epoch": 113.95522388059702, "grad_norm": 16.185327529907227, "learning_rate": 9.686507936507938e-06, "loss": 41.8408, "step": 4786 }, { "epoch": 113.97910447761194, "grad_norm": 30.063560485839844, "learning_rate": 9.68452380952381e-06, "loss": 41.2658, "step": 4787 }, { "epoch": 114.0, "grad_norm": 19.5380916595459, "learning_rate": 9.682539682539683e-06, "loss": 36.7106, "step": 4788 }, { "epoch": 114.02388059701492, "grad_norm": 26.1965389251709, "learning_rate": 9.680555555555556e-06, "loss": 42.3092, "step": 4789 }, { "epoch": 114.04776119402985, "grad_norm": 19.98543930053711, "learning_rate": 9.678571428571429e-06, "loss": 41.2309, "step": 4790 }, { "epoch": 114.07164179104478, "grad_norm": 26.361085891723633, "learning_rate": 9.676587301587303e-06, "loss": 41.9058, "step": 4791 }, { "epoch": 114.0955223880597, "grad_norm": 23.132400512695312, "learning_rate": 9.674603174603176e-06, "loss": 43.0372, "step": 4792 }, { "epoch": 114.11940298507463, "grad_norm": 25.199525833129883, "learning_rate": 9.672619047619049e-06, "loss": 41.5403, "step": 4793 }, { "epoch": 114.14328358208955, "grad_norm": 23.17612075805664, "learning_rate": 9.670634920634921e-06, "loss": 41.0863, "step": 4794 }, { "epoch": 114.16716417910447, "grad_norm": 23.930667877197266, "learning_rate": 9.668650793650794e-06, "loss": 40.8035, "step": 4795 }, { "epoch": 114.1910447761194, "grad_norm": 23.487939834594727, "learning_rate": 9.666666666666667e-06, "loss": 39.6217, "step": 4796 }, { "epoch": 114.21492537313434, "grad_norm": 23.342439651489258, "learning_rate": 9.664682539682541e-06, "loss": 42.0502, "step": 4797 }, { "epoch": 114.23880597014926, "grad_norm": 25.328317642211914, "learning_rate": 9.662698412698414e-06, "loss": 40.3101, "step": 4798 }, { "epoch": 114.26268656716418, "grad_norm": 18.363313674926758, "learning_rate": 9.660714285714287e-06, "loss": 40.5746, "step": 4799 }, { "epoch": 114.2865671641791, "grad_norm": 24.081649780273438, "learning_rate": 9.65873015873016e-06, "loss": 42.0376, "step": 4800 }, { "epoch": 114.31044776119403, "grad_norm": 20.24997329711914, "learning_rate": 9.656746031746032e-06, "loss": 40.5347, "step": 4801 }, { "epoch": 114.33432835820895, "grad_norm": 14.942011833190918, "learning_rate": 9.654761904761906e-06, "loss": 41.7814, "step": 4802 }, { "epoch": 114.35820895522389, "grad_norm": 22.662822723388672, "learning_rate": 9.652777777777779e-06, "loss": 41.767, "step": 4803 }, { "epoch": 114.38208955223881, "grad_norm": 19.27354621887207, "learning_rate": 9.650793650793652e-06, "loss": 40.7947, "step": 4804 }, { "epoch": 114.40597014925373, "grad_norm": 14.431193351745605, "learning_rate": 9.648809523809524e-06, "loss": 42.3785, "step": 4805 }, { "epoch": 114.42985074626866, "grad_norm": 15.706212043762207, "learning_rate": 9.646825396825397e-06, "loss": 42.0003, "step": 4806 }, { "epoch": 114.45373134328358, "grad_norm": 17.65169906616211, "learning_rate": 9.644841269841271e-06, "loss": 41.968, "step": 4807 }, { "epoch": 114.4776119402985, "grad_norm": 16.792739868164062, "learning_rate": 9.642857142857144e-06, "loss": 41.1987, "step": 4808 }, { "epoch": 114.50149253731344, "grad_norm": 20.06905746459961, "learning_rate": 9.640873015873017e-06, "loss": 41.0098, "step": 4809 }, { "epoch": 114.52537313432836, "grad_norm": 24.13865852355957, "learning_rate": 9.63888888888889e-06, "loss": 41.5633, "step": 4810 }, { "epoch": 114.54925373134328, "grad_norm": 16.85896873474121, "learning_rate": 9.636904761904762e-06, "loss": 41.7772, "step": 4811 }, { "epoch": 114.57313432835821, "grad_norm": 15.44628620147705, "learning_rate": 9.634920634920637e-06, "loss": 40.0732, "step": 4812 }, { "epoch": 114.59701492537313, "grad_norm": 18.970260620117188, "learning_rate": 9.63293650793651e-06, "loss": 42.318, "step": 4813 }, { "epoch": 114.62089552238805, "grad_norm": 16.574501037597656, "learning_rate": 9.630952380952382e-06, "loss": 40.0387, "step": 4814 }, { "epoch": 114.64477611940299, "grad_norm": 18.372955322265625, "learning_rate": 9.628968253968255e-06, "loss": 41.5759, "step": 4815 }, { "epoch": 114.66865671641791, "grad_norm": 21.253253936767578, "learning_rate": 9.626984126984127e-06, "loss": 40.2675, "step": 4816 }, { "epoch": 114.69253731343284, "grad_norm": 19.223817825317383, "learning_rate": 9.625e-06, "loss": 41.1779, "step": 4817 }, { "epoch": 114.71641791044776, "grad_norm": 17.391407012939453, "learning_rate": 9.623015873015875e-06, "loss": 40.9899, "step": 4818 }, { "epoch": 114.74029850746268, "grad_norm": 21.367889404296875, "learning_rate": 9.621031746031747e-06, "loss": 40.1854, "step": 4819 }, { "epoch": 114.7641791044776, "grad_norm": 21.202396392822266, "learning_rate": 9.61904761904762e-06, "loss": 41.5819, "step": 4820 }, { "epoch": 114.78805970149254, "grad_norm": 14.345793724060059, "learning_rate": 9.617063492063493e-06, "loss": 41.7843, "step": 4821 }, { "epoch": 114.81194029850747, "grad_norm": 16.483112335205078, "learning_rate": 9.615079365079365e-06, "loss": 40.9715, "step": 4822 }, { "epoch": 114.83582089552239, "grad_norm": 16.397315979003906, "learning_rate": 9.61309523809524e-06, "loss": 40.8702, "step": 4823 }, { "epoch": 114.85970149253731, "grad_norm": 14.784750938415527, "learning_rate": 9.611111111111112e-06, "loss": 40.5076, "step": 4824 }, { "epoch": 114.88358208955223, "grad_norm": 21.29036521911621, "learning_rate": 9.609126984126985e-06, "loss": 41.0657, "step": 4825 }, { "epoch": 114.90746268656716, "grad_norm": 19.237743377685547, "learning_rate": 9.607142857142858e-06, "loss": 40.7839, "step": 4826 }, { "epoch": 114.9313432835821, "grad_norm": 17.527833938598633, "learning_rate": 9.60515873015873e-06, "loss": 41.3853, "step": 4827 }, { "epoch": 114.95522388059702, "grad_norm": 16.477439880371094, "learning_rate": 9.603174603174605e-06, "loss": 41.3862, "step": 4828 }, { "epoch": 114.97910447761194, "grad_norm": 16.46197509765625, "learning_rate": 9.601190476190478e-06, "loss": 41.9143, "step": 4829 }, { "epoch": 115.0, "grad_norm": 18.8862361907959, "learning_rate": 9.59920634920635e-06, "loss": 36.444, "step": 4830 }, { "epoch": 115.02388059701492, "grad_norm": 22.985044479370117, "learning_rate": 9.597222222222223e-06, "loss": 41.3098, "step": 4831 }, { "epoch": 115.04776119402985, "grad_norm": 17.263700485229492, "learning_rate": 9.595238095238096e-06, "loss": 41.2013, "step": 4832 }, { "epoch": 115.07164179104478, "grad_norm": 21.497802734375, "learning_rate": 9.59325396825397e-06, "loss": 40.4798, "step": 4833 }, { "epoch": 115.0955223880597, "grad_norm": 20.014450073242188, "learning_rate": 9.591269841269843e-06, "loss": 41.2098, "step": 4834 }, { "epoch": 115.11940298507463, "grad_norm": 18.972618103027344, "learning_rate": 9.589285714285716e-06, "loss": 41.7606, "step": 4835 }, { "epoch": 115.14328358208955, "grad_norm": 14.9144287109375, "learning_rate": 9.587301587301588e-06, "loss": 40.7529, "step": 4836 }, { "epoch": 115.16716417910447, "grad_norm": 24.37519073486328, "learning_rate": 9.585317460317461e-06, "loss": 41.7598, "step": 4837 }, { "epoch": 115.1910447761194, "grad_norm": 23.033283233642578, "learning_rate": 9.583333333333335e-06, "loss": 41.4316, "step": 4838 }, { "epoch": 115.21492537313434, "grad_norm": 20.98251724243164, "learning_rate": 9.581349206349208e-06, "loss": 40.3066, "step": 4839 }, { "epoch": 115.23880597014926, "grad_norm": 21.950714111328125, "learning_rate": 9.57936507936508e-06, "loss": 40.1732, "step": 4840 }, { "epoch": 115.26268656716418, "grad_norm": 22.479713439941406, "learning_rate": 9.577380952380953e-06, "loss": 41.586, "step": 4841 }, { "epoch": 115.2865671641791, "grad_norm": 16.739639282226562, "learning_rate": 9.575396825396826e-06, "loss": 42.143, "step": 4842 }, { "epoch": 115.31044776119403, "grad_norm": 23.182594299316406, "learning_rate": 9.573412698412699e-06, "loss": 42.4852, "step": 4843 }, { "epoch": 115.33432835820895, "grad_norm": 23.18885040283203, "learning_rate": 9.571428571428573e-06, "loss": 40.3618, "step": 4844 }, { "epoch": 115.35820895522389, "grad_norm": 15.238030433654785, "learning_rate": 9.569444444444446e-06, "loss": 41.3859, "step": 4845 }, { "epoch": 115.38208955223881, "grad_norm": 28.07355308532715, "learning_rate": 9.567460317460319e-06, "loss": 41.1147, "step": 4846 }, { "epoch": 115.40597014925373, "grad_norm": 21.76200294494629, "learning_rate": 9.565476190476191e-06, "loss": 41.6603, "step": 4847 }, { "epoch": 115.42985074626866, "grad_norm": 32.459312438964844, "learning_rate": 9.563492063492064e-06, "loss": 40.7283, "step": 4848 }, { "epoch": 115.45373134328358, "grad_norm": 22.368288040161133, "learning_rate": 9.561507936507938e-06, "loss": 40.4951, "step": 4849 }, { "epoch": 115.4776119402985, "grad_norm": 22.91469955444336, "learning_rate": 9.559523809523811e-06, "loss": 41.117, "step": 4850 }, { "epoch": 115.50149253731344, "grad_norm": 20.357376098632812, "learning_rate": 9.557539682539684e-06, "loss": 41.753, "step": 4851 }, { "epoch": 115.52537313432836, "grad_norm": 21.377849578857422, "learning_rate": 9.555555555555556e-06, "loss": 41.8999, "step": 4852 }, { "epoch": 115.54925373134328, "grad_norm": 33.38006591796875, "learning_rate": 9.55357142857143e-06, "loss": 41.1317, "step": 4853 }, { "epoch": 115.57313432835821, "grad_norm": 21.435209274291992, "learning_rate": 9.551587301587304e-06, "loss": 40.1686, "step": 4854 }, { "epoch": 115.59701492537313, "grad_norm": 31.958423614501953, "learning_rate": 9.549603174603176e-06, "loss": 42.572, "step": 4855 }, { "epoch": 115.62089552238805, "grad_norm": 21.460599899291992, "learning_rate": 9.547619047619049e-06, "loss": 40.5071, "step": 4856 }, { "epoch": 115.64477611940299, "grad_norm": 33.65336227416992, "learning_rate": 9.545634920634922e-06, "loss": 41.7753, "step": 4857 }, { "epoch": 115.66865671641791, "grad_norm": 23.594022750854492, "learning_rate": 9.543650793650794e-06, "loss": 41.4436, "step": 4858 }, { "epoch": 115.69253731343284, "grad_norm": 23.563594818115234, "learning_rate": 9.541666666666669e-06, "loss": 39.9414, "step": 4859 }, { "epoch": 115.71641791044776, "grad_norm": 24.98297882080078, "learning_rate": 9.539682539682541e-06, "loss": 40.8619, "step": 4860 }, { "epoch": 115.74029850746268, "grad_norm": 22.393163681030273, "learning_rate": 9.537698412698414e-06, "loss": 42.8338, "step": 4861 }, { "epoch": 115.7641791044776, "grad_norm": 30.07286834716797, "learning_rate": 9.535714285714287e-06, "loss": 41.2226, "step": 4862 }, { "epoch": 115.78805970149254, "grad_norm": 22.388198852539062, "learning_rate": 9.53373015873016e-06, "loss": 41.1935, "step": 4863 }, { "epoch": 115.81194029850747, "grad_norm": 33.4913215637207, "learning_rate": 9.531746031746032e-06, "loss": 42.5784, "step": 4864 }, { "epoch": 115.83582089552239, "grad_norm": 25.117082595825195, "learning_rate": 9.529761904761905e-06, "loss": 39.364, "step": 4865 }, { "epoch": 115.85970149253731, "grad_norm": 37.31660079956055, "learning_rate": 9.527777777777778e-06, "loss": 41.5319, "step": 4866 }, { "epoch": 115.88358208955223, "grad_norm": 28.936159133911133, "learning_rate": 9.52579365079365e-06, "loss": 41.757, "step": 4867 }, { "epoch": 115.90746268656716, "grad_norm": 34.599647521972656, "learning_rate": 9.523809523809525e-06, "loss": 41.6518, "step": 4868 }, { "epoch": 115.9313432835821, "grad_norm": 27.539873123168945, "learning_rate": 9.521825396825397e-06, "loss": 40.9794, "step": 4869 }, { "epoch": 115.95522388059702, "grad_norm": 37.74484634399414, "learning_rate": 9.51984126984127e-06, "loss": 40.8585, "step": 4870 }, { "epoch": 115.97910447761194, "grad_norm": 32.444847106933594, "learning_rate": 9.517857142857143e-06, "loss": 41.7152, "step": 4871 }, { "epoch": 116.0, "grad_norm": 32.239253997802734, "learning_rate": 9.515873015873016e-06, "loss": 35.2825, "step": 4872 }, { "epoch": 116.02388059701492, "grad_norm": 35.12287521362305, "learning_rate": 9.51388888888889e-06, "loss": 41.7451, "step": 4873 }, { "epoch": 116.04776119402985, "grad_norm": 28.03133773803711, "learning_rate": 9.511904761904763e-06, "loss": 40.8461, "step": 4874 }, { "epoch": 116.07164179104478, "grad_norm": 25.59912872314453, "learning_rate": 9.509920634920635e-06, "loss": 41.5307, "step": 4875 }, { "epoch": 116.0955223880597, "grad_norm": 31.361936569213867, "learning_rate": 9.507936507936508e-06, "loss": 41.9054, "step": 4876 }, { "epoch": 116.11940298507463, "grad_norm": 21.869449615478516, "learning_rate": 9.50595238095238e-06, "loss": 40.38, "step": 4877 }, { "epoch": 116.14328358208955, "grad_norm": 38.86557388305664, "learning_rate": 9.503968253968255e-06, "loss": 42.0518, "step": 4878 }, { "epoch": 116.16716417910447, "grad_norm": 31.712495803833008, "learning_rate": 9.501984126984128e-06, "loss": 40.2141, "step": 4879 }, { "epoch": 116.1910447761194, "grad_norm": 34.77455520629883, "learning_rate": 9.5e-06, "loss": 41.5116, "step": 4880 }, { "epoch": 116.21492537313434, "grad_norm": 28.530269622802734, "learning_rate": 9.498015873015873e-06, "loss": 40.6907, "step": 4881 }, { "epoch": 116.23880597014926, "grad_norm": 28.550081253051758, "learning_rate": 9.496031746031746e-06, "loss": 41.0168, "step": 4882 }, { "epoch": 116.26268656716418, "grad_norm": 28.081035614013672, "learning_rate": 9.494047619047619e-06, "loss": 42.3482, "step": 4883 }, { "epoch": 116.2865671641791, "grad_norm": 39.402713775634766, "learning_rate": 9.492063492063493e-06, "loss": 41.3423, "step": 4884 }, { "epoch": 116.31044776119403, "grad_norm": 30.37664794921875, "learning_rate": 9.490079365079366e-06, "loss": 41.0571, "step": 4885 }, { "epoch": 116.33432835820895, "grad_norm": 33.314979553222656, "learning_rate": 9.488095238095238e-06, "loss": 41.7844, "step": 4886 }, { "epoch": 116.35820895522389, "grad_norm": 31.91356658935547, "learning_rate": 9.486111111111111e-06, "loss": 42.6115, "step": 4887 }, { "epoch": 116.38208955223881, "grad_norm": 33.23076629638672, "learning_rate": 9.484126984126984e-06, "loss": 42.9912, "step": 4888 }, { "epoch": 116.40597014925373, "grad_norm": 33.23727798461914, "learning_rate": 9.482142857142858e-06, "loss": 40.2839, "step": 4889 }, { "epoch": 116.42985074626866, "grad_norm": 34.349090576171875, "learning_rate": 9.480158730158731e-06, "loss": 41.3853, "step": 4890 }, { "epoch": 116.45373134328358, "grad_norm": 28.603391647338867, "learning_rate": 9.478174603174604e-06, "loss": 41.8607, "step": 4891 }, { "epoch": 116.4776119402985, "grad_norm": 30.6513671875, "learning_rate": 9.476190476190476e-06, "loss": 40.6123, "step": 4892 }, { "epoch": 116.50149253731344, "grad_norm": 26.542037963867188, "learning_rate": 9.474206349206349e-06, "loss": 40.7056, "step": 4893 }, { "epoch": 116.52537313432836, "grad_norm": 33.709774017333984, "learning_rate": 9.472222222222223e-06, "loss": 41.8717, "step": 4894 }, { "epoch": 116.54925373134328, "grad_norm": 29.847158432006836, "learning_rate": 9.470238095238096e-06, "loss": 39.7896, "step": 4895 }, { "epoch": 116.57313432835821, "grad_norm": 29.366252899169922, "learning_rate": 9.468253968253969e-06, "loss": 40.6317, "step": 4896 }, { "epoch": 116.59701492537313, "grad_norm": 27.17310905456543, "learning_rate": 9.466269841269841e-06, "loss": 41.57, "step": 4897 }, { "epoch": 116.62089552238805, "grad_norm": 29.52984619140625, "learning_rate": 9.464285714285714e-06, "loss": 41.313, "step": 4898 }, { "epoch": 116.64477611940299, "grad_norm": 25.72901725769043, "learning_rate": 9.462301587301589e-06, "loss": 39.4479, "step": 4899 }, { "epoch": 116.66865671641791, "grad_norm": 36.030372619628906, "learning_rate": 9.460317460317461e-06, "loss": 41.6829, "step": 4900 }, { "epoch": 116.69253731343284, "grad_norm": 30.29513168334961, "learning_rate": 9.458333333333334e-06, "loss": 41.8183, "step": 4901 }, { "epoch": 116.71641791044776, "grad_norm": 28.564956665039062, "learning_rate": 9.456349206349207e-06, "loss": 41.1474, "step": 4902 }, { "epoch": 116.74029850746268, "grad_norm": 24.22428321838379, "learning_rate": 9.45436507936508e-06, "loss": 41.2769, "step": 4903 }, { "epoch": 116.7641791044776, "grad_norm": 27.916051864624023, "learning_rate": 9.452380952380952e-06, "loss": 40.8082, "step": 4904 }, { "epoch": 116.78805970149254, "grad_norm": 20.302335739135742, "learning_rate": 9.450396825396826e-06, "loss": 41.0273, "step": 4905 }, { "epoch": 116.81194029850747, "grad_norm": 32.881134033203125, "learning_rate": 9.4484126984127e-06, "loss": 41.9168, "step": 4906 }, { "epoch": 116.83582089552239, "grad_norm": 26.058923721313477, "learning_rate": 9.446428571428572e-06, "loss": 41.0683, "step": 4907 }, { "epoch": 116.85970149253731, "grad_norm": 34.14630889892578, "learning_rate": 9.444444444444445e-06, "loss": 40.9509, "step": 4908 }, { "epoch": 116.88358208955223, "grad_norm": 31.35688018798828, "learning_rate": 9.442460317460317e-06, "loss": 40.551, "step": 4909 }, { "epoch": 116.90746268656716, "grad_norm": 24.473339080810547, "learning_rate": 9.440476190476192e-06, "loss": 39.3649, "step": 4910 }, { "epoch": 116.9313432835821, "grad_norm": 21.814205169677734, "learning_rate": 9.438492063492064e-06, "loss": 40.4577, "step": 4911 }, { "epoch": 116.95522388059702, "grad_norm": 29.724409103393555, "learning_rate": 9.436507936507937e-06, "loss": 40.6152, "step": 4912 }, { "epoch": 116.97910447761194, "grad_norm": 24.086170196533203, "learning_rate": 9.43452380952381e-06, "loss": 41.106, "step": 4913 }, { "epoch": 117.0, "grad_norm": 28.476037979125977, "learning_rate": 9.432539682539682e-06, "loss": 36.447, "step": 4914 }, { "epoch": 117.02388059701492, "grad_norm": 27.55150032043457, "learning_rate": 9.430555555555557e-06, "loss": 41.9582, "step": 4915 }, { "epoch": 117.04776119402985, "grad_norm": 28.565845489501953, "learning_rate": 9.42857142857143e-06, "loss": 40.9572, "step": 4916 }, { "epoch": 117.07164179104478, "grad_norm": 24.59885025024414, "learning_rate": 9.426587301587302e-06, "loss": 41.2797, "step": 4917 }, { "epoch": 117.0955223880597, "grad_norm": 21.83265495300293, "learning_rate": 9.424603174603175e-06, "loss": 41.1726, "step": 4918 }, { "epoch": 117.11940298507463, "grad_norm": 21.117053985595703, "learning_rate": 9.422619047619048e-06, "loss": 42.4423, "step": 4919 }, { "epoch": 117.14328358208955, "grad_norm": 26.478992462158203, "learning_rate": 9.420634920634922e-06, "loss": 40.2709, "step": 4920 }, { "epoch": 117.16716417910447, "grad_norm": 20.61237335205078, "learning_rate": 9.418650793650795e-06, "loss": 40.8788, "step": 4921 }, { "epoch": 117.1910447761194, "grad_norm": 32.1706657409668, "learning_rate": 9.416666666666667e-06, "loss": 41.6381, "step": 4922 }, { "epoch": 117.21492537313434, "grad_norm": 26.040164947509766, "learning_rate": 9.41468253968254e-06, "loss": 40.662, "step": 4923 }, { "epoch": 117.23880597014926, "grad_norm": 27.465307235717773, "learning_rate": 9.412698412698413e-06, "loss": 39.2348, "step": 4924 }, { "epoch": 117.26268656716418, "grad_norm": 28.407739639282227, "learning_rate": 9.410714285714286e-06, "loss": 40.981, "step": 4925 }, { "epoch": 117.2865671641791, "grad_norm": 26.080398559570312, "learning_rate": 9.40873015873016e-06, "loss": 39.726, "step": 4926 }, { "epoch": 117.31044776119403, "grad_norm": 23.23761749267578, "learning_rate": 9.406746031746033e-06, "loss": 41.9898, "step": 4927 }, { "epoch": 117.33432835820895, "grad_norm": 25.763086318969727, "learning_rate": 9.404761904761905e-06, "loss": 41.6503, "step": 4928 }, { "epoch": 117.35820895522389, "grad_norm": 25.27565574645996, "learning_rate": 9.402777777777778e-06, "loss": 41.5848, "step": 4929 }, { "epoch": 117.38208955223881, "grad_norm": 21.535991668701172, "learning_rate": 9.40079365079365e-06, "loss": 41.4816, "step": 4930 }, { "epoch": 117.40597014925373, "grad_norm": 20.212120056152344, "learning_rate": 9.398809523809525e-06, "loss": 40.8427, "step": 4931 }, { "epoch": 117.42985074626866, "grad_norm": 24.479822158813477, "learning_rate": 9.396825396825398e-06, "loss": 41.3141, "step": 4932 }, { "epoch": 117.45373134328358, "grad_norm": 14.332042694091797, "learning_rate": 9.39484126984127e-06, "loss": 41.4974, "step": 4933 }, { "epoch": 117.4776119402985, "grad_norm": 22.84208869934082, "learning_rate": 9.392857142857143e-06, "loss": 41.8713, "step": 4934 }, { "epoch": 117.50149253731344, "grad_norm": 18.916187286376953, "learning_rate": 9.390873015873016e-06, "loss": 41.2954, "step": 4935 }, { "epoch": 117.52537313432836, "grad_norm": 22.096107482910156, "learning_rate": 9.38888888888889e-06, "loss": 40.7045, "step": 4936 }, { "epoch": 117.54925373134328, "grad_norm": 20.42098045349121, "learning_rate": 9.386904761904763e-06, "loss": 42.4039, "step": 4937 }, { "epoch": 117.57313432835821, "grad_norm": 19.17930793762207, "learning_rate": 9.384920634920636e-06, "loss": 41.2849, "step": 4938 }, { "epoch": 117.59701492537313, "grad_norm": 18.003908157348633, "learning_rate": 9.382936507936508e-06, "loss": 41.2694, "step": 4939 }, { "epoch": 117.62089552238805, "grad_norm": 21.67378044128418, "learning_rate": 9.380952380952381e-06, "loss": 41.4086, "step": 4940 }, { "epoch": 117.64477611940299, "grad_norm": 14.220067024230957, "learning_rate": 9.378968253968255e-06, "loss": 40.5293, "step": 4941 }, { "epoch": 117.66865671641791, "grad_norm": 17.12972640991211, "learning_rate": 9.376984126984128e-06, "loss": 40.7469, "step": 4942 }, { "epoch": 117.69253731343284, "grad_norm": 21.055694580078125, "learning_rate": 9.375000000000001e-06, "loss": 39.6643, "step": 4943 }, { "epoch": 117.71641791044776, "grad_norm": 17.032026290893555, "learning_rate": 9.373015873015874e-06, "loss": 39.6835, "step": 4944 }, { "epoch": 117.74029850746268, "grad_norm": 22.909225463867188, "learning_rate": 9.371031746031746e-06, "loss": 41.411, "step": 4945 }, { "epoch": 117.7641791044776, "grad_norm": 15.6399564743042, "learning_rate": 9.36904761904762e-06, "loss": 41.0838, "step": 4946 }, { "epoch": 117.78805970149254, "grad_norm": 22.99868392944336, "learning_rate": 9.367063492063493e-06, "loss": 41.1988, "step": 4947 }, { "epoch": 117.81194029850747, "grad_norm": 19.78955841064453, "learning_rate": 9.365079365079366e-06, "loss": 41.5181, "step": 4948 }, { "epoch": 117.83582089552239, "grad_norm": 21.281328201293945, "learning_rate": 9.363095238095239e-06, "loss": 40.5115, "step": 4949 }, { "epoch": 117.85970149253731, "grad_norm": 19.100648880004883, "learning_rate": 9.361111111111111e-06, "loss": 40.3604, "step": 4950 }, { "epoch": 117.88358208955223, "grad_norm": 24.486183166503906, "learning_rate": 9.359126984126984e-06, "loss": 42.065, "step": 4951 }, { "epoch": 117.90746268656716, "grad_norm": 20.265453338623047, "learning_rate": 9.357142857142859e-06, "loss": 42.1137, "step": 4952 }, { "epoch": 117.9313432835821, "grad_norm": 21.281848907470703, "learning_rate": 9.355158730158731e-06, "loss": 42.0899, "step": 4953 }, { "epoch": 117.95522388059702, "grad_norm": 21.65452766418457, "learning_rate": 9.353174603174604e-06, "loss": 41.4076, "step": 4954 }, { "epoch": 117.97910447761194, "grad_norm": 19.85662841796875, "learning_rate": 9.351190476190477e-06, "loss": 40.9143, "step": 4955 }, { "epoch": 118.0, "grad_norm": 16.60548210144043, "learning_rate": 9.34920634920635e-06, "loss": 35.2268, "step": 4956 }, { "epoch": 118.02388059701492, "grad_norm": 19.02985382080078, "learning_rate": 9.347222222222224e-06, "loss": 41.6227, "step": 4957 }, { "epoch": 118.04776119402985, "grad_norm": 20.057069778442383, "learning_rate": 9.345238095238096e-06, "loss": 39.6729, "step": 4958 }, { "epoch": 118.07164179104478, "grad_norm": 16.330196380615234, "learning_rate": 9.343253968253969e-06, "loss": 41.2542, "step": 4959 }, { "epoch": 118.0955223880597, "grad_norm": 18.172393798828125, "learning_rate": 9.341269841269842e-06, "loss": 40.0607, "step": 4960 }, { "epoch": 118.11940298507463, "grad_norm": 20.96540069580078, "learning_rate": 9.339285714285715e-06, "loss": 40.585, "step": 4961 }, { "epoch": 118.14328358208955, "grad_norm": 14.967394828796387, "learning_rate": 9.337301587301589e-06, "loss": 40.2613, "step": 4962 }, { "epoch": 118.16716417910447, "grad_norm": 18.953601837158203, "learning_rate": 9.335317460317462e-06, "loss": 39.38, "step": 4963 }, { "epoch": 118.1910447761194, "grad_norm": 15.904739379882812, "learning_rate": 9.333333333333334e-06, "loss": 41.6314, "step": 4964 }, { "epoch": 118.21492537313434, "grad_norm": 24.293170928955078, "learning_rate": 9.331349206349207e-06, "loss": 40.5077, "step": 4965 }, { "epoch": 118.23880597014926, "grad_norm": 20.04494857788086, "learning_rate": 9.32936507936508e-06, "loss": 40.8951, "step": 4966 }, { "epoch": 118.26268656716418, "grad_norm": 23.613727569580078, "learning_rate": 9.327380952380954e-06, "loss": 42.0233, "step": 4967 }, { "epoch": 118.2865671641791, "grad_norm": 23.967741012573242, "learning_rate": 9.325396825396827e-06, "loss": 41.0547, "step": 4968 }, { "epoch": 118.31044776119403, "grad_norm": 19.54030418395996, "learning_rate": 9.3234126984127e-06, "loss": 41.2887, "step": 4969 }, { "epoch": 118.33432835820895, "grad_norm": 23.12442398071289, "learning_rate": 9.321428571428572e-06, "loss": 40.5083, "step": 4970 }, { "epoch": 118.35820895522389, "grad_norm": 21.34069061279297, "learning_rate": 9.319444444444445e-06, "loss": 41.3474, "step": 4971 }, { "epoch": 118.38208955223881, "grad_norm": 20.411256790161133, "learning_rate": 9.317460317460318e-06, "loss": 40.3927, "step": 4972 }, { "epoch": 118.40597014925373, "grad_norm": 21.702983856201172, "learning_rate": 9.315476190476192e-06, "loss": 41.2522, "step": 4973 }, { "epoch": 118.42985074626866, "grad_norm": 20.09593963623047, "learning_rate": 9.313492063492065e-06, "loss": 40.8607, "step": 4974 }, { "epoch": 118.45373134328358, "grad_norm": 16.693893432617188, "learning_rate": 9.311507936507937e-06, "loss": 41.9847, "step": 4975 }, { "epoch": 118.4776119402985, "grad_norm": 16.682085037231445, "learning_rate": 9.30952380952381e-06, "loss": 41.3428, "step": 4976 }, { "epoch": 118.50149253731344, "grad_norm": 16.73056983947754, "learning_rate": 9.307539682539683e-06, "loss": 40.8279, "step": 4977 }, { "epoch": 118.52537313432836, "grad_norm": 16.317480087280273, "learning_rate": 9.305555555555557e-06, "loss": 40.4602, "step": 4978 }, { "epoch": 118.54925373134328, "grad_norm": 15.660470008850098, "learning_rate": 9.30357142857143e-06, "loss": 40.7565, "step": 4979 }, { "epoch": 118.57313432835821, "grad_norm": 21.601036071777344, "learning_rate": 9.301587301587303e-06, "loss": 41.7317, "step": 4980 }, { "epoch": 118.59701492537313, "grad_norm": 16.545438766479492, "learning_rate": 9.299603174603175e-06, "loss": 42.1659, "step": 4981 }, { "epoch": 118.62089552238805, "grad_norm": 20.3563175201416, "learning_rate": 9.297619047619048e-06, "loss": 39.8948, "step": 4982 }, { "epoch": 118.64477611940299, "grad_norm": 19.03108024597168, "learning_rate": 9.295634920634922e-06, "loss": 40.6225, "step": 4983 }, { "epoch": 118.66865671641791, "grad_norm": 18.866544723510742, "learning_rate": 9.293650793650795e-06, "loss": 40.781, "step": 4984 }, { "epoch": 118.69253731343284, "grad_norm": 18.367883682250977, "learning_rate": 9.291666666666668e-06, "loss": 42.1775, "step": 4985 }, { "epoch": 118.71641791044776, "grad_norm": 17.574983596801758, "learning_rate": 9.28968253968254e-06, "loss": 40.7228, "step": 4986 }, { "epoch": 118.74029850746268, "grad_norm": 17.931612014770508, "learning_rate": 9.287698412698413e-06, "loss": 41.352, "step": 4987 }, { "epoch": 118.7641791044776, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 37.2747, "step": 4988 }, { "epoch": 118.78805970149254, "grad_norm": 19.131587982177734, "learning_rate": 9.285714285714288e-06, "loss": 41.442, "step": 4989 }, { "epoch": 118.81194029850747, "grad_norm": 19.01002311706543, "learning_rate": 9.28373015873016e-06, "loss": 40.1583, "step": 4990 }, { "epoch": 118.83582089552239, "grad_norm": 20.718921661376953, "learning_rate": 9.281746031746033e-06, "loss": 42.1721, "step": 4991 }, { "epoch": 118.85970149253731, "grad_norm": 24.149545669555664, "learning_rate": 9.279761904761906e-06, "loss": 39.6434, "step": 4992 }, { "epoch": 118.88358208955223, "grad_norm": 19.575162887573242, "learning_rate": 9.277777777777778e-06, "loss": 41.7524, "step": 4993 }, { "epoch": 118.90746268656716, "grad_norm": 21.472047805786133, "learning_rate": 9.275793650793653e-06, "loss": 41.5381, "step": 4994 }, { "epoch": 118.9313432835821, "grad_norm": 18.96376609802246, "learning_rate": 9.273809523809525e-06, "loss": 41.8712, "step": 4995 }, { "epoch": 118.95522388059702, "grad_norm": 20.816585540771484, "learning_rate": 9.271825396825398e-06, "loss": 42.7263, "step": 4996 }, { "epoch": 118.97910447761194, "grad_norm": 18.856704711914062, "learning_rate": 9.26984126984127e-06, "loss": 42.2396, "step": 4997 }, { "epoch": 119.0, "grad_norm": 17.700910568237305, "learning_rate": 9.267857142857144e-06, "loss": 35.0377, "step": 4998 }, { "epoch": 119.02388059701492, "grad_norm": 18.852880477905273, "learning_rate": 9.265873015873016e-06, "loss": 40.1171, "step": 4999 }, { "epoch": 119.04776119402985, "grad_norm": 17.4823055267334, "learning_rate": 9.26388888888889e-06, "loss": 39.7783, "step": 5000 }, { "epoch": 119.07164179104478, "grad_norm": 22.45401954650879, "learning_rate": 9.261904761904763e-06, "loss": 41.6926, "step": 5001 }, { "epoch": 119.0955223880597, "grad_norm": 19.38802719116211, "learning_rate": 9.259920634920636e-06, "loss": 41.0149, "step": 5002 }, { "epoch": 119.11940298507463, "grad_norm": 18.921022415161133, "learning_rate": 9.257936507936509e-06, "loss": 41.2486, "step": 5003 }, { "epoch": 119.14328358208955, "grad_norm": 22.00980567932129, "learning_rate": 9.255952380952381e-06, "loss": 40.8794, "step": 5004 }, { "epoch": 119.16716417910447, "grad_norm": 13.831929206848145, "learning_rate": 9.253968253968256e-06, "loss": 40.3292, "step": 5005 }, { "epoch": 119.1910447761194, "grad_norm": 20.504989624023438, "learning_rate": 9.251984126984129e-06, "loss": 41.5119, "step": 5006 }, { "epoch": 119.21492537313434, "grad_norm": 15.127291679382324, "learning_rate": 9.250000000000001e-06, "loss": 40.214, "step": 5007 }, { "epoch": 119.23880597014926, "grad_norm": 18.562606811523438, "learning_rate": 9.248015873015874e-06, "loss": 41.0757, "step": 5008 }, { "epoch": 119.26268656716418, "grad_norm": 20.99079132080078, "learning_rate": 9.246031746031747e-06, "loss": 41.3658, "step": 5009 }, { "epoch": 119.2865671641791, "grad_norm": 17.714588165283203, "learning_rate": 9.244047619047621e-06, "loss": 41.8379, "step": 5010 }, { "epoch": 119.31044776119403, "grad_norm": 20.95669174194336, "learning_rate": 9.242063492063494e-06, "loss": 40.6619, "step": 5011 }, { "epoch": 119.33432835820895, "grad_norm": 18.291975021362305, "learning_rate": 9.240079365079366e-06, "loss": 38.9992, "step": 5012 }, { "epoch": 119.35820895522389, "grad_norm": 14.831878662109375, "learning_rate": 9.238095238095239e-06, "loss": 41.5072, "step": 5013 }, { "epoch": 119.38208955223881, "grad_norm": 17.76835823059082, "learning_rate": 9.236111111111112e-06, "loss": 41.0227, "step": 5014 }, { "epoch": 119.40597014925373, "grad_norm": 15.433774948120117, "learning_rate": 9.234126984126986e-06, "loss": 40.4539, "step": 5015 }, { "epoch": 119.42985074626866, "grad_norm": 23.18012237548828, "learning_rate": 9.232142857142859e-06, "loss": 41.8991, "step": 5016 }, { "epoch": 119.45373134328358, "grad_norm": 17.35015106201172, "learning_rate": 9.230158730158732e-06, "loss": 40.189, "step": 5017 }, { "epoch": 119.4776119402985, "grad_norm": 19.60420036315918, "learning_rate": 9.228174603174604e-06, "loss": 41.602, "step": 5018 }, { "epoch": 119.50149253731344, "grad_norm": 20.470211029052734, "learning_rate": 9.226190476190477e-06, "loss": 42.1062, "step": 5019 }, { "epoch": 119.52537313432836, "grad_norm": 16.949901580810547, "learning_rate": 9.22420634920635e-06, "loss": 41.6508, "step": 5020 }, { "epoch": 119.54925373134328, "grad_norm": 22.598966598510742, "learning_rate": 9.222222222222224e-06, "loss": 39.9819, "step": 5021 }, { "epoch": 119.57313432835821, "grad_norm": 16.502370834350586, "learning_rate": 9.220238095238097e-06, "loss": 40.1142, "step": 5022 }, { "epoch": 119.59701492537313, "grad_norm": 20.456647872924805, "learning_rate": 9.218253968253968e-06, "loss": 41.6525, "step": 5023 }, { "epoch": 119.62089552238805, "grad_norm": 18.311965942382812, "learning_rate": 9.216269841269842e-06, "loss": 41.1592, "step": 5024 }, { "epoch": 119.64477611940299, "grad_norm": 19.683259963989258, "learning_rate": 9.214285714285715e-06, "loss": 40.853, "step": 5025 }, { "epoch": 119.66865671641791, "grad_norm": 20.134082794189453, "learning_rate": 9.212301587301588e-06, "loss": 40.3045, "step": 5026 }, { "epoch": 119.69253731343284, "grad_norm": 28.281267166137695, "learning_rate": 9.21031746031746e-06, "loss": 41.6703, "step": 5027 }, { "epoch": 119.71641791044776, "grad_norm": 22.25422477722168, "learning_rate": 9.208333333333333e-06, "loss": 41.0012, "step": 5028 }, { "epoch": 119.74029850746268, "grad_norm": 15.698911666870117, "learning_rate": 9.206349206349207e-06, "loss": 39.3874, "step": 5029 }, { "epoch": 119.7641791044776, "grad_norm": 22.822614669799805, "learning_rate": 9.20436507936508e-06, "loss": 42.7782, "step": 5030 }, { "epoch": 119.78805970149254, "grad_norm": 18.489330291748047, "learning_rate": 9.202380952380953e-06, "loss": 42.2175, "step": 5031 }, { "epoch": 119.81194029850747, "grad_norm": 23.18742561340332, "learning_rate": 9.200396825396825e-06, "loss": 42.1583, "step": 5032 }, { "epoch": 119.83582089552239, "grad_norm": 24.11537742614746, "learning_rate": 9.198412698412698e-06, "loss": 40.783, "step": 5033 }, { "epoch": 119.85970149253731, "grad_norm": 16.897441864013672, "learning_rate": 9.196428571428571e-06, "loss": 40.3459, "step": 5034 }, { "epoch": 119.88358208955223, "grad_norm": 20.22298812866211, "learning_rate": 9.194444444444445e-06, "loss": 40.9984, "step": 5035 }, { "epoch": 119.90746268656716, "grad_norm": 19.373756408691406, "learning_rate": 9.192460317460318e-06, "loss": 41.8363, "step": 5036 }, { "epoch": 119.9313432835821, "grad_norm": 16.265701293945312, "learning_rate": 9.19047619047619e-06, "loss": 40.9217, "step": 5037 }, { "epoch": 119.95522388059702, "grad_norm": 28.902698516845703, "learning_rate": 9.188492063492063e-06, "loss": 41.7966, "step": 5038 }, { "epoch": 119.97910447761194, "grad_norm": 19.491430282592773, "learning_rate": 9.186507936507936e-06, "loss": 41.2973, "step": 5039 }, { "epoch": 120.0, "grad_norm": 25.749500274658203, "learning_rate": 9.18452380952381e-06, "loss": 35.3125, "step": 5040 }, { "epoch": 120.0, "step": 5040, "total_flos": 2.4776207925060864e+17, "train_loss": 3.4518184624021013, "train_runtime": 12809.9419, "train_samples_per_second": 50.136, "train_steps_per_second": 0.393 }, { "epoch": 120.02388059701492, "grad_norm": 24.0944766998291, "learning_rate": 1e-05, "loss": 41.0597, "step": 5041 }, { "epoch": 120.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998168498168499e-06, "loss": 46.3783, "step": 5042 }, { "epoch": 120.07164179104478, "grad_norm": 259.0445861816406, "learning_rate": 9.998168498168499e-06, "loss": 46.5108, "step": 5043 }, { "epoch": 120.0955223880597, "grad_norm": 128.19775390625, "learning_rate": 9.996336996336997e-06, "loss": 45.0948, "step": 5044 }, { "epoch": 120.11940298507463, "grad_norm": 58.83436584472656, "learning_rate": 9.994505494505496e-06, "loss": 43.1635, "step": 5045 }, { "epoch": 120.14328358208955, "grad_norm": 58.79975891113281, "learning_rate": 9.992673992673994e-06, "loss": 41.6829, "step": 5046 }, { "epoch": 120.16716417910447, "grad_norm": 50.534278869628906, "learning_rate": 9.990842490842492e-06, "loss": 42.3871, "step": 5047 }, { "epoch": 120.1910447761194, "grad_norm": 38.682125091552734, "learning_rate": 9.98901098901099e-06, "loss": 40.9709, "step": 5048 }, { "epoch": 120.21492537313434, "grad_norm": 35.06442642211914, "learning_rate": 9.987179487179488e-06, "loss": 41.0217, "step": 5049 }, { "epoch": 120.23880597014926, "grad_norm": 59.00712585449219, "learning_rate": 9.985347985347986e-06, "loss": 41.7985, "step": 5050 }, { "epoch": 120.26268656716418, "grad_norm": 36.52231216430664, "learning_rate": 9.983516483516485e-06, "loss": 41.6886, "step": 5051 }, { "epoch": 120.2865671641791, "grad_norm": 35.213436126708984, "learning_rate": 9.981684981684983e-06, "loss": 40.9909, "step": 5052 }, { "epoch": 120.31044776119403, "grad_norm": 40.0443000793457, "learning_rate": 9.97985347985348e-06, "loss": 41.1657, "step": 5053 }, { "epoch": 120.33432835820895, "grad_norm": 27.66771697998047, "learning_rate": 9.978021978021979e-06, "loss": 41.327, "step": 5054 }, { "epoch": 120.35820895522389, "grad_norm": 34.4952507019043, "learning_rate": 9.976190476190477e-06, "loss": 40.8086, "step": 5055 }, { "epoch": 120.38208955223881, "grad_norm": 26.404708862304688, "learning_rate": 9.974358974358974e-06, "loss": 41.0862, "step": 5056 }, { "epoch": 120.40597014925373, "grad_norm": 24.669050216674805, "learning_rate": 9.972527472527474e-06, "loss": 40.6639, "step": 5057 }, { "epoch": 120.42985074626866, "grad_norm": 29.60878562927246, "learning_rate": 9.970695970695972e-06, "loss": 40.8127, "step": 5058 }, { "epoch": 120.45373134328358, "grad_norm": 17.245283126831055, "learning_rate": 9.96886446886447e-06, "loss": 41.6983, "step": 5059 }, { "epoch": 120.4776119402985, "grad_norm": 26.338546752929688, "learning_rate": 9.967032967032968e-06, "loss": 40.5917, "step": 5060 }, { "epoch": 120.50149253731344, "grad_norm": 25.838808059692383, "learning_rate": 9.965201465201466e-06, "loss": 41.6386, "step": 5061 }, { "epoch": 120.52537313432836, "grad_norm": 17.583539962768555, "learning_rate": 9.963369963369965e-06, "loss": 39.5372, "step": 5062 }, { "epoch": 120.54925373134328, "grad_norm": 29.433382034301758, "learning_rate": 9.961538461538463e-06, "loss": 41.2372, "step": 5063 }, { "epoch": 120.57313432835821, "grad_norm": 19.41893768310547, "learning_rate": 9.959706959706961e-06, "loss": 41.2464, "step": 5064 }, { "epoch": 120.59701492537313, "grad_norm": 20.060937881469727, "learning_rate": 9.957875457875459e-06, "loss": 41.1316, "step": 5065 }, { "epoch": 120.62089552238805, "grad_norm": 21.93149185180664, "learning_rate": 9.956043956043957e-06, "loss": 40.6738, "step": 5066 }, { "epoch": 120.64477611940299, "grad_norm": 20.02782440185547, "learning_rate": 9.954212454212454e-06, "loss": 41.0332, "step": 5067 }, { "epoch": 120.66865671641791, "grad_norm": 16.836517333984375, "learning_rate": 9.952380952380954e-06, "loss": 41.8322, "step": 5068 }, { "epoch": 120.69253731343284, "grad_norm": 19.467927932739258, "learning_rate": 9.950549450549452e-06, "loss": 42.0419, "step": 5069 }, { "epoch": 120.71641791044776, "grad_norm": 20.398895263671875, "learning_rate": 9.94871794871795e-06, "loss": 40.1522, "step": 5070 }, { "epoch": 120.74029850746268, "grad_norm": 17.445634841918945, "learning_rate": 9.946886446886448e-06, "loss": 41.1946, "step": 5071 }, { "epoch": 120.7641791044776, "grad_norm": 17.94610595703125, "learning_rate": 9.945054945054946e-06, "loss": 41.4025, "step": 5072 }, { "epoch": 120.78805970149254, "grad_norm": 25.02172088623047, "learning_rate": 9.943223443223443e-06, "loss": 42.0855, "step": 5073 }, { "epoch": 120.81194029850747, "grad_norm": 16.557662963867188, "learning_rate": 9.941391941391943e-06, "loss": 39.8862, "step": 5074 }, { "epoch": 120.83582089552239, "grad_norm": 19.688400268554688, "learning_rate": 9.939560439560441e-06, "loss": 40.9361, "step": 5075 }, { "epoch": 120.85970149253731, "grad_norm": 29.196117401123047, "learning_rate": 9.937728937728939e-06, "loss": 42.8812, "step": 5076 }, { "epoch": 120.88358208955223, "grad_norm": 17.111480712890625, "learning_rate": 9.935897435897437e-06, "loss": 41.4032, "step": 5077 }, { "epoch": 120.90746268656716, "grad_norm": 29.072128295898438, "learning_rate": 9.934065934065935e-06, "loss": 42.2839, "step": 5078 }, { "epoch": 120.9313432835821, "grad_norm": 24.953367233276367, "learning_rate": 9.932234432234434e-06, "loss": 41.5165, "step": 5079 }, { "epoch": 120.95522388059702, "grad_norm": 19.515911102294922, "learning_rate": 9.930402930402932e-06, "loss": 40.4111, "step": 5080 }, { "epoch": 120.97910447761194, "grad_norm": 23.281414031982422, "learning_rate": 9.92857142857143e-06, "loss": 40.3576, "step": 5081 }, { "epoch": 121.0, "grad_norm": 16.75458335876465, "learning_rate": 9.926739926739928e-06, "loss": 36.3203, "step": 5082 }, { "epoch": 121.02388059701492, "grad_norm": 29.20741844177246, "learning_rate": 9.924908424908426e-06, "loss": 39.9303, "step": 5083 }, { "epoch": 121.04776119402985, "grad_norm": 21.79246711730957, "learning_rate": 9.923076923076923e-06, "loss": 41.4785, "step": 5084 }, { "epoch": 121.07164179104478, "grad_norm": 29.117504119873047, "learning_rate": 9.921245421245423e-06, "loss": 41.8695, "step": 5085 }, { "epoch": 121.0955223880597, "grad_norm": 17.819120407104492, "learning_rate": 9.919413919413921e-06, "loss": 39.2762, "step": 5086 }, { "epoch": 121.11940298507463, "grad_norm": 24.556377410888672, "learning_rate": 9.917582417582419e-06, "loss": 41.5134, "step": 5087 }, { "epoch": 121.14328358208955, "grad_norm": 19.049671173095703, "learning_rate": 9.915750915750917e-06, "loss": 40.8369, "step": 5088 }, { "epoch": 121.16716417910447, "grad_norm": 20.745899200439453, "learning_rate": 9.913919413919415e-06, "loss": 41.4137, "step": 5089 }, { "epoch": 121.1910447761194, "grad_norm": 21.53566551208496, "learning_rate": 9.912087912087912e-06, "loss": 40.3688, "step": 5090 }, { "epoch": 121.21492537313434, "grad_norm": 23.52694320678711, "learning_rate": 9.910256410256412e-06, "loss": 41.1741, "step": 5091 }, { "epoch": 121.23880597014926, "grad_norm": 19.23663330078125, "learning_rate": 9.90842490842491e-06, "loss": 41.2629, "step": 5092 }, { "epoch": 121.26268656716418, "grad_norm": 20.38791847229004, "learning_rate": 9.906593406593408e-06, "loss": 40.6994, "step": 5093 }, { "epoch": 121.2865671641791, "grad_norm": 29.10164451599121, "learning_rate": 9.904761904761906e-06, "loss": 41.7159, "step": 5094 }, { "epoch": 121.31044776119403, "grad_norm": 18.191295623779297, "learning_rate": 9.902930402930403e-06, "loss": 40.0695, "step": 5095 }, { "epoch": 121.33432835820895, "grad_norm": 34.14667510986328, "learning_rate": 9.901098901098903e-06, "loss": 40.7836, "step": 5096 }, { "epoch": 121.35820895522389, "grad_norm": 25.464981079101562, "learning_rate": 9.899267399267401e-06, "loss": 40.5731, "step": 5097 }, { "epoch": 121.38208955223881, "grad_norm": 34.738773345947266, "learning_rate": 9.897435897435899e-06, "loss": 42.5079, "step": 5098 }, { "epoch": 121.40597014925373, "grad_norm": 24.047697067260742, "learning_rate": 9.895604395604397e-06, "loss": 41.9274, "step": 5099 }, { "epoch": 121.42985074626866, "grad_norm": 36.788326263427734, "learning_rate": 9.893772893772895e-06, "loss": 41.1378, "step": 5100 }, { "epoch": 121.45373134328358, "grad_norm": 26.662019729614258, "learning_rate": 9.891941391941392e-06, "loss": 41.4065, "step": 5101 }, { "epoch": 121.4776119402985, "grad_norm": 35.20701217651367, "learning_rate": 9.890109890109892e-06, "loss": 39.1299, "step": 5102 }, { "epoch": 121.50149253731344, "grad_norm": 29.675378799438477, "learning_rate": 9.88827838827839e-06, "loss": 41.0234, "step": 5103 }, { "epoch": 121.52537313432836, "grad_norm": 34.06852722167969, "learning_rate": 9.886446886446888e-06, "loss": 41.8632, "step": 5104 }, { "epoch": 121.54925373134328, "grad_norm": 25.621753692626953, "learning_rate": 9.884615384615386e-06, "loss": 40.9295, "step": 5105 }, { "epoch": 121.57313432835821, "grad_norm": 27.804433822631836, "learning_rate": 9.882783882783884e-06, "loss": 40.0458, "step": 5106 }, { "epoch": 121.59701492537313, "grad_norm": 26.332223892211914, "learning_rate": 9.880952380952381e-06, "loss": 39.7798, "step": 5107 }, { "epoch": 121.62089552238805, "grad_norm": 29.49053192138672, "learning_rate": 9.879120879120881e-06, "loss": 42.0289, "step": 5108 }, { "epoch": 121.64477611940299, "grad_norm": 24.052976608276367, "learning_rate": 9.877289377289379e-06, "loss": 40.5861, "step": 5109 }, { "epoch": 121.66865671641791, "grad_norm": 23.03173828125, "learning_rate": 9.875457875457877e-06, "loss": 40.9261, "step": 5110 }, { "epoch": 121.69253731343284, "grad_norm": 24.134889602661133, "learning_rate": 9.873626373626375e-06, "loss": 41.0466, "step": 5111 }, { "epoch": 121.71641791044776, "grad_norm": 19.443124771118164, "learning_rate": 9.871794871794872e-06, "loss": 40.4331, "step": 5112 }, { "epoch": 121.74029850746268, "grad_norm": 31.88178825378418, "learning_rate": 9.869963369963372e-06, "loss": 40.6991, "step": 5113 }, { "epoch": 121.7641791044776, "grad_norm": 21.850631713867188, "learning_rate": 9.86813186813187e-06, "loss": 41.4331, "step": 5114 }, { "epoch": 121.78805970149254, "grad_norm": 37.39925765991211, "learning_rate": 9.866300366300368e-06, "loss": 40.9437, "step": 5115 }, { "epoch": 121.81194029850747, "grad_norm": 31.58283042907715, "learning_rate": 9.864468864468866e-06, "loss": 41.0558, "step": 5116 }, { "epoch": 121.83582089552239, "grad_norm": 29.965499877929688, "learning_rate": 9.862637362637364e-06, "loss": 39.5632, "step": 5117 }, { "epoch": 121.85970149253731, "grad_norm": 25.50206756591797, "learning_rate": 9.860805860805861e-06, "loss": 41.287, "step": 5118 }, { "epoch": 121.88358208955223, "grad_norm": 34.806034088134766, "learning_rate": 9.858974358974361e-06, "loss": 41.0144, "step": 5119 }, { "epoch": 121.90746268656716, "grad_norm": 21.66145133972168, "learning_rate": 9.857142857142859e-06, "loss": 41.2587, "step": 5120 }, { "epoch": 121.9313432835821, "grad_norm": 37.883094787597656, "learning_rate": 9.855311355311357e-06, "loss": 40.7321, "step": 5121 }, { "epoch": 121.95522388059702, "grad_norm": 28.472124099731445, "learning_rate": 9.853479853479855e-06, "loss": 41.5554, "step": 5122 }, { "epoch": 121.97910447761194, "grad_norm": 35.33477783203125, "learning_rate": 9.851648351648352e-06, "loss": 42.0246, "step": 5123 }, { "epoch": 122.0, "grad_norm": 27.911645889282227, "learning_rate": 9.84981684981685e-06, "loss": 35.3824, "step": 5124 }, { "epoch": 122.02388059701492, "grad_norm": 33.792213439941406, "learning_rate": 9.84798534798535e-06, "loss": 40.2451, "step": 5125 }, { "epoch": 122.04776119402985, "grad_norm": 33.73054885864258, "learning_rate": 9.846153846153848e-06, "loss": 41.5777, "step": 5126 }, { "epoch": 122.07164179104478, "grad_norm": 29.55936622619629, "learning_rate": 9.844322344322346e-06, "loss": 40.5313, "step": 5127 }, { "epoch": 122.0955223880597, "grad_norm": 21.786413192749023, "learning_rate": 9.842490842490844e-06, "loss": 41.795, "step": 5128 }, { "epoch": 122.11940298507463, "grad_norm": 38.503475189208984, "learning_rate": 9.840659340659341e-06, "loss": 40.2868, "step": 5129 }, { "epoch": 122.14328358208955, "grad_norm": 27.126779556274414, "learning_rate": 9.83882783882784e-06, "loss": 40.5464, "step": 5130 }, { "epoch": 122.16716417910447, "grad_norm": 34.76428985595703, "learning_rate": 9.836996336996337e-06, "loss": 40.2589, "step": 5131 }, { "epoch": 122.1910447761194, "grad_norm": 37.37604522705078, "learning_rate": 9.835164835164835e-06, "loss": 40.8401, "step": 5132 }, { "epoch": 122.21492537313434, "grad_norm": 29.67528533935547, "learning_rate": 9.833333333333333e-06, "loss": 40.9921, "step": 5133 }, { "epoch": 122.23880597014926, "grad_norm": 27.43715476989746, "learning_rate": 9.831501831501832e-06, "loss": 40.4038, "step": 5134 }, { "epoch": 122.26268656716418, "grad_norm": 30.960216522216797, "learning_rate": 9.82967032967033e-06, "loss": 39.8886, "step": 5135 }, { "epoch": 122.2865671641791, "grad_norm": 27.186513900756836, "learning_rate": 9.827838827838828e-06, "loss": 42.1122, "step": 5136 }, { "epoch": 122.31044776119403, "grad_norm": 32.01823806762695, "learning_rate": 9.826007326007326e-06, "loss": 40.7854, "step": 5137 }, { "epoch": 122.33432835820895, "grad_norm": 26.988773345947266, "learning_rate": 9.824175824175824e-06, "loss": 40.5902, "step": 5138 }, { "epoch": 122.35820895522389, "grad_norm": 29.70166778564453, "learning_rate": 9.822344322344322e-06, "loss": 41.4538, "step": 5139 }, { "epoch": 122.38208955223881, "grad_norm": 25.9971981048584, "learning_rate": 9.820512820512821e-06, "loss": 39.6575, "step": 5140 }, { "epoch": 122.40597014925373, "grad_norm": 33.1441535949707, "learning_rate": 9.81868131868132e-06, "loss": 40.0902, "step": 5141 }, { "epoch": 122.42985074626866, "grad_norm": 27.196630477905273, "learning_rate": 9.816849816849817e-06, "loss": 40.0376, "step": 5142 }, { "epoch": 122.45373134328358, "grad_norm": 34.561798095703125, "learning_rate": 9.815018315018315e-06, "loss": 41.6209, "step": 5143 }, { "epoch": 122.4776119402985, "grad_norm": 33.98078155517578, "learning_rate": 9.813186813186813e-06, "loss": 40.8931, "step": 5144 }, { "epoch": 122.50149253731344, "grad_norm": 29.115427017211914, "learning_rate": 9.811355311355313e-06, "loss": 41.4718, "step": 5145 }, { "epoch": 122.52537313432836, "grad_norm": 24.698219299316406, "learning_rate": 9.80952380952381e-06, "loss": 40.2337, "step": 5146 }, { "epoch": 122.54925373134328, "grad_norm": 32.09329605102539, "learning_rate": 9.807692307692308e-06, "loss": 40.1893, "step": 5147 }, { "epoch": 122.57313432835821, "grad_norm": 28.50708770751953, "learning_rate": 9.805860805860806e-06, "loss": 41.2457, "step": 5148 }, { "epoch": 122.59701492537313, "grad_norm": 34.65631103515625, "learning_rate": 9.804029304029304e-06, "loss": 40.8311, "step": 5149 }, { "epoch": 122.62089552238805, "grad_norm": 27.82625961303711, "learning_rate": 9.802197802197802e-06, "loss": 40.3574, "step": 5150 }, { "epoch": 122.64477611940299, "grad_norm": 31.24656105041504, "learning_rate": 9.800366300366301e-06, "loss": 40.999, "step": 5151 }, { "epoch": 122.66865671641791, "grad_norm": 26.075342178344727, "learning_rate": 9.7985347985348e-06, "loss": 41.6763, "step": 5152 }, { "epoch": 122.69253731343284, "grad_norm": 28.61420440673828, "learning_rate": 9.796703296703297e-06, "loss": 41.1096, "step": 5153 }, { "epoch": 122.71641791044776, "grad_norm": 24.201374053955078, "learning_rate": 9.794871794871795e-06, "loss": 41.7294, "step": 5154 }, { "epoch": 122.74029850746268, "grad_norm": 33.25908660888672, "learning_rate": 9.793040293040293e-06, "loss": 41.0633, "step": 5155 }, { "epoch": 122.7641791044776, "grad_norm": 28.24220848083496, "learning_rate": 9.79120879120879e-06, "loss": 42.0281, "step": 5156 }, { "epoch": 122.78805970149254, "grad_norm": 34.96881103515625, "learning_rate": 9.78937728937729e-06, "loss": 40.648, "step": 5157 }, { "epoch": 122.81194029850747, "grad_norm": 29.03910255432129, "learning_rate": 9.787545787545788e-06, "loss": 41.1215, "step": 5158 }, { "epoch": 122.83582089552239, "grad_norm": 30.120044708251953, "learning_rate": 9.785714285714286e-06, "loss": 41.7353, "step": 5159 }, { "epoch": 122.85970149253731, "grad_norm": 30.23310661315918, "learning_rate": 9.783882783882784e-06, "loss": 40.7885, "step": 5160 }, { "epoch": 122.88358208955223, "grad_norm": 29.74199104309082, "learning_rate": 9.782051282051282e-06, "loss": 41.4646, "step": 5161 }, { "epoch": 122.90746268656716, "grad_norm": 27.558090209960938, "learning_rate": 9.780219780219781e-06, "loss": 41.0687, "step": 5162 }, { "epoch": 122.9313432835821, "grad_norm": 29.82993507385254, "learning_rate": 9.77838827838828e-06, "loss": 41.5666, "step": 5163 }, { "epoch": 122.95522388059702, "grad_norm": 24.96250343322754, "learning_rate": 9.776556776556777e-06, "loss": 41.1099, "step": 5164 }, { "epoch": 122.97910447761194, "grad_norm": 34.85405731201172, "learning_rate": 9.774725274725275e-06, "loss": 38.6541, "step": 5165 }, { "epoch": 123.0, "grad_norm": 28.839818954467773, "learning_rate": 9.772893772893773e-06, "loss": 35.9493, "step": 5166 }, { "epoch": 123.02388059701492, "grad_norm": 32.29933547973633, "learning_rate": 9.771062271062271e-06, "loss": 40.9199, "step": 5167 }, { "epoch": 123.04776119402985, "grad_norm": 26.617511749267578, "learning_rate": 9.76923076923077e-06, "loss": 39.6813, "step": 5168 }, { "epoch": 123.07164179104478, "grad_norm": 29.118209838867188, "learning_rate": 9.767399267399268e-06, "loss": 40.971, "step": 5169 }, { "epoch": 123.0955223880597, "grad_norm": 26.295345306396484, "learning_rate": 9.765567765567766e-06, "loss": 40.8982, "step": 5170 }, { "epoch": 123.11940298507463, "grad_norm": 33.3271369934082, "learning_rate": 9.763736263736264e-06, "loss": 41.9198, "step": 5171 }, { "epoch": 123.14328358208955, "grad_norm": 25.857398986816406, "learning_rate": 9.761904761904762e-06, "loss": 39.5357, "step": 5172 }, { "epoch": 123.16716417910447, "grad_norm": 32.97218704223633, "learning_rate": 9.76007326007326e-06, "loss": 41.1038, "step": 5173 }, { "epoch": 123.1910447761194, "grad_norm": 28.88793182373047, "learning_rate": 9.75824175824176e-06, "loss": 40.8065, "step": 5174 }, { "epoch": 123.21492537313434, "grad_norm": 24.024185180664062, "learning_rate": 9.756410256410257e-06, "loss": 39.8969, "step": 5175 }, { "epoch": 123.23880597014926, "grad_norm": 23.380300521850586, "learning_rate": 9.754578754578755e-06, "loss": 40.962, "step": 5176 }, { "epoch": 123.26268656716418, "grad_norm": 28.82596778869629, "learning_rate": 9.752747252747253e-06, "loss": 40.4959, "step": 5177 }, { "epoch": 123.2865671641791, "grad_norm": 19.895410537719727, "learning_rate": 9.750915750915751e-06, "loss": 41.0015, "step": 5178 }, { "epoch": 123.31044776119403, "grad_norm": 28.44173812866211, "learning_rate": 9.74908424908425e-06, "loss": 40.7281, "step": 5179 }, { "epoch": 123.33432835820895, "grad_norm": 22.277742385864258, "learning_rate": 9.747252747252748e-06, "loss": 40.0391, "step": 5180 }, { "epoch": 123.35820895522389, "grad_norm": 27.770545959472656, "learning_rate": 9.745421245421246e-06, "loss": 41.0649, "step": 5181 }, { "epoch": 123.38208955223881, "grad_norm": 22.383668899536133, "learning_rate": 9.743589743589744e-06, "loss": 40.735, "step": 5182 }, { "epoch": 123.40597014925373, "grad_norm": 31.16164779663086, "learning_rate": 9.741758241758242e-06, "loss": 41.1004, "step": 5183 }, { "epoch": 123.42985074626866, "grad_norm": 25.458309173583984, "learning_rate": 9.73992673992674e-06, "loss": 40.4399, "step": 5184 }, { "epoch": 123.45373134328358, "grad_norm": 37.73893356323242, "learning_rate": 9.73809523809524e-06, "loss": 42.08, "step": 5185 }, { "epoch": 123.4776119402985, "grad_norm": 28.723541259765625, "learning_rate": 9.736263736263737e-06, "loss": 40.479, "step": 5186 }, { "epoch": 123.50149253731344, "grad_norm": 30.29216194152832, "learning_rate": 9.734432234432235e-06, "loss": 40.76, "step": 5187 }, { "epoch": 123.52537313432836, "grad_norm": 25.559480667114258, "learning_rate": 9.732600732600733e-06, "loss": 39.7645, "step": 5188 }, { "epoch": 123.54925373134328, "grad_norm": 30.328344345092773, "learning_rate": 9.730769230769231e-06, "loss": 42.2182, "step": 5189 }, { "epoch": 123.57313432835821, "grad_norm": 24.075218200683594, "learning_rate": 9.728937728937729e-06, "loss": 39.7574, "step": 5190 }, { "epoch": 123.59701492537313, "grad_norm": 29.823719024658203, "learning_rate": 9.727106227106228e-06, "loss": 41.1253, "step": 5191 }, { "epoch": 123.62089552238805, "grad_norm": 21.241701126098633, "learning_rate": 9.725274725274726e-06, "loss": 41.0588, "step": 5192 }, { "epoch": 123.64477611940299, "grad_norm": 34.10343933105469, "learning_rate": 9.723443223443224e-06, "loss": 40.7287, "step": 5193 }, { "epoch": 123.66865671641791, "grad_norm": 24.037466049194336, "learning_rate": 9.721611721611722e-06, "loss": 41.1033, "step": 5194 }, { "epoch": 123.69253731343284, "grad_norm": 26.837879180908203, "learning_rate": 9.71978021978022e-06, "loss": 41.1869, "step": 5195 }, { "epoch": 123.71641791044776, "grad_norm": 22.90353012084961, "learning_rate": 9.71794871794872e-06, "loss": 41.4571, "step": 5196 }, { "epoch": 123.74029850746268, "grad_norm": 31.232582092285156, "learning_rate": 9.716117216117217e-06, "loss": 40.6233, "step": 5197 }, { "epoch": 123.7641791044776, "grad_norm": 24.480405807495117, "learning_rate": 9.714285714285715e-06, "loss": 40.8121, "step": 5198 }, { "epoch": 123.78805970149254, "grad_norm": 33.86972427368164, "learning_rate": 9.712454212454213e-06, "loss": 41.596, "step": 5199 }, { "epoch": 123.81194029850747, "grad_norm": 23.727428436279297, "learning_rate": 9.710622710622711e-06, "loss": 41.1944, "step": 5200 }, { "epoch": 123.83582089552239, "grad_norm": 32.29154586791992, "learning_rate": 9.708791208791209e-06, "loss": 40.361, "step": 5201 }, { "epoch": 123.85970149253731, "grad_norm": 22.611989974975586, "learning_rate": 9.706959706959708e-06, "loss": 39.6001, "step": 5202 }, { "epoch": 123.88358208955223, "grad_norm": 33.92005157470703, "learning_rate": 9.705128205128206e-06, "loss": 42.1112, "step": 5203 }, { "epoch": 123.90746268656716, "grad_norm": 28.993995666503906, "learning_rate": 9.703296703296704e-06, "loss": 41.436, "step": 5204 }, { "epoch": 123.9313432835821, "grad_norm": 27.87895393371582, "learning_rate": 9.701465201465202e-06, "loss": 40.7799, "step": 5205 }, { "epoch": 123.95522388059702, "grad_norm": 29.898271560668945, "learning_rate": 9.6996336996337e-06, "loss": 41.2078, "step": 5206 }, { "epoch": 123.97910447761194, "grad_norm": 24.88825798034668, "learning_rate": 9.697802197802198e-06, "loss": 40.1142, "step": 5207 }, { "epoch": 124.0, "grad_norm": 20.064050674438477, "learning_rate": 9.695970695970697e-06, "loss": 35.74, "step": 5208 }, { "epoch": 124.02388059701492, "grad_norm": 30.132843017578125, "learning_rate": 9.694139194139195e-06, "loss": 41.1361, "step": 5209 }, { "epoch": 124.04776119402985, "grad_norm": 21.139568328857422, "learning_rate": 9.692307692307693e-06, "loss": 41.4147, "step": 5210 }, { "epoch": 124.07164179104478, "grad_norm": 29.36510467529297, "learning_rate": 9.690476190476191e-06, "loss": 40.6358, "step": 5211 }, { "epoch": 124.0955223880597, "grad_norm": 27.090465545654297, "learning_rate": 9.688644688644689e-06, "loss": 41.6791, "step": 5212 }, { "epoch": 124.11940298507463, "grad_norm": 24.170644760131836, "learning_rate": 9.686813186813188e-06, "loss": 40.7704, "step": 5213 }, { "epoch": 124.14328358208955, "grad_norm": 26.26068115234375, "learning_rate": 9.684981684981686e-06, "loss": 39.3922, "step": 5214 }, { "epoch": 124.16716417910447, "grad_norm": 22.155975341796875, "learning_rate": 9.683150183150184e-06, "loss": 40.9681, "step": 5215 }, { "epoch": 124.1910447761194, "grad_norm": 25.21603012084961, "learning_rate": 9.681318681318682e-06, "loss": 42.1241, "step": 5216 }, { "epoch": 124.21492537313434, "grad_norm": 16.18509292602539, "learning_rate": 9.67948717948718e-06, "loss": 40.9757, "step": 5217 }, { "epoch": 124.23880597014926, "grad_norm": 21.46571159362793, "learning_rate": 9.677655677655678e-06, "loss": 41.6829, "step": 5218 }, { "epoch": 124.26268656716418, "grad_norm": 18.195297241210938, "learning_rate": 9.675824175824177e-06, "loss": 40.194, "step": 5219 }, { "epoch": 124.2865671641791, "grad_norm": 18.00617790222168, "learning_rate": 9.673992673992675e-06, "loss": 39.3802, "step": 5220 }, { "epoch": 124.31044776119403, "grad_norm": 18.236934661865234, "learning_rate": 9.672161172161173e-06, "loss": 41.0138, "step": 5221 }, { "epoch": 124.33432835820895, "grad_norm": 16.526309967041016, "learning_rate": 9.670329670329671e-06, "loss": 40.2031, "step": 5222 }, { "epoch": 124.35820895522389, "grad_norm": 20.008708953857422, "learning_rate": 9.668498168498169e-06, "loss": 40.9772, "step": 5223 }, { "epoch": 124.38208955223881, "grad_norm": 14.738056182861328, "learning_rate": 9.666666666666667e-06, "loss": 40.5985, "step": 5224 }, { "epoch": 124.40597014925373, "grad_norm": 19.540645599365234, "learning_rate": 9.664835164835166e-06, "loss": 41.1823, "step": 5225 }, { "epoch": 124.42985074626866, "grad_norm": 17.26000213623047, "learning_rate": 9.663003663003664e-06, "loss": 40.2975, "step": 5226 }, { "epoch": 124.45373134328358, "grad_norm": 19.984989166259766, "learning_rate": 9.661172161172162e-06, "loss": 40.6366, "step": 5227 }, { "epoch": 124.4776119402985, "grad_norm": 24.717369079589844, "learning_rate": 9.65934065934066e-06, "loss": 40.665, "step": 5228 }, { "epoch": 124.50149253731344, "grad_norm": 16.406538009643555, "learning_rate": 9.657509157509158e-06, "loss": 40.4751, "step": 5229 }, { "epoch": 124.52537313432836, "grad_norm": 23.191200256347656, "learning_rate": 9.655677655677657e-06, "loss": 40.6781, "step": 5230 }, { "epoch": 124.54925373134328, "grad_norm": 18.91063690185547, "learning_rate": 9.653846153846155e-06, "loss": 40.9245, "step": 5231 }, { "epoch": 124.57313432835821, "grad_norm": 23.012889862060547, "learning_rate": 9.652014652014653e-06, "loss": 41.7688, "step": 5232 }, { "epoch": 124.59701492537313, "grad_norm": 20.35813331604004, "learning_rate": 9.650183150183151e-06, "loss": 40.9169, "step": 5233 }, { "epoch": 124.62089552238805, "grad_norm": 22.06452751159668, "learning_rate": 9.648351648351649e-06, "loss": 41.0061, "step": 5234 }, { "epoch": 124.64477611940299, "grad_norm": 23.17784309387207, "learning_rate": 9.646520146520147e-06, "loss": 40.7811, "step": 5235 }, { "epoch": 124.66865671641791, "grad_norm": 19.43151092529297, "learning_rate": 9.644688644688646e-06, "loss": 40.3725, "step": 5236 }, { "epoch": 124.69253731343284, "grad_norm": 23.144960403442383, "learning_rate": 9.642857142857144e-06, "loss": 42.382, "step": 5237 }, { "epoch": 124.71641791044776, "grad_norm": 18.223936080932617, "learning_rate": 9.641025641025642e-06, "loss": 40.3686, "step": 5238 }, { "epoch": 124.74029850746268, "grad_norm": 21.855030059814453, "learning_rate": 9.63919413919414e-06, "loss": 41.8939, "step": 5239 }, { "epoch": 124.7641791044776, "grad_norm": 21.252012252807617, "learning_rate": 9.637362637362638e-06, "loss": 40.5954, "step": 5240 }, { "epoch": 124.78805970149254, "grad_norm": 20.320215225219727, "learning_rate": 9.635531135531136e-06, "loss": 40.2941, "step": 5241 }, { "epoch": 124.81194029850747, "grad_norm": 20.874921798706055, "learning_rate": 9.633699633699635e-06, "loss": 39.8523, "step": 5242 }, { "epoch": 124.83582089552239, "grad_norm": 20.560138702392578, "learning_rate": 9.631868131868133e-06, "loss": 40.1141, "step": 5243 }, { "epoch": 124.85970149253731, "grad_norm": 16.412206649780273, "learning_rate": 9.630036630036631e-06, "loss": 41.1038, "step": 5244 }, { "epoch": 124.88358208955223, "grad_norm": 20.39592170715332, "learning_rate": 9.628205128205129e-06, "loss": 39.8884, "step": 5245 }, { "epoch": 124.90746268656716, "grad_norm": 16.07599639892578, "learning_rate": 9.626373626373627e-06, "loss": 40.7929, "step": 5246 }, { "epoch": 124.9313432835821, "grad_norm": 14.183424949645996, "learning_rate": 9.624542124542126e-06, "loss": 41.5261, "step": 5247 }, { "epoch": 124.95522388059702, "grad_norm": 17.80473518371582, "learning_rate": 9.622710622710624e-06, "loss": 39.6694, "step": 5248 }, { "epoch": 124.97910447761194, "grad_norm": 16.59119987487793, "learning_rate": 9.620879120879122e-06, "loss": 40.9024, "step": 5249 }, { "epoch": 125.0, "grad_norm": 15.37125301361084, "learning_rate": 9.61904761904762e-06, "loss": 35.3595, "step": 5250 }, { "epoch": 125.02388059701492, "grad_norm": 18.345430374145508, "learning_rate": 9.617216117216118e-06, "loss": 40.2401, "step": 5251 }, { "epoch": 125.04776119402985, "grad_norm": 17.491918563842773, "learning_rate": 9.615384615384616e-06, "loss": 39.8787, "step": 5252 }, { "epoch": 125.07164179104478, "grad_norm": 16.483713150024414, "learning_rate": 9.613553113553115e-06, "loss": 41.3826, "step": 5253 }, { "epoch": 125.0955223880597, "grad_norm": 15.222822189331055, "learning_rate": 9.611721611721613e-06, "loss": 41.1321, "step": 5254 }, { "epoch": 125.11940298507463, "grad_norm": 16.675804138183594, "learning_rate": 9.609890109890111e-06, "loss": 41.0334, "step": 5255 }, { "epoch": 125.14328358208955, "grad_norm": 17.025266647338867, "learning_rate": 9.608058608058609e-06, "loss": 40.6213, "step": 5256 }, { "epoch": 125.16716417910447, "grad_norm": 15.499921798706055, "learning_rate": 9.606227106227107e-06, "loss": 39.8817, "step": 5257 }, { "epoch": 125.1910447761194, "grad_norm": 14.926107406616211, "learning_rate": 9.604395604395605e-06, "loss": 42.1196, "step": 5258 }, { "epoch": 125.21492537313434, "grad_norm": 17.896583557128906, "learning_rate": 9.602564102564104e-06, "loss": 40.1941, "step": 5259 }, { "epoch": 125.23880597014926, "grad_norm": 21.413713455200195, "learning_rate": 9.600732600732602e-06, "loss": 39.9496, "step": 5260 }, { "epoch": 125.26268656716418, "grad_norm": 20.262035369873047, "learning_rate": 9.5989010989011e-06, "loss": 40.8554, "step": 5261 }, { "epoch": 125.2865671641791, "grad_norm": 17.94382095336914, "learning_rate": 9.597069597069598e-06, "loss": 40.618, "step": 5262 }, { "epoch": 125.31044776119403, "grad_norm": 14.720929145812988, "learning_rate": 9.595238095238096e-06, "loss": 41.6634, "step": 5263 }, { "epoch": 125.33432835820895, "grad_norm": 21.567907333374023, "learning_rate": 9.593406593406595e-06, "loss": 41.6142, "step": 5264 }, { "epoch": 125.35820895522389, "grad_norm": 23.717586517333984, "learning_rate": 9.591575091575093e-06, "loss": 41.458, "step": 5265 }, { "epoch": 125.38208955223881, "grad_norm": 13.948038101196289, "learning_rate": 9.589743589743591e-06, "loss": 41.2909, "step": 5266 }, { "epoch": 125.40597014925373, "grad_norm": 27.392465591430664, "learning_rate": 9.587912087912089e-06, "loss": 41.3308, "step": 5267 }, { "epoch": 125.42985074626866, "grad_norm": 20.557374954223633, "learning_rate": 9.586080586080587e-06, "loss": 41.7717, "step": 5268 }, { "epoch": 125.45373134328358, "grad_norm": 21.076601028442383, "learning_rate": 9.584249084249085e-06, "loss": 39.8928, "step": 5269 }, { "epoch": 125.4776119402985, "grad_norm": 22.425079345703125, "learning_rate": 9.582417582417584e-06, "loss": 41.5856, "step": 5270 }, { "epoch": 125.50149253731344, "grad_norm": 19.16175079345703, "learning_rate": 9.580586080586082e-06, "loss": 40.7785, "step": 5271 }, { "epoch": 125.52537313432836, "grad_norm": 18.763565063476562, "learning_rate": 9.57875457875458e-06, "loss": 39.8277, "step": 5272 }, { "epoch": 125.54925373134328, "grad_norm": 18.3720645904541, "learning_rate": 9.576923076923078e-06, "loss": 40.9783, "step": 5273 }, { "epoch": 125.57313432835821, "grad_norm": 18.62623405456543, "learning_rate": 9.575091575091576e-06, "loss": 40.5206, "step": 5274 }, { "epoch": 125.59701492537313, "grad_norm": 20.078596115112305, "learning_rate": 9.573260073260074e-06, "loss": 40.2231, "step": 5275 }, { "epoch": 125.62089552238805, "grad_norm": 28.77025032043457, "learning_rate": 9.571428571428573e-06, "loss": 40.4628, "step": 5276 }, { "epoch": 125.64477611940299, "grad_norm": 17.48457145690918, "learning_rate": 9.569597069597071e-06, "loss": 38.3776, "step": 5277 }, { "epoch": 125.66865671641791, "grad_norm": 29.077014923095703, "learning_rate": 9.567765567765569e-06, "loss": 40.7467, "step": 5278 }, { "epoch": 125.69253731343284, "grad_norm": 22.95465660095215, "learning_rate": 9.565934065934067e-06, "loss": 40.831, "step": 5279 }, { "epoch": 125.71641791044776, "grad_norm": 26.317485809326172, "learning_rate": 9.564102564102565e-06, "loss": 40.4036, "step": 5280 }, { "epoch": 125.74029850746268, "grad_norm": 20.771020889282227, "learning_rate": 9.562271062271064e-06, "loss": 40.7238, "step": 5281 }, { "epoch": 125.7641791044776, "grad_norm": 21.856155395507812, "learning_rate": 9.560439560439562e-06, "loss": 40.1953, "step": 5282 }, { "epoch": 125.78805970149254, "grad_norm": 24.748037338256836, "learning_rate": 9.55860805860806e-06, "loss": 39.5605, "step": 5283 }, { "epoch": 125.81194029850747, "grad_norm": NaN, "learning_rate": 9.556776556776558e-06, "loss": 60.4649, "step": 5284 }, { "epoch": 125.83582089552239, "grad_norm": 18.565261840820312, "learning_rate": 9.556776556776558e-06, "loss": 41.9694, "step": 5285 }, { "epoch": 125.85970149253731, "grad_norm": 29.6701717376709, "learning_rate": 9.554945054945056e-06, "loss": 41.4843, "step": 5286 }, { "epoch": 125.88358208955223, "grad_norm": 20.192317962646484, "learning_rate": 9.553113553113554e-06, "loss": 39.8961, "step": 5287 }, { "epoch": 125.90746268656716, "grad_norm": 25.22960662841797, "learning_rate": 9.551282051282053e-06, "loss": 39.7754, "step": 5288 }, { "epoch": 125.9313432835821, "grad_norm": 19.892139434814453, "learning_rate": 9.549450549450551e-06, "loss": 40.257, "step": 5289 }, { "epoch": 125.95522388059702, "grad_norm": 18.426124572753906, "learning_rate": 9.547619047619049e-06, "loss": 41.252, "step": 5290 }, { "epoch": 125.97910447761194, "grad_norm": 24.085840225219727, "learning_rate": 9.545787545787547e-06, "loss": 41.3266, "step": 5291 }, { "epoch": 126.0, "grad_norm": 14.462137222290039, "learning_rate": 9.543956043956045e-06, "loss": 36.235, "step": 5292 }, { "epoch": 126.02388059701492, "grad_norm": 21.527910232543945, "learning_rate": 9.542124542124543e-06, "loss": 41.1059, "step": 5293 }, { "epoch": 126.04776119402985, "grad_norm": 19.539413452148438, "learning_rate": 9.540293040293042e-06, "loss": 41.8102, "step": 5294 }, { "epoch": 126.07164179104478, "grad_norm": 16.535566329956055, "learning_rate": 9.53846153846154e-06, "loss": 40.4373, "step": 5295 }, { "epoch": 126.0955223880597, "grad_norm": 30.60129737854004, "learning_rate": 9.536630036630038e-06, "loss": 40.3107, "step": 5296 }, { "epoch": 126.11940298507463, "grad_norm": 19.504737854003906, "learning_rate": 9.534798534798536e-06, "loss": 39.7933, "step": 5297 }, { "epoch": 126.14328358208955, "grad_norm": 40.68082809448242, "learning_rate": 9.532967032967034e-06, "loss": 40.4788, "step": 5298 }, { "epoch": 126.16716417910447, "grad_norm": 29.288623809814453, "learning_rate": 9.531135531135532e-06, "loss": 40.3154, "step": 5299 }, { "epoch": 126.1910447761194, "grad_norm": 39.86507797241211, "learning_rate": 9.52930402930403e-06, "loss": 40.9565, "step": 5300 }, { "epoch": 126.21492537313434, "grad_norm": 37.94214630126953, "learning_rate": 9.527472527472527e-06, "loss": 39.7678, "step": 5301 }, { "epoch": 126.23880597014926, "grad_norm": 30.128881454467773, "learning_rate": 9.525641025641025e-06, "loss": 40.9812, "step": 5302 }, { "epoch": 126.26268656716418, "grad_norm": 32.11579895019531, "learning_rate": 9.523809523809525e-06, "loss": 40.7709, "step": 5303 }, { "epoch": 126.2865671641791, "grad_norm": 30.610383987426758, "learning_rate": 9.521978021978023e-06, "loss": 39.3623, "step": 5304 }, { "epoch": 126.31044776119403, "grad_norm": 25.186908721923828, "learning_rate": 9.52014652014652e-06, "loss": 40.5833, "step": 5305 }, { "epoch": 126.33432835820895, "grad_norm": 35.33464050292969, "learning_rate": 9.518315018315018e-06, "loss": 40.4571, "step": 5306 }, { "epoch": 126.35820895522389, "grad_norm": 30.900115966796875, "learning_rate": 9.516483516483516e-06, "loss": 41.5089, "step": 5307 }, { "epoch": 126.38208955223881, "grad_norm": 30.170385360717773, "learning_rate": 9.514652014652014e-06, "loss": 40.4776, "step": 5308 }, { "epoch": 126.40597014925373, "grad_norm": 25.576396942138672, "learning_rate": 9.512820512820514e-06, "loss": 40.5452, "step": 5309 }, { "epoch": 126.42985074626866, "grad_norm": 31.52381706237793, "learning_rate": 9.510989010989012e-06, "loss": 41.0569, "step": 5310 }, { "epoch": 126.45373134328358, "grad_norm": 28.613876342773438, "learning_rate": 9.50915750915751e-06, "loss": 40.4693, "step": 5311 }, { "epoch": 126.4776119402985, "grad_norm": 34.052391052246094, "learning_rate": 9.507326007326007e-06, "loss": 39.9473, "step": 5312 }, { "epoch": 126.50149253731344, "grad_norm": 28.65314292907715, "learning_rate": 9.505494505494505e-06, "loss": 39.012, "step": 5313 }, { "epoch": 126.52537313432836, "grad_norm": 28.400449752807617, "learning_rate": 9.503663003663005e-06, "loss": 40.3811, "step": 5314 }, { "epoch": 126.54925373134328, "grad_norm": 26.874284744262695, "learning_rate": 9.501831501831503e-06, "loss": 39.8546, "step": 5315 }, { "epoch": 126.57313432835821, "grad_norm": 29.638126373291016, "learning_rate": 9.5e-06, "loss": 41.6115, "step": 5316 }, { "epoch": 126.59701492537313, "grad_norm": 27.8295841217041, "learning_rate": 9.498168498168498e-06, "loss": 40.7197, "step": 5317 }, { "epoch": 126.62089552238805, "grad_norm": 34.33130645751953, "learning_rate": 9.496336996336996e-06, "loss": 41.2572, "step": 5318 }, { "epoch": 126.64477611940299, "grad_norm": 28.334978103637695, "learning_rate": 9.494505494505494e-06, "loss": 41.4411, "step": 5319 }, { "epoch": 126.66865671641791, "grad_norm": 28.492050170898438, "learning_rate": 9.492673992673994e-06, "loss": 39.6498, "step": 5320 }, { "epoch": 126.69253731343284, "grad_norm": 26.12206268310547, "learning_rate": 9.490842490842492e-06, "loss": 41.3662, "step": 5321 }, { "epoch": 126.71641791044776, "grad_norm": 31.722883224487305, "learning_rate": 9.48901098901099e-06, "loss": 41.3887, "step": 5322 }, { "epoch": 126.74029850746268, "grad_norm": 26.18402862548828, "learning_rate": 9.487179487179487e-06, "loss": 41.5819, "step": 5323 }, { "epoch": 126.7641791044776, "grad_norm": 29.256437301635742, "learning_rate": 9.485347985347985e-06, "loss": 41.0093, "step": 5324 }, { "epoch": 126.78805970149254, "grad_norm": 26.79650115966797, "learning_rate": 9.483516483516483e-06, "loss": 39.9968, "step": 5325 }, { "epoch": 126.81194029850747, "grad_norm": 27.056190490722656, "learning_rate": 9.481684981684983e-06, "loss": 39.1216, "step": 5326 }, { "epoch": 126.83582089552239, "grad_norm": 22.335859298706055, "learning_rate": 9.47985347985348e-06, "loss": 40.7778, "step": 5327 }, { "epoch": 126.85970149253731, "grad_norm": NaN, "learning_rate": 9.478021978021978e-06, "loss": 58.1092, "step": 5328 }, { "epoch": 126.88358208955223, "grad_norm": 24.419767379760742, "learning_rate": 9.478021978021978e-06, "loss": 40.4599, "step": 5329 }, { "epoch": 126.90746268656716, "grad_norm": 20.041467666625977, "learning_rate": 9.476190476190476e-06, "loss": 40.1727, "step": 5330 }, { "epoch": 126.9313432835821, "grad_norm": 26.40553855895996, "learning_rate": 9.474358974358974e-06, "loss": 41.0726, "step": 5331 }, { "epoch": 126.95522388059702, "grad_norm": 24.665653228759766, "learning_rate": 9.472527472527474e-06, "loss": 42.0728, "step": 5332 }, { "epoch": 126.97910447761194, "grad_norm": 21.605026245117188, "learning_rate": 9.470695970695972e-06, "loss": 40.6338, "step": 5333 }, { "epoch": 127.0, "grad_norm": 20.224733352661133, "learning_rate": 9.46886446886447e-06, "loss": 35.2993, "step": 5334 }, { "epoch": 127.02388059701492, "grad_norm": 21.891176223754883, "learning_rate": 9.467032967032967e-06, "loss": 40.7857, "step": 5335 }, { "epoch": 127.04776119402985, "grad_norm": 24.02487564086914, "learning_rate": 9.465201465201465e-06, "loss": 39.9996, "step": 5336 }, { "epoch": 127.07164179104478, "grad_norm": 26.67331314086914, "learning_rate": 9.463369963369963e-06, "loss": 41.6034, "step": 5337 }, { "epoch": 127.0955223880597, "grad_norm": 18.6497802734375, "learning_rate": 9.461538461538463e-06, "loss": 42.0516, "step": 5338 }, { "epoch": 127.11940298507463, "grad_norm": 31.833471298217773, "learning_rate": 9.45970695970696e-06, "loss": 40.597, "step": 5339 }, { "epoch": 127.14328358208955, "grad_norm": 27.305522918701172, "learning_rate": 9.457875457875458e-06, "loss": 40.7429, "step": 5340 }, { "epoch": 127.16716417910447, "grad_norm": 29.530677795410156, "learning_rate": 9.456043956043956e-06, "loss": 40.4873, "step": 5341 }, { "epoch": 127.1910447761194, "grad_norm": 29.030101776123047, "learning_rate": 9.454212454212454e-06, "loss": 39.0437, "step": 5342 }, { "epoch": 127.21492537313434, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 60.0619, "step": 5343 }, { "epoch": 127.23880597014926, "grad_norm": 31.528074264526367, "learning_rate": 9.452380952380952e-06, "loss": 40.4362, "step": 5344 }, { "epoch": 127.26268656716418, "grad_norm": 28.54173469543457, "learning_rate": 9.450549450549452e-06, "loss": 41.0408, "step": 5345 }, { "epoch": 127.2865671641791, "grad_norm": 33.24728775024414, "learning_rate": 9.44871794871795e-06, "loss": 40.6743, "step": 5346 }, { "epoch": 127.31044776119403, "grad_norm": 29.59555435180664, "learning_rate": 9.446886446886447e-06, "loss": 40.3809, "step": 5347 }, { "epoch": 127.33432835820895, "grad_norm": 32.15523147583008, "learning_rate": 9.445054945054945e-06, "loss": 40.6554, "step": 5348 }, { "epoch": 127.35820895522389, "grad_norm": 31.1282901763916, "learning_rate": 9.443223443223443e-06, "loss": 39.8469, "step": 5349 }, { "epoch": 127.38208955223881, "grad_norm": 30.29323959350586, "learning_rate": 9.441391941391943e-06, "loss": 40.155, "step": 5350 }, { "epoch": 127.40597014925373, "grad_norm": 28.87554931640625, "learning_rate": 9.43956043956044e-06, "loss": 39.2159, "step": 5351 }, { "epoch": 127.42985074626866, "grad_norm": 28.77309226989746, "learning_rate": 9.437728937728938e-06, "loss": 40.7679, "step": 5352 }, { "epoch": 127.45373134328358, "grad_norm": 24.797941207885742, "learning_rate": 9.435897435897436e-06, "loss": 40.2341, "step": 5353 }, { "epoch": 127.4776119402985, "grad_norm": 30.226261138916016, "learning_rate": 9.434065934065934e-06, "loss": 41.5231, "step": 5354 }, { "epoch": 127.50149253731344, "grad_norm": 23.113122940063477, "learning_rate": 9.432234432234432e-06, "loss": 40.5916, "step": 5355 }, { "epoch": 127.52537313432836, "grad_norm": 36.03447341918945, "learning_rate": 9.430402930402932e-06, "loss": 40.8739, "step": 5356 }, { "epoch": 127.54925373134328, "grad_norm": 32.825225830078125, "learning_rate": 9.42857142857143e-06, "loss": 40.2842, "step": 5357 }, { "epoch": 127.57313432835821, "grad_norm": 26.586402893066406, "learning_rate": 9.426739926739927e-06, "loss": 40.3329, "step": 5358 }, { "epoch": 127.59701492537313, "grad_norm": 25.394254684448242, "learning_rate": 9.424908424908425e-06, "loss": 41.4049, "step": 5359 }, { "epoch": 127.62089552238805, "grad_norm": 28.440998077392578, "learning_rate": 9.423076923076923e-06, "loss": 39.6021, "step": 5360 }, { "epoch": 127.64477611940299, "grad_norm": 25.379180908203125, "learning_rate": 9.421245421245421e-06, "loss": 40.6451, "step": 5361 }, { "epoch": 127.66865671641791, "grad_norm": 33.607208251953125, "learning_rate": 9.41941391941392e-06, "loss": 41.685, "step": 5362 }, { "epoch": 127.69253731343284, "grad_norm": 24.925783157348633, "learning_rate": 9.417582417582418e-06, "loss": 41.2479, "step": 5363 }, { "epoch": 127.71641791044776, "grad_norm": 34.97409439086914, "learning_rate": 9.415750915750916e-06, "loss": 40.9911, "step": 5364 }, { "epoch": 127.74029850746268, "grad_norm": 25.85514259338379, "learning_rate": 9.413919413919414e-06, "loss": 39.0472, "step": 5365 }, { "epoch": 127.7641791044776, "grad_norm": 32.1847038269043, "learning_rate": 9.412087912087912e-06, "loss": 40.6535, "step": 5366 }, { "epoch": 127.78805970149254, "grad_norm": 29.844226837158203, "learning_rate": 9.410256410256412e-06, "loss": 39.3755, "step": 5367 }, { "epoch": 127.81194029850747, "grad_norm": 31.974084854125977, "learning_rate": 9.40842490842491e-06, "loss": 41.1137, "step": 5368 }, { "epoch": 127.83582089552239, "grad_norm": 30.38601303100586, "learning_rate": 9.406593406593407e-06, "loss": 40.9944, "step": 5369 }, { "epoch": 127.85970149253731, "grad_norm": 25.314817428588867, "learning_rate": 9.404761904761905e-06, "loss": 40.0512, "step": 5370 }, { "epoch": 127.88358208955223, "grad_norm": 26.56514549255371, "learning_rate": 9.402930402930403e-06, "loss": 40.3742, "step": 5371 }, { "epoch": 127.90746268656716, "grad_norm": 27.275182723999023, "learning_rate": 9.401098901098901e-06, "loss": 41.387, "step": 5372 }, { "epoch": 127.9313432835821, "grad_norm": 24.307111740112305, "learning_rate": 9.3992673992674e-06, "loss": 41.2045, "step": 5373 }, { "epoch": 127.95522388059702, "grad_norm": 34.821327209472656, "learning_rate": 9.397435897435899e-06, "loss": 40.7932, "step": 5374 }, { "epoch": 127.97910447761194, "grad_norm": 31.300153732299805, "learning_rate": 9.395604395604396e-06, "loss": 41.0882, "step": 5375 }, { "epoch": 128.0, "grad_norm": 25.360746383666992, "learning_rate": 9.393772893772894e-06, "loss": 35.6565, "step": 5376 }, { "epoch": 128.02388059701494, "grad_norm": 32.15913772583008, "learning_rate": 9.391941391941392e-06, "loss": 39.5816, "step": 5377 }, { "epoch": 128.04776119402985, "grad_norm": 22.382192611694336, "learning_rate": 9.39010989010989e-06, "loss": 41.6509, "step": 5378 }, { "epoch": 128.07164179104478, "grad_norm": 23.480571746826172, "learning_rate": 9.38827838827839e-06, "loss": 40.7536, "step": 5379 }, { "epoch": 128.0955223880597, "grad_norm": 19.44637107849121, "learning_rate": 9.386446886446887e-06, "loss": 40.499, "step": 5380 }, { "epoch": 128.11940298507463, "grad_norm": 20.136741638183594, "learning_rate": 9.384615384615385e-06, "loss": 41.3551, "step": 5381 }, { "epoch": 128.14328358208957, "grad_norm": 20.516332626342773, "learning_rate": 9.382783882783883e-06, "loss": 40.7676, "step": 5382 }, { "epoch": 128.16716417910447, "grad_norm": 18.942041397094727, "learning_rate": 9.380952380952381e-06, "loss": 40.5404, "step": 5383 }, { "epoch": 128.1910447761194, "grad_norm": 22.05898666381836, "learning_rate": 9.37912087912088e-06, "loss": 40.9921, "step": 5384 }, { "epoch": 128.21492537313432, "grad_norm": 15.969873428344727, "learning_rate": 9.377289377289379e-06, "loss": 40.1066, "step": 5385 }, { "epoch": 128.23880597014926, "grad_norm": 18.438854217529297, "learning_rate": 9.375457875457876e-06, "loss": 39.8564, "step": 5386 }, { "epoch": 128.26268656716417, "grad_norm": 18.526012420654297, "learning_rate": 9.373626373626374e-06, "loss": 40.388, "step": 5387 }, { "epoch": 128.2865671641791, "grad_norm": 13.87939167022705, "learning_rate": 9.371794871794872e-06, "loss": 41.2088, "step": 5388 }, { "epoch": 128.31044776119404, "grad_norm": 19.515592575073242, "learning_rate": 9.36996336996337e-06, "loss": 40.3321, "step": 5389 }, { "epoch": 128.33432835820895, "grad_norm": 17.547893524169922, "learning_rate": 9.36813186813187e-06, "loss": 40.0459, "step": 5390 }, { "epoch": 128.3582089552239, "grad_norm": 24.08388900756836, "learning_rate": 9.366300366300367e-06, "loss": 40.7233, "step": 5391 }, { "epoch": 128.3820895522388, "grad_norm": 25.02381134033203, "learning_rate": 9.364468864468865e-06, "loss": 41.4629, "step": 5392 }, { "epoch": 128.40597014925373, "grad_norm": 17.845233917236328, "learning_rate": 9.362637362637363e-06, "loss": 40.136, "step": 5393 }, { "epoch": 128.42985074626867, "grad_norm": 24.73293685913086, "learning_rate": 9.360805860805861e-06, "loss": 40.1744, "step": 5394 }, { "epoch": 128.45373134328358, "grad_norm": 18.738384246826172, "learning_rate": 9.358974358974359e-06, "loss": 40.9566, "step": 5395 }, { "epoch": 128.47761194029852, "grad_norm": 22.628456115722656, "learning_rate": 9.357142857142859e-06, "loss": 39.9645, "step": 5396 }, { "epoch": 128.50149253731342, "grad_norm": 19.057598114013672, "learning_rate": 9.355311355311356e-06, "loss": 38.6498, "step": 5397 }, { "epoch": 128.52537313432836, "grad_norm": 20.58139419555664, "learning_rate": 9.353479853479854e-06, "loss": 41.7546, "step": 5398 }, { "epoch": 128.54925373134327, "grad_norm": 23.596145629882812, "learning_rate": 9.351648351648352e-06, "loss": 39.7231, "step": 5399 }, { "epoch": 128.5731343283582, "grad_norm": 18.677183151245117, "learning_rate": 9.34981684981685e-06, "loss": 39.6687, "step": 5400 }, { "epoch": 128.59701492537314, "grad_norm": 22.48053550720215, "learning_rate": 9.34798534798535e-06, "loss": 41.1109, "step": 5401 }, { "epoch": 128.62089552238805, "grad_norm": 18.408390045166016, "learning_rate": 9.346153846153847e-06, "loss": 40.0313, "step": 5402 }, { "epoch": 128.644776119403, "grad_norm": 18.866302490234375, "learning_rate": 9.344322344322345e-06, "loss": 41.4068, "step": 5403 }, { "epoch": 128.6686567164179, "grad_norm": 18.15769386291504, "learning_rate": 9.342490842490843e-06, "loss": 40.0289, "step": 5404 }, { "epoch": 128.69253731343284, "grad_norm": 21.213743209838867, "learning_rate": 9.340659340659341e-06, "loss": 41.5406, "step": 5405 }, { "epoch": 128.71641791044777, "grad_norm": 14.050131797790527, "learning_rate": 9.338827838827839e-06, "loss": 40.9447, "step": 5406 }, { "epoch": 128.74029850746268, "grad_norm": 20.822832107543945, "learning_rate": 9.336996336996339e-06, "loss": 40.7616, "step": 5407 }, { "epoch": 128.76417910447762, "grad_norm": 16.915830612182617, "learning_rate": 9.335164835164836e-06, "loss": 39.8529, "step": 5408 }, { "epoch": 128.78805970149253, "grad_norm": 24.053998947143555, "learning_rate": 9.333333333333334e-06, "loss": 41.2986, "step": 5409 }, { "epoch": 128.81194029850747, "grad_norm": 21.357769012451172, "learning_rate": 9.331501831501832e-06, "loss": 41.6824, "step": 5410 }, { "epoch": 128.83582089552237, "grad_norm": 16.31240463256836, "learning_rate": 9.32967032967033e-06, "loss": 40.1068, "step": 5411 }, { "epoch": 128.8597014925373, "grad_norm": 18.027111053466797, "learning_rate": 9.327838827838828e-06, "loss": 39.9807, "step": 5412 }, { "epoch": 128.88358208955225, "grad_norm": 17.471216201782227, "learning_rate": 9.326007326007328e-06, "loss": 40.1997, "step": 5413 }, { "epoch": 128.90746268656716, "grad_norm": 14.707521438598633, "learning_rate": 9.324175824175825e-06, "loss": 40.2096, "step": 5414 }, { "epoch": 128.9313432835821, "grad_norm": 16.55643081665039, "learning_rate": 9.322344322344323e-06, "loss": 41.4804, "step": 5415 }, { "epoch": 128.955223880597, "grad_norm": 17.47356414794922, "learning_rate": 9.320512820512821e-06, "loss": 40.0436, "step": 5416 }, { "epoch": 128.97910447761194, "grad_norm": 18.92135238647461, "learning_rate": 9.318681318681319e-06, "loss": 40.1894, "step": 5417 }, { "epoch": 129.0, "grad_norm": 17.002300262451172, "learning_rate": 9.316849816849819e-06, "loss": 36.4327, "step": 5418 }, { "epoch": 129.02388059701494, "grad_norm": 18.210742950439453, "learning_rate": 9.315018315018316e-06, "loss": 40.8625, "step": 5419 }, { "epoch": 129.04776119402985, "grad_norm": 17.84212875366211, "learning_rate": 9.313186813186814e-06, "loss": 39.4351, "step": 5420 }, { "epoch": 129.07164179104478, "grad_norm": 16.789724349975586, "learning_rate": 9.311355311355312e-06, "loss": 40.7753, "step": 5421 }, { "epoch": 129.0955223880597, "grad_norm": 16.961986541748047, "learning_rate": 9.30952380952381e-06, "loss": 39.5132, "step": 5422 }, { "epoch": 129.11940298507463, "grad_norm": 19.639286041259766, "learning_rate": 9.307692307692308e-06, "loss": 41.1626, "step": 5423 }, { "epoch": 129.14328358208957, "grad_norm": 15.542900085449219, "learning_rate": 9.305860805860808e-06, "loss": 37.8755, "step": 5424 }, { "epoch": 129.16716417910447, "grad_norm": 21.57238006591797, "learning_rate": 9.304029304029305e-06, "loss": 40.1871, "step": 5425 }, { "epoch": 129.1910447761194, "grad_norm": 18.78668785095215, "learning_rate": 9.302197802197803e-06, "loss": 40.0839, "step": 5426 }, { "epoch": 129.21492537313432, "grad_norm": 19.953189849853516, "learning_rate": 9.300366300366301e-06, "loss": 39.1708, "step": 5427 }, { "epoch": 129.23880597014926, "grad_norm": 19.159618377685547, "learning_rate": 9.298534798534799e-06, "loss": 40.0572, "step": 5428 }, { "epoch": 129.26268656716417, "grad_norm": 20.108295440673828, "learning_rate": 9.296703296703297e-06, "loss": 40.5194, "step": 5429 }, { "epoch": 129.2865671641791, "grad_norm": 18.625139236450195, "learning_rate": 9.294871794871796e-06, "loss": 39.8611, "step": 5430 }, { "epoch": 129.31044776119404, "grad_norm": 23.48390007019043, "learning_rate": 9.293040293040294e-06, "loss": 39.9747, "step": 5431 }, { "epoch": 129.33432835820895, "grad_norm": 17.067564010620117, "learning_rate": 9.291208791208792e-06, "loss": 40.5828, "step": 5432 }, { "epoch": 129.3582089552239, "grad_norm": 24.928804397583008, "learning_rate": 9.28937728937729e-06, "loss": 41.1937, "step": 5433 }, { "epoch": 129.3820895522388, "grad_norm": 20.61871910095215, "learning_rate": 9.287545787545788e-06, "loss": 40.7314, "step": 5434 }, { "epoch": 129.40597014925373, "grad_norm": 28.40680694580078, "learning_rate": 9.285714285714288e-06, "loss": 40.6506, "step": 5435 }, { "epoch": 129.42985074626867, "grad_norm": 22.84246253967285, "learning_rate": 9.283882783882785e-06, "loss": 41.2816, "step": 5436 }, { "epoch": 129.45373134328358, "grad_norm": 29.882131576538086, "learning_rate": 9.282051282051283e-06, "loss": 40.7815, "step": 5437 }, { "epoch": 129.47761194029852, "grad_norm": 21.24380111694336, "learning_rate": 9.280219780219781e-06, "loss": 41.2894, "step": 5438 }, { "epoch": 129.50149253731342, "grad_norm": 23.89835548400879, "learning_rate": 9.278388278388279e-06, "loss": 40.1416, "step": 5439 }, { "epoch": 129.52537313432836, "grad_norm": 21.517475128173828, "learning_rate": 9.276556776556777e-06, "loss": 41.05, "step": 5440 }, { "epoch": 129.54925373134327, "grad_norm": 17.225387573242188, "learning_rate": 9.274725274725277e-06, "loss": 41.058, "step": 5441 }, { "epoch": 129.5731343283582, "grad_norm": 17.844186782836914, "learning_rate": 9.272893772893774e-06, "loss": 41.5639, "step": 5442 }, { "epoch": 129.59701492537314, "grad_norm": 18.40740394592285, "learning_rate": 9.271062271062272e-06, "loss": 40.1886, "step": 5443 }, { "epoch": 129.62089552238805, "grad_norm": 16.104562759399414, "learning_rate": 9.26923076923077e-06, "loss": 40.9611, "step": 5444 }, { "epoch": 129.644776119403, "grad_norm": 15.872597694396973, "learning_rate": 9.267399267399268e-06, "loss": 41.0908, "step": 5445 }, { "epoch": 129.6686567164179, "grad_norm": 16.474458694458008, "learning_rate": 9.265567765567766e-06, "loss": 39.5688, "step": 5446 }, { "epoch": 129.69253731343284, "grad_norm": 18.232454299926758, "learning_rate": 9.263736263736265e-06, "loss": 40.8028, "step": 5447 }, { "epoch": 129.71641791044777, "grad_norm": 16.598379135131836, "learning_rate": 9.261904761904763e-06, "loss": 40.5047, "step": 5448 }, { "epoch": 129.74029850746268, "grad_norm": 19.88945770263672, "learning_rate": 9.260073260073261e-06, "loss": 41.5815, "step": 5449 }, { "epoch": 129.76417910447762, "grad_norm": 18.88849449157715, "learning_rate": 9.258241758241759e-06, "loss": 41.1635, "step": 5450 }, { "epoch": 129.78805970149253, "grad_norm": 16.19620704650879, "learning_rate": 9.256410256410257e-06, "loss": 39.988, "step": 5451 }, { "epoch": 129.81194029850747, "grad_norm": 17.755510330200195, "learning_rate": 9.254578754578757e-06, "loss": 39.5852, "step": 5452 }, { "epoch": 129.83582089552237, "grad_norm": 18.566909790039062, "learning_rate": 9.252747252747254e-06, "loss": 40.5909, "step": 5453 }, { "epoch": 129.8597014925373, "grad_norm": NaN, "learning_rate": 9.250915750915752e-06, "loss": 36.0401, "step": 5454 }, { "epoch": 129.88358208955225, "grad_norm": 16.708894729614258, "learning_rate": 9.250915750915752e-06, "loss": 40.8427, "step": 5455 }, { "epoch": 129.90746268656716, "grad_norm": 18.091861724853516, "learning_rate": 9.24908424908425e-06, "loss": 40.9939, "step": 5456 }, { "epoch": 129.9313432835821, "grad_norm": 19.519044876098633, "learning_rate": 9.247252747252748e-06, "loss": 40.0013, "step": 5457 }, { "epoch": 129.955223880597, "grad_norm": 15.587594032287598, "learning_rate": 9.245421245421246e-06, "loss": 40.2778, "step": 5458 }, { "epoch": 129.97910447761194, "grad_norm": 19.010610580444336, "learning_rate": 9.243589743589745e-06, "loss": 40.7324, "step": 5459 }, { "epoch": 130.0, "grad_norm": 16.66925048828125, "learning_rate": 9.241758241758243e-06, "loss": 35.6576, "step": 5460 }, { "epoch": 130.0, "step": 5460, "total_flos": 2.6841554727339034e+17, "train_loss": 3.138686427441272, "train_runtime": 12817.2093, "train_samples_per_second": 54.283, "train_steps_per_second": 0.426 }, { "epoch": 130.02388059701494, "grad_norm": 18.575708389282227, "learning_rate": 1e-05, "loss": 40.4192, "step": 5461 }, { "epoch": 130.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998299319727893e-06, "loss": 45.3289, "step": 5462 }, { "epoch": 130.07164179104478, "grad_norm": 228.45680236816406, "learning_rate": 9.998299319727893e-06, "loss": 44.984, "step": 5463 }, { "epoch": 130.0955223880597, "grad_norm": 129.42657470703125, "learning_rate": 9.996598639455783e-06, "loss": 44.7713, "step": 5464 }, { "epoch": 130.11940298507463, "grad_norm": 55.86802291870117, "learning_rate": 9.994897959183675e-06, "loss": 43.4058, "step": 5465 }, { "epoch": 130.14328358208957, "grad_norm": 93.52910614013672, "learning_rate": 9.993197278911566e-06, "loss": 42.3555, "step": 5466 }, { "epoch": 130.16716417910447, "grad_norm": 49.01381301879883, "learning_rate": 9.991496598639456e-06, "loss": 41.0505, "step": 5467 }, { "epoch": 130.1910447761194, "grad_norm": 54.29282760620117, "learning_rate": 9.989795918367348e-06, "loss": 40.938, "step": 5468 }, { "epoch": 130.21492537313432, "grad_norm": 37.95635223388672, "learning_rate": 9.988095238095239e-06, "loss": 40.3188, "step": 5469 }, { "epoch": 130.23880597014926, "grad_norm": 36.652523040771484, "learning_rate": 9.98639455782313e-06, "loss": 41.5117, "step": 5470 }, { "epoch": 130.26268656716417, "grad_norm": 40.90021896362305, "learning_rate": 9.984693877551021e-06, "loss": 42.5635, "step": 5471 }, { "epoch": 130.2865671641791, "grad_norm": 28.823503494262695, "learning_rate": 9.982993197278913e-06, "loss": 41.404, "step": 5472 }, { "epoch": 130.31044776119404, "grad_norm": 24.62152862548828, "learning_rate": 9.981292517006804e-06, "loss": 40.9372, "step": 5473 }, { "epoch": 130.33432835820895, "grad_norm": 29.644268035888672, "learning_rate": 9.979591836734694e-06, "loss": 40.5455, "step": 5474 }, { "epoch": 130.3582089552239, "grad_norm": 21.859779357910156, "learning_rate": 9.977891156462586e-06, "loss": 41.4169, "step": 5475 }, { "epoch": 130.3820895522388, "grad_norm": 23.489789962768555, "learning_rate": 9.976190476190477e-06, "loss": 39.8592, "step": 5476 }, { "epoch": 130.40597014925373, "grad_norm": 18.39851951599121, "learning_rate": 9.974489795918369e-06, "loss": 39.969, "step": 5477 }, { "epoch": 130.42985074626867, "grad_norm": 25.369873046875, "learning_rate": 9.972789115646259e-06, "loss": 39.7739, "step": 5478 }, { "epoch": 130.45373134328358, "grad_norm": 22.13943862915039, "learning_rate": 9.97108843537415e-06, "loss": 40.0817, "step": 5479 }, { "epoch": 130.47761194029852, "grad_norm": 22.7308292388916, "learning_rate": 9.969387755102042e-06, "loss": 41.4501, "step": 5480 }, { "epoch": 130.50149253731342, "grad_norm": 16.09027671813965, "learning_rate": 9.967687074829932e-06, "loss": 41.1944, "step": 5481 }, { "epoch": 130.52537313432836, "grad_norm": 20.12171745300293, "learning_rate": 9.965986394557824e-06, "loss": 40.5477, "step": 5482 }, { "epoch": 130.54925373134327, "grad_norm": 18.88404655456543, "learning_rate": 9.964285714285714e-06, "loss": 39.402, "step": 5483 }, { "epoch": 130.5731343283582, "grad_norm": 20.32000732421875, "learning_rate": 9.962585034013607e-06, "loss": 40.6678, "step": 5484 }, { "epoch": 130.59701492537314, "grad_norm": 20.351774215698242, "learning_rate": 9.960884353741499e-06, "loss": 40.075, "step": 5485 }, { "epoch": 130.62089552238805, "grad_norm": 14.881600379943848, "learning_rate": 9.959183673469387e-06, "loss": 40.0113, "step": 5486 }, { "epoch": 130.644776119403, "grad_norm": 21.500431060791016, "learning_rate": 9.95748299319728e-06, "loss": 39.5136, "step": 5487 }, { "epoch": 130.6686567164179, "grad_norm": 20.59417152404785, "learning_rate": 9.955782312925172e-06, "loss": 40.3664, "step": 5488 }, { "epoch": 130.69253731343284, "grad_norm": 20.141138076782227, "learning_rate": 9.954081632653062e-06, "loss": 40.8991, "step": 5489 }, { "epoch": 130.71641791044777, "grad_norm": 16.41176986694336, "learning_rate": 9.952380952380954e-06, "loss": 40.3613, "step": 5490 }, { "epoch": 130.74029850746268, "grad_norm": 20.832176208496094, "learning_rate": 9.950680272108844e-06, "loss": 40.709, "step": 5491 }, { "epoch": 130.76417910447762, "grad_norm": 18.355520248413086, "learning_rate": 9.948979591836737e-06, "loss": 41.1332, "step": 5492 }, { "epoch": 130.78805970149253, "grad_norm": 21.0073299407959, "learning_rate": 9.947278911564627e-06, "loss": 40.496, "step": 5493 }, { "epoch": 130.81194029850747, "grad_norm": 18.184412002563477, "learning_rate": 9.945578231292517e-06, "loss": 40.9514, "step": 5494 }, { "epoch": 130.83582089552237, "grad_norm": 23.272981643676758, "learning_rate": 9.94387755102041e-06, "loss": 40.2932, "step": 5495 }, { "epoch": 130.8597014925373, "grad_norm": 16.066865921020508, "learning_rate": 9.9421768707483e-06, "loss": 41.2145, "step": 5496 }, { "epoch": 130.88358208955225, "grad_norm": 19.863813400268555, "learning_rate": 9.940476190476192e-06, "loss": 40.9969, "step": 5497 }, { "epoch": 130.90746268656716, "grad_norm": 20.84225082397461, "learning_rate": 9.938775510204082e-06, "loss": 40.1575, "step": 5498 }, { "epoch": 130.9313432835821, "grad_norm": 16.452865600585938, "learning_rate": 9.937074829931974e-06, "loss": 38.9115, "step": 5499 }, { "epoch": 130.955223880597, "grad_norm": 19.1783390045166, "learning_rate": 9.935374149659865e-06, "loss": 40.7441, "step": 5500 }, { "epoch": 130.97910447761194, "grad_norm": 21.94544219970703, "learning_rate": 9.933673469387755e-06, "loss": 41.8275, "step": 5501 }, { "epoch": 131.0, "grad_norm": 13.472136497497559, "learning_rate": 9.931972789115647e-06, "loss": 34.5508, "step": 5502 }, { "epoch": 131.02388059701494, "grad_norm": 18.82528305053711, "learning_rate": 9.930272108843538e-06, "loss": 41.3352, "step": 5503 }, { "epoch": 131.04776119402985, "grad_norm": 16.815523147583008, "learning_rate": 9.92857142857143e-06, "loss": 39.1606, "step": 5504 }, { "epoch": 131.07164179104478, "grad_norm": 18.014087677001953, "learning_rate": 9.92687074829932e-06, "loss": 41.389, "step": 5505 }, { "epoch": 131.0955223880597, "grad_norm": 18.153976440429688, "learning_rate": 9.92517006802721e-06, "loss": 41.0835, "step": 5506 }, { "epoch": 131.11940298507463, "grad_norm": 16.97452163696289, "learning_rate": 9.923469387755103e-06, "loss": 41.149, "step": 5507 }, { "epoch": 131.14328358208957, "grad_norm": 16.83989143371582, "learning_rate": 9.921768707482993e-06, "loss": 40.9826, "step": 5508 }, { "epoch": 131.16716417910447, "grad_norm": 15.62459659576416, "learning_rate": 9.920068027210885e-06, "loss": 41.0703, "step": 5509 }, { "epoch": 131.1910447761194, "grad_norm": 14.438183784484863, "learning_rate": 9.918367346938776e-06, "loss": 41.3628, "step": 5510 }, { "epoch": 131.21492537313432, "grad_norm": 23.413602828979492, "learning_rate": 9.916666666666668e-06, "loss": 40.0985, "step": 5511 }, { "epoch": 131.23880597014926, "grad_norm": 19.558998107910156, "learning_rate": 9.91496598639456e-06, "loss": 40.2111, "step": 5512 }, { "epoch": 131.26268656716417, "grad_norm": 12.165032386779785, "learning_rate": 9.913265306122449e-06, "loss": 40.6546, "step": 5513 }, { "epoch": 131.2865671641791, "grad_norm": 18.257869720458984, "learning_rate": 9.91156462585034e-06, "loss": 42.2008, "step": 5514 }, { "epoch": 131.31044776119404, "grad_norm": 21.828651428222656, "learning_rate": 9.909863945578233e-06, "loss": 39.9445, "step": 5515 }, { "epoch": 131.33432835820895, "grad_norm": 16.42722511291504, "learning_rate": 9.908163265306123e-06, "loss": 40.4844, "step": 5516 }, { "epoch": 131.3582089552239, "grad_norm": 15.241820335388184, "learning_rate": 9.906462585034015e-06, "loss": 39.5317, "step": 5517 }, { "epoch": 131.3820895522388, "grad_norm": 14.893516540527344, "learning_rate": 9.904761904761906e-06, "loss": 40.9785, "step": 5518 }, { "epoch": 131.40597014925373, "grad_norm": 15.520328521728516, "learning_rate": 9.903061224489798e-06, "loss": 40.2672, "step": 5519 }, { "epoch": 131.42985074626867, "grad_norm": 20.18976402282715, "learning_rate": 9.901360544217688e-06, "loss": 39.9149, "step": 5520 }, { "epoch": 131.45373134328358, "grad_norm": 17.364437103271484, "learning_rate": 9.899659863945579e-06, "loss": 39.2209, "step": 5521 }, { "epoch": 131.47761194029852, "grad_norm": 18.58165168762207, "learning_rate": 9.89795918367347e-06, "loss": 41.0448, "step": 5522 }, { "epoch": 131.50149253731342, "grad_norm": 14.185492515563965, "learning_rate": 9.896258503401361e-06, "loss": 40.0128, "step": 5523 }, { "epoch": 131.52537313432836, "grad_norm": 18.741228103637695, "learning_rate": 9.894557823129253e-06, "loss": 40.4496, "step": 5524 }, { "epoch": 131.54925373134327, "grad_norm": 16.92427635192871, "learning_rate": 9.892857142857143e-06, "loss": 39.8989, "step": 5525 }, { "epoch": 131.5731343283582, "grad_norm": 18.248445510864258, "learning_rate": 9.891156462585036e-06, "loss": 39.7473, "step": 5526 }, { "epoch": 131.59701492537314, "grad_norm": 18.88313865661621, "learning_rate": 9.889455782312926e-06, "loss": 40.1553, "step": 5527 }, { "epoch": 131.62089552238805, "grad_norm": 19.88404655456543, "learning_rate": 9.887755102040816e-06, "loss": 40.9204, "step": 5528 }, { "epoch": 131.644776119403, "grad_norm": 17.367191314697266, "learning_rate": 9.886054421768708e-06, "loss": 39.9127, "step": 5529 }, { "epoch": 131.6686567164179, "grad_norm": 18.826900482177734, "learning_rate": 9.884353741496599e-06, "loss": 40.3812, "step": 5530 }, { "epoch": 131.69253731343284, "grad_norm": 16.4368953704834, "learning_rate": 9.882653061224491e-06, "loss": 40.536, "step": 5531 }, { "epoch": 131.71641791044777, "grad_norm": 17.32594108581543, "learning_rate": 9.880952380952381e-06, "loss": 41.1266, "step": 5532 }, { "epoch": 131.74029850746268, "grad_norm": 25.614294052124023, "learning_rate": 9.879251700680272e-06, "loss": 40.5684, "step": 5533 }, { "epoch": 131.76417910447762, "grad_norm": 18.330366134643555, "learning_rate": 9.877551020408164e-06, "loss": 40.9372, "step": 5534 }, { "epoch": 131.78805970149253, "grad_norm": 17.743070602416992, "learning_rate": 9.875850340136054e-06, "loss": 40.3854, "step": 5535 }, { "epoch": 131.81194029850747, "grad_norm": 20.566457748413086, "learning_rate": 9.874149659863946e-06, "loss": 40.7082, "step": 5536 }, { "epoch": 131.83582089552237, "grad_norm": 24.459489822387695, "learning_rate": 9.872448979591838e-06, "loss": 41.2391, "step": 5537 }, { "epoch": 131.8597014925373, "grad_norm": 15.706886291503906, "learning_rate": 9.870748299319729e-06, "loss": 39.2229, "step": 5538 }, { "epoch": 131.88358208955225, "grad_norm": 18.37516975402832, "learning_rate": 9.869047619047621e-06, "loss": 39.3149, "step": 5539 }, { "epoch": 131.90746268656716, "grad_norm": 18.098176956176758, "learning_rate": 9.867346938775511e-06, "loss": 39.4227, "step": 5540 }, { "epoch": 131.9313432835821, "grad_norm": 14.898340225219727, "learning_rate": 9.865646258503402e-06, "loss": 39.4192, "step": 5541 }, { "epoch": 131.955223880597, "grad_norm": 21.42721176147461, "learning_rate": 9.863945578231294e-06, "loss": 40.879, "step": 5542 }, { "epoch": 131.97910447761194, "grad_norm": 19.07784652709961, "learning_rate": 9.862244897959184e-06, "loss": 40.1433, "step": 5543 }, { "epoch": 132.0, "grad_norm": 16.04290199279785, "learning_rate": 9.860544217687076e-06, "loss": 34.6443, "step": 5544 }, { "epoch": 132.02388059701494, "grad_norm": 17.641210556030273, "learning_rate": 9.858843537414967e-06, "loss": 40.6522, "step": 5545 }, { "epoch": 132.04776119402985, "grad_norm": 22.368637084960938, "learning_rate": 9.857142857142859e-06, "loss": 41.3016, "step": 5546 }, { "epoch": 132.07164179104478, "grad_norm": 19.166706085205078, "learning_rate": 9.85544217687075e-06, "loss": 40.4856, "step": 5547 }, { "epoch": 132.0955223880597, "grad_norm": 20.525333404541016, "learning_rate": 9.85374149659864e-06, "loss": 41.073, "step": 5548 }, { "epoch": 132.11940298507463, "grad_norm": 19.90342140197754, "learning_rate": 9.852040816326532e-06, "loss": 39.3624, "step": 5549 }, { "epoch": 132.14328358208957, "grad_norm": 15.503653526306152, "learning_rate": 9.850340136054422e-06, "loss": 41.8662, "step": 5550 }, { "epoch": 132.16716417910447, "grad_norm": 15.871750831604004, "learning_rate": 9.848639455782314e-06, "loss": 39.9852, "step": 5551 }, { "epoch": 132.1910447761194, "grad_norm": 25.539695739746094, "learning_rate": 9.846938775510205e-06, "loss": 38.6591, "step": 5552 }, { "epoch": 132.21492537313432, "grad_norm": 20.75281524658203, "learning_rate": 9.845238095238097e-06, "loss": 40.6497, "step": 5553 }, { "epoch": 132.23880597014926, "grad_norm": 17.729764938354492, "learning_rate": 9.843537414965987e-06, "loss": 39.8891, "step": 5554 }, { "epoch": 132.26268656716417, "grad_norm": 16.33675765991211, "learning_rate": 9.841836734693878e-06, "loss": 41.0467, "step": 5555 }, { "epoch": 132.2865671641791, "grad_norm": 16.76603126525879, "learning_rate": 9.84013605442177e-06, "loss": 41.4239, "step": 5556 }, { "epoch": 132.31044776119404, "grad_norm": 20.384475708007812, "learning_rate": 9.83843537414966e-06, "loss": 40.4335, "step": 5557 }, { "epoch": 132.33432835820895, "grad_norm": 16.604625701904297, "learning_rate": 9.836734693877552e-06, "loss": 40.5462, "step": 5558 }, { "epoch": 132.3582089552239, "grad_norm": 19.941377639770508, "learning_rate": 9.835034013605444e-06, "loss": 41.7404, "step": 5559 }, { "epoch": 132.3820895522388, "grad_norm": 15.349913597106934, "learning_rate": 9.833333333333333e-06, "loss": 38.5686, "step": 5560 }, { "epoch": 132.40597014925373, "grad_norm": 21.03326988220215, "learning_rate": 9.831632653061225e-06, "loss": 41.3306, "step": 5561 }, { "epoch": 132.42985074626867, "grad_norm": 18.98489761352539, "learning_rate": 9.829931972789115e-06, "loss": 39.5212, "step": 5562 }, { "epoch": 132.45373134328358, "grad_norm": 15.476447105407715, "learning_rate": 9.828231292517008e-06, "loss": 39.31, "step": 5563 }, { "epoch": 132.47761194029852, "grad_norm": 17.050857543945312, "learning_rate": 9.8265306122449e-06, "loss": 40.1559, "step": 5564 }, { "epoch": 132.50149253731342, "grad_norm": 17.630809783935547, "learning_rate": 9.82482993197279e-06, "loss": 39.8047, "step": 5565 }, { "epoch": 132.52537313432836, "grad_norm": NaN, "learning_rate": 9.823129251700682e-06, "loss": 54.2096, "step": 5566 }, { "epoch": 132.54925373134327, "grad_norm": 18.909269332885742, "learning_rate": 9.823129251700682e-06, "loss": 40.1987, "step": 5567 }, { "epoch": 132.5731343283582, "grad_norm": 20.534330368041992, "learning_rate": 9.821428571428573e-06, "loss": 40.7122, "step": 5568 }, { "epoch": 132.59701492537314, "grad_norm": 16.048946380615234, "learning_rate": 9.819727891156463e-06, "loss": 40.2584, "step": 5569 }, { "epoch": 132.62089552238805, "grad_norm": 14.615914344787598, "learning_rate": 9.818027210884355e-06, "loss": 40.1147, "step": 5570 }, { "epoch": 132.644776119403, "grad_norm": 16.347827911376953, "learning_rate": 9.816326530612245e-06, "loss": 41.0445, "step": 5571 }, { "epoch": 132.6686567164179, "grad_norm": 20.02432632446289, "learning_rate": 9.814625850340137e-06, "loss": 40.1337, "step": 5572 }, { "epoch": 132.69253731343284, "grad_norm": 18.087976455688477, "learning_rate": 9.812925170068028e-06, "loss": 39.9454, "step": 5573 }, { "epoch": 132.71641791044777, "grad_norm": 16.806800842285156, "learning_rate": 9.81122448979592e-06, "loss": 40.7469, "step": 5574 }, { "epoch": 132.74029850746268, "grad_norm": 14.957366943359375, "learning_rate": 9.80952380952381e-06, "loss": 41.9708, "step": 5575 }, { "epoch": 132.76417910447762, "grad_norm": 15.429438591003418, "learning_rate": 9.8078231292517e-06, "loss": 40.5727, "step": 5576 }, { "epoch": 132.78805970149253, "grad_norm": 18.437835693359375, "learning_rate": 9.806122448979593e-06, "loss": 39.3392, "step": 5577 }, { "epoch": 132.81194029850747, "grad_norm": 23.49526023864746, "learning_rate": 9.804421768707483e-06, "loss": 40.8007, "step": 5578 }, { "epoch": 132.83582089552237, "grad_norm": 15.580110549926758, "learning_rate": 9.802721088435375e-06, "loss": 40.2113, "step": 5579 }, { "epoch": 132.8597014925373, "grad_norm": 13.494383811950684, "learning_rate": 9.801020408163266e-06, "loss": 39.5501, "step": 5580 }, { "epoch": 132.88358208955225, "grad_norm": 14.148122787475586, "learning_rate": 9.799319727891158e-06, "loss": 39.5385, "step": 5581 }, { "epoch": 132.90746268656716, "grad_norm": 14.981057167053223, "learning_rate": 9.797619047619048e-06, "loss": 40.1832, "step": 5582 }, { "epoch": 132.9313432835821, "grad_norm": 17.651594161987305, "learning_rate": 9.795918367346939e-06, "loss": 40.8822, "step": 5583 }, { "epoch": 132.955223880597, "grad_norm": 23.53675079345703, "learning_rate": 9.79421768707483e-06, "loss": 40.4374, "step": 5584 }, { "epoch": 132.97910447761194, "grad_norm": 14.797532081604004, "learning_rate": 9.792517006802721e-06, "loss": 40.3035, "step": 5585 }, { "epoch": 133.0, "grad_norm": 19.286834716796875, "learning_rate": 9.790816326530613e-06, "loss": 35.0022, "step": 5586 }, { "epoch": 133.02388059701494, "grad_norm": 25.947200775146484, "learning_rate": 9.789115646258505e-06, "loss": 40.0884, "step": 5587 }, { "epoch": 133.04776119402985, "grad_norm": 17.286386489868164, "learning_rate": 9.787414965986394e-06, "loss": 40.6761, "step": 5588 }, { "epoch": 133.07164179104478, "grad_norm": 16.327795028686523, "learning_rate": 9.785714285714286e-06, "loss": 39.5775, "step": 5589 }, { "epoch": 133.0955223880597, "grad_norm": 25.301265716552734, "learning_rate": 9.784013605442178e-06, "loss": 39.6754, "step": 5590 }, { "epoch": 133.11940298507463, "grad_norm": 18.68819236755371, "learning_rate": 9.782312925170069e-06, "loss": 40.444, "step": 5591 }, { "epoch": 133.14328358208957, "grad_norm": 16.839736938476562, "learning_rate": 9.78061224489796e-06, "loss": 39.586, "step": 5592 }, { "epoch": 133.16716417910447, "grad_norm": 27.723005294799805, "learning_rate": 9.778911564625851e-06, "loss": 40.631, "step": 5593 }, { "epoch": 133.1910447761194, "grad_norm": 16.834030151367188, "learning_rate": 9.777210884353743e-06, "loss": 39.9121, "step": 5594 }, { "epoch": 133.21492537313432, "grad_norm": 16.289016723632812, "learning_rate": 9.775510204081634e-06, "loss": 39.8342, "step": 5595 }, { "epoch": 133.23880597014926, "grad_norm": 23.45367431640625, "learning_rate": 9.773809523809524e-06, "loss": 39.8418, "step": 5596 }, { "epoch": 133.26268656716417, "grad_norm": 18.50150489807129, "learning_rate": 9.772108843537416e-06, "loss": 40.8921, "step": 5597 }, { "epoch": 133.2865671641791, "grad_norm": 15.655564308166504, "learning_rate": 9.770408163265307e-06, "loss": 39.9306, "step": 5598 }, { "epoch": 133.31044776119404, "grad_norm": 23.770095825195312, "learning_rate": 9.768707482993199e-06, "loss": 40.686, "step": 5599 }, { "epoch": 133.33432835820895, "grad_norm": 21.083984375, "learning_rate": 9.767006802721089e-06, "loss": 40.5774, "step": 5600 }, { "epoch": 133.3582089552239, "grad_norm": 14.010787010192871, "learning_rate": 9.765306122448981e-06, "loss": 40.3888, "step": 5601 }, { "epoch": 133.3820895522388, "grad_norm": 29.777660369873047, "learning_rate": 9.763605442176872e-06, "loss": 41.4408, "step": 5602 }, { "epoch": 133.40597014925373, "grad_norm": 19.067794799804688, "learning_rate": 9.761904761904762e-06, "loss": 40.1208, "step": 5603 }, { "epoch": 133.42985074626867, "grad_norm": 30.848791122436523, "learning_rate": 9.760204081632654e-06, "loss": 40.7094, "step": 5604 }, { "epoch": 133.45373134328358, "grad_norm": 29.024898529052734, "learning_rate": 9.758503401360544e-06, "loss": 40.7004, "step": 5605 }, { "epoch": 133.47761194029852, "grad_norm": 22.88898468017578, "learning_rate": 9.756802721088437e-06, "loss": 40.257, "step": 5606 }, { "epoch": 133.50149253731342, "grad_norm": 39.7208137512207, "learning_rate": 9.755102040816327e-06, "loss": 38.6114, "step": 5607 }, { "epoch": 133.52537313432836, "grad_norm": 30.543888092041016, "learning_rate": 9.753401360544217e-06, "loss": 40.1867, "step": 5608 }, { "epoch": 133.54925373134327, "grad_norm": 36.19719314575195, "learning_rate": 9.75170068027211e-06, "loss": 40.5948, "step": 5609 }, { "epoch": 133.5731343283582, "grad_norm": 32.90020751953125, "learning_rate": 9.75e-06, "loss": 39.8139, "step": 5610 }, { "epoch": 133.59701492537314, "grad_norm": 34.50712585449219, "learning_rate": 9.748299319727892e-06, "loss": 40.1155, "step": 5611 }, { "epoch": 133.62089552238805, "grad_norm": 32.24649429321289, "learning_rate": 9.746598639455784e-06, "loss": 40.149, "step": 5612 }, { "epoch": 133.644776119403, "grad_norm": 35.7637939453125, "learning_rate": 9.744897959183674e-06, "loss": 39.7303, "step": 5613 }, { "epoch": 133.6686567164179, "grad_norm": 31.09421157836914, "learning_rate": 9.743197278911567e-06, "loss": 41.0925, "step": 5614 }, { "epoch": 133.69253731343284, "grad_norm": 37.82075881958008, "learning_rate": 9.741496598639457e-06, "loss": 39.9909, "step": 5615 }, { "epoch": 133.71641791044777, "grad_norm": 33.92351150512695, "learning_rate": 9.739795918367347e-06, "loss": 40.0986, "step": 5616 }, { "epoch": 133.74029850746268, "grad_norm": 29.645198822021484, "learning_rate": 9.73809523809524e-06, "loss": 41.5591, "step": 5617 }, { "epoch": 133.76417910447762, "grad_norm": 24.506332397460938, "learning_rate": 9.73639455782313e-06, "loss": 41.3366, "step": 5618 }, { "epoch": 133.78805970149253, "grad_norm": 38.3758544921875, "learning_rate": 9.734693877551022e-06, "loss": 41.016, "step": 5619 }, { "epoch": 133.81194029850747, "grad_norm": 33.210044860839844, "learning_rate": 9.732993197278912e-06, "loss": 40.9384, "step": 5620 }, { "epoch": 133.83582089552237, "grad_norm": 33.01791000366211, "learning_rate": 9.731292517006804e-06, "loss": 39.6658, "step": 5621 }, { "epoch": 133.8597014925373, "grad_norm": 34.2905158996582, "learning_rate": 9.729591836734695e-06, "loss": 40.4843, "step": 5622 }, { "epoch": 133.88358208955225, "grad_norm": 29.771053314208984, "learning_rate": 9.727891156462585e-06, "loss": 40.2978, "step": 5623 }, { "epoch": 133.90746268656716, "grad_norm": 30.07183837890625, "learning_rate": 9.726190476190477e-06, "loss": 40.2479, "step": 5624 }, { "epoch": 133.9313432835821, "grad_norm": 30.720661163330078, "learning_rate": 9.724489795918368e-06, "loss": 39.5252, "step": 5625 }, { "epoch": 133.955223880597, "grad_norm": 27.56161117553711, "learning_rate": 9.72278911564626e-06, "loss": 40.4758, "step": 5626 }, { "epoch": 133.97910447761194, "grad_norm": 32.74715805053711, "learning_rate": 9.72108843537415e-06, "loss": 40.6321, "step": 5627 }, { "epoch": 134.0, "grad_norm": 25.854846954345703, "learning_rate": 9.719387755102042e-06, "loss": 34.2593, "step": 5628 }, { "epoch": 134.02388059701494, "grad_norm": 33.82636642456055, "learning_rate": 9.717687074829933e-06, "loss": 40.2388, "step": 5629 }, { "epoch": 134.04776119402985, "grad_norm": 29.441238403320312, "learning_rate": 9.715986394557823e-06, "loss": 40.5805, "step": 5630 }, { "epoch": 134.07164179104478, "grad_norm": 29.590694427490234, "learning_rate": 9.714285714285715e-06, "loss": 38.7185, "step": 5631 }, { "epoch": 134.0955223880597, "grad_norm": 26.878095626831055, "learning_rate": 9.712585034013606e-06, "loss": 41.1294, "step": 5632 }, { "epoch": 134.11940298507463, "grad_norm": 31.240013122558594, "learning_rate": 9.710884353741498e-06, "loss": 40.0814, "step": 5633 }, { "epoch": 134.14328358208957, "grad_norm": 27.573955535888672, "learning_rate": 9.70918367346939e-06, "loss": 40.6451, "step": 5634 }, { "epoch": 134.16716417910447, "grad_norm": 35.54013442993164, "learning_rate": 9.707482993197278e-06, "loss": 41.3382, "step": 5635 }, { "epoch": 134.1910447761194, "grad_norm": 33.757408142089844, "learning_rate": 9.70578231292517e-06, "loss": 39.4768, "step": 5636 }, { "epoch": 134.21492537313432, "grad_norm": 29.37469482421875, "learning_rate": 9.704081632653061e-06, "loss": 39.8421, "step": 5637 }, { "epoch": 134.23880597014926, "grad_norm": 29.495834350585938, "learning_rate": 9.702380952380953e-06, "loss": 39.2846, "step": 5638 }, { "epoch": 134.26268656716417, "grad_norm": 28.723642349243164, "learning_rate": 9.700680272108845e-06, "loss": 39.4364, "step": 5639 }, { "epoch": 134.2865671641791, "grad_norm": 25.51703453063965, "learning_rate": 9.698979591836736e-06, "loss": 39.4578, "step": 5640 }, { "epoch": 134.31044776119404, "grad_norm": 34.16410446166992, "learning_rate": 9.697278911564628e-06, "loss": 40.5937, "step": 5641 }, { "epoch": 134.33432835820895, "grad_norm": 30.546810150146484, "learning_rate": 9.695578231292518e-06, "loss": 39.912, "step": 5642 }, { "epoch": 134.3582089552239, "grad_norm": 30.73379898071289, "learning_rate": 9.693877551020408e-06, "loss": 41.5471, "step": 5643 }, { "epoch": 134.3820895522388, "grad_norm": 30.759567260742188, "learning_rate": 9.6921768707483e-06, "loss": 40.3315, "step": 5644 }, { "epoch": 134.40597014925373, "grad_norm": 28.02313995361328, "learning_rate": 9.690476190476191e-06, "loss": 40.2851, "step": 5645 }, { "epoch": 134.42985074626867, "grad_norm": 24.580036163330078, "learning_rate": 9.688775510204083e-06, "loss": 40.9942, "step": 5646 }, { "epoch": 134.45373134328358, "grad_norm": 32.100738525390625, "learning_rate": 9.687074829931973e-06, "loss": 40.2184, "step": 5647 }, { "epoch": 134.47761194029852, "grad_norm": 30.24114418029785, "learning_rate": 9.685374149659866e-06, "loss": 40.3371, "step": 5648 }, { "epoch": 134.50149253731342, "grad_norm": 32.3997917175293, "learning_rate": 9.683673469387756e-06, "loss": 40.7586, "step": 5649 }, { "epoch": 134.52537313432836, "grad_norm": 25.58622169494629, "learning_rate": 9.681972789115646e-06, "loss": 40.1238, "step": 5650 }, { "epoch": 134.54925373134327, "grad_norm": 32.82097244262695, "learning_rate": 9.680272108843538e-06, "loss": 40.6563, "step": 5651 }, { "epoch": 134.5731343283582, "grad_norm": 27.216670989990234, "learning_rate": 9.678571428571429e-06, "loss": 38.6664, "step": 5652 }, { "epoch": 134.59701492537314, "grad_norm": 30.91448211669922, "learning_rate": 9.676870748299321e-06, "loss": 40.0405, "step": 5653 }, { "epoch": 134.62089552238805, "grad_norm": 27.467674255371094, "learning_rate": 9.675170068027211e-06, "loss": 40.8484, "step": 5654 }, { "epoch": 134.644776119403, "grad_norm": 33.313507080078125, "learning_rate": 9.673469387755103e-06, "loss": 40.5139, "step": 5655 }, { "epoch": 134.6686567164179, "grad_norm": 28.826663970947266, "learning_rate": 9.671768707482994e-06, "loss": 39.9436, "step": 5656 }, { "epoch": 134.69253731343284, "grad_norm": 31.69590950012207, "learning_rate": 9.670068027210884e-06, "loss": 40.458, "step": 5657 }, { "epoch": 134.71641791044777, "grad_norm": 24.371248245239258, "learning_rate": 9.668367346938776e-06, "loss": 40.4455, "step": 5658 }, { "epoch": 134.74029850746268, "grad_norm": 31.334495544433594, "learning_rate": 9.666666666666667e-06, "loss": 40.7902, "step": 5659 }, { "epoch": 134.76417910447762, "grad_norm": 27.586498260498047, "learning_rate": 9.664965986394559e-06, "loss": 40.5867, "step": 5660 }, { "epoch": 134.78805970149253, "grad_norm": 28.80315399169922, "learning_rate": 9.663265306122451e-06, "loss": 39.4688, "step": 5661 }, { "epoch": 134.81194029850747, "grad_norm": 24.875734329223633, "learning_rate": 9.66156462585034e-06, "loss": 39.2296, "step": 5662 }, { "epoch": 134.83582089552237, "grad_norm": 26.77202033996582, "learning_rate": 9.659863945578232e-06, "loss": 41.5271, "step": 5663 }, { "epoch": 134.8597014925373, "grad_norm": 21.632478713989258, "learning_rate": 9.658163265306124e-06, "loss": 39.7494, "step": 5664 }, { "epoch": 134.88358208955225, "grad_norm": 33.85261154174805, "learning_rate": 9.656462585034014e-06, "loss": 39.4471, "step": 5665 }, { "epoch": 134.90746268656716, "grad_norm": 27.42376708984375, "learning_rate": 9.654761904761906e-06, "loss": 40.2511, "step": 5666 }, { "epoch": 134.9313432835821, "grad_norm": 29.52701187133789, "learning_rate": 9.653061224489797e-06, "loss": 39.9535, "step": 5667 }, { "epoch": 134.955223880597, "grad_norm": 25.98667335510254, "learning_rate": 9.651360544217689e-06, "loss": 40.6712, "step": 5668 }, { "epoch": 134.97910447761194, "grad_norm": 26.950590133666992, "learning_rate": 9.64965986394558e-06, "loss": 40.4322, "step": 5669 }, { "epoch": 135.0, "grad_norm": 18.281841278076172, "learning_rate": 9.64795918367347e-06, "loss": 34.9402, "step": 5670 }, { "epoch": 135.02388059701494, "grad_norm": 30.714963912963867, "learning_rate": 9.646258503401362e-06, "loss": 40.2777, "step": 5671 }, { "epoch": 135.04776119402985, "grad_norm": 21.559858322143555, "learning_rate": 9.644557823129252e-06, "loss": 39.8327, "step": 5672 }, { "epoch": 135.07164179104478, "grad_norm": 27.76194953918457, "learning_rate": 9.642857142857144e-06, "loss": 40.0835, "step": 5673 }, { "epoch": 135.0955223880597, "grad_norm": 22.750877380371094, "learning_rate": 9.641156462585035e-06, "loss": 40.3429, "step": 5674 }, { "epoch": 135.11940298507463, "grad_norm": 28.511995315551758, "learning_rate": 9.639455782312927e-06, "loss": 39.3794, "step": 5675 }, { "epoch": 135.14328358208957, "grad_norm": 21.04129409790039, "learning_rate": 9.637755102040817e-06, "loss": 41.34, "step": 5676 }, { "epoch": 135.16716417910447, "grad_norm": 27.8126277923584, "learning_rate": 9.636054421768707e-06, "loss": 40.3671, "step": 5677 }, { "epoch": 135.1910447761194, "grad_norm": 23.500349044799805, "learning_rate": 9.6343537414966e-06, "loss": 40.235, "step": 5678 }, { "epoch": 135.21492537313432, "grad_norm": 25.186744689941406, "learning_rate": 9.63265306122449e-06, "loss": 40.7387, "step": 5679 }, { "epoch": 135.23880597014926, "grad_norm": 21.36899185180664, "learning_rate": 9.630952380952382e-06, "loss": 40.0717, "step": 5680 }, { "epoch": 135.26268656716417, "grad_norm": 23.584760665893555, "learning_rate": 9.629251700680272e-06, "loss": 40.1511, "step": 5681 }, { "epoch": 135.2865671641791, "grad_norm": 22.20633316040039, "learning_rate": 9.627551020408165e-06, "loss": 40.0655, "step": 5682 }, { "epoch": 135.31044776119404, "grad_norm": 19.99517822265625, "learning_rate": 9.625850340136055e-06, "loss": 39.8154, "step": 5683 }, { "epoch": 135.33432835820895, "grad_norm": 22.59499168395996, "learning_rate": 9.624149659863945e-06, "loss": 40.6277, "step": 5684 }, { "epoch": 135.3582089552239, "grad_norm": 17.33830451965332, "learning_rate": 9.622448979591837e-06, "loss": 39.4982, "step": 5685 }, { "epoch": 135.3820895522388, "grad_norm": 22.377470016479492, "learning_rate": 9.62074829931973e-06, "loss": 39.4303, "step": 5686 }, { "epoch": 135.40597014925373, "grad_norm": 22.994571685791016, "learning_rate": 9.61904761904762e-06, "loss": 38.643, "step": 5687 }, { "epoch": 135.42985074626867, "grad_norm": 17.39454460144043, "learning_rate": 9.617346938775512e-06, "loss": 41.7366, "step": 5688 }, { "epoch": 135.45373134328358, "grad_norm": 24.441268920898438, "learning_rate": 9.6156462585034e-06, "loss": 41.1602, "step": 5689 }, { "epoch": 135.47761194029852, "grad_norm": 16.182247161865234, "learning_rate": 9.613945578231293e-06, "loss": 40.4013, "step": 5690 }, { "epoch": 135.50149253731342, "grad_norm": 23.803049087524414, "learning_rate": 9.612244897959185e-06, "loss": 41.3252, "step": 5691 }, { "epoch": 135.52537313432836, "grad_norm": 20.565837860107422, "learning_rate": 9.610544217687075e-06, "loss": 40.3434, "step": 5692 }, { "epoch": 135.54925373134327, "grad_norm": 26.256967544555664, "learning_rate": 9.608843537414967e-06, "loss": 40.2281, "step": 5693 }, { "epoch": 135.5731343283582, "grad_norm": 18.350553512573242, "learning_rate": 9.607142857142858e-06, "loss": 39.1361, "step": 5694 }, { "epoch": 135.59701492537314, "grad_norm": 25.684616088867188, "learning_rate": 9.60544217687075e-06, "loss": 39.7602, "step": 5695 }, { "epoch": 135.62089552238805, "grad_norm": 22.026763916015625, "learning_rate": 9.60374149659864e-06, "loss": 40.2298, "step": 5696 }, { "epoch": 135.644776119403, "grad_norm": 15.483604431152344, "learning_rate": 9.60204081632653e-06, "loss": 39.8388, "step": 5697 }, { "epoch": 135.6686567164179, "grad_norm": 21.13356590270996, "learning_rate": 9.600340136054423e-06, "loss": 39.5239, "step": 5698 }, { "epoch": 135.69253731343284, "grad_norm": 17.695802688598633, "learning_rate": 9.598639455782313e-06, "loss": 40.336, "step": 5699 }, { "epoch": 135.71641791044777, "grad_norm": 16.947023391723633, "learning_rate": 9.596938775510205e-06, "loss": 39.7942, "step": 5700 }, { "epoch": 135.74029850746268, "grad_norm": 18.580827713012695, "learning_rate": 9.595238095238096e-06, "loss": 41.1081, "step": 5701 }, { "epoch": 135.76417910447762, "grad_norm": 19.310028076171875, "learning_rate": 9.593537414965988e-06, "loss": 38.773, "step": 5702 }, { "epoch": 135.78805970149253, "grad_norm": 17.71697235107422, "learning_rate": 9.591836734693878e-06, "loss": 41.1084, "step": 5703 }, { "epoch": 135.81194029850747, "grad_norm": 19.53215217590332, "learning_rate": 9.590136054421769e-06, "loss": 40.7152, "step": 5704 }, { "epoch": 135.83582089552237, "grad_norm": 26.050701141357422, "learning_rate": 9.58843537414966e-06, "loss": 41.2326, "step": 5705 }, { "epoch": 135.8597014925373, "grad_norm": 21.59418296813965, "learning_rate": 9.586734693877551e-06, "loss": 39.7008, "step": 5706 }, { "epoch": 135.88358208955225, "grad_norm": 17.44019889831543, "learning_rate": 9.585034013605443e-06, "loss": 40.0913, "step": 5707 }, { "epoch": 135.90746268656716, "grad_norm": 22.628219604492188, "learning_rate": 9.583333333333335e-06, "loss": 41.0258, "step": 5708 }, { "epoch": 135.9313432835821, "grad_norm": 18.4293155670166, "learning_rate": 9.581632653061226e-06, "loss": 39.4461, "step": 5709 }, { "epoch": 135.955223880597, "grad_norm": 17.186227798461914, "learning_rate": 9.579931972789116e-06, "loss": 38.9269, "step": 5710 }, { "epoch": 135.97910447761194, "grad_norm": 20.301193237304688, "learning_rate": 9.578231292517007e-06, "loss": 39.9266, "step": 5711 }, { "epoch": 136.0, "grad_norm": 18.15862464904785, "learning_rate": 9.576530612244899e-06, "loss": 35.705, "step": 5712 }, { "epoch": 136.02388059701494, "grad_norm": 17.27276611328125, "learning_rate": 9.57482993197279e-06, "loss": 39.9615, "step": 5713 }, { "epoch": 136.04776119402985, "grad_norm": 16.116933822631836, "learning_rate": 9.573129251700681e-06, "loss": 40.3388, "step": 5714 }, { "epoch": 136.07164179104478, "grad_norm": 14.106700897216797, "learning_rate": 9.571428571428573e-06, "loss": 39.8286, "step": 5715 }, { "epoch": 136.0955223880597, "grad_norm": 18.357019424438477, "learning_rate": 9.569727891156464e-06, "loss": 40.6918, "step": 5716 }, { "epoch": 136.11940298507463, "grad_norm": 16.41695213317871, "learning_rate": 9.568027210884354e-06, "loss": 40.1238, "step": 5717 }, { "epoch": 136.14328358208957, "grad_norm": 15.24857234954834, "learning_rate": 9.566326530612246e-06, "loss": 39.5314, "step": 5718 }, { "epoch": 136.16716417910447, "grad_norm": 21.097612380981445, "learning_rate": 9.564625850340137e-06, "loss": 39.4418, "step": 5719 }, { "epoch": 136.1910447761194, "grad_norm": 15.658564567565918, "learning_rate": 9.562925170068029e-06, "loss": 40.4354, "step": 5720 }, { "epoch": 136.21492537313432, "grad_norm": 18.364137649536133, "learning_rate": 9.561224489795919e-06, "loss": 39.4063, "step": 5721 }, { "epoch": 136.23880597014926, "grad_norm": 16.437915802001953, "learning_rate": 9.559523809523811e-06, "loss": 39.2412, "step": 5722 }, { "epoch": 136.26268656716417, "grad_norm": 18.161527633666992, "learning_rate": 9.557823129251701e-06, "loss": 40.1167, "step": 5723 }, { "epoch": 136.2865671641791, "grad_norm": 19.824352264404297, "learning_rate": 9.556122448979592e-06, "loss": 39.5653, "step": 5724 }, { "epoch": 136.31044776119404, "grad_norm": 16.736989974975586, "learning_rate": 9.554421768707484e-06, "loss": 39.4445, "step": 5725 }, { "epoch": 136.33432835820895, "grad_norm": NaN, "learning_rate": 9.552721088435374e-06, "loss": 40.8717, "step": 5726 }, { "epoch": 136.3582089552239, "grad_norm": 16.963516235351562, "learning_rate": 9.552721088435374e-06, "loss": 40.3213, "step": 5727 }, { "epoch": 136.3820895522388, "grad_norm": 18.735271453857422, "learning_rate": 9.551020408163266e-06, "loss": 40.8078, "step": 5728 }, { "epoch": 136.40597014925373, "grad_norm": 19.308032989501953, "learning_rate": 9.549319727891157e-06, "loss": 39.9691, "step": 5729 }, { "epoch": 136.42985074626867, "grad_norm": 14.293987274169922, "learning_rate": 9.547619047619049e-06, "loss": 39.0772, "step": 5730 }, { "epoch": 136.45373134328358, "grad_norm": 21.123519897460938, "learning_rate": 9.54591836734694e-06, "loss": 40.995, "step": 5731 }, { "epoch": 136.47761194029852, "grad_norm": 16.979511260986328, "learning_rate": 9.54421768707483e-06, "loss": 41.6872, "step": 5732 }, { "epoch": 136.50149253731342, "grad_norm": 20.042757034301758, "learning_rate": 9.542517006802722e-06, "loss": 40.1547, "step": 5733 }, { "epoch": 136.52537313432836, "grad_norm": 19.689138412475586, "learning_rate": 9.540816326530612e-06, "loss": 40.4422, "step": 5734 }, { "epoch": 136.54925373134327, "grad_norm": 19.830251693725586, "learning_rate": 9.539115646258504e-06, "loss": 38.6685, "step": 5735 }, { "epoch": 136.5731343283582, "grad_norm": 19.68994903564453, "learning_rate": 9.537414965986396e-06, "loss": 40.3769, "step": 5736 }, { "epoch": 136.59701492537314, "grad_norm": 19.520610809326172, "learning_rate": 9.535714285714287e-06, "loss": 39.8802, "step": 5737 }, { "epoch": 136.62089552238805, "grad_norm": 20.209075927734375, "learning_rate": 9.534013605442177e-06, "loss": 40.5337, "step": 5738 }, { "epoch": 136.644776119403, "grad_norm": 18.009183883666992, "learning_rate": 9.53231292517007e-06, "loss": 40.5237, "step": 5739 }, { "epoch": 136.6686567164179, "grad_norm": 17.618444442749023, "learning_rate": 9.53061224489796e-06, "loss": 39.4263, "step": 5740 }, { "epoch": 136.69253731343284, "grad_norm": 17.066255569458008, "learning_rate": 9.528911564625852e-06, "loss": 39.0451, "step": 5741 }, { "epoch": 136.71641791044777, "grad_norm": 16.11752700805664, "learning_rate": 9.527210884353742e-06, "loss": 40.5889, "step": 5742 }, { "epoch": 136.74029850746268, "grad_norm": 24.23548126220703, "learning_rate": 9.525510204081634e-06, "loss": 40.1915, "step": 5743 }, { "epoch": 136.76417910447762, "grad_norm": 17.77320671081543, "learning_rate": 9.523809523809525e-06, "loss": 40.4259, "step": 5744 }, { "epoch": 136.78805970149253, "grad_norm": 18.783700942993164, "learning_rate": 9.522108843537415e-06, "loss": 40.112, "step": 5745 }, { "epoch": 136.81194029850747, "grad_norm": 19.52975845336914, "learning_rate": 9.520408163265307e-06, "loss": 40.2694, "step": 5746 }, { "epoch": 136.83582089552237, "grad_norm": 22.467615127563477, "learning_rate": 9.518707482993198e-06, "loss": 41.4122, "step": 5747 }, { "epoch": 136.8597014925373, "grad_norm": 16.6851806640625, "learning_rate": 9.51700680272109e-06, "loss": 40.2696, "step": 5748 }, { "epoch": 136.88358208955225, "grad_norm": 19.272367477416992, "learning_rate": 9.51530612244898e-06, "loss": 40.4809, "step": 5749 }, { "epoch": 136.90746268656716, "grad_norm": 25.64748764038086, "learning_rate": 9.513605442176872e-06, "loss": 40.3818, "step": 5750 }, { "epoch": 136.9313432835821, "grad_norm": 17.339828491210938, "learning_rate": 9.511904761904763e-06, "loss": 40.3506, "step": 5751 }, { "epoch": 136.955223880597, "grad_norm": 23.17976188659668, "learning_rate": 9.510204081632653e-06, "loss": 39.2061, "step": 5752 }, { "epoch": 136.97910447761194, "grad_norm": 23.762033462524414, "learning_rate": 9.508503401360545e-06, "loss": 40.4341, "step": 5753 }, { "epoch": 137.0, "grad_norm": 14.065231323242188, "learning_rate": 9.506802721088436e-06, "loss": 35.9299, "step": 5754 }, { "epoch": 137.02388059701494, "grad_norm": 22.709367752075195, "learning_rate": 9.505102040816328e-06, "loss": 39.375, "step": 5755 }, { "epoch": 137.04776119402985, "grad_norm": 20.099899291992188, "learning_rate": 9.503401360544218e-06, "loss": 39.1875, "step": 5756 }, { "epoch": 137.07164179104478, "grad_norm": 16.251981735229492, "learning_rate": 9.50170068027211e-06, "loss": 39.7174, "step": 5757 }, { "epoch": 137.0955223880597, "grad_norm": 17.096813201904297, "learning_rate": 9.5e-06, "loss": 39.1641, "step": 5758 }, { "epoch": 137.11940298507463, "grad_norm": 21.969449996948242, "learning_rate": 9.498299319727891e-06, "loss": 39.6796, "step": 5759 }, { "epoch": 137.14328358208957, "grad_norm": 13.48315715789795, "learning_rate": 9.496598639455783e-06, "loss": 40.1312, "step": 5760 }, { "epoch": 137.16716417910447, "grad_norm": 15.142317771911621, "learning_rate": 9.494897959183675e-06, "loss": 39.0918, "step": 5761 }, { "epoch": 137.1910447761194, "grad_norm": 15.083260536193848, "learning_rate": 9.493197278911566e-06, "loss": 40.3378, "step": 5762 }, { "epoch": 137.21492537313432, "grad_norm": 16.5947208404541, "learning_rate": 9.491496598639458e-06, "loss": 40.0254, "step": 5763 }, { "epoch": 137.23880597014926, "grad_norm": 17.31525421142578, "learning_rate": 9.489795918367348e-06, "loss": 39.7925, "step": 5764 }, { "epoch": 137.26268656716417, "grad_norm": 13.33224105834961, "learning_rate": 9.488095238095238e-06, "loss": 39.1608, "step": 5765 }, { "epoch": 137.2865671641791, "grad_norm": 18.62505340576172, "learning_rate": 9.48639455782313e-06, "loss": 39.6955, "step": 5766 }, { "epoch": 137.31044776119404, "grad_norm": 20.526426315307617, "learning_rate": 9.484693877551021e-06, "loss": 40.8692, "step": 5767 }, { "epoch": 137.33432835820895, "grad_norm": 17.54509162902832, "learning_rate": 9.482993197278913e-06, "loss": 38.9902, "step": 5768 }, { "epoch": 137.3582089552239, "grad_norm": 13.5675048828125, "learning_rate": 9.481292517006803e-06, "loss": 40.2917, "step": 5769 }, { "epoch": 137.3820895522388, "grad_norm": 17.16435432434082, "learning_rate": 9.479591836734695e-06, "loss": 39.8777, "step": 5770 }, { "epoch": 137.40597014925373, "grad_norm": NaN, "learning_rate": 9.477891156462586e-06, "loss": 40.3914, "step": 5771 }, { "epoch": 137.42985074626867, "grad_norm": 18.361515045166016, "learning_rate": 9.477891156462586e-06, "loss": 42.1308, "step": 5772 }, { "epoch": 137.45373134328358, "grad_norm": 15.623734474182129, "learning_rate": 9.476190476190476e-06, "loss": 41.3761, "step": 5773 }, { "epoch": 137.47761194029852, "grad_norm": 16.020898818969727, "learning_rate": 9.474489795918368e-06, "loss": 40.9852, "step": 5774 }, { "epoch": 137.50149253731342, "grad_norm": 24.223079681396484, "learning_rate": 9.472789115646259e-06, "loss": 40.3601, "step": 5775 }, { "epoch": 137.52537313432836, "grad_norm": 16.226585388183594, "learning_rate": 9.471088435374151e-06, "loss": 39.25, "step": 5776 }, { "epoch": 137.54925373134327, "grad_norm": 14.546438217163086, "learning_rate": 9.469387755102041e-06, "loss": 41.5317, "step": 5777 }, { "epoch": 137.5731343283582, "grad_norm": 25.475976943969727, "learning_rate": 9.467687074829933e-06, "loss": 40.7448, "step": 5778 }, { "epoch": 137.59701492537314, "grad_norm": 21.050052642822266, "learning_rate": 9.465986394557824e-06, "loss": 39.494, "step": 5779 }, { "epoch": 137.62089552238805, "grad_norm": 14.88813591003418, "learning_rate": 9.464285714285714e-06, "loss": 40.073, "step": 5780 }, { "epoch": 137.644776119403, "grad_norm": 21.426273345947266, "learning_rate": 9.462585034013606e-06, "loss": 39.7685, "step": 5781 }, { "epoch": 137.6686567164179, "grad_norm": 21.00870704650879, "learning_rate": 9.460884353741497e-06, "loss": 39.4509, "step": 5782 }, { "epoch": 137.69253731343284, "grad_norm": 14.929703712463379, "learning_rate": 9.459183673469389e-06, "loss": 41.4336, "step": 5783 }, { "epoch": 137.71641791044777, "grad_norm": 13.802526473999023, "learning_rate": 9.457482993197281e-06, "loss": 40.6038, "step": 5784 }, { "epoch": 137.74029850746268, "grad_norm": 25.661685943603516, "learning_rate": 9.455782312925171e-06, "loss": 39.2058, "step": 5785 }, { "epoch": 137.76417910447762, "grad_norm": 17.845937728881836, "learning_rate": 9.454081632653062e-06, "loss": 41.1643, "step": 5786 }, { "epoch": 137.78805970149253, "grad_norm": 25.97015953063965, "learning_rate": 9.452380952380952e-06, "loss": 39.7219, "step": 5787 }, { "epoch": 137.81194029850747, "grad_norm": 17.875333786010742, "learning_rate": 9.450680272108844e-06, "loss": 39.7798, "step": 5788 }, { "epoch": 137.83582089552237, "grad_norm": 18.28219223022461, "learning_rate": 9.448979591836736e-06, "loss": 39.3965, "step": 5789 }, { "epoch": 137.8597014925373, "grad_norm": 19.815677642822266, "learning_rate": 9.447278911564627e-06, "loss": 40.5489, "step": 5790 }, { "epoch": 137.88358208955225, "grad_norm": 20.447330474853516, "learning_rate": 9.445578231292519e-06, "loss": 39.633, "step": 5791 }, { "epoch": 137.90746268656716, "grad_norm": 16.50349998474121, "learning_rate": 9.44387755102041e-06, "loss": 39.9416, "step": 5792 }, { "epoch": 137.9313432835821, "grad_norm": 22.41202735900879, "learning_rate": 9.4421768707483e-06, "loss": 41.0672, "step": 5793 }, { "epoch": 137.955223880597, "grad_norm": 17.746328353881836, "learning_rate": 9.440476190476192e-06, "loss": 39.8027, "step": 5794 }, { "epoch": 137.97910447761194, "grad_norm": 18.95381736755371, "learning_rate": 9.438775510204082e-06, "loss": 40.2487, "step": 5795 }, { "epoch": 138.0, "grad_norm": 14.501996994018555, "learning_rate": 9.437074829931974e-06, "loss": 35.2047, "step": 5796 }, { "epoch": 138.02388059701494, "grad_norm": 23.209070205688477, "learning_rate": 9.435374149659865e-06, "loss": 40.6912, "step": 5797 }, { "epoch": 138.04776119402985, "grad_norm": 19.782623291015625, "learning_rate": 9.433673469387757e-06, "loss": 40.41, "step": 5798 }, { "epoch": 138.07164179104478, "grad_norm": 18.50634002685547, "learning_rate": 9.431972789115647e-06, "loss": 40.1066, "step": 5799 }, { "epoch": 138.0955223880597, "grad_norm": 19.37914276123047, "learning_rate": 9.430272108843537e-06, "loss": 39.8479, "step": 5800 }, { "epoch": 138.11940298507463, "grad_norm": NaN, "learning_rate": 9.42857142857143e-06, "loss": 69.6561, "step": 5801 }, { "epoch": 138.14328358208957, "grad_norm": 20.234079360961914, "learning_rate": 9.42857142857143e-06, "loss": 40.7243, "step": 5802 }, { "epoch": 138.16716417910447, "grad_norm": 19.689483642578125, "learning_rate": 9.42687074829932e-06, "loss": 40.2885, "step": 5803 }, { "epoch": 138.1910447761194, "grad_norm": 15.311651229858398, "learning_rate": 9.425170068027212e-06, "loss": 40.0564, "step": 5804 }, { "epoch": 138.21492537313432, "grad_norm": 22.144147872924805, "learning_rate": 9.423469387755102e-06, "loss": 40.0064, "step": 5805 }, { "epoch": 138.23880597014926, "grad_norm": 19.106332778930664, "learning_rate": 9.421768707482995e-06, "loss": 38.9603, "step": 5806 }, { "epoch": 138.26268656716417, "grad_norm": 17.888164520263672, "learning_rate": 9.420068027210885e-06, "loss": 38.8051, "step": 5807 }, { "epoch": 138.2865671641791, "grad_norm": NaN, "learning_rate": 9.418367346938775e-06, "loss": 34.5894, "step": 5808 }, { "epoch": 138.31044776119404, "grad_norm": 16.295089721679688, "learning_rate": 9.418367346938775e-06, "loss": 39.8359, "step": 5809 }, { "epoch": 138.33432835820895, "grad_norm": 17.898618698120117, "learning_rate": 9.416666666666667e-06, "loss": 40.9419, "step": 5810 }, { "epoch": 138.3582089552239, "grad_norm": 15.549861907958984, "learning_rate": 9.414965986394558e-06, "loss": 38.8705, "step": 5811 }, { "epoch": 138.3820895522388, "grad_norm": 21.414033889770508, "learning_rate": 9.41326530612245e-06, "loss": 40.366, "step": 5812 }, { "epoch": 138.40597014925373, "grad_norm": 18.34477996826172, "learning_rate": 9.411564625850342e-06, "loss": 41.358, "step": 5813 }, { "epoch": 138.42985074626867, "grad_norm": 18.43037223815918, "learning_rate": 9.409863945578232e-06, "loss": 41.0623, "step": 5814 }, { "epoch": 138.45373134328358, "grad_norm": 22.278278350830078, "learning_rate": 9.408163265306123e-06, "loss": 39.3361, "step": 5815 }, { "epoch": 138.47761194029852, "grad_norm": NaN, "learning_rate": 9.406462585034015e-06, "loss": 39.6796, "step": 5816 }, { "epoch": 138.50149253731342, "grad_norm": 18.300764083862305, "learning_rate": 9.406462585034015e-06, "loss": 40.625, "step": 5817 }, { "epoch": 138.52537313432836, "grad_norm": 16.382335662841797, "learning_rate": 9.404761904761905e-06, "loss": 40.4843, "step": 5818 }, { "epoch": 138.54925373134327, "grad_norm": 20.629667282104492, "learning_rate": 9.403061224489797e-06, "loss": 39.1476, "step": 5819 }, { "epoch": 138.5731343283582, "grad_norm": 25.73557472229004, "learning_rate": 9.401360544217688e-06, "loss": 41.4348, "step": 5820 }, { "epoch": 138.59701492537314, "grad_norm": 15.648715019226074, "learning_rate": 9.39965986394558e-06, "loss": 40.4888, "step": 5821 }, { "epoch": 138.62089552238805, "grad_norm": 16.803377151489258, "learning_rate": 9.39795918367347e-06, "loss": 40.5578, "step": 5822 }, { "epoch": 138.644776119403, "grad_norm": 29.228322982788086, "learning_rate": 9.39625850340136e-06, "loss": 40.6632, "step": 5823 }, { "epoch": 138.6686567164179, "grad_norm": 15.427154541015625, "learning_rate": 9.394557823129253e-06, "loss": 39.1214, "step": 5824 }, { "epoch": 138.69253731343284, "grad_norm": 28.359830856323242, "learning_rate": 9.392857142857143e-06, "loss": 40.3437, "step": 5825 }, { "epoch": 138.71641791044777, "grad_norm": 18.356201171875, "learning_rate": 9.391156462585035e-06, "loss": 40.6458, "step": 5826 }, { "epoch": 138.74029850746268, "grad_norm": 19.94028091430664, "learning_rate": 9.389455782312926e-06, "loss": 39.7405, "step": 5827 }, { "epoch": 138.76417910447762, "grad_norm": 27.602651596069336, "learning_rate": 9.387755102040818e-06, "loss": 40.1602, "step": 5828 }, { "epoch": 138.78805970149253, "grad_norm": 16.400421142578125, "learning_rate": 9.386054421768708e-06, "loss": 38.7246, "step": 5829 }, { "epoch": 138.81194029850747, "grad_norm": 26.579187393188477, "learning_rate": 9.384353741496599e-06, "loss": 38.5539, "step": 5830 }, { "epoch": 138.83582089552237, "grad_norm": 21.284912109375, "learning_rate": 9.38265306122449e-06, "loss": 40.3989, "step": 5831 }, { "epoch": 138.8597014925373, "grad_norm": 23.772157669067383, "learning_rate": 9.380952380952381e-06, "loss": 39.4185, "step": 5832 }, { "epoch": 138.88358208955225, "grad_norm": 29.053791046142578, "learning_rate": 9.379251700680273e-06, "loss": 41.1253, "step": 5833 }, { "epoch": 138.90746268656716, "grad_norm": 16.733402252197266, "learning_rate": 9.377551020408164e-06, "loss": 40.1286, "step": 5834 }, { "epoch": 138.9313432835821, "grad_norm": 36.72946548461914, "learning_rate": 9.375850340136056e-06, "loss": 38.9963, "step": 5835 }, { "epoch": 138.955223880597, "grad_norm": 26.51390838623047, "learning_rate": 9.374149659863946e-06, "loss": 40.031, "step": 5836 }, { "epoch": 138.97910447761194, "grad_norm": 31.663070678710938, "learning_rate": 9.372448979591836e-06, "loss": 40.099, "step": 5837 }, { "epoch": 139.0, "grad_norm": 21.624252319335938, "learning_rate": 9.370748299319729e-06, "loss": 35.3932, "step": 5838 }, { "epoch": 139.02388059701494, "grad_norm": 33.74135971069336, "learning_rate": 9.36904761904762e-06, "loss": 40.1791, "step": 5839 }, { "epoch": 139.04776119402985, "grad_norm": 21.488868713378906, "learning_rate": 9.367346938775511e-06, "loss": 38.745, "step": 5840 }, { "epoch": 139.07164179104478, "grad_norm": 35.68408203125, "learning_rate": 9.365646258503403e-06, "loss": 40.4261, "step": 5841 }, { "epoch": 139.0955223880597, "grad_norm": 27.531938552856445, "learning_rate": 9.363945578231294e-06, "loss": 39.8502, "step": 5842 }, { "epoch": 139.11940298507463, "grad_norm": 41.84492874145508, "learning_rate": 9.362244897959184e-06, "loss": 40.0804, "step": 5843 }, { "epoch": 139.14328358208957, "grad_norm": 34.03583526611328, "learning_rate": 9.360544217687076e-06, "loss": 40.4201, "step": 5844 }, { "epoch": 139.16716417910447, "grad_norm": 27.924837112426758, "learning_rate": 9.358843537414966e-06, "loss": 40.006, "step": 5845 }, { "epoch": 139.1910447761194, "grad_norm": 29.39901351928711, "learning_rate": 9.357142857142859e-06, "loss": 39.8178, "step": 5846 }, { "epoch": 139.21492537313432, "grad_norm": 31.170534133911133, "learning_rate": 9.355442176870749e-06, "loss": 39.8539, "step": 5847 }, { "epoch": 139.23880597014926, "grad_norm": 24.115842819213867, "learning_rate": 9.353741496598641e-06, "loss": 39.5716, "step": 5848 }, { "epoch": 139.26268656716417, "grad_norm": 30.86311149597168, "learning_rate": 9.352040816326531e-06, "loss": 39.3648, "step": 5849 }, { "epoch": 139.2865671641791, "grad_norm": 23.781522750854492, "learning_rate": 9.350340136054422e-06, "loss": 39.8786, "step": 5850 }, { "epoch": 139.31044776119404, "grad_norm": 38.31922149658203, "learning_rate": 9.348639455782314e-06, "loss": 39.9693, "step": 5851 }, { "epoch": 139.33432835820895, "grad_norm": 26.093704223632812, "learning_rate": 9.346938775510204e-06, "loss": 41.1107, "step": 5852 }, { "epoch": 139.3582089552239, "grad_norm": 35.27671813964844, "learning_rate": 9.345238095238096e-06, "loss": 39.3532, "step": 5853 }, { "epoch": 139.3820895522388, "grad_norm": 28.302349090576172, "learning_rate": 9.343537414965987e-06, "loss": 40.0611, "step": 5854 }, { "epoch": 139.40597014925373, "grad_norm": 34.33140182495117, "learning_rate": 9.341836734693879e-06, "loss": 39.3752, "step": 5855 }, { "epoch": 139.42985074626867, "grad_norm": 30.547216415405273, "learning_rate": 9.34013605442177e-06, "loss": 41.2377, "step": 5856 }, { "epoch": 139.45373134328358, "grad_norm": 30.49541664123535, "learning_rate": 9.33843537414966e-06, "loss": 39.2959, "step": 5857 }, { "epoch": 139.47761194029852, "grad_norm": 25.64105796813965, "learning_rate": 9.336734693877552e-06, "loss": 41.0909, "step": 5858 }, { "epoch": 139.50149253731342, "grad_norm": NaN, "learning_rate": 9.335034013605442e-06, "loss": 60.6154, "step": 5859 }, { "epoch": 139.52537313432836, "grad_norm": 35.060829162597656, "learning_rate": 9.335034013605442e-06, "loss": 39.9802, "step": 5860 }, { "epoch": 139.54925373134327, "grad_norm": 27.54969024658203, "learning_rate": 9.333333333333334e-06, "loss": 39.3133, "step": 5861 }, { "epoch": 139.5731343283582, "grad_norm": 38.9669075012207, "learning_rate": 9.331632653061225e-06, "loss": 41.312, "step": 5862 }, { "epoch": 139.59701492537314, "grad_norm": 34.280067443847656, "learning_rate": 9.329931972789117e-06, "loss": 39.4449, "step": 5863 }, { "epoch": 139.62089552238805, "grad_norm": 29.73419189453125, "learning_rate": 9.328231292517007e-06, "loss": 40.2213, "step": 5864 }, { "epoch": 139.644776119403, "grad_norm": 24.6690673828125, "learning_rate": 9.326530612244898e-06, "loss": 40.6231, "step": 5865 }, { "epoch": 139.6686567164179, "grad_norm": 35.420101165771484, "learning_rate": 9.32482993197279e-06, "loss": 39.6659, "step": 5866 }, { "epoch": 139.69253731343284, "grad_norm": 32.64005661010742, "learning_rate": 9.323129251700682e-06, "loss": 39.3794, "step": 5867 }, { "epoch": 139.71641791044777, "grad_norm": 30.59541893005371, "learning_rate": 9.321428571428572e-06, "loss": 40.0863, "step": 5868 }, { "epoch": 139.74029850746268, "grad_norm": 28.199207305908203, "learning_rate": 9.319727891156464e-06, "loss": 39.7205, "step": 5869 }, { "epoch": 139.76417910447762, "grad_norm": 28.04796600341797, "learning_rate": 9.318027210884355e-06, "loss": 39.0113, "step": 5870 }, { "epoch": 139.78805970149253, "grad_norm": 24.012332916259766, "learning_rate": 9.316326530612245e-06, "loss": 40.5084, "step": 5871 }, { "epoch": 139.81194029850747, "grad_norm": 34.23363494873047, "learning_rate": 9.314625850340137e-06, "loss": 39.5083, "step": 5872 }, { "epoch": 139.83582089552237, "grad_norm": 29.06350326538086, "learning_rate": 9.312925170068028e-06, "loss": 39.3723, "step": 5873 }, { "epoch": 139.8597014925373, "grad_norm": 28.527681350708008, "learning_rate": 9.31122448979592e-06, "loss": 40.6419, "step": 5874 }, { "epoch": 139.88358208955225, "grad_norm": 29.12566375732422, "learning_rate": 9.30952380952381e-06, "loss": 40.6302, "step": 5875 }, { "epoch": 139.90746268656716, "grad_norm": 31.133377075195312, "learning_rate": 9.307823129251702e-06, "loss": 40.5931, "step": 5876 }, { "epoch": 139.9313432835821, "grad_norm": 24.83881378173828, "learning_rate": 9.306122448979593e-06, "loss": 39.411, "step": 5877 }, { "epoch": 139.955223880597, "grad_norm": 31.597652435302734, "learning_rate": 9.304421768707483e-06, "loss": 39.0108, "step": 5878 }, { "epoch": 139.97910447761194, "grad_norm": 29.325529098510742, "learning_rate": 9.302721088435375e-06, "loss": 39.4128, "step": 5879 }, { "epoch": 140.0, "grad_norm": 25.26357078552246, "learning_rate": 9.301020408163265e-06, "loss": 35.0399, "step": 5880 }, { "epoch": 140.0, "step": 5880, "total_flos": 2.890707963725509e+17, "train_loss": 2.877911633861308, "train_runtime": 12847.333, "train_samples_per_second": 58.322, "train_steps_per_second": 0.458 } ], "logging_steps": 1.0, "max_steps": 5880, "num_input_tokens_seen": 0, "num_train_epochs": 140, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.890707963725509e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }