{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1144,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008741258741258741,
      "grad_norm": 2556.159912109375,
      "learning_rate": 8.695652173913044e-08,
      "loss": 11.1873,
      "step": 1
    },
    {
      "epoch": 0.0017482517482517483,
      "grad_norm": 2494.249755859375,
      "learning_rate": 1.7391304347826088e-07,
      "loss": 11.2026,
      "step": 2
    },
    {
      "epoch": 0.0026223776223776225,
      "grad_norm": 2464.940673828125,
      "learning_rate": 2.608695652173913e-07,
      "loss": 11.1886,
      "step": 3
    },
    {
      "epoch": 0.0034965034965034965,
      "grad_norm": 4495.7900390625,
      "learning_rate": 3.4782608695652175e-07,
      "loss": 9.6162,
      "step": 4
    },
    {
      "epoch": 0.004370629370629371,
      "grad_norm": 1926.453857421875,
      "learning_rate": 4.347826086956522e-07,
      "loss": 8.121,
      "step": 5
    },
    {
      "epoch": 0.005244755244755245,
      "grad_norm": 6460.1591796875,
      "learning_rate": 5.217391304347826e-07,
      "loss": 9.4069,
      "step": 6
    },
    {
      "epoch": 0.006118881118881119,
      "grad_norm": 2406.016845703125,
      "learning_rate": 6.086956521739131e-07,
      "loss": 10.9389,
      "step": 7
    },
    {
      "epoch": 0.006993006993006993,
      "grad_norm": 2580.868896484375,
      "learning_rate": 6.956521739130435e-07,
      "loss": 11.1335,
      "step": 8
    },
    {
      "epoch": 0.007867132867132868,
      "grad_norm": 2579.739013671875,
      "learning_rate": 7.82608695652174e-07,
      "loss": 11.0525,
      "step": 9
    },
    {
      "epoch": 0.008741258741258742,
      "grad_norm": 2215.97265625,
      "learning_rate": 8.695652173913044e-07,
      "loss": 10.6272,
      "step": 10
    },
    {
      "epoch": 0.009615384615384616,
      "grad_norm": 1414.517333984375,
      "learning_rate": 9.565217391304349e-07,
      "loss": 7.8986,
      "step": 11
    },
    {
      "epoch": 0.01048951048951049,
      "grad_norm": 2600.98583984375,
      "learning_rate": 1.0434782608695653e-06,
      "loss": 9.1026,
      "step": 12
    },
    {
      "epoch": 0.011363636363636364,
      "grad_norm": 2265.3310546875,
      "learning_rate": 1.1304347826086956e-06,
      "loss": 10.7433,
      "step": 13
    },
    {
      "epoch": 0.012237762237762238,
      "grad_norm": 2093.912353515625,
      "learning_rate": 1.2173913043478262e-06,
      "loss": 10.9278,
      "step": 14
    },
    {
      "epoch": 0.013111888111888112,
      "grad_norm": 2282.7275390625,
      "learning_rate": 1.3043478260869566e-06,
      "loss": 11.0357,
      "step": 15
    },
    {
      "epoch": 0.013986013986013986,
      "grad_norm": 4415.32763671875,
      "learning_rate": 1.391304347826087e-06,
      "loss": 9.6434,
      "step": 16
    },
    {
      "epoch": 0.01486013986013986,
      "grad_norm": 2071.832275390625,
      "learning_rate": 1.4782608695652176e-06,
      "loss": 7.5336,
      "step": 17
    },
    {
      "epoch": 0.015734265734265736,
      "grad_norm": 2921.714599609375,
      "learning_rate": 1.565217391304348e-06,
      "loss": 7.5054,
      "step": 18
    },
    {
      "epoch": 0.016608391608391608,
      "grad_norm": 2904.253173828125,
      "learning_rate": 1.6521739130434784e-06,
      "loss": 10.3506,
      "step": 19
    },
    {
      "epoch": 0.017482517482517484,
      "grad_norm": 2957.742431640625,
      "learning_rate": 1.7391304347826088e-06,
      "loss": 10.6261,
      "step": 20
    },
    {
      "epoch": 0.018356643356643356,
      "grad_norm": 2202.565185546875,
      "learning_rate": 1.8260869565217394e-06,
      "loss": 10.5676,
      "step": 21
    },
    {
      "epoch": 0.019230769230769232,
      "grad_norm": 4123.73095703125,
      "learning_rate": 1.9130434782608697e-06,
      "loss": 10.5481,
      "step": 22
    },
    {
      "epoch": 0.020104895104895104,
      "grad_norm": 1153.431884765625,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 7.4527,
      "step": 23
    },
    {
      "epoch": 0.02097902097902098,
      "grad_norm": 3231.1708984375,
      "learning_rate": 2.0869565217391305e-06,
      "loss": 7.3864,
      "step": 24
    },
    {
      "epoch": 0.021853146853146852,
      "grad_norm": 5035.11572265625,
      "learning_rate": 2.173913043478261e-06,
      "loss": 9.6911,
      "step": 25
    },
    {
      "epoch": 0.022727272727272728,
      "grad_norm": 1802.8153076171875,
      "learning_rate": 2.2608695652173913e-06,
      "loss": 10.2017,
      "step": 26
    },
    {
      "epoch": 0.0236013986013986,
      "grad_norm": 1880.7608642578125,
      "learning_rate": 2.347826086956522e-06,
      "loss": 10.0493,
      "step": 27
    },
    {
      "epoch": 0.024475524475524476,
      "grad_norm": 2126.647705078125,
      "learning_rate": 2.4347826086956525e-06,
      "loss": 9.9926,
      "step": 28
    },
    {
      "epoch": 0.025349650349650348,
      "grad_norm": 8877.7578125,
      "learning_rate": 2.5217391304347826e-06,
      "loss": 8.2142,
      "step": 29
    },
    {
      "epoch": 0.026223776223776224,
      "grad_norm": 3567.623779296875,
      "learning_rate": 2.6086956521739132e-06,
      "loss": 7.0594,
      "step": 30
    },
    {
      "epoch": 0.027097902097902096,
      "grad_norm": 4317.46630859375,
      "learning_rate": 2.695652173913044e-06,
      "loss": 8.3961,
      "step": 31
    },
    {
      "epoch": 0.027972027972027972,
      "grad_norm": 1640.6510009765625,
      "learning_rate": 2.782608695652174e-06,
      "loss": 9.3768,
      "step": 32
    },
    {
      "epoch": 0.028846153846153848,
      "grad_norm": 1890.374267578125,
      "learning_rate": 2.8695652173913046e-06,
      "loss": 9.4034,
      "step": 33
    },
    {
      "epoch": 0.02972027972027972,
      "grad_norm": 1811.1141357421875,
      "learning_rate": 2.956521739130435e-06,
      "loss": 9.2706,
      "step": 34
    },
    {
      "epoch": 0.030594405594405596,
      "grad_norm": 1407.465087890625,
      "learning_rate": 3.043478260869566e-06,
      "loss": 8.4026,
      "step": 35
    },
    {
      "epoch": 0.03146853146853147,
      "grad_norm": 5426.54931640625,
      "learning_rate": 3.130434782608696e-06,
      "loss": 6.3423,
      "step": 36
    },
    {
      "epoch": 0.032342657342657344,
      "grad_norm": 4124.9208984375,
      "learning_rate": 3.217391304347826e-06,
      "loss": 6.9582,
      "step": 37
    },
    {
      "epoch": 0.033216783216783216,
      "grad_norm": 7167.1748046875,
      "learning_rate": 3.3043478260869567e-06,
      "loss": 8.3896,
      "step": 38
    },
    {
      "epoch": 0.03409090909090909,
      "grad_norm": 13229.9189453125,
      "learning_rate": 3.391304347826087e-06,
      "loss": 8.3201,
      "step": 39
    },
    {
      "epoch": 0.03496503496503497,
      "grad_norm": 26215.244140625,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 7.7396,
      "step": 40
    },
    {
      "epoch": 0.03583916083916084,
      "grad_norm": 715493.5,
      "learning_rate": 3.565217391304348e-06,
      "loss": 7.6015,
      "step": 41
    },
    {
      "epoch": 0.03671328671328671,
      "grad_norm": 5296.10595703125,
      "learning_rate": 3.6521739130434787e-06,
      "loss": 4.9831,
      "step": 42
    },
    {
      "epoch": 0.037587412587412584,
      "grad_norm": 7245.8544921875,
      "learning_rate": 3.739130434782609e-06,
      "loss": 4.6633,
      "step": 43
    },
    {
      "epoch": 0.038461538461538464,
      "grad_norm": 11489.5283203125,
      "learning_rate": 3.8260869565217395e-06,
      "loss": 5.3457,
      "step": 44
    },
    {
      "epoch": 0.039335664335664336,
      "grad_norm": 40950.015625,
      "learning_rate": 3.91304347826087e-06,
      "loss": 5.2591,
      "step": 45
    },
    {
      "epoch": 0.04020979020979021,
      "grad_norm": 75491.3359375,
      "learning_rate": 4.000000000000001e-06,
      "loss": 4.4825,
      "step": 46
    },
    {
      "epoch": 0.04108391608391608,
      "grad_norm": 18060.21875,
      "learning_rate": 4.086956521739131e-06,
      "loss": 4.1051,
      "step": 47
    },
    {
      "epoch": 0.04195804195804196,
      "grad_norm": 2458.598876953125,
      "learning_rate": 4.173913043478261e-06,
      "loss": 3.5274,
      "step": 48
    },
    {
      "epoch": 0.04283216783216783,
      "grad_norm": 2676.119140625,
      "learning_rate": 4.260869565217392e-06,
      "loss": 3.4095,
      "step": 49
    },
    {
      "epoch": 0.043706293706293704,
      "grad_norm": 7722.36083984375,
      "learning_rate": 4.347826086956522e-06,
      "loss": 3.8389,
      "step": 50
    },
    {
      "epoch": 0.044580419580419584,
      "grad_norm": 18194.107421875,
      "learning_rate": 4.434782608695653e-06,
      "loss": 3.917,
      "step": 51
    },
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 5762.40869140625,
      "learning_rate": 4.5217391304347826e-06,
      "loss": 3.6735,
      "step": 52
    },
    {
      "epoch": 0.04632867132867133,
      "grad_norm": 14932.345703125,
      "learning_rate": 4.608695652173913e-06,
      "loss": 3.5259,
      "step": 53
    },
    {
      "epoch": 0.0472027972027972,
      "grad_norm": 3072.980712890625,
      "learning_rate": 4.695652173913044e-06,
      "loss": 3.4225,
      "step": 54
    },
    {
      "epoch": 0.04807692307692308,
      "grad_norm": 1641.509033203125,
      "learning_rate": 4.782608695652174e-06,
      "loss": 3.2713,
      "step": 55
    },
    {
      "epoch": 0.04895104895104895,
      "grad_norm": 4321.64453125,
      "learning_rate": 4.869565217391305e-06,
      "loss": 3.4484,
      "step": 56
    },
    {
      "epoch": 0.049825174825174824,
      "grad_norm": 5372.69287109375,
      "learning_rate": 4.9565217391304355e-06,
      "loss": 3.2868,
      "step": 57
    },
    {
      "epoch": 0.050699300699300696,
      "grad_norm": 2426.148681640625,
      "learning_rate": 5.043478260869565e-06,
      "loss": 3.0171,
      "step": 58
    },
    {
      "epoch": 0.051573426573426576,
      "grad_norm": 2080.174560546875,
      "learning_rate": 5.130434782608697e-06,
      "loss": 2.9291,
      "step": 59
    },
    {
      "epoch": 0.05244755244755245,
      "grad_norm": 1464.4158935546875,
      "learning_rate": 5.2173913043478265e-06,
      "loss": 2.7738,
      "step": 60
    },
    {
      "epoch": 0.05332167832167832,
      "grad_norm": 2160.916015625,
      "learning_rate": 5.304347826086957e-06,
      "loss": 3.0263,
      "step": 61
    },
    {
      "epoch": 0.05419580419580419,
      "grad_norm": 2836.554443359375,
      "learning_rate": 5.391304347826088e-06,
      "loss": 3.1385,
      "step": 62
    },
    {
      "epoch": 0.05506993006993007,
      "grad_norm": 3934.994873046875,
      "learning_rate": 5.478260869565217e-06,
      "loss": 3.0901,
      "step": 63
    },
    {
      "epoch": 0.055944055944055944,
      "grad_norm": 2294.864990234375,
      "learning_rate": 5.565217391304348e-06,
      "loss": 2.45,
      "step": 64
    },
    {
      "epoch": 0.056818181818181816,
      "grad_norm": 843.7431640625,
      "learning_rate": 5.652173913043479e-06,
      "loss": 2.4281,
      "step": 65
    },
    {
      "epoch": 0.057692307692307696,
      "grad_norm": 938.2041015625,
      "learning_rate": 5.739130434782609e-06,
      "loss": 2.401,
      "step": 66
    },
    {
      "epoch": 0.05856643356643357,
      "grad_norm": 662.5889892578125,
      "learning_rate": 5.826086956521739e-06,
      "loss": 2.8292,
      "step": 67
    },
    {
      "epoch": 0.05944055944055944,
      "grad_norm": 634.2139282226562,
      "learning_rate": 5.91304347826087e-06,
      "loss": 2.8133,
      "step": 68
    },
    {
      "epoch": 0.06031468531468531,
      "grad_norm": 1620.3892822265625,
      "learning_rate": 6e-06,
      "loss": 2.7996,
      "step": 69
    },
    {
      "epoch": 0.06118881118881119,
      "grad_norm": 916.6468505859375,
      "learning_rate": 6.086956521739132e-06,
      "loss": 2.0608,
      "step": 70
    },
    {
      "epoch": 0.062062937062937064,
      "grad_norm": 609.5055541992188,
      "learning_rate": 6.173913043478261e-06,
      "loss": 1.9956,
      "step": 71
    },
    {
      "epoch": 0.06293706293706294,
      "grad_norm": 432.4795837402344,
      "learning_rate": 6.260869565217392e-06,
      "loss": 1.9366,
      "step": 72
    },
    {
      "epoch": 0.06381118881118882,
      "grad_norm": 419.33660888671875,
      "learning_rate": 6.3478260869565225e-06,
      "loss": 2.3583,
      "step": 73
    },
    {
      "epoch": 0.06468531468531469,
      "grad_norm": 504.5553894042969,
      "learning_rate": 6.434782608695652e-06,
      "loss": 2.5322,
      "step": 74
    },
    {
      "epoch": 0.06555944055944056,
      "grad_norm": 1139.7701416015625,
      "learning_rate": 6.521739130434783e-06,
      "loss": 2.741,
      "step": 75
    },
    {
      "epoch": 0.06643356643356643,
      "grad_norm": 342.4759826660156,
      "learning_rate": 6.6086956521739135e-06,
      "loss": 1.6079,
      "step": 76
    },
    {
      "epoch": 0.0673076923076923,
      "grad_norm": 330.3990173339844,
      "learning_rate": 6.695652173913044e-06,
      "loss": 1.539,
      "step": 77
    },
    {
      "epoch": 0.06818181818181818,
      "grad_norm": 294.3912048339844,
      "learning_rate": 6.782608695652174e-06,
      "loss": 1.4513,
      "step": 78
    },
    {
      "epoch": 0.06905594405594405,
      "grad_norm": 241.0177001953125,
      "learning_rate": 6.869565217391305e-06,
      "loss": 1.746,
      "step": 79
    },
    {
      "epoch": 0.06993006993006994,
      "grad_norm": 416.7195739746094,
      "learning_rate": 6.956521739130435e-06,
      "loss": 2.1817,
      "step": 80
    },
    {
      "epoch": 0.07080419580419581,
      "grad_norm": 1799.737548828125,
      "learning_rate": 7.0434782608695665e-06,
      "loss": 2.2388,
      "step": 81
    },
    {
      "epoch": 0.07167832167832168,
      "grad_norm": 378.99920654296875,
      "learning_rate": 7.130434782608696e-06,
      "loss": 1.5085,
      "step": 82
    },
    {
      "epoch": 0.07255244755244755,
      "grad_norm": 263.5702209472656,
      "learning_rate": 7.217391304347827e-06,
      "loss": 1.1512,
      "step": 83
    },
    {
      "epoch": 0.07342657342657342,
      "grad_norm": 166.30267333984375,
      "learning_rate": 7.304347826086957e-06,
      "loss": 1.1098,
      "step": 84
    },
    {
      "epoch": 0.0743006993006993,
      "grad_norm": 132.2535400390625,
      "learning_rate": 7.391304347826087e-06,
      "loss": 1.2852,
      "step": 85
    },
    {
      "epoch": 0.07517482517482517,
      "grad_norm": 452.1421813964844,
      "learning_rate": 7.478260869565218e-06,
      "loss": 1.8348,
      "step": 86
    },
    {
      "epoch": 0.07604895104895106,
      "grad_norm": 446.5715637207031,
      "learning_rate": 7.565217391304348e-06,
      "loss": 1.7715,
      "step": 87
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 3067.76171875,
      "learning_rate": 7.652173913043479e-06,
      "loss": 1.4327,
      "step": 88
    },
    {
      "epoch": 0.0777972027972028,
      "grad_norm": 121.17166900634766,
      "learning_rate": 7.739130434782609e-06,
      "loss": 0.9575,
      "step": 89
    },
    {
      "epoch": 0.07867132867132867,
      "grad_norm": 78.57453918457031,
      "learning_rate": 7.82608695652174e-06,
      "loss": 0.9516,
      "step": 90
    },
    {
      "epoch": 0.07954545454545454,
      "grad_norm": 68.7636947631836,
      "learning_rate": 7.91304347826087e-06,
      "loss": 0.9146,
      "step": 91
    },
    {
      "epoch": 0.08041958041958042,
      "grad_norm": 376.2358703613281,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.4726,
      "step": 92
    },
    {
      "epoch": 0.08129370629370629,
      "grad_norm": 329.2913818359375,
      "learning_rate": 8.086956521739131e-06,
      "loss": 1.577,
      "step": 93
    },
    {
      "epoch": 0.08216783216783216,
      "grad_norm": 1479.5242919921875,
      "learning_rate": 8.173913043478263e-06,
      "loss": 1.5309,
      "step": 94
    },
    {
      "epoch": 0.08304195804195805,
      "grad_norm": 55.359046936035156,
      "learning_rate": 8.260869565217392e-06,
      "loss": 0.8398,
      "step": 95
    },
    {
      "epoch": 0.08391608391608392,
      "grad_norm": 58.67531204223633,
      "learning_rate": 8.347826086956522e-06,
      "loss": 0.8237,
      "step": 96
    },
    {
      "epoch": 0.08479020979020979,
      "grad_norm": 64.31134033203125,
      "learning_rate": 8.434782608695653e-06,
      "loss": 0.7818,
      "step": 97
    },
    {
      "epoch": 0.08566433566433566,
      "grad_norm": 158.56243896484375,
      "learning_rate": 8.521739130434783e-06,
      "loss": 1.0498,
      "step": 98
    },
    {
      "epoch": 0.08653846153846154,
      "grad_norm": 279.12371826171875,
      "learning_rate": 8.608695652173915e-06,
      "loss": 1.3211,
      "step": 99
    },
    {
      "epoch": 0.08741258741258741,
      "grad_norm": 434.05230712890625,
      "learning_rate": 8.695652173913044e-06,
      "loss": 1.6166,
      "step": 100
    },
    {
      "epoch": 0.08828671328671328,
      "grad_norm": 38.728424072265625,
      "learning_rate": 8.782608695652174e-06,
      "loss": 0.7303,
      "step": 101
    },
    {
      "epoch": 0.08916083916083917,
      "grad_norm": 37.98553466796875,
      "learning_rate": 8.869565217391306e-06,
      "loss": 0.7315,
      "step": 102
    },
    {
      "epoch": 0.09003496503496504,
      "grad_norm": 59.28043746948242,
      "learning_rate": 8.956521739130435e-06,
      "loss": 0.689,
      "step": 103
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 119.89543151855469,
      "learning_rate": 9.043478260869565e-06,
      "loss": 0.9381,
      "step": 104
    },
    {
      "epoch": 0.09178321678321678,
      "grad_norm": 148.6534423828125,
      "learning_rate": 9.130434782608697e-06,
      "loss": 1.0946,
      "step": 105
    },
    {
      "epoch": 0.09265734265734266,
      "grad_norm": 236.9190216064453,
      "learning_rate": 9.217391304347826e-06,
      "loss": 1.3037,
      "step": 106
    },
    {
      "epoch": 0.09353146853146853,
      "grad_norm": 78.93495178222656,
      "learning_rate": 9.304347826086956e-06,
      "loss": 0.7912,
      "step": 107
    },
    {
      "epoch": 0.0944055944055944,
      "grad_norm": 96.54972839355469,
      "learning_rate": 9.391304347826087e-06,
      "loss": 0.6279,
      "step": 108
    },
    {
      "epoch": 0.09527972027972027,
      "grad_norm": 38.24094772338867,
      "learning_rate": 9.478260869565217e-06,
      "loss": 0.6111,
      "step": 109
    },
    {
      "epoch": 0.09615384615384616,
      "grad_norm": 71.03121185302734,
      "learning_rate": 9.565217391304349e-06,
      "loss": 0.6944,
      "step": 110
    },
    {
      "epoch": 0.09702797202797203,
      "grad_norm": 137.56362915039062,
      "learning_rate": 9.652173913043478e-06,
      "loss": 0.9474,
      "step": 111
    },
    {
      "epoch": 0.0979020979020979,
      "grad_norm": 879.013916015625,
      "learning_rate": 9.73913043478261e-06,
      "loss": 1.0262,
      "step": 112
    },
    {
      "epoch": 0.09877622377622378,
      "grad_norm": 119.42290496826172,
      "learning_rate": 9.82608695652174e-06,
      "loss": 0.8811,
      "step": 113
    },
    {
      "epoch": 0.09965034965034965,
      "grad_norm": 93.4661636352539,
      "learning_rate": 9.913043478260871e-06,
      "loss": 0.5285,
      "step": 114
    },
    {
      "epoch": 0.10052447552447552,
      "grad_norm": 76.67430877685547,
      "learning_rate": 1e-05,
      "loss": 0.5282,
      "step": 115
    },
    {
      "epoch": 0.10139860139860139,
      "grad_norm": 54.31447219848633,
      "learning_rate": 9.999994774591762e-06,
      "loss": 0.4725,
      "step": 116
    },
    {
      "epoch": 0.10227272727272728,
      "grad_norm": 113.34749603271484,
      "learning_rate": 9.999979098377964e-06,
      "loss": 0.7714,
      "step": 117
    },
    {
      "epoch": 0.10314685314685315,
      "grad_norm": 136.5832061767578,
      "learning_rate": 9.999952971391373e-06,
      "loss": 0.8836,
      "step": 118
    },
    {
      "epoch": 0.10402097902097902,
      "grad_norm": 94.26959991455078,
      "learning_rate": 9.9999163936866e-06,
      "loss": 0.894,
      "step": 119
    },
    {
      "epoch": 0.1048951048951049,
      "grad_norm": 122.33444213867188,
      "learning_rate": 9.9998693653401e-06,
      "loss": 0.3968,
      "step": 120
    },
    {
      "epoch": 0.10576923076923077,
      "grad_norm": 76.0854721069336,
      "learning_rate": 9.999811886450166e-06,
      "loss": 0.3949,
      "step": 121
    },
    {
      "epoch": 0.10664335664335664,
      "grad_norm": 56.318912506103516,
      "learning_rate": 9.99974395713694e-06,
      "loss": 0.3585,
      "step": 122
    },
    {
      "epoch": 0.10751748251748251,
      "grad_norm": 41.908634185791016,
      "learning_rate": 9.999665577542406e-06,
      "loss": 0.6524,
      "step": 123
    },
    {
      "epoch": 0.10839160839160839,
      "grad_norm": 51.77201843261719,
      "learning_rate": 9.99957674783039e-06,
      "loss": 0.7918,
      "step": 124
    },
    {
      "epoch": 0.10926573426573427,
      "grad_norm": 234.0983123779297,
      "learning_rate": 9.99947746818656e-06,
      "loss": 0.8153,
      "step": 125
    },
    {
      "epoch": 0.11013986013986014,
      "grad_norm": 84.35652923583984,
      "learning_rate": 9.999367738818428e-06,
      "loss": 0.3227,
      "step": 126
    },
    {
      "epoch": 0.11101398601398602,
      "grad_norm": 60.76322937011719,
      "learning_rate": 9.999247559955346e-06,
      "loss": 0.2588,
      "step": 127
    },
    {
      "epoch": 0.11188811188811189,
      "grad_norm": 46.1147575378418,
      "learning_rate": 9.999116931848504e-06,
      "loss": 0.2339,
      "step": 128
    },
    {
      "epoch": 0.11276223776223776,
      "grad_norm": 49.116477966308594,
      "learning_rate": 9.998975854770939e-06,
      "loss": 0.4233,
      "step": 129
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 56.302513122558594,
      "learning_rate": 9.998824329017526e-06,
      "loss": 0.6892,
      "step": 130
    },
    {
      "epoch": 0.1145104895104895,
      "grad_norm": 139.8785400390625,
      "learning_rate": 9.998662354904978e-06,
      "loss": 0.7064,
      "step": 131
    },
    {
      "epoch": 0.11538461538461539,
      "grad_norm": 70.04507446289062,
      "learning_rate": 9.998489932771846e-06,
      "loss": 0.3128,
      "step": 132
    },
    {
      "epoch": 0.11625874125874126,
      "grad_norm": 72.47883605957031,
      "learning_rate": 9.99830706297852e-06,
      "loss": 0.1324,
      "step": 133
    },
    {
      "epoch": 0.11713286713286714,
      "grad_norm": 72.9150390625,
      "learning_rate": 9.99811374590723e-06,
      "loss": 0.1102,
      "step": 134
    },
    {
      "epoch": 0.11800699300699301,
      "grad_norm": 56.1715087890625,
      "learning_rate": 9.997909981962039e-06,
      "loss": 0.0999,
      "step": 135
    },
    {
      "epoch": 0.11888111888111888,
      "grad_norm": 94.35802459716797,
      "learning_rate": 9.997695771568848e-06,
      "loss": 0.5452,
      "step": 136
    },
    {
      "epoch": 0.11975524475524475,
      "grad_norm": 75.62159729003906,
      "learning_rate": 9.99747111517539e-06,
      "loss": 0.5765,
      "step": 137
    },
    {
      "epoch": 0.12062937062937062,
      "grad_norm": 131.2911376953125,
      "learning_rate": 9.997236013251234e-06,
      "loss": 0.3496,
      "step": 138
    },
    {
      "epoch": 0.1215034965034965,
      "grad_norm": 62.778892517089844,
      "learning_rate": 9.996990466287784e-06,
      "loss": 0.078,
      "step": 139
    },
    {
      "epoch": 0.12237762237762238,
      "grad_norm": 51.58250045776367,
      "learning_rate": 9.996734474798269e-06,
      "loss": 0.0734,
      "step": 140
    },
    {
      "epoch": 0.12325174825174826,
      "grad_norm": 26.350582122802734,
      "learning_rate": 9.996468039317756e-06,
      "loss": 0.0659,
      "step": 141
    },
    {
      "epoch": 0.12412587412587413,
      "grad_norm": 35.71564483642578,
      "learning_rate": 9.996191160403137e-06,
      "loss": 0.4461,
      "step": 142
    },
    {
      "epoch": 0.125,
      "grad_norm": 39.71724319458008,
      "learning_rate": 9.995903838633133e-06,
      "loss": 0.5344,
      "step": 143
    },
    {
      "epoch": 0.1258741258741259,
      "grad_norm": 1632.76220703125,
      "learning_rate": 9.9956060746083e-06,
      "loss": 0.4163,
      "step": 144
    },
    {
      "epoch": 0.12674825174825174,
      "grad_norm": 27.664169311523438,
      "learning_rate": 9.995297868951006e-06,
      "loss": 0.0523,
      "step": 145
    },
    {
      "epoch": 0.12762237762237763,
      "grad_norm": 23.907243728637695,
      "learning_rate": 9.994979222305453e-06,
      "loss": 0.0509,
      "step": 146
    },
    {
      "epoch": 0.1284965034965035,
      "grad_norm": 12.382387161254883,
      "learning_rate": 9.994650135337667e-06,
      "loss": 0.0471,
      "step": 147
    },
    {
      "epoch": 0.12937062937062938,
      "grad_norm": 14.604494094848633,
      "learning_rate": 9.994310608735492e-06,
      "loss": 0.3942,
      "step": 148
    },
    {
      "epoch": 0.13024475524475523,
      "grad_norm": 15.283137321472168,
      "learning_rate": 9.99396064320859e-06,
      "loss": 0.4985,
      "step": 149
    },
    {
      "epoch": 0.13111888111888112,
      "grad_norm": 126.60017395019531,
      "learning_rate": 9.993600239488454e-06,
      "loss": 0.4646,
      "step": 150
    },
    {
      "epoch": 0.131993006993007,
      "grad_norm": 12.914412498474121,
      "learning_rate": 9.993229398328382e-06,
      "loss": 0.0511,
      "step": 151
    },
    {
      "epoch": 0.13286713286713286,
      "grad_norm": 9.330611228942871,
      "learning_rate": 9.992848120503493e-06,
      "loss": 0.0465,
      "step": 152
    },
    {
      "epoch": 0.13374125874125875,
      "grad_norm": 11.379688262939453,
      "learning_rate": 9.99245640681072e-06,
      "loss": 0.037,
      "step": 153
    },
    {
      "epoch": 0.1346153846153846,
      "grad_norm": 9.89504623413086,
      "learning_rate": 9.992054258068809e-06,
      "loss": 0.2015,
      "step": 154
    },
    {
      "epoch": 0.1354895104895105,
      "grad_norm": 25.518352508544922,
      "learning_rate": 9.991641675118317e-06,
      "loss": 0.4853,
      "step": 155
    },
    {
      "epoch": 0.13636363636363635,
      "grad_norm": 206.08486938476562,
      "learning_rate": 9.991218658821609e-06,
      "loss": 0.4689,
      "step": 156
    },
    {
      "epoch": 0.13723776223776224,
      "grad_norm": 85.88076782226562,
      "learning_rate": 9.990785210062856e-06,
      "loss": 0.1277,
      "step": 157
    },
    {
      "epoch": 0.1381118881118881,
      "grad_norm": 5.743524551391602,
      "learning_rate": 9.99034132974804e-06,
      "loss": 0.0399,
      "step": 158
    },
    {
      "epoch": 0.13898601398601398,
      "grad_norm": 12.778155326843262,
      "learning_rate": 9.989887018804943e-06,
      "loss": 0.0541,
      "step": 159
    },
    {
      "epoch": 0.13986013986013987,
      "grad_norm": 5.539422035217285,
      "learning_rate": 9.989422278183148e-06,
      "loss": 0.1197,
      "step": 160
    },
    {
      "epoch": 0.14073426573426573,
      "grad_norm": 15.714009284973145,
      "learning_rate": 9.988947108854037e-06,
      "loss": 0.4535,
      "step": 161
    },
    {
      "epoch": 0.14160839160839161,
      "grad_norm": 37.637420654296875,
      "learning_rate": 9.988461511810796e-06,
      "loss": 0.4265,
      "step": 162
    },
    {
      "epoch": 0.14248251748251747,
      "grad_norm": 54.52699279785156,
      "learning_rate": 9.987965488068398e-06,
      "loss": 0.1981,
      "step": 163
    },
    {
      "epoch": 0.14335664335664336,
      "grad_norm": 8.105698585510254,
      "learning_rate": 9.987459038663617e-06,
      "loss": 0.0427,
      "step": 164
    },
    {
      "epoch": 0.14423076923076922,
      "grad_norm": 8.774629592895508,
      "learning_rate": 9.986942164655012e-06,
      "loss": 0.0397,
      "step": 165
    },
    {
      "epoch": 0.1451048951048951,
      "grad_norm": 11.141607284545898,
      "learning_rate": 9.986414867122935e-06,
      "loss": 0.0367,
      "step": 166
    },
    {
      "epoch": 0.145979020979021,
      "grad_norm": 8.934805870056152,
      "learning_rate": 9.985877147169524e-06,
      "loss": 0.3389,
      "step": 167
    },
    {
      "epoch": 0.14685314685314685,
      "grad_norm": 19.358036041259766,
      "learning_rate": 9.985329005918702e-06,
      "loss": 0.4332,
      "step": 168
    },
    {
      "epoch": 0.14772727272727273,
      "grad_norm": 50.90660858154297,
      "learning_rate": 9.984770444516175e-06,
      "loss": 0.2372,
      "step": 169
    },
    {
      "epoch": 0.1486013986013986,
      "grad_norm": 5.672937870025635,
      "learning_rate": 9.984201464129424e-06,
      "loss": 0.0276,
      "step": 170
    },
    {
      "epoch": 0.14947552447552448,
      "grad_norm": 22.71971893310547,
      "learning_rate": 9.983622065947714e-06,
      "loss": 0.0625,
      "step": 171
    },
    {
      "epoch": 0.15034965034965034,
      "grad_norm": 12.130918502807617,
      "learning_rate": 9.983032251182081e-06,
      "loss": 0.0499,
      "step": 172
    },
    {
      "epoch": 0.15122377622377622,
      "grad_norm": 6.720962047576904,
      "learning_rate": 9.982432021065334e-06,
      "loss": 0.2796,
      "step": 173
    },
    {
      "epoch": 0.1520979020979021,
      "grad_norm": 19.050613403320312,
      "learning_rate": 9.98182137685205e-06,
      "loss": 0.4275,
      "step": 174
    },
    {
      "epoch": 0.15297202797202797,
      "grad_norm": 81.68606567382812,
      "learning_rate": 9.98120031981858e-06,
      "loss": 0.296,
      "step": 175
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 2.0445480346679688,
      "learning_rate": 9.98056885126303e-06,
      "loss": 0.0382,
      "step": 176
    },
    {
      "epoch": 0.1547202797202797,
      "grad_norm": 7.913002014160156,
      "learning_rate": 9.979926972505275e-06,
      "loss": 0.0304,
      "step": 177
    },
    {
      "epoch": 0.1555944055944056,
      "grad_norm": 8.316620826721191,
      "learning_rate": 9.979274684886943e-06,
      "loss": 0.0469,
      "step": 178
    },
    {
      "epoch": 0.15646853146853146,
      "grad_norm": 5.155393123626709,
      "learning_rate": 9.978611989771426e-06,
      "loss": 0.2217,
      "step": 179
    },
    {
      "epoch": 0.15734265734265734,
      "grad_norm": 11.583910942077637,
      "learning_rate": 9.977938888543862e-06,
      "loss": 0.4284,
      "step": 180
    },
    {
      "epoch": 0.15821678321678323,
      "grad_norm": 21.996143341064453,
      "learning_rate": 9.977255382611144e-06,
      "loss": 0.3383,
      "step": 181
    },
    {
      "epoch": 0.1590909090909091,
      "grad_norm": 33.84432601928711,
      "learning_rate": 9.976561473401912e-06,
      "loss": 0.1232,
      "step": 182
    },
    {
      "epoch": 0.15996503496503497,
      "grad_norm": 3.2832467555999756,
      "learning_rate": 9.975857162366547e-06,
      "loss": 0.0407,
      "step": 183
    },
    {
      "epoch": 0.16083916083916083,
      "grad_norm": 3.5775864124298096,
      "learning_rate": 9.975142450977174e-06,
      "loss": 0.0433,
      "step": 184
    },
    {
      "epoch": 0.16171328671328672,
      "grad_norm": 6.35914421081543,
      "learning_rate": 9.974417340727658e-06,
      "loss": 0.1783,
      "step": 185
    },
    {
      "epoch": 0.16258741258741258,
      "grad_norm": 14.336048126220703,
      "learning_rate": 9.9736818331336e-06,
      "loss": 0.4208,
      "step": 186
    },
    {
      "epoch": 0.16346153846153846,
      "grad_norm": 34.624168395996094,
      "learning_rate": 9.972935929732326e-06,
      "loss": 0.3602,
      "step": 187
    },
    {
      "epoch": 0.16433566433566432,
      "grad_norm": 41.29864501953125,
      "learning_rate": 9.972179632082899e-06,
      "loss": 0.1549,
      "step": 188
    },
    {
      "epoch": 0.1652097902097902,
      "grad_norm": 9.372153282165527,
      "learning_rate": 9.971412941766105e-06,
      "loss": 0.0428,
      "step": 189
    },
    {
      "epoch": 0.1660839160839161,
      "grad_norm": 4.807158470153809,
      "learning_rate": 9.97063586038445e-06,
      "loss": 0.0452,
      "step": 190
    },
    {
      "epoch": 0.16695804195804195,
      "grad_norm": 14.897777557373047,
      "learning_rate": 9.969848389562162e-06,
      "loss": 0.0344,
      "step": 191
    },
    {
      "epoch": 0.16783216783216784,
      "grad_norm": 17.41871452331543,
      "learning_rate": 9.969050530945185e-06,
      "loss": 0.3708,
      "step": 192
    },
    {
      "epoch": 0.1687062937062937,
      "grad_norm": 15.670287132263184,
      "learning_rate": 9.968242286201171e-06,
      "loss": 0.416,
      "step": 193
    },
    {
      "epoch": 0.16958041958041958,
      "grad_norm": 34.61851501464844,
      "learning_rate": 9.967423657019485e-06,
      "loss": 0.1738,
      "step": 194
    },
    {
      "epoch": 0.17045454545454544,
      "grad_norm": 7.756021976470947,
      "learning_rate": 9.966594645111196e-06,
      "loss": 0.0455,
      "step": 195
    },
    {
      "epoch": 0.17132867132867133,
      "grad_norm": 1.5823596715927124,
      "learning_rate": 9.965755252209073e-06,
      "loss": 0.0312,
      "step": 196
    },
    {
      "epoch": 0.17220279720279721,
      "grad_norm": 2.669685125350952,
      "learning_rate": 9.964905480067585e-06,
      "loss": 0.0231,
      "step": 197
    },
    {
      "epoch": 0.17307692307692307,
      "grad_norm": 5.0861616134643555,
      "learning_rate": 9.964045330462896e-06,
      "loss": 0.1902,
      "step": 198
    },
    {
      "epoch": 0.17395104895104896,
      "grad_norm": 9.383868217468262,
      "learning_rate": 9.963174805192857e-06,
      "loss": 0.4223,
      "step": 199
    },
    {
      "epoch": 0.17482517482517482,
      "grad_norm": 61.373939514160156,
      "learning_rate": 9.962293906077007e-06,
      "loss": 0.2332,
      "step": 200
    },
    {
      "epoch": 0.1756993006993007,
      "grad_norm": 9.795272827148438,
      "learning_rate": 9.961402634956575e-06,
      "loss": 0.0469,
      "step": 201
    },
    {
      "epoch": 0.17657342657342656,
      "grad_norm": 4.310669422149658,
      "learning_rate": 9.960500993694455e-06,
      "loss": 0.0301,
      "step": 202
    },
    {
      "epoch": 0.17744755244755245,
      "grad_norm": 6.653733253479004,
      "learning_rate": 9.959588984175228e-06,
      "loss": 0.0402,
      "step": 203
    },
    {
      "epoch": 0.17832167832167833,
      "grad_norm": 7.323151588439941,
      "learning_rate": 9.958666608305145e-06,
      "loss": 0.2074,
      "step": 204
    },
    {
      "epoch": 0.1791958041958042,
      "grad_norm": 11.370004653930664,
      "learning_rate": 9.95773386801212e-06,
      "loss": 0.4051,
      "step": 205
    },
    {
      "epoch": 0.18006993006993008,
      "grad_norm": 35.1203498840332,
      "learning_rate": 9.956790765245733e-06,
      "loss": 0.2979,
      "step": 206
    },
    {
      "epoch": 0.18094405594405594,
      "grad_norm": 21.699630737304688,
      "learning_rate": 9.955837301977222e-06,
      "loss": 0.0628,
      "step": 207
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 1.7240406274795532,
      "learning_rate": 9.954873480199483e-06,
      "loss": 0.031,
      "step": 208
    },
    {
      "epoch": 0.18269230769230768,
      "grad_norm": 4.516720771789551,
      "learning_rate": 9.953899301927058e-06,
      "loss": 0.0443,
      "step": 209
    },
    {
      "epoch": 0.18356643356643357,
      "grad_norm": 4.583275318145752,
      "learning_rate": 9.95291476919614e-06,
      "loss": 0.1401,
      "step": 210
    },
    {
      "epoch": 0.18444055944055945,
      "grad_norm": 10.784034729003906,
      "learning_rate": 9.951919884064564e-06,
      "loss": 0.4118,
      "step": 211
    },
    {
      "epoch": 0.1853146853146853,
      "grad_norm": 15.929347038269043,
      "learning_rate": 9.950914648611803e-06,
      "loss": 0.3643,
      "step": 212
    },
    {
      "epoch": 0.1861888111888112,
      "grad_norm": 22.45490074157715,
      "learning_rate": 9.94989906493896e-06,
      "loss": 0.1374,
      "step": 213
    },
    {
      "epoch": 0.18706293706293706,
      "grad_norm": 1.9940564632415771,
      "learning_rate": 9.948873135168772e-06,
      "loss": 0.0422,
      "step": 214
    },
    {
      "epoch": 0.18793706293706294,
      "grad_norm": 9.121724128723145,
      "learning_rate": 9.947836861445604e-06,
      "loss": 0.0324,
      "step": 215
    },
    {
      "epoch": 0.1888111888111888,
      "grad_norm": 4.333606243133545,
      "learning_rate": 9.946790245935429e-06,
      "loss": 0.0347,
      "step": 216
    },
    {
      "epoch": 0.1896853146853147,
      "grad_norm": 16.115217208862305,
      "learning_rate": 9.945733290825853e-06,
      "loss": 0.4056,
      "step": 217
    },
    {
      "epoch": 0.19055944055944055,
      "grad_norm": 17.320140838623047,
      "learning_rate": 9.94466599832608e-06,
      "loss": 0.3958,
      "step": 218
    },
    {
      "epoch": 0.19143356643356643,
      "grad_norm": 23.57686996459961,
      "learning_rate": 9.943588370666925e-06,
      "loss": 0.1774,
      "step": 219
    },
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 1.5236711502075195,
      "learning_rate": 9.942500410100808e-06,
      "loss": 0.032,
      "step": 220
    },
    {
      "epoch": 0.19318181818181818,
      "grad_norm": 8.071771621704102,
      "learning_rate": 9.941402118901743e-06,
      "loss": 0.048,
      "step": 221
    },
    {
      "epoch": 0.19405594405594406,
      "grad_norm": 2.024702787399292,
      "learning_rate": 9.94029349936534e-06,
      "loss": 0.0267,
      "step": 222
    },
    {
      "epoch": 0.19493006993006992,
      "grad_norm": 10.864749908447266,
      "learning_rate": 9.939174553808793e-06,
      "loss": 0.2626,
      "step": 223
    },
    {
      "epoch": 0.1958041958041958,
      "grad_norm": 11.942625045776367,
      "learning_rate": 9.938045284570878e-06,
      "loss": 0.3772,
      "step": 224
    },
    {
      "epoch": 0.19667832167832167,
      "grad_norm": 38.574405670166016,
      "learning_rate": 9.93690569401196e-06,
      "loss": 0.2097,
      "step": 225
    },
    {
      "epoch": 0.19755244755244755,
      "grad_norm": 4.849496841430664,
      "learning_rate": 9.935755784513961e-06,
      "loss": 0.0391,
      "step": 226
    },
    {
      "epoch": 0.19842657342657344,
      "grad_norm": 1.8271946907043457,
      "learning_rate": 9.934595558480384e-06,
      "loss": 0.0296,
      "step": 227
    },
    {
      "epoch": 0.1993006993006993,
      "grad_norm": 1.5063093900680542,
      "learning_rate": 9.933425018336292e-06,
      "loss": 0.0307,
      "step": 228
    },
    {
      "epoch": 0.20017482517482518,
      "grad_norm": 6.395155906677246,
      "learning_rate": 9.932244166528302e-06,
      "loss": 0.1872,
      "step": 229
    },
    {
      "epoch": 0.20104895104895104,
      "grad_norm": 11.66751766204834,
      "learning_rate": 9.93105300552459e-06,
      "loss": 0.3988,
      "step": 230
    },
    {
      "epoch": 0.20192307692307693,
      "grad_norm": 16.185529708862305,
      "learning_rate": 9.929851537814871e-06,
      "loss": 0.3089,
      "step": 231
    },
    {
      "epoch": 0.20279720279720279,
      "grad_norm": 12.022547721862793,
      "learning_rate": 9.928639765910417e-06,
      "loss": 0.0777,
      "step": 232
    },
    {
      "epoch": 0.20367132867132867,
      "grad_norm": 1.9756624698638916,
      "learning_rate": 9.927417692344025e-06,
      "loss": 0.0393,
      "step": 233
    },
    {
      "epoch": 0.20454545454545456,
      "grad_norm": 1.200640320777893,
      "learning_rate": 9.926185319670028e-06,
      "loss": 0.0359,
      "step": 234
    },
    {
      "epoch": 0.20541958041958042,
      "grad_norm": 13.648567199707031,
      "learning_rate": 9.924942650464287e-06,
      "loss": 0.2055,
      "step": 235
    },
    {
      "epoch": 0.2062937062937063,
      "grad_norm": 20.275653839111328,
      "learning_rate": 9.92368968732418e-06,
      "loss": 0.3945,
      "step": 236
    },
    {
      "epoch": 0.20716783216783216,
      "grad_norm": 18.18129539489746,
      "learning_rate": 9.922426432868611e-06,
      "loss": 0.2755,
      "step": 237
    },
    {
      "epoch": 0.20804195804195805,
      "grad_norm": 31.813112258911133,
      "learning_rate": 9.921152889737985e-06,
      "loss": 0.118,
      "step": 238
    },
    {
      "epoch": 0.2089160839160839,
      "grad_norm": 7.986058235168457,
      "learning_rate": 9.919869060594214e-06,
      "loss": 0.0464,
      "step": 239
    },
    {
      "epoch": 0.2097902097902098,
      "grad_norm": 7.997095584869385,
      "learning_rate": 9.918574948120711e-06,
      "loss": 0.0491,
      "step": 240
    },
    {
      "epoch": 0.21066433566433568,
      "grad_norm": 6.035437107086182,
      "learning_rate": 9.917270555022384e-06,
      "loss": 0.0408,
      "step": 241
    },
    {
      "epoch": 0.21153846153846154,
      "grad_norm": 11.036323547363281,
      "learning_rate": 9.915955884025627e-06,
      "loss": 0.21,
      "step": 242
    },
    {
      "epoch": 0.21241258741258742,
      "grad_norm": 18.96322250366211,
      "learning_rate": 9.914630937878315e-06,
      "loss": 0.3936,
      "step": 243
    },
    {
      "epoch": 0.21328671328671328,
      "grad_norm": 33.46514892578125,
      "learning_rate": 9.913295719349805e-06,
      "loss": 0.1674,
      "step": 244
    },
    {
      "epoch": 0.21416083916083917,
      "grad_norm": 7.765932083129883,
      "learning_rate": 9.911950231230919e-06,
      "loss": 0.0379,
      "step": 245
    },
    {
      "epoch": 0.21503496503496503,
      "grad_norm": 3.729930877685547,
      "learning_rate": 9.910594476333948e-06,
      "loss": 0.0407,
      "step": 246
    },
    {
      "epoch": 0.2159090909090909,
      "grad_norm": 6.890056610107422,
      "learning_rate": 9.90922845749264e-06,
      "loss": 0.0355,
      "step": 247
    },
    {
      "epoch": 0.21678321678321677,
      "grad_norm": 15.682589530944824,
      "learning_rate": 9.907852177562201e-06,
      "loss": 0.2292,
      "step": 248
    },
    {
      "epoch": 0.21765734265734266,
      "grad_norm": 18.603309631347656,
      "learning_rate": 9.906465639419278e-06,
      "loss": 0.3974,
      "step": 249
    },
    {
      "epoch": 0.21853146853146854,
      "grad_norm": 63.19478225708008,
      "learning_rate": 9.905068845961962e-06,
      "loss": 0.2231,
      "step": 250
    },
    {
      "epoch": 0.2194055944055944,
      "grad_norm": 7.919271945953369,
      "learning_rate": 9.903661800109781e-06,
      "loss": 0.0362,
      "step": 251
    },
    {
      "epoch": 0.2202797202797203,
      "grad_norm": 15.985671043395996,
      "learning_rate": 9.902244504803688e-06,
      "loss": 0.0512,
      "step": 252
    },
    {
      "epoch": 0.22115384615384615,
      "grad_norm": 5.3753790855407715,
      "learning_rate": 9.900816963006063e-06,
      "loss": 0.0331,
      "step": 253
    },
    {
      "epoch": 0.22202797202797203,
      "grad_norm": 8.979366302490234,
      "learning_rate": 9.899379177700704e-06,
      "loss": 0.1906,
      "step": 254
    },
    {
      "epoch": 0.2229020979020979,
      "grad_norm": 12.159628868103027,
      "learning_rate": 9.897931151892813e-06,
      "loss": 0.3723,
      "step": 255
    },
    {
      "epoch": 0.22377622377622378,
      "grad_norm": 42.70203399658203,
      "learning_rate": 9.896472888609001e-06,
      "loss": 0.2815,
      "step": 256
    },
    {
      "epoch": 0.22465034965034966,
      "grad_norm": 8.95479965209961,
      "learning_rate": 9.895004390897277e-06,
      "loss": 0.0656,
      "step": 257
    },
    {
      "epoch": 0.22552447552447552,
      "grad_norm": 1.8920732736587524,
      "learning_rate": 9.893525661827043e-06,
      "loss": 0.0365,
      "step": 258
    },
    {
      "epoch": 0.2263986013986014,
      "grad_norm": 3.121210813522339,
      "learning_rate": 9.892036704489084e-06,
      "loss": 0.0387,
      "step": 259
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 7.621633052825928,
      "learning_rate": 9.890537521995562e-06,
      "loss": 0.1081,
      "step": 260
    },
    {
      "epoch": 0.22814685314685315,
      "grad_norm": 418.2604675292969,
      "learning_rate": 9.889028117480013e-06,
      "loss": 0.3855,
      "step": 261
    },
    {
      "epoch": 0.229020979020979,
      "grad_norm": 14.098556518554688,
      "learning_rate": 9.88750849409734e-06,
      "loss": 0.3303,
      "step": 262
    },
    {
      "epoch": 0.2298951048951049,
      "grad_norm": 10.519493103027344,
      "learning_rate": 9.885978655023805e-06,
      "loss": 0.1016,
      "step": 263
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 2.6055939197540283,
      "learning_rate": 9.88443860345702e-06,
      "loss": 0.042,
      "step": 264
    },
    {
      "epoch": 0.23164335664335664,
      "grad_norm": 1.9771308898925781,
      "learning_rate": 9.882888342615944e-06,
      "loss": 0.0403,
      "step": 265
    },
    {
      "epoch": 0.23251748251748253,
      "grad_norm": 4.139562129974365,
      "learning_rate": 9.881327875740876e-06,
      "loss": 0.0389,
      "step": 266
    },
    {
      "epoch": 0.23339160839160839,
      "grad_norm": 8.686479568481445,
      "learning_rate": 9.879757206093445e-06,
      "loss": 0.323,
      "step": 267
    },
    {
      "epoch": 0.23426573426573427,
      "grad_norm": 11.847586631774902,
      "learning_rate": 9.87817633695661e-06,
      "loss": 0.3533,
      "step": 268
    },
    {
      "epoch": 0.23513986013986013,
      "grad_norm": 16.601634979248047,
      "learning_rate": 9.876585271634645e-06,
      "loss": 0.1302,
      "step": 269
    },
    {
      "epoch": 0.23601398601398602,
      "grad_norm": 2.124263286590576,
      "learning_rate": 9.874984013453135e-06,
      "loss": 0.0273,
      "step": 270
    },
    {
      "epoch": 0.2368881118881119,
      "grad_norm": 3.8491525650024414,
      "learning_rate": 9.873372565758972e-06,
      "loss": 0.0216,
      "step": 271
    },
    {
      "epoch": 0.23776223776223776,
      "grad_norm": 3.6490116119384766,
      "learning_rate": 9.871750931920344e-06,
      "loss": 0.03,
      "step": 272
    },
    {
      "epoch": 0.23863636363636365,
      "grad_norm": 13.216423034667969,
      "learning_rate": 9.87011911532673e-06,
      "loss": 0.356,
      "step": 273
    },
    {
      "epoch": 0.2395104895104895,
      "grad_norm": 12.804665565490723,
      "learning_rate": 9.868477119388897e-06,
      "loss": 0.352,
      "step": 274
    },
    {
      "epoch": 0.2403846153846154,
      "grad_norm": 22.13023567199707,
      "learning_rate": 9.866824947538879e-06,
      "loss": 0.1911,
      "step": 275
    },
    {
      "epoch": 0.24125874125874125,
      "grad_norm": 9.757916450500488,
      "learning_rate": 9.865162603229988e-06,
      "loss": 0.0521,
      "step": 276
    },
    {
      "epoch": 0.24213286713286714,
      "grad_norm": 6.684544563293457,
      "learning_rate": 9.863490089936795e-06,
      "loss": 0.0375,
      "step": 277
    },
    {
      "epoch": 0.243006993006993,
      "grad_norm": 7.354583740234375,
      "learning_rate": 9.861807411155126e-06,
      "loss": 0.0445,
      "step": 278
    },
    {
      "epoch": 0.24388111888111888,
      "grad_norm": 7.129856586456299,
      "learning_rate": 9.860114570402055e-06,
      "loss": 0.1624,
      "step": 279
    },
    {
      "epoch": 0.24475524475524477,
      "grad_norm": 13.456852912902832,
      "learning_rate": 9.858411571215893e-06,
      "loss": 0.3717,
      "step": 280
    },
    {
      "epoch": 0.24562937062937062,
      "grad_norm": 12.758528709411621,
      "learning_rate": 9.856698417156189e-06,
      "loss": 0.3017,
      "step": 281
    },
    {
      "epoch": 0.2465034965034965,
      "grad_norm": 8.24226188659668,
      "learning_rate": 9.854975111803714e-06,
      "loss": 0.0775,
      "step": 282
    },
    {
      "epoch": 0.24737762237762237,
      "grad_norm": 3.928032159805298,
      "learning_rate": 9.853241658760457e-06,
      "loss": 0.03,
      "step": 283
    },
    {
      "epoch": 0.24825174825174826,
      "grad_norm": 3.9799962043762207,
      "learning_rate": 9.851498061649618e-06,
      "loss": 0.0238,
      "step": 284
    },
    {
      "epoch": 0.2491258741258741,
      "grad_norm": 5.96354866027832,
      "learning_rate": 9.849744324115602e-06,
      "loss": 0.1163,
      "step": 285
    },
    {
      "epoch": 0.25,
      "grad_norm": 10.743607521057129,
      "learning_rate": 9.847980449824002e-06,
      "loss": 0.3595,
      "step": 286
    },
    {
      "epoch": 0.2508741258741259,
      "grad_norm": 19.058738708496094,
      "learning_rate": 9.846206442461608e-06,
      "loss": 0.3354,
      "step": 287
    },
    {
      "epoch": 0.2517482517482518,
      "grad_norm": 12.518518447875977,
      "learning_rate": 9.844422305736383e-06,
      "loss": 0.0874,
      "step": 288
    },
    {
      "epoch": 0.2526223776223776,
      "grad_norm": 3.7975564002990723,
      "learning_rate": 9.842628043377465e-06,
      "loss": 0.0301,
      "step": 289
    },
    {
      "epoch": 0.2534965034965035,
      "grad_norm": 3.613295555114746,
      "learning_rate": 9.840823659135153e-06,
      "loss": 0.0287,
      "step": 290
    },
    {
      "epoch": 0.2543706293706294,
      "grad_norm": 7.934193134307861,
      "learning_rate": 9.839009156780908e-06,
      "loss": 0.1423,
      "step": 291
    },
    {
      "epoch": 0.25524475524475526,
      "grad_norm": 14.053423881530762,
      "learning_rate": 9.837184540107334e-06,
      "loss": 0.3828,
      "step": 292
    },
    {
      "epoch": 0.2561188811188811,
      "grad_norm": 16.05836296081543,
      "learning_rate": 9.835349812928178e-06,
      "loss": 0.3386,
      "step": 293
    },
    {
      "epoch": 0.256993006993007,
      "grad_norm": 17.560699462890625,
      "learning_rate": 9.83350497907832e-06,
      "loss": 0.1149,
      "step": 294
    },
    {
      "epoch": 0.25786713286713286,
      "grad_norm": 4.02256965637207,
      "learning_rate": 9.831650042413765e-06,
      "loss": 0.0307,
      "step": 295
    },
    {
      "epoch": 0.25874125874125875,
      "grad_norm": 9.253922462463379,
      "learning_rate": 9.829785006811632e-06,
      "loss": 0.0386,
      "step": 296
    },
    {
      "epoch": 0.25961538461538464,
      "grad_norm": 4.394706726074219,
      "learning_rate": 9.827909876170148e-06,
      "loss": 0.0283,
      "step": 297
    },
    {
      "epoch": 0.26048951048951047,
      "grad_norm": 8.029558181762695,
      "learning_rate": 9.826024654408645e-06,
      "loss": 0.1921,
      "step": 298
    },
    {
      "epoch": 0.26136363636363635,
      "grad_norm": 11.811787605285645,
      "learning_rate": 9.824129345467545e-06,
      "loss": 0.3748,
      "step": 299
    },
    {
      "epoch": 0.26223776223776224,
      "grad_norm": 25.04216957092285,
      "learning_rate": 9.822223953308354e-06,
      "loss": 0.2538,
      "step": 300
    },
    {
      "epoch": 0.2631118881118881,
      "grad_norm": 23.821672439575195,
      "learning_rate": 9.820308481913649e-06,
      "loss": 0.0692,
      "step": 301
    },
    {
      "epoch": 0.263986013986014,
      "grad_norm": 7.635990619659424,
      "learning_rate": 9.818382935287078e-06,
      "loss": 0.0394,
      "step": 302
    },
    {
      "epoch": 0.26486013986013984,
      "grad_norm": 16.731515884399414,
      "learning_rate": 9.816447317453353e-06,
      "loss": 0.0758,
      "step": 303
    },
    {
      "epoch": 0.26573426573426573,
      "grad_norm": 13.700812339782715,
      "learning_rate": 9.814501632458226e-06,
      "loss": 0.0492,
      "step": 304
    },
    {
      "epoch": 0.2666083916083916,
      "grad_norm": 21.844310760498047,
      "learning_rate": 9.812545884368499e-06,
      "loss": 0.3774,
      "step": 305
    },
    {
      "epoch": 0.2674825174825175,
      "grad_norm": 18.387826919555664,
      "learning_rate": 9.810580077272004e-06,
      "loss": 0.2408,
      "step": 306
    },
    {
      "epoch": 0.26835664335664333,
      "grad_norm": 13.082258224487305,
      "learning_rate": 9.8086042152776e-06,
      "loss": 0.0613,
      "step": 307
    },
    {
      "epoch": 0.2692307692307692,
      "grad_norm": 5.791593551635742,
      "learning_rate": 9.80661830251516e-06,
      "loss": 0.0368,
      "step": 308
    },
    {
      "epoch": 0.2701048951048951,
      "grad_norm": 2.8048057556152344,
      "learning_rate": 9.804622343135565e-06,
      "loss": 0.0258,
      "step": 309
    },
    {
      "epoch": 0.270979020979021,
      "grad_norm": 6.751336097717285,
      "learning_rate": 9.8026163413107e-06,
      "loss": 0.1153,
      "step": 310
    },
    {
      "epoch": 0.2718531468531469,
      "grad_norm": 10.871200561523438,
      "learning_rate": 9.800600301233431e-06,
      "loss": 0.354,
      "step": 311
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 15.406217575073242,
      "learning_rate": 9.798574227117616e-06,
      "loss": 0.2765,
      "step": 312
    },
    {
      "epoch": 0.2736013986013986,
      "grad_norm": 14.365307807922363,
      "learning_rate": 9.796538123198077e-06,
      "loss": 0.0918,
      "step": 313
    },
    {
      "epoch": 0.2744755244755245,
      "grad_norm": 2.7442431449890137,
      "learning_rate": 9.794491993730607e-06,
      "loss": 0.0267,
      "step": 314
    },
    {
      "epoch": 0.27534965034965037,
      "grad_norm": 2.941704273223877,
      "learning_rate": 9.792435842991945e-06,
      "loss": 0.0319,
      "step": 315
    },
    {
      "epoch": 0.2762237762237762,
      "grad_norm": 2.744584798812866,
      "learning_rate": 9.790369675279789e-06,
      "loss": 0.0326,
      "step": 316
    },
    {
      "epoch": 0.2770979020979021,
      "grad_norm": 12.064460754394531,
      "learning_rate": 9.788293494912762e-06,
      "loss": 0.27,
      "step": 317
    },
    {
      "epoch": 0.27797202797202797,
      "grad_norm": 11.667068481445312,
      "learning_rate": 9.786207306230422e-06,
      "loss": 0.3604,
      "step": 318
    },
    {
      "epoch": 0.27884615384615385,
      "grad_norm": 19.818653106689453,
      "learning_rate": 9.784111113593244e-06,
      "loss": 0.1214,
      "step": 319
    },
    {
      "epoch": 0.27972027972027974,
      "grad_norm": 4.330897331237793,
      "learning_rate": 9.782004921382612e-06,
      "loss": 0.0309,
      "step": 320
    },
    {
      "epoch": 0.28059440559440557,
      "grad_norm": 9.383328437805176,
      "learning_rate": 9.779888734000813e-06,
      "loss": 0.0248,
      "step": 321
    },
    {
      "epoch": 0.28146853146853146,
      "grad_norm": 3.3568198680877686,
      "learning_rate": 9.777762555871024e-06,
      "loss": 0.034,
      "step": 322
    },
    {
      "epoch": 0.28234265734265734,
      "grad_norm": 9.252466201782227,
      "learning_rate": 9.775626391437303e-06,
      "loss": 0.1675,
      "step": 323
    },
    {
      "epoch": 0.28321678321678323,
      "grad_norm": 16.179365158081055,
      "learning_rate": 9.773480245164582e-06,
      "loss": 0.3824,
      "step": 324
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 14.496709823608398,
      "learning_rate": 9.771324121538658e-06,
      "loss": 0.2268,
      "step": 325
    },
    {
      "epoch": 0.28496503496503495,
      "grad_norm": 1.671203851699829,
      "learning_rate": 9.769158025066185e-06,
      "loss": 0.0279,
      "step": 326
    },
    {
      "epoch": 0.28583916083916083,
      "grad_norm": 3.007488250732422,
      "learning_rate": 9.766981960274653e-06,
      "loss": 0.0227,
      "step": 327
    },
    {
      "epoch": 0.2867132867132867,
      "grad_norm": 4.134337902069092,
      "learning_rate": 9.764795931712396e-06,
      "loss": 0.0207,
      "step": 328
    },
    {
      "epoch": 0.2875874125874126,
      "grad_norm": 12.430707931518555,
      "learning_rate": 9.762599943948569e-06,
      "loss": 0.2246,
      "step": 329
    },
    {
      "epoch": 0.28846153846153844,
      "grad_norm": 13.117758750915527,
      "learning_rate": 9.760394001573148e-06,
      "loss": 0.3695,
      "step": 330
    },
    {
      "epoch": 0.2893356643356643,
      "grad_norm": 14.137457847595215,
      "learning_rate": 9.758178109196908e-06,
      "loss": 0.1992,
      "step": 331
    },
    {
      "epoch": 0.2902097902097902,
      "grad_norm": 8.393913269042969,
      "learning_rate": 9.75595227145143e-06,
      "loss": 0.0685,
      "step": 332
    },
    {
      "epoch": 0.2910839160839161,
      "grad_norm": 5.6643781661987305,
      "learning_rate": 9.753716492989076e-06,
      "loss": 0.0193,
      "step": 333
    },
    {
      "epoch": 0.291958041958042,
      "grad_norm": 7.929760456085205,
      "learning_rate": 9.751470778482987e-06,
      "loss": 0.0352,
      "step": 334
    },
    {
      "epoch": 0.2928321678321678,
      "grad_norm": 4.623815536499023,
      "learning_rate": 9.749215132627078e-06,
      "loss": 0.0883,
      "step": 335
    },
    {
      "epoch": 0.2937062937062937,
      "grad_norm": 11.840864181518555,
      "learning_rate": 9.746949560136012e-06,
      "loss": 0.3566,
      "step": 336
    },
    {
      "epoch": 0.2945804195804196,
      "grad_norm": 15.384440422058105,
      "learning_rate": 9.74467406574521e-06,
      "loss": 0.2527,
      "step": 337
    },
    {
      "epoch": 0.29545454545454547,
      "grad_norm": 10.941631317138672,
      "learning_rate": 9.742388654210822e-06,
      "loss": 0.0982,
      "step": 338
    },
    {
      "epoch": 0.29632867132867136,
      "grad_norm": 4.305505275726318,
      "learning_rate": 9.740093330309735e-06,
      "loss": 0.0242,
      "step": 339
    },
    {
      "epoch": 0.2972027972027972,
      "grad_norm": 5.7103071212768555,
      "learning_rate": 9.737788098839549e-06,
      "loss": 0.0265,
      "step": 340
    },
    {
      "epoch": 0.2980769230769231,
      "grad_norm": 4.577970027923584,
      "learning_rate": 9.735472964618575e-06,
      "loss": 0.0288,
      "step": 341
    },
    {
      "epoch": 0.29895104895104896,
| "epoch": 0.29895104895104896, | |
| "grad_norm": 10.164754867553711, | |
| "learning_rate": 9.73314793248582e-06, | |
| "loss": 0.227, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.29982517482517484, | |
| "grad_norm": 12.016974449157715, | |
| "learning_rate": 9.730813007300984e-06, | |
| "loss": 0.3492, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3006993006993007, | |
| "grad_norm": 10.356107711791992, | |
| "learning_rate": 9.72846819394444e-06, | |
| "loss": 0.095, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.30157342657342656, | |
| "grad_norm": 5.022082328796387, | |
| "learning_rate": 9.72611349731723e-06, | |
| "loss": 0.0306, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.30244755244755245, | |
| "grad_norm": 3.5736045837402344, | |
| "learning_rate": 9.723748922341055e-06, | |
| "loss": 0.0261, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.30332167832167833, | |
| "grad_norm": 2.7318332195281982, | |
| "learning_rate": 9.721374473958263e-06, | |
| "loss": 0.0217, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3041958041958042, | |
| "grad_norm": 16.001564025878906, | |
| "learning_rate": 9.718990157131841e-06, | |
| "loss": 0.3135, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.30506993006993005, | |
| "grad_norm": 21.409624099731445, | |
| "learning_rate": 9.716595976845396e-06, | |
| "loss": 0.3394, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.30594405594405594, | |
| "grad_norm": 15.650821685791016, | |
| "learning_rate": 9.71419193810316e-06, | |
| "loss": 0.2161, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3068181818181818, | |
| "grad_norm": 4.666651725769043, | |
| "learning_rate": 9.711778045929962e-06, | |
| "loss": 0.0259, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 5.207132339477539, | |
| "learning_rate": 9.709354305371237e-06, | |
| "loss": 0.0317, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.30856643356643354, | |
| "grad_norm": 3.6765620708465576, | |
| "learning_rate": 9.706920721492995e-06, | |
| "loss": 0.0217, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3094405594405594, | |
| "grad_norm": 23.28678321838379, | |
| "learning_rate": 9.704477299381822e-06, | |
| "loss": 0.3548, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3103146853146853, | |
| "grad_norm": 13.751917839050293, | |
| "learning_rate": 9.70202404414487e-06, | |
| "loss": 0.3695, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3111888111888112, | |
| "grad_norm": 18.74825668334961, | |
| "learning_rate": 9.699560960909847e-06, | |
| "loss": 0.2066, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3120629370629371, | |
| "grad_norm": 16.306304931640625, | |
| "learning_rate": 9.697088054824995e-06, | |
| "loss": 0.0519, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3129370629370629, | |
| "grad_norm": 11.421350479125977, | |
| "learning_rate": 9.694605331059094e-06, | |
| "loss": 0.0544, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3138111888111888, | |
| "grad_norm": 10.214865684509277, | |
| "learning_rate": 9.69211279480144e-06, | |
| "loss": 0.0362, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.3146853146853147, | |
| "grad_norm": 11.61394214630127, | |
| "learning_rate": 9.689610451261841e-06, | |
| "loss": 0.1856, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3155594405594406, | |
| "grad_norm": 27.110857009887695, | |
| "learning_rate": 9.687098305670606e-06, | |
| "loss": 0.4011, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.31643356643356646, | |
| "grad_norm": 16.320880889892578, | |
| "learning_rate": 9.684576363278526e-06, | |
| "loss": 0.3078, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3173076923076923, | |
| "grad_norm": 13.66405963897705, | |
| "learning_rate": 9.682044629356874e-06, | |
| "loss": 0.093, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3181818181818182, | |
| "grad_norm": 6.054985046386719, | |
| "learning_rate": 9.67950310919739e-06, | |
| "loss": 0.0359, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.31905594405594406, | |
| "grad_norm": 4.00903844833374, | |
| "learning_rate": 9.676951808112263e-06, | |
| "loss": 0.0291, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.31993006993006995, | |
| "grad_norm": 9.689427375793457, | |
| "learning_rate": 9.674390731434129e-06, | |
| "loss": 0.0295, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3208041958041958, | |
| "grad_norm": 12.514798164367676, | |
| "learning_rate": 9.671819884516057e-06, | |
| "loss": 0.3548, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.32167832167832167, | |
| "grad_norm": 16.211997985839844, | |
| "learning_rate": 9.669239272731538e-06, | |
| "loss": 0.3533, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.32255244755244755, | |
| "grad_norm": 18.064395904541016, | |
| "learning_rate": 9.66664890147447e-06, | |
| "loss": 0.168, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.32342657342657344, | |
| "grad_norm": 1.859338641166687, | |
| "learning_rate": 9.664048776159153e-06, | |
| "loss": 0.0246, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3243006993006993, | |
| "grad_norm": 1.7575695514678955, | |
| "learning_rate": 9.661438902220274e-06, | |
| "loss": 0.0252, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.32517482517482516, | |
| "grad_norm": 4.70383358001709, | |
| "learning_rate": 9.658819285112894e-06, | |
| "loss": 0.0317, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.32604895104895104, | |
| "grad_norm": 13.62159252166748, | |
| "learning_rate": 9.656189930312443e-06, | |
| "loss": 0.2622, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.3269230769230769, | |
| "grad_norm": 14.757182121276855, | |
| "learning_rate": 9.653550843314701e-06, | |
| "loss": 0.3407, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3277972027972028, | |
| "grad_norm": 17.184926986694336, | |
| "learning_rate": 9.650902029635789e-06, | |
| "loss": 0.1394, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.32867132867132864, | |
| "grad_norm": 1.8275545835494995, | |
| "learning_rate": 9.648243494812161e-06, | |
| "loss": 0.0202, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.32954545454545453, | |
| "grad_norm": 4.400397777557373, | |
| "learning_rate": 9.64557524440059e-06, | |
| "loss": 0.0215, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.3304195804195804, | |
| "grad_norm": 4.888149261474609, | |
| "learning_rate": 9.642897283978157e-06, | |
| "loss": 0.019, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3312937062937063, | |
| "grad_norm": 8.976241111755371, | |
| "learning_rate": 9.640209619142232e-06, | |
| "loss": 0.2292, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3321678321678322, | |
| "grad_norm": 19.7901554107666, | |
| "learning_rate": 9.637512255510475e-06, | |
| "loss": 0.3571, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.333041958041958, | |
| "grad_norm": 14.594758987426758, | |
| "learning_rate": 9.634805198720816e-06, | |
| "loss": 0.2542, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3339160839160839, | |
| "grad_norm": 10.740225791931152, | |
| "learning_rate": 9.632088454431448e-06, | |
| "loss": 0.048, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.3347902097902098, | |
| "grad_norm": 3.904679775238037, | |
| "learning_rate": 9.629362028320808e-06, | |
| "loss": 0.0269, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.3356643356643357, | |
| "grad_norm": 8.046211242675781, | |
| "learning_rate": 9.626625926087574e-06, | |
| "loss": 0.0319, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.33653846153846156, | |
| "grad_norm": 3.6598877906799316, | |
| "learning_rate": 9.623880153450645e-06, | |
| "loss": 0.0443, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3374125874125874, | |
| "grad_norm": 20.70134925842285, | |
| "learning_rate": 9.621124716149132e-06, | |
| "loss": 0.3784, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3382867132867133, | |
| "grad_norm": 15.787075996398926, | |
| "learning_rate": 9.618359619942354e-06, | |
| "loss": 0.3128, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.33916083916083917, | |
| "grad_norm": 9.562203407287598, | |
| "learning_rate": 9.615584870609809e-06, | |
| "loss": 0.0773, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.34003496503496505, | |
| "grad_norm": 5.507321834564209, | |
| "learning_rate": 9.612800473951179e-06, | |
| "loss": 0.0263, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.3409090909090909, | |
| "grad_norm": 5.733173370361328, | |
| "learning_rate": 9.610006435786306e-06, | |
| "loss": 0.0222, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.34178321678321677, | |
| "grad_norm": 4.45504093170166, | |
| "learning_rate": 9.607202761955188e-06, | |
| "loss": 0.0206, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.34265734265734266, | |
| "grad_norm": 14.974676132202148, | |
| "learning_rate": 9.604389458317958e-06, | |
| "loss": 0.2932, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.34353146853146854, | |
| "grad_norm": 17.133859634399414, | |
| "learning_rate": 9.601566530754882e-06, | |
| "loss": 0.3208, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.34440559440559443, | |
| "grad_norm": 23.01615333557129, | |
| "learning_rate": 9.598733985166342e-06, | |
| "loss": 0.1291, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.34527972027972026, | |
| "grad_norm": 7.733895301818848, | |
| "learning_rate": 9.595891827472815e-06, | |
| "loss": 0.0257, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.34615384615384615, | |
| "grad_norm": 6.829917907714844, | |
| "learning_rate": 9.59304006361488e-06, | |
| "loss": 0.0202, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.34702797202797203, | |
| "grad_norm": 5.945058822631836, | |
| "learning_rate": 9.590178699553186e-06, | |
| "loss": 0.0305, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.3479020979020979, | |
| "grad_norm": 7.486079692840576, | |
| "learning_rate": 9.587307741268452e-06, | |
| "loss": 0.2337, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3487762237762238, | |
| "grad_norm": 9.138191223144531, | |
| "learning_rate": 9.584427194761452e-06, | |
| "loss": 0.3317, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.34965034965034963, | |
| "grad_norm": 16.70454216003418, | |
| "learning_rate": 9.581537066052996e-06, | |
| "loss": 0.1657, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3505244755244755, | |
| "grad_norm": 9.92362117767334, | |
| "learning_rate": 9.578637361183922e-06, | |
| "loss": 0.0393, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.3513986013986014, | |
| "grad_norm": 6.719553470611572, | |
| "learning_rate": 9.575728086215093e-06, | |
| "loss": 0.0266, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3522727272727273, | |
| "grad_norm": 6.581875324249268, | |
| "learning_rate": 9.572809247227366e-06, | |
| "loss": 0.0821, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.3531468531468531, | |
| "grad_norm": 12.581414222717285, | |
| "learning_rate": 9.569880850321588e-06, | |
| "loss": 0.3399, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.354020979020979, | |
| "grad_norm": 10.872791290283203, | |
| "learning_rate": 9.566942901618593e-06, | |
| "loss": 0.3341, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3548951048951049, | |
| "grad_norm": 12.547783851623535, | |
| "learning_rate": 9.56399540725917e-06, | |
| "loss": 0.1436, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.3557692307692308, | |
| "grad_norm": 5.812360763549805, | |
| "learning_rate": 9.561038373404062e-06, | |
| "loss": 0.0393, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.35664335664335667, | |
| "grad_norm": 4.367349147796631, | |
| "learning_rate": 9.558071806233955e-06, | |
| "loss": 0.0225, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3575174825174825, | |
| "grad_norm": 3.215648651123047, | |
| "learning_rate": 9.55509571194946e-06, | |
| "loss": 0.0201, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.3583916083916084, | |
| "grad_norm": 8.4769287109375, | |
| "learning_rate": 9.552110096771095e-06, | |
| "loss": 0.0724, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.35926573426573427, | |
| "grad_norm": 9.1715087890625, | |
| "learning_rate": 9.549114966939288e-06, | |
| "loss": 0.3416, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.36013986013986016, | |
| "grad_norm": 12.174132347106934, | |
| "learning_rate": 9.546110328714348e-06, | |
| "loss": 0.2997, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.361013986013986, | |
| "grad_norm": 9.850729942321777, | |
| "learning_rate": 9.54309618837646e-06, | |
| "loss": 0.1195, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.3618881118881119, | |
| "grad_norm": 2.96714448928833, | |
| "learning_rate": 9.54007255222567e-06, | |
| "loss": 0.0215, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.36276223776223776, | |
| "grad_norm": 4.077871322631836, | |
| "learning_rate": 9.537039426581868e-06, | |
| "loss": 0.0252, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 4.6943359375, | |
| "learning_rate": 9.533996817784786e-06, | |
| "loss": 0.0291, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.36451048951048953, | |
| "grad_norm": 14.766371726989746, | |
| "learning_rate": 9.53094473219397e-06, | |
| "loss": 0.2869, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.36538461538461536, | |
| "grad_norm": 10.966767311096191, | |
| "learning_rate": 9.52788317618878e-06, | |
| "loss": 0.3403, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.36625874125874125, | |
| "grad_norm": 14.03886604309082, | |
| "learning_rate": 9.524812156168364e-06, | |
| "loss": 0.0842, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.36713286713286714, | |
| "grad_norm": 4.030780792236328, | |
| "learning_rate": 9.52173167855166e-06, | |
| "loss": 0.0184, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.368006993006993, | |
| "grad_norm": 4.559696197509766, | |
| "learning_rate": 9.518641749777366e-06, | |
| "loss": 0.0275, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.3688811188811189, | |
| "grad_norm": 5.692295074462891, | |
| "learning_rate": 9.515542376303942e-06, | |
| "loss": 0.0302, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.36975524475524474, | |
| "grad_norm": 11.976212501525879, | |
| "learning_rate": 9.512433564609578e-06, | |
| "loss": 0.2843, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.3706293706293706, | |
| "grad_norm": 14.001188278198242, | |
| "learning_rate": 9.509315321192203e-06, | |
| "loss": 0.3338, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.3715034965034965, | |
| "grad_norm": 13.774450302124023, | |
| "learning_rate": 9.506187652569455e-06, | |
| "loss": 0.2052, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.3723776223776224, | |
| "grad_norm": 2.97493314743042, | |
| "learning_rate": 9.50305056527867e-06, | |
| "loss": 0.0166, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.3732517482517482, | |
| "grad_norm": 2.0962185859680176, | |
| "learning_rate": 9.499904065876872e-06, | |
| "loss": 0.0149, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.3741258741258741, | |
| "grad_norm": 4.462445259094238, | |
| "learning_rate": 9.496748160940762e-06, | |
| "loss": 0.022, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 6.745120048522949, | |
| "learning_rate": 9.493582857066694e-06, | |
| "loss": 0.1226, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.3758741258741259, | |
| "grad_norm": 12.13577651977539, | |
| "learning_rate": 9.490408160870671e-06, | |
| "loss": 0.3253, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3767482517482518, | |
| "grad_norm": 10.356282234191895, | |
| "learning_rate": 9.487224078988326e-06, | |
| "loss": 0.2551, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.3776223776223776, | |
| "grad_norm": 6.351311206817627, | |
| "learning_rate": 9.484030618074912e-06, | |
| "loss": 0.0444, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3784965034965035, | |
| "grad_norm": 3.7853026390075684, | |
| "learning_rate": 9.480827784805278e-06, | |
| "loss": 0.0196, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.3793706293706294, | |
| "grad_norm": 6.178353786468506, | |
| "learning_rate": 9.477615585873877e-06, | |
| "loss": 0.027, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.38024475524475526, | |
| "grad_norm": 10.301407814025879, | |
| "learning_rate": 9.474394027994722e-06, | |
| "loss": 0.0247, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.3811188811188811, | |
| "grad_norm": 46.66459655761719, | |
| "learning_rate": 9.471163117901398e-06, | |
| "loss": 0.3238, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.381993006993007, | |
| "grad_norm": 10.651971817016602, | |
| "learning_rate": 9.467922862347037e-06, | |
| "loss": 0.2915, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.38286713286713286, | |
| "grad_norm": 8.504287719726562, | |
| "learning_rate": 9.464673268104299e-06, | |
| "loss": 0.0585, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.38374125874125875, | |
| "grad_norm": 4.71248722076416, | |
| "learning_rate": 9.461414341965365e-06, | |
| "loss": 0.0194, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 8.104900360107422, | |
| "learning_rate": 9.458146090741929e-06, | |
| "loss": 0.028, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.38548951048951047, | |
| "grad_norm": 5.508533954620361, | |
| "learning_rate": 9.454868521265164e-06, | |
| "loss": 0.0836, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.38636363636363635, | |
| "grad_norm": 18.34882926940918, | |
| "learning_rate": 9.451581640385727e-06, | |
| "loss": 0.3699, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.38723776223776224, | |
| "grad_norm": 12.612072944641113, | |
| "learning_rate": 9.448285454973739e-06, | |
| "loss": 0.2886, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.3881118881118881, | |
| "grad_norm": 13.686249732971191, | |
| "learning_rate": 9.44497997191876e-06, | |
| "loss": 0.0991, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.388986013986014, | |
| "grad_norm": 7.197528839111328, | |
| "learning_rate": 9.441665198129792e-06, | |
| "loss": 0.022, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.38986013986013984, | |
| "grad_norm": 7.797658920288086, | |
| "learning_rate": 9.438341140535256e-06, | |
| "loss": 0.0238, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.39073426573426573, | |
| "grad_norm": 9.259930610656738, | |
| "learning_rate": 9.435007806082971e-06, | |
| "loss": 0.0354, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.3916083916083916, | |
| "grad_norm": 9.313063621520996, | |
| "learning_rate": 9.431665201740154e-06, | |
| "loss": 0.1674, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3924825174825175, | |
| "grad_norm": 9.840208053588867, | |
| "learning_rate": 9.428313334493394e-06, | |
| "loss": 0.3309, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.39335664335664333, | |
| "grad_norm": 9.283288955688477, | |
| "learning_rate": 9.424952211348636e-06, | |
| "loss": 0.1454, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3942307692307692, | |
| "grad_norm": 3.111521005630493, | |
| "learning_rate": 9.42158183933118e-06, | |
| "loss": 0.0268, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.3951048951048951, | |
| "grad_norm": 8.377511978149414, | |
| "learning_rate": 9.418202225485654e-06, | |
| "loss": 0.0301, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.395979020979021, | |
| "grad_norm": 10.577919006347656, | |
| "learning_rate": 9.414813376876003e-06, | |
| "loss": 0.0345, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3968531468531469, | |
| "grad_norm": 11.329242706298828, | |
| "learning_rate": 9.411415300585471e-06, | |
| "loss": 0.1777, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3977272727272727, | |
| "grad_norm": 14.575852394104004, | |
| "learning_rate": 9.408008003716595e-06, | |
| "loss": 0.3388, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3986013986013986, | |
| "grad_norm": 10.720549583435059, | |
| "learning_rate": 9.404591493391181e-06, | |
| "loss": 0.2089, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.3994755244755245, | |
| "grad_norm": 7.143903732299805, | |
| "learning_rate": 9.401165776750294e-06, | |
| "loss": 0.0403, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.40034965034965037, | |
| "grad_norm": 4.975497245788574, | |
| "learning_rate": 9.397730860954242e-06, | |
| "loss": 0.0208, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.4012237762237762, | |
| "grad_norm": 2.2027719020843506, | |
| "learning_rate": 9.394286753182558e-06, | |
| "loss": 0.0145, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.4020979020979021, | |
| "grad_norm": 8.32071304321289, | |
| "learning_rate": 9.39083346063399e-06, | |
| "loss": 0.0835, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.40297202797202797, | |
| "grad_norm": 15.139922142028809, | |
| "learning_rate": 9.387370990526485e-06, | |
| "loss": 0.3228, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.40384615384615385, | |
| "grad_norm": 16.493928909301758, | |
| "learning_rate": 9.38389935009717e-06, | |
| "loss": 0.2458, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.40472027972027974, | |
| "grad_norm": 10.39638614654541, | |
| "learning_rate": 9.38041854660234e-06, | |
| "loss": 0.0654, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.40559440559440557, | |
| "grad_norm": 3.3049631118774414, | |
| "learning_rate": 9.37692858731744e-06, | |
| "loss": 0.015, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.40646853146853146, | |
| "grad_norm": 7.281655311584473, | |
| "learning_rate": 9.373429479537061e-06, | |
| "loss": 0.0275, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.40734265734265734, | |
| "grad_norm": 3.981154441833496, | |
| "learning_rate": 9.369921230574905e-06, | |
| "loss": 0.0305, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.40821678321678323, | |
| "grad_norm": 5.4708170890808105, | |
| "learning_rate": 9.366403847763788e-06, | |
| "loss": 0.0886, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.4090909090909091, | |
| "grad_norm": 11.091739654541016, | |
| "learning_rate": 9.362877338455611e-06, | |
| "loss": 0.3132, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.40996503496503495, | |
| "grad_norm": 14.433770179748535, | |
| "learning_rate": 9.359341710021357e-06, | |
| "loss": 0.1135, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.41083916083916083, | |
| "grad_norm": 4.00241231918335, | |
| "learning_rate": 9.355796969851066e-06, | |
| "loss": 0.0209, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4117132867132867, | |
| "grad_norm": 8.080660820007324, | |
| "learning_rate": 9.352243125353825e-06, | |
| "loss": 0.029, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.4125874125874126, | |
| "grad_norm": 6.013932704925537, | |
| "learning_rate": 9.348680183957748e-06, | |
| "loss": 0.0273, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.41346153846153844, | |
| "grad_norm": 11.60200309753418, | |
| "learning_rate": 9.345108153109963e-06, | |
| "loss": 0.1821, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4143356643356643, | |
| "grad_norm": 15.039935111999512, | |
| "learning_rate": 9.3415270402766e-06, | |
| "loss": 0.3468, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.4152097902097902, | |
| "grad_norm": 19.306856155395508, | |
| "learning_rate": 9.33793685294277e-06, | |
| "loss": 0.1206, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.4160839160839161, | |
| "grad_norm": 4.838857173919678, | |
| "learning_rate": 9.334337598612549e-06, | |
| "loss": 0.0238, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.416958041958042, | |
| "grad_norm": 2.250865936279297, | |
| "learning_rate": 9.330729284808967e-06, | |
| "loss": 0.0122, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.4178321678321678, | |
| "grad_norm": 3.8062644004821777, | |
| "learning_rate": 9.327111919073988e-06, | |
| "loss": 0.0205, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.4187062937062937, | |
| "grad_norm": 9.404979705810547, | |
| "learning_rate": 9.3234855089685e-06, | |
| "loss": 0.1837, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.4195804195804196, | |
| "grad_norm": 9.634592056274414, | |
| "learning_rate": 9.319850062072289e-06, | |
| "loss": 0.2971, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.42045454545454547, | |
| "grad_norm": 11.955926895141602, | |
| "learning_rate": 9.316205585984035e-06, | |
| "loss": 0.1856, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.42132867132867136, | |
| "grad_norm": 6.618170738220215, | |
| "learning_rate": 9.312552088321287e-06, | |
| "loss": 0.0177, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.4222027972027972, | |
| "grad_norm": 1.8442251682281494, | |
| "learning_rate": 9.308889576720453e-06, | |
| "loss": 0.0203, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.4230769230769231, | |
| "grad_norm": 6.175897598266602, | |
| "learning_rate": 9.305218058836778e-06, | |
| "loss": 0.0201, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.42395104895104896, | |
| "grad_norm": 6.660616874694824, | |
| "learning_rate": 9.301537542344337e-06, | |
| "loss": 0.0246, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.42482517482517484, | |
| "grad_norm": 10.686960220336914, | |
| "learning_rate": 9.297848034936007e-06, | |
| "loss": 0.3182, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4256993006993007, | |
| "grad_norm": 12.791363716125488, | |
| "learning_rate": 9.294149544323462e-06, | |
| "loss": 0.3255, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.42657342657342656, | |
| "grad_norm": 14.707098007202148, | |
| "learning_rate": 9.290442078237154e-06, | |
| "loss": 0.0942, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.42744755244755245, | |
| "grad_norm": 5.705606937408447, | |
| "learning_rate": 9.28672564442629e-06, | |
| "loss": 0.0188, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.42832167832167833, | |
| "grad_norm": 5.5619683265686035, | |
| "learning_rate": 9.283000250658824e-06, | |
| "loss": 0.0226, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4291958041958042, | |
| "grad_norm": 3.309412717819214, | |
| "learning_rate": 9.279265904721438e-06, | |
| "loss": 0.0138, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.43006993006993005, | |
| "grad_norm": 7.550408363342285, | |
| "learning_rate": 9.275522614419522e-06, | |
| "loss": 0.1292, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.43094405594405594, | |
| "grad_norm": 13.313096046447754, | |
| "learning_rate": 9.271770387577168e-06, | |
| "loss": 0.334, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.4318181818181818, | |
| "grad_norm": 7.744288921356201, | |
| "learning_rate": 9.26800923203714e-06, | |
| "loss": 0.132, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.4326923076923077, | |
| "grad_norm": 3.6891541481018066, | |
| "learning_rate": 9.26423915566087e-06, | |
| "loss": 0.0097, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.43356643356643354, | |
| "grad_norm": 4.717189788818359, | |
| "learning_rate": 9.26046016632843e-06, | |
| "loss": 0.0206, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.4344405594405594, | |
| "grad_norm": 3.2968766689300537, | |
| "learning_rate": 9.256672271938527e-06, | |
| "loss": 0.0137, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.4353146853146853, | |
| "grad_norm": 14.217388153076172, | |
| "learning_rate": 9.252875480408479e-06, | |
| "loss": 0.2592, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.4361888111888112, | |
| "grad_norm": 17.267000198364258, | |
| "learning_rate": 9.2490697996742e-06, | |
| "loss": 0.3311, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.4370629370629371, | |
| "grad_norm": 21.242443084716797, | |
| "learning_rate": 9.245255237690182e-06, | |
| "loss": 0.1707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4379370629370629, | |
| "grad_norm": 5.021553039550781, | |
| "learning_rate": 9.241431802429484e-06, | |
| "loss": 0.0147, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.4388111888111888, | |
| "grad_norm": 3.196441888809204, | |
| "learning_rate": 9.237599501883711e-06, | |
| "loss": 0.013, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.4396853146853147, | |
| "grad_norm": 3.9709372520446777, | |
| "learning_rate": 9.233758344062996e-06, | |
| "loss": 0.029, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.4405594405594406, | |
| "grad_norm": 10.392723083496094, | |
| "learning_rate": 9.229908336995986e-06, | |
| "loss": 0.1646, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.44143356643356646, | |
| "grad_norm": 10.8936767578125, | |
| "learning_rate": 9.226049488729825e-06, | |
| "loss": 0.3277, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.4423076923076923, | |
| "grad_norm": 15.315105438232422, | |
| "learning_rate": 9.222181807330135e-06, | |
| "loss": 0.1962, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.4431818181818182, | |
| "grad_norm": 6.867097854614258, | |
| "learning_rate": 9.218305300881004e-06, | |
| "loss": 0.0354, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.44405594405594406, | |
| "grad_norm": 11.064175605773926, | |
| "learning_rate": 9.21441997748496e-06, | |
| "loss": 0.0302, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.44493006993006995, | |
| "grad_norm": 3.0181567668914795, | |
| "learning_rate": 9.210525845262966e-06, | |
| "loss": 0.0185, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.4458041958041958, | |
| "grad_norm": 3.8957598209381104, | |
| "learning_rate": 9.206622912354395e-06, | |
| "loss": 0.0161, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.44667832167832167, | |
| "grad_norm": 10.6497220993042, | |
| "learning_rate": 9.202711186917011e-06, | |
| "loss": 0.2501, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.44755244755244755, | |
| "grad_norm": 9.581523895263672, | |
| "learning_rate": 9.198790677126959e-06, | |
| "loss": 0.3042, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.44842657342657344, | |
| "grad_norm": 11.025154113769531, | |
| "learning_rate": 9.194861391178749e-06, | |
| "loss": 0.0634, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.4493006993006993, | |
| "grad_norm": 3.7636709213256836, | |
| "learning_rate": 9.190923337285225e-06, | |
| "loss": 0.0167, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.45017482517482516, | |
| "grad_norm": 4.047549724578857, | |
| "learning_rate": 9.186976523677567e-06, | |
| "loss": 0.0133, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.45104895104895104, | |
| "grad_norm": 2.4612019062042236, | |
| "learning_rate": 9.183020958605258e-06, | |
| "loss": 0.0157, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.4519230769230769, | |
| "grad_norm": 8.521439552307129, | |
| "learning_rate": 9.179056650336074e-06, | |
| "loss": 0.2934, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.4527972027972028, | |
| "grad_norm": 12.02381420135498, | |
| "learning_rate": 9.175083607156067e-06, | |
| "loss": 0.326, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.45367132867132864, | |
| "grad_norm": 11.01368236541748, | |
| "learning_rate": 9.171101837369549e-06, | |
| "loss": 0.102, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 1.831136703491211, | |
| "learning_rate": 9.167111349299065e-06, | |
| "loss": 0.015, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4554195804195804, | |
| "grad_norm": 4.148126125335693, | |
| "learning_rate": 9.16311215128539e-06, | |
| "loss": 0.0141, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.4562937062937063, | |
| "grad_norm": 6.365659713745117, | |
| "learning_rate": 9.159104251687498e-06, | |
| "loss": 0.0294, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.4571678321678322, | |
| "grad_norm": 11.267927169799805, | |
| "learning_rate": 9.155087658882555e-06, | |
| "loss": 0.3188, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.458041958041958, | |
| "grad_norm": 11.216529846191406, | |
| "learning_rate": 9.151062381265897e-06, | |
| "loss": 0.3106, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.4589160839160839, | |
| "grad_norm": 12.907726287841797, | |
| "learning_rate": 9.14702842725101e-06, | |
| "loss": 0.1507, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.4597902097902098, | |
| "grad_norm": 6.234944820404053, | |
| "learning_rate": 9.142985805269516e-06, | |
| "loss": 0.0401, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.4606643356643357, | |
| "grad_norm": 5.195436954498291, | |
| "learning_rate": 9.138934523771157e-06, | |
| "loss": 0.016, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 4.245481014251709, | |
| "learning_rate": 9.134874591223773e-06, | |
| "loss": 0.0184, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.4624125874125874, | |
| "grad_norm": 6.0341291427612305, | |
| "learning_rate": 9.130806016113283e-06, | |
| "loss": 0.0107, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.4632867132867133, | |
| "grad_norm": 10.869454383850098, | |
| "learning_rate": 9.126728806943676e-06, | |
| "loss": 0.2808, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.46416083916083917, | |
| "grad_norm": 9.308477401733398, | |
| "learning_rate": 9.122642972236983e-06, | |
| "loss": 0.2343, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.46503496503496505, | |
| "grad_norm": 8.253998756408691, | |
| "learning_rate": 9.11854852053327e-06, | |
| "loss": 0.0515, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4659090909090909, | |
| "grad_norm": 3.3931519985198975, | |
| "learning_rate": 9.114445460390605e-06, | |
| "loss": 0.0173, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.46678321678321677, | |
| "grad_norm": 3.145233631134033, | |
| "learning_rate": 9.110333800385056e-06, | |
| "loss": 0.0078, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.46765734265734266, | |
| "grad_norm": 9.047693252563477, | |
| "learning_rate": 9.10621354911066e-06, | |
| "loss": 0.1088, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.46853146853146854, | |
| "grad_norm": 18.222139358520508, | |
| "learning_rate": 9.102084715179423e-06, | |
| "loss": 0.3272, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.46940559440559443, | |
| "grad_norm": 14.845891952514648, | |
| "learning_rate": 9.097947307221274e-06, | |
| "loss": 0.2607, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.47027972027972026, | |
| "grad_norm": 18.673412322998047, | |
| "learning_rate": 9.093801333884076e-06, | |
| "loss": 0.0831, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.47115384615384615, | |
| "grad_norm": 5.903540134429932, | |
| "learning_rate": 9.089646803833589e-06, | |
| "loss": 0.0232, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.47202797202797203, | |
| "grad_norm": 4.757245063781738, | |
| "learning_rate": 9.085483725753458e-06, | |
| "loss": 0.0182, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4729020979020979, | |
| "grad_norm": 9.735963821411133, | |
| "learning_rate": 9.081312108345199e-06, | |
| "loss": 0.0921, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.4737762237762238, | |
| "grad_norm": 15.792946815490723, | |
| "learning_rate": 9.07713196032817e-06, | |
| "loss": 0.3199, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.47465034965034963, | |
| "grad_norm": 13.27805233001709, | |
| "learning_rate": 9.072943290439566e-06, | |
| "loss": 0.3297, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.4755244755244755, | |
| "grad_norm": 10.719405174255371, | |
| "learning_rate": 9.06874610743439e-06, | |
| "loss": 0.0497, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.4763986013986014, | |
| "grad_norm": 3.7867608070373535, | |
| "learning_rate": 9.064540420085438e-06, | |
| "loss": 0.0287, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.4772727272727273, | |
| "grad_norm": 2.8741486072540283, | |
| "learning_rate": 9.060326237183286e-06, | |
| "loss": 0.0139, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.4781468531468531, | |
| "grad_norm": 2.266998529434204, | |
| "learning_rate": 9.056103567536264e-06, | |
| "loss": 0.0218, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.479020979020979, | |
| "grad_norm": 9.35477066040039, | |
| "learning_rate": 9.051872419970439e-06, | |
| "loss": 0.2034, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.4798951048951049, | |
| "grad_norm": 7.704215049743652, | |
| "learning_rate": 9.047632803329602e-06, | |
| "loss": 0.3229, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.4807692307692308, | |
| "grad_norm": 14.413203239440918, | |
| "learning_rate": 9.043384726475244e-06, | |
| "loss": 0.1735, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.48164335664335667, | |
| "grad_norm": 7.794137477874756, | |
| "learning_rate": 9.039128198286538e-06, | |
| "loss": 0.0181, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.4825174825174825, | |
| "grad_norm": 5.036735534667969, | |
| "learning_rate": 9.034863227660326e-06, | |
| "loss": 0.0337, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.4833916083916084, | |
| "grad_norm": 11.02287483215332, | |
| "learning_rate": 9.03058982351109e-06, | |
| "loss": 0.0387, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.48426573426573427, | |
| "grad_norm": 8.359135627746582, | |
| "learning_rate": 9.026307994770946e-06, | |
| "loss": 0.1734, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.48513986013986016, | |
| "grad_norm": 12.85606861114502, | |
| "learning_rate": 9.022017750389611e-06, | |
| "loss": 0.3206, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.486013986013986, | |
| "grad_norm": 11.453328132629395, | |
| "learning_rate": 9.0177190993344e-06, | |
| "loss": 0.1628, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.4868881118881119, | |
| "grad_norm": 8.016172409057617, | |
| "learning_rate": 9.013412050590193e-06, | |
| "loss": 0.0388, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.48776223776223776, | |
| "grad_norm": 3.9730803966522217, | |
| "learning_rate": 9.009096613159426e-06, | |
| "loss": 0.0181, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.48863636363636365, | |
| "grad_norm": 3.319014072418213, | |
| "learning_rate": 9.00477279606207e-06, | |
| "loss": 0.0217, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.48951048951048953, | |
| "grad_norm": 7.716640472412109, | |
| "learning_rate": 9.000440608335604e-06, | |
| "loss": 0.0669, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.49038461538461536, | |
| "grad_norm": 13.569385528564453, | |
| "learning_rate": 8.996100059035012e-06, | |
| "loss": 0.2975, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.49125874125874125, | |
| "grad_norm": 12.194028854370117, | |
| "learning_rate": 8.99175115723275e-06, | |
| "loss": 0.2975, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.49213286713286714, | |
| "grad_norm": 8.394533157348633, | |
| "learning_rate": 8.987393912018732e-06, | |
| "loss": 0.0476, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.493006993006993, | |
| "grad_norm": 4.18928861618042, | |
| "learning_rate": 8.983028332500314e-06, | |
| "loss": 0.025, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4938811188811189, | |
| "grad_norm": 6.169031143188477, | |
| "learning_rate": 8.978654427802267e-06, | |
| "loss": 0.0201, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.49475524475524474, | |
| "grad_norm": 1.4415744543075562, | |
| "learning_rate": 8.974272207066767e-06, | |
| "loss": 0.0112, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4956293706293706, | |
| "grad_norm": 18.2711124420166, | |
| "learning_rate": 8.969881679453372e-06, | |
| "loss": 0.3302, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.4965034965034965, | |
| "grad_norm": 14.267679214477539, | |
| "learning_rate": 8.965482854139003e-06, | |
| "loss": 0.3346, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4973776223776224, | |
| "grad_norm": 13.41834545135498, | |
| "learning_rate": 8.961075740317919e-06, | |
| "loss": 0.0692, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.4982517482517482, | |
| "grad_norm": 3.7099716663360596, | |
| "learning_rate": 8.956660347201711e-06, | |
| "loss": 0.0258, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.4991258741258741, | |
| "grad_norm": 6.250145435333252, | |
| "learning_rate": 8.95223668401927e-06, | |
| "loss": 0.0265, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 6.77029275894165, | |
| "learning_rate": 8.947804760016778e-06, | |
| "loss": 0.037, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.5008741258741258, | |
| "grad_norm": 16.265125274658203, | |
| "learning_rate": 8.943364584457675e-06, | |
| "loss": 0.297, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5017482517482518, | |
| "grad_norm": 18.595430374145508, | |
| "learning_rate": 8.938916166622654e-06, | |
| "loss": 0.3284, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5026223776223776, | |
| "grad_norm": 13.660833358764648, | |
| "learning_rate": 8.934459515809638e-06, | |
| "loss": 0.1424, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5034965034965035, | |
| "grad_norm": 5.335470199584961, | |
| "learning_rate": 8.929994641333748e-06, | |
| "loss": 0.0269, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5043706293706294, | |
| "grad_norm": 3.783602476119995, | |
| "learning_rate": 8.92552155252731e-06, | |
| "loss": 0.0156, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.5052447552447552, | |
| "grad_norm": 2.616309642791748, | |
| "learning_rate": 8.921040258739804e-06, | |
| "loss": 0.0141, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.5061188811188811, | |
| "grad_norm": 12.275871276855469, | |
| "learning_rate": 8.916550769337866e-06, | |
| "loss": 0.2118, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.506993006993007, | |
| "grad_norm": 17.26217269897461, | |
| "learning_rate": 8.912053093705265e-06, | |
| "loss": 0.3094, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5078671328671329, | |
| "grad_norm": 17.747581481933594, | |
| "learning_rate": 8.907547241242874e-06, | |
| "loss": 0.2316, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.5087412587412588, | |
| "grad_norm": 9.435482025146484, | |
| "learning_rate": 8.903033221368662e-06, | |
| "loss": 0.047, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.5096153846153846, | |
| "grad_norm": 4.142625331878662, | |
| "learning_rate": 8.898511043517668e-06, | |
| "loss": 0.0297, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.5104895104895105, | |
| "grad_norm": 2.7383079528808594, | |
| "learning_rate": 8.893980717141983e-06, | |
| "loss": 0.0094, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.5113636363636364, | |
| "grad_norm": 2.9339723587036133, | |
| "learning_rate": 8.889442251710728e-06, | |
| "loss": 0.0152, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.5122377622377622, | |
| "grad_norm": 18.222333908081055, | |
| "learning_rate": 8.884895656710034e-06, | |
| "loss": 0.329, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.5131118881118881, | |
| "grad_norm": 12.440513610839844, | |
| "learning_rate": 8.88034094164303e-06, | |
| "loss": 0.2579, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.513986013986014, | |
| "grad_norm": 11.368993759155273, | |
| "learning_rate": 8.875778116029816e-06, | |
| "loss": 0.0838, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.5148601398601399, | |
| "grad_norm": 1.7992818355560303, | |
| "learning_rate": 8.871207189407441e-06, | |
| "loss": 0.0099, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.5157342657342657, | |
| "grad_norm": 1.8870066404342651, | |
| "learning_rate": 8.866628171329885e-06, | |
| "loss": 0.012, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5166083916083916, | |
| "grad_norm": 3.4279234409332275, | |
| "learning_rate": 8.862041071368048e-06, | |
| "loss": 0.0367, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.5174825174825175, | |
| "grad_norm": 12.216835975646973, | |
| "learning_rate": 8.857445899109716e-06, | |
| "loss": 0.3495, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5183566433566433, | |
| "grad_norm": 9.349586486816406, | |
| "learning_rate": 8.85284266415955e-06, | |
| "loss": 0.2921, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.5192307692307693, | |
| "grad_norm": 13.595704078674316, | |
| "learning_rate": 8.84823137613906e-06, | |
| "loss": 0.0609, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5201048951048951, | |
| "grad_norm": 4.4822001457214355, | |
| "learning_rate": 8.843612044686594e-06, | |
| "loss": 0.0216, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5209790209790209, | |
| "grad_norm": 2.7091450691223145, | |
| "learning_rate": 8.838984679457308e-06, | |
| "loss": 0.0097, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5218531468531469, | |
| "grad_norm": 3.3083577156066895, | |
| "learning_rate": 8.834349290123152e-06, | |
| "loss": 0.0141, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.5227272727272727, | |
| "grad_norm": 16.26782989501953, | |
| "learning_rate": 8.829705886372845e-06, | |
| "loss": 0.2572, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5236013986013986, | |
| "grad_norm": 13.288602828979492, | |
| "learning_rate": 8.825054477911861e-06, | |
| "loss": 0.3221, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.5244755244755245, | |
| "grad_norm": 21.329805374145508, | |
| "learning_rate": 8.820395074462403e-06, | |
| "loss": 0.0986, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5253496503496503, | |
| "grad_norm": 5.134272575378418, | |
| "learning_rate": 8.815727685763383e-06, | |
| "loss": 0.0177, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.5262237762237763, | |
| "grad_norm": 4.260464668273926, | |
| "learning_rate": 8.811052321570405e-06, | |
| "loss": 0.0152, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.5270979020979021, | |
| "grad_norm": 7.355073928833008, | |
| "learning_rate": 8.806368991655747e-06, | |
| "loss": 0.0184, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.527972027972028, | |
| "grad_norm": 6.010684490203857, | |
| "learning_rate": 8.801677705808333e-06, | |
| "loss": 0.0721, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.5288461538461539, | |
| "grad_norm": 16.951820373535156, | |
| "learning_rate": 8.796978473833712e-06, | |
| "loss": 0.3193, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.5297202797202797, | |
| "grad_norm": 12.741106986999512, | |
| "learning_rate": 8.792271305554054e-06, | |
| "loss": 0.214, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5305944055944056, | |
| "grad_norm": 8.974580764770508, | |
| "learning_rate": 8.787556210808101e-06, | |
| "loss": 0.04, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.5314685314685315, | |
| "grad_norm": 4.283096790313721, | |
| "learning_rate": 8.782833199451177e-06, | |
| "loss": 0.0237, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5323426573426573, | |
| "grad_norm": 4.1386799812316895, | |
| "learning_rate": 8.778102281355143e-06, | |
| "loss": 0.0193, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.5332167832167832, | |
| "grad_norm": 3.923788547515869, | |
| "learning_rate": 8.773363466408393e-06, | |
| "loss": 0.0365, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5340909090909091, | |
| "grad_norm": 27.081817626953125, | |
| "learning_rate": 8.768616764515822e-06, | |
| "loss": 0.2997, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.534965034965035, | |
| "grad_norm": 15.323077201843262, | |
| "learning_rate": 8.763862185598814e-06, | |
| "loss": 0.2383, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5358391608391608, | |
| "grad_norm": 8.706354141235352, | |
| "learning_rate": 8.759099739595215e-06, | |
| "loss": 0.0542, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.5367132867132867, | |
| "grad_norm": 1.2458351850509644, | |
| "learning_rate": 8.754329436459313e-06, | |
| "loss": 0.0079, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5375874125874126, | |
| "grad_norm": 8.164717674255371, | |
| "learning_rate": 8.749551286161824e-06, | |
| "loss": 0.0255, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 3.613734245300293, | |
| "learning_rate": 8.744765298689859e-06, | |
| "loss": 0.0215, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.5393356643356644, | |
| "grad_norm": 12.16817855834961, | |
| "learning_rate": 8.739971484046913e-06, | |
| "loss": 0.2652, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.5402097902097902, | |
| "grad_norm": 13.061808586120605, | |
| "learning_rate": 8.735169852252848e-06, | |
| "loss": 0.3201, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.541083916083916, | |
| "grad_norm": 14.585681915283203, | |
| "learning_rate": 8.73036041334385e-06, | |
| "loss": 0.1009, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.541958041958042, | |
| "grad_norm": 1.6156708002090454, | |
| "learning_rate": 8.725543177372435e-06, | |
| "loss": 0.0099, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5428321678321678, | |
| "grad_norm": 5.239449501037598, | |
| "learning_rate": 8.720718154407413e-06, | |
| "loss": 0.0181, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.5437062937062938, | |
| "grad_norm": 6.884598255157471, | |
| "learning_rate": 8.715885354533871e-06, | |
| "loss": 0.032, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.5445804195804196, | |
| "grad_norm": 4.45266580581665, | |
| "learning_rate": 8.71104478785315e-06, | |
| "loss": 0.1454, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 15.045814514160156, | |
| "learning_rate": 8.706196464482821e-06, | |
| "loss": 0.3334, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.5463286713286714, | |
| "grad_norm": 14.02426815032959, | |
| "learning_rate": 8.701340394556677e-06, | |
| "loss": 0.186, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.5472027972027972, | |
| "grad_norm": 4.265748977661133, | |
| "learning_rate": 8.696476588224691e-06, | |
| "loss": 0.0099, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.5480769230769231, | |
| "grad_norm": 3.733309507369995, | |
| "learning_rate": 8.691605055653018e-06, | |
| "loss": 0.027, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.548951048951049, | |
| "grad_norm": 3.797884225845337, | |
| "learning_rate": 8.686725807023955e-06, | |
| "loss": 0.0251, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.5498251748251748, | |
| "grad_norm": 4.356199741363525, | |
| "learning_rate": 8.681838852535928e-06, | |
| "loss": 0.0777, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.5506993006993007, | |
| "grad_norm": 10.192365646362305, | |
| "learning_rate": 8.67694420240347e-06, | |
| "loss": 0.3088, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5515734265734266, | |
| "grad_norm": 10.459392547607422, | |
| "learning_rate": 8.672041866857198e-06, | |
| "loss": 0.1515, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.5524475524475524, | |
| "grad_norm": 7.445313453674316, | |
| "learning_rate": 8.667131856143793e-06, | |
| "loss": 0.0265, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.5533216783216783, | |
| "grad_norm": 3.1520612239837646, | |
| "learning_rate": 8.662214180525982e-06, | |
| "loss": 0.0179, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.5541958041958042, | |
| "grad_norm": 4.792072296142578, | |
| "learning_rate": 8.657288850282508e-06, | |
| "loss": 0.0204, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.5550699300699301, | |
| "grad_norm": 6.317907333374023, | |
| "learning_rate": 8.652355875708118e-06, | |
| "loss": 0.1076, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5559440559440559, | |
| "grad_norm": 16.156579971313477, | |
| "learning_rate": 8.647415267113533e-06, | |
| "loss": 0.3175, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5568181818181818, | |
| "grad_norm": 7.539283275604248, | |
| "learning_rate": 8.64246703482543e-06, | |
| "loss": 0.2469, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.5576923076923077, | |
| "grad_norm": 10.567534446716309, | |
| "learning_rate": 8.637511189186425e-06, | |
| "loss": 0.0727, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5585664335664335, | |
| "grad_norm": 4.3527703285217285, | |
| "learning_rate": 8.632547740555044e-06, | |
| "loss": 0.0272, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.5594405594405595, | |
| "grad_norm": 2.90842342376709, | |
| "learning_rate": 8.627576699305703e-06, | |
| "loss": 0.0152, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5603146853146853, | |
| "grad_norm": 3.240353584289551, | |
| "learning_rate": 8.622598075828692e-06, | |
| "loss": 0.0144, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.5611888111888111, | |
| "grad_norm": 15.213561058044434, | |
| "learning_rate": 8.61761188053015e-06, | |
| "loss": 0.2274, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5620629370629371, | |
| "grad_norm": 18.974872589111328, | |
| "learning_rate": 8.612618123832033e-06, | |
| "loss": 0.3185, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.5629370629370629, | |
| "grad_norm": 12.162973403930664, | |
| "learning_rate": 8.607616816172112e-06, | |
| "loss": 0.0834, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5638111888111889, | |
| "grad_norm": 4.089027404785156, | |
| "learning_rate": 8.602607968003935e-06, | |
| "loss": 0.0228, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5646853146853147, | |
| "grad_norm": 5.622372150421143, | |
| "learning_rate": 8.597591589796816e-06, | |
| "loss": 0.0205, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5655594405594405, | |
| "grad_norm": 2.607387065887451, | |
| "learning_rate": 8.5925676920358e-06, | |
| "loss": 0.0212, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.5664335664335665, | |
| "grad_norm": 10.069025039672852, | |
| "learning_rate": 8.587536285221656e-06, | |
| "loss": 0.2098, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.5673076923076923, | |
| "grad_norm": 11.376680374145508, | |
| "learning_rate": 8.582497379870846e-06, | |
| "loss": 0.3261, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.5681818181818182, | |
| "grad_norm": 22.801218032836914, | |
| "learning_rate": 8.577450986515505e-06, | |
| "loss": 0.1493, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5690559440559441, | |
| "grad_norm": 2.2024729251861572, | |
| "learning_rate": 8.57239711570342e-06, | |
| "loss": 0.0142, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.5699300699300699, | |
| "grad_norm": 4.151445388793945, | |
| "learning_rate": 8.567335777998003e-06, | |
| "loss": 0.0322, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5708041958041958, | |
| "grad_norm": 2.4866418838500977, | |
| "learning_rate": 8.562266983978278e-06, | |
| "loss": 0.0163, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.5716783216783217, | |
| "grad_norm": 2.902174949645996, | |
| "learning_rate": 8.557190744238854e-06, | |
| "loss": 0.0222, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5725524475524476, | |
| "grad_norm": 1320.8974609375, | |
| "learning_rate": 8.5521070693899e-06, | |
| "loss": 0.3591, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5734265734265734, | |
| "grad_norm": 14.313606262207031, | |
| "learning_rate": 8.547015970057125e-06, | |
| "loss": 0.1869, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.5743006993006993, | |
| "grad_norm": 11.474748611450195, | |
| "learning_rate": 8.54191745688176e-06, | |
| "loss": 0.0618, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.5751748251748252, | |
| "grad_norm": 6.402536869049072, | |
| "learning_rate": 8.536811540520529e-06, | |
| "loss": 0.0241, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.576048951048951, | |
| "grad_norm": 4.930122375488281, | |
| "learning_rate": 8.531698231645631e-06, | |
| "loss": 0.0165, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 4.849780559539795, | |
| "learning_rate": 8.526577540944718e-06, | |
| "loss": 0.0517, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5777972027972028, | |
| "grad_norm": 12.813011169433594, | |
| "learning_rate": 8.521449479120866e-06, | |
| "loss": 0.2966, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.5786713286713286, | |
| "grad_norm": 8.656574249267578, | |
| "learning_rate": 8.516314056892565e-06, | |
| "loss": 0.2311, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5795454545454546, | |
| "grad_norm": 7.297996520996094, | |
| "learning_rate": 8.511171284993686e-06, | |
| "loss": 0.0564, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.5804195804195804, | |
| "grad_norm": 4.41420316696167, | |
| "learning_rate": 8.506021174173463e-06, | |
| "loss": 0.0188, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.5812937062937062, | |
| "grad_norm": 3.025160551071167, | |
| "learning_rate": 8.500863735196462e-06, | |
| "loss": 0.0258, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5821678321678322, | |
| "grad_norm": 3.6793315410614014, | |
| "learning_rate": 8.495698978842582e-06, | |
| "loss": 0.0246, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.583041958041958, | |
| "grad_norm": 15.452958106994629, | |
| "learning_rate": 8.490526915907001e-06, | |
| "loss": 0.3023, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.583916083916084, | |
| "grad_norm": 14.039949417114258, | |
| "learning_rate": 8.485347557200177e-06, | |
| "loss": 0.3056, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.5847902097902098, | |
| "grad_norm": 13.447628021240234, | |
| "learning_rate": 8.480160913547815e-06, | |
| "loss": 0.0796, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.5856643356643356, | |
| "grad_norm": 2.245462656021118, | |
| "learning_rate": 8.474966995790848e-06, | |
| "loss": 0.0115, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5865384615384616, | |
| "grad_norm": 5.804445743560791, | |
| "learning_rate": 8.46976581478541e-06, | |
| "loss": 0.0247, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.5874125874125874, | |
| "grad_norm": 1.934926986694336, | |
| "learning_rate": 8.46455738140282e-06, | |
| "loss": 0.0121, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5882867132867133, | |
| "grad_norm": 5.4213948249816895, | |
| "learning_rate": 8.459341706529557e-06, | |
| "loss": 0.1654, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.5891608391608392, | |
| "grad_norm": 15.305550575256348, | |
| "learning_rate": 8.454118801067229e-06, | |
| "loss": 0.3116, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.590034965034965, | |
| "grad_norm": 11.948801040649414, | |
| "learning_rate": 8.448888675932563e-06, | |
| "loss": 0.0994, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5909090909090909, | |
| "grad_norm": 1.4484741687774658, | |
| "learning_rate": 8.443651342057377e-06, | |
| "loss": 0.0067, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5917832167832168, | |
| "grad_norm": 7.12942361831665, | |
| "learning_rate": 8.438406810388549e-06, | |
| "loss": 0.0241, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.5926573426573427, | |
| "grad_norm": 5.489367485046387, | |
| "learning_rate": 8.43315509188801e-06, | |
| "loss": 0.0251, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5935314685314685, | |
| "grad_norm": 4.826040744781494, | |
| "learning_rate": 8.42789619753271e-06, | |
| "loss": 0.1216, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.5944055944055944, | |
| "grad_norm": 17.75969886779785, | |
| "learning_rate": 8.422630138314595e-06, | |
| "loss": 0.2945, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5952797202797203, | |
| "grad_norm": 13.765280723571777, | |
| "learning_rate": 8.417356925240587e-06, | |
| "loss": 0.1571, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.5961538461538461, | |
| "grad_norm": 9.371541976928711, | |
| "learning_rate": 8.412076569332568e-06, | |
| "loss": 0.0404, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.597027972027972, | |
| "grad_norm": 5.822971343994141, | |
| "learning_rate": 8.406789081627337e-06, | |
| "loss": 0.017, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.5979020979020979, | |
| "grad_norm": 3.478846311569214, | |
| "learning_rate": 8.401494473176614e-06, | |
| "loss": 0.0138, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5987762237762237, | |
| "grad_norm": 3.4902524948120117, | |
| "learning_rate": 8.39619275504699e-06, | |
| "loss": 0.021, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5996503496503497, | |
| "grad_norm": 20.912506103515625, | |
| "learning_rate": 8.390883938319922e-06, | |
| "loss": 0.3059, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.6005244755244755, | |
| "grad_norm": 14.454429626464844, | |
| "learning_rate": 8.38556803409171e-06, | |
| "loss": 0.2583, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.6013986013986014, | |
| "grad_norm": 9.838977813720703, | |
| "learning_rate": 8.380245053473452e-06, | |
| "loss": 0.0327, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.6022727272727273, | |
| "grad_norm": 3.255235433578491, | |
| "learning_rate": 8.374915007591053e-06, | |
| "loss": 0.0138, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.6031468531468531, | |
| "grad_norm": 7.5467681884765625, | |
| "learning_rate": 8.36957790758518e-06, | |
| "loss": 0.0278, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6040209790209791, | |
| "grad_norm": 7.938135147094727, | |
| "learning_rate": 8.36423376461124e-06, | |
| "loss": 0.0298, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.6048951048951049, | |
| "grad_norm": 7.151315689086914, | |
| "learning_rate": 8.358882589839365e-06, | |
| "loss": 0.1949, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.6057692307692307, | |
| "grad_norm": 9.231287002563477, | |
| "learning_rate": 8.353524394454388e-06, | |
| "loss": 0.2325, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.6066433566433567, | |
| "grad_norm": 12.112785339355469, | |
| "learning_rate": 8.348159189655809e-06, | |
| "loss": 0.0363, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.6075174825174825, | |
| "grad_norm": 4.6648268699646, | |
| "learning_rate": 8.342786986657781e-06, | |
| "loss": 0.0224, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6083916083916084, | |
| "grad_norm": 6.105136871337891, | |
| "learning_rate": 8.337407796689088e-06, | |
| "loss": 0.031, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.6092657342657343, | |
| "grad_norm": 4.181456565856934, | |
| "learning_rate": 8.332021630993115e-06, | |
| "loss": 0.0201, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.6101398601398601, | |
| "grad_norm": 12.197380065917969, | |
| "learning_rate": 8.326628500827826e-06, | |
| "loss": 0.2164, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.611013986013986, | |
| "grad_norm": 14.289386749267578, | |
| "learning_rate": 8.321228417465747e-06, | |
| "loss": 0.3106, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.6118881118881119, | |
| "grad_norm": 11.324996948242188, | |
| "learning_rate": 8.315821392193932e-06, | |
| "loss": 0.1494, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6127622377622378, | |
| "grad_norm": 3.6152749061584473, | |
| "learning_rate": 8.310407436313947e-06, | |
| "loss": 0.0152, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.6136363636363636, | |
| "grad_norm": 4.090268611907959, | |
| "learning_rate": 8.304986561141844e-06, | |
| "loss": 0.017, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.6145104895104895, | |
| "grad_norm": 3.0178444385528564, | |
| "learning_rate": 8.299558778008137e-06, | |
| "loss": 0.0129, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.9255274534225464, | |
| "learning_rate": 8.294124098257782e-06, | |
| "loss": 0.0107, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.6162587412587412, | |
| "grad_norm": 18.309310913085938, | |
| "learning_rate": 8.288682533250139e-06, | |
| "loss": 0.3289, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.6171328671328671, | |
| "grad_norm": 12.632488250732422, | |
| "learning_rate": 8.283234094358976e-06, | |
| "loss": 0.1573, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.618006993006993, | |
| "grad_norm": 5.997402191162109, | |
| "learning_rate": 8.277778792972417e-06, | |
| "loss": 0.0226, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.6188811188811189, | |
| "grad_norm": 2.9253907203674316, | |
| "learning_rate": 8.272316640492932e-06, | |
| "loss": 0.0197, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.6197552447552448, | |
| "grad_norm": 4.174371242523193, | |
| "learning_rate": 8.266847648337312e-06, | |
| "loss": 0.0251, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.6206293706293706, | |
| "grad_norm": 7.968226909637451, | |
| "learning_rate": 8.261371827936645e-06, | |
| "loss": 0.0767, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6215034965034965, | |
| "grad_norm": 14.977242469787598, | |
| "learning_rate": 8.255889190736288e-06, | |
| "loss": 0.2903, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.6223776223776224, | |
| "grad_norm": 8.295584678649902, | |
| "learning_rate": 8.25039974819585e-06, | |
| "loss": 0.1982, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.6232517482517482, | |
| "grad_norm": 12.30835247039795, | |
| "learning_rate": 8.244903511789158e-06, | |
| "loss": 0.0708, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.6241258741258742, | |
| "grad_norm": 4.109304428100586, | |
| "learning_rate": 8.239400493004249e-06, | |
| "loss": 0.0242, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 5.172191619873047, | |
| "learning_rate": 8.233890703343329e-06, | |
| "loss": 0.022, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6258741258741258, | |
| "grad_norm": 4.665693283081055, | |
| "learning_rate": 8.228374154322755e-06, | |
| "loss": 0.016, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6267482517482518, | |
| "grad_norm": 6.270390510559082, | |
| "learning_rate": 8.22285085747302e-06, | |
| "loss": 0.2092, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.6276223776223776, | |
| "grad_norm": 8.817326545715332, | |
| "learning_rate": 8.217320824338713e-06, | |
| "loss": 0.2758, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.6284965034965035, | |
| "grad_norm": 11.41317367553711, | |
| "learning_rate": 8.21178406647851e-06, | |
| "loss": 0.0707, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.6293706293706294, | |
| "grad_norm": 5.5788445472717285, | |
| "learning_rate": 8.206240595465137e-06, | |
| "loss": 0.0084, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6302447552447552, | |
| "grad_norm": 3.08897066116333, | |
| "learning_rate": 8.20069042288535e-06, | |
| "loss": 0.0123, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.6311188811188811, | |
| "grad_norm": 9.162919998168945, | |
| "learning_rate": 8.19513356033992e-06, | |
| "loss": 0.0339, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.631993006993007, | |
| "grad_norm": 7.19411563873291, | |
| "learning_rate": 8.189570019443597e-06, | |
| "loss": 0.218, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.6328671328671329, | |
| "grad_norm": 10.532979011535645, | |
| "learning_rate": 8.18399981182509e-06, | |
| "loss": 0.2822, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.6337412587412588, | |
| "grad_norm": 10.911194801330566, | |
| "learning_rate": 8.178422949127041e-06, | |
| "loss": 0.1251, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.6346153846153846, | |
| "grad_norm": 2.906049966812134, | |
| "learning_rate": 8.172839443006006e-06, | |
| "loss": 0.0132, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.6354895104895105, | |
| "grad_norm": 3.061995267868042, | |
| "learning_rate": 8.167249305132423e-06, | |
| "loss": 0.0134, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 4.562097072601318, | |
| "learning_rate": 8.161652547190593e-06, | |
| "loss": 0.0294, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.6372377622377622, | |
| "grad_norm": 6.103448867797852, | |
| "learning_rate": 8.156049180878653e-06, | |
| "loss": 0.0314, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.6381118881118881, | |
| "grad_norm": 16.161893844604492, | |
| "learning_rate": 8.150439217908557e-06, | |
| "loss": 0.2637, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.638986013986014, | |
| "grad_norm": 13.73505973815918, | |
| "learning_rate": 8.14482267000604e-06, | |
| "loss": 0.1979, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.6398601398601399, | |
| "grad_norm": 7.310273170471191, | |
| "learning_rate": 8.139199548910605e-06, | |
| "loss": 0.069, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.6407342657342657, | |
| "grad_norm": 6.33121395111084, | |
| "learning_rate": 8.133569866375497e-06, | |
| "loss": 0.0315, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.6416083916083916, | |
| "grad_norm": 2.1470510959625244, | |
| "learning_rate": 8.127933634167666e-06, | |
| "loss": 0.0137, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.6424825174825175, | |
| "grad_norm": 5.463751316070557, | |
| "learning_rate": 8.122290864067762e-06, | |
| "loss": 0.1036, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.6433566433566433, | |
| "grad_norm": 16.222488403320312, | |
| "learning_rate": 8.116641567870093e-06, | |
| "loss": 0.2884, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.6442307692307693, | |
| "grad_norm": 9.267319679260254, | |
| "learning_rate": 8.110985757382614e-06, | |
| "loss": 0.184, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.6451048951048951, | |
| "grad_norm": 5.381446838378906, | |
| "learning_rate": 8.105323444426891e-06, | |
| "loss": 0.0305, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.6459790209790209, | |
| "grad_norm": 3.3439786434173584, | |
| "learning_rate": 8.099654640838081e-06, | |
| "loss": 0.0252, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.6468531468531469, | |
| "grad_norm": 3.0296132564544678, | |
| "learning_rate": 8.093979358464912e-06, | |
| "loss": 0.0201, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6477272727272727, | |
| "grad_norm": 3.4436793327331543, | |
| "learning_rate": 8.088297609169648e-06, | |
| "loss": 0.046, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.6486013986013986, | |
| "grad_norm": 11.34019660949707, | |
| "learning_rate": 8.082609404828075e-06, | |
| "loss": 0.3081, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.6494755244755245, | |
| "grad_norm": 16.66128158569336, | |
| "learning_rate": 8.076914757329467e-06, | |
| "loss": 0.2532, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.6503496503496503, | |
| "grad_norm": 10.066668510437012, | |
| "learning_rate": 8.07121367857657e-06, | |
| "loss": 0.0582, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.6512237762237763, | |
| "grad_norm": 2.1656241416931152, | |
| "learning_rate": 8.065506180485566e-06, | |
| "loss": 0.02, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.6520979020979021, | |
| "grad_norm": 6.971703052520752, | |
| "learning_rate": 8.059792274986062e-06, | |
| "loss": 0.0275, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.652972027972028, | |
| "grad_norm": 3.100680112838745, | |
| "learning_rate": 8.05407197402105e-06, | |
| "loss": 0.0223, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.6538461538461539, | |
| "grad_norm": 10.498968124389648, | |
| "learning_rate": 8.048345289546895e-06, | |
| "loss": 0.1368, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.6547202797202797, | |
| "grad_norm": 14.528643608093262, | |
| "learning_rate": 8.042612233533302e-06, | |
| "loss": 0.2966, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.6555944055944056, | |
| "grad_norm": 16.252084732055664, | |
| "learning_rate": 8.036872817963296e-06, | |
| "loss": 0.1938, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6564685314685315, | |
| "grad_norm": 3.0863349437713623, | |
| "learning_rate": 8.031127054833192e-06, | |
| "loss": 0.0128, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.6573426573426573, | |
| "grad_norm": 2.377596139907837, | |
| "learning_rate": 8.02537495615257e-06, | |
| "loss": 0.0155, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.6582167832167832, | |
| "grad_norm": 4.360049724578857, | |
| "learning_rate": 8.01961653394426e-06, | |
| "loss": 0.0129, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.6590909090909091, | |
| "grad_norm": 8.342607498168945, | |
| "learning_rate": 8.013851800244301e-06, | |
| "loss": 0.1426, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.659965034965035, | |
| "grad_norm": 15.271238327026367, | |
| "learning_rate": 8.008080767101932e-06, | |
| "loss": 0.2883, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.6608391608391608, | |
| "grad_norm": 15.89466381072998, | |
| "learning_rate": 8.002303446579549e-06, | |
| "loss": 0.1893, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.6617132867132867, | |
| "grad_norm": 4.381922245025635, | |
| "learning_rate": 7.996519850752702e-06, | |
| "loss": 0.0139, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.6625874125874126, | |
| "grad_norm": 3.8654425144195557, | |
| "learning_rate": 7.990729991710046e-06, | |
| "loss": 0.0218, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.6634615384615384, | |
| "grad_norm": 2.470996618270874, | |
| "learning_rate": 7.984933881553332e-06, | |
| "loss": 0.021, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.6643356643356644, | |
| "grad_norm": 7.659494400024414, | |
| "learning_rate": 7.97913153239738e-06, | |
| "loss": 0.1373, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6652097902097902, | |
| "grad_norm": 7.245584964752197, | |
| "learning_rate": 7.973322956370043e-06, | |
| "loss": 0.2819, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.666083916083916, | |
| "grad_norm": 9.463001251220703, | |
| "learning_rate": 7.967508165612197e-06, | |
| "loss": 0.2398, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.666958041958042, | |
| "grad_norm": 7.6854248046875, | |
| "learning_rate": 7.9616871722777e-06, | |
| "loss": 0.0323, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.6678321678321678, | |
| "grad_norm": 3.1720035076141357, | |
| "learning_rate": 7.955859988533385e-06, | |
| "loss": 0.0121, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.6687062937062938, | |
| "grad_norm": 2.6548798084259033, | |
| "learning_rate": 7.950026626559014e-06, | |
| "loss": 0.013, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6695804195804196, | |
| "grad_norm": 2.829648017883301, | |
| "learning_rate": 7.944187098547263e-06, | |
| "loss": 0.0188, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6704545454545454, | |
| "grad_norm": 6.0343546867370605, | |
| "learning_rate": 7.938341416703703e-06, | |
| "loss": 0.1306, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.6713286713286714, | |
| "grad_norm": 9.436321258544922, | |
| "learning_rate": 7.932489593246764e-06, | |
| "loss": 0.3017, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6722027972027972, | |
| "grad_norm": 7.504520416259766, | |
| "learning_rate": 7.926631640407711e-06, | |
| "loss": 0.0545, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.6730769230769231, | |
| "grad_norm": 4.290851593017578, | |
| "learning_rate": 7.920767570430622e-06, | |
| "loss": 0.016, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.673951048951049, | |
| "grad_norm": 2.721652030944824, | |
| "learning_rate": 7.914897395572362e-06, | |
| "loss": 0.015, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.6748251748251748, | |
| "grad_norm": 4.208317279815674, | |
| "learning_rate": 7.909021128102552e-06, | |
| "loss": 0.0139, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6756993006993007, | |
| "grad_norm": 11.704246520996094, | |
| "learning_rate": 7.903138780303556e-06, | |
| "loss": 0.2335, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.6765734265734266, | |
| "grad_norm": 13.73259449005127, | |
| "learning_rate": 7.897250364470435e-06, | |
| "loss": 0.3015, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6774475524475524, | |
| "grad_norm": 9.910913467407227, | |
| "learning_rate": 7.891355892910946e-06, | |
| "loss": 0.1305, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6783216783216783, | |
| "grad_norm": 2.0368525981903076, | |
| "learning_rate": 7.885455377945492e-06, | |
| "loss": 0.0189, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6791958041958042, | |
| "grad_norm": 7.256038665771484, | |
| "learning_rate": 7.879548831907115e-06, | |
| "loss": 0.0286, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.6800699300699301, | |
| "grad_norm": 2.8705687522888184, | |
| "learning_rate": 7.873636267141463e-06, | |
| "loss": 0.0204, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6809440559440559, | |
| "grad_norm": 13.25925350189209, | |
| "learning_rate": 7.867717696006757e-06, | |
| "loss": 0.166, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 14.789470672607422, | |
| "learning_rate": 7.861793130873779e-06, | |
| "loss": 0.3105, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6826923076923077, | |
| "grad_norm": 14.244874000549316, | |
| "learning_rate": 7.85586258412584e-06, | |
| "loss": 0.2234, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.6835664335664335, | |
| "grad_norm": 4.365936279296875, | |
| "learning_rate": 7.84992606815875e-06, | |
| "loss": 0.0213, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6844405594405595, | |
| "grad_norm": 3.639108419418335, | |
| "learning_rate": 7.843983595380793e-06, | |
| "loss": 0.0178, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.6853146853146853, | |
| "grad_norm": 3.1062943935394287, | |
| "learning_rate": 7.838035178212713e-06, | |
| "loss": 0.0144, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6861888111888111, | |
| "grad_norm": 3.8421266078948975, | |
| "learning_rate": 7.83208082908767e-06, | |
| "loss": 0.044, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6870629370629371, | |
| "grad_norm": 11.614649772644043, | |
| "learning_rate": 7.826120560451228e-06, | |
| "loss": 0.2971, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6879370629370629, | |
| "grad_norm": 10.628190994262695, | |
| "learning_rate": 7.820154384761319e-06, | |
| "loss": 0.2131, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.6888111888111889, | |
| "grad_norm": 6.863769054412842, | |
| "learning_rate": 7.814182314488225e-06, | |
| "loss": 0.0296, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.6896853146853147, | |
| "grad_norm": 5.237539768218994, | |
| "learning_rate": 7.80820436211455e-06, | |
| "loss": 0.0338, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.6905594405594405, | |
| "grad_norm": 5.266608715057373, | |
| "learning_rate": 7.80222054013519e-06, | |
| "loss": 0.0396, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6914335664335665, | |
| "grad_norm": 2.744847297668457, | |
| "learning_rate": 7.79623086105731e-06, | |
| "loss": 0.0209, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 14.346212387084961, | |
| "learning_rate": 7.790235337400319e-06, | |
| "loss": 0.3028, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6931818181818182, | |
| "grad_norm": 9.34296703338623, | |
| "learning_rate": 7.784233981695835e-06, | |
| "loss": 0.2901, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.6940559440559441, | |
| "grad_norm": 10.783062934875488, | |
| "learning_rate": 7.778226806487678e-06, | |
| "loss": 0.0461, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.6949300699300699, | |
| "grad_norm": 2.3906545639038086, | |
| "learning_rate": 7.772213824331821e-06, | |
| "loss": 0.0108, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6958041958041958, | |
| "grad_norm": 2.7493884563446045, | |
| "learning_rate": 7.766195047796378e-06, | |
| "loss": 0.0142, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6966783216783217, | |
| "grad_norm": 3.7258777618408203, | |
| "learning_rate": 7.76017048946158e-06, | |
| "loss": 0.013, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.6975524475524476, | |
| "grad_norm": 8.554015159606934, | |
| "learning_rate": 7.754140161919732e-06, | |
| "loss": 0.152, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6984265734265734, | |
| "grad_norm": 12.41418743133545, | |
| "learning_rate": 7.748104077775208e-06, | |
| "loss": 0.2876, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.6993006993006993, | |
| "grad_norm": 14.932111740112305, | |
| "learning_rate": 7.742062249644404e-06, | |
| "loss": 0.119, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7001748251748252, | |
| "grad_norm": 3.058673620223999, | |
| "learning_rate": 7.736014690155732e-06, | |
| "loss": 0.018, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.701048951048951, | |
| "grad_norm": 2.909818410873413, | |
| "learning_rate": 7.729961411949579e-06, | |
| "loss": 0.0101, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.7019230769230769, | |
| "grad_norm": 6.851865768432617, | |
| "learning_rate": 7.72390242767828e-06, | |
| "loss": 0.0233, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.7027972027972028, | |
| "grad_norm": 7.350844860076904, | |
| "learning_rate": 7.717837750006106e-06, | |
| "loss": 0.1143, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.7036713286713286, | |
| "grad_norm": 14.948003768920898, | |
| "learning_rate": 7.711767391609225e-06, | |
| "loss": 0.3069, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.7045454545454546, | |
| "grad_norm": 20.419147491455078, | |
| "learning_rate": 7.705691365175672e-06, | |
| "loss": 0.156, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.7054195804195804, | |
| "grad_norm": 4.166325569152832, | |
| "learning_rate": 7.699609683405336e-06, | |
| "loss": 0.02, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.7062937062937062, | |
| "grad_norm": 2.2647218704223633, | |
| "learning_rate": 7.693522359009931e-06, | |
| "loss": 0.0132, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.7071678321678322, | |
| "grad_norm": 4.822109699249268, | |
| "learning_rate": 7.68742940471295e-06, | |
| "loss": 0.0228, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.708041958041958, | |
| "grad_norm": 3.1034998893737793, | |
| "learning_rate": 7.681330833249669e-06, | |
| "loss": 0.0263, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.708916083916084, | |
| "grad_norm": 9.458381652832031, | |
| "learning_rate": 7.675226657367095e-06, | |
| "loss": 0.2779, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.7097902097902098, | |
| "grad_norm": 17.289400100708008, | |
| "learning_rate": 7.669116889823955e-06, | |
| "loss": 0.2082, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.7106643356643356, | |
| "grad_norm": 7.215839862823486, | |
| "learning_rate": 7.663001543390657e-06, | |
| "loss": 0.0337, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.7115384615384616, | |
| "grad_norm": 2.5133867263793945, | |
| "learning_rate": 7.656880630849276e-06, | |
| "loss": 0.014, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.7124125874125874, | |
| "grad_norm": 2.4868149757385254, | |
| "learning_rate": 7.650754164993521e-06, | |
| "loss": 0.01, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.7132867132867133, | |
| "grad_norm": 7.082677364349365, | |
| "learning_rate": 7.644622158628701e-06, | |
| "loss": 0.1003, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.7141608391608392, | |
| "grad_norm": 8.927213668823242, | |
| "learning_rate": 7.638484624571715e-06, | |
| "loss": 0.2763, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.715034965034965, | |
| "grad_norm": 9.628310203552246, | |
| "learning_rate": 7.63234157565101e-06, | |
| "loss": 0.2558, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.7159090909090909, | |
| "grad_norm": 9.074616432189941, | |
| "learning_rate": 7.626193024706561e-06, | |
| "loss": 0.036, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.7167832167832168, | |
| "grad_norm": 4.605230331420898, | |
| "learning_rate": 7.620038984589841e-06, | |
| "loss": 0.0231, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7176573426573427, | |
| "grad_norm": 2.644078254699707, | |
| "learning_rate": 7.613879468163804e-06, | |
| "loss": 0.0174, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.7185314685314685, | |
| "grad_norm": 8.305257797241211, | |
| "learning_rate": 7.607714488302842e-06, | |
| "loss": 0.0324, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.7194055944055944, | |
| "grad_norm": 8.573472023010254, | |
| "learning_rate": 7.601544057892769e-06, | |
| "loss": 0.1456, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.7202797202797203, | |
| "grad_norm": 13.571534156799316, | |
| "learning_rate": 7.595368189830794e-06, | |
| "loss": 0.2828, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.7211538461538461, | |
| "grad_norm": 8.992969512939453, | |
| "learning_rate": 7.589186897025491e-06, | |
| "loss": 0.1569, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.722027972027972, | |
| "grad_norm": 3.1057798862457275, | |
| "learning_rate": 7.583000192396768e-06, | |
| "loss": 0.0195, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.7229020979020979, | |
| "grad_norm": 1.9561690092086792, | |
| "learning_rate": 7.576808088875849e-06, | |
| "loss": 0.0122, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.7237762237762237, | |
| "grad_norm": 3.478318929672241, | |
| "learning_rate": 7.570610599405242e-06, | |
| "loss": 0.0165, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.7246503496503497, | |
| "grad_norm": 11.972442626953125, | |
| "learning_rate": 7.5644077369387125e-06, | |
| "loss": 0.2327, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.7255244755244755, | |
| "grad_norm": 10.18558120727539, | |
| "learning_rate": 7.558199514441254e-06, | |
| "loss": 0.2941, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7263986013986014, | |
| "grad_norm": 15.375913619995117, | |
| "learning_rate": 7.551985944889068e-06, | |
| "loss": 0.1015, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 5.754219055175781, | |
| "learning_rate": 7.545767041269528e-06, | |
| "loss": 0.0181, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.7281468531468531, | |
| "grad_norm": 2.360522747039795, | |
| "learning_rate": 7.539542816581157e-06, | |
| "loss": 0.013, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.7290209790209791, | |
| "grad_norm": 4.416573524475098, | |
| "learning_rate": 7.533313283833603e-06, | |
| "loss": 0.0188, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.7298951048951049, | |
| "grad_norm": 4.017740249633789, | |
| "learning_rate": 7.527078456047605e-06, | |
| "loss": 0.0318, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.7307692307692307, | |
| "grad_norm": 11.504507064819336, | |
| "learning_rate": 7.520838346254975e-06, | |
| "loss": 0.3047, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.7316433566433567, | |
| "grad_norm": 11.946819305419922, | |
| "learning_rate": 7.5145929674985556e-06, | |
| "loss": 0.2445, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.7325174825174825, | |
| "grad_norm": 5.778463363647461, | |
| "learning_rate": 7.508342332832213e-06, | |
| "loss": 0.0335, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.7333916083916084, | |
| "grad_norm": 2.853790521621704, | |
| "learning_rate": 7.502086455320792e-06, | |
| "loss": 0.0096, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.7342657342657343, | |
| "grad_norm": 5.210794925689697, | |
| "learning_rate": 7.495825348040098e-06, | |
| "loss": 0.0191, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7351398601398601, | |
| "grad_norm": 6.486093044281006, | |
| "learning_rate": 7.489559024076869e-06, | |
| "loss": 0.0551, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.736013986013986, | |
| "grad_norm": 14.548480033874512, | |
| "learning_rate": 7.483287496528745e-06, | |
| "loss": 0.2874, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.7368881118881119, | |
| "grad_norm": 9.569380760192871, | |
| "learning_rate": 7.477010778504241e-06, | |
| "loss": 0.2758, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.7377622377622378, | |
| "grad_norm": 8.25275707244873, | |
| "learning_rate": 7.470728883122725e-06, | |
| "loss": 0.0355, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.7386363636363636, | |
| "grad_norm": 4.022761821746826, | |
| "learning_rate": 7.4644418235143835e-06, | |
| "loss": 0.0198, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.7395104895104895, | |
| "grad_norm": 3.3871257305145264, | |
| "learning_rate": 7.458149612820198e-06, | |
| "loss": 0.013, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.7403846153846154, | |
| "grad_norm": 5.009469032287598, | |
| "learning_rate": 7.451852264191914e-06, | |
| "loss": 0.0126, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.7412587412587412, | |
| "grad_norm": 13.273589134216309, | |
| "learning_rate": 7.445549790792021e-06, | |
| "loss": 0.1507, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.7421328671328671, | |
| "grad_norm": 14.326006889343262, | |
| "learning_rate": 7.439242205793718e-06, | |
| "loss": 0.2861, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.743006993006993, | |
| "grad_norm": 9.313475608825684, | |
| "learning_rate": 7.432929522380885e-06, | |
| "loss": 0.0611, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7438811188811189, | |
| "grad_norm": 3.4401090145111084, | |
| "learning_rate": 7.426611753748063e-06, | |
| "loss": 0.0207, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.7447552447552448, | |
| "grad_norm": 3.210700511932373, | |
| "learning_rate": 7.42028891310042e-06, | |
| "loss": 0.0108, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.7456293706293706, | |
| "grad_norm": 2.5403525829315186, | |
| "learning_rate": 7.413961013653725e-06, | |
| "loss": 0.0157, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.7465034965034965, | |
| "grad_norm": 5.771678447723389, | |
| "learning_rate": 7.407628068634321e-06, | |
| "loss": 0.0811, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.7473776223776224, | |
| "grad_norm": 8.880474090576172, | |
| "learning_rate": 7.4012900912790985e-06, | |
| "loss": 0.2818, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.7482517482517482, | |
| "grad_norm": 10.646051406860352, | |
| "learning_rate": 7.394947094835464e-06, | |
| "loss": 0.2309, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.7491258741258742, | |
| "grad_norm": 10.677642822265625, | |
| "learning_rate": 7.388599092561315e-06, | |
| "loss": 0.0488, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 4.501589775085449, | |
| "learning_rate": 7.3822460977250145e-06, | |
| "loss": 0.012, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.7508741258741258, | |
| "grad_norm": 2.511590003967285, | |
| "learning_rate": 7.375888123605359e-06, | |
| "loss": 0.0093, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.7517482517482518, | |
| "grad_norm": 9.10222053527832, | |
| "learning_rate": 7.369525183491553e-06, | |
| "loss": 0.0231, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7526223776223776, | |
| "grad_norm": 14.617132186889648, | |
| "learning_rate": 7.363157290683177e-06, | |
| "loss": 0.2781, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.7534965034965035, | |
| "grad_norm": 10.766728401184082, | |
| "learning_rate": 7.356784458490172e-06, | |
| "loss": 0.1911, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.7543706293706294, | |
| "grad_norm": 9.733813285827637, | |
| "learning_rate": 7.350406700232794e-06, | |
| "loss": 0.0534, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.7552447552447552, | |
| "grad_norm": 2.132784366607666, | |
| "learning_rate": 7.344024029241601e-06, | |
| "loss": 0.0104, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.7561188811188811, | |
| "grad_norm": 5.4743194580078125, | |
| "learning_rate": 7.3376364588574165e-06, | |
| "loss": 0.0259, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.756993006993007, | |
| "grad_norm": 3.495774745941162, | |
| "learning_rate": 7.331244002431308e-06, | |
| "loss": 0.0121, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.7578671328671329, | |
| "grad_norm": 9.002830505371094, | |
| "learning_rate": 7.324846673324551e-06, | |
| "loss": 0.1492, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.7587412587412588, | |
| "grad_norm": 9.361391067504883, | |
| "learning_rate": 7.318444484908606e-06, | |
| "loss": 0.2642, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.7596153846153846, | |
| "grad_norm": 9.56513786315918, | |
| "learning_rate": 7.312037450565098e-06, | |
| "loss": 0.0648, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.7604895104895105, | |
| "grad_norm": 4.027193546295166, | |
| "learning_rate": 7.305625583685771e-06, | |
| "loss": 0.0182, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7613636363636364, | |
| "grad_norm": 4.473756790161133, | |
| "learning_rate": 7.299208897672474e-06, | |
| "loss": 0.0159, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.7622377622377622, | |
| "grad_norm": 3.320054769515991, | |
| "learning_rate": 7.292787405937129e-06, | |
| "loss": 0.0155, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.7631118881118881, | |
| "grad_norm": 11.365391731262207, | |
| "learning_rate": 7.286361121901706e-06, | |
| "loss": 0.2686, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.763986013986014, | |
| "grad_norm": 14.833230972290039, | |
| "learning_rate": 7.27993005899818e-06, | |
| "loss": 0.2757, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.7648601398601399, | |
| "grad_norm": 9.428201675415039, | |
| "learning_rate": 7.2734942306685304e-06, | |
| "loss": 0.1099, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.7657342657342657, | |
| "grad_norm": 2.4826157093048096, | |
| "learning_rate": 7.267053650364686e-06, | |
| "loss": 0.0073, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.7666083916083916, | |
| "grad_norm": 2.8154211044311523, | |
| "learning_rate": 7.260608331548512e-06, | |
| "loss": 0.0244, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.7674825174825175, | |
| "grad_norm": 2.0929274559020996, | |
| "learning_rate": 7.254158287691775e-06, | |
| "loss": 0.0054, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.7683566433566433, | |
| "grad_norm": 7.84702205657959, | |
| "learning_rate": 7.247703532276122e-06, | |
| "loss": 0.0683, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 13.125106811523438, | |
| "learning_rate": 7.2412440787930475e-06, | |
| "loss": 0.2679, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7701048951048951, | |
| "grad_norm": 9.96452808380127, | |
| "learning_rate": 7.23477994074386e-06, | |
| "loss": 0.2367, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.7709790209790209, | |
| "grad_norm": 7.259433269500732, | |
| "learning_rate": 7.228311131639667e-06, | |
| "loss": 0.033, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.7718531468531469, | |
| "grad_norm": 1.9379146099090576, | |
| "learning_rate": 7.221837665001335e-06, | |
| "loss": 0.0118, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.7727272727272727, | |
| "grad_norm": 2.756248950958252, | |
| "learning_rate": 7.215359554359465e-06, | |
| "loss": 0.0141, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.7736013986013986, | |
| "grad_norm": 5.479997158050537, | |
| "learning_rate": 7.208876813254366e-06, | |
| "loss": 0.0723, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7744755244755245, | |
| "grad_norm": 12.799932479858398, | |
| "learning_rate": 7.202389455236029e-06, | |
| "loss": 0.2965, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.7753496503496503, | |
| "grad_norm": 12.132229804992676, | |
| "learning_rate": 7.195897493864088e-06, | |
| "loss": 0.176, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.7762237762237763, | |
| "grad_norm": 14.60354232788086, | |
| "learning_rate": 7.189400942707804e-06, | |
| "loss": 0.0495, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.7770979020979021, | |
| "grad_norm": 3.0225894451141357, | |
| "learning_rate": 7.182899815346029e-06, | |
| "loss": 0.0106, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.777972027972028, | |
| "grad_norm": 3.3088302612304688, | |
| "learning_rate": 7.176394125367182e-06, | |
| "loss": 0.0229, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7788461538461539, | |
| "grad_norm": 3.0035033226013184, | |
| "learning_rate": 7.169883886369216e-06, | |
| "loss": 0.0169, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.7797202797202797, | |
| "grad_norm": 6.58284330368042, | |
| "learning_rate": 7.163369111959594e-06, | |
| "loss": 0.1353, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7805944055944056, | |
| "grad_norm": 16.387571334838867, | |
| "learning_rate": 7.1568498157552576e-06, | |
| "loss": 0.2771, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.7814685314685315, | |
| "grad_norm": 9.8047513961792, | |
| "learning_rate": 7.1503260113826035e-06, | |
| "loss": 0.032, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7823426573426573, | |
| "grad_norm": 2.903493642807007, | |
| "learning_rate": 7.143797712477445e-06, | |
| "loss": 0.0139, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7832167832167832, | |
| "grad_norm": 2.35031795501709, | |
| "learning_rate": 7.137264932684993e-06, | |
| "loss": 0.0102, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.7840909090909091, | |
| "grad_norm": 2.0192954540252686, | |
| "learning_rate": 7.1307276856598265e-06, | |
| "loss": 0.012, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.784965034965035, | |
| "grad_norm": 8.136798858642578, | |
| "learning_rate": 7.124185985065856e-06, | |
| "loss": 0.1588, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.7858391608391608, | |
| "grad_norm": 11.011357307434082, | |
| "learning_rate": 7.117639844576307e-06, | |
| "loss": 0.2787, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.7867132867132867, | |
| "grad_norm": 12.940624237060547, | |
| "learning_rate": 7.111089277873681e-06, | |
| "loss": 0.0926, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7875874125874126, | |
| "grad_norm": 2.3509361743927, | |
| "learning_rate": 7.104534298649733e-06, | |
| "loss": 0.013, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.7884615384615384, | |
| "grad_norm": 4.245987415313721, | |
| "learning_rate": 7.097974920605435e-06, | |
| "loss": 0.0124, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7893356643356644, | |
| "grad_norm": 4.734135627746582, | |
| "learning_rate": 7.091411157450965e-06, | |
| "loss": 0.0246, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.7902097902097902, | |
| "grad_norm": 6.089751243591309, | |
| "learning_rate": 7.084843022905656e-06, | |
| "loss": 0.0587, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.791083916083916, | |
| "grad_norm": 10.954769134521484, | |
| "learning_rate": 7.078270530697982e-06, | |
| "loss": 0.2734, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.791958041958042, | |
| "grad_norm": 15.439305305480957, | |
| "learning_rate": 7.071693694565526e-06, | |
| "loss": 0.118, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.7928321678321678, | |
| "grad_norm": 6.708484172821045, | |
| "learning_rate": 7.06511252825495e-06, | |
| "loss": 0.0327, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.7937062937062938, | |
| "grad_norm": 3.336785078048706, | |
| "learning_rate": 7.0585270455219654e-06, | |
| "loss": 0.0217, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.7945804195804196, | |
| "grad_norm": 3.638423204421997, | |
| "learning_rate": 7.051937260131306e-06, | |
| "loss": 0.0153, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.7954545454545454, | |
| "grad_norm": 3.0343968868255615, | |
| "learning_rate": 7.045343185856701e-06, | |
| "loss": 0.0274, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7963286713286714, | |
| "grad_norm": 11.519174575805664, | |
| "learning_rate": 7.03874483648084e-06, | |
| "loss": 0.2755, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.7972027972027972, | |
| "grad_norm": 8.29574966430664, | |
| "learning_rate": 7.0321422257953505e-06, | |
| "loss": 0.1356, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.7980769230769231, | |
| "grad_norm": 7.4113969802856445, | |
| "learning_rate": 7.025535367600771e-06, | |
| "loss": 0.0497, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.798951048951049, | |
| "grad_norm": 3.685851573944092, | |
| "learning_rate": 7.018924275706511e-06, | |
| "loss": 0.0247, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.7998251748251748, | |
| "grad_norm": 4.4680070877075195, | |
| "learning_rate": 7.012308963930831e-06, | |
| "loss": 0.0146, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.8006993006993007, | |
| "grad_norm": 2.5341711044311523, | |
| "learning_rate": 7.005689446100816e-06, | |
| "loss": 0.0082, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.8015734265734266, | |
| "grad_norm": 14.937386512756348, | |
| "learning_rate": 6.999065736052337e-06, | |
| "loss": 0.1741, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.8024475524475524, | |
| "grad_norm": 11.307042121887207, | |
| "learning_rate": 6.992437847630031e-06, | |
| "loss": 0.2681, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.8033216783216783, | |
| "grad_norm": 8.31412124633789, | |
| "learning_rate": 6.9858057946872645e-06, | |
| "loss": 0.0606, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.8041958041958042, | |
| "grad_norm": 3.257115364074707, | |
| "learning_rate": 6.979169591086115e-06, | |
| "loss": 0.0122, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8050699300699301, | |
| "grad_norm": 1.5598549842834473, | |
| "learning_rate": 6.972529250697329e-06, | |
| "loss": 0.0041, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.8059440559440559, | |
| "grad_norm": 3.2592859268188477, | |
| "learning_rate": 6.965884787400301e-06, | |
| "loss": 0.0178, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.8068181818181818, | |
| "grad_norm": 8.014803886413574, | |
| "learning_rate": 6.95923621508305e-06, | |
| "loss": 0.0927, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.8076923076923077, | |
| "grad_norm": 12.724344253540039, | |
| "learning_rate": 6.952583547642171e-06, | |
| "loss": 0.298, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.8085664335664335, | |
| "grad_norm": 11.19556713104248, | |
| "learning_rate": 6.945926798982829e-06, | |
| "loss": 0.1578, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.8094405594405595, | |
| "grad_norm": 2.427116632461548, | |
| "learning_rate": 6.939265983018717e-06, | |
| "loss": 0.0176, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.8103146853146853, | |
| "grad_norm": 2.2512614727020264, | |
| "learning_rate": 6.932601113672025e-06, | |
| "loss": 0.0132, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.8111888111888111, | |
| "grad_norm": 1.7966564893722534, | |
| "learning_rate": 6.92593220487342e-06, | |
| "loss": 0.007, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.8120629370629371, | |
| "grad_norm": 10.684746742248535, | |
| "learning_rate": 6.919259270562009e-06, | |
| "loss": 0.2328, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.8129370629370629, | |
| "grad_norm": 15.68927001953125, | |
| "learning_rate": 6.912582324685315e-06, | |
| "loss": 0.2676, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8138111888111889, | |
| "grad_norm": 12.867997169494629, | |
| "learning_rate": 6.905901381199245e-06, | |
| "loss": 0.1599, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.8146853146853147, | |
| "grad_norm": 4.499429225921631, | |
| "learning_rate": 6.899216454068063e-06, | |
| "loss": 0.0247, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.8155594405594405, | |
| "grad_norm": 5.0020294189453125, | |
| "learning_rate": 6.892527557264358e-06, | |
| "loss": 0.0114, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.8164335664335665, | |
| "grad_norm": 3.3071727752685547, | |
| "learning_rate": 6.885834704769017e-06, | |
| "loss": 0.0092, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.8173076923076923, | |
| "grad_norm": 4.7062482833862305, | |
| "learning_rate": 6.879137910571191e-06, | |
| "loss": 0.0461, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 18.184219360351562, | |
| "learning_rate": 6.872437188668279e-06, | |
| "loss": 0.2962, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.8190559440559441, | |
| "grad_norm": 14.613560676574707, | |
| "learning_rate": 6.865732553065879e-06, | |
| "loss": 0.2449, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.8199300699300699, | |
| "grad_norm": 5.844992160797119, | |
| "learning_rate": 6.859024017777779e-06, | |
| "loss": 0.0242, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.8208041958041958, | |
| "grad_norm": 4.663540840148926, | |
| "learning_rate": 6.852311596825908e-06, | |
| "loss": 0.023, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.8216783216783217, | |
| "grad_norm": 5.994654655456543, | |
| "learning_rate": 6.845595304240327e-06, | |
| "loss": 0.0252, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8225524475524476, | |
| "grad_norm": 4.661762714385986, | |
| "learning_rate": 6.838875154059181e-06, | |
| "loss": 0.0587, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.8234265734265734, | |
| "grad_norm": 10.339978218078613, | |
| "learning_rate": 6.832151160328681e-06, | |
| "loss": 0.2695, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.8243006993006993, | |
| "grad_norm": 13.686936378479004, | |
| "learning_rate": 6.825423337103074e-06, | |
| "loss": 0.1875, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.8251748251748252, | |
| "grad_norm": 7.988853454589844, | |
| "learning_rate": 6.818691698444608e-06, | |
| "loss": 0.0446, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.826048951048951, | |
| "grad_norm": 4.286409378051758, | |
| "learning_rate": 6.811956258423508e-06, | |
| "loss": 0.0177, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.8269230769230769, | |
| "grad_norm": 1.5553338527679443, | |
| "learning_rate": 6.805217031117941e-06, | |
| "loss": 0.0102, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.8277972027972028, | |
| "grad_norm": 2.7665109634399414, | |
| "learning_rate": 6.798474030613995e-06, | |
| "loss": 0.0147, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.8286713286713286, | |
| "grad_norm": 6.374751567840576, | |
| "learning_rate": 6.791727271005642e-06, | |
| "loss": 0.1334, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.8295454545454546, | |
| "grad_norm": 7.61808443069458, | |
| "learning_rate": 6.784976766394711e-06, | |
| "loss": 0.2849, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.8304195804195804, | |
| "grad_norm": 12.682550430297852, | |
| "learning_rate": 6.7782225308908575e-06, | |
| "loss": 0.1053, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8312937062937062, | |
| "grad_norm": 3.896414041519165, | |
| "learning_rate": 6.7714645786115395e-06, | |
| "loss": 0.0175, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.8321678321678322, | |
| "grad_norm": 3.617689609527588, | |
| "learning_rate": 6.764702923681977e-06, | |
| "loss": 0.0111, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.833041958041958, | |
| "grad_norm": 8.42477798461914, | |
| "learning_rate": 6.757937580235138e-06, | |
| "loss": 0.0205, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.833916083916084, | |
| "grad_norm": 4.580524921417236, | |
| "learning_rate": 6.751168562411689e-06, | |
| "loss": 0.0646, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.8347902097902098, | |
| "grad_norm": 13.78461742401123, | |
| "learning_rate": 6.744395884359987e-06, | |
| "loss": 0.2605, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.8356643356643356, | |
| "grad_norm": 9.278969764709473, | |
| "learning_rate": 6.737619560236035e-06, | |
| "loss": 0.1574, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.8365384615384616, | |
| "grad_norm": 5.628566741943359, | |
| "learning_rate": 6.730839604203454e-06, | |
| "loss": 0.0173, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.8374125874125874, | |
| "grad_norm": 5.519917964935303, | |
| "learning_rate": 6.724056030433464e-06, | |
| "loss": 0.0204, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.8382867132867133, | |
| "grad_norm": 4.296505451202393, | |
| "learning_rate": 6.717268853104834e-06, | |
| "loss": 0.0123, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.8391608391608392, | |
| "grad_norm": 7.724421501159668, | |
| "learning_rate": 6.710478086403883e-06, | |
| "loss": 0.0861, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.840034965034965, | |
| "grad_norm": 15.097043991088867, | |
| "learning_rate": 6.703683744524415e-06, | |
| "loss": 0.276, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.8409090909090909, | |
| "grad_norm": 14.16945743560791, | |
| "learning_rate": 6.696885841667718e-06, | |
| "loss": 0.1828, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.8417832167832168, | |
| "grad_norm": 7.923300743103027, | |
| "learning_rate": 6.690084392042514e-06, | |
| "loss": 0.0357, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.8426573426573427, | |
| "grad_norm": 3.7586417198181152, | |
| "learning_rate": 6.683279409864949e-06, | |
| "loss": 0.0236, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.8435314685314685, | |
| "grad_norm": 2.1967530250549316, | |
| "learning_rate": 6.676470909358545e-06, | |
| "loss": 0.0124, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.8444055944055944, | |
| "grad_norm": 3.6063077449798584, | |
| "learning_rate": 6.669658904754177e-06, | |
| "loss": 0.009, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.8452797202797203, | |
| "grad_norm": 14.22199821472168, | |
| "learning_rate": 6.662843410290052e-06, | |
| "loss": 0.1912, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 8.282665252685547, | |
| "learning_rate": 6.656024440211662e-06, | |
| "loss": 0.179, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.847027972027972, | |
| "grad_norm": 11.80463981628418, | |
| "learning_rate": 6.64920200877177e-06, | |
| "loss": 0.0618, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.8479020979020979, | |
| "grad_norm": 2.3243815898895264, | |
| "learning_rate": 6.642376130230373e-06, | |
| "loss": 0.0102, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8487762237762237, | |
| "grad_norm": 2.3746681213378906, | |
| "learning_rate": 6.63554681885467e-06, | |
| "loss": 0.0109, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.8496503496503497, | |
| "grad_norm": 6.492579936981201, | |
| "learning_rate": 6.628714088919037e-06, | |
| "loss": 0.0252, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.8505244755244755, | |
| "grad_norm": 14.998275756835938, | |
| "learning_rate": 6.621877954704996e-06, | |
| "loss": 0.2528, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.8513986013986014, | |
| "grad_norm": 11.99267578125, | |
| "learning_rate": 6.615038430501183e-06, | |
| "loss": 0.2712, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.8522727272727273, | |
| "grad_norm": 14.408089637756348, | |
| "learning_rate": 6.608195530603322e-06, | |
| "loss": 0.0526, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.8531468531468531, | |
| "grad_norm": 3.712310791015625, | |
| "learning_rate": 6.601349269314188e-06, | |
| "loss": 0.0264, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.8540209790209791, | |
| "grad_norm": 6.570241451263428, | |
| "learning_rate": 6.59449966094359e-06, | |
| "loss": 0.0263, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.8548951048951049, | |
| "grad_norm": 2.1878886222839355, | |
| "learning_rate": 6.5876467198083235e-06, | |
| "loss": 0.0119, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.8557692307692307, | |
| "grad_norm": 5.433715343475342, | |
| "learning_rate": 6.5807904602321585e-06, | |
| "loss": 0.0848, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.8566433566433567, | |
| "grad_norm": 8.28032112121582, | |
| "learning_rate": 6.573930896545792e-06, | |
| "loss": 0.2577, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8575174825174825, | |
| "grad_norm": 9.188396453857422, | |
| "learning_rate": 6.567068043086836e-06, | |
| "loss": 0.1921, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.8583916083916084, | |
| "grad_norm": 4.982787609100342, | |
| "learning_rate": 6.560201914199774e-06, | |
| "loss": 0.0217, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.8592657342657343, | |
| "grad_norm": 4.867199897766113, | |
| "learning_rate": 6.553332524235937e-06, | |
| "loss": 0.0197, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.8601398601398601, | |
| "grad_norm": 2.035226583480835, | |
| "learning_rate": 6.5464598875534714e-06, | |
| "loss": 0.0086, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.861013986013986, | |
| "grad_norm": 4.880307197570801, | |
| "learning_rate": 6.5395840185173096e-06, | |
| "loss": 0.0686, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.8618881118881119, | |
| "grad_norm": 8.762625694274902, | |
| "learning_rate": 6.532704931499142e-06, | |
| "loss": 0.2805, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.8627622377622378, | |
| "grad_norm": 8.857834815979004, | |
| "learning_rate": 6.525822640877383e-06, | |
| "loss": 0.1359, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.8636363636363636, | |
| "grad_norm": 4.882622241973877, | |
| "learning_rate": 6.518937161037144e-06, | |
| "loss": 0.0245, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.8645104895104895, | |
| "grad_norm": 2.6919195652008057, | |
| "learning_rate": 6.512048506370201e-06, | |
| "loss": 0.0096, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.8653846153846154, | |
| "grad_norm": 2.0163915157318115, | |
| "learning_rate": 6.5051566912749695e-06, | |
| "loss": 0.0141, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.8662587412587412, | |
| "grad_norm": 2.6442372798919678, | |
| "learning_rate": 6.4982617301564665e-06, | |
| "loss": 0.0122, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.8671328671328671, | |
| "grad_norm": 11.013758659362793, | |
| "learning_rate": 6.491363637426289e-06, | |
| "loss": 0.3032, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.868006993006993, | |
| "grad_norm": 15.482316970825195, | |
| "learning_rate": 6.484462427502572e-06, | |
| "loss": 0.2905, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.8688811188811189, | |
| "grad_norm": 6.231189727783203, | |
| "learning_rate": 6.4775581148099786e-06, | |
| "loss": 0.0461, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.8697552447552448, | |
| "grad_norm": 3.635223150253296, | |
| "learning_rate": 6.470650713779644e-06, | |
| "loss": 0.0183, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.8706293706293706, | |
| "grad_norm": 2.8513622283935547, | |
| "learning_rate": 6.463740238849165e-06, | |
| "loss": 0.0126, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.8715034965034965, | |
| "grad_norm": 4.475131988525391, | |
| "learning_rate": 6.4568267044625664e-06, | |
| "loss": 0.015, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.8723776223776224, | |
| "grad_norm": 6.545045375823975, | |
| "learning_rate": 6.44991012507026e-06, | |
| "loss": 0.16, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.8732517482517482, | |
| "grad_norm": 9.549799919128418, | |
| "learning_rate": 6.44299051512903e-06, | |
| "loss": 0.2999, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.8741258741258742, | |
| "grad_norm": 12.031996726989746, | |
| "learning_rate": 6.436067889101985e-06, | |
| "loss": 0.1842, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.875, | |
| "grad_norm": 3.0472235679626465, | |
| "learning_rate": 6.4291422614585505e-06, | |
| "loss": 0.0149, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.8758741258741258, | |
| "grad_norm": 8.994250297546387, | |
| "learning_rate": 6.422213646674416e-06, | |
| "loss": 0.0109, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.8767482517482518, | |
| "grad_norm": 5.993330955505371, | |
| "learning_rate": 6.415282059231518e-06, | |
| "loss": 0.025, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.8776223776223776, | |
| "grad_norm": 8.873002052307129, | |
| "learning_rate": 6.408347513618005e-06, | |
| "loss": 0.1725, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.8784965034965035, | |
| "grad_norm": 9.883171081542969, | |
| "learning_rate": 6.4014100243282144e-06, | |
| "loss": 0.2505, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.8793706293706294, | |
| "grad_norm": 9.27820873260498, | |
| "learning_rate": 6.394469605862625e-06, | |
| "loss": 0.1078, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.8802447552447552, | |
| "grad_norm": 5.319599151611328, | |
| "learning_rate": 6.38752627272785e-06, | |
| "loss": 0.0241, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.8811188811188811, | |
| "grad_norm": 1.2024259567260742, | |
| "learning_rate": 6.380580039436586e-06, | |
| "loss": 0.0045, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.881993006993007, | |
| "grad_norm": 2.553077220916748, | |
| "learning_rate": 6.373630920507598e-06, | |
| "loss": 0.0168, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.8828671328671329, | |
| "grad_norm": 4.856099605560303, | |
| "learning_rate": 6.366678930465676e-06, | |
| "loss": 0.0882, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8837412587412588, | |
| "grad_norm": 9.911934852600098, | |
| "learning_rate": 6.3597240838416175e-06, | |
| "loss": 0.2497, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.8846153846153846, | |
| "grad_norm": 8.151529312133789, | |
| "learning_rate": 6.352766395172186e-06, | |
| "loss": 0.1824, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.8854895104895105, | |
| "grad_norm": 6.0536627769470215, | |
| "learning_rate": 6.345805879000087e-06, | |
| "loss": 0.0298, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.8863636363636364, | |
| "grad_norm": 4.24729061126709, | |
| "learning_rate": 6.338842549873937e-06, | |
| "loss": 0.013, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.8872377622377622, | |
| "grad_norm": 2.9513211250305176, | |
| "learning_rate": 6.3318764223482285e-06, | |
| "loss": 0.011, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8881118881118881, | |
| "grad_norm": 6.47172212600708, | |
| "learning_rate": 6.32490751098331e-06, | |
| "loss": 0.0569, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.888986013986014, | |
| "grad_norm": 13.089883804321289, | |
| "learning_rate": 6.3179358303453386e-06, | |
| "loss": 0.2815, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.8898601398601399, | |
| "grad_norm": 9.718770980834961, | |
| "learning_rate": 6.31096139500627e-06, | |
| "loss": 0.2019, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.8907342657342657, | |
| "grad_norm": 10.260836601257324, | |
| "learning_rate": 6.303984219543811e-06, | |
| "loss": 0.0329, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.8916083916083916, | |
| "grad_norm": 4.303269863128662, | |
| "learning_rate": 6.297004318541396e-06, | |
| "loss": 0.0105, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8924825174825175, | |
| "grad_norm": 6.483615875244141, | |
| "learning_rate": 6.290021706588161e-06, | |
| "loss": 0.0335, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.8933566433566433, | |
| "grad_norm": 3.1851353645324707, | |
| "learning_rate": 6.283036398278903e-06, | |
| "loss": 0.0149, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8942307692307693, | |
| "grad_norm": 10.102582931518555, | |
| "learning_rate": 6.2760484082140604e-06, | |
| "loss": 0.2066, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.8951048951048951, | |
| "grad_norm": 13.186929702758789, | |
| "learning_rate": 6.269057750999668e-06, | |
| "loss": 0.3107, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.8959790209790209, | |
| "grad_norm": 10.629234313964844, | |
| "learning_rate": 6.262064441247346e-06, | |
| "loss": 0.1025, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8968531468531469, | |
| "grad_norm": 3.5307230949401855, | |
| "learning_rate": 6.255068493574252e-06, | |
| "loss": 0.0116, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8977272727272727, | |
| "grad_norm": 2.278611660003662, | |
| "learning_rate": 6.248069922603057e-06, | |
| "loss": 0.0074, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.8986013986013986, | |
| "grad_norm": 2.166477918624878, | |
| "learning_rate": 6.24106874296192e-06, | |
| "loss": 0.0063, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.8994755244755245, | |
| "grad_norm": 7.1525444984436035, | |
| "learning_rate": 6.23406496928445e-06, | |
| "loss": 0.161, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.9003496503496503, | |
| "grad_norm": 8.597286224365234, | |
| "learning_rate": 6.227058616209674e-06, | |
| "loss": 0.2873, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.9012237762237763, | |
| "grad_norm": 9.426263809204102, | |
| "learning_rate": 6.220049698382018e-06, | |
| "loss": 0.1762, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.9020979020979021, | |
| "grad_norm": 6.403757095336914, | |
| "learning_rate": 6.2130382304512615e-06, | |
| "loss": 0.0361, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.902972027972028, | |
| "grad_norm": 3.5435636043548584, | |
| "learning_rate": 6.20602422707252e-06, | |
| "loss": 0.0239, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.9038461538461539, | |
| "grad_norm": 3.3914921283721924, | |
| "learning_rate": 6.1990077029062055e-06, | |
| "loss": 0.0165, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.9047202797202797, | |
| "grad_norm": 3.278165340423584, | |
| "learning_rate": 6.1919886726179975e-06, | |
| "loss": 0.0111, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.9055944055944056, | |
| "grad_norm": 16.941152572631836, | |
| "learning_rate": 6.184967150878819e-06, | |
| "loss": 0.283, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.9064685314685315, | |
| "grad_norm": 9.810595512390137, | |
| "learning_rate": 6.177943152364793e-06, | |
| "loss": 0.1887, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.9073426573426573, | |
| "grad_norm": 12.529802322387695, | |
| "learning_rate": 6.1709166917572264e-06, | |
| "loss": 0.0531, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.9082167832167832, | |
| "grad_norm": 5.325544834136963, | |
| "learning_rate": 6.163887783742566e-06, | |
| "loss": 0.0291, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 2.7411320209503174, | |
| "learning_rate": 6.156856443012382e-06, | |
| "loss": 0.0164, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.909965034965035, | |
| "grad_norm": 5.648186683654785, | |
| "learning_rate": 6.14982268426332e-06, | |
| "loss": 0.071, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.9108391608391608, | |
| "grad_norm": 14.734941482543945, | |
| "learning_rate": 6.142786522197088e-06, | |
| "loss": 0.2795, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.9117132867132867, | |
| "grad_norm": 10.999231338500977, | |
| "learning_rate": 6.135747971520412e-06, | |
| "loss": 0.2838, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.9125874125874126, | |
| "grad_norm": 10.483818054199219, | |
| "learning_rate": 6.128707046945011e-06, | |
| "loss": 0.0701, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.9134615384615384, | |
| "grad_norm": 3.004197835922241, | |
| "learning_rate": 6.121663763187569e-06, | |
| "loss": 0.0195, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.9143356643356644, | |
| "grad_norm": 3.49302339553833, | |
| "learning_rate": 6.114618134969698e-06, | |
| "loss": 0.0151, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.9152097902097902, | |
| "grad_norm": 2.7401669025421143, | |
| "learning_rate": 6.107570177017915e-06, | |
| "loss": 0.0107, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.916083916083916, | |
| "grad_norm": 6.902370452880859, | |
| "learning_rate": 6.100519904063597e-06, | |
| "loss": 0.1256, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.916958041958042, | |
| "grad_norm": 11.483890533447266, | |
| "learning_rate": 6.093467330842972e-06, | |
| "loss": 0.2667, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.9178321678321678, | |
| "grad_norm": 9.019611358642578, | |
| "learning_rate": 6.086412472097066e-06, | |
| "loss": 0.1116, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.9187062937062938, | |
| "grad_norm": 2.321434497833252, | |
| "learning_rate": 6.079355342571685e-06, | |
| "loss": 0.0202, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.9195804195804196, | |
| "grad_norm": 2.9524950981140137, | |
| "learning_rate": 6.072295957017385e-06, | |
| "loss": 0.0136, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.9204545454545454, | |
| "grad_norm": 1.275251865386963, | |
| "learning_rate": 6.0652343301894345e-06, | |
| "loss": 0.0081, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.9213286713286714, | |
| "grad_norm": 7.594128608703613, | |
| "learning_rate": 6.0581704768477825e-06, | |
| "loss": 0.1663, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.9222027972027972, | |
| "grad_norm": 10.844099044799805, | |
| "learning_rate": 6.05110441175704e-06, | |
| "loss": 0.2654, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 6.2613043785095215, | |
| "learning_rate": 6.044036149686436e-06, | |
| "loss": 0.1344, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.923951048951049, | |
| "grad_norm": 3.1522412300109863, | |
| "learning_rate": 6.036965705409793e-06, | |
| "loss": 0.0205, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.9248251748251748, | |
| "grad_norm": 5.819720268249512, | |
| "learning_rate": 6.029893093705492e-06, | |
| "loss": 0.0203, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.9256993006993007, | |
| "grad_norm": 3.8648738861083984, | |
| "learning_rate": 6.022818329356449e-06, | |
| "loss": 0.0149, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.9265734265734266, | |
| "grad_norm": 11.772501945495605, | |
| "learning_rate": 6.015741427150076e-06, | |
| "loss": 0.0641, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.9274475524475524, | |
| "grad_norm": 10.90075969696045, | |
| "learning_rate": 6.008662401878251e-06, | |
| "loss": 0.2749, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.9283216783216783, | |
| "grad_norm": 11.61825942993164, | |
| "learning_rate": 6.0015812683372975e-06, | |
| "loss": 0.1669, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.9291958041958042, | |
| "grad_norm": 8.49216079711914, | |
| "learning_rate": 5.99449804132794e-06, | |
| "loss": 0.0468, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.9300699300699301, | |
| "grad_norm": 2.8473808765411377, | |
| "learning_rate": 5.987412735655277e-06, | |
| "loss": 0.0188, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.9309440559440559, | |
| "grad_norm": 2.774134397506714, | |
| "learning_rate": 5.980325366128755e-06, | |
| "loss": 0.0135, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.9318181818181818, | |
| "grad_norm": 2.1524031162261963, | |
| "learning_rate": 5.973235947562137e-06, | |
| "loss": 0.0121, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.9326923076923077, | |
| "grad_norm": 9.562312126159668, | |
| "learning_rate": 5.966144494773462e-06, | |
| "loss": 0.2737, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.9335664335664335, | |
| "grad_norm": 10.389531135559082, | |
| "learning_rate": 5.959051022585025e-06, | |
| "loss": 0.254, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.9344405594405595, | |
| "grad_norm": 7.379521369934082, | |
| "learning_rate": 5.951955545823342e-06, | |
| "loss": 0.0484, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.9353146853146853, | |
| "grad_norm": 3.7240424156188965, | |
| "learning_rate": 5.944858079319118e-06, | |
| "loss": 0.0091, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9361888111888111, | |
| "grad_norm": 3.325143337249756, | |
| "learning_rate": 5.937758637907216e-06, | |
| "loss": 0.0237, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.9370629370629371, | |
| "grad_norm": 2.829467535018921, | |
| "learning_rate": 5.9306572364266294e-06, | |
| "loss": 0.0099, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.9379370629370629, | |
| "grad_norm": 9.33228874206543, | |
| "learning_rate": 5.923553889720447e-06, | |
| "loss": 0.1837, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.9388111888111889, | |
| "grad_norm": 8.610675811767578, | |
| "learning_rate": 5.9164486126358214e-06, | |
| "loss": 0.2616, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.9396853146853147, | |
| "grad_norm": 6.8186540603637695, | |
| "learning_rate": 5.909341420023942e-06, | |
| "loss": 0.0629, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.9405594405594405, | |
| "grad_norm": 5.0157623291015625, | |
| "learning_rate": 5.902232326740004e-06, | |
| "loss": 0.0231, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.9414335664335665, | |
| "grad_norm": 4.069489479064941, | |
| "learning_rate": 5.895121347643173e-06, | |
| "loss": 0.015, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.9423076923076923, | |
| "grad_norm": 4.21807336807251, | |
| "learning_rate": 5.888008497596553e-06, | |
| "loss": 0.0204, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.9431818181818182, | |
| "grad_norm": 3.9311933517456055, | |
| "learning_rate": 5.880893791467167e-06, | |
| "loss": 0.0614, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.9440559440559441, | |
| "grad_norm": 23.556373596191406, | |
| "learning_rate": 5.87377724412591e-06, | |
| "loss": 0.2655, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9449300699300699, | |
| "grad_norm": 6.629197597503662, | |
| "learning_rate": 5.866658870447528e-06, | |
| "loss": 0.1451, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.9458041958041958, | |
| "grad_norm": 6.973886489868164, | |
| "learning_rate": 5.859538685310585e-06, | |
| "loss": 0.025, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.9466783216783217, | |
| "grad_norm": 3.6189990043640137, | |
| "learning_rate": 5.852416703597431e-06, | |
| "loss": 0.0109, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.9475524475524476, | |
| "grad_norm": 1.4186196327209473, | |
| "learning_rate": 5.84529294019417e-06, | |
| "loss": 0.0102, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.9484265734265734, | |
| "grad_norm": 3.404859781265259, | |
| "learning_rate": 5.8381674099906306e-06, | |
| "loss": 0.0187, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.9493006993006993, | |
| "grad_norm": 11.563116073608398, | |
| "learning_rate": 5.831040127880337e-06, | |
| "loss": 0.2783, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.9501748251748252, | |
| "grad_norm": 13.677116394042969, | |
| "learning_rate": 5.823911108760468e-06, | |
| "loss": 0.2176, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.951048951048951, | |
| "grad_norm": 5.8455891609191895, | |
| "learning_rate": 5.816780367531841e-06, | |
| "loss": 0.0235, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.9519230769230769, | |
| "grad_norm": 5.079223155975342, | |
| "learning_rate": 5.80964791909887e-06, | |
| "loss": 0.0182, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.9527972027972028, | |
| "grad_norm": 4.756433486938477, | |
| "learning_rate": 5.802513778369535e-06, | |
| "loss": 0.0143, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.9536713286713286, | |
| "grad_norm": 3.7484090328216553, | |
| "learning_rate": 5.795377960255356e-06, | |
| "loss": 0.0173, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.9545454545454546, | |
| "grad_norm": 11.774727821350098, | |
| "learning_rate": 5.788240479671359e-06, | |
| "loss": 0.248, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.9554195804195804, | |
| "grad_norm": 11.909862518310547, | |
| "learning_rate": 5.781101351536041e-06, | |
| "loss": 0.2752, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.9562937062937062, | |
| "grad_norm": 5.393920421600342, | |
| "learning_rate": 5.773960590771348e-06, | |
| "loss": 0.0603, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.9571678321678322, | |
| "grad_norm": 2.777489423751831, | |
| "learning_rate": 5.766818212302636e-06, | |
| "loss": 0.0082, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.958041958041958, | |
| "grad_norm": 3.53434157371521, | |
| "learning_rate": 5.759674231058643e-06, | |
| "loss": 0.0095, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.958916083916084, | |
| "grad_norm": 2.228837490081787, | |
| "learning_rate": 5.7525286619714545e-06, | |
| "loss": 0.0164, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.9597902097902098, | |
| "grad_norm": 6.298214435577393, | |
| "learning_rate": 5.745381519976477e-06, | |
| "loss": 0.1261, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.9606643356643356, | |
| "grad_norm": 12.420851707458496, | |
| "learning_rate": 5.738232820012407e-06, | |
| "loss": 0.2555, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 10.995224952697754, | |
| "learning_rate": 5.731082577021191e-06, | |
| "loss": 0.135, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9624125874125874, | |
| "grad_norm": 3.1969802379608154, | |
| "learning_rate": 5.723930805948008e-06, | |
| "loss": 0.0073, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.9632867132867133, | |
| "grad_norm": 3.0112719535827637, | |
| "learning_rate": 5.716777521741223e-06, | |
| "loss": 0.016, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.9641608391608392, | |
| "grad_norm": 2.3236653804779053, | |
| "learning_rate": 5.7096227393523716e-06, | |
| "loss": 0.0132, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.965034965034965, | |
| "grad_norm": 2.309762954711914, | |
| "learning_rate": 5.702466473736117e-06, | |
| "loss": 0.0131, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.9659090909090909, | |
| "grad_norm": 14.102072715759277, | |
| "learning_rate": 5.695308739850222e-06, | |
| "loss": 0.2529, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.9667832167832168, | |
| "grad_norm": 12.475796699523926, | |
| "learning_rate": 5.68814955265552e-06, | |
| "loss": 0.1927, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.9676573426573427, | |
| "grad_norm": 3.961515188217163, | |
| "learning_rate": 5.680988927115879e-06, | |
| "loss": 0.0141, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.9685314685314685, | |
| "grad_norm": 1.8610446453094482, | |
| "learning_rate": 5.673826878198181e-06, | |
| "loss": 0.0152, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.9694055944055944, | |
| "grad_norm": 3.5631394386291504, | |
| "learning_rate": 5.6666634208722705e-06, | |
| "loss": 0.0119, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.9702797202797203, | |
| "grad_norm": 6.340417861938477, | |
| "learning_rate": 5.65949857011095e-06, | |
| "loss": 0.1092, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.9711538461538461, | |
| "grad_norm": 9.307807922363281, | |
| "learning_rate": 5.652332340889923e-06, | |
| "loss": 0.2618, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.972027972027972, | |
| "grad_norm": 8.373964309692383, | |
| "learning_rate": 5.645164748187781e-06, | |
| "loss": 0.2399, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.9729020979020979, | |
| "grad_norm": 4.447226524353027, | |
| "learning_rate": 5.637995806985961e-06, | |
| "loss": 0.0283, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.9737762237762237, | |
| "grad_norm": 3.0013158321380615, | |
| "learning_rate": 5.630825532268725e-06, | |
| "loss": 0.0159, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.9746503496503497, | |
| "grad_norm": 4.143692493438721, | |
| "learning_rate": 5.623653939023111e-06, | |
| "loss": 0.0176, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.9755244755244755, | |
| "grad_norm": 3.907721519470215, | |
| "learning_rate": 5.6164810422389285e-06, | |
| "loss": 0.0425, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.9763986013986014, | |
| "grad_norm": 12.202678680419922, | |
| "learning_rate": 5.609306856908697e-06, | |
| "loss": 0.2448, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.9772727272727273, | |
| "grad_norm": 7.564881324768066, | |
| "learning_rate": 5.602131398027637e-06, | |
| "loss": 0.2391, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.9781468531468531, | |
| "grad_norm": 6.032947540283203, | |
| "learning_rate": 5.594954680593631e-06, | |
| "loss": 0.0428, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.9790209790209791, | |
| "grad_norm": 2.3707187175750732, | |
| "learning_rate": 5.587776719607187e-06, | |
| "loss": 0.0156, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9798951048951049, | |
| "grad_norm": 1.5278058052062988, | |
| "learning_rate": 5.580597530071419e-06, | |
| "loss": 0.0085, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.9807692307692307, | |
| "grad_norm": 2.7148990631103516, | |
| "learning_rate": 5.573417126992004e-06, | |
| "loss": 0.009, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.9816433566433567, | |
| "grad_norm": 13.499276161193848, | |
| "learning_rate": 5.566235525377155e-06, | |
| "loss": 0.2236, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.9825174825174825, | |
| "grad_norm": 11.465832710266113, | |
| "learning_rate": 5.559052740237595e-06, | |
| "loss": 0.2582, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.9833916083916084, | |
| "grad_norm": 12.596088409423828, | |
| "learning_rate": 5.551868786586517e-06, | |
| "loss": 0.0552, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.9842657342657343, | |
| "grad_norm": 2.256972551345825, | |
| "learning_rate": 5.544683679439556e-06, | |
| "loss": 0.0142, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.9851398601398601, | |
| "grad_norm": 1.5715539455413818, | |
| "learning_rate": 5.537497433814762e-06, | |
| "loss": 0.007, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.986013986013986, | |
| "grad_norm": 2.449791669845581, | |
| "learning_rate": 5.530310064732559e-06, | |
| "loss": 0.0086, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.9868881118881119, | |
| "grad_norm": 5.747419834136963, | |
| "learning_rate": 5.523121587215724e-06, | |
| "loss": 0.1231, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.9877622377622378, | |
| "grad_norm": 12.612768173217773, | |
| "learning_rate": 5.515932016289347e-06, | |
| "loss": 0.2859, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9886363636363636, | |
| "grad_norm": 18.74411392211914, | |
| "learning_rate": 5.508741366980809e-06, | |
| "loss": 0.2543, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.9895104895104895, | |
| "grad_norm": 4.73159646987915, | |
| "learning_rate": 5.501549654319734e-06, | |
| "loss": 0.0349, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.9903846153846154, | |
| "grad_norm": 4.9475507736206055, | |
| "learning_rate": 5.494356893337985e-06, | |
| "loss": 0.01, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.9912587412587412, | |
| "grad_norm": 7.61251974105835, | |
| "learning_rate": 5.4871630990696005e-06, | |
| "loss": 0.023, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.9921328671328671, | |
| "grad_norm": 6.168530464172363, | |
| "learning_rate": 5.479968286550786e-06, | |
| "loss": 0.0294, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.993006993006993, | |
| "grad_norm": 8.08245849609375, | |
| "learning_rate": 5.472772470819877e-06, | |
| "loss": 0.2709, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.9938811188811189, | |
| "grad_norm": 11.990961074829102, | |
| "learning_rate": 5.465575666917302e-06, | |
| "loss": 0.1948, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.9947552447552448, | |
| "grad_norm": 4.770583152770996, | |
| "learning_rate": 5.4583778898855576e-06, | |
| "loss": 0.0193, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.9956293706293706, | |
| "grad_norm": 5.345957279205322, | |
| "learning_rate": 5.4511791547691694e-06, | |
| "loss": 0.0199, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.9965034965034965, | |
| "grad_norm": 5.5198869705200195, | |
| "learning_rate": 5.443979476614674e-06, | |
| "loss": 0.0123, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9973776223776224, | |
| "grad_norm": 8.734823226928711, | |
| "learning_rate": 5.4367788704705725e-06, | |
| "loss": 0.1664, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.9982517482517482, | |
| "grad_norm": 12.101849555969238, | |
| "learning_rate": 5.4295773513873085e-06, | |
| "loss": 0.2311, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.9991258741258742, | |
| "grad_norm": 7.892526149749756, | |
| "learning_rate": 5.422374934417228e-06, | |
| "loss": 0.0884, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.670325517654419, | |
| "learning_rate": 5.415171634614567e-06, | |
| "loss": 0.0283, | |
| "step": 1144 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 2288, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 155552547225600.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
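
The object above is a raw Hugging Face `trainer_state.json`: the `log_history` array holds one entry per optimizer step (here `logging_steps` is 1), and the trailer records the run configuration. Because the raw per-step loss in this log oscillates between roughly 0.01 and 0.3 from one step to the next, a smoothed series is easier to read than the raw column. Below is a minimal sketch of how such a state file can be loaded and summarized; the filename `trainer_state.json` and the 50-step smoothing window are illustrative assumptions, not part of the state file itself.

```python
import json

# Minimal sketch: summarize the per-step log_history of a Hugging Face
# trainer_state.json. Assumes the JSON above is saved locally as
# "trainer_state.json" (hypothetical path); the moving-average window
# is an arbitrary choice for readability, not a Trainer setting.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (eval entries would not).
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

# Trailing moving average to smooth the noisy per-step loss.
window = 50
smoothed = [
    sum(losses[max(0, i - window + 1) : i + 1])
    / (i - max(0, i - window + 1) + 1)
    for i in range(len(losses))
]

last = train_logs[-1]
print(f"steps logged: {len(steps)} (last step {last['step']}, epoch {last['epoch']})")
print(f"final raw loss: {losses[-1]:.4f}; final {window}-step mean: {smoothed[-1]:.4f}")
print(f"final learning rate: {last['learning_rate']:.3e}")
print(f"max_steps: {state['max_steps']}, epochs: {state['num_train_epochs']}, "
      f"train_batch_size: {state['train_batch_size']}")
```

On this log the script would report the state at step 1144 (epoch 1.0 of 2 planned epochs, i.e. the halfway point of `max_steps` = 2288), with the learning rate at about 5.42e-06 on its decay from the warmup peak seen at the start of the file.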