{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9983193277310924,
  "eval_steps": 500,
  "global_step": 594,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005042016806722689,
      "grad_norm": 9.45597365399993,
      "learning_rate": 0.0,
      "loss": 1.7242,
      "step": 1
    },
    {
      "epoch": 0.010084033613445379,
      "grad_norm": 9.218921810032594,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 1.9603,
      "step": 2
    },
    {
      "epoch": 0.015126050420168067,
      "grad_norm": 9.19364568473009,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 1.7815,
      "step": 3
    },
    {
      "epoch": 0.020168067226890758,
      "grad_norm": 9.753359655679406,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.8671,
      "step": 4
    },
    {
      "epoch": 0.025210084033613446,
      "grad_norm": 10.188684139684757,
      "learning_rate": 6.666666666666667e-07,
      "loss": 1.8868,
      "step": 5
    },
    {
      "epoch": 0.030252100840336135,
      "grad_norm": 9.253535763532076,
      "learning_rate": 8.333333333333333e-07,
      "loss": 1.8821,
      "step": 6
    },
    {
      "epoch": 0.03529411764705882,
      "grad_norm": 9.452472463389428,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.8398,
      "step": 7
    },
    {
      "epoch": 0.040336134453781515,
      "grad_norm": 8.338459992866273,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 1.7522,
      "step": 8
    },
    {
      "epoch": 0.0453781512605042,
      "grad_norm": 8.599040436901118,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 1.7879,
      "step": 9
    },
    {
      "epoch": 0.05042016806722689,
      "grad_norm": 9.204139051227466,
      "learning_rate": 1.5e-06,
      "loss": 1.8949,
      "step": 10
    },
    {
      "epoch": 0.05546218487394958,
      "grad_norm": 8.383986517840034,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.7568,
      "step": 11
    },
    {
      "epoch": 0.06050420168067227,
      "grad_norm": 6.14215523192106,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 1.6243,
      "step": 12
    },
    {
      "epoch": 0.06554621848739496,
      "grad_norm": 5.998914335428499,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.5973,
      "step": 13
    },
    {
      "epoch": 0.07058823529411765,
      "grad_norm": 5.047474738743573,
      "learning_rate": 2.166666666666667e-06,
      "loss": 1.3774,
      "step": 14
    },
    {
      "epoch": 0.07563025210084033,
      "grad_norm": 5.330740621399064,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 1.5953,
      "step": 15
    },
    {
      "epoch": 0.08067226890756303,
      "grad_norm": 3.3659526026887012,
      "learning_rate": 2.5e-06,
      "loss": 1.3746,
      "step": 16
    },
    {
      "epoch": 0.08571428571428572,
      "grad_norm": 3.639732034816691,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.4698,
      "step": 17
    },
    {
      "epoch": 0.0907563025210084,
      "grad_norm": 3.461514147091586,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 1.4229,
      "step": 18
    },
    {
      "epoch": 0.0957983193277311,
      "grad_norm": 3.765309579932919,
      "learning_rate": 3e-06,
      "loss": 1.3948,
      "step": 19
    },
    {
      "epoch": 0.10084033613445378,
      "grad_norm": 2.825230202760748,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 1.3286,
      "step": 20
    },
    {
      "epoch": 0.10588235294117647,
      "grad_norm": 2.387015147619193,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.2574,
      "step": 21
    },
    {
      "epoch": 0.11092436974789915,
      "grad_norm": 2.6592293064240176,
      "learning_rate": 3.5e-06,
      "loss": 1.2994,
      "step": 22
    },
    {
      "epoch": 0.11596638655462185,
      "grad_norm": 2.9338685422018163,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 1.271,
      "step": 23
    },
    {
      "epoch": 0.12100840336134454,
      "grad_norm": 2.8053283243940923,
      "learning_rate": 3.833333333333334e-06,
      "loss": 1.239,
      "step": 24
    },
    {
      "epoch": 0.12605042016806722,
      "grad_norm": 2.4764651014882673,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.2632,
      "step": 25
    },
    {
      "epoch": 0.13109243697478992,
      "grad_norm": 4.193230652323676,
      "learning_rate": 4.166666666666667e-06,
      "loss": 1.2494,
      "step": 26
    },
    {
      "epoch": 0.1361344537815126,
      "grad_norm": 2.166632601601999,
      "learning_rate": 4.333333333333334e-06,
      "loss": 1.1772,
      "step": 27
    },
    {
      "epoch": 0.1411764705882353,
      "grad_norm": 2.0456983888545133,
      "learning_rate": 4.5e-06,
      "loss": 1.3323,
      "step": 28
    },
    {
      "epoch": 0.146218487394958,
      "grad_norm": 1.9041534025850353,
      "learning_rate": 4.666666666666667e-06,
      "loss": 1.123,
      "step": 29
    },
    {
      "epoch": 0.15126050420168066,
      "grad_norm": 1.7473372136225975,
      "learning_rate": 4.833333333333333e-06,
      "loss": 1.1116,
      "step": 30
    },
    {
      "epoch": 0.15630252100840336,
      "grad_norm": 1.9237786068741898,
      "learning_rate": 5e-06,
      "loss": 1.2038,
      "step": 31
    },
    {
      "epoch": 0.16134453781512606,
      "grad_norm": 1.9862371515679214,
      "learning_rate": 5.1666666666666675e-06,
      "loss": 1.2171,
      "step": 32
    },
    {
      "epoch": 0.16638655462184873,
      "grad_norm": 1.5922593116941988,
      "learning_rate": 5.333333333333334e-06,
      "loss": 1.0193,
      "step": 33
    },
    {
      "epoch": 0.17142857142857143,
      "grad_norm": 1.6830455258736572,
      "learning_rate": 5.500000000000001e-06,
      "loss": 1.0761,
      "step": 34
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 1.594143028453368,
      "learning_rate": 5.666666666666667e-06,
      "loss": 1.1126,
      "step": 35
    },
    {
      "epoch": 0.1815126050420168,
      "grad_norm": 1.9420003685481775,
      "learning_rate": 5.833333333333334e-06,
      "loss": 1.1203,
      "step": 36
    },
    {
      "epoch": 0.1865546218487395,
      "grad_norm": 1.5815112240806883,
      "learning_rate": 6e-06,
      "loss": 1.0293,
      "step": 37
    },
    {
      "epoch": 0.1915966386554622,
      "grad_norm": 1.4697006996217221,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.995,
      "step": 38
    },
    {
      "epoch": 0.19663865546218487,
      "grad_norm": 1.5886739084366435,
      "learning_rate": 6.333333333333333e-06,
      "loss": 1.1051,
      "step": 39
    },
    {
      "epoch": 0.20168067226890757,
      "grad_norm": 1.3717225438634324,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 1.0817,
      "step": 40
    },
    {
      "epoch": 0.20672268907563024,
      "grad_norm": 1.4586233032739204,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.9949,
      "step": 41
    },
    {
      "epoch": 0.21176470588235294,
      "grad_norm": 1.4404526895251804,
      "learning_rate": 6.833333333333334e-06,
      "loss": 1.0369,
      "step": 42
    },
    {
      "epoch": 0.21680672268907564,
      "grad_norm": 1.5011071614715905,
      "learning_rate": 7e-06,
      "loss": 1.0126,
      "step": 43
    },
    {
      "epoch": 0.2218487394957983,
      "grad_norm": 1.446801500279163,
      "learning_rate": 7.166666666666667e-06,
      "loss": 0.9829,
      "step": 44
    },
    {
      "epoch": 0.226890756302521,
      "grad_norm": 1.3157845464395648,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.9432,
      "step": 45
    },
    {
      "epoch": 0.2319327731092437,
      "grad_norm": 1.3291092123967403,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.9518,
      "step": 46
    },
    {
      "epoch": 0.23697478991596638,
      "grad_norm": 1.5105509029003468,
      "learning_rate": 7.666666666666667e-06,
      "loss": 1.0235,
      "step": 47
    },
    {
      "epoch": 0.24201680672268908,
      "grad_norm": 1.420355667391472,
      "learning_rate": 7.833333333333333e-06,
      "loss": 0.9567,
      "step": 48
    },
    {
      "epoch": 0.24705882352941178,
      "grad_norm": 1.463732709856337,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.0417,
      "step": 49
    },
    {
      "epoch": 0.25210084033613445,
      "grad_norm": 1.4275241446789713,
      "learning_rate": 8.166666666666668e-06,
      "loss": 1.0347,
      "step": 50
    },
    {
      "epoch": 0.2571428571428571,
      "grad_norm": 1.309592587931707,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.9524,
      "step": 51
    },
    {
      "epoch": 0.26218487394957984,
      "grad_norm": 1.3344872488030621,
      "learning_rate": 8.5e-06,
      "loss": 1.0684,
      "step": 52
    },
    {
      "epoch": 0.2672268907563025,
      "grad_norm": 1.3533956797177575,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.9501,
      "step": 53
    },
    {
      "epoch": 0.2722689075630252,
      "grad_norm": 1.4422509166091777,
      "learning_rate": 8.833333333333334e-06,
      "loss": 0.9452,
      "step": 54
    },
    {
      "epoch": 0.2773109243697479,
      "grad_norm": 1.3534627088209181,
      "learning_rate": 9e-06,
      "loss": 0.9243,
      "step": 55
    },
    {
      "epoch": 0.2823529411764706,
      "grad_norm": 1.370929089587996,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.9577,
      "step": 56
    },
    {
      "epoch": 0.28739495798319326,
      "grad_norm": 1.34141912977082,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.9216,
      "step": 57
    },
    {
      "epoch": 0.292436974789916,
      "grad_norm": 1.437190020022949,
      "learning_rate": 9.5e-06,
      "loss": 0.986,
      "step": 58
    },
    {
      "epoch": 0.29747899159663865,
      "grad_norm": 1.3190591357074484,
      "learning_rate": 9.666666666666667e-06,
      "loss": 1.0163,
      "step": 59
    },
    {
      "epoch": 0.3025210084033613,
      "grad_norm": 1.3230400720636633,
      "learning_rate": 9.833333333333333e-06,
      "loss": 0.9071,
      "step": 60
    },
    {
      "epoch": 0.30756302521008405,
      "grad_norm": 1.570821042981294,
      "learning_rate": 1e-05,
      "loss": 1.0532,
      "step": 61
    },
    {
      "epoch": 0.3126050420168067,
      "grad_norm": 1.3817712282096664,
      "learning_rate": 9.999913472135126e-06,
      "loss": 0.9497,
      "step": 62
    },
    {
      "epoch": 0.3176470588235294,
      "grad_norm": 1.3461235016869455,
      "learning_rate": 9.99965389153533e-06,
      "loss": 0.9656,
      "step": 63
    },
    {
      "epoch": 0.3226890756302521,
      "grad_norm": 1.2703045215015534,
      "learning_rate": 9.999221267184993e-06,
      "loss": 0.8563,
      "step": 64
    },
    {
      "epoch": 0.3277310924369748,
      "grad_norm": 1.4463044763025328,
      "learning_rate": 9.998615614057743e-06,
      "loss": 0.9743,
      "step": 65
    },
    {
      "epoch": 0.33277310924369746,
      "grad_norm": 1.2126520135581191,
      "learning_rate": 9.997836953115927e-06,
      "loss": 0.8256,
      "step": 66
    },
    {
      "epoch": 0.3378151260504202,
      "grad_norm": 1.465456256707118,
      "learning_rate": 9.996885311309892e-06,
      "loss": 0.9112,
      "step": 67
    },
    {
      "epoch": 0.34285714285714286,
      "grad_norm": 1.3774012861831768,
      "learning_rate": 9.995760721577053e-06,
      "loss": 1.0031,
      "step": 68
    },
    {
      "epoch": 0.34789915966386553,
      "grad_norm": 1.214727510886685,
      "learning_rate": 9.994463222840748e-06,
      "loss": 0.8777,
      "step": 69
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 1.3372556283226344,
      "learning_rate": 9.992992860008893e-06,
      "loss": 0.9503,
      "step": 70
    },
    {
      "epoch": 0.35798319327731093,
      "grad_norm": 1.2629663699758409,
      "learning_rate": 9.991349683972435e-06,
      "loss": 0.9707,
      "step": 71
    },
    {
      "epoch": 0.3630252100840336,
      "grad_norm": 1.2961666438854509,
      "learning_rate": 9.989533751603578e-06,
      "loss": 0.8987,
      "step": 72
    },
    {
      "epoch": 0.3680672268907563,
      "grad_norm": 1.3451690514655665,
      "learning_rate": 9.987545125753818e-06,
      "loss": 0.9614,
      "step": 73
    },
    {
      "epoch": 0.373109243697479,
      "grad_norm": 1.3824819884360038,
      "learning_rate": 9.985383875251783e-06,
      "loss": 0.9101,
      "step": 74
    },
    {
      "epoch": 0.37815126050420167,
      "grad_norm": 1.290324816657544,
      "learning_rate": 9.983050074900824e-06,
      "loss": 0.8901,
      "step": 75
    },
    {
      "epoch": 0.3831932773109244,
      "grad_norm": 1.3785449206810632,
      "learning_rate": 9.980543805476447e-06,
      "loss": 0.9305,
      "step": 76
    },
    {
      "epoch": 0.38823529411764707,
      "grad_norm": 1.2723741333137952,
      "learning_rate": 9.977865153723508e-06,
      "loss": 0.9145,
      "step": 77
    },
    {
      "epoch": 0.39327731092436974,
      "grad_norm": 1.3277787150964286,
      "learning_rate": 9.975014212353212e-06,
      "loss": 0.9386,
      "step": 78
    },
    {
      "epoch": 0.3983193277310924,
      "grad_norm": 1.300378629259356,
      "learning_rate": 9.971991080039912e-06,
      "loss": 0.9072,
      "step": 79
    },
    {
      "epoch": 0.40336134453781514,
      "grad_norm": 1.3180887220440103,
      "learning_rate": 9.968795861417676e-06,
      "loss": 0.8538,
      "step": 80
    },
    {
      "epoch": 0.4084033613445378,
      "grad_norm": 1.2852565908527667,
      "learning_rate": 9.965428667076687e-06,
      "loss": 0.8625,
      "step": 81
    },
    {
      "epoch": 0.4134453781512605,
      "grad_norm": 1.22082061679436,
      "learning_rate": 9.961889613559396e-06,
      "loss": 0.8002,
      "step": 82
    },
    {
      "epoch": 0.4184873949579832,
      "grad_norm": 1.3948047447367582,
      "learning_rate": 9.958178823356503e-06,
      "loss": 0.9563,
      "step": 83
    },
    {
      "epoch": 0.4235294117647059,
      "grad_norm": 1.32125427246041,
      "learning_rate": 9.954296424902709e-06,
      "loss": 0.9009,
      "step": 84
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 1.2664915782700163,
      "learning_rate": 9.950242552572272e-06,
      "loss": 0.8489,
      "step": 85
    },
    {
      "epoch": 0.4336134453781513,
      "grad_norm": 1.273298827077617,
      "learning_rate": 9.946017346674362e-06,
      "loss": 0.847,
      "step": 86
    },
    {
      "epoch": 0.43865546218487395,
      "grad_norm": 1.328680054216705,
      "learning_rate": 9.941620953448195e-06,
      "loss": 0.9382,
      "step": 87
    },
    {
      "epoch": 0.4436974789915966,
      "grad_norm": 1.263646905073375,
      "learning_rate": 9.937053525057977e-06,
      "loss": 0.8991,
      "step": 88
    },
    {
      "epoch": 0.44873949579831934,
      "grad_norm": 1.209796673070386,
      "learning_rate": 9.932315219587641e-06,
      "loss": 0.8611,
      "step": 89
    },
    {
      "epoch": 0.453781512605042,
      "grad_norm": 1.1317133515894529,
      "learning_rate": 9.927406201035368e-06,
      "loss": 0.8254,
      "step": 90
    },
    {
      "epoch": 0.4588235294117647,
      "grad_norm": 1.2581352252268798,
      "learning_rate": 9.922326639307918e-06,
      "loss": 0.8186,
      "step": 91
    },
    {
      "epoch": 0.4638655462184874,
      "grad_norm": 1.1615726675287243,
      "learning_rate": 9.917076710214739e-06,
      "loss": 0.8217,
      "step": 92
    },
    {
      "epoch": 0.4689075630252101,
      "grad_norm": 1.3906544125113194,
      "learning_rate": 9.911656595461899e-06,
      "loss": 0.9606,
      "step": 93
    },
    {
      "epoch": 0.47394957983193275,
      "grad_norm": 1.3491688269700184,
      "learning_rate": 9.906066482645774e-06,
      "loss": 0.8865,
      "step": 94
    },
    {
      "epoch": 0.4789915966386555,
      "grad_norm": 1.2884319333617182,
      "learning_rate": 9.900306565246579e-06,
      "loss": 0.8608,
      "step": 95
    },
    {
      "epoch": 0.48403361344537815,
      "grad_norm": 1.332999472417029,
      "learning_rate": 9.894377042621654e-06,
      "loss": 0.8476,
      "step": 96
    },
    {
      "epoch": 0.4890756302521008,
      "grad_norm": 1.3206768360556793,
      "learning_rate": 9.888278119998573e-06,
      "loss": 0.898,
      "step": 97
    },
    {
      "epoch": 0.49411764705882355,
      "grad_norm": 1.3732673184556148,
      "learning_rate": 9.882010008468038e-06,
      "loss": 0.9482,
      "step": 98
    },
    {
      "epoch": 0.4991596638655462,
      "grad_norm": 1.4284063475101123,
      "learning_rate": 9.875572924976568e-06,
      "loss": 0.8932,
      "step": 99
    },
    {
      "epoch": 0.5042016806722689,
      "grad_norm": 1.249757410129038,
      "learning_rate": 9.868967092319003e-06,
      "loss": 0.9113,
      "step": 100
    },
    {
      "epoch": 0.5092436974789916,
      "grad_norm": 1.2033755235104269,
      "learning_rate": 9.86219273913078e-06,
      "loss": 0.8373,
      "step": 101
    },
    {
      "epoch": 0.5142857142857142,
      "grad_norm": 1.3285676372655046,
      "learning_rate": 9.855250099880026e-06,
      "loss": 0.82,
      "step": 102
    },
    {
      "epoch": 0.519327731092437,
      "grad_norm": 1.280372963776325,
      "learning_rate": 9.848139414859441e-06,
      "loss": 0.9269,
      "step": 103
    },
    {
      "epoch": 0.5243697478991597,
      "grad_norm": 1.3597201294098022,
      "learning_rate": 9.840860930177984e-06,
      "loss": 0.8917,
      "step": 104
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 1.3044841757394627,
      "learning_rate": 9.833414897752346e-06,
      "loss": 0.8242,
      "step": 105
    },
    {
      "epoch": 0.534453781512605,
      "grad_norm": 1.2237707733265701,
      "learning_rate": 9.825801575298248e-06,
      "loss": 0.8369,
      "step": 106
    },
    {
      "epoch": 0.5394957983193277,
      "grad_norm": 1.2984723776565605,
      "learning_rate": 9.818021226321502e-06,
      "loss": 0.8687,
      "step": 107
    },
    {
      "epoch": 0.5445378151260504,
      "grad_norm": 1.3966505679016854,
      "learning_rate": 9.8100741201089e-06,
      "loss": 0.8698,
      "step": 108
    },
    {
      "epoch": 0.5495798319327732,
      "grad_norm": 1.3695596995593027,
      "learning_rate": 9.801960531718898e-06,
      "loss": 0.9224,
      "step": 109
    },
    {
      "epoch": 0.5546218487394958,
      "grad_norm": 1.2219956732497297,
      "learning_rate": 9.793680741972084e-06,
      "loss": 0.7909,
      "step": 110
    },
    {
      "epoch": 0.5596638655462185,
      "grad_norm": 1.1958717679101365,
      "learning_rate": 9.785235037441473e-06,
      "loss": 0.8222,
      "step": 111
    },
    {
      "epoch": 0.5647058823529412,
      "grad_norm": 1.3284406137942217,
      "learning_rate": 9.77662371044258e-06,
      "loss": 0.9698,
      "step": 112
    },
    {
      "epoch": 0.5697478991596638,
      "grad_norm": 1.4005342916908725,
      "learning_rate": 9.767847059023292e-06,
      "loss": 0.8141,
      "step": 113
    },
    {
      "epoch": 0.5747899159663865,
      "grad_norm": 1.3280058867861344,
      "learning_rate": 9.75890538695358e-06,
      "loss": 0.8281,
      "step": 114
    },
    {
      "epoch": 0.5798319327731093,
      "grad_norm": 1.348332178712391,
      "learning_rate": 9.749799003714954e-06,
      "loss": 0.8174,
      "step": 115
    },
    {
      "epoch": 0.584873949579832,
      "grad_norm": 1.345901958116435,
      "learning_rate": 9.74052822448978e-06,
      "loss": 0.8662,
      "step": 116
    },
    {
      "epoch": 0.5899159663865546,
      "grad_norm": 1.4938772005815362,
      "learning_rate": 9.731093370150349e-06,
      "loss": 0.9227,
      "step": 117
    },
    {
      "epoch": 0.5949579831932773,
      "grad_norm": 1.5782055001938107,
      "learning_rate": 9.721494767247779e-06,
      "loss": 0.9292,
      "step": 118
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2813061736782214,
      "learning_rate": 9.71173274800072e-06,
      "loss": 0.808,
      "step": 119
    },
    {
      "epoch": 0.6050420168067226,
      "grad_norm": 1.3387521092808896,
      "learning_rate": 9.70180765028384e-06,
      "loss": 0.8052,
      "step": 120
    },
    {
      "epoch": 0.6100840336134454,
      "grad_norm": 1.1971567112258479,
      "learning_rate": 9.691719817616148e-06,
      "loss": 0.8321,
      "step": 121
    },
    {
      "epoch": 0.6151260504201681,
      "grad_norm": 1.4022847044925355,
      "learning_rate": 9.681469599149093e-06,
      "loss": 0.8362,
      "step": 122
    },
    {
      "epoch": 0.6201680672268908,
      "grad_norm": 1.4458562904255674,
      "learning_rate": 9.671057349654481e-06,
      "loss": 0.8753,
      "step": 123
    },
    {
      "epoch": 0.6252100840336134,
      "grad_norm": 1.3489812277335955,
      "learning_rate": 9.660483429512198e-06,
      "loss": 0.8406,
      "step": 124
    },
    {
      "epoch": 0.6302521008403361,
      "grad_norm": 1.2541520148654464,
      "learning_rate": 9.649748204697741e-06,
      "loss": 0.8096,
      "step": 125
    },
    {
      "epoch": 0.6352941176470588,
      "grad_norm": 1.4166136476450861,
      "learning_rate": 9.63885204676954e-06,
      "loss": 0.9279,
      "step": 126
    },
    {
      "epoch": 0.6403361344537815,
      "grad_norm": 1.2096305649684784,
      "learning_rate": 9.627795332856107e-06,
      "loss": 0.8668,
      "step": 127
    },
    {
      "epoch": 0.6453781512605042,
      "grad_norm": 1.0817129947497557,
      "learning_rate": 9.616578445642982e-06,
      "loss": 0.8021,
      "step": 128
    },
    {
      "epoch": 0.6504201680672269,
      "grad_norm": 1.2857282530529068,
      "learning_rate": 9.605201773359485e-06,
      "loss": 0.9031,
      "step": 129
    },
    {
      "epoch": 0.6554621848739496,
      "grad_norm": 1.2909981390159206,
      "learning_rate": 9.59366570976528e-06,
      "loss": 0.9028,
      "step": 130
    },
    {
      "epoch": 0.6605042016806723,
      "grad_norm": 1.277642300275485,
      "learning_rate": 9.581970654136752e-06,
      "loss": 0.8206,
      "step": 131
    },
    {
      "epoch": 0.6655462184873949,
      "grad_norm": 1.2618202348884826,
      "learning_rate": 9.570117011253173e-06,
      "loss": 0.8038,
      "step": 132
    },
    {
      "epoch": 0.6705882352941176,
      "grad_norm": 1.3158796346136465,
      "learning_rate": 9.55810519138271e-06,
      "loss": 0.8594,
      "step": 133
    },
    {
      "epoch": 0.6756302521008404,
      "grad_norm": 1.464049668724664,
      "learning_rate": 9.545935610268213e-06,
      "loss": 0.8946,
      "step": 134
    },
    {
      "epoch": 0.680672268907563,
      "grad_norm": 1.3568598282729065,
      "learning_rate": 9.533608689112827e-06,
      "loss": 0.8747,
      "step": 135
    },
    {
      "epoch": 0.6857142857142857,
      "grad_norm": 1.459842199207566,
      "learning_rate": 9.521124854565425e-06,
      "loss": 0.8665,
      "step": 136
    },
    {
      "epoch": 0.6907563025210084,
      "grad_norm": 1.2651754016717647,
      "learning_rate": 9.508484538705823e-06,
      "loss": 0.8172,
      "step": 137
    },
    {
      "epoch": 0.6957983193277311,
      "grad_norm": 1.3148283789857567,
      "learning_rate": 9.495688179029838e-06,
      "loss": 0.8159,
      "step": 138
    },
    {
      "epoch": 0.7008403361344537,
      "grad_norm": 1.3062514406684878,
      "learning_rate": 9.482736218434144e-06,
      "loss": 0.772,
      "step": 139
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 1.233357901449911,
      "learning_rate": 9.469629105200937e-06,
      "loss": 0.812,
      "step": 140
    },
    {
      "epoch": 0.7109243697478992,
      "grad_norm": 1.4036092051385856,
      "learning_rate": 9.45636729298243e-06,
      "loss": 0.9176,
      "step": 141
    },
    {
      "epoch": 0.7159663865546219,
      "grad_norm": 1.2475986918890871,
      "learning_rate": 9.442951240785135e-06,
      "loss": 0.9227,
      "step": 142
    },
    {
      "epoch": 0.7210084033613445,
      "grad_norm": 1.33327258291273,
      "learning_rate": 9.429381412954e-06,
      "loss": 0.8406,
      "step": 143
    },
    {
      "epoch": 0.7260504201680672,
      "grad_norm": 1.2457766641422836,
      "learning_rate": 9.415658279156312e-06,
      "loss": 0.7944,
      "step": 144
    },
    {
      "epoch": 0.7310924369747899,
      "grad_norm": 1.214604972950531,
      "learning_rate": 9.401782314365458e-06,
      "loss": 0.7889,
      "step": 145
    },
    {
      "epoch": 0.7361344537815127,
      "grad_norm": 1.4091496584822034,
      "learning_rate": 9.387753998844482e-06,
      "loss": 0.8542,
      "step": 146
    },
    {
      "epoch": 0.7411764705882353,
      "grad_norm": 1.336371637577696,
      "learning_rate": 9.37357381812946e-06,
      "loss": 0.8713,
      "step": 147
    },
    {
      "epoch": 0.746218487394958,
      "grad_norm": 1.2559095107113698,
      "learning_rate": 9.359242263012693e-06,
      "loss": 0.8405,
      "step": 148
    },
    {
      "epoch": 0.7512605042016807,
      "grad_norm": 1.371982879040437,
      "learning_rate": 9.344759829525734e-06,
      "loss": 0.8666,
      "step": 149
    },
    {
      "epoch": 0.7563025210084033,
      "grad_norm": 1.23974913873784,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.7429,
      "step": 150
    },
    {
      "epoch": 0.761344537815126,
      "grad_norm": 1.3741045518217379,
      "learning_rate": 9.315344337660422e-06,
      "loss": 0.8649,
      "step": 151
    },
    {
      "epoch": 0.7663865546218488,
      "grad_norm": 1.348659089360585,
      "learning_rate": 9.300412297385954e-06,
      "loss": 0.8614,
      "step": 152
    },
    {
      "epoch": 0.7714285714285715,
      "grad_norm": 1.199362811459465,
      "learning_rate": 9.285331414913816e-06,
      "loss": 0.837,
      "step": 153
    },
    {
      "epoch": 0.7764705882352941,
      "grad_norm": 1.2184218309322916,
      "learning_rate": 9.270102212210632e-06,
      "loss": 0.8404,
      "step": 154
    },
    {
      "epoch": 0.7815126050420168,
      "grad_norm": 1.386612554465055,
      "learning_rate": 9.254725216376562e-06,
      "loss": 0.9221,
      "step": 155
    },
    {
      "epoch": 0.7865546218487395,
      "grad_norm": 1.3380478699356555,
      "learning_rate": 9.239200959627048e-06,
      "loss": 0.8627,
      "step": 156
    },
    {
      "epoch": 0.7915966386554621,
      "grad_norm": 1.4014570562834296,
      "learning_rate": 9.223529979274411e-06,
      "loss": 0.8525,
      "step": 157
    },
    {
      "epoch": 0.7966386554621848,
      "grad_norm": 1.3172489244042282,
      "learning_rate": 9.207712817709237e-06,
      "loss": 0.7901,
      "step": 158
    },
    {
      "epoch": 0.8016806722689076,
      "grad_norm": 1.354483035270781,
      "learning_rate": 9.191750022381613e-06,
      "loss": 0.865,
      "step": 159
    },
    {
      "epoch": 0.8067226890756303,
      "grad_norm": 1.2415343975219086,
      "learning_rate": 9.175642145782179e-06,
      "loss": 0.7898,
      "step": 160
    },
    {
      "epoch": 0.8117647058823529,
      "grad_norm": 1.2532359973917484,
      "learning_rate": 9.159389745423003e-06,
      "loss": 0.8372,
      "step": 161
    },
    {
      "epoch": 0.8168067226890756,
      "grad_norm": 1.2390725118364732,
      "learning_rate": 9.142993383818284e-06,
      "loss": 0.8383,
      "step": 162
    },
    {
      "epoch": 0.8218487394957983,
      "grad_norm": 1.3766117307822159,
      "learning_rate": 9.126453628464889e-06,
      "loss": 0.8151,
      "step": 163
    },
    {
      "epoch": 0.826890756302521,
      "grad_norm": 1.3256804846243377,
      "learning_rate": 9.109771051822702e-06,
      "loss": 0.8444,
      "step": 164
    },
    {
      "epoch": 0.8319327731092437,
      "grad_norm": 1.3520618668694473,
      "learning_rate": 9.09294623129482e-06,
      "loss": 0.8672,
      "step": 165
    },
    {
      "epoch": 0.8369747899159664,
      "grad_norm": 1.329653882039925,
      "learning_rate": 9.07597974920756e-06,
      "loss": 0.8168,
      "step": 166
    },
    {
      "epoch": 0.8420168067226891,
      "grad_norm": 1.3543281390803807,
      "learning_rate": 9.058872192790314e-06,
      "loss": 0.9118,
      "step": 167
    },
    {
      "epoch": 0.8470588235294118,
      "grad_norm": 1.3456977881970305,
      "learning_rate": 9.041624154155208e-06,
      "loss": 0.8515,
      "step": 168
    },
    {
      "epoch": 0.8521008403361344,
      "grad_norm": 1.297767613562501,
      "learning_rate": 9.02423623027663e-06,
      "loss": 0.7417,
      "step": 169
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 1.2894576740180352,
      "learning_rate": 9.006709022970547e-06,
      "loss": 0.8408,
      "step": 170
    },
    {
      "epoch": 0.8621848739495799,
      "grad_norm": 1.2240598626483896,
      "learning_rate": 8.98904313887369e-06,
      "loss": 0.7358,
      "step": 171
    },
    {
      "epoch": 0.8672268907563025,
      "grad_norm": 1.1890744366393113,
      "learning_rate": 8.971239189422555e-06,
      "loss": 0.8322,
      "step": 172
    },
    {
      "epoch": 0.8722689075630252,
      "grad_norm": 1.3386067991043302,
      "learning_rate": 8.953297790832231e-06,
      "loss": 0.8411,
      "step": 173
    },
    {
      "epoch": 0.8773109243697479,
      "grad_norm": 1.408000314117784,
      "learning_rate": 8.935219564075087e-06,
      "loss": 0.8036,
      "step": 174
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 1.3426412490545896,
      "learning_rate": 8.917005134859263e-06,
      "loss": 0.8035,
      "step": 175
    },
    {
      "epoch": 0.8873949579831932,
      "grad_norm": 1.4645291848377162,
      "learning_rate": 8.89865513360703e-06,
      "loss": 0.8392,
      "step": 176
    },
    {
      "epoch": 0.892436974789916,
      "grad_norm": 1.2117719390717796,
      "learning_rate": 8.88017019543296e-06,
      "loss": 0.8328,
      "step": 177
    },
    {
      "epoch": 0.8974789915966387,
      "grad_norm": 1.3344830085574295,
      "learning_rate": 8.861550960121946e-06,
      "loss": 0.8543,
      "step": 178
    },
    {
      "epoch": 0.9025210084033614,
      "grad_norm": 1.4853304361578643,
      "learning_rate": 8.842798072107055e-06,
      "loss": 0.8512,
      "step": 179
    },
    {
      "epoch": 0.907563025210084,
      "grad_norm": 1.2284352653979531,
      "learning_rate": 8.823912180447237e-06,
      "loss": 0.8598,
      "step": 180
    },
    {
      "epoch": 0.9126050420168067,
      "grad_norm": 1.37221802812512,
      "learning_rate": 8.804893938804839e-06,
      "loss": 0.8613,
      "step": 181
    },
    {
      "epoch": 0.9176470588235294,
      "grad_norm": 1.4397712752139291,
      "learning_rate": 8.785744005423003e-06,
      "loss": 0.8192,
      "step": 182
    },
    {
      "epoch": 0.9226890756302522,
      "grad_norm": 1.4307484306743805,
      "learning_rate": 8.766463043102864e-06,
      "loss": 0.8114,
      "step": 183
    },
    {
      "epoch": 0.9277310924369748,
      "grad_norm": 1.4036453214728524,
      "learning_rate": 8.747051719180626e-06,
      "loss": 0.8922,
      "step": 184
    },
    {
      "epoch": 0.9327731092436975,
      "grad_norm": 1.4752551479904314,
      "learning_rate": 8.727510705504453e-06,
      "loss": 0.8932,
      "step": 185
    },
    {
      "epoch": 0.9378151260504202,
      "grad_norm": 1.322337640774981,
      "learning_rate": 8.707840678411223e-06,
      "loss": 0.7998,
      "step": 186
    },
    {
      "epoch": 0.9428571428571428,
      "grad_norm": 1.2136277321616975,
      "learning_rate": 8.688042318703111e-06,
      "loss": 0.7416,
      "step": 187
    },
    {
      "epoch": 0.9478991596638655,
      "grad_norm": 1.342849040104635,
      "learning_rate": 8.66811631162404e-06,
      "loss": 0.8685,
      "step": 188
    },
    {
      "epoch": 0.9529411764705882,
      "grad_norm": 1.5250386207067939,
      "learning_rate": 8.648063346835943e-06,
      "loss": 0.8485,
      "step": 189
    },
    {
      "epoch": 0.957983193277311,
      "grad_norm": 1.3173191874193797,
      "learning_rate": 8.627884118394913e-06,
      "loss": 0.8286,
      "step": 190
    },
    {
      "epoch": 0.9630252100840336,
      "grad_norm": 1.32796081599915,
      "learning_rate": 8.607579324727175e-06,
      "loss": 0.8544,
      "step": 191
    },
    {
      "epoch": 0.9680672268907563,
      "grad_norm": 1.350363153783161,
      "learning_rate": 8.5871496686049e-06,
      "loss": 0.8102,
      "step": 192
    },
    {
      "epoch": 0.973109243697479,
      "grad_norm": 1.3655669107662696,
      "learning_rate": 8.566595857121902e-06,
      "loss": 0.8122,
      "step": 193
    },
    {
      "epoch": 0.9781512605042016,
      "grad_norm": 1.3452211499259599,
      "learning_rate": 8.545918601669147e-06,
      "loss": 0.8834,
      "step": 194
    },
    {
      "epoch": 0.9831932773109243,
      "grad_norm": 1.3376410418915317,
      "learning_rate": 8.525118617910144e-06,
      "loss": 0.8148,
      "step": 195
    },
    {
      "epoch": 0.9882352941176471,
      "grad_norm": 1.2489273918302621,
      "learning_rate": 8.504196625756166e-06,
      "loss": 0.8271,
      "step": 196
    },
    {
      "epoch": 0.9932773109243698,
      "grad_norm": 1.4139088289405872,
      "learning_rate": 8.483153349341336e-06,
      "loss": 0.845,
      "step": 197
    },
    {
      "epoch": 0.9983193277310924,
      "grad_norm": 1.384588034693747,
      "learning_rate": 8.461989516997565e-06,
      "loss": 0.8312,
      "step": 198
    },
    {
      "epoch": 1.0050420168067227,
      "grad_norm": 2.499955060770187,
      "learning_rate": 8.440705861229344e-06,
      "loss": 1.4381,
      "step": 199
    },
    {
      "epoch": 1.0100840336134453,
      "grad_norm": 1.413536932523174,
      "learning_rate": 8.41930311868839e-06,
      "loss": 0.713,
      "step": 200
    },
    {
      "epoch": 1.015126050420168,
      "grad_norm": 1.3570359586304308,
      "learning_rate": 8.397782030148147e-06,
      "loss": 0.716,
      "step": 201
    },
    {
      "epoch": 1.0201680672268907,
      "grad_norm": 1.187974845871534,
      "learning_rate": 8.376143340478153e-06,
      "loss": 0.6197,
      "step": 202
    },
    {
      "epoch": 1.0252100840336134,
      "grad_norm": 1.1805636492053666,
      "learning_rate": 8.354387798618254e-06,
      "loss": 0.6082,
      "step": 203
    },
    {
      "epoch": 1.030252100840336,
      "grad_norm": 1.3319326327566277,
      "learning_rate": 8.332516157552684e-06,
      "loss": 0.6667,
      "step": 204
    },
    {
      "epoch": 1.035294117647059,
      "grad_norm": 1.3080442340316867,
      "learning_rate": 8.310529174284004e-06,
      "loss": 0.6438,
      "step": 205
    },
    {
      "epoch": 1.0403361344537816,
      "grad_norm": 1.360919752940988,
      "learning_rate": 8.288427609806899e-06,
      "loss": 0.6931,
      "step": 206
    },
    {
      "epoch": 1.0453781512605043,
      "grad_norm": 1.2928882019326107,
      "learning_rate": 8.266212229081846e-06,
      "loss": 0.6571,
      "step": 207
    },
    {
      "epoch": 1.050420168067227,
      "grad_norm": 1.279346131512037,
      "learning_rate": 8.243883801008632e-06,
      "loss": 0.6105,
      "step": 208
    },
    {
      "epoch": 1.0554621848739496,
      "grad_norm": 1.3976246828088796,
      "learning_rate": 8.221443098399733e-06,
      "loss": 0.633,
      "step": 209
    },
    {
      "epoch": 1.0605042016806723,
      "grad_norm": 1.4051676037106482,
      "learning_rate": 8.198890897953586e-06,
      "loss": 0.631,
      "step": 210
    },
    {
      "epoch": 1.065546218487395,
      "grad_norm": 1.4026478680925658,
      "learning_rate": 8.176227980227693e-06,
      "loss": 0.646,
      "step": 211
    },
    {
      "epoch": 1.0705882352941176,
      "grad_norm": 1.4783461586544826,
      "learning_rate": 8.153455129611605e-06,
      "loss": 0.6341,
      "step": 212
    },
    {
      "epoch": 1.0756302521008403,
      "grad_norm": 1.2992917788523406,
      "learning_rate": 8.130573134299782e-06,
      "loss": 0.7027,
      "step": 213
    },
    {
      "epoch": 1.080672268907563,
      "grad_norm": 1.4403523864907255,
      "learning_rate": 8.107582786264299e-06,
      "loss": 0.6745,
      "step": 214
    },
    {
      "epoch": 1.0857142857142856,
      "grad_norm": 1.2904789259135272,
      "learning_rate": 8.084484881227449e-06,
      "loss": 0.6278,
      "step": 215
    },
    {
      "epoch": 1.0907563025210083,
      "grad_norm": 1.3928383691850674,
      "learning_rate": 8.061280218634192e-06,
      "loss": 0.665,
      "step": 216
    },
    {
      "epoch": 1.0957983193277312,
      "grad_norm": 1.3355440702392616,
      "learning_rate": 8.037969601624495e-06,
      "loss": 0.6095,
      "step": 217
    },
    {
      "epoch": 1.1008403361344539,
      "grad_norm": 1.3135802297885384,
      "learning_rate": 8.014553837005527e-06,
      "loss": 0.7134,
      "step": 218
    },
    {
      "epoch": 1.1058823529411765,
      "grad_norm": 1.3334358438044307,
      "learning_rate": 7.99103373522373e-06,
      "loss": 0.6149,
      "step": 219
    },
    {
      "epoch": 1.1109243697478992,
      "grad_norm": 1.3855125872698653,
      "learning_rate": 7.967410110336782e-06,
      "loss": 0.6709,
      "step": 220
    },
    {
      "epoch": 1.1159663865546219,
      "grad_norm": 1.4082439279428,
      "learning_rate": 7.943683779985412e-06,
      "loss": 0.6665,
      "step": 221
    },
    {
      "epoch": 1.1210084033613446,
      "grad_norm": 1.3849413150174785,
      "learning_rate": 7.919855565365102e-06,
      "loss": 0.6698,
      "step": 222
    },
    {
      "epoch": 1.1260504201680672,
      "grad_norm": 1.3025006342892487,
      "learning_rate": 7.895926291197667e-06,
      "loss": 0.6726,
      "step": 223
    },
    {
      "epoch": 1.13109243697479,
      "grad_norm": 1.3438499346918609,
      "learning_rate": 7.871896785702707e-06,
      "loss": 0.6361,
      "step": 224
    },
    {
      "epoch": 1.1361344537815126,
      "grad_norm": 1.252763414951386,
      "learning_rate": 7.847767880568944e-06,
      "loss": 0.6534,
      "step": 225
    },
    {
      "epoch": 1.1411764705882352,
      "grad_norm": 1.4594024040073388,
      "learning_rate": 7.823540410925434e-06,
      "loss": 0.7176,
      "step": 226
    },
    {
      "epoch": 1.146218487394958,
      "grad_norm": 1.3020082357416656,
      "learning_rate": 7.799215215312667e-06,
      "loss": 0.6117,
      "step": 227
    },
    {
      "epoch": 1.1512605042016806,
      "grad_norm": 1.3344891922181583,
      "learning_rate": 7.774793135653537e-06,
      "loss": 0.6502,
      "step": 228
    },
    {
      "epoch": 1.1563025210084033,
      "grad_norm": 1.1931020476239522,
      "learning_rate": 7.750275017224208e-06,
      "loss": 0.5864,
      "step": 229
    },
    {
      "epoch": 1.1613445378151261,
      "grad_norm": 1.3817137725123274,
      "learning_rate": 7.725661708624855e-06,
      "loss": 0.6845,
      "step": 230
    },
    {
      "epoch": 1.1663865546218488,
      "grad_norm": 1.3718851116188664,
      "learning_rate": 7.700954061750295e-06,
      "loss": 0.6666,
      "step": 231
    },
    {
      "epoch": 1.1714285714285715,
      "grad_norm": 1.3538961263237106,
      "learning_rate": 7.676152931760496e-06,
      "loss": 0.6815,
      "step": 232
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 1.3576998269549865,
      "learning_rate": 7.651259177050996e-06,
      "loss": 0.6169,
      "step": 233
    },
    {
      "epoch": 1.1815126050420168,
      "grad_norm": 1.3317040137841496,
      "learning_rate": 7.626273659223166e-06,
      "loss": 0.8546,
      "step": 234
    },
    {
      "epoch": 1.1865546218487395,
      "grad_norm": 1.368524911957153,
      "learning_rate": 7.601197243054411e-06,
      "loss": 0.6168,
      "step": 235
    },
    {
      "epoch": 1.1915966386554622,
      "grad_norm": 1.3058914037226665,
      "learning_rate": 7.576030796468233e-06,
      "loss": 0.7452,
      "step": 236
    },
    {
      "epoch": 1.1966386554621848,
      "grad_norm": 1.5392470830352827,
      "learning_rate": 7.5507751905041885e-06,
      "loss": 0.6195,
      "step": 237
    },
    {
      "epoch": 1.2016806722689075,
      "grad_norm": 1.4102673119306182,
      "learning_rate": 7.525431299287737e-06,
      "loss": 0.6523,
      "step": 238
    },
    {
      "epoch": 1.2067226890756302,
      "grad_norm": 1.4511322902886419,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.6862,
      "step": 239
    },
    {
      "epoch": 1.2117647058823529,
      "grad_norm": 1.2661930310847365,
      "learning_rate": 7.474482172847391e-06,
      "loss": 0.6528,
      "step": 240
    },
    {
      "epoch": 1.2168067226890757,
      "grad_norm": 1.3307860380456358,
      "learning_rate": 7.4488787010311425e-06,
      "loss": 0.6602,
      "step": 241
    },
    {
      "epoch": 1.2218487394957984,
      "grad_norm": 1.3750585055686875,
      "learning_rate": 7.423190470716761e-06,
      "loss": 0.6432,
      "step": 242
    },
    {
      "epoch": 1.226890756302521,
      "grad_norm": 1.2979245099980825,
      "learning_rate": 7.3974183710033334e-06,
      "loss": 0.6288,
      "step": 243
    },
    {
      "epoch": 1.2319327731092438,
      "grad_norm": 1.2999814021886877,
      "learning_rate": 7.371563293892761e-06,
      "loss": 0.6119,
      "step": 244
    },
    {
      "epoch": 1.2369747899159664,
      "grad_norm": 1.2917976929827104,
      "learning_rate": 7.345626134258897e-06,
      "loss": 0.6657,
      "step": 245
    },
    {
      "epoch": 1.242016806722689,
      "grad_norm": 1.4010288472470998,
      "learning_rate": 7.319607789816555e-06,
      "loss": 0.6586,
      "step": 246
    },
    {
      "epoch": 1.2470588235294118,
      "grad_norm": 1.4146400942510136,
      "learning_rate": 7.293509161090453e-06,
      "loss": 0.6595,
      "step": 247
    },
    {
      "epoch": 1.2521008403361344,
      "grad_norm": 1.2728109027093242,
      "learning_rate": 7.2673311513840395e-06,
      "loss": 0.6353,
      "step": 248
    },
    {
      "epoch": 1.2571428571428571,
      "grad_norm": 1.3471043709018875,
      "learning_rate": 7.241074666748228e-06,
      "loss": 0.6713,
      "step": 249
    },
    {
      "epoch": 1.2621848739495798,
      "grad_norm": 1.353231427350053,
      "learning_rate": 7.214740615950041e-06,
      "loss": 0.6102,
      "step": 250
    },
    {
      "epoch": 1.2672268907563025,
      "grad_norm": 1.337514944324046,
      "learning_rate": 7.188329910441154e-06,
      "loss": 0.6282,
      "step": 251
    },
    {
      "epoch": 1.2722689075630251,
      "grad_norm": 1.362404295247445,
      "learning_rate": 7.161843464326349e-06,
      "loss": 0.6072,
      "step": 252
    },
    {
      "epoch": 1.2773109243697478,
      "grad_norm": 1.1818447088372563,
      "learning_rate": 7.135282194331881e-06,
      "loss": 0.6057,
      "step": 253
    },
    {
      "epoch": 1.2823529411764705,
      "grad_norm": 1.4982822435126113,
      "learning_rate": 7.1086470197737405e-06,
      "loss": 0.6803,
      "step": 254
    },
    {
      "epoch": 1.2873949579831931,
      "grad_norm": 1.4344811997979932,
      "learning_rate": 7.0819388625258385e-06,
      "loss": 0.8567,
      "step": 255
    },
    {
      "epoch": 1.292436974789916,
      "grad_norm": 1.3859091438882214,
      "learning_rate": 7.05515864698811e-06,
      "loss": 0.7355,
      "step": 256
    },
    {
      "epoch": 1.2974789915966387,
      "grad_norm": 1.1626254136263392,
      "learning_rate": 7.028307300054499e-06,
      "loss": 0.5839,
      "step": 257
    },
    {
      "epoch": 1.3025210084033614,
      "grad_norm": 1.3552944579781003,
      "learning_rate": 7.0013857510808934e-06,
      "loss": 0.6836,
      "step": 258
    },
    {
      "epoch": 1.307563025210084,
      "grad_norm": 1.3028817545835125,
      "learning_rate": 6.974394931852957e-06,
      "loss": 0.6284,
      "step": 259
    },
    {
      "epoch": 1.3126050420168067,
      "grad_norm": 1.5434124541373508,
      "learning_rate": 6.94733577655387e-06,
      "loss": 0.7012,
      "step": 260
    },
    {
      "epoch": 1.3176470588235294,
      "grad_norm": 1.303474015679206,
      "learning_rate": 6.920209221732007e-06,
      "loss": 0.5703,
      "step": 261
    },
    {
      "epoch": 1.322689075630252,
      "grad_norm": 1.3348450903633984,
      "learning_rate": 6.893016206268518e-06,
      "loss": 0.5917,
      "step": 262
    },
    {
      "epoch": 1.3277310924369747,
      "grad_norm": 1.3433706513738732,
      "learning_rate": 6.865757671344827e-06,
      "loss": 0.6672,
      "step": 263
    },
    {
      "epoch": 1.3327731092436974,
      "grad_norm": 1.2935787672149481,
      "learning_rate": 6.838434560410064e-06,
      "loss": 0.6701,
      "step": 264
    },
    {
      "epoch": 1.3378151260504203,
      "grad_norm": 1.3458569492608534,
      "learning_rate": 6.811047819148413e-06,
      "loss": 0.6647,
      "step": 265
    },
    {
      "epoch": 1.342857142857143,
      "grad_norm": 1.3814097147596185,
      "learning_rate": 6.783598395446371e-06,
      "loss": 0.6866,
      "step": 266
    },
    {
      "epoch": 1.3478991596638656,
      "grad_norm": 1.384769236934002,
      "learning_rate": 6.756087239359948e-06,
      "loss": 0.6058,
      "step": 267
    },
    {
      "epoch": 1.3529411764705883,
      "grad_norm": 1.4299755108319103,
      "learning_rate": 6.728515303081782e-06,
      "loss": 0.6608,
      "step": 268
    },
    {
      "epoch": 1.357983193277311,
      "grad_norm": 1.6844501725850975,
      "learning_rate": 6.700883540908185e-06,
      "loss": 0.6902,
      "step": 269
    },
    {
      "epoch": 1.3630252100840337,
      "grad_norm": 1.490837215727114,
      "learning_rate": 6.673192909206109e-06,
      "loss": 0.6622,
      "step": 270
    },
    {
      "epoch": 1.3680672268907563,
      "grad_norm": 1.5025542365103597,
      "learning_rate": 6.64544436638005e-06,
      "loss": 0.7318,
      "step": 271
    },
    {
      "epoch": 1.373109243697479,
      "grad_norm": 1.368007843570876,
      "learning_rate": 6.617638872838874e-06,
      "loss": 0.6616,
      "step": 272
    },
    {
      "epoch": 1.3781512605042017,
      "grad_norm": 1.3302784390410516,
      "learning_rate": 6.589777390962575e-06,
      "loss": 0.5837,
      "step": 273
    },
    {
      "epoch": 1.3831932773109243,
      "grad_norm": 1.3818583989196362,
      "learning_rate": 6.561860885068972e-06,
      "loss": 0.7319,
      "step": 274
    },
    {
      "epoch": 1.388235294117647,
      "grad_norm": 1.3678970576063487,
      "learning_rate": 6.53389032138032e-06,
      "loss": 0.6479,
      "step": 275
    },
    {
      "epoch": 1.3932773109243697,
      "grad_norm": 1.3918528373329961,
      "learning_rate": 6.505866667989884e-06,
      "loss": 0.6657,
      "step": 276
    },
    {
      "epoch": 1.3983193277310924,
      "grad_norm": 1.3578596611461975,
      "learning_rate": 6.477790894828422e-06,
      "loss": 0.6227,
      "step": 277
    },
    {
      "epoch": 1.403361344537815,
      "grad_norm": 1.37442116613121,
      "learning_rate": 6.449663973630613e-06,
      "loss": 0.668,
      "step": 278
    },
    {
      "epoch": 1.4084033613445377,
      "grad_norm": 1.251535744853749,
      "learning_rate": 6.421486877901436e-06,
      "loss": 0.6394,
      "step": 279
    },
    {
      "epoch": 1.4134453781512604,
      "grad_norm": 1.3817098557899696,
      "learning_rate": 6.393260582882462e-06,
      "loss": 0.7289,
      "step": 280
    },
    {
      "epoch": 1.4184873949579833,
      "grad_norm": 1.3924770743130575,
      "learning_rate": 6.364986065518106e-06,
      "loss": 0.6632,
      "step": 281
    },
    {
      "epoch": 1.423529411764706,
      "grad_norm": 1.3388647960669742,
      "learning_rate": 6.336664304421818e-06,
      "loss": 0.6445,
      "step": 282
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 1.3627824010774807,
      "learning_rate": 6.308296279842204e-06,
      "loss": 0.6785,
      "step": 283
    },
    {
      "epoch": 1.4336134453781513,
      "grad_norm": 1.2353887841733255,
      "learning_rate": 6.279882973629101e-06,
      "loss": 0.5987,
      "step": 284
    },
    {
      "epoch": 1.438655462184874,
      "grad_norm": 1.2803646798399686,
      "learning_rate": 6.2514253691996e-06,
      "loss": 0.6593,
      "step": 285
    },
    {
      "epoch": 1.4436974789915966,
      "grad_norm": 1.3106097252223476,
      "learning_rate": 6.222924451504001e-06,
      "loss": 0.6612,
      "step": 286
    },
    {
      "epoch": 1.4487394957983193,
      "grad_norm": 1.491149138722541,
      "learning_rate": 6.194381206991723e-06,
      "loss": 0.6603,
      "step": 287
    },
    {
      "epoch": 1.453781512605042,
      "grad_norm": 1.4729722170121724,
      "learning_rate": 6.165796623577171e-06,
      "loss": 0.6458,
      "step": 288
    },
    {
      "epoch": 1.4588235294117646,
      "grad_norm": 1.2583772868484708,
      "learning_rate": 6.1371716906055336e-06,
      "loss": 0.6571,
      "step": 289
    },
    {
      "epoch": 1.4638655462184875,
      "grad_norm": 1.6484902113991295,
      "learning_rate": 6.10850739881854e-06,
      "loss": 0.8048,
      "step": 290
    },
    {
      "epoch": 1.4689075630252102,
      "grad_norm": 1.1293948636395863,
      "learning_rate": 6.079804740320181e-06,
      "loss": 0.631,
      "step": 291
    },
    {
      "epoch": 1.4739495798319329,
      "grad_norm": 1.357543211738453,
      "learning_rate": 6.051064708542357e-06,
      "loss": 0.6834,
      "step": 292
    },
    {
      "epoch": 1.4789915966386555,
      "grad_norm": 1.422094283192291,
      "learning_rate": 6.022288298210502e-06,
      "loss": 0.7688,
      "step": 293
    },
    {
      "epoch": 1.4840336134453782,
      "grad_norm": 1.3320687626409005,
      "learning_rate": 5.993476505309154e-06,
      "loss": 0.6438,
      "step": 294
    },
    {
      "epoch": 1.4890756302521009,
      "grad_norm": 1.479155880731166,
      "learning_rate": 5.964630327047485e-06,
      "loss": 0.6983,
      "step": 295
    },
    {
      "epoch": 1.4941176470588236,
      "grad_norm": 1.4751670026359378,
      "learning_rate": 5.935750761824777e-06,
      "loss": 0.6784,
      "step": 296
    },
    {
      "epoch": 1.4991596638655462,
      "grad_norm": 1.3971166152312533,
      "learning_rate": 5.906838809195879e-06,
      "loss": 0.7934,
      "step": 297
    },
    {
      "epoch": 1.504201680672269,
      "grad_norm": 1.486282793941636,
      "learning_rate": 5.877895469836604e-06,
      "loss": 0.7149,
      "step": 298
    },
    {
      "epoch": 1.5092436974789916,
      "grad_norm": 1.3831360984251488,
      "learning_rate": 5.848921745509094e-06,
      "loss": 0.6853,
      "step": 299
    },
    {
      "epoch": 1.5142857142857142,
      "grad_norm": 1.373255418518971,
      "learning_rate": 5.819918639027149e-06,
      "loss": 0.6262,
      "step": 300
    },
    {
      "epoch": 1.519327731092437,
      "grad_norm": 1.398139776725886,
      "learning_rate": 5.790887154221521e-06,
      "loss": 0.6682,
      "step": 301
    },
    {
      "epoch": 1.5243697478991596,
      "grad_norm": 1.459786025141565,
      "learning_rate": 5.7618282959051685e-06,
      "loss": 0.6596,
      "step": 302
    },
    {
      "epoch": 1.5294117647058822,
      "grad_norm": 1.386843554966046,
      "learning_rate": 5.7327430698384775e-06,
      "loss": 0.662,
      "step": 303
    },
    {
      "epoch": 1.534453781512605,
      "grad_norm": 1.334093052658649,
      "learning_rate": 5.703632482694453e-06,
      "loss": 0.5642,
      "step": 304
    },
    {
      "epoch": 1.5394957983193276,
      "grad_norm": 1.394936799748242,
      "learning_rate": 5.674497542023875e-06,
      "loss": 0.6785,
      "step": 305
    },
    {
      "epoch": 1.5445378151260503,
      "grad_norm": 1.2487045092120568,
      "learning_rate": 5.645339256220427e-06,
      "loss": 0.6405,
      "step": 306
    },
    {
      "epoch": 1.5495798319327732,
      "grad_norm": 1.449626002944486,
      "learning_rate": 5.616158634485793e-06,
      "loss": 0.7186,
      "step": 307
    },
    {
      "epoch": 1.5546218487394958,
      "grad_norm": 1.3148115913009149,
      "learning_rate": 5.5869566867947344e-06,
      "loss": 0.6689,
      "step": 308
    },
    {
      "epoch": 1.5596638655462185,
      "grad_norm": 1.3031066852612374,
      "learning_rate": 5.557734423860122e-06,
      "loss": 0.6865,
      "step": 309
    },
    {
      "epoch": 1.5647058823529412,
      "grad_norm": 1.4070190634154978,
      "learning_rate": 5.528492857097966e-06,
      "loss": 0.692,
      "step": 310
    },
    {
      "epoch": 1.5697478991596638,
      "grad_norm": 1.424416347019562,
      "learning_rate": 5.499232998592399e-06,
      "loss": 0.6712,
      "step": 311
    },
    {
      "epoch": 1.5747899159663865,
      "grad_norm": 1.4045930546601455,
      "learning_rate": 5.469955861060653e-06,
      "loss": 0.692,
      "step": 312
    },
    {
      "epoch": 1.5798319327731094,
      "grad_norm": 1.4633924161825607,
      "learning_rate": 5.44066245781801e-06,
      "loss": 0.6972,
      "step": 313
    },
    {
      "epoch": 1.584873949579832,
      "grad_norm": 1.3419059215183884,
      "learning_rate": 5.4113538027427245e-06,
      "loss": 0.5832,
      "step": 314
    },
    {
      "epoch": 1.5899159663865547,
      "grad_norm": 1.4651690425379238,
      "learning_rate": 5.382030910240936e-06,
      "loss": 0.7263,
      "step": 315
    },
    {
      "epoch": 1.5949579831932774,
      "grad_norm": 1.3544416080791692,
      "learning_rate": 5.352694795211555e-06,
      "loss": 0.6693,
      "step": 316
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.3796831843734638,
      "learning_rate": 5.3233464730111426e-06,
      "loss": 0.6843,
      "step": 317
    },
    {
      "epoch": 1.6050420168067228,
      "grad_norm": 1.3756368583869594,
      "learning_rate": 5.29398695941876e-06,
      "loss": 0.6956,
      "step": 318
    },
    {
      "epoch": 1.6100840336134454,
      "grad_norm": 1.354906917799083,
      "learning_rate": 5.2646172706008154e-06,
      "loss": 0.5865,
      "step": 319
    },
    {
      "epoch": 1.615126050420168,
      "grad_norm": 1.283604806155226,
      "learning_rate": 5.235238423075899e-06,
      "loss": 0.6476,
      "step": 320
    },
    {
      "epoch": 1.6201680672268908,
      "grad_norm": 1.3323430668544856,
      "learning_rate": 5.20585143367959e-06,
      "loss": 0.5978,
      "step": 321
    },
    {
      "epoch": 1.6252100840336134,
      "grad_norm": 1.4432636768429228,
      "learning_rate": 5.176457319529264e-06,
      "loss": 0.7229,
      "step": 322
    },
    {
      "epoch": 1.6302521008403361,
      "grad_norm": 1.3389659599587687,
      "learning_rate": 5.147057097988898e-06,
      "loss": 0.7036,
      "step": 323
    },
    {
      "epoch": 1.6352941176470588,
      "grad_norm": 1.40224689957347,
      "learning_rate": 5.1176517866338495e-06,
      "loss": 0.6524,
      "step": 324
    },
    {
      "epoch": 1.6403361344537815,
      "grad_norm": 1.448948508673923,
      "learning_rate": 5.088242403215644e-06,
      "loss": 0.6574,
      "step": 325
    },
    {
      "epoch": 1.6453781512605041,
      "grad_norm": 1.4336192786572701,
      "learning_rate": 5.058829965626742e-06,
      "loss": 0.6649,
      "step": 326
    },
    {
      "epoch": 1.6504201680672268,
      "grad_norm": 1.1551398885920936,
      "learning_rate": 5.029415491865311e-06,
      "loss": 0.6607,
      "step": 327
    },
    {
      "epoch": 1.6554621848739495,
      "grad_norm": 1.4081755117550179,
      "learning_rate": 5e-06,
      "loss": 0.6308,
      "step": 328
    },
    {
      "epoch": 1.6605042016806721,
      "grad_norm": 1.2962293823552042,
      "learning_rate": 4.97058450813469e-06,
      "loss": 0.6315,
      "step": 329
    },
    {
      "epoch": 1.6655462184873948,
      "grad_norm": 1.2609233329938516,
      "learning_rate": 4.94117003437326e-06,
      "loss": 0.6453,
      "step": 330
    },
    {
      "epoch": 1.6705882352941175,
      "grad_norm": 1.4395586718171531,
      "learning_rate": 4.911757596784358e-06,
      "loss": 0.7056,
      "step": 331
    },
    {
      "epoch": 1.6756302521008404,
      "grad_norm": 1.490647265803814,
      "learning_rate": 4.882348213366152e-06,
      "loss": 0.7463,
      "step": 332
    },
    {
      "epoch": 1.680672268907563,
      "grad_norm": 1.4744084173114673,
      "learning_rate": 4.8529429020111035e-06,
      "loss": 0.6518,
      "step": 333
    },
    {
      "epoch": 1.6857142857142857,
      "grad_norm": 1.3256051086606053,
      "learning_rate": 4.823542680470738e-06,
      "loss": 0.6322,
      "step": 334
    },
    {
      "epoch": 1.6907563025210084,
      "grad_norm": 1.4043201154667322,
      "learning_rate": 4.794148566320412e-06,
      "loss": 0.6623,
      "step": 335
    },
    {
      "epoch": 1.695798319327731,
      "grad_norm": 1.3058283187944708,
      "learning_rate": 4.7647615769241e-06,
      "loss": 0.7233,
      "step": 336
    },
    {
      "epoch": 1.7008403361344537,
      "grad_norm": 1.3709304051984876,
      "learning_rate": 4.7353827293991845e-06,
      "loss": 0.7237,
      "step": 337
    },
    {
      "epoch": 1.7058823529411766,
      "grad_norm": 1.3476441152074792,
      "learning_rate": 4.706013040581242e-06,
      "loss": 0.6408,
      "step": 338
    },
    {
      "epoch": 1.7109243697478993,
      "grad_norm": 1.4435937624188804,
      "learning_rate": 4.676653526988858e-06,
      "loss": 0.6647,
      "step": 339
    },
    {
      "epoch": 1.715966386554622,
      "grad_norm": 1.3226553142476545,
      "learning_rate": 4.647305204788445e-06,
      "loss": 0.6489,
      "step": 340
    },
    {
      "epoch": 1.7210084033613446,
      "grad_norm": 1.3388051536697478,
      "learning_rate": 4.617969089759066e-06,
      "loss": 0.6414,
      "step": 341
    },
    {
      "epoch": 1.7260504201680673,
      "grad_norm": 1.369018029455846,
      "learning_rate": 4.588646197257278e-06,
      "loss": 0.6535,
      "step": 342
    },
    {
      "epoch": 1.73109243697479,
      "grad_norm": 1.4137443784434733,
      "learning_rate": 4.559337542181993e-06,
      "loss": 0.6446,
      "step": 343
    },
    {
      "epoch": 1.7361344537815127,
      "grad_norm": 1.3718987426836817,
      "learning_rate": 4.53004413893935e-06,
      "loss": 0.6477,
      "step": 344
    },
    {
      "epoch": 1.7411764705882353,
      "grad_norm": 1.262236928246166,
      "learning_rate": 4.500767001407604e-06,
      "loss": 0.6059,
      "step": 345
    },
    {
      "epoch": 1.746218487394958,
      "grad_norm": 1.3613528737566392,
      "learning_rate": 4.471507142902036e-06,
      "loss": 0.6545,
      "step": 346
    },
    {
      "epoch": 1.7512605042016807,
      "grad_norm": 1.303211681985445,
      "learning_rate": 4.4422655761398785e-06,
      "loss": 0.633,
      "step": 347
    },
    {
      "epoch": 1.7563025210084033,
      "grad_norm": 1.3262900181605304,
      "learning_rate": 4.413043313205266e-06,
      "loss": 0.6873,
      "step": 348
    },
    {
      "epoch": 1.761344537815126,
      "grad_norm": 1.5014706286550592,
      "learning_rate": 4.383841365514208e-06,
      "loss": 0.6715,
      "step": 349
    },
    {
      "epoch": 1.7663865546218487,
      "grad_norm": 1.3748458240376293,
      "learning_rate": 4.354660743779575e-06,
      "loss": 0.6322,
      "step": 350
    },
    {
      "epoch": 1.7714285714285714,
      "grad_norm": 1.3200606309946945,
      "learning_rate": 4.325502457976126e-06,
      "loss": 0.6468,
      "step": 351
    },
    {
      "epoch": 1.776470588235294,
      "grad_norm": 1.4363798100469027,
      "learning_rate": 4.296367517305548e-06,
      "loss": 0.6424,
      "step": 352
    },
    {
      "epoch": 1.7815126050420167,
      "grad_norm": 1.3665833844005753,
      "learning_rate": 4.267256930161523e-06,
      "loss": 0.6895,
      "step": 353
    },
    {
      "epoch": 1.7865546218487394,
      "grad_norm": 1.3126702843544444,
      "learning_rate": 4.238171704094833e-06,
      "loss": 0.6766,
      "step": 354
    },
    {
      "epoch": 1.791596638655462,
      "grad_norm": 1.3931998076257006,
      "learning_rate": 4.209112845778481e-06,
      "loss": 0.7165,
      "step": 355
    },
    {
      "epoch": 1.7966386554621847,
      "grad_norm": 1.4120182498478362,
      "learning_rate": 4.180081360972852e-06,
      "loss": 0.6909,
      "step": 356
    },
    {
      "epoch": 1.8016806722689076,
      "grad_norm": 1.3825157448385343,
      "learning_rate": 4.151078254490908e-06,
      "loss": 0.6634,
      "step": 357
    },
    {
      "epoch": 1.8067226890756303,
      "grad_norm": 1.2976324503271779,
      "learning_rate": 4.122104530163397e-06,
      "loss": 0.6482,
      "step": 358
    },
    {
      "epoch": 1.811764705882353,
      "grad_norm": 1.3371821093594873,
      "learning_rate": 4.09316119080412e-06,
      "loss": 0.5939,
      "step": 359
    },
    {
      "epoch": 1.8168067226890756,
      "grad_norm": 1.2815723486743216,
      "learning_rate": 4.064249238175223e-06,
      "loss": 0.5873,
      "step": 360
    },
    {
      "epoch": 1.8218487394957983,
      "grad_norm": 1.2598876616725718,
      "learning_rate": 4.035369672952516e-06,
      "loss": 0.6211,
      "step": 361
    },
    {
      "epoch": 1.826890756302521,
      "grad_norm": 1.3775558524100238,
      "learning_rate": 4.0065234946908456e-06,
      "loss": 0.6362,
      "step": 362
    },
    {
      "epoch": 1.8319327731092439,
      "grad_norm": 1.3605455122282684,
      "learning_rate": 3.977711701789499e-06,
      "loss": 0.6173,
      "step": 363
    },
    {
      "epoch": 1.8369747899159665,
      "grad_norm": 1.2800072707024852,
      "learning_rate": 3.948935291457645e-06,
      "loss": 0.6325,
      "step": 364
    },
    {
      "epoch": 1.8420168067226892,
      "grad_norm": 1.3258336050686086,
      "learning_rate": 3.920195259679822e-06,
      "loss": 0.653,
      "step": 365
    },
    {
      "epoch": 1.8470588235294119,
      "grad_norm": 1.3413446326047822,
      "learning_rate": 3.891492601181462e-06,
      "loss": 0.651,
      "step": 366
    },
    {
      "epoch": 1.8521008403361345,
      "grad_norm": 1.41115994835795,
      "learning_rate": 3.862828309394469e-06,
      "loss": 0.6292,
      "step": 367
    },
    {
      "epoch": 1.8571428571428572,
      "grad_norm": 1.3205359045412157,
      "learning_rate": 3.834203376422831e-06,
      "loss": 0.6064,
      "step": 368
    },
    {
      "epoch": 1.8621848739495799,
      "grad_norm": 1.271016774529,
      "learning_rate": 3.805618793008279e-06,
      "loss": 0.6503,
      "step": 369
    },
    {
      "epoch": 1.8672268907563025,
      "grad_norm": 1.38208148943542,
      "learning_rate": 3.777075548496001e-06,
      "loss": 0.673,
      "step": 370
    },
    {
      "epoch": 1.8722689075630252,
      "grad_norm": 1.4627608316199674,
      "learning_rate": 3.7485746308004013e-06,
      "loss": 0.6853,
      "step": 371
    },
    {
      "epoch": 1.877310924369748,
      "grad_norm": 1.2952312321525565,
      "learning_rate": 3.7201170263709004e-06,
      "loss": 0.6164,
      "step": 372
    },
    {
      "epoch": 1.8823529411764706,
|
"grad_norm": 1.4840833764786416, |
|
"learning_rate": 3.6917037201577977e-06, |
|
"loss": 0.6935, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.8873949579831932, |
|
"grad_norm": 1.371096887673559, |
|
"learning_rate": 3.6633356955781827e-06, |
|
"loss": 0.6571, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.892436974789916, |
|
"grad_norm": 1.1787569156110669, |
|
"learning_rate": 3.635013934481895e-06, |
|
"loss": 0.5976, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.8974789915966386, |
|
"grad_norm": 1.292415912438797, |
|
"learning_rate": 3.6067394171175397e-06, |
|
"loss": 0.662, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.9025210084033612, |
|
"grad_norm": 1.4004270726912136, |
|
"learning_rate": 3.578513122098566e-06, |
|
"loss": 0.6902, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.907563025210084, |
|
"grad_norm": 1.3676893820953542, |
|
"learning_rate": 3.5503360263693887e-06, |
|
"loss": 0.6736, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.9126050420168066, |
|
"grad_norm": 1.5497019666472422, |
|
"learning_rate": 3.5222091051715803e-06, |
|
"loss": 0.6474, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.9176470588235293, |
|
"grad_norm": 1.4107058784966016, |
|
"learning_rate": 3.4941333320101173e-06, |
|
"loss": 0.6214, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.9226890756302522, |
|
"grad_norm": 1.3074693513299003, |
|
"learning_rate": 3.466109678619681e-06, |
|
"loss": 0.5863, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.9277310924369748, |
|
"grad_norm": 1.2533065740051568, |
|
"learning_rate": 3.4381391149310294e-06, |
|
"loss": 0.6145, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.9327731092436975, |
|
"grad_norm": 1.279932965905714, |
|
"learning_rate": 3.4102226090374246e-06, |
|
"loss": 0.6138, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.9378151260504202, |
|
"grad_norm": 1.279194036152673, |
|
"learning_rate": 3.3823611271611266e-06, |
|
"loss": 0.6051, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.9428571428571428, |
|
"grad_norm": 1.4523883672700335, |
|
"learning_rate": 3.35455563361995e-06, |
|
"loss": 0.6475, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.9478991596638655, |
|
"grad_norm": 1.319917640705539, |
|
"learning_rate": 3.3268070907938915e-06, |
|
"loss": 0.575, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.9529411764705882, |
|
"grad_norm": 1.356219744351625, |
|
"learning_rate": 3.2991164590918162e-06, |
|
"loss": 0.6707, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.957983193277311, |
|
"grad_norm": 1.3980927144998019, |
|
"learning_rate": 3.271484696918218e-06, |
|
"loss": 0.62, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.9630252100840337, |
|
"grad_norm": 1.3412194145756722, |
|
"learning_rate": 3.2439127606400546e-06, |
|
"loss": 0.6249, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.9680672268907564, |
|
"grad_norm": 1.231905550971943, |
|
"learning_rate": 3.2164016045536306e-06, |
|
"loss": 0.6542, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.973109243697479, |
|
"grad_norm": 1.3549695794420435, |
|
"learning_rate": 3.1889521808515888e-06, |
|
"loss": 0.6176, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.9781512605042018, |
|
"grad_norm": 1.415166811994311, |
|
"learning_rate": 3.1615654395899377e-06, |
|
"loss": 0.6593, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.9831932773109244, |
|
"grad_norm": 1.3126591809141124, |
|
"learning_rate": 3.1342423286551756e-06, |
|
"loss": 0.6891, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.988235294117647, |
|
"grad_norm": 1.3842054436860431, |
|
"learning_rate": 3.1069837937314846e-06, |
|
"loss": 0.6342, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.9932773109243698, |
|
"grad_norm": 1.4424046044230687, |
|
"learning_rate": 3.0797907782679944e-06, |
|
"loss": 0.6461, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.9983193277310924, |
|
"grad_norm": 1.3718751038472339, |
|
"learning_rate": 3.0526642234461313e-06, |
|
"loss": 0.6338, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.0050420168067227, |
|
"grad_norm": 3.363833604785768, |
|
"learning_rate": 3.0256050681470446e-06, |
|
"loss": 1.2006, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.0100840336134453, |
|
"grad_norm": 1.410375521884215, |
|
"learning_rate": 2.9986142489191074e-06, |
|
"loss": 0.5121, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.015126050420168, |
|
"grad_norm": 1.463355598251907, |
|
"learning_rate": 2.971692699945502e-06, |
|
"loss": 0.4394, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.0201680672268907, |
|
"grad_norm": 1.2914998337098158, |
|
"learning_rate": 2.9448413530118912e-06, |
|
"loss": 0.4978, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0252100840336134, |
|
"grad_norm": 1.3604150815997402, |
|
"learning_rate": 2.9180611374741623e-06, |
|
"loss": 0.4689, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.030252100840336, |
|
"grad_norm": 1.1964953052023972, |
|
"learning_rate": 2.891352980226262e-06, |
|
"loss": 0.5015, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.0352941176470587, |
|
"grad_norm": 1.1694739760631343, |
|
"learning_rate": 2.8647178056681197e-06, |
|
"loss": 0.447, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.0403361344537814, |
|
"grad_norm": 1.3174590682003549, |
|
"learning_rate": 2.838156535673652e-06, |
|
"loss": 0.414, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.045378151260504, |
|
"grad_norm": 1.2140198128144435, |
|
"learning_rate": 2.8116700895588473e-06, |
|
"loss": 0.4505, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.0504201680672267, |
|
"grad_norm": 1.3398119898455612, |
|
"learning_rate": 2.785259384049959e-06, |
|
"loss": 0.4532, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.0554621848739494, |
|
"grad_norm": 1.4229930176202614, |
|
"learning_rate": 2.7589253332517736e-06, |
|
"loss": 0.5546, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.060504201680672, |
|
"grad_norm": 1.4684509907326317, |
|
"learning_rate": 2.7326688486159613e-06, |
|
"loss": 0.5254, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.065546218487395, |
|
"grad_norm": 1.4962520925453975, |
|
"learning_rate": 2.706490838909547e-06, |
|
"loss": 0.4673, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.070588235294118, |
|
"grad_norm": 1.3630229586386085, |
|
"learning_rate": 2.680392210183446e-06, |
|
"loss": 0.4473, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.0756302521008405, |
|
"grad_norm": 1.38978907137299, |
|
"learning_rate": 2.6543738657411033e-06, |
|
"loss": 0.5159, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.080672268907563, |
|
"grad_norm": 1.429662885547244, |
|
"learning_rate": 2.628436706107238e-06, |
|
"loss": 0.5161, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.085714285714286, |
|
"grad_norm": 1.394356185017467, |
|
"learning_rate": 2.6025816289966703e-06, |
|
"loss": 0.5032, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.0907563025210085, |
|
"grad_norm": 1.480088664868798, |
|
"learning_rate": 2.5768095292832412e-06, |
|
"loss": 0.4802, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.095798319327731, |
|
"grad_norm": 1.3859048551297604, |
|
"learning_rate": 2.5511212989688587e-06, |
|
"loss": 0.4993, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.100840336134454, |
|
"grad_norm": 1.440430022618694, |
|
"learning_rate": 2.525517827152614e-06, |
|
"loss": 0.4551, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.1058823529411765, |
|
"grad_norm": 1.4332550806993916, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.5611, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.110924369747899, |
|
"grad_norm": 1.3161188350792523, |
|
"learning_rate": 2.4745687007122636e-06, |
|
"loss": 0.4602, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.115966386554622, |
|
"grad_norm": 1.4145836319136063, |
|
"learning_rate": 2.449224809495815e-06, |
|
"loss": 0.4464, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.1210084033613446, |
|
"grad_norm": 1.3638972016864883, |
|
"learning_rate": 2.423969203531768e-06, |
|
"loss": 0.4625, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.1260504201680672, |
|
"grad_norm": 1.4282920146552893, |
|
"learning_rate": 2.3988027569455895e-06, |
|
"loss": 0.4809, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.13109243697479, |
|
"grad_norm": 1.452704091304085, |
|
"learning_rate": 2.373726340776837e-06, |
|
"loss": 0.4959, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.1361344537815126, |
|
"grad_norm": 1.4474065940760683, |
|
"learning_rate": 2.348740822949006e-06, |
|
"loss": 0.4557, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.1411764705882352, |
|
"grad_norm": 1.406883162238408, |
|
"learning_rate": 2.323847068239504e-06, |
|
"loss": 0.5069, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.146218487394958, |
|
"grad_norm": 1.4713827636564831, |
|
"learning_rate": 2.2990459382497086e-06, |
|
"loss": 0.4813, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.1512605042016806, |
|
"grad_norm": 1.4582227343532888, |
|
"learning_rate": 2.274338291375147e-06, |
|
"loss": 0.462, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.1563025210084033, |
|
"grad_norm": 1.353197229608169, |
|
"learning_rate": 2.2497249827757933e-06, |
|
"loss": 0.4658, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.161344537815126, |
|
"grad_norm": 1.3550947330778897, |
|
"learning_rate": 2.225206864346465e-06, |
|
"loss": 0.5794, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.1663865546218486, |
|
"grad_norm": 1.4137143069445475, |
|
"learning_rate": 2.2007847846873342e-06, |
|
"loss": 0.4722, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.1714285714285713, |
|
"grad_norm": 1.2932234077066185, |
|
"learning_rate": 2.176459589074566e-06, |
|
"loss": 0.4369, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.176470588235294, |
|
"grad_norm": 1.3725308971047603, |
|
"learning_rate": 2.1522321194310577e-06, |
|
"loss": 0.4958, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 2.1815126050420166, |
|
"grad_norm": 1.4324324040918073, |
|
"learning_rate": 2.1281032142972933e-06, |
|
"loss": 0.4954, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.1865546218487397, |
|
"grad_norm": 1.4153168395436235, |
|
"learning_rate": 2.1040737088023323e-06, |
|
"loss": 0.4457, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 2.1915966386554624, |
|
"grad_norm": 1.3341155055487035, |
|
"learning_rate": 2.080144434634898e-06, |
|
"loss": 0.5017, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.196638655462185, |
|
"grad_norm": 1.352939614197411, |
|
"learning_rate": 2.056316220014588e-06, |
|
"loss": 0.4553, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.2016806722689077, |
|
"grad_norm": 1.393182470026338, |
|
"learning_rate": 2.0325898896632178e-06, |
|
"loss": 0.4448, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.2067226890756304, |
|
"grad_norm": 1.4033955608191793, |
|
"learning_rate": 2.0089662647762716e-06, |
|
"loss": 0.441, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 2.211764705882353, |
|
"grad_norm": 1.41226298350313, |
|
"learning_rate": 1.9854461629944764e-06, |
|
"loss": 0.4656, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.2168067226890757, |
|
"grad_norm": 1.3512621478929514, |
|
"learning_rate": 1.962030398375506e-06, |
|
"loss": 0.5245, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.2218487394957984, |
|
"grad_norm": 1.3932479184910864, |
|
"learning_rate": 1.9387197813658092e-06, |
|
"loss": 0.456, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.226890756302521, |
|
"grad_norm": 1.3400595100259751, |
|
"learning_rate": 1.915515118772555e-06, |
|
"loss": 0.4622, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.2319327731092438, |
|
"grad_norm": 1.3239101426319217, |
|
"learning_rate": 1.8924172137357038e-06, |
|
"loss": 0.4821, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.2369747899159664, |
|
"grad_norm": 1.4028557110251756, |
|
"learning_rate": 1.8694268657002197e-06, |
|
"loss": 0.4592, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 2.242016806722689, |
|
"grad_norm": 1.4043326661254716, |
|
"learning_rate": 1.8465448703883959e-06, |
|
"loss": 0.4642, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.2470588235294118, |
|
"grad_norm": 1.4748018123002309, |
|
"learning_rate": 1.8237720197723075e-06, |
|
"loss": 0.5244, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.2521008403361344, |
|
"grad_norm": 1.3653204295657917, |
|
"learning_rate": 1.8011091020464138e-06, |
|
"loss": 0.5117, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.257142857142857, |
|
"grad_norm": 1.4578979263769525, |
|
"learning_rate": 1.7785569016002686e-06, |
|
"loss": 0.4622, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 2.26218487394958, |
|
"grad_norm": 1.4739147697577966, |
|
"learning_rate": 1.75611619899137e-06, |
|
"loss": 0.4524, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.2672268907563025, |
|
"grad_norm": 1.3465934593186815, |
|
"learning_rate": 1.7337877709181527e-06, |
|
"loss": 0.4616, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 2.272268907563025, |
|
"grad_norm": 1.4287084373091115, |
|
"learning_rate": 1.711572390193102e-06, |
|
"loss": 0.6594, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.277310924369748, |
|
"grad_norm": 1.3274840093520053, |
|
"learning_rate": 1.689470825715998e-06, |
|
"loss": 0.4529, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.2823529411764705, |
|
"grad_norm": 1.4216422105253623, |
|
"learning_rate": 1.6674838424473172e-06, |
|
"loss": 0.4655, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.287394957983193, |
|
"grad_norm": 1.452303728671861, |
|
"learning_rate": 1.6456122013817477e-06, |
|
"loss": 0.4625, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 2.292436974789916, |
|
"grad_norm": 1.4369743256615972, |
|
"learning_rate": 1.6238566595218475e-06, |
|
"loss": 0.4761, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.2974789915966385, |
|
"grad_norm": 1.407023006658543, |
|
"learning_rate": 1.6022179698518525e-06, |
|
"loss": 0.4505, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.302521008403361, |
|
"grad_norm": 1.391039540718536, |
|
"learning_rate": 1.580696881311611e-06, |
|
"loss": 0.4894, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.307563025210084, |
|
"grad_norm": 1.3557281771597436, |
|
"learning_rate": 1.5592941387706562e-06, |
|
"loss": 0.4108, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.3126050420168065, |
|
"grad_norm": 1.3010131467886796, |
|
"learning_rate": 1.538010483002435e-06, |
|
"loss": 0.425, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 2.317647058823529, |
|
"grad_norm": 1.3625069219769537, |
|
"learning_rate": 1.5168466506586654e-06, |
|
"loss": 0.4431, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 2.3226890756302523, |
|
"grad_norm": 1.2997097389936179, |
|
"learning_rate": 1.4958033742438348e-06, |
|
"loss": 0.4058, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.327731092436975, |
|
"grad_norm": 1.3546221586310845, |
|
"learning_rate": 1.4748813820898554e-06, |
|
"loss": 0.5043, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 2.3327731092436976, |
|
"grad_norm": 1.3503940282999218, |
|
"learning_rate": 1.454081398330855e-06, |
|
"loss": 0.5015, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 2.3378151260504203, |
|
"grad_norm": 1.2879127697899735, |
|
"learning_rate": 1.4334041428781003e-06, |
|
"loss": 0.4219, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 2.342857142857143, |
|
"grad_norm": 1.5900890446730591, |
|
"learning_rate": 1.4128503313951008e-06, |
|
"loss": 0.5508, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.3478991596638656, |
|
"grad_norm": 1.4693275041182954, |
|
"learning_rate": 1.3924206752728282e-06, |
|
"loss": 0.5196, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 1.3739526563603481, |
|
"learning_rate": 1.3721158816050872e-06, |
|
"loss": 0.5223, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 2.357983193277311, |
|
"grad_norm": 1.2888756368302696, |
|
"learning_rate": 1.3519366531640589e-06, |
|
"loss": 0.4745, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 2.3630252100840337, |
|
"grad_norm": 1.3646861171520672, |
|
"learning_rate": 1.3318836883759634e-06, |
|
"loss": 0.4765, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.3680672268907563, |
|
"grad_norm": 1.3876282049959663, |
|
"learning_rate": 1.3119576812968893e-06, |
|
"loss": 0.4552, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 2.373109243697479, |
|
"grad_norm": 1.3212811305037033, |
|
"learning_rate": 1.292159321588778e-06, |
|
"loss": 0.4444, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.3781512605042017, |
|
"grad_norm": 1.4025656868262555, |
|
"learning_rate": 1.272489294495548e-06, |
|
"loss": 0.5373, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 2.3831932773109243, |
|
"grad_norm": 1.3992039142572703, |
|
"learning_rate": 1.252948280819375e-06, |
|
"loss": 0.4297, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 2.388235294117647, |
|
"grad_norm": 1.438194701698973, |
|
"learning_rate": 1.2335369568971362e-06, |
|
"loss": 0.4577, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 2.3932773109243697, |
|
"grad_norm": 1.3560235059252677, |
|
"learning_rate": 1.2142559945769995e-06, |
|
"loss": 0.4576, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 2.3983193277310924, |
|
"grad_norm": 1.357949004614199, |
|
"learning_rate": 1.1951060611951615e-06, |
|
"loss": 0.5944, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.403361344537815, |
|
"grad_norm": 1.2895013043643404, |
|
"learning_rate": 1.1760878195527642e-06, |
|
"loss": 0.4192, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 2.4084033613445377, |
|
"grad_norm": 1.2608640104913673, |
|
"learning_rate": 1.1572019278929457e-06, |
|
"loss": 0.4431, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 2.4134453781512604, |
|
"grad_norm": 1.4235058216914491, |
|
"learning_rate": 1.1384490398780563e-06, |
|
"loss": 0.4835, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 2.418487394957983, |
|
"grad_norm": 1.3849158950764375, |
|
"learning_rate": 1.1198298045670402e-06, |
|
"loss": 0.4497, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 2.4235294117647057, |
|
"grad_norm": 1.4243621054419897, |
|
"learning_rate": 1.1013448663929704e-06, |
|
"loss": 0.5031, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 1.2997464135987702, |
|
"learning_rate": 1.0829948651407374e-06, |
|
"loss": 0.483, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 2.4336134453781515, |
|
"grad_norm": 1.2887117802326669, |
|
"learning_rate": 1.0647804359249143e-06, |
|
"loss": 0.4424, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 2.438655462184874, |
|
"grad_norm": 1.2955280324064098, |
|
"learning_rate": 1.0467022091677692e-06, |
|
"loss": 0.4963, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 2.443697478991597, |
|
"grad_norm": 1.5695989821047664, |
|
"learning_rate": 1.0287608105774456e-06, |
|
"loss": 0.512, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.4487394957983195, |
|
"grad_norm": 1.3900121464168351, |
|
"learning_rate": 1.0109568611263094e-06, |
|
"loss": 0.4418, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.453781512605042, |
|
"grad_norm": 1.443290081700745, |
|
"learning_rate": 9.932909770294542e-07, |
|
"loss": 0.4439, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 2.458823529411765, |
|
"grad_norm": 1.3476484251272791, |
|
"learning_rate": 9.757637697233723e-07, |
|
"loss": 0.4885, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 2.4638655462184875, |
|
"grad_norm": 1.3389474168899225, |
|
"learning_rate": 9.58375845844793e-07, |
|
"loss": 0.4486, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.46890756302521, |
|
"grad_norm": 1.2353966317116258, |
|
"learning_rate": 9.41127807209688e-07, |
|
"loss": 0.4321, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 2.473949579831933, |
|
"grad_norm": 1.2849383161233021, |
|
"learning_rate": 9.240202507924412e-07, |
|
"loss": 0.433, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.4789915966386555, |
|
"grad_norm": 1.3336087651970685, |
|
"learning_rate": 9.070537687051817e-07, |
|
"loss": 0.4516, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 2.484033613445378, |
|
"grad_norm": 1.3550057200939567, |
|
"learning_rate": 8.902289481772996e-07, |
|
"loss": 0.4616, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.489075630252101, |
|
"grad_norm": 1.3590095983206505, |
|
"learning_rate": 8.735463715351139e-07, |
|
"loss": 0.4203, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 2.4941176470588236, |
|
"grad_norm": 1.2915320514796769, |
|
"learning_rate": 8.570066161817176e-07, |
|
"loss": 0.4503, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 2.499159663865546, |
|
"grad_norm": 1.2679676777389248, |
|
"learning_rate": 8.406102545769989e-07, |
|
"loss": 0.4566, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.504201680672269, |
|
"grad_norm": 1.426642729326135, |
|
"learning_rate": 8.243578542178227e-07, |
|
"loss": 0.4707, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 2.5092436974789916, |
|
"grad_norm": 1.4592108582229681, |
|
"learning_rate": 8.082499776183883e-07, |
|
"loss": 0.4845, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 2.5142857142857142, |
|
"grad_norm": 1.5266839034291377, |
|
"learning_rate": 7.922871822907641e-07, |
|
"loss": 0.5228, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 2.519327731092437, |
|
"grad_norm": 1.471645595600825, |
|
"learning_rate": 7.764700207255904e-07, |
|
"loss": 0.4173, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 2.5243697478991596, |
|
"grad_norm": 1.3871858021840573, |
|
"learning_rate": 7.607990403729526e-07, |
|
"loss": 0.4601, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.5294117647058822, |
|
"grad_norm": 1.3138350820905274, |
|
"learning_rate": 7.452747836234392e-07, |
|
"loss": 0.4504, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.534453781512605, |
|
"grad_norm": 1.2975304324598231, |
|
"learning_rate": 7.298977877893688e-07, |
|
"loss": 0.4265, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.5394957983193276, |
|
"grad_norm": 1.3447001192643702, |
|
"learning_rate": 7.146685850861851e-07, |
|
"loss": 0.466, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 2.5445378151260503, |
|
"grad_norm": 1.3862420743153665, |
|
"learning_rate": 6.995877026140468e-07, |
|
"loss": 0.4884, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.549579831932773, |
|
"grad_norm": 1.4032983423284162, |
|
"learning_rate": 6.846556623395795e-07, |
|
"loss": 0.4948, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.5546218487394956, |
|
"grad_norm": 1.362120295068725, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.4702, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.5596638655462183, |
|
"grad_norm": 1.389808913275814, |
|
"learning_rate": 6.552401704742678e-07, |
|
"loss": 0.4825, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 2.564705882352941, |
|
"grad_norm": 1.2860994495581453, |
|
"learning_rate": 6.40757736987307e-07, |
|
"loss": 0.4321, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 2.5697478991596636, |
|
"grad_norm": 1.212606448511892, |
|
"learning_rate": 6.26426181870542e-07, |
|
"loss": 0.3868, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 2.5747899159663863, |
|
"grad_norm": 1.2670489383748516, |
|
"learning_rate": 6.122460011555187e-07, |
|
"loss": 0.4532, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.5798319327731094, |
|
"grad_norm": 1.3801554590726837, |
|
"learning_rate": 5.982176856345445e-07, |
|
"loss": 0.4263, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 2.584873949579832, |
|
"grad_norm": 1.3394504151016333, |
|
"learning_rate": 5.843417208436908e-07, |
|
"loss": 0.496, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.5899159663865547, |
|
"grad_norm": 1.2955707760211432, |
|
"learning_rate": 5.706185870460018e-07, |
|
"loss": 0.4253, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 2.5949579831932774, |
|
"grad_norm": 1.289481906227215, |
|
"learning_rate": 5.570487592148666e-07, |
|
"loss": 0.4035, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 1.3376266312340062, |
|
"learning_rate": 5.436327070175729e-07, |
|
"loss": 0.4545, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.6050420168067228, |
|
"grad_norm": 1.4001675009701846, |
|
"learning_rate": 5.303708947990638e-07, |
|
"loss": 0.4684, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.6100840336134454, |
|
"grad_norm": 1.4896915805848956, |
|
"learning_rate": 5.172637815658583e-07, |
|
"loss": 0.4704, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 2.615126050420168, |
|
"grad_norm": 1.430686916061002, |
|
"learning_rate": 5.04311820970163e-07, |
|
"loss": 0.4782, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.6201680672268908, |
|
"grad_norm": 1.3676105828350056, |
|
"learning_rate": 4.915154612941781e-07, |
|
"loss": 0.5979, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 2.6252100840336134, |
|
"grad_norm": 1.3552413071380474, |
|
"learning_rate": 4.788751454345763e-07, |
|
"loss": 0.4405, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.630252100840336, |
|
"grad_norm": 1.320913107468769, |
|
"learning_rate": 4.663913108871726e-07, |
|
"loss": 0.4105, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 2.635294117647059, |
|
"grad_norm": 1.2848967010536776, |
|
"learning_rate": 4.540643897317887e-07, |
|
"loss": 0.3934, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.6403361344537815, |
|
"grad_norm": 1.3500509189164658, |
|
"learning_rate": 4.4189480861729137e-07, |
|
"loss": 0.4339, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 2.645378151260504, |
|
"grad_norm": 1.3387080610453355, |
|
"learning_rate": 4.2988298874682754e-07, |
|
"loss": 0.4552, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.650420168067227, |
|
"grad_norm": 1.3397812410356982, |
|
"learning_rate": 4.1802934586324897e-07, |
|
"loss": 0.5401, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.6554621848739495, |
|
"grad_norm": 1.446011629760243, |
|
"learning_rate": 4.0633429023472004e-07, |
|
"loss": 0.5409, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.660504201680672, |
|
"grad_norm": 1.3710949034220614, |
|
"learning_rate": 3.947982266405159e-07, |
|
"loss": 0.501, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.665546218487395, |
|
"grad_norm": 1.5073033115483478, |
|
"learning_rate": 3.834215543570191e-07, |
|
"loss": 0.5156, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.6705882352941175, |
|
"grad_norm": 1.3549599833015573, |
|
"learning_rate": 3.72204667143895e-07, |
|
"loss": 0.4667, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.6756302521008406, |
|
"grad_norm": 1.368632751852017, |
|
"learning_rate": 3.611479532304618e-07, |
|
"loss": 0.4596, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.6806722689075633, |
|
"grad_norm": 1.3310734620781681, |
|
"learning_rate": 3.5025179530225995e-07, |
|
"loss": 0.4248, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.685714285714286, |
|
"grad_norm": 1.429961991715737, |
|
"learning_rate": 3.395165704878023e-07, |
|
"loss": 0.4921, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.6907563025210086, |
|
"grad_norm": 1.3220689464603654, |
|
"learning_rate": 3.289426503455201e-07, |
|
"loss": 0.4686, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.6957983193277313, |
|
"grad_norm": 1.3596446823078556, |
|
"learning_rate": 3.185304008509077e-07, |
|
"loss": 0.4692, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.700840336134454, |
|
"grad_norm": 1.2664017870580138, |
|
"learning_rate": 3.082801823838527e-07, |
|
"loss": 0.4792, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.7058823529411766, |
|
"grad_norm": 1.277008676617942, |
|
"learning_rate": 2.9819234971616154e-07, |
|
"loss": 0.4496, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.7109243697478993, |
|
"grad_norm": 1.3031675483473417, |
|
"learning_rate": 2.882672519992824e-07, |
|
"loss": 0.4599, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.715966386554622, |
|
"grad_norm": 1.475285425023621, |
|
"learning_rate": 2.785052327522214e-07, |
|
"loss": 0.5562, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.7210084033613446, |
|
"grad_norm": 1.2387397112349467, |
|
"learning_rate": 2.6890662984965234e-07, |
|
"loss": 0.4508, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.7260504201680673, |
|
"grad_norm": 1.2769755883493084, |
|
"learning_rate": 2.594717755102205e-07, |
|
"loss": 0.4497, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.73109243697479, |
|
"grad_norm": 1.4117553058680856, |
|
"learning_rate": 2.5020099628504603e-07, |
|
"loss": 0.4176, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.7361344537815127, |
|
"grad_norm": 1.3430474164461437, |
|
"learning_rate": 2.4109461304642254e-07, |
|
"loss": 0.61, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.7411764705882353, |
|
"grad_norm": 1.319429861827343, |
|
"learning_rate": 2.3215294097670927e-07, |
|
"loss": 0.4451, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.746218487394958, |
|
"grad_norm": 1.436920605125832, |
|
"learning_rate": 2.2337628955742263e-07, |
|
"loss": 0.4874, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.7512605042016807, |
|
"grad_norm": 1.3812471581213166, |
|
"learning_rate": 2.1476496255852685e-07, |
|
"loss": 0.382, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.7563025210084033, |
|
"grad_norm": 1.205494792014491, |
|
"learning_rate": 2.0631925802791608e-07, |
|
"loss": 0.5224, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.761344537815126, |
|
"grad_norm": 1.3083334014447827, |
|
"learning_rate": 1.9803946828110376e-07, |
|
"loss": 0.5117, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.7663865546218487, |
|
"grad_norm": 1.3758887119834913, |
|
"learning_rate": 1.8992587989110133e-07, |
|
"loss": 0.4898, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.7714285714285714, |
|
"grad_norm": 1.3436017213466456, |
|
"learning_rate": 1.8197877367849948e-07, |
|
"loss": 0.5596, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.776470588235294, |
|
"grad_norm": 1.4507659924194913, |
|
"learning_rate": 1.7419842470175196e-07, |
|
"loss": 0.4889, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.7815126050420167, |
|
"grad_norm": 1.5070411133243147, |
|
"learning_rate": 1.6658510224765333e-07, |
|
"loss": 0.47, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.7865546218487394, |
|
"grad_norm": 1.3934953281445221, |
|
"learning_rate": 1.5913906982201744e-07, |
|
"loss": 0.4626, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.791596638655462, |
|
"grad_norm": 1.4300047982632422, |
|
"learning_rate": 1.5186058514055912e-07, |
|
"loss": 0.4808, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.7966386554621847, |
|
"grad_norm": 1.3007207174809041, |
|
"learning_rate": 1.447499001199748e-07, |
|
"loss": 0.5228, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.8016806722689074, |
|
"grad_norm": 1.335166451449638, |
|
"learning_rate": 1.3780726086922103e-07, |
|
"loss": 0.5314, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.80672268907563, |
|
"grad_norm": 1.2727049723883297, |
|
"learning_rate": 1.3103290768099796e-07, |
|
"loss": 0.4538, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.8117647058823527, |
|
"grad_norm": 1.4233653924829766, |
|
"learning_rate": 1.244270750234333e-07, |
|
"loss": 0.4768, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.8168067226890754, |
|
"grad_norm": 1.4089563114452142, |
|
"learning_rate": 1.1798999153196433e-07, |
|
"loss": 0.4543, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.821848739495798, |
|
"grad_norm": 1.3596745441590257, |
|
"learning_rate": 1.1172188000142803e-07, |
|
"loss": 0.5016, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.8268907563025207, |
|
"grad_norm": 1.3375081145484837, |
|
"learning_rate": 1.0562295737834738e-07, |
|
"loss": 0.47, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.831932773109244, |
|
"grad_norm": 1.3797076618818533, |
|
"learning_rate": 9.969343475342285e-08, |
|
"loss": 0.4762, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.8369747899159665, |
|
"grad_norm": 1.4014527371585839, |
|
"learning_rate": 9.393351735422773e-08, |
|
"loss": 0.4606, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.842016806722689, |
|
"grad_norm": 1.317969883356561, |
|
"learning_rate": 8.834340453810375e-08, |
|
"loss": 0.4353, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.847058823529412, |
|
"grad_norm": 1.3062183016322855, |
|
"learning_rate": 8.29232897852611e-08, |
|
"loss": 0.3857, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.8521008403361345, |
|
"grad_norm": 1.3280320137002732, |
|
"learning_rate": 7.76733606920832e-08, |
|
"loss": 0.4572, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 1.4128418670110612, |
|
"learning_rate": 7.259379896463248e-08, |
|
"loss": 0.4476, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.86218487394958, |
|
"grad_norm": 1.3977595292294513, |
|
"learning_rate": 6.768478041236037e-08, |
|
"loss": 0.4436, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.8672268907563025, |
|
"grad_norm": 1.3855652086248782, |
|
"learning_rate": 6.294647494202444e-08, |
|
"loss": 0.4346, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.872268907563025, |
|
"grad_norm": 1.3251986287781006, |
|
"learning_rate": 5.8379046551807486e-08, |
|
"loss": 0.493, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.877310924369748, |
|
"grad_norm": 1.32087943884219, |
|
"learning_rate": 5.398265332563935e-08, |
|
"loss": 0.4551, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.8823529411764706, |
|
"grad_norm": 1.2437729277991256, |
|
"learning_rate": 4.975744742772848e-08, |
|
"loss": 0.4098, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.8873949579831932, |
|
"grad_norm": 1.340919476266603, |
|
"learning_rate": 4.5703575097292286e-08, |
|
"loss": 0.4726, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.892436974789916, |
|
"grad_norm": 1.2461844948007363, |
|
"learning_rate": 4.182117664349783e-08, |
|
"loss": 0.449, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.8974789915966386, |
|
"grad_norm": 1.3240662502351237, |
|
"learning_rate": 3.8110386440605164e-08, |
|
"loss": 0.4603, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.9025210084033612, |
|
"grad_norm": 1.3494315545656852, |
|
"learning_rate": 3.457133292331494e-08, |
|
"loss": 0.5058, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.907563025210084, |
|
"grad_norm": 1.3389143724686245, |
|
"learning_rate": 3.120413858232474e-08, |
|
"loss": 0.4578, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.9126050420168066, |
|
"grad_norm": 1.344475790060752, |
|
"learning_rate": 2.8008919960090253e-08, |
|
"loss": 0.5347, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.9176470588235293, |
|
"grad_norm": 1.388286539991785, |
|
"learning_rate": 2.4985787646788497e-08, |
|
"loss": 0.4792, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.9226890756302524, |
|
"grad_norm": 1.4667343155241181, |
|
"learning_rate": 2.2134846276494205e-08, |
|
"loss": 0.4854, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.927731092436975, |
|
"grad_norm": 1.393293250138424, |
|
"learning_rate": 1.9456194523554404e-08, |
|
"loss": 0.4796, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.9327731092436977, |
|
"grad_norm": 1.3210976282362301, |
|
"learning_rate": 1.69499250991767e-08, |
|
"loss": 0.4465, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.9378151260504204, |
|
"grad_norm": 1.3544687735071852, |
|
"learning_rate": 1.4616124748217387e-08, |
|
"loss": 0.5223, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.942857142857143, |
|
"grad_norm": 1.467595755846224, |
|
"learning_rate": 1.2454874246181081e-08, |
|
"loss": 0.6671, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.9478991596638657, |
|
"grad_norm": 1.3671723526105932, |
|
"learning_rate": 1.0466248396424072e-08, |
|
"loss": 0.4499, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.9529411764705884, |
|
"grad_norm": 1.4167636187504142, |
|
"learning_rate": 8.650316027566386e-09, |
|
"loss": 0.4873, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.957983193277311, |
|
"grad_norm": 1.220474765102595, |
|
"learning_rate": 7.007139991108136e-09, |
|
"loss": 0.4043, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.9630252100840337, |
|
"grad_norm": 1.3733660106334655, |
|
"learning_rate": 5.536777159254603e-09, |
|
"loss": 0.4793, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.9680672268907564, |
|
"grad_norm": 1.3544611708705747, |
|
"learning_rate": 4.239278422948911e-09, |
|
"loss": 0.4953, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.973109243697479, |
|
"grad_norm": 1.4589364978859505, |
|
"learning_rate": 3.1146886901090024e-09, |
|
"loss": 0.4547, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.9781512605042018, |
|
"grad_norm": 1.3938123480231057, |
|
"learning_rate": 2.1630468840738716e-09, |
|
"loss": 0.4115, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.9831932773109244, |
|
"grad_norm": 1.3511479563562372, |
|
"learning_rate": 1.3843859422574269e-09, |
|
"loss": 0.4926, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.988235294117647, |
|
"grad_norm": 1.445464043641677, |
|
"learning_rate": 7.787328150071771e-10, |
|
"loss": 0.5346, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.9932773109243698, |
|
"grad_norm": 1.5785257738352532, |
|
"learning_rate": 3.4610846467109106e-10, |
|
"loss": 0.5032, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.9983193277310924, |
|
"grad_norm": 1.305339383484568, |
|
"learning_rate": 8.652786487484133e-11, |
|
"loss": 0.4666, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.9983193277310924, |
|
"step": 594, |
|
"total_flos": 4.726427205490442e+17, |
|
"train_loss": 0.7082312573688199, |
|
"train_runtime": 63951.2458, |
|
"train_samples_per_second": 0.447, |
|
"train_steps_per_second": 0.009 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 594, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.726427205490442e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|