{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 12255, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012239902080783353, "grad_norm": 0.18583729171749852, "learning_rate": 4.999999917854624e-06, "loss": 0.7751, "step": 1 }, { "epoch": 0.0024479804161566705, "grad_norm": 0.20406357721085652, "learning_rate": 4.999999671418499e-06, "loss": 0.8304, "step": 2 }, { "epoch": 0.0036719706242350062, "grad_norm": 0.21291625994602287, "learning_rate": 4.999999260691644e-06, "loss": 1.0053, "step": 3 }, { "epoch": 0.004895960832313341, "grad_norm": 0.2877744427598735, "learning_rate": 4.999998685674084e-06, "loss": 0.7683, "step": 4 }, { "epoch": 0.006119951040391677, "grad_norm": 0.22350743109660962, "learning_rate": 4.999997946365856e-06, "loss": 0.9978, "step": 5 }, { "epoch": 0.0073439412484700125, "grad_norm": 0.2840586328444629, "learning_rate": 4.999997042767011e-06, "loss": 1.6078, "step": 6 }, { "epoch": 0.008567931456548347, "grad_norm": 0.23613497057256475, "learning_rate": 4.999995974877606e-06, "loss": 0.719, "step": 7 }, { "epoch": 0.009791921664626682, "grad_norm": 0.2970787719278101, "learning_rate": 4.999994742697714e-06, "loss": 0.9873, "step": 8 }, { "epoch": 0.011015911872705019, "grad_norm": 0.2070582341188991, "learning_rate": 4.999993346227413e-06, "loss": 0.6846, "step": 9 }, { "epoch": 0.012239902080783354, "grad_norm": 0.30932752611485503, "learning_rate": 4.999991785466797e-06, "loss": 1.3271, "step": 10 }, { "epoch": 0.01346389228886169, "grad_norm": 0.24978429465929539, "learning_rate": 4.999990060415967e-06, "loss": 0.6926, "step": 11 }, { "epoch": 0.014687882496940025, "grad_norm": 0.19147231300046869, "learning_rate": 4.999988171075038e-06, "loss": 0.6701, "step": 12 }, { "epoch": 0.01591187270501836, "grad_norm": 0.30225290731864235, "learning_rate": 4.999986117444132e-06, "loss": 0.7883, "step": 13 }, { "epoch": 0.017135862913096694, "grad_norm": 0.34854122308275026, "learning_rate": 4.9999838995233864e-06, "loss": 1.0646, "step": 14 }, { "epoch": 0.01835985312117503, "grad_norm": 0.35994514446338466, "learning_rate": 4.999981517312945e-06, "loss": 0.917, "step": 15 }, { "epoch": 0.019583843329253364, "grad_norm": 0.19253165865850944, "learning_rate": 4.9999789708129654e-06, "loss": 0.7728, "step": 16 }, { "epoch": 0.0208078335373317, "grad_norm": 0.25014201537622865, "learning_rate": 4.999976260023615e-06, "loss": 0.7767, "step": 17 }, { "epoch": 0.022031823745410038, "grad_norm": 0.1739379338522603, "learning_rate": 4.999973384945071e-06, "loss": 0.653, "step": 18 }, { "epoch": 0.023255813953488372, "grad_norm": 0.3584818482270971, "learning_rate": 4.999970345577521e-06, "loss": 1.2892, "step": 19 }, { "epoch": 0.02447980416156671, "grad_norm": 0.3080763209165389, "learning_rate": 4.999967141921169e-06, "loss": 1.2724, "step": 20 }, { "epoch": 0.025703794369645042, "grad_norm": 0.3078190263976052, "learning_rate": 4.999963773976223e-06, "loss": 1.5101, "step": 21 }, { "epoch": 0.02692778457772338, "grad_norm": 0.35734267049928353, "learning_rate": 4.999960241742904e-06, "loss": 1.0969, "step": 22 }, { "epoch": 0.028151774785801713, "grad_norm": 0.29870690242698555, "learning_rate": 4.9999565452214455e-06, "loss": 0.957, "step": 23 }, { "epoch": 0.02937576499388005, "grad_norm": 0.31183960915376496, "learning_rate": 4.999952684412089e-06, "loss": 0.8774, "step": 24 }, { "epoch": 0.030599755201958383, "grad_norm": 0.26729570980176937, "learning_rate": 4.999948659315088e-06, "loss": 0.9801, "step": 25 }, { "epoch": 0.03182374541003672, "grad_norm": 0.3298521691537802, "learning_rate": 4.999944469930708e-06, "loss": 1.4352, "step": 26 }, { "epoch": 0.033047735618115054, "grad_norm": 0.30511614902908596, "learning_rate": 4.9999401162592256e-06, "loss": 0.9802, "step": 27 }, { "epoch": 0.03427172582619339, "grad_norm": 0.26480266153469656, "learning_rate": 4.999935598300924e-06, "loss": 0.6735, "step": 28 }, { "epoch": 0.03549571603427173, "grad_norm": 0.26529159306625855, "learning_rate": 4.999930916056102e-06, "loss": 0.9023, "step": 29 }, { "epoch": 0.03671970624235006, "grad_norm": 0.3579345710211942, "learning_rate": 4.999926069525066e-06, "loss": 0.934, "step": 30 }, { "epoch": 0.037943696450428395, "grad_norm": 0.1866932821938979, "learning_rate": 4.999921058708136e-06, "loss": 0.7769, "step": 31 }, { "epoch": 0.03916768665850673, "grad_norm": 0.3184281967758577, "learning_rate": 4.999915883605641e-06, "loss": 0.683, "step": 32 }, { "epoch": 0.04039167686658507, "grad_norm": 0.27541521490140086, "learning_rate": 4.99991054421792e-06, "loss": 1.3672, "step": 33 }, { "epoch": 0.0416156670746634, "grad_norm": 0.2721316520895182, "learning_rate": 4.999905040545325e-06, "loss": 1.0381, "step": 34 }, { "epoch": 0.042839657282741736, "grad_norm": 0.20120575079725292, "learning_rate": 4.999899372588217e-06, "loss": 0.7746, "step": 35 }, { "epoch": 0.044063647490820076, "grad_norm": 0.27818574153822795, "learning_rate": 4.9998935403469685e-06, "loss": 0.8716, "step": 36 }, { "epoch": 0.04528763769889841, "grad_norm": 0.40361412171293226, "learning_rate": 4.999887543821963e-06, "loss": 1.0859, "step": 37 }, { "epoch": 0.046511627906976744, "grad_norm": 0.2456651991051225, "learning_rate": 4.999881383013595e-06, "loss": 1.0212, "step": 38 }, { "epoch": 0.04773561811505508, "grad_norm": 0.2060748267108159, "learning_rate": 4.999875057922269e-06, "loss": 0.8636, "step": 39 }, { "epoch": 0.04895960832313342, "grad_norm": 0.41509728031167886, "learning_rate": 4.9998685685484e-06, "loss": 0.911, "step": 40 }, { "epoch": 0.05018359853121175, "grad_norm": 0.21505365801947904, "learning_rate": 4.999861914892415e-06, "loss": 0.7291, "step": 41 }, { "epoch": 0.051407588739290085, "grad_norm": 0.3544708832311817, "learning_rate": 4.999855096954752e-06, "loss": 0.7691, "step": 42 }, { "epoch": 0.05263157894736842, "grad_norm": 0.32765260031528076, "learning_rate": 4.999848114735858e-06, "loss": 0.8236, "step": 43 }, { "epoch": 0.05385556915544676, "grad_norm": 0.3303817423046128, "learning_rate": 4.999840968236192e-06, "loss": 0.8613, "step": 44 }, { "epoch": 0.05507955936352509, "grad_norm": 0.2572617478071188, "learning_rate": 4.999833657456224e-06, "loss": 0.874, "step": 45 }, { "epoch": 0.056303549571603426, "grad_norm": 0.31006055814937555, "learning_rate": 4.999826182396435e-06, "loss": 0.8179, "step": 46 }, { "epoch": 0.05752753977968176, "grad_norm": 0.2656685858221822, "learning_rate": 4.9998185430573145e-06, "loss": 0.767, "step": 47 }, { "epoch": 0.0587515299877601, "grad_norm": 0.36661459170923244, "learning_rate": 4.999810739439365e-06, "loss": 0.8963, "step": 48 }, { "epoch": 0.05997552019583843, "grad_norm": 0.26792611459222554, "learning_rate": 4.999802771543101e-06, "loss": 0.89, "step": 49 }, { "epoch": 0.06119951040391677, "grad_norm": 0.48121373293033187, "learning_rate": 4.999794639369045e-06, "loss": 1.3291, "step": 50 }, { "epoch": 0.06242350061199511, "grad_norm": 0.26272074677420965, "learning_rate": 4.99978634291773e-06, "loss": 0.6825, "step": 51 }, { "epoch": 0.06364749082007344, "grad_norm": 0.2598096821654224, "learning_rate": 4.999777882189705e-06, "loss": 0.6748, "step": 52 }, { "epoch": 0.06487148102815178, "grad_norm": 0.2668709897768842, "learning_rate": 4.999769257185522e-06, "loss": 0.9461, "step": 53 }, { "epoch": 0.06609547123623011, "grad_norm": 0.3375475064373023, "learning_rate": 4.99976046790575e-06, "loss": 0.8876, "step": 54 }, { "epoch": 0.06731946144430845, "grad_norm": 0.4198840161163024, "learning_rate": 4.999751514350966e-06, "loss": 1.6459, "step": 55 }, { "epoch": 0.06854345165238677, "grad_norm": 0.34369233553235295, "learning_rate": 4.999742396521758e-06, "loss": 0.7366, "step": 56 }, { "epoch": 0.06976744186046512, "grad_norm": 0.40703261331120943, "learning_rate": 4.999733114418726e-06, "loss": 1.3698, "step": 57 }, { "epoch": 0.07099143206854346, "grad_norm": 0.22293367791017168, "learning_rate": 4.999723668042479e-06, "loss": 0.4068, "step": 58 }, { "epoch": 0.07221542227662178, "grad_norm": 0.4008775770379318, "learning_rate": 4.9997140573936385e-06, "loss": 0.7147, "step": 59 }, { "epoch": 0.07343941248470012, "grad_norm": 0.4194534781140063, "learning_rate": 4.9997042824728365e-06, "loss": 0.9667, "step": 60 }, { "epoch": 0.07466340269277846, "grad_norm": 0.42035518786031906, "learning_rate": 4.9996943432807144e-06, "loss": 0.9308, "step": 61 }, { "epoch": 0.07588739290085679, "grad_norm": 0.29454843150959004, "learning_rate": 4.999684239817925e-06, "loss": 0.9713, "step": 62 }, { "epoch": 0.07711138310893513, "grad_norm": 0.33329763298008036, "learning_rate": 4.999673972085135e-06, "loss": 1.021, "step": 63 }, { "epoch": 0.07833537331701346, "grad_norm": 0.29752155694550203, "learning_rate": 4.999663540083016e-06, "loss": 0.6485, "step": 64 }, { "epoch": 0.0795593635250918, "grad_norm": 0.3947136376006826, "learning_rate": 4.999652943812255e-06, "loss": 1.7957, "step": 65 }, { "epoch": 0.08078335373317014, "grad_norm": 0.20583114475708844, "learning_rate": 4.999642183273549e-06, "loss": 0.6997, "step": 66 }, { "epoch": 0.08200734394124846, "grad_norm": 0.18289509535433704, "learning_rate": 4.999631258467603e-06, "loss": 0.4703, "step": 67 }, { "epoch": 0.0832313341493268, "grad_norm": 0.2274688855191698, "learning_rate": 4.999620169395136e-06, "loss": 0.7131, "step": 68 }, { "epoch": 0.08445532435740515, "grad_norm": 0.35792525848819423, "learning_rate": 4.9996089160568775e-06, "loss": 0.8018, "step": 69 }, { "epoch": 0.08567931456548347, "grad_norm": 0.3448967587172274, "learning_rate": 4.999597498453567e-06, "loss": 0.8188, "step": 70 }, { "epoch": 0.08690330477356181, "grad_norm": 0.3715378107688424, "learning_rate": 4.9995859165859525e-06, "loss": 0.7089, "step": 71 }, { "epoch": 0.08812729498164015, "grad_norm": 0.2314946710165429, "learning_rate": 4.999574170454799e-06, "loss": 0.7165, "step": 72 }, { "epoch": 0.08935128518971848, "grad_norm": 0.5062107082444923, "learning_rate": 4.999562260060875e-06, "loss": 0.7468, "step": 73 }, { "epoch": 0.09057527539779682, "grad_norm": 0.3613440311794418, "learning_rate": 4.999550185404965e-06, "loss": 1.1806, "step": 74 }, { "epoch": 0.09179926560587515, "grad_norm": 0.36850743951287973, "learning_rate": 4.999537946487862e-06, "loss": 1.3959, "step": 75 }, { "epoch": 0.09302325581395349, "grad_norm": 0.2735739225496828, "learning_rate": 4.999525543310369e-06, "loss": 1.0208, "step": 76 }, { "epoch": 0.09424724602203183, "grad_norm": 0.31992463657244324, "learning_rate": 4.999512975873305e-06, "loss": 0.6857, "step": 77 }, { "epoch": 0.09547123623011015, "grad_norm": 0.19353299905849947, "learning_rate": 4.999500244177491e-06, "loss": 0.5956, "step": 78 }, { "epoch": 0.0966952264381885, "grad_norm": 0.3590515193305031, "learning_rate": 4.999487348223767e-06, "loss": 1.8687, "step": 79 }, { "epoch": 0.09791921664626684, "grad_norm": 0.30964026487168866, "learning_rate": 4.99947428801298e-06, "loss": 0.7706, "step": 80 }, { "epoch": 0.09914320685434516, "grad_norm": 0.5067456087223579, "learning_rate": 4.9994610635459865e-06, "loss": 0.9452, "step": 81 }, { "epoch": 0.1003671970624235, "grad_norm": 0.40985466873086296, "learning_rate": 4.999447674823657e-06, "loss": 0.8859, "step": 82 }, { "epoch": 0.10159118727050184, "grad_norm": 0.39190374257953436, "learning_rate": 4.9994341218468715e-06, "loss": 1.3009, "step": 83 }, { "epoch": 0.10281517747858017, "grad_norm": 0.28923539054173913, "learning_rate": 4.99942040461652e-06, "loss": 0.7321, "step": 84 }, { "epoch": 0.10403916768665851, "grad_norm": 0.2722920220496856, "learning_rate": 4.999406523133504e-06, "loss": 0.7567, "step": 85 }, { "epoch": 0.10526315789473684, "grad_norm": 0.3277663148624042, "learning_rate": 4.999392477398737e-06, "loss": 0.8629, "step": 86 }, { "epoch": 0.10648714810281518, "grad_norm": 0.5195962241294879, "learning_rate": 4.99937826741314e-06, "loss": 1.257, "step": 87 }, { "epoch": 0.10771113831089352, "grad_norm": 0.3903200576204231, "learning_rate": 4.999363893177648e-06, "loss": 1.4092, "step": 88 }, { "epoch": 0.10893512851897184, "grad_norm": 0.4906747473421015, "learning_rate": 4.999349354693206e-06, "loss": 1.6772, "step": 89 }, { "epoch": 0.11015911872705018, "grad_norm": 0.3121759660794242, "learning_rate": 4.9993346519607685e-06, "loss": 1.8001, "step": 90 }, { "epoch": 0.11138310893512852, "grad_norm": 0.2892084262986656, "learning_rate": 4.999319784981302e-06, "loss": 0.6667, "step": 91 }, { "epoch": 0.11260709914320685, "grad_norm": 0.26490607637177427, "learning_rate": 4.999304753755784e-06, "loss": 0.8763, "step": 92 }, { "epoch": 0.11383108935128519, "grad_norm": 0.18640634686040375, "learning_rate": 4.9992895582852e-06, "loss": 0.5593, "step": 93 }, { "epoch": 0.11505507955936352, "grad_norm": 0.2748875373849093, "learning_rate": 4.9992741985705524e-06, "loss": 0.5294, "step": 94 }, { "epoch": 0.11627906976744186, "grad_norm": 0.6441254262036021, "learning_rate": 4.999258674612849e-06, "loss": 0.8691, "step": 95 }, { "epoch": 0.1175030599755202, "grad_norm": 0.5235182704075505, "learning_rate": 4.999242986413108e-06, "loss": 1.1176, "step": 96 }, { "epoch": 0.11872705018359853, "grad_norm": 0.3556262696346673, "learning_rate": 4.999227133972364e-06, "loss": 0.6873, "step": 97 }, { "epoch": 0.11995104039167687, "grad_norm": 0.34559006466503145, "learning_rate": 4.999211117291655e-06, "loss": 0.6915, "step": 98 }, { "epoch": 0.1211750305997552, "grad_norm": 0.4455493985241837, "learning_rate": 4.999194936372036e-06, "loss": 0.7893, "step": 99 }, { "epoch": 0.12239902080783353, "grad_norm": 0.3422702470959658, "learning_rate": 4.99917859121457e-06, "loss": 0.7701, "step": 100 }, { "epoch": 0.12362301101591187, "grad_norm": 0.4425943797862132, "learning_rate": 4.99916208182033e-06, "loss": 0.7172, "step": 101 }, { "epoch": 0.12484700122399021, "grad_norm": 0.40506374911245696, "learning_rate": 4.9991454081904025e-06, "loss": 1.2405, "step": 102 }, { "epoch": 0.12607099143206854, "grad_norm": 0.28721291352960154, "learning_rate": 4.999128570325882e-06, "loss": 0.7086, "step": 103 }, { "epoch": 0.12729498164014688, "grad_norm": 0.42411263793572984, "learning_rate": 4.999111568227875e-06, "loss": 0.9167, "step": 104 }, { "epoch": 0.12851897184822522, "grad_norm": 0.46092441192090267, "learning_rate": 4.9990944018975e-06, "loss": 0.7406, "step": 105 }, { "epoch": 0.12974296205630356, "grad_norm": 0.36830448393020426, "learning_rate": 4.999077071335885e-06, "loss": 0.8869, "step": 106 }, { "epoch": 0.13096695226438188, "grad_norm": 0.44533454781577375, "learning_rate": 4.999059576544167e-06, "loss": 0.807, "step": 107 }, { "epoch": 0.13219094247246022, "grad_norm": 0.28515020625555076, "learning_rate": 4.999041917523497e-06, "loss": 0.7689, "step": 108 }, { "epoch": 0.13341493268053856, "grad_norm": 0.634481287771021, "learning_rate": 4.999024094275037e-06, "loss": 0.7283, "step": 109 }, { "epoch": 0.1346389228886169, "grad_norm": 0.39843886467027073, "learning_rate": 4.9990061067999555e-06, "loss": 0.8053, "step": 110 }, { "epoch": 0.13586291309669524, "grad_norm": 0.4253410945943641, "learning_rate": 4.998987955099436e-06, "loss": 0.7736, "step": 111 }, { "epoch": 0.13708690330477355, "grad_norm": 0.36722373631265515, "learning_rate": 4.998969639174671e-06, "loss": 0.9464, "step": 112 }, { "epoch": 0.1383108935128519, "grad_norm": 0.2732427884721167, "learning_rate": 4.998951159026866e-06, "loss": 0.833, "step": 113 }, { "epoch": 0.13953488372093023, "grad_norm": 0.34663595616055065, "learning_rate": 4.998932514657232e-06, "loss": 0.7257, "step": 114 }, { "epoch": 0.14075887392900857, "grad_norm": 0.5998368754726062, "learning_rate": 4.998913706066997e-06, "loss": 0.7323, "step": 115 }, { "epoch": 0.1419828641370869, "grad_norm": 0.4473463614973645, "learning_rate": 4.9988947332573955e-06, "loss": 1.2615, "step": 116 }, { "epoch": 0.14320685434516525, "grad_norm": 0.4486765780437808, "learning_rate": 4.998875596229675e-06, "loss": 1.5905, "step": 117 }, { "epoch": 0.14443084455324356, "grad_norm": 0.4219723228963629, "learning_rate": 4.998856294985093e-06, "loss": 0.8601, "step": 118 }, { "epoch": 0.1456548347613219, "grad_norm": 0.2506786547797268, "learning_rate": 4.998836829524918e-06, "loss": 0.6768, "step": 119 }, { "epoch": 0.14687882496940025, "grad_norm": 0.4501037316082423, "learning_rate": 4.99881719985043e-06, "loss": 0.8671, "step": 120 }, { "epoch": 0.1481028151774786, "grad_norm": 0.44614537792632397, "learning_rate": 4.9987974059629165e-06, "loss": 0.6588, "step": 121 }, { "epoch": 0.14932680538555693, "grad_norm": 0.2563130623514102, "learning_rate": 4.998777447863681e-06, "loss": 0.5567, "step": 122 }, { "epoch": 0.15055079559363524, "grad_norm": 0.4199130939198221, "learning_rate": 4.998757325554034e-06, "loss": 0.9347, "step": 123 }, { "epoch": 0.15177478580171358, "grad_norm": 0.3361919551388072, "learning_rate": 4.998737039035297e-06, "loss": 1.129, "step": 124 }, { "epoch": 0.15299877600979192, "grad_norm": 0.2960923862167825, "learning_rate": 4.998716588308805e-06, "loss": 0.5396, "step": 125 }, { "epoch": 0.15422276621787026, "grad_norm": 0.37670334524566873, "learning_rate": 4.9986959733759e-06, "loss": 0.7446, "step": 126 }, { "epoch": 0.1554467564259486, "grad_norm": 0.37407387501653966, "learning_rate": 4.998675194237939e-06, "loss": 1.2103, "step": 127 }, { "epoch": 0.15667074663402691, "grad_norm": 0.34935983601064924, "learning_rate": 4.9986542508962845e-06, "loss": 0.7557, "step": 128 }, { "epoch": 0.15789473684210525, "grad_norm": 0.2707127564841584, "learning_rate": 4.998633143352315e-06, "loss": 0.7545, "step": 129 }, { "epoch": 0.1591187270501836, "grad_norm": 0.24472003358768396, "learning_rate": 4.998611871607418e-06, "loss": 0.6538, "step": 130 }, { "epoch": 0.16034271725826194, "grad_norm": 0.347989151027408, "learning_rate": 4.99859043566299e-06, "loss": 0.6373, "step": 131 }, { "epoch": 0.16156670746634028, "grad_norm": 0.3410028328075718, "learning_rate": 4.9985688355204395e-06, "loss": 0.6982, "step": 132 }, { "epoch": 0.16279069767441862, "grad_norm": 0.4622243517751799, "learning_rate": 4.998547071181188e-06, "loss": 0.7655, "step": 133 }, { "epoch": 0.16401468788249693, "grad_norm": 0.24536179750121562, "learning_rate": 4.9985251426466635e-06, "loss": 0.6953, "step": 134 }, { "epoch": 0.16523867809057527, "grad_norm": 0.2593736638478067, "learning_rate": 4.998503049918308e-06, "loss": 0.6774, "step": 135 }, { "epoch": 0.1664626682986536, "grad_norm": 0.3122150544885731, "learning_rate": 4.998480792997574e-06, "loss": 0.6568, "step": 136 }, { "epoch": 0.16768665850673195, "grad_norm": 0.49403594015867824, "learning_rate": 4.998458371885924e-06, "loss": 1.3702, "step": 137 }, { "epoch": 0.1689106487148103, "grad_norm": 0.49563883047758617, "learning_rate": 4.99843578658483e-06, "loss": 1.5568, "step": 138 }, { "epoch": 0.1701346389228886, "grad_norm": 0.5328672596622324, "learning_rate": 4.9984130370957774e-06, "loss": 0.7061, "step": 139 }, { "epoch": 0.17135862913096694, "grad_norm": 0.4616635577958037, "learning_rate": 4.998390123420261e-06, "loss": 1.3856, "step": 140 }, { "epoch": 0.17258261933904528, "grad_norm": 0.42410787586749155, "learning_rate": 4.998367045559786e-06, "loss": 0.8944, "step": 141 }, { "epoch": 0.17380660954712362, "grad_norm": 0.3157157425050123, "learning_rate": 4.998343803515871e-06, "loss": 0.8737, "step": 142 }, { "epoch": 0.17503059975520197, "grad_norm": 0.39421935061211766, "learning_rate": 4.9983203972900404e-06, "loss": 0.6572, "step": 143 }, { "epoch": 0.1762545899632803, "grad_norm": 0.36976427689507774, "learning_rate": 4.998296826883835e-06, "loss": 0.8748, "step": 144 }, { "epoch": 0.17747858017135862, "grad_norm": 0.37479242639243654, "learning_rate": 4.998273092298802e-06, "loss": 0.5115, "step": 145 }, { "epoch": 0.17870257037943696, "grad_norm": 0.4514158745969038, "learning_rate": 4.998249193536502e-06, "loss": 0.7835, "step": 146 }, { "epoch": 0.1799265605875153, "grad_norm": 0.4951291207657622, "learning_rate": 4.998225130598506e-06, "loss": 0.6473, "step": 147 }, { "epoch": 0.18115055079559364, "grad_norm": 0.3386131828475296, "learning_rate": 4.9982009034863945e-06, "loss": 0.8011, "step": 148 }, { "epoch": 0.18237454100367198, "grad_norm": 0.3228627462553721, "learning_rate": 4.998176512201759e-06, "loss": 0.8726, "step": 149 }, { "epoch": 0.1835985312117503, "grad_norm": 0.5854932496433183, "learning_rate": 4.998151956746204e-06, "loss": 0.8107, "step": 150 }, { "epoch": 0.18482252141982863, "grad_norm": 0.4399488624127159, "learning_rate": 4.998127237121343e-06, "loss": 1.3297, "step": 151 }, { "epoch": 0.18604651162790697, "grad_norm": 0.7223478344080757, "learning_rate": 4.998102353328799e-06, "loss": 0.7384, "step": 152 }, { "epoch": 0.18727050183598531, "grad_norm": 0.4119591602064238, "learning_rate": 4.998077305370208e-06, "loss": 0.7381, "step": 153 }, { "epoch": 0.18849449204406366, "grad_norm": 0.3857796813404751, "learning_rate": 4.998052093247216e-06, "loss": 1.1831, "step": 154 }, { "epoch": 0.189718482252142, "grad_norm": 0.6318475758628039, "learning_rate": 4.998026716961482e-06, "loss": 0.6631, "step": 155 }, { "epoch": 0.1909424724602203, "grad_norm": 0.32050742437596175, "learning_rate": 4.998001176514669e-06, "loss": 0.6584, "step": 156 }, { "epoch": 0.19216646266829865, "grad_norm": 0.3915227502604706, "learning_rate": 4.9979754719084605e-06, "loss": 1.106, "step": 157 }, { "epoch": 0.193390452876377, "grad_norm": 0.7184424755359442, "learning_rate": 4.997949603144543e-06, "loss": 0.6375, "step": 158 }, { "epoch": 0.19461444308445533, "grad_norm": 0.5318068215351058, "learning_rate": 4.997923570224616e-06, "loss": 1.2265, "step": 159 }, { "epoch": 0.19583843329253367, "grad_norm": 0.5495236392930165, "learning_rate": 4.997897373150391e-06, "loss": 1.068, "step": 160 }, { "epoch": 0.19706242350061198, "grad_norm": 0.4896808464872852, "learning_rate": 4.99787101192359e-06, "loss": 1.107, "step": 161 }, { "epoch": 0.19828641370869032, "grad_norm": 0.4592888365279043, "learning_rate": 4.997844486545945e-06, "loss": 0.5706, "step": 162 }, { "epoch": 0.19951040391676866, "grad_norm": 0.37159977396263055, "learning_rate": 4.9978177970192e-06, "loss": 0.9603, "step": 163 }, { "epoch": 0.200734394124847, "grad_norm": 0.3919580648883766, "learning_rate": 4.997790943345108e-06, "loss": 1.1347, "step": 164 }, { "epoch": 0.20195838433292534, "grad_norm": 0.4848483989999201, "learning_rate": 4.997763925525433e-06, "loss": 0.9064, "step": 165 }, { "epoch": 0.20318237454100369, "grad_norm": 0.2445361529142901, "learning_rate": 4.997736743561953e-06, "loss": 0.4401, "step": 166 }, { "epoch": 0.204406364749082, "grad_norm": 0.49908539255042084, "learning_rate": 4.9977093974564505e-06, "loss": 0.8189, "step": 167 }, { "epoch": 0.20563035495716034, "grad_norm": 0.30279213170087316, "learning_rate": 4.997681887210726e-06, "loss": 0.9745, "step": 168 }, { "epoch": 0.20685434516523868, "grad_norm": 0.5715880439135204, "learning_rate": 4.997654212826586e-06, "loss": 0.6207, "step": 169 }, { "epoch": 0.20807833537331702, "grad_norm": 0.27475549346668576, "learning_rate": 4.99762637430585e-06, "loss": 0.6313, "step": 170 }, { "epoch": 0.20930232558139536, "grad_norm": 0.8468479347937827, "learning_rate": 4.997598371650346e-06, "loss": 1.3491, "step": 171 }, { "epoch": 0.21052631578947367, "grad_norm": 0.30388117983313656, "learning_rate": 4.9975702048619155e-06, "loss": 0.638, "step": 172 }, { "epoch": 0.211750305997552, "grad_norm": 0.5767394139051092, "learning_rate": 4.997541873942409e-06, "loss": 0.8063, "step": 173 }, { "epoch": 0.21297429620563035, "grad_norm": 0.5787388882265994, "learning_rate": 4.9975133788936876e-06, "loss": 1.3126, "step": 174 }, { "epoch": 0.2141982864137087, "grad_norm": 0.38625235096294347, "learning_rate": 4.997484719717624e-06, "loss": 0.8975, "step": 175 }, { "epoch": 0.21542227662178703, "grad_norm": 0.3826102878704471, "learning_rate": 4.997455896416104e-06, "loss": 0.4821, "step": 176 }, { "epoch": 0.21664626682986537, "grad_norm": 0.5100533216743708, "learning_rate": 4.997426908991019e-06, "loss": 0.6693, "step": 177 }, { "epoch": 0.2178702570379437, "grad_norm": 0.3240272144167246, "learning_rate": 4.997397757444275e-06, "loss": 0.75, "step": 178 }, { "epoch": 0.21909424724602203, "grad_norm": 0.5071827270438135, "learning_rate": 4.997368441777788e-06, "loss": 1.3654, "step": 179 }, { "epoch": 0.22031823745410037, "grad_norm": 0.28815113962935535, "learning_rate": 4.997338961993483e-06, "loss": 0.6788, "step": 180 }, { "epoch": 0.2215422276621787, "grad_norm": 0.49956327183259586, "learning_rate": 4.9973093180932995e-06, "loss": 0.7429, "step": 181 }, { "epoch": 0.22276621787025705, "grad_norm": 0.2905320526365547, "learning_rate": 4.997279510079184e-06, "loss": 0.6445, "step": 182 }, { "epoch": 0.22399020807833536, "grad_norm": 0.5239616159135687, "learning_rate": 4.997249537953096e-06, "loss": 0.5992, "step": 183 }, { "epoch": 0.2252141982864137, "grad_norm": 0.4648600472659148, "learning_rate": 4.997219401717005e-06, "loss": 0.7933, "step": 184 }, { "epoch": 0.22643818849449204, "grad_norm": 0.47836539467822914, "learning_rate": 4.997189101372892e-06, "loss": 0.78, "step": 185 }, { "epoch": 0.22766217870257038, "grad_norm": 0.5689519798620852, "learning_rate": 4.997158636922748e-06, "loss": 1.8561, "step": 186 }, { "epoch": 0.22888616891064872, "grad_norm": 0.3249181825098272, "learning_rate": 4.997128008368574e-06, "loss": 0.5759, "step": 187 }, { "epoch": 0.23011015911872704, "grad_norm": 0.43628931796956344, "learning_rate": 4.997097215712383e-06, "loss": 1.2084, "step": 188 }, { "epoch": 0.23133414932680538, "grad_norm": 0.5661250013530805, "learning_rate": 4.9970662589562004e-06, "loss": 1.0399, "step": 189 }, { "epoch": 0.23255813953488372, "grad_norm": 0.5227652020785303, "learning_rate": 4.997035138102057e-06, "loss": 1.7628, "step": 190 }, { "epoch": 0.23378212974296206, "grad_norm": 0.3039968524820034, "learning_rate": 4.997003853152003e-06, "loss": 0.7608, "step": 191 }, { "epoch": 0.2350061199510404, "grad_norm": 0.5004286280906974, "learning_rate": 4.99697240410809e-06, "loss": 0.7315, "step": 192 }, { "epoch": 0.23623011015911874, "grad_norm": 0.5471560096499389, "learning_rate": 4.9969407909723865e-06, "loss": 0.6934, "step": 193 }, { "epoch": 0.23745410036719705, "grad_norm": 0.41295993902503664, "learning_rate": 4.99690901374697e-06, "loss": 0.7179, "step": 194 }, { "epoch": 0.2386780905752754, "grad_norm": 0.3894074261579104, "learning_rate": 4.9968770724339284e-06, "loss": 1.2983, "step": 195 }, { "epoch": 0.23990208078335373, "grad_norm": 0.33281237615234205, "learning_rate": 4.996844967035361e-06, "loss": 0.7737, "step": 196 }, { "epoch": 0.24112607099143207, "grad_norm": 0.6370346789944287, "learning_rate": 4.996812697553378e-06, "loss": 1.5354, "step": 197 }, { "epoch": 0.2423500611995104, "grad_norm": 0.40147533713216516, "learning_rate": 4.996780263990099e-06, "loss": 1.4154, "step": 198 }, { "epoch": 0.24357405140758873, "grad_norm": 0.4736800171228452, "learning_rate": 4.996747666347655e-06, "loss": 0.8009, "step": 199 }, { "epoch": 0.24479804161566707, "grad_norm": 0.3341724444131733, "learning_rate": 4.996714904628191e-06, "loss": 0.7838, "step": 200 }, { "epoch": 0.2460220318237454, "grad_norm": 0.5437138362384825, "learning_rate": 4.996681978833857e-06, "loss": 0.9131, "step": 201 }, { "epoch": 0.24724602203182375, "grad_norm": 0.4218633779837018, "learning_rate": 4.996648888966819e-06, "loss": 0.7337, "step": 202 }, { "epoch": 0.2484700122399021, "grad_norm": 0.284889446000958, "learning_rate": 4.9966156350292495e-06, "loss": 0.7668, "step": 203 }, { "epoch": 0.24969400244798043, "grad_norm": 0.4141797542372401, "learning_rate": 4.996582217023336e-06, "loss": 0.9985, "step": 204 }, { "epoch": 0.25091799265605874, "grad_norm": 0.5678675624576675, "learning_rate": 4.996548634951272e-06, "loss": 0.7437, "step": 205 }, { "epoch": 0.2521419828641371, "grad_norm": 0.5162332751942637, "learning_rate": 4.996514888815267e-06, "loss": 1.2859, "step": 206 }, { "epoch": 0.2533659730722154, "grad_norm": 0.4395001801952133, "learning_rate": 4.996480978617536e-06, "loss": 0.7203, "step": 207 }, { "epoch": 0.25458996328029376, "grad_norm": 0.533497126973168, "learning_rate": 4.99644690436031e-06, "loss": 0.7629, "step": 208 }, { "epoch": 0.2558139534883721, "grad_norm": 0.36556328854877673, "learning_rate": 4.996412666045827e-06, "loss": 0.792, "step": 209 }, { "epoch": 0.25703794369645044, "grad_norm": 0.5027566620987959, "learning_rate": 4.996378263676337e-06, "loss": 0.7591, "step": 210 }, { "epoch": 0.2582619339045288, "grad_norm": 0.4690908461976835, "learning_rate": 4.996343697254101e-06, "loss": 1.4626, "step": 211 }, { "epoch": 0.2594859241126071, "grad_norm": 0.485475298306494, "learning_rate": 4.996308966781391e-06, "loss": 1.2105, "step": 212 }, { "epoch": 0.2607099143206854, "grad_norm": 0.4049909223288257, "learning_rate": 4.996274072260489e-06, "loss": 0.6471, "step": 213 }, { "epoch": 0.26193390452876375, "grad_norm": 0.43623035400909826, "learning_rate": 4.9962390136936875e-06, "loss": 0.7631, "step": 214 }, { "epoch": 0.2631578947368421, "grad_norm": 0.4328970092945313, "learning_rate": 4.996203791083291e-06, "loss": 0.8536, "step": 215 }, { "epoch": 0.26438188494492043, "grad_norm": 0.4129059493539081, "learning_rate": 4.996168404431614e-06, "loss": 1.1369, "step": 216 }, { "epoch": 0.26560587515299877, "grad_norm": 0.5512147023001712, "learning_rate": 4.996132853740983e-06, "loss": 0.603, "step": 217 }, { "epoch": 0.2668298653610771, "grad_norm": 0.32062319796630956, "learning_rate": 4.996097139013732e-06, "loss": 0.5787, "step": 218 }, { "epoch": 0.26805385556915545, "grad_norm": 0.5885131760628513, "learning_rate": 4.996061260252211e-06, "loss": 0.9231, "step": 219 }, { "epoch": 0.2692778457772338, "grad_norm": 0.44250343208886816, "learning_rate": 4.9960252174587755e-06, "loss": 0.869, "step": 220 }, { "epoch": 0.27050183598531213, "grad_norm": 0.6770444474548993, "learning_rate": 4.995989010635795e-06, "loss": 0.7859, "step": 221 }, { "epoch": 0.2717258261933905, "grad_norm": 0.41483569214928795, "learning_rate": 4.995952639785648e-06, "loss": 0.7092, "step": 222 }, { "epoch": 0.2729498164014688, "grad_norm": 0.45009232931162785, "learning_rate": 4.995916104910726e-06, "loss": 0.5354, "step": 223 }, { "epoch": 0.2741738066095471, "grad_norm": 0.43389200474036227, "learning_rate": 4.99587940601343e-06, "loss": 0.6763, "step": 224 }, { "epoch": 0.27539779681762544, "grad_norm": 0.3495561501216181, "learning_rate": 4.995842543096171e-06, "loss": 0.9284, "step": 225 }, { "epoch": 0.2766217870257038, "grad_norm": 0.6919048432012822, "learning_rate": 4.9958055161613706e-06, "loss": 0.7265, "step": 226 }, { "epoch": 0.2778457772337821, "grad_norm": 0.34441159988406594, "learning_rate": 4.995768325211463e-06, "loss": 0.5909, "step": 227 }, { "epoch": 0.27906976744186046, "grad_norm": 0.47574339841571883, "learning_rate": 4.995730970248893e-06, "loss": 0.606, "step": 228 }, { "epoch": 0.2802937576499388, "grad_norm": 0.4941533686083936, "learning_rate": 4.9956934512761145e-06, "loss": 1.5199, "step": 229 }, { "epoch": 0.28151774785801714, "grad_norm": 0.3742334713270821, "learning_rate": 4.995655768295593e-06, "loss": 0.7398, "step": 230 }, { "epoch": 0.2827417380660955, "grad_norm": 0.45036144161461145, "learning_rate": 4.995617921309806e-06, "loss": 0.6636, "step": 231 }, { "epoch": 0.2839657282741738, "grad_norm": 0.40031611547530066, "learning_rate": 4.995579910321239e-06, "loss": 0.7104, "step": 232 }, { "epoch": 0.28518971848225216, "grad_norm": 0.374237680233108, "learning_rate": 4.995541735332391e-06, "loss": 0.9785, "step": 233 }, { "epoch": 0.2864137086903305, "grad_norm": 0.509762012418007, "learning_rate": 4.995503396345771e-06, "loss": 0.7464, "step": 234 }, { "epoch": 0.2876376988984088, "grad_norm": 0.5496727138740468, "learning_rate": 4.9954648933638975e-06, "loss": 0.7652, "step": 235 }, { "epoch": 0.28886168910648713, "grad_norm": 0.4188285414593592, "learning_rate": 4.995426226389301e-06, "loss": 0.8009, "step": 236 }, { "epoch": 0.29008567931456547, "grad_norm": 0.32169460293596425, "learning_rate": 4.995387395424523e-06, "loss": 0.7628, "step": 237 }, { "epoch": 0.2913096695226438, "grad_norm": 0.464132936363322, "learning_rate": 4.9953484004721155e-06, "loss": 0.6802, "step": 238 }, { "epoch": 0.29253365973072215, "grad_norm": 0.3539045069204836, "learning_rate": 4.995309241534641e-06, "loss": 0.7423, "step": 239 }, { "epoch": 0.2937576499388005, "grad_norm": 0.5825407156686288, "learning_rate": 4.995269918614672e-06, "loss": 0.5653, "step": 240 }, { "epoch": 0.29498164014687883, "grad_norm": 0.4627979340308164, "learning_rate": 4.995230431714794e-06, "loss": 0.7908, "step": 241 }, { "epoch": 0.2962056303549572, "grad_norm": 0.5766648265598003, "learning_rate": 4.995190780837601e-06, "loss": 0.9683, "step": 242 }, { "epoch": 0.2974296205630355, "grad_norm": 0.45446441684394123, "learning_rate": 4.995150965985699e-06, "loss": 0.8602, "step": 243 }, { "epoch": 0.29865361077111385, "grad_norm": 0.6837464625447014, "learning_rate": 4.9951109871617045e-06, "loss": 0.5667, "step": 244 }, { "epoch": 0.2998776009791922, "grad_norm": 0.41141226748470483, "learning_rate": 4.9950708443682446e-06, "loss": 0.6613, "step": 245 }, { "epoch": 0.3011015911872705, "grad_norm": 0.43729198998748986, "learning_rate": 4.995030537607957e-06, "loss": 0.7126, "step": 246 }, { "epoch": 0.3023255813953488, "grad_norm": 0.6110751343503649, "learning_rate": 4.9949900668834916e-06, "loss": 0.7024, "step": 247 }, { "epoch": 0.30354957160342716, "grad_norm": 0.5053383323646585, "learning_rate": 4.994949432197507e-06, "loss": 0.6215, "step": 248 }, { "epoch": 0.3047735618115055, "grad_norm": 0.5390976496088581, "learning_rate": 4.994908633552674e-06, "loss": 1.1544, "step": 249 }, { "epoch": 0.30599755201958384, "grad_norm": 0.5491463550819378, "learning_rate": 4.994867670951674e-06, "loss": 0.4781, "step": 250 }, { "epoch": 0.3072215422276622, "grad_norm": 0.6831890373805769, "learning_rate": 4.994826544397198e-06, "loss": 0.9995, "step": 251 }, { "epoch": 0.3084455324357405, "grad_norm": 0.6019644145564317, "learning_rate": 4.994785253891949e-06, "loss": 1.3746, "step": 252 }, { "epoch": 0.30966952264381886, "grad_norm": 0.464487943349067, "learning_rate": 4.994743799438641e-06, "loss": 0.6875, "step": 253 }, { "epoch": 0.3108935128518972, "grad_norm": 0.5497408102914363, "learning_rate": 4.9947021810399986e-06, "loss": 0.6446, "step": 254 }, { "epoch": 0.31211750305997554, "grad_norm": 0.829023176350708, "learning_rate": 4.994660398698755e-06, "loss": 1.0661, "step": 255 }, { "epoch": 0.31334149326805383, "grad_norm": 0.3986199249741592, "learning_rate": 4.9946184524176576e-06, "loss": 0.6443, "step": 256 }, { "epoch": 0.31456548347613217, "grad_norm": 0.4633947127041751, "learning_rate": 4.994576342199462e-06, "loss": 0.6902, "step": 257 }, { "epoch": 0.3157894736842105, "grad_norm": 0.47760380163452454, "learning_rate": 4.994534068046936e-06, "loss": 0.644, "step": 258 }, { "epoch": 0.31701346389228885, "grad_norm": 1.0075439480350235, "learning_rate": 4.994491629962859e-06, "loss": 0.6522, "step": 259 }, { "epoch": 0.3182374541003672, "grad_norm": 0.4689984187228857, "learning_rate": 4.994449027950018e-06, "loss": 0.7934, "step": 260 }, { "epoch": 0.31946144430844553, "grad_norm": 0.5307467615288092, "learning_rate": 4.994406262011213e-06, "loss": 1.3388, "step": 261 }, { "epoch": 0.32068543451652387, "grad_norm": 0.47202037017977416, "learning_rate": 4.994363332149254e-06, "loss": 0.7637, "step": 262 }, { "epoch": 0.3219094247246022, "grad_norm": 0.35722945212055746, "learning_rate": 4.994320238366963e-06, "loss": 0.6222, "step": 263 }, { "epoch": 0.32313341493268055, "grad_norm": 0.5247557166547449, "learning_rate": 4.994276980667173e-06, "loss": 1.2315, "step": 264 }, { "epoch": 0.3243574051407589, "grad_norm": 0.6510691266406681, "learning_rate": 4.994233559052724e-06, "loss": 0.7935, "step": 265 }, { "epoch": 0.32558139534883723, "grad_norm": 0.5808808864794651, "learning_rate": 4.994189973526472e-06, "loss": 0.694, "step": 266 }, { "epoch": 0.3268053855569155, "grad_norm": 0.342859550230117, "learning_rate": 4.99414622409128e-06, "loss": 0.6828, "step": 267 }, { "epoch": 0.32802937576499386, "grad_norm": 0.43612585685740884, "learning_rate": 4.994102310750023e-06, "loss": 0.6533, "step": 268 }, { "epoch": 0.3292533659730722, "grad_norm": 0.5436454281415156, "learning_rate": 4.994058233505588e-06, "loss": 1.6295, "step": 269 }, { "epoch": 0.33047735618115054, "grad_norm": 0.43775295110254103, "learning_rate": 4.99401399236087e-06, "loss": 0.7311, "step": 270 }, { "epoch": 0.3317013463892289, "grad_norm": 0.5170552538273601, "learning_rate": 4.993969587318777e-06, "loss": 1.167, "step": 271 }, { "epoch": 0.3329253365973072, "grad_norm": 0.5440519486265794, "learning_rate": 4.993925018382228e-06, "loss": 0.5029, "step": 272 }, { "epoch": 0.33414932680538556, "grad_norm": 0.35139266911721506, "learning_rate": 4.993880285554151e-06, "loss": 0.682, "step": 273 }, { "epoch": 0.3353733170134639, "grad_norm": 0.5468861395513627, "learning_rate": 4.9938353888374864e-06, "loss": 0.6786, "step": 274 }, { "epoch": 0.33659730722154224, "grad_norm": 0.4382209259198201, "learning_rate": 4.993790328235184e-06, "loss": 0.6551, "step": 275 }, { "epoch": 0.3378212974296206, "grad_norm": 0.7234003051270653, "learning_rate": 4.9937451037502035e-06, "loss": 0.729, "step": 276 }, { "epoch": 0.3390452876376989, "grad_norm": 0.4397032702863513, "learning_rate": 4.99369971538552e-06, "loss": 1.1771, "step": 277 }, { "epoch": 0.3402692778457772, "grad_norm": 0.7131330442490159, "learning_rate": 4.993654163144115e-06, "loss": 0.6182, "step": 278 }, { "epoch": 0.34149326805385555, "grad_norm": 0.624111336386168, "learning_rate": 4.993608447028981e-06, "loss": 0.7858, "step": 279 }, { "epoch": 0.3427172582619339, "grad_norm": 0.46245621252372643, "learning_rate": 4.9935625670431235e-06, "loss": 0.6259, "step": 280 }, { "epoch": 0.34394124847001223, "grad_norm": 0.7632145813638226, "learning_rate": 4.993516523189556e-06, "loss": 0.9955, "step": 281 }, { "epoch": 0.34516523867809057, "grad_norm": 0.6707122084619282, "learning_rate": 4.9934703154713075e-06, "loss": 1.2381, "step": 282 }, { "epoch": 0.3463892288861689, "grad_norm": 0.4520286005651424, "learning_rate": 4.99342394389141e-06, "loss": 0.7872, "step": 283 }, { "epoch": 0.34761321909424725, "grad_norm": 0.3916751700619931, "learning_rate": 4.993377408452916e-06, "loss": 0.8928, "step": 284 }, { "epoch": 0.3488372093023256, "grad_norm": 0.6570141277578657, "learning_rate": 4.993330709158879e-06, "loss": 0.4832, "step": 285 }, { "epoch": 0.35006119951040393, "grad_norm": 0.4003850438158674, "learning_rate": 4.993283846012371e-06, "loss": 0.7022, "step": 286 }, { "epoch": 0.35128518971848227, "grad_norm": 0.3036266746080802, "learning_rate": 4.993236819016471e-06, "loss": 0.4886, "step": 287 }, { "epoch": 0.3525091799265606, "grad_norm": 0.3622207827400109, "learning_rate": 4.9931896281742685e-06, "loss": 0.6166, "step": 288 }, { "epoch": 0.3537331701346389, "grad_norm": 0.3880251816279412, "learning_rate": 4.993142273488866e-06, "loss": 0.6824, "step": 289 }, { "epoch": 0.35495716034271724, "grad_norm": 0.5738188001442078, "learning_rate": 4.9930947549633745e-06, "loss": 1.1688, "step": 290 }, { "epoch": 0.3561811505507956, "grad_norm": 0.4362385965173308, "learning_rate": 4.9930470726009165e-06, "loss": 0.7815, "step": 291 }, { "epoch": 0.3574051407588739, "grad_norm": 0.5442023426597746, "learning_rate": 4.992999226404627e-06, "loss": 1.7166, "step": 292 }, { "epoch": 0.35862913096695226, "grad_norm": 0.5710824241256561, "learning_rate": 4.992951216377648e-06, "loss": 0.6388, "step": 293 }, { "epoch": 0.3598531211750306, "grad_norm": 0.6390812083022306, "learning_rate": 4.992903042523137e-06, "loss": 0.7242, "step": 294 }, { "epoch": 0.36107711138310894, "grad_norm": 0.8321065324171557, "learning_rate": 4.992854704844257e-06, "loss": 1.121, "step": 295 }, { "epoch": 0.3623011015911873, "grad_norm": 0.48345521754380033, "learning_rate": 4.992806203344188e-06, "loss": 0.6511, "step": 296 }, { "epoch": 0.3635250917992656, "grad_norm": 0.442311019177579, "learning_rate": 4.992757538026115e-06, "loss": 1.1869, "step": 297 }, { "epoch": 0.36474908200734396, "grad_norm": 0.36259486182817524, "learning_rate": 4.992708708893237e-06, "loss": 0.7094, "step": 298 }, { "epoch": 0.3659730722154223, "grad_norm": 0.5544932696828196, "learning_rate": 4.992659715948763e-06, "loss": 1.5548, "step": 299 }, { "epoch": 0.3671970624235006, "grad_norm": 0.5607043561772156, "learning_rate": 4.99261055919591e-06, "loss": 1.5498, "step": 300 }, { "epoch": 0.3684210526315789, "grad_norm": 0.4106643603096727, "learning_rate": 4.992561238637912e-06, "loss": 0.6165, "step": 301 }, { "epoch": 0.36964504283965727, "grad_norm": 0.3431100429385, "learning_rate": 4.992511754278009e-06, "loss": 0.7248, "step": 302 }, { "epoch": 0.3708690330477356, "grad_norm": 0.48499515589131414, "learning_rate": 4.9924621061194524e-06, "loss": 0.6237, "step": 303 }, { "epoch": 0.37209302325581395, "grad_norm": 0.5995655457611933, "learning_rate": 4.992412294165505e-06, "loss": 0.5666, "step": 304 }, { "epoch": 0.3733170134638923, "grad_norm": 0.4765058018222547, "learning_rate": 4.99236231841944e-06, "loss": 1.0014, "step": 305 }, { "epoch": 0.37454100367197063, "grad_norm": 0.5353333723369841, "learning_rate": 4.992312178884543e-06, "loss": 1.0534, "step": 306 }, { "epoch": 0.37576499388004897, "grad_norm": 0.3624605466616526, "learning_rate": 4.992261875564108e-06, "loss": 0.7108, "step": 307 }, { "epoch": 0.3769889840881273, "grad_norm": 0.35899087384306405, "learning_rate": 4.99221140846144e-06, "loss": 0.5408, "step": 308 }, { "epoch": 0.37821297429620565, "grad_norm": 0.487077037642005, "learning_rate": 4.992160777579857e-06, "loss": 0.8632, "step": 309 }, { "epoch": 0.379436964504284, "grad_norm": 1.0139106930067168, "learning_rate": 4.992109982922685e-06, "loss": 0.5317, "step": 310 }, { "epoch": 0.3806609547123623, "grad_norm": 0.4696845443377511, "learning_rate": 4.992059024493263e-06, "loss": 0.5028, "step": 311 }, { "epoch": 0.3818849449204406, "grad_norm": 0.4499122175232395, "learning_rate": 4.992007902294939e-06, "loss": 0.9648, "step": 312 }, { "epoch": 0.38310893512851896, "grad_norm": 0.5356240915106696, "learning_rate": 4.991956616331074e-06, "loss": 0.7667, "step": 313 }, { "epoch": 0.3843329253365973, "grad_norm": 0.7173577965714655, "learning_rate": 4.9919051666050355e-06, "loss": 2.0693, "step": 314 }, { "epoch": 0.38555691554467564, "grad_norm": 0.6139317501138394, "learning_rate": 4.9918535531202075e-06, "loss": 1.7767, "step": 315 }, { "epoch": 0.386780905752754, "grad_norm": 0.3593650422494968, "learning_rate": 4.991801775879981e-06, "loss": 0.6836, "step": 316 }, { "epoch": 0.3880048959608323, "grad_norm": 0.7812116567688177, "learning_rate": 4.991749834887758e-06, "loss": 0.8054, "step": 317 }, { "epoch": 0.38922888616891066, "grad_norm": 0.7470535859823948, "learning_rate": 4.991697730146952e-06, "loss": 0.6041, "step": 318 }, { "epoch": 0.390452876376989, "grad_norm": 0.6404465963351149, "learning_rate": 4.9916454616609875e-06, "loss": 1.1557, "step": 319 }, { "epoch": 0.39167686658506734, "grad_norm": 0.3153155888607163, "learning_rate": 4.9915930294332995e-06, "loss": 0.5525, "step": 320 }, { "epoch": 0.3929008567931457, "grad_norm": 0.6655474444195767, "learning_rate": 4.991540433467333e-06, "loss": 0.74, "step": 321 }, { "epoch": 0.39412484700122397, "grad_norm": 0.4130331472288031, "learning_rate": 4.991487673766544e-06, "loss": 0.6494, "step": 322 }, { "epoch": 0.3953488372093023, "grad_norm": 0.9084530571943739, "learning_rate": 4.991434750334402e-06, "loss": 0.7103, "step": 323 }, { "epoch": 0.39657282741738065, "grad_norm": 0.4806525902993342, "learning_rate": 4.991381663174382e-06, "loss": 1.005, "step": 324 }, { "epoch": 0.397796817625459, "grad_norm": 0.39113371583719014, "learning_rate": 4.991328412289975e-06, "loss": 0.4884, "step": 325 }, { "epoch": 0.3990208078335373, "grad_norm": 0.5011128516738553, "learning_rate": 4.991274997684679e-06, "loss": 0.7204, "step": 326 }, { "epoch": 0.40024479804161567, "grad_norm": 0.5277401592985527, "learning_rate": 4.991221419362005e-06, "loss": 1.0627, "step": 327 }, { "epoch": 0.401468788249694, "grad_norm": 0.5599662265938297, "learning_rate": 4.991167677325473e-06, "loss": 1.1175, "step": 328 }, { "epoch": 0.40269277845777235, "grad_norm": 0.36090832619308766, "learning_rate": 4.991113771578617e-06, "loss": 0.832, "step": 329 }, { "epoch": 0.4039167686658507, "grad_norm": 0.6014499449533965, "learning_rate": 4.991059702124976e-06, "loss": 0.6821, "step": 330 }, { "epoch": 0.40514075887392903, "grad_norm": 0.6380961030988447, "learning_rate": 4.991005468968108e-06, "loss": 0.6623, "step": 331 }, { "epoch": 0.40636474908200737, "grad_norm": 0.5566817907514316, "learning_rate": 4.990951072111572e-06, "loss": 0.8899, "step": 332 }, { "epoch": 0.40758873929008566, "grad_norm": 0.3452644881067129, "learning_rate": 4.990896511558945e-06, "loss": 0.7251, "step": 333 }, { "epoch": 0.408812729498164, "grad_norm": 0.44533300591607666, "learning_rate": 4.990841787313813e-06, "loss": 0.3994, "step": 334 }, { "epoch": 0.41003671970624234, "grad_norm": 0.7012291935157753, "learning_rate": 4.990786899379771e-06, "loss": 1.2105, "step": 335 }, { "epoch": 0.4112607099143207, "grad_norm": 0.7699730158235925, "learning_rate": 4.9907318477604276e-06, "loss": 0.7512, "step": 336 }, { "epoch": 0.412484700122399, "grad_norm": 0.5844222721735775, "learning_rate": 4.990676632459399e-06, "loss": 0.741, "step": 337 }, { "epoch": 0.41370869033047736, "grad_norm": 0.9357690522851926, "learning_rate": 4.990621253480315e-06, "loss": 0.7072, "step": 338 }, { "epoch": 0.4149326805385557, "grad_norm": 0.27407095469032267, "learning_rate": 4.990565710826815e-06, "loss": 0.4287, "step": 339 }, { "epoch": 0.41615667074663404, "grad_norm": 0.5085447392685897, "learning_rate": 4.9905100045025465e-06, "loss": 1.422, "step": 340 }, { "epoch": 0.4173806609547124, "grad_norm": 0.7415706065836827, "learning_rate": 4.990454134511174e-06, "loss": 1.4713, "step": 341 }, { "epoch": 0.4186046511627907, "grad_norm": 0.4549838538864066, "learning_rate": 4.990398100856367e-06, "loss": 0.7243, "step": 342 }, { "epoch": 0.41982864137086906, "grad_norm": 0.4512242161507683, "learning_rate": 4.9903419035418075e-06, "loss": 0.8547, "step": 343 }, { "epoch": 0.42105263157894735, "grad_norm": 0.534164923256363, "learning_rate": 4.9902855425711905e-06, "loss": 0.7069, "step": 344 }, { "epoch": 0.4222766217870257, "grad_norm": 0.469234775417887, "learning_rate": 4.990229017948217e-06, "loss": 1.3196, "step": 345 }, { "epoch": 0.423500611995104, "grad_norm": 0.4842586594577994, "learning_rate": 4.990172329676605e-06, "loss": 0.6216, "step": 346 }, { "epoch": 0.42472460220318237, "grad_norm": 0.7143824981640142, "learning_rate": 4.9901154777600766e-06, "loss": 0.6318, "step": 347 }, { "epoch": 0.4259485924112607, "grad_norm": 0.4957859191910802, "learning_rate": 4.99005846220237e-06, "loss": 0.814, "step": 348 }, { "epoch": 0.42717258261933905, "grad_norm": 0.4420966835734801, "learning_rate": 4.9900012830072305e-06, "loss": 0.736, "step": 349 }, { "epoch": 0.4283965728274174, "grad_norm": 0.6145427384004296, "learning_rate": 4.9899439401784165e-06, "loss": 1.2759, "step": 350 }, { "epoch": 0.42962056303549573, "grad_norm": 0.36005809544555506, "learning_rate": 4.989886433719697e-06, "loss": 0.7074, "step": 351 }, { "epoch": 0.43084455324357407, "grad_norm": 0.5530317148312526, "learning_rate": 4.989828763634851e-06, "loss": 0.7222, "step": 352 }, { "epoch": 0.4320685434516524, "grad_norm": 0.5903230053277813, "learning_rate": 4.989770929927668e-06, "loss": 0.999, "step": 353 }, { "epoch": 0.43329253365973075, "grad_norm": 0.5235973098449931, "learning_rate": 4.989712932601947e-06, "loss": 0.623, "step": 354 }, { "epoch": 0.43451652386780903, "grad_norm": 0.8238368780573823, "learning_rate": 4.989654771661502e-06, "loss": 0.6532, "step": 355 }, { "epoch": 0.4357405140758874, "grad_norm": 0.3961066520599492, "learning_rate": 4.989596447110154e-06, "loss": 0.7365, "step": 356 }, { "epoch": 0.4369645042839657, "grad_norm": 0.4602146080601334, "learning_rate": 4.989537958951736e-06, "loss": 0.6349, "step": 357 }, { "epoch": 0.43818849449204406, "grad_norm": 0.38742547392598503, "learning_rate": 4.98947930719009e-06, "loss": 0.6247, "step": 358 }, { "epoch": 0.4394124847001224, "grad_norm": 0.42036788302585487, "learning_rate": 4.989420491829074e-06, "loss": 0.5722, "step": 359 }, { "epoch": 0.44063647490820074, "grad_norm": 0.7959798845821433, "learning_rate": 4.98936151287255e-06, "loss": 1.0968, "step": 360 }, { "epoch": 0.4418604651162791, "grad_norm": 0.738986970951876, "learning_rate": 4.989302370324395e-06, "loss": 1.0574, "step": 361 }, { "epoch": 0.4430844553243574, "grad_norm": 0.2604864666489375, "learning_rate": 4.989243064188495e-06, "loss": 0.4387, "step": 362 }, { "epoch": 0.44430844553243576, "grad_norm": 0.5476739282324891, "learning_rate": 4.989183594468748e-06, "loss": 0.5895, "step": 363 }, { "epoch": 0.4455324357405141, "grad_norm": 0.8000170519937692, "learning_rate": 4.989123961169064e-06, "loss": 0.5301, "step": 364 }, { "epoch": 0.4467564259485924, "grad_norm": 0.5883493340829177, "learning_rate": 4.989064164293357e-06, "loss": 1.2867, "step": 365 }, { "epoch": 0.4479804161566707, "grad_norm": 0.4351096501408344, "learning_rate": 4.989004203845561e-06, "loss": 0.5018, "step": 366 }, { "epoch": 0.44920440636474906, "grad_norm": 0.4826233005913043, "learning_rate": 4.9889440798296155e-06, "loss": 0.655, "step": 367 }, { "epoch": 0.4504283965728274, "grad_norm": 0.4503050011088164, "learning_rate": 4.988883792249469e-06, "loss": 0.6846, "step": 368 }, { "epoch": 0.45165238678090575, "grad_norm": 0.7312499673085558, "learning_rate": 4.988823341109087e-06, "loss": 0.5229, "step": 369 }, { "epoch": 0.4528763769889841, "grad_norm": 0.8825890360993164, "learning_rate": 4.988762726412441e-06, "loss": 0.8082, "step": 370 }, { "epoch": 0.4541003671970624, "grad_norm": 0.584121075932636, "learning_rate": 4.988701948163512e-06, "loss": 0.5286, "step": 371 }, { "epoch": 0.45532435740514077, "grad_norm": 0.7633584400222349, "learning_rate": 4.988641006366298e-06, "loss": 0.6654, "step": 372 }, { "epoch": 0.4565483476132191, "grad_norm": 0.6532785362101784, "learning_rate": 4.988579901024801e-06, "loss": 1.3154, "step": 373 }, { "epoch": 0.45777233782129745, "grad_norm": 0.5291382291351212, "learning_rate": 4.988518632143038e-06, "loss": 0.5225, "step": 374 }, { "epoch": 0.4589963280293758, "grad_norm": 0.6468474741863817, "learning_rate": 4.988457199725034e-06, "loss": 0.6772, "step": 375 }, { "epoch": 0.4602203182374541, "grad_norm": 0.4335137465584569, "learning_rate": 4.988395603774827e-06, "loss": 0.6934, "step": 376 }, { "epoch": 0.4614443084455324, "grad_norm": 0.9198073708896262, "learning_rate": 4.9883338442964655e-06, "loss": 0.9603, "step": 377 }, { "epoch": 0.46266829865361075, "grad_norm": 0.5760645355202023, "learning_rate": 4.988271921294008e-06, "loss": 1.2621, "step": 378 }, { "epoch": 0.4638922888616891, "grad_norm": 0.7885276885999555, "learning_rate": 4.988209834771522e-06, "loss": 0.5408, "step": 379 }, { "epoch": 0.46511627906976744, "grad_norm": 0.8238883625693336, "learning_rate": 4.98814758473309e-06, "loss": 0.4811, "step": 380 }, { "epoch": 0.4663402692778458, "grad_norm": 0.5576815178557181, "learning_rate": 4.988085171182802e-06, "loss": 0.6299, "step": 381 }, { "epoch": 0.4675642594859241, "grad_norm": 0.5123263432878964, "learning_rate": 4.988022594124758e-06, "loss": 0.7589, "step": 382 }, { "epoch": 0.46878824969400246, "grad_norm": 0.6220823653637169, "learning_rate": 4.987959853563072e-06, "loss": 0.9005, "step": 383 }, { "epoch": 0.4700122399020808, "grad_norm": 0.6290503139592389, "learning_rate": 4.987896949501867e-06, "loss": 0.9105, "step": 384 }, { "epoch": 0.47123623011015914, "grad_norm": 0.9401316401593696, "learning_rate": 4.987833881945277e-06, "loss": 0.7446, "step": 385 }, { "epoch": 0.4724602203182375, "grad_norm": 0.9051181878739166, "learning_rate": 4.987770650897446e-06, "loss": 0.7699, "step": 386 }, { "epoch": 0.47368421052631576, "grad_norm": 0.35399677965369547, "learning_rate": 4.987707256362529e-06, "loss": 0.6705, "step": 387 }, { "epoch": 0.4749082007343941, "grad_norm": 0.4664473022740672, "learning_rate": 4.987643698344693e-06, "loss": 0.7531, "step": 388 }, { "epoch": 0.47613219094247244, "grad_norm": 0.6268223032680224, "learning_rate": 4.9875799768481134e-06, "loss": 0.8883, "step": 389 }, { "epoch": 0.4773561811505508, "grad_norm": 0.7930645555375703, "learning_rate": 4.987516091876979e-06, "loss": 0.6697, "step": 390 }, { "epoch": 0.4785801713586291, "grad_norm": 0.39005757476249786, "learning_rate": 4.987452043435489e-06, "loss": 0.6306, "step": 391 }, { "epoch": 0.47980416156670747, "grad_norm": 0.33717536882333426, "learning_rate": 4.98738783152785e-06, "loss": 0.5174, "step": 392 }, { "epoch": 0.4810281517747858, "grad_norm": 0.5096723567856368, "learning_rate": 4.9873234561582826e-06, "loss": 0.7911, "step": 393 }, { "epoch": 0.48225214198286415, "grad_norm": 0.5771711738205841, "learning_rate": 4.987258917331018e-06, "loss": 0.4946, "step": 394 }, { "epoch": 0.4834761321909425, "grad_norm": 0.6184236099986771, "learning_rate": 4.987194215050297e-06, "loss": 1.6691, "step": 395 }, { "epoch": 0.4847001223990208, "grad_norm": 0.7192296461953666, "learning_rate": 4.987129349320372e-06, "loss": 0.6894, "step": 396 }, { "epoch": 0.48592411260709917, "grad_norm": 0.5298906853294455, "learning_rate": 4.987064320145506e-06, "loss": 0.7028, "step": 397 }, { "epoch": 0.48714810281517745, "grad_norm": 0.40377559904858246, "learning_rate": 4.986999127529971e-06, "loss": 0.6399, "step": 398 }, { "epoch": 0.4883720930232558, "grad_norm": 0.455341828053919, "learning_rate": 4.986933771478052e-06, "loss": 0.6907, "step": 399 }, { "epoch": 0.48959608323133413, "grad_norm": 0.6681580290457239, "learning_rate": 4.986868251994045e-06, "loss": 1.3346, "step": 400 }, { "epoch": 0.4908200734394125, "grad_norm": 0.5057058961872322, "learning_rate": 4.986802569082254e-06, "loss": 0.6, "step": 401 }, { "epoch": 0.4920440636474908, "grad_norm": 0.4720055776923247, "learning_rate": 4.986736722746997e-06, "loss": 0.4718, "step": 402 }, { "epoch": 0.49326805385556916, "grad_norm": 0.7991662775483814, "learning_rate": 4.986670712992599e-06, "loss": 0.552, "step": 403 }, { "epoch": 0.4944920440636475, "grad_norm": 0.6535808127274191, "learning_rate": 4.986604539823399e-06, "loss": 0.7138, "step": 404 }, { "epoch": 0.49571603427172584, "grad_norm": 0.5658656474191684, "learning_rate": 4.986538203243748e-06, "loss": 0.7007, "step": 405 }, { "epoch": 0.4969400244798042, "grad_norm": 0.455049664118394, "learning_rate": 4.986471703258002e-06, "loss": 0.8734, "step": 406 }, { "epoch": 0.4981640146878825, "grad_norm": 0.5990395698316427, "learning_rate": 4.9864050398705334e-06, "loss": 0.7472, "step": 407 }, { "epoch": 0.49938800489596086, "grad_norm": 0.5479802574372877, "learning_rate": 4.986338213085721e-06, "loss": 1.2621, "step": 408 }, { "epoch": 0.5006119951040392, "grad_norm": 0.6532806167863214, "learning_rate": 4.986271222907958e-06, "loss": 0.6825, "step": 409 }, { "epoch": 0.5018359853121175, "grad_norm": 0.6120126735894845, "learning_rate": 4.9862040693416466e-06, "loss": 1.2561, "step": 410 }, { "epoch": 0.5030599755201959, "grad_norm": 0.5032771691644705, "learning_rate": 4.9861367523911995e-06, "loss": 0.8265, "step": 411 }, { "epoch": 0.5042839657282742, "grad_norm": 0.5928601683665099, "learning_rate": 4.9860692720610405e-06, "loss": 0.6969, "step": 412 }, { "epoch": 0.5055079559363526, "grad_norm": 0.3991212570382625, "learning_rate": 4.9860016283556045e-06, "loss": 0.706, "step": 413 }, { "epoch": 0.5067319461444308, "grad_norm": 0.44493232390900905, "learning_rate": 4.985933821279336e-06, "loss": 1.0315, "step": 414 }, { "epoch": 0.5079559363525091, "grad_norm": 0.5326504993434127, "learning_rate": 4.985865850836693e-06, "loss": 0.6428, "step": 415 }, { "epoch": 0.5091799265605875, "grad_norm": 0.75296526985409, "learning_rate": 4.985797717032139e-06, "loss": 0.7676, "step": 416 }, { "epoch": 0.5104039167686658, "grad_norm": 0.6071213565299739, "learning_rate": 4.985729419870154e-06, "loss": 0.7815, "step": 417 }, { "epoch": 0.5116279069767442, "grad_norm": 0.7313162025393259, "learning_rate": 4.985660959355226e-06, "loss": 0.6697, "step": 418 }, { "epoch": 0.5128518971848225, "grad_norm": 1.0084870827345973, "learning_rate": 4.985592335491854e-06, "loss": 0.6198, "step": 419 }, { "epoch": 0.5140758873929009, "grad_norm": 0.6003035571504491, "learning_rate": 4.9855235482845466e-06, "loss": 1.3909, "step": 420 }, { "epoch": 0.5152998776009792, "grad_norm": 0.6579874918589667, "learning_rate": 4.985454597737825e-06, "loss": 0.7469, "step": 421 }, { "epoch": 0.5165238678090576, "grad_norm": 0.6969597178767786, "learning_rate": 4.9853854838562196e-06, "loss": 1.5377, "step": 422 }, { "epoch": 0.5177478580171359, "grad_norm": 0.8000478783932281, "learning_rate": 4.9853162066442745e-06, "loss": 0.6222, "step": 423 }, { "epoch": 0.5189718482252142, "grad_norm": 0.7666703968425487, "learning_rate": 4.9852467661065395e-06, "loss": 1.2538, "step": 424 }, { "epoch": 0.5201958384332925, "grad_norm": 0.8465989219790139, "learning_rate": 4.98517716224758e-06, "loss": 0.6529, "step": 425 }, { "epoch": 0.5214198286413708, "grad_norm": 0.9688003399358155, "learning_rate": 4.98510739507197e-06, "loss": 0.5989, "step": 426 }, { "epoch": 0.5226438188494492, "grad_norm": 0.4637067568173058, "learning_rate": 4.985037464584293e-06, "loss": 0.4672, "step": 427 }, { "epoch": 0.5238678090575275, "grad_norm": 0.4574913334463103, "learning_rate": 4.984967370789146e-06, "loss": 0.5859, "step": 428 }, { "epoch": 0.5250917992656059, "grad_norm": 0.5114573213837407, "learning_rate": 4.9848971136911335e-06, "loss": 0.4764, "step": 429 }, { "epoch": 0.5263157894736842, "grad_norm": 0.6809683006620858, "learning_rate": 4.9848266932948745e-06, "loss": 0.5473, "step": 430 }, { "epoch": 0.5275397796817626, "grad_norm": 0.6463385524260794, "learning_rate": 4.984756109604996e-06, "loss": 1.3814, "step": 431 }, { "epoch": 0.5287637698898409, "grad_norm": 0.40419008330189565, "learning_rate": 4.984685362626135e-06, "loss": 0.7705, "step": 432 }, { "epoch": 0.5299877600979193, "grad_norm": 0.4939438732325167, "learning_rate": 4.9846144523629435e-06, "loss": 0.7647, "step": 433 }, { "epoch": 0.5312117503059975, "grad_norm": 0.9281581150066616, "learning_rate": 4.98454337882008e-06, "loss": 0.5892, "step": 434 }, { "epoch": 0.5324357405140759, "grad_norm": 0.48154020714924345, "learning_rate": 4.984472142002216e-06, "loss": 0.6756, "step": 435 }, { "epoch": 0.5336597307221542, "grad_norm": 0.5285488763420756, "learning_rate": 4.984400741914032e-06, "loss": 0.4437, "step": 436 }, { "epoch": 0.5348837209302325, "grad_norm": 0.5774225748447701, "learning_rate": 4.98432917856022e-06, "loss": 0.8708, "step": 437 }, { "epoch": 0.5361077111383109, "grad_norm": 0.4693383447942286, "learning_rate": 4.9842574519454825e-06, "loss": 0.8761, "step": 438 }, { "epoch": 0.5373317013463892, "grad_norm": 0.4550208973916101, "learning_rate": 4.984185562074535e-06, "loss": 0.5698, "step": 439 }, { "epoch": 0.5385556915544676, "grad_norm": 0.6150252036978977, "learning_rate": 4.9841135089521e-06, "loss": 0.9911, "step": 440 }, { "epoch": 0.5397796817625459, "grad_norm": 0.5628510230051749, "learning_rate": 4.984041292582914e-06, "loss": 0.6923, "step": 441 }, { "epoch": 0.5410036719706243, "grad_norm": 1.1405020898484086, "learning_rate": 4.983968912971722e-06, "loss": 1.0072, "step": 442 }, { "epoch": 0.5422276621787026, "grad_norm": 0.6941681422603433, "learning_rate": 4.98389637012328e-06, "loss": 0.5952, "step": 443 }, { "epoch": 0.543451652386781, "grad_norm": 0.5840750201714761, "learning_rate": 4.983823664042357e-06, "loss": 0.9915, "step": 444 }, { "epoch": 0.5446756425948592, "grad_norm": 0.35238863025625233, "learning_rate": 4.983750794733728e-06, "loss": 0.4847, "step": 445 }, { "epoch": 0.5458996328029376, "grad_norm": 0.9765951182060455, "learning_rate": 4.983677762202185e-06, "loss": 0.7767, "step": 446 }, { "epoch": 0.5471236230110159, "grad_norm": 0.5762381466614729, "learning_rate": 4.9836045664525255e-06, "loss": 0.7466, "step": 447 }, { "epoch": 0.5483476132190942, "grad_norm": 0.3986092045277627, "learning_rate": 4.9835312074895605e-06, "loss": 0.6021, "step": 448 }, { "epoch": 0.5495716034271726, "grad_norm": 0.7268919269682568, "learning_rate": 4.9834576853181095e-06, "loss": 0.8209, "step": 449 }, { "epoch": 0.5507955936352509, "grad_norm": 0.6555639447350133, "learning_rate": 4.9833839999430065e-06, "loss": 0.7777, "step": 450 }, { "epoch": 0.5520195838433293, "grad_norm": 0.66561695542955, "learning_rate": 4.9833101513690915e-06, "loss": 0.742, "step": 451 }, { "epoch": 0.5532435740514076, "grad_norm": 0.494781765119413, "learning_rate": 4.983236139601219e-06, "loss": 1.0711, "step": 452 }, { "epoch": 0.554467564259486, "grad_norm": 0.721069858553008, "learning_rate": 4.983161964644251e-06, "loss": 1.1872, "step": 453 }, { "epoch": 0.5556915544675642, "grad_norm": 0.72919590776234, "learning_rate": 4.983087626503065e-06, "loss": 0.6297, "step": 454 }, { "epoch": 0.5569155446756426, "grad_norm": 0.7450867470397293, "learning_rate": 4.983013125182543e-06, "loss": 0.7224, "step": 455 }, { "epoch": 0.5581395348837209, "grad_norm": 0.4084870021019476, "learning_rate": 4.982938460687583e-06, "loss": 0.6538, "step": 456 }, { "epoch": 0.5593635250917993, "grad_norm": 0.7509680628872014, "learning_rate": 4.982863633023092e-06, "loss": 0.3776, "step": 457 }, { "epoch": 0.5605875152998776, "grad_norm": 1.087997625617814, "learning_rate": 4.982788642193985e-06, "loss": 0.6623, "step": 458 }, { "epoch": 0.5618115055079559, "grad_norm": 0.48339687799418907, "learning_rate": 4.9827134882051924e-06, "loss": 0.5704, "step": 459 }, { "epoch": 0.5630354957160343, "grad_norm": 0.5534075806710526, "learning_rate": 4.982638171061652e-06, "loss": 0.7673, "step": 460 }, { "epoch": 0.5642594859241126, "grad_norm": 0.40986544335244157, "learning_rate": 4.982562690768314e-06, "loss": 0.646, "step": 461 }, { "epoch": 0.565483476132191, "grad_norm": 0.6381486235919178, "learning_rate": 4.982487047330139e-06, "loss": 1.066, "step": 462 }, { "epoch": 0.5667074663402693, "grad_norm": 0.7125070778783265, "learning_rate": 4.982411240752097e-06, "loss": 1.3099, "step": 463 }, { "epoch": 0.5679314565483476, "grad_norm": 0.5959287446038983, "learning_rate": 4.982335271039169e-06, "loss": 0.6067, "step": 464 }, { "epoch": 0.5691554467564259, "grad_norm": 0.6086512098470025, "learning_rate": 4.982259138196349e-06, "loss": 0.7151, "step": 465 }, { "epoch": 0.5703794369645043, "grad_norm": 0.46145738752682813, "learning_rate": 4.98218284222864e-06, "loss": 0.6642, "step": 466 }, { "epoch": 0.5716034271725826, "grad_norm": 0.5922458645503862, "learning_rate": 4.982106383141056e-06, "loss": 1.2248, "step": 467 }, { "epoch": 0.572827417380661, "grad_norm": 0.5554179979490513, "learning_rate": 4.9820297609386195e-06, "loss": 1.4382, "step": 468 }, { "epoch": 0.5740514075887393, "grad_norm": 0.3869879843858493, "learning_rate": 4.981952975626368e-06, "loss": 0.6222, "step": 469 }, { "epoch": 0.5752753977968176, "grad_norm": 0.7165812862697734, "learning_rate": 4.981876027209348e-06, "loss": 1.062, "step": 470 }, { "epoch": 0.576499388004896, "grad_norm": 0.3587533524927841, "learning_rate": 4.981798915692615e-06, "loss": 0.611, "step": 471 }, { "epoch": 0.5777233782129743, "grad_norm": 0.4620040648647825, "learning_rate": 4.9817216410812365e-06, "loss": 0.6235, "step": 472 }, { "epoch": 0.5789473684210527, "grad_norm": 0.6847007914054791, "learning_rate": 4.981644203380292e-06, "loss": 0.7786, "step": 473 }, { "epoch": 0.5801713586291309, "grad_norm": 0.8634056289002575, "learning_rate": 4.981566602594868e-06, "loss": 1.3832, "step": 474 }, { "epoch": 0.5813953488372093, "grad_norm": 0.7668323236402386, "learning_rate": 4.981488838730066e-06, "loss": 1.5351, "step": 475 }, { "epoch": 0.5826193390452876, "grad_norm": 0.5460972502140486, "learning_rate": 4.981410911790996e-06, "loss": 0.6451, "step": 476 }, { "epoch": 0.583843329253366, "grad_norm": 0.4847622397163458, "learning_rate": 4.981332821782779e-06, "loss": 0.5821, "step": 477 }, { "epoch": 0.5850673194614443, "grad_norm": 0.667526872565768, "learning_rate": 4.9812545687105476e-06, "loss": 0.6665, "step": 478 }, { "epoch": 0.5862913096695227, "grad_norm": 0.7171543323825912, "learning_rate": 4.981176152579442e-06, "loss": 0.5822, "step": 479 }, { "epoch": 0.587515299877601, "grad_norm": 0.8667584694004078, "learning_rate": 4.981097573394618e-06, "loss": 0.6365, "step": 480 }, { "epoch": 0.5887392900856793, "grad_norm": 0.7365407819701043, "learning_rate": 4.981018831161238e-06, "loss": 0.7338, "step": 481 }, { "epoch": 0.5899632802937577, "grad_norm": 0.6997802356459719, "learning_rate": 4.980939925884477e-06, "loss": 0.8013, "step": 482 }, { "epoch": 0.591187270501836, "grad_norm": 0.49963032895103354, "learning_rate": 4.98086085756952e-06, "loss": 0.7466, "step": 483 }, { "epoch": 0.5924112607099143, "grad_norm": 0.8920943812354478, "learning_rate": 4.980781626221565e-06, "loss": 0.7156, "step": 484 }, { "epoch": 0.5936352509179926, "grad_norm": 0.6661321240021298, "learning_rate": 4.980702231845816e-06, "loss": 0.7356, "step": 485 }, { "epoch": 0.594859241126071, "grad_norm": 0.4170324798701388, "learning_rate": 4.980622674447493e-06, "loss": 0.6441, "step": 486 }, { "epoch": 0.5960832313341493, "grad_norm": 0.8502814114814677, "learning_rate": 4.980542954031822e-06, "loss": 0.5912, "step": 487 }, { "epoch": 0.5973072215422277, "grad_norm": 0.5836125655231348, "learning_rate": 4.980463070604042e-06, "loss": 0.8118, "step": 488 }, { "epoch": 0.598531211750306, "grad_norm": 0.8087845455291846, "learning_rate": 4.980383024169405e-06, "loss": 0.7508, "step": 489 }, { "epoch": 0.5997552019583844, "grad_norm": 0.8757690894135692, "learning_rate": 4.98030281473317e-06, "loss": 1.2787, "step": 490 }, { "epoch": 0.6009791921664627, "grad_norm": 0.7872153042853589, "learning_rate": 4.980222442300608e-06, "loss": 1.1471, "step": 491 }, { "epoch": 0.602203182374541, "grad_norm": 0.5914271112228281, "learning_rate": 4.980141906877001e-06, "loss": 0.6121, "step": 492 }, { "epoch": 0.6034271725826194, "grad_norm": 0.9602661555495826, "learning_rate": 4.98006120846764e-06, "loss": 1.3528, "step": 493 }, { "epoch": 0.6046511627906976, "grad_norm": 0.8457421366249023, "learning_rate": 4.979980347077831e-06, "loss": 0.8904, "step": 494 }, { "epoch": 0.605875152998776, "grad_norm": 0.7803270149784354, "learning_rate": 4.979899322712885e-06, "loss": 0.8407, "step": 495 }, { "epoch": 0.6070991432068543, "grad_norm": 0.9003791171851305, "learning_rate": 4.979818135378129e-06, "loss": 0.6624, "step": 496 }, { "epoch": 0.6083231334149327, "grad_norm": 0.38048113984728554, "learning_rate": 4.979736785078897e-06, "loss": 0.4568, "step": 497 }, { "epoch": 0.609547123623011, "grad_norm": 0.5460484270490091, "learning_rate": 4.979655271820535e-06, "loss": 0.8807, "step": 498 }, { "epoch": 0.6107711138310894, "grad_norm": 0.8607446671430177, "learning_rate": 4.9795735956084015e-06, "loss": 2.4248, "step": 499 }, { "epoch": 0.6119951040391677, "grad_norm": 0.8657540161264972, "learning_rate": 4.9794917564478616e-06, "loss": 1.0421, "step": 500 }, { "epoch": 0.6132190942472461, "grad_norm": 0.786380900764491, "learning_rate": 4.979409754344294e-06, "loss": 1.1132, "step": 501 }, { "epoch": 0.6144430844553244, "grad_norm": 0.5541101339567411, "learning_rate": 4.979327589303089e-06, "loss": 0.6173, "step": 502 }, { "epoch": 0.6156670746634026, "grad_norm": 0.7338735579088909, "learning_rate": 4.979245261329645e-06, "loss": 0.9687, "step": 503 }, { "epoch": 0.616891064871481, "grad_norm": 0.6086756002681308, "learning_rate": 4.979162770429372e-06, "loss": 0.5935, "step": 504 }, { "epoch": 0.6181150550795593, "grad_norm": 0.7954198181003124, "learning_rate": 4.979080116607692e-06, "loss": 0.5992, "step": 505 }, { "epoch": 0.6193390452876377, "grad_norm": 0.5815447401847034, "learning_rate": 4.978997299870036e-06, "loss": 0.8095, "step": 506 }, { "epoch": 0.620563035495716, "grad_norm": 0.6904210968325463, "learning_rate": 4.978914320221847e-06, "loss": 1.0425, "step": 507 }, { "epoch": 0.6217870257037944, "grad_norm": 0.6509154661965433, "learning_rate": 4.978831177668577e-06, "loss": 0.6879, "step": 508 }, { "epoch": 0.6230110159118727, "grad_norm": 0.32876693792041606, "learning_rate": 4.978747872215691e-06, "loss": 0.2984, "step": 509 }, { "epoch": 0.6242350061199511, "grad_norm": 0.7048658689041563, "learning_rate": 4.978664403868664e-06, "loss": 0.7401, "step": 510 }, { "epoch": 0.6254589963280294, "grad_norm": 0.6802817915400121, "learning_rate": 4.978580772632979e-06, "loss": 1.2961, "step": 511 }, { "epoch": 0.6266829865361077, "grad_norm": 0.9447830556444728, "learning_rate": 4.9784969785141334e-06, "loss": 0.6635, "step": 512 }, { "epoch": 0.627906976744186, "grad_norm": 0.605067025896805, "learning_rate": 4.978413021517634e-06, "loss": 0.6016, "step": 513 }, { "epoch": 0.6291309669522643, "grad_norm": 0.6973401878576624, "learning_rate": 4.9783289016489975e-06, "loss": 1.7901, "step": 514 }, { "epoch": 0.6303549571603427, "grad_norm": 0.5743499259562451, "learning_rate": 4.978244618913754e-06, "loss": 0.6964, "step": 515 }, { "epoch": 0.631578947368421, "grad_norm": 0.5765570989674189, "learning_rate": 4.978160173317439e-06, "loss": 1.2022, "step": 516 }, { "epoch": 0.6328029375764994, "grad_norm": 0.676210526239049, "learning_rate": 4.978075564865603e-06, "loss": 0.5173, "step": 517 }, { "epoch": 0.6340269277845777, "grad_norm": 0.6101515429201264, "learning_rate": 4.9779907935638076e-06, "loss": 0.7999, "step": 518 }, { "epoch": 0.6352509179926561, "grad_norm": 0.625029551653209, "learning_rate": 4.977905859417622e-06, "loss": 1.0577, "step": 519 }, { "epoch": 0.6364749082007344, "grad_norm": 0.7728096552634098, "learning_rate": 4.9778207624326294e-06, "loss": 0.5477, "step": 520 }, { "epoch": 0.6376988984088128, "grad_norm": 0.43009825366727467, "learning_rate": 4.97773550261442e-06, "loss": 0.6058, "step": 521 }, { "epoch": 0.6389228886168911, "grad_norm": 0.8387121402128179, "learning_rate": 4.977650079968599e-06, "loss": 0.8564, "step": 522 }, { "epoch": 0.6401468788249693, "grad_norm": 0.5950306634511766, "learning_rate": 4.977564494500778e-06, "loss": 1.1089, "step": 523 }, { "epoch": 0.6413708690330477, "grad_norm": 0.5943989281202401, "learning_rate": 4.977478746216583e-06, "loss": 0.6134, "step": 524 }, { "epoch": 0.642594859241126, "grad_norm": 0.729651571226729, "learning_rate": 4.977392835121649e-06, "loss": 0.7043, "step": 525 }, { "epoch": 0.6438188494492044, "grad_norm": 0.9178273079178908, "learning_rate": 4.9773067612216194e-06, "loss": 0.7013, "step": 526 }, { "epoch": 0.6450428396572827, "grad_norm": 0.7462894848550342, "learning_rate": 4.9772205245221525e-06, "loss": 1.0939, "step": 527 }, { "epoch": 0.6462668298653611, "grad_norm": 0.7004286005767373, "learning_rate": 4.977134125028916e-06, "loss": 0.5541, "step": 528 }, { "epoch": 0.6474908200734394, "grad_norm": 0.9715965342324924, "learning_rate": 4.977047562747588e-06, "loss": 0.59, "step": 529 }, { "epoch": 0.6487148102815178, "grad_norm": 0.744286503821514, "learning_rate": 4.9769608376838555e-06, "loss": 1.1145, "step": 530 }, { "epoch": 0.6499388004895961, "grad_norm": 0.9508178288411085, "learning_rate": 4.976873949843417e-06, "loss": 0.7731, "step": 531 }, { "epoch": 0.6511627906976745, "grad_norm": 0.780669900476306, "learning_rate": 4.976786899231986e-06, "loss": 0.7806, "step": 532 }, { "epoch": 0.6523867809057528, "grad_norm": 0.5254012370160568, "learning_rate": 4.976699685855279e-06, "loss": 0.6822, "step": 533 }, { "epoch": 0.653610771113831, "grad_norm": 0.6146151754181215, "learning_rate": 4.976612309719031e-06, "loss": 0.8659, "step": 534 }, { "epoch": 0.6548347613219094, "grad_norm": 0.9731637117312572, "learning_rate": 4.976524770828981e-06, "loss": 0.9978, "step": 535 }, { "epoch": 0.6560587515299877, "grad_norm": 0.80898410376378, "learning_rate": 4.976437069190884e-06, "loss": 1.9593, "step": 536 }, { "epoch": 0.6572827417380661, "grad_norm": 0.49631139959251924, "learning_rate": 4.976349204810502e-06, "loss": 0.6042, "step": 537 }, { "epoch": 0.6585067319461444, "grad_norm": 0.5877262837299358, "learning_rate": 4.97626117769361e-06, "loss": 0.7233, "step": 538 }, { "epoch": 0.6597307221542228, "grad_norm": 0.5006659854179215, "learning_rate": 4.976172987845992e-06, "loss": 0.4859, "step": 539 }, { "epoch": 0.6609547123623011, "grad_norm": 0.7243688811014526, "learning_rate": 4.976084635273445e-06, "loss": 1.4443, "step": 540 }, { "epoch": 0.6621787025703795, "grad_norm": 0.5788182162667203, "learning_rate": 4.975996119981773e-06, "loss": 1.2865, "step": 541 }, { "epoch": 0.6634026927784578, "grad_norm": 0.7618498816492038, "learning_rate": 4.975907441976795e-06, "loss": 0.6311, "step": 542 }, { "epoch": 0.6646266829865362, "grad_norm": 0.9105223655593707, "learning_rate": 4.975818601264337e-06, "loss": 0.8695, "step": 543 }, { "epoch": 0.6658506731946144, "grad_norm": 0.5709587329130438, "learning_rate": 4.975729597850238e-06, "loss": 0.7136, "step": 544 }, { "epoch": 0.6670746634026927, "grad_norm": 0.5709572171217463, "learning_rate": 4.9756404317403475e-06, "loss": 1.4518, "step": 545 }, { "epoch": 0.6682986536107711, "grad_norm": 0.7137365348802489, "learning_rate": 4.9755511029405235e-06, "loss": 0.6105, "step": 546 }, { "epoch": 0.6695226438188494, "grad_norm": 0.9472825614416733, "learning_rate": 4.975461611456639e-06, "loss": 1.0258, "step": 547 }, { "epoch": 0.6707466340269278, "grad_norm": 0.5948426326943427, "learning_rate": 4.975371957294572e-06, "loss": 0.7631, "step": 548 }, { "epoch": 0.6719706242350061, "grad_norm": 0.49442036024015285, "learning_rate": 4.975282140460216e-06, "loss": 0.5656, "step": 549 }, { "epoch": 0.6731946144430845, "grad_norm": 0.5141243670686299, "learning_rate": 4.975192160959474e-06, "loss": 0.6187, "step": 550 }, { "epoch": 0.6744186046511628, "grad_norm": 0.6447537114880092, "learning_rate": 4.975102018798258e-06, "loss": 0.6707, "step": 551 }, { "epoch": 0.6756425948592412, "grad_norm": 1.0498450919663247, "learning_rate": 4.9750117139824916e-06, "loss": 0.8024, "step": 552 }, { "epoch": 0.6768665850673194, "grad_norm": 0.3967421027806449, "learning_rate": 4.9749212465181095e-06, "loss": 0.4971, "step": 553 }, { "epoch": 0.6780905752753978, "grad_norm": 0.4999352613405446, "learning_rate": 4.974830616411059e-06, "loss": 0.6739, "step": 554 }, { "epoch": 0.6793145654834761, "grad_norm": 0.5726588192115494, "learning_rate": 4.974739823667292e-06, "loss": 0.978, "step": 555 }, { "epoch": 0.6805385556915544, "grad_norm": 0.723750119249683, "learning_rate": 4.974648868292778e-06, "loss": 0.5329, "step": 556 }, { "epoch": 0.6817625458996328, "grad_norm": 0.5514615518789243, "learning_rate": 4.9745577502934936e-06, "loss": 1.1273, "step": 557 }, { "epoch": 0.6829865361077111, "grad_norm": 0.6237740221028041, "learning_rate": 4.974466469675427e-06, "loss": 0.7259, "step": 558 }, { "epoch": 0.6842105263157895, "grad_norm": 0.441421519775364, "learning_rate": 4.974375026444575e-06, "loss": 0.6798, "step": 559 }, { "epoch": 0.6854345165238678, "grad_norm": 0.9338845353458881, "learning_rate": 4.97428342060695e-06, "loss": 0.7226, "step": 560 }, { "epoch": 0.6866585067319462, "grad_norm": 0.6267436130602022, "learning_rate": 4.974191652168569e-06, "loss": 0.6096, "step": 561 }, { "epoch": 0.6878824969400245, "grad_norm": 0.8211413779042164, "learning_rate": 4.974099721135466e-06, "loss": 1.5565, "step": 562 }, { "epoch": 0.6891064871481029, "grad_norm": 0.6910690891205508, "learning_rate": 4.974007627513678e-06, "loss": 1.2202, "step": 563 }, { "epoch": 0.6903304773561811, "grad_norm": 0.5917342611493596, "learning_rate": 4.973915371309262e-06, "loss": 0.9193, "step": 564 }, { "epoch": 0.6915544675642595, "grad_norm": 1.0435096061682623, "learning_rate": 4.973822952528276e-06, "loss": 1.5729, "step": 565 }, { "epoch": 0.6927784577723378, "grad_norm": 0.6072455238693985, "learning_rate": 4.973730371176796e-06, "loss": 0.6981, "step": 566 }, { "epoch": 0.6940024479804161, "grad_norm": 0.7833292444100382, "learning_rate": 4.973637627260907e-06, "loss": 0.6399, "step": 567 }, { "epoch": 0.6952264381884945, "grad_norm": 0.5908805712931489, "learning_rate": 4.973544720786701e-06, "loss": 0.9483, "step": 568 }, { "epoch": 0.6964504283965728, "grad_norm": 0.9507882820554344, "learning_rate": 4.973451651760286e-06, "loss": 1.1759, "step": 569 }, { "epoch": 0.6976744186046512, "grad_norm": 0.5395558365491222, "learning_rate": 4.973358420187776e-06, "loss": 0.6891, "step": 570 }, { "epoch": 0.6988984088127295, "grad_norm": 0.8226985692636989, "learning_rate": 4.973265026075299e-06, "loss": 0.9661, "step": 571 }, { "epoch": 0.7001223990208079, "grad_norm": 0.8020154739677174, "learning_rate": 4.973171469428993e-06, "loss": 0.6196, "step": 572 }, { "epoch": 0.7013463892288861, "grad_norm": 0.4430199949633676, "learning_rate": 4.973077750255006e-06, "loss": 0.6245, "step": 573 }, { "epoch": 0.7025703794369645, "grad_norm": 0.8217842159188796, "learning_rate": 4.972983868559497e-06, "loss": 1.086, "step": 574 }, { "epoch": 0.7037943696450428, "grad_norm": 0.6507059382623177, "learning_rate": 4.9728898243486335e-06, "loss": 0.5705, "step": 575 }, { "epoch": 0.7050183598531212, "grad_norm": 0.5685690088846183, "learning_rate": 4.972795617628598e-06, "loss": 0.7152, "step": 576 }, { "epoch": 0.7062423500611995, "grad_norm": 0.5979174639210477, "learning_rate": 4.972701248405581e-06, "loss": 0.988, "step": 577 }, { "epoch": 0.7074663402692778, "grad_norm": 0.6342720690962881, "learning_rate": 4.972606716685784e-06, "loss": 1.0455, "step": 578 }, { "epoch": 0.7086903304773562, "grad_norm": 0.5674760213224617, "learning_rate": 4.9725120224754184e-06, "loss": 1.0505, "step": 579 }, { "epoch": 0.7099143206854345, "grad_norm": 0.5223678278906756, "learning_rate": 4.972417165780709e-06, "loss": 0.7916, "step": 580 }, { "epoch": 0.7111383108935129, "grad_norm": 0.7098437120298289, "learning_rate": 4.972322146607887e-06, "loss": 0.7439, "step": 581 }, { "epoch": 0.7123623011015912, "grad_norm": 0.5554196168253557, "learning_rate": 4.972226964963198e-06, "loss": 0.7848, "step": 582 }, { "epoch": 0.7135862913096696, "grad_norm": 0.8028778572167714, "learning_rate": 4.972131620852898e-06, "loss": 1.0801, "step": 583 }, { "epoch": 0.7148102815177478, "grad_norm": 0.8788064974366373, "learning_rate": 4.9720361142832516e-06, "loss": 1.1014, "step": 584 }, { "epoch": 0.7160342717258262, "grad_norm": 1.0507662863719238, "learning_rate": 4.9719404452605345e-06, "loss": 0.5231, "step": 585 }, { "epoch": 0.7172582619339045, "grad_norm": 0.8519649680014808, "learning_rate": 4.971844613791035e-06, "loss": 0.7638, "step": 586 }, { "epoch": 0.7184822521419829, "grad_norm": 0.8676498937975138, "learning_rate": 4.97174861988105e-06, "loss": 0.4428, "step": 587 }, { "epoch": 0.7197062423500612, "grad_norm": 0.39356823885394826, "learning_rate": 4.971652463536888e-06, "loss": 0.4902, "step": 588 }, { "epoch": 0.7209302325581395, "grad_norm": 0.7042322941656949, "learning_rate": 4.971556144764868e-06, "loss": 1.658, "step": 589 }, { "epoch": 0.7221542227662179, "grad_norm": 1.1854771058805402, "learning_rate": 4.97145966357132e-06, "loss": 0.8904, "step": 590 }, { "epoch": 0.7233782129742962, "grad_norm": 0.5708757192214396, "learning_rate": 4.971363019962584e-06, "loss": 0.7227, "step": 591 }, { "epoch": 0.7246022031823746, "grad_norm": 0.6772516152524453, "learning_rate": 4.971266213945012e-06, "loss": 0.4588, "step": 592 }, { "epoch": 0.7258261933904528, "grad_norm": 1.0012987474314943, "learning_rate": 4.971169245524964e-06, "loss": 0.6288, "step": 593 }, { "epoch": 0.7270501835985312, "grad_norm": 0.9484340869296382, "learning_rate": 4.971072114708813e-06, "loss": 0.581, "step": 594 }, { "epoch": 0.7282741738066095, "grad_norm": 0.615108701886359, "learning_rate": 4.970974821502944e-06, "loss": 0.6183, "step": 595 }, { "epoch": 0.7294981640146879, "grad_norm": 0.7012882258838486, "learning_rate": 4.970877365913749e-06, "loss": 0.5819, "step": 596 }, { "epoch": 0.7307221542227662, "grad_norm": 0.47208445383336833, "learning_rate": 4.970779747947632e-06, "loss": 0.5465, "step": 597 }, { "epoch": 0.7319461444308446, "grad_norm": 0.513142195829669, "learning_rate": 4.970681967611009e-06, "loss": 0.4998, "step": 598 }, { "epoch": 0.7331701346389229, "grad_norm": 0.4816548410982356, "learning_rate": 4.9705840249103054e-06, "loss": 0.6552, "step": 599 }, { "epoch": 0.7343941248470012, "grad_norm": 0.6431509174434921, "learning_rate": 4.970485919851958e-06, "loss": 1.3001, "step": 600 }, { "epoch": 0.7356181150550796, "grad_norm": 0.712352793323645, "learning_rate": 4.970387652442413e-06, "loss": 1.4916, "step": 601 }, { "epoch": 0.7368421052631579, "grad_norm": 1.0396576730053728, "learning_rate": 4.970289222688129e-06, "loss": 1.1944, "step": 602 }, { "epoch": 0.7380660954712362, "grad_norm": 0.7592000215963804, "learning_rate": 4.970190630595575e-06, "loss": 0.8748, "step": 603 }, { "epoch": 0.7392900856793145, "grad_norm": 0.9156707634423166, "learning_rate": 4.970091876171229e-06, "loss": 0.594, "step": 604 }, { "epoch": 0.7405140758873929, "grad_norm": 0.6451404632893534, "learning_rate": 4.969992959421581e-06, "loss": 0.6855, "step": 605 }, { "epoch": 0.7417380660954712, "grad_norm": 0.46130620716085075, "learning_rate": 4.969893880353132e-06, "loss": 0.6904, "step": 606 }, { "epoch": 0.7429620563035496, "grad_norm": 1.0270600784677892, "learning_rate": 4.969794638972392e-06, "loss": 0.7618, "step": 607 }, { "epoch": 0.7441860465116279, "grad_norm": 0.7836370519805019, "learning_rate": 4.9696952352858844e-06, "loss": 0.705, "step": 608 }, { "epoch": 0.7454100367197063, "grad_norm": 0.39752796690613, "learning_rate": 4.9695956693001394e-06, "loss": 0.5664, "step": 609 }, { "epoch": 0.7466340269277846, "grad_norm": 0.7742064722344367, "learning_rate": 4.969495941021702e-06, "loss": 0.6021, "step": 610 }, { "epoch": 0.7478580171358629, "grad_norm": 0.9269461702870032, "learning_rate": 4.969396050457126e-06, "loss": 1.3448, "step": 611 }, { "epoch": 0.7490820073439413, "grad_norm": 0.5962969508176195, "learning_rate": 4.969295997612974e-06, "loss": 0.5883, "step": 612 }, { "epoch": 0.7503059975520195, "grad_norm": 0.998690953858965, "learning_rate": 4.969195782495823e-06, "loss": 0.6252, "step": 613 }, { "epoch": 0.7515299877600979, "grad_norm": 0.6369734465150525, "learning_rate": 4.969095405112259e-06, "loss": 0.5243, "step": 614 }, { "epoch": 0.7527539779681762, "grad_norm": 0.5883419096865912, "learning_rate": 4.968994865468876e-06, "loss": 0.9644, "step": 615 }, { "epoch": 0.7539779681762546, "grad_norm": 0.6266375168065322, "learning_rate": 4.9688941635722835e-06, "loss": 0.6935, "step": 616 }, { "epoch": 0.7552019583843329, "grad_norm": 0.6529873979701071, "learning_rate": 4.968793299429098e-06, "loss": 1.3436, "step": 617 }, { "epoch": 0.7564259485924113, "grad_norm": 0.46924977448314387, "learning_rate": 4.968692273045949e-06, "loss": 0.779, "step": 618 }, { "epoch": 0.7576499388004896, "grad_norm": 0.4417891405151123, "learning_rate": 4.968591084429474e-06, "loss": 0.7529, "step": 619 }, { "epoch": 0.758873929008568, "grad_norm": 0.5595233063808529, "learning_rate": 4.968489733586324e-06, "loss": 0.5163, "step": 620 }, { "epoch": 0.7600979192166463, "grad_norm": 0.687497280904428, "learning_rate": 4.968388220523158e-06, "loss": 0.849, "step": 621 }, { "epoch": 0.7613219094247246, "grad_norm": 0.8279073121259894, "learning_rate": 4.96828654524665e-06, "loss": 0.6802, "step": 622 }, { "epoch": 0.762545899632803, "grad_norm": 0.4282792593944984, "learning_rate": 4.968184707763478e-06, "loss": 0.5985, "step": 623 }, { "epoch": 0.7637698898408812, "grad_norm": 0.7291351662089038, "learning_rate": 4.968082708080337e-06, "loss": 0.7812, "step": 624 }, { "epoch": 0.7649938800489596, "grad_norm": 1.1061164288585412, "learning_rate": 4.96798054620393e-06, "loss": 1.3159, "step": 625 }, { "epoch": 0.7662178702570379, "grad_norm": 1.2197055829355696, "learning_rate": 4.967878222140969e-06, "loss": 1.1424, "step": 626 }, { "epoch": 0.7674418604651163, "grad_norm": 0.6475891215948089, "learning_rate": 4.967775735898179e-06, "loss": 0.8342, "step": 627 }, { "epoch": 0.7686658506731946, "grad_norm": 1.0532057669413615, "learning_rate": 4.9676730874822965e-06, "loss": 0.6239, "step": 628 }, { "epoch": 0.769889840881273, "grad_norm": 0.809246997189568, "learning_rate": 4.967570276900065e-06, "loss": 0.6994, "step": 629 }, { "epoch": 0.7711138310893513, "grad_norm": 0.7892053930779026, "learning_rate": 4.967467304158242e-06, "loss": 0.9058, "step": 630 }, { "epoch": 0.7723378212974297, "grad_norm": 1.0314193793563253, "learning_rate": 4.967364169263594e-06, "loss": 1.1671, "step": 631 }, { "epoch": 0.773561811505508, "grad_norm": 0.9123532849570993, "learning_rate": 4.9672608722228995e-06, "loss": 0.525, "step": 632 }, { "epoch": 0.7747858017135862, "grad_norm": 0.8480696357200321, "learning_rate": 4.967157413042946e-06, "loss": 1.5879, "step": 633 }, { "epoch": 0.7760097919216646, "grad_norm": 0.8596720181825599, "learning_rate": 4.967053791730533e-06, "loss": 0.5371, "step": 634 }, { "epoch": 0.7772337821297429, "grad_norm": 0.6211059219747544, "learning_rate": 4.96695000829247e-06, "loss": 0.4747, "step": 635 }, { "epoch": 0.7784577723378213, "grad_norm": 0.4629490378032919, "learning_rate": 4.966846062735576e-06, "loss": 0.5684, "step": 636 }, { "epoch": 0.7796817625458996, "grad_norm": 0.806056164140906, "learning_rate": 4.966741955066684e-06, "loss": 0.6234, "step": 637 }, { "epoch": 0.780905752753978, "grad_norm": 0.7714680181902983, "learning_rate": 4.966637685292635e-06, "loss": 0.5484, "step": 638 }, { "epoch": 0.7821297429620563, "grad_norm": 0.9269158255118782, "learning_rate": 4.966533253420279e-06, "loss": 0.643, "step": 639 }, { "epoch": 0.7833537331701347, "grad_norm": 0.39727557307009653, "learning_rate": 4.966428659456482e-06, "loss": 0.5241, "step": 640 }, { "epoch": 0.784577723378213, "grad_norm": 0.5498888346071843, "learning_rate": 4.966323903408116e-06, "loss": 0.6624, "step": 641 }, { "epoch": 0.7858017135862914, "grad_norm": 0.9582460661242013, "learning_rate": 4.966218985282065e-06, "loss": 1.102, "step": 642 }, { "epoch": 0.7870257037943696, "grad_norm": 0.6924637960627974, "learning_rate": 4.966113905085225e-06, "loss": 1.4242, "step": 643 }, { "epoch": 0.7882496940024479, "grad_norm": 0.6435657697670896, "learning_rate": 4.9660086628245e-06, "loss": 0.7195, "step": 644 }, { "epoch": 0.7894736842105263, "grad_norm": 0.8642952500877826, "learning_rate": 4.965903258506806e-06, "loss": 1.5262, "step": 645 }, { "epoch": 0.7906976744186046, "grad_norm": 1.0681830827384629, "learning_rate": 4.9657976921390714e-06, "loss": 0.6774, "step": 646 }, { "epoch": 0.791921664626683, "grad_norm": 1.0524664740300034, "learning_rate": 4.965691963728233e-06, "loss": 1.0384, "step": 647 }, { "epoch": 0.7931456548347613, "grad_norm": 0.6254499843442785, "learning_rate": 4.965586073281239e-06, "loss": 0.7753, "step": 648 }, { "epoch": 0.7943696450428397, "grad_norm": 0.6386283497926499, "learning_rate": 4.965480020805047e-06, "loss": 0.6537, "step": 649 }, { "epoch": 0.795593635250918, "grad_norm": 0.5542214681334583, "learning_rate": 4.9653738063066285e-06, "loss": 0.4985, "step": 650 }, { "epoch": 0.7968176254589964, "grad_norm": 0.7385661588704525, "learning_rate": 4.965267429792961e-06, "loss": 0.7591, "step": 651 }, { "epoch": 0.7980416156670747, "grad_norm": 0.7988023399399428, "learning_rate": 4.965160891271037e-06, "loss": 1.4882, "step": 652 }, { "epoch": 0.799265605875153, "grad_norm": 0.6219625214624916, "learning_rate": 4.9650541907478566e-06, "loss": 0.5383, "step": 653 }, { "epoch": 0.8004895960832313, "grad_norm": 0.6750578805333489, "learning_rate": 4.964947328230433e-06, "loss": 0.7228, "step": 654 }, { "epoch": 0.8017135862913096, "grad_norm": 0.611696364581251, "learning_rate": 4.964840303725788e-06, "loss": 0.736, "step": 655 }, { "epoch": 0.802937576499388, "grad_norm": 0.8365574692074819, "learning_rate": 4.964733117240955e-06, "loss": 0.7894, "step": 656 }, { "epoch": 0.8041615667074663, "grad_norm": 0.37500351164507584, "learning_rate": 4.964625768782978e-06, "loss": 0.4694, "step": 657 }, { "epoch": 0.8053855569155447, "grad_norm": 0.8599796209464186, "learning_rate": 4.964518258358912e-06, "loss": 0.8291, "step": 658 }, { "epoch": 0.806609547123623, "grad_norm": 0.7475307753112096, "learning_rate": 4.964410585975821e-06, "loss": 1.5502, "step": 659 }, { "epoch": 0.8078335373317014, "grad_norm": 0.8015958302816727, "learning_rate": 4.964302751640782e-06, "loss": 1.326, "step": 660 }, { "epoch": 0.8090575275397797, "grad_norm": 1.0528153516904397, "learning_rate": 4.9641947553608805e-06, "loss": 1.0994, "step": 661 }, { "epoch": 0.8102815177478581, "grad_norm": 0.6757406701884192, "learning_rate": 4.964086597143214e-06, "loss": 0.816, "step": 662 }, { "epoch": 0.8115055079559363, "grad_norm": 0.6082119601165287, "learning_rate": 4.963978276994891e-06, "loss": 0.8588, "step": 663 }, { "epoch": 0.8127294981640147, "grad_norm": 0.42903926514342355, "learning_rate": 4.963869794923029e-06, "loss": 0.6817, "step": 664 }, { "epoch": 0.813953488372093, "grad_norm": 0.8183416215602448, "learning_rate": 4.963761150934757e-06, "loss": 0.8746, "step": 665 }, { "epoch": 0.8151774785801713, "grad_norm": 0.5790604514629629, "learning_rate": 4.963652345037215e-06, "loss": 0.5978, "step": 666 }, { "epoch": 0.8164014687882497, "grad_norm": 0.9841913474099444, "learning_rate": 4.963543377237554e-06, "loss": 0.8679, "step": 667 }, { "epoch": 0.817625458996328, "grad_norm": 0.5242588053675499, "learning_rate": 4.963434247542934e-06, "loss": 0.6791, "step": 668 }, { "epoch": 0.8188494492044064, "grad_norm": 0.6524840579850808, "learning_rate": 4.9633249559605264e-06, "loss": 0.9256, "step": 669 }, { "epoch": 0.8200734394124847, "grad_norm": 0.5638342011037669, "learning_rate": 4.963215502497514e-06, "loss": 0.9675, "step": 670 }, { "epoch": 0.8212974296205631, "grad_norm": 0.5582692750055275, "learning_rate": 4.96310588716109e-06, "loss": 0.8604, "step": 671 }, { "epoch": 0.8225214198286414, "grad_norm": 0.7894170377790417, "learning_rate": 4.9629961099584575e-06, "loss": 1.3202, "step": 672 }, { "epoch": 0.8237454100367197, "grad_norm": 0.768432594533552, "learning_rate": 4.96288617089683e-06, "loss": 0.4463, "step": 673 }, { "epoch": 0.824969400244798, "grad_norm": 0.5474461322631754, "learning_rate": 4.962776069983434e-06, "loss": 1.0516, "step": 674 }, { "epoch": 0.8261933904528764, "grad_norm": 0.9023793086012241, "learning_rate": 4.962665807225503e-06, "loss": 0.7088, "step": 675 }, { "epoch": 0.8274173806609547, "grad_norm": 0.8770622743672146, "learning_rate": 4.962555382630284e-06, "loss": 0.5896, "step": 676 }, { "epoch": 0.828641370869033, "grad_norm": 0.40190443269097986, "learning_rate": 4.962444796205034e-06, "loss": 0.4439, "step": 677 }, { "epoch": 0.8298653610771114, "grad_norm": 0.9237737974360988, "learning_rate": 4.96233404795702e-06, "loss": 0.6128, "step": 678 }, { "epoch": 0.8310893512851897, "grad_norm": 0.6992319859751572, "learning_rate": 4.962223137893518e-06, "loss": 1.3304, "step": 679 }, { "epoch": 0.8323133414932681, "grad_norm": 0.6258347473825795, "learning_rate": 4.96211206602182e-06, "loss": 0.5668, "step": 680 }, { "epoch": 0.8335373317013464, "grad_norm": 0.8470335439349213, "learning_rate": 4.962000832349224e-06, "loss": 0.522, "step": 681 }, { "epoch": 0.8347613219094248, "grad_norm": 1.1234994612112508, "learning_rate": 4.9618894368830396e-06, "loss": 0.4874, "step": 682 }, { "epoch": 0.835985312117503, "grad_norm": 1.0489500450141993, "learning_rate": 4.961777879630587e-06, "loss": 0.6158, "step": 683 }, { "epoch": 0.8372093023255814, "grad_norm": 0.6241119450309481, "learning_rate": 4.961666160599198e-06, "loss": 0.8178, "step": 684 }, { "epoch": 0.8384332925336597, "grad_norm": 1.0682003904914388, "learning_rate": 4.961554279796214e-06, "loss": 0.5875, "step": 685 }, { "epoch": 0.8396572827417381, "grad_norm": 0.6472953076353286, "learning_rate": 4.961442237228987e-06, "loss": 0.6897, "step": 686 }, { "epoch": 0.8408812729498164, "grad_norm": 0.485126674032805, "learning_rate": 4.961330032904881e-06, "loss": 0.9089, "step": 687 }, { "epoch": 0.8421052631578947, "grad_norm": 0.939181228389836, "learning_rate": 4.961217666831268e-06, "loss": 0.5114, "step": 688 }, { "epoch": 0.8433292533659731, "grad_norm": 0.7543209755588531, "learning_rate": 4.961105139015534e-06, "loss": 0.5583, "step": 689 }, { "epoch": 0.8445532435740514, "grad_norm": 0.4744987718552312, "learning_rate": 4.960992449465073e-06, "loss": 0.6766, "step": 690 }, { "epoch": 0.8457772337821298, "grad_norm": 0.8144338828937318, "learning_rate": 4.960879598187292e-06, "loss": 0.6966, "step": 691 }, { "epoch": 0.847001223990208, "grad_norm": 1.1445769205634282, "learning_rate": 4.960766585189604e-06, "loss": 0.5002, "step": 692 }, { "epoch": 0.8482252141982864, "grad_norm": 0.7189221539713921, "learning_rate": 4.960653410479439e-06, "loss": 0.586, "step": 693 }, { "epoch": 0.8494492044063647, "grad_norm": 0.48895865285309326, "learning_rate": 4.9605400740642336e-06, "loss": 0.667, "step": 694 }, { "epoch": 0.8506731946144431, "grad_norm": 0.777080565568276, "learning_rate": 4.960426575951434e-06, "loss": 1.2394, "step": 695 }, { "epoch": 0.8518971848225214, "grad_norm": 0.8822613054732716, "learning_rate": 4.960312916148502e-06, "loss": 0.5631, "step": 696 }, { "epoch": 0.8531211750305998, "grad_norm": 0.5824593370286995, "learning_rate": 4.960199094662904e-06, "loss": 0.424, "step": 697 }, { "epoch": 0.8543451652386781, "grad_norm": 0.6520130072079502, "learning_rate": 4.960085111502122e-06, "loss": 1.197, "step": 698 }, { "epoch": 0.8555691554467564, "grad_norm": 0.7655563199471871, "learning_rate": 4.959970966673645e-06, "loss": 0.5878, "step": 699 }, { "epoch": 0.8567931456548348, "grad_norm": 1.1527368782096508, "learning_rate": 4.9598566601849755e-06, "loss": 1.1473, "step": 700 }, { "epoch": 0.8580171358629131, "grad_norm": 0.8243418319838043, "learning_rate": 4.959742192043624e-06, "loss": 1.287, "step": 701 }, { "epoch": 0.8592411260709915, "grad_norm": 0.44765985850280393, "learning_rate": 4.959627562257113e-06, "loss": 0.7051, "step": 702 }, { "epoch": 0.8604651162790697, "grad_norm": 0.6765836162428761, "learning_rate": 4.959512770832977e-06, "loss": 0.608, "step": 703 }, { "epoch": 0.8616891064871481, "grad_norm": 0.7104304635049636, "learning_rate": 4.959397817778759e-06, "loss": 1.3051, "step": 704 }, { "epoch": 0.8629130966952264, "grad_norm": 0.7503986020799235, "learning_rate": 4.959282703102013e-06, "loss": 1.3645, "step": 705 }, { "epoch": 0.8641370869033048, "grad_norm": 1.0291781668201516, "learning_rate": 4.959167426810304e-06, "loss": 0.9358, "step": 706 }, { "epoch": 0.8653610771113831, "grad_norm": 1.236303290364792, "learning_rate": 4.959051988911208e-06, "loss": 0.7589, "step": 707 }, { "epoch": 0.8665850673194615, "grad_norm": 0.5529621778113976, "learning_rate": 4.95893638941231e-06, "loss": 0.3963, "step": 708 }, { "epoch": 0.8678090575275398, "grad_norm": 0.5655819564631934, "learning_rate": 4.958820628321207e-06, "loss": 0.7413, "step": 709 }, { "epoch": 0.8690330477356181, "grad_norm": 0.6326436098735134, "learning_rate": 4.958704705645508e-06, "loss": 0.6838, "step": 710 }, { "epoch": 0.8702570379436965, "grad_norm": 0.5610922186390335, "learning_rate": 4.958588621392829e-06, "loss": 0.6588, "step": 711 }, { "epoch": 0.8714810281517748, "grad_norm": 1.0853802862978965, "learning_rate": 4.9584723755708e-06, "loss": 1.0627, "step": 712 }, { "epoch": 0.8727050183598531, "grad_norm": 0.4181265139862361, "learning_rate": 4.95835596818706e-06, "loss": 0.4741, "step": 713 }, { "epoch": 0.8739290085679314, "grad_norm": 0.5697305138603498, "learning_rate": 4.958239399249257e-06, "loss": 0.4528, "step": 714 }, { "epoch": 0.8751529987760098, "grad_norm": 0.5409908090439541, "learning_rate": 4.958122668765055e-06, "loss": 0.7722, "step": 715 }, { "epoch": 0.8763769889840881, "grad_norm": 0.6114565244788789, "learning_rate": 4.9580057767421216e-06, "loss": 0.6247, "step": 716 }, { "epoch": 0.8776009791921665, "grad_norm": 0.5492951193229206, "learning_rate": 4.9578887231881415e-06, "loss": 0.8009, "step": 717 }, { "epoch": 0.8788249694002448, "grad_norm": 0.8082722482767791, "learning_rate": 4.957771508110804e-06, "loss": 0.536, "step": 718 }, { "epoch": 0.8800489596083231, "grad_norm": 1.1912454600581321, "learning_rate": 4.9576541315178155e-06, "loss": 0.8341, "step": 719 }, { "epoch": 0.8812729498164015, "grad_norm": 0.6627422431388482, "learning_rate": 4.957536593416887e-06, "loss": 0.5589, "step": 720 }, { "epoch": 0.8824969400244798, "grad_norm": 0.8281215538387402, "learning_rate": 4.957418893815743e-06, "loss": 0.8272, "step": 721 }, { "epoch": 0.8837209302325582, "grad_norm": 0.2322118039477947, "learning_rate": 4.95730103272212e-06, "loss": 0.1485, "step": 722 }, { "epoch": 0.8849449204406364, "grad_norm": 0.771347945813748, "learning_rate": 4.95718301014376e-06, "loss": 1.3325, "step": 723 }, { "epoch": 0.8861689106487148, "grad_norm": 1.0376997785252853, "learning_rate": 4.957064826088422e-06, "loss": 0.3694, "step": 724 }, { "epoch": 0.8873929008567931, "grad_norm": 0.45775974748006704, "learning_rate": 4.956946480563872e-06, "loss": 0.6373, "step": 725 }, { "epoch": 0.8886168910648715, "grad_norm": 0.8508035796461484, "learning_rate": 4.956827973577887e-06, "loss": 0.9275, "step": 726 }, { "epoch": 0.8898408812729498, "grad_norm": 0.46320961532413946, "learning_rate": 4.956709305138254e-06, "loss": 0.6821, "step": 727 }, { "epoch": 0.8910648714810282, "grad_norm": 0.9018089617689261, "learning_rate": 4.956590475252773e-06, "loss": 1.1907, "step": 728 }, { "epoch": 0.8922888616891065, "grad_norm": 0.503750120577611, "learning_rate": 4.956471483929252e-06, "loss": 0.6648, "step": 729 }, { "epoch": 0.8935128518971848, "grad_norm": 0.8408772095120547, "learning_rate": 4.95635233117551e-06, "loss": 1.0152, "step": 730 }, { "epoch": 0.8947368421052632, "grad_norm": 0.5631755390842799, "learning_rate": 4.956233016999379e-06, "loss": 0.7104, "step": 731 }, { "epoch": 0.8959608323133414, "grad_norm": 0.8712703886908723, "learning_rate": 4.956113541408699e-06, "loss": 1.0763, "step": 732 }, { "epoch": 0.8971848225214198, "grad_norm": 0.512408541132229, "learning_rate": 4.955993904411322e-06, "loss": 0.7247, "step": 733 }, { "epoch": 0.8984088127294981, "grad_norm": 0.6824974015011425, "learning_rate": 4.9558741060151095e-06, "loss": 0.9624, "step": 734 }, { "epoch": 0.8996328029375765, "grad_norm": 0.5183321760850187, "learning_rate": 4.955754146227934e-06, "loss": 0.6987, "step": 735 }, { "epoch": 0.9008567931456548, "grad_norm": 1.0467640809491838, "learning_rate": 4.955634025057679e-06, "loss": 0.5277, "step": 736 }, { "epoch": 0.9020807833537332, "grad_norm": 0.5811866370147754, "learning_rate": 4.95551374251224e-06, "loss": 1.169, "step": 737 }, { "epoch": 0.9033047735618115, "grad_norm": 0.6467966511771853, "learning_rate": 4.955393298599519e-06, "loss": 0.6356, "step": 738 }, { "epoch": 0.9045287637698899, "grad_norm": 0.705212707899457, "learning_rate": 4.955272693327432e-06, "loss": 0.801, "step": 739 }, { "epoch": 0.9057527539779682, "grad_norm": 0.628522507726457, "learning_rate": 4.955151926703906e-06, "loss": 1.006, "step": 740 }, { "epoch": 0.9069767441860465, "grad_norm": 0.6229832723062967, "learning_rate": 4.955030998736876e-06, "loss": 0.3779, "step": 741 }, { "epoch": 0.9082007343941249, "grad_norm": 0.7466982422594095, "learning_rate": 4.95490990943429e-06, "loss": 0.5944, "step": 742 }, { "epoch": 0.9094247246022031, "grad_norm": 0.6324108113406297, "learning_rate": 4.954788658804104e-06, "loss": 0.7041, "step": 743 }, { "epoch": 0.9106487148102815, "grad_norm": 0.7062328351787057, "learning_rate": 4.954667246854287e-06, "loss": 1.171, "step": 744 }, { "epoch": 0.9118727050183598, "grad_norm": 0.6043655151870077, "learning_rate": 4.954545673592818e-06, "loss": 0.7547, "step": 745 }, { "epoch": 0.9130966952264382, "grad_norm": 0.6288704008008857, "learning_rate": 4.954423939027685e-06, "loss": 0.6992, "step": 746 }, { "epoch": 0.9143206854345165, "grad_norm": 0.56448098498115, "learning_rate": 4.95430204316689e-06, "loss": 0.5258, "step": 747 }, { "epoch": 0.9155446756425949, "grad_norm": 0.953467737764038, "learning_rate": 4.954179986018442e-06, "loss": 0.6948, "step": 748 }, { "epoch": 0.9167686658506732, "grad_norm": 0.6567046061576766, "learning_rate": 4.954057767590364e-06, "loss": 0.6245, "step": 749 }, { "epoch": 0.9179926560587516, "grad_norm": 1.003548014952242, "learning_rate": 4.9539353878906855e-06, "loss": 0.657, "step": 750 }, { "epoch": 0.9192166462668299, "grad_norm": 0.7329053091410532, "learning_rate": 4.9538128469274495e-06, "loss": 0.406, "step": 751 }, { "epoch": 0.9204406364749081, "grad_norm": 0.9746605148531934, "learning_rate": 4.9536901447087094e-06, "loss": 0.7816, "step": 752 }, { "epoch": 0.9216646266829865, "grad_norm": 0.7637650437816885, "learning_rate": 4.95356728124253e-06, "loss": 0.6554, "step": 753 }, { "epoch": 0.9228886168910648, "grad_norm": 0.6660208266325064, "learning_rate": 4.953444256536982e-06, "loss": 0.6415, "step": 754 }, { "epoch": 0.9241126070991432, "grad_norm": 0.9819341653189343, "learning_rate": 4.9533210706001526e-06, "loss": 1.5814, "step": 755 }, { "epoch": 0.9253365973072215, "grad_norm": 0.9377834858093372, "learning_rate": 4.9531977234401376e-06, "loss": 0.7036, "step": 756 }, { "epoch": 0.9265605875152999, "grad_norm": 0.7616575323489688, "learning_rate": 4.9530742150650405e-06, "loss": 0.4911, "step": 757 }, { "epoch": 0.9277845777233782, "grad_norm": 0.6953791207814416, "learning_rate": 4.95295054548298e-06, "loss": 1.65, "step": 758 }, { "epoch": 0.9290085679314566, "grad_norm": 1.1387378973646702, "learning_rate": 4.952826714702082e-06, "loss": 0.5804, "step": 759 }, { "epoch": 0.9302325581395349, "grad_norm": 0.8861422220151547, "learning_rate": 4.952702722730486e-06, "loss": 0.5614, "step": 760 }, { "epoch": 0.9314565483476133, "grad_norm": 0.7418298836843789, "learning_rate": 4.9525785695763385e-06, "loss": 0.7323, "step": 761 }, { "epoch": 0.9326805385556916, "grad_norm": 1.0368621356180565, "learning_rate": 4.952454255247798e-06, "loss": 1.2654, "step": 762 }, { "epoch": 0.9339045287637698, "grad_norm": 0.744680815090098, "learning_rate": 4.952329779753036e-06, "loss": 0.5789, "step": 763 }, { "epoch": 0.9351285189718482, "grad_norm": 0.7435190315239415, "learning_rate": 4.952205143100231e-06, "loss": 0.6035, "step": 764 }, { "epoch": 0.9363525091799265, "grad_norm": 0.9180800699512491, "learning_rate": 4.952080345297573e-06, "loss": 0.8417, "step": 765 }, { "epoch": 0.9375764993880049, "grad_norm": 0.5401721647431306, "learning_rate": 4.951955386353265e-06, "loss": 0.824, "step": 766 }, { "epoch": 0.9388004895960832, "grad_norm": 0.8520554366017021, "learning_rate": 4.951830266275519e-06, "loss": 0.746, "step": 767 }, { "epoch": 0.9400244798041616, "grad_norm": 0.41268144712817595, "learning_rate": 4.951704985072556e-06, "loss": 0.5081, "step": 768 }, { "epoch": 0.9412484700122399, "grad_norm": 0.7399246329327385, "learning_rate": 4.95157954275261e-06, "loss": 0.8574, "step": 769 }, { "epoch": 0.9424724602203183, "grad_norm": 0.6699501069245549, "learning_rate": 4.951453939323924e-06, "loss": 0.7014, "step": 770 }, { "epoch": 0.9436964504283966, "grad_norm": 0.7953659085232874, "learning_rate": 4.951328174794752e-06, "loss": 0.7177, "step": 771 }, { "epoch": 0.944920440636475, "grad_norm": 1.4365935603083932, "learning_rate": 4.951202249173359e-06, "loss": 0.9069, "step": 772 }, { "epoch": 0.9461444308445532, "grad_norm": 0.9958209988883617, "learning_rate": 4.951076162468022e-06, "loss": 1.154, "step": 773 }, { "epoch": 0.9473684210526315, "grad_norm": 0.8172148241879345, "learning_rate": 4.950949914687024e-06, "loss": 1.0645, "step": 774 }, { "epoch": 0.9485924112607099, "grad_norm": 0.6654613847396502, "learning_rate": 4.9508235058386635e-06, "loss": 0.5749, "step": 775 }, { "epoch": 0.9498164014687882, "grad_norm": 0.349047639859916, "learning_rate": 4.950696935931247e-06, "loss": 0.2597, "step": 776 }, { "epoch": 0.9510403916768666, "grad_norm": 0.6259195946879196, "learning_rate": 4.950570204973092e-06, "loss": 0.9118, "step": 777 }, { "epoch": 0.9522643818849449, "grad_norm": 0.7726247437629674, "learning_rate": 4.950443312972528e-06, "loss": 1.4145, "step": 778 }, { "epoch": 0.9534883720930233, "grad_norm": 1.2905346863679434, "learning_rate": 4.950316259937893e-06, "loss": 0.5826, "step": 779 }, { "epoch": 0.9547123623011016, "grad_norm": 0.9103500823309562, "learning_rate": 4.950189045877536e-06, "loss": 0.5736, "step": 780 }, { "epoch": 0.95593635250918, "grad_norm": 0.6626792535247903, "learning_rate": 4.950061670799818e-06, "loss": 0.5445, "step": 781 }, { "epoch": 0.9571603427172583, "grad_norm": 1.0176661356963794, "learning_rate": 4.949934134713109e-06, "loss": 1.144, "step": 782 }, { "epoch": 0.9583843329253366, "grad_norm": 1.0613860820373697, "learning_rate": 4.9498064376257905e-06, "loss": 1.1083, "step": 783 }, { "epoch": 0.9596083231334149, "grad_norm": 0.8661539175464111, "learning_rate": 4.949678579546254e-06, "loss": 0.8527, "step": 784 }, { "epoch": 0.9608323133414932, "grad_norm": 1.0089892450035782, "learning_rate": 4.949550560482902e-06, "loss": 1.2164, "step": 785 }, { "epoch": 0.9620563035495716, "grad_norm": 0.6471811639787152, "learning_rate": 4.949422380444147e-06, "loss": 0.67, "step": 786 }, { "epoch": 0.9632802937576499, "grad_norm": 1.3305504669122221, "learning_rate": 4.9492940394384135e-06, "loss": 0.7003, "step": 787 }, { "epoch": 0.9645042839657283, "grad_norm": 0.6822602927488968, "learning_rate": 4.949165537474134e-06, "loss": 0.5909, "step": 788 }, { "epoch": 0.9657282741738066, "grad_norm": 1.265269610791293, "learning_rate": 4.949036874559755e-06, "loss": 0.5821, "step": 789 }, { "epoch": 0.966952264381885, "grad_norm": 0.5048014197446955, "learning_rate": 4.9489080507037314e-06, "loss": 0.4543, "step": 790 }, { "epoch": 0.9681762545899633, "grad_norm": 0.5077967861665366, "learning_rate": 4.9487790659145285e-06, "loss": 0.5157, "step": 791 }, { "epoch": 0.9694002447980417, "grad_norm": 1.1738487056815181, "learning_rate": 4.9486499202006225e-06, "loss": 0.5897, "step": 792 }, { "epoch": 0.9706242350061199, "grad_norm": 0.8593748606891259, "learning_rate": 4.9485206135705e-06, "loss": 0.659, "step": 793 }, { "epoch": 0.9718482252141983, "grad_norm": 0.9106326648832945, "learning_rate": 4.94839114603266e-06, "loss": 0.5134, "step": 794 }, { "epoch": 0.9730722154222766, "grad_norm": 1.1073536618977897, "learning_rate": 4.94826151759561e-06, "loss": 0.5198, "step": 795 }, { "epoch": 0.9742962056303549, "grad_norm": 0.774719335187564, "learning_rate": 4.948131728267868e-06, "loss": 0.6796, "step": 796 }, { "epoch": 0.9755201958384333, "grad_norm": 0.5551126390132856, "learning_rate": 4.948001778057964e-06, "loss": 0.7378, "step": 797 }, { "epoch": 0.9767441860465116, "grad_norm": 0.9278909177767704, "learning_rate": 4.947871666974438e-06, "loss": 0.527, "step": 798 }, { "epoch": 0.97796817625459, "grad_norm": 0.6459144722406741, "learning_rate": 4.94774139502584e-06, "loss": 0.6721, "step": 799 }, { "epoch": 0.9791921664626683, "grad_norm": 0.7560365405148642, "learning_rate": 4.9476109622207305e-06, "loss": 0.5276, "step": 800 }, { "epoch": 0.9804161566707467, "grad_norm": 0.9382628125879249, "learning_rate": 4.947480368567682e-06, "loss": 0.462, "step": 801 }, { "epoch": 0.981640146878825, "grad_norm": 0.7358258340675429, "learning_rate": 4.9473496140752765e-06, "loss": 1.6796, "step": 802 }, { "epoch": 0.9828641370869033, "grad_norm": 0.8665619231728571, "learning_rate": 4.9472186987521065e-06, "loss": 1.6072, "step": 803 }, { "epoch": 0.9840881272949816, "grad_norm": 0.8872786972909009, "learning_rate": 4.947087622606775e-06, "loss": 0.9852, "step": 804 }, { "epoch": 0.98531211750306, "grad_norm": 0.7688120320974523, "learning_rate": 4.946956385647896e-06, "loss": 1.2714, "step": 805 }, { "epoch": 0.9865361077111383, "grad_norm": 0.7701508714382308, "learning_rate": 4.946824987884095e-06, "loss": 0.6108, "step": 806 }, { "epoch": 0.9877600979192166, "grad_norm": 0.5248408647757767, "learning_rate": 4.946693429324005e-06, "loss": 0.5792, "step": 807 }, { "epoch": 0.988984088127295, "grad_norm": 0.9737646168351266, "learning_rate": 4.9465617099762725e-06, "loss": 0.5862, "step": 808 }, { "epoch": 0.9902080783353733, "grad_norm": 0.7606404708012034, "learning_rate": 4.946429829849554e-06, "loss": 0.6566, "step": 809 }, { "epoch": 0.9914320685434517, "grad_norm": 0.5894997176752026, "learning_rate": 4.946297788952515e-06, "loss": 0.4674, "step": 810 }, { "epoch": 0.99265605875153, "grad_norm": 0.7956348703033926, "learning_rate": 4.946165587293835e-06, "loss": 0.6732, "step": 811 }, { "epoch": 0.9938800489596084, "grad_norm": 0.9052988526417667, "learning_rate": 4.946033224882199e-06, "loss": 1.6674, "step": 812 }, { "epoch": 0.9951040391676866, "grad_norm": 0.8035133481225467, "learning_rate": 4.945900701726307e-06, "loss": 0.5189, "step": 813 }, { "epoch": 0.996328029375765, "grad_norm": 0.8606738785392671, "learning_rate": 4.945768017834868e-06, "loss": 0.6192, "step": 814 }, { "epoch": 0.9975520195838433, "grad_norm": 0.6238537090922615, "learning_rate": 4.945635173216602e-06, "loss": 0.6999, "step": 815 }, { "epoch": 0.9987760097919217, "grad_norm": 0.6203579342173314, "learning_rate": 4.945502167880236e-06, "loss": 0.7971, "step": 816 }, { "epoch": 1.0, "grad_norm": 0.5693544736753935, "learning_rate": 4.9453690018345144e-06, "loss": 0.6203, "step": 817 }, { "epoch": 1.0012239902080784, "grad_norm": 0.8661940272854449, "learning_rate": 4.945235675088187e-06, "loss": 0.6973, "step": 818 }, { "epoch": 1.0024479804161566, "grad_norm": 0.707151983898238, "learning_rate": 4.945102187650015e-06, "loss": 0.836, "step": 819 }, { "epoch": 1.003671970624235, "grad_norm": 0.6446105970479922, "learning_rate": 4.94496853952877e-06, "loss": 0.9218, "step": 820 }, { "epoch": 1.0048959608323134, "grad_norm": 0.5477801538704142, "learning_rate": 4.944834730733237e-06, "loss": 0.4513, "step": 821 }, { "epoch": 1.0061199510403918, "grad_norm": 0.6368417317347421, "learning_rate": 4.9447007612722076e-06, "loss": 0.9051, "step": 822 }, { "epoch": 1.00734394124847, "grad_norm": 0.7613256766593812, "learning_rate": 4.944566631154487e-06, "loss": 0.6626, "step": 823 }, { "epoch": 1.0085679314565483, "grad_norm": 0.9616628961324019, "learning_rate": 4.944432340388889e-06, "loss": 0.7408, "step": 824 }, { "epoch": 1.0097919216646267, "grad_norm": 0.8207080720176745, "learning_rate": 4.944297888984239e-06, "loss": 0.5055, "step": 825 }, { "epoch": 1.0110159118727051, "grad_norm": 0.6978541379200914, "learning_rate": 4.944163276949373e-06, "loss": 1.6658, "step": 826 }, { "epoch": 1.0122399020807833, "grad_norm": 0.40682640339588866, "learning_rate": 4.944028504293136e-06, "loss": 0.4815, "step": 827 }, { "epoch": 1.0134638922888617, "grad_norm": 0.5904825770432894, "learning_rate": 4.943893571024386e-06, "loss": 0.5393, "step": 828 }, { "epoch": 1.01468788249694, "grad_norm": 0.8637420173350128, "learning_rate": 4.94375847715199e-06, "loss": 0.7088, "step": 829 }, { "epoch": 1.0159118727050183, "grad_norm": 0.7083996965236148, "learning_rate": 4.9436232226848254e-06, "loss": 0.8026, "step": 830 }, { "epoch": 1.0171358629130967, "grad_norm": 1.1715644372132843, "learning_rate": 4.94348780763178e-06, "loss": 1.1236, "step": 831 }, { "epoch": 1.018359853121175, "grad_norm": 0.4170623593069568, "learning_rate": 4.943352232001755e-06, "loss": 0.5134, "step": 832 }, { "epoch": 1.0195838433292534, "grad_norm": 1.1046852518147066, "learning_rate": 4.943216495803659e-06, "loss": 1.5398, "step": 833 }, { "epoch": 1.0208078335373316, "grad_norm": 0.6988946062657729, "learning_rate": 4.943080599046411e-06, "loss": 0.5888, "step": 834 }, { "epoch": 1.02203182374541, "grad_norm": 0.6141841076150183, "learning_rate": 4.9429445417389425e-06, "loss": 0.4347, "step": 835 }, { "epoch": 1.0232558139534884, "grad_norm": 0.7861586283093154, "learning_rate": 4.9428083238901945e-06, "loss": 0.795, "step": 836 }, { "epoch": 1.0244798041615668, "grad_norm": 0.4737187818914004, "learning_rate": 4.942671945509119e-06, "loss": 0.6727, "step": 837 }, { "epoch": 1.025703794369645, "grad_norm": 0.8658438555873381, "learning_rate": 4.942535406604678e-06, "loss": 1.1304, "step": 838 }, { "epoch": 1.0269277845777234, "grad_norm": 1.2823582828519138, "learning_rate": 4.942398707185844e-06, "loss": 0.8813, "step": 839 }, { "epoch": 1.0281517747858018, "grad_norm": 0.8294051139830289, "learning_rate": 4.9422618472616005e-06, "loss": 0.5225, "step": 840 }, { "epoch": 1.02937576499388, "grad_norm": 1.1458218559403601, "learning_rate": 4.9421248268409425e-06, "loss": 0.67, "step": 841 }, { "epoch": 1.0305997552019583, "grad_norm": 0.9507148784282786, "learning_rate": 4.941987645932873e-06, "loss": 0.728, "step": 842 }, { "epoch": 1.0318237454100367, "grad_norm": 1.0953952073990505, "learning_rate": 4.9418503045464085e-06, "loss": 0.681, "step": 843 }, { "epoch": 1.0330477356181151, "grad_norm": 0.9183838020477175, "learning_rate": 4.941712802690573e-06, "loss": 0.9376, "step": 844 }, { "epoch": 1.0342717258261933, "grad_norm": 1.131657359103838, "learning_rate": 4.941575140374404e-06, "loss": 1.0094, "step": 845 }, { "epoch": 1.0354957160342717, "grad_norm": 0.4836443208439507, "learning_rate": 4.941437317606948e-06, "loss": 0.697, "step": 846 }, { "epoch": 1.03671970624235, "grad_norm": 0.5312915295698105, "learning_rate": 4.94129933439726e-06, "loss": 0.6277, "step": 847 }, { "epoch": 1.0379436964504285, "grad_norm": 0.9088395020591802, "learning_rate": 4.941161190754411e-06, "loss": 2.0003, "step": 848 }, { "epoch": 1.0391676866585067, "grad_norm": 0.7397669026028801, "learning_rate": 4.941022886687477e-06, "loss": 0.9658, "step": 849 }, { "epoch": 1.040391676866585, "grad_norm": 0.9475645784237857, "learning_rate": 4.940884422205548e-06, "loss": 1.502, "step": 850 }, { "epoch": 1.0416156670746635, "grad_norm": 1.2027277459322063, "learning_rate": 4.940745797317723e-06, "loss": 0.8304, "step": 851 }, { "epoch": 1.0428396572827416, "grad_norm": 1.4366594569596243, "learning_rate": 4.9406070120331116e-06, "loss": 0.636, "step": 852 }, { "epoch": 1.04406364749082, "grad_norm": 0.4464224155188248, "learning_rate": 4.940468066360835e-06, "loss": 0.4806, "step": 853 }, { "epoch": 1.0452876376988984, "grad_norm": 1.3012907038125006, "learning_rate": 4.940328960310023e-06, "loss": 0.7299, "step": 854 }, { "epoch": 1.0465116279069768, "grad_norm": 0.7355836555123412, "learning_rate": 4.940189693889819e-06, "loss": 0.6743, "step": 855 }, { "epoch": 1.047735618115055, "grad_norm": 0.6992330112851228, "learning_rate": 4.940050267109373e-06, "loss": 0.7331, "step": 856 }, { "epoch": 1.0489596083231334, "grad_norm": 0.6033412559576553, "learning_rate": 4.939910679977848e-06, "loss": 0.6466, "step": 857 }, { "epoch": 1.0501835985312118, "grad_norm": 0.7488411337841201, "learning_rate": 4.939770932504419e-06, "loss": 0.6794, "step": 858 }, { "epoch": 1.0514075887392902, "grad_norm": 0.5800918156185451, "learning_rate": 4.9396310246982685e-06, "loss": 0.6641, "step": 859 }, { "epoch": 1.0526315789473684, "grad_norm": 0.7551573359193208, "learning_rate": 4.939490956568589e-06, "loss": 1.195, "step": 860 }, { "epoch": 1.0538555691554468, "grad_norm": 0.9180580663687439, "learning_rate": 4.939350728124588e-06, "loss": 0.547, "step": 861 }, { "epoch": 1.0550795593635252, "grad_norm": 0.812652605834241, "learning_rate": 4.93921033937548e-06, "loss": 0.6669, "step": 862 }, { "epoch": 1.0563035495716033, "grad_norm": 0.6000139939882762, "learning_rate": 4.93906979033049e-06, "loss": 0.6664, "step": 863 }, { "epoch": 1.0575275397796817, "grad_norm": 0.9157930175254086, "learning_rate": 4.938929080998856e-06, "loss": 0.6129, "step": 864 }, { "epoch": 1.0587515299877601, "grad_norm": 0.794185060925577, "learning_rate": 4.938788211389822e-06, "loss": 1.1158, "step": 865 }, { "epoch": 1.0599755201958385, "grad_norm": 0.8064743589840888, "learning_rate": 4.938647181512648e-06, "loss": 0.6668, "step": 866 }, { "epoch": 1.0611995104039167, "grad_norm": 0.9588730797191946, "learning_rate": 4.938505991376601e-06, "loss": 1.1179, "step": 867 }, { "epoch": 1.062423500611995, "grad_norm": 0.6374079362232818, "learning_rate": 4.9383646409909604e-06, "loss": 0.6922, "step": 868 }, { "epoch": 1.0636474908200735, "grad_norm": 1.0498529267851326, "learning_rate": 4.938223130365014e-06, "loss": 1.0353, "step": 869 }, { "epoch": 1.0648714810281519, "grad_norm": 0.8120040043969616, "learning_rate": 4.938081459508061e-06, "loss": 1.3931, "step": 870 }, { "epoch": 1.06609547123623, "grad_norm": 0.9150882229283052, "learning_rate": 4.9379396284294135e-06, "loss": 0.9144, "step": 871 }, { "epoch": 1.0673194614443084, "grad_norm": 0.7500586163669113, "learning_rate": 4.937797637138391e-06, "loss": 0.4016, "step": 872 }, { "epoch": 1.0685434516523868, "grad_norm": 0.8078203460896909, "learning_rate": 4.937655485644324e-06, "loss": 0.4653, "step": 873 }, { "epoch": 1.069767441860465, "grad_norm": 0.8067060929128912, "learning_rate": 4.937513173956554e-06, "loss": 0.4519, "step": 874 }, { "epoch": 1.0709914320685434, "grad_norm": 0.6180085262754264, "learning_rate": 4.937370702084435e-06, "loss": 0.8411, "step": 875 }, { "epoch": 1.0722154222766218, "grad_norm": 1.1061653049333586, "learning_rate": 4.937228070037329e-06, "loss": 0.9889, "step": 876 }, { "epoch": 1.0734394124847002, "grad_norm": 1.1043132113659924, "learning_rate": 4.937085277824608e-06, "loss": 1.071, "step": 877 }, { "epoch": 1.0746634026927784, "grad_norm": 0.6688194414997779, "learning_rate": 4.936942325455657e-06, "loss": 1.1218, "step": 878 }, { "epoch": 1.0758873929008568, "grad_norm": 1.3979019482131052, "learning_rate": 4.93679921293987e-06, "loss": 0.6765, "step": 879 }, { "epoch": 1.0771113831089352, "grad_norm": 0.8891508978142024, "learning_rate": 4.936655940286652e-06, "loss": 0.7434, "step": 880 }, { "epoch": 1.0783353733170133, "grad_norm": 0.6170598773984476, "learning_rate": 4.936512507505418e-06, "loss": 0.7343, "step": 881 }, { "epoch": 1.0795593635250917, "grad_norm": 0.6601043549706049, "learning_rate": 4.936368914605593e-06, "loss": 0.6579, "step": 882 }, { "epoch": 1.0807833537331701, "grad_norm": 0.797528036624534, "learning_rate": 4.9362251615966165e-06, "loss": 0.5999, "step": 883 }, { "epoch": 1.0820073439412485, "grad_norm": 0.7302283080473125, "learning_rate": 4.936081248487932e-06, "loss": 0.4804, "step": 884 }, { "epoch": 1.0832313341493267, "grad_norm": 0.8000642381307068, "learning_rate": 4.935937175288999e-06, "loss": 0.6468, "step": 885 }, { "epoch": 1.084455324357405, "grad_norm": 0.7482638787732815, "learning_rate": 4.935792942009285e-06, "loss": 0.6066, "step": 886 }, { "epoch": 1.0856793145654835, "grad_norm": 0.7839314095512564, "learning_rate": 4.9356485486582676e-06, "loss": 0.5721, "step": 887 }, { "epoch": 1.086903304773562, "grad_norm": 0.5233299131994322, "learning_rate": 4.935503995245435e-06, "loss": 0.4908, "step": 888 }, { "epoch": 1.08812729498164, "grad_norm": 0.6099284488229789, "learning_rate": 4.93535928178029e-06, "loss": 0.4673, "step": 889 }, { "epoch": 1.0893512851897185, "grad_norm": 0.48434984226035793, "learning_rate": 4.935214408272341e-06, "loss": 0.6289, "step": 890 }, { "epoch": 1.0905752753977969, "grad_norm": 0.7076133083344578, "learning_rate": 4.935069374731108e-06, "loss": 0.4804, "step": 891 }, { "epoch": 1.091799265605875, "grad_norm": 0.63015760369857, "learning_rate": 4.934924181166121e-06, "loss": 0.692, "step": 892 }, { "epoch": 1.0930232558139534, "grad_norm": 0.722134084925986, "learning_rate": 4.9347788275869245e-06, "loss": 1.3578, "step": 893 }, { "epoch": 1.0942472460220318, "grad_norm": 0.8147020330197138, "learning_rate": 4.9346333140030685e-06, "loss": 0.8277, "step": 894 }, { "epoch": 1.0954712362301102, "grad_norm": 0.6722225050176123, "learning_rate": 4.934487640424116e-06, "loss": 0.5787, "step": 895 }, { "epoch": 1.0966952264381884, "grad_norm": 1.1436531291375829, "learning_rate": 4.934341806859641e-06, "loss": 1.1474, "step": 896 }, { "epoch": 1.0979192166462668, "grad_norm": 0.4830185150974974, "learning_rate": 4.934195813319226e-06, "loss": 0.5948, "step": 897 }, { "epoch": 1.0991432068543452, "grad_norm": 1.0204265174128586, "learning_rate": 4.934049659812466e-06, "loss": 0.6106, "step": 898 }, { "epoch": 1.1003671970624236, "grad_norm": 0.717775102907032, "learning_rate": 4.933903346348965e-06, "loss": 1.1727, "step": 899 }, { "epoch": 1.1015911872705018, "grad_norm": 1.183887331450676, "learning_rate": 4.933756872938338e-06, "loss": 0.7355, "step": 900 }, { "epoch": 1.1028151774785802, "grad_norm": 0.6266520398342849, "learning_rate": 4.933610239590212e-06, "loss": 0.5325, "step": 901 }, { "epoch": 1.1040391676866586, "grad_norm": 0.7839937195979166, "learning_rate": 4.933463446314221e-06, "loss": 0.8373, "step": 902 }, { "epoch": 1.1052631578947367, "grad_norm": 1.2403158864800392, "learning_rate": 4.933316493120015e-06, "loss": 0.5419, "step": 903 }, { "epoch": 1.1064871481028151, "grad_norm": 1.0968796147664983, "learning_rate": 4.933169380017249e-06, "loss": 1.1853, "step": 904 }, { "epoch": 1.1077111383108935, "grad_norm": 0.9760553254367574, "learning_rate": 4.9330221070155895e-06, "loss": 1.2385, "step": 905 }, { "epoch": 1.108935128518972, "grad_norm": 0.5933915925866321, "learning_rate": 4.932874674124717e-06, "loss": 0.9547, "step": 906 }, { "epoch": 1.11015911872705, "grad_norm": 0.5260163594124148, "learning_rate": 4.9327270813543195e-06, "loss": 0.7735, "step": 907 }, { "epoch": 1.1113831089351285, "grad_norm": 0.7198693072522813, "learning_rate": 4.932579328714097e-06, "loss": 0.6696, "step": 908 }, { "epoch": 1.1126070991432069, "grad_norm": 0.8423682346092232, "learning_rate": 4.932431416213757e-06, "loss": 0.5328, "step": 909 }, { "epoch": 1.1138310893512853, "grad_norm": 0.7805678363954234, "learning_rate": 4.932283343863023e-06, "loss": 1.0152, "step": 910 }, { "epoch": 1.1150550795593634, "grad_norm": 0.8329413704405583, "learning_rate": 4.932135111671622e-06, "loss": 1.1024, "step": 911 }, { "epoch": 1.1162790697674418, "grad_norm": 0.5943061743713192, "learning_rate": 4.931986719649298e-06, "loss": 0.4314, "step": 912 }, { "epoch": 1.1175030599755202, "grad_norm": 0.794108142697148, "learning_rate": 4.931838167805804e-06, "loss": 1.7536, "step": 913 }, { "epoch": 1.1187270501835984, "grad_norm": 0.746695015450399, "learning_rate": 4.9316894561508985e-06, "loss": 1.2597, "step": 914 }, { "epoch": 1.1199510403916768, "grad_norm": 0.6997059029735759, "learning_rate": 4.931540584694356e-06, "loss": 0.4579, "step": 915 }, { "epoch": 1.1211750305997552, "grad_norm": 0.6928252230203336, "learning_rate": 4.931391553445961e-06, "loss": 0.5626, "step": 916 }, { "epoch": 1.1223990208078336, "grad_norm": 1.0488506948189316, "learning_rate": 4.931242362415506e-06, "loss": 0.7599, "step": 917 }, { "epoch": 1.1236230110159118, "grad_norm": 0.46909806642517743, "learning_rate": 4.931093011612795e-06, "loss": 0.4386, "step": 918 }, { "epoch": 1.1248470012239902, "grad_norm": 1.0453864809975137, "learning_rate": 4.930943501047644e-06, "loss": 0.7751, "step": 919 }, { "epoch": 1.1260709914320686, "grad_norm": 0.6272981133245432, "learning_rate": 4.930793830729877e-06, "loss": 0.9235, "step": 920 }, { "epoch": 1.127294981640147, "grad_norm": 0.7740683320030309, "learning_rate": 4.93064400066933e-06, "loss": 0.6552, "step": 921 }, { "epoch": 1.1285189718482251, "grad_norm": 1.5976515114142773, "learning_rate": 4.9304940108758496e-06, "loss": 0.542, "step": 922 }, { "epoch": 1.1297429620563035, "grad_norm": 1.3665461061409951, "learning_rate": 4.9303438613592925e-06, "loss": 0.4761, "step": 923 }, { "epoch": 1.130966952264382, "grad_norm": 1.032797170774786, "learning_rate": 4.930193552129527e-06, "loss": 0.631, "step": 924 }, { "epoch": 1.13219094247246, "grad_norm": 0.5868325194223467, "learning_rate": 4.9300430831964286e-06, "loss": 0.7676, "step": 925 }, { "epoch": 1.1334149326805385, "grad_norm": 1.4125796974973803, "learning_rate": 4.929892454569888e-06, "loss": 0.8429, "step": 926 }, { "epoch": 1.134638922888617, "grad_norm": 1.0415909203562288, "learning_rate": 4.9297416662598025e-06, "loss": 0.6605, "step": 927 }, { "epoch": 1.1358629130966953, "grad_norm": 0.6958759036489013, "learning_rate": 4.929590718276082e-06, "loss": 0.4051, "step": 928 }, { "epoch": 1.1370869033047735, "grad_norm": 0.8563740613252238, "learning_rate": 4.929439610628645e-06, "loss": 1.0091, "step": 929 }, { "epoch": 1.1383108935128519, "grad_norm": 0.5959915597772225, "learning_rate": 4.929288343327423e-06, "loss": 0.6246, "step": 930 }, { "epoch": 1.1395348837209303, "grad_norm": 0.4289902223630486, "learning_rate": 4.929136916382356e-06, "loss": 0.5524, "step": 931 }, { "epoch": 1.1407588739290087, "grad_norm": 0.6468380010275062, "learning_rate": 4.928985329803397e-06, "loss": 0.7002, "step": 932 }, { "epoch": 1.1419828641370868, "grad_norm": 0.5693465792602754, "learning_rate": 4.928833583600505e-06, "loss": 0.5987, "step": 933 }, { "epoch": 1.1432068543451652, "grad_norm": 0.7824804311255348, "learning_rate": 4.928681677783654e-06, "loss": 0.6367, "step": 934 }, { "epoch": 1.1444308445532436, "grad_norm": 0.8154337392216127, "learning_rate": 4.928529612362827e-06, "loss": 0.6176, "step": 935 }, { "epoch": 1.1456548347613218, "grad_norm": 0.5610969221997649, "learning_rate": 4.928377387348016e-06, "loss": 0.5611, "step": 936 }, { "epoch": 1.1468788249694002, "grad_norm": 0.7099537279375294, "learning_rate": 4.928225002749224e-06, "loss": 0.8004, "step": 937 }, { "epoch": 1.1481028151774786, "grad_norm": 0.7894720183270739, "learning_rate": 4.928072458576467e-06, "loss": 0.565, "step": 938 }, { "epoch": 1.149326805385557, "grad_norm": 0.5316884827058088, "learning_rate": 4.92791975483977e-06, "loss": 0.5073, "step": 939 }, { "epoch": 1.1505507955936352, "grad_norm": 0.8910257373498645, "learning_rate": 4.927766891549165e-06, "loss": 0.4202, "step": 940 }, { "epoch": 1.1517747858017136, "grad_norm": 0.7859968254384013, "learning_rate": 4.9276138687147e-06, "loss": 0.4868, "step": 941 }, { "epoch": 1.152998776009792, "grad_norm": 0.9733159649971784, "learning_rate": 4.927460686346431e-06, "loss": 1.1687, "step": 942 }, { "epoch": 1.1542227662178703, "grad_norm": 0.5069621260234324, "learning_rate": 4.927307344454424e-06, "loss": 0.7368, "step": 943 }, { "epoch": 1.1554467564259485, "grad_norm": 0.9543918791709688, "learning_rate": 4.9271538430487565e-06, "loss": 1.5965, "step": 944 }, { "epoch": 1.156670746634027, "grad_norm": 0.6038739711408615, "learning_rate": 4.927000182139516e-06, "loss": 0.578, "step": 945 }, { "epoch": 1.1578947368421053, "grad_norm": 0.7634198474197418, "learning_rate": 4.9268463617368e-06, "loss": 0.7874, "step": 946 }, { "epoch": 1.1591187270501835, "grad_norm": 0.6256737297145649, "learning_rate": 4.926692381850718e-06, "loss": 1.1065, "step": 947 }, { "epoch": 1.1603427172582619, "grad_norm": 0.8339957897002079, "learning_rate": 4.926538242491388e-06, "loss": 1.2655, "step": 948 }, { "epoch": 1.1615667074663403, "grad_norm": 0.6127153345448116, "learning_rate": 4.92638394366894e-06, "loss": 0.6249, "step": 949 }, { "epoch": 1.1627906976744187, "grad_norm": 1.2542859836633777, "learning_rate": 4.926229485393513e-06, "loss": 0.5003, "step": 950 }, { "epoch": 1.1640146878824968, "grad_norm": 0.5927348994236576, "learning_rate": 4.9260748676752595e-06, "loss": 0.7237, "step": 951 }, { "epoch": 1.1652386780905752, "grad_norm": 1.2866449929619959, "learning_rate": 4.925920090524338e-06, "loss": 0.7623, "step": 952 }, { "epoch": 1.1664626682986536, "grad_norm": 0.6551881947629175, "learning_rate": 4.925765153950922e-06, "loss": 0.7113, "step": 953 }, { "epoch": 1.167686658506732, "grad_norm": 1.0664162593744582, "learning_rate": 4.925610057965193e-06, "loss": 0.5734, "step": 954 }, { "epoch": 1.1689106487148102, "grad_norm": 0.8377440063233088, "learning_rate": 4.925454802577341e-06, "loss": 0.6356, "step": 955 }, { "epoch": 1.1701346389228886, "grad_norm": 0.958904077070117, "learning_rate": 4.925299387797571e-06, "loss": 1.5659, "step": 956 }, { "epoch": 1.171358629130967, "grad_norm": 0.720541162087234, "learning_rate": 4.9251438136360956e-06, "loss": 0.627, "step": 957 }, { "epoch": 1.1725826193390452, "grad_norm": 0.8027211485843715, "learning_rate": 4.924988080103139e-06, "loss": 0.5878, "step": 958 }, { "epoch": 1.1738066095471236, "grad_norm": 0.680947294238266, "learning_rate": 4.924832187208936e-06, "loss": 0.8529, "step": 959 }, { "epoch": 1.175030599755202, "grad_norm": 0.7789477347455837, "learning_rate": 4.924676134963729e-06, "loss": 0.6991, "step": 960 }, { "epoch": 1.1762545899632804, "grad_norm": 0.5647351597644509, "learning_rate": 4.924519923377775e-06, "loss": 0.6869, "step": 961 }, { "epoch": 1.1774785801713585, "grad_norm": 0.8041850804465166, "learning_rate": 4.9243635524613405e-06, "loss": 0.5852, "step": 962 }, { "epoch": 1.178702570379437, "grad_norm": 0.7761495342909841, "learning_rate": 4.9242070222247e-06, "loss": 0.6551, "step": 963 }, { "epoch": 1.1799265605875153, "grad_norm": 0.5208367555983227, "learning_rate": 4.9240503326781395e-06, "loss": 0.7174, "step": 964 }, { "epoch": 1.1811505507955937, "grad_norm": 0.897098027578854, "learning_rate": 4.923893483831957e-06, "loss": 0.7096, "step": 965 }, { "epoch": 1.182374541003672, "grad_norm": 0.9420163277029596, "learning_rate": 4.923736475696462e-06, "loss": 0.6526, "step": 966 }, { "epoch": 1.1835985312117503, "grad_norm": 1.138976690639025, "learning_rate": 4.923579308281969e-06, "loss": 1.0771, "step": 967 }, { "epoch": 1.1848225214198287, "grad_norm": 0.8356549551399949, "learning_rate": 4.923421981598808e-06, "loss": 0.804, "step": 968 }, { "epoch": 1.1860465116279069, "grad_norm": 0.7107893489698167, "learning_rate": 4.923264495657319e-06, "loss": 0.7593, "step": 969 }, { "epoch": 1.1872705018359853, "grad_norm": 0.9764157104302571, "learning_rate": 4.923106850467851e-06, "loss": 0.5815, "step": 970 }, { "epoch": 1.1884944920440637, "grad_norm": 0.7543816850733417, "learning_rate": 4.922949046040761e-06, "loss": 1.0227, "step": 971 }, { "epoch": 1.189718482252142, "grad_norm": 0.5591102336542225, "learning_rate": 4.9227910823864225e-06, "loss": 0.8137, "step": 972 }, { "epoch": 1.1909424724602202, "grad_norm": 0.7500602509810964, "learning_rate": 4.922632959515215e-06, "loss": 1.0798, "step": 973 }, { "epoch": 1.1921664626682986, "grad_norm": 0.9627577952203146, "learning_rate": 4.922474677437532e-06, "loss": 0.5036, "step": 974 }, { "epoch": 1.193390452876377, "grad_norm": 0.973304754817475, "learning_rate": 4.922316236163771e-06, "loss": 0.7579, "step": 975 }, { "epoch": 1.1946144430844554, "grad_norm": 0.8126311162827432, "learning_rate": 4.9221576357043475e-06, "loss": 1.2265, "step": 976 }, { "epoch": 1.1958384332925336, "grad_norm": 1.2418598819945843, "learning_rate": 4.921998876069684e-06, "loss": 0.5119, "step": 977 }, { "epoch": 1.197062423500612, "grad_norm": 0.7086188588681058, "learning_rate": 4.921839957270211e-06, "loss": 0.6529, "step": 978 }, { "epoch": 1.1982864137086904, "grad_norm": 0.723585265704103, "learning_rate": 4.921680879316375e-06, "loss": 0.9056, "step": 979 }, { "epoch": 1.1995104039167686, "grad_norm": 0.7950226442863032, "learning_rate": 4.921521642218628e-06, "loss": 1.3962, "step": 980 }, { "epoch": 1.200734394124847, "grad_norm": 0.8648744807066634, "learning_rate": 4.921362245987436e-06, "loss": 0.8437, "step": 981 }, { "epoch": 1.2019583843329253, "grad_norm": 0.6128650184941334, "learning_rate": 4.921202690633272e-06, "loss": 1.0406, "step": 982 }, { "epoch": 1.2031823745410037, "grad_norm": 1.1443531703227012, "learning_rate": 4.921042976166623e-06, "loss": 0.9109, "step": 983 }, { "epoch": 1.204406364749082, "grad_norm": 0.8344225431890311, "learning_rate": 4.9208831025979854e-06, "loss": 0.6978, "step": 984 }, { "epoch": 1.2056303549571603, "grad_norm": 1.1195495602100733, "learning_rate": 4.9207230699378636e-06, "loss": 0.7325, "step": 985 }, { "epoch": 1.2068543451652387, "grad_norm": 0.39588294193338325, "learning_rate": 4.920562878196776e-06, "loss": 0.3968, "step": 986 }, { "epoch": 1.208078335373317, "grad_norm": 0.9760070512472677, "learning_rate": 4.920402527385248e-06, "loss": 0.5608, "step": 987 }, { "epoch": 1.2093023255813953, "grad_norm": 1.2293153634384493, "learning_rate": 4.92024201751382e-06, "loss": 0.6443, "step": 988 }, { "epoch": 1.2105263157894737, "grad_norm": 0.6030495268527476, "learning_rate": 4.920081348593038e-06, "loss": 0.55, "step": 989 }, { "epoch": 1.211750305997552, "grad_norm": 0.7126068612496806, "learning_rate": 4.9199205206334595e-06, "loss": 1.3686, "step": 990 }, { "epoch": 1.2129742962056302, "grad_norm": 0.7740471075086655, "learning_rate": 4.919759533645656e-06, "loss": 0.8094, "step": 991 }, { "epoch": 1.2141982864137086, "grad_norm": 0.515580783456274, "learning_rate": 4.919598387640207e-06, "loss": 0.6673, "step": 992 }, { "epoch": 1.215422276621787, "grad_norm": 0.780976307398921, "learning_rate": 4.919437082627701e-06, "loss": 0.641, "step": 993 }, { "epoch": 1.2166462668298654, "grad_norm": 0.9544816299190854, "learning_rate": 4.919275618618737e-06, "loss": 1.4979, "step": 994 }, { "epoch": 1.2178702570379436, "grad_norm": 0.5106496870118997, "learning_rate": 4.919113995623929e-06, "loss": 0.6591, "step": 995 }, { "epoch": 1.219094247246022, "grad_norm": 0.6762304323937639, "learning_rate": 4.918952213653897e-06, "loss": 0.6253, "step": 996 }, { "epoch": 1.2203182374541004, "grad_norm": 0.6838575604461612, "learning_rate": 4.918790272719273e-06, "loss": 1.1172, "step": 997 }, { "epoch": 1.2215422276621788, "grad_norm": 0.8246161308497401, "learning_rate": 4.918628172830698e-06, "loss": 0.7533, "step": 998 }, { "epoch": 1.222766217870257, "grad_norm": 1.0863318043310828, "learning_rate": 4.918465913998825e-06, "loss": 0.4049, "step": 999 }, { "epoch": 1.2239902080783354, "grad_norm": 0.6096340900185448, "learning_rate": 4.918303496234318e-06, "loss": 0.8322, "step": 1000 }, { "epoch": 1.2252141982864138, "grad_norm": 0.8645840083019114, "learning_rate": 4.91814091954785e-06, "loss": 0.4845, "step": 1001 }, { "epoch": 1.226438188494492, "grad_norm": 0.8102115911830429, "learning_rate": 4.917978183950105e-06, "loss": 0.6439, "step": 1002 }, { "epoch": 1.2276621787025703, "grad_norm": 0.7512809015329637, "learning_rate": 4.917815289451777e-06, "loss": 1.3333, "step": 1003 }, { "epoch": 1.2288861689106487, "grad_norm": 0.6431051421878352, "learning_rate": 4.917652236063572e-06, "loss": 0.7275, "step": 1004 }, { "epoch": 1.2301101591187271, "grad_norm": 0.6448254471673616, "learning_rate": 4.917489023796202e-06, "loss": 0.8449, "step": 1005 }, { "epoch": 1.2313341493268053, "grad_norm": 1.330971742119266, "learning_rate": 4.9173256526603975e-06, "loss": 0.7213, "step": 1006 }, { "epoch": 1.2325581395348837, "grad_norm": 0.8432932452798065, "learning_rate": 4.917162122666891e-06, "loss": 0.6551, "step": 1007 }, { "epoch": 1.233782129742962, "grad_norm": 0.9338181929205969, "learning_rate": 4.91699843382643e-06, "loss": 0.594, "step": 1008 }, { "epoch": 1.2350061199510405, "grad_norm": 0.47003880427296724, "learning_rate": 4.916834586149772e-06, "loss": 0.5829, "step": 1009 }, { "epoch": 1.2362301101591187, "grad_norm": 1.1175818271109208, "learning_rate": 4.9166705796476846e-06, "loss": 0.8135, "step": 1010 }, { "epoch": 1.237454100367197, "grad_norm": 0.5461684548674339, "learning_rate": 4.916506414330945e-06, "loss": 0.8966, "step": 1011 }, { "epoch": 1.2386780905752754, "grad_norm": 0.8189207716880389, "learning_rate": 4.916342090210342e-06, "loss": 1.0068, "step": 1012 }, { "epoch": 1.2399020807833536, "grad_norm": 1.019437584202964, "learning_rate": 4.916177607296675e-06, "loss": 0.5515, "step": 1013 }, { "epoch": 1.241126070991432, "grad_norm": 0.929667499792301, "learning_rate": 4.916012965600752e-06, "loss": 0.5994, "step": 1014 }, { "epoch": 1.2423500611995104, "grad_norm": 0.5304088553117589, "learning_rate": 4.915848165133392e-06, "loss": 0.6411, "step": 1015 }, { "epoch": 1.2435740514075888, "grad_norm": 0.5282803546863084, "learning_rate": 4.915683205905427e-06, "loss": 0.7026, "step": 1016 }, { "epoch": 1.244798041615667, "grad_norm": 0.7887128091764675, "learning_rate": 4.9155180879276974e-06, "loss": 0.83, "step": 1017 }, { "epoch": 1.2460220318237454, "grad_norm": 0.8038974396610025, "learning_rate": 4.915352811211053e-06, "loss": 0.9488, "step": 1018 }, { "epoch": 1.2472460220318238, "grad_norm": 1.2872357738168838, "learning_rate": 4.915187375766355e-06, "loss": 0.5598, "step": 1019 }, { "epoch": 1.2484700122399022, "grad_norm": 0.7219431286340979, "learning_rate": 4.915021781604476e-06, "loss": 0.2874, "step": 1020 }, { "epoch": 1.2496940024479803, "grad_norm": 0.6710068681026446, "learning_rate": 4.914856028736298e-06, "loss": 0.5839, "step": 1021 }, { "epoch": 1.2509179926560587, "grad_norm": 1.2432570458466436, "learning_rate": 4.9146901171727144e-06, "loss": 0.5486, "step": 1022 }, { "epoch": 1.2521419828641371, "grad_norm": 0.8151537942527669, "learning_rate": 4.914524046924627e-06, "loss": 0.711, "step": 1023 }, { "epoch": 1.2533659730722153, "grad_norm": 0.9504461739681449, "learning_rate": 4.91435781800295e-06, "loss": 1.2027, "step": 1024 }, { "epoch": 1.2545899632802937, "grad_norm": 0.90578730800974, "learning_rate": 4.914191430418608e-06, "loss": 0.6249, "step": 1025 }, { "epoch": 1.255813953488372, "grad_norm": 1.4054020932479068, "learning_rate": 4.914024884182535e-06, "loss": 0.8146, "step": 1026 }, { "epoch": 1.2570379436964505, "grad_norm": 0.6786586298943523, "learning_rate": 4.9138581793056735e-06, "loss": 0.7505, "step": 1027 }, { "epoch": 1.258261933904529, "grad_norm": 0.761767346758803, "learning_rate": 4.9136913157989825e-06, "loss": 1.2721, "step": 1028 }, { "epoch": 1.259485924112607, "grad_norm": 1.1651046257041635, "learning_rate": 4.9135242936734255e-06, "loss": 0.9971, "step": 1029 }, { "epoch": 1.2607099143206855, "grad_norm": 1.1112997733405057, "learning_rate": 4.913357112939978e-06, "loss": 0.5412, "step": 1030 }, { "epoch": 1.2619339045287639, "grad_norm": 1.077518229420999, "learning_rate": 4.913189773609628e-06, "loss": 0.6806, "step": 1031 }, { "epoch": 1.263157894736842, "grad_norm": 0.7799422346648465, "learning_rate": 4.913022275693372e-06, "loss": 1.6532, "step": 1032 }, { "epoch": 1.2643818849449204, "grad_norm": 1.2686396516334226, "learning_rate": 4.912854619202218e-06, "loss": 0.5767, "step": 1033 }, { "epoch": 1.2656058751529988, "grad_norm": 0.9762570273001033, "learning_rate": 4.912686804147181e-06, "loss": 0.4087, "step": 1034 }, { "epoch": 1.266829865361077, "grad_norm": 0.6009351904915886, "learning_rate": 4.912518830539292e-06, "loss": 0.7852, "step": 1035 }, { "epoch": 1.2680538555691554, "grad_norm": 0.5706649725043963, "learning_rate": 4.91235069838959e-06, "loss": 0.6202, "step": 1036 }, { "epoch": 1.2692778457772338, "grad_norm": 0.48467864123474236, "learning_rate": 4.91218240770912e-06, "loss": 0.5806, "step": 1037 }, { "epoch": 1.2705018359853122, "grad_norm": 0.6223687908401416, "learning_rate": 4.912013958508947e-06, "loss": 0.6051, "step": 1038 }, { "epoch": 1.2717258261933906, "grad_norm": 1.5454483233117453, "learning_rate": 4.9118453508001375e-06, "loss": 0.5579, "step": 1039 }, { "epoch": 1.2729498164014688, "grad_norm": 0.5855177011950922, "learning_rate": 4.9116765845937715e-06, "loss": 0.575, "step": 1040 }, { "epoch": 1.2741738066095472, "grad_norm": 0.5714269294840185, "learning_rate": 4.91150765990094e-06, "loss": 0.6914, "step": 1041 }, { "epoch": 1.2753977968176256, "grad_norm": 0.8462659370753556, "learning_rate": 4.911338576732746e-06, "loss": 0.8168, "step": 1042 }, { "epoch": 1.2766217870257037, "grad_norm": 1.1846517633620257, "learning_rate": 4.9111693351003e-06, "loss": 0.4928, "step": 1043 }, { "epoch": 1.2778457772337821, "grad_norm": 0.6396779225132486, "learning_rate": 4.910999935014722e-06, "loss": 0.6902, "step": 1044 }, { "epoch": 1.2790697674418605, "grad_norm": 1.1272876104119254, "learning_rate": 4.910830376487147e-06, "loss": 0.6584, "step": 1045 }, { "epoch": 1.2802937576499387, "grad_norm": 0.94587504450619, "learning_rate": 4.910660659528716e-06, "loss": 1.4931, "step": 1046 }, { "epoch": 1.281517747858017, "grad_norm": 0.8434396433757076, "learning_rate": 4.910490784150583e-06, "loss": 0.7663, "step": 1047 }, { "epoch": 1.2827417380660955, "grad_norm": 0.7249595657518458, "learning_rate": 4.910320750363912e-06, "loss": 0.6905, "step": 1048 }, { "epoch": 1.2839657282741739, "grad_norm": 0.39607086271841424, "learning_rate": 4.910150558179877e-06, "loss": 0.2548, "step": 1049 }, { "epoch": 1.2851897184822523, "grad_norm": 0.6255905752065372, "learning_rate": 4.909980207609661e-06, "loss": 0.6511, "step": 1050 }, { "epoch": 1.2864137086903304, "grad_norm": 1.2164878690095329, "learning_rate": 4.9098096986644595e-06, "loss": 1.0885, "step": 1051 }, { "epoch": 1.2876376988984088, "grad_norm": 0.8338084387779549, "learning_rate": 4.909639031355478e-06, "loss": 1.0243, "step": 1052 }, { "epoch": 1.2888616891064872, "grad_norm": 0.9307022124869818, "learning_rate": 4.909468205693932e-06, "loss": 1.4632, "step": 1053 }, { "epoch": 1.2900856793145654, "grad_norm": 0.5907288912057423, "learning_rate": 4.909297221691047e-06, "loss": 0.6851, "step": 1054 }, { "epoch": 1.2913096695226438, "grad_norm": 0.7859393991764065, "learning_rate": 4.909126079358061e-06, "loss": 1.659, "step": 1055 }, { "epoch": 1.2925336597307222, "grad_norm": 0.6946272145980266, "learning_rate": 4.90895477870622e-06, "loss": 0.8927, "step": 1056 }, { "epoch": 1.2937576499388004, "grad_norm": 0.8647521894953364, "learning_rate": 4.90878331974678e-06, "loss": 0.5832, "step": 1057 }, { "epoch": 1.2949816401468788, "grad_norm": 1.3439474574306063, "learning_rate": 4.90861170249101e-06, "loss": 0.9269, "step": 1058 }, { "epoch": 1.2962056303549572, "grad_norm": 0.7759666294897798, "learning_rate": 4.908439926950188e-06, "loss": 0.501, "step": 1059 }, { "epoch": 1.2974296205630356, "grad_norm": 0.7457974282048464, "learning_rate": 4.908267993135602e-06, "loss": 0.6254, "step": 1060 }, { "epoch": 1.298653610771114, "grad_norm": 1.2092145958070974, "learning_rate": 4.90809590105855e-06, "loss": 0.5816, "step": 1061 }, { "epoch": 1.2998776009791921, "grad_norm": 0.5944068061053872, "learning_rate": 4.907923650730344e-06, "loss": 0.7543, "step": 1062 }, { "epoch": 1.3011015911872705, "grad_norm": 1.157101291382137, "learning_rate": 4.907751242162302e-06, "loss": 1.3014, "step": 1063 }, { "epoch": 1.302325581395349, "grad_norm": 1.1164781148840996, "learning_rate": 4.907578675365753e-06, "loss": 0.7546, "step": 1064 }, { "epoch": 1.303549571603427, "grad_norm": 0.5310689856584393, "learning_rate": 4.907405950352039e-06, "loss": 0.568, "step": 1065 }, { "epoch": 1.3047735618115055, "grad_norm": 1.1875873221422717, "learning_rate": 4.90723306713251e-06, "loss": 0.3372, "step": 1066 }, { "epoch": 1.305997552019584, "grad_norm": 1.0307019916100078, "learning_rate": 4.907060025718528e-06, "loss": 0.7595, "step": 1067 }, { "epoch": 1.307221542227662, "grad_norm": 0.6578066026318019, "learning_rate": 4.906886826121464e-06, "loss": 0.6299, "step": 1068 }, { "epoch": 1.3084455324357405, "grad_norm": 0.6371688410083587, "learning_rate": 4.9067134683527e-06, "loss": 0.6252, "step": 1069 }, { "epoch": 1.3096695226438189, "grad_norm": 0.7943568486224012, "learning_rate": 4.906539952423629e-06, "loss": 0.5642, "step": 1070 }, { "epoch": 1.3108935128518973, "grad_norm": 1.085381880539723, "learning_rate": 4.906366278345653e-06, "loss": 0.6709, "step": 1071 }, { "epoch": 1.3121175030599757, "grad_norm": 0.9280479231043428, "learning_rate": 4.906192446130187e-06, "loss": 1.1794, "step": 1072 }, { "epoch": 1.3133414932680538, "grad_norm": 0.7631110346505048, "learning_rate": 4.906018455788652e-06, "loss": 0.606, "step": 1073 }, { "epoch": 1.3145654834761322, "grad_norm": 0.7274024351807775, "learning_rate": 4.905844307332484e-06, "loss": 0.5015, "step": 1074 }, { "epoch": 1.3157894736842106, "grad_norm": 0.7971073514761351, "learning_rate": 4.905670000773126e-06, "loss": 0.5541, "step": 1075 }, { "epoch": 1.3170134638922888, "grad_norm": 0.5524365760657188, "learning_rate": 4.905495536122034e-06, "loss": 0.6665, "step": 1076 }, { "epoch": 1.3182374541003672, "grad_norm": 0.9903957545500455, "learning_rate": 4.905320913390673e-06, "loss": 0.6868, "step": 1077 }, { "epoch": 1.3194614443084456, "grad_norm": 1.4867549530108757, "learning_rate": 4.905146132590518e-06, "loss": 0.5044, "step": 1078 }, { "epoch": 1.3206854345165238, "grad_norm": 0.8079198359312023, "learning_rate": 4.904971193733055e-06, "loss": 1.1417, "step": 1079 }, { "epoch": 1.3219094247246022, "grad_norm": 0.7705632401976016, "learning_rate": 4.904796096829781e-06, "loss": 0.9595, "step": 1080 }, { "epoch": 1.3231334149326806, "grad_norm": 0.7735109978261849, "learning_rate": 4.904620841892201e-06, "loss": 0.6258, "step": 1081 }, { "epoch": 1.324357405140759, "grad_norm": 0.8609514439504001, "learning_rate": 4.9044454289318345e-06, "loss": 1.4805, "step": 1082 }, { "epoch": 1.3255813953488373, "grad_norm": 0.8790860607024982, "learning_rate": 4.904269857960208e-06, "loss": 0.6894, "step": 1083 }, { "epoch": 1.3268053855569155, "grad_norm": 1.1010200781840789, "learning_rate": 4.9040941289888585e-06, "loss": 0.5819, "step": 1084 }, { "epoch": 1.328029375764994, "grad_norm": 1.01638728777305, "learning_rate": 4.903918242029335e-06, "loss": 0.8175, "step": 1085 }, { "epoch": 1.3292533659730723, "grad_norm": 0.5946861261723128, "learning_rate": 4.903742197093197e-06, "loss": 0.6346, "step": 1086 }, { "epoch": 1.3304773561811505, "grad_norm": 0.9277566765201711, "learning_rate": 4.903565994192012e-06, "loss": 0.5345, "step": 1087 }, { "epoch": 1.3317013463892289, "grad_norm": 0.8782016445114257, "learning_rate": 4.90338963333736e-06, "loss": 1.2902, "step": 1088 }, { "epoch": 1.3329253365973073, "grad_norm": 0.6864683930465169, "learning_rate": 4.903213114540831e-06, "loss": 0.8311, "step": 1089 }, { "epoch": 1.3341493268053854, "grad_norm": 0.8963701147015143, "learning_rate": 4.903036437814025e-06, "loss": 1.285, "step": 1090 }, { "epoch": 1.3353733170134638, "grad_norm": 1.2402983899185305, "learning_rate": 4.902859603168552e-06, "loss": 1.0214, "step": 1091 }, { "epoch": 1.3365973072215422, "grad_norm": 0.9044587068239511, "learning_rate": 4.902682610616034e-06, "loss": 0.5171, "step": 1092 }, { "epoch": 1.3378212974296206, "grad_norm": 0.6492804779132865, "learning_rate": 4.902505460168101e-06, "loss": 0.7111, "step": 1093 }, { "epoch": 1.339045287637699, "grad_norm": 0.931608547087862, "learning_rate": 4.9023281518363965e-06, "loss": 1.2952, "step": 1094 }, { "epoch": 1.3402692778457772, "grad_norm": 1.4355971234240854, "learning_rate": 4.902150685632571e-06, "loss": 1.1625, "step": 1095 }, { "epoch": 1.3414932680538556, "grad_norm": 0.6497840413474866, "learning_rate": 4.901973061568287e-06, "loss": 0.447, "step": 1096 }, { "epoch": 1.342717258261934, "grad_norm": 0.8431309503813132, "learning_rate": 4.901795279655218e-06, "loss": 1.7044, "step": 1097 }, { "epoch": 1.3439412484700122, "grad_norm": 0.5682078592186636, "learning_rate": 4.901617339905047e-06, "loss": 0.5916, "step": 1098 }, { "epoch": 1.3451652386780906, "grad_norm": 0.8486957061312206, "learning_rate": 4.901439242329467e-06, "loss": 1.6226, "step": 1099 }, { "epoch": 1.346389228886169, "grad_norm": 0.9450196687163396, "learning_rate": 4.9012609869401826e-06, "loss": 0.6011, "step": 1100 }, { "epoch": 1.3476132190942471, "grad_norm": 0.792601072258462, "learning_rate": 4.901082573748907e-06, "loss": 0.7509, "step": 1101 }, { "epoch": 1.3488372093023255, "grad_norm": 0.7975393702893371, "learning_rate": 4.900904002767367e-06, "loss": 0.5591, "step": 1102 }, { "epoch": 1.350061199510404, "grad_norm": 0.622536101146563, "learning_rate": 4.900725274007296e-06, "loss": 0.6398, "step": 1103 }, { "epoch": 1.3512851897184823, "grad_norm": 1.027752781787025, "learning_rate": 4.900546387480439e-06, "loss": 2.3774, "step": 1104 }, { "epoch": 1.3525091799265607, "grad_norm": 0.9324896453336858, "learning_rate": 4.9003673431985525e-06, "loss": 0.5183, "step": 1105 }, { "epoch": 1.353733170134639, "grad_norm": 1.146571080251268, "learning_rate": 4.900188141173403e-06, "loss": 0.3474, "step": 1106 }, { "epoch": 1.3549571603427173, "grad_norm": 1.3718909876753118, "learning_rate": 4.9000087814167665e-06, "loss": 0.4625, "step": 1107 }, { "epoch": 1.3561811505507957, "grad_norm": 0.868832683744956, "learning_rate": 4.899829263940429e-06, "loss": 0.6029, "step": 1108 }, { "epoch": 1.3574051407588739, "grad_norm": 0.6763350045508673, "learning_rate": 4.89964958875619e-06, "loss": 0.7569, "step": 1109 }, { "epoch": 1.3586291309669523, "grad_norm": 1.278962063921407, "learning_rate": 4.899469755875855e-06, "loss": 0.6489, "step": 1110 }, { "epoch": 1.3598531211750307, "grad_norm": 1.0905033742375256, "learning_rate": 4.899289765311243e-06, "loss": 0.4515, "step": 1111 }, { "epoch": 1.3610771113831088, "grad_norm": 1.4747607544809413, "learning_rate": 4.899109617074181e-06, "loss": 1.1834, "step": 1112 }, { "epoch": 1.3623011015911872, "grad_norm": 0.9530163198519706, "learning_rate": 4.89892931117651e-06, "loss": 1.0856, "step": 1113 }, { "epoch": 1.3635250917992656, "grad_norm": 0.6995087334805631, "learning_rate": 4.8987488476300765e-06, "loss": 0.4748, "step": 1114 }, { "epoch": 1.364749082007344, "grad_norm": 0.8860788685367288, "learning_rate": 4.898568226446742e-06, "loss": 1.6588, "step": 1115 }, { "epoch": 1.3659730722154224, "grad_norm": 0.7267940445380268, "learning_rate": 4.898387447638374e-06, "loss": 0.5623, "step": 1116 }, { "epoch": 1.3671970624235006, "grad_norm": 0.7041345906784646, "learning_rate": 4.898206511216855e-06, "loss": 0.8569, "step": 1117 }, { "epoch": 1.368421052631579, "grad_norm": 0.681304298029581, "learning_rate": 4.898025417194075e-06, "loss": 0.7717, "step": 1118 }, { "epoch": 1.3696450428396574, "grad_norm": 0.6131686214160815, "learning_rate": 4.897844165581934e-06, "loss": 0.441, "step": 1119 }, { "epoch": 1.3708690330477356, "grad_norm": 1.2188223275005334, "learning_rate": 4.897662756392343e-06, "loss": 1.0677, "step": 1120 }, { "epoch": 1.372093023255814, "grad_norm": 1.0282830482489482, "learning_rate": 4.897481189637224e-06, "loss": 0.552, "step": 1121 }, { "epoch": 1.3733170134638923, "grad_norm": 0.7282006820265837, "learning_rate": 4.897299465328509e-06, "loss": 0.5896, "step": 1122 }, { "epoch": 1.3745410036719705, "grad_norm": 0.8610131147096707, "learning_rate": 4.89711758347814e-06, "loss": 0.5657, "step": 1123 }, { "epoch": 1.375764993880049, "grad_norm": 0.7278065809704583, "learning_rate": 4.896935544098069e-06, "loss": 0.5613, "step": 1124 }, { "epoch": 1.3769889840881273, "grad_norm": 1.0723322353464118, "learning_rate": 4.896753347200261e-06, "loss": 0.6708, "step": 1125 }, { "epoch": 1.3782129742962057, "grad_norm": 1.0095410744038804, "learning_rate": 4.8965709927966875e-06, "loss": 0.7943, "step": 1126 }, { "epoch": 1.379436964504284, "grad_norm": 0.9330199231654827, "learning_rate": 4.896388480899333e-06, "loss": 1.3037, "step": 1127 }, { "epoch": 1.3806609547123623, "grad_norm": 1.242765047001813, "learning_rate": 4.896205811520191e-06, "loss": 0.5315, "step": 1128 }, { "epoch": 1.3818849449204407, "grad_norm": 0.7328236421804898, "learning_rate": 4.896022984671266e-06, "loss": 0.5529, "step": 1129 }, { "epoch": 1.383108935128519, "grad_norm": 1.4924446837060636, "learning_rate": 4.895840000364572e-06, "loss": 0.6041, "step": 1130 }, { "epoch": 1.3843329253365972, "grad_norm": 0.5750379305200451, "learning_rate": 4.895656858612136e-06, "loss": 0.6575, "step": 1131 }, { "epoch": 1.3855569155446756, "grad_norm": 1.0475845842601774, "learning_rate": 4.895473559425991e-06, "loss": 0.4975, "step": 1132 }, { "epoch": 1.386780905752754, "grad_norm": 0.5311708201710875, "learning_rate": 4.895290102818184e-06, "loss": 0.5973, "step": 1133 }, { "epoch": 1.3880048959608322, "grad_norm": 0.47767838262645773, "learning_rate": 4.895106488800772e-06, "loss": 0.3936, "step": 1134 }, { "epoch": 1.3892288861689106, "grad_norm": 0.7361757544350555, "learning_rate": 4.89492271738582e-06, "loss": 1.0734, "step": 1135 }, { "epoch": 1.390452876376989, "grad_norm": 0.8097075253269312, "learning_rate": 4.894738788585404e-06, "loss": 1.5948, "step": 1136 }, { "epoch": 1.3916768665850674, "grad_norm": 0.5547630807214372, "learning_rate": 4.894554702411615e-06, "loss": 0.6451, "step": 1137 }, { "epoch": 1.3929008567931458, "grad_norm": 0.785935352703157, "learning_rate": 4.894370458876546e-06, "loss": 0.5716, "step": 1138 }, { "epoch": 1.394124847001224, "grad_norm": 1.6645337053339961, "learning_rate": 4.894186057992307e-06, "loss": 0.8786, "step": 1139 }, { "epoch": 1.3953488372093024, "grad_norm": 1.0299728094290033, "learning_rate": 4.894001499771015e-06, "loss": 0.5646, "step": 1140 }, { "epoch": 1.3965728274173808, "grad_norm": 0.6884106902635764, "learning_rate": 4.8938167842248e-06, "loss": 1.1515, "step": 1141 }, { "epoch": 1.397796817625459, "grad_norm": 0.5440666764789626, "learning_rate": 4.893631911365801e-06, "loss": 0.6304, "step": 1142 }, { "epoch": 1.3990208078335373, "grad_norm": 1.0067017257150366, "learning_rate": 4.893446881206164e-06, "loss": 0.625, "step": 1143 }, { "epoch": 1.4002447980416157, "grad_norm": 0.7455584968586297, "learning_rate": 4.893261693758052e-06, "loss": 0.6521, "step": 1144 }, { "epoch": 1.401468788249694, "grad_norm": 1.090075140968715, "learning_rate": 4.8930763490336334e-06, "loss": 0.4371, "step": 1145 }, { "epoch": 1.4026927784577723, "grad_norm": 0.5981417185342028, "learning_rate": 4.892890847045089e-06, "loss": 0.5328, "step": 1146 }, { "epoch": 1.4039167686658507, "grad_norm": 0.5182221339513697, "learning_rate": 4.892705187804608e-06, "loss": 0.6609, "step": 1147 }, { "epoch": 1.405140758873929, "grad_norm": 0.8568359870937143, "learning_rate": 4.892519371324393e-06, "loss": 0.6739, "step": 1148 }, { "epoch": 1.4063647490820075, "grad_norm": 1.0313713154038737, "learning_rate": 4.892333397616653e-06, "loss": 0.7129, "step": 1149 }, { "epoch": 1.4075887392900857, "grad_norm": 1.430588490396873, "learning_rate": 4.892147266693611e-06, "loss": 0.4512, "step": 1150 }, { "epoch": 1.408812729498164, "grad_norm": 0.6693619914780443, "learning_rate": 4.891960978567499e-06, "loss": 0.7809, "step": 1151 }, { "epoch": 1.4100367197062424, "grad_norm": 0.6572112029500279, "learning_rate": 4.891774533250559e-06, "loss": 0.8061, "step": 1152 }, { "epoch": 1.4112607099143206, "grad_norm": 0.6249133135188174, "learning_rate": 4.8915879307550426e-06, "loss": 0.8543, "step": 1153 }, { "epoch": 1.412484700122399, "grad_norm": 1.3646176291362138, "learning_rate": 4.891401171093214e-06, "loss": 0.4781, "step": 1154 }, { "epoch": 1.4137086903304774, "grad_norm": 0.7817140234578278, "learning_rate": 4.891214254277345e-06, "loss": 0.5528, "step": 1155 }, { "epoch": 1.4149326805385556, "grad_norm": 0.8346509314599219, "learning_rate": 4.8910271803197195e-06, "loss": 0.7754, "step": 1156 }, { "epoch": 1.416156670746634, "grad_norm": 0.8610127899815279, "learning_rate": 4.890839949232632e-06, "loss": 1.5457, "step": 1157 }, { "epoch": 1.4173806609547124, "grad_norm": 1.4054426726173574, "learning_rate": 4.890652561028386e-06, "loss": 0.6113, "step": 1158 }, { "epoch": 1.4186046511627908, "grad_norm": 0.8273339290234721, "learning_rate": 4.890465015719297e-06, "loss": 0.3913, "step": 1159 }, { "epoch": 1.4198286413708692, "grad_norm": 0.8687774648520845, "learning_rate": 4.890277313317687e-06, "loss": 0.7198, "step": 1160 }, { "epoch": 1.4210526315789473, "grad_norm": 0.8946357741561024, "learning_rate": 4.890089453835894e-06, "loss": 1.1938, "step": 1161 }, { "epoch": 1.4222766217870257, "grad_norm": 0.8192884566968174, "learning_rate": 4.889901437286263e-06, "loss": 0.6155, "step": 1162 }, { "epoch": 1.4235006119951041, "grad_norm": 1.4366569292063802, "learning_rate": 4.889713263681149e-06, "loss": 0.5785, "step": 1163 }, { "epoch": 1.4247246022031823, "grad_norm": 0.5206450031276049, "learning_rate": 4.8895249330329175e-06, "loss": 0.5796, "step": 1164 }, { "epoch": 1.4259485924112607, "grad_norm": 0.8038562597100775, "learning_rate": 4.889336445353946e-06, "loss": 0.5033, "step": 1165 }, { "epoch": 1.427172582619339, "grad_norm": 1.0745047680459017, "learning_rate": 4.889147800656621e-06, "loss": 1.0874, "step": 1166 }, { "epoch": 1.4283965728274173, "grad_norm": 0.6879359687175218, "learning_rate": 4.88895899895334e-06, "loss": 0.5625, "step": 1167 }, { "epoch": 1.4296205630354957, "grad_norm": 0.528064917488075, "learning_rate": 4.888770040256508e-06, "loss": 0.599, "step": 1168 }, { "epoch": 1.430844553243574, "grad_norm": 0.9789527907994929, "learning_rate": 4.8885809245785454e-06, "loss": 1.1339, "step": 1169 }, { "epoch": 1.4320685434516525, "grad_norm": 0.6678071626329536, "learning_rate": 4.888391651931879e-06, "loss": 0.5274, "step": 1170 }, { "epoch": 1.4332925336597309, "grad_norm": 1.4293396018544062, "learning_rate": 4.888202222328948e-06, "loss": 0.523, "step": 1171 }, { "epoch": 1.434516523867809, "grad_norm": 0.8669687575262846, "learning_rate": 4.888012635782199e-06, "loss": 0.9041, "step": 1172 }, { "epoch": 1.4357405140758874, "grad_norm": 0.8151743014181194, "learning_rate": 4.887822892304094e-06, "loss": 0.521, "step": 1173 }, { "epoch": 1.4369645042839658, "grad_norm": 1.1762451119100759, "learning_rate": 4.887632991907099e-06, "loss": 0.6112, "step": 1174 }, { "epoch": 1.438188494492044, "grad_norm": 0.828996560860172, "learning_rate": 4.887442934603696e-06, "loss": 0.6509, "step": 1175 }, { "epoch": 1.4394124847001224, "grad_norm": 0.8886545296001926, "learning_rate": 4.887252720406373e-06, "loss": 0.5955, "step": 1176 }, { "epoch": 1.4406364749082008, "grad_norm": 0.5845138679853458, "learning_rate": 4.887062349327631e-06, "loss": 0.7097, "step": 1177 }, { "epoch": 1.441860465116279, "grad_norm": 0.6676544280319279, "learning_rate": 4.886871821379981e-06, "loss": 0.6868, "step": 1178 }, { "epoch": 1.4430844553243574, "grad_norm": 1.2585786290411753, "learning_rate": 4.886681136575943e-06, "loss": 0.7097, "step": 1179 }, { "epoch": 1.4443084455324358, "grad_norm": 1.0857247107635473, "learning_rate": 4.886490294928049e-06, "loss": 0.7021, "step": 1180 }, { "epoch": 1.4455324357405142, "grad_norm": 1.1375788518200016, "learning_rate": 4.8862992964488395e-06, "loss": 0.5486, "step": 1181 }, { "epoch": 1.4467564259485923, "grad_norm": 0.5713654211162366, "learning_rate": 4.886108141150866e-06, "loss": 0.5461, "step": 1182 }, { "epoch": 1.4479804161566707, "grad_norm": 1.2211191859404338, "learning_rate": 4.885916829046692e-06, "loss": 0.6173, "step": 1183 }, { "epoch": 1.4492044063647491, "grad_norm": 0.7682521127007785, "learning_rate": 4.885725360148888e-06, "loss": 0.5721, "step": 1184 }, { "epoch": 1.4504283965728275, "grad_norm": 0.8476181582721496, "learning_rate": 4.885533734470039e-06, "loss": 1.0153, "step": 1185 }, { "epoch": 1.4516523867809057, "grad_norm": 1.2083400167422327, "learning_rate": 4.885341952022735e-06, "loss": 0.8671, "step": 1186 }, { "epoch": 1.452876376988984, "grad_norm": 0.765224846936888, "learning_rate": 4.885150012819582e-06, "loss": 0.5466, "step": 1187 }, { "epoch": 1.4541003671970625, "grad_norm": 0.7271323706539619, "learning_rate": 4.884957916873191e-06, "loss": 0.6565, "step": 1188 }, { "epoch": 1.4553243574051407, "grad_norm": 1.2275074254847234, "learning_rate": 4.884765664196187e-06, "loss": 0.4773, "step": 1189 }, { "epoch": 1.456548347613219, "grad_norm": 1.1887938606969513, "learning_rate": 4.884573254801205e-06, "loss": 0.6802, "step": 1190 }, { "epoch": 1.4577723378212974, "grad_norm": 0.7637351692957838, "learning_rate": 4.884380688700888e-06, "loss": 0.7452, "step": 1191 }, { "epoch": 1.4589963280293758, "grad_norm": 0.8663856818816139, "learning_rate": 4.8841879659078915e-06, "loss": 0.7114, "step": 1192 }, { "epoch": 1.460220318237454, "grad_norm": 0.8545206546836961, "learning_rate": 4.883995086434881e-06, "loss": 0.6757, "step": 1193 }, { "epoch": 1.4614443084455324, "grad_norm": 1.099454532209251, "learning_rate": 4.8838020502945305e-06, "loss": 1.1227, "step": 1194 }, { "epoch": 1.4626682986536108, "grad_norm": 0.9471838652399067, "learning_rate": 4.883608857499526e-06, "loss": 0.7475, "step": 1195 }, { "epoch": 1.4638922888616892, "grad_norm": 0.5817100965156184, "learning_rate": 4.883415508062565e-06, "loss": 0.5643, "step": 1196 }, { "epoch": 1.4651162790697674, "grad_norm": 0.5242312017787263, "learning_rate": 4.883222001996352e-06, "loss": 0.6021, "step": 1197 }, { "epoch": 1.4663402692778458, "grad_norm": 0.6112608879238469, "learning_rate": 4.883028339313603e-06, "loss": 0.5515, "step": 1198 }, { "epoch": 1.4675642594859242, "grad_norm": 0.746363745826184, "learning_rate": 4.8828345200270465e-06, "loss": 0.6575, "step": 1199 }, { "epoch": 1.4687882496940023, "grad_norm": 0.5441400220762983, "learning_rate": 4.8826405441494185e-06, "loss": 0.6424, "step": 1200 }, { "epoch": 1.4700122399020807, "grad_norm": 0.7418069408837273, "learning_rate": 4.8824464116934675e-06, "loss": 0.5822, "step": 1201 }, { "epoch": 1.4712362301101591, "grad_norm": 1.0771289447214616, "learning_rate": 4.882252122671949e-06, "loss": 0.6028, "step": 1202 }, { "epoch": 1.4724602203182375, "grad_norm": 0.9410223309511013, "learning_rate": 4.882057677097633e-06, "loss": 0.5943, "step": 1203 }, { "epoch": 1.4736842105263157, "grad_norm": 0.5799904346053545, "learning_rate": 4.881863074983298e-06, "loss": 0.6269, "step": 1204 }, { "epoch": 1.474908200734394, "grad_norm": 0.9798184438144371, "learning_rate": 4.881668316341731e-06, "loss": 0.9567, "step": 1205 }, { "epoch": 1.4761321909424725, "grad_norm": 1.0055329204246881, "learning_rate": 4.881473401185732e-06, "loss": 0.6094, "step": 1206 }, { "epoch": 1.477356181150551, "grad_norm": 0.9763515548928199, "learning_rate": 4.881278329528108e-06, "loss": 0.6275, "step": 1207 }, { "epoch": 1.478580171358629, "grad_norm": 1.174518265429184, "learning_rate": 4.881083101381681e-06, "loss": 0.4906, "step": 1208 }, { "epoch": 1.4798041615667075, "grad_norm": 1.1596627219040332, "learning_rate": 4.8808877167592795e-06, "loss": 0.5106, "step": 1209 }, { "epoch": 1.4810281517747859, "grad_norm": 0.851382671798185, "learning_rate": 4.880692175673743e-06, "loss": 1.221, "step": 1210 }, { "epoch": 1.482252141982864, "grad_norm": 0.3815843895462902, "learning_rate": 4.880496478137923e-06, "loss": 0.2825, "step": 1211 }, { "epoch": 1.4834761321909424, "grad_norm": 0.8071900253370294, "learning_rate": 4.880300624164679e-06, "loss": 0.7536, "step": 1212 }, { "epoch": 1.4847001223990208, "grad_norm": 0.9276840768926988, "learning_rate": 4.880104613766881e-06, "loss": 0.4521, "step": 1213 }, { "epoch": 1.4859241126070992, "grad_norm": 1.4711334313118787, "learning_rate": 4.879908446957413e-06, "loss": 1.046, "step": 1214 }, { "epoch": 1.4871481028151774, "grad_norm": 0.8187000320715278, "learning_rate": 4.879712123749164e-06, "loss": 0.5378, "step": 1215 }, { "epoch": 1.4883720930232558, "grad_norm": 1.6789672103265632, "learning_rate": 4.879515644155036e-06, "loss": 0.5873, "step": 1216 }, { "epoch": 1.4895960832313342, "grad_norm": 0.7475266800258944, "learning_rate": 4.879319008187942e-06, "loss": 1.388, "step": 1217 }, { "epoch": 1.4908200734394126, "grad_norm": 0.8530002499718742, "learning_rate": 4.879122215860802e-06, "loss": 0.5782, "step": 1218 }, { "epoch": 1.4920440636474908, "grad_norm": 0.9760552501347369, "learning_rate": 4.878925267186549e-06, "loss": 0.3886, "step": 1219 }, { "epoch": 1.4932680538555692, "grad_norm": 0.7262251767219555, "learning_rate": 4.878728162178128e-06, "loss": 0.5767, "step": 1220 }, { "epoch": 1.4944920440636476, "grad_norm": 0.8496532393192555, "learning_rate": 4.8785309008484905e-06, "loss": 1.4828, "step": 1221 }, { "epoch": 1.4957160342717257, "grad_norm": 0.818644098190518, "learning_rate": 4.878333483210599e-06, "loss": 0.5851, "step": 1222 }, { "epoch": 1.4969400244798041, "grad_norm": 0.7737479488375891, "learning_rate": 4.878135909277428e-06, "loss": 0.6536, "step": 1223 }, { "epoch": 1.4981640146878825, "grad_norm": 1.1221044816124206, "learning_rate": 4.877938179061962e-06, "loss": 0.7076, "step": 1224 }, { "epoch": 1.499388004895961, "grad_norm": 1.3181398891150558, "learning_rate": 4.877740292577194e-06, "loss": 0.4751, "step": 1225 }, { "epoch": 1.5006119951040393, "grad_norm": 1.386905200834611, "learning_rate": 4.877542249836127e-06, "loss": 0.9189, "step": 1226 }, { "epoch": 1.5018359853121175, "grad_norm": 1.3514418295926585, "learning_rate": 4.877344050851779e-06, "loss": 1.283, "step": 1227 }, { "epoch": 1.5030599755201959, "grad_norm": 0.6288279384701069, "learning_rate": 4.877145695637173e-06, "loss": 0.5059, "step": 1228 }, { "epoch": 1.5042839657282743, "grad_norm": 0.7333657087818135, "learning_rate": 4.8769471842053436e-06, "loss": 0.946, "step": 1229 }, { "epoch": 1.5055079559363524, "grad_norm": 0.5685984509039165, "learning_rate": 4.876748516569337e-06, "loss": 0.6067, "step": 1230 }, { "epoch": 1.5067319461444308, "grad_norm": 0.7669290361892817, "learning_rate": 4.876549692742209e-06, "loss": 0.5138, "step": 1231 }, { "epoch": 1.5079559363525092, "grad_norm": 1.0123100175840016, "learning_rate": 4.876350712737026e-06, "loss": 0.5887, "step": 1232 }, { "epoch": 1.5091799265605874, "grad_norm": 0.7024717228794907, "learning_rate": 4.876151576566863e-06, "loss": 0.6919, "step": 1233 }, { "epoch": 1.5104039167686658, "grad_norm": 0.9258911198930364, "learning_rate": 4.875952284244808e-06, "loss": 1.2447, "step": 1234 }, { "epoch": 1.5116279069767442, "grad_norm": 1.0980471908498237, "learning_rate": 4.875752835783957e-06, "loss": 1.4557, "step": 1235 }, { "epoch": 1.5128518971848224, "grad_norm": 0.6739579246101375, "learning_rate": 4.875553231197416e-06, "loss": 0.5888, "step": 1236 }, { "epoch": 1.514075887392901, "grad_norm": 1.5045137449631847, "learning_rate": 4.875353470498303e-06, "loss": 0.6895, "step": 1237 }, { "epoch": 1.5152998776009792, "grad_norm": 1.0304562114394713, "learning_rate": 4.8751535536997465e-06, "loss": 0.6715, "step": 1238 }, { "epoch": 1.5165238678090576, "grad_norm": 1.1798508958139775, "learning_rate": 4.8749534808148835e-06, "loss": 0.5468, "step": 1239 }, { "epoch": 1.517747858017136, "grad_norm": 1.30341616347032, "learning_rate": 4.874753251856862e-06, "loss": 0.5787, "step": 1240 }, { "epoch": 1.5189718482252141, "grad_norm": 1.0958475763129394, "learning_rate": 4.87455286683884e-06, "loss": 0.4981, "step": 1241 }, { "epoch": 1.5201958384332925, "grad_norm": 0.6226745407956138, "learning_rate": 4.8743523257739866e-06, "loss": 0.5477, "step": 1242 }, { "epoch": 1.521419828641371, "grad_norm": 1.1042885954704351, "learning_rate": 4.874151628675481e-06, "loss": 0.5586, "step": 1243 }, { "epoch": 1.522643818849449, "grad_norm": 0.6993281146757927, "learning_rate": 4.873950775556511e-06, "loss": 0.5808, "step": 1244 }, { "epoch": 1.5238678090575275, "grad_norm": 1.811794119264975, "learning_rate": 4.873749766430277e-06, "loss": 0.5287, "step": 1245 }, { "epoch": 1.525091799265606, "grad_norm": 0.519143181601484, "learning_rate": 4.873548601309988e-06, "loss": 0.5732, "step": 1246 }, { "epoch": 1.526315789473684, "grad_norm": 0.5053337785484447, "learning_rate": 4.873347280208866e-06, "loss": 0.4506, "step": 1247 }, { "epoch": 1.5275397796817627, "grad_norm": 1.1466487211396454, "learning_rate": 4.873145803140137e-06, "loss": 0.9317, "step": 1248 }, { "epoch": 1.5287637698898409, "grad_norm": 1.3975054358871863, "learning_rate": 4.872944170117044e-06, "loss": 0.6666, "step": 1249 }, { "epoch": 1.5299877600979193, "grad_norm": 0.9055931967858868, "learning_rate": 4.872742381152837e-06, "loss": 1.4555, "step": 1250 }, { "epoch": 1.5312117503059977, "grad_norm": 1.2105732945510717, "learning_rate": 4.872540436260776e-06, "loss": 0.7988, "step": 1251 }, { "epoch": 1.5324357405140758, "grad_norm": 0.6746884816320633, "learning_rate": 4.872338335454134e-06, "loss": 0.6277, "step": 1252 }, { "epoch": 1.5336597307221542, "grad_norm": 1.1093992332542735, "learning_rate": 4.872136078746191e-06, "loss": 1.2359, "step": 1253 }, { "epoch": 1.5348837209302326, "grad_norm": 0.8547103999645286, "learning_rate": 4.871933666150239e-06, "loss": 1.305, "step": 1254 }, { "epoch": 1.5361077111383108, "grad_norm": 0.7343524561546985, "learning_rate": 4.87173109767958e-06, "loss": 1.1699, "step": 1255 }, { "epoch": 1.5373317013463892, "grad_norm": 0.8962267139467814, "learning_rate": 4.871528373347525e-06, "loss": 1.2598, "step": 1256 }, { "epoch": 1.5385556915544676, "grad_norm": 1.3428711439563348, "learning_rate": 4.871325493167398e-06, "loss": 0.6084, "step": 1257 }, { "epoch": 1.5397796817625458, "grad_norm": 0.6398627843784108, "learning_rate": 4.87112245715253e-06, "loss": 0.6951, "step": 1258 }, { "epoch": 1.5410036719706244, "grad_norm": 0.7893622728968009, "learning_rate": 4.870919265316264e-06, "loss": 0.8219, "step": 1259 }, { "epoch": 1.5422276621787026, "grad_norm": 0.7824666346718955, "learning_rate": 4.870715917671953e-06, "loss": 1.1929, "step": 1260 }, { "epoch": 1.543451652386781, "grad_norm": 0.9235517577337293, "learning_rate": 4.870512414232961e-06, "loss": 0.6401, "step": 1261 }, { "epoch": 1.5446756425948593, "grad_norm": 0.8740233896553848, "learning_rate": 4.870308755012661e-06, "loss": 1.0384, "step": 1262 }, { "epoch": 1.5458996328029375, "grad_norm": 1.1131375093209825, "learning_rate": 4.870104940024437e-06, "loss": 1.2314, "step": 1263 }, { "epoch": 1.547123623011016, "grad_norm": 0.9155993202320094, "learning_rate": 4.869900969281683e-06, "loss": 0.5068, "step": 1264 }, { "epoch": 1.5483476132190943, "grad_norm": 0.7877223716528418, "learning_rate": 4.869696842797802e-06, "loss": 0.8321, "step": 1265 }, { "epoch": 1.5495716034271725, "grad_norm": 1.0729437717819634, "learning_rate": 4.86949256058621e-06, "loss": 0.9288, "step": 1266 }, { "epoch": 1.5507955936352509, "grad_norm": 0.8140952636405555, "learning_rate": 4.869288122660331e-06, "loss": 0.5267, "step": 1267 }, { "epoch": 1.5520195838433293, "grad_norm": 0.6779073107237319, "learning_rate": 4.869083529033599e-06, "loss": 0.8976, "step": 1268 }, { "epoch": 1.5532435740514074, "grad_norm": 1.0657664771955375, "learning_rate": 4.868878779719462e-06, "loss": 1.1118, "step": 1269 }, { "epoch": 1.554467564259486, "grad_norm": 0.9703439580190897, "learning_rate": 4.868673874731372e-06, "loss": 1.26, "step": 1270 }, { "epoch": 1.5556915544675642, "grad_norm": 0.7240747517427476, "learning_rate": 4.868468814082796e-06, "loss": 0.399, "step": 1271 }, { "epoch": 1.5569155446756426, "grad_norm": 0.8279338496867802, "learning_rate": 4.868263597787211e-06, "loss": 1.0394, "step": 1272 }, { "epoch": 1.558139534883721, "grad_norm": 0.697447343309313, "learning_rate": 4.8680582258581016e-06, "loss": 1.0164, "step": 1273 }, { "epoch": 1.5593635250917992, "grad_norm": 0.6791974737894272, "learning_rate": 4.867852698308964e-06, "loss": 0.9366, "step": 1274 }, { "epoch": 1.5605875152998776, "grad_norm": 1.1429456597771954, "learning_rate": 4.867647015153306e-06, "loss": 0.6507, "step": 1275 }, { "epoch": 1.561811505507956, "grad_norm": 0.8627963461149404, "learning_rate": 4.8674411764046425e-06, "loss": 0.701, "step": 1276 }, { "epoch": 1.5630354957160342, "grad_norm": 0.9307743538200832, "learning_rate": 4.867235182076502e-06, "loss": 0.8259, "step": 1277 }, { "epoch": 1.5642594859241126, "grad_norm": 0.49952956018201805, "learning_rate": 4.867029032182421e-06, "loss": 0.5092, "step": 1278 }, { "epoch": 1.565483476132191, "grad_norm": 0.9086549916176242, "learning_rate": 4.866822726735947e-06, "loss": 0.5722, "step": 1279 }, { "epoch": 1.5667074663402691, "grad_norm": 0.8128093759307843, "learning_rate": 4.866616265750637e-06, "loss": 1.3979, "step": 1280 }, { "epoch": 1.5679314565483478, "grad_norm": 1.2887211920427077, "learning_rate": 4.8664096492400604e-06, "loss": 0.6293, "step": 1281 }, { "epoch": 1.569155446756426, "grad_norm": 0.7472808379577176, "learning_rate": 4.8662028772177945e-06, "loss": 0.7293, "step": 1282 }, { "epoch": 1.5703794369645043, "grad_norm": 1.2209972349362828, "learning_rate": 4.8659959496974276e-06, "loss": 0.6568, "step": 1283 }, { "epoch": 1.5716034271725827, "grad_norm": 0.8050873513630448, "learning_rate": 4.865788866692557e-06, "loss": 0.5319, "step": 1284 }, { "epoch": 1.572827417380661, "grad_norm": 1.080940081244901, "learning_rate": 4.865581628216793e-06, "loss": 0.5959, "step": 1285 }, { "epoch": 1.5740514075887393, "grad_norm": 1.2188637016420278, "learning_rate": 4.865374234283755e-06, "loss": 1.058, "step": 1286 }, { "epoch": 1.5752753977968177, "grad_norm": 0.5802900643246232, "learning_rate": 4.86516668490707e-06, "loss": 0.4426, "step": 1287 }, { "epoch": 1.5764993880048959, "grad_norm": 0.67331000299375, "learning_rate": 4.864958980100379e-06, "loss": 0.4751, "step": 1288 }, { "epoch": 1.5777233782129743, "grad_norm": 0.620216303123961, "learning_rate": 4.864751119877332e-06, "loss": 0.6445, "step": 1289 }, { "epoch": 1.5789473684210527, "grad_norm": 0.5603524188150197, "learning_rate": 4.864543104251587e-06, "loss": 0.5166, "step": 1290 }, { "epoch": 1.5801713586291308, "grad_norm": 0.9628117918710384, "learning_rate": 4.864334933236816e-06, "loss": 0.669, "step": 1291 }, { "epoch": 1.5813953488372094, "grad_norm": 0.7747258513436537, "learning_rate": 4.864126606846697e-06, "loss": 0.6797, "step": 1292 }, { "epoch": 1.5826193390452876, "grad_norm": 1.0510986552259636, "learning_rate": 4.8639181250949225e-06, "loss": 1.0228, "step": 1293 }, { "epoch": 1.583843329253366, "grad_norm": 1.1323773196729516, "learning_rate": 4.863709487995192e-06, "loss": 1.399, "step": 1294 }, { "epoch": 1.5850673194614444, "grad_norm": 1.054771893563801, "learning_rate": 4.8635006955612175e-06, "loss": 0.6225, "step": 1295 }, { "epoch": 1.5862913096695226, "grad_norm": 1.289721539358316, "learning_rate": 4.863291747806718e-06, "loss": 0.5786, "step": 1296 }, { "epoch": 1.587515299877601, "grad_norm": 1.066661357290134, "learning_rate": 4.863082644745427e-06, "loss": 0.5561, "step": 1297 }, { "epoch": 1.5887392900856794, "grad_norm": 0.8861876581718572, "learning_rate": 4.862873386391085e-06, "loss": 0.5817, "step": 1298 }, { "epoch": 1.5899632802937576, "grad_norm": 1.2134201505155071, "learning_rate": 4.862663972757443e-06, "loss": 0.5431, "step": 1299 }, { "epoch": 1.591187270501836, "grad_norm": 0.6824973037816972, "learning_rate": 4.862454403858264e-06, "loss": 0.5988, "step": 1300 }, { "epoch": 1.5924112607099143, "grad_norm": 0.9118784821892911, "learning_rate": 4.86224467970732e-06, "loss": 0.9899, "step": 1301 }, { "epoch": 1.5936352509179925, "grad_norm": 0.8463844548075125, "learning_rate": 4.862034800318392e-06, "loss": 1.147, "step": 1302 }, { "epoch": 1.5948592411260711, "grad_norm": 0.8838515084760562, "learning_rate": 4.861824765705275e-06, "loss": 0.5707, "step": 1303 }, { "epoch": 1.5960832313341493, "grad_norm": 0.5586838901577673, "learning_rate": 4.861614575881769e-06, "loss": 0.509, "step": 1304 }, { "epoch": 1.5973072215422277, "grad_norm": 0.7278865696787127, "learning_rate": 4.861404230861688e-06, "loss": 0.5505, "step": 1305 }, { "epoch": 1.598531211750306, "grad_norm": 1.209919730231102, "learning_rate": 4.861193730658856e-06, "loss": 0.4917, "step": 1306 }, { "epoch": 1.5997552019583843, "grad_norm": 1.0902870395851068, "learning_rate": 4.8609830752871055e-06, "loss": 0.527, "step": 1307 }, { "epoch": 1.6009791921664627, "grad_norm": 0.5400794698971968, "learning_rate": 4.860772264760279e-06, "loss": 0.5704, "step": 1308 }, { "epoch": 1.602203182374541, "grad_norm": 0.9534456903405749, "learning_rate": 4.8605612990922326e-06, "loss": 1.5033, "step": 1309 }, { "epoch": 1.6034271725826192, "grad_norm": 1.6361443891689702, "learning_rate": 4.860350178296828e-06, "loss": 0.482, "step": 1310 }, { "epoch": 1.6046511627906976, "grad_norm": 1.2937913155461236, "learning_rate": 4.8601389023879395e-06, "loss": 0.4973, "step": 1311 }, { "epoch": 1.605875152998776, "grad_norm": 1.225940152786754, "learning_rate": 4.859927471379452e-06, "loss": 0.5878, "step": 1312 }, { "epoch": 1.6070991432068542, "grad_norm": 0.8729862588247709, "learning_rate": 4.85971588528526e-06, "loss": 0.6194, "step": 1313 }, { "epoch": 1.6083231334149328, "grad_norm": 0.9912373185567547, "learning_rate": 4.859504144119268e-06, "loss": 0.4919, "step": 1314 }, { "epoch": 1.609547123623011, "grad_norm": 0.49511696212569967, "learning_rate": 4.8592922478953915e-06, "loss": 0.4917, "step": 1315 }, { "epoch": 1.6107711138310894, "grad_norm": 0.5970535155490498, "learning_rate": 4.859080196627554e-06, "loss": 0.5607, "step": 1316 }, { "epoch": 1.6119951040391678, "grad_norm": 1.0702180327365347, "learning_rate": 4.858867990329692e-06, "loss": 0.7249, "step": 1317 }, { "epoch": 1.613219094247246, "grad_norm": 1.2623727071558004, "learning_rate": 4.85865562901575e-06, "loss": 0.6018, "step": 1318 }, { "epoch": 1.6144430844553244, "grad_norm": 0.8862716295358708, "learning_rate": 4.858443112699685e-06, "loss": 0.7971, "step": 1319 }, { "epoch": 1.6156670746634028, "grad_norm": 0.6785540271818395, "learning_rate": 4.858230441395462e-06, "loss": 0.4315, "step": 1320 }, { "epoch": 1.616891064871481, "grad_norm": 1.0348099374879327, "learning_rate": 4.8580176151170565e-06, "loss": 1.5543, "step": 1321 }, { "epoch": 1.6181150550795593, "grad_norm": 0.859196625423065, "learning_rate": 4.857804633878455e-06, "loss": 0.9792, "step": 1322 }, { "epoch": 1.6193390452876377, "grad_norm": 0.7252994247653778, "learning_rate": 4.857591497693654e-06, "loss": 0.5629, "step": 1323 }, { "epoch": 1.620563035495716, "grad_norm": 1.2377112659141427, "learning_rate": 4.857378206576659e-06, "loss": 0.6221, "step": 1324 }, { "epoch": 1.6217870257037945, "grad_norm": 1.2621367558751118, "learning_rate": 4.857164760541489e-06, "loss": 0.7073, "step": 1325 }, { "epoch": 1.6230110159118727, "grad_norm": 1.081962728710649, "learning_rate": 4.856951159602168e-06, "loss": 1.0375, "step": 1326 }, { "epoch": 1.624235006119951, "grad_norm": 1.3287630685401857, "learning_rate": 4.856737403772735e-06, "loss": 0.6238, "step": 1327 }, { "epoch": 1.6254589963280295, "grad_norm": 0.5749454756818798, "learning_rate": 4.856523493067238e-06, "loss": 0.5984, "step": 1328 }, { "epoch": 1.6266829865361077, "grad_norm": 0.914600103989306, "learning_rate": 4.8563094274997325e-06, "loss": 0.5569, "step": 1329 }, { "epoch": 1.627906976744186, "grad_norm": 0.9279012652409752, "learning_rate": 4.856095207084287e-06, "loss": 0.8186, "step": 1330 }, { "epoch": 1.6291309669522644, "grad_norm": 0.7431589824602952, "learning_rate": 4.855880831834978e-06, "loss": 0.5676, "step": 1331 }, { "epoch": 1.6303549571603426, "grad_norm": 1.209577241666578, "learning_rate": 4.855666301765895e-06, "loss": 0.5479, "step": 1332 }, { "epoch": 1.631578947368421, "grad_norm": 1.5426648423845288, "learning_rate": 4.8554516168911364e-06, "loss": 1.0857, "step": 1333 }, { "epoch": 1.6328029375764994, "grad_norm": 1.1627382551047727, "learning_rate": 4.85523677722481e-06, "loss": 0.6013, "step": 1334 }, { "epoch": 1.6340269277845776, "grad_norm": 0.9986436889437172, "learning_rate": 4.855021782781033e-06, "loss": 0.6065, "step": 1335 }, { "epoch": 1.6352509179926562, "grad_norm": 0.7767789280488503, "learning_rate": 4.8548066335739355e-06, "loss": 0.9064, "step": 1336 }, { "epoch": 1.6364749082007344, "grad_norm": 0.8492255677426758, "learning_rate": 4.854591329617655e-06, "loss": 1.3151, "step": 1337 }, { "epoch": 1.6376988984088128, "grad_norm": 0.989116711650738, "learning_rate": 4.854375870926342e-06, "loss": 1.6304, "step": 1338 }, { "epoch": 1.6389228886168912, "grad_norm": 0.9441605227066312, "learning_rate": 4.854160257514155e-06, "loss": 0.4936, "step": 1339 }, { "epoch": 1.6401468788249693, "grad_norm": 0.7577852722313807, "learning_rate": 4.853944489395264e-06, "loss": 1.1823, "step": 1340 }, { "epoch": 1.6413708690330477, "grad_norm": 1.0699013530541237, "learning_rate": 4.853728566583847e-06, "loss": 1.3895, "step": 1341 }, { "epoch": 1.6425948592411261, "grad_norm": 1.0177459759367598, "learning_rate": 4.8535124890940944e-06, "loss": 0.6413, "step": 1342 }, { "epoch": 1.6438188494492043, "grad_norm": 0.7484579873313579, "learning_rate": 4.853296256940206e-06, "loss": 0.7953, "step": 1343 }, { "epoch": 1.6450428396572827, "grad_norm": 0.7065984335070344, "learning_rate": 4.853079870136392e-06, "loss": 0.7967, "step": 1344 }, { "epoch": 1.646266829865361, "grad_norm": 0.6754733251334394, "learning_rate": 4.852863328696873e-06, "loss": 0.4691, "step": 1345 }, { "epoch": 1.6474908200734393, "grad_norm": 0.7535483018816074, "learning_rate": 4.852646632635878e-06, "loss": 0.6419, "step": 1346 }, { "epoch": 1.648714810281518, "grad_norm": 0.9335048575988064, "learning_rate": 4.8524297819676494e-06, "loss": 0.463, "step": 1347 }, { "epoch": 1.649938800489596, "grad_norm": 0.4788823849364188, "learning_rate": 4.8522127767064344e-06, "loss": 0.4536, "step": 1348 }, { "epoch": 1.6511627906976745, "grad_norm": 0.6990374625548642, "learning_rate": 4.851995616866497e-06, "loss": 0.508, "step": 1349 }, { "epoch": 1.6523867809057529, "grad_norm": 0.9700323317300212, "learning_rate": 4.851778302462108e-06, "loss": 0.5853, "step": 1350 }, { "epoch": 1.653610771113831, "grad_norm": 0.7357349048416832, "learning_rate": 4.851560833507546e-06, "loss": 0.6819, "step": 1351 }, { "epoch": 1.6548347613219094, "grad_norm": 0.6174928789709433, "learning_rate": 4.8513432100171044e-06, "loss": 0.7344, "step": 1352 }, { "epoch": 1.6560587515299878, "grad_norm": 0.41941156105343674, "learning_rate": 4.851125432005085e-06, "loss": 0.3967, "step": 1353 }, { "epoch": 1.657282741738066, "grad_norm": 1.10855360030447, "learning_rate": 4.850907499485798e-06, "loss": 1.1499, "step": 1354 }, { "epoch": 1.6585067319461444, "grad_norm": 0.8030494616017138, "learning_rate": 4.850689412473565e-06, "loss": 0.6579, "step": 1355 }, { "epoch": 1.6597307221542228, "grad_norm": 0.6652803920591223, "learning_rate": 4.850471170982718e-06, "loss": 0.6075, "step": 1356 }, { "epoch": 1.660954712362301, "grad_norm": 0.8485450368081902, "learning_rate": 4.8502527750276e-06, "loss": 0.5829, "step": 1357 }, { "epoch": 1.6621787025703796, "grad_norm": 0.8486727341664393, "learning_rate": 4.850034224622563e-06, "loss": 0.5789, "step": 1358 }, { "epoch": 1.6634026927784578, "grad_norm": 1.2916547002119936, "learning_rate": 4.849815519781969e-06, "loss": 0.7959, "step": 1359 }, { "epoch": 1.6646266829865362, "grad_norm": 1.3670052703137243, "learning_rate": 4.8495966605201895e-06, "loss": 0.4788, "step": 1360 }, { "epoch": 1.6658506731946146, "grad_norm": 0.8459446527470325, "learning_rate": 4.849377646851609e-06, "loss": 0.9401, "step": 1361 }, { "epoch": 1.6670746634026927, "grad_norm": 0.8850094395833262, "learning_rate": 4.849158478790618e-06, "loss": 0.9929, "step": 1362 }, { "epoch": 1.6682986536107711, "grad_norm": 0.82629077446571, "learning_rate": 4.848939156351622e-06, "loss": 0.5203, "step": 1363 }, { "epoch": 1.6695226438188495, "grad_norm": 0.984167554488913, "learning_rate": 4.848719679549032e-06, "loss": 0.7205, "step": 1364 }, { "epoch": 1.6707466340269277, "grad_norm": 1.2906276270549286, "learning_rate": 4.8485000483972724e-06, "loss": 0.4931, "step": 1365 }, { "epoch": 1.671970624235006, "grad_norm": 0.8568283075229575, "learning_rate": 4.848280262910776e-06, "loss": 0.5456, "step": 1366 }, { "epoch": 1.6731946144430845, "grad_norm": 1.0378493466396788, "learning_rate": 4.8480603231039855e-06, "loss": 1.0458, "step": 1367 }, { "epoch": 1.6744186046511627, "grad_norm": 1.5545427442070008, "learning_rate": 4.8478402289913566e-06, "loss": 0.5829, "step": 1368 }, { "epoch": 1.6756425948592413, "grad_norm": 1.2146483118697817, "learning_rate": 4.847619980587351e-06, "loss": 0.5282, "step": 1369 }, { "epoch": 1.6768665850673194, "grad_norm": 1.3057177265577697, "learning_rate": 4.847399577906445e-06, "loss": 0.5385, "step": 1370 }, { "epoch": 1.6780905752753978, "grad_norm": 1.3010363198457944, "learning_rate": 4.84717902096312e-06, "loss": 1.0415, "step": 1371 }, { "epoch": 1.6793145654834762, "grad_norm": 1.097423872542746, "learning_rate": 4.846958309771872e-06, "loss": 0.5982, "step": 1372 }, { "epoch": 1.6805385556915544, "grad_norm": 0.9190234955374704, "learning_rate": 4.846737444347204e-06, "loss": 0.8408, "step": 1373 }, { "epoch": 1.6817625458996328, "grad_norm": 0.9539080579963868, "learning_rate": 4.8465164247036315e-06, "loss": 0.6833, "step": 1374 }, { "epoch": 1.6829865361077112, "grad_norm": 1.0189941618103304, "learning_rate": 4.8462952508556795e-06, "loss": 0.5425, "step": 1375 }, { "epoch": 1.6842105263157894, "grad_norm": 0.9546078906645562, "learning_rate": 4.846073922817881e-06, "loss": 0.7074, "step": 1376 }, { "epoch": 1.6854345165238678, "grad_norm": 1.318250133445118, "learning_rate": 4.8458524406047814e-06, "loss": 0.59, "step": 1377 }, { "epoch": 1.6866585067319462, "grad_norm": 0.8396172369658257, "learning_rate": 4.845630804230937e-06, "loss": 0.3801, "step": 1378 }, { "epoch": 1.6878824969400243, "grad_norm": 0.5053025695981047, "learning_rate": 4.845409013710912e-06, "loss": 0.4293, "step": 1379 }, { "epoch": 1.689106487148103, "grad_norm": 1.107202519002441, "learning_rate": 4.8451870690592815e-06, "loss": 0.5315, "step": 1380 }, { "epoch": 1.6903304773561811, "grad_norm": 0.8321144207482137, "learning_rate": 4.8449649702906305e-06, "loss": 1.1851, "step": 1381 }, { "epoch": 1.6915544675642595, "grad_norm": 0.5432489801135765, "learning_rate": 4.844742717419555e-06, "loss": 0.4244, "step": 1382 }, { "epoch": 1.692778457772338, "grad_norm": 0.7461345531536079, "learning_rate": 4.8445203104606616e-06, "loss": 0.4797, "step": 1383 }, { "epoch": 1.694002447980416, "grad_norm": 1.0772964135869134, "learning_rate": 4.8442977494285645e-06, "loss": 1.4066, "step": 1384 }, { "epoch": 1.6952264381884945, "grad_norm": 0.8885310766162761, "learning_rate": 4.844075034337891e-06, "loss": 1.6176, "step": 1385 }, { "epoch": 1.696450428396573, "grad_norm": 0.8881540751736681, "learning_rate": 4.843852165203276e-06, "loss": 1.2524, "step": 1386 }, { "epoch": 1.697674418604651, "grad_norm": 1.1335553658619664, "learning_rate": 4.843629142039366e-06, "loss": 0.6461, "step": 1387 }, { "epoch": 1.6988984088127295, "grad_norm": 1.1289339218566845, "learning_rate": 4.843405964860818e-06, "loss": 0.5618, "step": 1388 }, { "epoch": 1.7001223990208079, "grad_norm": 0.8939417067555648, "learning_rate": 4.8431826336822965e-06, "loss": 1.3365, "step": 1389 }, { "epoch": 1.701346389228886, "grad_norm": 0.9214500333377122, "learning_rate": 4.8429591485184795e-06, "loss": 0.6453, "step": 1390 }, { "epoch": 1.7025703794369647, "grad_norm": 0.8788926841480005, "learning_rate": 4.842735509384053e-06, "loss": 0.7287, "step": 1391 }, { "epoch": 1.7037943696450428, "grad_norm": 0.9457172859574038, "learning_rate": 4.842511716293715e-06, "loss": 0.6348, "step": 1392 }, { "epoch": 1.7050183598531212, "grad_norm": 1.1734931553643246, "learning_rate": 4.8422877692621705e-06, "loss": 0.53, "step": 1393 }, { "epoch": 1.7062423500611996, "grad_norm": 1.3285801745847947, "learning_rate": 4.842063668304138e-06, "loss": 0.501, "step": 1394 }, { "epoch": 1.7074663402692778, "grad_norm": 0.7199472703807621, "learning_rate": 4.841839413434343e-06, "loss": 0.6997, "step": 1395 }, { "epoch": 1.7086903304773562, "grad_norm": 1.83276289893966, "learning_rate": 4.841615004667524e-06, "loss": 0.5097, "step": 1396 }, { "epoch": 1.7099143206854346, "grad_norm": 0.7052925368181556, "learning_rate": 4.841390442018428e-06, "loss": 0.8261, "step": 1397 }, { "epoch": 1.7111383108935128, "grad_norm": 0.7598471345316804, "learning_rate": 4.841165725501812e-06, "loss": 0.7082, "step": 1398 }, { "epoch": 1.7123623011015912, "grad_norm": 0.7840692605783667, "learning_rate": 4.840940855132445e-06, "loss": 0.5775, "step": 1399 }, { "epoch": 1.7135862913096696, "grad_norm": 0.57584935461918, "learning_rate": 4.840715830925102e-06, "loss": 0.5684, "step": 1400 }, { "epoch": 1.7148102815177477, "grad_norm": 0.6617358250441628, "learning_rate": 4.8404906528945736e-06, "loss": 0.4991, "step": 1401 }, { "epoch": 1.7160342717258263, "grad_norm": 1.1445618470947627, "learning_rate": 4.840265321055656e-06, "loss": 0.5051, "step": 1402 }, { "epoch": 1.7172582619339045, "grad_norm": 1.538271508159586, "learning_rate": 4.840039835423157e-06, "loss": 0.654, "step": 1403 }, { "epoch": 1.718482252141983, "grad_norm": 0.7134922011799449, "learning_rate": 4.839814196011896e-06, "loss": 1.2648, "step": 1404 }, { "epoch": 1.7197062423500613, "grad_norm": 0.969705780432247, "learning_rate": 4.8395884028366994e-06, "loss": 0.539, "step": 1405 }, { "epoch": 1.7209302325581395, "grad_norm": 0.7386756837251552, "learning_rate": 4.8393624559124074e-06, "loss": 0.5594, "step": 1406 }, { "epoch": 1.7221542227662179, "grad_norm": 0.6629138978784509, "learning_rate": 4.8391363552538674e-06, "loss": 0.5322, "step": 1407 }, { "epoch": 1.7233782129742963, "grad_norm": 1.372697965929999, "learning_rate": 4.838910100875937e-06, "loss": 0.6041, "step": 1408 }, { "epoch": 1.7246022031823744, "grad_norm": 1.1665020399550567, "learning_rate": 4.838683692793487e-06, "loss": 0.5951, "step": 1409 }, { "epoch": 1.7258261933904528, "grad_norm": 1.00397553516941, "learning_rate": 4.838457131021394e-06, "loss": 0.7124, "step": 1410 }, { "epoch": 1.7270501835985312, "grad_norm": 1.0035554133006386, "learning_rate": 4.838230415574549e-06, "loss": 0.6264, "step": 1411 }, { "epoch": 1.7282741738066094, "grad_norm": 0.8629602372173336, "learning_rate": 4.8380035464678485e-06, "loss": 0.6809, "step": 1412 }, { "epoch": 1.729498164014688, "grad_norm": 1.1364577666201288, "learning_rate": 4.837776523716203e-06, "loss": 0.6161, "step": 1413 }, { "epoch": 1.7307221542227662, "grad_norm": 1.1951282054629713, "learning_rate": 4.837549347334532e-06, "loss": 0.6011, "step": 1414 }, { "epoch": 1.7319461444308446, "grad_norm": 0.7444852547295959, "learning_rate": 4.837322017337762e-06, "loss": 0.6868, "step": 1415 }, { "epoch": 1.733170134638923, "grad_norm": 1.2031672218003941, "learning_rate": 4.8370945337408356e-06, "loss": 1.1329, "step": 1416 }, { "epoch": 1.7343941248470012, "grad_norm": 0.5380183744441042, "learning_rate": 4.836866896558701e-06, "loss": 0.3613, "step": 1417 }, { "epoch": 1.7356181150550796, "grad_norm": 1.1858491090616687, "learning_rate": 4.836639105806316e-06, "loss": 0.7785, "step": 1418 }, { "epoch": 1.736842105263158, "grad_norm": 0.9596609725048824, "learning_rate": 4.836411161498653e-06, "loss": 0.5446, "step": 1419 }, { "epoch": 1.7380660954712361, "grad_norm": 1.812871766843271, "learning_rate": 4.836183063650689e-06, "loss": 0.4096, "step": 1420 }, { "epoch": 1.7392900856793145, "grad_norm": 1.3994264445112043, "learning_rate": 4.835954812277416e-06, "loss": 0.6902, "step": 1421 }, { "epoch": 1.740514075887393, "grad_norm": 1.6970485240903936, "learning_rate": 4.835726407393833e-06, "loss": 0.4778, "step": 1422 }, { "epoch": 1.741738066095471, "grad_norm": 1.3219431743593213, "learning_rate": 4.835497849014948e-06, "loss": 0.5155, "step": 1423 }, { "epoch": 1.7429620563035497, "grad_norm": 0.6777142142697299, "learning_rate": 4.835269137155785e-06, "loss": 0.5073, "step": 1424 }, { "epoch": 1.744186046511628, "grad_norm": 0.9467826782412418, "learning_rate": 4.835040271831371e-06, "loss": 1.704, "step": 1425 }, { "epoch": 1.7454100367197063, "grad_norm": 0.8935283934972048, "learning_rate": 4.834811253056746e-06, "loss": 1.4113, "step": 1426 }, { "epoch": 1.7466340269277847, "grad_norm": 0.8144381789045311, "learning_rate": 4.834582080846962e-06, "loss": 0.5345, "step": 1427 }, { "epoch": 1.7478580171358629, "grad_norm": 0.5978291395144247, "learning_rate": 4.834352755217079e-06, "loss": 0.5682, "step": 1428 }, { "epoch": 1.7490820073439413, "grad_norm": 1.0298927491557819, "learning_rate": 4.834123276182167e-06, "loss": 0.4752, "step": 1429 }, { "epoch": 1.7503059975520197, "grad_norm": 1.0757422131080239, "learning_rate": 4.833893643757307e-06, "loss": 1.0708, "step": 1430 }, { "epoch": 1.7515299877600978, "grad_norm": 1.140193493363014, "learning_rate": 4.833663857957589e-06, "loss": 0.6584, "step": 1431 }, { "epoch": 1.7527539779681762, "grad_norm": 1.0498651202795317, "learning_rate": 4.833433918798114e-06, "loss": 0.8691, "step": 1432 }, { "epoch": 1.7539779681762546, "grad_norm": 0.29051884009607687, "learning_rate": 4.8332038262939914e-06, "loss": 0.1424, "step": 1433 }, { "epoch": 1.7552019583843328, "grad_norm": 1.003747979684293, "learning_rate": 4.832973580460345e-06, "loss": 1.6384, "step": 1434 }, { "epoch": 1.7564259485924114, "grad_norm": 1.0275523068284698, "learning_rate": 4.832743181312303e-06, "loss": 0.8051, "step": 1435 }, { "epoch": 1.7576499388004896, "grad_norm": 0.8802168865799148, "learning_rate": 4.832512628865007e-06, "loss": 0.5273, "step": 1436 }, { "epoch": 1.758873929008568, "grad_norm": 1.1090781196950992, "learning_rate": 4.832281923133609e-06, "loss": 0.8023, "step": 1437 }, { "epoch": 1.7600979192166464, "grad_norm": 0.7877038327645125, "learning_rate": 4.83205106413327e-06, "loss": 0.6419, "step": 1438 }, { "epoch": 1.7613219094247246, "grad_norm": 0.9633117173854182, "learning_rate": 4.83182005187916e-06, "loss": 0.7026, "step": 1439 }, { "epoch": 1.762545899632803, "grad_norm": 0.9315482359424593, "learning_rate": 4.8315888863864615e-06, "loss": 0.4907, "step": 1440 }, { "epoch": 1.7637698898408813, "grad_norm": 1.266559758771324, "learning_rate": 4.831357567670365e-06, "loss": 0.3877, "step": 1441 }, { "epoch": 1.7649938800489595, "grad_norm": 1.0711176323070952, "learning_rate": 4.831126095746072e-06, "loss": 0.5213, "step": 1442 }, { "epoch": 1.766217870257038, "grad_norm": 0.560452411556726, "learning_rate": 4.830894470628794e-06, "loss": 0.5893, "step": 1443 }, { "epoch": 1.7674418604651163, "grad_norm": 0.9624596295314435, "learning_rate": 4.830662692333754e-06, "loss": 0.6874, "step": 1444 }, { "epoch": 1.7686658506731945, "grad_norm": 1.1601756221193327, "learning_rate": 4.830430760876181e-06, "loss": 0.7026, "step": 1445 }, { "epoch": 1.769889840881273, "grad_norm": 0.7653526766814143, "learning_rate": 4.830198676271319e-06, "loss": 1.3045, "step": 1446 }, { "epoch": 1.7711138310893513, "grad_norm": 0.836608977438489, "learning_rate": 4.8299664385344184e-06, "loss": 1.0041, "step": 1447 }, { "epoch": 1.7723378212974297, "grad_norm": 0.7163530598483613, "learning_rate": 4.829734047680742e-06, "loss": 0.7731, "step": 1448 }, { "epoch": 1.773561811505508, "grad_norm": 0.7535189244824811, "learning_rate": 4.82950150372556e-06, "loss": 0.7461, "step": 1449 }, { "epoch": 1.7747858017135862, "grad_norm": 0.7676280899418488, "learning_rate": 4.829268806684156e-06, "loss": 0.8682, "step": 1450 }, { "epoch": 1.7760097919216646, "grad_norm": 0.5607757510867017, "learning_rate": 4.829035956571821e-06, "loss": 0.7716, "step": 1451 }, { "epoch": 1.777233782129743, "grad_norm": 0.5288249183296992, "learning_rate": 4.828802953403858e-06, "loss": 0.6514, "step": 1452 }, { "epoch": 1.7784577723378212, "grad_norm": 0.8543669651055841, "learning_rate": 4.8285697971955785e-06, "loss": 0.7092, "step": 1453 }, { "epoch": 1.7796817625458996, "grad_norm": 1.236002560873317, "learning_rate": 4.828336487962305e-06, "loss": 1.2197, "step": 1454 }, { "epoch": 1.780905752753978, "grad_norm": 0.7611961135179246, "learning_rate": 4.828103025719368e-06, "loss": 0.4352, "step": 1455 }, { "epoch": 1.7821297429620562, "grad_norm": 0.585270506211237, "learning_rate": 4.827869410482113e-06, "loss": 0.6359, "step": 1456 }, { "epoch": 1.7833537331701348, "grad_norm": 0.5691500029885712, "learning_rate": 4.82763564226589e-06, "loss": 0.6929, "step": 1457 }, { "epoch": 1.784577723378213, "grad_norm": 1.2692134179564167, "learning_rate": 4.8274017210860614e-06, "loss": 0.5831, "step": 1458 }, { "epoch": 1.7858017135862914, "grad_norm": 1.1968943413430078, "learning_rate": 4.827167646958e-06, "loss": 1.1208, "step": 1459 }, { "epoch": 1.7870257037943698, "grad_norm": 0.9007292695455923, "learning_rate": 4.826933419897089e-06, "loss": 0.5554, "step": 1460 }, { "epoch": 1.788249694002448, "grad_norm": 1.315736156666934, "learning_rate": 4.826699039918721e-06, "loss": 0.5642, "step": 1461 }, { "epoch": 1.7894736842105263, "grad_norm": 0.6890804498886349, "learning_rate": 4.8264645070382964e-06, "loss": 0.6224, "step": 1462 }, { "epoch": 1.7906976744186047, "grad_norm": 0.893253330229824, "learning_rate": 4.82622982127123e-06, "loss": 0.7738, "step": 1463 }, { "epoch": 1.791921664626683, "grad_norm": 0.7496760166368227, "learning_rate": 4.825994982632944e-06, "loss": 0.6149, "step": 1464 }, { "epoch": 1.7931456548347613, "grad_norm": 1.1939569135824306, "learning_rate": 4.825759991138872e-06, "loss": 1.2779, "step": 1465 }, { "epoch": 1.7943696450428397, "grad_norm": 1.2956294888148754, "learning_rate": 4.825524846804455e-06, "loss": 1.1265, "step": 1466 }, { "epoch": 1.7955936352509179, "grad_norm": 0.7180266129586983, "learning_rate": 4.8252895496451455e-06, "loss": 0.7777, "step": 1467 }, { "epoch": 1.7968176254589965, "grad_norm": 0.7136187038557338, "learning_rate": 4.8250540996764086e-06, "loss": 0.6305, "step": 1468 }, { "epoch": 1.7980416156670747, "grad_norm": 1.0062599563324495, "learning_rate": 4.824818496913716e-06, "loss": 1.904, "step": 1469 }, { "epoch": 1.799265605875153, "grad_norm": 0.6918545327681261, "learning_rate": 4.824582741372551e-06, "loss": 0.5781, "step": 1470 }, { "epoch": 1.8004895960832314, "grad_norm": 1.0430502695215231, "learning_rate": 4.824346833068405e-06, "loss": 0.6479, "step": 1471 }, { "epoch": 1.8017135862913096, "grad_norm": 0.7457556451687253, "learning_rate": 4.824110772016784e-06, "loss": 0.4822, "step": 1472 }, { "epoch": 1.802937576499388, "grad_norm": 1.052174678771876, "learning_rate": 4.823874558233198e-06, "loss": 0.6481, "step": 1473 }, { "epoch": 1.8041615667074664, "grad_norm": 0.9083250478784275, "learning_rate": 4.823638191733172e-06, "loss": 0.5417, "step": 1474 }, { "epoch": 1.8053855569155446, "grad_norm": 1.1439026159882684, "learning_rate": 4.823401672532239e-06, "loss": 0.858, "step": 1475 }, { "epoch": 1.806609547123623, "grad_norm": 1.256538574235264, "learning_rate": 4.8231650006459405e-06, "loss": 0.5352, "step": 1476 }, { "epoch": 1.8078335373317014, "grad_norm": 0.7919996035872724, "learning_rate": 4.822928176089832e-06, "loss": 0.6042, "step": 1477 }, { "epoch": 1.8090575275397796, "grad_norm": 1.109389395009973, "learning_rate": 4.822691198879474e-06, "loss": 0.4746, "step": 1478 }, { "epoch": 1.8102815177478582, "grad_norm": 1.304222745961119, "learning_rate": 4.822454069030443e-06, "loss": 0.4661, "step": 1479 }, { "epoch": 1.8115055079559363, "grad_norm": 1.5182068433934577, "learning_rate": 4.822216786558322e-06, "loss": 0.5989, "step": 1480 }, { "epoch": 1.8127294981640147, "grad_norm": 0.515145698306268, "learning_rate": 4.8219793514787006e-06, "loss": 0.4915, "step": 1481 }, { "epoch": 1.8139534883720931, "grad_norm": 1.435381972900211, "learning_rate": 4.821741763807186e-06, "loss": 0.4254, "step": 1482 }, { "epoch": 1.8151774785801713, "grad_norm": 1.109797096943933, "learning_rate": 4.82150402355939e-06, "loss": 0.7823, "step": 1483 }, { "epoch": 1.8164014687882497, "grad_norm": 1.0654999109953989, "learning_rate": 4.821266130750936e-06, "loss": 0.7398, "step": 1484 }, { "epoch": 1.817625458996328, "grad_norm": 0.7074367822925478, "learning_rate": 4.821028085397458e-06, "loss": 0.3936, "step": 1485 }, { "epoch": 1.8188494492044063, "grad_norm": 0.854122269515197, "learning_rate": 4.820789887514599e-06, "loss": 0.7649, "step": 1486 }, { "epoch": 1.8200734394124847, "grad_norm": 1.130758457983194, "learning_rate": 4.820551537118012e-06, "loss": 1.4075, "step": 1487 }, { "epoch": 1.821297429620563, "grad_norm": 1.1303211458507376, "learning_rate": 4.820313034223362e-06, "loss": 0.5834, "step": 1488 }, { "epoch": 1.8225214198286412, "grad_norm": 0.8877457788280062, "learning_rate": 4.82007437884632e-06, "loss": 1.0032, "step": 1489 }, { "epoch": 1.8237454100367199, "grad_norm": 0.9739734926993258, "learning_rate": 4.819835571002573e-06, "loss": 0.4644, "step": 1490 }, { "epoch": 1.824969400244798, "grad_norm": 1.623434946798665, "learning_rate": 4.819596610707812e-06, "loss": 1.0311, "step": 1491 }, { "epoch": 1.8261933904528764, "grad_norm": 0.8748455082565944, "learning_rate": 4.819357497977741e-06, "loss": 0.7445, "step": 1492 }, { "epoch": 1.8274173806609548, "grad_norm": 0.6040657590077547, "learning_rate": 4.819118232828075e-06, "loss": 0.5022, "step": 1493 }, { "epoch": 1.828641370869033, "grad_norm": 1.0165450930958981, "learning_rate": 4.818878815274536e-06, "loss": 1.0947, "step": 1494 }, { "epoch": 1.8298653610771114, "grad_norm": 1.0663356289941008, "learning_rate": 4.8186392453328576e-06, "loss": 1.0436, "step": 1495 }, { "epoch": 1.8310893512851898, "grad_norm": 1.107881590568128, "learning_rate": 4.818399523018785e-06, "loss": 0.6394, "step": 1496 }, { "epoch": 1.832313341493268, "grad_norm": 0.7738151914790905, "learning_rate": 4.8181596483480704e-06, "loss": 1.2337, "step": 1497 }, { "epoch": 1.8335373317013464, "grad_norm": 0.894704045032589, "learning_rate": 4.817919621336479e-06, "loss": 0.7724, "step": 1498 }, { "epoch": 1.8347613219094248, "grad_norm": 1.0613202505320993, "learning_rate": 4.817679441999782e-06, "loss": 0.5273, "step": 1499 }, { "epoch": 1.835985312117503, "grad_norm": 0.7053683671503921, "learning_rate": 4.8174391103537655e-06, "loss": 0.6449, "step": 1500 }, { "epoch": 1.8372093023255816, "grad_norm": 1.0700410780135006, "learning_rate": 4.817198626414222e-06, "loss": 0.7081, "step": 1501 }, { "epoch": 1.8384332925336597, "grad_norm": 0.8864625877201399, "learning_rate": 4.8169579901969556e-06, "loss": 1.1952, "step": 1502 }, { "epoch": 1.8396572827417381, "grad_norm": 0.9498111240616791, "learning_rate": 4.816717201717779e-06, "loss": 0.4213, "step": 1503 }, { "epoch": 1.8408812729498165, "grad_norm": 0.7801217545876808, "learning_rate": 4.816476260992518e-06, "loss": 0.3375, "step": 1504 }, { "epoch": 1.8421052631578947, "grad_norm": 0.7235709244331405, "learning_rate": 4.8162351680370046e-06, "loss": 0.6006, "step": 1505 }, { "epoch": 1.843329253365973, "grad_norm": 1.0925782755243532, "learning_rate": 4.815993922867084e-06, "loss": 0.9456, "step": 1506 }, { "epoch": 1.8445532435740515, "grad_norm": 0.8754926016871207, "learning_rate": 4.815752525498608e-06, "loss": 1.3869, "step": 1507 }, { "epoch": 1.8457772337821297, "grad_norm": 0.8328080498002697, "learning_rate": 4.8155109759474415e-06, "loss": 0.5503, "step": 1508 }, { "epoch": 1.847001223990208, "grad_norm": 1.2455584044321122, "learning_rate": 4.815269274229459e-06, "loss": 0.4463, "step": 1509 }, { "epoch": 1.8482252141982864, "grad_norm": 0.9275182298774276, "learning_rate": 4.8150274203605425e-06, "loss": 0.4408, "step": 1510 }, { "epoch": 1.8494492044063646, "grad_norm": 1.0840100737439364, "learning_rate": 4.814785414356587e-06, "loss": 0.4433, "step": 1511 }, { "epoch": 1.8506731946144432, "grad_norm": 1.0552100637568758, "learning_rate": 4.814543256233496e-06, "loss": 0.6814, "step": 1512 }, { "epoch": 1.8518971848225214, "grad_norm": 1.1488548126990605, "learning_rate": 4.8143009460071825e-06, "loss": 1.219, "step": 1513 }, { "epoch": 1.8531211750305998, "grad_norm": 0.9831063796409565, "learning_rate": 4.814058483693571e-06, "loss": 1.6901, "step": 1514 }, { "epoch": 1.8543451652386782, "grad_norm": 1.020444989883295, "learning_rate": 4.8138158693085955e-06, "loss": 0.514, "step": 1515 }, { "epoch": 1.8555691554467564, "grad_norm": 0.8803074916524232, "learning_rate": 4.8135731028681995e-06, "loss": 0.6051, "step": 1516 }, { "epoch": 1.8567931456548348, "grad_norm": 1.0944912254157153, "learning_rate": 4.813330184388336e-06, "loss": 0.9437, "step": 1517 }, { "epoch": 1.8580171358629132, "grad_norm": 0.8017896115993902, "learning_rate": 4.813087113884969e-06, "loss": 0.3534, "step": 1518 }, { "epoch": 1.8592411260709913, "grad_norm": 1.3391569596956512, "learning_rate": 4.812843891374073e-06, "loss": 0.9792, "step": 1519 }, { "epoch": 1.8604651162790697, "grad_norm": 0.9575500128466213, "learning_rate": 4.812600516871631e-06, "loss": 0.6995, "step": 1520 }, { "epoch": 1.8616891064871481, "grad_norm": 0.6607800837320198, "learning_rate": 4.8123569903936365e-06, "loss": 0.6451, "step": 1521 }, { "epoch": 1.8629130966952263, "grad_norm": 0.869912539710866, "learning_rate": 4.812113311956092e-06, "loss": 1.0551, "step": 1522 }, { "epoch": 1.864137086903305, "grad_norm": 0.5838790234659531, "learning_rate": 4.811869481575015e-06, "loss": 0.522, "step": 1523 }, { "epoch": 1.865361077111383, "grad_norm": 1.350590569104206, "learning_rate": 4.811625499266426e-06, "loss": 0.6209, "step": 1524 }, { "epoch": 1.8665850673194615, "grad_norm": 1.6371713950579037, "learning_rate": 4.811381365046358e-06, "loss": 0.5374, "step": 1525 }, { "epoch": 1.86780905752754, "grad_norm": 1.098951158727059, "learning_rate": 4.8111370789308575e-06, "loss": 0.5957, "step": 1526 }, { "epoch": 1.869033047735618, "grad_norm": 0.7839450633508284, "learning_rate": 4.810892640935976e-06, "loss": 1.0284, "step": 1527 }, { "epoch": 1.8702570379436965, "grad_norm": 1.6708137430281147, "learning_rate": 4.810648051077777e-06, "loss": 0.9328, "step": 1528 }, { "epoch": 1.8714810281517749, "grad_norm": 1.250364662763702, "learning_rate": 4.810403309372334e-06, "loss": 1.1808, "step": 1529 }, { "epoch": 1.872705018359853, "grad_norm": 1.0168547795883442, "learning_rate": 4.810158415835733e-06, "loss": 0.6585, "step": 1530 }, { "epoch": 1.8739290085679314, "grad_norm": 0.6385948502943634, "learning_rate": 4.809913370484064e-06, "loss": 0.7035, "step": 1531 }, { "epoch": 1.8751529987760098, "grad_norm": 0.7029505573606275, "learning_rate": 4.809668173333433e-06, "loss": 0.3702, "step": 1532 }, { "epoch": 1.876376988984088, "grad_norm": 0.7027859663405749, "learning_rate": 4.809422824399953e-06, "loss": 1.1989, "step": 1533 }, { "epoch": 1.8776009791921666, "grad_norm": 0.9574615135006147, "learning_rate": 4.809177323699745e-06, "loss": 1.21, "step": 1534 }, { "epoch": 1.8788249694002448, "grad_norm": 0.735155950074949, "learning_rate": 4.808931671248946e-06, "loss": 1.4036, "step": 1535 }, { "epoch": 1.880048959608323, "grad_norm": 1.0676006437755676, "learning_rate": 4.808685867063697e-06, "loss": 1.0464, "step": 1536 }, { "epoch": 1.8812729498164016, "grad_norm": 0.842146943442167, "learning_rate": 4.808439911160153e-06, "loss": 0.5602, "step": 1537 }, { "epoch": 1.8824969400244798, "grad_norm": 1.3556074807724596, "learning_rate": 4.808193803554475e-06, "loss": 0.9214, "step": 1538 }, { "epoch": 1.8837209302325582, "grad_norm": 1.5543090043681407, "learning_rate": 4.807947544262838e-06, "loss": 1.0266, "step": 1539 }, { "epoch": 1.8849449204406366, "grad_norm": 1.260015606551513, "learning_rate": 4.8077011333014255e-06, "loss": 0.6517, "step": 1540 }, { "epoch": 1.8861689106487147, "grad_norm": 1.362238409990797, "learning_rate": 4.80745457068643e-06, "loss": 1.0566, "step": 1541 }, { "epoch": 1.8873929008567931, "grad_norm": 0.9673539197161027, "learning_rate": 4.8072078564340544e-06, "loss": 0.8399, "step": 1542 }, { "epoch": 1.8886168910648715, "grad_norm": 0.6942370848274635, "learning_rate": 4.806960990560513e-06, "loss": 0.5736, "step": 1543 }, { "epoch": 1.8898408812729497, "grad_norm": 1.120104529714829, "learning_rate": 4.806713973082027e-06, "loss": 0.7519, "step": 1544 }, { "epoch": 1.8910648714810283, "grad_norm": 0.9220660548016286, "learning_rate": 4.8064668040148315e-06, "loss": 0.5115, "step": 1545 }, { "epoch": 1.8922888616891065, "grad_norm": 0.849610342061057, "learning_rate": 4.806219483375168e-06, "loss": 1.0824, "step": 1546 }, { "epoch": 1.8935128518971847, "grad_norm": 0.7855186118456127, "learning_rate": 4.805972011179291e-06, "loss": 0.8608, "step": 1547 }, { "epoch": 1.8947368421052633, "grad_norm": 0.6359533911782205, "learning_rate": 4.8057243874434625e-06, "loss": 0.6822, "step": 1548 }, { "epoch": 1.8959608323133414, "grad_norm": 1.2259476116977583, "learning_rate": 4.805476612183955e-06, "loss": 1.1406, "step": 1549 }, { "epoch": 1.8971848225214198, "grad_norm": 0.8488143973244074, "learning_rate": 4.805228685417052e-06, "loss": 0.9291, "step": 1550 }, { "epoch": 1.8984088127294982, "grad_norm": 1.0973264929642157, "learning_rate": 4.804980607159047e-06, "loss": 0.4934, "step": 1551 }, { "epoch": 1.8996328029375764, "grad_norm": 0.5853289191125773, "learning_rate": 4.804732377426241e-06, "loss": 0.5552, "step": 1552 }, { "epoch": 1.9008567931456548, "grad_norm": 1.1341593121674785, "learning_rate": 4.804483996234949e-06, "loss": 1.2437, "step": 1553 }, { "epoch": 1.9020807833537332, "grad_norm": 1.0490810724715849, "learning_rate": 4.804235463601491e-06, "loss": 0.4737, "step": 1554 }, { "epoch": 1.9033047735618114, "grad_norm": 1.3341499778955537, "learning_rate": 4.8039867795422015e-06, "loss": 0.622, "step": 1555 }, { "epoch": 1.90452876376989, "grad_norm": 0.5906945024274528, "learning_rate": 4.803737944073423e-06, "loss": 0.6012, "step": 1556 }, { "epoch": 1.9057527539779682, "grad_norm": 1.0220042730977423, "learning_rate": 4.803488957211508e-06, "loss": 0.7194, "step": 1557 }, { "epoch": 1.9069767441860463, "grad_norm": 0.8406068750683137, "learning_rate": 4.803239818972818e-06, "loss": 0.6668, "step": 1558 }, { "epoch": 1.908200734394125, "grad_norm": 0.8329151244038531, "learning_rate": 4.8029905293737265e-06, "loss": 1.2737, "step": 1559 }, { "epoch": 1.9094247246022031, "grad_norm": 0.7697497840113672, "learning_rate": 4.802741088430616e-06, "loss": 0.6155, "step": 1560 }, { "epoch": 1.9106487148102815, "grad_norm": 0.9161043715658023, "learning_rate": 4.802491496159878e-06, "loss": 0.6098, "step": 1561 }, { "epoch": 1.91187270501836, "grad_norm": 1.548788154384532, "learning_rate": 4.8022417525779155e-06, "loss": 0.9378, "step": 1562 }, { "epoch": 1.913096695226438, "grad_norm": 0.9954150013599182, "learning_rate": 4.80199185770114e-06, "loss": 0.6086, "step": 1563 }, { "epoch": 1.9143206854345165, "grad_norm": 1.237058750275263, "learning_rate": 4.801741811545975e-06, "loss": 0.6909, "step": 1564 }, { "epoch": 1.915544675642595, "grad_norm": 0.9480509960415106, "learning_rate": 4.801491614128851e-06, "loss": 0.6204, "step": 1565 }, { "epoch": 1.916768665850673, "grad_norm": 1.2017033704707512, "learning_rate": 4.801241265466211e-06, "loss": 0.5929, "step": 1566 }, { "epoch": 1.9179926560587517, "grad_norm": 1.093453887715438, "learning_rate": 4.800990765574507e-06, "loss": 0.8205, "step": 1567 }, { "epoch": 1.9192166462668299, "grad_norm": 0.5727770073428716, "learning_rate": 4.800740114470199e-06, "loss": 0.4438, "step": 1568 }, { "epoch": 1.920440636474908, "grad_norm": 1.1251755045828478, "learning_rate": 4.8004893121697626e-06, "loss": 0.4136, "step": 1569 }, { "epoch": 1.9216646266829867, "grad_norm": 0.7555333676743492, "learning_rate": 4.800238358689676e-06, "loss": 0.6418, "step": 1570 }, { "epoch": 1.9228886168910648, "grad_norm": 1.2527800001417086, "learning_rate": 4.799987254046433e-06, "loss": 0.4479, "step": 1571 }, { "epoch": 1.9241126070991432, "grad_norm": 1.0645839689689491, "learning_rate": 4.799735998256535e-06, "loss": 1.0947, "step": 1572 }, { "epoch": 1.9253365973072216, "grad_norm": 0.5916297141513027, "learning_rate": 4.799484591336494e-06, "loss": 0.6436, "step": 1573 }, { "epoch": 1.9265605875152998, "grad_norm": 0.6514642287801761, "learning_rate": 4.79923303330283e-06, "loss": 0.4481, "step": 1574 }, { "epoch": 1.9277845777233782, "grad_norm": 0.896296188970787, "learning_rate": 4.798981324172075e-06, "loss": 0.8211, "step": 1575 }, { "epoch": 1.9290085679314566, "grad_norm": 0.7983919089512396, "learning_rate": 4.798729463960771e-06, "loss": 0.5918, "step": 1576 }, { "epoch": 1.9302325581395348, "grad_norm": 1.1981620844381733, "learning_rate": 4.79847745268547e-06, "loss": 1.2933, "step": 1577 }, { "epoch": 1.9314565483476134, "grad_norm": 0.9958421783718234, "learning_rate": 4.798225290362731e-06, "loss": 0.8313, "step": 1578 }, { "epoch": 1.9326805385556916, "grad_norm": 0.795154614038978, "learning_rate": 4.797972977009127e-06, "loss": 0.723, "step": 1579 }, { "epoch": 1.9339045287637697, "grad_norm": 0.8489395754953905, "learning_rate": 4.797720512641238e-06, "loss": 0.6306, "step": 1580 }, { "epoch": 1.9351285189718483, "grad_norm": 0.8163265507577604, "learning_rate": 4.797467897275656e-06, "loss": 0.5754, "step": 1581 }, { "epoch": 1.9363525091799265, "grad_norm": 1.251260743323998, "learning_rate": 4.797215130928982e-06, "loss": 0.5843, "step": 1582 }, { "epoch": 1.937576499388005, "grad_norm": 0.7792404795394278, "learning_rate": 4.796962213617826e-06, "loss": 0.6807, "step": 1583 }, { "epoch": 1.9388004895960833, "grad_norm": 0.9382435937172323, "learning_rate": 4.796709145358809e-06, "loss": 0.5393, "step": 1584 }, { "epoch": 1.9400244798041615, "grad_norm": 0.9484565418253064, "learning_rate": 4.796455926168561e-06, "loss": 0.5472, "step": 1585 }, { "epoch": 1.9412484700122399, "grad_norm": 0.788175646007282, "learning_rate": 4.796202556063726e-06, "loss": 0.5138, "step": 1586 }, { "epoch": 1.9424724602203183, "grad_norm": 0.5167376654052457, "learning_rate": 4.79594903506095e-06, "loss": 0.4768, "step": 1587 }, { "epoch": 1.9436964504283964, "grad_norm": 1.3124482978543974, "learning_rate": 4.795695363176897e-06, "loss": 0.7975, "step": 1588 }, { "epoch": 1.944920440636475, "grad_norm": 1.499050422895394, "learning_rate": 4.795441540428235e-06, "loss": 1.3848, "step": 1589 }, { "epoch": 1.9461444308445532, "grad_norm": 1.5736573751410727, "learning_rate": 4.795187566831645e-06, "loss": 0.4929, "step": 1590 }, { "epoch": 1.9473684210526314, "grad_norm": 0.9282396318941775, "learning_rate": 4.794933442403818e-06, "loss": 0.5113, "step": 1591 }, { "epoch": 1.94859241126071, "grad_norm": 0.5668914444564315, "learning_rate": 4.794679167161453e-06, "loss": 0.6605, "step": 1592 }, { "epoch": 1.9498164014687882, "grad_norm": 0.9413994777064197, "learning_rate": 4.794424741121261e-06, "loss": 0.7124, "step": 1593 }, { "epoch": 1.9510403916768666, "grad_norm": 1.4751485691641153, "learning_rate": 4.794170164299962e-06, "loss": 0.568, "step": 1594 }, { "epoch": 1.952264381884945, "grad_norm": 0.6632059960631201, "learning_rate": 4.793915436714285e-06, "loss": 0.968, "step": 1595 }, { "epoch": 1.9534883720930232, "grad_norm": 0.9436766785972045, "learning_rate": 4.793660558380969e-06, "loss": 0.6869, "step": 1596 }, { "epoch": 1.9547123623011016, "grad_norm": 0.7545899323255287, "learning_rate": 4.7934055293167655e-06, "loss": 0.6668, "step": 1597 }, { "epoch": 1.95593635250918, "grad_norm": 0.796718361455369, "learning_rate": 4.793150349538434e-06, "loss": 0.922, "step": 1598 }, { "epoch": 1.9571603427172581, "grad_norm": 0.8838904072449117, "learning_rate": 4.792895019062742e-06, "loss": 0.9931, "step": 1599 }, { "epoch": 1.9583843329253368, "grad_norm": 1.3610229943698242, "learning_rate": 4.792639537906472e-06, "loss": 0.9976, "step": 1600 }, { "epoch": 1.959608323133415, "grad_norm": 0.873640372990564, "learning_rate": 4.79238390608641e-06, "loss": 1.0471, "step": 1601 }, { "epoch": 1.960832313341493, "grad_norm": 1.106150886064866, "learning_rate": 4.792128123619357e-06, "loss": 0.8431, "step": 1602 }, { "epoch": 1.9620563035495717, "grad_norm": 1.1566886780408674, "learning_rate": 4.791872190522122e-06, "loss": 0.6454, "step": 1603 }, { "epoch": 1.96328029375765, "grad_norm": 0.952404860746821, "learning_rate": 4.791616106811523e-06, "loss": 0.6135, "step": 1604 }, { "epoch": 1.9645042839657283, "grad_norm": 0.6047281057345922, "learning_rate": 4.79135987250439e-06, "loss": 0.6614, "step": 1605 }, { "epoch": 1.9657282741738067, "grad_norm": 1.072351043382434, "learning_rate": 4.791103487617562e-06, "loss": 0.6474, "step": 1606 }, { "epoch": 1.9669522643818849, "grad_norm": 1.4877300163071183, "learning_rate": 4.790846952167886e-06, "loss": 0.5281, "step": 1607 }, { "epoch": 1.9681762545899633, "grad_norm": 0.8475132727790834, "learning_rate": 4.790590266172222e-06, "loss": 0.6702, "step": 1608 }, { "epoch": 1.9694002447980417, "grad_norm": 0.6904125037000479, "learning_rate": 4.790333429647438e-06, "loss": 0.5328, "step": 1609 }, { "epoch": 1.9706242350061198, "grad_norm": 0.5738387721390603, "learning_rate": 4.790076442610412e-06, "loss": 0.7358, "step": 1610 }, { "epoch": 1.9718482252141984, "grad_norm": 0.9940065467747108, "learning_rate": 4.789819305078033e-06, "loss": 1.1867, "step": 1611 }, { "epoch": 1.9730722154222766, "grad_norm": 0.7741968807200302, "learning_rate": 4.789562017067199e-06, "loss": 0.6888, "step": 1612 }, { "epoch": 1.9742962056303548, "grad_norm": 1.0157883415025875, "learning_rate": 4.7893045785948165e-06, "loss": 0.6599, "step": 1613 }, { "epoch": 1.9755201958384334, "grad_norm": 1.365293167549576, "learning_rate": 4.789046989677805e-06, "loss": 0.6171, "step": 1614 }, { "epoch": 1.9767441860465116, "grad_norm": 1.0348273602315656, "learning_rate": 4.7887892503330936e-06, "loss": 0.9952, "step": 1615 }, { "epoch": 1.97796817625459, "grad_norm": 0.5772740264910727, "learning_rate": 4.7885313605776166e-06, "loss": 0.4676, "step": 1616 }, { "epoch": 1.9791921664626684, "grad_norm": 1.1462268746002024, "learning_rate": 4.7882733204283236e-06, "loss": 1.0766, "step": 1617 }, { "epoch": 1.9804161566707466, "grad_norm": 1.3045196276006517, "learning_rate": 4.788015129902173e-06, "loss": 0.8134, "step": 1618 }, { "epoch": 1.981640146878825, "grad_norm": 0.5746352940970983, "learning_rate": 4.787756789016129e-06, "loss": 0.5776, "step": 1619 }, { "epoch": 1.9828641370869033, "grad_norm": 1.2871927748316516, "learning_rate": 4.787498297787172e-06, "loss": 1.3033, "step": 1620 }, { "epoch": 1.9840881272949815, "grad_norm": 0.5250215967251644, "learning_rate": 4.7872396562322865e-06, "loss": 0.6282, "step": 1621 }, { "epoch": 1.9853121175030601, "grad_norm": 0.8117094005468507, "learning_rate": 4.786980864368471e-06, "loss": 0.4296, "step": 1622 }, { "epoch": 1.9865361077111383, "grad_norm": 0.7246604017053855, "learning_rate": 4.786721922212733e-06, "loss": 0.5986, "step": 1623 }, { "epoch": 1.9877600979192165, "grad_norm": 0.8103264306157301, "learning_rate": 4.786462829782088e-06, "loss": 0.6901, "step": 1624 }, { "epoch": 1.988984088127295, "grad_norm": 1.0144325822234537, "learning_rate": 4.786203587093562e-06, "loss": 1.4275, "step": 1625 }, { "epoch": 1.9902080783353733, "grad_norm": 1.1449075940255176, "learning_rate": 4.785944194164193e-06, "loss": 1.1992, "step": 1626 }, { "epoch": 1.9914320685434517, "grad_norm": 0.9127127368532034, "learning_rate": 4.785684651011027e-06, "loss": 1.5022, "step": 1627 }, { "epoch": 1.99265605875153, "grad_norm": 0.6696578934519518, "learning_rate": 4.785424957651119e-06, "loss": 0.7072, "step": 1628 }, { "epoch": 1.9938800489596082, "grad_norm": 1.3549650773850697, "learning_rate": 4.785165114101536e-06, "loss": 0.8635, "step": 1629 }, { "epoch": 1.9951040391676866, "grad_norm": 1.1412482767167145, "learning_rate": 4.784905120379355e-06, "loss": 0.8452, "step": 1630 }, { "epoch": 1.996328029375765, "grad_norm": 1.1973599530072898, "learning_rate": 4.78464497650166e-06, "loss": 0.5399, "step": 1631 }, { "epoch": 1.9975520195838432, "grad_norm": 0.7731379418226847, "learning_rate": 4.784384682485548e-06, "loss": 0.6433, "step": 1632 }, { "epoch": 1.9987760097919218, "grad_norm": 0.7525596065093401, "learning_rate": 4.7841242383481235e-06, "loss": 0.632, "step": 1633 }, { "epoch": 2.0, "grad_norm": 1.030655347898543, "learning_rate": 4.783863644106502e-06, "loss": 1.2715, "step": 1634 }, { "epoch": 2.001223990208078, "grad_norm": 1.3490444287734904, "learning_rate": 4.783602899777811e-06, "loss": 0.595, "step": 1635 }, { "epoch": 2.002447980416157, "grad_norm": 0.7923855008059066, "learning_rate": 4.783342005379182e-06, "loss": 0.6415, "step": 1636 }, { "epoch": 2.003671970624235, "grad_norm": 0.840601862691673, "learning_rate": 4.783080960927763e-06, "loss": 0.5996, "step": 1637 }, { "epoch": 2.004895960832313, "grad_norm": 1.391136439821326, "learning_rate": 4.782819766440707e-06, "loss": 0.5675, "step": 1638 }, { "epoch": 2.0061199510403918, "grad_norm": 0.7652098606946581, "learning_rate": 4.78255842193518e-06, "loss": 0.9135, "step": 1639 }, { "epoch": 2.00734394124847, "grad_norm": 1.013382164786699, "learning_rate": 4.782296927428356e-06, "loss": 0.9813, "step": 1640 }, { "epoch": 2.0085679314565485, "grad_norm": 0.9079413204020284, "learning_rate": 4.78203528293742e-06, "loss": 0.853, "step": 1641 }, { "epoch": 2.0097919216646267, "grad_norm": 0.9123261391193673, "learning_rate": 4.7817734884795656e-06, "loss": 0.555, "step": 1642 }, { "epoch": 2.011015911872705, "grad_norm": 1.3492668717845504, "learning_rate": 4.781511544071996e-06, "loss": 0.6342, "step": 1643 }, { "epoch": 2.0122399020807835, "grad_norm": 1.4964263381362626, "learning_rate": 4.781249449731928e-06, "loss": 0.508, "step": 1644 }, { "epoch": 2.0134638922888617, "grad_norm": 0.5764661455109725, "learning_rate": 4.780987205476584e-06, "loss": 0.438, "step": 1645 }, { "epoch": 2.01468788249694, "grad_norm": 0.5169239546489464, "learning_rate": 4.7807248113231966e-06, "loss": 0.4727, "step": 1646 }, { "epoch": 2.0159118727050185, "grad_norm": 0.7646726079291667, "learning_rate": 4.780462267289011e-06, "loss": 0.8894, "step": 1647 }, { "epoch": 2.0171358629130967, "grad_norm": 1.2525118759256464, "learning_rate": 4.78019957339128e-06, "loss": 0.5727, "step": 1648 }, { "epoch": 2.018359853121175, "grad_norm": 1.689618777322081, "learning_rate": 4.779936729647267e-06, "loss": 1.0123, "step": 1649 }, { "epoch": 2.0195838433292534, "grad_norm": 1.0700814104922753, "learning_rate": 4.779673736074245e-06, "loss": 0.7087, "step": 1650 }, { "epoch": 2.0208078335373316, "grad_norm": 1.1335791592403512, "learning_rate": 4.7794105926894965e-06, "loss": 0.7027, "step": 1651 }, { "epoch": 2.0220318237454102, "grad_norm": 0.9448495370841044, "learning_rate": 4.779147299510316e-06, "loss": 0.4908, "step": 1652 }, { "epoch": 2.0232558139534884, "grad_norm": 0.8304644268133966, "learning_rate": 4.7788838565540044e-06, "loss": 1.6364, "step": 1653 }, { "epoch": 2.0244798041615666, "grad_norm": 1.2690986488399714, "learning_rate": 4.778620263837875e-06, "loss": 0.3258, "step": 1654 }, { "epoch": 2.025703794369645, "grad_norm": 0.7254185937017236, "learning_rate": 4.778356521379248e-06, "loss": 0.6692, "step": 1655 }, { "epoch": 2.0269277845777234, "grad_norm": 0.8991220592918276, "learning_rate": 4.778092629195459e-06, "loss": 0.6414, "step": 1656 }, { "epoch": 2.0281517747858016, "grad_norm": 1.0098488404769435, "learning_rate": 4.777828587303849e-06, "loss": 0.6886, "step": 1657 }, { "epoch": 2.02937576499388, "grad_norm": 0.8998552000188978, "learning_rate": 4.777564395721768e-06, "loss": 1.29, "step": 1658 }, { "epoch": 2.0305997552019583, "grad_norm": 0.7900915476144522, "learning_rate": 4.77730005446658e-06, "loss": 1.0966, "step": 1659 }, { "epoch": 2.0318237454100365, "grad_norm": 1.274376228013902, "learning_rate": 4.777035563555655e-06, "loss": 0.5093, "step": 1660 }, { "epoch": 2.033047735618115, "grad_norm": 0.8947602390354029, "learning_rate": 4.776770923006376e-06, "loss": 1.2073, "step": 1661 }, { "epoch": 2.0342717258261933, "grad_norm": 0.6396272574357137, "learning_rate": 4.776506132836131e-06, "loss": 0.6155, "step": 1662 }, { "epoch": 2.035495716034272, "grad_norm": 0.5786500263985648, "learning_rate": 4.7762411930623255e-06, "loss": 0.463, "step": 1663 }, { "epoch": 2.03671970624235, "grad_norm": 1.1313863117217278, "learning_rate": 4.775976103702367e-06, "loss": 1.213, "step": 1664 }, { "epoch": 2.0379436964504283, "grad_norm": 0.5733360418805645, "learning_rate": 4.775710864773677e-06, "loss": 0.6554, "step": 1665 }, { "epoch": 2.039167686658507, "grad_norm": 0.7735539350818456, "learning_rate": 4.775445476293688e-06, "loss": 0.6618, "step": 1666 }, { "epoch": 2.040391676866585, "grad_norm": 0.9418886170050217, "learning_rate": 4.775179938279836e-06, "loss": 0.6685, "step": 1667 }, { "epoch": 2.0416156670746632, "grad_norm": 0.6861068909865007, "learning_rate": 4.774914250749575e-06, "loss": 0.4329, "step": 1668 }, { "epoch": 2.042839657282742, "grad_norm": 1.5389060247313142, "learning_rate": 4.774648413720364e-06, "loss": 1.013, "step": 1669 }, { "epoch": 2.04406364749082, "grad_norm": 1.5102522542072925, "learning_rate": 4.774382427209672e-06, "loss": 0.429, "step": 1670 }, { "epoch": 2.045287637698898, "grad_norm": 1.025940607447305, "learning_rate": 4.77411629123498e-06, "loss": 1.2591, "step": 1671 }, { "epoch": 2.046511627906977, "grad_norm": 0.711185166481529, "learning_rate": 4.773850005813776e-06, "loss": 0.6222, "step": 1672 }, { "epoch": 2.047735618115055, "grad_norm": 0.8524740497303297, "learning_rate": 4.77358357096356e-06, "loss": 0.7012, "step": 1673 }, { "epoch": 2.0489596083231336, "grad_norm": 0.9624710180554567, "learning_rate": 4.7733169867018414e-06, "loss": 1.6047, "step": 1674 }, { "epoch": 2.050183598531212, "grad_norm": 1.2788530532638447, "learning_rate": 4.773050253046139e-06, "loss": 0.6269, "step": 1675 }, { "epoch": 2.05140758873929, "grad_norm": 1.0217457862202248, "learning_rate": 4.772783370013981e-06, "loss": 1.8886, "step": 1676 }, { "epoch": 2.0526315789473686, "grad_norm": 1.4765882368011554, "learning_rate": 4.772516337622907e-06, "loss": 0.6272, "step": 1677 }, { "epoch": 2.0538555691554468, "grad_norm": 0.5717420280638605, "learning_rate": 4.772249155890464e-06, "loss": 0.5839, "step": 1678 }, { "epoch": 2.055079559363525, "grad_norm": 0.8855425505193519, "learning_rate": 4.771981824834211e-06, "loss": 0.6591, "step": 1679 }, { "epoch": 2.0563035495716036, "grad_norm": 1.3007485590081038, "learning_rate": 4.771714344471716e-06, "loss": 1.2758, "step": 1680 }, { "epoch": 2.0575275397796817, "grad_norm": 0.8867001622432548, "learning_rate": 4.771446714820558e-06, "loss": 0.4784, "step": 1681 }, { "epoch": 2.05875152998776, "grad_norm": 0.8220415597183386, "learning_rate": 4.771178935898322e-06, "loss": 0.5838, "step": 1682 }, { "epoch": 2.0599755201958385, "grad_norm": 1.1879024691303384, "learning_rate": 4.770911007722607e-06, "loss": 0.6057, "step": 1683 }, { "epoch": 2.0611995104039167, "grad_norm": 0.8032418789970761, "learning_rate": 4.7706429303110205e-06, "loss": 0.6386, "step": 1684 }, { "epoch": 2.0624235006119953, "grad_norm": 1.3515686114454204, "learning_rate": 4.770374703681178e-06, "loss": 0.9821, "step": 1685 }, { "epoch": 2.0636474908200735, "grad_norm": 1.643732420666673, "learning_rate": 4.770106327850709e-06, "loss": 0.4771, "step": 1686 }, { "epoch": 2.0648714810281517, "grad_norm": 0.8870433627561697, "learning_rate": 4.769837802837248e-06, "loss": 0.5062, "step": 1687 }, { "epoch": 2.0660954712362303, "grad_norm": 0.8277681308651833, "learning_rate": 4.769569128658443e-06, "loss": 0.875, "step": 1688 }, { "epoch": 2.0673194614443084, "grad_norm": 0.9305066892829389, "learning_rate": 4.769300305331949e-06, "loss": 0.6364, "step": 1689 }, { "epoch": 2.0685434516523866, "grad_norm": 1.3562157517338775, "learning_rate": 4.7690313328754316e-06, "loss": 0.5846, "step": 1690 }, { "epoch": 2.0697674418604652, "grad_norm": 1.1746921621008894, "learning_rate": 4.768762211306568e-06, "loss": 1.1463, "step": 1691 }, { "epoch": 2.0709914320685434, "grad_norm": 0.6298041034646288, "learning_rate": 4.768492940643043e-06, "loss": 0.6953, "step": 1692 }, { "epoch": 2.0722154222766216, "grad_norm": 0.681923862205774, "learning_rate": 4.768223520902554e-06, "loss": 0.7025, "step": 1693 }, { "epoch": 2.0734394124847, "grad_norm": 0.5175011734226519, "learning_rate": 4.767953952102804e-06, "loss": 0.4999, "step": 1694 }, { "epoch": 2.0746634026927784, "grad_norm": 0.9013754959204043, "learning_rate": 4.767684234261509e-06, "loss": 0.3814, "step": 1695 }, { "epoch": 2.075887392900857, "grad_norm": 0.8899917581311273, "learning_rate": 4.767414367396393e-06, "loss": 1.1774, "step": 1696 }, { "epoch": 2.077111383108935, "grad_norm": 0.776151371660974, "learning_rate": 4.767144351525192e-06, "loss": 0.6757, "step": 1697 }, { "epoch": 2.0783353733170133, "grad_norm": 0.7296713265867719, "learning_rate": 4.7668741866656495e-06, "loss": 0.6192, "step": 1698 }, { "epoch": 2.079559363525092, "grad_norm": 0.6413799606073156, "learning_rate": 4.76660387283552e-06, "loss": 0.5596, "step": 1699 }, { "epoch": 2.08078335373317, "grad_norm": 1.2070980034873404, "learning_rate": 4.766333410052568e-06, "loss": 0.6761, "step": 1700 }, { "epoch": 2.0820073439412483, "grad_norm": 0.8205271201463079, "learning_rate": 4.766062798334566e-06, "loss": 0.7142, "step": 1701 }, { "epoch": 2.083231334149327, "grad_norm": 0.849099388361979, "learning_rate": 4.765792037699299e-06, "loss": 0.5057, "step": 1702 }, { "epoch": 2.084455324357405, "grad_norm": 1.4807478302959003, "learning_rate": 4.765521128164561e-06, "loss": 1.26, "step": 1703 }, { "epoch": 2.0856793145654833, "grad_norm": 0.7557006985387202, "learning_rate": 4.765250069748153e-06, "loss": 0.6342, "step": 1704 }, { "epoch": 2.086903304773562, "grad_norm": 0.9478472773592295, "learning_rate": 4.764978862467889e-06, "loss": 1.5716, "step": 1705 }, { "epoch": 2.08812729498164, "grad_norm": 0.8707528682370116, "learning_rate": 4.764707506341592e-06, "loss": 1.052, "step": 1706 }, { "epoch": 2.0893512851897187, "grad_norm": 0.8855340741221631, "learning_rate": 4.764436001387094e-06, "loss": 0.7315, "step": 1707 }, { "epoch": 2.090575275397797, "grad_norm": 0.9717173172079296, "learning_rate": 4.764164347622237e-06, "loss": 0.7863, "step": 1708 }, { "epoch": 2.091799265605875, "grad_norm": 0.9952359706463395, "learning_rate": 4.763892545064874e-06, "loss": 0.8258, "step": 1709 }, { "epoch": 2.0930232558139537, "grad_norm": 0.6909781689289004, "learning_rate": 4.763620593732867e-06, "loss": 0.8363, "step": 1710 }, { "epoch": 2.094247246022032, "grad_norm": 1.226308147176626, "learning_rate": 4.763348493644087e-06, "loss": 0.5291, "step": 1711 }, { "epoch": 2.09547123623011, "grad_norm": 0.8356326441068579, "learning_rate": 4.763076244816414e-06, "loss": 0.2731, "step": 1712 }, { "epoch": 2.0966952264381886, "grad_norm": 0.8618034249405533, "learning_rate": 4.762803847267743e-06, "loss": 1.2402, "step": 1713 }, { "epoch": 2.097919216646267, "grad_norm": 1.6486283167088436, "learning_rate": 4.762531301015972e-06, "loss": 0.5007, "step": 1714 }, { "epoch": 2.099143206854345, "grad_norm": 0.9386617708445711, "learning_rate": 4.762258606079012e-06, "loss": 1.1188, "step": 1715 }, { "epoch": 2.1003671970624236, "grad_norm": 0.978306173900155, "learning_rate": 4.761985762474784e-06, "loss": 0.6813, "step": 1716 }, { "epoch": 2.1015911872705018, "grad_norm": 0.7897540880502508, "learning_rate": 4.761712770221219e-06, "loss": 0.4696, "step": 1717 }, { "epoch": 2.1028151774785804, "grad_norm": 1.2469524536855359, "learning_rate": 4.7614396293362555e-06, "loss": 0.6841, "step": 1718 }, { "epoch": 2.1040391676866586, "grad_norm": 0.7982074074636537, "learning_rate": 4.761166339837844e-06, "loss": 1.2923, "step": 1719 }, { "epoch": 2.1052631578947367, "grad_norm": 1.16803009625026, "learning_rate": 4.760892901743944e-06, "loss": 0.6524, "step": 1720 }, { "epoch": 2.1064871481028153, "grad_norm": 1.5263695940141289, "learning_rate": 4.760619315072525e-06, "loss": 0.4485, "step": 1721 }, { "epoch": 2.1077111383108935, "grad_norm": 0.6229222575548571, "learning_rate": 4.7603455798415664e-06, "loss": 0.6148, "step": 1722 }, { "epoch": 2.1089351285189717, "grad_norm": 1.1816401904189082, "learning_rate": 4.760071696069057e-06, "loss": 0.5042, "step": 1723 }, { "epoch": 2.1101591187270503, "grad_norm": 1.3656047303957681, "learning_rate": 4.759797663772995e-06, "loss": 0.9532, "step": 1724 }, { "epoch": 2.1113831089351285, "grad_norm": 0.9026007881199491, "learning_rate": 4.759523482971389e-06, "loss": 0.6274, "step": 1725 }, { "epoch": 2.1126070991432067, "grad_norm": 0.6925339490668826, "learning_rate": 4.759249153682256e-06, "loss": 0.9589, "step": 1726 }, { "epoch": 2.1138310893512853, "grad_norm": 1.092159800471345, "learning_rate": 4.758974675923625e-06, "loss": 1.0809, "step": 1727 }, { "epoch": 2.1150550795593634, "grad_norm": 0.9609230031064179, "learning_rate": 4.758700049713535e-06, "loss": 1.6841, "step": 1728 }, { "epoch": 2.116279069767442, "grad_norm": 1.3613335228678929, "learning_rate": 4.758425275070032e-06, "loss": 0.5831, "step": 1729 }, { "epoch": 2.1175030599755202, "grad_norm": 1.4067945891132145, "learning_rate": 4.758150352011172e-06, "loss": 0.4991, "step": 1730 }, { "epoch": 2.1187270501835984, "grad_norm": 0.5826754910591225, "learning_rate": 4.757875280555024e-06, "loss": 0.5654, "step": 1731 }, { "epoch": 2.119951040391677, "grad_norm": 0.6713054679309671, "learning_rate": 4.757600060719665e-06, "loss": 0.5381, "step": 1732 }, { "epoch": 2.121175030599755, "grad_norm": 0.6603022635318611, "learning_rate": 4.757324692523179e-06, "loss": 0.5967, "step": 1733 }, { "epoch": 2.1223990208078334, "grad_norm": 1.739955682879349, "learning_rate": 4.757049175983663e-06, "loss": 0.6657, "step": 1734 }, { "epoch": 2.123623011015912, "grad_norm": 1.4928719853892538, "learning_rate": 4.756773511119225e-06, "loss": 0.5505, "step": 1735 }, { "epoch": 2.12484700122399, "grad_norm": 1.0014239746136204, "learning_rate": 4.756497697947978e-06, "loss": 0.5667, "step": 1736 }, { "epoch": 2.1260709914320683, "grad_norm": 1.4587697882417965, "learning_rate": 4.756221736488048e-06, "loss": 0.3107, "step": 1737 }, { "epoch": 2.127294981640147, "grad_norm": 0.9744809671776056, "learning_rate": 4.755945626757572e-06, "loss": 0.4052, "step": 1738 }, { "epoch": 2.128518971848225, "grad_norm": 1.0826318594087034, "learning_rate": 4.755669368774693e-06, "loss": 0.3694, "step": 1739 }, { "epoch": 2.1297429620563038, "grad_norm": 1.0485519413956328, "learning_rate": 4.755392962557565e-06, "loss": 1.2753, "step": 1740 }, { "epoch": 2.130966952264382, "grad_norm": 0.5297184378343576, "learning_rate": 4.755116408124355e-06, "loss": 0.4809, "step": 1741 }, { "epoch": 2.13219094247246, "grad_norm": 1.5161854409880913, "learning_rate": 4.754839705493236e-06, "loss": 0.4472, "step": 1742 }, { "epoch": 2.1334149326805387, "grad_norm": 0.9053565036262434, "learning_rate": 4.75456285468239e-06, "loss": 0.3276, "step": 1743 }, { "epoch": 2.134638922888617, "grad_norm": 1.0484293334816646, "learning_rate": 4.754285855710014e-06, "loss": 1.4149, "step": 1744 }, { "epoch": 2.135862913096695, "grad_norm": 0.7804640642118915, "learning_rate": 4.754008708594308e-06, "loss": 0.6731, "step": 1745 }, { "epoch": 2.1370869033047737, "grad_norm": 1.2699250970577678, "learning_rate": 4.753731413353488e-06, "loss": 0.6442, "step": 1746 }, { "epoch": 2.138310893512852, "grad_norm": 0.7752691507762891, "learning_rate": 4.753453970005776e-06, "loss": 0.9262, "step": 1747 }, { "epoch": 2.13953488372093, "grad_norm": 0.9047863492717602, "learning_rate": 4.753176378569403e-06, "loss": 0.724, "step": 1748 }, { "epoch": 2.1407588739290087, "grad_norm": 0.815756485854861, "learning_rate": 4.752898639062612e-06, "loss": 0.5425, "step": 1749 }, { "epoch": 2.141982864137087, "grad_norm": 0.7466341475371909, "learning_rate": 4.752620751503656e-06, "loss": 0.46, "step": 1750 }, { "epoch": 2.1432068543451654, "grad_norm": 0.8330271685404932, "learning_rate": 4.752342715910796e-06, "loss": 0.344, "step": 1751 }, { "epoch": 2.1444308445532436, "grad_norm": 1.295484219311035, "learning_rate": 4.752064532302303e-06, "loss": 0.5263, "step": 1752 }, { "epoch": 2.145654834761322, "grad_norm": 0.7708465461630388, "learning_rate": 4.751786200696459e-06, "loss": 0.6004, "step": 1753 }, { "epoch": 2.1468788249694004, "grad_norm": 0.5625380049418784, "learning_rate": 4.751507721111555e-06, "loss": 0.5671, "step": 1754 }, { "epoch": 2.1481028151774786, "grad_norm": 0.7821827935292218, "learning_rate": 4.751229093565891e-06, "loss": 0.887, "step": 1755 }, { "epoch": 2.1493268053855568, "grad_norm": 0.6416714853485103, "learning_rate": 4.750950318077778e-06, "loss": 0.5931, "step": 1756 }, { "epoch": 2.1505507955936354, "grad_norm": 0.8894509783296423, "learning_rate": 4.750671394665536e-06, "loss": 0.6835, "step": 1757 }, { "epoch": 2.1517747858017136, "grad_norm": 0.9435729025265643, "learning_rate": 4.750392323347494e-06, "loss": 1.4662, "step": 1758 }, { "epoch": 2.1529987760097917, "grad_norm": 1.174664139125529, "learning_rate": 4.750113104141992e-06, "loss": 0.4315, "step": 1759 }, { "epoch": 2.1542227662178703, "grad_norm": 1.199311624186637, "learning_rate": 4.74983373706738e-06, "loss": 0.473, "step": 1760 }, { "epoch": 2.1554467564259485, "grad_norm": 1.5508949010600261, "learning_rate": 4.7495542221420165e-06, "loss": 1.3696, "step": 1761 }, { "epoch": 2.1566707466340267, "grad_norm": 1.4530554672289604, "learning_rate": 4.749274559384269e-06, "loss": 0.9923, "step": 1762 }, { "epoch": 2.1578947368421053, "grad_norm": 1.2894182484962242, "learning_rate": 4.748994748812518e-06, "loss": 1.2662, "step": 1763 }, { "epoch": 2.1591187270501835, "grad_norm": 1.7444268844262065, "learning_rate": 4.74871479044515e-06, "loss": 0.453, "step": 1764 }, { "epoch": 2.160342717258262, "grad_norm": 1.22194839718637, "learning_rate": 4.748434684300564e-06, "loss": 0.5124, "step": 1765 }, { "epoch": 2.1615667074663403, "grad_norm": 0.8780388823093234, "learning_rate": 4.748154430397166e-06, "loss": 0.5523, "step": 1766 }, { "epoch": 2.1627906976744184, "grad_norm": 1.2301432478643375, "learning_rate": 4.747874028753375e-06, "loss": 0.6919, "step": 1767 }, { "epoch": 2.164014687882497, "grad_norm": 1.3219349160942884, "learning_rate": 4.7475934793876165e-06, "loss": 0.9689, "step": 1768 }, { "epoch": 2.1652386780905752, "grad_norm": 1.2618463601765646, "learning_rate": 4.747312782318329e-06, "loss": 0.6402, "step": 1769 }, { "epoch": 2.1664626682986534, "grad_norm": 1.0022874580219454, "learning_rate": 4.747031937563956e-06, "loss": 0.4487, "step": 1770 }, { "epoch": 2.167686658506732, "grad_norm": 0.9257859139413773, "learning_rate": 4.746750945142956e-06, "loss": 0.366, "step": 1771 }, { "epoch": 2.16891064871481, "grad_norm": 0.6704583104659286, "learning_rate": 4.746469805073794e-06, "loss": 0.6701, "step": 1772 }, { "epoch": 2.170134638922889, "grad_norm": 1.1285864852383078, "learning_rate": 4.746188517374946e-06, "loss": 0.6736, "step": 1773 }, { "epoch": 2.171358629130967, "grad_norm": 0.8850309489675169, "learning_rate": 4.745907082064897e-06, "loss": 1.2971, "step": 1774 }, { "epoch": 2.172582619339045, "grad_norm": 1.1156511712503216, "learning_rate": 4.74562549916214e-06, "loss": 0.9061, "step": 1775 }, { "epoch": 2.173806609547124, "grad_norm": 0.8329642642581615, "learning_rate": 4.745343768685182e-06, "loss": 0.547, "step": 1776 }, { "epoch": 2.175030599755202, "grad_norm": 1.5172666336688354, "learning_rate": 4.745061890652537e-06, "loss": 0.62, "step": 1777 }, { "epoch": 2.17625458996328, "grad_norm": 0.9027662793665578, "learning_rate": 4.744779865082728e-06, "loss": 0.7572, "step": 1778 }, { "epoch": 2.1774785801713588, "grad_norm": 0.9873706669697835, "learning_rate": 4.744497691994289e-06, "loss": 0.7273, "step": 1779 }, { "epoch": 2.178702570379437, "grad_norm": 1.303181055798484, "learning_rate": 4.744215371405764e-06, "loss": 0.5969, "step": 1780 }, { "epoch": 2.179926560587515, "grad_norm": 1.3134831564590304, "learning_rate": 4.7439329033357055e-06, "loss": 1.0466, "step": 1781 }, { "epoch": 2.1811505507955937, "grad_norm": 0.8101837763511736, "learning_rate": 4.743650287802676e-06, "loss": 0.8579, "step": 1782 }, { "epoch": 2.182374541003672, "grad_norm": 1.0879644823716874, "learning_rate": 4.743367524825249e-06, "loss": 1.0159, "step": 1783 }, { "epoch": 2.18359853121175, "grad_norm": 1.1428176183079704, "learning_rate": 4.743084614422004e-06, "loss": 0.4788, "step": 1784 }, { "epoch": 2.1848225214198287, "grad_norm": 1.577010398002274, "learning_rate": 4.7428015566115365e-06, "loss": 0.722, "step": 1785 }, { "epoch": 2.186046511627907, "grad_norm": 0.5479610683692664, "learning_rate": 4.742518351412446e-06, "loss": 0.4881, "step": 1786 }, { "epoch": 2.1872705018359855, "grad_norm": 1.0831106431688409, "learning_rate": 4.7422349988433435e-06, "loss": 1.0277, "step": 1787 }, { "epoch": 2.1884944920440637, "grad_norm": 0.8137059457381444, "learning_rate": 4.7419514989228506e-06, "loss": 0.6348, "step": 1788 }, { "epoch": 2.189718482252142, "grad_norm": 1.109160614594669, "learning_rate": 4.7416678516695976e-06, "loss": 1.0812, "step": 1789 }, { "epoch": 2.1909424724602204, "grad_norm": 1.5617374900405647, "learning_rate": 4.741384057102224e-06, "loss": 0.6711, "step": 1790 }, { "epoch": 2.1921664626682986, "grad_norm": 0.9958576810548486, "learning_rate": 4.741100115239382e-06, "loss": 1.1966, "step": 1791 }, { "epoch": 2.193390452876377, "grad_norm": 1.3368785616852292, "learning_rate": 4.740816026099728e-06, "loss": 0.5177, "step": 1792 }, { "epoch": 2.1946144430844554, "grad_norm": 1.2459621065853588, "learning_rate": 4.740531789701934e-06, "loss": 0.5726, "step": 1793 }, { "epoch": 2.1958384332925336, "grad_norm": 1.1450589630973276, "learning_rate": 4.740247406064678e-06, "loss": 0.651, "step": 1794 }, { "epoch": 2.197062423500612, "grad_norm": 1.0751465594882477, "learning_rate": 4.7399628752066486e-06, "loss": 0.4384, "step": 1795 }, { "epoch": 2.1982864137086904, "grad_norm": 0.8376073995816036, "learning_rate": 4.739678197146544e-06, "loss": 0.5452, "step": 1796 }, { "epoch": 2.1995104039167686, "grad_norm": 1.0701691025973727, "learning_rate": 4.7393933719030715e-06, "loss": 0.3894, "step": 1797 }, { "epoch": 2.200734394124847, "grad_norm": 0.9215621066118259, "learning_rate": 4.739108399494951e-06, "loss": 0.8002, "step": 1798 }, { "epoch": 2.2019583843329253, "grad_norm": 0.6412729534694632, "learning_rate": 4.738823279940907e-06, "loss": 0.6726, "step": 1799 }, { "epoch": 2.2031823745410035, "grad_norm": 0.7004628880447266, "learning_rate": 4.7385380132596785e-06, "loss": 0.551, "step": 1800 }, { "epoch": 2.204406364749082, "grad_norm": 0.9262003534126843, "learning_rate": 4.7382525994700115e-06, "loss": 0.8, "step": 1801 }, { "epoch": 2.2056303549571603, "grad_norm": 1.0710788151352677, "learning_rate": 4.737967038590662e-06, "loss": 0.4904, "step": 1802 }, { "epoch": 2.2068543451652385, "grad_norm": 0.7062019626708327, "learning_rate": 4.737681330640397e-06, "loss": 0.6984, "step": 1803 }, { "epoch": 2.208078335373317, "grad_norm": 1.0565224955452448, "learning_rate": 4.737395475637991e-06, "loss": 0.592, "step": 1804 }, { "epoch": 2.2093023255813953, "grad_norm": 0.8346772761359592, "learning_rate": 4.73710947360223e-06, "loss": 0.8892, "step": 1805 }, { "epoch": 2.2105263157894735, "grad_norm": 0.7791482841512478, "learning_rate": 4.736823324551909e-06, "loss": 0.9354, "step": 1806 }, { "epoch": 2.211750305997552, "grad_norm": 0.728324620056421, "learning_rate": 4.736537028505832e-06, "loss": 0.4012, "step": 1807 }, { "epoch": 2.2129742962056302, "grad_norm": 0.9839303655234137, "learning_rate": 4.736250585482815e-06, "loss": 0.555, "step": 1808 }, { "epoch": 2.214198286413709, "grad_norm": 0.9180249157922745, "learning_rate": 4.735963995501681e-06, "loss": 0.4999, "step": 1809 }, { "epoch": 2.215422276621787, "grad_norm": 1.6989421209791185, "learning_rate": 4.7356772585812625e-06, "loss": 0.6278, "step": 1810 }, { "epoch": 2.216646266829865, "grad_norm": 0.9378942900986411, "learning_rate": 4.735390374740404e-06, "loss": 1.3353, "step": 1811 }, { "epoch": 2.217870257037944, "grad_norm": 1.0856616670887254, "learning_rate": 4.735103343997958e-06, "loss": 1.4561, "step": 1812 }, { "epoch": 2.219094247246022, "grad_norm": 0.8746120336894221, "learning_rate": 4.734816166372788e-06, "loss": 0.5586, "step": 1813 }, { "epoch": 2.2203182374541, "grad_norm": 1.065969943422191, "learning_rate": 4.734528841883765e-06, "loss": 1.2255, "step": 1814 }, { "epoch": 2.221542227662179, "grad_norm": 1.0522994747609946, "learning_rate": 4.734241370549771e-06, "loss": 0.425, "step": 1815 }, { "epoch": 2.222766217870257, "grad_norm": 1.3282550160336022, "learning_rate": 4.7339537523896985e-06, "loss": 0.5816, "step": 1816 }, { "epoch": 2.2239902080783356, "grad_norm": 1.2656975598846336, "learning_rate": 4.733665987422449e-06, "loss": 0.6368, "step": 1817 }, { "epoch": 2.2252141982864138, "grad_norm": 1.0060139398567456, "learning_rate": 4.733378075666931e-06, "loss": 0.7547, "step": 1818 }, { "epoch": 2.226438188494492, "grad_norm": 0.7553384049883551, "learning_rate": 4.733090017142068e-06, "loss": 0.4207, "step": 1819 }, { "epoch": 2.2276621787025706, "grad_norm": 1.3278405854798798, "learning_rate": 4.732801811866789e-06, "loss": 0.4105, "step": 1820 }, { "epoch": 2.2288861689106487, "grad_norm": 1.2089961065140573, "learning_rate": 4.732513459860032e-06, "loss": 1.0333, "step": 1821 }, { "epoch": 2.230110159118727, "grad_norm": 0.8469317936601658, "learning_rate": 4.732224961140748e-06, "loss": 0.7319, "step": 1822 }, { "epoch": 2.2313341493268055, "grad_norm": 2.211180444328651, "learning_rate": 4.731936315727896e-06, "loss": 0.4932, "step": 1823 }, { "epoch": 2.2325581395348837, "grad_norm": 0.7674983221769965, "learning_rate": 4.731647523640446e-06, "loss": 0.4976, "step": 1824 }, { "epoch": 2.233782129742962, "grad_norm": 0.7409032210537124, "learning_rate": 4.7313585848973745e-06, "loss": 0.6004, "step": 1825 }, { "epoch": 2.2350061199510405, "grad_norm": 0.9638518210593149, "learning_rate": 4.73106949951767e-06, "loss": 0.5089, "step": 1826 }, { "epoch": 2.2362301101591187, "grad_norm": 0.9509339026916247, "learning_rate": 4.73078026752033e-06, "loss": 0.5697, "step": 1827 }, { "epoch": 2.237454100367197, "grad_norm": 0.9738475333198687, "learning_rate": 4.730490888924361e-06, "loss": 1.0584, "step": 1828 }, { "epoch": 2.2386780905752754, "grad_norm": 1.4582233294507194, "learning_rate": 4.730201363748782e-06, "loss": 1.084, "step": 1829 }, { "epoch": 2.2399020807833536, "grad_norm": 1.7042668918675679, "learning_rate": 4.729911692012617e-06, "loss": 0.5606, "step": 1830 }, { "epoch": 2.2411260709914322, "grad_norm": 0.7581074151542866, "learning_rate": 4.729621873734906e-06, "loss": 0.5963, "step": 1831 }, { "epoch": 2.2423500611995104, "grad_norm": 1.3360313786455533, "learning_rate": 4.72933190893469e-06, "loss": 0.4877, "step": 1832 }, { "epoch": 2.2435740514075886, "grad_norm": 0.7184313254117661, "learning_rate": 4.729041797631027e-06, "loss": 0.7677, "step": 1833 }, { "epoch": 2.244798041615667, "grad_norm": 1.1645894847080243, "learning_rate": 4.7287515398429824e-06, "loss": 0.5799, "step": 1834 }, { "epoch": 2.2460220318237454, "grad_norm": 0.8479011547862644, "learning_rate": 4.72846113558963e-06, "loss": 0.6823, "step": 1835 }, { "epoch": 2.2472460220318236, "grad_norm": 0.8898998548033978, "learning_rate": 4.728170584890054e-06, "loss": 0.6253, "step": 1836 }, { "epoch": 2.248470012239902, "grad_norm": 0.4561413170871365, "learning_rate": 4.727879887763348e-06, "loss": 0.2452, "step": 1837 }, { "epoch": 2.2496940024479803, "grad_norm": 1.3255084834600868, "learning_rate": 4.7275890442286175e-06, "loss": 0.6177, "step": 1838 }, { "epoch": 2.250917992656059, "grad_norm": 1.1361557952232664, "learning_rate": 4.727298054304974e-06, "loss": 0.6927, "step": 1839 }, { "epoch": 2.252141982864137, "grad_norm": 0.6445646635251346, "learning_rate": 4.72700691801154e-06, "loss": 0.6932, "step": 1840 }, { "epoch": 2.2533659730722153, "grad_norm": 1.1869827340444223, "learning_rate": 4.726715635367448e-06, "loss": 0.5297, "step": 1841 }, { "epoch": 2.254589963280294, "grad_norm": 1.1262635701193886, "learning_rate": 4.726424206391842e-06, "loss": 0.6857, "step": 1842 }, { "epoch": 2.255813953488372, "grad_norm": 1.1251204405610844, "learning_rate": 4.726132631103871e-06, "loss": 0.6287, "step": 1843 }, { "epoch": 2.2570379436964503, "grad_norm": 1.2808548883599133, "learning_rate": 4.725840909522698e-06, "loss": 0.505, "step": 1844 }, { "epoch": 2.258261933904529, "grad_norm": 1.2573748060361178, "learning_rate": 4.725549041667492e-06, "loss": 0.4732, "step": 1845 }, { "epoch": 2.259485924112607, "grad_norm": 0.625782663898054, "learning_rate": 4.725257027557436e-06, "loss": 0.5549, "step": 1846 }, { "epoch": 2.2607099143206852, "grad_norm": 0.8962879415661794, "learning_rate": 4.724964867211719e-06, "loss": 0.5514, "step": 1847 }, { "epoch": 2.261933904528764, "grad_norm": 1.1857627338796528, "learning_rate": 4.724672560649539e-06, "loss": 1.032, "step": 1848 }, { "epoch": 2.263157894736842, "grad_norm": 0.9898677015952242, "learning_rate": 4.724380107890109e-06, "loss": 0.65, "step": 1849 }, { "epoch": 2.26438188494492, "grad_norm": 1.2647551099992795, "learning_rate": 4.724087508952644e-06, "loss": 0.7948, "step": 1850 }, { "epoch": 2.265605875152999, "grad_norm": 1.4254645426893446, "learning_rate": 4.723794763856374e-06, "loss": 1.1283, "step": 1851 }, { "epoch": 2.266829865361077, "grad_norm": 0.9517438484247206, "learning_rate": 4.723501872620539e-06, "loss": 0.4815, "step": 1852 }, { "epoch": 2.2680538555691556, "grad_norm": 0.8302880719195052, "learning_rate": 4.7232088352643835e-06, "loss": 0.8382, "step": 1853 }, { "epoch": 2.269277845777234, "grad_norm": 1.1880042075613113, "learning_rate": 4.722915651807168e-06, "loss": 0.8823, "step": 1854 }, { "epoch": 2.270501835985312, "grad_norm": 1.959710865197276, "learning_rate": 4.722622322268157e-06, "loss": 0.8529, "step": 1855 }, { "epoch": 2.2717258261933906, "grad_norm": 1.3383385208293768, "learning_rate": 4.722328846666627e-06, "loss": 0.7134, "step": 1856 }, { "epoch": 2.2729498164014688, "grad_norm": 0.7894993022537544, "learning_rate": 4.722035225021866e-06, "loss": 0.8994, "step": 1857 }, { "epoch": 2.274173806609547, "grad_norm": 1.4010546573473848, "learning_rate": 4.721741457353169e-06, "loss": 0.874, "step": 1858 }, { "epoch": 2.2753977968176256, "grad_norm": 0.846692059695791, "learning_rate": 4.72144754367984e-06, "loss": 0.551, "step": 1859 }, { "epoch": 2.2766217870257037, "grad_norm": 1.138376636031738, "learning_rate": 4.721153484021196e-06, "loss": 0.5056, "step": 1860 }, { "epoch": 2.2778457772337823, "grad_norm": 1.3194817659277083, "learning_rate": 4.72085927839656e-06, "loss": 1.2871, "step": 1861 }, { "epoch": 2.2790697674418605, "grad_norm": 0.8756462663047224, "learning_rate": 4.720564926825267e-06, "loss": 0.9803, "step": 1862 }, { "epoch": 2.2802937576499387, "grad_norm": 1.3930737784578227, "learning_rate": 4.7202704293266605e-06, "loss": 0.7733, "step": 1863 }, { "epoch": 2.2815177478580173, "grad_norm": 1.2695267347626382, "learning_rate": 4.719975785920093e-06, "loss": 0.5501, "step": 1864 }, { "epoch": 2.2827417380660955, "grad_norm": 1.53738085378353, "learning_rate": 4.719680996624928e-06, "loss": 1.1468, "step": 1865 }, { "epoch": 2.2839657282741737, "grad_norm": 1.299574984351455, "learning_rate": 4.719386061460539e-06, "loss": 0.4306, "step": 1866 }, { "epoch": 2.2851897184822523, "grad_norm": 1.0925311283029786, "learning_rate": 4.719090980446306e-06, "loss": 0.5851, "step": 1867 }, { "epoch": 2.2864137086903304, "grad_norm": 1.063449925527497, "learning_rate": 4.718795753601622e-06, "loss": 1.435, "step": 1868 }, { "epoch": 2.2876376988984086, "grad_norm": 1.4497983828572243, "learning_rate": 4.718500380945887e-06, "loss": 0.5128, "step": 1869 }, { "epoch": 2.2888616891064872, "grad_norm": 1.2062776153243528, "learning_rate": 4.718204862498514e-06, "loss": 0.6857, "step": 1870 }, { "epoch": 2.2900856793145654, "grad_norm": 0.9012660885233963, "learning_rate": 4.71790919827892e-06, "loss": 1.6296, "step": 1871 }, { "epoch": 2.2913096695226436, "grad_norm": 0.9715708676625618, "learning_rate": 4.717613388306539e-06, "loss": 1.1728, "step": 1872 }, { "epoch": 2.292533659730722, "grad_norm": 1.0263480235301417, "learning_rate": 4.7173174326008075e-06, "loss": 0.6868, "step": 1873 }, { "epoch": 2.2937576499388004, "grad_norm": 0.6194327692280582, "learning_rate": 4.7170213311811756e-06, "loss": 0.5867, "step": 1874 }, { "epoch": 2.294981640146879, "grad_norm": 0.891155756844536, "learning_rate": 4.716725084067102e-06, "loss": 0.5361, "step": 1875 }, { "epoch": 2.296205630354957, "grad_norm": 1.1769091125333857, "learning_rate": 4.716428691278056e-06, "loss": 1.1276, "step": 1876 }, { "epoch": 2.2974296205630353, "grad_norm": 0.8029776505209408, "learning_rate": 4.716132152833513e-06, "loss": 1.1354, "step": 1877 }, { "epoch": 2.298653610771114, "grad_norm": 1.196895501195475, "learning_rate": 4.7158354687529625e-06, "loss": 1.4678, "step": 1878 }, { "epoch": 2.299877600979192, "grad_norm": 1.3165146202300035, "learning_rate": 4.715538639055901e-06, "loss": 0.7601, "step": 1879 }, { "epoch": 2.3011015911872703, "grad_norm": 1.1039384845770621, "learning_rate": 4.7152416637618335e-06, "loss": 0.5752, "step": 1880 }, { "epoch": 2.302325581395349, "grad_norm": 0.7269891027360053, "learning_rate": 4.7149445428902794e-06, "loss": 0.7948, "step": 1881 }, { "epoch": 2.303549571603427, "grad_norm": 0.8998994953010416, "learning_rate": 4.714647276460762e-06, "loss": 0.5645, "step": 1882 }, { "epoch": 2.3047735618115057, "grad_norm": 1.2568270616569326, "learning_rate": 4.714349864492816e-06, "loss": 0.997, "step": 1883 }, { "epoch": 2.305997552019584, "grad_norm": 0.9724051828838117, "learning_rate": 4.714052307005987e-06, "loss": 0.922, "step": 1884 }, { "epoch": 2.307221542227662, "grad_norm": 0.7563659597943564, "learning_rate": 4.713754604019831e-06, "loss": 0.4572, "step": 1885 }, { "epoch": 2.3084455324357407, "grad_norm": 1.1748432216588744, "learning_rate": 4.71345675555391e-06, "loss": 0.5626, "step": 1886 }, { "epoch": 2.309669522643819, "grad_norm": 1.6688177462208922, "learning_rate": 4.713158761627799e-06, "loss": 1.0607, "step": 1887 }, { "epoch": 2.310893512851897, "grad_norm": 0.9631109577110486, "learning_rate": 4.712860622261078e-06, "loss": 0.5539, "step": 1888 }, { "epoch": 2.3121175030599757, "grad_norm": 1.0912333202113949, "learning_rate": 4.712562337473343e-06, "loss": 0.4901, "step": 1889 }, { "epoch": 2.313341493268054, "grad_norm": 0.9151982095186041, "learning_rate": 4.712263907284195e-06, "loss": 0.6658, "step": 1890 }, { "epoch": 2.314565483476132, "grad_norm": 0.7630881977432663, "learning_rate": 4.7119653317132454e-06, "loss": 0.4936, "step": 1891 }, { "epoch": 2.3157894736842106, "grad_norm": 0.5097942777190032, "learning_rate": 4.711666610780115e-06, "loss": 0.4609, "step": 1892 }, { "epoch": 2.317013463892289, "grad_norm": 0.8108320508497355, "learning_rate": 4.711367744504436e-06, "loss": 0.6758, "step": 1893 }, { "epoch": 2.318237454100367, "grad_norm": 0.7435477627947105, "learning_rate": 4.711068732905848e-06, "loss": 0.5171, "step": 1894 }, { "epoch": 2.3194614443084456, "grad_norm": 0.8959268130708758, "learning_rate": 4.710769576004001e-06, "loss": 0.4453, "step": 1895 }, { "epoch": 2.3206854345165238, "grad_norm": 0.7471499052525399, "learning_rate": 4.710470273818555e-06, "loss": 1.2508, "step": 1896 }, { "epoch": 2.3219094247246024, "grad_norm": 1.1633638260446977, "learning_rate": 4.710170826369178e-06, "loss": 1.0517, "step": 1897 }, { "epoch": 2.3231334149326806, "grad_norm": 0.608115559424718, "learning_rate": 4.709871233675549e-06, "loss": 0.5565, "step": 1898 }, { "epoch": 2.3243574051407587, "grad_norm": 0.8005951613983394, "learning_rate": 4.709571495757357e-06, "loss": 0.7041, "step": 1899 }, { "epoch": 2.3255813953488373, "grad_norm": 0.9610517655945263, "learning_rate": 4.709271612634299e-06, "loss": 0.9996, "step": 1900 }, { "epoch": 2.3268053855569155, "grad_norm": 0.8326697093095426, "learning_rate": 4.708971584326081e-06, "loss": 1.0881, "step": 1901 }, { "epoch": 2.3280293757649937, "grad_norm": 0.7542930072225869, "learning_rate": 4.7086714108524215e-06, "loss": 0.5679, "step": 1902 }, { "epoch": 2.3292533659730723, "grad_norm": 0.8384071664102889, "learning_rate": 4.708371092233046e-06, "loss": 0.7764, "step": 1903 }, { "epoch": 2.3304773561811505, "grad_norm": 1.1740951557909265, "learning_rate": 4.708070628487691e-06, "loss": 0.7425, "step": 1904 }, { "epoch": 2.331701346389229, "grad_norm": 0.639505116805248, "learning_rate": 4.707770019636101e-06, "loss": 0.6238, "step": 1905 }, { "epoch": 2.3329253365973073, "grad_norm": 1.7116632004338355, "learning_rate": 4.707469265698031e-06, "loss": 0.7828, "step": 1906 }, { "epoch": 2.3341493268053854, "grad_norm": 1.555643459821072, "learning_rate": 4.707168366693247e-06, "loss": 0.8961, "step": 1907 }, { "epoch": 2.335373317013464, "grad_norm": 0.8023299135409482, "learning_rate": 4.70686732264152e-06, "loss": 1.0553, "step": 1908 }, { "epoch": 2.3365973072215422, "grad_norm": 1.0094992980005395, "learning_rate": 4.706566133562636e-06, "loss": 0.733, "step": 1909 }, { "epoch": 2.3378212974296204, "grad_norm": 1.1427769200077593, "learning_rate": 4.706264799476388e-06, "loss": 1.2053, "step": 1910 }, { "epoch": 2.339045287637699, "grad_norm": 0.9930890466331854, "learning_rate": 4.705963320402577e-06, "loss": 0.5195, "step": 1911 }, { "epoch": 2.340269277845777, "grad_norm": 0.8653098939490641, "learning_rate": 4.705661696361016e-06, "loss": 0.777, "step": 1912 }, { "epoch": 2.3414932680538554, "grad_norm": 0.6873472466625226, "learning_rate": 4.705359927371526e-06, "loss": 0.572, "step": 1913 }, { "epoch": 2.342717258261934, "grad_norm": 1.3607697033237565, "learning_rate": 4.7050580134539395e-06, "loss": 0.6323, "step": 1914 }, { "epoch": 2.343941248470012, "grad_norm": 1.2451884434956817, "learning_rate": 4.7047559546280965e-06, "loss": 0.7687, "step": 1915 }, { "epoch": 2.3451652386780903, "grad_norm": 0.9453088368988347, "learning_rate": 4.704453750913846e-06, "loss": 0.5769, "step": 1916 }, { "epoch": 2.346389228886169, "grad_norm": 0.79296426484685, "learning_rate": 4.704151402331049e-06, "loss": 0.5068, "step": 1917 }, { "epoch": 2.347613219094247, "grad_norm": 0.7502610101843505, "learning_rate": 4.703848908899574e-06, "loss": 0.6193, "step": 1918 }, { "epoch": 2.3488372093023258, "grad_norm": 1.664090265732174, "learning_rate": 4.703546270639301e-06, "loss": 0.9136, "step": 1919 }, { "epoch": 2.350061199510404, "grad_norm": 1.481465404552778, "learning_rate": 4.7032434875701176e-06, "loss": 0.5476, "step": 1920 }, { "epoch": 2.351285189718482, "grad_norm": 1.0184048897291256, "learning_rate": 4.702940559711921e-06, "loss": 0.5005, "step": 1921 }, { "epoch": 2.3525091799265607, "grad_norm": 1.0554609294124078, "learning_rate": 4.702637487084618e-06, "loss": 0.9804, "step": 1922 }, { "epoch": 2.353733170134639, "grad_norm": 1.4399412085210204, "learning_rate": 4.7023342697081276e-06, "loss": 0.6211, "step": 1923 }, { "epoch": 2.354957160342717, "grad_norm": 0.7170198421669076, "learning_rate": 4.702030907602375e-06, "loss": 0.4592, "step": 1924 }, { "epoch": 2.3561811505507957, "grad_norm": 0.9968701763978323, "learning_rate": 4.701727400787295e-06, "loss": 0.5919, "step": 1925 }, { "epoch": 2.357405140758874, "grad_norm": 0.9385233850263351, "learning_rate": 4.701423749282834e-06, "loss": 0.7543, "step": 1926 }, { "epoch": 2.3586291309669525, "grad_norm": 0.8955818205284788, "learning_rate": 4.701119953108947e-06, "loss": 0.6333, "step": 1927 }, { "epoch": 2.3598531211750307, "grad_norm": 0.7063996029116164, "learning_rate": 4.700816012285598e-06, "loss": 0.8812, "step": 1928 }, { "epoch": 2.361077111383109, "grad_norm": 1.570893375814074, "learning_rate": 4.700511926832762e-06, "loss": 0.5483, "step": 1929 }, { "epoch": 2.3623011015911874, "grad_norm": 1.0496391730387182, "learning_rate": 4.700207696770419e-06, "loss": 1.5592, "step": 1930 }, { "epoch": 2.3635250917992656, "grad_norm": 1.1805859725287076, "learning_rate": 4.699903322118566e-06, "loss": 0.9241, "step": 1931 }, { "epoch": 2.364749082007344, "grad_norm": 1.1521708308442629, "learning_rate": 4.699598802897203e-06, "loss": 0.7245, "step": 1932 }, { "epoch": 2.3659730722154224, "grad_norm": 0.9394921217044425, "learning_rate": 4.699294139126343e-06, "loss": 0.6159, "step": 1933 }, { "epoch": 2.3671970624235006, "grad_norm": 1.1761606310691894, "learning_rate": 4.698989330826005e-06, "loss": 0.3799, "step": 1934 }, { "epoch": 2.3684210526315788, "grad_norm": 1.4233267404878251, "learning_rate": 4.698684378016223e-06, "loss": 0.9572, "step": 1935 }, { "epoch": 2.3696450428396574, "grad_norm": 0.6451691659753587, "learning_rate": 4.698379280717036e-06, "loss": 0.6448, "step": 1936 }, { "epoch": 2.3708690330477356, "grad_norm": 1.439779100423735, "learning_rate": 4.698074038948494e-06, "loss": 0.7716, "step": 1937 }, { "epoch": 2.3720930232558137, "grad_norm": 1.3128047291081741, "learning_rate": 4.697768652730656e-06, "loss": 0.3677, "step": 1938 }, { "epoch": 2.3733170134638923, "grad_norm": 0.8545334307708811, "learning_rate": 4.6974631220835905e-06, "loss": 0.4209, "step": 1939 }, { "epoch": 2.3745410036719705, "grad_norm": 0.789638408852278, "learning_rate": 4.697157447027378e-06, "loss": 1.3923, "step": 1940 }, { "epoch": 2.375764993880049, "grad_norm": 1.7246724080929752, "learning_rate": 4.696851627582104e-06, "loss": 0.8318, "step": 1941 }, { "epoch": 2.3769889840881273, "grad_norm": 1.456222956380552, "learning_rate": 4.696545663767867e-06, "loss": 1.0376, "step": 1942 }, { "epoch": 2.3782129742962055, "grad_norm": 0.6127381225023172, "learning_rate": 4.696239555604773e-06, "loss": 0.4118, "step": 1943 }, { "epoch": 2.379436964504284, "grad_norm": 1.0696777199392271, "learning_rate": 4.695933303112939e-06, "loss": 1.0755, "step": 1944 }, { "epoch": 2.3806609547123623, "grad_norm": 1.0209308036731126, "learning_rate": 4.6956269063124895e-06, "loss": 0.6175, "step": 1945 }, { "epoch": 2.3818849449204405, "grad_norm": 0.9339617373988369, "learning_rate": 4.6953203652235615e-06, "loss": 0.9893, "step": 1946 }, { "epoch": 2.383108935128519, "grad_norm": 1.214925742772279, "learning_rate": 4.6950136798663e-06, "loss": 0.5943, "step": 1947 }, { "epoch": 2.3843329253365972, "grad_norm": 0.8132353543630525, "learning_rate": 4.694706850260857e-06, "loss": 0.7598, "step": 1948 }, { "epoch": 2.385556915544676, "grad_norm": 1.3373094365979548, "learning_rate": 4.694399876427397e-06, "loss": 0.627, "step": 1949 }, { "epoch": 2.386780905752754, "grad_norm": 1.4439702640169259, "learning_rate": 4.694092758386095e-06, "loss": 0.5952, "step": 1950 }, { "epoch": 2.388004895960832, "grad_norm": 0.9176361644503913, "learning_rate": 4.693785496157132e-06, "loss": 1.3798, "step": 1951 }, { "epoch": 2.389228886168911, "grad_norm": 0.6360087069726685, "learning_rate": 4.6934780897607005e-06, "loss": 0.5893, "step": 1952 }, { "epoch": 2.390452876376989, "grad_norm": 1.1873039082776884, "learning_rate": 4.6931705392170016e-06, "loss": 0.5028, "step": 1953 }, { "epoch": 2.391676866585067, "grad_norm": 0.8976245880576597, "learning_rate": 4.692862844546246e-06, "loss": 0.6801, "step": 1954 }, { "epoch": 2.392900856793146, "grad_norm": 1.0239295740757415, "learning_rate": 4.692555005768656e-06, "loss": 1.6771, "step": 1955 }, { "epoch": 2.394124847001224, "grad_norm": 0.8201907697460017, "learning_rate": 4.692247022904461e-06, "loss": 0.75, "step": 1956 }, { "epoch": 2.395348837209302, "grad_norm": 1.1062417697379563, "learning_rate": 4.691938895973901e-06, "loss": 1.5318, "step": 1957 }, { "epoch": 2.3965728274173808, "grad_norm": 1.4255719949443622, "learning_rate": 4.691630624997222e-06, "loss": 0.5687, "step": 1958 }, { "epoch": 2.397796817625459, "grad_norm": 0.723361749038174, "learning_rate": 4.691322209994687e-06, "loss": 0.5326, "step": 1959 }, { "epoch": 2.399020807833537, "grad_norm": 0.9656153034686662, "learning_rate": 4.69101365098656e-06, "loss": 1.392, "step": 1960 }, { "epoch": 2.4002447980416157, "grad_norm": 0.632142485940512, "learning_rate": 4.69070494799312e-06, "loss": 0.6782, "step": 1961 }, { "epoch": 2.401468788249694, "grad_norm": 1.4857979129260754, "learning_rate": 4.690396101034654e-06, "loss": 0.4955, "step": 1962 }, { "epoch": 2.4026927784577725, "grad_norm": 1.2001526909005433, "learning_rate": 4.690087110131458e-06, "loss": 0.5794, "step": 1963 }, { "epoch": 2.4039167686658507, "grad_norm": 0.5613649425225692, "learning_rate": 4.689777975303837e-06, "loss": 0.3814, "step": 1964 }, { "epoch": 2.405140758873929, "grad_norm": 0.6756331894660321, "learning_rate": 4.689468696572108e-06, "loss": 0.6698, "step": 1965 }, { "epoch": 2.4063647490820075, "grad_norm": 1.0462189008787655, "learning_rate": 4.689159273956593e-06, "loss": 0.4957, "step": 1966 }, { "epoch": 2.4075887392900857, "grad_norm": 0.9070338221332254, "learning_rate": 4.688849707477629e-06, "loss": 0.6409, "step": 1967 }, { "epoch": 2.408812729498164, "grad_norm": 0.9967960041788031, "learning_rate": 4.688539997155558e-06, "loss": 0.686, "step": 1968 }, { "epoch": 2.4100367197062424, "grad_norm": 1.2869814660815038, "learning_rate": 4.688230143010733e-06, "loss": 0.8089, "step": 1969 }, { "epoch": 2.4112607099143206, "grad_norm": 0.6453206958110141, "learning_rate": 4.687920145063517e-06, "loss": 0.5461, "step": 1970 }, { "epoch": 2.4124847001223992, "grad_norm": 1.403224757460005, "learning_rate": 4.6876100033342805e-06, "loss": 1.0907, "step": 1971 }, { "epoch": 2.4137086903304774, "grad_norm": 0.6486978902986142, "learning_rate": 4.687299717843407e-06, "loss": 0.5121, "step": 1972 }, { "epoch": 2.4149326805385556, "grad_norm": 0.7575360742798415, "learning_rate": 4.686989288611287e-06, "loss": 0.5722, "step": 1973 }, { "epoch": 2.416156670746634, "grad_norm": 1.2238088568054706, "learning_rate": 4.686678715658319e-06, "loss": 1.2408, "step": 1974 }, { "epoch": 2.4173806609547124, "grad_norm": 1.2586215422424256, "learning_rate": 4.686367999004914e-06, "loss": 1.0632, "step": 1975 }, { "epoch": 2.4186046511627906, "grad_norm": 0.6021748916105605, "learning_rate": 4.6860571386714905e-06, "loss": 0.5588, "step": 1976 }, { "epoch": 2.419828641370869, "grad_norm": 0.6611145951086846, "learning_rate": 4.685746134678478e-06, "loss": 0.5291, "step": 1977 }, { "epoch": 2.4210526315789473, "grad_norm": 0.8229597056603816, "learning_rate": 4.685434987046314e-06, "loss": 0.7084, "step": 1978 }, { "epoch": 2.4222766217870255, "grad_norm": 0.7819356406782624, "learning_rate": 4.685123695795446e-06, "loss": 1.1465, "step": 1979 }, { "epoch": 2.423500611995104, "grad_norm": 1.6516904878339431, "learning_rate": 4.684812260946331e-06, "loss": 1.1279, "step": 1980 }, { "epoch": 2.4247246022031823, "grad_norm": 0.781169797088301, "learning_rate": 4.684500682519435e-06, "loss": 0.5803, "step": 1981 }, { "epoch": 2.4259485924112605, "grad_norm": 0.8255248329447696, "learning_rate": 4.684188960535234e-06, "loss": 0.5327, "step": 1982 }, { "epoch": 2.427172582619339, "grad_norm": 0.9664449380806159, "learning_rate": 4.683877095014215e-06, "loss": 0.6499, "step": 1983 }, { "epoch": 2.4283965728274173, "grad_norm": 0.9511864570644972, "learning_rate": 4.6835650859768685e-06, "loss": 0.5007, "step": 1984 }, { "epoch": 2.429620563035496, "grad_norm": 0.8630653387578063, "learning_rate": 4.683252933443701e-06, "loss": 0.6229, "step": 1985 }, { "epoch": 2.430844553243574, "grad_norm": 1.087426350518753, "learning_rate": 4.682940637435228e-06, "loss": 0.6085, "step": 1986 }, { "epoch": 2.4320685434516522, "grad_norm": 0.976540682236335, "learning_rate": 4.682628197971969e-06, "loss": 1.2311, "step": 1987 }, { "epoch": 2.433292533659731, "grad_norm": 0.8362601926282501, "learning_rate": 4.682315615074459e-06, "loss": 0.5455, "step": 1988 }, { "epoch": 2.434516523867809, "grad_norm": 1.2600123674086403, "learning_rate": 4.682002888763237e-06, "loss": 0.537, "step": 1989 }, { "epoch": 2.435740514075887, "grad_norm": 1.241308544882274, "learning_rate": 4.681690019058857e-06, "loss": 0.5755, "step": 1990 }, { "epoch": 2.436964504283966, "grad_norm": 0.9879516652685221, "learning_rate": 4.681377005981878e-06, "loss": 1.0684, "step": 1991 }, { "epoch": 2.438188494492044, "grad_norm": 1.2137035156314764, "learning_rate": 4.681063849552871e-06, "loss": 0.7647, "step": 1992 }, { "epoch": 2.4394124847001226, "grad_norm": 1.2958304187030811, "learning_rate": 4.680750549792415e-06, "loss": 0.5229, "step": 1993 }, { "epoch": 2.440636474908201, "grad_norm": 0.8033045091645395, "learning_rate": 4.680437106721098e-06, "loss": 0.5968, "step": 1994 }, { "epoch": 2.441860465116279, "grad_norm": 0.9231230495744421, "learning_rate": 4.68012352035952e-06, "loss": 0.6566, "step": 1995 }, { "epoch": 2.4430844553243576, "grad_norm": 0.8793167928133118, "learning_rate": 4.679809790728287e-06, "loss": 1.3367, "step": 1996 }, { "epoch": 2.4443084455324358, "grad_norm": 0.953736568385416, "learning_rate": 4.679495917848018e-06, "loss": 0.8771, "step": 1997 }, { "epoch": 2.445532435740514, "grad_norm": 1.6317287910534855, "learning_rate": 4.679181901739339e-06, "loss": 0.4566, "step": 1998 }, { "epoch": 2.4467564259485926, "grad_norm": 0.8093634260542532, "learning_rate": 4.678867742422885e-06, "loss": 0.6829, "step": 1999 }, { "epoch": 2.4479804161566707, "grad_norm": 1.5105886740632646, "learning_rate": 4.6785534399193015e-06, "loss": 0.4565, "step": 2000 }, { "epoch": 2.449204406364749, "grad_norm": 1.4643503858923508, "learning_rate": 4.678238994249245e-06, "loss": 0.5366, "step": 2001 }, { "epoch": 2.4504283965728275, "grad_norm": 0.9391872800730385, "learning_rate": 4.677924405433378e-06, "loss": 0.6659, "step": 2002 }, { "epoch": 2.4516523867809057, "grad_norm": 0.689648363482712, "learning_rate": 4.677609673492375e-06, "loss": 0.6167, "step": 2003 }, { "epoch": 2.452876376988984, "grad_norm": 1.0244745242005668, "learning_rate": 4.677294798446918e-06, "loss": 0.6969, "step": 2004 }, { "epoch": 2.4541003671970625, "grad_norm": 1.391714878445633, "learning_rate": 4.676979780317701e-06, "loss": 0.7948, "step": 2005 }, { "epoch": 2.4553243574051407, "grad_norm": 0.7441361607029013, "learning_rate": 4.676664619125424e-06, "loss": 0.7332, "step": 2006 }, { "epoch": 2.4565483476132193, "grad_norm": 0.9360468581899858, "learning_rate": 4.6763493148907995e-06, "loss": 1.0396, "step": 2007 }, { "epoch": 2.4577723378212974, "grad_norm": 0.8179059763595751, "learning_rate": 4.676033867634548e-06, "loss": 0.9257, "step": 2008 }, { "epoch": 2.4589963280293756, "grad_norm": 0.9103874259138527, "learning_rate": 4.675718277377399e-06, "loss": 0.3834, "step": 2009 }, { "epoch": 2.4602203182374542, "grad_norm": 0.8265545041437229, "learning_rate": 4.675402544140093e-06, "loss": 0.7303, "step": 2010 }, { "epoch": 2.4614443084455324, "grad_norm": 0.9088141147919799, "learning_rate": 4.675086667943377e-06, "loss": 0.7313, "step": 2011 }, { "epoch": 2.4626682986536106, "grad_norm": 1.1528736987929922, "learning_rate": 4.674770648808011e-06, "loss": 0.5642, "step": 2012 }, { "epoch": 2.463892288861689, "grad_norm": 0.7593355888294113, "learning_rate": 4.674454486754762e-06, "loss": 0.4517, "step": 2013 }, { "epoch": 2.4651162790697674, "grad_norm": 0.7312213833910401, "learning_rate": 4.674138181804406e-06, "loss": 0.4202, "step": 2014 }, { "epoch": 2.466340269277846, "grad_norm": 0.7117683056305699, "learning_rate": 4.673821733977731e-06, "loss": 0.6088, "step": 2015 }, { "epoch": 2.467564259485924, "grad_norm": 0.7600211202310814, "learning_rate": 4.6735051432955314e-06, "loss": 0.5758, "step": 2016 }, { "epoch": 2.4687882496940023, "grad_norm": 1.0044817658988496, "learning_rate": 4.673188409778614e-06, "loss": 0.6097, "step": 2017 }, { "epoch": 2.470012239902081, "grad_norm": 1.6430019153827742, "learning_rate": 4.672871533447791e-06, "loss": 0.4027, "step": 2018 }, { "epoch": 2.471236230110159, "grad_norm": 1.2899556293953571, "learning_rate": 4.672554514323889e-06, "loss": 0.5659, "step": 2019 }, { "epoch": 2.4724602203182373, "grad_norm": 1.3482809537612783, "learning_rate": 4.67223735242774e-06, "loss": 0.4588, "step": 2020 }, { "epoch": 2.473684210526316, "grad_norm": 1.0982380787179564, "learning_rate": 4.6719200477801865e-06, "loss": 0.4683, "step": 2021 }, { "epoch": 2.474908200734394, "grad_norm": 1.0897631691509069, "learning_rate": 4.67160260040208e-06, "loss": 0.4694, "step": 2022 }, { "epoch": 2.4761321909424723, "grad_norm": 1.024320284884599, "learning_rate": 4.671285010314284e-06, "loss": 1.1598, "step": 2023 }, { "epoch": 2.477356181150551, "grad_norm": 1.1545270238147962, "learning_rate": 4.670967277537667e-06, "loss": 0.6288, "step": 2024 }, { "epoch": 2.478580171358629, "grad_norm": 1.0798534668809237, "learning_rate": 4.670649402093112e-06, "loss": 0.61, "step": 2025 }, { "epoch": 2.4798041615667072, "grad_norm": 1.0764644794708718, "learning_rate": 4.6703313840015055e-06, "loss": 1.1621, "step": 2026 }, { "epoch": 2.481028151774786, "grad_norm": 0.7943273138271846, "learning_rate": 4.670013223283748e-06, "loss": 0.7624, "step": 2027 }, { "epoch": 2.482252141982864, "grad_norm": 0.8512146712941421, "learning_rate": 4.669694919960749e-06, "loss": 0.4063, "step": 2028 }, { "epoch": 2.4834761321909427, "grad_norm": 1.056282043894564, "learning_rate": 4.669376474053424e-06, "loss": 0.6716, "step": 2029 }, { "epoch": 2.484700122399021, "grad_norm": 1.1315707906117647, "learning_rate": 4.6690578855827006e-06, "loss": 0.6443, "step": 2030 }, { "epoch": 2.485924112607099, "grad_norm": 1.3745625296415231, "learning_rate": 4.668739154569517e-06, "loss": 0.6216, "step": 2031 }, { "epoch": 2.4871481028151776, "grad_norm": 0.905764903604571, "learning_rate": 4.668420281034817e-06, "loss": 0.4719, "step": 2032 }, { "epoch": 2.488372093023256, "grad_norm": 1.0881152181161204, "learning_rate": 4.668101264999557e-06, "loss": 0.9763, "step": 2033 }, { "epoch": 2.489596083231334, "grad_norm": 1.0819726592251493, "learning_rate": 4.667782106484701e-06, "loss": 1.4903, "step": 2034 }, { "epoch": 2.4908200734394126, "grad_norm": 0.9690189951718694, "learning_rate": 4.667462805511223e-06, "loss": 0.5254, "step": 2035 }, { "epoch": 2.4920440636474908, "grad_norm": 1.014980905433294, "learning_rate": 4.667143362100106e-06, "loss": 1.2322, "step": 2036 }, { "epoch": 2.4932680538555694, "grad_norm": 0.8157045803424169, "learning_rate": 4.666823776272344e-06, "loss": 0.6549, "step": 2037 }, { "epoch": 2.4944920440636476, "grad_norm": 0.6659207752470652, "learning_rate": 4.666504048048937e-06, "loss": 0.6322, "step": 2038 }, { "epoch": 2.4957160342717257, "grad_norm": 1.7911628920858291, "learning_rate": 4.666184177450897e-06, "loss": 0.7994, "step": 2039 }, { "epoch": 2.4969400244798043, "grad_norm": 0.9831500745888958, "learning_rate": 4.665864164499245e-06, "loss": 0.5262, "step": 2040 }, { "epoch": 2.4981640146878825, "grad_norm": 1.1964928512229351, "learning_rate": 4.665544009215012e-06, "loss": 0.4512, "step": 2041 }, { "epoch": 2.4993880048959607, "grad_norm": 1.3722437336116062, "learning_rate": 4.665223711619236e-06, "loss": 1.4931, "step": 2042 }, { "epoch": 2.5006119951040393, "grad_norm": 0.7502108691050444, "learning_rate": 4.664903271732966e-06, "loss": 0.6247, "step": 2043 }, { "epoch": 2.5018359853121175, "grad_norm": 0.9127226801361672, "learning_rate": 4.664582689577261e-06, "loss": 0.9907, "step": 2044 }, { "epoch": 2.5030599755201957, "grad_norm": 0.9087741259363168, "learning_rate": 4.664261965173188e-06, "loss": 0.9293, "step": 2045 }, { "epoch": 2.5042839657282743, "grad_norm": 0.8928036186180087, "learning_rate": 4.663941098541823e-06, "loss": 1.365, "step": 2046 }, { "epoch": 2.5055079559363524, "grad_norm": 1.6154146938554772, "learning_rate": 4.663620089704253e-06, "loss": 0.9113, "step": 2047 }, { "epoch": 2.5067319461444306, "grad_norm": 1.5325606043101374, "learning_rate": 4.6632989386815744e-06, "loss": 0.5367, "step": 2048 }, { "epoch": 2.5079559363525092, "grad_norm": 0.7396270937517353, "learning_rate": 4.662977645494889e-06, "loss": 0.5482, "step": 2049 }, { "epoch": 2.5091799265605874, "grad_norm": 1.405465285131659, "learning_rate": 4.662656210165315e-06, "loss": 0.6416, "step": 2050 }, { "epoch": 2.5104039167686656, "grad_norm": 0.80710487969568, "learning_rate": 4.662334632713973e-06, "loss": 0.5832, "step": 2051 }, { "epoch": 2.511627906976744, "grad_norm": 1.5059245057990465, "learning_rate": 4.662012913161998e-06, "loss": 0.512, "step": 2052 }, { "epoch": 2.5128518971848224, "grad_norm": 0.857311595280918, "learning_rate": 4.661691051530529e-06, "loss": 1.0003, "step": 2053 }, { "epoch": 2.514075887392901, "grad_norm": 1.1764782213359095, "learning_rate": 4.661369047840722e-06, "loss": 0.69, "step": 2054 }, { "epoch": 2.515299877600979, "grad_norm": 1.053797392608861, "learning_rate": 4.661046902113734e-06, "loss": 1.4593, "step": 2055 }, { "epoch": 2.516523867809058, "grad_norm": 1.3665686703365592, "learning_rate": 4.660724614370737e-06, "loss": 0.6689, "step": 2056 }, { "epoch": 2.517747858017136, "grad_norm": 1.204794022962197, "learning_rate": 4.66040218463291e-06, "loss": 0.5165, "step": 2057 }, { "epoch": 2.518971848225214, "grad_norm": 2.0784461150173845, "learning_rate": 4.660079612921443e-06, "loss": 0.3873, "step": 2058 }, { "epoch": 2.5201958384332928, "grad_norm": 1.3119630268259508, "learning_rate": 4.659756899257532e-06, "loss": 0.9024, "step": 2059 }, { "epoch": 2.521419828641371, "grad_norm": 0.9963898559279143, "learning_rate": 4.659434043662388e-06, "loss": 0.7723, "step": 2060 }, { "epoch": 2.522643818849449, "grad_norm": 0.9156178165680909, "learning_rate": 4.659111046157225e-06, "loss": 0.6725, "step": 2061 }, { "epoch": 2.5238678090575277, "grad_norm": 0.9334890440525432, "learning_rate": 4.65878790676327e-06, "loss": 0.5992, "step": 2062 }, { "epoch": 2.525091799265606, "grad_norm": 0.9955109799015857, "learning_rate": 4.658464625501759e-06, "loss": 0.5329, "step": 2063 }, { "epoch": 2.526315789473684, "grad_norm": 0.656079145237901, "learning_rate": 4.658141202393935e-06, "loss": 0.726, "step": 2064 }, { "epoch": 2.5275397796817627, "grad_norm": 1.2085286500971073, "learning_rate": 4.6578176374610545e-06, "loss": 0.7309, "step": 2065 }, { "epoch": 2.528763769889841, "grad_norm": 1.0343651850060889, "learning_rate": 4.65749393072438e-06, "loss": 0.8806, "step": 2066 }, { "epoch": 2.529987760097919, "grad_norm": 1.0430910166940934, "learning_rate": 4.657170082205185e-06, "loss": 0.618, "step": 2067 }, { "epoch": 2.5312117503059977, "grad_norm": 0.9736605867913639, "learning_rate": 4.6568460919247495e-06, "loss": 0.9745, "step": 2068 }, { "epoch": 2.532435740514076, "grad_norm": 1.5203476957695676, "learning_rate": 4.656521959904368e-06, "loss": 0.4698, "step": 2069 }, { "epoch": 2.533659730722154, "grad_norm": 0.9442290007645968, "learning_rate": 4.656197686165339e-06, "loss": 0.7653, "step": 2070 }, { "epoch": 2.5348837209302326, "grad_norm": 1.1070459465137836, "learning_rate": 4.655873270728973e-06, "loss": 1.6179, "step": 2071 }, { "epoch": 2.536107711138311, "grad_norm": 1.0707753501436956, "learning_rate": 4.655548713616591e-06, "loss": 0.5692, "step": 2072 }, { "epoch": 2.537331701346389, "grad_norm": 0.981975084742688, "learning_rate": 4.655224014849519e-06, "loss": 0.5867, "step": 2073 }, { "epoch": 2.5385556915544676, "grad_norm": 1.1371537352711047, "learning_rate": 4.654899174449097e-06, "loss": 0.5847, "step": 2074 }, { "epoch": 2.5397796817625458, "grad_norm": 1.3683627396272005, "learning_rate": 4.654574192436672e-06, "loss": 1.0362, "step": 2075 }, { "epoch": 2.5410036719706244, "grad_norm": 0.930662798267102, "learning_rate": 4.6542490688336e-06, "loss": 0.5217, "step": 2076 }, { "epoch": 2.5422276621787026, "grad_norm": 1.5138177920209508, "learning_rate": 4.653923803661246e-06, "loss": 0.472, "step": 2077 }, { "epoch": 2.543451652386781, "grad_norm": 1.7615472395071672, "learning_rate": 4.653598396940988e-06, "loss": 0.5755, "step": 2078 }, { "epoch": 2.5446756425948593, "grad_norm": 1.5124888994398478, "learning_rate": 4.653272848694208e-06, "loss": 0.3637, "step": 2079 }, { "epoch": 2.5458996328029375, "grad_norm": 1.3073437015323743, "learning_rate": 4.652947158942301e-06, "loss": 1.0822, "step": 2080 }, { "epoch": 2.547123623011016, "grad_norm": 1.2861262227901857, "learning_rate": 4.65262132770667e-06, "loss": 1.4235, "step": 2081 }, { "epoch": 2.5483476132190943, "grad_norm": 0.613898787695729, "learning_rate": 4.652295355008728e-06, "loss": 0.6397, "step": 2082 }, { "epoch": 2.5495716034271725, "grad_norm": 1.4213442493853043, "learning_rate": 4.651969240869895e-06, "loss": 0.4613, "step": 2083 }, { "epoch": 2.550795593635251, "grad_norm": 0.7241423234863972, "learning_rate": 4.651642985311604e-06, "loss": 0.6286, "step": 2084 }, { "epoch": 2.5520195838433293, "grad_norm": 1.3002659757692996, "learning_rate": 4.651316588355294e-06, "loss": 0.596, "step": 2085 }, { "epoch": 2.5532435740514074, "grad_norm": 0.8958377942237526, "learning_rate": 4.650990050022415e-06, "loss": 0.5603, "step": 2086 }, { "epoch": 2.554467564259486, "grad_norm": 0.8046171671990715, "learning_rate": 4.650663370334426e-06, "loss": 0.4431, "step": 2087 }, { "epoch": 2.5556915544675642, "grad_norm": 1.4507892131212132, "learning_rate": 4.6503365493127945e-06, "loss": 0.6924, "step": 2088 }, { "epoch": 2.5569155446756424, "grad_norm": 2.065000829353021, "learning_rate": 4.650009586978999e-06, "loss": 0.4829, "step": 2089 }, { "epoch": 2.558139534883721, "grad_norm": 1.2757677493302193, "learning_rate": 4.6496824833545254e-06, "loss": 0.7579, "step": 2090 }, { "epoch": 2.559363525091799, "grad_norm": 1.212731790994818, "learning_rate": 4.649355238460871e-06, "loss": 1.027, "step": 2091 }, { "epoch": 2.5605875152998774, "grad_norm": 1.0888089467420163, "learning_rate": 4.6490278523195395e-06, "loss": 0.9339, "step": 2092 }, { "epoch": 2.561811505507956, "grad_norm": 0.5234815759610121, "learning_rate": 4.6487003249520455e-06, "loss": 0.4572, "step": 2093 }, { "epoch": 2.563035495716034, "grad_norm": 0.6491394404938936, "learning_rate": 4.648372656379915e-06, "loss": 0.6298, "step": 2094 }, { "epoch": 2.5642594859241123, "grad_norm": 1.1445492213594157, "learning_rate": 4.64804484662468e-06, "loss": 0.6563, "step": 2095 }, { "epoch": 2.565483476132191, "grad_norm": 1.3857417468957276, "learning_rate": 4.647716895707881e-06, "loss": 0.5703, "step": 2096 }, { "epoch": 2.566707466340269, "grad_norm": 1.3535704607565835, "learning_rate": 4.647388803651073e-06, "loss": 0.5064, "step": 2097 }, { "epoch": 2.5679314565483478, "grad_norm": 1.1238696519048372, "learning_rate": 4.6470605704758155e-06, "loss": 0.5099, "step": 2098 }, { "epoch": 2.569155446756426, "grad_norm": 1.5234737344430358, "learning_rate": 4.646732196203679e-06, "loss": 0.5968, "step": 2099 }, { "epoch": 2.5703794369645045, "grad_norm": 0.5834258593778913, "learning_rate": 4.646403680856241e-06, "loss": 0.5622, "step": 2100 }, { "epoch": 2.5716034271725827, "grad_norm": 1.4431406327834029, "learning_rate": 4.646075024455093e-06, "loss": 0.5165, "step": 2101 }, { "epoch": 2.572827417380661, "grad_norm": 1.3054570412267728, "learning_rate": 4.645746227021833e-06, "loss": 0.6803, "step": 2102 }, { "epoch": 2.5740514075887395, "grad_norm": 1.038043509922551, "learning_rate": 4.645417288578066e-06, "loss": 0.4522, "step": 2103 }, { "epoch": 2.5752753977968177, "grad_norm": 1.2321864109612957, "learning_rate": 4.645088209145411e-06, "loss": 0.829, "step": 2104 }, { "epoch": 2.576499388004896, "grad_norm": 0.5519622620062806, "learning_rate": 4.644758988745493e-06, "loss": 0.5296, "step": 2105 }, { "epoch": 2.5777233782129745, "grad_norm": 1.3820656366175297, "learning_rate": 4.644429627399947e-06, "loss": 1.1597, "step": 2106 }, { "epoch": 2.5789473684210527, "grad_norm": 0.8421848227348346, "learning_rate": 4.644100125130418e-06, "loss": 0.5617, "step": 2107 }, { "epoch": 2.580171358629131, "grad_norm": 0.973204490085165, "learning_rate": 4.6437704819585595e-06, "loss": 0.5479, "step": 2108 }, { "epoch": 2.5813953488372094, "grad_norm": 1.138924017652126, "learning_rate": 4.643440697906033e-06, "loss": 0.4387, "step": 2109 }, { "epoch": 2.5826193390452876, "grad_norm": 0.9412391192922551, "learning_rate": 4.6431107729945126e-06, "loss": 0.5049, "step": 2110 }, { "epoch": 2.583843329253366, "grad_norm": 0.9261500306384691, "learning_rate": 4.642780707245679e-06, "loss": 0.6582, "step": 2111 }, { "epoch": 2.5850673194614444, "grad_norm": 0.9744912971493168, "learning_rate": 4.6424505006812225e-06, "loss": 0.9399, "step": 2112 }, { "epoch": 2.5862913096695226, "grad_norm": 1.7347745171051403, "learning_rate": 4.642120153322845e-06, "loss": 0.6806, "step": 2113 }, { "epoch": 2.5875152998776008, "grad_norm": 1.428678385152879, "learning_rate": 4.641789665192252e-06, "loss": 0.498, "step": 2114 }, { "epoch": 2.5887392900856794, "grad_norm": 0.8689637453607042, "learning_rate": 4.641459036311166e-06, "loss": 1.3624, "step": 2115 }, { "epoch": 2.5899632802937576, "grad_norm": 0.8764753770634701, "learning_rate": 4.641128266701311e-06, "loss": 0.7269, "step": 2116 }, { "epoch": 2.5911872705018357, "grad_norm": 0.9266386198802584, "learning_rate": 4.6407973563844274e-06, "loss": 0.5307, "step": 2117 }, { "epoch": 2.5924112607099143, "grad_norm": 1.264306659919823, "learning_rate": 4.640466305382259e-06, "loss": 0.5019, "step": 2118 }, { "epoch": 2.5936352509179925, "grad_norm": 0.8249426463975164, "learning_rate": 4.640135113716562e-06, "loss": 0.732, "step": 2119 }, { "epoch": 2.594859241126071, "grad_norm": 1.2102716489196568, "learning_rate": 4.639803781409102e-06, "loss": 0.554, "step": 2120 }, { "epoch": 2.5960832313341493, "grad_norm": 1.8983204772016684, "learning_rate": 4.639472308481651e-06, "loss": 0.4465, "step": 2121 }, { "epoch": 2.597307221542228, "grad_norm": 1.053892085320467, "learning_rate": 4.639140694955993e-06, "loss": 1.2047, "step": 2122 }, { "epoch": 2.598531211750306, "grad_norm": 1.3322411663293574, "learning_rate": 4.638808940853923e-06, "loss": 1.3746, "step": 2123 }, { "epoch": 2.5997552019583843, "grad_norm": 1.113072607181846, "learning_rate": 4.638477046197238e-06, "loss": 0.556, "step": 2124 }, { "epoch": 2.600979192166463, "grad_norm": 1.0452558193492478, "learning_rate": 4.638145011007752e-06, "loss": 1.5915, "step": 2125 }, { "epoch": 2.602203182374541, "grad_norm": 1.7101631569556102, "learning_rate": 4.637812835307284e-06, "loss": 0.8983, "step": 2126 }, { "epoch": 2.6034271725826192, "grad_norm": 1.0478733685906347, "learning_rate": 4.637480519117664e-06, "loss": 1.7102, "step": 2127 }, { "epoch": 2.604651162790698, "grad_norm": 0.9827442203121775, "learning_rate": 4.63714806246073e-06, "loss": 1.128, "step": 2128 }, { "epoch": 2.605875152998776, "grad_norm": 1.1787403419208504, "learning_rate": 4.63681546535833e-06, "loss": 0.6064, "step": 2129 }, { "epoch": 2.607099143206854, "grad_norm": 1.5854246342053935, "learning_rate": 4.636482727832322e-06, "loss": 0.4502, "step": 2130 }, { "epoch": 2.608323133414933, "grad_norm": 0.811103833794266, "learning_rate": 4.636149849904571e-06, "loss": 0.4228, "step": 2131 }, { "epoch": 2.609547123623011, "grad_norm": 1.3060986833270367, "learning_rate": 4.6358168315969536e-06, "loss": 0.7204, "step": 2132 }, { "epoch": 2.610771113831089, "grad_norm": 1.0307046918668448, "learning_rate": 4.6354836729313525e-06, "loss": 0.9887, "step": 2133 }, { "epoch": 2.611995104039168, "grad_norm": 0.9994328272538338, "learning_rate": 4.635150373929663e-06, "loss": 0.6914, "step": 2134 }, { "epoch": 2.613219094247246, "grad_norm": 1.3154941480864162, "learning_rate": 4.63481693461379e-06, "loss": 1.1409, "step": 2135 }, { "epoch": 2.614443084455324, "grad_norm": 1.60115088296653, "learning_rate": 4.634483355005642e-06, "loss": 0.4494, "step": 2136 }, { "epoch": 2.6156670746634028, "grad_norm": 1.076399643997741, "learning_rate": 4.634149635127144e-06, "loss": 0.5958, "step": 2137 }, { "epoch": 2.616891064871481, "grad_norm": 1.0069916996313146, "learning_rate": 4.633815775000226e-06, "loss": 0.5668, "step": 2138 }, { "epoch": 2.618115055079559, "grad_norm": 1.0801143560442514, "learning_rate": 4.633481774646827e-06, "loss": 0.6644, "step": 2139 }, { "epoch": 2.6193390452876377, "grad_norm": 1.1115074894600387, "learning_rate": 4.633147634088897e-06, "loss": 1.6084, "step": 2140 }, { "epoch": 2.620563035495716, "grad_norm": 0.915699651577624, "learning_rate": 4.632813353348395e-06, "loss": 0.688, "step": 2141 }, { "epoch": 2.6217870257037945, "grad_norm": 0.6651109822995419, "learning_rate": 4.632478932447288e-06, "loss": 0.4301, "step": 2142 }, { "epoch": 2.6230110159118727, "grad_norm": 0.5451588635291792, "learning_rate": 4.632144371407553e-06, "loss": 0.4424, "step": 2143 }, { "epoch": 2.6242350061199513, "grad_norm": 0.8060212855822413, "learning_rate": 4.631809670251176e-06, "loss": 0.5628, "step": 2144 }, { "epoch": 2.6254589963280295, "grad_norm": 0.8267720826299793, "learning_rate": 4.631474829000152e-06, "loss": 0.6211, "step": 2145 }, { "epoch": 2.6266829865361077, "grad_norm": 1.2719840398177333, "learning_rate": 4.631139847676487e-06, "loss": 0.4524, "step": 2146 }, { "epoch": 2.6279069767441863, "grad_norm": 1.147529101825175, "learning_rate": 4.630804726302194e-06, "loss": 0.6644, "step": 2147 }, { "epoch": 2.6291309669522644, "grad_norm": 0.9910113537444569, "learning_rate": 4.630469464899294e-06, "loss": 0.6042, "step": 2148 }, { "epoch": 2.6303549571603426, "grad_norm": 0.8194425000132584, "learning_rate": 4.630134063489822e-06, "loss": 1.1833, "step": 2149 }, { "epoch": 2.6315789473684212, "grad_norm": 1.7692079267470275, "learning_rate": 4.629798522095818e-06, "loss": 0.4135, "step": 2150 }, { "epoch": 2.6328029375764994, "grad_norm": 0.7210987684959724, "learning_rate": 4.629462840739333e-06, "loss": 0.719, "step": 2151 }, { "epoch": 2.6340269277845776, "grad_norm": 1.3266655232925069, "learning_rate": 4.629127019442426e-06, "loss": 0.6338, "step": 2152 }, { "epoch": 2.635250917992656, "grad_norm": 1.4842063916601462, "learning_rate": 4.628791058227167e-06, "loss": 0.6736, "step": 2153 }, { "epoch": 2.6364749082007344, "grad_norm": 1.5476080308147022, "learning_rate": 4.6284549571156325e-06, "loss": 0.5478, "step": 2154 }, { "epoch": 2.6376988984088126, "grad_norm": 0.9730951880799199, "learning_rate": 4.6281187161299115e-06, "loss": 0.7473, "step": 2155 }, { "epoch": 2.638922888616891, "grad_norm": 1.0073362179897392, "learning_rate": 4.627782335292099e-06, "loss": 1.3489, "step": 2156 }, { "epoch": 2.6401468788249693, "grad_norm": 1.0157339041814044, "learning_rate": 4.6274458146243026e-06, "loss": 0.8173, "step": 2157 }, { "epoch": 2.6413708690330475, "grad_norm": 1.0605597767282686, "learning_rate": 4.627109154148636e-06, "loss": 0.5332, "step": 2158 }, { "epoch": 2.642594859241126, "grad_norm": 1.084474440279894, "learning_rate": 4.626772353887223e-06, "loss": 1.2364, "step": 2159 }, { "epoch": 2.6438188494492043, "grad_norm": 1.0577835527442867, "learning_rate": 4.626435413862198e-06, "loss": 0.5233, "step": 2160 }, { "epoch": 2.6450428396572825, "grad_norm": 0.8876364224576155, "learning_rate": 4.626098334095703e-06, "loss": 0.8672, "step": 2161 }, { "epoch": 2.646266829865361, "grad_norm": 0.8010456062850537, "learning_rate": 4.625761114609888e-06, "loss": 0.6291, "step": 2162 }, { "epoch": 2.6474908200734393, "grad_norm": 1.1315509599497875, "learning_rate": 4.625423755426915e-06, "loss": 0.6149, "step": 2163 }, { "epoch": 2.648714810281518, "grad_norm": 1.021956061330222, "learning_rate": 4.6250862565689555e-06, "loss": 1.3148, "step": 2164 }, { "epoch": 2.649938800489596, "grad_norm": 1.3257506690009997, "learning_rate": 4.624748618058187e-06, "loss": 1.1076, "step": 2165 }, { "epoch": 2.6511627906976747, "grad_norm": 1.1823709950591617, "learning_rate": 4.624410839916798e-06, "loss": 0.8471, "step": 2166 }, { "epoch": 2.652386780905753, "grad_norm": 0.6507203521302883, "learning_rate": 4.624072922166987e-06, "loss": 0.5786, "step": 2167 }, { "epoch": 2.653610771113831, "grad_norm": 0.966142696279852, "learning_rate": 4.6237348648309585e-06, "loss": 0.5926, "step": 2168 }, { "epoch": 2.6548347613219097, "grad_norm": 0.6724944900159797, "learning_rate": 4.62339666793093e-06, "loss": 0.649, "step": 2169 }, { "epoch": 2.656058751529988, "grad_norm": 1.168565142666766, "learning_rate": 4.623058331489127e-06, "loss": 1.6315, "step": 2170 }, { "epoch": 2.657282741738066, "grad_norm": 1.2719328927201698, "learning_rate": 4.622719855527784e-06, "loss": 0.616, "step": 2171 }, { "epoch": 2.6585067319461446, "grad_norm": 1.344854066804907, "learning_rate": 4.622381240069142e-06, "loss": 1.0501, "step": 2172 }, { "epoch": 2.659730722154223, "grad_norm": 1.2125485941656362, "learning_rate": 4.6220424851354565e-06, "loss": 0.611, "step": 2173 }, { "epoch": 2.660954712362301, "grad_norm": 1.4218069751659992, "learning_rate": 4.621703590748987e-06, "loss": 0.7035, "step": 2174 }, { "epoch": 2.6621787025703796, "grad_norm": 1.0594397384972658, "learning_rate": 4.621364556932005e-06, "loss": 0.5976, "step": 2175 }, { "epoch": 2.6634026927784578, "grad_norm": 0.7992949863966209, "learning_rate": 4.621025383706791e-06, "loss": 0.3578, "step": 2176 }, { "epoch": 2.664626682986536, "grad_norm": 0.7823159089600693, "learning_rate": 4.620686071095634e-06, "loss": 0.7954, "step": 2177 }, { "epoch": 2.6658506731946146, "grad_norm": 1.3886335651871597, "learning_rate": 4.620346619120831e-06, "loss": 0.5775, "step": 2178 }, { "epoch": 2.6670746634026927, "grad_norm": 1.175061970440519, "learning_rate": 4.620007027804692e-06, "loss": 0.7875, "step": 2179 }, { "epoch": 2.668298653610771, "grad_norm": 1.1149459901827092, "learning_rate": 4.619667297169532e-06, "loss": 0.5361, "step": 2180 }, { "epoch": 2.6695226438188495, "grad_norm": 1.2546370272138538, "learning_rate": 4.619327427237678e-06, "loss": 1.3836, "step": 2181 }, { "epoch": 2.6707466340269277, "grad_norm": 1.2139980903232455, "learning_rate": 4.618987418031464e-06, "loss": 1.1049, "step": 2182 }, { "epoch": 2.671970624235006, "grad_norm": 0.8111423860226766, "learning_rate": 4.618647269573234e-06, "loss": 0.787, "step": 2183 }, { "epoch": 2.6731946144430845, "grad_norm": 1.0052890159337822, "learning_rate": 4.618306981885343e-06, "loss": 0.8031, "step": 2184 }, { "epoch": 2.6744186046511627, "grad_norm": 0.718479367574715, "learning_rate": 4.617966554990151e-06, "loss": 0.6532, "step": 2185 }, { "epoch": 2.6756425948592413, "grad_norm": 0.9307897768065131, "learning_rate": 4.617625988910031e-06, "loss": 1.02, "step": 2186 }, { "epoch": 2.6768665850673194, "grad_norm": 0.7953223888724118, "learning_rate": 4.6172852836673645e-06, "loss": 0.6864, "step": 2187 }, { "epoch": 2.678090575275398, "grad_norm": 1.116667893658052, "learning_rate": 4.61694443928454e-06, "loss": 0.4732, "step": 2188 }, { "epoch": 2.6793145654834762, "grad_norm": 1.3605382580754222, "learning_rate": 4.6166034557839564e-06, "loss": 0.5321, "step": 2189 }, { "epoch": 2.6805385556915544, "grad_norm": 0.9116926354367513, "learning_rate": 4.616262333188023e-06, "loss": 0.5599, "step": 2190 }, { "epoch": 2.681762545899633, "grad_norm": 1.2314200615173392, "learning_rate": 4.615921071519156e-06, "loss": 0.7003, "step": 2191 }, { "epoch": 2.682986536107711, "grad_norm": 0.9877864767465424, "learning_rate": 4.615579670799783e-06, "loss": 0.5615, "step": 2192 }, { "epoch": 2.6842105263157894, "grad_norm": 0.9090137941070238, "learning_rate": 4.615238131052339e-06, "loss": 0.5243, "step": 2193 }, { "epoch": 2.685434516523868, "grad_norm": 1.1611064069556565, "learning_rate": 4.614896452299269e-06, "loss": 1.0496, "step": 2194 }, { "epoch": 2.686658506731946, "grad_norm": 1.7218852099072395, "learning_rate": 4.614554634563026e-06, "loss": 0.8886, "step": 2195 }, { "epoch": 2.6878824969400243, "grad_norm": 0.9583011073411407, "learning_rate": 4.614212677866073e-06, "loss": 0.5189, "step": 2196 }, { "epoch": 2.689106487148103, "grad_norm": 1.179327323885605, "learning_rate": 4.6138705822308845e-06, "loss": 1.1716, "step": 2197 }, { "epoch": 2.690330477356181, "grad_norm": 0.8044303267553937, "learning_rate": 4.613528347679938e-06, "loss": 1.0175, "step": 2198 }, { "epoch": 2.6915544675642593, "grad_norm": 1.3198960272056917, "learning_rate": 4.613185974235728e-06, "loss": 1.1914, "step": 2199 }, { "epoch": 2.692778457772338, "grad_norm": 1.150438155135514, "learning_rate": 4.61284346192075e-06, "loss": 0.6387, "step": 2200 }, { "epoch": 2.694002447980416, "grad_norm": 0.8815506167757904, "learning_rate": 4.6125008107575165e-06, "loss": 0.6185, "step": 2201 }, { "epoch": 2.6952264381884943, "grad_norm": 1.8015984886502048, "learning_rate": 4.612158020768542e-06, "loss": 0.5164, "step": 2202 }, { "epoch": 2.696450428396573, "grad_norm": 0.7105515922917889, "learning_rate": 4.611815091976355e-06, "loss": 0.4845, "step": 2203 }, { "epoch": 2.697674418604651, "grad_norm": 1.263268621234841, "learning_rate": 4.611472024403491e-06, "loss": 0.5095, "step": 2204 }, { "epoch": 2.6988984088127292, "grad_norm": 1.1299113743075342, "learning_rate": 4.611128818072496e-06, "loss": 0.5199, "step": 2205 }, { "epoch": 2.700122399020808, "grad_norm": 0.998282353987232, "learning_rate": 4.610785473005923e-06, "loss": 0.49, "step": 2206 }, { "epoch": 2.701346389228886, "grad_norm": 1.661492000975268, "learning_rate": 4.610441989226336e-06, "loss": 0.4674, "step": 2207 }, { "epoch": 2.7025703794369647, "grad_norm": 0.8247924981345542, "learning_rate": 4.610098366756308e-06, "loss": 0.7629, "step": 2208 }, { "epoch": 2.703794369645043, "grad_norm": 1.086138293508475, "learning_rate": 4.609754605618419e-06, "loss": 1.2668, "step": 2209 }, { "epoch": 2.7050183598531214, "grad_norm": 1.2785328721665017, "learning_rate": 4.609410705835261e-06, "loss": 0.5645, "step": 2210 }, { "epoch": 2.7062423500611996, "grad_norm": 0.9603712212398866, "learning_rate": 4.609066667429435e-06, "loss": 0.6446, "step": 2211 }, { "epoch": 2.707466340269278, "grad_norm": 0.8926265725118112, "learning_rate": 4.6087224904235475e-06, "loss": 0.7601, "step": 2212 }, { "epoch": 2.7086903304773564, "grad_norm": 0.857920306137564, "learning_rate": 4.6083781748402165e-06, "loss": 0.7179, "step": 2213 }, { "epoch": 2.7099143206854346, "grad_norm": 0.8287416172133364, "learning_rate": 4.608033720702072e-06, "loss": 0.3788, "step": 2214 }, { "epoch": 2.7111383108935128, "grad_norm": 2.0643575744888283, "learning_rate": 4.607689128031747e-06, "loss": 0.5858, "step": 2215 }, { "epoch": 2.7123623011015914, "grad_norm": 1.1590016676509982, "learning_rate": 4.6073443968518894e-06, "loss": 0.8128, "step": 2216 }, { "epoch": 2.7135862913096696, "grad_norm": 1.117295246761789, "learning_rate": 4.606999527185152e-06, "loss": 1.6738, "step": 2217 }, { "epoch": 2.7148102815177477, "grad_norm": 1.4590982023502928, "learning_rate": 4.606654519054199e-06, "loss": 0.425, "step": 2218 }, { "epoch": 2.7160342717258263, "grad_norm": 0.7570633227803636, "learning_rate": 4.606309372481703e-06, "loss": 0.5234, "step": 2219 }, { "epoch": 2.7172582619339045, "grad_norm": 0.6862282775088852, "learning_rate": 4.605964087490346e-06, "loss": 0.4874, "step": 2220 }, { "epoch": 2.7184822521419827, "grad_norm": 0.8190324031988453, "learning_rate": 4.605618664102818e-06, "loss": 0.4168, "step": 2221 }, { "epoch": 2.7197062423500613, "grad_norm": 0.8403264398358936, "learning_rate": 4.60527310234182e-06, "loss": 0.615, "step": 2222 }, { "epoch": 2.7209302325581395, "grad_norm": 1.413856088711858, "learning_rate": 4.604927402230061e-06, "loss": 0.8246, "step": 2223 }, { "epoch": 2.7221542227662177, "grad_norm": 1.0743724007112436, "learning_rate": 4.604581563790258e-06, "loss": 0.7938, "step": 2224 }, { "epoch": 2.7233782129742963, "grad_norm": 1.3102138301835118, "learning_rate": 4.60423558704514e-06, "loss": 0.457, "step": 2225 }, { "epoch": 2.7246022031823744, "grad_norm": 1.0664313571969872, "learning_rate": 4.603889472017441e-06, "loss": 2.3426, "step": 2226 }, { "epoch": 2.7258261933904526, "grad_norm": 1.3224402311265275, "learning_rate": 4.603543218729908e-06, "loss": 0.4438, "step": 2227 }, { "epoch": 2.7270501835985312, "grad_norm": 0.9699778630985946, "learning_rate": 4.603196827205295e-06, "loss": 1.0323, "step": 2228 }, { "epoch": 2.7282741738066094, "grad_norm": 0.7747721059239158, "learning_rate": 4.602850297466366e-06, "loss": 1.0796, "step": 2229 }, { "epoch": 2.729498164014688, "grad_norm": 1.2622997414562305, "learning_rate": 4.602503629535893e-06, "loss": 0.8345, "step": 2230 }, { "epoch": 2.730722154222766, "grad_norm": 1.0969158387174398, "learning_rate": 4.6021568234366585e-06, "loss": 0.6197, "step": 2231 }, { "epoch": 2.731946144430845, "grad_norm": 1.040415349158388, "learning_rate": 4.601809879191452e-06, "loss": 0.4532, "step": 2232 }, { "epoch": 2.733170134638923, "grad_norm": 0.7887428701695565, "learning_rate": 4.6014627968230755e-06, "loss": 0.8087, "step": 2233 }, { "epoch": 2.734394124847001, "grad_norm": 0.9648800372527141, "learning_rate": 4.601115576354336e-06, "loss": 0.5799, "step": 2234 }, { "epoch": 2.73561811505508, "grad_norm": 1.1590714379858513, "learning_rate": 4.600768217808053e-06, "loss": 0.5621, "step": 2235 }, { "epoch": 2.736842105263158, "grad_norm": 1.0741543526118316, "learning_rate": 4.600420721207053e-06, "loss": 0.557, "step": 2236 }, { "epoch": 2.738066095471236, "grad_norm": 0.9037531903754719, "learning_rate": 4.600073086574171e-06, "loss": 0.4592, "step": 2237 }, { "epoch": 2.7392900856793148, "grad_norm": 1.0856531550094826, "learning_rate": 4.599725313932254e-06, "loss": 1.0754, "step": 2238 }, { "epoch": 2.740514075887393, "grad_norm": 0.6668802893545525, "learning_rate": 4.599377403304156e-06, "loss": 0.4936, "step": 2239 }, { "epoch": 2.741738066095471, "grad_norm": 1.148784380288063, "learning_rate": 4.59902935471274e-06, "loss": 0.6099, "step": 2240 }, { "epoch": 2.7429620563035497, "grad_norm": 0.8239190422818863, "learning_rate": 4.598681168180879e-06, "loss": 0.6652, "step": 2241 }, { "epoch": 2.744186046511628, "grad_norm": 1.3137469417793242, "learning_rate": 4.598332843731453e-06, "loss": 0.5213, "step": 2242 }, { "epoch": 2.745410036719706, "grad_norm": 0.9468270621698734, "learning_rate": 4.597984381387354e-06, "loss": 0.9203, "step": 2243 }, { "epoch": 2.7466340269277847, "grad_norm": 1.0880887778647201, "learning_rate": 4.597635781171482e-06, "loss": 1.5164, "step": 2244 }, { "epoch": 2.747858017135863, "grad_norm": 0.5814260077638449, "learning_rate": 4.5972870431067445e-06, "loss": 0.4183, "step": 2245 }, { "epoch": 2.749082007343941, "grad_norm": 1.310491874131183, "learning_rate": 4.5969381672160595e-06, "loss": 1.2752, "step": 2246 }, { "epoch": 2.7503059975520197, "grad_norm": 1.6686413193395975, "learning_rate": 4.596589153522355e-06, "loss": 0.5235, "step": 2247 }, { "epoch": 2.751529987760098, "grad_norm": 0.7873642646520717, "learning_rate": 4.5962400020485665e-06, "loss": 0.8017, "step": 2248 }, { "epoch": 2.752753977968176, "grad_norm": 1.2956362462731228, "learning_rate": 4.595890712817638e-06, "loss": 0.8232, "step": 2249 }, { "epoch": 2.7539779681762546, "grad_norm": 0.822849908110494, "learning_rate": 4.5955412858525234e-06, "loss": 0.8067, "step": 2250 }, { "epoch": 2.755201958384333, "grad_norm": 1.2634016887887665, "learning_rate": 4.595191721176187e-06, "loss": 0.3891, "step": 2251 }, { "epoch": 2.7564259485924114, "grad_norm": 0.6899623472126725, "learning_rate": 4.5948420188116e-06, "loss": 0.4871, "step": 2252 }, { "epoch": 2.7576499388004896, "grad_norm": 0.658962596982903, "learning_rate": 4.594492178781744e-06, "loss": 0.6795, "step": 2253 }, { "epoch": 2.758873929008568, "grad_norm": 1.0216352817911467, "learning_rate": 4.5941422011096085e-06, "loss": 0.5991, "step": 2254 }, { "epoch": 2.7600979192166464, "grad_norm": 1.035605098942707, "learning_rate": 4.593792085818194e-06, "loss": 1.365, "step": 2255 }, { "epoch": 2.7613219094247246, "grad_norm": 0.804696328358046, "learning_rate": 4.593441832930507e-06, "loss": 0.7315, "step": 2256 }, { "epoch": 2.762545899632803, "grad_norm": 1.0066518364921733, "learning_rate": 4.593091442469567e-06, "loss": 0.6171, "step": 2257 }, { "epoch": 2.7637698898408813, "grad_norm": 1.0754940594927935, "learning_rate": 4.592740914458399e-06, "loss": 0.6011, "step": 2258 }, { "epoch": 2.7649938800489595, "grad_norm": 1.3505625346939523, "learning_rate": 4.5923902489200375e-06, "loss": 1.0946, "step": 2259 }, { "epoch": 2.766217870257038, "grad_norm": 0.7686285404360617, "learning_rate": 4.59203944587753e-06, "loss": 0.5548, "step": 2260 }, { "epoch": 2.7674418604651163, "grad_norm": 1.9457788722992384, "learning_rate": 4.5916885053539265e-06, "loss": 0.4643, "step": 2261 }, { "epoch": 2.7686658506731945, "grad_norm": 0.6454646889900293, "learning_rate": 4.591337427372291e-06, "loss": 0.4094, "step": 2262 }, { "epoch": 2.769889840881273, "grad_norm": 0.9204165251327752, "learning_rate": 4.590986211955696e-06, "loss": 0.5843, "step": 2263 }, { "epoch": 2.7711138310893513, "grad_norm": 1.6162005424265886, "learning_rate": 4.590634859127221e-06, "loss": 0.601, "step": 2264 }, { "epoch": 2.7723378212974294, "grad_norm": 0.8096770319473827, "learning_rate": 4.590283368909955e-06, "loss": 0.5159, "step": 2265 }, { "epoch": 2.773561811505508, "grad_norm": 1.1541709038458718, "learning_rate": 4.5899317413269985e-06, "loss": 1.428, "step": 2266 }, { "epoch": 2.7747858017135862, "grad_norm": 1.7885055755938009, "learning_rate": 4.589579976401457e-06, "loss": 0.5631, "step": 2267 }, { "epoch": 2.7760097919216644, "grad_norm": 1.032566638707591, "learning_rate": 4.589228074156449e-06, "loss": 0.5885, "step": 2268 }, { "epoch": 2.777233782129743, "grad_norm": 0.7628429913513141, "learning_rate": 4.5888760346150996e-06, "loss": 0.6279, "step": 2269 }, { "epoch": 2.778457772337821, "grad_norm": 0.7455540974133628, "learning_rate": 4.588523857800543e-06, "loss": 0.6725, "step": 2270 }, { "epoch": 2.7796817625458994, "grad_norm": 0.8248441580188699, "learning_rate": 4.588171543735923e-06, "loss": 0.5995, "step": 2271 }, { "epoch": 2.780905752753978, "grad_norm": 0.7433673929799363, "learning_rate": 4.587819092444393e-06, "loss": 0.5333, "step": 2272 }, { "epoch": 2.782129742962056, "grad_norm": 1.519701228297172, "learning_rate": 4.587466503949115e-06, "loss": 0.768, "step": 2273 }, { "epoch": 2.783353733170135, "grad_norm": 1.1656685465472054, "learning_rate": 4.587113778273259e-06, "loss": 1.1489, "step": 2274 }, { "epoch": 2.784577723378213, "grad_norm": 1.3151148098914216, "learning_rate": 4.586760915440006e-06, "loss": 1.1794, "step": 2275 }, { "epoch": 2.7858017135862916, "grad_norm": 1.309896871608821, "learning_rate": 4.5864079154725434e-06, "loss": 1.2013, "step": 2276 }, { "epoch": 2.7870257037943698, "grad_norm": 0.9688787160185232, "learning_rate": 4.5860547783940695e-06, "loss": 0.9835, "step": 2277 }, { "epoch": 2.788249694002448, "grad_norm": 1.5116010840969039, "learning_rate": 4.585701504227792e-06, "loss": 1.1028, "step": 2278 }, { "epoch": 2.7894736842105265, "grad_norm": 1.2119840777978987, "learning_rate": 4.585348092996925e-06, "loss": 1.3838, "step": 2279 }, { "epoch": 2.7906976744186047, "grad_norm": 1.2702887399958727, "learning_rate": 4.584994544724695e-06, "loss": 0.6521, "step": 2280 }, { "epoch": 2.791921664626683, "grad_norm": 1.284024645936518, "learning_rate": 4.584640859434336e-06, "loss": 1.223, "step": 2281 }, { "epoch": 2.7931456548347615, "grad_norm": 0.6033871075379328, "learning_rate": 4.58428703714909e-06, "loss": 0.6341, "step": 2282 }, { "epoch": 2.7943696450428397, "grad_norm": 1.0820106944975485, "learning_rate": 4.5839330778922085e-06, "loss": 0.5292, "step": 2283 }, { "epoch": 2.795593635250918, "grad_norm": 1.119611054153639, "learning_rate": 4.583578981686953e-06, "loss": 0.6569, "step": 2284 }, { "epoch": 2.7968176254589965, "grad_norm": 1.0904514881992535, "learning_rate": 4.583224748556595e-06, "loss": 0.536, "step": 2285 }, { "epoch": 2.7980416156670747, "grad_norm": 1.1186132328737495, "learning_rate": 4.58287037852441e-06, "loss": 0.6915, "step": 2286 }, { "epoch": 2.799265605875153, "grad_norm": 1.227743888880496, "learning_rate": 4.582515871613688e-06, "loss": 1.3577, "step": 2287 }, { "epoch": 2.8004895960832314, "grad_norm": 0.7959143564589592, "learning_rate": 4.582161227847727e-06, "loss": 1.0136, "step": 2288 }, { "epoch": 2.8017135862913096, "grad_norm": 0.981771929690956, "learning_rate": 4.58180644724983e-06, "loss": 0.9127, "step": 2289 }, { "epoch": 2.802937576499388, "grad_norm": 1.5852002396349985, "learning_rate": 4.581451529843315e-06, "loss": 1.0343, "step": 2290 }, { "epoch": 2.8041615667074664, "grad_norm": 0.9295811380355233, "learning_rate": 4.581096475651503e-06, "loss": 0.5537, "step": 2291 }, { "epoch": 2.8053855569155446, "grad_norm": 0.7689469187901813, "learning_rate": 4.5807412846977284e-06, "loss": 0.4361, "step": 2292 }, { "epoch": 2.8066095471236228, "grad_norm": 1.4073082078724117, "learning_rate": 4.580385957005332e-06, "loss": 0.5486, "step": 2293 }, { "epoch": 2.8078335373317014, "grad_norm": 1.3424227123587704, "learning_rate": 4.580030492597666e-06, "loss": 0.7112, "step": 2294 }, { "epoch": 2.8090575275397796, "grad_norm": 0.6976926937454897, "learning_rate": 4.579674891498089e-06, "loss": 0.7067, "step": 2295 }, { "epoch": 2.810281517747858, "grad_norm": 1.2006421543190517, "learning_rate": 4.579319153729971e-06, "loss": 1.9528, "step": 2296 }, { "epoch": 2.8115055079559363, "grad_norm": 0.8914891056007683, "learning_rate": 4.578963279316689e-06, "loss": 1.1309, "step": 2297 }, { "epoch": 2.812729498164015, "grad_norm": 1.0439420133212902, "learning_rate": 4.578607268281629e-06, "loss": 0.7695, "step": 2298 }, { "epoch": 2.813953488372093, "grad_norm": 1.284679320225621, "learning_rate": 4.578251120648188e-06, "loss": 0.9219, "step": 2299 }, { "epoch": 2.8151774785801713, "grad_norm": 1.818793314465752, "learning_rate": 4.577894836439771e-06, "loss": 0.4947, "step": 2300 }, { "epoch": 2.81640146878825, "grad_norm": 1.0572098724423684, "learning_rate": 4.5775384156797904e-06, "loss": 0.5801, "step": 2301 }, { "epoch": 2.817625458996328, "grad_norm": 1.9335240813462298, "learning_rate": 4.57718185839167e-06, "loss": 0.5433, "step": 2302 }, { "epoch": 2.8188494492044063, "grad_norm": 1.0827248861118537, "learning_rate": 4.57682516459884e-06, "loss": 1.2535, "step": 2303 }, { "epoch": 2.820073439412485, "grad_norm": 1.2316971626640572, "learning_rate": 4.576468334324742e-06, "loss": 0.625, "step": 2304 }, { "epoch": 2.821297429620563, "grad_norm": 0.8461271391270831, "learning_rate": 4.576111367592825e-06, "loss": 0.5924, "step": 2305 }, { "epoch": 2.8225214198286412, "grad_norm": 1.1401648420785262, "learning_rate": 4.575754264426548e-06, "loss": 0.7052, "step": 2306 }, { "epoch": 2.82374541003672, "grad_norm": 0.782319223172386, "learning_rate": 4.57539702484938e-06, "loss": 0.4864, "step": 2307 }, { "epoch": 2.824969400244798, "grad_norm": 0.9288651647549095, "learning_rate": 4.575039648884795e-06, "loss": 1.2571, "step": 2308 }, { "epoch": 2.826193390452876, "grad_norm": 0.6459261678319496, "learning_rate": 4.574682136556278e-06, "loss": 0.3472, "step": 2309 }, { "epoch": 2.827417380660955, "grad_norm": 0.8977162777791443, "learning_rate": 4.574324487887326e-06, "loss": 0.5938, "step": 2310 }, { "epoch": 2.828641370869033, "grad_norm": 1.0766223020863774, "learning_rate": 4.5739667029014416e-06, "loss": 0.7092, "step": 2311 }, { "epoch": 2.829865361077111, "grad_norm": 2.120350913128021, "learning_rate": 4.573608781622136e-06, "loss": 0.5432, "step": 2312 }, { "epoch": 2.83108935128519, "grad_norm": 1.0656715219005652, "learning_rate": 4.57325072407293e-06, "loss": 0.5306, "step": 2313 }, { "epoch": 2.832313341493268, "grad_norm": 1.710331354721117, "learning_rate": 4.5728925302773555e-06, "loss": 0.5961, "step": 2314 }, { "epoch": 2.833537331701346, "grad_norm": 1.2285609946303768, "learning_rate": 4.572534200258951e-06, "loss": 0.4577, "step": 2315 }, { "epoch": 2.8347613219094248, "grad_norm": 1.45828512526589, "learning_rate": 4.572175734041265e-06, "loss": 0.5597, "step": 2316 }, { "epoch": 2.835985312117503, "grad_norm": 0.8745618706108752, "learning_rate": 4.5718171316478536e-06, "loss": 0.6259, "step": 2317 }, { "epoch": 2.8372093023255816, "grad_norm": 1.2185259850896366, "learning_rate": 4.571458393102284e-06, "loss": 0.8016, "step": 2318 }, { "epoch": 2.8384332925336597, "grad_norm": 1.0738050366547107, "learning_rate": 4.571099518428131e-06, "loss": 0.5175, "step": 2319 }, { "epoch": 2.8396572827417383, "grad_norm": 1.032307356219277, "learning_rate": 4.570740507648977e-06, "loss": 1.2303, "step": 2320 }, { "epoch": 2.8408812729498165, "grad_norm": 1.0457654557525675, "learning_rate": 4.570381360788416e-06, "loss": 0.547, "step": 2321 }, { "epoch": 2.8421052631578947, "grad_norm": 1.0563061778528389, "learning_rate": 4.570022077870051e-06, "loss": 0.4668, "step": 2322 }, { "epoch": 2.8433292533659733, "grad_norm": 0.9950091682488957, "learning_rate": 4.56966265891749e-06, "loss": 1.4581, "step": 2323 }, { "epoch": 2.8445532435740515, "grad_norm": 1.242646184661988, "learning_rate": 4.569303103954355e-06, "loss": 1.089, "step": 2324 }, { "epoch": 2.8457772337821297, "grad_norm": 1.1771126463862922, "learning_rate": 4.568943413004274e-06, "loss": 0.4421, "step": 2325 }, { "epoch": 2.8470012239902083, "grad_norm": 1.2617355097657599, "learning_rate": 4.568583586090884e-06, "loss": 1.1798, "step": 2326 }, { "epoch": 2.8482252141982864, "grad_norm": 1.206577388495166, "learning_rate": 4.568223623237832e-06, "loss": 0.4521, "step": 2327 }, { "epoch": 2.8494492044063646, "grad_norm": 0.7399314375368015, "learning_rate": 4.5678635244687745e-06, "loss": 0.5042, "step": 2328 }, { "epoch": 2.8506731946144432, "grad_norm": 0.8142641778886158, "learning_rate": 4.567503289807373e-06, "loss": 0.545, "step": 2329 }, { "epoch": 2.8518971848225214, "grad_norm": 1.0524887624241896, "learning_rate": 4.567142919277303e-06, "loss": 0.4204, "step": 2330 }, { "epoch": 2.8531211750305996, "grad_norm": 0.9067088699247055, "learning_rate": 4.566782412902246e-06, "loss": 0.6439, "step": 2331 }, { "epoch": 2.854345165238678, "grad_norm": 0.6372786725649205, "learning_rate": 4.5664217707058935e-06, "loss": 0.6351, "step": 2332 }, { "epoch": 2.8555691554467564, "grad_norm": 0.8330396436852358, "learning_rate": 4.566060992711946e-06, "loss": 0.8174, "step": 2333 }, { "epoch": 2.8567931456548346, "grad_norm": 1.066306959265128, "learning_rate": 4.565700078944111e-06, "loss": 0.967, "step": 2334 }, { "epoch": 2.858017135862913, "grad_norm": 1.3147404227688513, "learning_rate": 4.565339029426108e-06, "loss": 0.6488, "step": 2335 }, { "epoch": 2.8592411260709913, "grad_norm": 0.8669759813068143, "learning_rate": 4.564977844181662e-06, "loss": 0.6648, "step": 2336 }, { "epoch": 2.8604651162790695, "grad_norm": 1.575808157742632, "learning_rate": 4.564616523234511e-06, "loss": 0.4335, "step": 2337 }, { "epoch": 2.861689106487148, "grad_norm": 0.7252501385526152, "learning_rate": 4.564255066608398e-06, "loss": 0.5861, "step": 2338 }, { "epoch": 2.8629130966952263, "grad_norm": 1.1591517750813822, "learning_rate": 4.563893474327077e-06, "loss": 0.4805, "step": 2339 }, { "epoch": 2.864137086903305, "grad_norm": 0.6418717544926285, "learning_rate": 4.563531746414311e-06, "loss": 0.7536, "step": 2340 }, { "epoch": 2.865361077111383, "grad_norm": 0.8198777016790953, "learning_rate": 4.563169882893872e-06, "loss": 0.5976, "step": 2341 }, { "epoch": 2.8665850673194617, "grad_norm": 1.4521369915398115, "learning_rate": 4.5628078837895385e-06, "loss": 0.5725, "step": 2342 }, { "epoch": 2.86780905752754, "grad_norm": 1.1000540769783658, "learning_rate": 4.5624457491251e-06, "loss": 0.6789, "step": 2343 }, { "epoch": 2.869033047735618, "grad_norm": 1.1173505982683034, "learning_rate": 4.562083478924357e-06, "loss": 0.5505, "step": 2344 }, { "epoch": 2.8702570379436967, "grad_norm": 1.0904083827904085, "learning_rate": 4.561721073211114e-06, "loss": 0.5911, "step": 2345 }, { "epoch": 2.871481028151775, "grad_norm": 1.2463425666514405, "learning_rate": 4.561358532009188e-06, "loss": 0.5286, "step": 2346 }, { "epoch": 2.872705018359853, "grad_norm": 1.5983601229943538, "learning_rate": 4.560995855342404e-06, "loss": 0.4641, "step": 2347 }, { "epoch": 2.8739290085679317, "grad_norm": 1.063584641560283, "learning_rate": 4.560633043234595e-06, "loss": 0.3687, "step": 2348 }, { "epoch": 2.87515299877601, "grad_norm": 1.127785796403911, "learning_rate": 4.560270095709603e-06, "loss": 0.6608, "step": 2349 }, { "epoch": 2.876376988984088, "grad_norm": 0.6763194253536473, "learning_rate": 4.559907012791283e-06, "loss": 0.6137, "step": 2350 }, { "epoch": 2.8776009791921666, "grad_norm": 0.7638915188348195, "learning_rate": 4.559543794503492e-06, "loss": 0.74, "step": 2351 }, { "epoch": 2.878824969400245, "grad_norm": 0.95191459958052, "learning_rate": 4.5591804408701e-06, "loss": 0.5305, "step": 2352 }, { "epoch": 2.880048959608323, "grad_norm": 1.3619140292302907, "learning_rate": 4.5588169519149875e-06, "loss": 0.5195, "step": 2353 }, { "epoch": 2.8812729498164016, "grad_norm": 0.7175503780433844, "learning_rate": 4.558453327662039e-06, "loss": 0.6324, "step": 2354 }, { "epoch": 2.8824969400244798, "grad_norm": 0.7727625588792004, "learning_rate": 4.558089568135151e-06, "loss": 0.7007, "step": 2355 }, { "epoch": 2.883720930232558, "grad_norm": 1.5578224079401806, "learning_rate": 4.5577256733582296e-06, "loss": 0.5038, "step": 2356 }, { "epoch": 2.8849449204406366, "grad_norm": 0.92841940576592, "learning_rate": 4.557361643355187e-06, "loss": 0.6211, "step": 2357 }, { "epoch": 2.8861689106487147, "grad_norm": 1.1360295222483738, "learning_rate": 4.556997478149948e-06, "loss": 1.254, "step": 2358 }, { "epoch": 2.887392900856793, "grad_norm": 0.847869620891708, "learning_rate": 4.556633177766442e-06, "loss": 0.7773, "step": 2359 }, { "epoch": 2.8886168910648715, "grad_norm": 0.6852002056695864, "learning_rate": 4.556268742228612e-06, "loss": 0.6383, "step": 2360 }, { "epoch": 2.8898408812729497, "grad_norm": 0.5339475913659488, "learning_rate": 4.5559041715604045e-06, "loss": 0.3754, "step": 2361 }, { "epoch": 2.8910648714810283, "grad_norm": 0.9599298115980407, "learning_rate": 4.55553946578578e-06, "loss": 0.7596, "step": 2362 }, { "epoch": 2.8922888616891065, "grad_norm": 1.6468359131972943, "learning_rate": 4.555174624928704e-06, "loss": 1.0203, "step": 2363 }, { "epoch": 2.8935128518971847, "grad_norm": 1.4858262907575457, "learning_rate": 4.554809649013154e-06, "loss": 0.5134, "step": 2364 }, { "epoch": 2.8947368421052633, "grad_norm": 0.6596207313888333, "learning_rate": 4.554444538063113e-06, "loss": 0.5809, "step": 2365 }, { "epoch": 2.8959608323133414, "grad_norm": 1.1567952590568915, "learning_rate": 4.5540792921025765e-06, "loss": 0.6176, "step": 2366 }, { "epoch": 2.89718482252142, "grad_norm": 1.0533160458229922, "learning_rate": 4.553713911155547e-06, "loss": 1.1694, "step": 2367 }, { "epoch": 2.8984088127294982, "grad_norm": 0.6257114251881628, "learning_rate": 4.553348395246035e-06, "loss": 0.4777, "step": 2368 }, { "epoch": 2.8996328029375764, "grad_norm": 1.7274436100513606, "learning_rate": 4.552982744398061e-06, "loss": 0.5079, "step": 2369 }, { "epoch": 2.900856793145655, "grad_norm": 1.0437321355575, "learning_rate": 4.5526169586356535e-06, "loss": 0.8015, "step": 2370 }, { "epoch": 2.902080783353733, "grad_norm": 0.973767746575319, "learning_rate": 4.552251037982854e-06, "loss": 0.5242, "step": 2371 }, { "epoch": 2.9033047735618114, "grad_norm": 1.1091322131351278, "learning_rate": 4.551884982463705e-06, "loss": 0.7936, "step": 2372 }, { "epoch": 2.90452876376989, "grad_norm": 1.096043265919907, "learning_rate": 4.551518792102266e-06, "loss": 1.5329, "step": 2373 }, { "epoch": 2.905752753977968, "grad_norm": 1.1852800498459155, "learning_rate": 4.551152466922598e-06, "loss": 0.8082, "step": 2374 }, { "epoch": 2.9069767441860463, "grad_norm": 1.2646509913115278, "learning_rate": 4.550786006948778e-06, "loss": 0.6118, "step": 2375 }, { "epoch": 2.908200734394125, "grad_norm": 0.8868003466719138, "learning_rate": 4.5504194122048865e-06, "loss": 0.5518, "step": 2376 }, { "epoch": 2.909424724602203, "grad_norm": 1.1667983221973468, "learning_rate": 4.550052682715015e-06, "loss": 0.4869, "step": 2377 }, { "epoch": 2.9106487148102813, "grad_norm": 0.9050304363250656, "learning_rate": 4.5496858185032645e-06, "loss": 0.6564, "step": 2378 }, { "epoch": 2.91187270501836, "grad_norm": 0.9858405482451021, "learning_rate": 4.5493188195937425e-06, "loss": 0.5589, "step": 2379 }, { "epoch": 2.913096695226438, "grad_norm": 0.347388076962206, "learning_rate": 4.548951686010568e-06, "loss": 0.1392, "step": 2380 }, { "epoch": 2.9143206854345163, "grad_norm": 0.647667656072901, "learning_rate": 4.548584417777867e-06, "loss": 0.5714, "step": 2381 }, { "epoch": 2.915544675642595, "grad_norm": 0.9567819190547915, "learning_rate": 4.548217014919776e-06, "loss": 0.8294, "step": 2382 }, { "epoch": 2.916768665850673, "grad_norm": 1.5405976942438115, "learning_rate": 4.5478494774604375e-06, "loss": 0.5506, "step": 2383 }, { "epoch": 2.9179926560587517, "grad_norm": 1.203331396455515, "learning_rate": 4.547481805424007e-06, "loss": 0.5577, "step": 2384 }, { "epoch": 2.91921664626683, "grad_norm": 0.9631931362738453, "learning_rate": 4.5471139988346445e-06, "loss": 1.6193, "step": 2385 }, { "epoch": 2.920440636474908, "grad_norm": 1.14367129385976, "learning_rate": 4.546746057716522e-06, "loss": 1.0145, "step": 2386 }, { "epoch": 2.9216646266829867, "grad_norm": 2.0506114370874857, "learning_rate": 4.546377982093819e-06, "loss": 0.6053, "step": 2387 }, { "epoch": 2.922888616891065, "grad_norm": 1.1707923347272948, "learning_rate": 4.546009771990724e-06, "loss": 1.0609, "step": 2388 }, { "epoch": 2.9241126070991434, "grad_norm": 1.3842248201092033, "learning_rate": 4.545641427431434e-06, "loss": 0.4095, "step": 2389 }, { "epoch": 2.9253365973072216, "grad_norm": 2.271198187921925, "learning_rate": 4.545272948440157e-06, "loss": 0.4835, "step": 2390 }, { "epoch": 2.9265605875153, "grad_norm": 0.8365925933227671, "learning_rate": 4.544904335041106e-06, "loss": 0.6076, "step": 2391 }, { "epoch": 2.9277845777233784, "grad_norm": 1.0592938201503375, "learning_rate": 4.544535587258505e-06, "loss": 0.5135, "step": 2392 }, { "epoch": 2.9290085679314566, "grad_norm": 0.6737639960939096, "learning_rate": 4.5441667051165884e-06, "loss": 0.5423, "step": 2393 }, { "epoch": 2.9302325581395348, "grad_norm": 1.1510359449089556, "learning_rate": 4.543797688639596e-06, "loss": 0.9885, "step": 2394 }, { "epoch": 2.9314565483476134, "grad_norm": 1.2491548110517527, "learning_rate": 4.54342853785178e-06, "loss": 0.579, "step": 2395 }, { "epoch": 2.9326805385556916, "grad_norm": 1.187906424336536, "learning_rate": 4.543059252777397e-06, "loss": 0.609, "step": 2396 }, { "epoch": 2.9339045287637697, "grad_norm": 1.2190365719655702, "learning_rate": 4.5426898334407184e-06, "loss": 0.9918, "step": 2397 }, { "epoch": 2.9351285189718483, "grad_norm": 1.5349285748240722, "learning_rate": 4.5423202798660184e-06, "loss": 1.0989, "step": 2398 }, { "epoch": 2.9363525091799265, "grad_norm": 1.574410951864215, "learning_rate": 4.541950592077584e-06, "loss": 0.6781, "step": 2399 }, { "epoch": 2.9375764993880047, "grad_norm": 0.9438098353460729, "learning_rate": 4.541580770099709e-06, "loss": 0.9502, "step": 2400 }, { "epoch": 2.9388004895960833, "grad_norm": 1.6018661666655294, "learning_rate": 4.5412108139566974e-06, "loss": 0.5958, "step": 2401 }, { "epoch": 2.9400244798041615, "grad_norm": 0.9403106439557926, "learning_rate": 4.540840723672861e-06, "loss": 0.416, "step": 2402 }, { "epoch": 2.9412484700122397, "grad_norm": 0.6277460192136504, "learning_rate": 4.54047049927252e-06, "loss": 0.432, "step": 2403 }, { "epoch": 2.9424724602203183, "grad_norm": 0.7025677598181642, "learning_rate": 4.540100140780006e-06, "loss": 0.6174, "step": 2404 }, { "epoch": 2.9436964504283964, "grad_norm": 1.0982753496696072, "learning_rate": 4.539729648219656e-06, "loss": 1.4724, "step": 2405 }, { "epoch": 2.944920440636475, "grad_norm": 1.550322998390007, "learning_rate": 4.539359021615819e-06, "loss": 0.8303, "step": 2406 }, { "epoch": 2.9461444308445532, "grad_norm": 1.3857986336331334, "learning_rate": 4.538988260992849e-06, "loss": 0.4802, "step": 2407 }, { "epoch": 2.9473684210526314, "grad_norm": 1.9021696068639997, "learning_rate": 4.538617366375112e-06, "loss": 0.7821, "step": 2408 }, { "epoch": 2.94859241126071, "grad_norm": 1.024451732034074, "learning_rate": 4.538246337786982e-06, "loss": 1.1471, "step": 2409 }, { "epoch": 2.949816401468788, "grad_norm": 1.0368732550294646, "learning_rate": 4.537875175252842e-06, "loss": 0.3588, "step": 2410 }, { "epoch": 2.951040391676867, "grad_norm": 1.6186569167940048, "learning_rate": 4.5375038787970835e-06, "loss": 1.0087, "step": 2411 }, { "epoch": 2.952264381884945, "grad_norm": 1.2576114415175097, "learning_rate": 4.537132448444106e-06, "loss": 0.8915, "step": 2412 }, { "epoch": 2.953488372093023, "grad_norm": 0.6530428496262475, "learning_rate": 4.536760884218319e-06, "loss": 0.5514, "step": 2413 }, { "epoch": 2.954712362301102, "grad_norm": 0.7735854861064247, "learning_rate": 4.53638918614414e-06, "loss": 0.5915, "step": 2414 }, { "epoch": 2.95593635250918, "grad_norm": 0.8801333205421741, "learning_rate": 4.5360173542459965e-06, "loss": 1.2085, "step": 2415 }, { "epoch": 2.957160342717258, "grad_norm": 0.8480761150488775, "learning_rate": 4.535645388548322e-06, "loss": 0.6868, "step": 2416 }, { "epoch": 2.9583843329253368, "grad_norm": 1.6718946179672758, "learning_rate": 4.5352732890755625e-06, "loss": 0.6837, "step": 2417 }, { "epoch": 2.959608323133415, "grad_norm": 1.721050113237689, "learning_rate": 4.53490105585217e-06, "loss": 0.5752, "step": 2418 }, { "epoch": 2.960832313341493, "grad_norm": 1.784178764002309, "learning_rate": 4.534528688902606e-06, "loss": 0.6267, "step": 2419 }, { "epoch": 2.9620563035495717, "grad_norm": 0.6186555727144681, "learning_rate": 4.534156188251343e-06, "loss": 0.6177, "step": 2420 }, { "epoch": 2.96328029375765, "grad_norm": 1.092749622573756, "learning_rate": 4.533783553922859e-06, "loss": 0.7537, "step": 2421 }, { "epoch": 2.964504283965728, "grad_norm": 0.7654709556096652, "learning_rate": 4.5334107859416424e-06, "loss": 0.6475, "step": 2422 }, { "epoch": 2.9657282741738067, "grad_norm": 0.8022229025155242, "learning_rate": 4.53303788433219e-06, "loss": 0.5166, "step": 2423 }, { "epoch": 2.966952264381885, "grad_norm": 2.1331389095439417, "learning_rate": 4.532664849119006e-06, "loss": 0.5149, "step": 2424 }, { "epoch": 2.968176254589963, "grad_norm": 0.6929950322415823, "learning_rate": 4.532291680326608e-06, "loss": 0.6014, "step": 2425 }, { "epoch": 2.9694002447980417, "grad_norm": 1.1747462017978718, "learning_rate": 4.531918377979517e-06, "loss": 0.5544, "step": 2426 }, { "epoch": 2.97062423500612, "grad_norm": 1.3660308124489196, "learning_rate": 4.531544942102267e-06, "loss": 0.6631, "step": 2427 }, { "epoch": 2.9718482252141984, "grad_norm": 0.4960179000828608, "learning_rate": 4.531171372719396e-06, "loss": 0.3815, "step": 2428 }, { "epoch": 2.9730722154222766, "grad_norm": 1.2339209623528586, "learning_rate": 4.530797669855456e-06, "loss": 0.6975, "step": 2429 }, { "epoch": 2.974296205630355, "grad_norm": 1.112689387111434, "learning_rate": 4.530423833535004e-06, "loss": 0.5287, "step": 2430 }, { "epoch": 2.9755201958384334, "grad_norm": 1.4224290995265993, "learning_rate": 4.5300498637826084e-06, "loss": 0.5761, "step": 2431 }, { "epoch": 2.9767441860465116, "grad_norm": 0.9889927076445122, "learning_rate": 4.529675760622844e-06, "loss": 0.5268, "step": 2432 }, { "epoch": 2.97796817625459, "grad_norm": 1.84058324507547, "learning_rate": 4.529301524080295e-06, "loss": 0.9563, "step": 2433 }, { "epoch": 2.9791921664626684, "grad_norm": 1.3064091011209367, "learning_rate": 4.528927154179557e-06, "loss": 0.7804, "step": 2434 }, { "epoch": 2.9804161566707466, "grad_norm": 1.287044932182841, "learning_rate": 4.5285526509452304e-06, "loss": 0.621, "step": 2435 }, { "epoch": 2.981640146878825, "grad_norm": 0.928779472641578, "learning_rate": 4.5281780144019275e-06, "loss": 0.8027, "step": 2436 }, { "epoch": 2.9828641370869033, "grad_norm": 1.0854465457516853, "learning_rate": 4.527803244574266e-06, "loss": 0.7787, "step": 2437 }, { "epoch": 2.9840881272949815, "grad_norm": 1.2416857524175433, "learning_rate": 4.527428341486876e-06, "loss": 0.5933, "step": 2438 }, { "epoch": 2.98531211750306, "grad_norm": 1.032605911127453, "learning_rate": 4.5270533051643944e-06, "loss": 1.303, "step": 2439 }, { "epoch": 2.9865361077111383, "grad_norm": 1.899655168161689, "learning_rate": 4.526678135631467e-06, "loss": 0.4806, "step": 2440 }, { "epoch": 2.9877600979192165, "grad_norm": 1.8746054501030678, "learning_rate": 4.526302832912749e-06, "loss": 0.6804, "step": 2441 }, { "epoch": 2.988984088127295, "grad_norm": 1.2445173536930647, "learning_rate": 4.525927397032903e-06, "loss": 0.5529, "step": 2442 }, { "epoch": 2.9902080783353733, "grad_norm": 0.4786375751365313, "learning_rate": 4.525551828016602e-06, "loss": 0.2736, "step": 2443 }, { "epoch": 2.9914320685434515, "grad_norm": 0.8028999296308257, "learning_rate": 4.525176125888528e-06, "loss": 0.4787, "step": 2444 }, { "epoch": 2.99265605875153, "grad_norm": 0.897776074380303, "learning_rate": 4.524800290673369e-06, "loss": 0.4897, "step": 2445 }, { "epoch": 2.9938800489596082, "grad_norm": 0.8093236665566064, "learning_rate": 4.524424322395824e-06, "loss": 0.5761, "step": 2446 }, { "epoch": 2.9951040391676864, "grad_norm": 0.9240487025444044, "learning_rate": 4.5240482210806e-06, "loss": 1.1636, "step": 2447 }, { "epoch": 2.996328029375765, "grad_norm": 0.9204495942975788, "learning_rate": 4.523671986752413e-06, "loss": 0.5325, "step": 2448 }, { "epoch": 2.997552019583843, "grad_norm": 1.3846168551586802, "learning_rate": 4.523295619435989e-06, "loss": 0.5384, "step": 2449 }, { "epoch": 2.998776009791922, "grad_norm": 1.1191577653609084, "learning_rate": 4.52291911915606e-06, "loss": 0.5962, "step": 2450 }, { "epoch": 3.0, "grad_norm": 0.85819815943381, "learning_rate": 4.522542485937369e-06, "loss": 0.4979, "step": 2451 }, { "epoch": 3.001223990208078, "grad_norm": 1.5019726139100966, "learning_rate": 4.522165719804666e-06, "loss": 1.0324, "step": 2452 }, { "epoch": 3.002447980416157, "grad_norm": 0.7276967813393894, "learning_rate": 4.521788820782713e-06, "loss": 0.6414, "step": 2453 }, { "epoch": 3.003671970624235, "grad_norm": 1.2969745308777048, "learning_rate": 4.521411788896276e-06, "loss": 0.5061, "step": 2454 }, { "epoch": 3.004895960832313, "grad_norm": 0.6312288500058904, "learning_rate": 4.521034624170132e-06, "loss": 0.427, "step": 2455 }, { "epoch": 3.0061199510403918, "grad_norm": 1.0776205534979248, "learning_rate": 4.520657326629069e-06, "loss": 0.7401, "step": 2456 }, { "epoch": 3.00734394124847, "grad_norm": 0.6637683593237725, "learning_rate": 4.520279896297879e-06, "loss": 0.5784, "step": 2457 }, { "epoch": 3.0085679314565485, "grad_norm": 0.7162968128681916, "learning_rate": 4.519902333201368e-06, "loss": 0.5452, "step": 2458 }, { "epoch": 3.0097919216646267, "grad_norm": 0.9430975387522736, "learning_rate": 4.519524637364346e-06, "loss": 0.9417, "step": 2459 }, { "epoch": 3.011015911872705, "grad_norm": 0.6508567482485539, "learning_rate": 4.5191468088116355e-06, "loss": 0.4229, "step": 2460 }, { "epoch": 3.0122399020807835, "grad_norm": 0.984553097597724, "learning_rate": 4.5187688475680635e-06, "loss": 1.0287, "step": 2461 }, { "epoch": 3.0134638922888617, "grad_norm": 1.3459606202401266, "learning_rate": 4.518390753658471e-06, "loss": 1.0202, "step": 2462 }, { "epoch": 3.01468788249694, "grad_norm": 1.2906910635698514, "learning_rate": 4.518012527107704e-06, "loss": 0.6743, "step": 2463 }, { "epoch": 3.0159118727050185, "grad_norm": 0.9036588273704346, "learning_rate": 4.517634167940616e-06, "loss": 0.2634, "step": 2464 }, { "epoch": 3.0171358629130967, "grad_norm": 0.818827425477907, "learning_rate": 4.517255676182075e-06, "loss": 0.5027, "step": 2465 }, { "epoch": 3.018359853121175, "grad_norm": 1.1386835512152047, "learning_rate": 4.5168770518569515e-06, "loss": 0.691, "step": 2466 }, { "epoch": 3.0195838433292534, "grad_norm": 1.15605125838173, "learning_rate": 4.516498294990128e-06, "loss": 0.6235, "step": 2467 }, { "epoch": 3.0208078335373316, "grad_norm": 1.425243656840284, "learning_rate": 4.516119405606496e-06, "loss": 0.5339, "step": 2468 }, { "epoch": 3.0220318237454102, "grad_norm": 1.0460744036623735, "learning_rate": 4.5157403837309525e-06, "loss": 0.5516, "step": 2469 }, { "epoch": 3.0232558139534884, "grad_norm": 1.1877957005457382, "learning_rate": 4.515361229388408e-06, "loss": 0.409, "step": 2470 }, { "epoch": 3.0244798041615666, "grad_norm": 1.1464369534972503, "learning_rate": 4.514981942603777e-06, "loss": 0.6514, "step": 2471 }, { "epoch": 3.025703794369645, "grad_norm": 0.7981855813485291, "learning_rate": 4.514602523401986e-06, "loss": 0.5469, "step": 2472 }, { "epoch": 3.0269277845777234, "grad_norm": 1.1032676079549244, "learning_rate": 4.514222971807968e-06, "loss": 1.1575, "step": 2473 }, { "epoch": 3.0281517747858016, "grad_norm": 0.8141042808455591, "learning_rate": 4.513843287846666e-06, "loss": 0.6778, "step": 2474 }, { "epoch": 3.02937576499388, "grad_norm": 1.7813718926888147, "learning_rate": 4.513463471543032e-06, "loss": 1.022, "step": 2475 }, { "epoch": 3.0305997552019583, "grad_norm": 1.0129620810447315, "learning_rate": 4.513083522922027e-06, "loss": 1.0367, "step": 2476 }, { "epoch": 3.0318237454100365, "grad_norm": 0.616622859682934, "learning_rate": 4.512703442008618e-06, "loss": 0.4764, "step": 2477 }, { "epoch": 3.033047735618115, "grad_norm": 1.0490207416774018, "learning_rate": 4.512323228827784e-06, "loss": 1.3547, "step": 2478 }, { "epoch": 3.0342717258261933, "grad_norm": 1.1010133369367903, "learning_rate": 4.51194288340451e-06, "loss": 0.4812, "step": 2479 }, { "epoch": 3.035495716034272, "grad_norm": 1.1483409449005981, "learning_rate": 4.511562405763791e-06, "loss": 0.5555, "step": 2480 }, { "epoch": 3.03671970624235, "grad_norm": 1.198067024840011, "learning_rate": 4.511181795930631e-06, "loss": 0.598, "step": 2481 }, { "epoch": 3.0379436964504283, "grad_norm": 0.7862662538729204, "learning_rate": 4.510801053930043e-06, "loss": 1.0739, "step": 2482 }, { "epoch": 3.039167686658507, "grad_norm": 1.3339539002281275, "learning_rate": 4.510420179787047e-06, "loss": 1.214, "step": 2483 }, { "epoch": 3.040391676866585, "grad_norm": 0.8750390623351249, "learning_rate": 4.510039173526672e-06, "loss": 0.3703, "step": 2484 }, { "epoch": 3.0416156670746632, "grad_norm": 1.0945668105446344, "learning_rate": 4.509658035173957e-06, "loss": 0.5927, "step": 2485 }, { "epoch": 3.042839657282742, "grad_norm": 1.5574197083877737, "learning_rate": 4.50927676475395e-06, "loss": 0.5002, "step": 2486 }, { "epoch": 3.04406364749082, "grad_norm": 1.7709621078870266, "learning_rate": 4.5088953622917055e-06, "loss": 0.5305, "step": 2487 }, { "epoch": 3.045287637698898, "grad_norm": 0.6207389116696249, "learning_rate": 4.508513827812289e-06, "loss": 0.4737, "step": 2488 }, { "epoch": 3.046511627906977, "grad_norm": 0.6652657337315278, "learning_rate": 4.508132161340772e-06, "loss": 0.5493, "step": 2489 }, { "epoch": 3.047735618115055, "grad_norm": 0.795351113668382, "learning_rate": 4.507750362902236e-06, "loss": 0.5223, "step": 2490 }, { "epoch": 3.0489596083231336, "grad_norm": 0.8317659191339594, "learning_rate": 4.507368432521774e-06, "loss": 0.4068, "step": 2491 }, { "epoch": 3.050183598531212, "grad_norm": 1.079267609325809, "learning_rate": 4.506986370224482e-06, "loss": 0.5277, "step": 2492 }, { "epoch": 3.05140758873929, "grad_norm": 1.6943907060848709, "learning_rate": 4.50660417603547e-06, "loss": 1.0054, "step": 2493 }, { "epoch": 3.0526315789473686, "grad_norm": 1.0852775347509074, "learning_rate": 4.506221849979853e-06, "loss": 0.4409, "step": 2494 }, { "epoch": 3.0538555691554468, "grad_norm": 0.8319120033234779, "learning_rate": 4.505839392082757e-06, "loss": 0.485, "step": 2495 }, { "epoch": 3.055079559363525, "grad_norm": 0.9905877314093634, "learning_rate": 4.505456802369315e-06, "loss": 0.6807, "step": 2496 }, { "epoch": 3.0563035495716036, "grad_norm": 0.9877651012880011, "learning_rate": 4.5050740808646685e-06, "loss": 0.737, "step": 2497 }, { "epoch": 3.0575275397796817, "grad_norm": 1.1495843429704593, "learning_rate": 4.5046912275939716e-06, "loss": 0.6485, "step": 2498 }, { "epoch": 3.05875152998776, "grad_norm": 1.2532356027027147, "learning_rate": 4.50430824258238e-06, "loss": 0.9923, "step": 2499 }, { "epoch": 3.0599755201958385, "grad_norm": 1.8156530265529598, "learning_rate": 4.503925125855065e-06, "loss": 0.8953, "step": 2500 }, { "epoch": 3.0611995104039167, "grad_norm": 1.1340672614815723, "learning_rate": 4.503541877437204e-06, "loss": 1.1826, "step": 2501 }, { "epoch": 3.0624235006119953, "grad_norm": 0.6494141520443184, "learning_rate": 4.50315849735398e-06, "loss": 0.5602, "step": 2502 }, { "epoch": 3.0636474908200735, "grad_norm": 1.4889657288664015, "learning_rate": 4.5027749856305895e-06, "loss": 0.5483, "step": 2503 }, { "epoch": 3.0648714810281517, "grad_norm": 1.517082354325872, "learning_rate": 4.502391342292234e-06, "loss": 0.4983, "step": 2504 }, { "epoch": 3.0660954712362303, "grad_norm": 1.0916754944708404, "learning_rate": 4.502007567364126e-06, "loss": 1.001, "step": 2505 }, { "epoch": 3.0673194614443084, "grad_norm": 1.3034577494852235, "learning_rate": 4.501623660871486e-06, "loss": 0.5564, "step": 2506 }, { "epoch": 3.0685434516523866, "grad_norm": 1.3073526226808656, "learning_rate": 4.501239622839542e-06, "loss": 0.5465, "step": 2507 }, { "epoch": 3.0697674418604652, "grad_norm": 0.6824793409519839, "learning_rate": 4.500855453293532e-06, "loss": 0.6416, "step": 2508 }, { "epoch": 3.0709914320685434, "grad_norm": 0.9336633907729536, "learning_rate": 4.500471152258702e-06, "loss": 0.5839, "step": 2509 }, { "epoch": 3.0722154222766216, "grad_norm": 0.931630764176794, "learning_rate": 4.500086719760308e-06, "loss": 0.5225, "step": 2510 }, { "epoch": 3.0734394124847, "grad_norm": 0.8470899237529007, "learning_rate": 4.499702155823612e-06, "loss": 0.5813, "step": 2511 }, { "epoch": 3.0746634026927784, "grad_norm": 1.1289102134335578, "learning_rate": 4.4993174604738875e-06, "loss": 0.6649, "step": 2512 }, { "epoch": 3.075887392900857, "grad_norm": 1.7546668984096738, "learning_rate": 4.498932633736413e-06, "loss": 0.9952, "step": 2513 }, { "epoch": 3.077111383108935, "grad_norm": 0.926455409006507, "learning_rate": 4.4985476756364805e-06, "loss": 0.613, "step": 2514 }, { "epoch": 3.0783353733170133, "grad_norm": 1.737932678315694, "learning_rate": 4.4981625861993865e-06, "loss": 0.5932, "step": 2515 }, { "epoch": 3.079559363525092, "grad_norm": 1.074127298909647, "learning_rate": 4.497777365450439e-06, "loss": 0.8069, "step": 2516 }, { "epoch": 3.08078335373317, "grad_norm": 1.2119901735884306, "learning_rate": 4.4973920134149505e-06, "loss": 0.4537, "step": 2517 }, { "epoch": 3.0820073439412483, "grad_norm": 1.3889489466440683, "learning_rate": 4.497006530118248e-06, "loss": 1.1946, "step": 2518 }, { "epoch": 3.083231334149327, "grad_norm": 1.0553824916548724, "learning_rate": 4.496620915585662e-06, "loss": 0.6207, "step": 2519 }, { "epoch": 3.084455324357405, "grad_norm": 0.730695928479999, "learning_rate": 4.496235169842535e-06, "loss": 0.4471, "step": 2520 }, { "epoch": 3.0856793145654833, "grad_norm": 0.5939886189651445, "learning_rate": 4.495849292914216e-06, "loss": 0.4632, "step": 2521 }, { "epoch": 3.086903304773562, "grad_norm": 1.3752432860902977, "learning_rate": 4.495463284826064e-06, "loss": 0.5824, "step": 2522 }, { "epoch": 3.08812729498164, "grad_norm": 0.5920685368158534, "learning_rate": 4.495077145603445e-06, "loss": 0.414, "step": 2523 }, { "epoch": 3.0893512851897187, "grad_norm": 0.8224286588122312, "learning_rate": 4.494690875271735e-06, "loss": 0.3499, "step": 2524 }, { "epoch": 3.090575275397797, "grad_norm": 1.1287566408262688, "learning_rate": 4.49430447385632e-06, "loss": 0.9791, "step": 2525 }, { "epoch": 3.091799265605875, "grad_norm": 0.9541655559468333, "learning_rate": 4.4939179413825904e-06, "loss": 0.765, "step": 2526 }, { "epoch": 3.0930232558139537, "grad_norm": 1.2498854159339572, "learning_rate": 4.493531277875948e-06, "loss": 1.2396, "step": 2527 }, { "epoch": 3.094247246022032, "grad_norm": 1.5263142598187005, "learning_rate": 4.493144483361804e-06, "loss": 0.4109, "step": 2528 }, { "epoch": 3.09547123623011, "grad_norm": 1.6239053819590075, "learning_rate": 4.492757557865578e-06, "loss": 0.938, "step": 2529 }, { "epoch": 3.0966952264381886, "grad_norm": 1.1508525240457177, "learning_rate": 4.492370501412695e-06, "loss": 0.4311, "step": 2530 }, { "epoch": 3.097919216646267, "grad_norm": 1.4319509195017999, "learning_rate": 4.491983314028591e-06, "loss": 0.485, "step": 2531 }, { "epoch": 3.099143206854345, "grad_norm": 1.0943000248514514, "learning_rate": 4.491595995738713e-06, "loss": 0.6716, "step": 2532 }, { "epoch": 3.1003671970624236, "grad_norm": 1.3296752323361705, "learning_rate": 4.491208546568512e-06, "loss": 1.1891, "step": 2533 }, { "epoch": 3.1015911872705018, "grad_norm": 0.8724618743387204, "learning_rate": 4.49082096654345e-06, "loss": 0.5903, "step": 2534 }, { "epoch": 3.1028151774785804, "grad_norm": 1.8012886999445987, "learning_rate": 4.490433255688998e-06, "loss": 0.5288, "step": 2535 }, { "epoch": 3.1040391676866586, "grad_norm": 0.7326516374943186, "learning_rate": 4.490045414030634e-06, "loss": 0.6239, "step": 2536 }, { "epoch": 3.1052631578947367, "grad_norm": 1.1564490945426649, "learning_rate": 4.489657441593847e-06, "loss": 0.7635, "step": 2537 }, { "epoch": 3.1064871481028153, "grad_norm": 2.1475157268606053, "learning_rate": 4.489269338404131e-06, "loss": 0.3637, "step": 2538 }, { "epoch": 3.1077111383108935, "grad_norm": 0.9062416957861741, "learning_rate": 4.488881104486993e-06, "loss": 0.7106, "step": 2539 }, { "epoch": 3.1089351285189717, "grad_norm": 1.0294570052391228, "learning_rate": 4.488492739867944e-06, "loss": 0.5552, "step": 2540 }, { "epoch": 3.1101591187270503, "grad_norm": 0.9482646963895243, "learning_rate": 4.488104244572508e-06, "loss": 0.5673, "step": 2541 }, { "epoch": 3.1113831089351285, "grad_norm": 1.3036588073727717, "learning_rate": 4.487715618626214e-06, "loss": 0.6847, "step": 2542 }, { "epoch": 3.1126070991432067, "grad_norm": 0.7191065047045402, "learning_rate": 4.487326862054602e-06, "loss": 0.606, "step": 2543 }, { "epoch": 3.1138310893512853, "grad_norm": 0.8785554195337548, "learning_rate": 4.486937974883218e-06, "loss": 0.4905, "step": 2544 }, { "epoch": 3.1150550795593634, "grad_norm": 1.5823123717213547, "learning_rate": 4.4865489571376214e-06, "loss": 0.5697, "step": 2545 }, { "epoch": 3.116279069767442, "grad_norm": 0.9819448704483195, "learning_rate": 4.486159808843375e-06, "loss": 0.4906, "step": 2546 }, { "epoch": 3.1175030599755202, "grad_norm": 1.084161245638622, "learning_rate": 4.485770530026051e-06, "loss": 0.7904, "step": 2547 }, { "epoch": 3.1187270501835984, "grad_norm": 1.2624813146789096, "learning_rate": 4.485381120711233e-06, "loss": 0.486, "step": 2548 }, { "epoch": 3.119951040391677, "grad_norm": 1.1048840188370355, "learning_rate": 4.484991580924512e-06, "loss": 0.7681, "step": 2549 }, { "epoch": 3.121175030599755, "grad_norm": 1.0388582937901558, "learning_rate": 4.484601910691484e-06, "loss": 1.2213, "step": 2550 }, { "epoch": 3.1223990208078334, "grad_norm": 1.0212734142608066, "learning_rate": 4.484212110037761e-06, "loss": 0.9764, "step": 2551 }, { "epoch": 3.123623011015912, "grad_norm": 1.8855527405576038, "learning_rate": 4.483822178988957e-06, "loss": 0.8758, "step": 2552 }, { "epoch": 3.12484700122399, "grad_norm": 0.6890604543420231, "learning_rate": 4.4834321175706966e-06, "loss": 0.404, "step": 2553 }, { "epoch": 3.1260709914320683, "grad_norm": 0.824550200277482, "learning_rate": 4.483041925808614e-06, "loss": 0.6018, "step": 2554 }, { "epoch": 3.127294981640147, "grad_norm": 0.8285091645858446, "learning_rate": 4.482651603728351e-06, "loss": 0.5047, "step": 2555 }, { "epoch": 3.128518971848225, "grad_norm": 1.1437853456394, "learning_rate": 4.482261151355557e-06, "loss": 0.4827, "step": 2556 }, { "epoch": 3.1297429620563038, "grad_norm": 0.7360047354889917, "learning_rate": 4.481870568715893e-06, "loss": 0.613, "step": 2557 }, { "epoch": 3.130966952264382, "grad_norm": 1.0988782297806161, "learning_rate": 4.481479855835025e-06, "loss": 1.4744, "step": 2558 }, { "epoch": 3.13219094247246, "grad_norm": 0.8491742909974143, "learning_rate": 4.481089012738629e-06, "loss": 0.7704, "step": 2559 }, { "epoch": 3.1334149326805387, "grad_norm": 1.3485883571426112, "learning_rate": 4.480698039452392e-06, "loss": 0.6003, "step": 2560 }, { "epoch": 3.134638922888617, "grad_norm": 1.0576060740594164, "learning_rate": 4.480306936002006e-06, "loss": 0.7353, "step": 2561 }, { "epoch": 3.135862913096695, "grad_norm": 1.34443508004059, "learning_rate": 4.479915702413172e-06, "loss": 0.7046, "step": 2562 }, { "epoch": 3.1370869033047737, "grad_norm": 1.148993055606977, "learning_rate": 4.479524338711603e-06, "loss": 1.865, "step": 2563 }, { "epoch": 3.138310893512852, "grad_norm": 1.148211242345032, "learning_rate": 4.4791328449230145e-06, "loss": 1.6623, "step": 2564 }, { "epoch": 3.13953488372093, "grad_norm": 1.3501078667127324, "learning_rate": 4.478741221073136e-06, "loss": 1.1289, "step": 2565 }, { "epoch": 3.1407588739290087, "grad_norm": 0.8018485348368848, "learning_rate": 4.478349467187703e-06, "loss": 0.923, "step": 2566 }, { "epoch": 3.141982864137087, "grad_norm": 1.3806065413224533, "learning_rate": 4.477957583292461e-06, "loss": 0.6349, "step": 2567 }, { "epoch": 3.1432068543451654, "grad_norm": 0.9185231270917612, "learning_rate": 4.477565569413162e-06, "loss": 0.8237, "step": 2568 }, { "epoch": 3.1444308445532436, "grad_norm": 1.1068168042887472, "learning_rate": 4.47717342557557e-06, "loss": 0.5661, "step": 2569 }, { "epoch": 3.145654834761322, "grad_norm": 1.7243877722429901, "learning_rate": 4.476781151805451e-06, "loss": 0.4283, "step": 2570 }, { "epoch": 3.1468788249694004, "grad_norm": 0.7240379022317064, "learning_rate": 4.4763887481285875e-06, "loss": 0.6858, "step": 2571 }, { "epoch": 3.1481028151774786, "grad_norm": 0.7727291441214302, "learning_rate": 4.4759962145707656e-06, "loss": 0.4975, "step": 2572 }, { "epoch": 3.1493268053855568, "grad_norm": 1.635335405918015, "learning_rate": 4.475603551157783e-06, "loss": 0.5314, "step": 2573 }, { "epoch": 3.1505507955936354, "grad_norm": 0.8911762093944924, "learning_rate": 4.475210757915439e-06, "loss": 0.7568, "step": 2574 }, { "epoch": 3.1517747858017136, "grad_norm": 1.3399101721337259, "learning_rate": 4.474817834869551e-06, "loss": 0.6365, "step": 2575 }, { "epoch": 3.1529987760097917, "grad_norm": 1.1231964388897184, "learning_rate": 4.47442478204594e-06, "loss": 1.2456, "step": 2576 }, { "epoch": 3.1542227662178703, "grad_norm": 0.7503949169976555, "learning_rate": 4.474031599470435e-06, "loss": 0.7089, "step": 2577 }, { "epoch": 3.1554467564259485, "grad_norm": 0.8578697184501369, "learning_rate": 4.473638287168874e-06, "loss": 1.135, "step": 2578 }, { "epoch": 3.1566707466340267, "grad_norm": 1.3623970782300574, "learning_rate": 4.473244845167107e-06, "loss": 0.8811, "step": 2579 }, { "epoch": 3.1578947368421053, "grad_norm": 1.3097895022233739, "learning_rate": 4.472851273490985e-06, "loss": 0.7172, "step": 2580 }, { "epoch": 3.1591187270501835, "grad_norm": 0.9771684578550475, "learning_rate": 4.472457572166374e-06, "loss": 0.5275, "step": 2581 }, { "epoch": 3.160342717258262, "grad_norm": 0.9254007851239002, "learning_rate": 4.472063741219148e-06, "loss": 1.278, "step": 2582 }, { "epoch": 3.1615667074663403, "grad_norm": 1.2970624793838186, "learning_rate": 4.471669780675188e-06, "loss": 1.0132, "step": 2583 }, { "epoch": 3.1627906976744184, "grad_norm": 1.7273578058289327, "learning_rate": 4.471275690560381e-06, "loss": 0.6465, "step": 2584 }, { "epoch": 3.164014687882497, "grad_norm": 1.245479832060968, "learning_rate": 4.470881470900628e-06, "loss": 0.8348, "step": 2585 }, { "epoch": 3.1652386780905752, "grad_norm": 1.4199910371395013, "learning_rate": 4.470487121721834e-06, "loss": 0.5491, "step": 2586 }, { "epoch": 3.1664626682986534, "grad_norm": 1.1043075926980916, "learning_rate": 4.470092643049915e-06, "loss": 0.5834, "step": 2587 }, { "epoch": 3.167686658506732, "grad_norm": 1.0721091464719736, "learning_rate": 4.469698034910793e-06, "loss": 0.3474, "step": 2588 }, { "epoch": 3.16891064871481, "grad_norm": 0.8477306538676153, "learning_rate": 4.469303297330403e-06, "loss": 0.7799, "step": 2589 }, { "epoch": 3.170134638922889, "grad_norm": 1.1512127089742756, "learning_rate": 4.468908430334684e-06, "loss": 0.6803, "step": 2590 }, { "epoch": 3.171358629130967, "grad_norm": 1.2614828649850511, "learning_rate": 4.468513433949585e-06, "loss": 1.1614, "step": 2591 }, { "epoch": 3.172582619339045, "grad_norm": 1.5476053908647838, "learning_rate": 4.468118308201065e-06, "loss": 0.8099, "step": 2592 }, { "epoch": 3.173806609547124, "grad_norm": 0.8780273248879857, "learning_rate": 4.46772305311509e-06, "loss": 0.6551, "step": 2593 }, { "epoch": 3.175030599755202, "grad_norm": 0.7370991977249216, "learning_rate": 4.467327668717632e-06, "loss": 0.5215, "step": 2594 }, { "epoch": 3.17625458996328, "grad_norm": 0.9167905205632998, "learning_rate": 4.466932155034677e-06, "loss": 0.6079, "step": 2595 }, { "epoch": 3.1774785801713588, "grad_norm": 0.7912957149431679, "learning_rate": 4.466536512092216e-06, "loss": 0.5101, "step": 2596 }, { "epoch": 3.178702570379437, "grad_norm": 1.4423738512434698, "learning_rate": 4.46614073991625e-06, "loss": 0.566, "step": 2597 }, { "epoch": 3.179926560587515, "grad_norm": 0.8589861981648842, "learning_rate": 4.465744838532786e-06, "loss": 1.0072, "step": 2598 }, { "epoch": 3.1811505507955937, "grad_norm": 1.333058121826294, "learning_rate": 4.465348807967842e-06, "loss": 0.5097, "step": 2599 }, { "epoch": 3.182374541003672, "grad_norm": 1.1479632024002633, "learning_rate": 4.4649526482474435e-06, "loss": 0.3921, "step": 2600 }, { "epoch": 3.18359853121175, "grad_norm": 0.5494598044967577, "learning_rate": 4.464556359397625e-06, "loss": 0.4534, "step": 2601 }, { "epoch": 3.1848225214198287, "grad_norm": 0.9700017823564119, "learning_rate": 4.4641599414444295e-06, "loss": 1.3644, "step": 2602 }, { "epoch": 3.186046511627907, "grad_norm": 1.40526718621559, "learning_rate": 4.463763394413907e-06, "loss": 1.0614, "step": 2603 }, { "epoch": 3.1872705018359855, "grad_norm": 1.012231969705591, "learning_rate": 4.4633667183321175e-06, "loss": 0.6296, "step": 2604 }, { "epoch": 3.1884944920440637, "grad_norm": 1.913108551768777, "learning_rate": 4.462969913225129e-06, "loss": 0.8135, "step": 2605 }, { "epoch": 3.189718482252142, "grad_norm": 0.9463616946155443, "learning_rate": 4.46257297911902e-06, "loss": 0.6448, "step": 2606 }, { "epoch": 3.1909424724602204, "grad_norm": 1.001563362993544, "learning_rate": 4.462175916039873e-06, "loss": 0.6179, "step": 2607 }, { "epoch": 3.1921664626682986, "grad_norm": 1.1403831201551435, "learning_rate": 4.461778724013782e-06, "loss": 1.598, "step": 2608 }, { "epoch": 3.193390452876377, "grad_norm": 1.6825041551604836, "learning_rate": 4.461381403066849e-06, "loss": 0.667, "step": 2609 }, { "epoch": 3.1946144430844554, "grad_norm": 1.770864015869555, "learning_rate": 4.460983953225185e-06, "loss": 0.4891, "step": 2610 }, { "epoch": 3.1958384332925336, "grad_norm": 0.9044727007043715, "learning_rate": 4.460586374514909e-06, "loss": 0.6229, "step": 2611 }, { "epoch": 3.197062423500612, "grad_norm": 0.7637346254368684, "learning_rate": 4.460188666962148e-06, "loss": 0.5955, "step": 2612 }, { "epoch": 3.1982864137086904, "grad_norm": 0.7799998003264197, "learning_rate": 4.459790830593039e-06, "loss": 0.5398, "step": 2613 }, { "epoch": 3.1995104039167686, "grad_norm": 1.571755677225706, "learning_rate": 4.459392865433725e-06, "loss": 0.9315, "step": 2614 }, { "epoch": 3.200734394124847, "grad_norm": 0.9499668310293599, "learning_rate": 4.458994771510359e-06, "loss": 0.6063, "step": 2615 }, { "epoch": 3.2019583843329253, "grad_norm": 1.9911460683020177, "learning_rate": 4.458596548849102e-06, "loss": 0.6364, "step": 2616 }, { "epoch": 3.2031823745410035, "grad_norm": 1.7724146792601028, "learning_rate": 4.458198197476125e-06, "loss": 0.5533, "step": 2617 }, { "epoch": 3.204406364749082, "grad_norm": 1.169349072075869, "learning_rate": 4.457799717417605e-06, "loss": 1.1578, "step": 2618 }, { "epoch": 3.2056303549571603, "grad_norm": 1.2540338634203407, "learning_rate": 4.457401108699728e-06, "loss": 0.4386, "step": 2619 }, { "epoch": 3.2068543451652385, "grad_norm": 1.1851256631008218, "learning_rate": 4.457002371348691e-06, "loss": 0.6075, "step": 2620 }, { "epoch": 3.208078335373317, "grad_norm": 0.9322340878778848, "learning_rate": 4.456603505390697e-06, "loss": 1.0826, "step": 2621 }, { "epoch": 3.2093023255813953, "grad_norm": 0.9443083724456973, "learning_rate": 4.456204510851957e-06, "loss": 1.3784, "step": 2622 }, { "epoch": 3.2105263157894735, "grad_norm": 0.7320423469165174, "learning_rate": 4.4558053877586916e-06, "loss": 0.5414, "step": 2623 }, { "epoch": 3.211750305997552, "grad_norm": 1.0054551568023882, "learning_rate": 4.45540613613713e-06, "loss": 1.14, "step": 2624 }, { "epoch": 3.2129742962056302, "grad_norm": 1.0560265074825619, "learning_rate": 4.455006756013511e-06, "loss": 1.1931, "step": 2625 }, { "epoch": 3.214198286413709, "grad_norm": 2.1362992285726308, "learning_rate": 4.454607247414079e-06, "loss": 0.4561, "step": 2626 }, { "epoch": 3.215422276621787, "grad_norm": 1.4679701139238326, "learning_rate": 4.454207610365087e-06, "loss": 0.6557, "step": 2627 }, { "epoch": 3.216646266829865, "grad_norm": 1.462345635435965, "learning_rate": 4.4538078448928e-06, "loss": 0.4999, "step": 2628 }, { "epoch": 3.217870257037944, "grad_norm": 0.8687002546075069, "learning_rate": 4.4534079510234875e-06, "loss": 0.5528, "step": 2629 }, { "epoch": 3.219094247246022, "grad_norm": 1.1125521015557662, "learning_rate": 4.45300792878343e-06, "loss": 0.6672, "step": 2630 }, { "epoch": 3.2203182374541, "grad_norm": 1.1250931457548383, "learning_rate": 4.452607778198915e-06, "loss": 0.5062, "step": 2631 }, { "epoch": 3.221542227662179, "grad_norm": 1.021747761331091, "learning_rate": 4.4522074992962385e-06, "loss": 0.5198, "step": 2632 }, { "epoch": 3.222766217870257, "grad_norm": 1.0310077556661634, "learning_rate": 4.451807092101707e-06, "loss": 1.0103, "step": 2633 }, { "epoch": 3.2239902080783356, "grad_norm": 0.8196739162623282, "learning_rate": 4.451406556641632e-06, "loss": 0.5356, "step": 2634 }, { "epoch": 3.2252141982864138, "grad_norm": 0.7027200013796292, "learning_rate": 4.451005892942335e-06, "loss": 0.6986, "step": 2635 }, { "epoch": 3.226438188494492, "grad_norm": 1.4296481226181454, "learning_rate": 4.4506051010301476e-06, "loss": 0.4858, "step": 2636 }, { "epoch": 3.2276621787025706, "grad_norm": 1.2402432157775487, "learning_rate": 4.450204180931408e-06, "loss": 0.5568, "step": 2637 }, { "epoch": 3.2288861689106487, "grad_norm": 1.717963629582267, "learning_rate": 4.449803132672463e-06, "loss": 0.4306, "step": 2638 }, { "epoch": 3.230110159118727, "grad_norm": 0.985157708532004, "learning_rate": 4.449401956279668e-06, "loss": 0.544, "step": 2639 }, { "epoch": 3.2313341493268055, "grad_norm": 1.0390092615734745, "learning_rate": 4.449000651779386e-06, "loss": 1.1237, "step": 2640 }, { "epoch": 3.2325581395348837, "grad_norm": 0.9140381029303751, "learning_rate": 4.448599219197991e-06, "loss": 0.5715, "step": 2641 }, { "epoch": 3.233782129742962, "grad_norm": 1.1920371592866783, "learning_rate": 4.448197658561862e-06, "loss": 0.5904, "step": 2642 }, { "epoch": 3.2350061199510405, "grad_norm": 1.0112063502909725, "learning_rate": 4.447795969897389e-06, "loss": 0.5741, "step": 2643 }, { "epoch": 3.2362301101591187, "grad_norm": 1.1133376094433798, "learning_rate": 4.44739415323097e-06, "loss": 0.5129, "step": 2644 }, { "epoch": 3.237454100367197, "grad_norm": 2.0895401041569546, "learning_rate": 4.446992208589009e-06, "loss": 0.4258, "step": 2645 }, { "epoch": 3.2386780905752754, "grad_norm": 1.567868741342563, "learning_rate": 4.446590135997923e-06, "loss": 0.6882, "step": 2646 }, { "epoch": 3.2399020807833536, "grad_norm": 1.0914505646847985, "learning_rate": 4.446187935484132e-06, "loss": 0.9817, "step": 2647 }, { "epoch": 3.2411260709914322, "grad_norm": 1.4437849758416024, "learning_rate": 4.445785607074068e-06, "loss": 1.2419, "step": 2648 }, { "epoch": 3.2423500611995104, "grad_norm": 1.8514751747244613, "learning_rate": 4.445383150794171e-06, "loss": 0.4805, "step": 2649 }, { "epoch": 3.2435740514075886, "grad_norm": 0.9154754996574146, "learning_rate": 4.44498056667089e-06, "loss": 0.9878, "step": 2650 }, { "epoch": 3.244798041615667, "grad_norm": 0.8492533848844187, "learning_rate": 4.444577854730679e-06, "loss": 0.7193, "step": 2651 }, { "epoch": 3.2460220318237454, "grad_norm": 0.9369013577272924, "learning_rate": 4.444175015000004e-06, "loss": 0.52, "step": 2652 }, { "epoch": 3.2472460220318236, "grad_norm": 0.6864696330509883, "learning_rate": 4.443772047505338e-06, "loss": 0.5542, "step": 2653 }, { "epoch": 3.248470012239902, "grad_norm": 1.3931930514187927, "learning_rate": 4.443368952273163e-06, "loss": 0.4928, "step": 2654 }, { "epoch": 3.2496940024479803, "grad_norm": 0.868042442708177, "learning_rate": 4.442965729329968e-06, "loss": 0.5672, "step": 2655 }, { "epoch": 3.250917992656059, "grad_norm": 0.6890059002616634, "learning_rate": 4.442562378702252e-06, "loss": 0.7149, "step": 2656 }, { "epoch": 3.252141982864137, "grad_norm": 1.0487961905643408, "learning_rate": 4.442158900416522e-06, "loss": 1.1159, "step": 2657 }, { "epoch": 3.2533659730722153, "grad_norm": 1.052289384821845, "learning_rate": 4.441755294499291e-06, "loss": 0.6048, "step": 2658 }, { "epoch": 3.254589963280294, "grad_norm": 1.5612195175337875, "learning_rate": 4.441351560977086e-06, "loss": 0.6001, "step": 2659 }, { "epoch": 3.255813953488372, "grad_norm": 1.6370736612829806, "learning_rate": 4.4409476998764365e-06, "loss": 0.5807, "step": 2660 }, { "epoch": 3.2570379436964503, "grad_norm": 0.9275890148875694, "learning_rate": 4.440543711223883e-06, "loss": 0.7138, "step": 2661 }, { "epoch": 3.258261933904529, "grad_norm": 0.9665070840967706, "learning_rate": 4.440139595045974e-06, "loss": 0.52, "step": 2662 }, { "epoch": 3.259485924112607, "grad_norm": 0.7379025944041129, "learning_rate": 4.4397353513692674e-06, "loss": 0.6325, "step": 2663 }, { "epoch": 3.2607099143206852, "grad_norm": 0.8717083421995269, "learning_rate": 4.439330980220328e-06, "loss": 0.5528, "step": 2664 }, { "epoch": 3.261933904528764, "grad_norm": 1.2548864207990376, "learning_rate": 4.438926481625729e-06, "loss": 0.4705, "step": 2665 }, { "epoch": 3.263157894736842, "grad_norm": 1.2262493575653592, "learning_rate": 4.438521855612054e-06, "loss": 0.7954, "step": 2666 }, { "epoch": 3.26438188494492, "grad_norm": 1.1488454081167205, "learning_rate": 4.438117102205892e-06, "loss": 1.5043, "step": 2667 }, { "epoch": 3.265605875152999, "grad_norm": 0.7969860113308458, "learning_rate": 4.437712221433844e-06, "loss": 0.6872, "step": 2668 }, { "epoch": 3.266829865361077, "grad_norm": 0.6165026029884395, "learning_rate": 4.437307213322515e-06, "loss": 0.4368, "step": 2669 }, { "epoch": 3.2680538555691556, "grad_norm": 0.7365713676338234, "learning_rate": 4.436902077898522e-06, "loss": 0.6128, "step": 2670 }, { "epoch": 3.269277845777234, "grad_norm": 1.1803341188059886, "learning_rate": 4.436496815188488e-06, "loss": 0.6433, "step": 2671 }, { "epoch": 3.270501835985312, "grad_norm": 1.7173044344016908, "learning_rate": 4.4360914252190456e-06, "loss": 0.5093, "step": 2672 }, { "epoch": 3.2717258261933906, "grad_norm": 1.007549423758967, "learning_rate": 4.435685908016837e-06, "loss": 0.6425, "step": 2673 }, { "epoch": 3.2729498164014688, "grad_norm": 1.74256169945507, "learning_rate": 4.435280263608509e-06, "loss": 0.5804, "step": 2674 }, { "epoch": 3.274173806609547, "grad_norm": 0.8909658487128782, "learning_rate": 4.434874492020721e-06, "loss": 0.4306, "step": 2675 }, { "epoch": 3.2753977968176256, "grad_norm": 0.9439176644306071, "learning_rate": 4.434468593280138e-06, "loss": 0.5259, "step": 2676 }, { "epoch": 3.2766217870257037, "grad_norm": 1.711519333661347, "learning_rate": 4.434062567413433e-06, "loss": 0.4171, "step": 2677 }, { "epoch": 3.2778457772337823, "grad_norm": 1.8987673250280912, "learning_rate": 4.43365641444729e-06, "loss": 1.2462, "step": 2678 }, { "epoch": 3.2790697674418605, "grad_norm": 1.6680877692873155, "learning_rate": 4.433250134408401e-06, "loss": 1.2577, "step": 2679 }, { "epoch": 3.2802937576499387, "grad_norm": 0.9579754586159103, "learning_rate": 4.4328437273234625e-06, "loss": 0.6565, "step": 2680 }, { "epoch": 3.2815177478580173, "grad_norm": 0.9597824348685214, "learning_rate": 4.432437193219183e-06, "loss": 0.4802, "step": 2681 }, { "epoch": 3.2827417380660955, "grad_norm": 1.161809877233691, "learning_rate": 4.43203053212228e-06, "loss": 0.4529, "step": 2682 }, { "epoch": 3.2839657282741737, "grad_norm": 1.2952613916773414, "learning_rate": 4.431623744059476e-06, "loss": 0.9278, "step": 2683 }, { "epoch": 3.2851897184822523, "grad_norm": 1.3324884311604774, "learning_rate": 4.431216829057504e-06, "loss": 1.3681, "step": 2684 }, { "epoch": 3.2864137086903304, "grad_norm": 1.111229785629199, "learning_rate": 4.430809787143105e-06, "loss": 0.5966, "step": 2685 }, { "epoch": 3.2876376988984086, "grad_norm": 1.1208449653065697, "learning_rate": 4.430402618343028e-06, "loss": 0.5011, "step": 2686 }, { "epoch": 3.2888616891064872, "grad_norm": 1.7063354603515155, "learning_rate": 4.429995322684032e-06, "loss": 0.8115, "step": 2687 }, { "epoch": 3.2900856793145654, "grad_norm": 1.2262144475844399, "learning_rate": 4.429587900192881e-06, "loss": 0.4256, "step": 2688 }, { "epoch": 3.2913096695226436, "grad_norm": 1.626022521072038, "learning_rate": 4.4291803508963506e-06, "loss": 0.441, "step": 2689 }, { "epoch": 3.292533659730722, "grad_norm": 1.441735230277083, "learning_rate": 4.4287726748212235e-06, "loss": 0.6635, "step": 2690 }, { "epoch": 3.2937576499388004, "grad_norm": 1.3876374263586793, "learning_rate": 4.42836487199429e-06, "loss": 0.6625, "step": 2691 }, { "epoch": 3.294981640146879, "grad_norm": 1.482655370622302, "learning_rate": 4.427956942442349e-06, "loss": 0.516, "step": 2692 }, { "epoch": 3.296205630354957, "grad_norm": 2.1223864067081886, "learning_rate": 4.42754888619221e-06, "loss": 0.4424, "step": 2693 }, { "epoch": 3.2974296205630353, "grad_norm": 1.0482685391943547, "learning_rate": 4.427140703270687e-06, "loss": 0.5512, "step": 2694 }, { "epoch": 3.298653610771114, "grad_norm": 0.7887380587770276, "learning_rate": 4.426732393704605e-06, "loss": 0.5776, "step": 2695 }, { "epoch": 3.299877600979192, "grad_norm": 0.766690781713656, "learning_rate": 4.426323957520796e-06, "loss": 0.4764, "step": 2696 }, { "epoch": 3.3011015911872703, "grad_norm": 2.026563473934643, "learning_rate": 4.425915394746102e-06, "loss": 0.4985, "step": 2697 }, { "epoch": 3.302325581395349, "grad_norm": 1.4506597506064534, "learning_rate": 4.425506705407372e-06, "loss": 0.4462, "step": 2698 }, { "epoch": 3.303549571603427, "grad_norm": 1.1723034249916875, "learning_rate": 4.425097889531463e-06, "loss": 0.462, "step": 2699 }, { "epoch": 3.3047735618115057, "grad_norm": 1.2078255135433702, "learning_rate": 4.424688947145241e-06, "loss": 1.3588, "step": 2700 }, { "epoch": 3.305997552019584, "grad_norm": 1.4815255958965035, "learning_rate": 4.42427987827558e-06, "loss": 1.0733, "step": 2701 }, { "epoch": 3.307221542227662, "grad_norm": 1.132153635071585, "learning_rate": 4.423870682949364e-06, "loss": 0.5203, "step": 2702 }, { "epoch": 3.3084455324357407, "grad_norm": 0.6263112505062345, "learning_rate": 4.423461361193482e-06, "loss": 0.4892, "step": 2703 }, { "epoch": 3.309669522643819, "grad_norm": 0.889306256562072, "learning_rate": 4.423051913034834e-06, "loss": 0.4911, "step": 2704 }, { "epoch": 3.310893512851897, "grad_norm": 1.2987679984113512, "learning_rate": 4.422642338500326e-06, "loss": 0.65, "step": 2705 }, { "epoch": 3.3121175030599757, "grad_norm": 1.6312145481996354, "learning_rate": 4.422232637616876e-06, "loss": 0.4989, "step": 2706 }, { "epoch": 3.313341493268054, "grad_norm": 1.0070529103768027, "learning_rate": 4.4218228104114066e-06, "loss": 0.8547, "step": 2707 }, { "epoch": 3.314565483476132, "grad_norm": 1.2242933492081007, "learning_rate": 4.421412856910851e-06, "loss": 0.5457, "step": 2708 }, { "epoch": 3.3157894736842106, "grad_norm": 1.902067187385129, "learning_rate": 4.421002777142148e-06, "loss": 1.0382, "step": 2709 }, { "epoch": 3.317013463892289, "grad_norm": 1.264462272910692, "learning_rate": 4.420592571132248e-06, "loss": 0.4768, "step": 2710 }, { "epoch": 3.318237454100367, "grad_norm": 1.6804339570943123, "learning_rate": 4.420182238908109e-06, "loss": 0.7498, "step": 2711 }, { "epoch": 3.3194614443084456, "grad_norm": 1.0337061816433162, "learning_rate": 4.419771780496695e-06, "loss": 1.2718, "step": 2712 }, { "epoch": 3.3206854345165238, "grad_norm": 0.8650000890953032, "learning_rate": 4.419361195924981e-06, "loss": 0.5667, "step": 2713 }, { "epoch": 3.3219094247246024, "grad_norm": 0.8212654899883196, "learning_rate": 4.418950485219948e-06, "loss": 0.7553, "step": 2714 }, { "epoch": 3.3231334149326806, "grad_norm": 1.218013265399597, "learning_rate": 4.418539648408585e-06, "loss": 1.2433, "step": 2715 }, { "epoch": 3.3243574051407587, "grad_norm": 1.3577298519672396, "learning_rate": 4.4181286855178944e-06, "loss": 0.5406, "step": 2716 }, { "epoch": 3.3255813953488373, "grad_norm": 1.2480802355030498, "learning_rate": 4.417717596574881e-06, "loss": 1.4496, "step": 2717 }, { "epoch": 3.3268053855569155, "grad_norm": 0.8857464238732545, "learning_rate": 4.4173063816065605e-06, "loss": 0.689, "step": 2718 }, { "epoch": 3.3280293757649937, "grad_norm": 1.3435207876633417, "learning_rate": 4.416895040639956e-06, "loss": 0.3626, "step": 2719 }, { "epoch": 3.3292533659730723, "grad_norm": 0.5194498669147878, "learning_rate": 4.416483573702099e-06, "loss": 0.2386, "step": 2720 }, { "epoch": 3.3304773561811505, "grad_norm": 1.0070001710879286, "learning_rate": 4.416071980820031e-06, "loss": 0.8706, "step": 2721 }, { "epoch": 3.331701346389229, "grad_norm": 0.9347266902369363, "learning_rate": 4.415660262020799e-06, "loss": 1.1996, "step": 2722 }, { "epoch": 3.3329253365973073, "grad_norm": 1.5662282643601981, "learning_rate": 4.41524841733146e-06, "loss": 0.4973, "step": 2723 }, { "epoch": 3.3341493268053854, "grad_norm": 1.6341455588006524, "learning_rate": 4.41483644677908e-06, "loss": 0.8658, "step": 2724 }, { "epoch": 3.335373317013464, "grad_norm": 1.1362051047984356, "learning_rate": 4.41442435039073e-06, "loss": 0.6037, "step": 2725 }, { "epoch": 3.3365973072215422, "grad_norm": 0.9192416189778845, "learning_rate": 4.414012128193493e-06, "loss": 0.657, "step": 2726 }, { "epoch": 3.3378212974296204, "grad_norm": 1.7308278259054983, "learning_rate": 4.413599780214458e-06, "loss": 0.5298, "step": 2727 }, { "epoch": 3.339045287637699, "grad_norm": 1.8412556187973665, "learning_rate": 4.413187306480724e-06, "loss": 1.1041, "step": 2728 }, { "epoch": 3.340269277845777, "grad_norm": 1.4645183444670837, "learning_rate": 4.412774707019396e-06, "loss": 0.6633, "step": 2729 }, { "epoch": 3.3414932680538554, "grad_norm": 1.0312717594668688, "learning_rate": 4.41236198185759e-06, "loss": 0.5115, "step": 2730 }, { "epoch": 3.342717258261934, "grad_norm": 0.8122137838361809, "learning_rate": 4.411949131022427e-06, "loss": 0.8746, "step": 2731 }, { "epoch": 3.343941248470012, "grad_norm": 1.1773914063742836, "learning_rate": 4.4115361545410394e-06, "loss": 0.8711, "step": 2732 }, { "epoch": 3.3451652386780903, "grad_norm": 1.592330154498065, "learning_rate": 4.411123052440567e-06, "loss": 0.482, "step": 2733 }, { "epoch": 3.346389228886169, "grad_norm": 1.2787630443039923, "learning_rate": 4.4107098247481554e-06, "loss": 0.6868, "step": 2734 }, { "epoch": 3.347613219094247, "grad_norm": 0.7082083981743631, "learning_rate": 4.410296471490961e-06, "loss": 0.6662, "step": 2735 }, { "epoch": 3.3488372093023258, "grad_norm": 0.8093219688068988, "learning_rate": 4.4098829926961485e-06, "loss": 0.783, "step": 2736 }, { "epoch": 3.350061199510404, "grad_norm": 1.1772481129014791, "learning_rate": 4.409469388390889e-06, "loss": 0.9779, "step": 2737 }, { "epoch": 3.351285189718482, "grad_norm": 1.1056253636309834, "learning_rate": 4.4090556586023655e-06, "loss": 0.5179, "step": 2738 }, { "epoch": 3.3525091799265607, "grad_norm": 1.1931344120433756, "learning_rate": 4.4086418033577636e-06, "loss": 1.6174, "step": 2739 }, { "epoch": 3.353733170134639, "grad_norm": 0.8721041710742241, "learning_rate": 4.408227822684283e-06, "loss": 0.4448, "step": 2740 }, { "epoch": 3.354957160342717, "grad_norm": 1.251175103104469, "learning_rate": 4.407813716609128e-06, "loss": 0.5613, "step": 2741 }, { "epoch": 3.3561811505507957, "grad_norm": 0.9910594031298472, "learning_rate": 4.407399485159512e-06, "loss": 0.6944, "step": 2742 }, { "epoch": 3.357405140758874, "grad_norm": 0.9911667565221486, "learning_rate": 4.406985128362656e-06, "loss": 1.6074, "step": 2743 }, { "epoch": 3.3586291309669525, "grad_norm": 1.5959957600003385, "learning_rate": 4.406570646245793e-06, "loss": 0.6811, "step": 2744 }, { "epoch": 3.3598531211750307, "grad_norm": 0.7305389437577646, "learning_rate": 4.4061560388361564e-06, "loss": 0.6313, "step": 2745 }, { "epoch": 3.361077111383109, "grad_norm": 1.3163889709485193, "learning_rate": 4.405741306160997e-06, "loss": 1.0769, "step": 2746 }, { "epoch": 3.3623011015911874, "grad_norm": 0.7793767899228182, "learning_rate": 4.405326448247567e-06, "loss": 0.5971, "step": 2747 }, { "epoch": 3.3635250917992656, "grad_norm": 0.8966777003861152, "learning_rate": 4.404911465123131e-06, "loss": 0.5803, "step": 2748 }, { "epoch": 3.364749082007344, "grad_norm": 1.9126181154736117, "learning_rate": 4.404496356814958e-06, "loss": 0.479, "step": 2749 }, { "epoch": 3.3659730722154224, "grad_norm": 1.1276654648520013, "learning_rate": 4.4040811233503296e-06, "loss": 1.022, "step": 2750 }, { "epoch": 3.3671970624235006, "grad_norm": 1.0229723981976373, "learning_rate": 4.4036657647565315e-06, "loss": 0.7521, "step": 2751 }, { "epoch": 3.3684210526315788, "grad_norm": 1.151442856219347, "learning_rate": 4.403250281060862e-06, "loss": 0.6817, "step": 2752 }, { "epoch": 3.3696450428396574, "grad_norm": 1.1966727939792607, "learning_rate": 4.402834672290622e-06, "loss": 1.396, "step": 2753 }, { "epoch": 3.3708690330477356, "grad_norm": 0.9693675940728415, "learning_rate": 4.4024189384731275e-06, "loss": 1.3487, "step": 2754 }, { "epoch": 3.3720930232558137, "grad_norm": 2.0644468002142218, "learning_rate": 4.402003079635695e-06, "loss": 0.7661, "step": 2755 }, { "epoch": 3.3733170134638923, "grad_norm": 0.6825495044664927, "learning_rate": 4.401587095805656e-06, "loss": 0.5498, "step": 2756 }, { "epoch": 3.3745410036719705, "grad_norm": 1.021790621595063, "learning_rate": 4.401170987010347e-06, "loss": 0.5098, "step": 2757 }, { "epoch": 3.375764993880049, "grad_norm": 1.2775883023145036, "learning_rate": 4.400754753277112e-06, "loss": 0.454, "step": 2758 }, { "epoch": 3.3769889840881273, "grad_norm": 1.1441906126931822, "learning_rate": 4.400338394633305e-06, "loss": 0.7094, "step": 2759 }, { "epoch": 3.3782129742962055, "grad_norm": 1.2594700855639822, "learning_rate": 4.399921911106288e-06, "loss": 0.4317, "step": 2760 }, { "epoch": 3.379436964504284, "grad_norm": 1.5859558688348998, "learning_rate": 4.39950530272343e-06, "loss": 1.0807, "step": 2761 }, { "epoch": 3.3806609547123623, "grad_norm": 0.6769146465512795, "learning_rate": 4.399088569512109e-06, "loss": 0.5419, "step": 2762 }, { "epoch": 3.3818849449204405, "grad_norm": 0.9733351350375833, "learning_rate": 4.398671711499712e-06, "loss": 0.7202, "step": 2763 }, { "epoch": 3.383108935128519, "grad_norm": 1.1165237365821272, "learning_rate": 4.398254728713632e-06, "loss": 1.2179, "step": 2764 }, { "epoch": 3.3843329253365972, "grad_norm": 1.1976231747259047, "learning_rate": 4.397837621181273e-06, "loss": 0.8048, "step": 2765 }, { "epoch": 3.385556915544676, "grad_norm": 1.0921397616821866, "learning_rate": 4.397420388930046e-06, "loss": 0.5614, "step": 2766 }, { "epoch": 3.386780905752754, "grad_norm": 1.8336821917087471, "learning_rate": 4.397003031987368e-06, "loss": 0.8926, "step": 2767 }, { "epoch": 3.388004895960832, "grad_norm": 1.0981519979821863, "learning_rate": 4.3965855503806675e-06, "loss": 1.1344, "step": 2768 }, { "epoch": 3.389228886168911, "grad_norm": 1.4237473498939397, "learning_rate": 4.3961679441373795e-06, "loss": 0.6164, "step": 2769 }, { "epoch": 3.390452876376989, "grad_norm": 0.9575515860803475, "learning_rate": 4.395750213284948e-06, "loss": 0.9263, "step": 2770 }, { "epoch": 3.391676866585067, "grad_norm": 1.6718960381624643, "learning_rate": 4.395332357850824e-06, "loss": 0.513, "step": 2771 }, { "epoch": 3.392900856793146, "grad_norm": 0.9364567999550558, "learning_rate": 4.3949143778624684e-06, "loss": 1.6152, "step": 2772 }, { "epoch": 3.394124847001224, "grad_norm": 1.3155202728556288, "learning_rate": 4.394496273347347e-06, "loss": 0.5482, "step": 2773 }, { "epoch": 3.395348837209302, "grad_norm": 1.064892307261788, "learning_rate": 4.39407804433294e-06, "loss": 0.72, "step": 2774 }, { "epoch": 3.3965728274173808, "grad_norm": 1.6779136258700245, "learning_rate": 4.393659690846729e-06, "loss": 0.4574, "step": 2775 }, { "epoch": 3.397796817625459, "grad_norm": 0.8738043965238238, "learning_rate": 4.3932412129162064e-06, "loss": 0.4451, "step": 2776 }, { "epoch": 3.399020807833537, "grad_norm": 2.074419356457092, "learning_rate": 4.392822610568874e-06, "loss": 0.3974, "step": 2777 }, { "epoch": 3.4002447980416157, "grad_norm": 1.1979514605266683, "learning_rate": 4.3924038838322415e-06, "loss": 0.6566, "step": 2778 }, { "epoch": 3.401468788249694, "grad_norm": 1.6639487215398465, "learning_rate": 4.391985032733826e-06, "loss": 0.6592, "step": 2779 }, { "epoch": 3.4026927784577725, "grad_norm": 1.161894488077876, "learning_rate": 4.3915660573011495e-06, "loss": 0.4735, "step": 2780 }, { "epoch": 3.4039167686658507, "grad_norm": 1.0009624796843961, "learning_rate": 4.391146957561751e-06, "loss": 0.4476, "step": 2781 }, { "epoch": 3.405140758873929, "grad_norm": 1.4277098396592909, "learning_rate": 4.390727733543167e-06, "loss": 0.7394, "step": 2782 }, { "epoch": 3.4063647490820075, "grad_norm": 1.1029907752289099, "learning_rate": 4.390308385272951e-06, "loss": 1.303, "step": 2783 }, { "epoch": 3.4075887392900857, "grad_norm": 1.6145219953478458, "learning_rate": 4.389888912778659e-06, "loss": 0.607, "step": 2784 }, { "epoch": 3.408812729498164, "grad_norm": 1.413047487617001, "learning_rate": 4.389469316087859e-06, "loss": 0.9087, "step": 2785 }, { "epoch": 3.4100367197062424, "grad_norm": 0.7412197799281004, "learning_rate": 4.389049595228122e-06, "loss": 0.5373, "step": 2786 }, { "epoch": 3.4112607099143206, "grad_norm": 1.0073505450699676, "learning_rate": 4.388629750227035e-06, "loss": 0.4061, "step": 2787 }, { "epoch": 3.4124847001223992, "grad_norm": 1.0826847292925148, "learning_rate": 4.388209781112186e-06, "loss": 0.6302, "step": 2788 }, { "epoch": 3.4137086903304774, "grad_norm": 1.3320994310139884, "learning_rate": 4.387789687911173e-06, "loss": 1.0392, "step": 2789 }, { "epoch": 3.4149326805385556, "grad_norm": 1.4755602263112608, "learning_rate": 4.387369470651605e-06, "loss": 1.0751, "step": 2790 }, { "epoch": 3.416156670746634, "grad_norm": 0.7806862071470171, "learning_rate": 4.386949129361096e-06, "loss": 0.6239, "step": 2791 }, { "epoch": 3.4173806609547124, "grad_norm": 1.6963746588394495, "learning_rate": 4.38652866406727e-06, "loss": 0.5771, "step": 2792 }, { "epoch": 3.4186046511627906, "grad_norm": 1.4254390704624669, "learning_rate": 4.386108074797757e-06, "loss": 0.82, "step": 2793 }, { "epoch": 3.419828641370869, "grad_norm": 1.6593925469898378, "learning_rate": 4.3856873615801985e-06, "loss": 0.4679, "step": 2794 }, { "epoch": 3.4210526315789473, "grad_norm": 1.0413971224760616, "learning_rate": 4.385266524442241e-06, "loss": 1.1919, "step": 2795 }, { "epoch": 3.4222766217870255, "grad_norm": 0.9444471269865996, "learning_rate": 4.38484556341154e-06, "loss": 0.5425, "step": 2796 }, { "epoch": 3.423500611995104, "grad_norm": 0.8253637812964492, "learning_rate": 4.384424478515762e-06, "loss": 0.6636, "step": 2797 }, { "epoch": 3.4247246022031823, "grad_norm": 0.7165455207021285, "learning_rate": 4.384003269782575e-06, "loss": 0.4497, "step": 2798 }, { "epoch": 3.4259485924112605, "grad_norm": 1.1720862408597537, "learning_rate": 4.383581937239664e-06, "loss": 0.7829, "step": 2799 }, { "epoch": 3.427172582619339, "grad_norm": 1.592090900933828, "learning_rate": 4.383160480914713e-06, "loss": 0.3032, "step": 2800 }, { "epoch": 3.4283965728274173, "grad_norm": 0.8654987344695662, "learning_rate": 4.382738900835421e-06, "loss": 0.5874, "step": 2801 }, { "epoch": 3.429620563035496, "grad_norm": 1.2174178456780231, "learning_rate": 4.382317197029492e-06, "loss": 0.9788, "step": 2802 }, { "epoch": 3.430844553243574, "grad_norm": 1.0504898446920974, "learning_rate": 4.3818953695246395e-06, "loss": 1.2486, "step": 2803 }, { "epoch": 3.4320685434516522, "grad_norm": 1.3915311767448815, "learning_rate": 4.381473418348584e-06, "loss": 0.5994, "step": 2804 }, { "epoch": 3.433292533659731, "grad_norm": 1.5135759233079664, "learning_rate": 4.381051343529054e-06, "loss": 0.5812, "step": 2805 }, { "epoch": 3.434516523867809, "grad_norm": 1.0573431186018905, "learning_rate": 4.380629145093788e-06, "loss": 0.5489, "step": 2806 }, { "epoch": 3.435740514075887, "grad_norm": 1.1865854002744074, "learning_rate": 4.38020682307053e-06, "loss": 1.0484, "step": 2807 }, { "epoch": 3.436964504283966, "grad_norm": 1.0223057180080022, "learning_rate": 4.379784377487034e-06, "loss": 0.5096, "step": 2808 }, { "epoch": 3.438188494492044, "grad_norm": 1.2104581821304563, "learning_rate": 4.379361808371062e-06, "loss": 0.4524, "step": 2809 }, { "epoch": 3.4394124847001226, "grad_norm": 0.9935896933820176, "learning_rate": 4.3789391157503825e-06, "loss": 0.9077, "step": 2810 }, { "epoch": 3.440636474908201, "grad_norm": 0.7183126636169803, "learning_rate": 4.378516299652774e-06, "loss": 0.6056, "step": 2811 }, { "epoch": 3.441860465116279, "grad_norm": 1.2647044804634882, "learning_rate": 4.378093360106022e-06, "loss": 0.4443, "step": 2812 }, { "epoch": 3.4430844553243576, "grad_norm": 1.0727940380884586, "learning_rate": 4.377670297137923e-06, "loss": 0.4586, "step": 2813 }, { "epoch": 3.4443084455324358, "grad_norm": 0.7199060484475379, "learning_rate": 4.377247110776276e-06, "loss": 0.5686, "step": 2814 }, { "epoch": 3.445532435740514, "grad_norm": 1.0813027343577695, "learning_rate": 4.376823801048892e-06, "loss": 0.7906, "step": 2815 }, { "epoch": 3.4467564259485926, "grad_norm": 1.656179107573023, "learning_rate": 4.37640036798359e-06, "loss": 0.4882, "step": 2816 }, { "epoch": 3.4479804161566707, "grad_norm": 1.5862317341144805, "learning_rate": 4.375976811608196e-06, "loss": 0.7798, "step": 2817 }, { "epoch": 3.449204406364749, "grad_norm": 1.6050670819932364, "learning_rate": 4.375553131950544e-06, "loss": 0.6312, "step": 2818 }, { "epoch": 3.4504283965728275, "grad_norm": 1.9131052359791947, "learning_rate": 4.375129329038479e-06, "loss": 0.5434, "step": 2819 }, { "epoch": 3.4516523867809057, "grad_norm": 0.7085877702562351, "learning_rate": 4.374705402899849e-06, "loss": 0.5753, "step": 2820 }, { "epoch": 3.452876376988984, "grad_norm": 0.6630068421818639, "learning_rate": 4.374281353562514e-06, "loss": 0.5498, "step": 2821 }, { "epoch": 3.4541003671970625, "grad_norm": 0.7612810132680108, "learning_rate": 4.3738571810543416e-06, "loss": 0.6717, "step": 2822 }, { "epoch": 3.4553243574051407, "grad_norm": 0.905121133319078, "learning_rate": 4.3734328854032056e-06, "loss": 0.5816, "step": 2823 }, { "epoch": 3.4565483476132193, "grad_norm": 1.9777174846548027, "learning_rate": 4.37300846663699e-06, "loss": 0.9878, "step": 2824 }, { "epoch": 3.4577723378212974, "grad_norm": 1.3729450193411359, "learning_rate": 4.372583924783585e-06, "loss": 0.4283, "step": 2825 }, { "epoch": 3.4589963280293756, "grad_norm": 1.5775100830281101, "learning_rate": 4.372159259870892e-06, "loss": 0.4758, "step": 2826 }, { "epoch": 3.4602203182374542, "grad_norm": 1.155227440651814, "learning_rate": 4.371734471926816e-06, "loss": 0.9596, "step": 2827 }, { "epoch": 3.4614443084455324, "grad_norm": 0.7131379648292329, "learning_rate": 4.371309560979274e-06, "loss": 0.6291, "step": 2828 }, { "epoch": 3.4626682986536106, "grad_norm": 1.4634795383249997, "learning_rate": 4.370884527056189e-06, "loss": 1.0333, "step": 2829 }, { "epoch": 3.463892288861689, "grad_norm": 0.9407337259630113, "learning_rate": 4.370459370185492e-06, "loss": 0.7173, "step": 2830 }, { "epoch": 3.4651162790697674, "grad_norm": 1.6178966026633845, "learning_rate": 4.370034090395126e-06, "loss": 1.2697, "step": 2831 }, { "epoch": 3.466340269277846, "grad_norm": 1.4020711238365124, "learning_rate": 4.3696086877130335e-06, "loss": 0.3772, "step": 2832 }, { "epoch": 3.467564259485924, "grad_norm": 1.501711612900921, "learning_rate": 4.369183162167176e-06, "loss": 0.6024, "step": 2833 }, { "epoch": 3.4687882496940023, "grad_norm": 1.3929354173792095, "learning_rate": 4.368757513785512e-06, "loss": 1.0104, "step": 2834 }, { "epoch": 3.470012239902081, "grad_norm": 1.4588809591730025, "learning_rate": 4.368331742596018e-06, "loss": 0.649, "step": 2835 }, { "epoch": 3.471236230110159, "grad_norm": 1.1816098677361169, "learning_rate": 4.367905848626672e-06, "loss": 0.5384, "step": 2836 }, { "epoch": 3.4724602203182373, "grad_norm": 1.46101435663061, "learning_rate": 4.3674798319054625e-06, "loss": 0.5585, "step": 2837 }, { "epoch": 3.473684210526316, "grad_norm": 1.282884868140762, "learning_rate": 4.3670536924603855e-06, "loss": 0.4733, "step": 2838 }, { "epoch": 3.474908200734394, "grad_norm": 1.5235205881123561, "learning_rate": 4.366627430319445e-06, "loss": 0.5306, "step": 2839 }, { "epoch": 3.4761321909424723, "grad_norm": 0.7208413902971574, "learning_rate": 4.366201045510655e-06, "loss": 0.5024, "step": 2840 }, { "epoch": 3.477356181150551, "grad_norm": 1.5212634830713612, "learning_rate": 4.3657745380620345e-06, "loss": 0.5166, "step": 2841 }, { "epoch": 3.478580171358629, "grad_norm": 1.2630916907284164, "learning_rate": 4.365347908001613e-06, "loss": 1.0113, "step": 2842 }, { "epoch": 3.4798041615667072, "grad_norm": 1.1324915669238926, "learning_rate": 4.364921155357425e-06, "loss": 0.7533, "step": 2843 }, { "epoch": 3.481028151774786, "grad_norm": 0.829650568318722, "learning_rate": 4.3644942801575175e-06, "loss": 0.5995, "step": 2844 }, { "epoch": 3.482252141982864, "grad_norm": 0.8776309133059869, "learning_rate": 4.364067282429942e-06, "loss": 1.1716, "step": 2845 }, { "epoch": 3.4834761321909427, "grad_norm": 1.410099899447338, "learning_rate": 4.36364016220276e-06, "loss": 0.8629, "step": 2846 }, { "epoch": 3.484700122399021, "grad_norm": 0.7220109562801647, "learning_rate": 4.36321291950404e-06, "loss": 0.7451, "step": 2847 }, { "epoch": 3.485924112607099, "grad_norm": 0.8058746598568333, "learning_rate": 4.362785554361857e-06, "loss": 0.935, "step": 2848 }, { "epoch": 3.4871481028151776, "grad_norm": 1.0430948579037274, "learning_rate": 4.362358066804298e-06, "loss": 0.5357, "step": 2849 }, { "epoch": 3.488372093023256, "grad_norm": 1.410091925903012, "learning_rate": 4.361930456859455e-06, "loss": 0.5335, "step": 2850 }, { "epoch": 3.489596083231334, "grad_norm": 1.199296663083217, "learning_rate": 4.36150272455543e-06, "loss": 1.5411, "step": 2851 }, { "epoch": 3.4908200734394126, "grad_norm": 1.9081371690377402, "learning_rate": 4.36107486992033e-06, "loss": 0.6859, "step": 2852 }, { "epoch": 3.4920440636474908, "grad_norm": 1.0936317353552698, "learning_rate": 4.360646892982274e-06, "loss": 0.607, "step": 2853 }, { "epoch": 3.4932680538555694, "grad_norm": 0.9002175031295706, "learning_rate": 4.360218793769386e-06, "loss": 0.6728, "step": 2854 }, { "epoch": 3.4944920440636476, "grad_norm": 1.0280913199904282, "learning_rate": 4.359790572309799e-06, "loss": 0.4068, "step": 2855 }, { "epoch": 3.4957160342717257, "grad_norm": 0.9502640627615291, "learning_rate": 4.359362228631655e-06, "loss": 0.6132, "step": 2856 }, { "epoch": 3.4969400244798043, "grad_norm": 1.1774456243990288, "learning_rate": 4.358933762763102e-06, "loss": 0.4056, "step": 2857 }, { "epoch": 3.4981640146878825, "grad_norm": 1.0385350558629707, "learning_rate": 4.358505174732299e-06, "loss": 1.2151, "step": 2858 }, { "epoch": 3.4993880048959607, "grad_norm": 0.9093833920831976, "learning_rate": 4.358076464567409e-06, "loss": 0.4071, "step": 2859 }, { "epoch": 3.5006119951040393, "grad_norm": 0.7273023568297371, "learning_rate": 4.357647632296607e-06, "loss": 0.5659, "step": 2860 }, { "epoch": 3.5018359853121175, "grad_norm": 1.1432469956374232, "learning_rate": 4.357218677948073e-06, "loss": 0.6333, "step": 2861 }, { "epoch": 3.5030599755201957, "grad_norm": 0.7721303341088325, "learning_rate": 4.356789601549997e-06, "loss": 0.6548, "step": 2862 }, { "epoch": 3.5042839657282743, "grad_norm": 1.86207055388721, "learning_rate": 4.356360403130576e-06, "loss": 0.4994, "step": 2863 }, { "epoch": 3.5055079559363524, "grad_norm": 1.1082190705451813, "learning_rate": 4.355931082718016e-06, "loss": 1.3791, "step": 2864 }, { "epoch": 3.5067319461444306, "grad_norm": 1.4657520895397655, "learning_rate": 4.35550164034053e-06, "loss": 0.4633, "step": 2865 }, { "epoch": 3.5079559363525092, "grad_norm": 1.639356659161861, "learning_rate": 4.3550720760263384e-06, "loss": 0.7449, "step": 2866 }, { "epoch": 3.5091799265605874, "grad_norm": 0.8696338557828689, "learning_rate": 4.354642389803672e-06, "loss": 0.599, "step": 2867 }, { "epoch": 3.5104039167686656, "grad_norm": 1.2320731115894268, "learning_rate": 4.354212581700768e-06, "loss": 1.5121, "step": 2868 }, { "epoch": 3.511627906976744, "grad_norm": 1.0724019879786542, "learning_rate": 4.3537826517458705e-06, "loss": 2.3286, "step": 2869 }, { "epoch": 3.5128518971848224, "grad_norm": 1.424807700263062, "learning_rate": 4.353352599967235e-06, "loss": 1.257, "step": 2870 }, { "epoch": 3.514075887392901, "grad_norm": 1.734695150251853, "learning_rate": 4.35292242639312e-06, "loss": 0.9386, "step": 2871 }, { "epoch": 3.515299877600979, "grad_norm": 0.9493490477884802, "learning_rate": 4.3524921310517975e-06, "loss": 1.1547, "step": 2872 }, { "epoch": 3.516523867809058, "grad_norm": 1.0031853818344216, "learning_rate": 4.352061713971544e-06, "loss": 0.8201, "step": 2873 }, { "epoch": 3.517747858017136, "grad_norm": 0.8133102119444773, "learning_rate": 4.351631175180645e-06, "loss": 0.7188, "step": 2874 }, { "epoch": 3.518971848225214, "grad_norm": 1.333596407323286, "learning_rate": 4.3512005147073935e-06, "loss": 1.3437, "step": 2875 }, { "epoch": 3.5201958384332928, "grad_norm": 0.963409728127715, "learning_rate": 4.350769732580092e-06, "loss": 1.3209, "step": 2876 }, { "epoch": 3.521419828641371, "grad_norm": 0.7953622102156821, "learning_rate": 4.350338828827048e-06, "loss": 0.6409, "step": 2877 }, { "epoch": 3.522643818849449, "grad_norm": 2.0469246064562627, "learning_rate": 4.3499078034765815e-06, "loss": 0.4424, "step": 2878 }, { "epoch": 3.5238678090575277, "grad_norm": 1.1101438627335027, "learning_rate": 4.349476656557015e-06, "loss": 0.5345, "step": 2879 }, { "epoch": 3.525091799265606, "grad_norm": 1.8193174308253999, "learning_rate": 4.3490453880966855e-06, "loss": 0.9862, "step": 2880 }, { "epoch": 3.526315789473684, "grad_norm": 1.034196363468038, "learning_rate": 4.348613998123931e-06, "loss": 1.2781, "step": 2881 }, { "epoch": 3.5275397796817627, "grad_norm": 1.0523965277363985, "learning_rate": 4.3481824866671015e-06, "loss": 0.604, "step": 2882 }, { "epoch": 3.528763769889841, "grad_norm": 1.656567584339346, "learning_rate": 4.347750853754556e-06, "loss": 0.4674, "step": 2883 }, { "epoch": 3.529987760097919, "grad_norm": 1.2063002507104872, "learning_rate": 4.347319099414659e-06, "loss": 1.6032, "step": 2884 }, { "epoch": 3.5312117503059977, "grad_norm": 1.2588489507589293, "learning_rate": 4.346887223675782e-06, "loss": 0.5918, "step": 2885 }, { "epoch": 3.532435740514076, "grad_norm": 1.5753319132551966, "learning_rate": 4.346455226566308e-06, "loss": 0.5721, "step": 2886 }, { "epoch": 3.533659730722154, "grad_norm": 1.437677767836109, "learning_rate": 4.346023108114627e-06, "loss": 1.138, "step": 2887 }, { "epoch": 3.5348837209302326, "grad_norm": 1.6174159902513119, "learning_rate": 4.345590868349136e-06, "loss": 0.8475, "step": 2888 }, { "epoch": 3.536107711138311, "grad_norm": 1.9117625294983323, "learning_rate": 4.3451585072982385e-06, "loss": 0.7753, "step": 2889 }, { "epoch": 3.537331701346389, "grad_norm": 0.920941893558188, "learning_rate": 4.344726024990348e-06, "loss": 0.5894, "step": 2890 }, { "epoch": 3.5385556915544676, "grad_norm": 1.0737232633658507, "learning_rate": 4.3442934214538875e-06, "loss": 0.7515, "step": 2891 }, { "epoch": 3.5397796817625458, "grad_norm": 0.9068759220117667, "learning_rate": 4.343860696717284e-06, "loss": 1.0679, "step": 2892 }, { "epoch": 3.5410036719706244, "grad_norm": 1.1877016108546192, "learning_rate": 4.343427850808976e-06, "loss": 0.7803, "step": 2893 }, { "epoch": 3.5422276621787026, "grad_norm": 0.6994956168436867, "learning_rate": 4.342994883757408e-06, "loss": 0.6255, "step": 2894 }, { "epoch": 3.543451652386781, "grad_norm": 1.0149151345834697, "learning_rate": 4.342561795591033e-06, "loss": 0.9147, "step": 2895 }, { "epoch": 3.5446756425948593, "grad_norm": 1.5934095202750762, "learning_rate": 4.342128586338312e-06, "loss": 0.5539, "step": 2896 }, { "epoch": 3.5458996328029375, "grad_norm": 0.8753602322375681, "learning_rate": 4.341695256027714e-06, "loss": 0.7496, "step": 2897 }, { "epoch": 3.547123623011016, "grad_norm": 1.0438647113518067, "learning_rate": 4.3412618046877156e-06, "loss": 0.6661, "step": 2898 }, { "epoch": 3.5483476132190943, "grad_norm": 0.6356958795916889, "learning_rate": 4.3408282323468015e-06, "loss": 0.3716, "step": 2899 }, { "epoch": 3.5495716034271725, "grad_norm": 1.017960532421912, "learning_rate": 4.340394539033464e-06, "loss": 0.9629, "step": 2900 }, { "epoch": 3.550795593635251, "grad_norm": 1.3879733484722285, "learning_rate": 4.339960724776206e-06, "loss": 0.6111, "step": 2901 }, { "epoch": 3.5520195838433293, "grad_norm": 1.3610687652232283, "learning_rate": 4.339526789603534e-06, "loss": 0.6696, "step": 2902 }, { "epoch": 3.5532435740514074, "grad_norm": 1.4661209391882684, "learning_rate": 4.339092733543966e-06, "loss": 1.4012, "step": 2903 }, { "epoch": 3.554467564259486, "grad_norm": 2.0021521725087568, "learning_rate": 4.338658556626026e-06, "loss": 0.4631, "step": 2904 }, { "epoch": 3.5556915544675642, "grad_norm": 0.6959933730120821, "learning_rate": 4.338224258878244e-06, "loss": 0.3405, "step": 2905 }, { "epoch": 3.5569155446756424, "grad_norm": 0.9386699130500548, "learning_rate": 4.337789840329165e-06, "loss": 0.7559, "step": 2906 }, { "epoch": 3.558139534883721, "grad_norm": 0.902717120160523, "learning_rate": 4.337355301007336e-06, "loss": 0.8831, "step": 2907 }, { "epoch": 3.559363525091799, "grad_norm": 1.0899334819981827, "learning_rate": 4.33692064094131e-06, "loss": 0.9027, "step": 2908 }, { "epoch": 3.5605875152998774, "grad_norm": 1.0636529731664475, "learning_rate": 4.336485860159655e-06, "loss": 1.2134, "step": 2909 }, { "epoch": 3.561811505507956, "grad_norm": 1.4913058830162629, "learning_rate": 4.336050958690943e-06, "loss": 0.3932, "step": 2910 }, { "epoch": 3.563035495716034, "grad_norm": 1.5689633596339874, "learning_rate": 4.335615936563752e-06, "loss": 0.5574, "step": 2911 }, { "epoch": 3.5642594859241123, "grad_norm": 1.0337282029419634, "learning_rate": 4.335180793806671e-06, "loss": 0.5106, "step": 2912 }, { "epoch": 3.565483476132191, "grad_norm": 1.3621954638399791, "learning_rate": 4.334745530448296e-06, "loss": 0.539, "step": 2913 }, { "epoch": 3.566707466340269, "grad_norm": 0.6786718855947556, "learning_rate": 4.334310146517231e-06, "loss": 0.6264, "step": 2914 }, { "epoch": 3.5679314565483478, "grad_norm": 1.548475002732928, "learning_rate": 4.3338746420420885e-06, "loss": 0.6039, "step": 2915 }, { "epoch": 3.569155446756426, "grad_norm": 1.3093038227678264, "learning_rate": 4.333439017051487e-06, "loss": 0.5912, "step": 2916 }, { "epoch": 3.5703794369645045, "grad_norm": 0.8641330009975774, "learning_rate": 4.333003271574055e-06, "loss": 0.646, "step": 2917 }, { "epoch": 3.5716034271725827, "grad_norm": 1.2994748836516428, "learning_rate": 4.3325674056384274e-06, "loss": 0.7753, "step": 2918 }, { "epoch": 3.572827417380661, "grad_norm": 1.3311415003206768, "learning_rate": 4.332131419273249e-06, "loss": 0.589, "step": 2919 }, { "epoch": 3.5740514075887395, "grad_norm": 1.0597928957504616, "learning_rate": 4.331695312507169e-06, "loss": 1.4441, "step": 2920 }, { "epoch": 3.5752753977968177, "grad_norm": 0.7883088941387513, "learning_rate": 4.331259085368849e-06, "loss": 0.4757, "step": 2921 }, { "epoch": 3.576499388004896, "grad_norm": 2.0108915878429463, "learning_rate": 4.330822737886955e-06, "loss": 0.5491, "step": 2922 }, { "epoch": 3.5777233782129745, "grad_norm": 1.4615354812026373, "learning_rate": 4.330386270090162e-06, "loss": 0.4421, "step": 2923 }, { "epoch": 3.5789473684210527, "grad_norm": 1.2520819369621823, "learning_rate": 4.329949682007154e-06, "loss": 1.4592, "step": 2924 }, { "epoch": 3.580171358629131, "grad_norm": 1.7759958964803926, "learning_rate": 4.329512973666622e-06, "loss": 0.2783, "step": 2925 }, { "epoch": 3.5813953488372094, "grad_norm": 1.1180661635330762, "learning_rate": 4.329076145097263e-06, "loss": 0.6284, "step": 2926 }, { "epoch": 3.5826193390452876, "grad_norm": 0.7534276536189869, "learning_rate": 4.328639196327786e-06, "loss": 0.5784, "step": 2927 }, { "epoch": 3.583843329253366, "grad_norm": 0.9606622493202407, "learning_rate": 4.3282021273869044e-06, "loss": 0.6626, "step": 2928 }, { "epoch": 3.5850673194614444, "grad_norm": 1.3251032685439843, "learning_rate": 4.32776493830334e-06, "loss": 0.6655, "step": 2929 }, { "epoch": 3.5862913096695226, "grad_norm": 1.1428734678697354, "learning_rate": 4.327327629105826e-06, "loss": 0.9592, "step": 2930 }, { "epoch": 3.5875152998776008, "grad_norm": 1.517797832297763, "learning_rate": 4.326890199823098e-06, "loss": 0.9773, "step": 2931 }, { "epoch": 3.5887392900856794, "grad_norm": 1.3154842580167365, "learning_rate": 4.326452650483904e-06, "loss": 0.3502, "step": 2932 }, { "epoch": 3.5899632802937576, "grad_norm": 1.7968484425055977, "learning_rate": 4.326014981116996e-06, "loss": 0.4319, "step": 2933 }, { "epoch": 3.5911872705018357, "grad_norm": 1.1833902522404616, "learning_rate": 4.325577191751139e-06, "loss": 0.5145, "step": 2934 }, { "epoch": 3.5924112607099143, "grad_norm": 1.3018697964700894, "learning_rate": 4.325139282415101e-06, "loss": 0.564, "step": 2935 }, { "epoch": 3.5936352509179925, "grad_norm": 1.5766676791766687, "learning_rate": 4.3247012531376594e-06, "loss": 0.3882, "step": 2936 }, { "epoch": 3.594859241126071, "grad_norm": 1.596189690354281, "learning_rate": 4.324263103947601e-06, "loss": 1.1018, "step": 2937 }, { "epoch": 3.5960832313341493, "grad_norm": 1.0226506394508035, "learning_rate": 4.323824834873719e-06, "loss": 0.6443, "step": 2938 }, { "epoch": 3.597307221542228, "grad_norm": 1.1186633339195216, "learning_rate": 4.323386445944813e-06, "loss": 0.6768, "step": 2939 }, { "epoch": 3.598531211750306, "grad_norm": 1.1508859387295962, "learning_rate": 4.322947937189697e-06, "loss": 0.5746, "step": 2940 }, { "epoch": 3.5997552019583843, "grad_norm": 1.7215687685644296, "learning_rate": 4.322509308637183e-06, "loss": 1.0139, "step": 2941 }, { "epoch": 3.600979192166463, "grad_norm": 1.572727987584154, "learning_rate": 4.322070560316099e-06, "loss": 0.6197, "step": 2942 }, { "epoch": 3.602203182374541, "grad_norm": 1.151874912523627, "learning_rate": 4.321631692255277e-06, "loss": 0.5836, "step": 2943 }, { "epoch": 3.6034271725826192, "grad_norm": 1.5710696360548968, "learning_rate": 4.321192704483558e-06, "loss": 0.6482, "step": 2944 }, { "epoch": 3.604651162790698, "grad_norm": 0.8203846404613538, "learning_rate": 4.32075359702979e-06, "loss": 0.5793, "step": 2945 }, { "epoch": 3.605875152998776, "grad_norm": 0.9748441426848996, "learning_rate": 4.320314369922831e-06, "loss": 0.5789, "step": 2946 }, { "epoch": 3.607099143206854, "grad_norm": 1.3043319752950324, "learning_rate": 4.319875023191544e-06, "loss": 0.5848, "step": 2947 }, { "epoch": 3.608323133414933, "grad_norm": 1.5407516399105243, "learning_rate": 4.3194355568648015e-06, "loss": 0.4382, "step": 2948 }, { "epoch": 3.609547123623011, "grad_norm": 1.019025954754023, "learning_rate": 4.318995970971485e-06, "loss": 0.584, "step": 2949 }, { "epoch": 3.610771113831089, "grad_norm": 0.9691019674105982, "learning_rate": 4.31855626554048e-06, "loss": 0.8539, "step": 2950 }, { "epoch": 3.611995104039168, "grad_norm": 1.1101587502151693, "learning_rate": 4.318116440600683e-06, "loss": 0.3594, "step": 2951 }, { "epoch": 3.613219094247246, "grad_norm": 1.2982010373240704, "learning_rate": 4.317676496181e-06, "loss": 0.5932, "step": 2952 }, { "epoch": 3.614443084455324, "grad_norm": 0.817650242496108, "learning_rate": 4.317236432310341e-06, "loss": 0.7291, "step": 2953 }, { "epoch": 3.6156670746634028, "grad_norm": 1.2553094258715891, "learning_rate": 4.316796249017624e-06, "loss": 0.4662, "step": 2954 }, { "epoch": 3.616891064871481, "grad_norm": 0.8613309364649132, "learning_rate": 4.316355946331777e-06, "loss": 0.6126, "step": 2955 }, { "epoch": 3.618115055079559, "grad_norm": 0.898847023514368, "learning_rate": 4.315915524281736e-06, "loss": 0.5624, "step": 2956 }, { "epoch": 3.6193390452876377, "grad_norm": 0.7187855775506882, "learning_rate": 4.315474982896444e-06, "loss": 0.5673, "step": 2957 }, { "epoch": 3.620563035495716, "grad_norm": 1.397815487791166, "learning_rate": 4.315034322204851e-06, "loss": 1.0909, "step": 2958 }, { "epoch": 3.6217870257037945, "grad_norm": 0.7954250262460789, "learning_rate": 4.3145935422359154e-06, "loss": 0.5987, "step": 2959 }, { "epoch": 3.6230110159118727, "grad_norm": 1.6703271693458768, "learning_rate": 4.314152643018603e-06, "loss": 0.4089, "step": 2960 }, { "epoch": 3.6242350061199513, "grad_norm": 0.6712039056710571, "learning_rate": 4.31371162458189e-06, "loss": 0.5419, "step": 2961 }, { "epoch": 3.6254589963280295, "grad_norm": 2.5832252849684276, "learning_rate": 4.3132704869547575e-06, "loss": 0.4594, "step": 2962 }, { "epoch": 3.6266829865361077, "grad_norm": 0.7042627236230449, "learning_rate": 4.3128292301661955e-06, "loss": 0.5691, "step": 2963 }, { "epoch": 3.6279069767441863, "grad_norm": 1.1759148322568098, "learning_rate": 4.312387854245201e-06, "loss": 0.3473, "step": 2964 }, { "epoch": 3.6291309669522644, "grad_norm": 1.3652754673721526, "learning_rate": 4.31194635922078e-06, "loss": 0.5402, "step": 2965 }, { "epoch": 3.6303549571603426, "grad_norm": 1.0374557862926201, "learning_rate": 4.311504745121947e-06, "loss": 0.5431, "step": 2966 }, { "epoch": 3.6315789473684212, "grad_norm": 2.0238596964098923, "learning_rate": 4.311063011977723e-06, "loss": 0.6005, "step": 2967 }, { "epoch": 3.6328029375764994, "grad_norm": 0.970378465526723, "learning_rate": 4.310621159817135e-06, "loss": 0.3928, "step": 2968 }, { "epoch": 3.6340269277845776, "grad_norm": 1.20013594312382, "learning_rate": 4.310179188669222e-06, "loss": 1.4438, "step": 2969 }, { "epoch": 3.635250917992656, "grad_norm": 1.4745663220336436, "learning_rate": 4.309737098563029e-06, "loss": 0.4874, "step": 2970 }, { "epoch": 3.6364749082007344, "grad_norm": 1.2913192453657432, "learning_rate": 4.309294889527607e-06, "loss": 0.5406, "step": 2971 }, { "epoch": 3.6376988984088126, "grad_norm": 1.5984533050230296, "learning_rate": 4.308852561592017e-06, "loss": 0.5513, "step": 2972 }, { "epoch": 3.638922888616891, "grad_norm": 1.416615549400586, "learning_rate": 4.308410114785327e-06, "loss": 0.5753, "step": 2973 }, { "epoch": 3.6401468788249693, "grad_norm": 1.014033274623642, "learning_rate": 4.307967549136614e-06, "loss": 0.5336, "step": 2974 }, { "epoch": 3.6413708690330475, "grad_norm": 0.9574736851251069, "learning_rate": 4.307524864674959e-06, "loss": 0.7197, "step": 2975 }, { "epoch": 3.642594859241126, "grad_norm": 1.8108751206363718, "learning_rate": 4.3070820614294566e-06, "loss": 0.4868, "step": 2976 }, { "epoch": 3.6438188494492043, "grad_norm": 0.8819614192683026, "learning_rate": 4.306639139429205e-06, "loss": 0.4371, "step": 2977 }, { "epoch": 3.6450428396572825, "grad_norm": 1.1299782071341482, "learning_rate": 4.306196098703311e-06, "loss": 1.6064, "step": 2978 }, { "epoch": 3.646266829865361, "grad_norm": 1.5127683379167052, "learning_rate": 4.3057529392808905e-06, "loss": 0.7458, "step": 2979 }, { "epoch": 3.6474908200734393, "grad_norm": 0.9950284280157676, "learning_rate": 4.305309661191066e-06, "loss": 0.3716, "step": 2980 }, { "epoch": 3.648714810281518, "grad_norm": 1.7975230118444248, "learning_rate": 4.3048662644629675e-06, "loss": 0.5714, "step": 2981 }, { "epoch": 3.649938800489596, "grad_norm": 1.576473013604482, "learning_rate": 4.3044227491257345e-06, "loss": 0.7358, "step": 2982 }, { "epoch": 3.6511627906976747, "grad_norm": 1.6678818620435452, "learning_rate": 4.303979115208512e-06, "loss": 0.533, "step": 2983 }, { "epoch": 3.652386780905753, "grad_norm": 1.823383835929657, "learning_rate": 4.303535362740455e-06, "loss": 0.4796, "step": 2984 }, { "epoch": 3.653610771113831, "grad_norm": 1.621817227027596, "learning_rate": 4.303091491750724e-06, "loss": 0.6182, "step": 2985 }, { "epoch": 3.6548347613219097, "grad_norm": 1.0638994777171682, "learning_rate": 4.30264750226849e-06, "loss": 0.513, "step": 2986 }, { "epoch": 3.656058751529988, "grad_norm": 1.3565409743913774, "learning_rate": 4.302203394322929e-06, "loss": 1.1625, "step": 2987 }, { "epoch": 3.657282741738066, "grad_norm": 1.3004276559240062, "learning_rate": 4.301759167943228e-06, "loss": 1.0599, "step": 2988 }, { "epoch": 3.6585067319461446, "grad_norm": 1.4013600412580132, "learning_rate": 4.301314823158578e-06, "loss": 0.4961, "step": 2989 }, { "epoch": 3.659730722154223, "grad_norm": 1.1837034853762836, "learning_rate": 4.3008703599981795e-06, "loss": 1.5244, "step": 2990 }, { "epoch": 3.660954712362301, "grad_norm": 1.028262862475509, "learning_rate": 4.300425778491243e-06, "loss": 0.8406, "step": 2991 }, { "epoch": 3.6621787025703796, "grad_norm": 1.5651221614849686, "learning_rate": 4.299981078666984e-06, "loss": 0.9601, "step": 2992 }, { "epoch": 3.6634026927784578, "grad_norm": 1.4931347475636896, "learning_rate": 4.299536260554625e-06, "loss": 0.4835, "step": 2993 }, { "epoch": 3.664626682986536, "grad_norm": 1.0152398042063975, "learning_rate": 4.2990913241834e-06, "loss": 0.6631, "step": 2994 }, { "epoch": 3.6658506731946146, "grad_norm": 1.4191541057266053, "learning_rate": 4.298646269582547e-06, "loss": 0.9023, "step": 2995 }, { "epoch": 3.6670746634026927, "grad_norm": 1.3938596964146186, "learning_rate": 4.298201096781314e-06, "loss": 0.6048, "step": 2996 }, { "epoch": 3.668298653610771, "grad_norm": 1.5312461218899551, "learning_rate": 4.2977558058089565e-06, "loss": 0.4301, "step": 2997 }, { "epoch": 3.6695226438188495, "grad_norm": 0.8095875774354401, "learning_rate": 4.297310396694736e-06, "loss": 0.6156, "step": 2998 }, { "epoch": 3.6707466340269277, "grad_norm": 0.7539018615659618, "learning_rate": 4.2968648694679245e-06, "loss": 0.4204, "step": 2999 }, { "epoch": 3.671970624235006, "grad_norm": 1.278192535128054, "learning_rate": 4.2964192241578e-06, "loss": 0.6345, "step": 3000 }, { "epoch": 3.6731946144430845, "grad_norm": 1.27482935812848, "learning_rate": 4.295973460793649e-06, "loss": 0.5755, "step": 3001 }, { "epoch": 3.6744186046511627, "grad_norm": 1.1307060576979426, "learning_rate": 4.295527579404763e-06, "loss": 0.5661, "step": 3002 }, { "epoch": 3.6756425948592413, "grad_norm": 2.3173586365816856, "learning_rate": 4.2950815800204485e-06, "loss": 0.4903, "step": 3003 }, { "epoch": 3.6768665850673194, "grad_norm": 1.1565801089195158, "learning_rate": 4.29463546267001e-06, "loss": 1.5708, "step": 3004 }, { "epoch": 3.678090575275398, "grad_norm": 1.0704166461488451, "learning_rate": 4.294189227382768e-06, "loss": 0.7544, "step": 3005 }, { "epoch": 3.6793145654834762, "grad_norm": 2.51146969040047, "learning_rate": 4.293742874188045e-06, "loss": 0.4671, "step": 3006 }, { "epoch": 3.6805385556915544, "grad_norm": 0.8659487264990876, "learning_rate": 4.293296403115176e-06, "loss": 1.2332, "step": 3007 }, { "epoch": 3.681762545899633, "grad_norm": 1.2905523411602837, "learning_rate": 4.292849814193501e-06, "loss": 0.585, "step": 3008 }, { "epoch": 3.682986536107711, "grad_norm": 1.0507644364337232, "learning_rate": 4.292403107452366e-06, "loss": 0.6214, "step": 3009 }, { "epoch": 3.6842105263157894, "grad_norm": 1.5369018449907004, "learning_rate": 4.291956282921129e-06, "loss": 0.4722, "step": 3010 }, { "epoch": 3.685434516523868, "grad_norm": 1.2410666920877502, "learning_rate": 4.291509340629152e-06, "loss": 0.9595, "step": 3011 }, { "epoch": 3.686658506731946, "grad_norm": 1.7307243833399197, "learning_rate": 4.291062280605809e-06, "loss": 0.5883, "step": 3012 }, { "epoch": 3.6878824969400243, "grad_norm": 1.1819155125883472, "learning_rate": 4.290615102880477e-06, "loss": 1.0576, "step": 3013 }, { "epoch": 3.689106487148103, "grad_norm": 0.9915969395053581, "learning_rate": 4.2901678074825435e-06, "loss": 0.5243, "step": 3014 }, { "epoch": 3.690330477356181, "grad_norm": 2.4031169187436237, "learning_rate": 4.289720394441403e-06, "loss": 0.561, "step": 3015 }, { "epoch": 3.6915544675642593, "grad_norm": 0.8862365813641767, "learning_rate": 4.289272863786458e-06, "loss": 0.5075, "step": 3016 }, { "epoch": 3.692778457772338, "grad_norm": 0.8976684364126012, "learning_rate": 4.288825215547119e-06, "loss": 0.5736, "step": 3017 }, { "epoch": 3.694002447980416, "grad_norm": 0.9581946222731119, "learning_rate": 4.2883774497528015e-06, "loss": 0.4721, "step": 3018 }, { "epoch": 3.6952264381884943, "grad_norm": 1.823189190426306, "learning_rate": 4.287929566432935e-06, "loss": 0.4284, "step": 3019 }, { "epoch": 3.696450428396573, "grad_norm": 1.237808719279712, "learning_rate": 4.287481565616949e-06, "loss": 1.2473, "step": 3020 }, { "epoch": 3.697674418604651, "grad_norm": 1.3984379104864662, "learning_rate": 4.287033447334286e-06, "loss": 0.5109, "step": 3021 }, { "epoch": 3.6988984088127292, "grad_norm": 1.4322536349714843, "learning_rate": 4.286585211614396e-06, "loss": 0.5719, "step": 3022 }, { "epoch": 3.700122399020808, "grad_norm": 1.1723091285823055, "learning_rate": 4.286136858486732e-06, "loss": 1.4499, "step": 3023 }, { "epoch": 3.701346389228886, "grad_norm": 1.2395036197760911, "learning_rate": 4.285688387980762e-06, "loss": 0.5163, "step": 3024 }, { "epoch": 3.7025703794369647, "grad_norm": 1.0679567024286427, "learning_rate": 4.285239800125955e-06, "loss": 0.5721, "step": 3025 }, { "epoch": 3.703794369645043, "grad_norm": 0.8451870017028408, "learning_rate": 4.2847910949517916e-06, "loss": 0.6167, "step": 3026 }, { "epoch": 3.7050183598531214, "grad_norm": 1.2013410145122847, "learning_rate": 4.284342272487758e-06, "loss": 0.5995, "step": 3027 }, { "epoch": 3.7062423500611996, "grad_norm": 1.3689459113643188, "learning_rate": 4.283893332763352e-06, "loss": 0.8086, "step": 3028 }, { "epoch": 3.707466340269278, "grad_norm": 1.5005209860051842, "learning_rate": 4.283444275808073e-06, "loss": 1.0893, "step": 3029 }, { "epoch": 3.7086903304773564, "grad_norm": 1.830814939303419, "learning_rate": 4.282995101651433e-06, "loss": 0.8783, "step": 3030 }, { "epoch": 3.7099143206854346, "grad_norm": 1.0221729880332489, "learning_rate": 4.282545810322949e-06, "loss": 0.6164, "step": 3031 }, { "epoch": 3.7111383108935128, "grad_norm": 1.629024776985239, "learning_rate": 4.282096401852148e-06, "loss": 0.606, "step": 3032 }, { "epoch": 3.7123623011015914, "grad_norm": 0.9459133699899019, "learning_rate": 4.281646876268564e-06, "loss": 0.6673, "step": 3033 }, { "epoch": 3.7135862913096696, "grad_norm": 1.0718638885408114, "learning_rate": 4.281197233601736e-06, "loss": 0.5317, "step": 3034 }, { "epoch": 3.7148102815177477, "grad_norm": 2.0284136755809845, "learning_rate": 4.280747473881215e-06, "loss": 0.5971, "step": 3035 }, { "epoch": 3.7160342717258263, "grad_norm": 2.2875890473368616, "learning_rate": 4.280297597136556e-06, "loss": 0.8276, "step": 3036 }, { "epoch": 3.7172582619339045, "grad_norm": 1.2235306956113736, "learning_rate": 4.279847603397324e-06, "loss": 1.6525, "step": 3037 }, { "epoch": 3.7184822521419827, "grad_norm": 1.3195442949404703, "learning_rate": 4.27939749269309e-06, "loss": 0.9591, "step": 3038 }, { "epoch": 3.7197062423500613, "grad_norm": 1.4254870700214906, "learning_rate": 4.278947265053435e-06, "loss": 0.452, "step": 3039 }, { "epoch": 3.7209302325581395, "grad_norm": 1.088934581077748, "learning_rate": 4.278496920507945e-06, "loss": 0.9724, "step": 3040 }, { "epoch": 3.7221542227662177, "grad_norm": 1.129557276145332, "learning_rate": 4.278046459086216e-06, "loss": 1.3255, "step": 3041 }, { "epoch": 3.7233782129742963, "grad_norm": 1.6529284122332322, "learning_rate": 4.27759588081785e-06, "loss": 0.5384, "step": 3042 }, { "epoch": 3.7246022031823744, "grad_norm": 1.0272542785980685, "learning_rate": 4.277145185732458e-06, "loss": 0.6625, "step": 3043 }, { "epoch": 3.7258261933904526, "grad_norm": 0.8592320187266913, "learning_rate": 4.276694373859658e-06, "loss": 0.8152, "step": 3044 }, { "epoch": 3.7270501835985312, "grad_norm": 2.0377395768318363, "learning_rate": 4.276243445229074e-06, "loss": 0.6592, "step": 3045 }, { "epoch": 3.7282741738066094, "grad_norm": 1.0158832393233896, "learning_rate": 4.275792399870341e-06, "loss": 0.7923, "step": 3046 }, { "epoch": 3.729498164014688, "grad_norm": 0.8688083603342553, "learning_rate": 4.275341237813101e-06, "loss": 0.7977, "step": 3047 }, { "epoch": 3.730722154222766, "grad_norm": 0.8619724944432827, "learning_rate": 4.274889959087e-06, "loss": 1.0084, "step": 3048 }, { "epoch": 3.731946144430845, "grad_norm": 1.1469365954927044, "learning_rate": 4.274438563721695e-06, "loss": 0.9517, "step": 3049 }, { "epoch": 3.733170134638923, "grad_norm": 1.5293733074472167, "learning_rate": 4.273987051746853e-06, "loss": 0.6111, "step": 3050 }, { "epoch": 3.734394124847001, "grad_norm": 0.9550143177735964, "learning_rate": 4.273535423192142e-06, "loss": 0.5328, "step": 3051 }, { "epoch": 3.73561811505508, "grad_norm": 0.9252723806560965, "learning_rate": 4.273083678087243e-06, "loss": 0.798, "step": 3052 }, { "epoch": 3.736842105263158, "grad_norm": 1.6647000624629482, "learning_rate": 4.272631816461844e-06, "loss": 0.6605, "step": 3053 }, { "epoch": 3.738066095471236, "grad_norm": 1.3398399532864167, "learning_rate": 4.272179838345637e-06, "loss": 0.7667, "step": 3054 }, { "epoch": 3.7392900856793148, "grad_norm": 1.180749418641135, "learning_rate": 4.271727743768327e-06, "loss": 1.4093, "step": 3055 }, { "epoch": 3.740514075887393, "grad_norm": 1.2640635795032305, "learning_rate": 4.271275532759622e-06, "loss": 1.06, "step": 3056 }, { "epoch": 3.741738066095471, "grad_norm": 1.4668980873682436, "learning_rate": 4.27082320534924e-06, "loss": 1.1654, "step": 3057 }, { "epoch": 3.7429620563035497, "grad_norm": 0.882758695745506, "learning_rate": 4.270370761566909e-06, "loss": 0.4766, "step": 3058 }, { "epoch": 3.744186046511628, "grad_norm": 0.9396474940773939, "learning_rate": 4.269918201442358e-06, "loss": 0.743, "step": 3059 }, { "epoch": 3.745410036719706, "grad_norm": 1.0924411539815455, "learning_rate": 4.269465525005329e-06, "loss": 0.7783, "step": 3060 }, { "epoch": 3.7466340269277847, "grad_norm": 1.1767176894574323, "learning_rate": 4.269012732285571e-06, "loss": 1.6911, "step": 3061 }, { "epoch": 3.747858017135863, "grad_norm": 1.3303880340737395, "learning_rate": 4.26855982331284e-06, "loss": 1.9366, "step": 3062 }, { "epoch": 3.749082007343941, "grad_norm": 1.885643727072353, "learning_rate": 4.268106798116898e-06, "loss": 0.5759, "step": 3063 }, { "epoch": 3.7503059975520197, "grad_norm": 0.7288302125805673, "learning_rate": 4.267653656727518e-06, "loss": 0.6372, "step": 3064 }, { "epoch": 3.751529987760098, "grad_norm": 1.3233072866521771, "learning_rate": 4.267200399174477e-06, "loss": 0.5376, "step": 3065 }, { "epoch": 3.752753977968176, "grad_norm": 1.4556186154693775, "learning_rate": 4.266747025487562e-06, "loss": 0.4335, "step": 3066 }, { "epoch": 3.7539779681762546, "grad_norm": 1.1174895294330383, "learning_rate": 4.266293535696567e-06, "loss": 0.4233, "step": 3067 }, { "epoch": 3.755201958384333, "grad_norm": 1.562050801725391, "learning_rate": 4.2658399298312944e-06, "loss": 0.5354, "step": 3068 }, { "epoch": 3.7564259485924114, "grad_norm": 1.2175490096354604, "learning_rate": 4.2653862079215535e-06, "loss": 0.5298, "step": 3069 }, { "epoch": 3.7576499388004896, "grad_norm": 0.7783272560943433, "learning_rate": 4.26493236999716e-06, "loss": 0.4769, "step": 3070 }, { "epoch": 3.758873929008568, "grad_norm": 0.7801920692318581, "learning_rate": 4.264478416087939e-06, "loss": 0.6477, "step": 3071 }, { "epoch": 3.7600979192166464, "grad_norm": 1.2814011569091337, "learning_rate": 4.2640243462237245e-06, "loss": 0.5737, "step": 3072 }, { "epoch": 3.7613219094247246, "grad_norm": 1.2153143816438317, "learning_rate": 4.263570160434353e-06, "loss": 0.4961, "step": 3073 }, { "epoch": 3.762545899632803, "grad_norm": 1.5431838033877359, "learning_rate": 4.263115858749674e-06, "loss": 0.3468, "step": 3074 }, { "epoch": 3.7637698898408813, "grad_norm": 1.0150682561403608, "learning_rate": 4.2626614411995415e-06, "loss": 0.6306, "step": 3075 }, { "epoch": 3.7649938800489595, "grad_norm": 1.2765706301592676, "learning_rate": 4.262206907813819e-06, "loss": 0.6041, "step": 3076 }, { "epoch": 3.766217870257038, "grad_norm": 1.0389265367966636, "learning_rate": 4.261752258622377e-06, "loss": 1.0424, "step": 3077 }, { "epoch": 3.7674418604651163, "grad_norm": 1.361885743217014, "learning_rate": 4.261297493655092e-06, "loss": 0.6654, "step": 3078 }, { "epoch": 3.7686658506731945, "grad_norm": 1.4222532538635666, "learning_rate": 4.2608426129418514e-06, "loss": 0.7221, "step": 3079 }, { "epoch": 3.769889840881273, "grad_norm": 1.4784488106534912, "learning_rate": 4.260387616512547e-06, "loss": 0.6095, "step": 3080 }, { "epoch": 3.7711138310893513, "grad_norm": 1.0970894025974804, "learning_rate": 4.259932504397079e-06, "loss": 0.604, "step": 3081 }, { "epoch": 3.7723378212974294, "grad_norm": 2.214317374279887, "learning_rate": 4.259477276625357e-06, "loss": 0.5164, "step": 3082 }, { "epoch": 3.773561811505508, "grad_norm": 1.0525066742799187, "learning_rate": 4.259021933227295e-06, "loss": 0.572, "step": 3083 }, { "epoch": 3.7747858017135862, "grad_norm": 0.963528684407431, "learning_rate": 4.258566474232819e-06, "loss": 0.3119, "step": 3084 }, { "epoch": 3.7760097919216644, "grad_norm": 0.964290830606418, "learning_rate": 4.258110899671859e-06, "loss": 0.9058, "step": 3085 }, { "epoch": 3.777233782129743, "grad_norm": 0.761026398621415, "learning_rate": 4.257655209574352e-06, "loss": 0.5298, "step": 3086 }, { "epoch": 3.778457772337821, "grad_norm": 1.3548740377736872, "learning_rate": 4.257199403970247e-06, "loss": 1.0107, "step": 3087 }, { "epoch": 3.7796817625458994, "grad_norm": 1.5326219180775327, "learning_rate": 4.256743482889496e-06, "loss": 0.5027, "step": 3088 }, { "epoch": 3.780905752753978, "grad_norm": 1.7836578886929766, "learning_rate": 4.2562874463620615e-06, "loss": 1.0515, "step": 3089 }, { "epoch": 3.782129742962056, "grad_norm": 1.1636265243329595, "learning_rate": 4.255831294417912e-06, "loss": 0.9001, "step": 3090 }, { "epoch": 3.783353733170135, "grad_norm": 1.083848110618823, "learning_rate": 4.255375027087023e-06, "loss": 0.9743, "step": 3091 }, { "epoch": 3.784577723378213, "grad_norm": 1.1119584675723793, "learning_rate": 4.254918644399382e-06, "loss": 0.4747, "step": 3092 }, { "epoch": 3.7858017135862916, "grad_norm": 0.9505385868683223, "learning_rate": 4.2544621463849775e-06, "loss": 0.911, "step": 3093 }, { "epoch": 3.7870257037943698, "grad_norm": 0.7841506511896073, "learning_rate": 4.25400553307381e-06, "loss": 0.4816, "step": 3094 }, { "epoch": 3.788249694002448, "grad_norm": 0.8677801736542136, "learning_rate": 4.253548804495887e-06, "loss": 0.8706, "step": 3095 }, { "epoch": 3.7894736842105265, "grad_norm": 1.020093948489314, "learning_rate": 4.253091960681222e-06, "loss": 0.7422, "step": 3096 }, { "epoch": 3.7906976744186047, "grad_norm": 1.7951254514316166, "learning_rate": 4.252635001659838e-06, "loss": 0.4261, "step": 3097 }, { "epoch": 3.791921664626683, "grad_norm": 1.3405092704348531, "learning_rate": 4.252177927461763e-06, "loss": 0.4152, "step": 3098 }, { "epoch": 3.7931456548347615, "grad_norm": 1.829914946716124, "learning_rate": 4.251720738117037e-06, "loss": 0.4441, "step": 3099 }, { "epoch": 3.7943696450428397, "grad_norm": 1.0538893061229253, "learning_rate": 4.251263433655703e-06, "loss": 0.4538, "step": 3100 }, { "epoch": 3.795593635250918, "grad_norm": 1.7527708046737387, "learning_rate": 4.250806014107814e-06, "loss": 0.4698, "step": 3101 }, { "epoch": 3.7968176254589965, "grad_norm": 0.9086131355017543, "learning_rate": 4.250348479503428e-06, "loss": 0.4044, "step": 3102 }, { "epoch": 3.7980416156670747, "grad_norm": 1.177181158441611, "learning_rate": 4.249890829872615e-06, "loss": 1.034, "step": 3103 }, { "epoch": 3.799265605875153, "grad_norm": 1.7735560975448976, "learning_rate": 4.2494330652454485e-06, "loss": 0.5568, "step": 3104 }, { "epoch": 3.8004895960832314, "grad_norm": 1.2578239314121873, "learning_rate": 4.2489751856520125e-06, "loss": 0.5821, "step": 3105 }, { "epoch": 3.8017135862913096, "grad_norm": 0.9602259097736453, "learning_rate": 4.2485171911223965e-06, "loss": 0.7326, "step": 3106 }, { "epoch": 3.802937576499388, "grad_norm": 1.1038696015863398, "learning_rate": 4.2480590816866965e-06, "loss": 0.6491, "step": 3107 }, { "epoch": 3.8041615667074664, "grad_norm": 1.3982127460680442, "learning_rate": 4.24760085737502e-06, "loss": 0.7038, "step": 3108 }, { "epoch": 3.8053855569155446, "grad_norm": 1.6506495824943868, "learning_rate": 4.24714251821748e-06, "loss": 0.9469, "step": 3109 }, { "epoch": 3.8066095471236228, "grad_norm": 1.1975723789433264, "learning_rate": 4.2466840642441955e-06, "loss": 1.2197, "step": 3110 }, { "epoch": 3.8078335373317014, "grad_norm": 0.753948318331713, "learning_rate": 4.246225495485295e-06, "loss": 0.674, "step": 3111 }, { "epoch": 3.8090575275397796, "grad_norm": 1.3556576255140418, "learning_rate": 4.2457668119709124e-06, "loss": 0.6503, "step": 3112 }, { "epoch": 3.810281517747858, "grad_norm": 1.0101317078638816, "learning_rate": 4.245308013731193e-06, "loss": 0.6075, "step": 3113 }, { "epoch": 3.8115055079559363, "grad_norm": 1.908787870212063, "learning_rate": 4.244849100796287e-06, "loss": 1.1205, "step": 3114 }, { "epoch": 3.812729498164015, "grad_norm": 1.7264420296590912, "learning_rate": 4.24439007319635e-06, "loss": 0.5513, "step": 3115 }, { "epoch": 3.813953488372093, "grad_norm": 0.9618130130738143, "learning_rate": 4.243930930961551e-06, "loss": 0.6892, "step": 3116 }, { "epoch": 3.8151774785801713, "grad_norm": 1.358823937361697, "learning_rate": 4.243471674122061e-06, "loss": 1.1077, "step": 3117 }, { "epoch": 3.81640146878825, "grad_norm": 1.5417259239758323, "learning_rate": 4.243012302708063e-06, "loss": 0.5445, "step": 3118 }, { "epoch": 3.817625458996328, "grad_norm": 1.4080700278047578, "learning_rate": 4.242552816749741e-06, "loss": 1.1848, "step": 3119 }, { "epoch": 3.8188494492044063, "grad_norm": 1.2068833936777243, "learning_rate": 4.2420932162772936e-06, "loss": 0.5814, "step": 3120 }, { "epoch": 3.820073439412485, "grad_norm": 0.7787808034169793, "learning_rate": 4.241633501320924e-06, "loss": 0.5172, "step": 3121 }, { "epoch": 3.821297429620563, "grad_norm": 1.2689828702160677, "learning_rate": 4.241173671910843e-06, "loss": 0.53, "step": 3122 }, { "epoch": 3.8225214198286412, "grad_norm": 1.2070129118367148, "learning_rate": 4.240713728077268e-06, "loss": 0.5065, "step": 3123 }, { "epoch": 3.82374541003672, "grad_norm": 1.4371564958289553, "learning_rate": 4.240253669850425e-06, "loss": 1.1711, "step": 3124 }, { "epoch": 3.824969400244798, "grad_norm": 1.1051012433317404, "learning_rate": 4.239793497260548e-06, "loss": 0.6964, "step": 3125 }, { "epoch": 3.826193390452876, "grad_norm": 1.3210718792508302, "learning_rate": 4.239333210337877e-06, "loss": 0.7952, "step": 3126 }, { "epoch": 3.827417380660955, "grad_norm": 0.5405088512397118, "learning_rate": 4.2388728091126614e-06, "loss": 0.373, "step": 3127 }, { "epoch": 3.828641370869033, "grad_norm": 0.717990764227263, "learning_rate": 4.238412293615156e-06, "loss": 0.3954, "step": 3128 }, { "epoch": 3.829865361077111, "grad_norm": 1.567787365938041, "learning_rate": 4.237951663875625e-06, "loss": 1.4716, "step": 3129 }, { "epoch": 3.83108935128519, "grad_norm": 1.5672471837473472, "learning_rate": 4.237490919924338e-06, "loss": 0.4631, "step": 3130 }, { "epoch": 3.832313341493268, "grad_norm": 2.3391346785965244, "learning_rate": 4.237030061791575e-06, "loss": 0.5777, "step": 3131 }, { "epoch": 3.833537331701346, "grad_norm": 0.3845916090422017, "learning_rate": 4.23656908950762e-06, "loss": 0.1368, "step": 3132 }, { "epoch": 3.8347613219094248, "grad_norm": 1.2990141857626685, "learning_rate": 4.236108003102769e-06, "loss": 0.4642, "step": 3133 }, { "epoch": 3.835985312117503, "grad_norm": 1.3514626104621354, "learning_rate": 4.235646802607322e-06, "loss": 0.814, "step": 3134 }, { "epoch": 3.8372093023255816, "grad_norm": 0.8802330104009451, "learning_rate": 4.2351854880515856e-06, "loss": 0.7873, "step": 3135 }, { "epoch": 3.8384332925336597, "grad_norm": 1.1750498347401908, "learning_rate": 4.234724059465877e-06, "loss": 1.4327, "step": 3136 }, { "epoch": 3.8396572827417383, "grad_norm": 0.8456302170261832, "learning_rate": 4.23426251688052e-06, "loss": 0.4248, "step": 3137 }, { "epoch": 3.8408812729498165, "grad_norm": 0.9157836093366463, "learning_rate": 4.2338008603258444e-06, "loss": 0.5951, "step": 3138 }, { "epoch": 3.8421052631578947, "grad_norm": 0.7475062672582033, "learning_rate": 4.233339089832189e-06, "loss": 0.6687, "step": 3139 }, { "epoch": 3.8433292533659733, "grad_norm": 1.478957017449317, "learning_rate": 4.2328772054299005e-06, "loss": 1.3669, "step": 3140 }, { "epoch": 3.8445532435740515, "grad_norm": 1.1331979927799298, "learning_rate": 4.23241520714933e-06, "loss": 1.662, "step": 3141 }, { "epoch": 3.8457772337821297, "grad_norm": 1.6812337643420379, "learning_rate": 4.231953095020842e-06, "loss": 0.5876, "step": 3142 }, { "epoch": 3.8470012239902083, "grad_norm": 1.0211685297716826, "learning_rate": 4.231490869074802e-06, "loss": 0.4382, "step": 3143 }, { "epoch": 3.8482252141982864, "grad_norm": 1.5702306695523027, "learning_rate": 4.231028529341586e-06, "loss": 0.5609, "step": 3144 }, { "epoch": 3.8494492044063646, "grad_norm": 1.2739911686144465, "learning_rate": 4.230566075851578e-06, "loss": 0.5376, "step": 3145 }, { "epoch": 3.8506731946144432, "grad_norm": 1.3354310663560005, "learning_rate": 4.230103508635168e-06, "loss": 0.502, "step": 3146 }, { "epoch": 3.8518971848225214, "grad_norm": 1.3241972018157122, "learning_rate": 4.229640827722755e-06, "loss": 1.2592, "step": 3147 }, { "epoch": 3.8531211750305996, "grad_norm": 1.1432563835963918, "learning_rate": 4.229178033144744e-06, "loss": 0.4334, "step": 3148 }, { "epoch": 3.854345165238678, "grad_norm": 1.1284918225019118, "learning_rate": 4.228715124931548e-06, "loss": 0.8603, "step": 3149 }, { "epoch": 3.8555691554467564, "grad_norm": 2.0180471952193333, "learning_rate": 4.228252103113589e-06, "loss": 0.3848, "step": 3150 }, { "epoch": 3.8567931456548346, "grad_norm": 1.0328969637824374, "learning_rate": 4.227788967721294e-06, "loss": 0.8353, "step": 3151 }, { "epoch": 3.858017135862913, "grad_norm": 1.0923580492371523, "learning_rate": 4.227325718785098e-06, "loss": 0.737, "step": 3152 }, { "epoch": 3.8592411260709913, "grad_norm": 1.105930994172851, "learning_rate": 4.226862356335446e-06, "loss": 0.514, "step": 3153 }, { "epoch": 3.8604651162790695, "grad_norm": 0.976452990347767, "learning_rate": 4.226398880402786e-06, "loss": 0.6547, "step": 3154 }, { "epoch": 3.861689106487148, "grad_norm": 1.5399346960831135, "learning_rate": 4.2259352910175775e-06, "loss": 0.5066, "step": 3155 }, { "epoch": 3.8629130966952263, "grad_norm": 1.1738658945334446, "learning_rate": 4.2254715882102856e-06, "loss": 0.5902, "step": 3156 }, { "epoch": 3.864137086903305, "grad_norm": 1.367576524203623, "learning_rate": 4.2250077720113825e-06, "loss": 1.0509, "step": 3157 }, { "epoch": 3.865361077111383, "grad_norm": 1.119792873902574, "learning_rate": 4.2245438424513495e-06, "loss": 0.7073, "step": 3158 }, { "epoch": 3.8665850673194617, "grad_norm": 2.213799256156493, "learning_rate": 4.224079799560674e-06, "loss": 0.4268, "step": 3159 }, { "epoch": 3.86780905752754, "grad_norm": 1.2629838174437327, "learning_rate": 4.223615643369851e-06, "loss": 0.7086, "step": 3160 }, { "epoch": 3.869033047735618, "grad_norm": 0.9821557084779523, "learning_rate": 4.223151373909382e-06, "loss": 0.5463, "step": 3161 }, { "epoch": 3.8702570379436967, "grad_norm": 1.7348009311485701, "learning_rate": 4.222686991209779e-06, "loss": 0.4392, "step": 3162 }, { "epoch": 3.871481028151775, "grad_norm": 1.514410227098721, "learning_rate": 4.222222495301559e-06, "loss": 0.6918, "step": 3163 }, { "epoch": 3.872705018359853, "grad_norm": 0.9509436111495124, "learning_rate": 4.221757886215246e-06, "loss": 1.1195, "step": 3164 }, { "epoch": 3.8739290085679317, "grad_norm": 1.340753333376802, "learning_rate": 4.221293163981374e-06, "loss": 0.6885, "step": 3165 }, { "epoch": 3.87515299877601, "grad_norm": 1.6744785630659382, "learning_rate": 4.220828328630481e-06, "loss": 0.6695, "step": 3166 }, { "epoch": 3.876376988984088, "grad_norm": 1.280906820337949, "learning_rate": 4.220363380193115e-06, "loss": 0.4909, "step": 3167 }, { "epoch": 3.8776009791921666, "grad_norm": 1.407097955705677, "learning_rate": 4.219898318699831e-06, "loss": 0.4076, "step": 3168 }, { "epoch": 3.878824969400245, "grad_norm": 1.431948580842852, "learning_rate": 4.219433144181192e-06, "loss": 0.5589, "step": 3169 }, { "epoch": 3.880048959608323, "grad_norm": 1.1979153957375552, "learning_rate": 4.218967856667765e-06, "loss": 0.5197, "step": 3170 }, { "epoch": 3.8812729498164016, "grad_norm": 1.9553529273094514, "learning_rate": 4.21850245619013e-06, "loss": 0.4459, "step": 3171 }, { "epoch": 3.8824969400244798, "grad_norm": 0.5355398571508138, "learning_rate": 4.218036942778869e-06, "loss": 0.2704, "step": 3172 }, { "epoch": 3.883720930232558, "grad_norm": 1.1589045499130541, "learning_rate": 4.2175713164645745e-06, "loss": 0.6624, "step": 3173 }, { "epoch": 3.8849449204406366, "grad_norm": 1.2620656594994981, "learning_rate": 4.217105577277846e-06, "loss": 1.2049, "step": 3174 }, { "epoch": 3.8861689106487147, "grad_norm": 0.7687579689758555, "learning_rate": 4.216639725249291e-06, "loss": 0.6533, "step": 3175 }, { "epoch": 3.887392900856793, "grad_norm": 0.661068762510923, "learning_rate": 4.216173760409522e-06, "loss": 0.448, "step": 3176 }, { "epoch": 3.8886168910648715, "grad_norm": 0.6665320758772337, "learning_rate": 4.215707682789161e-06, "loss": 0.4667, "step": 3177 }, { "epoch": 3.8898408812729497, "grad_norm": 0.9241958251974215, "learning_rate": 4.215241492418837e-06, "loss": 0.474, "step": 3178 }, { "epoch": 3.8910648714810283, "grad_norm": 0.9987072549469569, "learning_rate": 4.214775189329186e-06, "loss": 0.8876, "step": 3179 }, { "epoch": 3.8922888616891065, "grad_norm": 0.9681594837587075, "learning_rate": 4.2143087735508535e-06, "loss": 0.8065, "step": 3180 }, { "epoch": 3.8935128518971847, "grad_norm": 0.898471855109452, "learning_rate": 4.2138422451144876e-06, "loss": 0.6737, "step": 3181 }, { "epoch": 3.8947368421052633, "grad_norm": 0.8988801462161548, "learning_rate": 4.21337560405075e-06, "loss": 0.4065, "step": 3182 }, { "epoch": 3.8959608323133414, "grad_norm": 1.807199486234844, "learning_rate": 4.212908850390304e-06, "loss": 1.3428, "step": 3183 }, { "epoch": 3.89718482252142, "grad_norm": 1.2205371714383537, "learning_rate": 4.212441984163824e-06, "loss": 1.0562, "step": 3184 }, { "epoch": 3.8984088127294982, "grad_norm": 0.7926346251388762, "learning_rate": 4.21197500540199e-06, "loss": 0.5134, "step": 3185 }, { "epoch": 3.8996328029375764, "grad_norm": 1.3974985082184015, "learning_rate": 4.211507914135492e-06, "loss": 0.7404, "step": 3186 }, { "epoch": 3.900856793145655, "grad_norm": 1.1849694837272244, "learning_rate": 4.2110407103950245e-06, "loss": 0.635, "step": 3187 }, { "epoch": 3.902080783353733, "grad_norm": 1.6772296568516902, "learning_rate": 4.21057339421129e-06, "loss": 1.0827, "step": 3188 }, { "epoch": 3.9033047735618114, "grad_norm": 1.195017656786451, "learning_rate": 4.210105965614998e-06, "loss": 1.14, "step": 3189 }, { "epoch": 3.90452876376989, "grad_norm": 1.1356266190465811, "learning_rate": 4.209638424636868e-06, "loss": 1.5805, "step": 3190 }, { "epoch": 3.905752753977968, "grad_norm": 1.9962716068031943, "learning_rate": 4.209170771307625e-06, "loss": 0.7587, "step": 3191 }, { "epoch": 3.9069767441860463, "grad_norm": 0.7672433662199944, "learning_rate": 4.208703005658e-06, "loss": 0.5909, "step": 3192 }, { "epoch": 3.908200734394125, "grad_norm": 1.0784443485965085, "learning_rate": 4.208235127718732e-06, "loss": 1.0902, "step": 3193 }, { "epoch": 3.909424724602203, "grad_norm": 1.3446926217017363, "learning_rate": 4.2077671375205715e-06, "loss": 0.7851, "step": 3194 }, { "epoch": 3.9106487148102813, "grad_norm": 1.4773187909919343, "learning_rate": 4.207299035094271e-06, "loss": 0.4784, "step": 3195 }, { "epoch": 3.91187270501836, "grad_norm": 1.1834761266468163, "learning_rate": 4.206830820470592e-06, "loss": 1.407, "step": 3196 }, { "epoch": 3.913096695226438, "grad_norm": 1.2889579712977055, "learning_rate": 4.206362493680306e-06, "loss": 1.2189, "step": 3197 }, { "epoch": 3.9143206854345163, "grad_norm": 1.3823076304267339, "learning_rate": 4.205894054754187e-06, "loss": 0.6096, "step": 3198 }, { "epoch": 3.915544675642595, "grad_norm": 1.3388542864008641, "learning_rate": 4.205425503723022e-06, "loss": 0.623, "step": 3199 }, { "epoch": 3.916768665850673, "grad_norm": 1.9933160584058467, "learning_rate": 4.2049568406175985e-06, "loss": 0.3982, "step": 3200 }, { "epoch": 3.9179926560587517, "grad_norm": 1.4292677960063531, "learning_rate": 4.20448806546872e-06, "loss": 0.6332, "step": 3201 }, { "epoch": 3.91921664626683, "grad_norm": 1.2451508826608653, "learning_rate": 4.204019178307188e-06, "loss": 1.1307, "step": 3202 }, { "epoch": 3.920440636474908, "grad_norm": 0.7806678914790082, "learning_rate": 4.20355017916382e-06, "loss": 0.6127, "step": 3203 }, { "epoch": 3.9216646266829867, "grad_norm": 1.1209116953406848, "learning_rate": 4.203081068069435e-06, "loss": 0.532, "step": 3204 }, { "epoch": 3.922888616891065, "grad_norm": 1.9833456060015826, "learning_rate": 4.202611845054861e-06, "loss": 0.6053, "step": 3205 }, { "epoch": 3.9241126070991434, "grad_norm": 1.3728766338253626, "learning_rate": 4.202142510150935e-06, "loss": 1.0235, "step": 3206 }, { "epoch": 3.9253365973072216, "grad_norm": 1.2202641772600984, "learning_rate": 4.201673063388498e-06, "loss": 0.4853, "step": 3207 }, { "epoch": 3.9265605875153, "grad_norm": 1.454628686766218, "learning_rate": 4.201203504798402e-06, "loss": 0.4859, "step": 3208 }, { "epoch": 3.9277845777233784, "grad_norm": 1.2449189979011324, "learning_rate": 4.200733834411503e-06, "loss": 0.5417, "step": 3209 }, { "epoch": 3.9290085679314566, "grad_norm": 0.9560082834304796, "learning_rate": 4.200264052258668e-06, "loss": 0.5944, "step": 3210 }, { "epoch": 3.9302325581395348, "grad_norm": 1.20536919265455, "learning_rate": 4.199794158370768e-06, "loss": 0.5717, "step": 3211 }, { "epoch": 3.9314565483476134, "grad_norm": 0.8158952700425665, "learning_rate": 4.1993241527786834e-06, "loss": 0.4123, "step": 3212 }, { "epoch": 3.9326805385556916, "grad_norm": 1.7207086748333111, "learning_rate": 4.1988540355133e-06, "loss": 0.3432, "step": 3213 }, { "epoch": 3.9339045287637697, "grad_norm": 0.988334551635925, "learning_rate": 4.198383806605514e-06, "loss": 0.6539, "step": 3214 }, { "epoch": 3.9351285189718483, "grad_norm": 1.856504578698825, "learning_rate": 4.197913466086224e-06, "loss": 0.5198, "step": 3215 }, { "epoch": 3.9363525091799265, "grad_norm": 0.8562463897578837, "learning_rate": 4.1974430139863435e-06, "loss": 0.6885, "step": 3216 }, { "epoch": 3.9375764993880047, "grad_norm": 1.1486449518367934, "learning_rate": 4.196972450336785e-06, "loss": 0.5681, "step": 3217 }, { "epoch": 3.9388004895960833, "grad_norm": 1.4599689079155813, "learning_rate": 4.196501775168475e-06, "loss": 0.886, "step": 3218 }, { "epoch": 3.9400244798041615, "grad_norm": 1.6047891731848427, "learning_rate": 4.196030988512343e-06, "loss": 0.8781, "step": 3219 }, { "epoch": 3.9412484700122397, "grad_norm": 1.1762603177850568, "learning_rate": 4.195560090399327e-06, "loss": 1.5444, "step": 3220 }, { "epoch": 3.9424724602203183, "grad_norm": 1.1126784995863812, "learning_rate": 4.195089080860374e-06, "loss": 0.4836, "step": 3221 }, { "epoch": 3.9436964504283964, "grad_norm": 1.5700165386306952, "learning_rate": 4.194617959926436e-06, "loss": 1.1205, "step": 3222 }, { "epoch": 3.944920440636475, "grad_norm": 0.9632983215117739, "learning_rate": 4.194146727628473e-06, "loss": 0.6148, "step": 3223 }, { "epoch": 3.9461444308445532, "grad_norm": 1.3082754592729957, "learning_rate": 4.1936753839974545e-06, "loss": 0.3689, "step": 3224 }, { "epoch": 3.9473684210526314, "grad_norm": 1.1079029818083657, "learning_rate": 4.1932039290643534e-06, "loss": 0.4782, "step": 3225 }, { "epoch": 3.94859241126071, "grad_norm": 1.0854158021281397, "learning_rate": 4.192732362860153e-06, "loss": 1.1578, "step": 3226 }, { "epoch": 3.949816401468788, "grad_norm": 1.150796134126557, "learning_rate": 4.1922606854158424e-06, "loss": 0.5303, "step": 3227 }, { "epoch": 3.951040391676867, "grad_norm": 1.0212608149482156, "learning_rate": 4.191788896762418e-06, "loss": 0.6171, "step": 3228 }, { "epoch": 3.952264381884945, "grad_norm": 1.0903885728486087, "learning_rate": 4.191316996930887e-06, "loss": 0.5023, "step": 3229 }, { "epoch": 3.953488372093023, "grad_norm": 1.0472816231174875, "learning_rate": 4.190844985952257e-06, "loss": 0.6424, "step": 3230 }, { "epoch": 3.954712362301102, "grad_norm": 0.9967431749239684, "learning_rate": 4.19037286385755e-06, "loss": 0.6363, "step": 3231 }, { "epoch": 3.95593635250918, "grad_norm": 1.1020261350821574, "learning_rate": 4.189900630677789e-06, "loss": 1.2871, "step": 3232 }, { "epoch": 3.957160342717258, "grad_norm": 1.0643312098170592, "learning_rate": 4.189428286444011e-06, "loss": 0.5426, "step": 3233 }, { "epoch": 3.9583843329253368, "grad_norm": 1.2425866321323995, "learning_rate": 4.188955831187253e-06, "loss": 0.512, "step": 3234 }, { "epoch": 3.959608323133415, "grad_norm": 2.1945178517922135, "learning_rate": 4.188483264938565e-06, "loss": 0.5272, "step": 3235 }, { "epoch": 3.960832313341493, "grad_norm": 0.9538433517694324, "learning_rate": 4.188010587729003e-06, "loss": 0.6053, "step": 3236 }, { "epoch": 3.9620563035495717, "grad_norm": 1.1130150621312103, "learning_rate": 4.187537799589628e-06, "loss": 0.5045, "step": 3237 }, { "epoch": 3.96328029375765, "grad_norm": 1.3838510638464083, "learning_rate": 4.187064900551511e-06, "loss": 0.6629, "step": 3238 }, { "epoch": 3.964504283965728, "grad_norm": 1.7891048767835043, "learning_rate": 4.186591890645728e-06, "loss": 0.7489, "step": 3239 }, { "epoch": 3.9657282741738067, "grad_norm": 0.9734314499524992, "learning_rate": 4.186118769903365e-06, "loss": 0.7779, "step": 3240 }, { "epoch": 3.966952264381885, "grad_norm": 1.1481597072537855, "learning_rate": 4.1856455383555124e-06, "loss": 0.7392, "step": 3241 }, { "epoch": 3.968176254589963, "grad_norm": 0.9156432422316886, "learning_rate": 4.18517219603327e-06, "loss": 0.8609, "step": 3242 }, { "epoch": 3.9694002447980417, "grad_norm": 0.8885352588113989, "learning_rate": 4.184698742967744e-06, "loss": 0.5541, "step": 3243 }, { "epoch": 3.97062423500612, "grad_norm": 1.1206194142053714, "learning_rate": 4.184225179190048e-06, "loss": 0.7709, "step": 3244 }, { "epoch": 3.9718482252141984, "grad_norm": 1.3753277018746342, "learning_rate": 4.183751504731303e-06, "loss": 0.6057, "step": 3245 }, { "epoch": 3.9730722154222766, "grad_norm": 1.543549493410082, "learning_rate": 4.183277719622636e-06, "loss": 0.7684, "step": 3246 }, { "epoch": 3.974296205630355, "grad_norm": 0.8460413132124633, "learning_rate": 4.182803823895184e-06, "loss": 0.5528, "step": 3247 }, { "epoch": 3.9755201958384334, "grad_norm": 1.054590519211611, "learning_rate": 4.182329817580089e-06, "loss": 1.3167, "step": 3248 }, { "epoch": 3.9767441860465116, "grad_norm": 0.6074308893167876, "learning_rate": 4.1818557007085e-06, "loss": 0.369, "step": 3249 }, { "epoch": 3.97796817625459, "grad_norm": 1.998817616009825, "learning_rate": 4.181381473311576e-06, "loss": 0.653, "step": 3250 }, { "epoch": 3.9791921664626684, "grad_norm": 1.1799043000883938, "learning_rate": 4.18090713542048e-06, "loss": 0.483, "step": 3251 }, { "epoch": 3.9804161566707466, "grad_norm": 0.6562459140235288, "learning_rate": 4.180432687066385e-06, "loss": 0.5185, "step": 3252 }, { "epoch": 3.981640146878825, "grad_norm": 1.6364731069240495, "learning_rate": 4.179958128280469e-06, "loss": 0.6471, "step": 3253 }, { "epoch": 3.9828641370869033, "grad_norm": 1.6597254854878378, "learning_rate": 4.179483459093918e-06, "loss": 0.5193, "step": 3254 }, { "epoch": 3.9840881272949815, "grad_norm": 1.183082950259907, "learning_rate": 4.179008679537927e-06, "loss": 0.3555, "step": 3255 }, { "epoch": 3.98531211750306, "grad_norm": 0.9294448612860027, "learning_rate": 4.1785337896436954e-06, "loss": 0.3776, "step": 3256 }, { "epoch": 3.9865361077111383, "grad_norm": 1.4145898583411525, "learning_rate": 4.178058789442431e-06, "loss": 1.046, "step": 3257 }, { "epoch": 3.9877600979192165, "grad_norm": 1.2453044262060387, "learning_rate": 4.17758367896535e-06, "loss": 0.509, "step": 3258 }, { "epoch": 3.988984088127295, "grad_norm": 1.0859080839856812, "learning_rate": 4.177108458243675e-06, "loss": 1.3496, "step": 3259 }, { "epoch": 3.9902080783353733, "grad_norm": 2.0012676872111226, "learning_rate": 4.1766331273086355e-06, "loss": 0.4217, "step": 3260 }, { "epoch": 3.9914320685434515, "grad_norm": 0.9975300610013733, "learning_rate": 4.1761576861914675e-06, "loss": 0.637, "step": 3261 }, { "epoch": 3.99265605875153, "grad_norm": 0.8582768205077455, "learning_rate": 4.175682134923417e-06, "loss": 0.6088, "step": 3262 }, { "epoch": 3.9938800489596082, "grad_norm": 1.3989164235659188, "learning_rate": 4.1752064735357335e-06, "loss": 0.5652, "step": 3263 }, { "epoch": 3.9951040391676864, "grad_norm": 1.8692129181327695, "learning_rate": 4.1747307020596775e-06, "loss": 0.9654, "step": 3264 }, { "epoch": 3.996328029375765, "grad_norm": 1.0531002565008702, "learning_rate": 4.174254820526513e-06, "loss": 0.332, "step": 3265 }, { "epoch": 3.997552019583843, "grad_norm": 1.4434190850273085, "learning_rate": 4.173778828967515e-06, "loss": 0.7892, "step": 3266 }, { "epoch": 3.998776009791922, "grad_norm": 1.1899233105511415, "learning_rate": 4.173302727413963e-06, "loss": 0.6891, "step": 3267 }, { "epoch": 4.0, "grad_norm": 2.337153056515082, "learning_rate": 4.172826515897146e-06, "loss": 0.5197, "step": 3268 }, { "epoch": 4.001223990208079, "grad_norm": 1.0770682496225605, "learning_rate": 4.172350194448357e-06, "loss": 1.2424, "step": 3269 }, { "epoch": 4.002447980416156, "grad_norm": 0.7668975448193764, "learning_rate": 4.171873763098898e-06, "loss": 0.5314, "step": 3270 }, { "epoch": 4.003671970624235, "grad_norm": 0.7231309384458519, "learning_rate": 4.171397221880081e-06, "loss": 0.4413, "step": 3271 }, { "epoch": 4.004895960832314, "grad_norm": 0.8844410665672732, "learning_rate": 4.1709205708232195e-06, "loss": 0.8655, "step": 3272 }, { "epoch": 4.006119951040391, "grad_norm": 0.7040824748901879, "learning_rate": 4.170443809959638e-06, "loss": 0.5522, "step": 3273 }, { "epoch": 4.00734394124847, "grad_norm": 1.035018920890649, "learning_rate": 4.169966939320668e-06, "loss": 0.9305, "step": 3274 }, { "epoch": 4.0085679314565485, "grad_norm": 1.3547910207449279, "learning_rate": 4.169489958937648e-06, "loss": 0.6207, "step": 3275 }, { "epoch": 4.009791921664626, "grad_norm": 1.296706367308019, "learning_rate": 4.169012868841923e-06, "loss": 0.4518, "step": 3276 }, { "epoch": 4.011015911872705, "grad_norm": 0.9799395753157674, "learning_rate": 4.168535669064845e-06, "loss": 0.6466, "step": 3277 }, { "epoch": 4.0122399020807835, "grad_norm": 0.9831362619662715, "learning_rate": 4.168058359637775e-06, "loss": 0.5339, "step": 3278 }, { "epoch": 4.013463892288861, "grad_norm": 1.306770430794324, "learning_rate": 4.167580940592079e-06, "loss": 1.1229, "step": 3279 }, { "epoch": 4.01468788249694, "grad_norm": 1.6447021321167703, "learning_rate": 4.167103411959131e-06, "loss": 0.4751, "step": 3280 }, { "epoch": 4.0159118727050185, "grad_norm": 1.9017146324792293, "learning_rate": 4.166625773770314e-06, "loss": 0.4645, "step": 3281 }, { "epoch": 4.017135862913097, "grad_norm": 1.7414616995185106, "learning_rate": 4.166148026057014e-06, "loss": 0.6397, "step": 3282 }, { "epoch": 4.018359853121175, "grad_norm": 1.1133351967361695, "learning_rate": 4.1656701688506294e-06, "loss": 0.7075, "step": 3283 }, { "epoch": 4.0195838433292534, "grad_norm": 1.1917587436567887, "learning_rate": 4.165192202182563e-06, "loss": 0.5042, "step": 3284 }, { "epoch": 4.020807833537332, "grad_norm": 1.5181313013975306, "learning_rate": 4.164714126084223e-06, "loss": 0.7576, "step": 3285 }, { "epoch": 4.02203182374541, "grad_norm": 1.1233532461550566, "learning_rate": 4.164235940587029e-06, "loss": 0.4133, "step": 3286 }, { "epoch": 4.023255813953488, "grad_norm": 1.0653283514514762, "learning_rate": 4.163757645722403e-06, "loss": 1.4393, "step": 3287 }, { "epoch": 4.024479804161567, "grad_norm": 0.9800986218668981, "learning_rate": 4.163279241521779e-06, "loss": 0.4829, "step": 3288 }, { "epoch": 4.025703794369645, "grad_norm": 1.2482067350824637, "learning_rate": 4.162800728016596e-06, "loss": 0.7705, "step": 3289 }, { "epoch": 4.026927784577723, "grad_norm": 1.226151921127532, "learning_rate": 4.162322105238299e-06, "loss": 0.5877, "step": 3290 }, { "epoch": 4.028151774785802, "grad_norm": 0.8126059107480726, "learning_rate": 4.161843373218342e-06, "loss": 0.5718, "step": 3291 }, { "epoch": 4.02937576499388, "grad_norm": 1.232450665432227, "learning_rate": 4.161364531988185e-06, "loss": 0.4992, "step": 3292 }, { "epoch": 4.030599755201958, "grad_norm": 1.5638809795129538, "learning_rate": 4.1608855815792955e-06, "loss": 0.4745, "step": 3293 }, { "epoch": 4.031823745410037, "grad_norm": 1.1930217799431573, "learning_rate": 4.160406522023149e-06, "loss": 0.621, "step": 3294 }, { "epoch": 4.033047735618115, "grad_norm": 1.0652230139913195, "learning_rate": 4.1599273533512275e-06, "loss": 0.5234, "step": 3295 }, { "epoch": 4.034271725826193, "grad_norm": 0.9582027820409285, "learning_rate": 4.15944807559502e-06, "loss": 0.6783, "step": 3296 }, { "epoch": 4.035495716034272, "grad_norm": 0.9918953431957879, "learning_rate": 4.158968688786022e-06, "loss": 0.6542, "step": 3297 }, { "epoch": 4.03671970624235, "grad_norm": 1.2361849219416445, "learning_rate": 4.158489192955739e-06, "loss": 1.1945, "step": 3298 }, { "epoch": 4.037943696450428, "grad_norm": 1.3906176318514818, "learning_rate": 4.158009588135681e-06, "loss": 1.2236, "step": 3299 }, { "epoch": 4.039167686658507, "grad_norm": 1.463784422943233, "learning_rate": 4.157529874357364e-06, "loss": 1.3581, "step": 3300 }, { "epoch": 4.0403916768665855, "grad_norm": 2.9122546967199674, "learning_rate": 4.157050051652315e-06, "loss": 0.5205, "step": 3301 }, { "epoch": 4.041615667074663, "grad_norm": 2.2136999535922075, "learning_rate": 4.156570120052066e-06, "loss": 0.4829, "step": 3302 }, { "epoch": 4.042839657282742, "grad_norm": 1.2448173086035832, "learning_rate": 4.156090079588155e-06, "loss": 0.5189, "step": 3303 }, { "epoch": 4.0440636474908205, "grad_norm": 0.920344483403311, "learning_rate": 4.1556099302921305e-06, "loss": 0.4895, "step": 3304 }, { "epoch": 4.045287637698898, "grad_norm": 1.2827465444005968, "learning_rate": 4.155129672195545e-06, "loss": 0.6347, "step": 3305 }, { "epoch": 4.046511627906977, "grad_norm": 1.137513148026802, "learning_rate": 4.154649305329959e-06, "loss": 1.2929, "step": 3306 }, { "epoch": 4.0477356181150554, "grad_norm": 0.9476714409464848, "learning_rate": 4.15416882972694e-06, "loss": 1.1137, "step": 3307 }, { "epoch": 4.048959608323133, "grad_norm": 1.62143869083556, "learning_rate": 4.153688245418065e-06, "loss": 0.5147, "step": 3308 }, { "epoch": 4.050183598531212, "grad_norm": 1.4392643920216541, "learning_rate": 4.153207552434915e-06, "loss": 0.8744, "step": 3309 }, { "epoch": 4.05140758873929, "grad_norm": 0.979334342927425, "learning_rate": 4.152726750809078e-06, "loss": 0.727, "step": 3310 }, { "epoch": 4.052631578947368, "grad_norm": 1.22096457738444, "learning_rate": 4.152245840572153e-06, "loss": 1.4245, "step": 3311 }, { "epoch": 4.053855569155447, "grad_norm": 1.1480898082799866, "learning_rate": 4.151764821755743e-06, "loss": 0.5654, "step": 3312 }, { "epoch": 4.055079559363525, "grad_norm": 0.8148936352858921, "learning_rate": 4.151283694391458e-06, "loss": 0.6313, "step": 3313 }, { "epoch": 4.056303549571603, "grad_norm": 1.068359732549216, "learning_rate": 4.150802458510916e-06, "loss": 0.5159, "step": 3314 }, { "epoch": 4.057527539779682, "grad_norm": 0.9512149303346032, "learning_rate": 4.150321114145743e-06, "loss": 0.5571, "step": 3315 }, { "epoch": 4.05875152998776, "grad_norm": 1.36158379931695, "learning_rate": 4.14983966132757e-06, "loss": 1.0395, "step": 3316 }, { "epoch": 4.059975520195838, "grad_norm": 1.100842509140695, "learning_rate": 4.149358100088037e-06, "loss": 0.5251, "step": 3317 }, { "epoch": 4.061199510403917, "grad_norm": 1.5466030911978659, "learning_rate": 4.14887643045879e-06, "loss": 0.8692, "step": 3318 }, { "epoch": 4.062423500611995, "grad_norm": 1.6374620781358773, "learning_rate": 4.148394652471483e-06, "loss": 0.6388, "step": 3319 }, { "epoch": 4.063647490820073, "grad_norm": 1.7062142487924488, "learning_rate": 4.1479127661577765e-06, "loss": 0.4844, "step": 3320 }, { "epoch": 4.064871481028152, "grad_norm": 0.7840421361447805, "learning_rate": 4.1474307715493375e-06, "loss": 0.5974, "step": 3321 }, { "epoch": 4.06609547123623, "grad_norm": 0.9052106078110027, "learning_rate": 4.146948668677842e-06, "loss": 0.9155, "step": 3322 }, { "epoch": 4.067319461444308, "grad_norm": 0.7829094216802672, "learning_rate": 4.1464664575749725e-06, "loss": 0.5303, "step": 3323 }, { "epoch": 4.068543451652387, "grad_norm": 1.2059700042507526, "learning_rate": 4.145984138272416e-06, "loss": 0.5777, "step": 3324 }, { "epoch": 4.069767441860465, "grad_norm": 1.358936868433063, "learning_rate": 4.14550171080187e-06, "loss": 0.652, "step": 3325 }, { "epoch": 4.070991432068544, "grad_norm": 1.0507571847065702, "learning_rate": 4.1450191751950386e-06, "loss": 0.6348, "step": 3326 }, { "epoch": 4.072215422276622, "grad_norm": 1.0833986940428377, "learning_rate": 4.144536531483632e-06, "loss": 2.317, "step": 3327 }, { "epoch": 4.0734394124847, "grad_norm": 1.3233102874578704, "learning_rate": 4.144053779699366e-06, "loss": 1.2495, "step": 3328 }, { "epoch": 4.074663402692779, "grad_norm": 1.055034910513984, "learning_rate": 4.143570919873967e-06, "loss": 0.445, "step": 3329 }, { "epoch": 4.0758873929008566, "grad_norm": 1.067279350382245, "learning_rate": 4.1430879520391665e-06, "loss": 0.8586, "step": 3330 }, { "epoch": 4.077111383108935, "grad_norm": 1.149745433072691, "learning_rate": 4.1426048762267036e-06, "loss": 0.653, "step": 3331 }, { "epoch": 4.078335373317014, "grad_norm": 0.9326021614109036, "learning_rate": 4.142121692468323e-06, "loss": 0.5894, "step": 3332 }, { "epoch": 4.0795593635250915, "grad_norm": 1.254660614667816, "learning_rate": 4.141638400795779e-06, "loss": 0.6499, "step": 3333 }, { "epoch": 4.08078335373317, "grad_norm": 1.1316326945938204, "learning_rate": 4.141155001240831e-06, "loss": 1.106, "step": 3334 }, { "epoch": 4.082007343941249, "grad_norm": 1.4915646261237072, "learning_rate": 4.140671493835246e-06, "loss": 0.7232, "step": 3335 }, { "epoch": 4.0832313341493265, "grad_norm": 1.2514589338658708, "learning_rate": 4.140187878610801e-06, "loss": 0.3962, "step": 3336 }, { "epoch": 4.084455324357405, "grad_norm": 0.8525859797429366, "learning_rate": 4.139704155599273e-06, "loss": 0.5258, "step": 3337 }, { "epoch": 4.085679314565484, "grad_norm": 0.7490831009027872, "learning_rate": 4.1392203248324535e-06, "loss": 0.414, "step": 3338 }, { "epoch": 4.0869033047735615, "grad_norm": 1.4568370802378043, "learning_rate": 4.138736386342137e-06, "loss": 0.3498, "step": 3339 }, { "epoch": 4.08812729498164, "grad_norm": 1.2444327604985292, "learning_rate": 4.1382523401601275e-06, "loss": 0.4986, "step": 3340 }, { "epoch": 4.089351285189719, "grad_norm": 1.2341884029874994, "learning_rate": 4.137768186318232e-06, "loss": 0.4713, "step": 3341 }, { "epoch": 4.090575275397796, "grad_norm": 1.0583263663903184, "learning_rate": 4.1372839248482695e-06, "loss": 0.3938, "step": 3342 }, { "epoch": 4.091799265605875, "grad_norm": 1.0856302097144028, "learning_rate": 4.136799555782064e-06, "loss": 0.568, "step": 3343 }, { "epoch": 4.093023255813954, "grad_norm": 1.0056371362304188, "learning_rate": 4.136315079151446e-06, "loss": 0.6462, "step": 3344 }, { "epoch": 4.094247246022032, "grad_norm": 0.8803101472854242, "learning_rate": 4.135830494988252e-06, "loss": 0.6829, "step": 3345 }, { "epoch": 4.09547123623011, "grad_norm": 1.428710796666031, "learning_rate": 4.13534580332433e-06, "loss": 1.4356, "step": 3346 }, { "epoch": 4.096695226438189, "grad_norm": 1.287158535494212, "learning_rate": 4.13486100419153e-06, "loss": 0.569, "step": 3347 }, { "epoch": 4.097919216646267, "grad_norm": 1.2979646935441453, "learning_rate": 4.134376097621711e-06, "loss": 1.2301, "step": 3348 }, { "epoch": 4.099143206854345, "grad_norm": 0.8604568055411687, "learning_rate": 4.133891083646741e-06, "loss": 0.4159, "step": 3349 }, { "epoch": 4.100367197062424, "grad_norm": 1.4547603443147472, "learning_rate": 4.133405962298491e-06, "loss": 0.6685, "step": 3350 }, { "epoch": 4.101591187270502, "grad_norm": 1.1494986663139095, "learning_rate": 4.132920733608844e-06, "loss": 0.686, "step": 3351 }, { "epoch": 4.10281517747858, "grad_norm": 0.8567847094736317, "learning_rate": 4.132435397609686e-06, "loss": 0.9242, "step": 3352 }, { "epoch": 4.1040391676866586, "grad_norm": 0.6120191385649659, "learning_rate": 4.131949954332911e-06, "loss": 0.3667, "step": 3353 }, { "epoch": 4.105263157894737, "grad_norm": 1.100877630490201, "learning_rate": 4.1314644038104215e-06, "loss": 0.9622, "step": 3354 }, { "epoch": 4.106487148102815, "grad_norm": 0.8197685072738816, "learning_rate": 4.130978746074126e-06, "loss": 0.5108, "step": 3355 }, { "epoch": 4.1077111383108935, "grad_norm": 0.7073119506704658, "learning_rate": 4.13049298115594e-06, "loss": 0.5423, "step": 3356 }, { "epoch": 4.108935128518972, "grad_norm": 1.3971119906865381, "learning_rate": 4.130007109087786e-06, "loss": 0.6146, "step": 3357 }, { "epoch": 4.11015911872705, "grad_norm": 2.0341821009472136, "learning_rate": 4.129521129901593e-06, "loss": 1.0215, "step": 3358 }, { "epoch": 4.1113831089351285, "grad_norm": 0.7830734070449611, "learning_rate": 4.129035043629299e-06, "loss": 0.6681, "step": 3359 }, { "epoch": 4.112607099143207, "grad_norm": 1.0269977206113536, "learning_rate": 4.128548850302847e-06, "loss": 0.8317, "step": 3360 }, { "epoch": 4.113831089351285, "grad_norm": 1.0585622822675491, "learning_rate": 4.1280625499541884e-06, "loss": 0.3948, "step": 3361 }, { "epoch": 4.1150550795593634, "grad_norm": 1.2759517994277079, "learning_rate": 4.1275761426152815e-06, "loss": 0.47, "step": 3362 }, { "epoch": 4.116279069767442, "grad_norm": 0.8473812039034465, "learning_rate": 4.12708962831809e-06, "loss": 0.6084, "step": 3363 }, { "epoch": 4.11750305997552, "grad_norm": 1.930059082866513, "learning_rate": 4.126603007094586e-06, "loss": 0.972, "step": 3364 }, { "epoch": 4.118727050183598, "grad_norm": 0.4021353633336182, "learning_rate": 4.1261162789767486e-06, "loss": 0.1354, "step": 3365 }, { "epoch": 4.119951040391677, "grad_norm": 1.2254911663804537, "learning_rate": 4.125629443996564e-06, "loss": 0.5704, "step": 3366 }, { "epoch": 4.121175030599755, "grad_norm": 1.012895156297931, "learning_rate": 4.125142502186025e-06, "loss": 0.7482, "step": 3367 }, { "epoch": 4.122399020807833, "grad_norm": 1.4543991287337852, "learning_rate": 4.124655453577132e-06, "loss": 1.0568, "step": 3368 }, { "epoch": 4.123623011015912, "grad_norm": 1.0339246719405268, "learning_rate": 4.124168298201893e-06, "loss": 0.2529, "step": 3369 }, { "epoch": 4.124847001223991, "grad_norm": 1.5372965776420549, "learning_rate": 4.123681036092319e-06, "loss": 0.5435, "step": 3370 }, { "epoch": 4.126070991432068, "grad_norm": 1.2046307555173001, "learning_rate": 4.123193667280434e-06, "loss": 0.9659, "step": 3371 }, { "epoch": 4.127294981640147, "grad_norm": 1.2006451234436915, "learning_rate": 4.122706191798265e-06, "loss": 1.4606, "step": 3372 }, { "epoch": 4.128518971848226, "grad_norm": 1.489207150188012, "learning_rate": 4.122218609677846e-06, "loss": 0.528, "step": 3373 }, { "epoch": 4.129742962056303, "grad_norm": 1.2370766628438878, "learning_rate": 4.121730920951221e-06, "loss": 1.2332, "step": 3374 }, { "epoch": 4.130966952264382, "grad_norm": 0.8563189763469776, "learning_rate": 4.121243125650438e-06, "loss": 0.655, "step": 3375 }, { "epoch": 4.1321909424724605, "grad_norm": 0.8301835358113144, "learning_rate": 4.120755223807553e-06, "loss": 0.4709, "step": 3376 }, { "epoch": 4.133414932680538, "grad_norm": 1.398448233001719, "learning_rate": 4.12026721545463e-06, "loss": 0.8032, "step": 3377 }, { "epoch": 4.134638922888617, "grad_norm": 1.2871386715868687, "learning_rate": 4.119779100623739e-06, "loss": 1.2085, "step": 3378 }, { "epoch": 4.1358629130966955, "grad_norm": 0.7946313308724975, "learning_rate": 4.119290879346956e-06, "loss": 0.5912, "step": 3379 }, { "epoch": 4.137086903304773, "grad_norm": 1.4349278977233086, "learning_rate": 4.118802551656365e-06, "loss": 0.4259, "step": 3380 }, { "epoch": 4.138310893512852, "grad_norm": 1.3460252667720802, "learning_rate": 4.118314117584059e-06, "loss": 0.4049, "step": 3381 }, { "epoch": 4.1395348837209305, "grad_norm": 1.0551625784443917, "learning_rate": 4.1178255771621345e-06, "loss": 0.8448, "step": 3382 }, { "epoch": 4.140758873929008, "grad_norm": 1.8017906840485531, "learning_rate": 4.117336930422697e-06, "loss": 0.4565, "step": 3383 }, { "epoch": 4.141982864137087, "grad_norm": 1.7615574768450382, "learning_rate": 4.116848177397859e-06, "loss": 0.4135, "step": 3384 }, { "epoch": 4.1432068543451654, "grad_norm": 0.9597067988562306, "learning_rate": 4.116359318119738e-06, "loss": 1.1629, "step": 3385 }, { "epoch": 4.144430844553243, "grad_norm": 1.1481166203842035, "learning_rate": 4.1158703526204625e-06, "loss": 0.3485, "step": 3386 }, { "epoch": 4.145654834761322, "grad_norm": 1.0679904668142843, "learning_rate": 4.115381280932163e-06, "loss": 0.7444, "step": 3387 }, { "epoch": 4.1468788249694, "grad_norm": 1.1885405402643132, "learning_rate": 4.11489210308698e-06, "loss": 1.604, "step": 3388 }, { "epoch": 4.148102815177479, "grad_norm": 1.1840717553644868, "learning_rate": 4.114402819117063e-06, "loss": 0.595, "step": 3389 }, { "epoch": 4.149326805385557, "grad_norm": 0.9157845314182612, "learning_rate": 4.113913429054563e-06, "loss": 0.6104, "step": 3390 }, { "epoch": 4.150550795593635, "grad_norm": 1.105303835639854, "learning_rate": 4.113423932931642e-06, "loss": 1.4355, "step": 3391 }, { "epoch": 4.151774785801714, "grad_norm": 1.6727705382161635, "learning_rate": 4.112934330780468e-06, "loss": 0.9157, "step": 3392 }, { "epoch": 4.152998776009792, "grad_norm": 1.0389938439575055, "learning_rate": 4.112444622633215e-06, "loss": 1.0354, "step": 3393 }, { "epoch": 4.15422276621787, "grad_norm": 1.904912507546912, "learning_rate": 4.111954808522065e-06, "loss": 0.4729, "step": 3394 }, { "epoch": 4.155446756425949, "grad_norm": 1.2507277948029176, "learning_rate": 4.111464888479209e-06, "loss": 0.4968, "step": 3395 }, { "epoch": 4.156670746634027, "grad_norm": 1.4580196346799044, "learning_rate": 4.11097486253684e-06, "loss": 1.1504, "step": 3396 }, { "epoch": 4.157894736842105, "grad_norm": 1.974338990709747, "learning_rate": 4.110484730727161e-06, "loss": 0.483, "step": 3397 }, { "epoch": 4.159118727050184, "grad_norm": 1.416270405409402, "learning_rate": 4.109994493082382e-06, "loss": 1.3332, "step": 3398 }, { "epoch": 4.160342717258262, "grad_norm": 1.9593393937763, "learning_rate": 4.109504149634721e-06, "loss": 0.5095, "step": 3399 }, { "epoch": 4.16156670746634, "grad_norm": 1.0452628680081464, "learning_rate": 4.1090137004164e-06, "loss": 0.3635, "step": 3400 }, { "epoch": 4.162790697674419, "grad_norm": 1.7459279465470268, "learning_rate": 4.10852314545965e-06, "loss": 0.4535, "step": 3401 }, { "epoch": 4.164014687882497, "grad_norm": 1.87106817105612, "learning_rate": 4.108032484796708e-06, "loss": 0.984, "step": 3402 }, { "epoch": 4.165238678090575, "grad_norm": 2.1131099483590883, "learning_rate": 4.1075417184598194e-06, "loss": 0.6145, "step": 3403 }, { "epoch": 4.166462668298654, "grad_norm": 1.5980194817224995, "learning_rate": 4.107050846481235e-06, "loss": 0.6045, "step": 3404 }, { "epoch": 4.167686658506732, "grad_norm": 0.934322119336499, "learning_rate": 4.106559868893213e-06, "loss": 0.4692, "step": 3405 }, { "epoch": 4.16891064871481, "grad_norm": 1.2120398881253192, "learning_rate": 4.106068785728018e-06, "loss": 0.7773, "step": 3406 }, { "epoch": 4.170134638922889, "grad_norm": 1.3292982827600648, "learning_rate": 4.105577597017923e-06, "loss": 1.0246, "step": 3407 }, { "epoch": 4.1713586291309666, "grad_norm": 1.0567469719353078, "learning_rate": 4.105086302795208e-06, "loss": 0.5073, "step": 3408 }, { "epoch": 4.172582619339045, "grad_norm": 1.141817811442184, "learning_rate": 4.1045949030921575e-06, "loss": 1.2677, "step": 3409 }, { "epoch": 4.173806609547124, "grad_norm": 2.0563260994504913, "learning_rate": 4.1041033979410635e-06, "loss": 0.9712, "step": 3410 }, { "epoch": 4.1750305997552015, "grad_norm": 1.8415173855644855, "learning_rate": 4.103611787374229e-06, "loss": 0.5464, "step": 3411 }, { "epoch": 4.17625458996328, "grad_norm": 1.1679385086132568, "learning_rate": 4.103120071423959e-06, "loss": 0.539, "step": 3412 }, { "epoch": 4.177478580171359, "grad_norm": 0.7399973734100924, "learning_rate": 4.102628250122567e-06, "loss": 0.5363, "step": 3413 }, { "epoch": 4.178702570379437, "grad_norm": 1.189216194287651, "learning_rate": 4.102136323502374e-06, "loss": 0.4653, "step": 3414 }, { "epoch": 4.179926560587515, "grad_norm": 1.2016459422518375, "learning_rate": 4.101644291595708e-06, "loss": 1.21, "step": 3415 }, { "epoch": 4.181150550795594, "grad_norm": 1.4751014831600553, "learning_rate": 4.101152154434903e-06, "loss": 0.5211, "step": 3416 }, { "epoch": 4.182374541003672, "grad_norm": 1.4851188821896093, "learning_rate": 4.100659912052301e-06, "loss": 0.4395, "step": 3417 }, { "epoch": 4.18359853121175, "grad_norm": 0.9369889124385221, "learning_rate": 4.10016756448025e-06, "loss": 1.0003, "step": 3418 }, { "epoch": 4.184822521419829, "grad_norm": 0.7445031212957764, "learning_rate": 4.099675111751105e-06, "loss": 0.5392, "step": 3419 }, { "epoch": 4.186046511627907, "grad_norm": 1.0090092552096495, "learning_rate": 4.099182553897228e-06, "loss": 0.3028, "step": 3420 }, { "epoch": 4.187270501835985, "grad_norm": 1.311449400506681, "learning_rate": 4.09868989095099e-06, "loss": 1.8449, "step": 3421 }, { "epoch": 4.188494492044064, "grad_norm": 1.3641269734985664, "learning_rate": 4.098197122944764e-06, "loss": 0.5826, "step": 3422 }, { "epoch": 4.189718482252142, "grad_norm": 1.3044134823180682, "learning_rate": 4.097704249910935e-06, "loss": 1.3838, "step": 3423 }, { "epoch": 4.19094247246022, "grad_norm": 1.0260273025455398, "learning_rate": 4.097211271881892e-06, "loss": 0.5956, "step": 3424 }, { "epoch": 4.192166462668299, "grad_norm": 0.9843760542304437, "learning_rate": 4.096718188890033e-06, "loss": 0.5773, "step": 3425 }, { "epoch": 4.193390452876377, "grad_norm": 1.678136196159439, "learning_rate": 4.096225000967759e-06, "loss": 1.4589, "step": 3426 }, { "epoch": 4.194614443084455, "grad_norm": 1.2335648415404112, "learning_rate": 4.095731708147483e-06, "loss": 1.1685, "step": 3427 }, { "epoch": 4.195838433292534, "grad_norm": 1.0835358959382175, "learning_rate": 4.095238310461621e-06, "loss": 0.6322, "step": 3428 }, { "epoch": 4.197062423500612, "grad_norm": 1.5947622658822855, "learning_rate": 4.094744807942597e-06, "loss": 0.6331, "step": 3429 }, { "epoch": 4.19828641370869, "grad_norm": 0.7995320784791059, "learning_rate": 4.094251200622843e-06, "loss": 0.6132, "step": 3430 }, { "epoch": 4.1995104039167686, "grad_norm": 1.5087507718996735, "learning_rate": 4.093757488534798e-06, "loss": 0.6554, "step": 3431 }, { "epoch": 4.200734394124847, "grad_norm": 1.8157717691785868, "learning_rate": 4.093263671710904e-06, "loss": 0.4954, "step": 3432 }, { "epoch": 4.201958384332926, "grad_norm": 0.9672253014071925, "learning_rate": 4.092769750183616e-06, "loss": 0.3712, "step": 3433 }, { "epoch": 4.2031823745410035, "grad_norm": 0.8284608289432331, "learning_rate": 4.092275723985391e-06, "loss": 0.4072, "step": 3434 }, { "epoch": 4.204406364749082, "grad_norm": 1.5687642790434866, "learning_rate": 4.091781593148694e-06, "loss": 0.5856, "step": 3435 }, { "epoch": 4.205630354957161, "grad_norm": 1.369496131557744, "learning_rate": 4.0912873577059985e-06, "loss": 0.7798, "step": 3436 }, { "epoch": 4.2068543451652385, "grad_norm": 2.0075821243778784, "learning_rate": 4.090793017689784e-06, "loss": 0.4713, "step": 3437 }, { "epoch": 4.208078335373317, "grad_norm": 1.6768294033381532, "learning_rate": 4.090298573132536e-06, "loss": 1.02, "step": 3438 }, { "epoch": 4.209302325581396, "grad_norm": 1.6421993835755013, "learning_rate": 4.089804024066748e-06, "loss": 1.1279, "step": 3439 }, { "epoch": 4.2105263157894735, "grad_norm": 0.8451260739628348, "learning_rate": 4.089309370524921e-06, "loss": 0.5075, "step": 3440 }, { "epoch": 4.211750305997552, "grad_norm": 1.9472306033712028, "learning_rate": 4.088814612539559e-06, "loss": 0.4268, "step": 3441 }, { "epoch": 4.212974296205631, "grad_norm": 0.6777799799171045, "learning_rate": 4.088319750143178e-06, "loss": 0.3665, "step": 3442 }, { "epoch": 4.214198286413708, "grad_norm": 1.3530899950932693, "learning_rate": 4.087824783368297e-06, "loss": 0.5924, "step": 3443 }, { "epoch": 4.215422276621787, "grad_norm": 1.4369706998368568, "learning_rate": 4.0873297122474454e-06, "loss": 0.7302, "step": 3444 }, { "epoch": 4.216646266829866, "grad_norm": 1.7171545358324656, "learning_rate": 4.086834536813155e-06, "loss": 0.605, "step": 3445 }, { "epoch": 4.217870257037943, "grad_norm": 0.9464228740453237, "learning_rate": 4.086339257097969e-06, "loss": 0.608, "step": 3446 }, { "epoch": 4.219094247246022, "grad_norm": 1.2424965918270636, "learning_rate": 4.0858438731344344e-06, "loss": 0.9427, "step": 3447 }, { "epoch": 4.220318237454101, "grad_norm": 2.109091276648509, "learning_rate": 4.0853483849551066e-06, "loss": 0.3645, "step": 3448 }, { "epoch": 4.221542227662178, "grad_norm": 0.7792249912409837, "learning_rate": 4.0848527925925465e-06, "loss": 0.4755, "step": 3449 }, { "epoch": 4.222766217870257, "grad_norm": 1.6113886912176316, "learning_rate": 4.0843570960793225e-06, "loss": 0.9649, "step": 3450 }, { "epoch": 4.223990208078336, "grad_norm": 1.6331190714367771, "learning_rate": 4.083861295448009e-06, "loss": 0.535, "step": 3451 }, { "epoch": 4.225214198286413, "grad_norm": 1.3097383220102126, "learning_rate": 4.083365390731191e-06, "loss": 0.5008, "step": 3452 }, { "epoch": 4.226438188494492, "grad_norm": 1.3829409187392667, "learning_rate": 4.082869381961456e-06, "loss": 0.5825, "step": 3453 }, { "epoch": 4.2276621787025706, "grad_norm": 1.486947640461893, "learning_rate": 4.082373269171399e-06, "loss": 0.3586, "step": 3454 }, { "epoch": 4.228886168910648, "grad_norm": 1.411108028014503, "learning_rate": 4.081877052393625e-06, "loss": 0.949, "step": 3455 }, { "epoch": 4.230110159118727, "grad_norm": 1.1847688243642103, "learning_rate": 4.081380731660741e-06, "loss": 1.122, "step": 3456 }, { "epoch": 4.2313341493268055, "grad_norm": 1.387864209249282, "learning_rate": 4.0808843070053635e-06, "loss": 0.3576, "step": 3457 }, { "epoch": 4.232558139534884, "grad_norm": 1.4191590207061333, "learning_rate": 4.080387778460118e-06, "loss": 0.5776, "step": 3458 }, { "epoch": 4.233782129742962, "grad_norm": 2.0934770806531877, "learning_rate": 4.079891146057633e-06, "loss": 0.4599, "step": 3459 }, { "epoch": 4.2350061199510405, "grad_norm": 1.8574710841404967, "learning_rate": 4.079394409830546e-06, "loss": 0.5427, "step": 3460 }, { "epoch": 4.236230110159119, "grad_norm": 0.7244121582766999, "learning_rate": 4.078897569811499e-06, "loss": 0.4666, "step": 3461 }, { "epoch": 4.237454100367197, "grad_norm": 1.3386983665426126, "learning_rate": 4.0784006260331445e-06, "loss": 0.7447, "step": 3462 }, { "epoch": 4.2386780905752754, "grad_norm": 0.9920482941520884, "learning_rate": 4.077903578528138e-06, "loss": 0.6439, "step": 3463 }, { "epoch": 4.239902080783354, "grad_norm": 0.5596841280421857, "learning_rate": 4.077406427329146e-06, "loss": 0.2684, "step": 3464 }, { "epoch": 4.241126070991432, "grad_norm": 1.5672437161005648, "learning_rate": 4.076909172468837e-06, "loss": 0.5574, "step": 3465 }, { "epoch": 4.24235006119951, "grad_norm": 2.4427080534368777, "learning_rate": 4.07641181397989e-06, "loss": 0.4339, "step": 3466 }, { "epoch": 4.243574051407589, "grad_norm": 1.429304713530537, "learning_rate": 4.075914351894989e-06, "loss": 0.4597, "step": 3467 }, { "epoch": 4.244798041615667, "grad_norm": 1.1271992674598865, "learning_rate": 4.075416786246827e-06, "loss": 0.7863, "step": 3468 }, { "epoch": 4.246022031823745, "grad_norm": 1.2279787227145853, "learning_rate": 4.074919117068099e-06, "loss": 1.5338, "step": 3469 }, { "epoch": 4.247246022031824, "grad_norm": 1.0189829791884726, "learning_rate": 4.074421344391513e-06, "loss": 1.3667, "step": 3470 }, { "epoch": 4.248470012239902, "grad_norm": 1.3854844242203366, "learning_rate": 4.073923468249779e-06, "loss": 0.4376, "step": 3471 }, { "epoch": 4.24969400244798, "grad_norm": 1.047002386316789, "learning_rate": 4.073425488675616e-06, "loss": 1.6038, "step": 3472 }, { "epoch": 4.250917992656059, "grad_norm": 0.8297754860802374, "learning_rate": 4.072927405701751e-06, "loss": 0.6648, "step": 3473 }, { "epoch": 4.252141982864137, "grad_norm": 2.144567750764559, "learning_rate": 4.0724292193609135e-06, "loss": 0.5293, "step": 3474 }, { "epoch": 4.253365973072215, "grad_norm": 1.0833715498299052, "learning_rate": 4.071930929685844e-06, "loss": 0.507, "step": 3475 }, { "epoch": 4.254589963280294, "grad_norm": 0.8983121381958039, "learning_rate": 4.071432536709289e-06, "loss": 0.5549, "step": 3476 }, { "epoch": 4.2558139534883725, "grad_norm": 1.3759566348123105, "learning_rate": 4.070934040463999e-06, "loss": 0.6233, "step": 3477 }, { "epoch": 4.25703794369645, "grad_norm": 1.142307852223168, "learning_rate": 4.070435440982734e-06, "loss": 0.6538, "step": 3478 }, { "epoch": 4.258261933904529, "grad_norm": 0.766594137193178, "learning_rate": 4.069936738298261e-06, "loss": 0.4192, "step": 3479 }, { "epoch": 4.2594859241126075, "grad_norm": 0.7839139360070703, "learning_rate": 4.069437932443352e-06, "loss": 0.3339, "step": 3480 }, { "epoch": 4.260709914320685, "grad_norm": 1.5262836408918452, "learning_rate": 4.0689390234507875e-06, "loss": 0.5941, "step": 3481 }, { "epoch": 4.261933904528764, "grad_norm": 1.1269598666494727, "learning_rate": 4.068440011353353e-06, "loss": 0.7509, "step": 3482 }, { "epoch": 4.2631578947368425, "grad_norm": 0.9932017782192541, "learning_rate": 4.067940896183843e-06, "loss": 0.5659, "step": 3483 }, { "epoch": 4.26438188494492, "grad_norm": 0.9330861255005087, "learning_rate": 4.067441677975056e-06, "loss": 0.4281, "step": 3484 }, { "epoch": 4.265605875152999, "grad_norm": 1.014827730118032, "learning_rate": 4.0669423567597994e-06, "loss": 0.6279, "step": 3485 }, { "epoch": 4.2668298653610774, "grad_norm": 1.1683652570801795, "learning_rate": 4.0664429325708875e-06, "loss": 0.782, "step": 3486 }, { "epoch": 4.268053855569155, "grad_norm": 1.1855899801200867, "learning_rate": 4.065943405441139e-06, "loss": 0.5917, "step": 3487 }, { "epoch": 4.269277845777234, "grad_norm": 0.933580731843695, "learning_rate": 4.0654437754033835e-06, "loss": 0.5342, "step": 3488 }, { "epoch": 4.270501835985312, "grad_norm": 1.4792153490519848, "learning_rate": 4.064944042490452e-06, "loss": 0.6957, "step": 3489 }, { "epoch": 4.27172582619339, "grad_norm": 1.1206228670626632, "learning_rate": 4.0644442067351865e-06, "loss": 0.4982, "step": 3490 }, { "epoch": 4.272949816401469, "grad_norm": 0.9720975169060032, "learning_rate": 4.063944268170434e-06, "loss": 0.4759, "step": 3491 }, { "epoch": 4.274173806609547, "grad_norm": 1.1479688551990086, "learning_rate": 4.06344422682905e-06, "loss": 0.4945, "step": 3492 }, { "epoch": 4.275397796817625, "grad_norm": 1.725112556369631, "learning_rate": 4.062944082743892e-06, "loss": 0.6649, "step": 3493 }, { "epoch": 4.276621787025704, "grad_norm": 0.8131653167234579, "learning_rate": 4.062443835947832e-06, "loss": 0.6471, "step": 3494 }, { "epoch": 4.277845777233782, "grad_norm": 1.6194698837032113, "learning_rate": 4.061943486473741e-06, "loss": 0.6467, "step": 3495 }, { "epoch": 4.27906976744186, "grad_norm": 1.620543960602914, "learning_rate": 4.061443034354502e-06, "loss": 0.5865, "step": 3496 }, { "epoch": 4.280293757649939, "grad_norm": 1.0153986580439545, "learning_rate": 4.060942479623001e-06, "loss": 1.3097, "step": 3497 }, { "epoch": 4.281517747858017, "grad_norm": 1.0290774615034324, "learning_rate": 4.060441822312135e-06, "loss": 0.6023, "step": 3498 }, { "epoch": 4.282741738066095, "grad_norm": 0.961940242217773, "learning_rate": 4.059941062454804e-06, "loss": 0.7442, "step": 3499 }, { "epoch": 4.283965728274174, "grad_norm": 1.4534944664298968, "learning_rate": 4.059440200083915e-06, "loss": 0.3376, "step": 3500 }, { "epoch": 4.285189718482252, "grad_norm": 1.7349111096208065, "learning_rate": 4.0589392352323855e-06, "loss": 0.6714, "step": 3501 }, { "epoch": 4.286413708690331, "grad_norm": 1.8315760445316476, "learning_rate": 4.058438167933136e-06, "loss": 0.4227, "step": 3502 }, { "epoch": 4.287637698898409, "grad_norm": 1.5054194428691978, "learning_rate": 4.057936998219092e-06, "loss": 0.5368, "step": 3503 }, { "epoch": 4.288861689106487, "grad_norm": 1.106948346138757, "learning_rate": 4.057435726123192e-06, "loss": 0.4384, "step": 3504 }, { "epoch": 4.290085679314566, "grad_norm": 1.4769796416152725, "learning_rate": 4.0569343516783775e-06, "loss": 1.1122, "step": 3505 }, { "epoch": 4.291309669522644, "grad_norm": 1.3216681098571348, "learning_rate": 4.056432874917595e-06, "loss": 0.5014, "step": 3506 }, { "epoch": 4.292533659730722, "grad_norm": 1.7884435909896423, "learning_rate": 4.055931295873802e-06, "loss": 0.4698, "step": 3507 }, { "epoch": 4.293757649938801, "grad_norm": 0.8345925147903958, "learning_rate": 4.055429614579958e-06, "loss": 0.5824, "step": 3508 }, { "epoch": 4.2949816401468786, "grad_norm": 0.888216589134071, "learning_rate": 4.054927831069033e-06, "loss": 0.5089, "step": 3509 }, { "epoch": 4.296205630354957, "grad_norm": 1.5196138331292581, "learning_rate": 4.054425945374002e-06, "loss": 0.5286, "step": 3510 }, { "epoch": 4.297429620563036, "grad_norm": 1.2083757491180374, "learning_rate": 4.053923957527849e-06, "loss": 0.8929, "step": 3511 }, { "epoch": 4.2986536107711135, "grad_norm": 2.5132247395176486, "learning_rate": 4.05342186756356e-06, "loss": 0.3437, "step": 3512 }, { "epoch": 4.299877600979192, "grad_norm": 1.3451845996235703, "learning_rate": 4.052919675514131e-06, "loss": 0.6353, "step": 3513 }, { "epoch": 4.301101591187271, "grad_norm": 1.2599121127853568, "learning_rate": 4.052417381412566e-06, "loss": 1.5598, "step": 3514 }, { "epoch": 4.3023255813953485, "grad_norm": 1.1800492444399018, "learning_rate": 4.051914985291871e-06, "loss": 0.7138, "step": 3515 }, { "epoch": 4.303549571603427, "grad_norm": 0.7935073376706231, "learning_rate": 4.051412487185065e-06, "loss": 0.3953, "step": 3516 }, { "epoch": 4.304773561811506, "grad_norm": 1.608114098556723, "learning_rate": 4.050909887125167e-06, "loss": 0.468, "step": 3517 }, { "epoch": 4.3059975520195835, "grad_norm": 1.8404854643857707, "learning_rate": 4.05040718514521e-06, "loss": 0.457, "step": 3518 }, { "epoch": 4.307221542227662, "grad_norm": 2.0403252573404984, "learning_rate": 4.049904381278225e-06, "loss": 0.6587, "step": 3519 }, { "epoch": 4.308445532435741, "grad_norm": 1.2575446906565215, "learning_rate": 4.049401475557258e-06, "loss": 1.2075, "step": 3520 }, { "epoch": 4.309669522643819, "grad_norm": 1.3998647528768597, "learning_rate": 4.048898468015356e-06, "loss": 0.4598, "step": 3521 }, { "epoch": 4.310893512851897, "grad_norm": 1.592115232264391, "learning_rate": 4.048395358685576e-06, "loss": 1.0146, "step": 3522 }, { "epoch": 4.312117503059976, "grad_norm": 1.9566816219020757, "learning_rate": 4.047892147600981e-06, "loss": 0.4113, "step": 3523 }, { "epoch": 4.313341493268053, "grad_norm": 1.3201821075294098, "learning_rate": 4.047388834794639e-06, "loss": 0.5491, "step": 3524 }, { "epoch": 4.314565483476132, "grad_norm": 1.7372377854879704, "learning_rate": 4.046885420299625e-06, "loss": 0.557, "step": 3525 }, { "epoch": 4.315789473684211, "grad_norm": 0.6844512826514385, "learning_rate": 4.046381904149024e-06, "loss": 0.5141, "step": 3526 }, { "epoch": 4.317013463892289, "grad_norm": 1.2787514467415213, "learning_rate": 4.045878286375924e-06, "loss": 0.8583, "step": 3527 }, { "epoch": 4.318237454100367, "grad_norm": 1.3925710408764087, "learning_rate": 4.0453745670134196e-06, "loss": 0.6043, "step": 3528 }, { "epoch": 4.319461444308446, "grad_norm": 1.6281328859067759, "learning_rate": 4.044870746094616e-06, "loss": 0.6, "step": 3529 }, { "epoch": 4.320685434516524, "grad_norm": 1.437599956508346, "learning_rate": 4.04436682365262e-06, "loss": 0.5499, "step": 3530 }, { "epoch": 4.321909424724602, "grad_norm": 1.1954408594703136, "learning_rate": 4.043862799720548e-06, "loss": 0.4729, "step": 3531 }, { "epoch": 4.3231334149326806, "grad_norm": 0.8409391778799481, "learning_rate": 4.043358674331524e-06, "loss": 0.5872, "step": 3532 }, { "epoch": 4.324357405140759, "grad_norm": 1.426576184762066, "learning_rate": 4.042854447518677e-06, "loss": 0.6616, "step": 3533 }, { "epoch": 4.325581395348837, "grad_norm": 1.620620996265577, "learning_rate": 4.042350119315142e-06, "loss": 0.4998, "step": 3534 }, { "epoch": 4.3268053855569155, "grad_norm": 1.3354891493421994, "learning_rate": 4.041845689754062e-06, "loss": 0.6065, "step": 3535 }, { "epoch": 4.328029375764994, "grad_norm": 1.0723704400060392, "learning_rate": 4.041341158868586e-06, "loss": 1.1472, "step": 3536 }, { "epoch": 4.329253365973072, "grad_norm": 1.2664066978149722, "learning_rate": 4.040836526691869e-06, "loss": 0.9893, "step": 3537 }, { "epoch": 4.3304773561811505, "grad_norm": 1.0446982657659547, "learning_rate": 4.040331793257076e-06, "loss": 0.5972, "step": 3538 }, { "epoch": 4.331701346389229, "grad_norm": 1.0487017503243619, "learning_rate": 4.039826958597375e-06, "loss": 1.0585, "step": 3539 }, { "epoch": 4.332925336597307, "grad_norm": 1.6976021633629266, "learning_rate": 4.0393220227459405e-06, "loss": 0.4997, "step": 3540 }, { "epoch": 4.3341493268053854, "grad_norm": 1.5195620232837876, "learning_rate": 4.0388169857359576e-06, "loss": 0.7113, "step": 3541 }, { "epoch": 4.335373317013464, "grad_norm": 1.2494599757399052, "learning_rate": 4.038311847600613e-06, "loss": 0.5068, "step": 3542 }, { "epoch": 4.336597307221542, "grad_norm": 0.8985740602952051, "learning_rate": 4.0378066083731036e-06, "loss": 0.8038, "step": 3543 }, { "epoch": 4.33782129742962, "grad_norm": 1.285922981743088, "learning_rate": 4.037301268086632e-06, "loss": 1.4898, "step": 3544 }, { "epoch": 4.339045287637699, "grad_norm": 1.412920823417153, "learning_rate": 4.0367958267744074e-06, "loss": 0.4765, "step": 3545 }, { "epoch": 4.340269277845778, "grad_norm": 1.2796609608491059, "learning_rate": 4.036290284469646e-06, "loss": 0.4662, "step": 3546 }, { "epoch": 4.341493268053855, "grad_norm": 1.6953412521905944, "learning_rate": 4.035784641205568e-06, "loss": 1.0186, "step": 3547 }, { "epoch": 4.342717258261934, "grad_norm": 1.1040115500019598, "learning_rate": 4.035278897015404e-06, "loss": 1.0227, "step": 3548 }, { "epoch": 4.343941248470013, "grad_norm": 0.7755347096952091, "learning_rate": 4.034773051932391e-06, "loss": 0.6564, "step": 3549 }, { "epoch": 4.34516523867809, "grad_norm": 1.862216777053464, "learning_rate": 4.034267105989768e-06, "loss": 0.4379, "step": 3550 }, { "epoch": 4.346389228886169, "grad_norm": 1.5024608944510132, "learning_rate": 4.0337610592207865e-06, "loss": 0.5282, "step": 3551 }, { "epoch": 4.347613219094248, "grad_norm": 1.2395878399464617, "learning_rate": 4.0332549116587015e-06, "loss": 1.1418, "step": 3552 }, { "epoch": 4.348837209302325, "grad_norm": 0.7856914391942512, "learning_rate": 4.032748663336774e-06, "loss": 0.5583, "step": 3553 }, { "epoch": 4.350061199510404, "grad_norm": 1.4145804494652012, "learning_rate": 4.032242314288274e-06, "loss": 0.394, "step": 3554 }, { "epoch": 4.3512851897184825, "grad_norm": 0.7654873544894892, "learning_rate": 4.031735864546476e-06, "loss": 0.6182, "step": 3555 }, { "epoch": 4.35250917992656, "grad_norm": 1.2471136111198433, "learning_rate": 4.031229314144664e-06, "loss": 1.2098, "step": 3556 }, { "epoch": 4.353733170134639, "grad_norm": 1.3264828794325472, "learning_rate": 4.030722663116123e-06, "loss": 0.6279, "step": 3557 }, { "epoch": 4.3549571603427175, "grad_norm": 2.327451740179415, "learning_rate": 4.030215911494152e-06, "loss": 0.4116, "step": 3558 }, { "epoch": 4.356181150550795, "grad_norm": 0.8047543191391845, "learning_rate": 4.029709059312051e-06, "loss": 0.6042, "step": 3559 }, { "epoch": 4.357405140758874, "grad_norm": 1.827528540314403, "learning_rate": 4.029202106603127e-06, "loss": 0.4513, "step": 3560 }, { "epoch": 4.3586291309669525, "grad_norm": 1.2761373589330836, "learning_rate": 4.028695053400699e-06, "loss": 0.6697, "step": 3561 }, { "epoch": 4.35985312117503, "grad_norm": 1.4046801144966863, "learning_rate": 4.028187899738085e-06, "loss": 0.4542, "step": 3562 }, { "epoch": 4.361077111383109, "grad_norm": 1.651847627339744, "learning_rate": 4.027680645648615e-06, "loss": 0.5958, "step": 3563 }, { "epoch": 4.3623011015911874, "grad_norm": 1.418868096745927, "learning_rate": 4.027173291165624e-06, "loss": 0.4374, "step": 3564 }, { "epoch": 4.363525091799266, "grad_norm": 0.992365693141958, "learning_rate": 4.026665836322451e-06, "loss": 0.4212, "step": 3565 }, { "epoch": 4.364749082007344, "grad_norm": 1.6556920889971463, "learning_rate": 4.0261582811524476e-06, "loss": 0.7317, "step": 3566 }, { "epoch": 4.365973072215422, "grad_norm": 1.4573628988271081, "learning_rate": 4.025650625688966e-06, "loss": 1.0502, "step": 3567 }, { "epoch": 4.3671970624235, "grad_norm": 0.6629964440015367, "learning_rate": 4.0251428699653696e-06, "loss": 0.4811, "step": 3568 }, { "epoch": 4.368421052631579, "grad_norm": 1.390779731560753, "learning_rate": 4.024635014015023e-06, "loss": 1.448, "step": 3569 }, { "epoch": 4.369645042839657, "grad_norm": 1.4328647255739637, "learning_rate": 4.024127057871304e-06, "loss": 0.535, "step": 3570 }, { "epoch": 4.370869033047736, "grad_norm": 0.9627311275708256, "learning_rate": 4.023619001567591e-06, "loss": 0.3993, "step": 3571 }, { "epoch": 4.372093023255814, "grad_norm": 1.2725890055915594, "learning_rate": 4.0231108451372735e-06, "loss": 0.7272, "step": 3572 }, { "epoch": 4.373317013463892, "grad_norm": 1.7189231080733645, "learning_rate": 4.022602588613745e-06, "loss": 0.6308, "step": 3573 }, { "epoch": 4.374541003671971, "grad_norm": 0.8691569229991046, "learning_rate": 4.022094232030405e-06, "loss": 0.6139, "step": 3574 }, { "epoch": 4.375764993880049, "grad_norm": 0.9699646502486835, "learning_rate": 4.021585775420663e-06, "loss": 0.5393, "step": 3575 }, { "epoch": 4.376988984088127, "grad_norm": 2.3616833254253438, "learning_rate": 4.021077218817932e-06, "loss": 0.4239, "step": 3576 }, { "epoch": 4.378212974296206, "grad_norm": 1.0801248350464407, "learning_rate": 4.020568562255632e-06, "loss": 0.6838, "step": 3577 }, { "epoch": 4.379436964504284, "grad_norm": 1.553467545054019, "learning_rate": 4.0200598057671896e-06, "loss": 1.1725, "step": 3578 }, { "epoch": 4.380660954712362, "grad_norm": 1.7985942362402787, "learning_rate": 4.01955094938604e-06, "loss": 0.4595, "step": 3579 }, { "epoch": 4.381884944920441, "grad_norm": 1.1453951637626651, "learning_rate": 4.019041993145621e-06, "loss": 0.7387, "step": 3580 }, { "epoch": 4.383108935128519, "grad_norm": 0.9921812991799427, "learning_rate": 4.018532937079382e-06, "loss": 0.5395, "step": 3581 }, { "epoch": 4.384332925336597, "grad_norm": 1.1124886402031469, "learning_rate": 4.018023781220774e-06, "loss": 0.5109, "step": 3582 }, { "epoch": 4.385556915544676, "grad_norm": 2.3431915781992627, "learning_rate": 4.0175145256032595e-06, "loss": 0.4031, "step": 3583 }, { "epoch": 4.386780905752754, "grad_norm": 2.227003772680972, "learning_rate": 4.017005170260302e-06, "loss": 0.7452, "step": 3584 }, { "epoch": 4.388004895960832, "grad_norm": 1.6844306798499544, "learning_rate": 4.016495715225376e-06, "loss": 0.4593, "step": 3585 }, { "epoch": 4.389228886168911, "grad_norm": 1.2056995378456405, "learning_rate": 4.015986160531961e-06, "loss": 0.6979, "step": 3586 }, { "epoch": 4.3904528763769886, "grad_norm": 1.1730794659066093, "learning_rate": 4.015476506213543e-06, "loss": 0.7654, "step": 3587 }, { "epoch": 4.391676866585067, "grad_norm": 0.7788475080742777, "learning_rate": 4.014966752303614e-06, "loss": 0.5578, "step": 3588 }, { "epoch": 4.392900856793146, "grad_norm": 1.7458378660974556, "learning_rate": 4.014456898835674e-06, "loss": 0.7861, "step": 3589 }, { "epoch": 4.394124847001224, "grad_norm": 1.6898855550374514, "learning_rate": 4.0139469458432265e-06, "loss": 0.4956, "step": 3590 }, { "epoch": 4.395348837209302, "grad_norm": 1.2642617946827694, "learning_rate": 4.013436893359787e-06, "loss": 0.7211, "step": 3591 }, { "epoch": 4.396572827417381, "grad_norm": 0.8442796532347824, "learning_rate": 4.012926741418872e-06, "loss": 0.4974, "step": 3592 }, { "epoch": 4.397796817625459, "grad_norm": 0.9888909606818186, "learning_rate": 4.012416490054008e-06, "loss": 0.4604, "step": 3593 }, { "epoch": 4.399020807833537, "grad_norm": 1.0115358085385253, "learning_rate": 4.0119061392987265e-06, "loss": 0.5698, "step": 3594 }, { "epoch": 4.400244798041616, "grad_norm": 1.8113731223538332, "learning_rate": 4.011395689186564e-06, "loss": 0.7243, "step": 3595 }, { "epoch": 4.401468788249694, "grad_norm": 1.930101710005665, "learning_rate": 4.01088513975107e-06, "loss": 1.0018, "step": 3596 }, { "epoch": 4.402692778457772, "grad_norm": 1.1706397650678757, "learning_rate": 4.01037449102579e-06, "loss": 0.6053, "step": 3597 }, { "epoch": 4.403916768665851, "grad_norm": 1.2810271140811518, "learning_rate": 4.009863743044287e-06, "loss": 0.3338, "step": 3598 }, { "epoch": 4.405140758873929, "grad_norm": 0.9004097764031004, "learning_rate": 4.009352895840122e-06, "loss": 0.6275, "step": 3599 }, { "epoch": 4.406364749082007, "grad_norm": 0.7414546760690397, "learning_rate": 4.008841949446869e-06, "loss": 0.5377, "step": 3600 }, { "epoch": 4.407588739290086, "grad_norm": 1.1185530095191238, "learning_rate": 4.0083309038981025e-06, "loss": 0.3229, "step": 3601 }, { "epoch": 4.408812729498164, "grad_norm": 1.5488588709265267, "learning_rate": 4.007819759227408e-06, "loss": 0.4197, "step": 3602 }, { "epoch": 4.410036719706242, "grad_norm": 1.2784032862781616, "learning_rate": 4.007308515468376e-06, "loss": 0.6156, "step": 3603 }, { "epoch": 4.411260709914321, "grad_norm": 1.261201744643915, "learning_rate": 4.006797172654605e-06, "loss": 0.6823, "step": 3604 }, { "epoch": 4.412484700122399, "grad_norm": 1.3418589445815, "learning_rate": 4.006285730819696e-06, "loss": 1.6504, "step": 3605 }, { "epoch": 4.413708690330477, "grad_norm": 1.039389779978606, "learning_rate": 4.0057741899972594e-06, "loss": 0.596, "step": 3606 }, { "epoch": 4.414932680538556, "grad_norm": 1.886447070364013, "learning_rate": 4.005262550220913e-06, "loss": 0.5346, "step": 3607 }, { "epoch": 4.416156670746634, "grad_norm": 1.5473484636639312, "learning_rate": 4.00475081152428e-06, "loss": 0.4917, "step": 3608 }, { "epoch": 4.417380660954713, "grad_norm": 1.4444151557110954, "learning_rate": 4.00423897394099e-06, "loss": 0.5257, "step": 3609 }, { "epoch": 4.4186046511627906, "grad_norm": 1.661021306496762, "learning_rate": 4.003727037504676e-06, "loss": 0.8655, "step": 3610 }, { "epoch": 4.419828641370869, "grad_norm": 1.5116152651407901, "learning_rate": 4.0032150022489845e-06, "loss": 0.5933, "step": 3611 }, { "epoch": 4.421052631578947, "grad_norm": 1.3704728439465184, "learning_rate": 4.002702868207563e-06, "loss": 0.7896, "step": 3612 }, { "epoch": 4.4222766217870255, "grad_norm": 1.2737318254619006, "learning_rate": 4.002190635414067e-06, "loss": 1.588, "step": 3613 }, { "epoch": 4.423500611995104, "grad_norm": 1.9018429020882703, "learning_rate": 4.001678303902159e-06, "loss": 1.037, "step": 3614 }, { "epoch": 4.424724602203183, "grad_norm": 1.5761670495683102, "learning_rate": 4.001165873705507e-06, "loss": 0.6183, "step": 3615 }, { "epoch": 4.4259485924112605, "grad_norm": 1.5197640414387439, "learning_rate": 4.000653344857786e-06, "loss": 0.5781, "step": 3616 }, { "epoch": 4.427172582619339, "grad_norm": 1.4745453168520009, "learning_rate": 4.000140717392678e-06, "loss": 0.7998, "step": 3617 }, { "epoch": 4.428396572827418, "grad_norm": 1.817922589326754, "learning_rate": 3.99962799134387e-06, "loss": 1.0689, "step": 3618 }, { "epoch": 4.4296205630354955, "grad_norm": 1.968291880513057, "learning_rate": 3.9991151667450575e-06, "loss": 0.263, "step": 3619 }, { "epoch": 4.430844553243574, "grad_norm": 1.4127543324676335, "learning_rate": 3.998602243629942e-06, "loss": 1.1487, "step": 3620 }, { "epoch": 4.432068543451653, "grad_norm": 1.0295903161935245, "learning_rate": 3.998089222032229e-06, "loss": 0.6157, "step": 3621 }, { "epoch": 4.43329253365973, "grad_norm": 1.3148182471602252, "learning_rate": 3.997576101985633e-06, "loss": 1.2332, "step": 3622 }, { "epoch": 4.434516523867809, "grad_norm": 1.4585680798062761, "learning_rate": 3.997062883523876e-06, "loss": 0.6763, "step": 3623 }, { "epoch": 4.435740514075888, "grad_norm": 1.3542433363694415, "learning_rate": 3.996549566680682e-06, "loss": 0.5024, "step": 3624 }, { "epoch": 4.436964504283965, "grad_norm": 1.3869931546937466, "learning_rate": 3.996036151489787e-06, "loss": 0.786, "step": 3625 }, { "epoch": 4.438188494492044, "grad_norm": 1.770040399791114, "learning_rate": 3.995522637984929e-06, "loss": 0.3731, "step": 3626 }, { "epoch": 4.439412484700123, "grad_norm": 1.15622384786685, "learning_rate": 3.995009026199855e-06, "loss": 0.8507, "step": 3627 }, { "epoch": 4.4406364749082, "grad_norm": 0.8362963696211706, "learning_rate": 3.994495316168317e-06, "loss": 0.64, "step": 3628 }, { "epoch": 4.441860465116279, "grad_norm": 1.1124488127464958, "learning_rate": 3.993981507924076e-06, "loss": 1.1486, "step": 3629 }, { "epoch": 4.443084455324358, "grad_norm": 2.10012908019713, "learning_rate": 3.993467601500895e-06, "loss": 0.5542, "step": 3630 }, { "epoch": 4.444308445532435, "grad_norm": 1.1256433187938075, "learning_rate": 3.992953596932547e-06, "loss": 0.9638, "step": 3631 }, { "epoch": 4.445532435740514, "grad_norm": 1.9763330937865309, "learning_rate": 3.992439494252811e-06, "loss": 0.4089, "step": 3632 }, { "epoch": 4.4467564259485926, "grad_norm": 1.2676336801101187, "learning_rate": 3.991925293495472e-06, "loss": 0.5719, "step": 3633 }, { "epoch": 4.447980416156671, "grad_norm": 1.0898721623126888, "learning_rate": 3.991410994694321e-06, "loss": 0.7293, "step": 3634 }, { "epoch": 4.449204406364749, "grad_norm": 1.1974059705846383, "learning_rate": 3.990896597883155e-06, "loss": 0.494, "step": 3635 }, { "epoch": 4.4504283965728275, "grad_norm": 1.1514282781753624, "learning_rate": 3.99038210309578e-06, "loss": 0.4682, "step": 3636 }, { "epoch": 4.451652386780906, "grad_norm": 1.2575677823320714, "learning_rate": 3.989867510366005e-06, "loss": 0.3479, "step": 3637 }, { "epoch": 4.452876376988984, "grad_norm": 1.7847863323387223, "learning_rate": 3.989352819727648e-06, "loss": 1.0706, "step": 3638 }, { "epoch": 4.4541003671970625, "grad_norm": 1.1090017349857237, "learning_rate": 3.9888380312145325e-06, "loss": 0.6055, "step": 3639 }, { "epoch": 4.455324357405141, "grad_norm": 0.9335024202340734, "learning_rate": 3.988323144860489e-06, "loss": 0.5889, "step": 3640 }, { "epoch": 4.456548347613219, "grad_norm": 1.8505894156199396, "learning_rate": 3.987808160699352e-06, "loss": 0.9464, "step": 3641 }, { "epoch": 4.4577723378212974, "grad_norm": 1.3310560935409077, "learning_rate": 3.987293078764967e-06, "loss": 0.696, "step": 3642 }, { "epoch": 4.458996328029376, "grad_norm": 0.9083108131637816, "learning_rate": 3.986777899091181e-06, "loss": 0.6771, "step": 3643 }, { "epoch": 4.460220318237454, "grad_norm": 1.0905031357219217, "learning_rate": 3.986262621711851e-06, "loss": 0.7395, "step": 3644 }, { "epoch": 4.461444308445532, "grad_norm": 1.6027956660567728, "learning_rate": 3.9857472466608385e-06, "loss": 0.4268, "step": 3645 }, { "epoch": 4.462668298653611, "grad_norm": 0.9896148563635758, "learning_rate": 3.985231773972013e-06, "loss": 1.0058, "step": 3646 }, { "epoch": 4.463892288861689, "grad_norm": 1.141940114086837, "learning_rate": 3.984716203679249e-06, "loss": 0.6109, "step": 3647 }, { "epoch": 4.465116279069767, "grad_norm": 1.6822239158745074, "learning_rate": 3.984200535816427e-06, "loss": 0.4556, "step": 3648 }, { "epoch": 4.466340269277846, "grad_norm": 1.2718344533625352, "learning_rate": 3.983684770417435e-06, "loss": 1.3969, "step": 3649 }, { "epoch": 4.467564259485924, "grad_norm": 1.5064282310821562, "learning_rate": 3.9831689075161685e-06, "loss": 1.1701, "step": 3650 }, { "epoch": 4.468788249694002, "grad_norm": 1.9655783002905527, "learning_rate": 3.982652947146528e-06, "loss": 0.6239, "step": 3651 }, { "epoch": 4.470012239902081, "grad_norm": 0.7575334637802201, "learning_rate": 3.982136889342418e-06, "loss": 0.5664, "step": 3652 }, { "epoch": 4.47123623011016, "grad_norm": 1.105158793899495, "learning_rate": 3.981620734137754e-06, "loss": 0.5987, "step": 3653 }, { "epoch": 4.472460220318237, "grad_norm": 1.5281857008302024, "learning_rate": 3.981104481566457e-06, "loss": 0.4535, "step": 3654 }, { "epoch": 4.473684210526316, "grad_norm": 1.427785779713977, "learning_rate": 3.980588131662451e-06, "loss": 1.1497, "step": 3655 }, { "epoch": 4.474908200734394, "grad_norm": 1.1652391245463396, "learning_rate": 3.980071684459669e-06, "loss": 1.5117, "step": 3656 }, { "epoch": 4.476132190942472, "grad_norm": 1.747039309138808, "learning_rate": 3.979555139992049e-06, "loss": 0.7611, "step": 3657 }, { "epoch": 4.477356181150551, "grad_norm": 1.8283568115877316, "learning_rate": 3.97903849829354e-06, "loss": 0.5202, "step": 3658 }, { "epoch": 4.4785801713586295, "grad_norm": 0.8718840053301404, "learning_rate": 3.978521759398091e-06, "loss": 0.6008, "step": 3659 }, { "epoch": 4.479804161566707, "grad_norm": 2.0532145528278765, "learning_rate": 3.9780049233396586e-06, "loss": 0.5861, "step": 3660 }, { "epoch": 4.481028151774786, "grad_norm": 1.2603144348795186, "learning_rate": 3.977487990152212e-06, "loss": 0.6706, "step": 3661 }, { "epoch": 4.4822521419828645, "grad_norm": 1.33611713037046, "learning_rate": 3.976970959869718e-06, "loss": 0.4401, "step": 3662 }, { "epoch": 4.483476132190942, "grad_norm": 1.1746110037926547, "learning_rate": 3.976453832526156e-06, "loss": 0.6388, "step": 3663 }, { "epoch": 4.484700122399021, "grad_norm": 0.9883032434439264, "learning_rate": 3.9759366081555095e-06, "loss": 0.3938, "step": 3664 }, { "epoch": 4.4859241126070994, "grad_norm": 1.1445576484014797, "learning_rate": 3.975419286791767e-06, "loss": 0.5351, "step": 3665 }, { "epoch": 4.487148102815177, "grad_norm": 1.4285398907907503, "learning_rate": 3.974901868468928e-06, "loss": 0.7645, "step": 3666 }, { "epoch": 4.488372093023256, "grad_norm": 1.011930758837756, "learning_rate": 3.9743843532209925e-06, "loss": 0.435, "step": 3667 }, { "epoch": 4.489596083231334, "grad_norm": 0.8869831204927647, "learning_rate": 3.9738667410819705e-06, "loss": 0.5305, "step": 3668 }, { "epoch": 4.490820073439412, "grad_norm": 1.1461834333010872, "learning_rate": 3.973349032085878e-06, "loss": 1.2793, "step": 3669 }, { "epoch": 4.492044063647491, "grad_norm": 1.1088947583350852, "learning_rate": 3.9728312262667365e-06, "loss": 0.8117, "step": 3670 }, { "epoch": 4.493268053855569, "grad_norm": 0.8015518807299583, "learning_rate": 3.972313323658574e-06, "loss": 0.5926, "step": 3671 }, { "epoch": 4.494492044063647, "grad_norm": 1.390417370387761, "learning_rate": 3.971795324295427e-06, "loss": 0.5398, "step": 3672 }, { "epoch": 4.495716034271726, "grad_norm": 1.5118547577608668, "learning_rate": 3.971277228211333e-06, "loss": 1.2458, "step": 3673 }, { "epoch": 4.496940024479804, "grad_norm": 2.089139951291609, "learning_rate": 3.970759035440343e-06, "loss": 1.0851, "step": 3674 }, { "epoch": 4.498164014687882, "grad_norm": 1.2678083607450439, "learning_rate": 3.970240746016509e-06, "loss": 0.743, "step": 3675 }, { "epoch": 4.499388004895961, "grad_norm": 1.44325398664822, "learning_rate": 3.969722359973891e-06, "loss": 0.6487, "step": 3676 }, { "epoch": 4.500611995104039, "grad_norm": 1.3287037094770242, "learning_rate": 3.9692038773465555e-06, "loss": 0.4548, "step": 3677 }, { "epoch": 4.501835985312118, "grad_norm": 1.0799204115906897, "learning_rate": 3.968685298168575e-06, "loss": 0.61, "step": 3678 }, { "epoch": 4.503059975520196, "grad_norm": 2.172717161569549, "learning_rate": 3.96816662247403e-06, "loss": 0.7548, "step": 3679 }, { "epoch": 4.504283965728274, "grad_norm": 1.4032650146384935, "learning_rate": 3.967647850297004e-06, "loss": 0.5352, "step": 3680 }, { "epoch": 4.505507955936353, "grad_norm": 1.2932785544942034, "learning_rate": 3.96712898167159e-06, "loss": 1.035, "step": 3681 }, { "epoch": 4.506731946144431, "grad_norm": 1.5527720864591728, "learning_rate": 3.966610016631886e-06, "loss": 0.6481, "step": 3682 }, { "epoch": 4.507955936352509, "grad_norm": 0.7342824994726488, "learning_rate": 3.966090955211997e-06, "loss": 0.4623, "step": 3683 }, { "epoch": 4.509179926560588, "grad_norm": 1.2537774446187413, "learning_rate": 3.965571797446031e-06, "loss": 1.0461, "step": 3684 }, { "epoch": 4.510403916768666, "grad_norm": 1.642761420370547, "learning_rate": 3.965052543368109e-06, "loss": 0.4699, "step": 3685 }, { "epoch": 4.511627906976744, "grad_norm": 1.2744274405932199, "learning_rate": 3.9645331930123535e-06, "loss": 0.4274, "step": 3686 }, { "epoch": 4.512851897184823, "grad_norm": 2.2963281559593125, "learning_rate": 3.964013746412892e-06, "loss": 0.4186, "step": 3687 }, { "epoch": 4.5140758873929006, "grad_norm": 2.3260734224517488, "learning_rate": 3.963494203603863e-06, "loss": 0.7486, "step": 3688 }, { "epoch": 4.515299877600979, "grad_norm": 1.043860105498053, "learning_rate": 3.962974564619407e-06, "loss": 0.5111, "step": 3689 }, { "epoch": 4.516523867809058, "grad_norm": 0.847787523239775, "learning_rate": 3.962454829493674e-06, "loss": 0.5321, "step": 3690 }, { "epoch": 4.5177478580171355, "grad_norm": 1.3146116430504347, "learning_rate": 3.9619349982608195e-06, "loss": 0.7516, "step": 3691 }, { "epoch": 4.518971848225214, "grad_norm": 1.3034744796890867, "learning_rate": 3.961415070955004e-06, "loss": 0.6528, "step": 3692 }, { "epoch": 4.520195838433293, "grad_norm": 1.8146447921750692, "learning_rate": 3.960895047610395e-06, "loss": 0.4818, "step": 3693 }, { "epoch": 4.5214198286413705, "grad_norm": 0.8947358107939535, "learning_rate": 3.960374928261167e-06, "loss": 1.0634, "step": 3694 }, { "epoch": 4.522643818849449, "grad_norm": 1.935443718070234, "learning_rate": 3.959854712941501e-06, "loss": 0.9922, "step": 3695 }, { "epoch": 4.523867809057528, "grad_norm": 1.2932376877380862, "learning_rate": 3.959334401685583e-06, "loss": 0.45, "step": 3696 }, { "epoch": 4.525091799265606, "grad_norm": 1.0474783200071196, "learning_rate": 3.958813994527604e-06, "loss": 0.3834, "step": 3697 }, { "epoch": 4.526315789473684, "grad_norm": 1.2057585543058231, "learning_rate": 3.958293491501767e-06, "loss": 1.0805, "step": 3698 }, { "epoch": 4.527539779681763, "grad_norm": 1.663606794675035, "learning_rate": 3.9577728926422745e-06, "loss": 0.6885, "step": 3699 }, { "epoch": 4.52876376988984, "grad_norm": 1.3626904648788982, "learning_rate": 3.95725219798334e-06, "loss": 1.5006, "step": 3700 }, { "epoch": 4.529987760097919, "grad_norm": 2.0576813548332966, "learning_rate": 3.956731407559181e-06, "loss": 0.5351, "step": 3701 }, { "epoch": 4.531211750305998, "grad_norm": 1.6011797157249987, "learning_rate": 3.956210521404021e-06, "loss": 1.2377, "step": 3702 }, { "epoch": 4.532435740514076, "grad_norm": 2.5595426565274826, "learning_rate": 3.955689539552093e-06, "loss": 0.8136, "step": 3703 }, { "epoch": 4.533659730722154, "grad_norm": 1.077123640043357, "learning_rate": 3.955168462037632e-06, "loss": 1.3071, "step": 3704 }, { "epoch": 4.534883720930233, "grad_norm": 1.6483113457834127, "learning_rate": 3.9546472888948825e-06, "loss": 0.5648, "step": 3705 }, { "epoch": 4.536107711138311, "grad_norm": 1.1620760502047276, "learning_rate": 3.9541260201580935e-06, "loss": 1.0131, "step": 3706 }, { "epoch": 4.537331701346389, "grad_norm": 1.1735105288903187, "learning_rate": 3.953604655861521e-06, "loss": 0.7584, "step": 3707 }, { "epoch": 4.538555691554468, "grad_norm": 1.1327440399807838, "learning_rate": 3.953083196039426e-06, "loss": 0.4971, "step": 3708 }, { "epoch": 4.539779681762546, "grad_norm": 1.505653623243857, "learning_rate": 3.952561640726079e-06, "loss": 0.8068, "step": 3709 }, { "epoch": 4.541003671970624, "grad_norm": 1.7938754062355948, "learning_rate": 3.952039989955753e-06, "loss": 0.5212, "step": 3710 }, { "epoch": 4.5422276621787026, "grad_norm": 0.8886713087598161, "learning_rate": 3.95151824376273e-06, "loss": 0.5658, "step": 3711 }, { "epoch": 4.543451652386781, "grad_norm": 1.683199939295174, "learning_rate": 3.950996402181297e-06, "loss": 0.4225, "step": 3712 }, { "epoch": 4.544675642594859, "grad_norm": 1.0390369755137348, "learning_rate": 3.950474465245746e-06, "loss": 0.5781, "step": 3713 }, { "epoch": 4.5458996328029375, "grad_norm": 1.3481795463395334, "learning_rate": 3.949952432990379e-06, "loss": 0.5726, "step": 3714 }, { "epoch": 4.547123623011016, "grad_norm": 1.6910199693247343, "learning_rate": 3.949430305449501e-06, "loss": 0.5175, "step": 3715 }, { "epoch": 4.548347613219094, "grad_norm": 1.5392987081630878, "learning_rate": 3.9489080826574245e-06, "loss": 0.8514, "step": 3716 }, { "epoch": 4.5495716034271725, "grad_norm": 0.8336592507440458, "learning_rate": 3.948385764648468e-06, "loss": 0.5237, "step": 3717 }, { "epoch": 4.550795593635251, "grad_norm": 1.0636381659524086, "learning_rate": 3.947863351456956e-06, "loss": 0.3582, "step": 3718 }, { "epoch": 4.552019583843329, "grad_norm": 1.0992476842434735, "learning_rate": 3.94734084311722e-06, "loss": 1.1818, "step": 3719 }, { "epoch": 4.5532435740514074, "grad_norm": 1.431149950806469, "learning_rate": 3.946818239663598e-06, "loss": 0.4225, "step": 3720 }, { "epoch": 4.554467564259486, "grad_norm": 2.0632024548857557, "learning_rate": 3.946295541130432e-06, "loss": 0.8785, "step": 3721 }, { "epoch": 4.555691554467565, "grad_norm": 1.0451388694186405, "learning_rate": 3.945772747552072e-06, "loss": 1.1857, "step": 3722 }, { "epoch": 4.556915544675642, "grad_norm": 1.765309704737816, "learning_rate": 3.945249858962874e-06, "loss": 0.5794, "step": 3723 }, { "epoch": 4.558139534883721, "grad_norm": 1.0248451296827104, "learning_rate": 3.944726875397202e-06, "loss": 0.711, "step": 3724 }, { "epoch": 4.5593635250918, "grad_norm": 1.0354915690311854, "learning_rate": 3.944203796889424e-06, "loss": 0.9002, "step": 3725 }, { "epoch": 4.560587515299877, "grad_norm": 1.4392051955646605, "learning_rate": 3.943680623473913e-06, "loss": 0.6361, "step": 3726 }, { "epoch": 4.561811505507956, "grad_norm": 1.0365845466261265, "learning_rate": 3.943157355185052e-06, "loss": 0.4736, "step": 3727 }, { "epoch": 4.563035495716035, "grad_norm": 1.3884461773828494, "learning_rate": 3.942633992057227e-06, "loss": 0.6765, "step": 3728 }, { "epoch": 4.564259485924112, "grad_norm": 0.953546995569663, "learning_rate": 3.9421105341248326e-06, "loss": 0.5675, "step": 3729 }, { "epoch": 4.565483476132191, "grad_norm": 0.9598530907743082, "learning_rate": 3.941586981422268e-06, "loss": 0.799, "step": 3730 }, { "epoch": 4.56670746634027, "grad_norm": 2.5541100858255854, "learning_rate": 3.941063333983937e-06, "loss": 0.4791, "step": 3731 }, { "epoch": 4.567931456548347, "grad_norm": 0.9892782420166751, "learning_rate": 3.9405395918442555e-06, "loss": 0.3388, "step": 3732 }, { "epoch": 4.569155446756426, "grad_norm": 1.0580425809732712, "learning_rate": 3.9400157550376405e-06, "loss": 0.702, "step": 3733 }, { "epoch": 4.5703794369645045, "grad_norm": 0.9606595526163828, "learning_rate": 3.939491823598516e-06, "loss": 0.7053, "step": 3734 }, { "epoch": 4.571603427172582, "grad_norm": 1.901723517022399, "learning_rate": 3.9389677975613125e-06, "loss": 0.5637, "step": 3735 }, { "epoch": 4.572827417380661, "grad_norm": 2.0161764481894364, "learning_rate": 3.938443676960468e-06, "loss": 0.4125, "step": 3736 }, { "epoch": 4.5740514075887395, "grad_norm": 1.0267248598265035, "learning_rate": 3.937919461830426e-06, "loss": 0.7449, "step": 3737 }, { "epoch": 4.575275397796817, "grad_norm": 1.652689948333433, "learning_rate": 3.937395152205635e-06, "loss": 0.4145, "step": 3738 }, { "epoch": 4.576499388004896, "grad_norm": 2.6069325541103323, "learning_rate": 3.936870748120551e-06, "loss": 0.5529, "step": 3739 }, { "epoch": 4.5777233782129745, "grad_norm": 1.2912407266783907, "learning_rate": 3.936346249609637e-06, "loss": 0.4235, "step": 3740 }, { "epoch": 4.578947368421053, "grad_norm": 0.7648604980040093, "learning_rate": 3.935821656707359e-06, "loss": 0.6173, "step": 3741 }, { "epoch": 4.580171358629131, "grad_norm": 1.2345978671920412, "learning_rate": 3.935296969448194e-06, "loss": 0.4448, "step": 3742 }, { "epoch": 4.5813953488372094, "grad_norm": 0.8770647907634886, "learning_rate": 3.93477218786662e-06, "loss": 1.2236, "step": 3743 }, { "epoch": 4.582619339045287, "grad_norm": 1.1760844731339164, "learning_rate": 3.934247311997124e-06, "loss": 0.5542, "step": 3744 }, { "epoch": 4.583843329253366, "grad_norm": 1.071643161927744, "learning_rate": 3.9337223418742e-06, "loss": 0.5892, "step": 3745 }, { "epoch": 4.585067319461444, "grad_norm": 1.802753013838483, "learning_rate": 3.9331972775323476e-06, "loss": 0.6547, "step": 3746 }, { "epoch": 4.586291309669523, "grad_norm": 2.0056137507570395, "learning_rate": 3.93267211900607e-06, "loss": 0.5155, "step": 3747 }, { "epoch": 4.587515299877601, "grad_norm": 0.8851447932318773, "learning_rate": 3.93214686632988e-06, "loss": 0.6979, "step": 3748 }, { "epoch": 4.588739290085679, "grad_norm": 0.8159019297972027, "learning_rate": 3.931621519538296e-06, "loss": 0.621, "step": 3749 }, { "epoch": 4.589963280293758, "grad_norm": 1.0239211761741196, "learning_rate": 3.93109607866584e-06, "loss": 0.7306, "step": 3750 }, { "epoch": 4.591187270501836, "grad_norm": 1.4856012518209956, "learning_rate": 3.930570543747043e-06, "loss": 0.4634, "step": 3751 }, { "epoch": 4.592411260709914, "grad_norm": 1.1101546374249718, "learning_rate": 3.930044914816441e-06, "loss": 1.2635, "step": 3752 }, { "epoch": 4.593635250917993, "grad_norm": 1.1751828589349984, "learning_rate": 3.929519191908578e-06, "loss": 1.2011, "step": 3753 }, { "epoch": 4.594859241126071, "grad_norm": 1.3060102169577883, "learning_rate": 3.928993375057999e-06, "loss": 0.5617, "step": 3754 }, { "epoch": 4.596083231334149, "grad_norm": 2.203000550304208, "learning_rate": 3.928467464299263e-06, "loss": 0.8541, "step": 3755 }, { "epoch": 4.597307221542228, "grad_norm": 0.8978490628306895, "learning_rate": 3.927941459666927e-06, "loss": 0.6787, "step": 3756 }, { "epoch": 4.598531211750306, "grad_norm": 2.679472930667635, "learning_rate": 3.927415361195561e-06, "loss": 0.4427, "step": 3757 }, { "epoch": 4.599755201958384, "grad_norm": 1.5315774002188678, "learning_rate": 3.9268891689197365e-06, "loss": 0.7756, "step": 3758 }, { "epoch": 4.600979192166463, "grad_norm": 1.324761516294892, "learning_rate": 3.926362882874034e-06, "loss": 1.1423, "step": 3759 }, { "epoch": 4.602203182374541, "grad_norm": 1.0106329603589588, "learning_rate": 3.925836503093038e-06, "loss": 0.6599, "step": 3760 }, { "epoch": 4.603427172582619, "grad_norm": 1.981326883737026, "learning_rate": 3.925310029611341e-06, "loss": 0.8532, "step": 3761 }, { "epoch": 4.604651162790698, "grad_norm": 0.7405515538735531, "learning_rate": 3.924783462463541e-06, "loss": 0.6046, "step": 3762 }, { "epoch": 4.605875152998776, "grad_norm": 1.1404713669513582, "learning_rate": 3.924256801684242e-06, "loss": 0.5544, "step": 3763 }, { "epoch": 4.607099143206854, "grad_norm": 1.562275015438096, "learning_rate": 3.923730047308054e-06, "loss": 0.4066, "step": 3764 }, { "epoch": 4.608323133414933, "grad_norm": 0.9634286364947997, "learning_rate": 3.923203199369592e-06, "loss": 0.4353, "step": 3765 }, { "epoch": 4.6095471236230114, "grad_norm": 1.7607232278799965, "learning_rate": 3.922676257903481e-06, "loss": 0.2844, "step": 3766 }, { "epoch": 4.610771113831089, "grad_norm": 1.2184257451697964, "learning_rate": 3.922149222944348e-06, "loss": 0.6086, "step": 3767 }, { "epoch": 4.611995104039168, "grad_norm": 1.6866135450030748, "learning_rate": 3.921622094526829e-06, "loss": 0.5347, "step": 3768 }, { "epoch": 4.613219094247246, "grad_norm": 2.1251933094162347, "learning_rate": 3.921094872685563e-06, "loss": 0.4309, "step": 3769 }, { "epoch": 4.614443084455324, "grad_norm": 0.7802673236884407, "learning_rate": 3.920567557455199e-06, "loss": 0.6191, "step": 3770 }, { "epoch": 4.615667074663403, "grad_norm": 1.3465055840840237, "learning_rate": 3.920040148870389e-06, "loss": 1.4003, "step": 3771 }, { "epoch": 4.616891064871481, "grad_norm": 1.5936264741570534, "learning_rate": 3.919512646965793e-06, "loss": 0.4671, "step": 3772 }, { "epoch": 4.618115055079559, "grad_norm": 1.3388562485255457, "learning_rate": 3.9189850517760755e-06, "loss": 1.5271, "step": 3773 }, { "epoch": 4.619339045287638, "grad_norm": 1.41192148376063, "learning_rate": 3.91845736333591e-06, "loss": 0.5279, "step": 3774 }, { "epoch": 4.620563035495716, "grad_norm": 1.9685838160983313, "learning_rate": 3.9179295816799725e-06, "loss": 0.5057, "step": 3775 }, { "epoch": 4.621787025703794, "grad_norm": 1.8577213596355762, "learning_rate": 3.9174017068429486e-06, "loss": 0.9152, "step": 3776 }, { "epoch": 4.623011015911873, "grad_norm": 1.162891813606725, "learning_rate": 3.916873738859526e-06, "loss": 0.6304, "step": 3777 }, { "epoch": 4.624235006119951, "grad_norm": 1.3243308572738661, "learning_rate": 3.916345677764401e-06, "loss": 0.4883, "step": 3778 }, { "epoch": 4.625458996328029, "grad_norm": 0.8698987944725893, "learning_rate": 3.915817523592279e-06, "loss": 0.7715, "step": 3779 }, { "epoch": 4.626682986536108, "grad_norm": 0.8077517404600526, "learning_rate": 3.915289276377864e-06, "loss": 0.6298, "step": 3780 }, { "epoch": 4.627906976744186, "grad_norm": 0.9646245301985371, "learning_rate": 3.9147609361558735e-06, "loss": 0.8723, "step": 3781 }, { "epoch": 4.629130966952264, "grad_norm": 1.2027882582452825, "learning_rate": 3.914232502961028e-06, "loss": 1.2031, "step": 3782 }, { "epoch": 4.630354957160343, "grad_norm": 1.4166088322268275, "learning_rate": 3.913703976828052e-06, "loss": 0.4386, "step": 3783 }, { "epoch": 4.631578947368421, "grad_norm": 0.6544665548425483, "learning_rate": 3.91317535779168e-06, "loss": 0.369, "step": 3784 }, { "epoch": 4.6328029375765, "grad_norm": 1.7588369038288398, "learning_rate": 3.9126466458866505e-06, "loss": 0.9484, "step": 3785 }, { "epoch": 4.634026927784578, "grad_norm": 1.2416029736019707, "learning_rate": 3.912117841147708e-06, "loss": 0.6586, "step": 3786 }, { "epoch": 4.635250917992656, "grad_norm": 1.3089220838754938, "learning_rate": 3.911588943609605e-06, "loss": 0.5227, "step": 3787 }, { "epoch": 4.636474908200734, "grad_norm": 1.4641395116952016, "learning_rate": 3.911059953307097e-06, "loss": 0.7739, "step": 3788 }, { "epoch": 4.6376988984088126, "grad_norm": 1.1245421015045387, "learning_rate": 3.910530870274948e-06, "loss": 0.497, "step": 3789 }, { "epoch": 4.638922888616891, "grad_norm": 0.8102663763324813, "learning_rate": 3.910001694547928e-06, "loss": 0.661, "step": 3790 }, { "epoch": 4.64014687882497, "grad_norm": 1.692354711196083, "learning_rate": 3.909472426160811e-06, "loss": 0.5096, "step": 3791 }, { "epoch": 4.6413708690330475, "grad_norm": 0.9964318055290455, "learning_rate": 3.9089430651483805e-06, "loss": 0.5548, "step": 3792 }, { "epoch": 4.642594859241126, "grad_norm": 1.1782990203874486, "learning_rate": 3.9084136115454215e-06, "loss": 0.4722, "step": 3793 }, { "epoch": 4.643818849449205, "grad_norm": 1.8618496417329857, "learning_rate": 3.907884065386731e-06, "loss": 0.7328, "step": 3794 }, { "epoch": 4.6450428396572825, "grad_norm": 1.2565480472429427, "learning_rate": 3.907354426707106e-06, "loss": 1.185, "step": 3795 }, { "epoch": 4.646266829865361, "grad_norm": 1.4883315744333285, "learning_rate": 3.906824695541354e-06, "loss": 1.0775, "step": 3796 }, { "epoch": 4.64749082007344, "grad_norm": 1.8707060772347006, "learning_rate": 3.9062948719242865e-06, "loss": 0.7263, "step": 3797 }, { "epoch": 4.6487148102815175, "grad_norm": 1.5309523598488055, "learning_rate": 3.905764955890722e-06, "loss": 0.8897, "step": 3798 }, { "epoch": 4.649938800489596, "grad_norm": 0.997541393758724, "learning_rate": 3.905234947475483e-06, "loss": 1.1194, "step": 3799 }, { "epoch": 4.651162790697675, "grad_norm": 1.009438255607341, "learning_rate": 3.904704846713402e-06, "loss": 1.0694, "step": 3800 }, { "epoch": 4.652386780905752, "grad_norm": 2.09385381330016, "learning_rate": 3.9041746536393135e-06, "loss": 0.5636, "step": 3801 }, { "epoch": 4.653610771113831, "grad_norm": 0.7958581360915951, "learning_rate": 3.90364436828806e-06, "loss": 0.7355, "step": 3802 }, { "epoch": 4.65483476132191, "grad_norm": 1.77805467109313, "learning_rate": 3.903113990694492e-06, "loss": 0.5359, "step": 3803 }, { "epoch": 4.656058751529987, "grad_norm": 1.128435374071368, "learning_rate": 3.902583520893461e-06, "loss": 1.3505, "step": 3804 }, { "epoch": 4.657282741738066, "grad_norm": 0.9890765890485449, "learning_rate": 3.902052958919829e-06, "loss": 0.5789, "step": 3805 }, { "epoch": 4.658506731946145, "grad_norm": 1.3703466172787675, "learning_rate": 3.901522304808462e-06, "loss": 0.4104, "step": 3806 }, { "epoch": 4.659730722154222, "grad_norm": 1.538072819978701, "learning_rate": 3.900991558594232e-06, "loss": 0.5973, "step": 3807 }, { "epoch": 4.660954712362301, "grad_norm": 2.0969990605358197, "learning_rate": 3.900460720312021e-06, "loss": 1.1096, "step": 3808 }, { "epoch": 4.66217870257038, "grad_norm": 2.497257078687693, "learning_rate": 3.8999297899967105e-06, "loss": 0.5429, "step": 3809 }, { "epoch": 4.663402692778458, "grad_norm": 1.1208995632273182, "learning_rate": 3.899398767683191e-06, "loss": 1.3417, "step": 3810 }, { "epoch": 4.664626682986536, "grad_norm": 1.2771706805120748, "learning_rate": 3.898867653406363e-06, "loss": 0.5226, "step": 3811 }, { "epoch": 4.6658506731946146, "grad_norm": 1.2675047971271205, "learning_rate": 3.898336447201125e-06, "loss": 0.7905, "step": 3812 }, { "epoch": 4.667074663402692, "grad_norm": 1.7065910600520846, "learning_rate": 3.897805149102389e-06, "loss": 0.5508, "step": 3813 }, { "epoch": 4.668298653610771, "grad_norm": 0.835605501742259, "learning_rate": 3.897273759145067e-06, "loss": 0.8629, "step": 3814 }, { "epoch": 4.6695226438188495, "grad_norm": 0.6283904767990558, "learning_rate": 3.896742277364083e-06, "loss": 0.448, "step": 3815 }, { "epoch": 4.670746634026928, "grad_norm": 1.1716451379141146, "learning_rate": 3.896210703794362e-06, "loss": 1.336, "step": 3816 }, { "epoch": 4.671970624235006, "grad_norm": 1.4711902521517402, "learning_rate": 3.895679038470839e-06, "loss": 0.5619, "step": 3817 }, { "epoch": 4.6731946144430845, "grad_norm": 1.2724571075581546, "learning_rate": 3.8951472814284505e-06, "loss": 0.3338, "step": 3818 }, { "epoch": 4.674418604651163, "grad_norm": 1.3299251487765837, "learning_rate": 3.894615432702144e-06, "loss": 0.487, "step": 3819 }, { "epoch": 4.675642594859241, "grad_norm": 0.9860912540511926, "learning_rate": 3.894083492326869e-06, "loss": 0.5847, "step": 3820 }, { "epoch": 4.6768665850673194, "grad_norm": 1.1099945865516154, "learning_rate": 3.893551460337583e-06, "loss": 0.716, "step": 3821 }, { "epoch": 4.678090575275398, "grad_norm": 1.4025723920396533, "learning_rate": 3.8930193367692496e-06, "loss": 0.4805, "step": 3822 }, { "epoch": 4.679314565483476, "grad_norm": 1.4315753124567552, "learning_rate": 3.892487121656838e-06, "loss": 0.5485, "step": 3823 }, { "epoch": 4.680538555691554, "grad_norm": 1.3602505821971216, "learning_rate": 3.891954815035324e-06, "loss": 0.642, "step": 3824 }, { "epoch": 4.681762545899633, "grad_norm": 1.612372094925039, "learning_rate": 3.891422416939686e-06, "loss": 1.2251, "step": 3825 }, { "epoch": 4.682986536107711, "grad_norm": 0.9606976486392892, "learning_rate": 3.890889927404915e-06, "loss": 0.6647, "step": 3826 }, { "epoch": 4.684210526315789, "grad_norm": 0.9908076928923341, "learning_rate": 3.890357346466001e-06, "loss": 0.5837, "step": 3827 }, { "epoch": 4.685434516523868, "grad_norm": 1.9272653369729187, "learning_rate": 3.889824674157947e-06, "loss": 0.6451, "step": 3828 }, { "epoch": 4.686658506731947, "grad_norm": 0.8244525430795184, "learning_rate": 3.889291910515755e-06, "loss": 0.6205, "step": 3829 }, { "epoch": 4.687882496940024, "grad_norm": 2.2523050830263704, "learning_rate": 3.888759055574437e-06, "loss": 0.4641, "step": 3830 }, { "epoch": 4.689106487148103, "grad_norm": 0.8638109319487003, "learning_rate": 3.88822610936901e-06, "loss": 0.5066, "step": 3831 }, { "epoch": 4.690330477356181, "grad_norm": 2.4153918414921502, "learning_rate": 3.8876930719344984e-06, "loss": 0.4951, "step": 3832 }, { "epoch": 4.691554467564259, "grad_norm": 1.8146619878499939, "learning_rate": 3.887159943305931e-06, "loss": 0.4519, "step": 3833 }, { "epoch": 4.692778457772338, "grad_norm": 1.7342221894203216, "learning_rate": 3.886626723518343e-06, "loss": 0.651, "step": 3834 }, { "epoch": 4.6940024479804165, "grad_norm": 1.8249056818933473, "learning_rate": 3.886093412606775e-06, "loss": 1.0045, "step": 3835 }, { "epoch": 4.695226438188494, "grad_norm": 1.6911751232892338, "learning_rate": 3.885560010606275e-06, "loss": 0.4295, "step": 3836 }, { "epoch": 4.696450428396573, "grad_norm": 1.3772771561976032, "learning_rate": 3.885026517551895e-06, "loss": 1.6392, "step": 3837 }, { "epoch": 4.6976744186046515, "grad_norm": 1.563885197151202, "learning_rate": 3.884492933478696e-06, "loss": 0.3959, "step": 3838 }, { "epoch": 4.698898408812729, "grad_norm": 1.124725008174981, "learning_rate": 3.8839592584217434e-06, "loss": 0.6806, "step": 3839 }, { "epoch": 4.700122399020808, "grad_norm": 1.0399086892336007, "learning_rate": 3.8834254924161055e-06, "loss": 0.3974, "step": 3840 }, { "epoch": 4.7013463892288865, "grad_norm": 1.4774658943628078, "learning_rate": 3.882891635496862e-06, "loss": 0.43, "step": 3841 }, { "epoch": 4.702570379436964, "grad_norm": 1.510977323945999, "learning_rate": 3.882357687699095e-06, "loss": 0.6509, "step": 3842 }, { "epoch": 4.703794369645043, "grad_norm": 1.6431801473221397, "learning_rate": 3.881823649057894e-06, "loss": 0.6774, "step": 3843 }, { "epoch": 4.7050183598531214, "grad_norm": 0.787639944540383, "learning_rate": 3.881289519608354e-06, "loss": 0.415, "step": 3844 }, { "epoch": 4.706242350061199, "grad_norm": 1.664043546421963, "learning_rate": 3.880755299385574e-06, "loss": 1.1097, "step": 3845 }, { "epoch": 4.707466340269278, "grad_norm": 1.6891650363607682, "learning_rate": 3.880220988424664e-06, "loss": 0.4893, "step": 3846 }, { "epoch": 4.708690330477356, "grad_norm": 1.0682095676909167, "learning_rate": 3.879686586760736e-06, "loss": 0.6461, "step": 3847 }, { "epoch": 4.709914320685434, "grad_norm": 0.7082124683399725, "learning_rate": 3.87915209442891e-06, "loss": 0.4647, "step": 3848 }, { "epoch": 4.711138310893513, "grad_norm": 2.159525233397554, "learning_rate": 3.878617511464307e-06, "loss": 0.584, "step": 3849 }, { "epoch": 4.712362301101591, "grad_norm": 1.0553194564275217, "learning_rate": 3.878082837902061e-06, "loss": 0.7922, "step": 3850 }, { "epoch": 4.713586291309669, "grad_norm": 1.4233519059901172, "learning_rate": 3.8775480737773085e-06, "loss": 0.9968, "step": 3851 }, { "epoch": 4.714810281517748, "grad_norm": 1.3447389532978522, "learning_rate": 3.8770132191251916e-06, "loss": 0.7662, "step": 3852 }, { "epoch": 4.716034271725826, "grad_norm": 0.8573140192708791, "learning_rate": 3.876478273980859e-06, "loss": 0.6441, "step": 3853 }, { "epoch": 4.717258261933905, "grad_norm": 1.273784006199861, "learning_rate": 3.875943238379466e-06, "loss": 0.5343, "step": 3854 }, { "epoch": 4.718482252141983, "grad_norm": 1.4975323379275303, "learning_rate": 3.875408112356172e-06, "loss": 0.5526, "step": 3855 }, { "epoch": 4.719706242350061, "grad_norm": 1.4157188676822474, "learning_rate": 3.874872895946145e-06, "loss": 0.5627, "step": 3856 }, { "epoch": 4.720930232558139, "grad_norm": 1.4542202741856565, "learning_rate": 3.874337589184556e-06, "loss": 0.4886, "step": 3857 }, { "epoch": 4.722154222766218, "grad_norm": 2.2446102187818333, "learning_rate": 3.873802192106583e-06, "loss": 0.3806, "step": 3858 }, { "epoch": 4.723378212974296, "grad_norm": 0.859256029405725, "learning_rate": 3.873266704747413e-06, "loss": 0.7187, "step": 3859 }, { "epoch": 4.724602203182375, "grad_norm": 1.4745387803578551, "learning_rate": 3.872731127142234e-06, "loss": 0.6352, "step": 3860 }, { "epoch": 4.725826193390453, "grad_norm": 1.1267208366398835, "learning_rate": 3.872195459326242e-06, "loss": 0.5202, "step": 3861 }, { "epoch": 4.727050183598531, "grad_norm": 1.689831495441326, "learning_rate": 3.87165970133464e-06, "loss": 1.0777, "step": 3862 }, { "epoch": 4.72827417380661, "grad_norm": 1.2529103860082147, "learning_rate": 3.871123853202637e-06, "loss": 0.5174, "step": 3863 }, { "epoch": 4.729498164014688, "grad_norm": 0.6951985591814531, "learning_rate": 3.870587914965445e-06, "loss": 0.4068, "step": 3864 }, { "epoch": 4.730722154222766, "grad_norm": 0.9806651631674292, "learning_rate": 3.870051886658284e-06, "loss": 0.6633, "step": 3865 }, { "epoch": 4.731946144430845, "grad_norm": 1.7815474792867037, "learning_rate": 3.869515768316382e-06, "loss": 0.521, "step": 3866 }, { "epoch": 4.7331701346389226, "grad_norm": 0.9571825414754315, "learning_rate": 3.868979559974968e-06, "loss": 0.7796, "step": 3867 }, { "epoch": 4.734394124847001, "grad_norm": 1.3292045799176089, "learning_rate": 3.868443261669281e-06, "loss": 0.5652, "step": 3868 }, { "epoch": 4.73561811505508, "grad_norm": 0.7773568351021357, "learning_rate": 3.867906873434564e-06, "loss": 0.3879, "step": 3869 }, { "epoch": 4.7368421052631575, "grad_norm": 1.1509411372076819, "learning_rate": 3.8673703953060685e-06, "loss": 0.9985, "step": 3870 }, { "epoch": 4.738066095471236, "grad_norm": 1.3387846767700868, "learning_rate": 3.866833827319047e-06, "loss": 1.129, "step": 3871 }, { "epoch": 4.739290085679315, "grad_norm": 1.9065348346585043, "learning_rate": 3.866297169508762e-06, "loss": 0.65, "step": 3872 }, { "epoch": 4.740514075887393, "grad_norm": 1.4099242743255993, "learning_rate": 3.86576042191048e-06, "loss": 1.0476, "step": 3873 }, { "epoch": 4.741738066095471, "grad_norm": 1.1056517501451506, "learning_rate": 3.865223584559475e-06, "loss": 0.7662, "step": 3874 }, { "epoch": 4.74296205630355, "grad_norm": 1.3926385015866696, "learning_rate": 3.864686657491026e-06, "loss": 1.0619, "step": 3875 }, { "epoch": 4.7441860465116275, "grad_norm": 1.7544256475286069, "learning_rate": 3.8641496407404165e-06, "loss": 0.5399, "step": 3876 }, { "epoch": 4.745410036719706, "grad_norm": 1.1220500680339782, "learning_rate": 3.86361253434294e-06, "loss": 0.5673, "step": 3877 }, { "epoch": 4.746634026927785, "grad_norm": 1.163672484121933, "learning_rate": 3.86307533833389e-06, "loss": 0.6172, "step": 3878 }, { "epoch": 4.747858017135863, "grad_norm": 2.046618918161762, "learning_rate": 3.8625380527485716e-06, "loss": 0.4076, "step": 3879 }, { "epoch": 4.749082007343941, "grad_norm": 1.4977113031072897, "learning_rate": 3.862000677622291e-06, "loss": 0.5951, "step": 3880 }, { "epoch": 4.75030599755202, "grad_norm": 1.409934341838558, "learning_rate": 3.861463212990364e-06, "loss": 0.9983, "step": 3881 }, { "epoch": 4.751529987760098, "grad_norm": 1.1885927654938855, "learning_rate": 3.860925658888109e-06, "loss": 0.5043, "step": 3882 }, { "epoch": 4.752753977968176, "grad_norm": 1.274012822662529, "learning_rate": 3.860388015350855e-06, "loss": 0.5896, "step": 3883 }, { "epoch": 4.753977968176255, "grad_norm": 1.4847252849772758, "learning_rate": 3.859850282413933e-06, "loss": 1.1764, "step": 3884 }, { "epoch": 4.755201958384333, "grad_norm": 1.4089731217501331, "learning_rate": 3.859312460112679e-06, "loss": 0.5713, "step": 3885 }, { "epoch": 4.756425948592411, "grad_norm": 1.2257118490370726, "learning_rate": 3.858774548482438e-06, "loss": 0.5375, "step": 3886 }, { "epoch": 4.75764993880049, "grad_norm": 1.8533957962376275, "learning_rate": 3.858236547558559e-06, "loss": 0.4805, "step": 3887 }, { "epoch": 4.758873929008568, "grad_norm": 1.0257624226569373, "learning_rate": 3.857698457376399e-06, "loss": 0.5822, "step": 3888 }, { "epoch": 4.760097919216646, "grad_norm": 1.4579541814209134, "learning_rate": 3.857160277971317e-06, "loss": 1.0364, "step": 3889 }, { "epoch": 4.7613219094247246, "grad_norm": 1.5655760217243508, "learning_rate": 3.856622009378682e-06, "loss": 0.621, "step": 3890 }, { "epoch": 4.762545899632803, "grad_norm": 1.1390620555330881, "learning_rate": 3.856083651633866e-06, "loss": 0.8991, "step": 3891 }, { "epoch": 4.763769889840881, "grad_norm": 1.3243562133149331, "learning_rate": 3.855545204772248e-06, "loss": 0.615, "step": 3892 }, { "epoch": 4.7649938800489595, "grad_norm": 1.651045691244505, "learning_rate": 3.855006668829214e-06, "loss": 1.0869, "step": 3893 }, { "epoch": 4.766217870257038, "grad_norm": 1.2139920842516123, "learning_rate": 3.854468043840153e-06, "loss": 1.0115, "step": 3894 }, { "epoch": 4.767441860465116, "grad_norm": 1.1053117184750336, "learning_rate": 3.853929329840462e-06, "loss": 0.905, "step": 3895 }, { "epoch": 4.7686658506731945, "grad_norm": 1.0097624509665304, "learning_rate": 3.853390526865542e-06, "loss": 0.4634, "step": 3896 }, { "epoch": 4.769889840881273, "grad_norm": 2.083770270540241, "learning_rate": 3.852851634950804e-06, "loss": 0.5699, "step": 3897 }, { "epoch": 4.771113831089352, "grad_norm": 1.695993974895643, "learning_rate": 3.85231265413166e-06, "loss": 0.5282, "step": 3898 }, { "epoch": 4.7723378212974294, "grad_norm": 2.060608151187121, "learning_rate": 3.85177358444353e-06, "loss": 0.489, "step": 3899 }, { "epoch": 4.773561811505508, "grad_norm": 1.3797204391420028, "learning_rate": 3.851234425921841e-06, "loss": 0.561, "step": 3900 }, { "epoch": 4.774785801713586, "grad_norm": 1.2718566663716822, "learning_rate": 3.850695178602022e-06, "loss": 0.5047, "step": 3901 }, { "epoch": 4.776009791921664, "grad_norm": 1.8280578885409873, "learning_rate": 3.850155842519513e-06, "loss": 0.5683, "step": 3902 }, { "epoch": 4.777233782129743, "grad_norm": 1.1374211459357546, "learning_rate": 3.8496164177097544e-06, "loss": 0.5291, "step": 3903 }, { "epoch": 4.778457772337822, "grad_norm": 1.6697498705353433, "learning_rate": 3.849076904208198e-06, "loss": 1.385, "step": 3904 }, { "epoch": 4.779681762545899, "grad_norm": 2.0128462612031424, "learning_rate": 3.848537302050297e-06, "loss": 1.2275, "step": 3905 }, { "epoch": 4.780905752753978, "grad_norm": 1.8416172323402595, "learning_rate": 3.847997611271512e-06, "loss": 0.3913, "step": 3906 }, { "epoch": 4.782129742962057, "grad_norm": 1.3428582062374113, "learning_rate": 3.8474578319073095e-06, "loss": 0.965, "step": 3907 }, { "epoch": 4.783353733170134, "grad_norm": 1.2368517079472954, "learning_rate": 3.8469179639931635e-06, "loss": 1.365, "step": 3908 }, { "epoch": 4.784577723378213, "grad_norm": 1.2912882351347057, "learning_rate": 3.8463780075645505e-06, "loss": 0.5758, "step": 3909 }, { "epoch": 4.785801713586292, "grad_norm": 1.3707087802713092, "learning_rate": 3.8458379626569545e-06, "loss": 0.5549, "step": 3910 }, { "epoch": 4.787025703794369, "grad_norm": 1.2094011308054873, "learning_rate": 3.845297829305865e-06, "loss": 0.5537, "step": 3911 }, { "epoch": 4.788249694002448, "grad_norm": 1.7849196983501878, "learning_rate": 3.844757607546778e-06, "loss": 0.5693, "step": 3912 }, { "epoch": 4.7894736842105265, "grad_norm": 1.3578476158328867, "learning_rate": 3.844217297415196e-06, "loss": 0.5222, "step": 3913 }, { "epoch": 4.790697674418604, "grad_norm": 1.238331824857719, "learning_rate": 3.8436768989466245e-06, "loss": 1.3085, "step": 3914 }, { "epoch": 4.791921664626683, "grad_norm": 0.9282874643212091, "learning_rate": 3.843136412176577e-06, "loss": 0.4332, "step": 3915 }, { "epoch": 4.7931456548347615, "grad_norm": 0.9637192524883046, "learning_rate": 3.842595837140572e-06, "loss": 0.6014, "step": 3916 }, { "epoch": 4.79436964504284, "grad_norm": 2.2048676397048443, "learning_rate": 3.842055173874135e-06, "loss": 0.5097, "step": 3917 }, { "epoch": 4.795593635250918, "grad_norm": 1.4240705654445107, "learning_rate": 3.841514422412797e-06, "loss": 0.6704, "step": 3918 }, { "epoch": 4.7968176254589965, "grad_norm": 1.200234181878473, "learning_rate": 3.840973582792091e-06, "loss": 1.1215, "step": 3919 }, { "epoch": 4.798041615667074, "grad_norm": 0.7157132541242024, "learning_rate": 3.840432655047563e-06, "loss": 0.4425, "step": 3920 }, { "epoch": 4.799265605875153, "grad_norm": 1.444210785435878, "learning_rate": 3.839891639214757e-06, "loss": 0.9151, "step": 3921 }, { "epoch": 4.8004895960832314, "grad_norm": 0.891277643864051, "learning_rate": 3.8393505353292305e-06, "loss": 0.5597, "step": 3922 }, { "epoch": 4.80171358629131, "grad_norm": 2.0075120839738356, "learning_rate": 3.838809343426538e-06, "loss": 0.5559, "step": 3923 }, { "epoch": 4.802937576499388, "grad_norm": 2.21084791066355, "learning_rate": 3.83826806354225e-06, "loss": 0.6374, "step": 3924 }, { "epoch": 4.804161566707466, "grad_norm": 1.0919377509216344, "learning_rate": 3.837726695711933e-06, "loss": 0.8245, "step": 3925 }, { "epoch": 4.805385556915545, "grad_norm": 2.019896165377279, "learning_rate": 3.837185239971166e-06, "loss": 0.5066, "step": 3926 }, { "epoch": 4.806609547123623, "grad_norm": 2.3524757487844647, "learning_rate": 3.836643696355531e-06, "loss": 0.4446, "step": 3927 }, { "epoch": 4.807833537331701, "grad_norm": 1.3327706632966636, "learning_rate": 3.836102064900617e-06, "loss": 0.6236, "step": 3928 }, { "epoch": 4.80905752753978, "grad_norm": 1.2290677295235823, "learning_rate": 3.835560345642017e-06, "loss": 0.4761, "step": 3929 }, { "epoch": 4.810281517747858, "grad_norm": 1.2227336863670288, "learning_rate": 3.8350185386153294e-06, "loss": 0.9609, "step": 3930 }, { "epoch": 4.811505507955936, "grad_norm": 1.0804357760148826, "learning_rate": 3.8344766438561635e-06, "loss": 0.5642, "step": 3931 }, { "epoch": 4.812729498164015, "grad_norm": 1.272022159091841, "learning_rate": 3.833934661400126e-06, "loss": 1.5721, "step": 3932 }, { "epoch": 4.813953488372093, "grad_norm": 1.2554478297707332, "learning_rate": 3.833392591282838e-06, "loss": 0.5466, "step": 3933 }, { "epoch": 4.815177478580171, "grad_norm": 1.7788244770375572, "learning_rate": 3.832850433539921e-06, "loss": 0.9374, "step": 3934 }, { "epoch": 4.81640146878825, "grad_norm": 1.563687518903311, "learning_rate": 3.832308188207003e-06, "loss": 0.5469, "step": 3935 }, { "epoch": 4.817625458996328, "grad_norm": 1.2382492503926494, "learning_rate": 3.831765855319718e-06, "loss": 1.6768, "step": 3936 }, { "epoch": 4.818849449204406, "grad_norm": 2.2947354291407422, "learning_rate": 3.831223434913708e-06, "loss": 0.5784, "step": 3937 }, { "epoch": 4.820073439412485, "grad_norm": 1.4848037287341793, "learning_rate": 3.830680927024617e-06, "loss": 0.7542, "step": 3938 }, { "epoch": 4.821297429620563, "grad_norm": 1.2166180920349023, "learning_rate": 3.830138331688098e-06, "loss": 0.7277, "step": 3939 }, { "epoch": 4.822521419828641, "grad_norm": 1.1517721209729608, "learning_rate": 3.829595648939807e-06, "loss": 0.5598, "step": 3940 }, { "epoch": 4.82374541003672, "grad_norm": 0.594744248321611, "learning_rate": 3.829052878815408e-06, "loss": 0.2327, "step": 3941 }, { "epoch": 4.8249694002447985, "grad_norm": 0.8457872539264581, "learning_rate": 3.82851002135057e-06, "loss": 0.5691, "step": 3942 }, { "epoch": 4.826193390452876, "grad_norm": 0.7932153241107824, "learning_rate": 3.827967076580966e-06, "loss": 0.5602, "step": 3943 }, { "epoch": 4.827417380660955, "grad_norm": 1.2034220702369667, "learning_rate": 3.8274240445422785e-06, "loss": 0.6367, "step": 3944 }, { "epoch": 4.828641370869033, "grad_norm": 1.0789086954967142, "learning_rate": 3.826880925270192e-06, "loss": 0.5368, "step": 3945 }, { "epoch": 4.829865361077111, "grad_norm": 1.2751888726080984, "learning_rate": 3.8263377188004e-06, "loss": 0.5013, "step": 3946 }, { "epoch": 4.83108935128519, "grad_norm": 1.4040666526194419, "learning_rate": 3.825794425168598e-06, "loss": 1.9201, "step": 3947 }, { "epoch": 4.832313341493268, "grad_norm": 1.066444287680524, "learning_rate": 3.82525104441049e-06, "loss": 0.7052, "step": 3948 }, { "epoch": 4.833537331701346, "grad_norm": 1.2886722134821684, "learning_rate": 3.824707576561786e-06, "loss": 0.9453, "step": 3949 }, { "epoch": 4.834761321909425, "grad_norm": 1.0087804705338113, "learning_rate": 3.8241640216582e-06, "loss": 0.7667, "step": 3950 }, { "epoch": 4.835985312117503, "grad_norm": 1.1598181968857744, "learning_rate": 3.823620379735451e-06, "loss": 0.6595, "step": 3951 }, { "epoch": 4.837209302325581, "grad_norm": 1.3650876503101348, "learning_rate": 3.823076650829267e-06, "loss": 0.9479, "step": 3952 }, { "epoch": 4.83843329253366, "grad_norm": 1.5045762371737315, "learning_rate": 3.8225328349753785e-06, "loss": 0.9674, "step": 3953 }, { "epoch": 4.839657282741738, "grad_norm": 1.6010445721452395, "learning_rate": 3.821988932209525e-06, "loss": 0.5839, "step": 3954 }, { "epoch": 4.840881272949816, "grad_norm": 1.6965744893780248, "learning_rate": 3.821444942567449e-06, "loss": 0.493, "step": 3955 }, { "epoch": 4.842105263157895, "grad_norm": 1.3706340136681063, "learning_rate": 3.820900866084898e-06, "loss": 0.588, "step": 3956 }, { "epoch": 4.843329253365973, "grad_norm": 1.4051680512730989, "learning_rate": 3.820356702797628e-06, "loss": 0.3767, "step": 3957 }, { "epoch": 4.844553243574051, "grad_norm": 1.1698736613723644, "learning_rate": 3.819812452741398e-06, "loss": 1.2562, "step": 3958 }, { "epoch": 4.84577723378213, "grad_norm": 2.523830506395649, "learning_rate": 3.819268115951977e-06, "loss": 0.4927, "step": 3959 }, { "epoch": 4.847001223990208, "grad_norm": 1.065849043989781, "learning_rate": 3.818723692465134e-06, "loss": 0.9747, "step": 3960 }, { "epoch": 4.848225214198287, "grad_norm": 0.8407679129191928, "learning_rate": 3.818179182316648e-06, "loss": 0.6293, "step": 3961 }, { "epoch": 4.849449204406365, "grad_norm": 0.8311965534277637, "learning_rate": 3.817634585542301e-06, "loss": 0.6023, "step": 3962 }, { "epoch": 4.850673194614443, "grad_norm": 1.949280026175921, "learning_rate": 3.817089902177884e-06, "loss": 0.4228, "step": 3963 }, { "epoch": 4.851897184822521, "grad_norm": 1.1997873346518195, "learning_rate": 3.816545132259189e-06, "loss": 0.5893, "step": 3964 }, { "epoch": 4.8531211750306, "grad_norm": 1.4690326187518903, "learning_rate": 3.816000275822018e-06, "loss": 1.353, "step": 3965 }, { "epoch": 4.854345165238678, "grad_norm": 1.9143392940606023, "learning_rate": 3.815455332902177e-06, "loss": 0.5679, "step": 3966 }, { "epoch": 4.855569155446757, "grad_norm": 1.2756682061673152, "learning_rate": 3.814910303535476e-06, "loss": 0.5561, "step": 3967 }, { "epoch": 4.8567931456548346, "grad_norm": 2.139529447484724, "learning_rate": 3.8143651877577344e-06, "loss": 0.8778, "step": 3968 }, { "epoch": 4.858017135862913, "grad_norm": 1.21957366728038, "learning_rate": 3.8138199856047733e-06, "loss": 0.5235, "step": 3969 }, { "epoch": 4.859241126070992, "grad_norm": 1.888503528425335, "learning_rate": 3.8132746971124224e-06, "loss": 0.9168, "step": 3970 }, { "epoch": 4.8604651162790695, "grad_norm": 1.1720225828947206, "learning_rate": 3.812729322316516e-06, "loss": 0.4943, "step": 3971 }, { "epoch": 4.861689106487148, "grad_norm": 0.8362714506051125, "learning_rate": 3.8121838612528943e-06, "loss": 0.5627, "step": 3972 }, { "epoch": 4.862913096695227, "grad_norm": 1.243627080494953, "learning_rate": 3.8116383139574022e-06, "loss": 0.5049, "step": 3973 }, { "epoch": 4.8641370869033045, "grad_norm": 1.0634416646694753, "learning_rate": 3.8110926804658914e-06, "loss": 0.5563, "step": 3974 }, { "epoch": 4.865361077111383, "grad_norm": 1.8463541036261395, "learning_rate": 3.81054696081422e-06, "loss": 1.3296, "step": 3975 }, { "epoch": 4.866585067319462, "grad_norm": 1.2174062596067512, "learning_rate": 3.8100011550382486e-06, "loss": 1.3327, "step": 3976 }, { "epoch": 4.8678090575275395, "grad_norm": 1.0765050370263196, "learning_rate": 3.809455263173847e-06, "loss": 1.1051, "step": 3977 }, { "epoch": 4.869033047735618, "grad_norm": 1.936619438168795, "learning_rate": 3.8089092852568888e-06, "loss": 0.5894, "step": 3978 }, { "epoch": 4.870257037943697, "grad_norm": 1.4085095069385354, "learning_rate": 3.8083632213232537e-06, "loss": 1.343, "step": 3979 }, { "epoch": 4.871481028151774, "grad_norm": 1.7687083418227434, "learning_rate": 3.8078170714088268e-06, "loss": 0.5451, "step": 3980 }, { "epoch": 4.872705018359853, "grad_norm": 1.7002933979636856, "learning_rate": 3.8072708355495005e-06, "loss": 0.9938, "step": 3981 }, { "epoch": 4.873929008567932, "grad_norm": 1.2714081925551262, "learning_rate": 3.8067245137811683e-06, "loss": 1.6539, "step": 3982 }, { "epoch": 4.875152998776009, "grad_norm": 1.1090548968310594, "learning_rate": 3.806178106139735e-06, "loss": 1.5929, "step": 3983 }, { "epoch": 4.876376988984088, "grad_norm": 1.0995048240295118, "learning_rate": 3.8056316126611083e-06, "loss": 0.8757, "step": 3984 }, { "epoch": 4.877600979192167, "grad_norm": 1.3576843538700336, "learning_rate": 3.8050850333812013e-06, "loss": 0.6942, "step": 3985 }, { "epoch": 4.878824969400245, "grad_norm": 1.5748885387767562, "learning_rate": 3.8045383683359316e-06, "loss": 0.8926, "step": 3986 }, { "epoch": 4.880048959608323, "grad_norm": 1.1435382392174436, "learning_rate": 3.8039916175612274e-06, "loss": 0.5699, "step": 3987 }, { "epoch": 4.881272949816402, "grad_norm": 0.8300735069031168, "learning_rate": 3.8034447810930165e-06, "loss": 0.6866, "step": 3988 }, { "epoch": 4.882496940024479, "grad_norm": 1.0290272084837009, "learning_rate": 3.802897858967235e-06, "loss": 0.7894, "step": 3989 }, { "epoch": 4.883720930232558, "grad_norm": 1.3477011852888152, "learning_rate": 3.802350851219826e-06, "loss": 0.5085, "step": 3990 }, { "epoch": 4.8849449204406366, "grad_norm": 1.3971588342359387, "learning_rate": 3.801803757886736e-06, "loss": 0.5261, "step": 3991 }, { "epoch": 4.886168910648715, "grad_norm": 0.9807759244835675, "learning_rate": 3.801256579003918e-06, "loss": 0.8496, "step": 3992 }, { "epoch": 4.887392900856793, "grad_norm": 1.686594222226998, "learning_rate": 3.800709314607331e-06, "loss": 1.0612, "step": 3993 }, { "epoch": 4.8886168910648715, "grad_norm": 1.140738474425307, "learning_rate": 3.8001619647329386e-06, "loss": 0.4646, "step": 3994 }, { "epoch": 4.88984088127295, "grad_norm": 2.792143659579334, "learning_rate": 3.7996145294167102e-06, "loss": 0.4332, "step": 3995 }, { "epoch": 4.891064871481028, "grad_norm": 1.9653949758033364, "learning_rate": 3.799067008694623e-06, "loss": 0.7882, "step": 3996 }, { "epoch": 4.8922888616891065, "grad_norm": 1.0705272891708633, "learning_rate": 3.7985194026026563e-06, "loss": 0.8424, "step": 3997 }, { "epoch": 4.893512851897185, "grad_norm": 1.1114167521064, "learning_rate": 3.7979717111767972e-06, "loss": 0.5117, "step": 3998 }, { "epoch": 4.894736842105263, "grad_norm": 0.7056838581467734, "learning_rate": 3.797423934453038e-06, "loss": 0.4291, "step": 3999 }, { "epoch": 4.8959608323133414, "grad_norm": 1.1894898200360482, "learning_rate": 3.796876072467377e-06, "loss": 0.5197, "step": 4000 }, { "epoch": 4.89718482252142, "grad_norm": 2.376990553253133, "learning_rate": 3.7963281252558177e-06, "loss": 0.6507, "step": 4001 }, { "epoch": 4.898408812729498, "grad_norm": 1.6233673972836817, "learning_rate": 3.7957800928543686e-06, "loss": 0.5149, "step": 4002 }, { "epoch": 4.899632802937576, "grad_norm": 1.4097753879579535, "learning_rate": 3.7952319752990434e-06, "loss": 0.4807, "step": 4003 }, { "epoch": 4.900856793145655, "grad_norm": 1.8043290723005447, "learning_rate": 3.7946837726258645e-06, "loss": 0.6356, "step": 4004 }, { "epoch": 4.902080783353734, "grad_norm": 1.0988203550755848, "learning_rate": 3.7941354848708576e-06, "loss": 0.6945, "step": 4005 }, { "epoch": 4.903304773561811, "grad_norm": 1.3327654516188738, "learning_rate": 3.793587112070052e-06, "loss": 1.0456, "step": 4006 }, { "epoch": 4.90452876376989, "grad_norm": 2.3967307475244017, "learning_rate": 3.7930386542594866e-06, "loss": 0.3715, "step": 4007 }, { "epoch": 4.905752753977968, "grad_norm": 1.8542217101040182, "learning_rate": 3.792490111475203e-06, "loss": 0.3868, "step": 4008 }, { "epoch": 4.906976744186046, "grad_norm": 1.1357612820032272, "learning_rate": 3.791941483753251e-06, "loss": 0.6446, "step": 4009 }, { "epoch": 4.908200734394125, "grad_norm": 2.384612290663425, "learning_rate": 3.7913927711296815e-06, "loss": 0.796, "step": 4010 }, { "epoch": 4.909424724602204, "grad_norm": 0.8134532207235741, "learning_rate": 3.790843973640557e-06, "loss": 0.5313, "step": 4011 }, { "epoch": 4.910648714810281, "grad_norm": 1.3590158678039739, "learning_rate": 3.7902950913219403e-06, "loss": 0.8889, "step": 4012 }, { "epoch": 4.91187270501836, "grad_norm": 2.049161187157738, "learning_rate": 3.789746124209903e-06, "loss": 1.0015, "step": 4013 }, { "epoch": 4.9130966952264385, "grad_norm": 1.8265592203614736, "learning_rate": 3.7891970723405213e-06, "loss": 0.5882, "step": 4014 }, { "epoch": 4.914320685434516, "grad_norm": 1.8338230930272101, "learning_rate": 3.7886479357498757e-06, "loss": 1.2457, "step": 4015 }, { "epoch": 4.915544675642595, "grad_norm": 0.9513454296722619, "learning_rate": 3.7880987144740543e-06, "loss": 0.7097, "step": 4016 }, { "epoch": 4.9167686658506735, "grad_norm": 0.9947612670788142, "learning_rate": 3.7875494085491503e-06, "loss": 0.7452, "step": 4017 }, { "epoch": 4.917992656058751, "grad_norm": 1.6880385776084819, "learning_rate": 3.787000018011261e-06, "loss": 0.596, "step": 4018 }, { "epoch": 4.91921664626683, "grad_norm": 1.5856400415295477, "learning_rate": 3.7864505428964904e-06, "loss": 0.6951, "step": 4019 }, { "epoch": 4.9204406364749085, "grad_norm": 1.6654853768227722, "learning_rate": 3.7859009832409486e-06, "loss": 0.623, "step": 4020 }, { "epoch": 4.921664626682986, "grad_norm": 1.8437118208148826, "learning_rate": 3.7853513390807506e-06, "loss": 0.4773, "step": 4021 }, { "epoch": 4.922888616891065, "grad_norm": 1.5850634575645626, "learning_rate": 3.7848016104520173e-06, "loss": 1.0029, "step": 4022 }, { "epoch": 4.9241126070991434, "grad_norm": 0.996284996296396, "learning_rate": 3.7842517973908734e-06, "loss": 0.4957, "step": 4023 }, { "epoch": 4.925336597307221, "grad_norm": 1.5021426947182797, "learning_rate": 3.783701899933452e-06, "loss": 1.0912, "step": 4024 }, { "epoch": 4.9265605875153, "grad_norm": 1.6380761658031842, "learning_rate": 3.7831519181158897e-06, "loss": 0.3297, "step": 4025 }, { "epoch": 4.927784577723378, "grad_norm": 1.5925334332290995, "learning_rate": 3.7826018519743298e-06, "loss": 0.4794, "step": 4026 }, { "epoch": 4.929008567931456, "grad_norm": 1.676305339551745, "learning_rate": 3.782051701544919e-06, "loss": 1.0357, "step": 4027 }, { "epoch": 4.930232558139535, "grad_norm": 1.0165943256155465, "learning_rate": 3.781501466863814e-06, "loss": 0.3951, "step": 4028 }, { "epoch": 4.931456548347613, "grad_norm": 1.7310555124349274, "learning_rate": 3.7809511479671712e-06, "loss": 0.3738, "step": 4029 }, { "epoch": 4.932680538555692, "grad_norm": 0.8951008335490757, "learning_rate": 3.7804007448911574e-06, "loss": 0.4617, "step": 4030 }, { "epoch": 4.93390452876377, "grad_norm": 1.889247762461582, "learning_rate": 3.779850257671943e-06, "loss": 0.831, "step": 4031 }, { "epoch": 4.935128518971848, "grad_norm": 2.3886581051678903, "learning_rate": 3.779299686345702e-06, "loss": 0.5053, "step": 4032 }, { "epoch": 4.936352509179926, "grad_norm": 0.9000012224203673, "learning_rate": 3.7787490309486186e-06, "loss": 0.4659, "step": 4033 }, { "epoch": 4.937576499388005, "grad_norm": 1.1868639898759117, "learning_rate": 3.778198291516879e-06, "loss": 0.9462, "step": 4034 }, { "epoch": 4.938800489596083, "grad_norm": 1.317697712709867, "learning_rate": 3.7776474680866746e-06, "loss": 1.0201, "step": 4035 }, { "epoch": 4.940024479804162, "grad_norm": 2.2409038538264348, "learning_rate": 3.7770965606942045e-06, "loss": 0.4071, "step": 4036 }, { "epoch": 4.94124847001224, "grad_norm": 1.420891445048487, "learning_rate": 3.7765455693756724e-06, "loss": 0.5668, "step": 4037 }, { "epoch": 4.942472460220318, "grad_norm": 1.198143578888788, "learning_rate": 3.7759944941672865e-06, "loss": 1.589, "step": 4038 }, { "epoch": 4.943696450428397, "grad_norm": 1.2616164036216413, "learning_rate": 3.775443335105262e-06, "loss": 0.5881, "step": 4039 }, { "epoch": 4.944920440636475, "grad_norm": 1.5909513313853134, "learning_rate": 3.7748920922258196e-06, "loss": 1.1935, "step": 4040 }, { "epoch": 4.946144430844553, "grad_norm": 0.987817198283476, "learning_rate": 3.7743407655651844e-06, "loss": 0.5431, "step": 4041 }, { "epoch": 4.947368421052632, "grad_norm": 1.3793093774357976, "learning_rate": 3.773789355159587e-06, "loss": 0.4081, "step": 4042 }, { "epoch": 4.94859241126071, "grad_norm": 1.7640209703743965, "learning_rate": 3.7732378610452656e-06, "loss": 0.4732, "step": 4043 }, { "epoch": 4.949816401468788, "grad_norm": 2.034890475366439, "learning_rate": 3.772686283258461e-06, "loss": 0.3994, "step": 4044 }, { "epoch": 4.951040391676867, "grad_norm": 1.0971562469949732, "learning_rate": 3.7721346218354205e-06, "loss": 0.8991, "step": 4045 }, { "epoch": 4.9522643818849446, "grad_norm": 1.229640776022709, "learning_rate": 3.7715828768123984e-06, "loss": 0.5297, "step": 4046 }, { "epoch": 4.953488372093023, "grad_norm": 1.767418976903907, "learning_rate": 3.771031048225653e-06, "loss": 0.7204, "step": 4047 }, { "epoch": 4.954712362301102, "grad_norm": 1.4262609396107837, "learning_rate": 3.770479136111448e-06, "loss": 0.6621, "step": 4048 }, { "epoch": 4.95593635250918, "grad_norm": 2.0018096356307784, "learning_rate": 3.7699271405060534e-06, "loss": 0.5521, "step": 4049 }, { "epoch": 4.957160342717258, "grad_norm": 1.8239283581930785, "learning_rate": 3.7693750614457447e-06, "loss": 0.5785, "step": 4050 }, { "epoch": 4.958384332925337, "grad_norm": 1.0005215730288117, "learning_rate": 3.768822898966802e-06, "loss": 0.757, "step": 4051 }, { "epoch": 4.9596083231334145, "grad_norm": 0.993603048927035, "learning_rate": 3.7682706531055112e-06, "loss": 0.4965, "step": 4052 }, { "epoch": 4.960832313341493, "grad_norm": 1.1707175819987075, "learning_rate": 3.7677183238981637e-06, "loss": 0.4237, "step": 4053 }, { "epoch": 4.962056303549572, "grad_norm": 1.195842628875113, "learning_rate": 3.767165911381057e-06, "loss": 0.9499, "step": 4054 }, { "epoch": 4.96328029375765, "grad_norm": 1.516191082446433, "learning_rate": 3.766613415590494e-06, "loss": 0.8149, "step": 4055 }, { "epoch": 4.964504283965728, "grad_norm": 1.4816079796836763, "learning_rate": 3.7660608365627823e-06, "loss": 0.5304, "step": 4056 }, { "epoch": 4.965728274173807, "grad_norm": 0.8321710354434264, "learning_rate": 3.765508174334235e-06, "loss": 0.7032, "step": 4057 }, { "epoch": 4.966952264381885, "grad_norm": 0.949275790272603, "learning_rate": 3.764955428941171e-06, "loss": 0.5452, "step": 4058 }, { "epoch": 4.968176254589963, "grad_norm": 0.7330766998440373, "learning_rate": 3.7644026004199153e-06, "loss": 0.4548, "step": 4059 }, { "epoch": 4.969400244798042, "grad_norm": 1.4541933343538465, "learning_rate": 3.7638496888067977e-06, "loss": 1.5921, "step": 4060 }, { "epoch": 4.97062423500612, "grad_norm": 1.25206838591234, "learning_rate": 3.763296694138152e-06, "loss": 0.9662, "step": 4061 }, { "epoch": 4.971848225214198, "grad_norm": 1.4262675088837862, "learning_rate": 3.762743616450321e-06, "loss": 0.9633, "step": 4062 }, { "epoch": 4.973072215422277, "grad_norm": 0.7185696974661676, "learning_rate": 3.76219045577965e-06, "loss": 0.5432, "step": 4063 }, { "epoch": 4.974296205630355, "grad_norm": 0.9837601685795316, "learning_rate": 3.761637212162491e-06, "loss": 0.6323, "step": 4064 }, { "epoch": 4.975520195838433, "grad_norm": 1.0909951142774328, "learning_rate": 3.761083885635201e-06, "loss": 0.9187, "step": 4065 }, { "epoch": 4.976744186046512, "grad_norm": 0.853095821941795, "learning_rate": 3.760530476234142e-06, "loss": 0.5877, "step": 4066 }, { "epoch": 4.97796817625459, "grad_norm": 1.2293647125410725, "learning_rate": 3.759976983995683e-06, "loss": 0.7243, "step": 4067 }, { "epoch": 4.979192166462668, "grad_norm": 1.4547359687504466, "learning_rate": 3.7594234089561966e-06, "loss": 0.5647, "step": 4068 }, { "epoch": 4.9804161566707466, "grad_norm": 1.5669472031218648, "learning_rate": 3.758869751152062e-06, "loss": 1.1577, "step": 4069 }, { "epoch": 4.981640146878825, "grad_norm": 0.955509897662767, "learning_rate": 3.758316010619664e-06, "loss": 0.4811, "step": 4070 }, { "epoch": 4.982864137086903, "grad_norm": 1.0703371762364289, "learning_rate": 3.757762187395391e-06, "loss": 0.6559, "step": 4071 }, { "epoch": 4.9840881272949815, "grad_norm": 1.1282513397309308, "learning_rate": 3.7572082815156407e-06, "loss": 0.6514, "step": 4072 }, { "epoch": 4.98531211750306, "grad_norm": 1.2305169584881515, "learning_rate": 3.7566542930168103e-06, "loss": 0.5804, "step": 4073 }, { "epoch": 4.986536107711139, "grad_norm": 1.1097513332192956, "learning_rate": 3.7561002219353092e-06, "loss": 0.6243, "step": 4074 }, { "epoch": 4.9877600979192165, "grad_norm": 1.3417008359444649, "learning_rate": 3.755546068307547e-06, "loss": 0.4562, "step": 4075 }, { "epoch": 4.988984088127295, "grad_norm": 1.6089805662552246, "learning_rate": 3.754991832169942e-06, "loss": 0.4963, "step": 4076 }, { "epoch": 4.990208078335373, "grad_norm": 1.3370186700907496, "learning_rate": 3.754437513558914e-06, "loss": 0.54, "step": 4077 }, { "epoch": 4.9914320685434515, "grad_norm": 1.1741392124636132, "learning_rate": 3.753883112510894e-06, "loss": 0.6535, "step": 4078 }, { "epoch": 4.99265605875153, "grad_norm": 1.0396880202292036, "learning_rate": 3.753328629062313e-06, "loss": 0.4897, "step": 4079 }, { "epoch": 4.993880048959609, "grad_norm": 1.2740686738281883, "learning_rate": 3.75277406324961e-06, "loss": 1.4296, "step": 4080 }, { "epoch": 4.995104039167686, "grad_norm": 1.065512737727095, "learning_rate": 3.7522194151092305e-06, "loss": 0.8055, "step": 4081 }, { "epoch": 4.996328029375765, "grad_norm": 2.0131264303419667, "learning_rate": 3.751664684677621e-06, "loss": 0.8663, "step": 4082 }, { "epoch": 4.997552019583844, "grad_norm": 1.9110052523783156, "learning_rate": 3.7511098719912385e-06, "loss": 0.3287, "step": 4083 }, { "epoch": 4.998776009791921, "grad_norm": 1.4078873348147225, "learning_rate": 3.7505549770865437e-06, "loss": 0.5291, "step": 4084 }, { "epoch": 5.0, "grad_norm": 2.2426745584441603, "learning_rate": 3.7500000000000005e-06, "loss": 0.5178, "step": 4085 }, { "epoch": 5.001223990208079, "grad_norm": 0.879063109954564, "learning_rate": 3.7494449407680806e-06, "loss": 0.5334, "step": 4086 }, { "epoch": 5.002447980416156, "grad_norm": 1.2929674394615043, "learning_rate": 3.748889799427261e-06, "loss": 1.1984, "step": 4087 }, { "epoch": 5.003671970624235, "grad_norm": 0.7850779611533373, "learning_rate": 3.748334576014023e-06, "loss": 0.4127, "step": 4088 }, { "epoch": 5.004895960832314, "grad_norm": 1.37050649233921, "learning_rate": 3.7477792705648536e-06, "loss": 0.4912, "step": 4089 }, { "epoch": 5.006119951040391, "grad_norm": 1.3461191383221272, "learning_rate": 3.747223883116246e-06, "loss": 0.4866, "step": 4090 }, { "epoch": 5.00734394124847, "grad_norm": 1.439936680629212, "learning_rate": 3.746668413704698e-06, "loss": 0.5285, "step": 4091 }, { "epoch": 5.0085679314565485, "grad_norm": 1.2370517721579402, "learning_rate": 3.7461128623667126e-06, "loss": 1.1801, "step": 4092 }, { "epoch": 5.009791921664626, "grad_norm": 0.9368490407702835, "learning_rate": 3.7455572291388e-06, "loss": 0.7959, "step": 4093 }, { "epoch": 5.011015911872705, "grad_norm": 2.1715713162912396, "learning_rate": 3.7450015140574724e-06, "loss": 0.3894, "step": 4094 }, { "epoch": 5.0122399020807835, "grad_norm": 1.1576250475191492, "learning_rate": 3.7444457171592496e-06, "loss": 1.4326, "step": 4095 }, { "epoch": 5.013463892288861, "grad_norm": 1.785604686964227, "learning_rate": 3.7438898384806586e-06, "loss": 0.9932, "step": 4096 }, { "epoch": 5.01468788249694, "grad_norm": 1.8536028120383583, "learning_rate": 3.7433338780582272e-06, "loss": 0.6609, "step": 4097 }, { "epoch": 5.0159118727050185, "grad_norm": 1.0548000609628008, "learning_rate": 3.7427778359284927e-06, "loss": 0.9104, "step": 4098 }, { "epoch": 5.017135862913097, "grad_norm": 1.0968286145308728, "learning_rate": 3.742221712127995e-06, "loss": 1.3288, "step": 4099 }, { "epoch": 5.018359853121175, "grad_norm": 0.7925730418467919, "learning_rate": 3.7416655066932818e-06, "loss": 0.6108, "step": 4100 }, { "epoch": 5.0195838433292534, "grad_norm": 0.8513827638255641, "learning_rate": 3.7411092196609035e-06, "loss": 0.6235, "step": 4101 }, { "epoch": 5.020807833537332, "grad_norm": 1.0823604876770776, "learning_rate": 3.7405528510674183e-06, "loss": 0.3498, "step": 4102 }, { "epoch": 5.02203182374541, "grad_norm": 1.3650857619678705, "learning_rate": 3.7399964009493874e-06, "loss": 0.4554, "step": 4103 }, { "epoch": 5.023255813953488, "grad_norm": 1.629904401306993, "learning_rate": 3.7394398693433798e-06, "loss": 0.4298, "step": 4104 }, { "epoch": 5.024479804161567, "grad_norm": 1.0138147831073214, "learning_rate": 3.738883256285969e-06, "loss": 0.7369, "step": 4105 }, { "epoch": 5.025703794369645, "grad_norm": 1.3697913775691386, "learning_rate": 3.7383265618137322e-06, "loss": 0.5079, "step": 4106 }, { "epoch": 5.026927784577723, "grad_norm": 1.1908476921551605, "learning_rate": 3.7377697859632543e-06, "loss": 0.9422, "step": 4107 }, { "epoch": 5.028151774785802, "grad_norm": 1.0493736635141853, "learning_rate": 3.7372129287711247e-06, "loss": 1.1541, "step": 4108 }, { "epoch": 5.02937576499388, "grad_norm": 1.7049165168200764, "learning_rate": 3.7366559902739368e-06, "loss": 0.9547, "step": 4109 }, { "epoch": 5.030599755201958, "grad_norm": 1.3537115414480767, "learning_rate": 3.7360989705082917e-06, "loss": 1.4369, "step": 4110 }, { "epoch": 5.031823745410037, "grad_norm": 0.6896965918692632, "learning_rate": 3.735541869510794e-06, "loss": 0.4776, "step": 4111 }, { "epoch": 5.033047735618115, "grad_norm": 0.9936970077908196, "learning_rate": 3.7349846873180544e-06, "loss": 0.3902, "step": 4112 }, { "epoch": 5.034271725826193, "grad_norm": 0.9055234887371305, "learning_rate": 3.7344274239666893e-06, "loss": 0.623, "step": 4113 }, { "epoch": 5.035495716034272, "grad_norm": 0.9707554280565841, "learning_rate": 3.7338700794933204e-06, "loss": 0.5349, "step": 4114 }, { "epoch": 5.03671970624235, "grad_norm": 1.6356064623884405, "learning_rate": 3.7333126539345725e-06, "loss": 1.0474, "step": 4115 }, { "epoch": 5.037943696450428, "grad_norm": 1.9145207426846733, "learning_rate": 3.7327551473270798e-06, "loss": 0.7126, "step": 4116 }, { "epoch": 5.039167686658507, "grad_norm": 1.3860322026856795, "learning_rate": 3.7321975597074777e-06, "loss": 0.5156, "step": 4117 }, { "epoch": 5.0403916768665855, "grad_norm": 1.3506605803598457, "learning_rate": 3.7316398911124097e-06, "loss": 0.4198, "step": 4118 }, { "epoch": 5.041615667074663, "grad_norm": 1.7236380888291132, "learning_rate": 3.7310821415785236e-06, "loss": 0.4653, "step": 4119 }, { "epoch": 5.042839657282742, "grad_norm": 1.4444066149651857, "learning_rate": 3.7305243111424725e-06, "loss": 1.4217, "step": 4120 }, { "epoch": 5.0440636474908205, "grad_norm": 1.9240292627805193, "learning_rate": 3.7299663998409153e-06, "loss": 0.4382, "step": 4121 }, { "epoch": 5.045287637698898, "grad_norm": 1.1667152231757323, "learning_rate": 3.7294084077105153e-06, "loss": 0.3866, "step": 4122 }, { "epoch": 5.046511627906977, "grad_norm": 1.137813148904686, "learning_rate": 3.7288503347879423e-06, "loss": 0.5611, "step": 4123 }, { "epoch": 5.0477356181150554, "grad_norm": 1.3882770754921245, "learning_rate": 3.7282921811098706e-06, "loss": 0.4719, "step": 4124 }, { "epoch": 5.048959608323133, "grad_norm": 1.1697571681947387, "learning_rate": 3.72773394671298e-06, "loss": 0.5961, "step": 4125 }, { "epoch": 5.050183598531212, "grad_norm": 1.356974391428421, "learning_rate": 3.7271756316339553e-06, "loss": 0.6464, "step": 4126 }, { "epoch": 5.05140758873929, "grad_norm": 1.4844829647516093, "learning_rate": 3.7266172359094864e-06, "loss": 1.0278, "step": 4127 }, { "epoch": 5.052631578947368, "grad_norm": 2.0216594710684337, "learning_rate": 3.726058759576271e-06, "loss": 0.5017, "step": 4128 }, { "epoch": 5.053855569155447, "grad_norm": 1.0691567618640823, "learning_rate": 3.7255002026710075e-06, "loss": 0.7224, "step": 4129 }, { "epoch": 5.055079559363525, "grad_norm": 2.701848169656287, "learning_rate": 3.7249415652304044e-06, "loss": 0.8036, "step": 4130 }, { "epoch": 5.056303549571603, "grad_norm": 1.131681151158039, "learning_rate": 3.7243828472911727e-06, "loss": 1.0979, "step": 4131 }, { "epoch": 5.057527539779682, "grad_norm": 1.4617588409866498, "learning_rate": 3.7238240488900277e-06, "loss": 1.9094, "step": 4132 }, { "epoch": 5.05875152998776, "grad_norm": 1.2104573817211184, "learning_rate": 3.7232651700636936e-06, "loss": 1.1141, "step": 4133 }, { "epoch": 5.059975520195838, "grad_norm": 1.495578480473626, "learning_rate": 3.722706210848897e-06, "loss": 0.4783, "step": 4134 }, { "epoch": 5.061199510403917, "grad_norm": 1.263405021383673, "learning_rate": 3.7221471712823707e-06, "loss": 1.2573, "step": 4135 }, { "epoch": 5.062423500611995, "grad_norm": 1.8459275100418115, "learning_rate": 3.7215880514008525e-06, "loss": 1.234, "step": 4136 }, { "epoch": 5.063647490820073, "grad_norm": 1.3739367706553296, "learning_rate": 3.7210288512410865e-06, "loss": 0.4993, "step": 4137 }, { "epoch": 5.064871481028152, "grad_norm": 1.1289127087500335, "learning_rate": 3.7204695708398198e-06, "loss": 0.9894, "step": 4138 }, { "epoch": 5.06609547123623, "grad_norm": 2.113310230898397, "learning_rate": 3.7199102102338075e-06, "loss": 0.4552, "step": 4139 }, { "epoch": 5.067319461444308, "grad_norm": 1.1999709750403422, "learning_rate": 3.7193507694598084e-06, "loss": 0.5266, "step": 4140 }, { "epoch": 5.068543451652387, "grad_norm": 1.1937346031405207, "learning_rate": 3.7187912485545867e-06, "loss": 0.318, "step": 4141 }, { "epoch": 5.069767441860465, "grad_norm": 2.0589744143496036, "learning_rate": 3.718231647554912e-06, "loss": 0.4725, "step": 4142 }, { "epoch": 5.070991432068544, "grad_norm": 1.799782457581629, "learning_rate": 3.7176719664975604e-06, "loss": 0.6213, "step": 4143 }, { "epoch": 5.072215422276622, "grad_norm": 1.3056023423748775, "learning_rate": 3.7171122054193093e-06, "loss": 1.0377, "step": 4144 }, { "epoch": 5.0734394124847, "grad_norm": 0.8847452985521517, "learning_rate": 3.7165523643569466e-06, "loss": 0.7149, "step": 4145 }, { "epoch": 5.074663402692779, "grad_norm": 1.0880395155092983, "learning_rate": 3.7159924433472635e-06, "loss": 1.5937, "step": 4146 }, { "epoch": 5.0758873929008566, "grad_norm": 0.8108060198005063, "learning_rate": 3.7154324424270526e-06, "loss": 0.5517, "step": 4147 }, { "epoch": 5.077111383108935, "grad_norm": 1.3199667032488156, "learning_rate": 3.7148723616331184e-06, "loss": 0.5656, "step": 4148 }, { "epoch": 5.078335373317014, "grad_norm": 0.7923682615015675, "learning_rate": 3.7143122010022657e-06, "loss": 0.6491, "step": 4149 }, { "epoch": 5.0795593635250915, "grad_norm": 1.1042858860232514, "learning_rate": 3.713751960571307e-06, "loss": 1.2991, "step": 4150 }, { "epoch": 5.08078335373317, "grad_norm": 1.6440592353873784, "learning_rate": 3.713191640377058e-06, "loss": 0.5887, "step": 4151 }, { "epoch": 5.082007343941249, "grad_norm": 1.1036556716091726, "learning_rate": 3.7126312404563425e-06, "loss": 0.4735, "step": 4152 }, { "epoch": 5.0832313341493265, "grad_norm": 0.9546618077624657, "learning_rate": 3.7120707608459862e-06, "loss": 0.5163, "step": 4153 }, { "epoch": 5.084455324357405, "grad_norm": 1.4016475397757442, "learning_rate": 3.711510201582823e-06, "loss": 0.9069, "step": 4154 }, { "epoch": 5.085679314565484, "grad_norm": 1.9474226357838167, "learning_rate": 3.7109495627036907e-06, "loss": 0.5026, "step": 4155 }, { "epoch": 5.0869033047735615, "grad_norm": 1.531924578440938, "learning_rate": 3.7103888442454323e-06, "loss": 0.5163, "step": 4156 }, { "epoch": 5.08812729498164, "grad_norm": 1.2275641519446574, "learning_rate": 3.709828046244896e-06, "loss": 1.0355, "step": 4157 }, { "epoch": 5.089351285189719, "grad_norm": 1.3145639856708806, "learning_rate": 3.7092671687389347e-06, "loss": 0.8858, "step": 4158 }, { "epoch": 5.090575275397796, "grad_norm": 1.720542482863626, "learning_rate": 3.7087062117644086e-06, "loss": 0.4566, "step": 4159 }, { "epoch": 5.091799265605875, "grad_norm": 1.618040203535799, "learning_rate": 3.7081451753581804e-06, "loss": 1.2262, "step": 4160 }, { "epoch": 5.093023255813954, "grad_norm": 1.602327120837178, "learning_rate": 3.70758405955712e-06, "loss": 0.6385, "step": 4161 }, { "epoch": 5.094247246022032, "grad_norm": 1.4034583226792785, "learning_rate": 3.7070228643981014e-06, "loss": 1.6267, "step": 4162 }, { "epoch": 5.09547123623011, "grad_norm": 1.0862418430091398, "learning_rate": 3.706461589918005e-06, "loss": 0.5545, "step": 4163 }, { "epoch": 5.096695226438189, "grad_norm": 1.2184890344342532, "learning_rate": 3.7059002361537157e-06, "loss": 0.9522, "step": 4164 }, { "epoch": 5.097919216646267, "grad_norm": 0.6731944428744475, "learning_rate": 3.705338803142123e-06, "loss": 0.3624, "step": 4165 }, { "epoch": 5.099143206854345, "grad_norm": 0.9512192731237781, "learning_rate": 3.704777290920122e-06, "loss": 0.676, "step": 4166 }, { "epoch": 5.100367197062424, "grad_norm": 2.041530024402415, "learning_rate": 3.7042156995246136e-06, "loss": 0.8525, "step": 4167 }, { "epoch": 5.101591187270502, "grad_norm": 1.5954274062483031, "learning_rate": 3.703654028992503e-06, "loss": 0.8653, "step": 4168 }, { "epoch": 5.10281517747858, "grad_norm": 1.8890975149014362, "learning_rate": 3.7030922793607033e-06, "loss": 0.457, "step": 4169 }, { "epoch": 5.1040391676866586, "grad_norm": 0.9512379699183214, "learning_rate": 3.702530450666128e-06, "loss": 0.7673, "step": 4170 }, { "epoch": 5.105263157894737, "grad_norm": 1.1872806363991375, "learning_rate": 3.7019685429456986e-06, "loss": 0.7139, "step": 4171 }, { "epoch": 5.106487148102815, "grad_norm": 1.1928107148642928, "learning_rate": 3.7014065562363433e-06, "loss": 1.0118, "step": 4172 }, { "epoch": 5.1077111383108935, "grad_norm": 1.56796414804517, "learning_rate": 3.700844490574993e-06, "loss": 1.3419, "step": 4173 }, { "epoch": 5.108935128518972, "grad_norm": 1.8714973853575212, "learning_rate": 3.700282345998584e-06, "loss": 0.9021, "step": 4174 }, { "epoch": 5.11015911872705, "grad_norm": 0.8545078042749233, "learning_rate": 3.699720122544058e-06, "loss": 0.5572, "step": 4175 }, { "epoch": 5.1113831089351285, "grad_norm": 1.2603126544688865, "learning_rate": 3.699157820248364e-06, "loss": 0.4862, "step": 4176 }, { "epoch": 5.112607099143207, "grad_norm": 1.2421682439620025, "learning_rate": 3.698595439148453e-06, "loss": 0.6311, "step": 4177 }, { "epoch": 5.113831089351285, "grad_norm": 1.045275274719113, "learning_rate": 3.6980329792812834e-06, "loss": 1.3553, "step": 4178 }, { "epoch": 5.1150550795593634, "grad_norm": 0.764871336470395, "learning_rate": 3.697470440683817e-06, "loss": 0.4603, "step": 4179 }, { "epoch": 5.116279069767442, "grad_norm": 1.3572763086511919, "learning_rate": 3.6969078233930235e-06, "loss": 0.324, "step": 4180 }, { "epoch": 5.11750305997552, "grad_norm": 1.8504411927839788, "learning_rate": 3.696345127445874e-06, "loss": 0.5875, "step": 4181 }, { "epoch": 5.118727050183598, "grad_norm": 0.8957635388106361, "learning_rate": 3.695782352879348e-06, "loss": 0.6044, "step": 4182 }, { "epoch": 5.119951040391677, "grad_norm": 1.5018970518248727, "learning_rate": 3.695219499730428e-06, "loss": 1.2084, "step": 4183 }, { "epoch": 5.121175030599755, "grad_norm": 1.7669966369496808, "learning_rate": 3.694656568036104e-06, "loss": 0.5161, "step": 4184 }, { "epoch": 5.122399020807833, "grad_norm": 1.9853933456709052, "learning_rate": 3.694093557833369e-06, "loss": 0.4, "step": 4185 }, { "epoch": 5.123623011015912, "grad_norm": 1.1833097307579932, "learning_rate": 3.6935304691592216e-06, "loss": 1.1906, "step": 4186 }, { "epoch": 5.124847001223991, "grad_norm": 2.104227960818154, "learning_rate": 3.692967302050667e-06, "loss": 0.4266, "step": 4187 }, { "epoch": 5.126070991432068, "grad_norm": 1.2816751454893844, "learning_rate": 3.6924040565447132e-06, "loss": 1.2014, "step": 4188 }, { "epoch": 5.127294981640147, "grad_norm": 1.59963374214599, "learning_rate": 3.6918407326783755e-06, "loss": 0.3379, "step": 4189 }, { "epoch": 5.128518971848226, "grad_norm": 0.8847622790177622, "learning_rate": 3.6912773304886735e-06, "loss": 0.4638, "step": 4190 }, { "epoch": 5.129742962056303, "grad_norm": 1.117305048085905, "learning_rate": 3.690713850012631e-06, "loss": 0.5604, "step": 4191 }, { "epoch": 5.130966952264382, "grad_norm": 0.9019646797406172, "learning_rate": 3.6901502912872785e-06, "loss": 0.4997, "step": 4192 }, { "epoch": 5.1321909424724605, "grad_norm": 1.9949322959785634, "learning_rate": 3.6895866543496513e-06, "loss": 0.7736, "step": 4193 }, { "epoch": 5.133414932680538, "grad_norm": 2.392832768104058, "learning_rate": 3.689022939236788e-06, "loss": 0.6324, "step": 4194 }, { "epoch": 5.134638922888617, "grad_norm": 2.3504975349535093, "learning_rate": 3.6884591459857366e-06, "loss": 0.7423, "step": 4195 }, { "epoch": 5.1358629130966955, "grad_norm": 1.424858648740238, "learning_rate": 3.6878952746335454e-06, "loss": 1.2217, "step": 4196 }, { "epoch": 5.137086903304773, "grad_norm": 1.0722400936032446, "learning_rate": 3.6873313252172697e-06, "loss": 0.5489, "step": 4197 }, { "epoch": 5.138310893512852, "grad_norm": 1.1277997100219441, "learning_rate": 3.686767297773972e-06, "loss": 0.8362, "step": 4198 }, { "epoch": 5.1395348837209305, "grad_norm": 1.6709588311305699, "learning_rate": 3.6862031923407166e-06, "loss": 0.8448, "step": 4199 }, { "epoch": 5.140758873929008, "grad_norm": 1.9040741481170438, "learning_rate": 3.6856390089545748e-06, "loss": 0.4416, "step": 4200 }, { "epoch": 5.141982864137087, "grad_norm": 1.902631704634107, "learning_rate": 3.685074747652622e-06, "loss": 0.8998, "step": 4201 }, { "epoch": 5.1432068543451654, "grad_norm": 1.3327495043594968, "learning_rate": 3.684510408471942e-06, "loss": 0.5968, "step": 4202 }, { "epoch": 5.144430844553243, "grad_norm": 2.2138819261694858, "learning_rate": 3.6839459914496174e-06, "loss": 0.4656, "step": 4203 }, { "epoch": 5.145654834761322, "grad_norm": 1.3383867155755353, "learning_rate": 3.683381496622742e-06, "loss": 0.9334, "step": 4204 }, { "epoch": 5.1468788249694, "grad_norm": 1.8122387182328838, "learning_rate": 3.6828169240284117e-06, "loss": 0.4576, "step": 4205 }, { "epoch": 5.148102815177479, "grad_norm": 1.1587037698078475, "learning_rate": 3.682252273703728e-06, "loss": 0.7065, "step": 4206 }, { "epoch": 5.149326805385557, "grad_norm": 1.2836000263235592, "learning_rate": 3.6816875456857975e-06, "loss": 1.5624, "step": 4207 }, { "epoch": 5.150550795593635, "grad_norm": 0.868628635143518, "learning_rate": 3.681122740011733e-06, "loss": 0.6546, "step": 4208 }, { "epoch": 5.151774785801714, "grad_norm": 2.274036871394697, "learning_rate": 3.6805578567186494e-06, "loss": 0.7296, "step": 4209 }, { "epoch": 5.152998776009792, "grad_norm": 1.4012855292464763, "learning_rate": 3.6799928958436707e-06, "loss": 0.5361, "step": 4210 }, { "epoch": 5.15422276621787, "grad_norm": 0.6474501364057795, "learning_rate": 3.679427857423924e-06, "loss": 0.4437, "step": 4211 }, { "epoch": 5.155446756425949, "grad_norm": 1.652837255842577, "learning_rate": 3.67886274149654e-06, "loss": 0.5843, "step": 4212 }, { "epoch": 5.156670746634027, "grad_norm": 1.1994377999079722, "learning_rate": 3.6782975480986573e-06, "loss": 0.4336, "step": 4213 }, { "epoch": 5.157894736842105, "grad_norm": 1.9037169410052552, "learning_rate": 3.6777322772674185e-06, "loss": 0.3775, "step": 4214 }, { "epoch": 5.159118727050184, "grad_norm": 1.8120169682393892, "learning_rate": 3.6771669290399697e-06, "loss": 0.9274, "step": 4215 }, { "epoch": 5.160342717258262, "grad_norm": 1.2996121844962045, "learning_rate": 3.676601503453465e-06, "loss": 0.7781, "step": 4216 }, { "epoch": 5.16156670746634, "grad_norm": 2.2817268015283916, "learning_rate": 3.6760360005450612e-06, "loss": 0.4193, "step": 4217 }, { "epoch": 5.162790697674419, "grad_norm": 0.9105964926527785, "learning_rate": 3.675470420351921e-06, "loss": 0.5987, "step": 4218 }, { "epoch": 5.164014687882497, "grad_norm": 0.9348753065922383, "learning_rate": 3.6749047629112133e-06, "loss": 0.646, "step": 4219 }, { "epoch": 5.165238678090575, "grad_norm": 1.1132848755892766, "learning_rate": 3.674339028260109e-06, "loss": 1.0234, "step": 4220 }, { "epoch": 5.166462668298654, "grad_norm": 0.9630361764337361, "learning_rate": 3.673773216435788e-06, "loss": 1.058, "step": 4221 }, { "epoch": 5.167686658506732, "grad_norm": 1.9118891677009413, "learning_rate": 3.673207327475433e-06, "loss": 0.5549, "step": 4222 }, { "epoch": 5.16891064871481, "grad_norm": 1.7222204406699753, "learning_rate": 3.6726413614162314e-06, "loss": 0.8573, "step": 4223 }, { "epoch": 5.170134638922889, "grad_norm": 1.9478035758937027, "learning_rate": 3.6720753182953773e-06, "loss": 0.5718, "step": 4224 }, { "epoch": 5.1713586291309666, "grad_norm": 1.3528751275392972, "learning_rate": 3.6715091981500684e-06, "loss": 1.4802, "step": 4225 }, { "epoch": 5.172582619339045, "grad_norm": 1.7613979802160893, "learning_rate": 3.6709430010175085e-06, "loss": 0.4615, "step": 4226 }, { "epoch": 5.173806609547124, "grad_norm": 1.1047008845395279, "learning_rate": 3.6703767269349043e-06, "loss": 0.7411, "step": 4227 }, { "epoch": 5.1750305997552015, "grad_norm": 1.2672379226662693, "learning_rate": 3.6698103759394722e-06, "loss": 0.9371, "step": 4228 }, { "epoch": 5.17625458996328, "grad_norm": 0.9645125998270491, "learning_rate": 3.6692439480684273e-06, "loss": 0.7038, "step": 4229 }, { "epoch": 5.177478580171359, "grad_norm": 1.395446972639668, "learning_rate": 3.6686774433589956e-06, "loss": 0.9791, "step": 4230 }, { "epoch": 5.178702570379437, "grad_norm": 1.4963112653346184, "learning_rate": 3.668110861848405e-06, "loss": 0.4498, "step": 4231 }, { "epoch": 5.179926560587515, "grad_norm": 1.3687443724500308, "learning_rate": 3.6675442035738894e-06, "loss": 0.5696, "step": 4232 }, { "epoch": 5.181150550795594, "grad_norm": 1.0323074139480561, "learning_rate": 3.666977468572687e-06, "loss": 0.5446, "step": 4233 }, { "epoch": 5.182374541003672, "grad_norm": 1.416248451570722, "learning_rate": 3.666410656882042e-06, "loss": 0.7775, "step": 4234 }, { "epoch": 5.18359853121175, "grad_norm": 0.9383900752641997, "learning_rate": 3.665843768539202e-06, "loss": 0.526, "step": 4235 }, { "epoch": 5.184822521419829, "grad_norm": 0.8362777184511306, "learning_rate": 3.665276803581422e-06, "loss": 0.5577, "step": 4236 }, { "epoch": 5.186046511627907, "grad_norm": 1.8664563613432958, "learning_rate": 3.6647097620459616e-06, "loss": 0.5218, "step": 4237 }, { "epoch": 5.187270501835985, "grad_norm": 0.9510034781743811, "learning_rate": 3.6641426439700823e-06, "loss": 0.5608, "step": 4238 }, { "epoch": 5.188494492044064, "grad_norm": 1.1113972467260913, "learning_rate": 3.6635754493910545e-06, "loss": 0.7994, "step": 4239 }, { "epoch": 5.189718482252142, "grad_norm": 1.0703422473750874, "learning_rate": 3.6630081783461524e-06, "loss": 0.4643, "step": 4240 }, { "epoch": 5.19094247246022, "grad_norm": 2.236755590282444, "learning_rate": 3.6624408308726544e-06, "loss": 0.5429, "step": 4241 }, { "epoch": 5.192166462668299, "grad_norm": 2.758617405891889, "learning_rate": 3.6618734070078438e-06, "loss": 0.4853, "step": 4242 }, { "epoch": 5.193390452876377, "grad_norm": 0.872513061045636, "learning_rate": 3.6613059067890116e-06, "loss": 0.5527, "step": 4243 }, { "epoch": 5.194614443084455, "grad_norm": 0.8795404940423862, "learning_rate": 3.6607383302534495e-06, "loss": 0.5245, "step": 4244 }, { "epoch": 5.195838433292534, "grad_norm": 1.141350221848387, "learning_rate": 3.660170677438458e-06, "loss": 0.6369, "step": 4245 }, { "epoch": 5.197062423500612, "grad_norm": 0.8425276661498231, "learning_rate": 3.6596029483813416e-06, "loss": 0.6145, "step": 4246 }, { "epoch": 5.19828641370869, "grad_norm": 1.8482680757987349, "learning_rate": 3.6590351431194072e-06, "loss": 0.5768, "step": 4247 }, { "epoch": 5.1995104039167686, "grad_norm": 0.7710801091320874, "learning_rate": 3.658467261689971e-06, "loss": 0.4572, "step": 4248 }, { "epoch": 5.200734394124847, "grad_norm": 1.6895540235547482, "learning_rate": 3.6578993041303505e-06, "loss": 0.6684, "step": 4249 }, { "epoch": 5.201958384332926, "grad_norm": 1.0556606630181802, "learning_rate": 3.6573312704778713e-06, "loss": 0.892, "step": 4250 }, { "epoch": 5.2031823745410035, "grad_norm": 2.345209956522388, "learning_rate": 3.6567631607698607e-06, "loss": 1.2143, "step": 4251 }, { "epoch": 5.204406364749082, "grad_norm": 1.2435432924020606, "learning_rate": 3.6561949750436556e-06, "loss": 0.4869, "step": 4252 }, { "epoch": 5.205630354957161, "grad_norm": 2.203321861233657, "learning_rate": 3.655626713336591e-06, "loss": 0.8624, "step": 4253 }, { "epoch": 5.2068543451652385, "grad_norm": 2.151907305915236, "learning_rate": 3.6550583756860144e-06, "loss": 0.456, "step": 4254 }, { "epoch": 5.208078335373317, "grad_norm": 1.4895870012533947, "learning_rate": 3.6544899621292735e-06, "loss": 0.5795, "step": 4255 }, { "epoch": 5.209302325581396, "grad_norm": 1.0134358082535861, "learning_rate": 3.653921472703722e-06, "loss": 0.4887, "step": 4256 }, { "epoch": 5.2105263157894735, "grad_norm": 2.0343314285458987, "learning_rate": 3.65335290744672e-06, "loss": 0.647, "step": 4257 }, { "epoch": 5.211750305997552, "grad_norm": 1.8262710790604717, "learning_rate": 3.6527842663956303e-06, "loss": 0.5659, "step": 4258 }, { "epoch": 5.212974296205631, "grad_norm": 0.9453135998552632, "learning_rate": 3.652215549587823e-06, "loss": 0.8411, "step": 4259 }, { "epoch": 5.214198286413708, "grad_norm": 1.8262681854862421, "learning_rate": 3.6516467570606704e-06, "loss": 1.1156, "step": 4260 }, { "epoch": 5.215422276621787, "grad_norm": 2.106782380211626, "learning_rate": 3.6510778888515547e-06, "loss": 0.4956, "step": 4261 }, { "epoch": 5.216646266829866, "grad_norm": 1.6541828641626706, "learning_rate": 3.650508944997856e-06, "loss": 0.5122, "step": 4262 }, { "epoch": 5.217870257037943, "grad_norm": 1.5530828907858478, "learning_rate": 3.649939925536965e-06, "loss": 0.5839, "step": 4263 }, { "epoch": 5.219094247246022, "grad_norm": 1.2533263589874262, "learning_rate": 3.649370830506276e-06, "loss": 1.6655, "step": 4264 }, { "epoch": 5.220318237454101, "grad_norm": 1.3848748863640334, "learning_rate": 3.6488016599431867e-06, "loss": 0.717, "step": 4265 }, { "epoch": 5.221542227662178, "grad_norm": 1.5625080246024787, "learning_rate": 3.6482324138851022e-06, "loss": 0.7896, "step": 4266 }, { "epoch": 5.222766217870257, "grad_norm": 0.8230990358383853, "learning_rate": 3.6476630923694302e-06, "loss": 0.7295, "step": 4267 }, { "epoch": 5.223990208078336, "grad_norm": 1.522168515146073, "learning_rate": 3.6470936954335833e-06, "loss": 0.9592, "step": 4268 }, { "epoch": 5.225214198286413, "grad_norm": 1.341722139791942, "learning_rate": 3.6465242231149835e-06, "loss": 0.7645, "step": 4269 }, { "epoch": 5.226438188494492, "grad_norm": 1.255611653051533, "learning_rate": 3.645954675451051e-06, "loss": 0.5132, "step": 4270 }, { "epoch": 5.2276621787025706, "grad_norm": 1.9476168524834452, "learning_rate": 3.6453850524792168e-06, "loss": 0.7537, "step": 4271 }, { "epoch": 5.228886168910648, "grad_norm": 1.9694017941230038, "learning_rate": 3.6448153542369137e-06, "loss": 0.3814, "step": 4272 }, { "epoch": 5.230110159118727, "grad_norm": 1.0873351913771432, "learning_rate": 3.644245580761579e-06, "loss": 0.5667, "step": 4273 }, { "epoch": 5.2313341493268055, "grad_norm": 1.4755616377331195, "learning_rate": 3.6436757320906575e-06, "loss": 0.6715, "step": 4274 }, { "epoch": 5.232558139534884, "grad_norm": 0.8574292158653031, "learning_rate": 3.6431058082615966e-06, "loss": 0.5548, "step": 4275 }, { "epoch": 5.233782129742962, "grad_norm": 1.4267907787767884, "learning_rate": 3.642535809311851e-06, "loss": 1.5856, "step": 4276 }, { "epoch": 5.2350061199510405, "grad_norm": 1.6404010650653775, "learning_rate": 3.641965735278876e-06, "loss": 1.162, "step": 4277 }, { "epoch": 5.236230110159119, "grad_norm": 1.3438574165722783, "learning_rate": 3.6413955862001393e-06, "loss": 0.9572, "step": 4278 }, { "epoch": 5.237454100367197, "grad_norm": 1.3896228728937945, "learning_rate": 3.6408253621131046e-06, "loss": 0.5608, "step": 4279 }, { "epoch": 5.2386780905752754, "grad_norm": 1.2877063442489123, "learning_rate": 3.6402550630552467e-06, "loss": 0.7576, "step": 4280 }, { "epoch": 5.239902080783354, "grad_norm": 0.8593877803908888, "learning_rate": 3.639684689064045e-06, "loss": 0.6595, "step": 4281 }, { "epoch": 5.241126070991432, "grad_norm": 0.9903624087764138, "learning_rate": 3.6391142401769795e-06, "loss": 0.7714, "step": 4282 }, { "epoch": 5.24235006119951, "grad_norm": 1.017372684148912, "learning_rate": 3.6385437164315397e-06, "loss": 0.6592, "step": 4283 }, { "epoch": 5.243574051407589, "grad_norm": 1.4417815363737445, "learning_rate": 3.637973117865219e-06, "loss": 0.5447, "step": 4284 }, { "epoch": 5.244798041615667, "grad_norm": 1.5823713893278137, "learning_rate": 3.6374024445155132e-06, "loss": 0.7489, "step": 4285 }, { "epoch": 5.246022031823745, "grad_norm": 1.4653821487486918, "learning_rate": 3.6368316964199253e-06, "loss": 1.0406, "step": 4286 }, { "epoch": 5.247246022031824, "grad_norm": 0.7469820993382222, "learning_rate": 3.6362608736159646e-06, "loss": 0.3605, "step": 4287 }, { "epoch": 5.248470012239902, "grad_norm": 1.1421261119741495, "learning_rate": 3.63568997614114e-06, "loss": 0.5748, "step": 4288 }, { "epoch": 5.24969400244798, "grad_norm": 1.2515724347401114, "learning_rate": 3.6351190040329725e-06, "loss": 0.6293, "step": 4289 }, { "epoch": 5.250917992656059, "grad_norm": 1.741300229539501, "learning_rate": 3.634547957328982e-06, "loss": 1.3733, "step": 4290 }, { "epoch": 5.252141982864137, "grad_norm": 1.9757614712840033, "learning_rate": 3.633976836066696e-06, "loss": 1.3193, "step": 4291 }, { "epoch": 5.253365973072215, "grad_norm": 1.927187384614435, "learning_rate": 3.6334056402836467e-06, "loss": 0.6998, "step": 4292 }, { "epoch": 5.254589963280294, "grad_norm": 1.0603840141672665, "learning_rate": 3.632834370017371e-06, "loss": 0.7028, "step": 4293 }, { "epoch": 5.2558139534883725, "grad_norm": 1.9318601824678152, "learning_rate": 3.6322630253054104e-06, "loss": 1.0189, "step": 4294 }, { "epoch": 5.25703794369645, "grad_norm": 1.1057170345620964, "learning_rate": 3.631691606185312e-06, "loss": 0.5487, "step": 4295 }, { "epoch": 5.258261933904529, "grad_norm": 1.672547013443651, "learning_rate": 3.631120112694627e-06, "loss": 0.7487, "step": 4296 }, { "epoch": 5.2594859241126075, "grad_norm": 1.4242594766587162, "learning_rate": 3.630548544870912e-06, "loss": 1.2183, "step": 4297 }, { "epoch": 5.260709914320685, "grad_norm": 1.6617565266105125, "learning_rate": 3.6299769027517284e-06, "loss": 0.8531, "step": 4298 }, { "epoch": 5.261933904528764, "grad_norm": 1.3713296595350237, "learning_rate": 3.6294051863746417e-06, "loss": 1.2205, "step": 4299 }, { "epoch": 5.2631578947368425, "grad_norm": 1.237369473220036, "learning_rate": 3.628833395777224e-06, "loss": 1.2713, "step": 4300 }, { "epoch": 5.26438188494492, "grad_norm": 0.8752038254204331, "learning_rate": 3.62826153099705e-06, "loss": 0.584, "step": 4301 }, { "epoch": 5.265605875152999, "grad_norm": 1.1589345523404555, "learning_rate": 3.6276895920717026e-06, "loss": 1.3414, "step": 4302 }, { "epoch": 5.2668298653610774, "grad_norm": 0.9981508254046779, "learning_rate": 3.627117579038765e-06, "loss": 0.6779, "step": 4303 }, { "epoch": 5.268053855569155, "grad_norm": 1.0932820397484142, "learning_rate": 3.6265454919358303e-06, "loss": 0.6691, "step": 4304 }, { "epoch": 5.269277845777234, "grad_norm": 1.449495574383733, "learning_rate": 3.625973330800491e-06, "loss": 0.6458, "step": 4305 }, { "epoch": 5.270501835985312, "grad_norm": 1.799563036915311, "learning_rate": 3.6254010956703507e-06, "loss": 0.4373, "step": 4306 }, { "epoch": 5.27172582619339, "grad_norm": 1.8928072195161407, "learning_rate": 3.6248287865830124e-06, "loss": 0.4449, "step": 4307 }, { "epoch": 5.272949816401469, "grad_norm": 1.6948485079208713, "learning_rate": 3.6242564035760863e-06, "loss": 0.395, "step": 4308 }, { "epoch": 5.274173806609547, "grad_norm": 1.0147783171711569, "learning_rate": 3.6236839466871882e-06, "loss": 0.553, "step": 4309 }, { "epoch": 5.275397796817625, "grad_norm": 1.2931642657680622, "learning_rate": 3.6231114159539367e-06, "loss": 0.7733, "step": 4310 }, { "epoch": 5.276621787025704, "grad_norm": 1.6768857578002503, "learning_rate": 3.622538811413958e-06, "loss": 0.5098, "step": 4311 }, { "epoch": 5.277845777233782, "grad_norm": 0.9122733049889713, "learning_rate": 3.6219661331048793e-06, "loss": 0.8604, "step": 4312 }, { "epoch": 5.27906976744186, "grad_norm": 1.8750814128719118, "learning_rate": 3.6213933810643374e-06, "loss": 0.507, "step": 4313 }, { "epoch": 5.280293757649939, "grad_norm": 1.7671061774966188, "learning_rate": 3.6208205553299704e-06, "loss": 0.4821, "step": 4314 }, { "epoch": 5.281517747858017, "grad_norm": 1.1726248470822982, "learning_rate": 3.6202476559394212e-06, "loss": 0.3834, "step": 4315 }, { "epoch": 5.282741738066095, "grad_norm": 1.456481473442461, "learning_rate": 3.6196746829303396e-06, "loss": 0.5542, "step": 4316 }, { "epoch": 5.283965728274174, "grad_norm": 1.8393266753312836, "learning_rate": 3.6191016363403796e-06, "loss": 0.7752, "step": 4317 }, { "epoch": 5.285189718482252, "grad_norm": 0.6288523591449686, "learning_rate": 3.6185285162071994e-06, "loss": 0.3653, "step": 4318 }, { "epoch": 5.286413708690331, "grad_norm": 1.7753837001902582, "learning_rate": 3.6179553225684623e-06, "loss": 0.3622, "step": 4319 }, { "epoch": 5.287637698898409, "grad_norm": 1.371734070150983, "learning_rate": 3.617382055461836e-06, "loss": 0.4289, "step": 4320 }, { "epoch": 5.288861689106487, "grad_norm": 1.3392432312160503, "learning_rate": 3.616808714924994e-06, "loss": 1.1126, "step": 4321 }, { "epoch": 5.290085679314566, "grad_norm": 2.0862647165762853, "learning_rate": 3.6162353009956147e-06, "loss": 0.8235, "step": 4322 }, { "epoch": 5.291309669522644, "grad_norm": 1.49136270423915, "learning_rate": 3.6156618137113796e-06, "loss": 0.6126, "step": 4323 }, { "epoch": 5.292533659730722, "grad_norm": 1.487436385736882, "learning_rate": 3.6150882531099766e-06, "loss": 0.4424, "step": 4324 }, { "epoch": 5.293757649938801, "grad_norm": 0.8562819828701981, "learning_rate": 3.614514619229098e-06, "loss": 0.6231, "step": 4325 }, { "epoch": 5.2949816401468786, "grad_norm": 0.8443544891622166, "learning_rate": 3.6139409121064415e-06, "loss": 0.5357, "step": 4326 }, { "epoch": 5.296205630354957, "grad_norm": 1.434583532710112, "learning_rate": 3.6133671317797077e-06, "loss": 0.6337, "step": 4327 }, { "epoch": 5.297429620563036, "grad_norm": 0.8520367352551844, "learning_rate": 3.612793278286605e-06, "loss": 0.5868, "step": 4328 }, { "epoch": 5.2986536107711135, "grad_norm": 0.8348702934201354, "learning_rate": 3.612219351664843e-06, "loss": 0.5629, "step": 4329 }, { "epoch": 5.299877600979192, "grad_norm": 1.350359829851054, "learning_rate": 3.611645351952139e-06, "loss": 0.8494, "step": 4330 }, { "epoch": 5.301101591187271, "grad_norm": 1.103332734325432, "learning_rate": 3.6110712791862144e-06, "loss": 0.6079, "step": 4331 }, { "epoch": 5.3023255813953485, "grad_norm": 1.6788274533826235, "learning_rate": 3.6104971334047954e-06, "loss": 0.799, "step": 4332 }, { "epoch": 5.303549571603427, "grad_norm": 2.3286078102087324, "learning_rate": 3.6099229146456117e-06, "loss": 0.5926, "step": 4333 }, { "epoch": 5.304773561811506, "grad_norm": 1.7727867096881784, "learning_rate": 3.6093486229464e-06, "loss": 0.4852, "step": 4334 }, { "epoch": 5.3059975520195835, "grad_norm": 1.0179037168070049, "learning_rate": 3.608774258344899e-06, "loss": 0.6, "step": 4335 }, { "epoch": 5.307221542227662, "grad_norm": 0.9358668039847722, "learning_rate": 3.608199820878855e-06, "loss": 0.4602, "step": 4336 }, { "epoch": 5.308445532435741, "grad_norm": 1.5551217655873437, "learning_rate": 3.6076253105860186e-06, "loss": 0.604, "step": 4337 }, { "epoch": 5.309669522643819, "grad_norm": 1.5577739875215777, "learning_rate": 3.607050727504143e-06, "loss": 0.6656, "step": 4338 }, { "epoch": 5.310893512851897, "grad_norm": 1.1324409520924996, "learning_rate": 3.606476071670988e-06, "loss": 0.588, "step": 4339 }, { "epoch": 5.312117503059976, "grad_norm": 2.447626100294901, "learning_rate": 3.6059013431243186e-06, "loss": 0.4034, "step": 4340 }, { "epoch": 5.313341493268053, "grad_norm": 1.6662919707250525, "learning_rate": 3.6053265419019034e-06, "loss": 0.4454, "step": 4341 }, { "epoch": 5.314565483476132, "grad_norm": 1.7237407926263766, "learning_rate": 3.604751668041516e-06, "loss": 0.4165, "step": 4342 }, { "epoch": 5.315789473684211, "grad_norm": 1.1196518810755984, "learning_rate": 3.6041767215809353e-06, "loss": 0.6398, "step": 4343 }, { "epoch": 5.317013463892289, "grad_norm": 1.7148925863675162, "learning_rate": 3.603601702557944e-06, "loss": 0.5829, "step": 4344 }, { "epoch": 5.318237454100367, "grad_norm": 1.5169354463987696, "learning_rate": 3.6030266110103314e-06, "loss": 0.5551, "step": 4345 }, { "epoch": 5.319461444308446, "grad_norm": 1.1835446323880072, "learning_rate": 3.6024514469758896e-06, "loss": 0.7367, "step": 4346 }, { "epoch": 5.320685434516524, "grad_norm": 1.3457978780760955, "learning_rate": 3.6018762104924163e-06, "loss": 0.5528, "step": 4347 }, { "epoch": 5.321909424724602, "grad_norm": 2.2841990181883745, "learning_rate": 3.6013009015977142e-06, "loss": 0.5573, "step": 4348 }, { "epoch": 5.3231334149326806, "grad_norm": 1.813954282864968, "learning_rate": 3.6007255203295898e-06, "loss": 0.4407, "step": 4349 }, { "epoch": 5.324357405140759, "grad_norm": 2.1636875493002488, "learning_rate": 3.600150066725856e-06, "loss": 0.6115, "step": 4350 }, { "epoch": 5.325581395348837, "grad_norm": 2.1184485394445547, "learning_rate": 3.599574540824329e-06, "loss": 0.9931, "step": 4351 }, { "epoch": 5.3268053855569155, "grad_norm": 1.4186937201400487, "learning_rate": 3.5989989426628303e-06, "loss": 0.5756, "step": 4352 }, { "epoch": 5.328029375764994, "grad_norm": 1.4965356021641048, "learning_rate": 3.598423272279185e-06, "loss": 0.5241, "step": 4353 }, { "epoch": 5.329253365973072, "grad_norm": 0.7342273336844484, "learning_rate": 3.597847529711226e-06, "loss": 0.5378, "step": 4354 }, { "epoch": 5.3304773561811505, "grad_norm": 1.2931859231517615, "learning_rate": 3.597271714996788e-06, "loss": 0.4018, "step": 4355 }, { "epoch": 5.331701346389229, "grad_norm": 1.421579682372198, "learning_rate": 3.5966958281737107e-06, "loss": 0.7393, "step": 4356 }, { "epoch": 5.332925336597307, "grad_norm": 1.544927793795705, "learning_rate": 3.596119869279841e-06, "loss": 0.524, "step": 4357 }, { "epoch": 5.3341493268053854, "grad_norm": 0.8784407626313382, "learning_rate": 3.5955438383530274e-06, "loss": 0.6812, "step": 4358 }, { "epoch": 5.335373317013464, "grad_norm": 1.4182692983300471, "learning_rate": 3.594967735431124e-06, "loss": 0.461, "step": 4359 }, { "epoch": 5.336597307221542, "grad_norm": 1.2005036187314102, "learning_rate": 3.594391560551992e-06, "loss": 0.7082, "step": 4360 }, { "epoch": 5.33782129742962, "grad_norm": 1.417995903756882, "learning_rate": 3.593815313753495e-06, "loss": 0.7294, "step": 4361 }, { "epoch": 5.339045287637699, "grad_norm": 1.2313356296347129, "learning_rate": 3.5932389950735e-06, "loss": 0.5965, "step": 4362 }, { "epoch": 5.340269277845778, "grad_norm": 1.295394005446654, "learning_rate": 3.5926626045498817e-06, "loss": 1.5788, "step": 4363 }, { "epoch": 5.341493268053855, "grad_norm": 1.0024655238931273, "learning_rate": 3.59208614222052e-06, "loss": 0.5816, "step": 4364 }, { "epoch": 5.342717258261934, "grad_norm": 1.6121054145141585, "learning_rate": 3.5915096081232953e-06, "loss": 0.7949, "step": 4365 }, { "epoch": 5.343941248470013, "grad_norm": 1.0022150458059447, "learning_rate": 3.5909330022960963e-06, "loss": 0.4188, "step": 4366 }, { "epoch": 5.34516523867809, "grad_norm": 1.531882937102185, "learning_rate": 3.5903563247768164e-06, "loss": 0.4195, "step": 4367 }, { "epoch": 5.346389228886169, "grad_norm": 0.7274114367092167, "learning_rate": 3.5897795756033505e-06, "loss": 0.4024, "step": 4368 }, { "epoch": 5.347613219094248, "grad_norm": 1.6480890964032626, "learning_rate": 3.5892027548136026e-06, "loss": 0.5233, "step": 4369 }, { "epoch": 5.348837209302325, "grad_norm": 1.2695711896280573, "learning_rate": 3.5886258624454786e-06, "loss": 0.4865, "step": 4370 }, { "epoch": 5.350061199510404, "grad_norm": 1.2774384495371223, "learning_rate": 3.5880488985368888e-06, "loss": 0.5326, "step": 4371 }, { "epoch": 5.3512851897184825, "grad_norm": 1.4553646779688971, "learning_rate": 3.5874718631257497e-06, "loss": 1.0149, "step": 4372 }, { "epoch": 5.35250917992656, "grad_norm": 1.040897274729759, "learning_rate": 3.5868947562499827e-06, "loss": 0.5777, "step": 4373 }, { "epoch": 5.353733170134639, "grad_norm": 2.073585803648856, "learning_rate": 3.5863175779475122e-06, "loss": 0.54, "step": 4374 }, { "epoch": 5.3549571603427175, "grad_norm": 1.4962406860238233, "learning_rate": 3.5857403282562685e-06, "loss": 0.9841, "step": 4375 }, { "epoch": 5.356181150550795, "grad_norm": 1.6731887449972997, "learning_rate": 3.5851630072141864e-06, "loss": 0.4813, "step": 4376 }, { "epoch": 5.357405140758874, "grad_norm": 1.6684988591503531, "learning_rate": 3.5845856148592045e-06, "loss": 0.5189, "step": 4377 }, { "epoch": 5.3586291309669525, "grad_norm": 1.039459441022642, "learning_rate": 3.5840081512292685e-06, "loss": 0.4275, "step": 4378 }, { "epoch": 5.35985312117503, "grad_norm": 1.5289440300994905, "learning_rate": 3.5834306163623255e-06, "loss": 0.6543, "step": 4379 }, { "epoch": 5.361077111383109, "grad_norm": 1.8506014753877247, "learning_rate": 3.5828530102963303e-06, "loss": 0.5273, "step": 4380 }, { "epoch": 5.3623011015911874, "grad_norm": 1.2196314516550142, "learning_rate": 3.5822753330692405e-06, "loss": 0.4288, "step": 4381 }, { "epoch": 5.363525091799266, "grad_norm": 1.244475218336885, "learning_rate": 3.581697584719019e-06, "loss": 1.1792, "step": 4382 }, { "epoch": 5.364749082007344, "grad_norm": 1.3275825352143475, "learning_rate": 3.5811197652836323e-06, "loss": 0.5469, "step": 4383 }, { "epoch": 5.365973072215422, "grad_norm": 1.3352502350714146, "learning_rate": 3.5805418748010546e-06, "loss": 0.6708, "step": 4384 }, { "epoch": 5.3671970624235, "grad_norm": 0.8769138602754324, "learning_rate": 3.5799639133092605e-06, "loss": 0.5639, "step": 4385 }, { "epoch": 5.368421052631579, "grad_norm": 1.2300068842946812, "learning_rate": 3.579385880846232e-06, "loss": 0.5449, "step": 4386 }, { "epoch": 5.369645042839657, "grad_norm": 0.7561712132703756, "learning_rate": 3.578807777449958e-06, "loss": 0.461, "step": 4387 }, { "epoch": 5.370869033047736, "grad_norm": 1.1448192298490794, "learning_rate": 3.578229603158425e-06, "loss": 0.3551, "step": 4388 }, { "epoch": 5.372093023255814, "grad_norm": 0.943902370000432, "learning_rate": 3.5776513580096316e-06, "loss": 0.6919, "step": 4389 }, { "epoch": 5.373317013463892, "grad_norm": 2.761148219866134, "learning_rate": 3.577073042041577e-06, "loss": 0.4607, "step": 4390 }, { "epoch": 5.374541003671971, "grad_norm": 1.669255683766952, "learning_rate": 3.576494655292265e-06, "loss": 0.4865, "step": 4391 }, { "epoch": 5.375764993880049, "grad_norm": 2.172366589679472, "learning_rate": 3.5759161977997064e-06, "loss": 0.3981, "step": 4392 }, { "epoch": 5.376988984088127, "grad_norm": 1.2561067029599369, "learning_rate": 3.5753376696019152e-06, "loss": 0.5513, "step": 4393 }, { "epoch": 5.378212974296206, "grad_norm": 2.309337118355062, "learning_rate": 3.574759070736909e-06, "loss": 0.8409, "step": 4394 }, { "epoch": 5.379436964504284, "grad_norm": 1.292257419420401, "learning_rate": 3.574180401242713e-06, "loss": 1.1427, "step": 4395 }, { "epoch": 5.380660954712362, "grad_norm": 1.6713161383724755, "learning_rate": 3.5736016611573533e-06, "loss": 1.1017, "step": 4396 }, { "epoch": 5.381884944920441, "grad_norm": 1.5276971637072985, "learning_rate": 3.5730228505188636e-06, "loss": 0.6594, "step": 4397 }, { "epoch": 5.383108935128519, "grad_norm": 1.6342347931594354, "learning_rate": 3.5724439693652813e-06, "loss": 0.8769, "step": 4398 }, { "epoch": 5.384332925336597, "grad_norm": 1.659845353935088, "learning_rate": 3.5718650177346474e-06, "loss": 0.6093, "step": 4399 }, { "epoch": 5.385556915544676, "grad_norm": 1.1175759137492824, "learning_rate": 3.57128599566501e-06, "loss": 0.7613, "step": 4400 }, { "epoch": 5.386780905752754, "grad_norm": 0.9472263410683649, "learning_rate": 3.570706903194418e-06, "loss": 0.5482, "step": 4401 }, { "epoch": 5.388004895960832, "grad_norm": 2.076163846179155, "learning_rate": 3.5701277403609302e-06, "loss": 0.4703, "step": 4402 }, { "epoch": 5.389228886168911, "grad_norm": 1.1767031420288685, "learning_rate": 3.569548507202604e-06, "loss": 0.6435, "step": 4403 }, { "epoch": 5.3904528763769886, "grad_norm": 1.3456553461663245, "learning_rate": 3.568969203757507e-06, "loss": 0.9361, "step": 4404 }, { "epoch": 5.391676866585067, "grad_norm": 1.2101204159734256, "learning_rate": 3.5683898300637072e-06, "loss": 0.5544, "step": 4405 }, { "epoch": 5.392900856793146, "grad_norm": 1.8957763865826212, "learning_rate": 3.5678103861592796e-06, "loss": 0.5426, "step": 4406 }, { "epoch": 5.394124847001224, "grad_norm": 1.827879309881904, "learning_rate": 3.5672308720823025e-06, "loss": 0.5043, "step": 4407 }, { "epoch": 5.395348837209302, "grad_norm": 2.013835020713998, "learning_rate": 3.566651287870861e-06, "loss": 0.4399, "step": 4408 }, { "epoch": 5.396572827417381, "grad_norm": 1.368392801812708, "learning_rate": 3.566071633563041e-06, "loss": 0.5246, "step": 4409 }, { "epoch": 5.397796817625459, "grad_norm": 2.7655456968316634, "learning_rate": 3.565491909196937e-06, "loss": 0.5281, "step": 4410 }, { "epoch": 5.399020807833537, "grad_norm": 1.5812899297411138, "learning_rate": 3.5649121148106457e-06, "loss": 0.4281, "step": 4411 }, { "epoch": 5.400244798041616, "grad_norm": 1.0883904580828152, "learning_rate": 3.5643322504422687e-06, "loss": 1.0638, "step": 4412 }, { "epoch": 5.401468788249694, "grad_norm": 1.5130573201321875, "learning_rate": 3.563752316129913e-06, "loss": 0.7632, "step": 4413 }, { "epoch": 5.402692778457772, "grad_norm": 1.4248786018512765, "learning_rate": 3.56317231191169e-06, "loss": 1.8325, "step": 4414 }, { "epoch": 5.403916768665851, "grad_norm": 1.3123055747480625, "learning_rate": 3.562592237825715e-06, "loss": 1.6467, "step": 4415 }, { "epoch": 5.405140758873929, "grad_norm": 1.094530621434538, "learning_rate": 3.562012093910108e-06, "loss": 0.4814, "step": 4416 }, { "epoch": 5.406364749082007, "grad_norm": 1.2376879944018688, "learning_rate": 3.5614318802029948e-06, "loss": 0.4881, "step": 4417 }, { "epoch": 5.407588739290086, "grad_norm": 1.0975471044741623, "learning_rate": 3.560851596742504e-06, "loss": 0.651, "step": 4418 }, { "epoch": 5.408812729498164, "grad_norm": 1.7829496385327708, "learning_rate": 3.5602712435667697e-06, "loss": 0.6199, "step": 4419 }, { "epoch": 5.410036719706242, "grad_norm": 1.4183840963326084, "learning_rate": 3.559690820713931e-06, "loss": 0.3967, "step": 4420 }, { "epoch": 5.411260709914321, "grad_norm": 1.285576769175522, "learning_rate": 3.5591103282221324e-06, "loss": 0.6752, "step": 4421 }, { "epoch": 5.412484700122399, "grad_norm": 2.665806644536347, "learning_rate": 3.5585297661295195e-06, "loss": 0.3878, "step": 4422 }, { "epoch": 5.413708690330477, "grad_norm": 1.2172779038728183, "learning_rate": 3.5579491344742457e-06, "loss": 0.6, "step": 4423 }, { "epoch": 5.414932680538556, "grad_norm": 1.3592611363098044, "learning_rate": 3.5573684332944682e-06, "loss": 0.6826, "step": 4424 }, { "epoch": 5.416156670746634, "grad_norm": 1.3068103378677902, "learning_rate": 3.556787662628347e-06, "loss": 1.0711, "step": 4425 }, { "epoch": 5.417380660954713, "grad_norm": 0.9874115035146913, "learning_rate": 3.5562068225140506e-06, "loss": 0.6689, "step": 4426 }, { "epoch": 5.4186046511627906, "grad_norm": 2.6423205667022467, "learning_rate": 3.5556259129897473e-06, "loss": 0.4806, "step": 4427 }, { "epoch": 5.419828641370869, "grad_norm": 2.1654432229767075, "learning_rate": 3.5550449340936145e-06, "loss": 0.9048, "step": 4428 }, { "epoch": 5.421052631578947, "grad_norm": 1.4871514220920214, "learning_rate": 3.5544638858638307e-06, "loss": 0.5555, "step": 4429 }, { "epoch": 5.4222766217870255, "grad_norm": 1.2779830647419022, "learning_rate": 3.553882768338581e-06, "loss": 0.4138, "step": 4430 }, { "epoch": 5.423500611995104, "grad_norm": 2.329771727033092, "learning_rate": 3.5533015815560535e-06, "loss": 0.6225, "step": 4431 }, { "epoch": 5.424724602203183, "grad_norm": 1.2696143709153584, "learning_rate": 3.5527203255544417e-06, "loss": 0.5451, "step": 4432 }, { "epoch": 5.4259485924112605, "grad_norm": 1.7945268530866494, "learning_rate": 3.5521390003719435e-06, "loss": 1.0239, "step": 4433 }, { "epoch": 5.427172582619339, "grad_norm": 0.9050532356083686, "learning_rate": 3.5515576060467627e-06, "loss": 0.4669, "step": 4434 }, { "epoch": 5.428396572827418, "grad_norm": 1.4446034012616487, "learning_rate": 3.5509761426171046e-06, "loss": 1.4875, "step": 4435 }, { "epoch": 5.4296205630354955, "grad_norm": 1.8655077353402538, "learning_rate": 3.5503946101211817e-06, "loss": 1.4466, "step": 4436 }, { "epoch": 5.430844553243574, "grad_norm": 1.4126364288108972, "learning_rate": 3.5498130085972116e-06, "loss": 1.4489, "step": 4437 }, { "epoch": 5.432068543451653, "grad_norm": 1.121588472853089, "learning_rate": 3.5492313380834122e-06, "loss": 0.6161, "step": 4438 }, { "epoch": 5.43329253365973, "grad_norm": 0.8348305130734763, "learning_rate": 3.5486495986180104e-06, "loss": 0.5258, "step": 4439 }, { "epoch": 5.434516523867809, "grad_norm": 1.1786803552119138, "learning_rate": 3.5480677902392358e-06, "loss": 0.9186, "step": 4440 }, { "epoch": 5.435740514075888, "grad_norm": 1.4870723160456336, "learning_rate": 3.547485912985322e-06, "loss": 1.3734, "step": 4441 }, { "epoch": 5.436964504283965, "grad_norm": 1.3264472504564593, "learning_rate": 3.546903966894509e-06, "loss": 0.4975, "step": 4442 }, { "epoch": 5.438188494492044, "grad_norm": 1.0958306880271862, "learning_rate": 3.5463219520050397e-06, "loss": 0.4605, "step": 4443 }, { "epoch": 5.439412484700123, "grad_norm": 0.95532951324432, "learning_rate": 3.545739868355161e-06, "loss": 0.5823, "step": 4444 }, { "epoch": 5.4406364749082, "grad_norm": 1.5419797894091691, "learning_rate": 3.545157715983126e-06, "loss": 0.6243, "step": 4445 }, { "epoch": 5.441860465116279, "grad_norm": 2.253804782729319, "learning_rate": 3.5445754949271925e-06, "loss": 0.5726, "step": 4446 }, { "epoch": 5.443084455324358, "grad_norm": 1.3059517478683114, "learning_rate": 3.543993205225621e-06, "loss": 0.8414, "step": 4447 }, { "epoch": 5.444308445532435, "grad_norm": 1.1433580973522437, "learning_rate": 3.543410846916677e-06, "loss": 1.5826, "step": 4448 }, { "epoch": 5.445532435740514, "grad_norm": 1.298884011113135, "learning_rate": 3.542828420038632e-06, "loss": 0.9525, "step": 4449 }, { "epoch": 5.4467564259485926, "grad_norm": 1.675102851311704, "learning_rate": 3.5422459246297604e-06, "loss": 0.6862, "step": 4450 }, { "epoch": 5.447980416156671, "grad_norm": 1.3758286999998326, "learning_rate": 3.5416633607283407e-06, "loss": 1.6382, "step": 4451 }, { "epoch": 5.449204406364749, "grad_norm": 1.156106062911457, "learning_rate": 3.5410807283726588e-06, "loss": 0.5904, "step": 4452 }, { "epoch": 5.4504283965728275, "grad_norm": 2.0648972959905123, "learning_rate": 3.5404980276010016e-06, "loss": 0.9716, "step": 4453 }, { "epoch": 5.451652386780906, "grad_norm": 1.5032976171187695, "learning_rate": 3.539915258451663e-06, "loss": 1.2002, "step": 4454 }, { "epoch": 5.452876376988984, "grad_norm": 1.3585662371557394, "learning_rate": 3.5393324209629397e-06, "loss": 1.3855, "step": 4455 }, { "epoch": 5.4541003671970625, "grad_norm": 2.3938038548610474, "learning_rate": 3.538749515173134e-06, "loss": 0.3594, "step": 4456 }, { "epoch": 5.455324357405141, "grad_norm": 1.6420610582189552, "learning_rate": 3.5381665411205518e-06, "loss": 0.9922, "step": 4457 }, { "epoch": 5.456548347613219, "grad_norm": 1.497521706436548, "learning_rate": 3.5375834988435054e-06, "loss": 0.7744, "step": 4458 }, { "epoch": 5.4577723378212974, "grad_norm": 0.9389741706771537, "learning_rate": 3.5370003883803084e-06, "loss": 0.4543, "step": 4459 }, { "epoch": 5.458996328029376, "grad_norm": 1.1314869313911855, "learning_rate": 3.536417209769282e-06, "loss": 0.8904, "step": 4460 }, { "epoch": 5.460220318237454, "grad_norm": 1.2938274180867675, "learning_rate": 3.53583396304875e-06, "loss": 1.1266, "step": 4461 }, { "epoch": 5.461444308445532, "grad_norm": 1.8206280604236806, "learning_rate": 3.53525064825704e-06, "loss": 0.5222, "step": 4462 }, { "epoch": 5.462668298653611, "grad_norm": 0.837120156868484, "learning_rate": 3.5346672654324878e-06, "loss": 0.5438, "step": 4463 }, { "epoch": 5.463892288861689, "grad_norm": 2.315343636747441, "learning_rate": 3.5340838146134294e-06, "loss": 0.9579, "step": 4464 }, { "epoch": 5.465116279069767, "grad_norm": 0.8944383081308822, "learning_rate": 3.533500295838208e-06, "loss": 0.3887, "step": 4465 }, { "epoch": 5.466340269277846, "grad_norm": 1.5159104253022815, "learning_rate": 3.5329167091451688e-06, "loss": 0.9374, "step": 4466 }, { "epoch": 5.467564259485924, "grad_norm": 1.9135547680589282, "learning_rate": 3.5323330545726653e-06, "loss": 0.6202, "step": 4467 }, { "epoch": 5.468788249694002, "grad_norm": 1.641453736780936, "learning_rate": 3.5317493321590506e-06, "loss": 1.1588, "step": 4468 }, { "epoch": 5.470012239902081, "grad_norm": 1.5606503208951517, "learning_rate": 3.531165541942687e-06, "loss": 0.3811, "step": 4469 }, { "epoch": 5.47123623011016, "grad_norm": 1.2165694999762116, "learning_rate": 3.530581683961937e-06, "loss": 0.502, "step": 4470 }, { "epoch": 5.472460220318237, "grad_norm": 0.9421442425340472, "learning_rate": 3.5299977582551714e-06, "loss": 0.593, "step": 4471 }, { "epoch": 5.473684210526316, "grad_norm": 1.4103693642167372, "learning_rate": 3.529413764860763e-06, "loss": 0.6103, "step": 4472 }, { "epoch": 5.474908200734394, "grad_norm": 1.3541626570393468, "learning_rate": 3.528829703817089e-06, "loss": 0.3395, "step": 4473 }, { "epoch": 5.476132190942472, "grad_norm": 1.4573118240729364, "learning_rate": 3.5282455751625323e-06, "loss": 0.4572, "step": 4474 }, { "epoch": 5.477356181150551, "grad_norm": 1.7545645227151554, "learning_rate": 3.5276613789354797e-06, "loss": 0.4783, "step": 4475 }, { "epoch": 5.4785801713586295, "grad_norm": 1.1364945592281195, "learning_rate": 3.527077115174322e-06, "loss": 0.6716, "step": 4476 }, { "epoch": 5.479804161566707, "grad_norm": 1.0705828996674054, "learning_rate": 3.526492783917455e-06, "loss": 1.107, "step": 4477 }, { "epoch": 5.481028151774786, "grad_norm": 1.600817079073567, "learning_rate": 3.5259083852032792e-06, "loss": 1.0661, "step": 4478 }, { "epoch": 5.4822521419828645, "grad_norm": 1.415594517386211, "learning_rate": 3.525323919070199e-06, "loss": 0.5137, "step": 4479 }, { "epoch": 5.483476132190942, "grad_norm": 1.4831808276103746, "learning_rate": 3.524739385556624e-06, "loss": 0.6839, "step": 4480 }, { "epoch": 5.484700122399021, "grad_norm": 1.9325781951383452, "learning_rate": 3.524154784700966e-06, "loss": 0.2723, "step": 4481 }, { "epoch": 5.4859241126070994, "grad_norm": 1.5150412033523168, "learning_rate": 3.5235701165416437e-06, "loss": 0.5677, "step": 4482 }, { "epoch": 5.487148102815177, "grad_norm": 0.7992915043471042, "learning_rate": 3.522985381117079e-06, "loss": 0.5277, "step": 4483 }, { "epoch": 5.488372093023256, "grad_norm": 1.2047839286459492, "learning_rate": 3.5224005784657e-06, "loss": 0.4958, "step": 4484 }, { "epoch": 5.489596083231334, "grad_norm": 1.3030182592149477, "learning_rate": 3.5218157086259354e-06, "loss": 0.4493, "step": 4485 }, { "epoch": 5.490820073439412, "grad_norm": 1.1242606118363498, "learning_rate": 3.521230771636222e-06, "loss": 1.1372, "step": 4486 }, { "epoch": 5.492044063647491, "grad_norm": 2.0637961772962443, "learning_rate": 3.520645767535e-06, "loss": 0.9867, "step": 4487 }, { "epoch": 5.493268053855569, "grad_norm": 1.0007247923835334, "learning_rate": 3.520060696360713e-06, "loss": 0.859, "step": 4488 }, { "epoch": 5.494492044063647, "grad_norm": 2.227523235346055, "learning_rate": 3.5194755581518103e-06, "loss": 0.8645, "step": 4489 }, { "epoch": 5.495716034271726, "grad_norm": 0.860968763681482, "learning_rate": 3.5188903529467444e-06, "loss": 0.435, "step": 4490 }, { "epoch": 5.496940024479804, "grad_norm": 1.915214990178703, "learning_rate": 3.518305080783973e-06, "loss": 0.505, "step": 4491 }, { "epoch": 5.498164014687882, "grad_norm": 1.3260308026172227, "learning_rate": 3.5177197417019584e-06, "loss": 0.4357, "step": 4492 }, { "epoch": 5.499388004895961, "grad_norm": 2.2339140649262683, "learning_rate": 3.517134335739167e-06, "loss": 1.0042, "step": 4493 }, { "epoch": 5.500611995104039, "grad_norm": 0.8261312558054136, "learning_rate": 3.516548862934068e-06, "loss": 0.3816, "step": 4494 }, { "epoch": 5.501835985312118, "grad_norm": 1.6155576516107917, "learning_rate": 3.515963323325138e-06, "loss": 0.4677, "step": 4495 }, { "epoch": 5.503059975520196, "grad_norm": 1.0322846134830173, "learning_rate": 3.5153777169508566e-06, "loss": 0.654, "step": 4496 }, { "epoch": 5.504283965728274, "grad_norm": 1.253312162175109, "learning_rate": 3.5147920438497073e-06, "loss": 0.9562, "step": 4497 }, { "epoch": 5.505507955936353, "grad_norm": 1.3018463718875648, "learning_rate": 3.514206304060178e-06, "loss": 0.6039, "step": 4498 }, { "epoch": 5.506731946144431, "grad_norm": 1.950458710917427, "learning_rate": 3.5136204976207627e-06, "loss": 0.4033, "step": 4499 }, { "epoch": 5.507955936352509, "grad_norm": 1.4261986831975177, "learning_rate": 3.513034624569957e-06, "loss": 0.3832, "step": 4500 }, { "epoch": 5.509179926560588, "grad_norm": 1.72178934908833, "learning_rate": 3.512448684946262e-06, "loss": 0.9836, "step": 4501 }, { "epoch": 5.510403916768666, "grad_norm": 1.3706483216317793, "learning_rate": 3.511862678788185e-06, "loss": 0.5799, "step": 4502 }, { "epoch": 5.511627906976744, "grad_norm": 1.3550558367786212, "learning_rate": 3.5112766061342346e-06, "loss": 0.5826, "step": 4503 }, { "epoch": 5.512851897184823, "grad_norm": 1.786526091837288, "learning_rate": 3.510690467022926e-06, "loss": 0.3205, "step": 4504 }, { "epoch": 5.5140758873929006, "grad_norm": 0.9536819554236164, "learning_rate": 3.5101042614927794e-06, "loss": 0.9104, "step": 4505 }, { "epoch": 5.515299877600979, "grad_norm": 1.514253489767651, "learning_rate": 3.509517989582316e-06, "loss": 0.393, "step": 4506 }, { "epoch": 5.516523867809058, "grad_norm": 2.2099752109057014, "learning_rate": 3.5089316513300643e-06, "loss": 0.5326, "step": 4507 }, { "epoch": 5.5177478580171355, "grad_norm": 1.089544589394896, "learning_rate": 3.5083452467745565e-06, "loss": 0.4528, "step": 4508 }, { "epoch": 5.518971848225214, "grad_norm": 1.5448205511476936, "learning_rate": 3.5077587759543285e-06, "loss": 0.4419, "step": 4509 }, { "epoch": 5.520195838433293, "grad_norm": 1.4246716310894927, "learning_rate": 3.507172238907921e-06, "loss": 0.4876, "step": 4510 }, { "epoch": 5.5214198286413705, "grad_norm": 1.450941148201937, "learning_rate": 3.5065856356738796e-06, "loss": 0.5778, "step": 4511 }, { "epoch": 5.522643818849449, "grad_norm": 1.337003392726757, "learning_rate": 3.5059989662907536e-06, "loss": 0.5287, "step": 4512 }, { "epoch": 5.523867809057528, "grad_norm": 1.5392359607983546, "learning_rate": 3.505412230797096e-06, "loss": 1.1108, "step": 4513 }, { "epoch": 5.525091799265606, "grad_norm": 1.6185275318676418, "learning_rate": 3.5048254292314654e-06, "loss": 0.8057, "step": 4514 }, { "epoch": 5.526315789473684, "grad_norm": 0.9544119587209946, "learning_rate": 3.5042385616324243e-06, "loss": 0.5634, "step": 4515 }, { "epoch": 5.527539779681763, "grad_norm": 1.1277508237321088, "learning_rate": 3.5036516280385386e-06, "loss": 0.9998, "step": 4516 }, { "epoch": 5.52876376988984, "grad_norm": 1.249757566077675, "learning_rate": 3.503064628488382e-06, "loss": 0.4953, "step": 4517 }, { "epoch": 5.529987760097919, "grad_norm": 1.5072032371191788, "learning_rate": 3.502477563020526e-06, "loss": 0.9879, "step": 4518 }, { "epoch": 5.531211750305998, "grad_norm": 1.553006396826202, "learning_rate": 3.5018904316735534e-06, "loss": 0.6048, "step": 4519 }, { "epoch": 5.532435740514076, "grad_norm": 1.2126353497554916, "learning_rate": 3.5013032344860478e-06, "loss": 0.4888, "step": 4520 }, { "epoch": 5.533659730722154, "grad_norm": 1.5962911281496284, "learning_rate": 3.5007159714965966e-06, "loss": 0.6533, "step": 4521 }, { "epoch": 5.534883720930233, "grad_norm": 1.4617366764802464, "learning_rate": 3.500128642743793e-06, "loss": 0.5218, "step": 4522 }, { "epoch": 5.536107711138311, "grad_norm": 1.17772115383515, "learning_rate": 3.499541248266235e-06, "loss": 0.2442, "step": 4523 }, { "epoch": 5.537331701346389, "grad_norm": 1.9983894678419472, "learning_rate": 3.498953788102522e-06, "loss": 0.9809, "step": 4524 }, { "epoch": 5.538555691554468, "grad_norm": 1.5609606154458777, "learning_rate": 3.4983662622912625e-06, "loss": 0.6433, "step": 4525 }, { "epoch": 5.539779681762546, "grad_norm": 1.2342106761869924, "learning_rate": 3.497778670871064e-06, "loss": 0.5034, "step": 4526 }, { "epoch": 5.541003671970624, "grad_norm": 0.9643959081184507, "learning_rate": 3.497191013880541e-06, "loss": 0.4743, "step": 4527 }, { "epoch": 5.5422276621787026, "grad_norm": 1.8629260894428068, "learning_rate": 3.4966032913583143e-06, "loss": 0.6313, "step": 4528 }, { "epoch": 5.543451652386781, "grad_norm": 1.2991335125703563, "learning_rate": 3.496015503343005e-06, "loss": 0.4618, "step": 4529 }, { "epoch": 5.544675642594859, "grad_norm": 1.0519949448564376, "learning_rate": 3.495427649873241e-06, "loss": 0.572, "step": 4530 }, { "epoch": 5.5458996328029375, "grad_norm": 2.9771598374108885, "learning_rate": 3.494839730987653e-06, "loss": 0.4326, "step": 4531 }, { "epoch": 5.547123623011016, "grad_norm": 1.2611873490767895, "learning_rate": 3.4942517467248783e-06, "loss": 0.5791, "step": 4532 }, { "epoch": 5.548347613219094, "grad_norm": 1.5593917488608595, "learning_rate": 3.4936636971235555e-06, "loss": 0.4135, "step": 4533 }, { "epoch": 5.5495716034271725, "grad_norm": 1.8629401784231996, "learning_rate": 3.4930755822223307e-06, "loss": 0.4985, "step": 4534 }, { "epoch": 5.550795593635251, "grad_norm": 1.0629718580952152, "learning_rate": 3.4924874020598508e-06, "loss": 0.6978, "step": 4535 }, { "epoch": 5.552019583843329, "grad_norm": 1.3587721925172065, "learning_rate": 3.4918991566747705e-06, "loss": 1.5476, "step": 4536 }, { "epoch": 5.5532435740514074, "grad_norm": 1.063876480974483, "learning_rate": 3.4913108461057467e-06, "loss": 1.1123, "step": 4537 }, { "epoch": 5.554467564259486, "grad_norm": 1.38080796330293, "learning_rate": 3.49072247039144e-06, "loss": 1.0129, "step": 4538 }, { "epoch": 5.555691554467565, "grad_norm": 1.4123067297672984, "learning_rate": 3.4901340295705177e-06, "loss": 0.9388, "step": 4539 }, { "epoch": 5.556915544675642, "grad_norm": 1.3049772487053188, "learning_rate": 3.4895455236816488e-06, "loss": 1.0035, "step": 4540 }, { "epoch": 5.558139534883721, "grad_norm": 0.9942072851418005, "learning_rate": 3.488956952763508e-06, "loss": 0.4092, "step": 4541 }, { "epoch": 5.5593635250918, "grad_norm": 1.1756568507021177, "learning_rate": 3.4883683168547737e-06, "loss": 0.5028, "step": 4542 }, { "epoch": 5.560587515299877, "grad_norm": 1.8163524327934293, "learning_rate": 3.4877796159941305e-06, "loss": 0.5363, "step": 4543 }, { "epoch": 5.561811505507956, "grad_norm": 1.568205123004877, "learning_rate": 3.487190850220263e-06, "loss": 0.5727, "step": 4544 }, { "epoch": 5.563035495716035, "grad_norm": 1.2316535949039265, "learning_rate": 3.486602019571866e-06, "loss": 0.5216, "step": 4545 }, { "epoch": 5.564259485924112, "grad_norm": 1.3494579919185705, "learning_rate": 3.4860131240876327e-06, "loss": 1.2005, "step": 4546 }, { "epoch": 5.565483476132191, "grad_norm": 1.656995356552588, "learning_rate": 3.4854241638062638e-06, "loss": 1.1487, "step": 4547 }, { "epoch": 5.56670746634027, "grad_norm": 1.01722414791791, "learning_rate": 3.484835138766463e-06, "loss": 0.6018, "step": 4548 }, { "epoch": 5.567931456548347, "grad_norm": 0.7345505918581199, "learning_rate": 3.4842460490069407e-06, "loss": 0.5082, "step": 4549 }, { "epoch": 5.569155446756426, "grad_norm": 0.8716065633225716, "learning_rate": 3.4836568945664083e-06, "loss": 0.6125, "step": 4550 }, { "epoch": 5.5703794369645045, "grad_norm": 1.1275261427508716, "learning_rate": 3.4830676754835824e-06, "loss": 0.5231, "step": 4551 }, { "epoch": 5.571603427172582, "grad_norm": 1.067519169162483, "learning_rate": 3.482478391797186e-06, "loss": 0.5672, "step": 4552 }, { "epoch": 5.572827417380661, "grad_norm": 2.0099138059333566, "learning_rate": 3.481889043545942e-06, "loss": 0.5202, "step": 4553 }, { "epoch": 5.5740514075887395, "grad_norm": 1.372440849791154, "learning_rate": 3.481299630768583e-06, "loss": 0.3228, "step": 4554 }, { "epoch": 5.575275397796817, "grad_norm": 2.2194235204547543, "learning_rate": 3.480710153503842e-06, "loss": 1.067, "step": 4555 }, { "epoch": 5.576499388004896, "grad_norm": 1.1103151661363448, "learning_rate": 3.480120611790457e-06, "loss": 0.3834, "step": 4556 }, { "epoch": 5.5777233782129745, "grad_norm": 1.0673240103273345, "learning_rate": 3.4795310056671698e-06, "loss": 0.5736, "step": 4557 }, { "epoch": 5.578947368421053, "grad_norm": 1.5624231178227903, "learning_rate": 3.478941335172729e-06, "loss": 0.5652, "step": 4558 }, { "epoch": 5.580171358629131, "grad_norm": 0.907406388327696, "learning_rate": 3.478351600345884e-06, "loss": 0.6157, "step": 4559 }, { "epoch": 5.5813953488372094, "grad_norm": 1.8265788302024, "learning_rate": 3.47776180122539e-06, "loss": 0.4437, "step": 4560 }, { "epoch": 5.582619339045287, "grad_norm": 1.4905920444912402, "learning_rate": 3.477171937850008e-06, "loss": 0.6384, "step": 4561 }, { "epoch": 5.583843329253366, "grad_norm": 1.748326430907189, "learning_rate": 3.4765820102585e-06, "loss": 0.6734, "step": 4562 }, { "epoch": 5.585067319461444, "grad_norm": 1.4264402857974654, "learning_rate": 3.4759920184896345e-06, "loss": 0.7347, "step": 4563 }, { "epoch": 5.586291309669523, "grad_norm": 1.279186249412405, "learning_rate": 3.4754019625821835e-06, "loss": 0.7486, "step": 4564 }, { "epoch": 5.587515299877601, "grad_norm": 2.0643576089605333, "learning_rate": 3.4748118425749233e-06, "loss": 0.592, "step": 4565 }, { "epoch": 5.588739290085679, "grad_norm": 1.3065774460443254, "learning_rate": 3.4742216585066334e-06, "loss": 0.4953, "step": 4566 }, { "epoch": 5.589963280293758, "grad_norm": 1.5553452122542701, "learning_rate": 3.473631410416101e-06, "loss": 0.5601, "step": 4567 }, { "epoch": 5.591187270501836, "grad_norm": 1.7524647803016193, "learning_rate": 3.4730410983421126e-06, "loss": 1.1008, "step": 4568 }, { "epoch": 5.592411260709914, "grad_norm": 1.6409793517090185, "learning_rate": 3.472450722323463e-06, "loss": 0.3449, "step": 4569 }, { "epoch": 5.593635250917993, "grad_norm": 0.9743554345885426, "learning_rate": 3.4718602823989477e-06, "loss": 0.6053, "step": 4570 }, { "epoch": 5.594859241126071, "grad_norm": 2.3670113050007964, "learning_rate": 3.47126977860737e-06, "loss": 0.4496, "step": 4571 }, { "epoch": 5.596083231334149, "grad_norm": 1.8880610440514325, "learning_rate": 3.4706792109875352e-06, "loss": 1.0053, "step": 4572 }, { "epoch": 5.597307221542228, "grad_norm": 1.0882213285905011, "learning_rate": 3.4700885795782522e-06, "loss": 0.525, "step": 4573 }, { "epoch": 5.598531211750306, "grad_norm": 1.1630271625421398, "learning_rate": 3.4694978844183364e-06, "loss": 0.5295, "step": 4574 }, { "epoch": 5.599755201958384, "grad_norm": 1.7083406927578428, "learning_rate": 3.4689071255466055e-06, "loss": 0.502, "step": 4575 }, { "epoch": 5.600979192166463, "grad_norm": 1.623912515892955, "learning_rate": 3.4683163030018824e-06, "loss": 0.4658, "step": 4576 }, { "epoch": 5.602203182374541, "grad_norm": 0.8674286008863695, "learning_rate": 3.467725416822992e-06, "loss": 0.6969, "step": 4577 }, { "epoch": 5.603427172582619, "grad_norm": 1.1899056452051293, "learning_rate": 3.4671344670487683e-06, "loss": 0.6244, "step": 4578 }, { "epoch": 5.604651162790698, "grad_norm": 1.1966907964581717, "learning_rate": 3.4665434537180435e-06, "loss": 0.4543, "step": 4579 }, { "epoch": 5.605875152998776, "grad_norm": 1.170519748499361, "learning_rate": 3.4659523768696584e-06, "loss": 0.5991, "step": 4580 }, { "epoch": 5.607099143206854, "grad_norm": 1.8201086862446105, "learning_rate": 3.465361236542456e-06, "loss": 0.5818, "step": 4581 }, { "epoch": 5.608323133414933, "grad_norm": 1.5690091666775403, "learning_rate": 3.4647700327752837e-06, "loss": 0.4453, "step": 4582 }, { "epoch": 5.6095471236230114, "grad_norm": 1.324426782562437, "learning_rate": 3.4641787656069924e-06, "loss": 0.8771, "step": 4583 }, { "epoch": 5.610771113831089, "grad_norm": 1.7607524492803233, "learning_rate": 3.4635874350764405e-06, "loss": 0.5569, "step": 4584 }, { "epoch": 5.611995104039168, "grad_norm": 1.2795171772223004, "learning_rate": 3.462996041222485e-06, "loss": 0.5091, "step": 4585 }, { "epoch": 5.613219094247246, "grad_norm": 1.3263327073565494, "learning_rate": 3.462404584083992e-06, "loss": 1.3228, "step": 4586 }, { "epoch": 5.614443084455324, "grad_norm": 1.5055461648632704, "learning_rate": 3.4618130636998297e-06, "loss": 0.7649, "step": 4587 }, { "epoch": 5.615667074663403, "grad_norm": 1.2761071582071668, "learning_rate": 3.46122148010887e-06, "loss": 0.4929, "step": 4588 }, { "epoch": 5.616891064871481, "grad_norm": 1.0761960680953506, "learning_rate": 3.4606298333499903e-06, "loss": 0.9121, "step": 4589 }, { "epoch": 5.618115055079559, "grad_norm": 1.2676981090094064, "learning_rate": 3.460038123462071e-06, "loss": 0.4842, "step": 4590 }, { "epoch": 5.619339045287638, "grad_norm": 1.3910693142435633, "learning_rate": 3.459446350483997e-06, "loss": 0.955, "step": 4591 }, { "epoch": 5.620563035495716, "grad_norm": 1.3070360986105605, "learning_rate": 3.4588545144546572e-06, "loss": 1.2493, "step": 4592 }, { "epoch": 5.621787025703794, "grad_norm": 1.7186688561289396, "learning_rate": 3.4582626154129464e-06, "loss": 0.6047, "step": 4593 }, { "epoch": 5.623011015911873, "grad_norm": 1.2985435660791929, "learning_rate": 3.4576706533977596e-06, "loss": 0.9606, "step": 4594 }, { "epoch": 5.624235006119951, "grad_norm": 1.4281199858853284, "learning_rate": 3.4570786284480003e-06, "loss": 0.5541, "step": 4595 }, { "epoch": 5.625458996328029, "grad_norm": 1.117426426037937, "learning_rate": 3.456486540602574e-06, "loss": 1.1758, "step": 4596 }, { "epoch": 5.626682986536108, "grad_norm": 0.7490830079084273, "learning_rate": 3.4558943899003893e-06, "loss": 0.4505, "step": 4597 }, { "epoch": 5.627906976744186, "grad_norm": 2.5929595914830044, "learning_rate": 3.4553021763803606e-06, "loss": 0.4078, "step": 4598 }, { "epoch": 5.629130966952264, "grad_norm": 1.1322187200254483, "learning_rate": 3.454709900081407e-06, "loss": 0.8675, "step": 4599 }, { "epoch": 5.630354957160343, "grad_norm": 1.737086604315628, "learning_rate": 3.4541175610424492e-06, "loss": 0.3854, "step": 4600 }, { "epoch": 5.631578947368421, "grad_norm": 1.54428185800753, "learning_rate": 3.4535251593024157e-06, "loss": 0.7541, "step": 4601 }, { "epoch": 5.6328029375765, "grad_norm": 1.075339172706889, "learning_rate": 3.452932694900235e-06, "loss": 0.7363, "step": 4602 }, { "epoch": 5.634026927784578, "grad_norm": 0.975111545127296, "learning_rate": 3.4523401678748415e-06, "loss": 0.4899, "step": 4603 }, { "epoch": 5.635250917992656, "grad_norm": 1.1000692358903483, "learning_rate": 3.4517475782651754e-06, "loss": 0.7838, "step": 4604 }, { "epoch": 5.636474908200734, "grad_norm": 1.889902040337147, "learning_rate": 3.4511549261101785e-06, "loss": 0.4901, "step": 4605 }, { "epoch": 5.6376988984088126, "grad_norm": 1.4802673699327256, "learning_rate": 3.450562211448798e-06, "loss": 0.3677, "step": 4606 }, { "epoch": 5.638922888616891, "grad_norm": 1.2260124900267313, "learning_rate": 3.449969434319984e-06, "loss": 0.5895, "step": 4607 }, { "epoch": 5.64014687882497, "grad_norm": 0.9098568360986651, "learning_rate": 3.449376594762694e-06, "loss": 0.5157, "step": 4608 }, { "epoch": 5.6413708690330475, "grad_norm": 1.1417728705232, "learning_rate": 3.4487836928158846e-06, "loss": 0.3867, "step": 4609 }, { "epoch": 5.642594859241126, "grad_norm": 2.9712287069535215, "learning_rate": 3.4481907285185202e-06, "loss": 0.4147, "step": 4610 }, { "epoch": 5.643818849449205, "grad_norm": 1.0968067296264468, "learning_rate": 3.4475977019095684e-06, "loss": 0.3892, "step": 4611 }, { "epoch": 5.6450428396572825, "grad_norm": 1.3901764591758, "learning_rate": 3.447004613028001e-06, "loss": 1.3865, "step": 4612 }, { "epoch": 5.646266829865361, "grad_norm": 1.1254571113320158, "learning_rate": 3.446411461912793e-06, "loss": 0.4247, "step": 4613 }, { "epoch": 5.64749082007344, "grad_norm": 1.8125857761007533, "learning_rate": 3.4458182486029246e-06, "loss": 0.6097, "step": 4614 }, { "epoch": 5.6487148102815175, "grad_norm": 1.3558703377104802, "learning_rate": 3.445224973137379e-06, "loss": 1.5069, "step": 4615 }, { "epoch": 5.649938800489596, "grad_norm": 1.5200198098608584, "learning_rate": 3.4446316355551434e-06, "loss": 0.3993, "step": 4616 }, { "epoch": 5.651162790697675, "grad_norm": 1.2329440308529962, "learning_rate": 3.4440382358952116e-06, "loss": 0.5412, "step": 4617 }, { "epoch": 5.652386780905752, "grad_norm": 2.2481245129144267, "learning_rate": 3.443444774196578e-06, "loss": 0.4948, "step": 4618 }, { "epoch": 5.653610771113831, "grad_norm": 2.547123657267804, "learning_rate": 3.4428512504982443e-06, "loss": 0.4296, "step": 4619 }, { "epoch": 5.65483476132191, "grad_norm": 1.1031783944530436, "learning_rate": 3.4422576648392138e-06, "loss": 0.759, "step": 4620 }, { "epoch": 5.656058751529987, "grad_norm": 1.9856031480028187, "learning_rate": 3.4416640172584935e-06, "loss": 0.357, "step": 4621 }, { "epoch": 5.657282741738066, "grad_norm": 1.653930678874258, "learning_rate": 3.441070307795098e-06, "loss": 0.7674, "step": 4622 }, { "epoch": 5.658506731946145, "grad_norm": 1.3191015223249565, "learning_rate": 3.4404765364880425e-06, "loss": 0.5788, "step": 4623 }, { "epoch": 5.659730722154222, "grad_norm": 1.2407103634413703, "learning_rate": 3.4398827033763472e-06, "loss": 0.8192, "step": 4624 }, { "epoch": 5.660954712362301, "grad_norm": 1.4905111320663105, "learning_rate": 3.4392888084990374e-06, "loss": 0.7559, "step": 4625 }, { "epoch": 5.66217870257038, "grad_norm": 1.3731509503347776, "learning_rate": 3.4386948518951417e-06, "loss": 0.5523, "step": 4626 }, { "epoch": 5.663402692778458, "grad_norm": 1.0241204448351604, "learning_rate": 3.4381008336036915e-06, "loss": 0.9907, "step": 4627 }, { "epoch": 5.664626682986536, "grad_norm": 2.732782533476069, "learning_rate": 3.437506753663725e-06, "loss": 0.3258, "step": 4628 }, { "epoch": 5.6658506731946146, "grad_norm": 1.5004625474668927, "learning_rate": 3.4369126121142813e-06, "loss": 0.6204, "step": 4629 }, { "epoch": 5.667074663402692, "grad_norm": 0.9614849258595926, "learning_rate": 3.436318408994407e-06, "loss": 0.4963, "step": 4630 }, { "epoch": 5.668298653610771, "grad_norm": 1.1046101698094355, "learning_rate": 3.43572414434315e-06, "loss": 0.6314, "step": 4631 }, { "epoch": 5.6695226438188495, "grad_norm": 1.4943283012927704, "learning_rate": 3.435129818199563e-06, "loss": 0.5128, "step": 4632 }, { "epoch": 5.670746634026928, "grad_norm": 1.060113954302605, "learning_rate": 3.4345354306027024e-06, "loss": 0.476, "step": 4633 }, { "epoch": 5.671970624235006, "grad_norm": 1.1587033681445946, "learning_rate": 3.4339409815916315e-06, "loss": 0.697, "step": 4634 }, { "epoch": 5.6731946144430845, "grad_norm": 1.3348774079725048, "learning_rate": 3.433346471205412e-06, "loss": 0.5998, "step": 4635 }, { "epoch": 5.674418604651163, "grad_norm": 1.3691465511880456, "learning_rate": 3.432751899483116e-06, "loss": 1.525, "step": 4636 }, { "epoch": 5.675642594859241, "grad_norm": 2.4489984485203, "learning_rate": 3.4321572664638146e-06, "loss": 0.4627, "step": 4637 }, { "epoch": 5.6768665850673194, "grad_norm": 1.98024984745998, "learning_rate": 3.4315625721865858e-06, "loss": 0.5053, "step": 4638 }, { "epoch": 5.678090575275398, "grad_norm": 1.533127171216978, "learning_rate": 3.43096781669051e-06, "loss": 0.347, "step": 4639 }, { "epoch": 5.679314565483476, "grad_norm": 1.5791045294283645, "learning_rate": 3.430373000014673e-06, "loss": 0.6269, "step": 4640 }, { "epoch": 5.680538555691554, "grad_norm": 1.7890505384000008, "learning_rate": 3.4297781221981636e-06, "loss": 0.4124, "step": 4641 }, { "epoch": 5.681762545899633, "grad_norm": 1.895416453156362, "learning_rate": 3.429183183280074e-06, "loss": 0.5745, "step": 4642 }, { "epoch": 5.682986536107711, "grad_norm": 1.6628795879579383, "learning_rate": 3.4285881832995038e-06, "loss": 0.4124, "step": 4643 }, { "epoch": 5.684210526315789, "grad_norm": 1.344043380610681, "learning_rate": 3.427993122295552e-06, "loss": 1.1957, "step": 4644 }, { "epoch": 5.685434516523868, "grad_norm": 0.9310119804813397, "learning_rate": 3.4273980003073247e-06, "loss": 0.6573, "step": 4645 }, { "epoch": 5.686658506731947, "grad_norm": 1.3531528628695146, "learning_rate": 3.4268028173739315e-06, "loss": 0.458, "step": 4646 }, { "epoch": 5.687882496940024, "grad_norm": 1.3338578802989087, "learning_rate": 3.426207573534485e-06, "loss": 0.3363, "step": 4647 }, { "epoch": 5.689106487148103, "grad_norm": 2.0264481881053777, "learning_rate": 3.4256122688281025e-06, "loss": 0.469, "step": 4648 }, { "epoch": 5.690330477356181, "grad_norm": 1.1473377351043126, "learning_rate": 3.425016903293905e-06, "loss": 0.6171, "step": 4649 }, { "epoch": 5.691554467564259, "grad_norm": 0.4516739472672605, "learning_rate": 3.4244214769710184e-06, "loss": 0.134, "step": 4650 }, { "epoch": 5.692778457772338, "grad_norm": 0.8960516175496366, "learning_rate": 3.4238259898985714e-06, "loss": 0.5846, "step": 4651 }, { "epoch": 5.6940024479804165, "grad_norm": 2.0299269483928155, "learning_rate": 3.423230442115698e-06, "loss": 0.5479, "step": 4652 }, { "epoch": 5.695226438188494, "grad_norm": 1.547111122503179, "learning_rate": 3.4226348336615333e-06, "loss": 0.4259, "step": 4653 }, { "epoch": 5.696450428396573, "grad_norm": 2.105076560413822, "learning_rate": 3.422039164575221e-06, "loss": 0.4107, "step": 4654 }, { "epoch": 5.6976744186046515, "grad_norm": 2.1452855613864816, "learning_rate": 3.421443434895905e-06, "loss": 0.54, "step": 4655 }, { "epoch": 5.698898408812729, "grad_norm": 1.3391254131083528, "learning_rate": 3.420847644662734e-06, "loss": 0.7212, "step": 4656 }, { "epoch": 5.700122399020808, "grad_norm": 2.1127671042318883, "learning_rate": 3.420251793914862e-06, "loss": 0.6241, "step": 4657 }, { "epoch": 5.7013463892288865, "grad_norm": 2.27567421981427, "learning_rate": 3.4196558826914473e-06, "loss": 0.4435, "step": 4658 }, { "epoch": 5.702570379436964, "grad_norm": 0.9006584892840007, "learning_rate": 3.4190599110316477e-06, "loss": 0.6349, "step": 4659 }, { "epoch": 5.703794369645043, "grad_norm": 1.5657420356876341, "learning_rate": 3.4184638789746304e-06, "loss": 1.1387, "step": 4660 }, { "epoch": 5.7050183598531214, "grad_norm": 1.316867985665217, "learning_rate": 3.417867786559565e-06, "loss": 0.5947, "step": 4661 }, { "epoch": 5.706242350061199, "grad_norm": 1.1973451356468439, "learning_rate": 3.4172716338256235e-06, "loss": 0.673, "step": 4662 }, { "epoch": 5.707466340269278, "grad_norm": 1.4193805676585383, "learning_rate": 3.4166754208119823e-06, "loss": 0.6139, "step": 4663 }, { "epoch": 5.708690330477356, "grad_norm": 1.0717095457665797, "learning_rate": 3.4160791475578238e-06, "loss": 0.7904, "step": 4664 }, { "epoch": 5.709914320685434, "grad_norm": 1.3178983950945542, "learning_rate": 3.4154828141023316e-06, "loss": 0.6209, "step": 4665 }, { "epoch": 5.711138310893513, "grad_norm": 1.7210678312548422, "learning_rate": 3.4148864204846943e-06, "loss": 0.6536, "step": 4666 }, { "epoch": 5.712362301101591, "grad_norm": 1.0651184938867828, "learning_rate": 3.414289966744107e-06, "loss": 0.53, "step": 4667 }, { "epoch": 5.713586291309669, "grad_norm": 1.7110208110587926, "learning_rate": 3.4136934529197634e-06, "loss": 0.5429, "step": 4668 }, { "epoch": 5.714810281517748, "grad_norm": 1.655381376865821, "learning_rate": 3.4130968790508662e-06, "loss": 0.4472, "step": 4669 }, { "epoch": 5.716034271725826, "grad_norm": 1.1897691459913609, "learning_rate": 3.4125002451766194e-06, "loss": 0.6334, "step": 4670 }, { "epoch": 5.717258261933905, "grad_norm": 0.7636724248855992, "learning_rate": 3.411903551336232e-06, "loss": 0.4386, "step": 4671 }, { "epoch": 5.718482252141983, "grad_norm": 2.0847589220841662, "learning_rate": 3.411306797568915e-06, "loss": 0.557, "step": 4672 }, { "epoch": 5.719706242350061, "grad_norm": 1.4033157477760756, "learning_rate": 3.410709983913887e-06, "loss": 0.5595, "step": 4673 }, { "epoch": 5.720930232558139, "grad_norm": 1.8597527760853743, "learning_rate": 3.4101131104103664e-06, "loss": 1.0679, "step": 4674 }, { "epoch": 5.722154222766218, "grad_norm": 1.3787498956955657, "learning_rate": 3.4095161770975793e-06, "loss": 0.5715, "step": 4675 }, { "epoch": 5.723378212974296, "grad_norm": 1.3684012927345333, "learning_rate": 3.408919184014753e-06, "loss": 0.7072, "step": 4676 }, { "epoch": 5.724602203182375, "grad_norm": 1.6562503709412002, "learning_rate": 3.4083221312011195e-06, "loss": 0.682, "step": 4677 }, { "epoch": 5.725826193390453, "grad_norm": 1.463340150596859, "learning_rate": 3.4077250186959154e-06, "loss": 1.1864, "step": 4678 }, { "epoch": 5.727050183598531, "grad_norm": 1.6926720080950746, "learning_rate": 3.40712784653838e-06, "loss": 1.0315, "step": 4679 }, { "epoch": 5.72827417380661, "grad_norm": 1.4405817192828536, "learning_rate": 3.4065306147677585e-06, "loss": 0.4984, "step": 4680 }, { "epoch": 5.729498164014688, "grad_norm": 2.5677780088546895, "learning_rate": 3.4059333234232977e-06, "loss": 0.3911, "step": 4681 }, { "epoch": 5.730722154222766, "grad_norm": 2.193469501658793, "learning_rate": 3.40533597254425e-06, "loss": 0.5512, "step": 4682 }, { "epoch": 5.731946144430845, "grad_norm": 1.054097147279582, "learning_rate": 3.4047385621698696e-06, "loss": 0.7385, "step": 4683 }, { "epoch": 5.7331701346389226, "grad_norm": 1.2041869839717803, "learning_rate": 3.404141092339418e-06, "loss": 0.8034, "step": 4684 }, { "epoch": 5.734394124847001, "grad_norm": 1.1678821944741715, "learning_rate": 3.4035435630921587e-06, "loss": 0.5579, "step": 4685 }, { "epoch": 5.73561811505508, "grad_norm": 1.1511041760116236, "learning_rate": 3.402945974467358e-06, "loss": 0.3594, "step": 4686 }, { "epoch": 5.7368421052631575, "grad_norm": 1.3294651142489309, "learning_rate": 3.402348326504287e-06, "loss": 1.0031, "step": 4687 }, { "epoch": 5.738066095471236, "grad_norm": 1.223895806399522, "learning_rate": 3.401750619242222e-06, "loss": 0.4107, "step": 4688 }, { "epoch": 5.739290085679315, "grad_norm": 1.1826858359428178, "learning_rate": 3.401152852720441e-06, "loss": 1.252, "step": 4689 }, { "epoch": 5.740514075887393, "grad_norm": 1.7217059342451206, "learning_rate": 3.4005550269782297e-06, "loss": 1.14, "step": 4690 }, { "epoch": 5.741738066095471, "grad_norm": 2.2147249206878517, "learning_rate": 3.3999571420548715e-06, "loss": 0.2498, "step": 4691 }, { "epoch": 5.74296205630355, "grad_norm": 1.3405545892404183, "learning_rate": 3.3993591979896578e-06, "loss": 0.6869, "step": 4692 }, { "epoch": 5.7441860465116275, "grad_norm": 1.3885961932353337, "learning_rate": 3.398761194821886e-06, "loss": 0.6568, "step": 4693 }, { "epoch": 5.745410036719706, "grad_norm": 1.006290292257215, "learning_rate": 3.398163132590851e-06, "loss": 0.6168, "step": 4694 }, { "epoch": 5.746634026927785, "grad_norm": 1.655474878102463, "learning_rate": 3.3975650113358584e-06, "loss": 0.4252, "step": 4695 }, { "epoch": 5.747858017135863, "grad_norm": 1.114559452741794, "learning_rate": 3.3969668310962123e-06, "loss": 0.5799, "step": 4696 }, { "epoch": 5.749082007343941, "grad_norm": 1.835562648527684, "learning_rate": 3.396368591911224e-06, "loss": 0.4962, "step": 4697 }, { "epoch": 5.75030599755202, "grad_norm": 1.4550619060394339, "learning_rate": 3.3957702938202064e-06, "loss": 1.1312, "step": 4698 }, { "epoch": 5.751529987760098, "grad_norm": 1.399794925451687, "learning_rate": 3.39517193686248e-06, "loss": 0.4921, "step": 4699 }, { "epoch": 5.752753977968176, "grad_norm": 1.3425441513821108, "learning_rate": 3.3945735210773634e-06, "loss": 0.5276, "step": 4700 }, { "epoch": 5.753977968176255, "grad_norm": 1.6814308181579505, "learning_rate": 3.3939750465041844e-06, "loss": 0.5667, "step": 4701 }, { "epoch": 5.755201958384333, "grad_norm": 0.9799731042801744, "learning_rate": 3.393376513182273e-06, "loss": 0.5751, "step": 4702 }, { "epoch": 5.756425948592411, "grad_norm": 1.1311701212683511, "learning_rate": 3.3927779211509593e-06, "loss": 0.7161, "step": 4703 }, { "epoch": 5.75764993880049, "grad_norm": 0.937795753745444, "learning_rate": 3.392179270449584e-06, "loss": 0.6315, "step": 4704 }, { "epoch": 5.758873929008568, "grad_norm": 1.605199697768647, "learning_rate": 3.3915805611174872e-06, "loss": 0.6255, "step": 4705 }, { "epoch": 5.760097919216646, "grad_norm": 1.819755120798666, "learning_rate": 3.3909817931940135e-06, "loss": 0.4025, "step": 4706 }, { "epoch": 5.7613219094247246, "grad_norm": 1.7233900962708872, "learning_rate": 3.3903829667185114e-06, "loss": 0.9989, "step": 4707 }, { "epoch": 5.762545899632803, "grad_norm": 1.874320020158142, "learning_rate": 3.389784081730335e-06, "loss": 0.4815, "step": 4708 }, { "epoch": 5.763769889840881, "grad_norm": 1.4948401422322481, "learning_rate": 3.3891851382688383e-06, "loss": 1.3339, "step": 4709 }, { "epoch": 5.7649938800489595, "grad_norm": 1.1027776295305411, "learning_rate": 3.388586136373384e-06, "loss": 0.5302, "step": 4710 }, { "epoch": 5.766217870257038, "grad_norm": 1.6723292291859584, "learning_rate": 3.387987076083336e-06, "loss": 0.5435, "step": 4711 }, { "epoch": 5.767441860465116, "grad_norm": 1.6483645009063461, "learning_rate": 3.3873879574380615e-06, "loss": 1.167, "step": 4712 }, { "epoch": 5.7686658506731945, "grad_norm": 1.7467049923132079, "learning_rate": 3.3867887804769315e-06, "loss": 0.6351, "step": 4713 }, { "epoch": 5.769889840881273, "grad_norm": 1.0669768437911418, "learning_rate": 3.386189545239325e-06, "loss": 0.7506, "step": 4714 }, { "epoch": 5.771113831089352, "grad_norm": 2.005357207935525, "learning_rate": 3.3855902517646185e-06, "loss": 0.5641, "step": 4715 }, { "epoch": 5.7723378212974294, "grad_norm": 1.2044479059765083, "learning_rate": 3.3849909000921955e-06, "loss": 0.8958, "step": 4716 }, { "epoch": 5.773561811505508, "grad_norm": 1.8752794311572092, "learning_rate": 3.384391490261445e-06, "loss": 0.6233, "step": 4717 }, { "epoch": 5.774785801713586, "grad_norm": 1.534799460904561, "learning_rate": 3.3837920223117553e-06, "loss": 0.4714, "step": 4718 }, { "epoch": 5.776009791921664, "grad_norm": 1.2564495992480567, "learning_rate": 3.383192496282524e-06, "loss": 0.5146, "step": 4719 }, { "epoch": 5.777233782129743, "grad_norm": 2.4046941607296413, "learning_rate": 3.3825929122131484e-06, "loss": 0.494, "step": 4720 }, { "epoch": 5.778457772337822, "grad_norm": 0.6508267653326365, "learning_rate": 3.381993270143031e-06, "loss": 0.23, "step": 4721 }, { "epoch": 5.779681762545899, "grad_norm": 0.9355567998644042, "learning_rate": 3.3813935701115785e-06, "loss": 0.5759, "step": 4722 }, { "epoch": 5.780905752753978, "grad_norm": 1.3413944913774398, "learning_rate": 3.3807938121582002e-06, "loss": 0.6963, "step": 4723 }, { "epoch": 5.782129742962057, "grad_norm": 1.617985887241337, "learning_rate": 3.38019399632231e-06, "loss": 0.4439, "step": 4724 }, { "epoch": 5.783353733170134, "grad_norm": 1.0246479107500495, "learning_rate": 3.3795941226433265e-06, "loss": 0.424, "step": 4725 }, { "epoch": 5.784577723378213, "grad_norm": 1.1001922253770324, "learning_rate": 3.3789941911606707e-06, "loss": 0.3301, "step": 4726 }, { "epoch": 5.785801713586292, "grad_norm": 1.8860657180826064, "learning_rate": 3.378394201913768e-06, "loss": 1.0741, "step": 4727 }, { "epoch": 5.787025703794369, "grad_norm": 2.112050689122249, "learning_rate": 3.3777941549420466e-06, "loss": 0.7086, "step": 4728 }, { "epoch": 5.788249694002448, "grad_norm": 1.4431526108850723, "learning_rate": 3.377194050284941e-06, "loss": 1.0263, "step": 4729 }, { "epoch": 5.7894736842105265, "grad_norm": 1.1048324208513078, "learning_rate": 3.3765938879818865e-06, "loss": 0.4645, "step": 4730 }, { "epoch": 5.790697674418604, "grad_norm": 1.6978917551806825, "learning_rate": 3.3759936680723238e-06, "loss": 0.6393, "step": 4731 }, { "epoch": 5.791921664626683, "grad_norm": 1.8030446848071437, "learning_rate": 3.375393390595698e-06, "loss": 0.4515, "step": 4732 }, { "epoch": 5.7931456548347615, "grad_norm": 2.2061038025235593, "learning_rate": 3.374793055591455e-06, "loss": 0.4412, "step": 4733 }, { "epoch": 5.79436964504284, "grad_norm": 1.26932723582523, "learning_rate": 3.374192663099049e-06, "loss": 0.8314, "step": 4734 }, { "epoch": 5.795593635250918, "grad_norm": 1.2526051109845684, "learning_rate": 3.3735922131579346e-06, "loss": 0.6063, "step": 4735 }, { "epoch": 5.7968176254589965, "grad_norm": 1.1583103137090285, "learning_rate": 3.372991705807571e-06, "loss": 0.6459, "step": 4736 }, { "epoch": 5.798041615667074, "grad_norm": 0.9071214656185663, "learning_rate": 3.3723911410874215e-06, "loss": 0.4053, "step": 4737 }, { "epoch": 5.799265605875153, "grad_norm": 1.289727845311229, "learning_rate": 3.371790519036953e-06, "loss": 0.7286, "step": 4738 }, { "epoch": 5.8004895960832314, "grad_norm": 2.4306760695029586, "learning_rate": 3.3711898396956357e-06, "loss": 0.7795, "step": 4739 }, { "epoch": 5.80171358629131, "grad_norm": 1.6536154427922443, "learning_rate": 3.3705891031029454e-06, "loss": 1.1838, "step": 4740 }, { "epoch": 5.802937576499388, "grad_norm": 1.3944987042177612, "learning_rate": 3.3699883092983586e-06, "loss": 1.355, "step": 4741 }, { "epoch": 5.804161566707466, "grad_norm": 1.0704019704817036, "learning_rate": 3.3693874583213583e-06, "loss": 0.558, "step": 4742 }, { "epoch": 5.805385556915545, "grad_norm": 0.9670669082458496, "learning_rate": 3.368786550211429e-06, "loss": 0.5248, "step": 4743 }, { "epoch": 5.806609547123623, "grad_norm": 1.2832717294643772, "learning_rate": 3.3681855850080624e-06, "loss": 0.8455, "step": 4744 }, { "epoch": 5.807833537331701, "grad_norm": 1.4170675010619873, "learning_rate": 3.3675845627507497e-06, "loss": 0.4414, "step": 4745 }, { "epoch": 5.80905752753978, "grad_norm": 1.8026649235831094, "learning_rate": 3.3669834834789883e-06, "loss": 0.7087, "step": 4746 }, { "epoch": 5.810281517747858, "grad_norm": 1.2473497493158057, "learning_rate": 3.36638234723228e-06, "loss": 0.8896, "step": 4747 }, { "epoch": 5.811505507955936, "grad_norm": 1.4492645513702238, "learning_rate": 3.3657811540501266e-06, "loss": 1.117, "step": 4748 }, { "epoch": 5.812729498164015, "grad_norm": 1.7462706317995693, "learning_rate": 3.3651799039720402e-06, "loss": 0.5727, "step": 4749 }, { "epoch": 5.813953488372093, "grad_norm": 2.3965266258956226, "learning_rate": 3.3645785970375288e-06, "loss": 0.505, "step": 4750 }, { "epoch": 5.815177478580171, "grad_norm": 1.5802749973379855, "learning_rate": 3.363977233286111e-06, "loss": 0.5154, "step": 4751 }, { "epoch": 5.81640146878825, "grad_norm": 1.0419001467404796, "learning_rate": 3.363375812757305e-06, "loss": 0.6241, "step": 4752 }, { "epoch": 5.817625458996328, "grad_norm": 1.4216324596669725, "learning_rate": 3.362774335490633e-06, "loss": 1.4236, "step": 4753 }, { "epoch": 5.818849449204406, "grad_norm": 1.9718734130970683, "learning_rate": 3.362172801525624e-06, "loss": 0.5916, "step": 4754 }, { "epoch": 5.820073439412485, "grad_norm": 1.4076777735912656, "learning_rate": 3.361571210901807e-06, "loss": 0.4754, "step": 4755 }, { "epoch": 5.821297429620563, "grad_norm": 2.223873939331688, "learning_rate": 3.3609695636587162e-06, "loss": 0.6374, "step": 4756 }, { "epoch": 5.822521419828641, "grad_norm": 1.7048840538523715, "learning_rate": 3.3603678598358903e-06, "loss": 0.4058, "step": 4757 }, { "epoch": 5.82374541003672, "grad_norm": 1.5960632445842935, "learning_rate": 3.359766099472872e-06, "loss": 0.5354, "step": 4758 }, { "epoch": 5.8249694002447985, "grad_norm": 1.2041645383826338, "learning_rate": 3.3591642826092045e-06, "loss": 0.6388, "step": 4759 }, { "epoch": 5.826193390452876, "grad_norm": 1.8675812023442606, "learning_rate": 3.3585624092844386e-06, "loss": 0.5307, "step": 4760 }, { "epoch": 5.827417380660955, "grad_norm": 2.0466984037844513, "learning_rate": 3.357960479538127e-06, "loss": 0.3121, "step": 4761 }, { "epoch": 5.828641370869033, "grad_norm": 1.2850509807147308, "learning_rate": 3.357358493409826e-06, "loss": 1.3011, "step": 4762 }, { "epoch": 5.829865361077111, "grad_norm": 2.1222696427252017, "learning_rate": 3.3567564509390952e-06, "loss": 0.3855, "step": 4763 }, { "epoch": 5.83108935128519, "grad_norm": 1.0402785241550008, "learning_rate": 3.356154352165501e-06, "loss": 0.6002, "step": 4764 }, { "epoch": 5.832313341493268, "grad_norm": 1.6420742956173897, "learning_rate": 3.3555521971286082e-06, "loss": 0.5382, "step": 4765 }, { "epoch": 5.833537331701346, "grad_norm": 1.1345272892712217, "learning_rate": 3.3549499858679896e-06, "loss": 0.4553, "step": 4766 }, { "epoch": 5.834761321909425, "grad_norm": 1.2505532668733663, "learning_rate": 3.3543477184232208e-06, "loss": 0.6424, "step": 4767 }, { "epoch": 5.835985312117503, "grad_norm": 1.044059822245221, "learning_rate": 3.3537453948338793e-06, "loss": 0.5391, "step": 4768 }, { "epoch": 5.837209302325581, "grad_norm": 1.502179118890304, "learning_rate": 3.3531430151395483e-06, "loss": 0.4848, "step": 4769 }, { "epoch": 5.83843329253366, "grad_norm": 1.3236058369666388, "learning_rate": 3.3525405793798142e-06, "loss": 1.0939, "step": 4770 }, { "epoch": 5.839657282741738, "grad_norm": 1.0708368979179848, "learning_rate": 3.3519380875942663e-06, "loss": 0.8642, "step": 4771 }, { "epoch": 5.840881272949816, "grad_norm": 1.422177584055925, "learning_rate": 3.3513355398224985e-06, "loss": 0.55, "step": 4772 }, { "epoch": 5.842105263157895, "grad_norm": 1.5147739122249333, "learning_rate": 3.350732936104108e-06, "loss": 1.5179, "step": 4773 }, { "epoch": 5.843329253365973, "grad_norm": 1.0916718929203444, "learning_rate": 3.350130276478695e-06, "loss": 0.4118, "step": 4774 }, { "epoch": 5.844553243574051, "grad_norm": 1.1436822933028665, "learning_rate": 3.3495275609858653e-06, "loss": 0.9678, "step": 4775 }, { "epoch": 5.84577723378213, "grad_norm": 1.994620528801175, "learning_rate": 3.3489247896652265e-06, "loss": 1.0603, "step": 4776 }, { "epoch": 5.847001223990208, "grad_norm": 2.2830398121084854, "learning_rate": 3.34832196255639e-06, "loss": 0.4111, "step": 4777 }, { "epoch": 5.848225214198287, "grad_norm": 1.9855414359761454, "learning_rate": 3.3477190796989724e-06, "loss": 1.0512, "step": 4778 }, { "epoch": 5.849449204406365, "grad_norm": 1.051053101122059, "learning_rate": 3.347116141132592e-06, "loss": 0.4982, "step": 4779 }, { "epoch": 5.850673194614443, "grad_norm": 2.1695379138526576, "learning_rate": 3.3465131468968725e-06, "loss": 0.3987, "step": 4780 }, { "epoch": 5.851897184822521, "grad_norm": 0.6491680049106968, "learning_rate": 3.3459100970314394e-06, "loss": 0.2649, "step": 4781 }, { "epoch": 5.8531211750306, "grad_norm": 2.08306071318415, "learning_rate": 3.345306991575924e-06, "loss": 0.4668, "step": 4782 }, { "epoch": 5.854345165238678, "grad_norm": 1.4774349095876746, "learning_rate": 3.344703830569959e-06, "loss": 1.1575, "step": 4783 }, { "epoch": 5.855569155446757, "grad_norm": 1.8226289803551823, "learning_rate": 3.3441006140531833e-06, "loss": 0.9366, "step": 4784 }, { "epoch": 5.8567931456548346, "grad_norm": 1.5921354923097943, "learning_rate": 3.3434973420652374e-06, "loss": 0.7641, "step": 4785 }, { "epoch": 5.858017135862913, "grad_norm": 2.4448581852209497, "learning_rate": 3.3428940146457655e-06, "loss": 0.3528, "step": 4786 }, { "epoch": 5.859241126070992, "grad_norm": 1.5062296284080245, "learning_rate": 3.3422906318344174e-06, "loss": 1.1355, "step": 4787 }, { "epoch": 5.8604651162790695, "grad_norm": 2.3889292932573323, "learning_rate": 3.341687193670844e-06, "loss": 0.4435, "step": 4788 }, { "epoch": 5.861689106487148, "grad_norm": 1.1725768666417697, "learning_rate": 3.3410837001947005e-06, "loss": 0.2948, "step": 4789 }, { "epoch": 5.862913096695227, "grad_norm": 2.1597600679037243, "learning_rate": 3.3404801514456487e-06, "loss": 0.6345, "step": 4790 }, { "epoch": 5.8641370869033045, "grad_norm": 2.191558437015007, "learning_rate": 3.339876547463349e-06, "loss": 0.9573, "step": 4791 }, { "epoch": 5.865361077111383, "grad_norm": 1.4296110615214401, "learning_rate": 3.3392728882874693e-06, "loss": 0.6574, "step": 4792 }, { "epoch": 5.866585067319462, "grad_norm": 1.3774336177067132, "learning_rate": 3.33866917395768e-06, "loss": 0.5055, "step": 4793 }, { "epoch": 5.8678090575275395, "grad_norm": 1.51907597486823, "learning_rate": 3.3380654045136545e-06, "loss": 0.5175, "step": 4794 }, { "epoch": 5.869033047735618, "grad_norm": 2.69226392410121, "learning_rate": 3.3374615799950705e-06, "loss": 0.473, "step": 4795 }, { "epoch": 5.870257037943697, "grad_norm": 2.7343609671756055, "learning_rate": 3.3368577004416087e-06, "loss": 0.4069, "step": 4796 }, { "epoch": 5.871481028151774, "grad_norm": 1.1948536082153638, "learning_rate": 3.3362537658929544e-06, "loss": 0.8166, "step": 4797 }, { "epoch": 5.872705018359853, "grad_norm": 1.648627077244817, "learning_rate": 3.3356497763887953e-06, "loss": 1.3205, "step": 4798 }, { "epoch": 5.873929008567932, "grad_norm": 1.5330271205918367, "learning_rate": 3.3350457319688247e-06, "loss": 1.4144, "step": 4799 }, { "epoch": 5.875152998776009, "grad_norm": 2.2029791979788356, "learning_rate": 3.3344416326727356e-06, "loss": 0.9373, "step": 4800 }, { "epoch": 5.876376988984088, "grad_norm": 0.9937856804874036, "learning_rate": 3.33383747854023e-06, "loss": 0.4005, "step": 4801 }, { "epoch": 5.877600979192167, "grad_norm": 1.6279576069493515, "learning_rate": 3.3332332696110093e-06, "loss": 0.7163, "step": 4802 }, { "epoch": 5.878824969400245, "grad_norm": 1.7994527430915137, "learning_rate": 3.33262900592478e-06, "loss": 1.0051, "step": 4803 }, { "epoch": 5.880048959608323, "grad_norm": 1.9282381378389652, "learning_rate": 3.332024687521252e-06, "loss": 0.5695, "step": 4804 }, { "epoch": 5.881272949816402, "grad_norm": 0.8705454495556584, "learning_rate": 3.3314203144401393e-06, "loss": 0.6115, "step": 4805 }, { "epoch": 5.882496940024479, "grad_norm": 1.2747471371300139, "learning_rate": 3.330815886721158e-06, "loss": 0.7341, "step": 4806 }, { "epoch": 5.883720930232558, "grad_norm": 1.1914805131666637, "learning_rate": 3.3302114044040302e-06, "loss": 1.0482, "step": 4807 }, { "epoch": 5.8849449204406366, "grad_norm": 2.1334463695290706, "learning_rate": 3.32960686752848e-06, "loss": 0.8339, "step": 4808 }, { "epoch": 5.886168910648715, "grad_norm": 1.242129280596423, "learning_rate": 3.329002276134234e-06, "loss": 0.5023, "step": 4809 }, { "epoch": 5.887392900856793, "grad_norm": 1.290554537585815, "learning_rate": 3.328397630261025e-06, "loss": 1.2332, "step": 4810 }, { "epoch": 5.8886168910648715, "grad_norm": 1.4237047347952605, "learning_rate": 3.3277929299485878e-06, "loss": 0.4477, "step": 4811 }, { "epoch": 5.88984088127295, "grad_norm": 1.4926738762301228, "learning_rate": 3.3271881752366615e-06, "loss": 0.5606, "step": 4812 }, { "epoch": 5.891064871481028, "grad_norm": 1.5377588137706495, "learning_rate": 3.3265833661649873e-06, "loss": 0.6141, "step": 4813 }, { "epoch": 5.8922888616891065, "grad_norm": 2.132515545635948, "learning_rate": 3.325978502773312e-06, "loss": 0.5254, "step": 4814 }, { "epoch": 5.893512851897185, "grad_norm": 1.6499524698659518, "learning_rate": 3.325373585101384e-06, "loss": 1.2327, "step": 4815 }, { "epoch": 5.894736842105263, "grad_norm": 1.48053152388336, "learning_rate": 3.3247686131889573e-06, "loss": 1.0495, "step": 4816 }, { "epoch": 5.8959608323133414, "grad_norm": 1.3174300776954628, "learning_rate": 3.3241635870757887e-06, "loss": 0.5192, "step": 4817 }, { "epoch": 5.89718482252142, "grad_norm": 2.333996629859323, "learning_rate": 3.323558506801636e-06, "loss": 0.4911, "step": 4818 }, { "epoch": 5.898408812729498, "grad_norm": 1.2606485231361895, "learning_rate": 3.322953372406266e-06, "loss": 1.5802, "step": 4819 }, { "epoch": 5.899632802937576, "grad_norm": 0.9008448775214074, "learning_rate": 3.3223481839294424e-06, "loss": 0.326, "step": 4820 }, { "epoch": 5.900856793145655, "grad_norm": 2.543990490691916, "learning_rate": 3.3217429414109394e-06, "loss": 0.729, "step": 4821 }, { "epoch": 5.902080783353734, "grad_norm": 1.5209050747397532, "learning_rate": 3.3211376448905285e-06, "loss": 1.239, "step": 4822 }, { "epoch": 5.903304773561811, "grad_norm": 1.3616549720224305, "learning_rate": 3.3205322944079898e-06, "loss": 0.4675, "step": 4823 }, { "epoch": 5.90452876376989, "grad_norm": 1.3213013365423099, "learning_rate": 3.3199268900031028e-06, "loss": 0.5783, "step": 4824 }, { "epoch": 5.905752753977968, "grad_norm": 1.163159967187124, "learning_rate": 3.3193214317156535e-06, "loss": 0.6858, "step": 4825 }, { "epoch": 5.906976744186046, "grad_norm": 1.5966928058121546, "learning_rate": 3.3187159195854306e-06, "loss": 0.5125, "step": 4826 }, { "epoch": 5.908200734394125, "grad_norm": 1.8958917911199513, "learning_rate": 3.3181103536522253e-06, "loss": 0.4091, "step": 4827 }, { "epoch": 5.909424724602204, "grad_norm": 2.256169435761508, "learning_rate": 3.317504733955834e-06, "loss": 0.3948, "step": 4828 }, { "epoch": 5.910648714810281, "grad_norm": 1.0111287188167812, "learning_rate": 3.3168990605360553e-06, "loss": 1.2171, "step": 4829 }, { "epoch": 5.91187270501836, "grad_norm": 1.7106187783787372, "learning_rate": 3.3162933334326913e-06, "loss": 1.3457, "step": 4830 }, { "epoch": 5.9130966952264385, "grad_norm": 1.341541157659123, "learning_rate": 3.3156875526855483e-06, "loss": 1.295, "step": 4831 }, { "epoch": 5.914320685434516, "grad_norm": 2.2892187552024046, "learning_rate": 3.3150817183344376e-06, "loss": 0.5146, "step": 4832 }, { "epoch": 5.915544675642595, "grad_norm": 1.8745748454137505, "learning_rate": 3.3144758304191703e-06, "loss": 0.6372, "step": 4833 }, { "epoch": 5.9167686658506735, "grad_norm": 1.2921197420438952, "learning_rate": 3.313869888979564e-06, "loss": 1.4256, "step": 4834 }, { "epoch": 5.917992656058751, "grad_norm": 1.3197823605833092, "learning_rate": 3.313263894055439e-06, "loss": 1.2815, "step": 4835 }, { "epoch": 5.91921664626683, "grad_norm": 2.61599641710621, "learning_rate": 3.3126578456866192e-06, "loss": 0.5663, "step": 4836 }, { "epoch": 5.9204406364749085, "grad_norm": 1.8379995876734398, "learning_rate": 3.3120517439129317e-06, "loss": 0.7152, "step": 4837 }, { "epoch": 5.921664626682986, "grad_norm": 2.4503407026507724, "learning_rate": 3.311445588774207e-06, "loss": 0.5742, "step": 4838 }, { "epoch": 5.922888616891065, "grad_norm": 1.3087391985859644, "learning_rate": 3.310839380310279e-06, "loss": 0.7184, "step": 4839 }, { "epoch": 5.9241126070991434, "grad_norm": 1.9315663955820122, "learning_rate": 3.3102331185609867e-06, "loss": 0.7089, "step": 4840 }, { "epoch": 5.925336597307221, "grad_norm": 0.9673144217340133, "learning_rate": 3.30962680356617e-06, "loss": 0.5013, "step": 4841 }, { "epoch": 5.9265605875153, "grad_norm": 0.7652916638119623, "learning_rate": 3.309020435365675e-06, "loss": 0.4246, "step": 4842 }, { "epoch": 5.927784577723378, "grad_norm": 0.8775878634316255, "learning_rate": 3.3084140139993492e-06, "loss": 0.5276, "step": 4843 }, { "epoch": 5.929008567931456, "grad_norm": 1.2345580065459838, "learning_rate": 3.3078075395070443e-06, "loss": 0.7404, "step": 4844 }, { "epoch": 5.930232558139535, "grad_norm": 1.5242693477628029, "learning_rate": 3.3072010119286156e-06, "loss": 0.5551, "step": 4845 }, { "epoch": 5.931456548347613, "grad_norm": 1.7003365055637463, "learning_rate": 3.3065944313039227e-06, "loss": 0.5335, "step": 4846 }, { "epoch": 5.932680538555692, "grad_norm": 0.8641866564496203, "learning_rate": 3.3059877976728274e-06, "loss": 0.4113, "step": 4847 }, { "epoch": 5.93390452876377, "grad_norm": 1.7729617044578796, "learning_rate": 3.3053811110751936e-06, "loss": 0.5822, "step": 4848 }, { "epoch": 5.935128518971848, "grad_norm": 1.5295383567163163, "learning_rate": 3.304774371550894e-06, "loss": 1.0344, "step": 4849 }, { "epoch": 5.936352509179926, "grad_norm": 1.3516754163769535, "learning_rate": 3.3041675791397976e-06, "loss": 1.5927, "step": 4850 }, { "epoch": 5.937576499388005, "grad_norm": 2.459028388613073, "learning_rate": 3.303560733881783e-06, "loss": 1.0926, "step": 4851 }, { "epoch": 5.938800489596083, "grad_norm": 1.7193341220818141, "learning_rate": 3.3029538358167294e-06, "loss": 0.5485, "step": 4852 }, { "epoch": 5.940024479804162, "grad_norm": 0.8958037890089586, "learning_rate": 3.30234688498452e-06, "loss": 0.4882, "step": 4853 }, { "epoch": 5.94124847001224, "grad_norm": 1.309529161109099, "learning_rate": 3.30173988142504e-06, "loss": 0.5173, "step": 4854 }, { "epoch": 5.942472460220318, "grad_norm": 0.8803314992951057, "learning_rate": 3.3011328251781814e-06, "loss": 0.5988, "step": 4855 }, { "epoch": 5.943696450428397, "grad_norm": 2.810074645130276, "learning_rate": 3.300525716283836e-06, "loss": 0.5359, "step": 4856 }, { "epoch": 5.944920440636475, "grad_norm": 1.6428226581465646, "learning_rate": 3.2999185547819013e-06, "loss": 0.8818, "step": 4857 }, { "epoch": 5.946144430844553, "grad_norm": 0.9244308930211322, "learning_rate": 3.299311340712279e-06, "loss": 0.6382, "step": 4858 }, { "epoch": 5.947368421052632, "grad_norm": 1.187069018469516, "learning_rate": 3.298704074114871e-06, "loss": 0.6926, "step": 4859 }, { "epoch": 5.94859241126071, "grad_norm": 1.9975041876810142, "learning_rate": 3.298096755029585e-06, "loss": 0.6424, "step": 4860 }, { "epoch": 5.949816401468788, "grad_norm": 1.276929778727759, "learning_rate": 3.297489383496333e-06, "loss": 0.6188, "step": 4861 }, { "epoch": 5.951040391676867, "grad_norm": 1.7893598377074926, "learning_rate": 3.296881959555028e-06, "loss": 0.5154, "step": 4862 }, { "epoch": 5.9522643818849446, "grad_norm": 1.1707556100767622, "learning_rate": 3.2962744832455883e-06, "loss": 0.583, "step": 4863 }, { "epoch": 5.953488372093023, "grad_norm": 1.8480923413331225, "learning_rate": 3.295666954607935e-06, "loss": 0.554, "step": 4864 }, { "epoch": 5.954712362301102, "grad_norm": 1.4294671285819864, "learning_rate": 3.295059373681992e-06, "loss": 0.4888, "step": 4865 }, { "epoch": 5.95593635250918, "grad_norm": 2.285455436054477, "learning_rate": 3.294451740507688e-06, "loss": 0.3937, "step": 4866 }, { "epoch": 5.957160342717258, "grad_norm": 1.0299741198821855, "learning_rate": 3.2938440551249547e-06, "loss": 0.7802, "step": 4867 }, { "epoch": 5.958384332925337, "grad_norm": 1.5955510242054471, "learning_rate": 3.2932363175737246e-06, "loss": 1.0805, "step": 4868 }, { "epoch": 5.9596083231334145, "grad_norm": 1.8086153686610227, "learning_rate": 3.292628527893939e-06, "loss": 0.5216, "step": 4869 }, { "epoch": 5.960832313341493, "grad_norm": 1.8189025774809742, "learning_rate": 3.2920206861255378e-06, "loss": 0.5288, "step": 4870 }, { "epoch": 5.962056303549572, "grad_norm": 1.4376066787069752, "learning_rate": 3.291412792308467e-06, "loss": 0.6394, "step": 4871 }, { "epoch": 5.96328029375765, "grad_norm": 1.472845613801114, "learning_rate": 3.2908048464826736e-06, "loss": 0.49, "step": 4872 }, { "epoch": 5.964504283965728, "grad_norm": 0.9897971220462906, "learning_rate": 3.2901968486881125e-06, "loss": 0.5913, "step": 4873 }, { "epoch": 5.965728274173807, "grad_norm": 2.4363514916256808, "learning_rate": 3.289588798964736e-06, "loss": 0.5097, "step": 4874 }, { "epoch": 5.966952264381885, "grad_norm": 1.1798065072179091, "learning_rate": 3.288980697352504e-06, "loss": 0.6467, "step": 4875 }, { "epoch": 5.968176254589963, "grad_norm": 1.8175355720864539, "learning_rate": 3.2883725438913795e-06, "loss": 1.048, "step": 4876 }, { "epoch": 5.969400244798042, "grad_norm": 1.3088067201218474, "learning_rate": 3.287764338621327e-06, "loss": 2.3056, "step": 4877 }, { "epoch": 5.97062423500612, "grad_norm": 1.205016109987429, "learning_rate": 3.287156081582316e-06, "loss": 0.584, "step": 4878 }, { "epoch": 5.971848225214198, "grad_norm": 0.9489339373412814, "learning_rate": 3.286547772814319e-06, "loss": 0.5198, "step": 4879 }, { "epoch": 5.973072215422277, "grad_norm": 0.9223731177069728, "learning_rate": 3.285939412357311e-06, "loss": 0.5933, "step": 4880 }, { "epoch": 5.974296205630355, "grad_norm": 1.6269001679344737, "learning_rate": 3.285331000251272e-06, "loss": 0.3244, "step": 4881 }, { "epoch": 5.975520195838433, "grad_norm": 1.6156264376646563, "learning_rate": 3.284722536536185e-06, "loss": 0.4724, "step": 4882 }, { "epoch": 5.976744186046512, "grad_norm": 1.5564257299737407, "learning_rate": 3.284114021252035e-06, "loss": 0.5883, "step": 4883 }, { "epoch": 5.97796817625459, "grad_norm": 1.3290169614120093, "learning_rate": 3.283505454438812e-06, "loss": 0.5362, "step": 4884 }, { "epoch": 5.979192166462668, "grad_norm": 1.9690085907070949, "learning_rate": 3.2828968361365077e-06, "loss": 0.4594, "step": 4885 }, { "epoch": 5.9804161566707466, "grad_norm": 2.1519447675818033, "learning_rate": 3.2822881663851204e-06, "loss": 0.4781, "step": 4886 }, { "epoch": 5.981640146878825, "grad_norm": 2.4303948385563223, "learning_rate": 3.281679445224647e-06, "loss": 0.3638, "step": 4887 }, { "epoch": 5.982864137086903, "grad_norm": 1.2375575789094297, "learning_rate": 3.2810706726950923e-06, "loss": 0.5767, "step": 4888 }, { "epoch": 5.9840881272949815, "grad_norm": 1.300183353276447, "learning_rate": 3.2804618488364614e-06, "loss": 0.6508, "step": 4889 }, { "epoch": 5.98531211750306, "grad_norm": 1.1833098068489214, "learning_rate": 3.279852973688766e-06, "loss": 0.3755, "step": 4890 }, { "epoch": 5.986536107711139, "grad_norm": 1.1326039813422901, "learning_rate": 3.279244047292016e-06, "loss": 0.5112, "step": 4891 }, { "epoch": 5.9877600979192165, "grad_norm": 1.7981726625525905, "learning_rate": 3.27863506968623e-06, "loss": 1.2169, "step": 4892 }, { "epoch": 5.988984088127295, "grad_norm": 1.2042911967653704, "learning_rate": 3.278026040911427e-06, "loss": 0.7711, "step": 4893 }, { "epoch": 5.990208078335373, "grad_norm": 1.5237531444364614, "learning_rate": 3.277416961007631e-06, "loss": 0.4894, "step": 4894 }, { "epoch": 5.9914320685434515, "grad_norm": 1.6100376862468269, "learning_rate": 3.2768078300148675e-06, "loss": 0.9545, "step": 4895 }, { "epoch": 5.99265605875153, "grad_norm": 1.3857396163971971, "learning_rate": 3.2761986479731667e-06, "loss": 0.4589, "step": 4896 }, { "epoch": 5.993880048959609, "grad_norm": 1.1983890025161505, "learning_rate": 3.2755894149225616e-06, "loss": 0.6407, "step": 4897 }, { "epoch": 5.995104039167686, "grad_norm": 2.1113370217256846, "learning_rate": 3.274980130903088e-06, "loss": 0.6404, "step": 4898 }, { "epoch": 5.996328029375765, "grad_norm": 1.2567289334505396, "learning_rate": 3.2743707959547882e-06, "loss": 1.333, "step": 4899 }, { "epoch": 5.997552019583844, "grad_norm": 2.128144293218338, "learning_rate": 3.2737614101177027e-06, "loss": 0.459, "step": 4900 }, { "epoch": 5.998776009791921, "grad_norm": 2.1513753247980905, "learning_rate": 3.27315197343188e-06, "loss": 0.7201, "step": 4901 }, { "epoch": 6.0, "grad_norm": 1.7001479325391273, "learning_rate": 3.272542485937369e-06, "loss": 0.6353, "step": 4902 }, { "epoch": 6.001223990208079, "grad_norm": 1.4495315883386672, "learning_rate": 3.2719329476742236e-06, "loss": 0.5061, "step": 4903 }, { "epoch": 6.002447980416156, "grad_norm": 2.2943541500653906, "learning_rate": 3.2713233586824986e-06, "loss": 0.4978, "step": 4904 }, { "epoch": 6.003671970624235, "grad_norm": 1.2833535328103756, "learning_rate": 3.2707137190022575e-06, "loss": 0.4951, "step": 4905 }, { "epoch": 6.004895960832314, "grad_norm": 1.5240816747144867, "learning_rate": 3.2701040286735606e-06, "loss": 1.3267, "step": 4906 }, { "epoch": 6.006119951040391, "grad_norm": 1.6996284860277722, "learning_rate": 3.2694942877364743e-06, "loss": 0.4353, "step": 4907 }, { "epoch": 6.00734394124847, "grad_norm": 1.8333687270234134, "learning_rate": 3.2688844962310717e-06, "loss": 0.5468, "step": 4908 }, { "epoch": 6.0085679314565485, "grad_norm": 1.9538509524087488, "learning_rate": 3.268274654197422e-06, "loss": 0.8928, "step": 4909 }, { "epoch": 6.009791921664626, "grad_norm": 0.8095711072335581, "learning_rate": 3.267664761675604e-06, "loss": 0.5344, "step": 4910 }, { "epoch": 6.011015911872705, "grad_norm": 1.5806493481615544, "learning_rate": 3.267054818705698e-06, "loss": 0.7479, "step": 4911 }, { "epoch": 6.0122399020807835, "grad_norm": 2.074616607001054, "learning_rate": 3.2664448253277866e-06, "loss": 1.0104, "step": 4912 }, { "epoch": 6.013463892288861, "grad_norm": 1.474120598728423, "learning_rate": 3.265834781581955e-06, "loss": 0.552, "step": 4913 }, { "epoch": 6.01468788249694, "grad_norm": 0.9782131926510688, "learning_rate": 3.2652246875082955e-06, "loss": 0.5572, "step": 4914 }, { "epoch": 6.0159118727050185, "grad_norm": 1.1508011283768056, "learning_rate": 3.2646145431468988e-06, "loss": 0.5577, "step": 4915 }, { "epoch": 6.017135862913097, "grad_norm": 2.0361322162003086, "learning_rate": 3.2640043485378626e-06, "loss": 0.4324, "step": 4916 }, { "epoch": 6.018359853121175, "grad_norm": 0.8809634951167585, "learning_rate": 3.263394103721287e-06, "loss": 0.5229, "step": 4917 }, { "epoch": 6.0195838433292534, "grad_norm": 1.738804833826841, "learning_rate": 3.2627838087372745e-06, "loss": 1.225, "step": 4918 }, { "epoch": 6.020807833537332, "grad_norm": 1.3361086598061538, "learning_rate": 3.262173463625931e-06, "loss": 1.0309, "step": 4919 }, { "epoch": 6.02203182374541, "grad_norm": 2.1176828364042173, "learning_rate": 3.2615630684273668e-06, "loss": 0.6929, "step": 4920 }, { "epoch": 6.023255813953488, "grad_norm": 2.089235202496076, "learning_rate": 3.2609526231816945e-06, "loss": 0.4592, "step": 4921 }, { "epoch": 6.024479804161567, "grad_norm": 1.5026504405635486, "learning_rate": 3.2603421279290304e-06, "loss": 1.2088, "step": 4922 }, { "epoch": 6.025703794369645, "grad_norm": 2.0277189919358194, "learning_rate": 3.2597315827094946e-06, "loss": 0.6096, "step": 4923 }, { "epoch": 6.026927784577723, "grad_norm": 1.5170366602052028, "learning_rate": 3.2591209875632077e-06, "loss": 0.6619, "step": 4924 }, { "epoch": 6.028151774785802, "grad_norm": 0.7738009109717333, "learning_rate": 3.2585103425302984e-06, "loss": 0.421, "step": 4925 }, { "epoch": 6.02937576499388, "grad_norm": 2.028753672367803, "learning_rate": 3.257899647650895e-06, "loss": 1.3065, "step": 4926 }, { "epoch": 6.030599755201958, "grad_norm": 0.671258128027905, "learning_rate": 3.2572889029651296e-06, "loss": 0.3605, "step": 4927 }, { "epoch": 6.031823745410037, "grad_norm": 1.4828326507250587, "learning_rate": 3.256678108513139e-06, "loss": 0.5471, "step": 4928 }, { "epoch": 6.033047735618115, "grad_norm": 1.6933048373114348, "learning_rate": 3.256067264335061e-06, "loss": 0.8718, "step": 4929 }, { "epoch": 6.034271725826193, "grad_norm": 1.4982681246173863, "learning_rate": 3.2554563704710396e-06, "loss": 1.1907, "step": 4930 }, { "epoch": 6.035495716034272, "grad_norm": 1.7484534123447877, "learning_rate": 3.2548454269612193e-06, "loss": 0.5957, "step": 4931 }, { "epoch": 6.03671970624235, "grad_norm": 1.5418366902327179, "learning_rate": 3.2542344338457505e-06, "loss": 1.9031, "step": 4932 }, { "epoch": 6.037943696450428, "grad_norm": 2.7853430097602665, "learning_rate": 3.253623391164783e-06, "loss": 0.3078, "step": 4933 }, { "epoch": 6.039167686658507, "grad_norm": 1.6139949514992882, "learning_rate": 3.2530122989584745e-06, "loss": 0.405, "step": 4934 }, { "epoch": 6.0403916768665855, "grad_norm": 1.4379171977321241, "learning_rate": 3.2524011572669834e-06, "loss": 0.6756, "step": 4935 }, { "epoch": 6.041615667074663, "grad_norm": 1.0673365253916953, "learning_rate": 3.2517899661304704e-06, "loss": 0.7865, "step": 4936 }, { "epoch": 6.042839657282742, "grad_norm": 1.1080901689459726, "learning_rate": 3.2511787255891018e-06, "loss": 0.7534, "step": 4937 }, { "epoch": 6.0440636474908205, "grad_norm": 1.5076793890919469, "learning_rate": 3.2505674356830457e-06, "loss": 1.3819, "step": 4938 }, { "epoch": 6.045287637698898, "grad_norm": 2.616440640589218, "learning_rate": 3.2499560964524733e-06, "loss": 0.4782, "step": 4939 }, { "epoch": 6.046511627906977, "grad_norm": 1.8296444145564486, "learning_rate": 3.2493447079375605e-06, "loss": 0.6067, "step": 4940 }, { "epoch": 6.0477356181150554, "grad_norm": 2.0232005552726817, "learning_rate": 3.2487332701784856e-06, "loss": 0.5672, "step": 4941 }, { "epoch": 6.048959608323133, "grad_norm": 1.6280212747305132, "learning_rate": 3.24812178321543e-06, "loss": 0.6467, "step": 4942 }, { "epoch": 6.050183598531212, "grad_norm": 2.8111256772611104, "learning_rate": 3.2475102470885766e-06, "loss": 0.4674, "step": 4943 }, { "epoch": 6.05140758873929, "grad_norm": 0.8740828219204546, "learning_rate": 3.246898661838116e-06, "loss": 0.4091, "step": 4944 }, { "epoch": 6.052631578947368, "grad_norm": 1.4716553680920659, "learning_rate": 3.246287027504237e-06, "loss": 0.6299, "step": 4945 }, { "epoch": 6.053855569155447, "grad_norm": 2.6834229872485795, "learning_rate": 3.245675344127135e-06, "loss": 0.39, "step": 4946 }, { "epoch": 6.055079559363525, "grad_norm": 1.7413162225726788, "learning_rate": 3.2450636117470076e-06, "loss": 0.5264, "step": 4947 }, { "epoch": 6.056303549571603, "grad_norm": 1.259915324543423, "learning_rate": 3.244451830404055e-06, "loss": 1.4256, "step": 4948 }, { "epoch": 6.057527539779682, "grad_norm": 0.991520956827504, "learning_rate": 3.2438400001384823e-06, "loss": 0.9038, "step": 4949 }, { "epoch": 6.05875152998776, "grad_norm": 1.5033564267154547, "learning_rate": 3.2432281209904954e-06, "loss": 1.1085, "step": 4950 }, { "epoch": 6.059975520195838, "grad_norm": 1.2076764629437715, "learning_rate": 3.2426161930003056e-06, "loss": 0.3408, "step": 4951 }, { "epoch": 6.061199510403917, "grad_norm": 2.058934797260067, "learning_rate": 3.2420042162081267e-06, "loss": 0.6988, "step": 4952 }, { "epoch": 6.062423500611995, "grad_norm": 1.6874285590223277, "learning_rate": 3.2413921906541752e-06, "loss": 0.7552, "step": 4953 }, { "epoch": 6.063647490820073, "grad_norm": 1.3603242508113675, "learning_rate": 3.24078011637867e-06, "loss": 0.621, "step": 4954 }, { "epoch": 6.064871481028152, "grad_norm": 2.19946195533844, "learning_rate": 3.2401679934218374e-06, "loss": 0.5296, "step": 4955 }, { "epoch": 6.06609547123623, "grad_norm": 1.4069039600713957, "learning_rate": 3.239555821823901e-06, "loss": 0.5391, "step": 4956 }, { "epoch": 6.067319461444308, "grad_norm": 1.709497319656516, "learning_rate": 3.238943601625091e-06, "loss": 0.982, "step": 4957 }, { "epoch": 6.068543451652387, "grad_norm": 1.351835047675315, "learning_rate": 3.238331332865642e-06, "loss": 0.4772, "step": 4958 }, { "epoch": 6.069767441860465, "grad_norm": 1.6287000612639446, "learning_rate": 3.237719015585787e-06, "loss": 0.5716, "step": 4959 }, { "epoch": 6.070991432068544, "grad_norm": 0.7934925720420954, "learning_rate": 3.2371066498257675e-06, "loss": 0.475, "step": 4960 }, { "epoch": 6.072215422276622, "grad_norm": 1.1572582387776182, "learning_rate": 3.2364942356258254e-06, "loss": 0.4495, "step": 4961 }, { "epoch": 6.0734394124847, "grad_norm": 1.2964454007668307, "learning_rate": 3.2358817730262073e-06, "loss": 0.4238, "step": 4962 }, { "epoch": 6.074663402692779, "grad_norm": 1.0283760919459746, "learning_rate": 3.235269262067159e-06, "loss": 0.5701, "step": 4963 }, { "epoch": 6.0758873929008566, "grad_norm": 1.657870920258874, "learning_rate": 3.2346567027889358e-06, "loss": 1.4118, "step": 4964 }, { "epoch": 6.077111383108935, "grad_norm": 2.2402070919410324, "learning_rate": 3.234044095231791e-06, "loss": 0.5643, "step": 4965 }, { "epoch": 6.078335373317014, "grad_norm": 1.9195826722818026, "learning_rate": 3.2334314394359827e-06, "loss": 1.1061, "step": 4966 }, { "epoch": 6.0795593635250915, "grad_norm": 2.032270843958977, "learning_rate": 3.232818735441774e-06, "loss": 0.4939, "step": 4967 }, { "epoch": 6.08078335373317, "grad_norm": 1.202929918923349, "learning_rate": 3.232205983289428e-06, "loss": 0.6086, "step": 4968 }, { "epoch": 6.082007343941249, "grad_norm": 1.1410996729210305, "learning_rate": 3.2315931830192133e-06, "loss": 0.352, "step": 4969 }, { "epoch": 6.0832313341493265, "grad_norm": 1.2886792928236062, "learning_rate": 3.2309803346714003e-06, "loss": 0.3108, "step": 4970 }, { "epoch": 6.084455324357405, "grad_norm": 1.36977712148266, "learning_rate": 3.230367438286264e-06, "loss": 0.5209, "step": 4971 }, { "epoch": 6.085679314565484, "grad_norm": 1.8699022335926228, "learning_rate": 3.2297544939040797e-06, "loss": 0.575, "step": 4972 }, { "epoch": 6.0869033047735615, "grad_norm": 1.6695638185273625, "learning_rate": 3.2291415015651304e-06, "loss": 0.5447, "step": 4973 }, { "epoch": 6.08812729498164, "grad_norm": 0.8839407383922806, "learning_rate": 3.228528461309698e-06, "loss": 0.5216, "step": 4974 }, { "epoch": 6.089351285189719, "grad_norm": 2.220876922258344, "learning_rate": 3.22791537317807e-06, "loss": 0.3956, "step": 4975 }, { "epoch": 6.090575275397796, "grad_norm": 1.8441206543444824, "learning_rate": 3.2273022372105364e-06, "loss": 0.8481, "step": 4976 }, { "epoch": 6.091799265605875, "grad_norm": 2.363831458493239, "learning_rate": 3.226689053447389e-06, "loss": 0.4513, "step": 4977 }, { "epoch": 6.093023255813954, "grad_norm": 2.1322794472475297, "learning_rate": 3.2260758219289255e-06, "loss": 0.303, "step": 4978 }, { "epoch": 6.094247246022032, "grad_norm": 1.3042766085138409, "learning_rate": 3.225462542695445e-06, "loss": 0.7293, "step": 4979 }, { "epoch": 6.09547123623011, "grad_norm": 2.308097449716267, "learning_rate": 3.2248492157872484e-06, "loss": 0.3948, "step": 4980 }, { "epoch": 6.096695226438189, "grad_norm": 1.5837988458937426, "learning_rate": 3.224235841244643e-06, "loss": 0.3831, "step": 4981 }, { "epoch": 6.097919216646267, "grad_norm": 1.7020958565574291, "learning_rate": 3.2236224191079375e-06, "loss": 0.3368, "step": 4982 }, { "epoch": 6.099143206854345, "grad_norm": 1.5518017651156921, "learning_rate": 3.2230089494174416e-06, "loss": 0.9738, "step": 4983 }, { "epoch": 6.100367197062424, "grad_norm": 0.7805946825944557, "learning_rate": 3.2223954322134724e-06, "loss": 0.4486, "step": 4984 }, { "epoch": 6.101591187270502, "grad_norm": 1.5651929669437348, "learning_rate": 3.221781867536348e-06, "loss": 0.5452, "step": 4985 }, { "epoch": 6.10281517747858, "grad_norm": 0.9582848286462313, "learning_rate": 3.2211682554263882e-06, "loss": 0.5803, "step": 4986 }, { "epoch": 6.1040391676866586, "grad_norm": 1.3592667149010977, "learning_rate": 3.220554595923919e-06, "loss": 0.5272, "step": 4987 }, { "epoch": 6.105263157894737, "grad_norm": 3.009922420339776, "learning_rate": 3.2199408890692656e-06, "loss": 0.7927, "step": 4988 }, { "epoch": 6.106487148102815, "grad_norm": 2.9410663043049863, "learning_rate": 3.21932713490276e-06, "loss": 0.5146, "step": 4989 }, { "epoch": 6.1077111383108935, "grad_norm": 1.2589892844182529, "learning_rate": 3.2187133334647362e-06, "loss": 0.4031, "step": 4990 }, { "epoch": 6.108935128518972, "grad_norm": 1.3312796355034278, "learning_rate": 3.2180994847955304e-06, "loss": 0.5009, "step": 4991 }, { "epoch": 6.11015911872705, "grad_norm": 1.51791840909122, "learning_rate": 3.2174855889354826e-06, "loss": 0.5471, "step": 4992 }, { "epoch": 6.1113831089351285, "grad_norm": 1.2700164288612994, "learning_rate": 3.216871645924936e-06, "loss": 0.4816, "step": 4993 }, { "epoch": 6.112607099143207, "grad_norm": 1.0909084457668567, "learning_rate": 3.2162576558042364e-06, "loss": 0.7275, "step": 4994 }, { "epoch": 6.113831089351285, "grad_norm": 2.1193645569908286, "learning_rate": 3.2156436186137323e-06, "loss": 0.7039, "step": 4995 }, { "epoch": 6.1150550795593634, "grad_norm": 1.5121310364746152, "learning_rate": 3.215029534393777e-06, "loss": 0.6065, "step": 4996 }, { "epoch": 6.116279069767442, "grad_norm": 1.3540936766894813, "learning_rate": 3.2144154031847253e-06, "loss": 1.25, "step": 4997 }, { "epoch": 6.11750305997552, "grad_norm": 1.0978512810268124, "learning_rate": 3.213801225026935e-06, "loss": 0.9871, "step": 4998 }, { "epoch": 6.118727050183598, "grad_norm": 1.0908890047484585, "learning_rate": 3.2131869999607695e-06, "loss": 0.7451, "step": 4999 }, { "epoch": 6.119951040391677, "grad_norm": 1.3002445347945324, "learning_rate": 3.212572728026592e-06, "loss": 2.2988, "step": 5000 }, { "epoch": 6.121175030599755, "grad_norm": 1.8307320905392317, "learning_rate": 3.21195840926477e-06, "loss": 0.8446, "step": 5001 }, { "epoch": 6.122399020807833, "grad_norm": 1.2915360659758834, "learning_rate": 3.2113440437156745e-06, "loss": 0.6114, "step": 5002 }, { "epoch": 6.123623011015912, "grad_norm": 1.3000044621245683, "learning_rate": 3.21072963141968e-06, "loss": 1.0862, "step": 5003 }, { "epoch": 6.124847001223991, "grad_norm": 0.9578173273971511, "learning_rate": 3.2101151724171624e-06, "loss": 0.4581, "step": 5004 }, { "epoch": 6.126070991432068, "grad_norm": 1.3746566297575276, "learning_rate": 3.2095006667485036e-06, "loss": 0.5857, "step": 5005 }, { "epoch": 6.127294981640147, "grad_norm": 0.9741346731209892, "learning_rate": 3.208886114454084e-06, "loss": 0.5193, "step": 5006 }, { "epoch": 6.128518971848226, "grad_norm": 1.9796139784824802, "learning_rate": 3.2082715155742913e-06, "loss": 0.5166, "step": 5007 }, { "epoch": 6.129742962056303, "grad_norm": 0.9817650019912368, "learning_rate": 3.2076568701495143e-06, "loss": 0.5933, "step": 5008 }, { "epoch": 6.130966952264382, "grad_norm": 1.9018029548735351, "learning_rate": 3.207042178220145e-06, "loss": 0.4271, "step": 5009 }, { "epoch": 6.1321909424724605, "grad_norm": 1.8627016432403698, "learning_rate": 3.2064274398265792e-06, "loss": 1.3611, "step": 5010 }, { "epoch": 6.133414932680538, "grad_norm": 1.4922792081956022, "learning_rate": 3.205812655009215e-06, "loss": 1.6171, "step": 5011 }, { "epoch": 6.134638922888617, "grad_norm": 1.4677160719075089, "learning_rate": 3.2051978238084536e-06, "loss": 0.5994, "step": 5012 }, { "epoch": 6.1358629130966955, "grad_norm": 1.5257848740883315, "learning_rate": 3.2045829462646986e-06, "loss": 0.9325, "step": 5013 }, { "epoch": 6.137086903304773, "grad_norm": 1.5670291132870946, "learning_rate": 3.20396802241836e-06, "loss": 0.489, "step": 5014 }, { "epoch": 6.138310893512852, "grad_norm": 1.418455227717482, "learning_rate": 3.203353052309846e-06, "loss": 0.9258, "step": 5015 }, { "epoch": 6.1395348837209305, "grad_norm": 1.6156598782743565, "learning_rate": 3.202738035979571e-06, "loss": 0.4402, "step": 5016 }, { "epoch": 6.140758873929008, "grad_norm": 1.8866851472563322, "learning_rate": 3.2021229734679514e-06, "loss": 0.4104, "step": 5017 }, { "epoch": 6.141982864137087, "grad_norm": 1.5320221819207216, "learning_rate": 3.2015078648154076e-06, "loss": 0.4771, "step": 5018 }, { "epoch": 6.1432068543451654, "grad_norm": 1.2987351257435138, "learning_rate": 3.200892710062361e-06, "loss": 0.5913, "step": 5019 }, { "epoch": 6.144430844553243, "grad_norm": 2.6831334638063216, "learning_rate": 3.2002775092492383e-06, "loss": 0.5507, "step": 5020 }, { "epoch": 6.145654834761322, "grad_norm": 1.4359405972060042, "learning_rate": 3.1996622624164676e-06, "loss": 1.5403, "step": 5021 }, { "epoch": 6.1468788249694, "grad_norm": 0.7968725611581579, "learning_rate": 3.1990469696044806e-06, "loss": 0.3557, "step": 5022 }, { "epoch": 6.148102815177479, "grad_norm": 1.1878294636284004, "learning_rate": 3.1984316308537134e-06, "loss": 0.8295, "step": 5023 }, { "epoch": 6.149326805385557, "grad_norm": 2.322647729527677, "learning_rate": 3.197816246204601e-06, "loss": 1.0826, "step": 5024 }, { "epoch": 6.150550795593635, "grad_norm": 1.9805261959662368, "learning_rate": 3.1972008156975877e-06, "loss": 0.4503, "step": 5025 }, { "epoch": 6.151774785801714, "grad_norm": 0.9076356164610674, "learning_rate": 3.196585339373115e-06, "loss": 0.6925, "step": 5026 }, { "epoch": 6.152998776009792, "grad_norm": 1.2117113035791238, "learning_rate": 3.1959698172716296e-06, "loss": 0.5175, "step": 5027 }, { "epoch": 6.15422276621787, "grad_norm": 1.574836986285899, "learning_rate": 3.195354249433582e-06, "loss": 0.5781, "step": 5028 }, { "epoch": 6.155446756425949, "grad_norm": 0.9348443121813554, "learning_rate": 3.1947386358994263e-06, "loss": 0.6169, "step": 5029 }, { "epoch": 6.156670746634027, "grad_norm": 1.398795643667143, "learning_rate": 3.194122976709616e-06, "loss": 1.063, "step": 5030 }, { "epoch": 6.157894736842105, "grad_norm": 1.6884367454473714, "learning_rate": 3.193507271904612e-06, "loss": 0.9486, "step": 5031 }, { "epoch": 6.159118727050184, "grad_norm": 1.6314551664765318, "learning_rate": 3.1928915215248746e-06, "loss": 0.7668, "step": 5032 }, { "epoch": 6.160342717258262, "grad_norm": 1.3755837868993164, "learning_rate": 3.1922757256108694e-06, "loss": 0.8366, "step": 5033 }, { "epoch": 6.16156670746634, "grad_norm": 1.4307947946190678, "learning_rate": 3.191659884203064e-06, "loss": 0.5759, "step": 5034 }, { "epoch": 6.162790697674419, "grad_norm": 1.6730373100985183, "learning_rate": 3.1910439973419293e-06, "loss": 0.3415, "step": 5035 }, { "epoch": 6.164014687882497, "grad_norm": 2.0816273384449753, "learning_rate": 3.190428065067939e-06, "loss": 0.455, "step": 5036 }, { "epoch": 6.165238678090575, "grad_norm": 1.3564389628686835, "learning_rate": 3.18981208742157e-06, "loss": 0.5242, "step": 5037 }, { "epoch": 6.166462668298654, "grad_norm": 2.3430407308511656, "learning_rate": 3.189196064443302e-06, "loss": 0.991, "step": 5038 }, { "epoch": 6.167686658506732, "grad_norm": 1.6654374181301463, "learning_rate": 3.1885799961736173e-06, "loss": 0.3159, "step": 5039 }, { "epoch": 6.16891064871481, "grad_norm": 1.631860101499813, "learning_rate": 3.187963882653003e-06, "loss": 0.4632, "step": 5040 }, { "epoch": 6.170134638922889, "grad_norm": 2.2314010198724485, "learning_rate": 3.187347723921946e-06, "loss": 0.8511, "step": 5041 }, { "epoch": 6.1713586291309666, "grad_norm": 1.4974042468713582, "learning_rate": 3.1867315200209403e-06, "loss": 0.9005, "step": 5042 }, { "epoch": 6.172582619339045, "grad_norm": 1.1217089350480969, "learning_rate": 3.1861152709904784e-06, "loss": 0.6557, "step": 5043 }, { "epoch": 6.173806609547124, "grad_norm": 1.4005877669236046, "learning_rate": 3.1854989768710585e-06, "loss": 1.1179, "step": 5044 }, { "epoch": 6.1750305997552015, "grad_norm": 2.1898081613661797, "learning_rate": 3.1848826377031816e-06, "loss": 0.9773, "step": 5045 }, { "epoch": 6.17625458996328, "grad_norm": 1.2419570230519992, "learning_rate": 3.184266253527351e-06, "loss": 1.0031, "step": 5046 }, { "epoch": 6.177478580171359, "grad_norm": 1.2236150196652122, "learning_rate": 3.183649824384073e-06, "loss": 0.5666, "step": 5047 }, { "epoch": 6.178702570379437, "grad_norm": 1.5013724478247217, "learning_rate": 3.1830333503138565e-06, "loss": 0.6427, "step": 5048 }, { "epoch": 6.179926560587515, "grad_norm": 1.9394804666126197, "learning_rate": 3.182416831357216e-06, "loss": 0.5753, "step": 5049 }, { "epoch": 6.181150550795594, "grad_norm": 2.226285931797056, "learning_rate": 3.1818002675546643e-06, "loss": 0.468, "step": 5050 }, { "epoch": 6.182374541003672, "grad_norm": 0.6539060781202931, "learning_rate": 3.181183658946721e-06, "loss": 0.263, "step": 5051 }, { "epoch": 6.18359853121175, "grad_norm": 0.9438863341530361, "learning_rate": 3.1805670055739073e-06, "loss": 0.6101, "step": 5052 }, { "epoch": 6.184822521419829, "grad_norm": 1.430052836253628, "learning_rate": 3.1799503074767477e-06, "loss": 0.543, "step": 5053 }, { "epoch": 6.186046511627907, "grad_norm": 1.341516101931673, "learning_rate": 3.179333564695768e-06, "loss": 0.5105, "step": 5054 }, { "epoch": 6.187270501835985, "grad_norm": 0.8462778969209044, "learning_rate": 3.1787167772715e-06, "loss": 0.5958, "step": 5055 }, { "epoch": 6.188494492044064, "grad_norm": 1.318830512803707, "learning_rate": 3.178099945244475e-06, "loss": 1.5736, "step": 5056 }, { "epoch": 6.189718482252142, "grad_norm": 1.0599387337759185, "learning_rate": 3.17748306865523e-06, "loss": 0.789, "step": 5057 }, { "epoch": 6.19094247246022, "grad_norm": 1.0900408638308066, "learning_rate": 3.176866147544304e-06, "loss": 0.5971, "step": 5058 }, { "epoch": 6.192166462668299, "grad_norm": 1.6933337735705989, "learning_rate": 3.176249181952238e-06, "loss": 0.528, "step": 5059 }, { "epoch": 6.193390452876377, "grad_norm": 1.2528597755939546, "learning_rate": 3.175632171919577e-06, "loss": 1.1711, "step": 5060 }, { "epoch": 6.194614443084455, "grad_norm": 2.1718360054281662, "learning_rate": 3.1750151174868684e-06, "loss": 0.5186, "step": 5061 }, { "epoch": 6.195838433292534, "grad_norm": 2.1126007407629226, "learning_rate": 3.1743980186946634e-06, "loss": 0.4535, "step": 5062 }, { "epoch": 6.197062423500612, "grad_norm": 1.288316659099044, "learning_rate": 3.1737808755835144e-06, "loss": 0.9344, "step": 5063 }, { "epoch": 6.19828641370869, "grad_norm": 1.2175929083376564, "learning_rate": 3.1731636881939794e-06, "loss": 1.292, "step": 5064 }, { "epoch": 6.1995104039167686, "grad_norm": 1.778686347884478, "learning_rate": 3.172546456566616e-06, "loss": 1.2191, "step": 5065 }, { "epoch": 6.200734394124847, "grad_norm": 1.0621873240509532, "learning_rate": 3.1719291807419874e-06, "loss": 0.588, "step": 5066 }, { "epoch": 6.201958384332926, "grad_norm": 1.1461693082787718, "learning_rate": 3.1713118607606575e-06, "loss": 1.145, "step": 5067 }, { "epoch": 6.2031823745410035, "grad_norm": 1.1350699634370278, "learning_rate": 3.170694496663196e-06, "loss": 0.5032, "step": 5068 }, { "epoch": 6.204406364749082, "grad_norm": 1.461219543636155, "learning_rate": 3.1700770884901723e-06, "loss": 0.4826, "step": 5069 }, { "epoch": 6.205630354957161, "grad_norm": 1.1968034421367164, "learning_rate": 3.1694596362821615e-06, "loss": 1.0894, "step": 5070 }, { "epoch": 6.2068543451652385, "grad_norm": 1.144937641394743, "learning_rate": 3.1688421400797397e-06, "loss": 0.4813, "step": 5071 }, { "epoch": 6.208078335373317, "grad_norm": 1.5422818090704904, "learning_rate": 3.1682245999234853e-06, "loss": 1.105, "step": 5072 }, { "epoch": 6.209302325581396, "grad_norm": 1.1850659386994014, "learning_rate": 3.167607015853983e-06, "loss": 0.5625, "step": 5073 }, { "epoch": 6.2105263157894735, "grad_norm": 0.8533220512636793, "learning_rate": 3.1669893879118158e-06, "loss": 0.5228, "step": 5074 }, { "epoch": 6.211750305997552, "grad_norm": 1.862526686824757, "learning_rate": 3.1663717161375736e-06, "loss": 0.4032, "step": 5075 }, { "epoch": 6.212974296205631, "grad_norm": 1.1499090956174667, "learning_rate": 3.1657540005718466e-06, "loss": 0.4059, "step": 5076 }, { "epoch": 6.214198286413708, "grad_norm": 1.1862791728241355, "learning_rate": 3.1651362412552293e-06, "loss": 0.6401, "step": 5077 }, { "epoch": 6.215422276621787, "grad_norm": 2.1255196434374666, "learning_rate": 3.1645184382283183e-06, "loss": 0.8885, "step": 5078 }, { "epoch": 6.216646266829866, "grad_norm": 1.489064095141007, "learning_rate": 3.163900591531714e-06, "loss": 0.4685, "step": 5079 }, { "epoch": 6.217870257037943, "grad_norm": 2.0893730932803813, "learning_rate": 3.1632827012060176e-06, "loss": 0.5061, "step": 5080 }, { "epoch": 6.219094247246022, "grad_norm": 0.8719556386834397, "learning_rate": 3.162664767291836e-06, "loss": 0.6053, "step": 5081 }, { "epoch": 6.220318237454101, "grad_norm": 1.582103202157159, "learning_rate": 3.1620467898297774e-06, "loss": 1.0309, "step": 5082 }, { "epoch": 6.221542227662178, "grad_norm": 1.7475007688632729, "learning_rate": 3.1614287688604513e-06, "loss": 1.1485, "step": 5083 }, { "epoch": 6.222766217870257, "grad_norm": 1.3886314989040456, "learning_rate": 3.1608107044244734e-06, "loss": 0.7492, "step": 5084 }, { "epoch": 6.223990208078336, "grad_norm": 1.2835948556307393, "learning_rate": 3.1601925965624604e-06, "loss": 0.4931, "step": 5085 }, { "epoch": 6.225214198286413, "grad_norm": 1.2934795368254515, "learning_rate": 3.1595744453150316e-06, "loss": 0.7018, "step": 5086 }, { "epoch": 6.226438188494492, "grad_norm": 2.4593093680318145, "learning_rate": 3.1589562507228106e-06, "loss": 0.527, "step": 5087 }, { "epoch": 6.2276621787025706, "grad_norm": 1.6686127563505049, "learning_rate": 3.1583380128264214e-06, "loss": 0.5284, "step": 5088 }, { "epoch": 6.228886168910648, "grad_norm": 1.0907047036709694, "learning_rate": 3.1577197316664925e-06, "loss": 0.5283, "step": 5089 }, { "epoch": 6.230110159118727, "grad_norm": 1.4944316925675412, "learning_rate": 3.1571014072836564e-06, "loss": 1.6321, "step": 5090 }, { "epoch": 6.2313341493268055, "grad_norm": 0.9352051751066006, "learning_rate": 3.156483039718546e-06, "loss": 0.4301, "step": 5091 }, { "epoch": 6.232558139534884, "grad_norm": 0.7818813358280692, "learning_rate": 3.1558646290117984e-06, "loss": 0.3993, "step": 5092 }, { "epoch": 6.233782129742962, "grad_norm": 1.585376320549656, "learning_rate": 3.155246175204054e-06, "loss": 0.5465, "step": 5093 }, { "epoch": 6.2350061199510405, "grad_norm": 1.6912424544369857, "learning_rate": 3.1546276783359532e-06, "loss": 0.928, "step": 5094 }, { "epoch": 6.236230110159119, "grad_norm": 1.169492430758933, "learning_rate": 3.1540091384481436e-06, "loss": 0.5652, "step": 5095 }, { "epoch": 6.237454100367197, "grad_norm": 1.3709308225057861, "learning_rate": 3.153390555581272e-06, "loss": 0.7162, "step": 5096 }, { "epoch": 6.2386780905752754, "grad_norm": 0.8891838800052027, "learning_rate": 3.15277192977599e-06, "loss": 0.7231, "step": 5097 }, { "epoch": 6.239902080783354, "grad_norm": 1.1919197151629488, "learning_rate": 3.152153261072951e-06, "loss": 1.0594, "step": 5098 }, { "epoch": 6.241126070991432, "grad_norm": 1.0107906581917385, "learning_rate": 3.151534549512812e-06, "loss": 0.4474, "step": 5099 }, { "epoch": 6.24235006119951, "grad_norm": 1.632981039228178, "learning_rate": 3.1509157951362325e-06, "loss": 0.4348, "step": 5100 }, { "epoch": 6.243574051407589, "grad_norm": 1.420724425150078, "learning_rate": 3.1502969979838742e-06, "loss": 1.2868, "step": 5101 }, { "epoch": 6.244798041615667, "grad_norm": 2.457871499028654, "learning_rate": 3.1496781580964024e-06, "loss": 0.9429, "step": 5102 }, { "epoch": 6.246022031823745, "grad_norm": 1.3875144782501043, "learning_rate": 3.1490592755144855e-06, "loss": 1.5882, "step": 5103 }, { "epoch": 6.247246022031824, "grad_norm": 1.9438846992379268, "learning_rate": 3.148440350278793e-06, "loss": 0.9964, "step": 5104 }, { "epoch": 6.248470012239902, "grad_norm": 1.8481290822610643, "learning_rate": 3.14782138243e-06, "loss": 0.4048, "step": 5105 }, { "epoch": 6.24969400244798, "grad_norm": 1.1357731084986995, "learning_rate": 3.1472023720087814e-06, "loss": 0.5218, "step": 5106 }, { "epoch": 6.250917992656059, "grad_norm": 1.3402065692326268, "learning_rate": 3.1465833190558163e-06, "loss": 0.4235, "step": 5107 }, { "epoch": 6.252141982864137, "grad_norm": 1.3823666952061648, "learning_rate": 3.1459642236117877e-06, "loss": 0.5605, "step": 5108 }, { "epoch": 6.253365973072215, "grad_norm": 1.7530888874350352, "learning_rate": 3.1453450857173795e-06, "loss": 0.5969, "step": 5109 }, { "epoch": 6.254589963280294, "grad_norm": 1.6671176912499175, "learning_rate": 3.1447259054132796e-06, "loss": 0.7973, "step": 5110 }, { "epoch": 6.2558139534883725, "grad_norm": 1.193743113171425, "learning_rate": 3.1441066827401768e-06, "loss": 0.547, "step": 5111 }, { "epoch": 6.25703794369645, "grad_norm": 1.7504905587370918, "learning_rate": 3.1434874177387657e-06, "loss": 0.7895, "step": 5112 }, { "epoch": 6.258261933904529, "grad_norm": 2.063792043893467, "learning_rate": 3.1428681104497416e-06, "loss": 0.3698, "step": 5113 }, { "epoch": 6.2594859241126075, "grad_norm": 1.457827579076249, "learning_rate": 3.1422487609138037e-06, "loss": 0.5617, "step": 5114 }, { "epoch": 6.260709914320685, "grad_norm": 1.0228133107983184, "learning_rate": 3.1416293691716515e-06, "loss": 0.4843, "step": 5115 }, { "epoch": 6.261933904528764, "grad_norm": 1.6583628090496667, "learning_rate": 3.141009935263991e-06, "loss": 1.0262, "step": 5116 }, { "epoch": 6.2631578947368425, "grad_norm": 1.5620816888658107, "learning_rate": 3.1403904592315287e-06, "loss": 0.4965, "step": 5117 }, { "epoch": 6.26438188494492, "grad_norm": 1.7653846767402892, "learning_rate": 3.139770941114974e-06, "loss": 0.8557, "step": 5118 }, { "epoch": 6.265605875152999, "grad_norm": 1.7135264398400152, "learning_rate": 3.1391513809550387e-06, "loss": 0.9806, "step": 5119 }, { "epoch": 6.2668298653610774, "grad_norm": 1.8459445828426224, "learning_rate": 3.1385317787924404e-06, "loss": 0.3748, "step": 5120 }, { "epoch": 6.268053855569155, "grad_norm": 1.012213574932585, "learning_rate": 3.137912134667894e-06, "loss": 0.3959, "step": 5121 }, { "epoch": 6.269277845777234, "grad_norm": 1.3736905179138803, "learning_rate": 3.1372924486221213e-06, "loss": 0.3924, "step": 5122 }, { "epoch": 6.270501835985312, "grad_norm": 1.7890517215659805, "learning_rate": 3.1366727206958474e-06, "loss": 0.5638, "step": 5123 }, { "epoch": 6.27172582619339, "grad_norm": 1.4198794654295313, "learning_rate": 3.136052950929796e-06, "loss": 0.6805, "step": 5124 }, { "epoch": 6.272949816401469, "grad_norm": 2.5117265400490423, "learning_rate": 3.1354331393646974e-06, "loss": 0.4801, "step": 5125 }, { "epoch": 6.274173806609547, "grad_norm": 2.234094818662838, "learning_rate": 3.1348132860412837e-06, "loss": 0.972, "step": 5126 }, { "epoch": 6.275397796817625, "grad_norm": 1.5122675025102843, "learning_rate": 3.1341933910002887e-06, "loss": 0.4772, "step": 5127 }, { "epoch": 6.276621787025704, "grad_norm": 3.157732600231198, "learning_rate": 3.133573454282449e-06, "loss": 0.4188, "step": 5128 }, { "epoch": 6.277845777233782, "grad_norm": 2.0547166525053466, "learning_rate": 3.1329534759285067e-06, "loss": 0.5634, "step": 5129 }, { "epoch": 6.27906976744186, "grad_norm": 1.8119997109498913, "learning_rate": 3.132333455979202e-06, "loss": 0.9936, "step": 5130 }, { "epoch": 6.280293757649939, "grad_norm": 1.2505256539022798, "learning_rate": 3.131713394475282e-06, "loss": 0.493, "step": 5131 }, { "epoch": 6.281517747858017, "grad_norm": 1.7478300190282847, "learning_rate": 3.131093291457494e-06, "loss": 0.626, "step": 5132 }, { "epoch": 6.282741738066095, "grad_norm": 1.0821712618966253, "learning_rate": 3.1304731469665892e-06, "loss": 0.6188, "step": 5133 }, { "epoch": 6.283965728274174, "grad_norm": 1.4159576718366151, "learning_rate": 3.1298529610433213e-06, "loss": 0.4878, "step": 5134 }, { "epoch": 6.285189718482252, "grad_norm": 1.586541651319388, "learning_rate": 3.1292327337284463e-06, "loss": 0.7707, "step": 5135 }, { "epoch": 6.286413708690331, "grad_norm": 1.614459347095316, "learning_rate": 3.128612465062724e-06, "loss": 0.6243, "step": 5136 }, { "epoch": 6.287637698898409, "grad_norm": 1.666080143171431, "learning_rate": 3.127992155086914e-06, "loss": 0.3886, "step": 5137 }, { "epoch": 6.288861689106487, "grad_norm": 1.3455261655496396, "learning_rate": 3.1273718038417837e-06, "loss": 1.3145, "step": 5138 }, { "epoch": 6.290085679314566, "grad_norm": 1.1489818345039409, "learning_rate": 3.126751411368098e-06, "loss": 0.3836, "step": 5139 }, { "epoch": 6.291309669522644, "grad_norm": 1.3023961616097004, "learning_rate": 3.1261309777066284e-06, "loss": 0.6204, "step": 5140 }, { "epoch": 6.292533659730722, "grad_norm": 1.0884616396414535, "learning_rate": 3.125510502898147e-06, "loss": 0.836, "step": 5141 }, { "epoch": 6.293757649938801, "grad_norm": 1.8405492228918197, "learning_rate": 3.1248899869834288e-06, "loss": 0.4976, "step": 5142 }, { "epoch": 6.2949816401468786, "grad_norm": 1.1676190240721025, "learning_rate": 3.1242694300032515e-06, "loss": 0.3758, "step": 5143 }, { "epoch": 6.296205630354957, "grad_norm": 1.550301137259765, "learning_rate": 3.123648831998397e-06, "loss": 0.5086, "step": 5144 }, { "epoch": 6.297429620563036, "grad_norm": 1.166241352059848, "learning_rate": 3.1230281930096474e-06, "loss": 0.546, "step": 5145 }, { "epoch": 6.2986536107711135, "grad_norm": 1.944767201199805, "learning_rate": 3.12240751307779e-06, "loss": 0.5458, "step": 5146 }, { "epoch": 6.299877600979192, "grad_norm": 1.2003780009590979, "learning_rate": 3.1217867922436125e-06, "loss": 0.6627, "step": 5147 }, { "epoch": 6.301101591187271, "grad_norm": 2.1574519392974074, "learning_rate": 3.1211660305479063e-06, "loss": 1.0466, "step": 5148 }, { "epoch": 6.3023255813953485, "grad_norm": 1.9796506912149894, "learning_rate": 3.120545228031467e-06, "loss": 1.0631, "step": 5149 }, { "epoch": 6.303549571603427, "grad_norm": 0.4779134045408042, "learning_rate": 3.1199243847350903e-06, "loss": 0.1325, "step": 5150 }, { "epoch": 6.304773561811506, "grad_norm": 1.227697419748376, "learning_rate": 3.119303500699576e-06, "loss": 1.5764, "step": 5151 }, { "epoch": 6.3059975520195835, "grad_norm": 1.7204585111605968, "learning_rate": 3.1186825759657263e-06, "loss": 1.1299, "step": 5152 }, { "epoch": 6.307221542227662, "grad_norm": 1.2791661809028727, "learning_rate": 3.118061610574346e-06, "loss": 0.7256, "step": 5153 }, { "epoch": 6.308445532435741, "grad_norm": 1.0678609230562675, "learning_rate": 3.117440604566242e-06, "loss": 0.6709, "step": 5154 }, { "epoch": 6.309669522643819, "grad_norm": 0.8552403415173792, "learning_rate": 3.116819557982227e-06, "loss": 0.605, "step": 5155 }, { "epoch": 6.310893512851897, "grad_norm": 1.1351381541178502, "learning_rate": 3.1161984708631097e-06, "loss": 0.4575, "step": 5156 }, { "epoch": 6.312117503059976, "grad_norm": 1.682052578046046, "learning_rate": 3.11557734324971e-06, "loss": 0.6159, "step": 5157 }, { "epoch": 6.313341493268053, "grad_norm": 1.2294805449197728, "learning_rate": 3.1149561751828433e-06, "loss": 0.6243, "step": 5158 }, { "epoch": 6.314565483476132, "grad_norm": 0.9500196958489768, "learning_rate": 3.1143349667033317e-06, "loss": 0.7065, "step": 5159 }, { "epoch": 6.315789473684211, "grad_norm": 1.864912566819893, "learning_rate": 3.1137137178519983e-06, "loss": 0.4767, "step": 5160 }, { "epoch": 6.317013463892289, "grad_norm": 1.878214117321595, "learning_rate": 3.1130924286696697e-06, "loss": 0.6658, "step": 5161 }, { "epoch": 6.318237454100367, "grad_norm": 2.1993834806608215, "learning_rate": 3.1124710991971733e-06, "loss": 0.3772, "step": 5162 }, { "epoch": 6.319461444308446, "grad_norm": 2.0375204390524413, "learning_rate": 3.111849729475342e-06, "loss": 0.5117, "step": 5163 }, { "epoch": 6.320685434516524, "grad_norm": 1.1203286099738816, "learning_rate": 3.1112283195450105e-06, "loss": 0.6428, "step": 5164 }, { "epoch": 6.321909424724602, "grad_norm": 1.4655066593792638, "learning_rate": 3.1106068694470133e-06, "loss": 0.7106, "step": 5165 }, { "epoch": 6.3231334149326806, "grad_norm": 1.0970750291978095, "learning_rate": 3.109985379222192e-06, "loss": 1.0998, "step": 5166 }, { "epoch": 6.324357405140759, "grad_norm": 1.3555766168678587, "learning_rate": 3.1093638489113875e-06, "loss": 0.5362, "step": 5167 }, { "epoch": 6.325581395348837, "grad_norm": 0.8008787230241562, "learning_rate": 3.1087422785554442e-06, "loss": 0.4358, "step": 5168 }, { "epoch": 6.3268053855569155, "grad_norm": 1.2879381393596085, "learning_rate": 3.10812066819521e-06, "loss": 0.5318, "step": 5169 }, { "epoch": 6.328029375764994, "grad_norm": 1.369892064853444, "learning_rate": 3.1074990178715364e-06, "loss": 1.6584, "step": 5170 }, { "epoch": 6.329253365973072, "grad_norm": 1.395334417184578, "learning_rate": 3.106877327625273e-06, "loss": 1.3495, "step": 5171 }, { "epoch": 6.3304773561811505, "grad_norm": 1.6357641424075164, "learning_rate": 3.1062555974972765e-06, "loss": 0.5565, "step": 5172 }, { "epoch": 6.331701346389229, "grad_norm": 1.4183228173617424, "learning_rate": 3.105633827528405e-06, "loss": 0.4512, "step": 5173 }, { "epoch": 6.332925336597307, "grad_norm": 1.5410697356821677, "learning_rate": 3.1050120177595173e-06, "loss": 0.4814, "step": 5174 }, { "epoch": 6.3341493268053854, "grad_norm": 2.584266276358924, "learning_rate": 3.1043901682314782e-06, "loss": 0.7329, "step": 5175 }, { "epoch": 6.335373317013464, "grad_norm": 1.7167494675840742, "learning_rate": 3.1037682789851524e-06, "loss": 0.437, "step": 5176 }, { "epoch": 6.336597307221542, "grad_norm": 1.8180581243277534, "learning_rate": 3.103146350061409e-06, "loss": 0.505, "step": 5177 }, { "epoch": 6.33782129742962, "grad_norm": 1.205180711644409, "learning_rate": 3.102524381501118e-06, "loss": 0.8604, "step": 5178 }, { "epoch": 6.339045287637699, "grad_norm": 1.2432797128385729, "learning_rate": 3.1019023733451536e-06, "loss": 0.9095, "step": 5179 }, { "epoch": 6.340269277845778, "grad_norm": 2.2779805793320973, "learning_rate": 3.1012803256343908e-06, "loss": 0.5324, "step": 5180 }, { "epoch": 6.341493268053855, "grad_norm": 2.0269682157548257, "learning_rate": 3.1006582384097096e-06, "loss": 0.4462, "step": 5181 }, { "epoch": 6.342717258261934, "grad_norm": 1.9212024812437598, "learning_rate": 3.100036111711991e-06, "loss": 0.6291, "step": 5182 }, { "epoch": 6.343941248470013, "grad_norm": 1.1910032272156494, "learning_rate": 3.0994139455821183e-06, "loss": 0.8885, "step": 5183 }, { "epoch": 6.34516523867809, "grad_norm": 1.636343890719578, "learning_rate": 3.0987917400609775e-06, "loss": 0.615, "step": 5184 }, { "epoch": 6.346389228886169, "grad_norm": 1.5665048970695965, "learning_rate": 3.098169495189459e-06, "loss": 0.7179, "step": 5185 }, { "epoch": 6.347613219094248, "grad_norm": 1.7559911073947418, "learning_rate": 3.0975472110084536e-06, "loss": 1.1592, "step": 5186 }, { "epoch": 6.348837209302325, "grad_norm": 1.6628832130336493, "learning_rate": 3.0969248875588547e-06, "loss": 1.0165, "step": 5187 }, { "epoch": 6.350061199510404, "grad_norm": 1.8536656643155178, "learning_rate": 3.096302524881562e-06, "loss": 0.382, "step": 5188 }, { "epoch": 6.3512851897184825, "grad_norm": 1.4984531859967243, "learning_rate": 3.095680123017471e-06, "loss": 0.7268, "step": 5189 }, { "epoch": 6.35250917992656, "grad_norm": 1.1361964629646923, "learning_rate": 3.0950576820074867e-06, "loss": 0.536, "step": 5190 }, { "epoch": 6.353733170134639, "grad_norm": 2.0608283979117714, "learning_rate": 3.0944352018925123e-06, "loss": 1.2273, "step": 5191 }, { "epoch": 6.3549571603427175, "grad_norm": 1.1733295396474879, "learning_rate": 3.093812682713455e-06, "loss": 0.7127, "step": 5192 }, { "epoch": 6.356181150550795, "grad_norm": 2.0354595177284818, "learning_rate": 3.093190124511225e-06, "loss": 0.4023, "step": 5193 }, { "epoch": 6.357405140758874, "grad_norm": 0.862418834509687, "learning_rate": 3.0925675273267335e-06, "loss": 0.5304, "step": 5194 }, { "epoch": 6.3586291309669525, "grad_norm": 1.8980559999083995, "learning_rate": 3.0919448912008953e-06, "loss": 0.5128, "step": 5195 }, { "epoch": 6.35985312117503, "grad_norm": 2.67030720517238, "learning_rate": 3.091322216174629e-06, "loss": 0.448, "step": 5196 }, { "epoch": 6.361077111383109, "grad_norm": 0.9518121101068123, "learning_rate": 3.0906995022888548e-06, "loss": 0.6767, "step": 5197 }, { "epoch": 6.3623011015911874, "grad_norm": 1.7597818510859173, "learning_rate": 3.090076749584493e-06, "loss": 0.5717, "step": 5198 }, { "epoch": 6.363525091799266, "grad_norm": 1.384699014976833, "learning_rate": 3.0894539581024697e-06, "loss": 0.4784, "step": 5199 }, { "epoch": 6.364749082007344, "grad_norm": 0.9173856649075256, "learning_rate": 3.0888311278837128e-06, "loss": 0.5513, "step": 5200 }, { "epoch": 6.365973072215422, "grad_norm": 1.332379109725859, "learning_rate": 3.088208258969152e-06, "loss": 0.7202, "step": 5201 }, { "epoch": 6.3671970624235, "grad_norm": 1.3018639260493576, "learning_rate": 3.0875853513997206e-06, "loss": 0.5424, "step": 5202 }, { "epoch": 6.368421052631579, "grad_norm": 1.9043069127516545, "learning_rate": 3.0869624052163527e-06, "loss": 0.658, "step": 5203 }, { "epoch": 6.369645042839657, "grad_norm": 1.831940146499211, "learning_rate": 3.086339420459986e-06, "loss": 0.9778, "step": 5204 }, { "epoch": 6.370869033047736, "grad_norm": 1.9994104830299426, "learning_rate": 3.0857163971715627e-06, "loss": 0.6151, "step": 5205 }, { "epoch": 6.372093023255814, "grad_norm": 1.5002599733821465, "learning_rate": 3.0850933353920233e-06, "loss": 0.5562, "step": 5206 }, { "epoch": 6.373317013463892, "grad_norm": 1.4075090317355905, "learning_rate": 3.084470235162314e-06, "loss": 0.6611, "step": 5207 }, { "epoch": 6.374541003671971, "grad_norm": 1.226992593037473, "learning_rate": 3.0838470965233823e-06, "loss": 0.3806, "step": 5208 }, { "epoch": 6.375764993880049, "grad_norm": 0.939834204309093, "learning_rate": 3.08322391951618e-06, "loss": 0.5513, "step": 5209 }, { "epoch": 6.376988984088127, "grad_norm": 2.312702693961054, "learning_rate": 3.082600704181658e-06, "loss": 0.2388, "step": 5210 }, { "epoch": 6.378212974296206, "grad_norm": 1.3426462482089907, "learning_rate": 3.081977450560773e-06, "loss": 0.474, "step": 5211 }, { "epoch": 6.379436964504284, "grad_norm": 1.5072813796103943, "learning_rate": 3.081354158694483e-06, "loss": 1.4408, "step": 5212 }, { "epoch": 6.380660954712362, "grad_norm": 2.551152977200912, "learning_rate": 3.080730828623747e-06, "loss": 1.0565, "step": 5213 }, { "epoch": 6.381884944920441, "grad_norm": 1.3566152761019628, "learning_rate": 3.0801074603895296e-06, "loss": 0.5182, "step": 5214 }, { "epoch": 6.383108935128519, "grad_norm": 1.7062631077392574, "learning_rate": 3.079484054032795e-06, "loss": 0.6245, "step": 5215 }, { "epoch": 6.384332925336597, "grad_norm": 2.947628989028716, "learning_rate": 3.0788606095945117e-06, "loss": 0.519, "step": 5216 }, { "epoch": 6.385556915544676, "grad_norm": 1.1029716910848006, "learning_rate": 3.0782371271156506e-06, "loss": 0.9058, "step": 5217 }, { "epoch": 6.386780905752754, "grad_norm": 1.0656800446308654, "learning_rate": 3.0776136066371843e-06, "loss": 0.403, "step": 5218 }, { "epoch": 6.388004895960832, "grad_norm": 1.9789932058987998, "learning_rate": 3.076990048200088e-06, "loss": 0.4741, "step": 5219 }, { "epoch": 6.389228886168911, "grad_norm": 1.4337006865764237, "learning_rate": 3.076366451845341e-06, "loss": 0.9989, "step": 5220 }, { "epoch": 6.3904528763769886, "grad_norm": 1.259753761496176, "learning_rate": 3.0757428176139204e-06, "loss": 0.4456, "step": 5221 }, { "epoch": 6.391676866585067, "grad_norm": 2.2426976192693386, "learning_rate": 3.0751191455468126e-06, "loss": 0.843, "step": 5222 }, { "epoch": 6.392900856793146, "grad_norm": 2.048600217866285, "learning_rate": 3.0744954356850028e-06, "loss": 0.5304, "step": 5223 }, { "epoch": 6.394124847001224, "grad_norm": 1.2665923981895382, "learning_rate": 3.0738716880694763e-06, "loss": 0.3746, "step": 5224 }, { "epoch": 6.395348837209302, "grad_norm": 1.208293564212225, "learning_rate": 3.0732479027412254e-06, "loss": 0.7353, "step": 5225 }, { "epoch": 6.396572827417381, "grad_norm": 2.333393530080522, "learning_rate": 3.0726240797412426e-06, "loss": 0.3866, "step": 5226 }, { "epoch": 6.397796817625459, "grad_norm": 1.2842540126023991, "learning_rate": 3.0720002191105234e-06, "loss": 0.5805, "step": 5227 }, { "epoch": 6.399020807833537, "grad_norm": 2.3208641779037262, "learning_rate": 3.0713763208900656e-06, "loss": 0.4582, "step": 5228 }, { "epoch": 6.400244798041616, "grad_norm": 3.06443380034515, "learning_rate": 3.0707523851208698e-06, "loss": 0.398, "step": 5229 }, { "epoch": 6.401468788249694, "grad_norm": 1.783610142204136, "learning_rate": 3.0701284118439373e-06, "loss": 0.5076, "step": 5230 }, { "epoch": 6.402692778457772, "grad_norm": 1.346876347901455, "learning_rate": 3.069504401100275e-06, "loss": 0.3486, "step": 5231 }, { "epoch": 6.403916768665851, "grad_norm": 1.2568600695327496, "learning_rate": 3.06888035293089e-06, "loss": 0.636, "step": 5232 }, { "epoch": 6.405140758873929, "grad_norm": 1.2436458277631997, "learning_rate": 3.068256267376792e-06, "loss": 1.3345, "step": 5233 }, { "epoch": 6.406364749082007, "grad_norm": 0.9319070992522746, "learning_rate": 3.067632144478994e-06, "loss": 0.6092, "step": 5234 }, { "epoch": 6.407588739290086, "grad_norm": 1.710348216474738, "learning_rate": 3.067007984278511e-06, "loss": 0.7806, "step": 5235 }, { "epoch": 6.408812729498164, "grad_norm": 1.6438210061862117, "learning_rate": 3.066383786816361e-06, "loss": 0.5479, "step": 5236 }, { "epoch": 6.410036719706242, "grad_norm": 1.2297807910385883, "learning_rate": 3.0657595521335624e-06, "loss": 1.3257, "step": 5237 }, { "epoch": 6.411260709914321, "grad_norm": 2.50755360589812, "learning_rate": 3.0651352802711397e-06, "loss": 0.496, "step": 5238 }, { "epoch": 6.412484700122399, "grad_norm": 1.5089664078621159, "learning_rate": 3.064510971270116e-06, "loss": 1.577, "step": 5239 }, { "epoch": 6.413708690330477, "grad_norm": 1.0664964342359886, "learning_rate": 3.063886625171519e-06, "loss": 0.9939, "step": 5240 }, { "epoch": 6.414932680538556, "grad_norm": 1.971317859155347, "learning_rate": 3.0632622420163795e-06, "loss": 0.472, "step": 5241 }, { "epoch": 6.416156670746634, "grad_norm": 2.419454285862608, "learning_rate": 3.0626378218457285e-06, "loss": 0.4332, "step": 5242 }, { "epoch": 6.417380660954713, "grad_norm": 0.7635018570798805, "learning_rate": 3.0620133647006007e-06, "loss": 0.3582, "step": 5243 }, { "epoch": 6.4186046511627906, "grad_norm": 1.0865410691780641, "learning_rate": 3.0613888706220336e-06, "loss": 0.7329, "step": 5244 }, { "epoch": 6.419828641370869, "grad_norm": 1.9055433255030234, "learning_rate": 3.0607643396510654e-06, "loss": 0.9289, "step": 5245 }, { "epoch": 6.421052631578947, "grad_norm": 1.8061697218612756, "learning_rate": 3.06013977182874e-06, "loss": 0.5704, "step": 5246 }, { "epoch": 6.4222766217870255, "grad_norm": 1.0718263452729597, "learning_rate": 3.059515167196101e-06, "loss": 0.7646, "step": 5247 }, { "epoch": 6.423500611995104, "grad_norm": 1.6770285152662974, "learning_rate": 3.0588905257941936e-06, "loss": 0.4316, "step": 5248 }, { "epoch": 6.424724602203183, "grad_norm": 2.543981419110719, "learning_rate": 3.0582658476640686e-06, "loss": 0.4066, "step": 5249 }, { "epoch": 6.4259485924112605, "grad_norm": 0.9391602504884662, "learning_rate": 3.057641132846777e-06, "loss": 0.5594, "step": 5250 }, { "epoch": 6.427172582619339, "grad_norm": 1.76986851467784, "learning_rate": 3.0570163813833723e-06, "loss": 0.677, "step": 5251 }, { "epoch": 6.428396572827418, "grad_norm": 1.6932353011382728, "learning_rate": 3.056391593314912e-06, "loss": 0.6175, "step": 5252 }, { "epoch": 6.4296205630354955, "grad_norm": 1.8190924593358844, "learning_rate": 3.0557667686824538e-06, "loss": 0.4936, "step": 5253 }, { "epoch": 6.430844553243574, "grad_norm": 2.4533058456752337, "learning_rate": 3.0551419075270585e-06, "loss": 0.3797, "step": 5254 }, { "epoch": 6.432068543451653, "grad_norm": 1.3986935538015697, "learning_rate": 3.0545170098897915e-06, "loss": 1.1932, "step": 5255 }, { "epoch": 6.43329253365973, "grad_norm": 1.8662831289401944, "learning_rate": 3.053892075811716e-06, "loss": 0.4701, "step": 5256 }, { "epoch": 6.434516523867809, "grad_norm": 1.3789164490168464, "learning_rate": 3.0532671053339034e-06, "loss": 0.4603, "step": 5257 }, { "epoch": 6.435740514075888, "grad_norm": 1.58553808993022, "learning_rate": 3.0526420984974233e-06, "loss": 0.5039, "step": 5258 }, { "epoch": 6.436964504283965, "grad_norm": 1.5303100131541043, "learning_rate": 3.052017055343348e-06, "loss": 1.2122, "step": 5259 }, { "epoch": 6.438188494492044, "grad_norm": 0.9564738267153151, "learning_rate": 3.0513919759127537e-06, "loss": 0.547, "step": 5260 }, { "epoch": 6.439412484700123, "grad_norm": 2.329232796389088, "learning_rate": 3.0507668602467175e-06, "loss": 0.9445, "step": 5261 }, { "epoch": 6.4406364749082, "grad_norm": 2.1838685153973785, "learning_rate": 3.0501417083863215e-06, "loss": 0.541, "step": 5262 }, { "epoch": 6.441860465116279, "grad_norm": 1.6653873807847648, "learning_rate": 3.049516520372646e-06, "loss": 0.5504, "step": 5263 }, { "epoch": 6.443084455324358, "grad_norm": 0.9056731988853691, "learning_rate": 3.0488912962467787e-06, "loss": 0.6436, "step": 5264 }, { "epoch": 6.444308445532435, "grad_norm": 1.3268590707395715, "learning_rate": 3.0482660360498044e-06, "loss": 0.5142, "step": 5265 }, { "epoch": 6.445532435740514, "grad_norm": 2.0000801921448574, "learning_rate": 3.047640739822815e-06, "loss": 0.4717, "step": 5266 }, { "epoch": 6.4467564259485926, "grad_norm": 0.9103401410822293, "learning_rate": 3.0470154076069015e-06, "loss": 0.5444, "step": 5267 }, { "epoch": 6.447980416156671, "grad_norm": 1.6443557131937512, "learning_rate": 3.0463900394431585e-06, "loss": 0.5155, "step": 5268 }, { "epoch": 6.449204406364749, "grad_norm": 1.289886352517538, "learning_rate": 3.045764635372683e-06, "loss": 0.8819, "step": 5269 }, { "epoch": 6.4504283965728275, "grad_norm": 1.2803055054723436, "learning_rate": 3.0451391954365746e-06, "loss": 0.8245, "step": 5270 }, { "epoch": 6.451652386780906, "grad_norm": 1.372376821155428, "learning_rate": 3.0445137196759337e-06, "loss": 0.6877, "step": 5271 }, { "epoch": 6.452876376988984, "grad_norm": 1.9610813334768602, "learning_rate": 3.043888208131866e-06, "loss": 0.6022, "step": 5272 }, { "epoch": 6.4541003671970625, "grad_norm": 2.2279771460447964, "learning_rate": 3.043262660845478e-06, "loss": 0.4281, "step": 5273 }, { "epoch": 6.455324357405141, "grad_norm": 1.0496508251931276, "learning_rate": 3.042637077857876e-06, "loss": 1.053, "step": 5274 }, { "epoch": 6.456548347613219, "grad_norm": 0.9587009045198399, "learning_rate": 3.0420114592101718e-06, "loss": 0.6529, "step": 5275 }, { "epoch": 6.4577723378212974, "grad_norm": 1.1551059004846356, "learning_rate": 3.0413858049434803e-06, "loss": 0.5187, "step": 5276 }, { "epoch": 6.458996328029376, "grad_norm": 1.6038909151401448, "learning_rate": 3.0407601150989157e-06, "loss": 0.6111, "step": 5277 }, { "epoch": 6.460220318237454, "grad_norm": 1.1081988768511692, "learning_rate": 3.040134389717596e-06, "loss": 1.1052, "step": 5278 }, { "epoch": 6.461444308445532, "grad_norm": 1.8436172231097054, "learning_rate": 3.039508628840643e-06, "loss": 0.7039, "step": 5279 }, { "epoch": 6.462668298653611, "grad_norm": 1.2968062921750463, "learning_rate": 3.038882832509178e-06, "loss": 0.7103, "step": 5280 }, { "epoch": 6.463892288861689, "grad_norm": 1.2277560200585054, "learning_rate": 3.038257000764326e-06, "loss": 0.5758, "step": 5281 }, { "epoch": 6.465116279069767, "grad_norm": 0.9540705568723842, "learning_rate": 3.037631133647216e-06, "loss": 0.6182, "step": 5282 }, { "epoch": 6.466340269277846, "grad_norm": 1.5582370191602384, "learning_rate": 3.0370052311989756e-06, "loss": 0.9445, "step": 5283 }, { "epoch": 6.467564259485924, "grad_norm": 2.14300181581877, "learning_rate": 3.0363792934607372e-06, "loss": 0.4276, "step": 5284 }, { "epoch": 6.468788249694002, "grad_norm": 2.002123489043887, "learning_rate": 3.0357533204736366e-06, "loss": 0.497, "step": 5285 }, { "epoch": 6.470012239902081, "grad_norm": 1.1995599621362933, "learning_rate": 3.0351273122788093e-06, "loss": 0.8833, "step": 5286 }, { "epoch": 6.47123623011016, "grad_norm": 1.2764398544000135, "learning_rate": 3.0345012689173937e-06, "loss": 0.5878, "step": 5287 }, { "epoch": 6.472460220318237, "grad_norm": 1.6960604392041478, "learning_rate": 3.0338751904305323e-06, "loss": 0.3708, "step": 5288 }, { "epoch": 6.473684210526316, "grad_norm": 1.3898488478219966, "learning_rate": 3.0332490768593676e-06, "loss": 0.9983, "step": 5289 }, { "epoch": 6.474908200734394, "grad_norm": 1.5097354138570733, "learning_rate": 3.032622928245046e-06, "loss": 0.5883, "step": 5290 }, { "epoch": 6.476132190942472, "grad_norm": 1.448813662105671, "learning_rate": 3.0319967446287157e-06, "loss": 0.7709, "step": 5291 }, { "epoch": 6.477356181150551, "grad_norm": 1.8512342573519214, "learning_rate": 3.031370526051527e-06, "loss": 0.5379, "step": 5292 }, { "epoch": 6.4785801713586295, "grad_norm": 1.2724500420229987, "learning_rate": 3.0307442725546327e-06, "loss": 0.7681, "step": 5293 }, { "epoch": 6.479804161566707, "grad_norm": 1.2567625367962512, "learning_rate": 3.0301179841791877e-06, "loss": 0.9625, "step": 5294 }, { "epoch": 6.481028151774786, "grad_norm": 0.8084406221969098, "learning_rate": 3.0294916609663496e-06, "loss": 0.5048, "step": 5295 }, { "epoch": 6.4822521419828645, "grad_norm": 1.8368701515191537, "learning_rate": 3.0288653029572777e-06, "loss": 0.9878, "step": 5296 }, { "epoch": 6.483476132190942, "grad_norm": 2.137899905210895, "learning_rate": 3.0282389101931354e-06, "loss": 0.3641, "step": 5297 }, { "epoch": 6.484700122399021, "grad_norm": 1.3970213151467021, "learning_rate": 3.027612482715085e-06, "loss": 0.5713, "step": 5298 }, { "epoch": 6.4859241126070994, "grad_norm": 1.5827461302215218, "learning_rate": 3.026986020564294e-06, "loss": 1.3645, "step": 5299 }, { "epoch": 6.487148102815177, "grad_norm": 2.8455529406005473, "learning_rate": 3.0263595237819306e-06, "loss": 0.4575, "step": 5300 }, { "epoch": 6.488372093023256, "grad_norm": 2.023117784583821, "learning_rate": 3.025732992409166e-06, "loss": 0.4292, "step": 5301 }, { "epoch": 6.489596083231334, "grad_norm": 1.1445218149582992, "learning_rate": 3.0251064264871742e-06, "loss": 0.6907, "step": 5302 }, { "epoch": 6.490820073439412, "grad_norm": 1.392959995817116, "learning_rate": 3.02447982605713e-06, "loss": 1.5708, "step": 5303 }, { "epoch": 6.492044063647491, "grad_norm": 2.1209419159281726, "learning_rate": 3.023853191160211e-06, "loss": 0.6269, "step": 5304 }, { "epoch": 6.493268053855569, "grad_norm": 1.2171374034880065, "learning_rate": 3.0232265218375984e-06, "loss": 0.7547, "step": 5305 }, { "epoch": 6.494492044063647, "grad_norm": 1.819238163422663, "learning_rate": 3.0225998181304745e-06, "loss": 1.1295, "step": 5306 }, { "epoch": 6.495716034271726, "grad_norm": 1.048095177624131, "learning_rate": 3.021973080080023e-06, "loss": 0.4547, "step": 5307 }, { "epoch": 6.496940024479804, "grad_norm": 1.1315115631597263, "learning_rate": 3.021346307727431e-06, "loss": 0.5713, "step": 5308 }, { "epoch": 6.498164014687882, "grad_norm": 1.012220254352979, "learning_rate": 3.020719501113889e-06, "loss": 0.6, "step": 5309 }, { "epoch": 6.499388004895961, "grad_norm": 1.9915755567933502, "learning_rate": 3.020092660280586e-06, "loss": 0.4915, "step": 5310 }, { "epoch": 6.500611995104039, "grad_norm": 2.1145020769184795, "learning_rate": 3.0194657852687186e-06, "loss": 0.5523, "step": 5311 }, { "epoch": 6.501835985312118, "grad_norm": 1.8162380933981084, "learning_rate": 3.018838876119481e-06, "loss": 0.7393, "step": 5312 }, { "epoch": 6.503059975520196, "grad_norm": 1.7031480873185578, "learning_rate": 3.01821193287407e-06, "loss": 0.7408, "step": 5313 }, { "epoch": 6.504283965728274, "grad_norm": 1.3194847431773604, "learning_rate": 3.017584955573689e-06, "loss": 0.5861, "step": 5314 }, { "epoch": 6.505507955936353, "grad_norm": 1.4890310300084912, "learning_rate": 3.016957944259538e-06, "loss": 1.4255, "step": 5315 }, { "epoch": 6.506731946144431, "grad_norm": 2.450300828680658, "learning_rate": 3.016330898972824e-06, "loss": 0.5594, "step": 5316 }, { "epoch": 6.507955936352509, "grad_norm": 1.8329365707865262, "learning_rate": 3.0157038197547527e-06, "loss": 0.6442, "step": 5317 }, { "epoch": 6.509179926560588, "grad_norm": 2.2019053274982023, "learning_rate": 3.015076706646534e-06, "loss": 0.8209, "step": 5318 }, { "epoch": 6.510403916768666, "grad_norm": 2.2740686789385287, "learning_rate": 3.0144495596893787e-06, "loss": 0.4814, "step": 5319 }, { "epoch": 6.511627906976744, "grad_norm": 1.9957741448651014, "learning_rate": 3.013822378924502e-06, "loss": 0.5273, "step": 5320 }, { "epoch": 6.512851897184823, "grad_norm": 1.809584462562897, "learning_rate": 3.0131951643931178e-06, "loss": 1.2066, "step": 5321 }, { "epoch": 6.5140758873929006, "grad_norm": 0.9487198297794097, "learning_rate": 3.0125679161364467e-06, "loss": 0.384, "step": 5322 }, { "epoch": 6.515299877600979, "grad_norm": 2.178331614344408, "learning_rate": 3.011940634195708e-06, "loss": 0.8109, "step": 5323 }, { "epoch": 6.516523867809058, "grad_norm": 2.0595157558768777, "learning_rate": 3.011313318612124e-06, "loss": 0.428, "step": 5324 }, { "epoch": 6.5177478580171355, "grad_norm": 1.2859237099344467, "learning_rate": 3.01068596942692e-06, "loss": 0.4864, "step": 5325 }, { "epoch": 6.518971848225214, "grad_norm": 2.0920858119779036, "learning_rate": 3.010058586681323e-06, "loss": 0.348, "step": 5326 }, { "epoch": 6.520195838433293, "grad_norm": 1.3119470492294893, "learning_rate": 3.0094311704165625e-06, "loss": 1.3225, "step": 5327 }, { "epoch": 6.5214198286413705, "grad_norm": 1.2716640554776317, "learning_rate": 3.0088037206738683e-06, "loss": 1.1014, "step": 5328 }, { "epoch": 6.522643818849449, "grad_norm": 1.448519425169039, "learning_rate": 3.008176237494478e-06, "loss": 0.9268, "step": 5329 }, { "epoch": 6.523867809057528, "grad_norm": 1.6654712457980188, "learning_rate": 3.007548720919623e-06, "loss": 0.5026, "step": 5330 }, { "epoch": 6.525091799265606, "grad_norm": 1.4573903577094696, "learning_rate": 3.006921170990544e-06, "loss": 0.6987, "step": 5331 }, { "epoch": 6.526315789473684, "grad_norm": 1.8047713588873915, "learning_rate": 3.0062935877484807e-06, "loss": 1.1529, "step": 5332 }, { "epoch": 6.527539779681763, "grad_norm": 0.9497939722379454, "learning_rate": 3.0056659712346753e-06, "loss": 0.4607, "step": 5333 }, { "epoch": 6.52876376988984, "grad_norm": 1.4808702266401585, "learning_rate": 3.0050383214903724e-06, "loss": 0.9496, "step": 5334 }, { "epoch": 6.529987760097919, "grad_norm": 1.1631253939177064, "learning_rate": 3.0044106385568194e-06, "loss": 0.6246, "step": 5335 }, { "epoch": 6.531211750305998, "grad_norm": 2.614757104072889, "learning_rate": 3.003782922475265e-06, "loss": 0.4376, "step": 5336 }, { "epoch": 6.532435740514076, "grad_norm": 1.9817381634604467, "learning_rate": 3.0031551732869597e-06, "loss": 1.4363, "step": 5337 }, { "epoch": 6.533659730722154, "grad_norm": 1.4402302537811131, "learning_rate": 3.002527391033159e-06, "loss": 0.5944, "step": 5338 }, { "epoch": 6.534883720930233, "grad_norm": 2.55414225637835, "learning_rate": 3.0018995757551145e-06, "loss": 0.3378, "step": 5339 }, { "epoch": 6.536107711138311, "grad_norm": 2.0113164398195575, "learning_rate": 3.001271727494088e-06, "loss": 0.9859, "step": 5340 }, { "epoch": 6.537331701346389, "grad_norm": 1.8586821946551257, "learning_rate": 3.0006438462913367e-06, "loss": 0.5001, "step": 5341 }, { "epoch": 6.538555691554468, "grad_norm": 1.0419399346288811, "learning_rate": 3.0000159321881246e-06, "loss": 0.5146, "step": 5342 }, { "epoch": 6.539779681762546, "grad_norm": 0.8541209556563468, "learning_rate": 2.999387985225714e-06, "loss": 0.4513, "step": 5343 }, { "epoch": 6.541003671970624, "grad_norm": 1.472974372863977, "learning_rate": 2.9987600054453725e-06, "loss": 0.5924, "step": 5344 }, { "epoch": 6.5422276621787026, "grad_norm": 1.3102145201953603, "learning_rate": 2.998131992888368e-06, "loss": 1.228, "step": 5345 }, { "epoch": 6.543451652386781, "grad_norm": 1.979472870550857, "learning_rate": 2.9975039475959717e-06, "loss": 1.0113, "step": 5346 }, { "epoch": 6.544675642594859, "grad_norm": 1.4662642821153606, "learning_rate": 2.996875869609456e-06, "loss": 0.8402, "step": 5347 }, { "epoch": 6.5458996328029375, "grad_norm": 1.1574807823956437, "learning_rate": 2.9962477589700954e-06, "loss": 0.4689, "step": 5348 }, { "epoch": 6.547123623011016, "grad_norm": 1.442888307041627, "learning_rate": 2.9956196157191685e-06, "loss": 0.5196, "step": 5349 }, { "epoch": 6.548347613219094, "grad_norm": 1.675266947117676, "learning_rate": 2.994991439897953e-06, "loss": 0.4636, "step": 5350 }, { "epoch": 6.5495716034271725, "grad_norm": 1.2399756219703901, "learning_rate": 2.994363231547731e-06, "loss": 0.6642, "step": 5351 }, { "epoch": 6.550795593635251, "grad_norm": 1.5931312300036078, "learning_rate": 2.993734990709787e-06, "loss": 0.451, "step": 5352 }, { "epoch": 6.552019583843329, "grad_norm": 1.9418298761914672, "learning_rate": 2.993106717425404e-06, "loss": 0.5225, "step": 5353 }, { "epoch": 6.5532435740514074, "grad_norm": 1.2290302718264912, "learning_rate": 2.992478411735872e-06, "loss": 0.6807, "step": 5354 }, { "epoch": 6.554467564259486, "grad_norm": 1.9360700357022829, "learning_rate": 2.99185007368248e-06, "loss": 0.4497, "step": 5355 }, { "epoch": 6.555691554467565, "grad_norm": 1.4766149188297162, "learning_rate": 2.9912217033065212e-06, "loss": 1.0048, "step": 5356 }, { "epoch": 6.556915544675642, "grad_norm": 1.3618875964653336, "learning_rate": 2.990593300649289e-06, "loss": 0.51, "step": 5357 }, { "epoch": 6.558139534883721, "grad_norm": 1.5113570217936043, "learning_rate": 2.989964865752079e-06, "loss": 1.1506, "step": 5358 }, { "epoch": 6.5593635250918, "grad_norm": 1.1539356337650317, "learning_rate": 2.989336398656191e-06, "loss": 0.5519, "step": 5359 }, { "epoch": 6.560587515299877, "grad_norm": 1.1849486757467327, "learning_rate": 2.9887078994029232e-06, "loss": 0.4728, "step": 5360 }, { "epoch": 6.561811505507956, "grad_norm": 0.9549763239060722, "learning_rate": 2.988079368033582e-06, "loss": 0.593, "step": 5361 }, { "epoch": 6.563035495716035, "grad_norm": 0.9330135841833421, "learning_rate": 2.987450804589469e-06, "loss": 0.6079, "step": 5362 }, { "epoch": 6.564259485924112, "grad_norm": 0.8996704424454582, "learning_rate": 2.9868222091118913e-06, "loss": 0.5574, "step": 5363 }, { "epoch": 6.565483476132191, "grad_norm": 2.5175713480942887, "learning_rate": 2.9861935816421596e-06, "loss": 0.3545, "step": 5364 }, { "epoch": 6.56670746634027, "grad_norm": 1.4596859699325908, "learning_rate": 2.9855649222215837e-06, "loss": 1.1921, "step": 5365 }, { "epoch": 6.567931456548347, "grad_norm": 1.2311117282029895, "learning_rate": 2.9849362308914775e-06, "loss": 0.811, "step": 5366 }, { "epoch": 6.569155446756426, "grad_norm": 2.128249655309296, "learning_rate": 2.9843075076931556e-06, "loss": 0.5798, "step": 5367 }, { "epoch": 6.5703794369645045, "grad_norm": 1.6208935708575591, "learning_rate": 2.983678752667936e-06, "loss": 1.4796, "step": 5368 }, { "epoch": 6.571603427172582, "grad_norm": 1.674346233722502, "learning_rate": 2.9830499658571367e-06, "loss": 1.1967, "step": 5369 }, { "epoch": 6.572827417380661, "grad_norm": 1.708150051478475, "learning_rate": 2.9824211473020814e-06, "loss": 0.4174, "step": 5370 }, { "epoch": 6.5740514075887395, "grad_norm": 0.9698889239524006, "learning_rate": 2.9817922970440918e-06, "loss": 0.6297, "step": 5371 }, { "epoch": 6.575275397796817, "grad_norm": 1.3168192878465668, "learning_rate": 2.981163415124495e-06, "loss": 0.9485, "step": 5372 }, { "epoch": 6.576499388004896, "grad_norm": 1.766797973874093, "learning_rate": 2.980534501584618e-06, "loss": 0.691, "step": 5373 }, { "epoch": 6.5777233782129745, "grad_norm": 2.5676215612630258, "learning_rate": 2.9799055564657913e-06, "loss": 0.5803, "step": 5374 }, { "epoch": 6.578947368421053, "grad_norm": 1.799742307125125, "learning_rate": 2.9792765798093466e-06, "loss": 0.8704, "step": 5375 }, { "epoch": 6.580171358629131, "grad_norm": 1.57040681171445, "learning_rate": 2.978647571656618e-06, "loss": 1.0047, "step": 5376 }, { "epoch": 6.5813953488372094, "grad_norm": 1.1376203627100405, "learning_rate": 2.9780185320489403e-06, "loss": 0.5668, "step": 5377 }, { "epoch": 6.582619339045287, "grad_norm": 1.2464884743093998, "learning_rate": 2.9773894610276525e-06, "loss": 1.0181, "step": 5378 }, { "epoch": 6.583843329253366, "grad_norm": 1.7258870523487413, "learning_rate": 2.976760358634097e-06, "loss": 0.5147, "step": 5379 }, { "epoch": 6.585067319461444, "grad_norm": 1.5516825602924413, "learning_rate": 2.9761312249096125e-06, "loss": 0.4609, "step": 5380 }, { "epoch": 6.586291309669523, "grad_norm": 1.3933203646536483, "learning_rate": 2.975502059895545e-06, "loss": 0.507, "step": 5381 }, { "epoch": 6.587515299877601, "grad_norm": 2.1565653103212528, "learning_rate": 2.974872863633242e-06, "loss": 0.3933, "step": 5382 }, { "epoch": 6.588739290085679, "grad_norm": 1.6922789991287492, "learning_rate": 2.9742436361640505e-06, "loss": 0.755, "step": 5383 }, { "epoch": 6.589963280293758, "grad_norm": 2.1989318064180186, "learning_rate": 2.9736143775293204e-06, "loss": 0.4292, "step": 5384 }, { "epoch": 6.591187270501836, "grad_norm": 1.0230367398926992, "learning_rate": 2.9729850877704065e-06, "loss": 0.4952, "step": 5385 }, { "epoch": 6.592411260709914, "grad_norm": 1.6944706976115564, "learning_rate": 2.9723557669286606e-06, "loss": 0.7839, "step": 5386 }, { "epoch": 6.593635250917993, "grad_norm": 1.2709389015815304, "learning_rate": 2.9717264150454417e-06, "loss": 0.5421, "step": 5387 }, { "epoch": 6.594859241126071, "grad_norm": 0.962302890329117, "learning_rate": 2.9710970321621086e-06, "loss": 0.51, "step": 5388 }, { "epoch": 6.596083231334149, "grad_norm": 1.2516869582831065, "learning_rate": 2.9704676183200195e-06, "loss": 0.5824, "step": 5389 }, { "epoch": 6.597307221542228, "grad_norm": 1.3463624762072195, "learning_rate": 2.969838173560539e-06, "loss": 0.5703, "step": 5390 }, { "epoch": 6.598531211750306, "grad_norm": 1.929397164305181, "learning_rate": 2.969208697925032e-06, "loss": 0.312, "step": 5391 }, { "epoch": 6.599755201958384, "grad_norm": 1.302340256621175, "learning_rate": 2.9685791914548646e-06, "loss": 0.6277, "step": 5392 }, { "epoch": 6.600979192166463, "grad_norm": 1.561087363635551, "learning_rate": 2.967949654191405e-06, "loss": 0.5678, "step": 5393 }, { "epoch": 6.602203182374541, "grad_norm": 1.6631831684142577, "learning_rate": 2.9673200861760263e-06, "loss": 1.0699, "step": 5394 }, { "epoch": 6.603427172582619, "grad_norm": 1.0231653977924138, "learning_rate": 2.966690487450099e-06, "loss": 0.5213, "step": 5395 }, { "epoch": 6.604651162790698, "grad_norm": 1.8717213264601191, "learning_rate": 2.966060858055e-06, "loss": 0.6283, "step": 5396 }, { "epoch": 6.605875152998776, "grad_norm": 0.8614544811778199, "learning_rate": 2.965431198032105e-06, "loss": 0.4539, "step": 5397 }, { "epoch": 6.607099143206854, "grad_norm": 1.9712625975197968, "learning_rate": 2.9648015074227927e-06, "loss": 0.6961, "step": 5398 }, { "epoch": 6.608323133414933, "grad_norm": 1.388435046797862, "learning_rate": 2.964171786268445e-06, "loss": 0.4058, "step": 5399 }, { "epoch": 6.6095471236230114, "grad_norm": 0.965417792779119, "learning_rate": 2.963542034610444e-06, "loss": 0.3219, "step": 5400 }, { "epoch": 6.610771113831089, "grad_norm": 0.9941354366362283, "learning_rate": 2.9629122524901754e-06, "loss": 0.6261, "step": 5401 }, { "epoch": 6.611995104039168, "grad_norm": 2.5131805294771694, "learning_rate": 2.9622824399490254e-06, "loss": 0.479, "step": 5402 }, { "epoch": 6.613219094247246, "grad_norm": 0.979355777309658, "learning_rate": 2.961652597028383e-06, "loss": 0.6467, "step": 5403 }, { "epoch": 6.614443084455324, "grad_norm": 0.9171329090352748, "learning_rate": 2.96102272376964e-06, "loss": 0.3764, "step": 5404 }, { "epoch": 6.615667074663403, "grad_norm": 1.8806884543596591, "learning_rate": 2.9603928202141887e-06, "loss": 1.0377, "step": 5405 }, { "epoch": 6.616891064871481, "grad_norm": 1.0862109727409912, "learning_rate": 2.959762886403424e-06, "loss": 0.4916, "step": 5406 }, { "epoch": 6.618115055079559, "grad_norm": 1.475439347861243, "learning_rate": 2.9591329223787435e-06, "loss": 0.4704, "step": 5407 }, { "epoch": 6.619339045287638, "grad_norm": 1.215879909100252, "learning_rate": 2.9585029281815447e-06, "loss": 0.7927, "step": 5408 }, { "epoch": 6.620563035495716, "grad_norm": 1.3517423440951264, "learning_rate": 2.95787290385323e-06, "loss": 0.6374, "step": 5409 }, { "epoch": 6.621787025703794, "grad_norm": 1.22669424337683, "learning_rate": 2.9572428494352006e-06, "loss": 0.4945, "step": 5410 }, { "epoch": 6.623011015911873, "grad_norm": 1.5821447657905434, "learning_rate": 2.956612764968864e-06, "loss": 0.6294, "step": 5411 }, { "epoch": 6.624235006119951, "grad_norm": 0.9667091857763446, "learning_rate": 2.9559826504956245e-06, "loss": 0.4839, "step": 5412 }, { "epoch": 6.625458996328029, "grad_norm": 1.114336665359784, "learning_rate": 2.955352506056892e-06, "loss": 0.6024, "step": 5413 }, { "epoch": 6.626682986536108, "grad_norm": 1.7770633845178825, "learning_rate": 2.9547223316940767e-06, "loss": 1.0936, "step": 5414 }, { "epoch": 6.627906976744186, "grad_norm": 1.182393811289438, "learning_rate": 2.9540921274485913e-06, "loss": 0.4462, "step": 5415 }, { "epoch": 6.629130966952264, "grad_norm": 1.539929677107794, "learning_rate": 2.9534618933618515e-06, "loss": 1.0174, "step": 5416 }, { "epoch": 6.630354957160343, "grad_norm": 2.206598380218775, "learning_rate": 2.9528316294752733e-06, "loss": 0.4148, "step": 5417 }, { "epoch": 6.631578947368421, "grad_norm": 2.196446122544651, "learning_rate": 2.9522013358302754e-06, "loss": 0.4477, "step": 5418 }, { "epoch": 6.6328029375765, "grad_norm": 1.8004781787572024, "learning_rate": 2.9515710124682772e-06, "loss": 0.3281, "step": 5419 }, { "epoch": 6.634026927784578, "grad_norm": 1.0810513485652549, "learning_rate": 2.9509406594307037e-06, "loss": 0.6619, "step": 5420 }, { "epoch": 6.635250917992656, "grad_norm": 2.0830999321247186, "learning_rate": 2.950310276758977e-06, "loss": 0.5519, "step": 5421 }, { "epoch": 6.636474908200734, "grad_norm": 2.65180981563236, "learning_rate": 2.949679864494525e-06, "loss": 0.3446, "step": 5422 }, { "epoch": 6.6376988984088126, "grad_norm": 2.2037671147687354, "learning_rate": 2.9490494226787758e-06, "loss": 0.886, "step": 5423 }, { "epoch": 6.638922888616891, "grad_norm": 1.9558876856273422, "learning_rate": 2.948418951353159e-06, "loss": 0.6177, "step": 5424 }, { "epoch": 6.64014687882497, "grad_norm": 1.6503930476376285, "learning_rate": 2.9477884505591076e-06, "loss": 1.2335, "step": 5425 }, { "epoch": 6.6413708690330475, "grad_norm": 1.144128551623011, "learning_rate": 2.9471579203380552e-06, "loss": 0.5402, "step": 5426 }, { "epoch": 6.642594859241126, "grad_norm": 1.2984433590828688, "learning_rate": 2.9465273607314386e-06, "loss": 0.642, "step": 5427 }, { "epoch": 6.643818849449205, "grad_norm": 1.9452229930110212, "learning_rate": 2.9458967717806945e-06, "loss": 0.485, "step": 5428 }, { "epoch": 6.6450428396572825, "grad_norm": 1.4287439972317089, "learning_rate": 2.9452661535272652e-06, "loss": 1.2445, "step": 5429 }, { "epoch": 6.646266829865361, "grad_norm": 1.4182890500517595, "learning_rate": 2.9446355060125903e-06, "loss": 1.64, "step": 5430 }, { "epoch": 6.64749082007344, "grad_norm": 2.6400337948234105, "learning_rate": 2.944004829278115e-06, "loss": 0.6207, "step": 5431 }, { "epoch": 6.6487148102815175, "grad_norm": 2.6956186339329804, "learning_rate": 2.943374123365284e-06, "loss": 0.4149, "step": 5432 }, { "epoch": 6.649938800489596, "grad_norm": 1.7763645340940566, "learning_rate": 2.942743388315546e-06, "loss": 0.4567, "step": 5433 }, { "epoch": 6.651162790697675, "grad_norm": 1.510037479639602, "learning_rate": 2.9421126241703496e-06, "loss": 1.0287, "step": 5434 }, { "epoch": 6.652386780905752, "grad_norm": 1.6801624841069247, "learning_rate": 2.9414818309711475e-06, "loss": 1.3111, "step": 5435 }, { "epoch": 6.653610771113831, "grad_norm": 2.0088029003878773, "learning_rate": 2.9408510087593915e-06, "loss": 0.4876, "step": 5436 }, { "epoch": 6.65483476132191, "grad_norm": 2.337474247938543, "learning_rate": 2.940220157576539e-06, "loss": 0.5387, "step": 5437 }, { "epoch": 6.656058751529987, "grad_norm": 1.9002422291282646, "learning_rate": 2.9395892774640455e-06, "loss": 1.0392, "step": 5438 }, { "epoch": 6.657282741738066, "grad_norm": 1.5662421120782486, "learning_rate": 2.9389583684633697e-06, "loss": 0.3726, "step": 5439 }, { "epoch": 6.658506731946145, "grad_norm": 1.6144778880007884, "learning_rate": 2.9383274306159747e-06, "loss": 0.5079, "step": 5440 }, { "epoch": 6.659730722154222, "grad_norm": 1.538253191851325, "learning_rate": 2.937696463963322e-06, "loss": 0.7277, "step": 5441 }, { "epoch": 6.660954712362301, "grad_norm": 1.2412801331283339, "learning_rate": 2.9370654685468766e-06, "loss": 0.611, "step": 5442 }, { "epoch": 6.66217870257038, "grad_norm": 0.9867519347586725, "learning_rate": 2.9364344444081043e-06, "loss": 0.5191, "step": 5443 }, { "epoch": 6.663402692778458, "grad_norm": 2.5559085656684815, "learning_rate": 2.9358033915884758e-06, "loss": 0.5622, "step": 5444 }, { "epoch": 6.664626682986536, "grad_norm": 2.495609696901564, "learning_rate": 2.9351723101294593e-06, "loss": 0.7681, "step": 5445 }, { "epoch": 6.6658506731946146, "grad_norm": 0.998768139159904, "learning_rate": 2.9345412000725293e-06, "loss": 0.5752, "step": 5446 }, { "epoch": 6.667074663402692, "grad_norm": 1.4598716408089027, "learning_rate": 2.9339100614591583e-06, "loss": 0.5515, "step": 5447 }, { "epoch": 6.668298653610771, "grad_norm": 1.843817307650144, "learning_rate": 2.9332788943308234e-06, "loss": 0.8334, "step": 5448 }, { "epoch": 6.6695226438188495, "grad_norm": 1.3053693899756624, "learning_rate": 2.9326476987290016e-06, "loss": 0.8387, "step": 5449 }, { "epoch": 6.670746634026928, "grad_norm": 1.7916472394091607, "learning_rate": 2.9320164746951734e-06, "loss": 1.3389, "step": 5450 }, { "epoch": 6.671970624235006, "grad_norm": 1.9494926775431656, "learning_rate": 2.9313852222708205e-06, "loss": 0.3525, "step": 5451 }, { "epoch": 6.6731946144430845, "grad_norm": 1.4250091911130924, "learning_rate": 2.930753941497426e-06, "loss": 0.4398, "step": 5452 }, { "epoch": 6.674418604651163, "grad_norm": 1.8861857789345184, "learning_rate": 2.930122632416476e-06, "loss": 0.5356, "step": 5453 }, { "epoch": 6.675642594859241, "grad_norm": 1.6478732289424214, "learning_rate": 2.9294912950694575e-06, "loss": 0.7073, "step": 5454 }, { "epoch": 6.6768665850673194, "grad_norm": 1.055885576391337, "learning_rate": 2.92885992949786e-06, "loss": 0.5498, "step": 5455 }, { "epoch": 6.678090575275398, "grad_norm": 1.2651068193868458, "learning_rate": 2.928228535743174e-06, "loss": 0.6309, "step": 5456 }, { "epoch": 6.679314565483476, "grad_norm": 1.0453699558789769, "learning_rate": 2.927597113846892e-06, "loss": 0.6704, "step": 5457 }, { "epoch": 6.680538555691554, "grad_norm": 1.544603762440099, "learning_rate": 2.9269656638505093e-06, "loss": 1.2118, "step": 5458 }, { "epoch": 6.681762545899633, "grad_norm": 1.3353120729939156, "learning_rate": 2.9263341857955237e-06, "loss": 0.4868, "step": 5459 }, { "epoch": 6.682986536107711, "grad_norm": 1.0203838379611825, "learning_rate": 2.9257026797234304e-06, "loss": 0.6861, "step": 5460 }, { "epoch": 6.684210526315789, "grad_norm": 1.1820878153630456, "learning_rate": 2.9250711456757332e-06, "loss": 0.5495, "step": 5461 }, { "epoch": 6.685434516523868, "grad_norm": 1.8551744844188174, "learning_rate": 2.9244395836939313e-06, "loss": 1.0906, "step": 5462 }, { "epoch": 6.686658506731947, "grad_norm": 1.0380606894740105, "learning_rate": 2.9238079938195306e-06, "loss": 0.5433, "step": 5463 }, { "epoch": 6.687882496940024, "grad_norm": 2.800254243011946, "learning_rate": 2.9231763760940364e-06, "loss": 0.7159, "step": 5464 }, { "epoch": 6.689106487148103, "grad_norm": 1.3471435714340598, "learning_rate": 2.9225447305589556e-06, "loss": 0.4845, "step": 5465 }, { "epoch": 6.690330477356181, "grad_norm": 2.2695246259851585, "learning_rate": 2.9219130572557973e-06, "loss": 0.3888, "step": 5466 }, { "epoch": 6.691554467564259, "grad_norm": 2.32404647565442, "learning_rate": 2.9212813562260744e-06, "loss": 0.5974, "step": 5467 }, { "epoch": 6.692778457772338, "grad_norm": 2.3276412691911976, "learning_rate": 2.920649627511299e-06, "loss": 0.3857, "step": 5468 }, { "epoch": 6.6940024479804165, "grad_norm": 1.466960140415234, "learning_rate": 2.920017871152985e-06, "loss": 1.4148, "step": 5469 }, { "epoch": 6.695226438188494, "grad_norm": 2.3949770999029276, "learning_rate": 2.919386087192651e-06, "loss": 0.4816, "step": 5470 }, { "epoch": 6.696450428396573, "grad_norm": 1.0715180229634735, "learning_rate": 2.9187542756718133e-06, "loss": 0.4676, "step": 5471 }, { "epoch": 6.6976744186046515, "grad_norm": 1.362905469831542, "learning_rate": 2.9181224366319947e-06, "loss": 0.7628, "step": 5472 }, { "epoch": 6.698898408812729, "grad_norm": 1.4433289273395071, "learning_rate": 2.917490570114716e-06, "loss": 0.6354, "step": 5473 }, { "epoch": 6.700122399020808, "grad_norm": 1.5215173231764259, "learning_rate": 2.916858676161501e-06, "loss": 0.491, "step": 5474 }, { "epoch": 6.7013463892288865, "grad_norm": 1.3187886904363915, "learning_rate": 2.916226754813876e-06, "loss": 1.2755, "step": 5475 }, { "epoch": 6.702570379436964, "grad_norm": 1.0213097910209732, "learning_rate": 2.915594806113368e-06, "loss": 0.7594, "step": 5476 }, { "epoch": 6.703794369645043, "grad_norm": 2.431051679861424, "learning_rate": 2.914962830101506e-06, "loss": 0.4393, "step": 5477 }, { "epoch": 6.7050183598531214, "grad_norm": 1.3683341266244353, "learning_rate": 2.914330826819821e-06, "loss": 0.4751, "step": 5478 }, { "epoch": 6.706242350061199, "grad_norm": 1.292614744828594, "learning_rate": 2.9136987963098485e-06, "loss": 0.7983, "step": 5479 }, { "epoch": 6.707466340269278, "grad_norm": 0.9952475928764565, "learning_rate": 2.9130667386131197e-06, "loss": 0.4919, "step": 5480 }, { "epoch": 6.708690330477356, "grad_norm": 0.9581135126661421, "learning_rate": 2.9124346537711736e-06, "loss": 0.8551, "step": 5481 }, { "epoch": 6.709914320685434, "grad_norm": 1.342252398961635, "learning_rate": 2.9118025418255467e-06, "loss": 0.4827, "step": 5482 }, { "epoch": 6.711138310893513, "grad_norm": 1.2986636058898233, "learning_rate": 2.9111704028177813e-06, "loss": 0.4674, "step": 5483 }, { "epoch": 6.712362301101591, "grad_norm": 2.3285673994736853, "learning_rate": 2.9105382367894158e-06, "loss": 0.4935, "step": 5484 }, { "epoch": 6.713586291309669, "grad_norm": 1.4881997250883878, "learning_rate": 2.909906043781998e-06, "loss": 1.1776, "step": 5485 }, { "epoch": 6.714810281517748, "grad_norm": 2.01784293684291, "learning_rate": 2.9092738238370696e-06, "loss": 0.7607, "step": 5486 }, { "epoch": 6.716034271725826, "grad_norm": 1.3899049304915057, "learning_rate": 2.9086415769961802e-06, "loss": 0.489, "step": 5487 }, { "epoch": 6.717258261933905, "grad_norm": 1.7038797248660045, "learning_rate": 2.908009303300878e-06, "loss": 1.3344, "step": 5488 }, { "epoch": 6.718482252141983, "grad_norm": 1.2390325890335105, "learning_rate": 2.9073770027927124e-06, "loss": 0.6985, "step": 5489 }, { "epoch": 6.719706242350061, "grad_norm": 1.2942152571252798, "learning_rate": 2.906744675513237e-06, "loss": 0.9298, "step": 5490 }, { "epoch": 6.720930232558139, "grad_norm": 1.3993578994827387, "learning_rate": 2.906112321504007e-06, "loss": 0.4527, "step": 5491 }, { "epoch": 6.722154222766218, "grad_norm": 1.4792718585314104, "learning_rate": 2.905479940806577e-06, "loss": 1.3775, "step": 5492 }, { "epoch": 6.723378212974296, "grad_norm": 1.137744589809637, "learning_rate": 2.904847533462504e-06, "loss": 0.4232, "step": 5493 }, { "epoch": 6.724602203182375, "grad_norm": 1.282985535639114, "learning_rate": 2.9042150995133506e-06, "loss": 0.553, "step": 5494 }, { "epoch": 6.725826193390453, "grad_norm": 1.3543811744864525, "learning_rate": 2.9035826390006737e-06, "loss": 0.9521, "step": 5495 }, { "epoch": 6.727050183598531, "grad_norm": 0.9516628616008946, "learning_rate": 2.9029501519660403e-06, "loss": 0.5791, "step": 5496 }, { "epoch": 6.72827417380661, "grad_norm": 1.660470956203183, "learning_rate": 2.902317638451012e-06, "loss": 0.5148, "step": 5497 }, { "epoch": 6.729498164014688, "grad_norm": 1.7630791995749566, "learning_rate": 2.901685098497158e-06, "loss": 0.5349, "step": 5498 }, { "epoch": 6.730722154222766, "grad_norm": 1.492037638767875, "learning_rate": 2.9010525321460444e-06, "loss": 0.5163, "step": 5499 }, { "epoch": 6.731946144430845, "grad_norm": 1.1929301688068106, "learning_rate": 2.9004199394392414e-06, "loss": 0.551, "step": 5500 }, { "epoch": 6.7331701346389226, "grad_norm": 1.9769071325880443, "learning_rate": 2.8997873204183215e-06, "loss": 0.9174, "step": 5501 }, { "epoch": 6.734394124847001, "grad_norm": 2.4633897698757554, "learning_rate": 2.8991546751248566e-06, "loss": 0.8526, "step": 5502 }, { "epoch": 6.73561811505508, "grad_norm": 1.2250614516993776, "learning_rate": 2.898522003600424e-06, "loss": 0.8115, "step": 5503 }, { "epoch": 6.7368421052631575, "grad_norm": 1.113889784809873, "learning_rate": 2.8978893058865986e-06, "loss": 0.4103, "step": 5504 }, { "epoch": 6.738066095471236, "grad_norm": 1.0669494947258493, "learning_rate": 2.89725658202496e-06, "loss": 0.609, "step": 5505 }, { "epoch": 6.739290085679315, "grad_norm": 1.6488454308015879, "learning_rate": 2.8966238320570883e-06, "loss": 0.6513, "step": 5506 }, { "epoch": 6.740514075887393, "grad_norm": 1.2775021558575208, "learning_rate": 2.8959910560245653e-06, "loss": 0.2372, "step": 5507 }, { "epoch": 6.741738066095471, "grad_norm": 1.3807083664211375, "learning_rate": 2.895358253968975e-06, "loss": 0.6266, "step": 5508 }, { "epoch": 6.74296205630355, "grad_norm": 1.5308071223081277, "learning_rate": 2.8947254259319025e-06, "loss": 0.438, "step": 5509 }, { "epoch": 6.7441860465116275, "grad_norm": 1.8845323303160175, "learning_rate": 2.894092571954934e-06, "loss": 0.4424, "step": 5510 }, { "epoch": 6.745410036719706, "grad_norm": 2.1992188258494703, "learning_rate": 2.893459692079661e-06, "loss": 0.9594, "step": 5511 }, { "epoch": 6.746634026927785, "grad_norm": 1.7386594934876363, "learning_rate": 2.892826786347671e-06, "loss": 0.4589, "step": 5512 }, { "epoch": 6.747858017135863, "grad_norm": 1.4867070782167986, "learning_rate": 2.892193854800558e-06, "loss": 1.1218, "step": 5513 }, { "epoch": 6.749082007343941, "grad_norm": 1.571551289082583, "learning_rate": 2.891560897479916e-06, "loss": 0.5037, "step": 5514 }, { "epoch": 6.75030599755202, "grad_norm": 1.7238627135306095, "learning_rate": 2.8909279144273395e-06, "loss": 1.0216, "step": 5515 }, { "epoch": 6.751529987760098, "grad_norm": 1.4780157052148044, "learning_rate": 2.8902949056844265e-06, "loss": 1.558, "step": 5516 }, { "epoch": 6.752753977968176, "grad_norm": 1.0489637450628029, "learning_rate": 2.889661871292776e-06, "loss": 0.6966, "step": 5517 }, { "epoch": 6.753977968176255, "grad_norm": 2.694992085034109, "learning_rate": 2.889028811293989e-06, "loss": 0.3871, "step": 5518 }, { "epoch": 6.755201958384333, "grad_norm": 1.5168854147992572, "learning_rate": 2.888395725729666e-06, "loss": 1.4027, "step": 5519 }, { "epoch": 6.756425948592411, "grad_norm": 1.30220338816623, "learning_rate": 2.8877626146414147e-06, "loss": 0.7329, "step": 5520 }, { "epoch": 6.75764993880049, "grad_norm": 1.5936273219399018, "learning_rate": 2.8871294780708364e-06, "loss": 0.5519, "step": 5521 }, { "epoch": 6.758873929008568, "grad_norm": 1.9881903207933318, "learning_rate": 2.8864963160595418e-06, "loss": 0.395, "step": 5522 }, { "epoch": 6.760097919216646, "grad_norm": 2.3256700234874628, "learning_rate": 2.8858631286491388e-06, "loss": 0.6122, "step": 5523 }, { "epoch": 6.7613219094247246, "grad_norm": 1.5894906512465472, "learning_rate": 2.885229915881238e-06, "loss": 0.4179, "step": 5524 }, { "epoch": 6.762545899632803, "grad_norm": 1.140624848690117, "learning_rate": 2.8845966777974514e-06, "loss": 0.6463, "step": 5525 }, { "epoch": 6.763769889840881, "grad_norm": 1.292258199720588, "learning_rate": 2.8839634144393953e-06, "loss": 0.5709, "step": 5526 }, { "epoch": 6.7649938800489595, "grad_norm": 1.1726544701905492, "learning_rate": 2.883330125848683e-06, "loss": 0.6946, "step": 5527 }, { "epoch": 6.766217870257038, "grad_norm": 2.101987659938037, "learning_rate": 2.882696812066932e-06, "loss": 1.0414, "step": 5528 }, { "epoch": 6.767441860465116, "grad_norm": 1.3925568247915026, "learning_rate": 2.882063473135763e-06, "loss": 0.5688, "step": 5529 }, { "epoch": 6.7686658506731945, "grad_norm": 2.8711317697730587, "learning_rate": 2.881430109096795e-06, "loss": 0.3734, "step": 5530 }, { "epoch": 6.769889840881273, "grad_norm": 2.3068818995491998, "learning_rate": 2.8807967199916508e-06, "loss": 0.7609, "step": 5531 }, { "epoch": 6.771113831089352, "grad_norm": 2.2119229857763325, "learning_rate": 2.8801633058619553e-06, "loss": 0.4917, "step": 5532 }, { "epoch": 6.7723378212974294, "grad_norm": 1.3587437601770174, "learning_rate": 2.879529866749334e-06, "loss": 0.3765, "step": 5533 }, { "epoch": 6.773561811505508, "grad_norm": 1.6662838264223658, "learning_rate": 2.878896402695412e-06, "loss": 1.1288, "step": 5534 }, { "epoch": 6.774785801713586, "grad_norm": 1.5381589647662035, "learning_rate": 2.878262913741822e-06, "loss": 0.3143, "step": 5535 }, { "epoch": 6.776009791921664, "grad_norm": 1.2113176917642094, "learning_rate": 2.87762939993019e-06, "loss": 0.7092, "step": 5536 }, { "epoch": 6.777233782129743, "grad_norm": 1.6666538096186447, "learning_rate": 2.876995861302152e-06, "loss": 0.6003, "step": 5537 }, { "epoch": 6.778457772337822, "grad_norm": 1.9984116802527854, "learning_rate": 2.87636229789934e-06, "loss": 0.9455, "step": 5538 }, { "epoch": 6.779681762545899, "grad_norm": 2.042764625334225, "learning_rate": 2.8757287097633903e-06, "loss": 0.5823, "step": 5539 }, { "epoch": 6.780905752753978, "grad_norm": 1.5944089415693725, "learning_rate": 2.8750950969359394e-06, "loss": 0.5314, "step": 5540 }, { "epoch": 6.782129742962057, "grad_norm": 0.909134969934406, "learning_rate": 2.874461459458626e-06, "loss": 0.5253, "step": 5541 }, { "epoch": 6.783353733170134, "grad_norm": 3.104198609857638, "learning_rate": 2.8738277973730903e-06, "loss": 0.4548, "step": 5542 }, { "epoch": 6.784577723378213, "grad_norm": 1.3730505978422332, "learning_rate": 2.8731941107209744e-06, "loss": 0.597, "step": 5543 }, { "epoch": 6.785801713586292, "grad_norm": 1.6106426423431808, "learning_rate": 2.872560399543922e-06, "loss": 1.5078, "step": 5544 }, { "epoch": 6.787025703794369, "grad_norm": 2.2294096730536332, "learning_rate": 2.8719266638835784e-06, "loss": 0.4211, "step": 5545 }, { "epoch": 6.788249694002448, "grad_norm": 2.5661905201238944, "learning_rate": 2.87129290378159e-06, "loss": 0.4292, "step": 5546 }, { "epoch": 6.7894736842105265, "grad_norm": 1.5694390956300333, "learning_rate": 2.870659119279605e-06, "loss": 0.9499, "step": 5547 }, { "epoch": 6.790697674418604, "grad_norm": 1.762397592582257, "learning_rate": 2.8700253104192743e-06, "loss": 0.6753, "step": 5548 }, { "epoch": 6.791921664626683, "grad_norm": 1.2192227787029608, "learning_rate": 2.8693914772422484e-06, "loss": 0.4167, "step": 5549 }, { "epoch": 6.7931456548347615, "grad_norm": 2.0348021446065467, "learning_rate": 2.868757619790181e-06, "loss": 0.4782, "step": 5550 }, { "epoch": 6.79436964504284, "grad_norm": 1.766855908897202, "learning_rate": 2.8681237381047268e-06, "loss": 0.5736, "step": 5551 }, { "epoch": 6.795593635250918, "grad_norm": 1.8174082994667973, "learning_rate": 2.8674898322275415e-06, "loss": 0.5579, "step": 5552 }, { "epoch": 6.7968176254589965, "grad_norm": 1.473994682999887, "learning_rate": 2.8668559022002847e-06, "loss": 1.176, "step": 5553 }, { "epoch": 6.798041615667074, "grad_norm": 1.2405617184968123, "learning_rate": 2.866221948064614e-06, "loss": 1.132, "step": 5554 }, { "epoch": 6.799265605875153, "grad_norm": 1.8038109729808949, "learning_rate": 2.865587969862192e-06, "loss": 0.4156, "step": 5555 }, { "epoch": 6.8004895960832314, "grad_norm": 1.2807488800389606, "learning_rate": 2.8649539676346806e-06, "loss": 1.0399, "step": 5556 }, { "epoch": 6.80171358629131, "grad_norm": 2.157830705854145, "learning_rate": 2.8643199414237444e-06, "loss": 0.5333, "step": 5557 }, { "epoch": 6.802937576499388, "grad_norm": 1.5374685861606177, "learning_rate": 2.8636858912710492e-06, "loss": 0.4798, "step": 5558 }, { "epoch": 6.804161566707466, "grad_norm": 1.7072757834044476, "learning_rate": 2.8630518172182624e-06, "loss": 0.5616, "step": 5559 }, { "epoch": 6.805385556915545, "grad_norm": 1.763419050909636, "learning_rate": 2.8624177193070527e-06, "loss": 0.5572, "step": 5560 }, { "epoch": 6.806609547123623, "grad_norm": 2.2086367623723335, "learning_rate": 2.8617835975790914e-06, "loss": 0.4924, "step": 5561 }, { "epoch": 6.807833537331701, "grad_norm": 1.6581662187082307, "learning_rate": 2.8611494520760503e-06, "loss": 0.5681, "step": 5562 }, { "epoch": 6.80905752753978, "grad_norm": 1.2912542498895163, "learning_rate": 2.8605152828396025e-06, "loss": 0.6669, "step": 5563 }, { "epoch": 6.810281517747858, "grad_norm": 1.2773360113086716, "learning_rate": 2.8598810899114242e-06, "loss": 1.2456, "step": 5564 }, { "epoch": 6.811505507955936, "grad_norm": 1.7239189412695697, "learning_rate": 2.859246873333192e-06, "loss": 1.0573, "step": 5565 }, { "epoch": 6.812729498164015, "grad_norm": 1.409111801451644, "learning_rate": 2.8586126331465837e-06, "loss": 1.1864, "step": 5566 }, { "epoch": 6.813953488372093, "grad_norm": 2.4140424828853235, "learning_rate": 2.8579783693932793e-06, "loss": 0.4393, "step": 5567 }, { "epoch": 6.815177478580171, "grad_norm": 1.7893217408104554, "learning_rate": 2.857344082114961e-06, "loss": 1.1748, "step": 5568 }, { "epoch": 6.81640146878825, "grad_norm": 1.4942723152045594, "learning_rate": 2.856709771353311e-06, "loss": 0.5438, "step": 5569 }, { "epoch": 6.817625458996328, "grad_norm": 1.6136583237999798, "learning_rate": 2.8560754371500144e-06, "loss": 1.0412, "step": 5570 }, { "epoch": 6.818849449204406, "grad_norm": 1.0855419495397352, "learning_rate": 2.855441079546757e-06, "loss": 0.533, "step": 5571 }, { "epoch": 6.820073439412485, "grad_norm": 1.1226668682009042, "learning_rate": 2.854806698585226e-06, "loss": 0.775, "step": 5572 }, { "epoch": 6.821297429620563, "grad_norm": 1.7615917203309266, "learning_rate": 2.854172294307112e-06, "loss": 0.6564, "step": 5573 }, { "epoch": 6.822521419828641, "grad_norm": 1.9358701893859358, "learning_rate": 2.8535378667541048e-06, "loss": 0.6999, "step": 5574 }, { "epoch": 6.82374541003672, "grad_norm": 0.9561991426415091, "learning_rate": 2.8529034159678957e-06, "loss": 0.5384, "step": 5575 }, { "epoch": 6.8249694002447985, "grad_norm": 1.222447857645459, "learning_rate": 2.8522689419901808e-06, "loss": 0.6322, "step": 5576 }, { "epoch": 6.826193390452876, "grad_norm": 1.3817302873636137, "learning_rate": 2.851634444862653e-06, "loss": 0.5453, "step": 5577 }, { "epoch": 6.827417380660955, "grad_norm": 1.5789549227460191, "learning_rate": 2.85099992462701e-06, "loss": 0.3586, "step": 5578 }, { "epoch": 6.828641370869033, "grad_norm": 2.151668502742587, "learning_rate": 2.850365381324951e-06, "loss": 0.2601, "step": 5579 }, { "epoch": 6.829865361077111, "grad_norm": 1.1444473203719696, "learning_rate": 2.8497308149981743e-06, "loss": 0.3236, "step": 5580 }, { "epoch": 6.83108935128519, "grad_norm": 1.411328254441265, "learning_rate": 2.8490962256883825e-06, "loss": 0.3274, "step": 5581 }, { "epoch": 6.832313341493268, "grad_norm": 1.7457198940619485, "learning_rate": 2.8484616134372777e-06, "loss": 0.4341, "step": 5582 }, { "epoch": 6.833537331701346, "grad_norm": 1.5333463904461255, "learning_rate": 2.8478269782865647e-06, "loss": 1.8214, "step": 5583 }, { "epoch": 6.834761321909425, "grad_norm": 1.4720810898924692, "learning_rate": 2.8471923202779485e-06, "loss": 1.4728, "step": 5584 }, { "epoch": 6.835985312117503, "grad_norm": 2.592203672423223, "learning_rate": 2.8465576394531387e-06, "loss": 0.6075, "step": 5585 }, { "epoch": 6.837209302325581, "grad_norm": 2.215516301459949, "learning_rate": 2.845922935853841e-06, "loss": 0.6339, "step": 5586 }, { "epoch": 6.83843329253366, "grad_norm": 2.4041657301943355, "learning_rate": 2.845288209521768e-06, "loss": 0.6214, "step": 5587 }, { "epoch": 6.839657282741738, "grad_norm": 1.3649307736723348, "learning_rate": 2.8446534604986316e-06, "loss": 1.4189, "step": 5588 }, { "epoch": 6.840881272949816, "grad_norm": 0.9574641350600663, "learning_rate": 2.844018688826144e-06, "loss": 0.654, "step": 5589 }, { "epoch": 6.842105263157895, "grad_norm": 1.5451458449232431, "learning_rate": 2.8433838945460207e-06, "loss": 0.3156, "step": 5590 }, { "epoch": 6.843329253365973, "grad_norm": 1.7067177890896719, "learning_rate": 2.8427490776999782e-06, "loss": 0.9798, "step": 5591 }, { "epoch": 6.844553243574051, "grad_norm": 1.4948734450041181, "learning_rate": 2.842114238329734e-06, "loss": 0.537, "step": 5592 }, { "epoch": 6.84577723378213, "grad_norm": 2.129204498136481, "learning_rate": 2.8414793764770066e-06, "loss": 0.4402, "step": 5593 }, { "epoch": 6.847001223990208, "grad_norm": 1.3930568137526673, "learning_rate": 2.8408444921835186e-06, "loss": 1.2634, "step": 5594 }, { "epoch": 6.848225214198287, "grad_norm": 1.9202199638406743, "learning_rate": 2.840209585490991e-06, "loss": 0.5557, "step": 5595 }, { "epoch": 6.849449204406365, "grad_norm": 2.0247105830522703, "learning_rate": 2.8395746564411473e-06, "loss": 0.5018, "step": 5596 }, { "epoch": 6.850673194614443, "grad_norm": 1.2873235442825794, "learning_rate": 2.838939705075714e-06, "loss": 0.3714, "step": 5597 }, { "epoch": 6.851897184822521, "grad_norm": 1.2695055235056605, "learning_rate": 2.838304731436417e-06, "loss": 0.2878, "step": 5598 }, { "epoch": 6.8531211750306, "grad_norm": 1.0802077898535527, "learning_rate": 2.837669735564984e-06, "loss": 0.857, "step": 5599 }, { "epoch": 6.854345165238678, "grad_norm": 1.563819332121256, "learning_rate": 2.837034717503145e-06, "loss": 0.4073, "step": 5600 }, { "epoch": 6.855569155446757, "grad_norm": 2.5685221140215315, "learning_rate": 2.83639967729263e-06, "loss": 0.7211, "step": 5601 }, { "epoch": 6.8567931456548346, "grad_norm": 1.2740984068263674, "learning_rate": 2.835764614975174e-06, "loss": 0.5242, "step": 5602 }, { "epoch": 6.858017135862913, "grad_norm": 1.6622183366915677, "learning_rate": 2.835129530592509e-06, "loss": 0.4811, "step": 5603 }, { "epoch": 6.859241126070992, "grad_norm": 2.0154729641193305, "learning_rate": 2.83449442418637e-06, "loss": 0.4418, "step": 5604 }, { "epoch": 6.8604651162790695, "grad_norm": 1.401774333585441, "learning_rate": 2.833859295798495e-06, "loss": 0.7093, "step": 5605 }, { "epoch": 6.861689106487148, "grad_norm": 1.4757713740131093, "learning_rate": 2.833224145470622e-06, "loss": 0.6489, "step": 5606 }, { "epoch": 6.862913096695227, "grad_norm": 1.462399271457787, "learning_rate": 2.8325889732444913e-06, "loss": 0.944, "step": 5607 }, { "epoch": 6.8641370869033045, "grad_norm": 1.1416131305379702, "learning_rate": 2.831953779161843e-06, "loss": 0.5225, "step": 5608 }, { "epoch": 6.865361077111383, "grad_norm": 2.542894341869147, "learning_rate": 2.8313185632644204e-06, "loss": 0.3781, "step": 5609 }, { "epoch": 6.866585067319462, "grad_norm": 1.103869734737693, "learning_rate": 2.830683325593966e-06, "loss": 0.4179, "step": 5610 }, { "epoch": 6.8678090575275395, "grad_norm": 1.8955038725499673, "learning_rate": 2.8300480661922274e-06, "loss": 0.3973, "step": 5611 }, { "epoch": 6.869033047735618, "grad_norm": 2.1629217074522002, "learning_rate": 2.829412785100951e-06, "loss": 0.5431, "step": 5612 }, { "epoch": 6.870257037943697, "grad_norm": 1.806640460690669, "learning_rate": 2.828777482361885e-06, "loss": 0.5942, "step": 5613 }, { "epoch": 6.871481028151774, "grad_norm": 1.5896452251647475, "learning_rate": 2.8281421580167784e-06, "loss": 0.7486, "step": 5614 }, { "epoch": 6.872705018359853, "grad_norm": 1.1884174332178827, "learning_rate": 2.8275068121073828e-06, "loss": 0.4513, "step": 5615 }, { "epoch": 6.873929008567932, "grad_norm": 1.5484639936027278, "learning_rate": 2.8268714446754513e-06, "loss": 0.4439, "step": 5616 }, { "epoch": 6.875152998776009, "grad_norm": 1.2693783182580014, "learning_rate": 2.8262360557627373e-06, "loss": 0.685, "step": 5617 }, { "epoch": 6.876376988984088, "grad_norm": 1.385174129542064, "learning_rate": 2.8256006454109963e-06, "loss": 0.6424, "step": 5618 }, { "epoch": 6.877600979192167, "grad_norm": 1.503480466250667, "learning_rate": 2.824965213661985e-06, "loss": 0.4484, "step": 5619 }, { "epoch": 6.878824969400245, "grad_norm": 1.4160362091416847, "learning_rate": 2.824329760557462e-06, "loss": 0.6639, "step": 5620 }, { "epoch": 6.880048959608323, "grad_norm": 1.1593963561084202, "learning_rate": 2.8236942861391866e-06, "loss": 0.5689, "step": 5621 }, { "epoch": 6.881272949816402, "grad_norm": 1.5986474443815986, "learning_rate": 2.8230587904489198e-06, "loss": 0.7542, "step": 5622 }, { "epoch": 6.882496940024479, "grad_norm": 1.0606291595186266, "learning_rate": 2.8224232735284252e-06, "loss": 0.8528, "step": 5623 }, { "epoch": 6.883720930232558, "grad_norm": 2.0414041826212985, "learning_rate": 2.8217877354194646e-06, "loss": 0.6062, "step": 5624 }, { "epoch": 6.8849449204406366, "grad_norm": 1.5628322833357626, "learning_rate": 2.821152176163804e-06, "loss": 0.5227, "step": 5625 }, { "epoch": 6.886168910648715, "grad_norm": 2.033286739920621, "learning_rate": 2.8205165958032114e-06, "loss": 0.4747, "step": 5626 }, { "epoch": 6.887392900856793, "grad_norm": 1.709131282192089, "learning_rate": 2.8198809943794536e-06, "loss": 0.4376, "step": 5627 }, { "epoch": 6.8886168910648715, "grad_norm": 1.2018914774896028, "learning_rate": 2.819245371934299e-06, "loss": 0.3833, "step": 5628 }, { "epoch": 6.88984088127295, "grad_norm": 0.7315140730800727, "learning_rate": 2.8186097285095205e-06, "loss": 0.4407, "step": 5629 }, { "epoch": 6.891064871481028, "grad_norm": 1.8206093292659014, "learning_rate": 2.8179740641468877e-06, "loss": 0.5082, "step": 5630 }, { "epoch": 6.8922888616891065, "grad_norm": 1.4726296290721987, "learning_rate": 2.8173383788881765e-06, "loss": 1.5164, "step": 5631 }, { "epoch": 6.893512851897185, "grad_norm": 2.2236524484736786, "learning_rate": 2.81670267277516e-06, "loss": 0.5094, "step": 5632 }, { "epoch": 6.894736842105263, "grad_norm": 2.5426804695466525, "learning_rate": 2.816066945849616e-06, "loss": 0.5464, "step": 5633 }, { "epoch": 6.8959608323133414, "grad_norm": 1.3481794610529918, "learning_rate": 2.815431198153321e-06, "loss": 0.9403, "step": 5634 }, { "epoch": 6.89718482252142, "grad_norm": 1.5207084979225358, "learning_rate": 2.814795429728055e-06, "loss": 0.4273, "step": 5635 }, { "epoch": 6.898408812729498, "grad_norm": 2.961267542614461, "learning_rate": 2.814159640615597e-06, "loss": 0.3902, "step": 5636 }, { "epoch": 6.899632802937576, "grad_norm": 1.321469085314527, "learning_rate": 2.8135238308577294e-06, "loss": 0.983, "step": 5637 }, { "epoch": 6.900856793145655, "grad_norm": 1.411216602371665, "learning_rate": 2.8128880004962354e-06, "loss": 1.189, "step": 5638 }, { "epoch": 6.902080783353734, "grad_norm": 1.5209734737620877, "learning_rate": 2.8122521495728993e-06, "loss": 0.5049, "step": 5639 }, { "epoch": 6.903304773561811, "grad_norm": 1.6266607480507114, "learning_rate": 2.8116162781295074e-06, "loss": 0.5653, "step": 5640 }, { "epoch": 6.90452876376989, "grad_norm": 2.074697253979892, "learning_rate": 2.8109803862078463e-06, "loss": 0.6308, "step": 5641 }, { "epoch": 6.905752753977968, "grad_norm": 1.4722037906119299, "learning_rate": 2.810344473849704e-06, "loss": 0.7524, "step": 5642 }, { "epoch": 6.906976744186046, "grad_norm": 0.9497172256863463, "learning_rate": 2.80970854109687e-06, "loss": 0.5778, "step": 5643 }, { "epoch": 6.908200734394125, "grad_norm": 2.144475825860658, "learning_rate": 2.8090725879911375e-06, "loss": 0.6466, "step": 5644 }, { "epoch": 6.909424724602204, "grad_norm": 1.628375054206427, "learning_rate": 2.808436614574297e-06, "loss": 0.606, "step": 5645 }, { "epoch": 6.910648714810281, "grad_norm": 1.5280337361002931, "learning_rate": 2.807800620888143e-06, "loss": 0.4826, "step": 5646 }, { "epoch": 6.91187270501836, "grad_norm": 1.1099564867713991, "learning_rate": 2.8071646069744713e-06, "loss": 0.6397, "step": 5647 }, { "epoch": 6.9130966952264385, "grad_norm": 1.1603910321236541, "learning_rate": 2.8065285728750774e-06, "loss": 0.7302, "step": 5648 }, { "epoch": 6.914320685434516, "grad_norm": 1.1903171311391147, "learning_rate": 2.8058925186317585e-06, "loss": 0.6879, "step": 5649 }, { "epoch": 6.915544675642595, "grad_norm": 0.9832356004428477, "learning_rate": 2.8052564442863162e-06, "loss": 0.5862, "step": 5650 }, { "epoch": 6.9167686658506735, "grad_norm": 1.423661062851523, "learning_rate": 2.8046203498805486e-06, "loss": 0.7372, "step": 5651 }, { "epoch": 6.917992656058751, "grad_norm": 1.2716321302258267, "learning_rate": 2.8039842354562584e-06, "loss": 0.5774, "step": 5652 }, { "epoch": 6.91921664626683, "grad_norm": 1.247396849369937, "learning_rate": 2.8033481010552492e-06, "loss": 1.1678, "step": 5653 }, { "epoch": 6.9204406364749085, "grad_norm": 1.4306493095549107, "learning_rate": 2.8027119467193243e-06, "loss": 0.8759, "step": 5654 }, { "epoch": 6.921664626682986, "grad_norm": 1.994299187587238, "learning_rate": 2.8020757724902902e-06, "loss": 1.0572, "step": 5655 }, { "epoch": 6.922888616891065, "grad_norm": 1.5424570527700001, "learning_rate": 2.8014395784099535e-06, "loss": 0.65, "step": 5656 }, { "epoch": 6.9241126070991434, "grad_norm": 2.2896733074433167, "learning_rate": 2.8008033645201226e-06, "loss": 0.6201, "step": 5657 }, { "epoch": 6.925336597307221, "grad_norm": 2.3345054837354673, "learning_rate": 2.8001671308626072e-06, "loss": 0.9247, "step": 5658 }, { "epoch": 6.9265605875153, "grad_norm": 1.2501600181123813, "learning_rate": 2.799530877479218e-06, "loss": 0.4548, "step": 5659 }, { "epoch": 6.927784577723378, "grad_norm": 1.1776687445336698, "learning_rate": 2.798894604411767e-06, "loss": 0.9079, "step": 5660 }, { "epoch": 6.929008567931456, "grad_norm": 1.6765667558287263, "learning_rate": 2.798258311702069e-06, "loss": 0.4492, "step": 5661 }, { "epoch": 6.930232558139535, "grad_norm": 1.6844512402481335, "learning_rate": 2.797621999391938e-06, "loss": 0.4179, "step": 5662 }, { "epoch": 6.931456548347613, "grad_norm": 0.9554788384749119, "learning_rate": 2.7969856675231892e-06, "loss": 0.5953, "step": 5663 }, { "epoch": 6.932680538555692, "grad_norm": 1.0187225692161708, "learning_rate": 2.7963493161376416e-06, "loss": 0.4889, "step": 5664 }, { "epoch": 6.93390452876377, "grad_norm": 1.504079583591393, "learning_rate": 2.7957129452771124e-06, "loss": 0.3289, "step": 5665 }, { "epoch": 6.935128518971848, "grad_norm": 0.705651972375692, "learning_rate": 2.795076554983423e-06, "loss": 0.2258, "step": 5666 }, { "epoch": 6.936352509179926, "grad_norm": 1.2209188390346009, "learning_rate": 2.7944401452983933e-06, "loss": 1.5845, "step": 5667 }, { "epoch": 6.937576499388005, "grad_norm": 1.039276224584157, "learning_rate": 2.7938037162638467e-06, "loss": 0.5855, "step": 5668 }, { "epoch": 6.938800489596083, "grad_norm": 1.7470168816925766, "learning_rate": 2.7931672679216055e-06, "loss": 0.6362, "step": 5669 }, { "epoch": 6.940024479804162, "grad_norm": 1.6136024828947029, "learning_rate": 2.7925308003134965e-06, "loss": 0.6726, "step": 5670 }, { "epoch": 6.94124847001224, "grad_norm": 2.8119470035877248, "learning_rate": 2.791894313481346e-06, "loss": 0.3785, "step": 5671 }, { "epoch": 6.942472460220318, "grad_norm": 1.4586942044300744, "learning_rate": 2.79125780746698e-06, "loss": 0.8718, "step": 5672 }, { "epoch": 6.943696450428397, "grad_norm": 2.311431681790971, "learning_rate": 2.790621282312228e-06, "loss": 1.2055, "step": 5673 }, { "epoch": 6.944920440636475, "grad_norm": 0.9864373083228465, "learning_rate": 2.789984738058921e-06, "loss": 0.4061, "step": 5674 }, { "epoch": 6.946144430844553, "grad_norm": 1.8878327347618347, "learning_rate": 2.7893481747488887e-06, "loss": 0.5095, "step": 5675 }, { "epoch": 6.947368421052632, "grad_norm": 2.4931241462147025, "learning_rate": 2.788711592423966e-06, "loss": 0.8309, "step": 5676 }, { "epoch": 6.94859241126071, "grad_norm": 1.08635165012773, "learning_rate": 2.7880749911259835e-06, "loss": 0.5723, "step": 5677 }, { "epoch": 6.949816401468788, "grad_norm": 1.165966432302402, "learning_rate": 2.7874383708967795e-06, "loss": 0.7778, "step": 5678 }, { "epoch": 6.951040391676867, "grad_norm": 1.077425618290188, "learning_rate": 2.7868017317781893e-06, "loss": 0.5074, "step": 5679 }, { "epoch": 6.9522643818849446, "grad_norm": 1.322945283083331, "learning_rate": 2.7861650738120483e-06, "loss": 1.1806, "step": 5680 }, { "epoch": 6.953488372093023, "grad_norm": 1.1650117298399576, "learning_rate": 2.785528397040198e-06, "loss": 1.3465, "step": 5681 }, { "epoch": 6.954712362301102, "grad_norm": 2.092081053163567, "learning_rate": 2.784891701504478e-06, "loss": 0.7386, "step": 5682 }, { "epoch": 6.95593635250918, "grad_norm": 1.6265146638502619, "learning_rate": 2.784254987246729e-06, "loss": 0.3867, "step": 5683 }, { "epoch": 6.957160342717258, "grad_norm": 0.9563106239619373, "learning_rate": 2.7836182543087925e-06, "loss": 0.3989, "step": 5684 }, { "epoch": 6.958384332925337, "grad_norm": 1.3121335740374447, "learning_rate": 2.782981502732515e-06, "loss": 1.133, "step": 5685 }, { "epoch": 6.9596083231334145, "grad_norm": 1.853527352489472, "learning_rate": 2.7823447325597385e-06, "loss": 0.5775, "step": 5686 }, { "epoch": 6.960832313341493, "grad_norm": 1.3518298250920942, "learning_rate": 2.7817079438323108e-06, "loss": 0.6364, "step": 5687 }, { "epoch": 6.962056303549572, "grad_norm": 1.9995846681058083, "learning_rate": 2.7810711365920797e-06, "loss": 0.4477, "step": 5688 }, { "epoch": 6.96328029375765, "grad_norm": 1.6077324491211626, "learning_rate": 2.7804343108808925e-06, "loss": 0.4776, "step": 5689 }, { "epoch": 6.964504283965728, "grad_norm": 1.8115933211891238, "learning_rate": 2.779797466740599e-06, "loss": 0.631, "step": 5690 }, { "epoch": 6.965728274173807, "grad_norm": 1.1595684306073137, "learning_rate": 2.7791606042130527e-06, "loss": 0.5911, "step": 5691 }, { "epoch": 6.966952264381885, "grad_norm": 1.3859485479142015, "learning_rate": 2.778523723340103e-06, "loss": 1.4986, "step": 5692 }, { "epoch": 6.968176254589963, "grad_norm": 2.0279516233768367, "learning_rate": 2.7778868241636037e-06, "loss": 0.5236, "step": 5693 }, { "epoch": 6.969400244798042, "grad_norm": 1.2921969119899792, "learning_rate": 2.7772499067254116e-06, "loss": 0.5807, "step": 5694 }, { "epoch": 6.97062423500612, "grad_norm": 1.5267217629225196, "learning_rate": 2.7766129710673793e-06, "loss": 0.4332, "step": 5695 }, { "epoch": 6.971848225214198, "grad_norm": 1.1166284550278673, "learning_rate": 2.775976017231367e-06, "loss": 1.2112, "step": 5696 }, { "epoch": 6.973072215422277, "grad_norm": 1.8305482993659905, "learning_rate": 2.7753390452592315e-06, "loss": 1.1402, "step": 5697 }, { "epoch": 6.974296205630355, "grad_norm": 1.3097586202556044, "learning_rate": 2.7747020551928324e-06, "loss": 0.5906, "step": 5698 }, { "epoch": 6.975520195838433, "grad_norm": 2.081456878253612, "learning_rate": 2.774065047074029e-06, "loss": 0.5131, "step": 5699 }, { "epoch": 6.976744186046512, "grad_norm": 1.722744498888617, "learning_rate": 2.773428020944687e-06, "loss": 0.5964, "step": 5700 }, { "epoch": 6.97796817625459, "grad_norm": 0.9771996886752283, "learning_rate": 2.7727909768466643e-06, "loss": 0.6322, "step": 5701 }, { "epoch": 6.979192166462668, "grad_norm": 1.4519669875846333, "learning_rate": 2.7721539148218284e-06, "loss": 1.1009, "step": 5702 }, { "epoch": 6.9804161566707466, "grad_norm": 1.893936512748418, "learning_rate": 2.7715168349120448e-06, "loss": 0.4188, "step": 5703 }, { "epoch": 6.981640146878825, "grad_norm": 1.0380596268232443, "learning_rate": 2.770879737159178e-06, "loss": 0.5522, "step": 5704 }, { "epoch": 6.982864137086903, "grad_norm": 1.5654329586133502, "learning_rate": 2.770242621605097e-06, "loss": 0.4964, "step": 5705 }, { "epoch": 6.9840881272949815, "grad_norm": 1.0099856866704504, "learning_rate": 2.769605488291671e-06, "loss": 0.5704, "step": 5706 }, { "epoch": 6.98531211750306, "grad_norm": 1.4013700428713258, "learning_rate": 2.7689683372607683e-06, "loss": 0.6112, "step": 5707 }, { "epoch": 6.986536107711139, "grad_norm": 2.0375718517328227, "learning_rate": 2.7683311685542626e-06, "loss": 0.5716, "step": 5708 }, { "epoch": 6.9877600979192165, "grad_norm": 2.441580915229278, "learning_rate": 2.7676939822140242e-06, "loss": 0.3858, "step": 5709 }, { "epoch": 6.988984088127295, "grad_norm": 1.7775613873638167, "learning_rate": 2.7670567782819276e-06, "loss": 0.4033, "step": 5710 }, { "epoch": 6.990208078335373, "grad_norm": 1.4286067491090981, "learning_rate": 2.766419556799847e-06, "loss": 0.5067, "step": 5711 }, { "epoch": 6.9914320685434515, "grad_norm": 2.3175541776858264, "learning_rate": 2.7657823178096593e-06, "loss": 0.9759, "step": 5712 }, { "epoch": 6.99265605875153, "grad_norm": 1.3174463191761763, "learning_rate": 2.7651450613532406e-06, "loss": 0.8898, "step": 5713 }, { "epoch": 6.993880048959609, "grad_norm": 2.6627553236610284, "learning_rate": 2.7645077874724686e-06, "loss": 0.4901, "step": 5714 }, { "epoch": 6.995104039167686, "grad_norm": 1.307970970516204, "learning_rate": 2.7638704962092235e-06, "loss": 1.2953, "step": 5715 }, { "epoch": 6.996328029375765, "grad_norm": 1.0771487535088442, "learning_rate": 2.7632331876053853e-06, "loss": 0.6143, "step": 5716 }, { "epoch": 6.997552019583844, "grad_norm": 1.7717526762143052, "learning_rate": 2.762595861702836e-06, "loss": 0.7555, "step": 5717 }, { "epoch": 6.998776009791921, "grad_norm": 1.6528459547647463, "learning_rate": 2.7619585185434577e-06, "loss": 0.6436, "step": 5718 }, { "epoch": 7.0, "grad_norm": 0.8407809597322985, "learning_rate": 2.761321158169134e-06, "loss": 0.4557, "step": 5719 }, { "epoch": 7.001223990208079, "grad_norm": 1.3936232359981666, "learning_rate": 2.7606837806217507e-06, "loss": 0.5045, "step": 5720 }, { "epoch": 7.002447980416156, "grad_norm": 1.9081785522443069, "learning_rate": 2.7600463859431935e-06, "loss": 0.696, "step": 5721 }, { "epoch": 7.003671970624235, "grad_norm": 1.86631256931455, "learning_rate": 2.7594089741753497e-06, "loss": 0.8365, "step": 5722 }, { "epoch": 7.004895960832314, "grad_norm": 1.2755530683852594, "learning_rate": 2.758771545360108e-06, "loss": 0.8842, "step": 5723 }, { "epoch": 7.006119951040391, "grad_norm": 1.5822070272052304, "learning_rate": 2.7581340995393576e-06, "loss": 1.1843, "step": 5724 }, { "epoch": 7.00734394124847, "grad_norm": 1.3173955583038957, "learning_rate": 2.7574966367549875e-06, "loss": 0.2338, "step": 5725 }, { "epoch": 7.0085679314565485, "grad_norm": 1.4004833582546488, "learning_rate": 2.7568591570488922e-06, "loss": 0.6541, "step": 5726 }, { "epoch": 7.009791921664626, "grad_norm": 1.447144346013208, "learning_rate": 2.7562216604629626e-06, "loss": 0.7426, "step": 5727 }, { "epoch": 7.011015911872705, "grad_norm": 1.8803523907042863, "learning_rate": 2.7555841470390937e-06, "loss": 0.462, "step": 5728 }, { "epoch": 7.0122399020807835, "grad_norm": 1.4348414073562572, "learning_rate": 2.7549466168191797e-06, "loss": 1.6506, "step": 5729 }, { "epoch": 7.013463892288861, "grad_norm": 2.2653560519163563, "learning_rate": 2.7543090698451168e-06, "loss": 0.684, "step": 5730 }, { "epoch": 7.01468788249694, "grad_norm": 1.3654100184497586, "learning_rate": 2.7536715061588028e-06, "loss": 0.492, "step": 5731 }, { "epoch": 7.0159118727050185, "grad_norm": 1.3452738442029217, "learning_rate": 2.7530339258021354e-06, "loss": 2.2912, "step": 5732 }, { "epoch": 7.017135862913097, "grad_norm": 1.1645464442942937, "learning_rate": 2.752396328817015e-06, "loss": 0.6895, "step": 5733 }, { "epoch": 7.018359853121175, "grad_norm": 1.6113214323656389, "learning_rate": 2.75175871524534e-06, "loss": 1.0966, "step": 5734 }, { "epoch": 7.0195838433292534, "grad_norm": 1.1610481549853344, "learning_rate": 2.751121085129015e-06, "loss": 0.7724, "step": 5735 }, { "epoch": 7.020807833537332, "grad_norm": 2.093481195984658, "learning_rate": 2.75048343850994e-06, "loss": 0.5418, "step": 5736 }, { "epoch": 7.02203182374541, "grad_norm": 2.4098117403833563, "learning_rate": 2.74984577543002e-06, "loss": 1.0422, "step": 5737 }, { "epoch": 7.023255813953488, "grad_norm": 0.9765361027707729, "learning_rate": 2.74920809593116e-06, "loss": 0.4784, "step": 5738 }, { "epoch": 7.024479804161567, "grad_norm": 1.1526812328428344, "learning_rate": 2.748570400055266e-06, "loss": 0.3773, "step": 5739 }, { "epoch": 7.025703794369645, "grad_norm": 1.1743774687069874, "learning_rate": 2.747932687844244e-06, "loss": 0.6403, "step": 5740 }, { "epoch": 7.026927784577723, "grad_norm": 1.5008926975731758, "learning_rate": 2.747294959340004e-06, "loss": 0.4337, "step": 5741 }, { "epoch": 7.028151774785802, "grad_norm": 1.1481467747500385, "learning_rate": 2.7466572145844526e-06, "loss": 0.6808, "step": 5742 }, { "epoch": 7.02937576499388, "grad_norm": 1.7680299683236922, "learning_rate": 2.746019453619502e-06, "loss": 0.9775, "step": 5743 }, { "epoch": 7.030599755201958, "grad_norm": 2.1993521413858903, "learning_rate": 2.7453816764870635e-06, "loss": 0.5715, "step": 5744 }, { "epoch": 7.031823745410037, "grad_norm": 1.7408233380372644, "learning_rate": 2.7447438832290472e-06, "loss": 0.4052, "step": 5745 }, { "epoch": 7.033047735618115, "grad_norm": 1.1310739777274708, "learning_rate": 2.744106073887369e-06, "loss": 0.7272, "step": 5746 }, { "epoch": 7.034271725826193, "grad_norm": 1.3316798823595026, "learning_rate": 2.743468248503943e-06, "loss": 0.501, "step": 5747 }, { "epoch": 7.035495716034272, "grad_norm": 1.8943219444810213, "learning_rate": 2.742830407120684e-06, "loss": 0.966, "step": 5748 }, { "epoch": 7.03671970624235, "grad_norm": 2.4968384597846263, "learning_rate": 2.742192549779508e-06, "loss": 0.707, "step": 5749 }, { "epoch": 7.037943696450428, "grad_norm": 1.508006557868597, "learning_rate": 2.7415546765223348e-06, "loss": 0.6423, "step": 5750 }, { "epoch": 7.039167686658507, "grad_norm": 1.1023914614340453, "learning_rate": 2.740916787391081e-06, "loss": 0.6931, "step": 5751 }, { "epoch": 7.0403916768665855, "grad_norm": 1.7197382424757552, "learning_rate": 2.7402788824276666e-06, "loss": 0.4326, "step": 5752 }, { "epoch": 7.041615667074663, "grad_norm": 1.6195769915097413, "learning_rate": 2.739640961674013e-06, "loss": 1.5709, "step": 5753 }, { "epoch": 7.042839657282742, "grad_norm": 1.8645274344379392, "learning_rate": 2.7390030251720424e-06, "loss": 0.6702, "step": 5754 }, { "epoch": 7.0440636474908205, "grad_norm": 2.0852137070919396, "learning_rate": 2.738365072963677e-06, "loss": 0.52, "step": 5755 }, { "epoch": 7.045287637698898, "grad_norm": 1.7589598655329923, "learning_rate": 2.73772710509084e-06, "loss": 0.4233, "step": 5756 }, { "epoch": 7.046511627906977, "grad_norm": 1.1172507436350694, "learning_rate": 2.7370891215954572e-06, "loss": 0.4873, "step": 5757 }, { "epoch": 7.0477356181150554, "grad_norm": 1.4946805867168533, "learning_rate": 2.736451122519454e-06, "loss": 0.4886, "step": 5758 }, { "epoch": 7.048959608323133, "grad_norm": 2.030585292545271, "learning_rate": 2.7358131079047586e-06, "loss": 0.5588, "step": 5759 }, { "epoch": 7.050183598531212, "grad_norm": 1.6193525253161094, "learning_rate": 2.735175077793296e-06, "loss": 0.4692, "step": 5760 }, { "epoch": 7.05140758873929, "grad_norm": 1.1418665943312767, "learning_rate": 2.734537032226999e-06, "loss": 0.5283, "step": 5761 }, { "epoch": 7.052631578947368, "grad_norm": 1.9771389968736977, "learning_rate": 2.733898971247795e-06, "loss": 0.3952, "step": 5762 }, { "epoch": 7.053855569155447, "grad_norm": 1.6324033415189536, "learning_rate": 2.7332608948976156e-06, "loss": 0.9971, "step": 5763 }, { "epoch": 7.055079559363525, "grad_norm": 2.7914129226149016, "learning_rate": 2.732622803218393e-06, "loss": 0.613, "step": 5764 }, { "epoch": 7.056303549571603, "grad_norm": 2.9666117003008337, "learning_rate": 2.7319846962520607e-06, "loss": 0.3603, "step": 5765 }, { "epoch": 7.057527539779682, "grad_norm": 1.2779417711179746, "learning_rate": 2.7313465740405515e-06, "loss": 0.8239, "step": 5766 }, { "epoch": 7.05875152998776, "grad_norm": 1.2079209765318863, "learning_rate": 2.730708436625802e-06, "loss": 0.5858, "step": 5767 }, { "epoch": 7.059975520195838, "grad_norm": 2.5068398972809938, "learning_rate": 2.7300702840497468e-06, "loss": 0.8193, "step": 5768 }, { "epoch": 7.061199510403917, "grad_norm": 2.3953955602104866, "learning_rate": 2.729432116354323e-06, "loss": 0.3867, "step": 5769 }, { "epoch": 7.062423500611995, "grad_norm": 1.8815610494355783, "learning_rate": 2.72879393358147e-06, "loss": 1.2113, "step": 5770 }, { "epoch": 7.063647490820073, "grad_norm": 1.2963416540082595, "learning_rate": 2.728155735773126e-06, "loss": 1.2845, "step": 5771 }, { "epoch": 7.064871481028152, "grad_norm": 1.4069806454930105, "learning_rate": 2.7275175229712304e-06, "loss": 0.6298, "step": 5772 }, { "epoch": 7.06609547123623, "grad_norm": 1.211515031542482, "learning_rate": 2.726879295217725e-06, "loss": 0.6859, "step": 5773 }, { "epoch": 7.067319461444308, "grad_norm": 1.21644057716479, "learning_rate": 2.7262410525545513e-06, "loss": 1.0965, "step": 5774 }, { "epoch": 7.068543451652387, "grad_norm": 1.0192296458061059, "learning_rate": 2.725602795023652e-06, "loss": 0.5154, "step": 5775 }, { "epoch": 7.069767441860465, "grad_norm": 1.351963391175913, "learning_rate": 2.724964522666973e-06, "loss": 0.5391, "step": 5776 }, { "epoch": 7.070991432068544, "grad_norm": 1.9329311037111725, "learning_rate": 2.724326235526456e-06, "loss": 0.4105, "step": 5777 }, { "epoch": 7.072215422276622, "grad_norm": 2.1154178307669653, "learning_rate": 2.723687933644049e-06, "loss": 0.4184, "step": 5778 }, { "epoch": 7.0734394124847, "grad_norm": 1.2904874668197714, "learning_rate": 2.7230496170616986e-06, "loss": 1.315, "step": 5779 }, { "epoch": 7.074663402692779, "grad_norm": 1.402154418545157, "learning_rate": 2.7224112858213523e-06, "loss": 1.2562, "step": 5780 }, { "epoch": 7.0758873929008566, "grad_norm": 1.3615501179704175, "learning_rate": 2.721772939964959e-06, "loss": 0.4416, "step": 5781 }, { "epoch": 7.077111383108935, "grad_norm": 1.3710532484153344, "learning_rate": 2.7211345795344683e-06, "loss": 0.5076, "step": 5782 }, { "epoch": 7.078335373317014, "grad_norm": 2.303215137439167, "learning_rate": 2.7204962045718313e-06, "loss": 0.4201, "step": 5783 }, { "epoch": 7.0795593635250915, "grad_norm": 2.5233256678333165, "learning_rate": 2.7198578151189988e-06, "loss": 0.4376, "step": 5784 }, { "epoch": 7.08078335373317, "grad_norm": 2.288407472573978, "learning_rate": 2.719219411217924e-06, "loss": 0.4178, "step": 5785 }, { "epoch": 7.082007343941249, "grad_norm": 1.1597742968340323, "learning_rate": 2.71858099291056e-06, "loss": 0.9023, "step": 5786 }, { "epoch": 7.0832313341493265, "grad_norm": 2.179264712309661, "learning_rate": 2.717942560238862e-06, "loss": 0.5424, "step": 5787 }, { "epoch": 7.084455324357405, "grad_norm": 1.4521444669365413, "learning_rate": 2.717304113244785e-06, "loss": 0.5228, "step": 5788 }, { "epoch": 7.085679314565484, "grad_norm": 2.0958335284910823, "learning_rate": 2.716665651970286e-06, "loss": 0.49, "step": 5789 }, { "epoch": 7.0869033047735615, "grad_norm": 1.028511948278021, "learning_rate": 2.7160271764573205e-06, "loss": 0.3915, "step": 5790 }, { "epoch": 7.08812729498164, "grad_norm": 1.2489147081014447, "learning_rate": 2.7153886867478497e-06, "loss": 0.5455, "step": 5791 }, { "epoch": 7.089351285189719, "grad_norm": 1.1290295322071788, "learning_rate": 2.7147501828838295e-06, "loss": 0.3979, "step": 5792 }, { "epoch": 7.090575275397796, "grad_norm": 1.895202978000537, "learning_rate": 2.7141116649072218e-06, "loss": 0.6168, "step": 5793 }, { "epoch": 7.091799265605875, "grad_norm": 1.57876117672094, "learning_rate": 2.713473132859988e-06, "loss": 0.9971, "step": 5794 }, { "epoch": 7.093023255813954, "grad_norm": 1.3512490624807578, "learning_rate": 2.7128345867840895e-06, "loss": 0.3961, "step": 5795 }, { "epoch": 7.094247246022032, "grad_norm": 1.2153718824515682, "learning_rate": 2.7121960267214894e-06, "loss": 0.9036, "step": 5796 }, { "epoch": 7.09547123623011, "grad_norm": 1.8156048887736884, "learning_rate": 2.7115574527141502e-06, "loss": 1.4047, "step": 5797 }, { "epoch": 7.096695226438189, "grad_norm": 0.966839330274678, "learning_rate": 2.7109188648040384e-06, "loss": 0.5913, "step": 5798 }, { "epoch": 7.097919216646267, "grad_norm": 2.715517175384114, "learning_rate": 2.7102802630331188e-06, "loss": 0.4344, "step": 5799 }, { "epoch": 7.099143206854345, "grad_norm": 1.1708328711818636, "learning_rate": 2.7096416474433585e-06, "loss": 0.5583, "step": 5800 }, { "epoch": 7.100367197062424, "grad_norm": 1.6515946316676944, "learning_rate": 2.709003018076724e-06, "loss": 1.0335, "step": 5801 }, { "epoch": 7.101591187270502, "grad_norm": 1.1642748315058615, "learning_rate": 2.7083643749751843e-06, "loss": 0.6499, "step": 5802 }, { "epoch": 7.10281517747858, "grad_norm": 1.41584899377622, "learning_rate": 2.707725718180709e-06, "loss": 0.5613, "step": 5803 }, { "epoch": 7.1040391676866586, "grad_norm": 1.7248928239434664, "learning_rate": 2.7070870477352674e-06, "loss": 0.7623, "step": 5804 }, { "epoch": 7.105263157894737, "grad_norm": 1.465756853129191, "learning_rate": 2.7064483636808314e-06, "loss": 0.5543, "step": 5805 }, { "epoch": 7.106487148102815, "grad_norm": 1.6159171993392503, "learning_rate": 2.705809666059372e-06, "loss": 0.7101, "step": 5806 }, { "epoch": 7.1077111383108935, "grad_norm": 1.5614535475125977, "learning_rate": 2.705170954912863e-06, "loss": 0.5435, "step": 5807 }, { "epoch": 7.108935128518972, "grad_norm": 2.6224495865992252, "learning_rate": 2.704532230283277e-06, "loss": 0.3255, "step": 5808 }, { "epoch": 7.11015911872705, "grad_norm": 0.9372957961476086, "learning_rate": 2.70389349221259e-06, "loss": 0.5726, "step": 5809 }, { "epoch": 7.1113831089351285, "grad_norm": 2.0172986779017057, "learning_rate": 2.703254740742777e-06, "loss": 0.6097, "step": 5810 }, { "epoch": 7.112607099143207, "grad_norm": 1.317932336170072, "learning_rate": 2.702615975915814e-06, "loss": 0.5641, "step": 5811 }, { "epoch": 7.113831089351285, "grad_norm": 2.0018421366614683, "learning_rate": 2.701977197773678e-06, "loss": 0.3822, "step": 5812 }, { "epoch": 7.1150550795593634, "grad_norm": 2.111066790359107, "learning_rate": 2.701338406358348e-06, "loss": 0.3948, "step": 5813 }, { "epoch": 7.116279069767442, "grad_norm": 1.0414407840743822, "learning_rate": 2.7006996017118033e-06, "loss": 0.5466, "step": 5814 }, { "epoch": 7.11750305997552, "grad_norm": 1.4741199032552008, "learning_rate": 2.700060783876022e-06, "loss": 0.9177, "step": 5815 }, { "epoch": 7.118727050183598, "grad_norm": 1.2079149455759826, "learning_rate": 2.6994219528929866e-06, "loss": 0.3707, "step": 5816 }, { "epoch": 7.119951040391677, "grad_norm": 2.407055732054247, "learning_rate": 2.698783108804678e-06, "loss": 0.5865, "step": 5817 }, { "epoch": 7.121175030599755, "grad_norm": 0.9583433603472471, "learning_rate": 2.698144251653079e-06, "loss": 0.5449, "step": 5818 }, { "epoch": 7.122399020807833, "grad_norm": 2.7918799769077456, "learning_rate": 2.6975053814801715e-06, "loss": 0.3722, "step": 5819 }, { "epoch": 7.123623011015912, "grad_norm": 1.0866525244794647, "learning_rate": 2.6968664983279417e-06, "loss": 0.4602, "step": 5820 }, { "epoch": 7.124847001223991, "grad_norm": 3.165613390544089, "learning_rate": 2.6962276022383742e-06, "loss": 0.3035, "step": 5821 }, { "epoch": 7.126070991432068, "grad_norm": 2.2064632569062894, "learning_rate": 2.6955886932534543e-06, "loss": 0.4203, "step": 5822 }, { "epoch": 7.127294981640147, "grad_norm": 1.5878167289346539, "learning_rate": 2.6949497714151686e-06, "loss": 1.395, "step": 5823 }, { "epoch": 7.128518971848226, "grad_norm": 2.0011138110996676, "learning_rate": 2.6943108367655053e-06, "loss": 0.6551, "step": 5824 }, { "epoch": 7.129742962056303, "grad_norm": 2.7029290724635495, "learning_rate": 2.6936718893464514e-06, "loss": 0.4773, "step": 5825 }, { "epoch": 7.130966952264382, "grad_norm": 1.3243529047019005, "learning_rate": 2.693032929199999e-06, "loss": 0.9968, "step": 5826 }, { "epoch": 7.1321909424724605, "grad_norm": 1.7906347674586385, "learning_rate": 2.692393956368135e-06, "loss": 0.7506, "step": 5827 }, { "epoch": 7.133414932680538, "grad_norm": 1.6451484260988838, "learning_rate": 2.6917549708928525e-06, "loss": 1.3737, "step": 5828 }, { "epoch": 7.134638922888617, "grad_norm": 2.6803967381959968, "learning_rate": 2.6911159728161424e-06, "loss": 0.4264, "step": 5829 }, { "epoch": 7.1358629130966955, "grad_norm": 2.218286829247927, "learning_rate": 2.6904769621799974e-06, "loss": 1.0412, "step": 5830 }, { "epoch": 7.137086903304773, "grad_norm": 1.8631819927982056, "learning_rate": 2.689837939026411e-06, "loss": 0.5298, "step": 5831 }, { "epoch": 7.138310893512852, "grad_norm": 1.61503109370927, "learning_rate": 2.6891989033973783e-06, "loss": 1.3184, "step": 5832 }, { "epoch": 7.1395348837209305, "grad_norm": 1.2004967683126637, "learning_rate": 2.6885598553348924e-06, "loss": 0.6179, "step": 5833 }, { "epoch": 7.140758873929008, "grad_norm": 2.579940059803031, "learning_rate": 2.68792079488095e-06, "loss": 0.5485, "step": 5834 }, { "epoch": 7.141982864137087, "grad_norm": 1.5079298477541492, "learning_rate": 2.6872817220775493e-06, "loss": 0.6302, "step": 5835 }, { "epoch": 7.1432068543451654, "grad_norm": 1.2315752382671101, "learning_rate": 2.6866426369666856e-06, "loss": 0.4112, "step": 5836 }, { "epoch": 7.144430844553243, "grad_norm": 1.2808646563410333, "learning_rate": 2.6860035395903583e-06, "loss": 0.7328, "step": 5837 }, { "epoch": 7.145654834761322, "grad_norm": 1.5640654567690786, "learning_rate": 2.6853644299905663e-06, "loss": 0.8384, "step": 5838 }, { "epoch": 7.1468788249694, "grad_norm": 1.7972076491829738, "learning_rate": 2.6847253082093093e-06, "loss": 0.6267, "step": 5839 }, { "epoch": 7.148102815177479, "grad_norm": 1.2860188435452964, "learning_rate": 2.6840861742885887e-06, "loss": 0.5705, "step": 5840 }, { "epoch": 7.149326805385557, "grad_norm": 1.4526572457315046, "learning_rate": 2.6834470282704063e-06, "loss": 0.7109, "step": 5841 }, { "epoch": 7.150550795593635, "grad_norm": 1.7306703202028026, "learning_rate": 2.682807870196762e-06, "loss": 1.0638, "step": 5842 }, { "epoch": 7.151774785801714, "grad_norm": 2.7733239265768876, "learning_rate": 2.682168700109662e-06, "loss": 0.3336, "step": 5843 }, { "epoch": 7.152998776009792, "grad_norm": 2.1403124186898097, "learning_rate": 2.6815295180511093e-06, "loss": 0.8832, "step": 5844 }, { "epoch": 7.15422276621787, "grad_norm": 1.607808075817778, "learning_rate": 2.680890324063106e-06, "loss": 0.323, "step": 5845 }, { "epoch": 7.155446756425949, "grad_norm": 2.509185596461943, "learning_rate": 2.6802511181876613e-06, "loss": 0.3759, "step": 5846 }, { "epoch": 7.156670746634027, "grad_norm": 1.6144407534646787, "learning_rate": 2.6796119004667796e-06, "loss": 0.4731, "step": 5847 }, { "epoch": 7.157894736842105, "grad_norm": 1.5713802882615826, "learning_rate": 2.678972670942468e-06, "loss": 0.5806, "step": 5848 }, { "epoch": 7.159118727050184, "grad_norm": 1.984274542863448, "learning_rate": 2.678333429656734e-06, "loss": 0.586, "step": 5849 }, { "epoch": 7.160342717258262, "grad_norm": 1.5198611656988819, "learning_rate": 2.6776941766515876e-06, "loss": 0.763, "step": 5850 }, { "epoch": 7.16156670746634, "grad_norm": 1.709463100978918, "learning_rate": 2.677054911969036e-06, "loss": 1.0194, "step": 5851 }, { "epoch": 7.162790697674419, "grad_norm": 1.9640060704857298, "learning_rate": 2.6764156356510913e-06, "loss": 0.6903, "step": 5852 }, { "epoch": 7.164014687882497, "grad_norm": 0.8293969431768294, "learning_rate": 2.6757763477397636e-06, "loss": 0.4327, "step": 5853 }, { "epoch": 7.165238678090575, "grad_norm": 1.6254361874901109, "learning_rate": 2.6751370482770638e-06, "loss": 1.5005, "step": 5854 }, { "epoch": 7.166462668298654, "grad_norm": 1.672238312705694, "learning_rate": 2.6744977373050057e-06, "loss": 0.6036, "step": 5855 }, { "epoch": 7.167686658506732, "grad_norm": 1.4939509545259069, "learning_rate": 2.6738584148656012e-06, "loss": 0.5766, "step": 5856 }, { "epoch": 7.16891064871481, "grad_norm": 1.5699765549997182, "learning_rate": 2.673219081000865e-06, "loss": 0.4812, "step": 5857 }, { "epoch": 7.170134638922889, "grad_norm": 1.6963738264336345, "learning_rate": 2.672579735752811e-06, "loss": 1.1211, "step": 5858 }, { "epoch": 7.1713586291309666, "grad_norm": 1.2084049652055546, "learning_rate": 2.6719403791634564e-06, "loss": 0.529, "step": 5859 }, { "epoch": 7.172582619339045, "grad_norm": 1.5779265910682159, "learning_rate": 2.6713010112748145e-06, "loss": 0.3068, "step": 5860 }, { "epoch": 7.173806609547124, "grad_norm": 1.745005180667403, "learning_rate": 2.6706616321289043e-06, "loss": 1.3273, "step": 5861 }, { "epoch": 7.1750305997552015, "grad_norm": 1.4749477798653052, "learning_rate": 2.670022241767743e-06, "loss": 0.5125, "step": 5862 }, { "epoch": 7.17625458996328, "grad_norm": 1.8456439993950582, "learning_rate": 2.669382840233349e-06, "loss": 1.1315, "step": 5863 }, { "epoch": 7.177478580171359, "grad_norm": 2.268773411982329, "learning_rate": 2.668743427567741e-06, "loss": 0.4819, "step": 5864 }, { "epoch": 7.178702570379437, "grad_norm": 1.3854112947657111, "learning_rate": 2.668104003812939e-06, "loss": 0.6203, "step": 5865 }, { "epoch": 7.179926560587515, "grad_norm": 2.2591000717236915, "learning_rate": 2.6674645690109636e-06, "loss": 0.802, "step": 5866 }, { "epoch": 7.181150550795594, "grad_norm": 1.41360072156089, "learning_rate": 2.666825123203836e-06, "loss": 0.5322, "step": 5867 }, { "epoch": 7.182374541003672, "grad_norm": 1.7975226052932693, "learning_rate": 2.6661856664335795e-06, "loss": 0.7294, "step": 5868 }, { "epoch": 7.18359853121175, "grad_norm": 1.8318051464720373, "learning_rate": 2.665546198742214e-06, "loss": 0.933, "step": 5869 }, { "epoch": 7.184822521419829, "grad_norm": 1.5092581838754413, "learning_rate": 2.6649067201717655e-06, "loss": 1.1137, "step": 5870 }, { "epoch": 7.186046511627907, "grad_norm": 1.2820211146030684, "learning_rate": 2.6642672307642575e-06, "loss": 0.282, "step": 5871 }, { "epoch": 7.187270501835985, "grad_norm": 1.302893622153774, "learning_rate": 2.6636277305617144e-06, "loss": 0.8562, "step": 5872 }, { "epoch": 7.188494492044064, "grad_norm": 1.232705315086875, "learning_rate": 2.6629882196061625e-06, "loss": 0.4457, "step": 5873 }, { "epoch": 7.189718482252142, "grad_norm": 1.332025558056674, "learning_rate": 2.6623486979396266e-06, "loss": 1.5668, "step": 5874 }, { "epoch": 7.19094247246022, "grad_norm": 1.094990909472066, "learning_rate": 2.6617091656041356e-06, "loss": 0.4791, "step": 5875 }, { "epoch": 7.192166462668299, "grad_norm": 1.5973759854999787, "learning_rate": 2.6610696226417163e-06, "loss": 0.4623, "step": 5876 }, { "epoch": 7.193390452876377, "grad_norm": 1.7111798491239965, "learning_rate": 2.660430069094397e-06, "loss": 1.1996, "step": 5877 }, { "epoch": 7.194614443084455, "grad_norm": 1.0396945210202908, "learning_rate": 2.6597905050042065e-06, "loss": 0.6421, "step": 5878 }, { "epoch": 7.195838433292534, "grad_norm": 2.013406270396875, "learning_rate": 2.659150930413176e-06, "loss": 0.5062, "step": 5879 }, { "epoch": 7.197062423500612, "grad_norm": 1.3246048634039544, "learning_rate": 2.6585113453633344e-06, "loss": 0.7236, "step": 5880 }, { "epoch": 7.19828641370869, "grad_norm": 0.9990101921829487, "learning_rate": 2.657871749896714e-06, "loss": 0.6484, "step": 5881 }, { "epoch": 7.1995104039167686, "grad_norm": 1.288076751192695, "learning_rate": 2.6572321440553454e-06, "loss": 0.513, "step": 5882 }, { "epoch": 7.200734394124847, "grad_norm": 2.263192158343417, "learning_rate": 2.656592527881262e-06, "loss": 0.3586, "step": 5883 }, { "epoch": 7.201958384332926, "grad_norm": 1.6352870729896452, "learning_rate": 2.6559529014164965e-06, "loss": 0.4557, "step": 5884 }, { "epoch": 7.2031823745410035, "grad_norm": 0.9476113974041005, "learning_rate": 2.6553132647030844e-06, "loss": 0.4031, "step": 5885 }, { "epoch": 7.204406364749082, "grad_norm": 0.9195531689459152, "learning_rate": 2.654673617783058e-06, "loss": 0.5928, "step": 5886 }, { "epoch": 7.205630354957161, "grad_norm": 2.227331124798192, "learning_rate": 2.654033960698454e-06, "loss": 0.8769, "step": 5887 }, { "epoch": 7.2068543451652385, "grad_norm": 1.4416628815693862, "learning_rate": 2.6533942934913077e-06, "loss": 0.9927, "step": 5888 }, { "epoch": 7.208078335373317, "grad_norm": 1.0928965417837828, "learning_rate": 2.6527546162036555e-06, "loss": 0.5947, "step": 5889 }, { "epoch": 7.209302325581396, "grad_norm": 2.2981471921257857, "learning_rate": 2.652114928877535e-06, "loss": 0.2964, "step": 5890 }, { "epoch": 7.2105263157894735, "grad_norm": 1.3102980647399354, "learning_rate": 2.651475231554985e-06, "loss": 0.9764, "step": 5891 }, { "epoch": 7.211750305997552, "grad_norm": 2.3313128639464744, "learning_rate": 2.6508355242780413e-06, "loss": 0.9511, "step": 5892 }, { "epoch": 7.212974296205631, "grad_norm": 1.361908127702271, "learning_rate": 2.6501958070887455e-06, "loss": 0.8778, "step": 5893 }, { "epoch": 7.214198286413708, "grad_norm": 0.6675559554317547, "learning_rate": 2.6495560800291383e-06, "loss": 0.261, "step": 5894 }, { "epoch": 7.215422276621787, "grad_norm": 1.0811648712669348, "learning_rate": 2.648916343141257e-06, "loss": 0.582, "step": 5895 }, { "epoch": 7.216646266829866, "grad_norm": 1.3773367077176948, "learning_rate": 2.648276596467145e-06, "loss": 0.8325, "step": 5896 }, { "epoch": 7.217870257037943, "grad_norm": 1.0346416586004556, "learning_rate": 2.6476368400488434e-06, "loss": 0.3171, "step": 5897 }, { "epoch": 7.219094247246022, "grad_norm": 1.877392196829962, "learning_rate": 2.6469970739283944e-06, "loss": 0.625, "step": 5898 }, { "epoch": 7.220318237454101, "grad_norm": 1.756247564291727, "learning_rate": 2.6463572981478412e-06, "loss": 0.3334, "step": 5899 }, { "epoch": 7.221542227662178, "grad_norm": 1.884875022901362, "learning_rate": 2.645717512749229e-06, "loss": 1.2184, "step": 5900 }, { "epoch": 7.222766217870257, "grad_norm": 1.0973380772363157, "learning_rate": 2.6450777177745996e-06, "loss": 0.5023, "step": 5901 }, { "epoch": 7.223990208078336, "grad_norm": 2.209907119930594, "learning_rate": 2.644437913266e-06, "loss": 0.4843, "step": 5902 }, { "epoch": 7.225214198286413, "grad_norm": 1.2687934738354933, "learning_rate": 2.6437980992654745e-06, "loss": 0.4682, "step": 5903 }, { "epoch": 7.226438188494492, "grad_norm": 2.1804890435223254, "learning_rate": 2.6431582758150705e-06, "loss": 0.5208, "step": 5904 }, { "epoch": 7.2276621787025706, "grad_norm": 1.5865355633507352, "learning_rate": 2.6425184429568333e-06, "loss": 0.7021, "step": 5905 }, { "epoch": 7.228886168910648, "grad_norm": 1.4808872647046532, "learning_rate": 2.6418786007328113e-06, "loss": 1.2383, "step": 5906 }, { "epoch": 7.230110159118727, "grad_norm": 1.2806652204508921, "learning_rate": 2.6412387491850526e-06, "loss": 1.291, "step": 5907 }, { "epoch": 7.2313341493268055, "grad_norm": 2.071364629070407, "learning_rate": 2.6405988883556056e-06, "loss": 0.6058, "step": 5908 }, { "epoch": 7.232558139534884, "grad_norm": 1.0192633611003123, "learning_rate": 2.6399590182865202e-06, "loss": 0.6255, "step": 5909 }, { "epoch": 7.233782129742962, "grad_norm": 1.2026378932120672, "learning_rate": 2.6393191390198447e-06, "loss": 0.5126, "step": 5910 }, { "epoch": 7.2350061199510405, "grad_norm": 1.0104356844195284, "learning_rate": 2.6386792505976314e-06, "loss": 0.6137, "step": 5911 }, { "epoch": 7.236230110159119, "grad_norm": 1.640903824144805, "learning_rate": 2.6380393530619307e-06, "loss": 0.6647, "step": 5912 }, { "epoch": 7.237454100367197, "grad_norm": 1.5307607419372122, "learning_rate": 2.6373994464547947e-06, "loss": 0.673, "step": 5913 }, { "epoch": 7.2386780905752754, "grad_norm": 1.2720899772676348, "learning_rate": 2.6367595308182747e-06, "loss": 0.6244, "step": 5914 }, { "epoch": 7.239902080783354, "grad_norm": 1.467685696574331, "learning_rate": 2.6361196061944244e-06, "loss": 1.1098, "step": 5915 }, { "epoch": 7.241126070991432, "grad_norm": 1.2181300036311464, "learning_rate": 2.635479672625296e-06, "loss": 0.5552, "step": 5916 }, { "epoch": 7.24235006119951, "grad_norm": 1.9832348584423176, "learning_rate": 2.634839730152946e-06, "loss": 0.5662, "step": 5917 }, { "epoch": 7.243574051407589, "grad_norm": 1.5891830929701114, "learning_rate": 2.6341997788194272e-06, "loss": 0.5983, "step": 5918 }, { "epoch": 7.244798041615667, "grad_norm": 1.787364039028482, "learning_rate": 2.6335598186667956e-06, "loss": 0.8687, "step": 5919 }, { "epoch": 7.246022031823745, "grad_norm": 0.9189615488026313, "learning_rate": 2.632919849737107e-06, "loss": 0.5209, "step": 5920 }, { "epoch": 7.247246022031824, "grad_norm": 1.8269469117948611, "learning_rate": 2.6322798720724167e-06, "loss": 0.5502, "step": 5921 }, { "epoch": 7.248470012239902, "grad_norm": 1.7829480355878096, "learning_rate": 2.631639885714783e-06, "loss": 0.5483, "step": 5922 }, { "epoch": 7.24969400244798, "grad_norm": 1.7055412705309752, "learning_rate": 2.630999890706262e-06, "loss": 0.5085, "step": 5923 }, { "epoch": 7.250917992656059, "grad_norm": 1.2682953854879155, "learning_rate": 2.630359887088914e-06, "loss": 1.0128, "step": 5924 }, { "epoch": 7.252141982864137, "grad_norm": 1.5465753148061108, "learning_rate": 2.6297198749047954e-06, "loss": 0.3064, "step": 5925 }, { "epoch": 7.253365973072215, "grad_norm": 1.1236670885975288, "learning_rate": 2.6290798541959665e-06, "loss": 0.7801, "step": 5926 }, { "epoch": 7.254589963280294, "grad_norm": 1.091761526240801, "learning_rate": 2.628439825004487e-06, "loss": 1.0497, "step": 5927 }, { "epoch": 7.2558139534883725, "grad_norm": 1.2859221097143547, "learning_rate": 2.627799787372418e-06, "loss": 0.8181, "step": 5928 }, { "epoch": 7.25703794369645, "grad_norm": 2.0907452929278243, "learning_rate": 2.6271597413418197e-06, "loss": 0.5141, "step": 5929 }, { "epoch": 7.258261933904529, "grad_norm": 1.3245765287036302, "learning_rate": 2.6265196869547533e-06, "loss": 0.6607, "step": 5930 }, { "epoch": 7.2594859241126075, "grad_norm": 0.9908278544764718, "learning_rate": 2.62587962425328e-06, "loss": 0.5812, "step": 5931 }, { "epoch": 7.260709914320685, "grad_norm": 1.0146535435761823, "learning_rate": 2.625239553279465e-06, "loss": 0.6024, "step": 5932 }, { "epoch": 7.261933904528764, "grad_norm": 1.4003264418018755, "learning_rate": 2.624599474075369e-06, "loss": 0.3057, "step": 5933 }, { "epoch": 7.2631578947368425, "grad_norm": 1.5714193863852155, "learning_rate": 2.623959386683056e-06, "loss": 1.1716, "step": 5934 }, { "epoch": 7.26438188494492, "grad_norm": 0.755908209444401, "learning_rate": 2.6233192911445916e-06, "loss": 0.4384, "step": 5935 }, { "epoch": 7.265605875152999, "grad_norm": 1.2231739750509312, "learning_rate": 2.6226791875020384e-06, "loss": 1.5779, "step": 5936 }, { "epoch": 7.2668298653610774, "grad_norm": 2.6746240235651935, "learning_rate": 2.6220390757974633e-06, "loss": 0.344, "step": 5937 }, { "epoch": 7.268053855569155, "grad_norm": 1.324689355851732, "learning_rate": 2.621398956072932e-06, "loss": 0.6377, "step": 5938 }, { "epoch": 7.269277845777234, "grad_norm": 1.56170755267866, "learning_rate": 2.6207588283705097e-06, "loss": 1.4327, "step": 5939 }, { "epoch": 7.270501835985312, "grad_norm": 1.5984482975105423, "learning_rate": 2.6201186927322635e-06, "loss": 1.4181, "step": 5940 }, { "epoch": 7.27172582619339, "grad_norm": 0.9832418775241379, "learning_rate": 2.6194785492002624e-06, "loss": 0.7188, "step": 5941 }, { "epoch": 7.272949816401469, "grad_norm": 1.6637590657795454, "learning_rate": 2.6188383978165716e-06, "loss": 0.7225, "step": 5942 }, { "epoch": 7.274173806609547, "grad_norm": 2.433531763712246, "learning_rate": 2.618198238623261e-06, "loss": 0.5257, "step": 5943 }, { "epoch": 7.275397796817625, "grad_norm": 2.031816040792014, "learning_rate": 2.6175580716624003e-06, "loss": 0.5394, "step": 5944 }, { "epoch": 7.276621787025704, "grad_norm": 2.536938681651498, "learning_rate": 2.6169178969760572e-06, "loss": 0.8441, "step": 5945 }, { "epoch": 7.277845777233782, "grad_norm": 1.2830184813922128, "learning_rate": 2.616277714606303e-06, "loss": 0.5582, "step": 5946 }, { "epoch": 7.27906976744186, "grad_norm": 2.72375927826823, "learning_rate": 2.615637524595207e-06, "loss": 0.4874, "step": 5947 }, { "epoch": 7.280293757649939, "grad_norm": 2.025424088420344, "learning_rate": 2.6149973269848408e-06, "loss": 0.6501, "step": 5948 }, { "epoch": 7.281517747858017, "grad_norm": 1.7579887450282297, "learning_rate": 2.6143571218172747e-06, "loss": 0.607, "step": 5949 }, { "epoch": 7.282741738066095, "grad_norm": 1.296260958604775, "learning_rate": 2.6137169091345833e-06, "loss": 0.6268, "step": 5950 }, { "epoch": 7.283965728274174, "grad_norm": 2.1291471912702944, "learning_rate": 2.6130766889788357e-06, "loss": 0.5655, "step": 5951 }, { "epoch": 7.285189718482252, "grad_norm": 2.5245829321509876, "learning_rate": 2.6124364613921066e-06, "loss": 0.4693, "step": 5952 }, { "epoch": 7.286413708690331, "grad_norm": 1.717234010234117, "learning_rate": 2.61179622641647e-06, "loss": 0.5375, "step": 5953 }, { "epoch": 7.287637698898409, "grad_norm": 1.6623661177813271, "learning_rate": 2.6111559840939983e-06, "loss": 0.489, "step": 5954 }, { "epoch": 7.288861689106487, "grad_norm": 1.2960466308787184, "learning_rate": 2.610515734466766e-06, "loss": 1.1256, "step": 5955 }, { "epoch": 7.290085679314566, "grad_norm": 1.2281747926141695, "learning_rate": 2.6098754775768494e-06, "loss": 0.516, "step": 5956 }, { "epoch": 7.291309669522644, "grad_norm": 1.4678302051065844, "learning_rate": 2.6092352134663222e-06, "loss": 0.681, "step": 5957 }, { "epoch": 7.292533659730722, "grad_norm": 2.002304125943287, "learning_rate": 2.6085949421772607e-06, "loss": 0.5953, "step": 5958 }, { "epoch": 7.293757649938801, "grad_norm": 2.1409577108970095, "learning_rate": 2.6079546637517416e-06, "loss": 0.6231, "step": 5959 }, { "epoch": 7.2949816401468786, "grad_norm": 1.4855107135850432, "learning_rate": 2.6073143782318412e-06, "loss": 0.8634, "step": 5960 }, { "epoch": 7.296205630354957, "grad_norm": 1.7539476588827991, "learning_rate": 2.606674085659637e-06, "loss": 0.6078, "step": 5961 }, { "epoch": 7.297429620563036, "grad_norm": 1.2106428384136332, "learning_rate": 2.6060337860772054e-06, "loss": 0.3185, "step": 5962 }, { "epoch": 7.2986536107711135, "grad_norm": 0.981218988353405, "learning_rate": 2.605393479526627e-06, "loss": 0.6046, "step": 5963 }, { "epoch": 7.299877600979192, "grad_norm": 1.6016474203887976, "learning_rate": 2.604753166049978e-06, "loss": 0.8943, "step": 5964 }, { "epoch": 7.301101591187271, "grad_norm": 1.5208217593969724, "learning_rate": 2.604112845689339e-06, "loss": 0.5869, "step": 5965 }, { "epoch": 7.3023255813953485, "grad_norm": 1.0932391912893304, "learning_rate": 2.6034725184867883e-06, "loss": 0.4406, "step": 5966 }, { "epoch": 7.303549571603427, "grad_norm": 2.6431264067426596, "learning_rate": 2.602832184484407e-06, "loss": 0.596, "step": 5967 }, { "epoch": 7.304773561811506, "grad_norm": 1.9138816723419527, "learning_rate": 2.602191843724274e-06, "loss": 1.1225, "step": 5968 }, { "epoch": 7.3059975520195835, "grad_norm": 0.6847921245352586, "learning_rate": 2.6015514962484716e-06, "loss": 0.2228, "step": 5969 }, { "epoch": 7.307221542227662, "grad_norm": 1.4688405815860193, "learning_rate": 2.6009111420990808e-06, "loss": 0.4803, "step": 5970 }, { "epoch": 7.308445532435741, "grad_norm": 0.9951747235405431, "learning_rate": 2.6002707813181825e-06, "loss": 0.6129, "step": 5971 }, { "epoch": 7.309669522643819, "grad_norm": 1.014353845365715, "learning_rate": 2.5996304139478597e-06, "loss": 0.5061, "step": 5972 }, { "epoch": 7.310893512851897, "grad_norm": 1.9414572846582285, "learning_rate": 2.5989900400301945e-06, "loss": 0.6188, "step": 5973 }, { "epoch": 7.312117503059976, "grad_norm": 1.4018957202375015, "learning_rate": 2.59834965960727e-06, "loss": 0.7583, "step": 5974 }, { "epoch": 7.313341493268053, "grad_norm": 2.3622873183132382, "learning_rate": 2.597709272721169e-06, "loss": 0.8148, "step": 5975 }, { "epoch": 7.314565483476132, "grad_norm": 1.2311652248125766, "learning_rate": 2.5970688794139763e-06, "loss": 0.4426, "step": 5976 }, { "epoch": 7.315789473684211, "grad_norm": 2.3853226213476018, "learning_rate": 2.596428479727776e-06, "loss": 0.973, "step": 5977 }, { "epoch": 7.317013463892289, "grad_norm": 3.0120655541927737, "learning_rate": 2.595788073704653e-06, "loss": 0.3755, "step": 5978 }, { "epoch": 7.318237454100367, "grad_norm": 2.1868471704892785, "learning_rate": 2.595147661386692e-06, "loss": 0.4996, "step": 5979 }, { "epoch": 7.319461444308446, "grad_norm": 2.004182219973011, "learning_rate": 2.5945072428159783e-06, "loss": 0.6209, "step": 5980 }, { "epoch": 7.320685434516524, "grad_norm": 0.9648632114561964, "learning_rate": 2.5938668180345983e-06, "loss": 0.6395, "step": 5981 }, { "epoch": 7.321909424724602, "grad_norm": 1.4389759625970304, "learning_rate": 2.5932263870846386e-06, "loss": 0.5362, "step": 5982 }, { "epoch": 7.3231334149326806, "grad_norm": 1.0769278770404218, "learning_rate": 2.5925859500081858e-06, "loss": 0.4523, "step": 5983 }, { "epoch": 7.324357405140759, "grad_norm": 2.46873006709113, "learning_rate": 2.5919455068473254e-06, "loss": 0.4443, "step": 5984 }, { "epoch": 7.325581395348837, "grad_norm": 1.6602813380514658, "learning_rate": 2.591305057644148e-06, "loss": 0.5221, "step": 5985 }, { "epoch": 7.3268053855569155, "grad_norm": 0.8371027727180844, "learning_rate": 2.5906646024407385e-06, "loss": 0.5304, "step": 5986 }, { "epoch": 7.328029375764994, "grad_norm": 1.7377561833099442, "learning_rate": 2.5900241412791876e-06, "loss": 0.4555, "step": 5987 }, { "epoch": 7.329253365973072, "grad_norm": 1.6330951714974256, "learning_rate": 2.589383674201583e-06, "loss": 0.6249, "step": 5988 }, { "epoch": 7.3304773561811505, "grad_norm": 1.7607216820367415, "learning_rate": 2.5887432012500134e-06, "loss": 0.4961, "step": 5989 }, { "epoch": 7.331701346389229, "grad_norm": 1.9510708580996678, "learning_rate": 2.588102722466569e-06, "loss": 0.827, "step": 5990 }, { "epoch": 7.332925336597307, "grad_norm": 1.313933161242551, "learning_rate": 2.58746223789334e-06, "loss": 0.8067, "step": 5991 }, { "epoch": 7.3341493268053854, "grad_norm": 1.2796146010636669, "learning_rate": 2.5868217475724157e-06, "loss": 0.6088, "step": 5992 }, { "epoch": 7.335373317013464, "grad_norm": 1.4422793519326782, "learning_rate": 2.586181251545888e-06, "loss": 1.172, "step": 5993 }, { "epoch": 7.336597307221542, "grad_norm": 0.8816985204942966, "learning_rate": 2.5855407498558467e-06, "loss": 0.4529, "step": 5994 }, { "epoch": 7.33782129742962, "grad_norm": 1.581866011162923, "learning_rate": 2.5849002425443837e-06, "loss": 0.4768, "step": 5995 }, { "epoch": 7.339045287637699, "grad_norm": 1.212705674231151, "learning_rate": 2.584259729653591e-06, "loss": 0.5177, "step": 5996 }, { "epoch": 7.340269277845778, "grad_norm": 0.8489667315689776, "learning_rate": 2.5836192112255603e-06, "loss": 0.4455, "step": 5997 }, { "epoch": 7.341493268053855, "grad_norm": 2.301183042340691, "learning_rate": 2.5829786873023844e-06, "loss": 0.4969, "step": 5998 }, { "epoch": 7.342717258261934, "grad_norm": 1.008677662090527, "learning_rate": 2.5823381579261556e-06, "loss": 0.6714, "step": 5999 }, { "epoch": 7.343941248470013, "grad_norm": 1.6874019047827935, "learning_rate": 2.581697623138969e-06, "loss": 0.6237, "step": 6000 }, { "epoch": 7.34516523867809, "grad_norm": 2.215407184418501, "learning_rate": 2.581057082982915e-06, "loss": 0.731, "step": 6001 }, { "epoch": 7.346389228886169, "grad_norm": 2.4253753179489266, "learning_rate": 2.5804165375000904e-06, "loss": 0.4602, "step": 6002 }, { "epoch": 7.347613219094248, "grad_norm": 1.6370392655039696, "learning_rate": 2.5797759867325884e-06, "loss": 0.9677, "step": 6003 }, { "epoch": 7.348837209302325, "grad_norm": 1.5754950385118511, "learning_rate": 2.5791354307225034e-06, "loss": 1.4106, "step": 6004 }, { "epoch": 7.350061199510404, "grad_norm": 1.833278887728968, "learning_rate": 2.5784948695119305e-06, "loss": 0.8468, "step": 6005 }, { "epoch": 7.3512851897184825, "grad_norm": 1.340711200342193, "learning_rate": 2.5778543031429655e-06, "loss": 0.6134, "step": 6006 }, { "epoch": 7.35250917992656, "grad_norm": 1.3162518796254616, "learning_rate": 2.577213731657703e-06, "loss": 0.3344, "step": 6007 }, { "epoch": 7.353733170134639, "grad_norm": 1.73884636872668, "learning_rate": 2.5765731550982403e-06, "loss": 0.3819, "step": 6008 }, { "epoch": 7.3549571603427175, "grad_norm": 1.5559656779269873, "learning_rate": 2.575932573506673e-06, "loss": 0.4208, "step": 6009 }, { "epoch": 7.356181150550795, "grad_norm": 1.6185953193398317, "learning_rate": 2.5752919869250977e-06, "loss": 1.6089, "step": 6010 }, { "epoch": 7.357405140758874, "grad_norm": 1.4542937513659768, "learning_rate": 2.5746513953956115e-06, "loss": 0.4844, "step": 6011 }, { "epoch": 7.3586291309669525, "grad_norm": 1.2174560620722166, "learning_rate": 2.5740107989603116e-06, "loss": 0.7816, "step": 6012 }, { "epoch": 7.35985312117503, "grad_norm": 1.6340708255199425, "learning_rate": 2.5733701976612956e-06, "loss": 1.471, "step": 6013 }, { "epoch": 7.361077111383109, "grad_norm": 1.7273296411214234, "learning_rate": 2.5727295915406614e-06, "loss": 0.5708, "step": 6014 }, { "epoch": 7.3623011015911874, "grad_norm": 2.6164929763134586, "learning_rate": 2.5720889806405087e-06, "loss": 0.3668, "step": 6015 }, { "epoch": 7.363525091799266, "grad_norm": 0.9640342274610135, "learning_rate": 2.5714483650029333e-06, "loss": 0.5473, "step": 6016 }, { "epoch": 7.364749082007344, "grad_norm": 1.3860637429164129, "learning_rate": 2.5708077446700363e-06, "loss": 0.9244, "step": 6017 }, { "epoch": 7.365973072215422, "grad_norm": 1.4351166462162042, "learning_rate": 2.5701671196839162e-06, "loss": 0.5123, "step": 6018 }, { "epoch": 7.3671970624235, "grad_norm": 3.1295697945608487, "learning_rate": 2.569526490086673e-06, "loss": 0.4442, "step": 6019 }, { "epoch": 7.368421052631579, "grad_norm": 1.7943172655387614, "learning_rate": 2.5688858559204056e-06, "loss": 0.7769, "step": 6020 }, { "epoch": 7.369645042839657, "grad_norm": 2.317596419975166, "learning_rate": 2.5682452172272143e-06, "loss": 0.8776, "step": 6021 }, { "epoch": 7.370869033047736, "grad_norm": 1.3949180133138774, "learning_rate": 2.5676045740492e-06, "loss": 0.8289, "step": 6022 }, { "epoch": 7.372093023255814, "grad_norm": 1.9283980290116884, "learning_rate": 2.5669639264284635e-06, "loss": 0.5892, "step": 6023 }, { "epoch": 7.373317013463892, "grad_norm": 1.1589183346938958, "learning_rate": 2.566323274407105e-06, "loss": 0.5916, "step": 6024 }, { "epoch": 7.374541003671971, "grad_norm": 0.7931431294781681, "learning_rate": 2.5656826180272254e-06, "loss": 0.3552, "step": 6025 }, { "epoch": 7.375764993880049, "grad_norm": 1.5628744207451057, "learning_rate": 2.565041957330928e-06, "loss": 0.4626, "step": 6026 }, { "epoch": 7.376988984088127, "grad_norm": 2.702241728521057, "learning_rate": 2.564401292360314e-06, "loss": 0.4195, "step": 6027 }, { "epoch": 7.378212974296206, "grad_norm": 1.2085926337562602, "learning_rate": 2.5637606231574854e-06, "loss": 0.4061, "step": 6028 }, { "epoch": 7.379436964504284, "grad_norm": 1.7875288677743737, "learning_rate": 2.5631199497645437e-06, "loss": 0.5626, "step": 6029 }, { "epoch": 7.380660954712362, "grad_norm": 1.778969673265318, "learning_rate": 2.5624792722235924e-06, "loss": 0.778, "step": 6030 }, { "epoch": 7.381884944920441, "grad_norm": 1.4537225247494032, "learning_rate": 2.5618385905767348e-06, "loss": 0.8705, "step": 6031 }, { "epoch": 7.383108935128519, "grad_norm": 2.2894793684285157, "learning_rate": 2.561197904866074e-06, "loss": 0.3559, "step": 6032 }, { "epoch": 7.384332925336597, "grad_norm": 1.959485171071054, "learning_rate": 2.560557215133713e-06, "loss": 0.5003, "step": 6033 }, { "epoch": 7.385556915544676, "grad_norm": 2.563648503324109, "learning_rate": 2.5599165214217554e-06, "loss": 0.4882, "step": 6034 }, { "epoch": 7.386780905752754, "grad_norm": 2.031621338900395, "learning_rate": 2.559275823772306e-06, "loss": 0.342, "step": 6035 }, { "epoch": 7.388004895960832, "grad_norm": 1.327101003354006, "learning_rate": 2.5586351222274687e-06, "loss": 0.6587, "step": 6036 }, { "epoch": 7.389228886168911, "grad_norm": 1.5013341551652672, "learning_rate": 2.557994416829348e-06, "loss": 0.5072, "step": 6037 }, { "epoch": 7.3904528763769886, "grad_norm": 1.576727615781655, "learning_rate": 2.557353707620049e-06, "loss": 0.593, "step": 6038 }, { "epoch": 7.391676866585067, "grad_norm": 1.4368151303979397, "learning_rate": 2.5567129946416765e-06, "loss": 0.7033, "step": 6039 }, { "epoch": 7.392900856793146, "grad_norm": 2.0002799647481324, "learning_rate": 2.5560722779363345e-06, "loss": 0.4908, "step": 6040 }, { "epoch": 7.394124847001224, "grad_norm": 2.472582335810784, "learning_rate": 2.5554315575461314e-06, "loss": 0.4673, "step": 6041 }, { "epoch": 7.395348837209302, "grad_norm": 1.2782960917000092, "learning_rate": 2.5547908335131704e-06, "loss": 0.5448, "step": 6042 }, { "epoch": 7.396572827417381, "grad_norm": 2.350259984611304, "learning_rate": 2.554150105879559e-06, "loss": 0.3804, "step": 6043 }, { "epoch": 7.397796817625459, "grad_norm": 2.0428847989715573, "learning_rate": 2.5535093746874025e-06, "loss": 0.484, "step": 6044 }, { "epoch": 7.399020807833537, "grad_norm": 2.563410262197895, "learning_rate": 2.5528686399788084e-06, "loss": 0.5497, "step": 6045 }, { "epoch": 7.400244798041616, "grad_norm": 2.064452724969936, "learning_rate": 2.5522279017958825e-06, "loss": 1.0837, "step": 6046 }, { "epoch": 7.401468788249694, "grad_norm": 1.3582553212127324, "learning_rate": 2.5515871601807317e-06, "loss": 0.4853, "step": 6047 }, { "epoch": 7.402692778457772, "grad_norm": 1.4551110282720963, "learning_rate": 2.550946415175464e-06, "loss": 0.6064, "step": 6048 }, { "epoch": 7.403916768665851, "grad_norm": 1.307933988581647, "learning_rate": 2.550305666822186e-06, "loss": 0.3652, "step": 6049 }, { "epoch": 7.405140758873929, "grad_norm": 2.3156079333811928, "learning_rate": 2.5496649151630066e-06, "loss": 0.5322, "step": 6050 }, { "epoch": 7.406364749082007, "grad_norm": 2.157254253031142, "learning_rate": 2.5490241602400314e-06, "loss": 0.4637, "step": 6051 }, { "epoch": 7.407588739290086, "grad_norm": 1.3861457953854517, "learning_rate": 2.54838340209537e-06, "loss": 0.6231, "step": 6052 }, { "epoch": 7.408812729498164, "grad_norm": 1.8153677261853844, "learning_rate": 2.547742640771131e-06, "loss": 0.7423, "step": 6053 }, { "epoch": 7.410036719706242, "grad_norm": 2.2002649238526257, "learning_rate": 2.5471018763094217e-06, "loss": 0.5958, "step": 6054 }, { "epoch": 7.411260709914321, "grad_norm": 1.4210700281835575, "learning_rate": 2.5464611087523512e-06, "loss": 1.0351, "step": 6055 }, { "epoch": 7.412484700122399, "grad_norm": 1.3243872910376806, "learning_rate": 2.5458203381420293e-06, "loss": 0.5714, "step": 6056 }, { "epoch": 7.413708690330477, "grad_norm": 1.1507425252721053, "learning_rate": 2.545179564520563e-06, "loss": 0.6652, "step": 6057 }, { "epoch": 7.414932680538556, "grad_norm": 2.1944067274760153, "learning_rate": 2.5445387879300635e-06, "loss": 0.441, "step": 6058 }, { "epoch": 7.416156670746634, "grad_norm": 1.8559542235995465, "learning_rate": 2.54389800841264e-06, "loss": 1.084, "step": 6059 }, { "epoch": 7.417380660954713, "grad_norm": 1.7419001103062157, "learning_rate": 2.5432572260104015e-06, "loss": 0.6576, "step": 6060 }, { "epoch": 7.4186046511627906, "grad_norm": 2.177037347828862, "learning_rate": 2.542616440765458e-06, "loss": 0.5749, "step": 6061 }, { "epoch": 7.419828641370869, "grad_norm": 1.773442561004639, "learning_rate": 2.5419756527199197e-06, "loss": 0.5626, "step": 6062 }, { "epoch": 7.421052631578947, "grad_norm": 2.261743758341832, "learning_rate": 2.5413348619158966e-06, "loss": 1.2993, "step": 6063 }, { "epoch": 7.4222766217870255, "grad_norm": 1.6215971394101416, "learning_rate": 2.540694068395499e-06, "loss": 0.4993, "step": 6064 }, { "epoch": 7.423500611995104, "grad_norm": 1.661564268430868, "learning_rate": 2.5400532722008387e-06, "loss": 0.3755, "step": 6065 }, { "epoch": 7.424724602203183, "grad_norm": 2.0565498046155204, "learning_rate": 2.5394124733740244e-06, "loss": 1.4262, "step": 6066 }, { "epoch": 7.4259485924112605, "grad_norm": 1.1584912495717106, "learning_rate": 2.538771671957169e-06, "loss": 0.6035, "step": 6067 }, { "epoch": 7.427172582619339, "grad_norm": 1.114414684605462, "learning_rate": 2.5381308679923823e-06, "loss": 0.9898, "step": 6068 }, { "epoch": 7.428396572827418, "grad_norm": 1.115430515591259, "learning_rate": 2.537490061521776e-06, "loss": 0.5815, "step": 6069 }, { "epoch": 7.4296205630354955, "grad_norm": 2.056839365082865, "learning_rate": 2.5368492525874618e-06, "loss": 1.0949, "step": 6070 }, { "epoch": 7.430844553243574, "grad_norm": 1.3166015936150672, "learning_rate": 2.53620844123155e-06, "loss": 1.0839, "step": 6071 }, { "epoch": 7.432068543451653, "grad_norm": 1.4857091172164065, "learning_rate": 2.5355676274961545e-06, "loss": 1.1826, "step": 6072 }, { "epoch": 7.43329253365973, "grad_norm": 3.3417370795016774, "learning_rate": 2.534926811423386e-06, "loss": 0.3828, "step": 6073 }, { "epoch": 7.434516523867809, "grad_norm": 2.6423860827186, "learning_rate": 2.5342859930553565e-06, "loss": 0.3655, "step": 6074 }, { "epoch": 7.435740514075888, "grad_norm": 1.0231370277035152, "learning_rate": 2.533645172434178e-06, "loss": 0.7014, "step": 6075 }, { "epoch": 7.436964504283965, "grad_norm": 0.8758688470822585, "learning_rate": 2.5330043496019627e-06, "loss": 0.4196, "step": 6076 }, { "epoch": 7.438188494492044, "grad_norm": 1.3175164223023497, "learning_rate": 2.532363524600825e-06, "loss": 0.6184, "step": 6077 }, { "epoch": 7.439412484700123, "grad_norm": 1.5641720443453775, "learning_rate": 2.5317226974728755e-06, "loss": 0.6691, "step": 6078 }, { "epoch": 7.4406364749082, "grad_norm": 2.473677380793289, "learning_rate": 2.5310818682602272e-06, "loss": 0.3745, "step": 6079 }, { "epoch": 7.441860465116279, "grad_norm": 1.0078888435839604, "learning_rate": 2.530441037004994e-06, "loss": 0.5138, "step": 6080 }, { "epoch": 7.443084455324358, "grad_norm": 1.2697819413615676, "learning_rate": 2.529800203749288e-06, "loss": 0.6325, "step": 6081 }, { "epoch": 7.444308445532435, "grad_norm": 1.261019962161274, "learning_rate": 2.529159368535224e-06, "loss": 0.8768, "step": 6082 }, { "epoch": 7.445532435740514, "grad_norm": 1.4605885455653669, "learning_rate": 2.528518531404913e-06, "loss": 0.5653, "step": 6083 }, { "epoch": 7.4467564259485926, "grad_norm": 1.3866405830366417, "learning_rate": 2.52787769240047e-06, "loss": 0.3686, "step": 6084 }, { "epoch": 7.447980416156671, "grad_norm": 2.1603253230839345, "learning_rate": 2.5272368515640084e-06, "loss": 0.4335, "step": 6085 }, { "epoch": 7.449204406364749, "grad_norm": 2.0814573678520008, "learning_rate": 2.5265960089376413e-06, "loss": 0.9118, "step": 6086 }, { "epoch": 7.4504283965728275, "grad_norm": 1.428007629846331, "learning_rate": 2.5259551645634828e-06, "loss": 0.5262, "step": 6087 }, { "epoch": 7.451652386780906, "grad_norm": 1.7400594089519827, "learning_rate": 2.525314318483647e-06, "loss": 0.553, "step": 6088 }, { "epoch": 7.452876376988984, "grad_norm": 2.359503683843971, "learning_rate": 2.524673470740248e-06, "loss": 0.9138, "step": 6089 }, { "epoch": 7.4541003671970625, "grad_norm": 2.253000641667843, "learning_rate": 2.5240326213753995e-06, "loss": 0.6265, "step": 6090 }, { "epoch": 7.455324357405141, "grad_norm": 0.9975225721173564, "learning_rate": 2.5233917704312167e-06, "loss": 0.6489, "step": 6091 }, { "epoch": 7.456548347613219, "grad_norm": 1.135679768286183, "learning_rate": 2.5227509179498123e-06, "loss": 0.8544, "step": 6092 }, { "epoch": 7.4577723378212974, "grad_norm": 1.5668430875013584, "learning_rate": 2.522110063973302e-06, "loss": 0.5319, "step": 6093 }, { "epoch": 7.458996328029376, "grad_norm": 1.5782835417774252, "learning_rate": 2.5214692085438003e-06, "loss": 0.5376, "step": 6094 }, { "epoch": 7.460220318237454, "grad_norm": 1.4155577174887026, "learning_rate": 2.5208283517034216e-06, "loss": 1.5809, "step": 6095 }, { "epoch": 7.461444308445532, "grad_norm": 0.9646864837251788, "learning_rate": 2.5201874934942804e-06, "loss": 0.5343, "step": 6096 }, { "epoch": 7.462668298653611, "grad_norm": 1.3259814230881448, "learning_rate": 2.519546633958492e-06, "loss": 1.3215, "step": 6097 }, { "epoch": 7.463892288861689, "grad_norm": 1.343208098366222, "learning_rate": 2.5189057731381716e-06, "loss": 1.2401, "step": 6098 }, { "epoch": 7.465116279069767, "grad_norm": 1.7666065002358464, "learning_rate": 2.5182649110754325e-06, "loss": 0.9722, "step": 6099 }, { "epoch": 7.466340269277846, "grad_norm": 1.4353883403584344, "learning_rate": 2.5176240478123926e-06, "loss": 0.5292, "step": 6100 }, { "epoch": 7.467564259485924, "grad_norm": 2.6125114949282318, "learning_rate": 2.516983183391164e-06, "loss": 0.9789, "step": 6101 }, { "epoch": 7.468788249694002, "grad_norm": 1.1786854395072885, "learning_rate": 2.5163423178538643e-06, "loss": 0.5218, "step": 6102 }, { "epoch": 7.470012239902081, "grad_norm": 1.5921586691449594, "learning_rate": 2.5157014512426074e-06, "loss": 0.6438, "step": 6103 }, { "epoch": 7.47123623011016, "grad_norm": 1.4335768291245108, "learning_rate": 2.5150605835995097e-06, "loss": 1.181, "step": 6104 }, { "epoch": 7.472460220318237, "grad_norm": 1.600357431311894, "learning_rate": 2.514419714966685e-06, "loss": 0.9433, "step": 6105 }, { "epoch": 7.473684210526316, "grad_norm": 2.634090099506393, "learning_rate": 2.5137788453862515e-06, "loss": 0.4718, "step": 6106 }, { "epoch": 7.474908200734394, "grad_norm": 1.1243857294647757, "learning_rate": 2.513137974900322e-06, "loss": 0.6561, "step": 6107 }, { "epoch": 7.476132190942472, "grad_norm": 1.0107949875225157, "learning_rate": 2.5124971035510143e-06, "loss": 0.5745, "step": 6108 }, { "epoch": 7.477356181150551, "grad_norm": 1.682807901670551, "learning_rate": 2.5118562313804433e-06, "loss": 0.9745, "step": 6109 }, { "epoch": 7.4785801713586295, "grad_norm": 1.8956441938366775, "learning_rate": 2.5112153584307237e-06, "loss": 0.5204, "step": 6110 }, { "epoch": 7.479804161566707, "grad_norm": 1.7273283634810426, "learning_rate": 2.5105744847439727e-06, "loss": 0.5925, "step": 6111 }, { "epoch": 7.481028151774786, "grad_norm": 2.519069616280341, "learning_rate": 2.5099336103623057e-06, "loss": 0.4316, "step": 6112 }, { "epoch": 7.4822521419828645, "grad_norm": 1.0888019478896982, "learning_rate": 2.5092927353278385e-06, "loss": 0.5887, "step": 6113 }, { "epoch": 7.483476132190942, "grad_norm": 1.307803378318719, "learning_rate": 2.508651859682687e-06, "loss": 0.5175, "step": 6114 }, { "epoch": 7.484700122399021, "grad_norm": 1.2957058222450901, "learning_rate": 2.508010983468968e-06, "loss": 0.5343, "step": 6115 }, { "epoch": 7.4859241126070994, "grad_norm": 2.473302128036359, "learning_rate": 2.5073701067287957e-06, "loss": 0.6085, "step": 6116 }, { "epoch": 7.487148102815177, "grad_norm": 1.9825347141532652, "learning_rate": 2.506729229504288e-06, "loss": 1.3518, "step": 6117 }, { "epoch": 7.488372093023256, "grad_norm": 2.4278534696682805, "learning_rate": 2.50608835183756e-06, "loss": 0.452, "step": 6118 }, { "epoch": 7.489596083231334, "grad_norm": 1.4042621705679088, "learning_rate": 2.505447473770728e-06, "loss": 0.4876, "step": 6119 }, { "epoch": 7.490820073439412, "grad_norm": 2.853619906164199, "learning_rate": 2.5048065953459084e-06, "loss": 0.4658, "step": 6120 }, { "epoch": 7.492044063647491, "grad_norm": 1.0801733655163022, "learning_rate": 2.5041657166052167e-06, "loss": 0.4502, "step": 6121 }, { "epoch": 7.493268053855569, "grad_norm": 1.301191635370929, "learning_rate": 2.50352483759077e-06, "loss": 0.9583, "step": 6122 }, { "epoch": 7.494492044063647, "grad_norm": 1.3723130538358295, "learning_rate": 2.502883958344683e-06, "loss": 0.4184, "step": 6123 }, { "epoch": 7.495716034271726, "grad_norm": 1.5822303801143285, "learning_rate": 2.5022430789090744e-06, "loss": 0.5624, "step": 6124 }, { "epoch": 7.496940024479804, "grad_norm": 1.7427664277732544, "learning_rate": 2.5016021993260576e-06, "loss": 0.5342, "step": 6125 }, { "epoch": 7.498164014687882, "grad_norm": 1.749077395392347, "learning_rate": 2.500961319637751e-06, "loss": 0.5482, "step": 6126 }, { "epoch": 7.499388004895961, "grad_norm": 1.4061645417990274, "learning_rate": 2.50032043988627e-06, "loss": 0.4169, "step": 6127 }, { "epoch": 7.500611995104039, "grad_norm": 1.2150993147614775, "learning_rate": 2.4996795601137306e-06, "loss": 0.747, "step": 6128 }, { "epoch": 7.501835985312118, "grad_norm": 2.3880122818659246, "learning_rate": 2.4990386803622496e-06, "loss": 0.936, "step": 6129 }, { "epoch": 7.503059975520196, "grad_norm": 1.494976701947248, "learning_rate": 2.4983978006739423e-06, "loss": 0.9921, "step": 6130 }, { "epoch": 7.504283965728274, "grad_norm": 1.8449890083394016, "learning_rate": 2.4977569210909264e-06, "loss": 0.6514, "step": 6131 }, { "epoch": 7.505507955936353, "grad_norm": 2.324051551638898, "learning_rate": 2.4971160416553175e-06, "loss": 0.4027, "step": 6132 }, { "epoch": 7.506731946144431, "grad_norm": 1.3527543558257142, "learning_rate": 2.4964751624092313e-06, "loss": 0.5866, "step": 6133 }, { "epoch": 7.507955936352509, "grad_norm": 1.4325695097896, "learning_rate": 2.4958342833947845e-06, "loss": 1.2231, "step": 6134 }, { "epoch": 7.509179926560588, "grad_norm": 1.137352236656296, "learning_rate": 2.4951934046540925e-06, "loss": 0.5679, "step": 6135 }, { "epoch": 7.510403916768666, "grad_norm": 1.8264961678546414, "learning_rate": 2.494552526229273e-06, "loss": 1.0493, "step": 6136 }, { "epoch": 7.511627906976744, "grad_norm": 1.7644000961128532, "learning_rate": 2.493911648162441e-06, "loss": 0.3761, "step": 6137 }, { "epoch": 7.512851897184823, "grad_norm": 1.381992645957713, "learning_rate": 2.493270770495713e-06, "loss": 1.2692, "step": 6138 }, { "epoch": 7.5140758873929006, "grad_norm": 1.4655481617006292, "learning_rate": 2.4926298932712047e-06, "loss": 0.4009, "step": 6139 }, { "epoch": 7.515299877600979, "grad_norm": 1.1307880941080095, "learning_rate": 2.491989016531033e-06, "loss": 0.7596, "step": 6140 }, { "epoch": 7.516523867809058, "grad_norm": 1.9805973774014445, "learning_rate": 2.491348140317314e-06, "loss": 0.5981, "step": 6141 }, { "epoch": 7.5177478580171355, "grad_norm": 1.4961849676718475, "learning_rate": 2.4907072646721624e-06, "loss": 0.5663, "step": 6142 }, { "epoch": 7.518971848225214, "grad_norm": 1.8049682202330257, "learning_rate": 2.4900663896376943e-06, "loss": 1.1664, "step": 6143 }, { "epoch": 7.520195838433293, "grad_norm": 1.585377021525601, "learning_rate": 2.4894255152560277e-06, "loss": 0.7413, "step": 6144 }, { "epoch": 7.5214198286413705, "grad_norm": 1.0410506622265685, "learning_rate": 2.488784641569277e-06, "loss": 1.2065, "step": 6145 }, { "epoch": 7.522643818849449, "grad_norm": 0.8798615939374048, "learning_rate": 2.488143768619558e-06, "loss": 0.4493, "step": 6146 }, { "epoch": 7.523867809057528, "grad_norm": 2.04603809562867, "learning_rate": 2.4875028964489866e-06, "loss": 0.9204, "step": 6147 }, { "epoch": 7.525091799265606, "grad_norm": 1.0347449185868562, "learning_rate": 2.486862025099678e-06, "loss": 0.754, "step": 6148 }, { "epoch": 7.526315789473684, "grad_norm": 2.0068491129132755, "learning_rate": 2.486221154613749e-06, "loss": 0.9766, "step": 6149 }, { "epoch": 7.527539779681763, "grad_norm": 1.7001312515044822, "learning_rate": 2.4855802850333154e-06, "loss": 0.4098, "step": 6150 }, { "epoch": 7.52876376988984, "grad_norm": 1.2976965266991234, "learning_rate": 2.4849394164004915e-06, "loss": 0.3456, "step": 6151 }, { "epoch": 7.529987760097919, "grad_norm": 1.3653949192717019, "learning_rate": 2.484298548757393e-06, "loss": 0.9337, "step": 6152 }, { "epoch": 7.531211750305998, "grad_norm": 0.9637523287868172, "learning_rate": 2.4836576821461365e-06, "loss": 0.5394, "step": 6153 }, { "epoch": 7.532435740514076, "grad_norm": 1.3216007338905937, "learning_rate": 2.483016816608837e-06, "loss": 0.3456, "step": 6154 }, { "epoch": 7.533659730722154, "grad_norm": 2.8816887578333383, "learning_rate": 2.4823759521876086e-06, "loss": 0.5446, "step": 6155 }, { "epoch": 7.534883720930233, "grad_norm": 1.270419355149507, "learning_rate": 2.4817350889245675e-06, "loss": 0.6048, "step": 6156 }, { "epoch": 7.536107711138311, "grad_norm": 1.1831225778341916, "learning_rate": 2.481094226861829e-06, "loss": 1.0994, "step": 6157 }, { "epoch": 7.537331701346389, "grad_norm": 1.6050683035933266, "learning_rate": 2.4804533660415083e-06, "loss": 0.5377, "step": 6158 }, { "epoch": 7.538555691554468, "grad_norm": 1.5722216752246725, "learning_rate": 2.4798125065057204e-06, "loss": 0.5369, "step": 6159 }, { "epoch": 7.539779681762546, "grad_norm": 1.3144109460143343, "learning_rate": 2.4791716482965797e-06, "loss": 0.6304, "step": 6160 }, { "epoch": 7.541003671970624, "grad_norm": 1.3440486915253003, "learning_rate": 2.4785307914562006e-06, "loss": 0.5402, "step": 6161 }, { "epoch": 7.5422276621787026, "grad_norm": 1.855158171545453, "learning_rate": 2.477889936026699e-06, "loss": 0.328, "step": 6162 }, { "epoch": 7.543451652386781, "grad_norm": 2.555566079549113, "learning_rate": 2.4772490820501885e-06, "loss": 0.4215, "step": 6163 }, { "epoch": 7.544675642594859, "grad_norm": 2.225302554369664, "learning_rate": 2.4766082295687846e-06, "loss": 0.9965, "step": 6164 }, { "epoch": 7.5458996328029375, "grad_norm": 1.782038690005208, "learning_rate": 2.475967378624601e-06, "loss": 0.5003, "step": 6165 }, { "epoch": 7.547123623011016, "grad_norm": 1.5245038204727228, "learning_rate": 2.4753265292597526e-06, "loss": 0.6906, "step": 6166 }, { "epoch": 7.548347613219094, "grad_norm": 0.9250030276374989, "learning_rate": 2.4746856815163532e-06, "loss": 0.5258, "step": 6167 }, { "epoch": 7.5495716034271725, "grad_norm": 2.6412871717958306, "learning_rate": 2.474044835436518e-06, "loss": 0.7593, "step": 6168 }, { "epoch": 7.550795593635251, "grad_norm": 2.1788147743753123, "learning_rate": 2.473403991062359e-06, "loss": 0.4403, "step": 6169 }, { "epoch": 7.552019583843329, "grad_norm": 1.0332792672357254, "learning_rate": 2.4727631484359924e-06, "loss": 0.5177, "step": 6170 }, { "epoch": 7.5532435740514074, "grad_norm": 1.0163362117626535, "learning_rate": 2.4721223075995305e-06, "loss": 0.5431, "step": 6171 }, { "epoch": 7.554467564259486, "grad_norm": 1.7209597917710562, "learning_rate": 2.4714814685950877e-06, "loss": 0.5419, "step": 6172 }, { "epoch": 7.555691554467565, "grad_norm": 1.4928979692641338, "learning_rate": 2.4708406314647774e-06, "loss": 0.4453, "step": 6173 }, { "epoch": 7.556915544675642, "grad_norm": 1.937759142300473, "learning_rate": 2.4701997962507118e-06, "loss": 1.0288, "step": 6174 }, { "epoch": 7.558139534883721, "grad_norm": 2.487098659903279, "learning_rate": 2.4695589629950067e-06, "loss": 0.5094, "step": 6175 }, { "epoch": 7.5593635250918, "grad_norm": 1.9549178695520475, "learning_rate": 2.468918131739773e-06, "loss": 0.467, "step": 6176 }, { "epoch": 7.560587515299877, "grad_norm": 1.1030077766603401, "learning_rate": 2.4682773025271257e-06, "loss": 0.5383, "step": 6177 }, { "epoch": 7.561811505507956, "grad_norm": 2.3391975890814485, "learning_rate": 2.4676364753991755e-06, "loss": 0.3654, "step": 6178 }, { "epoch": 7.563035495716035, "grad_norm": 1.1007951263944946, "learning_rate": 2.4669956503980377e-06, "loss": 0.5637, "step": 6179 }, { "epoch": 7.564259485924112, "grad_norm": 1.2349550291088451, "learning_rate": 2.466354827565823e-06, "loss": 0.5676, "step": 6180 }, { "epoch": 7.565483476132191, "grad_norm": 1.5544509845760528, "learning_rate": 2.465714006944645e-06, "loss": 1.3462, "step": 6181 }, { "epoch": 7.56670746634027, "grad_norm": 2.2877357376139735, "learning_rate": 2.4650731885766148e-06, "loss": 0.6382, "step": 6182 }, { "epoch": 7.567931456548347, "grad_norm": 1.6477121465072988, "learning_rate": 2.464432372503846e-06, "loss": 0.4418, "step": 6183 }, { "epoch": 7.569155446756426, "grad_norm": 1.582173657706063, "learning_rate": 2.4637915587684504e-06, "loss": 1.1874, "step": 6184 }, { "epoch": 7.5703794369645045, "grad_norm": 2.1312024366042808, "learning_rate": 2.4631507474125395e-06, "loss": 1.215, "step": 6185 }, { "epoch": 7.571603427172582, "grad_norm": 2.258728231016943, "learning_rate": 2.4625099384782255e-06, "loss": 0.6891, "step": 6186 }, { "epoch": 7.572827417380661, "grad_norm": 1.3687724820410727, "learning_rate": 2.461869132007618e-06, "loss": 0.5797, "step": 6187 }, { "epoch": 7.5740514075887395, "grad_norm": 1.6136110482120973, "learning_rate": 2.461228328042832e-06, "loss": 0.3648, "step": 6188 }, { "epoch": 7.575275397796817, "grad_norm": 1.9060356335013666, "learning_rate": 2.4605875266259765e-06, "loss": 0.3199, "step": 6189 }, { "epoch": 7.576499388004896, "grad_norm": 1.5012683925296622, "learning_rate": 2.4599467277991626e-06, "loss": 1.187, "step": 6190 }, { "epoch": 7.5777233782129745, "grad_norm": 0.8857766948880677, "learning_rate": 2.459305931604501e-06, "loss": 0.4477, "step": 6191 }, { "epoch": 7.578947368421053, "grad_norm": 1.4412875126489144, "learning_rate": 2.458665138084104e-06, "loss": 0.7029, "step": 6192 }, { "epoch": 7.580171358629131, "grad_norm": 1.4201882135030912, "learning_rate": 2.458024347280081e-06, "loss": 0.4768, "step": 6193 }, { "epoch": 7.5813953488372094, "grad_norm": 1.4089622610472727, "learning_rate": 2.4573835592345427e-06, "loss": 0.5801, "step": 6194 }, { "epoch": 7.582619339045287, "grad_norm": 1.4999634611024162, "learning_rate": 2.456742773989599e-06, "loss": 1.2802, "step": 6195 }, { "epoch": 7.583843329253366, "grad_norm": 1.8881913110797863, "learning_rate": 2.4561019915873604e-06, "loss": 0.7345, "step": 6196 }, { "epoch": 7.585067319461444, "grad_norm": 1.0666398175570457, "learning_rate": 2.455461212069937e-06, "loss": 0.4911, "step": 6197 }, { "epoch": 7.586291309669523, "grad_norm": 1.0347178879115895, "learning_rate": 2.4548204354794375e-06, "loss": 0.5655, "step": 6198 }, { "epoch": 7.587515299877601, "grad_norm": 2.313442434996858, "learning_rate": 2.4541796618579723e-06, "loss": 0.4504, "step": 6199 }, { "epoch": 7.588739290085679, "grad_norm": 1.4687889938849121, "learning_rate": 2.453538891247649e-06, "loss": 1.3075, "step": 6200 }, { "epoch": 7.589963280293758, "grad_norm": 1.5248883463126752, "learning_rate": 2.452898123690579e-06, "loss": 0.5861, "step": 6201 }, { "epoch": 7.591187270501836, "grad_norm": 1.5927991781122914, "learning_rate": 2.45225735922887e-06, "loss": 1.5123, "step": 6202 }, { "epoch": 7.592411260709914, "grad_norm": 1.8443181155383792, "learning_rate": 2.4516165979046307e-06, "loss": 1.3071, "step": 6203 }, { "epoch": 7.593635250917993, "grad_norm": 2.06521145936607, "learning_rate": 2.450975839759969e-06, "loss": 1.0036, "step": 6204 }, { "epoch": 7.594859241126071, "grad_norm": 2.204816751222035, "learning_rate": 2.4503350848369946e-06, "loss": 0.5562, "step": 6205 }, { "epoch": 7.596083231334149, "grad_norm": 1.6438652922680626, "learning_rate": 2.4496943331778148e-06, "loss": 0.5493, "step": 6206 }, { "epoch": 7.597307221542228, "grad_norm": 1.2916760846874298, "learning_rate": 2.449053584824537e-06, "loss": 0.4622, "step": 6207 }, { "epoch": 7.598531211750306, "grad_norm": 2.742208568838488, "learning_rate": 2.4484128398192687e-06, "loss": 0.3963, "step": 6208 }, { "epoch": 7.599755201958384, "grad_norm": 0.973799891074145, "learning_rate": 2.4477720982041183e-06, "loss": 0.5163, "step": 6209 }, { "epoch": 7.600979192166463, "grad_norm": 1.6912694135163884, "learning_rate": 2.447131360021193e-06, "loss": 0.9395, "step": 6210 }, { "epoch": 7.602203182374541, "grad_norm": 1.9911398351479648, "learning_rate": 2.446490625312598e-06, "loss": 0.5565, "step": 6211 }, { "epoch": 7.603427172582619, "grad_norm": 1.51081141073198, "learning_rate": 2.4458498941204425e-06, "loss": 1.4627, "step": 6212 }, { "epoch": 7.604651162790698, "grad_norm": 1.7309570874065212, "learning_rate": 2.44520916648683e-06, "loss": 1.1202, "step": 6213 }, { "epoch": 7.605875152998776, "grad_norm": 1.6369259654068378, "learning_rate": 2.4445684424538694e-06, "loss": 0.4763, "step": 6214 }, { "epoch": 7.607099143206854, "grad_norm": 2.0860915338917656, "learning_rate": 2.4439277220636664e-06, "loss": 0.395, "step": 6215 }, { "epoch": 7.608323133414933, "grad_norm": 1.4879094317381925, "learning_rate": 2.443287005358325e-06, "loss": 0.48, "step": 6216 }, { "epoch": 7.6095471236230114, "grad_norm": 2.8472398931786556, "learning_rate": 2.442646292379952e-06, "loss": 0.7206, "step": 6217 }, { "epoch": 7.610771113831089, "grad_norm": 1.471902825244244, "learning_rate": 2.4420055831706527e-06, "loss": 1.056, "step": 6218 }, { "epoch": 7.611995104039168, "grad_norm": 1.237349400651744, "learning_rate": 2.441364877772532e-06, "loss": 0.5482, "step": 6219 }, { "epoch": 7.613219094247246, "grad_norm": 0.5161203670723004, "learning_rate": 2.440724176227695e-06, "loss": 0.1315, "step": 6220 }, { "epoch": 7.614443084455324, "grad_norm": 1.1971232670851863, "learning_rate": 2.440083478578245e-06, "loss": 0.4746, "step": 6221 }, { "epoch": 7.615667074663403, "grad_norm": 0.9967736846013807, "learning_rate": 2.4394427848662878e-06, "loss": 0.5871, "step": 6222 }, { "epoch": 7.616891064871481, "grad_norm": 1.8068241737006503, "learning_rate": 2.438802095133927e-06, "loss": 0.5218, "step": 6223 }, { "epoch": 7.618115055079559, "grad_norm": 1.4531866880299504, "learning_rate": 2.438161409423266e-06, "loss": 0.4691, "step": 6224 }, { "epoch": 7.619339045287638, "grad_norm": 1.5649251337875758, "learning_rate": 2.4375207277764085e-06, "loss": 1.2042, "step": 6225 }, { "epoch": 7.620563035495716, "grad_norm": 1.7562804351514105, "learning_rate": 2.436880050235457e-06, "loss": 0.4066, "step": 6226 }, { "epoch": 7.621787025703794, "grad_norm": 1.8638113156111307, "learning_rate": 2.436239376842516e-06, "loss": 0.3976, "step": 6227 }, { "epoch": 7.623011015911873, "grad_norm": 1.1807536612498002, "learning_rate": 2.4355987076396866e-06, "loss": 0.7388, "step": 6228 }, { "epoch": 7.624235006119951, "grad_norm": 0.9468665454483438, "learning_rate": 2.4349580426690726e-06, "loss": 0.5534, "step": 6229 }, { "epoch": 7.625458996328029, "grad_norm": 1.572080801639012, "learning_rate": 2.4343173819727745e-06, "loss": 0.9236, "step": 6230 }, { "epoch": 7.626682986536108, "grad_norm": 1.6746264866290577, "learning_rate": 2.433676725592896e-06, "loss": 0.9248, "step": 6231 }, { "epoch": 7.627906976744186, "grad_norm": 1.585817840031627, "learning_rate": 2.4330360735715373e-06, "loss": 1.1442, "step": 6232 }, { "epoch": 7.629130966952264, "grad_norm": 1.6831160490355674, "learning_rate": 2.4323954259508013e-06, "loss": 1.224, "step": 6233 }, { "epoch": 7.630354957160343, "grad_norm": 1.8084710852761299, "learning_rate": 2.4317547827727857e-06, "loss": 0.6087, "step": 6234 }, { "epoch": 7.631578947368421, "grad_norm": 1.5035544444211026, "learning_rate": 2.4311141440795956e-06, "loss": 1.6324, "step": 6235 }, { "epoch": 7.6328029375765, "grad_norm": 1.8984633157375777, "learning_rate": 2.430473509913328e-06, "loss": 0.5269, "step": 6236 }, { "epoch": 7.634026927784578, "grad_norm": 1.7178517287730044, "learning_rate": 2.4298328803160846e-06, "loss": 0.4117, "step": 6237 }, { "epoch": 7.635250917992656, "grad_norm": 2.2175263216684113, "learning_rate": 2.429192255329964e-06, "loss": 0.4293, "step": 6238 }, { "epoch": 7.636474908200734, "grad_norm": 1.7116906762761694, "learning_rate": 2.428551634997067e-06, "loss": 0.5445, "step": 6239 }, { "epoch": 7.6376988984088126, "grad_norm": 1.4045885904958415, "learning_rate": 2.427911019359492e-06, "loss": 0.8066, "step": 6240 }, { "epoch": 7.638922888616891, "grad_norm": 1.4135724080715015, "learning_rate": 2.4272704084593395e-06, "loss": 0.5844, "step": 6241 }, { "epoch": 7.64014687882497, "grad_norm": 1.3644228705805737, "learning_rate": 2.4266298023387057e-06, "loss": 1.1678, "step": 6242 }, { "epoch": 7.6413708690330475, "grad_norm": 1.4871828035988977, "learning_rate": 2.425989201039689e-06, "loss": 1.5654, "step": 6243 }, { "epoch": 7.642594859241126, "grad_norm": 1.892404838136676, "learning_rate": 2.4253486046043894e-06, "loss": 0.6167, "step": 6244 }, { "epoch": 7.643818849449205, "grad_norm": 1.619577195135675, "learning_rate": 2.424708013074903e-06, "loss": 1.8137, "step": 6245 }, { "epoch": 7.6450428396572825, "grad_norm": 1.8095615753452048, "learning_rate": 2.424067426493328e-06, "loss": 0.7472, "step": 6246 }, { "epoch": 7.646266829865361, "grad_norm": 1.912249938441006, "learning_rate": 2.42342684490176e-06, "loss": 0.4095, "step": 6247 }, { "epoch": 7.64749082007344, "grad_norm": 2.3371232499700407, "learning_rate": 2.422786268342297e-06, "loss": 0.4365, "step": 6248 }, { "epoch": 7.6487148102815175, "grad_norm": 1.4694554086745333, "learning_rate": 2.422145696857035e-06, "loss": 0.4678, "step": 6249 }, { "epoch": 7.649938800489596, "grad_norm": 0.9788789431326116, "learning_rate": 2.4215051304880703e-06, "loss": 0.3716, "step": 6250 }, { "epoch": 7.651162790697675, "grad_norm": 2.6841692152994026, "learning_rate": 2.420864569277498e-06, "loss": 0.4684, "step": 6251 }, { "epoch": 7.652386780905752, "grad_norm": 1.503854197678877, "learning_rate": 2.420224013267412e-06, "loss": 0.3198, "step": 6252 }, { "epoch": 7.653610771113831, "grad_norm": 1.685191763320888, "learning_rate": 2.41958346249991e-06, "loss": 0.5617, "step": 6253 }, { "epoch": 7.65483476132191, "grad_norm": 2.1161571476178858, "learning_rate": 2.4189429170170855e-06, "loss": 0.4662, "step": 6254 }, { "epoch": 7.656058751529987, "grad_norm": 2.007661624166391, "learning_rate": 2.4183023768610326e-06, "loss": 0.3695, "step": 6255 }, { "epoch": 7.657282741738066, "grad_norm": 1.47509801785704, "learning_rate": 2.4176618420738448e-06, "loss": 0.3845, "step": 6256 }, { "epoch": 7.658506731946145, "grad_norm": 1.5606134156352238, "learning_rate": 2.417021312697616e-06, "loss": 0.514, "step": 6257 }, { "epoch": 7.659730722154222, "grad_norm": 1.085935129413594, "learning_rate": 2.41638078877444e-06, "loss": 0.5511, "step": 6258 }, { "epoch": 7.660954712362301, "grad_norm": 1.460596803726325, "learning_rate": 2.4157402703464104e-06, "loss": 0.9451, "step": 6259 }, { "epoch": 7.66217870257038, "grad_norm": 2.178821744866951, "learning_rate": 2.4150997574556163e-06, "loss": 0.4788, "step": 6260 }, { "epoch": 7.663402692778458, "grad_norm": 1.4466052506333684, "learning_rate": 2.4144592501441537e-06, "loss": 0.9296, "step": 6261 }, { "epoch": 7.664626682986536, "grad_norm": 3.25146181825888, "learning_rate": 2.4138187484541133e-06, "loss": 0.4451, "step": 6262 }, { "epoch": 7.6658506731946146, "grad_norm": 1.5887728459523804, "learning_rate": 2.413178252427585e-06, "loss": 1.3736, "step": 6263 }, { "epoch": 7.667074663402692, "grad_norm": 1.7056958099439616, "learning_rate": 2.412537762106661e-06, "loss": 0.4412, "step": 6264 }, { "epoch": 7.668298653610771, "grad_norm": 1.707456420735935, "learning_rate": 2.411897277533431e-06, "loss": 0.4722, "step": 6265 }, { "epoch": 7.6695226438188495, "grad_norm": 1.4126627764745834, "learning_rate": 2.411256798749987e-06, "loss": 0.3693, "step": 6266 }, { "epoch": 7.670746634026928, "grad_norm": 1.2354146276656335, "learning_rate": 2.410616325798418e-06, "loss": 0.5632, "step": 6267 }, { "epoch": 7.671970624235006, "grad_norm": 1.35179943647802, "learning_rate": 2.4099758587208137e-06, "loss": 0.6805, "step": 6268 }, { "epoch": 7.6731946144430845, "grad_norm": 2.406035828537886, "learning_rate": 2.4093353975592615e-06, "loss": 1.197, "step": 6269 }, { "epoch": 7.674418604651163, "grad_norm": 1.9472459813692993, "learning_rate": 2.408694942355853e-06, "loss": 1.189, "step": 6270 }, { "epoch": 7.675642594859241, "grad_norm": 1.557370363693338, "learning_rate": 2.4080544931526754e-06, "loss": 0.4453, "step": 6271 }, { "epoch": 7.6768665850673194, "grad_norm": 1.5985013741129968, "learning_rate": 2.407414049991816e-06, "loss": 0.5127, "step": 6272 }, { "epoch": 7.678090575275398, "grad_norm": 1.2860299535465822, "learning_rate": 2.406773612915362e-06, "loss": 0.6761, "step": 6273 }, { "epoch": 7.679314565483476, "grad_norm": 1.7220747341400386, "learning_rate": 2.4061331819654017e-06, "loss": 0.5378, "step": 6274 }, { "epoch": 7.680538555691554, "grad_norm": 2.563087509405035, "learning_rate": 2.405492757184022e-06, "loss": 0.515, "step": 6275 }, { "epoch": 7.681762545899633, "grad_norm": 0.9585274519546628, "learning_rate": 2.4048523386133083e-06, "loss": 0.5008, "step": 6276 }, { "epoch": 7.682986536107711, "grad_norm": 1.284346851400765, "learning_rate": 2.404211926295348e-06, "loss": 1.1623, "step": 6277 }, { "epoch": 7.684210526315789, "grad_norm": 1.4836959481927843, "learning_rate": 2.403571520272224e-06, "loss": 0.7308, "step": 6278 }, { "epoch": 7.685434516523868, "grad_norm": 1.674298073222216, "learning_rate": 2.402931120586024e-06, "loss": 0.3522, "step": 6279 }, { "epoch": 7.686658506731947, "grad_norm": 1.160416681341403, "learning_rate": 2.4022907272788316e-06, "loss": 0.7253, "step": 6280 }, { "epoch": 7.687882496940024, "grad_norm": 1.9402700990365052, "learning_rate": 2.401650340392731e-06, "loss": 0.4848, "step": 6281 }, { "epoch": 7.689106487148103, "grad_norm": 1.701940655457374, "learning_rate": 2.4010099599698063e-06, "loss": 0.4956, "step": 6282 }, { "epoch": 7.690330477356181, "grad_norm": 1.0851092405916003, "learning_rate": 2.4003695860521407e-06, "loss": 0.483, "step": 6283 }, { "epoch": 7.691554467564259, "grad_norm": 1.469383587352406, "learning_rate": 2.399729218681818e-06, "loss": 1.0257, "step": 6284 }, { "epoch": 7.692778457772338, "grad_norm": 1.620170030492761, "learning_rate": 2.39908885790092e-06, "loss": 0.5549, "step": 6285 }, { "epoch": 7.6940024479804165, "grad_norm": 1.0742852397369316, "learning_rate": 2.3984485037515296e-06, "loss": 0.6346, "step": 6286 }, { "epoch": 7.695226438188494, "grad_norm": 1.6728796504819454, "learning_rate": 2.3978081562757263e-06, "loss": 0.4313, "step": 6287 }, { "epoch": 7.696450428396573, "grad_norm": 1.8087834824117477, "learning_rate": 2.3971678155155944e-06, "loss": 0.7892, "step": 6288 }, { "epoch": 7.6976744186046515, "grad_norm": 1.6663826636637065, "learning_rate": 2.3965274815132125e-06, "loss": 0.4694, "step": 6289 }, { "epoch": 7.698898408812729, "grad_norm": 1.810430028534234, "learning_rate": 2.3958871543106625e-06, "loss": 0.5074, "step": 6290 }, { "epoch": 7.700122399020808, "grad_norm": 1.9761849178085087, "learning_rate": 2.3952468339500223e-06, "loss": 0.5615, "step": 6291 }, { "epoch": 7.7013463892288865, "grad_norm": 1.3203164613433664, "learning_rate": 2.394606520473374e-06, "loss": 0.6015, "step": 6292 }, { "epoch": 7.702570379436964, "grad_norm": 1.9003992897669886, "learning_rate": 2.393966213922795e-06, "loss": 0.4477, "step": 6293 }, { "epoch": 7.703794369645043, "grad_norm": 1.5205187979013979, "learning_rate": 2.3933259143403643e-06, "loss": 1.0942, "step": 6294 }, { "epoch": 7.7050183598531214, "grad_norm": 2.075422300090956, "learning_rate": 2.392685621768159e-06, "loss": 0.4797, "step": 6295 }, { "epoch": 7.706242350061199, "grad_norm": 2.1080163922110353, "learning_rate": 2.392045336248259e-06, "loss": 1.054, "step": 6296 }, { "epoch": 7.707466340269278, "grad_norm": 2.521287934213974, "learning_rate": 2.39140505782274e-06, "loss": 0.482, "step": 6297 }, { "epoch": 7.708690330477356, "grad_norm": 1.1255339854096258, "learning_rate": 2.390764786533679e-06, "loss": 0.6667, "step": 6298 }, { "epoch": 7.709914320685434, "grad_norm": 2.0873958870050355, "learning_rate": 2.390124522423152e-06, "loss": 0.5135, "step": 6299 }, { "epoch": 7.711138310893513, "grad_norm": 1.7285187535577733, "learning_rate": 2.3894842655332345e-06, "loss": 0.6161, "step": 6300 }, { "epoch": 7.712362301101591, "grad_norm": 2.141739276923362, "learning_rate": 2.388844015906003e-06, "loss": 0.6883, "step": 6301 }, { "epoch": 7.713586291309669, "grad_norm": 1.8117702710525805, "learning_rate": 2.388203773583531e-06, "loss": 0.4267, "step": 6302 }, { "epoch": 7.714810281517748, "grad_norm": 2.054150420700014, "learning_rate": 2.387563538607894e-06, "loss": 0.5478, "step": 6303 }, { "epoch": 7.716034271725826, "grad_norm": 1.7056453358179677, "learning_rate": 2.3869233110211647e-06, "loss": 1.0227, "step": 6304 }, { "epoch": 7.717258261933905, "grad_norm": 1.5546274238279476, "learning_rate": 2.3862830908654176e-06, "loss": 0.5198, "step": 6305 }, { "epoch": 7.718482252141983, "grad_norm": 1.3456353053041012, "learning_rate": 2.3856428781827257e-06, "loss": 0.5727, "step": 6306 }, { "epoch": 7.719706242350061, "grad_norm": 1.3914494861276754, "learning_rate": 2.3850026730151605e-06, "loss": 0.472, "step": 6307 }, { "epoch": 7.720930232558139, "grad_norm": 2.399434701292347, "learning_rate": 2.3843624754047936e-06, "loss": 0.9649, "step": 6308 }, { "epoch": 7.722154222766218, "grad_norm": 2.4004347447445302, "learning_rate": 2.383722285393698e-06, "loss": 0.5537, "step": 6309 }, { "epoch": 7.723378212974296, "grad_norm": 1.2993857470447243, "learning_rate": 2.3830821030239436e-06, "loss": 0.5429, "step": 6310 }, { "epoch": 7.724602203182375, "grad_norm": 1.3721975438360952, "learning_rate": 2.3824419283376005e-06, "loss": 1.3298, "step": 6311 }, { "epoch": 7.725826193390453, "grad_norm": 3.27607214112602, "learning_rate": 2.3818017613767392e-06, "loss": 0.788, "step": 6312 }, { "epoch": 7.727050183598531, "grad_norm": 1.5472215007488848, "learning_rate": 2.381161602183429e-06, "loss": 1.2436, "step": 6313 }, { "epoch": 7.72827417380661, "grad_norm": 2.128367492821983, "learning_rate": 2.3805214507997384e-06, "loss": 0.4874, "step": 6314 }, { "epoch": 7.729498164014688, "grad_norm": 1.2876238287066035, "learning_rate": 2.3798813072677373e-06, "loss": 1.3427, "step": 6315 }, { "epoch": 7.730722154222766, "grad_norm": 1.4484059469227706, "learning_rate": 2.3792411716294916e-06, "loss": 0.48, "step": 6316 }, { "epoch": 7.731946144430845, "grad_norm": 2.3858442345723665, "learning_rate": 2.378601043927069e-06, "loss": 0.8368, "step": 6317 }, { "epoch": 7.7331701346389226, "grad_norm": 1.0493042938955108, "learning_rate": 2.377960924202537e-06, "loss": 0.509, "step": 6318 }, { "epoch": 7.734394124847001, "grad_norm": 1.8323422053817044, "learning_rate": 2.3773208124979624e-06, "loss": 0.5863, "step": 6319 }, { "epoch": 7.73561811505508, "grad_norm": 2.260552549434383, "learning_rate": 2.3766807088554096e-06, "loss": 0.5954, "step": 6320 }, { "epoch": 7.7368421052631575, "grad_norm": 2.069251599982254, "learning_rate": 2.376040613316944e-06, "loss": 1.0334, "step": 6321 }, { "epoch": 7.738066095471236, "grad_norm": 1.4603185437822357, "learning_rate": 2.375400525924632e-06, "loss": 0.614, "step": 6322 }, { "epoch": 7.739290085679315, "grad_norm": 1.5334338214125567, "learning_rate": 2.3747604467205358e-06, "loss": 0.4542, "step": 6323 }, { "epoch": 7.740514075887393, "grad_norm": 2.117592068512626, "learning_rate": 2.3741203757467207e-06, "loss": 0.5541, "step": 6324 }, { "epoch": 7.741738066095471, "grad_norm": 1.098187444815803, "learning_rate": 2.3734803130452484e-06, "loss": 0.8942, "step": 6325 }, { "epoch": 7.74296205630355, "grad_norm": 1.498316405164169, "learning_rate": 2.372840258658181e-06, "loss": 0.7155, "step": 6326 }, { "epoch": 7.7441860465116275, "grad_norm": 1.482855421167908, "learning_rate": 2.3722002126275826e-06, "loss": 0.4709, "step": 6327 }, { "epoch": 7.745410036719706, "grad_norm": 1.3195068943399302, "learning_rate": 2.371560174995513e-06, "loss": 0.3725, "step": 6328 }, { "epoch": 7.746634026927785, "grad_norm": 2.5282448420952837, "learning_rate": 2.370920145804034e-06, "loss": 0.4291, "step": 6329 }, { "epoch": 7.747858017135863, "grad_norm": 2.004223365493421, "learning_rate": 2.370280125095205e-06, "loss": 0.5266, "step": 6330 }, { "epoch": 7.749082007343941, "grad_norm": 1.7539124504367005, "learning_rate": 2.369640112911087e-06, "loss": 0.744, "step": 6331 }, { "epoch": 7.75030599755202, "grad_norm": 1.7978011052772684, "learning_rate": 2.3690001092937383e-06, "loss": 0.5888, "step": 6332 }, { "epoch": 7.751529987760098, "grad_norm": 2.042500572979575, "learning_rate": 2.3683601142852184e-06, "loss": 0.6346, "step": 6333 }, { "epoch": 7.752753977968176, "grad_norm": 1.1373810389311845, "learning_rate": 2.3677201279275837e-06, "loss": 0.8285, "step": 6334 }, { "epoch": 7.753977968176255, "grad_norm": 1.8786890086638723, "learning_rate": 2.367080150262894e-06, "loss": 0.6426, "step": 6335 }, { "epoch": 7.755201958384333, "grad_norm": 1.0178942015443644, "learning_rate": 2.366440181333205e-06, "loss": 0.3776, "step": 6336 }, { "epoch": 7.756425948592411, "grad_norm": 2.251537826628075, "learning_rate": 2.365800221180573e-06, "loss": 0.2485, "step": 6337 }, { "epoch": 7.75764993880049, "grad_norm": 1.2350812837365237, "learning_rate": 2.3651602698470546e-06, "loss": 0.9825, "step": 6338 }, { "epoch": 7.758873929008568, "grad_norm": 2.056608369937828, "learning_rate": 2.3645203273747038e-06, "loss": 0.4328, "step": 6339 }, { "epoch": 7.760097919216646, "grad_norm": 2.1123474810372285, "learning_rate": 2.3638803938055765e-06, "loss": 0.3032, "step": 6340 }, { "epoch": 7.7613219094247246, "grad_norm": 1.4472691014130052, "learning_rate": 2.363240469181726e-06, "loss": 0.4991, "step": 6341 }, { "epoch": 7.762545899632803, "grad_norm": 1.9241302467442303, "learning_rate": 2.362600553545207e-06, "loss": 1.0187, "step": 6342 }, { "epoch": 7.763769889840881, "grad_norm": 1.6489050147899753, "learning_rate": 2.3619606469380693e-06, "loss": 0.72, "step": 6343 }, { "epoch": 7.7649938800489595, "grad_norm": 1.0742292818660073, "learning_rate": 2.361320749402369e-06, "loss": 0.4866, "step": 6344 }, { "epoch": 7.766217870257038, "grad_norm": 2.465369289846065, "learning_rate": 2.3606808609801558e-06, "loss": 0.9686, "step": 6345 }, { "epoch": 7.767441860465116, "grad_norm": 1.3968199881787542, "learning_rate": 2.360040981713481e-06, "loss": 1.0953, "step": 6346 }, { "epoch": 7.7686658506731945, "grad_norm": 1.760733944969053, "learning_rate": 2.3594011116443944e-06, "loss": 0.5066, "step": 6347 }, { "epoch": 7.769889840881273, "grad_norm": 1.3996303004579171, "learning_rate": 2.3587612508149478e-06, "loss": 1.413, "step": 6348 }, { "epoch": 7.771113831089352, "grad_norm": 1.6047709436251225, "learning_rate": 2.358121399267189e-06, "loss": 0.4265, "step": 6349 }, { "epoch": 7.7723378212974294, "grad_norm": 1.3355744914131111, "learning_rate": 2.3574815570431675e-06, "loss": 0.4883, "step": 6350 }, { "epoch": 7.773561811505508, "grad_norm": 1.33032966528709, "learning_rate": 2.356841724184931e-06, "loss": 0.4623, "step": 6351 }, { "epoch": 7.774785801713586, "grad_norm": 1.4077646611104828, "learning_rate": 2.3562019007345255e-06, "loss": 0.5741, "step": 6352 }, { "epoch": 7.776009791921664, "grad_norm": 1.7243617817751655, "learning_rate": 2.355562086734001e-06, "loss": 0.5009, "step": 6353 }, { "epoch": 7.777233782129743, "grad_norm": 1.6802018562370642, "learning_rate": 2.354922282225401e-06, "loss": 1.3587, "step": 6354 }, { "epoch": 7.778457772337822, "grad_norm": 1.3457874947541661, "learning_rate": 2.3542824872507723e-06, "loss": 0.7931, "step": 6355 }, { "epoch": 7.779681762545899, "grad_norm": 1.7305667743062343, "learning_rate": 2.3536427018521588e-06, "loss": 0.4725, "step": 6356 }, { "epoch": 7.780905752753978, "grad_norm": 2.024355493387664, "learning_rate": 2.353002926071606e-06, "loss": 0.4963, "step": 6357 }, { "epoch": 7.782129742962057, "grad_norm": 1.885957084810247, "learning_rate": 2.3523631599511574e-06, "loss": 1.2003, "step": 6358 }, { "epoch": 7.783353733170134, "grad_norm": 1.6099951447344143, "learning_rate": 2.3517234035328563e-06, "loss": 0.7466, "step": 6359 }, { "epoch": 7.784577723378213, "grad_norm": 2.7128212155577365, "learning_rate": 2.3510836568587434e-06, "loss": 0.567, "step": 6360 }, { "epoch": 7.785801713586292, "grad_norm": 1.6942467173483322, "learning_rate": 2.3504439199708625e-06, "loss": 0.4741, "step": 6361 }, { "epoch": 7.787025703794369, "grad_norm": 1.7649952586292341, "learning_rate": 2.349804192911255e-06, "loss": 0.4415, "step": 6362 }, { "epoch": 7.788249694002448, "grad_norm": 1.6012010501675753, "learning_rate": 2.3491644757219595e-06, "loss": 1.2034, "step": 6363 }, { "epoch": 7.7894736842105265, "grad_norm": 2.236564495282154, "learning_rate": 2.3485247684450164e-06, "loss": 0.3381, "step": 6364 }, { "epoch": 7.790697674418604, "grad_norm": 1.7695946863961953, "learning_rate": 2.3478850711224655e-06, "loss": 1.1516, "step": 6365 }, { "epoch": 7.791921664626683, "grad_norm": 1.945488839822804, "learning_rate": 2.3472453837963454e-06, "loss": 0.569, "step": 6366 }, { "epoch": 7.7931456548347615, "grad_norm": 1.2522616716007797, "learning_rate": 2.3466057065086935e-06, "loss": 0.4403, "step": 6367 }, { "epoch": 7.79436964504284, "grad_norm": 2.367155316204829, "learning_rate": 2.3459660393015478e-06, "loss": 0.7491, "step": 6368 }, { "epoch": 7.795593635250918, "grad_norm": 1.006276573669132, "learning_rate": 2.345326382216942e-06, "loss": 0.5452, "step": 6369 }, { "epoch": 7.7968176254589965, "grad_norm": 2.1771923151439396, "learning_rate": 2.3446867352969164e-06, "loss": 0.4818, "step": 6370 }, { "epoch": 7.798041615667074, "grad_norm": 1.4827532711194809, "learning_rate": 2.344047098583504e-06, "loss": 0.6535, "step": 6371 }, { "epoch": 7.799265605875153, "grad_norm": 1.6928400523280505, "learning_rate": 2.343407472118739e-06, "loss": 1.8947, "step": 6372 }, { "epoch": 7.8004895960832314, "grad_norm": 2.999869818653983, "learning_rate": 2.3427678559446554e-06, "loss": 0.4963, "step": 6373 }, { "epoch": 7.80171358629131, "grad_norm": 1.2997550884305609, "learning_rate": 2.342128250103287e-06, "loss": 0.598, "step": 6374 }, { "epoch": 7.802937576499388, "grad_norm": 1.2982665403350968, "learning_rate": 2.3414886546366665e-06, "loss": 0.8818, "step": 6375 }, { "epoch": 7.804161566707466, "grad_norm": 1.7790068173929274, "learning_rate": 2.340849069586825e-06, "loss": 0.5406, "step": 6376 }, { "epoch": 7.805385556915545, "grad_norm": 1.7934018327783172, "learning_rate": 2.3402094949957948e-06, "loss": 0.3088, "step": 6377 }, { "epoch": 7.806609547123623, "grad_norm": 2.884363091439924, "learning_rate": 2.3395699309056036e-06, "loss": 0.4004, "step": 6378 }, { "epoch": 7.807833537331701, "grad_norm": 1.7104944041457413, "learning_rate": 2.338930377358284e-06, "loss": 0.5988, "step": 6379 }, { "epoch": 7.80905752753978, "grad_norm": 1.828553207410003, "learning_rate": 2.3382908343958656e-06, "loss": 0.4237, "step": 6380 }, { "epoch": 7.810281517747858, "grad_norm": 0.8560473536461888, "learning_rate": 2.3376513020603742e-06, "loss": 0.3522, "step": 6381 }, { "epoch": 7.811505507955936, "grad_norm": 2.187164878712133, "learning_rate": 2.3370117803938388e-06, "loss": 0.5092, "step": 6382 }, { "epoch": 7.812729498164015, "grad_norm": 0.8581788747608755, "learning_rate": 2.336372269438286e-06, "loss": 0.3962, "step": 6383 }, { "epoch": 7.813953488372093, "grad_norm": 2.2238904289202077, "learning_rate": 2.3357327692357434e-06, "loss": 1.0314, "step": 6384 }, { "epoch": 7.815177478580171, "grad_norm": 1.811620978599382, "learning_rate": 2.335093279828235e-06, "loss": 1.009, "step": 6385 }, { "epoch": 7.81640146878825, "grad_norm": 2.5321778542782707, "learning_rate": 2.3344538012577863e-06, "loss": 0.2292, "step": 6386 }, { "epoch": 7.817625458996328, "grad_norm": 1.9701060897032372, "learning_rate": 2.3338143335664213e-06, "loss": 0.9867, "step": 6387 }, { "epoch": 7.818849449204406, "grad_norm": 2.7290725194464724, "learning_rate": 2.3331748767961647e-06, "loss": 0.3862, "step": 6388 }, { "epoch": 7.820073439412485, "grad_norm": 1.3232573521713817, "learning_rate": 2.3325354309890373e-06, "loss": 0.7225, "step": 6389 }, { "epoch": 7.821297429620563, "grad_norm": 2.0045064400462578, "learning_rate": 2.331895996187062e-06, "loss": 0.3664, "step": 6390 }, { "epoch": 7.822521419828641, "grad_norm": 2.1190042295328255, "learning_rate": 2.3312565724322596e-06, "loss": 0.436, "step": 6391 }, { "epoch": 7.82374541003672, "grad_norm": 2.9902093146959623, "learning_rate": 2.330617159766652e-06, "loss": 0.3684, "step": 6392 }, { "epoch": 7.8249694002447985, "grad_norm": 1.4003514046150987, "learning_rate": 2.329977758232258e-06, "loss": 1.4202, "step": 6393 }, { "epoch": 7.826193390452876, "grad_norm": 1.4795763990798554, "learning_rate": 2.3293383678710965e-06, "loss": 0.6359, "step": 6394 }, { "epoch": 7.827417380660955, "grad_norm": 2.4029286591475456, "learning_rate": 2.328698988725186e-06, "loss": 0.6086, "step": 6395 }, { "epoch": 7.828641370869033, "grad_norm": 2.568768644899412, "learning_rate": 2.328059620836545e-06, "loss": 0.3747, "step": 6396 }, { "epoch": 7.829865361077111, "grad_norm": 1.2690905295348887, "learning_rate": 2.32742026424719e-06, "loss": 1.0527, "step": 6397 }, { "epoch": 7.83108935128519, "grad_norm": 1.4997677769604212, "learning_rate": 2.3267809189991363e-06, "loss": 0.4673, "step": 6398 }, { "epoch": 7.832313341493268, "grad_norm": 1.43383328323072, "learning_rate": 2.3261415851343987e-06, "loss": 0.5766, "step": 6399 }, { "epoch": 7.833537331701346, "grad_norm": 2.402075423468278, "learning_rate": 2.325502262694995e-06, "loss": 0.4113, "step": 6400 }, { "epoch": 7.834761321909425, "grad_norm": 2.002035304717365, "learning_rate": 2.3248629517229366e-06, "loss": 0.389, "step": 6401 }, { "epoch": 7.835985312117503, "grad_norm": 2.15502089538727, "learning_rate": 2.3242236522602373e-06, "loss": 0.9879, "step": 6402 }, { "epoch": 7.837209302325581, "grad_norm": 0.8787532889013411, "learning_rate": 2.3235843643489095e-06, "loss": 0.4706, "step": 6403 }, { "epoch": 7.83843329253366, "grad_norm": 1.9807158728249974, "learning_rate": 2.322945088030964e-06, "loss": 1.1447, "step": 6404 }, { "epoch": 7.839657282741738, "grad_norm": 1.9719233390286839, "learning_rate": 2.322305823348413e-06, "loss": 0.7824, "step": 6405 }, { "epoch": 7.840881272949816, "grad_norm": 1.0417988660377782, "learning_rate": 2.3216665703432667e-06, "loss": 0.6053, "step": 6406 }, { "epoch": 7.842105263157895, "grad_norm": 1.4186704140327342, "learning_rate": 2.3210273290575333e-06, "loss": 0.7206, "step": 6407 }, { "epoch": 7.843329253365973, "grad_norm": 0.9068147912213598, "learning_rate": 2.3203880995332208e-06, "loss": 0.5186, "step": 6408 }, { "epoch": 7.844553243574051, "grad_norm": 1.2460194225261694, "learning_rate": 2.319748881812339e-06, "loss": 0.4177, "step": 6409 }, { "epoch": 7.84577723378213, "grad_norm": 1.2848295013715836, "learning_rate": 2.3191096759368943e-06, "loss": 0.4538, "step": 6410 }, { "epoch": 7.847001223990208, "grad_norm": 1.0454490295934582, "learning_rate": 2.3184704819488923e-06, "loss": 0.3956, "step": 6411 }, { "epoch": 7.848225214198287, "grad_norm": 1.2479954063672936, "learning_rate": 2.317831299890338e-06, "loss": 0.7033, "step": 6412 }, { "epoch": 7.849449204406365, "grad_norm": 1.717865191901609, "learning_rate": 2.317192129803238e-06, "loss": 0.4988, "step": 6413 }, { "epoch": 7.850673194614443, "grad_norm": 1.38676001828163, "learning_rate": 2.316552971729595e-06, "loss": 0.4838, "step": 6414 }, { "epoch": 7.851897184822521, "grad_norm": 1.5680380507891953, "learning_rate": 2.315913825711412e-06, "loss": 1.5325, "step": 6415 }, { "epoch": 7.8531211750306, "grad_norm": 1.4821264761333008, "learning_rate": 2.3152746917906916e-06, "loss": 0.5067, "step": 6416 }, { "epoch": 7.854345165238678, "grad_norm": 1.8098155642268712, "learning_rate": 2.3146355700094346e-06, "loss": 0.4316, "step": 6417 }, { "epoch": 7.855569155446757, "grad_norm": 1.2846579422771365, "learning_rate": 2.3139964604096426e-06, "loss": 1.1405, "step": 6418 }, { "epoch": 7.8567931456548346, "grad_norm": 1.0021234331769409, "learning_rate": 2.3133573630333157e-06, "loss": 0.5545, "step": 6419 }, { "epoch": 7.858017135862913, "grad_norm": 1.1620423428888107, "learning_rate": 2.312718277922452e-06, "loss": 0.6131, "step": 6420 }, { "epoch": 7.859241126070992, "grad_norm": 1.2451838058765947, "learning_rate": 2.3120792051190498e-06, "loss": 0.3992, "step": 6421 }, { "epoch": 7.8604651162790695, "grad_norm": 2.124742232014125, "learning_rate": 2.3114401446651084e-06, "loss": 0.537, "step": 6422 }, { "epoch": 7.861689106487148, "grad_norm": 1.2268336856535829, "learning_rate": 2.3108010966026226e-06, "loss": 0.6358, "step": 6423 }, { "epoch": 7.862913096695227, "grad_norm": 1.3706022485440579, "learning_rate": 2.3101620609735898e-06, "loss": 1.1281, "step": 6424 }, { "epoch": 7.8641370869033045, "grad_norm": 1.3991198972302028, "learning_rate": 2.309523037820002e-06, "loss": 0.5473, "step": 6425 }, { "epoch": 7.865361077111383, "grad_norm": 1.0517474575405679, "learning_rate": 2.308884027183858e-06, "loss": 0.8532, "step": 6426 }, { "epoch": 7.866585067319462, "grad_norm": 2.5357071214159426, "learning_rate": 2.3082450291071484e-06, "loss": 1.0745, "step": 6427 }, { "epoch": 7.8678090575275395, "grad_norm": 2.1568818226425774, "learning_rate": 2.3076060436318656e-06, "loss": 0.5034, "step": 6428 }, { "epoch": 7.869033047735618, "grad_norm": 2.2941299182510395, "learning_rate": 2.3069670708000026e-06, "loss": 0.621, "step": 6429 }, { "epoch": 7.870257037943697, "grad_norm": 2.0064956137754004, "learning_rate": 2.3063281106535486e-06, "loss": 0.5618, "step": 6430 }, { "epoch": 7.871481028151774, "grad_norm": 1.4646369976533984, "learning_rate": 2.305689163234496e-06, "loss": 1.553, "step": 6431 }, { "epoch": 7.872705018359853, "grad_norm": 1.5949644017463636, "learning_rate": 2.3050502285848322e-06, "loss": 0.6333, "step": 6432 }, { "epoch": 7.873929008567932, "grad_norm": 0.9464520176511044, "learning_rate": 2.304411306746547e-06, "loss": 0.6017, "step": 6433 }, { "epoch": 7.875152998776009, "grad_norm": 2.6321413989284608, "learning_rate": 2.303772397761626e-06, "loss": 0.5349, "step": 6434 }, { "epoch": 7.876376988984088, "grad_norm": 1.4101368969377561, "learning_rate": 2.3031335016720587e-06, "loss": 0.7616, "step": 6435 }, { "epoch": 7.877600979192167, "grad_norm": 1.005176405975124, "learning_rate": 2.302494618519829e-06, "loss": 0.4248, "step": 6436 }, { "epoch": 7.878824969400245, "grad_norm": 2.1429830850601372, "learning_rate": 2.301855748346922e-06, "loss": 0.4186, "step": 6437 }, { "epoch": 7.880048959608323, "grad_norm": 1.3705738582832643, "learning_rate": 2.301216891195323e-06, "loss": 1.174, "step": 6438 }, { "epoch": 7.881272949816402, "grad_norm": 1.9023268429407107, "learning_rate": 2.300578047107014e-06, "loss": 1.1379, "step": 6439 }, { "epoch": 7.882496940024479, "grad_norm": 2.0008588840539696, "learning_rate": 2.2999392161239783e-06, "loss": 0.8351, "step": 6440 }, { "epoch": 7.883720930232558, "grad_norm": 1.6679836005669844, "learning_rate": 2.2993003982881976e-06, "loss": 0.7588, "step": 6441 }, { "epoch": 7.8849449204406366, "grad_norm": 2.834046041133698, "learning_rate": 2.298661593641653e-06, "loss": 0.7057, "step": 6442 }, { "epoch": 7.886168910648715, "grad_norm": 1.8748266833418534, "learning_rate": 2.298022802226322e-06, "loss": 0.4014, "step": 6443 }, { "epoch": 7.887392900856793, "grad_norm": 1.6324551054584329, "learning_rate": 2.297384024084187e-06, "loss": 0.4017, "step": 6444 }, { "epoch": 7.8886168910648715, "grad_norm": 1.0721594644763368, "learning_rate": 2.296745259257224e-06, "loss": 0.5127, "step": 6445 }, { "epoch": 7.88984088127295, "grad_norm": 2.104718850953799, "learning_rate": 2.2961065077874106e-06, "loss": 0.4634, "step": 6446 }, { "epoch": 7.891064871481028, "grad_norm": 2.0213950996699226, "learning_rate": 2.2954677697167234e-06, "loss": 1.0477, "step": 6447 }, { "epoch": 7.8922888616891065, "grad_norm": 1.6426395057854148, "learning_rate": 2.294829045087138e-06, "loss": 0.4854, "step": 6448 }, { "epoch": 7.893512851897185, "grad_norm": 1.163177707520498, "learning_rate": 2.2941903339406284e-06, "loss": 0.7707, "step": 6449 }, { "epoch": 7.894736842105263, "grad_norm": 1.0519899776675787, "learning_rate": 2.2935516363191695e-06, "loss": 0.5757, "step": 6450 }, { "epoch": 7.8959608323133414, "grad_norm": 1.3041922177912244, "learning_rate": 2.292912952264734e-06, "loss": 0.695, "step": 6451 }, { "epoch": 7.89718482252142, "grad_norm": 2.1438570895912794, "learning_rate": 2.2922742818192916e-06, "loss": 0.7514, "step": 6452 }, { "epoch": 7.898408812729498, "grad_norm": 1.4938356877684331, "learning_rate": 2.291635625024816e-06, "loss": 0.9376, "step": 6453 }, { "epoch": 7.899632802937576, "grad_norm": 1.5404968556870977, "learning_rate": 2.290996981923277e-06, "loss": 0.5708, "step": 6454 }, { "epoch": 7.900856793145655, "grad_norm": 1.6330119023981398, "learning_rate": 2.2903583525566423e-06, "loss": 0.5375, "step": 6455 }, { "epoch": 7.902080783353734, "grad_norm": 0.9900299997270264, "learning_rate": 2.2897197369668812e-06, "loss": 0.5993, "step": 6456 }, { "epoch": 7.903304773561811, "grad_norm": 2.1715794479895796, "learning_rate": 2.289081135195962e-06, "loss": 0.471, "step": 6457 }, { "epoch": 7.90452876376989, "grad_norm": 1.5522241457021897, "learning_rate": 2.28844254728585e-06, "loss": 0.9422, "step": 6458 }, { "epoch": 7.905752753977968, "grad_norm": 3.4193560319395075, "learning_rate": 2.287803973278512e-06, "loss": 0.4074, "step": 6459 }, { "epoch": 7.906976744186046, "grad_norm": 2.431470915588443, "learning_rate": 2.287165413215911e-06, "loss": 0.5222, "step": 6460 }, { "epoch": 7.908200734394125, "grad_norm": 2.999078908870587, "learning_rate": 2.286526867140012e-06, "loss": 0.3796, "step": 6461 }, { "epoch": 7.909424724602204, "grad_norm": 2.6513877585482364, "learning_rate": 2.2858883350927786e-06, "loss": 0.4295, "step": 6462 }, { "epoch": 7.910648714810281, "grad_norm": 1.8947110048370153, "learning_rate": 2.2852498171161713e-06, "loss": 0.8614, "step": 6463 }, { "epoch": 7.91187270501836, "grad_norm": 2.3669011247685763, "learning_rate": 2.284611313252152e-06, "loss": 0.3775, "step": 6464 }, { "epoch": 7.9130966952264385, "grad_norm": 1.8013226038777947, "learning_rate": 2.2839728235426795e-06, "loss": 0.6667, "step": 6465 }, { "epoch": 7.914320685434516, "grad_norm": 1.8964834194622402, "learning_rate": 2.283334348029715e-06, "loss": 1.3314, "step": 6466 }, { "epoch": 7.915544675642595, "grad_norm": 1.2948188057470875, "learning_rate": 2.2826958867552153e-06, "loss": 0.3766, "step": 6467 }, { "epoch": 7.9167686658506735, "grad_norm": 1.0286402988524281, "learning_rate": 2.282057439761139e-06, "loss": 0.6279, "step": 6468 }, { "epoch": 7.917992656058751, "grad_norm": 1.6925426449765872, "learning_rate": 2.2814190070894402e-06, "loss": 1.1022, "step": 6469 }, { "epoch": 7.91921664626683, "grad_norm": 1.8427883041555264, "learning_rate": 2.2807805887820762e-06, "loss": 0.5196, "step": 6470 }, { "epoch": 7.9204406364749085, "grad_norm": 1.2956385665239043, "learning_rate": 2.2801421848810025e-06, "loss": 0.7867, "step": 6471 }, { "epoch": 7.921664626682986, "grad_norm": 2.0664780524073256, "learning_rate": 2.2795037954281703e-06, "loss": 0.4626, "step": 6472 }, { "epoch": 7.922888616891065, "grad_norm": 1.4566234344919349, "learning_rate": 2.278865420465532e-06, "loss": 0.9413, "step": 6473 }, { "epoch": 7.9241126070991434, "grad_norm": 2.311016848382608, "learning_rate": 2.2782270600350413e-06, "loss": 0.4418, "step": 6474 }, { "epoch": 7.925336597307221, "grad_norm": 1.5712197457368773, "learning_rate": 2.277588714178648e-06, "loss": 0.4979, "step": 6475 }, { "epoch": 7.9265605875153, "grad_norm": 1.3068030445040366, "learning_rate": 2.276950382938302e-06, "loss": 1.5698, "step": 6476 }, { "epoch": 7.927784577723378, "grad_norm": 1.82240820721155, "learning_rate": 2.276312066355952e-06, "loss": 0.9399, "step": 6477 }, { "epoch": 7.929008567931456, "grad_norm": 1.079701556070988, "learning_rate": 2.2756737644735446e-06, "loss": 0.571, "step": 6478 }, { "epoch": 7.930232558139535, "grad_norm": 1.6582408780794728, "learning_rate": 2.275035477333028e-06, "loss": 0.7474, "step": 6479 }, { "epoch": 7.931456548347613, "grad_norm": 1.4723081966904492, "learning_rate": 2.2743972049763484e-06, "loss": 0.7276, "step": 6480 }, { "epoch": 7.932680538555692, "grad_norm": 0.7479043622663435, "learning_rate": 2.27375894744545e-06, "loss": 0.3566, "step": 6481 }, { "epoch": 7.93390452876377, "grad_norm": 1.7851883474000476, "learning_rate": 2.2731207047822756e-06, "loss": 0.9204, "step": 6482 }, { "epoch": 7.935128518971848, "grad_norm": 2.3491979260505866, "learning_rate": 2.2724824770287704e-06, "loss": 0.527, "step": 6483 }, { "epoch": 7.936352509179926, "grad_norm": 1.2274876281734133, "learning_rate": 2.2718442642268753e-06, "loss": 0.5635, "step": 6484 }, { "epoch": 7.937576499388005, "grad_norm": 1.2271815659565355, "learning_rate": 2.2712060664185306e-06, "loss": 0.4973, "step": 6485 }, { "epoch": 7.938800489596083, "grad_norm": 1.573782003874472, "learning_rate": 2.2705678836456767e-06, "loss": 0.5436, "step": 6486 }, { "epoch": 7.940024479804162, "grad_norm": 1.5731067546142778, "learning_rate": 2.2699297159502536e-06, "loss": 1.0202, "step": 6487 }, { "epoch": 7.94124847001224, "grad_norm": 1.2063900980605007, "learning_rate": 2.269291563374199e-06, "loss": 0.5372, "step": 6488 }, { "epoch": 7.942472460220318, "grad_norm": 3.1352360942448128, "learning_rate": 2.2686534259594494e-06, "loss": 0.4528, "step": 6489 }, { "epoch": 7.943696450428397, "grad_norm": 1.844702157764708, "learning_rate": 2.2680153037479405e-06, "loss": 0.4569, "step": 6490 }, { "epoch": 7.944920440636475, "grad_norm": 2.5384215567415787, "learning_rate": 2.2673771967816077e-06, "loss": 0.5199, "step": 6491 }, { "epoch": 7.946144430844553, "grad_norm": 1.7335183536862504, "learning_rate": 2.2667391051023852e-06, "loss": 0.6368, "step": 6492 }, { "epoch": 7.947368421052632, "grad_norm": 1.7198996127612873, "learning_rate": 2.266101028752206e-06, "loss": 0.6977, "step": 6493 }, { "epoch": 7.94859241126071, "grad_norm": 1.055801014759983, "learning_rate": 2.265462967773002e-06, "loss": 0.6194, "step": 6494 }, { "epoch": 7.949816401468788, "grad_norm": 1.2478402803516269, "learning_rate": 2.264824922206704e-06, "loss": 0.6557, "step": 6495 }, { "epoch": 7.951040391676867, "grad_norm": 1.2738984262595374, "learning_rate": 2.2641868920952427e-06, "loss": 0.4489, "step": 6496 }, { "epoch": 7.9522643818849446, "grad_norm": 1.4174841342947835, "learning_rate": 2.263548877480547e-06, "loss": 0.6936, "step": 6497 }, { "epoch": 7.953488372093023, "grad_norm": 1.2633803266280825, "learning_rate": 2.262910878404544e-06, "loss": 0.7056, "step": 6498 }, { "epoch": 7.954712362301102, "grad_norm": 1.392550027616769, "learning_rate": 2.2622728949091603e-06, "loss": 0.9022, "step": 6499 }, { "epoch": 7.95593635250918, "grad_norm": 1.8769648888263015, "learning_rate": 2.261634927036324e-06, "loss": 0.4263, "step": 6500 }, { "epoch": 7.957160342717258, "grad_norm": 1.0889524895514484, "learning_rate": 2.260996974827958e-06, "loss": 0.6873, "step": 6501 }, { "epoch": 7.958384332925337, "grad_norm": 1.759003368501378, "learning_rate": 2.2603590383259873e-06, "loss": 0.6407, "step": 6502 }, { "epoch": 7.9596083231334145, "grad_norm": 3.213795418342855, "learning_rate": 2.259721117572334e-06, "loss": 0.5033, "step": 6503 }, { "epoch": 7.960832313341493, "grad_norm": 1.6570475961779154, "learning_rate": 2.2590832126089195e-06, "loss": 0.5572, "step": 6504 }, { "epoch": 7.962056303549572, "grad_norm": 2.401486414392633, "learning_rate": 2.2584453234776656e-06, "loss": 0.5347, "step": 6505 }, { "epoch": 7.96328029375765, "grad_norm": 2.3915733876965044, "learning_rate": 2.2578074502204923e-06, "loss": 0.4216, "step": 6506 }, { "epoch": 7.964504283965728, "grad_norm": 1.8723587409096716, "learning_rate": 2.257169592879317e-06, "loss": 0.4498, "step": 6507 }, { "epoch": 7.965728274173807, "grad_norm": 1.4795229286384264, "learning_rate": 2.256531751496057e-06, "loss": 1.4942, "step": 6508 }, { "epoch": 7.966952264381885, "grad_norm": 1.408643815080826, "learning_rate": 2.2558939261126313e-06, "loss": 0.6326, "step": 6509 }, { "epoch": 7.968176254589963, "grad_norm": 1.8632149437330823, "learning_rate": 2.255256116770953e-06, "loss": 0.364, "step": 6510 }, { "epoch": 7.969400244798042, "grad_norm": 1.1677212037084943, "learning_rate": 2.254618323512938e-06, "loss": 0.8503, "step": 6511 }, { "epoch": 7.97062423500612, "grad_norm": 2.74362637601098, "learning_rate": 2.253980546380498e-06, "loss": 0.9329, "step": 6512 }, { "epoch": 7.971848225214198, "grad_norm": 1.2138541003201544, "learning_rate": 2.2533427854155478e-06, "loss": 0.5336, "step": 6513 }, { "epoch": 7.973072215422277, "grad_norm": 1.9490635302675117, "learning_rate": 2.2527050406599968e-06, "loss": 0.4981, "step": 6514 }, { "epoch": 7.974296205630355, "grad_norm": 2.076823666080513, "learning_rate": 2.2520673121557567e-06, "loss": 0.4441, "step": 6515 }, { "epoch": 7.975520195838433, "grad_norm": 2.3446952090547604, "learning_rate": 2.2514295999447353e-06, "loss": 0.4177, "step": 6516 }, { "epoch": 7.976744186046512, "grad_norm": 1.4463576397344817, "learning_rate": 2.25079190406884e-06, "loss": 0.6029, "step": 6517 }, { "epoch": 7.97796817625459, "grad_norm": 0.9614984293437456, "learning_rate": 2.250154224569981e-06, "loss": 0.4051, "step": 6518 }, { "epoch": 7.979192166462668, "grad_norm": 2.439891115854889, "learning_rate": 2.249516561490061e-06, "loss": 0.6975, "step": 6519 }, { "epoch": 7.9804161566707466, "grad_norm": 2.366961466184464, "learning_rate": 2.2488789148709863e-06, "loss": 0.495, "step": 6520 }, { "epoch": 7.981640146878825, "grad_norm": 1.4314400639263924, "learning_rate": 2.2482412847546604e-06, "loss": 1.0787, "step": 6521 }, { "epoch": 7.982864137086903, "grad_norm": 1.7201384868054848, "learning_rate": 2.247603671182986e-06, "loss": 0.4982, "step": 6522 }, { "epoch": 7.9840881272949815, "grad_norm": 2.4314344159525048, "learning_rate": 2.2469660741978654e-06, "loss": 0.599, "step": 6523 }, { "epoch": 7.98531211750306, "grad_norm": 1.0378853743489331, "learning_rate": 2.2463284938411985e-06, "loss": 0.4567, "step": 6524 }, { "epoch": 7.986536107711139, "grad_norm": 1.4300654604795238, "learning_rate": 2.2456909301548832e-06, "loss": 0.59, "step": 6525 }, { "epoch": 7.9877600979192165, "grad_norm": 1.901056556159614, "learning_rate": 2.2450533831808215e-06, "loss": 0.6842, "step": 6526 }, { "epoch": 7.988984088127295, "grad_norm": 2.3694972643588987, "learning_rate": 2.244415852960907e-06, "loss": 0.4838, "step": 6527 }, { "epoch": 7.990208078335373, "grad_norm": 2.6010002579570903, "learning_rate": 2.243778339537038e-06, "loss": 0.8433, "step": 6528 }, { "epoch": 7.9914320685434515, "grad_norm": 1.8891821354845706, "learning_rate": 2.2431408429511086e-06, "loss": 0.9764, "step": 6529 }, { "epoch": 7.99265605875153, "grad_norm": 1.6584500545661083, "learning_rate": 2.2425033632450125e-06, "loss": 0.4366, "step": 6530 }, { "epoch": 7.993880048959609, "grad_norm": 1.6116162532409417, "learning_rate": 2.2418659004606437e-06, "loss": 1.0089, "step": 6531 }, { "epoch": 7.995104039167686, "grad_norm": 1.4242967264481996, "learning_rate": 2.2412284546398926e-06, "loss": 0.48, "step": 6532 }, { "epoch": 7.996328029375765, "grad_norm": 1.2222910631498214, "learning_rate": 2.2405910258246515e-06, "loss": 0.7472, "step": 6533 }, { "epoch": 7.997552019583844, "grad_norm": 1.175125652308538, "learning_rate": 2.2399536140568065e-06, "loss": 0.4111, "step": 6534 }, { "epoch": 7.998776009791921, "grad_norm": 1.607805746201495, "learning_rate": 2.23931621937825e-06, "loss": 1.6259, "step": 6535 }, { "epoch": 8.0, "grad_norm": 1.0664715413083938, "learning_rate": 2.238678841830867e-06, "loss": 0.6796, "step": 6536 }, { "epoch": 8.001223990208079, "grad_norm": 1.852407278952052, "learning_rate": 2.2380414814565436e-06, "loss": 0.4172, "step": 6537 }, { "epoch": 8.002447980416157, "grad_norm": 1.2034229308038695, "learning_rate": 2.237404138297165e-06, "loss": 1.0923, "step": 6538 }, { "epoch": 8.003671970624236, "grad_norm": 2.234676285346996, "learning_rate": 2.236766812394615e-06, "loss": 0.4209, "step": 6539 }, { "epoch": 8.004895960832313, "grad_norm": 2.595893648366043, "learning_rate": 2.236129503790777e-06, "loss": 0.5418, "step": 6540 }, { "epoch": 8.006119951040391, "grad_norm": 1.9606420224909622, "learning_rate": 2.2354922125275323e-06, "loss": 0.4593, "step": 6541 }, { "epoch": 8.00734394124847, "grad_norm": 2.5706821317399595, "learning_rate": 2.234854938646761e-06, "loss": 0.8358, "step": 6542 }, { "epoch": 8.008567931456549, "grad_norm": 1.6788531132023272, "learning_rate": 2.234217682190341e-06, "loss": 0.9327, "step": 6543 }, { "epoch": 8.009791921664627, "grad_norm": 1.831913180628728, "learning_rate": 2.2335804432001534e-06, "loss": 0.3546, "step": 6544 }, { "epoch": 8.011015911872706, "grad_norm": 1.6276527680994484, "learning_rate": 2.2329432217180737e-06, "loss": 0.4239, "step": 6545 }, { "epoch": 8.012239902080783, "grad_norm": 2.952098102199096, "learning_rate": 2.232306017785977e-06, "loss": 0.3968, "step": 6546 }, { "epoch": 8.013463892288861, "grad_norm": 1.9907334436472974, "learning_rate": 2.2316688314457382e-06, "loss": 1.2117, "step": 6547 }, { "epoch": 8.01468788249694, "grad_norm": 2.7088756035862187, "learning_rate": 2.231031662739232e-06, "loss": 0.4099, "step": 6548 }, { "epoch": 8.015911872705018, "grad_norm": 1.6520868462835037, "learning_rate": 2.2303945117083304e-06, "loss": 1.1773, "step": 6549 }, { "epoch": 8.017135862913097, "grad_norm": 1.1601104717803261, "learning_rate": 2.229757378394904e-06, "loss": 0.4985, "step": 6550 }, { "epoch": 8.018359853121176, "grad_norm": 1.3819156123124103, "learning_rate": 2.2291202628408225e-06, "loss": 0.611, "step": 6551 }, { "epoch": 8.019583843329253, "grad_norm": 1.8373062072800515, "learning_rate": 2.228483165087956e-06, "loss": 0.5109, "step": 6552 }, { "epoch": 8.020807833537331, "grad_norm": 1.7391937407116769, "learning_rate": 2.2278460851781724e-06, "loss": 0.4007, "step": 6553 }, { "epoch": 8.02203182374541, "grad_norm": 1.8163978202841773, "learning_rate": 2.2272090231533365e-06, "loss": 0.4412, "step": 6554 }, { "epoch": 8.023255813953488, "grad_norm": 1.0365210928806816, "learning_rate": 2.2265719790553147e-06, "loss": 0.6446, "step": 6555 }, { "epoch": 8.024479804161567, "grad_norm": 1.5101181448802954, "learning_rate": 2.225934952925971e-06, "loss": 0.6972, "step": 6556 }, { "epoch": 8.025703794369646, "grad_norm": 1.282521961338584, "learning_rate": 2.2252979448071685e-06, "loss": 0.4626, "step": 6557 }, { "epoch": 8.026927784577722, "grad_norm": 1.496127508815973, "learning_rate": 2.224660954740769e-06, "loss": 0.498, "step": 6558 }, { "epoch": 8.028151774785801, "grad_norm": 1.281316904699095, "learning_rate": 2.2240239827686335e-06, "loss": 0.7813, "step": 6559 }, { "epoch": 8.02937576499388, "grad_norm": 1.0207430624149103, "learning_rate": 2.2233870289326203e-06, "loss": 0.6158, "step": 6560 }, { "epoch": 8.030599755201958, "grad_norm": 0.8493580114493111, "learning_rate": 2.2227500932745893e-06, "loss": 0.5268, "step": 6561 }, { "epoch": 8.031823745410037, "grad_norm": 2.151843795840132, "learning_rate": 2.222113175836397e-06, "loss": 1.4202, "step": 6562 }, { "epoch": 8.033047735618116, "grad_norm": 1.2724303590932984, "learning_rate": 2.2214762766598985e-06, "loss": 0.4424, "step": 6563 }, { "epoch": 8.034271725826194, "grad_norm": 1.1824886137481352, "learning_rate": 2.220839395786948e-06, "loss": 0.7238, "step": 6564 }, { "epoch": 8.035495716034271, "grad_norm": 1.5981847336758548, "learning_rate": 2.220202533259401e-06, "loss": 0.5605, "step": 6565 }, { "epoch": 8.03671970624235, "grad_norm": 1.834556106330547, "learning_rate": 2.2195656891191083e-06, "loss": 0.9681, "step": 6566 }, { "epoch": 8.037943696450428, "grad_norm": 1.6821814050332506, "learning_rate": 2.218928863407921e-06, "loss": 0.5547, "step": 6567 }, { "epoch": 8.039167686658507, "grad_norm": 1.6306047756115023, "learning_rate": 2.2182920561676896e-06, "loss": 1.3683, "step": 6568 }, { "epoch": 8.040391676866586, "grad_norm": 2.2780416034381323, "learning_rate": 2.217655267440262e-06, "loss": 0.5337, "step": 6569 }, { "epoch": 8.041615667074664, "grad_norm": 1.556982369483097, "learning_rate": 2.2170184972674856e-06, "loss": 0.4385, "step": 6570 }, { "epoch": 8.042839657282741, "grad_norm": 2.3618280438293655, "learning_rate": 2.2163817456912083e-06, "loss": 0.3458, "step": 6571 }, { "epoch": 8.04406364749082, "grad_norm": 1.636695555283927, "learning_rate": 2.215745012753273e-06, "loss": 0.5754, "step": 6572 }, { "epoch": 8.045287637698898, "grad_norm": 1.4238920586087913, "learning_rate": 2.2151082984955228e-06, "loss": 0.435, "step": 6573 }, { "epoch": 8.046511627906977, "grad_norm": 2.3307723102488693, "learning_rate": 2.2144716029598025e-06, "loss": 0.6126, "step": 6574 }, { "epoch": 8.047735618115055, "grad_norm": 3.2714291160695126, "learning_rate": 2.213834926187952e-06, "loss": 0.4289, "step": 6575 }, { "epoch": 8.048959608323134, "grad_norm": 1.2835388956626008, "learning_rate": 2.2131982682218124e-06, "loss": 0.3714, "step": 6576 }, { "epoch": 8.050183598531211, "grad_norm": 1.1486199663859957, "learning_rate": 2.212561629103221e-06, "loss": 0.8461, "step": 6577 }, { "epoch": 8.05140758873929, "grad_norm": 2.0539881862262774, "learning_rate": 2.2119250088740165e-06, "loss": 1.0233, "step": 6578 }, { "epoch": 8.052631578947368, "grad_norm": 1.850919262021815, "learning_rate": 2.211288407576035e-06, "loss": 0.4154, "step": 6579 }, { "epoch": 8.053855569155447, "grad_norm": 1.739758323024896, "learning_rate": 2.210651825251112e-06, "loss": 1.5668, "step": 6580 }, { "epoch": 8.055079559363525, "grad_norm": 2.2217845303409, "learning_rate": 2.2100152619410805e-06, "loss": 0.913, "step": 6581 }, { "epoch": 8.056303549571604, "grad_norm": 1.9126609029246495, "learning_rate": 2.2093787176877723e-06, "loss": 1.1169, "step": 6582 }, { "epoch": 8.057527539779683, "grad_norm": 1.3409938918644844, "learning_rate": 2.2087421925330208e-06, "loss": 0.9533, "step": 6583 }, { "epoch": 8.05875152998776, "grad_norm": 1.3082520502415809, "learning_rate": 2.208105686518655e-06, "loss": 0.5391, "step": 6584 }, { "epoch": 8.059975520195838, "grad_norm": 1.4857668539596944, "learning_rate": 2.2074691996865043e-06, "loss": 0.6487, "step": 6585 }, { "epoch": 8.061199510403917, "grad_norm": 1.4408452366081115, "learning_rate": 2.2068327320783945e-06, "loss": 1.235, "step": 6586 }, { "epoch": 8.062423500611995, "grad_norm": 1.954758532434582, "learning_rate": 2.206196283736154e-06, "loss": 0.8205, "step": 6587 }, { "epoch": 8.063647490820074, "grad_norm": 2.6520138235005217, "learning_rate": 2.2055598547016075e-06, "loss": 0.3566, "step": 6588 }, { "epoch": 8.064871481028153, "grad_norm": 2.3329818206342017, "learning_rate": 2.204923445016578e-06, "loss": 0.4829, "step": 6589 }, { "epoch": 8.06609547123623, "grad_norm": 1.638126250963845, "learning_rate": 2.204287054722887e-06, "loss": 1.1961, "step": 6590 }, { "epoch": 8.067319461444308, "grad_norm": 1.6622609886613309, "learning_rate": 2.2036506838623593e-06, "loss": 1.8884, "step": 6591 }, { "epoch": 8.068543451652387, "grad_norm": 2.6595975837950703, "learning_rate": 2.203014332476811e-06, "loss": 0.4776, "step": 6592 }, { "epoch": 8.069767441860465, "grad_norm": 1.03603033962083, "learning_rate": 2.202378000608063e-06, "loss": 0.4519, "step": 6593 }, { "epoch": 8.070991432068544, "grad_norm": 2.684968126513587, "learning_rate": 2.2017416882979316e-06, "loss": 0.5044, "step": 6594 }, { "epoch": 8.072215422276622, "grad_norm": 1.0340528404807305, "learning_rate": 2.2011053955882332e-06, "loss": 0.5022, "step": 6595 }, { "epoch": 8.0734394124847, "grad_norm": 1.8083352965813093, "learning_rate": 2.2004691225207825e-06, "loss": 0.5253, "step": 6596 }, { "epoch": 8.074663402692778, "grad_norm": 2.0069766418215487, "learning_rate": 2.1998328691373936e-06, "loss": 0.456, "step": 6597 }, { "epoch": 8.075887392900857, "grad_norm": 1.6744848094951486, "learning_rate": 2.1991966354798787e-06, "loss": 0.651, "step": 6598 }, { "epoch": 8.077111383108935, "grad_norm": 0.8180308965548361, "learning_rate": 2.198560421590047e-06, "loss": 0.3518, "step": 6599 }, { "epoch": 8.078335373317014, "grad_norm": 1.6823419391440262, "learning_rate": 2.1979242275097106e-06, "loss": 0.4815, "step": 6600 }, { "epoch": 8.079559363525092, "grad_norm": 1.0552436767092936, "learning_rate": 2.1972880532806765e-06, "loss": 0.4815, "step": 6601 }, { "epoch": 8.080783353733171, "grad_norm": 1.7345020217486629, "learning_rate": 2.1966518989447516e-06, "loss": 1.1959, "step": 6602 }, { "epoch": 8.082007343941248, "grad_norm": 1.5237870269831193, "learning_rate": 2.196015764543742e-06, "loss": 0.7239, "step": 6603 }, { "epoch": 8.083231334149326, "grad_norm": 1.8228339648224645, "learning_rate": 2.1953796501194514e-06, "loss": 0.3744, "step": 6604 }, { "epoch": 8.084455324357405, "grad_norm": 2.755178929686862, "learning_rate": 2.194743555713684e-06, "loss": 0.4591, "step": 6605 }, { "epoch": 8.085679314565484, "grad_norm": 0.8518916534997869, "learning_rate": 2.194107481368242e-06, "loss": 0.4681, "step": 6606 }, { "epoch": 8.086903304773562, "grad_norm": 1.2180052140000721, "learning_rate": 2.1934714271249243e-06, "loss": 0.608, "step": 6607 }, { "epoch": 8.088127294981641, "grad_norm": 1.1778763596836528, "learning_rate": 2.192835393025529e-06, "loss": 0.4067, "step": 6608 }, { "epoch": 8.089351285189718, "grad_norm": 2.5546624246829968, "learning_rate": 2.1921993791118574e-06, "loss": 0.2179, "step": 6609 }, { "epoch": 8.090575275397796, "grad_norm": 2.9113226101020553, "learning_rate": 2.191563385425704e-06, "loss": 0.3246, "step": 6610 }, { "epoch": 8.091799265605875, "grad_norm": 2.153578585260385, "learning_rate": 2.1909274120088638e-06, "loss": 0.4268, "step": 6611 }, { "epoch": 8.093023255813954, "grad_norm": 2.2384837814527563, "learning_rate": 2.19029145890313e-06, "loss": 0.5483, "step": 6612 }, { "epoch": 8.094247246022032, "grad_norm": 1.6083340656080023, "learning_rate": 2.189655526150297e-06, "loss": 1.6205, "step": 6613 }, { "epoch": 8.095471236230111, "grad_norm": 1.558093894148372, "learning_rate": 2.189019613792155e-06, "loss": 0.38, "step": 6614 }, { "epoch": 8.096695226438188, "grad_norm": 2.739626162532667, "learning_rate": 2.188383721870494e-06, "loss": 0.3574, "step": 6615 }, { "epoch": 8.097919216646266, "grad_norm": 1.579293419500051, "learning_rate": 2.1877478504271015e-06, "loss": 0.4278, "step": 6616 }, { "epoch": 8.099143206854345, "grad_norm": 1.7546863226970089, "learning_rate": 2.1871119995037646e-06, "loss": 0.5293, "step": 6617 }, { "epoch": 8.100367197062424, "grad_norm": 0.9651305705126904, "learning_rate": 2.1864761691422715e-06, "loss": 0.4008, "step": 6618 }, { "epoch": 8.101591187270502, "grad_norm": 1.261085419908414, "learning_rate": 2.185840359384404e-06, "loss": 0.395, "step": 6619 }, { "epoch": 8.10281517747858, "grad_norm": 0.8989441457127049, "learning_rate": 2.1852045702719464e-06, "loss": 0.3937, "step": 6620 }, { "epoch": 8.104039167686658, "grad_norm": 1.7656444366257238, "learning_rate": 2.1845688018466793e-06, "loss": 0.9946, "step": 6621 }, { "epoch": 8.105263157894736, "grad_norm": 2.38707802991172, "learning_rate": 2.1839330541503846e-06, "loss": 0.4113, "step": 6622 }, { "epoch": 8.106487148102815, "grad_norm": 1.6094169226634656, "learning_rate": 2.1832973272248403e-06, "loss": 0.4574, "step": 6623 }, { "epoch": 8.107711138310894, "grad_norm": 1.848057597046354, "learning_rate": 2.182661621111825e-06, "loss": 0.5808, "step": 6624 }, { "epoch": 8.108935128518972, "grad_norm": 1.3617645402363812, "learning_rate": 2.1820259358531127e-06, "loss": 0.545, "step": 6625 }, { "epoch": 8.11015911872705, "grad_norm": 1.1044490361287267, "learning_rate": 2.1813902714904807e-06, "loss": 0.3881, "step": 6626 }, { "epoch": 8.11138310893513, "grad_norm": 1.7041014492511182, "learning_rate": 2.1807546280657023e-06, "loss": 1.0949, "step": 6627 }, { "epoch": 8.112607099143206, "grad_norm": 1.3435808071205826, "learning_rate": 2.180119005620548e-06, "loss": 1.1226, "step": 6628 }, { "epoch": 8.113831089351285, "grad_norm": 2.9039464362574616, "learning_rate": 2.1794834041967895e-06, "loss": 0.7097, "step": 6629 }, { "epoch": 8.115055079559363, "grad_norm": 1.8387532100957535, "learning_rate": 2.178847823836196e-06, "loss": 1.3199, "step": 6630 }, { "epoch": 8.116279069767442, "grad_norm": 2.195197198773073, "learning_rate": 2.178212264580536e-06, "loss": 0.6144, "step": 6631 }, { "epoch": 8.11750305997552, "grad_norm": 1.2453756904687139, "learning_rate": 2.1775767264715756e-06, "loss": 0.6351, "step": 6632 }, { "epoch": 8.1187270501836, "grad_norm": 1.4057463204214742, "learning_rate": 2.176941209551081e-06, "loss": 0.8791, "step": 6633 }, { "epoch": 8.119951040391676, "grad_norm": 2.0494387136333647, "learning_rate": 2.176305713860814e-06, "loss": 0.4886, "step": 6634 }, { "epoch": 8.121175030599755, "grad_norm": 3.2447267396527124, "learning_rate": 2.1756702394425386e-06, "loss": 0.7742, "step": 6635 }, { "epoch": 8.122399020807833, "grad_norm": 1.585722094621414, "learning_rate": 2.1750347863380162e-06, "loss": 0.5811, "step": 6636 }, { "epoch": 8.123623011015912, "grad_norm": 2.5140095035140586, "learning_rate": 2.1743993545890054e-06, "loss": 0.9073, "step": 6637 }, { "epoch": 8.12484700122399, "grad_norm": 1.5246166388034237, "learning_rate": 2.1737639442372636e-06, "loss": 0.7068, "step": 6638 }, { "epoch": 8.12607099143207, "grad_norm": 1.039561841201002, "learning_rate": 2.1731285553245495e-06, "loss": 0.4207, "step": 6639 }, { "epoch": 8.127294981640146, "grad_norm": 2.4200675142431303, "learning_rate": 2.1724931878926176e-06, "loss": 0.5107, "step": 6640 }, { "epoch": 8.128518971848225, "grad_norm": 1.9694600450717241, "learning_rate": 2.1718578419832225e-06, "loss": 1.1118, "step": 6641 }, { "epoch": 8.129742962056303, "grad_norm": 2.679731976729549, "learning_rate": 2.1712225176381164e-06, "loss": 0.4174, "step": 6642 }, { "epoch": 8.130966952264382, "grad_norm": 3.0519648641709716, "learning_rate": 2.170587214899049e-06, "loss": 0.5244, "step": 6643 }, { "epoch": 8.13219094247246, "grad_norm": 1.0215783000774044, "learning_rate": 2.169951933807773e-06, "loss": 0.5428, "step": 6644 }, { "epoch": 8.13341493268054, "grad_norm": 2.14200161193924, "learning_rate": 2.1693166744060344e-06, "loss": 0.4949, "step": 6645 }, { "epoch": 8.134638922888616, "grad_norm": 1.4375016099115383, "learning_rate": 2.1686814367355813e-06, "loss": 0.4682, "step": 6646 }, { "epoch": 8.135862913096695, "grad_norm": 1.6534159060911062, "learning_rate": 2.168046220838158e-06, "loss": 0.5319, "step": 6647 }, { "epoch": 8.137086903304773, "grad_norm": 0.9916513924430601, "learning_rate": 2.1674110267555095e-06, "loss": 0.5308, "step": 6648 }, { "epoch": 8.138310893512852, "grad_norm": 2.1021229821731717, "learning_rate": 2.1667758545293783e-06, "loss": 0.5569, "step": 6649 }, { "epoch": 8.13953488372093, "grad_norm": 1.3537435303763818, "learning_rate": 2.1661407042015055e-06, "loss": 1.3237, "step": 6650 }, { "epoch": 8.140758873929009, "grad_norm": 1.2355430849580833, "learning_rate": 2.16550557581363e-06, "loss": 0.4008, "step": 6651 }, { "epoch": 8.141982864137088, "grad_norm": 1.7402211961817422, "learning_rate": 2.1648704694074916e-06, "loss": 1.494, "step": 6652 }, { "epoch": 8.143206854345165, "grad_norm": 1.8623640443822076, "learning_rate": 2.1642353850248273e-06, "loss": 0.6018, "step": 6653 }, { "epoch": 8.144430844553243, "grad_norm": 1.5327530630083277, "learning_rate": 2.1636003227073704e-06, "loss": 1.5603, "step": 6654 }, { "epoch": 8.145654834761322, "grad_norm": 2.125362372051552, "learning_rate": 2.1629652824968563e-06, "loss": 0.456, "step": 6655 }, { "epoch": 8.1468788249694, "grad_norm": 1.3044534533739336, "learning_rate": 2.1623302644350168e-06, "loss": 0.8784, "step": 6656 }, { "epoch": 8.148102815177479, "grad_norm": 2.3539004988269667, "learning_rate": 2.1616952685635837e-06, "loss": 0.562, "step": 6657 }, { "epoch": 8.149326805385558, "grad_norm": 1.2825701965030385, "learning_rate": 2.1610602949242863e-06, "loss": 0.5321, "step": 6658 }, { "epoch": 8.150550795593634, "grad_norm": 1.3544175396790887, "learning_rate": 2.160425343558853e-06, "loss": 0.6234, "step": 6659 }, { "epoch": 8.151774785801713, "grad_norm": 1.299887202012589, "learning_rate": 2.1597904145090094e-06, "loss": 1.5635, "step": 6660 }, { "epoch": 8.152998776009792, "grad_norm": 2.333933363151094, "learning_rate": 2.159155507816482e-06, "loss": 0.4757, "step": 6661 }, { "epoch": 8.15422276621787, "grad_norm": 1.4684572616599354, "learning_rate": 2.158520623522994e-06, "loss": 1.0486, "step": 6662 }, { "epoch": 8.155446756425949, "grad_norm": 1.4752317033059412, "learning_rate": 2.1578857616702674e-06, "loss": 1.0724, "step": 6663 }, { "epoch": 8.156670746634028, "grad_norm": 1.64374938038092, "learning_rate": 2.157250922300022e-06, "loss": 1.4275, "step": 6664 }, { "epoch": 8.157894736842104, "grad_norm": 2.013130125002096, "learning_rate": 2.1566161054539797e-06, "loss": 0.4911, "step": 6665 }, { "epoch": 8.159118727050183, "grad_norm": 1.487332898870085, "learning_rate": 2.1559813111738567e-06, "loss": 0.7539, "step": 6666 }, { "epoch": 8.160342717258262, "grad_norm": 2.0264269394419285, "learning_rate": 2.1553465395013693e-06, "loss": 0.4774, "step": 6667 }, { "epoch": 8.16156670746634, "grad_norm": 1.772637673248803, "learning_rate": 2.1547117904782324e-06, "loss": 1.0138, "step": 6668 }, { "epoch": 8.162790697674419, "grad_norm": 1.2854701511936402, "learning_rate": 2.154077064146159e-06, "loss": 0.614, "step": 6669 }, { "epoch": 8.164014687882498, "grad_norm": 2.13341912020497, "learning_rate": 2.1534423605468626e-06, "loss": 0.2966, "step": 6670 }, { "epoch": 8.165238678090576, "grad_norm": 1.7009529264148802, "learning_rate": 2.152807679722052e-06, "loss": 1.6012, "step": 6671 }, { "epoch": 8.166462668298653, "grad_norm": 1.961206018779308, "learning_rate": 2.152173021713437e-06, "loss": 0.6095, "step": 6672 }, { "epoch": 8.167686658506732, "grad_norm": 1.787273137692857, "learning_rate": 2.151538386562723e-06, "loss": 0.5964, "step": 6673 }, { "epoch": 8.16891064871481, "grad_norm": 0.8946737230648709, "learning_rate": 2.1509037743116183e-06, "loss": 0.4176, "step": 6674 }, { "epoch": 8.170134638922889, "grad_norm": 2.2226810664160697, "learning_rate": 2.1502691850018265e-06, "loss": 0.4573, "step": 6675 }, { "epoch": 8.171358629130967, "grad_norm": 2.28428571044889, "learning_rate": 2.14963461867505e-06, "loss": 0.4298, "step": 6676 }, { "epoch": 8.172582619339046, "grad_norm": 2.997896380223036, "learning_rate": 2.14900007537299e-06, "loss": 0.3779, "step": 6677 }, { "epoch": 8.173806609547123, "grad_norm": 2.5589920230247296, "learning_rate": 2.1483655551373474e-06, "loss": 1.0753, "step": 6678 }, { "epoch": 8.175030599755202, "grad_norm": 1.570323946009387, "learning_rate": 2.1477310580098197e-06, "loss": 0.314, "step": 6679 }, { "epoch": 8.17625458996328, "grad_norm": 1.9497693534675653, "learning_rate": 2.147096584032105e-06, "loss": 0.4015, "step": 6680 }, { "epoch": 8.177478580171359, "grad_norm": 1.6538646220109985, "learning_rate": 2.1464621332458965e-06, "loss": 0.3021, "step": 6681 }, { "epoch": 8.178702570379437, "grad_norm": 1.8859867396907404, "learning_rate": 2.1458277056928885e-06, "loss": 0.5354, "step": 6682 }, { "epoch": 8.179926560587516, "grad_norm": 2.0462923770334, "learning_rate": 2.145193301414774e-06, "loss": 0.4839, "step": 6683 }, { "epoch": 8.181150550795593, "grad_norm": 3.1764638328194432, "learning_rate": 2.144558920453244e-06, "loss": 0.4366, "step": 6684 }, { "epoch": 8.182374541003671, "grad_norm": 1.4940195153223896, "learning_rate": 2.1439245628499864e-06, "loss": 0.8251, "step": 6685 }, { "epoch": 8.18359853121175, "grad_norm": 0.8803815968032952, "learning_rate": 2.1432902286466893e-06, "loss": 0.4974, "step": 6686 }, { "epoch": 8.184822521419829, "grad_norm": 2.3973895988894656, "learning_rate": 2.1426559178850396e-06, "loss": 0.5138, "step": 6687 }, { "epoch": 8.186046511627907, "grad_norm": 2.399922407032271, "learning_rate": 2.142021630606721e-06, "loss": 0.3593, "step": 6688 }, { "epoch": 8.187270501835986, "grad_norm": 2.458582555614869, "learning_rate": 2.1413873668534175e-06, "loss": 0.9569, "step": 6689 }, { "epoch": 8.188494492044065, "grad_norm": 1.3219841045953808, "learning_rate": 2.1407531266668084e-06, "loss": 0.5078, "step": 6690 }, { "epoch": 8.189718482252141, "grad_norm": 1.786886327529939, "learning_rate": 2.140118910088576e-06, "loss": 0.5379, "step": 6691 }, { "epoch": 8.19094247246022, "grad_norm": 1.7820163825414106, "learning_rate": 2.139484717160398e-06, "loss": 0.6181, "step": 6692 }, { "epoch": 8.192166462668299, "grad_norm": 1.82121968401776, "learning_rate": 2.1388505479239506e-06, "loss": 0.6339, "step": 6693 }, { "epoch": 8.193390452876377, "grad_norm": 1.0360683861721705, "learning_rate": 2.1382164024209094e-06, "loss": 0.57, "step": 6694 }, { "epoch": 8.194614443084456, "grad_norm": 2.5714272393481226, "learning_rate": 2.1375822806929477e-06, "loss": 0.8338, "step": 6695 }, { "epoch": 8.195838433292534, "grad_norm": 1.5600166080691629, "learning_rate": 2.136948182781738e-06, "loss": 0.5656, "step": 6696 }, { "epoch": 8.197062423500611, "grad_norm": 1.6523175232907512, "learning_rate": 2.136314108728951e-06, "loss": 0.6632, "step": 6697 }, { "epoch": 8.19828641370869, "grad_norm": 0.9734671190130147, "learning_rate": 2.1356800585762564e-06, "loss": 0.5981, "step": 6698 }, { "epoch": 8.199510403916769, "grad_norm": 1.5994324357962955, "learning_rate": 2.1350460323653194e-06, "loss": 0.5245, "step": 6699 }, { "epoch": 8.200734394124847, "grad_norm": 1.682272062621435, "learning_rate": 2.134412030137809e-06, "loss": 0.5384, "step": 6700 }, { "epoch": 8.201958384332926, "grad_norm": 1.7850944820183734, "learning_rate": 2.133778051935387e-06, "loss": 0.4931, "step": 6701 }, { "epoch": 8.203182374541004, "grad_norm": 1.0821734833681462, "learning_rate": 2.1331440977997166e-06, "loss": 0.7508, "step": 6702 }, { "epoch": 8.204406364749081, "grad_norm": 2.5916685060296683, "learning_rate": 2.1325101677724585e-06, "loss": 0.5467, "step": 6703 }, { "epoch": 8.20563035495716, "grad_norm": 1.8478674357515412, "learning_rate": 2.131876261895274e-06, "loss": 0.9338, "step": 6704 }, { "epoch": 8.206854345165238, "grad_norm": 1.8155312794217595, "learning_rate": 2.13124238020982e-06, "loss": 0.7548, "step": 6705 }, { "epoch": 8.208078335373317, "grad_norm": 1.5256660612015858, "learning_rate": 2.130608522757752e-06, "loss": 0.4622, "step": 6706 }, { "epoch": 8.209302325581396, "grad_norm": 1.3451092969048304, "learning_rate": 2.129974689580727e-06, "loss": 0.7154, "step": 6707 }, { "epoch": 8.210526315789474, "grad_norm": 1.3987444071945088, "learning_rate": 2.129340880720395e-06, "loss": 0.6329, "step": 6708 }, { "epoch": 8.211750305997551, "grad_norm": 1.1621294693742426, "learning_rate": 2.128707096218411e-06, "loss": 0.754, "step": 6709 }, { "epoch": 8.21297429620563, "grad_norm": 1.1845142642246196, "learning_rate": 2.1280733361164224e-06, "loss": 0.5784, "step": 6710 }, { "epoch": 8.214198286413708, "grad_norm": 2.189692872493779, "learning_rate": 2.1274396004560788e-06, "loss": 0.4255, "step": 6711 }, { "epoch": 8.215422276621787, "grad_norm": 1.7069615360247032, "learning_rate": 2.126805889279026e-06, "loss": 0.4953, "step": 6712 }, { "epoch": 8.216646266829866, "grad_norm": 1.8455025414397845, "learning_rate": 2.1261722026269105e-06, "loss": 0.5336, "step": 6713 }, { "epoch": 8.217870257037944, "grad_norm": 2.215870657127642, "learning_rate": 2.1255385405413746e-06, "loss": 0.5535, "step": 6714 }, { "epoch": 8.219094247246023, "grad_norm": 1.8008465592282343, "learning_rate": 2.1249049030640614e-06, "loss": 0.7374, "step": 6715 }, { "epoch": 8.2203182374541, "grad_norm": 2.2221660810178148, "learning_rate": 2.12427129023661e-06, "loss": 0.6016, "step": 6716 }, { "epoch": 8.221542227662178, "grad_norm": 1.514802575350776, "learning_rate": 2.1236377021006598e-06, "loss": 0.6302, "step": 6717 }, { "epoch": 8.222766217870257, "grad_norm": 1.3686064572897325, "learning_rate": 2.123004138697849e-06, "loss": 0.5531, "step": 6718 }, { "epoch": 8.223990208078336, "grad_norm": 1.5506624409823542, "learning_rate": 2.1223706000698106e-06, "loss": 0.559, "step": 6719 }, { "epoch": 8.225214198286414, "grad_norm": 1.2619792515703245, "learning_rate": 2.12173708625818e-06, "loss": 0.6791, "step": 6720 }, { "epoch": 8.226438188494493, "grad_norm": 2.699893192131862, "learning_rate": 2.121103597304588e-06, "loss": 0.7501, "step": 6721 }, { "epoch": 8.22766217870257, "grad_norm": 1.8301710128733761, "learning_rate": 2.1204701332506674e-06, "loss": 0.4253, "step": 6722 }, { "epoch": 8.228886168910648, "grad_norm": 2.3546894013600017, "learning_rate": 2.119836694138045e-06, "loss": 0.4906, "step": 6723 }, { "epoch": 8.230110159118727, "grad_norm": 1.4132850068347453, "learning_rate": 2.11920328000835e-06, "loss": 0.6256, "step": 6724 }, { "epoch": 8.231334149326806, "grad_norm": 3.1660562586079974, "learning_rate": 2.1185698909032054e-06, "loss": 0.3519, "step": 6725 }, { "epoch": 8.232558139534884, "grad_norm": 2.030107706445244, "learning_rate": 2.1179365268642377e-06, "loss": 1.196, "step": 6726 }, { "epoch": 8.233782129742963, "grad_norm": 1.608322012543041, "learning_rate": 2.117303187933069e-06, "loss": 1.1044, "step": 6727 }, { "epoch": 8.23500611995104, "grad_norm": 1.4258471824206793, "learning_rate": 2.1166698741513184e-06, "loss": 0.5584, "step": 6728 }, { "epoch": 8.236230110159118, "grad_norm": 1.7014759508851782, "learning_rate": 2.116036585560605e-06, "loss": 1.3916, "step": 6729 }, { "epoch": 8.237454100367197, "grad_norm": 1.6110456457897848, "learning_rate": 2.115403322202548e-06, "loss": 0.6262, "step": 6730 }, { "epoch": 8.238678090575275, "grad_norm": 1.369057315777079, "learning_rate": 2.114770084118763e-06, "loss": 0.3673, "step": 6731 }, { "epoch": 8.239902080783354, "grad_norm": 1.5856641013252073, "learning_rate": 2.114136871350862e-06, "loss": 0.449, "step": 6732 }, { "epoch": 8.241126070991433, "grad_norm": 1.5896961609791502, "learning_rate": 2.1135036839404595e-06, "loss": 0.9894, "step": 6733 }, { "epoch": 8.24235006119951, "grad_norm": 1.1095919475049956, "learning_rate": 2.1128705219291636e-06, "loss": 0.487, "step": 6734 }, { "epoch": 8.243574051407588, "grad_norm": 1.771171627510569, "learning_rate": 2.1122373853585866e-06, "loss": 1.1139, "step": 6735 }, { "epoch": 8.244798041615667, "grad_norm": 2.384446093686923, "learning_rate": 2.1116042742703342e-06, "loss": 0.3326, "step": 6736 }, { "epoch": 8.246022031823745, "grad_norm": 1.4817463362080683, "learning_rate": 2.1109711887060124e-06, "loss": 0.5037, "step": 6737 }, { "epoch": 8.247246022031824, "grad_norm": 1.4327859517819457, "learning_rate": 2.1103381287072244e-06, "loss": 0.72, "step": 6738 }, { "epoch": 8.248470012239903, "grad_norm": 1.3495614381404395, "learning_rate": 2.1097050943155744e-06, "loss": 0.9724, "step": 6739 }, { "epoch": 8.249694002447981, "grad_norm": 1.2732064228089517, "learning_rate": 2.1090720855726613e-06, "loss": 0.7176, "step": 6740 }, { "epoch": 8.250917992656058, "grad_norm": 1.9495001343135345, "learning_rate": 2.108439102520085e-06, "loss": 0.678, "step": 6741 }, { "epoch": 8.252141982864137, "grad_norm": 2.245210237420504, "learning_rate": 2.107806145199442e-06, "loss": 0.4962, "step": 6742 }, { "epoch": 8.253365973072215, "grad_norm": 1.6548151967739417, "learning_rate": 2.1071732136523293e-06, "loss": 1.0057, "step": 6743 }, { "epoch": 8.254589963280294, "grad_norm": 2.193276884181038, "learning_rate": 2.1065403079203397e-06, "loss": 0.5579, "step": 6744 }, { "epoch": 8.255813953488373, "grad_norm": 2.348411526038012, "learning_rate": 2.1059074280450663e-06, "loss": 0.5225, "step": 6745 }, { "epoch": 8.257037943696451, "grad_norm": 2.5345396484067813, "learning_rate": 2.1052745740680988e-06, "loss": 0.5894, "step": 6746 }, { "epoch": 8.258261933904528, "grad_norm": 1.5737495522063873, "learning_rate": 2.1046417460310257e-06, "loss": 0.4743, "step": 6747 }, { "epoch": 8.259485924112607, "grad_norm": 1.0999047407078313, "learning_rate": 2.104008943975435e-06, "loss": 0.8891, "step": 6748 }, { "epoch": 8.260709914320685, "grad_norm": 2.3640012164651627, "learning_rate": 2.1033761679429125e-06, "loss": 0.6317, "step": 6749 }, { "epoch": 8.261933904528764, "grad_norm": 1.7448125991572676, "learning_rate": 2.1027434179750404e-06, "loss": 1.4651, "step": 6750 }, { "epoch": 8.263157894736842, "grad_norm": 2.082843579032236, "learning_rate": 2.1021106941134014e-06, "loss": 0.5306, "step": 6751 }, { "epoch": 8.264381884944921, "grad_norm": 1.285677467447035, "learning_rate": 2.1014779963995767e-06, "loss": 0.56, "step": 6752 }, { "epoch": 8.265605875152998, "grad_norm": 2.1964523325078935, "learning_rate": 2.1008453248751442e-06, "loss": 0.4775, "step": 6753 }, { "epoch": 8.266829865361077, "grad_norm": 1.6362339127780214, "learning_rate": 2.10021267958168e-06, "loss": 0.3189, "step": 6754 }, { "epoch": 8.268053855569155, "grad_norm": 1.0116216538068679, "learning_rate": 2.09958006056076e-06, "loss": 0.367, "step": 6755 }, { "epoch": 8.269277845777234, "grad_norm": 1.4266712203175176, "learning_rate": 2.098947467853957e-06, "loss": 0.498, "step": 6756 }, { "epoch": 8.270501835985312, "grad_norm": 1.3497383671407535, "learning_rate": 2.0983149015028433e-06, "loss": 1.1624, "step": 6757 }, { "epoch": 8.271725826193391, "grad_norm": 1.3987575352186479, "learning_rate": 2.0976823615489884e-06, "loss": 0.6565, "step": 6758 }, { "epoch": 8.27294981640147, "grad_norm": 1.4749288224159403, "learning_rate": 2.097049848033961e-06, "loss": 1.2508, "step": 6759 }, { "epoch": 8.274173806609546, "grad_norm": 2.0940035104170978, "learning_rate": 2.096417360999326e-06, "loss": 0.5891, "step": 6760 }, { "epoch": 8.275397796817625, "grad_norm": 1.6991679231192904, "learning_rate": 2.0957849004866503e-06, "loss": 0.4715, "step": 6761 }, { "epoch": 8.276621787025704, "grad_norm": 1.7989150696472913, "learning_rate": 2.0951524665374966e-06, "loss": 0.4668, "step": 6762 }, { "epoch": 8.277845777233782, "grad_norm": 1.431294075021027, "learning_rate": 2.0945200591934247e-06, "loss": 0.5813, "step": 6763 }, { "epoch": 8.279069767441861, "grad_norm": 1.4361816478408258, "learning_rate": 2.0938876784959932e-06, "loss": 0.6765, "step": 6764 }, { "epoch": 8.28029375764994, "grad_norm": 1.0359744260475334, "learning_rate": 2.0932553244867632e-06, "loss": 0.3745, "step": 6765 }, { "epoch": 8.281517747858016, "grad_norm": 1.8166112840470832, "learning_rate": 2.0926229972072884e-06, "loss": 0.3704, "step": 6766 }, { "epoch": 8.282741738066095, "grad_norm": 1.5874678889682423, "learning_rate": 2.0919906966991234e-06, "loss": 0.6657, "step": 6767 }, { "epoch": 8.283965728274174, "grad_norm": 1.3002318697371347, "learning_rate": 2.091358423003821e-06, "loss": 0.8721, "step": 6768 }, { "epoch": 8.285189718482252, "grad_norm": 2.072009868604084, "learning_rate": 2.0907261761629304e-06, "loss": 0.6825, "step": 6769 }, { "epoch": 8.286413708690331, "grad_norm": 1.951644430293242, "learning_rate": 2.0900939562180028e-06, "loss": 0.6618, "step": 6770 }, { "epoch": 8.28763769889841, "grad_norm": 1.7413664251402752, "learning_rate": 2.0894617632105846e-06, "loss": 0.6916, "step": 6771 }, { "epoch": 8.288861689106486, "grad_norm": 1.053326936700101, "learning_rate": 2.0888295971822204e-06, "loss": 0.62, "step": 6772 }, { "epoch": 8.290085679314565, "grad_norm": 1.914578599713639, "learning_rate": 2.088197458174453e-06, "loss": 1.1472, "step": 6773 }, { "epoch": 8.291309669522644, "grad_norm": 1.59381467019456, "learning_rate": 2.0875653462288272e-06, "loss": 0.6854, "step": 6774 }, { "epoch": 8.292533659730722, "grad_norm": 2.992613059046619, "learning_rate": 2.0869332613868807e-06, "loss": 0.4548, "step": 6775 }, { "epoch": 8.2937576499388, "grad_norm": 2.0344607930791074, "learning_rate": 2.0863012036901528e-06, "loss": 1.0239, "step": 6776 }, { "epoch": 8.29498164014688, "grad_norm": 2.3698672687306375, "learning_rate": 2.085669173180179e-06, "loss": 0.4126, "step": 6777 }, { "epoch": 8.296205630354958, "grad_norm": 1.4317860778082927, "learning_rate": 2.085037169898495e-06, "loss": 1.0913, "step": 6778 }, { "epoch": 8.297429620563035, "grad_norm": 2.112292106529208, "learning_rate": 2.0844051938866334e-06, "loss": 0.3344, "step": 6779 }, { "epoch": 8.298653610771114, "grad_norm": 2.0735705521766588, "learning_rate": 2.0837732451861254e-06, "loss": 0.6421, "step": 6780 }, { "epoch": 8.299877600979192, "grad_norm": 1.246949879564877, "learning_rate": 2.0831413238385005e-06, "loss": 0.7354, "step": 6781 }, { "epoch": 8.30110159118727, "grad_norm": 2.720861952136872, "learning_rate": 2.082509429885285e-06, "loss": 0.9224, "step": 6782 }, { "epoch": 8.30232558139535, "grad_norm": 1.7113489685599366, "learning_rate": 2.0818775633680057e-06, "loss": 0.3012, "step": 6783 }, { "epoch": 8.303549571603428, "grad_norm": 1.5961815147557212, "learning_rate": 2.081245724328187e-06, "loss": 0.5085, "step": 6784 }, { "epoch": 8.304773561811505, "grad_norm": 2.4260714758487714, "learning_rate": 2.0806139128073504e-06, "loss": 0.4103, "step": 6785 }, { "epoch": 8.305997552019583, "grad_norm": 2.7867483053975266, "learning_rate": 2.0799821288470155e-06, "loss": 0.3339, "step": 6786 }, { "epoch": 8.307221542227662, "grad_norm": 1.1644117937877905, "learning_rate": 2.079350372488702e-06, "loss": 1.0455, "step": 6787 }, { "epoch": 8.30844553243574, "grad_norm": 2.4071487147458277, "learning_rate": 2.078718643773926e-06, "loss": 0.4442, "step": 6788 }, { "epoch": 8.30966952264382, "grad_norm": 1.3280722816880743, "learning_rate": 2.0780869427442035e-06, "loss": 1.1564, "step": 6789 }, { "epoch": 8.310893512851898, "grad_norm": 1.2839188514873148, "learning_rate": 2.077455269441045e-06, "loss": 0.3658, "step": 6790 }, { "epoch": 8.312117503059975, "grad_norm": 2.0866053189124747, "learning_rate": 2.076823623905965e-06, "loss": 1.3442, "step": 6791 }, { "epoch": 8.313341493268053, "grad_norm": 1.6545847559824294, "learning_rate": 2.07619200618047e-06, "loss": 0.8894, "step": 6792 }, { "epoch": 8.314565483476132, "grad_norm": 1.4727434613282415, "learning_rate": 2.075560416306069e-06, "loss": 1.4901, "step": 6793 }, { "epoch": 8.31578947368421, "grad_norm": 2.605610783118542, "learning_rate": 2.074928854324268e-06, "loss": 0.4296, "step": 6794 }, { "epoch": 8.31701346389229, "grad_norm": 0.8857399561794008, "learning_rate": 2.0742973202765696e-06, "loss": 0.3487, "step": 6795 }, { "epoch": 8.318237454100368, "grad_norm": 2.101797883365381, "learning_rate": 2.0736658142044776e-06, "loss": 0.9794, "step": 6796 }, { "epoch": 8.319461444308445, "grad_norm": 1.0652121702932194, "learning_rate": 2.073034336149491e-06, "loss": 0.6028, "step": 6797 }, { "epoch": 8.320685434516523, "grad_norm": 1.3839643983213603, "learning_rate": 2.072402886153109e-06, "loss": 0.5129, "step": 6798 }, { "epoch": 8.321909424724602, "grad_norm": 1.2438367460226005, "learning_rate": 2.0717714642568264e-06, "loss": 0.6627, "step": 6799 }, { "epoch": 8.32313341493268, "grad_norm": 1.3737488812909973, "learning_rate": 2.0711400705021413e-06, "loss": 0.5324, "step": 6800 }, { "epoch": 8.32435740514076, "grad_norm": 1.774709631141625, "learning_rate": 2.0705087049305433e-06, "loss": 0.4884, "step": 6801 }, { "epoch": 8.325581395348838, "grad_norm": 1.0732136486811348, "learning_rate": 2.069877367583525e-06, "loss": 0.4756, "step": 6802 }, { "epoch": 8.326805385556916, "grad_norm": 1.0719627827164726, "learning_rate": 2.0692460585025743e-06, "loss": 0.6092, "step": 6803 }, { "epoch": 8.328029375764993, "grad_norm": 2.4598849394385995, "learning_rate": 2.0686147777291803e-06, "loss": 0.5021, "step": 6804 }, { "epoch": 8.329253365973072, "grad_norm": 1.425740124878504, "learning_rate": 2.0679835253048275e-06, "loss": 0.8715, "step": 6805 }, { "epoch": 8.33047735618115, "grad_norm": 1.6707733517804741, "learning_rate": 2.0673523012709997e-06, "loss": 1.1657, "step": 6806 }, { "epoch": 8.331701346389229, "grad_norm": 1.7249642200112596, "learning_rate": 2.0667211056691783e-06, "loss": 0.5078, "step": 6807 }, { "epoch": 8.332925336597308, "grad_norm": 1.453933640824531, "learning_rate": 2.066089938540842e-06, "loss": 0.3626, "step": 6808 }, { "epoch": 8.334149326805386, "grad_norm": 1.3321100873739509, "learning_rate": 2.0654587999274715e-06, "loss": 0.4366, "step": 6809 }, { "epoch": 8.335373317013463, "grad_norm": 1.1667372925937674, "learning_rate": 2.064827689870541e-06, "loss": 0.392, "step": 6810 }, { "epoch": 8.336597307221542, "grad_norm": 3.1394919175102993, "learning_rate": 2.064196608411525e-06, "loss": 0.4861, "step": 6811 }, { "epoch": 8.33782129742962, "grad_norm": 1.1134408590171994, "learning_rate": 2.0635655555918957e-06, "loss": 0.5081, "step": 6812 }, { "epoch": 8.339045287637699, "grad_norm": 1.8774082004774948, "learning_rate": 2.062934531453124e-06, "loss": 0.3019, "step": 6813 }, { "epoch": 8.340269277845778, "grad_norm": 1.5663781361641809, "learning_rate": 2.062303536036679e-06, "loss": 0.7362, "step": 6814 }, { "epoch": 8.341493268053856, "grad_norm": 1.9858781973250021, "learning_rate": 2.0616725693840257e-06, "loss": 0.5444, "step": 6815 }, { "epoch": 8.342717258261933, "grad_norm": 1.1553833772943796, "learning_rate": 2.06104163153663e-06, "loss": 0.5603, "step": 6816 }, { "epoch": 8.343941248470012, "grad_norm": 1.1608865636745855, "learning_rate": 2.060410722535955e-06, "loss": 0.6063, "step": 6817 }, { "epoch": 8.34516523867809, "grad_norm": 1.3641510293732337, "learning_rate": 2.0597798424234624e-06, "loss": 0.5947, "step": 6818 }, { "epoch": 8.346389228886169, "grad_norm": 1.6032515499589617, "learning_rate": 2.059148991240609e-06, "loss": 0.4921, "step": 6819 }, { "epoch": 8.347613219094248, "grad_norm": 2.144048809414431, "learning_rate": 2.0585181690288538e-06, "loss": 0.5399, "step": 6820 }, { "epoch": 8.348837209302326, "grad_norm": 1.9850893023138876, "learning_rate": 2.057887375829651e-06, "loss": 0.5216, "step": 6821 }, { "epoch": 8.350061199510403, "grad_norm": 2.227170451793553, "learning_rate": 2.0572566116844547e-06, "loss": 0.8738, "step": 6822 }, { "epoch": 8.351285189718482, "grad_norm": 2.0383547852124706, "learning_rate": 2.0566258766347167e-06, "loss": 1.1282, "step": 6823 }, { "epoch": 8.35250917992656, "grad_norm": 2.6014354088055485, "learning_rate": 2.0559951707218863e-06, "loss": 0.3638, "step": 6824 }, { "epoch": 8.353733170134639, "grad_norm": 1.2688177141995514, "learning_rate": 2.05536449398741e-06, "loss": 0.5223, "step": 6825 }, { "epoch": 8.354957160342718, "grad_norm": 1.682948828933325, "learning_rate": 2.054733846472735e-06, "loss": 0.9381, "step": 6826 }, { "epoch": 8.356181150550796, "grad_norm": 2.3556846243576572, "learning_rate": 2.054103228219306e-06, "loss": 0.9946, "step": 6827 }, { "epoch": 8.357405140758875, "grad_norm": 1.666616377118988, "learning_rate": 2.0534726392685627e-06, "loss": 0.6282, "step": 6828 }, { "epoch": 8.358629130966952, "grad_norm": 1.3707270378830119, "learning_rate": 2.052842079661945e-06, "loss": 1.2796, "step": 6829 }, { "epoch": 8.35985312117503, "grad_norm": 1.2083573403036485, "learning_rate": 2.0522115494408932e-06, "loss": 0.7671, "step": 6830 }, { "epoch": 8.361077111383109, "grad_norm": 1.1174026788841374, "learning_rate": 2.0515810486468416e-06, "loss": 0.5437, "step": 6831 }, { "epoch": 8.362301101591187, "grad_norm": 1.546058902351172, "learning_rate": 2.050950577321225e-06, "loss": 1.6447, "step": 6832 }, { "epoch": 8.363525091799266, "grad_norm": 1.252668060576506, "learning_rate": 2.050320135505476e-06, "loss": 0.6309, "step": 6833 }, { "epoch": 8.364749082007345, "grad_norm": 1.8654232756845972, "learning_rate": 2.049689723241023e-06, "loss": 0.3997, "step": 6834 }, { "epoch": 8.365973072215422, "grad_norm": 1.6231936388585335, "learning_rate": 2.0490593405692967e-06, "loss": 0.7407, "step": 6835 }, { "epoch": 8.3671970624235, "grad_norm": 1.3457571095214387, "learning_rate": 2.0484289875317236e-06, "loss": 1.0304, "step": 6836 }, { "epoch": 8.368421052631579, "grad_norm": 1.14068004119968, "learning_rate": 2.0477986641697263e-06, "loss": 0.5336, "step": 6837 }, { "epoch": 8.369645042839657, "grad_norm": 1.2752132172781216, "learning_rate": 2.0471683705247275e-06, "loss": 0.6511, "step": 6838 }, { "epoch": 8.370869033047736, "grad_norm": 2.1348581090097833, "learning_rate": 2.046538106638149e-06, "loss": 0.4558, "step": 6839 }, { "epoch": 8.372093023255815, "grad_norm": 1.4098613119270778, "learning_rate": 2.045907872551409e-06, "loss": 0.8978, "step": 6840 }, { "epoch": 8.373317013463891, "grad_norm": 2.4792318228813985, "learning_rate": 2.0452776683059246e-06, "loss": 0.4757, "step": 6841 }, { "epoch": 8.37454100367197, "grad_norm": 1.7922595657013023, "learning_rate": 2.0446474939431086e-06, "loss": 0.5988, "step": 6842 }, { "epoch": 8.375764993880049, "grad_norm": 3.2039573023020425, "learning_rate": 2.044017349504376e-06, "loss": 0.2865, "step": 6843 }, { "epoch": 8.376988984088127, "grad_norm": 1.656442882211272, "learning_rate": 2.0433872350311367e-06, "loss": 0.4233, "step": 6844 }, { "epoch": 8.378212974296206, "grad_norm": 1.7115662539335517, "learning_rate": 2.0427571505648e-06, "loss": 0.4689, "step": 6845 }, { "epoch": 8.379436964504285, "grad_norm": 1.597843038260663, "learning_rate": 2.0421270961467713e-06, "loss": 0.5481, "step": 6846 }, { "epoch": 8.380660954712361, "grad_norm": 1.8851945680330533, "learning_rate": 2.0414970718184557e-06, "loss": 0.4514, "step": 6847 }, { "epoch": 8.38188494492044, "grad_norm": 1.3415422344928152, "learning_rate": 2.0408670776212573e-06, "loss": 0.4068, "step": 6848 }, { "epoch": 8.383108935128519, "grad_norm": 2.4239336931726476, "learning_rate": 2.0402371135965765e-06, "loss": 0.68, "step": 6849 }, { "epoch": 8.384332925336597, "grad_norm": 1.0978519134438605, "learning_rate": 2.039607179785812e-06, "loss": 0.5674, "step": 6850 }, { "epoch": 8.385556915544676, "grad_norm": 1.1190642771781085, "learning_rate": 2.0389772762303604e-06, "loss": 0.6769, "step": 6851 }, { "epoch": 8.386780905752754, "grad_norm": 1.8389414534254926, "learning_rate": 2.0383474029716173e-06, "loss": 1.0561, "step": 6852 }, { "epoch": 8.388004895960833, "grad_norm": 1.9656203623550008, "learning_rate": 2.037717560050975e-06, "loss": 0.496, "step": 6853 }, { "epoch": 8.38922888616891, "grad_norm": 1.327016169674342, "learning_rate": 2.0370877475098262e-06, "loss": 0.5376, "step": 6854 }, { "epoch": 8.390452876376989, "grad_norm": 1.8536413053123195, "learning_rate": 2.036457965389556e-06, "loss": 0.3275, "step": 6855 }, { "epoch": 8.391676866585067, "grad_norm": 1.0021245001200034, "learning_rate": 2.035828213731556e-06, "loss": 0.5228, "step": 6856 }, { "epoch": 8.392900856793146, "grad_norm": 1.8241781320089239, "learning_rate": 2.035198492577208e-06, "loss": 0.6322, "step": 6857 }, { "epoch": 8.394124847001224, "grad_norm": 2.8210332212182134, "learning_rate": 2.0345688019678963e-06, "loss": 0.58, "step": 6858 }, { "epoch": 8.395348837209303, "grad_norm": 2.6151300744916948, "learning_rate": 2.033939141945001e-06, "loss": 0.5957, "step": 6859 }, { "epoch": 8.39657282741738, "grad_norm": 2.165004402826451, "learning_rate": 2.033309512549901e-06, "loss": 1.0409, "step": 6860 }, { "epoch": 8.397796817625458, "grad_norm": 1.7571171236797694, "learning_rate": 2.032679913823974e-06, "loss": 0.4679, "step": 6861 }, { "epoch": 8.399020807833537, "grad_norm": 1.9053920841864824, "learning_rate": 2.0320503458085956e-06, "loss": 0.5575, "step": 6862 }, { "epoch": 8.400244798041616, "grad_norm": 1.9773040311635894, "learning_rate": 2.031420808545137e-06, "loss": 0.5818, "step": 6863 }, { "epoch": 8.401468788249694, "grad_norm": 3.0110220403143764, "learning_rate": 2.0307913020749685e-06, "loss": 0.3926, "step": 6864 }, { "epoch": 8.402692778457773, "grad_norm": 1.0568991420469478, "learning_rate": 2.0301618264394617e-06, "loss": 0.3997, "step": 6865 }, { "epoch": 8.403916768665852, "grad_norm": 1.7182677194320992, "learning_rate": 2.0295323816799814e-06, "loss": 1.3688, "step": 6866 }, { "epoch": 8.405140758873928, "grad_norm": 1.4008665143296746, "learning_rate": 2.028902967837893e-06, "loss": 0.6216, "step": 6867 }, { "epoch": 8.406364749082007, "grad_norm": 2.4883738778729336, "learning_rate": 2.0282735849545583e-06, "loss": 1.1887, "step": 6868 }, { "epoch": 8.407588739290086, "grad_norm": 1.3358477596711156, "learning_rate": 2.0276442330713394e-06, "loss": 0.8498, "step": 6869 }, { "epoch": 8.408812729498164, "grad_norm": 1.602101616691074, "learning_rate": 2.0270149122295943e-06, "loss": 0.5024, "step": 6870 }, { "epoch": 8.410036719706243, "grad_norm": 1.7691185932609315, "learning_rate": 2.0263856224706805e-06, "loss": 0.3458, "step": 6871 }, { "epoch": 8.411260709914322, "grad_norm": 1.1145475603032757, "learning_rate": 2.0257563638359507e-06, "loss": 1.2042, "step": 6872 }, { "epoch": 8.412484700122398, "grad_norm": 1.18557478379905, "learning_rate": 2.025127136366758e-06, "loss": 0.7738, "step": 6873 }, { "epoch": 8.413708690330477, "grad_norm": 1.2968808701250418, "learning_rate": 2.0244979401044553e-06, "loss": 0.7448, "step": 6874 }, { "epoch": 8.414932680538556, "grad_norm": 1.0343370882932446, "learning_rate": 2.0238687750903883e-06, "loss": 0.5108, "step": 6875 }, { "epoch": 8.416156670746634, "grad_norm": 2.296990632500212, "learning_rate": 2.0232396413659043e-06, "loss": 0.5308, "step": 6876 }, { "epoch": 8.417380660954713, "grad_norm": 2.2444572686856623, "learning_rate": 2.022610538972347e-06, "loss": 0.4093, "step": 6877 }, { "epoch": 8.418604651162791, "grad_norm": 2.1598296098055196, "learning_rate": 2.02198146795106e-06, "loss": 0.5268, "step": 6878 }, { "epoch": 8.419828641370868, "grad_norm": 1.6325921715529566, "learning_rate": 2.0213524283433834e-06, "loss": 0.4158, "step": 6879 }, { "epoch": 8.421052631578947, "grad_norm": 1.8089362406027387, "learning_rate": 2.0207234201906546e-06, "loss": 1.3975, "step": 6880 }, { "epoch": 8.422276621787026, "grad_norm": 1.8612527890644999, "learning_rate": 2.0200944435342086e-06, "loss": 0.5012, "step": 6881 }, { "epoch": 8.423500611995104, "grad_norm": 1.9504900453712164, "learning_rate": 2.019465498415382e-06, "loss": 0.6228, "step": 6882 }, { "epoch": 8.424724602203183, "grad_norm": 2.6515030430435997, "learning_rate": 2.0188365848755057e-06, "loss": 0.4604, "step": 6883 }, { "epoch": 8.425948592411261, "grad_norm": 1.0143521450619137, "learning_rate": 2.0182077029559086e-06, "loss": 0.3916, "step": 6884 }, { "epoch": 8.427172582619338, "grad_norm": 1.7655041446453574, "learning_rate": 2.01757885269792e-06, "loss": 1.2224, "step": 6885 }, { "epoch": 8.428396572827417, "grad_norm": 0.9272933985207185, "learning_rate": 2.0169500341428637e-06, "loss": 0.4503, "step": 6886 }, { "epoch": 8.429620563035495, "grad_norm": 2.757552124711763, "learning_rate": 2.016321247332065e-06, "loss": 0.4304, "step": 6887 }, { "epoch": 8.430844553243574, "grad_norm": 1.8491804811230579, "learning_rate": 2.015692492306845e-06, "loss": 0.9155, "step": 6888 }, { "epoch": 8.432068543451653, "grad_norm": 2.0338262293146614, "learning_rate": 2.015063769108524e-06, "loss": 0.9725, "step": 6889 }, { "epoch": 8.433292533659731, "grad_norm": 1.324387009278543, "learning_rate": 2.0144350777784163e-06, "loss": 1.1346, "step": 6890 }, { "epoch": 8.43451652386781, "grad_norm": 1.0707878985759098, "learning_rate": 2.0138064183578412e-06, "loss": 0.6247, "step": 6891 }, { "epoch": 8.435740514075887, "grad_norm": 2.2611799007059346, "learning_rate": 2.0131777908881096e-06, "loss": 0.4719, "step": 6892 }, { "epoch": 8.436964504283965, "grad_norm": 1.6975677601065493, "learning_rate": 2.0125491954105324e-06, "loss": 1.0275, "step": 6893 }, { "epoch": 8.438188494492044, "grad_norm": 2.0861918018359993, "learning_rate": 2.011920631966419e-06, "loss": 0.5208, "step": 6894 }, { "epoch": 8.439412484700123, "grad_norm": 1.754974238545466, "learning_rate": 2.0112921005970768e-06, "loss": 0.6605, "step": 6895 }, { "epoch": 8.440636474908201, "grad_norm": 2.1497824673124355, "learning_rate": 2.0106636013438103e-06, "loss": 0.6481, "step": 6896 }, { "epoch": 8.44186046511628, "grad_norm": 2.4560190232931527, "learning_rate": 2.010035134247922e-06, "loss": 0.6869, "step": 6897 }, { "epoch": 8.443084455324357, "grad_norm": 1.367054637174487, "learning_rate": 2.0094066993507128e-06, "loss": 1.0083, "step": 6898 }, { "epoch": 8.444308445532435, "grad_norm": 2.667424874141127, "learning_rate": 2.0087782966934788e-06, "loss": 0.435, "step": 6899 }, { "epoch": 8.445532435740514, "grad_norm": 1.4306981438521915, "learning_rate": 2.0081499263175204e-06, "loss": 1.3167, "step": 6900 }, { "epoch": 8.446756425948593, "grad_norm": 1.4035655127370477, "learning_rate": 2.007521588264129e-06, "loss": 0.8143, "step": 6901 }, { "epoch": 8.447980416156671, "grad_norm": 1.0646452283768477, "learning_rate": 2.0068932825745973e-06, "loss": 0.5515, "step": 6902 }, { "epoch": 8.44920440636475, "grad_norm": 1.0594162101167974, "learning_rate": 2.006265009290214e-06, "loss": 0.5885, "step": 6903 }, { "epoch": 8.450428396572827, "grad_norm": 1.3454459498404339, "learning_rate": 2.0056367684522693e-06, "loss": 0.7009, "step": 6904 }, { "epoch": 8.451652386780905, "grad_norm": 1.731456978484616, "learning_rate": 2.0050085601020476e-06, "loss": 0.5149, "step": 6905 }, { "epoch": 8.452876376988984, "grad_norm": 2.1124859246293677, "learning_rate": 2.0043803842808323e-06, "loss": 0.3843, "step": 6906 }, { "epoch": 8.454100367197062, "grad_norm": 1.756268694537351, "learning_rate": 2.003752241029906e-06, "loss": 0.4338, "step": 6907 }, { "epoch": 8.455324357405141, "grad_norm": 1.5286467358429099, "learning_rate": 2.0031241303905445e-06, "loss": 0.942, "step": 6908 }, { "epoch": 8.45654834761322, "grad_norm": 1.3663857157089, "learning_rate": 2.002496052404029e-06, "loss": 0.5985, "step": 6909 }, { "epoch": 8.457772337821297, "grad_norm": 1.4107113093896, "learning_rate": 2.001868007111633e-06, "loss": 0.6929, "step": 6910 }, { "epoch": 8.458996328029375, "grad_norm": 1.6320947535621373, "learning_rate": 2.0012399945546283e-06, "loss": 1.8083, "step": 6911 }, { "epoch": 8.460220318237454, "grad_norm": 1.4744483172046594, "learning_rate": 2.0006120147742865e-06, "loss": 0.7232, "step": 6912 }, { "epoch": 8.461444308445532, "grad_norm": 2.00352122411292, "learning_rate": 1.9999840678118767e-06, "loss": 0.9811, "step": 6913 }, { "epoch": 8.462668298653611, "grad_norm": 1.7565456244570796, "learning_rate": 1.9993561537086637e-06, "loss": 1.0189, "step": 6914 }, { "epoch": 8.46389228886169, "grad_norm": 1.2860435004003699, "learning_rate": 1.998728272505913e-06, "loss": 0.5951, "step": 6915 }, { "epoch": 8.465116279069768, "grad_norm": 1.4516205440411636, "learning_rate": 1.9981004242448855e-06, "loss": 0.9199, "step": 6916 }, { "epoch": 8.466340269277845, "grad_norm": 1.0543644906493497, "learning_rate": 1.997472608966842e-06, "loss": 0.6671, "step": 6917 }, { "epoch": 8.467564259485924, "grad_norm": 2.362233999394828, "learning_rate": 1.996844826713041e-06, "loss": 0.5139, "step": 6918 }, { "epoch": 8.468788249694002, "grad_norm": 1.6159726855292025, "learning_rate": 1.9962170775247365e-06, "loss": 0.4396, "step": 6919 }, { "epoch": 8.470012239902081, "grad_norm": 2.017775189588905, "learning_rate": 1.995589361443182e-06, "loss": 0.3846, "step": 6920 }, { "epoch": 8.47123623011016, "grad_norm": 2.1827549794974397, "learning_rate": 1.994961678509628e-06, "loss": 0.4734, "step": 6921 }, { "epoch": 8.472460220318238, "grad_norm": 1.3248508693903187, "learning_rate": 1.9943340287653255e-06, "loss": 0.4416, "step": 6922 }, { "epoch": 8.473684210526315, "grad_norm": 1.9989058329621847, "learning_rate": 1.9937064122515205e-06, "loss": 0.8537, "step": 6923 }, { "epoch": 8.474908200734394, "grad_norm": 1.6317226144446266, "learning_rate": 1.993078829009457e-06, "loss": 0.8315, "step": 6924 }, { "epoch": 8.476132190942472, "grad_norm": 2.4504369021737, "learning_rate": 1.9924512790803775e-06, "loss": 0.7389, "step": 6925 }, { "epoch": 8.477356181150551, "grad_norm": 0.7875174469814209, "learning_rate": 1.991823762505523e-06, "loss": 0.4365, "step": 6926 }, { "epoch": 8.47858017135863, "grad_norm": 2.1042315666532097, "learning_rate": 1.991196279326132e-06, "loss": 0.5648, "step": 6927 }, { "epoch": 8.479804161566708, "grad_norm": 1.8001978364625342, "learning_rate": 1.990568829583439e-06, "loss": 0.4665, "step": 6928 }, { "epoch": 8.481028151774785, "grad_norm": 1.9621950415326832, "learning_rate": 1.989941413318678e-06, "loss": 1.2034, "step": 6929 }, { "epoch": 8.482252141982864, "grad_norm": 1.1252543620823954, "learning_rate": 1.9893140305730805e-06, "loss": 0.6317, "step": 6930 }, { "epoch": 8.483476132190942, "grad_norm": 1.082171300932542, "learning_rate": 1.9886866813878767e-06, "loss": 0.563, "step": 6931 }, { "epoch": 8.48470012239902, "grad_norm": 2.2937667187947244, "learning_rate": 1.988059365804293e-06, "loss": 0.5672, "step": 6932 }, { "epoch": 8.4859241126071, "grad_norm": 1.9160811262149635, "learning_rate": 1.9874320838635537e-06, "loss": 1.0121, "step": 6933 }, { "epoch": 8.487148102815178, "grad_norm": 1.270493485851037, "learning_rate": 1.9868048356068818e-06, "loss": 0.5519, "step": 6934 }, { "epoch": 8.488372093023255, "grad_norm": 2.1302149793885845, "learning_rate": 1.9861776210754988e-06, "loss": 0.8275, "step": 6935 }, { "epoch": 8.489596083231334, "grad_norm": 1.4841212997947781, "learning_rate": 1.985550440310622e-06, "loss": 1.1688, "step": 6936 }, { "epoch": 8.490820073439412, "grad_norm": 2.491020025593202, "learning_rate": 1.9849232933534672e-06, "loss": 0.6713, "step": 6937 }, { "epoch": 8.49204406364749, "grad_norm": 1.5403835970613065, "learning_rate": 1.9842961802452477e-06, "loss": 0.4644, "step": 6938 }, { "epoch": 8.49326805385557, "grad_norm": 1.5383925625316852, "learning_rate": 1.983669101027177e-06, "loss": 0.4793, "step": 6939 }, { "epoch": 8.494492044063648, "grad_norm": 1.4579316438762524, "learning_rate": 1.9830420557404623e-06, "loss": 2.2863, "step": 6940 }, { "epoch": 8.495716034271727, "grad_norm": 1.4960720306087048, "learning_rate": 1.982415044426312e-06, "loss": 0.483, "step": 6941 }, { "epoch": 8.496940024479803, "grad_norm": 1.4837610998494806, "learning_rate": 1.98178806712593e-06, "loss": 0.4806, "step": 6942 }, { "epoch": 8.498164014687882, "grad_norm": 1.7611504411036183, "learning_rate": 1.9811611238805203e-06, "loss": 0.743, "step": 6943 }, { "epoch": 8.49938800489596, "grad_norm": 1.524582252481321, "learning_rate": 1.980534214731282e-06, "loss": 0.4711, "step": 6944 }, { "epoch": 8.50061199510404, "grad_norm": 1.215590223477337, "learning_rate": 1.9799073397194142e-06, "loss": 0.9796, "step": 6945 }, { "epoch": 8.501835985312118, "grad_norm": 1.4353806036948056, "learning_rate": 1.979280498886112e-06, "loss": 0.4796, "step": 6946 }, { "epoch": 8.503059975520197, "grad_norm": 1.6490080261176043, "learning_rate": 1.9786536922725692e-06, "loss": 0.6383, "step": 6947 }, { "epoch": 8.504283965728273, "grad_norm": 1.6226611269231428, "learning_rate": 1.9780269199199776e-06, "loss": 0.531, "step": 6948 }, { "epoch": 8.505507955936352, "grad_norm": 1.5774658530954, "learning_rate": 1.9774001818695264e-06, "loss": 1.3392, "step": 6949 }, { "epoch": 8.50673194614443, "grad_norm": 2.510875266684185, "learning_rate": 1.976773478162402e-06, "loss": 0.6027, "step": 6950 }, { "epoch": 8.50795593635251, "grad_norm": 1.7567774012251876, "learning_rate": 1.976146808839789e-06, "loss": 0.4946, "step": 6951 }, { "epoch": 8.509179926560588, "grad_norm": 2.7038831977134774, "learning_rate": 1.9755201739428707e-06, "loss": 0.4625, "step": 6952 }, { "epoch": 8.510403916768666, "grad_norm": 1.6297699448061456, "learning_rate": 1.9748935735128266e-06, "loss": 1.4047, "step": 6953 }, { "epoch": 8.511627906976745, "grad_norm": 2.9871413412687002, "learning_rate": 1.9742670075908353e-06, "loss": 0.6046, "step": 6954 }, { "epoch": 8.512851897184822, "grad_norm": 1.309861934293757, "learning_rate": 1.97364047621807e-06, "loss": 0.6985, "step": 6955 }, { "epoch": 8.5140758873929, "grad_norm": 1.2475183849935356, "learning_rate": 1.973013979435707e-06, "loss": 0.7786, "step": 6956 }, { "epoch": 8.51529987760098, "grad_norm": 1.683720266602124, "learning_rate": 1.972387517284916e-06, "loss": 1.4118, "step": 6957 }, { "epoch": 8.516523867809058, "grad_norm": 1.4700894603449188, "learning_rate": 1.9717610898068655e-06, "loss": 0.5771, "step": 6958 }, { "epoch": 8.517747858017136, "grad_norm": 1.5546460629328276, "learning_rate": 1.9711346970427227e-06, "loss": 1.2366, "step": 6959 }, { "epoch": 8.518971848225215, "grad_norm": 1.0445972967224164, "learning_rate": 1.970508339033651e-06, "loss": 0.6086, "step": 6960 }, { "epoch": 8.520195838433292, "grad_norm": 2.5897319070992877, "learning_rate": 1.969882015820813e-06, "loss": 0.3774, "step": 6961 }, { "epoch": 8.52141982864137, "grad_norm": 1.0624617437695234, "learning_rate": 1.969255727445368e-06, "loss": 0.5993, "step": 6962 }, { "epoch": 8.522643818849449, "grad_norm": 2.1278017626407775, "learning_rate": 1.968629473948474e-06, "loss": 0.3612, "step": 6963 }, { "epoch": 8.523867809057528, "grad_norm": 1.0893886143397888, "learning_rate": 1.9680032553712847e-06, "loss": 0.4488, "step": 6964 }, { "epoch": 8.525091799265606, "grad_norm": 1.2448772669332806, "learning_rate": 1.967377071754955e-06, "loss": 0.7217, "step": 6965 }, { "epoch": 8.526315789473685, "grad_norm": 1.8734994611788545, "learning_rate": 1.9667509231406332e-06, "loss": 1.0447, "step": 6966 }, { "epoch": 8.527539779681762, "grad_norm": 1.8564841346378953, "learning_rate": 1.966124809569469e-06, "loss": 0.5485, "step": 6967 }, { "epoch": 8.52876376988984, "grad_norm": 1.864999442228352, "learning_rate": 1.9654987310826067e-06, "loss": 0.4253, "step": 6968 }, { "epoch": 8.529987760097919, "grad_norm": 2.2516677305735264, "learning_rate": 1.964872687721191e-06, "loss": 0.5116, "step": 6969 }, { "epoch": 8.531211750305998, "grad_norm": 1.4414879105935106, "learning_rate": 1.964246679526364e-06, "loss": 0.412, "step": 6970 }, { "epoch": 8.532435740514076, "grad_norm": 1.5236935745854983, "learning_rate": 1.963620706539263e-06, "loss": 0.5546, "step": 6971 }, { "epoch": 8.533659730722155, "grad_norm": 2.0264787254515237, "learning_rate": 1.9629947688010253e-06, "loss": 0.5853, "step": 6972 }, { "epoch": 8.534883720930232, "grad_norm": 1.2899406145375123, "learning_rate": 1.962368866352784e-06, "loss": 1.5729, "step": 6973 }, { "epoch": 8.53610771113831, "grad_norm": 0.9917855599707611, "learning_rate": 1.9617429992356742e-06, "loss": 0.6343, "step": 6974 }, { "epoch": 8.537331701346389, "grad_norm": 2.13549473108488, "learning_rate": 1.9611171674908224e-06, "loss": 0.3213, "step": 6975 }, { "epoch": 8.538555691554468, "grad_norm": 1.5879604412863948, "learning_rate": 1.9604913711593577e-06, "loss": 0.9172, "step": 6976 }, { "epoch": 8.539779681762546, "grad_norm": 1.190114060866767, "learning_rate": 1.959865610282404e-06, "loss": 0.59, "step": 6977 }, { "epoch": 8.541003671970625, "grad_norm": 1.5341181966222488, "learning_rate": 1.9592398849010847e-06, "loss": 0.5287, "step": 6978 }, { "epoch": 8.542227662178703, "grad_norm": 1.7855740180502353, "learning_rate": 1.9586141950565205e-06, "loss": 1.0007, "step": 6979 }, { "epoch": 8.54345165238678, "grad_norm": 1.2107515043912824, "learning_rate": 1.957988540789829e-06, "loss": 0.5877, "step": 6980 }, { "epoch": 8.544675642594859, "grad_norm": 1.662352643095449, "learning_rate": 1.9573629221421246e-06, "loss": 0.5871, "step": 6981 }, { "epoch": 8.545899632802938, "grad_norm": 1.5743118417253978, "learning_rate": 1.956737339154523e-06, "loss": 0.5251, "step": 6982 }, { "epoch": 8.547123623011016, "grad_norm": 1.3388893345113348, "learning_rate": 1.9561117918681345e-06, "loss": 0.6286, "step": 6983 }, { "epoch": 8.548347613219095, "grad_norm": 1.6956677347821356, "learning_rate": 1.9554862803240667e-06, "loss": 0.593, "step": 6984 }, { "epoch": 8.549571603427173, "grad_norm": 1.7156477545231066, "learning_rate": 1.9548608045634267e-06, "loss": 0.5515, "step": 6985 }, { "epoch": 8.55079559363525, "grad_norm": 1.3581593877980909, "learning_rate": 1.9542353646273175e-06, "loss": 0.801, "step": 6986 }, { "epoch": 8.552019583843329, "grad_norm": 1.9786886043800822, "learning_rate": 1.9536099605568424e-06, "loss": 0.6175, "step": 6987 }, { "epoch": 8.553243574051407, "grad_norm": 1.0846975166652442, "learning_rate": 1.9529845923930997e-06, "loss": 0.505, "step": 6988 }, { "epoch": 8.554467564259486, "grad_norm": 1.6004140589306965, "learning_rate": 1.9523592601771863e-06, "loss": 1.5618, "step": 6989 }, { "epoch": 8.555691554467565, "grad_norm": 0.9480513133771694, "learning_rate": 1.9517339639501956e-06, "loss": 0.5154, "step": 6990 }, { "epoch": 8.556915544675643, "grad_norm": 1.1870889910233422, "learning_rate": 1.951108703753222e-06, "loss": 0.5246, "step": 6991 }, { "epoch": 8.55813953488372, "grad_norm": 2.397477385560184, "learning_rate": 1.9504834796273546e-06, "loss": 0.351, "step": 6992 }, { "epoch": 8.559363525091799, "grad_norm": 1.625215679643291, "learning_rate": 1.9498582916136798e-06, "loss": 0.4841, "step": 6993 }, { "epoch": 8.560587515299877, "grad_norm": 1.8104213337107102, "learning_rate": 1.949233139753283e-06, "loss": 0.5035, "step": 6994 }, { "epoch": 8.561811505507956, "grad_norm": 2.9058880199525237, "learning_rate": 1.9486080240872475e-06, "loss": 0.4709, "step": 6995 }, { "epoch": 8.563035495716035, "grad_norm": 1.3794313143383858, "learning_rate": 1.9479829446566528e-06, "loss": 0.3404, "step": 6996 }, { "epoch": 8.564259485924113, "grad_norm": 1.5714645157234262, "learning_rate": 1.9473579015025775e-06, "loss": 1.0895, "step": 6997 }, { "epoch": 8.56548347613219, "grad_norm": 1.4184598548313632, "learning_rate": 1.9467328946660974e-06, "loss": 0.3293, "step": 6998 }, { "epoch": 8.566707466340269, "grad_norm": 1.1664649432051177, "learning_rate": 1.9461079241882834e-06, "loss": 0.8242, "step": 6999 }, { "epoch": 8.567931456548347, "grad_norm": 2.127052968088281, "learning_rate": 1.9454829901102093e-06, "loss": 0.3611, "step": 7000 }, { "epoch": 8.569155446756426, "grad_norm": 1.5360334414169912, "learning_rate": 1.944858092472942e-06, "loss": 0.6065, "step": 7001 }, { "epoch": 8.570379436964505, "grad_norm": 2.0429365663111962, "learning_rate": 1.944233231317548e-06, "loss": 0.5486, "step": 7002 }, { "epoch": 8.571603427172583, "grad_norm": 2.2481252036061226, "learning_rate": 1.943608406685089e-06, "loss": 0.3751, "step": 7003 }, { "epoch": 8.572827417380662, "grad_norm": 1.4224850982577077, "learning_rate": 1.942983618616628e-06, "loss": 0.5685, "step": 7004 }, { "epoch": 8.574051407588739, "grad_norm": 2.027932625121946, "learning_rate": 1.942358867153224e-06, "loss": 0.3129, "step": 7005 }, { "epoch": 8.575275397796817, "grad_norm": 2.080973410913841, "learning_rate": 1.941734152335932e-06, "loss": 0.9716, "step": 7006 }, { "epoch": 8.576499388004896, "grad_norm": 1.3208582773207311, "learning_rate": 1.9411094742058063e-06, "loss": 0.6843, "step": 7007 }, { "epoch": 8.577723378212974, "grad_norm": 1.4115617657123003, "learning_rate": 1.9404848328039e-06, "loss": 0.6174, "step": 7008 }, { "epoch": 8.578947368421053, "grad_norm": 1.699165740286584, "learning_rate": 1.9398602281712607e-06, "loss": 0.5551, "step": 7009 }, { "epoch": 8.580171358629132, "grad_norm": 1.3790738655203627, "learning_rate": 1.939235660348935e-06, "loss": 1.4145, "step": 7010 }, { "epoch": 8.581395348837209, "grad_norm": 1.2866794876177157, "learning_rate": 1.9386111293779673e-06, "loss": 0.4922, "step": 7011 }, { "epoch": 8.582619339045287, "grad_norm": 1.2993168358965523, "learning_rate": 1.9379866352993997e-06, "loss": 0.4566, "step": 7012 }, { "epoch": 8.583843329253366, "grad_norm": 1.6774155201444798, "learning_rate": 1.9373621781542723e-06, "loss": 1.5073, "step": 7013 }, { "epoch": 8.585067319461444, "grad_norm": 1.7034041641804865, "learning_rate": 1.9367377579836213e-06, "loss": 1.0151, "step": 7014 }, { "epoch": 8.586291309669523, "grad_norm": 2.29651393733562, "learning_rate": 1.9361133748284812e-06, "loss": 0.4649, "step": 7015 }, { "epoch": 8.587515299877602, "grad_norm": 1.0265112025799277, "learning_rate": 1.9354890287298844e-06, "loss": 0.5127, "step": 7016 }, { "epoch": 8.588739290085678, "grad_norm": 1.4923692336409133, "learning_rate": 1.9348647197288608e-06, "loss": 0.5312, "step": 7017 }, { "epoch": 8.589963280293757, "grad_norm": 1.9711269523781725, "learning_rate": 1.9342404478664384e-06, "loss": 0.5563, "step": 7018 }, { "epoch": 8.591187270501836, "grad_norm": 1.224187828098308, "learning_rate": 1.9336162131836404e-06, "loss": 0.9876, "step": 7019 }, { "epoch": 8.592411260709914, "grad_norm": 1.1686508853068271, "learning_rate": 1.932992015721489e-06, "loss": 0.4749, "step": 7020 }, { "epoch": 8.593635250917993, "grad_norm": 1.743588962380306, "learning_rate": 1.9323678555210065e-06, "loss": 0.6186, "step": 7021 }, { "epoch": 8.594859241126072, "grad_norm": 1.5264901715173755, "learning_rate": 1.9317437326232086e-06, "loss": 0.3936, "step": 7022 }, { "epoch": 8.596083231334148, "grad_norm": 2.886225193117928, "learning_rate": 1.931119647069111e-06, "loss": 0.5895, "step": 7023 }, { "epoch": 8.597307221542227, "grad_norm": 3.2434324171743123, "learning_rate": 1.930495598899726e-06, "loss": 0.3668, "step": 7024 }, { "epoch": 8.598531211750306, "grad_norm": 1.412267348022248, "learning_rate": 1.9298715881560626e-06, "loss": 0.7561, "step": 7025 }, { "epoch": 8.599755201958384, "grad_norm": 1.4999659306686985, "learning_rate": 1.929247614879131e-06, "loss": 0.4722, "step": 7026 }, { "epoch": 8.600979192166463, "grad_norm": 1.2722238927932918, "learning_rate": 1.9286236791099352e-06, "loss": 0.47, "step": 7027 }, { "epoch": 8.602203182374542, "grad_norm": 2.8601166296548732, "learning_rate": 1.9279997808894774e-06, "loss": 0.387, "step": 7028 }, { "epoch": 8.60342717258262, "grad_norm": 2.988162430683071, "learning_rate": 1.927375920258757e-06, "loss": 0.3655, "step": 7029 }, { "epoch": 8.604651162790697, "grad_norm": 1.9041961352603067, "learning_rate": 1.9267520972587754e-06, "loss": 0.5144, "step": 7030 }, { "epoch": 8.605875152998776, "grad_norm": 1.0004572197871546, "learning_rate": 1.9261283119305245e-06, "loss": 0.5173, "step": 7031 }, { "epoch": 8.607099143206854, "grad_norm": 2.871630921248742, "learning_rate": 1.925504564314999e-06, "loss": 0.3169, "step": 7032 }, { "epoch": 8.608323133414933, "grad_norm": 2.3952329199061473, "learning_rate": 1.9248808544531874e-06, "loss": 0.5188, "step": 7033 }, { "epoch": 8.609547123623011, "grad_norm": 1.9647601353609718, "learning_rate": 1.9242571823860796e-06, "loss": 0.44, "step": 7034 }, { "epoch": 8.61077111383109, "grad_norm": 1.9466796840945508, "learning_rate": 1.9236335481546604e-06, "loss": 1.1592, "step": 7035 }, { "epoch": 8.611995104039167, "grad_norm": 1.014029076227133, "learning_rate": 1.9230099517999128e-06, "loss": 0.5347, "step": 7036 }, { "epoch": 8.613219094247246, "grad_norm": 1.7815667834562627, "learning_rate": 1.922386393362817e-06, "loss": 0.7368, "step": 7037 }, { "epoch": 8.614443084455324, "grad_norm": 1.4100560140789216, "learning_rate": 1.92176287288435e-06, "loss": 1.2198, "step": 7038 }, { "epoch": 8.615667074663403, "grad_norm": 1.4510869135240154, "learning_rate": 1.9211393904054887e-06, "loss": 0.4113, "step": 7039 }, { "epoch": 8.616891064871481, "grad_norm": 2.504149132648787, "learning_rate": 1.9205159459672055e-06, "loss": 0.8726, "step": 7040 }, { "epoch": 8.61811505507956, "grad_norm": 1.4407555093044733, "learning_rate": 1.9198925396104716e-06, "loss": 0.6108, "step": 7041 }, { "epoch": 8.619339045287639, "grad_norm": 2.0026325128611013, "learning_rate": 1.9192691713762534e-06, "loss": 0.7784, "step": 7042 }, { "epoch": 8.620563035495715, "grad_norm": 2.2659650231701973, "learning_rate": 1.9186458413055183e-06, "loss": 0.6164, "step": 7043 }, { "epoch": 8.621787025703794, "grad_norm": 2.810175391743407, "learning_rate": 1.9180225494392273e-06, "loss": 0.9746, "step": 7044 }, { "epoch": 8.623011015911873, "grad_norm": 1.8352839941725678, "learning_rate": 1.917399295818343e-06, "loss": 0.734, "step": 7045 }, { "epoch": 8.624235006119951, "grad_norm": 1.4823016670195748, "learning_rate": 1.91677608048382e-06, "loss": 1.1678, "step": 7046 }, { "epoch": 8.62545899632803, "grad_norm": 2.584698146955467, "learning_rate": 1.916152903476618e-06, "loss": 0.9663, "step": 7047 }, { "epoch": 8.626682986536107, "grad_norm": 1.6889411017291764, "learning_rate": 1.915529764837687e-06, "loss": 0.6955, "step": 7048 }, { "epoch": 8.627906976744185, "grad_norm": 1.4381027543449347, "learning_rate": 1.914906664607978e-06, "loss": 1.5759, "step": 7049 }, { "epoch": 8.629130966952264, "grad_norm": 1.2981824706348752, "learning_rate": 1.9142836028284385e-06, "loss": 0.5107, "step": 7050 }, { "epoch": 8.630354957160343, "grad_norm": 1.8903078351810934, "learning_rate": 1.913660579540014e-06, "loss": 0.6041, "step": 7051 }, { "epoch": 8.631578947368421, "grad_norm": 2.0930948149148474, "learning_rate": 1.913037594783648e-06, "loss": 0.5586, "step": 7052 }, { "epoch": 8.6328029375765, "grad_norm": 2.324881792910832, "learning_rate": 1.9124146486002803e-06, "loss": 0.6171, "step": 7053 }, { "epoch": 8.634026927784578, "grad_norm": 1.080089636661796, "learning_rate": 1.911791741030849e-06, "loss": 0.5399, "step": 7054 }, { "epoch": 8.635250917992655, "grad_norm": 1.1850243173915376, "learning_rate": 1.9111688721162872e-06, "loss": 0.5978, "step": 7055 }, { "epoch": 8.636474908200734, "grad_norm": 1.6580047350907374, "learning_rate": 1.9105460418975307e-06, "loss": 0.9607, "step": 7056 }, { "epoch": 8.637698898408813, "grad_norm": 1.4309727783931683, "learning_rate": 1.909923250415508e-06, "loss": 0.9225, "step": 7057 }, { "epoch": 8.638922888616891, "grad_norm": 1.1760131016164397, "learning_rate": 1.909300497711146e-06, "loss": 0.5133, "step": 7058 }, { "epoch": 8.64014687882497, "grad_norm": 0.9437675387366303, "learning_rate": 1.9086777838253714e-06, "loss": 0.5897, "step": 7059 }, { "epoch": 8.641370869033048, "grad_norm": 2.1794286002795964, "learning_rate": 1.9080551087991046e-06, "loss": 0.6794, "step": 7060 }, { "epoch": 8.642594859241125, "grad_norm": 1.8299791489657058, "learning_rate": 1.907432472673267e-06, "loss": 0.3692, "step": 7061 }, { "epoch": 8.643818849449204, "grad_norm": 3.041001436777335, "learning_rate": 1.906809875488776e-06, "loss": 0.6994, "step": 7062 }, { "epoch": 8.645042839657282, "grad_norm": 1.3097175843823354, "learning_rate": 1.906187317286546e-06, "loss": 0.6825, "step": 7063 }, { "epoch": 8.646266829865361, "grad_norm": 2.3533279964344627, "learning_rate": 1.9055647981074879e-06, "loss": 0.866, "step": 7064 }, { "epoch": 8.64749082007344, "grad_norm": 1.6535118934937274, "learning_rate": 1.904942317992514e-06, "loss": 0.9946, "step": 7065 }, { "epoch": 8.648714810281518, "grad_norm": 2.208882134733513, "learning_rate": 1.9043198769825294e-06, "loss": 0.5469, "step": 7066 }, { "epoch": 8.649938800489597, "grad_norm": 2.1936552903031044, "learning_rate": 1.9036974751184395e-06, "loss": 0.4327, "step": 7067 }, { "epoch": 8.651162790697674, "grad_norm": 1.8162583812433646, "learning_rate": 1.903075112441145e-06, "loss": 0.5547, "step": 7068 }, { "epoch": 8.652386780905752, "grad_norm": 1.5089522817213852, "learning_rate": 1.9024527889915472e-06, "loss": 1.0206, "step": 7069 }, { "epoch": 8.653610771113831, "grad_norm": 0.881211574787423, "learning_rate": 1.9018305048105418e-06, "loss": 0.4433, "step": 7070 }, { "epoch": 8.65483476132191, "grad_norm": 1.9622241437706243, "learning_rate": 1.901208259939023e-06, "loss": 0.6464, "step": 7071 }, { "epoch": 8.656058751529988, "grad_norm": 1.546410703153546, "learning_rate": 1.9005860544178834e-06, "loss": 0.9335, "step": 7072 }, { "epoch": 8.657282741738067, "grad_norm": 1.6934828056242963, "learning_rate": 1.8999638882880095e-06, "loss": 0.4794, "step": 7073 }, { "epoch": 8.658506731946144, "grad_norm": 1.448509313676436, "learning_rate": 1.8993417615902909e-06, "loss": 0.5971, "step": 7074 }, { "epoch": 8.659730722154222, "grad_norm": 1.3590215867979911, "learning_rate": 1.8987196743656096e-06, "loss": 0.7269, "step": 7075 }, { "epoch": 8.660954712362301, "grad_norm": 1.2993708905062111, "learning_rate": 1.8980976266548473e-06, "loss": 0.55, "step": 7076 }, { "epoch": 8.66217870257038, "grad_norm": 2.5270778953146955, "learning_rate": 1.8974756184988824e-06, "loss": 0.5117, "step": 7077 }, { "epoch": 8.663402692778458, "grad_norm": 1.832246629659369, "learning_rate": 1.8968536499385914e-06, "loss": 0.5853, "step": 7078 }, { "epoch": 8.664626682986537, "grad_norm": 2.0322226639541983, "learning_rate": 1.8962317210148482e-06, "loss": 0.5814, "step": 7079 }, { "epoch": 8.665850673194614, "grad_norm": 1.5499105365047605, "learning_rate": 1.8956098317685226e-06, "loss": 1.1808, "step": 7080 }, { "epoch": 8.667074663402692, "grad_norm": 2.5478562895980934, "learning_rate": 1.8949879822404831e-06, "loss": 0.4527, "step": 7081 }, { "epoch": 8.668298653610771, "grad_norm": 2.14225805637489, "learning_rate": 1.894366172471596e-06, "loss": 0.5423, "step": 7082 }, { "epoch": 8.66952264381885, "grad_norm": 1.6925124744282007, "learning_rate": 1.8937444025027245e-06, "loss": 0.7158, "step": 7083 }, { "epoch": 8.670746634026928, "grad_norm": 2.3335373341558423, "learning_rate": 1.8931226723747282e-06, "loss": 0.985, "step": 7084 }, { "epoch": 8.671970624235007, "grad_norm": 2.001227721487262, "learning_rate": 1.892500982128465e-06, "loss": 0.524, "step": 7085 }, { "epoch": 8.673194614443084, "grad_norm": 2.414808583681293, "learning_rate": 1.8918793318047896e-06, "loss": 0.3725, "step": 7086 }, { "epoch": 8.674418604651162, "grad_norm": 1.0461912660472263, "learning_rate": 1.891257721444556e-06, "loss": 0.5416, "step": 7087 }, { "epoch": 8.67564259485924, "grad_norm": 1.956498776744119, "learning_rate": 1.8906361510886135e-06, "loss": 0.3937, "step": 7088 }, { "epoch": 8.67686658506732, "grad_norm": 2.2709347688317894, "learning_rate": 1.8900146207778094e-06, "loss": 0.4722, "step": 7089 }, { "epoch": 8.678090575275398, "grad_norm": 1.3307463236237826, "learning_rate": 1.8893931305529867e-06, "loss": 0.7428, "step": 7090 }, { "epoch": 8.679314565483477, "grad_norm": 2.222697852002743, "learning_rate": 1.8887716804549903e-06, "loss": 0.484, "step": 7091 }, { "epoch": 8.680538555691555, "grad_norm": 1.9121330632124758, "learning_rate": 1.8881502705246586e-06, "loss": 0.663, "step": 7092 }, { "epoch": 8.681762545899632, "grad_norm": 1.5120190095954609, "learning_rate": 1.8875289008028275e-06, "loss": 0.5053, "step": 7093 }, { "epoch": 8.68298653610771, "grad_norm": 1.3906416561849189, "learning_rate": 1.8869075713303313e-06, "loss": 0.5087, "step": 7094 }, { "epoch": 8.68421052631579, "grad_norm": 1.731489166372191, "learning_rate": 1.8862862821480023e-06, "loss": 1.3131, "step": 7095 }, { "epoch": 8.685434516523868, "grad_norm": 1.906081391716827, "learning_rate": 1.885665033296669e-06, "loss": 0.5001, "step": 7096 }, { "epoch": 8.686658506731947, "grad_norm": 2.278492203909856, "learning_rate": 1.8850438248171571e-06, "loss": 0.7467, "step": 7097 }, { "epoch": 8.687882496940025, "grad_norm": 1.8290546291130974, "learning_rate": 1.8844226567502913e-06, "loss": 0.419, "step": 7098 }, { "epoch": 8.689106487148102, "grad_norm": 2.800739244346286, "learning_rate": 1.88380152913689e-06, "loss": 0.5425, "step": 7099 }, { "epoch": 8.69033047735618, "grad_norm": 2.0352202939219413, "learning_rate": 1.8831804420177742e-06, "loss": 0.4776, "step": 7100 }, { "epoch": 8.69155446756426, "grad_norm": 1.468113045108928, "learning_rate": 1.8825593954337583e-06, "loss": 0.6881, "step": 7101 }, { "epoch": 8.692778457772338, "grad_norm": 1.55666520701493, "learning_rate": 1.8819383894256554e-06, "loss": 0.6764, "step": 7102 }, { "epoch": 8.694002447980417, "grad_norm": 1.1295245974430754, "learning_rate": 1.8813174240342745e-06, "loss": 0.4784, "step": 7103 }, { "epoch": 8.695226438188495, "grad_norm": 2.1729655911827552, "learning_rate": 1.8806964993004248e-06, "loss": 0.5149, "step": 7104 }, { "epoch": 8.696450428396572, "grad_norm": 1.9107057761306587, "learning_rate": 1.8800756152649106e-06, "loss": 0.7419, "step": 7105 }, { "epoch": 8.69767441860465, "grad_norm": 1.678315526575965, "learning_rate": 1.8794547719685338e-06, "loss": 0.6393, "step": 7106 }, { "epoch": 8.69889840881273, "grad_norm": 1.352476619921566, "learning_rate": 1.8788339694520939e-06, "loss": 0.3411, "step": 7107 }, { "epoch": 8.700122399020808, "grad_norm": 0.8438920635287147, "learning_rate": 1.878213207756388e-06, "loss": 0.3543, "step": 7108 }, { "epoch": 8.701346389228886, "grad_norm": 2.2279804958428513, "learning_rate": 1.8775924869222106e-06, "loss": 0.4241, "step": 7109 }, { "epoch": 8.702570379436965, "grad_norm": 1.3833523162179864, "learning_rate": 1.8769718069903537e-06, "loss": 0.625, "step": 7110 }, { "epoch": 8.703794369645042, "grad_norm": 1.0861112155929005, "learning_rate": 1.8763511680016041e-06, "loss": 0.3129, "step": 7111 }, { "epoch": 8.70501835985312, "grad_norm": 1.8309394724905486, "learning_rate": 1.8757305699967487e-06, "loss": 1.0905, "step": 7112 }, { "epoch": 8.7062423500612, "grad_norm": 1.4891740286093416, "learning_rate": 1.875110013016572e-06, "loss": 0.5785, "step": 7113 }, { "epoch": 8.707466340269278, "grad_norm": 1.6970417809380671, "learning_rate": 1.8744894971018537e-06, "loss": 0.543, "step": 7114 }, { "epoch": 8.708690330477356, "grad_norm": 1.5826229899826285, "learning_rate": 1.873869022293372e-06, "loss": 0.6188, "step": 7115 }, { "epoch": 8.709914320685435, "grad_norm": 2.2050816206143606, "learning_rate": 1.8732485886319018e-06, "loss": 1.0478, "step": 7116 }, { "epoch": 8.711138310893514, "grad_norm": 1.4820074102937373, "learning_rate": 1.872628196158217e-06, "loss": 0.4746, "step": 7117 }, { "epoch": 8.71236230110159, "grad_norm": 1.5306508542614394, "learning_rate": 1.8720078449130868e-06, "loss": 0.4951, "step": 7118 }, { "epoch": 8.713586291309669, "grad_norm": 1.5863543778812912, "learning_rate": 1.8713875349372778e-06, "loss": 1.2335, "step": 7119 }, { "epoch": 8.714810281517748, "grad_norm": 1.299774848804881, "learning_rate": 1.8707672662715539e-06, "loss": 1.3366, "step": 7120 }, { "epoch": 8.716034271725826, "grad_norm": 1.7041710441511086, "learning_rate": 1.8701470389566795e-06, "loss": 0.4547, "step": 7121 }, { "epoch": 8.717258261933905, "grad_norm": 1.3966984748470117, "learning_rate": 1.8695268530334116e-06, "loss": 1.3126, "step": 7122 }, { "epoch": 8.718482252141984, "grad_norm": 2.486408542900024, "learning_rate": 1.8689067085425066e-06, "loss": 0.9259, "step": 7123 }, { "epoch": 8.71970624235006, "grad_norm": 1.2418564900002367, "learning_rate": 1.8682866055247189e-06, "loss": 0.5398, "step": 7124 }, { "epoch": 8.720930232558139, "grad_norm": 1.4797500347919583, "learning_rate": 1.8676665440207982e-06, "loss": 0.4738, "step": 7125 }, { "epoch": 8.722154222766218, "grad_norm": 0.7159341299437582, "learning_rate": 1.8670465240714939e-06, "loss": 0.2585, "step": 7126 }, { "epoch": 8.723378212974296, "grad_norm": 1.7216978262379328, "learning_rate": 1.8664265457175516e-06, "loss": 0.4474, "step": 7127 }, { "epoch": 8.724602203182375, "grad_norm": 1.3292998056026162, "learning_rate": 1.8658066089997124e-06, "loss": 0.4397, "step": 7128 }, { "epoch": 8.725826193390454, "grad_norm": 1.3515449009124987, "learning_rate": 1.8651867139587165e-06, "loss": 1.2867, "step": 7129 }, { "epoch": 8.727050183598532, "grad_norm": 2.7099167345348047, "learning_rate": 1.8645668606353028e-06, "loss": 0.4205, "step": 7130 }, { "epoch": 8.728274173806609, "grad_norm": 1.833455426510204, "learning_rate": 1.863947049070205e-06, "loss": 0.7673, "step": 7131 }, { "epoch": 8.729498164014688, "grad_norm": 1.9861925686613313, "learning_rate": 1.863327279304154e-06, "loss": 0.4487, "step": 7132 }, { "epoch": 8.730722154222766, "grad_norm": 1.257446200918336, "learning_rate": 1.8627075513778787e-06, "loss": 0.5154, "step": 7133 }, { "epoch": 8.731946144430845, "grad_norm": 2.0880211507297823, "learning_rate": 1.8620878653321066e-06, "loss": 0.498, "step": 7134 }, { "epoch": 8.733170134638923, "grad_norm": 1.1274470274667026, "learning_rate": 1.8614682212075605e-06, "loss": 0.5705, "step": 7135 }, { "epoch": 8.734394124847, "grad_norm": 1.0673109056126697, "learning_rate": 1.8608486190449617e-06, "loss": 0.6447, "step": 7136 }, { "epoch": 8.735618115055079, "grad_norm": 1.1760563568871727, "learning_rate": 1.860229058885027e-06, "loss": 0.6518, "step": 7137 }, { "epoch": 8.736842105263158, "grad_norm": 1.277679232812149, "learning_rate": 1.859609540768471e-06, "loss": 0.3144, "step": 7138 }, { "epoch": 8.738066095471236, "grad_norm": 1.360206804029494, "learning_rate": 1.8589900647360092e-06, "loss": 0.567, "step": 7139 }, { "epoch": 8.739290085679315, "grad_norm": 1.477765426240493, "learning_rate": 1.858370630828349e-06, "loss": 0.5191, "step": 7140 }, { "epoch": 8.740514075887393, "grad_norm": 1.9484798055456012, "learning_rate": 1.8577512390861977e-06, "loss": 0.4361, "step": 7141 }, { "epoch": 8.741738066095472, "grad_norm": 1.1573489726147173, "learning_rate": 1.8571318895502588e-06, "loss": 0.6884, "step": 7142 }, { "epoch": 8.742962056303549, "grad_norm": 1.9619957711431417, "learning_rate": 1.8565125822612345e-06, "loss": 0.8436, "step": 7143 }, { "epoch": 8.744186046511627, "grad_norm": 1.6908457730806874, "learning_rate": 1.8558933172598236e-06, "loss": 1.1104, "step": 7144 }, { "epoch": 8.745410036719706, "grad_norm": 1.175882831623166, "learning_rate": 1.855274094586722e-06, "loss": 0.5836, "step": 7145 }, { "epoch": 8.746634026927785, "grad_norm": 2.211198130511889, "learning_rate": 1.8546549142826205e-06, "loss": 1.0887, "step": 7146 }, { "epoch": 8.747858017135863, "grad_norm": 1.2544115939576916, "learning_rate": 1.8540357763882127e-06, "loss": 0.4482, "step": 7147 }, { "epoch": 8.749082007343942, "grad_norm": 1.4573301810335062, "learning_rate": 1.8534166809441843e-06, "loss": 0.5411, "step": 7148 }, { "epoch": 8.750305997552019, "grad_norm": 1.0865576561634696, "learning_rate": 1.8527976279912196e-06, "loss": 0.6975, "step": 7149 }, { "epoch": 8.751529987760097, "grad_norm": 1.0101926043841374, "learning_rate": 1.852178617570001e-06, "loss": 0.714, "step": 7150 }, { "epoch": 8.752753977968176, "grad_norm": 1.6715428794731262, "learning_rate": 1.851559649721207e-06, "loss": 0.5328, "step": 7151 }, { "epoch": 8.753977968176255, "grad_norm": 2.702937859280916, "learning_rate": 1.850940724485515e-06, "loss": 0.4088, "step": 7152 }, { "epoch": 8.755201958384333, "grad_norm": 2.0589486599672844, "learning_rate": 1.8503218419035978e-06, "loss": 0.4059, "step": 7153 }, { "epoch": 8.756425948592412, "grad_norm": 1.267654985290651, "learning_rate": 1.8497030020161268e-06, "loss": 1.0951, "step": 7154 }, { "epoch": 8.75764993880049, "grad_norm": 1.5032924902543292, "learning_rate": 1.8490842048637679e-06, "loss": 1.1757, "step": 7155 }, { "epoch": 8.758873929008567, "grad_norm": 1.806571878540636, "learning_rate": 1.8484654504871886e-06, "loss": 0.493, "step": 7156 }, { "epoch": 8.760097919216646, "grad_norm": 1.1445551377911019, "learning_rate": 1.8478467389270497e-06, "loss": 0.482, "step": 7157 }, { "epoch": 8.761321909424725, "grad_norm": 1.539866854129944, "learning_rate": 1.8472280702240108e-06, "loss": 0.6013, "step": 7158 }, { "epoch": 8.762545899632803, "grad_norm": 1.8846332217098338, "learning_rate": 1.8466094444187286e-06, "loss": 1.1837, "step": 7159 }, { "epoch": 8.763769889840882, "grad_norm": 1.8891854617880526, "learning_rate": 1.8459908615518572e-06, "loss": 0.7419, "step": 7160 }, { "epoch": 8.76499388004896, "grad_norm": 1.2568291827248856, "learning_rate": 1.8453723216640474e-06, "loss": 0.5579, "step": 7161 }, { "epoch": 8.766217870257037, "grad_norm": 2.4647931947772483, "learning_rate": 1.8447538247959474e-06, "loss": 0.9603, "step": 7162 }, { "epoch": 8.767441860465116, "grad_norm": 2.9163089604766266, "learning_rate": 1.8441353709882027e-06, "loss": 0.4196, "step": 7163 }, { "epoch": 8.768665850673194, "grad_norm": 1.8620220947996393, "learning_rate": 1.843516960281454e-06, "loss": 0.6367, "step": 7164 }, { "epoch": 8.769889840881273, "grad_norm": 1.1666398470039772, "learning_rate": 1.8428985927163446e-06, "loss": 0.4558, "step": 7165 }, { "epoch": 8.771113831089352, "grad_norm": 0.9346016007977423, "learning_rate": 1.8422802683335081e-06, "loss": 0.4454, "step": 7166 }, { "epoch": 8.77233782129743, "grad_norm": 2.473530626605038, "learning_rate": 1.8416619871735799e-06, "loss": 0.3684, "step": 7167 }, { "epoch": 8.773561811505507, "grad_norm": 2.2015132851805634, "learning_rate": 1.8410437492771905e-06, "loss": 0.4546, "step": 7168 }, { "epoch": 8.774785801713586, "grad_norm": 1.7290686434517775, "learning_rate": 1.8404255546849686e-06, "loss": 0.4614, "step": 7169 }, { "epoch": 8.776009791921664, "grad_norm": 2.3397374865324854, "learning_rate": 1.8398074034375402e-06, "loss": 1.291, "step": 7170 }, { "epoch": 8.777233782129743, "grad_norm": 1.3732711392126897, "learning_rate": 1.839189295575527e-06, "loss": 0.5644, "step": 7171 }, { "epoch": 8.778457772337822, "grad_norm": 1.241135423542788, "learning_rate": 1.8385712311395487e-06, "loss": 0.6451, "step": 7172 }, { "epoch": 8.7796817625459, "grad_norm": 1.5128611697132082, "learning_rate": 1.8379532101702235e-06, "loss": 1.1762, "step": 7173 }, { "epoch": 8.780905752753977, "grad_norm": 1.04134001338678, "learning_rate": 1.8373352327081647e-06, "loss": 0.5828, "step": 7174 }, { "epoch": 8.782129742962056, "grad_norm": 1.7826909947908924, "learning_rate": 1.836717298793983e-06, "loss": 0.4679, "step": 7175 }, { "epoch": 8.783353733170134, "grad_norm": 2.239704884397322, "learning_rate": 1.836099408468287e-06, "loss": 0.9065, "step": 7176 }, { "epoch": 8.784577723378213, "grad_norm": 1.5019365183523927, "learning_rate": 1.835481561771682e-06, "loss": 0.302, "step": 7177 }, { "epoch": 8.785801713586292, "grad_norm": 2.4964015522192544, "learning_rate": 1.8348637587447713e-06, "loss": 0.8045, "step": 7178 }, { "epoch": 8.78702570379437, "grad_norm": 1.4284307991833198, "learning_rate": 1.8342459994281542e-06, "loss": 1.2647, "step": 7179 }, { "epoch": 8.788249694002449, "grad_norm": 1.0687264779053247, "learning_rate": 1.8336282838624275e-06, "loss": 0.5414, "step": 7180 }, { "epoch": 8.789473684210526, "grad_norm": 1.6141424289599375, "learning_rate": 1.8330106120881846e-06, "loss": 0.5378, "step": 7181 }, { "epoch": 8.790697674418604, "grad_norm": 1.7506813768860336, "learning_rate": 1.832392984146018e-06, "loss": 0.3958, "step": 7182 }, { "epoch": 8.791921664626683, "grad_norm": 2.5273025164517517, "learning_rate": 1.8317754000765156e-06, "loss": 0.5252, "step": 7183 }, { "epoch": 8.793145654834762, "grad_norm": 1.710663631661679, "learning_rate": 1.8311578599202618e-06, "loss": 0.9181, "step": 7184 }, { "epoch": 8.79436964504284, "grad_norm": 2.1164883209008676, "learning_rate": 1.830540363717839e-06, "loss": 0.3851, "step": 7185 }, { "epoch": 8.795593635250919, "grad_norm": 1.4199559109988358, "learning_rate": 1.8299229115098277e-06, "loss": 0.9917, "step": 7186 }, { "epoch": 8.796817625458996, "grad_norm": 2.595606461658539, "learning_rate": 1.8293055033368046e-06, "loss": 0.4776, "step": 7187 }, { "epoch": 8.798041615667074, "grad_norm": 1.9423885480218512, "learning_rate": 1.828688139239343e-06, "loss": 1.0797, "step": 7188 }, { "epoch": 8.799265605875153, "grad_norm": 1.6567585372320848, "learning_rate": 1.8280708192580137e-06, "loss": 1.1398, "step": 7189 }, { "epoch": 8.800489596083231, "grad_norm": 3.1375980724372035, "learning_rate": 1.8274535434333846e-06, "loss": 0.3695, "step": 7190 }, { "epoch": 8.80171358629131, "grad_norm": 1.5862898723457155, "learning_rate": 1.8268363118060212e-06, "loss": 0.5799, "step": 7191 }, { "epoch": 8.802937576499389, "grad_norm": 1.3676519157134974, "learning_rate": 1.8262191244164862e-06, "loss": 0.6524, "step": 7192 }, { "epoch": 8.804161566707466, "grad_norm": 1.7147230856898554, "learning_rate": 1.8256019813053376e-06, "loss": 0.3599, "step": 7193 }, { "epoch": 8.805385556915544, "grad_norm": 2.079308442280361, "learning_rate": 1.8249848825131322e-06, "loss": 0.8289, "step": 7194 }, { "epoch": 8.806609547123623, "grad_norm": 2.725685570365817, "learning_rate": 1.824367828080424e-06, "loss": 0.8087, "step": 7195 }, { "epoch": 8.807833537331701, "grad_norm": 1.439739138924288, "learning_rate": 1.823750818047763e-06, "loss": 0.5342, "step": 7196 }, { "epoch": 8.80905752753978, "grad_norm": 1.5045913425571409, "learning_rate": 1.8231338524556969e-06, "loss": 1.3013, "step": 7197 }, { "epoch": 8.810281517747859, "grad_norm": 1.7923010071106449, "learning_rate": 1.8225169313447702e-06, "loss": 0.9697, "step": 7198 }, { "epoch": 8.811505507955935, "grad_norm": 1.8921176992985596, "learning_rate": 1.8219000547555255e-06, "loss": 0.5419, "step": 7199 }, { "epoch": 8.812729498164014, "grad_norm": 0.9442715469815652, "learning_rate": 1.8212832227285008e-06, "loss": 0.4446, "step": 7200 }, { "epoch": 8.813953488372093, "grad_norm": 1.6625531441359105, "learning_rate": 1.820666435304233e-06, "loss": 0.827, "step": 7201 }, { "epoch": 8.815177478580171, "grad_norm": 2.487102708946883, "learning_rate": 1.8200496925232535e-06, "loss": 0.9426, "step": 7202 }, { "epoch": 8.81640146878825, "grad_norm": 2.3946666545136415, "learning_rate": 1.819432994426093e-06, "loss": 0.4415, "step": 7203 }, { "epoch": 8.817625458996329, "grad_norm": 2.248999925297696, "learning_rate": 1.8188163410532794e-06, "loss": 0.5084, "step": 7204 }, { "epoch": 8.818849449204407, "grad_norm": 2.3902382707128838, "learning_rate": 1.8181997324453366e-06, "loss": 0.5885, "step": 7205 }, { "epoch": 8.820073439412484, "grad_norm": 1.8986904688918749, "learning_rate": 1.8175831686427853e-06, "loss": 0.7836, "step": 7206 }, { "epoch": 8.821297429620563, "grad_norm": 1.367353088257369, "learning_rate": 1.8169666496861435e-06, "loss": 0.6718, "step": 7207 }, { "epoch": 8.822521419828641, "grad_norm": 1.3886926710764194, "learning_rate": 1.8163501756159277e-06, "loss": 0.3617, "step": 7208 }, { "epoch": 8.82374541003672, "grad_norm": 1.56746505251863, "learning_rate": 1.81573374647265e-06, "loss": 0.5697, "step": 7209 }, { "epoch": 8.824969400244798, "grad_norm": 1.9368020544330675, "learning_rate": 1.8151173622968197e-06, "loss": 0.7308, "step": 7210 }, { "epoch": 8.826193390452877, "grad_norm": 1.9779152148425674, "learning_rate": 1.8145010231289417e-06, "loss": 0.8629, "step": 7211 }, { "epoch": 8.827417380660954, "grad_norm": 1.5850470641069907, "learning_rate": 1.8138847290095224e-06, "loss": 1.6291, "step": 7212 }, { "epoch": 8.828641370869033, "grad_norm": 1.5435676074218891, "learning_rate": 1.8132684799790606e-06, "loss": 0.4845, "step": 7213 }, { "epoch": 8.829865361077111, "grad_norm": 1.3845934019917574, "learning_rate": 1.812652276078054e-06, "loss": 0.4824, "step": 7214 }, { "epoch": 8.83108935128519, "grad_norm": 1.3899831059262633, "learning_rate": 1.812036117346998e-06, "loss": 0.2768, "step": 7215 }, { "epoch": 8.832313341493268, "grad_norm": 1.0226939603085714, "learning_rate": 1.8114200038263829e-06, "loss": 0.5515, "step": 7216 }, { "epoch": 8.833537331701347, "grad_norm": 1.2825198087961036, "learning_rate": 1.8108039355566987e-06, "loss": 0.4143, "step": 7217 }, { "epoch": 8.834761321909426, "grad_norm": 1.2665045733415046, "learning_rate": 1.8101879125784308e-06, "loss": 0.5123, "step": 7218 }, { "epoch": 8.835985312117502, "grad_norm": 1.4735652443833824, "learning_rate": 1.8095719349320622e-06, "loss": 1.1241, "step": 7219 }, { "epoch": 8.837209302325581, "grad_norm": 1.6412927499356138, "learning_rate": 1.808956002658071e-06, "loss": 0.9382, "step": 7220 }, { "epoch": 8.83843329253366, "grad_norm": 1.1432669225997005, "learning_rate": 1.8083401157969371e-06, "loss": 0.4349, "step": 7221 }, { "epoch": 8.839657282741738, "grad_norm": 1.4950016990776427, "learning_rate": 1.8077242743891316e-06, "loss": 0.5301, "step": 7222 }, { "epoch": 8.840881272949817, "grad_norm": 1.516791770456261, "learning_rate": 1.8071084784751265e-06, "loss": 1.5483, "step": 7223 }, { "epoch": 8.842105263157894, "grad_norm": 2.0752668112373147, "learning_rate": 1.8064927280953893e-06, "loss": 0.6116, "step": 7224 }, { "epoch": 8.843329253365972, "grad_norm": 0.7320055545097112, "learning_rate": 1.8058770232903842e-06, "loss": 0.221, "step": 7225 }, { "epoch": 8.844553243574051, "grad_norm": 2.675819183443763, "learning_rate": 1.8052613641005743e-06, "loss": 1.0372, "step": 7226 }, { "epoch": 8.84577723378213, "grad_norm": 1.9129526235716634, "learning_rate": 1.8046457505664183e-06, "loss": 0.4012, "step": 7227 }, { "epoch": 8.847001223990208, "grad_norm": 1.5379696399931382, "learning_rate": 1.8040301827283713e-06, "loss": 0.389, "step": 7228 }, { "epoch": 8.848225214198287, "grad_norm": 3.7082116491721386, "learning_rate": 1.8034146606268855e-06, "loss": 0.3999, "step": 7229 }, { "epoch": 8.849449204406366, "grad_norm": 2.0285320396078776, "learning_rate": 1.8027991843024133e-06, "loss": 1.138, "step": 7230 }, { "epoch": 8.850673194614442, "grad_norm": 2.2597987788465286, "learning_rate": 1.8021837537953992e-06, "loss": 0.4097, "step": 7231 }, { "epoch": 8.851897184822521, "grad_norm": 1.49872270747978, "learning_rate": 1.8015683691462878e-06, "loss": 0.5858, "step": 7232 }, { "epoch": 8.8531211750306, "grad_norm": 1.758351583936787, "learning_rate": 1.8009530303955198e-06, "loss": 0.7544, "step": 7233 }, { "epoch": 8.854345165238678, "grad_norm": 2.7330709056323474, "learning_rate": 1.800337737583533e-06, "loss": 0.6992, "step": 7234 }, { "epoch": 8.855569155446757, "grad_norm": 1.9129535256644716, "learning_rate": 1.7997224907507623e-06, "loss": 1.3, "step": 7235 }, { "epoch": 8.856793145654835, "grad_norm": 1.7408643695991997, "learning_rate": 1.79910728993764e-06, "loss": 0.7143, "step": 7236 }, { "epoch": 8.858017135862912, "grad_norm": 3.380495613609966, "learning_rate": 1.7984921351845934e-06, "loss": 0.4932, "step": 7237 }, { "epoch": 8.859241126070991, "grad_norm": 2.028477304708973, "learning_rate": 1.7978770265320484e-06, "loss": 0.4269, "step": 7238 }, { "epoch": 8.86046511627907, "grad_norm": 2.037640736284394, "learning_rate": 1.7972619640204298e-06, "loss": 0.6087, "step": 7239 }, { "epoch": 8.861689106487148, "grad_norm": 2.055975888507703, "learning_rate": 1.7966469476901548e-06, "loss": 0.578, "step": 7240 }, { "epoch": 8.862913096695227, "grad_norm": 1.376517422805062, "learning_rate": 1.796031977581641e-06, "loss": 0.8189, "step": 7241 }, { "epoch": 8.864137086903305, "grad_norm": 1.7973799773319954, "learning_rate": 1.7954170537353014e-06, "loss": 0.5919, "step": 7242 }, { "epoch": 8.865361077111384, "grad_norm": 2.8649783252672685, "learning_rate": 1.7948021761915474e-06, "loss": 0.4751, "step": 7243 }, { "epoch": 8.86658506731946, "grad_norm": 0.9235711792574973, "learning_rate": 1.794187344990786e-06, "loss": 0.4309, "step": 7244 }, { "epoch": 8.86780905752754, "grad_norm": 1.9403924437258337, "learning_rate": 1.7935725601734222e-06, "loss": 0.4163, "step": 7245 }, { "epoch": 8.869033047735618, "grad_norm": 2.3143938642765387, "learning_rate": 1.7929578217798554e-06, "loss": 0.5876, "step": 7246 }, { "epoch": 8.870257037943697, "grad_norm": 1.4686718580052012, "learning_rate": 1.7923431298504861e-06, "loss": 0.6289, "step": 7247 }, { "epoch": 8.871481028151775, "grad_norm": 1.750660590434502, "learning_rate": 1.79172848442571e-06, "loss": 1.3525, "step": 7248 }, { "epoch": 8.872705018359854, "grad_norm": 1.772536026065116, "learning_rate": 1.791113885545917e-06, "loss": 0.7032, "step": 7249 }, { "epoch": 8.87392900856793, "grad_norm": 2.0208851788886184, "learning_rate": 1.7904993332514981e-06, "loss": 1.1311, "step": 7250 }, { "epoch": 8.87515299877601, "grad_norm": 1.713137565098638, "learning_rate": 1.7898848275828378e-06, "loss": 1.199, "step": 7251 }, { "epoch": 8.876376988984088, "grad_norm": 1.9508795978593971, "learning_rate": 1.7892703685803205e-06, "loss": 0.7723, "step": 7252 }, { "epoch": 8.877600979192167, "grad_norm": 1.7784219426018923, "learning_rate": 1.788655956284326e-06, "loss": 0.6095, "step": 7253 }, { "epoch": 8.878824969400245, "grad_norm": 1.0942808753929916, "learning_rate": 1.7880415907352315e-06, "loss": 0.6364, "step": 7254 }, { "epoch": 8.880048959608324, "grad_norm": 1.7391020723418422, "learning_rate": 1.7874272719734087e-06, "loss": 0.4631, "step": 7255 }, { "epoch": 8.8812729498164, "grad_norm": 1.0284062942462464, "learning_rate": 1.7868130000392309e-06, "loss": 0.5969, "step": 7256 }, { "epoch": 8.88249694002448, "grad_norm": 1.578388022243378, "learning_rate": 1.7861987749730659e-06, "loss": 0.6467, "step": 7257 }, { "epoch": 8.883720930232558, "grad_norm": 2.6774575755430257, "learning_rate": 1.7855845968152761e-06, "loss": 0.3686, "step": 7258 }, { "epoch": 8.884944920440637, "grad_norm": 1.6665898068224396, "learning_rate": 1.784970465606224e-06, "loss": 0.7572, "step": 7259 }, { "epoch": 8.886168910648715, "grad_norm": 1.4796518506707077, "learning_rate": 1.7843563813862685e-06, "loss": 0.9351, "step": 7260 }, { "epoch": 8.887392900856794, "grad_norm": 0.5467381544691978, "learning_rate": 1.7837423441957646e-06, "loss": 0.1306, "step": 7261 }, { "epoch": 8.88861689106487, "grad_norm": 1.2477752434613232, "learning_rate": 1.7831283540750646e-06, "loss": 0.8996, "step": 7262 }, { "epoch": 8.88984088127295, "grad_norm": 1.5076713654083478, "learning_rate": 1.7825144110645182e-06, "loss": 0.9286, "step": 7263 }, { "epoch": 8.891064871481028, "grad_norm": 2.979574175669115, "learning_rate": 1.7819005152044694e-06, "loss": 0.424, "step": 7264 }, { "epoch": 8.892288861689106, "grad_norm": 1.4717015713662, "learning_rate": 1.781286666535264e-06, "loss": 0.574, "step": 7265 }, { "epoch": 8.893512851897185, "grad_norm": 1.502408973454318, "learning_rate": 1.7806728650972405e-06, "loss": 1.4075, "step": 7266 }, { "epoch": 8.894736842105264, "grad_norm": 2.226950508353413, "learning_rate": 1.7800591109307353e-06, "loss": 0.601, "step": 7267 }, { "epoch": 8.895960832313342, "grad_norm": 1.5704790991724589, "learning_rate": 1.7794454040760822e-06, "loss": 0.503, "step": 7268 }, { "epoch": 8.89718482252142, "grad_norm": 1.572585407924366, "learning_rate": 1.7788317445736126e-06, "loss": 0.5164, "step": 7269 }, { "epoch": 8.898408812729498, "grad_norm": 1.5469266223325504, "learning_rate": 1.7782181324636528e-06, "loss": 0.4624, "step": 7270 }, { "epoch": 8.899632802937576, "grad_norm": 1.5944443716307788, "learning_rate": 1.777604567786528e-06, "loss": 0.8594, "step": 7271 }, { "epoch": 8.900856793145655, "grad_norm": 2.4423840151601635, "learning_rate": 1.7769910505825586e-06, "loss": 0.4015, "step": 7272 }, { "epoch": 8.902080783353734, "grad_norm": 1.951278315470863, "learning_rate": 1.7763775808920636e-06, "loss": 1.3245, "step": 7273 }, { "epoch": 8.903304773561812, "grad_norm": 1.4755258663512056, "learning_rate": 1.775764158755358e-06, "loss": 0.6974, "step": 7274 }, { "epoch": 8.904528763769889, "grad_norm": 1.0436719703164428, "learning_rate": 1.7751507842127524e-06, "loss": 0.6846, "step": 7275 }, { "epoch": 8.905752753977968, "grad_norm": 2.4863360336753297, "learning_rate": 1.7745374573045566e-06, "loss": 0.4393, "step": 7276 }, { "epoch": 8.906976744186046, "grad_norm": 1.28380168324209, "learning_rate": 1.7739241780710749e-06, "loss": 0.3715, "step": 7277 }, { "epoch": 8.908200734394125, "grad_norm": 3.049118291345027, "learning_rate": 1.773310946552611e-06, "loss": 0.5353, "step": 7278 }, { "epoch": 8.909424724602204, "grad_norm": 1.525342834183679, "learning_rate": 1.7726977627894643e-06, "loss": 0.9874, "step": 7279 }, { "epoch": 8.910648714810282, "grad_norm": 1.2628119830494042, "learning_rate": 1.7720846268219306e-06, "loss": 0.7702, "step": 7280 }, { "epoch": 8.911872705018359, "grad_norm": 1.804141041345193, "learning_rate": 1.771471538690302e-06, "loss": 0.4062, "step": 7281 }, { "epoch": 8.913096695226438, "grad_norm": 1.1557754313848765, "learning_rate": 1.77085849843487e-06, "loss": 0.6631, "step": 7282 }, { "epoch": 8.914320685434516, "grad_norm": 1.8569190265225908, "learning_rate": 1.770245506095921e-06, "loss": 0.4885, "step": 7283 }, { "epoch": 8.915544675642595, "grad_norm": 1.6243052000935068, "learning_rate": 1.7696325617137377e-06, "loss": 0.4375, "step": 7284 }, { "epoch": 8.916768665850674, "grad_norm": 2.370303223623652, "learning_rate": 1.7690196653285999e-06, "loss": 1.0248, "step": 7285 }, { "epoch": 8.917992656058752, "grad_norm": 3.581657258254009, "learning_rate": 1.7684068169807875e-06, "loss": 0.3724, "step": 7286 }, { "epoch": 8.919216646266829, "grad_norm": 2.0289865612395723, "learning_rate": 1.767794016710573e-06, "loss": 0.3968, "step": 7287 }, { "epoch": 8.920440636474908, "grad_norm": 1.0229302016915696, "learning_rate": 1.7671812645582267e-06, "loss": 0.5125, "step": 7288 }, { "epoch": 8.921664626682986, "grad_norm": 3.2449769775471573, "learning_rate": 1.7665685605640177e-06, "loss": 0.4307, "step": 7289 }, { "epoch": 8.922888616891065, "grad_norm": 1.2679777739344953, "learning_rate": 1.7659559047682096e-06, "loss": 0.5293, "step": 7290 }, { "epoch": 8.924112607099143, "grad_norm": 1.6643903530350674, "learning_rate": 1.7653432972110646e-06, "loss": 1.5272, "step": 7291 }, { "epoch": 8.925336597307222, "grad_norm": 1.533170818512399, "learning_rate": 1.7647307379328416e-06, "loss": 0.7081, "step": 7292 }, { "epoch": 8.9265605875153, "grad_norm": 2.107621169466562, "learning_rate": 1.7641182269737944e-06, "loss": 0.6877, "step": 7293 }, { "epoch": 8.927784577723378, "grad_norm": 1.616720130796334, "learning_rate": 1.7635057643741748e-06, "loss": 0.9132, "step": 7294 }, { "epoch": 8.929008567931456, "grad_norm": 1.5971686331485757, "learning_rate": 1.7628933501742329e-06, "loss": 1.2728, "step": 7295 }, { "epoch": 8.930232558139535, "grad_norm": 1.82690413069777, "learning_rate": 1.7622809844142138e-06, "loss": 0.5623, "step": 7296 }, { "epoch": 8.931456548347613, "grad_norm": 1.647210268661482, "learning_rate": 1.7616686671343597e-06, "loss": 1.1804, "step": 7297 }, { "epoch": 8.932680538555692, "grad_norm": 2.7190606467067937, "learning_rate": 1.7610563983749093e-06, "loss": 1.0696, "step": 7298 }, { "epoch": 8.93390452876377, "grad_norm": 1.791917618280875, "learning_rate": 1.7604441781760997e-06, "loss": 0.5331, "step": 7299 }, { "epoch": 8.935128518971847, "grad_norm": 1.171288709421414, "learning_rate": 1.7598320065781634e-06, "loss": 0.4451, "step": 7300 }, { "epoch": 8.936352509179926, "grad_norm": 2.1333442598412713, "learning_rate": 1.7592198836213303e-06, "loss": 0.9304, "step": 7301 }, { "epoch": 8.937576499388005, "grad_norm": 2.459665837851935, "learning_rate": 1.7586078093458264e-06, "loss": 0.8271, "step": 7302 }, { "epoch": 8.938800489596083, "grad_norm": 2.598555636470584, "learning_rate": 1.757995783791874e-06, "loss": 0.4607, "step": 7303 }, { "epoch": 8.940024479804162, "grad_norm": 2.6726868483079484, "learning_rate": 1.7573838069996952e-06, "loss": 0.366, "step": 7304 }, { "epoch": 8.94124847001224, "grad_norm": 3.081633441370582, "learning_rate": 1.7567718790095054e-06, "loss": 0.3595, "step": 7305 }, { "epoch": 8.94247246022032, "grad_norm": 1.7195366255676325, "learning_rate": 1.756159999861519e-06, "loss": 0.4308, "step": 7306 }, { "epoch": 8.943696450428396, "grad_norm": 2.0725801412006954, "learning_rate": 1.7555481695959454e-06, "loss": 0.9615, "step": 7307 }, { "epoch": 8.944920440636475, "grad_norm": 1.6034859518174511, "learning_rate": 1.7549363882529933e-06, "loss": 0.5077, "step": 7308 }, { "epoch": 8.946144430844553, "grad_norm": 1.13764272081045, "learning_rate": 1.7543246558728658e-06, "loss": 0.5465, "step": 7309 }, { "epoch": 8.947368421052632, "grad_norm": 1.838306743231294, "learning_rate": 1.7537129724957644e-06, "loss": 0.9655, "step": 7310 }, { "epoch": 8.94859241126071, "grad_norm": 1.7071161011485083, "learning_rate": 1.7531013381618845e-06, "loss": 0.4746, "step": 7311 }, { "epoch": 8.949816401468787, "grad_norm": 2.2393829092126336, "learning_rate": 1.7524897529114238e-06, "loss": 1.2071, "step": 7312 }, { "epoch": 8.951040391676866, "grad_norm": 1.9619796232003515, "learning_rate": 1.7518782167845711e-06, "loss": 0.7218, "step": 7313 }, { "epoch": 8.952264381884945, "grad_norm": 1.3115222929669283, "learning_rate": 1.7512667298215148e-06, "loss": 0.5628, "step": 7314 }, { "epoch": 8.953488372093023, "grad_norm": 2.5344399473413377, "learning_rate": 1.7506552920624397e-06, "loss": 0.4139, "step": 7315 }, { "epoch": 8.954712362301102, "grad_norm": 2.1900013263305436, "learning_rate": 1.7500439035475266e-06, "loss": 0.5064, "step": 7316 }, { "epoch": 8.95593635250918, "grad_norm": 1.1581813379334414, "learning_rate": 1.749432564316955e-06, "loss": 0.5764, "step": 7317 }, { "epoch": 8.957160342717259, "grad_norm": 1.592425333078659, "learning_rate": 1.748821274410899e-06, "loss": 0.4619, "step": 7318 }, { "epoch": 8.958384332925336, "grad_norm": 1.3938741506356829, "learning_rate": 1.7482100338695307e-06, "loss": 0.7899, "step": 7319 }, { "epoch": 8.959608323133414, "grad_norm": 1.4368198683995814, "learning_rate": 1.747598842733017e-06, "loss": 0.5734, "step": 7320 }, { "epoch": 8.960832313341493, "grad_norm": 1.0838360185539475, "learning_rate": 1.746987701041526e-06, "loss": 0.5731, "step": 7321 }, { "epoch": 8.962056303549572, "grad_norm": 2.308754402059643, "learning_rate": 1.7463766088352174e-06, "loss": 0.7208, "step": 7322 }, { "epoch": 8.96328029375765, "grad_norm": 1.576790965673864, "learning_rate": 1.7457655661542507e-06, "loss": 0.8656, "step": 7323 }, { "epoch": 8.964504283965729, "grad_norm": 2.4277589496402623, "learning_rate": 1.7451545730387809e-06, "loss": 0.4746, "step": 7324 }, { "epoch": 8.965728274173806, "grad_norm": 1.2895734739086566, "learning_rate": 1.7445436295289608e-06, "loss": 0.5822, "step": 7325 }, { "epoch": 8.966952264381884, "grad_norm": 1.7934218937593864, "learning_rate": 1.7439327356649393e-06, "loss": 0.531, "step": 7326 }, { "epoch": 8.968176254589963, "grad_norm": 1.4644995218043975, "learning_rate": 1.7433218914868623e-06, "loss": 0.2291, "step": 7327 }, { "epoch": 8.969400244798042, "grad_norm": 2.4603890731752207, "learning_rate": 1.7427110970348715e-06, "loss": 0.2433, "step": 7328 }, { "epoch": 8.97062423500612, "grad_norm": 2.3298599839650733, "learning_rate": 1.7421003523491053e-06, "loss": 1.0318, "step": 7329 }, { "epoch": 8.971848225214199, "grad_norm": 1.0738048086629404, "learning_rate": 1.741489657469702e-06, "loss": 0.5765, "step": 7330 }, { "epoch": 8.973072215422278, "grad_norm": 2.5734639178488266, "learning_rate": 1.740879012436793e-06, "loss": 0.4886, "step": 7331 }, { "epoch": 8.974296205630354, "grad_norm": 2.6085480493635327, "learning_rate": 1.740268417290507e-06, "loss": 0.5175, "step": 7332 }, { "epoch": 8.975520195838433, "grad_norm": 1.3266427708898758, "learning_rate": 1.73965787207097e-06, "loss": 1.0491, "step": 7333 }, { "epoch": 8.976744186046512, "grad_norm": 2.480454393238444, "learning_rate": 1.7390473768183057e-06, "loss": 0.7954, "step": 7334 }, { "epoch": 8.97796817625459, "grad_norm": 1.4074068795036734, "learning_rate": 1.7384369315726339e-06, "loss": 1.0787, "step": 7335 }, { "epoch": 8.979192166462669, "grad_norm": 1.0356362334659857, "learning_rate": 1.73782653637407e-06, "loss": 0.5997, "step": 7336 }, { "epoch": 8.980416156670747, "grad_norm": 1.485679054577717, "learning_rate": 1.7372161912627257e-06, "loss": 0.5693, "step": 7337 }, { "epoch": 8.981640146878824, "grad_norm": 2.891961261565229, "learning_rate": 1.736605896278713e-06, "loss": 0.5569, "step": 7338 }, { "epoch": 8.982864137086903, "grad_norm": 1.2043327883313792, "learning_rate": 1.735995651462138e-06, "loss": 0.5626, "step": 7339 }, { "epoch": 8.984088127294982, "grad_norm": 1.3730325704176989, "learning_rate": 1.735385456853102e-06, "loss": 0.8025, "step": 7340 }, { "epoch": 8.98531211750306, "grad_norm": 2.4601944920817815, "learning_rate": 1.7347753124917061e-06, "loss": 0.4073, "step": 7341 }, { "epoch": 8.986536107711139, "grad_norm": 1.0942885526033632, "learning_rate": 1.7341652184180452e-06, "loss": 0.8489, "step": 7342 }, { "epoch": 8.987760097919217, "grad_norm": 1.5078588092896557, "learning_rate": 1.7335551746722145e-06, "loss": 0.3644, "step": 7343 }, { "epoch": 8.988984088127294, "grad_norm": 2.3235604598738044, "learning_rate": 1.7329451812943027e-06, "loss": 0.386, "step": 7344 }, { "epoch": 8.990208078335373, "grad_norm": 2.186779896769756, "learning_rate": 1.732335238324397e-06, "loss": 0.4365, "step": 7345 }, { "epoch": 8.991432068543451, "grad_norm": 2.135971929817507, "learning_rate": 1.7317253458025781e-06, "loss": 0.6278, "step": 7346 }, { "epoch": 8.99265605875153, "grad_norm": 1.7255863676093877, "learning_rate": 1.7311155037689291e-06, "loss": 0.5511, "step": 7347 }, { "epoch": 8.993880048959609, "grad_norm": 2.4747881333122552, "learning_rate": 1.730505712263526e-06, "loss": 0.2882, "step": 7348 }, { "epoch": 8.995104039167687, "grad_norm": 1.3995749897998755, "learning_rate": 1.7298959713264406e-06, "loss": 0.458, "step": 7349 }, { "epoch": 8.996328029375764, "grad_norm": 1.2088972642103417, "learning_rate": 1.7292862809977435e-06, "loss": 0.8983, "step": 7350 }, { "epoch": 8.997552019583843, "grad_norm": 1.148459420304103, "learning_rate": 1.7286766413175013e-06, "loss": 0.848, "step": 7351 }, { "epoch": 8.998776009791921, "grad_norm": 1.6300404356347673, "learning_rate": 1.7280670523257775e-06, "loss": 1.4589, "step": 7352 }, { "epoch": 9.0, "grad_norm": 2.309469555950492, "learning_rate": 1.7274575140626318e-06, "loss": 0.4989, "step": 7353 }, { "epoch": 9.001223990208079, "grad_norm": 2.4320292374327908, "learning_rate": 1.7268480265681214e-06, "loss": 0.5561, "step": 7354 }, { "epoch": 9.002447980416157, "grad_norm": 2.1936464845393497, "learning_rate": 1.7262385898822975e-06, "loss": 0.5535, "step": 7355 }, { "epoch": 9.003671970624236, "grad_norm": 1.8381751551558219, "learning_rate": 1.7256292040452122e-06, "loss": 0.4438, "step": 7356 }, { "epoch": 9.004895960832313, "grad_norm": 2.44716465069386, "learning_rate": 1.7250198690969122e-06, "loss": 0.3518, "step": 7357 }, { "epoch": 9.006119951040391, "grad_norm": 2.033779330329197, "learning_rate": 1.7244105850774395e-06, "loss": 1.1984, "step": 7358 }, { "epoch": 9.00734394124847, "grad_norm": 1.4259449873906966, "learning_rate": 1.723801352026834e-06, "loss": 1.5716, "step": 7359 }, { "epoch": 9.008567931456549, "grad_norm": 2.4819478339018897, "learning_rate": 1.7231921699851331e-06, "loss": 0.7305, "step": 7360 }, { "epoch": 9.009791921664627, "grad_norm": 1.3941192100423891, "learning_rate": 1.7225830389923695e-06, "loss": 0.357, "step": 7361 }, { "epoch": 9.011015911872706, "grad_norm": 3.002780147772951, "learning_rate": 1.721973959088573e-06, "loss": 0.7044, "step": 7362 }, { "epoch": 9.012239902080783, "grad_norm": 1.4639117390024883, "learning_rate": 1.72136493031377e-06, "loss": 0.9817, "step": 7363 }, { "epoch": 9.013463892288861, "grad_norm": 1.8798283869545693, "learning_rate": 1.7207559527079842e-06, "loss": 1.007, "step": 7364 }, { "epoch": 9.01468788249694, "grad_norm": 2.671469029121102, "learning_rate": 1.720147026311235e-06, "loss": 0.5882, "step": 7365 }, { "epoch": 9.015911872705018, "grad_norm": 1.60716243297815, "learning_rate": 1.719538151163539e-06, "loss": 0.432, "step": 7366 }, { "epoch": 9.017135862913097, "grad_norm": 1.3349995056224946, "learning_rate": 1.7189293273049087e-06, "loss": 0.68, "step": 7367 }, { "epoch": 9.018359853121176, "grad_norm": 1.5445956143643618, "learning_rate": 1.7183205547753534e-06, "loss": 0.4557, "step": 7368 }, { "epoch": 9.019583843329253, "grad_norm": 0.8974039351346341, "learning_rate": 1.7177118336148806e-06, "loss": 0.5246, "step": 7369 }, { "epoch": 9.020807833537331, "grad_norm": 1.0945899158253276, "learning_rate": 1.7171031638634927e-06, "loss": 0.5988, "step": 7370 }, { "epoch": 9.02203182374541, "grad_norm": 1.985047506625008, "learning_rate": 1.7164945455611892e-06, "loss": 0.3517, "step": 7371 }, { "epoch": 9.023255813953488, "grad_norm": 1.977454686395154, "learning_rate": 1.7158859787479654e-06, "loss": 0.386, "step": 7372 }, { "epoch": 9.024479804161567, "grad_norm": 1.7228349985905766, "learning_rate": 1.7152774634638153e-06, "loss": 1.0993, "step": 7373 }, { "epoch": 9.025703794369646, "grad_norm": 1.4462478558038456, "learning_rate": 1.7146689997487286e-06, "loss": 0.6078, "step": 7374 }, { "epoch": 9.026927784577722, "grad_norm": 2.3179122316069667, "learning_rate": 1.71406058764269e-06, "loss": 0.7379, "step": 7375 }, { "epoch": 9.028151774785801, "grad_norm": 1.3888921513914618, "learning_rate": 1.7134522271856823e-06, "loss": 0.6236, "step": 7376 }, { "epoch": 9.02937576499388, "grad_norm": 1.1096669564683088, "learning_rate": 1.7128439184176849e-06, "loss": 0.6139, "step": 7377 }, { "epoch": 9.030599755201958, "grad_norm": 1.7354039545722222, "learning_rate": 1.7122356613786739e-06, "loss": 0.9566, "step": 7378 }, { "epoch": 9.031823745410037, "grad_norm": 2.0265014556919145, "learning_rate": 1.7116274561086215e-06, "loss": 0.8138, "step": 7379 }, { "epoch": 9.033047735618116, "grad_norm": 1.4550687731848733, "learning_rate": 1.7110193026474967e-06, "loss": 0.6488, "step": 7380 }, { "epoch": 9.034271725826194, "grad_norm": 1.2314664462933818, "learning_rate": 1.7104112010352648e-06, "loss": 0.4319, "step": 7381 }, { "epoch": 9.035495716034271, "grad_norm": 2.148657200536083, "learning_rate": 1.7098031513118884e-06, "loss": 0.4501, "step": 7382 }, { "epoch": 9.03671970624235, "grad_norm": 0.9064463626245963, "learning_rate": 1.7091951535173268e-06, "loss": 0.3916, "step": 7383 }, { "epoch": 9.037943696450428, "grad_norm": 2.113452485343649, "learning_rate": 1.7085872076915343e-06, "loss": 0.5591, "step": 7384 }, { "epoch": 9.039167686658507, "grad_norm": 1.9414805894641267, "learning_rate": 1.7079793138744622e-06, "loss": 0.5511, "step": 7385 }, { "epoch": 9.040391676866586, "grad_norm": 1.0636713131653328, "learning_rate": 1.7073714721060619e-06, "loss": 0.6332, "step": 7386 }, { "epoch": 9.041615667074664, "grad_norm": 1.4125690729858618, "learning_rate": 1.706763682426276e-06, "loss": 0.53, "step": 7387 }, { "epoch": 9.042839657282741, "grad_norm": 1.1820816681765456, "learning_rate": 1.706155944875047e-06, "loss": 0.894, "step": 7388 }, { "epoch": 9.04406364749082, "grad_norm": 1.2946459628676847, "learning_rate": 1.7055482594923128e-06, "loss": 0.5779, "step": 7389 }, { "epoch": 9.045287637698898, "grad_norm": 1.428207256685282, "learning_rate": 1.7049406263180083e-06, "loss": 1.2311, "step": 7390 }, { "epoch": 9.046511627906977, "grad_norm": 3.3286952847981888, "learning_rate": 1.7043330453920653e-06, "loss": 0.3557, "step": 7391 }, { "epoch": 9.047735618115055, "grad_norm": 1.2241801016674074, "learning_rate": 1.7037255167544126e-06, "loss": 0.5943, "step": 7392 }, { "epoch": 9.048959608323134, "grad_norm": 2.2979281727308636, "learning_rate": 1.7031180404449727e-06, "loss": 0.4003, "step": 7393 }, { "epoch": 9.050183598531211, "grad_norm": 1.7279021369587342, "learning_rate": 1.702510616503667e-06, "loss": 1.1912, "step": 7394 }, { "epoch": 9.05140758873929, "grad_norm": 2.513210152125161, "learning_rate": 1.7019032449704154e-06, "loss": 0.3394, "step": 7395 }, { "epoch": 9.052631578947368, "grad_norm": 1.6443288371590166, "learning_rate": 1.70129592588513e-06, "loss": 0.8264, "step": 7396 }, { "epoch": 9.053855569155447, "grad_norm": 1.3364969871012475, "learning_rate": 1.7006886592877222e-06, "loss": 1.5693, "step": 7397 }, { "epoch": 9.055079559363525, "grad_norm": 1.5141677983562423, "learning_rate": 1.7000814452180987e-06, "loss": 0.82, "step": 7398 }, { "epoch": 9.056303549571604, "grad_norm": 2.483104312995922, "learning_rate": 1.6994742837161642e-06, "loss": 0.5391, "step": 7399 }, { "epoch": 9.057527539779683, "grad_norm": 1.2254782773621526, "learning_rate": 1.6988671748218194e-06, "loss": 0.5213, "step": 7400 }, { "epoch": 9.05875152998776, "grad_norm": 1.7507537023339734, "learning_rate": 1.698260118574961e-06, "loss": 1.0102, "step": 7401 }, { "epoch": 9.059975520195838, "grad_norm": 1.9723499197213978, "learning_rate": 1.6976531150154812e-06, "loss": 0.4368, "step": 7402 }, { "epoch": 9.061199510403917, "grad_norm": 2.1158641571342844, "learning_rate": 1.697046164183271e-06, "loss": 0.4802, "step": 7403 }, { "epoch": 9.062423500611995, "grad_norm": 2.4834183533399035, "learning_rate": 1.6964392661182171e-06, "loss": 0.6807, "step": 7404 }, { "epoch": 9.063647490820074, "grad_norm": 2.173938250173641, "learning_rate": 1.6958324208602028e-06, "loss": 0.6361, "step": 7405 }, { "epoch": 9.064871481028153, "grad_norm": 1.7283704997517901, "learning_rate": 1.6952256284491076e-06, "loss": 0.9332, "step": 7406 }, { "epoch": 9.06609547123623, "grad_norm": 1.4809577684031454, "learning_rate": 1.6946188889248066e-06, "loss": 0.57, "step": 7407 }, { "epoch": 9.067319461444308, "grad_norm": 1.4732728386203315, "learning_rate": 1.6940122023271737e-06, "loss": 0.7522, "step": 7408 }, { "epoch": 9.068543451652387, "grad_norm": 1.4631260871188463, "learning_rate": 1.693405568696078e-06, "loss": 0.4737, "step": 7409 }, { "epoch": 9.069767441860465, "grad_norm": 2.5344167705359073, "learning_rate": 1.6927989880713852e-06, "loss": 0.3958, "step": 7410 }, { "epoch": 9.070991432068544, "grad_norm": 2.6139084975741236, "learning_rate": 1.6921924604929556e-06, "loss": 0.9568, "step": 7411 }, { "epoch": 9.072215422276622, "grad_norm": 1.4331334036449888, "learning_rate": 1.6915859860006518e-06, "loss": 0.7967, "step": 7412 }, { "epoch": 9.0734394124847, "grad_norm": 1.2932872675392948, "learning_rate": 1.6909795646343258e-06, "loss": 0.5371, "step": 7413 }, { "epoch": 9.074663402692778, "grad_norm": 1.877414470536694, "learning_rate": 1.6903731964338307e-06, "loss": 0.393, "step": 7414 }, { "epoch": 9.075887392900857, "grad_norm": 1.9205248147961156, "learning_rate": 1.6897668814390146e-06, "loss": 0.5769, "step": 7415 }, { "epoch": 9.077111383108935, "grad_norm": 1.4565821586128898, "learning_rate": 1.6891606196897214e-06, "loss": 0.6139, "step": 7416 }, { "epoch": 9.078335373317014, "grad_norm": 1.4618410638503796, "learning_rate": 1.688554411225794e-06, "loss": 0.8109, "step": 7417 }, { "epoch": 9.079559363525092, "grad_norm": 1.8650891810955934, "learning_rate": 1.6879482560870692e-06, "loss": 0.6288, "step": 7418 }, { "epoch": 9.080783353733171, "grad_norm": 1.849012521862426, "learning_rate": 1.687342154313382e-06, "loss": 0.6264, "step": 7419 }, { "epoch": 9.082007343941248, "grad_norm": 1.673465420782349, "learning_rate": 1.6867361059445608e-06, "loss": 0.6815, "step": 7420 }, { "epoch": 9.083231334149326, "grad_norm": 1.481311999434326, "learning_rate": 1.6861301110204367e-06, "loss": 0.3823, "step": 7421 }, { "epoch": 9.084455324357405, "grad_norm": 1.1990501128762643, "learning_rate": 1.6855241695808305e-06, "loss": 0.6266, "step": 7422 }, { "epoch": 9.085679314565484, "grad_norm": 1.355299147917633, "learning_rate": 1.6849182816655634e-06, "loss": 0.4583, "step": 7423 }, { "epoch": 9.086903304773562, "grad_norm": 1.5845551257084098, "learning_rate": 1.6843124473144517e-06, "loss": 1.6407, "step": 7424 }, { "epoch": 9.088127294981641, "grad_norm": 2.30883691145928, "learning_rate": 1.6837066665673095e-06, "loss": 0.4642, "step": 7425 }, { "epoch": 9.089351285189718, "grad_norm": 1.8777367412475126, "learning_rate": 1.6831009394639457e-06, "loss": 0.96, "step": 7426 }, { "epoch": 9.090575275397796, "grad_norm": 1.0805342001267293, "learning_rate": 1.6824952660441668e-06, "loss": 0.581, "step": 7427 }, { "epoch": 9.091799265605875, "grad_norm": 1.7920189376951863, "learning_rate": 1.6818896463477757e-06, "loss": 1.2146, "step": 7428 }, { "epoch": 9.093023255813954, "grad_norm": 0.9466820138764676, "learning_rate": 1.6812840804145696e-06, "loss": 0.4487, "step": 7429 }, { "epoch": 9.094247246022032, "grad_norm": 1.994414379073264, "learning_rate": 1.6806785682843469e-06, "loss": 0.641, "step": 7430 }, { "epoch": 9.095471236230111, "grad_norm": 1.4433557806755262, "learning_rate": 1.680073109996898e-06, "loss": 0.868, "step": 7431 }, { "epoch": 9.096695226438188, "grad_norm": 1.5528384910429267, "learning_rate": 1.6794677055920115e-06, "loss": 0.6265, "step": 7432 }, { "epoch": 9.097919216646266, "grad_norm": 1.4563888367767, "learning_rate": 1.6788623551094719e-06, "loss": 0.6219, "step": 7433 }, { "epoch": 9.099143206854345, "grad_norm": 1.6216009605518524, "learning_rate": 1.6782570585890614e-06, "loss": 0.9079, "step": 7434 }, { "epoch": 9.100367197062424, "grad_norm": 1.0811948828613291, "learning_rate": 1.677651816070558e-06, "loss": 0.3719, "step": 7435 }, { "epoch": 9.101591187270502, "grad_norm": 2.512103991864088, "learning_rate": 1.6770466275937352e-06, "loss": 0.7876, "step": 7436 }, { "epoch": 9.10281517747858, "grad_norm": 1.6232227889531536, "learning_rate": 1.6764414931983641e-06, "loss": 0.7179, "step": 7437 }, { "epoch": 9.104039167686658, "grad_norm": 2.4135295157746848, "learning_rate": 1.6758364129242121e-06, "loss": 0.4662, "step": 7438 }, { "epoch": 9.105263157894736, "grad_norm": 1.8476128158723835, "learning_rate": 1.6752313868110432e-06, "loss": 0.3628, "step": 7439 }, { "epoch": 9.106487148102815, "grad_norm": 1.594114832721126, "learning_rate": 1.6746264148986165e-06, "loss": 0.641, "step": 7440 }, { "epoch": 9.107711138310894, "grad_norm": 2.5750320747101427, "learning_rate": 1.674021497226689e-06, "loss": 0.5945, "step": 7441 }, { "epoch": 9.108935128518972, "grad_norm": 2.450737319402239, "learning_rate": 1.6734166338350134e-06, "loss": 0.863, "step": 7442 }, { "epoch": 9.11015911872705, "grad_norm": 2.12673826122511, "learning_rate": 1.6728118247633393e-06, "loss": 0.4729, "step": 7443 }, { "epoch": 9.11138310893513, "grad_norm": 1.3155690434591856, "learning_rate": 1.6722070700514127e-06, "loss": 0.8674, "step": 7444 }, { "epoch": 9.112607099143206, "grad_norm": 2.8336592811884853, "learning_rate": 1.6716023697389755e-06, "loss": 0.3477, "step": 7445 }, { "epoch": 9.113831089351285, "grad_norm": 1.8540035009816502, "learning_rate": 1.6709977238657666e-06, "loss": 0.5226, "step": 7446 }, { "epoch": 9.115055079559363, "grad_norm": 3.3865994622373488, "learning_rate": 1.6703931324715208e-06, "loss": 0.4336, "step": 7447 }, { "epoch": 9.116279069767442, "grad_norm": 1.9578120569813284, "learning_rate": 1.6697885955959708e-06, "loss": 0.505, "step": 7448 }, { "epoch": 9.11750305997552, "grad_norm": 2.1043329784467875, "learning_rate": 1.6691841132788427e-06, "loss": 1.1792, "step": 7449 }, { "epoch": 9.1187270501836, "grad_norm": 1.3523302343157384, "learning_rate": 1.6685796855598618e-06, "loss": 0.6974, "step": 7450 }, { "epoch": 9.119951040391676, "grad_norm": 1.8931967385821504, "learning_rate": 1.6679753124787487e-06, "loss": 0.7305, "step": 7451 }, { "epoch": 9.121175030599755, "grad_norm": 1.6041782889438643, "learning_rate": 1.667370994075221e-06, "loss": 1.0846, "step": 7452 }, { "epoch": 9.122399020807833, "grad_norm": 1.5542174544576755, "learning_rate": 1.6667667303889915e-06, "loss": 0.7024, "step": 7453 }, { "epoch": 9.123623011015912, "grad_norm": 1.5976603224951254, "learning_rate": 1.6661625214597713e-06, "loss": 1.5584, "step": 7454 }, { "epoch": 9.12484700122399, "grad_norm": 0.8167979256575747, "learning_rate": 1.6655583673272646e-06, "loss": 0.3523, "step": 7455 }, { "epoch": 9.12607099143207, "grad_norm": 1.5282192978344455, "learning_rate": 1.6649542680311764e-06, "loss": 1.1666, "step": 7456 }, { "epoch": 9.127294981640146, "grad_norm": 3.47556122134679, "learning_rate": 1.6643502236112057e-06, "loss": 0.688, "step": 7457 }, { "epoch": 9.128518971848225, "grad_norm": 1.4809810862020516, "learning_rate": 1.663746234107047e-06, "loss": 0.6849, "step": 7458 }, { "epoch": 9.129742962056303, "grad_norm": 1.7869047861955456, "learning_rate": 1.6631422995583919e-06, "loss": 0.3146, "step": 7459 }, { "epoch": 9.130966952264382, "grad_norm": 1.5092463623738277, "learning_rate": 1.6625384200049306e-06, "loss": 0.468, "step": 7460 }, { "epoch": 9.13219094247246, "grad_norm": 1.3703307038467838, "learning_rate": 1.6619345954863464e-06, "loss": 1.1508, "step": 7461 }, { "epoch": 9.13341493268054, "grad_norm": 1.7546771127264036, "learning_rate": 1.6613308260423206e-06, "loss": 1.3086, "step": 7462 }, { "epoch": 9.134638922888616, "grad_norm": 2.5693560780419906, "learning_rate": 1.6607271117125307e-06, "loss": 0.7956, "step": 7463 }, { "epoch": 9.135862913096695, "grad_norm": 1.690079125697347, "learning_rate": 1.6601234525366514e-06, "loss": 0.6317, "step": 7464 }, { "epoch": 9.137086903304773, "grad_norm": 2.0519432348606195, "learning_rate": 1.659519848554352e-06, "loss": 0.3978, "step": 7465 }, { "epoch": 9.138310893512852, "grad_norm": 1.6848486984504072, "learning_rate": 1.6589162998053001e-06, "loss": 1.2311, "step": 7466 }, { "epoch": 9.13953488372093, "grad_norm": 1.9787035664555161, "learning_rate": 1.6583128063291576e-06, "loss": 0.412, "step": 7467 }, { "epoch": 9.140758873929009, "grad_norm": 1.2732037820252815, "learning_rate": 1.6577093681655836e-06, "loss": 0.6422, "step": 7468 }, { "epoch": 9.141982864137088, "grad_norm": 1.935440037801581, "learning_rate": 1.6571059853542349e-06, "loss": 0.9097, "step": 7469 }, { "epoch": 9.143206854345165, "grad_norm": 1.2810684817820306, "learning_rate": 1.6565026579347635e-06, "loss": 0.5547, "step": 7470 }, { "epoch": 9.144430844553243, "grad_norm": 0.9179909137125755, "learning_rate": 1.6558993859468175e-06, "loss": 0.4955, "step": 7471 }, { "epoch": 9.145654834761322, "grad_norm": 1.1190733425281176, "learning_rate": 1.6552961694300412e-06, "loss": 0.478, "step": 7472 }, { "epoch": 9.1468788249694, "grad_norm": 1.9055702894710678, "learning_rate": 1.6546930084240764e-06, "loss": 0.4305, "step": 7473 }, { "epoch": 9.148102815177479, "grad_norm": 1.5927696140868572, "learning_rate": 1.6540899029685612e-06, "loss": 1.2332, "step": 7474 }, { "epoch": 9.149326805385558, "grad_norm": 1.3861421229967714, "learning_rate": 1.653486853103129e-06, "loss": 0.6678, "step": 7475 }, { "epoch": 9.150550795593634, "grad_norm": 1.6672950290542983, "learning_rate": 1.6528838588674079e-06, "loss": 0.934, "step": 7476 }, { "epoch": 9.151774785801713, "grad_norm": 2.4262442547202876, "learning_rate": 1.6522809203010282e-06, "loss": 0.9524, "step": 7477 }, { "epoch": 9.152998776009792, "grad_norm": 1.3454839149488855, "learning_rate": 1.6516780374436103e-06, "loss": 1.1311, "step": 7478 }, { "epoch": 9.15422276621787, "grad_norm": 1.0676020976357605, "learning_rate": 1.6510752103347743e-06, "loss": 0.5386, "step": 7479 }, { "epoch": 9.155446756425949, "grad_norm": 1.2432936279532507, "learning_rate": 1.6504724390141353e-06, "loss": 0.718, "step": 7480 }, { "epoch": 9.156670746634028, "grad_norm": 1.970538873780607, "learning_rate": 1.6498697235213052e-06, "loss": 0.4105, "step": 7481 }, { "epoch": 9.157894736842104, "grad_norm": 1.1474040328986332, "learning_rate": 1.6492670638958924e-06, "loss": 0.5051, "step": 7482 }, { "epoch": 9.159118727050183, "grad_norm": 1.057407458276689, "learning_rate": 1.6486644601775026e-06, "loss": 0.6816, "step": 7483 }, { "epoch": 9.160342717258262, "grad_norm": 1.2656561484693738, "learning_rate": 1.6480619124057346e-06, "loss": 1.3327, "step": 7484 }, { "epoch": 9.16156670746634, "grad_norm": 1.0790203645680496, "learning_rate": 1.647459420620186e-06, "loss": 0.847, "step": 7485 }, { "epoch": 9.162790697674419, "grad_norm": 1.1764976300460082, "learning_rate": 1.6468569848604522e-06, "loss": 0.6854, "step": 7486 }, { "epoch": 9.164014687882498, "grad_norm": 1.496092009105994, "learning_rate": 1.6462546051661216e-06, "loss": 0.3599, "step": 7487 }, { "epoch": 9.165238678090576, "grad_norm": 1.4594527750186645, "learning_rate": 1.6456522815767803e-06, "loss": 0.4296, "step": 7488 }, { "epoch": 9.166462668298653, "grad_norm": 3.3226275455386824, "learning_rate": 1.6450500141320106e-06, "loss": 0.7665, "step": 7489 }, { "epoch": 9.167686658506732, "grad_norm": 2.2155004292618536, "learning_rate": 1.6444478028713922e-06, "loss": 1.0813, "step": 7490 }, { "epoch": 9.16891064871481, "grad_norm": 1.7777661521953578, "learning_rate": 1.6438456478345e-06, "loss": 0.7382, "step": 7491 }, { "epoch": 9.170134638922889, "grad_norm": 1.8536180788219556, "learning_rate": 1.643243549060905e-06, "loss": 0.4868, "step": 7492 }, { "epoch": 9.171358629130967, "grad_norm": 1.661181922160972, "learning_rate": 1.6426415065901752e-06, "loss": 1.3629, "step": 7493 }, { "epoch": 9.172582619339046, "grad_norm": 1.572638042219822, "learning_rate": 1.642039520461873e-06, "loss": 1.1767, "step": 7494 }, { "epoch": 9.173806609547123, "grad_norm": 2.841093339263793, "learning_rate": 1.6414375907155618e-06, "loss": 0.3269, "step": 7495 }, { "epoch": 9.175030599755202, "grad_norm": 1.4145488993828061, "learning_rate": 1.6408357173907963e-06, "loss": 0.7118, "step": 7496 }, { "epoch": 9.17625458996328, "grad_norm": 1.9912434461729371, "learning_rate": 1.6402339005271293e-06, "loss": 1.1239, "step": 7497 }, { "epoch": 9.177478580171359, "grad_norm": 1.1631664048328223, "learning_rate": 1.6396321401641097e-06, "loss": 0.6025, "step": 7498 }, { "epoch": 9.178702570379437, "grad_norm": 2.25539531702093, "learning_rate": 1.6390304363412844e-06, "loss": 0.4195, "step": 7499 }, { "epoch": 9.179926560587516, "grad_norm": 2.6269737645119995, "learning_rate": 1.638428789098194e-06, "loss": 0.4524, "step": 7500 }, { "epoch": 9.181150550795593, "grad_norm": 1.5935420819282342, "learning_rate": 1.6378271984743776e-06, "loss": 0.4588, "step": 7501 }, { "epoch": 9.182374541003671, "grad_norm": 2.1668346272573977, "learning_rate": 1.637225664509367e-06, "loss": 0.3788, "step": 7502 }, { "epoch": 9.18359853121175, "grad_norm": 1.4070613567009012, "learning_rate": 1.6366241872426958e-06, "loss": 0.8929, "step": 7503 }, { "epoch": 9.184822521419829, "grad_norm": 1.8299521014510098, "learning_rate": 1.63602276671389e-06, "loss": 1.1895, "step": 7504 }, { "epoch": 9.186046511627907, "grad_norm": 1.5112584911977351, "learning_rate": 1.6354214029624719e-06, "loss": 0.5261, "step": 7505 }, { "epoch": 9.187270501835986, "grad_norm": 1.7879417165329936, "learning_rate": 1.6348200960279615e-06, "loss": 0.6133, "step": 7506 }, { "epoch": 9.188494492044065, "grad_norm": 1.619224982556922, "learning_rate": 1.6342188459498734e-06, "loss": 0.6207, "step": 7507 }, { "epoch": 9.189718482252141, "grad_norm": 2.168237594354624, "learning_rate": 1.633617652767721e-06, "loss": 0.838, "step": 7508 }, { "epoch": 9.19094247246022, "grad_norm": 1.2735278005213808, "learning_rate": 1.6330165165210121e-06, "loss": 0.766, "step": 7509 }, { "epoch": 9.192166462668299, "grad_norm": 2.016123500966144, "learning_rate": 1.6324154372492517e-06, "loss": 0.3152, "step": 7510 }, { "epoch": 9.193390452876377, "grad_norm": 2.246387496156874, "learning_rate": 1.631814414991938e-06, "loss": 0.4233, "step": 7511 }, { "epoch": 9.194614443084456, "grad_norm": 1.214001098956161, "learning_rate": 1.6312134497885713e-06, "loss": 0.8433, "step": 7512 }, { "epoch": 9.195838433292534, "grad_norm": 1.4397466138779782, "learning_rate": 1.6306125416786428e-06, "loss": 0.4553, "step": 7513 }, { "epoch": 9.197062423500611, "grad_norm": 2.4739125519414435, "learning_rate": 1.6300116907016422e-06, "loss": 0.4343, "step": 7514 }, { "epoch": 9.19828641370869, "grad_norm": 0.9115821229459217, "learning_rate": 1.629410896897055e-06, "loss": 0.3468, "step": 7515 }, { "epoch": 9.199510403916769, "grad_norm": 2.274049998722299, "learning_rate": 1.6288101603043643e-06, "loss": 1.4134, "step": 7516 }, { "epoch": 9.200734394124847, "grad_norm": 1.3485728559554444, "learning_rate": 1.6282094809630475e-06, "loss": 0.7404, "step": 7517 }, { "epoch": 9.201958384332926, "grad_norm": 1.4903866773690893, "learning_rate": 1.6276088589125791e-06, "loss": 0.4067, "step": 7518 }, { "epoch": 9.203182374541004, "grad_norm": 2.0463047464912774, "learning_rate": 1.6270082941924303e-06, "loss": 0.9648, "step": 7519 }, { "epoch": 9.204406364749081, "grad_norm": 2.825413854895487, "learning_rate": 1.6264077868420658e-06, "loss": 0.4709, "step": 7520 }, { "epoch": 9.20563035495716, "grad_norm": 1.8617516938328105, "learning_rate": 1.6258073369009516e-06, "loss": 0.5198, "step": 7521 }, { "epoch": 9.206854345165238, "grad_norm": 2.301137925515693, "learning_rate": 1.6252069444085455e-06, "loss": 0.8644, "step": 7522 }, { "epoch": 9.208078335373317, "grad_norm": 2.832824186299885, "learning_rate": 1.6246066094043035e-06, "loss": 0.744, "step": 7523 }, { "epoch": 9.209302325581396, "grad_norm": 1.1655384786510916, "learning_rate": 1.6240063319276767e-06, "loss": 0.4835, "step": 7524 }, { "epoch": 9.210526315789474, "grad_norm": 1.8703431515203315, "learning_rate": 1.6234061120181144e-06, "loss": 0.4288, "step": 7525 }, { "epoch": 9.211750305997551, "grad_norm": 1.2251328249867697, "learning_rate": 1.6228059497150597e-06, "loss": 0.5594, "step": 7526 }, { "epoch": 9.21297429620563, "grad_norm": 1.2744250548482172, "learning_rate": 1.6222058450579536e-06, "loss": 1.0901, "step": 7527 }, { "epoch": 9.214198286413708, "grad_norm": 2.152058977683754, "learning_rate": 1.6216057980862332e-06, "loss": 1.3391, "step": 7528 }, { "epoch": 9.215422276621787, "grad_norm": 1.0959048755652003, "learning_rate": 1.6210058088393293e-06, "loss": 0.6638, "step": 7529 }, { "epoch": 9.216646266829866, "grad_norm": 1.8417302513053093, "learning_rate": 1.6204058773566742e-06, "loss": 0.483, "step": 7530 }, { "epoch": 9.217870257037944, "grad_norm": 1.8081453247114678, "learning_rate": 1.6198060036776907e-06, "loss": 0.6969, "step": 7531 }, { "epoch": 9.219094247246023, "grad_norm": 1.9393985557618256, "learning_rate": 1.6192061878418008e-06, "loss": 0.7779, "step": 7532 }, { "epoch": 9.2203182374541, "grad_norm": 1.530271845167981, "learning_rate": 1.6186064298884223e-06, "loss": 1.5569, "step": 7533 }, { "epoch": 9.221542227662178, "grad_norm": 2.015975167197436, "learning_rate": 1.6180067298569697e-06, "loss": 0.5764, "step": 7534 }, { "epoch": 9.222766217870257, "grad_norm": 1.7276944630303201, "learning_rate": 1.6174070877868523e-06, "loss": 0.6922, "step": 7535 }, { "epoch": 9.223990208078336, "grad_norm": 1.5077534472547494, "learning_rate": 1.6168075037174769e-06, "loss": 0.4993, "step": 7536 }, { "epoch": 9.225214198286414, "grad_norm": 2.5095281234550217, "learning_rate": 1.6162079776882447e-06, "loss": 0.5291, "step": 7537 }, { "epoch": 9.226438188494493, "grad_norm": 1.647393195631156, "learning_rate": 1.615608509738556e-06, "loss": 0.5555, "step": 7538 }, { "epoch": 9.22766217870257, "grad_norm": 1.149892454762325, "learning_rate": 1.6150090999078055e-06, "loss": 0.8865, "step": 7539 }, { "epoch": 9.228886168910648, "grad_norm": 1.1509723332673358, "learning_rate": 1.614409748235383e-06, "loss": 0.6732, "step": 7540 }, { "epoch": 9.230110159118727, "grad_norm": 1.387813686980432, "learning_rate": 1.6138104547606764e-06, "loss": 0.8144, "step": 7541 }, { "epoch": 9.231334149326806, "grad_norm": 2.134762082610977, "learning_rate": 1.613211219523068e-06, "loss": 0.4723, "step": 7542 }, { "epoch": 9.232558139534884, "grad_norm": 1.0567943190178304, "learning_rate": 1.6126120425619393e-06, "loss": 0.5656, "step": 7543 }, { "epoch": 9.233782129742963, "grad_norm": 1.620666753114307, "learning_rate": 1.612012923916665e-06, "loss": 0.854, "step": 7544 }, { "epoch": 9.23500611995104, "grad_norm": 2.757794154893919, "learning_rate": 1.6114138636266163e-06, "loss": 0.8297, "step": 7545 }, { "epoch": 9.236230110159118, "grad_norm": 1.6286892994301412, "learning_rate": 1.6108148617311615e-06, "loss": 0.3086, "step": 7546 }, { "epoch": 9.237454100367197, "grad_norm": 2.3588374642945147, "learning_rate": 1.6102159182696658e-06, "loss": 0.4605, "step": 7547 }, { "epoch": 9.238678090575275, "grad_norm": 1.6066203357204345, "learning_rate": 1.6096170332814895e-06, "loss": 0.5114, "step": 7548 }, { "epoch": 9.239902080783354, "grad_norm": 1.6109852211135527, "learning_rate": 1.6090182068059878e-06, "loss": 0.5553, "step": 7549 }, { "epoch": 9.241126070991433, "grad_norm": 2.9565928052739467, "learning_rate": 1.6084194388825136e-06, "loss": 0.4126, "step": 7550 }, { "epoch": 9.24235006119951, "grad_norm": 1.1458065398163393, "learning_rate": 1.6078207295504164e-06, "loss": 0.5709, "step": 7551 }, { "epoch": 9.243574051407588, "grad_norm": 1.941451908961006, "learning_rate": 1.6072220788490411e-06, "loss": 0.5367, "step": 7552 }, { "epoch": 9.244798041615667, "grad_norm": 1.4254214792588833, "learning_rate": 1.6066234868177289e-06, "loss": 0.5486, "step": 7553 }, { "epoch": 9.246022031823745, "grad_norm": 1.0491402201012068, "learning_rate": 1.6060249534958162e-06, "loss": 0.5489, "step": 7554 }, { "epoch": 9.247246022031824, "grad_norm": 1.0006747885443226, "learning_rate": 1.605426478922637e-06, "loss": 0.3996, "step": 7555 }, { "epoch": 9.248470012239903, "grad_norm": 1.3109338999615479, "learning_rate": 1.6048280631375208e-06, "loss": 0.6088, "step": 7556 }, { "epoch": 9.249694002447981, "grad_norm": 1.488472748994658, "learning_rate": 1.604229706179794e-06, "loss": 1.2609, "step": 7557 }, { "epoch": 9.250917992656058, "grad_norm": 1.454662542218424, "learning_rate": 1.6036314080887775e-06, "loss": 0.6536, "step": 7558 }, { "epoch": 9.252141982864137, "grad_norm": 1.5446260804534628, "learning_rate": 1.6030331689037886e-06, "loss": 0.516, "step": 7559 }, { "epoch": 9.253365973072215, "grad_norm": 2.257366593564765, "learning_rate": 1.6024349886641426e-06, "loss": 0.5224, "step": 7560 }, { "epoch": 9.254589963280294, "grad_norm": 1.932404285947136, "learning_rate": 1.6018368674091494e-06, "loss": 0.7649, "step": 7561 }, { "epoch": 9.255813953488373, "grad_norm": 2.619589126290217, "learning_rate": 1.6012388051781152e-06, "loss": 0.5088, "step": 7562 }, { "epoch": 9.257037943696451, "grad_norm": 2.151308208028464, "learning_rate": 1.600640802010342e-06, "loss": 0.5521, "step": 7563 }, { "epoch": 9.258261933904528, "grad_norm": 3.4344712032036613, "learning_rate": 1.6000428579451294e-06, "loss": 0.2819, "step": 7564 }, { "epoch": 9.259485924112607, "grad_norm": 2.303826968527441, "learning_rate": 1.5994449730217716e-06, "loss": 0.4927, "step": 7565 }, { "epoch": 9.260709914320685, "grad_norm": 1.8999382166168015, "learning_rate": 1.598847147279559e-06, "loss": 0.7327, "step": 7566 }, { "epoch": 9.261933904528764, "grad_norm": 1.4394114290656885, "learning_rate": 1.598249380757779e-06, "loss": 0.7858, "step": 7567 }, { "epoch": 9.263157894736842, "grad_norm": 2.980991453742353, "learning_rate": 1.5976516734957136e-06, "loss": 0.3119, "step": 7568 }, { "epoch": 9.264381884944921, "grad_norm": 3.037337445547191, "learning_rate": 1.597054025532643e-06, "loss": 0.4146, "step": 7569 }, { "epoch": 9.265605875152998, "grad_norm": 1.655967912464092, "learning_rate": 1.5964564369078423e-06, "loss": 1.5209, "step": 7570 }, { "epoch": 9.266829865361077, "grad_norm": 2.5232658104938235, "learning_rate": 1.5958589076605823e-06, "loss": 0.4786, "step": 7571 }, { "epoch": 9.268053855569155, "grad_norm": 1.930100952557493, "learning_rate": 1.5952614378301306e-06, "loss": 0.9969, "step": 7572 }, { "epoch": 9.269277845777234, "grad_norm": 1.5642594530504352, "learning_rate": 1.5946640274557512e-06, "loss": 0.5207, "step": 7573 }, { "epoch": 9.270501835985312, "grad_norm": 1.94343515820504, "learning_rate": 1.5940666765767032e-06, "loss": 1.0092, "step": 7574 }, { "epoch": 9.271725826193391, "grad_norm": 1.1717545439909676, "learning_rate": 1.593469385232243e-06, "loss": 0.5751, "step": 7575 }, { "epoch": 9.27294981640147, "grad_norm": 2.269651908477515, "learning_rate": 1.5928721534616197e-06, "loss": 0.4756, "step": 7576 }, { "epoch": 9.274173806609546, "grad_norm": 2.058980541305633, "learning_rate": 1.5922749813040854e-06, "loss": 0.5096, "step": 7577 }, { "epoch": 9.275397796817625, "grad_norm": 1.4028528032439145, "learning_rate": 1.5916778687988816e-06, "loss": 0.3625, "step": 7578 }, { "epoch": 9.276621787025704, "grad_norm": 1.7491620856876444, "learning_rate": 1.591080815985248e-06, "loss": 1.3473, "step": 7579 }, { "epoch": 9.277845777233782, "grad_norm": 1.79348763957945, "learning_rate": 1.5904838229024217e-06, "loss": 0.7476, "step": 7580 }, { "epoch": 9.279069767441861, "grad_norm": 1.7649929363549752, "learning_rate": 1.5898868895896336e-06, "loss": 0.9873, "step": 7581 }, { "epoch": 9.28029375764994, "grad_norm": 1.361732748978625, "learning_rate": 1.5892900160861136e-06, "loss": 0.4355, "step": 7582 }, { "epoch": 9.281517747858016, "grad_norm": 3.5326688660220604, "learning_rate": 1.5886932024310852e-06, "loss": 0.517, "step": 7583 }, { "epoch": 9.282741738066095, "grad_norm": 0.9090451174756919, "learning_rate": 1.5880964486637694e-06, "loss": 0.4141, "step": 7584 }, { "epoch": 9.283965728274174, "grad_norm": 2.339479188374513, "learning_rate": 1.5874997548233806e-06, "loss": 0.5988, "step": 7585 }, { "epoch": 9.285189718482252, "grad_norm": 2.1282053580621008, "learning_rate": 1.5869031209491342e-06, "loss": 0.5432, "step": 7586 }, { "epoch": 9.286413708690331, "grad_norm": 1.1175473069365263, "learning_rate": 1.5863065470802372e-06, "loss": 0.4507, "step": 7587 }, { "epoch": 9.28763769889841, "grad_norm": 2.135696524267453, "learning_rate": 1.5857100332558941e-06, "loss": 0.3071, "step": 7588 }, { "epoch": 9.288861689106486, "grad_norm": 2.162804181738649, "learning_rate": 1.5851135795153055e-06, "loss": 0.5154, "step": 7589 }, { "epoch": 9.290085679314565, "grad_norm": 3.252329033501468, "learning_rate": 1.584517185897669e-06, "loss": 0.3431, "step": 7590 }, { "epoch": 9.291309669522644, "grad_norm": 1.6688263547336875, "learning_rate": 1.583920852442177e-06, "loss": 0.4992, "step": 7591 }, { "epoch": 9.292533659730722, "grad_norm": 1.8547244979622948, "learning_rate": 1.5833245791880187e-06, "loss": 0.3426, "step": 7592 }, { "epoch": 9.2937576499388, "grad_norm": 1.5638078698115514, "learning_rate": 1.5827283661743777e-06, "loss": 0.4723, "step": 7593 }, { "epoch": 9.29498164014688, "grad_norm": 1.6968622122528758, "learning_rate": 1.5821322134404352e-06, "loss": 1.1942, "step": 7594 }, { "epoch": 9.296205630354958, "grad_norm": 1.342071760334371, "learning_rate": 1.58153612102537e-06, "loss": 0.5574, "step": 7595 }, { "epoch": 9.297429620563035, "grad_norm": 2.7046976116206185, "learning_rate": 1.580940088968353e-06, "loss": 0.3689, "step": 7596 }, { "epoch": 9.298653610771114, "grad_norm": 1.3212047157697402, "learning_rate": 1.5803441173085544e-06, "loss": 0.4671, "step": 7597 }, { "epoch": 9.299877600979192, "grad_norm": 1.4047847740027812, "learning_rate": 1.5797482060851379e-06, "loss": 0.4333, "step": 7598 }, { "epoch": 9.30110159118727, "grad_norm": 2.390689290822738, "learning_rate": 1.5791523553372662e-06, "loss": 1.0185, "step": 7599 }, { "epoch": 9.30232558139535, "grad_norm": 3.0831345022167844, "learning_rate": 1.5785565651040958e-06, "loss": 0.4624, "step": 7600 }, { "epoch": 9.303549571603428, "grad_norm": 2.138639495593019, "learning_rate": 1.5779608354247806e-06, "loss": 0.6065, "step": 7601 }, { "epoch": 9.304773561811505, "grad_norm": 2.1383068129112615, "learning_rate": 1.5773651663384667e-06, "loss": 1.0734, "step": 7602 }, { "epoch": 9.305997552019583, "grad_norm": 2.5168533224993315, "learning_rate": 1.576769557884303e-06, "loss": 0.4818, "step": 7603 }, { "epoch": 9.307221542227662, "grad_norm": 1.4050506152240345, "learning_rate": 1.5761740101014294e-06, "loss": 1.3135, "step": 7604 }, { "epoch": 9.30844553243574, "grad_norm": 1.2115716049683134, "learning_rate": 1.5755785230289824e-06, "loss": 0.5866, "step": 7605 }, { "epoch": 9.30966952264382, "grad_norm": 1.192425409171754, "learning_rate": 1.5749830967060958e-06, "loss": 1.0432, "step": 7606 }, { "epoch": 9.310893512851898, "grad_norm": 1.2759034800590605, "learning_rate": 1.5743877311718981e-06, "loss": 0.4446, "step": 7607 }, { "epoch": 9.312117503059975, "grad_norm": 1.7673772835308912, "learning_rate": 1.5737924264655155e-06, "loss": 0.9279, "step": 7608 }, { "epoch": 9.313341493268053, "grad_norm": 2.683291465314498, "learning_rate": 1.573197182626069e-06, "loss": 0.5062, "step": 7609 }, { "epoch": 9.314565483476132, "grad_norm": 2.016596997960212, "learning_rate": 1.5726019996926762e-06, "loss": 0.3911, "step": 7610 }, { "epoch": 9.31578947368421, "grad_norm": 1.712069436420586, "learning_rate": 1.5720068777044479e-06, "loss": 0.5497, "step": 7611 }, { "epoch": 9.31701346389229, "grad_norm": 1.4795882317143123, "learning_rate": 1.5714118167004967e-06, "loss": 0.5268, "step": 7612 }, { "epoch": 9.318237454100368, "grad_norm": 1.7267017365035329, "learning_rate": 1.5708168167199264e-06, "loss": 0.5818, "step": 7613 }, { "epoch": 9.319461444308445, "grad_norm": 2.34799663095906, "learning_rate": 1.5702218778018377e-06, "loss": 0.5213, "step": 7614 }, { "epoch": 9.320685434516523, "grad_norm": 1.9654454191284825, "learning_rate": 1.5696269999853278e-06, "loss": 1.1547, "step": 7615 }, { "epoch": 9.321909424724602, "grad_norm": 1.7540083018628057, "learning_rate": 1.5690321833094907e-06, "loss": 0.5873, "step": 7616 }, { "epoch": 9.32313341493268, "grad_norm": 2.2219159423055546, "learning_rate": 1.568437427813415e-06, "loss": 0.291, "step": 7617 }, { "epoch": 9.32435740514076, "grad_norm": 1.301144817391721, "learning_rate": 1.567842733536186e-06, "loss": 0.4112, "step": 7618 }, { "epoch": 9.325581395348838, "grad_norm": 2.3590202071159143, "learning_rate": 1.5672481005168851e-06, "loss": 0.7158, "step": 7619 }, { "epoch": 9.326805385556916, "grad_norm": 1.8427081582144975, "learning_rate": 1.5666535287945878e-06, "loss": 0.4008, "step": 7620 }, { "epoch": 9.328029375764993, "grad_norm": 1.808263878031781, "learning_rate": 1.5660590184083691e-06, "loss": 0.4607, "step": 7621 }, { "epoch": 9.329253365973072, "grad_norm": 0.8229265734992094, "learning_rate": 1.565464569397298e-06, "loss": 0.435, "step": 7622 }, { "epoch": 9.33047735618115, "grad_norm": 1.6558929204357908, "learning_rate": 1.5648701818004383e-06, "loss": 0.885, "step": 7623 }, { "epoch": 9.331701346389229, "grad_norm": 1.5215383471852133, "learning_rate": 1.564275855656851e-06, "loss": 0.5739, "step": 7624 }, { "epoch": 9.332925336597308, "grad_norm": 1.7930306291363125, "learning_rate": 1.5636815910055936e-06, "loss": 0.505, "step": 7625 }, { "epoch": 9.334149326805386, "grad_norm": 2.376361673609101, "learning_rate": 1.5630873878857193e-06, "loss": 0.6067, "step": 7626 }, { "epoch": 9.335373317013463, "grad_norm": 2.6036869894829824, "learning_rate": 1.5624932463362762e-06, "loss": 0.8309, "step": 7627 }, { "epoch": 9.336597307221542, "grad_norm": 2.7087013246130662, "learning_rate": 1.5618991663963091e-06, "loss": 1.0271, "step": 7628 }, { "epoch": 9.33782129742962, "grad_norm": 1.6421868824184356, "learning_rate": 1.561305148104859e-06, "loss": 1.3367, "step": 7629 }, { "epoch": 9.339045287637699, "grad_norm": 1.3553932528567398, "learning_rate": 1.5607111915009634e-06, "loss": 0.5097, "step": 7630 }, { "epoch": 9.340269277845778, "grad_norm": 1.0305316910906297, "learning_rate": 1.5601172966236536e-06, "loss": 0.5325, "step": 7631 }, { "epoch": 9.341493268053856, "grad_norm": 2.409073955333783, "learning_rate": 1.5595234635119587e-06, "loss": 0.6131, "step": 7632 }, { "epoch": 9.342717258261933, "grad_norm": 1.077794533422121, "learning_rate": 1.5589296922049025e-06, "loss": 0.6414, "step": 7633 }, { "epoch": 9.343941248470012, "grad_norm": 1.8536646905538017, "learning_rate": 1.5583359827415067e-06, "loss": 0.6058, "step": 7634 }, { "epoch": 9.34516523867809, "grad_norm": 2.8214767410696875, "learning_rate": 1.5577423351607873e-06, "loss": 0.4033, "step": 7635 }, { "epoch": 9.346389228886169, "grad_norm": 2.186990576758635, "learning_rate": 1.5571487495017565e-06, "loss": 0.9741, "step": 7636 }, { "epoch": 9.347613219094248, "grad_norm": 1.630383814889504, "learning_rate": 1.556555225803422e-06, "loss": 0.9119, "step": 7637 }, { "epoch": 9.348837209302326, "grad_norm": 1.8431071554710503, "learning_rate": 1.5559617641047886e-06, "loss": 0.9649, "step": 7638 }, { "epoch": 9.350061199510403, "grad_norm": 1.782870083823572, "learning_rate": 1.5553683644448574e-06, "loss": 0.7107, "step": 7639 }, { "epoch": 9.351285189718482, "grad_norm": 1.6951479853838531, "learning_rate": 1.5547750268626228e-06, "loss": 1.136, "step": 7640 }, { "epoch": 9.35250917992656, "grad_norm": 1.3836077206506656, "learning_rate": 1.5541817513970758e-06, "loss": 0.7244, "step": 7641 }, { "epoch": 9.353733170134639, "grad_norm": 1.769480744229076, "learning_rate": 1.5535885380872073e-06, "loss": 0.5297, "step": 7642 }, { "epoch": 9.354957160342718, "grad_norm": 1.896086784299193, "learning_rate": 1.5529953869719993e-06, "loss": 0.4831, "step": 7643 }, { "epoch": 9.356181150550796, "grad_norm": 1.2215784655024247, "learning_rate": 1.5524022980904318e-06, "loss": 0.7627, "step": 7644 }, { "epoch": 9.357405140758875, "grad_norm": 1.8307133723968905, "learning_rate": 1.5518092714814804e-06, "loss": 0.5854, "step": 7645 }, { "epoch": 9.358629130966952, "grad_norm": 1.9894250327069032, "learning_rate": 1.5512163071841158e-06, "loss": 0.6018, "step": 7646 }, { "epoch": 9.35985312117503, "grad_norm": 1.4087640779558612, "learning_rate": 1.5506234052373068e-06, "loss": 1.1184, "step": 7647 }, { "epoch": 9.361077111383109, "grad_norm": 1.5452525571186473, "learning_rate": 1.5500305656800167e-06, "loss": 0.4994, "step": 7648 }, { "epoch": 9.362301101591187, "grad_norm": 2.4439612013042398, "learning_rate": 1.5494377885512035e-06, "loss": 0.4702, "step": 7649 }, { "epoch": 9.363525091799266, "grad_norm": 2.3075053615597696, "learning_rate": 1.548845073889822e-06, "loss": 0.9011, "step": 7650 }, { "epoch": 9.364749082007345, "grad_norm": 1.6290569241892576, "learning_rate": 1.5482524217348254e-06, "loss": 0.5753, "step": 7651 }, { "epoch": 9.365973072215422, "grad_norm": 1.517145727666439, "learning_rate": 1.547659832125159e-06, "loss": 0.7207, "step": 7652 }, { "epoch": 9.3671970624235, "grad_norm": 2.4129736285474426, "learning_rate": 1.5470673050997664e-06, "loss": 0.9774, "step": 7653 }, { "epoch": 9.368421052631579, "grad_norm": 1.3316836891180373, "learning_rate": 1.546474840697585e-06, "loss": 0.362, "step": 7654 }, { "epoch": 9.369645042839657, "grad_norm": 1.3709060597558165, "learning_rate": 1.5458824389575505e-06, "loss": 0.5415, "step": 7655 }, { "epoch": 9.370869033047736, "grad_norm": 1.4913774635847235, "learning_rate": 1.5452900999185935e-06, "loss": 0.5649, "step": 7656 }, { "epoch": 9.372093023255815, "grad_norm": 1.4899986165819885, "learning_rate": 1.5446978236196402e-06, "loss": 0.576, "step": 7657 }, { "epoch": 9.373317013463891, "grad_norm": 1.7297256080676775, "learning_rate": 1.5441056100996121e-06, "loss": 1.805, "step": 7658 }, { "epoch": 9.37454100367197, "grad_norm": 1.3407580589767427, "learning_rate": 1.5435134593974272e-06, "loss": 0.5036, "step": 7659 }, { "epoch": 9.375764993880049, "grad_norm": 1.5638115718246837, "learning_rate": 1.5429213715520004e-06, "loss": 0.5027, "step": 7660 }, { "epoch": 9.376988984088127, "grad_norm": 2.393949686728973, "learning_rate": 1.542329346602241e-06, "loss": 0.4133, "step": 7661 }, { "epoch": 9.378212974296206, "grad_norm": 1.5164625647302858, "learning_rate": 1.541737384587055e-06, "loss": 1.1655, "step": 7662 }, { "epoch": 9.379436964504285, "grad_norm": 2.2362627623633293, "learning_rate": 1.5411454855453428e-06, "loss": 0.9901, "step": 7663 }, { "epoch": 9.380660954712361, "grad_norm": 1.9743227052685561, "learning_rate": 1.5405536495160036e-06, "loss": 0.536, "step": 7664 }, { "epoch": 9.38188494492044, "grad_norm": 1.6836518934439453, "learning_rate": 1.53996187653793e-06, "loss": 0.9997, "step": 7665 }, { "epoch": 9.383108935128519, "grad_norm": 1.777671918199954, "learning_rate": 1.539370166650011e-06, "loss": 0.5481, "step": 7666 }, { "epoch": 9.384332925336597, "grad_norm": 1.4381596955807274, "learning_rate": 1.5387785198911298e-06, "loss": 0.6725, "step": 7667 }, { "epoch": 9.385556915544676, "grad_norm": 1.8738116203087567, "learning_rate": 1.538186936300171e-06, "loss": 0.5502, "step": 7668 }, { "epoch": 9.386780905752754, "grad_norm": 1.7637689518555508, "learning_rate": 1.5375954159160086e-06, "loss": 0.6463, "step": 7669 }, { "epoch": 9.388004895960833, "grad_norm": 2.209167061231007, "learning_rate": 1.537003958777516e-06, "loss": 0.4308, "step": 7670 }, { "epoch": 9.38922888616891, "grad_norm": 1.3128577686424403, "learning_rate": 1.536412564923561e-06, "loss": 0.3686, "step": 7671 }, { "epoch": 9.390452876376989, "grad_norm": 3.136742353162124, "learning_rate": 1.5358212343930075e-06, "loss": 0.3841, "step": 7672 }, { "epoch": 9.391676866585067, "grad_norm": 2.031573890097241, "learning_rate": 1.5352299672247173e-06, "loss": 0.5504, "step": 7673 }, { "epoch": 9.392900856793146, "grad_norm": 3.0012185689367104, "learning_rate": 1.5346387634575447e-06, "loss": 0.4533, "step": 7674 }, { "epoch": 9.394124847001224, "grad_norm": 2.553692506369909, "learning_rate": 1.5340476231303426e-06, "loss": 0.3907, "step": 7675 }, { "epoch": 9.395348837209303, "grad_norm": 2.5661755241860575, "learning_rate": 1.5334565462819564e-06, "loss": 0.4247, "step": 7676 }, { "epoch": 9.39657282741738, "grad_norm": 1.4548819110936975, "learning_rate": 1.5328655329512327e-06, "loss": 0.5101, "step": 7677 }, { "epoch": 9.397796817625458, "grad_norm": 2.4242354771560484, "learning_rate": 1.532274583177008e-06, "loss": 0.3234, "step": 7678 }, { "epoch": 9.399020807833537, "grad_norm": 1.0300258741330313, "learning_rate": 1.5316836969981189e-06, "loss": 0.5972, "step": 7679 }, { "epoch": 9.400244798041616, "grad_norm": 1.788176614118708, "learning_rate": 1.5310928744533954e-06, "loss": 0.4572, "step": 7680 }, { "epoch": 9.401468788249694, "grad_norm": 1.0870691180630296, "learning_rate": 1.530502115581664e-06, "loss": 0.605, "step": 7681 }, { "epoch": 9.402692778457773, "grad_norm": 2.846313153703495, "learning_rate": 1.529911420421748e-06, "loss": 0.9662, "step": 7682 }, { "epoch": 9.403916768665852, "grad_norm": 3.160990339848278, "learning_rate": 1.5293207890124656e-06, "loss": 0.5279, "step": 7683 }, { "epoch": 9.405140758873928, "grad_norm": 2.5225510790438297, "learning_rate": 1.5287302213926308e-06, "loss": 0.5196, "step": 7684 }, { "epoch": 9.406364749082007, "grad_norm": 1.981582805860729, "learning_rate": 1.528139717601052e-06, "loss": 1.1424, "step": 7685 }, { "epoch": 9.407588739290086, "grad_norm": 1.8841424538471734, "learning_rate": 1.5275492776765383e-06, "loss": 0.4961, "step": 7686 }, { "epoch": 9.408812729498164, "grad_norm": 1.7537213207796096, "learning_rate": 1.5269589016578882e-06, "loss": 0.4706, "step": 7687 }, { "epoch": 9.410036719706243, "grad_norm": 1.5289965711741555, "learning_rate": 1.5263685895839e-06, "loss": 0.934, "step": 7688 }, { "epoch": 9.411260709914322, "grad_norm": 3.1428610249293643, "learning_rate": 1.5257783414933666e-06, "loss": 0.5966, "step": 7689 }, { "epoch": 9.412484700122398, "grad_norm": 1.863386261393905, "learning_rate": 1.5251881574250777e-06, "loss": 0.4617, "step": 7690 }, { "epoch": 9.413708690330477, "grad_norm": 3.404470780772266, "learning_rate": 1.5245980374178174e-06, "loss": 0.3512, "step": 7691 }, { "epoch": 9.414932680538556, "grad_norm": 1.492331858716318, "learning_rate": 1.5240079815103663e-06, "loss": 0.5537, "step": 7692 }, { "epoch": 9.416156670746634, "grad_norm": 2.851500793820335, "learning_rate": 1.5234179897415014e-06, "loss": 0.6928, "step": 7693 }, { "epoch": 9.417380660954713, "grad_norm": 1.5228350491719633, "learning_rate": 1.5228280621499924e-06, "loss": 0.573, "step": 7694 }, { "epoch": 9.418604651162791, "grad_norm": 1.5461196428676418, "learning_rate": 1.5222381987746104e-06, "loss": 0.4771, "step": 7695 }, { "epoch": 9.419828641370868, "grad_norm": 1.2905438081632337, "learning_rate": 1.521648399654117e-06, "loss": 0.6315, "step": 7696 }, { "epoch": 9.421052631578947, "grad_norm": 1.409768241260235, "learning_rate": 1.521058664827272e-06, "loss": 0.8005, "step": 7697 }, { "epoch": 9.422276621787026, "grad_norm": 1.5132861201688572, "learning_rate": 1.5204689943328304e-06, "loss": 0.4653, "step": 7698 }, { "epoch": 9.423500611995104, "grad_norm": 1.4788128604675161, "learning_rate": 1.519879388209544e-06, "loss": 0.3379, "step": 7699 }, { "epoch": 9.424724602203183, "grad_norm": 1.1102594406175743, "learning_rate": 1.5192898464961587e-06, "loss": 0.5969, "step": 7700 }, { "epoch": 9.425948592411261, "grad_norm": 1.2365524849132399, "learning_rate": 1.5187003692314174e-06, "loss": 0.6584, "step": 7701 }, { "epoch": 9.427172582619338, "grad_norm": 2.513447456755945, "learning_rate": 1.5181109564540578e-06, "loss": 0.3437, "step": 7702 }, { "epoch": 9.428396572827417, "grad_norm": 1.6502407109273025, "learning_rate": 1.517521608202815e-06, "loss": 0.5261, "step": 7703 }, { "epoch": 9.429620563035495, "grad_norm": 3.5585421741534047, "learning_rate": 1.5169323245164186e-06, "loss": 0.426, "step": 7704 }, { "epoch": 9.430844553243574, "grad_norm": 1.2309335169582556, "learning_rate": 1.5163431054335932e-06, "loss": 0.8207, "step": 7705 }, { "epoch": 9.432068543451653, "grad_norm": 1.6398812866529657, "learning_rate": 1.5157539509930601e-06, "loss": 0.4323, "step": 7706 }, { "epoch": 9.433292533659731, "grad_norm": 1.2429630779294745, "learning_rate": 1.5151648612335374e-06, "loss": 0.6499, "step": 7707 }, { "epoch": 9.43451652386781, "grad_norm": 1.63963394427148, "learning_rate": 1.514575836193737e-06, "loss": 0.5118, "step": 7708 }, { "epoch": 9.435740514075887, "grad_norm": 2.426609888425431, "learning_rate": 1.5139868759123682e-06, "loss": 0.3825, "step": 7709 }, { "epoch": 9.436964504283965, "grad_norm": 2.859314333428318, "learning_rate": 1.5133979804281354e-06, "loss": 0.4099, "step": 7710 }, { "epoch": 9.438188494492044, "grad_norm": 2.825770505726081, "learning_rate": 1.5128091497797364e-06, "loss": 0.8036, "step": 7711 }, { "epoch": 9.439412484700123, "grad_norm": 1.6209661009308896, "learning_rate": 1.5122203840058699e-06, "loss": 0.5609, "step": 7712 }, { "epoch": 9.440636474908201, "grad_norm": 2.0004031004642577, "learning_rate": 1.5116316831452269e-06, "loss": 0.771, "step": 7713 }, { "epoch": 9.44186046511628, "grad_norm": 1.42012215249765, "learning_rate": 1.511043047236493e-06, "loss": 0.5631, "step": 7714 }, { "epoch": 9.443084455324357, "grad_norm": 2.935060318438459, "learning_rate": 1.510454476318352e-06, "loss": 0.5333, "step": 7715 }, { "epoch": 9.444308445532435, "grad_norm": 1.9731980192439802, "learning_rate": 1.5098659704294833e-06, "loss": 0.5312, "step": 7716 }, { "epoch": 9.445532435740514, "grad_norm": 2.1999761331452032, "learning_rate": 1.5092775296085604e-06, "loss": 0.3559, "step": 7717 }, { "epoch": 9.446756425948593, "grad_norm": 1.5051226361301662, "learning_rate": 1.5086891538942542e-06, "loss": 1.2173, "step": 7718 }, { "epoch": 9.447980416156671, "grad_norm": 1.1944251790934965, "learning_rate": 1.5081008433252306e-06, "loss": 0.5812, "step": 7719 }, { "epoch": 9.44920440636475, "grad_norm": 1.3513047029674243, "learning_rate": 1.5075125979401492e-06, "loss": 0.5594, "step": 7720 }, { "epoch": 9.450428396572827, "grad_norm": 1.5929505799460393, "learning_rate": 1.50692441777767e-06, "loss": 0.6722, "step": 7721 }, { "epoch": 9.451652386780905, "grad_norm": 3.017591693603603, "learning_rate": 1.5063363028764456e-06, "loss": 0.3501, "step": 7722 }, { "epoch": 9.452876376988984, "grad_norm": 1.2431950018983517, "learning_rate": 1.505748253275123e-06, "loss": 0.3896, "step": 7723 }, { "epoch": 9.454100367197062, "grad_norm": 1.8155434298331987, "learning_rate": 1.5051602690123476e-06, "loss": 0.4882, "step": 7724 }, { "epoch": 9.455324357405141, "grad_norm": 2.942920789026892, "learning_rate": 1.50457235012676e-06, "loss": 0.4537, "step": 7725 }, { "epoch": 9.45654834761322, "grad_norm": 1.422440232204843, "learning_rate": 1.503984496656996e-06, "loss": 0.56, "step": 7726 }, { "epoch": 9.457772337821297, "grad_norm": 1.2222573756554564, "learning_rate": 1.5033967086416867e-06, "loss": 0.7513, "step": 7727 }, { "epoch": 9.458996328029375, "grad_norm": 1.6524463196543624, "learning_rate": 1.502808986119459e-06, "loss": 0.6617, "step": 7728 }, { "epoch": 9.460220318237454, "grad_norm": 2.6200365359053346, "learning_rate": 1.5022213291289368e-06, "loss": 0.8998, "step": 7729 }, { "epoch": 9.461444308445532, "grad_norm": 1.4110726999801746, "learning_rate": 1.5016337377087388e-06, "loss": 0.5035, "step": 7730 }, { "epoch": 9.462668298653611, "grad_norm": 1.9901968175959863, "learning_rate": 1.501046211897479e-06, "loss": 0.4451, "step": 7731 }, { "epoch": 9.46389228886169, "grad_norm": 1.93526354345633, "learning_rate": 1.5004587517337665e-06, "loss": 0.4944, "step": 7732 }, { "epoch": 9.465116279069768, "grad_norm": 1.9819462865222912, "learning_rate": 1.4998713572562076e-06, "loss": 0.421, "step": 7733 }, { "epoch": 9.466340269277845, "grad_norm": 1.330597474131783, "learning_rate": 1.4992840285034045e-06, "loss": 0.4537, "step": 7734 }, { "epoch": 9.467564259485924, "grad_norm": 1.0316442455516797, "learning_rate": 1.4986967655139533e-06, "loss": 0.514, "step": 7735 }, { "epoch": 9.468788249694002, "grad_norm": 1.0331210598123945, "learning_rate": 1.4981095683264474e-06, "loss": 0.5484, "step": 7736 }, { "epoch": 9.470012239902081, "grad_norm": 1.6280672085391878, "learning_rate": 1.4975224369794745e-06, "loss": 0.3752, "step": 7737 }, { "epoch": 9.47123623011016, "grad_norm": 2.3387651939120593, "learning_rate": 1.4969353715116192e-06, "loss": 1.2032, "step": 7738 }, { "epoch": 9.472460220318238, "grad_norm": 1.1533121805740116, "learning_rate": 1.4963483719614618e-06, "loss": 0.511, "step": 7739 }, { "epoch": 9.473684210526315, "grad_norm": 2.221493511139791, "learning_rate": 1.495761438367577e-06, "loss": 1.0364, "step": 7740 }, { "epoch": 9.474908200734394, "grad_norm": 1.7847144407512583, "learning_rate": 1.495174570768535e-06, "loss": 0.4644, "step": 7741 }, { "epoch": 9.476132190942472, "grad_norm": 3.0370539710921345, "learning_rate": 1.4945877692029049e-06, "loss": 0.5482, "step": 7742 }, { "epoch": 9.477356181150551, "grad_norm": 2.8287731391399067, "learning_rate": 1.4940010337092476e-06, "loss": 0.4969, "step": 7743 }, { "epoch": 9.47858017135863, "grad_norm": 1.297409915256897, "learning_rate": 1.4934143643261212e-06, "loss": 0.6743, "step": 7744 }, { "epoch": 9.479804161566708, "grad_norm": 1.7222407827546702, "learning_rate": 1.4928277610920796e-06, "loss": 1.162, "step": 7745 }, { "epoch": 9.481028151774785, "grad_norm": 1.9335693017029854, "learning_rate": 1.492241224045672e-06, "loss": 0.4156, "step": 7746 }, { "epoch": 9.482252141982864, "grad_norm": 1.7646391255095528, "learning_rate": 1.4916547532254439e-06, "loss": 0.5465, "step": 7747 }, { "epoch": 9.483476132190942, "grad_norm": 1.7079652169095414, "learning_rate": 1.4910683486699367e-06, "loss": 0.4353, "step": 7748 }, { "epoch": 9.48470012239902, "grad_norm": 2.3140826602586477, "learning_rate": 1.490482010417685e-06, "loss": 0.4498, "step": 7749 }, { "epoch": 9.4859241126071, "grad_norm": 2.2335820041641044, "learning_rate": 1.489895738507221e-06, "loss": 0.4864, "step": 7750 }, { "epoch": 9.487148102815178, "grad_norm": 1.0511851714779545, "learning_rate": 1.489309532977074e-06, "loss": 0.5102, "step": 7751 }, { "epoch": 9.488372093023255, "grad_norm": 1.0961443961737851, "learning_rate": 1.488723393865766e-06, "loss": 0.4195, "step": 7752 }, { "epoch": 9.489596083231334, "grad_norm": 1.7528833543708489, "learning_rate": 1.488137321211816e-06, "loss": 0.4185, "step": 7753 }, { "epoch": 9.490820073439412, "grad_norm": 1.7706910788916235, "learning_rate": 1.4875513150537386e-06, "loss": 0.71, "step": 7754 }, { "epoch": 9.49204406364749, "grad_norm": 1.3084633680813926, "learning_rate": 1.486965375430044e-06, "loss": 0.3913, "step": 7755 }, { "epoch": 9.49326805385557, "grad_norm": 3.245317618871229, "learning_rate": 1.4863795023792379e-06, "loss": 0.478, "step": 7756 }, { "epoch": 9.494492044063648, "grad_norm": 1.4586383337078286, "learning_rate": 1.4857936959398222e-06, "loss": 1.0855, "step": 7757 }, { "epoch": 9.495716034271727, "grad_norm": 2.546193504330229, "learning_rate": 1.4852079561502935e-06, "loss": 0.468, "step": 7758 }, { "epoch": 9.496940024479803, "grad_norm": 1.9592531389716572, "learning_rate": 1.484622283049143e-06, "loss": 0.5989, "step": 7759 }, { "epoch": 9.498164014687882, "grad_norm": 1.5683182830050475, "learning_rate": 1.4840366766748624e-06, "loss": 1.4866, "step": 7760 }, { "epoch": 9.49938800489596, "grad_norm": 2.263452282701963, "learning_rate": 1.4834511370659328e-06, "loss": 0.6095, "step": 7761 }, { "epoch": 9.50061199510404, "grad_norm": 1.0469385498693353, "learning_rate": 1.4828656642608347e-06, "loss": 0.7109, "step": 7762 }, { "epoch": 9.501835985312118, "grad_norm": 1.6884831926260175, "learning_rate": 1.4822802582980422e-06, "loss": 1.3635, "step": 7763 }, { "epoch": 9.503059975520197, "grad_norm": 2.372569541218042, "learning_rate": 1.4816949192160277e-06, "loss": 0.6124, "step": 7764 }, { "epoch": 9.504283965728273, "grad_norm": 2.368780894029947, "learning_rate": 1.4811096470532566e-06, "loss": 0.5068, "step": 7765 }, { "epoch": 9.505507955936352, "grad_norm": 1.4449189639629425, "learning_rate": 1.4805244418481911e-06, "loss": 0.568, "step": 7766 }, { "epoch": 9.50673194614443, "grad_norm": 2.43495043758298, "learning_rate": 1.4799393036392873e-06, "loss": 0.4837, "step": 7767 }, { "epoch": 9.50795593635251, "grad_norm": 1.6514222833394305, "learning_rate": 1.4793542324650007e-06, "loss": 0.4443, "step": 7768 }, { "epoch": 9.509179926560588, "grad_norm": 1.4303365639859202, "learning_rate": 1.4787692283637785e-06, "loss": 0.6209, "step": 7769 }, { "epoch": 9.510403916768666, "grad_norm": 1.0774641623553034, "learning_rate": 1.4781842913740654e-06, "loss": 0.389, "step": 7770 }, { "epoch": 9.511627906976745, "grad_norm": 1.6361367762109296, "learning_rate": 1.4775994215343016e-06, "loss": 1.4543, "step": 7771 }, { "epoch": 9.512851897184822, "grad_norm": 1.2927315909019155, "learning_rate": 1.477014618882921e-06, "loss": 0.7205, "step": 7772 }, { "epoch": 9.5140758873929, "grad_norm": 1.102163354968593, "learning_rate": 1.4764298834583569e-06, "loss": 0.5091, "step": 7773 }, { "epoch": 9.51529987760098, "grad_norm": 1.7697297329569817, "learning_rate": 1.4758452152990345e-06, "loss": 1.503, "step": 7774 }, { "epoch": 9.516523867809058, "grad_norm": 2.1967312264212513, "learning_rate": 1.4752606144433772e-06, "loss": 0.5829, "step": 7775 }, { "epoch": 9.517747858017136, "grad_norm": 1.7306170689814768, "learning_rate": 1.4746760809298009e-06, "loss": 0.4666, "step": 7776 }, { "epoch": 9.518971848225215, "grad_norm": 3.2541631665038144, "learning_rate": 1.474091614796721e-06, "loss": 0.3599, "step": 7777 }, { "epoch": 9.520195838433292, "grad_norm": 1.9696820920200477, "learning_rate": 1.4735072160825456e-06, "loss": 0.298, "step": 7778 }, { "epoch": 9.52141982864137, "grad_norm": 1.7624668221618032, "learning_rate": 1.4729228848256792e-06, "loss": 0.4505, "step": 7779 }, { "epoch": 9.522643818849449, "grad_norm": 1.6456610027621148, "learning_rate": 1.4723386210645213e-06, "loss": 0.4883, "step": 7780 }, { "epoch": 9.523867809057528, "grad_norm": 1.7661337505400532, "learning_rate": 1.4717544248374686e-06, "loss": 1.3847, "step": 7781 }, { "epoch": 9.525091799265606, "grad_norm": 2.324209520682565, "learning_rate": 1.471170296182912e-06, "loss": 0.5952, "step": 7782 }, { "epoch": 9.526315789473685, "grad_norm": 1.9447894510856336, "learning_rate": 1.4705862351392381e-06, "loss": 1.0379, "step": 7783 }, { "epoch": 9.527539779681762, "grad_norm": 2.7210739070395613, "learning_rate": 1.4700022417448296e-06, "loss": 0.3587, "step": 7784 }, { "epoch": 9.52876376988984, "grad_norm": 1.4361038823969978, "learning_rate": 1.4694183160380627e-06, "loss": 1.1598, "step": 7785 }, { "epoch": 9.529987760097919, "grad_norm": 1.020347335262806, "learning_rate": 1.468834458057314e-06, "loss": 0.5193, "step": 7786 }, { "epoch": 9.531211750305998, "grad_norm": 1.1049471167926914, "learning_rate": 1.4682506678409496e-06, "loss": 0.5854, "step": 7787 }, { "epoch": 9.532435740514076, "grad_norm": 3.6200201669649217, "learning_rate": 1.4676669454273357e-06, "loss": 0.3592, "step": 7788 }, { "epoch": 9.533659730722155, "grad_norm": 1.0423647094234372, "learning_rate": 1.467083290854831e-06, "loss": 0.5394, "step": 7789 }, { "epoch": 9.534883720930232, "grad_norm": 2.3141292887706686, "learning_rate": 1.4664997041617929e-06, "loss": 0.4518, "step": 7790 }, { "epoch": 9.53610771113831, "grad_norm": 1.1624547058258448, "learning_rate": 1.4659161853865712e-06, "loss": 0.5021, "step": 7791 }, { "epoch": 9.537331701346389, "grad_norm": 1.5464655430985592, "learning_rate": 1.4653327345675128e-06, "loss": 0.7497, "step": 7792 }, { "epoch": 9.538555691554468, "grad_norm": 1.5404888692298353, "learning_rate": 1.46474935174296e-06, "loss": 0.4797, "step": 7793 }, { "epoch": 9.539779681762546, "grad_norm": 1.4484178421911822, "learning_rate": 1.464166036951251e-06, "loss": 0.3357, "step": 7794 }, { "epoch": 9.541003671970625, "grad_norm": 2.327931362438206, "learning_rate": 1.4635827902307192e-06, "loss": 0.5821, "step": 7795 }, { "epoch": 9.542227662178703, "grad_norm": 2.3369418641811994, "learning_rate": 1.4629996116196927e-06, "loss": 0.4664, "step": 7796 }, { "epoch": 9.54345165238678, "grad_norm": 1.5895154356358212, "learning_rate": 1.462416501156496e-06, "loss": 0.5973, "step": 7797 }, { "epoch": 9.544675642594859, "grad_norm": 2.725269090929542, "learning_rate": 1.4618334588794484e-06, "loss": 0.3605, "step": 7798 }, { "epoch": 9.545899632802938, "grad_norm": 1.9165297045401903, "learning_rate": 1.4612504848268666e-06, "loss": 0.5589, "step": 7799 }, { "epoch": 9.547123623011016, "grad_norm": 2.0179432226742255, "learning_rate": 1.4606675790370612e-06, "loss": 0.7688, "step": 7800 }, { "epoch": 9.548347613219095, "grad_norm": 1.9835961588400162, "learning_rate": 1.4600847415483376e-06, "loss": 0.5966, "step": 7801 }, { "epoch": 9.549571603427173, "grad_norm": 1.5705094145015566, "learning_rate": 1.459501972398999e-06, "loss": 1.2966, "step": 7802 }, { "epoch": 9.55079559363525, "grad_norm": 1.4413121268827684, "learning_rate": 1.458919271627342e-06, "loss": 1.276, "step": 7803 }, { "epoch": 9.552019583843329, "grad_norm": 1.995387636502786, "learning_rate": 1.4583366392716597e-06, "loss": 1.3154, "step": 7804 }, { "epoch": 9.553243574051407, "grad_norm": 1.8011023225372598, "learning_rate": 1.4577540753702406e-06, "loss": 1.4084, "step": 7805 }, { "epoch": 9.554467564259486, "grad_norm": 1.3834294944255765, "learning_rate": 1.457171579961368e-06, "loss": 0.8463, "step": 7806 }, { "epoch": 9.555691554467565, "grad_norm": 2.5550572126455062, "learning_rate": 1.456589153083323e-06, "loss": 0.4073, "step": 7807 }, { "epoch": 9.556915544675643, "grad_norm": 2.011819189716954, "learning_rate": 1.45600679477438e-06, "loss": 0.5108, "step": 7808 }, { "epoch": 9.55813953488372, "grad_norm": 1.3136719539396435, "learning_rate": 1.4554245050728085e-06, "loss": 0.5299, "step": 7809 }, { "epoch": 9.559363525091799, "grad_norm": 2.184789209002383, "learning_rate": 1.4548422840168747e-06, "loss": 0.6223, "step": 7810 }, { "epoch": 9.560587515299877, "grad_norm": 1.7667545941254406, "learning_rate": 1.4542601316448393e-06, "loss": 0.5373, "step": 7811 }, { "epoch": 9.561811505507956, "grad_norm": 2.103308800745441, "learning_rate": 1.4536780479949614e-06, "loss": 1.108, "step": 7812 }, { "epoch": 9.563035495716035, "grad_norm": 2.075652930487441, "learning_rate": 1.4530960331054916e-06, "loss": 1.123, "step": 7813 }, { "epoch": 9.564259485924113, "grad_norm": 1.3458660346917939, "learning_rate": 1.4525140870146787e-06, "loss": 0.7404, "step": 7814 }, { "epoch": 9.56548347613219, "grad_norm": 1.7698614820452299, "learning_rate": 1.4519322097607653e-06, "loss": 1.5636, "step": 7815 }, { "epoch": 9.566707466340269, "grad_norm": 2.2204272376657808, "learning_rate": 1.4513504013819906e-06, "loss": 0.3771, "step": 7816 }, { "epoch": 9.567931456548347, "grad_norm": 2.266406691502686, "learning_rate": 1.450768661916589e-06, "loss": 0.6446, "step": 7817 }, { "epoch": 9.569155446756426, "grad_norm": 1.8866655263899608, "learning_rate": 1.45018699140279e-06, "loss": 1.1115, "step": 7818 }, { "epoch": 9.570379436964505, "grad_norm": 1.6348100183288583, "learning_rate": 1.4496053898788177e-06, "loss": 1.6255, "step": 7819 }, { "epoch": 9.571603427172583, "grad_norm": 1.161287885690219, "learning_rate": 1.4490238573828962e-06, "loss": 0.3849, "step": 7820 }, { "epoch": 9.572827417380662, "grad_norm": 1.1336257176363878, "learning_rate": 1.4484423939532385e-06, "loss": 0.6221, "step": 7821 }, { "epoch": 9.574051407588739, "grad_norm": 1.4662963054403146, "learning_rate": 1.4478609996280574e-06, "loss": 1.3097, "step": 7822 }, { "epoch": 9.575275397796817, "grad_norm": 2.085704359531065, "learning_rate": 1.4472796744455597e-06, "loss": 0.572, "step": 7823 }, { "epoch": 9.576499388004896, "grad_norm": 2.2412093208184256, "learning_rate": 1.446698418443947e-06, "loss": 0.4561, "step": 7824 }, { "epoch": 9.577723378212974, "grad_norm": 2.511733856993857, "learning_rate": 1.4461172316614194e-06, "loss": 0.6253, "step": 7825 }, { "epoch": 9.578947368421053, "grad_norm": 1.1272914598273136, "learning_rate": 1.4455361141361702e-06, "loss": 0.498, "step": 7826 }, { "epoch": 9.580171358629132, "grad_norm": 1.7850858120185553, "learning_rate": 1.4449550659063865e-06, "loss": 0.4749, "step": 7827 }, { "epoch": 9.581395348837209, "grad_norm": 2.5387154750620757, "learning_rate": 1.4443740870102523e-06, "loss": 0.9372, "step": 7828 }, { "epoch": 9.582619339045287, "grad_norm": 1.792417429866816, "learning_rate": 1.4437931774859498e-06, "loss": 1.5951, "step": 7829 }, { "epoch": 9.583843329253366, "grad_norm": 1.8055540216787278, "learning_rate": 1.4432123373716533e-06, "loss": 1.0893, "step": 7830 }, { "epoch": 9.585067319461444, "grad_norm": 2.2656926473564174, "learning_rate": 1.442631566705533e-06, "loss": 0.9082, "step": 7831 }, { "epoch": 9.586291309669523, "grad_norm": 1.8209217211503437, "learning_rate": 1.4420508655257551e-06, "loss": 0.5269, "step": 7832 }, { "epoch": 9.587515299877602, "grad_norm": 1.730461444608734, "learning_rate": 1.4414702338704811e-06, "loss": 1.1031, "step": 7833 }, { "epoch": 9.588739290085678, "grad_norm": 1.918957433846606, "learning_rate": 1.4408896717778684e-06, "loss": 0.3698, "step": 7834 }, { "epoch": 9.589963280293757, "grad_norm": 1.6762879847138858, "learning_rate": 1.440309179286069e-06, "loss": 1.2683, "step": 7835 }, { "epoch": 9.591187270501836, "grad_norm": 1.409427507958619, "learning_rate": 1.4397287564332308e-06, "loss": 0.5937, "step": 7836 }, { "epoch": 9.592411260709914, "grad_norm": 1.0944849049835192, "learning_rate": 1.439148403257497e-06, "loss": 0.6161, "step": 7837 }, { "epoch": 9.593635250917993, "grad_norm": 1.541258941020548, "learning_rate": 1.4385681197970065e-06, "loss": 0.5385, "step": 7838 }, { "epoch": 9.594859241126072, "grad_norm": 1.867668374573175, "learning_rate": 1.437987906089893e-06, "loss": 0.3655, "step": 7839 }, { "epoch": 9.596083231334148, "grad_norm": 2.1522802541860564, "learning_rate": 1.4374077621742863e-06, "loss": 0.8251, "step": 7840 }, { "epoch": 9.597307221542227, "grad_norm": 1.8006727526273558, "learning_rate": 1.4368276880883104e-06, "loss": 0.2976, "step": 7841 }, { "epoch": 9.598531211750306, "grad_norm": 1.2712864487880309, "learning_rate": 1.4362476838700868e-06, "loss": 0.9865, "step": 7842 }, { "epoch": 9.599755201958384, "grad_norm": 1.1284615046478494, "learning_rate": 1.4356677495577315e-06, "loss": 0.6061, "step": 7843 }, { "epoch": 9.600979192166463, "grad_norm": 1.2301289040702679, "learning_rate": 1.4350878851893557e-06, "loss": 0.8472, "step": 7844 }, { "epoch": 9.602203182374542, "grad_norm": 2.827472499302893, "learning_rate": 1.4345080908030644e-06, "loss": 0.4197, "step": 7845 }, { "epoch": 9.60342717258262, "grad_norm": 2.380886332457003, "learning_rate": 1.433928366436959e-06, "loss": 0.5406, "step": 7846 }, { "epoch": 9.604651162790697, "grad_norm": 1.8523805583802497, "learning_rate": 1.4333487121291397e-06, "loss": 0.4624, "step": 7847 }, { "epoch": 9.605875152998776, "grad_norm": 1.81005603652633, "learning_rate": 1.4327691279176977e-06, "loss": 0.4791, "step": 7848 }, { "epoch": 9.607099143206854, "grad_norm": 2.2833754692898514, "learning_rate": 1.432189613840721e-06, "loss": 0.6727, "step": 7849 }, { "epoch": 9.608323133414933, "grad_norm": 1.4962475092410927, "learning_rate": 1.4316101699362936e-06, "loss": 0.9147, "step": 7850 }, { "epoch": 9.609547123623011, "grad_norm": 1.5586761627539671, "learning_rate": 1.4310307962424941e-06, "loss": 0.9382, "step": 7851 }, { "epoch": 9.61077111383109, "grad_norm": 2.090053169462759, "learning_rate": 1.4304514927973967e-06, "loss": 0.3989, "step": 7852 }, { "epoch": 9.611995104039167, "grad_norm": 2.2057025175042533, "learning_rate": 1.4298722596390712e-06, "loss": 0.822, "step": 7853 }, { "epoch": 9.613219094247246, "grad_norm": 1.3573511614005591, "learning_rate": 1.4292930968055814e-06, "loss": 0.4382, "step": 7854 }, { "epoch": 9.614443084455324, "grad_norm": 1.33405331022911, "learning_rate": 1.4287140043349915e-06, "loss": 0.3115, "step": 7855 }, { "epoch": 9.615667074663403, "grad_norm": 2.069477248794864, "learning_rate": 1.4281349822653534e-06, "loss": 0.6041, "step": 7856 }, { "epoch": 9.616891064871481, "grad_norm": 1.5627865517408002, "learning_rate": 1.4275560306347202e-06, "loss": 0.4697, "step": 7857 }, { "epoch": 9.61811505507956, "grad_norm": 1.8510052643401809, "learning_rate": 1.4269771494811374e-06, "loss": 0.5335, "step": 7858 }, { "epoch": 9.619339045287639, "grad_norm": 1.5917315685237576, "learning_rate": 1.426398338842647e-06, "loss": 0.5644, "step": 7859 }, { "epoch": 9.620563035495715, "grad_norm": 2.0837222569498297, "learning_rate": 1.4258195987572876e-06, "loss": 0.5179, "step": 7860 }, { "epoch": 9.621787025703794, "grad_norm": 1.2254178811216514, "learning_rate": 1.4252409292630908e-06, "loss": 0.7708, "step": 7861 }, { "epoch": 9.623011015911873, "grad_norm": 2.9036862784295114, "learning_rate": 1.4246623303980865e-06, "loss": 0.9177, "step": 7862 }, { "epoch": 9.624235006119951, "grad_norm": 1.9195428132354044, "learning_rate": 1.424083802200294e-06, "loss": 0.5815, "step": 7863 }, { "epoch": 9.62545899632803, "grad_norm": 1.1075328625690877, "learning_rate": 1.4235053447077355e-06, "loss": 0.4731, "step": 7864 }, { "epoch": 9.626682986536107, "grad_norm": 2.4820894656888863, "learning_rate": 1.422926957958424e-06, "loss": 0.582, "step": 7865 }, { "epoch": 9.627906976744185, "grad_norm": 1.4658961978071154, "learning_rate": 1.4223486419903692e-06, "loss": 0.716, "step": 7866 }, { "epoch": 9.629130966952264, "grad_norm": 1.6589283452960928, "learning_rate": 1.4217703968415756e-06, "loss": 0.5043, "step": 7867 }, { "epoch": 9.630354957160343, "grad_norm": 1.3957420783107903, "learning_rate": 1.4211922225500435e-06, "loss": 0.9504, "step": 7868 }, { "epoch": 9.631578947368421, "grad_norm": 1.059235944846936, "learning_rate": 1.4206141191537681e-06, "loss": 0.5098, "step": 7869 }, { "epoch": 9.6328029375765, "grad_norm": 2.1807055266898696, "learning_rate": 1.4200360866907405e-06, "loss": 0.5276, "step": 7870 }, { "epoch": 9.634026927784578, "grad_norm": 1.8256361474364171, "learning_rate": 1.419458125198947e-06, "loss": 0.6137, "step": 7871 }, { "epoch": 9.635250917992655, "grad_norm": 2.0242362764237356, "learning_rate": 1.4188802347163677e-06, "loss": 0.6728, "step": 7872 }, { "epoch": 9.636474908200734, "grad_norm": 1.9362464625293698, "learning_rate": 1.4183024152809824e-06, "loss": 0.7264, "step": 7873 }, { "epoch": 9.637698898408813, "grad_norm": 1.2072852083308974, "learning_rate": 1.4177246669307606e-06, "loss": 0.5321, "step": 7874 }, { "epoch": 9.638922888616891, "grad_norm": 1.0470116355037258, "learning_rate": 1.4171469897036705e-06, "loss": 0.3959, "step": 7875 }, { "epoch": 9.64014687882497, "grad_norm": 1.5675033899924284, "learning_rate": 1.4165693836376743e-06, "loss": 1.1726, "step": 7876 }, { "epoch": 9.641370869033048, "grad_norm": 1.8090548772907329, "learning_rate": 1.415991848770732e-06, "loss": 1.4908, "step": 7877 }, { "epoch": 9.642594859241125, "grad_norm": 1.53413849233614, "learning_rate": 1.4154143851407953e-06, "loss": 0.6937, "step": 7878 }, { "epoch": 9.643818849449204, "grad_norm": 1.6542013259840307, "learning_rate": 1.4148369927858142e-06, "loss": 0.471, "step": 7879 }, { "epoch": 9.645042839657282, "grad_norm": 1.4517797336792546, "learning_rate": 1.4142596717437319e-06, "loss": 1.0757, "step": 7880 }, { "epoch": 9.646266829865361, "grad_norm": 1.5102948137964847, "learning_rate": 1.4136824220524884e-06, "loss": 0.5947, "step": 7881 }, { "epoch": 9.64749082007344, "grad_norm": 3.1325541856365744, "learning_rate": 1.4131052437500177e-06, "loss": 0.4458, "step": 7882 }, { "epoch": 9.648714810281518, "grad_norm": 1.207346392395892, "learning_rate": 1.4125281368742505e-06, "loss": 0.4517, "step": 7883 }, { "epoch": 9.649938800489597, "grad_norm": 1.179459796599702, "learning_rate": 1.4119511014631118e-06, "loss": 0.5734, "step": 7884 }, { "epoch": 9.651162790697674, "grad_norm": 2.384807294317284, "learning_rate": 1.4113741375545222e-06, "loss": 0.4529, "step": 7885 }, { "epoch": 9.652386780905752, "grad_norm": 1.5995875059351827, "learning_rate": 1.410797245186398e-06, "loss": 0.4625, "step": 7886 }, { "epoch": 9.653610771113831, "grad_norm": 0.5607521498322625, "learning_rate": 1.41022042439665e-06, "loss": 0.1298, "step": 7887 }, { "epoch": 9.65483476132191, "grad_norm": 1.4150962882603404, "learning_rate": 1.4096436752231849e-06, "loss": 1.0028, "step": 7888 }, { "epoch": 9.656058751529988, "grad_norm": 2.8092459572139545, "learning_rate": 1.4090669977039035e-06, "loss": 0.5353, "step": 7889 }, { "epoch": 9.657282741738067, "grad_norm": 3.114554793643203, "learning_rate": 1.4084903918767049e-06, "loss": 0.3566, "step": 7890 }, { "epoch": 9.658506731946144, "grad_norm": 1.340799385582237, "learning_rate": 1.4079138577794815e-06, "loss": 1.0899, "step": 7891 }, { "epoch": 9.659730722154222, "grad_norm": 2.5873698275307686, "learning_rate": 1.4073373954501191e-06, "loss": 0.663, "step": 7892 }, { "epoch": 9.660954712362301, "grad_norm": 1.4312496939337525, "learning_rate": 1.4067610049265008e-06, "loss": 0.5915, "step": 7893 }, { "epoch": 9.66217870257038, "grad_norm": 1.7395484822956622, "learning_rate": 1.4061846862465062e-06, "loss": 0.4119, "step": 7894 }, { "epoch": 9.663402692778458, "grad_norm": 1.121834135546993, "learning_rate": 1.4056084394480086e-06, "loss": 0.3102, "step": 7895 }, { "epoch": 9.664626682986537, "grad_norm": 1.876224009176409, "learning_rate": 1.4050322645688763e-06, "loss": 1.1101, "step": 7896 }, { "epoch": 9.665850673194614, "grad_norm": 1.4410877786494556, "learning_rate": 1.4044561616469745e-06, "loss": 0.9865, "step": 7897 }, { "epoch": 9.667074663402692, "grad_norm": 1.5269398591140442, "learning_rate": 1.4038801307201598e-06, "loss": 0.6927, "step": 7898 }, { "epoch": 9.668298653610771, "grad_norm": 1.420420711175893, "learning_rate": 1.4033041718262897e-06, "loss": 0.6192, "step": 7899 }, { "epoch": 9.66952264381885, "grad_norm": 1.8965038201077495, "learning_rate": 1.402728285003213e-06, "loss": 0.5421, "step": 7900 }, { "epoch": 9.670746634026928, "grad_norm": 1.6404664777070186, "learning_rate": 1.4021524702887746e-06, "loss": 0.9849, "step": 7901 }, { "epoch": 9.671970624235007, "grad_norm": 1.5174143165895246, "learning_rate": 1.4015767277208155e-06, "loss": 0.3255, "step": 7902 }, { "epoch": 9.673194614443084, "grad_norm": 2.534897772113679, "learning_rate": 1.401001057337171e-06, "loss": 0.4046, "step": 7903 }, { "epoch": 9.674418604651162, "grad_norm": 1.9596061033538106, "learning_rate": 1.4004254591756722e-06, "loss": 0.4974, "step": 7904 }, { "epoch": 9.67564259485924, "grad_norm": 1.8112417314344553, "learning_rate": 1.399849933274145e-06, "loss": 0.3541, "step": 7905 }, { "epoch": 9.67686658506732, "grad_norm": 2.019690822404401, "learning_rate": 1.39927447967041e-06, "loss": 0.7365, "step": 7906 }, { "epoch": 9.678090575275398, "grad_norm": 2.4389126154149894, "learning_rate": 1.398699098402286e-06, "loss": 1.0279, "step": 7907 }, { "epoch": 9.679314565483477, "grad_norm": 1.806026717270288, "learning_rate": 1.3981237895075839e-06, "loss": 0.4745, "step": 7908 }, { "epoch": 9.680538555691555, "grad_norm": 2.5704059657017084, "learning_rate": 1.3975485530241116e-06, "loss": 0.5146, "step": 7909 }, { "epoch": 9.681762545899632, "grad_norm": 1.5269557164089338, "learning_rate": 1.3969733889896696e-06, "loss": 0.4089, "step": 7910 }, { "epoch": 9.68298653610771, "grad_norm": 1.2741160150708244, "learning_rate": 1.3963982974420559e-06, "loss": 0.6047, "step": 7911 }, { "epoch": 9.68421052631579, "grad_norm": 2.0035054042985236, "learning_rate": 1.395823278419065e-06, "loss": 0.7365, "step": 7912 }, { "epoch": 9.685434516523868, "grad_norm": 1.7814404376258857, "learning_rate": 1.3952483319584846e-06, "loss": 0.5708, "step": 7913 }, { "epoch": 9.686658506731947, "grad_norm": 2.2125335872229916, "learning_rate": 1.3946734580980972e-06, "loss": 0.9677, "step": 7914 }, { "epoch": 9.687882496940025, "grad_norm": 2.4284247779225923, "learning_rate": 1.394098656875682e-06, "loss": 0.4882, "step": 7915 }, { "epoch": 9.689106487148102, "grad_norm": 1.5066525532846922, "learning_rate": 1.3935239283290124e-06, "loss": 1.1195, "step": 7916 }, { "epoch": 9.69033047735618, "grad_norm": 1.7739658544798638, "learning_rate": 1.392949272495858e-06, "loss": 0.9136, "step": 7917 }, { "epoch": 9.69155446756426, "grad_norm": 1.34686496683287, "learning_rate": 1.3923746894139823e-06, "loss": 0.6485, "step": 7918 }, { "epoch": 9.692778457772338, "grad_norm": 2.573370594637822, "learning_rate": 1.3918001791211444e-06, "loss": 0.3989, "step": 7919 }, { "epoch": 9.694002447980417, "grad_norm": 1.5362361724905673, "learning_rate": 1.391225741655102e-06, "loss": 0.2251, "step": 7920 }, { "epoch": 9.695226438188495, "grad_norm": 1.5949376271782958, "learning_rate": 1.3906513770536015e-06, "loss": 1.2478, "step": 7921 }, { "epoch": 9.696450428396572, "grad_norm": 1.7036609886837253, "learning_rate": 1.3900770853543893e-06, "loss": 0.7342, "step": 7922 }, { "epoch": 9.69767441860465, "grad_norm": 1.9481514454936066, "learning_rate": 1.389502866595206e-06, "loss": 0.5316, "step": 7923 }, { "epoch": 9.69889840881273, "grad_norm": 1.5251403320729136, "learning_rate": 1.3889287208137856e-06, "loss": 0.9237, "step": 7924 }, { "epoch": 9.700122399020808, "grad_norm": 1.4005386934755517, "learning_rate": 1.3883546480478611e-06, "loss": 0.5344, "step": 7925 }, { "epoch": 9.701346389228886, "grad_norm": 0.8040057559797467, "learning_rate": 1.3877806483351586e-06, "loss": 0.2191, "step": 7926 }, { "epoch": 9.702570379436965, "grad_norm": 1.5271536758140112, "learning_rate": 1.387206721713397e-06, "loss": 0.8235, "step": 7927 }, { "epoch": 9.703794369645042, "grad_norm": 1.1375149961408149, "learning_rate": 1.386632868220293e-06, "loss": 0.5374, "step": 7928 }, { "epoch": 9.70501835985312, "grad_norm": 2.7828863520663134, "learning_rate": 1.3860590878935593e-06, "loss": 0.4272, "step": 7929 }, { "epoch": 9.7062423500612, "grad_norm": 1.0195635340317435, "learning_rate": 1.3854853807709023e-06, "loss": 0.5874, "step": 7930 }, { "epoch": 9.707466340269278, "grad_norm": 2.7226350872138916, "learning_rate": 1.3849117468900238e-06, "loss": 0.5689, "step": 7931 }, { "epoch": 9.708690330477356, "grad_norm": 1.5618211395494557, "learning_rate": 1.384338186288621e-06, "loss": 0.4781, "step": 7932 }, { "epoch": 9.709914320685435, "grad_norm": 1.1922584984490505, "learning_rate": 1.3837646990043859e-06, "loss": 0.5401, "step": 7933 }, { "epoch": 9.711138310893514, "grad_norm": 2.3285419877722244, "learning_rate": 1.3831912850750062e-06, "loss": 0.5485, "step": 7934 }, { "epoch": 9.71236230110159, "grad_norm": 1.7883402571322842, "learning_rate": 1.3826179445381647e-06, "loss": 0.549, "step": 7935 }, { "epoch": 9.713586291309669, "grad_norm": 1.1940255368098498, "learning_rate": 1.3820446774315388e-06, "loss": 0.4748, "step": 7936 }, { "epoch": 9.714810281517748, "grad_norm": 1.575311527524914, "learning_rate": 1.3814714837928006e-06, "loss": 0.5813, "step": 7937 }, { "epoch": 9.716034271725826, "grad_norm": 1.920648392276851, "learning_rate": 1.3808983636596213e-06, "loss": 0.7495, "step": 7938 }, { "epoch": 9.717258261933905, "grad_norm": 1.9122446903195498, "learning_rate": 1.3803253170696612e-06, "loss": 0.4204, "step": 7939 }, { "epoch": 9.718482252141984, "grad_norm": 1.4910147609659479, "learning_rate": 1.3797523440605798e-06, "loss": 0.5301, "step": 7940 }, { "epoch": 9.71970624235006, "grad_norm": 1.5586942573870273, "learning_rate": 1.3791794446700302e-06, "loss": 1.0693, "step": 7941 }, { "epoch": 9.720930232558139, "grad_norm": 1.062781783465664, "learning_rate": 1.3786066189356628e-06, "loss": 0.5968, "step": 7942 }, { "epoch": 9.722154222766218, "grad_norm": 1.3869127936036552, "learning_rate": 1.3780338668951205e-06, "loss": 0.697, "step": 7943 }, { "epoch": 9.723378212974296, "grad_norm": 3.754608410003829, "learning_rate": 1.3774611885860434e-06, "loss": 0.3876, "step": 7944 }, { "epoch": 9.724602203182375, "grad_norm": 1.3922848199268503, "learning_rate": 1.3768885840460633e-06, "loss": 0.4031, "step": 7945 }, { "epoch": 9.725826193390454, "grad_norm": 1.5642192514310513, "learning_rate": 1.3763160533128122e-06, "loss": 0.2973, "step": 7946 }, { "epoch": 9.727050183598532, "grad_norm": 3.153555783694688, "learning_rate": 1.3757435964239141e-06, "loss": 0.3213, "step": 7947 }, { "epoch": 9.728274173806609, "grad_norm": 2.1682596151663174, "learning_rate": 1.3751712134169887e-06, "loss": 0.9232, "step": 7948 }, { "epoch": 9.729498164014688, "grad_norm": 2.0092496840646845, "learning_rate": 1.3745989043296502e-06, "loss": 0.6535, "step": 7949 }, { "epoch": 9.730722154222766, "grad_norm": 2.7421839687460907, "learning_rate": 1.374026669199509e-06, "loss": 0.4713, "step": 7950 }, { "epoch": 9.731946144430845, "grad_norm": 1.4311792871406153, "learning_rate": 1.373454508064171e-06, "loss": 0.9699, "step": 7951 }, { "epoch": 9.733170134638923, "grad_norm": 2.6108756316929513, "learning_rate": 1.3728824209612356e-06, "loss": 1.1841, "step": 7952 }, { "epoch": 9.734394124847, "grad_norm": 1.3649728677935322, "learning_rate": 1.3723104079282987e-06, "loss": 1.0468, "step": 7953 }, { "epoch": 9.735618115055079, "grad_norm": 1.2286292938480798, "learning_rate": 1.3717384690029494e-06, "loss": 0.4803, "step": 7954 }, { "epoch": 9.736842105263158, "grad_norm": 1.2695329685799752, "learning_rate": 1.3711666042227772e-06, "loss": 0.5109, "step": 7955 }, { "epoch": 9.738066095471236, "grad_norm": 2.25379087666454, "learning_rate": 1.3705948136253593e-06, "loss": 0.4536, "step": 7956 }, { "epoch": 9.739290085679315, "grad_norm": 2.651539324117214, "learning_rate": 1.370023097248273e-06, "loss": 0.5837, "step": 7957 }, { "epoch": 9.740514075887393, "grad_norm": 1.1657780094027623, "learning_rate": 1.3694514551290882e-06, "loss": 0.543, "step": 7958 }, { "epoch": 9.741738066095472, "grad_norm": 2.7753768525757496, "learning_rate": 1.3688798873053733e-06, "loss": 0.3606, "step": 7959 }, { "epoch": 9.742962056303549, "grad_norm": 1.712212797418991, "learning_rate": 1.3683083938146879e-06, "loss": 0.5202, "step": 7960 }, { "epoch": 9.744186046511627, "grad_norm": 1.6035053031878594, "learning_rate": 1.3677369746945896e-06, "loss": 0.4578, "step": 7961 }, { "epoch": 9.745410036719706, "grad_norm": 1.369445377042963, "learning_rate": 1.3671656299826302e-06, "loss": 0.4889, "step": 7962 }, { "epoch": 9.746634026927785, "grad_norm": 1.3119321635290635, "learning_rate": 1.3665943597163537e-06, "loss": 0.6274, "step": 7963 }, { "epoch": 9.747858017135863, "grad_norm": 2.5926756422167996, "learning_rate": 1.3660231639333043e-06, "loss": 0.9545, "step": 7964 }, { "epoch": 9.749082007343942, "grad_norm": 1.4869742578345693, "learning_rate": 1.3654520426710188e-06, "loss": 1.4116, "step": 7965 }, { "epoch": 9.750305997552019, "grad_norm": 2.558374331211004, "learning_rate": 1.3648809959670283e-06, "loss": 0.2816, "step": 7966 }, { "epoch": 9.751529987760097, "grad_norm": 1.8324285470064825, "learning_rate": 1.3643100238588603e-06, "loss": 0.6534, "step": 7967 }, { "epoch": 9.752753977968176, "grad_norm": 2.1617208159238888, "learning_rate": 1.363739126384037e-06, "loss": 0.6807, "step": 7968 }, { "epoch": 9.753977968176255, "grad_norm": 2.280708071464484, "learning_rate": 1.3631683035800753e-06, "loss": 0.5004, "step": 7969 }, { "epoch": 9.755201958384333, "grad_norm": 2.2654461758225106, "learning_rate": 1.362597555484488e-06, "loss": 0.4941, "step": 7970 }, { "epoch": 9.756425948592412, "grad_norm": 1.577259656422622, "learning_rate": 1.3620268821347815e-06, "loss": 1.0454, "step": 7971 }, { "epoch": 9.75764993880049, "grad_norm": 1.6850722920389036, "learning_rate": 1.3614562835684603e-06, "loss": 1.4016, "step": 7972 }, { "epoch": 9.758873929008567, "grad_norm": 1.8050305245592908, "learning_rate": 1.3608857598230215e-06, "loss": 0.491, "step": 7973 }, { "epoch": 9.760097919216646, "grad_norm": 2.7753517712366595, "learning_rate": 1.3603153109359568e-06, "loss": 1.0649, "step": 7974 }, { "epoch": 9.761321909424725, "grad_norm": 3.0081974476209443, "learning_rate": 1.3597449369447541e-06, "loss": 0.4678, "step": 7975 }, { "epoch": 9.762545899632803, "grad_norm": 1.3790644568257628, "learning_rate": 1.3591746378868959e-06, "loss": 0.4366, "step": 7976 }, { "epoch": 9.763769889840882, "grad_norm": 1.6788270304581494, "learning_rate": 1.3586044137998617e-06, "loss": 1.1759, "step": 7977 }, { "epoch": 9.76499388004896, "grad_norm": 1.5318741515706569, "learning_rate": 1.3580342647211236e-06, "loss": 2.2828, "step": 7978 }, { "epoch": 9.766217870257037, "grad_norm": 2.132443454682794, "learning_rate": 1.3574641906881504e-06, "loss": 0.5771, "step": 7979 }, { "epoch": 9.767441860465116, "grad_norm": 1.8681997852020857, "learning_rate": 1.3568941917384038e-06, "loss": 0.5884, "step": 7980 }, { "epoch": 9.768665850673194, "grad_norm": 2.483442977222384, "learning_rate": 1.3563242679093433e-06, "loss": 0.2374, "step": 7981 }, { "epoch": 9.769889840881273, "grad_norm": 1.792394140597607, "learning_rate": 1.3557544192384217e-06, "loss": 0.5337, "step": 7982 }, { "epoch": 9.771113831089352, "grad_norm": 1.871923691677858, "learning_rate": 1.3551846457630874e-06, "loss": 1.3919, "step": 7983 }, { "epoch": 9.77233782129743, "grad_norm": 2.553470644664137, "learning_rate": 1.3546149475207836e-06, "loss": 0.9168, "step": 7984 }, { "epoch": 9.773561811505507, "grad_norm": 1.9195844527159664, "learning_rate": 1.3540453245489493e-06, "loss": 0.3978, "step": 7985 }, { "epoch": 9.774785801713586, "grad_norm": 2.5286894843545085, "learning_rate": 1.3534757768850176e-06, "loss": 0.6752, "step": 7986 }, { "epoch": 9.776009791921664, "grad_norm": 1.7435202679144155, "learning_rate": 1.352906304566417e-06, "loss": 0.4211, "step": 7987 }, { "epoch": 9.777233782129743, "grad_norm": 2.233622415336265, "learning_rate": 1.3523369076305715e-06, "loss": 0.3543, "step": 7988 }, { "epoch": 9.778457772337822, "grad_norm": 1.4218462622780295, "learning_rate": 1.3517675861148982e-06, "loss": 1.3202, "step": 7989 }, { "epoch": 9.7796817625459, "grad_norm": 1.9995611685756287, "learning_rate": 1.3511983400568132e-06, "loss": 0.4402, "step": 7990 }, { "epoch": 9.780905752753977, "grad_norm": 1.3755761378221443, "learning_rate": 1.3506291694937252e-06, "loss": 0.368, "step": 7991 }, { "epoch": 9.782129742962056, "grad_norm": 1.7211652047967565, "learning_rate": 1.350060074463036e-06, "loss": 0.4515, "step": 7992 }, { "epoch": 9.783353733170134, "grad_norm": 3.0247166437372845, "learning_rate": 1.3494910550021445e-06, "loss": 0.5809, "step": 7993 }, { "epoch": 9.784577723378213, "grad_norm": 3.3940690297809146, "learning_rate": 1.3489221111484463e-06, "loss": 0.4208, "step": 7994 }, { "epoch": 9.785801713586292, "grad_norm": 1.3294161011406622, "learning_rate": 1.3483532429393291e-06, "loss": 0.68, "step": 7995 }, { "epoch": 9.78702570379437, "grad_norm": 1.9862537328369367, "learning_rate": 1.3477844504121776e-06, "loss": 0.9301, "step": 7996 }, { "epoch": 9.788249694002449, "grad_norm": 2.0135147164123515, "learning_rate": 1.3472157336043711e-06, "loss": 1.074, "step": 7997 }, { "epoch": 9.789473684210526, "grad_norm": 2.3409010296086077, "learning_rate": 1.346647092553281e-06, "loss": 0.4746, "step": 7998 }, { "epoch": 9.790697674418604, "grad_norm": 1.8285893979473264, "learning_rate": 1.3460785272962784e-06, "loss": 0.4588, "step": 7999 }, { "epoch": 9.791921664626683, "grad_norm": 1.6193666017388635, "learning_rate": 1.3455100378707276e-06, "loss": 0.4912, "step": 8000 }, { "epoch": 9.793145654834762, "grad_norm": 1.4663630193535033, "learning_rate": 1.3449416243139863e-06, "loss": 0.6176, "step": 8001 }, { "epoch": 9.79436964504284, "grad_norm": 1.847407936984587, "learning_rate": 1.3443732866634096e-06, "loss": 1.0126, "step": 8002 }, { "epoch": 9.795593635250919, "grad_norm": 1.2915425901654443, "learning_rate": 1.3438050249563461e-06, "loss": 0.977, "step": 8003 }, { "epoch": 9.796817625458996, "grad_norm": 1.0879716874868688, "learning_rate": 1.3432368392301397e-06, "loss": 0.5391, "step": 8004 }, { "epoch": 9.798041615667074, "grad_norm": 1.4768485969881757, "learning_rate": 1.34266872952213e-06, "loss": 0.6308, "step": 8005 }, { "epoch": 9.799265605875153, "grad_norm": 2.442793213840337, "learning_rate": 1.3421006958696495e-06, "loss": 0.5038, "step": 8006 }, { "epoch": 9.800489596083231, "grad_norm": 3.551107960139448, "learning_rate": 1.3415327383100297e-06, "loss": 0.4807, "step": 8007 }, { "epoch": 9.80171358629131, "grad_norm": 1.2262453504172577, "learning_rate": 1.340964856880593e-06, "loss": 0.4716, "step": 8008 }, { "epoch": 9.802937576499389, "grad_norm": 1.6222198961968428, "learning_rate": 1.34039705161866e-06, "loss": 0.5496, "step": 8009 }, { "epoch": 9.804161566707466, "grad_norm": 2.3803470921417484, "learning_rate": 1.339829322561543e-06, "loss": 0.4025, "step": 8010 }, { "epoch": 9.805385556915544, "grad_norm": 0.997432480439704, "learning_rate": 1.3392616697465507e-06, "loss": 0.4412, "step": 8011 }, { "epoch": 9.806609547123623, "grad_norm": 2.6626344846381533, "learning_rate": 1.3386940932109894e-06, "loss": 0.3651, "step": 8012 }, { "epoch": 9.807833537331701, "grad_norm": 2.1076255856199118, "learning_rate": 1.3381265929921566e-06, "loss": 0.423, "step": 8013 }, { "epoch": 9.80905752753978, "grad_norm": 2.155288162424569, "learning_rate": 1.3375591691273465e-06, "loss": 0.6179, "step": 8014 }, { "epoch": 9.810281517747859, "grad_norm": 1.020988069391773, "learning_rate": 1.3369918216538486e-06, "loss": 0.5283, "step": 8015 }, { "epoch": 9.811505507955935, "grad_norm": 1.0531956783136196, "learning_rate": 1.3364245506089463e-06, "loss": 0.6314, "step": 8016 }, { "epoch": 9.812729498164014, "grad_norm": 2.256314816581433, "learning_rate": 1.3358573560299188e-06, "loss": 0.3309, "step": 8017 }, { "epoch": 9.813953488372093, "grad_norm": 1.7406861177445812, "learning_rate": 1.33529023795404e-06, "loss": 0.6584, "step": 8018 }, { "epoch": 9.815177478580171, "grad_norm": 1.729475474888018, "learning_rate": 1.3347231964185774e-06, "loss": 0.7518, "step": 8019 }, { "epoch": 9.81640146878825, "grad_norm": 1.4813776660514777, "learning_rate": 1.3341562314607989e-06, "loss": 0.8742, "step": 8020 }, { "epoch": 9.817625458996329, "grad_norm": 1.815358076413664, "learning_rate": 1.3335893431179595e-06, "loss": 0.2987, "step": 8021 }, { "epoch": 9.818849449204407, "grad_norm": 1.1101863653528696, "learning_rate": 1.3330225314273142e-06, "loss": 0.6421, "step": 8022 }, { "epoch": 9.820073439412484, "grad_norm": 1.7811367614170501, "learning_rate": 1.3324557964261117e-06, "loss": 0.5298, "step": 8023 }, { "epoch": 9.821297429620563, "grad_norm": 2.1466995107353455, "learning_rate": 1.331889138151595e-06, "loss": 0.3794, "step": 8024 }, { "epoch": 9.822521419828641, "grad_norm": 1.338826732279015, "learning_rate": 1.3313225566410042e-06, "loss": 0.7136, "step": 8025 }, { "epoch": 9.82374541003672, "grad_norm": 1.8117744573329801, "learning_rate": 1.330756051931573e-06, "loss": 1.0233, "step": 8026 }, { "epoch": 9.824969400244798, "grad_norm": 2.558987007209238, "learning_rate": 1.33018962406053e-06, "loss": 0.8581, "step": 8027 }, { "epoch": 9.826193390452877, "grad_norm": 1.7708616995895166, "learning_rate": 1.329623273065096e-06, "loss": 1.4242, "step": 8028 }, { "epoch": 9.827417380660954, "grad_norm": 1.5961587348581474, "learning_rate": 1.3290569989824926e-06, "loss": 1.1718, "step": 8029 }, { "epoch": 9.828641370869033, "grad_norm": 1.6152438610091786, "learning_rate": 1.328490801849932e-06, "loss": 1.0163, "step": 8030 }, { "epoch": 9.829865361077111, "grad_norm": 2.2100971686955884, "learning_rate": 1.3279246817046233e-06, "loss": 0.5333, "step": 8031 }, { "epoch": 9.83108935128519, "grad_norm": 2.09330299814667, "learning_rate": 1.3273586385837688e-06, "loss": 0.5388, "step": 8032 }, { "epoch": 9.832313341493268, "grad_norm": 1.2913438100053727, "learning_rate": 1.3267926725245675e-06, "loss": 0.7753, "step": 8033 }, { "epoch": 9.833537331701347, "grad_norm": 2.5680303976268695, "learning_rate": 1.3262267835642122e-06, "loss": 0.4297, "step": 8034 }, { "epoch": 9.834761321909426, "grad_norm": 1.235802409813482, "learning_rate": 1.3256609717398916e-06, "loss": 0.5568, "step": 8035 }, { "epoch": 9.835985312117502, "grad_norm": 1.517519334032845, "learning_rate": 1.3250952370887881e-06, "loss": 1.4044, "step": 8036 }, { "epoch": 9.837209302325581, "grad_norm": 1.5942145995008146, "learning_rate": 1.324529579648079e-06, "loss": 0.929, "step": 8037 }, { "epoch": 9.83843329253366, "grad_norm": 1.5594342229237552, "learning_rate": 1.3239639994549402e-06, "loss": 0.6025, "step": 8038 }, { "epoch": 9.839657282741738, "grad_norm": 2.085534351577223, "learning_rate": 1.3233984965465363e-06, "loss": 0.8503, "step": 8039 }, { "epoch": 9.840881272949817, "grad_norm": 2.340798645266233, "learning_rate": 1.3228330709600311e-06, "loss": 0.5028, "step": 8040 }, { "epoch": 9.842105263157894, "grad_norm": 0.9059008798024042, "learning_rate": 1.322267722732582e-06, "loss": 0.4654, "step": 8041 }, { "epoch": 9.843329253365972, "grad_norm": 1.356303355752724, "learning_rate": 1.3217024519013427e-06, "loss": 0.7781, "step": 8042 }, { "epoch": 9.844553243574051, "grad_norm": 1.1116084051416137, "learning_rate": 1.32113725850346e-06, "loss": 0.5605, "step": 8043 }, { "epoch": 9.84577723378213, "grad_norm": 1.0379963082103314, "learning_rate": 1.3205721425760775e-06, "loss": 0.5931, "step": 8044 }, { "epoch": 9.847001223990208, "grad_norm": 3.014244156687469, "learning_rate": 1.3200071041563295e-06, "loss": 0.3801, "step": 8045 }, { "epoch": 9.848225214198287, "grad_norm": 1.5970972817302973, "learning_rate": 1.319442143281351e-06, "loss": 0.8606, "step": 8046 }, { "epoch": 9.849449204406366, "grad_norm": 1.6243014755566054, "learning_rate": 1.3188772599882683e-06, "loss": 0.4926, "step": 8047 }, { "epoch": 9.850673194614442, "grad_norm": 1.3576925305407561, "learning_rate": 1.318312454314203e-06, "loss": 0.5479, "step": 8048 }, { "epoch": 9.851897184822521, "grad_norm": 2.07896350509473, "learning_rate": 1.3177477262962729e-06, "loss": 0.5163, "step": 8049 }, { "epoch": 9.8531211750306, "grad_norm": 1.1742384952908267, "learning_rate": 1.3171830759715893e-06, "loss": 0.6611, "step": 8050 }, { "epoch": 9.854345165238678, "grad_norm": 1.3390240074058366, "learning_rate": 1.3166185033772589e-06, "loss": 0.5183, "step": 8051 }, { "epoch": 9.855569155446757, "grad_norm": 2.1154485430279633, "learning_rate": 1.3160540085503836e-06, "loss": 1.0176, "step": 8052 }, { "epoch": 9.856793145654835, "grad_norm": 1.6772393558237146, "learning_rate": 1.3154895915280596e-06, "loss": 1.6168, "step": 8053 }, { "epoch": 9.858017135862912, "grad_norm": 1.8609102070246282, "learning_rate": 1.3149252523473773e-06, "loss": 0.5943, "step": 8054 }, { "epoch": 9.859241126070991, "grad_norm": 1.6971387315376707, "learning_rate": 1.3143609910454252e-06, "loss": 0.5034, "step": 8055 }, { "epoch": 9.86046511627907, "grad_norm": 2.590757944734166, "learning_rate": 1.3137968076592847e-06, "loss": 0.4953, "step": 8056 }, { "epoch": 9.861689106487148, "grad_norm": 1.1483565394422681, "learning_rate": 1.3132327022260295e-06, "loss": 0.6951, "step": 8057 }, { "epoch": 9.862913096695227, "grad_norm": 1.3715307972084403, "learning_rate": 1.3126686747827303e-06, "loss": 0.5065, "step": 8058 }, { "epoch": 9.864137086903305, "grad_norm": 1.020385223578941, "learning_rate": 1.3121047253664555e-06, "loss": 0.5123, "step": 8059 }, { "epoch": 9.865361077111384, "grad_norm": 2.0791068406193873, "learning_rate": 1.3115408540142639e-06, "loss": 0.9728, "step": 8060 }, { "epoch": 9.86658506731946, "grad_norm": 1.291137886221936, "learning_rate": 1.3109770607632119e-06, "loss": 0.5921, "step": 8061 }, { "epoch": 9.86780905752754, "grad_norm": 1.995161090573588, "learning_rate": 1.3104133456503504e-06, "loss": 0.4117, "step": 8062 }, { "epoch": 9.869033047735618, "grad_norm": 2.142027805824703, "learning_rate": 1.3098497087127222e-06, "loss": 0.5806, "step": 8063 }, { "epoch": 9.870257037943697, "grad_norm": 1.8066999786471458, "learning_rate": 1.30928614998737e-06, "loss": 0.3917, "step": 8064 }, { "epoch": 9.871481028151775, "grad_norm": 1.6420458653040844, "learning_rate": 1.3087226695113276e-06, "loss": 0.5743, "step": 8065 }, { "epoch": 9.872705018359854, "grad_norm": 1.8625882107139966, "learning_rate": 1.3081592673216252e-06, "loss": 0.5333, "step": 8066 }, { "epoch": 9.87392900856793, "grad_norm": 2.559882595034093, "learning_rate": 1.3075959434552876e-06, "loss": 0.4058, "step": 8067 }, { "epoch": 9.87515299877601, "grad_norm": 1.7933464243498776, "learning_rate": 1.3070326979493338e-06, "loss": 1.0853, "step": 8068 }, { "epoch": 9.876376988984088, "grad_norm": 1.7445095797078758, "learning_rate": 1.306469530840779e-06, "loss": 0.9882, "step": 8069 }, { "epoch": 9.877600979192167, "grad_norm": 2.056815653286675, "learning_rate": 1.305906442166632e-06, "loss": 0.7186, "step": 8070 }, { "epoch": 9.878824969400245, "grad_norm": 1.2407986748266806, "learning_rate": 1.305343431963896e-06, "loss": 0.8955, "step": 8071 }, { "epoch": 9.880048959608324, "grad_norm": 2.759552075835713, "learning_rate": 1.3047805002695718e-06, "loss": 0.4134, "step": 8072 }, { "epoch": 9.8812729498164, "grad_norm": 1.5772786785977546, "learning_rate": 1.3042176471206536e-06, "loss": 0.7017, "step": 8073 }, { "epoch": 9.88249694002448, "grad_norm": 1.12952913792466, "learning_rate": 1.3036548725541274e-06, "loss": 0.5631, "step": 8074 }, { "epoch": 9.883720930232558, "grad_norm": 1.8853864236805709, "learning_rate": 1.3030921766069782e-06, "loss": 0.3928, "step": 8075 }, { "epoch": 9.884944920440637, "grad_norm": 1.5306608325133928, "learning_rate": 1.302529559316183e-06, "loss": 0.4799, "step": 8076 }, { "epoch": 9.886168910648715, "grad_norm": 1.3990669146836523, "learning_rate": 1.3019670207187168e-06, "loss": 0.5331, "step": 8077 }, { "epoch": 9.887392900856794, "grad_norm": 2.73480818433506, "learning_rate": 1.3014045608515474e-06, "loss": 0.4548, "step": 8078 }, { "epoch": 9.88861689106487, "grad_norm": 1.9907632264119175, "learning_rate": 1.3008421797516362e-06, "loss": 1.3199, "step": 8079 }, { "epoch": 9.88984088127295, "grad_norm": 1.0702868473666032, "learning_rate": 1.3002798774559422e-06, "loss": 0.3641, "step": 8080 }, { "epoch": 9.891064871481028, "grad_norm": 2.450308243490088, "learning_rate": 1.299717654001417e-06, "loss": 0.4247, "step": 8081 }, { "epoch": 9.892288861689106, "grad_norm": 0.9317481090065809, "learning_rate": 1.299155509425008e-06, "loss": 0.4284, "step": 8082 }, { "epoch": 9.893512851897185, "grad_norm": 1.446039585315098, "learning_rate": 1.2985934437636571e-06, "loss": 0.5624, "step": 8083 }, { "epoch": 9.894736842105264, "grad_norm": 2.0869902215945944, "learning_rate": 1.2980314570543007e-06, "loss": 0.6139, "step": 8084 }, { "epoch": 9.895960832313342, "grad_norm": 2.371140325055438, "learning_rate": 1.297469549333873e-06, "loss": 0.4909, "step": 8085 }, { "epoch": 9.89718482252142, "grad_norm": 2.0471134093654477, "learning_rate": 1.2969077206392978e-06, "loss": 0.8582, "step": 8086 }, { "epoch": 9.898408812729498, "grad_norm": 1.1215147470462608, "learning_rate": 1.296345971007497e-06, "loss": 1.2, "step": 8087 }, { "epoch": 9.899632802937576, "grad_norm": 1.9422197951080777, "learning_rate": 1.2957843004753872e-06, "loss": 1.2937, "step": 8088 }, { "epoch": 9.900856793145655, "grad_norm": 2.544173687299527, "learning_rate": 1.2952227090798784e-06, "loss": 0.4012, "step": 8089 }, { "epoch": 9.902080783353734, "grad_norm": 2.105821548421532, "learning_rate": 1.2946611968578776e-06, "loss": 1.1894, "step": 8090 }, { "epoch": 9.903304773561812, "grad_norm": 1.5427227474288623, "learning_rate": 1.2940997638462858e-06, "loss": 0.3576, "step": 8091 }, { "epoch": 9.904528763769889, "grad_norm": 1.6426363785752642, "learning_rate": 1.293538410081996e-06, "loss": 0.7311, "step": 8092 }, { "epoch": 9.905752753977968, "grad_norm": 2.3765374565375836, "learning_rate": 1.2929771356018988e-06, "loss": 0.4657, "step": 8093 }, { "epoch": 9.906976744186046, "grad_norm": 1.8893264336677156, "learning_rate": 1.2924159404428804e-06, "loss": 0.5267, "step": 8094 }, { "epoch": 9.908200734394125, "grad_norm": 1.8785081050415589, "learning_rate": 1.2918548246418203e-06, "loss": 0.6874, "step": 8095 }, { "epoch": 9.909424724602204, "grad_norm": 2.106446790910935, "learning_rate": 1.2912937882355922e-06, "loss": 1.134, "step": 8096 }, { "epoch": 9.910648714810282, "grad_norm": 0.9121897954427349, "learning_rate": 1.2907328312610657e-06, "loss": 0.4405, "step": 8097 }, { "epoch": 9.911872705018359, "grad_norm": 1.5801891280845868, "learning_rate": 1.290171953755105e-06, "loss": 0.6424, "step": 8098 }, { "epoch": 9.913096695226438, "grad_norm": 1.5312489742730087, "learning_rate": 1.289611155754568e-06, "loss": 0.4663, "step": 8099 }, { "epoch": 9.914320685434516, "grad_norm": 1.832173899501259, "learning_rate": 1.2890504372963097e-06, "loss": 0.7327, "step": 8100 }, { "epoch": 9.915544675642595, "grad_norm": 1.4342790719848306, "learning_rate": 1.2884897984171774e-06, "loss": 0.6887, "step": 8101 }, { "epoch": 9.916768665850674, "grad_norm": 1.5407238468634554, "learning_rate": 1.2879292391540144e-06, "loss": 0.9194, "step": 8102 }, { "epoch": 9.917992656058752, "grad_norm": 0.7183188872005705, "learning_rate": 1.2873687595436584e-06, "loss": 0.2571, "step": 8103 }, { "epoch": 9.919216646266829, "grad_norm": 2.240760802304668, "learning_rate": 1.286808359622943e-06, "loss": 1.0402, "step": 8104 }, { "epoch": 9.920440636474908, "grad_norm": 2.2041270011574223, "learning_rate": 1.2862480394286943e-06, "loss": 0.4201, "step": 8105 }, { "epoch": 9.921664626682986, "grad_norm": 1.9233089290326, "learning_rate": 1.2856877989977345e-06, "loss": 1.0515, "step": 8106 }, { "epoch": 9.922888616891065, "grad_norm": 2.066078765763395, "learning_rate": 1.2851276383668818e-06, "loss": 1.2067, "step": 8107 }, { "epoch": 9.924112607099143, "grad_norm": 1.72009800882765, "learning_rate": 1.2845675575729472e-06, "loss": 0.4748, "step": 8108 }, { "epoch": 9.925336597307222, "grad_norm": 1.4632596918282041, "learning_rate": 1.2840075566527382e-06, "loss": 0.2737, "step": 8109 }, { "epoch": 9.9265605875153, "grad_norm": 2.2461122903742936, "learning_rate": 1.283447635643053e-06, "loss": 1.021, "step": 8110 }, { "epoch": 9.927784577723378, "grad_norm": 2.640656277716113, "learning_rate": 1.2828877945806906e-06, "loss": 0.5183, "step": 8111 }, { "epoch": 9.929008567931456, "grad_norm": 2.355921390401225, "learning_rate": 1.2823280335024408e-06, "loss": 0.3703, "step": 8112 }, { "epoch": 9.930232558139535, "grad_norm": 1.3900421040770787, "learning_rate": 1.281768352445088e-06, "loss": 1.2829, "step": 8113 }, { "epoch": 9.931456548347613, "grad_norm": 1.1959243631425074, "learning_rate": 1.2812087514454135e-06, "loss": 0.4423, "step": 8114 }, { "epoch": 9.932680538555692, "grad_norm": 0.8582855255436878, "learning_rate": 1.2806492305401922e-06, "loss": 0.3505, "step": 8115 }, { "epoch": 9.93390452876377, "grad_norm": 1.3458561371163833, "learning_rate": 1.280089789766193e-06, "loss": 0.8735, "step": 8116 }, { "epoch": 9.935128518971847, "grad_norm": 1.2534729354487257, "learning_rate": 1.2795304291601806e-06, "loss": 0.4947, "step": 8117 }, { "epoch": 9.936352509179926, "grad_norm": 1.2740451163117554, "learning_rate": 1.2789711487589145e-06, "loss": 0.7332, "step": 8118 }, { "epoch": 9.937576499388005, "grad_norm": 0.9986660541729924, "learning_rate": 1.2784119485991475e-06, "loss": 0.4433, "step": 8119 }, { "epoch": 9.938800489596083, "grad_norm": 1.7607862007543333, "learning_rate": 1.2778528287176303e-06, "loss": 0.4632, "step": 8120 }, { "epoch": 9.940024479804162, "grad_norm": 2.5198665226735155, "learning_rate": 1.277293789151104e-06, "loss": 0.3638, "step": 8121 }, { "epoch": 9.94124847001224, "grad_norm": 1.7615670651280837, "learning_rate": 1.2767348299363075e-06, "loss": 1.4606, "step": 8122 }, { "epoch": 9.94247246022032, "grad_norm": 1.5351733186565144, "learning_rate": 1.2761759511099725e-06, "loss": 1.5444, "step": 8123 }, { "epoch": 9.943696450428396, "grad_norm": 1.1190199926485953, "learning_rate": 1.275617152708828e-06, "loss": 0.5728, "step": 8124 }, { "epoch": 9.944920440636475, "grad_norm": 1.7935902615294652, "learning_rate": 1.2750584347695958e-06, "loss": 0.4428, "step": 8125 }, { "epoch": 9.946144430844553, "grad_norm": 1.6938499208156552, "learning_rate": 1.2744997973289925e-06, "loss": 0.6233, "step": 8126 }, { "epoch": 9.947368421052632, "grad_norm": 2.9855230531314922, "learning_rate": 1.2739412404237306e-06, "loss": 0.4046, "step": 8127 }, { "epoch": 9.94859241126071, "grad_norm": 1.707416395903036, "learning_rate": 1.2733827640905138e-06, "loss": 0.5271, "step": 8128 }, { "epoch": 9.949816401468787, "grad_norm": 1.8332145661999193, "learning_rate": 1.2728243683660455e-06, "loss": 0.6163, "step": 8129 }, { "epoch": 9.951040391676866, "grad_norm": 2.73647573936174, "learning_rate": 1.2722660532870206e-06, "loss": 0.3684, "step": 8130 }, { "epoch": 9.952264381884945, "grad_norm": 2.522206804658083, "learning_rate": 1.27170781889013e-06, "loss": 0.8212, "step": 8131 }, { "epoch": 9.953488372093023, "grad_norm": 1.725992155644301, "learning_rate": 1.2711496652120581e-06, "loss": 0.632, "step": 8132 }, { "epoch": 9.954712362301102, "grad_norm": 1.8060124171612597, "learning_rate": 1.270591592289485e-06, "loss": 1.1746, "step": 8133 }, { "epoch": 9.95593635250918, "grad_norm": 1.9068337294165791, "learning_rate": 1.2700336001590855e-06, "loss": 0.4862, "step": 8134 }, { "epoch": 9.957160342717259, "grad_norm": 1.9872543582078661, "learning_rate": 1.2694756888575283e-06, "loss": 0.6593, "step": 8135 }, { "epoch": 9.958384332925336, "grad_norm": 1.8445177704521656, "learning_rate": 1.2689178584214764e-06, "loss": 0.5101, "step": 8136 }, { "epoch": 9.959608323133414, "grad_norm": 2.615276944257637, "learning_rate": 1.2683601088875903e-06, "loss": 1.2841, "step": 8137 }, { "epoch": 9.960832313341493, "grad_norm": 2.739393669596744, "learning_rate": 1.2678024402925234e-06, "loss": 0.446, "step": 8138 }, { "epoch": 9.962056303549572, "grad_norm": 1.8035775616269512, "learning_rate": 1.2672448526729217e-06, "loss": 0.4265, "step": 8139 }, { "epoch": 9.96328029375765, "grad_norm": 1.3163824057565425, "learning_rate": 1.2666873460654283e-06, "loss": 0.3976, "step": 8140 }, { "epoch": 9.964504283965729, "grad_norm": 2.598975873256679, "learning_rate": 1.2661299205066802e-06, "loss": 0.4378, "step": 8141 }, { "epoch": 9.965728274173806, "grad_norm": 1.6961857512775436, "learning_rate": 1.2655725760333107e-06, "loss": 0.5427, "step": 8142 }, { "epoch": 9.966952264381884, "grad_norm": 1.3930666744594422, "learning_rate": 1.2650153126819458e-06, "loss": 0.5461, "step": 8143 }, { "epoch": 9.968176254589963, "grad_norm": 1.948271497494847, "learning_rate": 1.2644581304892073e-06, "loss": 0.6331, "step": 8144 }, { "epoch": 9.969400244798042, "grad_norm": 1.9450647135590244, "learning_rate": 1.2639010294917089e-06, "loss": 0.9635, "step": 8145 }, { "epoch": 9.97062423500612, "grad_norm": 2.3478983003175617, "learning_rate": 1.2633440097260639e-06, "loss": 0.5526, "step": 8146 }, { "epoch": 9.971848225214199, "grad_norm": 1.5092468577056113, "learning_rate": 1.2627870712288761e-06, "loss": 0.494, "step": 8147 }, { "epoch": 9.973072215422278, "grad_norm": 2.4133926173573963, "learning_rate": 1.2622302140367463e-06, "loss": 0.5415, "step": 8148 }, { "epoch": 9.974296205630354, "grad_norm": 1.288738391285761, "learning_rate": 1.2616734381862684e-06, "loss": 0.5845, "step": 8149 }, { "epoch": 9.975520195838433, "grad_norm": 2.232398482721865, "learning_rate": 1.2611167437140318e-06, "loss": 0.676, "step": 8150 }, { "epoch": 9.976744186046512, "grad_norm": 2.376702996911529, "learning_rate": 1.2605601306566206e-06, "loss": 0.4283, "step": 8151 }, { "epoch": 9.97796817625459, "grad_norm": 1.15348726919268, "learning_rate": 1.2600035990506132e-06, "loss": 0.4457, "step": 8152 }, { "epoch": 9.979192166462669, "grad_norm": 1.514528109000496, "learning_rate": 1.259447148932583e-06, "loss": 0.6075, "step": 8153 }, { "epoch": 9.980416156670747, "grad_norm": 1.3495700232353645, "learning_rate": 1.2588907803390964e-06, "loss": 0.4051, "step": 8154 }, { "epoch": 9.981640146878824, "grad_norm": 1.133810049852892, "learning_rate": 1.2583344933067184e-06, "loss": 0.5689, "step": 8155 }, { "epoch": 9.982864137086903, "grad_norm": 1.5252738752754431, "learning_rate": 1.2577782878720058e-06, "loss": 0.6255, "step": 8156 }, { "epoch": 9.984088127294982, "grad_norm": 2.1745258972358594, "learning_rate": 1.2572221640715086e-06, "loss": 0.4886, "step": 8157 }, { "epoch": 9.98531211750306, "grad_norm": 1.8286749212242521, "learning_rate": 1.2566661219417732e-06, "loss": 1.8838, "step": 8158 }, { "epoch": 9.986536107711139, "grad_norm": 1.6495961404778083, "learning_rate": 1.256110161519342e-06, "loss": 0.3891, "step": 8159 }, { "epoch": 9.987760097919217, "grad_norm": 1.3911984108296396, "learning_rate": 1.2555542828407501e-06, "loss": 1.5603, "step": 8160 }, { "epoch": 9.988984088127294, "grad_norm": 1.1510034179347792, "learning_rate": 1.2549984859425285e-06, "loss": 0.7474, "step": 8161 }, { "epoch": 9.990208078335373, "grad_norm": 1.8815924038221463, "learning_rate": 1.254442770861202e-06, "loss": 0.4201, "step": 8162 }, { "epoch": 9.991432068543451, "grad_norm": 1.9741281136590338, "learning_rate": 1.2538871376332878e-06, "loss": 0.3242, "step": 8163 }, { "epoch": 9.99265605875153, "grad_norm": 1.3254113287515683, "learning_rate": 1.2533315862953027e-06, "loss": 0.5261, "step": 8164 }, { "epoch": 9.993880048959609, "grad_norm": 2.1754619356017253, "learning_rate": 1.2527761168837546e-06, "loss": 0.9569, "step": 8165 }, { "epoch": 9.995104039167687, "grad_norm": 2.4335911602722717, "learning_rate": 1.252220729435147e-06, "loss": 0.5612, "step": 8166 }, { "epoch": 9.996328029375764, "grad_norm": 2.0015933829808867, "learning_rate": 1.251665423985978e-06, "loss": 0.4919, "step": 8167 }, { "epoch": 9.997552019583843, "grad_norm": 1.4686481146738966, "learning_rate": 1.2511102005727399e-06, "loss": 1.0264, "step": 8168 }, { "epoch": 9.998776009791921, "grad_norm": 2.749867258845465, "learning_rate": 1.2505550592319202e-06, "loss": 0.2142, "step": 8169 }, { "epoch": 10.0, "grad_norm": 2.6529243501104967, "learning_rate": 1.2500000000000007e-06, "loss": 0.5056, "step": 8170 }, { "epoch": 10.001223990208079, "grad_norm": 1.5272856604806633, "learning_rate": 1.2494450229134567e-06, "loss": 0.5623, "step": 8171 }, { "epoch": 10.002447980416157, "grad_norm": 1.5543008482735035, "learning_rate": 1.2488901280087613e-06, "loss": 0.5772, "step": 8172 }, { "epoch": 10.003671970624236, "grad_norm": 1.5202782545636946, "learning_rate": 1.2483353153223798e-06, "loss": 0.9298, "step": 8173 }, { "epoch": 10.004895960832313, "grad_norm": 1.5064042870638137, "learning_rate": 1.2477805848907712e-06, "loss": 0.4899, "step": 8174 }, { "epoch": 10.006119951040391, "grad_norm": 1.1641299581266906, "learning_rate": 1.2472259367503909e-06, "loss": 0.4808, "step": 8175 }, { "epoch": 10.00734394124847, "grad_norm": 2.5686894202595267, "learning_rate": 1.2466713709376875e-06, "loss": 0.3473, "step": 8176 }, { "epoch": 10.008567931456549, "grad_norm": 1.4986064961233794, "learning_rate": 1.2461168874891064e-06, "loss": 1.2136, "step": 8177 }, { "epoch": 10.009791921664627, "grad_norm": 2.012337930389982, "learning_rate": 1.245562486441086e-06, "loss": 0.4355, "step": 8178 }, { "epoch": 10.011015911872706, "grad_norm": 2.451759957510325, "learning_rate": 1.245008167830059e-06, "loss": 0.2295, "step": 8179 }, { "epoch": 10.012239902080783, "grad_norm": 1.9988336048301478, "learning_rate": 1.2444539316924534e-06, "loss": 1.138, "step": 8180 }, { "epoch": 10.013463892288861, "grad_norm": 1.385858015401202, "learning_rate": 1.2438997780646914e-06, "loss": 0.5383, "step": 8181 }, { "epoch": 10.01468788249694, "grad_norm": 1.7335782221932536, "learning_rate": 1.2433457069831901e-06, "loss": 0.5225, "step": 8182 }, { "epoch": 10.015911872705018, "grad_norm": 1.2246282604725975, "learning_rate": 1.2427917184843608e-06, "loss": 0.5791, "step": 8183 }, { "epoch": 10.017135862913097, "grad_norm": 1.932138429942333, "learning_rate": 1.2422378126046086e-06, "loss": 0.4143, "step": 8184 }, { "epoch": 10.018359853121176, "grad_norm": 1.0066729571219584, "learning_rate": 1.2416839893803372e-06, "loss": 0.5843, "step": 8185 }, { "epoch": 10.019583843329253, "grad_norm": 2.4721424105251684, "learning_rate": 1.241130248847939e-06, "loss": 0.4168, "step": 8186 }, { "epoch": 10.020807833537331, "grad_norm": 2.1850229102350776, "learning_rate": 1.2405765910438044e-06, "loss": 0.3745, "step": 8187 }, { "epoch": 10.02203182374541, "grad_norm": 1.7509585065173705, "learning_rate": 1.240023016004318e-06, "loss": 1.4018, "step": 8188 }, { "epoch": 10.023255813953488, "grad_norm": 1.4657240349013696, "learning_rate": 1.239469523765858e-06, "loss": 0.453, "step": 8189 }, { "epoch": 10.024479804161567, "grad_norm": 1.622072845639361, "learning_rate": 1.2389161143647992e-06, "loss": 1.6222, "step": 8190 }, { "epoch": 10.025703794369646, "grad_norm": 2.6222760738247426, "learning_rate": 1.2383627878375098e-06, "loss": 0.8949, "step": 8191 }, { "epoch": 10.026927784577722, "grad_norm": 1.787674520191941, "learning_rate": 1.2378095442203509e-06, "loss": 1.5911, "step": 8192 }, { "epoch": 10.028151774785801, "grad_norm": 2.013038694250396, "learning_rate": 1.237256383549679e-06, "loss": 0.3099, "step": 8193 }, { "epoch": 10.02937576499388, "grad_norm": 1.7565197563065815, "learning_rate": 1.236703305861848e-06, "loss": 1.1584, "step": 8194 }, { "epoch": 10.030599755201958, "grad_norm": 3.025333169232765, "learning_rate": 1.2361503111932033e-06, "loss": 0.5436, "step": 8195 }, { "epoch": 10.031823745410037, "grad_norm": 1.1840975399804163, "learning_rate": 1.2355973995800855e-06, "loss": 0.583, "step": 8196 }, { "epoch": 10.033047735618116, "grad_norm": 0.9361176290479428, "learning_rate": 1.2350445710588297e-06, "loss": 0.3903, "step": 8197 }, { "epoch": 10.034271725826194, "grad_norm": 1.2009336767901206, "learning_rate": 1.2344918256657659e-06, "loss": 0.4389, "step": 8198 }, { "epoch": 10.035495716034271, "grad_norm": 2.1754870362520475, "learning_rate": 1.2339391634372185e-06, "loss": 0.554, "step": 8199 }, { "epoch": 10.03671970624235, "grad_norm": 1.9482305066046048, "learning_rate": 1.2333865844095067e-06, "loss": 0.4277, "step": 8200 }, { "epoch": 10.037943696450428, "grad_norm": 1.3754165781652723, "learning_rate": 1.2328340886189436e-06, "loss": 0.4338, "step": 8201 }, { "epoch": 10.039167686658507, "grad_norm": 1.7629831464277703, "learning_rate": 1.2322816761018371e-06, "loss": 0.5256, "step": 8202 }, { "epoch": 10.040391676866586, "grad_norm": 1.3989393514925506, "learning_rate": 1.2317293468944902e-06, "loss": 0.6196, "step": 8203 }, { "epoch": 10.041615667074664, "grad_norm": 2.4474447861156077, "learning_rate": 1.2311771010331995e-06, "loss": 0.6019, "step": 8204 }, { "epoch": 10.042839657282741, "grad_norm": 1.99594135717264, "learning_rate": 1.2306249385542566e-06, "loss": 0.4061, "step": 8205 }, { "epoch": 10.04406364749082, "grad_norm": 1.3853867474988133, "learning_rate": 1.2300728594939468e-06, "loss": 0.5548, "step": 8206 }, { "epoch": 10.045287637698898, "grad_norm": 2.0816188342711506, "learning_rate": 1.2295208638885522e-06, "loss": 0.5453, "step": 8207 }, { "epoch": 10.046511627906977, "grad_norm": 2.491426443448446, "learning_rate": 1.2289689517743475e-06, "loss": 0.3207, "step": 8208 }, { "epoch": 10.047735618115055, "grad_norm": 2.216455255875227, "learning_rate": 1.228417123187603e-06, "loss": 0.9833, "step": 8209 }, { "epoch": 10.048959608323134, "grad_norm": 0.7265104739593882, "learning_rate": 1.2278653781645799e-06, "loss": 0.2565, "step": 8210 }, { "epoch": 10.050183598531211, "grad_norm": 1.6455866797763894, "learning_rate": 1.22731371674154e-06, "loss": 0.7274, "step": 8211 }, { "epoch": 10.05140758873929, "grad_norm": 1.0537036040994, "learning_rate": 1.2267621389547352e-06, "loss": 0.594, "step": 8212 }, { "epoch": 10.052631578947368, "grad_norm": 1.442942957641787, "learning_rate": 1.2262106448404133e-06, "loss": 1.3063, "step": 8213 }, { "epoch": 10.053855569155447, "grad_norm": 1.673018287290156, "learning_rate": 1.2256592344348162e-06, "loss": 0.5001, "step": 8214 }, { "epoch": 10.055079559363525, "grad_norm": 1.9655095847721382, "learning_rate": 1.2251079077741808e-06, "loss": 0.7747, "step": 8215 }, { "epoch": 10.056303549571604, "grad_norm": 2.0685399088264167, "learning_rate": 1.2245566648947383e-06, "loss": 0.4333, "step": 8216 }, { "epoch": 10.057527539779683, "grad_norm": 0.9206153331720969, "learning_rate": 1.224005505832714e-06, "loss": 0.4128, "step": 8217 }, { "epoch": 10.05875152998776, "grad_norm": 1.1826377781750672, "learning_rate": 1.2234544306243288e-06, "loss": 0.8173, "step": 8218 }, { "epoch": 10.059975520195838, "grad_norm": 2.000236319146925, "learning_rate": 1.2229034393057953e-06, "loss": 1.0052, "step": 8219 }, { "epoch": 10.061199510403917, "grad_norm": 2.4806470268029908, "learning_rate": 1.2223525319133264e-06, "loss": 0.4018, "step": 8220 }, { "epoch": 10.062423500611995, "grad_norm": 1.9793296454222111, "learning_rate": 1.2218017084831223e-06, "loss": 0.4916, "step": 8221 }, { "epoch": 10.063647490820074, "grad_norm": 2.252426027766268, "learning_rate": 1.221250969051382e-06, "loss": 1.0335, "step": 8222 }, { "epoch": 10.064871481028153, "grad_norm": 1.0315081804434139, "learning_rate": 1.2207003136542978e-06, "loss": 0.5249, "step": 8223 }, { "epoch": 10.06609547123623, "grad_norm": 1.7496773267011276, "learning_rate": 1.2201497423280578e-06, "loss": 1.1004, "step": 8224 }, { "epoch": 10.067319461444308, "grad_norm": 1.7127974635210463, "learning_rate": 1.2195992551088428e-06, "loss": 0.9317, "step": 8225 }, { "epoch": 10.068543451652387, "grad_norm": 3.0772592770699694, "learning_rate": 1.2190488520328292e-06, "loss": 0.4501, "step": 8226 }, { "epoch": 10.069767441860465, "grad_norm": 1.677476660950609, "learning_rate": 1.2184985331361878e-06, "loss": 0.5169, "step": 8227 }, { "epoch": 10.070991432068544, "grad_norm": 2.6887453707222195, "learning_rate": 1.217948298455081e-06, "loss": 0.4996, "step": 8228 }, { "epoch": 10.072215422276622, "grad_norm": 1.5648063566456674, "learning_rate": 1.217398148025671e-06, "loss": 0.4967, "step": 8229 }, { "epoch": 10.0734394124847, "grad_norm": 1.8136752847791946, "learning_rate": 1.2168480818841107e-06, "loss": 0.4374, "step": 8230 }, { "epoch": 10.074663402692778, "grad_norm": 2.2246621789419283, "learning_rate": 1.2162981000665486e-06, "loss": 0.6307, "step": 8231 }, { "epoch": 10.075887392900857, "grad_norm": 1.825704109735742, "learning_rate": 1.2157482026091272e-06, "loss": 1.085, "step": 8232 }, { "epoch": 10.077111383108935, "grad_norm": 2.4097287098600786, "learning_rate": 1.2151983895479837e-06, "loss": 0.4595, "step": 8233 }, { "epoch": 10.078335373317014, "grad_norm": 1.9328201932020421, "learning_rate": 1.2146486609192498e-06, "loss": 1.1061, "step": 8234 }, { "epoch": 10.079559363525092, "grad_norm": 1.8236755858004243, "learning_rate": 1.2140990167590518e-06, "loss": 0.5444, "step": 8235 }, { "epoch": 10.080783353733171, "grad_norm": 2.1869585634303372, "learning_rate": 1.2135494571035094e-06, "loss": 0.3034, "step": 8236 }, { "epoch": 10.082007343941248, "grad_norm": 3.0280838363090163, "learning_rate": 1.2129999819887396e-06, "loss": 0.6973, "step": 8237 }, { "epoch": 10.083231334149326, "grad_norm": 1.647190943979056, "learning_rate": 1.2124505914508511e-06, "loss": 1.3327, "step": 8238 }, { "epoch": 10.084455324357405, "grad_norm": 2.702466857567919, "learning_rate": 1.2119012855259466e-06, "loss": 0.3638, "step": 8239 }, { "epoch": 10.085679314565484, "grad_norm": 1.7632375628661545, "learning_rate": 1.2113520642501254e-06, "loss": 1.3602, "step": 8240 }, { "epoch": 10.086903304773562, "grad_norm": 2.2592475324561976, "learning_rate": 1.2108029276594794e-06, "loss": 0.462, "step": 8241 }, { "epoch": 10.088127294981641, "grad_norm": 1.7987748992805637, "learning_rate": 1.210253875790097e-06, "loss": 0.728, "step": 8242 }, { "epoch": 10.089351285189718, "grad_norm": 3.519550948084943, "learning_rate": 1.2097049086780599e-06, "loss": 0.4761, "step": 8243 }, { "epoch": 10.090575275397796, "grad_norm": 1.9751605780177988, "learning_rate": 1.2091560263594435e-06, "loss": 0.3184, "step": 8244 }, { "epoch": 10.091799265605875, "grad_norm": 1.6908781106123083, "learning_rate": 1.2086072288703184e-06, "loss": 0.4175, "step": 8245 }, { "epoch": 10.093023255813954, "grad_norm": 1.943353057579902, "learning_rate": 1.20805851624675e-06, "loss": 1.31, "step": 8246 }, { "epoch": 10.094247246022032, "grad_norm": 2.0052688699310344, "learning_rate": 1.2075098885247974e-06, "loss": 0.44, "step": 8247 }, { "epoch": 10.095471236230111, "grad_norm": 1.4785465428453508, "learning_rate": 1.206961345740514e-06, "loss": 1.0817, "step": 8248 }, { "epoch": 10.096695226438188, "grad_norm": 1.490436442253079, "learning_rate": 1.2064128879299486e-06, "loss": 0.6816, "step": 8249 }, { "epoch": 10.097919216646266, "grad_norm": 2.370235088005835, "learning_rate": 1.2058645151291436e-06, "loss": 0.4538, "step": 8250 }, { "epoch": 10.099143206854345, "grad_norm": 2.773333748665765, "learning_rate": 1.205316227374136e-06, "loss": 0.3628, "step": 8251 }, { "epoch": 10.100367197062424, "grad_norm": 2.070182061908845, "learning_rate": 1.204768024700957e-06, "loss": 0.6366, "step": 8252 }, { "epoch": 10.101591187270502, "grad_norm": 1.2789623411138893, "learning_rate": 1.2042199071456329e-06, "loss": 0.7162, "step": 8253 }, { "epoch": 10.10281517747858, "grad_norm": 1.8348475575505871, "learning_rate": 1.2036718747441824e-06, "loss": 0.4543, "step": 8254 }, { "epoch": 10.104039167686658, "grad_norm": 2.3955825246976454, "learning_rate": 1.2031239275326228e-06, "loss": 0.3778, "step": 8255 }, { "epoch": 10.105263157894736, "grad_norm": 1.7767644922741932, "learning_rate": 1.2025760655469629e-06, "loss": 1.3813, "step": 8256 }, { "epoch": 10.106487148102815, "grad_norm": 2.0748755502066083, "learning_rate": 1.202028288823204e-06, "loss": 0.5967, "step": 8257 }, { "epoch": 10.107711138310894, "grad_norm": 1.7294531377421545, "learning_rate": 1.2014805973973443e-06, "loss": 0.8245, "step": 8258 }, { "epoch": 10.108935128518972, "grad_norm": 1.064669276497954, "learning_rate": 1.2009329913053778e-06, "loss": 0.508, "step": 8259 }, { "epoch": 10.11015911872705, "grad_norm": 1.4655805016074455, "learning_rate": 1.2003854705832898e-06, "loss": 0.3339, "step": 8260 }, { "epoch": 10.11138310893513, "grad_norm": 1.1222544637606002, "learning_rate": 1.1998380352670622e-06, "loss": 0.5782, "step": 8261 }, { "epoch": 10.112607099143206, "grad_norm": 1.541785995963906, "learning_rate": 1.1992906853926697e-06, "loss": 0.6895, "step": 8262 }, { "epoch": 10.113831089351285, "grad_norm": 2.450949135177737, "learning_rate": 1.1987434209960824e-06, "loss": 0.973, "step": 8263 }, { "epoch": 10.115055079559363, "grad_norm": 2.0126933828660314, "learning_rate": 1.1981962421132646e-06, "loss": 0.5913, "step": 8264 }, { "epoch": 10.116279069767442, "grad_norm": 1.1095219275690256, "learning_rate": 1.1976491487801747e-06, "loss": 0.5076, "step": 8265 }, { "epoch": 10.11750305997552, "grad_norm": 2.5256076999654327, "learning_rate": 1.1971021410327655e-06, "loss": 0.8521, "step": 8266 }, { "epoch": 10.1187270501836, "grad_norm": 2.1571133615491456, "learning_rate": 1.1965552189069845e-06, "loss": 0.5099, "step": 8267 }, { "epoch": 10.119951040391676, "grad_norm": 3.0704963594359054, "learning_rate": 1.1960083824387736e-06, "loss": 0.4063, "step": 8268 }, { "epoch": 10.121175030599755, "grad_norm": 1.5745353491333403, "learning_rate": 1.1954616316640686e-06, "loss": 0.4764, "step": 8269 }, { "epoch": 10.122399020807833, "grad_norm": 1.380129608422423, "learning_rate": 1.1949149666188e-06, "loss": 0.5026, "step": 8270 }, { "epoch": 10.123623011015912, "grad_norm": 1.2596891997770223, "learning_rate": 1.1943683873388917e-06, "loss": 0.8945, "step": 8271 }, { "epoch": 10.12484700122399, "grad_norm": 2.501873461919243, "learning_rate": 1.1938218938602647e-06, "loss": 0.6069, "step": 8272 }, { "epoch": 10.12607099143207, "grad_norm": 1.8309436780732844, "learning_rate": 1.1932754862188317e-06, "loss": 0.293, "step": 8273 }, { "epoch": 10.127294981640146, "grad_norm": 1.4932878912255487, "learning_rate": 1.1927291644505012e-06, "loss": 0.7847, "step": 8274 }, { "epoch": 10.128518971848225, "grad_norm": 1.8103390603412906, "learning_rate": 1.1921829285911732e-06, "loss": 0.5439, "step": 8275 }, { "epoch": 10.129742962056303, "grad_norm": 3.0065891509859193, "learning_rate": 1.1916367786767467e-06, "loss": 0.3207, "step": 8276 }, { "epoch": 10.130966952264382, "grad_norm": 1.7525682074132436, "learning_rate": 1.1910907147431116e-06, "loss": 0.4537, "step": 8277 }, { "epoch": 10.13219094247246, "grad_norm": 2.80797548226728, "learning_rate": 1.1905447368261535e-06, "loss": 0.5289, "step": 8278 }, { "epoch": 10.13341493268054, "grad_norm": 1.9902141688267647, "learning_rate": 1.1899988449617522e-06, "loss": 0.4123, "step": 8279 }, { "epoch": 10.134638922888616, "grad_norm": 2.500320404453797, "learning_rate": 1.1894530391857812e-06, "loss": 0.4628, "step": 8280 }, { "epoch": 10.135862913096695, "grad_norm": 1.5155202327501696, "learning_rate": 1.188907319534109e-06, "loss": 0.6703, "step": 8281 }, { "epoch": 10.137086903304773, "grad_norm": 2.7700567278648305, "learning_rate": 1.1883616860425986e-06, "loss": 1.0568, "step": 8282 }, { "epoch": 10.138310893512852, "grad_norm": 1.2351288809203242, "learning_rate": 1.1878161387471067e-06, "loss": 0.7676, "step": 8283 }, { "epoch": 10.13953488372093, "grad_norm": 1.6839217340996075, "learning_rate": 1.1872706776834839e-06, "loss": 0.6129, "step": 8284 }, { "epoch": 10.140758873929009, "grad_norm": 1.6982600803549823, "learning_rate": 1.1867253028875784e-06, "loss": 0.4296, "step": 8285 }, { "epoch": 10.141982864137088, "grad_norm": 2.4815063342210366, "learning_rate": 1.1861800143952276e-06, "loss": 0.4088, "step": 8286 }, { "epoch": 10.143206854345165, "grad_norm": 3.1407232694985896, "learning_rate": 1.185634812242267e-06, "loss": 0.3108, "step": 8287 }, { "epoch": 10.144430844553243, "grad_norm": 2.4330693436305153, "learning_rate": 1.1850896964645239e-06, "loss": 0.4856, "step": 8288 }, { "epoch": 10.145654834761322, "grad_norm": 1.0485110549815666, "learning_rate": 1.1845446670978234e-06, "loss": 0.5467, "step": 8289 }, { "epoch": 10.1468788249694, "grad_norm": 1.92814638414238, "learning_rate": 1.1839997241779818e-06, "loss": 0.5174, "step": 8290 }, { "epoch": 10.148102815177479, "grad_norm": 1.1528713881970116, "learning_rate": 1.1834548677408117e-06, "loss": 0.3078, "step": 8291 }, { "epoch": 10.149326805385558, "grad_norm": 2.1399883054346422, "learning_rate": 1.1829100978221173e-06, "loss": 1.1287, "step": 8292 }, { "epoch": 10.150550795593634, "grad_norm": 1.200249753807425, "learning_rate": 1.1823654144576987e-06, "loss": 0.6247, "step": 8293 }, { "epoch": 10.151774785801713, "grad_norm": 2.5688677097325536, "learning_rate": 1.1818208176833524e-06, "loss": 0.9486, "step": 8294 }, { "epoch": 10.152998776009792, "grad_norm": 1.8820273375769543, "learning_rate": 1.1812763075348663e-06, "loss": 0.4824, "step": 8295 }, { "epoch": 10.15422276621787, "grad_norm": 1.3585315496956756, "learning_rate": 1.1807318840480238e-06, "loss": 0.5342, "step": 8296 }, { "epoch": 10.155446756425949, "grad_norm": 2.7689388741900696, "learning_rate": 1.180187547258602e-06, "loss": 0.5641, "step": 8297 }, { "epoch": 10.156670746634028, "grad_norm": 2.6318550499052824, "learning_rate": 1.1796432972023733e-06, "loss": 0.3913, "step": 8298 }, { "epoch": 10.157894736842104, "grad_norm": 1.6261943813738844, "learning_rate": 1.179099133915103e-06, "loss": 0.8579, "step": 8299 }, { "epoch": 10.159118727050183, "grad_norm": 2.680480454729199, "learning_rate": 1.1785550574325524e-06, "loss": 0.4338, "step": 8300 }, { "epoch": 10.160342717258262, "grad_norm": 0.8241020161653674, "learning_rate": 1.1780110677904755e-06, "loss": 0.4332, "step": 8301 }, { "epoch": 10.16156670746634, "grad_norm": 1.631738735687105, "learning_rate": 1.1774671650246204e-06, "loss": 0.5582, "step": 8302 }, { "epoch": 10.162790697674419, "grad_norm": 1.4864583142816723, "learning_rate": 1.176923349170734e-06, "loss": 0.5071, "step": 8303 }, { "epoch": 10.164014687882498, "grad_norm": 2.632384294296717, "learning_rate": 1.17637962026455e-06, "loss": 0.3834, "step": 8304 }, { "epoch": 10.165238678090576, "grad_norm": 1.8159657192021468, "learning_rate": 1.1758359783418016e-06, "loss": 0.9095, "step": 8305 }, { "epoch": 10.166462668298653, "grad_norm": 2.087643538320378, "learning_rate": 1.1752924234382141e-06, "loss": 0.3477, "step": 8306 }, { "epoch": 10.167686658506732, "grad_norm": 2.129387787716989, "learning_rate": 1.1747489555895097e-06, "loss": 0.961, "step": 8307 }, { "epoch": 10.16891064871481, "grad_norm": 1.5299856998826533, "learning_rate": 1.174205574831402e-06, "loss": 0.8902, "step": 8308 }, { "epoch": 10.170134638922889, "grad_norm": 2.527984789059689, "learning_rate": 1.1736622811996016e-06, "loss": 0.4595, "step": 8309 }, { "epoch": 10.171358629130967, "grad_norm": 2.6295948047871867, "learning_rate": 1.173119074729808e-06, "loss": 0.5897, "step": 8310 }, { "epoch": 10.172582619339046, "grad_norm": 1.1382230550274333, "learning_rate": 1.1725759554577217e-06, "loss": 0.4959, "step": 8311 }, { "epoch": 10.173806609547123, "grad_norm": 2.2634845601624667, "learning_rate": 1.1720329234190345e-06, "loss": 0.5457, "step": 8312 }, { "epoch": 10.175030599755202, "grad_norm": 1.7138726695699769, "learning_rate": 1.1714899786494314e-06, "loss": 0.4313, "step": 8313 }, { "epoch": 10.17625458996328, "grad_norm": 1.4522650646617747, "learning_rate": 1.170947121184593e-06, "loss": 0.6163, "step": 8314 }, { "epoch": 10.177478580171359, "grad_norm": 1.2719371254014478, "learning_rate": 1.170404351060194e-06, "loss": 0.5912, "step": 8315 }, { "epoch": 10.178702570379437, "grad_norm": 3.0125555813835847, "learning_rate": 1.1698616683119032e-06, "loss": 0.91, "step": 8316 }, { "epoch": 10.179926560587516, "grad_norm": 1.3275171436275153, "learning_rate": 1.169319072975384e-06, "loss": 0.3945, "step": 8317 }, { "epoch": 10.181150550795593, "grad_norm": 2.695511269484224, "learning_rate": 1.1687765650862931e-06, "loss": 1.5551, "step": 8318 }, { "epoch": 10.182374541003671, "grad_norm": 1.6939889576153784, "learning_rate": 1.1682341446802817e-06, "loss": 1.4512, "step": 8319 }, { "epoch": 10.18359853121175, "grad_norm": 1.8149196040077349, "learning_rate": 1.1676918117929983e-06, "loss": 0.4596, "step": 8320 }, { "epoch": 10.184822521419829, "grad_norm": 1.5573051321121223, "learning_rate": 1.1671495664600804e-06, "loss": 0.3347, "step": 8321 }, { "epoch": 10.186046511627907, "grad_norm": 1.750645673531928, "learning_rate": 1.1666074087171628e-06, "loss": 0.9842, "step": 8322 }, { "epoch": 10.187270501835986, "grad_norm": 2.4020715131216086, "learning_rate": 1.1660653385998738e-06, "loss": 0.8615, "step": 8323 }, { "epoch": 10.188494492044065, "grad_norm": 1.4318909002906755, "learning_rate": 1.1655233561438375e-06, "loss": 0.7981, "step": 8324 }, { "epoch": 10.189718482252141, "grad_norm": 1.1306122936443008, "learning_rate": 1.1649814613846707e-06, "loss": 0.4167, "step": 8325 }, { "epoch": 10.19094247246022, "grad_norm": 1.1640537650362286, "learning_rate": 1.1644396543579842e-06, "loss": 0.5353, "step": 8326 }, { "epoch": 10.192166462668299, "grad_norm": 1.7140831606825007, "learning_rate": 1.1638979350993845e-06, "loss": 1.1736, "step": 8327 }, { "epoch": 10.193390452876377, "grad_norm": 2.2470977508041696, "learning_rate": 1.163356303644469e-06, "loss": 0.6721, "step": 8328 }, { "epoch": 10.194614443084456, "grad_norm": 1.5462304779107077, "learning_rate": 1.162814760028834e-06, "loss": 0.462, "step": 8329 }, { "epoch": 10.195838433292534, "grad_norm": 1.143920943810977, "learning_rate": 1.162273304288067e-06, "loss": 0.5965, "step": 8330 }, { "epoch": 10.197062423500611, "grad_norm": 2.0628194863949485, "learning_rate": 1.1617319364577508e-06, "loss": 0.8453, "step": 8331 }, { "epoch": 10.19828641370869, "grad_norm": 1.3388861742734397, "learning_rate": 1.1611906565734618e-06, "loss": 0.463, "step": 8332 }, { "epoch": 10.199510403916769, "grad_norm": 1.5654123475792425, "learning_rate": 1.1606494646707711e-06, "loss": 1.2579, "step": 8333 }, { "epoch": 10.200734394124847, "grad_norm": 1.8228851029165722, "learning_rate": 1.1601083607852433e-06, "loss": 1.8783, "step": 8334 }, { "epoch": 10.201958384332926, "grad_norm": 1.2500649201025675, "learning_rate": 1.1595673449524384e-06, "loss": 0.5001, "step": 8335 }, { "epoch": 10.203182374541004, "grad_norm": 3.646583771387821, "learning_rate": 1.1590264172079085e-06, "loss": 0.4199, "step": 8336 }, { "epoch": 10.204406364749081, "grad_norm": 2.190469362621497, "learning_rate": 1.1584855775872036e-06, "loss": 1.0131, "step": 8337 }, { "epoch": 10.20563035495716, "grad_norm": 2.3145338175383907, "learning_rate": 1.1579448261258654e-06, "loss": 0.9036, "step": 8338 }, { "epoch": 10.206854345165238, "grad_norm": 1.3557548223714138, "learning_rate": 1.1574041628594288e-06, "loss": 1.088, "step": 8339 }, { "epoch": 10.208078335373317, "grad_norm": 1.5903988371047444, "learning_rate": 1.1568635878234245e-06, "loss": 0.9209, "step": 8340 }, { "epoch": 10.209302325581396, "grad_norm": 1.210914049882735, "learning_rate": 1.1563231010533759e-06, "loss": 0.5301, "step": 8341 }, { "epoch": 10.210526315789474, "grad_norm": 1.0467182141933953, "learning_rate": 1.1557827025848048e-06, "loss": 0.7087, "step": 8342 }, { "epoch": 10.211750305997551, "grad_norm": 1.2897750019549414, "learning_rate": 1.1552423924532221e-06, "loss": 0.7182, "step": 8343 }, { "epoch": 10.21297429620563, "grad_norm": 1.617333055658577, "learning_rate": 1.1547021706941356e-06, "loss": 0.9815, "step": 8344 }, { "epoch": 10.214198286413708, "grad_norm": 1.8299443733612986, "learning_rate": 1.1541620373430466e-06, "loss": 0.6435, "step": 8345 }, { "epoch": 10.215422276621787, "grad_norm": 1.6015166042625728, "learning_rate": 1.1536219924354505e-06, "loss": 0.6386, "step": 8346 }, { "epoch": 10.216646266829866, "grad_norm": 1.699437146476779, "learning_rate": 1.1530820360068373e-06, "loss": 0.5719, "step": 8347 }, { "epoch": 10.217870257037944, "grad_norm": 1.6776732045011649, "learning_rate": 1.1525421680926904e-06, "loss": 0.7138, "step": 8348 }, { "epoch": 10.219094247246023, "grad_norm": 1.6357702570103423, "learning_rate": 1.1520023887284887e-06, "loss": 1.1736, "step": 8349 }, { "epoch": 10.2203182374541, "grad_norm": 1.8831999209550918, "learning_rate": 1.151462697949704e-06, "loss": 0.9931, "step": 8350 }, { "epoch": 10.221542227662178, "grad_norm": 1.912609688705624, "learning_rate": 1.1509230957918031e-06, "loss": 0.7452, "step": 8351 }, { "epoch": 10.222766217870257, "grad_norm": 1.4024398538551932, "learning_rate": 1.1503835822902462e-06, "loss": 0.455, "step": 8352 }, { "epoch": 10.223990208078336, "grad_norm": 0.918758601511904, "learning_rate": 1.1498441574804883e-06, "loss": 0.4639, "step": 8353 }, { "epoch": 10.225214198286414, "grad_norm": 2.5507953502152114, "learning_rate": 1.1493048213979778e-06, "loss": 0.6703, "step": 8354 }, { "epoch": 10.226438188494493, "grad_norm": 1.1059674081198785, "learning_rate": 1.1487655740781593e-06, "loss": 0.5653, "step": 8355 }, { "epoch": 10.22766217870257, "grad_norm": 3.4100362495835177, "learning_rate": 1.1482264155564704e-06, "loss": 0.413, "step": 8356 }, { "epoch": 10.228886168910648, "grad_norm": 3.0376916021455846, "learning_rate": 1.1476873458683407e-06, "loss": 0.5113, "step": 8357 }, { "epoch": 10.230110159118727, "grad_norm": 1.504266767393246, "learning_rate": 1.1471483650491957e-06, "loss": 0.5733, "step": 8358 }, { "epoch": 10.231334149326806, "grad_norm": 2.241040518246681, "learning_rate": 1.1466094731344577e-06, "loss": 0.4514, "step": 8359 }, { "epoch": 10.232558139534884, "grad_norm": 1.8791196078690673, "learning_rate": 1.1460706701595387e-06, "loss": 1.0033, "step": 8360 }, { "epoch": 10.233782129742963, "grad_norm": 1.3942239780713515, "learning_rate": 1.1455319561598478e-06, "loss": 0.4354, "step": 8361 }, { "epoch": 10.23500611995104, "grad_norm": 2.0887322855298183, "learning_rate": 1.1449933311707867e-06, "loss": 0.7139, "step": 8362 }, { "epoch": 10.236230110159118, "grad_norm": 1.1056140553296787, "learning_rate": 1.1444547952277522e-06, "loss": 0.6392, "step": 8363 }, { "epoch": 10.237454100367197, "grad_norm": 2.0290715332498626, "learning_rate": 1.1439163483661348e-06, "loss": 1.0704, "step": 8364 }, { "epoch": 10.238678090575275, "grad_norm": 1.4150767280861785, "learning_rate": 1.143377990621319e-06, "loss": 0.5298, "step": 8365 }, { "epoch": 10.239902080783354, "grad_norm": 1.4170582050575873, "learning_rate": 1.142839722028684e-06, "loss": 0.9999, "step": 8366 }, { "epoch": 10.241126070991433, "grad_norm": 1.4081944044271786, "learning_rate": 1.1423015426236023e-06, "loss": 1.2728, "step": 8367 }, { "epoch": 10.24235006119951, "grad_norm": 0.8783787897048889, "learning_rate": 1.1417634524414418e-06, "loss": 0.3488, "step": 8368 }, { "epoch": 10.243574051407588, "grad_norm": 1.5984788664020013, "learning_rate": 1.1412254515175633e-06, "loss": 0.4962, "step": 8369 }, { "epoch": 10.244798041615667, "grad_norm": 1.987829392514733, "learning_rate": 1.1406875398873224e-06, "loss": 0.5267, "step": 8370 }, { "epoch": 10.246022031823745, "grad_norm": 1.5639521959258251, "learning_rate": 1.140149717586068e-06, "loss": 0.5598, "step": 8371 }, { "epoch": 10.247246022031824, "grad_norm": 1.726152095353863, "learning_rate": 1.139611984649145e-06, "loss": 0.4489, "step": 8372 }, { "epoch": 10.248470012239903, "grad_norm": 2.0206152495520056, "learning_rate": 1.1390743411118907e-06, "loss": 0.419, "step": 8373 }, { "epoch": 10.249694002447981, "grad_norm": 3.3099355533192787, "learning_rate": 1.1385367870096379e-06, "loss": 0.4703, "step": 8374 }, { "epoch": 10.250917992656058, "grad_norm": 1.6844571898281888, "learning_rate": 1.13799932237771e-06, "loss": 0.6211, "step": 8375 }, { "epoch": 10.252141982864137, "grad_norm": 1.1269982840406254, "learning_rate": 1.1374619472514297e-06, "loss": 0.57, "step": 8376 }, { "epoch": 10.253365973072215, "grad_norm": 2.2250008274800535, "learning_rate": 1.1369246616661106e-06, "loss": 0.4441, "step": 8377 }, { "epoch": 10.254589963280294, "grad_norm": 1.596049240652772, "learning_rate": 1.1363874656570612e-06, "loss": 0.4982, "step": 8378 }, { "epoch": 10.255813953488373, "grad_norm": 1.2332760231446624, "learning_rate": 1.1358503592595837e-06, "loss": 1.0409, "step": 8379 }, { "epoch": 10.257037943696451, "grad_norm": 1.6205617388928162, "learning_rate": 1.1353133425089752e-06, "loss": 0.4733, "step": 8380 }, { "epoch": 10.258261933904528, "grad_norm": 1.7478631750592384, "learning_rate": 1.1347764154405262e-06, "loss": 0.9302, "step": 8381 }, { "epoch": 10.259485924112607, "grad_norm": 2.2085936672137376, "learning_rate": 1.1342395780895213e-06, "loss": 0.4659, "step": 8382 }, { "epoch": 10.260709914320685, "grad_norm": 2.3382153726279946, "learning_rate": 1.1337028304912399e-06, "loss": 0.4189, "step": 8383 }, { "epoch": 10.261933904528764, "grad_norm": 1.6922734960258807, "learning_rate": 1.133166172680954e-06, "loss": 1.265, "step": 8384 }, { "epoch": 10.263157894736842, "grad_norm": 1.768841475308521, "learning_rate": 1.1326296046939334e-06, "loss": 1.455, "step": 8385 }, { "epoch": 10.264381884944921, "grad_norm": 2.5550899911098384, "learning_rate": 1.1320931265654366e-06, "loss": 0.3588, "step": 8386 }, { "epoch": 10.265605875152998, "grad_norm": 1.5547804683775273, "learning_rate": 1.1315567383307203e-06, "loss": 0.7176, "step": 8387 }, { "epoch": 10.266829865361077, "grad_norm": 2.3686984188846814, "learning_rate": 1.1310204400250325e-06, "loss": 0.7347, "step": 8388 }, { "epoch": 10.268053855569155, "grad_norm": 2.5896477312817576, "learning_rate": 1.130484231683619e-06, "loss": 0.3931, "step": 8389 }, { "epoch": 10.269277845777234, "grad_norm": 1.4954587944254585, "learning_rate": 1.129948113341716e-06, "loss": 0.3217, "step": 8390 }, { "epoch": 10.270501835985312, "grad_norm": 1.796555392577347, "learning_rate": 1.1294120850345558e-06, "loss": 1.1709, "step": 8391 }, { "epoch": 10.271725826193391, "grad_norm": 1.585830372942829, "learning_rate": 1.1288761467973646e-06, "loss": 0.2952, "step": 8392 }, { "epoch": 10.27294981640147, "grad_norm": 1.610995756386133, "learning_rate": 1.12834029866536e-06, "loss": 0.6241, "step": 8393 }, { "epoch": 10.274173806609546, "grad_norm": 1.819408847372374, "learning_rate": 1.1278045406737584e-06, "loss": 0.2934, "step": 8394 }, { "epoch": 10.275397796817625, "grad_norm": 1.1400419023140156, "learning_rate": 1.127268872857767e-06, "loss": 0.4477, "step": 8395 }, { "epoch": 10.276621787025704, "grad_norm": 1.9444292724840881, "learning_rate": 1.1267332952525875e-06, "loss": 0.4805, "step": 8396 }, { "epoch": 10.277845777233782, "grad_norm": 2.0755990426405178, "learning_rate": 1.1261978078934168e-06, "loss": 0.6689, "step": 8397 }, { "epoch": 10.279069767441861, "grad_norm": 2.942265045004354, "learning_rate": 1.1256624108154449e-06, "loss": 0.8223, "step": 8398 }, { "epoch": 10.28029375764994, "grad_norm": 2.6342641796035897, "learning_rate": 1.1251271040538561e-06, "loss": 0.4195, "step": 8399 }, { "epoch": 10.281517747858016, "grad_norm": 2.805976035031549, "learning_rate": 1.1245918876438286e-06, "loss": 0.4467, "step": 8400 }, { "epoch": 10.282741738066095, "grad_norm": 1.6038462502907047, "learning_rate": 1.124056761620534e-06, "loss": 0.9341, "step": 8401 }, { "epoch": 10.283965728274174, "grad_norm": 2.2813991185786553, "learning_rate": 1.1235217260191406e-06, "loss": 0.5794, "step": 8402 }, { "epoch": 10.285189718482252, "grad_norm": 1.491523221012118, "learning_rate": 1.1229867808748093e-06, "loss": 0.9834, "step": 8403 }, { "epoch": 10.286413708690331, "grad_norm": 1.3781998760823395, "learning_rate": 1.1224519262226923e-06, "loss": 0.6246, "step": 8404 }, { "epoch": 10.28763769889841, "grad_norm": 1.607176538950427, "learning_rate": 1.1219171620979397e-06, "loss": 0.4661, "step": 8405 }, { "epoch": 10.288861689106486, "grad_norm": 1.8489478085080937, "learning_rate": 1.121382488535693e-06, "loss": 0.7074, "step": 8406 }, { "epoch": 10.290085679314565, "grad_norm": 2.1683448190993975, "learning_rate": 1.1208479055710912e-06, "loss": 0.5662, "step": 8407 }, { "epoch": 10.291309669522644, "grad_norm": 1.7655396358419815, "learning_rate": 1.1203134132392637e-06, "loss": 0.4061, "step": 8408 }, { "epoch": 10.292533659730722, "grad_norm": 2.1641590194239546, "learning_rate": 1.1197790115753366e-06, "loss": 0.6096, "step": 8409 }, { "epoch": 10.2937576499388, "grad_norm": 1.2520680820935073, "learning_rate": 1.1192447006144258e-06, "loss": 0.4763, "step": 8410 }, { "epoch": 10.29498164014688, "grad_norm": 1.490846415144508, "learning_rate": 1.1187104803916472e-06, "loss": 0.5454, "step": 8411 }, { "epoch": 10.296205630354958, "grad_norm": 1.7348293392828131, "learning_rate": 1.1181763509421068e-06, "loss": 0.516, "step": 8412 }, { "epoch": 10.297429620563035, "grad_norm": 1.1225321094959664, "learning_rate": 1.1176423123009056e-06, "loss": 0.369, "step": 8413 }, { "epoch": 10.298653610771114, "grad_norm": 2.2556798983018203, "learning_rate": 1.1171083645031385e-06, "loss": 0.4151, "step": 8414 }, { "epoch": 10.299877600979192, "grad_norm": 1.1510259088435155, "learning_rate": 1.116574507583895e-06, "loss": 1.1982, "step": 8415 }, { "epoch": 10.30110159118727, "grad_norm": 1.521151077397079, "learning_rate": 1.1160407415782578e-06, "loss": 0.6505, "step": 8416 }, { "epoch": 10.30232558139535, "grad_norm": 2.565945636579382, "learning_rate": 1.115507066521304e-06, "loss": 0.8568, "step": 8417 }, { "epoch": 10.303549571603428, "grad_norm": 1.8589512097746903, "learning_rate": 1.114973482448105e-06, "loss": 1.0799, "step": 8418 }, { "epoch": 10.304773561811505, "grad_norm": 1.469297712784982, "learning_rate": 1.1144399893937252e-06, "loss": 0.7116, "step": 8419 }, { "epoch": 10.305997552019583, "grad_norm": 1.765756101578622, "learning_rate": 1.1139065873932254e-06, "loss": 0.6298, "step": 8420 }, { "epoch": 10.307221542227662, "grad_norm": 2.0770371119312903, "learning_rate": 1.1133732764816586e-06, "loss": 0.8526, "step": 8421 }, { "epoch": 10.30844553243574, "grad_norm": 1.0403877838731264, "learning_rate": 1.11284005669407e-06, "loss": 0.5178, "step": 8422 }, { "epoch": 10.30966952264382, "grad_norm": 1.5937366936227606, "learning_rate": 1.1123069280655015e-06, "loss": 0.4744, "step": 8423 }, { "epoch": 10.310893512851898, "grad_norm": 1.6450343405730907, "learning_rate": 1.1117738906309902e-06, "loss": 0.5469, "step": 8424 }, { "epoch": 10.312117503059975, "grad_norm": 2.3251226468348043, "learning_rate": 1.1112409444255637e-06, "loss": 0.5172, "step": 8425 }, { "epoch": 10.313341493268053, "grad_norm": 1.726898309581724, "learning_rate": 1.110708089484246e-06, "loss": 0.5295, "step": 8426 }, { "epoch": 10.314565483476132, "grad_norm": 1.3796869583438913, "learning_rate": 1.1101753258420538e-06, "loss": 0.5917, "step": 8427 }, { "epoch": 10.31578947368421, "grad_norm": 2.0245731876288677, "learning_rate": 1.1096426535339985e-06, "loss": 0.4934, "step": 8428 }, { "epoch": 10.31701346389229, "grad_norm": 1.9540321646141319, "learning_rate": 1.1091100725950858e-06, "loss": 0.626, "step": 8429 }, { "epoch": 10.318237454100368, "grad_norm": 1.5745682173155513, "learning_rate": 1.1085775830603144e-06, "loss": 0.5905, "step": 8430 }, { "epoch": 10.319461444308445, "grad_norm": 1.7529256387205177, "learning_rate": 1.1080451849646776e-06, "loss": 0.6553, "step": 8431 }, { "epoch": 10.320685434516523, "grad_norm": 2.293978703904437, "learning_rate": 1.1075128783431626e-06, "loss": 0.4895, "step": 8432 }, { "epoch": 10.321909424724602, "grad_norm": 2.649267730402568, "learning_rate": 1.1069806632307513e-06, "loss": 0.4274, "step": 8433 }, { "epoch": 10.32313341493268, "grad_norm": 1.4920536372724003, "learning_rate": 1.106448539662418e-06, "loss": 0.5254, "step": 8434 }, { "epoch": 10.32435740514076, "grad_norm": 2.5704900251728153, "learning_rate": 1.1059165076731323e-06, "loss": 1.0254, "step": 8435 }, { "epoch": 10.325581395348838, "grad_norm": 1.773703460408595, "learning_rate": 1.1053845672978567e-06, "loss": 0.485, "step": 8436 }, { "epoch": 10.326805385556916, "grad_norm": 3.0486645792149627, "learning_rate": 1.1048527185715497e-06, "loss": 0.4595, "step": 8437 }, { "epoch": 10.328029375764993, "grad_norm": 2.6183222170709097, "learning_rate": 1.1043209615291625e-06, "loss": 0.7839, "step": 8438 }, { "epoch": 10.329253365973072, "grad_norm": 2.180740439115568, "learning_rate": 1.1037892962056387e-06, "loss": 0.483, "step": 8439 }, { "epoch": 10.33047735618115, "grad_norm": 2.0156348519167966, "learning_rate": 1.1032577226359172e-06, "loss": 0.2935, "step": 8440 }, { "epoch": 10.331701346389229, "grad_norm": 2.33427300877486, "learning_rate": 1.1027262408549334e-06, "loss": 0.4488, "step": 8441 }, { "epoch": 10.332925336597308, "grad_norm": 1.649204463122737, "learning_rate": 1.1021948508976122e-06, "loss": 0.4681, "step": 8442 }, { "epoch": 10.334149326805386, "grad_norm": 1.5667401243166743, "learning_rate": 1.1016635527988756e-06, "loss": 0.8094, "step": 8443 }, { "epoch": 10.335373317013463, "grad_norm": 2.5622843850130788, "learning_rate": 1.101132346593638e-06, "loss": 0.5512, "step": 8444 }, { "epoch": 10.336597307221542, "grad_norm": 2.9636747303880218, "learning_rate": 1.1006012323168089e-06, "loss": 0.4133, "step": 8445 }, { "epoch": 10.33782129742962, "grad_norm": 2.40250129964111, "learning_rate": 1.1000702100032903e-06, "loss": 0.537, "step": 8446 }, { "epoch": 10.339045287637699, "grad_norm": 2.090512835030234, "learning_rate": 1.09953927968798e-06, "loss": 0.834, "step": 8447 }, { "epoch": 10.340269277845778, "grad_norm": 2.524454845181693, "learning_rate": 1.0990084414057682e-06, "loss": 0.4771, "step": 8448 }, { "epoch": 10.341493268053856, "grad_norm": 3.0360937480957384, "learning_rate": 1.0984776951915385e-06, "loss": 0.3964, "step": 8449 }, { "epoch": 10.342717258261933, "grad_norm": 1.6879999746982848, "learning_rate": 1.097947041080173e-06, "loss": 0.8505, "step": 8450 }, { "epoch": 10.343941248470012, "grad_norm": 1.1691815199697158, "learning_rate": 1.0974164791065409e-06, "loss": 0.5359, "step": 8451 }, { "epoch": 10.34516523867809, "grad_norm": 2.1462666840432463, "learning_rate": 1.09688600930551e-06, "loss": 0.5126, "step": 8452 }, { "epoch": 10.346389228886169, "grad_norm": 1.8356308652447657, "learning_rate": 1.09635563171194e-06, "loss": 1.3052, "step": 8453 }, { "epoch": 10.347613219094248, "grad_norm": 1.7282599447279108, "learning_rate": 1.0958253463606872e-06, "loss": 1.5167, "step": 8454 }, { "epoch": 10.348837209302326, "grad_norm": 1.668961442470613, "learning_rate": 1.095295153286599e-06, "loss": 1.0814, "step": 8455 }, { "epoch": 10.350061199510403, "grad_norm": 2.595967695496556, "learning_rate": 1.0947650525245181e-06, "loss": 0.5352, "step": 8456 }, { "epoch": 10.351285189718482, "grad_norm": 1.7952438960163517, "learning_rate": 1.09423504410928e-06, "loss": 0.4463, "step": 8457 }, { "epoch": 10.35250917992656, "grad_norm": 2.309562721830781, "learning_rate": 1.0937051280757141e-06, "loss": 0.529, "step": 8458 }, { "epoch": 10.353733170134639, "grad_norm": 1.1468932196982267, "learning_rate": 1.0931753044586465e-06, "loss": 0.6099, "step": 8459 }, { "epoch": 10.354957160342718, "grad_norm": 1.170673848359504, "learning_rate": 1.0926455732928944e-06, "loss": 0.6046, "step": 8460 }, { "epoch": 10.356181150550796, "grad_norm": 1.7780906369407454, "learning_rate": 1.0921159346132697e-06, "loss": 1.5003, "step": 8461 }, { "epoch": 10.357405140758875, "grad_norm": 2.45774667451582, "learning_rate": 1.0915863884545789e-06, "loss": 0.7125, "step": 8462 }, { "epoch": 10.358629130966952, "grad_norm": 1.410064564754691, "learning_rate": 1.0910569348516206e-06, "loss": 0.3992, "step": 8463 }, { "epoch": 10.35985312117503, "grad_norm": 2.3153947208510592, "learning_rate": 1.0905275738391896e-06, "loss": 0.4272, "step": 8464 }, { "epoch": 10.361077111383109, "grad_norm": 2.6092059197116115, "learning_rate": 1.0899983054520732e-06, "loss": 0.3942, "step": 8465 }, { "epoch": 10.362301101591187, "grad_norm": 1.821736934644811, "learning_rate": 1.0894691297250528e-06, "loss": 0.8815, "step": 8466 }, { "epoch": 10.363525091799266, "grad_norm": 1.6484302717646422, "learning_rate": 1.088940046692904e-06, "loss": 0.5622, "step": 8467 }, { "epoch": 10.364749082007345, "grad_norm": 1.6179953938735485, "learning_rate": 1.0884110563903963e-06, "loss": 1.4848, "step": 8468 }, { "epoch": 10.365973072215422, "grad_norm": 1.57620223990001, "learning_rate": 1.0878821588522929e-06, "loss": 0.3544, "step": 8469 }, { "epoch": 10.3671970624235, "grad_norm": 0.5759291107983122, "learning_rate": 1.0873533541133507e-06, "loss": 0.1297, "step": 8470 }, { "epoch": 10.368421052631579, "grad_norm": 3.0648153432349017, "learning_rate": 1.0868246422083203e-06, "loss": 0.4475, "step": 8471 }, { "epoch": 10.369645042839657, "grad_norm": 2.616499137367613, "learning_rate": 1.0862960231719483e-06, "loss": 0.277, "step": 8472 }, { "epoch": 10.370869033047736, "grad_norm": 1.5288367613506255, "learning_rate": 1.0857674970389728e-06, "loss": 0.8205, "step": 8473 }, { "epoch": 10.372093023255815, "grad_norm": 2.0238795409573798, "learning_rate": 1.0852390638441273e-06, "loss": 0.7629, "step": 8474 }, { "epoch": 10.373317013463891, "grad_norm": 1.8461031123382783, "learning_rate": 1.084710723622136e-06, "loss": 0.4531, "step": 8475 }, { "epoch": 10.37454100367197, "grad_norm": 2.2634288277438324, "learning_rate": 1.084182476407722e-06, "loss": 0.8163, "step": 8476 }, { "epoch": 10.375764993880049, "grad_norm": 2.1175401077470717, "learning_rate": 1.0836543222355988e-06, "loss": 0.3873, "step": 8477 }, { "epoch": 10.376988984088127, "grad_norm": 1.9046717863016955, "learning_rate": 1.083126261140475e-06, "loss": 0.475, "step": 8478 }, { "epoch": 10.378212974296206, "grad_norm": 2.381146173054407, "learning_rate": 1.0825982931570527e-06, "loss": 1.1994, "step": 8479 }, { "epoch": 10.379436964504285, "grad_norm": 3.6868159414924055, "learning_rate": 1.082070418320028e-06, "loss": 0.2738, "step": 8480 }, { "epoch": 10.380660954712361, "grad_norm": 2.6036121991170247, "learning_rate": 1.0815426366640908e-06, "loss": 0.6751, "step": 8481 }, { "epoch": 10.38188494492044, "grad_norm": 2.4147349772096485, "learning_rate": 1.081014948223925e-06, "loss": 0.4479, "step": 8482 }, { "epoch": 10.383108935128519, "grad_norm": 1.687735603140283, "learning_rate": 1.080487353034208e-06, "loss": 0.44, "step": 8483 }, { "epoch": 10.384332925336597, "grad_norm": 1.9444074753120237, "learning_rate": 1.079959851129611e-06, "loss": 0.5245, "step": 8484 }, { "epoch": 10.385556915544676, "grad_norm": 2.455571156908325, "learning_rate": 1.0794324425448024e-06, "loss": 0.5175, "step": 8485 }, { "epoch": 10.386780905752754, "grad_norm": 2.2339148445775168, "learning_rate": 1.0789051273144383e-06, "loss": 0.6168, "step": 8486 }, { "epoch": 10.388004895960833, "grad_norm": 2.0290828295761054, "learning_rate": 1.0783779054731726e-06, "loss": 0.7334, "step": 8487 }, { "epoch": 10.38922888616891, "grad_norm": 1.6903125305731184, "learning_rate": 1.0778507770556523e-06, "loss": 0.9273, "step": 8488 }, { "epoch": 10.390452876376989, "grad_norm": 1.9205737346984166, "learning_rate": 1.0773237420965194e-06, "loss": 0.585, "step": 8489 }, { "epoch": 10.391676866585067, "grad_norm": 3.7927525297635896, "learning_rate": 1.0767968006304081e-06, "loss": 0.3788, "step": 8490 }, { "epoch": 10.392900856793146, "grad_norm": 2.240378435208915, "learning_rate": 1.076269952691947e-06, "loss": 0.3499, "step": 8491 }, { "epoch": 10.394124847001224, "grad_norm": 1.4477748483363693, "learning_rate": 1.0757431983157596e-06, "loss": 0.5325, "step": 8492 }, { "epoch": 10.395348837209303, "grad_norm": 1.3874320259018462, "learning_rate": 1.0752165375364593e-06, "loss": 1.5656, "step": 8493 }, { "epoch": 10.39657282741738, "grad_norm": 2.8363639458880465, "learning_rate": 1.0746899703886593e-06, "loss": 0.3526, "step": 8494 }, { "epoch": 10.397796817625458, "grad_norm": 2.138374078096518, "learning_rate": 1.0741634969069626e-06, "loss": 0.5121, "step": 8495 }, { "epoch": 10.399020807833537, "grad_norm": 1.1356686120448567, "learning_rate": 1.0736371171259668e-06, "loss": 0.4703, "step": 8496 }, { "epoch": 10.400244798041616, "grad_norm": 2.751851324266045, "learning_rate": 1.0731108310802644e-06, "loss": 0.5136, "step": 8497 }, { "epoch": 10.401468788249694, "grad_norm": 3.2914142929519596, "learning_rate": 1.07258463880444e-06, "loss": 0.3439, "step": 8498 }, { "epoch": 10.402692778457773, "grad_norm": 1.3125925852514093, "learning_rate": 1.0720585403330735e-06, "loss": 0.5825, "step": 8499 }, { "epoch": 10.403916768665852, "grad_norm": 2.0807189420410306, "learning_rate": 1.0715325357007383e-06, "loss": 0.6547, "step": 8500 }, { "epoch": 10.405140758873928, "grad_norm": 2.1604054101934342, "learning_rate": 1.0710066249420007e-06, "loss": 0.5354, "step": 8501 }, { "epoch": 10.406364749082007, "grad_norm": 2.6356944431209066, "learning_rate": 1.0704808080914228e-06, "loss": 0.3399, "step": 8502 }, { "epoch": 10.407588739290086, "grad_norm": 2.8971470984120393, "learning_rate": 1.0699550851835596e-06, "loss": 0.4209, "step": 8503 }, { "epoch": 10.408812729498164, "grad_norm": 1.0893131853232543, "learning_rate": 1.0694294562529581e-06, "loss": 0.5379, "step": 8504 }, { "epoch": 10.410036719706243, "grad_norm": 2.368927870768551, "learning_rate": 1.0689039213341615e-06, "loss": 0.6693, "step": 8505 }, { "epoch": 10.411260709914322, "grad_norm": 1.618998325175203, "learning_rate": 1.0683784804617048e-06, "loss": 0.5115, "step": 8506 }, { "epoch": 10.412484700122398, "grad_norm": 1.332734202616304, "learning_rate": 1.06785313367012e-06, "loss": 0.4026, "step": 8507 }, { "epoch": 10.413708690330477, "grad_norm": 2.4700827786989406, "learning_rate": 1.0673278809939305e-06, "loss": 1.2786, "step": 8508 }, { "epoch": 10.414932680538556, "grad_norm": 2.163851268908277, "learning_rate": 1.0668027224676541e-06, "loss": 1.0699, "step": 8509 }, { "epoch": 10.416156670746634, "grad_norm": 1.2365144974427893, "learning_rate": 1.0662776581258002e-06, "loss": 0.4284, "step": 8510 }, { "epoch": 10.417380660954713, "grad_norm": 1.853006161828569, "learning_rate": 1.0657526880028765e-06, "loss": 0.4219, "step": 8511 }, { "epoch": 10.418604651162791, "grad_norm": 2.716244962000596, "learning_rate": 1.065227812133381e-06, "loss": 0.9512, "step": 8512 }, { "epoch": 10.419828641370868, "grad_norm": 1.289329501372888, "learning_rate": 1.0647030305518072e-06, "loss": 0.5567, "step": 8513 }, { "epoch": 10.421052631578947, "grad_norm": 1.5825851498524606, "learning_rate": 1.0641783432926412e-06, "loss": 0.4049, "step": 8514 }, { "epoch": 10.422276621787026, "grad_norm": 3.243227755107791, "learning_rate": 1.0636537503903638e-06, "loss": 0.5923, "step": 8515 }, { "epoch": 10.423500611995104, "grad_norm": 2.0180419634127578, "learning_rate": 1.0631292518794493e-06, "loss": 0.4882, "step": 8516 }, { "epoch": 10.424724602203183, "grad_norm": 1.4154607401356551, "learning_rate": 1.0626048477943656e-06, "loss": 0.844, "step": 8517 }, { "epoch": 10.425948592411261, "grad_norm": 2.1397387088960964, "learning_rate": 1.0620805381695748e-06, "loss": 0.5346, "step": 8518 }, { "epoch": 10.427172582619338, "grad_norm": 2.0709679274138977, "learning_rate": 1.0615563230395319e-06, "loss": 0.4063, "step": 8519 }, { "epoch": 10.428396572827417, "grad_norm": 1.827648468172567, "learning_rate": 1.0610322024386873e-06, "loss": 0.5294, "step": 8520 }, { "epoch": 10.429620563035495, "grad_norm": 2.653414524870944, "learning_rate": 1.0605081764014854e-06, "loss": 0.4732, "step": 8521 }, { "epoch": 10.430844553243574, "grad_norm": 1.5313502745050567, "learning_rate": 1.0599842449623608e-06, "loss": 0.6231, "step": 8522 }, { "epoch": 10.432068543451653, "grad_norm": 1.9336528878368633, "learning_rate": 1.0594604081557445e-06, "loss": 0.611, "step": 8523 }, { "epoch": 10.433292533659731, "grad_norm": 2.0299623802001245, "learning_rate": 1.0589366660160627e-06, "loss": 0.5454, "step": 8524 }, { "epoch": 10.43451652386781, "grad_norm": 1.3729119294793166, "learning_rate": 1.0584130185777333e-06, "loss": 0.5235, "step": 8525 }, { "epoch": 10.435740514075887, "grad_norm": 1.2884984921244165, "learning_rate": 1.057889465875168e-06, "loss": 0.8946, "step": 8526 }, { "epoch": 10.436964504283965, "grad_norm": 1.325850595438034, "learning_rate": 1.0573660079427736e-06, "loss": 0.4913, "step": 8527 }, { "epoch": 10.438188494492044, "grad_norm": 1.61263251394551, "learning_rate": 1.0568426448149487e-06, "loss": 1.2441, "step": 8528 }, { "epoch": 10.439412484700123, "grad_norm": 2.090217960503598, "learning_rate": 1.0563193765260874e-06, "loss": 0.7318, "step": 8529 }, { "epoch": 10.440636474908201, "grad_norm": 1.294862815418946, "learning_rate": 1.055796203110577e-06, "loss": 0.7614, "step": 8530 }, { "epoch": 10.44186046511628, "grad_norm": 1.9132575257790458, "learning_rate": 1.0552731246027984e-06, "loss": 0.457, "step": 8531 }, { "epoch": 10.443084455324357, "grad_norm": 1.2814714036606616, "learning_rate": 1.054750141037126e-06, "loss": 0.9842, "step": 8532 }, { "epoch": 10.444308445532435, "grad_norm": 3.4945895559816735, "learning_rate": 1.0542272524479292e-06, "loss": 0.3477, "step": 8533 }, { "epoch": 10.445532435740514, "grad_norm": 1.6280409833084672, "learning_rate": 1.0537044588695698e-06, "loss": 1.0423, "step": 8534 }, { "epoch": 10.446756425948593, "grad_norm": 1.1946593891705124, "learning_rate": 1.0531817603364034e-06, "loss": 0.5404, "step": 8535 }, { "epoch": 10.447980416156671, "grad_norm": 1.5516899306316263, "learning_rate": 1.0526591568827798e-06, "loss": 0.4719, "step": 8536 }, { "epoch": 10.44920440636475, "grad_norm": 1.4791036006385672, "learning_rate": 1.052136648543044e-06, "loss": 0.2714, "step": 8537 }, { "epoch": 10.450428396572827, "grad_norm": 1.6057125036166295, "learning_rate": 1.051614235351533e-06, "loss": 0.5525, "step": 8538 }, { "epoch": 10.451652386780905, "grad_norm": 1.4560796067969188, "learning_rate": 1.0510919173425765e-06, "loss": 1.4085, "step": 8539 }, { "epoch": 10.452876376988984, "grad_norm": 1.3466907811331736, "learning_rate": 1.0505696945504993e-06, "loss": 0.5767, "step": 8540 }, { "epoch": 10.454100367197062, "grad_norm": 1.53148944518406, "learning_rate": 1.0500475670096214e-06, "loss": 0.7133, "step": 8541 }, { "epoch": 10.455324357405141, "grad_norm": 2.408949249335815, "learning_rate": 1.0495255347542544e-06, "loss": 0.3978, "step": 8542 }, { "epoch": 10.45654834761322, "grad_norm": 2.1987864312516665, "learning_rate": 1.0490035978187044e-06, "loss": 0.6024, "step": 8543 }, { "epoch": 10.457772337821297, "grad_norm": 1.073326998015257, "learning_rate": 1.048481756237271e-06, "loss": 0.6294, "step": 8544 }, { "epoch": 10.458996328029375, "grad_norm": 1.1647517581643725, "learning_rate": 1.0479600100442477e-06, "loss": 0.6928, "step": 8545 }, { "epoch": 10.460220318237454, "grad_norm": 1.84873925643271, "learning_rate": 1.047438359273922e-06, "loss": 0.3523, "step": 8546 }, { "epoch": 10.461444308445532, "grad_norm": 1.788973653532664, "learning_rate": 1.046916803960575e-06, "loss": 1.1933, "step": 8547 }, { "epoch": 10.462668298653611, "grad_norm": 1.730655429893381, "learning_rate": 1.0463953441384805e-06, "loss": 0.7487, "step": 8548 }, { "epoch": 10.46389228886169, "grad_norm": 2.448790105052832, "learning_rate": 1.0458739798419069e-06, "loss": 0.5002, "step": 8549 }, { "epoch": 10.465116279069768, "grad_norm": 1.8229118114534826, "learning_rate": 1.0453527111051183e-06, "loss": 0.4708, "step": 8550 }, { "epoch": 10.466340269277845, "grad_norm": 1.255274029461244, "learning_rate": 1.0448315379623686e-06, "loss": 0.6464, "step": 8551 }, { "epoch": 10.467564259485924, "grad_norm": 1.5478496195234754, "learning_rate": 1.044310460447908e-06, "loss": 0.2234, "step": 8552 }, { "epoch": 10.468788249694002, "grad_norm": 2.7132745979968043, "learning_rate": 1.0437894785959785e-06, "loss": 0.3596, "step": 8553 }, { "epoch": 10.470012239902081, "grad_norm": 1.1046243536726776, "learning_rate": 1.0432685924408195e-06, "loss": 0.5939, "step": 8554 }, { "epoch": 10.47123623011016, "grad_norm": 2.0125708444732764, "learning_rate": 1.0427478020166605e-06, "loss": 0.5471, "step": 8555 }, { "epoch": 10.472460220318238, "grad_norm": 1.689604537258621, "learning_rate": 1.0422271073577267e-06, "loss": 0.9057, "step": 8556 }, { "epoch": 10.473684210526315, "grad_norm": 1.8073316984831012, "learning_rate": 1.0417065084982347e-06, "loss": 1.0181, "step": 8557 }, { "epoch": 10.474908200734394, "grad_norm": 2.216652425391418, "learning_rate": 1.041186005472396e-06, "loss": 0.5383, "step": 8558 }, { "epoch": 10.476132190942472, "grad_norm": 1.929161702659491, "learning_rate": 1.0406655983144184e-06, "loss": 0.5542, "step": 8559 }, { "epoch": 10.477356181150551, "grad_norm": 2.380855596928087, "learning_rate": 1.0401452870585e-06, "loss": 0.5901, "step": 8560 }, { "epoch": 10.47858017135863, "grad_norm": 1.9690194840584812, "learning_rate": 1.0396250717388333e-06, "loss": 0.9588, "step": 8561 }, { "epoch": 10.479804161566708, "grad_norm": 2.5635183027017203, "learning_rate": 1.0391049523896058e-06, "loss": 0.6202, "step": 8562 }, { "epoch": 10.481028151774785, "grad_norm": 2.336644706463696, "learning_rate": 1.038584929044997e-06, "loss": 0.8969, "step": 8563 }, { "epoch": 10.482252141982864, "grad_norm": 1.393168657940036, "learning_rate": 1.0380650017391816e-06, "loss": 0.6936, "step": 8564 }, { "epoch": 10.483476132190942, "grad_norm": 2.3988903205596914, "learning_rate": 1.0375451705063266e-06, "loss": 0.4716, "step": 8565 }, { "epoch": 10.48470012239902, "grad_norm": 1.4249118757758763, "learning_rate": 1.037025435380593e-06, "loss": 0.9673, "step": 8566 }, { "epoch": 10.4859241126071, "grad_norm": 2.5041768966981386, "learning_rate": 1.0365057963961378e-06, "loss": 0.4478, "step": 8567 }, { "epoch": 10.487148102815178, "grad_norm": 3.046502042862484, "learning_rate": 1.035986253587109e-06, "loss": 0.3418, "step": 8568 }, { "epoch": 10.488372093023255, "grad_norm": 1.3838253755059224, "learning_rate": 1.035466806987648e-06, "loss": 0.7765, "step": 8569 }, { "epoch": 10.489596083231334, "grad_norm": 1.5085520372116943, "learning_rate": 1.0349474566318915e-06, "loss": 1.1161, "step": 8570 }, { "epoch": 10.490820073439412, "grad_norm": 2.905626955034027, "learning_rate": 1.0344282025539685e-06, "loss": 0.4615, "step": 8571 }, { "epoch": 10.49204406364749, "grad_norm": 1.4505173855674478, "learning_rate": 1.0339090447880038e-06, "loss": 0.6872, "step": 8572 }, { "epoch": 10.49326805385557, "grad_norm": 1.1811144475911761, "learning_rate": 1.033389983368114e-06, "loss": 0.5616, "step": 8573 }, { "epoch": 10.494492044063648, "grad_norm": 1.9184591243342848, "learning_rate": 1.032871018328411e-06, "loss": 1.4877, "step": 8574 }, { "epoch": 10.495716034271727, "grad_norm": 0.9759503101234503, "learning_rate": 1.0323521497029965e-06, "loss": 0.4276, "step": 8575 }, { "epoch": 10.496940024479803, "grad_norm": 1.4413719769243931, "learning_rate": 1.031833377525971e-06, "loss": 1.0234, "step": 8576 }, { "epoch": 10.498164014687882, "grad_norm": 2.404705329617172, "learning_rate": 1.0313147018314254e-06, "loss": 0.3636, "step": 8577 }, { "epoch": 10.49938800489596, "grad_norm": 1.9465073573921237, "learning_rate": 1.0307961226534455e-06, "loss": 0.4249, "step": 8578 }, { "epoch": 10.50061199510404, "grad_norm": 2.7092592426378204, "learning_rate": 1.03027764002611e-06, "loss": 0.3979, "step": 8579 }, { "epoch": 10.501835985312118, "grad_norm": 1.5498379859647287, "learning_rate": 1.0297592539834918e-06, "loss": 0.6045, "step": 8580 }, { "epoch": 10.503059975520197, "grad_norm": 2.033663399477973, "learning_rate": 1.0292409645596577e-06, "loss": 0.5741, "step": 8581 }, { "epoch": 10.504283965728273, "grad_norm": 1.2396010903048145, "learning_rate": 1.0287227717886674e-06, "loss": 0.4689, "step": 8582 }, { "epoch": 10.505507955936352, "grad_norm": 1.4061781072966644, "learning_rate": 1.0282046757045746e-06, "loss": 0.4861, "step": 8583 }, { "epoch": 10.50673194614443, "grad_norm": 1.411176176323051, "learning_rate": 1.027686676341426e-06, "loss": 0.6787, "step": 8584 }, { "epoch": 10.50795593635251, "grad_norm": 2.6524496951075696, "learning_rate": 1.0271687737332648e-06, "loss": 0.4892, "step": 8585 }, { "epoch": 10.509179926560588, "grad_norm": 1.9336231716134678, "learning_rate": 1.0266509679141234e-06, "loss": 0.6238, "step": 8586 }, { "epoch": 10.510403916768666, "grad_norm": 1.908748841258517, "learning_rate": 1.026133258918031e-06, "loss": 0.3384, "step": 8587 }, { "epoch": 10.511627906976745, "grad_norm": 1.0647839552556937, "learning_rate": 1.0256156467790081e-06, "loss": 0.5301, "step": 8588 }, { "epoch": 10.512851897184822, "grad_norm": 1.4737699117872558, "learning_rate": 1.0250981315310725e-06, "loss": 0.4304, "step": 8589 }, { "epoch": 10.5140758873929, "grad_norm": 1.586853684451767, "learning_rate": 1.0245807132082328e-06, "loss": 0.8166, "step": 8590 }, { "epoch": 10.51529987760098, "grad_norm": 2.473245525293707, "learning_rate": 1.0240633918444912e-06, "loss": 0.5376, "step": 8591 }, { "epoch": 10.516523867809058, "grad_norm": 2.3529658151318222, "learning_rate": 1.0235461674738443e-06, "loss": 1.078, "step": 8592 }, { "epoch": 10.517747858017136, "grad_norm": 1.8401405385427956, "learning_rate": 1.0230290401302822e-06, "loss": 1.3448, "step": 8593 }, { "epoch": 10.518971848225215, "grad_norm": 2.472234970532229, "learning_rate": 1.0225120098477887e-06, "loss": 0.4987, "step": 8594 }, { "epoch": 10.520195838433292, "grad_norm": 1.9533341669875603, "learning_rate": 1.0219950766603412e-06, "loss": 1.185, "step": 8595 }, { "epoch": 10.52141982864137, "grad_norm": 1.7165167903007745, "learning_rate": 1.0214782406019102e-06, "loss": 0.731, "step": 8596 }, { "epoch": 10.522643818849449, "grad_norm": 1.6499254116457798, "learning_rate": 1.0209615017064609e-06, "loss": 0.6991, "step": 8597 }, { "epoch": 10.523867809057528, "grad_norm": 1.534988972509923, "learning_rate": 1.0204448600079512e-06, "loss": 0.5498, "step": 8598 }, { "epoch": 10.525091799265606, "grad_norm": 2.7676899603373344, "learning_rate": 1.0199283155403326e-06, "loss": 0.9129, "step": 8599 }, { "epoch": 10.526315789473685, "grad_norm": 2.283960403297424, "learning_rate": 1.0194118683375502e-06, "loss": 0.639, "step": 8600 }, { "epoch": 10.527539779681762, "grad_norm": 1.7311773487278797, "learning_rate": 1.0188955184335434e-06, "loss": 1.0945, "step": 8601 }, { "epoch": 10.52876376988984, "grad_norm": 2.818609803500443, "learning_rate": 1.0183792658622454e-06, "loss": 0.3539, "step": 8602 }, { "epoch": 10.529987760097919, "grad_norm": 1.813462498281762, "learning_rate": 1.0178631106575828e-06, "loss": 1.1873, "step": 8603 }, { "epoch": 10.531211750305998, "grad_norm": 2.384475440937434, "learning_rate": 1.0173470528534738e-06, "loss": 0.5919, "step": 8604 }, { "epoch": 10.532435740514076, "grad_norm": 1.4996834917596134, "learning_rate": 1.0168310924838314e-06, "loss": 0.7503, "step": 8605 }, { "epoch": 10.533659730722155, "grad_norm": 1.4990267717439854, "learning_rate": 1.0163152295825651e-06, "loss": 0.6147, "step": 8606 }, { "epoch": 10.534883720930232, "grad_norm": 1.5818109512131402, "learning_rate": 1.0157994641835737e-06, "loss": 0.5704, "step": 8607 }, { "epoch": 10.53610771113831, "grad_norm": 1.7614122556750675, "learning_rate": 1.015283796320752e-06, "loss": 0.4147, "step": 8608 }, { "epoch": 10.537331701346389, "grad_norm": 2.068939008970077, "learning_rate": 1.0147682260279876e-06, "loss": 0.5738, "step": 8609 }, { "epoch": 10.538555691554468, "grad_norm": 1.5759608332509436, "learning_rate": 1.014252753339162e-06, "loss": 0.7462, "step": 8610 }, { "epoch": 10.539779681762546, "grad_norm": 2.250882510473957, "learning_rate": 1.0137373782881498e-06, "loss": 0.4675, "step": 8611 }, { "epoch": 10.541003671970625, "grad_norm": 1.6639344199171533, "learning_rate": 1.01322210090882e-06, "loss": 0.526, "step": 8612 }, { "epoch": 10.542227662178703, "grad_norm": 2.220784679362921, "learning_rate": 1.0127069212350344e-06, "loss": 0.9196, "step": 8613 }, { "epoch": 10.54345165238678, "grad_norm": 1.300612855856146, "learning_rate": 1.0121918393006488e-06, "loss": 0.7301, "step": 8614 }, { "epoch": 10.544675642594859, "grad_norm": 1.855938439511972, "learning_rate": 1.0116768551395127e-06, "loss": 0.4165, "step": 8615 }, { "epoch": 10.545899632802938, "grad_norm": 1.4739060358793459, "learning_rate": 1.0111619687854685e-06, "loss": 0.7944, "step": 8616 }, { "epoch": 10.547123623011016, "grad_norm": 1.0683053462846368, "learning_rate": 1.010647180272353e-06, "loss": 0.3939, "step": 8617 }, { "epoch": 10.548347613219095, "grad_norm": 1.8663184412326632, "learning_rate": 1.0101324896339962e-06, "loss": 0.486, "step": 8618 }, { "epoch": 10.549571603427173, "grad_norm": 1.3697879279837641, "learning_rate": 1.0096178969042203e-06, "loss": 0.4419, "step": 8619 }, { "epoch": 10.55079559363525, "grad_norm": 1.1819307295203072, "learning_rate": 1.009103402116845e-06, "loss": 0.5986, "step": 8620 }, { "epoch": 10.552019583843329, "grad_norm": 2.8455107984204524, "learning_rate": 1.0085890053056802e-06, "loss": 0.797, "step": 8621 }, { "epoch": 10.553243574051407, "grad_norm": 1.4189818917342532, "learning_rate": 1.0080747065045291e-06, "loss": 0.3661, "step": 8622 }, { "epoch": 10.554467564259486, "grad_norm": 1.4870154723582019, "learning_rate": 1.0075605057471888e-06, "loss": 0.5598, "step": 8623 }, { "epoch": 10.555691554467565, "grad_norm": 3.3156629681534624, "learning_rate": 1.007046403067453e-06, "loss": 0.4379, "step": 8624 }, { "epoch": 10.556915544675643, "grad_norm": 1.618991881944571, "learning_rate": 1.0065323984991055e-06, "loss": 0.7005, "step": 8625 }, { "epoch": 10.55813953488372, "grad_norm": 1.8464091349514562, "learning_rate": 1.0060184920759248e-06, "loss": 0.7448, "step": 8626 }, { "epoch": 10.559363525091799, "grad_norm": 2.70657871212265, "learning_rate": 1.005504683831683e-06, "loss": 0.5006, "step": 8627 }, { "epoch": 10.560587515299877, "grad_norm": 1.3999753808369797, "learning_rate": 1.0049909738001453e-06, "loss": 1.5577, "step": 8628 }, { "epoch": 10.561811505507956, "grad_norm": 2.0426703245408437, "learning_rate": 1.0044773620150714e-06, "loss": 1.5418, "step": 8629 }, { "epoch": 10.563035495716035, "grad_norm": 1.2065476412762617, "learning_rate": 1.0039638485102139e-06, "loss": 0.5026, "step": 8630 }, { "epoch": 10.564259485924113, "grad_norm": 2.0240925844000093, "learning_rate": 1.0034504333193184e-06, "loss": 0.7639, "step": 8631 }, { "epoch": 10.56548347613219, "grad_norm": 1.468703272721381, "learning_rate": 1.0029371164761254e-06, "loss": 1.1587, "step": 8632 }, { "epoch": 10.566707466340269, "grad_norm": 2.7534501882219993, "learning_rate": 1.0024238980143678e-06, "loss": 0.4463, "step": 8633 }, { "epoch": 10.567931456548347, "grad_norm": 2.388153447638572, "learning_rate": 1.0019107779677725e-06, "loss": 0.4443, "step": 8634 }, { "epoch": 10.569155446756426, "grad_norm": 1.3928934982659522, "learning_rate": 1.0013977563700598e-06, "loss": 0.6943, "step": 8635 }, { "epoch": 10.570379436964505, "grad_norm": 1.2741654113455056, "learning_rate": 1.0008848332549425e-06, "loss": 0.3862, "step": 8636 }, { "epoch": 10.571603427172583, "grad_norm": 1.2099311082184236, "learning_rate": 1.0003720086561302e-06, "loss": 0.5721, "step": 8637 }, { "epoch": 10.572827417380662, "grad_norm": 1.3736161263264792, "learning_rate": 9.998592826073229e-07, "loss": 1.0449, "step": 8638 }, { "epoch": 10.574051407588739, "grad_norm": 1.8240067095426356, "learning_rate": 9.993466551422154e-07, "loss": 0.5825, "step": 8639 }, { "epoch": 10.575275397796817, "grad_norm": 3.217571274147327, "learning_rate": 9.988341262944937e-07, "loss": 0.3487, "step": 8640 }, { "epoch": 10.576499388004896, "grad_norm": 1.657034530896635, "learning_rate": 9.983216960978418e-07, "loss": 0.3856, "step": 8641 }, { "epoch": 10.577723378212974, "grad_norm": 1.3708064243403908, "learning_rate": 9.978093645859332e-07, "loss": 1.3304, "step": 8642 }, { "epoch": 10.578947368421053, "grad_norm": 1.580619669799115, "learning_rate": 9.972971317924373e-07, "loss": 0.3792, "step": 8643 }, { "epoch": 10.580171358629132, "grad_norm": 1.3813460874033585, "learning_rate": 9.967849977510159e-07, "loss": 0.5153, "step": 8644 }, { "epoch": 10.581395348837209, "grad_norm": 1.3533727501677353, "learning_rate": 9.962729624953244e-07, "loss": 0.5263, "step": 8645 }, { "epoch": 10.582619339045287, "grad_norm": 1.761467395670106, "learning_rate": 9.957610260590117e-07, "loss": 1.6148, "step": 8646 }, { "epoch": 10.583843329253366, "grad_norm": 0.9766463317212936, "learning_rate": 9.952491884757207e-07, "loss": 0.4469, "step": 8647 }, { "epoch": 10.585067319461444, "grad_norm": 1.8151568760805872, "learning_rate": 9.947374497790873e-07, "loss": 0.5449, "step": 8648 }, { "epoch": 10.586291309669523, "grad_norm": 1.7224527971452135, "learning_rate": 9.942258100027401e-07, "loss": 0.6765, "step": 8649 }, { "epoch": 10.587515299877602, "grad_norm": 1.4977210939235652, "learning_rate": 9.937142691803053e-07, "loss": 0.71, "step": 8650 }, { "epoch": 10.588739290085678, "grad_norm": 1.6944018727479195, "learning_rate": 9.932028273453962e-07, "loss": 0.5209, "step": 8651 }, { "epoch": 10.589963280293757, "grad_norm": 2.2183766839352166, "learning_rate": 9.92691484531624e-07, "loss": 0.5768, "step": 8652 }, { "epoch": 10.591187270501836, "grad_norm": 1.0571109068385465, "learning_rate": 9.921802407725918e-07, "loss": 0.5947, "step": 8653 }, { "epoch": 10.592411260709914, "grad_norm": 2.582221738760998, "learning_rate": 9.916690961018977e-07, "loss": 0.5776, "step": 8654 }, { "epoch": 10.593635250917993, "grad_norm": 2.022265129630802, "learning_rate": 9.911580505531316e-07, "loss": 0.4807, "step": 8655 }, { "epoch": 10.594859241126072, "grad_norm": 1.231615140316117, "learning_rate": 9.906471041598786e-07, "loss": 0.5547, "step": 8656 }, { "epoch": 10.596083231334148, "grad_norm": 2.439320294952316, "learning_rate": 9.901362569557143e-07, "loss": 0.4984, "step": 8657 }, { "epoch": 10.597307221542227, "grad_norm": 1.4848563387223659, "learning_rate": 9.896255089742099e-07, "loss": 1.0723, "step": 8658 }, { "epoch": 10.598531211750306, "grad_norm": 1.5271439562341695, "learning_rate": 9.891148602489312e-07, "loss": 0.8646, "step": 8659 }, { "epoch": 10.599755201958384, "grad_norm": 1.9578350709093246, "learning_rate": 9.886043108134355e-07, "loss": 0.3934, "step": 8660 }, { "epoch": 10.600979192166463, "grad_norm": 1.8049429407361863, "learning_rate": 9.880938607012743e-07, "loss": 1.5604, "step": 8661 }, { "epoch": 10.602203182374542, "grad_norm": 1.366231074971778, "learning_rate": 9.875835099459923e-07, "loss": 0.3659, "step": 8662 }, { "epoch": 10.60342717258262, "grad_norm": 1.440098167803313, "learning_rate": 9.870732585811282e-07, "loss": 0.6654, "step": 8663 }, { "epoch": 10.604651162790697, "grad_norm": 1.9388807147106646, "learning_rate": 9.865631066402138e-07, "loss": 0.4815, "step": 8664 }, { "epoch": 10.605875152998776, "grad_norm": 2.903842649024455, "learning_rate": 9.860530541567739e-07, "loss": 0.7396, "step": 8665 }, { "epoch": 10.607099143206854, "grad_norm": 1.879653336823943, "learning_rate": 9.855431011643266e-07, "loss": 0.5056, "step": 8666 }, { "epoch": 10.608323133414933, "grad_norm": 1.35562382849663, "learning_rate": 9.850332476963861e-07, "loss": 0.5063, "step": 8667 }, { "epoch": 10.609547123623011, "grad_norm": 1.902253485951595, "learning_rate": 9.84523493786458e-07, "loss": 1.3884, "step": 8668 }, { "epoch": 10.61077111383109, "grad_norm": 2.3158541726232773, "learning_rate": 9.840138394680398e-07, "loss": 1.015, "step": 8669 }, { "epoch": 10.611995104039167, "grad_norm": 1.673576376310381, "learning_rate": 9.835042847746246e-07, "loss": 0.5081, "step": 8670 }, { "epoch": 10.613219094247246, "grad_norm": 2.3590375320053547, "learning_rate": 9.82994829739698e-07, "loss": 0.4699, "step": 8671 }, { "epoch": 10.614443084455324, "grad_norm": 1.2295717546046838, "learning_rate": 9.824854743967406e-07, "loss": 0.6836, "step": 8672 }, { "epoch": 10.615667074663403, "grad_norm": 3.2995308503302945, "learning_rate": 9.819762187792256e-07, "loss": 0.3792, "step": 8673 }, { "epoch": 10.616891064871481, "grad_norm": 1.1383697737551428, "learning_rate": 9.814670629206191e-07, "loss": 0.583, "step": 8674 }, { "epoch": 10.61811505507956, "grad_norm": 3.219568404590481, "learning_rate": 9.80958006854379e-07, "loss": 0.6838, "step": 8675 }, { "epoch": 10.619339045287639, "grad_norm": 1.6591554905641195, "learning_rate": 9.80449050613961e-07, "loss": 0.4724, "step": 8676 }, { "epoch": 10.620563035495715, "grad_norm": 1.9098929563326383, "learning_rate": 9.79940194232811e-07, "loss": 0.6928, "step": 8677 }, { "epoch": 10.621787025703794, "grad_norm": 2.2245099002831523, "learning_rate": 9.794314377443688e-07, "loss": 1.1699, "step": 8678 }, { "epoch": 10.623011015911873, "grad_norm": 1.3922823090475078, "learning_rate": 9.789227811820687e-07, "loss": 0.6065, "step": 8679 }, { "epoch": 10.624235006119951, "grad_norm": 1.9929958957257277, "learning_rate": 9.784142245793375e-07, "loss": 0.5309, "step": 8680 }, { "epoch": 10.62545899632803, "grad_norm": 1.74819941451009, "learning_rate": 9.77905767969595e-07, "loss": 0.4952, "step": 8681 }, { "epoch": 10.626682986536107, "grad_norm": 1.3671376813192477, "learning_rate": 9.77397411386256e-07, "loss": 0.6778, "step": 8682 }, { "epoch": 10.627906976744185, "grad_norm": 0.9317743979678326, "learning_rate": 9.768891548627272e-07, "loss": 0.5219, "step": 8683 }, { "epoch": 10.629130966952264, "grad_norm": 1.5656435340587507, "learning_rate": 9.763809984324086e-07, "loss": 0.535, "step": 8684 }, { "epoch": 10.630354957160343, "grad_norm": 2.965781577483978, "learning_rate": 9.758729421286973e-07, "loss": 0.4034, "step": 8685 }, { "epoch": 10.631578947368421, "grad_norm": 2.6088831859974113, "learning_rate": 9.753649859849775e-07, "loss": 1.178, "step": 8686 }, { "epoch": 10.6328029375765, "grad_norm": 1.1440959748705175, "learning_rate": 9.748571300346321e-07, "loss": 0.5074, "step": 8687 }, { "epoch": 10.634026927784578, "grad_norm": 1.3580094608487006, "learning_rate": 9.743493743110342e-07, "loss": 0.3884, "step": 8688 }, { "epoch": 10.635250917992655, "grad_norm": 1.7655060992618419, "learning_rate": 9.73841718847553e-07, "loss": 0.546, "step": 8689 }, { "epoch": 10.636474908200734, "grad_norm": 2.8686640765102758, "learning_rate": 9.733341636775493e-07, "loss": 0.4677, "step": 8690 }, { "epoch": 10.637698898408813, "grad_norm": 1.7772257892961434, "learning_rate": 9.728267088343774e-07, "loss": 0.565, "step": 8691 }, { "epoch": 10.638922888616891, "grad_norm": 0.9469959704166653, "learning_rate": 9.723193543513856e-07, "loss": 0.3448, "step": 8692 }, { "epoch": 10.64014687882497, "grad_norm": 1.2378826468750017, "learning_rate": 9.718121002619157e-07, "loss": 0.8448, "step": 8693 }, { "epoch": 10.641370869033048, "grad_norm": 2.6448299000974993, "learning_rate": 9.71304946599302e-07, "loss": 0.4008, "step": 8694 }, { "epoch": 10.642594859241125, "grad_norm": 2.318332669822744, "learning_rate": 9.707978933968728e-07, "loss": 1.0312, "step": 8695 }, { "epoch": 10.643818849449204, "grad_norm": 2.776212019999017, "learning_rate": 9.702909406879502e-07, "loss": 0.5121, "step": 8696 }, { "epoch": 10.645042839657282, "grad_norm": 1.8698647609886485, "learning_rate": 9.697840885058488e-07, "loss": 1.0071, "step": 8697 }, { "epoch": 10.646266829865361, "grad_norm": 1.2958359610298558, "learning_rate": 9.692773368838771e-07, "loss": 0.6018, "step": 8698 }, { "epoch": 10.64749082007344, "grad_norm": 2.6661637862725946, "learning_rate": 9.687706858553373e-07, "loss": 0.7899, "step": 8699 }, { "epoch": 10.648714810281518, "grad_norm": 1.5283666102224267, "learning_rate": 9.682641354535244e-07, "loss": 0.6181, "step": 8700 }, { "epoch": 10.649938800489597, "grad_norm": 1.5805063943634743, "learning_rate": 9.67757685711726e-07, "loss": 0.4633, "step": 8701 }, { "epoch": 10.651162790697674, "grad_norm": 1.5391531593192282, "learning_rate": 9.67251336663226e-07, "loss": 0.4031, "step": 8702 }, { "epoch": 10.652386780905752, "grad_norm": 1.612859561142017, "learning_rate": 9.667450883413e-07, "loss": 1.1694, "step": 8703 }, { "epoch": 10.653610771113831, "grad_norm": 1.3764455918373912, "learning_rate": 9.662389407792144e-07, "loss": 0.8637, "step": 8704 }, { "epoch": 10.65483476132191, "grad_norm": 1.452150281339315, "learning_rate": 9.65732894010232e-07, "loss": 0.8133, "step": 8705 }, { "epoch": 10.656058751529988, "grad_norm": 1.5110945313541537, "learning_rate": 9.6522694806761e-07, "loss": 1.1624, "step": 8706 }, { "epoch": 10.657282741738067, "grad_norm": 2.0113920849889753, "learning_rate": 9.64721102984596e-07, "loss": 0.458, "step": 8707 }, { "epoch": 10.658506731946144, "grad_norm": 2.4339823139710033, "learning_rate": 9.642153587944324e-07, "loss": 0.5764, "step": 8708 }, { "epoch": 10.659730722154222, "grad_norm": 1.7438866063595608, "learning_rate": 9.637097155303552e-07, "loss": 0.5396, "step": 8709 }, { "epoch": 10.660954712362301, "grad_norm": 1.6362915933231317, "learning_rate": 9.632041732255932e-07, "loss": 1.2278, "step": 8710 }, { "epoch": 10.66217870257038, "grad_norm": 1.888453850848573, "learning_rate": 9.626987319133684e-07, "loss": 1.105, "step": 8711 }, { "epoch": 10.663402692778458, "grad_norm": 1.3515672725581926, "learning_rate": 9.621933916268973e-07, "loss": 0.7372, "step": 8712 }, { "epoch": 10.664626682986537, "grad_norm": 1.3560359014460965, "learning_rate": 9.616881523993882e-07, "loss": 0.6713, "step": 8713 }, { "epoch": 10.665850673194614, "grad_norm": 1.5960417068658548, "learning_rate": 9.61183014264044e-07, "loss": 0.4691, "step": 8714 }, { "epoch": 10.667074663402692, "grad_norm": 1.370564965462065, "learning_rate": 9.606779772540603e-07, "loss": 0.5446, "step": 8715 }, { "epoch": 10.668298653610771, "grad_norm": 1.6866036025622224, "learning_rate": 9.601730414026264e-07, "loss": 0.6175, "step": 8716 }, { "epoch": 10.66952264381885, "grad_norm": 1.6359636711028158, "learning_rate": 9.59668206742925e-07, "loss": 1.6362, "step": 8717 }, { "epoch": 10.670746634026928, "grad_norm": 1.1278711875563878, "learning_rate": 9.591634733081307e-07, "loss": 0.5577, "step": 8718 }, { "epoch": 10.671970624235007, "grad_norm": 1.6534614343301133, "learning_rate": 9.586588411314145e-07, "loss": 0.489, "step": 8719 }, { "epoch": 10.673194614443084, "grad_norm": 3.172574123442707, "learning_rate": 9.581543102459386e-07, "loss": 0.4094, "step": 8720 }, { "epoch": 10.674418604651162, "grad_norm": 2.3492284913908716, "learning_rate": 9.576498806848592e-07, "loss": 0.6058, "step": 8721 }, { "epoch": 10.67564259485924, "grad_norm": 2.54798326735156, "learning_rate": 9.571455524813241e-07, "loss": 0.8169, "step": 8722 }, { "epoch": 10.67686658506732, "grad_norm": 2.9656438906393596, "learning_rate": 9.566413256684756e-07, "loss": 1.0203, "step": 8723 }, { "epoch": 10.678090575275398, "grad_norm": 1.6969211698690392, "learning_rate": 9.56137200279452e-07, "loss": 0.304, "step": 8724 }, { "epoch": 10.679314565483477, "grad_norm": 2.0417853280984164, "learning_rate": 9.556331763473808e-07, "loss": 1.0336, "step": 8725 }, { "epoch": 10.680538555691555, "grad_norm": 1.5624515352290547, "learning_rate": 9.551292539053851e-07, "loss": 0.6112, "step": 8726 }, { "epoch": 10.681762545899632, "grad_norm": 2.223770373062648, "learning_rate": 9.546254329865808e-07, "loss": 0.4748, "step": 8727 }, { "epoch": 10.68298653610771, "grad_norm": 2.184711110756065, "learning_rate": 9.541217136240773e-07, "loss": 0.6746, "step": 8728 }, { "epoch": 10.68421052631579, "grad_norm": 1.5391959924246308, "learning_rate": 9.536180958509769e-07, "loss": 2.2782, "step": 8729 }, { "epoch": 10.685434516523868, "grad_norm": 1.224400020218761, "learning_rate": 9.531145797003754e-07, "loss": 0.8429, "step": 8730 }, { "epoch": 10.686658506731947, "grad_norm": 2.179263562608608, "learning_rate": 9.526111652053613e-07, "loss": 0.3948, "step": 8731 }, { "epoch": 10.687882496940025, "grad_norm": 1.6256546591022707, "learning_rate": 9.521078523990199e-07, "loss": 1.0133, "step": 8732 }, { "epoch": 10.689106487148102, "grad_norm": 1.3739845897566165, "learning_rate": 9.516046413144242e-07, "loss": 0.5003, "step": 8733 }, { "epoch": 10.69033047735618, "grad_norm": 1.6591511978993947, "learning_rate": 9.511015319846445e-07, "loss": 0.4535, "step": 8734 }, { "epoch": 10.69155446756426, "grad_norm": 1.0952900532277625, "learning_rate": 9.505985244427432e-07, "loss": 0.5631, "step": 8735 }, { "epoch": 10.692778457772338, "grad_norm": 1.1330731040306519, "learning_rate": 9.50095618721775e-07, "loss": 0.8472, "step": 8736 }, { "epoch": 10.694002447980417, "grad_norm": 2.467991523325283, "learning_rate": 9.495928148547909e-07, "loss": 0.3958, "step": 8737 }, { "epoch": 10.695226438188495, "grad_norm": 3.135349117273822, "learning_rate": 9.490901128748325e-07, "loss": 0.3747, "step": 8738 }, { "epoch": 10.696450428396572, "grad_norm": 1.5127759286309204, "learning_rate": 9.485875128149364e-07, "loss": 0.6041, "step": 8739 }, { "epoch": 10.69767441860465, "grad_norm": 1.2177962501401907, "learning_rate": 9.480850147081289e-07, "loss": 0.66, "step": 8740 }, { "epoch": 10.69889840881273, "grad_norm": 1.8812683319909893, "learning_rate": 9.475826185874348e-07, "loss": 1.0097, "step": 8741 }, { "epoch": 10.700122399020808, "grad_norm": 1.6790218184462655, "learning_rate": 9.470803244858692e-07, "loss": 0.3713, "step": 8742 }, { "epoch": 10.701346389228886, "grad_norm": 2.038076377758818, "learning_rate": 9.465781324364409e-07, "loss": 0.6149, "step": 8743 }, { "epoch": 10.702570379436965, "grad_norm": 1.9828769609315426, "learning_rate": 9.460760424721519e-07, "loss": 0.727, "step": 8744 }, { "epoch": 10.703794369645042, "grad_norm": 1.6657334342773669, "learning_rate": 9.455740546259978e-07, "loss": 0.552, "step": 8745 }, { "epoch": 10.70501835985312, "grad_norm": 1.1585240425362378, "learning_rate": 9.450721689309675e-07, "loss": 0.6193, "step": 8746 }, { "epoch": 10.7062423500612, "grad_norm": 2.5833912737448155, "learning_rate": 9.445703854200428e-07, "loss": 0.4306, "step": 8747 }, { "epoch": 10.707466340269278, "grad_norm": 1.595504238547596, "learning_rate": 9.440687041261992e-07, "loss": 0.6001, "step": 8748 }, { "epoch": 10.708690330477356, "grad_norm": 1.1162940326982573, "learning_rate": 9.435671250824047e-07, "loss": 0.6395, "step": 8749 }, { "epoch": 10.709914320685435, "grad_norm": 0.9404529085916344, "learning_rate": 9.430656483216235e-07, "loss": 0.4394, "step": 8750 }, { "epoch": 10.711138310893514, "grad_norm": 2.6530022625131493, "learning_rate": 9.425642738768085e-07, "loss": 0.463, "step": 8751 }, { "epoch": 10.71236230110159, "grad_norm": 2.076440950055686, "learning_rate": 9.420630017809088e-07, "loss": 0.3832, "step": 8752 }, { "epoch": 10.713586291309669, "grad_norm": 1.4711210512883865, "learning_rate": 9.415618320668654e-07, "loss": 0.6273, "step": 8753 }, { "epoch": 10.714810281517748, "grad_norm": 2.9160325414065373, "learning_rate": 9.410607647676148e-07, "loss": 0.9598, "step": 8754 }, { "epoch": 10.716034271725826, "grad_norm": 2.1946812462566205, "learning_rate": 9.405597999160848e-07, "loss": 0.5048, "step": 8755 }, { "epoch": 10.717258261933905, "grad_norm": 2.3031194982500556, "learning_rate": 9.400589375451966e-07, "loss": 0.349, "step": 8756 }, { "epoch": 10.718482252141984, "grad_norm": 2.299966434251559, "learning_rate": 9.395581776878654e-07, "loss": 0.3265, "step": 8757 }, { "epoch": 10.71970624235006, "grad_norm": 1.8615364536448726, "learning_rate": 9.390575203769989e-07, "loss": 0.3894, "step": 8758 }, { "epoch": 10.720930232558139, "grad_norm": 1.1178188590796985, "learning_rate": 9.385569656454988e-07, "loss": 0.6617, "step": 8759 }, { "epoch": 10.722154222766218, "grad_norm": 3.5308817712691423, "learning_rate": 9.380565135262595e-07, "loss": 0.4249, "step": 8760 }, { "epoch": 10.723378212974296, "grad_norm": 1.5098062180095784, "learning_rate": 9.375561640521688e-07, "loss": 0.8715, "step": 8761 }, { "epoch": 10.724602203182375, "grad_norm": 1.090429577935605, "learning_rate": 9.370559172561081e-07, "loss": 0.5364, "step": 8762 }, { "epoch": 10.725826193390454, "grad_norm": 1.882517841787687, "learning_rate": 9.365557731709515e-07, "loss": 1.2128, "step": 8763 }, { "epoch": 10.727050183598532, "grad_norm": 2.415780578243912, "learning_rate": 9.360557318295666e-07, "loss": 0.4227, "step": 8764 }, { "epoch": 10.728274173806609, "grad_norm": 1.4104449616197556, "learning_rate": 9.355557932648143e-07, "loss": 1.1487, "step": 8765 }, { "epoch": 10.729498164014688, "grad_norm": 2.0027611719237717, "learning_rate": 9.350559575095483e-07, "loss": 0.7228, "step": 8766 }, { "epoch": 10.730722154222766, "grad_norm": 2.2556242479587594, "learning_rate": 9.345562245966169e-07, "loss": 0.4852, "step": 8767 }, { "epoch": 10.731946144430845, "grad_norm": 1.6321913971734976, "learning_rate": 9.340565945588614e-07, "loss": 0.4883, "step": 8768 }, { "epoch": 10.733170134638923, "grad_norm": 1.4217494546735152, "learning_rate": 9.335570674291136e-07, "loss": 0.4334, "step": 8769 }, { "epoch": 10.734394124847, "grad_norm": 1.933582147510335, "learning_rate": 9.330576432402014e-07, "loss": 0.4601, "step": 8770 }, { "epoch": 10.735618115055079, "grad_norm": 3.2266533748973805, "learning_rate": 9.325583220249443e-07, "loss": 0.4592, "step": 8771 }, { "epoch": 10.736842105263158, "grad_norm": 2.0951110288330064, "learning_rate": 9.320591038161575e-07, "loss": 1.119, "step": 8772 }, { "epoch": 10.738066095471236, "grad_norm": 1.7199314392050755, "learning_rate": 9.31559988646647e-07, "loss": 1.3985, "step": 8773 }, { "epoch": 10.739290085679315, "grad_norm": 1.470400044241514, "learning_rate": 9.310609765492137e-07, "loss": 0.5014, "step": 8774 }, { "epoch": 10.740514075887393, "grad_norm": 2.123458907503255, "learning_rate": 9.305620675566482e-07, "loss": 1.1858, "step": 8775 }, { "epoch": 10.741738066095472, "grad_norm": 2.572402597969765, "learning_rate": 9.300632617017396e-07, "loss": 0.5211, "step": 8776 }, { "epoch": 10.742962056303549, "grad_norm": 2.291279385010221, "learning_rate": 9.295645590172664e-07, "loss": 0.5232, "step": 8777 }, { "epoch": 10.744186046511627, "grad_norm": 1.9766594689003147, "learning_rate": 9.290659595360019e-07, "loss": 0.359, "step": 8778 }, { "epoch": 10.745410036719706, "grad_norm": 1.0969312860822165, "learning_rate": 9.285674632907121e-07, "loss": 0.6795, "step": 8779 }, { "epoch": 10.746634026927785, "grad_norm": 3.041037146917913, "learning_rate": 9.280690703141562e-07, "loss": 0.574, "step": 8780 }, { "epoch": 10.747858017135863, "grad_norm": 2.6418426451660504, "learning_rate": 9.275707806390871e-07, "loss": 0.7262, "step": 8781 }, { "epoch": 10.749082007343942, "grad_norm": 1.3024918877985507, "learning_rate": 9.2707259429825e-07, "loss": 0.9748, "step": 8782 }, { "epoch": 10.750305997552019, "grad_norm": 1.518329070600029, "learning_rate": 9.26574511324384e-07, "loss": 0.9156, "step": 8783 }, { "epoch": 10.751529987760097, "grad_norm": 1.1663899523561658, "learning_rate": 9.26076531750221e-07, "loss": 0.6702, "step": 8784 }, { "epoch": 10.752753977968176, "grad_norm": 1.3815846376700105, "learning_rate": 9.255786556084872e-07, "loss": 0.4513, "step": 8785 }, { "epoch": 10.753977968176255, "grad_norm": 3.7988457668781908, "learning_rate": 9.250808829319019e-07, "loss": 0.3555, "step": 8786 }, { "epoch": 10.755201958384333, "grad_norm": 1.8765315770441213, "learning_rate": 9.245832137531749e-07, "loss": 0.524, "step": 8787 }, { "epoch": 10.756425948592412, "grad_norm": 1.1778036011934452, "learning_rate": 9.240856481050111e-07, "loss": 0.5702, "step": 8788 }, { "epoch": 10.75764993880049, "grad_norm": 1.2883975459490684, "learning_rate": 9.235881860201104e-07, "loss": 0.5189, "step": 8789 }, { "epoch": 10.758873929008567, "grad_norm": 2.406356224106745, "learning_rate": 9.230908275311636e-07, "loss": 0.5474, "step": 8790 }, { "epoch": 10.760097919216646, "grad_norm": 1.4574476652097625, "learning_rate": 9.225935726708549e-07, "loss": 0.544, "step": 8791 }, { "epoch": 10.761321909424725, "grad_norm": 1.9744534664717306, "learning_rate": 9.220964214718622e-07, "loss": 0.3978, "step": 8792 }, { "epoch": 10.762545899632803, "grad_norm": 2.9176588876532046, "learning_rate": 9.215993739668563e-07, "loss": 0.3957, "step": 8793 }, { "epoch": 10.763769889840882, "grad_norm": 2.6633189263727477, "learning_rate": 9.211024301885016e-07, "loss": 0.6574, "step": 8794 }, { "epoch": 10.76499388004896, "grad_norm": 2.146300720611982, "learning_rate": 9.206055901694553e-07, "loss": 1.1045, "step": 8795 }, { "epoch": 10.766217870257037, "grad_norm": 1.581904086848936, "learning_rate": 9.201088539423675e-07, "loss": 0.5691, "step": 8796 }, { "epoch": 10.767441860465116, "grad_norm": 1.3893280587446335, "learning_rate": 9.196122215398825e-07, "loss": 0.8733, "step": 8797 }, { "epoch": 10.768665850673194, "grad_norm": 2.167982067488335, "learning_rate": 9.191156929946368e-07, "loss": 0.8206, "step": 8798 }, { "epoch": 10.769889840881273, "grad_norm": 3.277981078219065, "learning_rate": 9.186192683392603e-07, "loss": 0.5216, "step": 8799 }, { "epoch": 10.771113831089352, "grad_norm": 2.014471068521743, "learning_rate": 9.181229476063766e-07, "loss": 0.7658, "step": 8800 }, { "epoch": 10.77233782129743, "grad_norm": 2.5278238818525827, "learning_rate": 9.176267308286008e-07, "loss": 0.5576, "step": 8801 }, { "epoch": 10.773561811505507, "grad_norm": 1.43693889345621, "learning_rate": 9.171306180385445e-07, "loss": 0.7228, "step": 8802 }, { "epoch": 10.774785801713586, "grad_norm": 2.9054407027925486, "learning_rate": 9.166346092688099e-07, "loss": 0.6864, "step": 8803 }, { "epoch": 10.776009791921664, "grad_norm": 1.1486155007271546, "learning_rate": 9.161387045519918e-07, "loss": 0.6146, "step": 8804 }, { "epoch": 10.777233782129743, "grad_norm": 2.0138202757613364, "learning_rate": 9.156429039206785e-07, "loss": 0.5953, "step": 8805 }, { "epoch": 10.778457772337822, "grad_norm": 1.9856028274706146, "learning_rate": 9.151472074074547e-07, "loss": 1.2901, "step": 8806 }, { "epoch": 10.7796817625459, "grad_norm": 2.017828033716331, "learning_rate": 9.146516150448942e-07, "loss": 0.3913, "step": 8807 }, { "epoch": 10.780905752753977, "grad_norm": 1.0026918813187888, "learning_rate": 9.141561268655658e-07, "loss": 0.5101, "step": 8808 }, { "epoch": 10.782129742962056, "grad_norm": 1.8570817100163473, "learning_rate": 9.136607429020311e-07, "loss": 0.708, "step": 8809 }, { "epoch": 10.783353733170134, "grad_norm": 1.468635623858812, "learning_rate": 9.131654631868451e-07, "loss": 0.5578, "step": 8810 }, { "epoch": 10.784577723378213, "grad_norm": 1.750419702061781, "learning_rate": 9.126702877525554e-07, "loss": 0.6298, "step": 8811 }, { "epoch": 10.785801713586292, "grad_norm": 3.095617502427146, "learning_rate": 9.121752166317033e-07, "loss": 0.3049, "step": 8812 }, { "epoch": 10.78702570379437, "grad_norm": 2.48443147336221, "learning_rate": 9.116802498568231e-07, "loss": 1.0162, "step": 8813 }, { "epoch": 10.788249694002449, "grad_norm": 3.3719305723288735, "learning_rate": 9.11185387460441e-07, "loss": 0.354, "step": 8814 }, { "epoch": 10.789473684210526, "grad_norm": 1.4615253356058637, "learning_rate": 9.106906294750806e-07, "loss": 1.1294, "step": 8815 }, { "epoch": 10.790697674418604, "grad_norm": 2.0793834375042377, "learning_rate": 9.101959759332524e-07, "loss": 0.6516, "step": 8816 }, { "epoch": 10.791921664626683, "grad_norm": 1.6019213864908666, "learning_rate": 9.097014268674648e-07, "loss": 0.3562, "step": 8817 }, { "epoch": 10.793145654834762, "grad_norm": 2.1110277568050724, "learning_rate": 9.092069823102162e-07, "loss": 1.1941, "step": 8818 }, { "epoch": 10.79436964504284, "grad_norm": 2.4206268250881147, "learning_rate": 9.087126422940015e-07, "loss": 0.6073, "step": 8819 }, { "epoch": 10.795593635250919, "grad_norm": 2.0491564837202185, "learning_rate": 9.082184068513064e-07, "loss": 0.9624, "step": 8820 }, { "epoch": 10.796817625458996, "grad_norm": 2.73779763532413, "learning_rate": 9.077242760146108e-07, "loss": 0.439, "step": 8821 }, { "epoch": 10.798041615667074, "grad_norm": 1.28901050598713, "learning_rate": 9.072302498163857e-07, "loss": 0.7722, "step": 8822 }, { "epoch": 10.799265605875153, "grad_norm": 0.9961106599172523, "learning_rate": 9.067363282890962e-07, "loss": 0.4404, "step": 8823 }, { "epoch": 10.800489596083231, "grad_norm": 1.992592677874508, "learning_rate": 9.062425114652033e-07, "loss": 0.3881, "step": 8824 }, { "epoch": 10.80171358629131, "grad_norm": 1.3886421224080288, "learning_rate": 9.057487993771571e-07, "loss": 0.6446, "step": 8825 }, { "epoch": 10.802937576499389, "grad_norm": 1.9573628949458135, "learning_rate": 9.052551920574034e-07, "loss": 0.4887, "step": 8826 }, { "epoch": 10.804161566707466, "grad_norm": 1.6141869935493223, "learning_rate": 9.047616895383801e-07, "loss": 0.4652, "step": 8827 }, { "epoch": 10.805385556915544, "grad_norm": 2.0748215029726778, "learning_rate": 9.04268291852518e-07, "loss": 0.4078, "step": 8828 }, { "epoch": 10.806609547123623, "grad_norm": 1.1916634668897612, "learning_rate": 9.037749990322416e-07, "loss": 0.3834, "step": 8829 }, { "epoch": 10.807833537331701, "grad_norm": 1.5741422237682339, "learning_rate": 9.032818111099681e-07, "loss": 0.5224, "step": 8830 }, { "epoch": 10.80905752753978, "grad_norm": 1.0967594781715873, "learning_rate": 9.027887281181075e-07, "loss": 0.3619, "step": 8831 }, { "epoch": 10.810281517747859, "grad_norm": 1.7010932689977778, "learning_rate": 9.022957500890656e-07, "loss": 0.9959, "step": 8832 }, { "epoch": 10.811505507955935, "grad_norm": 2.1447784234308123, "learning_rate": 9.018028770552368e-07, "loss": 0.8109, "step": 8833 }, { "epoch": 10.812729498164014, "grad_norm": 1.695069812187889, "learning_rate": 9.013101090490114e-07, "loss": 1.5546, "step": 8834 }, { "epoch": 10.813953488372093, "grad_norm": 1.3802544845683025, "learning_rate": 9.008174461027724e-07, "loss": 0.4101, "step": 8835 }, { "epoch": 10.815177478580171, "grad_norm": 1.3397612469354259, "learning_rate": 9.003248882488952e-07, "loss": 0.7652, "step": 8836 }, { "epoch": 10.81640146878825, "grad_norm": 2.374832251367542, "learning_rate": 8.998324355197505e-07, "loss": 0.4148, "step": 8837 }, { "epoch": 10.817625458996329, "grad_norm": 1.9212712624656891, "learning_rate": 8.993400879476994e-07, "loss": 0.7362, "step": 8838 }, { "epoch": 10.818849449204407, "grad_norm": 1.8151163957596244, "learning_rate": 8.988478455650981e-07, "loss": 0.5339, "step": 8839 }, { "epoch": 10.820073439412484, "grad_norm": 1.7236462859569641, "learning_rate": 8.983557084042923e-07, "loss": 0.6586, "step": 8840 }, { "epoch": 10.821297429620563, "grad_norm": 1.8186051428188494, "learning_rate": 8.978636764976264e-07, "loss": 0.6106, "step": 8841 }, { "epoch": 10.822521419828641, "grad_norm": 2.6130220010581917, "learning_rate": 8.973717498774337e-07, "loss": 0.4753, "step": 8842 }, { "epoch": 10.82374541003672, "grad_norm": 1.1784443711672994, "learning_rate": 8.968799285760419e-07, "loss": 0.4435, "step": 8843 }, { "epoch": 10.824969400244798, "grad_norm": 1.480096826780721, "learning_rate": 8.963882126257717e-07, "loss": 0.6461, "step": 8844 }, { "epoch": 10.826193390452877, "grad_norm": 1.3355849944382563, "learning_rate": 8.958966020589369e-07, "loss": 0.5526, "step": 8845 }, { "epoch": 10.827417380660954, "grad_norm": 2.9573673857930625, "learning_rate": 8.95405096907844e-07, "loss": 0.3402, "step": 8846 }, { "epoch": 10.828641370869033, "grad_norm": 1.2175949492004883, "learning_rate": 8.949136972047931e-07, "loss": 0.4725, "step": 8847 }, { "epoch": 10.829865361077111, "grad_norm": 1.9324568925558754, "learning_rate": 8.944224029820775e-07, "loss": 0.5312, "step": 8848 }, { "epoch": 10.83108935128519, "grad_norm": 1.5117286796643794, "learning_rate": 8.939312142719819e-07, "loss": 1.4009, "step": 8849 }, { "epoch": 10.832313341493268, "grad_norm": 1.724660881008733, "learning_rate": 8.934401311067884e-07, "loss": 1.3589, "step": 8850 }, { "epoch": 10.833537331701347, "grad_norm": 2.855986319733748, "learning_rate": 8.929491535187659e-07, "loss": 0.3571, "step": 8851 }, { "epoch": 10.834761321909426, "grad_norm": 1.5480842598278306, "learning_rate": 8.924582815401814e-07, "loss": 0.9117, "step": 8852 }, { "epoch": 10.835985312117502, "grad_norm": 1.9782238554740605, "learning_rate": 8.919675152032917e-07, "loss": 0.3648, "step": 8853 }, { "epoch": 10.837209302325581, "grad_norm": 1.9931295510586355, "learning_rate": 8.914768545403501e-07, "loss": 0.906, "step": 8854 }, { "epoch": 10.83843329253366, "grad_norm": 1.5563984858716138, "learning_rate": 8.909862995836002e-07, "loss": 0.5227, "step": 8855 }, { "epoch": 10.839657282741738, "grad_norm": 1.4763442324442477, "learning_rate": 8.904958503652794e-07, "loss": 1.1161, "step": 8856 }, { "epoch": 10.840881272949817, "grad_norm": 2.5731467557224432, "learning_rate": 8.90005506917618e-07, "loss": 0.3308, "step": 8857 }, { "epoch": 10.842105263157894, "grad_norm": 2.1096327329981457, "learning_rate": 8.895152692728398e-07, "loss": 0.4174, "step": 8858 }, { "epoch": 10.843329253365972, "grad_norm": 1.6968615663372175, "learning_rate": 8.890251374631614e-07, "loss": 0.4296, "step": 8859 }, { "epoch": 10.844553243574051, "grad_norm": 1.6031819088825963, "learning_rate": 8.885351115207921e-07, "loss": 1.0664, "step": 8860 }, { "epoch": 10.84577723378213, "grad_norm": 1.9787644028906364, "learning_rate": 8.880451914779351e-07, "loss": 1.0485, "step": 8861 }, { "epoch": 10.847001223990208, "grad_norm": 1.1386704710765776, "learning_rate": 8.875553773667861e-07, "loss": 0.6304, "step": 8862 }, { "epoch": 10.848225214198287, "grad_norm": 2.4623961254858315, "learning_rate": 8.870656692195334e-07, "loss": 0.4867, "step": 8863 }, { "epoch": 10.849449204406366, "grad_norm": 2.033289342438134, "learning_rate": 8.865760670683591e-07, "loss": 0.502, "step": 8864 }, { "epoch": 10.850673194614442, "grad_norm": 1.503310129177509, "learning_rate": 8.86086570945438e-07, "loss": 0.3597, "step": 8865 }, { "epoch": 10.851897184822521, "grad_norm": 1.804326272697163, "learning_rate": 8.855971808829373e-07, "loss": 0.9527, "step": 8866 }, { "epoch": 10.8531211750306, "grad_norm": 1.8934647556548647, "learning_rate": 8.851078969130194e-07, "loss": 0.4726, "step": 8867 }, { "epoch": 10.854345165238678, "grad_norm": 1.2660418171643988, "learning_rate": 8.846187190678382e-07, "loss": 0.5716, "step": 8868 }, { "epoch": 10.855569155446757, "grad_norm": 2.128250943968023, "learning_rate": 8.841296473795391e-07, "loss": 1.204, "step": 8869 }, { "epoch": 10.856793145654835, "grad_norm": 2.6262913407430775, "learning_rate": 8.836406818802623e-07, "loss": 0.5077, "step": 8870 }, { "epoch": 10.858017135862912, "grad_norm": 1.3742961016978275, "learning_rate": 8.831518226021419e-07, "loss": 0.7373, "step": 8871 }, { "epoch": 10.859241126070991, "grad_norm": 3.0228403149208636, "learning_rate": 8.826630695773037e-07, "loss": 0.5288, "step": 8872 }, { "epoch": 10.86046511627907, "grad_norm": 2.425391869869488, "learning_rate": 8.821744228378662e-07, "loss": 0.5433, "step": 8873 }, { "epoch": 10.861689106487148, "grad_norm": 1.1007681533374445, "learning_rate": 8.816858824159424e-07, "loss": 0.5467, "step": 8874 }, { "epoch": 10.862913096695227, "grad_norm": 2.30995393899878, "learning_rate": 8.811974483436353e-07, "loss": 0.5483, "step": 8875 }, { "epoch": 10.864137086903305, "grad_norm": 2.4173971476024754, "learning_rate": 8.807091206530452e-07, "loss": 0.4602, "step": 8876 }, { "epoch": 10.865361077111384, "grad_norm": 1.2289146632248107, "learning_rate": 8.802208993762624e-07, "loss": 0.5375, "step": 8877 }, { "epoch": 10.86658506731946, "grad_norm": 2.61760851738504, "learning_rate": 8.797327845453707e-07, "loss": 0.9302, "step": 8878 }, { "epoch": 10.86780905752754, "grad_norm": 2.2179158246506674, "learning_rate": 8.792447761924475e-07, "loss": 0.9524, "step": 8879 }, { "epoch": 10.869033047735618, "grad_norm": 2.1980296200282776, "learning_rate": 8.787568743495628e-07, "loss": 0.3754, "step": 8880 }, { "epoch": 10.870257037943697, "grad_norm": 1.6277589894324616, "learning_rate": 8.7826907904878e-07, "loss": 0.5937, "step": 8881 }, { "epoch": 10.871481028151775, "grad_norm": 2.2051376806958802, "learning_rate": 8.777813903221549e-07, "loss": 0.5712, "step": 8882 }, { "epoch": 10.872705018359854, "grad_norm": 1.476588258781014, "learning_rate": 8.772938082017358e-07, "loss": 0.3546, "step": 8883 }, { "epoch": 10.87392900856793, "grad_norm": 3.4460999950284417, "learning_rate": 8.768063327195664e-07, "loss": 0.3369, "step": 8884 }, { "epoch": 10.87515299877601, "grad_norm": 1.4378715659017955, "learning_rate": 8.763189639076811e-07, "loss": 1.311, "step": 8885 }, { "epoch": 10.876376988984088, "grad_norm": 1.3537408903904238, "learning_rate": 8.758317017981086e-07, "loss": 0.6273, "step": 8886 }, { "epoch": 10.877600979192167, "grad_norm": 1.8066542490506188, "learning_rate": 8.753445464228685e-07, "loss": 0.4667, "step": 8887 }, { "epoch": 10.878824969400245, "grad_norm": 1.5869184308356272, "learning_rate": 8.748574978139748e-07, "loss": 1.571, "step": 8888 }, { "epoch": 10.880048959608324, "grad_norm": 2.1683892162459593, "learning_rate": 8.743705560034363e-07, "loss": 0.5993, "step": 8889 }, { "epoch": 10.8812729498164, "grad_norm": 2.0160370797276594, "learning_rate": 8.73883721023252e-07, "loss": 0.6309, "step": 8890 }, { "epoch": 10.88249694002448, "grad_norm": 2.0128745696234103, "learning_rate": 8.733969929054151e-07, "loss": 0.9254, "step": 8891 }, { "epoch": 10.883720930232558, "grad_norm": 1.4845349374718784, "learning_rate": 8.729103716819113e-07, "loss": 1.3185, "step": 8892 }, { "epoch": 10.884944920440637, "grad_norm": 1.2901472525128017, "learning_rate": 8.724238573847197e-07, "loss": 0.4491, "step": 8893 }, { "epoch": 10.886168910648715, "grad_norm": 1.5625853692253024, "learning_rate": 8.71937450045812e-07, "loss": 0.4264, "step": 8894 }, { "epoch": 10.887392900856794, "grad_norm": 1.6127149977192443, "learning_rate": 8.714511496971536e-07, "loss": 1.2939, "step": 8895 }, { "epoch": 10.88861689106487, "grad_norm": 1.1796151376489348, "learning_rate": 8.709649563707009e-07, "loss": 0.4761, "step": 8896 }, { "epoch": 10.88984088127295, "grad_norm": 1.3788548707825221, "learning_rate": 8.704788700984079e-07, "loss": 0.6402, "step": 8897 }, { "epoch": 10.891064871481028, "grad_norm": 1.4475268137371076, "learning_rate": 8.699928909122155e-07, "loss": 0.9475, "step": 8898 }, { "epoch": 10.892288861689106, "grad_norm": 2.799403731285428, "learning_rate": 8.695070188440613e-07, "loss": 0.5801, "step": 8899 }, { "epoch": 10.893512851897185, "grad_norm": 1.3275860635447232, "learning_rate": 8.69021253925875e-07, "loss": 0.5086, "step": 8900 }, { "epoch": 10.894736842105264, "grad_norm": 1.1760502646054831, "learning_rate": 8.685355961895783e-07, "loss": 0.5095, "step": 8901 }, { "epoch": 10.895960832313342, "grad_norm": 1.073271358261181, "learning_rate": 8.68050045667089e-07, "loss": 0.512, "step": 8902 }, { "epoch": 10.89718482252142, "grad_norm": 1.7787931892178646, "learning_rate": 8.67564602390315e-07, "loss": 0.6877, "step": 8903 }, { "epoch": 10.898408812729498, "grad_norm": 1.6730917363727054, "learning_rate": 8.67079266391157e-07, "loss": 0.4579, "step": 8904 }, { "epoch": 10.899632802937576, "grad_norm": 1.894003840891833, "learning_rate": 8.665940377015086e-07, "loss": 0.4594, "step": 8905 }, { "epoch": 10.900856793145655, "grad_norm": 1.4035659571897081, "learning_rate": 8.661089163532596e-07, "loss": 0.3591, "step": 8906 }, { "epoch": 10.902080783353734, "grad_norm": 2.3197191936502284, "learning_rate": 8.656239023782892e-07, "loss": 0.4804, "step": 8907 }, { "epoch": 10.903304773561812, "grad_norm": 2.1401141376686272, "learning_rate": 8.651389958084708e-07, "loss": 1.1199, "step": 8908 }, { "epoch": 10.904528763769889, "grad_norm": 1.9578680327250007, "learning_rate": 8.646541966756705e-07, "loss": 0.5221, "step": 8909 }, { "epoch": 10.905752753977968, "grad_norm": 1.477165036695425, "learning_rate": 8.64169505011748e-07, "loss": 0.5916, "step": 8910 }, { "epoch": 10.906976744186046, "grad_norm": 1.2970344280290176, "learning_rate": 8.636849208485548e-07, "loss": 0.6566, "step": 8911 }, { "epoch": 10.908200734394125, "grad_norm": 1.7399100675252694, "learning_rate": 8.632004442179365e-07, "loss": 1.8016, "step": 8912 }, { "epoch": 10.909424724602204, "grad_norm": 1.5198510557130038, "learning_rate": 8.627160751517308e-07, "loss": 1.2277, "step": 8913 }, { "epoch": 10.910648714810282, "grad_norm": 2.220286328304501, "learning_rate": 8.622318136817678e-07, "loss": 0.9632, "step": 8914 }, { "epoch": 10.911872705018359, "grad_norm": 1.147575674201209, "learning_rate": 8.617476598398742e-07, "loss": 0.7447, "step": 8915 }, { "epoch": 10.913096695226438, "grad_norm": 2.725471610613177, "learning_rate": 8.61263613657864e-07, "loss": 0.8274, "step": 8916 }, { "epoch": 10.914320685434516, "grad_norm": 0.8657904250779648, "learning_rate": 8.607796751675479e-07, "loss": 0.3509, "step": 8917 }, { "epoch": 10.915544675642595, "grad_norm": 0.7891734698344517, "learning_rate": 8.602958444007276e-07, "loss": 0.2181, "step": 8918 }, { "epoch": 10.916768665850674, "grad_norm": 2.823844167352141, "learning_rate": 8.598121213892002e-07, "loss": 0.5811, "step": 8919 }, { "epoch": 10.917992656058752, "grad_norm": 1.8467338048152613, "learning_rate": 8.593285061647539e-07, "loss": 0.4994, "step": 8920 }, { "epoch": 10.919216646266829, "grad_norm": 1.501730814717575, "learning_rate": 8.588449987591704e-07, "loss": 0.4768, "step": 8921 }, { "epoch": 10.920440636474908, "grad_norm": 1.6695639631283132, "learning_rate": 8.583615992042227e-07, "loss": 0.5084, "step": 8922 }, { "epoch": 10.921664626682986, "grad_norm": 1.111795791156414, "learning_rate": 8.578783075316777e-07, "loss": 0.6025, "step": 8923 }, { "epoch": 10.922888616891065, "grad_norm": 1.3371035246155394, "learning_rate": 8.573951237732972e-07, "loss": 1.0881, "step": 8924 }, { "epoch": 10.924112607099143, "grad_norm": 1.9300967908001243, "learning_rate": 8.569120479608339e-07, "loss": 0.6019, "step": 8925 }, { "epoch": 10.925336597307222, "grad_norm": 2.0573930980525854, "learning_rate": 8.564290801260333e-07, "loss": 1.1509, "step": 8926 }, { "epoch": 10.9265605875153, "grad_norm": 2.2991558761633013, "learning_rate": 8.559462203006344e-07, "loss": 1.4073, "step": 8927 }, { "epoch": 10.927784577723378, "grad_norm": 2.013762494315006, "learning_rate": 8.55463468516369e-07, "loss": 0.5142, "step": 8928 }, { "epoch": 10.929008567931456, "grad_norm": 2.3225612139624987, "learning_rate": 8.549808248049618e-07, "loss": 0.2862, "step": 8929 }, { "epoch": 10.930232558139535, "grad_norm": 1.5665924910995463, "learning_rate": 8.544982891981304e-07, "loss": 0.5678, "step": 8930 }, { "epoch": 10.931456548347613, "grad_norm": 1.3780835733691228, "learning_rate": 8.54015861727584e-07, "loss": 0.309, "step": 8931 }, { "epoch": 10.932680538555692, "grad_norm": 2.9374634730738003, "learning_rate": 8.535335424250282e-07, "loss": 0.2104, "step": 8932 }, { "epoch": 10.93390452876377, "grad_norm": 1.846529014987819, "learning_rate": 8.530513313221589e-07, "loss": 0.9246, "step": 8933 }, { "epoch": 10.935128518971847, "grad_norm": 1.1023103887743304, "learning_rate": 8.525692284506634e-07, "loss": 0.3871, "step": 8934 }, { "epoch": 10.936352509179926, "grad_norm": 1.054171522443247, "learning_rate": 8.520872338422251e-07, "loss": 0.3978, "step": 8935 }, { "epoch": 10.937576499388005, "grad_norm": 2.329122658947561, "learning_rate": 8.516053475285174e-07, "loss": 0.4964, "step": 8936 }, { "epoch": 10.938800489596083, "grad_norm": 1.2607869420555138, "learning_rate": 8.511235695412104e-07, "loss": 0.7486, "step": 8937 }, { "epoch": 10.940024479804162, "grad_norm": 1.6993531676981326, "learning_rate": 8.506418999119636e-07, "loss": 0.5714, "step": 8938 }, { "epoch": 10.94124847001224, "grad_norm": 1.9166522475525072, "learning_rate": 8.501603386724313e-07, "loss": 0.3608, "step": 8939 }, { "epoch": 10.94247246022032, "grad_norm": 1.0470781885175395, "learning_rate": 8.496788858542577e-07, "loss": 0.5906, "step": 8940 }, { "epoch": 10.943696450428396, "grad_norm": 2.0769101671348174, "learning_rate": 8.491975414890844e-07, "loss": 1.3167, "step": 8941 }, { "epoch": 10.944920440636475, "grad_norm": 2.042378957842182, "learning_rate": 8.487163056085427e-07, "loss": 0.5075, "step": 8942 }, { "epoch": 10.946144430844553, "grad_norm": 1.8228634964297847, "learning_rate": 8.482351782442577e-07, "loss": 0.9836, "step": 8943 }, { "epoch": 10.947368421052632, "grad_norm": 2.7149327295018866, "learning_rate": 8.477541594278474e-07, "loss": 0.9528, "step": 8944 }, { "epoch": 10.94859241126071, "grad_norm": 1.1930688404965586, "learning_rate": 8.472732491909225e-07, "loss": 0.8828, "step": 8945 }, { "epoch": 10.949816401468787, "grad_norm": 1.9291657922578964, "learning_rate": 8.467924475650865e-07, "loss": 0.5889, "step": 8946 }, { "epoch": 10.951040391676866, "grad_norm": 1.957743495582537, "learning_rate": 8.463117545819363e-07, "loss": 0.7267, "step": 8947 }, { "epoch": 10.952264381884945, "grad_norm": 1.751108947639815, "learning_rate": 8.458311702730607e-07, "loss": 0.5002, "step": 8948 }, { "epoch": 10.953488372093023, "grad_norm": 1.8483341817237053, "learning_rate": 8.453506946700419e-07, "loss": 0.464, "step": 8949 }, { "epoch": 10.954712362301102, "grad_norm": 2.0358231351121745, "learning_rate": 8.448703278044565e-07, "loss": 1.1752, "step": 8950 }, { "epoch": 10.95593635250918, "grad_norm": 2.2377133061067744, "learning_rate": 8.443900697078705e-07, "loss": 0.3958, "step": 8951 }, { "epoch": 10.957160342717259, "grad_norm": 1.9164186425605303, "learning_rate": 8.439099204118456e-07, "loss": 0.5828, "step": 8952 }, { "epoch": 10.958384332925336, "grad_norm": 1.5316622325990201, "learning_rate": 8.434298799479346e-07, "loss": 0.5665, "step": 8953 }, { "epoch": 10.959608323133414, "grad_norm": 1.661033787555597, "learning_rate": 8.429499483476853e-07, "loss": 1.1663, "step": 8954 }, { "epoch": 10.960832313341493, "grad_norm": 1.665625099147359, "learning_rate": 8.424701256426362e-07, "loss": 0.6374, "step": 8955 }, { "epoch": 10.962056303549572, "grad_norm": 1.975149715147597, "learning_rate": 8.419904118643199e-07, "loss": 0.683, "step": 8956 }, { "epoch": 10.96328029375765, "grad_norm": 1.6937528719568988, "learning_rate": 8.415108070442612e-07, "loss": 1.2298, "step": 8957 }, { "epoch": 10.964504283965729, "grad_norm": 1.754314461498832, "learning_rate": 8.410313112139778e-07, "loss": 0.3121, "step": 8958 }, { "epoch": 10.965728274173806, "grad_norm": 1.4272650585910678, "learning_rate": 8.405519244049806e-07, "loss": 1.2816, "step": 8959 }, { "epoch": 10.966952264381884, "grad_norm": 1.9662501427400545, "learning_rate": 8.400726466487732e-07, "loss": 0.5383, "step": 8960 }, { "epoch": 10.968176254589963, "grad_norm": 2.0489953724926746, "learning_rate": 8.395934779768517e-07, "loss": 0.5775, "step": 8961 }, { "epoch": 10.969400244798042, "grad_norm": 1.4849446446480306, "learning_rate": 8.391144184207053e-07, "loss": 0.5883, "step": 8962 }, { "epoch": 10.97062423500612, "grad_norm": 2.3237842079024786, "learning_rate": 8.386354680118161e-07, "loss": 0.9702, "step": 8963 }, { "epoch": 10.971848225214199, "grad_norm": 1.8979662160273187, "learning_rate": 8.381566267816593e-07, "loss": 0.6506, "step": 8964 }, { "epoch": 10.973072215422278, "grad_norm": 1.0105928205098453, "learning_rate": 8.37677894761702e-07, "loss": 0.4415, "step": 8965 }, { "epoch": 10.974296205630354, "grad_norm": 1.8026098561749249, "learning_rate": 8.371992719834041e-07, "loss": 1.4205, "step": 8966 }, { "epoch": 10.975520195838433, "grad_norm": 2.0120388489481553, "learning_rate": 8.367207584782205e-07, "loss": 0.4893, "step": 8967 }, { "epoch": 10.976744186046512, "grad_norm": 1.6500466456438387, "learning_rate": 8.362423542775977e-07, "loss": 0.6702, "step": 8968 }, { "epoch": 10.97796817625459, "grad_norm": 1.4490398125283181, "learning_rate": 8.357640594129726e-07, "loss": 0.6181, "step": 8969 }, { "epoch": 10.979192166462669, "grad_norm": 2.8983370979898364, "learning_rate": 8.352858739157774e-07, "loss": 0.4072, "step": 8970 }, { "epoch": 10.980416156670747, "grad_norm": 1.6274523799665237, "learning_rate": 8.348077978174377e-07, "loss": 0.4543, "step": 8971 }, { "epoch": 10.981640146878824, "grad_norm": 1.87577985792482, "learning_rate": 8.343298311493706e-07, "loss": 1.134, "step": 8972 }, { "epoch": 10.982864137086903, "grad_norm": 2.041703163954681, "learning_rate": 8.338519739429859e-07, "loss": 0.5284, "step": 8973 }, { "epoch": 10.984088127294982, "grad_norm": 1.7900977711684403, "learning_rate": 8.333742262296868e-07, "loss": 0.5777, "step": 8974 }, { "epoch": 10.98531211750306, "grad_norm": 2.159972071983754, "learning_rate": 8.328965880408694e-07, "loss": 0.97, "step": 8975 }, { "epoch": 10.986536107711139, "grad_norm": 2.310039857771576, "learning_rate": 8.324190594079218e-07, "loss": 0.3745, "step": 8976 }, { "epoch": 10.987760097919217, "grad_norm": 1.7053630985828936, "learning_rate": 8.319416403622258e-07, "loss": 0.9098, "step": 8977 }, { "epoch": 10.988984088127294, "grad_norm": 1.5961032425641881, "learning_rate": 8.314643309351553e-07, "loss": 0.9821, "step": 8978 }, { "epoch": 10.990208078335373, "grad_norm": 1.9702172662424797, "learning_rate": 8.309871311580778e-07, "loss": 0.4393, "step": 8979 }, { "epoch": 10.991432068543451, "grad_norm": 2.790925789912982, "learning_rate": 8.305100410623526e-07, "loss": 0.5051, "step": 8980 }, { "epoch": 10.99265605875153, "grad_norm": 2.9752884029539763, "learning_rate": 8.300330606793325e-07, "loss": 0.4907, "step": 8981 }, { "epoch": 10.993880048959609, "grad_norm": 2.325050124519071, "learning_rate": 8.295561900403629e-07, "loss": 1.3339, "step": 8982 }, { "epoch": 10.995104039167687, "grad_norm": 1.5812850622737917, "learning_rate": 8.290794291767809e-07, "loss": 0.69, "step": 8983 }, { "epoch": 10.996328029375764, "grad_norm": 1.9202572705944772, "learning_rate": 8.286027781199197e-07, "loss": 0.9627, "step": 8984 }, { "epoch": 10.997552019583843, "grad_norm": 3.5864533521274944, "learning_rate": 8.281262369011014e-07, "loss": 0.7625, "step": 8985 }, { "epoch": 10.998776009791921, "grad_norm": 1.3888240083779075, "learning_rate": 8.276498055516441e-07, "loss": 0.5563, "step": 8986 }, { "epoch": 11.0, "grad_norm": 0.9693536178833745, "learning_rate": 8.271734841028553e-07, "loss": 0.4933, "step": 8987 }, { "epoch": 11.001223990208079, "grad_norm": 1.1289925391657307, "learning_rate": 8.266972725860367e-07, "loss": 0.5689, "step": 8988 }, { "epoch": 11.002447980416157, "grad_norm": 1.8221020220221489, "learning_rate": 8.262211710324853e-07, "loss": 1.4173, "step": 8989 }, { "epoch": 11.003671970624236, "grad_norm": 1.1791998370160566, "learning_rate": 8.257451794734872e-07, "loss": 0.5592, "step": 8990 }, { "epoch": 11.004895960832313, "grad_norm": 1.9316723770530155, "learning_rate": 8.252692979403237e-07, "loss": 0.3354, "step": 8991 }, { "epoch": 11.006119951040391, "grad_norm": 1.3749000030580507, "learning_rate": 8.247935264642673e-07, "loss": 0.6034, "step": 8992 }, { "epoch": 11.00734394124847, "grad_norm": 2.0877353307772943, "learning_rate": 8.243178650765843e-07, "loss": 0.485, "step": 8993 }, { "epoch": 11.008567931456549, "grad_norm": 2.0878536127052523, "learning_rate": 8.238423138085333e-07, "loss": 0.4043, "step": 8994 }, { "epoch": 11.009791921664627, "grad_norm": 2.058080277792716, "learning_rate": 8.233668726913657e-07, "loss": 1.313, "step": 8995 }, { "epoch": 11.011015911872706, "grad_norm": 1.216947685758841, "learning_rate": 8.228915417563249e-07, "loss": 0.5002, "step": 8996 }, { "epoch": 11.012239902080783, "grad_norm": 1.9404533427866895, "learning_rate": 8.224163210346506e-07, "loss": 0.479, "step": 8997 }, { "epoch": 11.013463892288861, "grad_norm": 2.2693046245292816, "learning_rate": 8.219412105575697e-07, "loss": 0.5201, "step": 8998 }, { "epoch": 11.01468788249694, "grad_norm": 2.463331257307761, "learning_rate": 8.214662103563059e-07, "loss": 0.5386, "step": 8999 }, { "epoch": 11.015911872705018, "grad_norm": 2.959995702971444, "learning_rate": 8.209913204620743e-07, "loss": 0.398, "step": 9000 }, { "epoch": 11.017135862913097, "grad_norm": 0.8337580078870223, "learning_rate": 8.205165409060819e-07, "loss": 0.4318, "step": 9001 }, { "epoch": 11.018359853121176, "grad_norm": 3.0379771090488275, "learning_rate": 8.200418717195316e-07, "loss": 0.3805, "step": 9002 }, { "epoch": 11.019583843329253, "grad_norm": 1.6598730419960264, "learning_rate": 8.195673129336155e-07, "loss": 0.6753, "step": 9003 }, { "epoch": 11.020807833537331, "grad_norm": 3.38254614824278, "learning_rate": 8.19092864579521e-07, "loss": 0.6778, "step": 9004 }, { "epoch": 11.02203182374541, "grad_norm": 1.2449635255493556, "learning_rate": 8.186185266884245e-07, "loss": 0.7469, "step": 9005 }, { "epoch": 11.023255813953488, "grad_norm": 1.6497189206451062, "learning_rate": 8.181442992915001e-07, "loss": 0.4858, "step": 9006 }, { "epoch": 11.024479804161567, "grad_norm": 1.3058462142843823, "learning_rate": 8.176701824199118e-07, "loss": 0.7144, "step": 9007 }, { "epoch": 11.025703794369646, "grad_norm": 3.2527790581892657, "learning_rate": 8.171961761048167e-07, "loss": 0.5882, "step": 9008 }, { "epoch": 11.026927784577722, "grad_norm": 1.5922785584790589, "learning_rate": 8.167222803773645e-07, "loss": 0.8138, "step": 9009 }, { "epoch": 11.028151774785801, "grad_norm": 1.1924356260514575, "learning_rate": 8.162484952686981e-07, "loss": 0.4981, "step": 9010 }, { "epoch": 11.02937576499388, "grad_norm": 1.9626999528009017, "learning_rate": 8.157748208099525e-07, "loss": 0.7235, "step": 9011 }, { "epoch": 11.030599755201958, "grad_norm": 1.5058697414033677, "learning_rate": 8.153012570322566e-07, "loss": 0.4737, "step": 9012 }, { "epoch": 11.031823745410037, "grad_norm": 1.0028706580506175, "learning_rate": 8.148278039667306e-07, "loss": 0.438, "step": 9013 }, { "epoch": 11.033047735618116, "grad_norm": 1.9264411742542642, "learning_rate": 8.143544616444876e-07, "loss": 0.3575, "step": 9014 }, { "epoch": 11.034271725826194, "grad_norm": 1.6045848568118388, "learning_rate": 8.138812300966361e-07, "loss": 0.3528, "step": 9015 }, { "epoch": 11.035495716034271, "grad_norm": 2.0386715085310905, "learning_rate": 8.134081093542728e-07, "loss": 0.4772, "step": 9016 }, { "epoch": 11.03671970624235, "grad_norm": 2.4086092216733186, "learning_rate": 8.129350994484905e-07, "loss": 0.4433, "step": 9017 }, { "epoch": 11.037943696450428, "grad_norm": 1.7455660542088032, "learning_rate": 8.124622004103725e-07, "loss": 0.8219, "step": 9018 }, { "epoch": 11.039167686658507, "grad_norm": 1.4532366693555925, "learning_rate": 8.119894122709975e-07, "loss": 0.7217, "step": 9019 }, { "epoch": 11.040391676866586, "grad_norm": 2.117616021720788, "learning_rate": 8.11516735061435e-07, "loss": 0.6643, "step": 9020 }, { "epoch": 11.041615667074664, "grad_norm": 1.5605732319617553, "learning_rate": 8.110441688127485e-07, "loss": 0.5589, "step": 9021 }, { "epoch": 11.042839657282741, "grad_norm": 1.854794137892344, "learning_rate": 8.105717135559901e-07, "loss": 0.3489, "step": 9022 }, { "epoch": 11.04406364749082, "grad_norm": 1.1604213782803097, "learning_rate": 8.100993693222111e-07, "loss": 0.5567, "step": 9023 }, { "epoch": 11.045287637698898, "grad_norm": 2.428132684325884, "learning_rate": 8.09627136142451e-07, "loss": 0.8956, "step": 9024 }, { "epoch": 11.046511627906977, "grad_norm": 3.532493026301642, "learning_rate": 8.09155014047743e-07, "loss": 0.4088, "step": 9025 }, { "epoch": 11.047735618115055, "grad_norm": 2.6419304656065727, "learning_rate": 8.086830030691137e-07, "loss": 0.945, "step": 9026 }, { "epoch": 11.048959608323134, "grad_norm": 2.0147598842992127, "learning_rate": 8.082111032375817e-07, "loss": 0.4888, "step": 9027 }, { "epoch": 11.050183598531211, "grad_norm": 1.7221598166560412, "learning_rate": 8.077393145841583e-07, "loss": 0.8554, "step": 9028 }, { "epoch": 11.05140758873929, "grad_norm": 2.847893662916916, "learning_rate": 8.07267637139848e-07, "loss": 0.352, "step": 9029 }, { "epoch": 11.052631578947368, "grad_norm": 1.7539307739055112, "learning_rate": 8.067960709356479e-07, "loss": 0.7451, "step": 9030 }, { "epoch": 11.053855569155447, "grad_norm": 1.8805115400790082, "learning_rate": 8.063246160025462e-07, "loss": 0.483, "step": 9031 }, { "epoch": 11.055079559363525, "grad_norm": 1.4678512270655446, "learning_rate": 8.058532723715271e-07, "loss": 0.5549, "step": 9032 }, { "epoch": 11.056303549571604, "grad_norm": 1.5594408200453251, "learning_rate": 8.053820400735654e-07, "loss": 1.1142, "step": 9033 }, { "epoch": 11.057527539779683, "grad_norm": 1.5758230819702244, "learning_rate": 8.049109191396274e-07, "loss": 0.4715, "step": 9034 }, { "epoch": 11.05875152998776, "grad_norm": 2.4567145809287037, "learning_rate": 8.044399096006733e-07, "loss": 0.4498, "step": 9035 }, { "epoch": 11.059975520195838, "grad_norm": 1.4285953589742924, "learning_rate": 8.039690114876578e-07, "loss": 1.1269, "step": 9036 }, { "epoch": 11.061199510403917, "grad_norm": 1.2639135334186864, "learning_rate": 8.034982248315254e-07, "loss": 0.4477, "step": 9037 }, { "epoch": 11.062423500611995, "grad_norm": 2.0556390954012675, "learning_rate": 8.030275496632148e-07, "loss": 0.4154, "step": 9038 }, { "epoch": 11.063647490820074, "grad_norm": 1.3588686954988605, "learning_rate": 8.025569860136581e-07, "loss": 1.0416, "step": 9039 }, { "epoch": 11.064871481028153, "grad_norm": 1.5282918284009752, "learning_rate": 8.020865339137759e-07, "loss": 0.9821, "step": 9040 }, { "epoch": 11.06609547123623, "grad_norm": 1.534433316137476, "learning_rate": 8.016161933944874e-07, "loss": 0.7076, "step": 9041 }, { "epoch": 11.067319461444308, "grad_norm": 2.2054124690883317, "learning_rate": 8.011459644867009e-07, "loss": 0.5352, "step": 9042 }, { "epoch": 11.068543451652387, "grad_norm": 1.4714474783642406, "learning_rate": 8.00675847221318e-07, "loss": 0.5572, "step": 9043 }, { "epoch": 11.069767441860465, "grad_norm": 1.3320788204444765, "learning_rate": 8.00205841629233e-07, "loss": 0.574, "step": 9044 }, { "epoch": 11.070991432068544, "grad_norm": 1.1670758620919202, "learning_rate": 7.99735947741333e-07, "loss": 0.4736, "step": 9045 }, { "epoch": 11.072215422276622, "grad_norm": 1.8480131502968522, "learning_rate": 7.992661655884975e-07, "loss": 0.5313, "step": 9046 }, { "epoch": 11.0734394124847, "grad_norm": 2.8361725815278214, "learning_rate": 7.987964952015994e-07, "loss": 0.3577, "step": 9047 }, { "epoch": 11.074663402692778, "grad_norm": 2.027972525331852, "learning_rate": 7.983269366115023e-07, "loss": 0.7601, "step": 9048 }, { "epoch": 11.075887392900857, "grad_norm": 2.026673609144047, "learning_rate": 7.978574898490657e-07, "loss": 0.7706, "step": 9049 }, { "epoch": 11.077111383108935, "grad_norm": 1.2065909096414953, "learning_rate": 7.973881549451398e-07, "loss": 0.4704, "step": 9050 }, { "epoch": 11.078335373317014, "grad_norm": 3.839933680884358, "learning_rate": 7.969189319305662e-07, "loss": 0.3739, "step": 9051 }, { "epoch": 11.079559363525092, "grad_norm": 1.7507623043046063, "learning_rate": 7.96449820836181e-07, "loss": 0.5359, "step": 9052 }, { "epoch": 11.080783353733171, "grad_norm": 2.072053109697878, "learning_rate": 7.959808216928116e-07, "loss": 0.4024, "step": 9053 }, { "epoch": 11.082007343941248, "grad_norm": 2.1449997890513215, "learning_rate": 7.955119345312809e-07, "loss": 0.4154, "step": 9054 }, { "epoch": 11.083231334149326, "grad_norm": 2.4011598293805614, "learning_rate": 7.950431593824015e-07, "loss": 0.5864, "step": 9055 }, { "epoch": 11.084455324357405, "grad_norm": 2.1241554889745897, "learning_rate": 7.945744962769791e-07, "loss": 0.7425, "step": 9056 }, { "epoch": 11.085679314565484, "grad_norm": 1.6662494654909628, "learning_rate": 7.941059452458133e-07, "loss": 0.5051, "step": 9057 }, { "epoch": 11.086903304773562, "grad_norm": 1.0550890742901429, "learning_rate": 7.936375063196949e-07, "loss": 0.3959, "step": 9058 }, { "epoch": 11.088127294981641, "grad_norm": 1.254876338661226, "learning_rate": 7.931691795294083e-07, "loss": 0.578, "step": 9059 }, { "epoch": 11.089351285189718, "grad_norm": 2.8877306921798107, "learning_rate": 7.9270096490573e-07, "loss": 0.3495, "step": 9060 }, { "epoch": 11.090575275397796, "grad_norm": 1.0833302202086688, "learning_rate": 7.922328624794285e-07, "loss": 0.5891, "step": 9061 }, { "epoch": 11.091799265605875, "grad_norm": 1.4817649774576878, "learning_rate": 7.917648722812684e-07, "loss": 0.5549, "step": 9062 }, { "epoch": 11.093023255813954, "grad_norm": 2.953505720386357, "learning_rate": 7.912969943420018e-07, "loss": 0.4077, "step": 9063 }, { "epoch": 11.094247246022032, "grad_norm": 1.89259016000055, "learning_rate": 7.908292286923766e-07, "loss": 0.3858, "step": 9064 }, { "epoch": 11.095471236230111, "grad_norm": 2.2833507750051956, "learning_rate": 7.903615753631327e-07, "loss": 0.4424, "step": 9065 }, { "epoch": 11.096695226438188, "grad_norm": 2.695021120179962, "learning_rate": 7.898940343850017e-07, "loss": 0.4557, "step": 9066 }, { "epoch": 11.097919216646266, "grad_norm": 2.274765322029223, "learning_rate": 7.894266057887107e-07, "loss": 0.961, "step": 9067 }, { "epoch": 11.099143206854345, "grad_norm": 1.306886182953371, "learning_rate": 7.889592896049767e-07, "loss": 0.9722, "step": 9068 }, { "epoch": 11.100367197062424, "grad_norm": 2.0656082370102236, "learning_rate": 7.884920858645088e-07, "loss": 0.4034, "step": 9069 }, { "epoch": 11.101591187270502, "grad_norm": 3.7382445197822136, "learning_rate": 7.880249945980095e-07, "loss": 0.4169, "step": 9070 }, { "epoch": 11.10281517747858, "grad_norm": 3.1049568336389965, "learning_rate": 7.875580158361765e-07, "loss": 0.3003, "step": 9071 }, { "epoch": 11.104039167686658, "grad_norm": 1.3420903798518247, "learning_rate": 7.870911496096967e-07, "loss": 0.6379, "step": 9072 }, { "epoch": 11.105263157894736, "grad_norm": 1.296158044267461, "learning_rate": 7.866243959492509e-07, "loss": 0.9826, "step": 9073 }, { "epoch": 11.106487148102815, "grad_norm": 1.2906180768021773, "learning_rate": 7.861577548855123e-07, "loss": 0.7591, "step": 9074 }, { "epoch": 11.107711138310894, "grad_norm": 1.2433176899451637, "learning_rate": 7.856912264491473e-07, "loss": 0.5279, "step": 9075 }, { "epoch": 11.108935128518972, "grad_norm": 2.918391339038749, "learning_rate": 7.852248106708141e-07, "loss": 0.4171, "step": 9076 }, { "epoch": 11.11015911872705, "grad_norm": 1.5352841303201348, "learning_rate": 7.847585075811636e-07, "loss": 0.8624, "step": 9077 }, { "epoch": 11.11138310893513, "grad_norm": 1.7867949910916066, "learning_rate": 7.8429231721084e-07, "loss": 0.5427, "step": 9078 }, { "epoch": 11.112607099143206, "grad_norm": 1.1824868865946672, "learning_rate": 7.838262395904792e-07, "loss": 0.691, "step": 9079 }, { "epoch": 11.113831089351285, "grad_norm": 1.3146322324244117, "learning_rate": 7.833602747507101e-07, "loss": 0.7286, "step": 9080 }, { "epoch": 11.115055079559363, "grad_norm": 1.6182449169451625, "learning_rate": 7.828944227221546e-07, "loss": 1.2426, "step": 9081 }, { "epoch": 11.116279069767442, "grad_norm": 1.5955642993106962, "learning_rate": 7.824286835354263e-07, "loss": 0.9277, "step": 9082 }, { "epoch": 11.11750305997552, "grad_norm": 0.7424132452994368, "learning_rate": 7.819630572211315e-07, "loss": 0.2555, "step": 9083 }, { "epoch": 11.1187270501836, "grad_norm": 1.484020607951024, "learning_rate": 7.814975438098704e-07, "loss": 0.3579, "step": 9084 }, { "epoch": 11.119951040391676, "grad_norm": 2.2102104942832668, "learning_rate": 7.810321433322348e-07, "loss": 0.4653, "step": 9085 }, { "epoch": 11.121175030599755, "grad_norm": 1.4141735938394249, "learning_rate": 7.805668558188095e-07, "loss": 0.4281, "step": 9086 }, { "epoch": 11.122399020807833, "grad_norm": 2.2722903076937175, "learning_rate": 7.801016813001696e-07, "loss": 0.5753, "step": 9087 }, { "epoch": 11.123623011015912, "grad_norm": 1.6896563783615277, "learning_rate": 7.796366198068853e-07, "loss": 0.7264, "step": 9088 }, { "epoch": 11.12484700122399, "grad_norm": 1.9011428103531902, "learning_rate": 7.791716713695196e-07, "loss": 0.5987, "step": 9089 }, { "epoch": 11.12607099143207, "grad_norm": 1.3223997013010873, "learning_rate": 7.78706836018627e-07, "loss": 0.7352, "step": 9090 }, { "epoch": 11.127294981640146, "grad_norm": 1.4186627612434166, "learning_rate": 7.782421137847545e-07, "loss": 0.4313, "step": 9091 }, { "epoch": 11.128518971848225, "grad_norm": 3.66746973237799, "learning_rate": 7.77777504698442e-07, "loss": 0.4725, "step": 9092 }, { "epoch": 11.129742962056303, "grad_norm": 2.1174106319585206, "learning_rate": 7.773130087902217e-07, "loss": 0.6332, "step": 9093 }, { "epoch": 11.130966952264382, "grad_norm": 1.2773208865439043, "learning_rate": 7.768486260906186e-07, "loss": 0.8927, "step": 9094 }, { "epoch": 11.13219094247246, "grad_norm": 2.4292649627633094, "learning_rate": 7.763843566301505e-07, "loss": 0.5545, "step": 9095 }, { "epoch": 11.13341493268054, "grad_norm": 1.3557435645291944, "learning_rate": 7.759202004393263e-07, "loss": 0.5312, "step": 9096 }, { "epoch": 11.134638922888616, "grad_norm": 3.447571551851755, "learning_rate": 7.754561575486513e-07, "loss": 0.3307, "step": 9097 }, { "epoch": 11.135862913096695, "grad_norm": 1.638558041786235, "learning_rate": 7.749922279886183e-07, "loss": 1.1662, "step": 9098 }, { "epoch": 11.137086903304773, "grad_norm": 1.499538569700982, "learning_rate": 7.745284117897156e-07, "loss": 0.5427, "step": 9099 }, { "epoch": 11.138310893512852, "grad_norm": 1.089358414460181, "learning_rate": 7.740647089824236e-07, "loss": 0.3922, "step": 9100 }, { "epoch": 11.13953488372093, "grad_norm": 1.9188916057645615, "learning_rate": 7.736011195972143e-07, "loss": 1.1016, "step": 9101 }, { "epoch": 11.140758873929009, "grad_norm": 1.5536141968734014, "learning_rate": 7.731376436645547e-07, "loss": 0.3327, "step": 9102 }, { "epoch": 11.141982864137088, "grad_norm": 1.6680839934894849, "learning_rate": 7.726742812149018e-07, "loss": 1.1716, "step": 9103 }, { "epoch": 11.143206854345165, "grad_norm": 1.9484252697281426, "learning_rate": 7.722110322787072e-07, "loss": 0.489, "step": 9104 }, { "epoch": 11.144430844553243, "grad_norm": 2.569829002386726, "learning_rate": 7.717478968864111e-07, "loss": 0.3272, "step": 9105 }, { "epoch": 11.145654834761322, "grad_norm": 1.1535842832355192, "learning_rate": 7.71284875068452e-07, "loss": 0.5643, "step": 9106 }, { "epoch": 11.1468788249694, "grad_norm": 2.8093778229001214, "learning_rate": 7.708219668552566e-07, "loss": 0.4425, "step": 9107 }, { "epoch": 11.148102815177479, "grad_norm": 2.330324877420494, "learning_rate": 7.703591722772457e-07, "loss": 0.6354, "step": 9108 }, { "epoch": 11.149326805385558, "grad_norm": 1.732857296987824, "learning_rate": 7.698964913648327e-07, "loss": 0.4924, "step": 9109 }, { "epoch": 11.150550795593634, "grad_norm": 2.395273694420052, "learning_rate": 7.694339241484231e-07, "loss": 0.4109, "step": 9110 }, { "epoch": 11.151774785801713, "grad_norm": 1.1402521487978383, "learning_rate": 7.68971470658415e-07, "loss": 0.367, "step": 9111 }, { "epoch": 11.152998776009792, "grad_norm": 1.2498823238802579, "learning_rate": 7.685091309251991e-07, "loss": 0.5531, "step": 9112 }, { "epoch": 11.15422276621787, "grad_norm": 2.3409235946449924, "learning_rate": 7.680469049791589e-07, "loss": 0.9805, "step": 9113 }, { "epoch": 11.155446756425949, "grad_norm": 1.8843083817100326, "learning_rate": 7.675847928506691e-07, "loss": 0.5227, "step": 9114 }, { "epoch": 11.156670746634028, "grad_norm": 1.2280877648771822, "learning_rate": 7.671227945701007e-07, "loss": 0.6573, "step": 9115 }, { "epoch": 11.157894736842104, "grad_norm": 1.9848982625545588, "learning_rate": 7.666609101678121e-07, "loss": 0.3564, "step": 9116 }, { "epoch": 11.159118727050183, "grad_norm": 1.7460817593611593, "learning_rate": 7.661991396741569e-07, "loss": 1.5531, "step": 9117 }, { "epoch": 11.160342717258262, "grad_norm": 1.5512508292757279, "learning_rate": 7.657374831194811e-07, "loss": 0.3319, "step": 9118 }, { "epoch": 11.16156670746634, "grad_norm": 1.168089979472258, "learning_rate": 7.652759405341237e-07, "loss": 0.6023, "step": 9119 }, { "epoch": 11.162790697674419, "grad_norm": 2.0344674463992787, "learning_rate": 7.648145119484152e-07, "loss": 0.7586, "step": 9120 }, { "epoch": 11.164014687882498, "grad_norm": 1.747980283228499, "learning_rate": 7.643531973926791e-07, "loss": 0.482, "step": 9121 }, { "epoch": 11.165238678090576, "grad_norm": 1.4712900649859744, "learning_rate": 7.638919968972311e-07, "loss": 1.3046, "step": 9122 }, { "epoch": 11.166462668298653, "grad_norm": 1.153693911464379, "learning_rate": 7.634309104923798e-07, "loss": 0.4453, "step": 9123 }, { "epoch": 11.167686658506732, "grad_norm": 1.1122289361877795, "learning_rate": 7.629699382084257e-07, "loss": 0.6785, "step": 9124 }, { "epoch": 11.16891064871481, "grad_norm": 1.6293839506096226, "learning_rate": 7.625090800756626e-07, "loss": 1.0644, "step": 9125 }, { "epoch": 11.170134638922889, "grad_norm": 3.142294094788809, "learning_rate": 7.620483361243761e-07, "loss": 0.3691, "step": 9126 }, { "epoch": 11.171358629130967, "grad_norm": 2.6980444388815017, "learning_rate": 7.615877063848448e-07, "loss": 0.6525, "step": 9127 }, { "epoch": 11.172582619339046, "grad_norm": 1.481898591842148, "learning_rate": 7.611271908873397e-07, "loss": 0.7476, "step": 9128 }, { "epoch": 11.173806609547123, "grad_norm": 1.8642462988957658, "learning_rate": 7.606667896621236e-07, "loss": 0.5438, "step": 9129 }, { "epoch": 11.175030599755202, "grad_norm": 1.5543913081424334, "learning_rate": 7.602065027394528e-07, "loss": 0.6206, "step": 9130 }, { "epoch": 11.17625458996328, "grad_norm": 1.9162629538149112, "learning_rate": 7.59746330149575e-07, "loss": 0.4541, "step": 9131 }, { "epoch": 11.177478580171359, "grad_norm": 1.6064608708152974, "learning_rate": 7.592862719227323e-07, "loss": 0.5754, "step": 9132 }, { "epoch": 11.178702570379437, "grad_norm": 1.2616563022953007, "learning_rate": 7.588263280891581e-07, "loss": 0.5883, "step": 9133 }, { "epoch": 11.179926560587516, "grad_norm": 1.9146964240642799, "learning_rate": 7.583664986790767e-07, "loss": 0.4771, "step": 9134 }, { "epoch": 11.181150550795593, "grad_norm": 1.0736450356230924, "learning_rate": 7.579067837227064e-07, "loss": 0.5357, "step": 9135 }, { "epoch": 11.182374541003671, "grad_norm": 2.618260070759814, "learning_rate": 7.574471832502595e-07, "loss": 0.387, "step": 9136 }, { "epoch": 11.18359853121175, "grad_norm": 2.207984561315172, "learning_rate": 7.569876972919384e-07, "loss": 0.5316, "step": 9137 }, { "epoch": 11.184822521419829, "grad_norm": 2.473155313859028, "learning_rate": 7.565283258779388e-07, "loss": 0.3911, "step": 9138 }, { "epoch": 11.186046511627907, "grad_norm": 2.435780528158061, "learning_rate": 7.560690690384501e-07, "loss": 0.441, "step": 9139 }, { "epoch": 11.187270501835986, "grad_norm": 2.9536713629994464, "learning_rate": 7.556099268036498e-07, "loss": 0.3597, "step": 9140 }, { "epoch": 11.188494492044065, "grad_norm": 1.5630554290864693, "learning_rate": 7.55150899203714e-07, "loss": 0.9144, "step": 9141 }, { "epoch": 11.189718482252141, "grad_norm": 2.131091107003431, "learning_rate": 7.546919862688076e-07, "loss": 0.543, "step": 9142 }, { "epoch": 11.19094247246022, "grad_norm": 1.710285488802874, "learning_rate": 7.542331880290882e-07, "loss": 0.4134, "step": 9143 }, { "epoch": 11.192166462668299, "grad_norm": 2.2787525268794533, "learning_rate": 7.537745045147066e-07, "loss": 0.3687, "step": 9144 }, { "epoch": 11.193390452876377, "grad_norm": 1.6808967914357382, "learning_rate": 7.533159357558056e-07, "loss": 0.4521, "step": 9145 }, { "epoch": 11.194614443084456, "grad_norm": 2.6093955952367422, "learning_rate": 7.52857481782521e-07, "loss": 0.8137, "step": 9146 }, { "epoch": 11.195838433292534, "grad_norm": 1.963454899072075, "learning_rate": 7.523991426249802e-07, "loss": 0.5188, "step": 9147 }, { "epoch": 11.197062423500611, "grad_norm": 1.5012452829516905, "learning_rate": 7.519409183133033e-07, "loss": 0.6006, "step": 9148 }, { "epoch": 11.19828641370869, "grad_norm": 1.9075276671449244, "learning_rate": 7.514828088776041e-07, "loss": 0.727, "step": 9149 }, { "epoch": 11.199510403916769, "grad_norm": 1.7047109922290788, "learning_rate": 7.510248143479876e-07, "loss": 0.4509, "step": 9150 }, { "epoch": 11.200734394124847, "grad_norm": 1.8531093354105534, "learning_rate": 7.505669347545522e-07, "loss": 0.4685, "step": 9151 }, { "epoch": 11.201958384332926, "grad_norm": 1.6176632290320638, "learning_rate": 7.501091701273861e-07, "loss": 0.472, "step": 9152 }, { "epoch": 11.203182374541004, "grad_norm": 1.1426095234925107, "learning_rate": 7.496515204965723e-07, "loss": 0.6381, "step": 9153 }, { "epoch": 11.204406364749081, "grad_norm": 1.9169721458303726, "learning_rate": 7.491939858921871e-07, "loss": 0.5799, "step": 9154 }, { "epoch": 11.20563035495716, "grad_norm": 1.4332003874143537, "learning_rate": 7.487365663442975e-07, "loss": 0.43, "step": 9155 }, { "epoch": 11.206854345165238, "grad_norm": 2.204336299885022, "learning_rate": 7.482792618829635e-07, "loss": 0.7124, "step": 9156 }, { "epoch": 11.208078335373317, "grad_norm": 3.552237915978077, "learning_rate": 7.478220725382368e-07, "loss": 0.4215, "step": 9157 }, { "epoch": 11.209302325581396, "grad_norm": 2.2082325054828913, "learning_rate": 7.473649983401629e-07, "loss": 0.5635, "step": 9158 }, { "epoch": 11.210526315789474, "grad_norm": 1.6393584012439335, "learning_rate": 7.469080393187786e-07, "loss": 0.4008, "step": 9159 }, { "epoch": 11.211750305997551, "grad_norm": 1.2567536383872553, "learning_rate": 7.464511955041139e-07, "loss": 0.682, "step": 9160 }, { "epoch": 11.21297429620563, "grad_norm": 2.7340917580872452, "learning_rate": 7.459944669261898e-07, "loss": 0.9466, "step": 9161 }, { "epoch": 11.214198286413708, "grad_norm": 1.4058596927258034, "learning_rate": 7.455378536150234e-07, "loss": 0.8703, "step": 9162 }, { "epoch": 11.215422276621787, "grad_norm": 1.8665857222648208, "learning_rate": 7.45081355600619e-07, "loss": 0.6411, "step": 9163 }, { "epoch": 11.216646266829866, "grad_norm": 1.6787495099736394, "learning_rate": 7.44624972912977e-07, "loss": 0.6343, "step": 9164 }, { "epoch": 11.217870257037944, "grad_norm": 1.7791783845458926, "learning_rate": 7.441687055820893e-07, "loss": 0.6262, "step": 9165 }, { "epoch": 11.219094247246023, "grad_norm": 3.5719544405103507, "learning_rate": 7.437125536379389e-07, "loss": 0.758, "step": 9166 }, { "epoch": 11.2203182374541, "grad_norm": 2.144505497733659, "learning_rate": 7.432565171105041e-07, "loss": 1.1171, "step": 9167 }, { "epoch": 11.221542227662178, "grad_norm": 2.3872502001618265, "learning_rate": 7.428005960297541e-07, "loss": 0.4658, "step": 9168 }, { "epoch": 11.222766217870257, "grad_norm": 1.847329083113318, "learning_rate": 7.423447904256489e-07, "loss": 1.0824, "step": 9169 }, { "epoch": 11.223990208078336, "grad_norm": 2.2244255178768264, "learning_rate": 7.418891003281419e-07, "loss": 0.6726, "step": 9170 }, { "epoch": 11.225214198286414, "grad_norm": 3.0257141022428806, "learning_rate": 7.414335257671814e-07, "loss": 0.3409, "step": 9171 }, { "epoch": 11.226438188494493, "grad_norm": 2.6617880999155075, "learning_rate": 7.409780667727051e-07, "loss": 0.398, "step": 9172 }, { "epoch": 11.22766217870257, "grad_norm": 1.4008937151922232, "learning_rate": 7.405227233746439e-07, "loss": 0.7739, "step": 9173 }, { "epoch": 11.228886168910648, "grad_norm": 2.174140888281186, "learning_rate": 7.400674956029216e-07, "loss": 0.4332, "step": 9174 }, { "epoch": 11.230110159118727, "grad_norm": 1.295026517459073, "learning_rate": 7.396123834874541e-07, "loss": 0.4748, "step": 9175 }, { "epoch": 11.231334149326806, "grad_norm": 1.6189994835044081, "learning_rate": 7.391573870581492e-07, "loss": 0.5668, "step": 9176 }, { "epoch": 11.232558139534884, "grad_norm": 1.6400974007676203, "learning_rate": 7.387025063449082e-07, "loss": 1.2916, "step": 9177 }, { "epoch": 11.233782129742963, "grad_norm": 1.4401681692308206, "learning_rate": 7.382477413776237e-07, "loss": 1.1474, "step": 9178 }, { "epoch": 11.23500611995104, "grad_norm": 1.123553867978941, "learning_rate": 7.377930921861806e-07, "loss": 0.494, "step": 9179 }, { "epoch": 11.236230110159118, "grad_norm": 2.1241558425948175, "learning_rate": 7.373385588004592e-07, "loss": 1.201, "step": 9180 }, { "epoch": 11.237454100367197, "grad_norm": 2.6032484453652613, "learning_rate": 7.368841412503275e-07, "loss": 0.3166, "step": 9181 }, { "epoch": 11.238678090575275, "grad_norm": 1.516883684419195, "learning_rate": 7.364298395656483e-07, "loss": 1.3987, "step": 9182 }, { "epoch": 11.239902080783354, "grad_norm": 1.1605717128897368, "learning_rate": 7.359756537762766e-07, "loss": 0.5334, "step": 9183 }, { "epoch": 11.241126070991433, "grad_norm": 1.3880649547828297, "learning_rate": 7.355215839120608e-07, "loss": 0.4982, "step": 9184 }, { "epoch": 11.24235006119951, "grad_norm": 1.9746009760310927, "learning_rate": 7.350676300028403e-07, "loss": 0.7242, "step": 9185 }, { "epoch": 11.243574051407588, "grad_norm": 1.964830344463645, "learning_rate": 7.346137920784477e-07, "loss": 0.4557, "step": 9186 }, { "epoch": 11.244798041615667, "grad_norm": 1.3535945993292509, "learning_rate": 7.341600701687058e-07, "loss": 0.6225, "step": 9187 }, { "epoch": 11.246022031823745, "grad_norm": 2.0156665478079185, "learning_rate": 7.33706464303433e-07, "loss": 0.9995, "step": 9188 }, { "epoch": 11.247246022031824, "grad_norm": 1.65450977795214, "learning_rate": 7.332529745124386e-07, "loss": 1.6339, "step": 9189 }, { "epoch": 11.248470012239903, "grad_norm": 1.853633702127774, "learning_rate": 7.32799600825524e-07, "loss": 0.612, "step": 9190 }, { "epoch": 11.249694002447981, "grad_norm": 1.7687767927752847, "learning_rate": 7.323463432724831e-07, "loss": 0.5203, "step": 9191 }, { "epoch": 11.250917992656058, "grad_norm": 2.164772063946902, "learning_rate": 7.318932018831024e-07, "loss": 0.391, "step": 9192 }, { "epoch": 11.252141982864137, "grad_norm": 1.2557300534939466, "learning_rate": 7.314401766871607e-07, "loss": 1.0389, "step": 9193 }, { "epoch": 11.253365973072215, "grad_norm": 2.189965988523399, "learning_rate": 7.309872677144291e-07, "loss": 0.5955, "step": 9194 }, { "epoch": 11.254589963280294, "grad_norm": 1.2826601182778723, "learning_rate": 7.305344749946714e-07, "loss": 0.6445, "step": 9195 }, { "epoch": 11.255813953488373, "grad_norm": 1.2707907329611157, "learning_rate": 7.300817985576422e-07, "loss": 0.8924, "step": 9196 }, { "epoch": 11.257037943696451, "grad_norm": 1.4735894090703174, "learning_rate": 7.296292384330924e-07, "loss": 0.6637, "step": 9197 }, { "epoch": 11.258261933904528, "grad_norm": 1.626665321068316, "learning_rate": 7.2917679465076e-07, "loss": 0.5494, "step": 9198 }, { "epoch": 11.259485924112607, "grad_norm": 1.9587523873809236, "learning_rate": 7.28724467240379e-07, "loss": 0.4567, "step": 9199 }, { "epoch": 11.260709914320685, "grad_norm": 1.395759840563724, "learning_rate": 7.282722562316735e-07, "loss": 1.5626, "step": 9200 }, { "epoch": 11.261933904528764, "grad_norm": 1.5934285031704438, "learning_rate": 7.278201616543631e-07, "loss": 0.6878, "step": 9201 }, { "epoch": 11.263157894736842, "grad_norm": 2.1316361555822154, "learning_rate": 7.273681835381569e-07, "loss": 1.0999, "step": 9202 }, { "epoch": 11.264381884944921, "grad_norm": 2.047506549009432, "learning_rate": 7.26916321912757e-07, "loss": 0.3168, "step": 9203 }, { "epoch": 11.265605875152998, "grad_norm": 2.3429887012076076, "learning_rate": 7.26464576807859e-07, "loss": 0.9683, "step": 9204 }, { "epoch": 11.266829865361077, "grad_norm": 1.8865114985989797, "learning_rate": 7.260129482531477e-07, "loss": 0.609, "step": 9205 }, { "epoch": 11.268053855569155, "grad_norm": 2.4163214580647634, "learning_rate": 7.255614362783048e-07, "loss": 0.543, "step": 9206 }, { "epoch": 11.269277845777234, "grad_norm": 2.3934640945231846, "learning_rate": 7.251100409130008e-07, "loss": 0.416, "step": 9207 }, { "epoch": 11.270501835985312, "grad_norm": 1.58977672184993, "learning_rate": 7.246587621869003e-07, "loss": 0.351, "step": 9208 }, { "epoch": 11.271725826193391, "grad_norm": 2.628137159952994, "learning_rate": 7.242076001296594e-07, "loss": 0.9276, "step": 9209 }, { "epoch": 11.27294981640147, "grad_norm": 1.0762399565429819, "learning_rate": 7.237565547709266e-07, "loss": 0.5452, "step": 9210 }, { "epoch": 11.274173806609546, "grad_norm": 3.046337802339018, "learning_rate": 7.233056261403434e-07, "loss": 0.3155, "step": 9211 }, { "epoch": 11.275397796817625, "grad_norm": 2.0005295084866375, "learning_rate": 7.228548142675429e-07, "loss": 0.5441, "step": 9212 }, { "epoch": 11.276621787025704, "grad_norm": 1.358163587024687, "learning_rate": 7.224041191821499e-07, "loss": 0.4078, "step": 9213 }, { "epoch": 11.277845777233782, "grad_norm": 1.0591034443143093, "learning_rate": 7.219535409137843e-07, "loss": 0.5924, "step": 9214 }, { "epoch": 11.279069767441861, "grad_norm": 3.130781399445709, "learning_rate": 7.215030794920561e-07, "loss": 0.4567, "step": 9215 }, { "epoch": 11.28029375764994, "grad_norm": 2.4149238868241656, "learning_rate": 7.210527349465662e-07, "loss": 0.5885, "step": 9216 }, { "epoch": 11.281517747858016, "grad_norm": 3.3263824217706914, "learning_rate": 7.206025073069112e-07, "loss": 0.5176, "step": 9217 }, { "epoch": 11.282741738066095, "grad_norm": 1.200507949084988, "learning_rate": 7.201523966026769e-07, "loss": 0.6228, "step": 9218 }, { "epoch": 11.283965728274174, "grad_norm": 2.3254970094874405, "learning_rate": 7.197024028634447e-07, "loss": 1.4033, "step": 9219 }, { "epoch": 11.285189718482252, "grad_norm": 1.3277088100389418, "learning_rate": 7.192525261187858e-07, "loss": 0.7712, "step": 9220 }, { "epoch": 11.286413708690331, "grad_norm": 1.4289192678138443, "learning_rate": 7.188027663982642e-07, "loss": 0.4835, "step": 9221 }, { "epoch": 11.28763769889841, "grad_norm": 2.0537694934047264, "learning_rate": 7.183531237314364e-07, "loss": 1.0321, "step": 9222 }, { "epoch": 11.288861689106486, "grad_norm": 1.7661501941189586, "learning_rate": 7.179035981478519e-07, "loss": 1.7992, "step": 9223 }, { "epoch": 11.290085679314565, "grad_norm": 2.2568519707800605, "learning_rate": 7.174541896770512e-07, "loss": 0.3, "step": 9224 }, { "epoch": 11.291309669522644, "grad_norm": 1.2173106595406769, "learning_rate": 7.170048983485681e-07, "loss": 0.5702, "step": 9225 }, { "epoch": 11.292533659730722, "grad_norm": 1.9271085523600158, "learning_rate": 7.16555724191928e-07, "loss": 0.4589, "step": 9226 }, { "epoch": 11.2937576499388, "grad_norm": 1.8319674704634554, "learning_rate": 7.161066672366493e-07, "loss": 1.4968, "step": 9227 }, { "epoch": 11.29498164014688, "grad_norm": 2.126143438838529, "learning_rate": 7.156577275122423e-07, "loss": 0.611, "step": 9228 }, { "epoch": 11.296205630354958, "grad_norm": 1.9567019535296075, "learning_rate": 7.152089050482097e-07, "loss": 0.3936, "step": 9229 }, { "epoch": 11.297429620563035, "grad_norm": 1.3403997662206386, "learning_rate": 7.147601998740464e-07, "loss": 0.5882, "step": 9230 }, { "epoch": 11.298653610771114, "grad_norm": 1.616725754870654, "learning_rate": 7.143116120192386e-07, "loss": 0.4932, "step": 9231 }, { "epoch": 11.299877600979192, "grad_norm": 1.1790510775569667, "learning_rate": 7.138631415132677e-07, "loss": 0.6086, "step": 9232 }, { "epoch": 11.30110159118727, "grad_norm": 1.479883386081326, "learning_rate": 7.134147883856055e-07, "loss": 1.1139, "step": 9233 }, { "epoch": 11.30232558139535, "grad_norm": 2.2427046828883177, "learning_rate": 7.129665526657145e-07, "loss": 0.4472, "step": 9234 }, { "epoch": 11.303549571603428, "grad_norm": 1.429985698959392, "learning_rate": 7.125184343830513e-07, "loss": 0.5136, "step": 9235 }, { "epoch": 11.304773561811505, "grad_norm": 3.2859486962040343, "learning_rate": 7.120704335670656e-07, "loss": 0.4471, "step": 9236 }, { "epoch": 11.305997552019583, "grad_norm": 1.584146874112754, "learning_rate": 7.116225502471983e-07, "loss": 0.4031, "step": 9237 }, { "epoch": 11.307221542227662, "grad_norm": 1.4774946297530975, "learning_rate": 7.111747844528818e-07, "loss": 0.4996, "step": 9238 }, { "epoch": 11.30844553243574, "grad_norm": 3.1950924773787652, "learning_rate": 7.107271362135431e-07, "loss": 0.404, "step": 9239 }, { "epoch": 11.30966952264382, "grad_norm": 1.1187894276931432, "learning_rate": 7.102796055585975e-07, "loss": 0.3856, "step": 9240 }, { "epoch": 11.310893512851898, "grad_norm": 2.236064202470563, "learning_rate": 7.09832192517457e-07, "loss": 0.5745, "step": 9241 }, { "epoch": 11.312117503059975, "grad_norm": 2.4889539003623096, "learning_rate": 7.093848971195235e-07, "loss": 0.4829, "step": 9242 }, { "epoch": 11.313341493268053, "grad_norm": 2.0113920492199044, "learning_rate": 7.089377193941918e-07, "loss": 0.4126, "step": 9243 }, { "epoch": 11.314565483476132, "grad_norm": 1.4575247010121124, "learning_rate": 7.084906593708483e-07, "loss": 0.5274, "step": 9244 }, { "epoch": 11.31578947368421, "grad_norm": 1.6967011959221514, "learning_rate": 7.080437170788723e-07, "loss": 1.4492, "step": 9245 }, { "epoch": 11.31701346389229, "grad_norm": 1.6556376058000686, "learning_rate": 7.075968925476353e-07, "loss": 0.6989, "step": 9246 }, { "epoch": 11.318237454100368, "grad_norm": 2.0108899132862255, "learning_rate": 7.071501858065008e-07, "loss": 0.5107, "step": 9247 }, { "epoch": 11.319461444308445, "grad_norm": 1.9845941203102704, "learning_rate": 7.06703596884824e-07, "loss": 0.4858, "step": 9248 }, { "epoch": 11.320685434516523, "grad_norm": 1.6660343767372723, "learning_rate": 7.062571258119547e-07, "loss": 0.5911, "step": 9249 }, { "epoch": 11.321909424724602, "grad_norm": 1.3579458820539614, "learning_rate": 7.058107726172325e-07, "loss": 0.6761, "step": 9250 }, { "epoch": 11.32313341493268, "grad_norm": 1.6401436283944666, "learning_rate": 7.053645373299909e-07, "loss": 1.5544, "step": 9251 }, { "epoch": 11.32435740514076, "grad_norm": 1.4415503443343662, "learning_rate": 7.049184199795531e-07, "loss": 0.3641, "step": 9252 }, { "epoch": 11.325581395348838, "grad_norm": 1.6748704312231806, "learning_rate": 7.044724205952366e-07, "loss": 0.6145, "step": 9253 }, { "epoch": 11.326805385556916, "grad_norm": 3.3291981179399035, "learning_rate": 7.040265392063522e-07, "loss": 0.3392, "step": 9254 }, { "epoch": 11.328029375764993, "grad_norm": 2.745298199409814, "learning_rate": 7.035807758422006e-07, "loss": 0.4001, "step": 9255 }, { "epoch": 11.329253365973072, "grad_norm": 1.6478941029105787, "learning_rate": 7.031351305320761e-07, "loss": 0.5972, "step": 9256 }, { "epoch": 11.33047735618115, "grad_norm": 1.8171200256913236, "learning_rate": 7.026896033052647e-07, "loss": 0.6729, "step": 9257 }, { "epoch": 11.331701346389229, "grad_norm": 1.539496196105262, "learning_rate": 7.022441941910446e-07, "loss": 0.5241, "step": 9258 }, { "epoch": 11.332925336597308, "grad_norm": 2.418844382352003, "learning_rate": 7.017989032186867e-07, "loss": 0.4781, "step": 9259 }, { "epoch": 11.334149326805386, "grad_norm": 2.6292267949732278, "learning_rate": 7.013537304174537e-07, "loss": 0.4685, "step": 9260 }, { "epoch": 11.335373317013463, "grad_norm": 1.3485552864058679, "learning_rate": 7.009086758166001e-07, "loss": 0.5812, "step": 9261 }, { "epoch": 11.336597307221542, "grad_norm": 1.1445512436033012, "learning_rate": 7.004637394453756e-07, "loss": 0.6606, "step": 9262 }, { "epoch": 11.33782129742962, "grad_norm": 1.7408587670652917, "learning_rate": 7.000189213330171e-07, "loss": 0.994, "step": 9263 }, { "epoch": 11.339045287637699, "grad_norm": 2.438142777804475, "learning_rate": 6.995742215087578e-07, "loss": 0.3708, "step": 9264 }, { "epoch": 11.340269277845778, "grad_norm": 2.504701754848156, "learning_rate": 6.991296400018211e-07, "loss": 0.5173, "step": 9265 }, { "epoch": 11.341493268053856, "grad_norm": 1.5316765298500248, "learning_rate": 6.986851768414227e-07, "loss": 0.4893, "step": 9266 }, { "epoch": 11.342717258261933, "grad_norm": 1.9461252815952585, "learning_rate": 6.982408320567726e-07, "loss": 1.2106, "step": 9267 }, { "epoch": 11.343941248470012, "grad_norm": 2.61650354750077, "learning_rate": 6.977966056770719e-07, "loss": 0.3413, "step": 9268 }, { "epoch": 11.34516523867809, "grad_norm": 1.9296089579297702, "learning_rate": 6.973524977315114e-07, "loss": 0.4785, "step": 9269 }, { "epoch": 11.346389228886169, "grad_norm": 2.563334358725834, "learning_rate": 6.969085082492763e-07, "loss": 1.2763, "step": 9270 }, { "epoch": 11.347613219094248, "grad_norm": 1.5490257736661406, "learning_rate": 6.964646372595457e-07, "loss": 1.5693, "step": 9271 }, { "epoch": 11.348837209302326, "grad_norm": 1.0504999307705611, "learning_rate": 6.960208847914885e-07, "loss": 0.5832, "step": 9272 }, { "epoch": 11.350061199510403, "grad_norm": 1.5001011555471966, "learning_rate": 6.955772508742661e-07, "loss": 0.6149, "step": 9273 }, { "epoch": 11.351285189718482, "grad_norm": 2.082757985889313, "learning_rate": 6.951337355370328e-07, "loss": 0.4982, "step": 9274 }, { "epoch": 11.35250917992656, "grad_norm": 1.397001870090096, "learning_rate": 6.946903388089343e-07, "loss": 0.5416, "step": 9275 }, { "epoch": 11.353733170134639, "grad_norm": 1.6740243079566808, "learning_rate": 6.942470607191099e-07, "loss": 0.6964, "step": 9276 }, { "epoch": 11.354957160342718, "grad_norm": 1.51939938291957, "learning_rate": 6.938039012966894e-07, "loss": 1.3093, "step": 9277 }, { "epoch": 11.356181150550796, "grad_norm": 2.1098612718995247, "learning_rate": 6.933608605707959e-07, "loss": 0.5306, "step": 9278 }, { "epoch": 11.357405140758875, "grad_norm": 2.324104354015338, "learning_rate": 6.929179385705434e-07, "loss": 0.3697, "step": 9279 }, { "epoch": 11.358629130966952, "grad_norm": 1.0628003048229993, "learning_rate": 6.924751353250416e-07, "loss": 0.5238, "step": 9280 }, { "epoch": 11.35985312117503, "grad_norm": 1.6323572505380874, "learning_rate": 6.920324508633878e-07, "loss": 0.4637, "step": 9281 }, { "epoch": 11.361077111383109, "grad_norm": 1.5893445870669265, "learning_rate": 6.915898852146741e-07, "loss": 0.6266, "step": 9282 }, { "epoch": 11.362301101591187, "grad_norm": 3.4202677545557543, "learning_rate": 6.911474384079833e-07, "loss": 0.3468, "step": 9283 }, { "epoch": 11.363525091799266, "grad_norm": 1.4429774597057228, "learning_rate": 6.907051104723933e-07, "loss": 0.5362, "step": 9284 }, { "epoch": 11.364749082007345, "grad_norm": 1.813095750432886, "learning_rate": 6.902629014369714e-07, "loss": 1.5402, "step": 9285 }, { "epoch": 11.365973072215422, "grad_norm": 1.6739840417757066, "learning_rate": 6.898208113307784e-07, "loss": 0.3829, "step": 9286 }, { "epoch": 11.3671970624235, "grad_norm": 2.3730018276317026, "learning_rate": 6.893788401828649e-07, "loss": 0.4587, "step": 9287 }, { "epoch": 11.368421052631579, "grad_norm": 2.0577917038363496, "learning_rate": 6.889369880222776e-07, "loss": 0.5244, "step": 9288 }, { "epoch": 11.369645042839657, "grad_norm": 1.8321011967334995, "learning_rate": 6.88495254878053e-07, "loss": 0.5637, "step": 9289 }, { "epoch": 11.370869033047736, "grad_norm": 1.9997078362009568, "learning_rate": 6.880536407792199e-07, "loss": 0.3856, "step": 9290 }, { "epoch": 11.372093023255815, "grad_norm": 1.5212789368544173, "learning_rate": 6.876121457547996e-07, "loss": 0.7921, "step": 9291 }, { "epoch": 11.373317013463891, "grad_norm": 1.8689514556691684, "learning_rate": 6.871707698338057e-07, "loss": 0.4493, "step": 9292 }, { "epoch": 11.37454100367197, "grad_norm": 0.5860894790672225, "learning_rate": 6.867295130452434e-07, "loss": 0.1288, "step": 9293 }, { "epoch": 11.375764993880049, "grad_norm": 2.5729602225669703, "learning_rate": 6.862883754181107e-07, "loss": 0.4554, "step": 9294 }, { "epoch": 11.376988984088127, "grad_norm": 1.5944769672047863, "learning_rate": 6.858473569813976e-07, "loss": 0.3766, "step": 9295 }, { "epoch": 11.378212974296206, "grad_norm": 1.8328470671983572, "learning_rate": 6.854064577640851e-07, "loss": 0.4567, "step": 9296 }, { "epoch": 11.379436964504285, "grad_norm": 3.366510416089552, "learning_rate": 6.849656777951493e-07, "loss": 0.3765, "step": 9297 }, { "epoch": 11.380660954712361, "grad_norm": 1.36816397449437, "learning_rate": 6.845250171035566e-07, "loss": 0.4406, "step": 9298 }, { "epoch": 11.38188494492044, "grad_norm": 3.315066917680868, "learning_rate": 6.840844757182644e-07, "loss": 0.403, "step": 9299 }, { "epoch": 11.383108935128519, "grad_norm": 1.8403379772981971, "learning_rate": 6.836440536682226e-07, "loss": 0.4609, "step": 9300 }, { "epoch": 11.384332925336597, "grad_norm": 1.4349016803106063, "learning_rate": 6.832037509823763e-07, "loss": 0.5417, "step": 9301 }, { "epoch": 11.385556915544676, "grad_norm": 1.981789330852303, "learning_rate": 6.827635676896597e-07, "loss": 0.3908, "step": 9302 }, { "epoch": 11.386780905752754, "grad_norm": 1.4544851213683228, "learning_rate": 6.823235038189999e-07, "loss": 0.5288, "step": 9303 }, { "epoch": 11.388004895960833, "grad_norm": 2.365766202134165, "learning_rate": 6.81883559399317e-07, "loss": 0.494, "step": 9304 }, { "epoch": 11.38922888616891, "grad_norm": 1.8490652727922594, "learning_rate": 6.814437344595204e-07, "loss": 1.2295, "step": 9305 }, { "epoch": 11.390452876376989, "grad_norm": 1.8432405822810964, "learning_rate": 6.810040290285161e-07, "loss": 1.3025, "step": 9306 }, { "epoch": 11.391676866585067, "grad_norm": 2.78969850879817, "learning_rate": 6.805644431351987e-07, "loss": 1.1758, "step": 9307 }, { "epoch": 11.392900856793146, "grad_norm": 2.489499023623702, "learning_rate": 6.801249768084567e-07, "loss": 0.7093, "step": 9308 }, { "epoch": 11.394124847001224, "grad_norm": 1.2427983658380841, "learning_rate": 6.796856300771698e-07, "loss": 0.5366, "step": 9309 }, { "epoch": 11.395348837209303, "grad_norm": 1.5839588848534054, "learning_rate": 6.792464029702104e-07, "loss": 0.5653, "step": 9310 }, { "epoch": 11.39657282741738, "grad_norm": 2.183935233561735, "learning_rate": 6.788072955164429e-07, "loss": 1.1275, "step": 9311 }, { "epoch": 11.397796817625458, "grad_norm": 2.6073809612114474, "learning_rate": 6.783683077447239e-07, "loss": 0.5466, "step": 9312 }, { "epoch": 11.399020807833537, "grad_norm": 1.396541699922828, "learning_rate": 6.779294396839015e-07, "loss": 0.4484, "step": 9313 }, { "epoch": 11.400244798041616, "grad_norm": 2.8044411711207724, "learning_rate": 6.774906913628173e-07, "loss": 0.442, "step": 9314 }, { "epoch": 11.401468788249694, "grad_norm": 2.0390283123857063, "learning_rate": 6.770520628103048e-07, "loss": 0.7287, "step": 9315 }, { "epoch": 11.402692778457773, "grad_norm": 1.7418741331760392, "learning_rate": 6.766135540551871e-07, "loss": 0.4287, "step": 9316 }, { "epoch": 11.403916768665852, "grad_norm": 0.9766012577225778, "learning_rate": 6.761751651262827e-07, "loss": 0.4259, "step": 9317 }, { "epoch": 11.405140758873928, "grad_norm": 1.3795023581344423, "learning_rate": 6.757368960523996e-07, "loss": 0.67, "step": 9318 }, { "epoch": 11.406364749082007, "grad_norm": 2.9815500578584464, "learning_rate": 6.752987468623412e-07, "loss": 0.7354, "step": 9319 }, { "epoch": 11.407588739290086, "grad_norm": 2.0960857193191114, "learning_rate": 6.748607175848998e-07, "loss": 1.173, "step": 9320 }, { "epoch": 11.408812729498164, "grad_norm": 2.8223928325938297, "learning_rate": 6.744228082488616e-07, "loss": 0.5604, "step": 9321 }, { "epoch": 11.410036719706243, "grad_norm": 1.7528478969582482, "learning_rate": 6.739850188830038e-07, "loss": 0.5262, "step": 9322 }, { "epoch": 11.411260709914322, "grad_norm": 1.8434779777186812, "learning_rate": 6.735473495160969e-07, "loss": 1.0168, "step": 9323 }, { "epoch": 11.412484700122398, "grad_norm": 1.2691214575206564, "learning_rate": 6.731098001769023e-07, "loss": 0.7658, "step": 9324 }, { "epoch": 11.413708690330477, "grad_norm": 1.1871344284104646, "learning_rate": 6.726723708941745e-07, "loss": 0.7437, "step": 9325 }, { "epoch": 11.414932680538556, "grad_norm": 1.4489109913176736, "learning_rate": 6.722350616966591e-07, "loss": 0.9457, "step": 9326 }, { "epoch": 11.416156670746634, "grad_norm": 2.5199606476496235, "learning_rate": 6.717978726130966e-07, "loss": 0.5986, "step": 9327 }, { "epoch": 11.417380660954713, "grad_norm": 2.3108250341800822, "learning_rate": 6.713608036722149e-07, "loss": 0.5447, "step": 9328 }, { "epoch": 11.418604651162791, "grad_norm": 2.0685717782707655, "learning_rate": 6.709238549027375e-07, "loss": 0.5742, "step": 9329 }, { "epoch": 11.419828641370868, "grad_norm": 2.664088766293819, "learning_rate": 6.704870263333791e-07, "loss": 0.4217, "step": 9330 }, { "epoch": 11.421052631578947, "grad_norm": 2.537077886017719, "learning_rate": 6.700503179928458e-07, "loss": 1.0211, "step": 9331 }, { "epoch": 11.422276621787026, "grad_norm": 2.259080738592429, "learning_rate": 6.696137299098376e-07, "loss": 0.463, "step": 9332 }, { "epoch": 11.423500611995104, "grad_norm": 1.3577656594067957, "learning_rate": 6.691772621130457e-07, "loss": 0.5528, "step": 9333 }, { "epoch": 11.424724602203183, "grad_norm": 2.0495467986219253, "learning_rate": 6.687409146311519e-07, "loss": 0.5223, "step": 9334 }, { "epoch": 11.425948592411261, "grad_norm": 1.019803359664632, "learning_rate": 6.683046874928309e-07, "loss": 0.4455, "step": 9335 }, { "epoch": 11.427172582619338, "grad_norm": 1.5276721896186174, "learning_rate": 6.678685807267518e-07, "loss": 1.0796, "step": 9336 }, { "epoch": 11.428396572827417, "grad_norm": 2.6436119130361404, "learning_rate": 6.674325943615728e-07, "loss": 0.9097, "step": 9337 }, { "epoch": 11.429620563035495, "grad_norm": 1.5486684725355158, "learning_rate": 6.669967284259455e-07, "loss": 0.7146, "step": 9338 }, { "epoch": 11.430844553243574, "grad_norm": 1.5736753520620796, "learning_rate": 6.665609829485134e-07, "loss": 1.2112, "step": 9339 }, { "epoch": 11.432068543451653, "grad_norm": 2.184064177381339, "learning_rate": 6.661253579579121e-07, "loss": 0.9172, "step": 9340 }, { "epoch": 11.433292533659731, "grad_norm": 2.733677004674629, "learning_rate": 6.656898534827694e-07, "loss": 0.4599, "step": 9341 }, { "epoch": 11.43451652386781, "grad_norm": 1.5137001092022975, "learning_rate": 6.652544695517047e-07, "loss": 0.6122, "step": 9342 }, { "epoch": 11.435740514075887, "grad_norm": 1.8437874592768346, "learning_rate": 6.648192061933301e-07, "loss": 0.5806, "step": 9343 }, { "epoch": 11.436964504283965, "grad_norm": 1.4771095207972567, "learning_rate": 6.643840634362492e-07, "loss": 0.5904, "step": 9344 }, { "epoch": 11.438188494492044, "grad_norm": 1.141457399416088, "learning_rate": 6.639490413090585e-07, "loss": 0.4145, "step": 9345 }, { "epoch": 11.439412484700123, "grad_norm": 2.6422063932865876, "learning_rate": 6.635141398403455e-07, "loss": 0.3912, "step": 9346 }, { "epoch": 11.440636474908201, "grad_norm": 2.6793064550060053, "learning_rate": 6.630793590586906e-07, "loss": 0.7855, "step": 9347 }, { "epoch": 11.44186046511628, "grad_norm": 3.0440009967159574, "learning_rate": 6.626446989926652e-07, "loss": 0.6914, "step": 9348 }, { "epoch": 11.443084455324357, "grad_norm": 1.9529672859514824, "learning_rate": 6.622101596708349e-07, "loss": 0.6203, "step": 9349 }, { "epoch": 11.444308445532435, "grad_norm": 1.7416631964775362, "learning_rate": 6.617757411217554e-07, "loss": 0.4376, "step": 9350 }, { "epoch": 11.445532435740514, "grad_norm": 1.2058856693433617, "learning_rate": 6.61341443373976e-07, "loss": 0.8404, "step": 9351 }, { "epoch": 11.446756425948593, "grad_norm": 2.613322319471002, "learning_rate": 6.609072664560348e-07, "loss": 0.4262, "step": 9352 }, { "epoch": 11.447980416156671, "grad_norm": 1.131205544125502, "learning_rate": 6.604732103964661e-07, "loss": 0.4682, "step": 9353 }, { "epoch": 11.44920440636475, "grad_norm": 2.5730355731953307, "learning_rate": 6.600392752237945e-07, "loss": 0.2286, "step": 9354 }, { "epoch": 11.450428396572827, "grad_norm": 1.8761472128530874, "learning_rate": 6.596054609665359e-07, "loss": 0.9226, "step": 9355 }, { "epoch": 11.451652386780905, "grad_norm": 2.4582676566269335, "learning_rate": 6.591717676531991e-07, "loss": 0.4208, "step": 9356 }, { "epoch": 11.452876376988984, "grad_norm": 1.7868558556866694, "learning_rate": 6.587381953122854e-07, "loss": 0.4703, "step": 9357 }, { "epoch": 11.454100367197062, "grad_norm": 2.702666971179652, "learning_rate": 6.583047439722867e-07, "loss": 0.7219, "step": 9358 }, { "epoch": 11.455324357405141, "grad_norm": 1.8576364512876349, "learning_rate": 6.578714136616887e-07, "loss": 0.5408, "step": 9359 }, { "epoch": 11.45654834761322, "grad_norm": 1.1406870415609436, "learning_rate": 6.574382044089675e-07, "loss": 0.595, "step": 9360 }, { "epoch": 11.457772337821297, "grad_norm": 1.9511794120514112, "learning_rate": 6.57005116242592e-07, "loss": 0.5805, "step": 9361 }, { "epoch": 11.458996328029375, "grad_norm": 1.4294897263028947, "learning_rate": 6.565721491910245e-07, "loss": 1.2793, "step": 9362 }, { "epoch": 11.460220318237454, "grad_norm": 1.4461134639400834, "learning_rate": 6.561393032827165e-07, "loss": 0.7954, "step": 9363 }, { "epoch": 11.461444308445532, "grad_norm": 1.7847648027413354, "learning_rate": 6.557065785461137e-07, "loss": 0.5139, "step": 9364 }, { "epoch": 11.462668298653611, "grad_norm": 1.642418468034481, "learning_rate": 6.552739750096518e-07, "loss": 0.2935, "step": 9365 }, { "epoch": 11.46389228886169, "grad_norm": 1.7144186977569338, "learning_rate": 6.548414927017622e-07, "loss": 0.3684, "step": 9366 }, { "epoch": 11.465116279069768, "grad_norm": 2.566570151228328, "learning_rate": 6.544091316508647e-07, "loss": 0.6024, "step": 9367 }, { "epoch": 11.466340269277845, "grad_norm": 2.6190794824893993, "learning_rate": 6.539768918853729e-07, "loss": 0.8518, "step": 9368 }, { "epoch": 11.467564259485924, "grad_norm": 1.4362923647664918, "learning_rate": 6.535447734336922e-07, "loss": 0.6914, "step": 9369 }, { "epoch": 11.468788249694002, "grad_norm": 2.5567207056089805, "learning_rate": 6.531127763242182e-07, "loss": 0.4741, "step": 9370 }, { "epoch": 11.470012239902081, "grad_norm": 0.9368338150378875, "learning_rate": 6.526809005853421e-07, "loss": 0.463, "step": 9371 }, { "epoch": 11.47123623011016, "grad_norm": 1.357493197167332, "learning_rate": 6.522491462454445e-07, "loss": 0.5502, "step": 9372 }, { "epoch": 11.472460220318238, "grad_norm": 1.697535879445398, "learning_rate": 6.518175133328985e-07, "loss": 0.4551, "step": 9373 }, { "epoch": 11.473684210526315, "grad_norm": 2.076418045952692, "learning_rate": 6.513860018760698e-07, "loss": 0.6527, "step": 9374 }, { "epoch": 11.474908200734394, "grad_norm": 2.9402604494363027, "learning_rate": 6.509546119033153e-07, "loss": 0.6829, "step": 9375 }, { "epoch": 11.476132190942472, "grad_norm": 1.5866154278013265, "learning_rate": 6.505233434429847e-07, "loss": 0.5667, "step": 9376 }, { "epoch": 11.477356181150551, "grad_norm": 1.9411699637088407, "learning_rate": 6.500921965234191e-07, "loss": 0.5876, "step": 9377 }, { "epoch": 11.47858017135863, "grad_norm": 1.6885148853326255, "learning_rate": 6.496611711729514e-07, "loss": 1.2276, "step": 9378 }, { "epoch": 11.479804161566708, "grad_norm": 2.508540547839427, "learning_rate": 6.492302674199083e-07, "loss": 0.5117, "step": 9379 }, { "epoch": 11.481028151774785, "grad_norm": 1.5704231515551814, "learning_rate": 6.487994852926072e-07, "loss": 1.1615, "step": 9380 }, { "epoch": 11.482252141982864, "grad_norm": 1.816472301899404, "learning_rate": 6.483688248193562e-07, "loss": 0.6264, "step": 9381 }, { "epoch": 11.483476132190942, "grad_norm": 2.0342517102019415, "learning_rate": 6.479382860284572e-07, "loss": 0.9581, "step": 9382 }, { "epoch": 11.48470012239902, "grad_norm": 1.9488865219991744, "learning_rate": 6.475078689482029e-07, "loss": 0.6482, "step": 9383 }, { "epoch": 11.4859241126071, "grad_norm": 1.5686318557573427, "learning_rate": 6.470775736068805e-07, "loss": 0.4593, "step": 9384 }, { "epoch": 11.487148102815178, "grad_norm": 1.863604772546068, "learning_rate": 6.466474000327663e-07, "loss": 0.2897, "step": 9385 }, { "epoch": 11.488372093023255, "grad_norm": 2.671725793882436, "learning_rate": 6.462173482541306e-07, "loss": 0.4854, "step": 9386 }, { "epoch": 11.489596083231334, "grad_norm": 1.3471363828147445, "learning_rate": 6.457874182992327e-07, "loss": 0.3924, "step": 9387 }, { "epoch": 11.490820073439412, "grad_norm": 1.1374167441263758, "learning_rate": 6.453576101963282e-07, "loss": 0.5357, "step": 9388 }, { "epoch": 11.49204406364749, "grad_norm": 1.228177491981677, "learning_rate": 6.44927923973662e-07, "loss": 0.5079, "step": 9389 }, { "epoch": 11.49326805385557, "grad_norm": 2.646953232340129, "learning_rate": 6.444983596594706e-07, "loss": 0.3555, "step": 9390 }, { "epoch": 11.494492044063648, "grad_norm": 2.4750944415248246, "learning_rate": 6.440689172819845e-07, "loss": 0.5739, "step": 9391 }, { "epoch": 11.495716034271727, "grad_norm": 1.6597516084644672, "learning_rate": 6.436395968694242e-07, "loss": 0.4946, "step": 9392 }, { "epoch": 11.496940024479803, "grad_norm": 1.4711426170695057, "learning_rate": 6.432103984500035e-07, "loss": 0.8416, "step": 9393 }, { "epoch": 11.498164014687882, "grad_norm": 1.4167379271349267, "learning_rate": 6.427813220519277e-07, "loss": 0.6435, "step": 9394 }, { "epoch": 11.49938800489596, "grad_norm": 2.862736222518574, "learning_rate": 6.423523677033941e-07, "loss": 1.0555, "step": 9395 }, { "epoch": 11.50061199510404, "grad_norm": 2.0874055097795963, "learning_rate": 6.419235354325909e-07, "loss": 0.4371, "step": 9396 }, { "epoch": 11.501835985312118, "grad_norm": 1.7346817029848305, "learning_rate": 6.414948252677014e-07, "loss": 1.1708, "step": 9397 }, { "epoch": 11.503059975520197, "grad_norm": 1.8036958900179971, "learning_rate": 6.410662372368984e-07, "loss": 0.9831, "step": 9398 }, { "epoch": 11.504283965728273, "grad_norm": 1.5429064125064982, "learning_rate": 6.406377713683459e-07, "loss": 0.6677, "step": 9399 }, { "epoch": 11.505507955936352, "grad_norm": 1.6997807116334063, "learning_rate": 6.40209427690201e-07, "loss": 0.4261, "step": 9400 }, { "epoch": 11.50673194614443, "grad_norm": 1.6401340156260826, "learning_rate": 6.397812062306144e-07, "loss": 0.6673, "step": 9401 }, { "epoch": 11.50795593635251, "grad_norm": 3.0972981050120687, "learning_rate": 6.393531070177267e-07, "loss": 0.525, "step": 9402 }, { "epoch": 11.509179926560588, "grad_norm": 2.1329677927517623, "learning_rate": 6.389251300796704e-07, "loss": 0.3082, "step": 9403 }, { "epoch": 11.510403916768666, "grad_norm": 0.9145916739906653, "learning_rate": 6.384972754445718e-07, "loss": 0.3469, "step": 9404 }, { "epoch": 11.511627906976745, "grad_norm": 3.2843586112023253, "learning_rate": 6.380695431405453e-07, "loss": 0.3448, "step": 9405 }, { "epoch": 11.512851897184822, "grad_norm": 1.8362014002992468, "learning_rate": 6.376419331957028e-07, "loss": 1.8758, "step": 9406 }, { "epoch": 11.5140758873929, "grad_norm": 2.0648461342158537, "learning_rate": 6.372144456381438e-07, "loss": 0.7203, "step": 9407 }, { "epoch": 11.51529987760098, "grad_norm": 2.2416023371834384, "learning_rate": 6.367870804959616e-07, "loss": 0.4815, "step": 9408 }, { "epoch": 11.516523867809058, "grad_norm": 1.6502714203530948, "learning_rate": 6.363598377972407e-07, "loss": 0.4517, "step": 9409 }, { "epoch": 11.517747858017136, "grad_norm": 1.5710552233058677, "learning_rate": 6.359327175700583e-07, "loss": 0.5718, "step": 9410 }, { "epoch": 11.518971848225215, "grad_norm": 1.6922258138894823, "learning_rate": 6.35505719842483e-07, "loss": 1.6211, "step": 9411 }, { "epoch": 11.520195838433292, "grad_norm": 1.1530322472599936, "learning_rate": 6.350788446425754e-07, "loss": 0.5055, "step": 9412 }, { "epoch": 11.52141982864137, "grad_norm": 1.810848603551753, "learning_rate": 6.346520919983876e-07, "loss": 0.951, "step": 9413 }, { "epoch": 11.522643818849449, "grad_norm": 1.0803274652226387, "learning_rate": 6.342254619379657e-07, "loss": 0.5102, "step": 9414 }, { "epoch": 11.523867809057528, "grad_norm": 1.6834970943589604, "learning_rate": 6.337989544893461e-07, "loss": 0.5443, "step": 9415 }, { "epoch": 11.525091799265606, "grad_norm": 1.550240642881175, "learning_rate": 6.333725696805557e-07, "loss": 0.2205, "step": 9416 }, { "epoch": 11.526315789473685, "grad_norm": 1.7550914022539013, "learning_rate": 6.329463075396161e-07, "loss": 0.5688, "step": 9417 }, { "epoch": 11.527539779681762, "grad_norm": 2.1468247433281813, "learning_rate": 6.325201680945384e-07, "loss": 0.6483, "step": 9418 }, { "epoch": 11.52876376988984, "grad_norm": 2.1983005171120307, "learning_rate": 6.320941513733289e-07, "loss": 0.5952, "step": 9419 }, { "epoch": 11.529987760097919, "grad_norm": 1.2053465087849038, "learning_rate": 6.316682574039828e-07, "loss": 0.6691, "step": 9420 }, { "epoch": 11.531211750305998, "grad_norm": 2.849578731485106, "learning_rate": 6.312424862144884e-07, "loss": 0.463, "step": 9421 }, { "epoch": 11.532435740514076, "grad_norm": 2.8121690419913494, "learning_rate": 6.308168378328255e-07, "loss": 0.9492, "step": 9422 }, { "epoch": 11.533659730722155, "grad_norm": 2.181656274077179, "learning_rate": 6.303913122869668e-07, "loss": 0.6068, "step": 9423 }, { "epoch": 11.534883720930232, "grad_norm": 1.532927647137027, "learning_rate": 6.299659096048754e-07, "loss": 0.5583, "step": 9424 }, { "epoch": 11.53610771113831, "grad_norm": 1.8450190753702351, "learning_rate": 6.295406298145079e-07, "loss": 1.359, "step": 9425 }, { "epoch": 11.537331701346389, "grad_norm": 1.853350699501881, "learning_rate": 6.291154729438112e-07, "loss": 1.3998, "step": 9426 }, { "epoch": 11.538555691554468, "grad_norm": 2.1151419411436905, "learning_rate": 6.286904390207268e-07, "loss": 0.8428, "step": 9427 }, { "epoch": 11.539779681762546, "grad_norm": 1.757763529562232, "learning_rate": 6.28265528073185e-07, "loss": 0.5696, "step": 9428 }, { "epoch": 11.541003671970625, "grad_norm": 2.3846000553087774, "learning_rate": 6.278407401291092e-07, "loss": 0.4465, "step": 9429 }, { "epoch": 11.542227662178703, "grad_norm": 2.2684426215800335, "learning_rate": 6.274160752164154e-07, "loss": 1.012, "step": 9430 }, { "epoch": 11.54345165238678, "grad_norm": 1.3539459496677917, "learning_rate": 6.269915333630106e-07, "loss": 0.7641, "step": 9431 }, { "epoch": 11.544675642594859, "grad_norm": 1.4217083874877705, "learning_rate": 6.265671145967947e-07, "loss": 0.5006, "step": 9432 }, { "epoch": 11.545899632802938, "grad_norm": 3.6682815649729883, "learning_rate": 6.261428189456598e-07, "loss": 0.2685, "step": 9433 }, { "epoch": 11.547123623011016, "grad_norm": 0.9713195854723147, "learning_rate": 6.257186464374868e-07, "loss": 0.4117, "step": 9434 }, { "epoch": 11.548347613219095, "grad_norm": 2.290869652631053, "learning_rate": 6.252945971001514e-07, "loss": 0.4813, "step": 9435 }, { "epoch": 11.549571603427173, "grad_norm": 1.8975000771074098, "learning_rate": 6.248706709615216e-07, "loss": 0.4686, "step": 9436 }, { "epoch": 11.55079559363525, "grad_norm": 1.5233555045160905, "learning_rate": 6.244468680494559e-07, "loss": 1.0702, "step": 9437 }, { "epoch": 11.552019583843329, "grad_norm": 2.7898146098075514, "learning_rate": 6.240231883918046e-07, "loss": 0.577, "step": 9438 }, { "epoch": 11.553243574051407, "grad_norm": 1.7495884372160342, "learning_rate": 6.235996320164106e-07, "loss": 1.611, "step": 9439 }, { "epoch": 11.554467564259486, "grad_norm": 1.6200547462551997, "learning_rate": 6.231761989511087e-07, "loss": 0.7444, "step": 9440 }, { "epoch": 11.555691554467565, "grad_norm": 2.0260390656487703, "learning_rate": 6.22752889223725e-07, "loss": 1.1356, "step": 9441 }, { "epoch": 11.556915544675643, "grad_norm": 2.082440265072196, "learning_rate": 6.223297028620779e-07, "loss": 1.1483, "step": 9442 }, { "epoch": 11.55813953488372, "grad_norm": 1.4749237126467665, "learning_rate": 6.219066398939777e-07, "loss": 0.3574, "step": 9443 }, { "epoch": 11.559363525091799, "grad_norm": 1.761999904525041, "learning_rate": 6.214837003472265e-07, "loss": 0.929, "step": 9444 }, { "epoch": 11.560587515299877, "grad_norm": 2.77295256341615, "learning_rate": 6.210608842496186e-07, "loss": 0.4959, "step": 9445 }, { "epoch": 11.561811505507956, "grad_norm": 2.149433350932017, "learning_rate": 6.206381916289394e-07, "loss": 0.5084, "step": 9446 }, { "epoch": 11.563035495716035, "grad_norm": 1.1695936472661572, "learning_rate": 6.202156225129671e-07, "loss": 0.6288, "step": 9447 }, { "epoch": 11.564259485924113, "grad_norm": 1.8714435745461542, "learning_rate": 6.197931769294707e-07, "loss": 0.7052, "step": 9448 }, { "epoch": 11.56548347613219, "grad_norm": 1.4157664770669391, "learning_rate": 6.193708549062127e-07, "loss": 0.3643, "step": 9449 }, { "epoch": 11.566707466340269, "grad_norm": 1.722963581819155, "learning_rate": 6.189486564709463e-07, "loss": 0.5053, "step": 9450 }, { "epoch": 11.567931456548347, "grad_norm": 1.7248667980968144, "learning_rate": 6.185265816514174e-07, "loss": 0.4652, "step": 9451 }, { "epoch": 11.569155446756426, "grad_norm": 2.1274036119049824, "learning_rate": 6.181046304753611e-07, "loss": 0.8085, "step": 9452 }, { "epoch": 11.570379436964505, "grad_norm": 1.0947370160279923, "learning_rate": 6.176828029705084e-07, "loss": 0.7068, "step": 9453 }, { "epoch": 11.571603427172583, "grad_norm": 1.1292977135952094, "learning_rate": 6.172610991645797e-07, "loss": 0.5063, "step": 9454 }, { "epoch": 11.572827417380662, "grad_norm": 1.6530603780957207, "learning_rate": 6.16839519085288e-07, "loss": 0.98, "step": 9455 }, { "epoch": 11.574051407588739, "grad_norm": 2.4215853500892095, "learning_rate": 6.164180627603375e-07, "loss": 1.1965, "step": 9456 }, { "epoch": 11.575275397796817, "grad_norm": 2.6556120802929737, "learning_rate": 6.159967302174252e-07, "loss": 0.3949, "step": 9457 }, { "epoch": 11.576499388004896, "grad_norm": 2.739246997380678, "learning_rate": 6.155755214842393e-07, "loss": 0.8217, "step": 9458 }, { "epoch": 11.577723378212974, "grad_norm": 0.9752810173512048, "learning_rate": 6.151544365884601e-07, "loss": 0.4922, "step": 9459 }, { "epoch": 11.578947368421053, "grad_norm": 1.8472663396139954, "learning_rate": 6.147334755577597e-07, "loss": 1.4536, "step": 9460 }, { "epoch": 11.580171358629132, "grad_norm": 2.0649290709130694, "learning_rate": 6.143126384198015e-07, "loss": 1.067, "step": 9461 }, { "epoch": 11.581395348837209, "grad_norm": 1.1783126247932845, "learning_rate": 6.138919252022435e-07, "loss": 0.4426, "step": 9462 }, { "epoch": 11.582619339045287, "grad_norm": 1.8403710362480752, "learning_rate": 6.134713359327313e-07, "loss": 0.4435, "step": 9463 }, { "epoch": 11.583843329253366, "grad_norm": 1.2876206364339005, "learning_rate": 6.130508706389049e-07, "loss": 0.3846, "step": 9464 }, { "epoch": 11.585067319461444, "grad_norm": 1.9883212504611005, "learning_rate": 6.126305293483955e-07, "loss": 1.046, "step": 9465 }, { "epoch": 11.586291309669523, "grad_norm": 2.0579301321725287, "learning_rate": 6.122103120888274e-07, "loss": 0.4081, "step": 9466 }, { "epoch": 11.587515299877602, "grad_norm": 1.9039768957820018, "learning_rate": 6.117902188878149e-07, "loss": 0.503, "step": 9467 }, { "epoch": 11.588739290085678, "grad_norm": 2.922871407513824, "learning_rate": 6.113702497729657e-07, "loss": 0.4044, "step": 9468 }, { "epoch": 11.589963280293757, "grad_norm": 2.655156423371508, "learning_rate": 6.109504047718784e-07, "loss": 0.3875, "step": 9469 }, { "epoch": 11.591187270501836, "grad_norm": 2.147228957227427, "learning_rate": 6.105306839121422e-07, "loss": 0.3449, "step": 9470 }, { "epoch": 11.592411260709914, "grad_norm": 1.8738959464910643, "learning_rate": 6.101110872213414e-07, "loss": 1.5882, "step": 9471 }, { "epoch": 11.593635250917993, "grad_norm": 1.3890029956696825, "learning_rate": 6.096916147270498e-07, "loss": 1.329, "step": 9472 }, { "epoch": 11.594859241126072, "grad_norm": 2.581151621586061, "learning_rate": 6.092722664568335e-07, "loss": 0.4603, "step": 9473 }, { "epoch": 11.596083231334148, "grad_norm": 1.7458444187393871, "learning_rate": 6.088530424382505e-07, "loss": 1.2626, "step": 9474 }, { "epoch": 11.597307221542227, "grad_norm": 2.4913860679633095, "learning_rate": 6.084339426988508e-07, "loss": 0.6048, "step": 9475 }, { "epoch": 11.598531211750306, "grad_norm": 1.3654420454040583, "learning_rate": 6.080149672661759e-07, "loss": 0.5232, "step": 9476 }, { "epoch": 11.599755201958384, "grad_norm": 1.1377069029551743, "learning_rate": 6.075961161677593e-07, "loss": 0.5926, "step": 9477 }, { "epoch": 11.600979192166463, "grad_norm": 2.0680513647202674, "learning_rate": 6.071773894311256e-07, "loss": 0.592, "step": 9478 }, { "epoch": 11.602203182374542, "grad_norm": 1.8833169021707632, "learning_rate": 6.067587870837937e-07, "loss": 0.5276, "step": 9479 }, { "epoch": 11.60342717258262, "grad_norm": 1.236447288569973, "learning_rate": 6.063403091532724e-07, "loss": 0.8153, "step": 9480 }, { "epoch": 11.604651162790697, "grad_norm": 1.9767530223068288, "learning_rate": 6.059219556670612e-07, "loss": 0.5142, "step": 9481 }, { "epoch": 11.605875152998776, "grad_norm": 2.4242420091064703, "learning_rate": 6.055037266526534e-07, "loss": 0.4111, "step": 9482 }, { "epoch": 11.607099143206854, "grad_norm": 1.0854337592879344, "learning_rate": 6.050856221375323e-07, "loss": 0.5455, "step": 9483 }, { "epoch": 11.608323133414933, "grad_norm": 1.4044124752527214, "learning_rate": 6.046676421491765e-07, "loss": 0.3065, "step": 9484 }, { "epoch": 11.609547123623011, "grad_norm": 1.1900528172932887, "learning_rate": 6.042497867150527e-07, "loss": 0.479, "step": 9485 }, { "epoch": 11.61077111383109, "grad_norm": 1.1558885736567517, "learning_rate": 6.03832055862621e-07, "loss": 0.6122, "step": 9486 }, { "epoch": 11.611995104039167, "grad_norm": 1.1350885562527167, "learning_rate": 6.034144496193331e-07, "loss": 0.3067, "step": 9487 }, { "epoch": 11.613219094247246, "grad_norm": 2.4384332191331803, "learning_rate": 6.029969680126326e-07, "loss": 1.0754, "step": 9488 }, { "epoch": 11.614443084455324, "grad_norm": 1.7748893006656772, "learning_rate": 6.02579611069955e-07, "loss": 0.4986, "step": 9489 }, { "epoch": 11.615667074663403, "grad_norm": 3.107767185736674, "learning_rate": 6.021623788187273e-07, "loss": 0.4424, "step": 9490 }, { "epoch": 11.616891064871481, "grad_norm": 2.284322654872905, "learning_rate": 6.017452712863684e-07, "loss": 0.9507, "step": 9491 }, { "epoch": 11.61811505507956, "grad_norm": 2.0001695079378905, "learning_rate": 6.013282885002889e-07, "loss": 0.5364, "step": 9492 }, { "epoch": 11.619339045287639, "grad_norm": 2.391370226821474, "learning_rate": 6.009114304878916e-07, "loss": 0.6028, "step": 9493 }, { "epoch": 11.620563035495715, "grad_norm": 1.0988512118208684, "learning_rate": 6.00494697276571e-07, "loss": 0.506, "step": 9494 }, { "epoch": 11.621787025703794, "grad_norm": 2.8549939380085387, "learning_rate": 6.000780888937133e-07, "loss": 0.5747, "step": 9495 }, { "epoch": 11.623011015911873, "grad_norm": 1.8255568636601338, "learning_rate": 5.996616053666951e-07, "loss": 0.9275, "step": 9496 }, { "epoch": 11.624235006119951, "grad_norm": 2.686001985120119, "learning_rate": 5.992452467228884e-07, "loss": 0.5098, "step": 9497 }, { "epoch": 11.62545899632803, "grad_norm": 2.2384048670298244, "learning_rate": 5.988290129896543e-07, "loss": 0.5062, "step": 9498 }, { "epoch": 11.626682986536107, "grad_norm": 1.7862959659658524, "learning_rate": 5.984129041943448e-07, "loss": 0.6569, "step": 9499 }, { "epoch": 11.627906976744185, "grad_norm": 2.5197831258493535, "learning_rate": 5.979969203643049e-07, "loss": 0.4562, "step": 9500 }, { "epoch": 11.629130966952264, "grad_norm": 2.3411191503359503, "learning_rate": 5.975810615268731e-07, "loss": 0.5453, "step": 9501 }, { "epoch": 11.630354957160343, "grad_norm": 2.1608442336168947, "learning_rate": 5.971653277093778e-07, "loss": 0.3846, "step": 9502 }, { "epoch": 11.631578947368421, "grad_norm": 2.2959339098494604, "learning_rate": 5.967497189391385e-07, "loss": 0.615, "step": 9503 }, { "epoch": 11.6328029375765, "grad_norm": 2.6585807592232187, "learning_rate": 5.963342352434684e-07, "loss": 0.3341, "step": 9504 }, { "epoch": 11.634026927784578, "grad_norm": 3.47811285495677, "learning_rate": 5.959188766496713e-07, "loss": 0.467, "step": 9505 }, { "epoch": 11.635250917992655, "grad_norm": 1.1703020743265737, "learning_rate": 5.955036431850425e-07, "loss": 1.1966, "step": 9506 }, { "epoch": 11.636474908200734, "grad_norm": 1.8603818613953647, "learning_rate": 5.950885348768704e-07, "loss": 0.9084, "step": 9507 }, { "epoch": 11.637698898408813, "grad_norm": 3.589560565253967, "learning_rate": 5.946735517524337e-07, "loss": 0.3445, "step": 9508 }, { "epoch": 11.638922888616891, "grad_norm": 2.6745580152636625, "learning_rate": 5.942586938390041e-07, "loss": 0.8505, "step": 9509 }, { "epoch": 11.64014687882497, "grad_norm": 2.3572813334675113, "learning_rate": 5.938439611638442e-07, "loss": 0.814, "step": 9510 }, { "epoch": 11.641370869033048, "grad_norm": 2.3652719622979563, "learning_rate": 5.934293537542085e-07, "loss": 0.4239, "step": 9511 }, { "epoch": 11.642594859241125, "grad_norm": 2.0078424270760995, "learning_rate": 5.93014871637344e-07, "loss": 0.9549, "step": 9512 }, { "epoch": 11.643818849449204, "grad_norm": 1.6909670289061323, "learning_rate": 5.926005148404881e-07, "loss": 0.5153, "step": 9513 }, { "epoch": 11.645042839657282, "grad_norm": 1.8099705798867596, "learning_rate": 5.921862833908721e-07, "loss": 0.4736, "step": 9514 }, { "epoch": 11.646266829865361, "grad_norm": 2.0170978190786197, "learning_rate": 5.91772177315717e-07, "loss": 0.3611, "step": 9515 }, { "epoch": 11.64749082007344, "grad_norm": 1.4939581817327328, "learning_rate": 5.913581966422368e-07, "loss": 1.4075, "step": 9516 }, { "epoch": 11.648714810281518, "grad_norm": 1.1667402584822508, "learning_rate": 5.909443413976351e-07, "loss": 0.6179, "step": 9517 }, { "epoch": 11.649938800489597, "grad_norm": 1.7502141504277084, "learning_rate": 5.905306116091111e-07, "loss": 0.3021, "step": 9518 }, { "epoch": 11.651162790697674, "grad_norm": 1.1512375490074636, "learning_rate": 5.901170073038523e-07, "loss": 0.6378, "step": 9519 }, { "epoch": 11.652386780905752, "grad_norm": 1.8336913782993471, "learning_rate": 5.897035285090399e-07, "loss": 0.6854, "step": 9520 }, { "epoch": 11.653610771113831, "grad_norm": 1.87926866171637, "learning_rate": 5.892901752518457e-07, "loss": 1.3791, "step": 9521 }, { "epoch": 11.65483476132191, "grad_norm": 1.429720210691919, "learning_rate": 5.888769475594341e-07, "loss": 1.5557, "step": 9522 }, { "epoch": 11.656058751529988, "grad_norm": 3.093397373363434, "learning_rate": 5.884638454589609e-07, "loss": 0.5076, "step": 9523 }, { "epoch": 11.657282741738067, "grad_norm": 1.557990771263695, "learning_rate": 5.880508689775732e-07, "loss": 0.6164, "step": 9524 }, { "epoch": 11.658506731946144, "grad_norm": 2.079403310944342, "learning_rate": 5.876380181424107e-07, "loss": 0.5289, "step": 9525 }, { "epoch": 11.659730722154222, "grad_norm": 2.923485543074061, "learning_rate": 5.872252929806035e-07, "loss": 0.8196, "step": 9526 }, { "epoch": 11.660954712362301, "grad_norm": 2.1328601495085557, "learning_rate": 5.868126935192766e-07, "loss": 0.3816, "step": 9527 }, { "epoch": 11.66217870257038, "grad_norm": 2.525772039425355, "learning_rate": 5.864002197855425e-07, "loss": 0.5339, "step": 9528 }, { "epoch": 11.663402692778458, "grad_norm": 2.236291198492012, "learning_rate": 5.859878718065082e-07, "loss": 0.3934, "step": 9529 }, { "epoch": 11.664626682986537, "grad_norm": 1.5515494548596904, "learning_rate": 5.855756496092704e-07, "loss": 0.7119, "step": 9530 }, { "epoch": 11.665850673194614, "grad_norm": 2.104289627859134, "learning_rate": 5.85163553220921e-07, "loss": 0.7284, "step": 9531 }, { "epoch": 11.667074663402692, "grad_norm": 1.5653892279137194, "learning_rate": 5.847515826685401e-07, "loss": 0.6088, "step": 9532 }, { "epoch": 11.668298653610771, "grad_norm": 1.8814160373624913, "learning_rate": 5.843397379792021e-07, "loss": 1.5593, "step": 9533 }, { "epoch": 11.66952264381885, "grad_norm": 2.1035337091764, "learning_rate": 5.839280191799704e-07, "loss": 0.5051, "step": 9534 }, { "epoch": 11.670746634026928, "grad_norm": 1.3795937285578812, "learning_rate": 5.835164262979013e-07, "loss": 0.3865, "step": 9535 }, { "epoch": 11.671970624235007, "grad_norm": 2.4860021038401476, "learning_rate": 5.831049593600447e-07, "loss": 0.4827, "step": 9536 }, { "epoch": 11.673194614443084, "grad_norm": 1.7179572497580777, "learning_rate": 5.826936183934403e-07, "loss": 0.5199, "step": 9537 }, { "epoch": 11.674418604651162, "grad_norm": 2.191317693430263, "learning_rate": 5.822824034251195e-07, "loss": 0.5101, "step": 9538 }, { "epoch": 11.67564259485924, "grad_norm": 2.3327127561215573, "learning_rate": 5.818713144821062e-07, "loss": 1.0274, "step": 9539 }, { "epoch": 11.67686658506732, "grad_norm": 1.6289944897805386, "learning_rate": 5.814603515914153e-07, "loss": 0.4738, "step": 9540 }, { "epoch": 11.678090575275398, "grad_norm": 2.0401840764728836, "learning_rate": 5.810495147800538e-07, "loss": 1.0025, "step": 9541 }, { "epoch": 11.679314565483477, "grad_norm": 0.9675589717544911, "learning_rate": 5.806388040750205e-07, "loss": 0.4387, "step": 9542 }, { "epoch": 11.680538555691555, "grad_norm": 2.903803409098663, "learning_rate": 5.80228219503306e-07, "loss": 0.3527, "step": 9543 }, { "epoch": 11.681762545899632, "grad_norm": 1.3156137160977062, "learning_rate": 5.798177610918912e-07, "loss": 0.6545, "step": 9544 }, { "epoch": 11.68298653610771, "grad_norm": 2.786977548137845, "learning_rate": 5.794074288677526e-07, "loss": 0.5107, "step": 9545 }, { "epoch": 11.68421052631579, "grad_norm": 2.3185822128760805, "learning_rate": 5.78997222857853e-07, "loss": 0.5251, "step": 9546 }, { "epoch": 11.685434516523868, "grad_norm": 2.649237750016963, "learning_rate": 5.785871430891508e-07, "loss": 0.531, "step": 9547 }, { "epoch": 11.686658506731947, "grad_norm": 1.9812315258378146, "learning_rate": 5.781771895885937e-07, "loss": 1.3855, "step": 9548 }, { "epoch": 11.687882496940025, "grad_norm": 1.727502148485613, "learning_rate": 5.777673623831245e-07, "loss": 0.5599, "step": 9549 }, { "epoch": 11.689106487148102, "grad_norm": 1.7332818529486536, "learning_rate": 5.773576614996743e-07, "loss": 1.3305, "step": 9550 }, { "epoch": 11.69033047735618, "grad_norm": 2.225294020139932, "learning_rate": 5.769480869651678e-07, "loss": 0.4306, "step": 9551 }, { "epoch": 11.69155446756426, "grad_norm": 2.4005172831474253, "learning_rate": 5.765386388065189e-07, "loss": 0.6671, "step": 9552 }, { "epoch": 11.692778457772338, "grad_norm": 1.029971418326937, "learning_rate": 5.761293170506368e-07, "loss": 0.4401, "step": 9553 }, { "epoch": 11.694002447980417, "grad_norm": 1.4590314649251621, "learning_rate": 5.757201217244201e-07, "loss": 0.9655, "step": 9554 }, { "epoch": 11.695226438188495, "grad_norm": 1.710504564042804, "learning_rate": 5.753110528547596e-07, "loss": 1.0798, "step": 9555 }, { "epoch": 11.696450428396572, "grad_norm": 1.1015609029008078, "learning_rate": 5.749021104685379e-07, "loss": 0.5277, "step": 9556 }, { "epoch": 11.69767441860465, "grad_norm": 2.7549988942225436, "learning_rate": 5.74493294592629e-07, "loss": 0.4693, "step": 9557 }, { "epoch": 11.69889840881273, "grad_norm": 2.722589493264937, "learning_rate": 5.740846052538986e-07, "loss": 0.5858, "step": 9558 }, { "epoch": 11.700122399020808, "grad_norm": 0.9665818710005806, "learning_rate": 5.736760424792048e-07, "loss": 0.3887, "step": 9559 }, { "epoch": 11.701346389228886, "grad_norm": 1.6589136810326994, "learning_rate": 5.732676062953963e-07, "loss": 1.0112, "step": 9560 }, { "epoch": 11.702570379436965, "grad_norm": 1.7588763839172836, "learning_rate": 5.728592967293137e-07, "loss": 0.6186, "step": 9561 }, { "epoch": 11.703794369645042, "grad_norm": 2.0581047604530016, "learning_rate": 5.724511138077912e-07, "loss": 0.5881, "step": 9562 }, { "epoch": 11.70501835985312, "grad_norm": 1.7002094710676772, "learning_rate": 5.720430575576516e-07, "loss": 0.5492, "step": 9563 }, { "epoch": 11.7062423500612, "grad_norm": 1.6530125895485392, "learning_rate": 5.716351280057111e-07, "loss": 0.9325, "step": 9564 }, { "epoch": 11.707466340269278, "grad_norm": 2.2769363773842866, "learning_rate": 5.712273251787769e-07, "loss": 0.551, "step": 9565 }, { "epoch": 11.708690330477356, "grad_norm": 1.977321946021986, "learning_rate": 5.708196491036494e-07, "loss": 0.6089, "step": 9566 }, { "epoch": 11.709914320685435, "grad_norm": 2.3933127931980858, "learning_rate": 5.704120998071192e-07, "loss": 0.5144, "step": 9567 }, { "epoch": 11.711138310893514, "grad_norm": 1.6914215646448734, "learning_rate": 5.700046773159685e-07, "loss": 0.8486, "step": 9568 }, { "epoch": 11.71236230110159, "grad_norm": 1.5643157705745718, "learning_rate": 5.695973816569728e-07, "loss": 0.9091, "step": 9569 }, { "epoch": 11.713586291309669, "grad_norm": 1.7978107033483646, "learning_rate": 5.691902128568954e-07, "loss": 0.4454, "step": 9570 }, { "epoch": 11.714810281517748, "grad_norm": 1.8838832532215717, "learning_rate": 5.687831709424965e-07, "loss": 1.1693, "step": 9571 }, { "epoch": 11.716034271725826, "grad_norm": 1.1343877244493665, "learning_rate": 5.683762559405245e-07, "loss": 0.3605, "step": 9572 }, { "epoch": 11.717258261933905, "grad_norm": 1.9333511989666385, "learning_rate": 5.679694678777206e-07, "loss": 0.9604, "step": 9573 }, { "epoch": 11.718482252141984, "grad_norm": 1.8553587692472726, "learning_rate": 5.675628067808169e-07, "loss": 1.1862, "step": 9574 }, { "epoch": 11.71970624235006, "grad_norm": 3.0291618423154607, "learning_rate": 5.671562726765381e-07, "loss": 0.486, "step": 9575 }, { "epoch": 11.720930232558139, "grad_norm": 2.1280383493704855, "learning_rate": 5.667498655916002e-07, "loss": 1.1908, "step": 9576 }, { "epoch": 11.722154222766218, "grad_norm": 1.5382589574316063, "learning_rate": 5.6634358555271e-07, "loss": 0.811, "step": 9577 }, { "epoch": 11.723378212974296, "grad_norm": 2.326124098616378, "learning_rate": 5.659374325865668e-07, "loss": 1.3312, "step": 9578 }, { "epoch": 11.724602203182375, "grad_norm": 2.9264766995794607, "learning_rate": 5.655314067198628e-07, "loss": 0.206, "step": 9579 }, { "epoch": 11.725826193390454, "grad_norm": 1.5824709748210595, "learning_rate": 5.6512550797928e-07, "loss": 0.9187, "step": 9580 }, { "epoch": 11.727050183598532, "grad_norm": 1.9987175533476114, "learning_rate": 5.647197363914919e-07, "loss": 0.9907, "step": 9581 }, { "epoch": 11.728274173806609, "grad_norm": 1.3964943902835223, "learning_rate": 5.643140919831644e-07, "loss": 0.7094, "step": 9582 }, { "epoch": 11.729498164014688, "grad_norm": 1.5759832362872677, "learning_rate": 5.639085747809544e-07, "loss": 0.424, "step": 9583 }, { "epoch": 11.730722154222766, "grad_norm": 1.8150244741887251, "learning_rate": 5.635031848115124e-07, "loss": 0.4057, "step": 9584 }, { "epoch": 11.731946144430845, "grad_norm": 1.8934606554631424, "learning_rate": 5.630979221014787e-07, "loss": 0.4852, "step": 9585 }, { "epoch": 11.733170134638923, "grad_norm": 2.1657169690417035, "learning_rate": 5.626927866774857e-07, "loss": 1.1832, "step": 9586 }, { "epoch": 11.734394124847, "grad_norm": 2.3388524037771856, "learning_rate": 5.622877785661565e-07, "loss": 0.4854, "step": 9587 }, { "epoch": 11.735618115055079, "grad_norm": 2.726010922472211, "learning_rate": 5.61882897794108e-07, "loss": 0.2732, "step": 9588 }, { "epoch": 11.736842105263158, "grad_norm": 1.5541067439079779, "learning_rate": 5.614781443879463e-07, "loss": 0.5477, "step": 9589 }, { "epoch": 11.738066095471236, "grad_norm": 1.7496871356654915, "learning_rate": 5.610735183742713e-07, "loss": 0.9033, "step": 9590 }, { "epoch": 11.739290085679315, "grad_norm": 2.838162018131953, "learning_rate": 5.60669019779673e-07, "loss": 0.4372, "step": 9591 }, { "epoch": 11.740514075887393, "grad_norm": 2.533352684473776, "learning_rate": 5.602646486307334e-07, "loss": 0.4156, "step": 9592 }, { "epoch": 11.741738066095472, "grad_norm": 1.4886163409163444, "learning_rate": 5.598604049540268e-07, "loss": 0.6859, "step": 9593 }, { "epoch": 11.742962056303549, "grad_norm": 2.272022435309857, "learning_rate": 5.594562887761182e-07, "loss": 0.4707, "step": 9594 }, { "epoch": 11.744186046511627, "grad_norm": 1.439255800322469, "learning_rate": 5.590523001235646e-07, "loss": 1.2692, "step": 9595 }, { "epoch": 11.745410036719706, "grad_norm": 2.74586383663098, "learning_rate": 5.586484390229141e-07, "loss": 0.8912, "step": 9596 }, { "epoch": 11.746634026927785, "grad_norm": 1.4566049036829696, "learning_rate": 5.582447055007085e-07, "loss": 0.4534, "step": 9597 }, { "epoch": 11.747858017135863, "grad_norm": 2.068558175716681, "learning_rate": 5.578410995834793e-07, "loss": 0.3883, "step": 9598 }, { "epoch": 11.749082007343942, "grad_norm": 1.7657748824746549, "learning_rate": 5.57437621297749e-07, "loss": 0.7278, "step": 9599 }, { "epoch": 11.750305997552019, "grad_norm": 3.245117604734685, "learning_rate": 5.570342706700324e-07, "loss": 0.3063, "step": 9600 }, { "epoch": 11.751529987760097, "grad_norm": 1.9554461881287157, "learning_rate": 5.566310477268378e-07, "loss": 0.5278, "step": 9601 }, { "epoch": 11.752753977968176, "grad_norm": 1.996805271592485, "learning_rate": 5.562279524946623e-07, "loss": 0.9023, "step": 9602 }, { "epoch": 11.753977968176255, "grad_norm": 1.1559807910758335, "learning_rate": 5.558249849999967e-07, "loss": 0.602, "step": 9603 }, { "epoch": 11.755201958384333, "grad_norm": 1.1923324390588945, "learning_rate": 5.554221452693217e-07, "loss": 0.5679, "step": 9604 }, { "epoch": 11.756425948592412, "grad_norm": 1.403489346857492, "learning_rate": 5.550194333291112e-07, "loss": 0.7361, "step": 9605 }, { "epoch": 11.75764993880049, "grad_norm": 2.0561954092293897, "learning_rate": 5.546168492058293e-07, "loss": 0.9224, "step": 9606 }, { "epoch": 11.758873929008567, "grad_norm": 1.4788114971130026, "learning_rate": 5.542143929259327e-07, "loss": 0.6173, "step": 9607 }, { "epoch": 11.760097919216646, "grad_norm": 1.3344460188646743, "learning_rate": 5.538120645158692e-07, "loss": 0.5695, "step": 9608 }, { "epoch": 11.761321909424725, "grad_norm": 2.169798093171633, "learning_rate": 5.534098640020785e-07, "loss": 0.5026, "step": 9609 }, { "epoch": 11.762545899632803, "grad_norm": 2.8968636319003047, "learning_rate": 5.530077914109916e-07, "loss": 1.0183, "step": 9610 }, { "epoch": 11.763769889840882, "grad_norm": 1.5914225178824548, "learning_rate": 5.52605846769031e-07, "loss": 0.5329, "step": 9611 }, { "epoch": 11.76499388004896, "grad_norm": 2.757851758052305, "learning_rate": 5.522040301026114e-07, "loss": 0.4293, "step": 9612 }, { "epoch": 11.766217870257037, "grad_norm": 1.833039035028096, "learning_rate": 5.518023414381376e-07, "loss": 0.4645, "step": 9613 }, { "epoch": 11.767441860465116, "grad_norm": 2.2455754329775863, "learning_rate": 5.514007808020091e-07, "loss": 0.5719, "step": 9614 }, { "epoch": 11.768665850673194, "grad_norm": 2.534834805176596, "learning_rate": 5.509993482206136e-07, "loss": 0.5334, "step": 9615 }, { "epoch": 11.769889840881273, "grad_norm": 2.0622556236026863, "learning_rate": 5.505980437203329e-07, "loss": 0.5421, "step": 9616 }, { "epoch": 11.771113831089352, "grad_norm": 1.530941806701124, "learning_rate": 5.501968673275371e-07, "loss": 0.8066, "step": 9617 }, { "epoch": 11.77233782129743, "grad_norm": 2.1057014525580606, "learning_rate": 5.497958190685921e-07, "loss": 0.8304, "step": 9618 }, { "epoch": 11.773561811505507, "grad_norm": 2.8724926569680633, "learning_rate": 5.493948989698525e-07, "loss": 0.7937, "step": 9619 }, { "epoch": 11.774785801713586, "grad_norm": 1.6279512007918664, "learning_rate": 5.489941070576651e-07, "loss": 1.0397, "step": 9620 }, { "epoch": 11.776009791921664, "grad_norm": 1.7321569042192375, "learning_rate": 5.485934433583689e-07, "loss": 0.4275, "step": 9621 }, { "epoch": 11.777233782129743, "grad_norm": 2.1659002004250443, "learning_rate": 5.481929078982939e-07, "loss": 0.571, "step": 9622 }, { "epoch": 11.778457772337822, "grad_norm": 1.6172109566542565, "learning_rate": 5.477925007037619e-07, "loss": 0.4953, "step": 9623 }, { "epoch": 11.7796817625459, "grad_norm": 1.3366547369943436, "learning_rate": 5.473922218010857e-07, "loss": 1.087, "step": 9624 }, { "epoch": 11.780905752753977, "grad_norm": 1.508268997408351, "learning_rate": 5.469920712165708e-07, "loss": 0.5649, "step": 9625 }, { "epoch": 11.782129742962056, "grad_norm": 2.017968565615283, "learning_rate": 5.465920489765126e-07, "loss": 0.7628, "step": 9626 }, { "epoch": 11.783353733170134, "grad_norm": 1.7975004105553078, "learning_rate": 5.461921551072011e-07, "loss": 1.3571, "step": 9627 }, { "epoch": 11.784577723378213, "grad_norm": 1.9185394893974115, "learning_rate": 5.457923896349137e-07, "loss": 1.1039, "step": 9628 }, { "epoch": 11.785801713586292, "grad_norm": 1.976862238328463, "learning_rate": 5.453927525859223e-07, "loss": 1.0057, "step": 9629 }, { "epoch": 11.78702570379437, "grad_norm": 1.855032791202365, "learning_rate": 5.449932439864891e-07, "loss": 0.4195, "step": 9630 }, { "epoch": 11.788249694002449, "grad_norm": 2.625899833500155, "learning_rate": 5.445938638628695e-07, "loss": 0.6653, "step": 9631 }, { "epoch": 11.789473684210526, "grad_norm": 1.5105860511753944, "learning_rate": 5.441946122413086e-07, "loss": 0.433, "step": 9632 }, { "epoch": 11.790697674418604, "grad_norm": 1.530683490723502, "learning_rate": 5.437954891480443e-07, "loss": 0.6782, "step": 9633 }, { "epoch": 11.791921664626683, "grad_norm": 0.8629613158319624, "learning_rate": 5.433964946093045e-07, "loss": 0.3498, "step": 9634 }, { "epoch": 11.793145654834762, "grad_norm": 1.293572005345208, "learning_rate": 5.429976286513095e-07, "loss": 0.5548, "step": 9635 }, { "epoch": 11.79436964504284, "grad_norm": 3.850537247821223, "learning_rate": 5.425988913002722e-07, "loss": 0.3482, "step": 9636 }, { "epoch": 11.795593635250919, "grad_norm": 1.7943969858624227, "learning_rate": 5.422002825823961e-07, "loss": 1.1571, "step": 9637 }, { "epoch": 11.796817625458996, "grad_norm": 1.2768930838219346, "learning_rate": 5.418018025238758e-07, "loss": 0.4378, "step": 9638 }, { "epoch": 11.798041615667074, "grad_norm": 1.6957228805528795, "learning_rate": 5.414034511508983e-07, "loss": 0.7116, "step": 9639 }, { "epoch": 11.799265605875153, "grad_norm": 2.4637895774969207, "learning_rate": 5.410052284896419e-07, "loss": 0.498, "step": 9640 }, { "epoch": 11.800489596083231, "grad_norm": 1.7045370227986845, "learning_rate": 5.406071345662758e-07, "loss": 0.9076, "step": 9641 }, { "epoch": 11.80171358629131, "grad_norm": 2.922652563543034, "learning_rate": 5.402091694069614e-07, "loss": 0.9556, "step": 9642 }, { "epoch": 11.802937576499389, "grad_norm": 1.6447772945225934, "learning_rate": 5.398113330378513e-07, "loss": 0.5092, "step": 9643 }, { "epoch": 11.804161566707466, "grad_norm": 2.0881910115327713, "learning_rate": 5.394136254850906e-07, "loss": 1.1834, "step": 9644 }, { "epoch": 11.805385556915544, "grad_norm": 1.3886443180614014, "learning_rate": 5.390160467748154e-07, "loss": 0.504, "step": 9645 }, { "epoch": 11.806609547123623, "grad_norm": 1.722356172312867, "learning_rate": 5.386185969331517e-07, "loss": 1.3957, "step": 9646 }, { "epoch": 11.807833537331701, "grad_norm": 1.55599222316431, "learning_rate": 5.382212759862193e-07, "loss": 0.8694, "step": 9647 }, { "epoch": 11.80905752753978, "grad_norm": 1.3862346624743738, "learning_rate": 5.378240839601279e-07, "loss": 0.4612, "step": 9648 }, { "epoch": 11.810281517747859, "grad_norm": 2.362886969129296, "learning_rate": 5.374270208809807e-07, "loss": 0.3235, "step": 9649 }, { "epoch": 11.811505507955935, "grad_norm": 1.946137995379407, "learning_rate": 5.370300867748707e-07, "loss": 0.4211, "step": 9650 }, { "epoch": 11.812729498164014, "grad_norm": 1.902486642100752, "learning_rate": 5.366332816678835e-07, "loss": 0.4147, "step": 9651 }, { "epoch": 11.813953488372093, "grad_norm": 1.5762733339610606, "learning_rate": 5.362366055860935e-07, "loss": 0.3203, "step": 9652 }, { "epoch": 11.815177478580171, "grad_norm": 1.7960480211161267, "learning_rate": 5.358400585555712e-07, "loss": 1.0983, "step": 9653 }, { "epoch": 11.81640146878825, "grad_norm": 1.6861139069739322, "learning_rate": 5.354436406023752e-07, "loss": 0.4637, "step": 9654 }, { "epoch": 11.817625458996329, "grad_norm": 1.4899835558924546, "learning_rate": 5.35047351752557e-07, "loss": 1.3164, "step": 9655 }, { "epoch": 11.818849449204407, "grad_norm": 1.1140949937771847, "learning_rate": 5.346511920321587e-07, "loss": 0.5926, "step": 9656 }, { "epoch": 11.820073439412484, "grad_norm": 1.9757958411602672, "learning_rate": 5.342551614672148e-07, "loss": 0.6217, "step": 9657 }, { "epoch": 11.821297429620563, "grad_norm": 1.5729933232992068, "learning_rate": 5.338592600837511e-07, "loss": 0.6487, "step": 9658 }, { "epoch": 11.822521419828641, "grad_norm": 1.495183544550604, "learning_rate": 5.334634879077843e-07, "loss": 0.3529, "step": 9659 }, { "epoch": 11.82374541003672, "grad_norm": 2.085559635477468, "learning_rate": 5.330678449653234e-07, "loss": 0.8509, "step": 9660 }, { "epoch": 11.824969400244798, "grad_norm": 2.6324113940106697, "learning_rate": 5.326723312823681e-07, "loss": 0.573, "step": 9661 }, { "epoch": 11.826193390452877, "grad_norm": 1.3086173675091548, "learning_rate": 5.322769468849112e-07, "loss": 0.5175, "step": 9662 }, { "epoch": 11.827417380660954, "grad_norm": 1.3529903911702288, "learning_rate": 5.318816917989359e-07, "loss": 0.7168, "step": 9663 }, { "epoch": 11.828641370869033, "grad_norm": 1.2232475260676607, "learning_rate": 5.314865660504154e-07, "loss": 0.5815, "step": 9664 }, { "epoch": 11.829865361077111, "grad_norm": 3.0262573449101153, "learning_rate": 5.31091569665316e-07, "loss": 0.3941, "step": 9665 }, { "epoch": 11.83108935128519, "grad_norm": 1.857187413573764, "learning_rate": 5.30696702669597e-07, "loss": 1.3419, "step": 9666 }, { "epoch": 11.832313341493268, "grad_norm": 2.329777507072392, "learning_rate": 5.303019650892069e-07, "loss": 1.0322, "step": 9667 }, { "epoch": 11.833537331701347, "grad_norm": 1.8241093899538658, "learning_rate": 5.299073569500859e-07, "loss": 0.2915, "step": 9668 }, { "epoch": 11.834761321909426, "grad_norm": 2.0119448533452253, "learning_rate": 5.29512878278167e-07, "loss": 1.3067, "step": 9669 }, { "epoch": 11.835985312117502, "grad_norm": 2.3127739638229197, "learning_rate": 5.291185290993731e-07, "loss": 0.348, "step": 9670 }, { "epoch": 11.837209302325581, "grad_norm": 1.064128634279048, "learning_rate": 5.287243094396196e-07, "loss": 0.6264, "step": 9671 }, { "epoch": 11.83843329253366, "grad_norm": 2.17380824465741, "learning_rate": 5.283302193248132e-07, "loss": 0.9667, "step": 9672 }, { "epoch": 11.839657282741738, "grad_norm": 1.2234325371074593, "learning_rate": 5.279362587808523e-07, "loss": 0.539, "step": 9673 }, { "epoch": 11.840881272949817, "grad_norm": 1.1207950026267357, "learning_rate": 5.275424278336263e-07, "loss": 0.5618, "step": 9674 }, { "epoch": 11.842105263157894, "grad_norm": 2.6916425047957713, "learning_rate": 5.271487265090163e-07, "loss": 0.5045, "step": 9675 }, { "epoch": 11.843329253365972, "grad_norm": 2.7844439896510824, "learning_rate": 5.26755154832895e-07, "loss": 0.3558, "step": 9676 }, { "epoch": 11.844553243574051, "grad_norm": 2.2344096504865987, "learning_rate": 5.263617128311263e-07, "loss": 0.5686, "step": 9677 }, { "epoch": 11.84577723378213, "grad_norm": 1.1707255814781243, "learning_rate": 5.259684005295651e-07, "loss": 0.8456, "step": 9678 }, { "epoch": 11.847001223990208, "grad_norm": 1.8119866222418062, "learning_rate": 5.255752179540599e-07, "loss": 1.1896, "step": 9679 }, { "epoch": 11.848225214198287, "grad_norm": 1.7650960015505934, "learning_rate": 5.251821651304493e-07, "loss": 0.4979, "step": 9680 }, { "epoch": 11.849449204406366, "grad_norm": 2.0587448765329195, "learning_rate": 5.247892420845616e-07, "loss": 1.2884, "step": 9681 }, { "epoch": 11.850673194614442, "grad_norm": 2.320058181991726, "learning_rate": 5.243964488422185e-07, "loss": 0.6699, "step": 9682 }, { "epoch": 11.851897184822521, "grad_norm": 2.1584854181575603, "learning_rate": 5.240037854292343e-07, "loss": 1.1178, "step": 9683 }, { "epoch": 11.8531211750306, "grad_norm": 2.440839453620434, "learning_rate": 5.236112518714126e-07, "loss": 0.7301, "step": 9684 }, { "epoch": 11.854345165238678, "grad_norm": 1.4017775228277878, "learning_rate": 5.232188481945491e-07, "loss": 0.8612, "step": 9685 }, { "epoch": 11.855569155446757, "grad_norm": 1.4109160011250932, "learning_rate": 5.228265744244315e-07, "loss": 0.6916, "step": 9686 }, { "epoch": 11.856793145654835, "grad_norm": 3.206276308205612, "learning_rate": 5.224344305868381e-07, "loss": 0.9057, "step": 9687 }, { "epoch": 11.858017135862912, "grad_norm": 3.124955060085674, "learning_rate": 5.220424167075397e-07, "loss": 0.3936, "step": 9688 }, { "epoch": 11.859241126070991, "grad_norm": 2.2900567465888475, "learning_rate": 5.216505328122975e-07, "loss": 0.5994, "step": 9689 }, { "epoch": 11.86046511627907, "grad_norm": 1.3050467030486939, "learning_rate": 5.21258778926865e-07, "loss": 0.4002, "step": 9690 }, { "epoch": 11.861689106487148, "grad_norm": 1.5494740754288179, "learning_rate": 5.208671550769859e-07, "loss": 1.2261, "step": 9691 }, { "epoch": 11.862913096695227, "grad_norm": 1.136125403916061, "learning_rate": 5.204756612883985e-07, "loss": 0.577, "step": 9692 }, { "epoch": 11.864137086903305, "grad_norm": 1.8089213280564602, "learning_rate": 5.200842975868283e-07, "loss": 0.4124, "step": 9693 }, { "epoch": 11.865361077111384, "grad_norm": 2.0656860208362864, "learning_rate": 5.196930639979947e-07, "loss": 0.4255, "step": 9694 }, { "epoch": 11.86658506731946, "grad_norm": 1.504090737986394, "learning_rate": 5.193019605476077e-07, "loss": 0.2684, "step": 9695 }, { "epoch": 11.86780905752754, "grad_norm": 1.915281171046154, "learning_rate": 5.189109872613704e-07, "loss": 0.7417, "step": 9696 }, { "epoch": 11.869033047735618, "grad_norm": 1.1946191683253597, "learning_rate": 5.185201441649757e-07, "loss": 0.8805, "step": 9697 }, { "epoch": 11.870257037943697, "grad_norm": 1.1753436650387616, "learning_rate": 5.181294312841084e-07, "loss": 0.3811, "step": 9698 }, { "epoch": 11.871481028151775, "grad_norm": 1.5965487529600186, "learning_rate": 5.17738848644444e-07, "loss": 0.5212, "step": 9699 }, { "epoch": 11.872705018359854, "grad_norm": 3.131803704473436, "learning_rate": 5.173483962716499e-07, "loss": 0.5694, "step": 9700 }, { "epoch": 11.87392900856793, "grad_norm": 1.6189207930527607, "learning_rate": 5.169580741913865e-07, "loss": 2.2776, "step": 9701 }, { "epoch": 11.87515299877601, "grad_norm": 1.469925519128763, "learning_rate": 5.165678824293038e-07, "loss": 0.9973, "step": 9702 }, { "epoch": 11.876376988984088, "grad_norm": 1.6199140722888992, "learning_rate": 5.161778210110436e-07, "loss": 0.467, "step": 9703 }, { "epoch": 11.877600979192167, "grad_norm": 1.1576113713205967, "learning_rate": 5.157878899622393e-07, "loss": 0.5816, "step": 9704 }, { "epoch": 11.878824969400245, "grad_norm": 0.935159554091864, "learning_rate": 5.153980893085159e-07, "loss": 0.5203, "step": 9705 }, { "epoch": 11.880048959608324, "grad_norm": 2.537927233611365, "learning_rate": 5.150084190754895e-07, "loss": 0.3751, "step": 9706 }, { "epoch": 11.8812729498164, "grad_norm": 1.68789373503456, "learning_rate": 5.146188792887677e-07, "loss": 1.1672, "step": 9707 }, { "epoch": 11.88249694002448, "grad_norm": 1.6904049965287256, "learning_rate": 5.1422946997395e-07, "loss": 1.1648, "step": 9708 }, { "epoch": 11.883720930232558, "grad_norm": 1.3395828442593432, "learning_rate": 5.138401911566265e-07, "loss": 0.4891, "step": 9709 }, { "epoch": 11.884944920440637, "grad_norm": 1.784389486001225, "learning_rate": 5.134510428623793e-07, "loss": 1.0915, "step": 9710 }, { "epoch": 11.886168910648715, "grad_norm": 2.0459106624934917, "learning_rate": 5.130620251167817e-07, "loss": 0.5231, "step": 9711 }, { "epoch": 11.887392900856794, "grad_norm": 1.8766319960086206, "learning_rate": 5.126731379453989e-07, "loss": 0.5418, "step": 9712 }, { "epoch": 11.88861689106487, "grad_norm": 1.5803806809553844, "learning_rate": 5.122843813737863e-07, "loss": 0.4682, "step": 9713 }, { "epoch": 11.88984088127295, "grad_norm": 1.7017072148577388, "learning_rate": 5.118957554274925e-07, "loss": 0.4864, "step": 9714 }, { "epoch": 11.891064871481028, "grad_norm": 1.3854031300408698, "learning_rate": 5.115072601320561e-07, "loss": 0.522, "step": 9715 }, { "epoch": 11.892288861689106, "grad_norm": 2.883032235571502, "learning_rate": 5.111188955130087e-07, "loss": 0.4966, "step": 9716 }, { "epoch": 11.893512851897185, "grad_norm": 2.3695324441161896, "learning_rate": 5.107306615958693e-07, "loss": 0.2827, "step": 9717 }, { "epoch": 11.894736842105264, "grad_norm": 2.654315444297391, "learning_rate": 5.103425584061539e-07, "loss": 0.6726, "step": 9718 }, { "epoch": 11.895960832313342, "grad_norm": 3.421252877498952, "learning_rate": 5.099545859693664e-07, "loss": 0.4333, "step": 9719 }, { "epoch": 11.89718482252142, "grad_norm": 1.5473007777313559, "learning_rate": 5.095667443110028e-07, "loss": 1.2563, "step": 9720 }, { "epoch": 11.898408812729498, "grad_norm": 1.9100711860955442, "learning_rate": 5.091790334565508e-07, "loss": 0.7052, "step": 9721 }, { "epoch": 11.899632802937576, "grad_norm": 1.8940775234630107, "learning_rate": 5.087914534314889e-07, "loss": 0.4362, "step": 9722 }, { "epoch": 11.900856793145655, "grad_norm": 1.952782023059173, "learning_rate": 5.084040042612878e-07, "loss": 0.6901, "step": 9723 }, { "epoch": 11.902080783353734, "grad_norm": 1.1448534616713462, "learning_rate": 5.080166859714092e-07, "loss": 0.5347, "step": 9724 }, { "epoch": 11.903304773561812, "grad_norm": 2.5433503020435313, "learning_rate": 5.076294985873062e-07, "loss": 1.013, "step": 9725 }, { "epoch": 11.904528763769889, "grad_norm": 2.428415254490329, "learning_rate": 5.072424421344224e-07, "loss": 0.4676, "step": 9726 }, { "epoch": 11.905752753977968, "grad_norm": 2.447362214768637, "learning_rate": 5.068555166381961e-07, "loss": 0.3948, "step": 9727 }, { "epoch": 11.906976744186046, "grad_norm": 1.8078675432097615, "learning_rate": 5.064687221240525e-07, "loss": 1.132, "step": 9728 }, { "epoch": 11.908200734394125, "grad_norm": 1.3396673725894388, "learning_rate": 5.060820586174109e-07, "loss": 0.5065, "step": 9729 }, { "epoch": 11.909424724602204, "grad_norm": 1.0802932201395359, "learning_rate": 5.056955261436808e-07, "loss": 0.5159, "step": 9730 }, { "epoch": 11.910648714810282, "grad_norm": 0.7957081907104413, "learning_rate": 5.053091247282649e-07, "loss": 0.2172, "step": 9731 }, { "epoch": 11.911872705018359, "grad_norm": 1.5561072459775909, "learning_rate": 5.049228543965553e-07, "loss": 0.5226, "step": 9732 }, { "epoch": 11.913096695226438, "grad_norm": 1.8352110696108188, "learning_rate": 5.045367151739366e-07, "loss": 1.0056, "step": 9733 }, { "epoch": 11.914320685434516, "grad_norm": 2.100933243137448, "learning_rate": 5.041507070857852e-07, "loss": 0.4367, "step": 9734 }, { "epoch": 11.915544675642595, "grad_norm": 3.056708505341668, "learning_rate": 5.037648301574655e-07, "loss": 0.3375, "step": 9735 }, { "epoch": 11.916768665850674, "grad_norm": 1.5175747888218345, "learning_rate": 5.033790844143383e-07, "loss": 0.4513, "step": 9736 }, { "epoch": 11.917992656058752, "grad_norm": 1.9036691472886569, "learning_rate": 5.029934698817526e-07, "loss": 0.4517, "step": 9737 }, { "epoch": 11.919216646266829, "grad_norm": 1.7101949465399282, "learning_rate": 5.026079865850497e-07, "loss": 0.9251, "step": 9738 }, { "epoch": 11.920440636474908, "grad_norm": 2.48098954052835, "learning_rate": 5.022226345495624e-07, "loss": 0.3609, "step": 9739 }, { "epoch": 11.921664626682986, "grad_norm": 1.8301958282344546, "learning_rate": 5.01837413800614e-07, "loss": 0.5236, "step": 9740 }, { "epoch": 11.922888616891065, "grad_norm": 2.0491555028504864, "learning_rate": 5.0145232436352e-07, "loss": 0.6293, "step": 9741 }, { "epoch": 11.924112607099143, "grad_norm": 1.6395358698209082, "learning_rate": 5.010673662635873e-07, "loss": 0.6209, "step": 9742 }, { "epoch": 11.925336597307222, "grad_norm": 1.4037162715236198, "learning_rate": 5.00682539526113e-07, "loss": 1.0867, "step": 9743 }, { "epoch": 11.9265605875153, "grad_norm": 1.3999126230809755, "learning_rate": 5.002978441763881e-07, "loss": 0.6253, "step": 9744 }, { "epoch": 11.927784577723378, "grad_norm": 1.7653018523398902, "learning_rate": 4.999132802396927e-07, "loss": 0.3088, "step": 9745 }, { "epoch": 11.929008567931456, "grad_norm": 1.8258976037985457, "learning_rate": 4.995288477412985e-07, "loss": 0.6524, "step": 9746 }, { "epoch": 11.930232558139535, "grad_norm": 1.8157786506421503, "learning_rate": 4.99144546706469e-07, "loss": 0.5753, "step": 9747 }, { "epoch": 11.931456548347613, "grad_norm": 1.0224478759969862, "learning_rate": 4.987603771604588e-07, "loss": 0.5085, "step": 9748 }, { "epoch": 11.932680538555692, "grad_norm": 2.169745791481339, "learning_rate": 4.983763391285148e-07, "loss": 0.957, "step": 9749 }, { "epoch": 11.93390452876377, "grad_norm": 1.2438674301624664, "learning_rate": 4.979924326358745e-07, "loss": 0.426, "step": 9750 }, { "epoch": 11.935128518971847, "grad_norm": 1.4921247591356994, "learning_rate": 4.976086577077674e-07, "loss": 1.0218, "step": 9751 }, { "epoch": 11.936352509179926, "grad_norm": 1.0480078655922382, "learning_rate": 4.972250143694116e-07, "loss": 0.3435, "step": 9752 }, { "epoch": 11.937576499388005, "grad_norm": 1.9765446088836802, "learning_rate": 4.968415026460207e-07, "loss": 0.6809, "step": 9753 }, { "epoch": 11.938800489596083, "grad_norm": 1.6189816817936633, "learning_rate": 4.964581225627971e-07, "loss": 0.4623, "step": 9754 }, { "epoch": 11.940024479804162, "grad_norm": 1.8027419077705586, "learning_rate": 4.960748741449351e-07, "loss": 0.8804, "step": 9755 }, { "epoch": 11.94124847001224, "grad_norm": 1.8541038103766476, "learning_rate": 4.956917574176204e-07, "loss": 0.4968, "step": 9756 }, { "epoch": 11.94247246022032, "grad_norm": 2.5449358366994974, "learning_rate": 4.953087724060299e-07, "loss": 0.9668, "step": 9757 }, { "epoch": 11.943696450428396, "grad_norm": 2.3273895373275293, "learning_rate": 4.94925919135332e-07, "loss": 0.6276, "step": 9758 }, { "epoch": 11.944920440636475, "grad_norm": 2.442466034286734, "learning_rate": 4.945431976306863e-07, "loss": 0.4958, "step": 9759 }, { "epoch": 11.946144430844553, "grad_norm": 1.4990254208663694, "learning_rate": 4.941606079172443e-07, "loss": 0.6447, "step": 9760 }, { "epoch": 11.947368421052632, "grad_norm": 2.3219606290541654, "learning_rate": 4.937781500201475e-07, "loss": 0.3458, "step": 9761 }, { "epoch": 11.94859241126071, "grad_norm": 2.3844456749417966, "learning_rate": 4.933958239645304e-07, "loss": 0.9008, "step": 9762 }, { "epoch": 11.949816401468787, "grad_norm": 1.5364613348038414, "learning_rate": 4.930136297755189e-07, "loss": 0.8875, "step": 9763 }, { "epoch": 11.951040391676866, "grad_norm": 2.5643217208850495, "learning_rate": 4.926315674782273e-07, "loss": 0.4964, "step": 9764 }, { "epoch": 11.952264381884945, "grad_norm": 1.9294251259921487, "learning_rate": 4.922496370977639e-07, "loss": 0.7353, "step": 9765 }, { "epoch": 11.953488372093023, "grad_norm": 1.2829427997574674, "learning_rate": 4.918678386592288e-07, "loss": 0.4677, "step": 9766 }, { "epoch": 11.954712362301102, "grad_norm": 1.455683230986563, "learning_rate": 4.914861721877118e-07, "loss": 0.3964, "step": 9767 }, { "epoch": 11.95593635250918, "grad_norm": 1.672975215369213, "learning_rate": 4.911046377082945e-07, "loss": 0.6372, "step": 9768 }, { "epoch": 11.957160342717259, "grad_norm": 1.8520041810593246, "learning_rate": 4.907232352460503e-07, "loss": 0.9809, "step": 9769 }, { "epoch": 11.958384332925336, "grad_norm": 1.68820672878603, "learning_rate": 4.90341964826043e-07, "loss": 0.5565, "step": 9770 }, { "epoch": 11.959608323133414, "grad_norm": 2.7011217773256733, "learning_rate": 4.899608264733288e-07, "loss": 1.0109, "step": 9771 }, { "epoch": 11.960832313341493, "grad_norm": 1.901002300265054, "learning_rate": 4.895798202129543e-07, "loss": 1.4837, "step": 9772 }, { "epoch": 11.962056303549572, "grad_norm": 1.6346174957740531, "learning_rate": 4.891989460699578e-07, "loss": 1.482, "step": 9773 }, { "epoch": 11.96328029375765, "grad_norm": 1.5028344920080638, "learning_rate": 4.888182040693695e-07, "loss": 0.616, "step": 9774 }, { "epoch": 11.964504283965729, "grad_norm": 1.6352758187086325, "learning_rate": 4.884375942362096e-07, "loss": 0.9808, "step": 9775 }, { "epoch": 11.965728274173806, "grad_norm": 1.5788750109075158, "learning_rate": 4.880571165954912e-07, "loss": 0.5888, "step": 9776 }, { "epoch": 11.966952264381884, "grad_norm": 2.618036414249568, "learning_rate": 4.876767711722172e-07, "loss": 0.6162, "step": 9777 }, { "epoch": 11.968176254589963, "grad_norm": 2.6713428448670817, "learning_rate": 4.872965579913818e-07, "loss": 0.7802, "step": 9778 }, { "epoch": 11.969400244798042, "grad_norm": 2.929651143516116, "learning_rate": 4.869164770779733e-07, "loss": 0.4162, "step": 9779 }, { "epoch": 11.97062423500612, "grad_norm": 1.8890342145958732, "learning_rate": 4.865365284569684e-07, "loss": 1.0786, "step": 9780 }, { "epoch": 11.971848225214199, "grad_norm": 1.4854633095506888, "learning_rate": 4.861567121533351e-07, "loss": 1.1563, "step": 9781 }, { "epoch": 11.973072215422278, "grad_norm": 2.2381693687775344, "learning_rate": 4.857770281920329e-07, "loss": 0.8184, "step": 9782 }, { "epoch": 11.974296205630354, "grad_norm": 1.5934141898976182, "learning_rate": 4.853974765980154e-07, "loss": 0.689, "step": 9783 }, { "epoch": 11.975520195838433, "grad_norm": 1.5936599310354573, "learning_rate": 4.85018057396224e-07, "loss": 0.8192, "step": 9784 }, { "epoch": 11.976744186046512, "grad_norm": 2.182123113014971, "learning_rate": 4.846387706115932e-07, "loss": 1.0654, "step": 9785 }, { "epoch": 11.97796817625459, "grad_norm": 1.530252467702324, "learning_rate": 4.84259616269048e-07, "loss": 0.5055, "step": 9786 }, { "epoch": 11.979192166462669, "grad_norm": 2.0005235936531105, "learning_rate": 4.838805943935049e-07, "loss": 0.5513, "step": 9787 }, { "epoch": 11.980416156670747, "grad_norm": 1.3220546376351436, "learning_rate": 4.835017050098722e-07, "loss": 0.6003, "step": 9788 }, { "epoch": 11.981640146878824, "grad_norm": 2.9388842573965133, "learning_rate": 4.831229481430491e-07, "loss": 0.5266, "step": 9789 }, { "epoch": 11.982864137086903, "grad_norm": 1.2484547917169084, "learning_rate": 4.827443238179258e-07, "loss": 0.843, "step": 9790 }, { "epoch": 11.984088127294982, "grad_norm": 3.330047648081599, "learning_rate": 4.823658320593835e-07, "loss": 0.4561, "step": 9791 }, { "epoch": 11.98531211750306, "grad_norm": 2.982033096852002, "learning_rate": 4.819874728922974e-07, "loss": 0.4573, "step": 9792 }, { "epoch": 11.986536107711139, "grad_norm": 1.2345595751857097, "learning_rate": 4.816092463415298e-07, "loss": 0.5979, "step": 9793 }, { "epoch": 11.987760097919217, "grad_norm": 1.5479198301944783, "learning_rate": 4.812311524319369e-07, "loss": 0.7829, "step": 9794 }, { "epoch": 11.988984088127294, "grad_norm": 1.6277053177986331, "learning_rate": 4.808531911883651e-07, "loss": 0.5206, "step": 9795 }, { "epoch": 11.990208078335373, "grad_norm": 2.487736751957783, "learning_rate": 4.80475362635654e-07, "loss": 0.8581, "step": 9796 }, { "epoch": 11.991432068543451, "grad_norm": 1.5145936690931638, "learning_rate": 4.800976667986324e-07, "loss": 0.5853, "step": 9797 }, { "epoch": 11.99265605875153, "grad_norm": 2.071004183405466, "learning_rate": 4.797201037021218e-07, "loss": 0.2904, "step": 9798 }, { "epoch": 11.993880048959609, "grad_norm": 2.813941153099257, "learning_rate": 4.793426733709325e-07, "loss": 0.5011, "step": 9799 }, { "epoch": 11.995104039167687, "grad_norm": 2.553918810004041, "learning_rate": 4.789653758298684e-07, "loss": 0.4038, "step": 9800 }, { "epoch": 11.996328029375764, "grad_norm": 1.5772928371842687, "learning_rate": 4.785882111037252e-07, "loss": 0.6026, "step": 9801 }, { "epoch": 11.997552019583843, "grad_norm": 3.1949451206746886, "learning_rate": 4.78211179217288e-07, "loss": 0.5376, "step": 9802 }, { "epoch": 11.998776009791921, "grad_norm": 2.568425502389638, "learning_rate": 4.77834280195334e-07, "loss": 0.4461, "step": 9803 }, { "epoch": 12.0, "grad_norm": 1.763552642288535, "learning_rate": 4.774575140626317e-07, "loss": 1.5151, "step": 9804 }, { "epoch": 12.001223990208079, "grad_norm": 1.06937955991284, "learning_rate": 4.770808808439406e-07, "loss": 0.3953, "step": 9805 }, { "epoch": 12.002447980416157, "grad_norm": 1.3995286713720143, "learning_rate": 4.7670438056401174e-07, "loss": 0.447, "step": 9806 }, { "epoch": 12.003671970624236, "grad_norm": 1.5357033098348605, "learning_rate": 4.7632801324758754e-07, "loss": 1.3972, "step": 9807 }, { "epoch": 12.004895960832313, "grad_norm": 2.2988835833570582, "learning_rate": 4.7595177891940044e-07, "loss": 0.4785, "step": 9808 }, { "epoch": 12.006119951040391, "grad_norm": 1.621073675979508, "learning_rate": 4.755756776041773e-07, "loss": 0.4709, "step": 9809 }, { "epoch": 12.00734394124847, "grad_norm": 3.7232724328294435, "learning_rate": 4.7519970932663223e-07, "loss": 0.2661, "step": 9810 }, { "epoch": 12.008567931456549, "grad_norm": 1.3208411991142264, "learning_rate": 4.74823874111473e-07, "loss": 0.5988, "step": 9811 }, { "epoch": 12.009791921664627, "grad_norm": 1.9331367855600958, "learning_rate": 4.744481719833982e-07, "loss": 0.4807, "step": 9812 }, { "epoch": 12.011015911872706, "grad_norm": 1.787282415937334, "learning_rate": 4.7407260296709666e-07, "loss": 0.9278, "step": 9813 }, { "epoch": 12.012239902080783, "grad_norm": 1.3485463933977873, "learning_rate": 4.736971670872512e-07, "loss": 0.5799, "step": 9814 }, { "epoch": 12.013463892288861, "grad_norm": 2.2013901081983223, "learning_rate": 4.7332186436853307e-07, "loss": 1.1246, "step": 9815 }, { "epoch": 12.01468788249694, "grad_norm": 2.7074331637650175, "learning_rate": 4.729466948356065e-07, "loss": 0.7786, "step": 9816 }, { "epoch": 12.015911872705018, "grad_norm": 1.4606019344408274, "learning_rate": 4.7257165851312434e-07, "loss": 1.2787, "step": 9817 }, { "epoch": 12.017135862913097, "grad_norm": 2.052112431251952, "learning_rate": 4.7219675542573426e-07, "loss": 0.6275, "step": 9818 }, { "epoch": 12.018359853121176, "grad_norm": 2.7036255660517328, "learning_rate": 4.7182198559807323e-07, "loss": 0.7182, "step": 9819 }, { "epoch": 12.019583843329253, "grad_norm": 2.297677543451564, "learning_rate": 4.714473490547697e-07, "loss": 0.3449, "step": 9820 }, { "epoch": 12.020807833537331, "grad_norm": 2.457431787984592, "learning_rate": 4.710728458204433e-07, "loss": 0.4436, "step": 9821 }, { "epoch": 12.02203182374541, "grad_norm": 2.181894738407353, "learning_rate": 4.7069847591970475e-07, "loss": 0.6328, "step": 9822 }, { "epoch": 12.023255813953488, "grad_norm": 2.5880225741496283, "learning_rate": 4.703242393771568e-07, "loss": 0.6, "step": 9823 }, { "epoch": 12.024479804161567, "grad_norm": 1.2046080301760016, "learning_rate": 4.699501362173925e-07, "loss": 0.5673, "step": 9824 }, { "epoch": 12.025703794369646, "grad_norm": 1.1043697253791462, "learning_rate": 4.6957616646499643e-07, "loss": 0.5914, "step": 9825 }, { "epoch": 12.026927784577722, "grad_norm": 3.2463734644032005, "learning_rate": 4.692023301445442e-07, "loss": 0.3993, "step": 9826 }, { "epoch": 12.028151774785801, "grad_norm": 1.889592040310895, "learning_rate": 4.6882862728060475e-07, "loss": 0.4565, "step": 9827 }, { "epoch": 12.02937576499388, "grad_norm": 1.252174922983469, "learning_rate": 4.684550578977343e-07, "loss": 0.8143, "step": 9828 }, { "epoch": 12.030599755201958, "grad_norm": 1.8263571837891874, "learning_rate": 4.6808162202048343e-07, "loss": 0.9491, "step": 9829 }, { "epoch": 12.031823745410037, "grad_norm": 1.6679685461209077, "learning_rate": 4.6770831967339224e-07, "loss": 0.5487, "step": 9830 }, { "epoch": 12.033047735618116, "grad_norm": 2.448271005093146, "learning_rate": 4.6733515088099366e-07, "loss": 0.4164, "step": 9831 }, { "epoch": 12.034271725826194, "grad_norm": 1.377955968704842, "learning_rate": 4.6696211566781114e-07, "loss": 0.3909, "step": 9832 }, { "epoch": 12.035495716034271, "grad_norm": 2.2689595276360435, "learning_rate": 4.6658921405835835e-07, "loss": 0.6694, "step": 9833 }, { "epoch": 12.03671970624235, "grad_norm": 3.0943201450568636, "learning_rate": 4.662164460771415e-07, "loss": 0.9016, "step": 9834 }, { "epoch": 12.037943696450428, "grad_norm": 1.8728311781104834, "learning_rate": 4.6584381174865717e-07, "loss": 0.4176, "step": 9835 }, { "epoch": 12.039167686658507, "grad_norm": 1.4691314105199358, "learning_rate": 4.654713110973938e-07, "loss": 0.9435, "step": 9836 }, { "epoch": 12.040391676866586, "grad_norm": 2.1515442702737526, "learning_rate": 4.6509894414783095e-07, "loss": 1.1805, "step": 9837 }, { "epoch": 12.041615667074664, "grad_norm": 2.3327982597598984, "learning_rate": 4.6472671092443856e-07, "loss": 0.5425, "step": 9838 }, { "epoch": 12.042839657282741, "grad_norm": 1.589967871035463, "learning_rate": 4.6435461145167894e-07, "loss": 0.817, "step": 9839 }, { "epoch": 12.04406364749082, "grad_norm": 3.9085738900765192, "learning_rate": 4.6398264575400476e-07, "loss": 0.3701, "step": 9840 }, { "epoch": 12.045287637698898, "grad_norm": 2.293361326179226, "learning_rate": 4.6361081385586066e-07, "loss": 0.6126, "step": 9841 }, { "epoch": 12.046511627906977, "grad_norm": 1.146308521680064, "learning_rate": 4.632391157816815e-07, "loss": 0.3596, "step": 9842 }, { "epoch": 12.047735618115055, "grad_norm": 2.0531558712071036, "learning_rate": 4.628675515558936e-07, "loss": 0.9567, "step": 9843 }, { "epoch": 12.048959608323134, "grad_norm": 1.720940632293993, "learning_rate": 4.6249612120291635e-07, "loss": 0.9062, "step": 9844 }, { "epoch": 12.050183598531211, "grad_norm": 3.162836671029267, "learning_rate": 4.621248247471583e-07, "loss": 0.2977, "step": 9845 }, { "epoch": 12.05140758873929, "grad_norm": 1.9974275505551535, "learning_rate": 4.6175366221301837e-07, "loss": 0.4759, "step": 9846 }, { "epoch": 12.052631578947368, "grad_norm": 2.972693030628046, "learning_rate": 4.6138263362488806e-07, "loss": 0.4536, "step": 9847 }, { "epoch": 12.053855569155447, "grad_norm": 2.4616670578156983, "learning_rate": 4.610117390071517e-07, "loss": 0.465, "step": 9848 }, { "epoch": 12.055079559363525, "grad_norm": 1.422730272753212, "learning_rate": 4.60640978384182e-07, "loss": 0.7727, "step": 9849 }, { "epoch": 12.056303549571604, "grad_norm": 2.754119810554422, "learning_rate": 4.6027035178034417e-07, "loss": 0.8888, "step": 9850 }, { "epoch": 12.057527539779683, "grad_norm": 2.3891237683044912, "learning_rate": 4.5989985921999436e-07, "loss": 0.4916, "step": 9851 }, { "epoch": 12.05875152998776, "grad_norm": 1.1243558004549665, "learning_rate": 4.5952950072748e-07, "loss": 0.384, "step": 9852 }, { "epoch": 12.059975520195838, "grad_norm": 2.493761485173966, "learning_rate": 4.591592763271399e-07, "loss": 1.0089, "step": 9853 }, { "epoch": 12.061199510403917, "grad_norm": 1.1549428173005303, "learning_rate": 4.587891860433033e-07, "loss": 0.563, "step": 9854 }, { "epoch": 12.062423500611995, "grad_norm": 2.2935070150009675, "learning_rate": 4.5841922990029153e-07, "loss": 0.4616, "step": 9855 }, { "epoch": 12.063647490820074, "grad_norm": 1.1457588230874467, "learning_rate": 4.5804940792241684e-07, "loss": 0.4928, "step": 9856 }, { "epoch": 12.064871481028153, "grad_norm": 1.3943106284741955, "learning_rate": 4.576797201339822e-07, "loss": 0.6211, "step": 9857 }, { "epoch": 12.06609547123623, "grad_norm": 3.8900900268856082, "learning_rate": 4.5731016655928244e-07, "loss": 0.3466, "step": 9858 }, { "epoch": 12.067319461444308, "grad_norm": 1.6664150449720165, "learning_rate": 4.5694074722260314e-07, "loss": 0.462, "step": 9859 }, { "epoch": 12.068543451652387, "grad_norm": 2.340235700044478, "learning_rate": 4.5657146214822116e-07, "loss": 0.9666, "step": 9860 }, { "epoch": 12.069767441860465, "grad_norm": 1.0119321829613022, "learning_rate": 4.5620231136040414e-07, "loss": 0.4445, "step": 9861 }, { "epoch": 12.070991432068544, "grad_norm": 1.8829654314919344, "learning_rate": 4.558332948834121e-07, "loss": 0.4669, "step": 9862 }, { "epoch": 12.072215422276622, "grad_norm": 2.4532484608435126, "learning_rate": 4.5546441274149587e-07, "loss": 0.5828, "step": 9863 }, { "epoch": 12.0734394124847, "grad_norm": 1.5266845747389997, "learning_rate": 4.5509566495889555e-07, "loss": 0.7464, "step": 9864 }, { "epoch": 12.074663402692778, "grad_norm": 1.9942204621335993, "learning_rate": 4.547270515598437e-07, "loss": 0.4101, "step": 9865 }, { "epoch": 12.075887392900857, "grad_norm": 1.7291123741491843, "learning_rate": 4.543585725685659e-07, "loss": 1.4469, "step": 9866 }, { "epoch": 12.077111383108935, "grad_norm": 2.2852536769183307, "learning_rate": 4.539902280092764e-07, "loss": 0.5983, "step": 9867 }, { "epoch": 12.078335373317014, "grad_norm": 1.137836332383699, "learning_rate": 4.536220179061815e-07, "loss": 0.576, "step": 9868 }, { "epoch": 12.079559363525092, "grad_norm": 1.6148989892829824, "learning_rate": 4.5325394228347865e-07, "loss": 0.4599, "step": 9869 }, { "epoch": 12.080783353733171, "grad_norm": 1.1661929163130917, "learning_rate": 4.5288600116535605e-07, "loss": 0.4442, "step": 9870 }, { "epoch": 12.082007343941248, "grad_norm": 2.4521939122646987, "learning_rate": 4.525181945759938e-07, "loss": 0.7272, "step": 9871 }, { "epoch": 12.083231334149326, "grad_norm": 1.9861315474217156, "learning_rate": 4.521505225395628e-07, "loss": 0.5265, "step": 9872 }, { "epoch": 12.084455324357405, "grad_norm": 1.431905026346671, "learning_rate": 4.5178298508022494e-07, "loss": 0.4976, "step": 9873 }, { "epoch": 12.085679314565484, "grad_norm": 2.3893437288797177, "learning_rate": 4.5141558222213335e-07, "loss": 0.281, "step": 9874 }, { "epoch": 12.086903304773562, "grad_norm": 2.034914385477065, "learning_rate": 4.5104831398943255e-07, "loss": 0.7412, "step": 9875 }, { "epoch": 12.088127294981641, "grad_norm": 3.5822319490159376, "learning_rate": 4.50681180406258e-07, "loss": 0.3393, "step": 9876 }, { "epoch": 12.089351285189718, "grad_norm": 1.2244974744568673, "learning_rate": 4.503141814967366e-07, "loss": 0.5808, "step": 9877 }, { "epoch": 12.090575275397796, "grad_norm": 1.9876717054998867, "learning_rate": 4.4994731728498496e-07, "loss": 0.5092, "step": 9878 }, { "epoch": 12.091799265605875, "grad_norm": 2.649614995869319, "learning_rate": 4.49580587795114e-07, "loss": 1.0097, "step": 9879 }, { "epoch": 12.093023255813954, "grad_norm": 1.7790289257921463, "learning_rate": 4.4921399305122264e-07, "loss": 1.7968, "step": 9880 }, { "epoch": 12.094247246022032, "grad_norm": 1.8469362614924132, "learning_rate": 4.488475330774028e-07, "loss": 1.188, "step": 9881 }, { "epoch": 12.095471236230111, "grad_norm": 1.7208819606883992, "learning_rate": 4.484812078977352e-07, "loss": 0.9788, "step": 9882 }, { "epoch": 12.096695226438188, "grad_norm": 3.325942111886066, "learning_rate": 4.4811501753629557e-07, "loss": 0.3713, "step": 9883 }, { "epoch": 12.097919216646266, "grad_norm": 1.2198282354821652, "learning_rate": 4.4774896201714714e-07, "loss": 0.5966, "step": 9884 }, { "epoch": 12.099143206854345, "grad_norm": 1.488033900934936, "learning_rate": 4.4738304136434624e-07, "loss": 0.3515, "step": 9885 }, { "epoch": 12.100367197062424, "grad_norm": 1.5387776287805777, "learning_rate": 4.4701725560194003e-07, "loss": 0.5042, "step": 9886 }, { "epoch": 12.101591187270502, "grad_norm": 1.5998503885101045, "learning_rate": 4.4665160475396625e-07, "loss": 1.5388, "step": 9887 }, { "epoch": 12.10281517747858, "grad_norm": 1.4489724960266488, "learning_rate": 4.4628608884445395e-07, "loss": 0.6896, "step": 9888 }, { "epoch": 12.104039167686658, "grad_norm": 2.1859678800892475, "learning_rate": 4.45920707897424e-07, "loss": 0.6038, "step": 9889 }, { "epoch": 12.105263157894736, "grad_norm": 1.8365406759265028, "learning_rate": 4.455554619368874e-07, "loss": 0.5221, "step": 9890 }, { "epoch": 12.106487148102815, "grad_norm": 2.404010668777895, "learning_rate": 4.451903509868463e-07, "loss": 0.4573, "step": 9891 }, { "epoch": 12.107711138310894, "grad_norm": 2.0314762404045648, "learning_rate": 4.448253750712966e-07, "loss": 0.571, "step": 9892 }, { "epoch": 12.108935128518972, "grad_norm": 2.103575760472522, "learning_rate": 4.4446053421422107e-07, "loss": 0.4952, "step": 9893 }, { "epoch": 12.11015911872705, "grad_norm": 0.8003613923814068, "learning_rate": 4.4409582843959604e-07, "loss": 0.2165, "step": 9894 }, { "epoch": 12.11138310893513, "grad_norm": 2.5936684983742615, "learning_rate": 4.437312577713887e-07, "loss": 0.515, "step": 9895 }, { "epoch": 12.112607099143206, "grad_norm": 1.5208508362513038, "learning_rate": 4.433668222335577e-07, "loss": 0.7901, "step": 9896 }, { "epoch": 12.113831089351285, "grad_norm": 1.3546266302253611, "learning_rate": 4.430025218500525e-07, "loss": 0.6374, "step": 9897 }, { "epoch": 12.115055079559363, "grad_norm": 2.5274244906538925, "learning_rate": 4.4263835664481364e-07, "loss": 0.5329, "step": 9898 }, { "epoch": 12.116279069767442, "grad_norm": 1.898962987957822, "learning_rate": 4.4227432664177174e-07, "loss": 0.5007, "step": 9899 }, { "epoch": 12.11750305997552, "grad_norm": 1.6225671791530905, "learning_rate": 4.4191043186484954e-07, "loss": 0.7425, "step": 9900 }, { "epoch": 12.1187270501836, "grad_norm": 1.796820810908527, "learning_rate": 4.4154667233796215e-07, "loss": 0.5121, "step": 9901 }, { "epoch": 12.119951040391676, "grad_norm": 1.494615173288428, "learning_rate": 4.411830480850135e-07, "loss": 0.5538, "step": 9902 }, { "epoch": 12.121175030599755, "grad_norm": 1.6488432393290298, "learning_rate": 4.408195591299e-07, "loss": 0.5733, "step": 9903 }, { "epoch": 12.122399020807833, "grad_norm": 1.1461736505441344, "learning_rate": 4.404562054965089e-07, "loss": 0.3658, "step": 9904 }, { "epoch": 12.123623011015912, "grad_norm": 1.0758920038287227, "learning_rate": 4.4009298720871787e-07, "loss": 0.5876, "step": 9905 }, { "epoch": 12.12484700122399, "grad_norm": 2.435880849707312, "learning_rate": 4.397299042903969e-07, "loss": 0.4376, "step": 9906 }, { "epoch": 12.12607099143207, "grad_norm": 2.8805629482823836, "learning_rate": 4.393669567654063e-07, "loss": 0.5575, "step": 9907 }, { "epoch": 12.127294981640146, "grad_norm": 2.126941497861444, "learning_rate": 4.3900414465759657e-07, "loss": 0.3783, "step": 9908 }, { "epoch": 12.128518971848225, "grad_norm": 1.580872832876345, "learning_rate": 4.3864146799081244e-07, "loss": 0.6779, "step": 9909 }, { "epoch": 12.129742962056303, "grad_norm": 1.2147103196048037, "learning_rate": 4.382789267888868e-07, "loss": 0.4409, "step": 9910 }, { "epoch": 12.130966952264382, "grad_norm": 1.9571349064248427, "learning_rate": 4.3791652107564393e-07, "loss": 0.5972, "step": 9911 }, { "epoch": 12.13219094247246, "grad_norm": 1.6867800487866882, "learning_rate": 4.375542508749001e-07, "loss": 0.5576, "step": 9912 }, { "epoch": 12.13341493268054, "grad_norm": 1.4019699319878516, "learning_rate": 4.371921162104617e-07, "loss": 1.0408, "step": 9913 }, { "epoch": 12.134638922888616, "grad_norm": 2.214537381654303, "learning_rate": 4.3683011710612844e-07, "loss": 0.5014, "step": 9914 }, { "epoch": 12.135862913096695, "grad_norm": 1.8141860048296625, "learning_rate": 4.364682535856887e-07, "loss": 0.5727, "step": 9915 }, { "epoch": 12.137086903304773, "grad_norm": 2.3518674391936374, "learning_rate": 4.361065256729236e-07, "loss": 0.3672, "step": 9916 }, { "epoch": 12.138310893512852, "grad_norm": 1.7520402836416917, "learning_rate": 4.3574493339160224e-07, "loss": 0.6169, "step": 9917 }, { "epoch": 12.13953488372093, "grad_norm": 1.1273110087370142, "learning_rate": 4.3538347676548965e-07, "loss": 0.6769, "step": 9918 }, { "epoch": 12.140758873929009, "grad_norm": 2.4557490320529634, "learning_rate": 4.3502215581833846e-07, "loss": 0.511, "step": 9919 }, { "epoch": 12.141982864137088, "grad_norm": 1.1527724959225012, "learning_rate": 4.34660970573893e-07, "loss": 0.6591, "step": 9920 }, { "epoch": 12.143206854345165, "grad_norm": 2.180329050129752, "learning_rate": 4.342999210558899e-07, "loss": 1.115, "step": 9921 }, { "epoch": 12.144430844553243, "grad_norm": 2.7513851335840895, "learning_rate": 4.33939007288055e-07, "loss": 0.4276, "step": 9922 }, { "epoch": 12.145654834761322, "grad_norm": 1.9186742962310244, "learning_rate": 4.335782292941071e-07, "loss": 1.3762, "step": 9923 }, { "epoch": 12.1468788249694, "grad_norm": 1.9456019512713338, "learning_rate": 4.332175870977548e-07, "loss": 0.6794, "step": 9924 }, { "epoch": 12.148102815177479, "grad_norm": 1.5483407121093726, "learning_rate": 4.3285708072269785e-07, "loss": 0.6472, "step": 9925 }, { "epoch": 12.149326805385558, "grad_norm": 2.22433313110445, "learning_rate": 4.324967101926272e-07, "loss": 0.8157, "step": 9926 }, { "epoch": 12.150550795593634, "grad_norm": 1.3835871257512595, "learning_rate": 4.321364755312271e-07, "loss": 0.7076, "step": 9927 }, { "epoch": 12.151774785801713, "grad_norm": 2.1274055432143437, "learning_rate": 4.3177637676216856e-07, "loss": 0.849, "step": 9928 }, { "epoch": 12.152998776009792, "grad_norm": 1.4834720329633477, "learning_rate": 4.3141641390911675e-07, "loss": 1.3031, "step": 9929 }, { "epoch": 12.15422276621787, "grad_norm": 2.3829031925716606, "learning_rate": 4.310565869957262e-07, "loss": 0.4745, "step": 9930 }, { "epoch": 12.155446756425949, "grad_norm": 1.775159001372628, "learning_rate": 4.306968960456451e-07, "loss": 0.4964, "step": 9931 }, { "epoch": 12.156670746634028, "grad_norm": 1.7019094221349393, "learning_rate": 4.3033734108251013e-07, "loss": 0.8537, "step": 9932 }, { "epoch": 12.157894736842104, "grad_norm": 2.3290088843286036, "learning_rate": 4.299779221299499e-07, "loss": 0.5173, "step": 9933 }, { "epoch": 12.159118727050183, "grad_norm": 1.8607224235749455, "learning_rate": 4.296186392115842e-07, "loss": 0.7041, "step": 9934 }, { "epoch": 12.160342717258262, "grad_norm": 2.079523877477626, "learning_rate": 4.2925949235102377e-07, "loss": 0.2887, "step": 9935 }, { "epoch": 12.16156670746634, "grad_norm": 1.5694188618569103, "learning_rate": 4.289004815718703e-07, "loss": 0.912, "step": 9936 }, { "epoch": 12.162790697674419, "grad_norm": 1.6331903089491029, "learning_rate": 4.2854160689771665e-07, "loss": 1.1128, "step": 9937 }, { "epoch": 12.164014687882498, "grad_norm": 1.292644305368941, "learning_rate": 4.281828683521469e-07, "loss": 0.8919, "step": 9938 }, { "epoch": 12.165238678090576, "grad_norm": 2.352174852138731, "learning_rate": 4.278242659587359e-07, "loss": 0.4433, "step": 9939 }, { "epoch": 12.166462668298653, "grad_norm": 1.5745901328115093, "learning_rate": 4.2746579974104976e-07, "loss": 0.6078, "step": 9940 }, { "epoch": 12.167686658506732, "grad_norm": 1.5518559801000849, "learning_rate": 4.2710746972264547e-07, "loss": 1.0694, "step": 9941 }, { "epoch": 12.16891064871481, "grad_norm": 1.8807369220365866, "learning_rate": 4.2674927592707083e-07, "loss": 0.5398, "step": 9942 }, { "epoch": 12.170134638922889, "grad_norm": 1.2832099513160966, "learning_rate": 4.263912183778651e-07, "loss": 0.474, "step": 9943 }, { "epoch": 12.171358629130967, "grad_norm": 1.4316272011652191, "learning_rate": 4.260332970985592e-07, "loss": 0.675, "step": 9944 }, { "epoch": 12.172582619339046, "grad_norm": 1.6388463887116504, "learning_rate": 4.2567551211267487e-07, "loss": 0.5941, "step": 9945 }, { "epoch": 12.173806609547123, "grad_norm": 2.622173142198984, "learning_rate": 4.253178634437224e-07, "loss": 0.6634, "step": 9946 }, { "epoch": 12.175030599755202, "grad_norm": 1.5513127763662728, "learning_rate": 4.2496035111520587e-07, "loss": 0.8675, "step": 9947 }, { "epoch": 12.17625458996328, "grad_norm": 2.0108700065424903, "learning_rate": 4.246029751506206e-07, "loss": 0.4841, "step": 9948 }, { "epoch": 12.177478580171359, "grad_norm": 1.6024169841184293, "learning_rate": 4.2424573557345167e-07, "loss": 0.9785, "step": 9949 }, { "epoch": 12.178702570379437, "grad_norm": 2.4820886547416374, "learning_rate": 4.23888632407175e-07, "loss": 0.5381, "step": 9950 }, { "epoch": 12.179926560587516, "grad_norm": 1.6954237683852358, "learning_rate": 4.235316656752586e-07, "loss": 0.449, "step": 9951 }, { "epoch": 12.181150550795593, "grad_norm": 1.9971082501659663, "learning_rate": 4.2317483540116053e-07, "loss": 0.5131, "step": 9952 }, { "epoch": 12.182374541003671, "grad_norm": 2.0228969980278353, "learning_rate": 4.22818141608331e-07, "loss": 0.9541, "step": 9953 }, { "epoch": 12.18359853121175, "grad_norm": 3.1675237629912316, "learning_rate": 4.224615843202101e-07, "loss": 0.4531, "step": 9954 }, { "epoch": 12.184822521419829, "grad_norm": 1.994616569733034, "learning_rate": 4.221051635602294e-07, "loss": 0.5348, "step": 9955 }, { "epoch": 12.186046511627907, "grad_norm": 2.8997049103132784, "learning_rate": 4.217488793518121e-07, "loss": 0.4413, "step": 9956 }, { "epoch": 12.187270501835986, "grad_norm": 1.406741195723224, "learning_rate": 4.2139273171837134e-07, "loss": 0.8691, "step": 9957 }, { "epoch": 12.188494492044065, "grad_norm": 1.8371999451431766, "learning_rate": 4.210367206833119e-07, "loss": 0.684, "step": 9958 }, { "epoch": 12.189718482252141, "grad_norm": 3.084155337624326, "learning_rate": 4.2068084627002985e-07, "loss": 0.4442, "step": 9959 }, { "epoch": 12.19094247246022, "grad_norm": 2.142789729763367, "learning_rate": 4.2032510850191085e-07, "loss": 0.8283, "step": 9960 }, { "epoch": 12.192166462668299, "grad_norm": 2.2191790454478513, "learning_rate": 4.199695074023344e-07, "loss": 1.098, "step": 9961 }, { "epoch": 12.193390452876377, "grad_norm": 2.7477338368596724, "learning_rate": 4.1961404299466823e-07, "loss": 0.467, "step": 9962 }, { "epoch": 12.194614443084456, "grad_norm": 1.9050595841989424, "learning_rate": 4.1925871530227314e-07, "loss": 0.4124, "step": 9963 }, { "epoch": 12.195838433292534, "grad_norm": 2.220837347203319, "learning_rate": 4.189035243484982e-07, "loss": 1.1708, "step": 9964 }, { "epoch": 12.197062423500611, "grad_norm": 2.3434813200185336, "learning_rate": 4.185484701566858e-07, "loss": 1.3293, "step": 9965 }, { "epoch": 12.19828641370869, "grad_norm": 1.8929994169967128, "learning_rate": 4.181935527501699e-07, "loss": 0.4477, "step": 9966 }, { "epoch": 12.199510403916769, "grad_norm": 2.47015602636036, "learning_rate": 4.1783877215227363e-07, "loss": 0.5404, "step": 9967 }, { "epoch": 12.200734394124847, "grad_norm": 1.9153343766803266, "learning_rate": 4.174841283863118e-07, "loss": 0.3847, "step": 9968 }, { "epoch": 12.201958384332926, "grad_norm": 1.7100869861670718, "learning_rate": 4.1712962147559045e-07, "loss": 1.3291, "step": 9969 }, { "epoch": 12.203182374541004, "grad_norm": 1.1849610023040595, "learning_rate": 4.167752514434062e-07, "loss": 0.668, "step": 9970 }, { "epoch": 12.204406364749081, "grad_norm": 2.1366878715968047, "learning_rate": 4.164210183130471e-07, "loss": 0.6623, "step": 9971 }, { "epoch": 12.20563035495716, "grad_norm": 2.286220997230615, "learning_rate": 4.1606692210779203e-07, "loss": 0.4686, "step": 9972 }, { "epoch": 12.206854345165238, "grad_norm": 1.4050380501273065, "learning_rate": 4.1571296285091046e-07, "loss": 0.5532, "step": 9973 }, { "epoch": 12.208078335373317, "grad_norm": 2.0542360339015735, "learning_rate": 4.1535914056566484e-07, "loss": 1.1811, "step": 9974 }, { "epoch": 12.209302325581396, "grad_norm": 1.527193322654163, "learning_rate": 4.150054552753055e-07, "loss": 0.611, "step": 9975 }, { "epoch": 12.210526315789474, "grad_norm": 1.771356214462141, "learning_rate": 4.146519070030758e-07, "loss": 0.4362, "step": 9976 }, { "epoch": 12.211750305997551, "grad_norm": 1.2592842422992234, "learning_rate": 4.1429849577220933e-07, "loss": 0.5758, "step": 9977 }, { "epoch": 12.21297429620563, "grad_norm": 2.3607252046550555, "learning_rate": 4.139452216059309e-07, "loss": 1.4017, "step": 9978 }, { "epoch": 12.214198286413708, "grad_norm": 1.5527299601845577, "learning_rate": 4.1359208452745713e-07, "loss": 0.6659, "step": 9979 }, { "epoch": 12.215422276621787, "grad_norm": 1.1831708547825637, "learning_rate": 4.132390845599946e-07, "loss": 0.7424, "step": 9980 }, { "epoch": 12.216646266829866, "grad_norm": 2.333177974027238, "learning_rate": 4.1288622172674155e-07, "loss": 0.5909, "step": 9981 }, { "epoch": 12.217870257037944, "grad_norm": 2.0052352550097132, "learning_rate": 4.1253349605088506e-07, "loss": 0.7029, "step": 9982 }, { "epoch": 12.219094247246023, "grad_norm": 1.7383829323130149, "learning_rate": 4.1218090755560685e-07, "loss": 0.5044, "step": 9983 }, { "epoch": 12.2203182374541, "grad_norm": 2.5088337749745255, "learning_rate": 4.118284562640773e-07, "loss": 0.48, "step": 9984 }, { "epoch": 12.221542227662178, "grad_norm": 1.1694476124081807, "learning_rate": 4.1147614219945785e-07, "loss": 0.6165, "step": 9985 }, { "epoch": 12.222766217870257, "grad_norm": 1.8218305335472436, "learning_rate": 4.111239653849014e-07, "loss": 0.4028, "step": 9986 }, { "epoch": 12.223990208078336, "grad_norm": 0.8524906928475731, "learning_rate": 4.1077192584355165e-07, "loss": 0.4316, "step": 9987 }, { "epoch": 12.225214198286414, "grad_norm": 1.6003722853086957, "learning_rate": 4.1042002359854336e-07, "loss": 0.5875, "step": 9988 }, { "epoch": 12.226438188494493, "grad_norm": 1.2728991174416264, "learning_rate": 4.100682586730026e-07, "loss": 0.7642, "step": 9989 }, { "epoch": 12.22766217870257, "grad_norm": 1.9070716802546566, "learning_rate": 4.097166310900455e-07, "loss": 0.4348, "step": 9990 }, { "epoch": 12.228886168910648, "grad_norm": 1.3733714915735937, "learning_rate": 4.0936514087277943e-07, "loss": 0.488, "step": 9991 }, { "epoch": 12.230110159118727, "grad_norm": 1.3757391297952897, "learning_rate": 4.0901378804430486e-07, "loss": 0.4597, "step": 9992 }, { "epoch": 12.231334149326806, "grad_norm": 1.5502071189758553, "learning_rate": 4.0866257262770935e-07, "loss": 0.6243, "step": 9993 }, { "epoch": 12.232558139534884, "grad_norm": 2.4335702767304084, "learning_rate": 4.083114946460745e-07, "loss": 0.4095, "step": 9994 }, { "epoch": 12.233782129742963, "grad_norm": 1.9912566742233184, "learning_rate": 4.079605541224707e-07, "loss": 0.6199, "step": 9995 }, { "epoch": 12.23500611995104, "grad_norm": 1.2020196701503614, "learning_rate": 4.076097510799623e-07, "loss": 0.879, "step": 9996 }, { "epoch": 12.236230110159118, "grad_norm": 2.0340832703288165, "learning_rate": 4.0725908554160173e-07, "loss": 0.722, "step": 9997 }, { "epoch": 12.237454100367197, "grad_norm": 1.493228722510594, "learning_rate": 4.0690855753043404e-07, "loss": 0.6628, "step": 9998 }, { "epoch": 12.238678090575275, "grad_norm": 2.8264583789260915, "learning_rate": 4.065581670694929e-07, "loss": 0.5739, "step": 9999 }, { "epoch": 12.239902080783354, "grad_norm": 1.5619494379292729, "learning_rate": 4.0620791418180677e-07, "loss": 0.9795, "step": 10000 }, { "epoch": 12.241126070991433, "grad_norm": 2.755488288692661, "learning_rate": 4.0585779889039194e-07, "loss": 0.4146, "step": 10001 }, { "epoch": 12.24235006119951, "grad_norm": 2.586528908266464, "learning_rate": 4.055078212182567e-07, "loss": 0.2242, "step": 10002 }, { "epoch": 12.243574051407588, "grad_norm": 2.7427314668334923, "learning_rate": 4.051579811884007e-07, "loss": 0.7832, "step": 10003 }, { "epoch": 12.244798041615667, "grad_norm": 1.918886883613435, "learning_rate": 4.048082788238139e-07, "loss": 0.9207, "step": 10004 }, { "epoch": 12.246022031823745, "grad_norm": 1.1061764334775783, "learning_rate": 4.0445871414747733e-07, "loss": 0.5047, "step": 10005 }, { "epoch": 12.247246022031824, "grad_norm": 2.0847005800472758, "learning_rate": 4.041092871823632e-07, "loss": 1.1348, "step": 10006 }, { "epoch": 12.248470012239903, "grad_norm": 2.806116138859567, "learning_rate": 4.037599979514345e-07, "loss": 0.5075, "step": 10007 }, { "epoch": 12.249694002447981, "grad_norm": 1.8668924833122575, "learning_rate": 4.034108464776448e-07, "loss": 1.3398, "step": 10008 }, { "epoch": 12.250917992656058, "grad_norm": 2.5171763428542104, "learning_rate": 4.030618327839403e-07, "loss": 0.4803, "step": 10009 }, { "epoch": 12.252141982864137, "grad_norm": 3.742339060151395, "learning_rate": 4.027129568932564e-07, "loss": 0.4671, "step": 10010 }, { "epoch": 12.253365973072215, "grad_norm": 1.7408260289125281, "learning_rate": 4.02364218828519e-07, "loss": 0.9015, "step": 10011 }, { "epoch": 12.254589963280294, "grad_norm": 1.49256320180312, "learning_rate": 4.020156186126467e-07, "loss": 0.7947, "step": 10012 }, { "epoch": 12.255813953488373, "grad_norm": 2.8831431496059428, "learning_rate": 4.016671562685476e-07, "loss": 0.2715, "step": 10013 }, { "epoch": 12.257037943696451, "grad_norm": 3.3185030395573585, "learning_rate": 4.013188318191222e-07, "loss": 0.677, "step": 10014 }, { "epoch": 12.258261933904528, "grad_norm": 1.9840215712208988, "learning_rate": 4.0097064528726056e-07, "loss": 0.5866, "step": 10015 }, { "epoch": 12.259485924112607, "grad_norm": 1.9107566388762782, "learning_rate": 4.006225966958452e-07, "loss": 0.4599, "step": 10016 }, { "epoch": 12.260709914320685, "grad_norm": 2.0038572057368507, "learning_rate": 4.0027468606774615e-07, "loss": 0.3535, "step": 10017 }, { "epoch": 12.261933904528764, "grad_norm": 1.3988257899836924, "learning_rate": 3.9992691342582933e-07, "loss": 1.086, "step": 10018 }, { "epoch": 12.263157894736842, "grad_norm": 1.6438355637458306, "learning_rate": 3.995792787929481e-07, "loss": 0.5651, "step": 10019 }, { "epoch": 12.264381884944921, "grad_norm": 1.868734178806193, "learning_rate": 3.9923178219194753e-07, "loss": 1.1838, "step": 10020 }, { "epoch": 12.265605875152998, "grad_norm": 3.398866594945571, "learning_rate": 3.988844236456643e-07, "loss": 0.5147, "step": 10021 }, { "epoch": 12.266829865361077, "grad_norm": 2.000663868568739, "learning_rate": 3.985372031769252e-07, "loss": 0.9884, "step": 10022 }, { "epoch": 12.268053855569155, "grad_norm": 1.3799089394145148, "learning_rate": 3.981901208085481e-07, "loss": 0.5221, "step": 10023 }, { "epoch": 12.269277845777234, "grad_norm": 2.6092420017637155, "learning_rate": 3.9784317656334256e-07, "loss": 1.02, "step": 10024 }, { "epoch": 12.270501835985312, "grad_norm": 2.1413183405983784, "learning_rate": 3.9749637046410774e-07, "loss": 0.4281, "step": 10025 }, { "epoch": 12.271725826193391, "grad_norm": 1.5400396788837625, "learning_rate": 3.9714970253363463e-07, "loss": 1.2244, "step": 10026 }, { "epoch": 12.27294981640147, "grad_norm": 1.149487698949321, "learning_rate": 3.9680317279470555e-07, "loss": 0.6113, "step": 10027 }, { "epoch": 12.274173806609546, "grad_norm": 1.9384348488266716, "learning_rate": 3.964567812700934e-07, "loss": 0.9577, "step": 10028 }, { "epoch": 12.275397796817625, "grad_norm": 2.047353002800829, "learning_rate": 3.9611052798256025e-07, "loss": 0.5269, "step": 10029 }, { "epoch": 12.276621787025704, "grad_norm": 2.1860460927732888, "learning_rate": 3.957644129548613e-07, "loss": 0.9645, "step": 10030 }, { "epoch": 12.277845777233782, "grad_norm": 0.9468071453126047, "learning_rate": 3.954184362097424e-07, "loss": 0.4619, "step": 10031 }, { "epoch": 12.279069767441861, "grad_norm": 1.1102950184187865, "learning_rate": 3.950725977699396e-07, "loss": 0.5347, "step": 10032 }, { "epoch": 12.28029375764994, "grad_norm": 2.1567358049812326, "learning_rate": 3.947268976581803e-07, "loss": 1.188, "step": 10033 }, { "epoch": 12.281517747858016, "grad_norm": 1.2457808252523448, "learning_rate": 3.943813358971821e-07, "loss": 0.8422, "step": 10034 }, { "epoch": 12.282741738066095, "grad_norm": 1.3278506445009297, "learning_rate": 3.9403591250965464e-07, "loss": 0.5874, "step": 10035 }, { "epoch": 12.283965728274174, "grad_norm": 2.0246986452607434, "learning_rate": 3.936906275182975e-07, "loss": 0.7229, "step": 10036 }, { "epoch": 12.285189718482252, "grad_norm": 2.6648738550025857, "learning_rate": 3.933454809458015e-07, "loss": 0.6685, "step": 10037 }, { "epoch": 12.286413708690331, "grad_norm": 2.3367123683774307, "learning_rate": 3.930004728148487e-07, "loss": 0.668, "step": 10038 }, { "epoch": 12.28763769889841, "grad_norm": 1.5690994989515628, "learning_rate": 3.926556031481113e-07, "loss": 0.9257, "step": 10039 }, { "epoch": 12.288861689106486, "grad_norm": 1.7680102562467166, "learning_rate": 3.9231087196825335e-07, "loss": 0.6557, "step": 10040 }, { "epoch": 12.290085679314565, "grad_norm": 2.1810152576741904, "learning_rate": 3.9196627929792926e-07, "loss": 0.298, "step": 10041 }, { "epoch": 12.291309669522644, "grad_norm": 1.6861214474210215, "learning_rate": 3.9162182515978394e-07, "loss": 0.6972, "step": 10042 }, { "epoch": 12.292533659730722, "grad_norm": 1.421759382476858, "learning_rate": 3.912775095764534e-07, "loss": 0.3562, "step": 10043 }, { "epoch": 12.2937576499388, "grad_norm": 1.2701974788846186, "learning_rate": 3.909333325705658e-07, "loss": 0.4694, "step": 10044 }, { "epoch": 12.29498164014688, "grad_norm": 1.1425560510057682, "learning_rate": 3.905892941647393e-07, "loss": 0.5332, "step": 10045 }, { "epoch": 12.296205630354958, "grad_norm": 2.767562164639848, "learning_rate": 3.902453943815818e-07, "loss": 0.4946, "step": 10046 }, { "epoch": 12.297429620563035, "grad_norm": 1.173317110730678, "learning_rate": 3.8990163324369277e-07, "loss": 0.8449, "step": 10047 }, { "epoch": 12.298653610771114, "grad_norm": 1.6572332160276138, "learning_rate": 3.8955801077366463e-07, "loss": 0.4937, "step": 10048 }, { "epoch": 12.299877600979192, "grad_norm": 1.8582740884911884, "learning_rate": 3.8921452699407753e-07, "loss": 0.4629, "step": 10049 }, { "epoch": 12.30110159118727, "grad_norm": 2.10424507030358, "learning_rate": 3.8887118192750485e-07, "loss": 0.4348, "step": 10050 }, { "epoch": 12.30232558139535, "grad_norm": 2.5629995728757953, "learning_rate": 3.8852797559650936e-07, "loss": 0.4017, "step": 10051 }, { "epoch": 12.303549571603428, "grad_norm": 1.4077115386922814, "learning_rate": 3.881849080236455e-07, "loss": 0.6755, "step": 10052 }, { "epoch": 12.304773561811505, "grad_norm": 2.2576209123480995, "learning_rate": 3.878419792314586e-07, "loss": 0.5676, "step": 10053 }, { "epoch": 12.305997552019583, "grad_norm": 1.2980430776017775, "learning_rate": 3.8749918924248453e-07, "loss": 0.6431, "step": 10054 }, { "epoch": 12.307221542227662, "grad_norm": 2.48994872691508, "learning_rate": 3.8715653807924996e-07, "loss": 0.8554, "step": 10055 }, { "epoch": 12.30844553243574, "grad_norm": 1.6072094839106092, "learning_rate": 3.8681402576427216e-07, "loss": 0.5316, "step": 10056 }, { "epoch": 12.30966952264382, "grad_norm": 2.9813305660343516, "learning_rate": 3.864716523200618e-07, "loss": 0.4144, "step": 10057 }, { "epoch": 12.310893512851898, "grad_norm": 1.2717284139284284, "learning_rate": 3.861294177691163e-07, "loss": 0.6809, "step": 10058 }, { "epoch": 12.312117503059975, "grad_norm": 1.1745400635565966, "learning_rate": 3.85787322133927e-07, "loss": 0.6074, "step": 10059 }, { "epoch": 12.313341493268053, "grad_norm": 2.153387970241339, "learning_rate": 3.854453654369744e-07, "loss": 0.6466, "step": 10060 }, { "epoch": 12.314565483476132, "grad_norm": 2.177572509072106, "learning_rate": 3.851035477007317e-07, "loss": 0.5688, "step": 10061 }, { "epoch": 12.31578947368421, "grad_norm": 1.2524280924900757, "learning_rate": 3.847618689476612e-07, "loss": 0.5693, "step": 10062 }, { "epoch": 12.31701346389229, "grad_norm": 1.5581670551342393, "learning_rate": 3.844203292002177e-07, "loss": 0.5693, "step": 10063 }, { "epoch": 12.318237454100368, "grad_norm": 2.187174698495828, "learning_rate": 3.840789284808447e-07, "loss": 0.5605, "step": 10064 }, { "epoch": 12.319461444308445, "grad_norm": 1.5820517206423277, "learning_rate": 3.8373766681197745e-07, "loss": 0.2198, "step": 10065 }, { "epoch": 12.320685434516523, "grad_norm": 2.505797087811427, "learning_rate": 3.833965442160439e-07, "loss": 0.4487, "step": 10066 }, { "epoch": 12.321909424724602, "grad_norm": 1.1053389283515012, "learning_rate": 3.8305556071546097e-07, "loss": 0.5269, "step": 10067 }, { "epoch": 12.32313341493268, "grad_norm": 2.878333556506974, "learning_rate": 3.827147163326361e-07, "loss": 0.5727, "step": 10068 }, { "epoch": 12.32435740514076, "grad_norm": 1.5763898240411935, "learning_rate": 3.8237401108996903e-07, "loss": 1.0784, "step": 10069 }, { "epoch": 12.325581395348838, "grad_norm": 1.6527215375752544, "learning_rate": 3.8203344500984924e-07, "loss": 1.1636, "step": 10070 }, { "epoch": 12.326805385556916, "grad_norm": 3.636656996534286, "learning_rate": 3.8169301811465773e-07, "loss": 0.7551, "step": 10071 }, { "epoch": 12.328029375764993, "grad_norm": 1.6569435088492923, "learning_rate": 3.8135273042676613e-07, "loss": 0.2917, "step": 10072 }, { "epoch": 12.329253365973072, "grad_norm": 2.524449712059774, "learning_rate": 3.810125819685359e-07, "loss": 0.4955, "step": 10073 }, { "epoch": 12.33047735618115, "grad_norm": 1.9888043601032568, "learning_rate": 3.8067257276232255e-07, "loss": 0.549, "step": 10074 }, { "epoch": 12.331701346389229, "grad_norm": 1.2367683666664937, "learning_rate": 3.803327028304682e-07, "loss": 0.5383, "step": 10075 }, { "epoch": 12.332925336597308, "grad_norm": 1.5620771403302534, "learning_rate": 3.799929721953086e-07, "loss": 0.4872, "step": 10076 }, { "epoch": 12.334149326805386, "grad_norm": 1.5615057197115185, "learning_rate": 3.796533808791694e-07, "loss": 0.8598, "step": 10077 }, { "epoch": 12.335373317013463, "grad_norm": 1.6426163632338924, "learning_rate": 3.793139289043668e-07, "loss": 0.3521, "step": 10078 }, { "epoch": 12.336597307221542, "grad_norm": 1.147273181998868, "learning_rate": 3.789746162932098e-07, "loss": 0.5607, "step": 10079 }, { "epoch": 12.33782129742962, "grad_norm": 1.6076230362467414, "learning_rate": 3.786354430679956e-07, "loss": 0.3995, "step": 10080 }, { "epoch": 12.339045287637699, "grad_norm": 1.1851554721726174, "learning_rate": 3.7829640925101413e-07, "loss": 0.3806, "step": 10081 }, { "epoch": 12.340269277845778, "grad_norm": 2.1309015447436046, "learning_rate": 3.7795751486454425e-07, "loss": 0.4354, "step": 10082 }, { "epoch": 12.341493268053856, "grad_norm": 2.5154015374187098, "learning_rate": 3.7761875993085783e-07, "loss": 0.7047, "step": 10083 }, { "epoch": 12.342717258261933, "grad_norm": 3.162608994604238, "learning_rate": 3.772801444722166e-07, "loss": 0.3989, "step": 10084 }, { "epoch": 12.343941248470012, "grad_norm": 1.5813263898851395, "learning_rate": 3.769416685108726e-07, "loss": 0.887, "step": 10085 }, { "epoch": 12.34516523867809, "grad_norm": 2.6767825295849503, "learning_rate": 3.7660333206906986e-07, "loss": 0.3531, "step": 10086 }, { "epoch": 12.346389228886169, "grad_norm": 1.7641467857458457, "learning_rate": 3.762651351690419e-07, "loss": 0.9931, "step": 10087 }, { "epoch": 12.347613219094248, "grad_norm": 2.683230107274646, "learning_rate": 3.759270778330143e-07, "loss": 0.3919, "step": 10088 }, { "epoch": 12.348837209302326, "grad_norm": 2.704333470274218, "learning_rate": 3.7558916008320263e-07, "loss": 0.5296, "step": 10089 }, { "epoch": 12.350061199510403, "grad_norm": 1.4592034155164992, "learning_rate": 3.752513819418138e-07, "loss": 0.4301, "step": 10090 }, { "epoch": 12.351285189718482, "grad_norm": 1.9298902624247505, "learning_rate": 3.7491374343104457e-07, "loss": 0.3473, "step": 10091 }, { "epoch": 12.35250917992656, "grad_norm": 1.6783151816565625, "learning_rate": 3.745762445730852e-07, "loss": 0.6852, "step": 10092 }, { "epoch": 12.353733170134639, "grad_norm": 2.4251828365747192, "learning_rate": 3.742388853901133e-07, "loss": 0.9, "step": 10093 }, { "epoch": 12.354957160342718, "grad_norm": 1.5110005712408319, "learning_rate": 3.7390166590429876e-07, "loss": 0.997, "step": 10094 }, { "epoch": 12.356181150550796, "grad_norm": 2.0893612489991757, "learning_rate": 3.735645861378026e-07, "loss": 0.7291, "step": 10095 }, { "epoch": 12.357405140758875, "grad_norm": 1.7467157352317273, "learning_rate": 3.732276461127771e-07, "loss": 1.2255, "step": 10096 }, { "epoch": 12.358629130966952, "grad_norm": 3.330642419214167, "learning_rate": 3.728908458513644e-07, "loss": 0.3419, "step": 10097 }, { "epoch": 12.35985312117503, "grad_norm": 1.7491526154124082, "learning_rate": 3.725541853756975e-07, "loss": 0.9244, "step": 10098 }, { "epoch": 12.361077111383109, "grad_norm": 0.8664559294467663, "learning_rate": 3.7221766470790093e-07, "loss": 0.3488, "step": 10099 }, { "epoch": 12.362301101591187, "grad_norm": 1.069984346174424, "learning_rate": 3.7188128387008915e-07, "loss": 0.5082, "step": 10100 }, { "epoch": 12.363525091799266, "grad_norm": 1.3703224266725897, "learning_rate": 3.715450428843681e-07, "loss": 0.5301, "step": 10101 }, { "epoch": 12.364749082007345, "grad_norm": 1.514110692644841, "learning_rate": 3.71208941772834e-07, "loss": 0.4258, "step": 10102 }, { "epoch": 12.365973072215422, "grad_norm": 2.2832271872744956, "learning_rate": 3.7087298055757473e-07, "loss": 1.0094, "step": 10103 }, { "epoch": 12.3671970624235, "grad_norm": 1.3743471999808747, "learning_rate": 3.70537159260668e-07, "loss": 0.6679, "step": 10104 }, { "epoch": 12.368421052631579, "grad_norm": 1.4145066924085268, "learning_rate": 3.7020147790418266e-07, "loss": 1.3274, "step": 10105 }, { "epoch": 12.369645042839657, "grad_norm": 1.7766840737330394, "learning_rate": 3.698659365101787e-07, "loss": 0.7265, "step": 10106 }, { "epoch": 12.370869033047736, "grad_norm": 2.675778544477697, "learning_rate": 3.6953053510070643e-07, "loss": 0.9252, "step": 10107 }, { "epoch": 12.372093023255815, "grad_norm": 1.2062622739389488, "learning_rate": 3.6919527369780687e-07, "loss": 0.4964, "step": 10108 }, { "epoch": 12.373317013463891, "grad_norm": 1.6745607747112512, "learning_rate": 3.688601523235133e-07, "loss": 1.5521, "step": 10109 }, { "epoch": 12.37454100367197, "grad_norm": 2.1514967477965796, "learning_rate": 3.685251709998486e-07, "loss": 0.3061, "step": 10110 }, { "epoch": 12.375764993880049, "grad_norm": 1.1910871345374336, "learning_rate": 3.6819032974882513e-07, "loss": 0.4731, "step": 10111 }, { "epoch": 12.376988984088127, "grad_norm": 2.8872679051790873, "learning_rate": 3.6785562859244754e-07, "loss": 0.4385, "step": 10112 }, { "epoch": 12.378212974296206, "grad_norm": 2.446572264942344, "learning_rate": 3.6752106755271245e-07, "loss": 1.1939, "step": 10113 }, { "epoch": 12.379436964504285, "grad_norm": 1.8775611224207334, "learning_rate": 3.671866466516055e-07, "loss": 0.639, "step": 10114 }, { "epoch": 12.380660954712361, "grad_norm": 1.952213120907043, "learning_rate": 3.6685236591110314e-07, "loss": 1.0037, "step": 10115 }, { "epoch": 12.38188494492044, "grad_norm": 1.2054140475732251, "learning_rate": 3.6651822535317404e-07, "loss": 0.4778, "step": 10116 }, { "epoch": 12.383108935128519, "grad_norm": 1.642683215168807, "learning_rate": 3.661842249997746e-07, "loss": 0.4719, "step": 10117 }, { "epoch": 12.384332925336597, "grad_norm": 2.0414246015244153, "learning_rate": 3.658503648728562e-07, "loss": 0.7191, "step": 10118 }, { "epoch": 12.385556915544676, "grad_norm": 1.5978917418614638, "learning_rate": 3.6551664499435814e-07, "loss": 1.5683, "step": 10119 }, { "epoch": 12.386780905752754, "grad_norm": 1.1532625286040887, "learning_rate": 3.651830653862115e-07, "loss": 0.6375, "step": 10120 }, { "epoch": 12.388004895960833, "grad_norm": 1.4591648550459952, "learning_rate": 3.648496260703374e-07, "loss": 1.1451, "step": 10121 }, { "epoch": 12.38922888616891, "grad_norm": 3.5826061051209557, "learning_rate": 3.645163270686483e-07, "loss": 0.4048, "step": 10122 }, { "epoch": 12.390452876376989, "grad_norm": 1.0436087263399831, "learning_rate": 3.6418316840304805e-07, "loss": 0.4375, "step": 10123 }, { "epoch": 12.391676866585067, "grad_norm": 1.5465518830442484, "learning_rate": 3.6385015009542973e-07, "loss": 0.6433, "step": 10124 }, { "epoch": 12.392900856793146, "grad_norm": 2.314928098606368, "learning_rate": 3.63517272167678e-07, "loss": 0.6253, "step": 10125 }, { "epoch": 12.394124847001224, "grad_norm": 2.639917217968421, "learning_rate": 3.631845346416696e-07, "loss": 0.5712, "step": 10126 }, { "epoch": 12.395348837209303, "grad_norm": 1.9566367336943487, "learning_rate": 3.6285193753927e-07, "loss": 1.1005, "step": 10127 }, { "epoch": 12.39657282741738, "grad_norm": 1.4547435787002216, "learning_rate": 3.625194808823368e-07, "loss": 0.3622, "step": 10128 }, { "epoch": 12.397796817625458, "grad_norm": 2.235789977544165, "learning_rate": 3.6218716469271665e-07, "loss": 0.4787, "step": 10129 }, { "epoch": 12.399020807833537, "grad_norm": 2.0083394342922314, "learning_rate": 3.618549889922482e-07, "loss": 0.3923, "step": 10130 }, { "epoch": 12.400244798041616, "grad_norm": 1.4614367779830644, "learning_rate": 3.6152295380276234e-07, "loss": 0.5277, "step": 10131 }, { "epoch": 12.401468788249694, "grad_norm": 2.1602435008547083, "learning_rate": 3.611910591460779e-07, "loss": 0.3836, "step": 10132 }, { "epoch": 12.402692778457773, "grad_norm": 1.618286820905577, "learning_rate": 3.608593050440065e-07, "loss": 0.9078, "step": 10133 }, { "epoch": 12.403916768665852, "grad_norm": 2.157167795412002, "learning_rate": 3.605276915183492e-07, "loss": 1.1983, "step": 10134 }, { "epoch": 12.405140758873928, "grad_norm": 1.7091084156789649, "learning_rate": 3.601962185908986e-07, "loss": 1.5503, "step": 10135 }, { "epoch": 12.406364749082007, "grad_norm": 1.8212733841991549, "learning_rate": 3.5986488628343807e-07, "loss": 0.4107, "step": 10136 }, { "epoch": 12.407588739290086, "grad_norm": 1.4801638111471427, "learning_rate": 3.5953369461774134e-07, "loss": 1.1552, "step": 10137 }, { "epoch": 12.408812729498164, "grad_norm": 2.72914681221252, "learning_rate": 3.5920264361557266e-07, "loss": 0.9435, "step": 10138 }, { "epoch": 12.410036719706243, "grad_norm": 2.253379564449145, "learning_rate": 3.588717332986891e-07, "loss": 0.9155, "step": 10139 }, { "epoch": 12.411260709914322, "grad_norm": 1.685581444068458, "learning_rate": 3.585409636888351e-07, "loss": 0.3814, "step": 10140 }, { "epoch": 12.412484700122398, "grad_norm": 2.3636302590825076, "learning_rate": 3.5821033480774843e-07, "loss": 0.3429, "step": 10141 }, { "epoch": 12.413708690330477, "grad_norm": 2.657079168393158, "learning_rate": 3.578798466771563e-07, "loss": 0.6153, "step": 10142 }, { "epoch": 12.414932680538556, "grad_norm": 1.201482044576642, "learning_rate": 3.5754949931877725e-07, "loss": 0.5578, "step": 10143 }, { "epoch": 12.416156670746634, "grad_norm": 1.8890586575070734, "learning_rate": 3.572192927543211e-07, "loss": 1.5857, "step": 10144 }, { "epoch": 12.417380660954713, "grad_norm": 1.76857961487169, "learning_rate": 3.5688922700548826e-07, "loss": 0.3007, "step": 10145 }, { "epoch": 12.418604651162791, "grad_norm": 1.791252915491405, "learning_rate": 3.5655930209396784e-07, "loss": 1.6101, "step": 10146 }, { "epoch": 12.419828641370868, "grad_norm": 1.4936640102579652, "learning_rate": 3.562295180414413e-07, "loss": 1.4062, "step": 10147 }, { "epoch": 12.421052631578947, "grad_norm": 1.626191404677855, "learning_rate": 3.558998748695824e-07, "loss": 0.5077, "step": 10148 }, { "epoch": 12.422276621787026, "grad_norm": 1.783319789473257, "learning_rate": 3.555703726000531e-07, "loss": 1.0902, "step": 10149 }, { "epoch": 12.423500611995104, "grad_norm": 1.9335713097662122, "learning_rate": 3.5524101125450737e-07, "loss": 1.2087, "step": 10150 }, { "epoch": 12.424724602203183, "grad_norm": 0.5943678039609892, "learning_rate": 3.549117908545893e-07, "loss": 0.1286, "step": 10151 }, { "epoch": 12.425948592411261, "grad_norm": 2.141183373080161, "learning_rate": 3.545827114219344e-07, "loss": 0.954, "step": 10152 }, { "epoch": 12.427172582619338, "grad_norm": 1.315501832948651, "learning_rate": 3.542537729781684e-07, "loss": 0.5161, "step": 10153 }, { "epoch": 12.428396572827417, "grad_norm": 2.0791392529035737, "learning_rate": 3.5392497554490755e-07, "loss": 0.4233, "step": 10154 }, { "epoch": 12.429620563035495, "grad_norm": 1.6473902084908767, "learning_rate": 3.535963191437597e-07, "loss": 0.4615, "step": 10155 }, { "epoch": 12.430844553243574, "grad_norm": 2.099305150470503, "learning_rate": 3.532678037963222e-07, "loss": 0.3158, "step": 10156 }, { "epoch": 12.432068543451653, "grad_norm": 0.9864552343235745, "learning_rate": 3.529394295241856e-07, "loss": 0.4251, "step": 10157 }, { "epoch": 12.433292533659731, "grad_norm": 1.9209945833242439, "learning_rate": 3.526111963489276e-07, "loss": 1.4827, "step": 10158 }, { "epoch": 12.43451652386781, "grad_norm": 1.6608347914606691, "learning_rate": 3.5228310429211934e-07, "loss": 0.4503, "step": 10159 }, { "epoch": 12.435740514075887, "grad_norm": 1.6888052055073246, "learning_rate": 3.519551533753207e-07, "loss": 1.0106, "step": 10160 }, { "epoch": 12.436964504283965, "grad_norm": 1.7838017273957312, "learning_rate": 3.516273436200851e-07, "loss": 0.672, "step": 10161 }, { "epoch": 12.438188494492044, "grad_norm": 1.3720950434270993, "learning_rate": 3.5129967504795415e-07, "loss": 0.5733, "step": 10162 }, { "epoch": 12.439412484700123, "grad_norm": 2.692792322607686, "learning_rate": 3.509721476804617e-07, "loss": 0.9063, "step": 10163 }, { "epoch": 12.440636474908201, "grad_norm": 2.3178450368740435, "learning_rate": 3.5064476153913036e-07, "loss": 1.029, "step": 10164 }, { "epoch": 12.44186046511628, "grad_norm": 2.0120546205846064, "learning_rate": 3.5031751664547475e-07, "loss": 1.0431, "step": 10165 }, { "epoch": 12.443084455324357, "grad_norm": 2.9497064953831584, "learning_rate": 3.4999041302100164e-07, "loss": 0.7306, "step": 10166 }, { "epoch": 12.444308445532435, "grad_norm": 1.300586266279099, "learning_rate": 3.4966345068720597e-07, "loss": 0.9712, "step": 10167 }, { "epoch": 12.445532435740514, "grad_norm": 1.5306742093767491, "learning_rate": 3.493366296655748e-07, "loss": 0.7814, "step": 10168 }, { "epoch": 12.446756425948593, "grad_norm": 1.7925765286209163, "learning_rate": 3.4900994997758566e-07, "loss": 0.7439, "step": 10169 }, { "epoch": 12.447980416156671, "grad_norm": 1.8804417699946472, "learning_rate": 3.4868341164470685e-07, "loss": 0.5619, "step": 10170 }, { "epoch": 12.44920440636475, "grad_norm": 3.022492781697122, "learning_rate": 3.483570146883969e-07, "loss": 0.485, "step": 10171 }, { "epoch": 12.450428396572827, "grad_norm": 1.0662031085240027, "learning_rate": 3.4803075913010545e-07, "loss": 0.5913, "step": 10172 }, { "epoch": 12.451652386780905, "grad_norm": 2.3886765274370334, "learning_rate": 3.477046449912724e-07, "loss": 1.0716, "step": 10173 }, { "epoch": 12.452876376988984, "grad_norm": 2.911116283716038, "learning_rate": 3.4737867229333035e-07, "loss": 0.3506, "step": 10174 }, { "epoch": 12.454100367197062, "grad_norm": 1.9373305506523122, "learning_rate": 3.470528410576995e-07, "loss": 0.9972, "step": 10175 }, { "epoch": 12.455324357405141, "grad_norm": 2.0551508387200124, "learning_rate": 3.4672715130579263e-07, "loss": 0.5216, "step": 10176 }, { "epoch": 12.45654834761322, "grad_norm": 2.2226476257634373, "learning_rate": 3.464016030590131e-07, "loss": 0.5936, "step": 10177 }, { "epoch": 12.457772337821297, "grad_norm": 1.9941758554836873, "learning_rate": 3.4607619633875405e-07, "loss": 0.4529, "step": 10178 }, { "epoch": 12.458996328029375, "grad_norm": 1.7924555600920329, "learning_rate": 3.457509311664012e-07, "loss": 0.6511, "step": 10179 }, { "epoch": 12.460220318237454, "grad_norm": 1.1672275873971574, "learning_rate": 3.4542580756332903e-07, "loss": 0.5553, "step": 10180 }, { "epoch": 12.461444308445532, "grad_norm": 1.445043530368504, "learning_rate": 3.451008255509042e-07, "loss": 0.8088, "step": 10181 }, { "epoch": 12.462668298653611, "grad_norm": 1.4463485783763796, "learning_rate": 3.4477598515048143e-07, "loss": 0.9639, "step": 10182 }, { "epoch": 12.46389228886169, "grad_norm": 1.4869556007860945, "learning_rate": 3.4445128638340983e-07, "loss": 0.8051, "step": 10183 }, { "epoch": 12.465116279069768, "grad_norm": 1.4296856281724217, "learning_rate": 3.441267292710271e-07, "loss": 0.6909, "step": 10184 }, { "epoch": 12.466340269277845, "grad_norm": 1.583061208312277, "learning_rate": 3.4380231383466163e-07, "loss": 0.3564, "step": 10185 }, { "epoch": 12.467564259485924, "grad_norm": 1.2657906510496397, "learning_rate": 3.434780400956328e-07, "loss": 0.9815, "step": 10186 }, { "epoch": 12.468788249694002, "grad_norm": 2.8997391744810534, "learning_rate": 3.4315390807525076e-07, "loss": 0.4945, "step": 10187 }, { "epoch": 12.470012239902081, "grad_norm": 1.4446574939065222, "learning_rate": 3.4282991779481604e-07, "loss": 0.429, "step": 10188 }, { "epoch": 12.47123623011016, "grad_norm": 1.614542982146774, "learning_rate": 3.4250606927562076e-07, "loss": 0.4227, "step": 10189 }, { "epoch": 12.472460220318238, "grad_norm": 2.9180586206940493, "learning_rate": 3.4218236253894616e-07, "loss": 0.3485, "step": 10190 }, { "epoch": 12.473684210526315, "grad_norm": 2.027199126934295, "learning_rate": 3.4185879760606525e-07, "loss": 1.2858, "step": 10191 }, { "epoch": 12.474908200734394, "grad_norm": 2.573411157887824, "learning_rate": 3.4153537449824275e-07, "loss": 0.5319, "step": 10192 }, { "epoch": 12.476132190942472, "grad_norm": 2.0713752643275747, "learning_rate": 3.4121209323673104e-07, "loss": 0.3599, "step": 10193 }, { "epoch": 12.477356181150551, "grad_norm": 1.7903201737627987, "learning_rate": 3.4088895384277623e-07, "loss": 0.5248, "step": 10194 }, { "epoch": 12.47858017135863, "grad_norm": 1.469469020346149, "learning_rate": 3.405659563376124e-07, "loss": 0.512, "step": 10195 }, { "epoch": 12.479804161566708, "grad_norm": 2.7405915909712, "learning_rate": 3.4024310074246753e-07, "loss": 0.4847, "step": 10196 }, { "epoch": 12.481028151774785, "grad_norm": 1.508645562720883, "learning_rate": 3.399203870785575e-07, "loss": 0.7058, "step": 10197 }, { "epoch": 12.482252141982864, "grad_norm": 2.096223850795684, "learning_rate": 3.3959781536709006e-07, "loss": 0.5406, "step": 10198 }, { "epoch": 12.483476132190942, "grad_norm": 1.5835545792111225, "learning_rate": 3.392753856292635e-07, "loss": 2.2756, "step": 10199 }, { "epoch": 12.48470012239902, "grad_norm": 1.554835916673236, "learning_rate": 3.389530978862668e-07, "loss": 0.6153, "step": 10200 }, { "epoch": 12.4859241126071, "grad_norm": 1.630358496119116, "learning_rate": 3.38630952159279e-07, "loss": 0.931, "step": 10201 }, { "epoch": 12.487148102815178, "grad_norm": 2.454847688800301, "learning_rate": 3.38308948469471e-07, "loss": 0.3931, "step": 10202 }, { "epoch": 12.488372093023255, "grad_norm": 1.685144915042484, "learning_rate": 3.379870868380031e-07, "loss": 1.1655, "step": 10203 }, { "epoch": 12.489596083231334, "grad_norm": 1.8526855304761276, "learning_rate": 3.376653672860272e-07, "loss": 1.1668, "step": 10204 }, { "epoch": 12.490820073439412, "grad_norm": 2.656911714612, "learning_rate": 3.373437898346857e-07, "loss": 1.1732, "step": 10205 }, { "epoch": 12.49204406364749, "grad_norm": 1.7761932083247216, "learning_rate": 3.37022354505111e-07, "loss": 0.8204, "step": 10206 }, { "epoch": 12.49326805385557, "grad_norm": 3.3766375789728627, "learning_rate": 3.367010613184268e-07, "loss": 0.4315, "step": 10207 }, { "epoch": 12.494492044063648, "grad_norm": 3.05637134313613, "learning_rate": 3.3637991029574684e-07, "loss": 0.3911, "step": 10208 }, { "epoch": 12.495716034271727, "grad_norm": 1.4100756777620218, "learning_rate": 3.360589014581772e-07, "loss": 0.6421, "step": 10209 }, { "epoch": 12.496940024479803, "grad_norm": 2.46032745198828, "learning_rate": 3.35738034826813e-07, "loss": 0.4948, "step": 10210 }, { "epoch": 12.498164014687882, "grad_norm": 2.952971561838141, "learning_rate": 3.3541731042273985e-07, "loss": 0.9536, "step": 10211 }, { "epoch": 12.49938800489596, "grad_norm": 2.167634841699194, "learning_rate": 3.3509672826703393e-07, "loss": 0.8072, "step": 10212 }, { "epoch": 12.50061199510404, "grad_norm": 1.9527758735827099, "learning_rate": 3.3477628838076425e-07, "loss": 0.5782, "step": 10213 }, { "epoch": 12.501835985312118, "grad_norm": 3.1534958422880055, "learning_rate": 3.344559907849884e-07, "loss": 0.314, "step": 10214 }, { "epoch": 12.503059975520197, "grad_norm": 1.4039696292320003, "learning_rate": 3.341358355007551e-07, "loss": 0.3054, "step": 10215 }, { "epoch": 12.504283965728273, "grad_norm": 1.774537449025063, "learning_rate": 3.3381582254910366e-07, "loss": 0.4913, "step": 10216 }, { "epoch": 12.505507955936352, "grad_norm": 2.5376665415817894, "learning_rate": 3.3349595195106415e-07, "loss": 0.5528, "step": 10217 }, { "epoch": 12.50673194614443, "grad_norm": 2.2695178067527264, "learning_rate": 3.331762237276573e-07, "loss": 0.9481, "step": 10218 }, { "epoch": 12.50795593635251, "grad_norm": 1.1109394484900241, "learning_rate": 3.328566378998946e-07, "loss": 0.391, "step": 10219 }, { "epoch": 12.509179926560588, "grad_norm": 2.0542687459843103, "learning_rate": 3.3253719448877785e-07, "loss": 1.306, "step": 10220 }, { "epoch": 12.510403916768666, "grad_norm": 1.3247702472299907, "learning_rate": 3.3221789351529977e-07, "loss": 0.4366, "step": 10221 }, { "epoch": 12.511627906976745, "grad_norm": 1.853736696916223, "learning_rate": 3.318987350004438e-07, "loss": 0.2906, "step": 10222 }, { "epoch": 12.512851897184822, "grad_norm": 2.703554423358692, "learning_rate": 3.315797189651837e-07, "loss": 0.5024, "step": 10223 }, { "epoch": 12.5140758873929, "grad_norm": 1.9932568500055277, "learning_rate": 3.3126084543048386e-07, "loss": 0.5418, "step": 10224 }, { "epoch": 12.51529987760098, "grad_norm": 2.717140250026016, "learning_rate": 3.3094211441729916e-07, "loss": 0.5069, "step": 10225 }, { "epoch": 12.516523867809058, "grad_norm": 2.1998144023471937, "learning_rate": 3.306235259465765e-07, "loss": 0.5071, "step": 10226 }, { "epoch": 12.517747858017136, "grad_norm": 2.4944632499776778, "learning_rate": 3.303050800392518e-07, "loss": 0.4545, "step": 10227 }, { "epoch": 12.518971848225215, "grad_norm": 1.87358997801927, "learning_rate": 3.299867767162526e-07, "loss": 1.5569, "step": 10228 }, { "epoch": 12.520195838433292, "grad_norm": 1.6903712728942022, "learning_rate": 3.296686159984955e-07, "loss": 0.4845, "step": 10229 }, { "epoch": 12.52141982864137, "grad_norm": 3.495359444570635, "learning_rate": 3.2935059790688897e-07, "loss": 0.347, "step": 10230 }, { "epoch": 12.522643818849449, "grad_norm": 1.5485553251735609, "learning_rate": 3.29032722462333e-07, "loss": 1.21, "step": 10231 }, { "epoch": 12.523867809057528, "grad_norm": 1.3808182738087158, "learning_rate": 3.287149896857167e-07, "loss": 0.5487, "step": 10232 }, { "epoch": 12.525091799265606, "grad_norm": 3.1762759344252367, "learning_rate": 3.2839739959792034e-07, "loss": 0.3912, "step": 10233 }, { "epoch": 12.526315789473685, "grad_norm": 1.3457277575674944, "learning_rate": 3.280799522198144e-07, "loss": 0.7271, "step": 10234 }, { "epoch": 12.527539779681762, "grad_norm": 1.9042628230626772, "learning_rate": 3.277626475722609e-07, "loss": 0.7248, "step": 10235 }, { "epoch": 12.52876376988984, "grad_norm": 1.0935446813316578, "learning_rate": 3.274454856761114e-07, "loss": 0.5447, "step": 10236 }, { "epoch": 12.529987760097919, "grad_norm": 2.221961499070149, "learning_rate": 3.27128466552209e-07, "loss": 0.5293, "step": 10237 }, { "epoch": 12.531211750305998, "grad_norm": 2.0584157369543874, "learning_rate": 3.268115902213864e-07, "loss": 0.759, "step": 10238 }, { "epoch": 12.532435740514076, "grad_norm": 1.3907464042338014, "learning_rate": 3.26494856704469e-07, "loss": 0.3858, "step": 10239 }, { "epoch": 12.533659730722155, "grad_norm": 1.4771594051720225, "learning_rate": 3.261782660222701e-07, "loss": 0.7206, "step": 10240 }, { "epoch": 12.534883720930232, "grad_norm": 2.3269821202569636, "learning_rate": 3.258618181955947e-07, "loss": 0.3658, "step": 10241 }, { "epoch": 12.53610771113831, "grad_norm": 1.2537140834309628, "learning_rate": 3.2554551324523925e-07, "loss": 0.8393, "step": 10242 }, { "epoch": 12.537331701346389, "grad_norm": 2.7131664658702177, "learning_rate": 3.252293511919893e-07, "loss": 0.3863, "step": 10243 }, { "epoch": 12.538555691554468, "grad_norm": 2.860201957750692, "learning_rate": 3.2491333205662334e-07, "loss": 0.4331, "step": 10244 }, { "epoch": 12.539779681762546, "grad_norm": 2.303459103626772, "learning_rate": 3.245974558599077e-07, "loss": 0.4444, "step": 10245 }, { "epoch": 12.541003671970625, "grad_norm": 1.5604590317183562, "learning_rate": 3.2428172262260166e-07, "loss": 0.473, "step": 10246 }, { "epoch": 12.542227662178703, "grad_norm": 1.6539903288549973, "learning_rate": 3.239661323654525e-07, "loss": 0.3487, "step": 10247 }, { "epoch": 12.54345165238678, "grad_norm": 1.1903027959790389, "learning_rate": 3.236506851092008e-07, "loss": 0.6016, "step": 10248 }, { "epoch": 12.544675642594859, "grad_norm": 1.7384603750799568, "learning_rate": 3.233353808745765e-07, "loss": 0.4248, "step": 10249 }, { "epoch": 12.545899632802938, "grad_norm": 1.4260000743212158, "learning_rate": 3.230202196823001e-07, "loss": 0.3623, "step": 10250 }, { "epoch": 12.547123623011016, "grad_norm": 2.1568267080421815, "learning_rate": 3.227052015530827e-07, "loss": 1.3109, "step": 10251 }, { "epoch": 12.548347613219095, "grad_norm": 2.9775527525477288, "learning_rate": 3.223903265076259e-07, "loss": 0.3467, "step": 10252 }, { "epoch": 12.549571603427173, "grad_norm": 2.730084665725833, "learning_rate": 3.220755945666226e-07, "loss": 0.4554, "step": 10253 }, { "epoch": 12.55079559363525, "grad_norm": 1.5126924342657186, "learning_rate": 3.217610057507559e-07, "loss": 0.5558, "step": 10254 }, { "epoch": 12.552019583843329, "grad_norm": 1.5000183370612075, "learning_rate": 3.21446560080699e-07, "loss": 1.1125, "step": 10255 }, { "epoch": 12.553243574051407, "grad_norm": 1.4136235677865325, "learning_rate": 3.211322575771156e-07, "loss": 0.4071, "step": 10256 }, { "epoch": 12.554467564259486, "grad_norm": 1.3765333929599162, "learning_rate": 3.2081809826066234e-07, "loss": 0.6247, "step": 10257 }, { "epoch": 12.555691554467565, "grad_norm": 2.7481800011367317, "learning_rate": 3.2050408215198283e-07, "loss": 0.3799, "step": 10258 }, { "epoch": 12.556915544675643, "grad_norm": 1.7068743826124904, "learning_rate": 3.201902092717138e-07, "loss": 1.1698, "step": 10259 }, { "epoch": 12.55813953488372, "grad_norm": 1.7487390227351058, "learning_rate": 3.1987647964048075e-07, "loss": 0.7236, "step": 10260 }, { "epoch": 12.559363525091799, "grad_norm": 1.6290074102337266, "learning_rate": 3.195628932789027e-07, "loss": 0.5662, "step": 10261 }, { "epoch": 12.560587515299877, "grad_norm": 2.4785289456249444, "learning_rate": 3.192494502075863e-07, "loss": 0.5708, "step": 10262 }, { "epoch": 12.561811505507956, "grad_norm": 1.26988612588785, "learning_rate": 3.189361504471303e-07, "loss": 0.657, "step": 10263 }, { "epoch": 12.563035495716035, "grad_norm": 2.019636724527029, "learning_rate": 3.1862299401812236e-07, "loss": 0.4753, "step": 10264 }, { "epoch": 12.564259485924113, "grad_norm": 1.987795978557706, "learning_rate": 3.183099809411436e-07, "loss": 0.4873, "step": 10265 }, { "epoch": 12.56548347613219, "grad_norm": 1.239442087122034, "learning_rate": 3.179971112367633e-07, "loss": 0.499, "step": 10266 }, { "epoch": 12.566707466340269, "grad_norm": 1.1112099283641501, "learning_rate": 3.1768438492554204e-07, "loss": 0.523, "step": 10267 }, { "epoch": 12.567931456548347, "grad_norm": 1.1706234812292378, "learning_rate": 3.173718020280314e-07, "loss": 0.6369, "step": 10268 }, { "epoch": 12.569155446756426, "grad_norm": 2.2721909604122534, "learning_rate": 3.1705936256477274e-07, "loss": 0.9216, "step": 10269 }, { "epoch": 12.570379436964505, "grad_norm": 1.1850719579143436, "learning_rate": 3.167470665562988e-07, "loss": 0.4675, "step": 10270 }, { "epoch": 12.571603427172583, "grad_norm": 1.4810459344876397, "learning_rate": 3.1643491402313207e-07, "loss": 0.5339, "step": 10271 }, { "epoch": 12.572827417380662, "grad_norm": 1.5878182423511915, "learning_rate": 3.161229049857867e-07, "loss": 0.4666, "step": 10272 }, { "epoch": 12.574051407588739, "grad_norm": 1.6206296735125558, "learning_rate": 3.1581103946476563e-07, "loss": 0.6865, "step": 10273 }, { "epoch": 12.575275397796817, "grad_norm": 2.051090670910285, "learning_rate": 3.1549931748056483e-07, "loss": 1.3843, "step": 10274 }, { "epoch": 12.576499388004896, "grad_norm": 1.7239273993982327, "learning_rate": 3.1518773905366976e-07, "loss": 0.4634, "step": 10275 }, { "epoch": 12.577723378212974, "grad_norm": 2.3691404520186987, "learning_rate": 3.1487630420455466e-07, "loss": 0.3213, "step": 10276 }, { "epoch": 12.578947368421053, "grad_norm": 1.6941013900537065, "learning_rate": 3.145650129536862e-07, "loss": 0.4851, "step": 10277 }, { "epoch": 12.580171358629132, "grad_norm": 1.1725672069397968, "learning_rate": 3.1425386532152216e-07, "loss": 0.4142, "step": 10278 }, { "epoch": 12.581395348837209, "grad_norm": 3.0607638321829596, "learning_rate": 3.139428613285098e-07, "loss": 0.3354, "step": 10279 }, { "epoch": 12.582619339045287, "grad_norm": 1.9090094357253433, "learning_rate": 3.136320009950866e-07, "loss": 0.2886, "step": 10280 }, { "epoch": 12.583843329253366, "grad_norm": 1.7612608501105005, "learning_rate": 3.133212843416822e-07, "loss": 0.4122, "step": 10281 }, { "epoch": 12.585067319461444, "grad_norm": 2.7614053491711448, "learning_rate": 3.13010711388714e-07, "loss": 0.5819, "step": 10282 }, { "epoch": 12.586291309669523, "grad_norm": 2.935360182060802, "learning_rate": 3.1270028215659305e-07, "loss": 1.0536, "step": 10283 }, { "epoch": 12.587515299877602, "grad_norm": 2.3850349756811724, "learning_rate": 3.1238999666571955e-07, "loss": 0.4843, "step": 10284 }, { "epoch": 12.588739290085678, "grad_norm": 2.0554360829431215, "learning_rate": 3.1207985493648394e-07, "loss": 0.3867, "step": 10285 }, { "epoch": 12.589963280293757, "grad_norm": 1.4595596061463731, "learning_rate": 3.117698569892677e-07, "loss": 0.3956, "step": 10286 }, { "epoch": 12.591187270501836, "grad_norm": 2.058349301720611, "learning_rate": 3.1146000284444284e-07, "loss": 0.7263, "step": 10287 }, { "epoch": 12.592411260709914, "grad_norm": 1.6608164199295379, "learning_rate": 3.1115029252237165e-07, "loss": 0.4943, "step": 10288 }, { "epoch": 12.593635250917993, "grad_norm": 0.9805997184529526, "learning_rate": 3.1084072604340735e-07, "loss": 0.3877, "step": 10289 }, { "epoch": 12.594859241126072, "grad_norm": 1.3974373355979652, "learning_rate": 3.1053130342789254e-07, "loss": 0.5031, "step": 10290 }, { "epoch": 12.596083231334148, "grad_norm": 2.3650251892384966, "learning_rate": 3.102220246961632e-07, "loss": 1.0257, "step": 10291 }, { "epoch": 12.597307221542227, "grad_norm": 1.822561350385391, "learning_rate": 3.099128898685433e-07, "loss": 0.5191, "step": 10292 }, { "epoch": 12.598531211750306, "grad_norm": 1.5751133391103775, "learning_rate": 3.0960389896534685e-07, "loss": 1.1587, "step": 10293 }, { "epoch": 12.599755201958384, "grad_norm": 1.7474541483179569, "learning_rate": 3.0929505200688076e-07, "loss": 0.567, "step": 10294 }, { "epoch": 12.600979192166463, "grad_norm": 1.8757491981879895, "learning_rate": 3.0898634901344067e-07, "loss": 1.3018, "step": 10295 }, { "epoch": 12.602203182374542, "grad_norm": 1.6919072500880863, "learning_rate": 3.086777900053139e-07, "loss": 1.0619, "step": 10296 }, { "epoch": 12.60342717258262, "grad_norm": 1.8782058091601408, "learning_rate": 3.0836937500277775e-07, "loss": 1.4174, "step": 10297 }, { "epoch": 12.604651162790697, "grad_norm": 2.1007289178559927, "learning_rate": 3.080611040261e-07, "loss": 0.5868, "step": 10298 }, { "epoch": 12.605875152998776, "grad_norm": 1.4090732457599986, "learning_rate": 3.0775297709553895e-07, "loss": 0.8598, "step": 10299 }, { "epoch": 12.607099143206854, "grad_norm": 1.1705826578119698, "learning_rate": 3.07444994231344e-07, "loss": 0.6008, "step": 10300 }, { "epoch": 12.608323133414933, "grad_norm": 2.1145061694830796, "learning_rate": 3.0713715545375416e-07, "loss": 1.0651, "step": 10301 }, { "epoch": 12.609547123623011, "grad_norm": 3.390453368180344, "learning_rate": 3.0682946078299943e-07, "loss": 0.5854, "step": 10302 }, { "epoch": 12.61077111383109, "grad_norm": 0.9757744794203889, "learning_rate": 3.065219102393002e-07, "loss": 0.4379, "step": 10303 }, { "epoch": 12.611995104039167, "grad_norm": 2.6913040286280627, "learning_rate": 3.0621450384286906e-07, "loss": 0.3967, "step": 10304 }, { "epoch": 12.613219094247246, "grad_norm": 1.8690366710149815, "learning_rate": 3.0590724161390583e-07, "loss": 0.9066, "step": 10305 }, { "epoch": 12.614443084455324, "grad_norm": 1.8952469105191816, "learning_rate": 3.0560012357260333e-07, "loss": 0.4795, "step": 10306 }, { "epoch": 12.615667074663403, "grad_norm": 1.2430856523853449, "learning_rate": 3.0529314973914423e-07, "loss": 1.0381, "step": 10307 }, { "epoch": 12.616891064871481, "grad_norm": 1.3164544596758982, "learning_rate": 3.049863201337011e-07, "loss": 0.3993, "step": 10308 }, { "epoch": 12.61811505507956, "grad_norm": 1.9660933860552197, "learning_rate": 3.0467963477643857e-07, "loss": 0.6464, "step": 10309 }, { "epoch": 12.619339045287639, "grad_norm": 1.1690496558441839, "learning_rate": 3.043730936875114e-07, "loss": 0.5047, "step": 10310 }, { "epoch": 12.620563035495715, "grad_norm": 2.8894178890946747, "learning_rate": 3.040666968870626e-07, "loss": 0.356, "step": 10311 }, { "epoch": 12.621787025703794, "grad_norm": 1.0749831324777759, "learning_rate": 3.03760444395228e-07, "loss": 0.6262, "step": 10312 }, { "epoch": 12.623011015911873, "grad_norm": 2.2372521478348206, "learning_rate": 3.03454336232134e-07, "loss": 0.3707, "step": 10313 }, { "epoch": 12.624235006119951, "grad_norm": 1.667938036746886, "learning_rate": 3.0314837241789655e-07, "loss": 0.6204, "step": 10314 }, { "epoch": 12.62545899632803, "grad_norm": 1.6650047628456977, "learning_rate": 3.028425529726228e-07, "loss": 1.4817, "step": 10315 }, { "epoch": 12.626682986536107, "grad_norm": 2.3488497151823466, "learning_rate": 3.025368779164095e-07, "loss": 0.5227, "step": 10316 }, { "epoch": 12.627906976744185, "grad_norm": 2.8493640127184388, "learning_rate": 3.022313472693447e-07, "loss": 0.4987, "step": 10317 }, { "epoch": 12.629130966952264, "grad_norm": 1.7255063161140916, "learning_rate": 3.019259610515068e-07, "loss": 0.6136, "step": 10318 }, { "epoch": 12.630354957160343, "grad_norm": 1.6899967086768641, "learning_rate": 3.016207192829645e-07, "loss": 0.667, "step": 10319 }, { "epoch": 12.631578947368421, "grad_norm": 2.2719105152747594, "learning_rate": 3.0131562198377763e-07, "loss": 0.5716, "step": 10320 }, { "epoch": 12.6328029375765, "grad_norm": 2.0774410346714083, "learning_rate": 3.0101066917399556e-07, "loss": 1.0302, "step": 10321 }, { "epoch": 12.634026927784578, "grad_norm": 3.4019655005988634, "learning_rate": 3.0070586087365875e-07, "loss": 0.3358, "step": 10322 }, { "epoch": 12.635250917992655, "grad_norm": 1.7591567819844445, "learning_rate": 3.0040119710279816e-07, "loss": 1.169, "step": 10323 }, { "epoch": 12.636474908200734, "grad_norm": 2.537640717731943, "learning_rate": 3.0009667788143515e-07, "loss": 0.4539, "step": 10324 }, { "epoch": 12.637698898408813, "grad_norm": 1.6285760354130026, "learning_rate": 2.9979230322958103e-07, "loss": 0.4025, "step": 10325 }, { "epoch": 12.638922888616891, "grad_norm": 2.1707649803526086, "learning_rate": 2.9948807316723923e-07, "loss": 0.8413, "step": 10326 }, { "epoch": 12.64014687882497, "grad_norm": 1.4395334367055448, "learning_rate": 2.9918398771440206e-07, "loss": 1.086, "step": 10327 }, { "epoch": 12.641370869033048, "grad_norm": 2.6182792411096476, "learning_rate": 2.9888004689105383e-07, "loss": 0.4728, "step": 10328 }, { "epoch": 12.642594859241125, "grad_norm": 1.5789799488308673, "learning_rate": 2.985762507171666e-07, "loss": 0.5225, "step": 10329 }, { "epoch": 12.643818849449204, "grad_norm": 1.613376762717318, "learning_rate": 2.982725992127053e-07, "loss": 0.9173, "step": 10330 }, { "epoch": 12.645042839657282, "grad_norm": 1.9172354996041363, "learning_rate": 2.9796909239762586e-07, "loss": 0.7396, "step": 10331 }, { "epoch": 12.646266829865361, "grad_norm": 2.1003081355521243, "learning_rate": 2.9766573029187285e-07, "loss": 0.6882, "step": 10332 }, { "epoch": 12.64749082007344, "grad_norm": 1.7977184200964196, "learning_rate": 2.9736251291538206e-07, "loss": 0.4967, "step": 10333 }, { "epoch": 12.648714810281518, "grad_norm": 2.8832810339862465, "learning_rate": 2.9705944028808e-07, "loss": 0.8185, "step": 10334 }, { "epoch": 12.649938800489597, "grad_norm": 2.652429214618049, "learning_rate": 2.967565124298832e-07, "loss": 0.4542, "step": 10335 }, { "epoch": 12.651162790697674, "grad_norm": 2.3456830357359615, "learning_rate": 2.964537293606995e-07, "loss": 0.5505, "step": 10336 }, { "epoch": 12.652386780905752, "grad_norm": 1.981793544922854, "learning_rate": 2.961510911004262e-07, "loss": 0.5785, "step": 10337 }, { "epoch": 12.653610771113831, "grad_norm": 1.6035563791006946, "learning_rate": 2.9584859766895124e-07, "loss": 0.5638, "step": 10338 }, { "epoch": 12.65483476132191, "grad_norm": 1.8232961294457668, "learning_rate": 2.955462490861549e-07, "loss": 0.6255, "step": 10339 }, { "epoch": 12.656058751529988, "grad_norm": 1.4890747385523413, "learning_rate": 2.9524404537190464e-07, "loss": 0.5253, "step": 10340 }, { "epoch": 12.657282741738067, "grad_norm": 1.82644031509275, "learning_rate": 2.949419865460612e-07, "loss": 0.5413, "step": 10341 }, { "epoch": 12.658506731946144, "grad_norm": 1.5506534608840195, "learning_rate": 2.9464007262847425e-07, "loss": 0.5628, "step": 10342 }, { "epoch": 12.659730722154222, "grad_norm": 1.9879901570231193, "learning_rate": 2.943383036389841e-07, "loss": 0.4759, "step": 10343 }, { "epoch": 12.660954712362301, "grad_norm": 2.1319056599920847, "learning_rate": 2.940366795974231e-07, "loss": 0.5032, "step": 10344 }, { "epoch": 12.66217870257038, "grad_norm": 1.6845417707042072, "learning_rate": 2.9373520052361214e-07, "loss": 0.6344, "step": 10345 }, { "epoch": 12.663402692778458, "grad_norm": 2.9801956743631854, "learning_rate": 2.93433866437364e-07, "loss": 0.2038, "step": 10346 }, { "epoch": 12.664626682986537, "grad_norm": 1.447351253695189, "learning_rate": 2.931326773584797e-07, "loss": 1.126, "step": 10347 }, { "epoch": 12.665850673194614, "grad_norm": 1.6142866329755192, "learning_rate": 2.928316333067535e-07, "loss": 0.4589, "step": 10348 }, { "epoch": 12.667074663402692, "grad_norm": 2.115225898725521, "learning_rate": 2.9253073430196873e-07, "loss": 0.6104, "step": 10349 }, { "epoch": 12.668298653610771, "grad_norm": 1.5396703762631403, "learning_rate": 2.922299803638992e-07, "loss": 0.5562, "step": 10350 }, { "epoch": 12.66952264381885, "grad_norm": 2.8273308643215245, "learning_rate": 2.919293715123095e-07, "loss": 0.6498, "step": 10351 }, { "epoch": 12.670746634026928, "grad_norm": 2.963174918611286, "learning_rate": 2.9162890776695427e-07, "loss": 0.4018, "step": 10352 }, { "epoch": 12.671970624235007, "grad_norm": 1.7649007191956159, "learning_rate": 2.9132858914757894e-07, "loss": 0.5665, "step": 10353 }, { "epoch": 12.673194614443084, "grad_norm": 3.1007791193144523, "learning_rate": 2.910284156739196e-07, "loss": 0.522, "step": 10354 }, { "epoch": 12.674418604651162, "grad_norm": 1.6070858631534937, "learning_rate": 2.9072838736570243e-07, "loss": 0.7136, "step": 10355 }, { "epoch": 12.67564259485924, "grad_norm": 1.6252778542914683, "learning_rate": 2.904285042426433e-07, "loss": 0.812, "step": 10356 }, { "epoch": 12.67686658506732, "grad_norm": 2.0434886860885384, "learning_rate": 2.9012876632445146e-07, "loss": 0.6187, "step": 10357 }, { "epoch": 12.678090575275398, "grad_norm": 1.6117859414413007, "learning_rate": 2.898291736308229e-07, "loss": 0.5577, "step": 10358 }, { "epoch": 12.679314565483477, "grad_norm": 2.2833329329016574, "learning_rate": 2.8952972618144614e-07, "loss": 0.5053, "step": 10359 }, { "epoch": 12.680538555691555, "grad_norm": 2.7019622071424165, "learning_rate": 2.892304239959992e-07, "loss": 0.388, "step": 10360 }, { "epoch": 12.681762545899632, "grad_norm": 1.6522603562296996, "learning_rate": 2.8893126709415245e-07, "loss": 0.4695, "step": 10361 }, { "epoch": 12.68298653610771, "grad_norm": 2.0733933632395054, "learning_rate": 2.8863225549556423e-07, "loss": 0.7701, "step": 10362 }, { "epoch": 12.68421052631579, "grad_norm": 2.705157221368954, "learning_rate": 2.883333892198853e-07, "loss": 0.3832, "step": 10363 }, { "epoch": 12.685434516523868, "grad_norm": 2.014336957144505, "learning_rate": 2.8803466828675545e-07, "loss": 0.5304, "step": 10364 }, { "epoch": 12.686658506731947, "grad_norm": 1.860142208610143, "learning_rate": 2.877360927158057e-07, "loss": 0.4545, "step": 10365 }, { "epoch": 12.687882496940025, "grad_norm": 1.9939978699799252, "learning_rate": 2.8743766252665753e-07, "loss": 0.4547, "step": 10366 }, { "epoch": 12.689106487148102, "grad_norm": 2.9397784529717916, "learning_rate": 2.8713937773892225e-07, "loss": 0.356, "step": 10367 }, { "epoch": 12.69033047735618, "grad_norm": 2.1608082458690654, "learning_rate": 2.8684123837220246e-07, "loss": 0.4318, "step": 10368 }, { "epoch": 12.69155446756426, "grad_norm": 2.5436315513230587, "learning_rate": 2.865432444460906e-07, "loss": 0.3903, "step": 10369 }, { "epoch": 12.692778457772338, "grad_norm": 1.116764532938184, "learning_rate": 2.8624539598016946e-07, "loss": 0.7059, "step": 10370 }, { "epoch": 12.694002447980417, "grad_norm": 3.190529038977129, "learning_rate": 2.859476929940128e-07, "loss": 0.5041, "step": 10371 }, { "epoch": 12.695226438188495, "grad_norm": 1.8375030588905925, "learning_rate": 2.8565013550718473e-07, "loss": 0.981, "step": 10372 }, { "epoch": 12.696450428396572, "grad_norm": 1.9341826227571086, "learning_rate": 2.8535272353923894e-07, "loss": 1.0771, "step": 10373 }, { "epoch": 12.69767441860465, "grad_norm": 1.587307822760169, "learning_rate": 2.850554571097211e-07, "loss": 0.6008, "step": 10374 }, { "epoch": 12.69889840881273, "grad_norm": 2.518516297691733, "learning_rate": 2.8475833623816673e-07, "loss": 0.602, "step": 10375 }, { "epoch": 12.700122399020808, "grad_norm": 1.446319656093378, "learning_rate": 2.8446136094410023e-07, "loss": 0.4315, "step": 10376 }, { "epoch": 12.701346389228886, "grad_norm": 1.8697431661885309, "learning_rate": 2.841645312470381e-07, "loss": 0.4416, "step": 10377 }, { "epoch": 12.702570379436965, "grad_norm": 2.648712975274542, "learning_rate": 2.8386784716648743e-07, "loss": 0.3258, "step": 10378 }, { "epoch": 12.703794369645042, "grad_norm": 1.6294606004620265, "learning_rate": 2.835713087219452e-07, "loss": 0.5201, "step": 10379 }, { "epoch": 12.70501835985312, "grad_norm": 2.3321965611470654, "learning_rate": 2.8327491593289817e-07, "loss": 0.4384, "step": 10380 }, { "epoch": 12.7062423500612, "grad_norm": 2.0693141841551905, "learning_rate": 2.829786688188255e-07, "loss": 0.4061, "step": 10381 }, { "epoch": 12.707466340269278, "grad_norm": 1.1757987622546815, "learning_rate": 2.8268256739919343e-07, "loss": 0.6278, "step": 10382 }, { "epoch": 12.708690330477356, "grad_norm": 3.368876284419282, "learning_rate": 2.8238661169346203e-07, "loss": 0.4516, "step": 10383 }, { "epoch": 12.709914320685435, "grad_norm": 1.305527985518102, "learning_rate": 2.820908017210802e-07, "loss": 0.446, "step": 10384 }, { "epoch": 12.711138310893514, "grad_norm": 3.102182411675465, "learning_rate": 2.8179513750148743e-07, "loss": 0.3347, "step": 10385 }, { "epoch": 12.71236230110159, "grad_norm": 1.9869524088391952, "learning_rate": 2.814996190541136e-07, "loss": 0.4843, "step": 10386 }, { "epoch": 12.713586291309669, "grad_norm": 2.4040129500067655, "learning_rate": 2.812042463983794e-07, "loss": 0.9798, "step": 10387 }, { "epoch": 12.714810281517748, "grad_norm": 1.5394326517234969, "learning_rate": 2.809090195536951e-07, "loss": 0.5215, "step": 10388 }, { "epoch": 12.716034271725826, "grad_norm": 2.4524754745233497, "learning_rate": 2.806139385394621e-07, "loss": 0.4139, "step": 10389 }, { "epoch": 12.717258261933905, "grad_norm": 0.9239628177367463, "learning_rate": 2.803190033750719e-07, "loss": 0.3463, "step": 10390 }, { "epoch": 12.718482252141984, "grad_norm": 1.3243286408819352, "learning_rate": 2.8002421407990726e-07, "loss": 0.6532, "step": 10391 }, { "epoch": 12.71970624235006, "grad_norm": 2.5674546098805617, "learning_rate": 2.7972957067333995e-07, "loss": 0.372, "step": 10392 }, { "epoch": 12.720930232558139, "grad_norm": 1.8163925393630511, "learning_rate": 2.794350731747336e-07, "loss": 0.5345, "step": 10393 }, { "epoch": 12.722154222766218, "grad_norm": 1.4444024960937885, "learning_rate": 2.791407216034406e-07, "loss": 0.521, "step": 10394 }, { "epoch": 12.723378212974296, "grad_norm": 1.0963557190405375, "learning_rate": 2.788465159788042e-07, "loss": 0.5154, "step": 10395 }, { "epoch": 12.724602203182375, "grad_norm": 1.9318383241193127, "learning_rate": 2.7855245632015996e-07, "loss": 0.4506, "step": 10396 }, { "epoch": 12.725826193390454, "grad_norm": 1.766446251797245, "learning_rate": 2.782585426468318e-07, "loss": 0.3669, "step": 10397 }, { "epoch": 12.727050183598532, "grad_norm": 2.3834690939596204, "learning_rate": 2.7796477497813447e-07, "loss": 0.5734, "step": 10398 }, { "epoch": 12.728274173806609, "grad_norm": 1.4137717970030033, "learning_rate": 2.776711533333734e-07, "loss": 0.5517, "step": 10399 }, { "epoch": 12.729498164014688, "grad_norm": 3.8653878906188663, "learning_rate": 2.773776777318443e-07, "loss": 0.4129, "step": 10400 }, { "epoch": 12.730722154222766, "grad_norm": 1.6749436353024976, "learning_rate": 2.770843481928331e-07, "loss": 0.4659, "step": 10401 }, { "epoch": 12.731946144430845, "grad_norm": 1.7113645073805792, "learning_rate": 2.7679116473561695e-07, "loss": 0.4677, "step": 10402 }, { "epoch": 12.733170134638923, "grad_norm": 2.631374680023711, "learning_rate": 2.7649812737946145e-07, "loss": 0.4585, "step": 10403 }, { "epoch": 12.734394124847, "grad_norm": 2.5686270313780977, "learning_rate": 2.762052361436263e-07, "loss": 0.4134, "step": 10404 }, { "epoch": 12.735618115055079, "grad_norm": 1.5608715444836545, "learning_rate": 2.7591249104735705e-07, "loss": 0.4501, "step": 10405 }, { "epoch": 12.736842105263158, "grad_norm": 1.4271489844715066, "learning_rate": 2.7561989210989237e-07, "loss": 0.4822, "step": 10406 }, { "epoch": 12.738066095471236, "grad_norm": 1.0336533484210075, "learning_rate": 2.7532743935046125e-07, "loss": 0.3428, "step": 10407 }, { "epoch": 12.739290085679315, "grad_norm": 1.671856160252238, "learning_rate": 2.7503513278828166e-07, "loss": 0.4504, "step": 10408 }, { "epoch": 12.740514075887393, "grad_norm": 3.2003058212808315, "learning_rate": 2.747429724425643e-07, "loss": 0.535, "step": 10409 }, { "epoch": 12.741738066095472, "grad_norm": 1.5151763021778812, "learning_rate": 2.7445095833250835e-07, "loss": 1.2682, "step": 10410 }, { "epoch": 12.742962056303549, "grad_norm": 1.4626637940349263, "learning_rate": 2.741590904773034e-07, "loss": 0.8401, "step": 10411 }, { "epoch": 12.744186046511627, "grad_norm": 0.9979308653788985, "learning_rate": 2.7386736889612966e-07, "loss": 0.4912, "step": 10412 }, { "epoch": 12.745410036719706, "grad_norm": 2.0472708587385786, "learning_rate": 2.73575793608159e-07, "loss": 0.3835, "step": 10413 }, { "epoch": 12.746634026927785, "grad_norm": 1.7110184708344247, "learning_rate": 2.7328436463255226e-07, "loss": 0.5419, "step": 10414 }, { "epoch": 12.747858017135863, "grad_norm": 2.1113950473257583, "learning_rate": 2.7299308198846073e-07, "loss": 0.5241, "step": 10415 }, { "epoch": 12.749082007343942, "grad_norm": 1.7456887688679195, "learning_rate": 2.7270194569502723e-07, "loss": 1.2609, "step": 10416 }, { "epoch": 12.750305997552019, "grad_norm": 1.9006012563428076, "learning_rate": 2.7241095577138334e-07, "loss": 0.526, "step": 10417 }, { "epoch": 12.751529987760097, "grad_norm": 2.161932899861038, "learning_rate": 2.7212011223665207e-07, "loss": 0.3421, "step": 10418 }, { "epoch": 12.752753977968176, "grad_norm": 1.3170082134678263, "learning_rate": 2.718294151099468e-07, "loss": 0.5693, "step": 10419 }, { "epoch": 12.753977968176255, "grad_norm": 2.2288422958647303, "learning_rate": 2.715388644103711e-07, "loss": 0.7091, "step": 10420 }, { "epoch": 12.755201958384333, "grad_norm": 1.3377724377168099, "learning_rate": 2.712484601570181e-07, "loss": 0.7703, "step": 10421 }, { "epoch": 12.756425948592412, "grad_norm": 1.4952300112005785, "learning_rate": 2.7095820236897356e-07, "loss": 0.3294, "step": 10422 }, { "epoch": 12.75764993880049, "grad_norm": 3.175944171525824, "learning_rate": 2.706680910653109e-07, "loss": 0.4388, "step": 10423 }, { "epoch": 12.758873929008567, "grad_norm": 1.8564860082217187, "learning_rate": 2.7037812626509563e-07, "loss": 1.0974, "step": 10424 }, { "epoch": 12.760097919216646, "grad_norm": 2.433296081440086, "learning_rate": 2.7008830798738255e-07, "loss": 0.6629, "step": 10425 }, { "epoch": 12.761321909424725, "grad_norm": 1.062770904341979, "learning_rate": 2.6979863625121867e-07, "loss": 0.4396, "step": 10426 }, { "epoch": 12.762545899632803, "grad_norm": 2.091553738664652, "learning_rate": 2.695091110756393e-07, "loss": 0.5688, "step": 10427 }, { "epoch": 12.763769889840882, "grad_norm": 1.1559840765545346, "learning_rate": 2.6921973247967164e-07, "loss": 0.5804, "step": 10428 }, { "epoch": 12.76499388004896, "grad_norm": 2.181709499322586, "learning_rate": 2.6893050048233087e-07, "loss": 0.54, "step": 10429 }, { "epoch": 12.766217870257037, "grad_norm": 2.5825923346004993, "learning_rate": 2.686414151026262e-07, "loss": 0.9654, "step": 10430 }, { "epoch": 12.767441860465116, "grad_norm": 1.5826975781578805, "learning_rate": 2.6835247635955466e-07, "loss": 0.6196, "step": 10431 }, { "epoch": 12.768665850673194, "grad_norm": 2.2411067443447665, "learning_rate": 2.680636842721038e-07, "loss": 0.4837, "step": 10432 }, { "epoch": 12.769889840881273, "grad_norm": 2.39025091216356, "learning_rate": 2.6777503885925233e-07, "loss": 0.6006, "step": 10433 }, { "epoch": 12.771113831089352, "grad_norm": 2.8180070162518884, "learning_rate": 2.67486540139969e-07, "loss": 0.9477, "step": 10434 }, { "epoch": 12.77233782129743, "grad_norm": 1.8897208290234497, "learning_rate": 2.671981881332125e-07, "loss": 1.8745, "step": 10435 }, { "epoch": 12.773561811505507, "grad_norm": 1.6495641851822784, "learning_rate": 2.669099828579327e-07, "loss": 1.2406, "step": 10436 }, { "epoch": 12.774785801713586, "grad_norm": 2.069321488837679, "learning_rate": 2.666219243330692e-07, "loss": 0.7575, "step": 10437 }, { "epoch": 12.776009791921664, "grad_norm": 1.2002235986510243, "learning_rate": 2.6633401257755156e-07, "loss": 0.5328, "step": 10438 }, { "epoch": 12.777233782129743, "grad_norm": 2.326057044837044, "learning_rate": 2.6604624761030177e-07, "loss": 0.9596, "step": 10439 }, { "epoch": 12.778457772337822, "grad_norm": 2.0217914108938637, "learning_rate": 2.657586294502296e-07, "loss": 0.3878, "step": 10440 }, { "epoch": 12.7796817625459, "grad_norm": 1.7138880404660954, "learning_rate": 2.6547115811623613e-07, "loss": 1.6186, "step": 10441 }, { "epoch": 12.780905752753977, "grad_norm": 1.519204351357379, "learning_rate": 2.651838336272128e-07, "loss": 1.0206, "step": 10442 }, { "epoch": 12.782129742962056, "grad_norm": 1.267033236259421, "learning_rate": 2.6489665600204245e-07, "loss": 0.5275, "step": 10443 }, { "epoch": 12.783353733170134, "grad_norm": 2.4226299832521776, "learning_rate": 2.646096252595967e-07, "loss": 0.8934, "step": 10444 }, { "epoch": 12.784577723378213, "grad_norm": 0.7586572372138108, "learning_rate": 2.6432274141873827e-07, "loss": 0.2548, "step": 10445 }, { "epoch": 12.785801713586292, "grad_norm": 1.4739301033968428, "learning_rate": 2.6403600449832064e-07, "loss": 0.5402, "step": 10446 }, { "epoch": 12.78702570379437, "grad_norm": 1.679509520957207, "learning_rate": 2.637494145171854e-07, "loss": 0.6947, "step": 10447 }, { "epoch": 12.788249694002449, "grad_norm": 1.2781590217475853, "learning_rate": 2.634629714941678e-07, "loss": 0.467, "step": 10448 }, { "epoch": 12.789473684210526, "grad_norm": 2.6367825223352472, "learning_rate": 2.6317667544809135e-07, "loss": 0.3143, "step": 10449 }, { "epoch": 12.790697674418604, "grad_norm": 1.925263843742835, "learning_rate": 2.628905263977705e-07, "loss": 0.5397, "step": 10450 }, { "epoch": 12.791921664626683, "grad_norm": 1.6974673815466501, "learning_rate": 2.626045243620096e-07, "loss": 0.5044, "step": 10451 }, { "epoch": 12.793145654834762, "grad_norm": 1.561619449783848, "learning_rate": 2.623186693596039e-07, "loss": 0.7106, "step": 10452 }, { "epoch": 12.79436964504284, "grad_norm": 2.583441181430144, "learning_rate": 2.6203296140933864e-07, "loss": 0.8096, "step": 10453 }, { "epoch": 12.795593635250919, "grad_norm": 2.651230959182768, "learning_rate": 2.6174740052998943e-07, "loss": 0.943, "step": 10454 }, { "epoch": 12.796817625458996, "grad_norm": 1.9488626279522412, "learning_rate": 2.61461986740322e-07, "loss": 0.4185, "step": 10455 }, { "epoch": 12.798041615667074, "grad_norm": 1.792999412374256, "learning_rate": 2.611767200590934e-07, "loss": 0.427, "step": 10456 }, { "epoch": 12.799265605875153, "grad_norm": 1.7275846995514785, "learning_rate": 2.6089160050505047e-07, "loss": 1.0777, "step": 10457 }, { "epoch": 12.800489596083231, "grad_norm": 1.686285195434973, "learning_rate": 2.606066280969291e-07, "loss": 0.5552, "step": 10458 }, { "epoch": 12.80171358629131, "grad_norm": 1.1315262484207311, "learning_rate": 2.6032180285345707e-07, "loss": 0.5339, "step": 10459 }, { "epoch": 12.802937576499389, "grad_norm": 1.4205201942117065, "learning_rate": 2.6003712479335196e-07, "loss": 0.6025, "step": 10460 }, { "epoch": 12.804161566707466, "grad_norm": 2.9028786503639696, "learning_rate": 2.597525939353224e-07, "loss": 0.4629, "step": 10461 }, { "epoch": 12.805385556915544, "grad_norm": 1.8113087743211715, "learning_rate": 2.5946821029806613e-07, "loss": 1.1546, "step": 10462 }, { "epoch": 12.806609547123623, "grad_norm": 2.1456075011578446, "learning_rate": 2.591839739002719e-07, "loss": 1.0641, "step": 10463 }, { "epoch": 12.807833537331701, "grad_norm": 2.09766519248643, "learning_rate": 2.588998847606189e-07, "loss": 0.4019, "step": 10464 }, { "epoch": 12.80905752753978, "grad_norm": 1.2554562574320203, "learning_rate": 2.5861594289777606e-07, "loss": 0.5352, "step": 10465 }, { "epoch": 12.810281517747859, "grad_norm": 3.1676326060511406, "learning_rate": 2.58332148330403e-07, "loss": 0.5681, "step": 10466 }, { "epoch": 12.811505507955935, "grad_norm": 1.302667731454608, "learning_rate": 2.5804850107714996e-07, "loss": 0.7578, "step": 10467 }, { "epoch": 12.812729498164014, "grad_norm": 1.6141163526727345, "learning_rate": 2.577650011566571e-07, "loss": 0.374, "step": 10468 }, { "epoch": 12.813953488372093, "grad_norm": 1.4568784601022056, "learning_rate": 2.574816485875548e-07, "loss": 0.452, "step": 10469 }, { "epoch": 12.815177478580171, "grad_norm": 1.6486440355884764, "learning_rate": 2.5719844338846415e-07, "loss": 1.0384, "step": 10470 }, { "epoch": 12.81640146878825, "grad_norm": 2.470082621336716, "learning_rate": 2.5691538557799627e-07, "loss": 0.4141, "step": 10471 }, { "epoch": 12.817625458996329, "grad_norm": 2.9648848980558453, "learning_rate": 2.566324751747526e-07, "loss": 0.6802, "step": 10472 }, { "epoch": 12.818849449204407, "grad_norm": 1.8201693386458961, "learning_rate": 2.563497121973244e-07, "loss": 0.6256, "step": 10473 }, { "epoch": 12.820073439412484, "grad_norm": 1.9200878121963407, "learning_rate": 2.56067096664295e-07, "loss": 0.4672, "step": 10474 }, { "epoch": 12.821297429620563, "grad_norm": 1.1481844150688285, "learning_rate": 2.557846285942367e-07, "loss": 0.5915, "step": 10475 }, { "epoch": 12.822521419828641, "grad_norm": 1.361513155548424, "learning_rate": 2.555023080057115e-07, "loss": 0.7153, "step": 10476 }, { "epoch": 12.82374541003672, "grad_norm": 1.5242507983265545, "learning_rate": 2.5522013491727224e-07, "loss": 0.2677, "step": 10477 }, { "epoch": 12.824969400244798, "grad_norm": 3.092168843513804, "learning_rate": 2.5493810934746345e-07, "loss": 0.3964, "step": 10478 }, { "epoch": 12.826193390452877, "grad_norm": 3.145867977102824, "learning_rate": 2.546562313148179e-07, "loss": 0.6902, "step": 10479 }, { "epoch": 12.827417380660954, "grad_norm": 2.1752174438205385, "learning_rate": 2.543745008378601e-07, "loss": 1.1136, "step": 10480 }, { "epoch": 12.828641370869033, "grad_norm": 1.909358676055843, "learning_rate": 2.5409291793510454e-07, "loss": 0.4712, "step": 10481 }, { "epoch": 12.829865361077111, "grad_norm": 2.1052406315178263, "learning_rate": 2.5381148262505457e-07, "loss": 1.1474, "step": 10482 }, { "epoch": 12.83108935128519, "grad_norm": 0.987489595277372, "learning_rate": 2.5353019492620616e-07, "loss": 0.4111, "step": 10483 }, { "epoch": 12.832313341493268, "grad_norm": 1.1848560972743198, "learning_rate": 2.532490548570443e-07, "loss": 1.1955, "step": 10484 }, { "epoch": 12.833537331701347, "grad_norm": 2.778431065456724, "learning_rate": 2.529680624360445e-07, "loss": 0.3532, "step": 10485 }, { "epoch": 12.834761321909426, "grad_norm": 1.3415448985798206, "learning_rate": 2.526872176816722e-07, "loss": 0.7137, "step": 10486 }, { "epoch": 12.835985312117502, "grad_norm": 1.7233013075357044, "learning_rate": 2.524065206123838e-07, "loss": 0.5484, "step": 10487 }, { "epoch": 12.837209302325581, "grad_norm": 1.1541422018405514, "learning_rate": 2.521259712466256e-07, "loss": 0.5048, "step": 10488 }, { "epoch": 12.83843329253366, "grad_norm": 1.528146151946793, "learning_rate": 2.5184556960283434e-07, "loss": 0.6153, "step": 10489 }, { "epoch": 12.839657282741738, "grad_norm": 1.8428157353858758, "learning_rate": 2.515653156994366e-07, "loss": 1.4947, "step": 10490 }, { "epoch": 12.840881272949817, "grad_norm": 2.7280139646740187, "learning_rate": 2.5128520955485016e-07, "loss": 0.4201, "step": 10491 }, { "epoch": 12.842105263157894, "grad_norm": 1.3052925484652351, "learning_rate": 2.510052511874822e-07, "loss": 0.7456, "step": 10492 }, { "epoch": 12.843329253365972, "grad_norm": 1.575369343770074, "learning_rate": 2.507254406157314e-07, "loss": 0.5461, "step": 10493 }, { "epoch": 12.844553243574051, "grad_norm": 2.501162993322537, "learning_rate": 2.5044577785798466e-07, "loss": 0.3885, "step": 10494 }, { "epoch": 12.84577723378213, "grad_norm": 1.6459232482522708, "learning_rate": 2.5016626293262014e-07, "loss": 1.289, "step": 10495 }, { "epoch": 12.847001223990208, "grad_norm": 1.8156455932637243, "learning_rate": 2.498868958580081e-07, "loss": 0.925, "step": 10496 }, { "epoch": 12.848225214198287, "grad_norm": 1.888399909822423, "learning_rate": 2.496076766525063e-07, "loss": 0.5409, "step": 10497 }, { "epoch": 12.849449204406366, "grad_norm": 2.540899393307378, "learning_rate": 2.4932860533446456e-07, "loss": 0.5957, "step": 10498 }, { "epoch": 12.850673194614442, "grad_norm": 1.370788553514035, "learning_rate": 2.490496819222224e-07, "loss": 0.7627, "step": 10499 }, { "epoch": 12.851897184822521, "grad_norm": 1.6312869986835035, "learning_rate": 2.487709064341093e-07, "loss": 0.5189, "step": 10500 }, { "epoch": 12.8531211750306, "grad_norm": 1.593314971435857, "learning_rate": 2.484922788884453e-07, "loss": 0.3312, "step": 10501 }, { "epoch": 12.854345165238678, "grad_norm": 1.7276822283138542, "learning_rate": 2.4821379930354107e-07, "loss": 0.518, "step": 10502 }, { "epoch": 12.855569155446757, "grad_norm": 2.30133103489708, "learning_rate": 2.4793546769769697e-07, "loss": 0.8122, "step": 10503 }, { "epoch": 12.856793145654835, "grad_norm": 3.6032999609006637, "learning_rate": 2.476572840892047e-07, "loss": 0.3274, "step": 10504 }, { "epoch": 12.858017135862912, "grad_norm": 1.1271547434604146, "learning_rate": 2.473792484963447e-07, "loss": 0.3049, "step": 10505 }, { "epoch": 12.859241126070991, "grad_norm": 1.9956620240845078, "learning_rate": 2.4710136093738843e-07, "loss": 0.6066, "step": 10506 }, { "epoch": 12.86046511627907, "grad_norm": 1.7454426957749365, "learning_rate": 2.4682362143059803e-07, "loss": 0.4249, "step": 10507 }, { "epoch": 12.861689106487148, "grad_norm": 1.5005019264695287, "learning_rate": 2.465460299942246e-07, "loss": 0.6133, "step": 10508 }, { "epoch": 12.862913096695227, "grad_norm": 1.6897099562272597, "learning_rate": 2.462685866465117e-07, "loss": 1.1652, "step": 10509 }, { "epoch": 12.864137086903305, "grad_norm": 1.9929297545864118, "learning_rate": 2.4599129140569195e-07, "loss": 0.5178, "step": 10510 }, { "epoch": 12.865361077111384, "grad_norm": 1.9398297132974978, "learning_rate": 2.4571414428998686e-07, "loss": 0.7328, "step": 10511 }, { "epoch": 12.86658506731946, "grad_norm": 2.5727693582057447, "learning_rate": 2.4543714531760964e-07, "loss": 0.4931, "step": 10512 }, { "epoch": 12.86780905752754, "grad_norm": 1.4264422212105943, "learning_rate": 2.4516029450676495e-07, "loss": 1.5535, "step": 10513 }, { "epoch": 12.869033047735618, "grad_norm": 2.4755796854096714, "learning_rate": 2.4488359187564536e-07, "loss": 0.3587, "step": 10514 }, { "epoch": 12.870257037943697, "grad_norm": 1.427110474032553, "learning_rate": 2.44607037442435e-07, "loss": 0.5411, "step": 10515 }, { "epoch": 12.871481028151775, "grad_norm": 1.0955125011599471, "learning_rate": 2.4433063122530825e-07, "loss": 0.5097, "step": 10516 }, { "epoch": 12.872705018359854, "grad_norm": 1.8649534367460903, "learning_rate": 2.4405437324242914e-07, "loss": 1.0142, "step": 10517 }, { "epoch": 12.87392900856793, "grad_norm": 1.4508571560928227, "learning_rate": 2.4377826351195235e-07, "loss": 0.4994, "step": 10518 }, { "epoch": 12.87515299877601, "grad_norm": 1.6198864868970955, "learning_rate": 2.4350230205202303e-07, "loss": 0.4916, "step": 10519 }, { "epoch": 12.876376988984088, "grad_norm": 2.9992369588671206, "learning_rate": 2.4322648888077615e-07, "loss": 0.8179, "step": 10520 }, { "epoch": 12.877600979192167, "grad_norm": 2.4451044649152154, "learning_rate": 2.429508240163367e-07, "loss": 0.4643, "step": 10521 }, { "epoch": 12.878824969400245, "grad_norm": 2.23319044696988, "learning_rate": 2.4267530747682203e-07, "loss": 0.508, "step": 10522 }, { "epoch": 12.880048959608324, "grad_norm": 3.3921726224875686, "learning_rate": 2.4239993928033613e-07, "loss": 0.4624, "step": 10523 }, { "epoch": 12.8812729498164, "grad_norm": 0.9370913795152549, "learning_rate": 2.4212471944497607e-07, "loss": 0.5199, "step": 10524 }, { "epoch": 12.88249694002448, "grad_norm": 1.1891408697639916, "learning_rate": 2.418496479888277e-07, "loss": 0.567, "step": 10525 }, { "epoch": 12.883720930232558, "grad_norm": 1.6767074578609364, "learning_rate": 2.4157472492996853e-07, "loss": 0.5903, "step": 10526 }, { "epoch": 12.884944920440637, "grad_norm": 2.9273867952438675, "learning_rate": 2.412999502864649e-07, "loss": 0.7908, "step": 10527 }, { "epoch": 12.886168910648715, "grad_norm": 1.783706135714741, "learning_rate": 2.410253240763749e-07, "loss": 0.8795, "step": 10528 }, { "epoch": 12.887392900856794, "grad_norm": 1.5235444170238044, "learning_rate": 2.407508463177444e-07, "loss": 0.5839, "step": 10529 }, { "epoch": 12.88861689106487, "grad_norm": 1.5266542189808174, "learning_rate": 2.404765170286122e-07, "loss": 0.599, "step": 10530 }, { "epoch": 12.88984088127295, "grad_norm": 1.390438014565871, "learning_rate": 2.402023362270059e-07, "loss": 0.5056, "step": 10531 }, { "epoch": 12.891064871481028, "grad_norm": 2.5752864748044284, "learning_rate": 2.3992830393094383e-07, "loss": 0.5106, "step": 10532 }, { "epoch": 12.892288861689106, "grad_norm": 1.8705222777940498, "learning_rate": 2.3965442015843386e-07, "loss": 0.5793, "step": 10533 }, { "epoch": 12.893512851897185, "grad_norm": 1.7296513200779486, "learning_rate": 2.393806849274752e-07, "loss": 0.7091, "step": 10534 }, { "epoch": 12.894736842105264, "grad_norm": 2.3218670190041233, "learning_rate": 2.3910709825605645e-07, "loss": 0.4095, "step": 10535 }, { "epoch": 12.895960832313342, "grad_norm": 2.1287419400099408, "learning_rate": 2.3883366016215685e-07, "loss": 0.3997, "step": 10536 }, { "epoch": 12.89718482252142, "grad_norm": 3.2576876232308822, "learning_rate": 2.3856037066374545e-07, "loss": 0.304, "step": 10537 }, { "epoch": 12.898408812729498, "grad_norm": 1.6977532574192873, "learning_rate": 2.382872297787814e-07, "loss": 0.6332, "step": 10538 }, { "epoch": 12.899632802937576, "grad_norm": 2.3724659953480636, "learning_rate": 2.3801423752521645e-07, "loss": 0.5425, "step": 10539 }, { "epoch": 12.900856793145655, "grad_norm": 1.869397998199903, "learning_rate": 2.3774139392098854e-07, "loss": 0.5201, "step": 10540 }, { "epoch": 12.902080783353734, "grad_norm": 1.701696485345459, "learning_rate": 2.3746869898402886e-07, "loss": 0.8466, "step": 10541 }, { "epoch": 12.903304773561812, "grad_norm": 2.252302019197749, "learning_rate": 2.371961527322572e-07, "loss": 0.4623, "step": 10542 }, { "epoch": 12.904528763769889, "grad_norm": 1.7971740497007647, "learning_rate": 2.3692375518358512e-07, "loss": 1.3555, "step": 10543 }, { "epoch": 12.905752753977968, "grad_norm": 1.488954266700243, "learning_rate": 2.3665150635591356e-07, "loss": 0.6846, "step": 10544 }, { "epoch": 12.906976744186046, "grad_norm": 1.9327916358998265, "learning_rate": 2.3637940626713346e-07, "loss": 0.4516, "step": 10545 }, { "epoch": 12.908200734394125, "grad_norm": 1.7897888588441095, "learning_rate": 2.3610745493512667e-07, "loss": 0.3085, "step": 10546 }, { "epoch": 12.909424724602204, "grad_norm": 1.2003940891175295, "learning_rate": 2.358356523777633e-07, "loss": 0.69, "step": 10547 }, { "epoch": 12.910648714810282, "grad_norm": 1.3109445974497864, "learning_rate": 2.3556399861290713e-07, "loss": 0.5536, "step": 10548 }, { "epoch": 12.911872705018359, "grad_norm": 1.4158494756615523, "learning_rate": 2.352924936584089e-07, "loss": 1.5615, "step": 10549 }, { "epoch": 12.913096695226438, "grad_norm": 1.3829202192308803, "learning_rate": 2.350211375321118e-07, "loss": 0.7341, "step": 10550 }, { "epoch": 12.914320685434516, "grad_norm": 2.6358003248072666, "learning_rate": 2.3474993025184772e-07, "loss": 0.8453, "step": 10551 }, { "epoch": 12.915544675642595, "grad_norm": 1.2967009208693403, "learning_rate": 2.3447887183543987e-07, "loss": 0.8918, "step": 10552 }, { "epoch": 12.916768665850674, "grad_norm": 1.356560117364283, "learning_rate": 2.3420796230070098e-07, "loss": 0.4394, "step": 10553 }, { "epoch": 12.917992656058752, "grad_norm": 1.2161117846134966, "learning_rate": 2.3393720166543398e-07, "loss": 0.6216, "step": 10554 }, { "epoch": 12.919216646266829, "grad_norm": 1.257795741768906, "learning_rate": 2.3366658994743217e-07, "loss": 0.425, "step": 10555 }, { "epoch": 12.920440636474908, "grad_norm": 2.715195673651283, "learning_rate": 2.3339612716447995e-07, "loss": 0.3321, "step": 10556 }, { "epoch": 12.921664626682986, "grad_norm": 3.6614060244652435, "learning_rate": 2.3312581333435114e-07, "loss": 0.4176, "step": 10557 }, { "epoch": 12.922888616891065, "grad_norm": 1.3707810497381692, "learning_rate": 2.3285564847480874e-07, "loss": 0.5875, "step": 10558 }, { "epoch": 12.924112607099143, "grad_norm": 1.861083603885412, "learning_rate": 2.3258563260360773e-07, "loss": 1.1303, "step": 10559 }, { "epoch": 12.925336597307222, "grad_norm": 2.636362633974648, "learning_rate": 2.3231576573849168e-07, "loss": 0.8501, "step": 10560 }, { "epoch": 12.9265605875153, "grad_norm": 2.211656698145912, "learning_rate": 2.3204604789719643e-07, "loss": 0.5292, "step": 10561 }, { "epoch": 12.927784577723378, "grad_norm": 1.5762795810313264, "learning_rate": 2.317764790974464e-07, "loss": 0.3185, "step": 10562 }, { "epoch": 12.929008567931456, "grad_norm": 3.2132361481623555, "learning_rate": 2.3150705935695656e-07, "loss": 0.3677, "step": 10563 }, { "epoch": 12.930232558139535, "grad_norm": 1.87302501426283, "learning_rate": 2.3123778869343223e-07, "loss": 1.0046, "step": 10564 }, { "epoch": 12.931456548347613, "grad_norm": 2.650363237291908, "learning_rate": 2.3096866712456894e-07, "loss": 0.4231, "step": 10565 }, { "epoch": 12.932680538555692, "grad_norm": 2.0606690681732873, "learning_rate": 2.3069969466805202e-07, "loss": 0.7625, "step": 10566 }, { "epoch": 12.93390452876377, "grad_norm": 2.970515645798855, "learning_rate": 2.304308713415579e-07, "loss": 0.5249, "step": 10567 }, { "epoch": 12.935128518971847, "grad_norm": 1.6968367152014547, "learning_rate": 2.3016219716275184e-07, "loss": 0.4539, "step": 10568 }, { "epoch": 12.936352509179926, "grad_norm": 1.7723017715445057, "learning_rate": 2.2989367214929142e-07, "loss": 0.5137, "step": 10569 }, { "epoch": 12.937576499388005, "grad_norm": 2.886126429324236, "learning_rate": 2.2962529631882197e-07, "loss": 1.0152, "step": 10570 }, { "epoch": 12.938800489596083, "grad_norm": 1.501863389895933, "learning_rate": 2.2935706968898048e-07, "loss": 0.588, "step": 10571 }, { "epoch": 12.940024479804162, "grad_norm": 2.405042506990251, "learning_rate": 2.2908899227739367e-07, "loss": 0.4228, "step": 10572 }, { "epoch": 12.94124847001224, "grad_norm": 1.8304503699631078, "learning_rate": 2.288210641016786e-07, "loss": 1.3572, "step": 10573 }, { "epoch": 12.94247246022032, "grad_norm": 2.6294889979386293, "learning_rate": 2.2855328517944281e-07, "loss": 0.5428, "step": 10574 }, { "epoch": 12.943696450428396, "grad_norm": 2.1076150616584144, "learning_rate": 2.2828565552828447e-07, "loss": 1.0004, "step": 10575 }, { "epoch": 12.944920440636475, "grad_norm": 1.2794457662741603, "learning_rate": 2.280181751657895e-07, "loss": 0.5519, "step": 10576 }, { "epoch": 12.946144430844553, "grad_norm": 1.2207351712393135, "learning_rate": 2.2775084410953634e-07, "loss": 0.5071, "step": 10577 }, { "epoch": 12.947368421052632, "grad_norm": 2.746312882354513, "learning_rate": 2.2748366237709374e-07, "loss": 0.3975, "step": 10578 }, { "epoch": 12.94859241126071, "grad_norm": 2.2780037925545353, "learning_rate": 2.272166299860193e-07, "loss": 0.3922, "step": 10579 }, { "epoch": 12.949816401468787, "grad_norm": 2.3774665124851797, "learning_rate": 2.2694974695386147e-07, "loss": 0.6335, "step": 10580 }, { "epoch": 12.951040391676866, "grad_norm": 3.0664511622546406, "learning_rate": 2.26683013298159e-07, "loss": 0.4052, "step": 10581 }, { "epoch": 12.952264381884945, "grad_norm": 1.9841513899980527, "learning_rate": 2.264164290364404e-07, "loss": 0.3335, "step": 10582 }, { "epoch": 12.953488372093023, "grad_norm": 1.0471763052563037, "learning_rate": 2.261499941862247e-07, "loss": 0.5822, "step": 10583 }, { "epoch": 12.954712362301102, "grad_norm": 1.7670892769380695, "learning_rate": 2.2588370876502093e-07, "loss": 1.5136, "step": 10584 }, { "epoch": 12.95593635250918, "grad_norm": 1.5094888895905199, "learning_rate": 2.2561757279032842e-07, "loss": 1.3154, "step": 10585 }, { "epoch": 12.957160342717259, "grad_norm": 1.9178632616758706, "learning_rate": 2.2535158627963684e-07, "loss": 0.6065, "step": 10586 }, { "epoch": 12.958384332925336, "grad_norm": 1.8505792774125136, "learning_rate": 2.2508574925042547e-07, "loss": 1.4512, "step": 10587 }, { "epoch": 12.959608323133414, "grad_norm": 2.6822541849682167, "learning_rate": 2.248200617201643e-07, "loss": 0.4681, "step": 10588 }, { "epoch": 12.960832313341493, "grad_norm": 1.5100010878320198, "learning_rate": 2.245545237063135e-07, "loss": 0.4991, "step": 10589 }, { "epoch": 12.962056303549572, "grad_norm": 2.0420344308446134, "learning_rate": 2.2428913522632267e-07, "loss": 0.4883, "step": 10590 }, { "epoch": 12.96328029375765, "grad_norm": 1.7366508444890774, "learning_rate": 2.2402389629763294e-07, "loss": 0.6118, "step": 10591 }, { "epoch": 12.964504283965729, "grad_norm": 1.7134895484351578, "learning_rate": 2.237588069376745e-07, "loss": 1.2262, "step": 10592 }, { "epoch": 12.965728274173806, "grad_norm": 1.993709513663499, "learning_rate": 2.2349386716386868e-07, "loss": 1.1028, "step": 10593 }, { "epoch": 12.966952264381884, "grad_norm": 2.0945507311413802, "learning_rate": 2.2322907699362463e-07, "loss": 0.591, "step": 10594 }, { "epoch": 12.968176254589963, "grad_norm": 1.8921381859587465, "learning_rate": 2.2296443644434507e-07, "loss": 1.08, "step": 10595 }, { "epoch": 12.969400244798042, "grad_norm": 1.676071742013873, "learning_rate": 2.2269994553342028e-07, "loss": 1.6322, "step": 10596 }, { "epoch": 12.97062423500612, "grad_norm": 1.0963230615092272, "learning_rate": 2.2243560427823218e-07, "loss": 0.5443, "step": 10597 }, { "epoch": 12.971848225214199, "grad_norm": 1.9276678626298318, "learning_rate": 2.221714126961519e-07, "loss": 0.5207, "step": 10598 }, { "epoch": 12.973072215422278, "grad_norm": 2.167614156880599, "learning_rate": 2.2190737080454132e-07, "loss": 0.4765, "step": 10599 }, { "epoch": 12.974296205630354, "grad_norm": 1.4200751684629938, "learning_rate": 2.216434786207522e-07, "loss": 0.7358, "step": 10600 }, { "epoch": 12.975520195838433, "grad_norm": 1.1553015806363534, "learning_rate": 2.2137973616212672e-07, "loss": 0.5942, "step": 10601 }, { "epoch": 12.976744186046512, "grad_norm": 1.8695272632706061, "learning_rate": 2.2111614344599686e-07, "loss": 0.4947, "step": 10602 }, { "epoch": 12.97796817625459, "grad_norm": 2.1314823184365963, "learning_rate": 2.208527004896846e-07, "loss": 0.4023, "step": 10603 }, { "epoch": 12.979192166462669, "grad_norm": 2.1027981129461506, "learning_rate": 2.2058940731050387e-07, "loss": 0.6523, "step": 10604 }, { "epoch": 12.980416156670747, "grad_norm": 2.274066895915053, "learning_rate": 2.2032626392575557e-07, "loss": 0.5332, "step": 10605 }, { "epoch": 12.981640146878824, "grad_norm": 1.3017128160308493, "learning_rate": 2.200632703527336e-07, "loss": 0.3838, "step": 10606 }, { "epoch": 12.982864137086903, "grad_norm": 1.874907537541565, "learning_rate": 2.1980042660872002e-07, "loss": 0.9792, "step": 10607 }, { "epoch": 12.984088127294982, "grad_norm": 1.8485890232250277, "learning_rate": 2.1953773271098905e-07, "loss": 1.3976, "step": 10608 }, { "epoch": 12.98531211750306, "grad_norm": 2.5475064953115973, "learning_rate": 2.1927518867680352e-07, "loss": 1.2735, "step": 10609 }, { "epoch": 12.986536107711139, "grad_norm": 2.442569954295866, "learning_rate": 2.1901279452341662e-07, "loss": 0.5861, "step": 10610 }, { "epoch": 12.987760097919217, "grad_norm": 2.204485358159137, "learning_rate": 2.1875055026807257e-07, "loss": 0.4152, "step": 10611 }, { "epoch": 12.988984088127294, "grad_norm": 1.4890199926063017, "learning_rate": 2.184884559280037e-07, "loss": 0.6159, "step": 10612 }, { "epoch": 12.990208078335373, "grad_norm": 2.032254708132032, "learning_rate": 2.1822651152043516e-07, "loss": 0.9009, "step": 10613 }, { "epoch": 12.991432068543451, "grad_norm": 2.4731638053574807, "learning_rate": 2.1796471706258066e-07, "loss": 0.4411, "step": 10614 }, { "epoch": 12.99265605875153, "grad_norm": 1.5201754265090965, "learning_rate": 2.177030725716442e-07, "loss": 0.5404, "step": 10615 }, { "epoch": 12.993880048959609, "grad_norm": 1.553221801983763, "learning_rate": 2.1744157806482036e-07, "loss": 1.2546, "step": 10616 }, { "epoch": 12.995104039167687, "grad_norm": 1.9859296388373553, "learning_rate": 2.1718023355929346e-07, "loss": 0.3551, "step": 10617 }, { "epoch": 12.996328029375764, "grad_norm": 2.7241307574855544, "learning_rate": 2.1691903907223782e-07, "loss": 0.3402, "step": 10618 }, { "epoch": 12.997552019583843, "grad_norm": 1.8086843663778427, "learning_rate": 2.1665799462081833e-07, "loss": 0.4442, "step": 10619 }, { "epoch": 12.998776009791921, "grad_norm": 1.7687571371842425, "learning_rate": 2.1639710022218984e-07, "loss": 1.3947, "step": 10620 }, { "epoch": 13.0, "grad_norm": 1.471676692229264, "learning_rate": 2.1613635589349756e-07, "loss": 1.3069, "step": 10621 }, { "epoch": 13.001223990208079, "grad_norm": 1.4078831021812097, "learning_rate": 2.1587576165187724e-07, "loss": 0.3618, "step": 10622 }, { "epoch": 13.002447980416157, "grad_norm": 1.8268492227718474, "learning_rate": 2.1561531751445293e-07, "loss": 0.5731, "step": 10623 }, { "epoch": 13.003671970624236, "grad_norm": 1.6611127812544357, "learning_rate": 2.1535502349834065e-07, "loss": 1.1632, "step": 10624 }, { "epoch": 13.004895960832313, "grad_norm": 1.6471403203435377, "learning_rate": 2.1509487962064536e-07, "loss": 0.5938, "step": 10625 }, { "epoch": 13.006119951040391, "grad_norm": 1.9826636549166305, "learning_rate": 2.1483488589846363e-07, "loss": 0.5476, "step": 10626 }, { "epoch": 13.00734394124847, "grad_norm": 2.6413232711312062, "learning_rate": 2.1457504234888127e-07, "loss": 0.8485, "step": 10627 }, { "epoch": 13.008567931456549, "grad_norm": 2.9594860210660223, "learning_rate": 2.1431534898897406e-07, "loss": 0.5231, "step": 10628 }, { "epoch": 13.009791921664627, "grad_norm": 1.5818665449303817, "learning_rate": 2.140558058358072e-07, "loss": 0.4719, "step": 10629 }, { "epoch": 13.011015911872706, "grad_norm": 2.5109699715423726, "learning_rate": 2.137964129064382e-07, "loss": 0.5504, "step": 10630 }, { "epoch": 13.012239902080783, "grad_norm": 1.035109928704139, "learning_rate": 2.1353717021791288e-07, "loss": 0.4908, "step": 10631 }, { "epoch": 13.013463892288861, "grad_norm": 2.04294917593886, "learning_rate": 2.1327807778726755e-07, "loss": 0.5071, "step": 10632 }, { "epoch": 13.01468788249694, "grad_norm": 1.8183773564970662, "learning_rate": 2.1301913563152892e-07, "loss": 0.518, "step": 10633 }, { "epoch": 13.015911872705018, "grad_norm": 0.757797128181512, "learning_rate": 2.1276034376771393e-07, "loss": 0.2549, "step": 10634 }, { "epoch": 13.017135862913097, "grad_norm": 1.8983057876243326, "learning_rate": 2.1250170221282895e-07, "loss": 1.2993, "step": 10635 }, { "epoch": 13.018359853121176, "grad_norm": 1.4904906527404769, "learning_rate": 2.1224321098387151e-07, "loss": 1.314, "step": 10636 }, { "epoch": 13.019583843329253, "grad_norm": 1.6050771633207728, "learning_rate": 2.1198487009782832e-07, "loss": 0.8163, "step": 10637 }, { "epoch": 13.020807833537331, "grad_norm": 2.4613570334324684, "learning_rate": 2.117266795716763e-07, "loss": 0.4422, "step": 10638 }, { "epoch": 13.02203182374541, "grad_norm": 1.6004702677801592, "learning_rate": 2.1146863942238332e-07, "loss": 0.7055, "step": 10639 }, { "epoch": 13.023255813953488, "grad_norm": 1.980127400724189, "learning_rate": 2.1121074966690746e-07, "loss": 1.1804, "step": 10640 }, { "epoch": 13.024479804161567, "grad_norm": 2.829994268379481, "learning_rate": 2.109530103221949e-07, "loss": 0.5721, "step": 10641 }, { "epoch": 13.025703794369646, "grad_norm": 3.915845584897813, "learning_rate": 2.1069542140518346e-07, "loss": 0.3685, "step": 10642 }, { "epoch": 13.026927784577722, "grad_norm": 2.0273138738762957, "learning_rate": 2.1043798293280182e-07, "loss": 0.3579, "step": 10643 }, { "epoch": 13.028151774785801, "grad_norm": 1.431602169000849, "learning_rate": 2.101806949219673e-07, "loss": 0.7718, "step": 10644 }, { "epoch": 13.02937576499388, "grad_norm": 1.544755847289692, "learning_rate": 2.099235573895883e-07, "loss": 0.781, "step": 10645 }, { "epoch": 13.030599755201958, "grad_norm": 1.406490975701887, "learning_rate": 2.0966657035256298e-07, "loss": 0.4457, "step": 10646 }, { "epoch": 13.031823745410037, "grad_norm": 3.6200259111861626, "learning_rate": 2.0940973382777868e-07, "loss": 0.7537, "step": 10647 }, { "epoch": 13.033047735618116, "grad_norm": 1.1510963267752847, "learning_rate": 2.0915304783211437e-07, "loss": 0.5913, "step": 10648 }, { "epoch": 13.034271725826194, "grad_norm": 1.994459182504412, "learning_rate": 2.088965123824388e-07, "loss": 0.7015, "step": 10649 }, { "epoch": 13.035495716034271, "grad_norm": 1.4067703540831262, "learning_rate": 2.0864012749561013e-07, "loss": 0.6746, "step": 10650 }, { "epoch": 13.03671970624235, "grad_norm": 2.7371415347713195, "learning_rate": 2.0838389318847712e-07, "loss": 0.4642, "step": 10651 }, { "epoch": 13.037943696450428, "grad_norm": 2.439559826634428, "learning_rate": 2.081278094778788e-07, "loss": 0.5814, "step": 10652 }, { "epoch": 13.039167686658507, "grad_norm": 2.174299768494385, "learning_rate": 2.0787187638064337e-07, "loss": 0.3828, "step": 10653 }, { "epoch": 13.040391676866586, "grad_norm": 2.058014208605519, "learning_rate": 2.0761609391359043e-07, "loss": 0.7271, "step": 10654 }, { "epoch": 13.041615667074664, "grad_norm": 2.528971527663554, "learning_rate": 2.073604620935285e-07, "loss": 1.0081, "step": 10655 }, { "epoch": 13.042839657282741, "grad_norm": 1.3965508007436056, "learning_rate": 2.0710498093725745e-07, "loss": 0.5518, "step": 10656 }, { "epoch": 13.04406364749082, "grad_norm": 1.3189881224705073, "learning_rate": 2.0684965046156692e-07, "loss": 0.4454, "step": 10657 }, { "epoch": 13.045287637698898, "grad_norm": 1.969666254082318, "learning_rate": 2.0659447068323484e-07, "loss": 0.5956, "step": 10658 }, { "epoch": 13.046511627906977, "grad_norm": 1.3139505615526006, "learning_rate": 2.0633944161903147e-07, "loss": 0.5864, "step": 10659 }, { "epoch": 13.047735618115055, "grad_norm": 1.932891456951174, "learning_rate": 2.0608456328571585e-07, "loss": 1.482, "step": 10660 }, { "epoch": 13.048959608323134, "grad_norm": 1.5238700370529894, "learning_rate": 2.058298357000388e-07, "loss": 0.7894, "step": 10661 }, { "epoch": 13.050183598531211, "grad_norm": 1.906783963370472, "learning_rate": 2.0557525887873908e-07, "loss": 0.9194, "step": 10662 }, { "epoch": 13.05140758873929, "grad_norm": 1.0927219116472158, "learning_rate": 2.05320832838547e-07, "loss": 0.5337, "step": 10663 }, { "epoch": 13.052631578947368, "grad_norm": 1.6666857023512764, "learning_rate": 2.0506655759618245e-07, "loss": 0.5472, "step": 10664 }, { "epoch": 13.053855569155447, "grad_norm": 1.6583089346370568, "learning_rate": 2.0481243316835543e-07, "loss": 0.5179, "step": 10665 }, { "epoch": 13.055079559363525, "grad_norm": 2.9882163081165465, "learning_rate": 2.0455845957176563e-07, "loss": 0.3547, "step": 10666 }, { "epoch": 13.056303549571604, "grad_norm": 1.3776549262178153, "learning_rate": 2.0430463682310387e-07, "loss": 0.521, "step": 10667 }, { "epoch": 13.057527539779683, "grad_norm": 2.9589987933776456, "learning_rate": 2.0405096493904985e-07, "loss": 0.3443, "step": 10668 }, { "epoch": 13.05875152998776, "grad_norm": 2.437518545860974, "learning_rate": 2.03797443936275e-07, "loss": 0.4918, "step": 10669 }, { "epoch": 13.059975520195838, "grad_norm": 2.086566076103048, "learning_rate": 2.035440738314387e-07, "loss": 0.4939, "step": 10670 }, { "epoch": 13.061199510403917, "grad_norm": 2.678157025969947, "learning_rate": 2.0329085464119157e-07, "loss": 0.3955, "step": 10671 }, { "epoch": 13.062423500611995, "grad_norm": 1.9500250405987996, "learning_rate": 2.0303778638217503e-07, "loss": 0.678, "step": 10672 }, { "epoch": 13.063647490820074, "grad_norm": 3.8256931989743523, "learning_rate": 2.0278486907101852e-07, "loss": 0.4101, "step": 10673 }, { "epoch": 13.064871481028153, "grad_norm": 2.7202564353012986, "learning_rate": 2.0253210272434404e-07, "loss": 0.2703, "step": 10674 }, { "epoch": 13.06609547123623, "grad_norm": 3.656628430490908, "learning_rate": 2.0227948735876274e-07, "loss": 0.4169, "step": 10675 }, { "epoch": 13.067319461444308, "grad_norm": 2.327646762265486, "learning_rate": 2.020270229908741e-07, "loss": 0.5158, "step": 10676 }, { "epoch": 13.068543451652387, "grad_norm": 2.0065586994834166, "learning_rate": 2.0177470963726963e-07, "loss": 0.6194, "step": 10677 }, { "epoch": 13.069767441860465, "grad_norm": 1.6390633286319256, "learning_rate": 2.0152254731453102e-07, "loss": 0.4689, "step": 10678 }, { "epoch": 13.070991432068544, "grad_norm": 1.1674926757836828, "learning_rate": 2.0127053603922918e-07, "loss": 0.5045, "step": 10679 }, { "epoch": 13.072215422276622, "grad_norm": 2.6689240535714887, "learning_rate": 2.0101867582792533e-07, "loss": 0.3252, "step": 10680 }, { "epoch": 13.0734394124847, "grad_norm": 2.919849596127553, "learning_rate": 2.0076696669717066e-07, "loss": 0.3476, "step": 10681 }, { "epoch": 13.074663402692778, "grad_norm": 1.8761902479416768, "learning_rate": 2.0051540866350694e-07, "loss": 1.3393, "step": 10682 }, { "epoch": 13.075887392900857, "grad_norm": 2.167567530735371, "learning_rate": 2.0026400174346512e-07, "loss": 1.1134, "step": 10683 }, { "epoch": 13.077111383108935, "grad_norm": 2.0716018172033723, "learning_rate": 2.0001274595356696e-07, "loss": 0.3847, "step": 10684 }, { "epoch": 13.078335373317014, "grad_norm": 1.6661860952843675, "learning_rate": 1.9976164131032427e-07, "loss": 0.2911, "step": 10685 }, { "epoch": 13.079559363525092, "grad_norm": 1.5715609645077604, "learning_rate": 1.995106878302383e-07, "loss": 0.8599, "step": 10686 }, { "epoch": 13.080783353733171, "grad_norm": 1.1561336932536757, "learning_rate": 1.9925988552980114e-07, "loss": 0.6363, "step": 10687 }, { "epoch": 13.082007343941248, "grad_norm": 2.347464430123916, "learning_rate": 1.990092344254943e-07, "loss": 0.5403, "step": 10688 }, { "epoch": 13.083231334149326, "grad_norm": 1.194724387618907, "learning_rate": 1.9875873453378992e-07, "loss": 0.7416, "step": 10689 }, { "epoch": 13.084455324357405, "grad_norm": 2.9282037947895962, "learning_rate": 1.9850838587114923e-07, "loss": 0.3467, "step": 10690 }, { "epoch": 13.085679314565484, "grad_norm": 2.6825106352891033, "learning_rate": 1.982581884540255e-07, "loss": 0.3823, "step": 10691 }, { "epoch": 13.086903304773562, "grad_norm": 1.331154935044085, "learning_rate": 1.9800814229885974e-07, "loss": 0.5983, "step": 10692 }, { "epoch": 13.088127294981641, "grad_norm": 1.1690900339191943, "learning_rate": 1.9775824742208516e-07, "loss": 1.1941, "step": 10693 }, { "epoch": 13.089351285189718, "grad_norm": 2.2354395951051513, "learning_rate": 1.97508503840122e-07, "loss": 0.5278, "step": 10694 }, { "epoch": 13.090575275397796, "grad_norm": 1.8851408272278223, "learning_rate": 1.972589115693843e-07, "loss": 1.8735, "step": 10695 }, { "epoch": 13.091799265605875, "grad_norm": 2.016810099898951, "learning_rate": 1.9700947062627345e-07, "loss": 1.0416, "step": 10696 }, { "epoch": 13.093023255813954, "grad_norm": 2.244564978362661, "learning_rate": 1.9676018102718215e-07, "loss": 0.4781, "step": 10697 }, { "epoch": 13.094247246022032, "grad_norm": 2.4739186648045077, "learning_rate": 1.965110427884928e-07, "loss": 0.3572, "step": 10698 }, { "epoch": 13.095471236230111, "grad_norm": 3.6583261804831473, "learning_rate": 1.9626205592657765e-07, "loss": 0.3376, "step": 10699 }, { "epoch": 13.096695226438188, "grad_norm": 2.2021991068962166, "learning_rate": 1.9601322045779913e-07, "loss": 0.954, "step": 10700 }, { "epoch": 13.097919216646266, "grad_norm": 2.176663228708014, "learning_rate": 1.9576453639850972e-07, "loss": 1.1873, "step": 10701 }, { "epoch": 13.099143206854345, "grad_norm": 1.8008079671162243, "learning_rate": 1.9551600376505247e-07, "loss": 0.8196, "step": 10702 }, { "epoch": 13.100367197062424, "grad_norm": 1.8310770447131426, "learning_rate": 1.9526762257375932e-07, "loss": 0.6233, "step": 10703 }, { "epoch": 13.101591187270502, "grad_norm": 1.367096349125418, "learning_rate": 1.950193928409541e-07, "loss": 0.572, "step": 10704 }, { "epoch": 13.10281517747858, "grad_norm": 1.7092612781039433, "learning_rate": 1.9477131458294858e-07, "loss": 0.4943, "step": 10705 }, { "epoch": 13.104039167686658, "grad_norm": 1.5422920833297205, "learning_rate": 1.9452338781604573e-07, "loss": 0.6142, "step": 10706 }, { "epoch": 13.105263157894736, "grad_norm": 1.635020772035553, "learning_rate": 1.9427561255653816e-07, "loss": 0.5651, "step": 10707 }, { "epoch": 13.106487148102815, "grad_norm": 2.192782832537619, "learning_rate": 1.9402798882070944e-07, "loss": 0.304, "step": 10708 }, { "epoch": 13.107711138310894, "grad_norm": 2.952130952364076, "learning_rate": 1.9378051662483188e-07, "loss": 0.4929, "step": 10709 }, { "epoch": 13.108935128518972, "grad_norm": 3.01593307612884, "learning_rate": 1.9353319598516884e-07, "loss": 0.4511, "step": 10710 }, { "epoch": 13.11015911872705, "grad_norm": 1.416224838425651, "learning_rate": 1.9328602691797372e-07, "loss": 0.6734, "step": 10711 }, { "epoch": 13.11138310893513, "grad_norm": 1.4025118658974476, "learning_rate": 1.9303900943948767e-07, "loss": 0.5961, "step": 10712 }, { "epoch": 13.112607099143206, "grad_norm": 1.8666928361208635, "learning_rate": 1.9279214356594576e-07, "loss": 0.4612, "step": 10713 }, { "epoch": 13.113831089351285, "grad_norm": 1.5894334355936408, "learning_rate": 1.9254542931357062e-07, "loss": 0.4217, "step": 10714 }, { "epoch": 13.115055079559363, "grad_norm": 1.1630234833268647, "learning_rate": 1.922988666985748e-07, "loss": 0.3654, "step": 10715 }, { "epoch": 13.116279069767442, "grad_norm": 3.1195658146939125, "learning_rate": 1.9205245573716196e-07, "loss": 0.5194, "step": 10716 }, { "epoch": 13.11750305997552, "grad_norm": 2.098411736307625, "learning_rate": 1.9180619644552534e-07, "loss": 0.7683, "step": 10717 }, { "epoch": 13.1187270501836, "grad_norm": 1.3336682117626448, "learning_rate": 1.9156008883984805e-07, "loss": 0.3989, "step": 10718 }, { "epoch": 13.119951040391676, "grad_norm": 1.3091345357772137, "learning_rate": 1.913141329363033e-07, "loss": 0.4244, "step": 10719 }, { "epoch": 13.121175030599755, "grad_norm": 1.758357839776261, "learning_rate": 1.9106832875105398e-07, "loss": 0.5672, "step": 10720 }, { "epoch": 13.122399020807833, "grad_norm": 2.782687790887837, "learning_rate": 1.9082267630025463e-07, "loss": 0.7821, "step": 10721 }, { "epoch": 13.123623011015912, "grad_norm": 1.8906565899797316, "learning_rate": 1.9057717560004822e-07, "loss": 1.0789, "step": 10722 }, { "epoch": 13.12484700122399, "grad_norm": 1.4704545480534001, "learning_rate": 1.903318266665674e-07, "loss": 1.3067, "step": 10723 }, { "epoch": 13.12607099143207, "grad_norm": 2.152609883794192, "learning_rate": 1.9008662951593622e-07, "loss": 0.3779, "step": 10724 }, { "epoch": 13.127294981640146, "grad_norm": 1.907154629870227, "learning_rate": 1.8984158416426728e-07, "loss": 0.5248, "step": 10725 }, { "epoch": 13.128518971848225, "grad_norm": 1.7064674444064973, "learning_rate": 1.8959669062766556e-07, "loss": 0.4536, "step": 10726 }, { "epoch": 13.129742962056303, "grad_norm": 1.4989115303642548, "learning_rate": 1.893519489222234e-07, "loss": 0.6241, "step": 10727 }, { "epoch": 13.130966952264382, "grad_norm": 2.6946255187613417, "learning_rate": 1.8910735906402495e-07, "loss": 0.4662, "step": 10728 }, { "epoch": 13.13219094247246, "grad_norm": 1.7071063339189552, "learning_rate": 1.888629210691431e-07, "loss": 0.5546, "step": 10729 }, { "epoch": 13.13341493268054, "grad_norm": 1.0919970074102305, "learning_rate": 1.8861863495364202e-07, "loss": 0.5875, "step": 10730 }, { "epoch": 13.134638922888616, "grad_norm": 1.4362684303710538, "learning_rate": 1.8837450073357493e-07, "loss": 0.355, "step": 10731 }, { "epoch": 13.135862913096695, "grad_norm": 1.789025549299541, "learning_rate": 1.881305184249857e-07, "loss": 0.5243, "step": 10732 }, { "epoch": 13.137086903304773, "grad_norm": 2.1033087793088496, "learning_rate": 1.878866880439076e-07, "loss": 0.5227, "step": 10733 }, { "epoch": 13.138310893512852, "grad_norm": 1.5667056071262764, "learning_rate": 1.8764300960636446e-07, "loss": 1.1114, "step": 10734 }, { "epoch": 13.13953488372093, "grad_norm": 1.7347827005351377, "learning_rate": 1.8739948312837015e-07, "loss": 0.8454, "step": 10735 }, { "epoch": 13.140758873929009, "grad_norm": 1.7687830207123703, "learning_rate": 1.8715610862592797e-07, "loss": 1.5121, "step": 10736 }, { "epoch": 13.141982864137088, "grad_norm": 2.3982472282136484, "learning_rate": 1.8691288611503156e-07, "loss": 1.0244, "step": 10737 }, { "epoch": 13.143206854345165, "grad_norm": 1.5536916520950197, "learning_rate": 1.8666981561166447e-07, "loss": 1.0673, "step": 10738 }, { "epoch": 13.144430844553243, "grad_norm": 1.7798937542116158, "learning_rate": 1.864268971318012e-07, "loss": 1.2262, "step": 10739 }, { "epoch": 13.145654834761322, "grad_norm": 1.821191039712932, "learning_rate": 1.86184130691405e-07, "loss": 0.6496, "step": 10740 }, { "epoch": 13.1468788249694, "grad_norm": 2.5945754075618983, "learning_rate": 1.859415163064293e-07, "loss": 0.4008, "step": 10741 }, { "epoch": 13.148102815177479, "grad_norm": 2.8043650791734036, "learning_rate": 1.8569905399281773e-07, "loss": 0.5057, "step": 10742 }, { "epoch": 13.149326805385558, "grad_norm": 3.086942011029647, "learning_rate": 1.8545674376650474e-07, "loss": 0.2036, "step": 10743 }, { "epoch": 13.150550795593634, "grad_norm": 1.1782561549721169, "learning_rate": 1.8521458564341344e-07, "loss": 0.8444, "step": 10744 }, { "epoch": 13.151774785801713, "grad_norm": 1.9158324714069161, "learning_rate": 1.8497257963945803e-07, "loss": 0.4554, "step": 10745 }, { "epoch": 13.152998776009792, "grad_norm": 2.7168552012879483, "learning_rate": 1.847307257705419e-07, "loss": 0.9041, "step": 10746 }, { "epoch": 13.15422276621787, "grad_norm": 1.4901572475435245, "learning_rate": 1.8448902405255875e-07, "loss": 0.5262, "step": 10747 }, { "epoch": 13.155446756425949, "grad_norm": 1.6993474226866663, "learning_rate": 1.8424747450139252e-07, "loss": 0.4843, "step": 10748 }, { "epoch": 13.156670746634028, "grad_norm": 2.730684559942783, "learning_rate": 1.8400607713291691e-07, "loss": 0.7161, "step": 10749 }, { "epoch": 13.157894736842104, "grad_norm": 1.4922369982380612, "learning_rate": 1.837648319629956e-07, "loss": 0.6617, "step": 10750 }, { "epoch": 13.159118727050183, "grad_norm": 2.6154939610887364, "learning_rate": 1.835237390074826e-07, "loss": 0.809, "step": 10751 }, { "epoch": 13.160342717258262, "grad_norm": 2.253508069302998, "learning_rate": 1.8328279828222134e-07, "loss": 0.8147, "step": 10752 }, { "epoch": 13.16156670746634, "grad_norm": 1.5155161123409944, "learning_rate": 1.8304200980304553e-07, "loss": 1.4059, "step": 10753 }, { "epoch": 13.162790697674419, "grad_norm": 2.3261801653531498, "learning_rate": 1.8280137358577892e-07, "loss": 0.477, "step": 10754 }, { "epoch": 13.164014687882498, "grad_norm": 2.3070712285817128, "learning_rate": 1.8256088964623498e-07, "loss": 0.5965, "step": 10755 }, { "epoch": 13.165238678090576, "grad_norm": 1.4924585403227348, "learning_rate": 1.8232055800021826e-07, "loss": 0.62, "step": 10756 }, { "epoch": 13.166462668298653, "grad_norm": 1.9381792718964943, "learning_rate": 1.820803786635217e-07, "loss": 0.3836, "step": 10757 }, { "epoch": 13.167686658506732, "grad_norm": 1.9085052725699083, "learning_rate": 1.8184035165192986e-07, "loss": 0.7244, "step": 10758 }, { "epoch": 13.16891064871481, "grad_norm": 1.93959065510096, "learning_rate": 1.8160047698121519e-07, "loss": 0.4664, "step": 10759 }, { "epoch": 13.170134638922889, "grad_norm": 2.7749625302833376, "learning_rate": 1.8136075466714225e-07, "loss": 0.8874, "step": 10760 }, { "epoch": 13.171358629130967, "grad_norm": 2.83586223226373, "learning_rate": 1.811211847254643e-07, "loss": 0.3517, "step": 10761 }, { "epoch": 13.172582619339046, "grad_norm": 1.847277119047382, "learning_rate": 1.808817671719254e-07, "loss": 0.521, "step": 10762 }, { "epoch": 13.173806609547123, "grad_norm": 2.3333794732089617, "learning_rate": 1.8064250202225881e-07, "loss": 0.4082, "step": 10763 }, { "epoch": 13.175030599755202, "grad_norm": 1.8104690864242152, "learning_rate": 1.8040338929218832e-07, "loss": 0.3073, "step": 10764 }, { "epoch": 13.17625458996328, "grad_norm": 2.667455516796761, "learning_rate": 1.8016442899742747e-07, "loss": 0.6668, "step": 10765 }, { "epoch": 13.177478580171359, "grad_norm": 2.0485787941196, "learning_rate": 1.7992562115367983e-07, "loss": 1.0291, "step": 10766 }, { "epoch": 13.178702570379437, "grad_norm": 1.5937292172026674, "learning_rate": 1.7968696577663923e-07, "loss": 0.3183, "step": 10767 }, { "epoch": 13.179926560587516, "grad_norm": 2.0497641717052435, "learning_rate": 1.7944846288198842e-07, "loss": 0.5106, "step": 10768 }, { "epoch": 13.181150550795593, "grad_norm": 0.9812707088664636, "learning_rate": 1.7921011248540233e-07, "loss": 0.3874, "step": 10769 }, { "epoch": 13.182374541003671, "grad_norm": 2.1040564946447304, "learning_rate": 1.7897191460254316e-07, "loss": 0.4, "step": 10770 }, { "epoch": 13.18359853121175, "grad_norm": 1.7700125707148011, "learning_rate": 1.7873386924906476e-07, "loss": 0.743, "step": 10771 }, { "epoch": 13.184822521419829, "grad_norm": 2.654962935092691, "learning_rate": 1.784959764406105e-07, "loss": 0.4575, "step": 10772 }, { "epoch": 13.186046511627907, "grad_norm": 0.9687164084364793, "learning_rate": 1.7825823619281452e-07, "loss": 0.4373, "step": 10773 }, { "epoch": 13.187270501835986, "grad_norm": 1.1084454954272414, "learning_rate": 1.7802064852129957e-07, "loss": 0.504, "step": 10774 }, { "epoch": 13.188494492044065, "grad_norm": 2.076194384364367, "learning_rate": 1.777832134416796e-07, "loss": 0.5209, "step": 10775 }, { "epoch": 13.189718482252141, "grad_norm": 1.1795323473334225, "learning_rate": 1.775459309695571e-07, "loss": 0.5551, "step": 10776 }, { "epoch": 13.19094247246022, "grad_norm": 1.5935701728585197, "learning_rate": 1.7730880112052568e-07, "loss": 0.6065, "step": 10777 }, { "epoch": 13.192166462668299, "grad_norm": 1.6408053564338947, "learning_rate": 1.7707182391016902e-07, "loss": 1.2883, "step": 10778 }, { "epoch": 13.193390452876377, "grad_norm": 2.414702596140684, "learning_rate": 1.7683499935406023e-07, "loss": 0.8985, "step": 10779 }, { "epoch": 13.194614443084456, "grad_norm": 2.1913066763070503, "learning_rate": 1.765983274677624e-07, "loss": 0.8288, "step": 10780 }, { "epoch": 13.195838433292534, "grad_norm": 1.432272376149489, "learning_rate": 1.7636180826682892e-07, "loss": 0.5403, "step": 10781 }, { "epoch": 13.197062423500611, "grad_norm": 2.6400438930332313, "learning_rate": 1.761254417668029e-07, "loss": 0.6135, "step": 10782 }, { "epoch": 13.19828641370869, "grad_norm": 1.610207887302078, "learning_rate": 1.7588922798321723e-07, "loss": 0.5626, "step": 10783 }, { "epoch": 13.199510403916769, "grad_norm": 1.2489417890792995, "learning_rate": 1.7565316693159528e-07, "loss": 0.5371, "step": 10784 }, { "epoch": 13.200734394124847, "grad_norm": 1.5402933914798187, "learning_rate": 1.7541725862745024e-07, "loss": 0.4498, "step": 10785 }, { "epoch": 13.201958384332926, "grad_norm": 1.7506840936823174, "learning_rate": 1.751815030862844e-07, "loss": 0.6652, "step": 10786 }, { "epoch": 13.203182374541004, "grad_norm": 2.901722710543732, "learning_rate": 1.7494590032359204e-07, "loss": 0.437, "step": 10787 }, { "epoch": 13.204406364749081, "grad_norm": 2.080655535296931, "learning_rate": 1.7471045035485495e-07, "loss": 0.7562, "step": 10788 }, { "epoch": 13.20563035495716, "grad_norm": 2.3333946452347702, "learning_rate": 1.7447515319554632e-07, "loss": 0.6658, "step": 10789 }, { "epoch": 13.206854345165238, "grad_norm": 2.044234534582507, "learning_rate": 1.7424000886112875e-07, "loss": 0.6179, "step": 10790 }, { "epoch": 13.208078335373317, "grad_norm": 1.5621450202085896, "learning_rate": 1.74005017367056e-07, "loss": 0.8664, "step": 10791 }, { "epoch": 13.209302325581396, "grad_norm": 1.910650533699908, "learning_rate": 1.737701787287699e-07, "loss": 0.433, "step": 10792 }, { "epoch": 13.210526315789474, "grad_norm": 2.9039544718234946, "learning_rate": 1.735354929617042e-07, "loss": 0.4397, "step": 10793 }, { "epoch": 13.211750305997551, "grad_norm": 2.4771014994006273, "learning_rate": 1.733009600812799e-07, "loss": 0.391, "step": 10794 }, { "epoch": 13.21297429620563, "grad_norm": 2.086556318852155, "learning_rate": 1.7306658010291134e-07, "loss": 0.6273, "step": 10795 }, { "epoch": 13.214198286413708, "grad_norm": 2.1694867274762886, "learning_rate": 1.7283235304200008e-07, "loss": 1.0624, "step": 10796 }, { "epoch": 13.215422276621787, "grad_norm": 1.1495953584462653, "learning_rate": 1.725982789139391e-07, "loss": 0.575, "step": 10797 }, { "epoch": 13.216646266829866, "grad_norm": 1.5811994439121682, "learning_rate": 1.7236435773411082e-07, "loss": 1.0772, "step": 10798 }, { "epoch": 13.217870257037944, "grad_norm": 3.0894868780916296, "learning_rate": 1.7213058951788763e-07, "loss": 0.335, "step": 10799 }, { "epoch": 13.219094247246023, "grad_norm": 2.5962865360129244, "learning_rate": 1.71896974280632e-07, "loss": 0.5091, "step": 10800 }, { "epoch": 13.2203182374541, "grad_norm": 1.2079963450320874, "learning_rate": 1.7166351203769582e-07, "loss": 0.532, "step": 10801 }, { "epoch": 13.221542227662178, "grad_norm": 2.369377977850026, "learning_rate": 1.7143020280442207e-07, "loss": 0.4555, "step": 10802 }, { "epoch": 13.222766217870257, "grad_norm": 1.6343277011988764, "learning_rate": 1.711970465961421e-07, "loss": 0.5068, "step": 10803 }, { "epoch": 13.223990208078336, "grad_norm": 2.262014734534502, "learning_rate": 1.7096404342817925e-07, "loss": 0.5048, "step": 10804 }, { "epoch": 13.225214198286414, "grad_norm": 1.4766126463559532, "learning_rate": 1.7073119331584454e-07, "loss": 0.7934, "step": 10805 }, { "epoch": 13.226438188494493, "grad_norm": 1.6919889888114774, "learning_rate": 1.7049849627444076e-07, "loss": 1.0084, "step": 10806 }, { "epoch": 13.22766217870257, "grad_norm": 2.1710621221441975, "learning_rate": 1.7026595231925874e-07, "loss": 0.8056, "step": 10807 }, { "epoch": 13.228886168910648, "grad_norm": 1.7864428063764535, "learning_rate": 1.7003356146558208e-07, "loss": 0.4257, "step": 10808 }, { "epoch": 13.230110159118727, "grad_norm": 2.860779390789012, "learning_rate": 1.6980132372868135e-07, "loss": 0.8193, "step": 10809 }, { "epoch": 13.231334149326806, "grad_norm": 1.1441442959846486, "learning_rate": 1.6956923912381906e-07, "loss": 0.3838, "step": 10810 }, { "epoch": 13.232558139534884, "grad_norm": 1.8751782267202073, "learning_rate": 1.6933730766624716e-07, "loss": 0.9056, "step": 10811 }, { "epoch": 13.233782129742963, "grad_norm": 1.2521568035969637, "learning_rate": 1.69105529371206e-07, "loss": 0.8133, "step": 10812 }, { "epoch": 13.23500611995104, "grad_norm": 1.8123350563859886, "learning_rate": 1.6887390425392862e-07, "loss": 1.1545, "step": 10813 }, { "epoch": 13.236230110159118, "grad_norm": 2.2390308022553835, "learning_rate": 1.6864243232963596e-07, "loss": 1.1239, "step": 10814 }, { "epoch": 13.237454100367197, "grad_norm": 1.4107062863668618, "learning_rate": 1.684111136135394e-07, "loss": 0.5026, "step": 10815 }, { "epoch": 13.238678090575275, "grad_norm": 1.2846502534865445, "learning_rate": 1.6817994812084043e-07, "loss": 0.745, "step": 10816 }, { "epoch": 13.239902080783354, "grad_norm": 1.2438650669410458, "learning_rate": 1.6794893586673077e-07, "loss": 0.5796, "step": 10817 }, { "epoch": 13.241126070991433, "grad_norm": 2.6572833885915275, "learning_rate": 1.6771807686639107e-07, "loss": 0.5675, "step": 10818 }, { "epoch": 13.24235006119951, "grad_norm": 1.303866672751778, "learning_rate": 1.674873711349931e-07, "loss": 0.6422, "step": 10819 }, { "epoch": 13.243574051407588, "grad_norm": 3.220991829650694, "learning_rate": 1.6725681868769716e-07, "loss": 0.503, "step": 10820 }, { "epoch": 13.244798041615667, "grad_norm": 1.2361705384488078, "learning_rate": 1.6702641953965537e-07, "loss": 0.569, "step": 10821 }, { "epoch": 13.246022031823745, "grad_norm": 1.8394625493099772, "learning_rate": 1.6679617370600866e-07, "loss": 0.9238, "step": 10822 }, { "epoch": 13.247246022031824, "grad_norm": 1.9441899957108804, "learning_rate": 1.6656608120188711e-07, "loss": 1.2074, "step": 10823 }, { "epoch": 13.248470012239903, "grad_norm": 1.484827110309995, "learning_rate": 1.663361420424117e-07, "loss": 0.7205, "step": 10824 }, { "epoch": 13.249694002447981, "grad_norm": 2.6321178192356496, "learning_rate": 1.6610635624269316e-07, "loss": 0.5139, "step": 10825 }, { "epoch": 13.250917992656058, "grad_norm": 1.16619766981921, "learning_rate": 1.65876723817833e-07, "loss": 0.5041, "step": 10826 }, { "epoch": 13.252141982864137, "grad_norm": 2.453868361025007, "learning_rate": 1.6564724478292104e-07, "loss": 0.4116, "step": 10827 }, { "epoch": 13.253365973072215, "grad_norm": 2.097264568588874, "learning_rate": 1.654179191530378e-07, "loss": 0.2873, "step": 10828 }, { "epoch": 13.254589963280294, "grad_norm": 1.926811787043996, "learning_rate": 1.651887469432542e-07, "loss": 0.5198, "step": 10829 }, { "epoch": 13.255813953488373, "grad_norm": 2.1484456148198183, "learning_rate": 1.649597281686302e-07, "loss": 0.4348, "step": 10830 }, { "epoch": 13.257037943696451, "grad_norm": 2.460527724459208, "learning_rate": 1.6473086284421598e-07, "loss": 0.4371, "step": 10831 }, { "epoch": 13.258261933904528, "grad_norm": 1.435788568962698, "learning_rate": 1.6450215098505195e-07, "loss": 0.869, "step": 10832 }, { "epoch": 13.259485924112607, "grad_norm": 1.6495361639700936, "learning_rate": 1.6427359260616836e-07, "loss": 0.9789, "step": 10833 }, { "epoch": 13.260709914320685, "grad_norm": 1.9502321829070655, "learning_rate": 1.6404518772258488e-07, "loss": 0.4494, "step": 10834 }, { "epoch": 13.261933904528764, "grad_norm": 1.5242530374589205, "learning_rate": 1.638169363493114e-07, "loss": 1.3017, "step": 10835 }, { "epoch": 13.263157894736842, "grad_norm": 1.5662903106132338, "learning_rate": 1.6358883850134815e-07, "loss": 0.54, "step": 10836 }, { "epoch": 13.264381884944921, "grad_norm": 1.9278425193351822, "learning_rate": 1.6336089419368455e-07, "loss": 0.4834, "step": 10837 }, { "epoch": 13.265605875152998, "grad_norm": 2.9952831029568916, "learning_rate": 1.6313310344130002e-07, "loss": 0.9525, "step": 10838 }, { "epoch": 13.266829865361077, "grad_norm": 1.2754614481353925, "learning_rate": 1.6290546625916475e-07, "loss": 0.5682, "step": 10839 }, { "epoch": 13.268053855569155, "grad_norm": 1.8580386242631521, "learning_rate": 1.6267798266223821e-07, "loss": 1.1827, "step": 10840 }, { "epoch": 13.269277845777234, "grad_norm": 2.3688391700801446, "learning_rate": 1.6245065266546928e-07, "loss": 0.3197, "step": 10841 }, { "epoch": 13.270501835985312, "grad_norm": 1.1490147691059225, "learning_rate": 1.6222347628379708e-07, "loss": 0.5327, "step": 10842 }, { "epoch": 13.271725826193391, "grad_norm": 1.1454077696957563, "learning_rate": 1.6199645353215164e-07, "loss": 0.5599, "step": 10843 }, { "epoch": 13.27294981640147, "grad_norm": 0.9149384642967056, "learning_rate": 1.6176958442545133e-07, "loss": 0.3486, "step": 10844 }, { "epoch": 13.274173806609546, "grad_norm": 1.7583961126641905, "learning_rate": 1.6154286897860583e-07, "loss": 0.4895, "step": 10845 }, { "epoch": 13.275397796817625, "grad_norm": 2.4980617802245404, "learning_rate": 1.6131630720651327e-07, "loss": 0.5401, "step": 10846 }, { "epoch": 13.276621787025704, "grad_norm": 2.893561610405688, "learning_rate": 1.610898991240628e-07, "loss": 0.5546, "step": 10847 }, { "epoch": 13.277845777233782, "grad_norm": 2.0954861631110866, "learning_rate": 1.608636447461334e-07, "loss": 0.4827, "step": 10848 }, { "epoch": 13.279069767441861, "grad_norm": 1.2815612048103522, "learning_rate": 1.6063754408759318e-07, "loss": 0.6803, "step": 10849 }, { "epoch": 13.28029375764994, "grad_norm": 1.7073844131635023, "learning_rate": 1.604115971633008e-07, "loss": 0.4478, "step": 10850 }, { "epoch": 13.281517747858016, "grad_norm": 2.2804247157815505, "learning_rate": 1.601858039881049e-07, "loss": 0.532, "step": 10851 }, { "epoch": 13.282741738066095, "grad_norm": 2.349756458271896, "learning_rate": 1.5996016457684365e-07, "loss": 0.522, "step": 10852 }, { "epoch": 13.283965728274174, "grad_norm": 1.9986119536360727, "learning_rate": 1.597346789443449e-07, "loss": 0.3911, "step": 10853 }, { "epoch": 13.285189718482252, "grad_norm": 2.3793216247820506, "learning_rate": 1.5950934710542738e-07, "loss": 0.9767, "step": 10854 }, { "epoch": 13.286413708690331, "grad_norm": 2.0329442389016696, "learning_rate": 1.5928416907489808e-07, "loss": 0.5407, "step": 10855 }, { "epoch": 13.28763769889841, "grad_norm": 2.0249883014943983, "learning_rate": 1.5905914486755575e-07, "loss": 0.9887, "step": 10856 }, { "epoch": 13.288861689106486, "grad_norm": 1.5420696953007693, "learning_rate": 1.58834274498188e-07, "loss": 0.8052, "step": 10857 }, { "epoch": 13.290085679314565, "grad_norm": 2.0371489354556163, "learning_rate": 1.5860955798157275e-07, "loss": 0.9525, "step": 10858 }, { "epoch": 13.291309669522644, "grad_norm": 1.9251879668124443, "learning_rate": 1.5838499533247648e-07, "loss": 1.076, "step": 10859 }, { "epoch": 13.292533659730722, "grad_norm": 1.3714859166374462, "learning_rate": 1.581605865656574e-07, "loss": 0.5293, "step": 10860 }, { "epoch": 13.2937576499388, "grad_norm": 1.8820078626693557, "learning_rate": 1.5793633169586293e-07, "loss": 0.54, "step": 10861 }, { "epoch": 13.29498164014688, "grad_norm": 2.702883351291548, "learning_rate": 1.577122307378301e-07, "loss": 1.1724, "step": 10862 }, { "epoch": 13.296205630354958, "grad_norm": 0.7918596604935282, "learning_rate": 1.5748828370628578e-07, "loss": 0.2162, "step": 10863 }, { "epoch": 13.297429620563035, "grad_norm": 1.4395185554913117, "learning_rate": 1.5726449061594706e-07, "loss": 0.6883, "step": 10864 }, { "epoch": 13.298653610771114, "grad_norm": 1.7058986957170796, "learning_rate": 1.570408514815211e-07, "loss": 0.461, "step": 10865 }, { "epoch": 13.299877600979192, "grad_norm": 2.8504793596031384, "learning_rate": 1.5681736631770416e-07, "loss": 0.4976, "step": 10866 }, { "epoch": 13.30110159118727, "grad_norm": 2.5778118136458597, "learning_rate": 1.5659403513918313e-07, "loss": 0.4112, "step": 10867 }, { "epoch": 13.30232558139535, "grad_norm": 1.6672873047533252, "learning_rate": 1.5637085796063407e-07, "loss": 1.2398, "step": 10868 }, { "epoch": 13.303549571603428, "grad_norm": 1.3844709341649555, "learning_rate": 1.5614783479672436e-07, "loss": 1.0851, "step": 10869 }, { "epoch": 13.304773561811505, "grad_norm": 2.6574773128525058, "learning_rate": 1.5592496566210953e-07, "loss": 0.661, "step": 10870 }, { "epoch": 13.305997552019583, "grad_norm": 2.5427782694388137, "learning_rate": 1.5570225057143562e-07, "loss": 0.447, "step": 10871 }, { "epoch": 13.307221542227662, "grad_norm": 2.939229664591074, "learning_rate": 1.5547968953933846e-07, "loss": 0.8152, "step": 10872 }, { "epoch": 13.30844553243574, "grad_norm": 1.1612952353732153, "learning_rate": 1.5525728258044464e-07, "loss": 0.4919, "step": 10873 }, { "epoch": 13.30966952264382, "grad_norm": 1.7358483247606669, "learning_rate": 1.5503502970936972e-07, "loss": 0.5556, "step": 10874 }, { "epoch": 13.310893512851898, "grad_norm": 3.7617256753745183, "learning_rate": 1.548129309407198e-07, "loss": 0.4657, "step": 10875 }, { "epoch": 13.312117503059975, "grad_norm": 1.674560755268325, "learning_rate": 1.5459098628908931e-07, "loss": 1.1644, "step": 10876 }, { "epoch": 13.313341493268053, "grad_norm": 1.4391134139915083, "learning_rate": 1.543691957690635e-07, "loss": 1.5537, "step": 10877 }, { "epoch": 13.314565483476132, "grad_norm": 1.631606911415783, "learning_rate": 1.541475593952188e-07, "loss": 0.3738, "step": 10878 }, { "epoch": 13.31578947368421, "grad_norm": 2.158577088564206, "learning_rate": 1.5392607718211993e-07, "loss": 0.5895, "step": 10879 }, { "epoch": 13.31701346389229, "grad_norm": 1.5088371006107566, "learning_rate": 1.5370474914432166e-07, "loss": 1.1123, "step": 10880 }, { "epoch": 13.318237454100368, "grad_norm": 2.4734886079116154, "learning_rate": 1.534835752963687e-07, "loss": 0.4394, "step": 10881 }, { "epoch": 13.319461444308445, "grad_norm": 2.269118948809782, "learning_rate": 1.532625556527964e-07, "loss": 0.391, "step": 10882 }, { "epoch": 13.320685434516523, "grad_norm": 1.0006367676963155, "learning_rate": 1.530416902281287e-07, "loss": 0.3424, "step": 10883 }, { "epoch": 13.321909424724602, "grad_norm": 2.1837726295426343, "learning_rate": 1.5282097903688036e-07, "loss": 1.1129, "step": 10884 }, { "epoch": 13.32313341493268, "grad_norm": 1.1791248680404671, "learning_rate": 1.5260042209355534e-07, "loss": 0.563, "step": 10885 }, { "epoch": 13.32435740514076, "grad_norm": 1.9618569987731278, "learning_rate": 1.5238001941264847e-07, "loss": 0.9575, "step": 10886 }, { "epoch": 13.325581395348838, "grad_norm": 1.7291278723524608, "learning_rate": 1.5215977100864394e-07, "loss": 0.5302, "step": 10887 }, { "epoch": 13.326805385556916, "grad_norm": 1.8326640205649831, "learning_rate": 1.5193967689601462e-07, "loss": 1.0893, "step": 10888 }, { "epoch": 13.328029375764993, "grad_norm": 1.5086102203100962, "learning_rate": 1.517197370892251e-07, "loss": 0.5524, "step": 10889 }, { "epoch": 13.329253365973072, "grad_norm": 2.1180461916230415, "learning_rate": 1.5149995160272824e-07, "loss": 1.3102, "step": 10890 }, { "epoch": 13.33047735618115, "grad_norm": 2.7575883979639357, "learning_rate": 1.5128032045096858e-07, "loss": 0.5806, "step": 10891 }, { "epoch": 13.331701346389229, "grad_norm": 1.87411483411944, "learning_rate": 1.5106084364837875e-07, "loss": 0.4941, "step": 10892 }, { "epoch": 13.332925336597308, "grad_norm": 2.5273256166629086, "learning_rate": 1.508415212093828e-07, "loss": 0.4523, "step": 10893 }, { "epoch": 13.334149326805386, "grad_norm": 3.7759644214310994, "learning_rate": 1.506223531483922e-07, "loss": 0.2642, "step": 10894 }, { "epoch": 13.335373317013463, "grad_norm": 1.3263639223442347, "learning_rate": 1.5040333947981127e-07, "loss": 0.5531, "step": 10895 }, { "epoch": 13.336597307221542, "grad_norm": 1.8516890541158635, "learning_rate": 1.5018448021803217e-07, "loss": 0.9479, "step": 10896 }, { "epoch": 13.33782129742962, "grad_norm": 1.5211796951401029, "learning_rate": 1.499657753774378e-07, "loss": 0.3507, "step": 10897 }, { "epoch": 13.339045287637699, "grad_norm": 2.1092140960958656, "learning_rate": 1.497472249724005e-07, "loss": 0.7614, "step": 10898 }, { "epoch": 13.340269277845778, "grad_norm": 2.2737466794586583, "learning_rate": 1.495288290172825e-07, "loss": 0.9141, "step": 10899 }, { "epoch": 13.341493268053856, "grad_norm": 1.2471647266876746, "learning_rate": 1.4931058752643613e-07, "loss": 0.8388, "step": 10900 }, { "epoch": 13.342717258261933, "grad_norm": 2.21695813910818, "learning_rate": 1.4909250051420327e-07, "loss": 0.6029, "step": 10901 }, { "epoch": 13.343941248470012, "grad_norm": 3.151954403779326, "learning_rate": 1.4887456799491577e-07, "loss": 0.3893, "step": 10902 }, { "epoch": 13.34516523867809, "grad_norm": 2.063810627218216, "learning_rate": 1.486567899828953e-07, "loss": 0.7256, "step": 10903 }, { "epoch": 13.346389228886169, "grad_norm": 1.0871145753586322, "learning_rate": 1.484391664924542e-07, "loss": 0.5089, "step": 10904 }, { "epoch": 13.347613219094248, "grad_norm": 1.4548310648503975, "learning_rate": 1.4822169753789305e-07, "loss": 0.4309, "step": 10905 }, { "epoch": 13.348837209302326, "grad_norm": 1.464659419604835, "learning_rate": 1.4800438313350318e-07, "loss": 0.5335, "step": 10906 }, { "epoch": 13.350061199510403, "grad_norm": 1.6966142281407584, "learning_rate": 1.4778722329356538e-07, "loss": 0.3804, "step": 10907 }, { "epoch": 13.351285189718482, "grad_norm": 1.8406945873731702, "learning_rate": 1.4757021803235156e-07, "loss": 1.0965, "step": 10908 }, { "epoch": 13.35250917992656, "grad_norm": 1.7291889961421663, "learning_rate": 1.4735336736412198e-07, "loss": 0.5028, "step": 10909 }, { "epoch": 13.353733170134639, "grad_norm": 1.9655327365906838, "learning_rate": 1.471366713031272e-07, "loss": 1.0997, "step": 10910 }, { "epoch": 13.354957160342718, "grad_norm": 1.6880825842042297, "learning_rate": 1.4692012986360775e-07, "loss": 1.5524, "step": 10911 }, { "epoch": 13.356181150550796, "grad_norm": 1.5405336092491348, "learning_rate": 1.4670374305979397e-07, "loss": 0.7457, "step": 10912 }, { "epoch": 13.357405140758875, "grad_norm": 2.5163713221162776, "learning_rate": 1.464875109059058e-07, "loss": 0.4792, "step": 10913 }, { "epoch": 13.358629130966952, "grad_norm": 2.522892827791178, "learning_rate": 1.462714334161533e-07, "loss": 0.601, "step": 10914 }, { "epoch": 13.35985312117503, "grad_norm": 1.4193905928615667, "learning_rate": 1.4605551060473678e-07, "loss": 1.0845, "step": 10915 }, { "epoch": 13.361077111383109, "grad_norm": 3.5633840878752276, "learning_rate": 1.4583974248584516e-07, "loss": 0.3883, "step": 10916 }, { "epoch": 13.362301101591187, "grad_norm": 1.6037680563010934, "learning_rate": 1.456241290736582e-07, "loss": 0.9245, "step": 10917 }, { "epoch": 13.363525091799266, "grad_norm": 1.875026209907363, "learning_rate": 1.454086703823454e-07, "loss": 0.5392, "step": 10918 }, { "epoch": 13.364749082007345, "grad_norm": 1.8234691534869811, "learning_rate": 1.4519336642606546e-07, "loss": 0.6257, "step": 10919 }, { "epoch": 13.365973072215422, "grad_norm": 1.6184075088374692, "learning_rate": 1.449782172189676e-07, "loss": 0.9168, "step": 10920 }, { "epoch": 13.3671970624235, "grad_norm": 2.2621672547693352, "learning_rate": 1.4476322277519105e-07, "loss": 0.5649, "step": 10921 }, { "epoch": 13.368421052631579, "grad_norm": 2.5811845520218846, "learning_rate": 1.4454838310886427e-07, "loss": 0.4928, "step": 10922 }, { "epoch": 13.369645042839657, "grad_norm": 2.3669033587335067, "learning_rate": 1.443336982341051e-07, "loss": 0.6247, "step": 10923 }, { "epoch": 13.370869033047736, "grad_norm": 1.4067260982400014, "learning_rate": 1.4411916816502224e-07, "loss": 0.5106, "step": 10924 }, { "epoch": 13.372093023255815, "grad_norm": 1.7540784786546804, "learning_rate": 1.439047929157139e-07, "loss": 0.5134, "step": 10925 }, { "epoch": 13.373317013463891, "grad_norm": 3.6618990093218686, "learning_rate": 1.4369057250026824e-07, "loss": 0.3273, "step": 10926 }, { "epoch": 13.37454100367197, "grad_norm": 2.754121058678357, "learning_rate": 1.434765069327626e-07, "loss": 0.3953, "step": 10927 }, { "epoch": 13.375764993880049, "grad_norm": 2.1193150408409713, "learning_rate": 1.4326259622726462e-07, "loss": 0.8471, "step": 10928 }, { "epoch": 13.376988984088127, "grad_norm": 1.213814937239059, "learning_rate": 1.4304884039783224e-07, "loss": 0.879, "step": 10929 }, { "epoch": 13.378212974296206, "grad_norm": 1.8967576318743815, "learning_rate": 1.4283523945851197e-07, "loss": 1.5842, "step": 10930 }, { "epoch": 13.379436964504285, "grad_norm": 2.0255844152529336, "learning_rate": 1.426217934233412e-07, "loss": 1.3822, "step": 10931 }, { "epoch": 13.380660954712361, "grad_norm": 1.1853242545499996, "learning_rate": 1.4240850230634708e-07, "loss": 0.4133, "step": 10932 }, { "epoch": 13.38188494492044, "grad_norm": 1.6154801037507642, "learning_rate": 1.4219536612154556e-07, "loss": 0.8119, "step": 10933 }, { "epoch": 13.383108935128519, "grad_norm": 1.865362818518297, "learning_rate": 1.4198238488294436e-07, "loss": 1.1661, "step": 10934 }, { "epoch": 13.384332925336597, "grad_norm": 3.0120903672731205, "learning_rate": 1.4176955860453868e-07, "loss": 0.6491, "step": 10935 }, { "epoch": 13.385556915544676, "grad_norm": 1.736528876777888, "learning_rate": 1.4155688730031536e-07, "loss": 0.978, "step": 10936 }, { "epoch": 13.386780905752754, "grad_norm": 2.9131605047881637, "learning_rate": 1.4134437098424992e-07, "loss": 0.4614, "step": 10937 }, { "epoch": 13.388004895960833, "grad_norm": 2.9086776144812116, "learning_rate": 1.411320096703081e-07, "loss": 1.0509, "step": 10938 }, { "epoch": 13.38922888616891, "grad_norm": 1.6150453333181418, "learning_rate": 1.4091980337244626e-07, "loss": 0.6006, "step": 10939 }, { "epoch": 13.390452876376989, "grad_norm": 2.4380176437477896, "learning_rate": 1.4070775210460936e-07, "loss": 1.0704, "step": 10940 }, { "epoch": 13.391676866585067, "grad_norm": 2.4082467773789933, "learning_rate": 1.4049585588073233e-07, "loss": 0.633, "step": 10941 }, { "epoch": 13.392900856793146, "grad_norm": 2.908076008955064, "learning_rate": 1.402841147147402e-07, "loss": 0.5711, "step": 10942 }, { "epoch": 13.394124847001224, "grad_norm": 1.6047706103248838, "learning_rate": 1.4007252862054816e-07, "loss": 0.6773, "step": 10943 }, { "epoch": 13.395348837209303, "grad_norm": 3.369858722812164, "learning_rate": 1.3986109761206097e-07, "loss": 0.3685, "step": 10944 }, { "epoch": 13.39657282741738, "grad_norm": 1.9973847632431856, "learning_rate": 1.3964982170317303e-07, "loss": 0.6055, "step": 10945 }, { "epoch": 13.397796817625458, "grad_norm": 2.1590781389777547, "learning_rate": 1.3943870090776829e-07, "loss": 0.4271, "step": 10946 }, { "epoch": 13.399020807833537, "grad_norm": 2.7149098844063673, "learning_rate": 1.3922773523972115e-07, "loss": 0.5009, "step": 10947 }, { "epoch": 13.400244798041616, "grad_norm": 1.6521008980740193, "learning_rate": 1.39016924712895e-07, "loss": 1.061, "step": 10948 }, { "epoch": 13.401468788249694, "grad_norm": 2.03225120288025, "learning_rate": 1.3880626934114432e-07, "loss": 0.7209, "step": 10949 }, { "epoch": 13.402692778457773, "grad_norm": 1.2077853607205484, "learning_rate": 1.3859576913831192e-07, "loss": 0.5577, "step": 10950 }, { "epoch": 13.403916768665852, "grad_norm": 1.3739859444511033, "learning_rate": 1.383854241182314e-07, "loss": 0.7148, "step": 10951 }, { "epoch": 13.405140758873928, "grad_norm": 3.3987758441834894, "learning_rate": 1.3817523429472597e-07, "loss": 0.5841, "step": 10952 }, { "epoch": 13.406364749082007, "grad_norm": 1.4687778066785078, "learning_rate": 1.3796519968160809e-07, "loss": 0.3613, "step": 10953 }, { "epoch": 13.407588739290086, "grad_norm": 2.5539251738271247, "learning_rate": 1.3775532029268096e-07, "loss": 0.5947, "step": 10954 }, { "epoch": 13.408812729498164, "grad_norm": 1.2102666829624382, "learning_rate": 1.375455961417363e-07, "loss": 0.4957, "step": 10955 }, { "epoch": 13.410036719706243, "grad_norm": 1.2957113073653597, "learning_rate": 1.3733602724255723e-07, "loss": 0.9811, "step": 10956 }, { "epoch": 13.411260709914322, "grad_norm": 1.3401143634531203, "learning_rate": 1.3712661360891582e-07, "loss": 0.7132, "step": 10957 }, { "epoch": 13.412484700122398, "grad_norm": 2.4748375246819325, "learning_rate": 1.3691735525457384e-07, "loss": 1.193, "step": 10958 }, { "epoch": 13.413708690330477, "grad_norm": 1.5736608449679688, "learning_rate": 1.3670825219328222e-07, "loss": 0.5685, "step": 10959 }, { "epoch": 13.414932680538556, "grad_norm": 1.5096504213822937, "learning_rate": 1.364993044387833e-07, "loss": 0.5871, "step": 10960 }, { "epoch": 13.416156670746634, "grad_norm": 2.5570932917277602, "learning_rate": 1.362905120048083e-07, "loss": 0.4789, "step": 10961 }, { "epoch": 13.417380660954713, "grad_norm": 1.084047464143069, "learning_rate": 1.3608187490507795e-07, "loss": 0.6254, "step": 10962 }, { "epoch": 13.418604651162791, "grad_norm": 2.7767564885696947, "learning_rate": 1.358733931533035e-07, "loss": 0.3909, "step": 10963 }, { "epoch": 13.419828641370868, "grad_norm": 1.6922774145086323, "learning_rate": 1.356650667631851e-07, "loss": 1.5492, "step": 10964 }, { "epoch": 13.421052631578947, "grad_norm": 2.149805042951668, "learning_rate": 1.3545689574841341e-07, "loss": 0.6095, "step": 10965 }, { "epoch": 13.422276621787026, "grad_norm": 1.8493065806451887, "learning_rate": 1.3524888012266867e-07, "loss": 0.2891, "step": 10966 }, { "epoch": 13.423500611995104, "grad_norm": 1.9137664744956175, "learning_rate": 1.3504101989962132e-07, "loss": 0.7388, "step": 10967 }, { "epoch": 13.424724602203183, "grad_norm": 3.179601886558288, "learning_rate": 1.3483331509292984e-07, "loss": 0.6886, "step": 10968 }, { "epoch": 13.425948592411261, "grad_norm": 2.465401736900861, "learning_rate": 1.3462576571624587e-07, "loss": 0.891, "step": 10969 }, { "epoch": 13.427172582619338, "grad_norm": 1.3553563169147569, "learning_rate": 1.3441837178320732e-07, "loss": 0.5795, "step": 10970 }, { "epoch": 13.428396572827417, "grad_norm": 1.9522204248379809, "learning_rate": 1.3421113330744333e-07, "loss": 0.9957, "step": 10971 }, { "epoch": 13.429620563035495, "grad_norm": 1.6244637135657432, "learning_rate": 1.340040503025733e-07, "loss": 0.7409, "step": 10972 }, { "epoch": 13.430844553243574, "grad_norm": 1.6761914402883717, "learning_rate": 1.3379712278220574e-07, "loss": 0.853, "step": 10973 }, { "epoch": 13.432068543451653, "grad_norm": 1.9013548062027816, "learning_rate": 1.3359035075993982e-07, "loss": 0.4658, "step": 10974 }, { "epoch": 13.433292533659731, "grad_norm": 1.5524312175296473, "learning_rate": 1.3338373424936297e-07, "loss": 1.3961, "step": 10975 }, { "epoch": 13.43451652386781, "grad_norm": 2.7426981605857397, "learning_rate": 1.3317727326405406e-07, "loss": 0.3777, "step": 10976 }, { "epoch": 13.435740514075887, "grad_norm": 1.7748861384118169, "learning_rate": 1.3297096781757973e-07, "loss": 0.4951, "step": 10977 }, { "epoch": 13.436964504283965, "grad_norm": 2.008637090290666, "learning_rate": 1.327648179234986e-07, "loss": 0.7394, "step": 10978 }, { "epoch": 13.438188494492044, "grad_norm": 1.7388775520228492, "learning_rate": 1.3255882359535787e-07, "loss": 0.9068, "step": 10979 }, { "epoch": 13.439412484700123, "grad_norm": 1.8033364089340687, "learning_rate": 1.3235298484669478e-07, "loss": 0.671, "step": 10980 }, { "epoch": 13.440636474908201, "grad_norm": 2.5626874032664584, "learning_rate": 1.321473016910363e-07, "loss": 0.5301, "step": 10981 }, { "epoch": 13.44186046511628, "grad_norm": 1.7587953276228914, "learning_rate": 1.3194177414189907e-07, "loss": 1.1681, "step": 10982 }, { "epoch": 13.443084455324357, "grad_norm": 1.9339332360326953, "learning_rate": 1.3173640221278954e-07, "loss": 0.4995, "step": 10983 }, { "epoch": 13.444308445532435, "grad_norm": 1.3999172731370204, "learning_rate": 1.3153118591720388e-07, "loss": 0.3847, "step": 10984 }, { "epoch": 13.445532435740514, "grad_norm": 1.3252218332946255, "learning_rate": 1.313261252686282e-07, "loss": 0.6528, "step": 10985 }, { "epoch": 13.446756425948593, "grad_norm": 2.6754146586997156, "learning_rate": 1.311212202805387e-07, "loss": 0.4229, "step": 10986 }, { "epoch": 13.447980416156671, "grad_norm": 2.340049657431051, "learning_rate": 1.3091647096640127e-07, "loss": 0.6118, "step": 10987 }, { "epoch": 13.44920440636475, "grad_norm": 1.8830703203020929, "learning_rate": 1.307118773396701e-07, "loss": 0.5788, "step": 10988 }, { "epoch": 13.450428396572827, "grad_norm": 1.6498102203809497, "learning_rate": 1.3050743941379092e-07, "loss": 1.0375, "step": 10989 }, { "epoch": 13.451652386780905, "grad_norm": 2.1805949127854976, "learning_rate": 1.3030315720219871e-07, "loss": 0.7075, "step": 10990 }, { "epoch": 13.452876376988984, "grad_norm": 1.4148859998193026, "learning_rate": 1.300990307183181e-07, "loss": 0.3053, "step": 10991 }, { "epoch": 13.454100367197062, "grad_norm": 2.18311957944349, "learning_rate": 1.2989505997556357e-07, "loss": 0.5395, "step": 10992 }, { "epoch": 13.455324357405141, "grad_norm": 2.1356930153170657, "learning_rate": 1.296912449873397e-07, "loss": 0.5024, "step": 10993 }, { "epoch": 13.45654834761322, "grad_norm": 1.1693039927819981, "learning_rate": 1.294875857670394e-07, "loss": 0.6062, "step": 10994 }, { "epoch": 13.457772337821297, "grad_norm": 1.849100880946639, "learning_rate": 1.292840823280475e-07, "loss": 1.356, "step": 10995 }, { "epoch": 13.458996328029375, "grad_norm": 2.932136399962728, "learning_rate": 1.2908073468373694e-07, "loss": 0.3555, "step": 10996 }, { "epoch": 13.460220318237454, "grad_norm": 2.46997952055088, "learning_rate": 1.288775428474709e-07, "loss": 0.5683, "step": 10997 }, { "epoch": 13.461444308445532, "grad_norm": 1.6369068877774033, "learning_rate": 1.2867450683260285e-07, "loss": 1.5381, "step": 10998 }, { "epoch": 13.462668298653611, "grad_norm": 1.7107786616999563, "learning_rate": 1.284716266524749e-07, "loss": 0.5412, "step": 10999 }, { "epoch": 13.46389228886169, "grad_norm": 1.5109899691383375, "learning_rate": 1.2826890232042032e-07, "loss": 0.5252, "step": 11000 }, { "epoch": 13.465116279069768, "grad_norm": 2.001823404510624, "learning_rate": 1.2806633384976092e-07, "loss": 0.5168, "step": 11001 }, { "epoch": 13.466340269277845, "grad_norm": 2.5199662994255627, "learning_rate": 1.278639212538091e-07, "loss": 0.8541, "step": 11002 }, { "epoch": 13.467564259485924, "grad_norm": 2.1984264397760884, "learning_rate": 1.2766166454586588e-07, "loss": 0.9633, "step": 11003 }, { "epoch": 13.468788249694002, "grad_norm": 2.469468730934486, "learning_rate": 1.2745956373922369e-07, "loss": 0.4635, "step": 11004 }, { "epoch": 13.470012239902081, "grad_norm": 1.285610803026136, "learning_rate": 1.2725761884716387e-07, "loss": 0.4681, "step": 11005 }, { "epoch": 13.47123623011016, "grad_norm": 1.6630595811907822, "learning_rate": 1.2705582988295716e-07, "loss": 0.4937, "step": 11006 }, { "epoch": 13.472460220318238, "grad_norm": 1.2646272828577558, "learning_rate": 1.2685419685986383e-07, "loss": 0.4986, "step": 11007 }, { "epoch": 13.473684210526315, "grad_norm": 3.1921743450758253, "learning_rate": 1.2665271979113518e-07, "loss": 0.3654, "step": 11008 }, { "epoch": 13.474908200734394, "grad_norm": 1.1997157101357585, "learning_rate": 1.2645139869001177e-07, "loss": 0.5068, "step": 11009 }, { "epoch": 13.476132190942472, "grad_norm": 2.1227874106563256, "learning_rate": 1.26250233569723e-07, "loss": 0.5395, "step": 11010 }, { "epoch": 13.477356181150551, "grad_norm": 1.6879048542159112, "learning_rate": 1.260492244434891e-07, "loss": 0.4679, "step": 11011 }, { "epoch": 13.47858017135863, "grad_norm": 1.079361865176921, "learning_rate": 1.2584837132451955e-07, "loss": 0.3946, "step": 11012 }, { "epoch": 13.479804161566708, "grad_norm": 2.1668191513827812, "learning_rate": 1.2564767422601348e-07, "loss": 0.4217, "step": 11013 }, { "epoch": 13.481028151774785, "grad_norm": 1.3442914102829693, "learning_rate": 1.2544713316116036e-07, "loss": 0.4737, "step": 11014 }, { "epoch": 13.482252141982864, "grad_norm": 2.2813284660389415, "learning_rate": 1.2524674814313853e-07, "loss": 0.572, "step": 11015 }, { "epoch": 13.483476132190942, "grad_norm": 3.424324989436231, "learning_rate": 1.2504651918511717e-07, "loss": 0.3367, "step": 11016 }, { "epoch": 13.48470012239902, "grad_norm": 1.997653441280003, "learning_rate": 1.2484644630025384e-07, "loss": 0.4531, "step": 11017 }, { "epoch": 13.4859241126071, "grad_norm": 1.3849121262170272, "learning_rate": 1.2464652950169714e-07, "loss": 0.7071, "step": 11018 }, { "epoch": 13.487148102815178, "grad_norm": 2.3506183969178367, "learning_rate": 1.2444676880258493e-07, "loss": 0.5731, "step": 11019 }, { "epoch": 13.488372093023255, "grad_norm": 3.1537248051712954, "learning_rate": 1.2424716421604393e-07, "loss": 0.5648, "step": 11020 }, { "epoch": 13.489596083231334, "grad_norm": 2.665260513011284, "learning_rate": 1.2404771575519226e-07, "loss": 0.9418, "step": 11021 }, { "epoch": 13.490820073439412, "grad_norm": 2.0015986711530944, "learning_rate": 1.238484234331372e-07, "loss": 0.452, "step": 11022 }, { "epoch": 13.49204406364749, "grad_norm": 1.479618104621981, "learning_rate": 1.2364928726297465e-07, "loss": 0.4257, "step": 11023 }, { "epoch": 13.49326805385557, "grad_norm": 1.6699964202185855, "learning_rate": 1.2345030725779084e-07, "loss": 0.6192, "step": 11024 }, { "epoch": 13.494492044063648, "grad_norm": 2.0540829431355325, "learning_rate": 1.2325148343066308e-07, "loss": 0.487, "step": 11025 }, { "epoch": 13.495716034271727, "grad_norm": 1.5949102498545895, "learning_rate": 1.23052815794657e-07, "loss": 0.5189, "step": 11026 }, { "epoch": 13.496940024479803, "grad_norm": 1.3786093163391744, "learning_rate": 1.2285430436282803e-07, "loss": 0.3903, "step": 11027 }, { "epoch": 13.498164014687882, "grad_norm": 1.6371275718321252, "learning_rate": 1.2265594914822154e-07, "loss": 0.9306, "step": 11028 }, { "epoch": 13.49938800489596, "grad_norm": 1.0881442193893551, "learning_rate": 1.2245775016387297e-07, "loss": 0.5912, "step": 11029 }, { "epoch": 13.50061199510404, "grad_norm": 1.165154814360197, "learning_rate": 1.222597074228074e-07, "loss": 0.4669, "step": 11030 }, { "epoch": 13.501835985312118, "grad_norm": 1.8679464607479288, "learning_rate": 1.2206182093803892e-07, "loss": 0.5605, "step": 11031 }, { "epoch": 13.503059975520197, "grad_norm": 1.1497059790945496, "learning_rate": 1.218640907225721e-07, "loss": 0.3052, "step": 11032 }, { "epoch": 13.504283965728273, "grad_norm": 1.202216139200076, "learning_rate": 1.21666516789401e-07, "loss": 0.6899, "step": 11033 }, { "epoch": 13.505507955936352, "grad_norm": 2.137118527357587, "learning_rate": 1.2146909915151022e-07, "loss": 0.399, "step": 11034 }, { "epoch": 13.50673194614443, "grad_norm": 1.3712583578475774, "learning_rate": 1.2127183782187246e-07, "loss": 0.5476, "step": 11035 }, { "epoch": 13.50795593635251, "grad_norm": 1.8853655876978586, "learning_rate": 1.2107473281345095e-07, "loss": 0.9794, "step": 11036 }, { "epoch": 13.509179926560588, "grad_norm": 3.211805267485466, "learning_rate": 1.2087778413919866e-07, "loss": 0.2947, "step": 11037 }, { "epoch": 13.510403916768666, "grad_norm": 1.7578810763873924, "learning_rate": 1.206809918120591e-07, "loss": 0.3663, "step": 11038 }, { "epoch": 13.511627906976745, "grad_norm": 2.9776727394898277, "learning_rate": 1.204843558449642e-07, "loss": 0.3993, "step": 11039 }, { "epoch": 13.512851897184822, "grad_norm": 1.1884347813775973, "learning_rate": 1.2028787625083666e-07, "loss": 0.4433, "step": 11040 }, { "epoch": 13.5140758873929, "grad_norm": 2.5140022197185536, "learning_rate": 1.2009155304258753e-07, "loss": 0.9646, "step": 11041 }, { "epoch": 13.51529987760098, "grad_norm": 2.3445721046948944, "learning_rate": 1.1989538623311842e-07, "loss": 0.3651, "step": 11042 }, { "epoch": 13.516523867809058, "grad_norm": 2.4037120788572968, "learning_rate": 1.1969937583532153e-07, "loss": 0.4214, "step": 11043 }, { "epoch": 13.517747858017136, "grad_norm": 1.7271730059638564, "learning_rate": 1.195035218620777e-07, "loss": 1.6187, "step": 11044 }, { "epoch": 13.518971848225215, "grad_norm": 1.6932436968850926, "learning_rate": 1.1930782432625742e-07, "loss": 0.3478, "step": 11045 }, { "epoch": 13.520195838433292, "grad_norm": 2.4314593590689135, "learning_rate": 1.1911228324072127e-07, "loss": 0.6619, "step": 11046 }, { "epoch": 13.52141982864137, "grad_norm": 2.9731999809742207, "learning_rate": 1.1891689861831979e-07, "loss": 0.6782, "step": 11047 }, { "epoch": 13.522643818849449, "grad_norm": 2.5255570479128338, "learning_rate": 1.1872167047189242e-07, "loss": 0.3868, "step": 11048 }, { "epoch": 13.523867809057528, "grad_norm": 1.6624506975068105, "learning_rate": 1.1852659881426947e-07, "loss": 0.5726, "step": 11049 }, { "epoch": 13.525091799265606, "grad_norm": 2.531557536686078, "learning_rate": 1.183316836582693e-07, "loss": 0.4944, "step": 11050 }, { "epoch": 13.526315789473685, "grad_norm": 3.151626575717831, "learning_rate": 1.1813692501670276e-07, "loss": 0.4373, "step": 11051 }, { "epoch": 13.527539779681762, "grad_norm": 1.0608006595112094, "learning_rate": 1.1794232290236685e-07, "loss": 0.4395, "step": 11052 }, { "epoch": 13.52876376988984, "grad_norm": 1.8586674311708151, "learning_rate": 1.1774787732805104e-07, "loss": 0.703, "step": 11053 }, { "epoch": 13.529987760097919, "grad_norm": 1.837849111109393, "learning_rate": 1.1755358830653346e-07, "loss": 0.4022, "step": 11054 }, { "epoch": 13.531211750305998, "grad_norm": 1.4649750724375565, "learning_rate": 1.1735945585058167e-07, "loss": 1.1249, "step": 11055 }, { "epoch": 13.532435740514076, "grad_norm": 2.9287670869870612, "learning_rate": 1.1716547997295408e-07, "loss": 0.7908, "step": 11056 }, { "epoch": 13.533659730722155, "grad_norm": 2.0548497291930476, "learning_rate": 1.1697166068639743e-07, "loss": 0.92, "step": 11057 }, { "epoch": 13.534883720930232, "grad_norm": 1.3188057107502962, "learning_rate": 1.1677799800364958e-07, "loss": 0.4361, "step": 11058 }, { "epoch": 13.53610771113831, "grad_norm": 2.558051427838892, "learning_rate": 1.1658449193743593e-07, "loss": 0.9634, "step": 11059 }, { "epoch": 13.537331701346389, "grad_norm": 2.0189747947917303, "learning_rate": 1.1639114250047406e-07, "loss": 0.4092, "step": 11060 }, { "epoch": 13.538555691554468, "grad_norm": 2.071330675261987, "learning_rate": 1.1619794970547022e-07, "loss": 0.6874, "step": 11061 }, { "epoch": 13.539779681762546, "grad_norm": 2.4573087528509485, "learning_rate": 1.1600491356511978e-07, "loss": 0.5856, "step": 11062 }, { "epoch": 13.541003671970625, "grad_norm": 1.6084071741882107, "learning_rate": 1.1581203409210873e-07, "loss": 0.218, "step": 11063 }, { "epoch": 13.542227662178703, "grad_norm": 1.3979429688890508, "learning_rate": 1.1561931129911247e-07, "loss": 1.3268, "step": 11064 }, { "epoch": 13.54345165238678, "grad_norm": 1.9781212357335967, "learning_rate": 1.1542674519879587e-07, "loss": 0.4182, "step": 11065 }, { "epoch": 13.544675642594859, "grad_norm": 2.029963936984083, "learning_rate": 1.1523433580381327e-07, "loss": 0.4343, "step": 11066 }, { "epoch": 13.545899632802938, "grad_norm": 1.093803055772265, "learning_rate": 1.1504208312680981e-07, "loss": 0.5147, "step": 11067 }, { "epoch": 13.547123623011016, "grad_norm": 2.147833891602053, "learning_rate": 1.1484998718041902e-07, "loss": 0.4005, "step": 11068 }, { "epoch": 13.548347613219095, "grad_norm": 1.9165143638009656, "learning_rate": 1.1465804797726549e-07, "loss": 0.4469, "step": 11069 }, { "epoch": 13.549571603427173, "grad_norm": 1.1693761687961362, "learning_rate": 1.1446626552996192e-07, "loss": 0.627, "step": 11070 }, { "epoch": 13.55079559363525, "grad_norm": 1.7246288790326252, "learning_rate": 1.1427463985111187e-07, "loss": 1.0767, "step": 11071 }, { "epoch": 13.552019583843329, "grad_norm": 1.9761759469607916, "learning_rate": 1.1408317095330829e-07, "loss": 0.5775, "step": 11072 }, { "epoch": 13.553243574051407, "grad_norm": 2.705594610690152, "learning_rate": 1.1389185884913389e-07, "loss": 0.5052, "step": 11073 }, { "epoch": 13.554467564259486, "grad_norm": 1.607683081303067, "learning_rate": 1.1370070355116086e-07, "loss": 0.503, "step": 11074 }, { "epoch": 13.555691554467565, "grad_norm": 2.006463631450044, "learning_rate": 1.1350970507195136e-07, "loss": 0.4755, "step": 11075 }, { "epoch": 13.556915544675643, "grad_norm": 2.390755351967212, "learning_rate": 1.133188634240573e-07, "loss": 0.4425, "step": 11076 }, { "epoch": 13.55813953488372, "grad_norm": 1.7881894060918522, "learning_rate": 1.1312817862001945e-07, "loss": 0.7248, "step": 11077 }, { "epoch": 13.559363525091799, "grad_norm": 1.2815362875889305, "learning_rate": 1.129376506723695e-07, "loss": 0.466, "step": 11078 }, { "epoch": 13.560587515299877, "grad_norm": 0.9449306285328828, "learning_rate": 1.1274727959362769e-07, "loss": 0.5193, "step": 11079 }, { "epoch": 13.561811505507956, "grad_norm": 3.294419479735996, "learning_rate": 1.1255706539630512e-07, "loss": 0.3015, "step": 11080 }, { "epoch": 13.563035495716035, "grad_norm": 1.8752505239388875, "learning_rate": 1.1236700809290152e-07, "loss": 0.4418, "step": 11081 }, { "epoch": 13.564259485924113, "grad_norm": 1.6532702985254657, "learning_rate": 1.1217710769590717e-07, "loss": 0.564, "step": 11082 }, { "epoch": 13.56548347613219, "grad_norm": 1.8968282654574655, "learning_rate": 1.1198736421780099e-07, "loss": 1.3756, "step": 11083 }, { "epoch": 13.566707466340269, "grad_norm": 2.097407038122065, "learning_rate": 1.1179777767105299e-07, "loss": 0.9567, "step": 11084 }, { "epoch": 13.567931456548347, "grad_norm": 1.9037381185776505, "learning_rate": 1.1160834806812099e-07, "loss": 0.539, "step": 11085 }, { "epoch": 13.569155446756426, "grad_norm": 0.9084982542741277, "learning_rate": 1.1141907542145475e-07, "loss": 0.3457, "step": 11086 }, { "epoch": 13.570379436964505, "grad_norm": 1.1643992573023785, "learning_rate": 1.1122995974349265e-07, "loss": 0.5658, "step": 11087 }, { "epoch": 13.571603427172583, "grad_norm": 1.7943913226373358, "learning_rate": 1.1104100104666171e-07, "loss": 1.4942, "step": 11088 }, { "epoch": 13.572827417380662, "grad_norm": 2.0756608679664637, "learning_rate": 1.1085219934337948e-07, "loss": 0.5258, "step": 11089 }, { "epoch": 13.574051407588739, "grad_norm": 1.3986422984943154, "learning_rate": 1.1066355464605438e-07, "loss": 0.4586, "step": 11090 }, { "epoch": 13.575275397796817, "grad_norm": 2.1326258864961214, "learning_rate": 1.1047506696708288e-07, "loss": 0.4304, "step": 11091 }, { "epoch": 13.576499388004896, "grad_norm": 1.1006660014263576, "learning_rate": 1.1028673631885173e-07, "loss": 0.5262, "step": 11092 }, { "epoch": 13.577723378212974, "grad_norm": 1.5763700166583237, "learning_rate": 1.1009856271373742e-07, "loss": 0.646, "step": 11093 }, { "epoch": 13.578947368421053, "grad_norm": 2.790531541988042, "learning_rate": 1.099105461641059e-07, "loss": 0.4263, "step": 11094 }, { "epoch": 13.580171358629132, "grad_norm": 2.753480839464336, "learning_rate": 1.0972268668231284e-07, "loss": 0.9231, "step": 11095 }, { "epoch": 13.581395348837209, "grad_norm": 3.950430962359351, "learning_rate": 1.0953498428070392e-07, "loss": 0.3441, "step": 11096 }, { "epoch": 13.582619339045287, "grad_norm": 2.058694348622937, "learning_rate": 1.0934743897161426e-07, "loss": 1.3044, "step": 11097 }, { "epoch": 13.583843329253366, "grad_norm": 2.428594556390859, "learning_rate": 1.0916005076736819e-07, "loss": 1.0078, "step": 11098 }, { "epoch": 13.585067319461444, "grad_norm": 1.6106987869284564, "learning_rate": 1.0897281968028056e-07, "loss": 0.4659, "step": 11099 }, { "epoch": 13.586291309669523, "grad_norm": 2.6258494937218058, "learning_rate": 1.0878574572265571e-07, "loss": 0.5985, "step": 11100 }, { "epoch": 13.587515299877602, "grad_norm": 2.1557763303471518, "learning_rate": 1.0859882890678686e-07, "loss": 0.3416, "step": 11101 }, { "epoch": 13.588739290085678, "grad_norm": 2.3803155656307817, "learning_rate": 1.0841206924495779e-07, "loss": 0.3663, "step": 11102 }, { "epoch": 13.589963280293757, "grad_norm": 2.755945707753369, "learning_rate": 1.0822546674944145e-07, "loss": 0.4541, "step": 11103 }, { "epoch": 13.591187270501836, "grad_norm": 1.1476632095226418, "learning_rate": 1.0803902143250111e-07, "loss": 0.5341, "step": 11104 }, { "epoch": 13.592411260709914, "grad_norm": 2.362687548023299, "learning_rate": 1.0785273330638946e-07, "loss": 1.4002, "step": 11105 }, { "epoch": 13.593635250917993, "grad_norm": 1.5413221937978268, "learning_rate": 1.0766660238334781e-07, "loss": 0.8849, "step": 11106 }, { "epoch": 13.594859241126072, "grad_norm": 2.3500682905182484, "learning_rate": 1.0748062867560805e-07, "loss": 0.8105, "step": 11107 }, { "epoch": 13.596083231334148, "grad_norm": 2.127328205160727, "learning_rate": 1.0729481219539262e-07, "loss": 1.0643, "step": 11108 }, { "epoch": 13.597307221542227, "grad_norm": 1.6994023515085155, "learning_rate": 1.0710915295491176e-07, "loss": 0.503, "step": 11109 }, { "epoch": 13.598531211750306, "grad_norm": 2.2334491392057245, "learning_rate": 1.0692365096636709e-07, "loss": 0.3686, "step": 11110 }, { "epoch": 13.599755201958384, "grad_norm": 2.699523270135167, "learning_rate": 1.0673830624194832e-07, "loss": 0.351, "step": 11111 }, { "epoch": 13.600979192166463, "grad_norm": 2.1374666312408603, "learning_rate": 1.0655311879383623e-07, "loss": 1.1458, "step": 11112 }, { "epoch": 13.602203182374542, "grad_norm": 1.785613951831227, "learning_rate": 1.0636808863420056e-07, "loss": 1.3939, "step": 11113 }, { "epoch": 13.60342717258262, "grad_norm": 2.2909132928278826, "learning_rate": 1.0618321577520047e-07, "loss": 1.0088, "step": 11114 }, { "epoch": 13.604651162790697, "grad_norm": 2.394710877948436, "learning_rate": 1.0599850022898539e-07, "loss": 0.5987, "step": 11115 }, { "epoch": 13.605875152998776, "grad_norm": 1.5469692003859765, "learning_rate": 1.0581394200769396e-07, "loss": 1.2238, "step": 11116 }, { "epoch": 13.607099143206854, "grad_norm": 3.03760536615815, "learning_rate": 1.056295411234548e-07, "loss": 0.4128, "step": 11117 }, { "epoch": 13.608323133414933, "grad_norm": 1.5814033986960192, "learning_rate": 1.05445297588386e-07, "loss": 0.6194, "step": 11118 }, { "epoch": 13.609547123623011, "grad_norm": 2.269585357125046, "learning_rate": 1.0526121141459567e-07, "loss": 0.6671, "step": 11119 }, { "epoch": 13.61077111383109, "grad_norm": 1.430924149470905, "learning_rate": 1.0507728261418049e-07, "loss": 0.6902, "step": 11120 }, { "epoch": 13.611995104039167, "grad_norm": 1.5466580541574284, "learning_rate": 1.048935111992283e-07, "loss": 0.9781, "step": 11121 }, { "epoch": 13.613219094247246, "grad_norm": 2.656740758652461, "learning_rate": 1.0470989718181585e-07, "loss": 0.8443, "step": 11122 }, { "epoch": 13.614443084455324, "grad_norm": 1.1299804642563789, "learning_rate": 1.0452644057400957e-07, "loss": 0.6768, "step": 11123 }, { "epoch": 13.615667074663403, "grad_norm": 1.5897359392885195, "learning_rate": 1.0434314138786484e-07, "loss": 0.4868, "step": 11124 }, { "epoch": 13.616891064871481, "grad_norm": 1.506412187285538, "learning_rate": 1.0415999963542811e-07, "loss": 1.0196, "step": 11125 }, { "epoch": 13.61811505507956, "grad_norm": 1.6920213796950074, "learning_rate": 1.0397701532873478e-07, "loss": 1.6318, "step": 11126 }, { "epoch": 13.619339045287639, "grad_norm": 1.435410574452025, "learning_rate": 1.0379418847980965e-07, "loss": 0.496, "step": 11127 }, { "epoch": 13.620563035495715, "grad_norm": 2.768201150720236, "learning_rate": 1.0361151910066758e-07, "loss": 0.9433, "step": 11128 }, { "epoch": 13.621787025703794, "grad_norm": 1.4125285107617327, "learning_rate": 1.0342900720331283e-07, "loss": 0.6013, "step": 11129 }, { "epoch": 13.623011015911873, "grad_norm": 1.224265081732127, "learning_rate": 1.0324665279973944e-07, "loss": 0.6109, "step": 11130 }, { "epoch": 13.624235006119951, "grad_norm": 2.089226802465181, "learning_rate": 1.0306445590193087e-07, "loss": 1.0, "step": 11131 }, { "epoch": 13.62545899632803, "grad_norm": 1.735695421842296, "learning_rate": 1.0288241652186087e-07, "loss": 0.6129, "step": 11132 }, { "epoch": 13.626682986536107, "grad_norm": 2.2306231259373894, "learning_rate": 1.0270053467149154e-07, "loss": 1.0983, "step": 11133 }, { "epoch": 13.627906976744185, "grad_norm": 2.1118858987132976, "learning_rate": 1.0251881036277694e-07, "loss": 0.4743, "step": 11134 }, { "epoch": 13.629130966952264, "grad_norm": 2.8997207745554756, "learning_rate": 1.0233724360765807e-07, "loss": 1.0147, "step": 11135 }, { "epoch": 13.630354957160343, "grad_norm": 1.9254803527581499, "learning_rate": 1.0215583441806703e-07, "loss": 0.2872, "step": 11136 }, { "epoch": 13.631578947368421, "grad_norm": 2.475735130990915, "learning_rate": 1.0197458280592542e-07, "loss": 0.4135, "step": 11137 }, { "epoch": 13.6328029375765, "grad_norm": 1.8751890066268249, "learning_rate": 1.0179348878314483e-07, "loss": 0.4959, "step": 11138 }, { "epoch": 13.634026927784578, "grad_norm": 2.000967733894036, "learning_rate": 1.0161255236162571e-07, "loss": 0.3335, "step": 11139 }, { "epoch": 13.635250917992655, "grad_norm": 1.0293833336746747, "learning_rate": 1.0143177355325912e-07, "loss": 0.4443, "step": 11140 }, { "epoch": 13.636474908200734, "grad_norm": 3.0716746299435833, "learning_rate": 1.0125115236992416e-07, "loss": 0.3882, "step": 11141 }, { "epoch": 13.637698898408813, "grad_norm": 1.2440689960082065, "learning_rate": 1.0107068882349108e-07, "loss": 0.8415, "step": 11142 }, { "epoch": 13.638922888616891, "grad_norm": 1.692351104435305, "learning_rate": 1.0089038292581926e-07, "loss": 0.6342, "step": 11143 }, { "epoch": 13.64014687882497, "grad_norm": 2.0310225406138547, "learning_rate": 1.0071023468875813e-07, "loss": 0.4741, "step": 11144 }, { "epoch": 13.641370869033048, "grad_norm": 3.2274155885282334, "learning_rate": 1.0053024412414569e-07, "loss": 0.4427, "step": 11145 }, { "epoch": 13.642594859241125, "grad_norm": 2.4020147746963416, "learning_rate": 1.0035041124381084e-07, "loss": 0.2798, "step": 11146 }, { "epoch": 13.643818849449204, "grad_norm": 1.0774078090802617, "learning_rate": 1.0017073605957106e-07, "loss": 0.5075, "step": 11147 }, { "epoch": 13.645042839657282, "grad_norm": 1.2661809390689824, "learning_rate": 9.99912185832344e-08, "loss": 1.0385, "step": 11148 }, { "epoch": 13.646266829865361, "grad_norm": 2.755154886578883, "learning_rate": 9.981185882659756e-08, "loss": 0.4188, "step": 11149 }, { "epoch": 13.64749082007344, "grad_norm": 2.2288398355490555, "learning_rate": 9.963265680144774e-08, "loss": 0.5923, "step": 11150 }, { "epoch": 13.648714810281518, "grad_norm": 1.512706888677791, "learning_rate": 9.945361251956137e-08, "loss": 0.614, "step": 11151 }, { "epoch": 13.649938800489597, "grad_norm": 2.743536722908378, "learning_rate": 9.927472599270488e-08, "loss": 0.3391, "step": 11152 }, { "epoch": 13.651162790697674, "grad_norm": 1.8235457878013934, "learning_rate": 9.909599723263358e-08, "loss": 1.1872, "step": 11153 }, { "epoch": 13.652386780905752, "grad_norm": 2.440807889050476, "learning_rate": 9.89174262510928e-08, "loss": 0.5102, "step": 11154 }, { "epoch": 13.653610771113831, "grad_norm": 1.640130233099999, "learning_rate": 9.873901305981787e-08, "loss": 0.4593, "step": 11155 }, { "epoch": 13.65483476132191, "grad_norm": 1.4241379165698853, "learning_rate": 9.85607576705333e-08, "loss": 0.552, "step": 11156 }, { "epoch": 13.656058751529988, "grad_norm": 3.084291972110577, "learning_rate": 9.838266009495362e-08, "loss": 0.4821, "step": 11157 }, { "epoch": 13.657282741738067, "grad_norm": 1.45300594811808, "learning_rate": 9.820472034478251e-08, "loss": 0.4987, "step": 11158 }, { "epoch": 13.658506731946144, "grad_norm": 1.6877245262100105, "learning_rate": 9.80269384317134e-08, "loss": 1.224, "step": 11159 }, { "epoch": 13.659730722154222, "grad_norm": 2.045992411529776, "learning_rate": 9.784931436742973e-08, "loss": 1.2858, "step": 11160 }, { "epoch": 13.660954712362301, "grad_norm": 1.709239527867135, "learning_rate": 9.767184816360409e-08, "loss": 0.6318, "step": 11161 }, { "epoch": 13.66217870257038, "grad_norm": 2.128085614219623, "learning_rate": 9.74945398318991e-08, "loss": 0.5858, "step": 11162 }, { "epoch": 13.663402692778458, "grad_norm": 1.504783155218763, "learning_rate": 9.731738938396657e-08, "loss": 0.6155, "step": 11163 }, { "epoch": 13.664626682986537, "grad_norm": 1.7919409033866889, "learning_rate": 9.714039683144855e-08, "loss": 1.6098, "step": 11164 }, { "epoch": 13.665850673194614, "grad_norm": 1.7781825859072111, "learning_rate": 9.696356218597575e-08, "loss": 0.6541, "step": 11165 }, { "epoch": 13.667074663402692, "grad_norm": 1.6331418162693239, "learning_rate": 9.67868854591697e-08, "loss": 0.4014, "step": 11166 }, { "epoch": 13.668298653610771, "grad_norm": 1.2664878009220473, "learning_rate": 9.661036666264057e-08, "loss": 0.5341, "step": 11167 }, { "epoch": 13.66952264381885, "grad_norm": 1.262166609576095, "learning_rate": 9.643400580798823e-08, "loss": 0.575, "step": 11168 }, { "epoch": 13.670746634026928, "grad_norm": 1.5834442316659936, "learning_rate": 9.62578029068037e-08, "loss": 0.3554, "step": 11169 }, { "epoch": 13.671970624235007, "grad_norm": 1.792114240577857, "learning_rate": 9.608175797066494e-08, "loss": 0.993, "step": 11170 }, { "epoch": 13.673194614443084, "grad_norm": 1.7783690354574087, "learning_rate": 9.590587101114185e-08, "loss": 0.3001, "step": 11171 }, { "epoch": 13.674418604651162, "grad_norm": 1.5414809564837917, "learning_rate": 9.573014203979241e-08, "loss": 0.267, "step": 11172 }, { "epoch": 13.67564259485924, "grad_norm": 1.8189540231636432, "learning_rate": 9.555457106816546e-08, "loss": 0.8787, "step": 11173 }, { "epoch": 13.67686658506732, "grad_norm": 3.0240749504166944, "learning_rate": 9.537915810779868e-08, "loss": 0.7308, "step": 11174 }, { "epoch": 13.678090575275398, "grad_norm": 1.47360594273982, "learning_rate": 9.520390317021954e-08, "loss": 0.4508, "step": 11175 }, { "epoch": 13.679314565483477, "grad_norm": 1.6039112060372953, "learning_rate": 9.502880626694522e-08, "loss": 0.3986, "step": 11176 }, { "epoch": 13.680538555691555, "grad_norm": 1.7884197286317915, "learning_rate": 9.485386740948232e-08, "loss": 0.7231, "step": 11177 }, { "epoch": 13.681762545899632, "grad_norm": 3.08791482579676, "learning_rate": 9.467908660932724e-08, "loss": 0.9014, "step": 11178 }, { "epoch": 13.68298653610771, "grad_norm": 1.3893882452562059, "learning_rate": 9.450446387796603e-08, "loss": 0.7341, "step": 11179 }, { "epoch": 13.68421052631579, "grad_norm": 3.1796499171678563, "learning_rate": 9.432999922687397e-08, "loss": 0.6366, "step": 11180 }, { "epoch": 13.685434516523868, "grad_norm": 2.4557351719134695, "learning_rate": 9.415569266751662e-08, "loss": 0.4627, "step": 11181 }, { "epoch": 13.686658506731947, "grad_norm": 2.6569542664934405, "learning_rate": 9.398154421134842e-08, "loss": 0.5427, "step": 11182 }, { "epoch": 13.687882496940025, "grad_norm": 1.9690902273147446, "learning_rate": 9.380755386981383e-08, "loss": 0.6455, "step": 11183 }, { "epoch": 13.689106487148102, "grad_norm": 1.490958929363694, "learning_rate": 9.363372165434703e-08, "loss": 0.9957, "step": 11184 }, { "epoch": 13.69033047735618, "grad_norm": 2.0553051903768003, "learning_rate": 9.346004757637112e-08, "loss": 0.571, "step": 11185 }, { "epoch": 13.69155446756426, "grad_norm": 2.2717346631280195, "learning_rate": 9.328653164730001e-08, "loss": 0.9466, "step": 11186 }, { "epoch": 13.692778457772338, "grad_norm": 3.4444210647351374, "learning_rate": 9.31131738785368e-08, "loss": 0.4599, "step": 11187 }, { "epoch": 13.694002447980417, "grad_norm": 2.712392405383955, "learning_rate": 9.293997428147267e-08, "loss": 0.7778, "step": 11188 }, { "epoch": 13.695226438188495, "grad_norm": 1.2337734793324764, "learning_rate": 9.276693286749017e-08, "loss": 0.6565, "step": 11189 }, { "epoch": 13.696450428396572, "grad_norm": 2.635160157722294, "learning_rate": 9.259404964796131e-08, "loss": 0.4702, "step": 11190 }, { "epoch": 13.69767441860465, "grad_norm": 1.3109972168699626, "learning_rate": 9.24213246342473e-08, "loss": 0.3829, "step": 11191 }, { "epoch": 13.69889840881273, "grad_norm": 1.8022580737796436, "learning_rate": 9.224875783769876e-08, "loss": 0.5112, "step": 11192 }, { "epoch": 13.700122399020808, "grad_norm": 1.387148926839083, "learning_rate": 9.207634926965608e-08, "loss": 0.6241, "step": 11193 }, { "epoch": 13.701346389228886, "grad_norm": 1.3231476733769352, "learning_rate": 9.190409894144964e-08, "loss": 0.7636, "step": 11194 }, { "epoch": 13.702570379436965, "grad_norm": 1.8653468522807153, "learning_rate": 9.173200686439871e-08, "loss": 1.4503, "step": 11195 }, { "epoch": 13.703794369645042, "grad_norm": 1.8258687047261888, "learning_rate": 9.156007304981285e-08, "loss": 0.41, "step": 11196 }, { "epoch": 13.70501835985312, "grad_norm": 2.3024978378874583, "learning_rate": 9.138829750899081e-08, "loss": 0.9586, "step": 11197 }, { "epoch": 13.7062423500612, "grad_norm": 1.939164829313745, "learning_rate": 9.121668025322078e-08, "loss": 0.7334, "step": 11198 }, { "epoch": 13.707466340269278, "grad_norm": 2.219053075379834, "learning_rate": 9.104522129378124e-08, "loss": 0.5601, "step": 11199 }, { "epoch": 13.708690330477356, "grad_norm": 1.486148050302697, "learning_rate": 9.087392064193956e-08, "loss": 0.4814, "step": 11200 }, { "epoch": 13.709914320685435, "grad_norm": 0.9516978437332777, "learning_rate": 9.070277830895313e-08, "loss": 0.4612, "step": 11201 }, { "epoch": 13.711138310893514, "grad_norm": 1.750241164016142, "learning_rate": 9.053179430606851e-08, "loss": 0.9061, "step": 11202 }, { "epoch": 13.71236230110159, "grad_norm": 1.519069105048366, "learning_rate": 9.036096864452254e-08, "loss": 0.5547, "step": 11203 }, { "epoch": 13.713586291309669, "grad_norm": 1.6830575315119183, "learning_rate": 9.019030133554125e-08, "loss": 0.4649, "step": 11204 }, { "epoch": 13.714810281517748, "grad_norm": 2.20055440116889, "learning_rate": 9.00197923903401e-08, "loss": 1.1812, "step": 11205 }, { "epoch": 13.716034271725826, "grad_norm": 1.900838892132819, "learning_rate": 8.984944182012428e-08, "loss": 0.5399, "step": 11206 }, { "epoch": 13.717258261933905, "grad_norm": 1.4199244936646243, "learning_rate": 8.96792496360882e-08, "loss": 0.8594, "step": 11207 }, { "epoch": 13.718482252141984, "grad_norm": 2.5383854393917824, "learning_rate": 8.95092158494168e-08, "loss": 0.7034, "step": 11208 }, { "epoch": 13.71970624235006, "grad_norm": 1.7131423122616922, "learning_rate": 8.933934047128417e-08, "loss": 0.7083, "step": 11209 }, { "epoch": 13.720930232558139, "grad_norm": 3.4164906536505866, "learning_rate": 8.916962351285363e-08, "loss": 0.513, "step": 11210 }, { "epoch": 13.722154222766218, "grad_norm": 2.2426104845853296, "learning_rate": 8.900006498527818e-08, "loss": 0.5011, "step": 11211 }, { "epoch": 13.723378212974296, "grad_norm": 1.5378521874351199, "learning_rate": 8.883066489970116e-08, "loss": 0.6429, "step": 11212 }, { "epoch": 13.724602203182375, "grad_norm": 1.568545376445181, "learning_rate": 8.866142326725418e-08, "loss": 0.7096, "step": 11213 }, { "epoch": 13.725826193390454, "grad_norm": 1.6225240102562688, "learning_rate": 8.849234009905977e-08, "loss": 0.6852, "step": 11214 }, { "epoch": 13.727050183598532, "grad_norm": 2.730391092490124, "learning_rate": 8.832341540622902e-08, "loss": 0.3848, "step": 11215 }, { "epoch": 13.728274173806609, "grad_norm": 1.572513732017943, "learning_rate": 8.815464919986361e-08, "loss": 0.5452, "step": 11216 }, { "epoch": 13.729498164014688, "grad_norm": 0.8724213273101712, "learning_rate": 8.798604149105355e-08, "loss": 0.4308, "step": 11217 }, { "epoch": 13.730722154222766, "grad_norm": 2.6005425468932173, "learning_rate": 8.781759229087944e-08, "loss": 1.0176, "step": 11218 }, { "epoch": 13.731946144430845, "grad_norm": 1.7810398952573052, "learning_rate": 8.76493016104113e-08, "loss": 0.4112, "step": 11219 }, { "epoch": 13.733170134638923, "grad_norm": 1.9378270540014673, "learning_rate": 8.748116946070778e-08, "loss": 1.13, "step": 11220 }, { "epoch": 13.734394124847, "grad_norm": 1.4912517612074776, "learning_rate": 8.731319585281922e-08, "loss": 0.3289, "step": 11221 }, { "epoch": 13.735618115055079, "grad_norm": 2.3266439838181245, "learning_rate": 8.714538079778318e-08, "loss": 0.4438, "step": 11222 }, { "epoch": 13.736842105263158, "grad_norm": 1.4956494747935003, "learning_rate": 8.697772430662859e-08, "loss": 0.6841, "step": 11223 }, { "epoch": 13.738066095471236, "grad_norm": 1.8122725188949516, "learning_rate": 8.681022639037223e-08, "loss": 0.5337, "step": 11224 }, { "epoch": 13.739290085679315, "grad_norm": 1.2961342162176972, "learning_rate": 8.664288706002221e-08, "loss": 0.5268, "step": 11225 }, { "epoch": 13.740514075887393, "grad_norm": 2.225226826893795, "learning_rate": 8.647570632657532e-08, "loss": 0.4142, "step": 11226 }, { "epoch": 13.741738066095472, "grad_norm": 1.490725715656985, "learning_rate": 8.630868420101828e-08, "loss": 0.9434, "step": 11227 }, { "epoch": 13.742962056303549, "grad_norm": 1.8802394142161059, "learning_rate": 8.614182069432653e-08, "loss": 1.4165, "step": 11228 }, { "epoch": 13.744186046511627, "grad_norm": 2.035592533579679, "learning_rate": 8.597511581746626e-08, "loss": 0.7217, "step": 11229 }, { "epoch": 13.745410036719706, "grad_norm": 2.2619173839453026, "learning_rate": 8.580856958139233e-08, "loss": 0.4793, "step": 11230 }, { "epoch": 13.746634026927785, "grad_norm": 1.663392148463811, "learning_rate": 8.564218199704988e-08, "loss": 0.3306, "step": 11231 }, { "epoch": 13.747858017135863, "grad_norm": 3.360561051178189, "learning_rate": 8.54759530753732e-08, "loss": 0.4503, "step": 11232 }, { "epoch": 13.749082007343942, "grad_norm": 1.1509994841871396, "learning_rate": 8.53098828272858e-08, "loss": 0.6587, "step": 11233 }, { "epoch": 13.750305997552019, "grad_norm": 1.7638802781686982, "learning_rate": 8.5143971263702e-08, "loss": 0.901, "step": 11234 }, { "epoch": 13.751529987760097, "grad_norm": 1.3775878624300648, "learning_rate": 8.497821839552445e-08, "loss": 0.7619, "step": 11235 }, { "epoch": 13.752753977968176, "grad_norm": 1.3821126011356417, "learning_rate": 8.481262423364584e-08, "loss": 0.6667, "step": 11236 }, { "epoch": 13.753977968176255, "grad_norm": 1.4390776143915307, "learning_rate": 8.4647188788948e-08, "loss": 0.735, "step": 11237 }, { "epoch": 13.755201958384333, "grad_norm": 1.6701094589895662, "learning_rate": 8.448191207230338e-08, "loss": 0.6961, "step": 11238 }, { "epoch": 13.756425948592412, "grad_norm": 1.831365441121446, "learning_rate": 8.4316794094573e-08, "loss": 0.4442, "step": 11239 }, { "epoch": 13.75764993880049, "grad_norm": 1.7086748091591495, "learning_rate": 8.415183486660844e-08, "loss": 0.4838, "step": 11240 }, { "epoch": 13.758873929008567, "grad_norm": 1.1150375698558823, "learning_rate": 8.39870343992491e-08, "loss": 0.5912, "step": 11241 }, { "epoch": 13.760097919216646, "grad_norm": 1.3566758870436664, "learning_rate": 8.382239270332604e-08, "loss": 0.5048, "step": 11242 }, { "epoch": 13.761321909424725, "grad_norm": 2.508705691549121, "learning_rate": 8.365790978965809e-08, "loss": 0.4523, "step": 11243 }, { "epoch": 13.762545899632803, "grad_norm": 1.0666988475740022, "learning_rate": 8.349358566905524e-08, "loss": 0.5814, "step": 11244 }, { "epoch": 13.763769889840882, "grad_norm": 2.884520767458708, "learning_rate": 8.332942035231578e-08, "loss": 0.4319, "step": 11245 }, { "epoch": 13.76499388004896, "grad_norm": 1.6465984700217884, "learning_rate": 8.31654138502283e-08, "loss": 0.4914, "step": 11246 }, { "epoch": 13.766217870257037, "grad_norm": 2.0520875542986756, "learning_rate": 8.30015661735703e-08, "loss": 0.4067, "step": 11247 }, { "epoch": 13.767441860465116, "grad_norm": 2.332151182115059, "learning_rate": 8.283787733310982e-08, "loss": 1.3273, "step": 11248 }, { "epoch": 13.768665850673194, "grad_norm": 1.7544132327652926, "learning_rate": 8.267434733960356e-08, "loss": 0.923, "step": 11249 }, { "epoch": 13.769889840881273, "grad_norm": 2.2954964838209992, "learning_rate": 8.25109762037976e-08, "loss": 0.437, "step": 11250 }, { "epoch": 13.771113831089352, "grad_norm": 1.538516468946373, "learning_rate": 8.234776393642895e-08, "loss": 0.5833, "step": 11251 }, { "epoch": 13.77233782129743, "grad_norm": 1.344462372373999, "learning_rate": 8.218471054822346e-08, "loss": 0.7269, "step": 11252 }, { "epoch": 13.773561811505507, "grad_norm": 3.1099072082345507, "learning_rate": 8.202181604989535e-08, "loss": 0.3327, "step": 11253 }, { "epoch": 13.774785801713586, "grad_norm": 1.7717170213846847, "learning_rate": 8.185908045215019e-08, "loss": 1.2594, "step": 11254 }, { "epoch": 13.776009791921664, "grad_norm": 3.367380278525698, "learning_rate": 8.169650376568195e-08, "loss": 0.3403, "step": 11255 }, { "epoch": 13.777233782129743, "grad_norm": 1.9291897063529257, "learning_rate": 8.153408600117485e-08, "loss": 0.4113, "step": 11256 }, { "epoch": 13.778457772337822, "grad_norm": 1.0919906767746577, "learning_rate": 8.137182716930258e-08, "loss": 0.5438, "step": 11257 }, { "epoch": 13.7796817625459, "grad_norm": 2.308410322136566, "learning_rate": 8.120972728072801e-08, "loss": 0.4675, "step": 11258 }, { "epoch": 13.780905752753977, "grad_norm": 2.469636843237229, "learning_rate": 8.104778634610316e-08, "loss": 0.4072, "step": 11259 }, { "epoch": 13.782129742962056, "grad_norm": 1.2976619544979011, "learning_rate": 8.08860043760712e-08, "loss": 0.5519, "step": 11260 }, { "epoch": 13.783353733170134, "grad_norm": 1.860781253602644, "learning_rate": 8.072438138126304e-08, "loss": 1.004, "step": 11261 }, { "epoch": 13.784577723378213, "grad_norm": 1.7592327200116582, "learning_rate": 8.056291737230049e-08, "loss": 0.435, "step": 11262 }, { "epoch": 13.785801713586292, "grad_norm": 2.4100294244383256, "learning_rate": 8.040161235979394e-08, "loss": 0.4744, "step": 11263 }, { "epoch": 13.78702570379437, "grad_norm": 1.675046000284579, "learning_rate": 8.024046635434408e-08, "loss": 1.1653, "step": 11264 }, { "epoch": 13.788249694002449, "grad_norm": 1.602945930924442, "learning_rate": 8.007947936654075e-08, "loss": 1.2546, "step": 11265 }, { "epoch": 13.789473684210526, "grad_norm": 1.5035631731103103, "learning_rate": 7.991865140696331e-08, "loss": 0.9637, "step": 11266 }, { "epoch": 13.790697674418604, "grad_norm": 2.2749651163408195, "learning_rate": 7.975798248618077e-08, "loss": 0.4622, "step": 11267 }, { "epoch": 13.791921664626683, "grad_norm": 1.674879102971246, "learning_rate": 7.959747261475164e-08, "loss": 0.4497, "step": 11268 }, { "epoch": 13.793145654834762, "grad_norm": 2.4103904666035643, "learning_rate": 7.943712180322417e-08, "loss": 0.4904, "step": 11269 }, { "epoch": 13.79436964504284, "grad_norm": 1.7627491610209542, "learning_rate": 7.927693006213655e-08, "loss": 0.4247, "step": 11270 }, { "epoch": 13.795593635250919, "grad_norm": 1.1001336314844625, "learning_rate": 7.911689740201511e-08, "loss": 0.7054, "step": 11271 }, { "epoch": 13.796817625458996, "grad_norm": 3.3325472427400245, "learning_rate": 7.895702383337673e-08, "loss": 0.5334, "step": 11272 }, { "epoch": 13.798041615667074, "grad_norm": 1.907054229585781, "learning_rate": 7.879730936672797e-08, "loss": 0.5198, "step": 11273 }, { "epoch": 13.799265605875153, "grad_norm": 2.09141300634952, "learning_rate": 7.86377540125649e-08, "loss": 0.3541, "step": 11274 }, { "epoch": 13.800489596083231, "grad_norm": 1.9463603753461127, "learning_rate": 7.847835778137247e-08, "loss": 0.5771, "step": 11275 }, { "epoch": 13.80171358629131, "grad_norm": 1.5421689500669706, "learning_rate": 7.831912068362563e-08, "loss": 0.555, "step": 11276 }, { "epoch": 13.802937576499389, "grad_norm": 1.4706651677242522, "learning_rate": 7.816004272978933e-08, "loss": 0.4298, "step": 11277 }, { "epoch": 13.804161566707466, "grad_norm": 1.152633078863864, "learning_rate": 7.800112393031716e-08, "loss": 0.3584, "step": 11278 }, { "epoch": 13.805385556915544, "grad_norm": 1.3160925078997552, "learning_rate": 7.784236429565273e-08, "loss": 0.8918, "step": 11279 }, { "epoch": 13.806609547123623, "grad_norm": 2.064412176778618, "learning_rate": 7.768376383622905e-08, "loss": 0.7572, "step": 11280 }, { "epoch": 13.807833537331701, "grad_norm": 2.191335214769516, "learning_rate": 7.752532256246892e-08, "loss": 0.528, "step": 11281 }, { "epoch": 13.80905752753978, "grad_norm": 1.1235996988741317, "learning_rate": 7.736704048478482e-08, "loss": 0.3908, "step": 11282 }, { "epoch": 13.810281517747859, "grad_norm": 1.9112348438002085, "learning_rate": 7.720891761357763e-08, "loss": 0.4161, "step": 11283 }, { "epoch": 13.811505507955935, "grad_norm": 1.60703189359901, "learning_rate": 7.705095395923956e-08, "loss": 0.5867, "step": 11284 }, { "epoch": 13.812729498164014, "grad_norm": 1.8652604509031379, "learning_rate": 7.689314953215038e-08, "loss": 0.4586, "step": 11285 }, { "epoch": 13.813953488372093, "grad_norm": 1.4277351008869217, "learning_rate": 7.673550434268123e-08, "loss": 1.5613, "step": 11286 }, { "epoch": 13.815177478580171, "grad_norm": 2.3294003884701646, "learning_rate": 7.657801840119189e-08, "loss": 0.5491, "step": 11287 }, { "epoch": 13.81640146878825, "grad_norm": 2.5567025444351237, "learning_rate": 7.642069171803157e-08, "loss": 1.2722, "step": 11288 }, { "epoch": 13.817625458996329, "grad_norm": 1.7517269064418863, "learning_rate": 7.626352430353867e-08, "loss": 0.5656, "step": 11289 }, { "epoch": 13.818849449204407, "grad_norm": 2.6469350368992917, "learning_rate": 7.610651616804271e-08, "loss": 0.3126, "step": 11290 }, { "epoch": 13.820073439412484, "grad_norm": 1.4006151936728661, "learning_rate": 7.594966732186071e-08, "loss": 0.5869, "step": 11291 }, { "epoch": 13.821297429620563, "grad_norm": 1.480070344664168, "learning_rate": 7.57929777753011e-08, "loss": 0.4283, "step": 11292 }, { "epoch": 13.822521419828641, "grad_norm": 2.699380648826637, "learning_rate": 7.563644753866006e-08, "loss": 0.3721, "step": 11293 }, { "epoch": 13.82374541003672, "grad_norm": 1.1625575784773936, "learning_rate": 7.548007662222495e-08, "loss": 0.5938, "step": 11294 }, { "epoch": 13.824969400244798, "grad_norm": 2.5079313912740138, "learning_rate": 7.532386503627143e-08, "loss": 0.5353, "step": 11295 }, { "epoch": 13.826193390452877, "grad_norm": 2.1516572802943834, "learning_rate": 7.516781279106516e-08, "loss": 1.1984, "step": 11296 }, { "epoch": 13.827417380660954, "grad_norm": 1.7422491090045111, "learning_rate": 7.50119198968613e-08, "loss": 0.4623, "step": 11297 }, { "epoch": 13.828641370869033, "grad_norm": 1.946064688095308, "learning_rate": 7.485618636390468e-08, "loss": 0.4799, "step": 11298 }, { "epoch": 13.829865361077111, "grad_norm": 1.7403013159273397, "learning_rate": 7.470061220242964e-08, "loss": 0.4234, "step": 11299 }, { "epoch": 13.83108935128519, "grad_norm": 3.0485113638642, "learning_rate": 7.454519742265992e-08, "loss": 0.4042, "step": 11300 }, { "epoch": 13.832313341493268, "grad_norm": 1.417618530882634, "learning_rate": 7.438994203480848e-08, "loss": 0.6415, "step": 11301 }, { "epoch": 13.833537331701347, "grad_norm": 2.1692204852078487, "learning_rate": 7.423484604907827e-08, "loss": 0.6511, "step": 11302 }, { "epoch": 13.834761321909426, "grad_norm": 2.7369393588293454, "learning_rate": 7.407990947566169e-08, "loss": 0.4512, "step": 11303 }, { "epoch": 13.835985312117502, "grad_norm": 1.5623553734423097, "learning_rate": 7.392513232474086e-08, "loss": 1.2097, "step": 11304 }, { "epoch": 13.837209302325581, "grad_norm": 2.3045123012626463, "learning_rate": 7.377051460648682e-08, "loss": 0.2963, "step": 11305 }, { "epoch": 13.83843329253366, "grad_norm": 1.5091614863591931, "learning_rate": 7.361605633106061e-08, "loss": 0.5401, "step": 11306 }, { "epoch": 13.839657282741738, "grad_norm": 1.1682973219792911, "learning_rate": 7.346175750861217e-08, "loss": 0.6364, "step": 11307 }, { "epoch": 13.840881272949817, "grad_norm": 2.115705477665773, "learning_rate": 7.330761814928228e-08, "loss": 0.3145, "step": 11308 }, { "epoch": 13.842105263157894, "grad_norm": 2.4938549887531862, "learning_rate": 7.315363826320005e-08, "loss": 0.5307, "step": 11309 }, { "epoch": 13.843329253365972, "grad_norm": 1.8298109488374228, "learning_rate": 7.299981786048432e-08, "loss": 0.4701, "step": 11310 }, { "epoch": 13.844553243574051, "grad_norm": 2.082035596738621, "learning_rate": 7.28461569512437e-08, "loss": 0.7186, "step": 11311 }, { "epoch": 13.84577723378213, "grad_norm": 1.7645570068555108, "learning_rate": 7.269265554557619e-08, "loss": 0.5179, "step": 11312 }, { "epoch": 13.847001223990208, "grad_norm": 1.6623289775199221, "learning_rate": 7.253931365356931e-08, "loss": 0.4607, "step": 11313 }, { "epoch": 13.848225214198287, "grad_norm": 1.7917325734921157, "learning_rate": 7.238613128530053e-08, "loss": 0.927, "step": 11314 }, { "epoch": 13.849449204406366, "grad_norm": 1.6149798525629688, "learning_rate": 7.223310845083542e-08, "loss": 2.2751, "step": 11315 }, { "epoch": 13.850673194614442, "grad_norm": 2.194400597270533, "learning_rate": 7.208024516023149e-08, "loss": 0.6316, "step": 11316 }, { "epoch": 13.851897184822521, "grad_norm": 1.691738178363653, "learning_rate": 7.19275414235332e-08, "loss": 0.6943, "step": 11317 }, { "epoch": 13.8531211750306, "grad_norm": 1.4712867618376417, "learning_rate": 7.177499725077613e-08, "loss": 0.3942, "step": 11318 }, { "epoch": 13.854345165238678, "grad_norm": 1.6126733293461826, "learning_rate": 7.162261265198478e-08, "loss": 0.5573, "step": 11319 }, { "epoch": 13.855569155446757, "grad_norm": 1.522073705428832, "learning_rate": 7.147038763717334e-08, "loss": 0.5209, "step": 11320 }, { "epoch": 13.856793145654835, "grad_norm": 1.1747999485172196, "learning_rate": 7.13183222163455e-08, "loss": 0.5228, "step": 11321 }, { "epoch": 13.858017135862912, "grad_norm": 2.2216112166802855, "learning_rate": 7.116641639949462e-08, "loss": 0.5683, "step": 11322 }, { "epoch": 13.859241126070991, "grad_norm": 1.6570630187848416, "learning_rate": 7.101467019660357e-08, "loss": 0.4683, "step": 11323 }, { "epoch": 13.86046511627907, "grad_norm": 1.9926977919090887, "learning_rate": 7.086308361764355e-08, "loss": 0.4515, "step": 11324 }, { "epoch": 13.861689106487148, "grad_norm": 1.7319392475175175, "learning_rate": 7.071165667257712e-08, "loss": 0.5479, "step": 11325 }, { "epoch": 13.862913096695227, "grad_norm": 2.022781484744085, "learning_rate": 7.056038937135551e-08, "loss": 0.3526, "step": 11326 }, { "epoch": 13.864137086903305, "grad_norm": 1.2054124609764831, "learning_rate": 7.040928172391908e-08, "loss": 0.4719, "step": 11327 }, { "epoch": 13.865361077111384, "grad_norm": 2.4174537972286716, "learning_rate": 7.025833374019825e-08, "loss": 0.5414, "step": 11328 }, { "epoch": 13.86658506731946, "grad_norm": 1.4973860410822903, "learning_rate": 7.010754543011256e-08, "loss": 1.1557, "step": 11329 }, { "epoch": 13.86780905752754, "grad_norm": 1.5811063261293088, "learning_rate": 6.995691680357159e-08, "loss": 0.5625, "step": 11330 }, { "epoch": 13.869033047735618, "grad_norm": 2.247074048148165, "learning_rate": 6.980644787047408e-08, "loss": 0.5073, "step": 11331 }, { "epoch": 13.870257037943697, "grad_norm": 1.4152562984974963, "learning_rate": 6.965613864070797e-08, "loss": 0.4062, "step": 11332 }, { "epoch": 13.871481028151775, "grad_norm": 2.4895883604674087, "learning_rate": 6.95059891241509e-08, "loss": 0.4154, "step": 11333 }, { "epoch": 13.872705018359854, "grad_norm": 1.3504034538665053, "learning_rate": 6.935599933067111e-08, "loss": 0.4873, "step": 11334 }, { "epoch": 13.87392900856793, "grad_norm": 2.2170234548160552, "learning_rate": 6.920616927012431e-08, "loss": 1.169, "step": 11335 }, { "epoch": 13.87515299877601, "grad_norm": 2.069847693577311, "learning_rate": 6.905649895235732e-08, "loss": 1.133, "step": 11336 }, { "epoch": 13.876376988984088, "grad_norm": 2.0423452010544807, "learning_rate": 6.890698838720538e-08, "loss": 0.9001, "step": 11337 }, { "epoch": 13.877600979192167, "grad_norm": 1.4855737470131296, "learning_rate": 6.875763758449477e-08, "loss": 0.8089, "step": 11338 }, { "epoch": 13.878824969400245, "grad_norm": 0.5925390683598846, "learning_rate": 6.86084465540393e-08, "loss": 0.1285, "step": 11339 }, { "epoch": 13.880048959608324, "grad_norm": 1.527874505755996, "learning_rate": 6.845941530564393e-08, "loss": 0.6098, "step": 11340 }, { "epoch": 13.8812729498164, "grad_norm": 1.803660106069246, "learning_rate": 6.831054384910191e-08, "loss": 1.7954, "step": 11341 }, { "epoch": 13.88249694002448, "grad_norm": 1.4030742991866738, "learning_rate": 6.81618321941968e-08, "loss": 0.4386, "step": 11342 }, { "epoch": 13.883720930232558, "grad_norm": 1.794211667536993, "learning_rate": 6.801328035070138e-08, "loss": 1.3551, "step": 11343 }, { "epoch": 13.884944920440637, "grad_norm": 1.7935326346910867, "learning_rate": 6.786488832837778e-08, "loss": 0.6161, "step": 11344 }, { "epoch": 13.886168910648715, "grad_norm": 2.7406554899264495, "learning_rate": 6.771665613697798e-08, "loss": 0.3303, "step": 11345 }, { "epoch": 13.887392900856794, "grad_norm": 1.3169698604985118, "learning_rate": 6.756858378624303e-08, "loss": 0.892, "step": 11346 }, { "epoch": 13.88861689106487, "grad_norm": 1.5700670762773787, "learning_rate": 6.74206712859038e-08, "loss": 0.6652, "step": 11347 }, { "epoch": 13.88984088127295, "grad_norm": 2.705008982452678, "learning_rate": 6.72729186456808e-08, "loss": 0.387, "step": 11348 }, { "epoch": 13.891064871481028, "grad_norm": 1.924009363087987, "learning_rate": 6.712532587528326e-08, "loss": 0.6378, "step": 11349 }, { "epoch": 13.892288861689106, "grad_norm": 1.4697418233957091, "learning_rate": 6.69778929844106e-08, "loss": 1.1437, "step": 11350 }, { "epoch": 13.893512851897185, "grad_norm": 2.856751314721035, "learning_rate": 6.683061998275204e-08, "loss": 0.9453, "step": 11351 }, { "epoch": 13.894736842105264, "grad_norm": 1.734408007696091, "learning_rate": 6.668350687998565e-08, "loss": 0.6112, "step": 11352 }, { "epoch": 13.895960832313342, "grad_norm": 3.297411093070813, "learning_rate": 6.653655368577872e-08, "loss": 0.3991, "step": 11353 }, { "epoch": 13.89718482252142, "grad_norm": 1.8406464647651577, "learning_rate": 6.638976040978851e-08, "loss": 0.6831, "step": 11354 }, { "epoch": 13.898408812729498, "grad_norm": 1.1994948894757536, "learning_rate": 6.624312706166203e-08, "loss": 0.567, "step": 11355 }, { "epoch": 13.899632802937576, "grad_norm": 1.1913793572217108, "learning_rate": 6.609665365103546e-08, "loss": 0.3803, "step": 11356 }, { "epoch": 13.900856793145655, "grad_norm": 2.03621561868549, "learning_rate": 6.595034018753444e-08, "loss": 0.5341, "step": 11357 }, { "epoch": 13.902080783353734, "grad_norm": 2.092696066439, "learning_rate": 6.580418668077459e-08, "loss": 0.5894, "step": 11358 }, { "epoch": 13.903304773561812, "grad_norm": 2.226065846505523, "learning_rate": 6.565819314035937e-08, "loss": 0.4828, "step": 11359 }, { "epoch": 13.904528763769889, "grad_norm": 1.8621954827750984, "learning_rate": 6.551235957588414e-08, "loss": 1.3972, "step": 11360 }, { "epoch": 13.905752753977968, "grad_norm": 1.4081685003313074, "learning_rate": 6.53666859969318e-08, "loss": 0.5197, "step": 11361 }, { "epoch": 13.906976744186046, "grad_norm": 1.6376567849722556, "learning_rate": 6.522117241307608e-08, "loss": 0.6838, "step": 11362 }, { "epoch": 13.908200734394125, "grad_norm": 1.3112630290640406, "learning_rate": 6.507581883387904e-08, "loss": 0.9707, "step": 11363 }, { "epoch": 13.909424724602204, "grad_norm": 1.93070692078817, "learning_rate": 6.493062526889305e-08, "loss": 1.004, "step": 11364 }, { "epoch": 13.910648714810282, "grad_norm": 3.073420871307899, "learning_rate": 6.478559172765991e-08, "loss": 0.3943, "step": 11365 }, { "epoch": 13.911872705018359, "grad_norm": 1.4732247519403472, "learning_rate": 6.464071821971035e-08, "loss": 0.8396, "step": 11366 }, { "epoch": 13.913096695226438, "grad_norm": 1.2378981120865593, "learning_rate": 6.449600475456453e-08, "loss": 0.44, "step": 11367 }, { "epoch": 13.914320685434516, "grad_norm": 1.0389271561267996, "learning_rate": 6.435145134173343e-08, "loss": 0.4366, "step": 11368 }, { "epoch": 13.915544675642595, "grad_norm": 1.724829882381599, "learning_rate": 6.420705799071613e-08, "loss": 1.3278, "step": 11369 }, { "epoch": 13.916768665850674, "grad_norm": 2.005458444816427, "learning_rate": 6.406282471100173e-08, "loss": 0.4873, "step": 11370 }, { "epoch": 13.917992656058752, "grad_norm": 2.318805652315315, "learning_rate": 6.391875151206845e-08, "loss": 0.4598, "step": 11371 }, { "epoch": 13.919216646266829, "grad_norm": 1.6878687165168977, "learning_rate": 6.3774838403384e-08, "loss": 0.589, "step": 11372 }, { "epoch": 13.920440636474908, "grad_norm": 2.758391285305014, "learning_rate": 6.363108539440665e-08, "loss": 0.4814, "step": 11373 }, { "epoch": 13.921664626682986, "grad_norm": 1.1636246593683968, "learning_rate": 6.348749249458275e-08, "loss": 0.5798, "step": 11374 }, { "epoch": 13.922888616891065, "grad_norm": 2.328624874994347, "learning_rate": 6.334405971334861e-08, "loss": 0.344, "step": 11375 }, { "epoch": 13.924112607099143, "grad_norm": 1.1075233625282106, "learning_rate": 6.320078706013061e-08, "loss": 0.5443, "step": 11376 }, { "epoch": 13.925336597307222, "grad_norm": 2.8857976653910145, "learning_rate": 6.305767454434369e-08, "loss": 0.2219, "step": 11377 }, { "epoch": 13.9265605875153, "grad_norm": 1.6795459471032432, "learning_rate": 6.291472217539285e-08, "loss": 0.4497, "step": 11378 }, { "epoch": 13.927784577723378, "grad_norm": 1.8197205752387493, "learning_rate": 6.277192996267222e-08, "loss": 0.9799, "step": 11379 }, { "epoch": 13.929008567931456, "grad_norm": 0.9886663625272907, "learning_rate": 6.262929791556543e-08, "loss": 0.4101, "step": 11380 }, { "epoch": 13.930232558139535, "grad_norm": 2.766879551643058, "learning_rate": 6.248682604344636e-08, "loss": 0.5276, "step": 11381 }, { "epoch": 13.931456548347613, "grad_norm": 1.627029445158408, "learning_rate": 6.234451435567724e-08, "loss": 0.4581, "step": 11382 }, { "epoch": 13.932680538555692, "grad_norm": 3.163905943289417, "learning_rate": 6.22023628616103e-08, "loss": 0.313, "step": 11383 }, { "epoch": 13.93390452876377, "grad_norm": 2.79659800119924, "learning_rate": 6.206037157058725e-08, "loss": 0.4933, "step": 11384 }, { "epoch": 13.935128518971847, "grad_norm": 3.510631437126887, "learning_rate": 6.191854049193896e-08, "loss": 0.345, "step": 11385 }, { "epoch": 13.936352509179926, "grad_norm": 1.5012295276410548, "learning_rate": 6.177686963498686e-08, "loss": 1.2675, "step": 11386 }, { "epoch": 13.937576499388005, "grad_norm": 1.3326569757626876, "learning_rate": 6.163535900904044e-08, "loss": 0.5157, "step": 11387 }, { "epoch": 13.938800489596083, "grad_norm": 1.004282237153405, "learning_rate": 6.149400862339949e-08, "loss": 0.4251, "step": 11388 }, { "epoch": 13.940024479804162, "grad_norm": 1.4132434507715836, "learning_rate": 6.135281848735242e-08, "loss": 1.0403, "step": 11389 }, { "epoch": 13.94124847001224, "grad_norm": 2.0897579475531467, "learning_rate": 6.121178861017845e-08, "loss": 0.5669, "step": 11390 }, { "epoch": 13.94247246022032, "grad_norm": 1.8777700093535563, "learning_rate": 6.10709190011452e-08, "loss": 1.0137, "step": 11391 }, { "epoch": 13.943696450428396, "grad_norm": 1.2332177503547308, "learning_rate": 6.093020966951024e-08, "loss": 0.6219, "step": 11392 }, { "epoch": 13.944920440636475, "grad_norm": 1.8579618427342717, "learning_rate": 6.078966062452036e-08, "loss": 0.4536, "step": 11393 }, { "epoch": 13.946144430844553, "grad_norm": 1.231128116153342, "learning_rate": 6.064927187541202e-08, "loss": 0.4774, "step": 11394 }, { "epoch": 13.947368421052632, "grad_norm": 1.655350555373785, "learning_rate": 6.050904343141095e-08, "loss": 0.4708, "step": 11395 }, { "epoch": 13.94859241126071, "grad_norm": 2.0109748319878116, "learning_rate": 6.036897530173253e-08, "loss": 1.1023, "step": 11396 }, { "epoch": 13.949816401468787, "grad_norm": 1.6167401440960756, "learning_rate": 6.022906749558161e-08, "loss": 1.5683, "step": 11397 }, { "epoch": 13.951040391676866, "grad_norm": 2.3560357292357343, "learning_rate": 6.008932002215195e-08, "loss": 1.0277, "step": 11398 }, { "epoch": 13.952264381884945, "grad_norm": 1.181344427317334, "learning_rate": 5.994973289062789e-08, "loss": 0.6158, "step": 11399 }, { "epoch": 13.953488372093023, "grad_norm": 1.2144266317883052, "learning_rate": 5.981030611018235e-08, "loss": 0.6676, "step": 11400 }, { "epoch": 13.954712362301102, "grad_norm": 2.0225602199720027, "learning_rate": 5.967103968997745e-08, "loss": 0.3828, "step": 11401 }, { "epoch": 13.95593635250918, "grad_norm": 1.697990560287181, "learning_rate": 5.9531933639165876e-08, "loss": 0.7125, "step": 11402 }, { "epoch": 13.957160342717259, "grad_norm": 3.4934935001719754, "learning_rate": 5.939298796688892e-08, "loss": 0.4287, "step": 11403 }, { "epoch": 13.958384332925336, "grad_norm": 1.308279105581052, "learning_rate": 5.925420268227761e-08, "loss": 0.7577, "step": 11404 }, { "epoch": 13.959608323133414, "grad_norm": 2.3697787646343094, "learning_rate": 5.911557779445271e-08, "loss": 0.4839, "step": 11405 }, { "epoch": 13.960832313341493, "grad_norm": 2.4717233764738404, "learning_rate": 5.897711331252359e-08, "loss": 0.7261, "step": 11406 }, { "epoch": 13.962056303549572, "grad_norm": 2.141728707191067, "learning_rate": 5.8838809245589656e-08, "loss": 0.6448, "step": 11407 }, { "epoch": 13.96328029375765, "grad_norm": 2.195527976822376, "learning_rate": 5.8700665602740016e-08, "loss": 0.8397, "step": 11408 }, { "epoch": 13.964504283965729, "grad_norm": 3.3167494379496, "learning_rate": 5.8562682393053236e-08, "loss": 0.6749, "step": 11409 }, { "epoch": 13.965728274173806, "grad_norm": 2.712816720550659, "learning_rate": 5.842485962559652e-08, "loss": 0.4127, "step": 11410 }, { "epoch": 13.966952264381884, "grad_norm": 2.341582891983283, "learning_rate": 5.8287197309427056e-08, "loss": 0.3426, "step": 11411 }, { "epoch": 13.968176254589963, "grad_norm": 2.0038027248813832, "learning_rate": 5.814969545359206e-08, "loss": 0.4755, "step": 11412 }, { "epoch": 13.969400244798042, "grad_norm": 1.5096502532358407, "learning_rate": 5.8012354067127074e-08, "loss": 0.6124, "step": 11413 }, { "epoch": 13.97062423500612, "grad_norm": 1.5702929242035408, "learning_rate": 5.787517315905794e-08, "loss": 0.9104, "step": 11414 }, { "epoch": 13.971848225214199, "grad_norm": 3.6564626866674934, "learning_rate": 5.773815273839939e-08, "loss": 0.403, "step": 11415 }, { "epoch": 13.973072215422278, "grad_norm": 3.193233164324562, "learning_rate": 5.7601292814156715e-08, "loss": 0.4514, "step": 11416 }, { "epoch": 13.974296205630354, "grad_norm": 1.6521890156723305, "learning_rate": 5.7464593395323006e-08, "loss": 0.351, "step": 11417 }, { "epoch": 13.975520195838433, "grad_norm": 2.2000739713883326, "learning_rate": 5.732805449088191e-08, "loss": 0.5061, "step": 11418 }, { "epoch": 13.976744186046512, "grad_norm": 1.517983253206331, "learning_rate": 5.719167610980597e-08, "loss": 0.4984, "step": 11419 }, { "epoch": 13.97796817625459, "grad_norm": 1.1583438625693545, "learning_rate": 5.7055458261058013e-08, "loss": 0.5998, "step": 11420 }, { "epoch": 13.979192166462669, "grad_norm": 1.6624847300663583, "learning_rate": 5.69194009535895e-08, "loss": 1.4804, "step": 11421 }, { "epoch": 13.980416156670747, "grad_norm": 1.5986708742956313, "learning_rate": 5.67835041963416e-08, "loss": 0.5215, "step": 11422 }, { "epoch": 13.981640146878824, "grad_norm": 1.9878811813985708, "learning_rate": 5.664776799824523e-08, "loss": 0.5843, "step": 11423 }, { "epoch": 13.982864137086903, "grad_norm": 1.9072267580272728, "learning_rate": 5.651219236821964e-08, "loss": 0.6062, "step": 11424 }, { "epoch": 13.984088127294982, "grad_norm": 1.460633168781365, "learning_rate": 5.637677731517521e-08, "loss": 1.2778, "step": 11425 }, { "epoch": 13.98531211750306, "grad_norm": 1.5332182996584836, "learning_rate": 5.6241522848010656e-08, "loss": 0.5986, "step": 11426 }, { "epoch": 13.986536107711139, "grad_norm": 2.160740066542416, "learning_rate": 5.610642897561441e-08, "loss": 0.6614, "step": 11427 }, { "epoch": 13.987760097919217, "grad_norm": 1.1822734559054673, "learning_rate": 5.597149570686439e-08, "loss": 0.6012, "step": 11428 }, { "epoch": 13.988984088127294, "grad_norm": 1.9712204945594751, "learning_rate": 5.583672305062765e-08, "loss": 0.5293, "step": 11429 }, { "epoch": 13.990208078335373, "grad_norm": 2.010657863222377, "learning_rate": 5.570211101576128e-08, "loss": 0.5254, "step": 11430 }, { "epoch": 13.991432068543451, "grad_norm": 1.3384451482999957, "learning_rate": 5.5567659611111546e-08, "loss": 0.7699, "step": 11431 }, { "epoch": 13.99265605875153, "grad_norm": 1.7468952987084188, "learning_rate": 5.543336884551359e-08, "loss": 1.4465, "step": 11432 }, { "epoch": 13.993880048959609, "grad_norm": 1.7071536204035782, "learning_rate": 5.529923872779258e-08, "loss": 1.1686, "step": 11433 }, { "epoch": 13.995104039167687, "grad_norm": 1.6064996242212264, "learning_rate": 5.516526926676397e-08, "loss": 1.1584, "step": 11434 }, { "epoch": 13.996328029375764, "grad_norm": 2.0240950602542838, "learning_rate": 5.503146047123043e-08, "loss": 0.3872, "step": 11435 }, { "epoch": 13.997552019583843, "grad_norm": 1.889461247733897, "learning_rate": 5.489781234998631e-08, "loss": 1.556, "step": 11436 }, { "epoch": 13.998776009791921, "grad_norm": 1.920988715923497, "learning_rate": 5.476432491181377e-08, "loss": 0.3464, "step": 11437 }, { "epoch": 14.0, "grad_norm": 3.785202186943911, "learning_rate": 5.463099816548578e-08, "loss": 0.3989, "step": 11438 }, { "epoch": 14.001223990208079, "grad_norm": 2.039578043424495, "learning_rate": 5.449783211976395e-08, "loss": 0.5069, "step": 11439 }, { "epoch": 14.002447980416157, "grad_norm": 1.8235320904536156, "learning_rate": 5.436482678339905e-08, "loss": 1.1535, "step": 11440 }, { "epoch": 14.003671970624236, "grad_norm": 1.6192206504975701, "learning_rate": 5.423198216513214e-08, "loss": 0.8117, "step": 11441 }, { "epoch": 14.004895960832313, "grad_norm": 3.079595824774245, "learning_rate": 5.4099298273692925e-08, "loss": 0.3891, "step": 11442 }, { "epoch": 14.006119951040391, "grad_norm": 1.5938909656903524, "learning_rate": 5.396677511780135e-08, "loss": 0.6835, "step": 11443 }, { "epoch": 14.00734394124847, "grad_norm": 2.1627160714429965, "learning_rate": 5.383441270616574e-08, "loss": 0.4332, "step": 11444 }, { "epoch": 14.008567931456549, "grad_norm": 2.979533393528313, "learning_rate": 5.370221104748496e-08, "loss": 0.3441, "step": 11445 }, { "epoch": 14.009791921664627, "grad_norm": 2.1500958959057863, "learning_rate": 5.35701701504468e-08, "loss": 0.528, "step": 11446 }, { "epoch": 14.011015911872706, "grad_norm": 1.425045375955601, "learning_rate": 5.343829002372791e-08, "loss": 0.601, "step": 11447 }, { "epoch": 14.012239902080783, "grad_norm": 1.9796971356028275, "learning_rate": 5.330657067599582e-08, "loss": 0.6457, "step": 11448 }, { "epoch": 14.013463892288861, "grad_norm": 2.0627247609467623, "learning_rate": 5.317501211590581e-08, "loss": 1.3042, "step": 11449 }, { "epoch": 14.01468788249694, "grad_norm": 1.432195581201928, "learning_rate": 5.304361435210404e-08, "loss": 0.6367, "step": 11450 }, { "epoch": 14.015911872705018, "grad_norm": 2.335243922415969, "learning_rate": 5.291237739322525e-08, "loss": 0.6109, "step": 11451 }, { "epoch": 14.017135862913097, "grad_norm": 1.4125734884482832, "learning_rate": 5.2781301247893934e-08, "loss": 1.0404, "step": 11452 }, { "epoch": 14.018359853121176, "grad_norm": 1.0624051299811235, "learning_rate": 5.2650385924724045e-08, "loss": 0.4369, "step": 11453 }, { "epoch": 14.019583843329253, "grad_norm": 1.7001860454816549, "learning_rate": 5.251963143231814e-08, "loss": 0.6329, "step": 11454 }, { "epoch": 14.020807833537331, "grad_norm": 1.6852605757930093, "learning_rate": 5.2389037779269625e-08, "loss": 1.5519, "step": 11455 }, { "epoch": 14.02203182374541, "grad_norm": 2.1322827932502864, "learning_rate": 5.22586049741608e-08, "loss": 0.406, "step": 11456 }, { "epoch": 14.023255813953488, "grad_norm": 2.0905221049587204, "learning_rate": 5.212833302556258e-08, "loss": 0.5698, "step": 11457 }, { "epoch": 14.024479804161567, "grad_norm": 1.562124019001566, "learning_rate": 5.199822194203646e-08, "loss": 0.9787, "step": 11458 }, { "epoch": 14.025703794369646, "grad_norm": 2.148848377831356, "learning_rate": 5.1868271732132546e-08, "loss": 0.6446, "step": 11459 }, { "epoch": 14.026927784577722, "grad_norm": 2.8022867618528133, "learning_rate": 5.1738482404390666e-08, "loss": 0.6472, "step": 11460 }, { "epoch": 14.028151774785801, "grad_norm": 2.2162888108042402, "learning_rate": 5.160885396734039e-08, "loss": 0.5678, "step": 11461 }, { "epoch": 14.02937576499388, "grad_norm": 1.9095854328970123, "learning_rate": 5.1479386429500455e-08, "loss": 0.6377, "step": 11462 }, { "epoch": 14.030599755201958, "grad_norm": 1.2799407444053532, "learning_rate": 5.1350079799378505e-08, "loss": 0.4662, "step": 11463 }, { "epoch": 14.031823745410037, "grad_norm": 1.7113868187885042, "learning_rate": 5.1220934085472465e-08, "loss": 0.4833, "step": 11464 }, { "epoch": 14.033047735618116, "grad_norm": 1.8036133321081538, "learning_rate": 5.109194929626915e-08, "loss": 1.6092, "step": 11465 }, { "epoch": 14.034271725826194, "grad_norm": 2.6846284697772664, "learning_rate": 5.096312544024512e-08, "loss": 0.3809, "step": 11466 }, { "epoch": 14.035495716034271, "grad_norm": 2.837767339294805, "learning_rate": 5.0834462525866104e-08, "loss": 0.351, "step": 11467 }, { "epoch": 14.03671970624235, "grad_norm": 1.5084833628440928, "learning_rate": 5.0705960561587276e-08, "loss": 1.0195, "step": 11468 }, { "epoch": 14.037943696450428, "grad_norm": 1.2509477250390353, "learning_rate": 5.057761955585355e-08, "loss": 0.6215, "step": 11469 }, { "epoch": 14.039167686658507, "grad_norm": 1.6146665421754045, "learning_rate": 5.044943951709902e-08, "loss": 1.5676, "step": 11470 }, { "epoch": 14.040391676866586, "grad_norm": 1.8880110175775144, "learning_rate": 5.032142045374694e-08, "loss": 0.5792, "step": 11471 }, { "epoch": 14.041615667074664, "grad_norm": 1.7856066396330448, "learning_rate": 5.019356237421002e-08, "loss": 0.6159, "step": 11472 }, { "epoch": 14.042839657282741, "grad_norm": 2.5983947823962814, "learning_rate": 5.0065865286891555e-08, "loss": 0.808, "step": 11473 }, { "epoch": 14.04406364749082, "grad_norm": 2.1647748139221985, "learning_rate": 4.993832920018232e-08, "loss": 1.1135, "step": 11474 }, { "epoch": 14.045287637698898, "grad_norm": 1.9089547069748665, "learning_rate": 4.981095412246423e-08, "loss": 0.5245, "step": 11475 }, { "epoch": 14.046511627906977, "grad_norm": 1.517343281139389, "learning_rate": 4.968374006210752e-08, "loss": 0.5546, "step": 11476 }, { "epoch": 14.047735618115055, "grad_norm": 1.660802960716323, "learning_rate": 4.9556687027472185e-08, "loss": 0.4604, "step": 11477 }, { "epoch": 14.048959608323134, "grad_norm": 1.5113278088346342, "learning_rate": 4.9429795026907925e-08, "loss": 0.5522, "step": 11478 }, { "epoch": 14.050183598531211, "grad_norm": 1.573569846579871, "learning_rate": 4.9303064068753627e-08, "loss": 0.4861, "step": 11479 }, { "epoch": 14.05140758873929, "grad_norm": 2.9822326342336405, "learning_rate": 4.9176494161337074e-08, "loss": 0.3987, "step": 11480 }, { "epoch": 14.052631578947368, "grad_norm": 0.9446770010512163, "learning_rate": 4.905008531297661e-08, "loss": 0.5192, "step": 11481 }, { "epoch": 14.053855569155447, "grad_norm": 1.3091344584162352, "learning_rate": 4.8923837531979214e-08, "loss": 0.891, "step": 11482 }, { "epoch": 14.055079559363525, "grad_norm": 1.464953740085889, "learning_rate": 4.879775082664129e-08, "loss": 0.3619, "step": 11483 }, { "epoch": 14.056303549571604, "grad_norm": 2.1351515784926636, "learning_rate": 4.867182520524871e-08, "loss": 0.3974, "step": 11484 }, { "epoch": 14.057527539779683, "grad_norm": 1.4892104814436362, "learning_rate": 4.854606067607681e-08, "loss": 0.9952, "step": 11485 }, { "epoch": 14.05875152998776, "grad_norm": 1.1866602615022195, "learning_rate": 4.842045724739064e-08, "loss": 0.6159, "step": 11486 }, { "epoch": 14.059975520195838, "grad_norm": 1.7495963305716633, "learning_rate": 4.829501492744415e-08, "loss": 1.4457, "step": 11487 }, { "epoch": 14.061199510403917, "grad_norm": 1.6713414890648837, "learning_rate": 4.816973372448131e-08, "loss": 0.2906, "step": 11488 }, { "epoch": 14.062423500611995, "grad_norm": 1.502220429597136, "learning_rate": 4.8044613646734686e-08, "loss": 0.8086, "step": 11489 }, { "epoch": 14.063647490820074, "grad_norm": 1.2621520684575402, "learning_rate": 4.791965470242688e-08, "loss": 0.4239, "step": 11490 }, { "epoch": 14.064871481028153, "grad_norm": 2.335134681481364, "learning_rate": 4.779485689976965e-08, "loss": 0.5147, "step": 11491 }, { "epoch": 14.06609547123623, "grad_norm": 2.8666228033949386, "learning_rate": 4.767022024696477e-08, "loss": 0.4929, "step": 11492 }, { "epoch": 14.067319461444308, "grad_norm": 1.212321480545358, "learning_rate": 4.754574475220208e-08, "loss": 0.878, "step": 11493 }, { "epoch": 14.068543451652387, "grad_norm": 1.3946061585871428, "learning_rate": 4.7421430423662266e-08, "loss": 0.7068, "step": 11494 }, { "epoch": 14.069767441860465, "grad_norm": 2.0316620882976966, "learning_rate": 4.729727726951433e-08, "loss": 0.382, "step": 11495 }, { "epoch": 14.070991432068544, "grad_norm": 1.72891736112303, "learning_rate": 4.717328529791759e-08, "loss": 0.9783, "step": 11496 }, { "epoch": 14.072215422276622, "grad_norm": 1.7660666583421578, "learning_rate": 4.7049454517020254e-08, "loss": 1.2593, "step": 11497 }, { "epoch": 14.0734394124847, "grad_norm": 1.516154234676482, "learning_rate": 4.6925784934959695e-08, "loss": 0.3544, "step": 11498 }, { "epoch": 14.074663402692778, "grad_norm": 2.5972292388337834, "learning_rate": 4.680227655986358e-08, "loss": 0.4465, "step": 11499 }, { "epoch": 14.075887392900857, "grad_norm": 2.345389270560507, "learning_rate": 4.6678929399847916e-08, "loss": 1.3283, "step": 11500 }, { "epoch": 14.077111383108935, "grad_norm": 1.9117511671342071, "learning_rate": 4.655574346301872e-08, "loss": 0.7232, "step": 11501 }, { "epoch": 14.078335373317014, "grad_norm": 2.4801037887617725, "learning_rate": 4.643271875747118e-08, "loss": 0.3904, "step": 11502 }, { "epoch": 14.079559363525092, "grad_norm": 1.376050470079658, "learning_rate": 4.630985529129051e-08, "loss": 0.5481, "step": 11503 }, { "epoch": 14.080783353733171, "grad_norm": 1.3421354612085616, "learning_rate": 4.618715307255078e-08, "loss": 0.5159, "step": 11504 }, { "epoch": 14.082007343941248, "grad_norm": 1.8784380183108278, "learning_rate": 4.606461210931529e-08, "loss": 1.3395, "step": 11505 }, { "epoch": 14.083231334149326, "grad_norm": 0.9873884261568006, "learning_rate": 4.5942232409636743e-08, "loss": 0.4101, "step": 11506 }, { "epoch": 14.084455324357405, "grad_norm": 1.181490898321425, "learning_rate": 4.582001398155789e-08, "loss": 0.6105, "step": 11507 }, { "epoch": 14.085679314565484, "grad_norm": 2.3654117717561425, "learning_rate": 4.569795683311035e-08, "loss": 0.5402, "step": 11508 }, { "epoch": 14.086903304773562, "grad_norm": 1.9457352337801574, "learning_rate": 4.5576060972315205e-08, "loss": 1.2063, "step": 11509 }, { "epoch": 14.088127294981641, "grad_norm": 1.515194784322062, "learning_rate": 4.5454326407183e-08, "loss": 0.6424, "step": 11510 }, { "epoch": 14.089351285189718, "grad_norm": 3.4834668813856684, "learning_rate": 4.5332753145713725e-08, "loss": 0.5129, "step": 11511 }, { "epoch": 14.090575275397796, "grad_norm": 1.3763958824624418, "learning_rate": 4.521134119589682e-08, "loss": 0.521, "step": 11512 }, { "epoch": 14.091799265605875, "grad_norm": 2.7290058238189037, "learning_rate": 4.5090090565711174e-08, "loss": 0.5267, "step": 11513 }, { "epoch": 14.093023255813954, "grad_norm": 1.6910953284631638, "learning_rate": 4.4969001263124314e-08, "loss": 0.3476, "step": 11514 }, { "epoch": 14.094247246022032, "grad_norm": 1.2546829030093793, "learning_rate": 4.484807329609403e-08, "loss": 0.5755, "step": 11515 }, { "epoch": 14.095471236230111, "grad_norm": 2.075031722358038, "learning_rate": 4.4727306672567585e-08, "loss": 0.7177, "step": 11516 }, { "epoch": 14.096695226438188, "grad_norm": 1.1125701421456615, "learning_rate": 4.46067014004814e-08, "loss": 0.7051, "step": 11517 }, { "epoch": 14.097919216646266, "grad_norm": 2.5244349394204577, "learning_rate": 4.448625748776081e-08, "loss": 0.4934, "step": 11518 }, { "epoch": 14.099143206854345, "grad_norm": 2.469658290140717, "learning_rate": 4.4365974942320865e-08, "loss": 0.7263, "step": 11519 }, { "epoch": 14.100367197062424, "grad_norm": 2.9827934857524454, "learning_rate": 4.424585377206636e-08, "loss": 0.6774, "step": 11520 }, { "epoch": 14.101591187270502, "grad_norm": 1.782733044709766, "learning_rate": 4.412589398489098e-08, "loss": 0.4247, "step": 11521 }, { "epoch": 14.10281517747858, "grad_norm": 1.9010187821802655, "learning_rate": 4.400609558867841e-08, "loss": 1.1657, "step": 11522 }, { "epoch": 14.104039167686658, "grad_norm": 2.9195448351279243, "learning_rate": 4.388645859130153e-08, "loss": 1.0144, "step": 11523 }, { "epoch": 14.105263157894736, "grad_norm": 2.442437479576611, "learning_rate": 4.376698300062127e-08, "loss": 0.8908, "step": 11524 }, { "epoch": 14.106487148102815, "grad_norm": 1.9348220106633436, "learning_rate": 4.364766882449023e-08, "loss": 0.4831, "step": 11525 }, { "epoch": 14.107711138310894, "grad_norm": 1.6985035927867322, "learning_rate": 4.352851607074909e-08, "loss": 1.0085, "step": 11526 }, { "epoch": 14.108935128518972, "grad_norm": 1.9175424425077368, "learning_rate": 4.3409524747227985e-08, "loss": 0.4325, "step": 11527 }, { "epoch": 14.11015911872705, "grad_norm": 2.359400189948757, "learning_rate": 4.3290694861746474e-08, "loss": 0.5727, "step": 11528 }, { "epoch": 14.11138310893513, "grad_norm": 1.8321517974819657, "learning_rate": 4.317202642211388e-08, "loss": 0.4101, "step": 11529 }, { "epoch": 14.112607099143206, "grad_norm": 1.4453695770484185, "learning_rate": 4.3053519436128674e-08, "loss": 0.4984, "step": 11530 }, { "epoch": 14.113831089351285, "grad_norm": 1.692696691799215, "learning_rate": 4.2935173911578253e-08, "loss": 0.4641, "step": 11531 }, { "epoch": 14.115055079559363, "grad_norm": 2.5104911860689088, "learning_rate": 4.2816989856240273e-08, "loss": 0.4784, "step": 11532 }, { "epoch": 14.116279069767442, "grad_norm": 3.156164252006626, "learning_rate": 4.2698967277881033e-08, "loss": 0.3114, "step": 11533 }, { "epoch": 14.11750305997552, "grad_norm": 2.009846448144662, "learning_rate": 4.258110618425737e-08, "loss": 0.987, "step": 11534 }, { "epoch": 14.1187270501836, "grad_norm": 2.346603970296224, "learning_rate": 4.246340658311365e-08, "loss": 0.4367, "step": 11535 }, { "epoch": 14.119951040391676, "grad_norm": 2.693151025265512, "learning_rate": 4.2345868482184806e-08, "loss": 0.4511, "step": 11536 }, { "epoch": 14.121175030599755, "grad_norm": 1.3111137201625636, "learning_rate": 4.2228491889195475e-08, "loss": 0.7634, "step": 11537 }, { "epoch": 14.122399020807833, "grad_norm": 2.459296345421133, "learning_rate": 4.2111276811858946e-08, "loss": 0.4112, "step": 11538 }, { "epoch": 14.123623011015912, "grad_norm": 1.702879181582311, "learning_rate": 4.199422325787822e-08, "loss": 0.467, "step": 11539 }, { "epoch": 14.12484700122399, "grad_norm": 1.6802103097334484, "learning_rate": 4.187733123494575e-08, "loss": 0.4926, "step": 11540 }, { "epoch": 14.12607099143207, "grad_norm": 1.628003829424904, "learning_rate": 4.1760600750742894e-08, "loss": 0.7405, "step": 11541 }, { "epoch": 14.127294981640146, "grad_norm": 2.224913109908127, "learning_rate": 4.164403181294102e-08, "loss": 1.1694, "step": 11542 }, { "epoch": 14.128518971848225, "grad_norm": 2.7267319616298766, "learning_rate": 4.1527624429200666e-08, "loss": 0.4179, "step": 11543 }, { "epoch": 14.129742962056303, "grad_norm": 1.4274047845659668, "learning_rate": 4.1411378607171257e-08, "loss": 0.5398, "step": 11544 }, { "epoch": 14.130966952264382, "grad_norm": 1.4160259733062652, "learning_rate": 4.1295294354492534e-08, "loss": 0.8598, "step": 11545 }, { "epoch": 14.13219094247246, "grad_norm": 1.285984688967624, "learning_rate": 4.11793716787931e-08, "loss": 0.5685, "step": 11546 }, { "epoch": 14.13341493268054, "grad_norm": 1.357954799674962, "learning_rate": 4.106361058769076e-08, "loss": 0.5287, "step": 11547 }, { "epoch": 14.134638922888616, "grad_norm": 1.6086501826621533, "learning_rate": 4.094801108879304e-08, "loss": 0.8594, "step": 11548 }, { "epoch": 14.135862913096695, "grad_norm": 1.5187195778415952, "learning_rate": 4.0832573189696356e-08, "loss": 0.4986, "step": 11549 }, { "epoch": 14.137086903304773, "grad_norm": 2.027910966186808, "learning_rate": 4.071729689798715e-08, "loss": 0.7196, "step": 11550 }, { "epoch": 14.138310893512852, "grad_norm": 1.705699092980649, "learning_rate": 4.0602182221241016e-08, "loss": 1.1633, "step": 11551 }, { "epoch": 14.13953488372093, "grad_norm": 2.176808643240439, "learning_rate": 4.0487229167023026e-08, "loss": 0.9539, "step": 11552 }, { "epoch": 14.140758873929009, "grad_norm": 1.295708271090009, "learning_rate": 4.0372437742887126e-08, "loss": 0.5682, "step": 11553 }, { "epoch": 14.141982864137088, "grad_norm": 1.086415595318528, "learning_rate": 4.025780795637674e-08, "loss": 0.5913, "step": 11554 }, { "epoch": 14.143206854345165, "grad_norm": 1.782111477689603, "learning_rate": 4.014333981502555e-08, "loss": 0.9918, "step": 11555 }, { "epoch": 14.144430844553243, "grad_norm": 1.7792593054169954, "learning_rate": 4.00290333263556e-08, "loss": 0.9261, "step": 11556 }, { "epoch": 14.145654834761322, "grad_norm": 2.2731073810412106, "learning_rate": 3.991488849787867e-08, "loss": 0.9458, "step": 11557 }, { "epoch": 14.1468788249694, "grad_norm": 2.4126094281333232, "learning_rate": 3.980090533709624e-08, "loss": 0.5093, "step": 11558 }, { "epoch": 14.148102815177479, "grad_norm": 1.9044766735889704, "learning_rate": 3.968708385149872e-08, "loss": 1.299, "step": 11559 }, { "epoch": 14.149326805385558, "grad_norm": 1.5128046278689606, "learning_rate": 3.9573424048565676e-08, "loss": 0.3502, "step": 11560 }, { "epoch": 14.150550795593634, "grad_norm": 1.4215092097734507, "learning_rate": 3.945992593576698e-08, "loss": 0.6417, "step": 11561 }, { "epoch": 14.151774785801713, "grad_norm": 1.878073222297094, "learning_rate": 3.9346589520561084e-08, "loss": 0.5385, "step": 11562 }, { "epoch": 14.152998776009792, "grad_norm": 2.219573074885244, "learning_rate": 3.9233414810395944e-08, "loss": 0.4136, "step": 11563 }, { "epoch": 14.15422276621787, "grad_norm": 2.298919082921737, "learning_rate": 3.912040181270893e-08, "loss": 0.4672, "step": 11564 }, { "epoch": 14.155446756425949, "grad_norm": 1.6592771490220868, "learning_rate": 3.9007550534926886e-08, "loss": 1.4808, "step": 11565 }, { "epoch": 14.156670746634028, "grad_norm": 2.540451332907024, "learning_rate": 3.88948609844661e-08, "loss": 0.5341, "step": 11566 }, { "epoch": 14.157894736842104, "grad_norm": 1.7861241915990238, "learning_rate": 3.8782333168732033e-08, "loss": 0.4896, "step": 11567 }, { "epoch": 14.159118727050183, "grad_norm": 1.1605425198447181, "learning_rate": 3.866996709511961e-08, "loss": 0.5037, "step": 11568 }, { "epoch": 14.160342717258262, "grad_norm": 1.89199162528628, "learning_rate": 3.855776277101319e-08, "loss": 1.4151, "step": 11569 }, { "epoch": 14.16156670746634, "grad_norm": 2.7777913591483094, "learning_rate": 3.844572020378662e-08, "loss": 0.325, "step": 11570 }, { "epoch": 14.162790697674419, "grad_norm": 1.3159594571678437, "learning_rate": 3.833383940080232e-08, "loss": 0.586, "step": 11571 }, { "epoch": 14.164014687882498, "grad_norm": 1.5766500178813465, "learning_rate": 3.8222120369413027e-08, "loss": 1.1583, "step": 11572 }, { "epoch": 14.165238678090576, "grad_norm": 2.2545169239406846, "learning_rate": 3.811056311696065e-08, "loss": 0.9134, "step": 11573 }, { "epoch": 14.166462668298653, "grad_norm": 1.9921955048625328, "learning_rate": 3.7999167650775994e-08, "loss": 1.4809, "step": 11574 }, { "epoch": 14.167686658506732, "grad_norm": 1.4924615581117442, "learning_rate": 3.7887933978179856e-08, "loss": 0.5262, "step": 11575 }, { "epoch": 14.16891064871481, "grad_norm": 1.4634206618207768, "learning_rate": 3.777686210648196e-08, "loss": 0.8393, "step": 11576 }, { "epoch": 14.170134638922889, "grad_norm": 1.7380511598766002, "learning_rate": 3.7665952042981455e-08, "loss": 0.8453, "step": 11577 }, { "epoch": 14.171358629130967, "grad_norm": 3.9753989450083833, "learning_rate": 3.7555203794967244e-08, "loss": 0.3303, "step": 11578 }, { "epoch": 14.172582619339046, "grad_norm": 3.0736544747473604, "learning_rate": 3.744461736971683e-08, "loss": 0.4034, "step": 11579 }, { "epoch": 14.173806609547123, "grad_norm": 1.5679606316155348, "learning_rate": 3.7334192774497734e-08, "loss": 0.6647, "step": 11580 }, { "epoch": 14.175030599755202, "grad_norm": 2.0983529190998373, "learning_rate": 3.722393001656721e-08, "loss": 1.284, "step": 11581 }, { "epoch": 14.17625458996328, "grad_norm": 2.08734653699696, "learning_rate": 3.711382910317057e-08, "loss": 0.385, "step": 11582 }, { "epoch": 14.177478580171359, "grad_norm": 1.4435749213001259, "learning_rate": 3.700389004154342e-08, "loss": 1.0847, "step": 11583 }, { "epoch": 14.178702570379437, "grad_norm": 2.1886935730768635, "learning_rate": 3.689411283891053e-08, "loss": 0.3037, "step": 11584 }, { "epoch": 14.179926560587516, "grad_norm": 1.8790090013118523, "learning_rate": 3.678449750248614e-08, "loss": 1.8729, "step": 11585 }, { "epoch": 14.181150550795593, "grad_norm": 1.8661664510236045, "learning_rate": 3.6675044039473916e-08, "loss": 1.3964, "step": 11586 }, { "epoch": 14.182374541003671, "grad_norm": 1.9924873567101151, "learning_rate": 3.6565752457066726e-08, "loss": 0.6046, "step": 11587 }, { "epoch": 14.18359853121175, "grad_norm": 1.9356725677062565, "learning_rate": 3.64566227624466e-08, "loss": 0.4486, "step": 11588 }, { "epoch": 14.184822521419829, "grad_norm": 2.6941735227047383, "learning_rate": 3.6347654962785027e-08, "loss": 0.3506, "step": 11589 }, { "epoch": 14.186046511627907, "grad_norm": 1.178597833456061, "learning_rate": 3.623884906524322e-08, "loss": 0.5658, "step": 11590 }, { "epoch": 14.187270501835986, "grad_norm": 1.1034270059029487, "learning_rate": 3.613020507697157e-08, "loss": 0.5144, "step": 11591 }, { "epoch": 14.188494492044065, "grad_norm": 2.230667106297832, "learning_rate": 3.6021723005109367e-08, "loss": 0.5008, "step": 11592 }, { "epoch": 14.189718482252141, "grad_norm": 2.3795204005223076, "learning_rate": 3.591340285678618e-08, "loss": 0.5417, "step": 11593 }, { "epoch": 14.19094247246022, "grad_norm": 1.7173081556664576, "learning_rate": 3.580524463911994e-08, "loss": 1.3272, "step": 11594 }, { "epoch": 14.192166462668299, "grad_norm": 1.7794697219047417, "learning_rate": 3.569724835921856e-08, "loss": 1.5124, "step": 11595 }, { "epoch": 14.193390452876377, "grad_norm": 1.059273739554263, "learning_rate": 3.5589414024179424e-08, "loss": 0.5819, "step": 11596 }, { "epoch": 14.194614443084456, "grad_norm": 2.006767433860227, "learning_rate": 3.548174164108853e-08, "loss": 1.3819, "step": 11597 }, { "epoch": 14.195838433292534, "grad_norm": 1.8067232898076888, "learning_rate": 3.537423121702188e-08, "loss": 1.1676, "step": 11598 }, { "epoch": 14.197062423500611, "grad_norm": 1.844198293921561, "learning_rate": 3.526688275904522e-08, "loss": 0.6823, "step": 11599 }, { "epoch": 14.19828641370869, "grad_norm": 1.7536055468013614, "learning_rate": 3.515969627421234e-08, "loss": 0.3654, "step": 11600 }, { "epoch": 14.199510403916769, "grad_norm": 1.5160245454205818, "learning_rate": 3.505267176956734e-08, "loss": 1.4056, "step": 11601 }, { "epoch": 14.200734394124847, "grad_norm": 2.0253107207626075, "learning_rate": 3.494580925214347e-08, "loss": 0.8996, "step": 11602 }, { "epoch": 14.201958384332926, "grad_norm": 2.2306124659748567, "learning_rate": 3.4839108728963456e-08, "loss": 0.5593, "step": 11603 }, { "epoch": 14.203182374541004, "grad_norm": 1.7133365424344351, "learning_rate": 3.473257020703946e-08, "loss": 0.4838, "step": 11604 }, { "epoch": 14.204406364749081, "grad_norm": 2.674147104253107, "learning_rate": 3.4626193693372555e-08, "loss": 0.661, "step": 11605 }, { "epoch": 14.20563035495716, "grad_norm": 1.2984869896595264, "learning_rate": 3.451997919495298e-08, "loss": 0.9812, "step": 11606 }, { "epoch": 14.206854345165238, "grad_norm": 1.7839045610799191, "learning_rate": 3.4413926718761536e-08, "loss": 0.654, "step": 11607 }, { "epoch": 14.208078335373317, "grad_norm": 2.3983645678281746, "learning_rate": 3.43080362717671e-08, "loss": 0.8395, "step": 11608 }, { "epoch": 14.209302325581396, "grad_norm": 1.7899105272480045, "learning_rate": 3.420230786092854e-08, "loss": 0.6705, "step": 11609 }, { "epoch": 14.210526315789474, "grad_norm": 2.5871150656806163, "learning_rate": 3.4096741493194196e-08, "loss": 0.4109, "step": 11610 }, { "epoch": 14.211750305997551, "grad_norm": 1.1561780863248206, "learning_rate": 3.399133717550101e-08, "loss": 0.4919, "step": 11611 }, { "epoch": 14.21297429620563, "grad_norm": 1.417475593075374, "learning_rate": 3.3886094914776234e-08, "loss": 0.5101, "step": 11612 }, { "epoch": 14.214198286413708, "grad_norm": 1.1838885648081627, "learning_rate": 3.378101471793543e-08, "loss": 0.5548, "step": 11613 }, { "epoch": 14.215422276621787, "grad_norm": 1.8051408579744028, "learning_rate": 3.3676096591884754e-08, "loss": 0.5333, "step": 11614 }, { "epoch": 14.216646266829866, "grad_norm": 1.546798399095004, "learning_rate": 3.35713405435184e-08, "loss": 0.5984, "step": 11615 }, { "epoch": 14.217870257037944, "grad_norm": 2.434809116122178, "learning_rate": 3.346674657972116e-08, "loss": 0.6317, "step": 11616 }, { "epoch": 14.219094247246023, "grad_norm": 2.3299915232780894, "learning_rate": 3.336231470736639e-08, "loss": 0.4435, "step": 11617 }, { "epoch": 14.2203182374541, "grad_norm": 1.1982419714059898, "learning_rate": 3.3258044933316415e-08, "loss": 0.6674, "step": 11618 }, { "epoch": 14.221542227662178, "grad_norm": 1.6924698154810476, "learning_rate": 3.315393726442406e-08, "loss": 0.5466, "step": 11619 }, { "epoch": 14.222766217870257, "grad_norm": 2.006685676341781, "learning_rate": 3.304999170753054e-08, "loss": 0.4751, "step": 11620 }, { "epoch": 14.223990208078336, "grad_norm": 2.367508537765273, "learning_rate": 3.2946208269467316e-08, "loss": 0.4839, "step": 11621 }, { "epoch": 14.225214198286414, "grad_norm": 1.0091807194140585, "learning_rate": 3.284258695705394e-08, "loss": 0.4248, "step": 11622 }, { "epoch": 14.226438188494493, "grad_norm": 1.8851653798644707, "learning_rate": 3.273912777710081e-08, "loss": 0.4416, "step": 11623 }, { "epoch": 14.22766217870257, "grad_norm": 1.4621903372359608, "learning_rate": 3.2635830736406084e-08, "loss": 1.1437, "step": 11624 }, { "epoch": 14.228886168910648, "grad_norm": 2.246366029496762, "learning_rate": 3.25326958417585e-08, "loss": 0.5078, "step": 11625 }, { "epoch": 14.230110159118727, "grad_norm": 2.7910140967340133, "learning_rate": 3.2429723099935416e-08, "loss": 0.8857, "step": 11626 }, { "epoch": 14.231334149326806, "grad_norm": 2.7582763345124524, "learning_rate": 3.23269125177042e-08, "loss": 0.942, "step": 11627 }, { "epoch": 14.232558139534884, "grad_norm": 1.2417303717553203, "learning_rate": 3.222426410182111e-08, "loss": 0.8412, "step": 11628 }, { "epoch": 14.233782129742963, "grad_norm": 1.7119093260851967, "learning_rate": 3.2121777859031586e-08, "loss": 0.5125, "step": 11629 }, { "epoch": 14.23500611995104, "grad_norm": 1.2008851054970762, "learning_rate": 3.20194537960708e-08, "loss": 0.532, "step": 11630 }, { "epoch": 14.236230110159118, "grad_norm": 1.8617456707875482, "learning_rate": 3.191729191966308e-08, "loss": 0.2887, "step": 11631 }, { "epoch": 14.237454100367197, "grad_norm": 1.0472858500837887, "learning_rate": 3.1815292236521964e-08, "loss": 0.507, "step": 11632 }, { "epoch": 14.238678090575275, "grad_norm": 2.66187275135374, "learning_rate": 3.171345475335069e-08, "loss": 0.9408, "step": 11633 }, { "epoch": 14.239902080783354, "grad_norm": 1.9635684735716827, "learning_rate": 3.161177947684169e-08, "loss": 0.4794, "step": 11634 }, { "epoch": 14.241126070991433, "grad_norm": 2.083226513637041, "learning_rate": 3.1510266413676824e-08, "loss": 0.4935, "step": 11635 }, { "epoch": 14.24235006119951, "grad_norm": 2.8529470991982198, "learning_rate": 3.1408915570526886e-08, "loss": 0.818, "step": 11636 }, { "epoch": 14.243574051407588, "grad_norm": 2.651350017845024, "learning_rate": 3.1307726954052096e-08, "loss": 0.5419, "step": 11637 }, { "epoch": 14.244798041615667, "grad_norm": 2.2751694882032103, "learning_rate": 3.1206700570902416e-08, "loss": 1.0085, "step": 11638 }, { "epoch": 14.246022031823745, "grad_norm": 2.1477813456902917, "learning_rate": 3.110583642771725e-08, "loss": 0.5855, "step": 11639 }, { "epoch": 14.247246022031824, "grad_norm": 1.2614400898889264, "learning_rate": 3.100513453112436e-08, "loss": 0.5345, "step": 11640 }, { "epoch": 14.248470012239903, "grad_norm": 2.061670784508604, "learning_rate": 3.090459488774206e-08, "loss": 1.0281, "step": 11641 }, { "epoch": 14.249694002447981, "grad_norm": 1.1692629557981942, "learning_rate": 3.080421750417728e-08, "loss": 0.6359, "step": 11642 }, { "epoch": 14.250917992656058, "grad_norm": 1.8710895142469839, "learning_rate": 3.0704002387026144e-08, "loss": 0.4581, "step": 11643 }, { "epoch": 14.252141982864137, "grad_norm": 2.0934255053045745, "learning_rate": 3.0603949542875044e-08, "loss": 0.7678, "step": 11644 }, { "epoch": 14.253365973072215, "grad_norm": 1.1561156403568145, "learning_rate": 3.050405897829817e-08, "loss": 0.3584, "step": 11645 }, { "epoch": 14.254589963280294, "grad_norm": 1.8956602501359068, "learning_rate": 3.0404330699861097e-08, "loss": 0.7012, "step": 11646 }, { "epoch": 14.255813953488373, "grad_norm": 1.1679160510875775, "learning_rate": 3.030476471411664e-08, "loss": 0.5618, "step": 11647 }, { "epoch": 14.257037943696451, "grad_norm": 3.326333466463565, "learning_rate": 3.020536102760846e-08, "loss": 0.4488, "step": 11648 }, { "epoch": 14.258261933904528, "grad_norm": 3.1786477860261986, "learning_rate": 3.0106119646868825e-08, "loss": 0.6886, "step": 11649 }, { "epoch": 14.259485924112607, "grad_norm": 1.3481901641623955, "learning_rate": 3.000704057841919e-08, "loss": 0.5045, "step": 11650 }, { "epoch": 14.260709914320685, "grad_norm": 1.6214457424078033, "learning_rate": 2.990812382877128e-08, "loss": 0.9166, "step": 11651 }, { "epoch": 14.261933904528764, "grad_norm": 1.3075330202586593, "learning_rate": 2.980936940442547e-08, "loss": 0.9705, "step": 11652 }, { "epoch": 14.263157894736842, "grad_norm": 1.7263405859211527, "learning_rate": 2.9710777311871e-08, "loss": 1.225, "step": 11653 }, { "epoch": 14.264381884944921, "grad_norm": 2.7406729117748556, "learning_rate": 2.961234755758713e-08, "loss": 0.3837, "step": 11654 }, { "epoch": 14.265605875152998, "grad_norm": 2.0452176854971413, "learning_rate": 2.951408014804258e-08, "loss": 0.5401, "step": 11655 }, { "epoch": 14.266829865361077, "grad_norm": 1.9630203490975757, "learning_rate": 2.9415975089694958e-08, "loss": 0.6869, "step": 11656 }, { "epoch": 14.268053855569155, "grad_norm": 2.6266351401127044, "learning_rate": 2.9318032388991336e-08, "loss": 0.5979, "step": 11657 }, { "epoch": 14.269277845777234, "grad_norm": 1.0761070303267677, "learning_rate": 2.9220252052368226e-08, "loss": 0.6256, "step": 11658 }, { "epoch": 14.270501835985312, "grad_norm": 2.758011128934539, "learning_rate": 2.91226340862516e-08, "loss": 0.4539, "step": 11659 }, { "epoch": 14.271725826193391, "grad_norm": 2.0305520162915047, "learning_rate": 2.902517849705605e-08, "loss": 0.9523, "step": 11660 }, { "epoch": 14.27294981640147, "grad_norm": 1.0392629944070975, "learning_rate": 2.8927885291186454e-08, "loss": 0.3421, "step": 11661 }, { "epoch": 14.274173806609546, "grad_norm": 1.566445192548322, "learning_rate": 2.8830754475036306e-08, "loss": 0.5453, "step": 11662 }, { "epoch": 14.275397796817625, "grad_norm": 1.8481900540335332, "learning_rate": 2.8733786054988554e-08, "loss": 0.4956, "step": 11663 }, { "epoch": 14.276621787025704, "grad_norm": 2.023655132184568, "learning_rate": 2.8636980037416162e-08, "loss": 0.409, "step": 11664 }, { "epoch": 14.277845777233782, "grad_norm": 1.8633761956167059, "learning_rate": 2.8540336428680427e-08, "loss": 0.4528, "step": 11665 }, { "epoch": 14.279069767441861, "grad_norm": 2.155644878627489, "learning_rate": 2.844385523513238e-08, "loss": 0.4007, "step": 11666 }, { "epoch": 14.28029375764994, "grad_norm": 3.0185114540935554, "learning_rate": 2.8347536463112502e-08, "loss": 0.4122, "step": 11667 }, { "epoch": 14.281517747858016, "grad_norm": 1.923739816677876, "learning_rate": 2.8251380118950456e-08, "loss": 0.5188, "step": 11668 }, { "epoch": 14.282741738066095, "grad_norm": 2.190569808509688, "learning_rate": 2.815538620896535e-08, "loss": 0.5384, "step": 11669 }, { "epoch": 14.283965728274174, "grad_norm": 1.8312989130086386, "learning_rate": 2.8059554739465756e-08, "loss": 0.9797, "step": 11670 }, { "epoch": 14.285189718482252, "grad_norm": 1.6146703087694012, "learning_rate": 2.7963885716748852e-08, "loss": 0.5304, "step": 11671 }, { "epoch": 14.286413708690331, "grad_norm": 1.4179713287610263, "learning_rate": 2.7868379147102108e-08, "loss": 0.6746, "step": 11672 }, { "epoch": 14.28763769889841, "grad_norm": 1.3252836946576443, "learning_rate": 2.77730350368019e-08, "loss": 0.3982, "step": 11673 }, { "epoch": 14.288861689106486, "grad_norm": 1.3988438935532834, "learning_rate": 2.7677853392113486e-08, "loss": 0.384, "step": 11674 }, { "epoch": 14.290085679314565, "grad_norm": 1.5162230799891747, "learning_rate": 2.758283421929214e-08, "loss": 0.789, "step": 11675 }, { "epoch": 14.291309669522644, "grad_norm": 2.5323268335299303, "learning_rate": 2.7487977524582043e-08, "loss": 0.3862, "step": 11676 }, { "epoch": 14.292533659730722, "grad_norm": 2.6920520886903447, "learning_rate": 2.739328331421681e-08, "loss": 0.4998, "step": 11677 }, { "epoch": 14.2937576499388, "grad_norm": 1.705254791363702, "learning_rate": 2.7298751594419527e-08, "loss": 0.3803, "step": 11678 }, { "epoch": 14.29498164014688, "grad_norm": 1.1733221769327755, "learning_rate": 2.7204382371402437e-08, "loss": 0.3651, "step": 11679 }, { "epoch": 14.296205630354958, "grad_norm": 2.015471664894745, "learning_rate": 2.71101756513667e-08, "loss": 0.4745, "step": 11680 }, { "epoch": 14.297429620563035, "grad_norm": 2.390686038277476, "learning_rate": 2.7016131440504302e-08, "loss": 0.296, "step": 11681 }, { "epoch": 14.298653610771114, "grad_norm": 2.031379861002272, "learning_rate": 2.6922249744994466e-08, "loss": 0.3875, "step": 11682 }, { "epoch": 14.299877600979192, "grad_norm": 2.165849881958494, "learning_rate": 2.6828530571006983e-08, "loss": 0.6617, "step": 11683 }, { "epoch": 14.30110159118727, "grad_norm": 1.5262334771236559, "learning_rate": 2.6734973924700815e-08, "loss": 0.6095, "step": 11684 }, { "epoch": 14.30232558139535, "grad_norm": 1.2255133684029256, "learning_rate": 2.6641579812224373e-08, "loss": 0.4723, "step": 11685 }, { "epoch": 14.303549571603428, "grad_norm": 1.434240934499408, "learning_rate": 2.6548348239714972e-08, "loss": 1.5531, "step": 11686 }, { "epoch": 14.304773561811505, "grad_norm": 1.0822822409555777, "learning_rate": 2.6455279213299655e-08, "loss": 0.3941, "step": 11687 }, { "epoch": 14.305997552019583, "grad_norm": 3.3835073281953574, "learning_rate": 2.6362372739094365e-08, "loss": 0.3683, "step": 11688 }, { "epoch": 14.307221542227662, "grad_norm": 1.3153805424307832, "learning_rate": 2.626962882320422e-08, "loss": 0.4445, "step": 11689 }, { "epoch": 14.30844553243574, "grad_norm": 1.6171247350862432, "learning_rate": 2.6177047471724337e-08, "loss": 0.5649, "step": 11690 }, { "epoch": 14.30966952264382, "grad_norm": 1.676473528603012, "learning_rate": 2.60846286907393e-08, "loss": 0.6957, "step": 11691 }, { "epoch": 14.310893512851898, "grad_norm": 1.203470415982053, "learning_rate": 2.5992372486321748e-08, "loss": 0.6893, "step": 11692 }, { "epoch": 14.312117503059975, "grad_norm": 2.6076008675748636, "learning_rate": 2.5900278864534888e-08, "loss": 0.9651, "step": 11693 }, { "epoch": 14.313341493268053, "grad_norm": 2.2134449266285725, "learning_rate": 2.5808347831430547e-08, "loss": 0.9627, "step": 11694 }, { "epoch": 14.314565483476132, "grad_norm": 1.601688109821816, "learning_rate": 2.571657939305028e-08, "loss": 0.5187, "step": 11695 }, { "epoch": 14.31578947368421, "grad_norm": 2.070009315730931, "learning_rate": 2.5624973555424816e-08, "loss": 0.4734, "step": 11696 }, { "epoch": 14.31701346389229, "grad_norm": 2.9623710786535082, "learning_rate": 2.5533530324573507e-08, "loss": 0.4923, "step": 11697 }, { "epoch": 14.318237454100368, "grad_norm": 2.661553502018858, "learning_rate": 2.544224970650655e-08, "loss": 0.5678, "step": 11698 }, { "epoch": 14.319461444308445, "grad_norm": 1.428219079259293, "learning_rate": 2.5351131707222197e-08, "loss": 0.4956, "step": 11699 }, { "epoch": 14.320685434516523, "grad_norm": 2.6141243373854324, "learning_rate": 2.5260176332708443e-08, "loss": 0.2217, "step": 11700 }, { "epoch": 14.321909424724602, "grad_norm": 1.878326016154184, "learning_rate": 2.5169383588942443e-08, "loss": 1.4938, "step": 11701 }, { "epoch": 14.32313341493268, "grad_norm": 1.8312593042792251, "learning_rate": 2.507875348189054e-08, "loss": 0.5722, "step": 11702 }, { "epoch": 14.32435740514076, "grad_norm": 1.536563667054103, "learning_rate": 2.49882860175088e-08, "loss": 1.0675, "step": 11703 }, { "epoch": 14.325581395348838, "grad_norm": 1.425857044544653, "learning_rate": 2.489798120174275e-08, "loss": 0.3611, "step": 11704 }, { "epoch": 14.326805385556916, "grad_norm": 1.9717003876714219, "learning_rate": 2.4807839040526527e-08, "loss": 0.9571, "step": 11705 }, { "epoch": 14.328029375764993, "grad_norm": 1.9549049337500595, "learning_rate": 2.4717859539784006e-08, "loss": 0.4509, "step": 11706 }, { "epoch": 14.329253365973072, "grad_norm": 3.000171286213234, "learning_rate": 2.462804270542851e-08, "loss": 0.951, "step": 11707 }, { "epoch": 14.33047735618115, "grad_norm": 2.096143845073606, "learning_rate": 2.453838854336199e-08, "loss": 0.7605, "step": 11708 }, { "epoch": 14.331701346389229, "grad_norm": 2.183883198097502, "learning_rate": 2.444889705947667e-08, "loss": 0.4002, "step": 11709 }, { "epoch": 14.332925336597308, "grad_norm": 1.5109545455747402, "learning_rate": 2.435956825965341e-08, "loss": 0.6144, "step": 11710 }, { "epoch": 14.334149326805386, "grad_norm": 1.7705403756815545, "learning_rate": 2.4270402149762506e-08, "loss": 1.3939, "step": 11711 }, { "epoch": 14.335373317013463, "grad_norm": 1.255307885829857, "learning_rate": 2.4181398735663442e-08, "loss": 0.5066, "step": 11712 }, { "epoch": 14.336597307221542, "grad_norm": 2.1887185896535266, "learning_rate": 2.4092558023205703e-08, "loss": 0.8053, "step": 11713 }, { "epoch": 14.33782129742962, "grad_norm": 2.947993249459628, "learning_rate": 2.40038800182274e-08, "loss": 0.347, "step": 11714 }, { "epoch": 14.339045287637699, "grad_norm": 1.7214416567497126, "learning_rate": 2.391536472655581e-08, "loss": 0.5546, "step": 11715 }, { "epoch": 14.340269277845778, "grad_norm": 1.094444799582268, "learning_rate": 2.3827012154007957e-08, "loss": 0.587, "step": 11716 }, { "epoch": 14.341493268053856, "grad_norm": 1.9003232496011773, "learning_rate": 2.3738822306390575e-08, "loss": 0.6058, "step": 11717 }, { "epoch": 14.342717258261933, "grad_norm": 2.0278507527153016, "learning_rate": 2.3650795189498478e-08, "loss": 1.1802, "step": 11718 }, { "epoch": 14.343941248470012, "grad_norm": 2.2058271188254324, "learning_rate": 2.356293080911648e-08, "loss": 1.0622, "step": 11719 }, { "epoch": 14.34516523867809, "grad_norm": 1.3933374008279338, "learning_rate": 2.3475229171019132e-08, "loss": 0.4582, "step": 11720 }, { "epoch": 14.346389228886169, "grad_norm": 1.6051290188013752, "learning_rate": 2.3387690280969876e-08, "loss": 0.9236, "step": 11721 }, { "epoch": 14.347613219094248, "grad_norm": 2.463631406373448, "learning_rate": 2.330031414472106e-08, "loss": 0.581, "step": 11722 }, { "epoch": 14.348837209302326, "grad_norm": 2.5316313477642587, "learning_rate": 2.3213100768015308e-08, "loss": 0.9192, "step": 11723 }, { "epoch": 14.350061199510403, "grad_norm": 1.652838283588307, "learning_rate": 2.3126050156583034e-08, "loss": 0.4709, "step": 11724 }, { "epoch": 14.351285189718482, "grad_norm": 2.0461511259403755, "learning_rate": 2.303916231614578e-08, "loss": 0.5108, "step": 11725 }, { "epoch": 14.35250917992656, "grad_norm": 2.6728897673054326, "learning_rate": 2.2952437252412863e-08, "loss": 0.6141, "step": 11726 }, { "epoch": 14.353733170134639, "grad_norm": 1.9605931347826409, "learning_rate": 2.286587497108389e-08, "loss": 0.6778, "step": 11727 }, { "epoch": 14.354957160342718, "grad_norm": 1.411126660408548, "learning_rate": 2.2779475477847368e-08, "loss": 0.4454, "step": 11728 }, { "epoch": 14.356181150550796, "grad_norm": 2.3295372341044795, "learning_rate": 2.2693238778381254e-08, "loss": 0.8104, "step": 11729 }, { "epoch": 14.357405140758875, "grad_norm": 1.5240227712258276, "learning_rate": 2.2607164878352405e-08, "loss": 1.3019, "step": 11730 }, { "epoch": 14.358629130966952, "grad_norm": 1.2428271538947602, "learning_rate": 2.2521253783417406e-08, "loss": 0.5789, "step": 11731 }, { "epoch": 14.35985312117503, "grad_norm": 2.326192198932301, "learning_rate": 2.2435505499222023e-08, "loss": 0.476, "step": 11732 }, { "epoch": 14.361077111383109, "grad_norm": 3.8985033997031717, "learning_rate": 2.2349920031401463e-08, "loss": 0.4136, "step": 11733 }, { "epoch": 14.362301101591187, "grad_norm": 2.005983974230653, "learning_rate": 2.226449738558012e-08, "loss": 0.3536, "step": 11734 }, { "epoch": 14.363525091799266, "grad_norm": 1.3399811877289287, "learning_rate": 2.217923756737128e-08, "loss": 0.4874, "step": 11735 }, { "epoch": 14.364749082007345, "grad_norm": 1.3984865811348874, "learning_rate": 2.209414058237824e-08, "loss": 0.5025, "step": 11736 }, { "epoch": 14.365973072215422, "grad_norm": 2.2487304932718466, "learning_rate": 2.200920643619292e-08, "loss": 0.8143, "step": 11737 }, { "epoch": 14.3671970624235, "grad_norm": 2.4424634801401615, "learning_rate": 2.1924435134397237e-08, "loss": 0.4624, "step": 11738 }, { "epoch": 14.368421052631579, "grad_norm": 1.8771345416346272, "learning_rate": 2.1839826682562015e-08, "loss": 0.4934, "step": 11739 }, { "epoch": 14.369645042839657, "grad_norm": 1.242761941608854, "learning_rate": 2.175538108624725e-08, "loss": 0.8384, "step": 11740 }, { "epoch": 14.370869033047736, "grad_norm": 1.0988167645982703, "learning_rate": 2.1671098351002394e-08, "loss": 0.5038, "step": 11741 }, { "epoch": 14.372093023255815, "grad_norm": 1.7175959759758357, "learning_rate": 2.1586978482366072e-08, "loss": 0.5412, "step": 11742 }, { "epoch": 14.373317013463891, "grad_norm": 1.8701375613710491, "learning_rate": 2.1503021485866914e-08, "loss": 1.003, "step": 11743 }, { "epoch": 14.37454100367197, "grad_norm": 1.2633508770820023, "learning_rate": 2.1419227367021624e-08, "loss": 0.4675, "step": 11744 }, { "epoch": 14.375764993880049, "grad_norm": 3.6372614061318895, "learning_rate": 2.13355961313369e-08, "loss": 0.4024, "step": 11745 }, { "epoch": 14.376988984088127, "grad_norm": 2.522834002658433, "learning_rate": 2.1252127784309183e-08, "loss": 0.6006, "step": 11746 }, { "epoch": 14.378212974296206, "grad_norm": 1.1406569759017657, "learning_rate": 2.1168822331423255e-08, "loss": 0.5339, "step": 11747 }, { "epoch": 14.379436964504285, "grad_norm": 1.9533712713502616, "learning_rate": 2.1085679778153622e-08, "loss": 0.9957, "step": 11748 }, { "epoch": 14.380660954712361, "grad_norm": 2.5569573918808204, "learning_rate": 2.1002700129964527e-08, "loss": 0.5301, "step": 11749 }, { "epoch": 14.38188494492044, "grad_norm": 2.598140699947736, "learning_rate": 2.0919883392308548e-08, "loss": 0.5079, "step": 11750 }, { "epoch": 14.383108935128519, "grad_norm": 1.5535158980739432, "learning_rate": 2.0837229570628283e-08, "loss": 0.8665, "step": 11751 }, { "epoch": 14.384332925336597, "grad_norm": 3.010698931818374, "learning_rate": 2.0754738670355767e-08, "loss": 0.8149, "step": 11752 }, { "epoch": 14.385556915544676, "grad_norm": 1.6567035073889302, "learning_rate": 2.067241069691167e-08, "loss": 0.3504, "step": 11753 }, { "epoch": 14.386780905752754, "grad_norm": 1.337612912356196, "learning_rate": 2.0590245655706386e-08, "loss": 0.598, "step": 11754 }, { "epoch": 14.388004895960833, "grad_norm": 2.458712013143854, "learning_rate": 2.0508243552139206e-08, "loss": 0.491, "step": 11755 }, { "epoch": 14.38922888616891, "grad_norm": 1.5971211357046111, "learning_rate": 2.0426404391599432e-08, "loss": 1.1109, "step": 11756 }, { "epoch": 14.390452876376989, "grad_norm": 1.4175683273732314, "learning_rate": 2.0344728179464977e-08, "loss": 0.3542, "step": 11757 }, { "epoch": 14.391676866585067, "grad_norm": 2.202986575845754, "learning_rate": 2.0263214921103503e-08, "loss": 0.5061, "step": 11758 }, { "epoch": 14.392900856793146, "grad_norm": 1.5589195402199008, "learning_rate": 2.018186462187155e-08, "loss": 0.5397, "step": 11759 }, { "epoch": 14.394124847001224, "grad_norm": 2.210875263394031, "learning_rate": 2.01006772871154e-08, "loss": 0.7073, "step": 11760 }, { "epoch": 14.395348837209303, "grad_norm": 1.4550626386564818, "learning_rate": 2.0019652922169953e-08, "loss": 1.1252, "step": 11761 }, { "epoch": 14.39657282741738, "grad_norm": 1.60376420276922, "learning_rate": 1.993879153236039e-08, "loss": 0.4212, "step": 11762 }, { "epoch": 14.397796817625458, "grad_norm": 2.267791986931107, "learning_rate": 1.9858093122999965e-08, "loss": 0.4612, "step": 11763 }, { "epoch": 14.399020807833537, "grad_norm": 1.1373166688957719, "learning_rate": 1.977755769939277e-08, "loss": 0.3834, "step": 11764 }, { "epoch": 14.400244798041616, "grad_norm": 2.7349403066868194, "learning_rate": 1.9697185266830397e-08, "loss": 0.7153, "step": 11765 }, { "epoch": 14.401468788249694, "grad_norm": 1.996663612541804, "learning_rate": 1.961697583059502e-08, "loss": 1.0988, "step": 11766 }, { "epoch": 14.402692778457773, "grad_norm": 2.707934784550659, "learning_rate": 1.9536929395957693e-08, "loss": 0.3385, "step": 11767 }, { "epoch": 14.403916768665852, "grad_norm": 3.08854396899621, "learning_rate": 1.9457045968178655e-08, "loss": 0.3931, "step": 11768 }, { "epoch": 14.405140758873928, "grad_norm": 1.7061962053028807, "learning_rate": 1.9377325552507875e-08, "loss": 0.5041, "step": 11769 }, { "epoch": 14.406364749082007, "grad_norm": 1.6234895462022758, "learning_rate": 1.9297768154184217e-08, "loss": 1.5378, "step": 11770 }, { "epoch": 14.407588739290086, "grad_norm": 0.8689854084722297, "learning_rate": 1.9218373778435438e-08, "loss": 0.3484, "step": 11771 }, { "epoch": 14.408812729498164, "grad_norm": 2.0296730210407166, "learning_rate": 1.9139142430479596e-08, "loss": 1.0414, "step": 11772 }, { "epoch": 14.410036719706243, "grad_norm": 2.4730986206468595, "learning_rate": 1.9060074115523075e-08, "loss": 0.4559, "step": 11773 }, { "epoch": 14.411260709914322, "grad_norm": 1.368271376042541, "learning_rate": 1.8981168838762276e-08, "loss": 0.39, "step": 11774 }, { "epoch": 14.412484700122398, "grad_norm": 1.764557077058254, "learning_rate": 1.890242660538222e-08, "loss": 0.6112, "step": 11775 }, { "epoch": 14.413708690330477, "grad_norm": 2.8675963537706797, "learning_rate": 1.8823847420557928e-08, "loss": 0.4314, "step": 11776 }, { "epoch": 14.414932680538556, "grad_norm": 1.8984365108407846, "learning_rate": 1.8745431289453054e-08, "loss": 0.5396, "step": 11777 }, { "epoch": 14.416156670746634, "grad_norm": 1.1476624182569166, "learning_rate": 1.8667178217220972e-08, "loss": 0.5751, "step": 11778 }, { "epoch": 14.417380660954713, "grad_norm": 2.005957403813264, "learning_rate": 1.8589088209003957e-08, "loss": 0.5164, "step": 11779 }, { "epoch": 14.418604651162791, "grad_norm": 1.2404171614270372, "learning_rate": 1.851116126993402e-08, "loss": 0.4981, "step": 11780 }, { "epoch": 14.419828641370868, "grad_norm": 1.1480485945897576, "learning_rate": 1.8433397405132336e-08, "loss": 0.6582, "step": 11781 }, { "epoch": 14.421052631578947, "grad_norm": 1.8835107570680054, "learning_rate": 1.8355796619708988e-08, "loss": 0.978, "step": 11782 }, { "epoch": 14.422276621787026, "grad_norm": 1.4946091528276997, "learning_rate": 1.8278358918763507e-08, "loss": 0.7933, "step": 11783 }, { "epoch": 14.423500611995104, "grad_norm": 2.0859782857346985, "learning_rate": 1.8201084307385152e-08, "loss": 0.5897, "step": 11784 }, { "epoch": 14.424724602203183, "grad_norm": 1.7032582920274157, "learning_rate": 1.812397279065209e-08, "loss": 1.2233, "step": 11785 }, { "epoch": 14.425948592411261, "grad_norm": 3.686123481091353, "learning_rate": 1.804702437363165e-08, "loss": 0.3361, "step": 11786 }, { "epoch": 14.427172582619338, "grad_norm": 2.2311141265503185, "learning_rate": 1.79702390613809e-08, "loss": 1.0975, "step": 11787 }, { "epoch": 14.428396572827417, "grad_norm": 1.3050973454066233, "learning_rate": 1.7893616858945527e-08, "loss": 0.7575, "step": 11788 }, { "epoch": 14.429620563035495, "grad_norm": 1.613549286430271, "learning_rate": 1.7817157771360672e-08, "loss": 0.5569, "step": 11789 }, { "epoch": 14.430844553243574, "grad_norm": 1.726274143875277, "learning_rate": 1.774086180365148e-08, "loss": 0.9227, "step": 11790 }, { "epoch": 14.432068543451653, "grad_norm": 3.199461748609514, "learning_rate": 1.766472896083171e-08, "loss": 0.3655, "step": 11791 }, { "epoch": 14.433292533659731, "grad_norm": 2.38988802438142, "learning_rate": 1.7588759247904318e-08, "loss": 0.366, "step": 11792 }, { "epoch": 14.43451652386781, "grad_norm": 1.8449225782756353, "learning_rate": 1.751295266986197e-08, "loss": 0.4016, "step": 11793 }, { "epoch": 14.435740514075887, "grad_norm": 1.249866727807637, "learning_rate": 1.743730923168624e-08, "loss": 0.5958, "step": 11794 }, { "epoch": 14.436964504283965, "grad_norm": 1.7649587090711278, "learning_rate": 1.7361828938348146e-08, "loss": 0.424, "step": 11795 }, { "epoch": 14.438188494492044, "grad_norm": 1.901882166204179, "learning_rate": 1.728651179480789e-08, "loss": 1.584, "step": 11796 }, { "epoch": 14.439412484700123, "grad_norm": 1.136078275604049, "learning_rate": 1.7211357806015128e-08, "loss": 0.6762, "step": 11797 }, { "epoch": 14.440636474908201, "grad_norm": 1.9187996126188502, "learning_rate": 1.713636697690896e-08, "loss": 1.0033, "step": 11798 }, { "epoch": 14.44186046511628, "grad_norm": 2.4646574683137126, "learning_rate": 1.7061539312417107e-08, "loss": 0.4362, "step": 11799 }, { "epoch": 14.443084455324357, "grad_norm": 2.512580202228061, "learning_rate": 1.698687481745731e-08, "loss": 0.4129, "step": 11800 }, { "epoch": 14.444308445532435, "grad_norm": 2.1536094623987103, "learning_rate": 1.691237349693592e-08, "loss": 0.5839, "step": 11801 }, { "epoch": 14.445532435740514, "grad_norm": 1.3789985928296056, "learning_rate": 1.6838035355749017e-08, "loss": 0.5712, "step": 11802 }, { "epoch": 14.446756425948593, "grad_norm": 3.4174334077344595, "learning_rate": 1.6763860398781863e-08, "loss": 0.334, "step": 11803 }, { "epoch": 14.447980416156671, "grad_norm": 1.7336034070909205, "learning_rate": 1.6689848630909166e-08, "loss": 0.5473, "step": 11804 }, { "epoch": 14.44920440636475, "grad_norm": 2.375920381884651, "learning_rate": 1.6616000056994264e-08, "loss": 0.4302, "step": 11805 }, { "epoch": 14.450428396572827, "grad_norm": 2.4810399807205457, "learning_rate": 1.654231468189049e-08, "loss": 0.5688, "step": 11806 }, { "epoch": 14.451652386780905, "grad_norm": 1.4394338433680323, "learning_rate": 1.6468792510440368e-08, "loss": 0.7348, "step": 11807 }, { "epoch": 14.452876376988984, "grad_norm": 1.8965097914352775, "learning_rate": 1.6395433547475025e-08, "loss": 0.5376, "step": 11808 }, { "epoch": 14.454100367197062, "grad_norm": 2.1341089191605405, "learning_rate": 1.6322237797815332e-08, "loss": 1.0636, "step": 11809 }, { "epoch": 14.455324357405141, "grad_norm": 2.0684190606030652, "learning_rate": 1.624920526627216e-08, "loss": 0.526, "step": 11810 }, { "epoch": 14.45654834761322, "grad_norm": 1.7125353162899737, "learning_rate": 1.6176335957644174e-08, "loss": 0.503, "step": 11811 }, { "epoch": 14.457772337821297, "grad_norm": 1.4007636509966792, "learning_rate": 1.6103629876720318e-08, "loss": 0.8919, "step": 11812 }, { "epoch": 14.458996328029375, "grad_norm": 3.234711150044288, "learning_rate": 1.6031087028278712e-08, "loss": 0.3323, "step": 11813 }, { "epoch": 14.460220318237454, "grad_norm": 1.4460708387005283, "learning_rate": 1.595870741708666e-08, "loss": 0.6733, "step": 11814 }, { "epoch": 14.461444308445532, "grad_norm": 1.4269761311710913, "learning_rate": 1.5886491047900076e-08, "loss": 0.6901, "step": 11815 }, { "epoch": 14.462668298653611, "grad_norm": 3.9653498934754503, "learning_rate": 1.581443792546544e-08, "loss": 0.3425, "step": 11816 }, { "epoch": 14.46389228886169, "grad_norm": 1.308485454500981, "learning_rate": 1.574254805451786e-08, "loss": 0.5265, "step": 11817 }, { "epoch": 14.465116279069768, "grad_norm": 1.266865276911998, "learning_rate": 1.567082143978105e-08, "loss": 0.474, "step": 11818 }, { "epoch": 14.466340269277845, "grad_norm": 3.1993644843224747, "learning_rate": 1.5599258085969016e-08, "loss": 0.4501, "step": 11819 }, { "epoch": 14.467564259485924, "grad_norm": 2.712878966537097, "learning_rate": 1.5527857997784668e-08, "loss": 0.3871, "step": 11820 }, { "epoch": 14.468788249694002, "grad_norm": 1.6318394739688387, "learning_rate": 1.5456621179920084e-08, "loss": 0.506, "step": 11821 }, { "epoch": 14.470012239902081, "grad_norm": 1.7845639001870235, "learning_rate": 1.5385547637056518e-08, "loss": 0.2996, "step": 11822 }, { "epoch": 14.47123623011016, "grad_norm": 0.9936944391624243, "learning_rate": 1.531463737386496e-08, "loss": 0.4905, "step": 11823 }, { "epoch": 14.472460220318238, "grad_norm": 1.1731937438076323, "learning_rate": 1.5243890395005012e-08, "loss": 0.5994, "step": 11824 }, { "epoch": 14.473684210526315, "grad_norm": 1.5332970122576097, "learning_rate": 1.517330670512629e-08, "loss": 0.7045, "step": 11825 }, { "epoch": 14.474908200734394, "grad_norm": 2.467300168526655, "learning_rate": 1.5102886308867027e-08, "loss": 0.5305, "step": 11826 }, { "epoch": 14.476132190942472, "grad_norm": 2.6390742502230475, "learning_rate": 1.5032629210855188e-08, "loss": 0.3123, "step": 11827 }, { "epoch": 14.477356181150551, "grad_norm": 1.7540093059336526, "learning_rate": 1.4962535415707634e-08, "loss": 0.5173, "step": 11828 }, { "epoch": 14.47858017135863, "grad_norm": 0.972641891192008, "learning_rate": 1.4892604928030685e-08, "loss": 0.4371, "step": 11829 }, { "epoch": 14.479804161566708, "grad_norm": 1.5555448608481006, "learning_rate": 1.4822837752420106e-08, "loss": 1.396, "step": 11830 }, { "epoch": 14.481028151774785, "grad_norm": 2.1405433933521834, "learning_rate": 1.4753233893460561e-08, "loss": 0.4216, "step": 11831 }, { "epoch": 14.482252141982864, "grad_norm": 2.9165378635399026, "learning_rate": 1.4683793355725895e-08, "loss": 0.2022, "step": 11832 }, { "epoch": 14.483476132190942, "grad_norm": 2.901719301365012, "learning_rate": 1.4614516143780233e-08, "loss": 1.051, "step": 11833 }, { "epoch": 14.48470012239902, "grad_norm": 3.0236898440651006, "learning_rate": 1.454540226217549e-08, "loss": 0.7308, "step": 11834 }, { "epoch": 14.4859241126071, "grad_norm": 2.3528956915174, "learning_rate": 1.4476451715453865e-08, "loss": 0.5211, "step": 11835 }, { "epoch": 14.487148102815178, "grad_norm": 1.7657759389375858, "learning_rate": 1.4407664508146456e-08, "loss": 0.9009, "step": 11836 }, { "epoch": 14.488372093023255, "grad_norm": 1.4764207347625293, "learning_rate": 1.4339040644774094e-08, "loss": 0.4252, "step": 11837 }, { "epoch": 14.489596083231334, "grad_norm": 1.156354300069392, "learning_rate": 1.4270580129845946e-08, "loss": 0.6266, "step": 11838 }, { "epoch": 14.490820073439412, "grad_norm": 1.5976375105950849, "learning_rate": 1.4202282967861192e-08, "loss": 0.5217, "step": 11839 }, { "epoch": 14.49204406364749, "grad_norm": 1.8725890922041513, "learning_rate": 1.4134149163307908e-08, "loss": 0.9049, "step": 11840 }, { "epoch": 14.49326805385557, "grad_norm": 2.325899575857879, "learning_rate": 1.4066178720663904e-08, "loss": 0.5483, "step": 11841 }, { "epoch": 14.494492044063648, "grad_norm": 1.7233622950815752, "learning_rate": 1.399837164439588e-08, "loss": 0.7079, "step": 11842 }, { "epoch": 14.495716034271727, "grad_norm": 1.6373487622783311, "learning_rate": 1.393072793895972e-08, "loss": 0.3732, "step": 11843 }, { "epoch": 14.496940024479803, "grad_norm": 1.5393461812813325, "learning_rate": 1.3863247608800756e-08, "loss": 0.7452, "step": 11844 }, { "epoch": 14.498164014687882, "grad_norm": 1.5735384743488203, "learning_rate": 1.3795930658353495e-08, "loss": 0.5683, "step": 11845 }, { "epoch": 14.49938800489596, "grad_norm": 1.4071899566763624, "learning_rate": 1.3728777092042179e-08, "loss": 0.7336, "step": 11846 }, { "epoch": 14.50061199510404, "grad_norm": 1.1644197447096472, "learning_rate": 1.3661786914279384e-08, "loss": 0.4126, "step": 11847 }, { "epoch": 14.501835985312118, "grad_norm": 1.9215345178696646, "learning_rate": 1.3594960129467427e-08, "loss": 0.4465, "step": 11848 }, { "epoch": 14.503059975520197, "grad_norm": 1.4384645458134662, "learning_rate": 1.352829674199807e-08, "loss": 1.5609, "step": 11849 }, { "epoch": 14.504283965728273, "grad_norm": 1.4054785686746254, "learning_rate": 1.3461796756252531e-08, "loss": 0.4362, "step": 11850 }, { "epoch": 14.505507955936352, "grad_norm": 2.728758969139984, "learning_rate": 1.339546017660065e-08, "loss": 0.5048, "step": 11851 }, { "epoch": 14.50673194614443, "grad_norm": 1.477308892430505, "learning_rate": 1.3329287007401715e-08, "loss": 1.3063, "step": 11852 }, { "epoch": 14.50795593635251, "grad_norm": 1.6010467500319885, "learning_rate": 1.326327725300447e-08, "loss": 0.3175, "step": 11853 }, { "epoch": 14.509179926560588, "grad_norm": 1.9201227859345615, "learning_rate": 1.3197430917746557e-08, "loss": 0.7383, "step": 11854 }, { "epoch": 14.510403916768666, "grad_norm": 1.4805472450593995, "learning_rate": 1.3131748005955903e-08, "loss": 0.5399, "step": 11855 }, { "epoch": 14.511627906976745, "grad_norm": 1.38502471310643, "learning_rate": 1.3066228521948221e-08, "loss": 0.6665, "step": 11856 }, { "epoch": 14.512851897184822, "grad_norm": 2.5982048097192707, "learning_rate": 1.3000872470029513e-08, "loss": 0.4914, "step": 11857 }, { "epoch": 14.5140758873929, "grad_norm": 1.5884836363063317, "learning_rate": 1.2935679854494677e-08, "loss": 0.6063, "step": 11858 }, { "epoch": 14.51529987760098, "grad_norm": 2.111370198701462, "learning_rate": 1.2870650679628061e-08, "loss": 0.3139, "step": 11859 }, { "epoch": 14.516523867809058, "grad_norm": 3.0783092234918326, "learning_rate": 1.2805784949702916e-08, "loss": 0.3329, "step": 11860 }, { "epoch": 14.517747858017136, "grad_norm": 1.8290989754725602, "learning_rate": 1.2741082668982219e-08, "loss": 1.0947, "step": 11861 }, { "epoch": 14.518971848225215, "grad_norm": 1.6199585479820948, "learning_rate": 1.267654384171757e-08, "loss": 2.2747, "step": 11862 }, { "epoch": 14.520195838433292, "grad_norm": 2.420293015904689, "learning_rate": 1.2612168472150577e-08, "loss": 0.4903, "step": 11863 }, { "epoch": 14.52141982864137, "grad_norm": 1.3422236150452398, "learning_rate": 1.254795656451202e-08, "loss": 0.77, "step": 11864 }, { "epoch": 14.522643818849449, "grad_norm": 1.5390193212206982, "learning_rate": 1.2483908123021027e-08, "loss": 0.9435, "step": 11865 }, { "epoch": 14.523867809057528, "grad_norm": 1.8621453150722713, "learning_rate": 1.2420023151886729e-08, "loss": 0.5602, "step": 11866 }, { "epoch": 14.525091799265606, "grad_norm": 2.8605052832350903, "learning_rate": 1.2356301655307711e-08, "loss": 0.4965, "step": 11867 }, { "epoch": 14.526315789473685, "grad_norm": 1.54081937197799, "learning_rate": 1.2292743637471461e-08, "loss": 0.8848, "step": 11868 }, { "epoch": 14.527539779681762, "grad_norm": 1.5147516651083055, "learning_rate": 1.2229349102554633e-08, "loss": 0.524, "step": 11869 }, { "epoch": 14.52876376988984, "grad_norm": 1.4956979398730028, "learning_rate": 1.216611805472362e-08, "loss": 1.1114, "step": 11870 }, { "epoch": 14.529987760097919, "grad_norm": 1.1498299068457392, "learning_rate": 1.2103050498133151e-08, "loss": 0.5324, "step": 11871 }, { "epoch": 14.531211750305998, "grad_norm": 3.6704461662364696, "learning_rate": 1.2040146436928247e-08, "loss": 0.4145, "step": 11872 }, { "epoch": 14.532435740514076, "grad_norm": 1.690375561608401, "learning_rate": 1.1977405875242543e-08, "loss": 0.4685, "step": 11873 }, { "epoch": 14.533659730722155, "grad_norm": 1.625648613037077, "learning_rate": 1.1914828817199131e-08, "loss": 0.4713, "step": 11874 }, { "epoch": 14.534883720930232, "grad_norm": 1.87470599450634, "learning_rate": 1.1852415266910555e-08, "loss": 1.0122, "step": 11875 }, { "epoch": 14.53610771113831, "grad_norm": 1.1687518117412161, "learning_rate": 1.1790165228477979e-08, "loss": 0.8442, "step": 11876 }, { "epoch": 14.537331701346389, "grad_norm": 2.268592143382809, "learning_rate": 1.1728078705992574e-08, "loss": 0.4772, "step": 11877 }, { "epoch": 14.538555691554468, "grad_norm": 2.388133926293711, "learning_rate": 1.166615570353441e-08, "loss": 0.3201, "step": 11878 }, { "epoch": 14.539779681762546, "grad_norm": 2.7157227569830136, "learning_rate": 1.1604396225173008e-08, "loss": 0.3893, "step": 11879 }, { "epoch": 14.541003671970625, "grad_norm": 1.7440937413547302, "learning_rate": 1.1542800274966238e-08, "loss": 0.4353, "step": 11880 }, { "epoch": 14.542227662178703, "grad_norm": 3.377514025499077, "learning_rate": 1.1481367856962799e-08, "loss": 0.5826, "step": 11881 }, { "epoch": 14.54345165238678, "grad_norm": 3.2201798006659974, "learning_rate": 1.1420098975199467e-08, "loss": 0.2938, "step": 11882 }, { "epoch": 14.544675642594859, "grad_norm": 2.1574853631045228, "learning_rate": 1.1358993633702742e-08, "loss": 0.3777, "step": 11883 }, { "epoch": 14.545899632802938, "grad_norm": 1.5698418038342512, "learning_rate": 1.1298051836487744e-08, "loss": 1.209, "step": 11884 }, { "epoch": 14.547123623011016, "grad_norm": 2.0541512849196963, "learning_rate": 1.1237273587559883e-08, "loss": 0.6165, "step": 11885 }, { "epoch": 14.548347613219095, "grad_norm": 2.025850129381251, "learning_rate": 1.1176658890913183e-08, "loss": 0.7398, "step": 11886 }, { "epoch": 14.549571603427173, "grad_norm": 2.5164853719986344, "learning_rate": 1.1116207750530849e-08, "loss": 0.4526, "step": 11887 }, { "epoch": 14.55079559363525, "grad_norm": 2.5613033165525025, "learning_rate": 1.1055920170385815e-08, "loss": 0.5504, "step": 11888 }, { "epoch": 14.552019583843329, "grad_norm": 2.1078180559131594, "learning_rate": 1.0995796154439353e-08, "loss": 0.5385, "step": 11889 }, { "epoch": 14.553243574051407, "grad_norm": 1.7492266003685322, "learning_rate": 1.0935835706643028e-08, "loss": 1.0756, "step": 11890 }, { "epoch": 14.554467564259486, "grad_norm": 2.5540251074783464, "learning_rate": 1.0876038830937296e-08, "loss": 1.2731, "step": 11891 }, { "epoch": 14.555691554467565, "grad_norm": 1.8211669417351077, "learning_rate": 1.0816405531251794e-08, "loss": 0.6246, "step": 11892 }, { "epoch": 14.556915544675643, "grad_norm": 2.0677350341790794, "learning_rate": 1.0756935811505054e-08, "loss": 0.6275, "step": 11893 }, { "epoch": 14.55813953488372, "grad_norm": 1.7977723299605575, "learning_rate": 1.0697629675605614e-08, "loss": 0.8195, "step": 11894 }, { "epoch": 14.559363525091799, "grad_norm": 2.756020941164699, "learning_rate": 1.0638487127450636e-08, "loss": 0.3941, "step": 11895 }, { "epoch": 14.560587515299877, "grad_norm": 2.7268879397644956, "learning_rate": 1.0579508170926734e-08, "loss": 0.3765, "step": 11896 }, { "epoch": 14.561811505507956, "grad_norm": 2.5307430494653174, "learning_rate": 1.0520692809909693e-08, "loss": 0.5388, "step": 11897 }, { "epoch": 14.563035495716035, "grad_norm": 3.660668411193295, "learning_rate": 1.0462041048264759e-08, "loss": 0.7531, "step": 11898 }, { "epoch": 14.564259485924113, "grad_norm": 2.650056337336155, "learning_rate": 1.0403552889846624e-08, "loss": 0.8471, "step": 11899 }, { "epoch": 14.56548347613219, "grad_norm": 1.787920617403508, "learning_rate": 1.0345228338498325e-08, "loss": 0.4947, "step": 11900 }, { "epoch": 14.566707466340269, "grad_norm": 1.4050997129385918, "learning_rate": 1.0287067398053185e-08, "loss": 1.3264, "step": 11901 }, { "epoch": 14.567931456548347, "grad_norm": 1.556164719124341, "learning_rate": 1.0229070072332869e-08, "loss": 0.7102, "step": 11902 }, { "epoch": 14.569155446756426, "grad_norm": 1.7056940569171484, "learning_rate": 1.0171236365149329e-08, "loss": 0.4475, "step": 11903 }, { "epoch": 14.570379436964505, "grad_norm": 3.093813583841516, "learning_rate": 1.0113566280302855e-08, "loss": 0.8997, "step": 11904 }, { "epoch": 14.571603427172583, "grad_norm": 2.3997118108888413, "learning_rate": 1.0056059821583475e-08, "loss": 0.5994, "step": 11905 }, { "epoch": 14.572827417380662, "grad_norm": 2.1259527008949064, "learning_rate": 9.998716992770107e-09, "loss": 0.6087, "step": 11906 }, { "epoch": 14.574051407588739, "grad_norm": 1.6915588935346049, "learning_rate": 9.941537797631129e-09, "loss": 1.6306, "step": 11907 }, { "epoch": 14.575275397796817, "grad_norm": 2.5364224234102584, "learning_rate": 9.884522239924088e-09, "loss": 0.7041, "step": 11908 }, { "epoch": 14.576499388004896, "grad_norm": 1.616157646868152, "learning_rate": 9.827670323395988e-09, "loss": 0.7118, "step": 11909 }, { "epoch": 14.577723378212974, "grad_norm": 2.0087992094288336, "learning_rate": 9.770982051783006e-09, "loss": 1.1018, "step": 11910 }, { "epoch": 14.578947368421053, "grad_norm": 2.273333147234298, "learning_rate": 9.71445742881022e-09, "loss": 0.5036, "step": 11911 }, { "epoch": 14.580171358629132, "grad_norm": 2.106530816330172, "learning_rate": 9.658096458192435e-09, "loss": 0.8463, "step": 11912 }, { "epoch": 14.581395348837209, "grad_norm": 2.1911559966597234, "learning_rate": 9.60189914363363e-09, "loss": 0.5279, "step": 11913 }, { "epoch": 14.582619339045287, "grad_norm": 1.6590976725928632, "learning_rate": 9.545865488826411e-09, "loss": 0.5939, "step": 11914 }, { "epoch": 14.583843329253366, "grad_norm": 1.5869074758027664, "learning_rate": 9.489995497453387e-09, "loss": 1.2542, "step": 11915 }, { "epoch": 14.585067319461444, "grad_norm": 0.8688571252077006, "learning_rate": 9.434289173186062e-09, "loss": 0.4307, "step": 11916 }, { "epoch": 14.586291309669523, "grad_norm": 1.3842279746139179, "learning_rate": 9.378746519685399e-09, "loss": 1.0849, "step": 11917 }, { "epoch": 14.587515299877602, "grad_norm": 2.421870413006182, "learning_rate": 9.323367540601259e-09, "loss": 0.2802, "step": 11918 }, { "epoch": 14.588739290085678, "grad_norm": 1.8400634518648125, "learning_rate": 9.268152239572947e-09, "loss": 0.5393, "step": 11919 }, { "epoch": 14.589963280293757, "grad_norm": 1.79413541723308, "learning_rate": 9.213100620229231e-09, "loss": 1.3544, "step": 11920 }, { "epoch": 14.591187270501836, "grad_norm": 2.710439080452024, "learning_rate": 9.158212686187773e-09, "loss": 0.9037, "step": 11921 }, { "epoch": 14.592411260709914, "grad_norm": 1.8426638880890254, "learning_rate": 9.103488441055408e-09, "loss": 0.9238, "step": 11922 }, { "epoch": 14.593635250917993, "grad_norm": 2.1596562994816866, "learning_rate": 9.048927888428981e-09, "loss": 1.1979, "step": 11923 }, { "epoch": 14.594859241126072, "grad_norm": 3.234753870796488, "learning_rate": 8.994531031893406e-09, "loss": 0.5026, "step": 11924 }, { "epoch": 14.596083231334148, "grad_norm": 2.232730173339315, "learning_rate": 8.940297875023596e-09, "loss": 0.3686, "step": 11925 }, { "epoch": 14.597307221542227, "grad_norm": 1.7933304550002303, "learning_rate": 8.886228421383647e-09, "loss": 0.523, "step": 11926 }, { "epoch": 14.598531211750306, "grad_norm": 2.070882561295051, "learning_rate": 8.832322674526827e-09, "loss": 0.7251, "step": 11927 }, { "epoch": 14.599755201958384, "grad_norm": 2.234513769421013, "learning_rate": 8.778580637995305e-09, "loss": 1.1796, "step": 11928 }, { "epoch": 14.600979192166463, "grad_norm": 2.5890045777317274, "learning_rate": 8.725002315321528e-09, "loss": 0.3987, "step": 11929 }, { "epoch": 14.602203182374542, "grad_norm": 2.177484019942961, "learning_rate": 8.671587710025742e-09, "loss": 0.382, "step": 11930 }, { "epoch": 14.60342717258262, "grad_norm": 2.0575025051918394, "learning_rate": 8.618336825618468e-09, "loss": 0.4862, "step": 11931 }, { "epoch": 14.604651162790697, "grad_norm": 2.5488984398304217, "learning_rate": 8.565249665598851e-09, "loss": 0.452, "step": 11932 }, { "epoch": 14.605875152998776, "grad_norm": 2.3688001762611353, "learning_rate": 8.512326233456048e-09, "loss": 1.0275, "step": 11933 }, { "epoch": 14.607099143206854, "grad_norm": 1.5351374298177984, "learning_rate": 8.459566532668107e-09, "loss": 0.2666, "step": 11934 }, { "epoch": 14.608323133414933, "grad_norm": 1.292612311880505, "learning_rate": 8.4069705667017e-09, "loss": 0.5516, "step": 11935 }, { "epoch": 14.609547123623011, "grad_norm": 1.4053273472957217, "learning_rate": 8.35453833901323e-09, "loss": 0.6197, "step": 11936 }, { "epoch": 14.61077111383109, "grad_norm": 1.1754848663813604, "learning_rate": 8.30226985304855e-09, "loss": 1.1947, "step": 11937 }, { "epoch": 14.611995104039167, "grad_norm": 1.4964674015147679, "learning_rate": 8.250165112242692e-09, "loss": 0.3289, "step": 11938 }, { "epoch": 14.613219094247246, "grad_norm": 1.1005721013641638, "learning_rate": 8.198224120019583e-09, "loss": 0.533, "step": 11939 }, { "epoch": 14.614443084455324, "grad_norm": 1.0656451590078104, "learning_rate": 8.146446879792602e-09, "loss": 0.439, "step": 11940 }, { "epoch": 14.615667074663403, "grad_norm": 1.5423375834387363, "learning_rate": 8.094833394964585e-09, "loss": 0.5554, "step": 11941 }, { "epoch": 14.616891064871481, "grad_norm": 1.323191775369342, "learning_rate": 8.043383668926985e-09, "loss": 0.6524, "step": 11942 }, { "epoch": 14.61811505507956, "grad_norm": 1.444818577042891, "learning_rate": 7.992097705061263e-09, "loss": 0.4302, "step": 11943 }, { "epoch": 14.619339045287639, "grad_norm": 1.2418132988363064, "learning_rate": 7.940975506737502e-09, "loss": 0.5371, "step": 11944 }, { "epoch": 14.620563035495715, "grad_norm": 2.1517068161596042, "learning_rate": 7.890017077315237e-09, "loss": 1.3102, "step": 11945 }, { "epoch": 14.621787025703794, "grad_norm": 1.6180776595586872, "learning_rate": 7.839222420143456e-09, "loss": 0.5866, "step": 11946 }, { "epoch": 14.623011015911873, "grad_norm": 1.6568905927271154, "learning_rate": 7.78859153856032e-09, "loss": 0.4684, "step": 11947 }, { "epoch": 14.624235006119951, "grad_norm": 1.847092959139734, "learning_rate": 7.738124435892614e-09, "loss": 0.5175, "step": 11948 }, { "epoch": 14.62545899632803, "grad_norm": 1.6705355619277302, "learning_rate": 7.687821115457129e-09, "loss": 0.4495, "step": 11949 }, { "epoch": 14.626682986536107, "grad_norm": 3.367147213086265, "learning_rate": 7.637681580559831e-09, "loss": 0.3399, "step": 11950 }, { "epoch": 14.627906976744185, "grad_norm": 1.5482650158698146, "learning_rate": 7.587705834495306e-09, "loss": 0.5029, "step": 11951 }, { "epoch": 14.629130966952264, "grad_norm": 2.1559075335298328, "learning_rate": 7.537893880548153e-09, "loss": 0.427, "step": 11952 }, { "epoch": 14.630354957160343, "grad_norm": 1.2637290871920543, "learning_rate": 7.488245721991582e-09, "loss": 0.3047, "step": 11953 }, { "epoch": 14.631578947368421, "grad_norm": 2.0732105228290028, "learning_rate": 7.438761362087987e-09, "loss": 0.7268, "step": 11954 }, { "epoch": 14.6328029375765, "grad_norm": 1.346906025070435, "learning_rate": 7.3894408040900424e-09, "loss": 0.5786, "step": 11955 }, { "epoch": 14.634026927784578, "grad_norm": 1.7215283619514976, "learning_rate": 7.340284051238489e-09, "loss": 0.4531, "step": 11956 }, { "epoch": 14.635250917992655, "grad_norm": 1.7928761879910258, "learning_rate": 7.291291106763521e-09, "loss": 0.5101, "step": 11957 }, { "epoch": 14.636474908200734, "grad_norm": 2.074789328630792, "learning_rate": 7.2424619738853394e-09, "loss": 1.133, "step": 11958 }, { "epoch": 14.637698898408813, "grad_norm": 1.6102854322773874, "learning_rate": 7.193796655812213e-09, "loss": 0.8154, "step": 11959 }, { "epoch": 14.638922888616891, "grad_norm": 1.8628115225171258, "learning_rate": 7.145295155742693e-09, "loss": 1.356, "step": 11960 }, { "epoch": 14.64014687882497, "grad_norm": 2.4821720912681857, "learning_rate": 7.096957476863953e-09, "loss": 1.192, "step": 11961 }, { "epoch": 14.641370869033048, "grad_norm": 1.7010688543696035, "learning_rate": 7.04878362235234e-09, "loss": 1.0613, "step": 11962 }, { "epoch": 14.642594859241125, "grad_norm": 1.6979937111115437, "learning_rate": 7.000773595373933e-09, "loss": 1.1685, "step": 11963 }, { "epoch": 14.643818849449204, "grad_norm": 1.2074746733778579, "learning_rate": 6.952927399083986e-09, "loss": 0.5668, "step": 11964 }, { "epoch": 14.645042839657282, "grad_norm": 3.266282170561454, "learning_rate": 6.905245036626374e-09, "loss": 0.4428, "step": 11965 }, { "epoch": 14.646266829865361, "grad_norm": 1.717595010715417, "learning_rate": 6.8577265111344216e-09, "loss": 0.6313, "step": 11966 }, { "epoch": 14.64749082007344, "grad_norm": 1.8427544513034466, "learning_rate": 6.810371825731465e-09, "loss": 0.3074, "step": 11967 }, { "epoch": 14.648714810281518, "grad_norm": 1.9572611390514756, "learning_rate": 6.763180983529183e-09, "loss": 1.0754, "step": 11968 }, { "epoch": 14.649938800489597, "grad_norm": 1.4871658253088362, "learning_rate": 6.716153987628982e-09, "loss": 1.2778, "step": 11969 }, { "epoch": 14.651162790697674, "grad_norm": 1.2142159911734456, "learning_rate": 6.66929084112089e-09, "loss": 0.5572, "step": 11970 }, { "epoch": 14.652386780905752, "grad_norm": 3.190358159888911, "learning_rate": 6.622591547084667e-09, "loss": 0.533, "step": 11971 }, { "epoch": 14.653610771113831, "grad_norm": 2.5459206174972855, "learning_rate": 6.576056108589801e-09, "loss": 0.3707, "step": 11972 }, { "epoch": 14.65483476132191, "grad_norm": 2.3497110115447692, "learning_rate": 6.529684528693569e-09, "loss": 1.3994, "step": 11973 }, { "epoch": 14.656058751529988, "grad_norm": 2.2195152468493013, "learning_rate": 6.483476810444089e-09, "loss": 0.5922, "step": 11974 }, { "epoch": 14.657282741738067, "grad_norm": 2.8520197263322316, "learning_rate": 6.437432956877543e-09, "loss": 0.5711, "step": 11975 }, { "epoch": 14.658506731946144, "grad_norm": 1.4445986387827394, "learning_rate": 6.391552971019843e-09, "loss": 0.4815, "step": 11976 }, { "epoch": 14.659730722154222, "grad_norm": 1.2059594119922876, "learning_rate": 6.345836855885801e-09, "loss": 0.4399, "step": 11977 }, { "epoch": 14.660954712362301, "grad_norm": 2.082890381616836, "learning_rate": 6.300284614480234e-09, "loss": 0.9559, "step": 11978 }, { "epoch": 14.66217870257038, "grad_norm": 2.3515845652890675, "learning_rate": 6.254896249796305e-09, "loss": 0.6654, "step": 11979 }, { "epoch": 14.663402692778458, "grad_norm": 1.9182379912512766, "learning_rate": 6.2096717648169044e-09, "loss": 0.4548, "step": 11980 }, { "epoch": 14.664626682986537, "grad_norm": 1.3247897748085322, "learning_rate": 6.1646111625141006e-09, "loss": 0.5527, "step": 11981 }, { "epoch": 14.665850673194614, "grad_norm": 1.6460759888894458, "learning_rate": 6.1197144458488565e-09, "loss": 1.288, "step": 11982 }, { "epoch": 14.667074663402692, "grad_norm": 1.683770761948297, "learning_rate": 6.074981617771869e-09, "loss": 0.6941, "step": 11983 }, { "epoch": 14.668298653610771, "grad_norm": 1.916964652302, "learning_rate": 6.030412681222453e-09, "loss": 0.4109, "step": 11984 }, { "epoch": 14.66952264381885, "grad_norm": 1.8092482515901291, "learning_rate": 5.986007639130209e-09, "loss": 0.8527, "step": 11985 }, { "epoch": 14.670746634026928, "grad_norm": 1.498960594800365, "learning_rate": 5.941766494412526e-09, "loss": 1.2674, "step": 11986 }, { "epoch": 14.671970624235007, "grad_norm": 2.484691166491592, "learning_rate": 5.897689249977079e-09, "loss": 0.4151, "step": 11987 }, { "epoch": 14.673194614443084, "grad_norm": 1.110358911457998, "learning_rate": 5.853775908720438e-09, "loss": 0.3908, "step": 11988 }, { "epoch": 14.674418604651162, "grad_norm": 1.5586375318363053, "learning_rate": 5.8100264735283516e-09, "loss": 1.0768, "step": 11989 }, { "epoch": 14.67564259485924, "grad_norm": 1.7083323992340866, "learning_rate": 5.76644094727602e-09, "loss": 0.493, "step": 11990 }, { "epoch": 14.67686658506732, "grad_norm": 1.8417214245081257, "learning_rate": 5.723019332827817e-09, "loss": 1.4501, "step": 11991 }, { "epoch": 14.678090575275398, "grad_norm": 1.6729142902705374, "learning_rate": 5.6797616330370175e-09, "loss": 0.4605, "step": 11992 }, { "epoch": 14.679314565483477, "grad_norm": 1.6610347680549726, "learning_rate": 5.636667850746347e-09, "loss": 1.0375, "step": 11993 }, { "epoch": 14.680538555691555, "grad_norm": 1.3865362252421265, "learning_rate": 5.593737988787984e-09, "loss": 0.6234, "step": 11994 }, { "epoch": 14.681762545899632, "grad_norm": 2.5947741402192563, "learning_rate": 5.550972049982728e-09, "loss": 1.0169, "step": 11995 }, { "epoch": 14.68298653610771, "grad_norm": 2.623945129456273, "learning_rate": 5.5083700371413865e-09, "loss": 0.4698, "step": 11996 }, { "epoch": 14.68421052631579, "grad_norm": 1.5753835654399828, "learning_rate": 5.465931953063663e-09, "loss": 0.6456, "step": 11997 }, { "epoch": 14.685434516523868, "grad_norm": 2.2905436304190707, "learning_rate": 5.4236578005378845e-09, "loss": 0.571, "step": 11998 }, { "epoch": 14.686658506731947, "grad_norm": 2.384636399693884, "learning_rate": 5.3815475823429385e-09, "loss": 0.4426, "step": 11999 }, { "epoch": 14.687882496940025, "grad_norm": 3.0899334764713244, "learning_rate": 5.3396013012455e-09, "loss": 0.4811, "step": 12000 }, { "epoch": 14.689106487148102, "grad_norm": 3.4129137468504642, "learning_rate": 5.297818960002255e-09, "loss": 0.4286, "step": 12001 }, { "epoch": 14.69033047735618, "grad_norm": 1.1749579390202942, "learning_rate": 5.256200561359337e-09, "loss": 0.4431, "step": 12002 }, { "epoch": 14.69155446756426, "grad_norm": 1.6701097562477463, "learning_rate": 5.214746108051505e-09, "loss": 0.4494, "step": 12003 }, { "epoch": 14.692778457772338, "grad_norm": 1.7745840209709787, "learning_rate": 5.1734556028026905e-09, "loss": 0.4106, "step": 12004 }, { "epoch": 14.694002447980417, "grad_norm": 3.940483915286823, "learning_rate": 5.132329048326834e-09, "loss": 0.388, "step": 12005 }, { "epoch": 14.695226438188495, "grad_norm": 2.0111992093033293, "learning_rate": 5.091366447326495e-09, "loss": 0.5253, "step": 12006 }, { "epoch": 14.696450428396572, "grad_norm": 2.1689262865218373, "learning_rate": 5.0505678024934115e-09, "loss": 1.1861, "step": 12007 }, { "epoch": 14.69767441860465, "grad_norm": 1.6165103064162882, "learning_rate": 5.00993311650877e-09, "loss": 0.5625, "step": 12008 }, { "epoch": 14.69889840881273, "grad_norm": 1.4941545553563675, "learning_rate": 4.969462392043211e-09, "loss": 1.155, "step": 12009 }, { "epoch": 14.700122399020808, "grad_norm": 2.045676560298923, "learning_rate": 4.9291556317559995e-09, "loss": 0.3533, "step": 12010 }, { "epoch": 14.701346389228886, "grad_norm": 1.796473087557276, "learning_rate": 4.8890128382961255e-09, "loss": 1.089, "step": 12011 }, { "epoch": 14.702570379436965, "grad_norm": 3.1126240470788797, "learning_rate": 4.849034014301479e-09, "loss": 0.5186, "step": 12012 }, { "epoch": 14.703794369645042, "grad_norm": 2.071772835442174, "learning_rate": 4.809219162399403e-09, "loss": 0.4337, "step": 12013 }, { "epoch": 14.70501835985312, "grad_norm": 1.974740397526902, "learning_rate": 4.769568285206139e-09, "loss": 0.4173, "step": 12014 }, { "epoch": 14.7062423500612, "grad_norm": 2.155642759295847, "learning_rate": 4.730081385327934e-09, "loss": 0.8275, "step": 12015 }, { "epoch": 14.707466340269278, "grad_norm": 2.651671253206143, "learning_rate": 4.6907584653593795e-09, "loss": 0.8438, "step": 12016 }, { "epoch": 14.708690330477356, "grad_norm": 2.456385601623163, "learning_rate": 4.6515995278845204e-09, "loss": 0.5846, "step": 12017 }, { "epoch": 14.709914320685435, "grad_norm": 2.0122952947416355, "learning_rate": 4.612604575477131e-09, "loss": 0.4755, "step": 12018 }, { "epoch": 14.711138310893514, "grad_norm": 1.4237014690514598, "learning_rate": 4.573773610699328e-09, "loss": 0.3048, "step": 12019 }, { "epoch": 14.71236230110159, "grad_norm": 1.1272883569587864, "learning_rate": 4.535106636103237e-09, "loss": 0.5222, "step": 12020 }, { "epoch": 14.713586291309669, "grad_norm": 1.3285399796581532, "learning_rate": 4.496603654229881e-09, "loss": 0.5866, "step": 12021 }, { "epoch": 14.714810281517748, "grad_norm": 0.9573492710695023, "learning_rate": 4.458264667609458e-09, "loss": 0.4613, "step": 12022 }, { "epoch": 14.716034271725826, "grad_norm": 2.4529661752430694, "learning_rate": 4.420089678761619e-09, "loss": 1.0072, "step": 12023 }, { "epoch": 14.717258261933905, "grad_norm": 1.7090606276554066, "learning_rate": 4.382078690194913e-09, "loss": 0.589, "step": 12024 }, { "epoch": 14.718482252141984, "grad_norm": 1.4383684640349346, "learning_rate": 4.3442317044073425e-09, "loss": 0.4389, "step": 12025 }, { "epoch": 14.71970624235006, "grad_norm": 1.1818214551203328, "learning_rate": 4.306548723886084e-09, "loss": 0.6066, "step": 12026 }, { "epoch": 14.720930232558139, "grad_norm": 2.227678613992446, "learning_rate": 4.269029751107489e-09, "loss": 0.6025, "step": 12027 }, { "epoch": 14.722154222766218, "grad_norm": 1.6451130107259915, "learning_rate": 4.231674788537088e-09, "loss": 0.5173, "step": 12028 }, { "epoch": 14.723378212974296, "grad_norm": 1.841379568337515, "learning_rate": 4.194483838630137e-09, "loss": 0.6235, "step": 12029 }, { "epoch": 14.724602203182375, "grad_norm": 1.7273954034842762, "learning_rate": 4.157456903829959e-09, "loss": 0.6123, "step": 12030 }, { "epoch": 14.725826193390454, "grad_norm": 2.181940262109109, "learning_rate": 4.120593986570443e-09, "loss": 0.6303, "step": 12031 }, { "epoch": 14.727050183598532, "grad_norm": 1.8206841947665204, "learning_rate": 4.083895089273815e-09, "loss": 0.47, "step": 12032 }, { "epoch": 14.728274173806609, "grad_norm": 1.4337594383819938, "learning_rate": 4.0473602143520365e-09, "loss": 0.7708, "step": 12033 }, { "epoch": 14.729498164014688, "grad_norm": 1.5297240679015143, "learning_rate": 4.0109893642056866e-09, "loss": 0.6231, "step": 12034 }, { "epoch": 14.730722154222766, "grad_norm": 2.0518213106638745, "learning_rate": 3.9747825412250755e-09, "loss": 0.358, "step": 12035 }, { "epoch": 14.731946144430845, "grad_norm": 1.3506714254113898, "learning_rate": 3.9387397477896885e-09, "loss": 0.7262, "step": 12036 }, { "epoch": 14.733170134638923, "grad_norm": 0.7910128903737157, "learning_rate": 3.902860986267909e-09, "loss": 0.216, "step": 12037 }, { "epoch": 14.734394124847, "grad_norm": 1.872401694921444, "learning_rate": 3.867146259017851e-09, "loss": 1.1821, "step": 12038 }, { "epoch": 14.735618115055079, "grad_norm": 1.9329840944187948, "learning_rate": 3.831595568386248e-09, "loss": 0.4656, "step": 12039 }, { "epoch": 14.736842105263158, "grad_norm": 1.175646503572795, "learning_rate": 3.796208916709565e-09, "loss": 0.4665, "step": 12040 }, { "epoch": 14.738066095471236, "grad_norm": 1.7022426068931982, "learning_rate": 3.7609863063131635e-09, "loss": 0.5556, "step": 12041 }, { "epoch": 14.739290085679315, "grad_norm": 2.048106299018896, "learning_rate": 3.725927739511581e-09, "loss": 0.4828, "step": 12042 }, { "epoch": 14.740514075887393, "grad_norm": 1.637698322371853, "learning_rate": 3.6910332186093635e-09, "loss": 0.9305, "step": 12043 }, { "epoch": 14.741738066095472, "grad_norm": 1.5876905463233346, "learning_rate": 3.6563027458991205e-09, "loss": 0.4657, "step": 12044 }, { "epoch": 14.742962056303549, "grad_norm": 1.7489582337449632, "learning_rate": 3.6217363236631943e-09, "loss": 0.7226, "step": 12045 }, { "epoch": 14.744186046511627, "grad_norm": 1.7586886267498139, "learning_rate": 3.5873339541733775e-09, "loss": 0.5657, "step": 12046 }, { "epoch": 14.745410036719706, "grad_norm": 2.4085866413787382, "learning_rate": 3.553095639690363e-09, "loss": 0.8971, "step": 12047 }, { "epoch": 14.746634026927785, "grad_norm": 2.7538762763649465, "learning_rate": 3.5190213824640164e-09, "loss": 0.2691, "step": 12048 }, { "epoch": 14.747858017135863, "grad_norm": 1.1374786839385027, "learning_rate": 3.4851111847339357e-09, "loss": 0.5039, "step": 12049 }, { "epoch": 14.749082007343942, "grad_norm": 1.5575306930777009, "learning_rate": 3.451365048728339e-09, "loss": 0.6768, "step": 12050 }, { "epoch": 14.750305997552019, "grad_norm": 1.838356351293293, "learning_rate": 3.417782976664896e-09, "loss": 0.9063, "step": 12051 }, { "epoch": 14.751529987760097, "grad_norm": 2.922553590812241, "learning_rate": 3.384364970750731e-09, "loss": 0.4372, "step": 12052 }, { "epoch": 14.752753977968176, "grad_norm": 1.4723203170658377, "learning_rate": 3.3511110331815867e-09, "loss": 0.3946, "step": 12053 }, { "epoch": 14.753977968176255, "grad_norm": 1.8983366420931678, "learning_rate": 3.318021166142937e-09, "loss": 0.4157, "step": 12054 }, { "epoch": 14.755201958384333, "grad_norm": 1.6420109865714743, "learning_rate": 3.2850953718094324e-09, "loss": 0.3307, "step": 12055 }, { "epoch": 14.756425948592412, "grad_norm": 2.123250641195415, "learning_rate": 3.252333652344619e-09, "loss": 1.1456, "step": 12056 }, { "epoch": 14.75764993880049, "grad_norm": 2.028019907887872, "learning_rate": 3.219736009901775e-09, "loss": 0.7208, "step": 12057 }, { "epoch": 14.758873929008567, "grad_norm": 1.9170852764164041, "learning_rate": 3.1873024466227976e-09, "loss": 0.9184, "step": 12058 }, { "epoch": 14.760097919216646, "grad_norm": 1.4843253161436196, "learning_rate": 3.1550329646393152e-09, "loss": 0.7201, "step": 12059 }, { "epoch": 14.761321909424725, "grad_norm": 2.9491646637516444, "learning_rate": 3.122927566071854e-09, "loss": 0.3541, "step": 12060 }, { "epoch": 14.762545899632803, "grad_norm": 1.2256035735028528, "learning_rate": 3.0909862530301147e-09, "loss": 0.4769, "step": 12061 }, { "epoch": 14.763769889840882, "grad_norm": 2.936739804706395, "learning_rate": 3.05920902761353e-09, "loss": 0.3458, "step": 12062 }, { "epoch": 14.76499388004896, "grad_norm": 2.420783951128229, "learning_rate": 3.027595891910151e-09, "loss": 0.4738, "step": 12063 }, { "epoch": 14.766217870257037, "grad_norm": 1.6331076879900313, "learning_rate": 2.996146847997483e-09, "loss": 0.4587, "step": 12064 }, { "epoch": 14.767441860465116, "grad_norm": 1.92520803216434, "learning_rate": 2.9648618979424836e-09, "loss": 0.4989, "step": 12065 }, { "epoch": 14.768665850673194, "grad_norm": 1.6014861423912945, "learning_rate": 2.9337410438007306e-09, "loss": 0.563, "step": 12066 }, { "epoch": 14.769889840881273, "grad_norm": 1.3779677310953304, "learning_rate": 2.902784287617255e-09, "loss": 0.7145, "step": 12067 }, { "epoch": 14.771113831089352, "grad_norm": 1.30988462885473, "learning_rate": 2.871991631426818e-09, "loss": 0.3832, "step": 12068 }, { "epoch": 14.77233782129743, "grad_norm": 1.4130832172974648, "learning_rate": 2.8413630772530786e-09, "loss": 0.5191, "step": 12069 }, { "epoch": 14.773561811505507, "grad_norm": 1.6468348058531797, "learning_rate": 2.810898627108316e-09, "loss": 0.9782, "step": 12070 }, { "epoch": 14.774785801713586, "grad_norm": 2.3334597617261887, "learning_rate": 2.780598282995095e-09, "loss": 0.9581, "step": 12071 }, { "epoch": 14.776009791921664, "grad_norm": 1.8136525688234348, "learning_rate": 2.7504620469040456e-09, "loss": 0.8794, "step": 12072 }, { "epoch": 14.777233782129743, "grad_norm": 1.4085511647361788, "learning_rate": 2.7204899208160828e-09, "loss": 0.551, "step": 12073 }, { "epoch": 14.778457772337822, "grad_norm": 1.589010917623209, "learning_rate": 2.69068190670102e-09, "loss": 0.6186, "step": 12074 }, { "epoch": 14.7796817625459, "grad_norm": 2.492124256032937, "learning_rate": 2.661038006517014e-09, "loss": 0.4632, "step": 12075 }, { "epoch": 14.780905752753977, "grad_norm": 2.303652306306227, "learning_rate": 2.631558222212782e-09, "loss": 0.5955, "step": 12076 }, { "epoch": 14.782129742962056, "grad_norm": 1.8006199562568233, "learning_rate": 2.602242555725387e-09, "loss": 1.7956, "step": 12077 }, { "epoch": 14.783353733170134, "grad_norm": 2.097502637413556, "learning_rate": 2.5730910089813432e-09, "loss": 0.4858, "step": 12078 }, { "epoch": 14.784577723378213, "grad_norm": 3.9322304699397472, "learning_rate": 2.5441035838966177e-09, "loss": 0.3676, "step": 12079 }, { "epoch": 14.785801713586292, "grad_norm": 1.7135196509592694, "learning_rate": 2.515280282375521e-09, "loss": 1.5488, "step": 12080 }, { "epoch": 14.78702570379437, "grad_norm": 1.2745926845952955, "learning_rate": 2.486621106312925e-09, "loss": 0.6801, "step": 12081 }, { "epoch": 14.788249694002449, "grad_norm": 1.4761354796202726, "learning_rate": 2.4581260575920473e-09, "loss": 0.4279, "step": 12082 }, { "epoch": 14.789473684210526, "grad_norm": 2.4985856694385586, "learning_rate": 2.429795138085278e-09, "loss": 0.3572, "step": 12083 }, { "epoch": 14.790697674418604, "grad_norm": 1.0926703644246472, "learning_rate": 2.4016283496544614e-09, "loss": 0.5258, "step": 12084 }, { "epoch": 14.791921664626683, "grad_norm": 3.29754122662569, "learning_rate": 2.3736256941506164e-09, "loss": 0.3985, "step": 12085 }, { "epoch": 14.793145654834762, "grad_norm": 1.9205216032894543, "learning_rate": 2.345787173413938e-09, "loss": 0.2866, "step": 12086 }, { "epoch": 14.79436964504284, "grad_norm": 1.5141406215281468, "learning_rate": 2.318112789274074e-09, "loss": 0.6123, "step": 12087 }, { "epoch": 14.795593635250919, "grad_norm": 1.6336229696842457, "learning_rate": 2.2906025435495694e-09, "loss": 0.4015, "step": 12088 }, { "epoch": 14.796817625458996, "grad_norm": 1.470674357324275, "learning_rate": 2.2632564380481447e-09, "loss": 0.4291, "step": 12089 }, { "epoch": 14.798041615667074, "grad_norm": 2.60864405655822, "learning_rate": 2.2360744745669737e-09, "loss": 0.5127, "step": 12090 }, { "epoch": 14.799265605875153, "grad_norm": 2.707511124003092, "learning_rate": 2.2090566548926827e-09, "loss": 0.4216, "step": 12091 }, { "epoch": 14.800489596083231, "grad_norm": 2.37247533698625, "learning_rate": 2.182202980800241e-09, "loss": 0.976, "step": 12092 }, { "epoch": 14.80171358629131, "grad_norm": 1.7697433928768147, "learning_rate": 2.155513454054625e-09, "loss": 0.4794, "step": 12093 }, { "epoch": 14.802937576499389, "grad_norm": 2.9133471710146006, "learning_rate": 2.128988076409988e-09, "loss": 0.5694, "step": 12094 }, { "epoch": 14.804161566707466, "grad_norm": 2.544908183901255, "learning_rate": 2.1026268496091018e-09, "loss": 0.8543, "step": 12095 }, { "epoch": 14.805385556915544, "grad_norm": 1.4402036684224415, "learning_rate": 2.0764297753844698e-09, "loss": 0.8688, "step": 12096 }, { "epoch": 14.806609547123623, "grad_norm": 1.6484440044821693, "learning_rate": 2.05039685545777e-09, "loss": 0.5635, "step": 12097 }, { "epoch": 14.807833537331701, "grad_norm": 1.1169493879033598, "learning_rate": 2.024528091539857e-09, "loss": 0.5908, "step": 12098 }, { "epoch": 14.80905752753978, "grad_norm": 0.5911293090855352, "learning_rate": 1.998823485330481e-09, "loss": 0.1283, "step": 12099 }, { "epoch": 14.810281517747859, "grad_norm": 1.504717898552085, "learning_rate": 1.9732830385188474e-09, "loss": 1.3141, "step": 12100 }, { "epoch": 14.811505507955935, "grad_norm": 1.1131744411137092, "learning_rate": 1.9479067527836124e-09, "loss": 0.5437, "step": 12101 }, { "epoch": 14.812729498164014, "grad_norm": 1.1572848853061175, "learning_rate": 1.9226946297920545e-09, "loss": 0.5797, "step": 12102 }, { "epoch": 14.813953488372093, "grad_norm": 2.5622719318110545, "learning_rate": 1.8976466712014586e-09, "loss": 0.9624, "step": 12103 }, { "epoch": 14.815177478580171, "grad_norm": 1.7476371751757434, "learning_rate": 1.87276287865773e-09, "loss": 0.4235, "step": 12104 }, { "epoch": 14.81640146878825, "grad_norm": 2.4872121374879037, "learning_rate": 1.848043253796228e-09, "loss": 0.4401, "step": 12105 }, { "epoch": 14.817625458996329, "grad_norm": 1.6213591940632264, "learning_rate": 1.8234877982409305e-09, "loss": 0.4908, "step": 12106 }, { "epoch": 14.818849449204407, "grad_norm": 2.2973941492091443, "learning_rate": 1.7990965136061022e-09, "loss": 0.3903, "step": 12107 }, { "epoch": 14.820073439412484, "grad_norm": 1.5306668125827514, "learning_rate": 1.77486940149435e-09, "loss": 0.5833, "step": 12108 }, { "epoch": 14.821297429620563, "grad_norm": 1.9527980338400983, "learning_rate": 1.7508064634980116e-09, "loss": 0.3458, "step": 12109 }, { "epoch": 14.822521419828641, "grad_norm": 1.9038430495331289, "learning_rate": 1.7269077011983215e-09, "loss": 0.4654, "step": 12110 }, { "epoch": 14.82374541003672, "grad_norm": 1.2611722333494386, "learning_rate": 1.7031731161656906e-09, "loss": 0.6562, "step": 12111 }, { "epoch": 14.824969400244798, "grad_norm": 1.4983964768160178, "learning_rate": 1.6796027099599822e-09, "loss": 0.9635, "step": 12112 }, { "epoch": 14.826193390452877, "grad_norm": 1.419291008784469, "learning_rate": 1.6561964841299573e-09, "loss": 0.4061, "step": 12113 }, { "epoch": 14.827417380660954, "grad_norm": 1.1844347429774829, "learning_rate": 1.6329544402141074e-09, "loss": 0.7414, "step": 12114 }, { "epoch": 14.828641370869033, "grad_norm": 1.5006779837142645, "learning_rate": 1.609876579739822e-09, "loss": 0.6152, "step": 12115 }, { "epoch": 14.829865361077111, "grad_norm": 1.51535588543413, "learning_rate": 1.5869629042233881e-09, "loss": 0.6847, "step": 12116 }, { "epoch": 14.83108935128519, "grad_norm": 1.9269026655554389, "learning_rate": 1.5642134151708233e-09, "loss": 0.7323, "step": 12117 }, { "epoch": 14.832313341493268, "grad_norm": 1.6874168731787287, "learning_rate": 1.5416281140770428e-09, "loss": 1.165, "step": 12118 }, { "epoch": 14.833537331701347, "grad_norm": 2.44621958737006, "learning_rate": 1.5192070024261373e-09, "loss": 0.4414, "step": 12119 }, { "epoch": 14.834761321909426, "grad_norm": 2.544850546660493, "learning_rate": 1.496950081691928e-09, "loss": 1.0075, "step": 12120 }, { "epoch": 14.835985312117502, "grad_norm": 2.08706063020094, "learning_rate": 1.4748573533368559e-09, "loss": 0.7565, "step": 12121 }, { "epoch": 14.837209302325581, "grad_norm": 2.76413655421455, "learning_rate": 1.452928818812538e-09, "loss": 0.4627, "step": 12122 }, { "epoch": 14.83843329253366, "grad_norm": 0.9176992532751239, "learning_rate": 1.4311644795605983e-09, "loss": 0.3457, "step": 12123 }, { "epoch": 14.839657282741738, "grad_norm": 2.093914345734106, "learning_rate": 1.409564337010727e-09, "loss": 0.4833, "step": 12124 }, { "epoch": 14.840881272949817, "grad_norm": 1.71893280075994, "learning_rate": 1.3881283925826218e-09, "loss": 1.6187, "step": 12125 }, { "epoch": 14.842105263157894, "grad_norm": 1.425465714589987, "learning_rate": 1.3668566476848777e-09, "loss": 0.5523, "step": 12126 }, { "epoch": 14.843329253365972, "grad_norm": 3.581411937168354, "learning_rate": 1.345749103715821e-09, "loss": 0.326, "step": 12127 }, { "epoch": 14.844553243574051, "grad_norm": 2.700769460940968, "learning_rate": 1.3248057620621201e-09, "loss": 0.6665, "step": 12128 }, { "epoch": 14.84577723378213, "grad_norm": 1.1615428445747722, "learning_rate": 1.3040266241001742e-09, "loss": 0.6358, "step": 12129 }, { "epoch": 14.847001223990208, "grad_norm": 2.715689852981607, "learning_rate": 1.2834116911955573e-09, "loss": 0.4253, "step": 12130 }, { "epoch": 14.848225214198287, "grad_norm": 1.660760693237752, "learning_rate": 1.2629609647030194e-09, "loss": 1.1632, "step": 12131 }, { "epoch": 14.849449204406366, "grad_norm": 3.4769599616557496, "learning_rate": 1.2426744459664853e-09, "loss": 0.398, "step": 12132 }, { "epoch": 14.850673194614442, "grad_norm": 1.8096389108700857, "learning_rate": 1.2225521363193326e-09, "loss": 0.7256, "step": 12133 }, { "epoch": 14.851897184822521, "grad_norm": 2.811508015445776, "learning_rate": 1.2025940370832823e-09, "loss": 0.5045, "step": 12134 }, { "epoch": 14.8531211750306, "grad_norm": 1.1644296693343723, "learning_rate": 1.1828001495708952e-09, "loss": 0.5939, "step": 12135 }, { "epoch": 14.854345165238678, "grad_norm": 2.0175096479105363, "learning_rate": 1.163170475081965e-09, "loss": 0.4513, "step": 12136 }, { "epoch": 14.855569155446757, "grad_norm": 2.6949738456902383, "learning_rate": 1.143705014907126e-09, "loss": 0.3948, "step": 12137 }, { "epoch": 14.856793145654835, "grad_norm": 2.1770994380904005, "learning_rate": 1.1244037703250776e-09, "loss": 0.3411, "step": 12138 }, { "epoch": 14.858017135862912, "grad_norm": 3.1748044322195215, "learning_rate": 1.1052667426048048e-09, "loss": 0.4374, "step": 12139 }, { "epoch": 14.859241126070991, "grad_norm": 1.694281725978661, "learning_rate": 1.0862939330036348e-09, "loss": 0.665, "step": 12140 }, { "epoch": 14.86046511627907, "grad_norm": 2.1338522287398587, "learning_rate": 1.0674853427683484e-09, "loss": 0.5023, "step": 12141 }, { "epoch": 14.861689106487148, "grad_norm": 1.106307045578746, "learning_rate": 1.0488409731349015e-09, "loss": 0.5437, "step": 12142 }, { "epoch": 14.862913096695227, "grad_norm": 2.801346877059512, "learning_rate": 1.0303608253287023e-09, "loss": 0.7777, "step": 12143 }, { "epoch": 14.864137086903305, "grad_norm": 2.7479443552578964, "learning_rate": 1.0120449005640576e-09, "loss": 0.4815, "step": 12144 }, { "epoch": 14.865361077111384, "grad_norm": 2.5565814458690688, "learning_rate": 9.938932000450042e-10, "loss": 0.5948, "step": 12145 }, { "epoch": 14.86658506731946, "grad_norm": 1.99935208349718, "learning_rate": 9.759057249639214e-10, "loss": 0.6177, "step": 12146 }, { "epoch": 14.86780905752754, "grad_norm": 1.1990070637721293, "learning_rate": 9.580824765029196e-10, "loss": 0.4955, "step": 12147 }, { "epoch": 14.869033047735618, "grad_norm": 2.0996915065452786, "learning_rate": 9.404234558335612e-10, "loss": 0.9994, "step": 12148 }, { "epoch": 14.870257037943697, "grad_norm": 0.9781065508599823, "learning_rate": 9.229286641160296e-10, "loss": 0.387, "step": 12149 }, { "epoch": 14.871481028151775, "grad_norm": 0.7527029392547504, "learning_rate": 9.055981025005156e-10, "loss": 0.2544, "step": 12150 }, { "epoch": 14.872705018359854, "grad_norm": 2.0286136317249053, "learning_rate": 8.884317721252755e-10, "loss": 0.5338, "step": 12151 }, { "epoch": 14.87392900856793, "grad_norm": 1.8928283831463266, "learning_rate": 8.714296741188511e-10, "loss": 1.5565, "step": 12152 }, { "epoch": 14.87515299877601, "grad_norm": 1.6086632483617815, "learning_rate": 8.545918095984041e-10, "loss": 0.3983, "step": 12153 }, { "epoch": 14.876376988984088, "grad_norm": 1.9313992628794288, "learning_rate": 8.379181796705494e-10, "loss": 0.3838, "step": 12154 }, { "epoch": 14.877600979192167, "grad_norm": 1.6495793177562212, "learning_rate": 8.214087854307995e-10, "loss": 0.4579, "step": 12155 }, { "epoch": 14.878824969400245, "grad_norm": 1.6670821775479912, "learning_rate": 8.050636279643975e-10, "loss": 0.6191, "step": 12156 }, { "epoch": 14.880048959608324, "grad_norm": 1.3146688933626793, "learning_rate": 7.888827083452066e-10, "loss": 0.642, "step": 12157 }, { "epoch": 14.8812729498164, "grad_norm": 1.3693291798353748, "learning_rate": 7.728660276368205e-10, "loss": 0.7618, "step": 12158 }, { "epoch": 14.88249694002448, "grad_norm": 2.9188593606646474, "learning_rate": 7.570135868917305e-10, "loss": 0.3537, "step": 12159 }, { "epoch": 14.883720930232558, "grad_norm": 1.5304508702577193, "learning_rate": 7.413253871516036e-10, "loss": 0.5208, "step": 12160 }, { "epoch": 14.884944920440637, "grad_norm": 2.1933457741337814, "learning_rate": 7.258014294472815e-10, "loss": 0.5896, "step": 12161 }, { "epoch": 14.886168910648715, "grad_norm": 2.2341117489606988, "learning_rate": 7.10441714799337e-10, "loss": 1.123, "step": 12162 }, { "epoch": 14.887392900856794, "grad_norm": 3.0100885533785338, "learning_rate": 6.952462442166852e-10, "loss": 0.5218, "step": 12163 }, { "epoch": 14.88861689106487, "grad_norm": 2.357731993440913, "learning_rate": 6.802150186985268e-10, "loss": 0.3423, "step": 12164 }, { "epoch": 14.88984088127295, "grad_norm": 2.3382865116353932, "learning_rate": 6.653480392318501e-10, "loss": 0.3433, "step": 12165 }, { "epoch": 14.891064871481028, "grad_norm": 3.18343263901569, "learning_rate": 6.50645306794484e-10, "loss": 0.3886, "step": 12166 }, { "epoch": 14.892288861689106, "grad_norm": 1.8372837847887986, "learning_rate": 6.36106822352045e-10, "loss": 1.187, "step": 12167 }, { "epoch": 14.893512851897185, "grad_norm": 1.570964071747566, "learning_rate": 6.217325868601576e-10, "loss": 0.9105, "step": 12168 }, { "epoch": 14.894736842105264, "grad_norm": 2.176347643471491, "learning_rate": 6.075226012636215e-10, "loss": 1.1125, "step": 12169 }, { "epoch": 14.895960832313342, "grad_norm": 2.3496200916455345, "learning_rate": 5.934768664958568e-10, "loss": 0.3643, "step": 12170 }, { "epoch": 14.89718482252142, "grad_norm": 2.7002069853474455, "learning_rate": 5.795953834802914e-10, "loss": 1.1717, "step": 12171 }, { "epoch": 14.898408812729498, "grad_norm": 1.8913277098298553, "learning_rate": 5.658781531289737e-10, "loss": 0.7032, "step": 12172 }, { "epoch": 14.899632802937576, "grad_norm": 1.7650691811391428, "learning_rate": 5.52325176343127e-10, "loss": 0.5663, "step": 12173 }, { "epoch": 14.900856793145655, "grad_norm": 2.0122160071963524, "learning_rate": 5.38936454013983e-10, "loss": 0.4432, "step": 12174 }, { "epoch": 14.902080783353734, "grad_norm": 1.7710164578573233, "learning_rate": 5.257119870208382e-10, "loss": 0.7418, "step": 12175 }, { "epoch": 14.903304773561812, "grad_norm": 2.3170921207536046, "learning_rate": 5.12651776233275e-10, "loss": 0.4596, "step": 12176 }, { "epoch": 14.904528763769889, "grad_norm": 1.500459527370147, "learning_rate": 4.997558225089405e-10, "loss": 0.4506, "step": 12177 }, { "epoch": 14.905752753977968, "grad_norm": 1.156096024014779, "learning_rate": 4.870241266957676e-10, "loss": 0.5907, "step": 12178 }, { "epoch": 14.906976744186046, "grad_norm": 1.8320985481279137, "learning_rate": 4.744566896305869e-10, "loss": 0.6494, "step": 12179 }, { "epoch": 14.908200734394125, "grad_norm": 2.7613927846731827, "learning_rate": 4.6205351213857166e-10, "loss": 0.7804, "step": 12180 }, { "epoch": 14.909424724602204, "grad_norm": 2.416877500235346, "learning_rate": 4.498145950354582e-10, "loss": 0.4209, "step": 12181 }, { "epoch": 14.910648714810282, "grad_norm": 2.757327476424989, "learning_rate": 4.3773993912532565e-10, "loss": 0.5797, "step": 12182 }, { "epoch": 14.911872705018359, "grad_norm": 2.083272355374593, "learning_rate": 4.2582954520170583e-10, "loss": 0.758, "step": 12183 }, { "epoch": 14.913096695226438, "grad_norm": 2.2805726620547135, "learning_rate": 4.14083414047306e-10, "loss": 0.6667, "step": 12184 }, { "epoch": 14.914320685434516, "grad_norm": 1.0471311613156458, "learning_rate": 4.0250154643400875e-10, "loss": 0.4442, "step": 12185 }, { "epoch": 14.915544675642595, "grad_norm": 2.0996702165943355, "learning_rate": 3.910839431228719e-10, "loss": 0.6508, "step": 12186 }, { "epoch": 14.916768665850674, "grad_norm": 2.323210710765075, "learning_rate": 3.798306048641287e-10, "loss": 0.4069, "step": 12187 }, { "epoch": 14.917992656058752, "grad_norm": 2.34444065538037, "learning_rate": 3.687415323977428e-10, "loss": 0.6244, "step": 12188 }, { "epoch": 14.919216646266829, "grad_norm": 1.9993310813210843, "learning_rate": 3.578167264520205e-10, "loss": 0.5283, "step": 12189 }, { "epoch": 14.920440636474908, "grad_norm": 1.2944814995705447, "learning_rate": 3.470561877449985e-10, "loss": 0.7448, "step": 12190 }, { "epoch": 14.921664626682986, "grad_norm": 2.92691211330855, "learning_rate": 3.364599169841665e-10, "loss": 0.7883, "step": 12191 }, { "epoch": 14.922888616891065, "grad_norm": 1.102546285043171, "learning_rate": 3.260279148653567e-10, "loss": 0.5082, "step": 12192 }, { "epoch": 14.924112607099143, "grad_norm": 1.53369750188442, "learning_rate": 3.157601820744094e-10, "loss": 0.4495, "step": 12193 }, { "epoch": 14.925336597307222, "grad_norm": 2.1260879794021674, "learning_rate": 3.056567192857851e-10, "loss": 0.5674, "step": 12194 }, { "epoch": 14.9265605875153, "grad_norm": 1.8500510113772122, "learning_rate": 2.957175271639523e-10, "loss": 0.5194, "step": 12195 }, { "epoch": 14.927784577723378, "grad_norm": 1.663544358762414, "learning_rate": 2.859426063617221e-10, "loss": 1.2389, "step": 12196 }, { "epoch": 14.929008567931456, "grad_norm": 2.737260708682882, "learning_rate": 2.7633195752135853e-10, "loss": 0.9231, "step": 12197 }, { "epoch": 14.930232558139535, "grad_norm": 2.000415977064803, "learning_rate": 2.668855812748561e-10, "loss": 0.4528, "step": 12198 }, { "epoch": 14.931456548347613, "grad_norm": 3.7732259497610237, "learning_rate": 2.576034782425518e-10, "loss": 0.464, "step": 12199 }, { "epoch": 14.932680538555692, "grad_norm": 1.2566507119353914, "learning_rate": 2.484856490347909e-10, "loss": 0.8127, "step": 12200 }, { "epoch": 14.93390452876377, "grad_norm": 1.3422610623642606, "learning_rate": 2.3953209425053856e-10, "loss": 0.7128, "step": 12201 }, { "epoch": 14.935128518971847, "grad_norm": 3.2902868750312675, "learning_rate": 2.3074281447821312e-10, "loss": 0.3016, "step": 12202 }, { "epoch": 14.936352509179926, "grad_norm": 1.9797471702062786, "learning_rate": 2.2211781029568558e-10, "loss": 0.5766, "step": 12203 }, { "epoch": 14.937576499388005, "grad_norm": 1.9549526648305577, "learning_rate": 2.1365708226944726e-10, "loss": 0.5766, "step": 12204 }, { "epoch": 14.938800489596083, "grad_norm": 2.888419011438165, "learning_rate": 2.0536063095544235e-10, "loss": 0.4356, "step": 12205 }, { "epoch": 14.940024479804162, "grad_norm": 2.518199044381515, "learning_rate": 1.972284568990679e-10, "loss": 0.4783, "step": 12206 }, { "epoch": 14.94124847001224, "grad_norm": 1.5488572801144849, "learning_rate": 1.892605606348963e-10, "loss": 1.2234, "step": 12207 }, { "epoch": 14.94247246022032, "grad_norm": 1.85600130528948, "learning_rate": 1.8145694268612012e-10, "loss": 0.4611, "step": 12208 }, { "epoch": 14.943696450428396, "grad_norm": 2.2837818478089473, "learning_rate": 1.7381760356594e-10, "loss": 0.5313, "step": 12209 }, { "epoch": 14.944920440636475, "grad_norm": 1.8332031190468716, "learning_rate": 1.6634254377645432e-10, "loss": 0.9476, "step": 12210 }, { "epoch": 14.946144430844553, "grad_norm": 3.075960561264935, "learning_rate": 1.590317638083816e-10, "loss": 0.4564, "step": 12211 }, { "epoch": 14.947368421052632, "grad_norm": 2.2696290002918187, "learning_rate": 1.5188526414244842e-10, "loss": 0.5648, "step": 12212 }, { "epoch": 14.94859241126071, "grad_norm": 3.4844274701905937, "learning_rate": 1.449030452485567e-10, "loss": 0.3437, "step": 12213 }, { "epoch": 14.949816401468787, "grad_norm": 1.6480128099897529, "learning_rate": 1.3808510758522854e-10, "loss": 0.5715, "step": 12214 }, { "epoch": 14.951040391676866, "grad_norm": 2.1077799716103023, "learning_rate": 1.314314516004389e-10, "loss": 0.5223, "step": 12215 }, { "epoch": 14.952264381884945, "grad_norm": 2.109482248832846, "learning_rate": 1.2494207773189327e-10, "loss": 0.2867, "step": 12216 }, { "epoch": 14.953488372093023, "grad_norm": 1.2497103164198702, "learning_rate": 1.1861698640563968e-10, "loss": 1.0372, "step": 12217 }, { "epoch": 14.954712362301102, "grad_norm": 1.1928900426167912, "learning_rate": 1.1245617803717911e-10, "loss": 0.3802, "step": 12218 }, { "epoch": 14.95593635250918, "grad_norm": 3.1742222852726467, "learning_rate": 1.0645965303202055e-10, "loss": 0.5662, "step": 12219 }, { "epoch": 14.957160342717259, "grad_norm": 1.9881201370155401, "learning_rate": 1.0062741178373802e-10, "loss": 0.3329, "step": 12220 }, { "epoch": 14.958384332925336, "grad_norm": 1.6440781160495652, "learning_rate": 9.495945467563605e-11, "loss": 0.6851, "step": 12221 }, { "epoch": 14.959608323133414, "grad_norm": 1.5144669466286118, "learning_rate": 8.945578208047201e-11, "loss": 0.5867, "step": 12222 }, { "epoch": 14.960832313341493, "grad_norm": 3.0332587876248644, "learning_rate": 8.41163943596235e-11, "loss": 0.4517, "step": 12223 }, { "epoch": 14.962056303549572, "grad_norm": 1.5883003699647809, "learning_rate": 7.894129186419852e-11, "loss": 0.2182, "step": 12224 }, { "epoch": 14.96328029375765, "grad_norm": 1.9958316965022371, "learning_rate": 7.393047493392536e-11, "loss": 0.3904, "step": 12225 }, { "epoch": 14.964504283965729, "grad_norm": 2.979382427301688, "learning_rate": 6.908394389854023e-11, "loss": 0.9454, "step": 12226 }, { "epoch": 14.965728274173806, "grad_norm": 3.440231529978535, "learning_rate": 6.440169907639959e-11, "loss": 0.4595, "step": 12227 }, { "epoch": 14.966952264381884, "grad_norm": 1.965075211933853, "learning_rate": 5.988374077503523e-11, "loss": 0.5942, "step": 12228 }, { "epoch": 14.968176254589963, "grad_norm": 3.8639173518875762, "learning_rate": 5.553006929170934e-11, "loss": 0.409, "step": 12229 }, { "epoch": 14.969400244798042, "grad_norm": 3.3657868069095946, "learning_rate": 5.134068491202682e-11, "loss": 0.6739, "step": 12230 }, { "epoch": 14.97062423500612, "grad_norm": 1.7191415493922453, "learning_rate": 4.731558791160051e-11, "loss": 0.4619, "step": 12231 }, { "epoch": 14.971848225214199, "grad_norm": 1.9010766832709547, "learning_rate": 4.345477855494107e-11, "loss": 1.3759, "step": 12232 }, { "epoch": 14.973072215422278, "grad_norm": 2.0757603875307797, "learning_rate": 3.9758257096012e-11, "loss": 0.5201, "step": 12233 }, { "epoch": 14.974296205630354, "grad_norm": 2.4683511732100945, "learning_rate": 3.622602377711948e-11, "loss": 0.4067, "step": 12234 }, { "epoch": 14.975520195838433, "grad_norm": 1.889939944461724, "learning_rate": 3.285807883085523e-11, "loss": 1.0785, "step": 12235 }, { "epoch": 14.976744186046512, "grad_norm": 1.6052645569748314, "learning_rate": 2.96544224784312e-11, "loss": 0.6005, "step": 12236 }, { "epoch": 14.97796817625459, "grad_norm": 2.4251814553148594, "learning_rate": 2.661505493023464e-11, "loss": 0.6619, "step": 12237 }, { "epoch": 14.979192166462669, "grad_norm": 1.1853608137145886, "learning_rate": 2.37399763861057e-11, "loss": 0.6008, "step": 12238 }, { "epoch": 14.980416156670747, "grad_norm": 1.7482523770849725, "learning_rate": 2.1029187035337408e-11, "loss": 0.9052, "step": 12239 }, { "epoch": 14.981640146878824, "grad_norm": 1.910113257656022, "learning_rate": 1.848268705556544e-11, "loss": 0.5193, "step": 12240 }, { "epoch": 14.982864137086903, "grad_norm": 1.5420679824612047, "learning_rate": 1.6100476614155925e-11, "loss": 0.804, "step": 12241 }, { "epoch": 14.984088127294982, "grad_norm": 1.472540643143315, "learning_rate": 1.388255586792786e-11, "loss": 0.5325, "step": 12242 }, { "epoch": 14.98531211750306, "grad_norm": 2.4354546752340425, "learning_rate": 1.1828924962598021e-11, "loss": 1.0698, "step": 12243 }, { "epoch": 14.986536107711139, "grad_norm": 2.707119701108476, "learning_rate": 9.939584033058503e-12, "loss": 0.4656, "step": 12244 }, { "epoch": 14.987760097919217, "grad_norm": 1.4428071224820438, "learning_rate": 8.214533203376729e-12, "loss": 0.6878, "step": 12245 }, { "epoch": 14.988984088127294, "grad_norm": 1.8374203895556762, "learning_rate": 6.653772587073004e-12, "loss": 1.1293, "step": 12246 }, { "epoch": 14.990208078335373, "grad_norm": 1.5528808681272455, "learning_rate": 5.2573022865654026e-12, "loss": 0.6141, "step": 12247 }, { "epoch": 14.991432068543451, "grad_norm": 1.4932512185123261, "learning_rate": 4.025122394002434e-12, "loss": 0.6609, "step": 12248 }, { "epoch": 14.99265605875153, "grad_norm": 2.9071477513073467, "learning_rate": 2.9572329895977135e-12, "loss": 0.5553, "step": 12249 }, { "epoch": 14.993880048959609, "grad_norm": 3.7650760860741346, "learning_rate": 2.053634144405514e-12, "loss": 0.2617, "step": 12250 }, { "epoch": 14.995104039167687, "grad_norm": 2.391589787169674, "learning_rate": 1.314325917267656e-12, "loss": 1.0234, "step": 12251 }, { "epoch": 14.996328029375764, "grad_norm": 2.916659861815552, "learning_rate": 7.393083567563964e-13, "loss": 0.4603, "step": 12252 }, { "epoch": 14.997552019583843, "grad_norm": 1.5471975959201474, "learning_rate": 3.28581500896874e-13, "loss": 0.7804, "step": 12253 }, { "epoch": 14.998776009791921, "grad_norm": 1.1298569357087687, "learning_rate": 8.214537661199729e-14, "loss": 0.5595, "step": 12254 }, { "epoch": 15.0, "grad_norm": 1.9960724675909332, "learning_rate": 0.0, "loss": 0.5467, "step": 12255 }, { "epoch": 15.0, "step": 12255, "total_flos": 4797777151918080.0, "train_loss": 0.6943354440338512, "train_runtime": 124613.968, "train_samples_per_second": 0.098, "train_steps_per_second": 0.098 } ], "logging_steps": 1, "max_steps": 12255, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4797777151918080.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }