{ "best_metric": 0.9573606848716736, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.6861063464837049, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003430531732418525, "grad_norm": 0.2732606530189514, "learning_rate": 6e-06, "loss": 1.2807, "step": 1 }, { "epoch": 0.003430531732418525, "eval_loss": 1.3408838510513306, "eval_runtime": 36.5524, "eval_samples_per_second": 13.433, "eval_steps_per_second": 3.365, "step": 1 }, { "epoch": 0.00686106346483705, "grad_norm": 0.2737733721733093, "learning_rate": 1.2e-05, "loss": 1.3341, "step": 2 }, { "epoch": 0.010291595197255575, "grad_norm": 0.29186567664146423, "learning_rate": 1.8e-05, "loss": 1.2978, "step": 3 }, { "epoch": 0.0137221269296741, "grad_norm": 0.2877369821071625, "learning_rate": 2.4e-05, "loss": 1.2388, "step": 4 }, { "epoch": 0.017152658662092625, "grad_norm": 0.301379919052124, "learning_rate": 3e-05, "loss": 1.3055, "step": 5 }, { "epoch": 0.02058319039451115, "grad_norm": 0.29611584544181824, "learning_rate": 3.6e-05, "loss": 1.2891, "step": 6 }, { "epoch": 0.024013722126929673, "grad_norm": 0.3051072061061859, "learning_rate": 4.2e-05, "loss": 1.3017, "step": 7 }, { "epoch": 0.0274442538593482, "grad_norm": 0.25434353947639465, "learning_rate": 4.8e-05, "loss": 1.2517, "step": 8 }, { "epoch": 0.030874785591766724, "grad_norm": 0.24664027988910675, "learning_rate": 5.4000000000000005e-05, "loss": 1.23, "step": 9 }, { "epoch": 0.03430531732418525, "grad_norm": 0.2708350121974945, "learning_rate": 6e-05, "loss": 1.2397, "step": 10 }, { "epoch": 0.03773584905660377, "grad_norm": 0.27940887212753296, "learning_rate": 5.999589914977407e-05, "loss": 1.2664, "step": 11 }, { "epoch": 0.0411663807890223, "grad_norm": 0.34617090225219727, "learning_rate": 5.998359772022778e-05, "loss": 1.1993, "step": 12 }, { "epoch": 0.044596912521440824, "grad_norm": 0.39878541231155396, "learning_rate": 5.996309907444915e-05, "loss": 1.2262, "step": 13 }, { "epoch": 0.048027444253859346, "grad_norm": 0.3468141257762909, "learning_rate": 5.9934408816563236e-05, "loss": 1.1357, "step": 14 }, { "epoch": 0.051457975986277875, "grad_norm": 0.2871208190917969, "learning_rate": 5.98975347902001e-05, "loss": 1.1708, "step": 15 }, { "epoch": 0.0548885077186964, "grad_norm": 0.27395567297935486, "learning_rate": 5.9852487076350345e-05, "loss": 1.1996, "step": 16 }, { "epoch": 0.058319039451114926, "grad_norm": 0.24767224490642548, "learning_rate": 5.979927799060915e-05, "loss": 1.1241, "step": 17 }, { "epoch": 0.06174957118353345, "grad_norm": 0.26944437623023987, "learning_rate": 5.9737922079809257e-05, "loss": 1.1806, "step": 18 }, { "epoch": 0.06518010291595197, "grad_norm": 0.27347785234451294, "learning_rate": 5.9668436118044054e-05, "loss": 1.1681, "step": 19 }, { "epoch": 0.0686106346483705, "grad_norm": 0.2989870011806488, "learning_rate": 5.959083910208167e-05, "loss": 1.1656, "step": 20 }, { "epoch": 0.07204116638078903, "grad_norm": 0.28481730818748474, "learning_rate": 5.9505152246171474e-05, "loss": 1.1905, "step": 21 }, { "epoch": 0.07547169811320754, "grad_norm": 0.2873131036758423, "learning_rate": 5.941139897624428e-05, "loss": 1.1379, "step": 22 }, { "epoch": 0.07890222984562607, "grad_norm": 0.3055589199066162, "learning_rate": 5.9309604923507984e-05, "loss": 1.1685, "step": 23 }, { "epoch": 0.0823327615780446, "grad_norm": 0.3371344208717346, "learning_rate": 5.9199797917440176e-05, "loss": 1.2551, "step": 24 }, { "epoch": 0.08576329331046312, "grad_norm": 0.298042893409729, "learning_rate": 5.908200797817991e-05, "loss": 1.1628, "step": 25 }, { "epoch": 0.08919382504288165, "grad_norm": 0.3116256892681122, "learning_rate": 5.895626730832046e-05, "loss": 1.1208, "step": 26 }, { "epoch": 0.09262435677530018, "grad_norm": 0.31638380885124207, "learning_rate": 5.882261028410545e-05, "loss": 1.198, "step": 27 }, { "epoch": 0.09605488850771869, "grad_norm": 0.32093462347984314, "learning_rate": 5.8681073446030734e-05, "loss": 1.097, "step": 28 }, { "epoch": 0.09948542024013722, "grad_norm": 0.32988405227661133, "learning_rate": 5.853169548885461e-05, "loss": 1.106, "step": 29 }, { "epoch": 0.10291595197255575, "grad_norm": 0.3348427712917328, "learning_rate": 5.8374517251019035e-05, "loss": 1.0533, "step": 30 }, { "epoch": 0.10634648370497427, "grad_norm": 0.32859793305397034, "learning_rate": 5.820958170348484e-05, "loss": 1.1193, "step": 31 }, { "epoch": 0.1097770154373928, "grad_norm": 0.32682138681411743, "learning_rate": 5.8036933937983825e-05, "loss": 1.046, "step": 32 }, { "epoch": 0.11320754716981132, "grad_norm": 0.36188262701034546, "learning_rate": 5.7856621154691217e-05, "loss": 1.0577, "step": 33 }, { "epoch": 0.11663807890222985, "grad_norm": 0.37638968229293823, "learning_rate": 5.766869264932154e-05, "loss": 1.1084, "step": 34 }, { "epoch": 0.12006861063464837, "grad_norm": 0.38222038745880127, "learning_rate": 5.747319979965172e-05, "loss": 1.0492, "step": 35 }, { "epoch": 0.1234991423670669, "grad_norm": 0.3611724078655243, "learning_rate": 5.727019605147488e-05, "loss": 1.0813, "step": 36 }, { "epoch": 0.1269296740994854, "grad_norm": 0.38864666223526, "learning_rate": 5.7059736903988775e-05, "loss": 1.0633, "step": 37 }, { "epoch": 0.13036020583190394, "grad_norm": 0.39809203147888184, "learning_rate": 5.684187989462291e-05, "loss": 1.0584, "step": 38 }, { "epoch": 0.13379073756432247, "grad_norm": 0.4621327817440033, "learning_rate": 5.661668458330836e-05, "loss": 1.0978, "step": 39 }, { "epoch": 0.137221269296741, "grad_norm": 0.3770487904548645, "learning_rate": 5.638421253619467e-05, "loss": 1.0413, "step": 40 }, { "epoch": 0.14065180102915953, "grad_norm": 0.44612008333206177, "learning_rate": 5.614452730881832e-05, "loss": 1.0674, "step": 41 }, { "epoch": 0.14408233276157806, "grad_norm": 0.4539085924625397, "learning_rate": 5.589769442872722e-05, "loss": 1.0854, "step": 42 }, { "epoch": 0.14751286449399656, "grad_norm": 0.4452250897884369, "learning_rate": 5.5643781377566175e-05, "loss": 1.0438, "step": 43 }, { "epoch": 0.1509433962264151, "grad_norm": 0.5143773555755615, "learning_rate": 5.538285757262806e-05, "loss": 1.0925, "step": 44 }, { "epoch": 0.15437392795883362, "grad_norm": 0.4816042482852936, "learning_rate": 5.5114994347875856e-05, "loss": 1.0804, "step": 45 }, { "epoch": 0.15780445969125215, "grad_norm": 0.5185014009475708, "learning_rate": 5.48402649344406e-05, "loss": 1.0586, "step": 46 }, { "epoch": 0.16123499142367068, "grad_norm": 0.512935996055603, "learning_rate": 5.455874444060078e-05, "loss": 1.009, "step": 47 }, { "epoch": 0.1646655231560892, "grad_norm": 0.5778262615203857, "learning_rate": 5.427050983124843e-05, "loss": 1.1493, "step": 48 }, { "epoch": 0.1680960548885077, "grad_norm": 0.6094892621040344, "learning_rate": 5.397563990684774e-05, "loss": 1.0522, "step": 49 }, { "epoch": 0.17152658662092624, "grad_norm": 0.7731658816337585, "learning_rate": 5.367421528189181e-05, "loss": 1.157, "step": 50 }, { "epoch": 0.17152658662092624, "eval_loss": 1.082780122756958, "eval_runtime": 37.0843, "eval_samples_per_second": 13.24, "eval_steps_per_second": 3.317, "step": 50 }, { "epoch": 0.17495711835334476, "grad_norm": 0.5998779535293579, "learning_rate": 5.336631836286338e-05, "loss": 1.1837, "step": 51 }, { "epoch": 0.1783876500857633, "grad_norm": 0.5658138394355774, "learning_rate": 5.3052033325705774e-05, "loss": 1.2262, "step": 52 }, { "epoch": 0.18181818181818182, "grad_norm": 0.4508465826511383, "learning_rate": 5.2731446092810044e-05, "loss": 1.1704, "step": 53 }, { "epoch": 0.18524871355060035, "grad_norm": 0.3549885153770447, "learning_rate": 5.240464430952462e-05, "loss": 1.1425, "step": 54 }, { "epoch": 0.18867924528301888, "grad_norm": 0.29392892122268677, "learning_rate": 5.207171732019395e-05, "loss": 1.1385, "step": 55 }, { "epoch": 0.19210977701543738, "grad_norm": 0.25449082255363464, "learning_rate": 5.1732756143732675e-05, "loss": 1.1097, "step": 56 }, { "epoch": 0.1955403087478559, "grad_norm": 0.23871001601219177, "learning_rate": 5.1387853448741916e-05, "loss": 1.0445, "step": 57 }, { "epoch": 0.19897084048027444, "grad_norm": 0.2559022009372711, "learning_rate": 5.103710352817465e-05, "loss": 1.1335, "step": 58 }, { "epoch": 0.20240137221269297, "grad_norm": 0.26953640580177307, "learning_rate": 5.068060227355698e-05, "loss": 1.1055, "step": 59 }, { "epoch": 0.2058319039451115, "grad_norm": 0.2536450922489166, "learning_rate": 5.0318447148772234e-05, "loss": 1.0726, "step": 60 }, { "epoch": 0.20926243567753003, "grad_norm": 0.2779094874858856, "learning_rate": 4.995073716341545e-05, "loss": 1.0435, "step": 61 }, { "epoch": 0.21269296740994853, "grad_norm": 0.2876923382282257, "learning_rate": 4.957757284572506e-05, "loss": 1.1535, "step": 62 }, { "epoch": 0.21612349914236706, "grad_norm": 0.2710215449333191, "learning_rate": 4.91990562150995e-05, "loss": 1.0925, "step": 63 }, { "epoch": 0.2195540308747856, "grad_norm": 0.2818925976753235, "learning_rate": 4.881529075420611e-05, "loss": 1.0242, "step": 64 }, { "epoch": 0.22298456260720412, "grad_norm": 0.28108134865760803, "learning_rate": 4.8426381380690036e-05, "loss": 1.0035, "step": 65 }, { "epoch": 0.22641509433962265, "grad_norm": 0.2871432304382324, "learning_rate": 4.8032434418490753e-05, "loss": 0.9466, "step": 66 }, { "epoch": 0.22984562607204118, "grad_norm": 0.29641860723495483, "learning_rate": 4.7633557568774194e-05, "loss": 1.0229, "step": 67 }, { "epoch": 0.2332761578044597, "grad_norm": 0.3117521107196808, "learning_rate": 4.722985988048831e-05, "loss": 1.0753, "step": 68 }, { "epoch": 0.2367066895368782, "grad_norm": 0.29570478200912476, "learning_rate": 4.6821451720550184e-05, "loss": 1.0407, "step": 69 }, { "epoch": 0.24013722126929674, "grad_norm": 0.30424389243125916, "learning_rate": 4.640844474367282e-05, "loss": 1.0567, "step": 70 }, { "epoch": 0.24356775300171526, "grad_norm": 0.3210867941379547, "learning_rate": 4.5990951861839815e-05, "loss": 1.1198, "step": 71 }, { "epoch": 0.2469982847341338, "grad_norm": 0.30303463339805603, "learning_rate": 4.5569087213436455e-05, "loss": 0.975, "step": 72 }, { "epoch": 0.2504288164665523, "grad_norm": 0.3214765787124634, "learning_rate": 4.514296613204532e-05, "loss": 0.9588, "step": 73 }, { "epoch": 0.2538593481989708, "grad_norm": 0.31114473938941956, "learning_rate": 4.471270511491525e-05, "loss": 0.9988, "step": 74 }, { "epoch": 0.25728987993138935, "grad_norm": 0.3351743817329407, "learning_rate": 4.427842179111221e-05, "loss": 0.9991, "step": 75 }, { "epoch": 0.2607204116638079, "grad_norm": 0.34250015020370483, "learning_rate": 4.3840234889360634e-05, "loss": 1.0851, "step": 76 }, { "epoch": 0.2641509433962264, "grad_norm": 0.3616669476032257, "learning_rate": 4.33982642055842e-05, "loss": 1.0118, "step": 77 }, { "epoch": 0.26758147512864494, "grad_norm": 0.33308422565460205, "learning_rate": 4.2952630570154785e-05, "loss": 1.0029, "step": 78 }, { "epoch": 0.27101200686106347, "grad_norm": 0.36905744671821594, "learning_rate": 4.250345581485871e-05, "loss": 1.0374, "step": 79 }, { "epoch": 0.274442538593482, "grad_norm": 0.33839741349220276, "learning_rate": 4.205086273958909e-05, "loss": 1.0069, "step": 80 }, { "epoch": 0.27787307032590053, "grad_norm": 0.3820783495903015, "learning_rate": 4.1594975078773565e-05, "loss": 1.0584, "step": 81 }, { "epoch": 0.28130360205831906, "grad_norm": 0.3676692247390747, "learning_rate": 4.113591746754662e-05, "loss": 0.9845, "step": 82 }, { "epoch": 0.2847341337907376, "grad_norm": 0.3857022523880005, "learning_rate": 4.06738154076755e-05, "loss": 0.9866, "step": 83 }, { "epoch": 0.2881646655231561, "grad_norm": 0.3994433879852295, "learning_rate": 4.020879523324929e-05, "loss": 1.0072, "step": 84 }, { "epoch": 0.2915951972555746, "grad_norm": 0.37817177176475525, "learning_rate": 3.974098407614051e-05, "loss": 0.9437, "step": 85 }, { "epoch": 0.2950257289879931, "grad_norm": 0.400671124458313, "learning_rate": 3.927050983124842e-05, "loss": 0.9925, "step": 86 }, { "epoch": 0.29845626072041165, "grad_norm": 0.4129525125026703, "learning_rate": 3.8797501121533946e-05, "loss": 0.9855, "step": 87 }, { "epoch": 0.3018867924528302, "grad_norm": 0.44146278500556946, "learning_rate": 3.832208726285534e-05, "loss": 1.0223, "step": 88 }, { "epoch": 0.3053173241852487, "grad_norm": 0.45769569277763367, "learning_rate": 3.784439822861459e-05, "loss": 1.0432, "step": 89 }, { "epoch": 0.30874785591766724, "grad_norm": 0.47213757038116455, "learning_rate": 3.7364564614223976e-05, "loss": 1.0935, "step": 90 }, { "epoch": 0.31217838765008576, "grad_norm": 0.45397675037384033, "learning_rate": 3.688271760140255e-05, "loss": 0.9947, "step": 91 }, { "epoch": 0.3156089193825043, "grad_norm": 0.49769651889801025, "learning_rate": 3.6398988922312406e-05, "loss": 1.0535, "step": 92 }, { "epoch": 0.3190394511149228, "grad_norm": 0.473308265209198, "learning_rate": 3.591351082354441e-05, "loss": 1.0343, "step": 93 }, { "epoch": 0.32246998284734135, "grad_norm": 0.5368250012397766, "learning_rate": 3.54264160299633e-05, "loss": 0.9386, "step": 94 }, { "epoch": 0.3259005145797599, "grad_norm": 0.5603958368301392, "learning_rate": 3.493783770842202e-05, "loss": 1.0444, "step": 95 }, { "epoch": 0.3293310463121784, "grad_norm": 0.5480132102966309, "learning_rate": 3.444790943135526e-05, "loss": 1.0075, "step": 96 }, { "epoch": 0.33276157804459694, "grad_norm": 0.549180805683136, "learning_rate": 3.3956765140262074e-05, "loss": 0.9175, "step": 97 }, { "epoch": 0.3361921097770154, "grad_norm": 0.5887949466705322, "learning_rate": 3.346453910908759e-05, "loss": 0.9748, "step": 98 }, { "epoch": 0.33962264150943394, "grad_norm": 0.745449423789978, "learning_rate": 3.297136590751389e-05, "loss": 1.0102, "step": 99 }, { "epoch": 0.34305317324185247, "grad_norm": 0.8202319145202637, "learning_rate": 3.247738036416998e-05, "loss": 1.0495, "step": 100 }, { "epoch": 0.34305317324185247, "eval_loss": 1.0051817893981934, "eval_runtime": 36.8152, "eval_samples_per_second": 13.337, "eval_steps_per_second": 3.341, "step": 100 }, { "epoch": 0.346483704974271, "grad_norm": 0.36224040389060974, "learning_rate": 3.1982717529770985e-05, "loss": 1.0916, "step": 101 }, { "epoch": 0.34991423670668953, "grad_norm": 0.38614827394485474, "learning_rate": 3.148751264019667e-05, "loss": 1.135, "step": 102 }, { "epoch": 0.35334476843910806, "grad_norm": 0.3652215600013733, "learning_rate": 3.099190107951924e-05, "loss": 1.0972, "step": 103 }, { "epoch": 0.3567753001715266, "grad_norm": 0.3642301559448242, "learning_rate": 3.049601834299076e-05, "loss": 1.0843, "step": 104 }, { "epoch": 0.3602058319039451, "grad_norm": 0.3324933350086212, "learning_rate": 3e-05, "loss": 1.1357, "step": 105 }, { "epoch": 0.36363636363636365, "grad_norm": 0.32382386922836304, "learning_rate": 2.9503981657009246e-05, "loss": 1.0928, "step": 106 }, { "epoch": 0.3670668953687822, "grad_norm": 0.31067949533462524, "learning_rate": 2.9008098920480752e-05, "loss": 1.0738, "step": 107 }, { "epoch": 0.3704974271012007, "grad_norm": 0.3035469055175781, "learning_rate": 2.851248735980333e-05, "loss": 1.0208, "step": 108 }, { "epoch": 0.37392795883361923, "grad_norm": 0.3173302412033081, "learning_rate": 2.801728247022902e-05, "loss": 1.0499, "step": 109 }, { "epoch": 0.37735849056603776, "grad_norm": 0.30463680624961853, "learning_rate": 2.7522619635830034e-05, "loss": 1.0523, "step": 110 }, { "epoch": 0.38078902229845624, "grad_norm": 0.3191668689250946, "learning_rate": 2.702863409248612e-05, "loss": 1.0131, "step": 111 }, { "epoch": 0.38421955403087477, "grad_norm": 0.3322906494140625, "learning_rate": 2.6535460890912416e-05, "loss": 0.944, "step": 112 }, { "epoch": 0.3876500857632933, "grad_norm": 0.3359247148036957, "learning_rate": 2.604323485973793e-05, "loss": 0.9608, "step": 113 }, { "epoch": 0.3910806174957118, "grad_norm": 0.32519465684890747, "learning_rate": 2.555209056864474e-05, "loss": 0.969, "step": 114 }, { "epoch": 0.39451114922813035, "grad_norm": 0.3210972547531128, "learning_rate": 2.5062162291577978e-05, "loss": 1.0239, "step": 115 }, { "epoch": 0.3979416809605489, "grad_norm": 0.323660671710968, "learning_rate": 2.4573583970036712e-05, "loss": 1.0374, "step": 116 }, { "epoch": 0.4013722126929674, "grad_norm": 0.3377305567264557, "learning_rate": 2.4086489176455595e-05, "loss": 0.9631, "step": 117 }, { "epoch": 0.40480274442538594, "grad_norm": 0.33635398745536804, "learning_rate": 2.36010110776876e-05, "loss": 1.0388, "step": 118 }, { "epoch": 0.40823327615780447, "grad_norm": 0.3557407259941101, "learning_rate": 2.3117282398597456e-05, "loss": 0.9769, "step": 119 }, { "epoch": 0.411663807890223, "grad_norm": 0.34973031282424927, "learning_rate": 2.263543538577603e-05, "loss": 1.0182, "step": 120 }, { "epoch": 0.41509433962264153, "grad_norm": 0.3528589606285095, "learning_rate": 2.215560177138541e-05, "loss": 0.9451, "step": 121 }, { "epoch": 0.41852487135506006, "grad_norm": 0.38225072622299194, "learning_rate": 2.167791273714467e-05, "loss": 0.9343, "step": 122 }, { "epoch": 0.4219554030874786, "grad_norm": 0.41067805886268616, "learning_rate": 2.1202498878466062e-05, "loss": 1.0003, "step": 123 }, { "epoch": 0.42538593481989706, "grad_norm": 0.3684796094894409, "learning_rate": 2.072949016875158e-05, "loss": 0.9985, "step": 124 }, { "epoch": 0.4288164665523156, "grad_norm": 0.40370795130729675, "learning_rate": 2.0259015923859498e-05, "loss": 1.0254, "step": 125 }, { "epoch": 0.4322469982847341, "grad_norm": 0.40378862619400024, "learning_rate": 1.979120476675071e-05, "loss": 1.0512, "step": 126 }, { "epoch": 0.43567753001715265, "grad_norm": 0.40470102429389954, "learning_rate": 1.9326184592324503e-05, "loss": 0.9445, "step": 127 }, { "epoch": 0.4391080617495712, "grad_norm": 0.39239588379859924, "learning_rate": 1.8864082532453373e-05, "loss": 0.9897, "step": 128 }, { "epoch": 0.4425385934819897, "grad_norm": 0.4297294318675995, "learning_rate": 1.840502492122644e-05, "loss": 0.948, "step": 129 }, { "epoch": 0.44596912521440824, "grad_norm": 0.3933764398097992, "learning_rate": 1.7949137260410924e-05, "loss": 0.9737, "step": 130 }, { "epoch": 0.44939965694682676, "grad_norm": 0.3823975920677185, "learning_rate": 1.7496544185141295e-05, "loss": 0.9043, "step": 131 }, { "epoch": 0.4528301886792453, "grad_norm": 0.40711742639541626, "learning_rate": 1.7047369429845216e-05, "loss": 0.9259, "step": 132 }, { "epoch": 0.4562607204116638, "grad_norm": 0.42227280139923096, "learning_rate": 1.6601735794415806e-05, "loss": 0.9292, "step": 133 }, { "epoch": 0.45969125214408235, "grad_norm": 0.41876545548439026, "learning_rate": 1.615976511063937e-05, "loss": 0.9579, "step": 134 }, { "epoch": 0.4631217838765009, "grad_norm": 0.426393985748291, "learning_rate": 1.5721578208887793e-05, "loss": 0.9606, "step": 135 }, { "epoch": 0.4665523156089194, "grad_norm": 0.446198046207428, "learning_rate": 1.5287294885084766e-05, "loss": 0.9419, "step": 136 }, { "epoch": 0.4699828473413379, "grad_norm": 0.4536357522010803, "learning_rate": 1.4857033867954697e-05, "loss": 1.0326, "step": 137 }, { "epoch": 0.4734133790737564, "grad_norm": 0.45464301109313965, "learning_rate": 1.4430912786563554e-05, "loss": 1.0325, "step": 138 }, { "epoch": 0.47684391080617494, "grad_norm": 0.45512655377388, "learning_rate": 1.4009048138160195e-05, "loss": 0.9605, "step": 139 }, { "epoch": 0.48027444253859347, "grad_norm": 0.4851442575454712, "learning_rate": 1.3591555256327199e-05, "loss": 0.9561, "step": 140 }, { "epoch": 0.483704974271012, "grad_norm": 0.4672333300113678, "learning_rate": 1.3178548279449822e-05, "loss": 0.9497, "step": 141 }, { "epoch": 0.48713550600343053, "grad_norm": 0.49268803000450134, "learning_rate": 1.2770140119511693e-05, "loss": 0.9491, "step": 142 }, { "epoch": 0.49056603773584906, "grad_norm": 0.5017989277839661, "learning_rate": 1.2366442431225809e-05, "loss": 0.9614, "step": 143 }, { "epoch": 0.4939965694682676, "grad_norm": 0.5563336610794067, "learning_rate": 1.1967565581509248e-05, "loss": 0.995, "step": 144 }, { "epoch": 0.4974271012006861, "grad_norm": 0.5561904907226562, "learning_rate": 1.1573618619309965e-05, "loss": 1.0163, "step": 145 }, { "epoch": 0.5008576329331046, "grad_norm": 0.5744907855987549, "learning_rate": 1.1184709245793889e-05, "loss": 1.0965, "step": 146 }, { "epoch": 0.5042881646655232, "grad_norm": 0.6907095313072205, "learning_rate": 1.0800943784900502e-05, "loss": 0.9873, "step": 147 }, { "epoch": 0.5077186963979416, "grad_norm": 0.6119263768196106, "learning_rate": 1.042242715427494e-05, "loss": 0.9336, "step": 148 }, { "epoch": 0.5111492281303602, "grad_norm": 0.705530047416687, "learning_rate": 1.004926283658455e-05, "loss": 1.0054, "step": 149 }, { "epoch": 0.5145797598627787, "grad_norm": 1.1301898956298828, "learning_rate": 9.681552851227774e-06, "loss": 1.0328, "step": 150 }, { "epoch": 0.5145797598627787, "eval_loss": 0.9647265076637268, "eval_runtime": 37.0798, "eval_samples_per_second": 13.242, "eval_steps_per_second": 3.317, "step": 150 }, { "epoch": 0.5180102915951973, "grad_norm": 0.2788316011428833, "learning_rate": 9.319397726443026e-06, "loss": 1.0724, "step": 151 }, { "epoch": 0.5214408233276158, "grad_norm": 0.29611045122146606, "learning_rate": 8.962896471825342e-06, "loss": 1.1351, "step": 152 }, { "epoch": 0.5248713550600344, "grad_norm": 0.31029313802719116, "learning_rate": 8.61214655125809e-06, "loss": 1.0506, "step": 153 }, { "epoch": 0.5283018867924528, "grad_norm": 0.3131580352783203, "learning_rate": 8.267243856267331e-06, "loss": 1.0794, "step": 154 }, { "epoch": 0.5317324185248714, "grad_norm": 0.3210444152355194, "learning_rate": 7.928282679806052e-06, "loss": 1.0677, "step": 155 }, { "epoch": 0.5351629502572899, "grad_norm": 0.3272711932659149, "learning_rate": 7.595355690475393e-06, "loss": 1.0264, "step": 156 }, { "epoch": 0.5385934819897084, "grad_norm": 0.3380805253982544, "learning_rate": 7.268553907189964e-06, "loss": 1.0323, "step": 157 }, { "epoch": 0.5420240137221269, "grad_norm": 0.324920654296875, "learning_rate": 6.947966674294236e-06, "loss": 1.0284, "step": 158 }, { "epoch": 0.5454545454545454, "grad_norm": 0.34995001554489136, "learning_rate": 6.6336816371366305e-06, "loss": 0.9675, "step": 159 }, { "epoch": 0.548885077186964, "grad_norm": 0.3302232623100281, "learning_rate": 6.325784718108196e-06, "loss": 0.9964, "step": 160 }, { "epoch": 0.5523156089193825, "grad_norm": 0.34856972098350525, "learning_rate": 6.0243600931522595e-06, "loss": 0.9764, "step": 161 }, { "epoch": 0.5557461406518011, "grad_norm": 0.35645270347595215, "learning_rate": 5.72949016875158e-06, "loss": 1.0893, "step": 162 }, { "epoch": 0.5591766723842195, "grad_norm": 0.34165555238723755, "learning_rate": 5.44125555939923e-06, "loss": 1.0396, "step": 163 }, { "epoch": 0.5626072041166381, "grad_norm": 0.3658033013343811, "learning_rate": 5.159735065559399e-06, "loss": 0.9651, "step": 164 }, { "epoch": 0.5660377358490566, "grad_norm": 0.3548159599304199, "learning_rate": 4.885005652124144e-06, "loss": 1.0168, "step": 165 }, { "epoch": 0.5694682675814752, "grad_norm": 0.35155749320983887, "learning_rate": 4.617142427371934e-06, "loss": 0.9882, "step": 166 }, { "epoch": 0.5728987993138936, "grad_norm": 0.3642813265323639, "learning_rate": 4.3562186224338265e-06, "loss": 1.0211, "step": 167 }, { "epoch": 0.5763293310463122, "grad_norm": 0.36729952692985535, "learning_rate": 4.102305571272783e-06, "loss": 1.0002, "step": 168 }, { "epoch": 0.5797598627787307, "grad_norm": 0.3639819025993347, "learning_rate": 3.855472691181678e-06, "loss": 0.9742, "step": 169 }, { "epoch": 0.5831903945111492, "grad_norm": 0.3743532598018646, "learning_rate": 3.615787463805331e-06, "loss": 1.0119, "step": 170 }, { "epoch": 0.5866209262435678, "grad_norm": 0.39362025260925293, "learning_rate": 3.383315416691646e-06, "loss": 0.852, "step": 171 }, { "epoch": 0.5900514579759862, "grad_norm": 0.36990445852279663, "learning_rate": 3.158120105377096e-06, "loss": 1.0292, "step": 172 }, { "epoch": 0.5934819897084048, "grad_norm": 0.395361989736557, "learning_rate": 2.940263096011233e-06, "loss": 0.9459, "step": 173 }, { "epoch": 0.5969125214408233, "grad_norm": 0.38748112320899963, "learning_rate": 2.729803948525125e-06, "loss": 0.9552, "step": 174 }, { "epoch": 0.6003430531732419, "grad_norm": 0.3924119472503662, "learning_rate": 2.526800200348275e-06, "loss": 0.9526, "step": 175 }, { "epoch": 0.6037735849056604, "grad_norm": 0.39469361305236816, "learning_rate": 2.3313073506784575e-06, "loss": 0.9458, "step": 176 }, { "epoch": 0.6072041166380789, "grad_norm": 0.39666324853897095, "learning_rate": 2.143378845308791e-06, "loss": 0.9717, "step": 177 }, { "epoch": 0.6106346483704974, "grad_norm": 0.40818580985069275, "learning_rate": 1.9630660620161777e-06, "loss": 0.9296, "step": 178 }, { "epoch": 0.614065180102916, "grad_norm": 0.4014730155467987, "learning_rate": 1.790418296515165e-06, "loss": 1.0224, "step": 179 }, { "epoch": 0.6174957118353345, "grad_norm": 0.43228840827941895, "learning_rate": 1.625482748980961e-06, "loss": 0.9433, "step": 180 }, { "epoch": 0.6209262435677531, "grad_norm": 0.4143679141998291, "learning_rate": 1.4683045111453942e-06, "loss": 1.0152, "step": 181 }, { "epoch": 0.6243567753001715, "grad_norm": 0.4030207395553589, "learning_rate": 1.3189265539692707e-06, "loss": 0.9542, "step": 182 }, { "epoch": 0.62778730703259, "grad_norm": 0.44624990224838257, "learning_rate": 1.1773897158945557e-06, "loss": 0.9112, "step": 183 }, { "epoch": 0.6312178387650086, "grad_norm": 0.45174139738082886, "learning_rate": 1.0437326916795432e-06, "loss": 1.0063, "step": 184 }, { "epoch": 0.6346483704974271, "grad_norm": 0.4539107382297516, "learning_rate": 9.179920218200888e-07, "loss": 0.9326, "step": 185 }, { "epoch": 0.6380789022298456, "grad_norm": 0.4369543492794037, "learning_rate": 8.002020825598277e-07, "loss": 0.9534, "step": 186 }, { "epoch": 0.6415094339622641, "grad_norm": 0.4899698495864868, "learning_rate": 6.90395076492022e-07, "loss": 1.0126, "step": 187 }, { "epoch": 0.6449399656946827, "grad_norm": 0.4699549973011017, "learning_rate": 5.886010237557194e-07, "loss": 0.9727, "step": 188 }, { "epoch": 0.6483704974271012, "grad_norm": 0.4589851498603821, "learning_rate": 4.94847753828529e-07, "loss": 0.9804, "step": 189 }, { "epoch": 0.6518010291595198, "grad_norm": 0.5151140689849854, "learning_rate": 4.091608979183303e-07, "loss": 0.9987, "step": 190 }, { "epoch": 0.6552315608919382, "grad_norm": 0.5203879475593567, "learning_rate": 3.315638819559452e-07, "loss": 0.9682, "step": 191 }, { "epoch": 0.6586620926243568, "grad_norm": 0.4911907911300659, "learning_rate": 2.6207792019074414e-07, "loss": 0.9684, "step": 192 }, { "epoch": 0.6620926243567753, "grad_norm": 0.5031991600990295, "learning_rate": 2.0072200939085573e-07, "loss": 0.9443, "step": 193 }, { "epoch": 0.6655231560891939, "grad_norm": 0.5311073064804077, "learning_rate": 1.475129236496575e-07, "loss": 0.9774, "step": 194 }, { "epoch": 0.6689536878216124, "grad_norm": 0.5446680188179016, "learning_rate": 1.0246520979990459e-07, "loss": 0.9561, "step": 195 }, { "epoch": 0.6723842195540308, "grad_norm": 0.5319830775260925, "learning_rate": 6.559118343676396e-08, "loss": 0.9256, "step": 196 }, { "epoch": 0.6758147512864494, "grad_norm": 0.5953107476234436, "learning_rate": 3.690092555085789e-08, "loss": 1.0225, "step": 197 }, { "epoch": 0.6792452830188679, "grad_norm": 0.6728432774543762, "learning_rate": 1.640227977221853e-08, "loss": 0.986, "step": 198 }, { "epoch": 0.6826758147512865, "grad_norm": 0.6387631893157959, "learning_rate": 4.1008502259298755e-09, "loss": 0.9947, "step": 199 }, { "epoch": 0.6861063464837049, "grad_norm": 0.9395211935043335, "learning_rate": 0.0, "loss": 0.9974, "step": 200 }, { "epoch": 0.6861063464837049, "eval_loss": 0.9573606848716736, "eval_runtime": 36.8437, "eval_samples_per_second": 13.327, "eval_steps_per_second": 3.338, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7608054677635072e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }