|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.6660306135147467, |
|
"eval_steps": 100, |
|
"global_step": 5020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0033186070147055774, |
|
"grad_norm": 4.456874370574951, |
|
"learning_rate": 2.9850746268656716e-06, |
|
"loss": 1.5353, |
|
"mean_token_accuracy": 0.6246657922863961, |
|
"num_tokens": 10478467.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006637214029411155, |
|
"grad_norm": 4.286330223083496, |
|
"learning_rate": 6.301824212271974e-06, |
|
"loss": 1.539, |
|
"mean_token_accuracy": 0.6244419991970063, |
|
"num_tokens": 20957957.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009955821044116731, |
|
"grad_norm": 3.0627472400665283, |
|
"learning_rate": 9.618573797678276e-06, |
|
"loss": 1.4971, |
|
"mean_token_accuracy": 0.632022830657661, |
|
"num_tokens": 31438289.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01327442805882231, |
|
"grad_norm": 1.7509390115737915, |
|
"learning_rate": 1.2935323383084577e-05, |
|
"loss": 1.4407, |
|
"mean_token_accuracy": 0.6434033919125796, |
|
"num_tokens": 41924049.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.016593035073527888, |
|
"grad_norm": 0.8987186551094055, |
|
"learning_rate": 1.6252072968490882e-05, |
|
"loss": 1.343, |
|
"mean_token_accuracy": 0.6632689341902733, |
|
"num_tokens": 52403960.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.019911642088233462, |
|
"grad_norm": 0.532718300819397, |
|
"learning_rate": 1.956882255389718e-05, |
|
"loss": 1.2772, |
|
"mean_token_accuracy": 0.6764605836942792, |
|
"num_tokens": 62888044.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02323024910293904, |
|
"grad_norm": 0.2609790861606598, |
|
"learning_rate": 2.2885572139303486e-05, |
|
"loss": 1.2434, |
|
"mean_token_accuracy": 0.6825638456270099, |
|
"num_tokens": 73373804.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02654885611764462, |
|
"grad_norm": 0.18926776945590973, |
|
"learning_rate": 2.6202321724709784e-05, |
|
"loss": 1.2089, |
|
"mean_token_accuracy": 0.6879415104165674, |
|
"num_tokens": 83859564.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.029867463132350197, |
|
"grad_norm": 0.12605148553848267, |
|
"learning_rate": 2.9519071310116086e-05, |
|
"loss": 1.1813, |
|
"mean_token_accuracy": 0.6934926675632596, |
|
"num_tokens": 94341375.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.033186070147055775, |
|
"grad_norm": 0.11590579897165298, |
|
"learning_rate": 3.283582089552239e-05, |
|
"loss": 1.1752, |
|
"mean_token_accuracy": 0.693216635286808, |
|
"num_tokens": 104822223.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.036504677161761354, |
|
"grad_norm": 0.09547381848096848, |
|
"learning_rate": 3.6152570480928693e-05, |
|
"loss": 1.1604, |
|
"mean_token_accuracy": 0.6961587481200695, |
|
"num_tokens": 115300251.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.039823284176466925, |
|
"grad_norm": 0.0811898410320282, |
|
"learning_rate": 3.946932006633499e-05, |
|
"loss": 1.1467, |
|
"mean_token_accuracy": 0.6987002680078149, |
|
"num_tokens": 125786011.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0431418911911725, |
|
"grad_norm": 0.09140197932720184, |
|
"learning_rate": 4.27860696517413e-05, |
|
"loss": 1.1379, |
|
"mean_token_accuracy": 0.7001684376969933, |
|
"num_tokens": 136271771.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04646049820587808, |
|
"grad_norm": 0.08167432248592377, |
|
"learning_rate": 4.6102819237147596e-05, |
|
"loss": 1.1199, |
|
"mean_token_accuracy": 0.7031927773728966, |
|
"num_tokens": 146750501.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04977910522058366, |
|
"grad_norm": 0.0784958228468895, |
|
"learning_rate": 4.94195688225539e-05, |
|
"loss": 1.1163, |
|
"mean_token_accuracy": 0.7042481828480959, |
|
"num_tokens": 157236261.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05309771223528924, |
|
"grad_norm": 0.07430163770914078, |
|
"learning_rate": 5.2736318407960206e-05, |
|
"loss": 1.1156, |
|
"mean_token_accuracy": 0.7039731776341795, |
|
"num_tokens": 167707724.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.056416319249994816, |
|
"grad_norm": 0.060517650097608566, |
|
"learning_rate": 5.6053067993366505e-05, |
|
"loss": 1.0956, |
|
"mean_token_accuracy": 0.7088292304426431, |
|
"num_tokens": 178192873.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.059734926264700394, |
|
"grad_norm": 0.06856600195169449, |
|
"learning_rate": 5.9369817578772804e-05, |
|
"loss": 1.0927, |
|
"mean_token_accuracy": 0.7086629722267389, |
|
"num_tokens": 188669161.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06305353327940597, |
|
"grad_norm": 0.07093485444784164, |
|
"learning_rate": 6.268656716417911e-05, |
|
"loss": 1.0955, |
|
"mean_token_accuracy": 0.7079184643924237, |
|
"num_tokens": 199154921.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06637214029411155, |
|
"grad_norm": 0.05826742202043533, |
|
"learning_rate": 6.600331674958541e-05, |
|
"loss": 1.0832, |
|
"mean_token_accuracy": 0.7102724656462669, |
|
"num_tokens": 209640324.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06969074730881712, |
|
"grad_norm": 0.07690238952636719, |
|
"learning_rate": 6.93200663349917e-05, |
|
"loss": 1.0817, |
|
"mean_token_accuracy": 0.7107248041778803, |
|
"num_tokens": 220126084.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07300935432352271, |
|
"grad_norm": 0.06953492760658264, |
|
"learning_rate": 7.263681592039802e-05, |
|
"loss": 1.0682, |
|
"mean_token_accuracy": 0.713701151125133, |
|
"num_tokens": 230610135.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07632796133822828, |
|
"grad_norm": 0.06193999573588371, |
|
"learning_rate": 7.595356550580432e-05, |
|
"loss": 1.0681, |
|
"mean_token_accuracy": 0.7131396872922778, |
|
"num_tokens": 241083143.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07964656835293385, |
|
"grad_norm": 0.06369080394506454, |
|
"learning_rate": 7.927031509121063e-05, |
|
"loss": 1.0581, |
|
"mean_token_accuracy": 0.7152676824480295, |
|
"num_tokens": 251567950.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08296517536763943, |
|
"grad_norm": 0.06942826509475708, |
|
"learning_rate": 8.258706467661693e-05, |
|
"loss": 1.0594, |
|
"mean_token_accuracy": 0.7150783598423004, |
|
"num_tokens": 262047274.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.086283782382345, |
|
"grad_norm": 0.06275887042284012, |
|
"learning_rate": 8.590381426202321e-05, |
|
"loss": 1.056, |
|
"mean_token_accuracy": 0.7154936328530311, |
|
"num_tokens": 272528975.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08960238939705059, |
|
"grad_norm": 0.08972194045782089, |
|
"learning_rate": 8.922056384742952e-05, |
|
"loss": 1.0536, |
|
"mean_token_accuracy": 0.7161832038313151, |
|
"num_tokens": 283003396.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09292099641175616, |
|
"grad_norm": 0.08157947659492493, |
|
"learning_rate": 9.253731343283582e-05, |
|
"loss": 1.0425, |
|
"mean_token_accuracy": 0.7184201672673225, |
|
"num_tokens": 293478971.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09623960342646175, |
|
"grad_norm": 0.07844681292772293, |
|
"learning_rate": 9.585406301824212e-05, |
|
"loss": 1.038, |
|
"mean_token_accuracy": 0.7192981209605932, |
|
"num_tokens": 303963290.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09955821044116732, |
|
"grad_norm": 0.0755002573132515, |
|
"learning_rate": 9.917081260364843e-05, |
|
"loss": 1.0364, |
|
"mean_token_accuracy": 0.7191660417243838, |
|
"num_tokens": 314449050.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1028768174558729, |
|
"grad_norm": 0.08028624951839447, |
|
"learning_rate": 0.00010248756218905473, |
|
"loss": 1.0299, |
|
"mean_token_accuracy": 0.7209183381870389, |
|
"num_tokens": 324927560.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10619542447057848, |
|
"grad_norm": 0.10507698357105255, |
|
"learning_rate": 0.00010580431177446104, |
|
"loss": 1.031, |
|
"mean_token_accuracy": 0.7204138159751892, |
|
"num_tokens": 335407530.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10951403148528405, |
|
"grad_norm": 0.06311548501253128, |
|
"learning_rate": 0.00010912106135986733, |
|
"loss": 1.0146, |
|
"mean_token_accuracy": 0.7238196378573776, |
|
"num_tokens": 345873027.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11283263849998963, |
|
"grad_norm": 0.08195403218269348, |
|
"learning_rate": 0.00011243781094527364, |
|
"loss": 1.0224, |
|
"mean_token_accuracy": 0.7224542181938887, |
|
"num_tokens": 356354384.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1161512455146952, |
|
"grad_norm": 0.08367642015218735, |
|
"learning_rate": 0.00011575456053067994, |
|
"loss": 1.0217, |
|
"mean_token_accuracy": 0.7223985634744168, |
|
"num_tokens": 366830653.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11946985252940079, |
|
"grad_norm": 0.10050886124372482, |
|
"learning_rate": 0.00011907131011608624, |
|
"loss": 1.0191, |
|
"mean_token_accuracy": 0.7234168009832501, |
|
"num_tokens": 377316413.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12278845954410636, |
|
"grad_norm": 0.07655831426382065, |
|
"learning_rate": 0.00012238805970149255, |
|
"loss": 1.0148, |
|
"mean_token_accuracy": 0.7238867349922657, |
|
"num_tokens": 387802173.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12610706655881193, |
|
"grad_norm": 0.0980726107954979, |
|
"learning_rate": 0.00012570480928689886, |
|
"loss": 1.008, |
|
"mean_token_accuracy": 0.7251724308356643, |
|
"num_tokens": 398281916.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1294256735735175, |
|
"grad_norm": 0.06910485029220581, |
|
"learning_rate": 0.00012902155887230515, |
|
"loss": 1.0041, |
|
"mean_token_accuracy": 0.7259774435311556, |
|
"num_tokens": 408763403.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1327442805882231, |
|
"grad_norm": 0.06646759063005447, |
|
"learning_rate": 0.00013233830845771146, |
|
"loss": 0.9998, |
|
"mean_token_accuracy": 0.7269706262275577, |
|
"num_tokens": 419249163.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13606288760292867, |
|
"grad_norm": 0.0789259672164917, |
|
"learning_rate": 0.00013565505804311774, |
|
"loss": 0.9958, |
|
"mean_token_accuracy": 0.7277587926015258, |
|
"num_tokens": 429719364.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13938149461763424, |
|
"grad_norm": 0.18468795716762543, |
|
"learning_rate": 0.00013897180762852403, |
|
"loss": 1.0021, |
|
"mean_token_accuracy": 0.7267118034884333, |
|
"num_tokens": 440201684.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14270010163233982, |
|
"grad_norm": 0.08505365252494812, |
|
"learning_rate": 0.00014228855721393034, |
|
"loss": 0.9932, |
|
"mean_token_accuracy": 0.728511150740087, |
|
"num_tokens": 450685358.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.14601870864704541, |
|
"grad_norm": 0.11861028522253036, |
|
"learning_rate": 0.00014560530679933665, |
|
"loss": 0.986, |
|
"mean_token_accuracy": 0.7299669992178679, |
|
"num_tokens": 461157458.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14933731566175099, |
|
"grad_norm": 0.15240196883678436, |
|
"learning_rate": 0.00014892205638474297, |
|
"loss": 0.9923, |
|
"mean_token_accuracy": 0.7286881178617477, |
|
"num_tokens": 471643218.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15265592267645656, |
|
"grad_norm": 0.07285284996032715, |
|
"learning_rate": 0.00015223880597014925, |
|
"loss": 0.9918, |
|
"mean_token_accuracy": 0.728894804045558, |
|
"num_tokens": 482128978.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.15597452969116213, |
|
"grad_norm": 0.12552587687969208, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.9819, |
|
"mean_token_accuracy": 0.730852185562253, |
|
"num_tokens": 492607364.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1592931367058677, |
|
"grad_norm": 0.11646983027458191, |
|
"learning_rate": 0.00015887230514096188, |
|
"loss": 0.9866, |
|
"mean_token_accuracy": 0.7297255454584957, |
|
"num_tokens": 10469538.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1626117437205733, |
|
"grad_norm": 0.11252231895923615, |
|
"learning_rate": 0.00016218905472636816, |
|
"loss": 0.9854, |
|
"mean_token_accuracy": 0.7299044447019696, |
|
"num_tokens": 20950616.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16593035073527887, |
|
"grad_norm": 0.20020079612731934, |
|
"learning_rate": 0.00016550580431177447, |
|
"loss": 0.9855, |
|
"mean_token_accuracy": 0.7301053939387202, |
|
"num_tokens": 31431941.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16924895774998444, |
|
"grad_norm": 0.13659614324569702, |
|
"learning_rate": 0.00016882255389718078, |
|
"loss": 0.966, |
|
"mean_token_accuracy": 0.7341468974947929, |
|
"num_tokens": 41893875.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.17256756476469, |
|
"grad_norm": 0.07486096769571304, |
|
"learning_rate": 0.00017213930348258707, |
|
"loss": 0.9701, |
|
"mean_token_accuracy": 0.733239185065031, |
|
"num_tokens": 52370633.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1758861717793956, |
|
"grad_norm": 0.11297636479139328, |
|
"learning_rate": 0.00017545605306799338, |
|
"loss": 0.9709, |
|
"mean_token_accuracy": 0.7331767013296485, |
|
"num_tokens": 62846694.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17920477879410118, |
|
"grad_norm": 0.11334758251905441, |
|
"learning_rate": 0.0001787728026533997, |
|
"loss": 0.968, |
|
"mean_token_accuracy": 0.7332857694476843, |
|
"num_tokens": 73332454.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.18252338580880675, |
|
"grad_norm": 0.13509759306907654, |
|
"learning_rate": 0.00018208955223880598, |
|
"loss": 0.9672, |
|
"mean_token_accuracy": 0.7335175754502415, |
|
"num_tokens": 83811602.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18584199282351233, |
|
"grad_norm": 0.16971918940544128, |
|
"learning_rate": 0.0001854063018242123, |
|
"loss": 0.9741, |
|
"mean_token_accuracy": 0.7320338256657124, |
|
"num_tokens": 94297362.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1891605998382179, |
|
"grad_norm": 0.15524017810821533, |
|
"learning_rate": 0.0001887230514096186, |
|
"loss": 0.9581, |
|
"mean_token_accuracy": 0.735401445068419, |
|
"num_tokens": 104782926.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1924792068529235, |
|
"grad_norm": 0.12434681504964828, |
|
"learning_rate": 0.0001920398009950249, |
|
"loss": 0.9548, |
|
"mean_token_accuracy": 0.7366825319826603, |
|
"num_tokens": 115268218.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.19579781386762907, |
|
"grad_norm": 0.11100148409605026, |
|
"learning_rate": 0.00019535655058043117, |
|
"loss": 0.949, |
|
"mean_token_accuracy": 0.7375084878876805, |
|
"num_tokens": 125753978.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.19911642088233464, |
|
"grad_norm": 0.14663784205913544, |
|
"learning_rate": 0.00019867330016583748, |
|
"loss": 0.9584, |
|
"mean_token_accuracy": 0.7354120567440987, |
|
"num_tokens": 136239738.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2024350278970402, |
|
"grad_norm": 0.09907996654510498, |
|
"learning_rate": 0.00019999945633031953, |
|
"loss": 0.9525, |
|
"mean_token_accuracy": 0.7368813240900636, |
|
"num_tokens": 146720869.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2057536349117458, |
|
"grad_norm": 0.1264956295490265, |
|
"learning_rate": 0.00019999613392828115, |
|
"loss": 0.9502, |
|
"mean_token_accuracy": 0.7371340295299887, |
|
"num_tokens": 157206629.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.20907224192645138, |
|
"grad_norm": 0.09327522665262222, |
|
"learning_rate": 0.00019998979127427963, |
|
"loss": 0.9493, |
|
"mean_token_accuracy": 0.7376966508105397, |
|
"num_tokens": 167683348.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.21239084894115695, |
|
"grad_norm": 0.10375913232564926, |
|
"learning_rate": 0.00019998042858117328, |
|
"loss": 0.9528, |
|
"mean_token_accuracy": 0.7371220109984279, |
|
"num_tokens": 178169108.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.21570945595586252, |
|
"grad_norm": 0.09532920271158218, |
|
"learning_rate": 0.00019996804616317213, |
|
"loss": 0.9397, |
|
"mean_token_accuracy": 0.7393126789480448, |
|
"num_tokens": 188654868.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2190280629705681, |
|
"grad_norm": 0.07365228980779648, |
|
"learning_rate": 0.0001999526444358276, |
|
"loss": 0.9428, |
|
"mean_token_accuracy": 0.7387492464855313, |
|
"num_tokens": 199132993.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2223466699852737, |
|
"grad_norm": 0.09153572469949722, |
|
"learning_rate": 0.0001999342239160185, |
|
"loss": 0.9458, |
|
"mean_token_accuracy": 0.7383740542456507, |
|
"num_tokens": 209618753.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.22566527699997926, |
|
"grad_norm": 0.11583089828491211, |
|
"learning_rate": 0.00019991278522193379, |
|
"loss": 0.939, |
|
"mean_token_accuracy": 0.7398658065125346, |
|
"num_tokens": 220096565.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.22898388401468484, |
|
"grad_norm": 0.11790600419044495, |
|
"learning_rate": 0.0001998883290730517, |
|
"loss": 0.9346, |
|
"mean_token_accuracy": 0.7406760269775987, |
|
"num_tokens": 230582325.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2323024910293904, |
|
"grad_norm": 0.10092464089393616, |
|
"learning_rate": 0.00019986085629011556, |
|
"loss": 0.9413, |
|
"mean_token_accuracy": 0.7393560750409961, |
|
"num_tokens": 241068085.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.23562109804409598, |
|
"grad_norm": 0.116122305393219, |
|
"learning_rate": 0.00019983036779510634, |
|
"loss": 0.9328, |
|
"mean_token_accuracy": 0.7409803809598088, |
|
"num_tokens": 251553845.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.23893970505880158, |
|
"grad_norm": 0.08817829936742783, |
|
"learning_rate": 0.00019979686461121186, |
|
"loss": 0.9309, |
|
"mean_token_accuracy": 0.7414666019380093, |
|
"num_tokens": 262031432.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.24225831207350715, |
|
"grad_norm": 0.09951848536729813, |
|
"learning_rate": 0.0001997603478627921, |
|
"loss": 0.9343, |
|
"mean_token_accuracy": 0.7407814783975482, |
|
"num_tokens": 272498214.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.24557691908821272, |
|
"grad_norm": 0.11073072999715805, |
|
"learning_rate": 0.0001997208187753417, |
|
"loss": 0.9356, |
|
"mean_token_accuracy": 0.7404868887737394, |
|
"num_tokens": 282983974.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2488955261029183, |
|
"grad_norm": 0.07899338752031326, |
|
"learning_rate": 0.00019967827867544883, |
|
"loss": 0.928, |
|
"mean_token_accuracy": 0.7419990302994848, |
|
"num_tokens": 293469734.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.25221413311762386, |
|
"grad_norm": 0.06835592538118362, |
|
"learning_rate": 0.00019963272899075054, |
|
"loss": 0.9308, |
|
"mean_token_accuracy": 0.7415564397349954, |
|
"num_tokens": 303948769.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.25553274013232946, |
|
"grad_norm": 0.07545626163482666, |
|
"learning_rate": 0.00019958417124988503, |
|
"loss": 0.9259, |
|
"mean_token_accuracy": 0.7425217816606164, |
|
"num_tokens": 314427690.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.258851347147035, |
|
"grad_norm": 0.13108845055103302, |
|
"learning_rate": 0.00019953260708244027, |
|
"loss": 0.9191, |
|
"mean_token_accuracy": 0.7440043790265918, |
|
"num_tokens": 324907918.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2621699541617406, |
|
"grad_norm": 0.09534457325935364, |
|
"learning_rate": 0.00019947803821889913, |
|
"loss": 0.915, |
|
"mean_token_accuracy": 0.7448834925889969, |
|
"num_tokens": 335374336.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2654885611764462, |
|
"grad_norm": 0.07554920017719269, |
|
"learning_rate": 0.00019942046649058166, |
|
"loss": 0.9227, |
|
"mean_token_accuracy": 0.7431973684579134, |
|
"num_tokens": 345835121.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26880716819115175, |
|
"grad_norm": 0.09551262110471725, |
|
"learning_rate": 0.00019935989382958335, |
|
"loss": 0.9209, |
|
"mean_token_accuracy": 0.7434859313070774, |
|
"num_tokens": 356310668.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.27212577520585735, |
|
"grad_norm": 0.09637200087308884, |
|
"learning_rate": 0.00019929632226871035, |
|
"loss": 0.9148, |
|
"mean_token_accuracy": 0.7451079055666924, |
|
"num_tokens": 366789361.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.27544438222056294, |
|
"grad_norm": 0.09493401646614075, |
|
"learning_rate": 0.00019922975394141135, |
|
"loss": 0.9262, |
|
"mean_token_accuracy": 0.7423798104748129, |
|
"num_tokens": 377268594.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2787629892352685, |
|
"grad_norm": 0.12235710024833679, |
|
"learning_rate": 0.00019916019108170586, |
|
"loss": 0.9084, |
|
"mean_token_accuracy": 0.7457977183163166, |
|
"num_tokens": 387743774.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2820815962499741, |
|
"grad_norm": 0.13307656347751617, |
|
"learning_rate": 0.0001990876360241094, |
|
"loss": 0.9156, |
|
"mean_token_accuracy": 0.7445944549515844, |
|
"num_tokens": 10472523.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.28540020326467963, |
|
"grad_norm": 0.1116153821349144, |
|
"learning_rate": 0.00019901209120355484, |
|
"loss": 0.9135, |
|
"mean_token_accuracy": 0.745184931717813, |
|
"num_tokens": 20945510.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.28871881027938523, |
|
"grad_norm": 0.1001197025179863, |
|
"learning_rate": 0.0001989335591553111, |
|
"loss": 0.9016, |
|
"mean_token_accuracy": 0.7475148588418961, |
|
"num_tokens": 31425559.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.29203741729409083, |
|
"grad_norm": 0.11557689309120178, |
|
"learning_rate": 0.00019885204251489768, |
|
"loss": 0.9104, |
|
"mean_token_accuracy": 0.7455625429749488, |
|
"num_tokens": 41903088.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.29535602430879637, |
|
"grad_norm": 0.11414580792188644, |
|
"learning_rate": 0.00019876754401799648, |
|
"loss": 0.9103, |
|
"mean_token_accuracy": 0.7457076044753194, |
|
"num_tokens": 52384135.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.29867463132350197, |
|
"grad_norm": 0.1487276554107666, |
|
"learning_rate": 0.00019868006650035992, |
|
"loss": 0.9034, |
|
"mean_token_accuracy": 0.7474555226042867, |
|
"num_tokens": 62849020.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3019932383382075, |
|
"grad_norm": 0.24512167274951935, |
|
"learning_rate": 0.0001985896128977157, |
|
"loss": 0.9022, |
|
"mean_token_accuracy": 0.7475779382511973, |
|
"num_tokens": 73330811.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.3053118453529131, |
|
"grad_norm": 0.09478301554918289, |
|
"learning_rate": 0.00019849618624566832, |
|
"loss": 0.9181, |
|
"mean_token_accuracy": 0.744060843065381, |
|
"num_tokens": 83806268.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3086304523676187, |
|
"grad_norm": 0.09469889849424362, |
|
"learning_rate": 0.00019839978967959728, |
|
"loss": 0.9121, |
|
"mean_token_accuracy": 0.7452088924124837, |
|
"num_tokens": 94291147.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.31194905938232426, |
|
"grad_norm": 0.1338571161031723, |
|
"learning_rate": 0.00019830042643455165, |
|
"loss": 0.9057, |
|
"mean_token_accuracy": 0.7466160029172897, |
|
"num_tokens": 104774123.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.31526766639702986, |
|
"grad_norm": 0.124733105301857, |
|
"learning_rate": 0.00019819809984514183, |
|
"loss": 0.8991, |
|
"mean_token_accuracy": 0.7479092827066779, |
|
"num_tokens": 115255061.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3185862734117354, |
|
"grad_norm": 0.11022835224866867, |
|
"learning_rate": 0.00019809281334542736, |
|
"loss": 0.8941, |
|
"mean_token_accuracy": 0.7492923114448786, |
|
"num_tokens": 125734347.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.321904880426441, |
|
"grad_norm": 0.08194189518690109, |
|
"learning_rate": 0.00019798457046880168, |
|
"loss": 0.8987, |
|
"mean_token_accuracy": 0.7483082629740239, |
|
"num_tokens": 136220107.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3252234874411466, |
|
"grad_norm": 0.13797785341739655, |
|
"learning_rate": 0.00019787337484787385, |
|
"loss": 0.8995, |
|
"mean_token_accuracy": 0.748322376422584, |
|
"num_tokens": 146705867.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.32854209445585214, |
|
"grad_norm": 0.13881815969944, |
|
"learning_rate": 0.00019775923021434622, |
|
"loss": 0.8928, |
|
"mean_token_accuracy": 0.7496528748422862, |
|
"num_tokens": 157180049.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.33186070147055774, |
|
"grad_norm": 0.0982946902513504, |
|
"learning_rate": 0.0001976421403988895, |
|
"loss": 0.8986, |
|
"mean_token_accuracy": 0.7486499631777406, |
|
"num_tokens": 167661741.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3351793084852633, |
|
"grad_norm": 0.1060449555516243, |
|
"learning_rate": 0.00019752210933101416, |
|
"loss": 0.9011, |
|
"mean_token_accuracy": 0.7479203771799803, |
|
"num_tokens": 178132832.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3384979154999689, |
|
"grad_norm": 0.14371642470359802, |
|
"learning_rate": 0.00019739914103893838, |
|
"loss": 0.899, |
|
"mean_token_accuracy": 0.7481364840641618, |
|
"num_tokens": 188618592.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.3418165225146745, |
|
"grad_norm": 0.08715486526489258, |
|
"learning_rate": 0.0001972732396494531, |
|
"loss": 0.8967, |
|
"mean_token_accuracy": 0.7492184940725565, |
|
"num_tokens": 199096729.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.34513512952938, |
|
"grad_norm": 0.12464672327041626, |
|
"learning_rate": 0.00019714440938778328, |
|
"loss": 0.8864, |
|
"mean_token_accuracy": 0.7509257266297936, |
|
"num_tokens": 209565027.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3484537365440856, |
|
"grad_norm": 0.1320679634809494, |
|
"learning_rate": 0.0001970126545774464, |
|
"loss": 0.8909, |
|
"mean_token_accuracy": 0.7498966613784432, |
|
"num_tokens": 220044714.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3517723435587912, |
|
"grad_norm": 0.12225856631994247, |
|
"learning_rate": 0.00019687797964010708, |
|
"loss": 0.8878, |
|
"mean_token_accuracy": 0.7507304925471544, |
|
"num_tokens": 230529041.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.35509095057349677, |
|
"grad_norm": 0.12753193080425262, |
|
"learning_rate": 0.00019674038909542888, |
|
"loss": 0.8913, |
|
"mean_token_accuracy": 0.7500258840620517, |
|
"num_tokens": 241009835.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.35840955758820237, |
|
"grad_norm": 0.12206187844276428, |
|
"learning_rate": 0.00019659988756092246, |
|
"loss": 0.8922, |
|
"mean_token_accuracy": 0.7499410387128591, |
|
"num_tokens": 251493111.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3617281646029079, |
|
"grad_norm": 0.07569900155067444, |
|
"learning_rate": 0.00019645647975179077, |
|
"loss": 0.8884, |
|
"mean_token_accuracy": 0.7509477300569415, |
|
"num_tokens": 261972173.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3650467716176135, |
|
"grad_norm": 0.07960104942321777, |
|
"learning_rate": 0.00019631017048077077, |
|
"loss": 0.8865, |
|
"mean_token_accuracy": 0.7507594024762512, |
|
"num_tokens": 272450445.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3683653786323191, |
|
"grad_norm": 0.091305673122406, |
|
"learning_rate": 0.00019616096465797185, |
|
"loss": 0.8903, |
|
"mean_token_accuracy": 0.7503410935401916, |
|
"num_tokens": 282933131.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.37168398564702465, |
|
"grad_norm": 0.12274038791656494, |
|
"learning_rate": 0.00019600886729071108, |
|
"loss": 0.8831, |
|
"mean_token_accuracy": 0.7516113016754389, |
|
"num_tokens": 293417228.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.37500259266173025, |
|
"grad_norm": 0.13974039256572723, |
|
"learning_rate": 0.0001958538834833452, |
|
"loss": 0.8842, |
|
"mean_token_accuracy": 0.7515292694792152, |
|
"num_tokens": 303891928.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3783211996764358, |
|
"grad_norm": 0.1255372166633606, |
|
"learning_rate": 0.0001956960184370993, |
|
"loss": 0.8883, |
|
"mean_token_accuracy": 0.7505260732024908, |
|
"num_tokens": 314377156.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3816398066911414, |
|
"grad_norm": 0.1027783751487732, |
|
"learning_rate": 0.0001955352774498922, |
|
"loss": 0.8882, |
|
"mean_token_accuracy": 0.7508626377210021, |
|
"num_tokens": 324862309.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.384958413705847, |
|
"grad_norm": 0.10859072208404541, |
|
"learning_rate": 0.0001953716659161588, |
|
"loss": 0.8872, |
|
"mean_token_accuracy": 0.7507986824959516, |
|
"num_tokens": 335334617.0, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.38827702072055253, |
|
"grad_norm": 0.11672750860452652, |
|
"learning_rate": 0.00019520518932666892, |
|
"loss": 0.8841, |
|
"mean_token_accuracy": 0.7513738388195634, |
|
"num_tokens": 345820377.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.39159562773525813, |
|
"grad_norm": 0.11614928394556046, |
|
"learning_rate": 0.00019503585326834303, |
|
"loss": 0.8779, |
|
"mean_token_accuracy": 0.7526221085339785, |
|
"num_tokens": 356293562.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3949142347499637, |
|
"grad_norm": 0.09835066646337509, |
|
"learning_rate": 0.0001948636634240649, |
|
"loss": 0.8783, |
|
"mean_token_accuracy": 0.7532607067376376, |
|
"num_tokens": 366774649.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3982328417646693, |
|
"grad_norm": 0.1638692319393158, |
|
"learning_rate": 0.00019468862557249064, |
|
"loss": 0.8756, |
|
"mean_token_accuracy": 0.7537252847105265, |
|
"num_tokens": 377256275.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4015514487793749, |
|
"grad_norm": 0.15556573867797852, |
|
"learning_rate": 0.00019451074558785497, |
|
"loss": 0.8811, |
|
"mean_token_accuracy": 0.7520053081214428, |
|
"num_tokens": 387739975.0, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4048700557940804, |
|
"grad_norm": 0.12542197108268738, |
|
"learning_rate": 0.0001943300294397741, |
|
"loss": 0.8877, |
|
"mean_token_accuracy": 0.7509451704099774, |
|
"num_tokens": 398220868.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.408188662808786, |
|
"grad_norm": 0.09943215548992157, |
|
"learning_rate": 0.00019414648319304517, |
|
"loss": 0.8796, |
|
"mean_token_accuracy": 0.7522788923233747, |
|
"num_tokens": 408706628.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.4115072698234916, |
|
"grad_norm": 0.1275465190410614, |
|
"learning_rate": 0.0001939601130074429, |
|
"loss": 0.8788, |
|
"mean_token_accuracy": 0.7530741615220904, |
|
"num_tokens": 419192388.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.41482587683819716, |
|
"grad_norm": 0.09036266803741455, |
|
"learning_rate": 0.00019377092513751286, |
|
"loss": 0.8734, |
|
"mean_token_accuracy": 0.7542628727853298, |
|
"num_tokens": 429678148.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.41814448385290276, |
|
"grad_norm": 0.11080717295408249, |
|
"learning_rate": 0.00019357892593236151, |
|
"loss": 0.8735, |
|
"mean_token_accuracy": 0.7538423674181104, |
|
"num_tokens": 440161951.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4214630908676083, |
|
"grad_norm": 0.07577451318502426, |
|
"learning_rate": 0.00019338412183544316, |
|
"loss": 0.8791, |
|
"mean_token_accuracy": 0.7527194999158382, |
|
"num_tokens": 450642673.0, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4247816978823139, |
|
"grad_norm": 0.10234798491001129, |
|
"learning_rate": 0.00019318651938434372, |
|
"loss": 0.8656, |
|
"mean_token_accuracy": 0.7552645441144705, |
|
"num_tokens": 461128433.0, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.4281003048970195, |
|
"grad_norm": 0.06869622319936752, |
|
"learning_rate": 0.00019298612521056126, |
|
"loss": 0.8655, |
|
"mean_token_accuracy": 0.7555084478110075, |
|
"num_tokens": 471602493.0, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.43141891191172504, |
|
"grad_norm": 0.06701388210058212, |
|
"learning_rate": 0.00019278294603928352, |
|
"loss": 0.8841, |
|
"mean_token_accuracy": 0.7517282158136368, |
|
"num_tokens": 482073209.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.43473751892643064, |
|
"grad_norm": 0.11147171258926392, |
|
"learning_rate": 0.0001925769886891622, |
|
"loss": 0.8771, |
|
"mean_token_accuracy": 0.7530317701399326, |
|
"num_tokens": 492543515.0, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4380561259411362, |
|
"grad_norm": 0.12707966566085815, |
|
"learning_rate": 0.00019236826007208412, |
|
"loss": 0.8726, |
|
"mean_token_accuracy": 0.7535556375980377, |
|
"num_tokens": 503029275.0, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.4413747329558418, |
|
"grad_norm": 0.09481809288263321, |
|
"learning_rate": 0.00019215676719293927, |
|
"loss": 0.8798, |
|
"mean_token_accuracy": 0.7525555873289704, |
|
"num_tokens": 513515035.0, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4446933399705474, |
|
"grad_norm": 0.11830797046422958, |
|
"learning_rate": 0.00019194251714938564, |
|
"loss": 0.8667, |
|
"mean_token_accuracy": 0.7554455721750856, |
|
"num_tokens": 524000795.0, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.44801194698525293, |
|
"grad_norm": 0.10708022862672806, |
|
"learning_rate": 0.00019172551713161114, |
|
"loss": 0.8699, |
|
"mean_token_accuracy": 0.7541650122031569, |
|
"num_tokens": 534486555.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.45133055399995853, |
|
"grad_norm": 0.1296638399362564, |
|
"learning_rate": 0.00019150577442209233, |
|
"loss": 0.8689, |
|
"mean_token_accuracy": 0.7550072424113751, |
|
"num_tokens": 544966895.0, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.45464916101466407, |
|
"grad_norm": 0.10959133505821228, |
|
"learning_rate": 0.0001912832963953498, |
|
"loss": 0.8719, |
|
"mean_token_accuracy": 0.754011202044785, |
|
"num_tokens": 555448608.0, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.45796776802936967, |
|
"grad_norm": 0.13069604337215424, |
|
"learning_rate": 0.00019105809051770097, |
|
"loss": 0.8741, |
|
"mean_token_accuracy": 0.7538234619423747, |
|
"num_tokens": 565934368.0, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.46128637504407527, |
|
"grad_norm": 0.08488507568836212, |
|
"learning_rate": 0.00019083016434700924, |
|
"loss": 0.8578, |
|
"mean_token_accuracy": 0.7571931520476938, |
|
"num_tokens": 576412160.0, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4646049820587808, |
|
"grad_norm": 0.12605434656143188, |
|
"learning_rate": 0.00019059952553243065, |
|
"loss": 0.8528, |
|
"mean_token_accuracy": 0.7582787131890655, |
|
"num_tokens": 586897920.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4679235890734864, |
|
"grad_norm": 0.11124076694250107, |
|
"learning_rate": 0.00019036618181415696, |
|
"loss": 0.8704, |
|
"mean_token_accuracy": 0.7547924173995852, |
|
"num_tokens": 597383680.0, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.47124219608819196, |
|
"grad_norm": 0.09255870431661606, |
|
"learning_rate": 0.00019013014102315585, |
|
"loss": 0.8691, |
|
"mean_token_accuracy": 0.7550907328724861, |
|
"num_tokens": 607866591.0, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.47456080310289755, |
|
"grad_norm": 0.08366747945547104, |
|
"learning_rate": 0.00018989141108090833, |
|
"loss": 0.8625, |
|
"mean_token_accuracy": 0.7563490144908428, |
|
"num_tokens": 618337879.0, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.47787941011760315, |
|
"grad_norm": 0.13047565519809723, |
|
"learning_rate": 0.00018964999999914275, |
|
"loss": 0.867, |
|
"mean_token_accuracy": 0.7551734404638409, |
|
"num_tokens": 628816840.0, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4811980171323087, |
|
"grad_norm": 0.12230905890464783, |
|
"learning_rate": 0.00018940591587956592, |
|
"loss": 0.8607, |
|
"mean_token_accuracy": 0.7568699663504959, |
|
"num_tokens": 639290749.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.4845166241470143, |
|
"grad_norm": 0.09320119768381119, |
|
"learning_rate": 0.00018915916691359132, |
|
"loss": 0.8684, |
|
"mean_token_accuracy": 0.7548679873347283, |
|
"num_tokens": 649769619.0, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4878352311617199, |
|
"grad_norm": 0.11995600163936615, |
|
"learning_rate": 0.00018890976138206405, |
|
"loss": 0.8548, |
|
"mean_token_accuracy": 0.7585142718628048, |
|
"num_tokens": 660248506.0, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.49115383817642544, |
|
"grad_norm": 0.11266778409481049, |
|
"learning_rate": 0.00018865770765498312, |
|
"loss": 0.8588, |
|
"mean_token_accuracy": 0.757077083736658, |
|
"num_tokens": 670732739.0, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.49447244519113104, |
|
"grad_norm": 0.09791893512010574, |
|
"learning_rate": 0.0001884030141912203, |
|
"loss": 0.8579, |
|
"mean_token_accuracy": 0.7574174387380481, |
|
"num_tokens": 681218499.0, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4977910522058366, |
|
"grad_norm": 0.07898171246051788, |
|
"learning_rate": 0.00018814568953823654, |
|
"loss": 0.8497, |
|
"mean_token_accuracy": 0.7593075778335333, |
|
"num_tokens": 691697020.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5011096592205422, |
|
"grad_norm": 0.12533102929592133, |
|
"learning_rate": 0.00018788574233179485, |
|
"loss": 0.865, |
|
"mean_token_accuracy": 0.7557816857472062, |
|
"num_tokens": 702180475.0, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5044282662352477, |
|
"grad_norm": 0.12405435740947723, |
|
"learning_rate": 0.00018762318129567066, |
|
"loss": 0.8592, |
|
"mean_token_accuracy": 0.7572093108668924, |
|
"num_tokens": 712656102.0, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5077468732499534, |
|
"grad_norm": 0.1356426179409027, |
|
"learning_rate": 0.00018735801524135897, |
|
"loss": 0.8669, |
|
"mean_token_accuracy": 0.7553769392892719, |
|
"num_tokens": 723127725.0, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5110654802646589, |
|
"grad_norm": 0.07235228270292282, |
|
"learning_rate": 0.0001870902530677787, |
|
"loss": 0.8549, |
|
"mean_token_accuracy": 0.7580863270908594, |
|
"num_tokens": 733606702.0, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5143840872793645, |
|
"grad_norm": 0.10672928392887115, |
|
"learning_rate": 0.00018681990376097392, |
|
"loss": 0.8555, |
|
"mean_token_accuracy": 0.75790313500911, |
|
"num_tokens": 744081682.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.51770269429407, |
|
"grad_norm": 0.10490447282791138, |
|
"learning_rate": 0.00018654697639381248, |
|
"loss": 0.8449, |
|
"mean_token_accuracy": 0.7598242353647947, |
|
"num_tokens": 754567442.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5210213013087757, |
|
"grad_norm": 0.12515957653522491, |
|
"learning_rate": 0.0001862714801256813, |
|
"loss": 0.8596, |
|
"mean_token_accuracy": 0.756611785106361, |
|
"num_tokens": 765040235.0, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5243399083234812, |
|
"grad_norm": 0.11183013767004013, |
|
"learning_rate": 0.00018599342420217915, |
|
"loss": 0.8545, |
|
"mean_token_accuracy": 0.7579139837995171, |
|
"num_tokens": 775525995.0, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5276585153381868, |
|
"grad_norm": 0.08167802542448044, |
|
"learning_rate": 0.0001857128179548063, |
|
"loss": 0.8562, |
|
"mean_token_accuracy": 0.7578157868236304, |
|
"num_tokens": 786003789.0, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5309771223528924, |
|
"grad_norm": 0.09493687748908997, |
|
"learning_rate": 0.0001854296708006514, |
|
"loss": 0.8578, |
|
"mean_token_accuracy": 0.7576355727389454, |
|
"num_tokens": 796489549.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.534295729367598, |
|
"grad_norm": 0.1299983710050583, |
|
"learning_rate": 0.00018514399224207545, |
|
"loss": 0.8563, |
|
"mean_token_accuracy": 0.7578905867412686, |
|
"num_tokens": 806968696.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5376143363823035, |
|
"grad_norm": 0.10947688668966293, |
|
"learning_rate": 0.0001848557918663927, |
|
"loss": 0.8537, |
|
"mean_token_accuracy": 0.7585803955793381, |
|
"num_tokens": 817454456.0, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5409329433970091, |
|
"grad_norm": 0.17463596165180206, |
|
"learning_rate": 0.00018456507934554926, |
|
"loss": 0.8465, |
|
"mean_token_accuracy": 0.759968264773488, |
|
"num_tokens": 827931259.0, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5442515504117147, |
|
"grad_norm": 0.1509849727153778, |
|
"learning_rate": 0.0001842718644357982, |
|
"loss": 0.8549, |
|
"mean_token_accuracy": 0.7577539026737213, |
|
"num_tokens": 838409784.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5475701574264202, |
|
"grad_norm": 0.10163529962301254, |
|
"learning_rate": 0.00018397615697737235, |
|
"loss": 0.8435, |
|
"mean_token_accuracy": 0.7604600984603167, |
|
"num_tokens": 848880888.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5508887644411259, |
|
"grad_norm": 0.08583993464708328, |
|
"learning_rate": 0.0001836779668941538, |
|
"loss": 0.8447, |
|
"mean_token_accuracy": 0.760548378713429, |
|
"num_tokens": 859360736.0, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5542073714558314, |
|
"grad_norm": 0.09835182130336761, |
|
"learning_rate": 0.00018337730419334128, |
|
"loss": 0.8456, |
|
"mean_token_accuracy": 0.760054505057633, |
|
"num_tokens": 869843033.0, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.557525978470537, |
|
"grad_norm": 0.11671733111143112, |
|
"learning_rate": 0.00018307417896511386, |
|
"loss": 0.8563, |
|
"mean_token_accuracy": 0.7578504301607609, |
|
"num_tokens": 880321148.0, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5608445854852425, |
|
"grad_norm": 0.1542026400566101, |
|
"learning_rate": 0.00018276860138229253, |
|
"loss": 0.8424, |
|
"mean_token_accuracy": 0.7604312267154455, |
|
"num_tokens": 890806908.0, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5641631924999482, |
|
"grad_norm": 0.1336473971605301, |
|
"learning_rate": 0.00018246058169999894, |
|
"loss": 0.8412, |
|
"mean_token_accuracy": 0.761156851425767, |
|
"num_tokens": 901287636.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5674817995146537, |
|
"grad_norm": 0.14709115028381348, |
|
"learning_rate": 0.000182150130255311, |
|
"loss": 0.8553, |
|
"mean_token_accuracy": 0.7579629141837358, |
|
"num_tokens": 911773396.0, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5708004065293593, |
|
"grad_norm": 0.08315177261829376, |
|
"learning_rate": 0.00018183725746691607, |
|
"loss": 0.8501, |
|
"mean_token_accuracy": 0.7591048136353493, |
|
"num_tokens": 922252700.0, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5741190135440649, |
|
"grad_norm": 0.13854211568832397, |
|
"learning_rate": 0.0001815219738347614, |
|
"loss": 0.8369, |
|
"mean_token_accuracy": 0.7621023176237941, |
|
"num_tokens": 932731797.0, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5774376205587705, |
|
"grad_norm": 0.09793327003717422, |
|
"learning_rate": 0.0001812042899397015, |
|
"loss": 0.8571, |
|
"mean_token_accuracy": 0.7572241809219122, |
|
"num_tokens": 943210781.0, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.580756227573476, |
|
"grad_norm": 0.15137037634849548, |
|
"learning_rate": 0.00018088421644314344, |
|
"loss": 0.8513, |
|
"mean_token_accuracy": 0.7588881932199001, |
|
"num_tokens": 953695566.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5840748345881817, |
|
"grad_norm": 0.12938319146633148, |
|
"learning_rate": 0.00018056176408668862, |
|
"loss": 0.8444, |
|
"mean_token_accuracy": 0.7602495316416025, |
|
"num_tokens": 964181326.0, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5873934416028872, |
|
"grad_norm": 0.10059011727571487, |
|
"learning_rate": 0.00018023694369177266, |
|
"loss": 0.8503, |
|
"mean_token_accuracy": 0.7586783742532134, |
|
"num_tokens": 974653160.0, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5907120486175927, |
|
"grad_norm": 0.11409953236579895, |
|
"learning_rate": 0.00017990976615930192, |
|
"loss": 0.844, |
|
"mean_token_accuracy": 0.7604539269581437, |
|
"num_tokens": 985138920.0, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5940306556322983, |
|
"grad_norm": 0.11919377744197845, |
|
"learning_rate": 0.00017958024246928793, |
|
"loss": 0.8397, |
|
"mean_token_accuracy": 0.76123832706362, |
|
"num_tokens": 995624680.0, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5973492626470039, |
|
"grad_norm": 0.10679172724485397, |
|
"learning_rate": 0.00017924838368047873, |
|
"loss": 0.8407, |
|
"mean_token_accuracy": 0.7609953947365284, |
|
"num_tokens": 1006102776.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6006678696617095, |
|
"grad_norm": 0.15905922651290894, |
|
"learning_rate": 0.0001789142009299879, |
|
"loss": 0.8376, |
|
"mean_token_accuracy": 0.7617803292348981, |
|
"num_tokens": 1016586824.0, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.603986476676415, |
|
"grad_norm": 0.11889604479074478, |
|
"learning_rate": 0.0001785777054329205, |
|
"loss": 0.8395, |
|
"mean_token_accuracy": 0.7612660808488727, |
|
"num_tokens": 1027072584.0, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6073050836911207, |
|
"grad_norm": 0.0883103609085083, |
|
"learning_rate": 0.00017823890848199707, |
|
"loss": 0.8504, |
|
"mean_token_accuracy": 0.7590753987431527, |
|
"num_tokens": 1037554689.0, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6106236907058262, |
|
"grad_norm": 0.14156708121299744, |
|
"learning_rate": 0.00017789782144717446, |
|
"loss": 0.8434, |
|
"mean_token_accuracy": 0.7608080705627799, |
|
"num_tokens": 1048040449.0, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.6139422977205318, |
|
"grad_norm": 0.13070257008075714, |
|
"learning_rate": 0.00017755445577526422, |
|
"loss": 0.8442, |
|
"mean_token_accuracy": 0.7606491534039378, |
|
"num_tokens": 1058523338.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6172609047352374, |
|
"grad_norm": 0.11883427202701569, |
|
"learning_rate": 0.0001772088229895485, |
|
"loss": 0.8375, |
|
"mean_token_accuracy": 0.7617071146145463, |
|
"num_tokens": 1068998950.0, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.620579511749943, |
|
"grad_norm": 0.11459514498710632, |
|
"learning_rate": 0.00017686093468939328, |
|
"loss": 0.8439, |
|
"mean_token_accuracy": 0.7603834588080645, |
|
"num_tokens": 1079462588.0, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6238981187646485, |
|
"grad_norm": 0.0874410942196846, |
|
"learning_rate": 0.00017651080254985933, |
|
"loss": 0.8526, |
|
"mean_token_accuracy": 0.7584025146439671, |
|
"num_tokens": 1089948348.0, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6272167257793542, |
|
"grad_norm": 0.0875912755727768, |
|
"learning_rate": 0.00017615843832131004, |
|
"loss": 0.8297, |
|
"mean_token_accuracy": 0.7632458617910742, |
|
"num_tokens": 1100430212.0, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6305353327940597, |
|
"grad_norm": 0.12681277096271515, |
|
"learning_rate": 0.0001758038538290172, |
|
"loss": 0.8336, |
|
"mean_token_accuracy": 0.7626405974850059, |
|
"num_tokens": 1110913054.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6338539398087653, |
|
"grad_norm": 0.09869815409183502, |
|
"learning_rate": 0.00017544706097276442, |
|
"loss": 0.8281, |
|
"mean_token_accuracy": 0.7639485985040665, |
|
"num_tokens": 1121380974.0, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6371725468234708, |
|
"grad_norm": 0.1290808469057083, |
|
"learning_rate": 0.0001750880717264474, |
|
"loss": 0.8318, |
|
"mean_token_accuracy": 0.7631193609908223, |
|
"num_tokens": 1131865472.0, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6404911538381765, |
|
"grad_norm": 0.11061016470193863, |
|
"learning_rate": 0.00017472689813767233, |
|
"loss": 0.8341, |
|
"mean_token_accuracy": 0.7627505380660295, |
|
"num_tokens": 1142348802.0, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.643809760852882, |
|
"grad_norm": 0.08618409931659698, |
|
"learning_rate": 0.0001743635523273514, |
|
"loss": 0.8343, |
|
"mean_token_accuracy": 0.7625327391549945, |
|
"num_tokens": 1152830874.0, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6471283678675875, |
|
"grad_norm": 0.11828841269016266, |
|
"learning_rate": 0.0001739980464892961, |
|
"loss": 0.8337, |
|
"mean_token_accuracy": 0.7623861156404018, |
|
"num_tokens": 1163316634.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6504469748822932, |
|
"grad_norm": 0.06827554851770401, |
|
"learning_rate": 0.00017363039288980815, |
|
"loss": 0.8309, |
|
"mean_token_accuracy": 0.7630320440977811, |
|
"num_tokens": 1173796355.0, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6537655818969987, |
|
"grad_norm": 0.09030026942491531, |
|
"learning_rate": 0.00017326060386726758, |
|
"loss": 0.8368, |
|
"mean_token_accuracy": 0.762061744555831, |
|
"num_tokens": 1184271410.0, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6570841889117043, |
|
"grad_norm": 0.12762907147407532, |
|
"learning_rate": 0.00017288869183171887, |
|
"loss": 0.8324, |
|
"mean_token_accuracy": 0.7626056496053935, |
|
"num_tokens": 1194743145.0, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6604027959264099, |
|
"grad_norm": 0.09776946902275085, |
|
"learning_rate": 0.00017251466926445432, |
|
"loss": 0.8366, |
|
"mean_token_accuracy": 0.7619501492008567, |
|
"num_tokens": 1205220732.0, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6637214029411155, |
|
"grad_norm": 0.11090876907110214, |
|
"learning_rate": 0.00017213854871759524, |
|
"loss": 0.8307, |
|
"mean_token_accuracy": 0.762708786316216, |
|
"num_tokens": 1215706492.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.667040009955821, |
|
"grad_norm": 0.09803315252065659, |
|
"learning_rate": 0.00017176034281367076, |
|
"loss": 0.8343, |
|
"mean_token_accuracy": 0.7625784434378147, |
|
"num_tokens": 1226178379.0, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6703586169705266, |
|
"grad_norm": 0.13478384912014008, |
|
"learning_rate": 0.00017138006424519418, |
|
"loss": 0.8308, |
|
"mean_token_accuracy": 0.7635397264733911, |
|
"num_tokens": 1236664139.0, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6736772239852322, |
|
"grad_norm": 0.07813051342964172, |
|
"learning_rate": 0.00017099772577423687, |
|
"loss": 0.8352, |
|
"mean_token_accuracy": 0.7626818338409066, |
|
"num_tokens": 1247129133.0, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6769958309999378, |
|
"grad_norm": 0.10438504815101624, |
|
"learning_rate": 0.0001706133402320003, |
|
"loss": 0.8289, |
|
"mean_token_accuracy": 0.7641463791951537, |
|
"num_tokens": 1257612919.0, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6803144380146433, |
|
"grad_norm": 0.07366824150085449, |
|
"learning_rate": 0.00017022692051838512, |
|
"loss": 0.8321, |
|
"mean_token_accuracy": 0.7632344393059611, |
|
"num_tokens": 1268090812.0, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.683633045029349, |
|
"grad_norm": 0.11721368134021759, |
|
"learning_rate": 0.00016983847960155837, |
|
"loss": 0.8305, |
|
"mean_token_accuracy": 0.7635456619784236, |
|
"num_tokens": 1278567420.0, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6869516520440545, |
|
"grad_norm": 0.12061051279306412, |
|
"learning_rate": 0.00016944803051751825, |
|
"loss": 0.8267, |
|
"mean_token_accuracy": 0.764389518275857, |
|
"num_tokens": 1289038677.0, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.69027025905876, |
|
"grad_norm": 0.09406062960624695, |
|
"learning_rate": 0.00016905558636965665, |
|
"loss": 0.8293, |
|
"mean_token_accuracy": 0.7638228122144938, |
|
"num_tokens": 1299524437.0, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6935888660734657, |
|
"grad_norm": 0.10373110324144363, |
|
"learning_rate": 0.0001686611603283195, |
|
"loss": 0.8329, |
|
"mean_token_accuracy": 0.7628338277339936, |
|
"num_tokens": 1310010197.0, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6969074730881712, |
|
"grad_norm": 0.1447826474905014, |
|
"learning_rate": 0.0001682647656303645, |
|
"loss": 0.8313, |
|
"mean_token_accuracy": 0.7633772963657975, |
|
"num_tokens": 1320495957.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7002260801028768, |
|
"grad_norm": 0.12078222632408142, |
|
"learning_rate": 0.00016786641557871716, |
|
"loss": 0.8207, |
|
"mean_token_accuracy": 0.7654614228755235, |
|
"num_tokens": 1330959272.0, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7035446871175824, |
|
"grad_norm": 0.11979147046804428, |
|
"learning_rate": 0.0001674661235419243, |
|
"loss": 0.8191, |
|
"mean_token_accuracy": 0.7659799061715603, |
|
"num_tokens": 1341445032.0, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.706863294132288, |
|
"grad_norm": 0.09704011678695679, |
|
"learning_rate": 0.00016706390295370517, |
|
"loss": 0.8306, |
|
"mean_token_accuracy": 0.7632294148206711, |
|
"num_tokens": 1351904029.0, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.7101819011469935, |
|
"grad_norm": 0.15954768657684326, |
|
"learning_rate": 0.00016665976731250115, |
|
"loss": 0.8247, |
|
"mean_token_accuracy": 0.7646481277421117, |
|
"num_tokens": 1362389789.0, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.7135005081616991, |
|
"grad_norm": 0.14417655766010284, |
|
"learning_rate": 0.0001662537301810221, |
|
"loss": 0.8362, |
|
"mean_token_accuracy": 0.7622023215517402, |
|
"num_tokens": 1372875549.0, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7168191151764047, |
|
"grad_norm": 0.08781994134187698, |
|
"learning_rate": 0.00016584580518579163, |
|
"loss": 0.8278, |
|
"mean_token_accuracy": 0.7639455853030086, |
|
"num_tokens": 1383345624.0, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.7201377221911103, |
|
"grad_norm": 0.09008403867483139, |
|
"learning_rate": 0.00016543600601668973, |
|
"loss": 0.8262, |
|
"mean_token_accuracy": 0.7642499217763543, |
|
"num_tokens": 1393831384.0, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7234563292058158, |
|
"grad_norm": 0.1012243777513504, |
|
"learning_rate": 0.0001650243464264932, |
|
"loss": 0.8252, |
|
"mean_token_accuracy": 0.76409882735461, |
|
"num_tokens": 1404309091.0, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7267749362205215, |
|
"grad_norm": 0.08128339797258377, |
|
"learning_rate": 0.0001646108402304142, |
|
"loss": 0.8229, |
|
"mean_token_accuracy": 0.7648266503587365, |
|
"num_tokens": 1414794700.0, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.730093543235227, |
|
"grad_norm": 0.11436230689287186, |
|
"learning_rate": 0.0001641955013056366, |
|
"loss": 0.8276, |
|
"mean_token_accuracy": 0.7645352957770228, |
|
"num_tokens": 1425280460.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7334121502499326, |
|
"grad_norm": 0.09136373549699783, |
|
"learning_rate": 0.00016377834359085038, |
|
"loss": 0.831, |
|
"mean_token_accuracy": 0.7633469367399812, |
|
"num_tokens": 1435765321.0, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7367307572646382, |
|
"grad_norm": 0.07795336097478867, |
|
"learning_rate": 0.00016335938108578358, |
|
"loss": 0.8306, |
|
"mean_token_accuracy": 0.7631049901247025, |
|
"num_tokens": 1446251081.0, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7400493642793438, |
|
"grad_norm": 0.09848517179489136, |
|
"learning_rate": 0.00016293862785073278, |
|
"loss": 0.8309, |
|
"mean_token_accuracy": 0.7634068943560124, |
|
"num_tokens": 1456730603.0, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7433679712940493, |
|
"grad_norm": 0.11463901400566101, |
|
"learning_rate": 0.000162516098006091, |
|
"loss": 0.8255, |
|
"mean_token_accuracy": 0.7645809097215533, |
|
"num_tokens": 1467207203.0, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.7466865783087548, |
|
"grad_norm": 0.11084874719381332, |
|
"learning_rate": 0.00016209180573187394, |
|
"loss": 0.83, |
|
"mean_token_accuracy": 0.7636783845722676, |
|
"num_tokens": 1477678213.0, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.7500051853234605, |
|
"grad_norm": 0.09073188900947571, |
|
"learning_rate": 0.0001616657652672441, |
|
"loss": 0.8233, |
|
"mean_token_accuracy": 0.7654302371665835, |
|
"num_tokens": 1488163973.0, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.753323792338166, |
|
"grad_norm": 0.14429152011871338, |
|
"learning_rate": 0.00016123799091003285, |
|
"loss": 0.8191, |
|
"mean_token_accuracy": 0.7660181423649192, |
|
"num_tokens": 1498637634.0, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7566423993528716, |
|
"grad_norm": 0.07778177410364151, |
|
"learning_rate": 0.00016080849701626072, |
|
"loss": 0.826, |
|
"mean_token_accuracy": 0.764384999871254, |
|
"num_tokens": 1509120509.0, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7599610063675772, |
|
"grad_norm": 0.12707629799842834, |
|
"learning_rate": 0.00016037729799965548, |
|
"loss": 0.8254, |
|
"mean_token_accuracy": 0.7643630396574735, |
|
"num_tokens": 1519606269.0, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7632796133822828, |
|
"grad_norm": 0.09941043704748154, |
|
"learning_rate": 0.00015994440833116846, |
|
"loss": 0.8251, |
|
"mean_token_accuracy": 0.7642654919996857, |
|
"num_tokens": 1530087863.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7665982203969883, |
|
"grad_norm": 0.07371927052736282, |
|
"learning_rate": 0.000159509842538489, |
|
"loss": 0.8093, |
|
"mean_token_accuracy": 0.767849182151258, |
|
"num_tokens": 1540567795.0, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.769916827411694, |
|
"grad_norm": 0.09582529217004776, |
|
"learning_rate": 0.0001590736152055567, |
|
"loss": 0.8287, |
|
"mean_token_accuracy": 0.7640875386074185, |
|
"num_tokens": 1551047059.0, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7732354344263995, |
|
"grad_norm": 0.0895957350730896, |
|
"learning_rate": 0.00015863574097207226, |
|
"loss": 0.8125, |
|
"mean_token_accuracy": 0.7677104644477367, |
|
"num_tokens": 1561532819.0, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7765540414411051, |
|
"grad_norm": 0.1037888452410698, |
|
"learning_rate": 0.00015819623453300593, |
|
"loss": 0.8153, |
|
"mean_token_accuracy": 0.7671598410233855, |
|
"num_tokens": 1572018579.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7798726484558107, |
|
"grad_norm": 0.06928929686546326, |
|
"learning_rate": 0.00015775511063810448, |
|
"loss": 0.8185, |
|
"mean_token_accuracy": 0.7661426233127713, |
|
"num_tokens": 1582504339.0, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7831912554705163, |
|
"grad_norm": 0.14609582722187042, |
|
"learning_rate": 0.0001573123840913962, |
|
"loss": 0.8152, |
|
"mean_token_accuracy": 0.7668612107634545, |
|
"num_tokens": 1592990099.0, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7865098624852218, |
|
"grad_norm": 0.09062914550304413, |
|
"learning_rate": 0.0001568680697506939, |
|
"loss": 0.8255, |
|
"mean_token_accuracy": 0.7646079743281007, |
|
"num_tokens": 1603475859.0, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7898284694999274, |
|
"grad_norm": 0.09113902598619461, |
|
"learning_rate": 0.0001564221825270967, |
|
"loss": 0.8181, |
|
"mean_token_accuracy": 0.7658322915434838, |
|
"num_tokens": 1613950532.0, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.793147076514633, |
|
"grad_norm": 0.0710316002368927, |
|
"learning_rate": 0.0001559747373844891, |
|
"loss": 0.8259, |
|
"mean_token_accuracy": 0.7642976144328714, |
|
"num_tokens": 1624432221.0, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7964656835293386, |
|
"grad_norm": 0.09138786047697067, |
|
"learning_rate": 0.0001555257493390392, |
|
"loss": 0.8109, |
|
"mean_token_accuracy": 0.7680446734651923, |
|
"num_tokens": 1634917981.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7997842905440441, |
|
"grad_norm": 0.07873648405075073, |
|
"learning_rate": 0.00015507523345869448, |
|
"loss": 0.8197, |
|
"mean_token_accuracy": 0.7657880330458283, |
|
"num_tokens": 1645400312.0, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8031028975587498, |
|
"grad_norm": 0.08953968435525894, |
|
"learning_rate": 0.00015462320486267636, |
|
"loss": 0.8131, |
|
"mean_token_accuracy": 0.7674291767179966, |
|
"num_tokens": 1655884350.0, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.8064215045734553, |
|
"grad_norm": 0.07061683386564255, |
|
"learning_rate": 0.00015416967872097266, |
|
"loss": 0.8211, |
|
"mean_token_accuracy": 0.7657505188137292, |
|
"num_tokens": 1666370110.0, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.8097401115881608, |
|
"grad_norm": 0.09341979771852493, |
|
"learning_rate": 0.00015371467025382848, |
|
"loss": 0.8198, |
|
"mean_token_accuracy": 0.7655730178579688, |
|
"num_tokens": 1676855870.0, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8130587186028665, |
|
"grad_norm": 0.08582761883735657, |
|
"learning_rate": 0.00015325819473123556, |
|
"loss": 0.8245, |
|
"mean_token_accuracy": 0.7646628726273775, |
|
"num_tokens": 1687338884.0, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8163603932467748, |
|
"grad_norm": 0.12421102076768875, |
|
"learning_rate": 0.0001528002674724196, |
|
"loss": 0.8194, |
|
"mean_token_accuracy": 0.7662278890609742, |
|
"num_tokens": 10485760.0, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.8196789314307048, |
|
"grad_norm": 0.0791928842663765, |
|
"learning_rate": 0.00015234090384532632, |
|
"loss": 0.8111, |
|
"mean_token_accuracy": 0.7678848147392273, |
|
"num_tokens": 20971520.0, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.8229974696146347, |
|
"grad_norm": 0.10556567460298538, |
|
"learning_rate": 0.00015188011926610574, |
|
"loss": 0.8215, |
|
"mean_token_accuracy": 0.7652393862605095, |
|
"num_tokens": 31438457.0, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8263160077985647, |
|
"grad_norm": 0.07929036021232605, |
|
"learning_rate": 0.00015141792919859466, |
|
"loss": 0.8135, |
|
"mean_token_accuracy": 0.7669544376432895, |
|
"num_tokens": 41911744.0, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8296345459824948, |
|
"grad_norm": 0.12944328784942627, |
|
"learning_rate": 0.0001509543491537978, |
|
"loss": 0.8097, |
|
"mean_token_accuracy": 0.7680105246603489, |
|
"num_tokens": 52397504.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8329530841664247, |
|
"grad_norm": 0.1384814977645874, |
|
"learning_rate": 0.00015048939468936728, |
|
"loss": 0.8143, |
|
"mean_token_accuracy": 0.7673531487584114, |
|
"num_tokens": 62877209.0, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.8362716223503547, |
|
"grad_norm": 0.100552037358284, |
|
"learning_rate": 0.0001500230814090803, |
|
"loss": 0.8119, |
|
"mean_token_accuracy": 0.7678131617605686, |
|
"num_tokens": 73352990.0, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8395901605342847, |
|
"grad_norm": 0.08017473667860031, |
|
"learning_rate": 0.0001495554249623159, |
|
"loss": 0.8229, |
|
"mean_token_accuracy": 0.764815166592598, |
|
"num_tokens": 83835077.0, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8429086987182146, |
|
"grad_norm": 0.07418405264616013, |
|
"learning_rate": 0.00014908644104352938, |
|
"loss": 0.8148, |
|
"mean_token_accuracy": 0.7670687526464463, |
|
"num_tokens": 94320837.0, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8462272369021446, |
|
"grad_norm": 0.09131650626659393, |
|
"learning_rate": 0.0001486161453917257, |
|
"loss": 0.8137, |
|
"mean_token_accuracy": 0.7672370247542858, |
|
"num_tokens": 104791728.0, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8495457750860745, |
|
"grad_norm": 0.067315474152565, |
|
"learning_rate": 0.0001481445537899313, |
|
"loss": 0.8182, |
|
"mean_token_accuracy": 0.7661247923970222, |
|
"num_tokens": 115277488.0, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8528643132700046, |
|
"grad_norm": 0.13266009092330933, |
|
"learning_rate": 0.00014767168206466456, |
|
"loss": 0.8138, |
|
"mean_token_accuracy": 0.7671967223286629, |
|
"num_tokens": 125747330.0, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8561828514539346, |
|
"grad_norm": 0.06727499514818192, |
|
"learning_rate": 0.00014719754608540441, |
|
"loss": 0.8148, |
|
"mean_token_accuracy": 0.7670690394937992, |
|
"num_tokens": 136233090.0, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8595013896378645, |
|
"grad_norm": 0.07351548224687576, |
|
"learning_rate": 0.000146722161764058, |
|
"loss": 0.8196, |
|
"mean_token_accuracy": 0.7655053928494453, |
|
"num_tokens": 146718850.0, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.8628199278217945, |
|
"grad_norm": 0.0864776000380516, |
|
"learning_rate": 0.00014624554505442646, |
|
"loss": 0.8227, |
|
"mean_token_accuracy": 0.7648808546364307, |
|
"num_tokens": 157204610.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8661384660057245, |
|
"grad_norm": 0.10597188770771027, |
|
"learning_rate": 0.00014576771195166983, |
|
"loss": 0.819, |
|
"mean_token_accuracy": 0.7662368580698967, |
|
"num_tokens": 167690370.0, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8694570041896544, |
|
"grad_norm": 0.08794150501489639, |
|
"learning_rate": 0.00014528867849176978, |
|
"loss": 0.8137, |
|
"mean_token_accuracy": 0.7670834071934223, |
|
"num_tokens": 178167040.0, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8727755423735845, |
|
"grad_norm": 0.1239166259765625, |
|
"learning_rate": 0.00014480846075099193, |
|
"loss": 0.8188, |
|
"mean_token_accuracy": 0.7662507459521294, |
|
"num_tokens": 188630882.0, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8760940805575144, |
|
"grad_norm": 0.09917481243610382, |
|
"learning_rate": 0.00014432707484534601, |
|
"loss": 0.8095, |
|
"mean_token_accuracy": 0.7680152080953121, |
|
"num_tokens": 199112069.0, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8794126187414444, |
|
"grad_norm": 0.08738167583942413, |
|
"learning_rate": 0.00014384453693004525, |
|
"loss": 0.8081, |
|
"mean_token_accuracy": 0.7682105012238025, |
|
"num_tokens": 209594632.0, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8827311569253744, |
|
"grad_norm": 0.0878102034330368, |
|
"learning_rate": 0.00014336086319896395, |
|
"loss": 0.8097, |
|
"mean_token_accuracy": 0.7682200662791729, |
|
"num_tokens": 220076190.0, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8860496951093043, |
|
"grad_norm": 0.06970654428005219, |
|
"learning_rate": 0.0001428760698840942, |
|
"loss": 0.8126, |
|
"mean_token_accuracy": 0.7674026750028133, |
|
"num_tokens": 230561950.0, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8893682332932343, |
|
"grad_norm": 0.11389122903347015, |
|
"learning_rate": 0.0001423901732550012, |
|
"loss": 0.8129, |
|
"mean_token_accuracy": 0.7673342004418373, |
|
"num_tokens": 241043090.0, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8926867714771644, |
|
"grad_norm": 0.10004062205553055, |
|
"learning_rate": 0.000141903189618277, |
|
"loss": 0.8143, |
|
"mean_token_accuracy": 0.7669317826628685, |
|
"num_tokens": 251528850.0, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8960053096610943, |
|
"grad_norm": 0.11632239073514938, |
|
"learning_rate": 0.00014141513531699346, |
|
"loss": 0.8165, |
|
"mean_token_accuracy": 0.7663782104849816, |
|
"num_tokens": 262014610.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8993238478450243, |
|
"grad_norm": 0.11542349308729172, |
|
"learning_rate": 0.00014092602673015377, |
|
"loss": 0.8118, |
|
"mean_token_accuracy": 0.767609317600727, |
|
"num_tokens": 272495857.0, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.9026423860289542, |
|
"grad_norm": 0.09444668143987656, |
|
"learning_rate": 0.0001404358802721427, |
|
"loss": 0.821, |
|
"mean_token_accuracy": 0.7657337315380573, |
|
"num_tokens": 282973745.0, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.9059609242128842, |
|
"grad_norm": 0.1121552363038063, |
|
"learning_rate": 0.00013994471239217574, |
|
"loss": 0.813, |
|
"mean_token_accuracy": 0.7671238102018834, |
|
"num_tokens": 293447158.0, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.9092794623968142, |
|
"grad_norm": 0.09517167508602142, |
|
"learning_rate": 0.0001394525395737471, |
|
"loss": 0.811, |
|
"mean_token_accuracy": 0.7677459038794041, |
|
"num_tokens": 303923754.0, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.9125980005807441, |
|
"grad_norm": 0.11541863530874252, |
|
"learning_rate": 0.00013895937833407652, |
|
"loss": 0.8048, |
|
"mean_token_accuracy": 0.7691592775285244, |
|
"num_tokens": 314409304.0, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.9159165387646742, |
|
"grad_norm": 0.108350470662117, |
|
"learning_rate": 0.00013846524522355503, |
|
"loss": 0.8084, |
|
"mean_token_accuracy": 0.7679906852543354, |
|
"num_tokens": 324895064.0, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.9192350769486042, |
|
"grad_norm": 0.09402704238891602, |
|
"learning_rate": 0.00013797015682518937, |
|
"loss": 0.8039, |
|
"mean_token_accuracy": 0.7693029120564461, |
|
"num_tokens": 335380824.0, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.9225536151325341, |
|
"grad_norm": 0.10586155951023102, |
|
"learning_rate": 0.0001374741297540455, |
|
"loss": 0.8108, |
|
"mean_token_accuracy": 0.7674855582416058, |
|
"num_tokens": 345866584.0, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.9258721533164641, |
|
"grad_norm": 0.08136095851659775, |
|
"learning_rate": 0.0001369771806566912, |
|
"loss": 0.8159, |
|
"mean_token_accuracy": 0.7668742746114731, |
|
"num_tokens": 356352344.0, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.9291906915003941, |
|
"grad_norm": 0.07308819890022278, |
|
"learning_rate": 0.00013647932621063712, |
|
"loss": 0.8037, |
|
"mean_token_accuracy": 0.7694840542972088, |
|
"num_tokens": 366829339.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.932509229684324, |
|
"grad_norm": 0.13607661426067352, |
|
"learning_rate": 0.00013598058312377727, |
|
"loss": 0.8023, |
|
"mean_token_accuracy": 0.7699119284749031, |
|
"num_tokens": 377305734.0, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.9358277678682541, |
|
"grad_norm": 0.09279151260852814, |
|
"learning_rate": 0.0001354809681338283, |
|
"loss": 0.8096, |
|
"mean_token_accuracy": 0.7683122970163823, |
|
"num_tokens": 387786198.0, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.939146306052184, |
|
"grad_norm": 0.09076095372438431, |
|
"learning_rate": 0.00013498049800776777, |
|
"loss": 0.8085, |
|
"mean_token_accuracy": 0.7685585737228393, |
|
"num_tokens": 398271958.0, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.942464844236114, |
|
"grad_norm": 0.08061348646879196, |
|
"learning_rate": 0.00013447918954127133, |
|
"loss": 0.8145, |
|
"mean_token_accuracy": 0.7670104600489139, |
|
"num_tokens": 408743324.0, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.945783382420044, |
|
"grad_norm": 0.07820383459329605, |
|
"learning_rate": 0.0001339770595581492, |
|
"loss": 0.8138, |
|
"mean_token_accuracy": 0.7669403731822968, |
|
"num_tokens": 419216266.0, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9491019206039739, |
|
"grad_norm": 0.08628549426794052, |
|
"learning_rate": 0.00013347412490978164, |
|
"loss": 0.8024, |
|
"mean_token_accuracy": 0.7698437228798867, |
|
"num_tokens": 429687773.0, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9524204587879039, |
|
"grad_norm": 0.09182088822126389, |
|
"learning_rate": 0.00013297040247455317, |
|
"loss": 0.8189, |
|
"mean_token_accuracy": 0.7659400343894959, |
|
"num_tokens": 440159176.0, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.955738996971834, |
|
"grad_norm": 0.08850604295730591, |
|
"learning_rate": 0.00013246590915728636, |
|
"loss": 0.8072, |
|
"mean_token_accuracy": 0.7684715516865254, |
|
"num_tokens": 450639026.0, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.9590575351557639, |
|
"grad_norm": 0.0957779511809349, |
|
"learning_rate": 0.0001319606618886744, |
|
"loss": 0.8033, |
|
"mean_token_accuracy": 0.7693692311644554, |
|
"num_tokens": 461119251.0, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.9623760733396939, |
|
"grad_norm": 0.12147443741559982, |
|
"learning_rate": 0.00013145467762471288, |
|
"loss": 0.8079, |
|
"mean_token_accuracy": 0.7683925211429596, |
|
"num_tokens": 471605011.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9656946115236238, |
|
"grad_norm": 0.1033417284488678, |
|
"learning_rate": 0.00013094797334613095, |
|
"loss": 0.8045, |
|
"mean_token_accuracy": 0.7693076834082604, |
|
"num_tokens": 482090771.0, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9690131497075538, |
|
"grad_norm": 0.1582411527633667, |
|
"learning_rate": 0.00013044056605782115, |
|
"loss": 0.7987, |
|
"mean_token_accuracy": 0.7702130317687989, |
|
"num_tokens": 492574990.0, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9723316878914838, |
|
"grad_norm": 0.07741738110780716, |
|
"learning_rate": 0.00012993247278826896, |
|
"loss": 0.8114, |
|
"mean_token_accuracy": 0.7679798111319542, |
|
"num_tokens": 503052706.0, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9756502260754137, |
|
"grad_norm": 0.08740879595279694, |
|
"learning_rate": 0.00012942371058898125, |
|
"loss": 0.8065, |
|
"mean_token_accuracy": 0.7690397098660469, |
|
"num_tokens": 513536496.0, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9789687642593438, |
|
"grad_norm": 0.090821273624897, |
|
"learning_rate": 0.00012891429653391403, |
|
"loss": 0.7992, |
|
"mean_token_accuracy": 0.7701906517148018, |
|
"num_tokens": 524009801.0, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9822873024432738, |
|
"grad_norm": 0.12651872634887695, |
|
"learning_rate": 0.0001284042477188994, |
|
"loss": 0.807, |
|
"mean_token_accuracy": 0.7687178313732147, |
|
"num_tokens": 534494491.0, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9856058406272037, |
|
"grad_norm": 0.14386378228664398, |
|
"learning_rate": 0.00012789358126107193, |
|
"loss": 0.8044, |
|
"mean_token_accuracy": 0.7690846063196659, |
|
"num_tokens": 544979162.0, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9889243788111337, |
|
"grad_norm": 0.09475958347320557, |
|
"learning_rate": 0.00012738231429829423, |
|
"loss": 0.8025, |
|
"mean_token_accuracy": 0.7699846878647805, |
|
"num_tokens": 555459373.0, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.9922429169950637, |
|
"grad_norm": 0.09171073138713837, |
|
"learning_rate": 0.0001268704639885816, |
|
"loss": 0.8187, |
|
"mean_token_accuracy": 0.7660573951900005, |
|
"num_tokens": 565938285.0, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9955614551789936, |
|
"grad_norm": 0.08441364765167236, |
|
"learning_rate": 0.0001263580475095264, |
|
"loss": 0.7976, |
|
"mean_token_accuracy": 0.7706850297749043, |
|
"num_tokens": 576415157.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9988799933629237, |
|
"grad_norm": 0.07692991942167282, |
|
"learning_rate": 0.0001258450820577215, |
|
"loss": 0.8044, |
|
"mean_token_accuracy": 0.7694275721907615, |
|
"num_tokens": 586900917.0, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.002322976728751, |
|
"grad_norm": 0.12558159232139587, |
|
"learning_rate": 0.0001253315848481833, |
|
"loss": 0.8821, |
|
"mean_token_accuracy": 0.7701714175293245, |
|
"num_tokens": 597771454.0, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.005641514912681, |
|
"grad_norm": 0.09756749123334885, |
|
"learning_rate": 0.0001248175731137737, |
|
"loss": 0.795, |
|
"mean_token_accuracy": 0.7711833357810974, |
|
"num_tokens": 608253785.0, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.008960053096611, |
|
"grad_norm": 0.06631714850664139, |
|
"learning_rate": 0.000124303064104622, |
|
"loss": 0.7922, |
|
"mean_token_accuracy": 0.7717965915799141, |
|
"num_tokens": 618731913.0, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.012278591280541, |
|
"grad_norm": 0.09513410925865173, |
|
"learning_rate": 0.00012378807508754607, |
|
"loss": 0.7952, |
|
"mean_token_accuracy": 0.7712634272873402, |
|
"num_tokens": 629217673.0, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.015597129464471, |
|
"grad_norm": 0.09036098420619965, |
|
"learning_rate": 0.00012327262334547258, |
|
"loss": 0.807, |
|
"mean_token_accuracy": 0.7688079163432121, |
|
"num_tokens": 639691567.0, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.0189156676484008, |
|
"grad_norm": 0.0928923711180687, |
|
"learning_rate": 0.0001227567261768573, |
|
"loss": 0.7918, |
|
"mean_token_accuracy": 0.7722170487046242, |
|
"num_tokens": 650148718.0, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.0222342058323308, |
|
"grad_norm": 0.12867727875709534, |
|
"learning_rate": 0.00012224040089510422, |
|
"loss": 0.7996, |
|
"mean_token_accuracy": 0.7704552851617337, |
|
"num_tokens": 660634478.0, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.0255527440162608, |
|
"grad_norm": 0.09281349927186966, |
|
"learning_rate": 0.00012172366482798498, |
|
"loss": 0.7969, |
|
"mean_token_accuracy": 0.7704703398048878, |
|
"num_tokens": 671118306.0, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.0288712822001909, |
|
"grad_norm": 0.12323810905218124, |
|
"learning_rate": 0.00012120653531705696, |
|
"loss": 0.8012, |
|
"mean_token_accuracy": 0.7698994226753711, |
|
"num_tokens": 681591800.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.0321898203841209, |
|
"grad_norm": 0.08596418052911758, |
|
"learning_rate": 0.0001206890297170814, |
|
"loss": 0.8047, |
|
"mean_token_accuracy": 0.7692951105535031, |
|
"num_tokens": 692066403.0, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.0355083585680507, |
|
"grad_norm": 0.07619434595108032, |
|
"learning_rate": 0.00012017116539544106, |
|
"loss": 0.7992, |
|
"mean_token_accuracy": 0.7704711869359017, |
|
"num_tokens": 702551806.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.0388268967519807, |
|
"grad_norm": 0.10103894025087357, |
|
"learning_rate": 0.00011965295973155733, |
|
"loss": 0.7994, |
|
"mean_token_accuracy": 0.7704467341303826, |
|
"num_tokens": 713032600.0, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.0421454349359107, |
|
"grad_norm": 0.13758236169815063, |
|
"learning_rate": 0.00011913443011630694, |
|
"loss": 0.792, |
|
"mean_token_accuracy": 0.7720990940928459, |
|
"num_tokens": 723516954.0, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.0454639731198407, |
|
"grad_norm": 0.09025830775499344, |
|
"learning_rate": 0.0001186155939514384, |
|
"loss": 0.7947, |
|
"mean_token_accuracy": 0.7716163240373135, |
|
"num_tokens": 733990493.0, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.0487825113037708, |
|
"grad_norm": 0.09836422652006149, |
|
"learning_rate": 0.00011809646864898796, |
|
"loss": 0.7883, |
|
"mean_token_accuracy": 0.772812956571579, |
|
"num_tokens": 744476253.0, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.0521010494877006, |
|
"grad_norm": 0.12760159373283386, |
|
"learning_rate": 0.00011757707163069532, |
|
"loss": 0.7978, |
|
"mean_token_accuracy": 0.7706296682357788, |
|
"num_tokens": 754945045.0, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.0554195876716306, |
|
"grad_norm": 0.10166628658771515, |
|
"learning_rate": 0.00011705742032741869, |
|
"loss": 0.7935, |
|
"mean_token_accuracy": 0.7715852670371532, |
|
"num_tokens": 765424810.0, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.0587381258555606, |
|
"grad_norm": 0.09439677745103836, |
|
"learning_rate": 0.0001165375321785503, |
|
"loss": 0.7903, |
|
"mean_token_accuracy": 0.772304394096136, |
|
"num_tokens": 775910570.0, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.0620566640394906, |
|
"grad_norm": 0.10146058350801468, |
|
"learning_rate": 0.00011601742463143078, |
|
"loss": 0.7908, |
|
"mean_token_accuracy": 0.7720572873950005, |
|
"num_tokens": 786382409.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.0653752022234206, |
|
"grad_norm": 0.08949461579322815, |
|
"learning_rate": 0.00011549711514076362, |
|
"loss": 0.7877, |
|
"mean_token_accuracy": 0.7730796381831169, |
|
"num_tokens": 796868169.0, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.0686937404073507, |
|
"grad_norm": 0.15434816479682922, |
|
"learning_rate": 0.00011497662116802966, |
|
"loss": 0.7992, |
|
"mean_token_accuracy": 0.7702365778386593, |
|
"num_tokens": 807353929.0, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.0720122785912805, |
|
"grad_norm": 0.10207755118608475, |
|
"learning_rate": 0.00011445596018090075, |
|
"loss": 0.7967, |
|
"mean_token_accuracy": 0.7711164936423301, |
|
"num_tokens": 817835575.0, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.0753308167752105, |
|
"grad_norm": 0.10703703761100769, |
|
"learning_rate": 0.00011393514965265386, |
|
"loss": 0.7969, |
|
"mean_token_accuracy": 0.7708111874759197, |
|
"num_tokens": 828314969.0, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.0786493549591405, |
|
"grad_norm": 0.08254210650920868, |
|
"learning_rate": 0.00011341420706158443, |
|
"loss": 0.8012, |
|
"mean_token_accuracy": 0.7694485619664192, |
|
"num_tokens": 838793937.0, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.0819678931430705, |
|
"grad_norm": 0.07379141449928284, |
|
"learning_rate": 0.00011289314989041992, |
|
"loss": 0.8013, |
|
"mean_token_accuracy": 0.7696585588157177, |
|
"num_tokens": 849270592.0, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.0852864313270005, |
|
"grad_norm": 0.0758775994181633, |
|
"learning_rate": 0.00011237199562573316, |
|
"loss": 0.7934, |
|
"mean_token_accuracy": 0.7715918220579624, |
|
"num_tokens": 859756352.0, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.0886049695109303, |
|
"grad_norm": 0.10384336858987808, |
|
"learning_rate": 0.00011185076175735527, |
|
"loss": 0.7916, |
|
"mean_token_accuracy": 0.772300997376442, |
|
"num_tokens": 870235675.0, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.0919235076948604, |
|
"grad_norm": 0.13019920885562897, |
|
"learning_rate": 0.00011132946577778902, |
|
"loss": 0.8043, |
|
"mean_token_accuracy": 0.769441581517458, |
|
"num_tokens": 880714024.0, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.0952420458787904, |
|
"grad_norm": 0.10255826264619827, |
|
"learning_rate": 0.00011080812518162143, |
|
"loss": 0.781, |
|
"mean_token_accuracy": 0.774506414681673, |
|
"num_tokens": 891199784.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.0985605840627204, |
|
"grad_norm": 0.06190058961510658, |
|
"learning_rate": 0.000110286757464937, |
|
"loss": 0.7933, |
|
"mean_token_accuracy": 0.7713362969458103, |
|
"num_tokens": 901685544.0, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.1018791222466504, |
|
"grad_norm": 0.06466338783502579, |
|
"learning_rate": 0.00010976538012473035, |
|
"loss": 0.7938, |
|
"mean_token_accuracy": 0.7716447144746781, |
|
"num_tokens": 912159149.0, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.1051976604305804, |
|
"grad_norm": 0.12337905913591385, |
|
"learning_rate": 0.00010924401065831902, |
|
"loss": 0.7947, |
|
"mean_token_accuracy": 0.77125583589077, |
|
"num_tokens": 922641469.0, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.1085161986145102, |
|
"grad_norm": 0.0873250737786293, |
|
"learning_rate": 0.00010872266656275629, |
|
"loss": 0.7982, |
|
"mean_token_accuracy": 0.7702787436544896, |
|
"num_tokens": 933121876.0, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.1118347367984402, |
|
"grad_norm": 0.09560223668813705, |
|
"learning_rate": 0.00010820136533424421, |
|
"loss": 0.7842, |
|
"mean_token_accuracy": 0.7739646509289742, |
|
"num_tokens": 943601592.0, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.1151532749823703, |
|
"grad_norm": 0.06694887578487396, |
|
"learning_rate": 0.00010768012446754587, |
|
"loss": 0.7911, |
|
"mean_token_accuracy": 0.7719299428164959, |
|
"num_tokens": 954087352.0, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.1184718131663003, |
|
"grad_norm": 0.10535664856433868, |
|
"learning_rate": 0.00010715896145539888, |
|
"loss": 0.7823, |
|
"mean_token_accuracy": 0.7744208499789238, |
|
"num_tokens": 964567924.0, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.1217903513502303, |
|
"grad_norm": 0.1375124454498291, |
|
"learning_rate": 0.00010663789378792802, |
|
"loss": 0.7973, |
|
"mean_token_accuracy": 0.7705752685666084, |
|
"num_tokens": 975053684.0, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.12510888953416, |
|
"grad_norm": 0.08048464357852936, |
|
"learning_rate": 0.0001061169389520583, |
|
"loss": 0.7918, |
|
"mean_token_accuracy": 0.7719768635928631, |
|
"num_tokens": 985539444.0, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.1284274277180901, |
|
"grad_norm": 0.0729563981294632, |
|
"learning_rate": 0.00010559611443092816, |
|
"loss": 0.7926, |
|
"mean_token_accuracy": 0.7714476838707924, |
|
"num_tokens": 996019948.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.1317459659020201, |
|
"grad_norm": 0.09618189930915833, |
|
"learning_rate": 0.00010507543770330256, |
|
"loss": 0.7846, |
|
"mean_token_accuracy": 0.7737831912934781, |
|
"num_tokens": 1006497720.0, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.1350645040859502, |
|
"grad_norm": 0.08791498839855194, |
|
"learning_rate": 0.00010455492624298677, |
|
"loss": 0.7924, |
|
"mean_token_accuracy": 0.7721051789820195, |
|
"num_tokens": 1016976970.0, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.1383830422698802, |
|
"grad_norm": 0.09482545405626297, |
|
"learning_rate": 0.00010403459751823956, |
|
"loss": 0.7902, |
|
"mean_token_accuracy": 0.7721517287194729, |
|
"num_tokens": 1027460107.0, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.1417015804538102, |
|
"grad_norm": 0.08068563789129257, |
|
"learning_rate": 0.00010351446899118719, |
|
"loss": 0.801, |
|
"mean_token_accuracy": 0.7699498064815998, |
|
"num_tokens": 1037937962.0, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.14502011863774, |
|
"grad_norm": 0.0996297150850296, |
|
"learning_rate": 0.0001029945581172373, |
|
"loss": 0.7863, |
|
"mean_token_accuracy": 0.7731556259095669, |
|
"num_tokens": 1048417873.0, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.14833865682167, |
|
"grad_norm": 0.12460768222808838, |
|
"learning_rate": 0.00010247488234449316, |
|
"loss": 0.7969, |
|
"mean_token_accuracy": 0.7708781689405442, |
|
"num_tokens": 1058902339.0, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.1516571950056, |
|
"grad_norm": 0.11318309605121613, |
|
"learning_rate": 0.00010195545911316804, |
|
"loss": 0.7881, |
|
"mean_token_accuracy": 0.7728428095579147, |
|
"num_tokens": 1069388099.0, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.15497573318953, |
|
"grad_norm": 0.09075281769037247, |
|
"learning_rate": 0.00010143630585500005, |
|
"loss": 0.7942, |
|
"mean_token_accuracy": 0.7715302638709545, |
|
"num_tokens": 1079866485.0, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.15829427137346, |
|
"grad_norm": 0.11182031035423279, |
|
"learning_rate": 0.00010091743999266701, |
|
"loss": 0.7982, |
|
"mean_token_accuracy": 0.7702980041503906, |
|
"num_tokens": 1090352245.0, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.1616128095573899, |
|
"grad_norm": 0.11811356246471405, |
|
"learning_rate": 0.00010039887893920181, |
|
"loss": 0.7852, |
|
"mean_token_accuracy": 0.7734420292079449, |
|
"num_tokens": 1100827274.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.16493134774132, |
|
"grad_norm": 0.12840627133846283, |
|
"learning_rate": 9.9880640097408e-05, |
|
"loss": 0.7921, |
|
"mean_token_accuracy": 0.7717530056834221, |
|
"num_tokens": 1111308266.0, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.16824988592525, |
|
"grad_norm": 0.12075144052505493, |
|
"learning_rate": 9.936274085927574e-05, |
|
"loss": 0.7918, |
|
"mean_token_accuracy": 0.7717990390956402, |
|
"num_tokens": 1121793522.0, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.17156842410918, |
|
"grad_norm": 0.0767706111073494, |
|
"learning_rate": 9.884519860539827e-05, |
|
"loss": 0.7858, |
|
"mean_token_accuracy": 0.7738195836544037, |
|
"num_tokens": 1132279282.0, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.17488696229311, |
|
"grad_norm": 0.10788602381944656, |
|
"learning_rate": 9.832803070438841e-05, |
|
"loss": 0.79, |
|
"mean_token_accuracy": 0.7723886162042618, |
|
"num_tokens": 1142765042.0, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.17820550047704, |
|
"grad_norm": 0.05944109335541725, |
|
"learning_rate": 9.781125451229583e-05, |
|
"loss": 0.7893, |
|
"mean_token_accuracy": 0.7728860370814801, |
|
"num_tokens": 1153248080.0, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.1815240386609698, |
|
"grad_norm": 0.06739296019077301, |
|
"learning_rate": 9.72948873720244e-05, |
|
"loss": 0.7849, |
|
"mean_token_accuracy": 0.7739810697734356, |
|
"num_tokens": 1163733840.0, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.1848425768448998, |
|
"grad_norm": 0.08329974114894867, |
|
"learning_rate": 9.677894661275051e-05, |
|
"loss": 0.7921, |
|
"mean_token_accuracy": 0.7716331586241723, |
|
"num_tokens": 1174212145.0, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.1881611150288298, |
|
"grad_norm": 0.0659755989909172, |
|
"learning_rate": 9.626344954934115e-05, |
|
"loss": 0.7919, |
|
"mean_token_accuracy": 0.7715703479945659, |
|
"num_tokens": 1184694650.0, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.1914796532127598, |
|
"grad_norm": 0.09240937978029251, |
|
"learning_rate": 9.574841348177294e-05, |
|
"loss": 0.7912, |
|
"mean_token_accuracy": 0.7719730474054813, |
|
"num_tokens": 1195180410.0, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.1947981913966896, |
|
"grad_norm": 0.08444469422101974, |
|
"learning_rate": 9.523385569455174e-05, |
|
"loss": 0.7911, |
|
"mean_token_accuracy": 0.7720673143863678, |
|
"num_tokens": 1205655112.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.1981167295806197, |
|
"grad_norm": 0.09250594675540924, |
|
"learning_rate": 9.471979345613218e-05, |
|
"loss": 0.793, |
|
"mean_token_accuracy": 0.7719442062079906, |
|
"num_tokens": 1216136330.0, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.2014352677645497, |
|
"grad_norm": 0.11664444208145142, |
|
"learning_rate": 9.420624401833857e-05, |
|
"loss": 0.7919, |
|
"mean_token_accuracy": 0.7720701314508915, |
|
"num_tokens": 1226618040.0, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.2047538059484797, |
|
"grad_norm": 0.06814084202051163, |
|
"learning_rate": 9.369322461578565e-05, |
|
"loss": 0.7842, |
|
"mean_token_accuracy": 0.7741835057735443, |
|
"num_tokens": 1237102124.0, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.2080723441324097, |
|
"grad_norm": 0.10373629629611969, |
|
"learning_rate": 9.318075246530027e-05, |
|
"loss": 0.787, |
|
"mean_token_accuracy": 0.7731135085225105, |
|
"num_tokens": 1247585838.0, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.2113908823163397, |
|
"grad_norm": 0.0860927551984787, |
|
"learning_rate": 9.266884476534372e-05, |
|
"loss": 0.7922, |
|
"mean_token_accuracy": 0.7722108155488968, |
|
"num_tokens": 1258052880.0, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.2147094205002698, |
|
"grad_norm": 0.06990491598844528, |
|
"learning_rate": 9.215751869543428e-05, |
|
"loss": 0.7887, |
|
"mean_token_accuracy": 0.7730976432561875, |
|
"num_tokens": 1268537741.0, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.2180279586841996, |
|
"grad_norm": 0.09814431518316269, |
|
"learning_rate": 9.164679141557096e-05, |
|
"loss": 0.7807, |
|
"mean_token_accuracy": 0.7746933653950692, |
|
"num_tokens": 1279014642.0, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.2213464968681296, |
|
"grad_norm": 0.08182793855667114, |
|
"learning_rate": 9.113668006565748e-05, |
|
"loss": 0.7821, |
|
"mean_token_accuracy": 0.7741866044700145, |
|
"num_tokens": 1289494803.0, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.2246650350520596, |
|
"grad_norm": 0.07170803844928741, |
|
"learning_rate": 9.062720176492707e-05, |
|
"loss": 0.7953, |
|
"mean_token_accuracy": 0.7713256381452084, |
|
"num_tokens": 1299968716.0, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.2279835732359896, |
|
"grad_norm": 0.09101345390081406, |
|
"learning_rate": 9.011837361136796e-05, |
|
"loss": 0.7819, |
|
"mean_token_accuracy": 0.7744043007493019, |
|
"num_tokens": 1310450276.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.2313021114199194, |
|
"grad_norm": 0.06418652832508087, |
|
"learning_rate": 8.961021268114948e-05, |
|
"loss": 0.7851, |
|
"mean_token_accuracy": 0.7738088794052601, |
|
"num_tokens": 1320933118.0, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.2346206496038494, |
|
"grad_norm": 0.08518897742033005, |
|
"learning_rate": 8.910273602804929e-05, |
|
"loss": 0.7846, |
|
"mean_token_accuracy": 0.7735204815864563, |
|
"num_tokens": 1331418878.0, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.2379391877877794, |
|
"grad_norm": 0.08660221844911575, |
|
"learning_rate": 8.859596068288063e-05, |
|
"loss": 0.7785, |
|
"mean_token_accuracy": 0.7747997909784317, |
|
"num_tokens": 1341902082.0, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.2412577259717095, |
|
"grad_norm": 0.08849412202835083, |
|
"learning_rate": 8.808990365292112e-05, |
|
"loss": 0.7737, |
|
"mean_token_accuracy": 0.776513147354126, |
|
"num_tokens": 1352382695.0, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.2445762641556395, |
|
"grad_norm": 0.09439458698034286, |
|
"learning_rate": 8.758458192134182e-05, |
|
"loss": 0.7944, |
|
"mean_token_accuracy": 0.7717543512582778, |
|
"num_tokens": 1362860546.0, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.2478948023395695, |
|
"grad_norm": 0.09551844000816345, |
|
"learning_rate": 8.708001244663735e-05, |
|
"loss": 0.7907, |
|
"mean_token_accuracy": 0.7722452618181705, |
|
"num_tokens": 1373346306.0, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.2512133405234995, |
|
"grad_norm": 0.07816372066736221, |
|
"learning_rate": 8.657621216205674e-05, |
|
"loss": 0.7865, |
|
"mean_token_accuracy": 0.7729771360754967, |
|
"num_tokens": 1383824479.0, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.2545318787074293, |
|
"grad_norm": 0.08735434710979462, |
|
"learning_rate": 8.607319797503508e-05, |
|
"loss": 0.7861, |
|
"mean_token_accuracy": 0.7732105948030948, |
|
"num_tokens": 1394310239.0, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.2578504168913593, |
|
"grad_norm": 0.0805339515209198, |
|
"learning_rate": 8.557098676662629e-05, |
|
"loss": 0.7887, |
|
"mean_token_accuracy": 0.7724261164665223, |
|
"num_tokens": 1404791576.0, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.2611689550752894, |
|
"grad_norm": 0.0686466172337532, |
|
"learning_rate": 8.506959539093653e-05, |
|
"loss": 0.7938, |
|
"mean_token_accuracy": 0.7717478558421135, |
|
"num_tokens": 1415260344.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.2644874932592194, |
|
"grad_norm": 0.09132260084152222, |
|
"learning_rate": 8.456904067455834e-05, |
|
"loss": 0.7872, |
|
"mean_token_accuracy": 0.7728922508656979, |
|
"num_tokens": 1425740863.0, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.2678060314431492, |
|
"grad_norm": 0.0798739492893219, |
|
"learning_rate": 8.406933941600626e-05, |
|
"loss": 0.8006, |
|
"mean_token_accuracy": 0.7696124531328679, |
|
"num_tokens": 1436215590.0, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.2711245696270792, |
|
"grad_norm": 0.07844545692205429, |
|
"learning_rate": 8.357050838515305e-05, |
|
"loss": 0.7927, |
|
"mean_token_accuracy": 0.7717352271080017, |
|
"num_tokens": 1446690944.0, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.2744431078110092, |
|
"grad_norm": 0.08331983536481857, |
|
"learning_rate": 8.307256432266673e-05, |
|
"loss": 0.7724, |
|
"mean_token_accuracy": 0.7766589388251305, |
|
"num_tokens": 1457171083.0, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.2777616459949392, |
|
"grad_norm": 0.08180844038724899, |
|
"learning_rate": 8.257552393944871e-05, |
|
"loss": 0.7859, |
|
"mean_token_accuracy": 0.7733499869704247, |
|
"num_tokens": 1467640759.0, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.2810801841788693, |
|
"grad_norm": 0.07894773781299591, |
|
"learning_rate": 8.207940391607334e-05, |
|
"loss": 0.7912, |
|
"mean_token_accuracy": 0.7721025735139847, |
|
"num_tokens": 1478126519.0, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.2843987223627993, |
|
"grad_norm": 0.0866783857345581, |
|
"learning_rate": 8.15842209022277e-05, |
|
"loss": 0.7818, |
|
"mean_token_accuracy": 0.7743147432804107, |
|
"num_tokens": 1488598705.0, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.2877172605467293, |
|
"grad_norm": 0.10071973502635956, |
|
"learning_rate": 8.108999151615315e-05, |
|
"loss": 0.7772, |
|
"mean_token_accuracy": 0.7752561815083027, |
|
"num_tokens": 1499063951.0, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.291035798730659, |
|
"grad_norm": 0.14016222953796387, |
|
"learning_rate": 8.059673234408742e-05, |
|
"loss": 0.7787, |
|
"mean_token_accuracy": 0.7748059660196305, |
|
"num_tokens": 1509548184.0, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.2943543369145891, |
|
"grad_norm": 0.11304216831922531, |
|
"learning_rate": 8.010445993970801e-05, |
|
"loss": 0.7926, |
|
"mean_token_accuracy": 0.7715144835412502, |
|
"num_tokens": 1520033715.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.2976728750985191, |
|
"grad_norm": 0.08642231673002243, |
|
"learning_rate": 7.961319082357683e-05, |
|
"loss": 0.7721, |
|
"mean_token_accuracy": 0.7762427523732185, |
|
"num_tokens": 1530519443.0, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.3009914132824492, |
|
"grad_norm": 0.07883604615926743, |
|
"learning_rate": 7.912294148258552e-05, |
|
"loss": 0.7875, |
|
"mean_token_accuracy": 0.7730479247868061, |
|
"num_tokens": 1540979289.0, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.304309951466379, |
|
"grad_norm": 0.0846463143825531, |
|
"learning_rate": 7.863372836940235e-05, |
|
"loss": 0.7791, |
|
"mean_token_accuracy": 0.7748443402349949, |
|
"num_tokens": 1551465049.0, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.307628489650309, |
|
"grad_norm": 0.10091651231050491, |
|
"learning_rate": 7.814556790191998e-05, |
|
"loss": 0.7714, |
|
"mean_token_accuracy": 0.7768020920455456, |
|
"num_tokens": 1561950809.0, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.310947027834239, |
|
"grad_norm": 0.07219678163528442, |
|
"learning_rate": 7.765847646270444e-05, |
|
"loss": 0.7765, |
|
"mean_token_accuracy": 0.7754981651902199, |
|
"num_tokens": 1572429097.0, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.314265566018169, |
|
"grad_norm": 0.09150810539722443, |
|
"learning_rate": 7.717247039844544e-05, |
|
"loss": 0.7762, |
|
"mean_token_accuracy": 0.7758684448897839, |
|
"num_tokens": 1582893545.0, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.317584104202099, |
|
"grad_norm": 0.1428510546684265, |
|
"learning_rate": 7.66875660194077e-05, |
|
"loss": 0.7913, |
|
"mean_token_accuracy": 0.772127203643322, |
|
"num_tokens": 1593369998.0, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.320902642386029, |
|
"grad_norm": 0.09178195148706436, |
|
"learning_rate": 7.62037795988837e-05, |
|
"loss": 0.7793, |
|
"mean_token_accuracy": 0.7747820563614368, |
|
"num_tokens": 1603848669.0, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.3242211805699589, |
|
"grad_norm": 0.07012277841567993, |
|
"learning_rate": 7.572112737264734e-05, |
|
"loss": 0.782, |
|
"mean_token_accuracy": 0.774373073130846, |
|
"num_tokens": 1614314052.0, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.3275397187538889, |
|
"grad_norm": 0.06344877183437347, |
|
"learning_rate": 7.523962553840917e-05, |
|
"loss": 0.7869, |
|
"mean_token_accuracy": 0.773041807860136, |
|
"num_tokens": 1624794343.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.330858256937819, |
|
"grad_norm": 0.11461348831653595, |
|
"learning_rate": 7.475929025527298e-05, |
|
"loss": 0.788, |
|
"mean_token_accuracy": 0.7731569536030293, |
|
"num_tokens": 1635275263.0, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.334176795121749, |
|
"grad_norm": 0.0666208416223526, |
|
"learning_rate": 7.428013764319318e-05, |
|
"loss": 0.7795, |
|
"mean_token_accuracy": 0.7747226603329181, |
|
"num_tokens": 1645746148.0, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.3374953333056787, |
|
"grad_norm": 0.07270121574401855, |
|
"learning_rate": 7.380218378243409e-05, |
|
"loss": 0.7805, |
|
"mean_token_accuracy": 0.774812101572752, |
|
"num_tokens": 1656231908.0, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.3408138714896087, |
|
"grad_norm": 0.09579236060380936, |
|
"learning_rate": 7.332544471303011e-05, |
|
"loss": 0.7881, |
|
"mean_token_accuracy": 0.7726102568209171, |
|
"num_tokens": 1666707837.0, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.3441324096735388, |
|
"grad_norm": 0.07544690370559692, |
|
"learning_rate": 7.284993643424751e-05, |
|
"loss": 0.7816, |
|
"mean_token_accuracy": 0.7744814246892929, |
|
"num_tokens": 1677193597.0, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.3474509478574688, |
|
"grad_norm": 0.07443518191576004, |
|
"learning_rate": 7.237567490404747e-05, |
|
"loss": 0.7834, |
|
"mean_token_accuracy": 0.774057149887085, |
|
"num_tokens": 1687677911.0, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.3507694860413988, |
|
"grad_norm": 0.06961726397275925, |
|
"learning_rate": 7.190267603855058e-05, |
|
"loss": 0.774, |
|
"mean_token_accuracy": 0.7760677352547646, |
|
"num_tokens": 1698157172.0, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.3540880242253288, |
|
"grad_norm": 0.08477471768856049, |
|
"learning_rate": 7.14309557115026e-05, |
|
"loss": 0.7746, |
|
"mean_token_accuracy": 0.7758500918745994, |
|
"num_tokens": 1708636922.0, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.3574065624092588, |
|
"grad_norm": 0.10532096773386002, |
|
"learning_rate": 7.09605297537419e-05, |
|
"loss": 0.7803, |
|
"mean_token_accuracy": 0.774381760507822, |
|
"num_tokens": 1719116205.0, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.3607251005931886, |
|
"grad_norm": 0.0649094209074974, |
|
"learning_rate": 7.049141395266792e-05, |
|
"loss": 0.7738, |
|
"mean_token_accuracy": 0.7759659215807915, |
|
"num_tokens": 1729597345.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.3640436387771186, |
|
"grad_norm": 0.09603875875473022, |
|
"learning_rate": 7.002362405171164e-05, |
|
"loss": 0.7875, |
|
"mean_token_accuracy": 0.7733911462128162, |
|
"num_tokens": 1740083105.0, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.3673621769610487, |
|
"grad_norm": 0.09281712770462036, |
|
"learning_rate": 6.955717574980701e-05, |
|
"loss": 0.7797, |
|
"mean_token_accuracy": 0.7746181301772594, |
|
"num_tokens": 1750565848.0, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.3706807151449787, |
|
"grad_norm": 0.07830017805099487, |
|
"learning_rate": 6.90920847008643e-05, |
|
"loss": 0.7848, |
|
"mean_token_accuracy": 0.7739394195377827, |
|
"num_tokens": 1761044225.0, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.3739992533289085, |
|
"grad_norm": 0.11212293058633804, |
|
"learning_rate": 6.862836651324457e-05, |
|
"loss": 0.7792, |
|
"mean_token_accuracy": 0.7750230267643928, |
|
"num_tokens": 1771529777.0, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.3773177915128385, |
|
"grad_norm": 0.08509458601474762, |
|
"learning_rate": 6.816603674923592e-05, |
|
"loss": 0.7736, |
|
"mean_token_accuracy": 0.7765054024755955, |
|
"num_tokens": 1782012345.0, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.3806363296967685, |
|
"grad_norm": 0.08097831904888153, |
|
"learning_rate": 6.770511092453128e-05, |
|
"loss": 0.7811, |
|
"mean_token_accuracy": 0.7747589543461799, |
|
"num_tokens": 1792495321.0, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.3839548678806985, |
|
"grad_norm": 0.07892820984125137, |
|
"learning_rate": 6.724560450770766e-05, |
|
"loss": 0.7903, |
|
"mean_token_accuracy": 0.7728420466184616, |
|
"num_tokens": 1802981081.0, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.3872734060646286, |
|
"grad_norm": 0.1068158969283104, |
|
"learning_rate": 6.678753291970699e-05, |
|
"loss": 0.7842, |
|
"mean_token_accuracy": 0.773830558359623, |
|
"num_tokens": 1813466841.0, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.3905919442485586, |
|
"grad_norm": 0.05995525047183037, |
|
"learning_rate": 6.63309115333187e-05, |
|
"loss": 0.7773, |
|
"mean_token_accuracy": 0.7754997856914997, |
|
"num_tokens": 1823952601.0, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.3939104824324886, |
|
"grad_norm": 0.06882365792989731, |
|
"learning_rate": 6.587575567266365e-05, |
|
"loss": 0.7839, |
|
"mean_token_accuracy": 0.7739127658307552, |
|
"num_tokens": 1834429277.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.3972290206164184, |
|
"grad_norm": 0.1076866015791893, |
|
"learning_rate": 6.542208061267998e-05, |
|
"loss": 0.7913, |
|
"mean_token_accuracy": 0.7725215159356594, |
|
"num_tokens": 1844908653.0, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.4005475588003484, |
|
"grad_norm": 0.08683478087186813, |
|
"learning_rate": 6.496990157861063e-05, |
|
"loss": 0.777, |
|
"mean_token_accuracy": 0.7755366273224353, |
|
"num_tokens": 1855383144.0, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.4038660969842784, |
|
"grad_norm": 0.07578273862600327, |
|
"learning_rate": 6.451923374549198e-05, |
|
"loss": 0.7741, |
|
"mean_token_accuracy": 0.7762755893170834, |
|
"num_tokens": 1865852790.0, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.4071846351682085, |
|
"grad_norm": 0.066180020570755, |
|
"learning_rate": 6.407009223764497e-05, |
|
"loss": 0.7793, |
|
"mean_token_accuracy": 0.7752706862986087, |
|
"num_tokens": 1876338550.0, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.4105031733521383, |
|
"grad_norm": 0.08395300805568695, |
|
"learning_rate": 6.362249212816726e-05, |
|
"loss": 0.77, |
|
"mean_token_accuracy": 0.7771471291780472, |
|
"num_tokens": 1886820844.0, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.4138217115360683, |
|
"grad_norm": 0.052178774029016495, |
|
"learning_rate": 6.317644843842758e-05, |
|
"loss": 0.7834, |
|
"mean_token_accuracy": 0.7741237193346023, |
|
"num_tokens": 1897293470.0, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.4171402497199983, |
|
"grad_norm": 0.07526011019945145, |
|
"learning_rate": 6.273197613756148e-05, |
|
"loss": 0.7873, |
|
"mean_token_accuracy": 0.7730425357818603, |
|
"num_tokens": 1907779230.0, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.4204587879039283, |
|
"grad_norm": 0.1261919140815735, |
|
"learning_rate": 6.228909014196907e-05, |
|
"loss": 0.7719, |
|
"mean_token_accuracy": 0.7760506883263588, |
|
"num_tokens": 1918253340.0, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.4237773260878583, |
|
"grad_norm": 0.12899625301361084, |
|
"learning_rate": 6.184780531481431e-05, |
|
"loss": 0.7794, |
|
"mean_token_accuracy": 0.7748920321464539, |
|
"num_tokens": 1928739100.0, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.4270958642717884, |
|
"grad_norm": 0.11729896813631058, |
|
"learning_rate": 6.140813646552633e-05, |
|
"loss": 0.783, |
|
"mean_token_accuracy": 0.7740238346159458, |
|
"num_tokens": 1939222890.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.4304144024557184, |
|
"grad_norm": 0.07414821535348892, |
|
"learning_rate": 6.097009834930236e-05, |
|
"loss": 0.7757, |
|
"mean_token_accuracy": 0.7755299225449562, |
|
"num_tokens": 1949708650.0, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.4337329406396482, |
|
"grad_norm": 0.12376933544874191, |
|
"learning_rate": 6.053370566661254e-05, |
|
"loss": 0.7787, |
|
"mean_token_accuracy": 0.7750980533659458, |
|
"num_tokens": 1960194410.0, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.4370514788235782, |
|
"grad_norm": 0.12327831983566284, |
|
"learning_rate": 6.009897306270662e-05, |
|
"loss": 0.7757, |
|
"mean_token_accuracy": 0.7755475424230098, |
|
"num_tokens": 1970678987.0, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.4403700170075082, |
|
"grad_norm": 0.07328958809375763, |
|
"learning_rate": 5.966591512712239e-05, |
|
"loss": 0.7837, |
|
"mean_token_accuracy": 0.773887825012207, |
|
"num_tokens": 1981153472.0, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.4436885551914382, |
|
"grad_norm": 0.06780887395143509, |
|
"learning_rate": 5.9234546393196146e-05, |
|
"loss": 0.7781, |
|
"mean_token_accuracy": 0.7751296453177929, |
|
"num_tokens": 1991631098.0, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.447007093375368, |
|
"grad_norm": 0.1018683910369873, |
|
"learning_rate": 5.880488133757494e-05, |
|
"loss": 0.77, |
|
"mean_token_accuracy": 0.7767154656350612, |
|
"num_tokens": 2002106094.0, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.450325631559298, |
|
"grad_norm": 0.1522637903690338, |
|
"learning_rate": 5.837693437973073e-05, |
|
"loss": 0.783, |
|
"mean_token_accuracy": 0.7741337135434151, |
|
"num_tokens": 2012587800.0, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.453644169743228, |
|
"grad_norm": 0.08331139385700226, |
|
"learning_rate": 5.795071988147649e-05, |
|
"loss": 0.7763, |
|
"mean_token_accuracy": 0.7759707301855088, |
|
"num_tokens": 2023064521.0, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.456962707927158, |
|
"grad_norm": 0.08645942062139511, |
|
"learning_rate": 5.752625214648404e-05, |
|
"loss": 0.7746, |
|
"mean_token_accuracy": 0.7762944281101227, |
|
"num_tokens": 2033548491.0, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.4602812461110881, |
|
"grad_norm": 0.09425269067287445, |
|
"learning_rate": 5.710354541980435e-05, |
|
"loss": 0.7751, |
|
"mean_token_accuracy": 0.775750569999218, |
|
"num_tokens": 2044033298.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.4635997842950181, |
|
"grad_norm": 0.09406430274248123, |
|
"learning_rate": 5.668261388738927e-05, |
|
"loss": 0.7666, |
|
"mean_token_accuracy": 0.7776207953691483, |
|
"num_tokens": 2054515177.0, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.4669183224789482, |
|
"grad_norm": 0.08789300918579102, |
|
"learning_rate": 5.626347167561543e-05, |
|
"loss": 0.7727, |
|
"mean_token_accuracy": 0.7764379486441613, |
|
"num_tokens": 2064985847.0, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.470236860662878, |
|
"grad_norm": 0.07988473773002625, |
|
"learning_rate": 5.584613285081031e-05, |
|
"loss": 0.7763, |
|
"mean_token_accuracy": 0.7756894461810588, |
|
"num_tokens": 2075471545.0, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.473555398846808, |
|
"grad_norm": 0.07571577280759811, |
|
"learning_rate": 5.543061141878001e-05, |
|
"loss": 0.7827, |
|
"mean_token_accuracy": 0.7742387793958188, |
|
"num_tokens": 2085957305.0, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.476873937030738, |
|
"grad_norm": 0.09301453828811646, |
|
"learning_rate": 5.501692132433937e-05, |
|
"loss": 0.7766, |
|
"mean_token_accuracy": 0.7756397731602191, |
|
"num_tokens": 2096438910.0, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.480192475214668, |
|
"grad_norm": 0.07560286670923233, |
|
"learning_rate": 5.460507645084385e-05, |
|
"loss": 0.7755, |
|
"mean_token_accuracy": 0.7756211280822753, |
|
"num_tokens": 2106914965.0, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.4835110133985978, |
|
"grad_norm": 0.07497697323560715, |
|
"learning_rate": 5.419509061972372e-05, |
|
"loss": 0.7794, |
|
"mean_token_accuracy": 0.7751137435436248, |
|
"num_tokens": 2117398567.0, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.4868295515825278, |
|
"grad_norm": 0.06486741453409195, |
|
"learning_rate": 5.378697759002016e-05, |
|
"loss": 0.7739, |
|
"mean_token_accuracy": 0.7761786207556725, |
|
"num_tokens": 2127871680.0, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.4901480897664579, |
|
"grad_norm": 0.06366632133722305, |
|
"learning_rate": 5.338075105792344e-05, |
|
"loss": 0.7803, |
|
"mean_token_accuracy": 0.7748170055449008, |
|
"num_tokens": 2138349551.0, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.4934666279503879, |
|
"grad_norm": 0.06042411923408508, |
|
"learning_rate": 5.2976424656313425e-05, |
|
"loss": 0.779, |
|
"mean_token_accuracy": 0.7748967044055461, |
|
"num_tokens": 2148835311.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.496785166134318, |
|
"grad_norm": 0.07581470161676407, |
|
"learning_rate": 5.2574011954301984e-05, |
|
"loss": 0.775, |
|
"mean_token_accuracy": 0.7760786689817906, |
|
"num_tokens": 2159319596.0, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.500103704318248, |
|
"grad_norm": 0.06507743149995804, |
|
"learning_rate": 5.2173526456777644e-05, |
|
"loss": 0.7811, |
|
"mean_token_accuracy": 0.7744904771447182, |
|
"num_tokens": 2169799824.0, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.503422242502178, |
|
"grad_norm": 0.06990982592105865, |
|
"learning_rate": 5.17749816039523e-05, |
|
"loss": 0.7749, |
|
"mean_token_accuracy": 0.7760398216545582, |
|
"num_tokens": 2180285584.0, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.5067407806861077, |
|
"grad_norm": 0.087668277323246, |
|
"learning_rate": 5.137839077091016e-05, |
|
"loss": 0.7754, |
|
"mean_token_accuracy": 0.7759116359055043, |
|
"num_tokens": 2190771344.0, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.5100593188700377, |
|
"grad_norm": 0.0703718364238739, |
|
"learning_rate": 5.098376726715899e-05, |
|
"loss": 0.7825, |
|
"mean_token_accuracy": 0.7736853919923306, |
|
"num_tokens": 2201257104.0, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.5133778570539678, |
|
"grad_norm": 0.07812221348285675, |
|
"learning_rate": 5.059112433618338e-05, |
|
"loss": 0.7781, |
|
"mean_token_accuracy": 0.7749250061810017, |
|
"num_tokens": 2211735778.0, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.5166963952378976, |
|
"grad_norm": 0.06752278655767441, |
|
"learning_rate": 5.0200475155000294e-05, |
|
"loss": 0.7742, |
|
"mean_token_accuracy": 0.776116319745779, |
|
"num_tokens": 2222214925.0, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.5200149334218276, |
|
"grad_norm": 0.06351261585950851, |
|
"learning_rate": 4.9811832833716834e-05, |
|
"loss": 0.7785, |
|
"mean_token_accuracy": 0.775452945381403, |
|
"num_tokens": 2232685414.0, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.5233334716057576, |
|
"grad_norm": 0.08866973221302032, |
|
"learning_rate": 4.942521041509034e-05, |
|
"loss": 0.7754, |
|
"mean_token_accuracy": 0.775909723341465, |
|
"num_tokens": 2243171174.0, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.5266520097896876, |
|
"grad_norm": 0.08251211047172546, |
|
"learning_rate": 4.904062087409058e-05, |
|
"loss": 0.7753, |
|
"mean_token_accuracy": 0.7754239991307259, |
|
"num_tokens": 2253651360.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.5299705479736176, |
|
"grad_norm": 0.08602248132228851, |
|
"learning_rate": 4.8658077117464386e-05, |
|
"loss": 0.7776, |
|
"mean_token_accuracy": 0.7752176553010941, |
|
"num_tokens": 2264137120.0, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.5332890861575477, |
|
"grad_norm": 0.0935678705573082, |
|
"learning_rate": 4.827759198330248e-05, |
|
"loss": 0.7746, |
|
"mean_token_accuracy": 0.7757936008274555, |
|
"num_tokens": 2274607585.0, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.5366076243414777, |
|
"grad_norm": 0.0817374438047409, |
|
"learning_rate": 4.7899178240608664e-05, |
|
"loss": 0.7789, |
|
"mean_token_accuracy": 0.7749560095369816, |
|
"num_tokens": 2285085749.0, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.5399261625254077, |
|
"grad_norm": 0.06255115568637848, |
|
"learning_rate": 4.752284858887117e-05, |
|
"loss": 0.7761, |
|
"mean_token_accuracy": 0.7755250073969364, |
|
"num_tokens": 2295566954.0, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.5432447007093375, |
|
"grad_norm": 0.0950886607170105, |
|
"learning_rate": 4.714861565763665e-05, |
|
"loss": 0.7714, |
|
"mean_token_accuracy": 0.776761844754219, |
|
"num_tokens": 2306052714.0, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.5465632388932675, |
|
"grad_norm": 0.06539395451545715, |
|
"learning_rate": 4.677649200608619e-05, |
|
"loss": 0.7703, |
|
"mean_token_accuracy": 0.7772861413657666, |
|
"num_tokens": 2316538474.0, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.5498817770771975, |
|
"grad_norm": 0.06552979350090027, |
|
"learning_rate": 4.640649012261396e-05, |
|
"loss": 0.7775, |
|
"mean_token_accuracy": 0.7757456503808499, |
|
"num_tokens": 2327018194.0, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.5532003152611273, |
|
"grad_norm": 0.12058078497648239, |
|
"learning_rate": 4.6038622424407956e-05, |
|
"loss": 0.7743, |
|
"mean_token_accuracy": 0.7763642080128192, |
|
"num_tokens": 2337503954.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.5565188534450574, |
|
"grad_norm": 0.08425801992416382, |
|
"learning_rate": 4.567290125703331e-05, |
|
"loss": 0.7758, |
|
"mean_token_accuracy": 0.7755994580686092, |
|
"num_tokens": 2347989714.0, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.5598373916289874, |
|
"grad_norm": 0.07768967747688293, |
|
"learning_rate": 4.53093388940181e-05, |
|
"loss": 0.7748, |
|
"mean_token_accuracy": 0.775930143892765, |
|
"num_tokens": 2358463543.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.5631559298129174, |
|
"grad_norm": 0.08114529401063919, |
|
"learning_rate": 4.494794753644134e-05, |
|
"loss": 0.7781, |
|
"mean_token_accuracy": 0.7751664377748966, |
|
"num_tokens": 2368949303.0, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.5664744679968474, |
|
"grad_norm": 0.10389601439237595, |
|
"learning_rate": 4.4588739312523515e-05, |
|
"loss": 0.7727, |
|
"mean_token_accuracy": 0.7765629783272743, |
|
"num_tokens": 2379435063.0, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.5697930061807774, |
|
"grad_norm": 0.07367324829101562, |
|
"learning_rate": 4.4231726277219636e-05, |
|
"loss": 0.7767, |
|
"mean_token_accuracy": 0.7754860304296016, |
|
"num_tokens": 2389918369.0, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.5731115443647075, |
|
"grad_norm": 0.09348005801439285, |
|
"learning_rate": 4.387692041181459e-05, |
|
"loss": 0.776, |
|
"mean_token_accuracy": 0.7754479125142097, |
|
"num_tokens": 2400401304.0, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.5764300825486375, |
|
"grad_norm": 0.07626502215862274, |
|
"learning_rate": 4.352433362352112e-05, |
|
"loss": 0.7779, |
|
"mean_token_accuracy": 0.774944218993187, |
|
"num_tokens": 2410879449.0, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.5797486207325673, |
|
"grad_norm": 0.08088341355323792, |
|
"learning_rate": 4.3173977745080166e-05, |
|
"loss": 0.7768, |
|
"mean_token_accuracy": 0.775431302934885, |
|
"num_tokens": 2421365209.0, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.5830671589164973, |
|
"grad_norm": 0.05723877623677254, |
|
"learning_rate": 4.282586453436383e-05, |
|
"loss": 0.7752, |
|
"mean_token_accuracy": 0.77611697986722, |
|
"num_tokens": 2431850969.0, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.5863856971004273, |
|
"grad_norm": 0.10977209359407425, |
|
"learning_rate": 4.248000567398066e-05, |
|
"loss": 0.7893, |
|
"mean_token_accuracy": 0.7726804412901401, |
|
"num_tokens": 2442328548.0, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.5897042352843571, |
|
"grad_norm": 0.09593162685632706, |
|
"learning_rate": 4.213641277088373e-05, |
|
"loss": 0.7762, |
|
"mean_token_accuracy": 0.7757141642272473, |
|
"num_tokens": 2452811459.0, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.5930227734682871, |
|
"grad_norm": 0.12312334030866623, |
|
"learning_rate": 4.179509735598105e-05, |
|
"loss": 0.7731, |
|
"mean_token_accuracy": 0.7762953326106071, |
|
"num_tokens": 2463293597.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.5963413116522172, |
|
"grad_norm": 0.10306856781244278, |
|
"learning_rate": 4.145607088374854e-05, |
|
"loss": 0.7761, |
|
"mean_token_accuracy": 0.7758133940398693, |
|
"num_tokens": 2473779357.0, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.5996598498361472, |
|
"grad_norm": 0.09463858604431152, |
|
"learning_rate": 4.111934473184577e-05, |
|
"loss": 0.7635, |
|
"mean_token_accuracy": 0.7788975313305855, |
|
"num_tokens": 2484257764.0, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.6029783880200772, |
|
"grad_norm": 0.10744182020425797, |
|
"learning_rate": 4.07849302007339e-05, |
|
"loss": 0.767, |
|
"mean_token_accuracy": 0.7774240888655186, |
|
"num_tokens": 2494733568.0, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.6062969262040072, |
|
"grad_norm": 0.08114374428987503, |
|
"learning_rate": 4.045283851329663e-05, |
|
"loss": 0.7814, |
|
"mean_token_accuracy": 0.7743571817874908, |
|
"num_tokens": 2505210335.0, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.6096154643879372, |
|
"grad_norm": 0.0817594826221466, |
|
"learning_rate": 4.012308081446351e-05, |
|
"loss": 0.7713, |
|
"mean_token_accuracy": 0.7768932566046715, |
|
"num_tokens": 2515674732.0, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.6129340025718673, |
|
"grad_norm": 0.08101768046617508, |
|
"learning_rate": 3.9795668170835874e-05, |
|
"loss": 0.7832, |
|
"mean_token_accuracy": 0.773908444494009, |
|
"num_tokens": 2526158406.0, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.616252540755797, |
|
"grad_norm": 0.05949486419558525, |
|
"learning_rate": 3.947061157031552e-05, |
|
"loss": 0.779, |
|
"mean_token_accuracy": 0.775159952044487, |
|
"num_tokens": 2536644166.0, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.619571078939727, |
|
"grad_norm": 0.08656177669763565, |
|
"learning_rate": 3.9147921921735875e-05, |
|
"loss": 0.7684, |
|
"mean_token_accuracy": 0.7776845954358578, |
|
"num_tokens": 2547119566.0, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.6228896171236569, |
|
"grad_norm": 0.1058259829878807, |
|
"learning_rate": 3.882761005449595e-05, |
|
"loss": 0.7746, |
|
"mean_token_accuracy": 0.7759816244244575, |
|
"num_tokens": 2557595698.0, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.6262081553075869, |
|
"grad_norm": 0.08031774312257767, |
|
"learning_rate": 3.8509686718196935e-05, |
|
"loss": 0.7753, |
|
"mean_token_accuracy": 0.775927259773016, |
|
"num_tokens": 2568075546.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.629526693491517, |
|
"grad_norm": 0.0715925469994545, |
|
"learning_rate": 3.819416258228136e-05, |
|
"loss": 0.7618, |
|
"mean_token_accuracy": 0.778892420232296, |
|
"num_tokens": 2578561306.0, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.632845231675447, |
|
"grad_norm": 0.15235373377799988, |
|
"learning_rate": 3.7881048235675123e-05, |
|
"loss": 0.7756, |
|
"mean_token_accuracy": 0.7754349842667579, |
|
"num_tokens": 2589037906.0, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.636163769859377, |
|
"grad_norm": 0.08016230911016464, |
|
"learning_rate": 3.757035418643201e-05, |
|
"loss": 0.7793, |
|
"mean_token_accuracy": 0.775123992562294, |
|
"num_tokens": 2599523666.0, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.639482308043307, |
|
"grad_norm": 0.11928538233041763, |
|
"learning_rate": 3.7262090861381194e-05, |
|
"loss": 0.7799, |
|
"mean_token_accuracy": 0.7747947402298451, |
|
"num_tokens": 2610009426.0, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.642800846227237, |
|
"grad_norm": 0.10283015668392181, |
|
"learning_rate": 3.69562686057772e-05, |
|
"loss": 0.7739, |
|
"mean_token_accuracy": 0.7760435193777084, |
|
"num_tokens": 2620472464.0, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.646119384411167, |
|
"grad_norm": 0.06924333423376083, |
|
"learning_rate": 3.665289768295275e-05, |
|
"loss": 0.7712, |
|
"mean_token_accuracy": 0.7768824696540833, |
|
"num_tokens": 2630950285.0, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.6494379225950968, |
|
"grad_norm": 0.08660932630300522, |
|
"learning_rate": 3.635198827397443e-05, |
|
"loss": 0.7793, |
|
"mean_token_accuracy": 0.7751142635941506, |
|
"num_tokens": 2641428630.0, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.6527564607790268, |
|
"grad_norm": 0.0652114674448967, |
|
"learning_rate": 3.6053550477300755e-05, |
|
"loss": 0.7704, |
|
"mean_token_accuracy": 0.7767096310853958, |
|
"num_tokens": 2651910105.0, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.6560749989629568, |
|
"grad_norm": 0.07259002327919006, |
|
"learning_rate": 3.575759430844358e-05, |
|
"loss": 0.7763, |
|
"mean_token_accuracy": 0.7753757193684578, |
|
"num_tokens": 2662386058.0, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.6593935371468866, |
|
"grad_norm": 0.08280441910028458, |
|
"learning_rate": 3.5464129699631756e-05, |
|
"loss": 0.7665, |
|
"mean_token_accuracy": 0.7776149123907089, |
|
"num_tokens": 2672871818.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6627120753308167, |
|
"grad_norm": 0.08868120610713959, |
|
"learning_rate": 3.5173166499477905e-05, |
|
"loss": 0.7726, |
|
"mean_token_accuracy": 0.7763372980058193, |
|
"num_tokens": 2683348419.0, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.6660306135147467, |
|
"grad_norm": 0.10042263567447662, |
|
"learning_rate": 3.4884714472647915e-05, |
|
"loss": 0.7689, |
|
"mean_token_accuracy": 0.7774047166109085, |
|
"num_tokens": 2693822490.0, |
|
"step": 5020 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6026, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.544416933810536e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|