|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2220, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04504504504504504, |
|
"grad_norm": 3.175370931625366, |
|
"learning_rate": 8.108108108108109e-06, |
|
"loss": 0.6725, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09009009009009009, |
|
"grad_norm": 1.692520260810852, |
|
"learning_rate": 1.7117117117117117e-05, |
|
"loss": 0.314, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13513513513513514, |
|
"grad_norm": 1.95720374584198, |
|
"learning_rate": 2.6126126126126128e-05, |
|
"loss": 0.2577, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18018018018018017, |
|
"grad_norm": 1.2667555809020996, |
|
"learning_rate": 3.513513513513514e-05, |
|
"loss": 0.2377, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22522522522522523, |
|
"grad_norm": 1.4453164339065552, |
|
"learning_rate": 4.414414414414415e-05, |
|
"loss": 0.2102, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2702702702702703, |
|
"grad_norm": 1.5506036281585693, |
|
"learning_rate": 5.3153153153153155e-05, |
|
"loss": 0.1921, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3153153153153153, |
|
"grad_norm": 0.9104294776916504, |
|
"learning_rate": 6.216216216216216e-05, |
|
"loss": 0.1875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.36036036036036034, |
|
"grad_norm": 1.6911464929580688, |
|
"learning_rate": 7.117117117117116e-05, |
|
"loss": 0.1705, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.40540540540540543, |
|
"grad_norm": 1.2363160848617554, |
|
"learning_rate": 8.018018018018019e-05, |
|
"loss": 0.188, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.45045045045045046, |
|
"grad_norm": 1.005192518234253, |
|
"learning_rate": 8.918918918918919e-05, |
|
"loss": 0.1554, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4954954954954955, |
|
"grad_norm": 1.2431941032409668, |
|
"learning_rate": 9.81981981981982e-05, |
|
"loss": 0.1369, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5405405405405406, |
|
"grad_norm": 1.1538969278335571, |
|
"learning_rate": 9.999644972962145e-05, |
|
"loss": 0.1376, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5855855855855856, |
|
"grad_norm": 1.21573805809021, |
|
"learning_rate": 9.998202762029625e-05, |
|
"loss": 0.135, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6306306306306306, |
|
"grad_norm": 1.4540833234786987, |
|
"learning_rate": 9.995651497779182e-05, |
|
"loss": 0.1342, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6756756756756757, |
|
"grad_norm": 1.2237752676010132, |
|
"learning_rate": 9.991991746311917e-05, |
|
"loss": 0.1136, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7207207207207207, |
|
"grad_norm": 1.0290318727493286, |
|
"learning_rate": 9.987224319691624e-05, |
|
"loss": 0.1114, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7657657657657657, |
|
"grad_norm": 0.8585171103477478, |
|
"learning_rate": 9.981350275764608e-05, |
|
"loss": 0.1005, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8108108108108109, |
|
"grad_norm": 0.7964202761650085, |
|
"learning_rate": 9.97437091792495e-05, |
|
"loss": 0.1074, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8558558558558559, |
|
"grad_norm": 0.6369871497154236, |
|
"learning_rate": 9.966287794825305e-05, |
|
"loss": 0.103, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9009009009009009, |
|
"grad_norm": 0.5182180404663086, |
|
"learning_rate": 9.957102700033265e-05, |
|
"loss": 0.095, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9459459459459459, |
|
"grad_norm": 0.5850012898445129, |
|
"learning_rate": 9.946817671633384e-05, |
|
"loss": 0.0854, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.990990990990991, |
|
"grad_norm": 0.9804884791374207, |
|
"learning_rate": 9.935434991774952e-05, |
|
"loss": 0.0898, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0360360360360361, |
|
"grad_norm": 0.7499725222587585, |
|
"learning_rate": 9.922957186165598e-05, |
|
"loss": 0.0882, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 0.9342101216316223, |
|
"learning_rate": 9.909387023510871e-05, |
|
"loss": 0.0856, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1261261261261262, |
|
"grad_norm": 0.752739667892456, |
|
"learning_rate": 9.894727514899881e-05, |
|
"loss": 0.0863, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1711711711711712, |
|
"grad_norm": 0.5502750873565674, |
|
"learning_rate": 9.878981913137179e-05, |
|
"loss": 0.0805, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2162162162162162, |
|
"grad_norm": 1.2474815845489502, |
|
"learning_rate": 9.862153712020972e-05, |
|
"loss": 0.0874, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2612612612612613, |
|
"grad_norm": 0.8233453631401062, |
|
"learning_rate": 9.844246645567902e-05, |
|
"loss": 0.077, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3063063063063063, |
|
"grad_norm": 0.5110873579978943, |
|
"learning_rate": 9.825264687184493e-05, |
|
"loss": 0.0765, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.5558863878250122, |
|
"learning_rate": 9.805212048785493e-05, |
|
"loss": 0.08, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3963963963963963, |
|
"grad_norm": 0.8767929673194885, |
|
"learning_rate": 9.78409317985929e-05, |
|
"loss": 0.0781, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.4414414414414414, |
|
"grad_norm": 0.6763063073158264, |
|
"learning_rate": 9.761912766480614e-05, |
|
"loss": 0.0749, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.4864864864864864, |
|
"grad_norm": 0.3673441410064697, |
|
"learning_rate": 9.738675730270737e-05, |
|
"loss": 0.0772, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.5315315315315314, |
|
"grad_norm": 0.3171934187412262, |
|
"learning_rate": 9.714387227305422e-05, |
|
"loss": 0.0725, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5765765765765765, |
|
"grad_norm": 0.6207244992256165, |
|
"learning_rate": 9.689052646970829e-05, |
|
"loss": 0.0723, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"grad_norm": 0.7387151122093201, |
|
"learning_rate": 9.662677610767672e-05, |
|
"loss": 0.0705, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.5089544057846069, |
|
"learning_rate": 9.635267971063848e-05, |
|
"loss": 0.0667, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.7117117117117115, |
|
"grad_norm": 0.8173167109489441, |
|
"learning_rate": 9.606829809795871e-05, |
|
"loss": 0.0672, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.7567567567567568, |
|
"grad_norm": 0.5291303992271423, |
|
"learning_rate": 9.57736943711933e-05, |
|
"loss": 0.0731, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.8018018018018018, |
|
"grad_norm": 0.3580414950847626, |
|
"learning_rate": 9.546893390008738e-05, |
|
"loss": 0.0709, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.8468468468468469, |
|
"grad_norm": 0.643068253993988, |
|
"learning_rate": 9.515408430807036e-05, |
|
"loss": 0.0641, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8918918918918919, |
|
"grad_norm": 0.6227248311042786, |
|
"learning_rate": 9.482921545725097e-05, |
|
"loss": 0.065, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.936936936936937, |
|
"grad_norm": 0.7317764163017273, |
|
"learning_rate": 9.449439943291541e-05, |
|
"loss": 0.0694, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.981981981981982, |
|
"grad_norm": 0.6944959163665771, |
|
"learning_rate": 9.414971052753252e-05, |
|
"loss": 0.0739, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.027027027027027, |
|
"grad_norm": 0.5378988981246948, |
|
"learning_rate": 9.379522522426879e-05, |
|
"loss": 0.066, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0720720720720722, |
|
"grad_norm": 0.6093091368675232, |
|
"learning_rate": 9.343102218001762e-05, |
|
"loss": 0.0725, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.1171171171171173, |
|
"grad_norm": 1.0975353717803955, |
|
"learning_rate": 9.305718220794604e-05, |
|
"loss": 0.0748, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 0.7575361132621765, |
|
"learning_rate": 9.267378825956301e-05, |
|
"loss": 0.0634, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.2072072072072073, |
|
"grad_norm": 0.5477670431137085, |
|
"learning_rate": 9.228092540631342e-05, |
|
"loss": 0.0661, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.2522522522522523, |
|
"grad_norm": 0.5405918955802917, |
|
"learning_rate": 9.187868082070132e-05, |
|
"loss": 0.0607, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.2972972972972974, |
|
"grad_norm": 0.7018790245056152, |
|
"learning_rate": 9.146714375694745e-05, |
|
"loss": 0.0567, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.3423423423423424, |
|
"grad_norm": 0.630402684211731, |
|
"learning_rate": 9.104640553118435e-05, |
|
"loss": 0.0618, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.3873873873873874, |
|
"grad_norm": 0.4060376286506653, |
|
"learning_rate": 9.06165595011943e-05, |
|
"loss": 0.0651, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.4324324324324325, |
|
"grad_norm": 0.5323811173439026, |
|
"learning_rate": 9.01777010456941e-05, |
|
"loss": 0.0569, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.4774774774774775, |
|
"grad_norm": 0.4360101521015167, |
|
"learning_rate": 8.972992754317144e-05, |
|
"loss": 0.0542, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.5225225225225225, |
|
"grad_norm": 0.6220657825469971, |
|
"learning_rate": 8.927333835027759e-05, |
|
"loss": 0.0615, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.5675675675675675, |
|
"grad_norm": 0.5657696723937988, |
|
"learning_rate": 8.880803477978102e-05, |
|
"loss": 0.058, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.6126126126126126, |
|
"grad_norm": 0.5093798637390137, |
|
"learning_rate": 8.833412007808713e-05, |
|
"loss": 0.0594, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.6576576576576576, |
|
"grad_norm": 0.452443927526474, |
|
"learning_rate": 8.78516994023289e-05, |
|
"loss": 0.0564, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 0.4802651107311249, |
|
"learning_rate": 8.73608797970334e-05, |
|
"loss": 0.0496, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.7477477477477477, |
|
"grad_norm": 0.33183491230010986, |
|
"learning_rate": 8.686177017036979e-05, |
|
"loss": 0.053, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.7927927927927927, |
|
"grad_norm": 0.40296322107315063, |
|
"learning_rate": 8.635448126998352e-05, |
|
"loss": 0.0568, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.8378378378378377, |
|
"grad_norm": 0.30987316370010376, |
|
"learning_rate": 8.583912565842257e-05, |
|
"loss": 0.0515, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.8828828828828827, |
|
"grad_norm": 0.6701423525810242, |
|
"learning_rate": 8.531581768816084e-05, |
|
"loss": 0.0526, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.9279279279279278, |
|
"grad_norm": 0.439345121383667, |
|
"learning_rate": 8.478467347622443e-05, |
|
"loss": 0.0577, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.972972972972973, |
|
"grad_norm": 0.42080074548721313, |
|
"learning_rate": 8.424581087842647e-05, |
|
"loss": 0.048, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.018018018018018, |
|
"grad_norm": 0.43773335218429565, |
|
"learning_rate": 8.369934946321595e-05, |
|
"loss": 0.0523, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.063063063063063, |
|
"grad_norm": 0.45040780305862427, |
|
"learning_rate": 8.314541048514664e-05, |
|
"loss": 0.0509, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.108108108108108, |
|
"grad_norm": 0.36781787872314453, |
|
"learning_rate": 8.25841168579719e-05, |
|
"loss": 0.0495, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.153153153153153, |
|
"grad_norm": 0.477033406496048, |
|
"learning_rate": 8.201559312737132e-05, |
|
"loss": 0.0514, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.1981981981981984, |
|
"grad_norm": 0.4004456698894501, |
|
"learning_rate": 8.143996544331511e-05, |
|
"loss": 0.0479, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.2432432432432434, |
|
"grad_norm": 0.5489519834518433, |
|
"learning_rate": 8.085736153207277e-05, |
|
"loss": 0.0475, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.2882882882882885, |
|
"grad_norm": 0.5412061810493469, |
|
"learning_rate": 8.026791066787176e-05, |
|
"loss": 0.0548, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.23101916909217834, |
|
"learning_rate": 7.967174364421284e-05, |
|
"loss": 0.0503, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.3783783783783785, |
|
"grad_norm": 0.27605608105659485, |
|
"learning_rate": 7.90689927448482e-05, |
|
"loss": 0.0514, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.4234234234234235, |
|
"grad_norm": 0.29271090030670166, |
|
"learning_rate": 7.8459791714429e-05, |
|
"loss": 0.0498, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.4684684684684686, |
|
"grad_norm": 0.4988279938697815, |
|
"learning_rate": 7.784427572882871e-05, |
|
"loss": 0.0488, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.5135135135135136, |
|
"grad_norm": 0.4085995554924011, |
|
"learning_rate": 7.722258136514884e-05, |
|
"loss": 0.0484, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.5585585585585586, |
|
"grad_norm": 0.5015743374824524, |
|
"learning_rate": 7.659484657141382e-05, |
|
"loss": 0.0547, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.6036036036036037, |
|
"grad_norm": 0.41595619916915894, |
|
"learning_rate": 7.596121063596168e-05, |
|
"loss": 0.0493, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6486486486486487, |
|
"grad_norm": 0.5095012187957764, |
|
"learning_rate": 7.532181415653725e-05, |
|
"loss": 0.0503, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.6936936936936937, |
|
"grad_norm": 0.4954228103160858, |
|
"learning_rate": 7.467679900909489e-05, |
|
"loss": 0.0528, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.7387387387387387, |
|
"grad_norm": 0.5485273003578186, |
|
"learning_rate": 7.40263083163176e-05, |
|
"loss": 0.0516, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.7837837837837838, |
|
"grad_norm": 0.35983964800834656, |
|
"learning_rate": 7.337048641585937e-05, |
|
"loss": 0.05, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.828828828828829, |
|
"grad_norm": 0.40537890791893005, |
|
"learning_rate": 7.270947882831822e-05, |
|
"loss": 0.044, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.873873873873874, |
|
"grad_norm": 0.3156987428665161, |
|
"learning_rate": 7.20434322249464e-05, |
|
"loss": 0.045, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.918918918918919, |
|
"grad_norm": 0.3144851326942444, |
|
"learning_rate": 7.137249439510548e-05, |
|
"loss": 0.0458, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.963963963963964, |
|
"grad_norm": 0.2909262478351593, |
|
"learning_rate": 7.069681421347339e-05, |
|
"loss": 0.0428, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.009009009009009, |
|
"grad_norm": 0.34615159034729004, |
|
"learning_rate": 7.001654160701046e-05, |
|
"loss": 0.0468, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.054054054054054, |
|
"grad_norm": 0.4577626883983612, |
|
"learning_rate": 6.933182752169213e-05, |
|
"loss": 0.0399, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.099099099099099, |
|
"grad_norm": 0.3926784098148346, |
|
"learning_rate": 6.864282388901544e-05, |
|
"loss": 0.0394, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.1441441441441444, |
|
"grad_norm": 0.3566577732563019, |
|
"learning_rate": 6.794968359228688e-05, |
|
"loss": 0.0452, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.1891891891891895, |
|
"grad_norm": 0.3146449029445648, |
|
"learning_rate": 6.725256043269912e-05, |
|
"loss": 0.0474, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.2342342342342345, |
|
"grad_norm": 0.2554456293582916, |
|
"learning_rate": 6.65516090952039e-05, |
|
"loss": 0.0458, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.2792792792792795, |
|
"grad_norm": 0.3618452250957489, |
|
"learning_rate": 6.584698511418901e-05, |
|
"loss": 0.0466, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.324324324324325, |
|
"grad_norm": 0.38914114236831665, |
|
"learning_rate": 6.513884483896666e-05, |
|
"loss": 0.0424, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.36936936936937, |
|
"grad_norm": 0.6733397841453552, |
|
"learning_rate": 6.4427345399081e-05, |
|
"loss": 0.0488, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.414414414414415, |
|
"grad_norm": 0.37581348419189453, |
|
"learning_rate": 6.37126446694427e-05, |
|
"loss": 0.0481, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.45945945945946, |
|
"grad_norm": 0.3915393352508545, |
|
"learning_rate": 6.299490123529797e-05, |
|
"loss": 0.0413, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.504504504504505, |
|
"grad_norm": 0.4132647216320038, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 0.0475, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.54954954954955, |
|
"grad_norm": 0.4121103584766388, |
|
"learning_rate": 6.155092393487051e-05, |
|
"loss": 0.0493, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.594594594594595, |
|
"grad_norm": 0.33546769618988037, |
|
"learning_rate": 6.08250104733197e-05, |
|
"loss": 0.041, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.63963963963964, |
|
"grad_norm": 0.34651216864585876, |
|
"learning_rate": 6.009669504563153e-05, |
|
"loss": 0.043, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.684684684684685, |
|
"grad_norm": 0.37287256121635437, |
|
"learning_rate": 5.9366139258023326e-05, |
|
"loss": 0.0413, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.72972972972973, |
|
"grad_norm": 0.3134576082229614, |
|
"learning_rate": 5.863350521382671e-05, |
|
"loss": 0.0405, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.774774774774775, |
|
"grad_norm": 0.33046990633010864, |
|
"learning_rate": 5.7898955477518666e-05, |
|
"loss": 0.0379, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.81981981981982, |
|
"grad_norm": 0.5138404369354248, |
|
"learning_rate": 5.716265303864978e-05, |
|
"loss": 0.0353, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.864864864864865, |
|
"grad_norm": 0.31907182931900024, |
|
"learning_rate": 5.642476127567866e-05, |
|
"loss": 0.0421, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.90990990990991, |
|
"grad_norm": 0.2643144428730011, |
|
"learning_rate": 5.5685443919719634e-05, |
|
"loss": 0.0431, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.954954954954955, |
|
"grad_norm": 0.3213581442832947, |
|
"learning_rate": 5.4944865018212497e-05, |
|
"loss": 0.0436, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.2239149659872055, |
|
"learning_rate": 5.4203188898521895e-05, |
|
"loss": 0.0406, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.045045045045045, |
|
"grad_norm": 0.3077867031097412, |
|
"learning_rate": 5.346058013147469e-05, |
|
"loss": 0.0417, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.09009009009009, |
|
"grad_norm": 0.4085425138473511, |
|
"learning_rate": 5.271720349484326e-05, |
|
"loss": 0.0397, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.135135135135135, |
|
"grad_norm": 0.4555751383304596, |
|
"learning_rate": 5.1973223936782887e-05, |
|
"loss": 0.0382, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.18018018018018, |
|
"grad_norm": 0.4915667474269867, |
|
"learning_rate": 5.122880653923134e-05, |
|
"loss": 0.0422, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.225225225225225, |
|
"grad_norm": 0.3977510333061218, |
|
"learning_rate": 5.04841164812788e-05, |
|
"loss": 0.0377, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.27027027027027, |
|
"grad_norm": 0.42886292934417725, |
|
"learning_rate": 4.973931900251611e-05, |
|
"loss": 0.0411, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.315315315315315, |
|
"grad_norm": 0.4493580460548401, |
|
"learning_rate": 4.899457936636988e-05, |
|
"loss": 0.0392, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.36036036036036, |
|
"grad_norm": 0.2820551097393036, |
|
"learning_rate": 4.82500628234319e-05, |
|
"loss": 0.0356, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 5.405405405405405, |
|
"grad_norm": 0.2753814160823822, |
|
"learning_rate": 4.750593457479171e-05, |
|
"loss": 0.0365, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.45045045045045, |
|
"grad_norm": 0.3056580424308777, |
|
"learning_rate": 4.676235973538013e-05, |
|
"loss": 0.0385, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.495495495495495, |
|
"grad_norm": 0.23721066117286682, |
|
"learning_rate": 4.6019503297331736e-05, |
|
"loss": 0.0401, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 5.54054054054054, |
|
"grad_norm": 0.2635497748851776, |
|
"learning_rate": 4.5277530093374734e-05, |
|
"loss": 0.0358, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 5.585585585585585, |
|
"grad_norm": 0.26532334089279175, |
|
"learning_rate": 4.4536604760256123e-05, |
|
"loss": 0.0406, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 5.63063063063063, |
|
"grad_norm": 0.31581610441207886, |
|
"learning_rate": 4.379689170221043e-05, |
|
"loss": 0.0391, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.675675675675675, |
|
"grad_norm": 0.31361261010169983, |
|
"learning_rate": 4.3058555054479924e-05, |
|
"loss": 0.0424, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 5.7207207207207205, |
|
"grad_norm": 0.25601786375045776, |
|
"learning_rate": 4.232175864689464e-05, |
|
"loss": 0.0392, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 5.7657657657657655, |
|
"grad_norm": 0.2870907485485077, |
|
"learning_rate": 4.158666596752004e-05, |
|
"loss": 0.0347, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 5.8108108108108105, |
|
"grad_norm": 0.27915236353874207, |
|
"learning_rate": 4.085344012638067e-05, |
|
"loss": 0.0351, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 5.8558558558558556, |
|
"grad_norm": 0.29854437708854675, |
|
"learning_rate": 4.01222438192675e-05, |
|
"loss": 0.0434, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.900900900900901, |
|
"grad_norm": 0.34385260939598083, |
|
"learning_rate": 3.939323929163738e-05, |
|
"loss": 0.0352, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 5.945945945945946, |
|
"grad_norm": 0.3390043377876282, |
|
"learning_rate": 3.866658830261224e-05, |
|
"loss": 0.0343, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 5.990990990990991, |
|
"grad_norm": 0.3802526593208313, |
|
"learning_rate": 3.794245208908639e-05, |
|
"loss": 0.0403, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.036036036036036, |
|
"grad_norm": 0.26724833250045776, |
|
"learning_rate": 3.722099132994949e-05, |
|
"loss": 0.037, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.081081081081081, |
|
"grad_norm": 0.2882586717605591, |
|
"learning_rate": 3.650236611043355e-05, |
|
"loss": 0.0307, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.126126126126126, |
|
"grad_norm": 0.24341371655464172, |
|
"learning_rate": 3.578673588659145e-05, |
|
"loss": 0.0318, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.171171171171171, |
|
"grad_norm": 0.28189948201179504, |
|
"learning_rate": 3.5074259449915284e-05, |
|
"loss": 0.0306, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 6.216216216216216, |
|
"grad_norm": 0.2897877097129822, |
|
"learning_rate": 3.436509489210189e-05, |
|
"loss": 0.0311, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.261261261261261, |
|
"grad_norm": 0.18506336212158203, |
|
"learning_rate": 3.365939956997399e-05, |
|
"loss": 0.0305, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 6.306306306306306, |
|
"grad_norm": 0.21617922186851501, |
|
"learning_rate": 3.2957330070564085e-05, |
|
"loss": 0.0332, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.351351351351352, |
|
"grad_norm": 0.26593586802482605, |
|
"learning_rate": 3.225904217636939e-05, |
|
"loss": 0.0329, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 6.396396396396397, |
|
"grad_norm": 0.24474456906318665, |
|
"learning_rate": 3.1564690830785106e-05, |
|
"loss": 0.0293, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 6.441441441441442, |
|
"grad_norm": 0.21675890684127808, |
|
"learning_rate": 3.0874430103724015e-05, |
|
"loss": 0.033, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 6.486486486486487, |
|
"grad_norm": 0.31768059730529785, |
|
"learning_rate": 3.0188413157429828e-05, |
|
"loss": 0.0303, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 6.531531531531532, |
|
"grad_norm": 0.2665558159351349, |
|
"learning_rate": 2.9506792212491986e-05, |
|
"loss": 0.0326, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6.576576576576577, |
|
"grad_norm": 0.3153150975704193, |
|
"learning_rate": 2.8829718514069265e-05, |
|
"loss": 0.0322, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 6.621621621621622, |
|
"grad_norm": 0.32425829768180847, |
|
"learning_rate": 2.815734229833007e-05, |
|
"loss": 0.032, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.2635970413684845, |
|
"learning_rate": 2.748981275911633e-05, |
|
"loss": 0.0307, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 6.711711711711712, |
|
"grad_norm": 0.41540664434432983, |
|
"learning_rate": 2.6827278014838953e-05, |
|
"loss": 0.0336, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 6.756756756756757, |
|
"grad_norm": 0.2897012233734131, |
|
"learning_rate": 2.616988507561161e-05, |
|
"loss": 0.0301, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.801801801801802, |
|
"grad_norm": 0.2893607020378113, |
|
"learning_rate": 2.5517779810630728e-05, |
|
"loss": 0.0331, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 6.846846846846847, |
|
"grad_norm": 0.5410453081130981, |
|
"learning_rate": 2.4871106915808434e-05, |
|
"loss": 0.0329, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 6.891891891891892, |
|
"grad_norm": 0.28260162472724915, |
|
"learning_rate": 2.4230009881666022e-05, |
|
"loss": 0.0353, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 6.936936936936937, |
|
"grad_norm": 0.178074911236763, |
|
"learning_rate": 2.359463096149461e-05, |
|
"loss": 0.0308, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 6.981981981981982, |
|
"grad_norm": 0.19077138602733612, |
|
"learning_rate": 2.2965111139790697e-05, |
|
"loss": 0.0296, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.027027027027027, |
|
"grad_norm": 0.29912513494491577, |
|
"learning_rate": 2.234159010097287e-05, |
|
"loss": 0.0318, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.072072072072072, |
|
"grad_norm": 0.3348890542984009, |
|
"learning_rate": 2.1724206198387294e-05, |
|
"loss": 0.0306, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 7.117117117117117, |
|
"grad_norm": 0.1999453604221344, |
|
"learning_rate": 2.1113096423608358e-05, |
|
"loss": 0.0311, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 7.162162162162162, |
|
"grad_norm": 0.27420762181282043, |
|
"learning_rate": 2.050839637604165e-05, |
|
"loss": 0.029, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 7.207207207207207, |
|
"grad_norm": 0.18965670466423035, |
|
"learning_rate": 1.991024023283562e-05, |
|
"loss": 0.0309, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.252252252252252, |
|
"grad_norm": 0.23550300300121307, |
|
"learning_rate": 1.9318760719109054e-05, |
|
"loss": 0.031, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 7.297297297297297, |
|
"grad_norm": 0.2695910334587097, |
|
"learning_rate": 1.8734089078500565e-05, |
|
"loss": 0.026, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 7.342342342342342, |
|
"grad_norm": 0.2515803277492523, |
|
"learning_rate": 1.815635504404701e-05, |
|
"loss": 0.0276, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 7.387387387387387, |
|
"grad_norm": 0.19094723463058472, |
|
"learning_rate": 1.7585686809396822e-05, |
|
"loss": 0.0294, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 7.4324324324324325, |
|
"grad_norm": 0.26856082677841187, |
|
"learning_rate": 1.702221100036515e-05, |
|
"loss": 0.0268, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7.4774774774774775, |
|
"grad_norm": 0.2571709454059601, |
|
"learning_rate": 1.6466052646836832e-05, |
|
"loss": 0.0289, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 7.5225225225225225, |
|
"grad_norm": 0.2961537837982178, |
|
"learning_rate": 1.5917335155023367e-05, |
|
"loss": 0.0301, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 7.5675675675675675, |
|
"grad_norm": 0.18612734973430634, |
|
"learning_rate": 1.5376180280080333e-05, |
|
"loss": 0.027, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 7.612612612612613, |
|
"grad_norm": 0.2395772635936737, |
|
"learning_rate": 1.4842708099091047e-05, |
|
"loss": 0.0269, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 7.657657657657658, |
|
"grad_norm": 0.2048547863960266, |
|
"learning_rate": 1.4317036984422671e-05, |
|
"loss": 0.0257, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.702702702702703, |
|
"grad_norm": 0.1941969245672226, |
|
"learning_rate": 1.3799283577460431e-05, |
|
"loss": 0.0254, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 7.747747747747748, |
|
"grad_norm": 0.3675873279571533, |
|
"learning_rate": 1.328956276272606e-05, |
|
"loss": 0.026, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 7.792792792792793, |
|
"grad_norm": 0.3239746391773224, |
|
"learning_rate": 1.2787987642386007e-05, |
|
"loss": 0.0245, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 7.837837837837838, |
|
"grad_norm": 0.2818147838115692, |
|
"learning_rate": 1.2294669511155193e-05, |
|
"loss": 0.0348, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 7.882882882882883, |
|
"grad_norm": 0.23256780207157135, |
|
"learning_rate": 1.1809717831601697e-05, |
|
"loss": 0.0299, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.927927927927928, |
|
"grad_norm": 0.46159571409225464, |
|
"learning_rate": 1.1333240209858159e-05, |
|
"loss": 0.0299, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 7.972972972972973, |
|
"grad_norm": 0.19868823885917664, |
|
"learning_rate": 1.0865342371744924e-05, |
|
"loss": 0.0269, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 8.018018018018019, |
|
"grad_norm": 0.29607120156288147, |
|
"learning_rate": 1.0406128139310533e-05, |
|
"loss": 0.0266, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 8.063063063063064, |
|
"grad_norm": 0.1842232197523117, |
|
"learning_rate": 9.955699407794594e-06, |
|
"loss": 0.0252, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 8.108108108108109, |
|
"grad_norm": 0.1922302395105362, |
|
"learning_rate": 9.514156123018258e-06, |
|
"loss": 0.0243, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.153153153153154, |
|
"grad_norm": 0.21968339383602142, |
|
"learning_rate": 9.081596259207109e-06, |
|
"loss": 0.0266, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 8.198198198198199, |
|
"grad_norm": 0.24987168610095978, |
|
"learning_rate": 8.658115797251676e-06, |
|
"loss": 0.0255, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 8.243243243243244, |
|
"grad_norm": 0.30064335465431213, |
|
"learning_rate": 8.243808703410177e-06, |
|
"loss": 0.0245, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 8.288288288288289, |
|
"grad_norm": 0.26207128167152405, |
|
"learning_rate": 7.838766908458339e-06, |
|
"loss": 0.0238, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 0.23758377134799957, |
|
"learning_rate": 7.443080287290782e-06, |
|
"loss": 0.0265, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 8.378378378378379, |
|
"grad_norm": 0.30203232169151306, |
|
"learning_rate": 7.0568366389786975e-06, |
|
"loss": 0.0283, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 8.423423423423424, |
|
"grad_norm": 0.23233330249786377, |
|
"learning_rate": 6.680121667288025e-06, |
|
"loss": 0.0249, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 8.468468468468469, |
|
"grad_norm": 0.3445972204208374, |
|
"learning_rate": 6.3130189616626474e-06, |
|
"loss": 0.0236, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 8.513513513513514, |
|
"grad_norm": 0.31303051114082336, |
|
"learning_rate": 5.955609978676652e-06, |
|
"loss": 0.0243, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 8.558558558558559, |
|
"grad_norm": 0.2929190397262573, |
|
"learning_rate": 5.607974023959978e-06, |
|
"loss": 0.0314, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.603603603603604, |
|
"grad_norm": 0.4855507016181946, |
|
"learning_rate": 5.270188234601142e-06, |
|
"loss": 0.0267, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 8.64864864864865, |
|
"grad_norm": 0.3386547863483429, |
|
"learning_rate": 4.942327562031357e-06, |
|
"loss": 0.0247, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 8.693693693693694, |
|
"grad_norm": 0.18032731115818024, |
|
"learning_rate": 4.624464755393459e-06, |
|
"loss": 0.0241, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 8.73873873873874, |
|
"grad_norm": 0.2196054905653, |
|
"learning_rate": 4.316670345399626e-06, |
|
"loss": 0.029, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 8.783783783783784, |
|
"grad_norm": 0.1823672503232956, |
|
"learning_rate": 4.019012628681234e-06, |
|
"loss": 0.0228, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8.82882882882883, |
|
"grad_norm": 0.25816360116004944, |
|
"learning_rate": 3.731557652634543e-06, |
|
"loss": 0.0239, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 8.873873873873874, |
|
"grad_norm": 0.21811510622501373, |
|
"learning_rate": 3.454369200765356e-06, |
|
"loss": 0.0244, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 8.91891891891892, |
|
"grad_norm": 0.19826571643352509, |
|
"learning_rate": 3.1875087785361137e-06, |
|
"loss": 0.0236, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 8.963963963963964, |
|
"grad_norm": 0.1923084408044815, |
|
"learning_rate": 2.931035599718396e-06, |
|
"loss": 0.0235, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 9.00900900900901, |
|
"grad_norm": 0.16076907515525818, |
|
"learning_rate": 2.6850065732539842e-06, |
|
"loss": 0.0262, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.054054054054054, |
|
"grad_norm": 0.2307383120059967, |
|
"learning_rate": 2.449476290627273e-06, |
|
"loss": 0.0245, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 9.0990990990991, |
|
"grad_norm": 0.21985507011413574, |
|
"learning_rate": 2.2244970137519583e-06, |
|
"loss": 0.0266, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 9.144144144144144, |
|
"grad_norm": 0.26605159044265747, |
|
"learning_rate": 2.010118663374627e-06, |
|
"loss": 0.0225, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 9.18918918918919, |
|
"grad_norm": 0.3474940359592438, |
|
"learning_rate": 1.8063888079978331e-06, |
|
"loss": 0.0255, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 9.234234234234235, |
|
"grad_norm": 0.1781226396560669, |
|
"learning_rate": 1.6133526533250565e-06, |
|
"loss": 0.0274, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.27927927927928, |
|
"grad_norm": 0.2931009829044342, |
|
"learning_rate": 1.4310530322300453e-06, |
|
"loss": 0.0253, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 9.324324324324325, |
|
"grad_norm": 0.21503065526485443, |
|
"learning_rate": 1.2595303952525672e-06, |
|
"loss": 0.0223, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 9.36936936936937, |
|
"grad_norm": 0.23624898493289948, |
|
"learning_rate": 1.0988228016228508e-06, |
|
"loss": 0.0252, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 9.414414414414415, |
|
"grad_norm": 0.2636685073375702, |
|
"learning_rate": 9.48965910816596e-07, |
|
"loss": 0.0302, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 9.45945945945946, |
|
"grad_norm": 0.23757557570934296, |
|
"learning_rate": 8.099929746424706e-07, |
|
"loss": 0.0248, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.504504504504505, |
|
"grad_norm": 0.3245598077774048, |
|
"learning_rate": 6.819348298638839e-07, |
|
"loss": 0.0219, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 9.54954954954955, |
|
"grad_norm": 0.260775089263916, |
|
"learning_rate": 5.648198913565494e-07, |
|
"loss": 0.0198, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 9.594594594594595, |
|
"grad_norm": 0.18040095269680023, |
|
"learning_rate": 4.5867414580355593e-07, |
|
"loss": 0.0263, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 9.63963963963964, |
|
"grad_norm": 0.17358094453811646, |
|
"learning_rate": 3.635211459291188e-07, |
|
"loss": 0.0236, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 9.684684684684685, |
|
"grad_norm": 0.1593407541513443, |
|
"learning_rate": 2.793820052725049e-07, |
|
"loss": 0.0233, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9.72972972972973, |
|
"grad_norm": 0.24278421700000763, |
|
"learning_rate": 2.06275393503097e-07, |
|
"loss": 0.0201, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 9.774774774774775, |
|
"grad_norm": 0.1488327831029892, |
|
"learning_rate": 1.4421753227780722e-07, |
|
"loss": 0.0234, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 9.81981981981982, |
|
"grad_norm": 0.2808060050010681, |
|
"learning_rate": 9.32221916416176e-08, |
|
"loss": 0.0248, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 9.864864864864865, |
|
"grad_norm": 0.2229873239994049, |
|
"learning_rate": 5.330068697215751e-08, |
|
"loss": 0.0259, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 9.90990990990991, |
|
"grad_norm": 0.2598866820335388, |
|
"learning_rate": 2.4461876468934163e-08, |
|
"loss": 0.0273, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.954954954954955, |
|
"grad_norm": 0.29245129227638245, |
|
"learning_rate": 6.712159187766131e-09, |
|
"loss": 0.0245, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.2520624101161957, |
|
"learning_rate": 5.547362090241315e-11, |
|
"loss": 0.0271, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2220, |
|
"total_flos": 0.0, |
|
"train_loss": 0.055370314842140354, |
|
"train_runtime": 2269.5641, |
|
"train_samples_per_second": 47.895, |
|
"train_steps_per_second": 0.978 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2220, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 20000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 49, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|