{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9998697407841606,
  "eval_steps": 500,
  "global_step": 34545,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004341973861317355,
      "grad_norm": 32.08115005493164,
      "learning_rate": 9.98697351280938e-06,
      "loss": 2.7201,
      "step": 50
    },
    {
      "epoch": 0.00868394772263471,
      "grad_norm": 11.602313041687012,
      "learning_rate": 9.972499638153135e-06,
      "loss": 0.8616,
      "step": 100
    },
    {
      "epoch": 0.013025921583952065,
      "grad_norm": 13.351936340332031,
      "learning_rate": 9.95802576349689e-06,
      "loss": 0.3809,
      "step": 150
    },
    {
      "epoch": 0.01736789544526942,
      "grad_norm": 11.841660499572754,
      "learning_rate": 9.943551888840644e-06,
      "loss": 0.3299,
      "step": 200
    },
    {
      "epoch": 0.021709869306586773,
      "grad_norm": 13.042312622070312,
      "learning_rate": 9.929078014184398e-06,
      "loss": 0.3422,
      "step": 250
    },
    {
      "epoch": 0.02605184316790413,
      "grad_norm": 12.432320594787598,
      "learning_rate": 9.914604139528152e-06,
      "loss": 0.3427,
      "step": 300
    },
    {
      "epoch": 0.030393817029221483,
      "grad_norm": 11.445119857788086,
      "learning_rate": 9.900130264871906e-06,
      "loss": 0.3318,
      "step": 350
    },
    {
      "epoch": 0.03473579089053884,
      "grad_norm": 16.4143009185791,
      "learning_rate": 9.885656390215662e-06,
      "loss": 0.2945,
      "step": 400
    },
    {
      "epoch": 0.039077764751856196,
      "grad_norm": 10.369935989379883,
      "learning_rate": 9.871182515559416e-06,
      "loss": 0.3232,
      "step": 450
    },
    {
      "epoch": 0.043419738613173546,
      "grad_norm": 10.7285737991333,
      "learning_rate": 9.856708640903172e-06,
      "loss": 0.3065,
      "step": 500
    },
    {
      "epoch": 0.0477617124744909,
      "grad_norm": 9.987811088562012,
      "learning_rate": 9.842234766246926e-06,
      "loss": 0.2889,
      "step": 550
    },
    {
      "epoch": 0.05210368633580826,
      "grad_norm": 13.18855094909668,
      "learning_rate": 9.82776089159068e-06,
      "loss": 0.359,
      "step": 600
    },
    {
      "epoch": 0.056445660197125616,
      "grad_norm": 7.523106098175049,
      "learning_rate": 9.813287016934434e-06,
      "loss": 0.3753,
      "step": 650
    },
    {
      "epoch": 0.060787634058442966,
      "grad_norm": 8.611462593078613,
      "learning_rate": 9.798813142278188e-06,
      "loss": 0.327,
      "step": 700
    },
    {
      "epoch": 0.06512960791976033,
      "grad_norm": 9.731183052062988,
      "learning_rate": 9.784339267621942e-06,
      "loss": 0.3218,
      "step": 750
    },
    {
      "epoch": 0.06947158178107768,
      "grad_norm": 14.622904777526855,
      "learning_rate": 9.769865392965696e-06,
      "loss": 0.3116,
      "step": 800
    },
    {
      "epoch": 0.07381355564239503,
      "grad_norm": 14.486369132995605,
      "learning_rate": 9.755391518309452e-06,
      "loss": 0.302,
      "step": 850
    },
    {
      "epoch": 0.07815552950371239,
      "grad_norm": 9.997029304504395,
      "learning_rate": 9.740917643653206e-06,
      "loss": 0.3332,
      "step": 900
    },
    {
      "epoch": 0.08249750336502974,
      "grad_norm": 11.677199363708496,
      "learning_rate": 9.726443768996962e-06,
      "loss": 0.2776,
      "step": 950
    },
    {
      "epoch": 0.08683947722634709,
      "grad_norm": 15.453819274902344,
      "learning_rate": 9.711969894340716e-06,
      "loss": 0.304,
      "step": 1000
    },
    {
      "epoch": 0.09118145108766446,
      "grad_norm": 10.257116317749023,
      "learning_rate": 9.69749601968447e-06,
      "loss": 0.3241,
      "step": 1050
    },
    {
      "epoch": 0.0955234249489818,
      "grad_norm": 12.801012992858887,
      "learning_rate": 9.683022145028224e-06,
      "loss": 0.3245,
      "step": 1100
    },
    {
      "epoch": 0.09986539881029916,
      "grad_norm": 9.938461303710938,
      "learning_rate": 9.668548270371979e-06,
      "loss": 0.3146,
      "step": 1150
    },
    {
      "epoch": 0.10420737267161652,
      "grad_norm": 7.490509986877441,
      "learning_rate": 9.654074395715733e-06,
      "loss": 0.2823,
      "step": 1200
    },
    {
      "epoch": 0.10854934653293387,
      "grad_norm": 13.965841293334961,
      "learning_rate": 9.639600521059488e-06,
      "loss": 0.3684,
      "step": 1250
    },
    {
      "epoch": 0.11289132039425123,
      "grad_norm": 12.257745742797852,
      "learning_rate": 9.625126646403243e-06,
      "loss": 0.3316,
      "step": 1300
    },
    {
      "epoch": 0.11723329425556858,
      "grad_norm": 13.204375267028809,
      "learning_rate": 9.610652771746998e-06,
      "loss": 0.2866,
      "step": 1350
    },
    {
      "epoch": 0.12157526811688593,
      "grad_norm": 14.752259254455566,
      "learning_rate": 9.596178897090753e-06,
      "loss": 0.2965,
      "step": 1400
    },
    {
      "epoch": 0.12591724197820328,
      "grad_norm": 14.065028190612793,
      "learning_rate": 9.581705022434507e-06,
      "loss": 0.2953,
      "step": 1450
    },
    {
      "epoch": 0.13025921583952066,
      "grad_norm": 7.947408676147461,
      "learning_rate": 9.56723114777826e-06,
      "loss": 0.3143,
      "step": 1500
    },
    {
      "epoch": 0.134601189700838,
      "grad_norm": 14.14394474029541,
      "learning_rate": 9.552757273122015e-06,
      "loss": 0.2843,
      "step": 1550
    },
    {
      "epoch": 0.13894316356215536,
      "grad_norm": 8.776320457458496,
      "learning_rate": 9.538283398465769e-06,
      "loss": 0.2435,
      "step": 1600
    },
    {
      "epoch": 0.1432851374234727,
      "grad_norm": 8.089349746704102,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.3011,
      "step": 1650
    },
    {
      "epoch": 0.14762711128479006,
      "grad_norm": 9.590496063232422,
      "learning_rate": 9.509335649153279e-06,
      "loss": 0.2947,
      "step": 1700
    },
    {
      "epoch": 0.1519690851461074,
      "grad_norm": 9.659972190856934,
      "learning_rate": 9.494861774497035e-06,
      "loss": 0.3485,
      "step": 1750
    },
    {
      "epoch": 0.15631105900742479,
      "grad_norm": 10.331656455993652,
      "learning_rate": 9.480387899840789e-06,
      "loss": 0.2765,
      "step": 1800
    },
    {
      "epoch": 0.16065303286874213,
      "grad_norm": 12.779083251953125,
      "learning_rate": 9.465914025184543e-06,
      "loss": 0.2817,
      "step": 1850
    },
    {
      "epoch": 0.16499500673005948,
      "grad_norm": 9.097893714904785,
      "learning_rate": 9.451440150528297e-06,
      "loss": 0.3581,
      "step": 1900
    },
    {
      "epoch": 0.16933698059137683,
      "grad_norm": 18.47726058959961,
      "learning_rate": 9.436966275872051e-06,
      "loss": 0.2617,
      "step": 1950
    },
    {
      "epoch": 0.17367895445269418,
      "grad_norm": 7.92081356048584,
      "learning_rate": 9.422492401215805e-06,
      "loss": 0.3314,
      "step": 2000
    },
    {
      "epoch": 0.17802092831401156,
      "grad_norm": 11.863414764404297,
      "learning_rate": 9.408018526559561e-06,
      "loss": 0.3495,
      "step": 2050
    },
    {
      "epoch": 0.1823629021753289,
      "grad_norm": 12.667139053344727,
      "learning_rate": 9.393544651903315e-06,
      "loss": 0.318,
      "step": 2100
    },
    {
      "epoch": 0.18670487603664626,
      "grad_norm": 13.5122652053833,
      "learning_rate": 9.379070777247071e-06,
      "loss": 0.3017,
      "step": 2150
    },
    {
      "epoch": 0.1910468498979636,
      "grad_norm": 6.8720526695251465,
      "learning_rate": 9.364596902590825e-06,
      "loss": 0.2728,
      "step": 2200
    },
    {
      "epoch": 0.19538882375928096,
      "grad_norm": 15.49057388305664,
      "learning_rate": 9.350123027934579e-06,
      "loss": 0.2858,
      "step": 2250
    },
    {
      "epoch": 0.1997307976205983,
      "grad_norm": 13.840463638305664,
      "learning_rate": 9.335649153278333e-06,
      "loss": 0.2733,
      "step": 2300
    },
    {
      "epoch": 0.2040727714819157,
      "grad_norm": 12.819256782531738,
      "learning_rate": 9.321175278622087e-06,
      "loss": 0.3099,
      "step": 2350
    },
    {
      "epoch": 0.20841474534323304,
      "grad_norm": 14.073617935180664,
      "learning_rate": 9.306701403965841e-06,
      "loss": 0.3347,
      "step": 2400
    },
    {
      "epoch": 0.2127567192045504,
      "grad_norm": 9.651225090026855,
      "learning_rate": 9.292227529309597e-06,
      "loss": 0.3033,
      "step": 2450
    },
    {
      "epoch": 0.21709869306586774,
      "grad_norm": 8.687524795532227,
      "learning_rate": 9.277753654653351e-06,
      "loss": 0.2807,
      "step": 2500
    },
    {
      "epoch": 0.2214406669271851,
      "grad_norm": 10.235136032104492,
      "learning_rate": 9.263279779997107e-06,
      "loss": 0.2196,
      "step": 2550
    },
    {
      "epoch": 0.22578264078850246,
      "grad_norm": 12.453109741210938,
      "learning_rate": 9.248805905340861e-06,
      "loss": 0.336,
      "step": 2600
    },
    {
      "epoch": 0.23012461464981981,
      "grad_norm": 11.39977741241455,
      "learning_rate": 9.234332030684615e-06,
      "loss": 0.287,
      "step": 2650
    },
    {
      "epoch": 0.23446658851113716,
      "grad_norm": 15.636773109436035,
      "learning_rate": 9.21985815602837e-06,
      "loss": 0.2747,
      "step": 2700
    },
    {
      "epoch": 0.2388085623724545,
      "grad_norm": 9.498825073242188,
      "learning_rate": 9.205384281372124e-06,
      "loss": 0.2462,
      "step": 2750
    },
    {
      "epoch": 0.24315053623377186,
      "grad_norm": 3.69565486907959,
      "learning_rate": 9.190910406715878e-06,
      "loss": 0.2816,
      "step": 2800
    },
    {
      "epoch": 0.24749251009508924,
      "grad_norm": 12.666276931762695,
      "learning_rate": 9.176436532059632e-06,
      "loss": 0.3002,
      "step": 2850
    },
    {
      "epoch": 0.25183448395640656,
      "grad_norm": 16.72841453552246,
      "learning_rate": 9.161962657403388e-06,
      "loss": 0.2584,
      "step": 2900
    },
    {
      "epoch": 0.2561764578177239,
      "grad_norm": 11.932791709899902,
      "learning_rate": 9.147488782747142e-06,
      "loss": 0.311,
      "step": 2950
    },
    {
      "epoch": 0.2605184316790413,
      "grad_norm": 7.06049919128418,
      "learning_rate": 9.133014908090898e-06,
      "loss": 0.2333,
      "step": 3000
    },
    {
      "epoch": 0.26486040554035867,
      "grad_norm": 13.139076232910156,
      "learning_rate": 9.118541033434652e-06,
      "loss": 0.2429,
      "step": 3050
    },
    {
      "epoch": 0.269202379401676,
      "grad_norm": 4.549722671508789,
      "learning_rate": 9.104067158778406e-06,
      "loss": 0.2384,
      "step": 3100
    },
    {
      "epoch": 0.27354435326299337,
      "grad_norm": 15.118318557739258,
      "learning_rate": 9.08959328412216e-06,
      "loss": 0.3034,
      "step": 3150
    },
    {
      "epoch": 0.2778863271243107,
      "grad_norm": 11.380433082580566,
      "learning_rate": 9.075119409465914e-06,
      "loss": 0.2701,
      "step": 3200
    },
    {
      "epoch": 0.28222830098562807,
      "grad_norm": 8.537070274353027,
      "learning_rate": 9.060645534809668e-06,
      "loss": 0.2854,
      "step": 3250
    },
    {
      "epoch": 0.2865702748469454,
      "grad_norm": 15.4993314743042,
      "learning_rate": 9.046171660153424e-06,
      "loss": 0.3197,
      "step": 3300
    },
    {
      "epoch": 0.29091224870826277,
      "grad_norm": 39.80760192871094,
      "learning_rate": 9.031697785497178e-06,
      "loss": 0.3389,
      "step": 3350
    },
    {
      "epoch": 0.2952542225695801,
      "grad_norm": 11.710318565368652,
      "learning_rate": 9.017223910840934e-06,
      "loss": 0.2886,
      "step": 3400
    },
    {
      "epoch": 0.29959619643089747,
      "grad_norm": 10.475316047668457,
      "learning_rate": 9.002750036184688e-06,
      "loss": 0.285,
      "step": 3450
    },
    {
      "epoch": 0.3039381702922148,
      "grad_norm": 7.522524833679199,
      "learning_rate": 8.988276161528442e-06,
      "loss": 0.298,
      "step": 3500
    },
    {
      "epoch": 0.3082801441535322,
      "grad_norm": 5.4467244148254395,
      "learning_rate": 8.973802286872196e-06,
      "loss": 0.2792,
      "step": 3550
    },
    {
      "epoch": 0.31262211801484957,
      "grad_norm": 12.941061973571777,
      "learning_rate": 8.95932841221595e-06,
      "loss": 0.2212,
      "step": 3600
    },
    {
      "epoch": 0.3169640918761669,
      "grad_norm": 13.501566886901855,
      "learning_rate": 8.944854537559704e-06,
      "loss": 0.2792,
      "step": 3650
    },
    {
      "epoch": 0.32130606573748427,
      "grad_norm": 5.546044826507568,
      "learning_rate": 8.93038066290346e-06,
      "loss": 0.3064,
      "step": 3700
    },
    {
      "epoch": 0.3256480395988016,
      "grad_norm": 9.782098770141602,
      "learning_rate": 8.915906788247214e-06,
      "loss": 0.2601,
      "step": 3750
    },
    {
      "epoch": 0.32999001346011897,
      "grad_norm": 4.493556022644043,
      "learning_rate": 8.90143291359097e-06,
      "loss": 0.2417,
      "step": 3800
    },
    {
      "epoch": 0.3343319873214363,
      "grad_norm": 10.674678802490234,
      "learning_rate": 8.886959038934724e-06,
      "loss": 0.2525,
      "step": 3850
    },
    {
      "epoch": 0.33867396118275367,
      "grad_norm": 12.951682090759277,
      "learning_rate": 8.872485164278478e-06,
      "loss": 0.2882,
      "step": 3900
    },
    {
      "epoch": 0.343015935044071,
      "grad_norm": 15.413273811340332,
      "learning_rate": 8.858011289622232e-06,
      "loss": 0.268,
      "step": 3950
    },
    {
      "epoch": 0.34735790890538837,
      "grad_norm": 6.657174110412598,
      "learning_rate": 8.843537414965987e-06,
      "loss": 0.2952,
      "step": 4000
    },
    {
      "epoch": 0.3516998827667057,
      "grad_norm": 6.247474670410156,
      "learning_rate": 8.82906354030974e-06,
      "loss": 0.2185,
      "step": 4050
    },
    {
      "epoch": 0.3560418566280231,
      "grad_norm": 14.013585090637207,
      "learning_rate": 8.814589665653496e-06,
      "loss": 0.2911,
      "step": 4100
    },
    {
      "epoch": 0.3603838304893405,
      "grad_norm": 4.614453315734863,
      "learning_rate": 8.80011579099725e-06,
      "loss": 0.2627,
      "step": 4150
    },
    {
      "epoch": 0.3647258043506578,
      "grad_norm": 18.471010208129883,
      "learning_rate": 8.785641916341006e-06,
      "loss": 0.2986,
      "step": 4200
    },
    {
      "epoch": 0.3690677782119752,
      "grad_norm": 20.08515167236328,
      "learning_rate": 8.771457519177886e-06,
      "loss": 0.2659,
      "step": 4250
    },
    {
      "epoch": 0.3734097520732925,
      "grad_norm": 11.523843765258789,
      "learning_rate": 8.75698364452164e-06,
      "loss": 0.2879,
      "step": 4300
    },
    {
      "epoch": 0.37775172593460987,
      "grad_norm": 12.521258354187012,
      "learning_rate": 8.742509769865394e-06,
      "loss": 0.2479,
      "step": 4350
    },
    {
      "epoch": 0.3820936997959272,
      "grad_norm": 4.8445587158203125,
      "learning_rate": 8.728035895209148e-06,
      "loss": 0.3093,
      "step": 4400
    },
    {
      "epoch": 0.38643567365724457,
      "grad_norm": 16.926706314086914,
      "learning_rate": 8.713562020552902e-06,
      "loss": 0.2764,
      "step": 4450
    },
    {
      "epoch": 0.3907776475185619,
      "grad_norm": 12.894941329956055,
      "learning_rate": 8.699088145896656e-06,
      "loss": 0.326,
      "step": 4500
    },
    {
      "epoch": 0.39511962137987927,
      "grad_norm": 7.567624568939209,
      "learning_rate": 8.684614271240412e-06,
      "loss": 0.2442,
      "step": 4550
    },
    {
      "epoch": 0.3994615952411966,
      "grad_norm": 0.5340330600738525,
      "learning_rate": 8.670140396584166e-06,
      "loss": 0.2505,
      "step": 4600
    },
    {
      "epoch": 0.403803569102514,
      "grad_norm": 2.206448554992676,
      "learning_rate": 8.655666521927922e-06,
      "loss": 0.2872,
      "step": 4650
    },
    {
      "epoch": 0.4081455429638314,
      "grad_norm": 3.9290964603424072,
      "learning_rate": 8.641192647271676e-06,
      "loss": 0.264,
      "step": 4700
    },
    {
      "epoch": 0.4124875168251487,
      "grad_norm": 10.728494644165039,
      "learning_rate": 8.62671877261543e-06,
      "loss": 0.3088,
      "step": 4750
    },
    {
      "epoch": 0.4168294906864661,
      "grad_norm": 1.5482255220413208,
      "learning_rate": 8.612244897959184e-06,
      "loss": 0.2577,
      "step": 4800
    },
    {
      "epoch": 0.4211714645477834,
      "grad_norm": 12.54301929473877,
      "learning_rate": 8.597771023302938e-06,
      "loss": 0.2649,
      "step": 4850
    },
    {
      "epoch": 0.4255134384091008,
      "grad_norm": 8.942083358764648,
      "learning_rate": 8.583297148646693e-06,
      "loss": 0.3254,
      "step": 4900
    },
    {
      "epoch": 0.4298554122704181,
      "grad_norm": 6.604653358459473,
      "learning_rate": 8.568823273990448e-06,
      "loss": 0.3005,
      "step": 4950
    },
    {
      "epoch": 0.4341973861317355,
      "grad_norm": 4.268127918243408,
      "learning_rate": 8.554349399334202e-06,
      "loss": 0.2745,
      "step": 5000
    },
    {
      "epoch": 0.4385393599930528,
      "grad_norm": 6.471729755401611,
      "learning_rate": 8.539875524677957e-06,
      "loss": 0.2735,
      "step": 5050
    },
    {
      "epoch": 0.4428813338543702,
      "grad_norm": 15.852076530456543,
      "learning_rate": 8.525401650021712e-06,
      "loss": 0.3188,
      "step": 5100
    },
    {
      "epoch": 0.4472233077156876,
      "grad_norm": 9.784584999084473,
      "learning_rate": 8.510927775365467e-06,
      "loss": 0.2664,
      "step": 5150
    },
    {
      "epoch": 0.45156528157700493,
      "grad_norm": 7.76936674118042,
      "learning_rate": 8.49645390070922e-06,
      "loss": 0.2571,
      "step": 5200
    },
    {
      "epoch": 0.4559072554383223,
      "grad_norm": 11.905091285705566,
      "learning_rate": 8.481980026052975e-06,
      "loss": 0.3088,
      "step": 5250
    },
    {
      "epoch": 0.46024922929963963,
      "grad_norm": 7.778932094573975,
      "learning_rate": 8.467506151396729e-06,
      "loss": 0.2754,
      "step": 5300
    },
    {
      "epoch": 0.464591203160957,
      "grad_norm": 7.64968729019165,
      "learning_rate": 8.453032276740483e-06,
      "loss": 0.253,
      "step": 5350
    },
    {
      "epoch": 0.46893317702227433,
      "grad_norm": 7.020500183105469,
      "learning_rate": 8.438558402084239e-06,
      "loss": 0.2032,
      "step": 5400
    },
    {
      "epoch": 0.4732751508835917,
      "grad_norm": 15.488313674926758,
      "learning_rate": 8.424084527427993e-06,
      "loss": 0.2675,
      "step": 5450
    },
    {
      "epoch": 0.477617124744909,
      "grad_norm": 6.211950302124023,
      "learning_rate": 8.409610652771749e-06,
      "loss": 0.2203,
      "step": 5500
    },
    {
      "epoch": 0.4819590986062264,
      "grad_norm": 11.131321907043457,
      "learning_rate": 8.395136778115503e-06,
      "loss": 0.323,
      "step": 5550
    },
    {
      "epoch": 0.4863010724675437,
      "grad_norm": 7.9977126121521,
      "learning_rate": 8.380662903459257e-06,
      "loss": 0.2231,
      "step": 5600
    },
    {
      "epoch": 0.4906430463288611,
      "grad_norm": 10.198436737060547,
      "learning_rate": 8.366189028803011e-06,
      "loss": 0.2465,
      "step": 5650
    },
    {
      "epoch": 0.4949850201901785,
      "grad_norm": 14.98998737335205,
      "learning_rate": 8.351715154146765e-06,
      "loss": 0.2235,
      "step": 5700
    },
    {
      "epoch": 0.49932699405149583,
      "grad_norm": 13.291168212890625,
      "learning_rate": 8.33724127949052e-06,
      "loss": 0.2082,
      "step": 5750
    },
    {
      "epoch": 0.5036689679128131,
      "grad_norm": 11.353480339050293,
      "learning_rate": 8.322767404834275e-06,
      "loss": 0.2813,
      "step": 5800
    },
    {
      "epoch": 0.5080109417741305,
      "grad_norm": 4.9565815925598145,
      "learning_rate": 8.308293530178029e-06,
      "loss": 0.2223,
      "step": 5850
    },
    {
      "epoch": 0.5123529156354478,
      "grad_norm": 10.184609413146973,
      "learning_rate": 8.293819655521785e-06,
      "loss": 0.2622,
      "step": 5900
    },
    {
      "epoch": 0.5166948894967652,
      "grad_norm": 11.807438850402832,
      "learning_rate": 8.279345780865539e-06,
      "loss": 0.221,
      "step": 5950
    },
    {
      "epoch": 0.5210368633580826,
      "grad_norm": 10.200774192810059,
      "learning_rate": 8.264871906209293e-06,
      "loss": 0.2987,
      "step": 6000
    },
    {
      "epoch": 0.5253788372193999,
      "grad_norm": 12.71509838104248,
      "learning_rate": 8.250398031553047e-06,
      "loss": 0.281,
      "step": 6050
    },
    {
      "epoch": 0.5297208110807173,
      "grad_norm": 6.515493392944336,
      "learning_rate": 8.235924156896801e-06,
      "loss": 0.2731,
      "step": 6100
    },
    {
      "epoch": 0.5340627849420346,
      "grad_norm": 12.207219123840332,
      "learning_rate": 8.221450282240555e-06,
      "loss": 0.1996,
      "step": 6150
    },
    {
      "epoch": 0.538404758803352,
      "grad_norm": 14.700127601623535,
      "learning_rate": 8.206976407584311e-06,
      "loss": 0.2901,
      "step": 6200
    },
    {
      "epoch": 0.5427467326646693,
      "grad_norm": 13.294961929321289,
      "learning_rate": 8.192502532928065e-06,
      "loss": 0.2529,
      "step": 6250
    },
    {
      "epoch": 0.5470887065259867,
      "grad_norm": 7.7809295654296875,
      "learning_rate": 8.178028658271821e-06,
      "loss": 0.2201,
      "step": 6300
    },
    {
      "epoch": 0.551430680387304,
      "grad_norm": 9.105611801147461,
      "learning_rate": 8.163554783615575e-06,
      "loss": 0.2144,
      "step": 6350
    },
    {
      "epoch": 0.5557726542486214,
      "grad_norm": 9.768695831298828,
      "learning_rate": 8.149370386452455e-06,
      "loss": 0.2631,
      "step": 6400
    },
    {
      "epoch": 0.5601146281099387,
      "grad_norm": 15.364813804626465,
      "learning_rate": 8.134896511796209e-06,
      "loss": 0.2801,
      "step": 6450
    },
    {
      "epoch": 0.5644566019712561,
      "grad_norm": 11.26169204711914,
      "learning_rate": 8.120422637139963e-06,
      "loss": 0.2521,
      "step": 6500
    },
    {
      "epoch": 0.5687985758325735,
      "grad_norm": 14.16651725769043,
      "learning_rate": 8.105948762483717e-06,
      "loss": 0.2911,
      "step": 6550
    },
    {
      "epoch": 0.5731405496938908,
      "grad_norm": 7.423428535461426,
      "learning_rate": 8.091474887827471e-06,
      "loss": 0.2962,
      "step": 6600
    },
    {
      "epoch": 0.5774825235552082,
      "grad_norm": 12.954950332641602,
      "learning_rate": 8.077001013171227e-06,
      "loss": 0.2628,
      "step": 6650
    },
    {
      "epoch": 0.5818244974165255,
      "grad_norm": 0.6374977827072144,
      "learning_rate": 8.062527138514981e-06,
      "loss": 0.2317,
      "step": 6700
    },
    {
      "epoch": 0.5861664712778429,
      "grad_norm": 14.547224998474121,
      "learning_rate": 8.048053263858737e-06,
      "loss": 0.2576,
      "step": 6750
    },
    {
      "epoch": 0.5905084451391602,
      "grad_norm": 15.81212043762207,
      "learning_rate": 8.033579389202491e-06,
      "loss": 0.2389,
      "step": 6800
    },
    {
      "epoch": 0.5948504190004776,
      "grad_norm": 6.094769477844238,
      "learning_rate": 8.019105514546245e-06,
      "loss": 0.2705,
      "step": 6850
    },
    {
      "epoch": 0.5991923928617949,
      "grad_norm": 12.700167655944824,
      "learning_rate": 8.00463163989e-06,
      "loss": 0.2271,
      "step": 6900
    },
    {
      "epoch": 0.6035343667231123,
      "grad_norm": 9.446319580078125,
      "learning_rate": 7.990157765233753e-06,
      "loss": 0.1957,
      "step": 6950
    },
    {
      "epoch": 0.6078763405844296,
      "grad_norm": 10.391845703125,
      "learning_rate": 7.975683890577507e-06,
      "loss": 0.2764,
      "step": 7000
    },
    {
      "epoch": 0.612218314445747,
      "grad_norm": 0.8525072932243347,
      "learning_rate": 7.961210015921263e-06,
      "loss": 0.2413,
      "step": 7050
    },
    {
      "epoch": 0.6165602883070644,
      "grad_norm": 13.354928970336914,
      "learning_rate": 7.946736141265017e-06,
      "loss": 0.2644,
      "step": 7100
    },
    {
      "epoch": 0.6209022621683817,
      "grad_norm": 10.913607597351074,
      "learning_rate": 7.932262266608773e-06,
      "loss": 0.2478,
      "step": 7150
    },
    {
      "epoch": 0.6252442360296991,
      "grad_norm": 10.41576099395752,
      "learning_rate": 7.917788391952527e-06,
      "loss": 0.2523,
      "step": 7200
    },
    {
      "epoch": 0.6295862098910164,
      "grad_norm": 9.93879508972168,
      "learning_rate": 7.903314517296281e-06,
      "loss": 0.2313,
      "step": 7250
    },
    {
      "epoch": 0.6339281837523338,
      "grad_norm": 11.395098686218262,
      "learning_rate": 7.888840642640035e-06,
      "loss": 0.2674,
      "step": 7300
    },
    {
      "epoch": 0.6382701576136511,
      "grad_norm": 13.613016128540039,
      "learning_rate": 7.87436676798379e-06,
      "loss": 0.2442,
      "step": 7350
    },
    {
      "epoch": 0.6426121314749685,
      "grad_norm": 7.089151859283447,
      "learning_rate": 7.859892893327544e-06,
      "loss": 0.2313,
      "step": 7400
    },
    {
      "epoch": 0.6469541053362858,
      "grad_norm": 9.300673484802246,
      "learning_rate": 7.845419018671298e-06,
      "loss": 0.2631,
      "step": 7450
    },
    {
      "epoch": 0.6512960791976032,
      "grad_norm": 12.743701934814453,
      "learning_rate": 7.830945144015054e-06,
      "loss": 0.2909,
      "step": 7500
    },
    {
      "epoch": 0.6556380530589205,
      "grad_norm": 7.693624019622803,
      "learning_rate": 7.816471269358808e-06,
      "loss": 0.3017,
      "step": 7550
    },
    {
      "epoch": 0.6599800269202379,
      "grad_norm": 10.756463050842285,
      "learning_rate": 7.801997394702564e-06,
      "loss": 0.2438,
      "step": 7600
    },
    {
      "epoch": 0.6643220007815553,
      "grad_norm": 9.87155818939209,
      "learning_rate": 7.787523520046318e-06,
      "loss": 0.2453,
      "step": 7650
    },
    {
      "epoch": 0.6686639746428726,
      "grad_norm": 4.115950107574463,
      "learning_rate": 7.773049645390072e-06,
      "loss": 0.2131,
      "step": 7700
    },
    {
      "epoch": 0.67300594850419,
      "grad_norm": 0.4190388023853302,
      "learning_rate": 7.758575770733826e-06,
      "loss": 0.2383,
      "step": 7750
    },
    {
      "epoch": 0.6773479223655073,
      "grad_norm": 10.512711524963379,
      "learning_rate": 7.74410189607758e-06,
      "loss": 0.2181,
      "step": 7800
    },
    {
      "epoch": 0.6816898962268247,
      "grad_norm": 9.304518699645996,
      "learning_rate": 7.729628021421334e-06,
      "loss": 0.2388,
      "step": 7850
    },
    {
      "epoch": 0.686031870088142,
      "grad_norm": 6.525651931762695,
      "learning_rate": 7.71515414676509e-06,
      "loss": 0.2645,
      "step": 7900
    },
    {
      "epoch": 0.6903738439494594,
      "grad_norm": 9.819262504577637,
      "learning_rate": 7.700680272108844e-06,
      "loss": 0.247,
      "step": 7950
    },
    {
      "epoch": 0.6947158178107767,
      "grad_norm": 14.438730239868164,
      "learning_rate": 7.6862063974526e-06,
      "loss": 0.2389,
      "step": 8000
    },
    {
      "epoch": 0.6990577916720941,
      "grad_norm": 12.934776306152344,
      "learning_rate": 7.671732522796354e-06,
      "loss": 0.2695,
      "step": 8050
    },
    {
      "epoch": 0.7033997655334114,
      "grad_norm": 10.648255348205566,
      "learning_rate": 7.657258648140108e-06,
      "loss": 0.2091,
      "step": 8100
    },
    {
      "epoch": 0.7077417393947288,
      "grad_norm": 7.91797399520874,
      "learning_rate": 7.642784773483862e-06,
      "loss": 0.2358,
      "step": 8150
    },
    {
      "epoch": 0.7120837132560462,
      "grad_norm": 8.622334480285645,
      "learning_rate": 7.628310898827617e-06,
      "loss": 0.2107,
      "step": 8200
    },
    {
      "epoch": 0.7164256871173635,
      "grad_norm": 10.815421104431152,
      "learning_rate": 7.613837024171371e-06,
      "loss": 0.2201,
      "step": 8250
    },
    {
      "epoch": 0.720767660978681,
      "grad_norm": 12.649157524108887,
      "learning_rate": 7.599363149515125e-06,
      "loss": 0.258,
      "step": 8300
    },
    {
      "epoch": 0.7251096348399982,
      "grad_norm": 13.133886337280273,
      "learning_rate": 7.58488927485888e-06,
      "loss": 0.2818,
      "step": 8350
    },
    {
      "epoch": 0.7294516087013156,
      "grad_norm": 11.359635353088379,
      "learning_rate": 7.570415400202635e-06,
      "loss": 0.2722,
      "step": 8400
    },
    {
      "epoch": 0.7337935825626329,
      "grad_norm": 17.86835479736328,
      "learning_rate": 7.555941525546389e-06,
      "loss": 0.2276,
      "step": 8450
    },
    {
      "epoch": 0.7381355564239503,
      "grad_norm": 20.632299423217773,
      "learning_rate": 7.541467650890144e-06,
      "loss": 0.291,
      "step": 8500
    },
    {
      "epoch": 0.7424775302852676,
      "grad_norm": 8.320937156677246,
      "learning_rate": 7.526993776233898e-06,
      "loss": 0.2789,
      "step": 8550
    },
    {
      "epoch": 0.746819504146585,
      "grad_norm": 10.321586608886719,
      "learning_rate": 7.5125199015776525e-06,
      "loss": 0.2535,
      "step": 8600
    },
    {
      "epoch": 0.7511614780079023,
      "grad_norm": 3.8393821716308594,
      "learning_rate": 7.4980460269214074e-06,
      "loss": 0.2108,
      "step": 8650
    },
    {
      "epoch": 0.7555034518692197,
      "grad_norm": 8.907423973083496,
      "learning_rate": 7.4835721522651616e-06,
      "loss": 0.2765,
      "step": 8700
    },
    {
      "epoch": 0.7598454257305371,
      "grad_norm": 3.0093770027160645,
      "learning_rate": 7.469098277608916e-06,
      "loss": 0.2116,
      "step": 8750
    },
    {
      "epoch": 0.7641873995918544,
      "grad_norm": 2.4784209728240967,
      "learning_rate": 7.4546244029526715e-06,
      "loss": 0.2738,
      "step": 8800
    },
    {
      "epoch": 0.7685293734531718,
      "grad_norm": 4.4596686363220215,
      "learning_rate": 7.440150528296426e-06,
      "loss": 0.2195,
      "step": 8850
    },
    {
      "epoch": 0.7728713473144891,
      "grad_norm": 3.5557031631469727,
      "learning_rate": 7.4256766536401805e-06,
      "loss": 0.2738,
      "step": 8900
    },
    {
      "epoch": 0.7772133211758065,
      "grad_norm": 10.603437423706055,
      "learning_rate": 7.411202778983935e-06,
      "loss": 0.2311,
      "step": 8950
    },
    {
      "epoch": 0.7815552950371238,
      "grad_norm": 14.556783676147461,
      "learning_rate": 7.396728904327689e-06,
      "loss": 0.2405,
      "step": 9000
    },
    {
      "epoch": 0.7858972688984412,
      "grad_norm": 10.350156784057617,
      "learning_rate": 7.382255029671444e-06,
      "loss": 0.2749,
      "step": 9050
    },
    {
      "epoch": 0.7902392427597585,
      "grad_norm": 8.201991081237793,
      "learning_rate": 7.367781155015198e-06,
      "loss": 0.2591,
      "step": 9100
    },
    {
      "epoch": 0.794581216621076,
      "grad_norm": 2.724789619445801,
      "learning_rate": 7.353307280358952e-06,
      "loss": 0.2934,
      "step": 9150
    },
    {
      "epoch": 0.7989231904823932,
      "grad_norm": 5.613165855407715,
      "learning_rate": 7.338833405702708e-06,
      "loss": 0.2337,
      "step": 9200
    },
    {
      "epoch": 0.8032651643437106,
      "grad_norm": 3.3908209800720215,
      "learning_rate": 7.324359531046462e-06,
      "loss": 0.2413,
      "step": 9250
    },
    {
      "epoch": 0.807607138205028,
      "grad_norm": 9.619438171386719,
      "learning_rate": 7.309885656390217e-06,
      "loss": 0.2663,
      "step": 9300
    },
    {
      "epoch": 0.8119491120663453,
      "grad_norm": 6.928030967712402,
      "learning_rate": 7.295411781733971e-06,
      "loss": 0.2713,
      "step": 9350
    },
    {
      "epoch": 0.8162910859276628,
      "grad_norm": 11.707024574279785,
      "learning_rate": 7.280937907077725e-06,
      "loss": 0.2465,
      "step": 9400
    },
    {
      "epoch": 0.82063305978898,
      "grad_norm": 9.797958374023438,
      "learning_rate": 7.26646403242148e-06,
      "loss": 0.2931,
      "step": 9450
    },
    {
      "epoch": 0.8249750336502975,
      "grad_norm": 11.354745864868164,
      "learning_rate": 7.251990157765234e-06,
      "loss": 0.1794,
      "step": 9500
    },
    {
      "epoch": 0.8293170075116147,
      "grad_norm": 13.923330307006836,
      "learning_rate": 7.237516283108988e-06,
      "loss": 0.2276,
      "step": 9550
    },
    {
      "epoch": 0.8336589813729322,
      "grad_norm": 6.454158306121826,
      "learning_rate": 7.223042408452743e-06,
      "loss": 0.2378,
      "step": 9600
    },
    {
      "epoch": 0.8380009552342494,
      "grad_norm": 6.767538547515869,
      "learning_rate": 7.208568533796498e-06,
      "loss": 0.2194,
      "step": 9650
    },
    {
      "epoch": 0.8423429290955669,
      "grad_norm": 4.610179424285889,
      "learning_rate": 7.194094659140253e-06,
      "loss": 0.1802,
      "step": 9700
    },
    {
      "epoch": 0.8466849029568841,
      "grad_norm": 6.4949870109558105,
      "learning_rate": 7.179620784484007e-06,
      "loss": 0.2525,
      "step": 9750
    },
    {
      "epoch": 0.8510268768182015,
      "grad_norm": 6.070311069488525,
      "learning_rate": 7.165146909827761e-06,
      "loss": 0.2412,
      "step": 9800
    },
    {
      "epoch": 0.855368850679519,
      "grad_norm": 11.405903816223145,
      "learning_rate": 7.150673035171516e-06,
      "loss": 0.2727,
      "step": 9850
    },
    {
      "epoch": 0.8597108245408362,
      "grad_norm": 13.383929252624512,
      "learning_rate": 7.13619916051527e-06,
      "loss": 0.2882,
      "step": 9900
    },
    {
      "epoch": 0.8640527984021537,
      "grad_norm": 13.404510498046875,
      "learning_rate": 7.1217252858590245e-06,
      "loss": 0.2219,
      "step": 9950
    },
    {
      "epoch": 0.868394772263471,
      "grad_norm": 9.866695404052734,
      "learning_rate": 7.107251411202779e-06,
      "loss": 0.2302,
      "step": 10000
    },
    {
      "epoch": 0.8727367461247884,
      "grad_norm": 13.462258338928223,
      "learning_rate": 7.092777536546534e-06,
      "loss": 0.244,
      "step": 10050
    },
    {
      "epoch": 0.8770787199861056,
      "grad_norm": 4.718477249145508,
      "learning_rate": 7.078303661890289e-06,
      "loss": 0.2321,
      "step": 10100
    },
    {
      "epoch": 0.881420693847423,
      "grad_norm": 6.899251937866211,
      "learning_rate": 7.0638297872340434e-06,
      "loss": 0.2126,
      "step": 10150
    },
    {
      "epoch": 0.8857626677087403,
      "grad_norm": 7.1671576499938965,
      "learning_rate": 7.0493559125777976e-06,
      "loss": 0.2159,
      "step": 10200
    },
    {
      "epoch": 0.8901046415700578,
      "grad_norm": 13.115740776062012,
      "learning_rate": 7.0348820379215525e-06,
      "loss": 0.244,
      "step": 10250
    },
    {
      "epoch": 0.8944466154313752,
      "grad_norm": 12.494568824768066,
      "learning_rate": 7.020408163265307e-06,
      "loss": 0.2641,
      "step": 10300
    },
    {
      "epoch": 0.8987885892926925,
      "grad_norm": 9.132339477539062,
      "learning_rate": 7.005934288609061e-06,
      "loss": 0.2483,
      "step": 10350
    },
    {
      "epoch": 0.9031305631540099,
      "grad_norm": 8.740550994873047,
      "learning_rate": 6.991460413952816e-06,
      "loss": 0.1968,
      "step": 10400
    },
    {
      "epoch": 0.9074725370153272,
      "grad_norm": 6.999782562255859,
      "learning_rate": 6.976986539296571e-06,
      "loss": 0.2513,
      "step": 10450
    },
    {
      "epoch": 0.9118145108766446,
      "grad_norm": 10.487621307373047,
      "learning_rate": 6.96280214213345e-06,
      "loss": 0.2685,
      "step": 10500
    },
    {
      "epoch": 0.9161564847379619,
      "grad_norm": 10.34904670715332,
      "learning_rate": 6.948328267477204e-06,
      "loss": 0.2173,
      "step": 10550
    },
    {
      "epoch": 0.9204984585992793,
      "grad_norm": 10.263396263122559,
      "learning_rate": 6.933854392820959e-06,
      "loss": 0.2394,
      "step": 10600
    },
    {
      "epoch": 0.9248404324605966,
      "grad_norm": 7.749691963195801,
      "learning_rate": 6.919380518164713e-06,
      "loss": 0.3045,
      "step": 10650
    },
    {
      "epoch": 0.929182406321914,
      "grad_norm": 7.5561017990112305,
      "learning_rate": 6.904906643508467e-06,
      "loss": 0.2006,
      "step": 10700
    },
    {
      "epoch": 0.9335243801832313,
      "grad_norm": 9.14453125,
      "learning_rate": 6.890432768852222e-06,
      "loss": 0.246,
      "step": 10750
    },
    {
      "epoch": 0.9378663540445487,
      "grad_norm": 12.95663833618164,
      "learning_rate": 6.875958894195976e-06,
      "loss": 0.2269,
      "step": 10800
    },
    {
      "epoch": 0.9422083279058661,
      "grad_norm": 7.384112358093262,
      "learning_rate": 6.8614850195397305e-06,
      "loss": 0.2313,
      "step": 10850
    },
    {
      "epoch": 0.9465503017671834,
      "grad_norm": 11.830831527709961,
      "learning_rate": 6.847011144883486e-06,
      "loss": 0.2247,
      "step": 10900
    },
    {
      "epoch": 0.9508922756285008,
      "grad_norm": 11.668469429016113,
      "learning_rate": 6.832826747720366e-06,
      "loss": 0.3188,
      "step": 10950
    },
    {
      "epoch": 0.955234249489818,
      "grad_norm": 3.9130442142486572,
      "learning_rate": 6.81835287306412e-06,
      "loss": 0.2325,
      "step": 11000
    },
    {
      "epoch": 0.9595762233511355,
      "grad_norm": 5.120070457458496,
      "learning_rate": 6.803878998407875e-06,
      "loss": 0.2465,
      "step": 11050
    },
    {
      "epoch": 0.9639181972124528,
      "grad_norm": 8.14548397064209,
      "learning_rate": 6.789405123751629e-06,
      "loss": 0.1962,
      "step": 11100
    },
    {
      "epoch": 0.9682601710737702,
      "grad_norm": 8.161116600036621,
      "learning_rate": 6.774931249095383e-06,
      "loss": 0.2031,
      "step": 11150
    },
    {
      "epoch": 0.9726021449350875,
      "grad_norm": 6.700616359710693,
      "learning_rate": 6.760457374439138e-06,
      "loss": 0.2406,
      "step": 11200
    },
    {
      "epoch": 0.9769441187964049,
      "grad_norm": 9.01518440246582,
      "learning_rate": 6.745983499782892e-06,
      "loss": 0.2324,
      "step": 11250
    },
    {
      "epoch": 0.9812860926577222,
      "grad_norm": 8.013750076293945,
      "learning_rate": 6.731509625126646e-06,
      "loss": 0.2223,
      "step": 11300
    },
    {
      "epoch": 0.9856280665190396,
      "grad_norm": 3.7864232063293457,
      "learning_rate": 6.717035750470402e-06,
      "loss": 0.253,
      "step": 11350
    },
    {
      "epoch": 0.989970040380357,
      "grad_norm": 10.278191566467285,
      "learning_rate": 6.702561875814156e-06,
      "loss": 0.2362,
      "step": 11400
    },
    {
      "epoch": 0.9943120142416743,
      "grad_norm": 6.585808753967285,
      "learning_rate": 6.688088001157911e-06,
      "loss": 0.2489,
      "step": 11450
    },
    {
      "epoch": 0.9986539881029917,
      "grad_norm": 7.475987911224365,
      "learning_rate": 6.673614126501665e-06,
      "loss": 0.2333,
      "step": 11500
    },
    {
      "epoch": 0.9999565802613868,
      "eval_loss": 0.22973021864891052,
      "eval_runtime": 158.8031,
      "eval_samples_per_second": 64.457,
      "eval_steps_per_second": 16.114,
      "step": 11515
    },
    {
      "epoch": 1.002995961964309,
      "grad_norm": 2.4895355701446533,
      "learning_rate": 6.659140251845419e-06,
      "loss": 0.1887,
      "step": 11550
    },
    {
      "epoch": 1.0073379358256263,
      "grad_norm": 6.796756267547607,
      "learning_rate": 6.644666377189174e-06,
      "loss": 0.175,
      "step": 11600
    },
    {
      "epoch": 1.0116799096869438,
      "grad_norm": 6.118704319000244,
      "learning_rate": 6.630192502532928e-06,
      "loss": 0.1286,
      "step": 11650
    },
    {
      "epoch": 1.016021883548261,
      "grad_norm": 2.102407693862915,
      "learning_rate": 6.6157186278766824e-06,
      "loss": 0.1484,
      "step": 11700
    },
    {
      "epoch": 1.0203638574095784,
      "grad_norm": 6.9978156089782715,
      "learning_rate": 6.601244753220437e-06,
      "loss": 0.1353,
      "step": 11750
    },
    {
      "epoch": 1.0247058312708957,
      "grad_norm": 12.549331665039062,
      "learning_rate": 6.586770878564192e-06,
      "loss": 0.1731,
      "step": 11800
    },
    {
      "epoch": 1.0290478051322132,
      "grad_norm": 0.8738523125648499,
      "learning_rate": 6.572297003907947e-06,
      "loss": 0.1472,
      "step": 11850
    },
    {
      "epoch": 1.0333897789935305,
      "grad_norm": 12.091641426086426,
      "learning_rate": 6.557823129251701e-06,
      "loss": 0.12,
      "step": 11900
    },
    {
      "epoch": 1.0377317528548478,
      "grad_norm": 4.691054344177246,
      "learning_rate": 6.5433492545954555e-06,
      "loss": 0.1343,
      "step": 11950
    },
    {
      "epoch": 1.0420737267161653,
      "grad_norm": 11.15015983581543,
      "learning_rate": 6.5288753799392105e-06,
      "loss": 0.1486,
      "step": 12000
    },
    {
      "epoch": 1.0464157005774826,
      "grad_norm": 19.46121597290039,
      "learning_rate": 6.514401505282965e-06,
      "loss": 0.1872,
      "step": 12050
    },
    {
      "epoch": 1.0507576744387999,
      "grad_norm": 8.74670696258545,
      "learning_rate": 6.499927630626719e-06,
      "loss": 0.1351,
      "step": 12100
    },
    {
      "epoch": 1.0550996483001172,
      "grad_norm": 1.0911568403244019,
      "learning_rate": 6.485453755970474e-06,
      "loss": 0.1493,
      "step": 12150
    },
    {
      "epoch": 1.0594416221614347,
      "grad_norm": 15.545437812805176,
      "learning_rate": 6.470979881314229e-06,
      "loss": 0.1405,
      "step": 12200
    },
    {
      "epoch": 1.063783596022752,
      "grad_norm": 16.014005661010742,
      "learning_rate": 6.4565060066579836e-06,
      "loss": 0.1594,
      "step": 12250
    },
    {
      "epoch": 1.0681255698840693,
      "grad_norm": 10.566229820251465,
      "learning_rate": 6.442032132001738e-06,
      "loss": 0.1568,
      "step": 12300
    },
    {
      "epoch": 1.0724675437453866,
      "grad_norm": 5.546818733215332,
      "learning_rate": 6.427558257345492e-06,
      "loss": 0.162,
      "step": 12350
    },
    {
      "epoch": 1.076809517606704,
      "grad_norm": 12.857758522033691,
      "learning_rate": 6.413084382689247e-06,
      "loss": 0.1412,
      "step": 12400
    },
    {
      "epoch": 1.0811514914680214,
      "grad_norm": 4.669007778167725,
      "learning_rate": 6.398610508033001e-06,
      "loss": 0.1747,
      "step": 12450
    },
    {
      "epoch": 1.0854934653293387,
      "grad_norm": 7.152470588684082,
      "learning_rate": 6.384136633376755e-06,
      "loss": 0.1562,
      "step": 12500
    },
    {
      "epoch": 1.0898354391906562,
      "grad_norm": 11.267487525939941,
      "learning_rate": 6.36966275872051e-06,
      "loss": 0.1583,
      "step": 12550
    },
    {
      "epoch": 1.0941774130519735,
      "grad_norm": 12.031243324279785,
      "learning_rate": 6.355188884064265e-06,
      "loss": 0.1545,
      "step": 12600
    },
    {
      "epoch": 1.0985193869132908,
      "grad_norm": 7.165139198303223,
      "learning_rate": 6.34071500940802e-06,
      "loss": 0.1658,
      "step": 12650
    },
    {
      "epoch": 1.102861360774608,
      "grad_norm": 9.312381744384766,
      "learning_rate": 6.326241134751774e-06,
      "loss": 0.1689,
      "step": 12700
    },
    {
      "epoch": 1.1072033346359256,
      "grad_norm": 6.647734642028809,
      "learning_rate": 6.311767260095528e-06,
      "loss": 0.149,
      "step": 12750
    },
    {
      "epoch": 1.1115453084972429,
      "grad_norm": 0.020784372463822365,
      "learning_rate": 6.297293385439283e-06,
      "loss": 0.1683,
      "step": 12800
    },
    {
      "epoch": 1.1158872823585602,
      "grad_norm": 8.196714401245117,
      "learning_rate": 6.282819510783037e-06,
      "loss": 0.1611,
      "step": 12850
    },
    {
      "epoch": 1.1202292562198775,
      "grad_norm": 12.446895599365234,
      "learning_rate": 6.268345636126791e-06,
      "loss": 0.1875,
      "step": 12900
    },
    {
      "epoch": 1.124571230081195,
      "grad_norm": 14.737337112426758,
      "learning_rate": 6.253871761470545e-06,
      "loss": 0.1379,
      "step": 12950
    },
    {
      "epoch": 1.1289132039425123,
      "grad_norm": 7.295111179351807,
      "learning_rate": 6.239397886814301e-06,
      "loss": 0.1062,
      "step": 13000
    },
    {
      "epoch": 1.1332551778038296,
      "grad_norm": 4.94767427444458,
      "learning_rate": 6.224924012158055e-06,
      "loss": 0.1716,
      "step": 13050
    },
    {
      "epoch": 1.137597151665147,
      "grad_norm": 3.360748767852783,
      "learning_rate": 6.21045013750181e-06,
      "loss": 0.1346,
      "step": 13100
    },
    {
      "epoch": 1.1419391255264644,
      "grad_norm": 8.84126091003418,
      "learning_rate": 6.195976262845564e-06,
      "loss": 0.1176,
      "step": 13150
    },
    {
      "epoch": 1.1462810993877817,
      "grad_norm": 5.007967472076416,
      "learning_rate": 6.1815023881893184e-06,
      "loss": 0.1754,
      "step": 13200
    },
    {
      "epoch": 1.150623073249099,
      "grad_norm": 1.278257131576538,
      "learning_rate": 6.167028513533073e-06,
      "loss": 0.1639,
      "step": 13250
    },
    {
      "epoch": 1.1549650471104165,
      "grad_norm": 4.407376766204834,
      "learning_rate": 6.1525546388768275e-06,
      "loss": 0.1605,
      "step": 13300
    },
    {
      "epoch": 1.1593070209717338,
      "grad_norm": 11.04604721069336,
      "learning_rate": 6.138080764220582e-06,
      "loss": 0.1222,
      "step": 13350
    },
    {
      "epoch": 1.163648994833051,
      "grad_norm": 12.066889762878418,
      "learning_rate": 6.1236068895643366e-06,
      "loss": 0.1046,
      "step": 13400
    },
    {
      "epoch": 1.1679909686943684,
      "grad_norm": 8.209555625915527,
      "learning_rate": 6.1091330149080915e-06,
      "loss": 0.1586,
      "step": 13450
    },
    {
      "epoch": 1.1723329425556859,
      "grad_norm": 7.170156955718994,
      "learning_rate": 6.0946591402518465e-06,
      "loss": 0.1218,
      "step": 13500
    },
    {
      "epoch": 1.1766749164170032,
      "grad_norm": 14.487652778625488,
      "learning_rate": 6.080185265595601e-06,
      "loss": 0.1915,
      "step": 13550
    },
    {
      "epoch": 1.1810168902783205,
      "grad_norm": 6.8529839515686035,
      "learning_rate": 6.065711390939355e-06,
      "loss": 0.1129,
      "step": 13600
    },
    {
      "epoch": 1.185358864139638,
      "grad_norm": 9.348575592041016,
      "learning_rate": 6.05123751628311e-06,
      "loss": 0.1528,
      "step": 13650
    },
    {
      "epoch": 1.1897008380009553,
      "grad_norm": 5.676689624786377,
      "learning_rate": 6.036763641626864e-06,
      "loss": 0.1625,
      "step": 13700
    },
    {
      "epoch": 1.1940428118622726,
      "grad_norm": 13.035611152648926,
      "learning_rate": 6.022289766970618e-06,
      "loss": 0.1763,
      "step": 13750
    },
    {
      "epoch": 1.1983847857235899,
      "grad_norm": 11.174485206604004,
      "learning_rate": 6.007815892314373e-06,
      "loss": 0.1428,
      "step": 13800
    },
    {
      "epoch": 1.2027267595849074,
      "grad_norm": 1.6729018688201904,
      "learning_rate": 5.993342017658128e-06,
      "loss": 0.1718,
      "step": 13850
    },
    {
      "epoch": 1.2070687334462247,
      "grad_norm": 6.937321662902832,
      "learning_rate": 5.978868143001883e-06,
      "loss": 0.1085,
      "step": 13900
    },
    {
      "epoch": 1.211410707307542,
      "grad_norm": 2.6701741218566895,
      "learning_rate": 5.964394268345637e-06,
      "loss": 0.1343,
      "step": 13950
    },
    {
      "epoch": 1.2157526811688593,
      "grad_norm": 6.4982380867004395,
      "learning_rate": 5.949920393689391e-06,
      "loss": 0.1584,
      "step": 14000
    },
    {
      "epoch": 1.2200946550301768,
      "grad_norm": 8.566705703735352,
      "learning_rate": 5.935446519033146e-06,
      "loss": 0.1593,
      "step": 14050
    },
    {
      "epoch": 1.224436628891494,
      "grad_norm": 2.0202713012695312,
      "learning_rate": 5.9209726443769e-06,
      "loss": 0.1566,
      "step": 14100
    },
    {
      "epoch": 1.2287786027528114,
      "grad_norm": 5.631886005401611,
      "learning_rate": 5.906498769720654e-06,
      "loss": 0.1839,
      "step": 14150
    },
    {
      "epoch": 1.2331205766141289,
      "grad_norm": 7.408547878265381,
      "learning_rate": 5.892024895064409e-06,
      "loss": 0.1288,
      "step": 14200
    },
    {
      "epoch": 1.2374625504754462,
      "grad_norm": 3.71185040473938,
      "learning_rate": 5.877551020408164e-06,
      "loss": 0.1153,
      "step": 14250
    },
    {
      "epoch": 1.2418045243367635,
      "grad_norm": 5.404465675354004,
      "learning_rate": 5.863077145751919e-06,
      "loss": 0.1844,
      "step": 14300
    },
    {
      "epoch": 1.2461464981980808,
      "grad_norm": 12.6980619430542,
      "learning_rate": 5.848603271095673e-06,
      "loss": 0.1846,
      "step": 14350
    },
    {
      "epoch": 1.2504884720593983,
      "grad_norm": 0.8423546552658081,
      "learning_rate": 5.834129396439427e-06,
      "loss": 0.135,
      "step": 14400
    },
    {
      "epoch": 1.2548304459207156,
      "grad_norm": 5.374547004699707,
      "learning_rate": 5.819655521783182e-06,
      "loss": 0.1453,
      "step": 14450
    },
    {
      "epoch": 1.2591724197820329,
      "grad_norm": 7.896478176116943,
      "learning_rate": 5.805181647126936e-06,
      "loss": 0.1445,
      "step": 14500
    },
    {
      "epoch": 1.2635143936433502,
      "grad_norm": 5.153639793395996,
      "learning_rate": 5.79070777247069e-06,
      "loss": 0.1264,
      "step": 14550
    },
    {
      "epoch": 1.2678563675046677,
      "grad_norm": 7.638331413269043,
      "learning_rate": 5.776233897814445e-06,
      "loss": 0.1268,
      "step": 14600
    },
    {
      "epoch": 1.272198341365985,
      "grad_norm": 5.552703380584717,
      "learning_rate": 5.7617600231581995e-06,
      "loss": 0.1677,
      "step": 14650
    },
    {
      "epoch": 1.2765403152273023,
      "grad_norm": 8.722034454345703,
      "learning_rate": 5.747286148501955e-06,
      "loss": 0.1498,
      "step": 14700
    },
    {
      "epoch": 1.2808822890886198,
      "grad_norm": 5.28520393371582,
      "learning_rate": 5.732812273845709e-06,
      "loss": 0.1263,
      "step": 14750
    },
    {
      "epoch": 1.285224262949937,
      "grad_norm": 0.4377736747264862,
      "learning_rate": 5.7183383991894635e-06,
      "loss": 0.1699,
      "step": 14800
    },
    {
      "epoch": 1.2895662368112544,
      "grad_norm": 0.9347131848335266,
      "learning_rate": 5.7038645245332185e-06,
      "loss": 0.1209,
      "step": 14850
    },
    {
      "epoch": 1.2939082106725717,
      "grad_norm": 9.414371490478516,
      "learning_rate": 5.6893906498769726e-06,
      "loss": 0.1452,
      "step": 14900
    },
    {
      "epoch": 1.2982501845338892,
      "grad_norm": 8.528615951538086,
      "learning_rate": 5.674916775220727e-06,
      "loss": 0.1239,
      "step": 14950
    },
    {
      "epoch": 1.3025921583952065,
      "grad_norm": 7.305627346038818,
      "learning_rate": 5.660442900564481e-06,
      "loss": 0.1117,
      "step": 15000
    },
    {
      "epoch": 1.3069341322565238,
      "grad_norm": 4.432664394378662,
      "learning_rate": 5.645969025908236e-06,
      "loss": 0.1666,
      "step": 15050
    },
    {
      "epoch": 1.311276106117841,
      "grad_norm": 8.12741756439209,
      "learning_rate": 5.631495151251991e-06,
      "loss": 0.1476,
      "step": 15100
    },
    {
      "epoch": 1.3156180799791586,
      "grad_norm": 0.27312299609184265,
      "learning_rate": 5.61731075408887e-06,
      "loss": 0.1639,
      "step": 15150
    },
    {
      "epoch": 1.3199600538404759,
      "grad_norm": 5.976969242095947,
      "learning_rate": 5.602836879432625e-06,
      "loss": 0.1481,
      "step": 15200
    },
    {
      "epoch": 1.3243020277017932,
      "grad_norm": 12.17628288269043,
      "learning_rate": 5.588363004776379e-06,
      "loss": 0.1714,
      "step": 15250
    },
    {
      "epoch": 1.3286440015631107,
      "grad_norm": 2.3492331504821777,
      "learning_rate": 5.573889130120133e-06,
      "loss": 0.1696,
      "step": 15300
    },
    {
      "epoch": 1.332985975424428,
      "grad_norm": 6.673547744750977,
      "learning_rate": 5.559415255463888e-06,
      "loss": 0.1497,
      "step": 15350
    },
    {
      "epoch": 1.3373279492857453,
      "grad_norm": 10.461968421936035,
      "learning_rate": 5.544941380807642e-06,
      "loss": 0.151,
      "step": 15400
    },
    {
      "epoch": 1.3416699231470628,
      "grad_norm": 5.2127299308776855,
      "learning_rate": 5.5304675061513964e-06,
      "loss": 0.1359,
      "step": 15450
    },
    {
      "epoch": 1.34601189700838,
      "grad_norm": 10.900973320007324,
      "learning_rate": 5.515993631495151e-06,
      "loss": 0.1645,
      "step": 15500
    },
    {
      "epoch": 1.3503538708696974,
      "grad_norm": 6.777567386627197,
      "learning_rate": 5.501519756838906e-06,
      "loss": 0.1412,
      "step": 15550
    },
    {
      "epoch": 1.3546958447310147,
      "grad_norm": 5.665555477142334,
      "learning_rate": 5.487045882182661e-06,
      "loss": 0.1334,
      "step": 15600
    },
    {
      "epoch": 1.359037818592332,
      "grad_norm": 6.394235610961914,
      "learning_rate": 5.4725720075264154e-06,
      "loss": 0.1389,
      "step": 15650
    },
    {
      "epoch": 1.3633797924536495,
      "grad_norm": 7.386012077331543,
      "learning_rate": 5.4580981328701695e-06,
      "loss": 0.1534,
      "step": 15700
    },
    {
      "epoch": 1.3677217663149668,
      "grad_norm": 14.629314422607422,
      "learning_rate": 5.4436242582139245e-06,
      "loss": 0.1852,
      "step": 15750
    },
    {
      "epoch": 1.372063740176284,
      "grad_norm": 0.6826161742210388,
      "learning_rate": 5.429150383557679e-06,
      "loss": 0.1459,
      "step": 15800
    },
    {
      "epoch": 1.3764057140376016,
      "grad_norm": 9.265501022338867,
      "learning_rate": 5.414676508901433e-06,
      "loss": 0.1522,
      "step": 15850
    },
    {
      "epoch": 1.3807476878989189,
      "grad_norm": 6.414170742034912,
      "learning_rate": 5.400202634245188e-06,
      "loss": 0.1615,
      "step": 15900
    },
    {
      "epoch": 1.3850896617602362,
      "grad_norm": 8.480489730834961,
      "learning_rate": 5.385728759588943e-06,
      "loss": 0.1398,
      "step": 15950
    },
    {
      "epoch": 1.3894316356215537,
      "grad_norm": 8.438610076904297,
      "learning_rate": 5.371254884932698e-06,
      "loss": 0.1657,
      "step": 16000
    },
    {
      "epoch": 1.393773609482871,
      "grad_norm": 9.783675193786621,
      "learning_rate": 5.356781010276452e-06,
      "loss": 0.1656,
      "step": 16050
    },
    {
      "epoch": 1.3981155833441883,
      "grad_norm": 10.249602317810059,
      "learning_rate": 5.342307135620206e-06,
      "loss": 0.141,
      "step": 16100
    },
    {
      "epoch": 1.4024575572055056,
      "grad_norm": 1.1902185678482056,
      "learning_rate": 5.327833260963961e-06,
      "loss": 0.0986,
      "step": 16150
    },
    {
      "epoch": 1.4067995310668229,
      "grad_norm": 18.99185562133789,
      "learning_rate": 5.313359386307715e-06,
      "loss": 0.1236,
      "step": 16200
    },
    {
      "epoch": 1.4111415049281404,
      "grad_norm": 7.957430839538574,
      "learning_rate": 5.298885511651469e-06,
      "loss": 0.1832,
      "step": 16250
    },
    {
      "epoch": 1.4154834787894577,
      "grad_norm": 8.840742111206055,
      "learning_rate": 5.284411636995224e-06,
      "loss": 0.1175,
      "step": 16300
    },
    {
      "epoch": 1.419825452650775,
      "grad_norm": 8.315855979919434,
      "learning_rate": 5.269937762338978e-06,
      "loss": 0.1502,
      "step": 16350
    },
    {
      "epoch": 1.4241674265120925,
      "grad_norm": 10.64714527130127,
      "learning_rate": 5.255463887682734e-06,
      "loss": 0.1416,
      "step": 16400
    },
    {
      "epoch": 1.4285094003734098,
      "grad_norm": 9.392339706420898,
      "learning_rate": 5.240990013026488e-06,
      "loss": 0.1501,
      "step": 16450
    },
    {
      "epoch": 1.432851374234727,
      "grad_norm": 6.508515357971191,
      "learning_rate": 5.226516138370242e-06,
      "loss": 0.1356,
      "step": 16500
    },
    {
      "epoch": 1.4371933480960446,
      "grad_norm": 12.240583419799805,
      "learning_rate": 5.212042263713997e-06,
      "loss": 0.1318,
      "step": 16550
    },
    {
      "epoch": 1.441535321957362,
      "grad_norm": 9.682429313659668,
      "learning_rate": 5.197568389057751e-06,
      "loss": 0.1392,
      "step": 16600
    },
    {
      "epoch": 1.4458772958186792,
      "grad_norm": 6.940093517303467,
      "learning_rate": 5.183094514401505e-06,
      "loss": 0.1288,
      "step": 16650
    },
    {
      "epoch": 1.4502192696799965,
      "grad_norm": 7.383029937744141,
      "learning_rate": 5.16862063974526e-06,
      "loss": 0.1638,
      "step": 16700
    },
    {
      "epoch": 1.4545612435413138,
      "grad_norm": 8.015626907348633,
      "learning_rate": 5.154146765089014e-06,
      "loss": 0.1337,
      "step": 16750
    },
    {
      "epoch": 1.4589032174026313,
      "grad_norm": 4.140705108642578,
      "learning_rate": 5.13967289043277e-06,
      "loss": 0.1572,
      "step": 16800
    },
    {
      "epoch": 1.4632451912639486,
      "grad_norm": 4.263915061950684,
      "learning_rate": 5.125199015776524e-06,
      "loss": 0.1591,
      "step": 16850
    },
    {
      "epoch": 1.4675871651252659,
      "grad_norm": 7.539843559265137,
| "learning_rate": 5.110725141120278e-06, | |
| "loss": 0.1663, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.4719291389865834, | |
| "grad_norm": 6.80403470993042, | |
| "learning_rate": 5.096251266464033e-06, | |
| "loss": 0.1443, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 1.4762711128479007, | |
| "grad_norm": 7.334268569946289, | |
| "learning_rate": 5.081777391807787e-06, | |
| "loss": 0.1345, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.480613086709218, | |
| "grad_norm": 21.538869857788086, | |
| "learning_rate": 5.0673035171515415e-06, | |
| "loss": 0.1439, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 1.4849550605705355, | |
| "grad_norm": 9.157646179199219, | |
| "learning_rate": 5.0528296424952965e-06, | |
| "loss": 0.1883, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.4892970344318528, | |
| "grad_norm": 10.004467964172363, | |
| "learning_rate": 5.038355767839051e-06, | |
| "loss": 0.1345, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 1.49363900829317, | |
| "grad_norm": 8.288803100585938, | |
| "learning_rate": 5.0238818931828055e-06, | |
| "loss": 0.1418, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.4979809821544874, | |
| "grad_norm": 6.881669044494629, | |
| "learning_rate": 5.009697496019685e-06, | |
| "loss": 0.141, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 1.5023229560158047, | |
| "grad_norm": 8.799572944641113, | |
| "learning_rate": 4.995223621363439e-06, | |
| "loss": 0.1546, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.5066649298771222, | |
| "grad_norm": 6.629570960998535, | |
| "learning_rate": 4.980749746707194e-06, | |
| "loss": 0.1725, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 1.5110069037384395, | |
| "grad_norm": 10.236489295959473, | |
| "learning_rate": 4.966275872050948e-06, | |
| "loss": 0.1267, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.5153488775997568, | |
| "grad_norm": 4.510442733764648, | |
| "learning_rate": 4.951801997394703e-06, | |
| "loss": 0.1569, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 1.5196908514610743, | |
| "grad_norm": 11.930350303649902, | |
| "learning_rate": 4.937328122738457e-06, | |
| "loss": 0.1326, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.5240328253223916, | |
| "grad_norm": 11.577521324157715, | |
| "learning_rate": 4.922854248082212e-06, | |
| "loss": 0.1689, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 1.5283747991837089, | |
| "grad_norm": 7.687832832336426, | |
| "learning_rate": 4.908380373425966e-06, | |
| "loss": 0.1633, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.5327167730450264, | |
| "grad_norm": 10.742979049682617, | |
| "learning_rate": 4.893906498769721e-06, | |
| "loss": 0.1631, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 1.5370587469063435, | |
| "grad_norm": 12.676881790161133, | |
| "learning_rate": 4.879432624113475e-06, | |
| "loss": 0.1466, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.541400720767661, | |
| "grad_norm": 7.963159084320068, | |
| "learning_rate": 4.86495874945723e-06, | |
| "loss": 0.1602, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 1.5457426946289783, | |
| "grad_norm": 8.404998779296875, | |
| "learning_rate": 4.850484874800984e-06, | |
| "loss": 0.1426, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.5500846684902956, | |
| "grad_norm": 12.583954811096191, | |
| "learning_rate": 4.836011000144739e-06, | |
| "loss": 0.1424, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 1.554426642351613, | |
| "grad_norm": 10.515045166015625, | |
| "learning_rate": 4.8215371254884934e-06, | |
| "loss": 0.1388, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.5587686162129304, | |
| "grad_norm": 0.09036704897880554, | |
| "learning_rate": 4.807063250832248e-06, | |
| "loss": 0.1354, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 1.5631105900742477, | |
| "grad_norm": 3.9979803562164307, | |
| "learning_rate": 4.7925893761760025e-06, | |
| "loss": 0.16, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.5674525639355652, | |
| "grad_norm": 10.055344581604004, | |
| "learning_rate": 4.7781155015197575e-06, | |
| "loss": 0.1765, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 1.5717945377968825, | |
| "grad_norm": 6.515308856964111, | |
| "learning_rate": 4.763641626863512e-06, | |
| "loss": 0.1497, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.5761365116581998, | |
| "grad_norm": 4.9170966148376465, | |
| "learning_rate": 4.749167752207266e-06, | |
| "loss": 0.1309, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 1.5804784855195173, | |
| "grad_norm": 9.118842124938965, | |
| "learning_rate": 4.734693877551021e-06, | |
| "loss": 0.1611, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.5848204593808344, | |
| "grad_norm": 4.670436859130859, | |
| "learning_rate": 4.720220002894776e-06, | |
| "loss": 0.1671, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 1.589162433242152, | |
| "grad_norm": 5.8963165283203125, | |
| "learning_rate": 4.70574612823853e-06, | |
| "loss": 0.155, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.5935044071034692, | |
| "grad_norm": 0.9755913019180298, | |
| "learning_rate": 4.691272253582284e-06, | |
| "loss": 0.1451, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 1.5978463809647865, | |
| "grad_norm": 2.514676570892334, | |
| "learning_rate": 4.676798378926039e-06, | |
| "loss": 0.1874, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.602188354826104, | |
| "grad_norm": 6.621218681335449, | |
| "learning_rate": 4.662324504269794e-06, | |
| "loss": 0.1527, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 1.6065303286874213, | |
| "grad_norm": 6.083352088928223, | |
| "learning_rate": 4.647850629613548e-06, | |
| "loss": 0.1522, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.6108723025487386, | |
| "grad_norm": 7.968784332275391, | |
| "learning_rate": 4.633376754957302e-06, | |
| "loss": 0.1157, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 1.615214276410056, | |
| "grad_norm": 3.370387315750122, | |
| "learning_rate": 4.618902880301057e-06, | |
| "loss": 0.1205, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.6195562502713734, | |
| "grad_norm": 7.180332660675049, | |
| "learning_rate": 4.604429005644812e-06, | |
| "loss": 0.1302, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 1.6238982241326907, | |
| "grad_norm": 0.08418329805135727, | |
| "learning_rate": 4.589955130988566e-06, | |
| "loss": 0.1485, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.6282401979940082, | |
| "grad_norm": 9.862198829650879, | |
| "learning_rate": 4.57548125633232e-06, | |
| "loss": 0.1407, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 1.6325821718553253, | |
| "grad_norm": 8.546820640563965, | |
| "learning_rate": 4.561007381676075e-06, | |
| "loss": 0.1593, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.6369241457166428, | |
| "grad_norm": 8.208939552307129, | |
| "learning_rate": 4.54653350701983e-06, | |
| "loss": 0.1009, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 1.64126611957796, | |
| "grad_norm": 9.600082397460938, | |
| "learning_rate": 4.532059632363584e-06, | |
| "loss": 0.1497, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 1.6456080934392774, | |
| "grad_norm": 6.318692207336426, | |
| "learning_rate": 4.517585757707338e-06, | |
| "loss": 0.1526, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 1.649950067300595, | |
| "grad_norm": 8.331518173217773, | |
| "learning_rate": 4.503111883051093e-06, | |
| "loss": 0.1279, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.6542920411619122, | |
| "grad_norm": 8.760932922363281, | |
| "learning_rate": 4.488638008394848e-06, | |
| "loss": 0.1117, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 1.6586340150232295, | |
| "grad_norm": 5.412656784057617, | |
| "learning_rate": 4.474164133738602e-06, | |
| "loss": 0.1405, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 1.662975988884547, | |
| "grad_norm": 0.18343329429626465, | |
| "learning_rate": 4.459690259082356e-06, | |
| "loss": 0.1385, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 1.6673179627458643, | |
| "grad_norm": 1.2332340478897095, | |
| "learning_rate": 4.445216384426111e-06, | |
| "loss": 0.1096, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.6716599366071816, | |
| "grad_norm": 8.579971313476562, | |
| "learning_rate": 4.430742509769866e-06, | |
| "loss": 0.1522, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 1.6760019104684991, | |
| "grad_norm": 11.554192543029785, | |
| "learning_rate": 4.41626863511362e-06, | |
| "loss": 0.1487, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 1.6803438843298162, | |
| "grad_norm": 4.856355667114258, | |
| "learning_rate": 4.4020842379505e-06, | |
| "loss": 0.1479, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 1.6846858581911337, | |
| "grad_norm": 2.942544937133789, | |
| "learning_rate": 4.387610363294254e-06, | |
| "loss": 0.1119, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.689027832052451, | |
| "grad_norm": 2.100930690765381, | |
| "learning_rate": 4.373136488638009e-06, | |
| "loss": 0.1388, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 1.6933698059137683, | |
| "grad_norm": 0.6170673370361328, | |
| "learning_rate": 4.358662613981764e-06, | |
| "loss": 0.1314, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.6977117797750858, | |
| "grad_norm": 4.570499420166016, | |
| "learning_rate": 4.344188739325518e-06, | |
| "loss": 0.1695, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 1.702053753636403, | |
| "grad_norm": 9.56963062286377, | |
| "learning_rate": 4.329714864669272e-06, | |
| "loss": 0.1226, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.7063957274977204, | |
| "grad_norm": 5.0290350914001465, | |
| "learning_rate": 4.315240990013027e-06, | |
| "loss": 0.1508, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 1.710737701359038, | |
| "grad_norm": 11.994057655334473, | |
| "learning_rate": 4.300767115356781e-06, | |
| "loss": 0.1797, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.7150796752203552, | |
| "grad_norm": 4.463140487670898, | |
| "learning_rate": 4.286293240700536e-06, | |
| "loss": 0.1461, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 1.7194216490816725, | |
| "grad_norm": 4.990943908691406, | |
| "learning_rate": 4.27181936604429e-06, | |
| "loss": 0.1524, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.72376362294299, | |
| "grad_norm": 6.280303001403809, | |
| "learning_rate": 4.257345491388045e-06, | |
| "loss": 0.1422, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 1.728105596804307, | |
| "grad_norm": 5.439699172973633, | |
| "learning_rate": 4.242871616731799e-06, | |
| "loss": 0.1045, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.7324475706656246, | |
| "grad_norm": 6.375192165374756, | |
| "learning_rate": 4.228397742075554e-06, | |
| "loss": 0.1001, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 1.736789544526942, | |
| "grad_norm": 12.014942169189453, | |
| "learning_rate": 4.213923867419308e-06, | |
| "loss": 0.1702, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.7411315183882592, | |
| "grad_norm": 7.028641223907471, | |
| "learning_rate": 4.199449992763062e-06, | |
| "loss": 0.1177, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 1.7454734922495767, | |
| "grad_norm": 18.258798599243164, | |
| "learning_rate": 4.184976118106817e-06, | |
| "loss": 0.1513, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.749815466110894, | |
| "grad_norm": 10.809378623962402, | |
| "learning_rate": 4.170502243450572e-06, | |
| "loss": 0.138, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 1.7541574399722113, | |
| "grad_norm": 10.029609680175781, | |
| "learning_rate": 4.156028368794326e-06, | |
| "loss": 0.134, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.7584994138335288, | |
| "grad_norm": 4.14710807800293, | |
| "learning_rate": 4.1415544941380805e-06, | |
| "loss": 0.1448, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 1.762841387694846, | |
| "grad_norm": 7.409153461456299, | |
| "learning_rate": 4.1270806194818355e-06, | |
| "loss": 0.1332, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.7671833615561634, | |
| "grad_norm": 9.64983081817627, | |
| "learning_rate": 4.1126067448255904e-06, | |
| "loss": 0.1416, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 1.771525335417481, | |
| "grad_norm": 0.016156112775206566, | |
| "learning_rate": 4.0981328701693445e-06, | |
| "loss": 0.1428, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.775867309278798, | |
| "grad_norm": 6.8022050857543945, | |
| "learning_rate": 4.083658995513099e-06, | |
| "loss": 0.1286, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 1.7802092831401155, | |
| "grad_norm": 2.9759087562561035, | |
| "learning_rate": 4.069185120856854e-06, | |
| "loss": 0.1372, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.7845512570014328, | |
| "grad_norm": 9.457865715026855, | |
| "learning_rate": 4.0547112462006086e-06, | |
| "loss": 0.1482, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 1.78889323086275, | |
| "grad_norm": 1.2885161638259888, | |
| "learning_rate": 4.040237371544363e-06, | |
| "loss": 0.1411, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.7932352047240676, | |
| "grad_norm": 8.49577808380127, | |
| "learning_rate": 4.025763496888117e-06, | |
| "loss": 0.1596, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 1.797577178585385, | |
| "grad_norm": 3.6167070865631104, | |
| "learning_rate": 4.011289622231872e-06, | |
| "loss": 0.1424, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.8019191524467022, | |
| "grad_norm": 2.6539206504821777, | |
| "learning_rate": 3.996815747575627e-06, | |
| "loss": 0.1328, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 1.8062611263080197, | |
| "grad_norm": 5.883168697357178, | |
| "learning_rate": 3.982341872919381e-06, | |
| "loss": 0.1268, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.810603100169337, | |
| "grad_norm": 6.293718338012695, | |
| "learning_rate": 3.967867998263135e-06, | |
| "loss": 0.1356, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 1.8149450740306543, | |
| "grad_norm": 7.67991828918457, | |
| "learning_rate": 3.95339412360689e-06, | |
| "loss": 0.1119, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.8192870478919718, | |
| "grad_norm": 9.023368835449219, | |
| "learning_rate": 3.938920248950645e-06, | |
| "loss": 0.1614, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 1.823629021753289, | |
| "grad_norm": 13.551576614379883, | |
| "learning_rate": 3.924446374294399e-06, | |
| "loss": 0.1653, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.8279709956146064, | |
| "grad_norm": 7.216656684875488, | |
| "learning_rate": 3.909972499638153e-06, | |
| "loss": 0.1446, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 1.8323129694759237, | |
| "grad_norm": 13.925249099731445, | |
| "learning_rate": 3.895498624981908e-06, | |
| "loss": 0.1075, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.836654943337241, | |
| "grad_norm": 7.241964817047119, | |
| "learning_rate": 3.881024750325663e-06, | |
| "loss": 0.1382, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 1.8409969171985585, | |
| "grad_norm": 2.9219863414764404, | |
| "learning_rate": 3.866550875669417e-06, | |
| "loss": 0.1132, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.8453388910598758, | |
| "grad_norm": 9.134288787841797, | |
| "learning_rate": 3.852077001013171e-06, | |
| "loss": 0.1412, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 1.849680864921193, | |
| "grad_norm": 4.604516506195068, | |
| "learning_rate": 3.837603126356926e-06, | |
| "loss": 0.1318, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.8540228387825106, | |
| "grad_norm": 0.3576798737049103, | |
| "learning_rate": 3.823129251700681e-06, | |
| "loss": 0.1255, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 1.858364812643828, | |
| "grad_norm": 0.4319687485694885, | |
| "learning_rate": 3.80894485453756e-06, | |
| "loss": 0.1328, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.8627067865051452, | |
| "grad_norm": 1.7280389070510864, | |
| "learning_rate": 3.7944709798813146e-06, | |
| "loss": 0.1386, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 1.8670487603664627, | |
| "grad_norm": 1.2364240884780884, | |
| "learning_rate": 3.7799971052250687e-06, | |
| "loss": 0.1617, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.8713907342277798, | |
| "grad_norm": 10.524025917053223, | |
| "learning_rate": 3.7655232305688237e-06, | |
| "loss": 0.1643, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 1.8757327080890973, | |
| "grad_norm": 8.031953811645508, | |
| "learning_rate": 3.7510493559125782e-06, | |
| "loss": 0.1748, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.8800746819504146, | |
| "grad_norm": 4.875488758087158, | |
| "learning_rate": 3.7365754812563327e-06, | |
| "loss": 0.1507, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 1.884416655811732, | |
| "grad_norm": 7.984673976898193, | |
| "learning_rate": 3.722101606600087e-06, | |
| "loss": 0.131, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.8887586296730494, | |
| "grad_norm": 0.16609551012516022, | |
| "learning_rate": 3.7076277319438414e-06, | |
| "loss": 0.13, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 1.8931006035343667, | |
| "grad_norm": 7.161849498748779, | |
| "learning_rate": 3.6931538572875963e-06, | |
| "loss": 0.1393, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.897442577395684, | |
| "grad_norm": 7.236243724822998, | |
| "learning_rate": 3.678679982631351e-06, | |
| "loss": 0.129, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 1.9017845512570015, | |
| "grad_norm": 10.374253273010254, | |
| "learning_rate": 3.664206107975105e-06, | |
| "loss": 0.1368, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.9061265251183188, | |
| "grad_norm": 2.0947864055633545, | |
| "learning_rate": 3.6497322333188595e-06, | |
| "loss": 0.1172, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 1.910468498979636, | |
| "grad_norm": 6.918741703033447, | |
| "learning_rate": 3.6352583586626145e-06, | |
| "loss": 0.1496, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.9148104728409536, | |
| "grad_norm": 7.337772846221924, | |
| "learning_rate": 3.620784484006369e-06, | |
| "loss": 0.1176, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 1.9191524467022707, | |
| "grad_norm": 0.9349635243415833, | |
| "learning_rate": 3.606310609350123e-06, | |
| "loss": 0.1231, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.9234944205635882, | |
| "grad_norm": 9.466097831726074, | |
| "learning_rate": 3.5918367346938777e-06, | |
| "loss": 0.1161, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 1.9278363944249055, | |
| "grad_norm": 1.3035846948623657, | |
| "learning_rate": 3.5773628600376326e-06, | |
| "loss": 0.1522, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.9321783682862228, | |
| "grad_norm": 8.847750663757324, | |
| "learning_rate": 3.562888985381387e-06, | |
| "loss": 0.1666, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 1.9365203421475403, | |
| "grad_norm": 9.673260688781738, | |
| "learning_rate": 3.5484151107251413e-06, | |
| "loss": 0.1778, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.9408623160088576, | |
| "grad_norm": 5.941328048706055, | |
| "learning_rate": 3.533941236068896e-06, | |
| "loss": 0.1482, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 1.945204289870175, | |
| "grad_norm": 4.996196269989014, | |
| "learning_rate": 3.5194673614126507e-06, | |
| "loss": 0.1375, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.9495462637314924, | |
| "grad_norm": 14.44809627532959, | |
| "learning_rate": 3.504993486756405e-06, | |
| "loss": 0.0934, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 1.9538882375928097, | |
| "grad_norm": 2.986774444580078, | |
| "learning_rate": 3.4905196121001594e-06, | |
| "loss": 0.0977, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.958230211454127, | |
| "grad_norm": 0.16753756999969482, | |
| "learning_rate": 3.476045737443914e-06, | |
| "loss": 0.1383, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 1.9625721853154445, | |
| "grad_norm": 9.141763687133789, | |
| "learning_rate": 3.461571862787669e-06, | |
| "loss": 0.1123, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.9669141591767616, | |
| "grad_norm": 11.578398704528809, | |
| "learning_rate": 3.447097988131423e-06, | |
| "loss": 0.1295, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 1.9712561330380791, | |
| "grad_norm": 6.170989036560059, | |
| "learning_rate": 3.4326241134751775e-06, | |
| "loss": 0.129, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.9755981068993964, | |
| "grad_norm": 8.633371353149414, | |
| "learning_rate": 3.418150238818932e-06, | |
| "loss": 0.1252, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 1.9799400807607137, | |
| "grad_norm": 0.2691422402858734, | |
| "learning_rate": 3.403676364162687e-06, | |
| "loss": 0.1309, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.9842820546220312, | |
| "grad_norm": 1.046929121017456, | |
| "learning_rate": 3.389202489506441e-06, | |
| "loss": 0.128, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 1.9886240284833485, | |
| "grad_norm": 9.4624662399292, | |
| "learning_rate": 3.3747286148501957e-06, | |
| "loss": 0.1663, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.9929660023446658, | |
| "grad_norm": 3.3540661334991455, | |
| "learning_rate": 3.36025474019395e-06, | |
| "loss": 0.094, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 1.9973079762059833, | |
| "grad_norm": 7.1674675941467285, | |
| "learning_rate": 3.345780865537705e-06, | |
| "loss": 0.1405, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.21683721244335175, | |
| "eval_runtime": 158.5988, | |
| "eval_samples_per_second": 64.54, | |
| "eval_steps_per_second": 16.135, | |
| "step": 23031 | |
| }, | |
| { | |
| "epoch": 2.0016499500673004, | |
| "grad_norm": 2.219898223876953, | |
| "learning_rate": 3.3313069908814593e-06, | |
| "loss": 0.1019, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 2.005991923928618, | |
| "grad_norm": 4.620298385620117, | |
| "learning_rate": 3.3168331162252138e-06, | |
| "loss": 0.0865, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 2.0103338977899354, | |
| "grad_norm": 5.638464450836182, | |
| "learning_rate": 3.3023592415689683e-06, | |
| "loss": 0.0756, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 2.0146758716512525, | |
| "grad_norm": 4.311452388763428, | |
| "learning_rate": 3.2878853669127224e-06, | |
| "loss": 0.0872, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.01901784551257, | |
| "grad_norm": 10.365550994873047, | |
| "learning_rate": 3.2734114922564774e-06, | |
| "loss": 0.0868, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 2.0233598193738875, | |
| "grad_norm": 11.352954864501953, | |
| "learning_rate": 3.258937617600232e-06, | |
| "loss": 0.0979, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 2.0277017932352046, | |
| "grad_norm": 1.122918963432312, | |
| "learning_rate": 3.2444637429439865e-06, | |
| "loss": 0.0736, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 2.032043767096522, | |
| "grad_norm": 5.547560214996338, | |
| "learning_rate": 3.2299898682877406e-06, | |
| "loss": 0.0936, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.0363857409578396, | |
| "grad_norm": 7.175875663757324, | |
| "learning_rate": 3.2155159936314955e-06, | |
| "loss": 0.0939, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 2.0407277148191567, | |
| "grad_norm": 7.929403305053711, | |
| "learning_rate": 3.20104211897525e-06, | |
| "loss": 0.1086, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.0450696886804742, | |
| "grad_norm": 2.572625160217285, | |
| "learning_rate": 3.186568244319004e-06, | |
| "loss": 0.1032, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 2.0494116625417913, | |
| "grad_norm": 9.160089492797852, | |
| "learning_rate": 3.1723838471558836e-06, | |
| "loss": 0.0887, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.053753636403109, | |
| "grad_norm": 1.3796570301055908, | |
| "learning_rate": 3.157909972499638e-06, | |
| "loss": 0.0798, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 2.0580956102644263, | |
| "grad_norm": 2.670893669128418, | |
| "learning_rate": 3.143436097843393e-06, | |
| "loss": 0.0898, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 2.0624375841257434, | |
| "grad_norm": 3.4575767517089844, | |
| "learning_rate": 3.1289622231871476e-06, | |
| "loss": 0.0821, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 2.066779557987061, | |
| "grad_norm": 6.139395236968994, | |
| "learning_rate": 3.1144883485309017e-06, | |
| "loss": 0.1038, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.0711215318483784, | |
| "grad_norm": 1.5725222826004028, | |
| "learning_rate": 3.1000144738746562e-06, | |
| "loss": 0.0722, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 2.0754635057096955, | |
| "grad_norm": 5.873169422149658, | |
| "learning_rate": 3.085540599218411e-06, | |
| "loss": 0.1012, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 2.079805479571013, | |
| "grad_norm": 5.5453877449035645, | |
| "learning_rate": 3.0710667245621657e-06, | |
| "loss": 0.0812, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 2.0841474534323305, | |
| "grad_norm": 8.914033889770508, | |
| "learning_rate": 3.05659284990592e-06, | |
| "loss": 0.0798, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.0884894272936476, | |
| "grad_norm": 1.6803654432296753, | |
| "learning_rate": 3.0421189752496744e-06, | |
| "loss": 0.0793, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 2.092831401154965, | |
| "grad_norm": 6.029511451721191, | |
| "learning_rate": 3.0276451005934293e-06, | |
| "loss": 0.098, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 2.097173375016282, | |
| "grad_norm": 1.0954087972640991, | |
| "learning_rate": 3.013171225937184e-06, | |
| "loss": 0.0664, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 2.1015153488775997, | |
| "grad_norm": 5.141661643981934, | |
| "learning_rate": 2.998697351280938e-06, | |
| "loss": 0.0796, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.1058573227389172, | |
| "grad_norm": 7.69569730758667, | |
| "learning_rate": 2.9842234766246925e-06, | |
| "loss": 0.0926, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 2.1101992966002343, | |
| "grad_norm": 5.892622947692871, | |
| "learning_rate": 2.9697496019684475e-06, | |
| "loss": 0.0775, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 2.114541270461552, | |
| "grad_norm": 8.389673233032227, | |
| "learning_rate": 2.955275727312202e-06, | |
| "loss": 0.0766, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 2.1188832443228693, | |
| "grad_norm": 3.9702394008636475, | |
| "learning_rate": 2.940801852655956e-06, | |
| "loss": 0.0917, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.1232252181841864, | |
| "grad_norm": 4.655553817749023, | |
| "learning_rate": 2.9263279779997106e-06, | |
| "loss": 0.096, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 2.127567192045504, | |
| "grad_norm": 8.192728996276855, | |
| "learning_rate": 2.9118541033434656e-06, | |
| "loss": 0.1074, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.1319091659068214, | |
| "grad_norm": 1.8472944498062134, | |
| "learning_rate": 2.89738022868722e-06, | |
| "loss": 0.073, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 2.1362511397681385, | |
| "grad_norm": 13.238656044006348, | |
| "learning_rate": 2.8829063540309742e-06, | |
| "loss": 0.0838, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.140593113629456, | |
| "grad_norm": 5.6969709396362305, | |
| "learning_rate": 2.8684324793747288e-06, | |
| "loss": 0.0664, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 2.144935087490773, | |
| "grad_norm": 8.73705768585205, | |
| "learning_rate": 2.8539586047184837e-06, | |
| "loss": 0.0668, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 2.1492770613520906, | |
| "grad_norm": 0.2656320631504059, | |
| "learning_rate": 2.839484730062238e-06, | |
| "loss": 0.085, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 2.153619035213408, | |
| "grad_norm": 7.568329334259033, | |
| "learning_rate": 2.8250108554059924e-06, | |
| "loss": 0.0902, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.157961009074725, | |
| "grad_norm": 7.81199836730957, | |
| "learning_rate": 2.810536980749747e-06, | |
| "loss": 0.0889, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 2.1623029829360427, | |
| "grad_norm": 6.70134162902832, | |
| "learning_rate": 2.796063106093502e-06, | |
| "loss": 0.0781, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 2.1666449567973602, | |
| "grad_norm": 8.440710067749023, | |
| "learning_rate": 2.781589231437256e-06, | |
| "loss": 0.0842, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 2.1709869306586773, | |
| "grad_norm": 4.488495826721191, | |
| "learning_rate": 2.7671153567810105e-06, | |
| "loss": 0.0983, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.175328904519995, | |
| "grad_norm": 12.057733535766602, | |
| "learning_rate": 2.752641482124765e-06, | |
| "loss": 0.0724, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 2.1796708783813123, | |
| "grad_norm": 12.238190650939941, | |
| "learning_rate": 2.738167607468519e-06, | |
| "loss": 0.0791, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 2.1840128522426294, | |
| "grad_norm": 8.079179763793945, | |
| "learning_rate": 2.723693732812274e-06, | |
| "loss": 0.0651, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 2.188354826103947, | |
| "grad_norm": 9.638249397277832, | |
| "learning_rate": 2.7092198581560286e-06, | |
| "loss": 0.1001, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.192696799965264, | |
| "grad_norm": 0.03366376459598541, | |
| "learning_rate": 2.694745983499783e-06, | |
| "loss": 0.0724, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 2.1970387738265815, | |
| "grad_norm": 0.43450728058815, | |
| "learning_rate": 2.6802721088435373e-06, | |
| "loss": 0.1011, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 2.201380747687899, | |
| "grad_norm": 3.758702516555786, | |
| "learning_rate": 2.6657982341872922e-06, | |
| "loss": 0.1216, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 2.205722721549216, | |
| "grad_norm": 2.247030735015869, | |
| "learning_rate": 2.6513243595310468e-06, | |
| "loss": 0.0606, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.2100646954105336, | |
| "grad_norm": 7.871387004852295, | |
| "learning_rate": 2.6368504848748013e-06, | |
| "loss": 0.0791, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 2.214406669271851, | |
| "grad_norm": 4.234960079193115, | |
| "learning_rate": 2.6223766102185554e-06, | |
| "loss": 0.0968, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.218748643133168, | |
| "grad_norm": 0.44377923011779785, | |
| "learning_rate": 2.6079027355623104e-06, | |
| "loss": 0.0897, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 2.2230906169944857, | |
| "grad_norm": 7.014172077178955, | |
| "learning_rate": 2.593428860906065e-06, | |
| "loss": 0.1147, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.2274325908558033, | |
| "grad_norm": 0.3093360960483551, | |
| "learning_rate": 2.578954986249819e-06, | |
| "loss": 0.0793, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 2.2317745647171203, | |
| "grad_norm": 4.603691577911377, | |
| "learning_rate": 2.5644811115935735e-06, | |
| "loss": 0.106, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 2.236116538578438, | |
| "grad_norm": 0.18263719975948334, | |
| "learning_rate": 2.5500072369373285e-06, | |
| "loss": 0.0685, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 2.240458512439755, | |
| "grad_norm": 6.405594348907471, | |
| "learning_rate": 2.535822839774208e-06, | |
| "loss": 0.0661, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.2448004863010724, | |
| "grad_norm": 6.221367359161377, | |
| "learning_rate": 2.5213489651179624e-06, | |
| "loss": 0.0773, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 2.24914246016239, | |
| "grad_norm": 7.3640289306640625, | |
| "learning_rate": 2.5068750904617165e-06, | |
| "loss": 0.1208, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 2.253484434023707, | |
| "grad_norm": 2.9754855632781982, | |
| "learning_rate": 2.4924012158054715e-06, | |
| "loss": 0.0656, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 2.2578264078850245, | |
| "grad_norm": 0.4063643515110016, | |
| "learning_rate": 2.4779273411492256e-06, | |
| "loss": 0.0794, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.262168381746342, | |
| "grad_norm": 11.072209358215332, | |
| "learning_rate": 2.4634534664929806e-06, | |
| "loss": 0.0872, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 2.266510355607659, | |
| "grad_norm": 5.498048782348633, | |
| "learning_rate": 2.4489795918367347e-06, | |
| "loss": 0.0767, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 2.2708523294689766, | |
| "grad_norm": 8.235716819763184, | |
| "learning_rate": 2.4345057171804896e-06, | |
| "loss": 0.1209, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 2.275194303330294, | |
| "grad_norm": 8.56951904296875, | |
| "learning_rate": 2.4200318425242437e-06, | |
| "loss": 0.0847, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.2795362771916112, | |
| "grad_norm": 7.200361728668213, | |
| "learning_rate": 2.4055579678679987e-06, | |
| "loss": 0.0757, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 2.2838782510529287, | |
| "grad_norm": 0.4996426999568939, | |
| "learning_rate": 2.391084093211753e-06, | |
| "loss": 0.076, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 2.2882202249142463, | |
| "grad_norm": 2.8761146068573, | |
| "learning_rate": 2.3766102185555078e-06, | |
| "loss": 0.0959, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 2.2925621987755633, | |
| "grad_norm": 5.218446731567383, | |
| "learning_rate": 2.362136343899262e-06, | |
| "loss": 0.0775, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.296904172636881, | |
| "grad_norm": 1.0682421922683716, | |
| "learning_rate": 2.347662469243017e-06, | |
| "loss": 0.0883, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 2.301246146498198, | |
| "grad_norm": 2.8506529331207275, | |
| "learning_rate": 2.333188594586771e-06, | |
| "loss": 0.1023, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.3055881203595154, | |
| "grad_norm": 8.659663200378418, | |
| "learning_rate": 2.318714719930526e-06, | |
| "loss": 0.0658, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 2.309930094220833, | |
| "grad_norm": 7.557945728302002, | |
| "learning_rate": 2.30424084527428e-06, | |
| "loss": 0.0954, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.31427206808215, | |
| "grad_norm": 0.2548428475856781, | |
| "learning_rate": 2.289766970618035e-06, | |
| "loss": 0.0826, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 2.3186140419434675, | |
| "grad_norm": 3.291508436203003, | |
| "learning_rate": 2.275293095961789e-06, | |
| "loss": 0.0879, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 2.3229560158047846, | |
| "grad_norm": 5.556234836578369, | |
| "learning_rate": 2.2608192213055436e-06, | |
| "loss": 0.0976, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 2.327297989666102, | |
| "grad_norm": 1.609104871749878, | |
| "learning_rate": 2.246345346649298e-06, | |
| "loss": 0.0786, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.3316399635274196, | |
| "grad_norm": 5.0335235595703125, | |
| "learning_rate": 2.2318714719930527e-06, | |
| "loss": 0.0728, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 2.3359819373887367, | |
| "grad_norm": 8.968177795410156, | |
| "learning_rate": 2.217397597336807e-06, | |
| "loss": 0.0847, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 2.3403239112500542, | |
| "grad_norm": 9.932825088500977, | |
| "learning_rate": 2.2029237226805617e-06, | |
| "loss": 0.1038, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 2.3446658851113718, | |
| "grad_norm": 2.8147237300872803, | |
| "learning_rate": 2.1884498480243163e-06, | |
| "loss": 0.0825, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.349007858972689, | |
| "grad_norm": 0.979704737663269, | |
| "learning_rate": 2.173975973368071e-06, | |
| "loss": 0.1116, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 2.3533498328340063, | |
| "grad_norm": 0.11242897063493729, | |
| "learning_rate": 2.1595020987118253e-06, | |
| "loss": 0.0789, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 2.357691806695324, | |
| "grad_norm": 7.687259674072266, | |
| "learning_rate": 2.14502822405558e-06, | |
| "loss": 0.0723, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 2.362033780556641, | |
| "grad_norm": 1.6177990436553955, | |
| "learning_rate": 2.1305543493993344e-06, | |
| "loss": 0.093, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.3663757544179584, | |
| "grad_norm": 8.21517562866211, | |
| "learning_rate": 2.116080474743089e-06, | |
| "loss": 0.0951, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 2.370717728279276, | |
| "grad_norm": 6.920109748840332, | |
| "learning_rate": 2.1016066000868435e-06, | |
| "loss": 0.0914, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 2.375059702140593, | |
| "grad_norm": 4.636850833892822, | |
| "learning_rate": 2.087132725430598e-06, | |
| "loss": 0.089, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 2.3794016760019105, | |
| "grad_norm": 4.11488676071167, | |
| "learning_rate": 2.0726588507743525e-06, | |
| "loss": 0.0841, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 2.383743649863228, | |
| "grad_norm": 4.323261260986328, | |
| "learning_rate": 2.058184976118107e-06, | |
| "loss": 0.0593, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 2.388085623724545, | |
| "grad_norm": 10.200052261352539, | |
| "learning_rate": 2.0437111014618616e-06, | |
| "loss": 0.0966, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.3924275975858627, | |
| "grad_norm": 12.892340660095215, | |
| "learning_rate": 2.029237226805616e-06, | |
| "loss": 0.0839, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 2.3967695714471797, | |
| "grad_norm": 0.33158212900161743, | |
| "learning_rate": 2.0147633521493707e-06, | |
| "loss": 0.107, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 2.4011115453084972, | |
| "grad_norm": 4.843432903289795, | |
| "learning_rate": 2.000289477493125e-06, | |
| "loss": 0.0761, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 2.4054535191698148, | |
| "grad_norm": 8.558151245117188, | |
| "learning_rate": 1.9858156028368797e-06, | |
| "loss": 0.0931, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 2.409795493031132, | |
| "grad_norm": 4.9575653076171875, | |
| "learning_rate": 1.9713417281806343e-06, | |
| "loss": 0.0916, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 2.4141374668924493, | |
| "grad_norm": 0.742382287979126, | |
| "learning_rate": 1.956867853524389e-06, | |
| "loss": 0.0661, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 2.4184794407537664, | |
| "grad_norm": 5.219630718231201, | |
| "learning_rate": 1.9423939788681433e-06, | |
| "loss": 0.085, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 2.422821414615084, | |
| "grad_norm": 5.431758403778076, | |
| "learning_rate": 1.927920104211898e-06, | |
| "loss": 0.0806, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 2.4271633884764015, | |
| "grad_norm": 2.560927391052246, | |
| "learning_rate": 1.913446229555652e-06, | |
| "loss": 0.0749, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 2.4315053623377185, | |
| "grad_norm": 13.62498950958252, | |
| "learning_rate": 1.898972354899407e-06, | |
| "loss": 0.0777, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.435847336199036, | |
| "grad_norm": 7.314693927764893, | |
| "learning_rate": 1.8844984802431613e-06, | |
| "loss": 0.098, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 2.4401893100603536, | |
| "grad_norm": 7.054198741912842, | |
| "learning_rate": 1.870024605586916e-06, | |
| "loss": 0.0782, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 2.4445312839216706, | |
| "grad_norm": 7.161545276641846, | |
| "learning_rate": 1.8555507309306703e-06, | |
| "loss": 0.0716, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 2.448873257782988, | |
| "grad_norm": 12.497211456298828, | |
| "learning_rate": 1.8410768562744249e-06, | |
| "loss": 0.0766, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 2.4532152316443057, | |
| "grad_norm": 7.773441314697266, | |
| "learning_rate": 1.8266029816181794e-06, | |
| "loss": 0.1298, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 2.4575572055056227, | |
| "grad_norm": 2.4828858375549316, | |
| "learning_rate": 1.8121291069619337e-06, | |
| "loss": 0.0958, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 2.4618991793669402, | |
| "grad_norm": 3.412752628326416, | |
| "learning_rate": 1.7976552323056885e-06, | |
| "loss": 0.0909, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 2.4662411532282578, | |
| "grad_norm": 9.695392608642578, | |
| "learning_rate": 1.7831813576494428e-06, | |
| "loss": 0.0878, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 2.470583127089575, | |
| "grad_norm": 8.524370193481445, | |
| "learning_rate": 1.7687074829931975e-06, | |
| "loss": 0.0735, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 2.4749251009508924, | |
| "grad_norm": 5.256426811218262, | |
| "learning_rate": 1.7542336083369518e-06, | |
| "loss": 0.0939, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.47926707481221, | |
| "grad_norm": 2.187129259109497, | |
| "learning_rate": 1.7397597336807066e-06, | |
| "loss": 0.0881, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 2.483609048673527, | |
| "grad_norm": 1.136833906173706, | |
| "learning_rate": 1.725285859024461e-06, | |
| "loss": 0.0802, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 2.4879510225348445, | |
| "grad_norm": 6.4525251388549805, | |
| "learning_rate": 1.7108119843682157e-06, | |
| "loss": 0.1154, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 2.4922929963961615, | |
| "grad_norm": 7.624198913574219, | |
| "learning_rate": 1.69633810971197e-06, | |
| "loss": 0.0824, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 2.496634970257479, | |
| "grad_norm": 6.494277000427246, | |
| "learning_rate": 1.6818642350557245e-06, | |
| "loss": 0.0781, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 2.5009769441187966, | |
| "grad_norm": 0.3975910544395447, | |
| "learning_rate": 1.667390360399479e-06, | |
| "loss": 0.0752, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 2.5053189179801136, | |
| "grad_norm": 7.682430744171143, | |
| "learning_rate": 1.6529164857432336e-06, | |
| "loss": 0.0882, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 2.509660891841431, | |
| "grad_norm": 2.4172372817993164, | |
| "learning_rate": 1.6384426110869881e-06, | |
| "loss": 0.0828, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 2.5140028657027482, | |
| "grad_norm": 1.9863077402114868, | |
| "learning_rate": 1.6239687364307426e-06, | |
| "loss": 0.0655, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 2.5183448395640657, | |
| "grad_norm": 4.845276355743408, | |
| "learning_rate": 1.6094948617744972e-06, | |
| "loss": 0.0756, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.5226868134253833, | |
| "grad_norm": 6.560926914215088, | |
| "learning_rate": 1.5950209871182517e-06, | |
| "loss": 0.0731, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 2.5270287872867003, | |
| "grad_norm": 7.662853717803955, | |
| "learning_rate": 1.5805471124620062e-06, | |
| "loss": 0.0728, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 2.531370761148018, | |
| "grad_norm": 0.009299159049987793, | |
| "learning_rate": 1.5660732378057608e-06, | |
| "loss": 0.0744, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 2.5357127350093354, | |
| "grad_norm": 7.238016605377197, | |
| "learning_rate": 1.5515993631495153e-06, | |
| "loss": 0.0932, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 2.5400547088706524, | |
| "grad_norm": 1.8960305452346802, | |
| "learning_rate": 1.5371254884932698e-06, | |
| "loss": 0.0688, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 2.54439668273197, | |
| "grad_norm": 5.69565486907959, | |
| "learning_rate": 1.5226516138370242e-06, | |
| "loss": 0.0766, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 2.5487386565932875, | |
| "grad_norm": 12.48017692565918, | |
| "learning_rate": 1.508177739180779e-06, | |
| "loss": 0.0881, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 2.5530806304546045, | |
| "grad_norm": 1.490441918373108, | |
| "learning_rate": 1.4937038645245332e-06, | |
| "loss": 0.0827, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 2.557422604315922, | |
| "grad_norm": 0.3555105924606323, | |
| "learning_rate": 1.479229989868288e-06, | |
| "loss": 0.0791, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 2.5617645781772396, | |
| "grad_norm": 6.290306091308594, | |
| "learning_rate": 1.4647561152120423e-06, | |
| "loss": 0.0771, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.5661065520385566, | |
| "grad_norm": 0.16260845959186554, | |
| "learning_rate": 1.450282240555797e-06, | |
| "loss": 0.0934, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 2.570448525899874, | |
| "grad_norm": 7.777038097381592, | |
| "learning_rate": 1.4360978433926764e-06, | |
| "loss": 0.0803, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 2.5747904997611917, | |
| "grad_norm": 0.2070547193288803, | |
| "learning_rate": 1.4216239687364308e-06, | |
| "loss": 0.0856, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 2.5791324736225087, | |
| "grad_norm": 0.10048296302556992, | |
| "learning_rate": 1.4071500940801855e-06, | |
| "loss": 0.0746, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 2.5834744474838263, | |
| "grad_norm": 6.591347694396973, | |
| "learning_rate": 1.3926762194239398e-06, | |
| "loss": 0.0788, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 2.5878164213451433, | |
| "grad_norm": 6.4904866218566895, | |
| "learning_rate": 1.3782023447676946e-06, | |
| "loss": 0.0897, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 2.592158395206461, | |
| "grad_norm": 3.354762315750122, | |
| "learning_rate": 1.363728470111449e-06, | |
| "loss": 0.0927, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 2.5965003690677784, | |
| "grad_norm": 2.270613670349121, | |
| "learning_rate": 1.3492545954552036e-06, | |
| "loss": 0.1087, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 2.6008423429290954, | |
| "grad_norm": 0.7417836785316467, | |
| "learning_rate": 1.334780720798958e-06, | |
| "loss": 0.0904, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 2.605184316790413, | |
| "grad_norm": 4.791884422302246, | |
| "learning_rate": 1.3203068461427127e-06, | |
| "loss": 0.0761, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.60952629065173, | |
| "grad_norm": 2.9381160736083984, | |
| "learning_rate": 1.305832971486467e-06, | |
| "loss": 0.0886, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 2.6138682645130475, | |
| "grad_norm": 4.2343926429748535, | |
| "learning_rate": 1.2913590968302213e-06, | |
| "loss": 0.0931, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 2.618210238374365, | |
| "grad_norm": 1.3249711990356445, | |
| "learning_rate": 1.276885222173976e-06, | |
| "loss": 0.0817, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 2.622552212235682, | |
| "grad_norm": 5.119120121002197, | |
| "learning_rate": 1.2624113475177304e-06, | |
| "loss": 0.0715, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 2.6268941860969997, | |
| "grad_norm": 2.266857862472534, | |
| "learning_rate": 1.2479374728614852e-06, | |
| "loss": 0.0702, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 2.631236159958317, | |
| "grad_norm": 7.146674633026123, | |
| "learning_rate": 1.2334635982052397e-06, | |
| "loss": 0.0732, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 2.6355781338196342, | |
| "grad_norm": 4.819944858551025, | |
| "learning_rate": 1.2189897235489942e-06, | |
| "loss": 0.0778, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 2.6399201076809518, | |
| "grad_norm": 2.735713243484497, | |
| "learning_rate": 1.2045158488927488e-06, | |
| "loss": 0.0532, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 2.6442620815422693, | |
| "grad_norm": 8.688481330871582, | |
| "learning_rate": 1.1900419742365033e-06, | |
| "loss": 0.1277, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 2.6486040554035863, | |
| "grad_norm": 7.23391056060791, | |
| "learning_rate": 1.1755680995802578e-06, | |
| "loss": 0.0984, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.652946029264904, | |
| "grad_norm": 6.083117961883545, | |
| "learning_rate": 1.1610942249240124e-06, | |
| "loss": 0.0938, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 2.6572880031262214, | |
| "grad_norm": 9.920032501220703, | |
| "learning_rate": 1.1466203502677669e-06, | |
| "loss": 0.0894, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 2.6616299769875384, | |
| "grad_norm": 6.3701605796813965, | |
| "learning_rate": 1.1321464756115214e-06, | |
| "loss": 0.0821, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 2.665971950848856, | |
| "grad_norm": 1.6920627355575562, | |
| "learning_rate": 1.117672600955276e-06, | |
| "loss": 0.0709, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 2.6703139247101735, | |
| "grad_norm": 1.3545681238174438, | |
| "learning_rate": 1.1031987262990305e-06, | |
| "loss": 0.0866, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 2.6746558985714906, | |
| "grad_norm": 13.171427726745605, | |
| "learning_rate": 1.0887248516427848e-06, | |
| "loss": 0.0669, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 2.678997872432808, | |
| "grad_norm": 1.8995404243469238, | |
| "learning_rate": 1.0742509769865393e-06, | |
| "loss": 0.0911, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 2.6833398462941256, | |
| "grad_norm": 3.4048333168029785, | |
| "learning_rate": 1.0597771023302939e-06, | |
| "loss": 0.0642, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 2.6876818201554427, | |
| "grad_norm": 9.538905143737793, | |
| "learning_rate": 1.0453032276740484e-06, | |
| "loss": 0.0715, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 2.69202379401676, | |
| "grad_norm": 7.958563327789307, | |
| "learning_rate": 1.030829353017803e-06, | |
| "loss": 0.0726, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.6963657678780772, | |
| "grad_norm": 7.041146755218506, | |
| "learning_rate": 1.0163554783615575e-06, | |
| "loss": 0.0641, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 2.7007077417393948, | |
| "grad_norm": 6.417462348937988, | |
| "learning_rate": 1.001881603705312e-06, | |
| "loss": 0.0744, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 2.705049715600712, | |
| "grad_norm": 13.097779273986816, | |
| "learning_rate": 9.874077290490665e-07, | |
| "loss": 0.12, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 2.7093916894620294, | |
| "grad_norm": 8.754915237426758, | |
| "learning_rate": 9.72933854392821e-07, | |
| "loss": 0.0745, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 2.713733663323347, | |
| "grad_norm": 1.3674185276031494, | |
| "learning_rate": 9.584599797365756e-07, | |
| "loss": 0.0902, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 2.718075637184664, | |
| "grad_norm": 6.0695719718933105, | |
| "learning_rate": 9.4398610508033e-07, | |
| "loss": 0.0886, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 2.7224176110459815, | |
| "grad_norm": 7.46438455581665, | |
| "learning_rate": 9.295122304240846e-07, | |
| "loss": 0.1084, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 2.726759584907299, | |
| "grad_norm": 0.5424315929412842, | |
| "learning_rate": 9.150383557678391e-07, | |
| "loss": 0.0919, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 2.731101558768616, | |
| "grad_norm": 1.2682020664215088, | |
| "learning_rate": 9.005644811115936e-07, | |
| "loss": 0.0729, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 2.7354435326299336, | |
| "grad_norm": 1.612199306488037, | |
| "learning_rate": 8.860906064553482e-07, | |
| "loss": 0.0776, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.739785506491251, | |
| "grad_norm": 6.857169151306152, | |
| "learning_rate": 8.716167317991027e-07, | |
| "loss": 0.0829, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 2.744127480352568, | |
| "grad_norm": 8.613885879516602, | |
| "learning_rate": 8.571428571428572e-07, | |
| "loss": 0.0839, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 2.7484694542138857, | |
| "grad_norm": 3.3205294609069824, | |
| "learning_rate": 8.426689824866118e-07, | |
| "loss": 0.0703, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 2.752811428075203, | |
| "grad_norm": 0.2709499001502991, | |
| "learning_rate": 8.281951078303663e-07, | |
| "loss": 0.091, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 2.7571534019365203, | |
| "grad_norm": 0.9320999979972839, | |
| "learning_rate": 8.137212331741208e-07, | |
| "loss": 0.0905, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 2.7614953757978378, | |
| "grad_norm": 7.626304626464844, | |
| "learning_rate": 7.992473585178754e-07, | |
| "loss": 0.072, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 2.7658373496591553, | |
| "grad_norm": 0.484938383102417, | |
| "learning_rate": 7.847734838616298e-07, | |
| "loss": 0.0893, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 2.7701793235204724, | |
| "grad_norm": 3.4637513160705566, | |
| "learning_rate": 7.705890866985093e-07, | |
| "loss": 0.106, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 2.77452129738179, | |
| "grad_norm": 19.361818313598633, | |
| "learning_rate": 7.561152120422638e-07, | |
| "loss": 0.0739, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 2.7788632712431074, | |
| "grad_norm": 3.0003960132598877, | |
| "learning_rate": 7.416413373860183e-07, | |
| "loss": 0.0855, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.7832052451044245, | |
| "grad_norm": 11.666556358337402, | |
| "learning_rate": 7.271674627297728e-07, | |
| "loss": 0.083, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 2.787547218965742, | |
| "grad_norm": 1.3639492988586426, | |
| "learning_rate": 7.126935880735273e-07, | |
| "loss": 0.0843, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 2.791889192827059, | |
| "grad_norm": 0.856368362903595, | |
| "learning_rate": 6.982197134172819e-07, | |
| "loss": 0.0675, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 2.7962311666883766, | |
| "grad_norm": 4.6131272315979, | |
| "learning_rate": 6.837458387610364e-07, | |
| "loss": 0.0889, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 2.8005731405496936, | |
| "grad_norm": 5.821038722991943, | |
| "learning_rate": 6.692719641047909e-07, | |
| "loss": 0.0883, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 2.804915114411011, | |
| "grad_norm": 8.942717552185059, | |
| "learning_rate": 6.547980894485455e-07, | |
| "loss": 0.0767, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 2.8092570882723287, | |
| "grad_norm": 1.455705165863037, | |
| "learning_rate": 6.403242147923e-07, | |
| "loss": 0.0836, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 2.8135990621336457, | |
| "grad_norm": 0.6939010620117188, | |
| "learning_rate": 6.258503401360545e-07, | |
| "loss": 0.0696, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 2.8179410359949633, | |
| "grad_norm": 1.1920336484909058, | |
| "learning_rate": 6.11376465479809e-07, | |
| "loss": 0.0635, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 2.8222830098562808, | |
| "grad_norm": 7.259981155395508, | |
| "learning_rate": 5.969025908235635e-07, | |
| "loss": 0.0931, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.826624983717598, | |
| "grad_norm": 6.035602569580078, | |
| "learning_rate": 5.82428716167318e-07, | |
| "loss": 0.0826, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 2.8309669575789154, | |
| "grad_norm": 10.794766426086426, | |
| "learning_rate": 5.679548415110726e-07, | |
| "loss": 0.0552, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.835308931440233, | |
| "grad_norm": 4.196424961090088, | |
| "learning_rate": 5.534809668548271e-07, | |
| "loss": 0.055, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 2.83965090530155, | |
| "grad_norm": 7.877914905548096, | |
| "learning_rate": 5.390070921985816e-07, | |
| "loss": 0.0699, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 2.8439928791628675, | |
| "grad_norm": 2.6493046283721924, | |
| "learning_rate": 5.245332175423362e-07, | |
| "loss": 0.0687, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 2.848334853024185, | |
| "grad_norm": 5.444716453552246, | |
| "learning_rate": 5.100593428860907e-07, | |
| "loss": 0.0589, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.852676826885502, | |
| "grad_norm": 0.2738840878009796, | |
| "learning_rate": 4.955854682298452e-07, | |
| "loss": 0.0624, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 2.8570188007468196, | |
| "grad_norm": 7.651127338409424, | |
| "learning_rate": 4.811115935735998e-07, | |
| "loss": 0.0856, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 2.861360774608137, | |
| "grad_norm": 0.7363032698631287, | |
| "learning_rate": 4.666377189173542e-07, | |
| "loss": 0.0584, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 2.865702748469454, | |
| "grad_norm": 5.483317852020264, | |
| "learning_rate": 4.521638442611087e-07, | |
| "loss": 0.0671, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.8700447223307717, | |
| "grad_norm": 6.474352836608887, | |
| "learning_rate": 4.3768996960486325e-07, | |
| "loss": 0.0892, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 2.874386696192089, | |
| "grad_norm": 0.2653914988040924, | |
| "learning_rate": 4.232160949486178e-07, | |
| "loss": 0.0761, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 2.8787286700534063, | |
| "grad_norm": 0.05122917890548706, | |
| "learning_rate": 4.0874222029237227e-07, | |
| "loss": 0.0564, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 2.883070643914724, | |
| "grad_norm": 9.418333053588867, | |
| "learning_rate": 3.942683456361268e-07, | |
| "loss": 0.0655, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.887412617776041, | |
| "grad_norm": 7.8067307472229, | |
| "learning_rate": 3.7979447097988133e-07, | |
| "loss": 0.0626, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 2.8917545916373584, | |
| "grad_norm": 3.910670757293701, | |
| "learning_rate": 3.6532059632363587e-07, | |
| "loss": 0.0855, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 2.8960965654986754, | |
| "grad_norm": 7.972170352935791, | |
| "learning_rate": 3.508467216673904e-07, | |
| "loss": 0.0488, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 2.900438539359993, | |
| "grad_norm": 10.280926704406738, | |
| "learning_rate": 3.3637284701114493e-07, | |
| "loss": 0.0631, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.9047805132213105, | |
| "grad_norm": 4.887994766235352, | |
| "learning_rate": 3.2189897235489947e-07, | |
| "loss": 0.0731, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 2.9091224870826276, | |
| "grad_norm": 14.115626335144043, | |
| "learning_rate": 3.0742509769865395e-07, | |
| "loss": 0.0877, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.913464460943945, | |
| "grad_norm": 10.208263397216797, | |
| "learning_rate": 2.929512230424085e-07, | |
| "loss": 0.0975, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 2.9178064348052626, | |
| "grad_norm": 1.1648341417312622, | |
| "learning_rate": 2.78477348386163e-07, | |
| "loss": 0.0761, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.9221484086665797, | |
| "grad_norm": 6.795315265655518, | |
| "learning_rate": 2.640034737299175e-07, | |
| "loss": 0.0777, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 2.926490382527897, | |
| "grad_norm": 7.7475762367248535, | |
| "learning_rate": 2.4952959907367203e-07, | |
| "loss": 0.0667, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 2.9308323563892147, | |
| "grad_norm": 0.7257171273231506, | |
| "learning_rate": 2.3505572441742656e-07, | |
| "loss": 0.0832, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 2.9351743302505318, | |
| "grad_norm": 7.833991527557373, | |
| "learning_rate": 2.205818497611811e-07, | |
| "loss": 0.0845, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.9395163041118493, | |
| "grad_norm": 5.486457347869873, | |
| "learning_rate": 2.0610797510493563e-07, | |
| "loss": 0.0899, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 2.943858277973167, | |
| "grad_norm": 7.7840375900268555, | |
| "learning_rate": 1.916341004486901e-07, | |
| "loss": 0.0751, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 2.948200251834484, | |
| "grad_norm": 6.603894233703613, | |
| "learning_rate": 1.7716022579244464e-07, | |
| "loss": 0.0644, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 2.9525422256958014, | |
| "grad_norm": 4.2936482429504395, | |
| "learning_rate": 1.6268635113619917e-07, | |
| "loss": 0.0853, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.956884199557119, | |
| "grad_norm": 3.4259464740753174, | |
| "learning_rate": 1.482124764799537e-07, | |
| "loss": 0.059, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 2.961226173418436, | |
| "grad_norm": 5.449902057647705, | |
| "learning_rate": 1.3402807931683313e-07, | |
| "loss": 0.077, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 2.9655681472797535, | |
| "grad_norm": 0.609876275062561, | |
| "learning_rate": 1.1955420466058764e-07, | |
| "loss": 0.0661, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 2.969910121141071, | |
| "grad_norm": 4.248520851135254, | |
| "learning_rate": 1.0508033000434217e-07, | |
| "loss": 0.0977, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.974252095002388, | |
| "grad_norm": 2.3178467750549316, | |
| "learning_rate": 9.060645534809668e-08, | |
| "loss": 0.0972, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 2.9785940688637056, | |
| "grad_norm": 4.120424747467041, | |
| "learning_rate": 7.613258069185121e-08, | |
| "loss": 0.0727, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 2.9829360427250227, | |
| "grad_norm": 8.699881553649902, | |
| "learning_rate": 6.165870603560573e-08, | |
| "loss": 0.0642, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 2.98727801658634, | |
| "grad_norm": 8.902776718139648, | |
| "learning_rate": 4.718483137936026e-08, | |
| "loss": 0.0706, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.9916199904476573, | |
| "grad_norm": 4.606892108917236, | |
| "learning_rate": 3.271095672311478e-08, | |
| "loss": 0.0996, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 2.9959619643089748, | |
| "grad_norm": 8.253307342529297, | |
| "learning_rate": 1.82370820668693e-08, | |
| "loss": 0.0519, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.9998697407841606, | |
| "eval_loss": 0.21618051826953888, | |
| "eval_runtime": 157.9614, | |
| "eval_samples_per_second": 64.801, | |
| "eval_steps_per_second": 16.2, | |
| "step": 34545 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 34545, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.97534612324352e+19, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |