|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.913669064748202, |
|
"eval_steps": 500, |
|
"global_step": 828, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.014388489208633094, |
|
"grad_norm": 3.779401339094513, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 3.8262, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02877697841726619, |
|
"grad_norm": 10.455825805969267, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 6.7511, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.04316546762589928, |
|
"grad_norm": 5.6185654464107815, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 4.6228, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05755395683453238, |
|
"grad_norm": 4.885801768876069, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 4.2241, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07194244604316546, |
|
"grad_norm": 10.403297492152396, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 6.7416, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08633093525179857, |
|
"grad_norm": 4.5044178093473395, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 4.1637, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.10071942446043165, |
|
"grad_norm": 4.734693138114069, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 4.3628, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.11510791366906475, |
|
"grad_norm": 4.288727085080122, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 4.0006, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.12949640287769784, |
|
"grad_norm": 6.746488042337733, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 5.1247, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.14388489208633093, |
|
"grad_norm": 4.730837471611876, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 4.167, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15827338129496402, |
|
"grad_norm": 5.617511430019142, |
|
"learning_rate": 8.8e-06, |
|
"loss": 4.8073, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.17266187050359713, |
|
"grad_norm": 4.585866335374106, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 4.1609, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.18705035971223022, |
|
"grad_norm": 11.787607704302536, |
|
"learning_rate": 1.04e-05, |
|
"loss": 7.1837, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.2014388489208633, |
|
"grad_norm": 4.123753478777725, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 3.9041, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.2158273381294964, |
|
"grad_norm": 3.8762103667223227, |
|
"learning_rate": 1.2e-05, |
|
"loss": 3.7087, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2302158273381295, |
|
"grad_norm": 4.21611793164487, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 3.8878, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2446043165467626, |
|
"grad_norm": 4.614901815617855, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 4.098, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2589928057553957, |
|
"grad_norm": 3.8977663337164286, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 3.6943, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2733812949640288, |
|
"grad_norm": 4.8023114000216465, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 4.0675, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.28776978417266186, |
|
"grad_norm": 4.371540379053842, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 3.8599, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.302158273381295, |
|
"grad_norm": 5.726889530637721, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 4.5061, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.31654676258992803, |
|
"grad_norm": 3.9969844690452887, |
|
"learning_rate": 1.76e-05, |
|
"loss": 3.6472, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.33093525179856115, |
|
"grad_norm": 11.516154261012499, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 6.2924, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.34532374100719426, |
|
"grad_norm": 5.202990002676231, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 4.0705, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.3597122302158273, |
|
"grad_norm": 4.176782643264017, |
|
"learning_rate": 2e-05, |
|
"loss": 3.5909, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.37410071942446044, |
|
"grad_norm": 6.424896250724197, |
|
"learning_rate": 1.9999923468873635e-05, |
|
"loss": 4.2519, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.38848920863309355, |
|
"grad_norm": 3.814366825221532, |
|
"learning_rate": 1.999969387666594e-05, |
|
"loss": 3.2879, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.4028776978417266, |
|
"grad_norm": 10.112357856323287, |
|
"learning_rate": 1.9999311226891104e-05, |
|
"loss": 5.1954, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4172661870503597, |
|
"grad_norm": 5.7174917457637795, |
|
"learning_rate": 1.999877552540605e-05, |
|
"loss": 3.7522, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.4316546762589928, |
|
"grad_norm": 10.763244722760309, |
|
"learning_rate": 1.9998086780410353e-05, |
|
"loss": 4.8527, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4460431654676259, |
|
"grad_norm": 3.982452930852662, |
|
"learning_rate": 1.999724500244609e-05, |
|
"loss": 3.1306, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.460431654676259, |
|
"grad_norm": 5.709026877118652, |
|
"learning_rate": 1.999625020439771e-05, |
|
"loss": 3.3423, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.4748201438848921, |
|
"grad_norm": 6.659790318204302, |
|
"learning_rate": 1.999510240149181e-05, |
|
"loss": 3.4041, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.4892086330935252, |
|
"grad_norm": 4.7853395869132545, |
|
"learning_rate": 1.9993801611296923e-05, |
|
"loss": 2.985, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5035971223021583, |
|
"grad_norm": 4.922928181710041, |
|
"learning_rate": 1.999234785372324e-05, |
|
"loss": 2.8203, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5179856115107914, |
|
"grad_norm": 5.760568951577959, |
|
"learning_rate": 1.9990741151022302e-05, |
|
"loss": 2.6983, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.5323741007194245, |
|
"grad_norm": 4.4691409045575075, |
|
"learning_rate": 1.9988981527786656e-05, |
|
"loss": 2.5688, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.5467625899280576, |
|
"grad_norm": 7.274174017956967, |
|
"learning_rate": 1.99870690109495e-05, |
|
"loss": 2.6621, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5611510791366906, |
|
"grad_norm": 4.569137013282271, |
|
"learning_rate": 1.9985003629784237e-05, |
|
"loss": 2.5249, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5755395683453237, |
|
"grad_norm": 2.929731882279866, |
|
"learning_rate": 1.9982785415904063e-05, |
|
"loss": 2.4861, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5899280575539568, |
|
"grad_norm": 3.7590625764657815, |
|
"learning_rate": 1.998041440326146e-05, |
|
"loss": 2.3703, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.60431654676259, |
|
"grad_norm": 3.113972586690868, |
|
"learning_rate": 1.9977890628147684e-05, |
|
"loss": 2.3579, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6187050359712231, |
|
"grad_norm": 4.310115895361215, |
|
"learning_rate": 1.99752141291922e-05, |
|
"loss": 2.4436, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.6330935251798561, |
|
"grad_norm": 3.609141873223579, |
|
"learning_rate": 1.99723849473621e-05, |
|
"loss": 2.3687, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.6474820143884892, |
|
"grad_norm": 2.889773990288414, |
|
"learning_rate": 1.996940312596149e-05, |
|
"loss": 2.2521, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6618705035971223, |
|
"grad_norm": 2.8196363081660745, |
|
"learning_rate": 1.9966268710630795e-05, |
|
"loss": 2.1572, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6762589928057554, |
|
"grad_norm": 3.5289128758260255, |
|
"learning_rate": 1.996298174934608e-05, |
|
"loss": 2.2095, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6906474820143885, |
|
"grad_norm": 4.166486783655687, |
|
"learning_rate": 1.9959542292418317e-05, |
|
"loss": 2.0916, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7050359712230215, |
|
"grad_norm": 3.6766424577128047, |
|
"learning_rate": 1.9955950392492604e-05, |
|
"loss": 2.0578, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.7194244604316546, |
|
"grad_norm": 2.989826753407446, |
|
"learning_rate": 1.9952206104547378e-05, |
|
"loss": 2.0855, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7338129496402878, |
|
"grad_norm": 6.40215562149978, |
|
"learning_rate": 1.994830948589355e-05, |
|
"loss": 1.8788, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.7482014388489209, |
|
"grad_norm": 4.072155747218333, |
|
"learning_rate": 1.9944260596173642e-05, |
|
"loss": 1.9819, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.762589928057554, |
|
"grad_norm": 3.724293840264578, |
|
"learning_rate": 1.9940059497360874e-05, |
|
"loss": 1.8445, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.7769784172661871, |
|
"grad_norm": 3.406626514519604, |
|
"learning_rate": 1.9935706253758206e-05, |
|
"loss": 1.9222, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.7913669064748201, |
|
"grad_norm": 3.3579698036375034, |
|
"learning_rate": 1.9931200931997372e-05, |
|
"loss": 1.716, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8057553956834532, |
|
"grad_norm": 3.430514351410994, |
|
"learning_rate": 1.9926543601037843e-05, |
|
"loss": 1.795, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.8201438848920863, |
|
"grad_norm": 3.0933971993062017, |
|
"learning_rate": 1.992173433216577e-05, |
|
"loss": 1.6326, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.8345323741007195, |
|
"grad_norm": 2.658306477244865, |
|
"learning_rate": 1.99167731989929e-05, |
|
"loss": 1.8422, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.8489208633093526, |
|
"grad_norm": 2.609103199735189, |
|
"learning_rate": 1.9911660277455473e-05, |
|
"loss": 1.7832, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.8633093525179856, |
|
"grad_norm": 3.0489907374135896, |
|
"learning_rate": 1.9906395645813e-05, |
|
"loss": 1.5908, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8776978417266187, |
|
"grad_norm": 2.9154327708322803, |
|
"learning_rate": 1.990097938464713e-05, |
|
"loss": 1.5599, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.8920863309352518, |
|
"grad_norm": 2.6855178783593328, |
|
"learning_rate": 1.989541157686037e-05, |
|
"loss": 1.6876, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.9064748201438849, |
|
"grad_norm": 2.410132159045943, |
|
"learning_rate": 1.9889692307674847e-05, |
|
"loss": 1.5693, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.920863309352518, |
|
"grad_norm": 2.6119620830871244, |
|
"learning_rate": 1.9883821664630977e-05, |
|
"loss": 1.4076, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.935251798561151, |
|
"grad_norm": 2.5077319176478996, |
|
"learning_rate": 1.987779973758615e-05, |
|
"loss": 1.5181, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9496402877697842, |
|
"grad_norm": 2.8061485591127266, |
|
"learning_rate": 1.987162661871333e-05, |
|
"loss": 1.3751, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.9640287769784173, |
|
"grad_norm": 2.685195543198549, |
|
"learning_rate": 1.986530240249968e-05, |
|
"loss": 1.3248, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.9784172661870504, |
|
"grad_norm": 2.6515044506712546, |
|
"learning_rate": 1.985882718574506e-05, |
|
"loss": 1.2112, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.9928057553956835, |
|
"grad_norm": 2.1678461224085193, |
|
"learning_rate": 1.9852201067560607e-05, |
|
"loss": 1.3792, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.0071942446043165, |
|
"grad_norm": 2.231749821096067, |
|
"learning_rate": 1.984542414936718e-05, |
|
"loss": 1.2438, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0215827338129497, |
|
"grad_norm": 2.5781231491148353, |
|
"learning_rate": 1.9838496534893807e-05, |
|
"loss": 1.5007, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.0359712230215827, |
|
"grad_norm": 2.5108817806595574, |
|
"learning_rate": 1.9831418330176127e-05, |
|
"loss": 1.4368, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0503597122302157, |
|
"grad_norm": 1.9489467948836736, |
|
"learning_rate": 1.9824189643554724e-05, |
|
"loss": 1.2176, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.064748201438849, |
|
"grad_norm": 2.431713013736794, |
|
"learning_rate": 1.9816810585673515e-05, |
|
"loss": 1.2662, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.079136690647482, |
|
"grad_norm": 2.615444743987483, |
|
"learning_rate": 1.9809281269478015e-05, |
|
"loss": 0.6466, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.0935251798561152, |
|
"grad_norm": 1.9780475992590945, |
|
"learning_rate": 1.9801601810213634e-05, |
|
"loss": 1.1773, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.1079136690647482, |
|
"grad_norm": 2.738489524134038, |
|
"learning_rate": 1.979377232542391e-05, |
|
"loss": 0.7522, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.1223021582733812, |
|
"grad_norm": 2.92031012217587, |
|
"learning_rate": 1.9785792934948697e-05, |
|
"loss": 1.2811, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.1366906474820144, |
|
"grad_norm": 2.1812951842981407, |
|
"learning_rate": 1.9777663760922342e-05, |
|
"loss": 1.2223, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.1510791366906474, |
|
"grad_norm": 1.9774480380800536, |
|
"learning_rate": 1.976938492777182e-05, |
|
"loss": 1.2216, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1654676258992807, |
|
"grad_norm": 2.0162999343359904, |
|
"learning_rate": 1.9760956562214808e-05, |
|
"loss": 1.1783, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.1798561151079137, |
|
"grad_norm": 1.8114249309162656, |
|
"learning_rate": 1.9752378793257777e-05, |
|
"loss": 0.9817, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.1942446043165469, |
|
"grad_norm": 2.7290657585488143, |
|
"learning_rate": 1.9743651752193983e-05, |
|
"loss": 0.9542, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.20863309352518, |
|
"grad_norm": 2.017011266770035, |
|
"learning_rate": 1.9734775572601487e-05, |
|
"loss": 1.1217, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.223021582733813, |
|
"grad_norm": 1.748500187733016, |
|
"learning_rate": 1.9725750390341093e-05, |
|
"loss": 0.7081, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.2374100719424461, |
|
"grad_norm": 5.7387726390350515, |
|
"learning_rate": 1.9716576343554274e-05, |
|
"loss": 0.7381, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.2517985611510791, |
|
"grad_norm": 2.160256977147074, |
|
"learning_rate": 1.9707253572661057e-05, |
|
"loss": 1.0861, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.2661870503597124, |
|
"grad_norm": 2.4513022300810223, |
|
"learning_rate": 1.969778222035787e-05, |
|
"loss": 1.0924, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.2805755395683454, |
|
"grad_norm": 2.2964346352019116, |
|
"learning_rate": 1.9688162431615367e-05, |
|
"loss": 0.7906, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.2949640287769784, |
|
"grad_norm": 2.760514099820931, |
|
"learning_rate": 1.9678394353676203e-05, |
|
"loss": 1.0421, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3093525179856116, |
|
"grad_norm": 2.977379886340304, |
|
"learning_rate": 1.9668478136052776e-05, |
|
"loss": 1.0089, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.3237410071942446, |
|
"grad_norm": 3.228823722579014, |
|
"learning_rate": 1.9658413930524955e-05, |
|
"loss": 0.882, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.3381294964028778, |
|
"grad_norm": 1.8879288085516621, |
|
"learning_rate": 1.9648201891137725e-05, |
|
"loss": 0.8884, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.3525179856115108, |
|
"grad_norm": 5.701861760835263, |
|
"learning_rate": 1.963784217419887e-05, |
|
"loss": 0.5543, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.3669064748201438, |
|
"grad_norm": 1.9714095423076823, |
|
"learning_rate": 1.9627334938276547e-05, |
|
"loss": 0.9301, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.381294964028777, |
|
"grad_norm": 2.440289446102772, |
|
"learning_rate": 1.961668034419688e-05, |
|
"loss": 0.5015, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.39568345323741, |
|
"grad_norm": 2.3315013817563237, |
|
"learning_rate": 1.9605878555041484e-05, |
|
"loss": 0.9329, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.4100719424460433, |
|
"grad_norm": 2.543223303151188, |
|
"learning_rate": 1.9594929736144978e-05, |
|
"loss": 0.981, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.4244604316546763, |
|
"grad_norm": 2.2968302939118486, |
|
"learning_rate": 1.9583834055092446e-05, |
|
"loss": 0.8583, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.4388489208633093, |
|
"grad_norm": 2.3678435949631287, |
|
"learning_rate": 1.9572591681716888e-05, |
|
"loss": 0.9773, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4532374100719425, |
|
"grad_norm": 5.699515821984953, |
|
"learning_rate": 1.95612027880966e-05, |
|
"loss": 0.5195, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.4676258992805755, |
|
"grad_norm": 2.4763879942295812, |
|
"learning_rate": 1.9549667548552557e-05, |
|
"loss": 0.7692, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.4820143884892087, |
|
"grad_norm": 1.726706976599975, |
|
"learning_rate": 1.9537986139645724e-05, |
|
"loss": 0.7894, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.4964028776978417, |
|
"grad_norm": 2.880795266012809, |
|
"learning_rate": 1.9526158740174392e-05, |
|
"loss": 0.8268, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.5107913669064748, |
|
"grad_norm": 2.2837687142476737, |
|
"learning_rate": 1.951418553117139e-05, |
|
"loss": 0.7285, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.5251798561151078, |
|
"grad_norm": 2.401929061303925, |
|
"learning_rate": 1.950206669590136e-05, |
|
"loss": 0.7437, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.539568345323741, |
|
"grad_norm": 2.109686331424331, |
|
"learning_rate": 1.9489802419857918e-05, |
|
"loss": 0.7687, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.5539568345323742, |
|
"grad_norm": 2.52807406566671, |
|
"learning_rate": 1.947739289076084e-05, |
|
"loss": 0.7827, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.5683453237410072, |
|
"grad_norm": 2.399711195058357, |
|
"learning_rate": 1.9464838298553172e-05, |
|
"loss": 0.4237, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.5827338129496402, |
|
"grad_norm": 2.06327086788237, |
|
"learning_rate": 1.9452138835398333e-05, |
|
"loss": 0.6328, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5971223021582732, |
|
"grad_norm": 1.8934017549759041, |
|
"learning_rate": 1.9439294695677168e-05, |
|
"loss": 0.7544, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.6115107913669064, |
|
"grad_norm": 1.7326175063319036, |
|
"learning_rate": 1.9426306075984968e-05, |
|
"loss": 0.5431, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.6258992805755397, |
|
"grad_norm": 2.1015742280893184, |
|
"learning_rate": 1.9413173175128472e-05, |
|
"loss": 0.6663, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.6402877697841727, |
|
"grad_norm": 2.231853898087344, |
|
"learning_rate": 1.9399896194122824e-05, |
|
"loss": 0.6107, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.6546762589928057, |
|
"grad_norm": 2.090875801066189, |
|
"learning_rate": 1.9386475336188484e-05, |
|
"loss": 0.6786, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.6690647482014387, |
|
"grad_norm": 2.3679863454534678, |
|
"learning_rate": 1.9372910806748124e-05, |
|
"loss": 0.5826, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.683453237410072, |
|
"grad_norm": 2.0143151155086585, |
|
"learning_rate": 1.935920281342349e-05, |
|
"loss": 0.5107, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.6978417266187051, |
|
"grad_norm": 2.0579871874173326, |
|
"learning_rate": 1.934535156603222e-05, |
|
"loss": 0.5457, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.7122302158273381, |
|
"grad_norm": 2.0248928133131505, |
|
"learning_rate": 1.933135727658462e-05, |
|
"loss": 0.5204, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.7266187050359711, |
|
"grad_norm": 2.143170055281987, |
|
"learning_rate": 1.931722015928044e-05, |
|
"loss": 0.4414, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.7410071942446042, |
|
"grad_norm": 3.8092005485468086, |
|
"learning_rate": 1.930294043050558e-05, |
|
"loss": 0.3688, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.7553956834532374, |
|
"grad_norm": 1.989587109704384, |
|
"learning_rate": 1.928851830882879e-05, |
|
"loss": 0.4322, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.7697841726618706, |
|
"grad_norm": 2.12538729505454, |
|
"learning_rate": 1.9273954014998307e-05, |
|
"loss": 0.3567, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.7841726618705036, |
|
"grad_norm": 2.0988722867429135, |
|
"learning_rate": 1.92592477719385e-05, |
|
"loss": 0.4249, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.7985611510791366, |
|
"grad_norm": 2.0670095637448815, |
|
"learning_rate": 1.9244399804746436e-05, |
|
"loss": 0.4687, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.8129496402877698, |
|
"grad_norm": 2.9462308640831436, |
|
"learning_rate": 1.9229410340688442e-05, |
|
"loss": 0.4576, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.8273381294964028, |
|
"grad_norm": 2.2511867293077144, |
|
"learning_rate": 1.9214279609196632e-05, |
|
"loss": 0.361, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.841726618705036, |
|
"grad_norm": 2.9578408313586224, |
|
"learning_rate": 1.9199007841865395e-05, |
|
"loss": 0.3939, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.856115107913669, |
|
"grad_norm": 1.67569117249568, |
|
"learning_rate": 1.9183595272447843e-05, |
|
"loss": 0.3387, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.870503597122302, |
|
"grad_norm": 1.922970389381037, |
|
"learning_rate": 1.9168042136852228e-05, |
|
"loss": 0.3162, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.8848920863309353, |
|
"grad_norm": 8.10597561850986, |
|
"learning_rate": 1.9152348673138355e-05, |
|
"loss": 0.2718, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.8992805755395683, |
|
"grad_norm": 4.044552909471015, |
|
"learning_rate": 1.913651512151391e-05, |
|
"loss": 0.2843, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.9136690647482015, |
|
"grad_norm": 1.8361101289997892, |
|
"learning_rate": 1.9120541724330802e-05, |
|
"loss": 0.2922, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.9280575539568345, |
|
"grad_norm": 1.8567473300234578, |
|
"learning_rate": 1.910442872608145e-05, |
|
"loss": 0.2136, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.9424460431654675, |
|
"grad_norm": 2.263518478480423, |
|
"learning_rate": 1.908817637339503e-05, |
|
"loss": 0.2331, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.9568345323741008, |
|
"grad_norm": 2.655129174999631, |
|
"learning_rate": 1.9071784915033717e-05, |
|
"loss": 0.2805, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.9712230215827338, |
|
"grad_norm": 2.349586516019947, |
|
"learning_rate": 1.9055254601888867e-05, |
|
"loss": 0.3259, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.985611510791367, |
|
"grad_norm": 2.03687355334055, |
|
"learning_rate": 1.9038585686977168e-05, |
|
"loss": 0.2869, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.9583687224064192, |
|
"learning_rate": 1.9021778425436797e-05, |
|
"loss": 0.3408, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.014388489208633, |
|
"grad_norm": 1.91826726074781, |
|
"learning_rate": 1.9004833074523478e-05, |
|
"loss": 0.2307, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.028776978417266, |
|
"grad_norm": 1.1616239437089266, |
|
"learning_rate": 1.8987749893606575e-05, |
|
"loss": 0.158, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.0431654676258995, |
|
"grad_norm": 2.661585557841078, |
|
"learning_rate": 1.8970529144165103e-05, |
|
"loss": 0.182, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.0575539568345325, |
|
"grad_norm": 1.4648504478084143, |
|
"learning_rate": 1.8953171089783725e-05, |
|
"loss": 0.1868, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.0719424460431655, |
|
"grad_norm": 1.6563021788099919, |
|
"learning_rate": 1.8935675996148738e-05, |
|
"loss": 0.2079, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.0863309352517985, |
|
"grad_norm": 1.765245657702871, |
|
"learning_rate": 1.8918044131043987e-05, |
|
"loss": 0.2056, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.1007194244604315, |
|
"grad_norm": 1.5438608872021158, |
|
"learning_rate": 1.890027576434677e-05, |
|
"loss": 0.2635, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.115107913669065, |
|
"grad_norm": 2.2956532783932535, |
|
"learning_rate": 1.8882371168023708e-05, |
|
"loss": 0.2029, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.129496402877698, |
|
"grad_norm": 1.4619363151073599, |
|
"learning_rate": 1.8864330616126586e-05, |
|
"loss": 0.155, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.143884892086331, |
|
"grad_norm": 2.1621204649874124, |
|
"learning_rate": 1.8846154384788162e-05, |
|
"loss": 0.1719, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.158273381294964, |
|
"grad_norm": 2.7166855043485967, |
|
"learning_rate": 1.8827842752217917e-05, |
|
"loss": 0.1819, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.172661870503597, |
|
"grad_norm": 1.8834021077536263, |
|
"learning_rate": 1.8809395998697835e-05, |
|
"loss": 0.1828, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.1870503597122304, |
|
"grad_norm": 1.5733332816754892, |
|
"learning_rate": 1.8790814406578073e-05, |
|
"loss": 0.2194, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.2014388489208634, |
|
"grad_norm": 1.9669982488920634, |
|
"learning_rate": 1.877209826027267e-05, |
|
"loss": 0.164, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.2158273381294964, |
|
"grad_norm": 10.292309004798875, |
|
"learning_rate": 1.8753247846255175e-05, |
|
"loss": 0.2773, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.2302158273381294, |
|
"grad_norm": 2.2748735550997563, |
|
"learning_rate": 1.8734263453054274e-05, |
|
"loss": 0.1718, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.2446043165467624, |
|
"grad_norm": 2.8655891892789866, |
|
"learning_rate": 1.871514537124936e-05, |
|
"loss": 0.1579, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.258992805755396, |
|
"grad_norm": 2.217166529615519, |
|
"learning_rate": 1.869589389346611e-05, |
|
"loss": 0.1787, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.273381294964029, |
|
"grad_norm": 1.8903126047188956, |
|
"learning_rate": 1.8676509314371977e-05, |
|
"loss": 0.1848, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.287769784172662, |
|
"grad_norm": 2.22331229134063, |
|
"learning_rate": 1.8656991930671687e-05, |
|
"loss": 0.1547, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.302158273381295, |
|
"grad_norm": 2.4540086773291323, |
|
"learning_rate": 1.863734204110272e-05, |
|
"loss": 0.1621, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.316546762589928, |
|
"grad_norm": 2.718097280283145, |
|
"learning_rate": 1.861755994643071e-05, |
|
"loss": 0.1644, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.3309352517985613, |
|
"grad_norm": 2.4404623229012996, |
|
"learning_rate": 1.859764594944485e-05, |
|
"loss": 0.1555, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.3453237410071943, |
|
"grad_norm": 2.3253380241917685, |
|
"learning_rate": 1.8577600354953273e-05, |
|
"loss": 0.1524, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.3597122302158273, |
|
"grad_norm": 2.4646303579458353, |
|
"learning_rate": 1.8557423469778356e-05, |
|
"loss": 0.1473, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.3741007194244603, |
|
"grad_norm": 1.5376405445606527, |
|
"learning_rate": 1.8537115602752054e-05, |
|
"loss": 0.1495, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.3884892086330938, |
|
"grad_norm": 4.745077306666788, |
|
"learning_rate": 1.851667706471115e-05, |
|
"loss": 0.1821, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.402877697841727, |
|
"grad_norm": 2.552588156434035, |
|
"learning_rate": 1.8496108168492518e-05, |
|
"loss": 0.1319, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.41726618705036, |
|
"grad_norm": 1.8367052946729043, |
|
"learning_rate": 1.8475409228928314e-05, |
|
"loss": 0.1349, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.431654676258993, |
|
"grad_norm": 2.5445371080031314, |
|
"learning_rate": 1.8454580562841165e-05, |
|
"loss": 0.13, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.446043165467626, |
|
"grad_norm": 2.344234909741198, |
|
"learning_rate": 1.8433622489039333e-05, |
|
"loss": 0.1506, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.460431654676259, |
|
"grad_norm": 2.376247631671717, |
|
"learning_rate": 1.8412535328311813e-05, |
|
"loss": 0.1344, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.4748201438848922, |
|
"grad_norm": 2.9461424922262784, |
|
"learning_rate": 1.839131940342344e-05, |
|
"loss": 0.1483, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.4892086330935252, |
|
"grad_norm": 1.9976524594362375, |
|
"learning_rate": 1.8369975039109937e-05, |
|
"loss": 0.1803, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.5035971223021583, |
|
"grad_norm": 1.0443749894322358, |
|
"learning_rate": 1.8348502562072955e-05, |
|
"loss": 0.1171, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.5179856115107913, |
|
"grad_norm": 1.4361047756192113, |
|
"learning_rate": 1.8326902300975063e-05, |
|
"loss": 0.149, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.5323741007194247, |
|
"grad_norm": 1.6346883755053931, |
|
"learning_rate": 1.8305174586434724e-05, |
|
"loss": 0.1444, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.5467625899280577, |
|
"grad_norm": 1.6834887574364132, |
|
"learning_rate": 1.828331975102123e-05, |
|
"loss": 0.1144, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.5611510791366907, |
|
"grad_norm": 1.354227075277937, |
|
"learning_rate": 1.8261338129249623e-05, |
|
"loss": 0.1178, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.5755395683453237, |
|
"grad_norm": 2.2741550896246587, |
|
"learning_rate": 1.8239230057575542e-05, |
|
"loss": 0.1534, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.5899280575539567, |
|
"grad_norm": 1.1972242744740567, |
|
"learning_rate": 1.8216995874390128e-05, |
|
"loss": 0.0885, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.6043165467625897, |
|
"grad_norm": 1.973587237520198, |
|
"learning_rate": 1.819463592001479e-05, |
|
"loss": 0.135, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.618705035971223, |
|
"grad_norm": 3.1272879249109753, |
|
"learning_rate": 1.817215053669603e-05, |
|
"loss": 0.1586, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.633093525179856, |
|
"grad_norm": 2.191338511461004, |
|
"learning_rate": 1.814954006860018e-05, |
|
"loss": 0.1416, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.647482014388489, |
|
"grad_norm": 1.572355984168845, |
|
"learning_rate": 1.8126804861808175e-05, |
|
"loss": 0.1185, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.661870503597122, |
|
"grad_norm": 0.975274936777776, |
|
"learning_rate": 1.81039452643102e-05, |
|
"loss": 0.1, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.6762589928057556, |
|
"grad_norm": 1.6008482801863315, |
|
"learning_rate": 1.808096162600041e-05, |
|
"loss": 0.1051, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.6906474820143886, |
|
"grad_norm": 1.7704784936509204, |
|
"learning_rate": 1.8057854298671545e-05, |
|
"loss": 0.13, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.7050359712230216, |
|
"grad_norm": 1.8920163138110342, |
|
"learning_rate": 1.803462363600957e-05, |
|
"loss": 0.1458, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.7194244604316546, |
|
"grad_norm": 5.176691025598706, |
|
"learning_rate": 1.8011269993588234e-05, |
|
"loss": 0.1791, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.7338129496402876, |
|
"grad_norm": 1.6375993170680554, |
|
"learning_rate": 1.798779372886365e-05, |
|
"loss": 0.1177, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.7482014388489207, |
|
"grad_norm": 4.122622959327029, |
|
"learning_rate": 1.796419520116882e-05, |
|
"loss": 0.26, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 2.762589928057554, |
|
"grad_norm": 1.902781478532848, |
|
"learning_rate": 1.7940474771708118e-05, |
|
"loss": 0.1298, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.776978417266187, |
|
"grad_norm": 1.7134666967364445, |
|
"learning_rate": 1.791663280355178e-05, |
|
"loss": 0.1075, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 2.79136690647482, |
|
"grad_norm": 2.1429774203344274, |
|
"learning_rate": 1.789266966163035e-05, |
|
"loss": 0.1131, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 2.805755395683453, |
|
"grad_norm": 2.1096187128500317, |
|
"learning_rate": 1.786858571272907e-05, |
|
"loss": 0.1407, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.8201438848920866, |
|
"grad_norm": 1.7461053345159192, |
|
"learning_rate": 1.7844381325482293e-05, |
|
"loss": 0.0962, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 2.8345323741007196, |
|
"grad_norm": 2.272810785009126, |
|
"learning_rate": 1.7820056870367813e-05, |
|
"loss": 0.1982, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 2.8489208633093526, |
|
"grad_norm": 1.887371974290098, |
|
"learning_rate": 1.7795612719701228e-05, |
|
"loss": 0.1436, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.8633093525179856, |
|
"grad_norm": 1.8778160079306951, |
|
"learning_rate": 1.7771049247630215e-05, |
|
"loss": 0.1218, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 2.8776978417266186, |
|
"grad_norm": 1.1423107655269147, |
|
"learning_rate": 1.7746366830128803e-05, |
|
"loss": 0.0901, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.8920863309352516, |
|
"grad_norm": 1.5837299145510542, |
|
"learning_rate": 1.7721565844991643e-05, |
|
"loss": 0.0799, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 2.906474820143885, |
|
"grad_norm": 1.3786215691627017, |
|
"learning_rate": 1.76966466718282e-05, |
|
"loss": 0.1063, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 2.920863309352518, |
|
"grad_norm": 2.048529831050718, |
|
"learning_rate": 1.7671609692056946e-05, |
|
"loss": 0.1188, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 2.935251798561151, |
|
"grad_norm": 4.107034047711157, |
|
"learning_rate": 1.7646455288899535e-05, |
|
"loss": 0.1608, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 2.949640287769784, |
|
"grad_norm": 1.419074613907907, |
|
"learning_rate": 1.7621183847374935e-05, |
|
"loss": 0.0947, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.9640287769784175, |
|
"grad_norm": 1.7241505506887336, |
|
"learning_rate": 1.7595795754293514e-05, |
|
"loss": 0.0933, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 2.9784172661870505, |
|
"grad_norm": 5.437440138394324, |
|
"learning_rate": 1.7570291398251153e-05, |
|
"loss": 0.1616, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 2.9928057553956835, |
|
"grad_norm": 1.20883185060423, |
|
"learning_rate": 1.7544671169623263e-05, |
|
"loss": 0.0926, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.0071942446043165, |
|
"grad_norm": 1.4389531365223567, |
|
"learning_rate": 1.751893546055884e-05, |
|
"loss": 0.079, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 3.0215827338129495, |
|
"grad_norm": 2.9037490869241642, |
|
"learning_rate": 1.749308466497444e-05, |
|
"loss": 0.1041, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0359712230215825, |
|
"grad_norm": 2.3752741387378062, |
|
"learning_rate": 1.746711917854817e-05, |
|
"loss": 0.1602, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 3.050359712230216, |
|
"grad_norm": 3.434671646120461, |
|
"learning_rate": 1.744103939871361e-05, |
|
"loss": 0.1553, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.064748201438849, |
|
"grad_norm": 1.7548341283748703, |
|
"learning_rate": 1.7414845724653743e-05, |
|
"loss": 0.1046, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.079136690647482, |
|
"grad_norm": 2.4665177125279514, |
|
"learning_rate": 1.738853855729485e-05, |
|
"loss": 0.1063, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.093525179856115, |
|
"grad_norm": 2.2287980432317136, |
|
"learning_rate": 1.7362118299300363e-05, |
|
"loss": 0.1017, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.1079136690647484, |
|
"grad_norm": 2.035780926825967, |
|
"learning_rate": 1.733558535506469e-05, |
|
"loss": 0.1022, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.1223021582733814, |
|
"grad_norm": 2.3935570961264707, |
|
"learning_rate": 1.730894013070707e-05, |
|
"loss": 0.1185, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 3.1366906474820144, |
|
"grad_norm": 3.4732921260424536, |
|
"learning_rate": 1.7282183034065296e-05, |
|
"loss": 0.1375, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.1510791366906474, |
|
"grad_norm": 1.9600377867714436, |
|
"learning_rate": 1.7255314474689524e-05, |
|
"loss": 0.0858, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.1654676258992804, |
|
"grad_norm": 1.2129581509037186, |
|
"learning_rate": 1.7228334863835972e-05, |
|
"loss": 0.0786, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.1798561151079134, |
|
"grad_norm": 1.3441082560042694, |
|
"learning_rate": 1.7201244614460645e-05, |
|
"loss": 0.1193, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 3.194244604316547, |
|
"grad_norm": 1.2454468384467656, |
|
"learning_rate": 1.7174044141213e-05, |
|
"loss": 0.0742, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.20863309352518, |
|
"grad_norm": 2.522852911662667, |
|
"learning_rate": 1.7146733860429614e-05, |
|
"loss": 0.118, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 3.223021582733813, |
|
"grad_norm": 1.9278040098990679, |
|
"learning_rate": 1.7119314190127786e-05, |
|
"loss": 0.0977, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.237410071942446, |
|
"grad_norm": 2.1026449906365707, |
|
"learning_rate": 1.7091785549999177e-05, |
|
"loss": 0.1052, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.2517985611510793, |
|
"grad_norm": 2.6529166852325257, |
|
"learning_rate": 1.7064148361403347e-05, |
|
"loss": 0.2227, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 3.2661870503597124, |
|
"grad_norm": 4.552433635116517, |
|
"learning_rate": 1.7036403047361336e-05, |
|
"loss": 0.1501, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 3.2805755395683454, |
|
"grad_norm": 2.310770991325816, |
|
"learning_rate": 1.7008550032549167e-05, |
|
"loss": 0.1216, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.2949640287769784, |
|
"grad_norm": 3.36364275278406, |
|
"learning_rate": 1.6980589743291362e-05, |
|
"loss": 0.1235, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 3.3093525179856114, |
|
"grad_norm": 1.829401154310567, |
|
"learning_rate": 1.695252260755441e-05, |
|
"loss": 0.1233, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.3237410071942444, |
|
"grad_norm": 2.223251742922475, |
|
"learning_rate": 1.6924349054940204e-05, |
|
"loss": 0.1139, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.338129496402878, |
|
"grad_norm": 2.1098226205823734, |
|
"learning_rate": 1.6896069516679494e-05, |
|
"loss": 0.0954, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.352517985611511, |
|
"grad_norm": 2.420417244636502, |
|
"learning_rate": 1.6867684425625265e-05, |
|
"loss": 0.1024, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 3.366906474820144, |
|
"grad_norm": 3.516667669455294, |
|
"learning_rate": 1.683919421624611e-05, |
|
"loss": 0.1811, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.381294964028777, |
|
"grad_norm": 1.1050242423015832, |
|
"learning_rate": 1.681059932461959e-05, |
|
"loss": 0.0677, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.3956834532374103, |
|
"grad_norm": 2.176543994109208, |
|
"learning_rate": 1.6781900188425565e-05, |
|
"loss": 0.093, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.4100719424460433, |
|
"grad_norm": 2.4696463349977873, |
|
"learning_rate": 1.6753097246939475e-05, |
|
"loss": 0.0865, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 3.4244604316546763, |
|
"grad_norm": 1.2628603534222926, |
|
"learning_rate": 1.672419094102563e-05, |
|
"loss": 0.0867, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 3.4388489208633093, |
|
"grad_norm": 6.155069816331444, |
|
"learning_rate": 1.6695181713130462e-05, |
|
"loss": 0.1917, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 3.4532374100719423, |
|
"grad_norm": 2.147319268591747, |
|
"learning_rate": 1.6666070007275746e-05, |
|
"loss": 0.1466, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.4676258992805753, |
|
"grad_norm": 2.163903497725947, |
|
"learning_rate": 1.6636856269051813e-05, |
|
"loss": 0.1364, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 3.4820143884892087, |
|
"grad_norm": 1.6774031370584257, |
|
"learning_rate": 1.6607540945610722e-05, |
|
"loss": 0.0906, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 3.4964028776978417, |
|
"grad_norm": 9.014308156169962, |
|
"learning_rate": 1.6578124485659414e-05, |
|
"loss": 0.1861, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.5107913669064748, |
|
"grad_norm": 2.3885377725749715, |
|
"learning_rate": 1.6548607339452853e-05, |
|
"loss": 0.1281, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.5251798561151078, |
|
"grad_norm": 1.6108063549734837, |
|
"learning_rate": 1.6518989958787126e-05, |
|
"loss": 0.0981, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.539568345323741, |
|
"grad_norm": 3.5691166357587054, |
|
"learning_rate": 1.6489272796992536e-05, |
|
"loss": 0.1074, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 3.553956834532374, |
|
"grad_norm": 3.4979429013811467, |
|
"learning_rate": 1.6459456308926662e-05, |
|
"loss": 0.1338, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 3.568345323741007, |
|
"grad_norm": 4.957592452667262, |
|
"learning_rate": 1.642954095096737e-05, |
|
"loss": 0.2005, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.58273381294964, |
|
"grad_norm": 2.5648919694650107, |
|
"learning_rate": 1.639952718100589e-05, |
|
"loss": 0.1081, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"grad_norm": 3.7598235610947643, |
|
"learning_rate": 1.636941545843973e-05, |
|
"loss": 0.1533, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.6115107913669062, |
|
"grad_norm": 2.9195056813718496, |
|
"learning_rate": 1.6339206244165705e-05, |
|
"loss": 0.1188, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 3.6258992805755397, |
|
"grad_norm": 2.79416599036527, |
|
"learning_rate": 1.630890000057285e-05, |
|
"loss": 0.1106, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.6402877697841727, |
|
"grad_norm": 2.2623014827430485, |
|
"learning_rate": 1.6278497191535364e-05, |
|
"loss": 0.0913, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 3.6546762589928057, |
|
"grad_norm": 3.0134989782439736, |
|
"learning_rate": 1.6247998282405486e-05, |
|
"loss": 0.1368, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 3.6690647482014387, |
|
"grad_norm": 1.0640618586580433, |
|
"learning_rate": 1.621740374000639e-05, |
|
"loss": 0.0749, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.683453237410072, |
|
"grad_norm": 2.0407254201531857, |
|
"learning_rate": 1.6186714032625036e-05, |
|
"loss": 0.1347, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 3.697841726618705, |
|
"grad_norm": 1.3495622495153805, |
|
"learning_rate": 1.6155929630004995e-05, |
|
"loss": 0.0938, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 3.712230215827338, |
|
"grad_norm": 1.626082630128798, |
|
"learning_rate": 1.6125051003339277e-05, |
|
"loss": 0.0735, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 3.726618705035971, |
|
"grad_norm": 1.045029527525099, |
|
"learning_rate": 1.6094078625263085e-05, |
|
"loss": 0.0665, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 3.741007194244604, |
|
"grad_norm": 1.0769877251894375, |
|
"learning_rate": 1.6063012969846624e-05, |
|
"loss": 0.0594, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.755395683453237, |
|
"grad_norm": 1.573095185027216, |
|
"learning_rate": 1.603185451258781e-05, |
|
"loss": 0.0989, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 3.7697841726618706, |
|
"grad_norm": 1.5939550154351394, |
|
"learning_rate": 1.6000603730405013e-05, |
|
"loss": 0.0918, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 3.7841726618705036, |
|
"grad_norm": 3.084625753380633, |
|
"learning_rate": 1.5969261101629744e-05, |
|
"loss": 0.1507, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 3.7985611510791366, |
|
"grad_norm": 2.1876597444656367, |
|
"learning_rate": 1.593782710599934e-05, |
|
"loss": 0.1153, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 3.81294964028777, |
|
"grad_norm": 1.4453678209486815, |
|
"learning_rate": 1.5906302224649613e-05, |
|
"loss": 0.0881, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 3.827338129496403, |
|
"grad_norm": 1.415817693353708, |
|
"learning_rate": 1.5874686940107507e-05, |
|
"loss": 0.0921, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 3.841726618705036, |
|
"grad_norm": 1.3019018391132993, |
|
"learning_rate": 1.5842981736283686e-05, |
|
"loss": 0.0942, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 3.856115107913669, |
|
"grad_norm": 2.124481879469022, |
|
"learning_rate": 1.581118709846514e-05, |
|
"loss": 0.0892, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 3.870503597122302, |
|
"grad_norm": 0.7530466355105916, |
|
"learning_rate": 1.5779303513307765e-05, |
|
"loss": 0.0611, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 3.884892086330935, |
|
"grad_norm": 1.9390276125404986, |
|
"learning_rate": 1.574733146882889e-05, |
|
"loss": 0.0711, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.899280575539568, |
|
"grad_norm": 1.7780292244134437, |
|
"learning_rate": 1.571527145439983e-05, |
|
"loss": 0.0912, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 3.9136690647482015, |
|
"grad_norm": 1.5211678706292164, |
|
"learning_rate": 1.5683123960738395e-05, |
|
"loss": 0.0828, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 3.9280575539568345, |
|
"grad_norm": 3.235343849443108, |
|
"learning_rate": 1.5650889479901356e-05, |
|
"loss": 0.1355, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 3.9424460431654675, |
|
"grad_norm": 2.418347718572089, |
|
"learning_rate": 1.5618568505276948e-05, |
|
"loss": 0.0934, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 3.956834532374101, |
|
"grad_norm": 1.856004868367365, |
|
"learning_rate": 1.558616153157728e-05, |
|
"loss": 0.1214, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.971223021582734, |
|
"grad_norm": 1.9055690379940484, |
|
"learning_rate": 1.5553669054830806e-05, |
|
"loss": 0.0759, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 3.985611510791367, |
|
"grad_norm": 0.7835271833606042, |
|
"learning_rate": 1.552109157237468e-05, |
|
"loss": 0.0636, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.9789660771379471, |
|
"learning_rate": 1.5488429582847194e-05, |
|
"loss": 0.0935, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 4.014388489208633, |
|
"grad_norm": 1.920696415437042, |
|
"learning_rate": 1.5455683586180117e-05, |
|
"loss": 0.0732, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.028776978417266, |
|
"grad_norm": 1.7756913937042766, |
|
"learning_rate": 1.542285408359105e-05, |
|
"loss": 0.1339, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.043165467625899, |
|
"grad_norm": 1.3081335048820986, |
|
"learning_rate": 1.5389941577575753e-05, |
|
"loss": 0.0805, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 4.057553956834532, |
|
"grad_norm": 1.545019490212882, |
|
"learning_rate": 1.5356946571900465e-05, |
|
"loss": 0.0764, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.071942446043165, |
|
"grad_norm": 1.6175892304589476, |
|
"learning_rate": 1.5323869571594166e-05, |
|
"loss": 0.0838, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 4.086330935251799, |
|
"grad_norm": 8.86376731047678, |
|
"learning_rate": 1.5290711082940883e-05, |
|
"loss": 0.2142, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.100719424460432, |
|
"grad_norm": 2.0771074188946748, |
|
"learning_rate": 1.5257471613471908e-05, |
|
"loss": 0.1161, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.115107913669065, |
|
"grad_norm": 1.841466825633457, |
|
"learning_rate": 1.5224151671958045e-05, |
|
"loss": 0.111, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 4.129496402877698, |
|
"grad_norm": 2.1971490305870254, |
|
"learning_rate": 1.5190751768401835e-05, |
|
"loss": 0.1001, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 4.143884892086331, |
|
"grad_norm": 1.887590572191191, |
|
"learning_rate": 1.515727241402972e-05, |
|
"loss": 0.0817, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.158273381294964, |
|
"grad_norm": 1.5504037915944906, |
|
"learning_rate": 1.512371412128424e-05, |
|
"loss": 0.0721, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 4.172661870503597, |
|
"grad_norm": 3.2084429695170398, |
|
"learning_rate": 1.509007740381618e-05, |
|
"loss": 0.1495, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.18705035971223, |
|
"grad_norm": 1.9773379887209472, |
|
"learning_rate": 1.505636277647672e-05, |
|
"loss": 0.1021, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.201438848920863, |
|
"grad_norm": 1.7919752378903473, |
|
"learning_rate": 1.5022570755309542e-05, |
|
"loss": 0.069, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 4.215827338129497, |
|
"grad_norm": 1.8353019507539692, |
|
"learning_rate": 1.4988701857542932e-05, |
|
"loss": 0.0908, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 4.23021582733813, |
|
"grad_norm": 1.6805427100586665, |
|
"learning_rate": 1.495475660158187e-05, |
|
"loss": 0.0785, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 4.244604316546763, |
|
"grad_norm": 1.380400795948778, |
|
"learning_rate": 1.492073550700009e-05, |
|
"loss": 0.0817, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.258992805755396, |
|
"grad_norm": 0.9200650137201957, |
|
"learning_rate": 1.4886639094532129e-05, |
|
"loss": 0.0646, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.273381294964029, |
|
"grad_norm": 2.0328467587787884, |
|
"learning_rate": 1.4852467886065357e-05, |
|
"loss": 0.0816, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 4.287769784172662, |
|
"grad_norm": 2.471692983826739, |
|
"learning_rate": 1.4818222404631993e-05, |
|
"loss": 0.1168, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 4.302158273381295, |
|
"grad_norm": 1.9480957771711376, |
|
"learning_rate": 1.4783903174401086e-05, |
|
"loss": 0.1056, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 4.316546762589928, |
|
"grad_norm": 1.3707094764913785, |
|
"learning_rate": 1.4749510720670506e-05, |
|
"loss": 0.081, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.330935251798561, |
|
"grad_norm": 1.8160322544483793, |
|
"learning_rate": 1.4715045569858895e-05, |
|
"loss": 0.0784, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 4.345323741007194, |
|
"grad_norm": 1.6519200877530784, |
|
"learning_rate": 1.4680508249497622e-05, |
|
"loss": 0.0758, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 4.359712230215827, |
|
"grad_norm": 1.1953425576844743, |
|
"learning_rate": 1.4645899288222686e-05, |
|
"loss": 0.076, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 4.374100719424461, |
|
"grad_norm": 2.1532443296286217, |
|
"learning_rate": 1.461121921576665e-05, |
|
"loss": 0.0956, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 4.388489208633094, |
|
"grad_norm": 7.291642986227351, |
|
"learning_rate": 1.457646856295051e-05, |
|
"loss": 0.1638, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.402877697841727, |
|
"grad_norm": 1.9760971103123395, |
|
"learning_rate": 1.4541647861675592e-05, |
|
"loss": 0.0898, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 4.41726618705036, |
|
"grad_norm": 1.549355801958334, |
|
"learning_rate": 1.4506757644915393e-05, |
|
"loss": 0.0804, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 4.431654676258993, |
|
"grad_norm": 2.1548681765232005, |
|
"learning_rate": 1.4471798446707426e-05, |
|
"loss": 0.0917, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 4.446043165467626, |
|
"grad_norm": 2.416819319060172, |
|
"learning_rate": 1.443677080214506e-05, |
|
"loss": 0.1, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 4.460431654676259, |
|
"grad_norm": 2.357572317260481, |
|
"learning_rate": 1.4401675247369307e-05, |
|
"loss": 0.0842, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.474820143884892, |
|
"grad_norm": 2.3896391383138305, |
|
"learning_rate": 1.4366512319560642e-05, |
|
"loss": 0.0825, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 4.489208633093525, |
|
"grad_norm": 2.43013200710383, |
|
"learning_rate": 1.4331282556930753e-05, |
|
"loss": 0.0694, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.503597122302159, |
|
"grad_norm": 2.044331917417573, |
|
"learning_rate": 1.4295986498714326e-05, |
|
"loss": 0.0782, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 4.517985611510792, |
|
"grad_norm": 2.5158625592140424, |
|
"learning_rate": 1.4260624685160778e-05, |
|
"loss": 0.0861, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 4.532374100719425, |
|
"grad_norm": 2.390662985836405, |
|
"learning_rate": 1.4225197657525996e-05, |
|
"loss": 0.0998, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.546762589928058, |
|
"grad_norm": 4.358152932825534, |
|
"learning_rate": 1.4189705958064041e-05, |
|
"loss": 0.2349, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 4.561151079136691, |
|
"grad_norm": 1.4662171648939122, |
|
"learning_rate": 1.4154150130018867e-05, |
|
"loss": 0.0828, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 4.575539568345324, |
|
"grad_norm": 2.618941379165625, |
|
"learning_rate": 1.4118530717615982e-05, |
|
"loss": 0.1057, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.589928057553957, |
|
"grad_norm": 5.796662333549953, |
|
"learning_rate": 1.4082848266054136e-05, |
|
"loss": 0.1314, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 4.60431654676259, |
|
"grad_norm": 1.3755791710433443, |
|
"learning_rate": 1.4047103321496977e-05, |
|
"loss": 0.0568, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.618705035971223, |
|
"grad_norm": 1.4690673745162433, |
|
"learning_rate": 1.4011296431064675e-05, |
|
"loss": 0.0857, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 4.633093525179856, |
|
"grad_norm": 1.510028184893069, |
|
"learning_rate": 1.3975428142825562e-05, |
|
"loss": 0.0661, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 4.647482014388489, |
|
"grad_norm": 1.8656945859612246, |
|
"learning_rate": 1.3939499005787735e-05, |
|
"loss": 0.0885, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 4.661870503597123, |
|
"grad_norm": 4.360334150991463, |
|
"learning_rate": 1.3903509569890663e-05, |
|
"loss": 0.1249, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.676258992805756, |
|
"grad_norm": 1.2705938890119497, |
|
"learning_rate": 1.3867460385996756e-05, |
|
"loss": 0.0483, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.690647482014389, |
|
"grad_norm": 1.6429511839584963, |
|
"learning_rate": 1.3831352005882947e-05, |
|
"loss": 0.0678, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 4.705035971223022, |
|
"grad_norm": 0.8064327586315418, |
|
"learning_rate": 1.3795184982232234e-05, |
|
"loss": 0.0481, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 4.719424460431655, |
|
"grad_norm": 1.6276254758163173, |
|
"learning_rate": 1.3758959868625233e-05, |
|
"loss": 0.0642, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 4.733812949640288, |
|
"grad_norm": 1.6478404195254295, |
|
"learning_rate": 1.3722677219531684e-05, |
|
"loss": 0.0537, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 4.748201438848921, |
|
"grad_norm": 2.0775687439431443, |
|
"learning_rate": 1.3686337590301997e-05, |
|
"loss": 0.0826, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.762589928057554, |
|
"grad_norm": 1.6321172079536772, |
|
"learning_rate": 1.364994153715872e-05, |
|
"loss": 0.0931, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 4.7769784172661875, |
|
"grad_norm": 1.5139845300039876, |
|
"learning_rate": 1.361348961718804e-05, |
|
"loss": 0.0755, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 4.7913669064748206, |
|
"grad_norm": 1.6885011783715966, |
|
"learning_rate": 1.3576982388331258e-05, |
|
"loss": 0.0708, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 4.805755395683454, |
|
"grad_norm": 3.58412758620127, |
|
"learning_rate": 1.3540420409376237e-05, |
|
"loss": 0.1443, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 4.820143884892087, |
|
"grad_norm": 3.970560863668551, |
|
"learning_rate": 1.3503804239948874e-05, |
|
"loss": 0.1164, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 4.83453237410072, |
|
"grad_norm": 3.0950452614631216, |
|
"learning_rate": 1.3467134440504497e-05, |
|
"loss": 0.1638, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 4.848920863309353, |
|
"grad_norm": 1.2344229500725417, |
|
"learning_rate": 1.3430411572319323e-05, |
|
"loss": 0.0414, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 4.863309352517986, |
|
"grad_norm": 4.693981116757926, |
|
"learning_rate": 1.3393636197481842e-05, |
|
"loss": 0.1099, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 4.877697841726619, |
|
"grad_norm": 1.280394160785586, |
|
"learning_rate": 1.335680887888423e-05, |
|
"loss": 0.069, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 4.892086330935252, |
|
"grad_norm": 2.6531048299227287, |
|
"learning_rate": 1.3319930180213713e-05, |
|
"loss": 0.0945, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.906474820143885, |
|
"grad_norm": 3.102339751706606, |
|
"learning_rate": 1.3283000665943972e-05, |
|
"loss": 0.1103, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 4.920863309352518, |
|
"grad_norm": 1.4150229722556091, |
|
"learning_rate": 1.3246020901326465e-05, |
|
"loss": 0.0787, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 4.935251798561151, |
|
"grad_norm": 2.3202398526115933, |
|
"learning_rate": 1.3208991452381798e-05, |
|
"loss": 0.0956, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 4.9496402877697845, |
|
"grad_norm": 6.464019103643947, |
|
"learning_rate": 1.3171912885891063e-05, |
|
"loss": 0.1059, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 4.9640287769784175, |
|
"grad_norm": 2.2987115160645617, |
|
"learning_rate": 1.3134785769387147e-05, |
|
"loss": 0.0905, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 4.9784172661870505, |
|
"grad_norm": 3.42537659995528, |
|
"learning_rate": 1.3097610671146063e-05, |
|
"loss": 0.0891, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 4.9928057553956835, |
|
"grad_norm": 1.607289746350073, |
|
"learning_rate": 1.3060388160178237e-05, |
|
"loss": 0.0756, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 5.0071942446043165, |
|
"grad_norm": 2.0287001101488733, |
|
"learning_rate": 1.302311880621981e-05, |
|
"loss": 0.1092, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 5.0215827338129495, |
|
"grad_norm": 2.296081619843284, |
|
"learning_rate": 1.2985803179723903e-05, |
|
"loss": 0.0814, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 5.0359712230215825, |
|
"grad_norm": 1.3541106943850767, |
|
"learning_rate": 1.294844185185191e-05, |
|
"loss": 0.0495, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.0503597122302155, |
|
"grad_norm": 3.473072731374066, |
|
"learning_rate": 1.2911035394464724e-05, |
|
"loss": 0.1115, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 5.0647482014388485, |
|
"grad_norm": 1.878085254754344, |
|
"learning_rate": 1.2873584380114012e-05, |
|
"loss": 0.0758, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 5.079136690647482, |
|
"grad_norm": 1.5828011496456365, |
|
"learning_rate": 1.283608938203344e-05, |
|
"loss": 0.0653, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 5.093525179856115, |
|
"grad_norm": 2.310554737444448, |
|
"learning_rate": 1.2798550974129888e-05, |
|
"loss": 0.0795, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 5.107913669064748, |
|
"grad_norm": 1.392927738275557, |
|
"learning_rate": 1.2760969730974692e-05, |
|
"loss": 0.0555, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 5.122302158273381, |
|
"grad_norm": 1.9277356604394835, |
|
"learning_rate": 1.2723346227794817e-05, |
|
"loss": 0.0709, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 5.136690647482014, |
|
"grad_norm": 1.5371398787685273, |
|
"learning_rate": 1.2685681040464081e-05, |
|
"loss": 0.0596, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 5.151079136690647, |
|
"grad_norm": 2.8096729108941134, |
|
"learning_rate": 1.264797474549433e-05, |
|
"loss": 0.1064, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 5.16546762589928, |
|
"grad_norm": 0.9781522138651574, |
|
"learning_rate": 1.2610227920026608e-05, |
|
"loss": 0.051, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 5.179856115107913, |
|
"grad_norm": 1.1509313519794822, |
|
"learning_rate": 1.2572441141822322e-05, |
|
"loss": 0.0651, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.194244604316546, |
|
"grad_norm": 4.388250469742715, |
|
"learning_rate": 1.2534614989254423e-05, |
|
"loss": 0.0967, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 5.2086330935251794, |
|
"grad_norm": 0.9566869047445085, |
|
"learning_rate": 1.2496750041298515e-05, |
|
"loss": 0.0609, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 5.223021582733813, |
|
"grad_norm": 2.2089936486598076, |
|
"learning_rate": 1.2458846877524025e-05, |
|
"loss": 0.0657, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 5.237410071942446, |
|
"grad_norm": 2.1577271836034058, |
|
"learning_rate": 1.2420906078085316e-05, |
|
"loss": 0.0859, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 5.251798561151079, |
|
"grad_norm": 2.107752486687129, |
|
"learning_rate": 1.2382928223712807e-05, |
|
"loss": 0.0493, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 5.266187050359712, |
|
"grad_norm": 1.5362209636910469, |
|
"learning_rate": 1.2344913895704099e-05, |
|
"loss": 0.0551, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 5.280575539568345, |
|
"grad_norm": 1.8284113473780614, |
|
"learning_rate": 1.2306863675915058e-05, |
|
"loss": 0.0639, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 5.294964028776978, |
|
"grad_norm": 1.5434857110112714, |
|
"learning_rate": 1.2268778146750914e-05, |
|
"loss": 0.0665, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 5.309352517985611, |
|
"grad_norm": 1.5270591397764983, |
|
"learning_rate": 1.2230657891157365e-05, |
|
"loss": 0.0614, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 5.323741007194244, |
|
"grad_norm": 1.1644989385855498, |
|
"learning_rate": 1.2192503492611625e-05, |
|
"loss": 0.0516, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.338129496402877, |
|
"grad_norm": 2.4389365127644664, |
|
"learning_rate": 1.2154315535113513e-05, |
|
"loss": 0.0763, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 5.35251798561151, |
|
"grad_norm": 2.570405580037144, |
|
"learning_rate": 1.2116094603176513e-05, |
|
"loss": 0.0645, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 5.366906474820144, |
|
"grad_norm": 2.525534092427501, |
|
"learning_rate": 1.2077841281818816e-05, |
|
"loss": 0.0754, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 5.381294964028777, |
|
"grad_norm": 2.3428164694918654, |
|
"learning_rate": 1.203955615655438e-05, |
|
"loss": 0.0861, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 5.39568345323741, |
|
"grad_norm": 2.235811734094292, |
|
"learning_rate": 1.2001239813383951e-05, |
|
"loss": 0.0549, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.410071942446043, |
|
"grad_norm": 4.506038617907832, |
|
"learning_rate": 1.1962892838786116e-05, |
|
"loss": 0.0857, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 5.424460431654676, |
|
"grad_norm": 2.267447064370234, |
|
"learning_rate": 1.19245158197083e-05, |
|
"loss": 0.0717, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 5.438848920863309, |
|
"grad_norm": 1.3482474852811166, |
|
"learning_rate": 1.1886109343557808e-05, |
|
"loss": 0.0772, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 5.453237410071942, |
|
"grad_norm": 2.1410593958658954, |
|
"learning_rate": 1.1847673998192815e-05, |
|
"loss": 0.0536, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 5.467625899280575, |
|
"grad_norm": 1.5967556597455597, |
|
"learning_rate": 1.180921037191337e-05, |
|
"loss": 0.0459, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.482014388489208, |
|
"grad_norm": 1.2435751103662702, |
|
"learning_rate": 1.1770719053452408e-05, |
|
"loss": 0.0443, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 5.496402877697841, |
|
"grad_norm": 3.2548280663914784, |
|
"learning_rate": 1.1732200631966717e-05, |
|
"loss": 0.0843, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 5.510791366906475, |
|
"grad_norm": 1.991827364041768, |
|
"learning_rate": 1.1693655697027935e-05, |
|
"loss": 0.0561, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 5.525179856115108, |
|
"grad_norm": 1.7434178537703542, |
|
"learning_rate": 1.165508483861352e-05, |
|
"loss": 0.0631, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.539568345323741, |
|
"grad_norm": 3.6171543812005362, |
|
"learning_rate": 1.1616488647097718e-05, |
|
"loss": 0.0704, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 5.553956834532374, |
|
"grad_norm": 3.4479680855430113, |
|
"learning_rate": 1.1577867713242532e-05, |
|
"loss": 0.0751, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 5.568345323741007, |
|
"grad_norm": 1.7391495389296567, |
|
"learning_rate": 1.1539222628188675e-05, |
|
"loss": 0.0524, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 5.58273381294964, |
|
"grad_norm": 2.204791122892472, |
|
"learning_rate": 1.1500553983446527e-05, |
|
"loss": 0.0696, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 5.597122302158273, |
|
"grad_norm": 4.934866987474034, |
|
"learning_rate": 1.1461862370887076e-05, |
|
"loss": 0.0841, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 5.611510791366906, |
|
"grad_norm": 1.5682934454178645, |
|
"learning_rate": 1.1423148382732854e-05, |
|
"loss": 0.0604, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.625899280575539, |
|
"grad_norm": 1.5376010738808106, |
|
"learning_rate": 1.1384412611548887e-05, |
|
"loss": 0.0763, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 5.640287769784173, |
|
"grad_norm": 2.2954456281899858, |
|
"learning_rate": 1.134565565023362e-05, |
|
"loss": 0.0631, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 5.654676258992806, |
|
"grad_norm": 2.8902333297932334, |
|
"learning_rate": 1.1306878092009828e-05, |
|
"loss": 0.1072, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 5.669064748201439, |
|
"grad_norm": 3.1298174346773635, |
|
"learning_rate": 1.1268080530415557e-05, |
|
"loss": 0.0906, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 5.683453237410072, |
|
"grad_norm": 1.9540710814071045, |
|
"learning_rate": 1.122926355929502e-05, |
|
"loss": 0.0815, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 5.697841726618705, |
|
"grad_norm": 3.0936428631268424, |
|
"learning_rate": 1.119042777278953e-05, |
|
"loss": 0.0933, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 5.712230215827338, |
|
"grad_norm": 0.779804603376737, |
|
"learning_rate": 1.1151573765328374e-05, |
|
"loss": 0.0377, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 5.726618705035971, |
|
"grad_norm": 1.4682872653216061, |
|
"learning_rate": 1.1112702131619747e-05, |
|
"loss": 0.0553, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 5.741007194244604, |
|
"grad_norm": 2.2888255822610586, |
|
"learning_rate": 1.1073813466641633e-05, |
|
"loss": 0.0592, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 5.755395683453237, |
|
"grad_norm": 2.0210708158515356, |
|
"learning_rate": 1.1034908365632695e-05, |
|
"loss": 0.0591, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.76978417266187, |
|
"grad_norm": 2.8477043817929175, |
|
"learning_rate": 1.0995987424083178e-05, |
|
"loss": 0.0665, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 5.784172661870503, |
|
"grad_norm": 4.075702050234569, |
|
"learning_rate": 1.0957051237725775e-05, |
|
"loss": 0.0891, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 5.798561151079137, |
|
"grad_norm": 3.4838765154884053, |
|
"learning_rate": 1.0918100402526533e-05, |
|
"loss": 0.0752, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 5.81294964028777, |
|
"grad_norm": 2.190868453265739, |
|
"learning_rate": 1.0879135514675706e-05, |
|
"loss": 0.0678, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 5.827338129496403, |
|
"grad_norm": 6.885762602295394, |
|
"learning_rate": 1.0840157170578645e-05, |
|
"loss": 0.1085, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.841726618705036, |
|
"grad_norm": 2.1256795879435946, |
|
"learning_rate": 1.0801165966846662e-05, |
|
"loss": 0.0587, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 5.856115107913669, |
|
"grad_norm": 3.0358003868740777, |
|
"learning_rate": 1.0762162500287916e-05, |
|
"loss": 0.1023, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 5.870503597122302, |
|
"grad_norm": 2.72412213052102, |
|
"learning_rate": 1.0723147367898243e-05, |
|
"loss": 0.0755, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 5.884892086330935, |
|
"grad_norm": 5.991640096070039, |
|
"learning_rate": 1.068412116685205e-05, |
|
"loss": 0.0906, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 5.899280575539568, |
|
"grad_norm": 4.47844473482202, |
|
"learning_rate": 1.0645084494493166e-05, |
|
"loss": 0.1367, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.913669064748201, |
|
"grad_norm": 5.278497486551652, |
|
"learning_rate": 1.0606037948325686e-05, |
|
"loss": 0.0934, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 5.928057553956835, |
|
"grad_norm": 1.662465142150059, |
|
"learning_rate": 1.0566982126004848e-05, |
|
"loss": 0.0425, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 5.942446043165468, |
|
"grad_norm": 2.1454760909414476, |
|
"learning_rate": 1.052791762532786e-05, |
|
"loss": 0.0632, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 5.956834532374101, |
|
"grad_norm": 1.8023401117347966, |
|
"learning_rate": 1.0488845044224774e-05, |
|
"loss": 0.0562, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 5.971223021582734, |
|
"grad_norm": 1.998906167438941, |
|
"learning_rate": 1.0449764980749317e-05, |
|
"loss": 0.0464, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 5.985611510791367, |
|
"grad_norm": 1.9791549061655505, |
|
"learning_rate": 1.0410678033069745e-05, |
|
"loss": 0.0509, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.783059633172495, |
|
"learning_rate": 1.0371584799459684e-05, |
|
"loss": 0.0693, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 6.014388489208633, |
|
"grad_norm": 3.1713602527987077, |
|
"learning_rate": 1.0332485878288977e-05, |
|
"loss": 0.0896, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 6.028776978417266, |
|
"grad_norm": 13.02216773829757, |
|
"learning_rate": 1.029338186801451e-05, |
|
"loss": 0.1842, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 6.043165467625899, |
|
"grad_norm": 3.175785916601786, |
|
"learning_rate": 1.0254273367171085e-05, |
|
"loss": 0.0673, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.057553956834532, |
|
"grad_norm": 3.0816879666228885, |
|
"learning_rate": 1.0215160974362224e-05, |
|
"loss": 0.0648, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 6.071942446043165, |
|
"grad_norm": 3.138491600475823, |
|
"learning_rate": 1.0176045288251014e-05, |
|
"loss": 0.0537, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 6.086330935251799, |
|
"grad_norm": 1.9965147840919386, |
|
"learning_rate": 1.0136926907550968e-05, |
|
"loss": 0.0493, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 6.100719424460432, |
|
"grad_norm": 4.040616547013923, |
|
"learning_rate": 1.0097806431016825e-05, |
|
"loss": 0.0718, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 6.115107913669065, |
|
"grad_norm": 3.684053981687095, |
|
"learning_rate": 1.0058684457435419e-05, |
|
"loss": 0.0885, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.129496402877698, |
|
"grad_norm": 3.4407063513016207, |
|
"learning_rate": 1.0019561585616485e-05, |
|
"loss": 0.0878, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 6.143884892086331, |
|
"grad_norm": 3.631968287932021, |
|
"learning_rate": 9.980438414383518e-06, |
|
"loss": 0.0716, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 6.158273381294964, |
|
"grad_norm": 3.0607381634313535, |
|
"learning_rate": 9.941315542564583e-06, |
|
"loss": 0.058, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 6.172661870503597, |
|
"grad_norm": 3.9600176462472607, |
|
"learning_rate": 9.902193568983177e-06, |
|
"loss": 0.1314, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 6.18705035971223, |
|
"grad_norm": 1.7718441919276489, |
|
"learning_rate": 9.863073092449033e-06, |
|
"loss": 0.0619, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.201438848920863, |
|
"grad_norm": 2.14606486678126, |
|
"learning_rate": 9.823954711748987e-06, |
|
"loss": 0.0537, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 6.215827338129497, |
|
"grad_norm": 4.994325517315764, |
|
"learning_rate": 9.78483902563778e-06, |
|
"loss": 0.0989, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 6.23021582733813, |
|
"grad_norm": 1.5344735636293707, |
|
"learning_rate": 9.745726632828913e-06, |
|
"loss": 0.0536, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 6.244604316546763, |
|
"grad_norm": 2.0531411979678658, |
|
"learning_rate": 9.706618131985489e-06, |
|
"loss": 0.0522, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 6.258992805755396, |
|
"grad_norm": 1.8694652467324728, |
|
"learning_rate": 9.667514121711025e-06, |
|
"loss": 0.0652, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 6.273381294964029, |
|
"grad_norm": 1.7939240986480327, |
|
"learning_rate": 9.628415200540317e-06, |
|
"loss": 0.0585, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 6.287769784172662, |
|
"grad_norm": 1.1701921170565006, |
|
"learning_rate": 9.589321966930255e-06, |
|
"loss": 0.0446, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 6.302158273381295, |
|
"grad_norm": 1.3183342321738782, |
|
"learning_rate": 9.550235019250688e-06, |
|
"loss": 0.0365, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 6.316546762589928, |
|
"grad_norm": 3.380696309795155, |
|
"learning_rate": 9.51115495577523e-06, |
|
"loss": 0.0855, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 6.330935251798561, |
|
"grad_norm": 3.2163242534678536, |
|
"learning_rate": 9.472082374672145e-06, |
|
"loss": 0.1112, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.345323741007194, |
|
"grad_norm": 2.8805271526903398, |
|
"learning_rate": 9.433017873995159e-06, |
|
"loss": 0.0567, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 6.359712230215827, |
|
"grad_norm": 2.9270420886671626, |
|
"learning_rate": 9.393962051674319e-06, |
|
"loss": 0.073, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 6.374100719424461, |
|
"grad_norm": 0.716770072244056, |
|
"learning_rate": 9.354915505506839e-06, |
|
"loss": 0.0273, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 6.388489208633094, |
|
"grad_norm": 1.9375688885176805, |
|
"learning_rate": 9.315878833147953e-06, |
|
"loss": 0.0458, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 6.402877697841727, |
|
"grad_norm": 1.5114813171921724, |
|
"learning_rate": 9.27685263210176e-06, |
|
"loss": 0.0408, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 6.41726618705036, |
|
"grad_norm": 2.176739947497608, |
|
"learning_rate": 9.237837499712088e-06, |
|
"loss": 0.0406, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 6.431654676258993, |
|
"grad_norm": 1.0546177826776708, |
|
"learning_rate": 9.19883403315334e-06, |
|
"loss": 0.032, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 6.446043165467626, |
|
"grad_norm": 0.5710171789573429, |
|
"learning_rate": 9.159842829421358e-06, |
|
"loss": 0.0283, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 6.460431654676259, |
|
"grad_norm": 2.4971494935474463, |
|
"learning_rate": 9.1208644853243e-06, |
|
"loss": 0.053, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 6.474820143884892, |
|
"grad_norm": 2.1060917891375213, |
|
"learning_rate": 9.081899597473469e-06, |
|
"loss": 0.0685, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.489208633093525, |
|
"grad_norm": 4.872774207148436, |
|
"learning_rate": 9.042948762274227e-06, |
|
"loss": 0.0878, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 6.503597122302159, |
|
"grad_norm": 3.57310670704578, |
|
"learning_rate": 9.004012575916825e-06, |
|
"loss": 0.0898, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 6.517985611510792, |
|
"grad_norm": 1.5890926297246664, |
|
"learning_rate": 8.965091634367306e-06, |
|
"loss": 0.0443, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 6.532374100719425, |
|
"grad_norm": 1.6565607787351473, |
|
"learning_rate": 8.92618653335837e-06, |
|
"loss": 0.0414, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 6.546762589928058, |
|
"grad_norm": 1.6598383644043355, |
|
"learning_rate": 8.887297868380255e-06, |
|
"loss": 0.0404, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.561151079136691, |
|
"grad_norm": 1.3690066201582984, |
|
"learning_rate": 8.84842623467163e-06, |
|
"loss": 0.0596, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 6.575539568345324, |
|
"grad_norm": 1.795250419015445, |
|
"learning_rate": 8.809572227210472e-06, |
|
"loss": 0.038, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 6.589928057553957, |
|
"grad_norm": 3.139575746021586, |
|
"learning_rate": 8.770736440704979e-06, |
|
"loss": 0.0709, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 6.60431654676259, |
|
"grad_norm": 3.5570255461130476, |
|
"learning_rate": 8.731919469584443e-06, |
|
"loss": 0.0707, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 6.618705035971223, |
|
"grad_norm": 2.2552703837998322, |
|
"learning_rate": 8.693121907990177e-06, |
|
"loss": 0.0653, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.633093525179856, |
|
"grad_norm": 1.9006385019285805, |
|
"learning_rate": 8.654344349766384e-06, |
|
"loss": 0.0629, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 6.647482014388489, |
|
"grad_norm": 2.4225211791810053, |
|
"learning_rate": 8.615587388451116e-06, |
|
"loss": 0.0546, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 6.661870503597123, |
|
"grad_norm": 2.687344986181635, |
|
"learning_rate": 8.576851617267151e-06, |
|
"loss": 0.0499, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 6.676258992805756, |
|
"grad_norm": 1.6697444443320166, |
|
"learning_rate": 8.53813762911293e-06, |
|
"loss": 0.0424, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 6.690647482014389, |
|
"grad_norm": 4.050618894489727, |
|
"learning_rate": 8.499446016553475e-06, |
|
"loss": 0.1016, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.705035971223022, |
|
"grad_norm": 1.3790673230578159, |
|
"learning_rate": 8.460777371811327e-06, |
|
"loss": 0.0328, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 6.719424460431655, |
|
"grad_norm": 1.1507918309844436, |
|
"learning_rate": 8.42213228675747e-06, |
|
"loss": 0.0223, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 6.733812949640288, |
|
"grad_norm": 2.4647114023706256, |
|
"learning_rate": 8.383511352902285e-06, |
|
"loss": 0.0684, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 6.748201438848921, |
|
"grad_norm": 1.9432128594740437, |
|
"learning_rate": 8.344915161386485e-06, |
|
"loss": 0.0544, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 6.762589928057554, |
|
"grad_norm": 1.9522718283037046, |
|
"learning_rate": 8.306344302972066e-06, |
|
"loss": 0.0545, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.7769784172661875, |
|
"grad_norm": 3.50307964418194, |
|
"learning_rate": 8.267799368033288e-06, |
|
"loss": 0.0727, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 6.7913669064748206, |
|
"grad_norm": 4.659530620027555, |
|
"learning_rate": 8.229280946547595e-06, |
|
"loss": 0.1447, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 6.805755395683454, |
|
"grad_norm": 2.459012650016016, |
|
"learning_rate": 8.190789628086632e-06, |
|
"loss": 0.0544, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 6.820143884892087, |
|
"grad_norm": 2.6126248982483204, |
|
"learning_rate": 8.15232600180719e-06, |
|
"loss": 0.0799, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 6.83453237410072, |
|
"grad_norm": 1.6545392348906836, |
|
"learning_rate": 8.113890656442194e-06, |
|
"loss": 0.0422, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.848920863309353, |
|
"grad_norm": 1.8182442546260595, |
|
"learning_rate": 8.075484180291702e-06, |
|
"loss": 0.0539, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 6.863309352517986, |
|
"grad_norm": 1.6010075595221613, |
|
"learning_rate": 8.037107161213886e-06, |
|
"loss": 0.0425, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 6.877697841726619, |
|
"grad_norm": 1.6332321065292132, |
|
"learning_rate": 7.99876018661605e-06, |
|
"loss": 0.059, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 6.892086330935252, |
|
"grad_norm": 2.4841015711601613, |
|
"learning_rate": 7.960443843445622e-06, |
|
"loss": 0.0493, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 6.906474820143885, |
|
"grad_norm": 2.325274291210812, |
|
"learning_rate": 7.922158718181184e-06, |
|
"loss": 0.0535, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.920863309352518, |
|
"grad_norm": 5.9237325194153225, |
|
"learning_rate": 7.883905396823487e-06, |
|
"loss": 0.0702, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 6.935251798561151, |
|
"grad_norm": 1.3505054193697872, |
|
"learning_rate": 7.845684464886487e-06, |
|
"loss": 0.0463, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 6.9496402877697845, |
|
"grad_norm": 4.741713866445365, |
|
"learning_rate": 7.80749650738838e-06, |
|
"loss": 0.0741, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 6.9640287769784175, |
|
"grad_norm": 3.680501521362714, |
|
"learning_rate": 7.769342108842641e-06, |
|
"loss": 0.0597, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 6.9784172661870505, |
|
"grad_norm": 1.5698663322195734, |
|
"learning_rate": 7.731221853249089e-06, |
|
"loss": 0.0481, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 6.9928057553956835, |
|
"grad_norm": 2.7205015167111535, |
|
"learning_rate": 7.693136324084949e-06, |
|
"loss": 0.0779, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 7.0071942446043165, |
|
"grad_norm": 2.492166076915753, |
|
"learning_rate": 7.655086104295904e-06, |
|
"loss": 0.0444, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 7.0215827338129495, |
|
"grad_norm": 2.8913487401770097, |
|
"learning_rate": 7.617071776287196e-06, |
|
"loss": 0.0474, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 7.0359712230215825, |
|
"grad_norm": 3.022407364083313, |
|
"learning_rate": 7.5790939219146874e-06, |
|
"loss": 0.0663, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 7.0503597122302155, |
|
"grad_norm": 4.042879715219215, |
|
"learning_rate": 7.541153122475978e-06, |
|
"loss": 0.0654, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.0647482014388485, |
|
"grad_norm": 3.5955340416725727, |
|
"learning_rate": 7.503249958701489e-06, |
|
"loss": 0.076, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 7.079136690647482, |
|
"grad_norm": 1.1422784311056753, |
|
"learning_rate": 7.46538501074558e-06, |
|
"loss": 0.0312, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 7.093525179856115, |
|
"grad_norm": 2.8616905836854674, |
|
"learning_rate": 7.427558858177679e-06, |
|
"loss": 0.0707, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 7.107913669064748, |
|
"grad_norm": 3.736472268458676, |
|
"learning_rate": 7.389772079973397e-06, |
|
"loss": 0.07, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 7.122302158273381, |
|
"grad_norm": 1.952123520605071, |
|
"learning_rate": 7.352025254505672e-06, |
|
"loss": 0.0644, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 7.136690647482014, |
|
"grad_norm": 2.5321330171246577, |
|
"learning_rate": 7.31431895953592e-06, |
|
"loss": 0.0597, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 7.151079136690647, |
|
"grad_norm": 1.3288743837048944, |
|
"learning_rate": 7.276653772205187e-06, |
|
"loss": 0.043, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 7.16546762589928, |
|
"grad_norm": 1.1513739812834443, |
|
"learning_rate": 7.239030269025311e-06, |
|
"loss": 0.035, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 7.179856115107913, |
|
"grad_norm": 1.5262098875270578, |
|
"learning_rate": 7.201449025870113e-06, |
|
"loss": 0.0377, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"grad_norm": 3.435543237475937, |
|
"learning_rate": 7.163910617966563e-06, |
|
"loss": 0.0609, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.2086330935251794, |
|
"grad_norm": 0.9604425592050114, |
|
"learning_rate": 7.126415619885987e-06, |
|
"loss": 0.0307, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 7.223021582733813, |
|
"grad_norm": 5.364737035927668, |
|
"learning_rate": 7.088964605535278e-06, |
|
"loss": 0.0779, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 7.237410071942446, |
|
"grad_norm": 2.0995461577780086, |
|
"learning_rate": 7.0515581481480925e-06, |
|
"loss": 0.0614, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 7.251798561151079, |
|
"grad_norm": 1.9139618540642809, |
|
"learning_rate": 7.014196820276098e-06, |
|
"loss": 0.0339, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 7.266187050359712, |
|
"grad_norm": 1.282509471776567, |
|
"learning_rate": 6.976881193780196e-06, |
|
"loss": 0.0415, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 7.280575539568345, |
|
"grad_norm": 1.8876563385392593, |
|
"learning_rate": 6.9396118398217675e-06, |
|
"loss": 0.0555, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 7.294964028776978, |
|
"grad_norm": 1.6273768839175773, |
|
"learning_rate": 6.90238932885394e-06, |
|
"loss": 0.0315, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 7.309352517985611, |
|
"grad_norm": 2.502695211360501, |
|
"learning_rate": 6.865214230612858e-06, |
|
"loss": 0.0517, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 7.323741007194244, |
|
"grad_norm": 4.1055254501583995, |
|
"learning_rate": 6.8280871141089415e-06, |
|
"loss": 0.0733, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 7.338129496402877, |
|
"grad_norm": 3.3526140983673285, |
|
"learning_rate": 6.791008547618207e-06, |
|
"loss": 0.0537, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.35251798561151, |
|
"grad_norm": 2.27702848929816, |
|
"learning_rate": 6.753979098673539e-06, |
|
"loss": 0.0394, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 7.366906474820144, |
|
"grad_norm": 2.5895671987953053, |
|
"learning_rate": 6.716999334056031e-06, |
|
"loss": 0.072, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 7.381294964028777, |
|
"grad_norm": 2.560346455515735, |
|
"learning_rate": 6.680069819786288e-06, |
|
"loss": 0.0551, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 7.39568345323741, |
|
"grad_norm": 2.4213413575290885, |
|
"learning_rate": 6.643191121115773e-06, |
|
"loss": 0.0604, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 7.410071942446043, |
|
"grad_norm": 2.5037600137764415, |
|
"learning_rate": 6.6063638025181594e-06, |
|
"loss": 0.0505, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 7.424460431654676, |
|
"grad_norm": 5.462794355900936, |
|
"learning_rate": 6.5695884276806784e-06, |
|
"loss": 0.0601, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 7.438848920863309, |
|
"grad_norm": 5.901550532764724, |
|
"learning_rate": 6.532865559495505e-06, |
|
"loss": 0.0732, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 7.453237410071942, |
|
"grad_norm": 2.1680456127592382, |
|
"learning_rate": 6.496195760051128e-06, |
|
"loss": 0.037, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 7.467625899280575, |
|
"grad_norm": 1.0418430134694838, |
|
"learning_rate": 6.459579590623763e-06, |
|
"loss": 0.0296, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 7.482014388489208, |
|
"grad_norm": 4.611606098360257, |
|
"learning_rate": 6.423017611668745e-06, |
|
"loss": 0.0904, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.496402877697841, |
|
"grad_norm": 1.7334595469785676, |
|
"learning_rate": 6.386510382811963e-06, |
|
"loss": 0.065, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 7.510791366906475, |
|
"grad_norm": 2.5488880130044302, |
|
"learning_rate": 6.350058462841283e-06, |
|
"loss": 0.0711, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 7.525179856115108, |
|
"grad_norm": 4.383649882258492, |
|
"learning_rate": 6.313662409698004e-06, |
|
"loss": 0.0672, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 7.539568345323741, |
|
"grad_norm": 2.161691670015276, |
|
"learning_rate": 6.277322780468317e-06, |
|
"loss": 0.0542, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 7.553956834532374, |
|
"grad_norm": 1.75533651285235, |
|
"learning_rate": 6.241040131374769e-06, |
|
"loss": 0.0385, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.568345323741007, |
|
"grad_norm": 1.6713788310284323, |
|
"learning_rate": 6.204815017767767e-06, |
|
"loss": 0.0651, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 7.58273381294964, |
|
"grad_norm": 2.7767180589589957, |
|
"learning_rate": 6.168647994117057e-06, |
|
"loss": 0.0727, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 7.597122302158273, |
|
"grad_norm": 1.7703681537405498, |
|
"learning_rate": 6.132539614003249e-06, |
|
"loss": 0.0399, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 7.611510791366906, |
|
"grad_norm": 2.182319585582808, |
|
"learning_rate": 6.096490430109343e-06, |
|
"loss": 0.0537, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 7.625899280575539, |
|
"grad_norm": 3.13728684435282, |
|
"learning_rate": 6.0605009942122705e-06, |
|
"loss": 0.0486, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.640287769784173, |
|
"grad_norm": 1.9092924208411692, |
|
"learning_rate": 6.024571857174443e-06, |
|
"loss": 0.0426, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 7.654676258992806, |
|
"grad_norm": 1.2985276847388703, |
|
"learning_rate": 5.988703568935329e-06, |
|
"loss": 0.0229, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 7.669064748201439, |
|
"grad_norm": 0.8412966639532138, |
|
"learning_rate": 5.952896678503025e-06, |
|
"loss": 0.0185, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 7.683453237410072, |
|
"grad_norm": 2.7092486788073074, |
|
"learning_rate": 5.917151733945865e-06, |
|
"loss": 0.0428, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 7.697841726618705, |
|
"grad_norm": 3.4234238702022526, |
|
"learning_rate": 5.88146928238402e-06, |
|
"loss": 0.0405, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 7.712230215827338, |
|
"grad_norm": 4.2390460482616685, |
|
"learning_rate": 5.845849869981137e-06, |
|
"loss": 0.0623, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 7.726618705035971, |
|
"grad_norm": 2.9908661711146167, |
|
"learning_rate": 5.8102940419359595e-06, |
|
"loss": 0.0584, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 7.741007194244604, |
|
"grad_norm": 1.6724838566811557, |
|
"learning_rate": 5.7748023424740085e-06, |
|
"loss": 0.0255, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 7.755395683453237, |
|
"grad_norm": 5.552785695728314, |
|
"learning_rate": 5.739375314839226e-06, |
|
"loss": 0.047, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 7.76978417266187, |
|
"grad_norm": 4.78316445415734, |
|
"learning_rate": 5.704013501285679e-06, |
|
"loss": 0.059, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.784172661870503, |
|
"grad_norm": 2.2867834949348116, |
|
"learning_rate": 5.6687174430692495e-06, |
|
"loss": 0.049, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 7.798561151079137, |
|
"grad_norm": 1.4114860474896007, |
|
"learning_rate": 5.633487680439362e-06, |
|
"loss": 0.0282, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 7.81294964028777, |
|
"grad_norm": 3.3647369349864102, |
|
"learning_rate": 5.598324752630695e-06, |
|
"loss": 0.0749, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 7.827338129496403, |
|
"grad_norm": 2.684742315386639, |
|
"learning_rate": 5.5632291978549445e-06, |
|
"loss": 0.0455, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 7.841726618705036, |
|
"grad_norm": 2.0554793850593005, |
|
"learning_rate": 5.528201553292578e-06, |
|
"loss": 0.0439, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 7.856115107913669, |
|
"grad_norm": 4.087197798794891, |
|
"learning_rate": 5.493242355084609e-06, |
|
"loss": 0.0688, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 7.870503597122302, |
|
"grad_norm": 1.2791224839211275, |
|
"learning_rate": 5.458352138324408e-06, |
|
"loss": 0.0298, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 7.884892086330935, |
|
"grad_norm": 5.979296747331615, |
|
"learning_rate": 5.423531437049491e-06, |
|
"loss": 0.0662, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 7.899280575539568, |
|
"grad_norm": 4.045189397887292, |
|
"learning_rate": 5.388780784233354e-06, |
|
"loss": 0.0554, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 7.913669064748201, |
|
"grad_norm": 4.898228214701075, |
|
"learning_rate": 5.354100711777317e-06, |
|
"loss": 0.0594, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.928057553956835, |
|
"grad_norm": 2.02299170942208, |
|
"learning_rate": 5.319491750502383e-06, |
|
"loss": 0.0617, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 7.942446043165468, |
|
"grad_norm": 3.392724832422359, |
|
"learning_rate": 5.284954430141109e-06, |
|
"loss": 0.0574, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 7.956834532374101, |
|
"grad_norm": 2.0165506744847437, |
|
"learning_rate": 5.250489279329501e-06, |
|
"loss": 0.0261, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 7.971223021582734, |
|
"grad_norm": 1.399835068970492, |
|
"learning_rate": 5.216096825598917e-06, |
|
"loss": 0.0324, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 7.985611510791367, |
|
"grad_norm": 2.9458649739740155, |
|
"learning_rate": 5.18177759536801e-06, |
|
"loss": 0.0497, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.7940680363085926, |
|
"learning_rate": 5.147532113934646e-06, |
|
"loss": 0.0181, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 8.014388489208633, |
|
"grad_norm": 1.9007802957877749, |
|
"learning_rate": 5.113360905467875e-06, |
|
"loss": 0.037, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 8.028776978417266, |
|
"grad_norm": 3.2781065841019066, |
|
"learning_rate": 5.079264492999916e-06, |
|
"loss": 0.036, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 8.043165467625899, |
|
"grad_norm": 3.819122592314504, |
|
"learning_rate": 5.0452433984181315e-06, |
|
"loss": 0.0523, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 8.057553956834532, |
|
"grad_norm": 4.2144956363601525, |
|
"learning_rate": 5.011298142457069e-06, |
|
"loss": 0.0636, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.071942446043165, |
|
"grad_norm": 13.047476159517892, |
|
"learning_rate": 4.97742924469046e-06, |
|
"loss": 0.1163, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 8.086330935251798, |
|
"grad_norm": 6.830416937477266, |
|
"learning_rate": 4.943637223523282e-06, |
|
"loss": 0.1028, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 8.100719424460431, |
|
"grad_norm": 2.3255270246907203, |
|
"learning_rate": 4.909922596183822e-06, |
|
"loss": 0.0521, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 8.115107913669064, |
|
"grad_norm": 1.1435086086266115, |
|
"learning_rate": 4.876285878715764e-06, |
|
"loss": 0.0157, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 8.129496402877697, |
|
"grad_norm": 3.7825947186999436, |
|
"learning_rate": 4.842727585970284e-06, |
|
"loss": 0.0393, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 8.14388489208633, |
|
"grad_norm": 3.3170824479840797, |
|
"learning_rate": 4.8092482315981685e-06, |
|
"loss": 0.0507, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 8.158273381294965, |
|
"grad_norm": 4.7246048323427265, |
|
"learning_rate": 4.775848328041956e-06, |
|
"loss": 0.0752, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.172661870503598, |
|
"grad_norm": 2.8604995363327896, |
|
"learning_rate": 4.742528386528094e-06, |
|
"loss": 0.0447, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 8.18705035971223, |
|
"grad_norm": 4.122397781085086, |
|
"learning_rate": 4.709288917059118e-06, |
|
"loss": 0.0571, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 8.201438848920864, |
|
"grad_norm": 3.8083495479732816, |
|
"learning_rate": 4.676130428405834e-06, |
|
"loss": 0.0494, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.215827338129497, |
|
"grad_norm": 3.48317844967832, |
|
"learning_rate": 4.643053428099538e-06, |
|
"loss": 0.0841, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 8.23021582733813, |
|
"grad_norm": 1.551504859564384, |
|
"learning_rate": 4.610058422424249e-06, |
|
"loss": 0.0375, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 8.244604316546763, |
|
"grad_norm": 1.9477471961254322, |
|
"learning_rate": 4.577145916408955e-06, |
|
"loss": 0.0257, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 8.258992805755396, |
|
"grad_norm": 3.2335530341856886, |
|
"learning_rate": 4.544316413819888e-06, |
|
"loss": 0.075, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 8.273381294964029, |
|
"grad_norm": 1.0232024159790356, |
|
"learning_rate": 4.5115704171528105e-06, |
|
"loss": 0.026, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.287769784172662, |
|
"grad_norm": 2.8739397189315956, |
|
"learning_rate": 4.478908427625323e-06, |
|
"loss": 0.0409, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 8.302158273381295, |
|
"grad_norm": 7.820713750474336, |
|
"learning_rate": 4.446330945169197e-06, |
|
"loss": 0.0697, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 8.316546762589928, |
|
"grad_norm": 1.4559489461697241, |
|
"learning_rate": 4.41383846842272e-06, |
|
"loss": 0.0338, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 8.33093525179856, |
|
"grad_norm": 1.6200703776809549, |
|
"learning_rate": 4.381431494723056e-06, |
|
"loss": 0.0453, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 8.345323741007194, |
|
"grad_norm": 2.462591590876853, |
|
"learning_rate": 4.349110520098644e-06, |
|
"loss": 0.0452, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.359712230215827, |
|
"grad_norm": 2.4958974429500675, |
|
"learning_rate": 4.31687603926161e-06, |
|
"loss": 0.0419, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 8.37410071942446, |
|
"grad_norm": 3.3620333990871414, |
|
"learning_rate": 4.284728545600174e-06, |
|
"loss": 0.0918, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.388489208633093, |
|
"grad_norm": 2.076920143609935, |
|
"learning_rate": 4.252668531171117e-06, |
|
"loss": 0.0333, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 8.402877697841726, |
|
"grad_norm": 11.337846192838462, |
|
"learning_rate": 4.220696486692241e-06, |
|
"loss": 0.0809, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 8.417266187050359, |
|
"grad_norm": 1.4691851543426133, |
|
"learning_rate": 4.18881290153486e-06, |
|
"loss": 0.0297, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 8.431654676258994, |
|
"grad_norm": 2.322343633696552, |
|
"learning_rate": 4.1570182637163155e-06, |
|
"loss": 0.041, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 8.446043165467627, |
|
"grad_norm": 1.857573901571014, |
|
"learning_rate": 4.125313059892494e-06, |
|
"loss": 0.026, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 8.46043165467626, |
|
"grad_norm": 2.420406989345141, |
|
"learning_rate": 4.093697775350388e-06, |
|
"loss": 0.0425, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 8.474820143884893, |
|
"grad_norm": 2.528933273598754, |
|
"learning_rate": 4.062172894000664e-06, |
|
"loss": 0.0194, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 8.489208633093526, |
|
"grad_norm": 4.851880404918257, |
|
"learning_rate": 4.0307388983702555e-06, |
|
"loss": 0.0456, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.503597122302159, |
|
"grad_norm": 1.0566440356278923, |
|
"learning_rate": 3.9993962695949865e-06, |
|
"loss": 0.037, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 8.517985611510792, |
|
"grad_norm": 0.9342625319695776, |
|
"learning_rate": 3.9681454874121905e-06, |
|
"loss": 0.0246, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 8.532374100719425, |
|
"grad_norm": 1.404658963816671, |
|
"learning_rate": 3.9369870301533785e-06, |
|
"loss": 0.0323, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 8.546762589928058, |
|
"grad_norm": 2.8421121591274474, |
|
"learning_rate": 3.905921374736919e-06, |
|
"loss": 0.042, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 8.56115107913669, |
|
"grad_norm": 1.665249679691359, |
|
"learning_rate": 3.87494899666073e-06, |
|
"loss": 0.0471, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 8.575539568345324, |
|
"grad_norm": 3.150615281085969, |
|
"learning_rate": 3.844070369995008e-06, |
|
"loss": 0.0592, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 8.589928057553957, |
|
"grad_norm": 2.1657965045021093, |
|
"learning_rate": 3.8132859673749688e-06, |
|
"loss": 0.0335, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 8.60431654676259, |
|
"grad_norm": 0.9753064067607254, |
|
"learning_rate": 3.7825962599936117e-06, |
|
"loss": 0.0173, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 8.618705035971223, |
|
"grad_norm": 1.6281860386953595, |
|
"learning_rate": 3.7520017175945168e-06, |
|
"loss": 0.0327, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 8.633093525179856, |
|
"grad_norm": 1.3250219320378607, |
|
"learning_rate": 3.7215028084646385e-06, |
|
"loss": 0.0389, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.647482014388489, |
|
"grad_norm": 2.663488081521059, |
|
"learning_rate": 3.691099999427152e-06, |
|
"loss": 0.0451, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 8.661870503597122, |
|
"grad_norm": 1.0684718799506963, |
|
"learning_rate": 3.6607937558342975e-06, |
|
"loss": 0.0227, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 8.676258992805755, |
|
"grad_norm": 1.8713141278138534, |
|
"learning_rate": 3.6305845415602726e-06, |
|
"loss": 0.0324, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 8.690647482014388, |
|
"grad_norm": 1.959513645334475, |
|
"learning_rate": 3.6004728189941142e-06, |
|
"loss": 0.0483, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 8.70503597122302, |
|
"grad_norm": 8.273881877761735, |
|
"learning_rate": 3.5704590490326298e-06, |
|
"loss": 0.0701, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 8.719424460431654, |
|
"grad_norm": 1.932144875971402, |
|
"learning_rate": 3.5405436910733437e-06, |
|
"loss": 0.0412, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 8.733812949640289, |
|
"grad_norm": 7.650174976201004, |
|
"learning_rate": 3.5107272030074626e-06, |
|
"loss": 0.0525, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 8.748201438848922, |
|
"grad_norm": 2.482410934643446, |
|
"learning_rate": 3.4810100412128743e-06, |
|
"loss": 0.0447, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 8.762589928057555, |
|
"grad_norm": 1.7887809194693534, |
|
"learning_rate": 3.4513926605471504e-06, |
|
"loss": 0.0334, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 8.776978417266188, |
|
"grad_norm": 2.262589027477362, |
|
"learning_rate": 3.421875514340589e-06, |
|
"loss": 0.0438, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.79136690647482, |
|
"grad_norm": 5.096455221790141, |
|
"learning_rate": 3.392459054389281e-06, |
|
"loss": 0.0589, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 8.805755395683454, |
|
"grad_norm": 1.7161312058815825, |
|
"learning_rate": 3.3631437309481853e-06, |
|
"loss": 0.0261, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 8.820143884892087, |
|
"grad_norm": 2.941763984376986, |
|
"learning_rate": 3.333929992724253e-06, |
|
"loss": 0.0576, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 8.83453237410072, |
|
"grad_norm": 3.006151368731597, |
|
"learning_rate": 3.30481828686954e-06, |
|
"loss": 0.0443, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 8.848920863309353, |
|
"grad_norm": 1.6800648366968625, |
|
"learning_rate": 3.275809058974373e-06, |
|
"loss": 0.0307, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 8.863309352517986, |
|
"grad_norm": 0.8243738116908417, |
|
"learning_rate": 3.2469027530605255e-06, |
|
"loss": 0.0184, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 8.877697841726619, |
|
"grad_norm": 2.9566481267485365, |
|
"learning_rate": 3.2180998115744387e-06, |
|
"loss": 0.0373, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 8.892086330935252, |
|
"grad_norm": 1.9637756364219565, |
|
"learning_rate": 3.1894006753804143e-06, |
|
"loss": 0.0414, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 8.906474820143885, |
|
"grad_norm": 2.3021615520028984, |
|
"learning_rate": 3.1608057837538976e-06, |
|
"loss": 0.0423, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 8.920863309352518, |
|
"grad_norm": 2.145448976885504, |
|
"learning_rate": 3.1323155743747393e-06, |
|
"loss": 0.0404, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.93525179856115, |
|
"grad_norm": 2.6838866265175287, |
|
"learning_rate": 3.1039304833205073e-06, |
|
"loss": 0.042, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 8.949640287769784, |
|
"grad_norm": 0.9803533745198019, |
|
"learning_rate": 3.075650945059799e-06, |
|
"loss": 0.028, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 8.964028776978417, |
|
"grad_norm": 6.015138805293879, |
|
"learning_rate": 3.047477392445596e-06, |
|
"loss": 0.0469, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 8.97841726618705, |
|
"grad_norm": 2.47190153965276, |
|
"learning_rate": 3.019410256708637e-06, |
|
"loss": 0.0699, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 8.992805755395683, |
|
"grad_norm": 1.6888494123739861, |
|
"learning_rate": 2.9914499674508337e-06, |
|
"loss": 0.0352, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 9.007194244604317, |
|
"grad_norm": 0.8291369253401952, |
|
"learning_rate": 2.9635969526386665e-06, |
|
"loss": 0.0173, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 9.02158273381295, |
|
"grad_norm": 3.1456360877961043, |
|
"learning_rate": 2.935851638596655e-06, |
|
"loss": 0.0445, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 9.035971223021583, |
|
"grad_norm": 2.3237289901109754, |
|
"learning_rate": 2.908214450000828e-06, |
|
"loss": 0.0392, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 9.050359712230216, |
|
"grad_norm": 2.1054179088541547, |
|
"learning_rate": 2.8806858098722155e-06, |
|
"loss": 0.0585, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 9.06474820143885, |
|
"grad_norm": 1.2969196699460492, |
|
"learning_rate": 2.853266139570391e-06, |
|
"loss": 0.0208, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.079136690647482, |
|
"grad_norm": 0.9970140027989338, |
|
"learning_rate": 2.825955858787002e-06, |
|
"loss": 0.0183, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 9.093525179856115, |
|
"grad_norm": 1.046038196325873, |
|
"learning_rate": 2.798755385539358e-06, |
|
"loss": 0.0196, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 9.107913669064748, |
|
"grad_norm": 1.9785975611671889, |
|
"learning_rate": 2.7716651361640277e-06, |
|
"loss": 0.046, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 9.122302158273381, |
|
"grad_norm": 2.076469916316301, |
|
"learning_rate": 2.7446855253104775e-06, |
|
"loss": 0.035, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 9.136690647482014, |
|
"grad_norm": 0.996431472737031, |
|
"learning_rate": 2.717816965934705e-06, |
|
"loss": 0.0377, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 9.151079136690647, |
|
"grad_norm": 4.566127623498886, |
|
"learning_rate": 2.6910598692929323e-06, |
|
"loss": 0.0767, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 9.16546762589928, |
|
"grad_norm": 12.39526382129473, |
|
"learning_rate": 2.6644146449353103e-06, |
|
"loss": 0.1713, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 9.179856115107913, |
|
"grad_norm": 2.4799629481553724, |
|
"learning_rate": 2.6378817006996393e-06, |
|
"loss": 0.0314, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 9.194244604316546, |
|
"grad_norm": 8.085625915606265, |
|
"learning_rate": 2.611461442705152e-06, |
|
"loss": 0.051, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 9.20863309352518, |
|
"grad_norm": 3.9263706221680685, |
|
"learning_rate": 2.5851542753462612e-06, |
|
"loss": 0.0521, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.223021582733812, |
|
"grad_norm": 2.226548600355445, |
|
"learning_rate": 2.5589606012863968e-06, |
|
"loss": 0.0332, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 9.237410071942445, |
|
"grad_norm": 1.255383022545736, |
|
"learning_rate": 2.532880821451833e-06, |
|
"loss": 0.0248, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 9.251798561151078, |
|
"grad_norm": 5.241468552575159, |
|
"learning_rate": 2.5069153350255617e-06, |
|
"loss": 0.0544, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 9.266187050359711, |
|
"grad_norm": 1.3999294797764177, |
|
"learning_rate": 2.4810645394411636e-06, |
|
"loss": 0.0284, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 9.280575539568344, |
|
"grad_norm": 1.7303921026902358, |
|
"learning_rate": 2.455328830376741e-06, |
|
"loss": 0.0212, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 9.29496402877698, |
|
"grad_norm": 3.2404444332120868, |
|
"learning_rate": 2.429708601748849e-06, |
|
"loss": 0.0698, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 9.309352517985612, |
|
"grad_norm": 1.553700541641244, |
|
"learning_rate": 2.4042042457064863e-06, |
|
"loss": 0.0389, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 9.323741007194245, |
|
"grad_norm": 2.570999782342014, |
|
"learning_rate": 2.3788161526250677e-06, |
|
"loss": 0.034, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.338129496402878, |
|
"grad_norm": 1.4441708855755897, |
|
"learning_rate": 2.3535447111004662e-06, |
|
"loss": 0.0318, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 9.352517985611511, |
|
"grad_norm": 2.9286217319244456, |
|
"learning_rate": 2.3283903079430582e-06, |
|
"loss": 0.0426, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.366906474820144, |
|
"grad_norm": 2.3158759616027758, |
|
"learning_rate": 2.3033533281718036e-06, |
|
"loss": 0.04, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 9.381294964028777, |
|
"grad_norm": 5.000706423916787, |
|
"learning_rate": 2.2784341550083577e-06, |
|
"loss": 0.0778, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 9.39568345323741, |
|
"grad_norm": 3.1672796078263667, |
|
"learning_rate": 2.253633169871198e-06, |
|
"loss": 0.0477, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 9.410071942446043, |
|
"grad_norm": 2.936168504993618, |
|
"learning_rate": 2.2289507523697894e-06, |
|
"loss": 0.0355, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 9.424460431654676, |
|
"grad_norm": 4.144038176169699, |
|
"learning_rate": 2.204387280298772e-06, |
|
"loss": 0.0636, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 9.43884892086331, |
|
"grad_norm": 3.317961055103226, |
|
"learning_rate": 2.1799431296321883e-06, |
|
"loss": 0.0254, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 9.453237410071942, |
|
"grad_norm": 2.0945507810139166, |
|
"learning_rate": 2.155618674517711e-06, |
|
"loss": 0.0396, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 9.467625899280575, |
|
"grad_norm": 2.1490741031658755, |
|
"learning_rate": 2.131414287270931e-06, |
|
"loss": 0.0351, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 9.482014388489208, |
|
"grad_norm": 1.170394818981662, |
|
"learning_rate": 2.107330338369652e-06, |
|
"loss": 0.0223, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 9.496402877697841, |
|
"grad_norm": 1.9060637808073648, |
|
"learning_rate": 2.083367196448219e-06, |
|
"loss": 0.0314, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.510791366906474, |
|
"grad_norm": 2.682470653382018, |
|
"learning_rate": 2.0595252282918875e-06, |
|
"loss": 0.0311, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 9.525179856115107, |
|
"grad_norm": 2.824810882470908, |
|
"learning_rate": 2.0358047988311857e-06, |
|
"loss": 0.0408, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 9.53956834532374, |
|
"grad_norm": 1.8783543488553889, |
|
"learning_rate": 2.012206271136353e-06, |
|
"loss": 0.0316, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 9.553956834532373, |
|
"grad_norm": 1.277908232236526, |
|
"learning_rate": 1.988730006411769e-06, |
|
"loss": 0.0227, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 9.568345323741006, |
|
"grad_norm": 2.3170498768091763, |
|
"learning_rate": 1.9653763639904333e-06, |
|
"loss": 0.0324, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 9.582733812949641, |
|
"grad_norm": 1.4282734057780846, |
|
"learning_rate": 1.942145701328456e-06, |
|
"loss": 0.0282, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 9.597122302158274, |
|
"grad_norm": 2.7421058903271676, |
|
"learning_rate": 1.9190383739995933e-06, |
|
"loss": 0.0433, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 9.611510791366907, |
|
"grad_norm": 0.5131708654352847, |
|
"learning_rate": 1.8960547356897997e-06, |
|
"loss": 0.0161, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 9.62589928057554, |
|
"grad_norm": 2.0530874650800106, |
|
"learning_rate": 1.8731951381918257e-06, |
|
"loss": 0.0427, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 9.640287769784173, |
|
"grad_norm": 1.8051899893158643, |
|
"learning_rate": 1.8504599313998196e-06, |
|
"loss": 0.0218, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.654676258992806, |
|
"grad_norm": 0.7906143024517177, |
|
"learning_rate": 1.8278494633039756e-06, |
|
"loss": 0.0195, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 9.66906474820144, |
|
"grad_norm": 3.283281196176163, |
|
"learning_rate": 1.8053640799852134e-06, |
|
"loss": 0.0279, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 9.683453237410072, |
|
"grad_norm": 1.4837554695301376, |
|
"learning_rate": 1.783004125609873e-06, |
|
"loss": 0.0303, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 9.697841726618705, |
|
"grad_norm": 3.9513747658965364, |
|
"learning_rate": 1.7607699424244583e-06, |
|
"loss": 0.0453, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 9.712230215827338, |
|
"grad_norm": 2.932528943145533, |
|
"learning_rate": 1.7386618707503822e-06, |
|
"loss": 0.0551, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.726618705035971, |
|
"grad_norm": 1.8691452428430109, |
|
"learning_rate": 1.7166802489787704e-06, |
|
"loss": 0.0288, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 9.741007194244604, |
|
"grad_norm": 2.267950496543606, |
|
"learning_rate": 1.6948254135652764e-06, |
|
"loss": 0.0471, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 9.755395683453237, |
|
"grad_norm": 3.362001877990058, |
|
"learning_rate": 1.673097699024938e-06, |
|
"loss": 0.0443, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 9.76978417266187, |
|
"grad_norm": 0.6960568252828435, |
|
"learning_rate": 1.6514974379270465e-06, |
|
"loss": 0.0143, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 9.784172661870503, |
|
"grad_norm": 1.5401636007060717, |
|
"learning_rate": 1.6300249608900654e-06, |
|
"loss": 0.0318, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.798561151079136, |
|
"grad_norm": 2.2596630157728765, |
|
"learning_rate": 1.608680596576563e-06, |
|
"loss": 0.0332, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 9.81294964028777, |
|
"grad_norm": 1.1661531548368926, |
|
"learning_rate": 1.587464671688187e-06, |
|
"loss": 0.0187, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 9.827338129496402, |
|
"grad_norm": 1.4871849727237823, |
|
"learning_rate": 1.5663775109606682e-06, |
|
"loss": 0.0283, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 9.841726618705035, |
|
"grad_norm": 0.9194790607066714, |
|
"learning_rate": 1.5454194371588383e-06, |
|
"loss": 0.0166, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 9.85611510791367, |
|
"grad_norm": 2.308580159255162, |
|
"learning_rate": 1.5245907710716912e-06, |
|
"loss": 0.0349, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 9.870503597122303, |
|
"grad_norm": 2.314639880080365, |
|
"learning_rate": 1.5038918315074825e-06, |
|
"loss": 0.0281, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 9.884892086330936, |
|
"grad_norm": 1.619927122217182, |
|
"learning_rate": 1.48332293528885e-06, |
|
"loss": 0.0575, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 9.899280575539569, |
|
"grad_norm": 1.5455675444988652, |
|
"learning_rate": 1.462884397247949e-06, |
|
"loss": 0.0281, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 9.913669064748202, |
|
"grad_norm": 1.496371665596201, |
|
"learning_rate": 1.4425765302216467e-06, |
|
"loss": 0.0262, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 9.928057553956835, |
|
"grad_norm": 0.6246401749746331, |
|
"learning_rate": 1.4223996450467291e-06, |
|
"loss": 0.0155, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.942446043165468, |
|
"grad_norm": 10.244899114128767, |
|
"learning_rate": 1.4023540505551514e-06, |
|
"loss": 0.0584, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 9.956834532374101, |
|
"grad_norm": 3.578140786106804, |
|
"learning_rate": 1.382440053569295e-06, |
|
"loss": 0.0403, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 9.971223021582734, |
|
"grad_norm": 1.6841621634178627, |
|
"learning_rate": 1.3626579588972843e-06, |
|
"loss": 0.0347, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 9.985611510791367, |
|
"grad_norm": 1.6886542091112717, |
|
"learning_rate": 1.3430080693283176e-06, |
|
"loss": 0.0252, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.1884226287968, |
|
"learning_rate": 1.3234906856280272e-06, |
|
"loss": 0.0289, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 10.014388489208633, |
|
"grad_norm": 1.8465342865749588, |
|
"learning_rate": 1.30410610653389e-06, |
|
"loss": 0.0317, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 10.028776978417266, |
|
"grad_norm": 1.6653664187493198, |
|
"learning_rate": 1.2848546287506392e-06, |
|
"loss": 0.0327, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 10.043165467625899, |
|
"grad_norm": 2.716731042561852, |
|
"learning_rate": 1.2657365469457295e-06, |
|
"loss": 0.0356, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 10.057553956834532, |
|
"grad_norm": 0.5749921009255655, |
|
"learning_rate": 1.2467521537448258e-06, |
|
"loss": 0.0129, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 10.071942446043165, |
|
"grad_norm": 1.6695913891538687, |
|
"learning_rate": 1.227901739727332e-06, |
|
"loss": 0.0386, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.086330935251798, |
|
"grad_norm": 1.480135248408385, |
|
"learning_rate": 1.2091855934219289e-06, |
|
"loss": 0.0325, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 10.100719424460431, |
|
"grad_norm": 1.3476082037635144, |
|
"learning_rate": 1.1906040013021668e-06, |
|
"loss": 0.0143, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 10.115107913669064, |
|
"grad_norm": 1.0352529590879136, |
|
"learning_rate": 1.172157247782083e-06, |
|
"loss": 0.0198, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 10.129496402877697, |
|
"grad_norm": 1.1278501275644894, |
|
"learning_rate": 1.1538456152118394e-06, |
|
"loss": 0.0207, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 10.14388489208633, |
|
"grad_norm": 3.9502198656971963, |
|
"learning_rate": 1.1356693838734134e-06, |
|
"loss": 0.0514, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 10.158273381294965, |
|
"grad_norm": 1.530725864660551, |
|
"learning_rate": 1.1176288319762963e-06, |
|
"loss": 0.0221, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 10.172661870503598, |
|
"grad_norm": 2.2735261159620723, |
|
"learning_rate": 1.0997242356532335e-06, |
|
"loss": 0.0466, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 10.18705035971223, |
|
"grad_norm": 1.0618045257756556, |
|
"learning_rate": 1.0819558689560162e-06, |
|
"loss": 0.0227, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 10.201438848920864, |
|
"grad_norm": 1.6182455175606794, |
|
"learning_rate": 1.0643240038512648e-06, |
|
"loss": 0.0231, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 10.215827338129497, |
|
"grad_norm": 8.00225455212014, |
|
"learning_rate": 1.0468289102162788e-06, |
|
"loss": 0.0681, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.23021582733813, |
|
"grad_norm": 2.591497264989186, |
|
"learning_rate": 1.0294708558349031e-06, |
|
"loss": 0.0251, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 10.244604316546763, |
|
"grad_norm": 1.7149356695320543, |
|
"learning_rate": 1.0122501063934266e-06, |
|
"loss": 0.0304, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 10.258992805755396, |
|
"grad_norm": 2.487093456558291, |
|
"learning_rate": 9.951669254765227e-07, |
|
"loss": 0.0316, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 10.273381294964029, |
|
"grad_norm": 1.8372925491948908, |
|
"learning_rate": 9.782215745632063e-07, |
|
"loss": 0.0244, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 10.287769784172662, |
|
"grad_norm": 1.6522089041208652, |
|
"learning_rate": 9.614143130228336e-07, |
|
"loss": 0.0213, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 10.302158273381295, |
|
"grad_norm": 1.388640294189459, |
|
"learning_rate": 9.447453981111377e-07, |
|
"loss": 0.022, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 10.316546762589928, |
|
"grad_norm": 1.8567918043310965, |
|
"learning_rate": 9.282150849662841e-07, |
|
"loss": 0.0277, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 10.33093525179856, |
|
"grad_norm": 1.7484564597114693, |
|
"learning_rate": 9.118236266049707e-07, |
|
"loss": 0.04, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 10.345323741007194, |
|
"grad_norm": 1.3472624690293522, |
|
"learning_rate": 8.955712739185529e-07, |
|
"loss": 0.0284, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 10.359712230215827, |
|
"grad_norm": 2.569862357797179, |
|
"learning_rate": 8.794582756691994e-07, |
|
"loss": 0.0487, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.37410071942446, |
|
"grad_norm": 1.1341008302050821, |
|
"learning_rate": 8.634848784860916e-07, |
|
"loss": 0.0259, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 10.388489208633093, |
|
"grad_norm": 3.9556296920276326, |
|
"learning_rate": 8.476513268616471e-07, |
|
"loss": 0.0303, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 10.402877697841726, |
|
"grad_norm": 1.0067776542085125, |
|
"learning_rate": 8.319578631477731e-07, |
|
"loss": 0.0215, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 10.417266187050359, |
|
"grad_norm": 1.5246834403644955, |
|
"learning_rate": 8.164047275521614e-07, |
|
"loss": 0.0279, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 10.431654676258994, |
|
"grad_norm": 1.5007545531516848, |
|
"learning_rate": 8.00992158134607e-07, |
|
"loss": 0.0371, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 10.446043165467627, |
|
"grad_norm": 0.962027429251609, |
|
"learning_rate": 7.857203908033684e-07, |
|
"loss": 0.026, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 10.46043165467626, |
|
"grad_norm": 1.2598114860067426, |
|
"learning_rate": 7.705896593115614e-07, |
|
"loss": 0.0275, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 10.474820143884893, |
|
"grad_norm": 1.5601411747424254, |
|
"learning_rate": 7.556001952535697e-07, |
|
"loss": 0.0336, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 10.489208633093526, |
|
"grad_norm": 2.7709712616630626, |
|
"learning_rate": 7.40752228061502e-07, |
|
"loss": 0.0355, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 10.503597122302159, |
|
"grad_norm": 4.058223338059946, |
|
"learning_rate": 7.260459850016932e-07, |
|
"loss": 0.0587, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.517985611510792, |
|
"grad_norm": 0.8541524082485146, |
|
"learning_rate": 7.114816911712131e-07, |
|
"loss": 0.0137, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 10.532374100719425, |
|
"grad_norm": 2.2965443467696542, |
|
"learning_rate": 6.970595694944215e-07, |
|
"loss": 0.0441, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 10.546762589928058, |
|
"grad_norm": 4.196670787721921, |
|
"learning_rate": 6.827798407195629e-07, |
|
"loss": 0.0284, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 10.56115107913669, |
|
"grad_norm": 1.1707179381668629, |
|
"learning_rate": 6.686427234153814e-07, |
|
"loss": 0.0277, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 10.575539568345324, |
|
"grad_norm": 1.7237854874400753, |
|
"learning_rate": 6.546484339677817e-07, |
|
"loss": 0.0229, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 10.589928057553957, |
|
"grad_norm": 1.9963363458117016, |
|
"learning_rate": 6.407971865765095e-07, |
|
"loss": 0.0403, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 10.60431654676259, |
|
"grad_norm": 1.8475932957020715, |
|
"learning_rate": 6.270891932518775e-07, |
|
"loss": 0.0339, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 10.618705035971223, |
|
"grad_norm": 2.207865836762862, |
|
"learning_rate": 6.1352466381152e-07, |
|
"loss": 0.0206, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 10.633093525179856, |
|
"grad_norm": 1.5028628741315406, |
|
"learning_rate": 6.00103805877178e-07, |
|
"loss": 0.0279, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 10.647482014388489, |
|
"grad_norm": 1.6578850661820663, |
|
"learning_rate": 5.868268248715292e-07, |
|
"loss": 0.0458, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.661870503597122, |
|
"grad_norm": 2.8376855115850255, |
|
"learning_rate": 5.736939240150363e-07, |
|
"loss": 0.0357, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 10.676258992805755, |
|
"grad_norm": 2.5385576697329313, |
|
"learning_rate": 5.607053043228361e-07, |
|
"loss": 0.0331, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 10.690647482014388, |
|
"grad_norm": 1.2033464016461923, |
|
"learning_rate": 5.478611646016674e-07, |
|
"loss": 0.0207, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 10.70503597122302, |
|
"grad_norm": 0.981803438753484, |
|
"learning_rate": 5.35161701446828e-07, |
|
"loss": 0.0188, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 10.719424460431654, |
|
"grad_norm": 8.071336752025925, |
|
"learning_rate": 5.226071092391616e-07, |
|
"loss": 0.0645, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 10.733812949640289, |
|
"grad_norm": 2.41778659174354, |
|
"learning_rate": 5.101975801420844e-07, |
|
"loss": 0.04, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 10.748201438848922, |
|
"grad_norm": 0.641208094752535, |
|
"learning_rate": 4.979333040986434e-07, |
|
"loss": 0.0131, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 10.762589928057555, |
|
"grad_norm": 6.926642444043881, |
|
"learning_rate": 4.858144688286103e-07, |
|
"loss": 0.0465, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 10.776978417266188, |
|
"grad_norm": 2.1100789184063675, |
|
"learning_rate": 4.7384125982561035e-07, |
|
"loss": 0.0303, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 10.79136690647482, |
|
"grad_norm": 4.2180297630453305, |
|
"learning_rate": 4.6201386035427785e-07, |
|
"loss": 0.0313, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.805755395683454, |
|
"grad_norm": 2.0665863271791167, |
|
"learning_rate": 4.503324514474483e-07, |
|
"loss": 0.0274, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 10.820143884892087, |
|
"grad_norm": 1.6728982206562557, |
|
"learning_rate": 4.387972119034023e-07, |
|
"loss": 0.0307, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 10.83453237410072, |
|
"grad_norm": 3.4746815846649746, |
|
"learning_rate": 4.274083182831157e-07, |
|
"loss": 0.0355, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 10.848920863309353, |
|
"grad_norm": 1.8969392294440341, |
|
"learning_rate": 4.161659449075572e-07, |
|
"loss": 0.0343, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 10.863309352517986, |
|
"grad_norm": 2.7876842253949112, |
|
"learning_rate": 4.0507026385502747e-07, |
|
"loss": 0.0467, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 10.877697841726619, |
|
"grad_norm": 4.67027301314563, |
|
"learning_rate": 3.9412144495851845e-07, |
|
"loss": 0.0436, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 10.892086330935252, |
|
"grad_norm": 1.399538680812464, |
|
"learning_rate": 3.833196558031216e-07, |
|
"loss": 0.0217, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 10.906474820143885, |
|
"grad_norm": 1.7409190227121822, |
|
"learning_rate": 3.7266506172345507e-07, |
|
"loss": 0.0372, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 10.920863309352518, |
|
"grad_norm": 2.1365410486540974, |
|
"learning_rate": 3.621578258011338e-07, |
|
"loss": 0.0245, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 10.93525179856115, |
|
"grad_norm": 3.4216815696618257, |
|
"learning_rate": 3.517981088622768e-07, |
|
"loss": 0.0447, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.949640287769784, |
|
"grad_norm": 2.5234742535562695, |
|
"learning_rate": 3.4158606947504944e-07, |
|
"loss": 0.0397, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 10.964028776978417, |
|
"grad_norm": 4.126068958805298, |
|
"learning_rate": 3.3152186394722506e-07, |
|
"loss": 0.0538, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 10.97841726618705, |
|
"grad_norm": 2.2465535716930614, |
|
"learning_rate": 3.2160564632380043e-07, |
|
"loss": 0.0693, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 10.992805755395683, |
|
"grad_norm": 3.814910469078625, |
|
"learning_rate": 3.118375683846353e-07, |
|
"loss": 0.0713, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 11.007194244604317, |
|
"grad_norm": 0.870573413202881, |
|
"learning_rate": 3.022177796421322e-07, |
|
"loss": 0.0141, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 11.02158273381295, |
|
"grad_norm": 2.5693617992072033, |
|
"learning_rate": 2.9274642733894577e-07, |
|
"loss": 0.0341, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 11.035971223021583, |
|
"grad_norm": 1.6022862996501166, |
|
"learning_rate": 2.834236564457271e-07, |
|
"loss": 0.0354, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 11.050359712230216, |
|
"grad_norm": 1.0198868538110064, |
|
"learning_rate": 2.742496096589076e-07, |
|
"loss": 0.0166, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 11.06474820143885, |
|
"grad_norm": 1.9386025908425748, |
|
"learning_rate": 2.652244273985127e-07, |
|
"loss": 0.0406, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 11.079136690647482, |
|
"grad_norm": 1.506641792856537, |
|
"learning_rate": 2.5634824780601753e-07, |
|
"loss": 0.0215, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.093525179856115, |
|
"grad_norm": 2.8974005417112054, |
|
"learning_rate": 2.4762120674222456e-07, |
|
"loss": 0.0337, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 11.107913669064748, |
|
"grad_norm": 3.2363471525554632, |
|
"learning_rate": 2.390434377851925e-07, |
|
"loss": 0.0659, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 11.122302158273381, |
|
"grad_norm": 1.7250018435406151, |
|
"learning_rate": 2.3061507222818303e-07, |
|
"loss": 0.0264, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 11.136690647482014, |
|
"grad_norm": 1.3716552652282374, |
|
"learning_rate": 2.2233623907765956e-07, |
|
"loss": 0.0301, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 11.151079136690647, |
|
"grad_norm": 4.081730598474069, |
|
"learning_rate": 2.1420706505130728e-07, |
|
"loss": 0.0296, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 11.16546762589928, |
|
"grad_norm": 2.1837875002466993, |
|
"learning_rate": 2.0622767457609384e-07, |
|
"loss": 0.0487, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 11.179856115107913, |
|
"grad_norm": 2.5403013467016016, |
|
"learning_rate": 1.983981897863685e-07, |
|
"loss": 0.0298, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 11.194244604316546, |
|
"grad_norm": 0.9102126378246074, |
|
"learning_rate": 1.9071873052198818e-07, |
|
"loss": 0.0198, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 11.20863309352518, |
|
"grad_norm": 1.3128849233164, |
|
"learning_rate": 1.8318941432648785e-07, |
|
"loss": 0.0193, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 11.223021582733812, |
|
"grad_norm": 1.4735468637747058, |
|
"learning_rate": 1.7581035644527623e-07, |
|
"loss": 0.0243, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.237410071942445, |
|
"grad_norm": 1.8021548024444698, |
|
"learning_rate": 1.6858166982387624e-07, |
|
"loss": 0.0205, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 11.251798561151078, |
|
"grad_norm": 2.25011481794677, |
|
"learning_rate": 1.6150346510619197e-07, |
|
"loss": 0.0401, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 11.266187050359711, |
|
"grad_norm": 3.4908449211884482, |
|
"learning_rate": 1.5457585063282322e-07, |
|
"loss": 0.0739, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 11.280575539568344, |
|
"grad_norm": 0.8061010306090391, |
|
"learning_rate": 1.4779893243939358e-07, |
|
"loss": 0.0143, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 11.29496402877698, |
|
"grad_norm": 1.6504383979055732, |
|
"learning_rate": 1.4117281425494178e-07, |
|
"loss": 0.0248, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 11.309352517985612, |
|
"grad_norm": 1.402157952033158, |
|
"learning_rate": 1.3469759750032508e-07, |
|
"loss": 0.023, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 11.323741007194245, |
|
"grad_norm": 2.645079301698734, |
|
"learning_rate": 1.2837338128666942e-07, |
|
"loss": 0.0647, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 11.338129496402878, |
|
"grad_norm": 0.7567449101065951, |
|
"learning_rate": 1.2220026241385296e-07, |
|
"loss": 0.0156, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 11.352517985611511, |
|
"grad_norm": 0.6465755222813994, |
|
"learning_rate": 1.1617833536902489e-07, |
|
"loss": 0.0179, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 11.366906474820144, |
|
"grad_norm": 1.8698065932207548, |
|
"learning_rate": 1.1030769232515559e-07, |
|
"loss": 0.023, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.381294964028777, |
|
"grad_norm": 1.7988268213081555, |
|
"learning_rate": 1.0458842313963102e-07, |
|
"loss": 0.0411, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 11.39568345323741, |
|
"grad_norm": 3.6957888652655444, |
|
"learning_rate": 9.902061535287278e-08, |
|
"loss": 0.0338, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 11.410071942446043, |
|
"grad_norm": 2.7446261934693443, |
|
"learning_rate": 9.360435418700131e-08, |
|
"loss": 0.0324, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 11.424460431654676, |
|
"grad_norm": 0.7757882042918355, |
|
"learning_rate": 8.83397225445315e-08, |
|
"loss": 0.0281, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 11.43884892086331, |
|
"grad_norm": 2.857395014664893, |
|
"learning_rate": 8.322680100710023e-08, |
|
"loss": 0.0336, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 11.453237410071942, |
|
"grad_norm": 0.8599695116705092, |
|
"learning_rate": 7.826566783423639e-08, |
|
"loss": 0.0153, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 11.467625899280575, |
|
"grad_norm": 1.2009242568158411, |
|
"learning_rate": 7.345639896216173e-08, |
|
"loss": 0.0218, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 11.482014388489208, |
|
"grad_norm": 1.8513393342942366, |
|
"learning_rate": 6.879906800262848e-08, |
|
"loss": 0.0248, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 11.496402877697841, |
|
"grad_norm": 1.2374041080258498, |
|
"learning_rate": 6.429374624179474e-08, |
|
"loss": 0.0187, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 11.510791366906474, |
|
"grad_norm": 1.493116059019553, |
|
"learning_rate": 5.994050263912976e-08, |
|
"loss": 0.0228, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.525179856115107, |
|
"grad_norm": 1.9511941731868687, |
|
"learning_rate": 5.573940382636145e-08, |
|
"loss": 0.05, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 11.53956834532374, |
|
"grad_norm": 1.6434577538884052, |
|
"learning_rate": 5.169051410645276e-08, |
|
"loss": 0.0183, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 11.553956834532373, |
|
"grad_norm": 3.362352075361728, |
|
"learning_rate": 4.7793895452623584e-08, |
|
"loss": 0.037, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 11.568345323741006, |
|
"grad_norm": 5.575378703603965, |
|
"learning_rate": 4.4049607507397066e-08, |
|
"loss": 0.0794, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 11.582733812949641, |
|
"grad_norm": 3.109823706873304, |
|
"learning_rate": 4.045770758168699e-08, |
|
"loss": 0.0317, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 11.597122302158274, |
|
"grad_norm": 2.2826191924472394, |
|
"learning_rate": 3.701825065392184e-08, |
|
"loss": 0.0293, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 11.611510791366907, |
|
"grad_norm": 2.018015360147881, |
|
"learning_rate": 3.3731289369206556e-08, |
|
"loss": 0.0398, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 11.62589928057554, |
|
"grad_norm": 8.047431870090547, |
|
"learning_rate": 3.059687403850986e-08, |
|
"loss": 0.1134, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 11.640287769784173, |
|
"grad_norm": 2.298672016403596, |
|
"learning_rate": 2.761505263789821e-08, |
|
"loss": 0.0243, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 11.654676258992806, |
|
"grad_norm": 2.5580107049006315, |
|
"learning_rate": 2.4785870807803036e-08, |
|
"loss": 0.0363, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.66906474820144, |
|
"grad_norm": 2.1820443354655548, |
|
"learning_rate": 2.2109371852317985e-08, |
|
"loss": 0.0321, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 11.683453237410072, |
|
"grad_norm": 1.8784576576635295, |
|
"learning_rate": 1.9585596738539436e-08, |
|
"loss": 0.0294, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 11.697841726618705, |
|
"grad_norm": 1.2733181144368895, |
|
"learning_rate": 1.7214584095937015e-08, |
|
"loss": 0.0322, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 11.712230215827338, |
|
"grad_norm": 1.4234884359583329, |
|
"learning_rate": 1.4996370215765165e-08, |
|
"loss": 0.0234, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 11.726618705035971, |
|
"grad_norm": 1.3429094276944762, |
|
"learning_rate": 1.2930989050504717e-08, |
|
"loss": 0.0166, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 11.741007194244604, |
|
"grad_norm": 0.9942863729919723, |
|
"learning_rate": 1.101847221334551e-08, |
|
"loss": 0.0178, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 11.755395683453237, |
|
"grad_norm": 4.8436461710096514, |
|
"learning_rate": 9.25884897770013e-09, |
|
"loss": 0.0356, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 11.76978417266187, |
|
"grad_norm": 3.3015502134325945, |
|
"learning_rate": 7.652146276759808e-09, |
|
"loss": 0.0656, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 11.784172661870503, |
|
"grad_norm": 1.6919061388445014, |
|
"learning_rate": 6.1983887030769855e-09, |
|
"loss": 0.0243, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 11.798561151079136, |
|
"grad_norm": 2.579249722184542, |
|
"learning_rate": 4.897598508192269e-09, |
|
"loss": 0.0411, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.81294964028777, |
|
"grad_norm": 1.8241696897466821, |
|
"learning_rate": 3.749795602294715e-09, |
|
"loss": 0.0294, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 11.827338129496402, |
|
"grad_norm": 2.10962756409833, |
|
"learning_rate": 2.7549975539120644e-09, |
|
"loss": 0.0307, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 11.841726618705035, |
|
"grad_norm": 2.127397019212184, |
|
"learning_rate": 1.9132195896498505e-09, |
|
"loss": 0.0287, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 11.85611510791367, |
|
"grad_norm": 1.7438775653570628, |
|
"learning_rate": 1.2244745939493651e-09, |
|
"loss": 0.023, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 11.870503597122303, |
|
"grad_norm": 1.7148926403499662, |
|
"learning_rate": 6.887731088978111e-10, |
|
"loss": 0.0321, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 11.884892086330936, |
|
"grad_norm": 3.3795126925432015, |
|
"learning_rate": 3.0612333406176976e-10, |
|
"loss": 0.0481, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 11.899280575539569, |
|
"grad_norm": 2.9388316113176565, |
|
"learning_rate": 7.65311263661861e-11, |
|
"loss": 0.0546, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 11.913669064748202, |
|
"grad_norm": 6.73693905281281, |
|
"learning_rate": 0.0, |
|
"loss": 0.0567, |
|
"step": 828 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 828, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 200.0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 418903697129472.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|