{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8013816925734024,
  "eval_steps": 869,
  "global_step": 1392,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005757052389176742,
      "grad_norm": 2.557003974914551,
      "learning_rate": 0.0,
      "loss": 5.4277,
      "step": 1
    },
    {
      "epoch": 0.0005757052389176742,
      "eval_loss": 5.319709300994873,
      "eval_runtime": 1026.7022,
      "eval_samples_per_second": 2.496,
      "eval_steps_per_second": 2.496,
      "step": 1
    },
    {
      "epoch": 0.0011514104778353484,
      "grad_norm": 2.985229969024658,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 5.7019,
      "step": 2
    },
    {
      "epoch": 0.0017271157167530224,
      "grad_norm": 3.0353081226348877,
      "learning_rate": 8.000000000000001e-07,
      "loss": 6.1934,
      "step": 3
    },
    {
      "epoch": 0.002302820955670697,
      "grad_norm": 3.724905490875244,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 5.4617,
      "step": 4
    },
    {
      "epoch": 0.0028785261945883708,
      "grad_norm": 2.6505627632141113,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 5.4285,
      "step": 5
    },
    {
      "epoch": 0.0034542314335060447,
      "grad_norm": 2.7363409996032715,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 5.8634,
      "step": 6
    },
    {
      "epoch": 0.004029936672423719,
      "grad_norm": 3.082538366317749,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 4.7461,
      "step": 7
    },
    {
      "epoch": 0.004605641911341394,
      "grad_norm": 9.095250129699707,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 7.5703,
      "step": 8
    },
    {
      "epoch": 0.0051813471502590676,
      "grad_norm": 2.2597923278808594,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 5.3631,
      "step": 9
    },
    {
      "epoch": 0.0057570523891767415,
      "grad_norm": 5.053525924682617,
      "learning_rate": 3.6e-06,
      "loss": 6.0132,
      "step": 10
    },
    {
      "epoch": 0.0063327576280944155,
      "grad_norm": 2.7407820224761963,
      "learning_rate": 4.000000000000001e-06,
      "loss": 5.9776,
      "step": 11
    },
    {
      "epoch": 0.0069084628670120895,
      "grad_norm": 2.4892263412475586,
      "learning_rate": 4.4e-06,
      "loss": 5.524,
      "step": 12
    },
    {
      "epoch": 0.007484168105929764,
      "grad_norm": 2.5302274227142334,
      "learning_rate": 4.800000000000001e-06,
      "loss": 5.8044,
      "step": 13
    },
    {
      "epoch": 0.008059873344847437,
      "grad_norm": 2.992504358291626,
      "learning_rate": 5.2e-06,
      "loss": 6.0307,
      "step": 14
    },
    {
      "epoch": 0.008635578583765112,
      "grad_norm": 4.081608295440674,
      "learning_rate": 5.600000000000001e-06,
      "loss": 4.6732,
      "step": 15
    },
    {
      "epoch": 0.009211283822682787,
      "grad_norm": 2.33296799659729,
      "learning_rate": 6e-06,
      "loss": 4.6356,
      "step": 16
    },
    {
      "epoch": 0.00978698906160046,
      "grad_norm": 2.798452854156494,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 5.2941,
      "step": 17
    },
    {
      "epoch": 0.010362694300518135,
      "grad_norm": 2.290029525756836,
      "learning_rate": 6.800000000000001e-06,
      "loss": 4.9405,
      "step": 18
    },
    {
      "epoch": 0.010938399539435808,
      "grad_norm": 3.2164740562438965,
      "learning_rate": 7.2e-06,
      "loss": 5.6711,
      "step": 19
    },
    {
      "epoch": 0.011514104778353483,
      "grad_norm": 2.4481987953186035,
      "learning_rate": 7.6e-06,
      "loss": 5.0366,
      "step": 20
    },
    {
      "epoch": 0.012089810017271158,
      "grad_norm": 3.398063898086548,
      "learning_rate": 8.000000000000001e-06,
      "loss": 5.9377,
      "step": 21
    },
    {
      "epoch": 0.012665515256188831,
      "grad_norm": 2.3936686515808105,
      "learning_rate": 8.400000000000001e-06,
      "loss": 5.4237,
      "step": 22
    },
    {
      "epoch": 0.013241220495106506,
      "grad_norm": 2.7233810424804688,
      "learning_rate": 8.8e-06,
      "loss": 5.6551,
      "step": 23
    },
    {
      "epoch": 0.013816925734024179,
      "grad_norm": 2.9957566261291504,
      "learning_rate": 9.2e-06,
      "loss": 4.7701,
      "step": 24
    },
    {
      "epoch": 0.014392630972941854,
      "grad_norm": 6.397132396697998,
      "learning_rate": 9.600000000000001e-06,
      "loss": 6.4459,
      "step": 25
    },
    {
      "epoch": 0.014968336211859529,
      "grad_norm": 3.0593409538269043,
      "learning_rate": 1e-05,
      "loss": 5.2758,
      "step": 26
    },
    {
      "epoch": 0.015544041450777202,
      "grad_norm": 2.9723803997039795,
      "learning_rate": 1.04e-05,
      "loss": 5.6136,
      "step": 27
    },
    {
      "epoch": 0.016119746689694875,
      "grad_norm": 2.03314471244812,
      "learning_rate": 1.08e-05,
      "loss": 5.3556,
      "step": 28
    },
    {
      "epoch": 0.01669545192861255,
      "grad_norm": 1.777107834815979,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 5.1061,
      "step": 29
    },
    {
      "epoch": 0.017271157167530225,
      "grad_norm": 3.2192044258117676,
      "learning_rate": 1.16e-05,
      "loss": 5.2414,
      "step": 30
    },
    {
      "epoch": 0.017846862406447898,
      "grad_norm": 3.924452066421509,
      "learning_rate": 1.2e-05,
      "loss": 5.2754,
      "step": 31
    },
    {
      "epoch": 0.018422567645365574,
      "grad_norm": 3.5611093044281006,
      "learning_rate": 1.24e-05,
      "loss": 5.2817,
      "step": 32
    },
    {
      "epoch": 0.018998272884283247,
      "grad_norm": 2.5194263458251953,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 5.9063,
      "step": 33
    },
    {
      "epoch": 0.01957397812320092,
      "grad_norm": 2.403895854949951,
      "learning_rate": 1.32e-05,
      "loss": 5.1161,
      "step": 34
    },
    {
      "epoch": 0.020149683362118594,
      "grad_norm": 2.496400833129883,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 5.3049,
      "step": 35
    },
    {
      "epoch": 0.02072538860103627,
      "grad_norm": 3.0970828533172607,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 5.5807,
      "step": 36
    },
    {
      "epoch": 0.021301093839953943,
      "grad_norm": 3.941403388977051,
      "learning_rate": 1.44e-05,
      "loss": 6.0418,
      "step": 37
    },
    {
      "epoch": 0.021876799078871616,
      "grad_norm": 2.291431188583374,
      "learning_rate": 1.48e-05,
      "loss": 4.3686,
      "step": 38
    },
    {
      "epoch": 0.022452504317789293,
      "grad_norm": 2.783054828643799,
      "learning_rate": 1.52e-05,
      "loss": 5.15,
      "step": 39
    },
    {
      "epoch": 0.023028209556706966,
      "grad_norm": 3.579267978668213,
      "learning_rate": 1.56e-05,
      "loss": 5.7507,
      "step": 40
    },
    {
      "epoch": 0.02360391479562464,
      "grad_norm": 3.5277323722839355,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 6.112,
      "step": 41
    },
    {
      "epoch": 0.024179620034542316,
      "grad_norm": 2.5100817680358887,
      "learning_rate": 1.6400000000000002e-05,
      "loss": 5.2133,
      "step": 42
    },
    {
      "epoch": 0.02475532527345999,
      "grad_norm": 2.3821561336517334,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 6.0345,
      "step": 43
    },
    {
      "epoch": 0.025331030512377662,
      "grad_norm": 3.0675108432769775,
      "learning_rate": 1.7199999999999998e-05,
      "loss": 5.2294,
      "step": 44
    },
    {
      "epoch": 0.025906735751295335,
      "grad_norm": 2.8790383338928223,
      "learning_rate": 1.76e-05,
      "loss": 5.6393,
      "step": 45
    },
    {
      "epoch": 0.02648244099021301,
      "grad_norm": 3.3649141788482666,
      "learning_rate": 1.8e-05,
      "loss": 6.014,
      "step": 46
    },
    {
      "epoch": 0.027058146229130685,
      "grad_norm": 3.4695286750793457,
      "learning_rate": 1.84e-05,
      "loss": 5.3457,
      "step": 47
    },
    {
      "epoch": 0.027633851468048358,
      "grad_norm": 3.303622245788574,
      "learning_rate": 1.88e-05,
      "loss": 5.593,
      "step": 48
    },
    {
      "epoch": 0.028209556706966035,
      "grad_norm": 2.481895923614502,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 5.1439,
      "step": 49
    },
    {
      "epoch": 0.028785261945883708,
      "grad_norm": 2.888579845428467,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 4.6318,
      "step": 50
    },
    {
      "epoch": 0.02936096718480138,
      "grad_norm": 3.4528300762176514,
      "learning_rate": 2e-05,
      "loss": 5.0376,
      "step": 51
    },
    {
      "epoch": 0.029936672423719057,
      "grad_norm": 3.6751370429992676,
      "learning_rate": 2.04e-05,
      "loss": 4.9183,
      "step": 52
    },
    {
      "epoch": 0.03051237766263673,
      "grad_norm": 3.382035970687866,
      "learning_rate": 2.08e-05,
      "loss": 5.499,
      "step": 53
    },
    {
      "epoch": 0.031088082901554404,
      "grad_norm": 2.8802406787872314,
      "learning_rate": 2.12e-05,
      "loss": 5.3177,
      "step": 54
    },
    {
      "epoch": 0.03166378814047208,
      "grad_norm": 6.158539772033691,
      "learning_rate": 2.16e-05,
      "loss": 6.2133,
      "step": 55
    },
    {
      "epoch": 0.03223949337938975,
      "grad_norm": 2.599864959716797,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 5.3691,
      "step": 56
    },
    {
      "epoch": 0.03281519861830743,
      "grad_norm": 3.4526188373565674,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 5.3801,
      "step": 57
    },
    {
      "epoch": 0.0333909038572251,
      "grad_norm": 9.494807243347168,
      "learning_rate": 2.2800000000000002e-05,
      "loss": 7.3116,
      "step": 58
    },
    {
      "epoch": 0.033966609096142776,
      "grad_norm": 4.3456130027771,
      "learning_rate": 2.32e-05,
      "loss": 4.7467,
      "step": 59
    },
    {
      "epoch": 0.03454231433506045,
      "grad_norm": 3.8471431732177734,
      "learning_rate": 2.36e-05,
      "loss": 5.2742,
      "step": 60
    },
    {
      "epoch": 0.03511801957397812,
      "grad_norm": 3.985994815826416,
      "learning_rate": 2.4e-05,
      "loss": 5.4615,
      "step": 61
    },
    {
      "epoch": 0.035693724812895795,
      "grad_norm": 9.588626861572266,
      "learning_rate": 2.44e-05,
      "loss": 6.8261,
      "step": 62
    },
    {
      "epoch": 0.03626943005181347,
      "grad_norm": 5.3343915939331055,
      "learning_rate": 2.48e-05,
      "loss": 6.0899,
      "step": 63
    },
    {
      "epoch": 0.03684513529073115,
      "grad_norm": 5.611617088317871,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 6.4523,
      "step": 64
    },
    {
      "epoch": 0.03742084052964882,
      "grad_norm": 4.497012615203857,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 4.787,
      "step": 65
    },
    {
      "epoch": 0.037996545768566495,
      "grad_norm": 5.032821178436279,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 5.6337,
      "step": 66
    },
    {
      "epoch": 0.03857225100748417,
      "grad_norm": 3.732733726501465,
      "learning_rate": 2.64e-05,
      "loss": 5.5212,
      "step": 67
    },
    {
      "epoch": 0.03914795624640184,
      "grad_norm": 4.3597517013549805,
      "learning_rate": 2.6800000000000004e-05,
      "loss": 4.647,
      "step": 68
    },
    {
      "epoch": 0.039723661485319514,
      "grad_norm": 5.359225273132324,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 5.7052,
      "step": 69
    },
    {
      "epoch": 0.04029936672423719,
      "grad_norm": 4.9161601066589355,
      "learning_rate": 2.7600000000000003e-05,
      "loss": 5.3191,
      "step": 70
    },
    {
      "epoch": 0.04087507196315487,
      "grad_norm": 4.137385368347168,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 5.1797,
      "step": 71
    },
    {
      "epoch": 0.04145077720207254,
      "grad_norm": 4.728359699249268,
      "learning_rate": 2.84e-05,
      "loss": 5.1125,
      "step": 72
    },
    {
      "epoch": 0.042026482440990214,
      "grad_norm": 4.568793773651123,
      "learning_rate": 2.88e-05,
      "loss": 5.7705,
      "step": 73
    },
    {
      "epoch": 0.04260218767990789,
      "grad_norm": 4.931026935577393,
      "learning_rate": 2.9199999999999998e-05,
      "loss": 5.1052,
      "step": 74
    },
    {
      "epoch": 0.04317789291882556,
      "grad_norm": 4.697461128234863,
      "learning_rate": 2.96e-05,
      "loss": 5.1404,
      "step": 75
    },
    {
      "epoch": 0.04375359815774323,
      "grad_norm": 6.393320083618164,
      "learning_rate": 3e-05,
      "loss": 6.2212,
      "step": 76
    },
    {
      "epoch": 0.04432930339666091,
      "grad_norm": 5.876922607421875,
      "learning_rate": 3.04e-05,
      "loss": 5.7775,
      "step": 77
    },
    {
      "epoch": 0.044905008635578586,
      "grad_norm": 4.749701499938965,
      "learning_rate": 3.08e-05,
      "loss": 4.7321,
      "step": 78
    },
    {
      "epoch": 0.04548071387449626,
      "grad_norm": 4.894115447998047,
      "learning_rate": 3.12e-05,
      "loss": 5.2017,
      "step": 79
    },
    {
      "epoch": 0.04605641911341393,
      "grad_norm": 5.125804424285889,
      "learning_rate": 3.16e-05,
      "loss": 5.1661,
      "step": 80
    },
    {
      "epoch": 0.046632124352331605,
      "grad_norm": 7.571075439453125,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 6.1439,
      "step": 81
    },
    {
      "epoch": 0.04720782959124928,
      "grad_norm": 4.469061374664307,
      "learning_rate": 3.24e-05,
      "loss": 5.1732,
      "step": 82
    },
    {
      "epoch": 0.04778353483016695,
      "grad_norm": 4.565371513366699,
      "learning_rate": 3.2800000000000004e-05,
      "loss": 5.4892,
      "step": 83
    },
    {
      "epoch": 0.04835924006908463,
      "grad_norm": 5.844489097595215,
      "learning_rate": 3.32e-05,
      "loss": 5.875,
      "step": 84
    },
    {
      "epoch": 0.048934945308002305,
      "grad_norm": 10.564720153808594,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 5.9008,
      "step": 85
    },
    {
      "epoch": 0.04951065054691998,
      "grad_norm": 6.923472881317139,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 5.4949,
      "step": 86
    },
    {
      "epoch": 0.05008635578583765,
      "grad_norm": 6.902386665344238,
      "learning_rate": 3.4399999999999996e-05,
      "loss": 4.9801,
      "step": 87
    },
    {
      "epoch": 0.050662061024755324,
      "grad_norm": 8.239148139953613,
      "learning_rate": 3.48e-05,
      "loss": 5.6578,
      "step": 88
    },
    {
      "epoch": 0.051237766263673,
      "grad_norm": 6.162630081176758,
      "learning_rate": 3.52e-05,
      "loss": 4.9911,
      "step": 89
    },
    {
      "epoch": 0.05181347150259067,
      "grad_norm": 7.2612433433532715,
      "learning_rate": 3.56e-05,
      "loss": 5.7976,
      "step": 90
    },
    {
      "epoch": 0.05238917674150835,
      "grad_norm": 6.149419784545898,
      "learning_rate": 3.6e-05,
      "loss": 4.9756,
      "step": 91
    },
    {
      "epoch": 0.05296488198042602,
      "grad_norm": 7.4116106033325195,
      "learning_rate": 3.6400000000000004e-05,
      "loss": 5.5805,
      "step": 92
    },
    {
      "epoch": 0.0535405872193437,
      "grad_norm": 5.512300491333008,
      "learning_rate": 3.68e-05,
      "loss": 4.5575,
      "step": 93
    },
    {
      "epoch": 0.05411629245826137,
      "grad_norm": 14.799551963806152,
      "learning_rate": 3.72e-05,
      "loss": 5.2244,
      "step": 94
    },
    {
      "epoch": 0.05469199769717904,
      "grad_norm": 9.756938934326172,
      "learning_rate": 3.76e-05,
      "loss": 4.8444,
      "step": 95
    },
    {
      "epoch": 0.055267702936096716,
      "grad_norm": 6.400147914886475,
      "learning_rate": 3.8e-05,
      "loss": 5.5091,
      "step": 96
    },
    {
      "epoch": 0.055843408175014396,
      "grad_norm": 8.406181335449219,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 5.2641,
      "step": 97
    },
    {
      "epoch": 0.05641911341393207,
      "grad_norm": 6.860042572021484,
      "learning_rate": 3.88e-05,
      "loss": 5.2917,
      "step": 98
    },
    {
      "epoch": 0.05699481865284974,
      "grad_norm": 7.542653560638428,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 5.1584,
      "step": 99
    },
    {
      "epoch": 0.057570523891767415,
      "grad_norm": 8.149137496948242,
      "learning_rate": 3.960000000000001e-05,
      "loss": 5.5326,
      "step": 100
    },
    {
      "epoch": 0.05814622913068509,
      "grad_norm": 5.590121269226074,
      "learning_rate": 4e-05,
      "loss": 5.2789,
      "step": 101
    },
    {
      "epoch": 0.05872193436960276,
      "grad_norm": 7.877676010131836,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 4.8526,
      "step": 102
    },
    {
      "epoch": 0.059297639608520435,
      "grad_norm": 5.773808479309082,
      "learning_rate": 4.08e-05,
      "loss": 5.033,
      "step": 103
    },
    {
      "epoch": 0.059873344847438115,
      "grad_norm": 6.092824935913086,
      "learning_rate": 4.12e-05,
      "loss": 4.8936,
      "step": 104
    },
    {
      "epoch": 0.06044905008635579,
      "grad_norm": 5.934675693511963,
      "learning_rate": 4.16e-05,
      "loss": 4.4764,
      "step": 105
    },
    {
      "epoch": 0.06102475532527346,
      "grad_norm": 5.622652530670166,
      "learning_rate": 4.2e-05,
      "loss": 5.1344,
      "step": 106
    },
    {
      "epoch": 0.061600460564191134,
      "grad_norm": 7.697418212890625,
      "learning_rate": 4.24e-05,
      "loss": 5.2087,
      "step": 107
    },
    {
      "epoch": 0.06217616580310881,
      "grad_norm": 5.204082489013672,
      "learning_rate": 4.2800000000000004e-05,
      "loss": 4.6294,
      "step": 108
    },
    {
      "epoch": 0.06275187104202648,
      "grad_norm": 6.288537979125977,
      "learning_rate": 4.32e-05,
      "loss": 5.3009,
      "step": 109
    },
    {
      "epoch": 0.06332757628094415,
      "grad_norm": 6.717288017272949,
      "learning_rate": 4.36e-05,
      "loss": 5.5392,
      "step": 110
    },
    {
      "epoch": 0.06390328151986183,
      "grad_norm": 5.432399272918701,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 4.3602,
      "step": 111
    },
    {
      "epoch": 0.0644789867587795,
      "grad_norm": 6.823062896728516,
      "learning_rate": 4.44e-05,
      "loss": 5.7343,
      "step": 112
    },
    {
      "epoch": 0.06505469199769717,
      "grad_norm": 6.532074928283691,
      "learning_rate": 4.4800000000000005e-05,
      "loss": 5.0605,
      "step": 113
    },
    {
      "epoch": 0.06563039723661486,
      "grad_norm": 5.982126712799072,
      "learning_rate": 4.52e-05,
      "loss": 5.2182,
      "step": 114
    },
    {
      "epoch": 0.06620610247553253,
      "grad_norm": 5.759943962097168,
      "learning_rate": 4.5600000000000004e-05,
      "loss": 4.9098,
      "step": 115
    },
    {
      "epoch": 0.0667818077144502,
      "grad_norm": 5.147834300994873,
      "learning_rate": 4.600000000000001e-05,
      "loss": 4.8671,
      "step": 116
    },
    {
      "epoch": 0.06735751295336788,
      "grad_norm": 8.015042304992676,
      "learning_rate": 4.64e-05,
      "loss": 5.7445,
      "step": 117
    },
    {
      "epoch": 0.06793321819228555,
      "grad_norm": 7.161843299865723,
      "learning_rate": 4.6800000000000006e-05,
      "loss": 5.9092,
      "step": 118
    },
    {
      "epoch": 0.06850892343120323,
      "grad_norm": 9.394163131713867,
      "learning_rate": 4.72e-05,
      "loss": 4.7243,
      "step": 119
    },
    {
      "epoch": 0.0690846286701209,
      "grad_norm": 4.96219539642334,
      "learning_rate": 4.76e-05,
      "loss": 4.7233,
      "step": 120
    },
    {
      "epoch": 0.06966033390903857,
      "grad_norm": 6.473387241363525,
      "learning_rate": 4.8e-05,
      "loss": 5.1295,
      "step": 121
    },
    {
      "epoch": 0.07023603914795624,
      "grad_norm": 6.797422885894775,
      "learning_rate": 4.8400000000000004e-05,
      "loss": 4.7697,
      "step": 122
    },
    {
      "epoch": 0.07081174438687392,
      "grad_norm": 6.656020641326904,
      "learning_rate": 4.88e-05,
      "loss": 5.2377,
      "step": 123
    },
    {
      "epoch": 0.07138744962579159,
      "grad_norm": 5.552718639373779,
      "learning_rate": 4.92e-05,
      "loss": 4.4741,
      "step": 124
    },
    {
      "epoch": 0.07196315486470926,
      "grad_norm": 6.101820468902588,
      "learning_rate": 4.96e-05,
      "loss": 4.4192,
      "step": 125
    },
    {
      "epoch": 0.07253886010362694,
      "grad_norm": 7.695935249328613,
      "learning_rate": 5e-05,
      "loss": 5.4128,
      "step": 126
    },
    {
      "epoch": 0.07311456534254462,
      "grad_norm": 6.9946208000183105,
      "learning_rate": 5.0400000000000005e-05,
      "loss": 5.4829,
      "step": 127
    },
    {
      "epoch": 0.0736902705814623,
      "grad_norm": 16.10480308532715,
      "learning_rate": 5.08e-05,
      "loss": 4.6945,
      "step": 128
    },
    {
      "epoch": 0.07426597582037997,
      "grad_norm": 5.313148021697998,
      "learning_rate": 5.1200000000000004e-05,
      "loss": 4.2429,
      "step": 129
    },
    {
      "epoch": 0.07484168105929764,
      "grad_norm": 5.506260871887207,
      "learning_rate": 5.16e-05,
      "loss": 4.7241,
      "step": 130
    },
    {
      "epoch": 0.07541738629821532,
      "grad_norm": 5.655925273895264,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 5.4156,
      "step": 131
    },
    {
      "epoch": 0.07599309153713299,
      "grad_norm": 6.528857231140137,
      "learning_rate": 5.2400000000000007e-05,
      "loss": 5.3606,
      "step": 132
    },
    {
      "epoch": 0.07656879677605066,
      "grad_norm": 5.360299110412598,
      "learning_rate": 5.28e-05,
      "loss": 5.0686,
      "step": 133
    },
    {
      "epoch": 0.07714450201496834,
      "grad_norm": 5.301785945892334,
      "learning_rate": 5.3200000000000006e-05,
      "loss": 4.845,
      "step": 134
    },
    {
      "epoch": 0.07772020725388601,
      "grad_norm": 4.986385345458984,
      "learning_rate": 5.360000000000001e-05,
      "loss": 5.1493,
      "step": 135
    },
    {
      "epoch": 0.07829591249280368,
      "grad_norm": 5.200460433959961,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 4.781,
      "step": 136
    },
    {
      "epoch": 0.07887161773172136,
      "grad_norm": 7.154032230377197,
      "learning_rate": 5.440000000000001e-05,
      "loss": 5.8801,
      "step": 137
    },
    {
      "epoch": 0.07944732297063903,
      "grad_norm": 4.641168117523193,
      "learning_rate": 5.4800000000000004e-05,
      "loss": 5.1929,
      "step": 138
    },
    {
      "epoch": 0.0800230282095567,
      "grad_norm": 4.8809123039245605,
      "learning_rate": 5.520000000000001e-05,
      "loss": 5.0221,
      "step": 139
    },
    {
      "epoch": 0.08059873344847437,
      "grad_norm": 5.0507402420043945,
      "learning_rate": 5.560000000000001e-05,
      "loss": 4.8543,
      "step": 140
    },
    {
      "epoch": 0.08117443868739206,
      "grad_norm": 6.459733963012695,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 5.051,
      "step": 141
    },
    {
      "epoch": 0.08175014392630973,
      "grad_norm": 6.107847690582275,
      "learning_rate": 5.6399999999999995e-05,
      "loss": 4.8338,
      "step": 142
    },
    {
      "epoch": 0.08232584916522741,
      "grad_norm": 6.28361701965332,
      "learning_rate": 5.68e-05,
      "loss": 5.1373,
      "step": 143
    },
    {
      "epoch": 0.08290155440414508,
      "grad_norm": 4.957414627075195,
      "learning_rate": 5.72e-05,
      "loss": 4.8154,
      "step": 144
    },
    {
      "epoch": 0.08347725964306275,
      "grad_norm": 4.774332046508789,
      "learning_rate": 5.76e-05,
      "loss": 4.7262,
      "step": 145
    },
    {
      "epoch": 0.08405296488198043,
      "grad_norm": 7.41762113571167,
      "learning_rate": 5.8e-05,
      "loss": 5.5137,
      "step": 146
    },
    {
      "epoch": 0.0846286701208981,
      "grad_norm": 7.484424591064453,
      "learning_rate": 5.8399999999999997e-05,
      "loss": 5.766,
      "step": 147
    },
    {
      "epoch": 0.08520437535981577,
      "grad_norm": 4.917182922363281,
      "learning_rate": 5.88e-05,
      "loss": 5.0193,
      "step": 148
    },
    {
      "epoch": 0.08578008059873345,
      "grad_norm": 4.608645915985107,
      "learning_rate": 5.92e-05,
      "loss": 5.0873,
      "step": 149
    },
    {
      "epoch": 0.08635578583765112,
      "grad_norm": 6.5947794914245605,
      "learning_rate": 5.96e-05,
      "loss": 4.9855,
      "step": 150
    },
    {
      "epoch": 0.08693149107656879,
      "grad_norm": 3.8302507400512695,
      "learning_rate": 6e-05,
      "loss": 3.7953,
      "step": 151
    },
    {
      "epoch": 0.08750719631548647,
      "grad_norm": 3.6352171897888184,
      "learning_rate": 6.04e-05,
      "loss": 4.1647,
      "step": 152
    },
    {
      "epoch": 0.08808290155440414,
      "grad_norm": 4.818563461303711,
      "learning_rate": 6.08e-05,
      "loss": 4.2128,
      "step": 153
    },
    {
      "epoch": 0.08865860679332183,
      "grad_norm": 7.7323503494262695,
      "learning_rate": 6.12e-05,
      "loss": 5.4562,
      "step": 154
    },
    {
      "epoch": 0.0892343120322395,
      "grad_norm": 5.785284996032715,
      "learning_rate": 6.16e-05,
      "loss": 4.8956,
      "step": 155
    },
    {
      "epoch": 0.08981001727115717,
      "grad_norm": 6.181385040283203,
      "learning_rate": 6.2e-05,
      "loss": 5.2373,
      "step": 156
    },
    {
      "epoch": 0.09038572251007485,
      "grad_norm": 6.015028476715088,
      "learning_rate": 6.24e-05,
      "loss": 4.3663,
      "step": 157
    },
    {
      "epoch": 0.09096142774899252,
      "grad_norm": 4.41657829284668,
      "learning_rate": 6.280000000000001e-05,
      "loss": 4.5991,
      "step": 158
    },
    {
      "epoch": 0.09153713298791019,
      "grad_norm": 6.5107622146606445,
      "learning_rate": 6.32e-05,
      "loss": 4.8784,
      "step": 159
    },
    {
      "epoch": 0.09211283822682786,
      "grad_norm": 4.11070442199707,
      "learning_rate": 6.36e-05,
      "loss": 4.6766,
      "step": 160
    },
    {
      "epoch": 0.09268854346574554,
      "grad_norm": 8.204343795776367,
      "learning_rate": 6.400000000000001e-05,
      "loss": 5.5088,
      "step": 161
    },
    {
      "epoch": 0.09326424870466321,
      "grad_norm": 3.9389288425445557,
      "learning_rate": 6.440000000000001e-05,
      "loss": 4.3476,
      "step": 162
    },
    {
      "epoch": 0.09383995394358088,
      "grad_norm": 5.597643852233887,
      "learning_rate": 6.48e-05,
      "loss": 4.9976,
      "step": 163
    },
    {
      "epoch": 0.09441565918249856,
      "grad_norm": 8.994287490844727,
      "learning_rate": 6.52e-05,
      "loss": 5.5959,
      "step": 164
    },
    {
      "epoch": 0.09499136442141623,
      "grad_norm": 5.60779333114624,
      "learning_rate": 6.560000000000001e-05,
      "loss": 4.6283,
      "step": 165
    },
    {
      "epoch": 0.0955670696603339,
      "grad_norm": 4.319982528686523,
      "learning_rate": 6.6e-05,
      "loss": 4.041,
      "step": 166
    },
    {
      "epoch": 0.09614277489925158,
      "grad_norm": 5.684337615966797,
      "learning_rate": 6.64e-05,
      "loss": 4.8941,
      "step": 167
    },
    {
      "epoch": 0.09671848013816926,
      "grad_norm": 3.872518539428711,
      "learning_rate": 6.680000000000001e-05,
      "loss": 4.2242,
      "step": 168
    },
    {
      "epoch": 0.09729418537708694,
      "grad_norm": 4.826557636260986,
      "learning_rate": 6.720000000000001e-05,
      "loss": 4.8546,
      "step": 169
    },
    {
      "epoch": 0.09786989061600461,
      "grad_norm": 4.660156726837158,
      "learning_rate": 6.76e-05,
      "loss": 4.3797,
      "step": 170
    },
    {
      "epoch": 0.09844559585492228,
      "grad_norm": 4.616059303283691,
      "learning_rate": 6.800000000000001e-05,
      "loss": 4.7293,
      "step": 171
    },
    {
      "epoch": 0.09902130109383996,
      "grad_norm": 7.685507774353027,
      "learning_rate": 6.840000000000001e-05,
      "loss": 5.6251,
      "step": 172
    },
    {
      "epoch": 0.09959700633275763,
      "grad_norm": 7.424576282501221,
      "learning_rate": 6.879999999999999e-05,
      "loss": 4.8253,
      "step": 173
    },
    {
      "epoch": 0.1001727115716753,
      "grad_norm": 4.379521369934082,
      "learning_rate": 6.92e-05,
      "loss": 4.5287,
      "step": 174
    },
    {
      "epoch": 0.10074841681059298,
      "grad_norm": 4.753964424133301,
      "learning_rate": 6.96e-05,
      "loss": 4.5554,
      "step": 175
    },
    {
      "epoch": 0.10132412204951065,
      "grad_norm": 4.559609413146973,
      "learning_rate": 7e-05,
      "loss": 4.5615,
      "step": 176
    },
    {
      "epoch": 0.10189982728842832,
      "grad_norm": 5.178406238555908,
      "learning_rate": 7.04e-05,
      "loss": 4.6344,
      "step": 177
    },
    {
      "epoch": 0.102475532527346,
      "grad_norm": 7.4183526039123535,
      "learning_rate": 7.08e-05,
      "loss": 4.5451,
      "step": 178
    },
    {
      "epoch": 0.10305123776626367,
      "grad_norm": 5.832037448883057,
      "learning_rate": 7.12e-05,
      "loss": 4.7097,
      "step": 179
    },
    {
      "epoch": 0.10362694300518134,
      "grad_norm": 4.9681925773620605,
      "learning_rate": 7.16e-05,
      "loss": 4.6288,
      "step": 180
    },
    {
      "epoch": 0.10420264824409903,
      "grad_norm": 4.886664867401123,
      "learning_rate": 7.2e-05,
      "loss": 4.7019,
      "step": 181
    },
    {
      "epoch": 0.1047783534830167,
      "grad_norm": 4.668741226196289,
      "learning_rate": 7.24e-05,
      "loss": 4.4534,
      "step": 182
    },
    {
      "epoch": 0.10535405872193437,
      "grad_norm": 7.459389686584473,
      "learning_rate": 7.280000000000001e-05,
      "loss": 5.4758,
      "step": 183
    },
    {
      "epoch": 0.10592976396085205,
      "grad_norm": 31.545869827270508,
      "learning_rate": 7.32e-05,
      "loss": 6.179,
      "step": 184
    },
    {
      "epoch": 0.10650546919976972,
      "grad_norm": 9.739182472229004,
      "learning_rate": 7.36e-05,
      "loss": 4.9662,
      "step": 185
    },
    {
      "epoch": 0.1070811744386874,
      "grad_norm": 4.12076997756958,
      "learning_rate": 7.4e-05,
      "loss": 3.88,
      "step": 186
    },
    {
      "epoch": 0.10765687967760507,
      "grad_norm": 5.808717727661133,
      "learning_rate": 7.44e-05,
      "loss": 4.6157,
      "step": 187
    },
    {
      "epoch": 0.10823258491652274,
      "grad_norm": 3.6208741664886475,
      "learning_rate": 7.48e-05,
      "loss": 3.9156,
      "step": 188
    },
    {
      "epoch": 0.10880829015544041,
      "grad_norm": 4.674955368041992,
      "learning_rate": 7.52e-05,
      "loss": 4.4751,
      "step": 189
    },
    {
      "epoch": 0.10938399539435809,
      "grad_norm": 5.331599235534668,
      "learning_rate": 7.560000000000001e-05,
      "loss": 4.3887,
      "step": 190
    },
    {
      "epoch": 0.10995970063327576,
      "grad_norm": 5.1405534744262695,
      "learning_rate": 7.6e-05,
      "loss": 4.9114,
      "step": 191
    },
    {
      "epoch": 0.11053540587219343,
      "grad_norm": 3.7066593170166016,
      "learning_rate": 7.64e-05,
      "loss": 3.8948,
      "step": 192
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 5.185431003570557,
      "learning_rate": 7.680000000000001e-05,
      "loss": 4.232,
      "step": 193
    },
    {
      "epoch": 0.11168681635002879,
      "grad_norm": 4.900607585906982,
      "learning_rate": 7.72e-05,
      "loss": 4.667,
      "step": 194
    },
    {
      "epoch": 0.11226252158894647,
      "grad_norm": 5.091091632843018,
      "learning_rate": 7.76e-05,
      "loss": 4.3946,
      "step": 195
    },
    {
      "epoch": 0.11283822682786414,
      "grad_norm": 4.859619617462158,
      "learning_rate": 7.800000000000001e-05,
      "loss": 4.6306,
      "step": 196
    },
    {
      "epoch": 0.11341393206678181,
      "grad_norm": 3.544200897216797,
      "learning_rate": 7.840000000000001e-05,
      "loss": 4.2118,
      "step": 197
    },
    {
      "epoch": 0.11398963730569948,
      "grad_norm": 8.28862190246582,
      "learning_rate": 7.88e-05,
      "loss": 4.4431,
      "step": 198
    },
    {
      "epoch": 0.11456534254461716,
      "grad_norm": 6.373688220977783,
      "learning_rate": 7.920000000000001e-05,
      "loss": 4.7554,
      "step": 199
    },
    {
      "epoch": 0.11514104778353483,
      "grad_norm": 6.8544392585754395,
      "learning_rate": 7.960000000000001e-05,
      "loss": 4.8723,
      "step": 200
    },
    {
      "epoch": 0.1157167530224525,
      "grad_norm": 7.207869052886963,
      "learning_rate": 8e-05,
      "loss": 4.1096,
      "step": 201
    },
    {
      "epoch": 0.11629245826137018,
      "grad_norm": 4.9073333740234375,
      "learning_rate": 8.04e-05,
      "loss": 3.6834,
      "step": 202
    },
    {
      "epoch": 0.11686816350028785,
      "grad_norm": 6.523554801940918,
      "learning_rate": 8.080000000000001e-05,
      "loss": 4.4934,
      "step": 203
    },
    {
      "epoch": 0.11744386873920552,
      "grad_norm": 9.581537246704102,
      "learning_rate": 8.120000000000001e-05,
      "loss": 4.8199,
      "step": 204
    },
    {
      "epoch": 0.1180195739781232,
      "grad_norm": 5.319664001464844,
      "learning_rate": 8.16e-05,
      "loss": 4.0881,
      "step": 205
    },
    {
      "epoch": 0.11859527921704087,
      "grad_norm": 7.609442710876465,
      "learning_rate": 8.2e-05,
      "loss": 5.1011,
      "step": 206
    },
    {
      "epoch": 0.11917098445595854,
      "grad_norm": 5.437283515930176,
      "learning_rate": 8.24e-05,
      "loss": 4.7683,
      "step": 207
    },
    {
      "epoch": 0.11974668969487623,
      "grad_norm": 9.015962600708008,
      "learning_rate": 8.28e-05,
      "loss": 5.1197,
      "step": 208
    },
    {
      "epoch": 0.1203223949337939,
      "grad_norm": 5.41486120223999,
      "learning_rate": 8.32e-05,
      "loss": 4.2228,
      "step": 209
    },
    {
      "epoch": 0.12089810017271158,
      "grad_norm": 4.068630218505859,
      "learning_rate": 8.36e-05,
      "loss": 3.9683,
      "step": 210
    },
    {
      "epoch": 0.12147380541162925,
      "grad_norm": 4.818974494934082,
      "learning_rate": 8.4e-05,
      "loss": 4.3969,
      "step": 211
    },
    {
      "epoch": 0.12204951065054692,
      "grad_norm": 8.309637069702148,
      "learning_rate": 8.44e-05,
      "loss": 4.8983,
      "step": 212
    },
    {
      "epoch": 0.1226252158894646,
      "grad_norm": 5.997379302978516,
      "learning_rate": 8.48e-05,
      "loss": 4.6983,
      "step": 213
    },
    {
      "epoch": 0.12320092112838227,
      "grad_norm": 6.416568279266357,
      "learning_rate": 8.52e-05,
      "loss": 4.6,
      "step": 214
    },
    {
      "epoch": 0.12377662636729994,
      "grad_norm": 5.038214206695557,
      "learning_rate": 8.560000000000001e-05,
      "loss": 4.1803,
      "step": 215
    },
    {
      "epoch": 0.12435233160621761,
      "grad_norm": 5.035988807678223,
      "learning_rate": 8.6e-05,
      "loss": 4.1585,
      "step": 216
    },
    {
      "epoch": 0.12492803684513529,
      "grad_norm": 6.7663726806640625,
      "learning_rate": 8.64e-05,
      "loss": 4.4256,
      "step": 217
    },
    {
      "epoch": 0.12550374208405296,
      "grad_norm": 5.394269943237305,
      "learning_rate": 8.680000000000001e-05,
      "loss": 3.9008,
      "step": 218
    },
    {
      "epoch": 0.12607944732297063,
      "grad_norm": 5.4501800537109375,
      "learning_rate": 8.72e-05,
      "loss": 3.9869,
      "step": 219
    },
    {
      "epoch": 0.1266551525618883,
      "grad_norm": 4.7380170822143555,
      "learning_rate": 8.76e-05,
      "loss": 4.0876,
      "step": 220
    },
    {
      "epoch": 0.12723085780080598,
      "grad_norm": 6.059116840362549,
      "learning_rate": 8.800000000000001e-05,
      "loss": 4.147,
      "step": 221
    },
    {
      "epoch": 0.12780656303972365,
      "grad_norm": 5.5021586418151855,
      "learning_rate": 8.840000000000001e-05,
      "loss": 4.4547,
      "step": 222
    },
    {
      "epoch": 0.12838226827864133,
      "grad_norm": 4.760106563568115,
      "learning_rate": 8.88e-05,
      "loss": 4.075,
      "step": 223
    },
    {
      "epoch": 0.128957973517559,
      "grad_norm": 7.5847649574279785,
      "learning_rate": 8.92e-05,
      "loss": 4.6163,
      "step": 224
    },
    {
      "epoch": 0.12953367875647667,
      "grad_norm": 6.257955074310303,
      "learning_rate": 8.960000000000001e-05,
      "loss": 4.6043,
      "step": 225
    },
    {
      "epoch": 0.13010938399539435,
      "grad_norm": 7.368046283721924,
      "learning_rate": 9e-05,
      "loss": 4.7961,
      "step": 226
    },
    {
      "epoch": 0.13068508923431202,
      "grad_norm": 4.385096549987793,
      "learning_rate": 9.04e-05,
      "loss": 4.1968,
      "step": 227
    },
    {
      "epoch": 0.13126079447322972,
      "grad_norm": 6.34293794631958,
      "learning_rate": 9.080000000000001e-05,
      "loss": 4.3076,
      "step": 228
    },
    {
      "epoch": 0.1318364997121474,
      "grad_norm": 6.403743267059326,
      "learning_rate": 9.120000000000001e-05,
      "loss": 3.8917,
      "step": 229
    },
    {
      "epoch": 0.13241220495106507,
      "grad_norm": 6.792156219482422,
      "learning_rate": 9.16e-05,
      "loss": 3.9843,
      "step": 230
    },
    {
      "epoch": 0.13298791018998274,
      "grad_norm": 8.062408447265625,
      "learning_rate": 9.200000000000001e-05,
      "loss": 4.2562,
      "step": 231
    },
    {
      "epoch": 0.1335636154289004,
      "grad_norm": 8.513936042785645,
      "learning_rate": 9.240000000000001e-05,
      "loss": 4.6536,
      "step": 232
    },
    {
      "epoch": 0.13413932066781808,
      "grad_norm": 5.92789363861084,
      "learning_rate": 9.28e-05,
      "loss": 4.104,
      "step": 233
    },
    {
      "epoch": 0.13471502590673576,
      "grad_norm": 44.009300231933594,
      "learning_rate": 9.320000000000002e-05,
      "loss": 4.8297,
      "step": 234
    },
    {
      "epoch": 0.13529073114565343,
      "grad_norm": 5.342921257019043,
      "learning_rate": 9.360000000000001e-05,
      "loss": 4.0662,
      "step": 235
    },
    {
      "epoch": 0.1358664363845711,
      "grad_norm": 5.618771076202393,
      "learning_rate": 9.4e-05,
      "loss": 4.1692,
      "step": 236
    },
    {
      "epoch": 0.13644214162348878,
      "grad_norm": 6.6655473709106445,
      "learning_rate": 9.44e-05,
      "loss": 4.2759,
      "step": 237
    },
    {
      "epoch": 0.13701784686240645,
      "grad_norm": 6.415508270263672,
      "learning_rate": 9.48e-05,
      "loss": 4.025,
      "step": 238
    },
    {
      "epoch": 0.13759355210132412,
      "grad_norm": 62.65280532836914,
      "learning_rate": 9.52e-05,
      "loss": 5.3187,
      "step": 239
    },
    {
      "epoch": 0.1381692573402418,
      "grad_norm": 5.9870147705078125,
      "learning_rate": 9.56e-05,
      "loss": 4.3549,
      "step": 240
    },
    {
      "epoch": 0.13874496257915947,
      "grad_norm": 6.323814868927002,
      "learning_rate": 9.6e-05,
      "loss": 4.0618,
      "step": 241
    },
    {
      "epoch": 0.13932066781807714,
      "grad_norm": 7.25873327255249,
      "learning_rate": 9.64e-05,
      "loss": 4.6113,
      "step": 242
    },
    {
      "epoch": 0.13989637305699482,
      "grad_norm": 6.708962440490723,
      "learning_rate": 9.680000000000001e-05,
      "loss": 4.2734,
      "step": 243
    },
    {
      "epoch": 0.1404720782959125,
      "grad_norm": 6.766256332397461,
      "learning_rate": 9.72e-05,
      "loss": 3.8169,
      "step": 244
    },
    {
      "epoch": 0.14104778353483016,
      "grad_norm": 9.25779914855957,
      "learning_rate": 9.76e-05,
      "loss": 4.0823,
      "step": 245
    },
    {
      "epoch": 0.14162348877374784,
      "grad_norm": 6.24402379989624,
      "learning_rate": 9.8e-05,
      "loss": 3.9761,
      "step": 246
    },
    {
      "epoch": 0.1421991940126655,
      "grad_norm": 4.627258777618408,
      "learning_rate": 9.84e-05,
      "loss": 3.3376,
      "step": 247
    },
    {
      "epoch": 0.14277489925158318,
      "grad_norm": 6.5364766120910645,
      "learning_rate": 9.88e-05,
      "loss": 3.9101,
      "step": 248
    },
    {
      "epoch": 0.14335060449050085,
      "grad_norm": 6.722381591796875,
      "learning_rate": 9.92e-05,
      "loss": 4.2916,
      "step": 249
    },
    {
      "epoch": 0.14392630972941853,
      "grad_norm": 7.2800493240356445,
      "learning_rate": 9.960000000000001e-05,
      "loss": 4.1714,
      "step": 250
    },
    {
      "epoch": 0.1445020149683362,
      "grad_norm": 9.137832641601562,
      "learning_rate": 0.0001,
      "loss": 3.9733,
      "step": 251
    },
    {
      "epoch": 0.14507772020725387,
      "grad_norm": 5.290084362030029,
      "learning_rate": 0.0001004,
      "loss": 3.8465,
      "step": 252
    },
    {
      "epoch": 0.14565342544617155,
      "grad_norm": 7.146475791931152,
      "learning_rate": 0.00010080000000000001,
      "loss": 4.154,
      "step": 253
    },
    {
      "epoch": 0.14622913068508925,
      "grad_norm": 5.462000370025635,
      "learning_rate": 0.00010120000000000001,
      "loss": 3.8403,
      "step": 254
    },
    {
      "epoch": 0.14680483592400692,
      "grad_norm": 8.053996086120605,
      "learning_rate": 0.0001016,
      "loss": 4.224,
      "step": 255
    },
    {
      "epoch": 0.1473805411629246,
      "grad_norm": 56.904518127441406,
      "learning_rate": 0.00010200000000000001,
      "loss": 5.3512,
      "step": 256
    },
    {
      "epoch": 0.14795624640184227,
      "grad_norm": 67.7396469116211,
      "learning_rate": 0.00010240000000000001,
      "loss": 4.136,
      "step": 257
    },
    {
      "epoch": 0.14853195164075994,
      "grad_norm": 5.19423770904541,
      "learning_rate": 0.0001028,
      "loss": 3.6272,
      "step": 258
    },
    {
      "epoch": 0.1491076568796776,
      "grad_norm": 6.946446418762207,
      "learning_rate": 0.0001032,
      "loss": 3.7617,
      "step": 259
    },
    {
      "epoch": 0.1496833621185953,
      "grad_norm": 6.839754104614258,
      "learning_rate": 0.00010360000000000001,
      "loss": 4.2895,
      "step": 260
    },
    {
      "epoch": 0.15025906735751296,
      "grad_norm": 7.3253254890441895,
      "learning_rate": 0.00010400000000000001,
      "loss": 4.0997,
      "step": 261
    },
    {
      "epoch": 0.15083477259643063,
      "grad_norm": 6.981521129608154,
      "learning_rate": 0.0001044,
      "loss": 3.4663,
      "step": 262
    },
    {
      "epoch": 0.1514104778353483,
      "grad_norm": 6.424066543579102,
      "learning_rate": 0.00010480000000000001,
      "loss": 4.0914,
      "step": 263
    },
    {
      "epoch": 0.15198618307426598,
      "grad_norm": 6.7790398597717285,
      "learning_rate": 0.00010520000000000001,
      "loss": 4.0818,
      "step": 264
    },
    {
      "epoch": 0.15256188831318365,
      "grad_norm": 7.887113094329834,
      "learning_rate": 0.0001056,
      "loss": 4.3784,
      "step": 265
    },
    {
      "epoch": 0.15313759355210133,
      "grad_norm": 8.3016939163208,
      "learning_rate": 0.00010600000000000002,
      "loss": 3.7843,
      "step": 266
    },
    {
      "epoch": 0.153713298791019,
      "grad_norm": 10.073237419128418,
      "learning_rate": 0.00010640000000000001,
      "loss": 4.0118,
      "step": 267
    },
    {
      "epoch": 0.15428900402993667,
      "grad_norm": 6.9664106369018555,
      "learning_rate": 0.00010680000000000001,
      "loss": 3.8644,
      "step": 268
    },
    {
      "epoch": 0.15486470926885434,
      "grad_norm": 8.479534149169922,
      "learning_rate": 0.00010720000000000002,
      "loss": 3.7009,
      "step": 269
    },
    {
      "epoch": 0.15544041450777202,
      "grad_norm": 8.317602157592773,
      "learning_rate": 0.00010760000000000001,
      "loss": 3.7018,
      "step": 270
    },
    {
      "epoch": 0.1560161197466897,
      "grad_norm": 6.020889759063721,
      "learning_rate": 0.00010800000000000001,
      "loss": 3.656,
      "step": 271
    },
    {
      "epoch": 0.15659182498560736,
      "grad_norm": 7.147673606872559,
      "learning_rate": 0.00010840000000000002,
      "loss": 3.9216,
      "step": 272
    },
    {
      "epoch": 0.15716753022452504,
      "grad_norm": 5.485556125640869,
      "learning_rate": 0.00010880000000000002,
      "loss": 3.4732,
      "step": 273
    },
    {
      "epoch": 0.1577432354634427,
      "grad_norm": 7.432086944580078,
      "learning_rate": 0.00010920000000000001,
      "loss": 3.423,
      "step": 274
    },
    {
      "epoch": 0.15831894070236038,
      "grad_norm": 6.897833824157715,
      "learning_rate": 0.00010960000000000001,
      "loss": 3.6169,
      "step": 275
    },
    {
      "epoch": 0.15889464594127806,
      "grad_norm": 7.707437992095947,
      "learning_rate": 0.00011000000000000002,
      "loss": 3.6883,
      "step": 276
    },
    {
      "epoch": 0.15947035118019573,
      "grad_norm": 5.546234607696533,
      "learning_rate": 0.00011040000000000001,
      "loss": 3.8388,
      "step": 277
    },
    {
      "epoch": 0.1600460564191134,
      "grad_norm": 10.001431465148926,
      "learning_rate": 0.00011080000000000001,
      "loss": 3.372,
      "step": 278
    },
    {
      "epoch": 0.16062176165803108,
      "grad_norm": 8.793180465698242,
      "learning_rate": 0.00011120000000000002,
      "loss": 3.7929,
      "step": 279
    },
    {
      "epoch": 0.16119746689694875,
      "grad_norm": 8.189177513122559,
      "learning_rate": 0.00011160000000000002,
      "loss": 4.0091,
      "step": 280
    },
    {
      "epoch": 0.16177317213586645,
      "grad_norm": 6.998697280883789,
      "learning_rate": 0.00011200000000000001,
      "loss": 3.648,
      "step": 281
    },
    {
      "epoch": 0.16234887737478412,
      "grad_norm": 8.115317344665527,
      "learning_rate": 0.00011240000000000002,
      "loss": 4.0327,
      "step": 282
    },
    {
      "epoch": 0.1629245826137018,
      "grad_norm": 7.597106456756592,
      "learning_rate": 0.00011279999999999999,
      "loss": 3.7811,
      "step": 283
    },
    {
      "epoch": 0.16350028785261947,
      "grad_norm": 6.518374443054199,
      "learning_rate": 0.0001132,
      "loss": 3.3359,
      "step": 284
    },
    {
      "epoch": 0.16407599309153714,
      "grad_norm": 6.962795257568359,
      "learning_rate": 0.0001136,
      "loss": 3.3726,
      "step": 285
    },
    {
      "epoch": 0.16465169833045482,
      "grad_norm": 8.1845703125,
      "learning_rate": 0.00011399999999999999,
      "loss": 4.0042,
      "step": 286
    },
    {
      "epoch": 0.1652274035693725,
      "grad_norm": 6.869271755218506,
      "learning_rate": 0.0001144,
      "loss": 3.4989,
      "step": 287
    },
    {
      "epoch": 0.16580310880829016,
      "grad_norm": 12.261098861694336,
      "learning_rate": 0.0001148,
      "loss": 4.1045,
      "step": 288
    },
    {
      "epoch": 0.16637881404720783,
      "grad_norm": 6.912962913513184,
      "learning_rate": 0.0001152,
      "loss": 3.6853,
      "step": 289
    },
    {
      "epoch": 0.1669545192861255,
      "grad_norm": 8.545379638671875,
      "learning_rate": 0.00011559999999999999,
      "loss": 3.8903,
      "step": 290
    },
    {
      "epoch": 0.16753022452504318,
      "grad_norm": 15.040228843688965,
      "learning_rate": 0.000116,
      "loss": 3.4079,
      "step": 291
    },
    {
      "epoch": 0.16810592976396085,
      "grad_norm": 7.038132667541504,
      "learning_rate": 0.0001164,
      "loss": 3.7119,
      "step": 292
    },
    {
      "epoch": 0.16868163500287853,
      "grad_norm": 6.259817123413086,
      "learning_rate": 0.00011679999999999999,
      "loss": 3.4931,
      "step": 293
    },
    {
      "epoch": 0.1692573402417962,
      "grad_norm": 6.947351455688477,
      "learning_rate": 0.0001172,
      "loss": 3.677,
      "step": 294
    },
    {
      "epoch": 0.16983304548071387,
      "grad_norm": 14.260014533996582,
      "learning_rate": 0.0001176,
      "loss": 3.9591,
      "step": 295
    },
    {
      "epoch": 0.17040875071963155,
      "grad_norm": 6.70070743560791,
      "learning_rate": 0.000118,
      "loss": 3.2433,
      "step": 296
    },
    {
      "epoch": 0.17098445595854922,
      "grad_norm": 11.697699546813965,
      "learning_rate": 0.0001184,
      "loss": 4.0909,
      "step": 297
    },
    {
      "epoch": 0.1715601611974669,
      "grad_norm": 10.029029846191406,
      "learning_rate": 0.0001188,
      "loss": 3.5743,
      "step": 298
    },
    {
      "epoch": 0.17213586643638457,
      "grad_norm": 6.6930365562438965,
      "learning_rate": 0.0001192,
      "loss": 3.2007,
      "step": 299
    },
    {
      "epoch": 0.17271157167530224,
      "grad_norm": 21.772619247436523,
      "learning_rate": 0.00011960000000000001,
      "loss": 3.8505,
      "step": 300
    },
    {
      "epoch": 0.1732872769142199,
      "grad_norm": 9.126256942749023,
      "learning_rate": 0.00012,
      "loss": 3.5777,
      "step": 301
    },
    {
      "epoch": 0.17386298215313759,
      "grad_norm": 7.574469566345215,
      "learning_rate": 0.0001204,
      "loss": 3.5329,
      "step": 302
    },
    {
      "epoch": 0.17443868739205526,
      "grad_norm": 6.436075687408447,
      "learning_rate": 0.0001208,
      "loss": 3.279,
      "step": 303
    },
    {
      "epoch": 0.17501439263097293,
      "grad_norm": 5.945929527282715,
      "learning_rate": 0.0001212,
      "loss": 3.4338,
      "step": 304
    },
    {
      "epoch": 0.1755900978698906,
      "grad_norm": 5.7057785987854,
      "learning_rate": 0.0001216,
      "loss": 3.2369,
      "step": 305
    },
    {
      "epoch": 0.17616580310880828,
      "grad_norm": 9.411810874938965,
      "learning_rate": 0.000122,
      "loss": 3.5364,
      "step": 306
    },
    {
      "epoch": 0.17674150834772595,
      "grad_norm": 8.872260093688965,
      "learning_rate": 0.0001224,
      "loss": 3.7803,
      "step": 307
    },
    {
      "epoch": 0.17731721358664365,
      "grad_norm": 46.1115837097168,
      "learning_rate": 0.0001228,
      "loss": 3.7188,
      "step": 308
    },
    {
      "epoch": 0.17789291882556132,
      "grad_norm": 48.33805465698242,
      "learning_rate": 0.0001232,
      "loss": 3.7491,
      "step": 309
    },
    {
      "epoch": 0.178468624064479,
      "grad_norm": 7.272097587585449,
      "learning_rate": 0.0001236,
      "loss": 3.559,
      "step": 310
    },
    {
      "epoch": 0.17904432930339667,
      "grad_norm": 7.471408367156982,
      "learning_rate": 0.000124,
      "loss": 3.6014,
      "step": 311
    },
    {
      "epoch": 0.17962003454231434,
      "grad_norm": 11.095893859863281,
      "learning_rate": 0.00012440000000000002,
      "loss": 3.5741,
      "step": 312
    },
    {
      "epoch": 0.18019573978123202,
      "grad_norm": 8.782601356506348,
      "learning_rate": 0.0001248,
      "loss": 3.2475,
      "step": 313
    },
    {
      "epoch": 0.1807714450201497,
      "grad_norm": 7.485610485076904,
      "learning_rate": 0.0001252,
      "loss": 3.0304,
      "step": 314
    },
    {
      "epoch": 0.18134715025906736,
      "grad_norm": 7.794425964355469,
      "learning_rate": 0.00012560000000000002,
      "loss": 2.9428,
      "step": 315
    },
    {
      "epoch": 0.18192285549798504,
      "grad_norm": 6.470662593841553,
      "learning_rate": 0.000126,
      "loss": 3.4341,
      "step": 316
    },
    {
      "epoch": 0.1824985607369027,
      "grad_norm": 10.054426193237305,
      "learning_rate": 0.0001264,
      "loss": 2.941,
      "step": 317
    },
    {
      "epoch": 0.18307426597582038,
      "grad_norm": 93.38629150390625,
      "learning_rate": 0.00012680000000000002,
      "loss": 4.2291,
      "step": 318
    },
    {
      "epoch": 0.18364997121473806,
      "grad_norm": 9.805968284606934,
      "learning_rate": 0.0001272,
      "loss": 3.0641,
      "step": 319
    },
    {
      "epoch": 0.18422567645365573,
      "grad_norm": 6.104334831237793,
      "learning_rate": 0.0001276,
      "loss": 3.0856,
      "step": 320
    },
    {
      "epoch": 0.1848013816925734,
      "grad_norm": 8.24195384979248,
      "learning_rate": 0.00012800000000000002,
      "loss": 3.0774,
      "step": 321
    },
    {
      "epoch": 0.18537708693149108,
      "grad_norm": 6.327628135681152,
      "learning_rate": 0.0001284,
      "loss": 3.0826,
      "step": 322
    },
    {
      "epoch": 0.18595279217040875,
      "grad_norm": 11.529990196228027,
      "learning_rate": 0.00012880000000000001,
      "loss": 3.7882,
      "step": 323
    },
    {
      "epoch": 0.18652849740932642,
      "grad_norm": 9.700762748718262,
      "learning_rate": 0.00012920000000000002,
      "loss": 3.4958,
      "step": 324
    },
    {
      "epoch": 0.1871042026482441,
      "grad_norm": 10.289152145385742,
      "learning_rate": 0.0001296,
      "loss": 3.3652,
      "step": 325
    },
    {
      "epoch": 0.18767990788716177,
      "grad_norm": 6.888269901275635,
      "learning_rate": 0.00013000000000000002,
      "loss": 3.1086,
      "step": 326
    },
    {
      "epoch": 0.18825561312607944,
      "grad_norm": 9.220719337463379,
      "learning_rate": 0.0001304,
      "loss": 3.5314,
      "step": 327
    },
    {
      "epoch": 0.1888313183649971,
      "grad_norm": 9.044048309326172,
      "learning_rate": 0.0001308,
      "loss": 2.943,
      "step": 328
    },
    {
      "epoch": 0.1894070236039148,
      "grad_norm": 11.338268280029297,
      "learning_rate": 0.00013120000000000002,
      "loss": 3.4617,
      "step": 329
    },
    {
      "epoch": 0.18998272884283246,
      "grad_norm": 5.949525833129883,
      "learning_rate": 0.0001316,
      "loss": 2.8324,
      "step": 330
    },
    {
      "epoch": 0.19055843408175013,
      "grad_norm": 9.158703804016113,
      "learning_rate": 0.000132,
      "loss": 3.1961,
      "step": 331
    },
    {
      "epoch": 0.1911341393206678,
      "grad_norm": 8.708706855773926,
      "learning_rate": 0.00013240000000000002,
      "loss": 3.1941,
      "step": 332
    },
    {
      "epoch": 0.19170984455958548,
      "grad_norm": 10.610583305358887,
      "learning_rate": 0.0001328,
      "loss": 3.3617,
      "step": 333
    },
    {
      "epoch": 0.19228554979850315,
      "grad_norm": 8.023892402648926,
      "learning_rate": 0.0001332,
      "loss": 3.1775,
      "step": 334
    },
    {
      "epoch": 0.19286125503742085,
      "grad_norm": 7.895623683929443,
      "learning_rate": 0.00013360000000000002,
      "loss": 3.1033,
      "step": 335
    },
    {
      "epoch": 0.19343696027633853,
      "grad_norm": 6.376975059509277,
      "learning_rate": 0.000134,
      "loss": 2.808,
      "step": 336
    },
    {
      "epoch": 0.1940126655152562,
      "grad_norm": 5.185142993927002,
      "learning_rate": 0.00013440000000000001,
      "loss": 2.8337,
      "step": 337
    },
    {
      "epoch": 0.19458837075417387,
      "grad_norm": 6.408693790435791,
      "learning_rate": 0.00013480000000000002,
      "loss": 3.0604,
      "step": 338
    },
    {
      "epoch": 0.19516407599309155,
      "grad_norm": 21.610239028930664,
      "learning_rate": 0.0001352,
      "loss": 3.431,
      "step": 339
    },
    {
      "epoch": 0.19573978123200922,
      "grad_norm": 9.485398292541504,
      "learning_rate": 0.00013560000000000002,
      "loss": 3.2208,
      "step": 340
    },
    {
      "epoch": 0.1963154864709269,
      "grad_norm": 6.460340976715088,
      "learning_rate": 0.00013600000000000003,
      "loss": 2.793,
      "step": 341
    },
    {
      "epoch": 0.19689119170984457,
      "grad_norm": 5.64215612411499,
      "learning_rate": 0.0001364,
      "loss": 2.8589,
      "step": 342
    },
    {
      "epoch": 0.19746689694876224,
      "grad_norm": 6.9033427238464355,
      "learning_rate": 0.00013680000000000002,
      "loss": 3.1031,
      "step": 343
    },
    {
      "epoch": 0.1980426021876799,
      "grad_norm": 5.724493980407715,
      "learning_rate": 0.00013720000000000003,
      "loss": 2.8605,
      "step": 344
    },
    {
      "epoch": 0.19861830742659758,
      "grad_norm": 15.779448509216309,
      "learning_rate": 0.00013759999999999998,
      "loss": 3.2151,
      "step": 345
    },
    {
      "epoch": 0.19919401266551526,
      "grad_norm": 6.960752964019775,
      "learning_rate": 0.000138,
      "loss": 2.8537,
      "step": 346
    },
    {
      "epoch": 0.19976971790443293,
      "grad_norm": 8.871850967407227,
      "learning_rate": 0.0001384,
      "loss": 2.7536,
      "step": 347
    },
    {
      "epoch": 0.2003454231433506,
      "grad_norm": 6.670348644256592,
      "learning_rate": 0.00013879999999999999,
      "loss": 2.9525,
      "step": 348
    },
    {
      "epoch": 0.20092112838226828,
      "grad_norm": 9.574007034301758,
      "learning_rate": 0.0001392,
      "loss": 2.7996,
      "step": 349
    },
    {
      "epoch": 0.20149683362118595,
      "grad_norm": 5.3862223625183105,
      "learning_rate": 0.0001396,
      "loss": 2.662,
      "step": 350
    },
    {
      "epoch": 0.20207253886010362,
      "grad_norm": 11.832735061645508,
      "learning_rate": 0.00014,
      "loss": 3.1706,
      "step": 351
    },
    {
      "epoch": 0.2026482440990213,
      "grad_norm": 8.553043365478516,
      "learning_rate": 0.0001404,
      "loss": 2.8034,
      "step": 352
    },
    {
      "epoch": 0.20322394933793897,
      "grad_norm": 17.231216430664062,
      "learning_rate": 0.0001408,
      "loss": 2.8267,
      "step": 353
    },
    {
      "epoch": 0.20379965457685664,
      "grad_norm": 10.80978012084961,
      "learning_rate": 0.0001412,
      "loss": 2.7008,
      "step": 354
    },
    {
      "epoch": 0.20437535981577432,
      "grad_norm": 7.117002010345459,
      "learning_rate": 0.0001416,
      "loss": 2.5399,
      "step": 355
    },
    {
      "epoch": 0.204951065054692,
      "grad_norm": 5.009802341461182,
      "learning_rate": 0.000142,
      "loss": 2.7215,
      "step": 356
    },
    {
      "epoch": 0.20552677029360966,
      "grad_norm": 16.786869049072266,
      "learning_rate": 0.0001424,
      "loss": 2.9873,
      "step": 357
    },
    {
      "epoch": 0.20610247553252733,
      "grad_norm": 7.779325008392334,
      "learning_rate": 0.0001428,
      "loss": 2.892,
      "step": 358
    },
    {
      "epoch": 0.206678180771445,
      "grad_norm": 9.354433059692383,
      "learning_rate": 0.0001432,
      "loss": 2.7065,
      "step": 359
    },
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 13.15522575378418,
      "learning_rate": 0.0001436,
      "loss": 2.8061,
      "step": 360
    },
    {
      "epoch": 0.20782959124928038,
      "grad_norm": 6.927896976470947,
      "learning_rate": 0.000144,
      "loss": 2.8687,
      "step": 361
    },
    {
      "epoch": 0.20840529648819806,
      "grad_norm": 8.532772064208984,
      "learning_rate": 0.0001444,
      "loss": 2.9418,
      "step": 362
    },
    {
      "epoch": 0.20898100172711573,
      "grad_norm": 8.618231773376465,
      "learning_rate": 0.0001448,
      "loss": 2.588,
      "step": 363
    },
    {
      "epoch": 0.2095567069660334,
      "grad_norm": 4.94150447845459,
      "learning_rate": 0.0001452,
      "loss": 2.5464,
      "step": 364
    },
    {
      "epoch": 0.21013241220495107,
      "grad_norm": 5.547298431396484,
      "learning_rate": 0.00014560000000000002,
      "loss": 2.755,
      "step": 365
    },
    {
      "epoch": 0.21070811744386875,
      "grad_norm": 8.270822525024414,
      "learning_rate": 0.000146,
      "loss": 2.8345,
      "step": 366
    },
    {
      "epoch": 0.21128382268278642,
      "grad_norm": 6.572064399719238,
      "learning_rate": 0.0001464,
      "loss": 2.6624,
      "step": 367
    },
    {
      "epoch": 0.2118595279217041,
      "grad_norm": 8.243054389953613,
      "learning_rate": 0.00014680000000000002,
      "loss": 2.7102,
      "step": 368
    },
    {
      "epoch": 0.21243523316062177,
      "grad_norm": 6.671678066253662,
      "learning_rate": 0.0001472,
      "loss": 2.4775,
      "step": 369
    },
    {
      "epoch": 0.21301093839953944,
      "grad_norm": 5.922910690307617,
      "learning_rate": 0.0001476,
      "loss": 2.919,
      "step": 370
    },
    {
      "epoch": 0.2135866436384571,
      "grad_norm": 12.84566593170166,
      "learning_rate": 0.000148,
      "loss": 2.5189,
      "step": 371
    },
    {
      "epoch": 0.2141623488773748,
      "grad_norm": 7.342642307281494,
      "learning_rate": 0.0001484,
      "loss": 2.8968,
      "step": 372
    },
    {
      "epoch": 0.21473805411629246,
      "grad_norm": 14.625147819519043,
      "learning_rate": 0.0001488,
      "loss": 2.6793,
      "step": 373
    },
    {
      "epoch": 0.21531375935521013,
      "grad_norm": 6.683467388153076,
      "learning_rate": 0.0001492,
      "loss": 2.3975,
      "step": 374
    },
    {
      "epoch": 0.2158894645941278,
      "grad_norm": 12.186212539672852,
      "learning_rate": 0.0001496,
      "loss": 2.856,
      "step": 375
    },
    {
      "epoch": 0.21646516983304548,
      "grad_norm": 8.417567253112793,
      "learning_rate": 0.00015000000000000001,
      "loss": 2.6326,
      "step": 376
    },
    {
      "epoch": 0.21704087507196315,
      "grad_norm": 5.414144992828369,
      "learning_rate": 0.0001504,
      "loss": 2.7417,
      "step": 377
    },
    {
      "epoch": 0.21761658031088082,
      "grad_norm": 13.388712882995605,
      "learning_rate": 0.0001508,
      "loss": 2.8813,
      "step": 378
    },
    {
      "epoch": 0.2181922855497985,
      "grad_norm": 6.375700950622559,
      "learning_rate": 0.00015120000000000002,
      "loss": 2.7187,
      "step": 379
    },
    {
      "epoch": 0.21876799078871617,
      "grad_norm": 9.897554397583008,
      "learning_rate": 0.0001516,
      "loss": 2.6278,
      "step": 380
    },
    {
      "epoch": 0.21934369602763384,
      "grad_norm": 10.079334259033203,
      "learning_rate": 0.000152,
      "loss": 2.4861,
      "step": 381
    },
    {
      "epoch": 0.21991940126655152,
      "grad_norm": 10.082268714904785,
      "learning_rate": 0.00015240000000000002,
      "loss": 2.6516,
      "step": 382
    },
    {
      "epoch": 0.2204951065054692,
      "grad_norm": 9.192161560058594,
      "learning_rate": 0.0001528,
      "loss": 2.3307,
      "step": 383
    },
    {
      "epoch": 0.22107081174438686,
      "grad_norm": 8.085034370422363,
      "learning_rate": 0.0001532,
      "loss": 2.3445,
      "step": 384
    },
    {
      "epoch": 0.22164651698330454,
      "grad_norm": 5.418321132659912,
      "learning_rate": 0.00015360000000000002,
      "loss": 2.7119,
      "step": 385
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 16.515369415283203,
      "learning_rate": 0.000154,
      "loss": 2.6647,
      "step": 386
    },
    {
      "epoch": 0.22279792746113988,
      "grad_norm": 11.138907432556152,
|
"learning_rate": 0.0001544, |
|
"loss": 2.6742, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.22337363270005758, |
|
"grad_norm": 20.75733184814453, |
|
"learning_rate": 0.00015480000000000002, |
|
"loss": 2.8834, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.22394933793897526, |
|
"grad_norm": 8.349270820617676, |
|
"learning_rate": 0.0001552, |
|
"loss": 2.6376, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.22452504317789293, |
|
"grad_norm": 6.902172088623047, |
|
"learning_rate": 0.00015560000000000001, |
|
"loss": 2.6186, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2251007484168106, |
|
"grad_norm": 14.718120574951172, |
|
"learning_rate": 0.00015600000000000002, |
|
"loss": 2.7649, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.22567645365572828, |
|
"grad_norm": 5.805610656738281, |
|
"learning_rate": 0.0001564, |
|
"loss": 2.6221, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.22625215889464595, |
|
"grad_norm": 6.138345718383789, |
|
"learning_rate": 0.00015680000000000002, |
|
"loss": 2.5751, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.22682786413356362, |
|
"grad_norm": 29.98923683166504, |
|
"learning_rate": 0.00015720000000000003, |
|
"loss": 2.824, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.2274035693724813, |
|
"grad_norm": 31.91318702697754, |
|
"learning_rate": 0.0001576, |
|
"loss": 2.6134, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.22797927461139897, |
|
"grad_norm": 10.812357902526855, |
|
"learning_rate": 0.00015800000000000002, |
|
"loss": 2.4594, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.22855497985031664, |
|
"grad_norm": 7.6294755935668945, |
|
"learning_rate": 0.00015840000000000003, |
|
"loss": 2.5259, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.22913068508923431, |
|
"grad_norm": 5.666753768920898, |
|
"learning_rate": 0.0001588, |
|
"loss": 2.6108, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.229706390328152, |
|
"grad_norm": 6.732410907745361, |
|
"learning_rate": 0.00015920000000000002, |
|
"loss": 2.5352, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.23028209556706966, |
|
"grad_norm": 6.749885082244873, |
|
"learning_rate": 0.0001596, |
|
"loss": 2.5103, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23085780080598733, |
|
"grad_norm": 5.389144420623779, |
|
"learning_rate": 0.00016, |
|
"loss": 2.5803, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.231433506044905, |
|
"grad_norm": 6.996800422668457, |
|
"learning_rate": 0.00016040000000000002, |
|
"loss": 2.7746, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.23200921128382268, |
|
"grad_norm": 22.8950138092041, |
|
"learning_rate": 0.0001608, |
|
"loss": 2.5619, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.23258491652274035, |
|
"grad_norm": 11.477226257324219, |
|
"learning_rate": 0.00016120000000000002, |
|
"loss": 2.4898, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.23316062176165803, |
|
"grad_norm": 8.584878921508789, |
|
"learning_rate": 0.00016160000000000002, |
|
"loss": 2.4191, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2337363270005757, |
|
"grad_norm": 6.987226963043213, |
|
"learning_rate": 0.000162, |
|
"loss": 2.4045, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.23431203223949337, |
|
"grad_norm": 12.917460441589355, |
|
"learning_rate": 0.00016240000000000002, |
|
"loss": 2.656, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.23488773747841105, |
|
"grad_norm": 13.053242683410645, |
|
"learning_rate": 0.0001628, |
|
"loss": 2.5026, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.23546344271732872, |
|
"grad_norm": 6.013350486755371, |
|
"learning_rate": 0.0001632, |
|
"loss": 2.4027, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.2360391479562464, |
|
"grad_norm": 21.95798110961914, |
|
"learning_rate": 0.0001636, |
|
"loss": 2.4155, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.23661485319516407, |
|
"grad_norm": 6.197417259216309, |
|
"learning_rate": 0.000164, |
|
"loss": 2.2512, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.23719055843408174, |
|
"grad_norm": 5.798823356628418, |
|
"learning_rate": 0.0001644, |
|
"loss": 2.5775, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.2377662636729994, |
|
"grad_norm": 12.58922290802002, |
|
"learning_rate": 0.0001648, |
|
"loss": 2.2999, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.23834196891191708, |
|
"grad_norm": 6.5375213623046875, |
|
"learning_rate": 0.0001652, |
|
"loss": 2.5818, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.23891767415083479, |
|
"grad_norm": 8.916410446166992, |
|
"learning_rate": 0.0001656, |
|
"loss": 2.3589, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.23949337938975246, |
|
"grad_norm": 7.457561492919922, |
|
"learning_rate": 0.000166, |
|
"loss": 2.5489, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.24006908462867013, |
|
"grad_norm": 18.522987365722656, |
|
"learning_rate": 0.0001664, |
|
"loss": 2.6065, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.2406447898675878, |
|
"grad_norm": 64.20520782470703, |
|
"learning_rate": 0.0001668, |
|
"loss": 2.7258, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.24122049510650548, |
|
"grad_norm": 40.07137680053711, |
|
"learning_rate": 0.0001672, |
|
"loss": 2.6834, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.24179620034542315, |
|
"grad_norm": 6.103574752807617, |
|
"learning_rate": 0.0001676, |
|
"loss": 2.4909, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24237190558434082, |
|
"grad_norm": 6.48091983795166, |
|
"learning_rate": 0.000168, |
|
"loss": 2.5598, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.2429476108232585, |
|
"grad_norm": 6.65122127532959, |
|
"learning_rate": 0.0001684, |
|
"loss": 2.0797, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.24352331606217617, |
|
"grad_norm": 7.160250663757324, |
|
"learning_rate": 0.0001688, |
|
"loss": 2.4701, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.24409902130109384, |
|
"grad_norm": 5.73784875869751, |
|
"learning_rate": 0.0001692, |
|
"loss": 2.333, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.24467472654001152, |
|
"grad_norm": 21.651309967041016, |
|
"learning_rate": 0.0001696, |
|
"loss": 2.5034, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2452504317789292, |
|
"grad_norm": 17.80324935913086, |
|
"learning_rate": 0.00017, |
|
"loss": 2.4943, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.24582613701784686, |
|
"grad_norm": 6.137923240661621, |
|
"learning_rate": 0.0001704, |
|
"loss": 2.4143, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.24640184225676454, |
|
"grad_norm": 5.833311080932617, |
|
"learning_rate": 0.0001708, |
|
"loss": 2.4598, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.2469775474956822, |
|
"grad_norm": 20.596446990966797, |
|
"learning_rate": 0.00017120000000000001, |
|
"loss": 2.4136, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.24755325273459988, |
|
"grad_norm": 5.577768802642822, |
|
"learning_rate": 0.0001716, |
|
"loss": 2.4349, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.24812895797351756, |
|
"grad_norm": 6.16340446472168, |
|
"learning_rate": 0.000172, |
|
"loss": 2.5712, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.24870466321243523, |
|
"grad_norm": 5.587292671203613, |
|
"learning_rate": 0.00017240000000000002, |
|
"loss": 2.522, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.2492803684513529, |
|
"grad_norm": 7.1100945472717285, |
|
"learning_rate": 0.0001728, |
|
"loss": 2.2343, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.24985607369027057, |
|
"grad_norm": 6.089508056640625, |
|
"learning_rate": 0.0001732, |
|
"loss": 2.5291, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.2504317789291883, |
|
"grad_norm": 12.8109769821167, |
|
"learning_rate": 0.00017360000000000002, |
|
"loss": 2.4944, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.2510074841681059, |
|
"grad_norm": 9.722925186157227, |
|
"learning_rate": 0.000174, |
|
"loss": 2.2176, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.2515831894070236, |
|
"grad_norm": 13.540785789489746, |
|
"learning_rate": 0.0001744, |
|
"loss": 2.3636, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.25215889464594127, |
|
"grad_norm": 22.12358856201172, |
|
"learning_rate": 0.00017480000000000002, |
|
"loss": 2.4757, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.25273459988485897, |
|
"grad_norm": 8.760823249816895, |
|
"learning_rate": 0.0001752, |
|
"loss": 2.256, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.2533103051237766, |
|
"grad_norm": 7.311398506164551, |
|
"learning_rate": 0.0001756, |
|
"loss": 2.3274, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2538860103626943, |
|
"grad_norm": 9.8610200881958, |
|
"learning_rate": 0.00017600000000000002, |
|
"loss": 2.4749, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.25446171560161196, |
|
"grad_norm": 7.475802898406982, |
|
"learning_rate": 0.0001764, |
|
"loss": 2.4547, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.25503742084052966, |
|
"grad_norm": 13.036137580871582, |
|
"learning_rate": 0.00017680000000000001, |
|
"loss": 2.1679, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.2556131260794473, |
|
"grad_norm": 11.247735977172852, |
|
"learning_rate": 0.0001772, |
|
"loss": 2.5446, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.256188831318365, |
|
"grad_norm": 7.0622124671936035, |
|
"learning_rate": 0.0001776, |
|
"loss": 2.3196, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.25676453655728265, |
|
"grad_norm": 5.404714107513428, |
|
"learning_rate": 0.00017800000000000002, |
|
"loss": 2.2713, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.25734024179620035, |
|
"grad_norm": 44.592891693115234, |
|
"learning_rate": 0.0001784, |
|
"loss": 2.2287, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.257915947035118, |
|
"grad_norm": 30.109132766723633, |
|
"learning_rate": 0.0001788, |
|
"loss": 2.3153, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.2584916522740357, |
|
"grad_norm": 15.7490873336792, |
|
"learning_rate": 0.00017920000000000002, |
|
"loss": 2.3081, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.25906735751295334, |
|
"grad_norm": 13.772661209106445, |
|
"learning_rate": 0.0001796, |
|
"loss": 2.2548, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.25964306275187105, |
|
"grad_norm": 6.858334064483643, |
|
"learning_rate": 0.00018, |
|
"loss": 2.4804, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.2602187679907887, |
|
"grad_norm": 6.23155403137207, |
|
"learning_rate": 0.00018040000000000002, |
|
"loss": 2.4281, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.2607944732297064, |
|
"grad_norm": 5.4447150230407715, |
|
"learning_rate": 0.0001808, |
|
"loss": 2.342, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.26137017846862404, |
|
"grad_norm": 11.79716682434082, |
|
"learning_rate": 0.0001812, |
|
"loss": 2.2528, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.26194588370754174, |
|
"grad_norm": 10.708625793457031, |
|
"learning_rate": 0.00018160000000000002, |
|
"loss": 2.0204, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26252158894645944, |
|
"grad_norm": 21.41659164428711, |
|
"learning_rate": 0.000182, |
|
"loss": 2.2593, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.2630972941853771, |
|
"grad_norm": 5.636983394622803, |
|
"learning_rate": 0.00018240000000000002, |
|
"loss": 2.1412, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.2636729994242948, |
|
"grad_norm": 9.639352798461914, |
|
"learning_rate": 0.00018280000000000003, |
|
"loss": 2.1103, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.26424870466321243, |
|
"grad_norm": 4.263064384460449, |
|
"learning_rate": 0.0001832, |
|
"loss": 2.2493, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.26482440990213013, |
|
"grad_norm": 8.983839988708496, |
|
"learning_rate": 0.00018360000000000002, |
|
"loss": 2.3782, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2654001151410478, |
|
"grad_norm": 9.911988258361816, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 2.2117, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.2659758203799655, |
|
"grad_norm": 8.42939567565918, |
|
"learning_rate": 0.0001844, |
|
"loss": 2.0942, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.2665515256188831, |
|
"grad_norm": 9.866201400756836, |
|
"learning_rate": 0.00018480000000000002, |
|
"loss": 2.2761, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.2671272308578008, |
|
"grad_norm": 4.9825758934021, |
|
"learning_rate": 0.00018520000000000003, |
|
"loss": 2.1351, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.26770293609671847, |
|
"grad_norm": 3.4520153999328613, |
|
"learning_rate": 0.0001856, |
|
"loss": 2.234, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.26827864133563617, |
|
"grad_norm": 6.94691276550293, |
|
"learning_rate": 0.00018600000000000002, |
|
"loss": 2.1368, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.2688543465745538, |
|
"grad_norm": 19.923587799072266, |
|
"learning_rate": 0.00018640000000000003, |
|
"loss": 2.4301, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.2694300518134715, |
|
"grad_norm": 24.741535186767578, |
|
"learning_rate": 0.00018680000000000001, |
|
"loss": 2.256, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.27000575705238916, |
|
"grad_norm": 9.313246726989746, |
|
"learning_rate": 0.00018720000000000002, |
|
"loss": 2.6483, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.27058146229130686, |
|
"grad_norm": 10.217698097229004, |
|
"learning_rate": 0.0001876, |
|
"loss": 1.8293, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2711571675302245, |
|
"grad_norm": 28.85066032409668, |
|
"learning_rate": 0.000188, |
|
"loss": 2.3165, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.2717328727691422, |
|
"grad_norm": 5.764794826507568, |
|
"learning_rate": 0.0001884, |
|
"loss": 2.6914, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.27230857800805985, |
|
"grad_norm": 8.115283966064453, |
|
"learning_rate": 0.0001888, |
|
"loss": 2.5605, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.27288428324697755, |
|
"grad_norm": 11.941910743713379, |
|
"learning_rate": 0.0001892, |
|
"loss": 1.9626, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.2734599884858952, |
|
"grad_norm": 11.117420196533203, |
|
"learning_rate": 0.0001896, |
|
"loss": 2.4614, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2740356937248129, |
|
"grad_norm": 6.908642292022705, |
|
"learning_rate": 0.00019, |
|
"loss": 2.3911, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.27461139896373055, |
|
"grad_norm": 10.433818817138672, |
|
"learning_rate": 0.0001904, |
|
"loss": 2.3747, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.27518710420264825, |
|
"grad_norm": 8.546224594116211, |
|
"learning_rate": 0.0001908, |
|
"loss": 2.2947, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.2757628094415659, |
|
"grad_norm": 5.434266090393066, |
|
"learning_rate": 0.0001912, |
|
"loss": 2.2115, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.2763385146804836, |
|
"grad_norm": 9.27397346496582, |
|
"learning_rate": 0.0001916, |
|
"loss": 2.2165, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.27691421991940124, |
|
"grad_norm": 4.052639484405518, |
|
"learning_rate": 0.000192, |
|
"loss": 2.1148, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.27748992515831894, |
|
"grad_norm": 7.541112422943115, |
|
"learning_rate": 0.00019240000000000001, |
|
"loss": 2.2489, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.27806563039723664, |
|
"grad_norm": 20.005165100097656, |
|
"learning_rate": 0.0001928, |
|
"loss": 2.3552, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.2786413356361543, |
|
"grad_norm": 6.74354362487793, |
|
"learning_rate": 0.0001932, |
|
"loss": 2.0027, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.279217040875072, |
|
"grad_norm": 4.244668960571289, |
|
"learning_rate": 0.00019360000000000002, |
|
"loss": 2.466, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.27979274611398963, |
|
"grad_norm": 32.92999267578125, |
|
"learning_rate": 0.000194, |
|
"loss": 2.0058, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.28036845135290733, |
|
"grad_norm": 5.099974155426025, |
|
"learning_rate": 0.0001944, |
|
"loss": 2.1534, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.280944156591825, |
|
"grad_norm": 8.950968742370605, |
|
"learning_rate": 0.0001948, |
|
"loss": 2.3272, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.2815198618307427, |
|
"grad_norm": 29.126623153686523, |
|
"learning_rate": 0.0001952, |
|
"loss": 2.0942, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.2820955670696603, |
|
"grad_norm": 26.04970932006836, |
|
"learning_rate": 0.0001956, |
|
"loss": 2.3703, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.282671272308578, |
|
"grad_norm": 7.4286370277404785, |
|
"learning_rate": 0.000196, |
|
"loss": 1.8691, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.28324697754749567, |
|
"grad_norm": 6.331235408782959, |
|
"learning_rate": 0.0001964, |
|
"loss": 2.2338, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.28382268278641337, |
|
"grad_norm": 4.98259162902832, |
|
"learning_rate": 0.0001968, |
|
"loss": 1.9059, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.284398388025331, |
|
"grad_norm": 12.111970901489258, |
|
"learning_rate": 0.0001972, |
|
"loss": 2.0567, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.2849740932642487, |
|
"grad_norm": 4.433606147766113, |
|
"learning_rate": 0.0001976, |
|
"loss": 2.061, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.28554979850316636, |
|
"grad_norm": 9.483826637268066, |
|
"learning_rate": 0.00019800000000000002, |
|
"loss": 2.1855, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.28612550374208406, |
|
"grad_norm": 8.829517364501953, |
|
"learning_rate": 0.0001984, |
|
"loss": 2.0813, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.2867012089810017, |
|
"grad_norm": 5.547176361083984, |
|
"learning_rate": 0.0001988, |
|
"loss": 2.1782, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.2872769142199194, |
|
"grad_norm": 13.865377426147461, |
|
"learning_rate": 0.00019920000000000002, |
|
"loss": 1.9131, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.28785261945883706, |
|
"grad_norm": 13.441047668457031, |
|
"learning_rate": 0.0001996, |
|
"loss": 2.1865, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28842832469775476, |
|
"grad_norm": 4.224601745605469, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4949, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.2890040299366724, |
|
"grad_norm": 4.024444580078125, |
|
"learning_rate": 0.000199999709749734, |
|
"loss": 2.2281, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.2895797351755901, |
|
"grad_norm": 6.911625862121582, |
|
"learning_rate": 0.000199998839000808, |
|
"loss": 2.0534, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.29015544041450775, |
|
"grad_norm": 15.578252792358398, |
|
"learning_rate": 0.00019999738775883837, |
|
"loss": 2.1315, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.29073114565342545, |
|
"grad_norm": 14.918317794799805, |
|
"learning_rate": 0.00019999535603318567, |
|
"loss": 2.1605, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2913068508923431, |
|
"grad_norm": 3.6653409004211426, |
|
"learning_rate": 0.0001999927438369545, |
|
"loss": 2.3675, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.2918825561312608, |
|
"grad_norm": 9.457073211669922, |
|
"learning_rate": 0.0001999895511869936, |
|
"loss": 2.2067, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.2924582613701785, |
|
"grad_norm": 16.254053115844727, |
|
"learning_rate": 0.00019998577810389551, |
|
"loss": 1.8262, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.29303396660909614, |
|
"grad_norm": 12.8787260055542, |
|
"learning_rate": 0.00019998142461199664, |
|
"loss": 2.1758, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.29360967184801384, |
|
"grad_norm": 7.122046947479248, |
|
"learning_rate": 0.00019997649073937707, |
|
"loss": 2.1842, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2941853770869315, |
|
"grad_norm": 7.713693618774414, |
|
"learning_rate": 0.00019997097651786033, |
|
"loss": 2.1556, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.2947610823258492, |
|
"grad_norm": 5.447865962982178, |
|
"learning_rate": 0.00019996488198301314, |
|
"loss": 2.2058, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.29533678756476683, |
|
"grad_norm": 10.775145530700684, |
|
"learning_rate": 0.0001999582071741453, |
|
"loss": 2.365, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.29591249280368453, |
|
"grad_norm": 15.842108726501465, |
|
"learning_rate": 0.00019995095213430937, |
|
"loss": 2.1598, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.2964881980426022, |
|
"grad_norm": 27.204334259033203, |
|
"learning_rate": 0.00019994311691030038, |
|
"loss": 2.3135, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.2970639032815199, |
|
"grad_norm": 17.095380783081055, |
|
"learning_rate": 0.0001999347015526556, |
|
"loss": 2.1369, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.2976396085204375, |
|
"grad_norm": 5.58231258392334, |
|
"learning_rate": 0.0001999257061156541, |
|
"loss": 2.1524, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.2982153137593552, |
|
"grad_norm": 16.57658576965332, |
|
"learning_rate": 0.00019991613065731652, |
|
"loss": 2.2354, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.2987910189982729, |
|
"grad_norm": 10.48273754119873, |
|
"learning_rate": 0.00019990597523940467, |
|
"loss": 1.9177, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.2993667242371906, |
|
"grad_norm": 11.657440185546875, |
|
"learning_rate": 0.00019989523992742096, |
|
"loss": 2.1773, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2999424294761082, |
|
"grad_norm": 8.428756713867188, |
|
"learning_rate": 0.00019988392479060828, |
|
"loss": 2.0253, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3005181347150259, |
|
"grad_norm": 23.292964935302734, |
|
"learning_rate": 0.00019987202990194938, |
|
"loss": 2.1664, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.30109383995394357, |
|
"grad_norm": 11.076370239257812, |
|
"learning_rate": 0.00019985955533816623, |
|
"loss": 2.1639, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.30166954519286127, |
|
"grad_norm": 5.960238933563232, |
|
"learning_rate": 0.00019984650117971993, |
|
"loss": 2.28, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3022452504317789, |
|
"grad_norm": 5.293085098266602, |
|
"learning_rate": 0.00019983286751080984, |
|
"loss": 2.0797, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3028209556706966, |
|
"grad_norm": 6.787145614624023, |
|
"learning_rate": 0.00019981865441937326, |
|
"loss": 2.1623, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.30339666090961426, |
|
"grad_norm": 4.942052841186523, |
|
"learning_rate": 0.00019980386199708468, |
|
"loss": 1.9676, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.30397236614853196, |
|
"grad_norm": 3.7597603797912598, |
|
"learning_rate": 0.0001997884903393553, |
|
"loss": 2.0058, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3045480713874496, |
|
"grad_norm": 4.015875339508057, |
|
"learning_rate": 0.00019977253954533243, |
|
"loss": 2.4905, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.3051237766263673, |
|
"grad_norm": 71.47596740722656, |
|
"learning_rate": 0.00019975600971789873, |
|
"loss": 2.2579, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.30569948186528495, |
|
"grad_norm": 5.182095050811768, |
|
"learning_rate": 0.00019973890096367173, |
|
"loss": 2.0343, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.30627518710420265, |
|
"grad_norm": 11.30048656463623, |
|
"learning_rate": 0.000199721213393003, |
|
"loss": 1.709, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3068508923431203, |
|
"grad_norm": 7.1390700340271, |
|
"learning_rate": 0.00019970294711997745, |
|
"loss": 1.9582, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.307426597582038, |
|
"grad_norm": 4.473127841949463, |
|
"learning_rate": 0.0001996841022624127, |
|
"loss": 2.0572, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3080023028209557, |
|
"grad_norm": 15.681950569152832, |
|
"learning_rate": 0.00019966467894185812, |
|
"loss": 2.0069, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.30857800805987334, |
|
"grad_norm": 3.4989001750946045, |
|
"learning_rate": 0.0001996446772835943, |
|
"loss": 2.3378, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.30915371329879104, |
|
"grad_norm": 6.156066417694092, |
|
"learning_rate": 0.00019962409741663202, |
|
"loss": 2.1491, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.3097294185377087, |
|
"grad_norm": 3.5091023445129395, |
|
"learning_rate": 0.00019960293947371153, |
|
"loss": 2.2707, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.3103051237766264, |
|
"grad_norm": 11.965502738952637, |
|
"learning_rate": 0.00019958120359130178, |
|
"loss": 1.9268, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.31088082901554404, |
|
"grad_norm": 23.450349807739258, |
|
"learning_rate": 0.0001995588899095992, |
|
"loss": 1.7938, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.31145653425446174, |
|
"grad_norm": 38.560482025146484, |
|
"learning_rate": 0.00019953599857252733, |
|
"loss": 2.2267, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.3120322394933794, |
|
"grad_norm": 15.327620506286621, |
|
"learning_rate": 0.00019951252972773525, |
|
"loss": 2.3168, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3126079447322971, |
|
"grad_norm": 4.420673370361328, |
|
"learning_rate": 0.0001994884835265973, |
|
"loss": 2.0744, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.31318364997121473, |
|
"grad_norm": 19.81424903869629, |
|
"learning_rate": 0.00019946386012421153, |
|
"loss": 1.9736, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.31375935521013243, |
|
"grad_norm": 4.647876739501953, |
|
"learning_rate": 0.00019943865967939908, |
|
"loss": 2.1716, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3143350604490501, |
|
"grad_norm": 5.088565349578857, |
|
"learning_rate": 0.00019941288235470291, |
|
"loss": 1.9915, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.3149107656879678, |
|
"grad_norm": 6.194237232208252, |
|
"learning_rate": 0.00019938652831638697, |
|
"loss": 1.9701, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.3154864709268854, |
|
"grad_norm": 5.4519429206848145, |
|
"learning_rate": 0.00019935959773443497, |
|
"loss": 2.2597, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.3160621761658031, |
|
"grad_norm": 7.437872409820557, |
|
"learning_rate": 0.0001993320907825493, |
|
"loss": 2.3016, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.31663788140472077, |
|
"grad_norm": 4.233456134796143, |
|
"learning_rate": 0.00019930400763814993, |
|
"loss": 1.8935, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.31721358664363847, |
|
"grad_norm": 5.772792339324951, |
|
"learning_rate": 0.00019927534848237336, |
|
"loss": 1.6373, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.3177892918825561, |
|
"grad_norm": 7.545225143432617, |
|
"learning_rate": 0.0001992461135000713, |
|
"loss": 1.9868, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.3183649971214738, |
|
"grad_norm": 5.72635555267334, |
|
"learning_rate": 0.00019921630287980956, |
|
"loss": 1.7728, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.31894070236039146, |
|
"grad_norm": 5.739555358886719, |
|
"learning_rate": 0.0001991859168138668, |
|
"loss": 1.8478, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.31951640759930916, |
|
"grad_norm": 3.295530319213867, |
|
"learning_rate": 0.0001991549554982333, |
|
"loss": 2.1454, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3200921128382268, |
|
"grad_norm": 10.391168594360352, |
|
"learning_rate": 0.0001991234191326098, |
|
"loss": 2.2763, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.3206678180771445, |
|
"grad_norm": 14.846756935119629, |
|
"learning_rate": 0.00019909130792040598, |
|
"loss": 1.9783, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.32124352331606215, |
|
"grad_norm": 4.79947566986084, |
|
"learning_rate": 0.0001990586220687394, |
|
"loss": 2.1489, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.32181922855497985, |
|
"grad_norm": 4.786315441131592, |
|
"learning_rate": 0.00019902536178843395, |
|
"loss": 2.0194, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.3223949337938975, |
|
"grad_norm": 11.64875602722168, |
|
"learning_rate": 0.00019899152729401868, |
|
"loss": 1.8983, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3229706390328152, |
|
"grad_norm": 9.477057456970215, |
|
"learning_rate": 0.00019895711880372628, |
|
"loss": 1.9139, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.3235463442717329, |
|
"grad_norm": 7.5819993019104, |
|
"learning_rate": 0.00019892213653949166, |
|
"loss": 1.843, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.32412204951065055, |
|
"grad_norm": 4.9545207023620605, |
|
"learning_rate": 0.00019888658072695066, |
|
"loss": 2.1052, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.32469775474956825, |
|
"grad_norm": 4.684484958648682, |
|
"learning_rate": 0.0001988504515954385, |
|
"loss": 1.7933, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.3252734599884859, |
|
"grad_norm": 8.41274356842041, |
|
"learning_rate": 0.00019881374937798826, |
|
"loss": 2.0737, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.3258491652274036, |
|
"grad_norm": 20.587425231933594, |
|
"learning_rate": 0.00019877647431132948, |
|
"loss": 1.6823, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.32642487046632124, |
|
"grad_norm": 12.793438911437988, |
|
"learning_rate": 0.00019873862663588658, |
|
"loss": 2.1764, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.32700057570523894, |
|
"grad_norm": 3.9023592472076416, |
|
"learning_rate": 0.00019870020659577725, |
|
"loss": 2.3804, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.3275762809441566, |
|
"grad_norm": 5.434683799743652, |
|
"learning_rate": 0.000198661214438811, |
|
"loss": 2.162, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.3281519861830743, |
|
"grad_norm": 5.3589019775390625, |
|
"learning_rate": 0.00019862165041648744, |
|
"loss": 2.2068, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.32872769142199193, |
|
"grad_norm": 5.979888439178467, |
|
"learning_rate": 0.00019858151478399478, |
|
"loss": 1.9811, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.32930339666090963, |
|
"grad_norm": 10.225967407226562, |
|
"learning_rate": 0.0001985408078002081, |
|
"loss": 1.7766, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.3298791018998273, |
|
"grad_norm": 7.599292278289795, |
|
"learning_rate": 0.00019849952972768767, |
|
"loss": 1.851, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.330454807138745, |
|
"grad_norm": 3.455409049987793, |
|
"learning_rate": 0.0001984576808326773, |
|
"loss": 1.9795, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.3310305123776626, |
|
"grad_norm": 4.577173709869385, |
|
"learning_rate": 0.00019841526138510257, |
|
"loss": 2.1139, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3316062176165803, |
|
"grad_norm": 3.0964651107788086, |
|
"learning_rate": 0.00019837227165856922, |
|
"loss": 2.2629, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.33218192285549797, |
|
"grad_norm": 5.796529293060303, |
|
"learning_rate": 0.0001983287119303612, |
|
"loss": 2.1801, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.33275762809441567, |
|
"grad_norm": 13.77210521697998, |
|
"learning_rate": 0.00019828458248143913, |
|
"loss": 1.9382, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 67.12165832519531, |
|
"learning_rate": 0.00019823988359643805, |
|
"loss": 2.0376, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.333909038572251, |
|
"grad_norm": 18.68467140197754, |
|
"learning_rate": 0.00019819461556366615, |
|
"loss": 2.1364, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.33448474381116866, |
|
"grad_norm": 3.4618403911590576, |
|
"learning_rate": 0.00019814877867510244, |
|
"loss": 2.4019, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.33506044905008636, |
|
"grad_norm": 2.982158899307251, |
|
"learning_rate": 0.00019810237322639518, |
|
"loss": 2.2236, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.335636154289004, |
|
"grad_norm": 18.654964447021484, |
|
"learning_rate": 0.00019805539951685974, |
|
"loss": 2.1278, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.3362118595279217, |
|
"grad_norm": 16.564912796020508, |
|
"learning_rate": 0.00019800785784947683, |
|
"loss": 1.8014, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.33678756476683935, |
|
"grad_norm": 31.10694122314453, |
|
"learning_rate": 0.00019795974853089053, |
|
"loss": 1.9206, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.33736327000575705, |
|
"grad_norm": 22.873422622680664, |
|
"learning_rate": 0.00019791107187140618, |
|
"loss": 2.0762, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.3379389752446747, |
|
"grad_norm": 4.631661415100098, |
|
"learning_rate": 0.00019786182818498852, |
|
"loss": 1.9247, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.3385146804835924, |
|
"grad_norm": 8.853531837463379, |
|
"learning_rate": 0.00019781201778925969, |
|
"loss": 1.7691, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.3390903857225101, |
|
"grad_norm": 10.22801685333252, |
|
"learning_rate": 0.00019776164100549694, |
|
"loss": 2.1087, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.33966609096142775, |
|
"grad_norm": 11.78855037689209, |
|
"learning_rate": 0.0001977106981586309, |
|
"loss": 2.2109, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.34024179620034545, |
|
"grad_norm": 9.323369026184082, |
|
"learning_rate": 0.00019765918957724319, |
|
"loss": 2.104, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.3408175014392631, |
|
"grad_norm": 10.116569519042969, |
|
"learning_rate": 0.00019760711559356449, |
|
"loss": 2.0949, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.3413932066781808, |
|
"grad_norm": 9.48695182800293, |
|
"learning_rate": 0.00019755447654347226, |
|
"loss": 2.1322, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.34196891191709844, |
|
"grad_norm": 11.74067497253418, |
|
"learning_rate": 0.00019750127276648872, |
|
"loss": 2.0404, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.34254461715601614, |
|
"grad_norm": 11.347387313842773, |
|
"learning_rate": 0.00019744750460577856, |
|
"loss": 1.6953, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.3431203223949338, |
|
"grad_norm": 3.8460686206817627, |
|
"learning_rate": 0.00019739317240814668, |
|
"loss": 2.1369, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.3436960276338515, |
|
"grad_norm": 3.9272382259368896, |
|
"learning_rate": 0.00019733827652403615, |
|
"loss": 2.1408, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.34427173287276913, |
|
"grad_norm": 6.900027751922607, |
|
"learning_rate": 0.00019728281730752568, |
|
"loss": 2.1793, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.34484743811168683, |
|
"grad_norm": 14.802238464355469, |
|
"learning_rate": 0.00019722679511632757, |
|
"loss": 2.0497, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.3454231433506045, |
|
"grad_norm": 12.431273460388184, |
|
"learning_rate": 0.00019717021031178528, |
|
"loss": 2.1025, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3459988485895222, |
|
"grad_norm": 26.413490295410156, |
|
"learning_rate": 0.00019711306325887116, |
|
"loss": 2.0722, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.3465745538284398, |
|
"grad_norm": 15.21047592163086, |
|
"learning_rate": 0.000197055354326184, |
|
"loss": 1.8285, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.3471502590673575, |
|
"grad_norm": 4.017702579498291, |
|
"learning_rate": 0.0001969970838859468, |
|
"loss": 2.3358, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.34772596430627517, |
|
"grad_norm": 4.556800842285156, |
|
"learning_rate": 0.00019693825231400423, |
|
"loss": 2.1526, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.34830166954519287, |
|
"grad_norm": 8.760571479797363, |
|
"learning_rate": 0.0001968788599898202, |
|
"loss": 2.3013, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.3488773747841105, |
|
"grad_norm": 4.910373210906982, |
|
"learning_rate": 0.0001968189072964757, |
|
"loss": 2.038, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.3494530800230282, |
|
"grad_norm": 14.5215425491333, |
|
"learning_rate": 0.00019675839462066582, |
|
"loss": 2.2494, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.35002878526194586, |
|
"grad_norm": 7.343470573425293, |
|
"learning_rate": 0.00019669732235269775, |
|
"loss": 1.8103, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.35060449050086356, |
|
"grad_norm": 6.75926399230957, |
|
"learning_rate": 0.00019663569088648796, |
|
"loss": 2.0837, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.3511801957397812, |
|
"grad_norm": 4.556105136871338, |
|
"learning_rate": 0.0001965735006195598, |
|
"loss": 2.111, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3517559009786989, |
|
"grad_norm": 7.004334449768066, |
|
"learning_rate": 0.0001965107519530408, |
|
"loss": 2.1351, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.35233160621761656, |
|
"grad_norm": 3.7574639320373535, |
|
"learning_rate": 0.00019644744529166025, |
|
"loss": 1.9899, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.35290731145653426, |
|
"grad_norm": 7.275668144226074, |
|
"learning_rate": 0.0001963835810437465, |
|
"loss": 1.7074, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.3534830166954519, |
|
"grad_norm": 4.05908727645874, |
|
"learning_rate": 0.00019631915962122436, |
|
"loss": 1.7602, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.3540587219343696, |
|
"grad_norm": 3.8614981174468994, |
|
"learning_rate": 0.00019625418143961234, |
|
"loss": 1.878, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.3546344271732873, |
|
"grad_norm": 3.8152644634246826, |
|
"learning_rate": 0.00019618864691802013, |
|
"loss": 2.1187, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.35521013241220495, |
|
"grad_norm": 7.1591668128967285, |
|
"learning_rate": 0.00019612255647914574, |
|
"loss": 1.889, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.35578583765112265, |
|
"grad_norm": 13.686311721801758, |
|
"learning_rate": 0.00019605591054927294, |
|
"loss": 1.8415, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.3563615428900403, |
|
"grad_norm": 4.854591369628906, |
|
"learning_rate": 0.00019598870955826828, |
|
"loss": 2.2113, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.356937248128958, |
|
"grad_norm": 8.912299156188965, |
|
"learning_rate": 0.00019592095393957868, |
|
"loss": 1.7633, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.35751295336787564, |
|
"grad_norm": 3.053098678588867, |
|
"learning_rate": 0.00019585264413022818, |
|
"loss": 1.9866, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.35808865860679334, |
|
"grad_norm": 22.903722763061523, |
|
"learning_rate": 0.0001957837805708155, |
|
"loss": 2.0566, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.358664363845711, |
|
"grad_norm": 2.5533032417297363, |
|
"learning_rate": 0.000195714363705511, |
|
"loss": 2.0484, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.3592400690846287, |
|
"grad_norm": 4.685166358947754, |
|
"learning_rate": 0.00019564439398205388, |
|
"loss": 1.9809, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.35981577432354633, |
|
"grad_norm": 6.896650314331055, |
|
"learning_rate": 0.00019557387185174924, |
|
"loss": 1.9147, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.36039147956246403, |
|
"grad_norm": 3.927499532699585, |
|
"learning_rate": 0.00019550279776946525, |
|
"loss": 1.8356, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.3609671848013817, |
|
"grad_norm": 2.7331018447875977, |
|
"learning_rate": 0.00019543117219363016, |
|
"loss": 1.9191, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.3615428900402994, |
|
"grad_norm": 6.016903400421143, |
|
"learning_rate": 0.0001953589955862294, |
|
"loss": 2.0919, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.362118595279217, |
|
"grad_norm": 5.431999683380127, |
|
"learning_rate": 0.00019528626841280246, |
|
"loss": 1.5794, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.3626943005181347, |
|
"grad_norm": 4.026440620422363, |
|
"learning_rate": 0.00019521299114244004, |
|
"loss": 2.2844, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36327000575705237, |
|
"grad_norm": 7.0267863273620605, |
|
"learning_rate": 0.00019513916424778097, |
|
"loss": 1.8249, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.3638457109959701, |
|
"grad_norm": 4.048052787780762, |
|
"learning_rate": 0.00019506478820500918, |
|
"loss": 2.1139, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.3644214162348877, |
|
"grad_norm": 3.6997714042663574, |
|
"learning_rate": 0.0001949898634938506, |
|
"loss": 1.6588, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.3649971214738054, |
|
"grad_norm": 2.560260772705078, |
|
"learning_rate": 0.00019491439059757002, |
|
"loss": 1.9762, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.36557282671272306, |
|
"grad_norm": 17.04377555847168, |
|
"learning_rate": 0.00019483837000296806, |
|
"loss": 1.7949, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.36614853195164077, |
|
"grad_norm": 2.873385190963745, |
|
"learning_rate": 0.00019476180220037807, |
|
"loss": 1.9637, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.3667242371905584, |
|
"grad_norm": 7.77009391784668, |
|
"learning_rate": 0.00019468468768366276, |
|
"loss": 1.8636, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.3672999424294761, |
|
"grad_norm": 6.612690448760986, |
|
"learning_rate": 0.00019460702695021123, |
|
"loss": 1.734, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.36787564766839376, |
|
"grad_norm": 4.485565185546875, |
|
"learning_rate": 0.0001945288205009357, |
|
"loss": 1.6968, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.36845135290731146, |
|
"grad_norm": 2.9968698024749756, |
|
"learning_rate": 0.0001944500688402682, |
|
"loss": 1.8785, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3690270581462291, |
|
"grad_norm": 4.952812194824219, |
|
"learning_rate": 0.00019437077247615747, |
|
"loss": 1.7285, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.3696027633851468, |
|
"grad_norm": 5.491662502288818, |
|
"learning_rate": 0.00019429093192006543, |
|
"loss": 1.6328, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.3701784686240645, |
|
"grad_norm": 3.8749518394470215, |
|
"learning_rate": 0.00019421054768696422, |
|
"loss": 2.2014, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.37075417386298215, |
|
"grad_norm": 6.7762627601623535, |
|
"learning_rate": 0.0001941296202953326, |
|
"loss": 2.0592, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.37132987910189985, |
|
"grad_norm": 3.307373046875, |
|
"learning_rate": 0.00019404815026715267, |
|
"loss": 2.3178, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.3719055843408175, |
|
"grad_norm": 3.4404947757720947, |
|
"learning_rate": 0.00019396613812790666, |
|
"loss": 2.3772, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.3724812895797352, |
|
"grad_norm": 4.453017711639404, |
|
"learning_rate": 0.00019388358440657332, |
|
"loss": 1.8855, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.37305699481865284, |
|
"grad_norm": 3.175896406173706, |
|
"learning_rate": 0.00019380048963562466, |
|
"loss": 1.8799, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.37363270005757054, |
|
"grad_norm": 22.680187225341797, |
|
"learning_rate": 0.0001937168543510224, |
|
"loss": 1.4755, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.3742084052964882, |
|
"grad_norm": 14.353453636169434, |
|
"learning_rate": 0.00019363267909221468, |
|
"loss": 1.9199, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3747841105354059, |
|
"grad_norm": 3.4111273288726807, |
|
"learning_rate": 0.00019354796440213237, |
|
"loss": 2.0271, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.37535981577432354, |
|
"grad_norm": 2.5413737297058105, |
|
"learning_rate": 0.00019346271082718575, |
|
"loss": 2.0859, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.37593552101324124, |
|
"grad_norm": 2.3607568740844727, |
|
"learning_rate": 0.00019337691891726087, |
|
"loss": 2.1843, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.3765112262521589, |
|
"grad_norm": 6.652899265289307, |
|
"learning_rate": 0.00019329058922571608, |
|
"loss": 2.2823, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.3770869314910766, |
|
"grad_norm": 6.970069885253906, |
|
"learning_rate": 0.00019320372230937835, |
|
"loss": 2.0684, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.3776626367299942, |
|
"grad_norm": 6.668658256530762, |
|
"learning_rate": 0.00019311631872853983, |
|
"loss": 1.4474, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.37823834196891193, |
|
"grad_norm": 16.344221115112305, |
|
"learning_rate": 0.00019302837904695418, |
|
"loss": 1.958, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.3788140472078296, |
|
"grad_norm": 11.7747163772583, |
|
"learning_rate": 0.00019293990383183277, |
|
"loss": 1.9691, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.3793897524467473, |
|
"grad_norm": 3.1886119842529297, |
|
"learning_rate": 0.00019285089365384138, |
|
"loss": 2.0175, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.3799654576856649, |
|
"grad_norm": 7.979596138000488, |
|
"learning_rate": 0.00019276134908709607, |
|
"loss": 1.8705, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3805411629245826, |
|
"grad_norm": 3.5675251483917236, |
|
"learning_rate": 0.0001926712707091599, |
|
"loss": 1.754, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.38111686816350027, |
|
"grad_norm": 2.696560859680176, |
|
"learning_rate": 0.00019258065910103886, |
|
"loss": 2.0815, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.38169257340241797, |
|
"grad_norm": 3.2219901084899902, |
|
"learning_rate": 0.0001924895148471785, |
|
"loss": 1.9866, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.3822682786413356, |
|
"grad_norm": 4.226520538330078, |
|
"learning_rate": 0.00019239783853545962, |
|
"loss": 1.848, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.3828439838802533, |
|
"grad_norm": 3.089921474456787, |
|
"learning_rate": 0.00019230563075719513, |
|
"loss": 2.3039, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.38341968911917096, |
|
"grad_norm": 3.959472179412842, |
|
"learning_rate": 0.00019221289210712562, |
|
"loss": 1.902, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.38399539435808866, |
|
"grad_norm": 9.597257614135742, |
|
"learning_rate": 0.000192119623183416, |
|
"loss": 1.9769, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.3845710995970063, |
|
"grad_norm": 15.12406063079834, |
|
"learning_rate": 0.00019202582458765138, |
|
"loss": 1.985, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.385146804835924, |
|
"grad_norm": 3.456636428833008, |
|
"learning_rate": 0.00019193149692483326, |
|
"loss": 1.6099, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.3857225100748417, |
|
"grad_norm": 2.650001287460327, |
|
"learning_rate": 0.00019183664080337556, |
|
"loss": 2.4192, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.38629821531375935, |
|
"grad_norm": 2.6241097450256348, |
|
"learning_rate": 0.00019174125683510092, |
|
"loss": 2.3614, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.38687392055267705, |
|
"grad_norm": 5.434971332550049, |
|
"learning_rate": 0.00019164534563523641, |
|
"loss": 1.782, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.3874496257915947, |
|
"grad_norm": 2.1434805393218994, |
|
"learning_rate": 0.0001915489078224099, |
|
"loss": 2.0285, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.3880253310305124, |
|
"grad_norm": 3.9473743438720703, |
|
"learning_rate": 0.00019145194401864581, |
|
"loss": 2.137, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.38860103626943004, |
|
"grad_norm": 3.913604259490967, |
|
"learning_rate": 0.00019135445484936127, |
|
"loss": 1.7514, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.38917674150834775, |
|
"grad_norm": 4.43229341506958, |
|
"learning_rate": 0.000191256440943362, |
|
"loss": 1.6698, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.3897524467472654, |
|
"grad_norm": 3.1151657104492188, |
|
"learning_rate": 0.00019115790293283827, |
|
"loss": 2.2421, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.3903281519861831, |
|
"grad_norm": 3.7743000984191895, |
|
"learning_rate": 0.00019105884145336085, |
|
"loss": 1.8634, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.39090385722510074, |
|
"grad_norm": 2.8315064907073975, |
|
"learning_rate": 0.00019095925714387682, |
|
"loss": 1.6003, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.39147956246401844, |
|
"grad_norm": 13.85084342956543, |
|
"learning_rate": 0.00019085915064670557, |
|
"loss": 1.9885, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3920552677029361, |
|
"grad_norm": 8.601191520690918, |
|
"learning_rate": 0.00019075852260753463, |
|
"loss": 1.8575, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.3926309729418538, |
|
"grad_norm": 2.420215606689453, |
|
"learning_rate": 0.00019065737367541545, |
|
"loss": 2.0188, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.39320667818077143, |
|
"grad_norm": 9.427199363708496, |
|
"learning_rate": 0.0001905557045027592, |
|
"loss": 1.8087, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.39378238341968913, |
|
"grad_norm": 2.8770227432250977, |
|
"learning_rate": 0.00019045351574533274, |
|
"loss": 1.9231, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.3943580886586068, |
|
"grad_norm": 5.514732837677002, |
|
"learning_rate": 0.00019035080806225404, |
|
"loss": 1.8629, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.3949337938975245, |
|
"grad_norm": 5.505938529968262, |
|
"learning_rate": 0.00019024758211598833, |
|
"loss": 2.0178, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.3955094991364421, |
|
"grad_norm": 4.23834753036499, |
|
"learning_rate": 0.00019014383857234355, |
|
"loss": 1.5748, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.3960852043753598, |
|
"grad_norm": 4.225786209106445, |
|
"learning_rate": 0.00019003957810046615, |
|
"loss": 1.8404, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.39666090961427747, |
|
"grad_norm": 10.373225212097168, |
|
"learning_rate": 0.00018993480137283685, |
|
"loss": 1.9054, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.39723661485319517, |
|
"grad_norm": 5.083978652954102, |
|
"learning_rate": 0.00018982950906526615, |
|
"loss": 1.8938, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3978123200921128, |
|
"grad_norm": 4.478074073791504, |
|
"learning_rate": 0.00018972370185689, |
|
"loss": 1.9073, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.3983880253310305, |
|
"grad_norm": 3.557939052581787, |
|
"learning_rate": 0.00018961738043016556, |
|
"loss": 1.847, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.39896373056994816, |
|
"grad_norm": 4.705951690673828, |
|
"learning_rate": 0.00018951054547086666, |
|
"loss": 1.7451, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.39953943580886586, |
|
"grad_norm": 2.8145923614501953, |
|
"learning_rate": 0.00018940319766807943, |
|
"loss": 2.054, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.40011514104778356, |
|
"grad_norm": 10.403020858764648, |
|
"learning_rate": 0.00018929533771419783, |
|
"loss": 1.8062, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.4006908462867012, |
|
"grad_norm": 5.947159290313721, |
|
"learning_rate": 0.00018918696630491915, |
|
"loss": 1.8459, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.4012665515256189, |
|
"grad_norm": 3.2348601818084717, |
|
"learning_rate": 0.00018907808413923968, |
|
"loss": 1.815, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.40184225676453655, |
|
"grad_norm": 8.500833511352539, |
|
"learning_rate": 0.00018896869191945, |
|
"loss": 1.535, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.40241796200345425, |
|
"grad_norm": 5.008488178253174, |
|
"learning_rate": 0.0001888587903511306, |
|
"loss": 2.3482, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.4029936672423719, |
|
"grad_norm": 2.739643096923828, |
|
"learning_rate": 0.00018874838014314724, |
|
"loss": 2.284, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4035693724812896, |
|
"grad_norm": 10.596121788024902, |
|
"learning_rate": 0.0001886374620076464, |
|
"loss": 1.6078, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.40414507772020725, |
|
"grad_norm": 2.6627824306488037, |
|
"learning_rate": 0.00018852603666005073, |
|
"loss": 1.7632, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.40472078295912495, |
|
"grad_norm": 4.91156530380249, |
|
"learning_rate": 0.00018841410481905434, |
|
"loss": 1.8726, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.4052964881980426, |
|
"grad_norm": 3.1350510120391846, |
|
"learning_rate": 0.0001883016672066183, |
|
"loss": 2.0387, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.4058721934369603, |
|
"grad_norm": 16.92197036743164, |
|
"learning_rate": 0.0001881887245479659, |
|
"loss": 2.1191, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.40644789867587794, |
|
"grad_norm": 3.093526840209961, |
|
"learning_rate": 0.00018807527757157787, |
|
"loss": 2.0547, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.40702360391479564, |
|
"grad_norm": 4.2745680809021, |
|
"learning_rate": 0.00018796132700918793, |
|
"loss": 1.8785, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.4075993091537133, |
|
"grad_norm": 3.898991107940674, |
|
"learning_rate": 0.00018784687359577791, |
|
"loss": 1.6114, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.408175014392631, |
|
"grad_norm": 2.8433330059051514, |
|
"learning_rate": 0.00018773191806957298, |
|
"loss": 2.0685, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.40875071963154863, |
|
"grad_norm": 16.428258895874023, |
|
"learning_rate": 0.00018761646117203696, |
|
"loss": 1.8752, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.40932642487046633, |
|
"grad_norm": 4.984866142272949, |
|
"learning_rate": 0.0001875005036478675, |
|
"loss": 2.0355, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.409902130109384, |
|
"grad_norm": 3.2980496883392334, |
|
"learning_rate": 0.00018738404624499136, |
|
"loss": 1.8788, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.4104778353483017, |
|
"grad_norm": 2.58404541015625, |
|
"learning_rate": 0.00018726708971455945, |
|
"loss": 2.0856, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.4110535405872193, |
|
"grad_norm": 22.611204147338867, |
|
"learning_rate": 0.00018714963481094207, |
|
"loss": 1.7451, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.411629245826137, |
|
"grad_norm": 3.256192445755005, |
|
"learning_rate": 0.0001870316822917241, |
|
"loss": 1.7017, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.41220495106505467, |
|
"grad_norm": 3.7287187576293945, |
|
"learning_rate": 0.00018691323291769992, |
|
"loss": 1.5585, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.41278065630397237, |
|
"grad_norm": 4.698144912719727, |
|
"learning_rate": 0.00018679428745286872, |
|
"loss": 1.8391, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.41335636154289, |
|
"grad_norm": 8.514996528625488, |
|
"learning_rate": 0.00018667484666442944, |
|
"loss": 1.7789, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.4139320667818077, |
|
"grad_norm": 7.140060901641846, |
|
"learning_rate": 0.00018655491132277589, |
|
"loss": 1.894, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.41450777202072536, |
|
"grad_norm": 3.8144354820251465, |
|
"learning_rate": 0.00018643448220149173, |
|
"loss": 1.7206, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.41508347725964306, |
|
"grad_norm": 18.383852005004883, |
|
"learning_rate": 0.0001863135600773455, |
|
"loss": 1.9508, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.41565918249856076, |
|
"grad_norm": 4.58454704284668, |
|
"learning_rate": 0.00018619214573028562, |
|
"loss": 1.9023, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.4162348877374784, |
|
"grad_norm": 2.33950138092041, |
|
"learning_rate": 0.00018607023994343533, |
|
"loss": 1.9343, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.4168105929763961, |
|
"grad_norm": 3.2266829013824463, |
|
"learning_rate": 0.0001859478435030877, |
|
"loss": 2.0175, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.41738629821531376, |
|
"grad_norm": 2.7907965183258057, |
|
"learning_rate": 0.00018582495719870047, |
|
"loss": 1.9857, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.41796200345423146, |
|
"grad_norm": 9.727225303649902, |
|
"learning_rate": 0.00018570158182289103, |
|
"loss": 1.7811, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.4185377086931491, |
|
"grad_norm": 8.029142379760742, |
|
"learning_rate": 0.00018557771817143132, |
|
"loss": 2.1232, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.4191134139320668, |
|
"grad_norm": 17.49789810180664, |
|
"learning_rate": 0.0001854533670432426, |
|
"loss": 1.8807, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.41968911917098445, |
|
"grad_norm": 2.434882402420044, |
|
"learning_rate": 0.00018532852924039035, |
|
"loss": 1.839, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.42026482440990215, |
|
"grad_norm": 4.167110919952393, |
|
"learning_rate": 0.0001852032055680792, |
|
"loss": 1.803, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4208405296488198, |
|
"grad_norm": 2.675874948501587, |
|
"learning_rate": 0.00018507739683464752, |
|
"loss": 2.3544, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.4214162348877375, |
|
"grad_norm": 3.3444297313690186, |
|
"learning_rate": 0.00018495110385156237, |
|
"loss": 2.0448, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.42199194012665514, |
|
"grad_norm": 4.82088041305542, |
|
"learning_rate": 0.00018482432743341433, |
|
"loss": 2.0651, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.42256764536557284, |
|
"grad_norm": 2.519653081893921, |
|
"learning_rate": 0.000184697068397912, |
|
"loss": 2.0949, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.4231433506044905, |
|
"grad_norm": 5.350189685821533, |
|
"learning_rate": 0.0001845693275658769, |
|
"loss": 1.7065, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.4237190558434082, |
|
"grad_norm": 3.690128803253174, |
|
"learning_rate": 0.00018444110576123812, |
|
"loss": 1.6012, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.42429476108232583, |
|
"grad_norm": 3.2592711448669434, |
|
"learning_rate": 0.00018431240381102713, |
|
"loss": 1.956, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.42487046632124353, |
|
"grad_norm": 8.19218921661377, |
|
"learning_rate": 0.0001841832225453722, |
|
"loss": 1.6416, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.4254461715601612, |
|
"grad_norm": 2.3770790100097656, |
|
"learning_rate": 0.0001840535627974933, |
|
"loss": 2.1405, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.4260218767990789, |
|
"grad_norm": 4.077558994293213, |
|
"learning_rate": 0.00018392342540369657, |
|
"loss": 1.5653, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4265975820379965, |
|
"grad_norm": 2.6370084285736084, |
|
"learning_rate": 0.00018379281120336897, |
|
"loss": 1.768, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.4271732872769142, |
|
"grad_norm": 2.450295925140381, |
|
"learning_rate": 0.00018366172103897283, |
|
"loss": 1.7358, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.42774899251583187, |
|
"grad_norm": 5.39324951171875, |
|
"learning_rate": 0.00018353015575604052, |
|
"loss": 2.0319, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.4283246977547496, |
|
"grad_norm": 2.238994836807251, |
|
"learning_rate": 0.0001833981162031689, |
|
"loss": 2.1476, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.4289004029936672, |
|
"grad_norm": 2.7961604595184326, |
|
"learning_rate": 0.00018326560323201382, |
|
"loss": 1.937, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.4294761082325849, |
|
"grad_norm": 3.0251359939575195, |
|
"learning_rate": 0.00018313261769728478, |
|
"loss": 1.387, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.43005181347150256, |
|
"grad_norm": 12.313629150390625, |
|
"learning_rate": 0.00018299916045673922, |
|
"loss": 1.858, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.43062751871042027, |
|
"grad_norm": 2.7766458988189697, |
|
"learning_rate": 0.00018286523237117717, |
|
"loss": 1.7907, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.43120322394933797, |
|
"grad_norm": 4.888232707977295, |
|
"learning_rate": 0.00018273083430443555, |
|
"loss": 2.0781, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.4317789291882556, |
|
"grad_norm": 3.5016186237335205, |
|
"learning_rate": 0.00018259596712338268, |
|
"loss": 1.9422, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4323546344271733, |
|
"grad_norm": 2.471903085708618, |
|
"learning_rate": 0.00018246063169791269, |
|
"loss": 1.4743, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.43293033966609096, |
|
"grad_norm": 5.355947494506836, |
|
"learning_rate": 0.0001823248289009399, |
|
"loss": 1.7892, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.43350604490500866, |
|
"grad_norm": 30.411474227905273, |
|
"learning_rate": 0.00018218855960839308, |
|
"loss": 2.1997, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.4340817501439263, |
|
"grad_norm": 22.769906997680664, |
|
"learning_rate": 0.00018205182469921001, |
|
"loss": 2.2114, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.434657455382844, |
|
"grad_norm": 2.85595703125, |
|
"learning_rate": 0.00018191462505533172, |
|
"loss": 1.6781, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.43523316062176165, |
|
"grad_norm": 13.515556335449219, |
|
"learning_rate": 0.00018177696156169664, |
|
"loss": 1.8627, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.43580886586067935, |
|
"grad_norm": 2.449556589126587, |
|
"learning_rate": 0.00018163883510623514, |
|
"loss": 1.9787, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.436384571099597, |
|
"grad_norm": 3.347914457321167, |
|
"learning_rate": 0.00018150024657986373, |
|
"loss": 1.6179, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.4369602763385147, |
|
"grad_norm": 2.167959451675415, |
|
"learning_rate": 0.00018136119687647912, |
|
"loss": 2.2641, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.43753598157743234, |
|
"grad_norm": 1.9680399894714355, |
|
"learning_rate": 0.00018122168689295283, |
|
"loss": 2.1557, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.43811168681635004, |
|
"grad_norm": 1.9507166147232056, |
|
"learning_rate": 0.000181081717529125, |
|
"loss": 1.9336, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.4386873920552677, |
|
"grad_norm": 10.30239200592041, |
|
"learning_rate": 0.0001809412896877989, |
|
"loss": 1.716, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.4392630972941854, |
|
"grad_norm": 17.331459045410156, |
|
"learning_rate": 0.0001808004042747349, |
|
"loss": 1.7709, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.43983880253310303, |
|
"grad_norm": 8.791406631469727, |
|
"learning_rate": 0.00018065906219864476, |
|
"loss": 1.8645, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.44041450777202074, |
|
"grad_norm": 14.691398620605469, |
|
"learning_rate": 0.0001805172643711857, |
|
"loss": 1.732, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.4409902130109384, |
|
"grad_norm": 8.738691329956055, |
|
"learning_rate": 0.00018037501170695459, |
|
"loss": 1.8119, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.4415659182498561, |
|
"grad_norm": 3.6133551597595215, |
|
"learning_rate": 0.00018023230512348193, |
|
"loss": 2.2517, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.4421416234887737, |
|
"grad_norm": 4.219852447509766, |
|
"learning_rate": 0.00018008914554122597, |
|
"loss": 1.9683, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.44271732872769143, |
|
"grad_norm": 6.014896869659424, |
|
"learning_rate": 0.00017994553388356695, |
|
"loss": 1.6415, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.4432930339666091, |
|
"grad_norm": 5.801370620727539, |
|
"learning_rate": 0.00017980147107680083, |
|
"loss": 1.8638, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4438687392055268, |
|
"grad_norm": 3.9522225856781006, |
|
"learning_rate": 0.00017965695805013365, |
|
"loss": 2.1045, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 2.9866466522216797, |
|
"learning_rate": 0.00017951199573567524, |
|
"loss": 1.9505, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.4450201496833621, |
|
"grad_norm": 3.0734667778015137, |
|
"learning_rate": 0.00017936658506843335, |
|
"loss": 2.2168, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.44559585492227977, |
|
"grad_norm": 3.1300957202911377, |
|
"learning_rate": 0.00017922072698630772, |
|
"loss": 1.8417, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.44617156016119747, |
|
"grad_norm": 2.1265501976013184, |
|
"learning_rate": 0.00017907442243008382, |
|
"loss": 2.0039, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.44674726540011517, |
|
"grad_norm": 2.7996795177459717, |
|
"learning_rate": 0.00017892767234342684, |
|
"loss": 1.8003, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.4473229706390328, |
|
"grad_norm": 6.149320602416992, |
|
"learning_rate": 0.00017878047767287577, |
|
"loss": 1.7551, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.4478986758779505, |
|
"grad_norm": 2.3379878997802734, |
|
"learning_rate": 0.00017863283936783708, |
|
"loss": 1.9938, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.44847438111686816, |
|
"grad_norm": 3.4820477962493896, |
|
"learning_rate": 0.00017848475838057873, |
|
"loss": 1.5153, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.44905008635578586, |
|
"grad_norm": 3.582059860229492, |
|
"learning_rate": 0.00017833623566622397, |
|
"loss": 1.9166, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4496257915947035, |
|
"grad_norm": 3.9701995849609375, |
|
"learning_rate": 0.00017818727218274513, |
|
"loss": 1.4762, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.4502014968336212, |
|
"grad_norm": 2.0633654594421387, |
|
"learning_rate": 0.00017803786889095764, |
|
"loss": 2.1013, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.45077720207253885, |
|
"grad_norm": 2.459294080734253, |
|
"learning_rate": 0.00017788802675451352, |
|
"loss": 1.8575, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.45135290731145655, |
|
"grad_norm": 5.101840496063232, |
|
"learning_rate": 0.00017773774673989553, |
|
"loss": 1.4693, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.4519286125503742, |
|
"grad_norm": 2.827829360961914, |
|
"learning_rate": 0.0001775870298164106, |
|
"loss": 2.1377, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.4525043177892919, |
|
"grad_norm": 2.8199522495269775, |
|
"learning_rate": 0.0001774358769561838, |
|
"loss": 1.9839, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.45308002302820954, |
|
"grad_norm": 3.852107286453247, |
|
"learning_rate": 0.00017728428913415192, |
|
"loss": 1.7629, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.45365572826712725, |
|
"grad_norm": 5.424905776977539, |
|
"learning_rate": 0.00017713226732805738, |
|
"loss": 1.8626, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.4542314335060449, |
|
"grad_norm": 8.507397651672363, |
|
"learning_rate": 0.0001769798125184417, |
|
"loss": 2.0023, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.4548071387449626, |
|
"grad_norm": 2.348231792449951, |
|
"learning_rate": 0.00017682692568863926, |
|
"loss": 1.8687, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.45538284398388024, |
|
"grad_norm": 16.692827224731445, |
|
"learning_rate": 0.00017667360782477106, |
|
"loss": 2.0102, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.45595854922279794, |
|
"grad_norm": 3.0447139739990234, |
|
"learning_rate": 0.00017651985991573826, |
|
"loss": 1.867, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.4565342544617156, |
|
"grad_norm": 2.9347379207611084, |
|
"learning_rate": 0.00017636568295321573, |
|
"loss": 1.6961, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.4571099597006333, |
|
"grad_norm": 6.137345790863037, |
|
"learning_rate": 0.00017621107793164582, |
|
"loss": 1.5024, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.45768566493955093, |
|
"grad_norm": 1.9799631834030151, |
|
"learning_rate": 0.0001760560458482318, |
|
"loss": 1.9756, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.45826137017846863, |
|
"grad_norm": 10.146332740783691, |
|
"learning_rate": 0.00017590058770293156, |
|
"loss": 1.8236, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.4588370754173863, |
|
"grad_norm": 2.7647223472595215, |
|
"learning_rate": 0.00017574470449845103, |
|
"loss": 1.9874, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.459412780656304, |
|
"grad_norm": 2.6900711059570312, |
|
"learning_rate": 0.00017558839724023781, |
|
"loss": 1.9816, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.4599884858952216, |
|
"grad_norm": 2.0507872104644775, |
|
"learning_rate": 0.00017543166693647467, |
|
"loss": 2.1267, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.4605641911341393, |
|
"grad_norm": 5.527276515960693, |
|
"learning_rate": 0.00017527451459807292, |
|
"loss": 1.9468, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46113989637305697, |
|
"grad_norm": 8.115386009216309, |
|
"learning_rate": 0.00017511694123866615, |
|
"loss": 1.6804, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.46171560161197467, |
|
"grad_norm": 2.108691692352295, |
|
"learning_rate": 0.0001749589478746034, |
|
"loss": 1.5739, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.46229130685089237, |
|
"grad_norm": 15.620509147644043, |
|
"learning_rate": 0.00017480053552494288, |
|
"loss": 1.7336, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.46286701208981, |
|
"grad_norm": 3.442645788192749, |
|
"learning_rate": 0.00017464170521144508, |
|
"loss": 1.7112, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.4634427173287277, |
|
"grad_norm": 7.99544095993042, |
|
"learning_rate": 0.0001744824579585665, |
|
"loss": 1.447, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46401842256764536, |
|
"grad_norm": 7.0831217765808105, |
|
"learning_rate": 0.0001743227947934529, |
|
"loss": 1.8631, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.46459412780656306, |
|
"grad_norm": 6.996182441711426, |
|
"learning_rate": 0.0001741627167459326, |
|
"loss": 1.6913, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.4651698330454807, |
|
"grad_norm": 2.5042760372161865, |
|
"learning_rate": 0.00017400222484851001, |
|
"loss": 1.8691, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.4657455382843984, |
|
"grad_norm": 3.7604565620422363, |
|
"learning_rate": 0.00017384132013635874, |
|
"loss": 2.0776, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.46632124352331605, |
|
"grad_norm": 4.427332878112793, |
|
"learning_rate": 0.00017368000364731517, |
|
"loss": 1.879, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.46689694876223375, |
|
"grad_norm": 3.7155957221984863, |
|
"learning_rate": 0.0001735182764218716, |
|
"loss": 1.9187, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.4674726540011514, |
|
"grad_norm": 3.7291319370269775, |
|
"learning_rate": 0.00017335613950316962, |
|
"loss": 1.5446, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.4680483592400691, |
|
"grad_norm": 5.578220367431641, |
|
"learning_rate": 0.0001731935939369933, |
|
"loss": 1.9119, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.46862406447898675, |
|
"grad_norm": 3.377821207046509, |
|
"learning_rate": 0.00017303064077176246, |
|
"loss": 2.0695, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.46919976971790445, |
|
"grad_norm": 2.6164028644561768, |
|
"learning_rate": 0.000172867281058526, |
|
"loss": 1.8049, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.4697754749568221, |
|
"grad_norm": 13.39267635345459, |
|
"learning_rate": 0.00017270351585095507, |
|
"loss": 1.45, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.4703511801957398, |
|
"grad_norm": 2.8776485919952393, |
|
"learning_rate": 0.00017253934620533625, |
|
"loss": 1.9204, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.47092688543465744, |
|
"grad_norm": 3.7855734825134277, |
|
"learning_rate": 0.00017237477318056462, |
|
"loss": 1.2213, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.47150259067357514, |
|
"grad_norm": 2.4604861736297607, |
|
"learning_rate": 0.00017220979783813724, |
|
"loss": 1.6834, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.4720782959124928, |
|
"grad_norm": 3.9738218784332275, |
|
"learning_rate": 0.00017204442124214603, |
|
"loss": 2.0102, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4726540011514105, |
|
"grad_norm": 53.47602462768555, |
|
"learning_rate": 0.00017187864445927103, |
|
"loss": 1.9759, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.47322970639032813, |
|
"grad_norm": 9.791850090026855, |
|
"learning_rate": 0.0001717124685587734, |
|
"loss": 1.5549, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.47380541162924583, |
|
"grad_norm": 3.8684937953948975, |
|
"learning_rate": 0.00017154589461248877, |
|
"loss": 1.5908, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.4743811168681635, |
|
"grad_norm": 24.494279861450195, |
|
"learning_rate": 0.00017137892369482004, |
|
"loss": 1.6402, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.4749568221070812, |
|
"grad_norm": 3.5959551334381104, |
|
"learning_rate": 0.00017121155688273057, |
|
"loss": 1.6951, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4755325273459988, |
|
"grad_norm": 5.078423976898193, |
|
"learning_rate": 0.00017104379525573738, |
|
"loss": 1.6469, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.4761082325849165, |
|
"grad_norm": 2.160147190093994, |
|
"learning_rate": 0.00017087563989590386, |
|
"loss": 1.756, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.47668393782383417, |
|
"grad_norm": 3.555968999862671, |
|
"learning_rate": 0.00017070709188783318, |
|
"loss": 1.9666, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.47725964306275187, |
|
"grad_norm": 2.406215190887451, |
|
"learning_rate": 0.00017053815231866088, |
|
"loss": 2.0264, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.47783534830166957, |
|
"grad_norm": 3.1693837642669678, |
|
"learning_rate": 0.00017036882227804826, |
|
"loss": 1.6301, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4784110535405872, |
|
"grad_norm": 2.4648032188415527, |
|
"learning_rate": 0.00017019910285817505, |
|
"loss": 2.1071, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.4789867587795049, |
|
"grad_norm": 2.020519733428955, |
|
"learning_rate": 0.00017002899515373252, |
|
"loss": 2.0168, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.47956246401842256, |
|
"grad_norm": 8.258744239807129, |
|
"learning_rate": 0.00016985850026191634, |
|
"loss": 1.738, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.48013816925734026, |
|
"grad_norm": 2.6535940170288086, |
|
"learning_rate": 0.0001696876192824196, |
|
"loss": 2.0153, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.4807138744962579, |
|
"grad_norm": 2.494950294494629, |
|
"learning_rate": 0.00016951635331742564, |
|
"loss": 1.8319, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.4812895797351756, |
|
"grad_norm": 13.977831840515137, |
|
"learning_rate": 0.0001693447034716009, |
|
"loss": 1.6622, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.48186528497409326, |
|
"grad_norm": 5.264831066131592, |
|
"learning_rate": 0.00016917267085208798, |
|
"loss": 1.7066, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.48244099021301096, |
|
"grad_norm": 3.8573200702667236, |
|
"learning_rate": 0.0001690002565684982, |
|
"loss": 1.7634, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.4830166954519286, |
|
"grad_norm": 4.011209487915039, |
|
"learning_rate": 0.0001688274617329048, |
|
"loss": 1.6426, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.4835924006908463, |
|
"grad_norm": 4.699135780334473, |
|
"learning_rate": 0.00016865428745983538, |
|
"loss": 1.9267, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48416810592976395, |
|
"grad_norm": 4.957156181335449, |
|
"learning_rate": 0.0001684807348662651, |
|
"loss": 1.8474, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.48474381116868165, |
|
"grad_norm": 2.934382677078247, |
|
"learning_rate": 0.00016830680507160924, |
|
"loss": 1.7862, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.4853195164075993, |
|
"grad_norm": 2.630108594894409, |
|
"learning_rate": 0.00016813249919771592, |
|
"loss": 1.8025, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.485895221646517, |
|
"grad_norm": 3.7840278148651123, |
|
"learning_rate": 0.00016795781836885913, |
|
"loss": 1.5822, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.48647092688543464, |
|
"grad_norm": 4.31719446182251, |
|
"learning_rate": 0.00016778276371173123, |
|
"loss": 1.4343, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.48704663212435234, |
|
"grad_norm": 11.9985933303833, |
|
"learning_rate": 0.00016760733635543578, |
|
"loss": 1.8163, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.48762233736327, |
|
"grad_norm": 3.754255533218384, |
|
"learning_rate": 0.00016743153743148024, |
|
"loss": 1.9249, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.4881980426021877, |
|
"grad_norm": 2.8391778469085693, |
|
"learning_rate": 0.00016725536807376873, |
|
"loss": 1.6164, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.48877374784110533, |
|
"grad_norm": 2.3037109375, |
|
"learning_rate": 0.0001670788294185947, |
|
"loss": 1.7085, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.48934945308002303, |
|
"grad_norm": 30.62044334411621, |
|
"learning_rate": 0.00016690192260463346, |
|
"loss": 1.8193, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4899251583189407, |
|
"grad_norm": 9.023576736450195, |
|
"learning_rate": 0.00016672464877293504, |
|
"loss": 1.7972, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.4905008635578584, |
|
"grad_norm": 4.591246128082275, |
|
"learning_rate": 0.00016654700906691664, |
|
"loss": 1.5173, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.491076568796776, |
|
"grad_norm": 4.623873710632324, |
|
"learning_rate": 0.00016636900463235549, |
|
"loss": 1.6689, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.4916522740356937, |
|
"grad_norm": 2.144906759262085, |
|
"learning_rate": 0.00016619063661738124, |
|
"loss": 2.0085, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.49222797927461137, |
|
"grad_norm": 2.73002290725708, |
|
"learning_rate": 0.00016601190617246858, |
|
"loss": 1.5989, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.49280368451352907, |
|
"grad_norm": 9.77154541015625, |
|
"learning_rate": 0.00016583281445042998, |
|
"loss": 1.9491, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.4933793897524468, |
|
"grad_norm": 5.815699577331543, |
|
"learning_rate": 0.00016565336260640812, |
|
"loss": 2.0127, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.4939550949913644, |
|
"grad_norm": 3.791182041168213, |
|
"learning_rate": 0.00016547355179786838, |
|
"loss": 1.865, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.4945308002302821, |
|
"grad_norm": 3.301182508468628, |
|
"learning_rate": 0.00016529338318459165, |
|
"loss": 1.7362, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.49510650546919976, |
|
"grad_norm": 3.233245849609375, |
|
"learning_rate": 0.00016511285792866648, |
|
"loss": 1.5621, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.49568221070811747, |
|
"grad_norm": 4.960054397583008, |
|
"learning_rate": 0.00016493197719448182, |
|
"loss": 1.739, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.4962579159470351, |
|
"grad_norm": 1.8711457252502441, |
|
"learning_rate": 0.00016475074214871953, |
|
"loss": 1.8269, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.4968336211859528, |
|
"grad_norm": 2.714691638946533, |
|
"learning_rate": 0.00016456915396034666, |
|
"loss": 2.1049, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.49740932642487046, |
|
"grad_norm": 4.092164993286133, |
|
"learning_rate": 0.0001643872138006082, |
|
"loss": 1.8893, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.49798503166378816, |
|
"grad_norm": 3.0131890773773193, |
|
"learning_rate": 0.00016420492284301917, |
|
"loss": 1.9821, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.4985607369027058, |
|
"grad_norm": 10.343781471252441, |
|
"learning_rate": 0.00016402228226335735, |
|
"loss": 2.1446, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.4991364421416235, |
|
"grad_norm": 4.245277404785156, |
|
"learning_rate": 0.00016383929323965555, |
|
"loss": 2.072, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.49971214738054115, |
|
"grad_norm": 11.761308670043945, |
|
"learning_rate": 0.0001636559569521941, |
|
"loss": 1.3602, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.5002878526194589, |
|
"grad_norm": 9.922039985656738, |
|
"learning_rate": 0.00016347227458349302, |
|
"loss": 1.7432, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.5002878526194589, |
|
"eval_loss": 1.1380085945129395, |
|
"eval_runtime": 1021.271, |
|
"eval_samples_per_second": 2.51, |
|
"eval_steps_per_second": 2.51, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.5008635578583766, |
|
"grad_norm": 16.10755157470703, |
|
"learning_rate": 0.00016328824731830482, |
|
"loss": 1.9049, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5014392630972941, |
|
"grad_norm": 4.447638511657715, |
|
"learning_rate": 0.00016310387634360638, |
|
"loss": 1.7126, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.5020149683362118, |
|
"grad_norm": 17.62114715576172, |
|
"learning_rate": 0.00016291916284859155, |
|
"loss": 1.5879, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.5025906735751295, |
|
"grad_norm": 3.3077991008758545, |
|
"learning_rate": 0.00016273410802466353, |
|
"loss": 1.5913, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.5031663788140472, |
|
"grad_norm": 3.9533162117004395, |
|
"learning_rate": 0.00016254871306542695, |
|
"loss": 1.5365, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.5037420840529648, |
|
"grad_norm": 5.701480865478516, |
|
"learning_rate": 0.00016236297916668045, |
|
"loss": 1.5129, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5043177892918825, |
|
"grad_norm": 12.35987377166748, |
|
"learning_rate": 0.0001621769075264088, |
|
"loss": 1.9525, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.5048934945308002, |
|
"grad_norm": 2.3410115242004395, |
|
"learning_rate": 0.0001619904993447751, |
|
"loss": 1.9946, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.5054691997697179, |
|
"grad_norm": 3.4730076789855957, |
|
"learning_rate": 0.00016180375582411328, |
|
"loss": 1.2603, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.5060449050086355, |
|
"grad_norm": 10.503929138183594, |
|
"learning_rate": 0.00016161667816892012, |
|
"loss": 1.7077, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.5066206102475532, |
|
"grad_norm": 7.053747177124023, |
|
"learning_rate": 0.00016142926758584767, |
|
"loss": 1.5856, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5071963154864709, |
|
"grad_norm": 4.179106712341309, |
|
"learning_rate": 0.00016124152528369519, |
|
"loss": 1.6933, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.5077720207253886, |
|
"grad_norm": 2.949862241744995, |
|
"learning_rate": 0.00016105345247340171, |
|
"loss": 2.1307, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.5083477259643063, |
|
"grad_norm": 2.3588945865631104, |
|
"learning_rate": 0.000160865050368038, |
|
"loss": 2.2399, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.5089234312032239, |
|
"grad_norm": 3.3859925270080566, |
|
"learning_rate": 0.00016067632018279865, |
|
"loss": 1.9435, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.5094991364421416, |
|
"grad_norm": 3.3930764198303223, |
|
"learning_rate": 0.00016048726313499457, |
|
"loss": 1.9658, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.5100748416810593, |
|
"grad_norm": 3.163694381713867, |
|
"learning_rate": 0.00016029788044404477, |
|
"loss": 2.0123, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.510650546919977, |
|
"grad_norm": 2.4561283588409424, |
|
"learning_rate": 0.00016010817333146876, |
|
"loss": 1.8384, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.5112262521588946, |
|
"grad_norm": 3.2786359786987305, |
|
"learning_rate": 0.00015991814302087853, |
|
"loss": 1.54, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.5118019573978123, |
|
"grad_norm": 9.279755592346191, |
|
"learning_rate": 0.0001597277907379707, |
|
"loss": 1.3626, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.51237766263673, |
|
"grad_norm": 1.9229642152786255, |
|
"learning_rate": 0.0001595371177105186, |
|
"loss": 2.2197, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5129533678756477, |
|
"grad_norm": 2.7186543941497803, |
|
"learning_rate": 0.00015934612516836446, |
|
"loss": 1.9476, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.5135290731145653, |
|
"grad_norm": 2.1631014347076416, |
|
"learning_rate": 0.00015915481434341123, |
|
"loss": 1.7848, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.514104778353483, |
|
"grad_norm": 11.677098274230957, |
|
"learning_rate": 0.0001589631864696149, |
|
"loss": 1.8612, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.5146804835924007, |
|
"grad_norm": 3.0613834857940674, |
|
"learning_rate": 0.00015877124278297636, |
|
"loss": 1.6289, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.5152561888313184, |
|
"grad_norm": 2.7426950931549072, |
|
"learning_rate": 0.00015857898452153354, |
|
"loss": 2.2257, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.515831894070236, |
|
"grad_norm": 3.3551688194274902, |
|
"learning_rate": 0.00015838641292535339, |
|
"loss": 1.9517, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.5164075993091537, |
|
"grad_norm": 9.800053596496582, |
|
"learning_rate": 0.0001581935292365238, |
|
"loss": 1.6841, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.5169833045480714, |
|
"grad_norm": 2.620741844177246, |
|
"learning_rate": 0.00015800033469914572, |
|
"loss": 1.9933, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.5175590097869891, |
|
"grad_norm": 8.128425598144531, |
|
"learning_rate": 0.00015780683055932504, |
|
"loss": 1.9019, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.5181347150259067, |
|
"grad_norm": 2.946974754333496, |
|
"learning_rate": 0.00015761301806516468, |
|
"loss": 1.7235, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5187104202648244, |
|
"grad_norm": 10.84506607055664, |
|
"learning_rate": 0.00015741889846675625, |
|
"loss": 1.6609, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.5192861255037421, |
|
"grad_norm": 4.477931499481201, |
|
"learning_rate": 0.00015722447301617237, |
|
"loss": 2.0412, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.5198618307426598, |
|
"grad_norm": 13.774998664855957, |
|
"learning_rate": 0.00015702974296745843, |
|
"loss": 1.5644, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.5204375359815774, |
|
"grad_norm": 4.497225284576416, |
|
"learning_rate": 0.00015683470957662425, |
|
"loss": 1.7644, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.5210132412204951, |
|
"grad_norm": 3.543210983276367, |
|
"learning_rate": 0.00015663937410163644, |
|
"loss": 1.6813, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.5215889464594128, |
|
"grad_norm": 4.446226119995117, |
|
"learning_rate": 0.00015644373780240994, |
|
"loss": 1.5355, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.5221646516983305, |
|
"grad_norm": 2.871023416519165, |
|
"learning_rate": 0.00015624780194080004, |
|
"loss": 1.8574, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.5227403569372481, |
|
"grad_norm": 3.543043851852417, |
|
"learning_rate": 0.00015605156778059426, |
|
"loss": 1.6743, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.5233160621761658, |
|
"grad_norm": 3.302360773086548, |
|
"learning_rate": 0.00015585503658750399, |
|
"loss": 2.004, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.5238917674150835, |
|
"grad_norm": 3.226323127746582, |
|
"learning_rate": 0.00015565820962915668, |
|
"loss": 1.8727, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5244674726540012, |
|
"grad_norm": 2.56103777885437, |
|
"learning_rate": 0.0001554610881750873, |
|
"loss": 1.5572, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.5250431778929189, |
|
"grad_norm": 2.682222366333008, |
|
"learning_rate": 0.00015526367349673044, |
|
"loss": 1.8991, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.5256188831318365, |
|
"grad_norm": 2.8308794498443604, |
|
"learning_rate": 0.00015506596686741192, |
|
"loss": 1.895, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.5261945883707542, |
|
"grad_norm": 2.992013931274414, |
|
"learning_rate": 0.0001548679695623407, |
|
"loss": 1.7397, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.5267702936096719, |
|
"grad_norm": 3.145866870880127, |
|
"learning_rate": 0.00015466968285860055, |
|
"loss": 1.7754, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.5273459988485896, |
|
"grad_norm": 3.8366172313690186, |
|
"learning_rate": 0.00015447110803514186, |
|
"loss": 1.4537, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.5279217040875072, |
|
"grad_norm": 3.056429624557495, |
|
"learning_rate": 0.00015427224637277348, |
|
"loss": 1.9531, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.5284974093264249, |
|
"grad_norm": 23.298696517944336, |
|
"learning_rate": 0.00015407309915415425, |
|
"loss": 1.6287, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.5290731145653426, |
|
"grad_norm": 2.360717535018921, |
|
"learning_rate": 0.0001538736676637849, |
|
"loss": 2.492, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.5296488198042603, |
|
"grad_norm": 3.8945038318634033, |
|
"learning_rate": 0.00015367395318799973, |
|
"loss": 1.8311, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5302245250431779, |
|
"grad_norm": 2.1640625, |
|
"learning_rate": 0.00015347395701495833, |
|
"loss": 1.9314, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.5308002302820956, |
|
"grad_norm": 2.0766513347625732, |
|
"learning_rate": 0.00015327368043463718, |
|
"loss": 2.1496, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.5313759355210133, |
|
"grad_norm": 2.49560809135437, |
|
"learning_rate": 0.00015307312473882137, |
|
"loss": 1.6021, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.531951640759931, |
|
"grad_norm": 3.279759645462036, |
|
"learning_rate": 0.00015287229122109633, |
|
"loss": 1.7603, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.5325273459988485, |
|
"grad_norm": 2.0442922115325928, |
|
"learning_rate": 0.0001526711811768395, |
|
"loss": 2.0437, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5331030512377662, |
|
"grad_norm": 2.3960659503936768, |
|
"learning_rate": 0.0001524697959032118, |
|
"loss": 1.4901, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.533678756476684, |
|
"grad_norm": 2.597766160964966, |
|
"learning_rate": 0.00015226813669914948, |
|
"loss": 1.6113, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.5342544617156016, |
|
"grad_norm": 12.007303237915039, |
|
"learning_rate": 0.00015206620486535552, |
|
"loss": 1.6338, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.5348301669545192, |
|
"grad_norm": 2.297700881958008, |
|
"learning_rate": 0.0001518640017042915, |
|
"loss": 1.5866, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.5354058721934369, |
|
"grad_norm": 4.744719505310059, |
|
"learning_rate": 0.00015166152852016902, |
|
"loss": 2.1694, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5359815774323546, |
|
"grad_norm": 2.273824453353882, |
|
"learning_rate": 0.00015145878661894125, |
|
"loss": 1.9095, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.5365572826712723, |
|
"grad_norm": 3.70763897895813, |
|
"learning_rate": 0.00015125577730829473, |
|
"loss": 1.7984, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.5371329879101899, |
|
"grad_norm": 4.088851451873779, |
|
"learning_rate": 0.00015105250189764063, |
|
"loss": 1.8663, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.5377086931491076, |
|
"grad_norm": 1.9229241609573364, |
|
"learning_rate": 0.0001508489616981066, |
|
"loss": 2.0268, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.5382843983880253, |
|
"grad_norm": 6.149089336395264, |
|
"learning_rate": 0.00015064515802252817, |
|
"loss": 1.5163, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.538860103626943, |
|
"grad_norm": 3.5841498374938965, |
|
"learning_rate": 0.00015044109218544015, |
|
"loss": 1.6879, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.5394358088658607, |
|
"grad_norm": 5.613948345184326, |
|
"learning_rate": 0.00015023676550306848, |
|
"loss": 1.4269, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.5400115141047783, |
|
"grad_norm": 5.511398792266846, |
|
"learning_rate": 0.00015003217929332143, |
|
"loss": 1.5413, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.540587219343696, |
|
"grad_norm": 5.14990758895874, |
|
"learning_rate": 0.00014982733487578127, |
|
"loss": 1.6226, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.5411629245826137, |
|
"grad_norm": 2.902458667755127, |
|
"learning_rate": 0.0001496222335716957, |
|
"loss": 1.6545, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5417386298215314, |
|
"grad_norm": 4.643445014953613, |
|
"learning_rate": 0.00014941687670396938, |
|
"loss": 2.0121, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.542314335060449, |
|
"grad_norm": 3.1277265548706055, |
|
"learning_rate": 0.00014921126559715528, |
|
"loss": 1.6763, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.5428900402993667, |
|
"grad_norm": 1.9732697010040283, |
|
"learning_rate": 0.00014900540157744625, |
|
"loss": 1.8311, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.5434657455382844, |
|
"grad_norm": 7.364120006561279, |
|
"learning_rate": 0.00014879928597266644, |
|
"loss": 1.3101, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.5440414507772021, |
|
"grad_norm": 4.716955661773682, |
|
"learning_rate": 0.0001485929201122628, |
|
"loss": 1.8418, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.5446171560161197, |
|
"grad_norm": 2.276949167251587, |
|
"learning_rate": 0.0001483863053272962, |
|
"loss": 1.8196, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.5451928612550374, |
|
"grad_norm": 2.367461681365967, |
|
"learning_rate": 0.00014817944295043332, |
|
"loss": 1.952, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.5457685664939551, |
|
"grad_norm": 8.040431022644043, |
|
"learning_rate": 0.0001479723343159377, |
|
"loss": 1.9765, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.5463442717328728, |
|
"grad_norm": 1.7760546207427979, |
|
"learning_rate": 0.0001477649807596613, |
|
"loss": 1.8163, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.5469199769717904, |
|
"grad_norm": 6.472499370574951, |
|
"learning_rate": 0.00014755738361903566, |
|
"loss": 1.9802, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5474956822107081, |
|
"grad_norm": 16.233253479003906, |
|
"learning_rate": 0.00014734954423306371, |
|
"loss": 1.8819, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.5480713874496258, |
|
"grad_norm": 2.759955644607544, |
|
"learning_rate": 0.00014714146394231061, |
|
"loss": 1.849, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.5486470926885435, |
|
"grad_norm": 2.3619799613952637, |
|
"learning_rate": 0.00014693314408889554, |
|
"loss": 1.9287, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.5492227979274611, |
|
"grad_norm": 7.854180335998535, |
|
"learning_rate": 0.00014672458601648272, |
|
"loss": 1.6121, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.5497985031663788, |
|
"grad_norm": 3.9259274005889893, |
|
"learning_rate": 0.000146515791070273, |
|
"loss": 1.4874, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.5503742084052965, |
|
"grad_norm": 3.472259521484375, |
|
"learning_rate": 0.000146306760596995, |
|
"loss": 1.7002, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.5509499136442142, |
|
"grad_norm": 9.12783145904541, |
|
"learning_rate": 0.0001460974959448965, |
|
"loss": 1.668, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.5515256188831318, |
|
"grad_norm": 3.1406407356262207, |
|
"learning_rate": 0.00014588799846373574, |
|
"loss": 1.6456, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.5521013241220495, |
|
"grad_norm": 2.9766335487365723, |
|
"learning_rate": 0.00014567826950477277, |
|
"loss": 1.5559, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.5526770293609672, |
|
"grad_norm": 3.6111319065093994, |
|
"learning_rate": 0.00014546831042076052, |
|
"loss": 1.7752, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5532527345998849, |
|
"grad_norm": 4.001034259796143, |
|
"learning_rate": 0.00014525812256593637, |
|
"loss": 1.9761, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.5538284398388025, |
|
"grad_norm": 3.3448774814605713, |
|
"learning_rate": 0.00014504770729601327, |
|
"loss": 1.7559, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.5544041450777202, |
|
"grad_norm": 4.9256157875061035, |
|
"learning_rate": 0.0001448370659681709, |
|
"loss": 1.3841, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.5549798503166379, |
|
"grad_norm": 2.9494550228118896, |
|
"learning_rate": 0.00014462619994104706, |
|
"loss": 2.1265, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 4.05312967300415, |
|
"learning_rate": 0.00014441511057472893, |
|
"loss": 1.3376, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.5561312607944733, |
|
"grad_norm": 4.632201194763184, |
|
"learning_rate": 0.0001442037992307441, |
|
"loss": 1.7425, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.5567069660333909, |
|
"grad_norm": 2.694762706756592, |
|
"learning_rate": 0.00014399226727205205, |
|
"loss": 2.0141, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.5572826712723086, |
|
"grad_norm": 2.672029495239258, |
|
"learning_rate": 0.00014378051606303512, |
|
"loss": 1.7991, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.5578583765112263, |
|
"grad_norm": 4.377752304077148, |
|
"learning_rate": 0.00014356854696948986, |
|
"loss": 1.5728, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.558434081750144, |
|
"grad_norm": 2.543248414993286, |
|
"learning_rate": 0.00014335636135861824, |
|
"loss": 2.063, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5590097869890616, |
|
"grad_norm": 2.466642141342163, |
|
"learning_rate": 0.00014314396059901863, |
|
"loss": 1.7082, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.5595854922279793, |
|
"grad_norm": 4.953126430511475, |
|
"learning_rate": 0.00014293134606067722, |
|
"loss": 1.5022, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.560161197466897, |
|
"grad_norm": 5.41407585144043, |
|
"learning_rate": 0.000142718519114959, |
|
"loss": 1.7407, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.5607369027058147, |
|
"grad_norm": 2.768073797225952, |
|
"learning_rate": 0.00014250548113459909, |
|
"loss": 1.8574, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.5613126079447323, |
|
"grad_norm": 2.6404266357421875, |
|
"learning_rate": 0.00014229223349369373, |
|
"loss": 1.5253, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.56188831318365, |
|
"grad_norm": 2.4457454681396484, |
|
"learning_rate": 0.00014207877756769138, |
|
"loss": 1.9335, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.5624640184225677, |
|
"grad_norm": 12.25162410736084, |
|
"learning_rate": 0.0001418651147333841, |
|
"loss": 1.5656, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.5630397236614854, |
|
"grad_norm": 2.4453790187835693, |
|
"learning_rate": 0.00014165124636889836, |
|
"loss": 1.6927, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.563615428900403, |
|
"grad_norm": 2.6654469966888428, |
|
"learning_rate": 0.0001414371738536865, |
|
"loss": 1.5248, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.5641911341393206, |
|
"grad_norm": 2.3234498500823975, |
|
"learning_rate": 0.00014122289856851735, |
|
"loss": 1.7526, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5647668393782384, |
|
"grad_norm": 2.1236610412597656, |
|
"learning_rate": 0.0001410084218954679, |
|
"loss": 1.81, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.565342544617156, |
|
"grad_norm": 2.213833808898926, |
|
"learning_rate": 0.00014079374521791389, |
|
"loss": 1.825, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.5659182498560736, |
|
"grad_norm": 5.118985176086426, |
|
"learning_rate": 0.00014057886992052115, |
|
"loss": 1.7381, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.5664939550949913, |
|
"grad_norm": 26.596086502075195, |
|
"learning_rate": 0.00014036379738923668, |
|
"loss": 1.4509, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.567069660333909, |
|
"grad_norm": 2.0581257343292236, |
|
"learning_rate": 0.00014014852901127954, |
|
"loss": 1.8644, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.5676453655728267, |
|
"grad_norm": 2.2645015716552734, |
|
"learning_rate": 0.00013993306617513204, |
|
"loss": 1.7144, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.5682210708117443, |
|
"grad_norm": 2.477935791015625, |
|
"learning_rate": 0.00013971741027053071, |
|
"loss": 1.6473, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.568796776050662, |
|
"grad_norm": 3.0278351306915283, |
|
"learning_rate": 0.00013950156268845748, |
|
"loss": 1.9054, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.5693724812895797, |
|
"grad_norm": 3.1719486713409424, |
|
"learning_rate": 0.00013928552482113054, |
|
"loss": 1.8626, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.5699481865284974, |
|
"grad_norm": 2.6236214637756348, |
|
"learning_rate": 0.0001390692980619953, |
|
"loss": 1.7584, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5705238917674151, |
|
"grad_norm": 2.0949866771698, |
|
"learning_rate": 0.00013885288380571575, |
|
"loss": 1.8523, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.5710995970063327, |
|
"grad_norm": 3.192641496658325, |
|
"learning_rate": 0.00013863628344816506, |
|
"loss": 1.8358, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.5716753022452504, |
|
"grad_norm": 2.1230075359344482, |
|
"learning_rate": 0.00013841949838641683, |
|
"loss": 1.6974, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.5722510074841681, |
|
"grad_norm": 2.634091854095459, |
|
"learning_rate": 0.00013820253001873602, |
|
"loss": 1.8269, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.5728267127230858, |
|
"grad_norm": 2.884781837463379, |
|
"learning_rate": 0.00013798537974456983, |
|
"loss": 1.3469, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.5734024179620034, |
|
"grad_norm": 2.4384100437164307, |
|
"learning_rate": 0.0001377680489645389, |
|
"loss": 1.5845, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.5739781232009211, |
|
"grad_norm": 2.9231410026550293, |
|
"learning_rate": 0.00013755053908042793, |
|
"loss": 1.7073, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.5745538284398388, |
|
"grad_norm": 3.0704684257507324, |
|
"learning_rate": 0.000137332851495177, |
|
"loss": 1.4623, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.5751295336787565, |
|
"grad_norm": 2.402618885040283, |
|
"learning_rate": 0.0001371149876128724, |
|
"loss": 2.1455, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.5757052389176741, |
|
"grad_norm": 2.723708152770996, |
|
"learning_rate": 0.00013689694883873733, |
|
"loss": 1.5748, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5762809441565918, |
|
"grad_norm": 1.9962085485458374, |
|
"learning_rate": 0.00013667873657912332, |
|
"loss": 1.9438, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.5768566493955095, |
|
"grad_norm": 5.691616058349609, |
|
"learning_rate": 0.0001364603522415006, |
|
"loss": 2.3176, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.5774323546344272, |
|
"grad_norm": 2.1746699810028076, |
|
"learning_rate": 0.00013624179723444952, |
|
"loss": 1.8675, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.5780080598733448, |
|
"grad_norm": 4.523983478546143, |
|
"learning_rate": 0.00013602307296765108, |
|
"loss": 1.5397, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.5785837651122625, |
|
"grad_norm": 29.249481201171875, |
|
"learning_rate": 0.0001358041808518782, |
|
"loss": 1.7436, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.5791594703511802, |
|
"grad_norm": 2.0091192722320557, |
|
"learning_rate": 0.00013558512229898628, |
|
"loss": 1.9335, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.5797351755900979, |
|
"grad_norm": 2.7708356380462646, |
|
"learning_rate": 0.00013536589872190425, |
|
"loss": 1.7912, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.5803108808290155, |
|
"grad_norm": 2.417196273803711, |
|
"learning_rate": 0.00013514651153462555, |
|
"loss": 1.8221, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.5808865860679332, |
|
"grad_norm": 2.3912861347198486, |
|
"learning_rate": 0.00013492696215219874, |
|
"loss": 1.526, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.5814622913068509, |
|
"grad_norm": 1.8041644096374512, |
|
"learning_rate": 0.00013470725199071868, |
|
"loss": 1.5973, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5820379965457686, |
|
"grad_norm": 3.6920905113220215, |
|
"learning_rate": 0.00013448738246731723, |
|
"loss": 1.8089, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.5826137017846862, |
|
"grad_norm": 1.7696315050125122, |
|
"learning_rate": 0.00013426735500015412, |
|
"loss": 1.8036, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.5831894070236039, |
|
"grad_norm": 2.644516706466675, |
|
"learning_rate": 0.00013404717100840775, |
|
"loss": 1.7614, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.5837651122625216, |
|
"grad_norm": 6.610838413238525, |
|
"learning_rate": 0.00013382683191226626, |
|
"loss": 1.8272, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.5843408175014393, |
|
"grad_norm": 25.5416202545166, |
|
"learning_rate": 0.00013360633913291805, |
|
"loss": 1.5991, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.584916522740357, |
|
"grad_norm": 6.063877105712891, |
|
"learning_rate": 0.00013338569409254285, |
|
"loss": 1.9859, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.5854922279792746, |
|
"grad_norm": 10.058717727661133, |
|
"learning_rate": 0.00013316489821430257, |
|
"loss": 1.5081, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.5860679332181923, |
|
"grad_norm": 3.7417914867401123, |
|
"learning_rate": 0.00013294395292233179, |
|
"loss": 1.4591, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.58664363845711, |
|
"grad_norm": 2.1001293659210205, |
|
"learning_rate": 0.00013272285964172905, |
|
"loss": 1.4471, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.5872193436960277, |
|
"grad_norm": 4.2659454345703125, |
|
"learning_rate": 0.00013250161979854727, |
|
"loss": 1.6232, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5877950489349453, |
|
"grad_norm": 3.3600850105285645, |
|
"learning_rate": 0.00013228023481978477, |
|
"loss": 2.0188, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.588370754173863, |
|
"grad_norm": 4.816986560821533, |
|
"learning_rate": 0.00013205870613337598, |
|
"loss": 1.883, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.5889464594127807, |
|
"grad_norm": 7.538907527923584, |
|
"learning_rate": 0.00013183703516818221, |
|
"loss": 1.6426, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.5895221646516984, |
|
"grad_norm": 3.869892120361328, |
|
"learning_rate": 0.00013161522335398252, |
|
"loss": 1.5805, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.590097869890616, |
|
"grad_norm": 2.9215962886810303, |
|
"learning_rate": 0.00013139327212146438, |
|
"loss": 2.0736, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5906735751295337, |
|
"grad_norm": 3.044528007507324, |
|
"learning_rate": 0.0001311711829022146, |
|
"loss": 1.773, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.5912492803684514, |
|
"grad_norm": 2.4565680027008057, |
|
"learning_rate": 0.00013094895712870993, |
|
"loss": 1.8526, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.5918249856073691, |
|
"grad_norm": 2.8523366451263428, |
|
"learning_rate": 0.00013072659623430797, |
|
"loss": 1.6659, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.5924006908462867, |
|
"grad_norm": 3.7679712772369385, |
|
"learning_rate": 0.0001305041016532377, |
|
"loss": 1.6689, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.5929763960852044, |
|
"grad_norm": 2.0870449542999268, |
|
"learning_rate": 0.0001302814748205906, |
|
"loss": 1.955, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5935521013241221, |
|
"grad_norm": 4.419449329376221, |
|
"learning_rate": 0.000130058717172311, |
|
"loss": 2.0321, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.5941278065630398, |
|
"grad_norm": 3.5084879398345947, |
|
"learning_rate": 0.00012983583014518704, |
|
"loss": 1.8397, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.5947035118019574, |
|
"grad_norm": 5.1162943840026855, |
|
"learning_rate": 0.00012961281517684137, |
|
"loss": 1.3311, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.595279217040875, |
|
"grad_norm": 3.4573514461517334, |
|
"learning_rate": 0.00012938967370572187, |
|
"loss": 1.7966, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.5958549222797928, |
|
"grad_norm": 3.372185468673706, |
|
"learning_rate": 0.00012916640717109234, |
|
"loss": 1.8347, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.5964306275187105, |
|
"grad_norm": 5.885191440582275, |
|
"learning_rate": 0.00012894301701302325, |
|
"loss": 1.5451, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.597006332757628, |
|
"grad_norm": 3.0024893283843994, |
|
"learning_rate": 0.00012871950467238243, |
|
"loss": 1.614, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.5975820379965457, |
|
"grad_norm": 3.6248486042022705, |
|
"learning_rate": 0.0001284958715908258, |
|
"loss": 1.9764, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.5981577432354634, |
|
"grad_norm": 2.173245668411255, |
|
"learning_rate": 0.00012827211921078807, |
|
"loss": 2.005, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.5987334484743811, |
|
"grad_norm": 3.430807590484619, |
|
"learning_rate": 0.00012804824897547342, |
|
"loss": 1.4051, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5993091537132987, |
|
"grad_norm": 2.929710865020752, |
|
"learning_rate": 0.00012782426232884616, |
|
"loss": 1.6919, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.5998848589522164, |
|
"grad_norm": 10.41037654876709, |
|
"learning_rate": 0.00012760016071562154, |
|
"loss": 1.6745, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.6004605641911341, |
|
"grad_norm": 2.4413540363311768, |
|
"learning_rate": 0.00012737594558125622, |
|
"loss": 1.7962, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.6010362694300518, |
|
"grad_norm": 1.9357279539108276, |
|
"learning_rate": 0.00012715161837193917, |
|
"loss": 1.7911, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.6016119746689695, |
|
"grad_norm": 1.77413010597229, |
|
"learning_rate": 0.00012692718053458228, |
|
"loss": 2.0216, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.6021876799078871, |
|
"grad_norm": 2.166997194290161, |
|
"learning_rate": 0.0001267026335168108, |
|
"loss": 1.512, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.6027633851468048, |
|
"grad_norm": 2.3894009590148926, |
|
"learning_rate": 0.00012647797876695442, |
|
"loss": 1.6451, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.6033390903857225, |
|
"grad_norm": 1.9920623302459717, |
|
"learning_rate": 0.00012625321773403759, |
|
"loss": 1.8585, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.6039147956246402, |
|
"grad_norm": 7.759680271148682, |
|
"learning_rate": 0.00012602835186777028, |
|
"loss": 1.4727, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.6044905008635578, |
|
"grad_norm": 2.342421531677246, |
|
"learning_rate": 0.00012580338261853867, |
|
"loss": 1.8267, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6050662061024755, |
|
"grad_norm": 3.670834541320801, |
|
"learning_rate": 0.00012557831143739573, |
|
"loss": 1.5525, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.6056419113413932, |
|
"grad_norm": 2.409895181655884, |
|
"learning_rate": 0.00012535313977605193, |
|
"loss": 1.7907, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.6062176165803109, |
|
"grad_norm": 46.487396240234375, |
|
"learning_rate": 0.0001251278690868658, |
|
"loss": 1.6402, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.6067933218192285, |
|
"grad_norm": 2.1283092498779297, |
|
"learning_rate": 0.00012490250082283462, |
|
"loss": 1.8462, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.6073690270581462, |
|
"grad_norm": 2.77925705909729, |
|
"learning_rate": 0.00012467703643758506, |
|
"loss": 1.7877, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.6079447322970639, |
|
"grad_norm": 2.246321439743042, |
|
"learning_rate": 0.00012445147738536367, |
|
"loss": 2.0602, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.6085204375359816, |
|
"grad_norm": 10.697153091430664, |
|
"learning_rate": 0.00012422582512102776, |
|
"loss": 1.5962, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.6090961427748992, |
|
"grad_norm": 2.082714557647705, |
|
"learning_rate": 0.00012400008110003568, |
|
"loss": 1.1343, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.6096718480138169, |
|
"grad_norm": 3.4832651615142822, |
|
"learning_rate": 0.00012377424677843777, |
|
"loss": 1.7796, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.6102475532527346, |
|
"grad_norm": 2.243643045425415, |
|
"learning_rate": 0.0001235483236128667, |
|
"loss": 1.6396, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6108232584916523, |
|
"grad_norm": 5.907801151275635, |
|
"learning_rate": 0.0001233223130605282, |
|
"loss": 1.7067, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.6113989637305699, |
|
"grad_norm": 1.8457263708114624, |
|
"learning_rate": 0.0001230962165791917, |
|
"loss": 1.8954, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.6119746689694876, |
|
"grad_norm": 2.2211413383483887, |
|
"learning_rate": 0.00012287003562718083, |
|
"loss": 1.6708, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.6125503742084053, |
|
"grad_norm": 2.75714373588562, |
|
"learning_rate": 0.00012264377166336412, |
|
"loss": 1.8238, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.613126079447323, |
|
"grad_norm": 2.9811720848083496, |
|
"learning_rate": 0.00012241742614714542, |
|
"loss": 1.7964, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.6137017846862406, |
|
"grad_norm": 1.753533959388733, |
|
"learning_rate": 0.00012219100053845465, |
|
"loss": 2.1655, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.6142774899251583, |
|
"grad_norm": 3.2706820964813232, |
|
"learning_rate": 0.00012196449629773837, |
|
"loss": 1.9122, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.614853195164076, |
|
"grad_norm": 3.0341358184814453, |
|
"learning_rate": 0.00012173791488595019, |
|
"loss": 1.3391, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.6154289004029937, |
|
"grad_norm": 6.340384006500244, |
|
"learning_rate": 0.00012151125776454161, |
|
"loss": 1.898, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.6160046056419114, |
|
"grad_norm": 2.7424800395965576, |
|
"learning_rate": 0.00012128452639545243, |
|
"loss": 1.7949, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.616580310880829, |
|
"grad_norm": 4.640672206878662, |
|
"learning_rate": 0.00012105772224110125, |
|
"loss": 1.7932, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.6171560161197467, |
|
"grad_norm": 15.990649223327637, |
|
"learning_rate": 0.00012083084676437626, |
|
"loss": 1.7652, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.6177317213586644, |
|
"grad_norm": 2.783991575241089, |
|
"learning_rate": 0.00012060390142862562, |
|
"loss": 1.6402, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.6183074265975821, |
|
"grad_norm": 2.2968053817749023, |
|
"learning_rate": 0.00012037688769764803, |
|
"loss": 1.9988, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.6188831318364997, |
|
"grad_norm": 2.0364556312561035, |
|
"learning_rate": 0.0001201498070356835, |
|
"loss": 2.2376, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.6194588370754174, |
|
"grad_norm": 2.1875343322753906, |
|
"learning_rate": 0.00011992266090740356, |
|
"loss": 2.0095, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.6200345423143351, |
|
"grad_norm": 2.150895118713379, |
|
"learning_rate": 0.00011969545077790212, |
|
"loss": 1.84, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.6206102475532528, |
|
"grad_norm": 1.9572124481201172, |
|
"learning_rate": 0.00011946817811268583, |
|
"loss": 1.8961, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.6211859527921704, |
|
"grad_norm": 2.214392900466919, |
|
"learning_rate": 0.00011924084437766474, |
|
"loss": 2.1148, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.6217616580310881, |
|
"grad_norm": 3.1197032928466797, |
|
"learning_rate": 0.00011901345103914278, |
|
"loss": 1.8522, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6223373632700058, |
|
"grad_norm": 2.3327748775482178, |
|
"learning_rate": 0.00011878599956380833, |
|
"loss": 1.3488, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.6229130685089235, |
|
"grad_norm": 4.232432842254639, |
|
"learning_rate": 0.00011855849141872478, |
|
"loss": 1.3803, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.6234887737478411, |
|
"grad_norm": 2.6493353843688965, |
|
"learning_rate": 0.00011833092807132094, |
|
"loss": 1.6068, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.6240644789867588, |
|
"grad_norm": 4.1762495040893555, |
|
"learning_rate": 0.00011810331098938183, |
|
"loss": 1.8435, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.6246401842256765, |
|
"grad_norm": 1.8596819639205933, |
|
"learning_rate": 0.0001178756416410389, |
|
"loss": 1.6533, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.6252158894645942, |
|
"grad_norm": 3.031169891357422, |
|
"learning_rate": 0.00011764792149476082, |
|
"loss": 2.0339, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.6257915947035118, |
|
"grad_norm": 2.637482166290283, |
|
"learning_rate": 0.00011742015201934391, |
|
"loss": 1.6899, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.6263672999424295, |
|
"grad_norm": 2.928927421569824, |
|
"learning_rate": 0.0001171923346839026, |
|
"loss": 1.5966, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.6269430051813472, |
|
"grad_norm": 1.9385987520217896, |
|
"learning_rate": 0.00011696447095786005, |
|
"loss": 1.7942, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.6275187104202649, |
|
"grad_norm": 2.5360302925109863, |
|
"learning_rate": 0.00011673656231093866, |
|
"loss": 1.8061, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6280944156591824, |
|
"grad_norm": 3.7727694511413574, |
|
"learning_rate": 0.00011650861021315053, |
|
"loss": 1.6725, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.6286701208981001, |
|
"grad_norm": 2.319878101348877, |
|
"learning_rate": 0.00011628061613478805, |
|
"loss": 1.8349, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.6292458261370178, |
|
"grad_norm": 2.03865385055542, |
|
"learning_rate": 0.00011605258154641436, |
|
"loss": 2.0155, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.6298215313759356, |
|
"grad_norm": 8.286460876464844, |
|
"learning_rate": 0.00011582450791885396, |
|
"loss": 1.4949, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.6303972366148531, |
|
"grad_norm": 3.0670244693756104, |
|
"learning_rate": 0.00011559639672318301, |
|
"loss": 1.5505, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.6309729418537708, |
|
"grad_norm": 3.125257730484009, |
|
"learning_rate": 0.00011536824943072013, |
|
"loss": 1.8383, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.6315486470926885, |
|
"grad_norm": 12.684760093688965, |
|
"learning_rate": 0.00011514006751301665, |
|
"loss": 1.6088, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.6321243523316062, |
|
"grad_norm": 2.321836471557617, |
|
"learning_rate": 0.00011491185244184737, |
|
"loss": 1.3411, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.6327000575705239, |
|
"grad_norm": 2.899108409881592, |
|
"learning_rate": 0.00011468360568920075, |
|
"loss": 1.6938, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.6332757628094415, |
|
"grad_norm": 2.300163745880127, |
|
"learning_rate": 0.00011445532872726978, |
|
"loss": 1.9663, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6338514680483592, |
|
"grad_norm": 2.8410146236419678, |
|
"learning_rate": 0.00011422702302844217, |
|
"loss": 1.411, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.6344271732872769, |
|
"grad_norm": 2.567687749862671, |
|
"learning_rate": 0.00011399869006529104, |
|
"loss": 1.4076, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.6350028785261946, |
|
"grad_norm": 2.7900848388671875, |
|
"learning_rate": 0.00011377033131056536, |
|
"loss": 1.927, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.6355785837651122, |
|
"grad_norm": 4.51284646987915, |
|
"learning_rate": 0.00011354194823718046, |
|
"loss": 1.9738, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.6361542890040299, |
|
"grad_norm": 2.4686641693115234, |
|
"learning_rate": 0.00011331354231820844, |
|
"loss": 1.8788, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.6367299942429476, |
|
"grad_norm": 4.021846294403076, |
|
"learning_rate": 0.00011308511502686887, |
|
"loss": 1.6456, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.6373056994818653, |
|
"grad_norm": 2.271044969558716, |
|
"learning_rate": 0.00011285666783651918, |
|
"loss": 1.8461, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.6378814047207829, |
|
"grad_norm": 2.7978787422180176, |
|
"learning_rate": 0.00011262820222064503, |
|
"loss": 1.6951, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.6384571099597006, |
|
"grad_norm": 2.1274783611297607, |
|
"learning_rate": 0.00011239971965285103, |
|
"loss": 1.579, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.6390328151986183, |
|
"grad_norm": 2.2610461711883545, |
|
"learning_rate": 0.00011217122160685107, |
|
"loss": 1.7816, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.639608520437536, |
|
"grad_norm": 3.3982491493225098, |
|
"learning_rate": 0.00011194270955645894, |
|
"loss": 1.645, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.6401842256764536, |
|
"grad_norm": 2.5890631675720215, |
|
"learning_rate": 0.00011171418497557866, |
|
"loss": 1.8953, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.6407599309153713, |
|
"grad_norm": 2.629909038543701, |
|
"learning_rate": 0.00011148564933819515, |
|
"loss": 1.3796, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.641335636154289, |
|
"grad_norm": 8.610105514526367, |
|
"learning_rate": 0.00011125710411836463, |
|
"loss": 1.6267, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.6419113413932067, |
|
"grad_norm": 1.8409522771835327, |
|
"learning_rate": 0.0001110285507902051, |
|
"loss": 1.5968, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.6424870466321243, |
|
"grad_norm": 9.315035820007324, |
|
"learning_rate": 0.00011079999082788695, |
|
"loss": 1.6575, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.643062751871042, |
|
"grad_norm": 1.8414576053619385, |
|
"learning_rate": 0.00011057142570562317, |
|
"loss": 1.9512, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.6436384571099597, |
|
"grad_norm": 3.665769577026367, |
|
"learning_rate": 0.00011034285689766025, |
|
"loss": 1.6937, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.6442141623488774, |
|
"grad_norm": 4.7261738777160645, |
|
"learning_rate": 0.00011011428587826829, |
|
"loss": 1.1675, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.644789867587795, |
|
"grad_norm": 1.9036201238632202, |
|
"learning_rate": 0.00010988571412173174, |
|
"loss": 1.7549, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6453655728267127, |
|
"grad_norm": 3.041926145553589, |
|
"learning_rate": 0.00010965714310233979, |
|
"loss": 1.6196, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.6459412780656304, |
|
"grad_norm": 2.1135053634643555, |
|
"learning_rate": 0.00010942857429437688, |
|
"loss": 2.0311, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.6465169833045481, |
|
"grad_norm": 2.664389133453369, |
|
"learning_rate": 0.0001092000091721131, |
|
"loss": 1.3273, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.6470926885434658, |
|
"grad_norm": 1.9533644914627075, |
|
"learning_rate": 0.00010897144920979492, |
|
"loss": 2.0707, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.6476683937823834, |
|
"grad_norm": 3.114617109298706, |
|
"learning_rate": 0.00010874289588163538, |
|
"loss": 1.9657, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.6482440990213011, |
|
"grad_norm": 2.789735794067383, |
|
"learning_rate": 0.00010851435066180486, |
|
"loss": 1.7617, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.6488198042602188, |
|
"grad_norm": 3.554517984390259, |
|
"learning_rate": 0.00010828581502442139, |
|
"loss": 1.7084, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.6493955094991365, |
|
"grad_norm": 2.6153769493103027, |
|
"learning_rate": 0.00010805729044354111, |
|
"loss": 1.9482, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.6499712147380541, |
|
"grad_norm": 3.099222183227539, |
|
"learning_rate": 0.00010782877839314895, |
|
"loss": 2.0045, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.6505469199769718, |
|
"grad_norm": 3.3257076740264893, |
|
"learning_rate": 0.00010760028034714899, |
|
"loss": 1.7569, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6511226252158895, |
|
"grad_norm": 11.2058744430542, |
|
"learning_rate": 0.000107371797779355, |
|
"loss": 1.4291, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.6516983304548072, |
|
"grad_norm": 1.9433313608169556, |
|
"learning_rate": 0.00010714333216348087, |
|
"loss": 1.7501, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.6522740356937248, |
|
"grad_norm": 6.8387651443481445, |
|
"learning_rate": 0.00010691488497313115, |
|
"loss": 1.8655, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.6528497409326425, |
|
"grad_norm": 14.710614204406738, |
|
"learning_rate": 0.0001066864576817916, |
|
"loss": 1.6142, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.6534254461715602, |
|
"grad_norm": 2.8211591243743896, |
|
"learning_rate": 0.0001064580517628196, |
|
"loss": 1.9977, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.6540011514104779, |
|
"grad_norm": 10.522199630737305, |
|
"learning_rate": 0.00010622966868943466, |
|
"loss": 1.4866, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.6545768566493955, |
|
"grad_norm": 18.36501121520996, |
|
"learning_rate": 0.000106001309934709, |
|
"loss": 1.9982, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.6551525618883132, |
|
"grad_norm": 7.4732866287231445, |
|
"learning_rate": 0.00010577297697155786, |
|
"loss": 1.6959, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.6557282671272309, |
|
"grad_norm": 4.5649094581604, |
|
"learning_rate": 0.00010554467127273025, |
|
"loss": 1.4753, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.6563039723661486, |
|
"grad_norm": 2.2757229804992676, |
|
"learning_rate": 0.00010531639431079927, |
|
"loss": 1.8967, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6568796776050662, |
|
"grad_norm": 3.5460915565490723, |
|
"learning_rate": 0.00010508814755815266, |
|
"loss": 1.445, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.6574553828439839, |
|
"grad_norm": 5.268508434295654, |
|
"learning_rate": 0.00010485993248698337, |
|
"loss": 1.6088, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.6580310880829016, |
|
"grad_norm": 4.5665388107299805, |
|
"learning_rate": 0.00010463175056927991, |
|
"loss": 1.5503, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.6586067933218193, |
|
"grad_norm": 4.9753546714782715, |
|
"learning_rate": 0.00010440360327681702, |
|
"loss": 2.015, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.6591824985607369, |
|
"grad_norm": 4.048013210296631, |
|
"learning_rate": 0.00010417549208114608, |
|
"loss": 1.8249, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.6597582037996546, |
|
"grad_norm": 1.9025744199752808, |
|
"learning_rate": 0.00010394741845358564, |
|
"loss": 1.9516, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.6603339090385723, |
|
"grad_norm": 3.0757157802581787, |
|
"learning_rate": 0.00010371938386521196, |
|
"loss": 1.8365, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.66090961427749, |
|
"grad_norm": 4.190611362457275, |
|
"learning_rate": 0.00010349138978684949, |
|
"loss": 1.725, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.6614853195164075, |
|
"grad_norm": 3.264749765396118, |
|
"learning_rate": 0.00010326343768906138, |
|
"loss": 1.4593, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.6620610247553252, |
|
"grad_norm": 2.502470016479492, |
|
"learning_rate": 0.00010303552904213998, |
|
"loss": 1.6255, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.662636729994243, |
|
"grad_norm": 2.5127601623535156, |
|
"learning_rate": 0.0001028076653160974, |
|
"loss": 1.8302, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.6632124352331606, |
|
"grad_norm": 1.9711816310882568, |
|
"learning_rate": 0.00010257984798065615, |
|
"loss": 1.6349, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.6637881404720783, |
|
"grad_norm": 2.5800209045410156, |
|
"learning_rate": 0.00010235207850523923, |
|
"loss": 1.8541, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.6643638457109959, |
|
"grad_norm": 2.15277361869812, |
|
"learning_rate": 0.00010212435835896113, |
|
"loss": 2.0093, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.6649395509499136, |
|
"grad_norm": 2.074312925338745, |
|
"learning_rate": 0.0001018966890106182, |
|
"loss": 1.5909, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.6655152561888313, |
|
"grad_norm": 1.9219588041305542, |
|
"learning_rate": 0.00010166907192867907, |
|
"loss": 1.816, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.666090961427749, |
|
"grad_norm": 11.162732124328613, |
|
"learning_rate": 0.00010144150858127529, |
|
"loss": 1.359, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 2.046509265899658, |
|
"learning_rate": 0.00010121400043619169, |
|
"loss": 1.8493, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.6672423719055843, |
|
"grad_norm": 2.8525936603546143, |
|
"learning_rate": 0.00010098654896085724, |
|
"loss": 1.7511, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.667818077144502, |
|
"grad_norm": 2.0714986324310303, |
|
"learning_rate": 0.00010075915562233529, |
|
"loss": 2.1803, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6683937823834197, |
|
"grad_norm": 2.4236810207366943, |
|
"learning_rate": 0.00010053182188731418, |
|
"loss": 1.9524, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.6689694876223373, |
|
"grad_norm": 2.0779378414154053, |
|
"learning_rate": 0.00010030454922209792, |
|
"loss": 1.4239, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.669545192861255, |
|
"grad_norm": 2.2357749938964844, |
|
"learning_rate": 0.00010007733909259646, |
|
"loss": 1.946, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.6701208981001727, |
|
"grad_norm": 3.204003095626831, |
|
"learning_rate": 9.985019296431652e-05, |
|
"loss": 1.5353, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.6706966033390904, |
|
"grad_norm": 6.192695140838623, |
|
"learning_rate": 9.962311230235195e-05, |
|
"loss": 1.7483, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.671272308578008, |
|
"grad_norm": 1.6048142910003662, |
|
"learning_rate": 9.939609857137439e-05, |
|
"loss": 1.919, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.6718480138169257, |
|
"grad_norm": 1.7395859956741333, |
|
"learning_rate": 9.916915323562377e-05, |
|
"loss": 1.8967, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.6724237190558434, |
|
"grad_norm": 9.254240989685059, |
|
"learning_rate": 9.894227775889877e-05, |
|
"loss": 1.468, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.6729994242947611, |
|
"grad_norm": 2.008310079574585, |
|
"learning_rate": 9.871547360454761e-05, |
|
"loss": 2.1097, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.6735751295336787, |
|
"grad_norm": 5.330569267272949, |
|
"learning_rate": 9.848874223545838e-05, |
|
"loss": 1.5188, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6741508347725964, |
|
"grad_norm": 2.926645040512085, |
|
"learning_rate": 9.826208511404979e-05, |
|
"loss": 1.8197, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.6747265400115141, |
|
"grad_norm": 2.621838331222534, |
|
"learning_rate": 9.803550370226168e-05, |
|
"loss": 1.8151, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.6753022452504318, |
|
"grad_norm": 3.4823410511016846, |
|
"learning_rate": 9.780899946154535e-05, |
|
"loss": 1.5427, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.6758779504893494, |
|
"grad_norm": 4.249687671661377, |
|
"learning_rate": 9.758257385285459e-05, |
|
"loss": 1.6277, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.6764536557282671, |
|
"grad_norm": 3.4192488193511963, |
|
"learning_rate": 9.735622833663589e-05, |
|
"loss": 1.958, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.6770293609671848, |
|
"grad_norm": 2.2742438316345215, |
|
"learning_rate": 9.712996437281919e-05, |
|
"loss": 1.9273, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.6776050662061025, |
|
"grad_norm": 2.9063401222229004, |
|
"learning_rate": 9.690378342080832e-05, |
|
"loss": 1.3239, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.6781807714450202, |
|
"grad_norm": 7.125305652618408, |
|
"learning_rate": 9.667768693947184e-05, |
|
"loss": 1.3987, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.6787564766839378, |
|
"grad_norm": 2.73252010345459, |
|
"learning_rate": 9.645167638713334e-05, |
|
"loss": 1.7407, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.6793321819228555, |
|
"grad_norm": 2.822937250137329, |
|
"learning_rate": 9.622575322156226e-05, |
|
"loss": 2.0836, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6799078871617732, |
|
"grad_norm": 1.957633376121521, |
|
"learning_rate": 9.599991889996435e-05, |
|
"loss": 1.8197, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.6804835924006909, |
|
"grad_norm": 3.0812935829162598, |
|
"learning_rate": 9.577417487897227e-05, |
|
"loss": 1.8606, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.6810592976396085, |
|
"grad_norm": 2.941861152648926, |
|
"learning_rate": 9.554852261463634e-05, |
|
"loss": 2.1113, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.6816350028785262, |
|
"grad_norm": 2.8906712532043457, |
|
"learning_rate": 9.532296356241496e-05, |
|
"loss": 1.6709, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.6822107081174439, |
|
"grad_norm": 9.170804977416992, |
|
"learning_rate": 9.509749917716537e-05, |
|
"loss": 1.6481, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.6827864133563616, |
|
"grad_norm": 6.398213863372803, |
|
"learning_rate": 9.487213091313422e-05, |
|
"loss": 1.6377, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.6833621185952792, |
|
"grad_norm": 2.3572301864624023, |
|
"learning_rate": 9.46468602239481e-05, |
|
"loss": 1.4058, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.6839378238341969, |
|
"grad_norm": 2.4743688106536865, |
|
"learning_rate": 9.44216885626043e-05, |
|
"loss": 1.6128, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.6845135290731146, |
|
"grad_norm": 2.0609121322631836, |
|
"learning_rate": 9.419661738146137e-05, |
|
"loss": 2.064, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.6850892343120323, |
|
"grad_norm": 2.409160614013672, |
|
"learning_rate": 9.397164813222974e-05, |
|
"loss": 1.4584, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6856649395509499, |
|
"grad_norm": 18.25377655029297, |
|
"learning_rate": 9.374678226596245e-05, |
|
"loss": 1.8134, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.6862406447898676, |
|
"grad_norm": 2.806365489959717, |
|
"learning_rate": 9.352202123304561e-05, |
|
"loss": 1.5046, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.6868163500287853, |
|
"grad_norm": 1.8805674314498901, |
|
"learning_rate": 9.329736648318921e-05, |
|
"loss": 1.8979, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.687392055267703, |
|
"grad_norm": 3.183284282684326, |
|
"learning_rate": 9.307281946541774e-05, |
|
"loss": 2.0024, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.6879677605066206, |
|
"grad_norm": 4.2624006271362305, |
|
"learning_rate": 9.284838162806082e-05, |
|
"loss": 1.5019, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.6885434657455383, |
|
"grad_norm": 3.5346248149871826, |
|
"learning_rate": 9.262405441874382e-05, |
|
"loss": 1.3593, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.689119170984456, |
|
"grad_norm": 1.8233470916748047, |
|
"learning_rate": 9.23998392843785e-05, |
|
"loss": 2.3358, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.6896948762233737, |
|
"grad_norm": 3.124511480331421, |
|
"learning_rate": 9.217573767115383e-05, |
|
"loss": 1.4681, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.6902705814622913, |
|
"grad_norm": 2.2693393230438232, |
|
"learning_rate": 9.195175102452657e-05, |
|
"loss": 1.6685, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.690846286701209, |
|
"grad_norm": 2.4168596267700195, |
|
"learning_rate": 9.172788078921191e-05, |
|
"loss": 2.1741, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6914219919401267, |
|
"grad_norm": 2.8741111755371094, |
|
"learning_rate": 9.150412840917421e-05, |
|
"loss": 1.8631, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.6919976971790444, |
|
"grad_norm": 2.5383141040802, |
|
"learning_rate": 9.128049532761759e-05, |
|
"loss": 1.5728, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.6925734024179621, |
|
"grad_norm": 2.7610490322113037, |
|
"learning_rate": 9.105698298697676e-05, |
|
"loss": 1.7652, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.6931491076568796, |
|
"grad_norm": 7.25991678237915, |
|
"learning_rate": 9.083359282890767e-05, |
|
"loss": 1.4954, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.6937248128957973, |
|
"grad_norm": 3.7028870582580566, |
|
"learning_rate": 9.061032629427812e-05, |
|
"loss": 1.5809, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.694300518134715, |
|
"grad_norm": 2.123594284057617, |
|
"learning_rate": 9.038718482315866e-05, |
|
"loss": 1.9145, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.6948762233736328, |
|
"grad_norm": 5.9489827156066895, |
|
"learning_rate": 9.0164169854813e-05, |
|
"loss": 1.6488, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.6954519286125503, |
|
"grad_norm": 2.7276246547698975, |
|
"learning_rate": 8.994128282768903e-05, |
|
"loss": 1.3695, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.696027633851468, |
|
"grad_norm": 4.6926116943359375, |
|
"learning_rate": 8.971852517940941e-05, |
|
"loss": 1.751, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.6966033390903857, |
|
"grad_norm": 8.851062774658203, |
|
"learning_rate": 8.949589834676229e-05, |
|
"loss": 1.8289, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6971790443293034, |
|
"grad_norm": 2.5200765132904053, |
|
"learning_rate": 8.927340376569209e-05, |
|
"loss": 1.6664, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.697754749568221, |
|
"grad_norm": 2.227609157562256, |
|
"learning_rate": 8.90510428712901e-05, |
|
"loss": 1.7865, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.6983304548071387, |
|
"grad_norm": 2.7458364963531494, |
|
"learning_rate": 8.882881709778541e-05, |
|
"loss": 1.8385, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.6989061600460564, |
|
"grad_norm": 3.33707594871521, |
|
"learning_rate": 8.860672787853563e-05, |
|
"loss": 1.2672, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.6994818652849741, |
|
"grad_norm": 1.9355860948562622, |
|
"learning_rate": 8.838477664601752e-05, |
|
"loss": 1.7449, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.7000575705238917, |
|
"grad_norm": 2.399412155151367, |
|
"learning_rate": 8.816296483181783e-05, |
|
"loss": 1.8579, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.7006332757628094, |
|
"grad_norm": 2.3706471920013428, |
|
"learning_rate": 8.794129386662405e-05, |
|
"loss": 1.9366, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.7012089810017271, |
|
"grad_norm": 7.656529903411865, |
|
"learning_rate": 8.771976518021523e-05, |
|
"loss": 1.7295, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.7017846862406448, |
|
"grad_norm": 1.9502934217453003, |
|
"learning_rate": 8.749838020145275e-05, |
|
"loss": 1.8921, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.7023603914795624, |
|
"grad_norm": 2.2740495204925537, |
|
"learning_rate": 8.727714035827097e-05, |
|
"loss": 1.5383, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7029360967184801, |
|
"grad_norm": 1.800449252128601, |
|
"learning_rate": 8.705604707766824e-05, |
|
"loss": 1.7226, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.7035118019573978, |
|
"grad_norm": 3.378472089767456, |
|
"learning_rate": 8.683510178569747e-05, |
|
"loss": 1.774, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.7040875071963155, |
|
"grad_norm": 9.736815452575684, |
|
"learning_rate": 8.661430590745716e-05, |
|
"loss": 1.5152, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.7046632124352331, |
|
"grad_norm": 2.5010931491851807, |
|
"learning_rate": 8.639366086708198e-05, |
|
"loss": 2.0701, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.7052389176741508, |
|
"grad_norm": 4.483166694641113, |
|
"learning_rate": 8.617316808773377e-05, |
|
"loss": 1.7541, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7058146229130685, |
|
"grad_norm": 2.1824142932891846, |
|
"learning_rate": 8.595282899159224e-05, |
|
"loss": 1.9888, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.7063903281519862, |
|
"grad_norm": 2.690760612487793, |
|
"learning_rate": 8.573264499984592e-05, |
|
"loss": 1.4562, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.7069660333909038, |
|
"grad_norm": 2.433600425720215, |
|
"learning_rate": 8.551261753268278e-05, |
|
"loss": 1.7995, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.7075417386298215, |
|
"grad_norm": 2.5094082355499268, |
|
"learning_rate": 8.529274800928133e-05, |
|
"loss": 1.9741, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.7081174438687392, |
|
"grad_norm": 3.7042555809020996, |
|
"learning_rate": 8.507303784780131e-05, |
|
"loss": 1.9219, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7086931491076569, |
|
"grad_norm": 9.36429214477539, |
|
"learning_rate": 8.485348846537454e-05, |
|
"loss": 1.4939, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.7092688543465746, |
|
"grad_norm": 15.673849105834961, |
|
"learning_rate": 8.463410127809576e-05, |
|
"loss": 1.6711, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.7098445595854922, |
|
"grad_norm": 4.705075740814209, |
|
"learning_rate": 8.441487770101375e-05, |
|
"loss": 1.7079, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.7104202648244099, |
|
"grad_norm": 3.9517462253570557, |
|
"learning_rate": 8.41958191481218e-05, |
|
"loss": 1.7706, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.7109959700633276, |
|
"grad_norm": 2.705690622329712, |
|
"learning_rate": 8.397692703234894e-05, |
|
"loss": 1.7043, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.7115716753022453, |
|
"grad_norm": 3.2132418155670166, |
|
"learning_rate": 8.375820276555056e-05, |
|
"loss": 1.6642, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.7121473805411629, |
|
"grad_norm": 5.179574489593506, |
|
"learning_rate": 8.353964775849943e-05, |
|
"loss": 1.2606, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.7127230857800806, |
|
"grad_norm": 2.834803581237793, |
|
"learning_rate": 8.332126342087672e-05, |
|
"loss": 1.9747, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.7132987910189983, |
|
"grad_norm": 4.0652995109558105, |
|
"learning_rate": 8.310305116126265e-05, |
|
"loss": 1.6335, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.713874496257916, |
|
"grad_norm": 3.4388492107391357, |
|
"learning_rate": 8.288501238712765e-05, |
|
"loss": 1.7383, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7144502014968336, |
|
"grad_norm": 2.006385326385498, |
|
"learning_rate": 8.2667148504823e-05, |
|
"loss": 1.6261, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.7150259067357513, |
|
"grad_norm": 3.83569598197937, |
|
"learning_rate": 8.24494609195721e-05, |
|
"loss": 1.4952, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.715601611974669, |
|
"grad_norm": 3.785851240158081, |
|
"learning_rate": 8.223195103546115e-05, |
|
"loss": 1.9108, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.7161773172135867, |
|
"grad_norm": 2.0811095237731934, |
|
"learning_rate": 8.201462025543017e-05, |
|
"loss": 1.5987, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.7167530224525043, |
|
"grad_norm": 3.619274139404297, |
|
"learning_rate": 8.179746998126403e-05, |
|
"loss": 1.9112, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.717328727691422, |
|
"grad_norm": 2.366683006286621, |
|
"learning_rate": 8.158050161358319e-05, |
|
"loss": 1.8774, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.7179044329303397, |
|
"grad_norm": 3.2668917179107666, |
|
"learning_rate": 8.136371655183497e-05, |
|
"loss": 1.5932, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.7184801381692574, |
|
"grad_norm": 5.13369607925415, |
|
"learning_rate": 8.114711619428428e-05, |
|
"loss": 1.5434, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.719055843408175, |
|
"grad_norm": 5.853895664215088, |
|
"learning_rate": 8.09307019380047e-05, |
|
"loss": 1.5837, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.7196315486470927, |
|
"grad_norm": 2.1570184230804443, |
|
"learning_rate": 8.071447517886952e-05, |
|
"loss": 2.0624, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7202072538860104, |
|
"grad_norm": 1.9529211521148682, |
|
"learning_rate": 8.049843731154254e-05, |
|
"loss": 1.7785, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.7207829591249281, |
|
"grad_norm": 5.114016532897949, |
|
"learning_rate": 8.02825897294693e-05, |
|
"loss": 1.7406, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.7213586643638457, |
|
"grad_norm": 2.745934009552002, |
|
"learning_rate": 8.006693382486801e-05, |
|
"loss": 1.737, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.7219343696027634, |
|
"grad_norm": 10.330817222595215, |
|
"learning_rate": 7.985147098872048e-05, |
|
"loss": 1.5649, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.7225100748416811, |
|
"grad_norm": 1.7928460836410522, |
|
"learning_rate": 7.963620261076334e-05, |
|
"loss": 1.8267, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.7230857800805988, |
|
"grad_norm": 1.9616749286651611, |
|
"learning_rate": 7.942113007947887e-05, |
|
"loss": 1.9707, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.7236614853195165, |
|
"grad_norm": 2.0727272033691406, |
|
"learning_rate": 7.920625478208615e-05, |
|
"loss": 1.8589, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.724237190558434, |
|
"grad_norm": 3.0034399032592773, |
|
"learning_rate": 7.899157810453213e-05, |
|
"loss": 1.9103, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.7248128957973518, |
|
"grad_norm": 2.041135787963867, |
|
"learning_rate": 7.877710143148267e-05, |
|
"loss": 1.8537, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.7253886010362695, |
|
"grad_norm": 8.20665454864502, |
|
"learning_rate": 7.856282614631356e-05, |
|
"loss": 1.2161, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7259643062751872, |
|
"grad_norm": 2.1425111293792725, |
|
"learning_rate": 7.834875363110166e-05, |
|
"loss": 1.9613, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.7265400115141047, |
|
"grad_norm": 2.895951509475708, |
|
"learning_rate": 7.813488526661595e-05, |
|
"loss": 1.6368, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.7271157167530224, |
|
"grad_norm": 1.6481860876083374, |
|
"learning_rate": 7.792122243230864e-05, |
|
"loss": 2.0891, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.7276914219919401, |
|
"grad_norm": 1.5633034706115723, |
|
"learning_rate": 7.770776650630631e-05, |
|
"loss": 1.4721, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.7282671272308578, |
|
"grad_norm": 2.921679735183716, |
|
"learning_rate": 7.749451886540091e-05, |
|
"loss": 1.2774, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.7288428324697754, |
|
"grad_norm": 2.167282819747925, |
|
"learning_rate": 7.7281480885041e-05, |
|
"loss": 1.7595, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.7294185377086931, |
|
"grad_norm": 2.5153279304504395, |
|
"learning_rate": 7.706865393932283e-05, |
|
"loss": 2.0746, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.7299942429476108, |
|
"grad_norm": 1.6189182996749878, |
|
"learning_rate": 7.68560394009814e-05, |
|
"loss": 1.7528, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.7305699481865285, |
|
"grad_norm": 3.0685760974884033, |
|
"learning_rate": 7.664363864138178e-05, |
|
"loss": 1.3272, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.7311456534254461, |
|
"grad_norm": 1.9861022233963013, |
|
"learning_rate": 7.643145303051016e-05, |
|
"loss": 1.8826, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7317213586643638, |
|
"grad_norm": 2.7280659675598145, |
|
"learning_rate": 7.621948393696492e-05, |
|
"loss": 1.9363, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.7322970639032815, |
|
"grad_norm": 4.763440132141113, |
|
"learning_rate": 7.600773272794798e-05, |
|
"loss": 1.8717, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.7328727691421992, |
|
"grad_norm": 3.8978776931762695, |
|
"learning_rate": 7.57962007692559e-05, |
|
"loss": 1.692, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.7334484743811168, |
|
"grad_norm": 4.123043060302734, |
|
"learning_rate": 7.558488942527109e-05, |
|
"loss": 1.8497, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.7340241796200345, |
|
"grad_norm": 2.32194185256958, |
|
"learning_rate": 7.537380005895296e-05, |
|
"loss": 1.7812, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.7345998848589522, |
|
"grad_norm": 2.7786030769348145, |
|
"learning_rate": 7.516293403182912e-05, |
|
"loss": 1.4425, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.7351755900978699, |
|
"grad_norm": 1.8613038063049316, |
|
"learning_rate": 7.495229270398674e-05, |
|
"loss": 1.9893, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.7357512953367875, |
|
"grad_norm": 2.449399709701538, |
|
"learning_rate": 7.474187743406362e-05, |
|
"loss": 1.5826, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.7363270005757052, |
|
"grad_norm": 2.0494472980499268, |
|
"learning_rate": 7.453168957923949e-05, |
|
"loss": 1.9416, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.7369027058146229, |
|
"grad_norm": 1.910123348236084, |
|
"learning_rate": 7.432173049522728e-05, |
|
"loss": 1.5807, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7374784110535406, |
|
"grad_norm": 2.3191299438476562, |
|
"learning_rate": 7.411200153626428e-05, |
|
"loss": 2.169, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.7380541162924582, |
|
"grad_norm": 2.6251776218414307, |
|
"learning_rate": 7.390250405510352e-05, |
|
"loss": 1.3672, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.7386298215313759, |
|
"grad_norm": 3.652298927307129, |
|
"learning_rate": 7.369323940300503e-05, |
|
"loss": 1.7192, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.7392055267702936, |
|
"grad_norm": 2.2988710403442383, |
|
"learning_rate": 7.348420892972705e-05, |
|
"loss": 2.2075, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.7397812320092113, |
|
"grad_norm": 2.720458984375, |
|
"learning_rate": 7.327541398351731e-05, |
|
"loss": 1.53, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.740356937248129, |
|
"grad_norm": 22.601173400878906, |
|
"learning_rate": 7.306685591110449e-05, |
|
"loss": 1.6638, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.7409326424870466, |
|
"grad_norm": 3.0639970302581787, |
|
"learning_rate": 7.28585360576894e-05, |
|
"loss": 2.0271, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.7415083477259643, |
|
"grad_norm": 3.046778678894043, |
|
"learning_rate": 7.265045576693632e-05, |
|
"loss": 1.6931, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.742084052964882, |
|
"grad_norm": 4.0570969581604, |
|
"learning_rate": 7.244261638096434e-05, |
|
"loss": 1.5182, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.7426597582037997, |
|
"grad_norm": 2.360447645187378, |
|
"learning_rate": 7.223501924033878e-05, |
|
"loss": 1.7788, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7432354634427173, |
|
"grad_norm": 2.19985294342041, |
|
"learning_rate": 7.202766568406234e-05, |
|
"loss": 1.826, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.743811168681635, |
|
"grad_norm": 6.608851432800293, |
|
"learning_rate": 7.182055704956671e-05, |
|
"loss": 1.2474, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.7443868739205527, |
|
"grad_norm": 2.359436511993408, |
|
"learning_rate": 7.161369467270385e-05, |
|
"loss": 1.6463, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.7449625791594704, |
|
"grad_norm": 2.1586437225341797, |
|
"learning_rate": 7.14070798877373e-05, |
|
"loss": 1.8218, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.745538284398388, |
|
"grad_norm": 3.0305073261260986, |
|
"learning_rate": 7.120071402733359e-05, |
|
"loss": 1.5873, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.7461139896373057, |
|
"grad_norm": 1.7526609897613525, |
|
"learning_rate": 7.09945984225538e-05, |
|
"loss": 1.9706, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.7466896948762234, |
|
"grad_norm": 3.412294387817383, |
|
"learning_rate": 7.078873440284477e-05, |
|
"loss": 1.8407, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.7472654001151411, |
|
"grad_norm": 2.3854801654815674, |
|
"learning_rate": 7.058312329603065e-05, |
|
"loss": 1.4573, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.7478411053540587, |
|
"grad_norm": 3.6485955715179443, |
|
"learning_rate": 7.037776642830433e-05, |
|
"loss": 1.6198, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.7484168105929764, |
|
"grad_norm": 1.9419339895248413, |
|
"learning_rate": 7.017266512421878e-05, |
|
"loss": 1.8447, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7489925158318941, |
|
"grad_norm": 2.8531014919281006, |
|
"learning_rate": 6.99678207066786e-05, |
|
"loss": 1.5635, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.7495682210708118, |
|
"grad_norm": 2.279210090637207, |
|
"learning_rate": 6.976323449693156e-05, |
|
"loss": 1.7548, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.7501439263097294, |
|
"grad_norm": 1.8601477146148682, |
|
"learning_rate": 6.955890781455987e-05, |
|
"loss": 2.1579, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.7507196315486471, |
|
"grad_norm": 2.6298389434814453, |
|
"learning_rate": 6.93548419774719e-05, |
|
"loss": 1.5116, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.7512953367875648, |
|
"grad_norm": 3.179581880569458, |
|
"learning_rate": 6.915103830189342e-05, |
|
"loss": 1.3385, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.7518710420264825, |
|
"grad_norm": 51.62275695800781, |
|
"learning_rate": 6.894749810235938e-05, |
|
"loss": 1.4703, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.7524467472654001, |
|
"grad_norm": 5.406750679016113, |
|
"learning_rate": 6.87442226917053e-05, |
|
"loss": 1.4089, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.7530224525043178, |
|
"grad_norm": 3.1115217208862305, |
|
"learning_rate": 6.854121338105875e-05, |
|
"loss": 1.8881, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.7535981577432355, |
|
"grad_norm": 11.897812843322754, |
|
"learning_rate": 6.833847147983103e-05, |
|
"loss": 1.3264, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.7541738629821532, |
|
"grad_norm": 2.3330416679382324, |
|
"learning_rate": 6.813599829570852e-05, |
|
"loss": 1.2674, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7547495682210709, |
|
"grad_norm": 2.1948859691619873, |
|
"learning_rate": 6.793379513464449e-05, |
|
"loss": 1.7049, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.7553252734599885, |
|
"grad_norm": 4.7169413566589355, |
|
"learning_rate": 6.773186330085054e-05, |
|
"loss": 1.6359, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.7559009786989062, |
|
"grad_norm": 2.132951498031616, |
|
"learning_rate": 6.753020409678819e-05, |
|
"loss": 1.3256, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.7564766839378239, |
|
"grad_norm": 2.582689046859741, |
|
"learning_rate": 6.732881882316052e-05, |
|
"loss": 2.0414, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.7570523891767416, |
|
"grad_norm": 3.74625301361084, |
|
"learning_rate": 6.712770877890369e-05, |
|
"loss": 1.6727, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.7576280944156591, |
|
"grad_norm": 11.082663536071777, |
|
"learning_rate": 6.692687526117865e-05, |
|
"loss": 1.7192, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.7582037996545768, |
|
"grad_norm": 7.31722354888916, |
|
"learning_rate": 6.672631956536286e-05, |
|
"loss": 1.9284, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.7587795048934945, |
|
"grad_norm": 2.2905490398406982, |
|
"learning_rate": 6.652604298504168e-05, |
|
"loss": 1.6203, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.7593552101324123, |
|
"grad_norm": 1.9625123739242554, |
|
"learning_rate": 6.63260468120003e-05, |
|
"loss": 1.7967, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.7599309153713298, |
|
"grad_norm": 4.101654529571533, |
|
"learning_rate": 6.612633233621513e-05, |
|
"loss": 1.6865, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7605066206102475, |
|
"grad_norm": 1.899525761604309, |
|
"learning_rate": 6.59269008458458e-05, |
|
"loss": 1.9603, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.7610823258491652, |
|
"grad_norm": 2.649081230163574, |
|
"learning_rate": 6.572775362722654e-05, |
|
"loss": 1.8846, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.7616580310880829, |
|
"grad_norm": 2.6892385482788086, |
|
"learning_rate": 6.552889196485812e-05, |
|
"loss": 1.7135, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.7622337363270005, |
|
"grad_norm": 2.641362190246582, |
|
"learning_rate": 6.533031714139947e-05, |
|
"loss": 1.6431, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.7628094415659182, |
|
"grad_norm": 2.4511632919311523, |
|
"learning_rate": 6.513203043765932e-05, |
|
"loss": 1.6093, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.7633851468048359, |
|
"grad_norm": 2.739757537841797, |
|
"learning_rate": 6.49340331325881e-05, |
|
"loss": 1.576, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.7639608520437536, |
|
"grad_norm": 3.0558271408081055, |
|
"learning_rate": 6.473632650326958e-05, |
|
"loss": 1.2116, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.7645365572826712, |
|
"grad_norm": 2.0293328762054443, |
|
"learning_rate": 6.453891182491272e-05, |
|
"loss": 2.0026, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.7651122625215889, |
|
"grad_norm": 2.7013015747070312, |
|
"learning_rate": 6.434179037084338e-05, |
|
"loss": 1.5642, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.7656879677605066, |
|
"grad_norm": 2.754798412322998, |
|
"learning_rate": 6.414496341249605e-05, |
|
"loss": 1.959, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7662636729994243, |
|
"grad_norm": 10.559093475341797, |
|
"learning_rate": 6.394843221940578e-05, |
|
"loss": 1.5451, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.7668393782383419, |
|
"grad_norm": 2.1670234203338623, |
|
"learning_rate": 6.375219805919995e-05, |
|
"loss": 1.8978, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.7674150834772596, |
|
"grad_norm": 2.215994358062744, |
|
"learning_rate": 6.355626219759005e-05, |
|
"loss": 1.7036, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.7679907887161773, |
|
"grad_norm": 10.172525405883789, |
|
"learning_rate": 6.336062589836359e-05, |
|
"loss": 1.7827, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.768566493955095, |
|
"grad_norm": 4.433283805847168, |
|
"learning_rate": 6.316529042337577e-05, |
|
"loss": 1.4581, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.7691421991940126, |
|
"grad_norm": 2.3281450271606445, |
|
"learning_rate": 6.29702570325416e-05, |
|
"loss": 1.7009, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.7697179044329303, |
|
"grad_norm": 2.0074803829193115, |
|
"learning_rate": 6.277552698382761e-05, |
|
"loss": 2.1468, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.770293609671848, |
|
"grad_norm": 1.4934829473495483, |
|
"learning_rate": 6.25811015332438e-05, |
|
"loss": 1.456, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.7708693149107657, |
|
"grad_norm": 5.488325595855713, |
|
"learning_rate": 6.238698193483539e-05, |
|
"loss": 1.2045, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.7714450201496834, |
|
"grad_norm": 2.7806265354156494, |
|
"learning_rate": 6.219316944067497e-05, |
|
"loss": 1.7917, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.772020725388601, |
|
"grad_norm": 8.058002471923828, |
|
"learning_rate": 6.19996653008543e-05, |
|
"loss": 1.7587, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.7725964306275187, |
|
"grad_norm": 4.710812568664551, |
|
"learning_rate": 6.180647076347621e-05, |
|
"loss": 1.4779, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.7731721358664364, |
|
"grad_norm": 13.711342811584473, |
|
"learning_rate": 6.161358707464666e-05, |
|
"loss": 1.5732, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.7737478411053541, |
|
"grad_norm": 2.5771145820617676, |
|
"learning_rate": 6.142101547846648e-05, |
|
"loss": 1.6616, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.7743235463442717, |
|
"grad_norm": 3.267062187194824, |
|
"learning_rate": 6.122875721702365e-05, |
|
"loss": 1.541, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.7748992515831894, |
|
"grad_norm": 2.233874559402466, |
|
"learning_rate": 6.103681353038511e-05, |
|
"loss": 1.5368, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.7754749568221071, |
|
"grad_norm": 2.080592632293701, |
|
"learning_rate": 6.084518565658877e-05, |
|
"loss": 1.8878, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.7760506620610248, |
|
"grad_norm": 2.5689923763275146, |
|
"learning_rate": 6.065387483163556e-05, |
|
"loss": 1.5645, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.7766263672999424, |
|
"grad_norm": 2.084365129470825, |
|
"learning_rate": 6.04628822894814e-05, |
|
"loss": 1.4915, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.7772020725388601, |
|
"grad_norm": 4.5587382316589355, |
|
"learning_rate": 6.0272209262029324e-05, |
|
"loss": 1.7316, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 2.5827181339263916, |
|
"learning_rate": 6.008185697912148e-05, |
|
"loss": 1.8245, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.7783534830166955, |
|
"grad_norm": 1.711262822151184, |
|
"learning_rate": 5.989182666853125e-05, |
|
"loss": 1.879, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.7789291882556131, |
|
"grad_norm": 3.053706407546997, |
|
"learning_rate": 5.970211955595526e-05, |
|
"loss": 1.73, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.7795048934945308, |
|
"grad_norm": 4.040714263916016, |
|
"learning_rate": 5.951273686500546e-05, |
|
"loss": 1.9029, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.7800805987334485, |
|
"grad_norm": 3.6155359745025635, |
|
"learning_rate": 5.9323679817201347e-05, |
|
"loss": 1.8518, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.7806563039723662, |
|
"grad_norm": 2.805917739868164, |
|
"learning_rate": 5.913494963196202e-05, |
|
"loss": 1.3817, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.7812320092112838, |
|
"grad_norm": 2.4106318950653076, |
|
"learning_rate": 5.894654752659826e-05, |
|
"loss": 1.779, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.7818077144502015, |
|
"grad_norm": 2.7342028617858887, |
|
"learning_rate": 5.875847471630482e-05, |
|
"loss": 1.539, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.7823834196891192, |
|
"grad_norm": 4.626698970794678, |
|
"learning_rate": 5.8570732414152354e-05, |
|
"loss": 1.9292, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.7829591249280369, |
|
"grad_norm": 2.9248266220092773, |
|
"learning_rate": 5.8383321831079866e-05, |
|
"loss": 2.1375, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7835348301669545, |
|
"grad_norm": 4.143459796905518, |
|
"learning_rate": 5.8196244175886714e-05, |
|
"loss": 1.4635, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.7841105354058722, |
|
"grad_norm": 2.493175983428955, |
|
"learning_rate": 5.8009500655224926e-05, |
|
"loss": 2.0653, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.7846862406447899, |
|
"grad_norm": 2.802454710006714, |
|
"learning_rate": 5.782309247359122e-05, |
|
"loss": 1.4671, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.7852619458837076, |
|
"grad_norm": 2.482621192932129, |
|
"learning_rate": 5.7637020833319543e-05, |
|
"loss": 1.7631, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.7858376511226253, |
|
"grad_norm": 1.9779695272445679, |
|
"learning_rate": 5.745128693457305e-05, |
|
"loss": 2.0734, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.7864133563615429, |
|
"grad_norm": 11.706380844116211, |
|
"learning_rate": 5.7265891975336516e-05, |
|
"loss": 1.4198, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.7869890616004606, |
|
"grad_norm": 2.605114459991455, |
|
"learning_rate": 5.708083715140847e-05, |
|
"loss": 1.6385, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.7875647668393783, |
|
"grad_norm": 3.059677839279175, |
|
"learning_rate": 5.689612365639365e-05, |
|
"loss": 1.6017, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.788140472078296, |
|
"grad_norm": 1.8578213453292847, |
|
"learning_rate": 5.6711752681695196e-05, |
|
"loss": 2.0283, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.7887161773172136, |
|
"grad_norm": 3.1265645027160645, |
|
"learning_rate": 5.6527725416507004e-05, |
|
"loss": 1.4247, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7892918825561313, |
|
"grad_norm": 1.9533611536026, |
|
"learning_rate": 5.634404304780596e-05, |
|
"loss": 2.2994, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.789867587795049, |
|
"grad_norm": 2.1204652786254883, |
|
"learning_rate": 5.6160706760344474e-05, |
|
"loss": 1.5297, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.7904432930339667, |
|
"grad_norm": 1.745361089706421, |
|
"learning_rate": 5.597771773664268e-05, |
|
"loss": 1.8504, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.7910189982728842, |
|
"grad_norm": 7.502842903137207, |
|
"learning_rate": 5.579507715698088e-05, |
|
"loss": 1.6416, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.791594703511802, |
|
"grad_norm": 2.107741117477417, |
|
"learning_rate": 5.561278619939183e-05, |
|
"loss": 1.5842, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7921704087507196, |
|
"grad_norm": 2.415151357650757, |
|
"learning_rate": 5.5430846039653336e-05, |
|
"loss": 1.8056, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.7927461139896373, |
|
"grad_norm": 2.2191529273986816, |
|
"learning_rate": 5.52492578512805e-05, |
|
"loss": 1.9055, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 0.7933218192285549, |
|
"grad_norm": 2.3005576133728027, |
|
"learning_rate": 5.506802280551822e-05, |
|
"loss": 1.8145, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.7938975244674726, |
|
"grad_norm": 4.749843597412109, |
|
"learning_rate": 5.4887142071333564e-05, |
|
"loss": 1.4035, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 0.7944732297063903, |
|
"grad_norm": 2.0399606227874756, |
|
"learning_rate": 5.470661681540838e-05, |
|
"loss": 2.1479, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.795048934945308, |
|
"grad_norm": 7.833768844604492, |
|
"learning_rate": 5.452644820213162e-05, |
|
"loss": 1.5584, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 0.7956246401842256, |
|
"grad_norm": 3.434250593185425, |
|
"learning_rate": 5.434663739359189e-05, |
|
"loss": 1.7674, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.7962003454231433, |
|
"grad_norm": 2.5569870471954346, |
|
"learning_rate": 5.416718554957005e-05, |
|
"loss": 1.5466, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 0.796776050662061, |
|
"grad_norm": 2.374488115310669, |
|
"learning_rate": 5.398809382753145e-05, |
|
"loss": 1.65, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.7973517559009787, |
|
"grad_norm": 13.02526569366455, |
|
"learning_rate": 5.3809363382618795e-05, |
|
"loss": 1.5528, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.7979274611398963, |
|
"grad_norm": 1.7816729545593262, |
|
"learning_rate": 5.363099536764451e-05, |
|
"loss": 1.5285, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.798503166378814, |
|
"grad_norm": 1.901544451713562, |
|
"learning_rate": 5.345299093308336e-05, |
|
"loss": 2.0212, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 0.7990788716177317, |
|
"grad_norm": 2.702831506729126, |
|
"learning_rate": 5.3275351227065e-05, |
|
"loss": 1.6804, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.7996545768566494, |
|
"grad_norm": 2.1472456455230713, |
|
"learning_rate": 5.309807739536656e-05, |
|
"loss": 1.8676, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 0.8002302820955671, |
|
"grad_norm": 3.610424518585205, |
|
"learning_rate": 5.292117058140532e-05, |
|
"loss": 1.6772, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8008059873344847, |
|
"grad_norm": 2.5551178455352783, |
|
"learning_rate": 5.274463192623127e-05, |
|
"loss": 1.9616, |
|
"step": 1391 |
|
}, |
|
    {
      "epoch": 0.8013816925734024,
      "grad_norm": 1.9815688133239746,
      "learning_rate": 5.256846256851982e-05,
      "loss": 1.9971,
      "step": 1392
    }
  ],
  "logging_steps": 1,
  "max_steps": 1737,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 348,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.609052611463558e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}