diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,22729 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9998457504241863, + "eval_steps": 500, + "global_step": 3241, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00030849915162733303, + "grad_norm": 0.0, + "learning_rate": 2.0408163265306121e-07, + "loss": 2.1748, + "step": 1 + }, + { + "epoch": 0.0006169983032546661, + "grad_norm": 0.0, + "learning_rate": 4.0816326530612243e-07, + "loss": 2.3846, + "step": 2 + }, + { + "epoch": 0.0009254974548819991, + "grad_norm": 0.0, + "learning_rate": 6.122448979591837e-07, + "loss": 2.3896, + "step": 3 + }, + { + "epoch": 0.0012339966065093321, + "grad_norm": 0.0, + "learning_rate": 8.163265306122449e-07, + "loss": 2.2741, + "step": 4 + }, + { + "epoch": 0.001542495758136665, + "grad_norm": 0.0, + "learning_rate": 1.0204081632653063e-06, + "loss": 2.4158, + "step": 5 + }, + { + "epoch": 0.0018509949097639982, + "grad_norm": 0.0, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.4049, + "step": 6 + }, + { + "epoch": 0.0021594940613913313, + "grad_norm": 0.0, + "learning_rate": 1.4285714285714286e-06, + "loss": 2.1787, + "step": 7 + }, + { + "epoch": 0.0024679932130186643, + "grad_norm": 0.0, + "learning_rate": 1.6326530612244897e-06, + "loss": 2.5618, + "step": 8 + }, + { + "epoch": 0.002776492364645997, + "grad_norm": 0.0, + "learning_rate": 1.8367346938775512e-06, + "loss": 2.3799, + "step": 9 + }, + { + "epoch": 0.00308499151627333, + "grad_norm": 0.0, + "learning_rate": 2.0408163265306125e-06, + "loss": 2.1604, + "step": 10 + }, + { + "epoch": 0.0033934906679006635, + "grad_norm": 0.0, + "learning_rate": 2.244897959183674e-06, + "loss": 1.9946, + "step": 11 + }, + { + "epoch": 0.0037019898195279964, + "grad_norm": 0.0, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.996, + "step": 12 + }, + { + "epoch": 0.004010488971155329, + "grad_norm": 0.0, + "learning_rate": 2.6530612244897964e-06, + "loss": 2.084, + "step": 13 + }, + { + "epoch": 0.004318988122782663, + "grad_norm": 0.0, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7759, + "step": 14 + }, + { + "epoch": 0.004627487274409995, + "grad_norm": 0.0, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6765, + "step": 15 + }, + { + "epoch": 0.0049359864260373285, + "grad_norm": 0.0, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.6134, + "step": 16 + }, + { + "epoch": 0.005244485577664661, + "grad_norm": 0.0, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6945, + "step": 17 + }, + { + "epoch": 0.005552984729291994, + "grad_norm": 0.0, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.367, + "step": 18 + }, + { + "epoch": 0.005861483880919328, + "grad_norm": 0.0, + "learning_rate": 3.877551020408164e-06, + "loss": 1.5206, + "step": 19 + }, + { + "epoch": 0.00616998303254666, + "grad_norm": 0.0, + "learning_rate": 4.081632653061225e-06, + "loss": 1.4788, + "step": 20 + }, + { + "epoch": 0.006478482184173994, + "grad_norm": 0.0, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.4541, + "step": 21 + }, + { + "epoch": 0.006786981335801327, + "grad_norm": 0.0, + "learning_rate": 4.489795918367348e-06, + "loss": 1.3626, + "step": 22 + }, + { + "epoch": 0.007095480487428659, + "grad_norm": 0.0, + "learning_rate": 4.693877551020409e-06, + "loss": 1.3584, + "step": 23 + }, + { + "epoch": 0.007403979639055993, + "grad_norm": 0.0, + "learning_rate": 4.897959183673469e-06, + "loss": 1.3817, + "step": 24 + }, + { + "epoch": 0.007712478790683325, + "grad_norm": 0.0, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.4, + "step": 25 + }, + { + "epoch": 0.008020977942310659, + "grad_norm": 0.0, + "learning_rate": 5.306122448979593e-06, + "loss": 1.2967, + "step": 26 + }, + { + "epoch": 0.008329477093937991, + "grad_norm": 0.0, + "learning_rate": 5.510204081632653e-06, + "loss": 1.3896, + "step": 27 + }, + { + "epoch": 0.008637976245565325, + "grad_norm": 0.0, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.3068, + "step": 28 + }, + { + "epoch": 0.008946475397192658, + "grad_norm": 0.0, + "learning_rate": 5.918367346938776e-06, + "loss": 1.2201, + "step": 29 + }, + { + "epoch": 0.00925497454881999, + "grad_norm": 0.0, + "learning_rate": 6.122448979591837e-06, + "loss": 1.262, + "step": 30 + }, + { + "epoch": 0.009563473700447325, + "grad_norm": 0.0, + "learning_rate": 6.326530612244899e-06, + "loss": 1.3774, + "step": 31 + }, + { + "epoch": 0.009871972852074657, + "grad_norm": 0.0, + "learning_rate": 6.530612244897959e-06, + "loss": 1.2003, + "step": 32 + }, + { + "epoch": 0.01018047200370199, + "grad_norm": 0.0, + "learning_rate": 6.734693877551021e-06, + "loss": 1.2146, + "step": 33 + }, + { + "epoch": 0.010488971155329322, + "grad_norm": 0.0, + "learning_rate": 6.938775510204082e-06, + "loss": 1.1563, + "step": 34 + }, + { + "epoch": 0.010797470306956656, + "grad_norm": 0.0, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.2303, + "step": 35 + }, + { + "epoch": 0.011105969458583989, + "grad_norm": 0.0, + "learning_rate": 7.346938775510205e-06, + "loss": 1.0958, + "step": 36 + }, + { + "epoch": 0.011414468610211321, + "grad_norm": 0.0, + "learning_rate": 7.551020408163265e-06, + "loss": 1.1453, + "step": 37 + }, + { + "epoch": 0.011722967761838655, + "grad_norm": 0.0, + "learning_rate": 7.755102040816327e-06, + "loss": 1.1907, + "step": 38 + }, + { + "epoch": 0.012031466913465988, + "grad_norm": 0.0, + "learning_rate": 7.959183673469388e-06, + "loss": 1.181, + "step": 39 + }, + { + "epoch": 0.01233996606509332, + "grad_norm": 0.0, + "learning_rate": 8.16326530612245e-06, + "loss": 1.147, + "step": 40 + }, + { + "epoch": 0.012648465216720655, + "grad_norm": 0.0, + "learning_rate": 8.36734693877551e-06, + "loss": 1.1413, + "step": 41 + }, + { + "epoch": 0.012956964368347987, + "grad_norm": 0.0, + "learning_rate": 8.571428571428571e-06, + "loss": 1.1279, + "step": 42 + }, + { + "epoch": 0.01326546351997532, + "grad_norm": 0.0, + "learning_rate": 8.775510204081633e-06, + "loss": 1.1662, + "step": 43 + }, + { + "epoch": 0.013573962671602654, + "grad_norm": 0.0, + "learning_rate": 8.979591836734695e-06, + "loss": 1.2089, + "step": 44 + }, + { + "epoch": 0.013882461823229986, + "grad_norm": 0.0, + "learning_rate": 9.183673469387756e-06, + "loss": 1.1917, + "step": 45 + }, + { + "epoch": 0.014190960974857319, + "grad_norm": 0.0, + "learning_rate": 9.387755102040818e-06, + "loss": 1.1331, + "step": 46 + }, + { + "epoch": 0.014499460126484651, + "grad_norm": 0.0, + "learning_rate": 9.591836734693878e-06, + "loss": 1.0763, + "step": 47 + }, + { + "epoch": 0.014807959278111986, + "grad_norm": 0.0, + "learning_rate": 9.795918367346939e-06, + "loss": 1.1443, + "step": 48 + }, + { + "epoch": 0.015116458429739318, + "grad_norm": 0.0, + "learning_rate": 1e-05, + "loss": 1.1878, + "step": 49 + }, + { + "epoch": 0.01542495758136665, + "grad_norm": 0.0, + "learning_rate": 1.0204081632653063e-05, + "loss": 0.9932, + "step": 50 + }, + { + "epoch": 0.015733456732993985, + "grad_norm": 0.0, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.1024, + "step": 51 + }, + { + "epoch": 0.016041955884621317, + "grad_norm": 0.0, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.132, + "step": 52 + }, + { + "epoch": 0.01635045503624865, + "grad_norm": 0.0, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.0862, + "step": 53 + }, + { + "epoch": 0.016658954187875982, + "grad_norm": 0.0, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.0846, + "step": 54 + }, + { + "epoch": 0.016967453339503315, + "grad_norm": 0.0, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.1018, + "step": 55 + }, + { + "epoch": 0.01727595249113065, + "grad_norm": 0.0, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.06, + "step": 56 + }, + { + "epoch": 0.017584451642757983, + "grad_norm": 0.0, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.0426, + "step": 57 + }, + { + "epoch": 0.017892950794385316, + "grad_norm": 0.0, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.1386, + "step": 58 + }, + { + "epoch": 0.018201449946012648, + "grad_norm": 0.0, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.0103, + "step": 59 + }, + { + "epoch": 0.01850994909763998, + "grad_norm": 0.0, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.0551, + "step": 60 + }, + { + "epoch": 0.018818448249267313, + "grad_norm": 0.0, + "learning_rate": 1.2448979591836736e-05, + "loss": 0.9686, + "step": 61 + }, + { + "epoch": 0.01912694740089465, + "grad_norm": 0.0, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.0008, + "step": 62 + }, + { + "epoch": 0.01943544655252198, + "grad_norm": 0.0, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.0213, + "step": 63 + }, + { + "epoch": 0.019743945704149314, + "grad_norm": 0.0, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.0082, + "step": 64 + }, + { + "epoch": 0.020052444855776647, + "grad_norm": 0.0, + "learning_rate": 1.326530612244898e-05, + "loss": 1.0559, + "step": 65 + }, + { + "epoch": 0.02036094400740398, + "grad_norm": 0.0, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.0025, + "step": 66 + }, + { + "epoch": 0.02066944315903131, + "grad_norm": 0.0, + "learning_rate": 1.3673469387755102e-05, + "loss": 0.8757, + "step": 67 + }, + { + "epoch": 0.020977942310658644, + "grad_norm": 0.0, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.0191, + "step": 68 + }, + { + "epoch": 0.02128644146228598, + "grad_norm": 0.0, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.0413, + "step": 69 + }, + { + "epoch": 0.021594940613913313, + "grad_norm": 0.0, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.002, + "step": 70 + }, + { + "epoch": 0.021903439765540645, + "grad_norm": 0.0, + "learning_rate": 1.448979591836735e-05, + "loss": 1.0391, + "step": 71 + }, + { + "epoch": 0.022211938917167977, + "grad_norm": 0.0, + "learning_rate": 1.469387755102041e-05, + "loss": 1.0641, + "step": 72 + }, + { + "epoch": 0.02252043806879531, + "grad_norm": 0.0, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.0567, + "step": 73 + }, + { + "epoch": 0.022828937220422642, + "grad_norm": 0.0, + "learning_rate": 1.510204081632653e-05, + "loss": 1.0336, + "step": 74 + }, + { + "epoch": 0.02313743637204998, + "grad_norm": 0.0, + "learning_rate": 1.530612244897959e-05, + "loss": 1.0058, + "step": 75 + }, + { + "epoch": 0.02344593552367731, + "grad_norm": 0.0, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.119, + "step": 76 + }, + { + "epoch": 0.023754434675304643, + "grad_norm": 0.0, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.0142, + "step": 77 + }, + { + "epoch": 0.024062933826931976, + "grad_norm": 0.0, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.0398, + "step": 78 + }, + { + "epoch": 0.02437143297855931, + "grad_norm": 0.0, + "learning_rate": 1.612244897959184e-05, + "loss": 0.9653, + "step": 79 + }, + { + "epoch": 0.02467993213018664, + "grad_norm": 0.0, + "learning_rate": 1.63265306122449e-05, + "loss": 1.2841, + "step": 80 + }, + { + "epoch": 0.024988431281813973, + "grad_norm": 0.0, + "learning_rate": 1.653061224489796e-05, + "loss": 1.02, + "step": 81 + }, + { + "epoch": 0.02529693043344131, + "grad_norm": 0.0, + "learning_rate": 1.673469387755102e-05, + "loss": 0.9886, + "step": 82 + }, + { + "epoch": 0.025605429585068642, + "grad_norm": 0.0, + "learning_rate": 1.6938775510204085e-05, + "loss": 0.9234, + "step": 83 + }, + { + "epoch": 0.025913928736695974, + "grad_norm": 0.0, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.9129, + "step": 84 + }, + { + "epoch": 0.026222427888323307, + "grad_norm": 0.0, + "learning_rate": 1.7346938775510206e-05, + "loss": 0.9701, + "step": 85 + }, + { + "epoch": 0.02653092703995064, + "grad_norm": 0.0, + "learning_rate": 1.7551020408163266e-05, + "loss": 1.0317, + "step": 86 + }, + { + "epoch": 0.026839426191577972, + "grad_norm": 0.0, + "learning_rate": 1.7755102040816327e-05, + "loss": 0.9648, + "step": 87 + }, + { + "epoch": 0.027147925343205308, + "grad_norm": 0.0, + "learning_rate": 1.795918367346939e-05, + "loss": 1.2338, + "step": 88 + }, + { + "epoch": 0.02745642449483264, + "grad_norm": 0.0, + "learning_rate": 1.816326530612245e-05, + "loss": 0.9986, + "step": 89 + }, + { + "epoch": 0.027764923646459973, + "grad_norm": 0.0, + "learning_rate": 1.836734693877551e-05, + "loss": 1.0245, + "step": 90 + }, + { + "epoch": 0.028073422798087305, + "grad_norm": 0.0, + "learning_rate": 1.8571428571428575e-05, + "loss": 0.9307, + "step": 91 + }, + { + "epoch": 0.028381921949714638, + "grad_norm": 0.0, + "learning_rate": 1.8775510204081636e-05, + "loss": 1.0043, + "step": 92 + }, + { + "epoch": 0.02869042110134197, + "grad_norm": 0.0, + "learning_rate": 1.8979591836734696e-05, + "loss": 1.0452, + "step": 93 + }, + { + "epoch": 0.028998920252969303, + "grad_norm": 0.0, + "learning_rate": 1.9183673469387756e-05, + "loss": 0.9908, + "step": 94 + }, + { + "epoch": 0.02930741940459664, + "grad_norm": 0.0, + "learning_rate": 1.9387755102040817e-05, + "loss": 0.933, + "step": 95 + }, + { + "epoch": 0.02961591855622397, + "grad_norm": 0.0, + "learning_rate": 1.9591836734693877e-05, + "loss": 0.9802, + "step": 96 + }, + { + "epoch": 0.029924417707851304, + "grad_norm": 0.0, + "learning_rate": 1.979591836734694e-05, + "loss": 1.0628, + "step": 97 + }, + { + "epoch": 0.030232916859478636, + "grad_norm": 0.0, + "learning_rate": 2e-05, + "loss": 1.0759, + "step": 98 + }, + { + "epoch": 0.03054141601110597, + "grad_norm": 0.0, + "learning_rate": 1.999999500447713e-05, + "loss": 1.026, + "step": 99 + }, + { + "epoch": 0.0308499151627333, + "grad_norm": 0.0, + "learning_rate": 1.9999980017913514e-05, + "loss": 0.9354, + "step": 100 + }, + { + "epoch": 0.031158414314360637, + "grad_norm": 0.0, + "learning_rate": 1.999995504032412e-05, + "loss": 1.0366, + "step": 101 + }, + { + "epoch": 0.03146691346598797, + "grad_norm": 0.0, + "learning_rate": 1.999992007173391e-05, + "loss": 0.9773, + "step": 102 + }, + { + "epoch": 0.0317754126176153, + "grad_norm": 0.0, + "learning_rate": 1.999987511217781e-05, + "loss": 0.9804, + "step": 103 + }, + { + "epoch": 0.032083911769242635, + "grad_norm": 0.0, + "learning_rate": 1.999982016170075e-05, + "loss": 0.9758, + "step": 104 + }, + { + "epoch": 0.03239241092086997, + "grad_norm": 0.0, + "learning_rate": 1.999975522035763e-05, + "loss": 0.9966, + "step": 105 + }, + { + "epoch": 0.0327009100724973, + "grad_norm": 0.0, + "learning_rate": 1.999968028821333e-05, + "loss": 0.9426, + "step": 106 + }, + { + "epoch": 0.03300940922412463, + "grad_norm": 0.0, + "learning_rate": 1.9999595365342715e-05, + "loss": 1.0034, + "step": 107 + }, + { + "epoch": 0.033317908375751965, + "grad_norm": 0.0, + "learning_rate": 1.9999500451830634e-05, + "loss": 0.9326, + "step": 108 + }, + { + "epoch": 0.0336264075273793, + "grad_norm": 0.0, + "learning_rate": 1.9999395547771914e-05, + "loss": 1.0403, + "step": 109 + }, + { + "epoch": 0.03393490667900663, + "grad_norm": 0.0, + "learning_rate": 1.9999280653271364e-05, + "loss": 1.0244, + "step": 110 + }, + { + "epoch": 0.03424340583063397, + "grad_norm": 0.0, + "learning_rate": 1.9999155768443777e-05, + "loss": 0.9906, + "step": 111 + }, + { + "epoch": 0.0345519049822613, + "grad_norm": 0.0, + "learning_rate": 1.999902089341393e-05, + "loss": 0.9533, + "step": 112 + }, + { + "epoch": 0.034860404133888634, + "grad_norm": 0.0, + "learning_rate": 1.999887602831657e-05, + "loss": 1.0805, + "step": 113 + }, + { + "epoch": 0.035168903285515966, + "grad_norm": 0.0, + "learning_rate": 1.999872117329644e-05, + "loss": 1.3153, + "step": 114 + }, + { + "epoch": 0.0354774024371433, + "grad_norm": 0.0, + "learning_rate": 1.999855632850825e-05, + "loss": 0.9861, + "step": 115 + }, + { + "epoch": 0.03578590158877063, + "grad_norm": 0.0, + "learning_rate": 1.9998381494116693e-05, + "loss": 1.0677, + "step": 116 + }, + { + "epoch": 0.036094400740397964, + "grad_norm": 0.0, + "learning_rate": 1.999819667029646e-05, + "loss": 0.9798, + "step": 117 + }, + { + "epoch": 0.036402899892025296, + "grad_norm": 0.0, + "learning_rate": 1.99980018572322e-05, + "loss": 0.9872, + "step": 118 + }, + { + "epoch": 0.03671139904365263, + "grad_norm": 0.0, + "learning_rate": 1.999779705511856e-05, + "loss": 0.9897, + "step": 119 + }, + { + "epoch": 0.03701989819527996, + "grad_norm": 0.0, + "learning_rate": 1.9997582264160147e-05, + "loss": 1.0007, + "step": 120 + }, + { + "epoch": 0.037328397346907294, + "grad_norm": 0.0, + "learning_rate": 1.9997357484571566e-05, + "loss": 0.9305, + "step": 121 + }, + { + "epoch": 0.037636896498534626, + "grad_norm": 0.0, + "learning_rate": 1.9997122716577397e-05, + "loss": 0.9443, + "step": 122 + }, + { + "epoch": 0.03794539565016196, + "grad_norm": 0.0, + "learning_rate": 1.9996877960412192e-05, + "loss": 0.9924, + "step": 123 + }, + { + "epoch": 0.0382538948017893, + "grad_norm": 0.0, + "learning_rate": 1.9996623216320496e-05, + "loss": 1.0336, + "step": 124 + }, + { + "epoch": 0.03856239395341663, + "grad_norm": 0.0, + "learning_rate": 1.9996358484556818e-05, + "loss": 0.9855, + "step": 125 + }, + { + "epoch": 0.03887089310504396, + "grad_norm": 0.0, + "learning_rate": 1.9996083765385656e-05, + "loss": 0.9731, + "step": 126 + }, + { + "epoch": 0.039179392256671296, + "grad_norm": 0.0, + "learning_rate": 1.999579905908148e-05, + "loss": 1.0336, + "step": 127 + }, + { + "epoch": 0.03948789140829863, + "grad_norm": 0.0, + "learning_rate": 1.9995504365928746e-05, + "loss": 0.9698, + "step": 128 + }, + { + "epoch": 0.03979639055992596, + "grad_norm": 0.0, + "learning_rate": 1.999519968622188e-05, + "loss": 1.0549, + "step": 129 + }, + { + "epoch": 0.04010488971155329, + "grad_norm": 0.0, + "learning_rate": 1.9994885020265293e-05, + "loss": 0.906, + "step": 130 + }, + { + "epoch": 0.040413388863180626, + "grad_norm": 0.0, + "learning_rate": 1.9994560368373366e-05, + "loss": 1.0536, + "step": 131 + }, + { + "epoch": 0.04072188801480796, + "grad_norm": 0.0, + "learning_rate": 1.999422573087046e-05, + "loss": 0.9394, + "step": 132 + }, + { + "epoch": 0.04103038716643529, + "grad_norm": 0.0, + "learning_rate": 1.999388110809091e-05, + "loss": 1.0359, + "step": 133 + }, + { + "epoch": 0.04133888631806262, + "grad_norm": 0.0, + "learning_rate": 1.9993526500379037e-05, + "loss": 1.0034, + "step": 134 + }, + { + "epoch": 0.041647385469689956, + "grad_norm": 0.0, + "learning_rate": 1.9993161908089125e-05, + "loss": 0.9976, + "step": 135 + }, + { + "epoch": 0.04195588462131729, + "grad_norm": 0.0, + "learning_rate": 1.9992787331585447e-05, + "loss": 0.9835, + "step": 136 + }, + { + "epoch": 0.04226438377294463, + "grad_norm": 0.0, + "learning_rate": 1.9992402771242236e-05, + "loss": 0.9441, + "step": 137 + }, + { + "epoch": 0.04257288292457196, + "grad_norm": 0.0, + "learning_rate": 1.999200822744371e-05, + "loss": 0.873, + "step": 138 + }, + { + "epoch": 0.04288138207619929, + "grad_norm": 0.0, + "learning_rate": 1.9991603700584063e-05, + "loss": 1.0707, + "step": 139 + }, + { + "epoch": 0.043189881227826625, + "grad_norm": 0.0, + "learning_rate": 1.9991189191067457e-05, + "loss": 0.9523, + "step": 140 + }, + { + "epoch": 0.04349838037945396, + "grad_norm": 0.0, + "learning_rate": 1.9990764699308033e-05, + "loss": 0.9397, + "step": 141 + }, + { + "epoch": 0.04380687953108129, + "grad_norm": 0.0, + "learning_rate": 1.99903302257299e-05, + "loss": 0.9902, + "step": 142 + }, + { + "epoch": 0.04411537868270862, + "grad_norm": 0.0, + "learning_rate": 1.9989885770767143e-05, + "loss": 0.9969, + "step": 143 + }, + { + "epoch": 0.044423877834335955, + "grad_norm": 0.0, + "learning_rate": 1.9989431334863817e-05, + "loss": 1.0595, + "step": 144 + }, + { + "epoch": 0.04473237698596329, + "grad_norm": 0.0, + "learning_rate": 1.9988966918473957e-05, + "loss": 0.9438, + "step": 145 + }, + { + "epoch": 0.04504087613759062, + "grad_norm": 0.0, + "learning_rate": 1.998849252206156e-05, + "loss": 0.9605, + "step": 146 + }, + { + "epoch": 0.04534937528921795, + "grad_norm": 0.0, + "learning_rate": 1.9988008146100594e-05, + "loss": 1.1187, + "step": 147 + }, + { + "epoch": 0.045657874440845285, + "grad_norm": 0.0, + "learning_rate": 1.9987513791075007e-05, + "loss": 0.9843, + "step": 148 + }, + { + "epoch": 0.04596637359247262, + "grad_norm": 0.0, + "learning_rate": 1.998700945747871e-05, + "loss": 0.9245, + "step": 149 + }, + { + "epoch": 0.04627487274409996, + "grad_norm": 0.0, + "learning_rate": 1.998649514581558e-05, + "loss": 0.9899, + "step": 150 + }, + { + "epoch": 0.04658337189572729, + "grad_norm": 0.0, + "learning_rate": 1.9985970856599476e-05, + "loss": 1.0891, + "step": 151 + }, + { + "epoch": 0.04689187104735462, + "grad_norm": 0.0, + "learning_rate": 1.9985436590354207e-05, + "loss": 1.0649, + "step": 152 + }, + { + "epoch": 0.047200370198981954, + "grad_norm": 0.0, + "learning_rate": 1.9984892347613577e-05, + "loss": 1.0248, + "step": 153 + }, + { + "epoch": 0.04750886935060929, + "grad_norm": 0.0, + "learning_rate": 1.9984338128921327e-05, + "loss": 0.9197, + "step": 154 + }, + { + "epoch": 0.04781736850223662, + "grad_norm": 0.0, + "learning_rate": 1.9983773934831185e-05, + "loss": 1.0391, + "step": 155 + }, + { + "epoch": 0.04812586765386395, + "grad_norm": 0.0, + "learning_rate": 1.998319976590684e-05, + "loss": 1.0306, + "step": 156 + }, + { + "epoch": 0.048434366805491284, + "grad_norm": 0.0, + "learning_rate": 1.9982615622721948e-05, + "loss": 0.8968, + "step": 157 + }, + { + "epoch": 0.04874286595711862, + "grad_norm": 0.0, + "learning_rate": 1.9982021505860128e-05, + "loss": 0.9711, + "step": 158 + }, + { + "epoch": 0.04905136510874595, + "grad_norm": 0.0, + "learning_rate": 1.9981417415914957e-05, + "loss": 0.963, + "step": 159 + }, + { + "epoch": 0.04935986426037328, + "grad_norm": 0.0, + "learning_rate": 1.9980803353490002e-05, + "loss": 1.0086, + "step": 160 + }, + { + "epoch": 0.049668363412000614, + "grad_norm": 0.0, + "learning_rate": 1.9980179319198757e-05, + "loss": 0.9816, + "step": 161 + }, + { + "epoch": 0.04997686256362795, + "grad_norm": 0.0, + "learning_rate": 1.997954531366471e-05, + "loss": 1.0639, + "step": 162 + }, + { + "epoch": 0.050285361715255286, + "grad_norm": 0.0, + "learning_rate": 1.997890133752129e-05, + "loss": 0.9421, + "step": 163 + }, + { + "epoch": 0.05059386086688262, + "grad_norm": 0.0, + "learning_rate": 1.9978247391411905e-05, + "loss": 0.9511, + "step": 164 + }, + { + "epoch": 0.05090236001850995, + "grad_norm": 0.0, + "learning_rate": 1.9977583475989907e-05, + "loss": 0.9865, + "step": 165 + }, + { + "epoch": 0.051210859170137284, + "grad_norm": 0.0, + "learning_rate": 1.9976909591918624e-05, + "loss": 0.9379, + "step": 166 + }, + { + "epoch": 0.051519358321764616, + "grad_norm": 0.0, + "learning_rate": 1.9976225739871335e-05, + "loss": 1.0527, + "step": 167 + }, + { + "epoch": 0.05182785747339195, + "grad_norm": 0.0, + "learning_rate": 1.9975531920531277e-05, + "loss": 0.9074, + "step": 168 + }, + { + "epoch": 0.05213635662501928, + "grad_norm": 0.0, + "learning_rate": 1.997482813459165e-05, + "loss": 0.949, + "step": 169 + }, + { + "epoch": 0.052444855776646614, + "grad_norm": 0.0, + "learning_rate": 1.997411438275561e-05, + "loss": 1.0138, + "step": 170 + }, + { + "epoch": 0.052753354928273946, + "grad_norm": 0.0, + "learning_rate": 1.997339066573627e-05, + "loss": 0.9684, + "step": 171 + }, + { + "epoch": 0.05306185407990128, + "grad_norm": 0.0, + "learning_rate": 1.9972656984256695e-05, + "loss": 0.8981, + "step": 172 + }, + { + "epoch": 0.05337035323152861, + "grad_norm": 0.0, + "learning_rate": 1.9971913339049914e-05, + "loss": 0.9672, + "step": 173 + }, + { + "epoch": 0.053678852383155944, + "grad_norm": 0.0, + "learning_rate": 1.9971159730858903e-05, + "loss": 1.0605, + "step": 174 + }, + { + "epoch": 0.053987351534783276, + "grad_norm": 0.0, + "learning_rate": 1.9970396160436602e-05, + "loss": 0.8753, + "step": 175 + }, + { + "epoch": 0.054295850686410616, + "grad_norm": 0.0, + "learning_rate": 1.996962262854589e-05, + "loss": 0.9577, + "step": 176 + }, + { + "epoch": 0.05460434983803795, + "grad_norm": 0.0, + "learning_rate": 1.996883913595961e-05, + "loss": 1.0418, + "step": 177 + }, + { + "epoch": 0.05491284898966528, + "grad_norm": 0.0, + "learning_rate": 1.9968045683460554e-05, + "loss": 0.8894, + "step": 178 + }, + { + "epoch": 0.05522134814129261, + "grad_norm": 0.0, + "learning_rate": 1.9967242271841458e-05, + "loss": 0.9815, + "step": 179 + }, + { + "epoch": 0.055529847292919945, + "grad_norm": 0.0, + "learning_rate": 1.9966428901905025e-05, + "loss": 0.9228, + "step": 180 + }, + { + "epoch": 0.05583834644454728, + "grad_norm": 0.0, + "learning_rate": 1.9965605574463885e-05, + "loss": 0.9451, + "step": 181 + }, + { + "epoch": 0.05614684559617461, + "grad_norm": 0.0, + "learning_rate": 1.9964772290340633e-05, + "loss": 1.0085, + "step": 182 + }, + { + "epoch": 0.05645534474780194, + "grad_norm": 0.0, + "learning_rate": 1.996392905036781e-05, + "loss": 0.9334, + "step": 183 + }, + { + "epoch": 0.056763843899429275, + "grad_norm": 0.0, + "learning_rate": 1.9963075855387898e-05, + "loss": 1.0261, + "step": 184 + }, + { + "epoch": 0.05707234305105661, + "grad_norm": 0.0, + "learning_rate": 1.9962212706253327e-05, + "loss": 1.0683, + "step": 185 + }, + { + "epoch": 0.05738084220268394, + "grad_norm": 0.0, + "learning_rate": 1.9961339603826476e-05, + "loss": 1.0197, + "step": 186 + }, + { + "epoch": 0.05768934135431127, + "grad_norm": 0.0, + "learning_rate": 1.9960456548979664e-05, + "loss": 0.9587, + "step": 187 + }, + { + "epoch": 0.057997840505938605, + "grad_norm": 0.0, + "learning_rate": 1.9959563542595157e-05, + "loss": 0.9745, + "step": 188 + }, + { + "epoch": 0.058306339657565945, + "grad_norm": 0.0, + "learning_rate": 1.995866058556516e-05, + "loss": 0.9802, + "step": 189 + }, + { + "epoch": 0.05861483880919328, + "grad_norm": 0.0, + "learning_rate": 1.995774767879182e-05, + "loss": 0.9877, + "step": 190 + }, + { + "epoch": 0.05892333796082061, + "grad_norm": 0.0, + "learning_rate": 1.995682482318723e-05, + "loss": 0.967, + "step": 191 + }, + { + "epoch": 0.05923183711244794, + "grad_norm": 0.0, + "learning_rate": 1.9955892019673412e-05, + "loss": 1.065, + "step": 192 + }, + { + "epoch": 0.059540336264075275, + "grad_norm": 0.0, + "learning_rate": 1.9954949269182343e-05, + "loss": 0.9254, + "step": 193 + }, + { + "epoch": 0.05984883541570261, + "grad_norm": 0.0, + "learning_rate": 1.9953996572655928e-05, + "loss": 0.947, + "step": 194 + }, + { + "epoch": 0.06015733456732994, + "grad_norm": 0.0, + "learning_rate": 1.9953033931046005e-05, + "loss": 1.0769, + "step": 195 + }, + { + "epoch": 0.06046583371895727, + "grad_norm": 0.0, + "learning_rate": 1.9952061345314355e-05, + "loss": 1.0375, + "step": 196 + }, + { + "epoch": 0.060774332870584605, + "grad_norm": 0.0, + "learning_rate": 1.99510788164327e-05, + "loss": 0.9665, + "step": 197 + }, + { + "epoch": 0.06108283202221194, + "grad_norm": 0.0, + "learning_rate": 1.995008634538268e-05, + "loss": 0.9598, + "step": 198 + }, + { + "epoch": 0.06139133117383927, + "grad_norm": 0.0, + "learning_rate": 1.994908393315588e-05, + "loss": 0.9486, + "step": 199 + }, + { + "epoch": 0.0616998303254666, + "grad_norm": 0.0, + "learning_rate": 1.994807158075382e-05, + "loss": 0.9422, + "step": 200 + }, + { + "epoch": 0.062008329477093935, + "grad_norm": 0.0, + "learning_rate": 1.9947049289187942e-05, + "loss": 0.8275, + "step": 201 + }, + { + "epoch": 0.062316828628721274, + "grad_norm": 0.0, + "learning_rate": 1.9946017059479617e-05, + "loss": 0.9641, + "step": 202 + }, + { + "epoch": 0.0626253277803486, + "grad_norm": 0.0, + "learning_rate": 1.9944974892660158e-05, + "loss": 1.0779, + "step": 203 + }, + { + "epoch": 0.06293382693197594, + "grad_norm": 0.0, + "learning_rate": 1.9943922789770797e-05, + "loss": 1.0373, + "step": 204 + }, + { + "epoch": 0.06324232608360326, + "grad_norm": 0.0, + "learning_rate": 1.9942860751862696e-05, + "loss": 1.0035, + "step": 205 + }, + { + "epoch": 0.0635508252352306, + "grad_norm": 0.0, + "learning_rate": 1.9941788779996937e-05, + "loss": 1.0022, + "step": 206 + }, + { + "epoch": 0.06385932438685793, + "grad_norm": 0.0, + "learning_rate": 1.994070687524453e-05, + "loss": 0.9576, + "step": 207 + }, + { + "epoch": 0.06416782353848527, + "grad_norm": 0.0, + "learning_rate": 1.9939615038686423e-05, + "loss": 0.9669, + "step": 208 + }, + { + "epoch": 0.06447632269011261, + "grad_norm": 0.0, + "learning_rate": 1.9938513271413464e-05, + "loss": 0.9042, + "step": 209 + }, + { + "epoch": 0.06478482184173993, + "grad_norm": 0.0, + "learning_rate": 1.993740157452644e-05, + "loss": 0.8647, + "step": 210 + }, + { + "epoch": 0.06509332099336727, + "grad_norm": 0.0, + "learning_rate": 1.9936279949136047e-05, + "loss": 0.9643, + "step": 211 + }, + { + "epoch": 0.0654018201449946, + "grad_norm": 0.0, + "learning_rate": 1.993514839636291e-05, + "loss": 0.8882, + "step": 212 + }, + { + "epoch": 0.06571031929662194, + "grad_norm": 0.0, + "learning_rate": 1.993400691733757e-05, + "loss": 1.0237, + "step": 213 + }, + { + "epoch": 0.06601881844824926, + "grad_norm": 0.0, + "learning_rate": 1.993285551320048e-05, + "loss": 0.96, + "step": 214 + }, + { + "epoch": 0.0663273175998766, + "grad_norm": 0.0, + "learning_rate": 1.993169418510201e-05, + "loss": 0.9605, + "step": 215 + }, + { + "epoch": 0.06663581675150393, + "grad_norm": 0.0, + "learning_rate": 1.9930522934202455e-05, + "loss": 0.837, + "step": 216 + }, + { + "epoch": 0.06694431590313127, + "grad_norm": 0.0, + "learning_rate": 1.9929341761672017e-05, + "loss": 0.9478, + "step": 217 + }, + { + "epoch": 0.0672528150547586, + "grad_norm": 0.0, + "learning_rate": 1.9928150668690807e-05, + "loss": 1.013, + "step": 218 + }, + { + "epoch": 0.06756131420638593, + "grad_norm": 0.0, + "learning_rate": 1.992694965644885e-05, + "loss": 0.8715, + "step": 219 + }, + { + "epoch": 0.06786981335801326, + "grad_norm": 0.0, + "learning_rate": 1.9925738726146094e-05, + "loss": 0.9242, + "step": 220 + }, + { + "epoch": 0.0681783125096406, + "grad_norm": 0.0, + "learning_rate": 1.992451787899237e-05, + "loss": 0.9094, + "step": 221 + }, + { + "epoch": 0.06848681166126794, + "grad_norm": 0.0, + "learning_rate": 1.9923287116207442e-05, + "loss": 0.9811, + "step": 222 + }, + { + "epoch": 0.06879531081289526, + "grad_norm": 0.0, + "learning_rate": 1.9922046439020967e-05, + "loss": 0.927, + "step": 223 + }, + { + "epoch": 0.0691038099645226, + "grad_norm": 0.0, + "learning_rate": 1.9920795848672512e-05, + "loss": 0.9411, + "step": 224 + }, + { + "epoch": 0.06941230911614993, + "grad_norm": 0.0, + "learning_rate": 1.9919535346411546e-05, + "loss": 0.9485, + "step": 225 + }, + { + "epoch": 0.06972080826777727, + "grad_norm": 0.0, + "learning_rate": 1.9918264933497444e-05, + "loss": 0.97, + "step": 226 + }, + { + "epoch": 0.0700293074194046, + "grad_norm": 0.0, + "learning_rate": 1.9916984611199485e-05, + "loss": 0.9, + "step": 227 + }, + { + "epoch": 0.07033780657103193, + "grad_norm": 0.0, + "learning_rate": 1.9915694380796838e-05, + "loss": 1.0275, + "step": 228 + }, + { + "epoch": 0.07064630572265926, + "grad_norm": 0.0, + "learning_rate": 1.9914394243578582e-05, + "loss": 0.8988, + "step": 229 + }, + { + "epoch": 0.0709548048742866, + "grad_norm": 0.0, + "learning_rate": 1.991308420084369e-05, + "loss": 1.0184, + "step": 230 + }, + { + "epoch": 0.07126330402591392, + "grad_norm": 0.0, + "learning_rate": 1.9911764253901032e-05, + "loss": 0.9032, + "step": 231 + }, + { + "epoch": 0.07157180317754126, + "grad_norm": 0.0, + "learning_rate": 1.9910434404069368e-05, + "loss": 0.9098, + "step": 232 + }, + { + "epoch": 0.07188030232916859, + "grad_norm": 0.0, + "learning_rate": 1.9909094652677364e-05, + "loss": 0.9501, + "step": 233 + }, + { + "epoch": 0.07218880148079593, + "grad_norm": 0.0, + "learning_rate": 1.990774500106357e-05, + "loss": 0.9101, + "step": 234 + }, + { + "epoch": 0.07249730063242327, + "grad_norm": 0.0, + "learning_rate": 1.9906385450576424e-05, + "loss": 0.9228, + "step": 235 + }, + { + "epoch": 0.07280579978405059, + "grad_norm": 0.0, + "learning_rate": 1.9905016002574266e-05, + "loss": 1.0251, + "step": 236 + }, + { + "epoch": 0.07311429893567793, + "grad_norm": 0.0, + "learning_rate": 1.9903636658425316e-05, + "loss": 0.9231, + "step": 237 + }, + { + "epoch": 0.07342279808730526, + "grad_norm": 0.0, + "learning_rate": 1.990224741950768e-05, + "loss": 0.9371, + "step": 238 + }, + { + "epoch": 0.0737312972389326, + "grad_norm": 0.0, + "learning_rate": 1.9900848287209356e-05, + "loss": 1.0318, + "step": 239 + }, + { + "epoch": 0.07403979639055992, + "grad_norm": 0.0, + "learning_rate": 1.989943926292822e-05, + "loss": 0.9119, + "step": 240 + }, + { + "epoch": 0.07434829554218726, + "grad_norm": 0.0, + "learning_rate": 1.989802034807204e-05, + "loss": 1.0122, + "step": 241 + }, + { + "epoch": 0.07465679469381459, + "grad_norm": 0.0, + "learning_rate": 1.9896591544058458e-05, + "loss": 1.006, + "step": 242 + }, + { + "epoch": 0.07496529384544193, + "grad_norm": 0.0, + "learning_rate": 1.9895152852314995e-05, + "loss": 0.9525, + "step": 243 + }, + { + "epoch": 0.07527379299706925, + "grad_norm": 0.0, + "learning_rate": 1.9893704274279057e-05, + "loss": 0.8847, + "step": 244 + }, + { + "epoch": 0.07558229214869659, + "grad_norm": 0.0, + "learning_rate": 1.9892245811397924e-05, + "loss": 1.0279, + "step": 245 + }, + { + "epoch": 0.07589079130032392, + "grad_norm": 0.0, + "learning_rate": 1.989077746512876e-05, + "loss": 0.9846, + "step": 246 + }, + { + "epoch": 0.07619929045195126, + "grad_norm": 0.0, + "learning_rate": 1.9889299236938585e-05, + "loss": 1.023, + "step": 247 + }, + { + "epoch": 0.0765077896035786, + "grad_norm": 0.0, + "learning_rate": 1.9887811128304312e-05, + "loss": 0.9629, + "step": 248 + }, + { + "epoch": 0.07681628875520592, + "grad_norm": 0.0, + "learning_rate": 1.9886313140712717e-05, + "loss": 1.0206, + "step": 249 + }, + { + "epoch": 0.07712478790683326, + "grad_norm": 0.0, + "learning_rate": 1.988480527566044e-05, + "loss": 0.9065, + "step": 250 + }, + { + "epoch": 0.07743328705846059, + "grad_norm": 0.0, + "learning_rate": 1.9883287534654e-05, + "loss": 0.951, + "step": 251 + }, + { + "epoch": 0.07774178621008793, + "grad_norm": 0.0, + "learning_rate": 1.988175991920978e-05, + "loss": 1.0458, + "step": 252 + }, + { + "epoch": 0.07805028536171525, + "grad_norm": 0.0, + "learning_rate": 1.9880222430854025e-05, + "loss": 0.9257, + "step": 253 + }, + { + "epoch": 0.07835878451334259, + "grad_norm": 0.0, + "learning_rate": 1.9878675071122848e-05, + "loss": 0.9411, + "step": 254 + }, + { + "epoch": 0.07866728366496992, + "grad_norm": 0.0, + "learning_rate": 1.9877117841562222e-05, + "loss": 1.002, + "step": 255 + }, + { + "epoch": 0.07897578281659726, + "grad_norm": 0.0, + "learning_rate": 1.9875550743727982e-05, + "loss": 0.8932, + "step": 256 + }, + { + "epoch": 0.07928428196822458, + "grad_norm": 0.0, + "learning_rate": 1.9873973779185828e-05, + "loss": 0.9698, + "step": 257 + }, + { + "epoch": 0.07959278111985192, + "grad_norm": 0.0, + "learning_rate": 1.9872386949511308e-05, + "loss": 0.8699, + "step": 258 + }, + { + "epoch": 0.07990128027147925, + "grad_norm": 0.0, + "learning_rate": 1.9870790256289827e-05, + "loss": 0.9997, + "step": 259 + }, + { + "epoch": 0.08020977942310659, + "grad_norm": 0.0, + "learning_rate": 1.9869183701116655e-05, + "loss": 0.9943, + "step": 260 + }, + { + "epoch": 0.08051827857473393, + "grad_norm": 0.0, + "learning_rate": 1.9867567285596903e-05, + "loss": 1.0419, + "step": 261 + }, + { + "epoch": 0.08082677772636125, + "grad_norm": 0.0, + "learning_rate": 1.9865941011345547e-05, + "loss": 1.3233, + "step": 262 + }, + { + "epoch": 0.08113527687798859, + "grad_norm": 0.0, + "learning_rate": 1.9864304879987397e-05, + "loss": 0.8918, + "step": 263 + }, + { + "epoch": 0.08144377602961592, + "grad_norm": 0.0, + "learning_rate": 1.9862658893157124e-05, + "loss": 0.9424, + "step": 264 + }, + { + "epoch": 0.08175227518124326, + "grad_norm": 0.0, + "learning_rate": 1.986100305249924e-05, + "loss": 0.9601, + "step": 265 + }, + { + "epoch": 0.08206077433287058, + "grad_norm": 0.0, + "learning_rate": 1.9859337359668102e-05, + "loss": 0.9477, + "step": 266 + }, + { + "epoch": 0.08236927348449792, + "grad_norm": 0.0, + "learning_rate": 1.9857661816327913e-05, + "loss": 0.9606, + "step": 267 + }, + { + "epoch": 0.08267777263612525, + "grad_norm": 0.0, + "learning_rate": 1.9855976424152713e-05, + "loss": 1.0228, + "step": 268 + }, + { + "epoch": 0.08298627178775259, + "grad_norm": 0.0, + "learning_rate": 1.9854281184826386e-05, + "loss": 0.9454, + "step": 269 + }, + { + "epoch": 0.08329477093937991, + "grad_norm": 0.0, + "learning_rate": 1.9852576100042656e-05, + "loss": 0.9233, + "step": 270 + }, + { + "epoch": 0.08360327009100725, + "grad_norm": 0.0, + "learning_rate": 1.985086117150508e-05, + "loss": 0.9667, + "step": 271 + }, + { + "epoch": 0.08391176924263458, + "grad_norm": 0.0, + "learning_rate": 1.9849136400927048e-05, + "loss": 0.9803, + "step": 272 + }, + { + "epoch": 0.08422026839426192, + "grad_norm": 0.0, + "learning_rate": 1.9847401790031792e-05, + "loss": 0.861, + "step": 273 + }, + { + "epoch": 0.08452876754588926, + "grad_norm": 0.0, + "learning_rate": 1.9845657340552366e-05, + "loss": 0.9085, + "step": 274 + }, + { + "epoch": 0.08483726669751658, + "grad_norm": 0.0, + "learning_rate": 1.9843903054231653e-05, + "loss": 0.996, + "step": 275 + }, + { + "epoch": 0.08514576584914392, + "grad_norm": 0.0, + "learning_rate": 1.9842138932822377e-05, + "loss": 0.9158, + "step": 276 + }, + { + "epoch": 0.08545426500077125, + "grad_norm": 0.0, + "learning_rate": 1.9840364978087073e-05, + "loss": 0.842, + "step": 277 + }, + { + "epoch": 0.08576276415239859, + "grad_norm": 0.0, + "learning_rate": 1.9838581191798117e-05, + "loss": 0.9036, + "step": 278 + }, + { + "epoch": 0.08607126330402591, + "grad_norm": 0.0, + "learning_rate": 1.9836787575737683e-05, + "loss": 0.8667, + "step": 279 + }, + { + "epoch": 0.08637976245565325, + "grad_norm": 0.0, + "learning_rate": 1.9834984131697796e-05, + "loss": 0.9167, + "step": 280 + }, + { + "epoch": 0.08668826160728058, + "grad_norm": 0.0, + "learning_rate": 1.983317086148027e-05, + "loss": 0.9307, + "step": 281 + }, + { + "epoch": 0.08699676075890792, + "grad_norm": 0.0, + "learning_rate": 1.9831347766896766e-05, + "loss": 1.3152, + "step": 282 + }, + { + "epoch": 0.08730525991053524, + "grad_norm": 0.0, + "learning_rate": 1.9829514849768737e-05, + "loss": 0.9292, + "step": 283 + }, + { + "epoch": 0.08761375906216258, + "grad_norm": 0.0, + "learning_rate": 1.9827672111927466e-05, + "loss": 0.9636, + "step": 284 + }, + { + "epoch": 0.0879222582137899, + "grad_norm": 0.0, + "learning_rate": 1.9825819555214035e-05, + "loss": 0.9255, + "step": 285 + }, + { + "epoch": 0.08823075736541725, + "grad_norm": 0.0, + "learning_rate": 1.982395718147934e-05, + "loss": 0.8859, + "step": 286 + }, + { + "epoch": 0.08853925651704458, + "grad_norm": 0.0, + "learning_rate": 1.9822084992584098e-05, + "loss": 0.8519, + "step": 287 + }, + { + "epoch": 0.08884775566867191, + "grad_norm": 0.0, + "learning_rate": 1.982020299039881e-05, + "loss": 0.9121, + "step": 288 + }, + { + "epoch": 0.08915625482029925, + "grad_norm": 0.0, + "learning_rate": 1.9818311176803796e-05, + "loss": 0.9241, + "step": 289 + }, + { + "epoch": 0.08946475397192657, + "grad_norm": 0.0, + "learning_rate": 1.9816409553689178e-05, + "loss": 0.9331, + "step": 290 + }, + { + "epoch": 0.08977325312355391, + "grad_norm": 0.0, + "learning_rate": 1.9814498122954875e-05, + "loss": 0.9826, + "step": 291 + }, + { + "epoch": 0.09008175227518124, + "grad_norm": 0.0, + "learning_rate": 1.9812576886510607e-05, + "loss": 0.9549, + "step": 292 + }, + { + "epoch": 0.09039025142680858, + "grad_norm": 0.0, + "learning_rate": 1.981064584627589e-05, + "loss": 0.9691, + "step": 293 + }, + { + "epoch": 0.0906987505784359, + "grad_norm": 0.0, + "learning_rate": 1.9808705004180032e-05, + "loss": 0.907, + "step": 294 + }, + { + "epoch": 0.09100724973006324, + "grad_norm": 0.0, + "learning_rate": 1.9806754362162143e-05, + "loss": 0.8657, + "step": 295 + }, + { + "epoch": 0.09131574888169057, + "grad_norm": 0.0, + "learning_rate": 1.9804793922171114e-05, + "loss": 1.0198, + "step": 296 + }, + { + "epoch": 0.09162424803331791, + "grad_norm": 0.0, + "learning_rate": 1.980282368616563e-05, + "loss": 0.8932, + "step": 297 + }, + { + "epoch": 0.09193274718494523, + "grad_norm": 0.0, + "learning_rate": 1.9800843656114167e-05, + "loss": 0.8682, + "step": 298 + }, + { + "epoch": 0.09224124633657257, + "grad_norm": 0.0, + "learning_rate": 1.9798853833994975e-05, + "loss": 0.9221, + "step": 299 + }, + { + "epoch": 0.09254974548819991, + "grad_norm": 0.0, + "learning_rate": 1.9796854221796097e-05, + "loss": 1.0219, + "step": 300 + }, + { + "epoch": 0.09285824463982724, + "grad_norm": 0.0, + "learning_rate": 1.979484482151536e-05, + "loss": 1.0476, + "step": 301 + }, + { + "epoch": 0.09316674379145458, + "grad_norm": 0.0, + "learning_rate": 1.9792825635160357e-05, + "loss": 0.8485, + "step": 302 + }, + { + "epoch": 0.0934752429430819, + "grad_norm": 0.0, + "learning_rate": 1.979079666474847e-05, + "loss": 0.9505, + "step": 303 + }, + { + "epoch": 0.09378374209470924, + "grad_norm": 0.0, + "learning_rate": 1.9788757912306856e-05, + "loss": 0.926, + "step": 304 + }, + { + "epoch": 0.09409224124633657, + "grad_norm": 0.0, + "learning_rate": 1.9786709379872436e-05, + "loss": 0.9169, + "step": 305 + }, + { + "epoch": 0.09440074039796391, + "grad_norm": 0.0, + "learning_rate": 1.9784651069491914e-05, + "loss": 0.8907, + "step": 306 + }, + { + "epoch": 0.09470923954959123, + "grad_norm": 0.0, + "learning_rate": 1.978258298322175e-05, + "loss": 0.9022, + "step": 307 + }, + { + "epoch": 0.09501773870121857, + "grad_norm": 0.0, + "learning_rate": 1.9780505123128187e-05, + "loss": 0.8888, + "step": 308 + }, + { + "epoch": 0.0953262378528459, + "grad_norm": 0.0, + "learning_rate": 1.9778417491287217e-05, + "loss": 0.967, + "step": 309 + }, + { + "epoch": 0.09563473700447324, + "grad_norm": 0.0, + "learning_rate": 1.9776320089784605e-05, + "loss": 0.9804, + "step": 310 + }, + { + "epoch": 0.09594323615610056, + "grad_norm": 0.0, + "learning_rate": 1.9774212920715876e-05, + "loss": 0.9284, + "step": 311 + }, + { + "epoch": 0.0962517353077279, + "grad_norm": 0.0, + "learning_rate": 1.977209598618631e-05, + "loss": 0.9764, + "step": 312 + }, + { + "epoch": 0.09656023445935524, + "grad_norm": 0.0, + "learning_rate": 1.976996928831095e-05, + "loss": 0.9649, + "step": 313 + }, + { + "epoch": 0.09686873361098257, + "grad_norm": 0.0, + "learning_rate": 1.9767832829214588e-05, + "loss": 0.9338, + "step": 314 + }, + { + "epoch": 0.09717723276260991, + "grad_norm": 0.0, + "learning_rate": 1.976568661103176e-05, + "loss": 0.9494, + "step": 315 + }, + { + "epoch": 0.09748573191423723, + "grad_norm": 0.0, + "learning_rate": 1.976353063590678e-05, + "loss": 0.9908, + "step": 316 + }, + { + "epoch": 0.09779423106586457, + "grad_norm": 0.0, + "learning_rate": 1.976136490599368e-05, + "loss": 0.9387, + "step": 317 + }, + { + "epoch": 0.0981027302174919, + "grad_norm": 0.0, + "learning_rate": 1.975918942345626e-05, + "loss": 0.8793, + "step": 318 + }, + { + "epoch": 0.09841122936911924, + "grad_norm": 0.0, + "learning_rate": 1.975700419046804e-05, + "loss": 0.8569, + "step": 319 + }, + { + "epoch": 0.09871972852074656, + "grad_norm": 0.0, + "learning_rate": 1.975480920921231e-05, + "loss": 1.0324, + "step": 320 + }, + { + "epoch": 0.0990282276723739, + "grad_norm": 0.0, + "learning_rate": 1.975260448188208e-05, + "loss": 0.9234, + "step": 321 + }, + { + "epoch": 0.09933672682400123, + "grad_norm": 0.0, + "learning_rate": 1.975039001068011e-05, + "loss": 0.9475, + "step": 322 + }, + { + "epoch": 0.09964522597562857, + "grad_norm": 0.0, + "learning_rate": 1.9748165797818875e-05, + "loss": 0.9887, + "step": 323 + }, + { + "epoch": 0.0999537251272559, + "grad_norm": 0.0, + "learning_rate": 1.974593184552061e-05, + "loss": 0.9338, + "step": 324 + }, + { + "epoch": 0.10026222427888323, + "grad_norm": 0.0, + "learning_rate": 1.9743688156017254e-05, + "loss": 0.9126, + "step": 325 + }, + { + "epoch": 0.10057072343051057, + "grad_norm": 0.0, + "learning_rate": 1.9741434731550498e-05, + "loss": 0.9219, + "step": 326 + }, + { + "epoch": 0.1008792225821379, + "grad_norm": 0.0, + "learning_rate": 1.9739171574371744e-05, + "loss": 0.8622, + "step": 327 + }, + { + "epoch": 0.10118772173376524, + "grad_norm": 0.0, + "learning_rate": 1.9736898686742125e-05, + "loss": 0.9795, + "step": 328 + }, + { + "epoch": 0.10149622088539256, + "grad_norm": 0.0, + "learning_rate": 1.973461607093249e-05, + "loss": 0.8908, + "step": 329 + }, + { + "epoch": 0.1018047200370199, + "grad_norm": 0.0, + "learning_rate": 1.9732323729223414e-05, + "loss": 0.9311, + "step": 330 + }, + { + "epoch": 0.10211321918864723, + "grad_norm": 0.0, + "learning_rate": 1.9730021663905186e-05, + "loss": 0.9054, + "step": 331 + }, + { + "epoch": 0.10242171834027457, + "grad_norm": 0.0, + "learning_rate": 1.972770987727781e-05, + "loss": 0.9287, + "step": 332 + }, + { + "epoch": 0.10273021749190189, + "grad_norm": 0.0, + "learning_rate": 1.9725388371651e-05, + "loss": 0.9404, + "step": 333 + }, + { + "epoch": 0.10303871664352923, + "grad_norm": 0.0, + "learning_rate": 1.9723057149344184e-05, + "loss": 0.9188, + "step": 334 + }, + { + "epoch": 0.10334721579515656, + "grad_norm": 0.0, + "learning_rate": 1.97207162126865e-05, + "loss": 1.0049, + "step": 335 + }, + { + "epoch": 0.1036557149467839, + "grad_norm": 0.0, + "learning_rate": 1.9718365564016785e-05, + "loss": 0.9919, + "step": 336 + }, + { + "epoch": 0.10396421409841122, + "grad_norm": 0.0, + "learning_rate": 1.971600520568358e-05, + "loss": 0.8696, + "step": 337 + }, + { + "epoch": 0.10427271325003856, + "grad_norm": 0.0, + "learning_rate": 1.9713635140045134e-05, + "loss": 0.9094, + "step": 338 + }, + { + "epoch": 0.1045812124016659, + "grad_norm": 0.0, + "learning_rate": 1.9711255369469394e-05, + "loss": 0.9393, + "step": 339 + }, + { + "epoch": 0.10488971155329323, + "grad_norm": 0.0, + "learning_rate": 1.9708865896333993e-05, + "loss": 0.9828, + "step": 340 + }, + { + "epoch": 0.10519821070492057, + "grad_norm": 0.0, + "learning_rate": 1.970646672302627e-05, + "loss": 0.8656, + "step": 341 + }, + { + "epoch": 0.10550670985654789, + "grad_norm": 0.0, + "learning_rate": 1.9704057851943244e-05, + "loss": 0.9252, + "step": 342 + }, + { + "epoch": 0.10581520900817523, + "grad_norm": 0.0, + "learning_rate": 1.9701639285491633e-05, + "loss": 0.8904, + "step": 343 + }, + { + "epoch": 0.10612370815980256, + "grad_norm": 0.0, + "learning_rate": 1.969921102608784e-05, + "loss": 0.906, + "step": 344 + }, + { + "epoch": 0.1064322073114299, + "grad_norm": 0.0, + "learning_rate": 1.9696773076157942e-05, + "loss": 1.0674, + "step": 345 + }, + { + "epoch": 0.10674070646305722, + "grad_norm": 0.0, + "learning_rate": 1.9694325438137716e-05, + "loss": 0.9022, + "step": 346 + }, + { + "epoch": 0.10704920561468456, + "grad_norm": 0.0, + "learning_rate": 1.9691868114472602e-05, + "loss": 0.9366, + "step": 347 + }, + { + "epoch": 0.10735770476631189, + "grad_norm": 0.0, + "learning_rate": 1.9689401107617722e-05, + "loss": 0.8698, + "step": 348 + }, + { + "epoch": 0.10766620391793923, + "grad_norm": 0.0, + "learning_rate": 1.9686924420037877e-05, + "loss": 0.9393, + "step": 349 + }, + { + "epoch": 0.10797470306956655, + "grad_norm": 0.0, + "learning_rate": 1.968443805420754e-05, + "loss": 0.9386, + "step": 350 + }, + { + "epoch": 0.10828320222119389, + "grad_norm": 0.0, + "learning_rate": 1.9681942012610847e-05, + "loss": 0.8884, + "step": 351 + }, + { + "epoch": 0.10859170137282123, + "grad_norm": 0.0, + "learning_rate": 1.96794362977416e-05, + "loss": 0.9948, + "step": 352 + }, + { + "epoch": 0.10890020052444856, + "grad_norm": 0.0, + "learning_rate": 1.9676920912103278e-05, + "loss": 0.9183, + "step": 353 + }, + { + "epoch": 0.1092086996760759, + "grad_norm": 0.0, + "learning_rate": 1.9674395858209014e-05, + "loss": 0.9598, + "step": 354 + }, + { + "epoch": 0.10951719882770322, + "grad_norm": 0.0, + "learning_rate": 1.9671861138581594e-05, + "loss": 0.9646, + "step": 355 + }, + { + "epoch": 0.10982569797933056, + "grad_norm": 0.0, + "learning_rate": 1.966931675575347e-05, + "loss": 0.9969, + "step": 356 + }, + { + "epoch": 0.11013419713095789, + "grad_norm": 0.0, + "learning_rate": 1.966676271226675e-05, + "loss": 0.9328, + "step": 357 + }, + { + "epoch": 0.11044269628258523, + "grad_norm": 0.0, + "learning_rate": 1.9664199010673192e-05, + "loss": 0.8916, + "step": 358 + }, + { + "epoch": 0.11075119543421255, + "grad_norm": 0.0, + "learning_rate": 1.9661625653534196e-05, + "loss": 0.9142, + "step": 359 + }, + { + "epoch": 0.11105969458583989, + "grad_norm": 0.0, + "learning_rate": 1.9659042643420817e-05, + "loss": 0.9221, + "step": 360 + }, + { + "epoch": 0.11136819373746722, + "grad_norm": 0.0, + "learning_rate": 1.9656449982913757e-05, + "loss": 0.9171, + "step": 361 + }, + { + "epoch": 0.11167669288909456, + "grad_norm": 0.0, + "learning_rate": 1.9653847674603348e-05, + "loss": 0.8394, + "step": 362 + }, + { + "epoch": 0.11198519204072188, + "grad_norm": 0.0, + "learning_rate": 1.9651235721089575e-05, + "loss": 0.8754, + "step": 363 + }, + { + "epoch": 0.11229369119234922, + "grad_norm": 0.0, + "learning_rate": 1.9648614124982044e-05, + "loss": 0.9579, + "step": 364 + }, + { + "epoch": 0.11260219034397656, + "grad_norm": 0.0, + "learning_rate": 1.964598288890001e-05, + "loss": 0.9248, + "step": 365 + }, + { + "epoch": 0.11291068949560389, + "grad_norm": 0.0, + "learning_rate": 1.964334201547235e-05, + "loss": 1.0278, + "step": 366 + }, + { + "epoch": 0.11321918864723123, + "grad_norm": 0.0, + "learning_rate": 1.964069150733758e-05, + "loss": 0.8592, + "step": 367 + }, + { + "epoch": 0.11352768779885855, + "grad_norm": 0.0, + "learning_rate": 1.963803136714382e-05, + "loss": 0.9319, + "step": 368 + }, + { + "epoch": 0.11383618695048589, + "grad_norm": 0.0, + "learning_rate": 1.9635361597548844e-05, + "loss": 1.0014, + "step": 369 + }, + { + "epoch": 0.11414468610211322, + "grad_norm": 0.0, + "learning_rate": 1.9632682201220022e-05, + "loss": 0.9013, + "step": 370 + }, + { + "epoch": 0.11445318525374056, + "grad_norm": 0.0, + "learning_rate": 1.9629993180834356e-05, + "loss": 0.8994, + "step": 371 + }, + { + "epoch": 0.11476168440536788, + "grad_norm": 0.0, + "learning_rate": 1.9627294539078454e-05, + "loss": 0.901, + "step": 372 + }, + { + "epoch": 0.11507018355699522, + "grad_norm": 0.0, + "learning_rate": 1.9624586278648544e-05, + "loss": 1.0434, + "step": 373 + }, + { + "epoch": 0.11537868270862255, + "grad_norm": 0.0, + "learning_rate": 1.962186840225046e-05, + "loss": 0.9425, + "step": 374 + }, + { + "epoch": 0.11568718186024989, + "grad_norm": 0.0, + "learning_rate": 1.9619140912599643e-05, + "loss": 0.89, + "step": 375 + }, + { + "epoch": 0.11599568101187721, + "grad_norm": 0.0, + "learning_rate": 1.9616403812421147e-05, + "loss": 0.997, + "step": 376 + }, + { + "epoch": 0.11630418016350455, + "grad_norm": 0.0, + "learning_rate": 1.9613657104449615e-05, + "loss": 0.866, + "step": 377 + }, + { + "epoch": 0.11661267931513189, + "grad_norm": 0.0, + "learning_rate": 1.96109007914293e-05, + "loss": 0.9479, + "step": 378 + }, + { + "epoch": 0.11692117846675922, + "grad_norm": 0.0, + "learning_rate": 1.960813487611404e-05, + "loss": 0.9128, + "step": 379 + }, + { + "epoch": 0.11722967761838655, + "grad_norm": 0.0, + "learning_rate": 1.9605359361267282e-05, + "loss": 0.9501, + "step": 380 + }, + { + "epoch": 0.11753817677001388, + "grad_norm": 0.0, + "learning_rate": 1.960257424966205e-05, + "loss": 0.9546, + "step": 381 + }, + { + "epoch": 0.11784667592164122, + "grad_norm": 0.0, + "learning_rate": 1.9599779544080966e-05, + "loss": 0.9422, + "step": 382 + }, + { + "epoch": 0.11815517507326855, + "grad_norm": 0.0, + "learning_rate": 1.9596975247316226e-05, + "loss": 0.949, + "step": 383 + }, + { + "epoch": 0.11846367422489588, + "grad_norm": 0.0, + "learning_rate": 1.9594161362169627e-05, + "loss": 0.9639, + "step": 384 + }, + { + "epoch": 0.11877217337652321, + "grad_norm": 0.0, + "learning_rate": 1.9591337891452525e-05, + "loss": 0.8488, + "step": 385 + }, + { + "epoch": 0.11908067252815055, + "grad_norm": 0.0, + "learning_rate": 1.958850483798586e-05, + "loss": 0.9729, + "step": 386 + }, + { + "epoch": 0.11938917167977788, + "grad_norm": 0.0, + "learning_rate": 1.958566220460016e-05, + "loss": 0.944, + "step": 387 + }, + { + "epoch": 0.11969767083140521, + "grad_norm": 0.0, + "learning_rate": 1.9582809994135505e-05, + "loss": 0.996, + "step": 388 + }, + { + "epoch": 0.12000616998303254, + "grad_norm": 0.0, + "learning_rate": 1.9579948209441558e-05, + "loss": 0.891, + "step": 389 + }, + { + "epoch": 0.12031466913465988, + "grad_norm": 0.0, + "learning_rate": 1.957707685337753e-05, + "loss": 0.933, + "step": 390 + }, + { + "epoch": 0.12062316828628722, + "grad_norm": 0.0, + "learning_rate": 1.957419592881222e-05, + "loss": 0.9058, + "step": 391 + }, + { + "epoch": 0.12093166743791454, + "grad_norm": 0.0, + "learning_rate": 1.957130543862396e-05, + "loss": 0.9237, + "step": 392 + }, + { + "epoch": 0.12124016658954188, + "grad_norm": 0.0, + "learning_rate": 1.9568405385700658e-05, + "loss": 0.9637, + "step": 393 + }, + { + "epoch": 0.12154866574116921, + "grad_norm": 0.0, + "learning_rate": 1.956549577293977e-05, + "loss": 1.0328, + "step": 394 + }, + { + "epoch": 0.12185716489279655, + "grad_norm": 0.0, + "learning_rate": 1.9562576603248306e-05, + "loss": 0.9848, + "step": 395 + }, + { + "epoch": 0.12216566404442387, + "grad_norm": 0.0, + "learning_rate": 1.9559647879542817e-05, + "loss": 0.973, + "step": 396 + }, + { + "epoch": 0.12247416319605121, + "grad_norm": 0.0, + "learning_rate": 1.9556709604749408e-05, + "loss": 0.9057, + "step": 397 + }, + { + "epoch": 0.12278266234767854, + "grad_norm": 0.0, + "learning_rate": 1.9553761781803718e-05, + "loss": 0.8314, + "step": 398 + }, + { + "epoch": 0.12309116149930588, + "grad_norm": 0.0, + "learning_rate": 1.9550804413650935e-05, + "loss": 0.9103, + "step": 399 + }, + { + "epoch": 0.1233996606509332, + "grad_norm": 0.0, + "learning_rate": 1.954783750324578e-05, + "loss": 0.9178, + "step": 400 + }, + { + "epoch": 0.12370815980256054, + "grad_norm": 0.0, + "learning_rate": 1.9544861053552503e-05, + "loss": 0.9996, + "step": 401 + }, + { + "epoch": 0.12401665895418787, + "grad_norm": 0.0, + "learning_rate": 1.954187506754489e-05, + "loss": 0.9381, + "step": 402 + }, + { + "epoch": 0.12432515810581521, + "grad_norm": 0.0, + "learning_rate": 1.953887954820625e-05, + "loss": 0.9707, + "step": 403 + }, + { + "epoch": 0.12463365725744255, + "grad_norm": 0.0, + "learning_rate": 1.9535874498529423e-05, + "loss": 0.8752, + "step": 404 + }, + { + "epoch": 0.12494215640906987, + "grad_norm": 0.0, + "learning_rate": 1.953285992151677e-05, + "loss": 0.9752, + "step": 405 + }, + { + "epoch": 0.1252506555606972, + "grad_norm": 0.0, + "learning_rate": 1.9529835820180166e-05, + "loss": 0.9104, + "step": 406 + }, + { + "epoch": 0.12555915471232454, + "grad_norm": 0.0, + "learning_rate": 1.952680219754101e-05, + "loss": 0.9053, + "step": 407 + }, + { + "epoch": 0.12586765386395188, + "grad_norm": 0.0, + "learning_rate": 1.9523759056630196e-05, + "loss": 0.8727, + "step": 408 + }, + { + "epoch": 0.12617615301557922, + "grad_norm": 0.0, + "learning_rate": 1.952070640048815e-05, + "loss": 0.871, + "step": 409 + }, + { + "epoch": 0.12648465216720653, + "grad_norm": 0.0, + "learning_rate": 1.9517644232164793e-05, + "loss": 0.8591, + "step": 410 + }, + { + "epoch": 0.12679315131883387, + "grad_norm": 0.0, + "learning_rate": 1.951457255471955e-05, + "loss": 0.9535, + "step": 411 + }, + { + "epoch": 0.1271016504704612, + "grad_norm": 0.0, + "learning_rate": 1.9511491371221347e-05, + "loss": 0.9831, + "step": 412 + }, + { + "epoch": 0.12741014962208855, + "grad_norm": 0.0, + "learning_rate": 1.9508400684748615e-05, + "loss": 0.9473, + "step": 413 + }, + { + "epoch": 0.12771864877371586, + "grad_norm": 0.0, + "learning_rate": 1.950530049838926e-05, + "loss": 1.0712, + "step": 414 + }, + { + "epoch": 0.1280271479253432, + "grad_norm": 0.0, + "learning_rate": 1.9502190815240708e-05, + "loss": 0.9336, + "step": 415 + }, + { + "epoch": 0.12833564707697054, + "grad_norm": 0.0, + "learning_rate": 1.9499071638409845e-05, + "loss": 0.8945, + "step": 416 + }, + { + "epoch": 0.12864414622859788, + "grad_norm": 0.0, + "learning_rate": 1.949594297101306e-05, + "loss": 0.8821, + "step": 417 + }, + { + "epoch": 0.12895264538022522, + "grad_norm": 0.0, + "learning_rate": 1.9492804816176223e-05, + "loss": 0.8441, + "step": 418 + }, + { + "epoch": 0.12926114453185253, + "grad_norm": 0.0, + "learning_rate": 1.9489657177034673e-05, + "loss": 0.8545, + "step": 419 + }, + { + "epoch": 0.12956964368347987, + "grad_norm": 0.0, + "learning_rate": 1.948650005673323e-05, + "loss": 0.9616, + "step": 420 + }, + { + "epoch": 0.1298781428351072, + "grad_norm": 0.0, + "learning_rate": 1.9483333458426192e-05, + "loss": 1.0049, + "step": 421 + }, + { + "epoch": 0.13018664198673455, + "grad_norm": 0.0, + "learning_rate": 1.948015738527732e-05, + "loss": 0.9311, + "step": 422 + }, + { + "epoch": 0.13049514113836186, + "grad_norm": 0.0, + "learning_rate": 1.947697184045984e-05, + "loss": 0.9357, + "step": 423 + }, + { + "epoch": 0.1308036402899892, + "grad_norm": 0.0, + "learning_rate": 1.947377682715645e-05, + "loss": 0.8408, + "step": 424 + }, + { + "epoch": 0.13111213944161654, + "grad_norm": 0.0, + "learning_rate": 1.9470572348559295e-05, + "loss": 0.885, + "step": 425 + }, + { + "epoch": 0.13142063859324388, + "grad_norm": 0.0, + "learning_rate": 1.946735840786999e-05, + "loss": 0.9128, + "step": 426 + }, + { + "epoch": 0.1317291377448712, + "grad_norm": 0.0, + "learning_rate": 1.94641350082996e-05, + "loss": 0.9241, + "step": 427 + }, + { + "epoch": 0.13203763689649853, + "grad_norm": 0.0, + "learning_rate": 1.9460902153068633e-05, + "loss": 0.975, + "step": 428 + }, + { + "epoch": 0.13234613604812587, + "grad_norm": 0.0, + "learning_rate": 1.945765984540705e-05, + "loss": 0.8414, + "step": 429 + }, + { + "epoch": 0.1326546351997532, + "grad_norm": 0.0, + "learning_rate": 1.945440808855426e-05, + "loss": 0.9713, + "step": 430 + }, + { + "epoch": 0.13296313435138055, + "grad_norm": 0.0, + "learning_rate": 1.94511468857591e-05, + "loss": 0.9718, + "step": 431 + }, + { + "epoch": 0.13327163350300786, + "grad_norm": 0.0, + "learning_rate": 1.944787624027986e-05, + "loss": 0.8874, + "step": 432 + }, + { + "epoch": 0.1335801326546352, + "grad_norm": 0.0, + "learning_rate": 1.9444596155384253e-05, + "loss": 0.9717, + "step": 433 + }, + { + "epoch": 0.13388863180626254, + "grad_norm": 0.0, + "learning_rate": 1.944130663434943e-05, + "loss": 0.9274, + "step": 434 + }, + { + "epoch": 0.13419713095788988, + "grad_norm": 0.0, + "learning_rate": 1.9438007680461965e-05, + "loss": 0.879, + "step": 435 + }, + { + "epoch": 0.1345056301095172, + "grad_norm": 0.0, + "learning_rate": 1.9434699297017855e-05, + "loss": 0.8611, + "step": 436 + }, + { + "epoch": 0.13481412926114453, + "grad_norm": 0.0, + "learning_rate": 1.9431381487322527e-05, + "loss": 0.9236, + "step": 437 + }, + { + "epoch": 0.13512262841277187, + "grad_norm": 0.0, + "learning_rate": 1.9428054254690812e-05, + "loss": 0.9343, + "step": 438 + }, + { + "epoch": 0.1354311275643992, + "grad_norm": 0.0, + "learning_rate": 1.9424717602446973e-05, + "loss": 0.831, + "step": 439 + }, + { + "epoch": 0.13573962671602652, + "grad_norm": 0.0, + "learning_rate": 1.9421371533924664e-05, + "loss": 0.8447, + "step": 440 + }, + { + "epoch": 0.13604812586765386, + "grad_norm": 0.0, + "learning_rate": 1.9418016052466965e-05, + "loss": 0.9319, + "step": 441 + }, + { + "epoch": 0.1363566250192812, + "grad_norm": 0.0, + "learning_rate": 1.941465116142635e-05, + "loss": 1.0648, + "step": 442 + }, + { + "epoch": 0.13666512417090854, + "grad_norm": 0.0, + "learning_rate": 1.94112768641647e-05, + "loss": 0.8477, + "step": 443 + }, + { + "epoch": 0.13697362332253588, + "grad_norm": 0.0, + "learning_rate": 1.9407893164053285e-05, + "loss": 0.8829, + "step": 444 + }, + { + "epoch": 0.1372821224741632, + "grad_norm": 0.0, + "learning_rate": 1.940450006447278e-05, + "loss": 0.9168, + "step": 445 + }, + { + "epoch": 0.13759062162579053, + "grad_norm": 0.0, + "learning_rate": 1.9401097568813244e-05, + "loss": 0.936, + "step": 446 + }, + { + "epoch": 0.13789912077741787, + "grad_norm": 0.0, + "learning_rate": 1.939768568047413e-05, + "loss": 0.8714, + "step": 447 + }, + { + "epoch": 0.1382076199290452, + "grad_norm": 0.0, + "learning_rate": 1.9394264402864265e-05, + "loss": 0.9071, + "step": 448 + }, + { + "epoch": 0.13851611908067252, + "grad_norm": 0.0, + "learning_rate": 1.939083373940187e-05, + "loss": 0.8958, + "step": 449 + }, + { + "epoch": 0.13882461823229986, + "grad_norm": 0.0, + "learning_rate": 1.9387393693514532e-05, + "loss": 0.9762, + "step": 450 + }, + { + "epoch": 0.1391331173839272, + "grad_norm": 0.0, + "learning_rate": 1.9383944268639213e-05, + "loss": 0.942, + "step": 451 + }, + { + "epoch": 0.13944161653555454, + "grad_norm": 0.0, + "learning_rate": 1.9380485468222257e-05, + "loss": 0.9979, + "step": 452 + }, + { + "epoch": 0.13975011568718185, + "grad_norm": 0.0, + "learning_rate": 1.9377017295719362e-05, + "loss": 0.9849, + "step": 453 + }, + { + "epoch": 0.1400586148388092, + "grad_norm": 0.0, + "learning_rate": 1.9373539754595598e-05, + "loss": 0.9884, + "step": 454 + }, + { + "epoch": 0.14036711399043653, + "grad_norm": 0.0, + "learning_rate": 1.9370052848325392e-05, + "loss": 0.9648, + "step": 455 + }, + { + "epoch": 0.14067561314206387, + "grad_norm": 0.0, + "learning_rate": 1.9366556580392527e-05, + "loss": 0.893, + "step": 456 + }, + { + "epoch": 0.1409841122936912, + "grad_norm": 0.0, + "learning_rate": 1.936305095429014e-05, + "loss": 0.8304, + "step": 457 + }, + { + "epoch": 0.14129261144531852, + "grad_norm": 0.0, + "learning_rate": 1.935953597352072e-05, + "loss": 0.936, + "step": 458 + }, + { + "epoch": 0.14160111059694586, + "grad_norm": 0.0, + "learning_rate": 1.9356011641596096e-05, + "loss": 0.8423, + "step": 459 + }, + { + "epoch": 0.1419096097485732, + "grad_norm": 0.0, + "learning_rate": 1.9352477962037448e-05, + "loss": 0.9692, + "step": 460 + }, + { + "epoch": 0.14221810890020053, + "grad_norm": 0.0, + "learning_rate": 1.934893493837529e-05, + "loss": 0.9955, + "step": 461 + }, + { + "epoch": 0.14252660805182785, + "grad_norm": 0.0, + "learning_rate": 1.9345382574149473e-05, + "loss": 1.0374, + "step": 462 + }, + { + "epoch": 0.14283510720345519, + "grad_norm": 0.0, + "learning_rate": 1.9341820872909184e-05, + "loss": 0.9431, + "step": 463 + }, + { + "epoch": 0.14314360635508253, + "grad_norm": 0.0, + "learning_rate": 1.933824983821293e-05, + "loss": 0.951, + "step": 464 + }, + { + "epoch": 0.14345210550670986, + "grad_norm": 0.0, + "learning_rate": 1.933466947362855e-05, + "loss": 0.8325, + "step": 465 + }, + { + "epoch": 0.14376060465833718, + "grad_norm": 0.0, + "learning_rate": 1.9331079782733204e-05, + "loss": 0.859, + "step": 466 + }, + { + "epoch": 0.14406910380996452, + "grad_norm": 0.0, + "learning_rate": 1.9327480769113366e-05, + "loss": 0.8799, + "step": 467 + }, + { + "epoch": 0.14437760296159186, + "grad_norm": 0.0, + "learning_rate": 1.9323872436364825e-05, + "loss": 0.8794, + "step": 468 + }, + { + "epoch": 0.1446861021132192, + "grad_norm": 0.0, + "learning_rate": 1.932025478809269e-05, + "loss": 0.928, + "step": 469 + }, + { + "epoch": 0.14499460126484653, + "grad_norm": 0.0, + "learning_rate": 1.9316627827911366e-05, + "loss": 0.895, + "step": 470 + }, + { + "epoch": 0.14530310041647385, + "grad_norm": 0.0, + "learning_rate": 1.9312991559444565e-05, + "loss": 0.9044, + "step": 471 + }, + { + "epoch": 0.14561159956810119, + "grad_norm": 0.0, + "learning_rate": 1.9309345986325298e-05, + "loss": 0.975, + "step": 472 + }, + { + "epoch": 0.14592009871972852, + "grad_norm": 0.0, + "learning_rate": 1.9305691112195876e-05, + "loss": 0.9109, + "step": 473 + }, + { + "epoch": 0.14622859787135586, + "grad_norm": 0.0, + "learning_rate": 1.9302026940707903e-05, + "loss": 0.8694, + "step": 474 + }, + { + "epoch": 0.14653709702298318, + "grad_norm": 0.0, + "learning_rate": 1.9298353475522263e-05, + "loss": 0.8612, + "step": 475 + }, + { + "epoch": 0.14684559617461052, + "grad_norm": 0.0, + "learning_rate": 1.929467072030914e-05, + "loss": 0.9448, + "step": 476 + }, + { + "epoch": 0.14715409532623785, + "grad_norm": 0.0, + "learning_rate": 1.9290978678747984e-05, + "loss": 0.9417, + "step": 477 + }, + { + "epoch": 0.1474625944778652, + "grad_norm": 0.0, + "learning_rate": 1.9287277354527535e-05, + "loss": 0.8778, + "step": 478 + }, + { + "epoch": 0.1477710936294925, + "grad_norm": 0.0, + "learning_rate": 1.92835667513458e-05, + "loss": 0.9273, + "step": 479 + }, + { + "epoch": 0.14807959278111985, + "grad_norm": 0.0, + "learning_rate": 1.927984687291006e-05, + "loss": 0.9406, + "step": 480 + }, + { + "epoch": 0.14838809193274718, + "grad_norm": 0.0, + "learning_rate": 1.9276117722936867e-05, + "loss": 0.9262, + "step": 481 + }, + { + "epoch": 0.14869659108437452, + "grad_norm": 0.0, + "learning_rate": 1.9272379305152026e-05, + "loss": 1.0137, + "step": 482 + }, + { + "epoch": 0.14900509023600186, + "grad_norm": 0.0, + "learning_rate": 1.926863162329061e-05, + "loss": 0.8827, + "step": 483 + }, + { + "epoch": 0.14931358938762918, + "grad_norm": 0.0, + "learning_rate": 1.9264874681096948e-05, + "loss": 0.8382, + "step": 484 + }, + { + "epoch": 0.14962208853925651, + "grad_norm": 0.0, + "learning_rate": 1.9261108482324612e-05, + "loss": 0.8825, + "step": 485 + }, + { + "epoch": 0.14993058769088385, + "grad_norm": 0.0, + "learning_rate": 1.9257333030736433e-05, + "loss": 0.8738, + "step": 486 + }, + { + "epoch": 0.1502390868425112, + "grad_norm": 0.0, + "learning_rate": 1.925354833010448e-05, + "loss": 0.9005, + "step": 487 + }, + { + "epoch": 0.1505475859941385, + "grad_norm": 0.0, + "learning_rate": 1.9249754384210066e-05, + "loss": 0.9739, + "step": 488 + }, + { + "epoch": 0.15085608514576584, + "grad_norm": 0.0, + "learning_rate": 1.9245951196843736e-05, + "loss": 0.9109, + "step": 489 + }, + { + "epoch": 0.15116458429739318, + "grad_norm": 0.0, + "learning_rate": 1.924213877180528e-05, + "loss": 0.9328, + "step": 490 + }, + { + "epoch": 0.15147308344902052, + "grad_norm": 0.0, + "learning_rate": 1.92383171129037e-05, + "loss": 0.9959, + "step": 491 + }, + { + "epoch": 0.15178158260064784, + "grad_norm": 0.0, + "learning_rate": 1.9234486223957238e-05, + "loss": 0.8328, + "step": 492 + }, + { + "epoch": 0.15209008175227517, + "grad_norm": 0.0, + "learning_rate": 1.9230646108793353e-05, + "loss": 1.26, + "step": 493 + }, + { + "epoch": 0.15239858090390251, + "grad_norm": 0.0, + "learning_rate": 1.922679677124872e-05, + "loss": 0.9531, + "step": 494 + }, + { + "epoch": 0.15270708005552985, + "grad_norm": 0.0, + "learning_rate": 1.9222938215169227e-05, + "loss": 0.8436, + "step": 495 + }, + { + "epoch": 0.1530155792071572, + "grad_norm": 0.0, + "learning_rate": 1.921907044440998e-05, + "loss": 0.909, + "step": 496 + }, + { + "epoch": 0.1533240783587845, + "grad_norm": 0.0, + "learning_rate": 1.9215193462835285e-05, + "loss": 1.0067, + "step": 497 + }, + { + "epoch": 0.15363257751041184, + "grad_norm": 0.0, + "learning_rate": 1.921130727431865e-05, + "loss": 0.8809, + "step": 498 + }, + { + "epoch": 0.15394107666203918, + "grad_norm": 0.0, + "learning_rate": 1.9207411882742784e-05, + "loss": 1.0474, + "step": 499 + }, + { + "epoch": 0.15424957581366652, + "grad_norm": 0.0, + "learning_rate": 1.9203507291999598e-05, + "loss": 0.9858, + "step": 500 + }, + { + "epoch": 0.15455807496529383, + "grad_norm": 0.0, + "learning_rate": 1.9199593505990173e-05, + "loss": 0.9519, + "step": 501 + }, + { + "epoch": 0.15486657411692117, + "grad_norm": 0.0, + "learning_rate": 1.91956705286248e-05, + "loss": 0.9109, + "step": 502 + }, + { + "epoch": 0.1551750732685485, + "grad_norm": 0.0, + "learning_rate": 1.9191738363822943e-05, + "loss": 0.9116, + "step": 503 + }, + { + "epoch": 0.15548357242017585, + "grad_norm": 0.0, + "learning_rate": 1.9187797015513244e-05, + "loss": 0.9334, + "step": 504 + }, + { + "epoch": 0.15579207157180316, + "grad_norm": 0.0, + "learning_rate": 1.9183846487633524e-05, + "loss": 0.9153, + "step": 505 + }, + { + "epoch": 0.1561005707234305, + "grad_norm": 0.0, + "learning_rate": 1.9179886784130767e-05, + "loss": 0.9827, + "step": 506 + }, + { + "epoch": 0.15640906987505784, + "grad_norm": 0.0, + "learning_rate": 1.917591790896114e-05, + "loss": 0.9264, + "step": 507 + }, + { + "epoch": 0.15671756902668518, + "grad_norm": 0.0, + "learning_rate": 1.917193986608996e-05, + "loss": 0.8661, + "step": 508 + }, + { + "epoch": 0.15702606817831252, + "grad_norm": 0.0, + "learning_rate": 1.9167952659491703e-05, + "loss": 1.0055, + "step": 509 + }, + { + "epoch": 0.15733456732993983, + "grad_norm": 0.0, + "learning_rate": 1.9163956293150017e-05, + "loss": 0.984, + "step": 510 + }, + { + "epoch": 0.15764306648156717, + "grad_norm": 0.0, + "learning_rate": 1.915995077105768e-05, + "loss": 1.028, + "step": 511 + }, + { + "epoch": 0.1579515656331945, + "grad_norm": 0.0, + "learning_rate": 1.915593609721663e-05, + "loss": 0.9713, + "step": 512 + }, + { + "epoch": 0.15826006478482185, + "grad_norm": 0.0, + "learning_rate": 1.9151912275637946e-05, + "loss": 0.941, + "step": 513 + }, + { + "epoch": 0.15856856393644916, + "grad_norm": 0.0, + "learning_rate": 1.914787931034185e-05, + "loss": 0.9633, + "step": 514 + }, + { + "epoch": 0.1588770630880765, + "grad_norm": 0.0, + "learning_rate": 1.914383720535769e-05, + "loss": 0.9754, + "step": 515 + }, + { + "epoch": 0.15918556223970384, + "grad_norm": 0.0, + "learning_rate": 1.9139785964723955e-05, + "loss": 0.9942, + "step": 516 + }, + { + "epoch": 0.15949406139133118, + "grad_norm": 0.0, + "learning_rate": 1.913572559248826e-05, + "loss": 1.3288, + "step": 517 + }, + { + "epoch": 0.1598025605429585, + "grad_norm": 0.0, + "learning_rate": 1.9131656092707337e-05, + "loss": 0.8349, + "step": 518 + }, + { + "epoch": 0.16011105969458583, + "grad_norm": 0.0, + "learning_rate": 1.9127577469447045e-05, + "loss": 1.0034, + "step": 519 + }, + { + "epoch": 0.16041955884621317, + "grad_norm": 0.0, + "learning_rate": 1.912348972678235e-05, + "loss": 1.005, + "step": 520 + }, + { + "epoch": 0.1607280579978405, + "grad_norm": 0.0, + "learning_rate": 1.9119392868797347e-05, + "loss": 0.8971, + "step": 521 + }, + { + "epoch": 0.16103655714946785, + "grad_norm": 0.0, + "learning_rate": 1.9115286899585214e-05, + "loss": 0.9835, + "step": 522 + }, + { + "epoch": 0.16134505630109516, + "grad_norm": 0.0, + "learning_rate": 1.9111171823248243e-05, + "loss": 0.9274, + "step": 523 + }, + { + "epoch": 0.1616535554527225, + "grad_norm": 0.0, + "learning_rate": 1.9107047643897835e-05, + "loss": 0.8566, + "step": 524 + }, + { + "epoch": 0.16196205460434984, + "grad_norm": 0.0, + "learning_rate": 1.9102914365654465e-05, + "loss": 0.933, + "step": 525 + }, + { + "epoch": 0.16227055375597718, + "grad_norm": 0.0, + "learning_rate": 1.909877199264772e-05, + "loss": 0.9228, + "step": 526 + }, + { + "epoch": 0.1625790529076045, + "grad_norm": 0.0, + "learning_rate": 1.909462052901626e-05, + "loss": 0.9014, + "step": 527 + }, + { + "epoch": 0.16288755205923183, + "grad_norm": 0.0, + "learning_rate": 1.909045997890783e-05, + "loss": 1.0034, + "step": 528 + }, + { + "epoch": 0.16319605121085917, + "grad_norm": 0.0, + "learning_rate": 1.9086290346479254e-05, + "loss": 0.906, + "step": 529 + }, + { + "epoch": 0.1635045503624865, + "grad_norm": 0.0, + "learning_rate": 1.9082111635896438e-05, + "loss": 0.9278, + "step": 530 + }, + { + "epoch": 0.16381304951411382, + "grad_norm": 0.0, + "learning_rate": 1.9077923851334342e-05, + "loss": 0.9236, + "step": 531 + }, + { + "epoch": 0.16412154866574116, + "grad_norm": 0.0, + "learning_rate": 1.9073726996977004e-05, + "loss": 0.9511, + "step": 532 + }, + { + "epoch": 0.1644300478173685, + "grad_norm": 0.0, + "learning_rate": 1.906952107701752e-05, + "loss": 0.7983, + "step": 533 + }, + { + "epoch": 0.16473854696899584, + "grad_norm": 0.0, + "learning_rate": 1.9065306095658048e-05, + "loss": 0.9158, + "step": 534 + }, + { + "epoch": 0.16504704612062318, + "grad_norm": 0.0, + "learning_rate": 1.9061082057109787e-05, + "loss": 0.9543, + "step": 535 + }, + { + "epoch": 0.1653555452722505, + "grad_norm": 0.0, + "learning_rate": 1.9056848965593e-05, + "loss": 0.9502, + "step": 536 + }, + { + "epoch": 0.16566404442387783, + "grad_norm": 0.0, + "learning_rate": 1.905260682533699e-05, + "loss": 0.9167, + "step": 537 + }, + { + "epoch": 0.16597254357550517, + "grad_norm": 0.0, + "learning_rate": 1.9048355640580087e-05, + "loss": 1.0018, + "step": 538 + }, + { + "epoch": 0.1662810427271325, + "grad_norm": 0.0, + "learning_rate": 1.904409541556968e-05, + "loss": 0.8541, + "step": 539 + }, + { + "epoch": 0.16658954187875982, + "grad_norm": 0.0, + "learning_rate": 1.903982615456218e-05, + "loss": 0.8704, + "step": 540 + }, + { + "epoch": 0.16689804103038716, + "grad_norm": 0.0, + "learning_rate": 1.9035547861823016e-05, + "loss": 0.9562, + "step": 541 + }, + { + "epoch": 0.1672065401820145, + "grad_norm": 0.0, + "learning_rate": 1.903126054162666e-05, + "loss": 0.9997, + "step": 542 + }, + { + "epoch": 0.16751503933364184, + "grad_norm": 0.0, + "learning_rate": 1.9026964198256583e-05, + "loss": 0.9693, + "step": 543 + }, + { + "epoch": 0.16782353848526915, + "grad_norm": 0.0, + "learning_rate": 1.902265883600529e-05, + "loss": 0.8984, + "step": 544 + }, + { + "epoch": 0.1681320376368965, + "grad_norm": 0.0, + "learning_rate": 1.9018344459174285e-05, + "loss": 0.8469, + "step": 545 + }, + { + "epoch": 0.16844053678852383, + "grad_norm": 0.0, + "learning_rate": 1.901402107207408e-05, + "loss": 0.8924, + "step": 546 + }, + { + "epoch": 0.16874903594015117, + "grad_norm": 0.0, + "learning_rate": 1.900968867902419e-05, + "loss": 0.9235, + "step": 547 + }, + { + "epoch": 0.1690575350917785, + "grad_norm": 0.0, + "learning_rate": 1.9005347284353136e-05, + "loss": 1.2873, + "step": 548 + }, + { + "epoch": 0.16936603424340582, + "grad_norm": 0.0, + "learning_rate": 1.9000996892398418e-05, + "loss": 0.9419, + "step": 549 + }, + { + "epoch": 0.16967453339503316, + "grad_norm": 0.0, + "learning_rate": 1.8996637507506538e-05, + "loss": 0.8735, + "step": 550 + }, + { + "epoch": 0.1699830325466605, + "grad_norm": 0.0, + "learning_rate": 1.899226913403297e-05, + "loss": 0.8714, + "step": 551 + }, + { + "epoch": 0.17029153169828784, + "grad_norm": 0.0, + "learning_rate": 1.898789177634218e-05, + "loss": 0.9299, + "step": 552 + }, + { + "epoch": 0.17060003084991515, + "grad_norm": 0.0, + "learning_rate": 1.898350543880761e-05, + "loss": 0.8985, + "step": 553 + }, + { + "epoch": 0.1709085300015425, + "grad_norm": 0.0, + "learning_rate": 1.897911012581166e-05, + "loss": 0.936, + "step": 554 + }, + { + "epoch": 0.17121702915316983, + "grad_norm": 0.0, + "learning_rate": 1.8974705841745717e-05, + "loss": 0.9537, + "step": 555 + }, + { + "epoch": 0.17152552830479717, + "grad_norm": 0.0, + "learning_rate": 1.897029259101012e-05, + "loss": 1.2198, + "step": 556 + }, + { + "epoch": 0.17183402745642448, + "grad_norm": 0.0, + "learning_rate": 1.896587037801416e-05, + "loss": 0.9152, + "step": 557 + }, + { + "epoch": 0.17214252660805182, + "grad_norm": 0.0, + "learning_rate": 1.89614392071761e-05, + "loss": 0.9322, + "step": 558 + }, + { + "epoch": 0.17245102575967916, + "grad_norm": 0.0, + "learning_rate": 1.895699908292314e-05, + "loss": 0.9327, + "step": 559 + }, + { + "epoch": 0.1727595249113065, + "grad_norm": 0.0, + "learning_rate": 1.8952550009691424e-05, + "loss": 0.9372, + "step": 560 + }, + { + "epoch": 0.17306802406293384, + "grad_norm": 0.0, + "learning_rate": 1.894809199192605e-05, + "loss": 0.9337, + "step": 561 + }, + { + "epoch": 0.17337652321456115, + "grad_norm": 0.0, + "learning_rate": 1.8943625034081032e-05, + "loss": 0.9088, + "step": 562 + }, + { + "epoch": 0.1736850223661885, + "grad_norm": 0.0, + "learning_rate": 1.8939149140619338e-05, + "loss": 0.9155, + "step": 563 + }, + { + "epoch": 0.17399352151781583, + "grad_norm": 0.0, + "learning_rate": 1.8934664316012853e-05, + "loss": 0.9342, + "step": 564 + }, + { + "epoch": 0.17430202066944317, + "grad_norm": 0.0, + "learning_rate": 1.8930170564742377e-05, + "loss": 0.9431, + "step": 565 + }, + { + "epoch": 0.17461051982107048, + "grad_norm": 0.0, + "learning_rate": 1.8925667891297646e-05, + "loss": 0.843, + "step": 566 + }, + { + "epoch": 0.17491901897269782, + "grad_norm": 0.0, + "learning_rate": 1.8921156300177303e-05, + "loss": 0.881, + "step": 567 + }, + { + "epoch": 0.17522751812432516, + "grad_norm": 0.0, + "learning_rate": 1.8916635795888895e-05, + "loss": 0.9073, + "step": 568 + }, + { + "epoch": 0.1755360172759525, + "grad_norm": 0.0, + "learning_rate": 1.8912106382948875e-05, + "loss": 0.9315, + "step": 569 + }, + { + "epoch": 0.1758445164275798, + "grad_norm": 0.0, + "learning_rate": 1.890756806588261e-05, + "loss": 0.9232, + "step": 570 + }, + { + "epoch": 0.17615301557920715, + "grad_norm": 0.0, + "learning_rate": 1.8903020849224343e-05, + "loss": 0.8926, + "step": 571 + }, + { + "epoch": 0.1764615147308345, + "grad_norm": 0.0, + "learning_rate": 1.8898464737517225e-05, + "loss": 0.8986, + "step": 572 + }, + { + "epoch": 0.17677001388246183, + "grad_norm": 0.0, + "learning_rate": 1.8893899735313285e-05, + "loss": 0.9223, + "step": 573 + }, + { + "epoch": 0.17707851303408917, + "grad_norm": 0.0, + "learning_rate": 1.8889325847173444e-05, + "loss": 0.8557, + "step": 574 + }, + { + "epoch": 0.17738701218571648, + "grad_norm": 0.0, + "learning_rate": 1.8884743077667487e-05, + "loss": 0.924, + "step": 575 + }, + { + "epoch": 0.17769551133734382, + "grad_norm": 0.0, + "learning_rate": 1.8880151431374082e-05, + "loss": 0.9458, + "step": 576 + }, + { + "epoch": 0.17800401048897116, + "grad_norm": 0.0, + "learning_rate": 1.8875550912880766e-05, + "loss": 0.9072, + "step": 577 + }, + { + "epoch": 0.1783125096405985, + "grad_norm": 0.0, + "learning_rate": 1.8870941526783933e-05, + "loss": 0.895, + "step": 578 + }, + { + "epoch": 0.1786210087922258, + "grad_norm": 0.0, + "learning_rate": 1.886632327768885e-05, + "loss": 0.9086, + "step": 579 + }, + { + "epoch": 0.17892950794385315, + "grad_norm": 0.0, + "learning_rate": 1.8861696170209626e-05, + "loss": 0.8276, + "step": 580 + }, + { + "epoch": 0.1792380070954805, + "grad_norm": 0.0, + "learning_rate": 1.8857060208969225e-05, + "loss": 0.9947, + "step": 581 + }, + { + "epoch": 0.17954650624710783, + "grad_norm": 0.0, + "learning_rate": 1.8852415398599455e-05, + "loss": 0.9668, + "step": 582 + }, + { + "epoch": 0.17985500539873514, + "grad_norm": 0.0, + "learning_rate": 1.8847761743740972e-05, + "loss": 0.9431, + "step": 583 + }, + { + "epoch": 0.18016350455036248, + "grad_norm": 0.0, + "learning_rate": 1.8843099249043258e-05, + "loss": 0.9453, + "step": 584 + }, + { + "epoch": 0.18047200370198982, + "grad_norm": 0.0, + "learning_rate": 1.8838427919164643e-05, + "loss": 0.9127, + "step": 585 + }, + { + "epoch": 0.18078050285361716, + "grad_norm": 0.0, + "learning_rate": 1.8833747758772264e-05, + "loss": 0.9754, + "step": 586 + }, + { + "epoch": 0.1810890020052445, + "grad_norm": 0.0, + "learning_rate": 1.8829058772542092e-05, + "loss": 0.9359, + "step": 587 + }, + { + "epoch": 0.1813975011568718, + "grad_norm": 0.0, + "learning_rate": 1.882436096515892e-05, + "loss": 0.9143, + "step": 588 + }, + { + "epoch": 0.18170600030849915, + "grad_norm": 0.0, + "learning_rate": 1.8819654341316344e-05, + "loss": 0.9026, + "step": 589 + }, + { + "epoch": 0.1820144994601265, + "grad_norm": 0.0, + "learning_rate": 1.8814938905716778e-05, + "loss": 0.9464, + "step": 590 + }, + { + "epoch": 0.18232299861175383, + "grad_norm": 0.0, + "learning_rate": 1.8810214663071428e-05, + "loss": 0.9162, + "step": 591 + }, + { + "epoch": 0.18263149776338114, + "grad_norm": 0.0, + "learning_rate": 1.8805481618100314e-05, + "loss": 1.0065, + "step": 592 + }, + { + "epoch": 0.18293999691500848, + "grad_norm": 0.0, + "learning_rate": 1.8800739775532238e-05, + "loss": 0.92, + "step": 593 + }, + { + "epoch": 0.18324849606663582, + "grad_norm": 0.0, + "learning_rate": 1.8795989140104797e-05, + "loss": 0.936, + "step": 594 + }, + { + "epoch": 0.18355699521826316, + "grad_norm": 0.0, + "learning_rate": 1.8791229716564374e-05, + "loss": 0.9698, + "step": 595 + }, + { + "epoch": 0.18386549436989047, + "grad_norm": 0.0, + "learning_rate": 1.8786461509666135e-05, + "loss": 0.9489, + "step": 596 + }, + { + "epoch": 0.1841739935215178, + "grad_norm": 0.0, + "learning_rate": 1.8781684524174006e-05, + "loss": 0.9536, + "step": 597 + }, + { + "epoch": 0.18448249267314515, + "grad_norm": 0.0, + "learning_rate": 1.8776898764860707e-05, + "loss": 0.9249, + "step": 598 + }, + { + "epoch": 0.1847909918247725, + "grad_norm": 0.0, + "learning_rate": 1.8772104236507703e-05, + "loss": 0.9112, + "step": 599 + }, + { + "epoch": 0.18509949097639983, + "grad_norm": 0.0, + "learning_rate": 1.8767300943905238e-05, + "loss": 0.9114, + "step": 600 + }, + { + "epoch": 0.18540799012802714, + "grad_norm": 0.0, + "learning_rate": 1.8762488891852296e-05, + "loss": 0.9409, + "step": 601 + }, + { + "epoch": 0.18571648927965448, + "grad_norm": 0.0, + "learning_rate": 1.8757668085156628e-05, + "loss": 0.9692, + "step": 602 + }, + { + "epoch": 0.18602498843128182, + "grad_norm": 0.0, + "learning_rate": 1.875283852863471e-05, + "loss": 0.8082, + "step": 603 + }, + { + "epoch": 0.18633348758290916, + "grad_norm": 0.0, + "learning_rate": 1.874800022711179e-05, + "loss": 1.0027, + "step": 604 + }, + { + "epoch": 0.18664198673453647, + "grad_norm": 0.0, + "learning_rate": 1.8743153185421826e-05, + "loss": 0.9285, + "step": 605 + }, + { + "epoch": 0.1869504858861638, + "grad_norm": 0.0, + "learning_rate": 1.8738297408407525e-05, + "loss": 0.867, + "step": 606 + }, + { + "epoch": 0.18725898503779115, + "grad_norm": 0.0, + "learning_rate": 1.8733432900920316e-05, + "loss": 0.9368, + "step": 607 + }, + { + "epoch": 0.1875674841894185, + "grad_norm": 0.0, + "learning_rate": 1.872855966782035e-05, + "loss": 1.0111, + "step": 608 + }, + { + "epoch": 0.1878759833410458, + "grad_norm": 0.0, + "learning_rate": 1.8723677713976494e-05, + "loss": 0.9438, + "step": 609 + }, + { + "epoch": 0.18818448249267314, + "grad_norm": 0.0, + "learning_rate": 1.8718787044266334e-05, + "loss": 0.9616, + "step": 610 + }, + { + "epoch": 0.18849298164430048, + "grad_norm": 0.0, + "learning_rate": 1.8713887663576156e-05, + "loss": 1.1784, + "step": 611 + }, + { + "epoch": 0.18880148079592782, + "grad_norm": 0.0, + "learning_rate": 1.8708979576800962e-05, + "loss": 0.9202, + "step": 612 + }, + { + "epoch": 0.18910997994755516, + "grad_norm": 0.0, + "learning_rate": 1.8704062788844435e-05, + "loss": 0.9447, + "step": 613 + }, + { + "epoch": 0.18941847909918247, + "grad_norm": 0.0, + "learning_rate": 1.8699137304618963e-05, + "loss": 0.8955, + "step": 614 + }, + { + "epoch": 0.1897269782508098, + "grad_norm": 0.0, + "learning_rate": 1.869420312904562e-05, + "loss": 0.9599, + "step": 615 + }, + { + "epoch": 0.19003547740243715, + "grad_norm": 0.0, + "learning_rate": 1.868926026705417e-05, + "loss": 0.8844, + "step": 616 + }, + { + "epoch": 0.1903439765540645, + "grad_norm": 0.0, + "learning_rate": 1.8684308723583037e-05, + "loss": 0.8832, + "step": 617 + }, + { + "epoch": 0.1906524757056918, + "grad_norm": 0.0, + "learning_rate": 1.8679348503579336e-05, + "loss": 0.9653, + "step": 618 + }, + { + "epoch": 0.19096097485731914, + "grad_norm": 0.0, + "learning_rate": 1.867437961199885e-05, + "loss": 0.8365, + "step": 619 + }, + { + "epoch": 0.19126947400894648, + "grad_norm": 0.0, + "learning_rate": 1.8669402053806016e-05, + "loss": 0.8715, + "step": 620 + }, + { + "epoch": 0.19157797316057382, + "grad_norm": 0.0, + "learning_rate": 1.8664415833973937e-05, + "loss": 0.9752, + "step": 621 + }, + { + "epoch": 0.19188647231220113, + "grad_norm": 0.0, + "learning_rate": 1.8659420957484367e-05, + "loss": 0.9336, + "step": 622 + }, + { + "epoch": 0.19219497146382847, + "grad_norm": 0.0, + "learning_rate": 1.865441742932771e-05, + "loss": 0.9283, + "step": 623 + }, + { + "epoch": 0.1925034706154558, + "grad_norm": 0.0, + "learning_rate": 1.8649405254503018e-05, + "loss": 0.879, + "step": 624 + }, + { + "epoch": 0.19281196976708315, + "grad_norm": 0.0, + "learning_rate": 1.864438443801797e-05, + "loss": 1.036, + "step": 625 + }, + { + "epoch": 0.1931204689187105, + "grad_norm": 0.0, + "learning_rate": 1.863935498488889e-05, + "loss": 1.0019, + "step": 626 + }, + { + "epoch": 0.1934289680703378, + "grad_norm": 0.0, + "learning_rate": 1.8634316900140733e-05, + "loss": 0.9301, + "step": 627 + }, + { + "epoch": 0.19373746722196514, + "grad_norm": 0.0, + "learning_rate": 1.862927018880707e-05, + "loss": 1.1868, + "step": 628 + }, + { + "epoch": 0.19404596637359248, + "grad_norm": 0.0, + "learning_rate": 1.862421485593009e-05, + "loss": 1.0311, + "step": 629 + }, + { + "epoch": 0.19435446552521982, + "grad_norm": 0.0, + "learning_rate": 1.86191509065606e-05, + "loss": 0.959, + "step": 630 + }, + { + "epoch": 0.19466296467684713, + "grad_norm": 0.0, + "learning_rate": 1.8614078345758014e-05, + "loss": 0.8862, + "step": 631 + }, + { + "epoch": 0.19497146382847447, + "grad_norm": 0.0, + "learning_rate": 1.8608997178590354e-05, + "loss": 0.8945, + "step": 632 + }, + { + "epoch": 0.1952799629801018, + "grad_norm": 0.0, + "learning_rate": 1.8603907410134233e-05, + "loss": 0.889, + "step": 633 + }, + { + "epoch": 0.19558846213172915, + "grad_norm": 0.0, + "learning_rate": 1.8598809045474868e-05, + "loss": 0.8235, + "step": 634 + }, + { + "epoch": 0.19589696128335646, + "grad_norm": 0.0, + "learning_rate": 1.859370208970605e-05, + "loss": 0.8782, + "step": 635 + }, + { + "epoch": 0.1962054604349838, + "grad_norm": 0.0, + "learning_rate": 1.8588586547930176e-05, + "loss": 0.9183, + "step": 636 + }, + { + "epoch": 0.19651395958661114, + "grad_norm": 0.0, + "learning_rate": 1.8583462425258192e-05, + "loss": 0.866, + "step": 637 + }, + { + "epoch": 0.19682245873823848, + "grad_norm": 0.0, + "learning_rate": 1.857832972680964e-05, + "loss": 0.9157, + "step": 638 + }, + { + "epoch": 0.19713095788986582, + "grad_norm": 0.0, + "learning_rate": 1.8573188457712624e-05, + "loss": 0.8942, + "step": 639 + }, + { + "epoch": 0.19743945704149313, + "grad_norm": 0.0, + "learning_rate": 1.8568038623103802e-05, + "loss": 0.95, + "step": 640 + }, + { + "epoch": 0.19774795619312047, + "grad_norm": 0.0, + "learning_rate": 1.8562880228128402e-05, + "loss": 0.9938, + "step": 641 + }, + { + "epoch": 0.1980564553447478, + "grad_norm": 0.0, + "learning_rate": 1.8557713277940203e-05, + "loss": 0.8951, + "step": 642 + }, + { + "epoch": 0.19836495449637515, + "grad_norm": 0.0, + "learning_rate": 1.8552537777701525e-05, + "loss": 0.8841, + "step": 643 + }, + { + "epoch": 0.19867345364800246, + "grad_norm": 0.0, + "learning_rate": 1.8547353732583234e-05, + "loss": 0.923, + "step": 644 + }, + { + "epoch": 0.1989819527996298, + "grad_norm": 0.0, + "learning_rate": 1.8542161147764735e-05, + "loss": 0.8871, + "step": 645 + }, + { + "epoch": 0.19929045195125714, + "grad_norm": 0.0, + "learning_rate": 1.853696002843396e-05, + "loss": 0.806, + "step": 646 + }, + { + "epoch": 0.19959895110288448, + "grad_norm": 0.0, + "learning_rate": 1.8531750379787375e-05, + "loss": 0.8171, + "step": 647 + }, + { + "epoch": 0.1999074502545118, + "grad_norm": 0.0, + "learning_rate": 1.852653220702996e-05, + "loss": 0.8989, + "step": 648 + }, + { + "epoch": 0.20021594940613913, + "grad_norm": 0.0, + "learning_rate": 1.8521305515375217e-05, + "loss": 0.8877, + "step": 649 + }, + { + "epoch": 0.20052444855776647, + "grad_norm": 0.0, + "learning_rate": 1.8516070310045157e-05, + "loss": 0.9185, + "step": 650 + }, + { + "epoch": 0.2008329477093938, + "grad_norm": 0.0, + "learning_rate": 1.85108265962703e-05, + "loss": 0.883, + "step": 651 + }, + { + "epoch": 0.20114144686102114, + "grad_norm": 0.0, + "learning_rate": 1.850557437928966e-05, + "loss": 0.8563, + "step": 652 + }, + { + "epoch": 0.20144994601264846, + "grad_norm": 0.0, + "learning_rate": 1.8500313664350758e-05, + "loss": 0.9672, + "step": 653 + }, + { + "epoch": 0.2017584451642758, + "grad_norm": 0.0, + "learning_rate": 1.849504445670959e-05, + "loss": 0.9432, + "step": 654 + }, + { + "epoch": 0.20206694431590314, + "grad_norm": 0.0, + "learning_rate": 1.848976676163065e-05, + "loss": 0.9075, + "step": 655 + }, + { + "epoch": 0.20237544346753047, + "grad_norm": 0.0, + "learning_rate": 1.8484480584386907e-05, + "loss": 0.9218, + "step": 656 + }, + { + "epoch": 0.2026839426191578, + "grad_norm": 0.0, + "learning_rate": 1.847918593025981e-05, + "loss": 0.849, + "step": 657 + }, + { + "epoch": 0.20299244177078513, + "grad_norm": 0.0, + "learning_rate": 1.847388280453926e-05, + "loss": 0.8514, + "step": 658 + }, + { + "epoch": 0.20330094092241247, + "grad_norm": 0.0, + "learning_rate": 1.846857121252364e-05, + "loss": 0.8227, + "step": 659 + }, + { + "epoch": 0.2036094400740398, + "grad_norm": 0.0, + "learning_rate": 1.8463251159519793e-05, + "loss": 0.8503, + "step": 660 + }, + { + "epoch": 0.20391793922566712, + "grad_norm": 0.0, + "learning_rate": 1.8457922650842998e-05, + "loss": 0.9469, + "step": 661 + }, + { + "epoch": 0.20422643837729446, + "grad_norm": 0.0, + "learning_rate": 1.8452585691817e-05, + "loss": 0.8618, + "step": 662 + }, + { + "epoch": 0.2045349375289218, + "grad_norm": 0.0, + "learning_rate": 1.8447240287773973e-05, + "loss": 0.9251, + "step": 663 + }, + { + "epoch": 0.20484343668054913, + "grad_norm": 0.0, + "learning_rate": 1.8441886444054534e-05, + "loss": 1.0748, + "step": 664 + }, + { + "epoch": 0.20515193583217647, + "grad_norm": 0.0, + "learning_rate": 1.8436524166007742e-05, + "loss": 0.9532, + "step": 665 + }, + { + "epoch": 0.20546043498380379, + "grad_norm": 0.0, + "learning_rate": 1.8431153458991066e-05, + "loss": 0.9112, + "step": 666 + }, + { + "epoch": 0.20576893413543113, + "grad_norm": 0.0, + "learning_rate": 1.8425774328370402e-05, + "loss": 0.9206, + "step": 667 + }, + { + "epoch": 0.20607743328705846, + "grad_norm": 0.0, + "learning_rate": 1.8420386779520073e-05, + "loss": 0.8469, + "step": 668 + }, + { + "epoch": 0.2063859324386858, + "grad_norm": 0.0, + "learning_rate": 1.8414990817822794e-05, + "loss": 0.8662, + "step": 669 + }, + { + "epoch": 0.20669443159031312, + "grad_norm": 0.0, + "learning_rate": 1.84095864486697e-05, + "loss": 0.9517, + "step": 670 + }, + { + "epoch": 0.20700293074194046, + "grad_norm": 0.0, + "learning_rate": 1.840417367746032e-05, + "loss": 1.2155, + "step": 671 + }, + { + "epoch": 0.2073114298935678, + "grad_norm": 0.0, + "learning_rate": 1.839875250960258e-05, + "loss": 0.882, + "step": 672 + }, + { + "epoch": 0.20761992904519513, + "grad_norm": 0.0, + "learning_rate": 1.8393322950512793e-05, + "loss": 0.9063, + "step": 673 + }, + { + "epoch": 0.20792842819682245, + "grad_norm": 0.0, + "learning_rate": 1.8387885005615652e-05, + "loss": 0.9307, + "step": 674 + }, + { + "epoch": 0.20823692734844979, + "grad_norm": 0.0, + "learning_rate": 1.838243868034424e-05, + "loss": 0.9967, + "step": 675 + }, + { + "epoch": 0.20854542650007712, + "grad_norm": 0.0, + "learning_rate": 1.837698398014e-05, + "loss": 0.885, + "step": 676 + }, + { + "epoch": 0.20885392565170446, + "grad_norm": 0.0, + "learning_rate": 1.837152091045275e-05, + "loss": 0.923, + "step": 677 + }, + { + "epoch": 0.2091624248033318, + "grad_norm": 0.0, + "learning_rate": 1.8366049476740667e-05, + "loss": 0.8932, + "step": 678 + }, + { + "epoch": 0.20947092395495912, + "grad_norm": 0.0, + "learning_rate": 1.8360569684470286e-05, + "loss": 0.8664, + "step": 679 + }, + { + "epoch": 0.20977942310658645, + "grad_norm": 0.0, + "learning_rate": 1.8355081539116492e-05, + "loss": 0.9405, + "step": 680 + }, + { + "epoch": 0.2100879222582138, + "grad_norm": 0.0, + "learning_rate": 1.8349585046162517e-05, + "loss": 0.9038, + "step": 681 + }, + { + "epoch": 0.21039642140984113, + "grad_norm": 0.0, + "learning_rate": 1.8344080211099934e-05, + "loss": 0.986, + "step": 682 + }, + { + "epoch": 0.21070492056146845, + "grad_norm": 0.0, + "learning_rate": 1.8338567039428642e-05, + "loss": 0.871, + "step": 683 + }, + { + "epoch": 0.21101341971309578, + "grad_norm": 0.0, + "learning_rate": 1.8333045536656882e-05, + "loss": 0.9415, + "step": 684 + }, + { + "epoch": 0.21132191886472312, + "grad_norm": 0.0, + "learning_rate": 1.8327515708301217e-05, + "loss": 0.8756, + "step": 685 + }, + { + "epoch": 0.21163041801635046, + "grad_norm": 0.0, + "learning_rate": 1.8321977559886514e-05, + "loss": 1.0333, + "step": 686 + }, + { + "epoch": 0.21193891716797778, + "grad_norm": 0.0, + "learning_rate": 1.8316431096945965e-05, + "loss": 0.892, + "step": 687 + }, + { + "epoch": 0.21224741631960511, + "grad_norm": 0.0, + "learning_rate": 1.831087632502107e-05, + "loss": 0.9391, + "step": 688 + }, + { + "epoch": 0.21255591547123245, + "grad_norm": 0.0, + "learning_rate": 1.8305313249661628e-05, + "loss": 1.0055, + "step": 689 + }, + { + "epoch": 0.2128644146228598, + "grad_norm": 0.0, + "learning_rate": 1.8299741876425725e-05, + "loss": 0.9493, + "step": 690 + }, + { + "epoch": 0.21317291377448713, + "grad_norm": 0.0, + "learning_rate": 1.8294162210879753e-05, + "loss": 0.9286, + "step": 691 + }, + { + "epoch": 0.21348141292611444, + "grad_norm": 0.0, + "learning_rate": 1.8288574258598378e-05, + "loss": 0.8777, + "step": 692 + }, + { + "epoch": 0.21378991207774178, + "grad_norm": 0.0, + "learning_rate": 1.8282978025164553e-05, + "loss": 0.9229, + "step": 693 + }, + { + "epoch": 0.21409841122936912, + "grad_norm": 0.0, + "learning_rate": 1.8277373516169493e-05, + "loss": 0.907, + "step": 694 + }, + { + "epoch": 0.21440691038099646, + "grad_norm": 0.0, + "learning_rate": 1.8271760737212697e-05, + "loss": 0.9317, + "step": 695 + }, + { + "epoch": 0.21471540953262377, + "grad_norm": 0.0, + "learning_rate": 1.8266139693901914e-05, + "loss": 0.865, + "step": 696 + }, + { + "epoch": 0.2150239086842511, + "grad_norm": 0.0, + "learning_rate": 1.826051039185315e-05, + "loss": 0.9932, + "step": 697 + }, + { + "epoch": 0.21533240783587845, + "grad_norm": 0.0, + "learning_rate": 1.8254872836690672e-05, + "loss": 0.8622, + "step": 698 + }, + { + "epoch": 0.2156409069875058, + "grad_norm": 0.0, + "learning_rate": 1.8249227034046986e-05, + "loss": 0.9023, + "step": 699 + }, + { + "epoch": 0.2159494061391331, + "grad_norm": 0.0, + "learning_rate": 1.824357298956284e-05, + "loss": 0.8664, + "step": 700 + }, + { + "epoch": 0.21625790529076044, + "grad_norm": 0.0, + "learning_rate": 1.823791070888721e-05, + "loss": 0.9108, + "step": 701 + }, + { + "epoch": 0.21656640444238778, + "grad_norm": 0.0, + "learning_rate": 1.8232240197677318e-05, + "loss": 0.9412, + "step": 702 + }, + { + "epoch": 0.21687490359401512, + "grad_norm": 0.0, + "learning_rate": 1.8226561461598583e-05, + "loss": 0.9271, + "step": 703 + }, + { + "epoch": 0.21718340274564246, + "grad_norm": 0.0, + "learning_rate": 1.8220874506324667e-05, + "loss": 0.9417, + "step": 704 + }, + { + "epoch": 0.21749190189726977, + "grad_norm": 0.0, + "learning_rate": 1.8215179337537433e-05, + "loss": 0.9904, + "step": 705 + }, + { + "epoch": 0.2178004010488971, + "grad_norm": 0.0, + "learning_rate": 1.8209475960926946e-05, + "loss": 0.8971, + "step": 706 + }, + { + "epoch": 0.21810890020052445, + "grad_norm": 0.0, + "learning_rate": 1.8203764382191476e-05, + "loss": 0.9891, + "step": 707 + }, + { + "epoch": 0.2184173993521518, + "grad_norm": 0.0, + "learning_rate": 1.8198044607037486e-05, + "loss": 0.8612, + "step": 708 + }, + { + "epoch": 0.2187258985037791, + "grad_norm": 0.0, + "learning_rate": 1.8192316641179634e-05, + "loss": 0.865, + "step": 709 + }, + { + "epoch": 0.21903439765540644, + "grad_norm": 0.0, + "learning_rate": 1.8186580490340754e-05, + "loss": 0.9252, + "step": 710 + }, + { + "epoch": 0.21934289680703378, + "grad_norm": 0.0, + "learning_rate": 1.8180836160251863e-05, + "loss": 0.9649, + "step": 711 + }, + { + "epoch": 0.21965139595866112, + "grad_norm": 0.0, + "learning_rate": 1.8175083656652143e-05, + "loss": 0.7586, + "step": 712 + }, + { + "epoch": 0.21995989511028843, + "grad_norm": 0.0, + "learning_rate": 1.816932298528895e-05, + "loss": 0.8505, + "step": 713 + }, + { + "epoch": 0.22026839426191577, + "grad_norm": 0.0, + "learning_rate": 1.8163554151917796e-05, + "loss": 0.8742, + "step": 714 + }, + { + "epoch": 0.2205768934135431, + "grad_norm": 0.0, + "learning_rate": 1.815777716230235e-05, + "loss": 0.8806, + "step": 715 + }, + { + "epoch": 0.22088539256517045, + "grad_norm": 0.0, + "learning_rate": 1.8151992022214425e-05, + "loss": 0.8527, + "step": 716 + }, + { + "epoch": 0.2211938917167978, + "grad_norm": 0.0, + "learning_rate": 1.8146198737433993e-05, + "loss": 0.9671, + "step": 717 + }, + { + "epoch": 0.2215023908684251, + "grad_norm": 0.0, + "learning_rate": 1.8140397313749134e-05, + "loss": 0.9928, + "step": 718 + }, + { + "epoch": 0.22181089002005244, + "grad_norm": 0.0, + "learning_rate": 1.8134587756956084e-05, + "loss": 1.0005, + "step": 719 + }, + { + "epoch": 0.22211938917167978, + "grad_norm": 0.0, + "learning_rate": 1.8128770072859202e-05, + "loss": 0.8197, + "step": 720 + }, + { + "epoch": 0.22242788832330712, + "grad_norm": 0.0, + "learning_rate": 1.812294426727096e-05, + "loss": 0.8596, + "step": 721 + }, + { + "epoch": 0.22273638747493443, + "grad_norm": 0.0, + "learning_rate": 1.8117110346011946e-05, + "loss": 0.947, + "step": 722 + }, + { + "epoch": 0.22304488662656177, + "grad_norm": 0.0, + "learning_rate": 1.8111268314910857e-05, + "loss": 0.9231, + "step": 723 + }, + { + "epoch": 0.2233533857781891, + "grad_norm": 0.0, + "learning_rate": 1.81054181798045e-05, + "loss": 0.9239, + "step": 724 + }, + { + "epoch": 0.22366188492981645, + "grad_norm": 0.0, + "learning_rate": 1.809955994653776e-05, + "loss": 0.9616, + "step": 725 + }, + { + "epoch": 0.22397038408144376, + "grad_norm": 0.0, + "learning_rate": 1.809369362096363e-05, + "loss": 0.8898, + "step": 726 + }, + { + "epoch": 0.2242788832330711, + "grad_norm": 0.0, + "learning_rate": 1.8087819208943186e-05, + "loss": 0.8848, + "step": 727 + }, + { + "epoch": 0.22458738238469844, + "grad_norm": 0.0, + "learning_rate": 1.8081936716345574e-05, + "loss": 0.8048, + "step": 728 + }, + { + "epoch": 0.22489588153632578, + "grad_norm": 0.0, + "learning_rate": 1.8076046149048024e-05, + "loss": 0.8379, + "step": 729 + }, + { + "epoch": 0.22520438068795312, + "grad_norm": 0.0, + "learning_rate": 1.8070147512935828e-05, + "loss": 0.8821, + "step": 730 + }, + { + "epoch": 0.22551287983958043, + "grad_norm": 0.0, + "learning_rate": 1.806424081390234e-05, + "loss": 0.8625, + "step": 731 + }, + { + "epoch": 0.22582137899120777, + "grad_norm": 0.0, + "learning_rate": 1.8058326057848966e-05, + "loss": 0.9501, + "step": 732 + }, + { + "epoch": 0.2261298781428351, + "grad_norm": 0.0, + "learning_rate": 1.8052403250685172e-05, + "loss": 0.8423, + "step": 733 + }, + { + "epoch": 0.22643837729446245, + "grad_norm": 0.0, + "learning_rate": 1.804647239832846e-05, + "loss": 0.884, + "step": 734 + }, + { + "epoch": 0.22674687644608976, + "grad_norm": 0.0, + "learning_rate": 1.804053350670437e-05, + "loss": 0.9397, + "step": 735 + }, + { + "epoch": 0.2270553755977171, + "grad_norm": 0.0, + "learning_rate": 1.8034586581746474e-05, + "loss": 0.9681, + "step": 736 + }, + { + "epoch": 0.22736387474934444, + "grad_norm": 0.0, + "learning_rate": 1.8028631629396377e-05, + "loss": 0.9377, + "step": 737 + }, + { + "epoch": 0.22767237390097178, + "grad_norm": 0.0, + "learning_rate": 1.8022668655603696e-05, + "loss": 0.9687, + "step": 738 + }, + { + "epoch": 0.2279808730525991, + "grad_norm": 0.0, + "learning_rate": 1.8016697666326066e-05, + "loss": 0.9338, + "step": 739 + }, + { + "epoch": 0.22828937220422643, + "grad_norm": 0.0, + "learning_rate": 1.801071866752913e-05, + "loss": 0.8938, + "step": 740 + }, + { + "epoch": 0.22859787135585377, + "grad_norm": 0.0, + "learning_rate": 1.8004731665186532e-05, + "loss": 0.8316, + "step": 741 + }, + { + "epoch": 0.2289063705074811, + "grad_norm": 0.0, + "learning_rate": 1.7998736665279914e-05, + "loss": 0.9167, + "step": 742 + }, + { + "epoch": 0.22921486965910845, + "grad_norm": 0.0, + "learning_rate": 1.799273367379891e-05, + "loss": 0.9561, + "step": 743 + }, + { + "epoch": 0.22952336881073576, + "grad_norm": 0.0, + "learning_rate": 1.7986722696741132e-05, + "loss": 0.9086, + "step": 744 + }, + { + "epoch": 0.2298318679623631, + "grad_norm": 0.0, + "learning_rate": 1.798070374011218e-05, + "loss": 0.8398, + "step": 745 + }, + { + "epoch": 0.23014036711399044, + "grad_norm": 0.0, + "learning_rate": 1.7974676809925613e-05, + "loss": 0.8583, + "step": 746 + }, + { + "epoch": 0.23044886626561778, + "grad_norm": 0.0, + "learning_rate": 1.7968641912202973e-05, + "loss": 0.9205, + "step": 747 + }, + { + "epoch": 0.2307573654172451, + "grad_norm": 0.0, + "learning_rate": 1.7962599052973746e-05, + "loss": 0.9376, + "step": 748 + }, + { + "epoch": 0.23106586456887243, + "grad_norm": 0.0, + "learning_rate": 1.7956548238275387e-05, + "loss": 0.9171, + "step": 749 + }, + { + "epoch": 0.23137436372049977, + "grad_norm": 0.0, + "learning_rate": 1.7950489474153293e-05, + "loss": 0.8935, + "step": 750 + }, + { + "epoch": 0.2316828628721271, + "grad_norm": 0.0, + "learning_rate": 1.7944422766660797e-05, + "loss": 1.0081, + "step": 751 + }, + { + "epoch": 0.23199136202375442, + "grad_norm": 0.0, + "learning_rate": 1.7938348121859183e-05, + "loss": 0.8754, + "step": 752 + }, + { + "epoch": 0.23229986117538176, + "grad_norm": 0.0, + "learning_rate": 1.7932265545817645e-05, + "loss": 0.8547, + "step": 753 + }, + { + "epoch": 0.2326083603270091, + "grad_norm": 0.0, + "learning_rate": 1.792617504461332e-05, + "loss": 0.8784, + "step": 754 + }, + { + "epoch": 0.23291685947863644, + "grad_norm": 0.0, + "learning_rate": 1.7920076624331254e-05, + "loss": 0.9178, + "step": 755 + }, + { + "epoch": 0.23322535863026378, + "grad_norm": 0.0, + "learning_rate": 1.791397029106441e-05, + "loss": 0.8957, + "step": 756 + }, + { + "epoch": 0.2335338577818911, + "grad_norm": 0.0, + "learning_rate": 1.7907856050913644e-05, + "loss": 1.1664, + "step": 757 + }, + { + "epoch": 0.23384235693351843, + "grad_norm": 0.0, + "learning_rate": 1.7901733909987735e-05, + "loss": 0.8931, + "step": 758 + }, + { + "epoch": 0.23415085608514577, + "grad_norm": 0.0, + "learning_rate": 1.7895603874403326e-05, + "loss": 0.9065, + "step": 759 + }, + { + "epoch": 0.2344593552367731, + "grad_norm": 0.0, + "learning_rate": 1.788946595028498e-05, + "loss": 0.8369, + "step": 760 + }, + { + "epoch": 0.23476785438840042, + "grad_norm": 0.0, + "learning_rate": 1.7883320143765113e-05, + "loss": 0.9097, + "step": 761 + }, + { + "epoch": 0.23507635354002776, + "grad_norm": 0.0, + "learning_rate": 1.787716646098403e-05, + "loss": 0.9211, + "step": 762 + }, + { + "epoch": 0.2353848526916551, + "grad_norm": 0.0, + "learning_rate": 1.787100490808991e-05, + "loss": 0.9252, + "step": 763 + }, + { + "epoch": 0.23569335184328244, + "grad_norm": 0.0, + "learning_rate": 1.7864835491238785e-05, + "loss": 0.8593, + "step": 764 + }, + { + "epoch": 0.23600185099490975, + "grad_norm": 0.0, + "learning_rate": 1.7858658216594545e-05, + "loss": 0.9216, + "step": 765 + }, + { + "epoch": 0.2363103501465371, + "grad_norm": 0.0, + "learning_rate": 1.7852473090328937e-05, + "loss": 0.8814, + "step": 766 + }, + { + "epoch": 0.23661884929816443, + "grad_norm": 0.0, + "learning_rate": 1.7846280118621547e-05, + "loss": 0.9103, + "step": 767 + }, + { + "epoch": 0.23692734844979177, + "grad_norm": 0.0, + "learning_rate": 1.7840079307659803e-05, + "loss": 0.8907, + "step": 768 + }, + { + "epoch": 0.2372358476014191, + "grad_norm": 0.0, + "learning_rate": 1.783387066363896e-05, + "loss": 0.8361, + "step": 769 + }, + { + "epoch": 0.23754434675304642, + "grad_norm": 0.0, + "learning_rate": 1.782765419276211e-05, + "loss": 0.8302, + "step": 770 + }, + { + "epoch": 0.23785284590467376, + "grad_norm": 0.0, + "learning_rate": 1.782142990124015e-05, + "loss": 0.9117, + "step": 771 + }, + { + "epoch": 0.2381613450563011, + "grad_norm": 0.0, + "learning_rate": 1.78151977952918e-05, + "loss": 0.952, + "step": 772 + }, + { + "epoch": 0.23846984420792844, + "grad_norm": 0.0, + "learning_rate": 1.7808957881143588e-05, + "loss": 0.8969, + "step": 773 + }, + { + "epoch": 0.23877834335955575, + "grad_norm": 0.0, + "learning_rate": 1.780271016502984e-05, + "loss": 0.8482, + "step": 774 + }, + { + "epoch": 0.2390868425111831, + "grad_norm": 0.0, + "learning_rate": 1.7796454653192675e-05, + "loss": 0.7801, + "step": 775 + }, + { + "epoch": 0.23939534166281043, + "grad_norm": 0.0, + "learning_rate": 1.7790191351882006e-05, + "loss": 0.9085, + "step": 776 + }, + { + "epoch": 0.23970384081443777, + "grad_norm": 0.0, + "learning_rate": 1.7783920267355527e-05, + "loss": 0.8567, + "step": 777 + }, + { + "epoch": 0.24001233996606508, + "grad_norm": 0.0, + "learning_rate": 1.7777641405878706e-05, + "loss": 0.8105, + "step": 778 + }, + { + "epoch": 0.24032083911769242, + "grad_norm": 0.0, + "learning_rate": 1.777135477372478e-05, + "loss": 0.8481, + "step": 779 + }, + { + "epoch": 0.24062933826931976, + "grad_norm": 0.0, + "learning_rate": 1.7765060377174753e-05, + "loss": 0.8734, + "step": 780 + }, + { + "epoch": 0.2409378374209471, + "grad_norm": 0.0, + "learning_rate": 1.7758758222517387e-05, + "loss": 0.8233, + "step": 781 + }, + { + "epoch": 0.24124633657257444, + "grad_norm": 0.0, + "learning_rate": 1.7752448316049197e-05, + "loss": 0.8362, + "step": 782 + }, + { + "epoch": 0.24155483572420175, + "grad_norm": 0.0, + "learning_rate": 1.774613066407443e-05, + "loss": 0.9261, + "step": 783 + }, + { + "epoch": 0.2418633348758291, + "grad_norm": 0.0, + "learning_rate": 1.7739805272905087e-05, + "loss": 0.9551, + "step": 784 + }, + { + "epoch": 0.24217183402745643, + "grad_norm": 0.0, + "learning_rate": 1.7733472148860893e-05, + "loss": 0.8842, + "step": 785 + }, + { + "epoch": 0.24248033317908377, + "grad_norm": 0.0, + "learning_rate": 1.7727131298269306e-05, + "loss": 0.9109, + "step": 786 + }, + { + "epoch": 0.24278883233071108, + "grad_norm": 0.0, + "learning_rate": 1.7720782727465494e-05, + "loss": 0.845, + "step": 787 + }, + { + "epoch": 0.24309733148233842, + "grad_norm": 0.0, + "learning_rate": 1.7714426442792344e-05, + "loss": 0.9728, + "step": 788 + }, + { + "epoch": 0.24340583063396576, + "grad_norm": 0.0, + "learning_rate": 1.770806245060045e-05, + "loss": 0.9432, + "step": 789 + }, + { + "epoch": 0.2437143297855931, + "grad_norm": 0.0, + "learning_rate": 1.770169075724811e-05, + "loss": 0.7779, + "step": 790 + }, + { + "epoch": 0.2440228289372204, + "grad_norm": 0.0, + "learning_rate": 1.76953113691013e-05, + "loss": 0.7957, + "step": 791 + }, + { + "epoch": 0.24433132808884775, + "grad_norm": 0.0, + "learning_rate": 1.7688924292533706e-05, + "loss": 0.8393, + "step": 792 + }, + { + "epoch": 0.2446398272404751, + "grad_norm": 0.0, + "learning_rate": 1.768252953392668e-05, + "loss": 0.8052, + "step": 793 + }, + { + "epoch": 0.24494832639210243, + "grad_norm": 0.0, + "learning_rate": 1.7676127099669265e-05, + "loss": 0.7727, + "step": 794 + }, + { + "epoch": 0.24525682554372977, + "grad_norm": 0.0, + "learning_rate": 1.7669716996158148e-05, + "loss": 0.8535, + "step": 795 + }, + { + "epoch": 0.24556532469535708, + "grad_norm": 0.0, + "learning_rate": 1.76632992297977e-05, + "loss": 0.8624, + "step": 796 + }, + { + "epoch": 0.24587382384698442, + "grad_norm": 0.0, + "learning_rate": 1.765687380699994e-05, + "loss": 0.8997, + "step": 797 + }, + { + "epoch": 0.24618232299861176, + "grad_norm": 0.0, + "learning_rate": 1.765044073418454e-05, + "loss": 0.8142, + "step": 798 + }, + { + "epoch": 0.2464908221502391, + "grad_norm": 0.0, + "learning_rate": 1.7644000017778807e-05, + "loss": 0.9157, + "step": 799 + }, + { + "epoch": 0.2467993213018664, + "grad_norm": 0.0, + "learning_rate": 1.7637551664217695e-05, + "loss": 0.8581, + "step": 800 + }, + { + "epoch": 0.24710782045349375, + "grad_norm": 0.0, + "learning_rate": 1.763109567994378e-05, + "loss": 0.9645, + "step": 801 + }, + { + "epoch": 0.2474163196051211, + "grad_norm": 0.0, + "learning_rate": 1.7624632071407268e-05, + "loss": 0.8924, + "step": 802 + }, + { + "epoch": 0.24772481875674843, + "grad_norm": 0.0, + "learning_rate": 1.7618160845065978e-05, + "loss": 0.919, + "step": 803 + }, + { + "epoch": 0.24803331790837574, + "grad_norm": 0.0, + "learning_rate": 1.7611682007385345e-05, + "loss": 0.889, + "step": 804 + }, + { + "epoch": 0.24834181706000308, + "grad_norm": 0.0, + "learning_rate": 1.76051955648384e-05, + "loss": 0.9173, + "step": 805 + }, + { + "epoch": 0.24865031621163042, + "grad_norm": 0.0, + "learning_rate": 1.7598701523905783e-05, + "loss": 0.9212, + "step": 806 + }, + { + "epoch": 0.24895881536325776, + "grad_norm": 0.0, + "learning_rate": 1.7592199891075714e-05, + "loss": 0.7934, + "step": 807 + }, + { + "epoch": 0.2492673145148851, + "grad_norm": 0.0, + "learning_rate": 1.758569067284401e-05, + "loss": 0.8689, + "step": 808 + }, + { + "epoch": 0.2495758136665124, + "grad_norm": 0.0, + "learning_rate": 1.7579173875714058e-05, + "loss": 0.8654, + "step": 809 + }, + { + "epoch": 0.24988431281813975, + "grad_norm": 0.0, + "learning_rate": 1.757264950619682e-05, + "loss": 0.8837, + "step": 810 + }, + { + "epoch": 0.25019281196976706, + "grad_norm": 0.0, + "learning_rate": 1.7566117570810822e-05, + "loss": 0.8707, + "step": 811 + }, + { + "epoch": 0.2505013111213944, + "grad_norm": 0.0, + "learning_rate": 1.7559578076082156e-05, + "loss": 0.9307, + "step": 812 + }, + { + "epoch": 0.25080981027302174, + "grad_norm": 0.0, + "learning_rate": 1.7553031028544452e-05, + "loss": 0.9386, + "step": 813 + }, + { + "epoch": 0.2511183094246491, + "grad_norm": 0.0, + "learning_rate": 1.7546476434738904e-05, + "loss": 0.8065, + "step": 814 + }, + { + "epoch": 0.2514268085762764, + "grad_norm": 0.0, + "learning_rate": 1.7539914301214233e-05, + "loss": 0.8347, + "step": 815 + }, + { + "epoch": 0.25173530772790376, + "grad_norm": 0.0, + "learning_rate": 1.7533344634526693e-05, + "loss": 0.8738, + "step": 816 + }, + { + "epoch": 0.2520438068795311, + "grad_norm": 0.0, + "learning_rate": 1.7526767441240075e-05, + "loss": 0.8786, + "step": 817 + }, + { + "epoch": 0.25235230603115844, + "grad_norm": 0.0, + "learning_rate": 1.7520182727925678e-05, + "loss": 0.8297, + "step": 818 + }, + { + "epoch": 0.2526608051827858, + "grad_norm": 0.0, + "learning_rate": 1.751359050116232e-05, + "loss": 0.9052, + "step": 819 + }, + { + "epoch": 0.25296930433441306, + "grad_norm": 0.0, + "learning_rate": 1.7506990767536326e-05, + "loss": 0.8696, + "step": 820 + }, + { + "epoch": 0.2532778034860404, + "grad_norm": 0.0, + "learning_rate": 1.750038353364152e-05, + "loss": 0.8608, + "step": 821 + }, + { + "epoch": 0.25358630263766774, + "grad_norm": 0.0, + "learning_rate": 1.749376880607922e-05, + "loss": 0.8843, + "step": 822 + }, + { + "epoch": 0.2538948017892951, + "grad_norm": 0.0, + "learning_rate": 1.748714659145823e-05, + "loss": 0.9389, + "step": 823 + }, + { + "epoch": 0.2542033009409224, + "grad_norm": 0.0, + "learning_rate": 1.7480516896394833e-05, + "loss": 0.9542, + "step": 824 + }, + { + "epoch": 0.25451180009254976, + "grad_norm": 0.0, + "learning_rate": 1.747387972751279e-05, + "loss": 0.9504, + "step": 825 + }, + { + "epoch": 0.2548202992441771, + "grad_norm": 0.0, + "learning_rate": 1.7467235091443326e-05, + "loss": 0.934, + "step": 826 + }, + { + "epoch": 0.25512879839580443, + "grad_norm": 0.0, + "learning_rate": 1.7460582994825127e-05, + "loss": 0.8076, + "step": 827 + }, + { + "epoch": 0.2554372975474317, + "grad_norm": 0.0, + "learning_rate": 1.7453923444304334e-05, + "loss": 0.879, + "step": 828 + }, + { + "epoch": 0.25574579669905906, + "grad_norm": 0.0, + "learning_rate": 1.7447256446534534e-05, + "loss": 0.7934, + "step": 829 + }, + { + "epoch": 0.2560542958506864, + "grad_norm": 0.0, + "learning_rate": 1.7440582008176756e-05, + "loss": 0.8131, + "step": 830 + }, + { + "epoch": 0.25636279500231374, + "grad_norm": 0.0, + "learning_rate": 1.743390013589946e-05, + "loss": 0.8817, + "step": 831 + }, + { + "epoch": 0.2566712941539411, + "grad_norm": 0.0, + "learning_rate": 1.7427210836378535e-05, + "loss": 0.8438, + "step": 832 + }, + { + "epoch": 0.2569797933055684, + "grad_norm": 0.0, + "learning_rate": 1.7420514116297294e-05, + "loss": 0.8695, + "step": 833 + }, + { + "epoch": 0.25728829245719576, + "grad_norm": 0.0, + "learning_rate": 1.7413809982346458e-05, + "loss": 0.8446, + "step": 834 + }, + { + "epoch": 0.2575967916088231, + "grad_norm": 0.0, + "learning_rate": 1.7407098441224154e-05, + "loss": 0.8197, + "step": 835 + }, + { + "epoch": 0.25790529076045043, + "grad_norm": 0.0, + "learning_rate": 1.7400379499635926e-05, + "loss": 0.8765, + "step": 836 + }, + { + "epoch": 0.2582137899120777, + "grad_norm": 0.0, + "learning_rate": 1.7393653164294685e-05, + "loss": 0.8708, + "step": 837 + }, + { + "epoch": 0.25852228906370506, + "grad_norm": 0.0, + "learning_rate": 1.7386919441920748e-05, + "loss": 0.7986, + "step": 838 + }, + { + "epoch": 0.2588307882153324, + "grad_norm": 0.0, + "learning_rate": 1.738017833924181e-05, + "loss": 0.8454, + "step": 839 + }, + { + "epoch": 0.25913928736695974, + "grad_norm": 0.0, + "learning_rate": 1.737342986299294e-05, + "loss": 0.8292, + "step": 840 + }, + { + "epoch": 0.2594477865185871, + "grad_norm": 0.0, + "learning_rate": 1.7366674019916567e-05, + "loss": 0.8716, + "step": 841 + }, + { + "epoch": 0.2597562856702144, + "grad_norm": 0.0, + "learning_rate": 1.7359910816762487e-05, + "loss": 0.7871, + "step": 842 + }, + { + "epoch": 0.26006478482184175, + "grad_norm": 0.0, + "learning_rate": 1.7353140260287845e-05, + "loss": 0.9423, + "step": 843 + }, + { + "epoch": 0.2603732839734691, + "grad_norm": 0.0, + "learning_rate": 1.7346362357257135e-05, + "loss": 0.8139, + "step": 844 + }, + { + "epoch": 0.26068178312509643, + "grad_norm": 0.0, + "learning_rate": 1.7339577114442194e-05, + "loss": 0.8117, + "step": 845 + }, + { + "epoch": 0.2609902822767237, + "grad_norm": 0.0, + "learning_rate": 1.7332784538622184e-05, + "loss": 0.9124, + "step": 846 + }, + { + "epoch": 0.26129878142835106, + "grad_norm": 0.0, + "learning_rate": 1.7325984636583606e-05, + "loss": 0.8608, + "step": 847 + }, + { + "epoch": 0.2616072805799784, + "grad_norm": 0.0, + "learning_rate": 1.731917741512027e-05, + "loss": 0.8616, + "step": 848 + }, + { + "epoch": 0.26191577973160574, + "grad_norm": 0.0, + "learning_rate": 1.7312362881033293e-05, + "loss": 0.8625, + "step": 849 + }, + { + "epoch": 0.2622242788832331, + "grad_norm": 0.0, + "learning_rate": 1.730554104113112e-05, + "loss": 0.8, + "step": 850 + }, + { + "epoch": 0.2625327780348604, + "grad_norm": 0.0, + "learning_rate": 1.7298711902229478e-05, + "loss": 0.7432, + "step": 851 + }, + { + "epoch": 0.26284127718648775, + "grad_norm": 0.0, + "learning_rate": 1.7291875471151392e-05, + "loss": 0.93, + "step": 852 + }, + { + "epoch": 0.2631497763381151, + "grad_norm": 0.0, + "learning_rate": 1.728503175472717e-05, + "loss": 0.8965, + "step": 853 + }, + { + "epoch": 0.2634582754897424, + "grad_norm": 0.0, + "learning_rate": 1.7278180759794397e-05, + "loss": 0.8166, + "step": 854 + }, + { + "epoch": 0.2637667746413697, + "grad_norm": 0.0, + "learning_rate": 1.727132249319794e-05, + "loss": 0.8628, + "step": 855 + }, + { + "epoch": 0.26407527379299706, + "grad_norm": 0.0, + "learning_rate": 1.726445696178992e-05, + "loss": 0.8244, + "step": 856 + }, + { + "epoch": 0.2643837729446244, + "grad_norm": 0.0, + "learning_rate": 1.7257584172429723e-05, + "loss": 0.7755, + "step": 857 + }, + { + "epoch": 0.26469227209625174, + "grad_norm": 0.0, + "learning_rate": 1.7250704131983984e-05, + "loss": 0.7426, + "step": 858 + }, + { + "epoch": 0.2650007712478791, + "grad_norm": 0.0, + "learning_rate": 1.724381684732658e-05, + "loss": 0.889, + "step": 859 + }, + { + "epoch": 0.2653092703995064, + "grad_norm": 0.0, + "learning_rate": 1.723692232533863e-05, + "loss": 0.9441, + "step": 860 + }, + { + "epoch": 0.26561776955113375, + "grad_norm": 0.0, + "learning_rate": 1.723002057290849e-05, + "loss": 0.8384, + "step": 861 + }, + { + "epoch": 0.2659262687027611, + "grad_norm": 0.0, + "learning_rate": 1.7223111596931722e-05, + "loss": 0.8027, + "step": 862 + }, + { + "epoch": 0.2662347678543884, + "grad_norm": 0.0, + "learning_rate": 1.721619540431112e-05, + "loss": 0.8804, + "step": 863 + }, + { + "epoch": 0.2665432670060157, + "grad_norm": 0.0, + "learning_rate": 1.720927200195668e-05, + "loss": 0.8112, + "step": 864 + }, + { + "epoch": 0.26685176615764306, + "grad_norm": 0.0, + "learning_rate": 1.7202341396785613e-05, + "loss": 0.9347, + "step": 865 + }, + { + "epoch": 0.2671602653092704, + "grad_norm": 0.0, + "learning_rate": 1.719540359572231e-05, + "loss": 0.8728, + "step": 866 + }, + { + "epoch": 0.26746876446089773, + "grad_norm": 0.0, + "learning_rate": 1.7188458605698365e-05, + "loss": 0.8349, + "step": 867 + }, + { + "epoch": 0.2677772636125251, + "grad_norm": 0.0, + "learning_rate": 1.7181506433652545e-05, + "loss": 0.7284, + "step": 868 + }, + { + "epoch": 0.2680857627641524, + "grad_norm": 0.0, + "learning_rate": 1.71745470865308e-05, + "loss": 0.8875, + "step": 869 + }, + { + "epoch": 0.26839426191577975, + "grad_norm": 0.0, + "learning_rate": 1.7167580571286247e-05, + "loss": 0.8623, + "step": 870 + }, + { + "epoch": 0.2687027610674071, + "grad_norm": 0.0, + "learning_rate": 1.716060689487916e-05, + "loss": 0.817, + "step": 871 + }, + { + "epoch": 0.2690112602190344, + "grad_norm": 0.0, + "learning_rate": 1.7153626064276972e-05, + "loss": 0.8666, + "step": 872 + }, + { + "epoch": 0.2693197593706617, + "grad_norm": 0.0, + "learning_rate": 1.7146638086454264e-05, + "loss": 0.8331, + "step": 873 + }, + { + "epoch": 0.26962825852228905, + "grad_norm": 0.0, + "learning_rate": 1.7139642968392754e-05, + "loss": 0.8649, + "step": 874 + }, + { + "epoch": 0.2699367576739164, + "grad_norm": 0.0, + "learning_rate": 1.71326407170813e-05, + "loss": 0.9272, + "step": 875 + }, + { + "epoch": 0.27024525682554373, + "grad_norm": 0.0, + "learning_rate": 1.712563133951588e-05, + "loss": 0.8894, + "step": 876 + }, + { + "epoch": 0.2705537559771711, + "grad_norm": 0.0, + "learning_rate": 1.7118614842699595e-05, + "loss": 0.8173, + "step": 877 + }, + { + "epoch": 0.2708622551287984, + "grad_norm": 0.0, + "learning_rate": 1.711159123364266e-05, + "loss": 0.8226, + "step": 878 + }, + { + "epoch": 0.27117075428042575, + "grad_norm": 0.0, + "learning_rate": 1.7104560519362398e-05, + "loss": 0.8828, + "step": 879 + }, + { + "epoch": 0.27147925343205304, + "grad_norm": 0.0, + "learning_rate": 1.7097522706883225e-05, + "loss": 0.7943, + "step": 880 + }, + { + "epoch": 0.2717877525836804, + "grad_norm": 0.0, + "learning_rate": 1.709047780323665e-05, + "loss": 0.8355, + "step": 881 + }, + { + "epoch": 0.2720962517353077, + "grad_norm": 0.0, + "learning_rate": 1.7083425815461273e-05, + "loss": 0.8576, + "step": 882 + }, + { + "epoch": 0.27240475088693505, + "grad_norm": 0.0, + "learning_rate": 1.707636675060276e-05, + "loss": 0.8617, + "step": 883 + }, + { + "epoch": 0.2727132500385624, + "grad_norm": 0.0, + "learning_rate": 1.7069300615713866e-05, + "loss": 0.7715, + "step": 884 + }, + { + "epoch": 0.27302174919018973, + "grad_norm": 0.0, + "learning_rate": 1.7062227417854388e-05, + "loss": 0.8574, + "step": 885 + }, + { + "epoch": 0.2733302483418171, + "grad_norm": 0.0, + "learning_rate": 1.7055147164091197e-05, + "loss": 0.8169, + "step": 886 + }, + { + "epoch": 0.2736387474934444, + "grad_norm": 0.0, + "learning_rate": 1.7048059861498205e-05, + "loss": 0.8913, + "step": 887 + }, + { + "epoch": 0.27394724664507175, + "grad_norm": 0.0, + "learning_rate": 1.7040965517156365e-05, + "loss": 0.898, + "step": 888 + }, + { + "epoch": 0.27425574579669904, + "grad_norm": 0.0, + "learning_rate": 1.703386413815367e-05, + "loss": 0.8039, + "step": 889 + }, + { + "epoch": 0.2745642449483264, + "grad_norm": 0.0, + "learning_rate": 1.7026755731585146e-05, + "loss": 0.8275, + "step": 890 + }, + { + "epoch": 0.2748727440999537, + "grad_norm": 0.0, + "learning_rate": 1.7019640304552832e-05, + "loss": 0.8026, + "step": 891 + }, + { + "epoch": 0.27518124325158105, + "grad_norm": 0.0, + "learning_rate": 1.7012517864165778e-05, + "loss": 0.9627, + "step": 892 + }, + { + "epoch": 0.2754897424032084, + "grad_norm": 0.0, + "learning_rate": 1.7005388417540055e-05, + "loss": 0.7905, + "step": 893 + }, + { + "epoch": 0.27579824155483573, + "grad_norm": 0.0, + "learning_rate": 1.6998251971798717e-05, + "loss": 0.8794, + "step": 894 + }, + { + "epoch": 0.27610674070646307, + "grad_norm": 0.0, + "learning_rate": 1.699110853407183e-05, + "loss": 0.8351, + "step": 895 + }, + { + "epoch": 0.2764152398580904, + "grad_norm": 0.0, + "learning_rate": 1.6983958111496428e-05, + "loss": 0.8739, + "step": 896 + }, + { + "epoch": 0.27672373900971775, + "grad_norm": 0.0, + "learning_rate": 1.6976800711216527e-05, + "loss": 0.9094, + "step": 897 + }, + { + "epoch": 0.27703223816134503, + "grad_norm": 0.0, + "learning_rate": 1.6969636340383134e-05, + "loss": 0.8181, + "step": 898 + }, + { + "epoch": 0.2773407373129724, + "grad_norm": 0.0, + "learning_rate": 1.6962465006154186e-05, + "loss": 0.8547, + "step": 899 + }, + { + "epoch": 0.2776492364645997, + "grad_norm": 0.0, + "learning_rate": 1.695528671569461e-05, + "loss": 0.8662, + "step": 900 + }, + { + "epoch": 0.27795773561622705, + "grad_norm": 0.0, + "learning_rate": 1.694810147617626e-05, + "loss": 0.8293, + "step": 901 + }, + { + "epoch": 0.2782662347678544, + "grad_norm": 0.0, + "learning_rate": 1.6940909294777945e-05, + "loss": 0.8625, + "step": 902 + }, + { + "epoch": 0.27857473391948173, + "grad_norm": 0.0, + "learning_rate": 1.6933710178685406e-05, + "loss": 0.8169, + "step": 903 + }, + { + "epoch": 0.27888323307110907, + "grad_norm": 0.0, + "learning_rate": 1.6926504135091315e-05, + "loss": 0.7649, + "step": 904 + }, + { + "epoch": 0.2791917322227364, + "grad_norm": 0.0, + "learning_rate": 1.691929117119526e-05, + "loss": 0.8727, + "step": 905 + }, + { + "epoch": 0.2795002313743637, + "grad_norm": 0.0, + "learning_rate": 1.6912071294203746e-05, + "loss": 0.8505, + "step": 906 + }, + { + "epoch": 0.27980873052599103, + "grad_norm": 0.0, + "learning_rate": 1.690484451133019e-05, + "loss": 0.817, + "step": 907 + }, + { + "epoch": 0.2801172296776184, + "grad_norm": 0.0, + "learning_rate": 1.6897610829794898e-05, + "loss": 0.8791, + "step": 908 + }, + { + "epoch": 0.2804257288292457, + "grad_norm": 0.0, + "learning_rate": 1.6890370256825077e-05, + "loss": 0.7989, + "step": 909 + }, + { + "epoch": 0.28073422798087305, + "grad_norm": 0.0, + "learning_rate": 1.6883122799654814e-05, + "loss": 0.8845, + "step": 910 + }, + { + "epoch": 0.2810427271325004, + "grad_norm": 0.0, + "learning_rate": 1.6875868465525084e-05, + "loss": 0.8552, + "step": 911 + }, + { + "epoch": 0.28135122628412773, + "grad_norm": 0.0, + "learning_rate": 1.6868607261683716e-05, + "loss": 0.9005, + "step": 912 + }, + { + "epoch": 0.28165972543575507, + "grad_norm": 0.0, + "learning_rate": 1.686133919538542e-05, + "loss": 0.8005, + "step": 913 + }, + { + "epoch": 0.2819682245873824, + "grad_norm": 0.0, + "learning_rate": 1.685406427389175e-05, + "loss": 0.8123, + "step": 914 + }, + { + "epoch": 0.2822767237390097, + "grad_norm": 0.0, + "learning_rate": 1.6846782504471112e-05, + "loss": 0.8071, + "step": 915 + }, + { + "epoch": 0.28258522289063703, + "grad_norm": 0.0, + "learning_rate": 1.6839493894398753e-05, + "loss": 0.8606, + "step": 916 + }, + { + "epoch": 0.2828937220422644, + "grad_norm": 0.0, + "learning_rate": 1.6832198450956766e-05, + "loss": 0.8498, + "step": 917 + }, + { + "epoch": 0.2832022211938917, + "grad_norm": 0.0, + "learning_rate": 1.6824896181434055e-05, + "loss": 0.8226, + "step": 918 + }, + { + "epoch": 0.28351072034551905, + "grad_norm": 0.0, + "learning_rate": 1.6817587093126354e-05, + "loss": 0.9171, + "step": 919 + }, + { + "epoch": 0.2838192194971464, + "grad_norm": 0.0, + "learning_rate": 1.6810271193336203e-05, + "loss": 0.7303, + "step": 920 + }, + { + "epoch": 0.28412771864877373, + "grad_norm": 0.0, + "learning_rate": 1.6802948489372956e-05, + "loss": 0.8212, + "step": 921 + }, + { + "epoch": 0.28443621780040107, + "grad_norm": 0.0, + "learning_rate": 1.6795618988552754e-05, + "loss": 0.7725, + "step": 922 + }, + { + "epoch": 0.2847447169520284, + "grad_norm": 0.0, + "learning_rate": 1.6788282698198536e-05, + "loss": 0.8705, + "step": 923 + }, + { + "epoch": 0.2850532161036557, + "grad_norm": 0.0, + "learning_rate": 1.678093962564003e-05, + "loss": 0.7047, + "step": 924 + }, + { + "epoch": 0.28536171525528303, + "grad_norm": 0.0, + "learning_rate": 1.6773589778213724e-05, + "loss": 0.7995, + "step": 925 + }, + { + "epoch": 0.28567021440691037, + "grad_norm": 0.0, + "learning_rate": 1.6766233163262893e-05, + "loss": 0.8124, + "step": 926 + }, + { + "epoch": 0.2859787135585377, + "grad_norm": 0.0, + "learning_rate": 1.675886978813756e-05, + "loss": 0.9003, + "step": 927 + }, + { + "epoch": 0.28628721271016505, + "grad_norm": 0.0, + "learning_rate": 1.6751499660194502e-05, + "loss": 0.8927, + "step": 928 + }, + { + "epoch": 0.2865957118617924, + "grad_norm": 0.0, + "learning_rate": 1.6744122786797254e-05, + "loss": 0.8025, + "step": 929 + }, + { + "epoch": 0.28690421101341973, + "grad_norm": 0.0, + "learning_rate": 1.6736739175316086e-05, + "loss": 0.9125, + "step": 930 + }, + { + "epoch": 0.28721271016504707, + "grad_norm": 0.0, + "learning_rate": 1.672934883312799e-05, + "loss": 0.8202, + "step": 931 + }, + { + "epoch": 0.28752120931667435, + "grad_norm": 0.0, + "learning_rate": 1.6721951767616696e-05, + "loss": 0.8578, + "step": 932 + }, + { + "epoch": 0.2878297084683017, + "grad_norm": 0.0, + "learning_rate": 1.671454798617265e-05, + "loss": 0.8437, + "step": 933 + }, + { + "epoch": 0.28813820761992903, + "grad_norm": 0.0, + "learning_rate": 1.6707137496192994e-05, + "loss": 0.8265, + "step": 934 + }, + { + "epoch": 0.28844670677155637, + "grad_norm": 0.0, + "learning_rate": 1.669972030508159e-05, + "loss": 0.8087, + "step": 935 + }, + { + "epoch": 0.2887552059231837, + "grad_norm": 0.0, + "learning_rate": 1.6692296420248985e-05, + "loss": 0.7741, + "step": 936 + }, + { + "epoch": 0.28906370507481105, + "grad_norm": 0.0, + "learning_rate": 1.6684865849112414e-05, + "loss": 0.8904, + "step": 937 + }, + { + "epoch": 0.2893722042264384, + "grad_norm": 0.0, + "learning_rate": 1.6677428599095796e-05, + "loss": 0.867, + "step": 938 + }, + { + "epoch": 0.28968070337806573, + "grad_norm": 0.0, + "learning_rate": 1.666998467762973e-05, + "loss": 0.765, + "step": 939 + }, + { + "epoch": 0.28998920252969307, + "grad_norm": 0.0, + "learning_rate": 1.6662534092151457e-05, + "loss": 0.7802, + "step": 940 + }, + { + "epoch": 0.29029770168132035, + "grad_norm": 0.0, + "learning_rate": 1.6655076850104902e-05, + "loss": 0.8506, + "step": 941 + }, + { + "epoch": 0.2906062008329477, + "grad_norm": 0.0, + "learning_rate": 1.6647612958940622e-05, + "loss": 0.8015, + "step": 942 + }, + { + "epoch": 0.29091469998457503, + "grad_norm": 0.0, + "learning_rate": 1.6640142426115833e-05, + "loss": 0.8601, + "step": 943 + }, + { + "epoch": 0.29122319913620237, + "grad_norm": 0.0, + "learning_rate": 1.663266525909437e-05, + "loss": 0.8014, + "step": 944 + }, + { + "epoch": 0.2915316982878297, + "grad_norm": 0.0, + "learning_rate": 1.6625181465346717e-05, + "loss": 0.7378, + "step": 945 + }, + { + "epoch": 0.29184019743945705, + "grad_norm": 0.0, + "learning_rate": 1.6617691052349954e-05, + "loss": 0.9138, + "step": 946 + }, + { + "epoch": 0.2921486965910844, + "grad_norm": 0.0, + "learning_rate": 1.661019402758779e-05, + "loss": 0.8472, + "step": 947 + }, + { + "epoch": 0.29245719574271173, + "grad_norm": 0.0, + "learning_rate": 1.6602690398550542e-05, + "loss": 0.8478, + "step": 948 + }, + { + "epoch": 0.29276569489433907, + "grad_norm": 0.0, + "learning_rate": 1.6595180172735116e-05, + "loss": 0.8009, + "step": 949 + }, + { + "epoch": 0.29307419404596635, + "grad_norm": 0.0, + "learning_rate": 1.658766335764501e-05, + "loss": 0.8826, + "step": 950 + }, + { + "epoch": 0.2933826931975937, + "grad_norm": 0.0, + "learning_rate": 1.6580139960790316e-05, + "loss": 0.89, + "step": 951 + }, + { + "epoch": 0.29369119234922103, + "grad_norm": 0.0, + "learning_rate": 1.6572609989687687e-05, + "loss": 0.8615, + "step": 952 + }, + { + "epoch": 0.29399969150084837, + "grad_norm": 0.0, + "learning_rate": 1.6565073451860355e-05, + "loss": 0.8855, + "step": 953 + }, + { + "epoch": 0.2943081906524757, + "grad_norm": 0.0, + "learning_rate": 1.6557530354838108e-05, + "loss": 0.824, + "step": 954 + }, + { + "epoch": 0.29461668980410305, + "grad_norm": 0.0, + "learning_rate": 1.6549980706157295e-05, + "loss": 0.8551, + "step": 955 + }, + { + "epoch": 0.2949251889557304, + "grad_norm": 0.0, + "learning_rate": 1.6542424513360793e-05, + "loss": 0.8421, + "step": 956 + }, + { + "epoch": 0.29523368810735773, + "grad_norm": 0.0, + "learning_rate": 1.653486178399804e-05, + "loss": 0.8077, + "step": 957 + }, + { + "epoch": 0.295542187258985, + "grad_norm": 0.0, + "learning_rate": 1.6527292525624986e-05, + "loss": 0.8392, + "step": 958 + }, + { + "epoch": 0.29585068641061235, + "grad_norm": 0.0, + "learning_rate": 1.6519716745804112e-05, + "loss": 0.7634, + "step": 959 + }, + { + "epoch": 0.2961591855622397, + "grad_norm": 0.0, + "learning_rate": 1.651213445210442e-05, + "loss": 0.7943, + "step": 960 + }, + { + "epoch": 0.29646768471386703, + "grad_norm": 0.0, + "learning_rate": 1.650454565210141e-05, + "loss": 0.848, + "step": 961 + }, + { + "epoch": 0.29677618386549437, + "grad_norm": 0.0, + "learning_rate": 1.649695035337709e-05, + "loss": 0.8384, + "step": 962 + }, + { + "epoch": 0.2970846830171217, + "grad_norm": 0.0, + "learning_rate": 1.648934856351995e-05, + "loss": 0.8023, + "step": 963 + }, + { + "epoch": 0.29739318216874905, + "grad_norm": 0.0, + "learning_rate": 1.648174029012498e-05, + "loss": 0.872, + "step": 964 + }, + { + "epoch": 0.2977016813203764, + "grad_norm": 0.0, + "learning_rate": 1.647412554079364e-05, + "loss": 0.7895, + "step": 965 + }, + { + "epoch": 0.2980101804720037, + "grad_norm": 0.0, + "learning_rate": 1.6466504323133857e-05, + "loss": 0.8942, + "step": 966 + }, + { + "epoch": 0.298318679623631, + "grad_norm": 0.0, + "learning_rate": 1.6458876644760033e-05, + "loss": 0.877, + "step": 967 + }, + { + "epoch": 0.29862717877525835, + "grad_norm": 0.0, + "learning_rate": 1.6451242513293005e-05, + "loss": 0.7891, + "step": 968 + }, + { + "epoch": 0.2989356779268857, + "grad_norm": 0.0, + "learning_rate": 1.644360193636008e-05, + "loss": 0.863, + "step": 969 + }, + { + "epoch": 0.29924417707851303, + "grad_norm": 0.0, + "learning_rate": 1.6435954921594985e-05, + "loss": 0.8448, + "step": 970 + }, + { + "epoch": 0.29955267623014037, + "grad_norm": 0.0, + "learning_rate": 1.642830147663789e-05, + "loss": 0.8145, + "step": 971 + }, + { + "epoch": 0.2998611753817677, + "grad_norm": 0.0, + "learning_rate": 1.6420641609135388e-05, + "loss": 0.8186, + "step": 972 + }, + { + "epoch": 0.30016967453339505, + "grad_norm": 0.0, + "learning_rate": 1.6412975326740485e-05, + "loss": 0.7535, + "step": 973 + }, + { + "epoch": 0.3004781736850224, + "grad_norm": 0.0, + "learning_rate": 1.6405302637112598e-05, + "loss": 0.8412, + "step": 974 + }, + { + "epoch": 0.3007866728366497, + "grad_norm": 0.0, + "learning_rate": 1.6397623547917553e-05, + "loss": 0.7729, + "step": 975 + }, + { + "epoch": 0.301095171988277, + "grad_norm": 0.0, + "learning_rate": 1.6389938066827556e-05, + "loss": 0.7915, + "step": 976 + }, + { + "epoch": 0.30140367113990435, + "grad_norm": 0.0, + "learning_rate": 1.6382246201521213e-05, + "loss": 0.7591, + "step": 977 + }, + { + "epoch": 0.3017121702915317, + "grad_norm": 0.0, + "learning_rate": 1.6374547959683497e-05, + "loss": 0.8292, + "step": 978 + }, + { + "epoch": 0.30202066944315903, + "grad_norm": 0.0, + "learning_rate": 1.6366843349005755e-05, + "loss": 0.7889, + "step": 979 + }, + { + "epoch": 0.30232916859478637, + "grad_norm": 0.0, + "learning_rate": 1.63591323771857e-05, + "loss": 0.7864, + "step": 980 + }, + { + "epoch": 0.3026376677464137, + "grad_norm": 0.0, + "learning_rate": 1.6351415051927407e-05, + "loss": 0.8059, + "step": 981 + }, + { + "epoch": 0.30294616689804105, + "grad_norm": 0.0, + "learning_rate": 1.634369138094128e-05, + "loss": 0.8261, + "step": 982 + }, + { + "epoch": 0.3032546660496684, + "grad_norm": 0.0, + "learning_rate": 1.6335961371944084e-05, + "loss": 0.8465, + "step": 983 + }, + { + "epoch": 0.30356316520129567, + "grad_norm": 0.0, + "learning_rate": 1.6328225032658892e-05, + "loss": 0.8349, + "step": 984 + }, + { + "epoch": 0.303871664352923, + "grad_norm": 0.0, + "learning_rate": 1.6320482370815132e-05, + "loss": 0.9298, + "step": 985 + }, + { + "epoch": 0.30418016350455035, + "grad_norm": 0.0, + "learning_rate": 1.6312733394148524e-05, + "loss": 0.7644, + "step": 986 + }, + { + "epoch": 0.3044886626561777, + "grad_norm": 0.0, + "learning_rate": 1.6304978110401106e-05, + "loss": 0.8733, + "step": 987 + }, + { + "epoch": 0.30479716180780503, + "grad_norm": 0.0, + "learning_rate": 1.6297216527321223e-05, + "loss": 0.7945, + "step": 988 + }, + { + "epoch": 0.30510566095943237, + "grad_norm": 0.0, + "learning_rate": 1.62894486526635e-05, + "loss": 0.8094, + "step": 989 + }, + { + "epoch": 0.3054141601110597, + "grad_norm": 0.0, + "learning_rate": 1.6281674494188863e-05, + "loss": 0.8753, + "step": 990 + }, + { + "epoch": 0.30572265926268705, + "grad_norm": 0.0, + "learning_rate": 1.6273894059664507e-05, + "loss": 0.7734, + "step": 991 + }, + { + "epoch": 0.3060311584143144, + "grad_norm": 0.0, + "learning_rate": 1.62661073568639e-05, + "loss": 0.8767, + "step": 992 + }, + { + "epoch": 0.30633965756594167, + "grad_norm": 0.0, + "learning_rate": 1.625831439356677e-05, + "loss": 0.843, + "step": 993 + }, + { + "epoch": 0.306648156717569, + "grad_norm": 0.0, + "learning_rate": 1.6250515177559106e-05, + "loss": 0.8475, + "step": 994 + }, + { + "epoch": 0.30695665586919635, + "grad_norm": 0.0, + "learning_rate": 1.6242709716633137e-05, + "loss": 0.806, + "step": 995 + }, + { + "epoch": 0.3072651550208237, + "grad_norm": 0.0, + "learning_rate": 1.6234898018587336e-05, + "loss": 0.8225, + "step": 996 + }, + { + "epoch": 0.307573654172451, + "grad_norm": 0.0, + "learning_rate": 1.622708009122641e-05, + "loss": 0.8084, + "step": 997 + }, + { + "epoch": 0.30788215332407837, + "grad_norm": 0.0, + "learning_rate": 1.621925594236128e-05, + "loss": 0.8576, + "step": 998 + }, + { + "epoch": 0.3081906524757057, + "grad_norm": 0.0, + "learning_rate": 1.621142557980909e-05, + "loss": 0.917, + "step": 999 + }, + { + "epoch": 0.30849915162733305, + "grad_norm": 0.0, + "learning_rate": 1.6203589011393198e-05, + "loss": 0.8328, + "step": 1000 + }, + { + "epoch": 0.3088076507789604, + "grad_norm": 0.0, + "learning_rate": 1.6195746244943142e-05, + "loss": 1.1333, + "step": 1001 + }, + { + "epoch": 0.30911614993058767, + "grad_norm": 0.0, + "learning_rate": 1.618789728829468e-05, + "loss": 0.8886, + "step": 1002 + }, + { + "epoch": 0.309424649082215, + "grad_norm": 0.0, + "learning_rate": 1.618004214928973e-05, + "loss": 0.7299, + "step": 1003 + }, + { + "epoch": 0.30973314823384235, + "grad_norm": 0.0, + "learning_rate": 1.6172180835776404e-05, + "loss": 0.8005, + "step": 1004 + }, + { + "epoch": 0.3100416473854697, + "grad_norm": 0.0, + "learning_rate": 1.6164313355608974e-05, + "loss": 0.7836, + "step": 1005 + }, + { + "epoch": 0.310350146537097, + "grad_norm": 0.0, + "learning_rate": 1.6156439716647875e-05, + "loss": 0.8636, + "step": 1006 + }, + { + "epoch": 0.31065864568872437, + "grad_norm": 0.0, + "learning_rate": 1.6148559926759694e-05, + "loss": 0.8338, + "step": 1007 + }, + { + "epoch": 0.3109671448403517, + "grad_norm": 0.0, + "learning_rate": 1.614067399381717e-05, + "loss": 0.8371, + "step": 1008 + }, + { + "epoch": 0.31127564399197905, + "grad_norm": 0.0, + "learning_rate": 1.6132781925699168e-05, + "loss": 1.1121, + "step": 1009 + }, + { + "epoch": 0.31158414314360633, + "grad_norm": 0.0, + "learning_rate": 1.6124883730290695e-05, + "loss": 0.8098, + "step": 1010 + }, + { + "epoch": 0.31189264229523367, + "grad_norm": 0.0, + "learning_rate": 1.6116979415482875e-05, + "loss": 0.9123, + "step": 1011 + }, + { + "epoch": 0.312201141446861, + "grad_norm": 0.0, + "learning_rate": 1.6109068989172937e-05, + "loss": 0.8123, + "step": 1012 + }, + { + "epoch": 0.31250964059848835, + "grad_norm": 0.0, + "learning_rate": 1.610115245926423e-05, + "loss": 0.8503, + "step": 1013 + }, + { + "epoch": 0.3128181397501157, + "grad_norm": 0.0, + "learning_rate": 1.60932298336662e-05, + "loss": 0.7881, + "step": 1014 + }, + { + "epoch": 0.313126638901743, + "grad_norm": 0.0, + "learning_rate": 1.608530112029437e-05, + "loss": 0.786, + "step": 1015 + }, + { + "epoch": 0.31343513805337037, + "grad_norm": 0.0, + "learning_rate": 1.6077366327070354e-05, + "loss": 0.8752, + "step": 1016 + }, + { + "epoch": 0.3137436372049977, + "grad_norm": 0.0, + "learning_rate": 1.606942546192185e-05, + "loss": 0.8288, + "step": 1017 + }, + { + "epoch": 0.31405213635662504, + "grad_norm": 0.0, + "learning_rate": 1.60614785327826e-05, + "loss": 0.8726, + "step": 1018 + }, + { + "epoch": 0.31436063550825233, + "grad_norm": 0.0, + "learning_rate": 1.6053525547592424e-05, + "loss": 0.7704, + "step": 1019 + }, + { + "epoch": 0.31466913465987967, + "grad_norm": 0.0, + "learning_rate": 1.6045566514297184e-05, + "loss": 0.8248, + "step": 1020 + }, + { + "epoch": 0.314977633811507, + "grad_norm": 0.0, + "learning_rate": 1.603760144084879e-05, + "loss": 0.8371, + "step": 1021 + }, + { + "epoch": 0.31528613296313435, + "grad_norm": 0.0, + "learning_rate": 1.602963033520518e-05, + "loss": 0.829, + "step": 1022 + }, + { + "epoch": 0.3155946321147617, + "grad_norm": 0.0, + "learning_rate": 1.602165320533032e-05, + "loss": 0.7444, + "step": 1023 + }, + { + "epoch": 0.315903131266389, + "grad_norm": 0.0, + "learning_rate": 1.6013670059194203e-05, + "loss": 0.7885, + "step": 1024 + }, + { + "epoch": 0.31621163041801637, + "grad_norm": 0.0, + "learning_rate": 1.6005680904772822e-05, + "loss": 0.7537, + "step": 1025 + }, + { + "epoch": 0.3165201295696437, + "grad_norm": 0.0, + "learning_rate": 1.5997685750048183e-05, + "loss": 0.8151, + "step": 1026 + }, + { + "epoch": 0.31682862872127104, + "grad_norm": 0.0, + "learning_rate": 1.5989684603008274e-05, + "loss": 0.7634, + "step": 1027 + }, + { + "epoch": 0.31713712787289833, + "grad_norm": 0.0, + "learning_rate": 1.5981677471647085e-05, + "loss": 0.802, + "step": 1028 + }, + { + "epoch": 0.31744562702452567, + "grad_norm": 0.0, + "learning_rate": 1.5973664363964573e-05, + "loss": 0.811, + "step": 1029 + }, + { + "epoch": 0.317754126176153, + "grad_norm": 0.0, + "learning_rate": 1.5965645287966674e-05, + "loss": 0.8126, + "step": 1030 + }, + { + "epoch": 0.31806262532778035, + "grad_norm": 0.0, + "learning_rate": 1.5957620251665272e-05, + "loss": 0.8259, + "step": 1031 + }, + { + "epoch": 0.3183711244794077, + "grad_norm": 0.0, + "learning_rate": 1.594958926307824e-05, + "loss": 0.7898, + "step": 1032 + }, + { + "epoch": 0.318679623631035, + "grad_norm": 0.0, + "learning_rate": 1.5941552330229352e-05, + "loss": 0.8517, + "step": 1033 + }, + { + "epoch": 0.31898812278266236, + "grad_norm": 0.0, + "learning_rate": 1.593350946114836e-05, + "loss": 0.7696, + "step": 1034 + }, + { + "epoch": 0.3192966219342897, + "grad_norm": 0.0, + "learning_rate": 1.592546066387092e-05, + "loss": 0.7539, + "step": 1035 + }, + { + "epoch": 0.319605121085917, + "grad_norm": 0.0, + "learning_rate": 1.5917405946438635e-05, + "loss": 0.9824, + "step": 1036 + }, + { + "epoch": 0.3199136202375443, + "grad_norm": 0.0, + "learning_rate": 1.5909345316899e-05, + "loss": 0.862, + "step": 1037 + }, + { + "epoch": 0.32022211938917167, + "grad_norm": 0.0, + "learning_rate": 1.590127878330543e-05, + "loss": 0.8534, + "step": 1038 + }, + { + "epoch": 0.320530618540799, + "grad_norm": 0.0, + "learning_rate": 1.5893206353717234e-05, + "loss": 0.8594, + "step": 1039 + }, + { + "epoch": 0.32083911769242635, + "grad_norm": 0.0, + "learning_rate": 1.5885128036199615e-05, + "loss": 0.8474, + "step": 1040 + }, + { + "epoch": 0.3211476168440537, + "grad_norm": 0.0, + "learning_rate": 1.587704383882366e-05, + "loss": 0.7864, + "step": 1041 + }, + { + "epoch": 0.321456115995681, + "grad_norm": 0.0, + "learning_rate": 1.586895376966632e-05, + "loss": 0.9179, + "step": 1042 + }, + { + "epoch": 0.32176461514730836, + "grad_norm": 0.0, + "learning_rate": 1.5860857836810427e-05, + "loss": 0.7946, + "step": 1043 + }, + { + "epoch": 0.3220731142989357, + "grad_norm": 0.0, + "learning_rate": 1.585275604834466e-05, + "loss": 0.8053, + "step": 1044 + }, + { + "epoch": 0.322381613450563, + "grad_norm": 0.0, + "learning_rate": 1.584464841236356e-05, + "loss": 0.8583, + "step": 1045 + }, + { + "epoch": 0.3226901126021903, + "grad_norm": 0.0, + "learning_rate": 1.5836534936967493e-05, + "loss": 0.796, + "step": 1046 + }, + { + "epoch": 0.32299861175381767, + "grad_norm": 0.0, + "learning_rate": 1.5828415630262678e-05, + "loss": 0.864, + "step": 1047 + }, + { + "epoch": 0.323307110905445, + "grad_norm": 0.0, + "learning_rate": 1.5820290500361147e-05, + "loss": 0.791, + "step": 1048 + }, + { + "epoch": 0.32361561005707234, + "grad_norm": 0.0, + "learning_rate": 1.5812159555380752e-05, + "loss": 0.8613, + "step": 1049 + }, + { + "epoch": 0.3239241092086997, + "grad_norm": 0.0, + "learning_rate": 1.5804022803445164e-05, + "loss": 0.9215, + "step": 1050 + }, + { + "epoch": 0.324232608360327, + "grad_norm": 0.0, + "learning_rate": 1.5795880252683848e-05, + "loss": 0.8645, + "step": 1051 + }, + { + "epoch": 0.32454110751195436, + "grad_norm": 0.0, + "learning_rate": 1.5787731911232057e-05, + "loss": 0.8505, + "step": 1052 + }, + { + "epoch": 0.3248496066635817, + "grad_norm": 0.0, + "learning_rate": 1.5779577787230843e-05, + "loss": 0.8863, + "step": 1053 + }, + { + "epoch": 0.325158105815209, + "grad_norm": 0.0, + "learning_rate": 1.5771417888827026e-05, + "loss": 0.7677, + "step": 1054 + }, + { + "epoch": 0.3254666049668363, + "grad_norm": 0.0, + "learning_rate": 1.5763252224173196e-05, + "loss": 0.7598, + "step": 1055 + }, + { + "epoch": 0.32577510411846367, + "grad_norm": 0.0, + "learning_rate": 1.575508080142771e-05, + "loss": 0.8464, + "step": 1056 + }, + { + "epoch": 0.326083603270091, + "grad_norm": 0.0, + "learning_rate": 1.5746903628754672e-05, + "loss": 0.7798, + "step": 1057 + }, + { + "epoch": 0.32639210242171834, + "grad_norm": 0.0, + "learning_rate": 1.5738720714323935e-05, + "loss": 0.8526, + "step": 1058 + }, + { + "epoch": 0.3267006015733457, + "grad_norm": 0.0, + "learning_rate": 1.573053206631108e-05, + "loss": 0.8373, + "step": 1059 + }, + { + "epoch": 0.327009100724973, + "grad_norm": 0.0, + "learning_rate": 1.5722337692897428e-05, + "loss": 0.7411, + "step": 1060 + }, + { + "epoch": 0.32731759987660036, + "grad_norm": 0.0, + "learning_rate": 1.571413760227001e-05, + "loss": 0.7517, + "step": 1061 + }, + { + "epoch": 0.32762609902822765, + "grad_norm": 0.0, + "learning_rate": 1.5705931802621583e-05, + "loss": 0.7368, + "step": 1062 + }, + { + "epoch": 0.327934598179855, + "grad_norm": 0.0, + "learning_rate": 1.569772030215059e-05, + "loss": 0.8144, + "step": 1063 + }, + { + "epoch": 0.3282430973314823, + "grad_norm": 0.0, + "learning_rate": 1.5689503109061185e-05, + "loss": 0.8972, + "step": 1064 + }, + { + "epoch": 0.32855159648310966, + "grad_norm": 0.0, + "learning_rate": 1.5681280231563196e-05, + "loss": 0.8487, + "step": 1065 + }, + { + "epoch": 0.328860095634737, + "grad_norm": 0.0, + "learning_rate": 1.5673051677872143e-05, + "loss": 0.8097, + "step": 1066 + }, + { + "epoch": 0.32916859478636434, + "grad_norm": 0.0, + "learning_rate": 1.566481745620921e-05, + "loss": 0.8631, + "step": 1067 + }, + { + "epoch": 0.3294770939379917, + "grad_norm": 0.0, + "learning_rate": 1.565657757480125e-05, + "loss": 0.7832, + "step": 1068 + }, + { + "epoch": 0.329785593089619, + "grad_norm": 0.0, + "learning_rate": 1.564833204188076e-05, + "loss": 0.8752, + "step": 1069 + }, + { + "epoch": 0.33009409224124636, + "grad_norm": 0.0, + "learning_rate": 1.5640080865685888e-05, + "loss": 0.8046, + "step": 1070 + }, + { + "epoch": 0.33040259139287365, + "grad_norm": 0.0, + "learning_rate": 1.563182405446043e-05, + "loss": 0.8132, + "step": 1071 + }, + { + "epoch": 0.330711090544501, + "grad_norm": 0.0, + "learning_rate": 1.5623561616453798e-05, + "loss": 0.8263, + "step": 1072 + }, + { + "epoch": 0.3310195896961283, + "grad_norm": 0.0, + "learning_rate": 1.5615293559921037e-05, + "loss": 0.8533, + "step": 1073 + }, + { + "epoch": 0.33132808884775566, + "grad_norm": 0.0, + "learning_rate": 1.5607019893122792e-05, + "loss": 0.7371, + "step": 1074 + }, + { + "epoch": 0.331636587999383, + "grad_norm": 0.0, + "learning_rate": 1.5598740624325325e-05, + "loss": 0.9294, + "step": 1075 + }, + { + "epoch": 0.33194508715101034, + "grad_norm": 0.0, + "learning_rate": 1.5590455761800494e-05, + "loss": 0.7979, + "step": 1076 + }, + { + "epoch": 0.3322535863026377, + "grad_norm": 0.0, + "learning_rate": 1.558216531382574e-05, + "loss": 0.8072, + "step": 1077 + }, + { + "epoch": 0.332562085454265, + "grad_norm": 0.0, + "learning_rate": 1.5573869288684087e-05, + "loss": 0.8374, + "step": 1078 + }, + { + "epoch": 0.33287058460589236, + "grad_norm": 0.0, + "learning_rate": 1.556556769466414e-05, + "loss": 0.7954, + "step": 1079 + }, + { + "epoch": 0.33317908375751965, + "grad_norm": 0.0, + "learning_rate": 1.5557260540060047e-05, + "loss": 0.7761, + "step": 1080 + }, + { + "epoch": 0.333487582909147, + "grad_norm": 0.0, + "learning_rate": 1.554894783317153e-05, + "loss": 0.8671, + "step": 1081 + }, + { + "epoch": 0.3337960820607743, + "grad_norm": 0.0, + "learning_rate": 1.554062958230385e-05, + "loss": 0.8552, + "step": 1082 + }, + { + "epoch": 0.33410458121240166, + "grad_norm": 0.0, + "learning_rate": 1.5532305795767817e-05, + "loss": 0.844, + "step": 1083 + }, + { + "epoch": 0.334413080364029, + "grad_norm": 0.0, + "learning_rate": 1.5523976481879754e-05, + "loss": 0.7294, + "step": 1084 + }, + { + "epoch": 0.33472157951565634, + "grad_norm": 0.0, + "learning_rate": 1.5515641648961526e-05, + "loss": 0.8419, + "step": 1085 + }, + { + "epoch": 0.3350300786672837, + "grad_norm": 0.0, + "learning_rate": 1.5507301305340496e-05, + "loss": 0.8752, + "step": 1086 + }, + { + "epoch": 0.335338577818911, + "grad_norm": 0.0, + "learning_rate": 1.549895545934954e-05, + "loss": 0.7934, + "step": 1087 + }, + { + "epoch": 0.3356470769705383, + "grad_norm": 0.0, + "learning_rate": 1.549060411932704e-05, + "loss": 0.928, + "step": 1088 + }, + { + "epoch": 0.33595557612216564, + "grad_norm": 0.0, + "learning_rate": 1.5482247293616843e-05, + "loss": 0.859, + "step": 1089 + }, + { + "epoch": 0.336264075273793, + "grad_norm": 0.0, + "learning_rate": 1.5473884990568298e-05, + "loss": 0.9184, + "step": 1090 + }, + { + "epoch": 0.3365725744254203, + "grad_norm": 0.0, + "learning_rate": 1.5465517218536228e-05, + "loss": 0.7502, + "step": 1091 + }, + { + "epoch": 0.33688107357704766, + "grad_norm": 0.0, + "learning_rate": 1.5457143985880905e-05, + "loss": 0.7997, + "step": 1092 + }, + { + "epoch": 0.337189572728675, + "grad_norm": 0.0, + "learning_rate": 1.5448765300968066e-05, + "loss": 0.7976, + "step": 1093 + }, + { + "epoch": 0.33749807188030234, + "grad_norm": 0.0, + "learning_rate": 1.544038117216889e-05, + "loss": 0.7606, + "step": 1094 + }, + { + "epoch": 0.3378065710319297, + "grad_norm": 0.0, + "learning_rate": 1.5431991607859997e-05, + "loss": 0.782, + "step": 1095 + }, + { + "epoch": 0.338115070183557, + "grad_norm": 0.0, + "learning_rate": 1.542359661642345e-05, + "loss": 0.7834, + "step": 1096 + }, + { + "epoch": 0.3384235693351843, + "grad_norm": 0.0, + "learning_rate": 1.5415196206246712e-05, + "loss": 0.8193, + "step": 1097 + }, + { + "epoch": 0.33873206848681164, + "grad_norm": 0.0, + "learning_rate": 1.5406790385722676e-05, + "loss": 0.8953, + "step": 1098 + }, + { + "epoch": 0.339040567638439, + "grad_norm": 0.0, + "learning_rate": 1.5398379163249636e-05, + "loss": 0.7241, + "step": 1099 + }, + { + "epoch": 0.3393490667900663, + "grad_norm": 0.0, + "learning_rate": 1.5389962547231286e-05, + "loss": 0.8359, + "step": 1100 + }, + { + "epoch": 0.33965756594169366, + "grad_norm": 0.0, + "learning_rate": 1.5381540546076694e-05, + "loss": 0.7672, + "step": 1101 + }, + { + "epoch": 0.339966065093321, + "grad_norm": 0.0, + "learning_rate": 1.5373113168200332e-05, + "loss": 0.7012, + "step": 1102 + }, + { + "epoch": 0.34027456424494834, + "grad_norm": 0.0, + "learning_rate": 1.536468042202203e-05, + "loss": 0.7484, + "step": 1103 + }, + { + "epoch": 0.3405830633965757, + "grad_norm": 0.0, + "learning_rate": 1.5356242315966974e-05, + "loss": 0.8494, + "step": 1104 + }, + { + "epoch": 0.340891562548203, + "grad_norm": 0.0, + "learning_rate": 1.5347798858465727e-05, + "loss": 0.8927, + "step": 1105 + }, + { + "epoch": 0.3412000616998303, + "grad_norm": 0.0, + "learning_rate": 1.5339350057954178e-05, + "loss": 0.8585, + "step": 1106 + }, + { + "epoch": 0.34150856085145764, + "grad_norm": 0.0, + "learning_rate": 1.5330895922873562e-05, + "loss": 0.8511, + "step": 1107 + }, + { + "epoch": 0.341817060003085, + "grad_norm": 0.0, + "learning_rate": 1.5322436461670445e-05, + "loss": 0.8373, + "step": 1108 + }, + { + "epoch": 0.3421255591547123, + "grad_norm": 0.0, + "learning_rate": 1.531397168279672e-05, + "loss": 0.7488, + "step": 1109 + }, + { + "epoch": 0.34243405830633966, + "grad_norm": 0.0, + "learning_rate": 1.5305501594709578e-05, + "loss": 0.78, + "step": 1110 + }, + { + "epoch": 0.342742557457967, + "grad_norm": 0.0, + "learning_rate": 1.5297026205871528e-05, + "loss": 0.8525, + "step": 1111 + }, + { + "epoch": 0.34305105660959434, + "grad_norm": 0.0, + "learning_rate": 1.5288545524750366e-05, + "loss": 0.7766, + "step": 1112 + }, + { + "epoch": 0.3433595557612217, + "grad_norm": 0.0, + "learning_rate": 1.5280059559819177e-05, + "loss": 0.7915, + "step": 1113 + }, + { + "epoch": 0.34366805491284896, + "grad_norm": 0.0, + "learning_rate": 1.5271568319556336e-05, + "loss": 0.8904, + "step": 1114 + }, + { + "epoch": 0.3439765540644763, + "grad_norm": 0.0, + "learning_rate": 1.5263071812445475e-05, + "loss": 0.7938, + "step": 1115 + }, + { + "epoch": 0.34428505321610364, + "grad_norm": 0.0, + "learning_rate": 1.525457004697549e-05, + "loss": 0.7644, + "step": 1116 + }, + { + "epoch": 0.344593552367731, + "grad_norm": 0.0, + "learning_rate": 1.524606303164054e-05, + "loss": 0.829, + "step": 1117 + }, + { + "epoch": 0.3449020515193583, + "grad_norm": 0.0, + "learning_rate": 1.5237550774940018e-05, + "loss": 0.8274, + "step": 1118 + }, + { + "epoch": 0.34521055067098566, + "grad_norm": 0.0, + "learning_rate": 1.522903328537856e-05, + "loss": 0.7864, + "step": 1119 + }, + { + "epoch": 0.345519049822613, + "grad_norm": 0.0, + "learning_rate": 1.522051057146603e-05, + "loss": 0.7428, + "step": 1120 + }, + { + "epoch": 0.34582754897424034, + "grad_norm": 0.0, + "learning_rate": 1.5211982641717509e-05, + "loss": 0.7813, + "step": 1121 + }, + { + "epoch": 0.3461360481258677, + "grad_norm": 0.0, + "learning_rate": 1.5203449504653294e-05, + "loss": 0.8042, + "step": 1122 + }, + { + "epoch": 0.34644454727749496, + "grad_norm": 0.0, + "learning_rate": 1.5194911168798876e-05, + "loss": 0.8232, + "step": 1123 + }, + { + "epoch": 0.3467530464291223, + "grad_norm": 0.0, + "learning_rate": 1.5186367642684952e-05, + "loss": 0.8271, + "step": 1124 + }, + { + "epoch": 0.34706154558074964, + "grad_norm": 0.0, + "learning_rate": 1.517781893484739e-05, + "loss": 0.8243, + "step": 1125 + }, + { + "epoch": 0.347370044732377, + "grad_norm": 0.0, + "learning_rate": 1.5169265053827246e-05, + "loss": 0.7355, + "step": 1126 + }, + { + "epoch": 0.3476785438840043, + "grad_norm": 0.0, + "learning_rate": 1.5160706008170744e-05, + "loss": 0.7534, + "step": 1127 + }, + { + "epoch": 0.34798704303563166, + "grad_norm": 0.0, + "learning_rate": 1.5152141806429268e-05, + "loss": 0.7316, + "step": 1128 + }, + { + "epoch": 0.348295542187259, + "grad_norm": 0.0, + "learning_rate": 1.5143572457159344e-05, + "loss": 0.8143, + "step": 1129 + }, + { + "epoch": 0.34860404133888634, + "grad_norm": 0.0, + "learning_rate": 1.5134997968922655e-05, + "loss": 0.7644, + "step": 1130 + }, + { + "epoch": 0.3489125404905137, + "grad_norm": 0.0, + "learning_rate": 1.5126418350286005e-05, + "loss": 0.8303, + "step": 1131 + }, + { + "epoch": 0.34922103964214096, + "grad_norm": 0.0, + "learning_rate": 1.5117833609821333e-05, + "loss": 0.7803, + "step": 1132 + }, + { + "epoch": 0.3495295387937683, + "grad_norm": 0.0, + "learning_rate": 1.5109243756105692e-05, + "loss": 0.8298, + "step": 1133 + }, + { + "epoch": 0.34983803794539564, + "grad_norm": 0.0, + "learning_rate": 1.510064879772125e-05, + "loss": 0.8462, + "step": 1134 + }, + { + "epoch": 0.350146537097023, + "grad_norm": 0.0, + "learning_rate": 1.5092048743255258e-05, + "loss": 0.8431, + "step": 1135 + }, + { + "epoch": 0.3504550362486503, + "grad_norm": 0.0, + "learning_rate": 1.5083443601300078e-05, + "loss": 0.8221, + "step": 1136 + }, + { + "epoch": 0.35076353540027766, + "grad_norm": 0.0, + "learning_rate": 1.5074833380453146e-05, + "loss": 0.8576, + "step": 1137 + }, + { + "epoch": 0.351072034551905, + "grad_norm": 0.0, + "learning_rate": 1.5066218089316972e-05, + "loss": 0.8344, + "step": 1138 + }, + { + "epoch": 0.35138053370353234, + "grad_norm": 0.0, + "learning_rate": 1.505759773649913e-05, + "loss": 0.8229, + "step": 1139 + }, + { + "epoch": 0.3516890328551596, + "grad_norm": 0.0, + "learning_rate": 1.5048972330612256e-05, + "loss": 0.7867, + "step": 1140 + }, + { + "epoch": 0.35199753200678696, + "grad_norm": 0.0, + "learning_rate": 1.5040341880274038e-05, + "loss": 0.8067, + "step": 1141 + }, + { + "epoch": 0.3523060311584143, + "grad_norm": 0.0, + "learning_rate": 1.5031706394107188e-05, + "loss": 0.8575, + "step": 1142 + }, + { + "epoch": 0.35261453031004164, + "grad_norm": 0.0, + "learning_rate": 1.502306588073947e-05, + "loss": 0.8686, + "step": 1143 + }, + { + "epoch": 0.352923029461669, + "grad_norm": 0.0, + "learning_rate": 1.5014420348803649e-05, + "loss": 0.7929, + "step": 1144 + }, + { + "epoch": 0.3532315286132963, + "grad_norm": 0.0, + "learning_rate": 1.5005769806937523e-05, + "loss": 0.6688, + "step": 1145 + }, + { + "epoch": 0.35354002776492366, + "grad_norm": 0.0, + "learning_rate": 1.4997114263783887e-05, + "loss": 0.8364, + "step": 1146 + }, + { + "epoch": 0.353848526916551, + "grad_norm": 0.0, + "learning_rate": 1.4988453727990537e-05, + "loss": 0.7549, + "step": 1147 + }, + { + "epoch": 0.35415702606817834, + "grad_norm": 0.0, + "learning_rate": 1.4979788208210249e-05, + "loss": 0.8211, + "step": 1148 + }, + { + "epoch": 0.3544655252198056, + "grad_norm": 0.0, + "learning_rate": 1.4971117713100785e-05, + "loss": 0.8714, + "step": 1149 + }, + { + "epoch": 0.35477402437143296, + "grad_norm": 0.0, + "learning_rate": 1.4962442251324876e-05, + "loss": 0.8186, + "step": 1150 + }, + { + "epoch": 0.3550825235230603, + "grad_norm": 0.0, + "learning_rate": 1.4953761831550212e-05, + "loss": 0.7695, + "step": 1151 + }, + { + "epoch": 0.35539102267468764, + "grad_norm": 0.0, + "learning_rate": 1.4945076462449448e-05, + "loss": 0.8127, + "step": 1152 + }, + { + "epoch": 0.355699521826315, + "grad_norm": 0.0, + "learning_rate": 1.493638615270017e-05, + "loss": 0.7623, + "step": 1153 + }, + { + "epoch": 0.3560080209779423, + "grad_norm": 0.0, + "learning_rate": 1.4927690910984911e-05, + "loss": 0.7749, + "step": 1154 + }, + { + "epoch": 0.35631652012956966, + "grad_norm": 0.0, + "learning_rate": 1.4918990745991122e-05, + "loss": 0.8062, + "step": 1155 + }, + { + "epoch": 0.356625019281197, + "grad_norm": 0.0, + "learning_rate": 1.491028566641118e-05, + "loss": 0.8323, + "step": 1156 + }, + { + "epoch": 0.35693351843282434, + "grad_norm": 0.0, + "learning_rate": 1.4901575680942368e-05, + "loss": 0.791, + "step": 1157 + }, + { + "epoch": 0.3572420175844516, + "grad_norm": 0.0, + "learning_rate": 1.4892860798286875e-05, + "loss": 0.8861, + "step": 1158 + }, + { + "epoch": 0.35755051673607896, + "grad_norm": 0.0, + "learning_rate": 1.4884141027151778e-05, + "loss": 0.8189, + "step": 1159 + }, + { + "epoch": 0.3578590158877063, + "grad_norm": 0.0, + "learning_rate": 1.487541637624904e-05, + "loss": 1.1021, + "step": 1160 + }, + { + "epoch": 0.35816751503933364, + "grad_norm": 0.0, + "learning_rate": 1.4866686854295502e-05, + "loss": 0.8152, + "step": 1161 + }, + { + "epoch": 0.358476014190961, + "grad_norm": 0.0, + "learning_rate": 1.4857952470012871e-05, + "loss": 0.7575, + "step": 1162 + }, + { + "epoch": 0.3587845133425883, + "grad_norm": 0.0, + "learning_rate": 1.4849213232127701e-05, + "loss": 0.681, + "step": 1163 + }, + { + "epoch": 0.35909301249421566, + "grad_norm": 0.0, + "learning_rate": 1.4840469149371414e-05, + "loss": 0.7734, + "step": 1164 + }, + { + "epoch": 0.359401511645843, + "grad_norm": 0.0, + "learning_rate": 1.483172023048026e-05, + "loss": 0.7589, + "step": 1165 + }, + { + "epoch": 0.3597100107974703, + "grad_norm": 0.0, + "learning_rate": 1.4822966484195323e-05, + "loss": 0.8426, + "step": 1166 + }, + { + "epoch": 0.3600185099490976, + "grad_norm": 0.0, + "learning_rate": 1.4814207919262513e-05, + "loss": 0.7434, + "step": 1167 + }, + { + "epoch": 0.36032700910072496, + "grad_norm": 0.0, + "learning_rate": 1.4805444544432547e-05, + "loss": 0.7557, + "step": 1168 + }, + { + "epoch": 0.3606355082523523, + "grad_norm": 0.0, + "learning_rate": 1.4796676368460963e-05, + "loss": 0.7985, + "step": 1169 + }, + { + "epoch": 0.36094400740397964, + "grad_norm": 0.0, + "learning_rate": 1.4787903400108074e-05, + "loss": 0.7233, + "step": 1170 + }, + { + "epoch": 0.361252506555607, + "grad_norm": 0.0, + "learning_rate": 1.4779125648139002e-05, + "loss": 0.774, + "step": 1171 + }, + { + "epoch": 0.3615610057072343, + "grad_norm": 0.0, + "learning_rate": 1.4770343121323633e-05, + "loss": 0.783, + "step": 1172 + }, + { + "epoch": 0.36186950485886166, + "grad_norm": 0.0, + "learning_rate": 1.4761555828436635e-05, + "loss": 0.7645, + "step": 1173 + }, + { + "epoch": 0.362178004010489, + "grad_norm": 0.0, + "learning_rate": 1.4752763778257427e-05, + "loss": 0.8663, + "step": 1174 + }, + { + "epoch": 0.3624865031621163, + "grad_norm": 0.0, + "learning_rate": 1.474396697957019e-05, + "loss": 0.7965, + "step": 1175 + }, + { + "epoch": 0.3627950023137436, + "grad_norm": 0.0, + "learning_rate": 1.4735165441163846e-05, + "loss": 0.8533, + "step": 1176 + }, + { + "epoch": 0.36310350146537096, + "grad_norm": 0.0, + "learning_rate": 1.472635917183205e-05, + "loss": 0.8312, + "step": 1177 + }, + { + "epoch": 0.3634120006169983, + "grad_norm": 0.0, + "learning_rate": 1.4717548180373187e-05, + "loss": 0.8175, + "step": 1178 + }, + { + "epoch": 0.36372049976862564, + "grad_norm": 0.0, + "learning_rate": 1.4708732475590361e-05, + "loss": 0.9286, + "step": 1179 + }, + { + "epoch": 0.364028998920253, + "grad_norm": 0.0, + "learning_rate": 1.4699912066291383e-05, + "loss": 0.6973, + "step": 1180 + }, + { + "epoch": 0.3643374980718803, + "grad_norm": 0.0, + "learning_rate": 1.4691086961288758e-05, + "loss": 0.7972, + "step": 1181 + }, + { + "epoch": 0.36464599722350766, + "grad_norm": 0.0, + "learning_rate": 1.4682257169399697e-05, + "loss": 0.7783, + "step": 1182 + }, + { + "epoch": 0.364954496375135, + "grad_norm": 0.0, + "learning_rate": 1.4673422699446078e-05, + "loss": 0.9437, + "step": 1183 + }, + { + "epoch": 0.3652629955267623, + "grad_norm": 0.0, + "learning_rate": 1.4664583560254465e-05, + "loss": 0.8215, + "step": 1184 + }, + { + "epoch": 0.3655714946783896, + "grad_norm": 0.0, + "learning_rate": 1.4655739760656082e-05, + "loss": 0.7997, + "step": 1185 + }, + { + "epoch": 0.36587999383001696, + "grad_norm": 0.0, + "learning_rate": 1.464689130948681e-05, + "loss": 0.7802, + "step": 1186 + }, + { + "epoch": 0.3661884929816443, + "grad_norm": 0.0, + "learning_rate": 1.4638038215587176e-05, + "loss": 0.8049, + "step": 1187 + }, + { + "epoch": 0.36649699213327164, + "grad_norm": 0.0, + "learning_rate": 1.4629180487802348e-05, + "loss": 0.8285, + "step": 1188 + }, + { + "epoch": 0.366805491284899, + "grad_norm": 0.0, + "learning_rate": 1.4620318134982114e-05, + "loss": 0.743, + "step": 1189 + }, + { + "epoch": 0.3671139904365263, + "grad_norm": 0.0, + "learning_rate": 1.4611451165980905e-05, + "loss": 0.7363, + "step": 1190 + }, + { + "epoch": 0.36742248958815366, + "grad_norm": 0.0, + "learning_rate": 1.4602579589657742e-05, + "loss": 0.8358, + "step": 1191 + }, + { + "epoch": 0.36773098873978094, + "grad_norm": 0.0, + "learning_rate": 1.4593703414876262e-05, + "loss": 0.7217, + "step": 1192 + }, + { + "epoch": 0.3680394878914083, + "grad_norm": 0.0, + "learning_rate": 1.4584822650504685e-05, + "loss": 0.7513, + "step": 1193 + }, + { + "epoch": 0.3683479870430356, + "grad_norm": 0.0, + "learning_rate": 1.4575937305415829e-05, + "loss": 0.8975, + "step": 1194 + }, + { + "epoch": 0.36865648619466296, + "grad_norm": 0.0, + "learning_rate": 1.4567047388487077e-05, + "loss": 0.8446, + "step": 1195 + }, + { + "epoch": 0.3689649853462903, + "grad_norm": 0.0, + "learning_rate": 1.4558152908600394e-05, + "loss": 0.7981, + "step": 1196 + }, + { + "epoch": 0.36927348449791764, + "grad_norm": 0.0, + "learning_rate": 1.4549253874642289e-05, + "loss": 0.749, + "step": 1197 + }, + { + "epoch": 0.369581983649545, + "grad_norm": 0.0, + "learning_rate": 1.4540350295503834e-05, + "loss": 0.8389, + "step": 1198 + }, + { + "epoch": 0.3698904828011723, + "grad_norm": 0.0, + "learning_rate": 1.4531442180080625e-05, + "loss": 0.8098, + "step": 1199 + }, + { + "epoch": 0.37019898195279966, + "grad_norm": 0.0, + "learning_rate": 1.4522529537272813e-05, + "loss": 0.84, + "step": 1200 + }, + { + "epoch": 0.37050748110442694, + "grad_norm": 0.0, + "learning_rate": 1.451361237598505e-05, + "loss": 0.7923, + "step": 1201 + }, + { + "epoch": 0.3708159802560543, + "grad_norm": 0.0, + "learning_rate": 1.4504690705126519e-05, + "loss": 0.7732, + "step": 1202 + }, + { + "epoch": 0.3711244794076816, + "grad_norm": 0.0, + "learning_rate": 1.4495764533610902e-05, + "loss": 0.8072, + "step": 1203 + }, + { + "epoch": 0.37143297855930896, + "grad_norm": 0.0, + "learning_rate": 1.4486833870356374e-05, + "loss": 0.7858, + "step": 1204 + }, + { + "epoch": 0.3717414777109363, + "grad_norm": 0.0, + "learning_rate": 1.4477898724285603e-05, + "loss": 0.7929, + "step": 1205 + }, + { + "epoch": 0.37204997686256364, + "grad_norm": 0.0, + "learning_rate": 1.4468959104325737e-05, + "loss": 0.7335, + "step": 1206 + }, + { + "epoch": 0.372358476014191, + "grad_norm": 0.0, + "learning_rate": 1.446001501940839e-05, + "loss": 0.9012, + "step": 1207 + }, + { + "epoch": 0.3726669751658183, + "grad_norm": 0.0, + "learning_rate": 1.4451066478469633e-05, + "loss": 0.8734, + "step": 1208 + }, + { + "epoch": 0.37297547431744565, + "grad_norm": 0.0, + "learning_rate": 1.4442113490450002e-05, + "loss": 0.8447, + "step": 1209 + }, + { + "epoch": 0.37328397346907294, + "grad_norm": 0.0, + "learning_rate": 1.4433156064294465e-05, + "loss": 0.7566, + "step": 1210 + }, + { + "epoch": 0.3735924726207003, + "grad_norm": 0.0, + "learning_rate": 1.4424194208952427e-05, + "loss": 0.8284, + "step": 1211 + }, + { + "epoch": 0.3739009717723276, + "grad_norm": 0.0, + "learning_rate": 1.4415227933377715e-05, + "loss": 0.8023, + "step": 1212 + }, + { + "epoch": 0.37420947092395496, + "grad_norm": 0.0, + "learning_rate": 1.4406257246528584e-05, + "loss": 0.8064, + "step": 1213 + }, + { + "epoch": 0.3745179700755823, + "grad_norm": 0.0, + "learning_rate": 1.4397282157367682e-05, + "loss": 0.7575, + "step": 1214 + }, + { + "epoch": 0.37482646922720964, + "grad_norm": 0.0, + "learning_rate": 1.4388302674862065e-05, + "loss": 0.8027, + "step": 1215 + }, + { + "epoch": 0.375134968378837, + "grad_norm": 0.0, + "learning_rate": 1.4379318807983172e-05, + "loss": 0.8194, + "step": 1216 + }, + { + "epoch": 0.3754434675304643, + "grad_norm": 0.0, + "learning_rate": 1.4370330565706826e-05, + "loss": 0.8225, + "step": 1217 + }, + { + "epoch": 0.3757519666820916, + "grad_norm": 0.0, + "learning_rate": 1.4361337957013227e-05, + "loss": 0.7896, + "step": 1218 + }, + { + "epoch": 0.37606046583371894, + "grad_norm": 0.0, + "learning_rate": 1.4352340990886924e-05, + "loss": 0.7724, + "step": 1219 + }, + { + "epoch": 0.3763689649853463, + "grad_norm": 0.0, + "learning_rate": 1.434333967631683e-05, + "loss": 0.7446, + "step": 1220 + }, + { + "epoch": 0.3766774641369736, + "grad_norm": 0.0, + "learning_rate": 1.4334334022296196e-05, + "loss": 0.8344, + "step": 1221 + }, + { + "epoch": 0.37698596328860096, + "grad_norm": 0.0, + "learning_rate": 1.432532403782262e-05, + "loss": 0.7863, + "step": 1222 + }, + { + "epoch": 0.3772944624402283, + "grad_norm": 0.0, + "learning_rate": 1.431630973189801e-05, + "loss": 0.8061, + "step": 1223 + }, + { + "epoch": 0.37760296159185563, + "grad_norm": 0.0, + "learning_rate": 1.430729111352861e-05, + "loss": 0.7785, + "step": 1224 + }, + { + "epoch": 0.377911460743483, + "grad_norm": 0.0, + "learning_rate": 1.4298268191724951e-05, + "loss": 0.8627, + "step": 1225 + }, + { + "epoch": 0.3782199598951103, + "grad_norm": 0.0, + "learning_rate": 1.4289240975501885e-05, + "loss": 0.714, + "step": 1226 + }, + { + "epoch": 0.3785284590467376, + "grad_norm": 0.0, + "learning_rate": 1.4280209473878541e-05, + "loss": 0.8796, + "step": 1227 + }, + { + "epoch": 0.37883695819836494, + "grad_norm": 0.0, + "learning_rate": 1.4271173695878335e-05, + "loss": 0.8085, + "step": 1228 + }, + { + "epoch": 0.3791454573499923, + "grad_norm": 0.0, + "learning_rate": 1.4262133650528951e-05, + "loss": 0.7804, + "step": 1229 + }, + { + "epoch": 0.3794539565016196, + "grad_norm": 0.0, + "learning_rate": 1.4253089346862346e-05, + "loss": 0.8191, + "step": 1230 + }, + { + "epoch": 0.37976245565324696, + "grad_norm": 0.0, + "learning_rate": 1.4244040793914717e-05, + "loss": 0.7658, + "step": 1231 + }, + { + "epoch": 0.3800709548048743, + "grad_norm": 0.0, + "learning_rate": 1.423498800072652e-05, + "loss": 0.7949, + "step": 1232 + }, + { + "epoch": 0.38037945395650163, + "grad_norm": 0.0, + "learning_rate": 1.422593097634244e-05, + "loss": 0.8312, + "step": 1233 + }, + { + "epoch": 0.380687953108129, + "grad_norm": 0.0, + "learning_rate": 1.4216869729811393e-05, + "loss": 0.7824, + "step": 1234 + }, + { + "epoch": 0.3809964522597563, + "grad_norm": 0.0, + "learning_rate": 1.4207804270186514e-05, + "loss": 0.8182, + "step": 1235 + }, + { + "epoch": 0.3813049514113836, + "grad_norm": 0.0, + "learning_rate": 1.419873460652514e-05, + "loss": 0.8145, + "step": 1236 + }, + { + "epoch": 0.38161345056301094, + "grad_norm": 0.0, + "learning_rate": 1.4189660747888816e-05, + "loss": 0.7986, + "step": 1237 + }, + { + "epoch": 0.3819219497146383, + "grad_norm": 0.0, + "learning_rate": 1.4180582703343276e-05, + "loss": 0.7957, + "step": 1238 + }, + { + "epoch": 0.3822304488662656, + "grad_norm": 0.0, + "learning_rate": 1.4171500481958433e-05, + "loss": 0.8269, + "step": 1239 + }, + { + "epoch": 0.38253894801789295, + "grad_norm": 0.0, + "learning_rate": 1.416241409280838e-05, + "loss": 0.7489, + "step": 1240 + }, + { + "epoch": 0.3828474471695203, + "grad_norm": 0.0, + "learning_rate": 1.4153323544971371e-05, + "loss": 0.7288, + "step": 1241 + }, + { + "epoch": 0.38315594632114763, + "grad_norm": 0.0, + "learning_rate": 1.4144228847529809e-05, + "loss": 0.9038, + "step": 1242 + }, + { + "epoch": 0.383464445472775, + "grad_norm": 0.0, + "learning_rate": 1.4135130009570251e-05, + "loss": 0.7882, + "step": 1243 + }, + { + "epoch": 0.38377294462440226, + "grad_norm": 0.0, + "learning_rate": 1.4126027040183382e-05, + "loss": 0.7397, + "step": 1244 + }, + { + "epoch": 0.3840814437760296, + "grad_norm": 0.0, + "learning_rate": 1.411691994846403e-05, + "loss": 0.7556, + "step": 1245 + }, + { + "epoch": 0.38438994292765694, + "grad_norm": 0.0, + "learning_rate": 1.4107808743511124e-05, + "loss": 0.7622, + "step": 1246 + }, + { + "epoch": 0.3846984420792843, + "grad_norm": 0.0, + "learning_rate": 1.4098693434427715e-05, + "loss": 0.9192, + "step": 1247 + }, + { + "epoch": 0.3850069412309116, + "grad_norm": 0.0, + "learning_rate": 1.4089574030320947e-05, + "loss": 0.7547, + "step": 1248 + }, + { + "epoch": 0.38531544038253895, + "grad_norm": 0.0, + "learning_rate": 1.4080450540302061e-05, + "loss": 0.8098, + "step": 1249 + }, + { + "epoch": 0.3856239395341663, + "grad_norm": 0.0, + "learning_rate": 1.4071322973486376e-05, + "loss": 0.7672, + "step": 1250 + }, + { + "epoch": 0.38593243868579363, + "grad_norm": 0.0, + "learning_rate": 1.4062191338993288e-05, + "loss": 0.7344, + "step": 1251 + }, + { + "epoch": 0.386240937837421, + "grad_norm": 0.0, + "learning_rate": 1.405305564594625e-05, + "loss": 0.7607, + "step": 1252 + }, + { + "epoch": 0.38654943698904826, + "grad_norm": 0.0, + "learning_rate": 1.4043915903472777e-05, + "loss": 0.7472, + "step": 1253 + }, + { + "epoch": 0.3868579361406756, + "grad_norm": 0.0, + "learning_rate": 1.4034772120704431e-05, + "loss": 0.9083, + "step": 1254 + }, + { + "epoch": 0.38716643529230294, + "grad_norm": 0.0, + "learning_rate": 1.4025624306776806e-05, + "loss": 0.8289, + "step": 1255 + }, + { + "epoch": 0.3874749344439303, + "grad_norm": 0.0, + "learning_rate": 1.401647247082952e-05, + "loss": 0.7318, + "step": 1256 + }, + { + "epoch": 0.3877834335955576, + "grad_norm": 0.0, + "learning_rate": 1.400731662200622e-05, + "loss": 0.8786, + "step": 1257 + }, + { + "epoch": 0.38809193274718495, + "grad_norm": 0.0, + "learning_rate": 1.3998156769454549e-05, + "loss": 0.8683, + "step": 1258 + }, + { + "epoch": 0.3884004318988123, + "grad_norm": 0.0, + "learning_rate": 1.3988992922326166e-05, + "loss": 1.088, + "step": 1259 + }, + { + "epoch": 0.38870893105043963, + "grad_norm": 0.0, + "learning_rate": 1.3979825089776708e-05, + "loss": 0.8816, + "step": 1260 + }, + { + "epoch": 0.38901743020206697, + "grad_norm": 0.0, + "learning_rate": 1.39706532809658e-05, + "loss": 0.8478, + "step": 1261 + }, + { + "epoch": 0.38932592935369426, + "grad_norm": 0.0, + "learning_rate": 1.396147750505704e-05, + "loss": 0.7809, + "step": 1262 + }, + { + "epoch": 0.3896344285053216, + "grad_norm": 0.0, + "learning_rate": 1.395229777121798e-05, + "loss": 0.7805, + "step": 1263 + }, + { + "epoch": 0.38994292765694893, + "grad_norm": 0.0, + "learning_rate": 1.394311408862014e-05, + "loss": 0.8007, + "step": 1264 + }, + { + "epoch": 0.3902514268085763, + "grad_norm": 0.0, + "learning_rate": 1.3933926466438979e-05, + "loss": 0.7991, + "step": 1265 + }, + { + "epoch": 0.3905599259602036, + "grad_norm": 0.0, + "learning_rate": 1.3924734913853893e-05, + "loss": 0.7925, + "step": 1266 + }, + { + "epoch": 0.39086842511183095, + "grad_norm": 0.0, + "learning_rate": 1.3915539440048203e-05, + "loss": 0.8606, + "step": 1267 + }, + { + "epoch": 0.3911769242634583, + "grad_norm": 0.0, + "learning_rate": 1.3906340054209149e-05, + "loss": 0.693, + "step": 1268 + }, + { + "epoch": 0.39148542341508563, + "grad_norm": 0.0, + "learning_rate": 1.3897136765527879e-05, + "loss": 0.8273, + "step": 1269 + }, + { + "epoch": 0.3917939225667129, + "grad_norm": 0.0, + "learning_rate": 1.388792958319944e-05, + "loss": 0.7817, + "step": 1270 + }, + { + "epoch": 0.39210242171834025, + "grad_norm": 0.0, + "learning_rate": 1.3878718516422773e-05, + "loss": 0.7842, + "step": 1271 + }, + { + "epoch": 0.3924109208699676, + "grad_norm": 0.0, + "learning_rate": 1.3869503574400694e-05, + "loss": 0.7567, + "step": 1272 + }, + { + "epoch": 0.39271942002159493, + "grad_norm": 0.0, + "learning_rate": 1.3860284766339896e-05, + "loss": 0.7805, + "step": 1273 + }, + { + "epoch": 0.3930279191732223, + "grad_norm": 0.0, + "learning_rate": 1.385106210145093e-05, + "loss": 0.7966, + "step": 1274 + }, + { + "epoch": 0.3933364183248496, + "grad_norm": 0.0, + "learning_rate": 1.3841835588948208e-05, + "loss": 0.8702, + "step": 1275 + }, + { + "epoch": 0.39364491747647695, + "grad_norm": 0.0, + "learning_rate": 1.3832605238049972e-05, + "loss": 0.8001, + "step": 1276 + }, + { + "epoch": 0.3939534166281043, + "grad_norm": 0.0, + "learning_rate": 1.3823371057978312e-05, + "loss": 0.8095, + "step": 1277 + }, + { + "epoch": 0.39426191577973163, + "grad_norm": 0.0, + "learning_rate": 1.3814133057959143e-05, + "loss": 0.8103, + "step": 1278 + }, + { + "epoch": 0.3945704149313589, + "grad_norm": 0.0, + "learning_rate": 1.380489124722219e-05, + "loss": 0.7678, + "step": 1279 + }, + { + "epoch": 0.39487891408298625, + "grad_norm": 0.0, + "learning_rate": 1.3795645635000987e-05, + "loss": 0.8294, + "step": 1280 + }, + { + "epoch": 0.3951874132346136, + "grad_norm": 0.0, + "learning_rate": 1.3786396230532871e-05, + "loss": 0.7952, + "step": 1281 + }, + { + "epoch": 0.39549591238624093, + "grad_norm": 0.0, + "learning_rate": 1.377714304305896e-05, + "loss": 0.7974, + "step": 1282 + }, + { + "epoch": 0.3958044115378683, + "grad_norm": 0.0, + "learning_rate": 1.3767886081824158e-05, + "loss": 0.7424, + "step": 1283 + }, + { + "epoch": 0.3961129106894956, + "grad_norm": 0.0, + "learning_rate": 1.375862535607714e-05, + "loss": 0.7725, + "step": 1284 + }, + { + "epoch": 0.39642140984112295, + "grad_norm": 0.0, + "learning_rate": 1.3749360875070337e-05, + "loss": 0.7731, + "step": 1285 + }, + { + "epoch": 0.3967299089927503, + "grad_norm": 0.0, + "learning_rate": 1.3740092648059933e-05, + "loss": 0.7925, + "step": 1286 + }, + { + "epoch": 0.39703840814437763, + "grad_norm": 0.0, + "learning_rate": 1.3730820684305857e-05, + "loss": 0.7795, + "step": 1287 + }, + { + "epoch": 0.3973469072960049, + "grad_norm": 0.0, + "learning_rate": 1.3721544993071774e-05, + "loss": 0.7334, + "step": 1288 + }, + { + "epoch": 0.39765540644763225, + "grad_norm": 0.0, + "learning_rate": 1.3712265583625059e-05, + "loss": 0.7917, + "step": 1289 + }, + { + "epoch": 0.3979639055992596, + "grad_norm": 0.0, + "learning_rate": 1.3702982465236827e-05, + "loss": 0.7484, + "step": 1290 + }, + { + "epoch": 0.39827240475088693, + "grad_norm": 0.0, + "learning_rate": 1.3693695647181873e-05, + "loss": 0.8103, + "step": 1291 + }, + { + "epoch": 0.39858090390251427, + "grad_norm": 0.0, + "learning_rate": 1.3684405138738705e-05, + "loss": 0.759, + "step": 1292 + }, + { + "epoch": 0.3988894030541416, + "grad_norm": 0.0, + "learning_rate": 1.367511094918951e-05, + "loss": 0.8789, + "step": 1293 + }, + { + "epoch": 0.39919790220576895, + "grad_norm": 0.0, + "learning_rate": 1.3665813087820157e-05, + "loss": 0.8449, + "step": 1294 + }, + { + "epoch": 0.3995064013573963, + "grad_norm": 0.0, + "learning_rate": 1.3656511563920178e-05, + "loss": 0.7016, + "step": 1295 + }, + { + "epoch": 0.3998149005090236, + "grad_norm": 0.0, + "learning_rate": 1.3647206386782774e-05, + "loss": 0.7889, + "step": 1296 + }, + { + "epoch": 0.4001233996606509, + "grad_norm": 0.0, + "learning_rate": 1.3637897565704785e-05, + "loss": 0.7689, + "step": 1297 + }, + { + "epoch": 0.40043189881227825, + "grad_norm": 0.0, + "learning_rate": 1.3628585109986699e-05, + "loss": 0.7998, + "step": 1298 + }, + { + "epoch": 0.4007403979639056, + "grad_norm": 0.0, + "learning_rate": 1.3619269028932633e-05, + "loss": 0.763, + "step": 1299 + }, + { + "epoch": 0.40104889711553293, + "grad_norm": 0.0, + "learning_rate": 1.3609949331850323e-05, + "loss": 0.8125, + "step": 1300 + }, + { + "epoch": 0.40135739626716027, + "grad_norm": 0.0, + "learning_rate": 1.3600626028051127e-05, + "loss": 0.7986, + "step": 1301 + }, + { + "epoch": 0.4016658954187876, + "grad_norm": 0.0, + "learning_rate": 1.3591299126849994e-05, + "loss": 1.0926, + "step": 1302 + }, + { + "epoch": 0.40197439457041495, + "grad_norm": 0.0, + "learning_rate": 1.3581968637565481e-05, + "loss": 0.8111, + "step": 1303 + }, + { + "epoch": 0.4022828937220423, + "grad_norm": 0.0, + "learning_rate": 1.3572634569519718e-05, + "loss": 0.8507, + "step": 1304 + }, + { + "epoch": 0.4025913928736696, + "grad_norm": 0.0, + "learning_rate": 1.3563296932038416e-05, + "loss": 0.7634, + "step": 1305 + }, + { + "epoch": 0.4028998920252969, + "grad_norm": 0.0, + "learning_rate": 1.355395573445085e-05, + "loss": 0.8496, + "step": 1306 + }, + { + "epoch": 0.40320839117692425, + "grad_norm": 0.0, + "learning_rate": 1.3544610986089853e-05, + "loss": 0.808, + "step": 1307 + }, + { + "epoch": 0.4035168903285516, + "grad_norm": 0.0, + "learning_rate": 1.3535262696291805e-05, + "loss": 0.7791, + "step": 1308 + }, + { + "epoch": 0.40382538948017893, + "grad_norm": 0.0, + "learning_rate": 1.352591087439663e-05, + "loss": 0.8124, + "step": 1309 + }, + { + "epoch": 0.40413388863180627, + "grad_norm": 0.0, + "learning_rate": 1.3516555529747772e-05, + "loss": 0.7973, + "step": 1310 + }, + { + "epoch": 0.4044423877834336, + "grad_norm": 0.0, + "learning_rate": 1.3507196671692202e-05, + "loss": 0.8126, + "step": 1311 + }, + { + "epoch": 0.40475088693506095, + "grad_norm": 0.0, + "learning_rate": 1.3497834309580396e-05, + "loss": 0.78, + "step": 1312 + }, + { + "epoch": 0.4050593860866883, + "grad_norm": 0.0, + "learning_rate": 1.3488468452766328e-05, + "loss": 0.9037, + "step": 1313 + }, + { + "epoch": 0.4053678852383156, + "grad_norm": 0.0, + "learning_rate": 1.3479099110607475e-05, + "loss": 0.7797, + "step": 1314 + }, + { + "epoch": 0.4056763843899429, + "grad_norm": 0.0, + "learning_rate": 1.3469726292464788e-05, + "loss": 0.7638, + "step": 1315 + }, + { + "epoch": 0.40598488354157025, + "grad_norm": 0.0, + "learning_rate": 1.3460350007702691e-05, + "loss": 0.7942, + "step": 1316 + }, + { + "epoch": 0.4062933826931976, + "grad_norm": 0.0, + "learning_rate": 1.3450970265689074e-05, + "loss": 0.8901, + "step": 1317 + }, + { + "epoch": 0.40660188184482493, + "grad_norm": 0.0, + "learning_rate": 1.3441587075795281e-05, + "loss": 0.7406, + "step": 1318 + }, + { + "epoch": 0.40691038099645227, + "grad_norm": 0.0, + "learning_rate": 1.34322004473961e-05, + "loss": 0.7988, + "step": 1319 + }, + { + "epoch": 0.4072188801480796, + "grad_norm": 0.0, + "learning_rate": 1.3422810389869749e-05, + "loss": 0.7412, + "step": 1320 + }, + { + "epoch": 0.40752737929970695, + "grad_norm": 0.0, + "learning_rate": 1.3413416912597885e-05, + "loss": 0.8367, + "step": 1321 + }, + { + "epoch": 0.40783587845133423, + "grad_norm": 0.0, + "learning_rate": 1.340402002496557e-05, + "loss": 0.7749, + "step": 1322 + }, + { + "epoch": 0.40814437760296157, + "grad_norm": 0.0, + "learning_rate": 1.3394619736361278e-05, + "loss": 0.7756, + "step": 1323 + }, + { + "epoch": 0.4084528767545889, + "grad_norm": 0.0, + "learning_rate": 1.3385216056176882e-05, + "loss": 0.7485, + "step": 1324 + }, + { + "epoch": 0.40876137590621625, + "grad_norm": 0.0, + "learning_rate": 1.337580899380764e-05, + "loss": 0.732, + "step": 1325 + }, + { + "epoch": 0.4090698750578436, + "grad_norm": 0.0, + "learning_rate": 1.3366398558652192e-05, + "loss": 0.77, + "step": 1326 + }, + { + "epoch": 0.40937837420947093, + "grad_norm": 0.0, + "learning_rate": 1.3356984760112543e-05, + "loss": 0.7426, + "step": 1327 + }, + { + "epoch": 0.40968687336109827, + "grad_norm": 0.0, + "learning_rate": 1.3347567607594069e-05, + "loss": 0.7481, + "step": 1328 + }, + { + "epoch": 0.4099953725127256, + "grad_norm": 0.0, + "learning_rate": 1.3338147110505486e-05, + "loss": 0.8337, + "step": 1329 + }, + { + "epoch": 0.41030387166435295, + "grad_norm": 0.0, + "learning_rate": 1.3328723278258857e-05, + "loss": 0.8113, + "step": 1330 + }, + { + "epoch": 0.41061237081598023, + "grad_norm": 0.0, + "learning_rate": 1.3319296120269573e-05, + "loss": 0.8169, + "step": 1331 + }, + { + "epoch": 0.41092086996760757, + "grad_norm": 0.0, + "learning_rate": 1.3309865645956355e-05, + "loss": 0.8443, + "step": 1332 + }, + { + "epoch": 0.4112293691192349, + "grad_norm": 0.0, + "learning_rate": 1.3300431864741229e-05, + "loss": 0.7986, + "step": 1333 + }, + { + "epoch": 0.41153786827086225, + "grad_norm": 0.0, + "learning_rate": 1.3290994786049532e-05, + "loss": 0.6888, + "step": 1334 + }, + { + "epoch": 0.4118463674224896, + "grad_norm": 0.0, + "learning_rate": 1.3281554419309892e-05, + "loss": 0.7991, + "step": 1335 + }, + { + "epoch": 0.41215486657411693, + "grad_norm": 0.0, + "learning_rate": 1.3272110773954221e-05, + "loss": 0.7479, + "step": 1336 + }, + { + "epoch": 0.41246336572574427, + "grad_norm": 0.0, + "learning_rate": 1.326266385941771e-05, + "loss": 0.6592, + "step": 1337 + }, + { + "epoch": 0.4127718648773716, + "grad_norm": 0.0, + "learning_rate": 1.3253213685138814e-05, + "loss": 0.7235, + "step": 1338 + }, + { + "epoch": 0.41308036402899895, + "grad_norm": 0.0, + "learning_rate": 1.3243760260559245e-05, + "loss": 0.8486, + "step": 1339 + }, + { + "epoch": 0.41338886318062623, + "grad_norm": 0.0, + "learning_rate": 1.323430359512396e-05, + "loss": 0.7809, + "step": 1340 + }, + { + "epoch": 0.41369736233225357, + "grad_norm": 0.0, + "learning_rate": 1.3224843698281165e-05, + "loss": 0.7706, + "step": 1341 + }, + { + "epoch": 0.4140058614838809, + "grad_norm": 0.0, + "learning_rate": 1.3215380579482277e-05, + "loss": 0.7855, + "step": 1342 + }, + { + "epoch": 0.41431436063550825, + "grad_norm": 0.0, + "learning_rate": 1.3205914248181948e-05, + "loss": 0.8558, + "step": 1343 + }, + { + "epoch": 0.4146228597871356, + "grad_norm": 0.0, + "learning_rate": 1.3196444713838028e-05, + "loss": 0.7852, + "step": 1344 + }, + { + "epoch": 0.41493135893876293, + "grad_norm": 0.0, + "learning_rate": 1.3186971985911575e-05, + "loss": 0.7275, + "step": 1345 + }, + { + "epoch": 0.41523985809039027, + "grad_norm": 0.0, + "learning_rate": 1.317749607386683e-05, + "loss": 0.8248, + "step": 1346 + }, + { + "epoch": 0.4155483572420176, + "grad_norm": 0.0, + "learning_rate": 1.316801698717123e-05, + "loss": 0.8147, + "step": 1347 + }, + { + "epoch": 0.4158568563936449, + "grad_norm": 0.0, + "learning_rate": 1.3158534735295365e-05, + "loss": 0.8669, + "step": 1348 + }, + { + "epoch": 0.41616535554527223, + "grad_norm": 0.0, + "learning_rate": 1.3149049327712996e-05, + "loss": 0.7874, + "step": 1349 + }, + { + "epoch": 0.41647385469689957, + "grad_norm": 0.0, + "learning_rate": 1.3139560773901045e-05, + "loss": 0.7368, + "step": 1350 + }, + { + "epoch": 0.4167823538485269, + "grad_norm": 0.0, + "learning_rate": 1.3130069083339563e-05, + "loss": 0.7996, + "step": 1351 + }, + { + "epoch": 0.41709085300015425, + "grad_norm": 0.0, + "learning_rate": 1.3120574265511744e-05, + "loss": 0.7961, + "step": 1352 + }, + { + "epoch": 0.4173993521517816, + "grad_norm": 0.0, + "learning_rate": 1.3111076329903898e-05, + "loss": 0.7844, + "step": 1353 + }, + { + "epoch": 0.41770785130340893, + "grad_norm": 0.0, + "learning_rate": 1.3101575286005467e-05, + "loss": 0.7793, + "step": 1354 + }, + { + "epoch": 0.41801635045503627, + "grad_norm": 0.0, + "learning_rate": 1.309207114330898e-05, + "loss": 0.7862, + "step": 1355 + }, + { + "epoch": 0.4183248496066636, + "grad_norm": 0.0, + "learning_rate": 1.3082563911310069e-05, + "loss": 0.8153, + "step": 1356 + }, + { + "epoch": 0.4186333487582909, + "grad_norm": 0.0, + "learning_rate": 1.3073053599507455e-05, + "loss": 0.752, + "step": 1357 + }, + { + "epoch": 0.41894184790991823, + "grad_norm": 0.0, + "learning_rate": 1.3063540217402934e-05, + "loss": 0.8408, + "step": 1358 + }, + { + "epoch": 0.41925034706154557, + "grad_norm": 0.0, + "learning_rate": 1.305402377450137e-05, + "loss": 0.8508, + "step": 1359 + }, + { + "epoch": 0.4195588462131729, + "grad_norm": 0.0, + "learning_rate": 1.3044504280310683e-05, + "loss": 0.6705, + "step": 1360 + }, + { + "epoch": 0.41986734536480025, + "grad_norm": 0.0, + "learning_rate": 1.3034981744341843e-05, + "loss": 0.8153, + "step": 1361 + }, + { + "epoch": 0.4201758445164276, + "grad_norm": 0.0, + "learning_rate": 1.3025456176108864e-05, + "loss": 0.8187, + "step": 1362 + }, + { + "epoch": 0.4204843436680549, + "grad_norm": 0.0, + "learning_rate": 1.301592758512878e-05, + "loss": 0.8127, + "step": 1363 + }, + { + "epoch": 0.42079284281968227, + "grad_norm": 0.0, + "learning_rate": 1.3006395980921649e-05, + "loss": 0.6688, + "step": 1364 + }, + { + "epoch": 0.4211013419713096, + "grad_norm": 0.0, + "learning_rate": 1.2996861373010543e-05, + "loss": 0.8386, + "step": 1365 + }, + { + "epoch": 0.4214098411229369, + "grad_norm": 0.0, + "learning_rate": 1.2987323770921531e-05, + "loss": 0.8533, + "step": 1366 + }, + { + "epoch": 0.42171834027456423, + "grad_norm": 0.0, + "learning_rate": 1.2977783184183679e-05, + "loss": 0.8437, + "step": 1367 + }, + { + "epoch": 0.42202683942619157, + "grad_norm": 0.0, + "learning_rate": 1.2968239622329027e-05, + "loss": 0.8493, + "step": 1368 + }, + { + "epoch": 0.4223353385778189, + "grad_norm": 0.0, + "learning_rate": 1.2958693094892589e-05, + "loss": 0.7388, + "step": 1369 + }, + { + "epoch": 0.42264383772944625, + "grad_norm": 0.0, + "learning_rate": 1.294914361141235e-05, + "loss": 0.7706, + "step": 1370 + }, + { + "epoch": 0.4229523368810736, + "grad_norm": 0.0, + "learning_rate": 1.2939591181429235e-05, + "loss": 0.7611, + "step": 1371 + }, + { + "epoch": 0.4232608360327009, + "grad_norm": 0.0, + "learning_rate": 1.2930035814487128e-05, + "loss": 0.784, + "step": 1372 + }, + { + "epoch": 0.42356933518432827, + "grad_norm": 0.0, + "learning_rate": 1.292047752013284e-05, + "loss": 0.7931, + "step": 1373 + }, + { + "epoch": 0.42387783433595555, + "grad_norm": 0.0, + "learning_rate": 1.29109163079161e-05, + "loss": 0.7797, + "step": 1374 + }, + { + "epoch": 0.4241863334875829, + "grad_norm": 0.0, + "learning_rate": 1.2901352187389562e-05, + "loss": 0.7803, + "step": 1375 + }, + { + "epoch": 0.42449483263921023, + "grad_norm": 0.0, + "learning_rate": 1.2891785168108784e-05, + "loss": 0.7076, + "step": 1376 + }, + { + "epoch": 0.42480333179083757, + "grad_norm": 0.0, + "learning_rate": 1.2882215259632217e-05, + "loss": 0.7885, + "step": 1377 + }, + { + "epoch": 0.4251118309424649, + "grad_norm": 0.0, + "learning_rate": 1.28726424715212e-05, + "loss": 0.7596, + "step": 1378 + }, + { + "epoch": 0.42542033009409225, + "grad_norm": 0.0, + "learning_rate": 1.2863066813339953e-05, + "loss": 0.773, + "step": 1379 + }, + { + "epoch": 0.4257288292457196, + "grad_norm": 0.0, + "learning_rate": 1.2853488294655556e-05, + "loss": 0.8012, + "step": 1380 + }, + { + "epoch": 0.4260373283973469, + "grad_norm": 0.0, + "learning_rate": 1.2843906925037955e-05, + "loss": 0.766, + "step": 1381 + }, + { + "epoch": 0.42634582754897427, + "grad_norm": 0.0, + "learning_rate": 1.2834322714059932e-05, + "loss": 0.7433, + "step": 1382 + }, + { + "epoch": 0.42665432670060155, + "grad_norm": 0.0, + "learning_rate": 1.2824735671297121e-05, + "loss": 0.7938, + "step": 1383 + }, + { + "epoch": 0.4269628258522289, + "grad_norm": 0.0, + "learning_rate": 1.2815145806327986e-05, + "loss": 0.7867, + "step": 1384 + }, + { + "epoch": 0.42727132500385623, + "grad_norm": 0.0, + "learning_rate": 1.2805553128733797e-05, + "loss": 0.697, + "step": 1385 + }, + { + "epoch": 0.42757982415548357, + "grad_norm": 0.0, + "learning_rate": 1.2795957648098645e-05, + "loss": 0.7269, + "step": 1386 + }, + { + "epoch": 0.4278883233071109, + "grad_norm": 0.0, + "learning_rate": 1.2786359374009418e-05, + "loss": 0.7957, + "step": 1387 + }, + { + "epoch": 0.42819682245873825, + "grad_norm": 0.0, + "learning_rate": 1.2776758316055797e-05, + "loss": 0.7505, + "step": 1388 + }, + { + "epoch": 0.4285053216103656, + "grad_norm": 0.0, + "learning_rate": 1.276715448383024e-05, + "loss": 0.7421, + "step": 1389 + }, + { + "epoch": 0.4288138207619929, + "grad_norm": 0.0, + "learning_rate": 1.2757547886927982e-05, + "loss": 0.766, + "step": 1390 + }, + { + "epoch": 0.42912231991362026, + "grad_norm": 0.0, + "learning_rate": 1.2747938534947015e-05, + "loss": 0.7551, + "step": 1391 + }, + { + "epoch": 0.42943081906524755, + "grad_norm": 0.0, + "learning_rate": 1.2738326437488093e-05, + "loss": 0.7754, + "step": 1392 + }, + { + "epoch": 0.4297393182168749, + "grad_norm": 0.0, + "learning_rate": 1.2728711604154702e-05, + "loss": 0.7794, + "step": 1393 + }, + { + "epoch": 0.4300478173685022, + "grad_norm": 0.0, + "learning_rate": 1.2719094044553066e-05, + "loss": 0.7675, + "step": 1394 + }, + { + "epoch": 0.43035631652012957, + "grad_norm": 0.0, + "learning_rate": 1.270947376829213e-05, + "loss": 0.7542, + "step": 1395 + }, + { + "epoch": 0.4306648156717569, + "grad_norm": 0.0, + "learning_rate": 1.269985078498356e-05, + "loss": 0.7695, + "step": 1396 + }, + { + "epoch": 0.43097331482338425, + "grad_norm": 0.0, + "learning_rate": 1.2690225104241722e-05, + "loss": 0.7702, + "step": 1397 + }, + { + "epoch": 0.4312818139750116, + "grad_norm": 0.0, + "learning_rate": 1.2680596735683679e-05, + "loss": 0.7786, + "step": 1398 + }, + { + "epoch": 0.4315903131266389, + "grad_norm": 0.0, + "learning_rate": 1.2670965688929176e-05, + "loss": 0.7759, + "step": 1399 + }, + { + "epoch": 0.4318988122782662, + "grad_norm": 0.0, + "learning_rate": 1.2661331973600635e-05, + "loss": 0.7005, + "step": 1400 + }, + { + "epoch": 0.43220731142989355, + "grad_norm": 0.0, + "learning_rate": 1.2651695599323147e-05, + "loss": 0.7682, + "step": 1401 + }, + { + "epoch": 0.4325158105815209, + "grad_norm": 0.0, + "learning_rate": 1.2642056575724457e-05, + "loss": 0.7238, + "step": 1402 + }, + { + "epoch": 0.4328243097331482, + "grad_norm": 0.0, + "learning_rate": 1.2632414912434961e-05, + "loss": 0.7869, + "step": 1403 + }, + { + "epoch": 0.43313280888477557, + "grad_norm": 0.0, + "learning_rate": 1.262277061908768e-05, + "loss": 0.852, + "step": 1404 + }, + { + "epoch": 0.4334413080364029, + "grad_norm": 0.0, + "learning_rate": 1.2613123705318281e-05, + "loss": 0.7744, + "step": 1405 + }, + { + "epoch": 0.43374980718803025, + "grad_norm": 0.0, + "learning_rate": 1.2603474180765035e-05, + "loss": 0.7881, + "step": 1406 + }, + { + "epoch": 0.4340583063396576, + "grad_norm": 0.0, + "learning_rate": 1.2593822055068825e-05, + "loss": 0.7329, + "step": 1407 + }, + { + "epoch": 0.4343668054912849, + "grad_norm": 0.0, + "learning_rate": 1.2584167337873137e-05, + "loss": 0.8911, + "step": 1408 + }, + { + "epoch": 0.4346753046429122, + "grad_norm": 0.0, + "learning_rate": 1.2574510038824042e-05, + "loss": 0.742, + "step": 1409 + }, + { + "epoch": 0.43498380379453955, + "grad_norm": 0.0, + "learning_rate": 1.256485016757019e-05, + "loss": 0.791, + "step": 1410 + }, + { + "epoch": 0.4352923029461669, + "grad_norm": 0.0, + "learning_rate": 1.2555187733762808e-05, + "loss": 0.7505, + "step": 1411 + }, + { + "epoch": 0.4356008020977942, + "grad_norm": 0.0, + "learning_rate": 1.2545522747055669e-05, + "loss": 0.7848, + "step": 1412 + }, + { + "epoch": 0.43590930124942157, + "grad_norm": 0.0, + "learning_rate": 1.2535855217105114e-05, + "loss": 0.6734, + "step": 1413 + }, + { + "epoch": 0.4362178004010489, + "grad_norm": 0.0, + "learning_rate": 1.252618515357001e-05, + "loss": 0.7401, + "step": 1414 + }, + { + "epoch": 0.43652629955267624, + "grad_norm": 0.0, + "learning_rate": 1.2516512566111766e-05, + "loss": 0.7617, + "step": 1415 + }, + { + "epoch": 0.4368347987043036, + "grad_norm": 0.0, + "learning_rate": 1.2506837464394307e-05, + "loss": 0.7688, + "step": 1416 + }, + { + "epoch": 0.4371432978559309, + "grad_norm": 0.0, + "learning_rate": 1.2497159858084072e-05, + "loss": 0.7622, + "step": 1417 + }, + { + "epoch": 0.4374517970075582, + "grad_norm": 0.0, + "learning_rate": 1.2487479756850001e-05, + "loss": 0.6837, + "step": 1418 + }, + { + "epoch": 0.43776029615918555, + "grad_norm": 0.0, + "learning_rate": 1.2477797170363524e-05, + "loss": 0.8013, + "step": 1419 + }, + { + "epoch": 0.4380687953108129, + "grad_norm": 0.0, + "learning_rate": 1.2468112108298563e-05, + "loss": 0.8371, + "step": 1420 + }, + { + "epoch": 0.4383772944624402, + "grad_norm": 0.0, + "learning_rate": 1.2458424580331505e-05, + "loss": 0.6998, + "step": 1421 + }, + { + "epoch": 0.43868579361406757, + "grad_norm": 0.0, + "learning_rate": 1.2448734596141205e-05, + "loss": 0.7428, + "step": 1422 + }, + { + "epoch": 0.4389942927656949, + "grad_norm": 0.0, + "learning_rate": 1.243904216540897e-05, + "loss": 0.749, + "step": 1423 + }, + { + "epoch": 0.43930279191732224, + "grad_norm": 0.0, + "learning_rate": 1.2429347297818551e-05, + "loss": 0.8241, + "step": 1424 + }, + { + "epoch": 0.4396112910689496, + "grad_norm": 0.0, + "learning_rate": 1.2419650003056134e-05, + "loss": 1.087, + "step": 1425 + }, + { + "epoch": 0.43991979022057687, + "grad_norm": 0.0, + "learning_rate": 1.2409950290810332e-05, + "loss": 0.786, + "step": 1426 + }, + { + "epoch": 0.4402282893722042, + "grad_norm": 0.0, + "learning_rate": 1.240024817077217e-05, + "loss": 0.8391, + "step": 1427 + }, + { + "epoch": 0.44053678852383155, + "grad_norm": 0.0, + "learning_rate": 1.2390543652635088e-05, + "loss": 0.8163, + "step": 1428 + }, + { + "epoch": 0.4408452876754589, + "grad_norm": 0.0, + "learning_rate": 1.2380836746094903e-05, + "loss": 0.7677, + "step": 1429 + }, + { + "epoch": 0.4411537868270862, + "grad_norm": 0.0, + "learning_rate": 1.2371127460849837e-05, + "loss": 0.7921, + "step": 1430 + }, + { + "epoch": 0.44146228597871356, + "grad_norm": 0.0, + "learning_rate": 1.236141580660048e-05, + "loss": 0.8214, + "step": 1431 + }, + { + "epoch": 0.4417707851303409, + "grad_norm": 0.0, + "learning_rate": 1.2351701793049786e-05, + "loss": 0.7893, + "step": 1432 + }, + { + "epoch": 0.44207928428196824, + "grad_norm": 0.0, + "learning_rate": 1.2341985429903075e-05, + "loss": 0.7722, + "step": 1433 + }, + { + "epoch": 0.4423877834335956, + "grad_norm": 0.0, + "learning_rate": 1.2332266726868013e-05, + "loss": 0.7685, + "step": 1434 + }, + { + "epoch": 0.44269628258522287, + "grad_norm": 0.0, + "learning_rate": 1.2322545693654595e-05, + "loss": 0.8296, + "step": 1435 + }, + { + "epoch": 0.4430047817368502, + "grad_norm": 0.0, + "learning_rate": 1.2312822339975147e-05, + "loss": 0.7838, + "step": 1436 + }, + { + "epoch": 0.44331328088847755, + "grad_norm": 0.0, + "learning_rate": 1.2303096675544325e-05, + "loss": 0.8509, + "step": 1437 + }, + { + "epoch": 0.4436217800401049, + "grad_norm": 0.0, + "learning_rate": 1.229336871007908e-05, + "loss": 0.7628, + "step": 1438 + }, + { + "epoch": 0.4439302791917322, + "grad_norm": 0.0, + "learning_rate": 1.2283638453298665e-05, + "loss": 0.7456, + "step": 1439 + }, + { + "epoch": 0.44423877834335956, + "grad_norm": 0.0, + "learning_rate": 1.2273905914924627e-05, + "loss": 0.8279, + "step": 1440 + }, + { + "epoch": 0.4445472774949869, + "grad_norm": 0.0, + "learning_rate": 1.2264171104680791e-05, + "loss": 0.742, + "step": 1441 + }, + { + "epoch": 0.44485577664661424, + "grad_norm": 0.0, + "learning_rate": 1.2254434032293245e-05, + "loss": 0.7001, + "step": 1442 + }, + { + "epoch": 0.4451642757982416, + "grad_norm": 0.0, + "learning_rate": 1.2244694707490348e-05, + "loss": 0.7747, + "step": 1443 + }, + { + "epoch": 0.44547277494986887, + "grad_norm": 0.0, + "learning_rate": 1.2234953140002698e-05, + "loss": 0.8176, + "step": 1444 + }, + { + "epoch": 0.4457812741014962, + "grad_norm": 0.0, + "learning_rate": 1.2225209339563144e-05, + "loss": 0.8175, + "step": 1445 + }, + { + "epoch": 0.44608977325312354, + "grad_norm": 0.0, + "learning_rate": 1.2215463315906764e-05, + "loss": 0.8301, + "step": 1446 + }, + { + "epoch": 0.4463982724047509, + "grad_norm": 0.0, + "learning_rate": 1.220571507877085e-05, + "loss": 0.7825, + "step": 1447 + }, + { + "epoch": 0.4467067715563782, + "grad_norm": 0.0, + "learning_rate": 1.2195964637894913e-05, + "loss": 0.8353, + "step": 1448 + }, + { + "epoch": 0.44701527070800556, + "grad_norm": 0.0, + "learning_rate": 1.2186212003020658e-05, + "loss": 0.8001, + "step": 1449 + }, + { + "epoch": 0.4473237698596329, + "grad_norm": 0.0, + "learning_rate": 1.2176457183891996e-05, + "loss": 0.7869, + "step": 1450 + }, + { + "epoch": 0.44763226901126024, + "grad_norm": 0.0, + "learning_rate": 1.2166700190255006e-05, + "loss": 0.768, + "step": 1451 + }, + { + "epoch": 0.4479407681628875, + "grad_norm": 0.0, + "learning_rate": 1.2156941031857943e-05, + "loss": 0.774, + "step": 1452 + }, + { + "epoch": 0.44824926731451487, + "grad_norm": 0.0, + "learning_rate": 1.214717971845123e-05, + "loss": 0.7134, + "step": 1453 + }, + { + "epoch": 0.4485577664661422, + "grad_norm": 0.0, + "learning_rate": 1.2137416259787441e-05, + "loss": 0.8372, + "step": 1454 + }, + { + "epoch": 0.44886626561776954, + "grad_norm": 0.0, + "learning_rate": 1.212765066562129e-05, + "loss": 0.7524, + "step": 1455 + }, + { + "epoch": 0.4491747647693969, + "grad_norm": 0.0, + "learning_rate": 1.2117882945709626e-05, + "loss": 0.6967, + "step": 1456 + }, + { + "epoch": 0.4494832639210242, + "grad_norm": 0.0, + "learning_rate": 1.2108113109811426e-05, + "loss": 0.7692, + "step": 1457 + }, + { + "epoch": 0.44979176307265156, + "grad_norm": 0.0, + "learning_rate": 1.2098341167687773e-05, + "loss": 1.0787, + "step": 1458 + }, + { + "epoch": 0.4501002622242789, + "grad_norm": 0.0, + "learning_rate": 1.2088567129101861e-05, + "loss": 0.7832, + "step": 1459 + }, + { + "epoch": 0.45040876137590624, + "grad_norm": 0.0, + "learning_rate": 1.2078791003818981e-05, + "loss": 0.7352, + "step": 1460 + }, + { + "epoch": 0.4507172605275335, + "grad_norm": 0.0, + "learning_rate": 1.20690128016065e-05, + "loss": 0.7652, + "step": 1461 + }, + { + "epoch": 0.45102575967916086, + "grad_norm": 0.0, + "learning_rate": 1.2059232532233864e-05, + "loss": 0.7968, + "step": 1462 + }, + { + "epoch": 0.4513342588307882, + "grad_norm": 0.0, + "learning_rate": 1.2049450205472585e-05, + "loss": 0.7473, + "step": 1463 + }, + { + "epoch": 0.45164275798241554, + "grad_norm": 0.0, + "learning_rate": 1.203966583109623e-05, + "loss": 0.7823, + "step": 1464 + }, + { + "epoch": 0.4519512571340429, + "grad_norm": 0.0, + "learning_rate": 1.2029879418880419e-05, + "loss": 0.7657, + "step": 1465 + }, + { + "epoch": 0.4522597562856702, + "grad_norm": 0.0, + "learning_rate": 1.2020090978602795e-05, + "loss": 0.7575, + "step": 1466 + }, + { + "epoch": 0.45256825543729756, + "grad_norm": 0.0, + "learning_rate": 1.2010300520043035e-05, + "loss": 0.8245, + "step": 1467 + }, + { + "epoch": 0.4528767545889249, + "grad_norm": 0.0, + "learning_rate": 1.2000508052982827e-05, + "loss": 0.7677, + "step": 1468 + }, + { + "epoch": 0.45318525374055224, + "grad_norm": 0.0, + "learning_rate": 1.1990713587205876e-05, + "loss": 0.8229, + "step": 1469 + }, + { + "epoch": 0.4534937528921795, + "grad_norm": 0.0, + "learning_rate": 1.1980917132497871e-05, + "loss": 0.7153, + "step": 1470 + }, + { + "epoch": 0.45380225204380686, + "grad_norm": 0.0, + "learning_rate": 1.1971118698646502e-05, + "loss": 0.7822, + "step": 1471 + }, + { + "epoch": 0.4541107511954342, + "grad_norm": 0.0, + "learning_rate": 1.1961318295441424e-05, + "loss": 0.7923, + "step": 1472 + }, + { + "epoch": 0.45441925034706154, + "grad_norm": 0.0, + "learning_rate": 1.1951515932674267e-05, + "loss": 0.8233, + "step": 1473 + }, + { + "epoch": 0.4547277494986889, + "grad_norm": 0.0, + "learning_rate": 1.1941711620138614e-05, + "loss": 0.7519, + "step": 1474 + }, + { + "epoch": 0.4550362486503162, + "grad_norm": 0.0, + "learning_rate": 1.1931905367630004e-05, + "loss": 0.6626, + "step": 1475 + }, + { + "epoch": 0.45534474780194356, + "grad_norm": 0.0, + "learning_rate": 1.1922097184945897e-05, + "loss": 0.8072, + "step": 1476 + }, + { + "epoch": 0.4556532469535709, + "grad_norm": 0.0, + "learning_rate": 1.1912287081885708e-05, + "loss": 0.6687, + "step": 1477 + }, + { + "epoch": 0.4559617461051982, + "grad_norm": 0.0, + "learning_rate": 1.1902475068250747e-05, + "loss": 0.8199, + "step": 1478 + }, + { + "epoch": 0.4562702452568255, + "grad_norm": 0.0, + "learning_rate": 1.1892661153844243e-05, + "loss": 0.7257, + "step": 1479 + }, + { + "epoch": 0.45657874440845286, + "grad_norm": 0.0, + "learning_rate": 1.1882845348471324e-05, + "loss": 0.6936, + "step": 1480 + }, + { + "epoch": 0.4568872435600802, + "grad_norm": 0.0, + "learning_rate": 1.1873027661939003e-05, + "loss": 0.6945, + "step": 1481 + }, + { + "epoch": 0.45719574271170754, + "grad_norm": 0.0, + "learning_rate": 1.1863208104056179e-05, + "loss": 0.7709, + "step": 1482 + }, + { + "epoch": 0.4575042418633349, + "grad_norm": 0.0, + "learning_rate": 1.1853386684633616e-05, + "loss": 0.748, + "step": 1483 + }, + { + "epoch": 0.4578127410149622, + "grad_norm": 0.0, + "learning_rate": 1.1843563413483941e-05, + "loss": 0.7041, + "step": 1484 + }, + { + "epoch": 0.45812124016658956, + "grad_norm": 0.0, + "learning_rate": 1.1833738300421625e-05, + "loss": 1.0664, + "step": 1485 + }, + { + "epoch": 0.4584297393182169, + "grad_norm": 0.0, + "learning_rate": 1.1823911355262986e-05, + "loss": 0.8065, + "step": 1486 + }, + { + "epoch": 0.4587382384698442, + "grad_norm": 0.0, + "learning_rate": 1.181408258782617e-05, + "loss": 0.8089, + "step": 1487 + }, + { + "epoch": 0.4590467376214715, + "grad_norm": 0.0, + "learning_rate": 1.1804252007931142e-05, + "loss": 0.7305, + "step": 1488 + }, + { + "epoch": 0.45935523677309886, + "grad_norm": 0.0, + "learning_rate": 1.1794419625399682e-05, + "loss": 0.6922, + "step": 1489 + }, + { + "epoch": 0.4596637359247262, + "grad_norm": 0.0, + "learning_rate": 1.1784585450055367e-05, + "loss": 0.6915, + "step": 1490 + }, + { + "epoch": 0.45997223507635354, + "grad_norm": 0.0, + "learning_rate": 1.1774749491723565e-05, + "loss": 0.7147, + "step": 1491 + }, + { + "epoch": 0.4602807342279809, + "grad_norm": 0.0, + "learning_rate": 1.176491176023143e-05, + "loss": 0.7419, + "step": 1492 + }, + { + "epoch": 0.4605892333796082, + "grad_norm": 0.0, + "learning_rate": 1.1755072265407881e-05, + "loss": 0.8442, + "step": 1493 + }, + { + "epoch": 0.46089773253123556, + "grad_norm": 0.0, + "learning_rate": 1.1745231017083603e-05, + "loss": 0.748, + "step": 1494 + }, + { + "epoch": 0.4612062316828629, + "grad_norm": 0.0, + "learning_rate": 1.1735388025091032e-05, + "loss": 0.8019, + "step": 1495 + }, + { + "epoch": 0.4615147308344902, + "grad_norm": 0.0, + "learning_rate": 1.1725543299264352e-05, + "loss": 0.7985, + "step": 1496 + }, + { + "epoch": 0.4618232299861175, + "grad_norm": 0.0, + "learning_rate": 1.1715696849439465e-05, + "loss": 0.8069, + "step": 1497 + }, + { + "epoch": 0.46213172913774486, + "grad_norm": 0.0, + "learning_rate": 1.1705848685454011e-05, + "loss": 0.8735, + "step": 1498 + }, + { + "epoch": 0.4624402282893722, + "grad_norm": 0.0, + "learning_rate": 1.1695998817147334e-05, + "loss": 0.8315, + "step": 1499 + }, + { + "epoch": 0.46274872744099954, + "grad_norm": 0.0, + "learning_rate": 1.168614725436048e-05, + "loss": 0.7874, + "step": 1500 + }, + { + "epoch": 0.4630572265926269, + "grad_norm": 0.0, + "learning_rate": 1.167629400693619e-05, + "loss": 0.7339, + "step": 1501 + }, + { + "epoch": 0.4633657257442542, + "grad_norm": 0.0, + "learning_rate": 1.1666439084718893e-05, + "loss": 0.8196, + "step": 1502 + }, + { + "epoch": 0.46367422489588156, + "grad_norm": 0.0, + "learning_rate": 1.1656582497554682e-05, + "loss": 0.8023, + "step": 1503 + }, + { + "epoch": 0.46398272404750884, + "grad_norm": 0.0, + "learning_rate": 1.1646724255291321e-05, + "loss": 0.7816, + "step": 1504 + }, + { + "epoch": 0.4642912231991362, + "grad_norm": 0.0, + "learning_rate": 1.1636864367778226e-05, + "loss": 0.7904, + "step": 1505 + }, + { + "epoch": 0.4645997223507635, + "grad_norm": 0.0, + "learning_rate": 1.1627002844866455e-05, + "loss": 0.8138, + "step": 1506 + }, + { + "epoch": 0.46490822150239086, + "grad_norm": 0.0, + "learning_rate": 1.1617139696408696e-05, + "loss": 0.7566, + "step": 1507 + }, + { + "epoch": 0.4652167206540182, + "grad_norm": 0.0, + "learning_rate": 1.1607274932259273e-05, + "loss": 0.8639, + "step": 1508 + }, + { + "epoch": 0.46552521980564554, + "grad_norm": 0.0, + "learning_rate": 1.1597408562274113e-05, + "loss": 0.7517, + "step": 1509 + }, + { + "epoch": 0.4658337189572729, + "grad_norm": 0.0, + "learning_rate": 1.1587540596310751e-05, + "loss": 0.8349, + "step": 1510 + }, + { + "epoch": 0.4661422181089002, + "grad_norm": 0.0, + "learning_rate": 1.157767104422832e-05, + "loss": 0.7613, + "step": 1511 + }, + { + "epoch": 0.46645071726052756, + "grad_norm": 0.0, + "learning_rate": 1.1567799915887532e-05, + "loss": 0.7602, + "step": 1512 + }, + { + "epoch": 0.46675921641215484, + "grad_norm": 0.0, + "learning_rate": 1.1557927221150674e-05, + "loss": 0.7969, + "step": 1513 + }, + { + "epoch": 0.4670677155637822, + "grad_norm": 0.0, + "learning_rate": 1.1548052969881604e-05, + "loss": 0.6685, + "step": 1514 + }, + { + "epoch": 0.4673762147154095, + "grad_norm": 0.0, + "learning_rate": 1.1538177171945733e-05, + "loss": 0.7429, + "step": 1515 + }, + { + "epoch": 0.46768471386703686, + "grad_norm": 0.0, + "learning_rate": 1.1528299837210015e-05, + "loss": 0.7313, + "step": 1516 + }, + { + "epoch": 0.4679932130186642, + "grad_norm": 0.0, + "learning_rate": 1.1518420975542937e-05, + "loss": 0.7563, + "step": 1517 + }, + { + "epoch": 0.46830171217029154, + "grad_norm": 0.0, + "learning_rate": 1.150854059681452e-05, + "loss": 0.7186, + "step": 1518 + }, + { + "epoch": 0.4686102113219189, + "grad_norm": 0.0, + "learning_rate": 1.149865871089629e-05, + "loss": 0.8129, + "step": 1519 + }, + { + "epoch": 0.4689187104735462, + "grad_norm": 0.0, + "learning_rate": 1.1488775327661288e-05, + "loss": 0.7723, + "step": 1520 + }, + { + "epoch": 0.46922720962517356, + "grad_norm": 0.0, + "learning_rate": 1.1478890456984044e-05, + "loss": 0.7515, + "step": 1521 + }, + { + "epoch": 0.46953570877680084, + "grad_norm": 0.0, + "learning_rate": 1.1469004108740584e-05, + "loss": 0.7922, + "step": 1522 + }, + { + "epoch": 0.4698442079284282, + "grad_norm": 0.0, + "learning_rate": 1.14591162928084e-05, + "loss": 0.7006, + "step": 1523 + }, + { + "epoch": 0.4701527070800555, + "grad_norm": 0.0, + "learning_rate": 1.1449227019066452e-05, + "loss": 0.7337, + "step": 1524 + }, + { + "epoch": 0.47046120623168286, + "grad_norm": 0.0, + "learning_rate": 1.1439336297395161e-05, + "loss": 0.756, + "step": 1525 + }, + { + "epoch": 0.4707697053833102, + "grad_norm": 0.0, + "learning_rate": 1.1429444137676391e-05, + "loss": 0.7558, + "step": 1526 + }, + { + "epoch": 0.47107820453493754, + "grad_norm": 0.0, + "learning_rate": 1.1419550549793443e-05, + "loss": 0.7017, + "step": 1527 + }, + { + "epoch": 0.4713867036865649, + "grad_norm": 0.0, + "learning_rate": 1.1409655543631054e-05, + "loss": 0.6988, + "step": 1528 + }, + { + "epoch": 0.4716952028381922, + "grad_norm": 0.0, + "learning_rate": 1.1399759129075358e-05, + "loss": 0.8331, + "step": 1529 + }, + { + "epoch": 0.4720037019898195, + "grad_norm": 0.0, + "learning_rate": 1.1389861316013914e-05, + "loss": 0.7593, + "step": 1530 + }, + { + "epoch": 0.47231220114144684, + "grad_norm": 0.0, + "learning_rate": 1.1379962114335676e-05, + "loss": 0.8505, + "step": 1531 + }, + { + "epoch": 0.4726207002930742, + "grad_norm": 0.0, + "learning_rate": 1.1370061533930974e-05, + "loss": 0.7795, + "step": 1532 + }, + { + "epoch": 0.4729291994447015, + "grad_norm": 0.0, + "learning_rate": 1.1360159584691529e-05, + "loss": 0.7112, + "step": 1533 + }, + { + "epoch": 0.47323769859632886, + "grad_norm": 0.0, + "learning_rate": 1.1350256276510421e-05, + "loss": 0.7215, + "step": 1534 + }, + { + "epoch": 0.4735461977479562, + "grad_norm": 0.0, + "learning_rate": 1.1340351619282091e-05, + "loss": 0.7342, + "step": 1535 + }, + { + "epoch": 0.47385469689958354, + "grad_norm": 0.0, + "learning_rate": 1.1330445622902326e-05, + "loss": 0.7108, + "step": 1536 + }, + { + "epoch": 0.4741631960512109, + "grad_norm": 0.0, + "learning_rate": 1.1320538297268258e-05, + "loss": 0.8285, + "step": 1537 + }, + { + "epoch": 0.4744716952028382, + "grad_norm": 0.0, + "learning_rate": 1.1310629652278334e-05, + "loss": 0.7805, + "step": 1538 + }, + { + "epoch": 0.4747801943544655, + "grad_norm": 0.0, + "learning_rate": 1.1300719697832329e-05, + "loss": 0.724, + "step": 1539 + }, + { + "epoch": 0.47508869350609284, + "grad_norm": 0.0, + "learning_rate": 1.1290808443831324e-05, + "loss": 0.7394, + "step": 1540 + }, + { + "epoch": 0.4753971926577202, + "grad_norm": 0.0, + "learning_rate": 1.1280895900177704e-05, + "loss": 0.7535, + "step": 1541 + }, + { + "epoch": 0.4757056918093475, + "grad_norm": 0.0, + "learning_rate": 1.1270982076775126e-05, + "loss": 0.7777, + "step": 1542 + }, + { + "epoch": 0.47601419096097486, + "grad_norm": 0.0, + "learning_rate": 1.1261066983528544e-05, + "loss": 0.7488, + "step": 1543 + }, + { + "epoch": 0.4763226901126022, + "grad_norm": 0.0, + "learning_rate": 1.125115063034417e-05, + "loss": 0.7654, + "step": 1544 + }, + { + "epoch": 0.47663118926422954, + "grad_norm": 0.0, + "learning_rate": 1.124123302712948e-05, + "loss": 0.7223, + "step": 1545 + }, + { + "epoch": 0.4769396884158569, + "grad_norm": 0.0, + "learning_rate": 1.1231314183793192e-05, + "loss": 0.7698, + "step": 1546 + }, + { + "epoch": 0.4772481875674842, + "grad_norm": 0.0, + "learning_rate": 1.1221394110245271e-05, + "loss": 0.7299, + "step": 1547 + }, + { + "epoch": 0.4775566867191115, + "grad_norm": 0.0, + "learning_rate": 1.1211472816396912e-05, + "loss": 0.7969, + "step": 1548 + }, + { + "epoch": 0.47786518587073884, + "grad_norm": 0.0, + "learning_rate": 1.120155031216052e-05, + "loss": 0.7025, + "step": 1549 + }, + { + "epoch": 0.4781736850223662, + "grad_norm": 0.0, + "learning_rate": 1.1191626607449713e-05, + "loss": 0.7603, + "step": 1550 + }, + { + "epoch": 0.4784821841739935, + "grad_norm": 0.0, + "learning_rate": 1.1181701712179311e-05, + "loss": 0.7384, + "step": 1551 + }, + { + "epoch": 0.47879068332562086, + "grad_norm": 0.0, + "learning_rate": 1.1171775636265326e-05, + "loss": 0.7992, + "step": 1552 + }, + { + "epoch": 0.4790991824772482, + "grad_norm": 0.0, + "learning_rate": 1.1161848389624942e-05, + "loss": 0.7818, + "step": 1553 + }, + { + "epoch": 0.47940768162887554, + "grad_norm": 0.0, + "learning_rate": 1.115191998217652e-05, + "loss": 0.7237, + "step": 1554 + }, + { + "epoch": 0.4797161807805029, + "grad_norm": 0.0, + "learning_rate": 1.1141990423839573e-05, + "loss": 0.7212, + "step": 1555 + }, + { + "epoch": 0.48002467993213016, + "grad_norm": 0.0, + "learning_rate": 1.1132059724534772e-05, + "loss": 0.7296, + "step": 1556 + }, + { + "epoch": 0.4803331790837575, + "grad_norm": 0.0, + "learning_rate": 1.112212789418392e-05, + "loss": 0.7456, + "step": 1557 + }, + { + "epoch": 0.48064167823538484, + "grad_norm": 0.0, + "learning_rate": 1.1112194942709956e-05, + "loss": 0.7829, + "step": 1558 + }, + { + "epoch": 0.4809501773870122, + "grad_norm": 0.0, + "learning_rate": 1.1102260880036937e-05, + "loss": 0.7913, + "step": 1559 + }, + { + "epoch": 0.4812586765386395, + "grad_norm": 0.0, + "learning_rate": 1.1092325716090035e-05, + "loss": 0.7316, + "step": 1560 + }, + { + "epoch": 0.48156717569026686, + "grad_norm": 0.0, + "learning_rate": 1.108238946079551e-05, + "loss": 0.8268, + "step": 1561 + }, + { + "epoch": 0.4818756748418942, + "grad_norm": 0.0, + "learning_rate": 1.1072452124080728e-05, + "loss": 0.7784, + "step": 1562 + }, + { + "epoch": 0.48218417399352154, + "grad_norm": 0.0, + "learning_rate": 1.1062513715874119e-05, + "loss": 0.8224, + "step": 1563 + }, + { + "epoch": 0.4824926731451489, + "grad_norm": 0.0, + "learning_rate": 1.1052574246105196e-05, + "loss": 0.7781, + "step": 1564 + }, + { + "epoch": 0.48280117229677616, + "grad_norm": 0.0, + "learning_rate": 1.1042633724704534e-05, + "loss": 0.6717, + "step": 1565 + }, + { + "epoch": 0.4831096714484035, + "grad_norm": 0.0, + "learning_rate": 1.1032692161603746e-05, + "loss": 0.8659, + "step": 1566 + }, + { + "epoch": 0.48341817060003084, + "grad_norm": 0.0, + "learning_rate": 1.1022749566735497e-05, + "loss": 0.7775, + "step": 1567 + }, + { + "epoch": 0.4837266697516582, + "grad_norm": 0.0, + "learning_rate": 1.1012805950033476e-05, + "loss": 0.7473, + "step": 1568 + }, + { + "epoch": 0.4840351689032855, + "grad_norm": 0.0, + "learning_rate": 1.1002861321432401e-05, + "loss": 0.7479, + "step": 1569 + }, + { + "epoch": 0.48434366805491286, + "grad_norm": 0.0, + "learning_rate": 1.0992915690867989e-05, + "loss": 0.721, + "step": 1570 + }, + { + "epoch": 0.4846521672065402, + "grad_norm": 0.0, + "learning_rate": 1.0982969068276971e-05, + "loss": 0.7837, + "step": 1571 + }, + { + "epoch": 0.48496066635816754, + "grad_norm": 0.0, + "learning_rate": 1.097302146359706e-05, + "loss": 0.7825, + "step": 1572 + }, + { + "epoch": 0.4852691655097949, + "grad_norm": 0.0, + "learning_rate": 1.0963072886766956e-05, + "loss": 0.7759, + "step": 1573 + }, + { + "epoch": 0.48557766466142216, + "grad_norm": 0.0, + "learning_rate": 1.0953123347726325e-05, + "loss": 0.7175, + "step": 1574 + }, + { + "epoch": 0.4858861638130495, + "grad_norm": 0.0, + "learning_rate": 1.0943172856415798e-05, + "loss": 0.7636, + "step": 1575 + }, + { + "epoch": 0.48619466296467684, + "grad_norm": 0.0, + "learning_rate": 1.0933221422776953e-05, + "loss": 0.7212, + "step": 1576 + }, + { + "epoch": 0.4865031621163042, + "grad_norm": 0.0, + "learning_rate": 1.092326905675232e-05, + "loss": 0.7146, + "step": 1577 + }, + { + "epoch": 0.4868116612679315, + "grad_norm": 0.0, + "learning_rate": 1.0913315768285347e-05, + "loss": 0.7574, + "step": 1578 + }, + { + "epoch": 0.48712016041955886, + "grad_norm": 0.0, + "learning_rate": 1.0903361567320411e-05, + "loss": 0.7246, + "step": 1579 + }, + { + "epoch": 0.4874286595711862, + "grad_norm": 0.0, + "learning_rate": 1.0893406463802805e-05, + "loss": 0.785, + "step": 1580 + }, + { + "epoch": 0.48773715872281354, + "grad_norm": 0.0, + "learning_rate": 1.0883450467678711e-05, + "loss": 0.7738, + "step": 1581 + }, + { + "epoch": 0.4880456578744408, + "grad_norm": 0.0, + "learning_rate": 1.0873493588895216e-05, + "loss": 0.732, + "step": 1582 + }, + { + "epoch": 0.48835415702606816, + "grad_norm": 0.0, + "learning_rate": 1.0863535837400281e-05, + "loss": 0.7488, + "step": 1583 + }, + { + "epoch": 0.4886626561776955, + "grad_norm": 0.0, + "learning_rate": 1.0853577223142742e-05, + "loss": 0.8508, + "step": 1584 + }, + { + "epoch": 0.48897115532932284, + "grad_norm": 0.0, + "learning_rate": 1.0843617756072294e-05, + "loss": 0.8112, + "step": 1585 + }, + { + "epoch": 0.4892796544809502, + "grad_norm": 0.0, + "learning_rate": 1.0833657446139487e-05, + "loss": 0.7818, + "step": 1586 + }, + { + "epoch": 0.4895881536325775, + "grad_norm": 0.0, + "learning_rate": 1.0823696303295712e-05, + "loss": 0.7754, + "step": 1587 + }, + { + "epoch": 0.48989665278420486, + "grad_norm": 0.0, + "learning_rate": 1.0813734337493194e-05, + "loss": 0.7389, + "step": 1588 + }, + { + "epoch": 0.4902051519358322, + "grad_norm": 0.0, + "learning_rate": 1.0803771558684977e-05, + "loss": 0.6946, + "step": 1589 + }, + { + "epoch": 0.49051365108745953, + "grad_norm": 0.0, + "learning_rate": 1.079380797682492e-05, + "loss": 0.7306, + "step": 1590 + }, + { + "epoch": 0.4908221502390868, + "grad_norm": 0.0, + "learning_rate": 1.0783843601867681e-05, + "loss": 0.681, + "step": 1591 + }, + { + "epoch": 0.49113064939071416, + "grad_norm": 0.0, + "learning_rate": 1.0773878443768716e-05, + "loss": 0.7666, + "step": 1592 + }, + { + "epoch": 0.4914391485423415, + "grad_norm": 0.0, + "learning_rate": 1.0763912512484257e-05, + "loss": 0.807, + "step": 1593 + }, + { + "epoch": 0.49174764769396884, + "grad_norm": 0.0, + "learning_rate": 1.0753945817971311e-05, + "loss": 0.8633, + "step": 1594 + }, + { + "epoch": 0.4920561468455962, + "grad_norm": 0.0, + "learning_rate": 1.0743978370187651e-05, + "loss": 0.8063, + "step": 1595 + }, + { + "epoch": 0.4923646459972235, + "grad_norm": 0.0, + "learning_rate": 1.0734010179091798e-05, + "loss": 0.7307, + "step": 1596 + }, + { + "epoch": 0.49267314514885086, + "grad_norm": 0.0, + "learning_rate": 1.072404125464302e-05, + "loss": 0.747, + "step": 1597 + }, + { + "epoch": 0.4929816443004782, + "grad_norm": 0.0, + "learning_rate": 1.0714071606801314e-05, + "loss": 0.7696, + "step": 1598 + }, + { + "epoch": 0.49329014345210553, + "grad_norm": 0.0, + "learning_rate": 1.0704101245527396e-05, + "loss": 0.774, + "step": 1599 + }, + { + "epoch": 0.4935986426037328, + "grad_norm": 0.0, + "learning_rate": 1.0694130180782705e-05, + "loss": 0.7023, + "step": 1600 + }, + { + "epoch": 0.49390714175536016, + "grad_norm": 0.0, + "learning_rate": 1.0684158422529374e-05, + "loss": 0.7351, + "step": 1601 + }, + { + "epoch": 0.4942156409069875, + "grad_norm": 0.0, + "learning_rate": 1.0674185980730234e-05, + "loss": 0.7549, + "step": 1602 + }, + { + "epoch": 0.49452414005861484, + "grad_norm": 0.0, + "learning_rate": 1.0664212865348798e-05, + "loss": 0.7447, + "step": 1603 + }, + { + "epoch": 0.4948326392102422, + "grad_norm": 0.0, + "learning_rate": 1.065423908634925e-05, + "loss": 0.7676, + "step": 1604 + }, + { + "epoch": 0.4951411383618695, + "grad_norm": 0.0, + "learning_rate": 1.064426465369644e-05, + "loss": 0.7771, + "step": 1605 + }, + { + "epoch": 0.49544963751349685, + "grad_norm": 0.0, + "learning_rate": 1.0634289577355864e-05, + "loss": 0.7675, + "step": 1606 + }, + { + "epoch": 0.4957581366651242, + "grad_norm": 0.0, + "learning_rate": 1.062431386729367e-05, + "loss": 0.7554, + "step": 1607 + }, + { + "epoch": 0.4960666358167515, + "grad_norm": 0.0, + "learning_rate": 1.0614337533476635e-05, + "loss": 0.8138, + "step": 1608 + }, + { + "epoch": 0.4963751349683788, + "grad_norm": 0.0, + "learning_rate": 1.0604360585872165e-05, + "loss": 0.7168, + "step": 1609 + }, + { + "epoch": 0.49668363412000616, + "grad_norm": 0.0, + "learning_rate": 1.0594383034448266e-05, + "loss": 0.7353, + "step": 1610 + }, + { + "epoch": 0.4969921332716335, + "grad_norm": 0.0, + "learning_rate": 1.058440488917356e-05, + "loss": 0.7012, + "step": 1611 + }, + { + "epoch": 0.49730063242326084, + "grad_norm": 0.0, + "learning_rate": 1.0574426160017257e-05, + "loss": 0.8099, + "step": 1612 + }, + { + "epoch": 0.4976091315748882, + "grad_norm": 0.0, + "learning_rate": 1.0564446856949146e-05, + "loss": 0.827, + "step": 1613 + }, + { + "epoch": 0.4979176307265155, + "grad_norm": 0.0, + "learning_rate": 1.0554466989939602e-05, + "loss": 0.855, + "step": 1614 + }, + { + "epoch": 0.49822612987814285, + "grad_norm": 0.0, + "learning_rate": 1.0544486568959551e-05, + "loss": 0.7281, + "step": 1615 + }, + { + "epoch": 0.4985346290297702, + "grad_norm": 0.0, + "learning_rate": 1.0534505603980481e-05, + "loss": 0.7121, + "step": 1616 + }, + { + "epoch": 0.4988431281813975, + "grad_norm": 0.0, + "learning_rate": 1.0524524104974414e-05, + "loss": 0.7575, + "step": 1617 + }, + { + "epoch": 0.4991516273330248, + "grad_norm": 0.0, + "learning_rate": 1.0514542081913916e-05, + "loss": 0.8202, + "step": 1618 + }, + { + "epoch": 0.49946012648465216, + "grad_norm": 0.0, + "learning_rate": 1.0504559544772071e-05, + "loss": 0.858, + "step": 1619 + }, + { + "epoch": 0.4997686256362795, + "grad_norm": 0.0, + "learning_rate": 1.0494576503522475e-05, + "loss": 0.7307, + "step": 1620 + }, + { + "epoch": 0.5000771247879068, + "grad_norm": 0.0, + "learning_rate": 1.0484592968139234e-05, + "loss": 0.7756, + "step": 1621 + }, + { + "epoch": 0.5003856239395341, + "grad_norm": 0.0, + "learning_rate": 1.0474608948596943e-05, + "loss": 0.7073, + "step": 1622 + }, + { + "epoch": 0.5006941230911615, + "grad_norm": 0.0, + "learning_rate": 1.0464624454870678e-05, + "loss": 0.7282, + "step": 1623 + }, + { + "epoch": 0.5010026222427888, + "grad_norm": 0.0, + "learning_rate": 1.0454639496935997e-05, + "loss": 0.8389, + "step": 1624 + }, + { + "epoch": 0.5013111213944161, + "grad_norm": 0.0, + "learning_rate": 1.0444654084768915e-05, + "loss": 0.7465, + "step": 1625 + }, + { + "epoch": 0.5016196205460435, + "grad_norm": 0.0, + "learning_rate": 1.0434668228345901e-05, + "loss": 0.8251, + "step": 1626 + }, + { + "epoch": 0.5019281196976708, + "grad_norm": 0.0, + "learning_rate": 1.0424681937643872e-05, + "loss": 0.8191, + "step": 1627 + }, + { + "epoch": 0.5022366188492982, + "grad_norm": 0.0, + "learning_rate": 1.041469522264018e-05, + "loss": 0.8236, + "step": 1628 + }, + { + "epoch": 0.5025451180009255, + "grad_norm": 0.0, + "learning_rate": 1.0404708093312593e-05, + "loss": 0.7842, + "step": 1629 + }, + { + "epoch": 0.5028536171525528, + "grad_norm": 0.0, + "learning_rate": 1.0394720559639295e-05, + "loss": 0.8182, + "step": 1630 + }, + { + "epoch": 0.5031621163041802, + "grad_norm": 0.0, + "learning_rate": 1.0384732631598886e-05, + "loss": 0.7417, + "step": 1631 + }, + { + "epoch": 0.5034706154558075, + "grad_norm": 0.0, + "learning_rate": 1.0374744319170342e-05, + "loss": 0.7373, + "step": 1632 + }, + { + "epoch": 0.5037791146074349, + "grad_norm": 0.0, + "learning_rate": 1.0364755632333034e-05, + "loss": 0.7286, + "step": 1633 + }, + { + "epoch": 0.5040876137590622, + "grad_norm": 0.0, + "learning_rate": 1.0354766581066706e-05, + "loss": 0.8905, + "step": 1634 + }, + { + "epoch": 0.5043961129106895, + "grad_norm": 0.0, + "learning_rate": 1.0344777175351467e-05, + "loss": 0.7573, + "step": 1635 + }, + { + "epoch": 0.5047046120623169, + "grad_norm": 0.0, + "learning_rate": 1.0334787425167772e-05, + "loss": 0.7438, + "step": 1636 + }, + { + "epoch": 0.5050131112139442, + "grad_norm": 0.0, + "learning_rate": 1.0324797340496432e-05, + "loss": 0.7304, + "step": 1637 + }, + { + "epoch": 0.5053216103655715, + "grad_norm": 0.0, + "learning_rate": 1.031480693131858e-05, + "loss": 0.6828, + "step": 1638 + }, + { + "epoch": 0.5056301095171988, + "grad_norm": 0.0, + "learning_rate": 1.0304816207615682e-05, + "loss": 0.762, + "step": 1639 + }, + { + "epoch": 0.5059386086688261, + "grad_norm": 0.0, + "learning_rate": 1.0294825179369518e-05, + "loss": 0.6984, + "step": 1640 + }, + { + "epoch": 0.5062471078204535, + "grad_norm": 0.0, + "learning_rate": 1.0284833856562173e-05, + "loss": 0.677, + "step": 1641 + }, + { + "epoch": 0.5065556069720808, + "grad_norm": 0.0, + "learning_rate": 1.0274842249176016e-05, + "loss": 0.7709, + "step": 1642 + }, + { + "epoch": 0.5068641061237081, + "grad_norm": 0.0, + "learning_rate": 1.0264850367193713e-05, + "loss": 0.748, + "step": 1643 + }, + { + "epoch": 0.5071726052753355, + "grad_norm": 0.0, + "learning_rate": 1.0254858220598194e-05, + "loss": 0.671, + "step": 1644 + }, + { + "epoch": 0.5074811044269628, + "grad_norm": 0.0, + "learning_rate": 1.024486581937266e-05, + "loss": 0.7853, + "step": 1645 + }, + { + "epoch": 0.5077896035785902, + "grad_norm": 0.0, + "learning_rate": 1.0234873173500566e-05, + "loss": 0.7042, + "step": 1646 + }, + { + "epoch": 0.5080981027302175, + "grad_norm": 0.0, + "learning_rate": 1.0224880292965611e-05, + "loss": 0.6942, + "step": 1647 + }, + { + "epoch": 0.5084066018818448, + "grad_norm": 0.0, + "learning_rate": 1.021488718775173e-05, + "loss": 0.7299, + "step": 1648 + }, + { + "epoch": 0.5087151010334722, + "grad_norm": 0.0, + "learning_rate": 1.0204893867843073e-05, + "loss": 0.7374, + "step": 1649 + }, + { + "epoch": 0.5090236001850995, + "grad_norm": 0.0, + "learning_rate": 1.019490034322402e-05, + "loss": 0.7705, + "step": 1650 + }, + { + "epoch": 0.5093320993367269, + "grad_norm": 0.0, + "learning_rate": 1.0184906623879137e-05, + "loss": 0.7399, + "step": 1651 + }, + { + "epoch": 0.5096405984883542, + "grad_norm": 0.0, + "learning_rate": 1.0174912719793202e-05, + "loss": 0.7825, + "step": 1652 + }, + { + "epoch": 0.5099490976399815, + "grad_norm": 0.0, + "learning_rate": 1.0164918640951169e-05, + "loss": 0.7848, + "step": 1653 + }, + { + "epoch": 0.5102575967916089, + "grad_norm": 0.0, + "learning_rate": 1.0154924397338169e-05, + "loss": 0.7622, + "step": 1654 + }, + { + "epoch": 0.5105660959432362, + "grad_norm": 0.0, + "learning_rate": 1.0144929998939491e-05, + "loss": 0.8247, + "step": 1655 + }, + { + "epoch": 0.5108745950948634, + "grad_norm": 0.0, + "learning_rate": 1.0134935455740593e-05, + "loss": 0.7686, + "step": 1656 + }, + { + "epoch": 0.5111830942464908, + "grad_norm": 0.0, + "learning_rate": 1.0124940777727061e-05, + "loss": 0.7389, + "step": 1657 + }, + { + "epoch": 0.5114915933981181, + "grad_norm": 0.0, + "learning_rate": 1.0114945974884627e-05, + "loss": 0.7494, + "step": 1658 + }, + { + "epoch": 0.5118000925497455, + "grad_norm": 0.0, + "learning_rate": 1.0104951057199144e-05, + "loss": 0.82, + "step": 1659 + }, + { + "epoch": 0.5121085917013728, + "grad_norm": 0.0, + "learning_rate": 1.0094956034656582e-05, + "loss": 0.749, + "step": 1660 + }, + { + "epoch": 0.5124170908530001, + "grad_norm": 0.0, + "learning_rate": 1.0084960917243008e-05, + "loss": 0.6975, + "step": 1661 + }, + { + "epoch": 0.5127255900046275, + "grad_norm": 0.0, + "learning_rate": 1.0074965714944593e-05, + "loss": 0.8068, + "step": 1662 + }, + { + "epoch": 0.5130340891562548, + "grad_norm": 0.0, + "learning_rate": 1.0064970437747592e-05, + "loss": 0.6657, + "step": 1663 + }, + { + "epoch": 0.5133425883078822, + "grad_norm": 0.0, + "learning_rate": 1.0054975095638327e-05, + "loss": 0.7297, + "step": 1664 + }, + { + "epoch": 0.5136510874595095, + "grad_norm": 0.0, + "learning_rate": 1.0044979698603195e-05, + "loss": 0.6419, + "step": 1665 + }, + { + "epoch": 0.5139595866111368, + "grad_norm": 0.0, + "learning_rate": 1.0034984256628637e-05, + "loss": 0.8095, + "step": 1666 + }, + { + "epoch": 0.5142680857627642, + "grad_norm": 0.0, + "learning_rate": 1.002498877970115e-05, + "loss": 0.7918, + "step": 1667 + }, + { + "epoch": 0.5145765849143915, + "grad_norm": 0.0, + "learning_rate": 1.001499327780726e-05, + "loss": 0.7891, + "step": 1668 + }, + { + "epoch": 0.5148850840660188, + "grad_norm": 0.0, + "learning_rate": 1.0004997760933517e-05, + "loss": 0.7117, + "step": 1669 + }, + { + "epoch": 0.5151935832176462, + "grad_norm": 0.0, + "learning_rate": 9.995002239066486e-06, + "loss": 0.7741, + "step": 1670 + }, + { + "epoch": 0.5155020823692735, + "grad_norm": 0.0, + "learning_rate": 9.985006722192742e-06, + "loss": 0.8163, + "step": 1671 + }, + { + "epoch": 0.5158105815209009, + "grad_norm": 0.0, + "learning_rate": 9.975011220298853e-06, + "loss": 0.7816, + "step": 1672 + }, + { + "epoch": 0.5161190806725281, + "grad_norm": 0.0, + "learning_rate": 9.965015743371368e-06, + "loss": 0.7656, + "step": 1673 + }, + { + "epoch": 0.5164275798241554, + "grad_norm": 0.0, + "learning_rate": 9.95502030139681e-06, + "loss": 0.8098, + "step": 1674 + }, + { + "epoch": 0.5167360789757828, + "grad_norm": 0.0, + "learning_rate": 9.945024904361675e-06, + "loss": 0.7805, + "step": 1675 + }, + { + "epoch": 0.5170445781274101, + "grad_norm": 0.0, + "learning_rate": 9.935029562252413e-06, + "loss": 0.7974, + "step": 1676 + }, + { + "epoch": 0.5173530772790375, + "grad_norm": 0.0, + "learning_rate": 9.92503428505541e-06, + "loss": 0.7451, + "step": 1677 + }, + { + "epoch": 0.5176615764306648, + "grad_norm": 0.0, + "learning_rate": 9.915039082756995e-06, + "loss": 0.7494, + "step": 1678 + }, + { + "epoch": 0.5179700755822921, + "grad_norm": 0.0, + "learning_rate": 9.905043965343421e-06, + "loss": 0.7199, + "step": 1679 + }, + { + "epoch": 0.5182785747339195, + "grad_norm": 0.0, + "learning_rate": 9.895048942800856e-06, + "loss": 0.7528, + "step": 1680 + }, + { + "epoch": 0.5185870738855468, + "grad_norm": 0.0, + "learning_rate": 9.885054025115378e-06, + "loss": 0.7486, + "step": 1681 + }, + { + "epoch": 0.5188955730371742, + "grad_norm": 0.0, + "learning_rate": 9.875059222272942e-06, + "loss": 0.6966, + "step": 1682 + }, + { + "epoch": 0.5192040721888015, + "grad_norm": 0.0, + "learning_rate": 9.865064544259409e-06, + "loss": 0.6967, + "step": 1683 + }, + { + "epoch": 0.5195125713404288, + "grad_norm": 0.0, + "learning_rate": 9.855070001060508e-06, + "loss": 0.8254, + "step": 1684 + }, + { + "epoch": 0.5198210704920562, + "grad_norm": 0.0, + "learning_rate": 9.845075602661836e-06, + "loss": 0.7459, + "step": 1685 + }, + { + "epoch": 0.5201295696436835, + "grad_norm": 0.0, + "learning_rate": 9.835081359048835e-06, + "loss": 0.7506, + "step": 1686 + }, + { + "epoch": 0.5204380687953108, + "grad_norm": 0.0, + "learning_rate": 9.825087280206801e-06, + "loss": 0.8133, + "step": 1687 + }, + { + "epoch": 0.5207465679469382, + "grad_norm": 0.0, + "learning_rate": 9.815093376120866e-06, + "loss": 0.7707, + "step": 1688 + }, + { + "epoch": 0.5210550670985655, + "grad_norm": 0.0, + "learning_rate": 9.805099656775987e-06, + "loss": 0.7421, + "step": 1689 + }, + { + "epoch": 0.5213635662501929, + "grad_norm": 0.0, + "learning_rate": 9.79510613215693e-06, + "loss": 0.6809, + "step": 1690 + }, + { + "epoch": 0.5216720654018201, + "grad_norm": 0.0, + "learning_rate": 9.785112812248274e-06, + "loss": 0.7263, + "step": 1691 + }, + { + "epoch": 0.5219805645534474, + "grad_norm": 0.0, + "learning_rate": 9.775119707034389e-06, + "loss": 0.7779, + "step": 1692 + }, + { + "epoch": 0.5222890637050748, + "grad_norm": 0.0, + "learning_rate": 9.765126826499437e-06, + "loss": 0.7037, + "step": 1693 + }, + { + "epoch": 0.5225975628567021, + "grad_norm": 0.0, + "learning_rate": 9.755134180627342e-06, + "loss": 0.78, + "step": 1694 + }, + { + "epoch": 0.5229060620083295, + "grad_norm": 0.0, + "learning_rate": 9.74514177940181e-06, + "loss": 0.7053, + "step": 1695 + }, + { + "epoch": 0.5232145611599568, + "grad_norm": 0.0, + "learning_rate": 9.73514963280629e-06, + "loss": 0.851, + "step": 1696 + }, + { + "epoch": 0.5235230603115841, + "grad_norm": 0.0, + "learning_rate": 9.725157750823984e-06, + "loss": 0.7866, + "step": 1697 + }, + { + "epoch": 0.5238315594632115, + "grad_norm": 0.0, + "learning_rate": 9.715166143437832e-06, + "loss": 0.7903, + "step": 1698 + }, + { + "epoch": 0.5241400586148388, + "grad_norm": 0.0, + "learning_rate": 9.705174820630483e-06, + "loss": 0.7754, + "step": 1699 + }, + { + "epoch": 0.5244485577664662, + "grad_norm": 0.0, + "learning_rate": 9.69518379238432e-06, + "loss": 0.7465, + "step": 1700 + }, + { + "epoch": 0.5247570569180935, + "grad_norm": 0.0, + "learning_rate": 9.685193068681423e-06, + "loss": 0.7097, + "step": 1701 + }, + { + "epoch": 0.5250655560697208, + "grad_norm": 0.0, + "learning_rate": 9.675202659503575e-06, + "loss": 0.737, + "step": 1702 + }, + { + "epoch": 0.5253740552213482, + "grad_norm": 0.0, + "learning_rate": 9.665212574832233e-06, + "loss": 0.7015, + "step": 1703 + }, + { + "epoch": 0.5256825543729755, + "grad_norm": 0.0, + "learning_rate": 9.655222824648537e-06, + "loss": 0.7699, + "step": 1704 + }, + { + "epoch": 0.5259910535246028, + "grad_norm": 0.0, + "learning_rate": 9.645233418933296e-06, + "loss": 0.6963, + "step": 1705 + }, + { + "epoch": 0.5262995526762302, + "grad_norm": 0.0, + "learning_rate": 9.63524436766697e-06, + "loss": 0.7432, + "step": 1706 + }, + { + "epoch": 0.5266080518278575, + "grad_norm": 0.0, + "learning_rate": 9.625255680829661e-06, + "loss": 0.7409, + "step": 1707 + }, + { + "epoch": 0.5269165509794848, + "grad_norm": 0.0, + "learning_rate": 9.615267368401118e-06, + "loss": 0.7743, + "step": 1708 + }, + { + "epoch": 0.5272250501311121, + "grad_norm": 0.0, + "learning_rate": 9.605279440360705e-06, + "loss": 0.8109, + "step": 1709 + }, + { + "epoch": 0.5275335492827394, + "grad_norm": 0.0, + "learning_rate": 9.595291906687414e-06, + "loss": 0.7718, + "step": 1710 + }, + { + "epoch": 0.5278420484343668, + "grad_norm": 0.0, + "learning_rate": 9.585304777359826e-06, + "loss": 0.7101, + "step": 1711 + }, + { + "epoch": 0.5281505475859941, + "grad_norm": 0.0, + "learning_rate": 9.57531806235613e-06, + "loss": 0.8157, + "step": 1712 + }, + { + "epoch": 0.5284590467376215, + "grad_norm": 0.0, + "learning_rate": 9.565331771654102e-06, + "loss": 0.7767, + "step": 1713 + }, + { + "epoch": 0.5287675458892488, + "grad_norm": 0.0, + "learning_rate": 9.55534591523109e-06, + "loss": 0.7739, + "step": 1714 + }, + { + "epoch": 0.5290760450408761, + "grad_norm": 0.0, + "learning_rate": 9.545360503064007e-06, + "loss": 0.7932, + "step": 1715 + }, + { + "epoch": 0.5293845441925035, + "grad_norm": 0.0, + "learning_rate": 9.535375545129326e-06, + "loss": 0.7008, + "step": 1716 + }, + { + "epoch": 0.5296930433441308, + "grad_norm": 0.0, + "learning_rate": 9.52539105140306e-06, + "loss": 0.7103, + "step": 1717 + }, + { + "epoch": 0.5300015424957581, + "grad_norm": 0.0, + "learning_rate": 9.515407031860768e-06, + "loss": 0.7694, + "step": 1718 + }, + { + "epoch": 0.5303100416473855, + "grad_norm": 0.0, + "learning_rate": 9.505423496477527e-06, + "loss": 0.6776, + "step": 1719 + }, + { + "epoch": 0.5306185407990128, + "grad_norm": 0.0, + "learning_rate": 9.495440455227932e-06, + "loss": 0.7044, + "step": 1720 + }, + { + "epoch": 0.5309270399506402, + "grad_norm": 0.0, + "learning_rate": 9.485457918086086e-06, + "loss": 0.7518, + "step": 1721 + }, + { + "epoch": 0.5312355391022675, + "grad_norm": 0.0, + "learning_rate": 9.475475895025586e-06, + "loss": 0.7754, + "step": 1722 + }, + { + "epoch": 0.5315440382538948, + "grad_norm": 0.0, + "learning_rate": 9.465494396019524e-06, + "loss": 0.6894, + "step": 1723 + }, + { + "epoch": 0.5318525374055222, + "grad_norm": 0.0, + "learning_rate": 9.455513431040452e-06, + "loss": 0.7407, + "step": 1724 + }, + { + "epoch": 0.5321610365571494, + "grad_norm": 0.0, + "learning_rate": 9.445533010060402e-06, + "loss": 0.7462, + "step": 1725 + }, + { + "epoch": 0.5324695357087768, + "grad_norm": 0.0, + "learning_rate": 9.435553143050856e-06, + "loss": 0.7176, + "step": 1726 + }, + { + "epoch": 0.5327780348604041, + "grad_norm": 0.0, + "learning_rate": 9.42557383998275e-06, + "loss": 0.7529, + "step": 1727 + }, + { + "epoch": 0.5330865340120314, + "grad_norm": 0.0, + "learning_rate": 9.415595110826444e-06, + "loss": 0.7301, + "step": 1728 + }, + { + "epoch": 0.5333950331636588, + "grad_norm": 0.0, + "learning_rate": 9.405616965551738e-06, + "loss": 0.7651, + "step": 1729 + }, + { + "epoch": 0.5337035323152861, + "grad_norm": 0.0, + "learning_rate": 9.395639414127837e-06, + "loss": 0.6992, + "step": 1730 + }, + { + "epoch": 0.5340120314669135, + "grad_norm": 0.0, + "learning_rate": 9.385662466523364e-06, + "loss": 0.7285, + "step": 1731 + }, + { + "epoch": 0.5343205306185408, + "grad_norm": 0.0, + "learning_rate": 9.375686132706333e-06, + "loss": 0.8753, + "step": 1732 + }, + { + "epoch": 0.5346290297701681, + "grad_norm": 0.0, + "learning_rate": 9.36571042264414e-06, + "loss": 0.767, + "step": 1733 + }, + { + "epoch": 0.5349375289217955, + "grad_norm": 0.0, + "learning_rate": 9.355735346303566e-06, + "loss": 0.7476, + "step": 1734 + }, + { + "epoch": 0.5352460280734228, + "grad_norm": 0.0, + "learning_rate": 9.345760913650752e-06, + "loss": 0.8124, + "step": 1735 + }, + { + "epoch": 0.5355545272250501, + "grad_norm": 0.0, + "learning_rate": 9.335787134651207e-06, + "loss": 0.8571, + "step": 1736 + }, + { + "epoch": 0.5358630263766775, + "grad_norm": 0.0, + "learning_rate": 9.32581401926977e-06, + "loss": 0.743, + "step": 1737 + }, + { + "epoch": 0.5361715255283048, + "grad_norm": 0.0, + "learning_rate": 9.31584157747063e-06, + "loss": 0.763, + "step": 1738 + }, + { + "epoch": 0.5364800246799322, + "grad_norm": 0.0, + "learning_rate": 9.305869819217299e-06, + "loss": 0.7776, + "step": 1739 + }, + { + "epoch": 0.5367885238315595, + "grad_norm": 0.0, + "learning_rate": 9.295898754472607e-06, + "loss": 0.7812, + "step": 1740 + }, + { + "epoch": 0.5370970229831868, + "grad_norm": 0.0, + "learning_rate": 9.285928393198691e-06, + "loss": 0.8133, + "step": 1741 + }, + { + "epoch": 0.5374055221348142, + "grad_norm": 0.0, + "learning_rate": 9.275958745356982e-06, + "loss": 0.7735, + "step": 1742 + }, + { + "epoch": 0.5377140212864414, + "grad_norm": 0.0, + "learning_rate": 9.2659898209082e-06, + "loss": 0.7431, + "step": 1743 + }, + { + "epoch": 0.5380225204380688, + "grad_norm": 0.0, + "learning_rate": 9.256021629812349e-06, + "loss": 0.7729, + "step": 1744 + }, + { + "epoch": 0.5383310195896961, + "grad_norm": 0.0, + "learning_rate": 9.24605418202869e-06, + "loss": 0.6907, + "step": 1745 + }, + { + "epoch": 0.5386395187413234, + "grad_norm": 0.0, + "learning_rate": 9.236087487515746e-06, + "loss": 0.7184, + "step": 1746 + }, + { + "epoch": 0.5389480178929508, + "grad_norm": 0.0, + "learning_rate": 9.226121556231287e-06, + "loss": 0.6686, + "step": 1747 + }, + { + "epoch": 0.5392565170445781, + "grad_norm": 0.0, + "learning_rate": 9.21615639813232e-06, + "loss": 0.7839, + "step": 1748 + }, + { + "epoch": 0.5395650161962054, + "grad_norm": 0.0, + "learning_rate": 9.206192023175086e-06, + "loss": 0.776, + "step": 1749 + }, + { + "epoch": 0.5398735153478328, + "grad_norm": 0.0, + "learning_rate": 9.196228441315028e-06, + "loss": 0.7371, + "step": 1750 + }, + { + "epoch": 0.5401820144994601, + "grad_norm": 0.0, + "learning_rate": 9.18626566250681e-06, + "loss": 0.6791, + "step": 1751 + }, + { + "epoch": 0.5404905136510875, + "grad_norm": 0.0, + "learning_rate": 9.176303696704288e-06, + "loss": 0.7811, + "step": 1752 + }, + { + "epoch": 0.5407990128027148, + "grad_norm": 0.0, + "learning_rate": 9.166342553860518e-06, + "loss": 0.7032, + "step": 1753 + }, + { + "epoch": 0.5411075119543421, + "grad_norm": 0.0, + "learning_rate": 9.15638224392771e-06, + "loss": 0.7617, + "step": 1754 + }, + { + "epoch": 0.5414160111059695, + "grad_norm": 0.0, + "learning_rate": 9.146422776857262e-06, + "loss": 0.6893, + "step": 1755 + }, + { + "epoch": 0.5417245102575968, + "grad_norm": 0.0, + "learning_rate": 9.13646416259972e-06, + "loss": 0.7536, + "step": 1756 + }, + { + "epoch": 0.5420330094092242, + "grad_norm": 0.0, + "learning_rate": 9.126506411104786e-06, + "loss": 0.756, + "step": 1757 + }, + { + "epoch": 0.5423415085608515, + "grad_norm": 0.0, + "learning_rate": 9.11654953232129e-06, + "loss": 0.81, + "step": 1758 + }, + { + "epoch": 0.5426500077124788, + "grad_norm": 0.0, + "learning_rate": 9.106593536197198e-06, + "loss": 0.7235, + "step": 1759 + }, + { + "epoch": 0.5429585068641061, + "grad_norm": 0.0, + "learning_rate": 9.09663843267959e-06, + "loss": 1.0373, + "step": 1760 + }, + { + "epoch": 0.5432670060157334, + "grad_norm": 0.0, + "learning_rate": 9.086684231714655e-06, + "loss": 0.751, + "step": 1761 + }, + { + "epoch": 0.5435755051673608, + "grad_norm": 0.0, + "learning_rate": 9.076730943247687e-06, + "loss": 0.7119, + "step": 1762 + }, + { + "epoch": 0.5438840043189881, + "grad_norm": 0.0, + "learning_rate": 9.06677857722305e-06, + "loss": 0.7603, + "step": 1763 + }, + { + "epoch": 0.5441925034706154, + "grad_norm": 0.0, + "learning_rate": 9.056827143584206e-06, + "loss": 0.6543, + "step": 1764 + }, + { + "epoch": 0.5445010026222428, + "grad_norm": 0.0, + "learning_rate": 9.046876652273677e-06, + "loss": 0.7651, + "step": 1765 + }, + { + "epoch": 0.5448095017738701, + "grad_norm": 0.0, + "learning_rate": 9.036927113233049e-06, + "loss": 0.8314, + "step": 1766 + }, + { + "epoch": 0.5451180009254974, + "grad_norm": 0.0, + "learning_rate": 9.026978536402943e-06, + "loss": 0.7353, + "step": 1767 + }, + { + "epoch": 0.5454265000771248, + "grad_norm": 0.0, + "learning_rate": 9.01703093172303e-06, + "loss": 0.7053, + "step": 1768 + }, + { + "epoch": 0.5457349992287521, + "grad_norm": 0.0, + "learning_rate": 9.007084309132013e-06, + "loss": 0.7845, + "step": 1769 + }, + { + "epoch": 0.5460434983803795, + "grad_norm": 0.0, + "learning_rate": 8.997138678567602e-06, + "loss": 0.778, + "step": 1770 + }, + { + "epoch": 0.5463519975320068, + "grad_norm": 0.0, + "learning_rate": 8.987194049966526e-06, + "loss": 0.7123, + "step": 1771 + }, + { + "epoch": 0.5466604966836341, + "grad_norm": 0.0, + "learning_rate": 8.977250433264504e-06, + "loss": 0.7844, + "step": 1772 + }, + { + "epoch": 0.5469689958352615, + "grad_norm": 0.0, + "learning_rate": 8.967307838396256e-06, + "loss": 0.7202, + "step": 1773 + }, + { + "epoch": 0.5472774949868888, + "grad_norm": 0.0, + "learning_rate": 8.957366275295471e-06, + "loss": 0.7484, + "step": 1774 + }, + { + "epoch": 0.5475859941385162, + "grad_norm": 0.0, + "learning_rate": 8.947425753894805e-06, + "loss": 0.77, + "step": 1775 + }, + { + "epoch": 0.5478944932901435, + "grad_norm": 0.0, + "learning_rate": 8.937486284125883e-06, + "loss": 0.8058, + "step": 1776 + }, + { + "epoch": 0.5482029924417707, + "grad_norm": 0.0, + "learning_rate": 8.927547875919275e-06, + "loss": 0.8029, + "step": 1777 + }, + { + "epoch": 0.5485114915933981, + "grad_norm": 0.0, + "learning_rate": 8.91761053920449e-06, + "loss": 0.7281, + "step": 1778 + }, + { + "epoch": 0.5488199907450254, + "grad_norm": 0.0, + "learning_rate": 8.90767428390997e-06, + "loss": 1.0318, + "step": 1779 + }, + { + "epoch": 0.5491284898966527, + "grad_norm": 0.0, + "learning_rate": 8.897739119963065e-06, + "loss": 0.8065, + "step": 1780 + }, + { + "epoch": 0.5494369890482801, + "grad_norm": 0.0, + "learning_rate": 8.887805057290048e-06, + "loss": 0.7575, + "step": 1781 + }, + { + "epoch": 0.5497454881999074, + "grad_norm": 0.0, + "learning_rate": 8.877872105816082e-06, + "loss": 0.7086, + "step": 1782 + }, + { + "epoch": 0.5500539873515348, + "grad_norm": 0.0, + "learning_rate": 8.867940275465233e-06, + "loss": 0.7722, + "step": 1783 + }, + { + "epoch": 0.5503624865031621, + "grad_norm": 0.0, + "learning_rate": 8.85800957616043e-06, + "loss": 0.7618, + "step": 1784 + }, + { + "epoch": 0.5506709856547894, + "grad_norm": 0.0, + "learning_rate": 8.848080017823482e-06, + "loss": 0.7556, + "step": 1785 + }, + { + "epoch": 0.5509794848064168, + "grad_norm": 0.0, + "learning_rate": 8.83815161037506e-06, + "loss": 0.8066, + "step": 1786 + }, + { + "epoch": 0.5512879839580441, + "grad_norm": 0.0, + "learning_rate": 8.828224363734679e-06, + "loss": 0.791, + "step": 1787 + }, + { + "epoch": 0.5515964831096715, + "grad_norm": 0.0, + "learning_rate": 8.81829828782069e-06, + "loss": 0.778, + "step": 1788 + }, + { + "epoch": 0.5519049822612988, + "grad_norm": 0.0, + "learning_rate": 8.80837339255029e-06, + "loss": 0.676, + "step": 1789 + }, + { + "epoch": 0.5522134814129261, + "grad_norm": 0.0, + "learning_rate": 8.798449687839482e-06, + "loss": 0.7408, + "step": 1790 + }, + { + "epoch": 0.5525219805645535, + "grad_norm": 0.0, + "learning_rate": 8.788527183603093e-06, + "loss": 0.7563, + "step": 1791 + }, + { + "epoch": 0.5528304797161808, + "grad_norm": 0.0, + "learning_rate": 8.77860588975473e-06, + "loss": 0.7402, + "step": 1792 + }, + { + "epoch": 0.5531389788678082, + "grad_norm": 0.0, + "learning_rate": 8.768685816206811e-06, + "loss": 0.7686, + "step": 1793 + }, + { + "epoch": 0.5534474780194355, + "grad_norm": 0.0, + "learning_rate": 8.758766972870525e-06, + "loss": 0.7298, + "step": 1794 + }, + { + "epoch": 0.5537559771710627, + "grad_norm": 0.0, + "learning_rate": 8.748849369655833e-06, + "loss": 0.7227, + "step": 1795 + }, + { + "epoch": 0.5540644763226901, + "grad_norm": 0.0, + "learning_rate": 8.738933016471461e-06, + "loss": 0.802, + "step": 1796 + }, + { + "epoch": 0.5543729754743174, + "grad_norm": 0.0, + "learning_rate": 8.729017923224878e-06, + "loss": 0.7057, + "step": 1797 + }, + { + "epoch": 0.5546814746259447, + "grad_norm": 0.0, + "learning_rate": 8.7191040998223e-06, + "loss": 0.708, + "step": 1798 + }, + { + "epoch": 0.5549899737775721, + "grad_norm": 0.0, + "learning_rate": 8.709191556168675e-06, + "loss": 0.6959, + "step": 1799 + }, + { + "epoch": 0.5552984729291994, + "grad_norm": 0.0, + "learning_rate": 8.699280302167674e-06, + "loss": 0.7177, + "step": 1800 + }, + { + "epoch": 0.5556069720808268, + "grad_norm": 0.0, + "learning_rate": 8.689370347721668e-06, + "loss": 0.731, + "step": 1801 + }, + { + "epoch": 0.5559154712324541, + "grad_norm": 0.0, + "learning_rate": 8.679461702731746e-06, + "loss": 0.8371, + "step": 1802 + }, + { + "epoch": 0.5562239703840814, + "grad_norm": 0.0, + "learning_rate": 8.669554377097674e-06, + "loss": 0.8826, + "step": 1803 + }, + { + "epoch": 0.5565324695357088, + "grad_norm": 0.0, + "learning_rate": 8.659648380717914e-06, + "loss": 0.6772, + "step": 1804 + }, + { + "epoch": 0.5568409686873361, + "grad_norm": 0.0, + "learning_rate": 8.649743723489582e-06, + "loss": 0.6543, + "step": 1805 + }, + { + "epoch": 0.5571494678389635, + "grad_norm": 0.0, + "learning_rate": 8.639840415308475e-06, + "loss": 0.7435, + "step": 1806 + }, + { + "epoch": 0.5574579669905908, + "grad_norm": 0.0, + "learning_rate": 8.629938466069028e-06, + "loss": 0.6827, + "step": 1807 + }, + { + "epoch": 0.5577664661422181, + "grad_norm": 0.0, + "learning_rate": 8.62003788566433e-06, + "loss": 0.7311, + "step": 1808 + }, + { + "epoch": 0.5580749652938455, + "grad_norm": 0.0, + "learning_rate": 8.610138683986088e-06, + "loss": 0.762, + "step": 1809 + }, + { + "epoch": 0.5583834644454728, + "grad_norm": 0.0, + "learning_rate": 8.600240870924645e-06, + "loss": 0.7338, + "step": 1810 + }, + { + "epoch": 0.5586919635971002, + "grad_norm": 0.0, + "learning_rate": 8.59034445636895e-06, + "loss": 0.7335, + "step": 1811 + }, + { + "epoch": 0.5590004627487274, + "grad_norm": 0.0, + "learning_rate": 8.580449450206555e-06, + "loss": 0.765, + "step": 1812 + }, + { + "epoch": 0.5593089619003547, + "grad_norm": 0.0, + "learning_rate": 8.570555862323612e-06, + "loss": 0.7169, + "step": 1813 + }, + { + "epoch": 0.5596174610519821, + "grad_norm": 0.0, + "learning_rate": 8.560663702604844e-06, + "loss": 0.7687, + "step": 1814 + }, + { + "epoch": 0.5599259602036094, + "grad_norm": 0.0, + "learning_rate": 8.55077298093355e-06, + "loss": 0.6824, + "step": 1815 + }, + { + "epoch": 0.5602344593552367, + "grad_norm": 0.0, + "learning_rate": 8.540883707191602e-06, + "loss": 0.7119, + "step": 1816 + }, + { + "epoch": 0.5605429585068641, + "grad_norm": 0.0, + "learning_rate": 8.53099589125942e-06, + "loss": 0.7964, + "step": 1817 + }, + { + "epoch": 0.5608514576584914, + "grad_norm": 0.0, + "learning_rate": 8.521109543015958e-06, + "loss": 0.7385, + "step": 1818 + }, + { + "epoch": 0.5611599568101188, + "grad_norm": 0.0, + "learning_rate": 8.511224672338715e-06, + "loss": 0.7068, + "step": 1819 + }, + { + "epoch": 0.5614684559617461, + "grad_norm": 0.0, + "learning_rate": 8.501341289103712e-06, + "loss": 1.0249, + "step": 1820 + }, + { + "epoch": 0.5617769551133734, + "grad_norm": 0.0, + "learning_rate": 8.491459403185485e-06, + "loss": 0.7633, + "step": 1821 + }, + { + "epoch": 0.5620854542650008, + "grad_norm": 0.0, + "learning_rate": 8.481579024457066e-06, + "loss": 0.6486, + "step": 1822 + }, + { + "epoch": 0.5623939534166281, + "grad_norm": 0.0, + "learning_rate": 8.471700162789989e-06, + "loss": 0.7773, + "step": 1823 + }, + { + "epoch": 0.5627024525682555, + "grad_norm": 0.0, + "learning_rate": 8.461822828054269e-06, + "loss": 0.7, + "step": 1824 + }, + { + "epoch": 0.5630109517198828, + "grad_norm": 0.0, + "learning_rate": 8.451947030118397e-06, + "loss": 0.7306, + "step": 1825 + }, + { + "epoch": 0.5633194508715101, + "grad_norm": 0.0, + "learning_rate": 8.442072778849328e-06, + "loss": 0.7438, + "step": 1826 + }, + { + "epoch": 0.5636279500231375, + "grad_norm": 0.0, + "learning_rate": 8.432200084112473e-06, + "loss": 0.6776, + "step": 1827 + }, + { + "epoch": 0.5639364491747648, + "grad_norm": 0.0, + "learning_rate": 8.422328955771683e-06, + "loss": 0.7962, + "step": 1828 + }, + { + "epoch": 0.564244948326392, + "grad_norm": 0.0, + "learning_rate": 8.412459403689249e-06, + "loss": 0.7924, + "step": 1829 + }, + { + "epoch": 0.5645534474780194, + "grad_norm": 0.0, + "learning_rate": 8.40259143772589e-06, + "loss": 0.7442, + "step": 1830 + }, + { + "epoch": 0.5648619466296467, + "grad_norm": 0.0, + "learning_rate": 8.39272506774073e-06, + "loss": 0.6769, + "step": 1831 + }, + { + "epoch": 0.5651704457812741, + "grad_norm": 0.0, + "learning_rate": 8.382860303591306e-06, + "loss": 0.7485, + "step": 1832 + }, + { + "epoch": 0.5654789449329014, + "grad_norm": 0.0, + "learning_rate": 8.372997155133548e-06, + "loss": 0.7878, + "step": 1833 + }, + { + "epoch": 0.5657874440845287, + "grad_norm": 0.0, + "learning_rate": 8.363135632221777e-06, + "loss": 0.8033, + "step": 1834 + }, + { + "epoch": 0.5660959432361561, + "grad_norm": 0.0, + "learning_rate": 8.35327574470868e-06, + "loss": 0.6677, + "step": 1835 + }, + { + "epoch": 0.5664044423877834, + "grad_norm": 0.0, + "learning_rate": 8.34341750244532e-06, + "loss": 0.7917, + "step": 1836 + }, + { + "epoch": 0.5667129415394108, + "grad_norm": 0.0, + "learning_rate": 8.333560915281109e-06, + "loss": 0.7731, + "step": 1837 + }, + { + "epoch": 0.5670214406910381, + "grad_norm": 0.0, + "learning_rate": 8.323705993063813e-06, + "loss": 0.7557, + "step": 1838 + }, + { + "epoch": 0.5673299398426654, + "grad_norm": 0.0, + "learning_rate": 8.313852745639523e-06, + "loss": 0.761, + "step": 1839 + }, + { + "epoch": 0.5676384389942928, + "grad_norm": 0.0, + "learning_rate": 8.304001182852668e-06, + "loss": 0.7892, + "step": 1840 + }, + { + "epoch": 0.5679469381459201, + "grad_norm": 0.0, + "learning_rate": 8.294151314545988e-06, + "loss": 0.7484, + "step": 1841 + }, + { + "epoch": 0.5682554372975475, + "grad_norm": 0.0, + "learning_rate": 8.284303150560538e-06, + "loss": 0.7573, + "step": 1842 + }, + { + "epoch": 0.5685639364491748, + "grad_norm": 0.0, + "learning_rate": 8.274456700735653e-06, + "loss": 0.6972, + "step": 1843 + }, + { + "epoch": 0.5688724356008021, + "grad_norm": 0.0, + "learning_rate": 8.26461197490897e-06, + "loss": 0.7, + "step": 1844 + }, + { + "epoch": 0.5691809347524295, + "grad_norm": 0.0, + "learning_rate": 8.2547689829164e-06, + "loss": 0.7058, + "step": 1845 + }, + { + "epoch": 0.5694894339040568, + "grad_norm": 0.0, + "learning_rate": 8.24492773459212e-06, + "loss": 0.7191, + "step": 1846 + }, + { + "epoch": 0.569797933055684, + "grad_norm": 0.0, + "learning_rate": 8.235088239768577e-06, + "loss": 0.7849, + "step": 1847 + }, + { + "epoch": 0.5701064322073114, + "grad_norm": 0.0, + "learning_rate": 8.225250508276439e-06, + "loss": 0.7633, + "step": 1848 + }, + { + "epoch": 0.5704149313589387, + "grad_norm": 0.0, + "learning_rate": 8.215414549944636e-06, + "loss": 0.6931, + "step": 1849 + }, + { + "epoch": 0.5707234305105661, + "grad_norm": 0.0, + "learning_rate": 8.20558037460032e-06, + "loss": 0.6946, + "step": 1850 + }, + { + "epoch": 0.5710319296621934, + "grad_norm": 0.0, + "learning_rate": 8.19574799206886e-06, + "loss": 0.6747, + "step": 1851 + }, + { + "epoch": 0.5713404288138207, + "grad_norm": 0.0, + "learning_rate": 8.185917412173832e-06, + "loss": 0.7037, + "step": 1852 + }, + { + "epoch": 0.5716489279654481, + "grad_norm": 0.0, + "learning_rate": 8.176088644737015e-06, + "loss": 0.7733, + "step": 1853 + }, + { + "epoch": 0.5719574271170754, + "grad_norm": 0.0, + "learning_rate": 8.166261699578375e-06, + "loss": 0.7072, + "step": 1854 + }, + { + "epoch": 0.5722659262687028, + "grad_norm": 0.0, + "learning_rate": 8.156436586516064e-06, + "loss": 0.7545, + "step": 1855 + }, + { + "epoch": 0.5725744254203301, + "grad_norm": 0.0, + "learning_rate": 8.146613315366387e-06, + "loss": 0.7408, + "step": 1856 + }, + { + "epoch": 0.5728829245719574, + "grad_norm": 0.0, + "learning_rate": 8.136791895943825e-06, + "loss": 0.7409, + "step": 1857 + }, + { + "epoch": 0.5731914237235848, + "grad_norm": 0.0, + "learning_rate": 8.126972338060997e-06, + "loss": 0.7324, + "step": 1858 + }, + { + "epoch": 0.5734999228752121, + "grad_norm": 0.0, + "learning_rate": 8.117154651528676e-06, + "loss": 0.8158, + "step": 1859 + }, + { + "epoch": 0.5738084220268395, + "grad_norm": 0.0, + "learning_rate": 8.107338846155762e-06, + "loss": 0.7496, + "step": 1860 + }, + { + "epoch": 0.5741169211784668, + "grad_norm": 0.0, + "learning_rate": 8.097524931749256e-06, + "loss": 0.7526, + "step": 1861 + }, + { + "epoch": 0.5744254203300941, + "grad_norm": 0.0, + "learning_rate": 8.087712918114294e-06, + "loss": 0.7048, + "step": 1862 + }, + { + "epoch": 0.5747339194817215, + "grad_norm": 0.0, + "learning_rate": 8.077902815054102e-06, + "loss": 0.7347, + "step": 1863 + }, + { + "epoch": 0.5750424186333487, + "grad_norm": 0.0, + "learning_rate": 8.06809463237e-06, + "loss": 1.0299, + "step": 1864 + }, + { + "epoch": 0.575350917784976, + "grad_norm": 0.0, + "learning_rate": 8.058288379861387e-06, + "loss": 0.7647, + "step": 1865 + }, + { + "epoch": 0.5756594169366034, + "grad_norm": 0.0, + "learning_rate": 8.048484067325735e-06, + "loss": 0.7311, + "step": 1866 + }, + { + "epoch": 0.5759679160882307, + "grad_norm": 0.0, + "learning_rate": 8.038681704558578e-06, + "loss": 0.6969, + "step": 1867 + }, + { + "epoch": 0.5762764152398581, + "grad_norm": 0.0, + "learning_rate": 8.028881301353503e-06, + "loss": 0.7247, + "step": 1868 + }, + { + "epoch": 0.5765849143914854, + "grad_norm": 0.0, + "learning_rate": 8.019082867502132e-06, + "loss": 0.713, + "step": 1869 + }, + { + "epoch": 0.5768934135431127, + "grad_norm": 0.0, + "learning_rate": 8.009286412794126e-06, + "loss": 0.7307, + "step": 1870 + }, + { + "epoch": 0.5772019126947401, + "grad_norm": 0.0, + "learning_rate": 7.999491947017174e-06, + "loss": 0.6996, + "step": 1871 + }, + { + "epoch": 0.5775104118463674, + "grad_norm": 0.0, + "learning_rate": 7.989699479956972e-06, + "loss": 0.7088, + "step": 1872 + }, + { + "epoch": 0.5778189109979948, + "grad_norm": 0.0, + "learning_rate": 7.97990902139721e-06, + "loss": 0.7401, + "step": 1873 + }, + { + "epoch": 0.5781274101496221, + "grad_norm": 0.0, + "learning_rate": 7.970120581119584e-06, + "loss": 0.7832, + "step": 1874 + }, + { + "epoch": 0.5784359093012494, + "grad_norm": 0.0, + "learning_rate": 7.960334168903769e-06, + "loss": 0.6926, + "step": 1875 + }, + { + "epoch": 0.5787444084528768, + "grad_norm": 0.0, + "learning_rate": 7.950549794527418e-06, + "loss": 0.7896, + "step": 1876 + }, + { + "epoch": 0.5790529076045041, + "grad_norm": 0.0, + "learning_rate": 7.940767467766142e-06, + "loss": 0.7093, + "step": 1877 + }, + { + "epoch": 0.5793614067561315, + "grad_norm": 0.0, + "learning_rate": 7.930987198393506e-06, + "loss": 0.7789, + "step": 1878 + }, + { + "epoch": 0.5796699059077588, + "grad_norm": 0.0, + "learning_rate": 7.921208996181022e-06, + "loss": 0.7411, + "step": 1879 + }, + { + "epoch": 0.5799784050593861, + "grad_norm": 0.0, + "learning_rate": 7.911432870898139e-06, + "loss": 0.7569, + "step": 1880 + }, + { + "epoch": 0.5802869042110134, + "grad_norm": 0.0, + "learning_rate": 7.901658832312234e-06, + "loss": 0.6853, + "step": 1881 + }, + { + "epoch": 0.5805954033626407, + "grad_norm": 0.0, + "learning_rate": 7.891886890188578e-06, + "loss": 0.8369, + "step": 1882 + }, + { + "epoch": 0.580903902514268, + "grad_norm": 0.0, + "learning_rate": 7.882117054290375e-06, + "loss": 0.804, + "step": 1883 + }, + { + "epoch": 0.5812124016658954, + "grad_norm": 0.0, + "learning_rate": 7.872349334378712e-06, + "loss": 0.7166, + "step": 1884 + }, + { + "epoch": 0.5815209008175227, + "grad_norm": 0.0, + "learning_rate": 7.862583740212564e-06, + "loss": 0.8346, + "step": 1885 + }, + { + "epoch": 0.5818293999691501, + "grad_norm": 0.0, + "learning_rate": 7.852820281548773e-06, + "loss": 0.6754, + "step": 1886 + }, + { + "epoch": 0.5821378991207774, + "grad_norm": 0.0, + "learning_rate": 7.84305896814206e-06, + "loss": 0.727, + "step": 1887 + }, + { + "epoch": 0.5824463982724047, + "grad_norm": 0.0, + "learning_rate": 7.833299809744997e-06, + "loss": 0.7691, + "step": 1888 + }, + { + "epoch": 0.5827548974240321, + "grad_norm": 0.0, + "learning_rate": 7.823542816108007e-06, + "loss": 0.7847, + "step": 1889 + }, + { + "epoch": 0.5830633965756594, + "grad_norm": 0.0, + "learning_rate": 7.813787996979343e-06, + "loss": 0.7657, + "step": 1890 + }, + { + "epoch": 0.5833718957272868, + "grad_norm": 0.0, + "learning_rate": 7.804035362105092e-06, + "loss": 0.7125, + "step": 1891 + }, + { + "epoch": 0.5836803948789141, + "grad_norm": 0.0, + "learning_rate": 7.794284921229151e-06, + "loss": 0.7207, + "step": 1892 + }, + { + "epoch": 0.5839888940305414, + "grad_norm": 0.0, + "learning_rate": 7.784536684093237e-06, + "loss": 0.7243, + "step": 1893 + }, + { + "epoch": 0.5842973931821688, + "grad_norm": 0.0, + "learning_rate": 7.774790660436857e-06, + "loss": 0.6716, + "step": 1894 + }, + { + "epoch": 0.5846058923337961, + "grad_norm": 0.0, + "learning_rate": 7.765046859997303e-06, + "loss": 0.7218, + "step": 1895 + }, + { + "epoch": 0.5849143914854235, + "grad_norm": 0.0, + "learning_rate": 7.755305292509656e-06, + "loss": 0.7652, + "step": 1896 + }, + { + "epoch": 0.5852228906370508, + "grad_norm": 0.0, + "learning_rate": 7.745565967706757e-06, + "loss": 0.7331, + "step": 1897 + }, + { + "epoch": 0.5855313897886781, + "grad_norm": 0.0, + "learning_rate": 7.735828895319215e-06, + "loss": 0.7257, + "step": 1898 + }, + { + "epoch": 0.5858398889403054, + "grad_norm": 0.0, + "learning_rate": 7.726094085075377e-06, + "loss": 0.7037, + "step": 1899 + }, + { + "epoch": 0.5861483880919327, + "grad_norm": 0.0, + "learning_rate": 7.716361546701337e-06, + "loss": 0.7186, + "step": 1900 + }, + { + "epoch": 0.58645688724356, + "grad_norm": 0.0, + "learning_rate": 7.706631289920923e-06, + "loss": 0.7586, + "step": 1901 + }, + { + "epoch": 0.5867653863951874, + "grad_norm": 0.0, + "learning_rate": 7.696903324455678e-06, + "loss": 0.7501, + "step": 1902 + }, + { + "epoch": 0.5870738855468147, + "grad_norm": 0.0, + "learning_rate": 7.687177660024854e-06, + "loss": 0.7695, + "step": 1903 + }, + { + "epoch": 0.5873823846984421, + "grad_norm": 0.0, + "learning_rate": 7.677454306345408e-06, + "loss": 0.7259, + "step": 1904 + }, + { + "epoch": 0.5876908838500694, + "grad_norm": 0.0, + "learning_rate": 7.667733273131989e-06, + "loss": 0.7603, + "step": 1905 + }, + { + "epoch": 0.5879993830016967, + "grad_norm": 0.0, + "learning_rate": 7.658014570096926e-06, + "loss": 0.6914, + "step": 1906 + }, + { + "epoch": 0.5883078821533241, + "grad_norm": 0.0, + "learning_rate": 7.648298206950216e-06, + "loss": 0.7278, + "step": 1907 + }, + { + "epoch": 0.5886163813049514, + "grad_norm": 0.0, + "learning_rate": 7.638584193399524e-06, + "loss": 0.7888, + "step": 1908 + }, + { + "epoch": 0.5889248804565788, + "grad_norm": 0.0, + "learning_rate": 7.628872539150165e-06, + "loss": 0.7208, + "step": 1909 + }, + { + "epoch": 0.5892333796082061, + "grad_norm": 0.0, + "learning_rate": 7.619163253905097e-06, + "loss": 0.6926, + "step": 1910 + }, + { + "epoch": 0.5895418787598334, + "grad_norm": 0.0, + "learning_rate": 7.609456347364919e-06, + "loss": 0.7166, + "step": 1911 + }, + { + "epoch": 0.5898503779114608, + "grad_norm": 0.0, + "learning_rate": 7.599751829227832e-06, + "loss": 0.7261, + "step": 1912 + }, + { + "epoch": 0.5901588770630881, + "grad_norm": 0.0, + "learning_rate": 7.590049709189671e-06, + "loss": 0.6931, + "step": 1913 + }, + { + "epoch": 0.5904673762147155, + "grad_norm": 0.0, + "learning_rate": 7.580349996943868e-06, + "loss": 0.6828, + "step": 1914 + }, + { + "epoch": 0.5907758753663428, + "grad_norm": 0.0, + "learning_rate": 7.570652702181454e-06, + "loss": 0.727, + "step": 1915 + }, + { + "epoch": 0.59108437451797, + "grad_norm": 0.0, + "learning_rate": 7.560957834591034e-06, + "loss": 0.7789, + "step": 1916 + }, + { + "epoch": 0.5913928736695974, + "grad_norm": 0.0, + "learning_rate": 7.551265403858797e-06, + "loss": 0.7021, + "step": 1917 + }, + { + "epoch": 0.5917013728212247, + "grad_norm": 0.0, + "learning_rate": 7.541575419668497e-06, + "loss": 0.6739, + "step": 1918 + }, + { + "epoch": 0.592009871972852, + "grad_norm": 0.0, + "learning_rate": 7.531887891701441e-06, + "loss": 0.8115, + "step": 1919 + }, + { + "epoch": 0.5923183711244794, + "grad_norm": 0.0, + "learning_rate": 7.522202829636478e-06, + "loss": 0.7618, + "step": 1920 + }, + { + "epoch": 0.5926268702761067, + "grad_norm": 0.0, + "learning_rate": 7.512520243150003e-06, + "loss": 0.7356, + "step": 1921 + }, + { + "epoch": 0.5929353694277341, + "grad_norm": 0.0, + "learning_rate": 7.50284014191593e-06, + "loss": 0.7187, + "step": 1922 + }, + { + "epoch": 0.5932438685793614, + "grad_norm": 0.0, + "learning_rate": 7.493162535605698e-06, + "loss": 0.7121, + "step": 1923 + }, + { + "epoch": 0.5935523677309887, + "grad_norm": 0.0, + "learning_rate": 7.483487433888238e-06, + "loss": 0.7162, + "step": 1924 + }, + { + "epoch": 0.5938608668826161, + "grad_norm": 0.0, + "learning_rate": 7.473814846429993e-06, + "loss": 0.6884, + "step": 1925 + }, + { + "epoch": 0.5941693660342434, + "grad_norm": 0.0, + "learning_rate": 7.46414478289489e-06, + "loss": 0.7447, + "step": 1926 + }, + { + "epoch": 0.5944778651858708, + "grad_norm": 0.0, + "learning_rate": 7.4544772529443295e-06, + "loss": 0.7128, + "step": 1927 + }, + { + "epoch": 0.5947863643374981, + "grad_norm": 0.0, + "learning_rate": 7.444812266237198e-06, + "loss": 0.665, + "step": 1928 + }, + { + "epoch": 0.5950948634891254, + "grad_norm": 0.0, + "learning_rate": 7.435149832429812e-06, + "loss": 0.7802, + "step": 1929 + }, + { + "epoch": 0.5954033626407528, + "grad_norm": 0.0, + "learning_rate": 7.4254899611759616e-06, + "loss": 1.0484, + "step": 1930 + }, + { + "epoch": 0.5957118617923801, + "grad_norm": 0.0, + "learning_rate": 7.415832662126865e-06, + "loss": 0.7477, + "step": 1931 + }, + { + "epoch": 0.5960203609440075, + "grad_norm": 0.0, + "learning_rate": 7.406177944931179e-06, + "loss": 0.6942, + "step": 1932 + }, + { + "epoch": 0.5963288600956347, + "grad_norm": 0.0, + "learning_rate": 7.396525819234969e-06, + "loss": 0.7013, + "step": 1933 + }, + { + "epoch": 0.596637359247262, + "grad_norm": 0.0, + "learning_rate": 7.386876294681722e-06, + "loss": 0.7359, + "step": 1934 + }, + { + "epoch": 0.5969458583988894, + "grad_norm": 0.0, + "learning_rate": 7.377229380912321e-06, + "loss": 0.7121, + "step": 1935 + }, + { + "epoch": 0.5972543575505167, + "grad_norm": 0.0, + "learning_rate": 7.367585087565046e-06, + "loss": 0.7173, + "step": 1936 + }, + { + "epoch": 0.597562856702144, + "grad_norm": 0.0, + "learning_rate": 7.357943424275547e-06, + "loss": 0.7273, + "step": 1937 + }, + { + "epoch": 0.5978713558537714, + "grad_norm": 0.0, + "learning_rate": 7.348304400676856e-06, + "loss": 0.7294, + "step": 1938 + }, + { + "epoch": 0.5981798550053987, + "grad_norm": 0.0, + "learning_rate": 7.338668026399365e-06, + "loss": 0.7416, + "step": 1939 + }, + { + "epoch": 0.5984883541570261, + "grad_norm": 0.0, + "learning_rate": 7.329034311070828e-06, + "loss": 0.8225, + "step": 1940 + }, + { + "epoch": 0.5987968533086534, + "grad_norm": 0.0, + "learning_rate": 7.319403264316325e-06, + "loss": 0.7122, + "step": 1941 + }, + { + "epoch": 0.5991053524602807, + "grad_norm": 0.0, + "learning_rate": 7.30977489575828e-06, + "loss": 0.6605, + "step": 1942 + }, + { + "epoch": 0.5994138516119081, + "grad_norm": 0.0, + "learning_rate": 7.300149215016442e-06, + "loss": 0.7746, + "step": 1943 + }, + { + "epoch": 0.5997223507635354, + "grad_norm": 0.0, + "learning_rate": 7.290526231707873e-06, + "loss": 0.6528, + "step": 1944 + }, + { + "epoch": 0.6000308499151628, + "grad_norm": 0.0, + "learning_rate": 7.28090595544694e-06, + "loss": 0.7022, + "step": 1945 + }, + { + "epoch": 0.6003393490667901, + "grad_norm": 0.0, + "learning_rate": 7.271288395845302e-06, + "loss": 0.6744, + "step": 1946 + }, + { + "epoch": 0.6006478482184174, + "grad_norm": 0.0, + "learning_rate": 7.2616735625119085e-06, + "loss": 0.6828, + "step": 1947 + }, + { + "epoch": 0.6009563473700448, + "grad_norm": 0.0, + "learning_rate": 7.252061465052984e-06, + "loss": 0.7555, + "step": 1948 + }, + { + "epoch": 0.6012648465216721, + "grad_norm": 0.0, + "learning_rate": 7.242452113072022e-06, + "loss": 0.7715, + "step": 1949 + }, + { + "epoch": 0.6015733456732995, + "grad_norm": 0.0, + "learning_rate": 7.232845516169764e-06, + "loss": 0.7449, + "step": 1950 + }, + { + "epoch": 0.6018818448249267, + "grad_norm": 0.0, + "learning_rate": 7.223241683944204e-06, + "loss": 0.7889, + "step": 1951 + }, + { + "epoch": 0.602190343976554, + "grad_norm": 0.0, + "learning_rate": 7.213640625990582e-06, + "loss": 0.6666, + "step": 1952 + }, + { + "epoch": 0.6024988431281814, + "grad_norm": 0.0, + "learning_rate": 7.204042351901359e-06, + "loss": 1.0587, + "step": 1953 + }, + { + "epoch": 0.6028073422798087, + "grad_norm": 0.0, + "learning_rate": 7.194446871266206e-06, + "loss": 0.6755, + "step": 1954 + }, + { + "epoch": 0.603115841431436, + "grad_norm": 0.0, + "learning_rate": 7.184854193672017e-06, + "loss": 0.7586, + "step": 1955 + }, + { + "epoch": 0.6034243405830634, + "grad_norm": 0.0, + "learning_rate": 7.175264328702878e-06, + "loss": 0.7476, + "step": 1956 + }, + { + "epoch": 0.6037328397346907, + "grad_norm": 0.0, + "learning_rate": 7.165677285940071e-06, + "loss": 0.6872, + "step": 1957 + }, + { + "epoch": 0.6040413388863181, + "grad_norm": 0.0, + "learning_rate": 7.156093074962052e-06, + "loss": 0.7498, + "step": 1958 + }, + { + "epoch": 0.6043498380379454, + "grad_norm": 0.0, + "learning_rate": 7.1465117053444465e-06, + "loss": 0.7997, + "step": 1959 + }, + { + "epoch": 0.6046583371895727, + "grad_norm": 0.0, + "learning_rate": 7.136933186660049e-06, + "loss": 0.6762, + "step": 1960 + }, + { + "epoch": 0.6049668363412001, + "grad_norm": 0.0, + "learning_rate": 7.1273575284788e-06, + "loss": 0.8435, + "step": 1961 + }, + { + "epoch": 0.6052753354928274, + "grad_norm": 0.0, + "learning_rate": 7.117784740367788e-06, + "loss": 0.8126, + "step": 1962 + }, + { + "epoch": 0.6055838346444548, + "grad_norm": 0.0, + "learning_rate": 7.108214831891219e-06, + "loss": 0.6898, + "step": 1963 + }, + { + "epoch": 0.6058923337960821, + "grad_norm": 0.0, + "learning_rate": 7.09864781261044e-06, + "loss": 0.7271, + "step": 1964 + }, + { + "epoch": 0.6062008329477094, + "grad_norm": 0.0, + "learning_rate": 7.089083692083902e-06, + "loss": 0.7512, + "step": 1965 + }, + { + "epoch": 0.6065093320993368, + "grad_norm": 0.0, + "learning_rate": 7.0795224798671666e-06, + "loss": 0.8055, + "step": 1966 + }, + { + "epoch": 0.6068178312509641, + "grad_norm": 0.0, + "learning_rate": 7.069964185512874e-06, + "loss": 0.6799, + "step": 1967 + }, + { + "epoch": 0.6071263304025913, + "grad_norm": 0.0, + "learning_rate": 7.060408818570768e-06, + "loss": 0.7722, + "step": 1968 + }, + { + "epoch": 0.6074348295542187, + "grad_norm": 0.0, + "learning_rate": 7.050856388587655e-06, + "loss": 0.7811, + "step": 1969 + }, + { + "epoch": 0.607743328705846, + "grad_norm": 0.0, + "learning_rate": 7.0413069051074146e-06, + "loss": 0.6801, + "step": 1970 + }, + { + "epoch": 0.6080518278574734, + "grad_norm": 0.0, + "learning_rate": 7.031760377670978e-06, + "loss": 0.7812, + "step": 1971 + }, + { + "epoch": 0.6083603270091007, + "grad_norm": 0.0, + "learning_rate": 7.022216815816323e-06, + "loss": 0.7256, + "step": 1972 + }, + { + "epoch": 0.608668826160728, + "grad_norm": 0.0, + "learning_rate": 7.012676229078469e-06, + "loss": 0.7172, + "step": 1973 + }, + { + "epoch": 0.6089773253123554, + "grad_norm": 0.0, + "learning_rate": 7.003138626989457e-06, + "loss": 1.0376, + "step": 1974 + }, + { + "epoch": 0.6092858244639827, + "grad_norm": 0.0, + "learning_rate": 6.993604019078354e-06, + "loss": 0.6692, + "step": 1975 + }, + { + "epoch": 0.6095943236156101, + "grad_norm": 0.0, + "learning_rate": 6.984072414871223e-06, + "loss": 0.7331, + "step": 1976 + }, + { + "epoch": 0.6099028227672374, + "grad_norm": 0.0, + "learning_rate": 6.974543823891138e-06, + "loss": 0.7152, + "step": 1977 + }, + { + "epoch": 0.6102113219188647, + "grad_norm": 0.0, + "learning_rate": 6.965018255658156e-06, + "loss": 0.7579, + "step": 1978 + }, + { + "epoch": 0.6105198210704921, + "grad_norm": 0.0, + "learning_rate": 6.955495719689321e-06, + "loss": 0.6, + "step": 1979 + }, + { + "epoch": 0.6108283202221194, + "grad_norm": 0.0, + "learning_rate": 6.9459762254986344e-06, + "loss": 0.8325, + "step": 1980 + }, + { + "epoch": 0.6111368193737468, + "grad_norm": 0.0, + "learning_rate": 6.936459782597069e-06, + "loss": 0.7452, + "step": 1981 + }, + { + "epoch": 0.6114453185253741, + "grad_norm": 0.0, + "learning_rate": 6.9269464004925466e-06, + "loss": 0.6906, + "step": 1982 + }, + { + "epoch": 0.6117538176770014, + "grad_norm": 0.0, + "learning_rate": 6.917436088689935e-06, + "loss": 0.7581, + "step": 1983 + }, + { + "epoch": 0.6120623168286288, + "grad_norm": 0.0, + "learning_rate": 6.907928856691024e-06, + "loss": 0.6741, + "step": 1984 + }, + { + "epoch": 0.612370815980256, + "grad_norm": 0.0, + "learning_rate": 6.898424713994536e-06, + "loss": 0.6596, + "step": 1985 + }, + { + "epoch": 0.6126793151318833, + "grad_norm": 0.0, + "learning_rate": 6.888923670096102e-06, + "loss": 0.748, + "step": 1986 + }, + { + "epoch": 0.6129878142835107, + "grad_norm": 0.0, + "learning_rate": 6.879425734488261e-06, + "loss": 0.7023, + "step": 1987 + }, + { + "epoch": 0.613296313435138, + "grad_norm": 0.0, + "learning_rate": 6.86993091666044e-06, + "loss": 0.7277, + "step": 1988 + }, + { + "epoch": 0.6136048125867654, + "grad_norm": 0.0, + "learning_rate": 6.860439226098956e-06, + "loss": 0.8299, + "step": 1989 + }, + { + "epoch": 0.6139133117383927, + "grad_norm": 0.0, + "learning_rate": 6.850950672287003e-06, + "loss": 0.8051, + "step": 1990 + }, + { + "epoch": 0.61422181089002, + "grad_norm": 0.0, + "learning_rate": 6.841465264704636e-06, + "loss": 0.8194, + "step": 1991 + }, + { + "epoch": 0.6145303100416474, + "grad_norm": 0.0, + "learning_rate": 6.831983012828775e-06, + "loss": 0.7889, + "step": 1992 + }, + { + "epoch": 0.6148388091932747, + "grad_norm": 0.0, + "learning_rate": 6.82250392613317e-06, + "loss": 0.7253, + "step": 1993 + }, + { + "epoch": 0.615147308344902, + "grad_norm": 0.0, + "learning_rate": 6.8130280140884286e-06, + "loss": 0.7615, + "step": 1994 + }, + { + "epoch": 0.6154558074965294, + "grad_norm": 0.0, + "learning_rate": 6.803555286161973e-06, + "loss": 0.6758, + "step": 1995 + }, + { + "epoch": 0.6157643066481567, + "grad_norm": 0.0, + "learning_rate": 6.7940857518180555e-06, + "loss": 0.8349, + "step": 1996 + }, + { + "epoch": 0.6160728057997841, + "grad_norm": 0.0, + "learning_rate": 6.784619420517724e-06, + "loss": 0.7385, + "step": 1997 + }, + { + "epoch": 0.6163813049514114, + "grad_norm": 0.0, + "learning_rate": 6.775156301718837e-06, + "loss": 0.6718, + "step": 1998 + }, + { + "epoch": 0.6166898041030388, + "grad_norm": 0.0, + "learning_rate": 6.765696404876039e-06, + "loss": 0.7217, + "step": 1999 + }, + { + "epoch": 0.6169983032546661, + "grad_norm": 0.0, + "learning_rate": 6.756239739440758e-06, + "loss": 0.8079, + "step": 2000 + }, + { + "epoch": 0.6173068024062934, + "grad_norm": 0.0, + "learning_rate": 6.746786314861189e-06, + "loss": 0.703, + "step": 2001 + }, + { + "epoch": 0.6176153015579208, + "grad_norm": 0.0, + "learning_rate": 6.737336140582291e-06, + "loss": 0.7275, + "step": 2002 + }, + { + "epoch": 0.617923800709548, + "grad_norm": 0.0, + "learning_rate": 6.72788922604578e-06, + "loss": 0.7693, + "step": 2003 + }, + { + "epoch": 0.6182322998611753, + "grad_norm": 0.0, + "learning_rate": 6.718445580690113e-06, + "loss": 0.7452, + "step": 2004 + }, + { + "epoch": 0.6185407990128027, + "grad_norm": 0.0, + "learning_rate": 6.709005213950472e-06, + "loss": 0.7144, + "step": 2005 + }, + { + "epoch": 0.61884929816443, + "grad_norm": 0.0, + "learning_rate": 6.699568135258774e-06, + "loss": 0.6836, + "step": 2006 + }, + { + "epoch": 0.6191577973160574, + "grad_norm": 0.0, + "learning_rate": 6.690134354043649e-06, + "loss": 0.7265, + "step": 2007 + }, + { + "epoch": 0.6194662964676847, + "grad_norm": 0.0, + "learning_rate": 6.68070387973043e-06, + "loss": 0.6829, + "step": 2008 + }, + { + "epoch": 0.619774795619312, + "grad_norm": 0.0, + "learning_rate": 6.671276721741149e-06, + "loss": 0.7093, + "step": 2009 + }, + { + "epoch": 0.6200832947709394, + "grad_norm": 0.0, + "learning_rate": 6.6618528894945175e-06, + "loss": 0.7282, + "step": 2010 + }, + { + "epoch": 0.6203917939225667, + "grad_norm": 0.0, + "learning_rate": 6.652432392405934e-06, + "loss": 0.7671, + "step": 2011 + }, + { + "epoch": 0.620700293074194, + "grad_norm": 0.0, + "learning_rate": 6.643015239887458e-06, + "loss": 0.7393, + "step": 2012 + }, + { + "epoch": 0.6210087922258214, + "grad_norm": 0.0, + "learning_rate": 6.633601441347812e-06, + "loss": 0.6363, + "step": 2013 + }, + { + "epoch": 0.6213172913774487, + "grad_norm": 0.0, + "learning_rate": 6.624191006192363e-06, + "loss": 0.7655, + "step": 2014 + }, + { + "epoch": 0.6216257905290761, + "grad_norm": 0.0, + "learning_rate": 6.61478394382312e-06, + "loss": 0.6773, + "step": 2015 + }, + { + "epoch": 0.6219342896807034, + "grad_norm": 0.0, + "learning_rate": 6.605380263638722e-06, + "loss": 0.6704, + "step": 2016 + }, + { + "epoch": 0.6222427888323308, + "grad_norm": 0.0, + "learning_rate": 6.595979975034434e-06, + "loss": 0.7343, + "step": 2017 + }, + { + "epoch": 0.6225512879839581, + "grad_norm": 0.0, + "learning_rate": 6.586583087402119e-06, + "loss": 0.7653, + "step": 2018 + }, + { + "epoch": 0.6228597871355854, + "grad_norm": 0.0, + "learning_rate": 6.577189610130254e-06, + "loss": 0.7775, + "step": 2019 + }, + { + "epoch": 0.6231682862872127, + "grad_norm": 0.0, + "learning_rate": 6.567799552603904e-06, + "loss": 0.7751, + "step": 2020 + }, + { + "epoch": 0.62347678543884, + "grad_norm": 0.0, + "learning_rate": 6.558412924204722e-06, + "loss": 0.676, + "step": 2021 + }, + { + "epoch": 0.6237852845904673, + "grad_norm": 0.0, + "learning_rate": 6.549029734310928e-06, + "loss": 0.6836, + "step": 2022 + }, + { + "epoch": 0.6240937837420947, + "grad_norm": 0.0, + "learning_rate": 6.539649992297311e-06, + "loss": 0.7058, + "step": 2023 + }, + { + "epoch": 0.624402282893722, + "grad_norm": 0.0, + "learning_rate": 6.530273707535214e-06, + "loss": 0.724, + "step": 2024 + }, + { + "epoch": 0.6247107820453494, + "grad_norm": 0.0, + "learning_rate": 6.520900889392525e-06, + "loss": 0.7247, + "step": 2025 + }, + { + "epoch": 0.6250192811969767, + "grad_norm": 0.0, + "learning_rate": 6.511531547233674e-06, + "loss": 0.761, + "step": 2026 + }, + { + "epoch": 0.625327780348604, + "grad_norm": 0.0, + "learning_rate": 6.502165690419608e-06, + "loss": 0.801, + "step": 2027 + }, + { + "epoch": 0.6256362795002314, + "grad_norm": 0.0, + "learning_rate": 6.492803328307799e-06, + "loss": 0.726, + "step": 2028 + }, + { + "epoch": 0.6259447786518587, + "grad_norm": 0.0, + "learning_rate": 6.483444470252227e-06, + "loss": 0.7648, + "step": 2029 + }, + { + "epoch": 0.626253277803486, + "grad_norm": 0.0, + "learning_rate": 6.4740891256033736e-06, + "loss": 0.7044, + "step": 2030 + }, + { + "epoch": 0.6265617769551134, + "grad_norm": 0.0, + "learning_rate": 6.464737303708197e-06, + "loss": 0.7878, + "step": 2031 + }, + { + "epoch": 0.6268702761067407, + "grad_norm": 0.0, + "learning_rate": 6.455389013910151e-06, + "loss": 0.6916, + "step": 2032 + }, + { + "epoch": 0.6271787752583681, + "grad_norm": 0.0, + "learning_rate": 6.4460442655491515e-06, + "loss": 0.6691, + "step": 2033 + }, + { + "epoch": 0.6274872744099954, + "grad_norm": 0.0, + "learning_rate": 6.436703067961589e-06, + "loss": 0.6556, + "step": 2034 + }, + { + "epoch": 0.6277957735616227, + "grad_norm": 0.0, + "learning_rate": 6.4273654304802844e-06, + "loss": 0.7585, + "step": 2035 + }, + { + "epoch": 0.6281042727132501, + "grad_norm": 0.0, + "learning_rate": 6.4180313624345205e-06, + "loss": 0.7429, + "step": 2036 + }, + { + "epoch": 0.6284127718648773, + "grad_norm": 0.0, + "learning_rate": 6.408700873150005e-06, + "loss": 0.6842, + "step": 2037 + }, + { + "epoch": 0.6287212710165047, + "grad_norm": 0.0, + "learning_rate": 6.399373971948877e-06, + "loss": 0.7534, + "step": 2038 + }, + { + "epoch": 0.629029770168132, + "grad_norm": 0.0, + "learning_rate": 6.3900506681496786e-06, + "loss": 0.7038, + "step": 2039 + }, + { + "epoch": 0.6293382693197593, + "grad_norm": 0.0, + "learning_rate": 6.38073097106737e-06, + "loss": 0.6684, + "step": 2040 + }, + { + "epoch": 0.6296467684713867, + "grad_norm": 0.0, + "learning_rate": 6.371414890013304e-06, + "loss": 0.6384, + "step": 2041 + }, + { + "epoch": 0.629955267623014, + "grad_norm": 0.0, + "learning_rate": 6.362102434295216e-06, + "loss": 0.816, + "step": 2042 + }, + { + "epoch": 0.6302637667746414, + "grad_norm": 0.0, + "learning_rate": 6.352793613217232e-06, + "loss": 0.6687, + "step": 2043 + }, + { + "epoch": 0.6305722659262687, + "grad_norm": 0.0, + "learning_rate": 6.3434884360798255e-06, + "loss": 0.7027, + "step": 2044 + }, + { + "epoch": 0.630880765077896, + "grad_norm": 0.0, + "learning_rate": 6.334186912179845e-06, + "loss": 0.7615, + "step": 2045 + }, + { + "epoch": 0.6311892642295234, + "grad_norm": 0.0, + "learning_rate": 6.3248890508104895e-06, + "loss": 0.6306, + "step": 2046 + }, + { + "epoch": 0.6314977633811507, + "grad_norm": 0.0, + "learning_rate": 6.315594861261299e-06, + "loss": 0.8275, + "step": 2047 + }, + { + "epoch": 0.631806262532778, + "grad_norm": 0.0, + "learning_rate": 6.3063043528181286e-06, + "loss": 0.7133, + "step": 2048 + }, + { + "epoch": 0.6321147616844054, + "grad_norm": 0.0, + "learning_rate": 6.297017534763175e-06, + "loss": 0.7637, + "step": 2049 + }, + { + "epoch": 0.6324232608360327, + "grad_norm": 0.0, + "learning_rate": 6.28773441637494e-06, + "loss": 1.0219, + "step": 2050 + }, + { + "epoch": 0.6327317599876601, + "grad_norm": 0.0, + "learning_rate": 6.278455006928233e-06, + "loss": 0.6952, + "step": 2051 + }, + { + "epoch": 0.6330402591392874, + "grad_norm": 0.0, + "learning_rate": 6.269179315694145e-06, + "loss": 0.6483, + "step": 2052 + }, + { + "epoch": 0.6333487582909147, + "grad_norm": 0.0, + "learning_rate": 6.259907351940069e-06, + "loss": 0.614, + "step": 2053 + }, + { + "epoch": 0.6336572574425421, + "grad_norm": 0.0, + "learning_rate": 6.250639124929665e-06, + "loss": 0.7949, + "step": 2054 + }, + { + "epoch": 0.6339657565941693, + "grad_norm": 0.0, + "learning_rate": 6.241374643922864e-06, + "loss": 0.6852, + "step": 2055 + }, + { + "epoch": 0.6342742557457967, + "grad_norm": 0.0, + "learning_rate": 6.232113918175845e-06, + "loss": 0.7307, + "step": 2056 + }, + { + "epoch": 0.634582754897424, + "grad_norm": 0.0, + "learning_rate": 6.222856956941041e-06, + "loss": 0.7534, + "step": 2057 + }, + { + "epoch": 0.6348912540490513, + "grad_norm": 0.0, + "learning_rate": 6.213603769467132e-06, + "loss": 0.6979, + "step": 2058 + }, + { + "epoch": 0.6351997532006787, + "grad_norm": 0.0, + "learning_rate": 6.204354364999014e-06, + "loss": 0.7602, + "step": 2059 + }, + { + "epoch": 0.635508252352306, + "grad_norm": 0.0, + "learning_rate": 6.195108752777814e-06, + "loss": 0.7576, + "step": 2060 + }, + { + "epoch": 0.6358167515039334, + "grad_norm": 0.0, + "learning_rate": 6.185866942040861e-06, + "loss": 0.7792, + "step": 2061 + }, + { + "epoch": 0.6361252506555607, + "grad_norm": 0.0, + "learning_rate": 6.17662894202169e-06, + "loss": 0.7114, + "step": 2062 + }, + { + "epoch": 0.636433749807188, + "grad_norm": 0.0, + "learning_rate": 6.167394761950032e-06, + "loss": 0.8525, + "step": 2063 + }, + { + "epoch": 0.6367422489588154, + "grad_norm": 0.0, + "learning_rate": 6.158164411051799e-06, + "loss": 0.6502, + "step": 2064 + }, + { + "epoch": 0.6370507481104427, + "grad_norm": 0.0, + "learning_rate": 6.148937898549072e-06, + "loss": 0.7319, + "step": 2065 + }, + { + "epoch": 0.63735924726207, + "grad_norm": 0.0, + "learning_rate": 6.139715233660106e-06, + "loss": 0.7326, + "step": 2066 + }, + { + "epoch": 0.6376677464136974, + "grad_norm": 0.0, + "learning_rate": 6.130496425599308e-06, + "loss": 0.7331, + "step": 2067 + }, + { + "epoch": 0.6379762455653247, + "grad_norm": 0.0, + "learning_rate": 6.121281483577233e-06, + "loss": 0.7268, + "step": 2068 + }, + { + "epoch": 0.6382847447169521, + "grad_norm": 0.0, + "learning_rate": 6.112070416800562e-06, + "loss": 0.7058, + "step": 2069 + }, + { + "epoch": 0.6385932438685794, + "grad_norm": 0.0, + "learning_rate": 6.102863234472124e-06, + "loss": 0.6867, + "step": 2070 + }, + { + "epoch": 0.6389017430202067, + "grad_norm": 0.0, + "learning_rate": 6.093659945790853e-06, + "loss": 0.7267, + "step": 2071 + }, + { + "epoch": 0.639210242171834, + "grad_norm": 0.0, + "learning_rate": 6.084460559951802e-06, + "loss": 0.6444, + "step": 2072 + }, + { + "epoch": 0.6395187413234613, + "grad_norm": 0.0, + "learning_rate": 6.075265086146111e-06, + "loss": 1.0321, + "step": 2073 + }, + { + "epoch": 0.6398272404750887, + "grad_norm": 0.0, + "learning_rate": 6.066073533561024e-06, + "loss": 0.6793, + "step": 2074 + }, + { + "epoch": 0.640135739626716, + "grad_norm": 0.0, + "learning_rate": 6.056885911379863e-06, + "loss": 0.6712, + "step": 2075 + }, + { + "epoch": 0.6404442387783433, + "grad_norm": 0.0, + "learning_rate": 6.047702228782023e-06, + "loss": 0.7343, + "step": 2076 + }, + { + "epoch": 0.6407527379299707, + "grad_norm": 0.0, + "learning_rate": 6.0385224949429666e-06, + "loss": 0.6725, + "step": 2077 + }, + { + "epoch": 0.641061237081598, + "grad_norm": 0.0, + "learning_rate": 6.029346719034203e-06, + "loss": 0.6364, + "step": 2078 + }, + { + "epoch": 0.6413697362332254, + "grad_norm": 0.0, + "learning_rate": 6.020174910223293e-06, + "loss": 0.7213, + "step": 2079 + }, + { + "epoch": 0.6416782353848527, + "grad_norm": 0.0, + "learning_rate": 6.011007077673835e-06, + "loss": 0.6837, + "step": 2080 + }, + { + "epoch": 0.64198673453648, + "grad_norm": 0.0, + "learning_rate": 6.001843230545452e-06, + "loss": 0.669, + "step": 2081 + }, + { + "epoch": 0.6422952336881074, + "grad_norm": 0.0, + "learning_rate": 5.992683377993784e-06, + "loss": 0.6777, + "step": 2082 + }, + { + "epoch": 0.6426037328397347, + "grad_norm": 0.0, + "learning_rate": 5.983527529170481e-06, + "loss": 0.7283, + "step": 2083 + }, + { + "epoch": 0.642912231991362, + "grad_norm": 0.0, + "learning_rate": 5.9743756932231955e-06, + "loss": 0.7848, + "step": 2084 + }, + { + "epoch": 0.6432207311429894, + "grad_norm": 0.0, + "learning_rate": 5.965227879295572e-06, + "loss": 0.7034, + "step": 2085 + }, + { + "epoch": 0.6435292302946167, + "grad_norm": 0.0, + "learning_rate": 5.956084096527224e-06, + "loss": 0.7339, + "step": 2086 + }, + { + "epoch": 0.6438377294462441, + "grad_norm": 0.0, + "learning_rate": 5.946944354053753e-06, + "loss": 0.6728, + "step": 2087 + }, + { + "epoch": 0.6441462285978714, + "grad_norm": 0.0, + "learning_rate": 5.937808661006715e-06, + "loss": 0.7308, + "step": 2088 + }, + { + "epoch": 0.6444547277494986, + "grad_norm": 0.0, + "learning_rate": 5.928677026513627e-06, + "loss": 0.6657, + "step": 2089 + }, + { + "epoch": 0.644763226901126, + "grad_norm": 0.0, + "learning_rate": 5.919549459697942e-06, + "loss": 0.7471, + "step": 2090 + }, + { + "epoch": 0.6450717260527533, + "grad_norm": 0.0, + "learning_rate": 5.910425969679056e-06, + "loss": 0.7049, + "step": 2091 + }, + { + "epoch": 0.6453802252043807, + "grad_norm": 0.0, + "learning_rate": 5.901306565572288e-06, + "loss": 0.6928, + "step": 2092 + }, + { + "epoch": 0.645688724356008, + "grad_norm": 0.0, + "learning_rate": 5.8921912564888775e-06, + "loss": 0.758, + "step": 2093 + }, + { + "epoch": 0.6459972235076353, + "grad_norm": 0.0, + "learning_rate": 5.883080051535974e-06, + "loss": 0.6874, + "step": 2094 + }, + { + "epoch": 0.6463057226592627, + "grad_norm": 0.0, + "learning_rate": 5.873972959816619e-06, + "loss": 0.7153, + "step": 2095 + }, + { + "epoch": 0.64661422181089, + "grad_norm": 0.0, + "learning_rate": 5.864869990429753e-06, + "loss": 0.6867, + "step": 2096 + }, + { + "epoch": 0.6469227209625174, + "grad_norm": 0.0, + "learning_rate": 5.855771152470193e-06, + "loss": 0.6989, + "step": 2097 + }, + { + "epoch": 0.6472312201141447, + "grad_norm": 0.0, + "learning_rate": 5.846676455028635e-06, + "loss": 0.7185, + "step": 2098 + }, + { + "epoch": 0.647539719265772, + "grad_norm": 0.0, + "learning_rate": 5.83758590719162e-06, + "loss": 0.7486, + "step": 2099 + }, + { + "epoch": 0.6478482184173994, + "grad_norm": 0.0, + "learning_rate": 5.8284995180415685e-06, + "loss": 0.6952, + "step": 2100 + }, + { + "epoch": 0.6481567175690267, + "grad_norm": 0.0, + "learning_rate": 5.819417296656724e-06, + "loss": 0.7227, + "step": 2101 + }, + { + "epoch": 0.648465216720654, + "grad_norm": 0.0, + "learning_rate": 5.81033925211119e-06, + "loss": 0.7285, + "step": 2102 + }, + { + "epoch": 0.6487737158722814, + "grad_norm": 0.0, + "learning_rate": 5.8012653934748644e-06, + "loss": 0.7128, + "step": 2103 + }, + { + "epoch": 0.6490822150239087, + "grad_norm": 0.0, + "learning_rate": 5.7921957298134865e-06, + "loss": 0.6555, + "step": 2104 + }, + { + "epoch": 0.6493907141755361, + "grad_norm": 0.0, + "learning_rate": 5.783130270188607e-06, + "loss": 0.7154, + "step": 2105 + }, + { + "epoch": 0.6496992133271634, + "grad_norm": 0.0, + "learning_rate": 5.774069023657558e-06, + "loss": 0.7147, + "step": 2106 + }, + { + "epoch": 0.6500077124787906, + "grad_norm": 0.0, + "learning_rate": 5.765011999273484e-06, + "loss": 0.7412, + "step": 2107 + }, + { + "epoch": 0.650316211630418, + "grad_norm": 0.0, + "learning_rate": 5.755959206085285e-06, + "loss": 0.7176, + "step": 2108 + }, + { + "epoch": 0.6506247107820453, + "grad_norm": 0.0, + "learning_rate": 5.746910653137659e-06, + "loss": 0.6684, + "step": 2109 + }, + { + "epoch": 0.6509332099336727, + "grad_norm": 0.0, + "learning_rate": 5.73786634947105e-06, + "loss": 0.717, + "step": 2110 + }, + { + "epoch": 0.6512417090853, + "grad_norm": 0.0, + "learning_rate": 5.7288263041216685e-06, + "loss": 0.7727, + "step": 2111 + }, + { + "epoch": 0.6515502082369273, + "grad_norm": 0.0, + "learning_rate": 5.719790526121462e-06, + "loss": 0.7078, + "step": 2112 + }, + { + "epoch": 0.6518587073885547, + "grad_norm": 0.0, + "learning_rate": 5.7107590244981156e-06, + "loss": 0.7385, + "step": 2113 + }, + { + "epoch": 0.652167206540182, + "grad_norm": 0.0, + "learning_rate": 5.70173180827505e-06, + "loss": 0.7426, + "step": 2114 + }, + { + "epoch": 0.6524757056918093, + "grad_norm": 0.0, + "learning_rate": 5.692708886471395e-06, + "loss": 0.6916, + "step": 2115 + }, + { + "epoch": 0.6527842048434367, + "grad_norm": 0.0, + "learning_rate": 5.683690268101989e-06, + "loss": 0.6724, + "step": 2116 + }, + { + "epoch": 0.653092703995064, + "grad_norm": 0.0, + "learning_rate": 5.674675962177383e-06, + "loss": 0.6995, + "step": 2117 + }, + { + "epoch": 0.6534012031466914, + "grad_norm": 0.0, + "learning_rate": 5.665665977703803e-06, + "loss": 0.6714, + "step": 2118 + }, + { + "epoch": 0.6537097022983187, + "grad_norm": 0.0, + "learning_rate": 5.656660323683177e-06, + "loss": 0.6629, + "step": 2119 + }, + { + "epoch": 0.654018201449946, + "grad_norm": 0.0, + "learning_rate": 5.647659009113079e-06, + "loss": 0.6691, + "step": 2120 + }, + { + "epoch": 0.6543267006015734, + "grad_norm": 0.0, + "learning_rate": 5.638662042986777e-06, + "loss": 0.6801, + "step": 2121 + }, + { + "epoch": 0.6546351997532007, + "grad_norm": 0.0, + "learning_rate": 5.629669434293172e-06, + "loss": 0.746, + "step": 2122 + }, + { + "epoch": 0.6549436989048281, + "grad_norm": 0.0, + "learning_rate": 5.62068119201683e-06, + "loss": 0.7055, + "step": 2123 + }, + { + "epoch": 0.6552521980564553, + "grad_norm": 0.0, + "learning_rate": 5.611697325137939e-06, + "loss": 0.7346, + "step": 2124 + }, + { + "epoch": 0.6555606972080826, + "grad_norm": 0.0, + "learning_rate": 5.602717842632319e-06, + "loss": 0.6536, + "step": 2125 + }, + { + "epoch": 0.65586919635971, + "grad_norm": 0.0, + "learning_rate": 5.5937427534714195e-06, + "loss": 0.6763, + "step": 2126 + }, + { + "epoch": 0.6561776955113373, + "grad_norm": 0.0, + "learning_rate": 5.584772066622284e-06, + "loss": 0.7353, + "step": 2127 + }, + { + "epoch": 0.6564861946629647, + "grad_norm": 0.0, + "learning_rate": 5.575805791047577e-06, + "loss": 0.7712, + "step": 2128 + }, + { + "epoch": 0.656794693814592, + "grad_norm": 0.0, + "learning_rate": 5.566843935705539e-06, + "loss": 0.754, + "step": 2129 + }, + { + "epoch": 0.6571031929662193, + "grad_norm": 0.0, + "learning_rate": 5.557886509549998e-06, + "loss": 0.7469, + "step": 2130 + }, + { + "epoch": 0.6574116921178467, + "grad_norm": 0.0, + "learning_rate": 5.5489335215303674e-06, + "loss": 0.6798, + "step": 2131 + }, + { + "epoch": 0.657720191269474, + "grad_norm": 0.0, + "learning_rate": 5.539984980591615e-06, + "loss": 0.6893, + "step": 2132 + }, + { + "epoch": 0.6580286904211013, + "grad_norm": 0.0, + "learning_rate": 5.531040895674267e-06, + "loss": 0.7365, + "step": 2133 + }, + { + "epoch": 0.6583371895727287, + "grad_norm": 0.0, + "learning_rate": 5.5221012757143974e-06, + "loss": 0.6681, + "step": 2134 + }, + { + "epoch": 0.658645688724356, + "grad_norm": 0.0, + "learning_rate": 5.51316612964363e-06, + "loss": 0.7298, + "step": 2135 + }, + { + "epoch": 0.6589541878759834, + "grad_norm": 0.0, + "learning_rate": 5.504235466389103e-06, + "loss": 0.6818, + "step": 2136 + }, + { + "epoch": 0.6592626870276107, + "grad_norm": 0.0, + "learning_rate": 5.495309294873483e-06, + "loss": 0.7411, + "step": 2137 + }, + { + "epoch": 0.659571186179238, + "grad_norm": 0.0, + "learning_rate": 5.486387624014952e-06, + "loss": 0.734, + "step": 2138 + }, + { + "epoch": 0.6598796853308654, + "grad_norm": 0.0, + "learning_rate": 5.47747046272719e-06, + "loss": 0.7049, + "step": 2139 + }, + { + "epoch": 0.6601881844824927, + "grad_norm": 0.0, + "learning_rate": 5.468557819919378e-06, + "loss": 0.7069, + "step": 2140 + }, + { + "epoch": 0.66049668363412, + "grad_norm": 0.0, + "learning_rate": 5.4596497044961725e-06, + "loss": 0.7255, + "step": 2141 + }, + { + "epoch": 0.6608051827857473, + "grad_norm": 0.0, + "learning_rate": 5.450746125357712e-06, + "loss": 0.7469, + "step": 2142 + }, + { + "epoch": 0.6611136819373746, + "grad_norm": 0.0, + "learning_rate": 5.44184709139961e-06, + "loss": 0.7603, + "step": 2143 + }, + { + "epoch": 0.661422181089002, + "grad_norm": 0.0, + "learning_rate": 5.432952611512923e-06, + "loss": 0.8311, + "step": 2144 + }, + { + "epoch": 0.6617306802406293, + "grad_norm": 0.0, + "learning_rate": 5.424062694584179e-06, + "loss": 0.6956, + "step": 2145 + }, + { + "epoch": 0.6620391793922566, + "grad_norm": 0.0, + "learning_rate": 5.41517734949532e-06, + "loss": 0.7034, + "step": 2146 + }, + { + "epoch": 0.662347678543884, + "grad_norm": 0.0, + "learning_rate": 5.406296585123745e-06, + "loss": 0.7404, + "step": 2147 + }, + { + "epoch": 0.6626561776955113, + "grad_norm": 0.0, + "learning_rate": 5.397420410342259e-06, + "loss": 0.7452, + "step": 2148 + }, + { + "epoch": 0.6629646768471387, + "grad_norm": 0.0, + "learning_rate": 5.388548834019097e-06, + "loss": 0.629, + "step": 2149 + }, + { + "epoch": 0.663273175998766, + "grad_norm": 0.0, + "learning_rate": 5.379681865017887e-06, + "loss": 0.7724, + "step": 2150 + }, + { + "epoch": 0.6635816751503933, + "grad_norm": 0.0, + "learning_rate": 5.370819512197656e-06, + "loss": 0.7365, + "step": 2151 + }, + { + "epoch": 0.6638901743020207, + "grad_norm": 0.0, + "learning_rate": 5.361961784412828e-06, + "loss": 0.6681, + "step": 2152 + }, + { + "epoch": 0.664198673453648, + "grad_norm": 0.0, + "learning_rate": 5.353108690513193e-06, + "loss": 0.7178, + "step": 2153 + }, + { + "epoch": 0.6645071726052754, + "grad_norm": 0.0, + "learning_rate": 5.344260239343919e-06, + "loss": 0.7258, + "step": 2154 + }, + { + "epoch": 0.6648156717569027, + "grad_norm": 0.0, + "learning_rate": 5.335416439745538e-06, + "loss": 0.7827, + "step": 2155 + }, + { + "epoch": 0.66512417090853, + "grad_norm": 0.0, + "learning_rate": 5.326577300553923e-06, + "loss": 0.757, + "step": 2156 + }, + { + "epoch": 0.6654326700601574, + "grad_norm": 0.0, + "learning_rate": 5.317742830600306e-06, + "loss": 0.824, + "step": 2157 + }, + { + "epoch": 0.6657411692117847, + "grad_norm": 0.0, + "learning_rate": 5.308913038711245e-06, + "loss": 0.7818, + "step": 2158 + }, + { + "epoch": 0.666049668363412, + "grad_norm": 0.0, + "learning_rate": 5.300087933708624e-06, + "loss": 0.7217, + "step": 2159 + }, + { + "epoch": 0.6663581675150393, + "grad_norm": 0.0, + "learning_rate": 5.29126752440964e-06, + "loss": 0.7242, + "step": 2160 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.0, + "learning_rate": 5.282451819626815e-06, + "loss": 0.7298, + "step": 2161 + }, + { + "epoch": 0.666975165818294, + "grad_norm": 0.0, + "learning_rate": 5.273640828167954e-06, + "loss": 0.7062, + "step": 2162 + }, + { + "epoch": 0.6672836649699213, + "grad_norm": 0.0, + "learning_rate": 5.264834558836156e-06, + "loss": 0.7027, + "step": 2163 + }, + { + "epoch": 0.6675921641215486, + "grad_norm": 0.0, + "learning_rate": 5.256033020429813e-06, + "loss": 0.7512, + "step": 2164 + }, + { + "epoch": 0.667900663273176, + "grad_norm": 0.0, + "learning_rate": 5.247236221742575e-06, + "loss": 0.7286, + "step": 2165 + }, + { + "epoch": 0.6682091624248033, + "grad_norm": 0.0, + "learning_rate": 5.238444171563368e-06, + "loss": 0.7256, + "step": 2166 + }, + { + "epoch": 0.6685176615764307, + "grad_norm": 0.0, + "learning_rate": 5.22965687867637e-06, + "loss": 0.744, + "step": 2167 + }, + { + "epoch": 0.668826160728058, + "grad_norm": 0.0, + "learning_rate": 5.220874351861001e-06, + "loss": 0.7358, + "step": 2168 + }, + { + "epoch": 0.6691346598796853, + "grad_norm": 0.0, + "learning_rate": 5.212096599891927e-06, + "loss": 0.7285, + "step": 2169 + }, + { + "epoch": 0.6694431590313127, + "grad_norm": 0.0, + "learning_rate": 5.203323631539042e-06, + "loss": 0.6504, + "step": 2170 + }, + { + "epoch": 0.66975165818294, + "grad_norm": 0.0, + "learning_rate": 5.194555455567456e-06, + "loss": 0.8199, + "step": 2171 + }, + { + "epoch": 0.6700601573345674, + "grad_norm": 0.0, + "learning_rate": 5.185792080737491e-06, + "loss": 0.7196, + "step": 2172 + }, + { + "epoch": 0.6703686564861947, + "grad_norm": 0.0, + "learning_rate": 5.177033515804682e-06, + "loss": 0.751, + "step": 2173 + }, + { + "epoch": 0.670677155637822, + "grad_norm": 0.0, + "learning_rate": 5.168279769519742e-06, + "loss": 0.771, + "step": 2174 + }, + { + "epoch": 0.6709856547894494, + "grad_norm": 0.0, + "learning_rate": 5.159530850628589e-06, + "loss": 0.7746, + "step": 2175 + }, + { + "epoch": 0.6712941539410766, + "grad_norm": 0.0, + "learning_rate": 5.150786767872302e-06, + "loss": 0.7745, + "step": 2176 + }, + { + "epoch": 0.671602653092704, + "grad_norm": 0.0, + "learning_rate": 5.142047529987133e-06, + "loss": 0.6888, + "step": 2177 + }, + { + "epoch": 0.6719111522443313, + "grad_norm": 0.0, + "learning_rate": 5.1333131457044995e-06, + "loss": 0.7485, + "step": 2178 + }, + { + "epoch": 0.6722196513959586, + "grad_norm": 0.0, + "learning_rate": 5.124583623750963e-06, + "loss": 0.7505, + "step": 2179 + }, + { + "epoch": 0.672528150547586, + "grad_norm": 0.0, + "learning_rate": 5.115858972848224e-06, + "loss": 0.6436, + "step": 2180 + }, + { + "epoch": 0.6728366496992133, + "grad_norm": 0.0, + "learning_rate": 5.107139201713128e-06, + "loss": 0.7271, + "step": 2181 + }, + { + "epoch": 0.6731451488508406, + "grad_norm": 0.0, + "learning_rate": 5.098424319057632e-06, + "loss": 0.7306, + "step": 2182 + }, + { + "epoch": 0.673453648002468, + "grad_norm": 0.0, + "learning_rate": 5.089714333588827e-06, + "loss": 1.0136, + "step": 2183 + }, + { + "epoch": 0.6737621471540953, + "grad_norm": 0.0, + "learning_rate": 5.081009254008882e-06, + "loss": 0.7328, + "step": 2184 + }, + { + "epoch": 0.6740706463057227, + "grad_norm": 0.0, + "learning_rate": 5.072309089015092e-06, + "loss": 0.7289, + "step": 2185 + }, + { + "epoch": 0.67437914545735, + "grad_norm": 0.0, + "learning_rate": 5.063613847299831e-06, + "loss": 0.6871, + "step": 2186 + }, + { + "epoch": 0.6746876446089773, + "grad_norm": 0.0, + "learning_rate": 5.054923537550554e-06, + "loss": 0.6143, + "step": 2187 + }, + { + "epoch": 0.6749961437606047, + "grad_norm": 0.0, + "learning_rate": 5.046238168449791e-06, + "loss": 0.6871, + "step": 2188 + }, + { + "epoch": 0.675304642912232, + "grad_norm": 0.0, + "learning_rate": 5.037557748675128e-06, + "loss": 0.6316, + "step": 2189 + }, + { + "epoch": 0.6756131420638594, + "grad_norm": 0.0, + "learning_rate": 5.028882286899219e-06, + "loss": 0.7132, + "step": 2190 + }, + { + "epoch": 0.6759216412154867, + "grad_norm": 0.0, + "learning_rate": 5.020211791789753e-06, + "loss": 0.6796, + "step": 2191 + }, + { + "epoch": 0.676230140367114, + "grad_norm": 0.0, + "learning_rate": 5.011546272009464e-06, + "loss": 0.7367, + "step": 2192 + }, + { + "epoch": 0.6765386395187413, + "grad_norm": 0.0, + "learning_rate": 5.0028857362161144e-06, + "loss": 0.7244, + "step": 2193 + }, + { + "epoch": 0.6768471386703686, + "grad_norm": 0.0, + "learning_rate": 4.994230193062477e-06, + "loss": 0.6804, + "step": 2194 + }, + { + "epoch": 0.677155637821996, + "grad_norm": 0.0, + "learning_rate": 4.985579651196354e-06, + "loss": 0.7504, + "step": 2195 + }, + { + "epoch": 0.6774641369736233, + "grad_norm": 0.0, + "learning_rate": 4.976934119260537e-06, + "loss": 0.6989, + "step": 2196 + }, + { + "epoch": 0.6777726361252506, + "grad_norm": 0.0, + "learning_rate": 4.968293605892817e-06, + "loss": 0.6624, + "step": 2197 + }, + { + "epoch": 0.678081135276878, + "grad_norm": 0.0, + "learning_rate": 4.959658119725965e-06, + "loss": 0.7303, + "step": 2198 + }, + { + "epoch": 0.6783896344285053, + "grad_norm": 0.0, + "learning_rate": 4.951027669387741e-06, + "loss": 0.7036, + "step": 2199 + }, + { + "epoch": 0.6786981335801326, + "grad_norm": 0.0, + "learning_rate": 4.942402263500874e-06, + "loss": 0.7572, + "step": 2200 + }, + { + "epoch": 0.67900663273176, + "grad_norm": 0.0, + "learning_rate": 4.933781910683031e-06, + "loss": 0.7067, + "step": 2201 + }, + { + "epoch": 0.6793151318833873, + "grad_norm": 0.0, + "learning_rate": 4.925166619546857e-06, + "loss": 0.8102, + "step": 2202 + }, + { + "epoch": 0.6796236310350147, + "grad_norm": 0.0, + "learning_rate": 4.916556398699922e-06, + "loss": 0.7812, + "step": 2203 + }, + { + "epoch": 0.679932130186642, + "grad_norm": 0.0, + "learning_rate": 4.907951256744744e-06, + "loss": 0.7577, + "step": 2204 + }, + { + "epoch": 0.6802406293382693, + "grad_norm": 0.0, + "learning_rate": 4.899351202278756e-06, + "loss": 0.7994, + "step": 2205 + }, + { + "epoch": 0.6805491284898967, + "grad_norm": 0.0, + "learning_rate": 4.890756243894308e-06, + "loss": 0.698, + "step": 2206 + }, + { + "epoch": 0.680857627641524, + "grad_norm": 0.0, + "learning_rate": 4.88216639017867e-06, + "loss": 0.7232, + "step": 2207 + }, + { + "epoch": 0.6811661267931514, + "grad_norm": 0.0, + "learning_rate": 4.873581649713996e-06, + "loss": 0.6626, + "step": 2208 + }, + { + "epoch": 0.6814746259447787, + "grad_norm": 0.0, + "learning_rate": 4.865002031077353e-06, + "loss": 0.774, + "step": 2209 + }, + { + "epoch": 0.681783125096406, + "grad_norm": 0.0, + "learning_rate": 4.856427542840658e-06, + "loss": 0.7072, + "step": 2210 + }, + { + "epoch": 0.6820916242480333, + "grad_norm": 0.0, + "learning_rate": 4.847858193570733e-06, + "loss": 0.7224, + "step": 2211 + }, + { + "epoch": 0.6824001233996606, + "grad_norm": 0.0, + "learning_rate": 4.839293991829256e-06, + "loss": 0.733, + "step": 2212 + }, + { + "epoch": 0.682708622551288, + "grad_norm": 0.0, + "learning_rate": 4.830734946172756e-06, + "loss": 0.6981, + "step": 2213 + }, + { + "epoch": 0.6830171217029153, + "grad_norm": 0.0, + "learning_rate": 4.8221810651526154e-06, + "loss": 0.7701, + "step": 2214 + }, + { + "epoch": 0.6833256208545426, + "grad_norm": 0.0, + "learning_rate": 4.8136323573150525e-06, + "loss": 0.7602, + "step": 2215 + }, + { + "epoch": 0.68363412000617, + "grad_norm": 0.0, + "learning_rate": 4.805088831201127e-06, + "loss": 0.7599, + "step": 2216 + }, + { + "epoch": 0.6839426191577973, + "grad_norm": 0.0, + "learning_rate": 4.796550495346711e-06, + "loss": 0.744, + "step": 2217 + }, + { + "epoch": 0.6842511183094246, + "grad_norm": 0.0, + "learning_rate": 4.788017358282492e-06, + "loss": 0.7434, + "step": 2218 + }, + { + "epoch": 0.684559617461052, + "grad_norm": 0.0, + "learning_rate": 4.779489428533973e-06, + "loss": 0.748, + "step": 2219 + }, + { + "epoch": 0.6848681166126793, + "grad_norm": 0.0, + "learning_rate": 4.770966714621441e-06, + "loss": 0.7359, + "step": 2220 + }, + { + "epoch": 0.6851766157643067, + "grad_norm": 0.0, + "learning_rate": 4.762449225059985e-06, + "loss": 0.7254, + "step": 2221 + }, + { + "epoch": 0.685485114915934, + "grad_norm": 0.0, + "learning_rate": 4.753936968359465e-06, + "loss": 0.7747, + "step": 2222 + }, + { + "epoch": 0.6857936140675613, + "grad_norm": 0.0, + "learning_rate": 4.745429953024511e-06, + "loss": 0.7263, + "step": 2223 + }, + { + "epoch": 0.6861021132191887, + "grad_norm": 0.0, + "learning_rate": 4.736928187554529e-06, + "loss": 0.7798, + "step": 2224 + }, + { + "epoch": 0.686410612370816, + "grad_norm": 0.0, + "learning_rate": 4.728431680443663e-06, + "loss": 0.7363, + "step": 2225 + }, + { + "epoch": 0.6867191115224434, + "grad_norm": 0.0, + "learning_rate": 4.719940440180827e-06, + "loss": 0.8307, + "step": 2226 + }, + { + "epoch": 0.6870276106740707, + "grad_norm": 0.0, + "learning_rate": 4.711454475249638e-06, + "loss": 0.7031, + "step": 2227 + }, + { + "epoch": 0.6873361098256979, + "grad_norm": 0.0, + "learning_rate": 4.702973794128477e-06, + "loss": 0.7277, + "step": 2228 + }, + { + "epoch": 0.6876446089773253, + "grad_norm": 0.0, + "learning_rate": 4.694498405290423e-06, + "loss": 0.6801, + "step": 2229 + }, + { + "epoch": 0.6879531081289526, + "grad_norm": 0.0, + "learning_rate": 4.686028317203283e-06, + "loss": 0.6709, + "step": 2230 + }, + { + "epoch": 0.68826160728058, + "grad_norm": 0.0, + "learning_rate": 4.6775635383295555e-06, + "loss": 0.8133, + "step": 2231 + }, + { + "epoch": 0.6885701064322073, + "grad_norm": 0.0, + "learning_rate": 4.669104077126439e-06, + "loss": 0.6551, + "step": 2232 + }, + { + "epoch": 0.6888786055838346, + "grad_norm": 0.0, + "learning_rate": 4.660649942045826e-06, + "loss": 0.7909, + "step": 2233 + }, + { + "epoch": 0.689187104735462, + "grad_norm": 0.0, + "learning_rate": 4.652201141534279e-06, + "loss": 0.6711, + "step": 2234 + }, + { + "epoch": 0.6894956038870893, + "grad_norm": 0.0, + "learning_rate": 4.643757684033026e-06, + "loss": 0.7137, + "step": 2235 + }, + { + "epoch": 0.6898041030387166, + "grad_norm": 0.0, + "learning_rate": 4.635319577977975e-06, + "loss": 0.615, + "step": 2236 + }, + { + "epoch": 0.690112602190344, + "grad_norm": 0.0, + "learning_rate": 4.626886831799668e-06, + "loss": 0.6953, + "step": 2237 + }, + { + "epoch": 0.6904211013419713, + "grad_norm": 0.0, + "learning_rate": 4.618459453923307e-06, + "loss": 0.7033, + "step": 2238 + }, + { + "epoch": 0.6907296004935987, + "grad_norm": 0.0, + "learning_rate": 4.6100374527687195e-06, + "loss": 0.7572, + "step": 2239 + }, + { + "epoch": 0.691038099645226, + "grad_norm": 0.0, + "learning_rate": 4.601620836750367e-06, + "loss": 0.7321, + "step": 2240 + }, + { + "epoch": 0.6913465987968533, + "grad_norm": 0.0, + "learning_rate": 4.593209614277325e-06, + "loss": 0.6992, + "step": 2241 + }, + { + "epoch": 0.6916550979484807, + "grad_norm": 0.0, + "learning_rate": 4.58480379375329e-06, + "loss": 1.0177, + "step": 2242 + }, + { + "epoch": 0.691963597100108, + "grad_norm": 0.0, + "learning_rate": 4.576403383576555e-06, + "loss": 0.6625, + "step": 2243 + }, + { + "epoch": 0.6922720962517354, + "grad_norm": 0.0, + "learning_rate": 4.568008392140003e-06, + "loss": 0.7334, + "step": 2244 + }, + { + "epoch": 0.6925805954033626, + "grad_norm": 0.0, + "learning_rate": 4.559618827831116e-06, + "loss": 0.6984, + "step": 2245 + }, + { + "epoch": 0.6928890945549899, + "grad_norm": 0.0, + "learning_rate": 4.551234699031938e-06, + "loss": 0.6868, + "step": 2246 + }, + { + "epoch": 0.6931975937066173, + "grad_norm": 0.0, + "learning_rate": 4.542856014119098e-06, + "loss": 0.7404, + "step": 2247 + }, + { + "epoch": 0.6935060928582446, + "grad_norm": 0.0, + "learning_rate": 4.534482781463775e-06, + "loss": 0.8335, + "step": 2248 + }, + { + "epoch": 0.6938145920098719, + "grad_norm": 0.0, + "learning_rate": 4.5261150094317e-06, + "loss": 0.7299, + "step": 2249 + }, + { + "epoch": 0.6941230911614993, + "grad_norm": 0.0, + "learning_rate": 4.517752706383159e-06, + "loss": 0.6716, + "step": 2250 + }, + { + "epoch": 0.6944315903131266, + "grad_norm": 0.0, + "learning_rate": 4.509395880672967e-06, + "loss": 0.6818, + "step": 2251 + }, + { + "epoch": 0.694740089464754, + "grad_norm": 0.0, + "learning_rate": 4.501044540650464e-06, + "loss": 0.7939, + "step": 2252 + }, + { + "epoch": 0.6950485886163813, + "grad_norm": 0.0, + "learning_rate": 4.4926986946595065e-06, + "loss": 0.7085, + "step": 2253 + }, + { + "epoch": 0.6953570877680086, + "grad_norm": 0.0, + "learning_rate": 4.484358351038478e-06, + "loss": 0.8178, + "step": 2254 + }, + { + "epoch": 0.695665586919636, + "grad_norm": 0.0, + "learning_rate": 4.4760235181202465e-06, + "loss": 0.7343, + "step": 2255 + }, + { + "epoch": 0.6959740860712633, + "grad_norm": 0.0, + "learning_rate": 4.467694204232187e-06, + "loss": 0.7586, + "step": 2256 + }, + { + "epoch": 0.6962825852228907, + "grad_norm": 0.0, + "learning_rate": 4.459370417696152e-06, + "loss": 0.8157, + "step": 2257 + }, + { + "epoch": 0.696591084374518, + "grad_norm": 0.0, + "learning_rate": 4.4510521668284736e-06, + "loss": 0.6318, + "step": 2258 + }, + { + "epoch": 0.6968995835261453, + "grad_norm": 0.0, + "learning_rate": 4.4427394599399575e-06, + "loss": 0.7224, + "step": 2259 + }, + { + "epoch": 0.6972080826777727, + "grad_norm": 0.0, + "learning_rate": 4.434432305335866e-06, + "loss": 0.6759, + "step": 2260 + }, + { + "epoch": 0.6975165818294, + "grad_norm": 0.0, + "learning_rate": 4.426130711315913e-06, + "loss": 0.7719, + "step": 2261 + }, + { + "epoch": 0.6978250809810274, + "grad_norm": 0.0, + "learning_rate": 4.417834686174263e-06, + "loss": 0.6957, + "step": 2262 + }, + { + "epoch": 0.6981335801326546, + "grad_norm": 0.0, + "learning_rate": 4.4095442381995055e-06, + "loss": 0.7719, + "step": 2263 + }, + { + "epoch": 0.6984420792842819, + "grad_norm": 0.0, + "learning_rate": 4.401259375674679e-06, + "loss": 0.6877, + "step": 2264 + }, + { + "epoch": 0.6987505784359093, + "grad_norm": 0.0, + "learning_rate": 4.392980106877212e-06, + "loss": 0.7441, + "step": 2265 + }, + { + "epoch": 0.6990590775875366, + "grad_norm": 0.0, + "learning_rate": 4.384706440078968e-06, + "loss": 0.7662, + "step": 2266 + }, + { + "epoch": 0.6993675767391639, + "grad_norm": 0.0, + "learning_rate": 4.376438383546202e-06, + "loss": 0.7548, + "step": 2267 + }, + { + "epoch": 0.6996760758907913, + "grad_norm": 0.0, + "learning_rate": 4.368175945539572e-06, + "loss": 0.6793, + "step": 2268 + }, + { + "epoch": 0.6999845750424186, + "grad_norm": 0.0, + "learning_rate": 4.359919134314113e-06, + "loss": 0.6686, + "step": 2269 + }, + { + "epoch": 0.700293074194046, + "grad_norm": 0.0, + "learning_rate": 4.351667958119242e-06, + "loss": 0.6941, + "step": 2270 + }, + { + "epoch": 0.7006015733456733, + "grad_norm": 0.0, + "learning_rate": 4.343422425198753e-06, + "loss": 0.7611, + "step": 2271 + }, + { + "epoch": 0.7009100724973006, + "grad_norm": 0.0, + "learning_rate": 4.335182543790788e-06, + "loss": 0.8059, + "step": 2272 + }, + { + "epoch": 0.701218571648928, + "grad_norm": 0.0, + "learning_rate": 4.326948322127858e-06, + "loss": 0.6992, + "step": 2273 + }, + { + "epoch": 0.7015270708005553, + "grad_norm": 0.0, + "learning_rate": 4.318719768436808e-06, + "loss": 0.7047, + "step": 2274 + }, + { + "epoch": 0.7018355699521827, + "grad_norm": 0.0, + "learning_rate": 4.3104968909388174e-06, + "loss": 0.7672, + "step": 2275 + }, + { + "epoch": 0.70214406910381, + "grad_norm": 0.0, + "learning_rate": 4.302279697849412e-06, + "loss": 0.7012, + "step": 2276 + }, + { + "epoch": 0.7024525682554373, + "grad_norm": 0.0, + "learning_rate": 4.29406819737842e-06, + "loss": 0.6696, + "step": 2277 + }, + { + "epoch": 0.7027610674070647, + "grad_norm": 0.0, + "learning_rate": 4.285862397729993e-06, + "loss": 0.7496, + "step": 2278 + }, + { + "epoch": 0.703069566558692, + "grad_norm": 0.0, + "learning_rate": 4.277662307102574e-06, + "loss": 0.7151, + "step": 2279 + }, + { + "epoch": 0.7033780657103192, + "grad_norm": 0.0, + "learning_rate": 4.26946793368892e-06, + "loss": 0.6707, + "step": 2280 + }, + { + "epoch": 0.7036865648619466, + "grad_norm": 0.0, + "learning_rate": 4.261279285676071e-06, + "loss": 0.6088, + "step": 2281 + }, + { + "epoch": 0.7039950640135739, + "grad_norm": 0.0, + "learning_rate": 4.253096371245329e-06, + "loss": 0.6599, + "step": 2282 + }, + { + "epoch": 0.7043035631652013, + "grad_norm": 0.0, + "learning_rate": 4.244919198572293e-06, + "loss": 0.7794, + "step": 2283 + }, + { + "epoch": 0.7046120623168286, + "grad_norm": 0.0, + "learning_rate": 4.236747775826804e-06, + "loss": 0.6954, + "step": 2284 + }, + { + "epoch": 0.7049205614684559, + "grad_norm": 0.0, + "learning_rate": 4.228582111172977e-06, + "loss": 0.6958, + "step": 2285 + }, + { + "epoch": 0.7052290606200833, + "grad_norm": 0.0, + "learning_rate": 4.220422212769161e-06, + "loss": 0.6756, + "step": 2286 + }, + { + "epoch": 0.7055375597717106, + "grad_norm": 0.0, + "learning_rate": 4.212268088767944e-06, + "loss": 0.9999, + "step": 2287 + }, + { + "epoch": 0.705846058923338, + "grad_norm": 0.0, + "learning_rate": 4.204119747316157e-06, + "loss": 0.759, + "step": 2288 + }, + { + "epoch": 0.7061545580749653, + "grad_norm": 0.0, + "learning_rate": 4.195977196554835e-06, + "loss": 0.7907, + "step": 2289 + }, + { + "epoch": 0.7064630572265926, + "grad_norm": 0.0, + "learning_rate": 4.187840444619251e-06, + "loss": 0.7364, + "step": 2290 + }, + { + "epoch": 0.70677155637822, + "grad_norm": 0.0, + "learning_rate": 4.179709499638857e-06, + "loss": 0.7315, + "step": 2291 + }, + { + "epoch": 0.7070800555298473, + "grad_norm": 0.0, + "learning_rate": 4.171584369737322e-06, + "loss": 0.7142, + "step": 2292 + }, + { + "epoch": 0.7073885546814747, + "grad_norm": 0.0, + "learning_rate": 4.163465063032507e-06, + "loss": 0.7166, + "step": 2293 + }, + { + "epoch": 0.707697053833102, + "grad_norm": 0.0, + "learning_rate": 4.1553515876364435e-06, + "loss": 0.6211, + "step": 2294 + }, + { + "epoch": 0.7080055529847293, + "grad_norm": 0.0, + "learning_rate": 4.147243951655341e-06, + "loss": 0.7314, + "step": 2295 + }, + { + "epoch": 0.7083140521363567, + "grad_norm": 0.0, + "learning_rate": 4.139142163189573e-06, + "loss": 0.7196, + "step": 2296 + }, + { + "epoch": 0.7086225512879839, + "grad_norm": 0.0, + "learning_rate": 4.131046230333682e-06, + "loss": 0.7261, + "step": 2297 + }, + { + "epoch": 0.7089310504396112, + "grad_norm": 0.0, + "learning_rate": 4.1229561611763445e-06, + "loss": 0.7069, + "step": 2298 + }, + { + "epoch": 0.7092395495912386, + "grad_norm": 0.0, + "learning_rate": 4.114871963800385e-06, + "loss": 0.6299, + "step": 2299 + }, + { + "epoch": 0.7095480487428659, + "grad_norm": 0.0, + "learning_rate": 4.106793646282769e-06, + "loss": 0.7613, + "step": 2300 + }, + { + "epoch": 0.7098565478944933, + "grad_norm": 0.0, + "learning_rate": 4.098721216694572e-06, + "loss": 0.7339, + "step": 2301 + }, + { + "epoch": 0.7101650470461206, + "grad_norm": 0.0, + "learning_rate": 4.090654683101007e-06, + "loss": 0.7247, + "step": 2302 + }, + { + "epoch": 0.7104735461977479, + "grad_norm": 0.0, + "learning_rate": 4.082594053561369e-06, + "loss": 0.664, + "step": 2303 + }, + { + "epoch": 0.7107820453493753, + "grad_norm": 0.0, + "learning_rate": 4.074539336129079e-06, + "loss": 0.7243, + "step": 2304 + }, + { + "epoch": 0.7110905445010026, + "grad_norm": 0.0, + "learning_rate": 4.066490538851644e-06, + "loss": 0.6925, + "step": 2305 + }, + { + "epoch": 0.71139904365263, + "grad_norm": 0.0, + "learning_rate": 4.0584476697706475e-06, + "loss": 0.7386, + "step": 2306 + }, + { + "epoch": 0.7117075428042573, + "grad_norm": 0.0, + "learning_rate": 4.0504107369217686e-06, + "loss": 0.6881, + "step": 2307 + }, + { + "epoch": 0.7120160419558846, + "grad_norm": 0.0, + "learning_rate": 4.042379748334727e-06, + "loss": 0.719, + "step": 2308 + }, + { + "epoch": 0.712324541107512, + "grad_norm": 0.0, + "learning_rate": 4.034354712033332e-06, + "loss": 0.7329, + "step": 2309 + }, + { + "epoch": 0.7126330402591393, + "grad_norm": 0.0, + "learning_rate": 4.026335636035429e-06, + "loss": 0.7455, + "step": 2310 + }, + { + "epoch": 0.7129415394107667, + "grad_norm": 0.0, + "learning_rate": 4.018322528352917e-06, + "loss": 0.7113, + "step": 2311 + }, + { + "epoch": 0.713250038562394, + "grad_norm": 0.0, + "learning_rate": 4.010315396991727e-06, + "loss": 0.6837, + "step": 2312 + }, + { + "epoch": 0.7135585377140213, + "grad_norm": 0.0, + "learning_rate": 4.002314249951819e-06, + "loss": 0.6868, + "step": 2313 + }, + { + "epoch": 0.7138670368656487, + "grad_norm": 0.0, + "learning_rate": 3.994319095227178e-06, + "loss": 0.7261, + "step": 2314 + }, + { + "epoch": 0.7141755360172759, + "grad_norm": 0.0, + "learning_rate": 3.986329940805799e-06, + "loss": 0.7157, + "step": 2315 + }, + { + "epoch": 0.7144840351689032, + "grad_norm": 0.0, + "learning_rate": 3.978346794669679e-06, + "loss": 0.741, + "step": 2316 + }, + { + "epoch": 0.7147925343205306, + "grad_norm": 0.0, + "learning_rate": 3.970369664794823e-06, + "loss": 0.7708, + "step": 2317 + }, + { + "epoch": 0.7151010334721579, + "grad_norm": 0.0, + "learning_rate": 3.9623985591512105e-06, + "loss": 0.7344, + "step": 2318 + }, + { + "epoch": 0.7154095326237853, + "grad_norm": 0.0, + "learning_rate": 3.95443348570282e-06, + "loss": 0.7342, + "step": 2319 + }, + { + "epoch": 0.7157180317754126, + "grad_norm": 0.0, + "learning_rate": 3.946474452407579e-06, + "loss": 0.6732, + "step": 2320 + }, + { + "epoch": 0.7160265309270399, + "grad_norm": 0.0, + "learning_rate": 3.938521467217405e-06, + "loss": 0.6827, + "step": 2321 + }, + { + "epoch": 0.7163350300786673, + "grad_norm": 0.0, + "learning_rate": 3.930574538078155e-06, + "loss": 0.7727, + "step": 2322 + }, + { + "epoch": 0.7166435292302946, + "grad_norm": 0.0, + "learning_rate": 3.922633672929648e-06, + "loss": 0.7735, + "step": 2323 + }, + { + "epoch": 0.716952028381922, + "grad_norm": 0.0, + "learning_rate": 3.914698879705635e-06, + "loss": 0.7188, + "step": 2324 + }, + { + "epoch": 0.7172605275335493, + "grad_norm": 0.0, + "learning_rate": 3.906770166333802e-06, + "loss": 0.644, + "step": 2325 + }, + { + "epoch": 0.7175690266851766, + "grad_norm": 0.0, + "learning_rate": 3.898847540735771e-06, + "loss": 0.6054, + "step": 2326 + }, + { + "epoch": 0.717877525836804, + "grad_norm": 0.0, + "learning_rate": 3.890931010827062e-06, + "loss": 0.6724, + "step": 2327 + }, + { + "epoch": 0.7181860249884313, + "grad_norm": 0.0, + "learning_rate": 3.883020584517129e-06, + "loss": 0.8042, + "step": 2328 + }, + { + "epoch": 0.7184945241400587, + "grad_norm": 0.0, + "learning_rate": 3.875116269709307e-06, + "loss": 0.7232, + "step": 2329 + }, + { + "epoch": 0.718803023291686, + "grad_norm": 0.0, + "learning_rate": 3.867218074300832e-06, + "loss": 0.7611, + "step": 2330 + }, + { + "epoch": 0.7191115224433133, + "grad_norm": 0.0, + "learning_rate": 3.859326006182833e-06, + "loss": 0.6313, + "step": 2331 + }, + { + "epoch": 0.7194200215949406, + "grad_norm": 0.0, + "learning_rate": 3.851440073240309e-06, + "loss": 0.7193, + "step": 2332 + }, + { + "epoch": 0.7197285207465679, + "grad_norm": 0.0, + "learning_rate": 3.84356028335213e-06, + "loss": 0.6549, + "step": 2333 + }, + { + "epoch": 0.7200370198981952, + "grad_norm": 0.0, + "learning_rate": 3.835686644391029e-06, + "loss": 0.6637, + "step": 2334 + }, + { + "epoch": 0.7203455190498226, + "grad_norm": 0.0, + "learning_rate": 3.827819164223599e-06, + "loss": 0.7928, + "step": 2335 + }, + { + "epoch": 0.7206540182014499, + "grad_norm": 0.0, + "learning_rate": 3.819957850710269e-06, + "loss": 0.6682, + "step": 2336 + }, + { + "epoch": 0.7209625173530773, + "grad_norm": 0.0, + "learning_rate": 3.812102711705323e-06, + "loss": 0.7203, + "step": 2337 + }, + { + "epoch": 0.7212710165047046, + "grad_norm": 0.0, + "learning_rate": 3.8042537550568603e-06, + "loss": 0.6589, + "step": 2338 + }, + { + "epoch": 0.7215795156563319, + "grad_norm": 0.0, + "learning_rate": 3.7964109886068066e-06, + "loss": 0.7143, + "step": 2339 + }, + { + "epoch": 0.7218880148079593, + "grad_norm": 0.0, + "learning_rate": 3.7885744201909115e-06, + "loss": 0.7169, + "step": 2340 + }, + { + "epoch": 0.7221965139595866, + "grad_norm": 0.0, + "learning_rate": 3.7807440576387244e-06, + "loss": 0.6901, + "step": 2341 + }, + { + "epoch": 0.722505013111214, + "grad_norm": 0.0, + "learning_rate": 3.7729199087735924e-06, + "loss": 0.7984, + "step": 2342 + }, + { + "epoch": 0.7228135122628413, + "grad_norm": 0.0, + "learning_rate": 3.7651019814126656e-06, + "loss": 0.693, + "step": 2343 + }, + { + "epoch": 0.7231220114144686, + "grad_norm": 0.0, + "learning_rate": 3.7572902833668635e-06, + "loss": 0.7642, + "step": 2344 + }, + { + "epoch": 0.723430510566096, + "grad_norm": 0.0, + "learning_rate": 3.7494848224408998e-06, + "loss": 0.6759, + "step": 2345 + }, + { + "epoch": 0.7237390097177233, + "grad_norm": 0.0, + "learning_rate": 3.741685606433233e-06, + "loss": 0.7235, + "step": 2346 + }, + { + "epoch": 0.7240475088693507, + "grad_norm": 0.0, + "learning_rate": 3.7338926431361055e-06, + "loss": 0.7229, + "step": 2347 + }, + { + "epoch": 0.724356008020978, + "grad_norm": 0.0, + "learning_rate": 3.726105940335495e-06, + "loss": 0.7559, + "step": 2348 + }, + { + "epoch": 0.7246645071726052, + "grad_norm": 0.0, + "learning_rate": 3.71832550581114e-06, + "loss": 0.7572, + "step": 2349 + }, + { + "epoch": 0.7249730063242326, + "grad_norm": 0.0, + "learning_rate": 3.710551347336504e-06, + "loss": 0.7353, + "step": 2350 + }, + { + "epoch": 0.7252815054758599, + "grad_norm": 0.0, + "learning_rate": 3.7027834726787806e-06, + "loss": 0.6815, + "step": 2351 + }, + { + "epoch": 0.7255900046274872, + "grad_norm": 0.0, + "learning_rate": 3.6950218895988966e-06, + "loss": 0.7229, + "step": 2352 + }, + { + "epoch": 0.7258985037791146, + "grad_norm": 0.0, + "learning_rate": 3.6872666058514783e-06, + "loss": 0.6987, + "step": 2353 + }, + { + "epoch": 0.7262070029307419, + "grad_norm": 0.0, + "learning_rate": 3.6795176291848713e-06, + "loss": 0.7232, + "step": 2354 + }, + { + "epoch": 0.7265155020823693, + "grad_norm": 0.0, + "learning_rate": 3.6717749673411096e-06, + "loss": 0.7647, + "step": 2355 + }, + { + "epoch": 0.7268240012339966, + "grad_norm": 0.0, + "learning_rate": 3.66403862805592e-06, + "loss": 0.7256, + "step": 2356 + }, + { + "epoch": 0.7271325003856239, + "grad_norm": 0.0, + "learning_rate": 3.6563086190587215e-06, + "loss": 0.6587, + "step": 2357 + }, + { + "epoch": 0.7274409995372513, + "grad_norm": 0.0, + "learning_rate": 3.6485849480725964e-06, + "loss": 0.6631, + "step": 2358 + }, + { + "epoch": 0.7277494986888786, + "grad_norm": 0.0, + "learning_rate": 3.6408676228143013e-06, + "loss": 0.7334, + "step": 2359 + }, + { + "epoch": 0.728057997840506, + "grad_norm": 0.0, + "learning_rate": 3.633156650994247e-06, + "loss": 0.9905, + "step": 2360 + }, + { + "epoch": 0.7283664969921333, + "grad_norm": 0.0, + "learning_rate": 3.625452040316505e-06, + "loss": 0.7015, + "step": 2361 + }, + { + "epoch": 0.7286749961437606, + "grad_norm": 0.0, + "learning_rate": 3.6177537984787924e-06, + "loss": 0.731, + "step": 2362 + }, + { + "epoch": 0.728983495295388, + "grad_norm": 0.0, + "learning_rate": 3.610061933172445e-06, + "loss": 0.7258, + "step": 2363 + }, + { + "epoch": 0.7292919944470153, + "grad_norm": 0.0, + "learning_rate": 3.602376452082451e-06, + "loss": 0.7024, + "step": 2364 + }, + { + "epoch": 0.7296004935986427, + "grad_norm": 0.0, + "learning_rate": 3.5946973628874026e-06, + "loss": 0.7763, + "step": 2365 + }, + { + "epoch": 0.72990899275027, + "grad_norm": 0.0, + "learning_rate": 3.587024673259519e-06, + "loss": 0.7543, + "step": 2366 + }, + { + "epoch": 0.7302174919018972, + "grad_norm": 0.0, + "learning_rate": 3.579358390864618e-06, + "loss": 0.9771, + "step": 2367 + }, + { + "epoch": 0.7305259910535246, + "grad_norm": 0.0, + "learning_rate": 3.571698523362113e-06, + "loss": 0.7753, + "step": 2368 + }, + { + "epoch": 0.7308344902051519, + "grad_norm": 0.0, + "learning_rate": 3.5640450784050185e-06, + "loss": 0.7656, + "step": 2369 + }, + { + "epoch": 0.7311429893567792, + "grad_norm": 0.0, + "learning_rate": 3.556398063639921e-06, + "loss": 0.7309, + "step": 2370 + }, + { + "epoch": 0.7314514885084066, + "grad_norm": 0.0, + "learning_rate": 3.548757486706997e-06, + "loss": 0.713, + "step": 2371 + }, + { + "epoch": 0.7317599876600339, + "grad_norm": 0.0, + "learning_rate": 3.5411233552399703e-06, + "loss": 0.6742, + "step": 2372 + }, + { + "epoch": 0.7320684868116613, + "grad_norm": 0.0, + "learning_rate": 3.533495676866141e-06, + "loss": 0.8157, + "step": 2373 + }, + { + "epoch": 0.7323769859632886, + "grad_norm": 0.0, + "learning_rate": 3.525874459206362e-06, + "loss": 0.7107, + "step": 2374 + }, + { + "epoch": 0.7326854851149159, + "grad_norm": 0.0, + "learning_rate": 3.518259709875023e-06, + "loss": 0.7892, + "step": 2375 + }, + { + "epoch": 0.7329939842665433, + "grad_norm": 0.0, + "learning_rate": 3.5106514364800548e-06, + "loss": 0.6195, + "step": 2376 + }, + { + "epoch": 0.7333024834181706, + "grad_norm": 0.0, + "learning_rate": 3.503049646622915e-06, + "loss": 0.7394, + "step": 2377 + }, + { + "epoch": 0.733610982569798, + "grad_norm": 0.0, + "learning_rate": 3.4954543478985926e-06, + "loss": 0.7432, + "step": 2378 + }, + { + "epoch": 0.7339194817214253, + "grad_norm": 0.0, + "learning_rate": 3.4878655478955838e-06, + "loss": 0.6289, + "step": 2379 + }, + { + "epoch": 0.7342279808730526, + "grad_norm": 0.0, + "learning_rate": 3.4802832541958886e-06, + "loss": 0.7075, + "step": 2380 + }, + { + "epoch": 0.73453648002468, + "grad_norm": 0.0, + "learning_rate": 3.472707474375018e-06, + "loss": 0.7032, + "step": 2381 + }, + { + "epoch": 0.7348449791763073, + "grad_norm": 0.0, + "learning_rate": 3.465138216001962e-06, + "loss": 0.7202, + "step": 2382 + }, + { + "epoch": 0.7351534783279347, + "grad_norm": 0.0, + "learning_rate": 3.457575486639211e-06, + "loss": 0.6907, + "step": 2383 + }, + { + "epoch": 0.7354619774795619, + "grad_norm": 0.0, + "learning_rate": 3.4500192938427093e-06, + "loss": 0.5944, + "step": 2384 + }, + { + "epoch": 0.7357704766311892, + "grad_norm": 0.0, + "learning_rate": 3.44246964516189e-06, + "loss": 0.7482, + "step": 2385 + }, + { + "epoch": 0.7360789757828166, + "grad_norm": 0.0, + "learning_rate": 3.4349265481396453e-06, + "loss": 0.7875, + "step": 2386 + }, + { + "epoch": 0.7363874749344439, + "grad_norm": 0.0, + "learning_rate": 3.4273900103123125e-06, + "loss": 0.7129, + "step": 2387 + }, + { + "epoch": 0.7366959740860712, + "grad_norm": 0.0, + "learning_rate": 3.4198600392096894e-06, + "loss": 0.7024, + "step": 2388 + }, + { + "epoch": 0.7370044732376986, + "grad_norm": 0.0, + "learning_rate": 3.4123366423549907e-06, + "loss": 0.7508, + "step": 2389 + }, + { + "epoch": 0.7373129723893259, + "grad_norm": 0.0, + "learning_rate": 3.404819827264888e-06, + "loss": 0.7088, + "step": 2390 + }, + { + "epoch": 0.7376214715409533, + "grad_norm": 0.0, + "learning_rate": 3.397309601449459e-06, + "loss": 0.766, + "step": 2391 + }, + { + "epoch": 0.7379299706925806, + "grad_norm": 0.0, + "learning_rate": 3.38980597241221e-06, + "loss": 0.7314, + "step": 2392 + }, + { + "epoch": 0.7382384698442079, + "grad_norm": 0.0, + "learning_rate": 3.3823089476500492e-06, + "loss": 0.7783, + "step": 2393 + }, + { + "epoch": 0.7385469689958353, + "grad_norm": 0.0, + "learning_rate": 3.3748185346532847e-06, + "loss": 0.6801, + "step": 2394 + }, + { + "epoch": 0.7388554681474626, + "grad_norm": 0.0, + "learning_rate": 3.367334740905629e-06, + "loss": 0.7453, + "step": 2395 + }, + { + "epoch": 0.73916396729909, + "grad_norm": 0.0, + "learning_rate": 3.3598575738841698e-06, + "loss": 0.7184, + "step": 2396 + }, + { + "epoch": 0.7394724664507173, + "grad_norm": 0.0, + "learning_rate": 3.352387041059377e-06, + "loss": 0.7691, + "step": 2397 + }, + { + "epoch": 0.7397809656023446, + "grad_norm": 0.0, + "learning_rate": 3.3449231498951016e-06, + "loss": 0.7078, + "step": 2398 + }, + { + "epoch": 0.740089464753972, + "grad_norm": 0.0, + "learning_rate": 3.337465907848544e-06, + "loss": 0.7818, + "step": 2399 + }, + { + "epoch": 0.7403979639055993, + "grad_norm": 0.0, + "learning_rate": 3.330015322370277e-06, + "loss": 0.7287, + "step": 2400 + }, + { + "epoch": 0.7407064630572265, + "grad_norm": 0.0, + "learning_rate": 3.3225714009042045e-06, + "loss": 0.6973, + "step": 2401 + }, + { + "epoch": 0.7410149622088539, + "grad_norm": 0.0, + "learning_rate": 3.31513415088759e-06, + "loss": 0.6757, + "step": 2402 + }, + { + "epoch": 0.7413234613604812, + "grad_norm": 0.0, + "learning_rate": 3.307703579751018e-06, + "loss": 0.661, + "step": 2403 + }, + { + "epoch": 0.7416319605121086, + "grad_norm": 0.0, + "learning_rate": 3.3002796949184135e-06, + "loss": 0.8085, + "step": 2404 + }, + { + "epoch": 0.7419404596637359, + "grad_norm": 0.0, + "learning_rate": 3.2928625038070093e-06, + "loss": 0.7602, + "step": 2405 + }, + { + "epoch": 0.7422489588153632, + "grad_norm": 0.0, + "learning_rate": 3.2854520138273526e-06, + "loss": 0.7372, + "step": 2406 + }, + { + "epoch": 0.7425574579669906, + "grad_norm": 0.0, + "learning_rate": 3.278048232383305e-06, + "loss": 0.6769, + "step": 2407 + }, + { + "epoch": 0.7428659571186179, + "grad_norm": 0.0, + "learning_rate": 3.27065116687201e-06, + "loss": 0.6679, + "step": 2408 + }, + { + "epoch": 0.7431744562702453, + "grad_norm": 0.0, + "learning_rate": 3.263260824683917e-06, + "loss": 0.7707, + "step": 2409 + }, + { + "epoch": 0.7434829554218726, + "grad_norm": 0.0, + "learning_rate": 3.255877213202748e-06, + "loss": 0.6441, + "step": 2410 + }, + { + "epoch": 0.7437914545734999, + "grad_norm": 0.0, + "learning_rate": 3.248500339805499e-06, + "loss": 0.7432, + "step": 2411 + }, + { + "epoch": 0.7440999537251273, + "grad_norm": 0.0, + "learning_rate": 3.2411302118624453e-06, + "loss": 0.7155, + "step": 2412 + }, + { + "epoch": 0.7444084528767546, + "grad_norm": 0.0, + "learning_rate": 3.233766836737111e-06, + "loss": 0.6777, + "step": 2413 + }, + { + "epoch": 0.744716952028382, + "grad_norm": 0.0, + "learning_rate": 3.2264102217862794e-06, + "loss": 0.6547, + "step": 2414 + }, + { + "epoch": 0.7450254511800093, + "grad_norm": 0.0, + "learning_rate": 3.2190603743599723e-06, + "loss": 0.724, + "step": 2415 + }, + { + "epoch": 0.7453339503316366, + "grad_norm": 0.0, + "learning_rate": 3.2117173018014656e-06, + "loss": 0.7582, + "step": 2416 + }, + { + "epoch": 0.745642449483264, + "grad_norm": 0.0, + "learning_rate": 3.2043810114472508e-06, + "loss": 0.7383, + "step": 2417 + }, + { + "epoch": 0.7459509486348913, + "grad_norm": 0.0, + "learning_rate": 3.1970515106270474e-06, + "loss": 0.712, + "step": 2418 + }, + { + "epoch": 0.7462594477865185, + "grad_norm": 0.0, + "learning_rate": 3.1897288066638e-06, + "loss": 0.7693, + "step": 2419 + }, + { + "epoch": 0.7465679469381459, + "grad_norm": 0.0, + "learning_rate": 3.1824129068736474e-06, + "loss": 0.6618, + "step": 2420 + }, + { + "epoch": 0.7468764460897732, + "grad_norm": 0.0, + "learning_rate": 3.1751038185659467e-06, + "loss": 0.7433, + "step": 2421 + }, + { + "epoch": 0.7471849452414006, + "grad_norm": 0.0, + "learning_rate": 3.1678015490432368e-06, + "loss": 0.6502, + "step": 2422 + }, + { + "epoch": 0.7474934443930279, + "grad_norm": 0.0, + "learning_rate": 3.1605061056012465e-06, + "loss": 0.7712, + "step": 2423 + }, + { + "epoch": 0.7478019435446552, + "grad_norm": 0.0, + "learning_rate": 3.153217495528893e-06, + "loss": 0.6721, + "step": 2424 + }, + { + "epoch": 0.7481104426962826, + "grad_norm": 0.0, + "learning_rate": 3.1459357261082525e-06, + "loss": 0.7167, + "step": 2425 + }, + { + "epoch": 0.7484189418479099, + "grad_norm": 0.0, + "learning_rate": 3.138660804614586e-06, + "loss": 0.6691, + "step": 2426 + }, + { + "epoch": 0.7487274409995373, + "grad_norm": 0.0, + "learning_rate": 3.131392738316286e-06, + "loss": 0.7755, + "step": 2427 + }, + { + "epoch": 0.7490359401511646, + "grad_norm": 0.0, + "learning_rate": 3.12413153447492e-06, + "loss": 0.6478, + "step": 2428 + }, + { + "epoch": 0.7493444393027919, + "grad_norm": 0.0, + "learning_rate": 3.1168772003451855e-06, + "loss": 0.7047, + "step": 2429 + }, + { + "epoch": 0.7496529384544193, + "grad_norm": 0.0, + "learning_rate": 3.1096297431749257e-06, + "loss": 0.6735, + "step": 2430 + }, + { + "epoch": 0.7499614376060466, + "grad_norm": 0.0, + "learning_rate": 3.1023891702051057e-06, + "loss": 1.0232, + "step": 2431 + }, + { + "epoch": 0.750269936757674, + "grad_norm": 0.0, + "learning_rate": 3.095155488669812e-06, + "loss": 0.6308, + "step": 2432 + }, + { + "epoch": 0.7505784359093013, + "grad_norm": 0.0, + "learning_rate": 3.087928705796256e-06, + "loss": 0.7367, + "step": 2433 + }, + { + "epoch": 0.7508869350609286, + "grad_norm": 0.0, + "learning_rate": 3.0807088288047447e-06, + "loss": 0.6943, + "step": 2434 + }, + { + "epoch": 0.751195434212556, + "grad_norm": 0.0, + "learning_rate": 3.0734958649086878e-06, + "loss": 0.6738, + "step": 2435 + }, + { + "epoch": 0.7515039333641832, + "grad_norm": 0.0, + "learning_rate": 3.0662898213145965e-06, + "loss": 0.7519, + "step": 2436 + }, + { + "epoch": 0.7518124325158105, + "grad_norm": 0.0, + "learning_rate": 3.0590907052220566e-06, + "loss": 0.6588, + "step": 2437 + }, + { + "epoch": 0.7521209316674379, + "grad_norm": 0.0, + "learning_rate": 3.051898523823743e-06, + "loss": 0.6724, + "step": 2438 + }, + { + "epoch": 0.7524294308190652, + "grad_norm": 0.0, + "learning_rate": 3.044713284305395e-06, + "loss": 0.7122, + "step": 2439 + }, + { + "epoch": 0.7527379299706926, + "grad_norm": 0.0, + "learning_rate": 3.037534993845818e-06, + "loss": 0.6708, + "step": 2440 + }, + { + "epoch": 0.7530464291223199, + "grad_norm": 0.0, + "learning_rate": 3.0303636596168706e-06, + "loss": 0.7263, + "step": 2441 + }, + { + "epoch": 0.7533549282739472, + "grad_norm": 0.0, + "learning_rate": 3.0231992887834725e-06, + "loss": 0.7195, + "step": 2442 + }, + { + "epoch": 0.7536634274255746, + "grad_norm": 0.0, + "learning_rate": 3.016041888503578e-06, + "loss": 0.7201, + "step": 2443 + }, + { + "epoch": 0.7539719265772019, + "grad_norm": 0.0, + "learning_rate": 3.008891465928172e-06, + "loss": 0.7869, + "step": 2444 + }, + { + "epoch": 0.7542804257288293, + "grad_norm": 0.0, + "learning_rate": 3.0017480282012836e-06, + "loss": 0.6756, + "step": 2445 + }, + { + "epoch": 0.7545889248804566, + "grad_norm": 0.0, + "learning_rate": 2.9946115824599465e-06, + "loss": 0.7122, + "step": 2446 + }, + { + "epoch": 0.7548974240320839, + "grad_norm": 0.0, + "learning_rate": 2.987482135834223e-06, + "loss": 0.6999, + "step": 2447 + }, + { + "epoch": 0.7552059231837113, + "grad_norm": 0.0, + "learning_rate": 2.9803596954471714e-06, + "loss": 0.6999, + "step": 2448 + }, + { + "epoch": 0.7555144223353386, + "grad_norm": 0.0, + "learning_rate": 2.9732442684148533e-06, + "loss": 0.7637, + "step": 2449 + }, + { + "epoch": 0.755822921486966, + "grad_norm": 0.0, + "learning_rate": 2.9661358618463297e-06, + "loss": 0.724, + "step": 2450 + }, + { + "epoch": 0.7561314206385933, + "grad_norm": 0.0, + "learning_rate": 2.959034482843639e-06, + "loss": 0.6431, + "step": 2451 + }, + { + "epoch": 0.7564399197902206, + "grad_norm": 0.0, + "learning_rate": 2.951940138501801e-06, + "loss": 0.9853, + "step": 2452 + }, + { + "epoch": 0.7567484189418479, + "grad_norm": 0.0, + "learning_rate": 2.944852835908806e-06, + "loss": 0.6493, + "step": 2453 + }, + { + "epoch": 0.7570569180934752, + "grad_norm": 0.0, + "learning_rate": 2.9377725821456138e-06, + "loss": 0.6325, + "step": 2454 + }, + { + "epoch": 0.7573654172451025, + "grad_norm": 0.0, + "learning_rate": 2.9306993842861354e-06, + "loss": 0.69, + "step": 2455 + }, + { + "epoch": 0.7576739163967299, + "grad_norm": 0.0, + "learning_rate": 2.9236332493972386e-06, + "loss": 0.6958, + "step": 2456 + }, + { + "epoch": 0.7579824155483572, + "grad_norm": 0.0, + "learning_rate": 2.916574184538731e-06, + "loss": 0.837, + "step": 2457 + }, + { + "epoch": 0.7582909146999846, + "grad_norm": 0.0, + "learning_rate": 2.9095221967633502e-06, + "loss": 0.6747, + "step": 2458 + }, + { + "epoch": 0.7585994138516119, + "grad_norm": 0.0, + "learning_rate": 2.9024772931167777e-06, + "loss": 0.6415, + "step": 2459 + }, + { + "epoch": 0.7589079130032392, + "grad_norm": 0.0, + "learning_rate": 2.8954394806376052e-06, + "loss": 0.6934, + "step": 2460 + }, + { + "epoch": 0.7592164121548666, + "grad_norm": 0.0, + "learning_rate": 2.8884087663573403e-06, + "loss": 0.6837, + "step": 2461 + }, + { + "epoch": 0.7595249113064939, + "grad_norm": 0.0, + "learning_rate": 2.881385157300408e-06, + "loss": 0.7331, + "step": 2462 + }, + { + "epoch": 0.7598334104581213, + "grad_norm": 0.0, + "learning_rate": 2.8743686604841227e-06, + "loss": 0.7135, + "step": 2463 + }, + { + "epoch": 0.7601419096097486, + "grad_norm": 0.0, + "learning_rate": 2.8673592829187057e-06, + "loss": 0.765, + "step": 2464 + }, + { + "epoch": 0.7604504087613759, + "grad_norm": 0.0, + "learning_rate": 2.8603570316072484e-06, + "loss": 0.7272, + "step": 2465 + }, + { + "epoch": 0.7607589079130033, + "grad_norm": 0.0, + "learning_rate": 2.8533619135457402e-06, + "loss": 0.637, + "step": 2466 + }, + { + "epoch": 0.7610674070646306, + "grad_norm": 0.0, + "learning_rate": 2.846373935723029e-06, + "loss": 0.7201, + "step": 2467 + }, + { + "epoch": 0.761375906216258, + "grad_norm": 0.0, + "learning_rate": 2.839393105120839e-06, + "loss": 0.747, + "step": 2468 + }, + { + "epoch": 0.7616844053678853, + "grad_norm": 0.0, + "learning_rate": 2.8324194287137565e-06, + "loss": 0.7197, + "step": 2469 + }, + { + "epoch": 0.7619929045195126, + "grad_norm": 0.0, + "learning_rate": 2.8254529134692e-06, + "loss": 1.0135, + "step": 2470 + }, + { + "epoch": 0.7623014036711399, + "grad_norm": 0.0, + "learning_rate": 2.8184935663474567e-06, + "loss": 0.7235, + "step": 2471 + }, + { + "epoch": 0.7626099028227672, + "grad_norm": 0.0, + "learning_rate": 2.8115413943016366e-06, + "loss": 0.6625, + "step": 2472 + }, + { + "epoch": 0.7629184019743945, + "grad_norm": 0.0, + "learning_rate": 2.804596404277692e-06, + "loss": 0.7001, + "step": 2473 + }, + { + "epoch": 0.7632269011260219, + "grad_norm": 0.0, + "learning_rate": 2.7976586032143914e-06, + "loss": 0.6663, + "step": 2474 + }, + { + "epoch": 0.7635354002776492, + "grad_norm": 0.0, + "learning_rate": 2.7907279980433197e-06, + "loss": 0.7304, + "step": 2475 + }, + { + "epoch": 0.7638438994292766, + "grad_norm": 0.0, + "learning_rate": 2.7838045956888836e-06, + "loss": 0.7103, + "step": 2476 + }, + { + "epoch": 0.7641523985809039, + "grad_norm": 0.0, + "learning_rate": 2.776888403068282e-06, + "loss": 0.6845, + "step": 2477 + }, + { + "epoch": 0.7644608977325312, + "grad_norm": 0.0, + "learning_rate": 2.769979427091515e-06, + "loss": 0.6667, + "step": 2478 + }, + { + "epoch": 0.7647693968841586, + "grad_norm": 0.0, + "learning_rate": 2.7630776746613696e-06, + "loss": 0.6478, + "step": 2479 + }, + { + "epoch": 0.7650778960357859, + "grad_norm": 0.0, + "learning_rate": 2.7561831526734207e-06, + "loss": 0.7323, + "step": 2480 + }, + { + "epoch": 0.7653863951874132, + "grad_norm": 0.0, + "learning_rate": 2.749295868016022e-06, + "loss": 0.6891, + "step": 2481 + }, + { + "epoch": 0.7656948943390406, + "grad_norm": 0.0, + "learning_rate": 2.7424158275702807e-06, + "loss": 0.7104, + "step": 2482 + }, + { + "epoch": 0.7660033934906679, + "grad_norm": 0.0, + "learning_rate": 2.735543038210083e-06, + "loss": 0.6838, + "step": 2483 + }, + { + "epoch": 0.7663118926422953, + "grad_norm": 0.0, + "learning_rate": 2.728677506802062e-06, + "loss": 0.7601, + "step": 2484 + }, + { + "epoch": 0.7666203917939226, + "grad_norm": 0.0, + "learning_rate": 2.7218192402056052e-06, + "loss": 0.6113, + "step": 2485 + }, + { + "epoch": 0.76692889094555, + "grad_norm": 0.0, + "learning_rate": 2.7149682452728354e-06, + "loss": 0.6649, + "step": 2486 + }, + { + "epoch": 0.7672373900971773, + "grad_norm": 0.0, + "learning_rate": 2.70812452884861e-06, + "loss": 0.7328, + "step": 2487 + }, + { + "epoch": 0.7675458892488045, + "grad_norm": 0.0, + "learning_rate": 2.7012880977705236e-06, + "loss": 0.6515, + "step": 2488 + }, + { + "epoch": 0.7678543884004319, + "grad_norm": 0.0, + "learning_rate": 2.69445895886888e-06, + "loss": 0.6507, + "step": 2489 + }, + { + "epoch": 0.7681628875520592, + "grad_norm": 0.0, + "learning_rate": 2.6876371189667107e-06, + "loss": 1.0037, + "step": 2490 + }, + { + "epoch": 0.7684713867036865, + "grad_norm": 0.0, + "learning_rate": 2.6808225848797365e-06, + "loss": 0.7197, + "step": 2491 + }, + { + "epoch": 0.7687798858553139, + "grad_norm": 0.0, + "learning_rate": 2.6740153634163955e-06, + "loss": 0.6876, + "step": 2492 + }, + { + "epoch": 0.7690883850069412, + "grad_norm": 0.0, + "learning_rate": 2.6672154613778166e-06, + "loss": 0.7228, + "step": 2493 + }, + { + "epoch": 0.7693968841585686, + "grad_norm": 0.0, + "learning_rate": 2.6604228855578096e-06, + "loss": 0.6671, + "step": 2494 + }, + { + "epoch": 0.7697053833101959, + "grad_norm": 0.0, + "learning_rate": 2.653637642742869e-06, + "loss": 0.7387, + "step": 2495 + }, + { + "epoch": 0.7700138824618232, + "grad_norm": 0.0, + "learning_rate": 2.6468597397121575e-06, + "loss": 0.6826, + "step": 2496 + }, + { + "epoch": 0.7703223816134506, + "grad_norm": 0.0, + "learning_rate": 2.6400891832375163e-06, + "loss": 0.7781, + "step": 2497 + }, + { + "epoch": 0.7706308807650779, + "grad_norm": 0.0, + "learning_rate": 2.6333259800834366e-06, + "loss": 0.6769, + "step": 2498 + }, + { + "epoch": 0.7709393799167052, + "grad_norm": 0.0, + "learning_rate": 2.626570137007061e-06, + "loss": 0.6654, + "step": 2499 + }, + { + "epoch": 0.7712478790683326, + "grad_norm": 0.0, + "learning_rate": 2.6198216607581907e-06, + "loss": 0.6779, + "step": 2500 + }, + { + "epoch": 0.7715563782199599, + "grad_norm": 0.0, + "learning_rate": 2.613080558079253e-06, + "loss": 0.6879, + "step": 2501 + }, + { + "epoch": 0.7718648773715873, + "grad_norm": 0.0, + "learning_rate": 2.606346835705319e-06, + "loss": 0.7331, + "step": 2502 + }, + { + "epoch": 0.7721733765232146, + "grad_norm": 0.0, + "learning_rate": 2.5996205003640796e-06, + "loss": 0.7353, + "step": 2503 + }, + { + "epoch": 0.772481875674842, + "grad_norm": 0.0, + "learning_rate": 2.5929015587758455e-06, + "loss": 0.6572, + "step": 2504 + }, + { + "epoch": 0.7727903748264692, + "grad_norm": 0.0, + "learning_rate": 2.586190017653546e-06, + "loss": 0.7388, + "step": 2505 + }, + { + "epoch": 0.7730988739780965, + "grad_norm": 0.0, + "learning_rate": 2.579485883702707e-06, + "loss": 0.695, + "step": 2506 + }, + { + "epoch": 0.7734073731297239, + "grad_norm": 0.0, + "learning_rate": 2.5727891636214684e-06, + "loss": 0.6227, + "step": 2507 + }, + { + "epoch": 0.7737158722813512, + "grad_norm": 0.0, + "learning_rate": 2.566099864100543e-06, + "loss": 0.7622, + "step": 2508 + }, + { + "epoch": 0.7740243714329785, + "grad_norm": 0.0, + "learning_rate": 2.5594179918232476e-06, + "loss": 0.7167, + "step": 2509 + }, + { + "epoch": 0.7743328705846059, + "grad_norm": 0.0, + "learning_rate": 2.5527435534654667e-06, + "loss": 0.6815, + "step": 2510 + }, + { + "epoch": 0.7746413697362332, + "grad_norm": 0.0, + "learning_rate": 2.546076555695668e-06, + "loss": 0.6534, + "step": 2511 + }, + { + "epoch": 0.7749498688878605, + "grad_norm": 0.0, + "learning_rate": 2.539417005174876e-06, + "loss": 0.661, + "step": 2512 + }, + { + "epoch": 0.7752583680394879, + "grad_norm": 0.0, + "learning_rate": 2.5327649085566763e-06, + "loss": 0.6576, + "step": 2513 + }, + { + "epoch": 0.7755668671911152, + "grad_norm": 0.0, + "learning_rate": 2.5261202724872126e-06, + "loss": 0.7524, + "step": 2514 + }, + { + "epoch": 0.7758753663427426, + "grad_norm": 0.0, + "learning_rate": 2.5194831036051716e-06, + "loss": 0.6885, + "step": 2515 + }, + { + "epoch": 0.7761838654943699, + "grad_norm": 0.0, + "learning_rate": 2.5128534085417734e-06, + "loss": 0.6119, + "step": 2516 + }, + { + "epoch": 0.7764923646459972, + "grad_norm": 0.0, + "learning_rate": 2.506231193920783e-06, + "loss": 0.6358, + "step": 2517 + }, + { + "epoch": 0.7768008637976246, + "grad_norm": 0.0, + "learning_rate": 2.49961646635848e-06, + "loss": 0.684, + "step": 2518 + }, + { + "epoch": 0.7771093629492519, + "grad_norm": 0.0, + "learning_rate": 2.4930092324636744e-06, + "loss": 0.7587, + "step": 2519 + }, + { + "epoch": 0.7774178621008793, + "grad_norm": 0.0, + "learning_rate": 2.486409498837683e-06, + "loss": 0.7202, + "step": 2520 + }, + { + "epoch": 0.7777263612525066, + "grad_norm": 0.0, + "learning_rate": 2.4798172720743262e-06, + "loss": 0.9812, + "step": 2521 + }, + { + "epoch": 0.7780348604041339, + "grad_norm": 0.0, + "learning_rate": 2.473232558759927e-06, + "loss": 0.6815, + "step": 2522 + }, + { + "epoch": 0.7783433595557612, + "grad_norm": 0.0, + "learning_rate": 2.4666553654733095e-06, + "loss": 0.7473, + "step": 2523 + }, + { + "epoch": 0.7786518587073885, + "grad_norm": 0.0, + "learning_rate": 2.4600856987857725e-06, + "loss": 0.692, + "step": 2524 + }, + { + "epoch": 0.7789603578590159, + "grad_norm": 0.0, + "learning_rate": 2.4535235652610976e-06, + "loss": 0.7427, + "step": 2525 + }, + { + "epoch": 0.7792688570106432, + "grad_norm": 0.0, + "learning_rate": 2.44696897145555e-06, + "loss": 0.6775, + "step": 2526 + }, + { + "epoch": 0.7795773561622705, + "grad_norm": 0.0, + "learning_rate": 2.440421923917846e-06, + "loss": 0.63, + "step": 2527 + }, + { + "epoch": 0.7798858553138979, + "grad_norm": 0.0, + "learning_rate": 2.4338824291891795e-06, + "loss": 0.6496, + "step": 2528 + }, + { + "epoch": 0.7801943544655252, + "grad_norm": 0.0, + "learning_rate": 2.4273504938031835e-06, + "loss": 0.7412, + "step": 2529 + }, + { + "epoch": 0.7805028536171525, + "grad_norm": 0.0, + "learning_rate": 2.420826124285943e-06, + "loss": 0.797, + "step": 2530 + }, + { + "epoch": 0.7808113527687799, + "grad_norm": 0.0, + "learning_rate": 2.4143093271559925e-06, + "loss": 0.7071, + "step": 2531 + }, + { + "epoch": 0.7811198519204072, + "grad_norm": 0.0, + "learning_rate": 2.4078001089242887e-06, + "loss": 0.6181, + "step": 2532 + }, + { + "epoch": 0.7814283510720346, + "grad_norm": 0.0, + "learning_rate": 2.4012984760942224e-06, + "loss": 0.6362, + "step": 2533 + }, + { + "epoch": 0.7817368502236619, + "grad_norm": 0.0, + "learning_rate": 2.394804435161603e-06, + "loss": 0.6895, + "step": 2534 + }, + { + "epoch": 0.7820453493752892, + "grad_norm": 0.0, + "learning_rate": 2.3883179926146593e-06, + "loss": 0.6591, + "step": 2535 + }, + { + "epoch": 0.7823538485269166, + "grad_norm": 0.0, + "learning_rate": 2.3818391549340225e-06, + "loss": 0.7316, + "step": 2536 + }, + { + "epoch": 0.7826623476785439, + "grad_norm": 0.0, + "learning_rate": 2.3753679285927345e-06, + "loss": 0.6605, + "step": 2537 + }, + { + "epoch": 0.7829708468301713, + "grad_norm": 0.0, + "learning_rate": 2.368904320056222e-06, + "loss": 0.7423, + "step": 2538 + }, + { + "epoch": 0.7832793459817986, + "grad_norm": 0.0, + "learning_rate": 2.362448335782307e-06, + "loss": 0.6575, + "step": 2539 + }, + { + "epoch": 0.7835878451334258, + "grad_norm": 0.0, + "learning_rate": 2.3559999822211943e-06, + "loss": 0.7509, + "step": 2540 + }, + { + "epoch": 0.7838963442850532, + "grad_norm": 0.0, + "learning_rate": 2.349559265815463e-06, + "loss": 0.6073, + "step": 2541 + }, + { + "epoch": 0.7842048434366805, + "grad_norm": 0.0, + "learning_rate": 2.3431261930000602e-06, + "loss": 0.7227, + "step": 2542 + }, + { + "epoch": 0.7845133425883078, + "grad_norm": 0.0, + "learning_rate": 2.3367007702023016e-06, + "loss": 0.6742, + "step": 2543 + }, + { + "epoch": 0.7848218417399352, + "grad_norm": 0.0, + "learning_rate": 2.330283003841853e-06, + "loss": 0.8234, + "step": 2544 + }, + { + "epoch": 0.7851303408915625, + "grad_norm": 0.0, + "learning_rate": 2.3238729003307412e-06, + "loss": 0.9959, + "step": 2545 + }, + { + "epoch": 0.7854388400431899, + "grad_norm": 0.0, + "learning_rate": 2.3174704660733192e-06, + "loss": 0.7392, + "step": 2546 + }, + { + "epoch": 0.7857473391948172, + "grad_norm": 0.0, + "learning_rate": 2.311075707466297e-06, + "loss": 0.7615, + "step": 2547 + }, + { + "epoch": 0.7860558383464445, + "grad_norm": 0.0, + "learning_rate": 2.3046886308987017e-06, + "loss": 0.6454, + "step": 2548 + }, + { + "epoch": 0.7863643374980719, + "grad_norm": 0.0, + "learning_rate": 2.298309242751896e-06, + "loss": 0.6645, + "step": 2549 + }, + { + "epoch": 0.7866728366496992, + "grad_norm": 0.0, + "learning_rate": 2.291937549399552e-06, + "loss": 0.7703, + "step": 2550 + }, + { + "epoch": 0.7869813358013266, + "grad_norm": 0.0, + "learning_rate": 2.2855735572076564e-06, + "loss": 0.6995, + "step": 2551 + }, + { + "epoch": 0.7872898349529539, + "grad_norm": 0.0, + "learning_rate": 2.2792172725345084e-06, + "loss": 0.6252, + "step": 2552 + }, + { + "epoch": 0.7875983341045812, + "grad_norm": 0.0, + "learning_rate": 2.272868701730695e-06, + "loss": 0.6929, + "step": 2553 + }, + { + "epoch": 0.7879068332562086, + "grad_norm": 0.0, + "learning_rate": 2.2665278511391075e-06, + "loss": 0.6335, + "step": 2554 + }, + { + "epoch": 0.7882153324078359, + "grad_norm": 0.0, + "learning_rate": 2.2601947270949164e-06, + "loss": 0.7216, + "step": 2555 + }, + { + "epoch": 0.7885238315594633, + "grad_norm": 0.0, + "learning_rate": 2.2538693359255724e-06, + "loss": 0.7121, + "step": 2556 + }, + { + "epoch": 0.7888323307110905, + "grad_norm": 0.0, + "learning_rate": 2.2475516839508072e-06, + "loss": 0.6249, + "step": 2557 + }, + { + "epoch": 0.7891408298627178, + "grad_norm": 0.0, + "learning_rate": 2.2412417774826144e-06, + "loss": 0.6763, + "step": 2558 + }, + { + "epoch": 0.7894493290143452, + "grad_norm": 0.0, + "learning_rate": 2.2349396228252506e-06, + "loss": 0.7082, + "step": 2559 + }, + { + "epoch": 0.7897578281659725, + "grad_norm": 0.0, + "learning_rate": 2.2286452262752223e-06, + "loss": 0.7742, + "step": 2560 + }, + { + "epoch": 0.7900663273175998, + "grad_norm": 0.0, + "learning_rate": 2.2223585941212956e-06, + "loss": 0.6681, + "step": 2561 + }, + { + "epoch": 0.7903748264692272, + "grad_norm": 0.0, + "learning_rate": 2.216079732644476e-06, + "loss": 0.7311, + "step": 2562 + }, + { + "epoch": 0.7906833256208545, + "grad_norm": 0.0, + "learning_rate": 2.2098086481179947e-06, + "loss": 0.6645, + "step": 2563 + }, + { + "epoch": 0.7909918247724819, + "grad_norm": 0.0, + "learning_rate": 2.2035453468073263e-06, + "loss": 0.557, + "step": 2564 + }, + { + "epoch": 0.7913003239241092, + "grad_norm": 0.0, + "learning_rate": 2.197289834970161e-06, + "loss": 0.6466, + "step": 2565 + }, + { + "epoch": 0.7916088230757365, + "grad_norm": 0.0, + "learning_rate": 2.1910421188564134e-06, + "loss": 0.6967, + "step": 2566 + }, + { + "epoch": 0.7919173222273639, + "grad_norm": 0.0, + "learning_rate": 2.184802204708202e-06, + "loss": 0.7423, + "step": 2567 + }, + { + "epoch": 0.7922258213789912, + "grad_norm": 0.0, + "learning_rate": 2.178570098759851e-06, + "loss": 0.7138, + "step": 2568 + }, + { + "epoch": 0.7925343205306186, + "grad_norm": 0.0, + "learning_rate": 2.1723458072378924e-06, + "loss": 0.6411, + "step": 2569 + }, + { + "epoch": 0.7928428196822459, + "grad_norm": 0.0, + "learning_rate": 2.166129336361038e-06, + "loss": 0.8109, + "step": 2570 + }, + { + "epoch": 0.7931513188338732, + "grad_norm": 0.0, + "learning_rate": 2.159920692340202e-06, + "loss": 0.7287, + "step": 2571 + }, + { + "epoch": 0.7934598179855006, + "grad_norm": 0.0, + "learning_rate": 2.1537198813784554e-06, + "loss": 0.7112, + "step": 2572 + }, + { + "epoch": 0.7937683171371279, + "grad_norm": 0.0, + "learning_rate": 2.147526909671064e-06, + "loss": 0.6677, + "step": 2573 + }, + { + "epoch": 0.7940768162887553, + "grad_norm": 0.0, + "learning_rate": 2.141341783405457e-06, + "loss": 0.7404, + "step": 2574 + }, + { + "epoch": 0.7943853154403825, + "grad_norm": 0.0, + "learning_rate": 2.1351645087612195e-06, + "loss": 0.7257, + "step": 2575 + }, + { + "epoch": 0.7946938145920098, + "grad_norm": 0.0, + "learning_rate": 2.1289950919100932e-06, + "loss": 0.7316, + "step": 2576 + }, + { + "epoch": 0.7950023137436372, + "grad_norm": 0.0, + "learning_rate": 2.1228335390159704e-06, + "loss": 0.7996, + "step": 2577 + }, + { + "epoch": 0.7953108128952645, + "grad_norm": 0.0, + "learning_rate": 2.116679856234892e-06, + "loss": 0.7102, + "step": 2578 + }, + { + "epoch": 0.7956193120468918, + "grad_norm": 0.0, + "learning_rate": 2.1105340497150264e-06, + "loss": 0.7584, + "step": 2579 + }, + { + "epoch": 0.7959278111985192, + "grad_norm": 0.0, + "learning_rate": 2.104396125596675e-06, + "loss": 0.7088, + "step": 2580 + }, + { + "epoch": 0.7962363103501465, + "grad_norm": 0.0, + "learning_rate": 2.098266090012271e-06, + "loss": 0.6948, + "step": 2581 + }, + { + "epoch": 0.7965448095017739, + "grad_norm": 0.0, + "learning_rate": 2.092143949086356e-06, + "loss": 0.6375, + "step": 2582 + }, + { + "epoch": 0.7968533086534012, + "grad_norm": 0.0, + "learning_rate": 2.0860297089355943e-06, + "loss": 0.7181, + "step": 2583 + }, + { + "epoch": 0.7971618078050285, + "grad_norm": 0.0, + "learning_rate": 2.0799233756687477e-06, + "loss": 0.7211, + "step": 2584 + }, + { + "epoch": 0.7974703069566559, + "grad_norm": 0.0, + "learning_rate": 2.0738249553866807e-06, + "loss": 0.6163, + "step": 2585 + }, + { + "epoch": 0.7977788061082832, + "grad_norm": 0.0, + "learning_rate": 2.0677344541823573e-06, + "loss": 0.7215, + "step": 2586 + }, + { + "epoch": 0.7980873052599106, + "grad_norm": 0.0, + "learning_rate": 2.061651878140819e-06, + "loss": 0.7583, + "step": 2587 + }, + { + "epoch": 0.7983958044115379, + "grad_norm": 0.0, + "learning_rate": 2.0555772333392055e-06, + "loss": 0.7149, + "step": 2588 + }, + { + "epoch": 0.7987043035631652, + "grad_norm": 0.0, + "learning_rate": 2.0495105258467085e-06, + "loss": 0.7232, + "step": 2589 + }, + { + "epoch": 0.7990128027147926, + "grad_norm": 0.0, + "learning_rate": 2.043451761724614e-06, + "loss": 0.7209, + "step": 2590 + }, + { + "epoch": 0.7993213018664199, + "grad_norm": 0.0, + "learning_rate": 2.0374009470262545e-06, + "loss": 0.7242, + "step": 2591 + }, + { + "epoch": 0.7996298010180471, + "grad_norm": 0.0, + "learning_rate": 2.031358087797032e-06, + "loss": 0.7512, + "step": 2592 + }, + { + "epoch": 0.7999383001696745, + "grad_norm": 0.0, + "learning_rate": 2.0253231900743906e-06, + "loss": 0.6766, + "step": 2593 + }, + { + "epoch": 0.8002467993213018, + "grad_norm": 0.0, + "learning_rate": 2.019296259887825e-06, + "loss": 0.7268, + "step": 2594 + }, + { + "epoch": 0.8005552984729292, + "grad_norm": 0.0, + "learning_rate": 2.0132773032588717e-06, + "loss": 0.7501, + "step": 2595 + }, + { + "epoch": 0.8008637976245565, + "grad_norm": 0.0, + "learning_rate": 2.007266326201095e-06, + "loss": 0.7455, + "step": 2596 + }, + { + "epoch": 0.8011722967761838, + "grad_norm": 0.0, + "learning_rate": 2.0012633347200873e-06, + "loss": 0.6826, + "step": 2597 + }, + { + "epoch": 0.8014807959278112, + "grad_norm": 0.0, + "learning_rate": 1.9952683348134704e-06, + "loss": 0.6854, + "step": 2598 + }, + { + "epoch": 0.8017892950794385, + "grad_norm": 0.0, + "learning_rate": 1.9892813324708705e-06, + "loss": 0.7561, + "step": 2599 + }, + { + "epoch": 0.8020977942310659, + "grad_norm": 0.0, + "learning_rate": 1.9833023336739354e-06, + "loss": 0.654, + "step": 2600 + }, + { + "epoch": 0.8024062933826932, + "grad_norm": 0.0, + "learning_rate": 1.977331344396306e-06, + "loss": 0.6279, + "step": 2601 + }, + { + "epoch": 0.8027147925343205, + "grad_norm": 0.0, + "learning_rate": 1.971368370603626e-06, + "loss": 0.7695, + "step": 2602 + }, + { + "epoch": 0.8030232916859479, + "grad_norm": 0.0, + "learning_rate": 1.9654134182535267e-06, + "loss": 0.7384, + "step": 2603 + }, + { + "epoch": 0.8033317908375752, + "grad_norm": 0.0, + "learning_rate": 1.959466493295633e-06, + "loss": 0.6961, + "step": 2604 + }, + { + "epoch": 0.8036402899892026, + "grad_norm": 0.0, + "learning_rate": 1.953527601671543e-06, + "loss": 0.7267, + "step": 2605 + }, + { + "epoch": 0.8039487891408299, + "grad_norm": 0.0, + "learning_rate": 1.9475967493148283e-06, + "loss": 0.6406, + "step": 2606 + }, + { + "epoch": 0.8042572882924572, + "grad_norm": 0.0, + "learning_rate": 1.9416739421510354e-06, + "loss": 0.674, + "step": 2607 + }, + { + "epoch": 0.8045657874440846, + "grad_norm": 0.0, + "learning_rate": 1.935759186097662e-06, + "loss": 0.63, + "step": 2608 + }, + { + "epoch": 0.8048742865957118, + "grad_norm": 0.0, + "learning_rate": 1.9298524870641734e-06, + "loss": 0.712, + "step": 2609 + }, + { + "epoch": 0.8051827857473391, + "grad_norm": 0.0, + "learning_rate": 1.9239538509519784e-06, + "loss": 0.7013, + "step": 2610 + }, + { + "epoch": 0.8054912848989665, + "grad_norm": 0.0, + "learning_rate": 1.9180632836544268e-06, + "loss": 0.7126, + "step": 2611 + }, + { + "epoch": 0.8057997840505938, + "grad_norm": 0.0, + "learning_rate": 1.912180791056817e-06, + "loss": 0.6296, + "step": 2612 + }, + { + "epoch": 0.8061082832022212, + "grad_norm": 0.0, + "learning_rate": 1.9063063790363733e-06, + "loss": 0.6604, + "step": 2613 + }, + { + "epoch": 0.8064167823538485, + "grad_norm": 0.0, + "learning_rate": 1.9004400534622458e-06, + "loss": 0.7148, + "step": 2614 + }, + { + "epoch": 0.8067252815054758, + "grad_norm": 0.0, + "learning_rate": 1.8945818201955047e-06, + "loss": 0.7608, + "step": 2615 + }, + { + "epoch": 0.8070337806571032, + "grad_norm": 0.0, + "learning_rate": 1.8887316850891446e-06, + "loss": 0.6483, + "step": 2616 + }, + { + "epoch": 0.8073422798087305, + "grad_norm": 0.0, + "learning_rate": 1.8828896539880549e-06, + "loss": 0.755, + "step": 2617 + }, + { + "epoch": 0.8076507789603579, + "grad_norm": 0.0, + "learning_rate": 1.8770557327290418e-06, + "loss": 0.7258, + "step": 2618 + }, + { + "epoch": 0.8079592781119852, + "grad_norm": 0.0, + "learning_rate": 1.8712299271407998e-06, + "loss": 0.597, + "step": 2619 + }, + { + "epoch": 0.8082677772636125, + "grad_norm": 0.0, + "learning_rate": 1.865412243043916e-06, + "loss": 0.7922, + "step": 2620 + }, + { + "epoch": 0.8085762764152399, + "grad_norm": 0.0, + "learning_rate": 1.8596026862508697e-06, + "loss": 0.7384, + "step": 2621 + }, + { + "epoch": 0.8088847755668672, + "grad_norm": 0.0, + "learning_rate": 1.853801262566014e-06, + "loss": 0.6699, + "step": 2622 + }, + { + "epoch": 0.8091932747184946, + "grad_norm": 0.0, + "learning_rate": 1.848007977785573e-06, + "loss": 0.6537, + "step": 2623 + }, + { + "epoch": 0.8095017738701219, + "grad_norm": 0.0, + "learning_rate": 1.8422228376976526e-06, + "loss": 0.6377, + "step": 2624 + }, + { + "epoch": 0.8098102730217492, + "grad_norm": 0.0, + "learning_rate": 1.8364458480822045e-06, + "loss": 0.7561, + "step": 2625 + }, + { + "epoch": 0.8101187721733766, + "grad_norm": 0.0, + "learning_rate": 1.8306770147110542e-06, + "loss": 0.6598, + "step": 2626 + }, + { + "epoch": 0.8104272713250038, + "grad_norm": 0.0, + "learning_rate": 1.8249163433478601e-06, + "loss": 0.665, + "step": 2627 + }, + { + "epoch": 0.8107357704766311, + "grad_norm": 0.0, + "learning_rate": 1.8191638397481415e-06, + "loss": 0.7308, + "step": 2628 + }, + { + "epoch": 0.8110442696282585, + "grad_norm": 0.0, + "learning_rate": 1.8134195096592466e-06, + "loss": 0.7206, + "step": 2629 + }, + { + "epoch": 0.8113527687798858, + "grad_norm": 0.0, + "learning_rate": 1.807683358820368e-06, + "loss": 0.722, + "step": 2630 + }, + { + "epoch": 0.8116612679315132, + "grad_norm": 0.0, + "learning_rate": 1.8019553929625166e-06, + "loss": 0.6939, + "step": 2631 + }, + { + "epoch": 0.8119697670831405, + "grad_norm": 0.0, + "learning_rate": 1.796235617808526e-06, + "loss": 0.6726, + "step": 2632 + }, + { + "epoch": 0.8122782662347678, + "grad_norm": 0.0, + "learning_rate": 1.7905240390730571e-06, + "loss": 0.6318, + "step": 2633 + }, + { + "epoch": 0.8125867653863952, + "grad_norm": 0.0, + "learning_rate": 1.7848206624625675e-06, + "loss": 0.7716, + "step": 2634 + }, + { + "epoch": 0.8128952645380225, + "grad_norm": 0.0, + "learning_rate": 1.7791254936753323e-06, + "loss": 0.6721, + "step": 2635 + }, + { + "epoch": 0.8132037636896499, + "grad_norm": 0.0, + "learning_rate": 1.7734385384014175e-06, + "loss": 0.6664, + "step": 2636 + }, + { + "epoch": 0.8135122628412772, + "grad_norm": 0.0, + "learning_rate": 1.767759802322685e-06, + "loss": 0.6266, + "step": 2637 + }, + { + "epoch": 0.8138207619929045, + "grad_norm": 0.0, + "learning_rate": 1.7620892911127897e-06, + "loss": 0.6798, + "step": 2638 + }, + { + "epoch": 0.8141292611445319, + "grad_norm": 0.0, + "learning_rate": 1.7564270104371638e-06, + "loss": 0.6637, + "step": 2639 + }, + { + "epoch": 0.8144377602961592, + "grad_norm": 0.0, + "learning_rate": 1.750772965953017e-06, + "loss": 0.7297, + "step": 2640 + }, + { + "epoch": 0.8147462594477866, + "grad_norm": 0.0, + "learning_rate": 1.7451271633093292e-06, + "loss": 0.6687, + "step": 2641 + }, + { + "epoch": 0.8150547585994139, + "grad_norm": 0.0, + "learning_rate": 1.7394896081468527e-06, + "loss": 0.7067, + "step": 2642 + }, + { + "epoch": 0.8153632577510412, + "grad_norm": 0.0, + "learning_rate": 1.733860306098092e-06, + "loss": 1.034, + "step": 2643 + }, + { + "epoch": 0.8156717569026685, + "grad_norm": 0.0, + "learning_rate": 1.7282392627873046e-06, + "loss": 0.6971, + "step": 2644 + }, + { + "epoch": 0.8159802560542958, + "grad_norm": 0.0, + "learning_rate": 1.7226264838305074e-06, + "loss": 0.7338, + "step": 2645 + }, + { + "epoch": 0.8162887552059231, + "grad_norm": 0.0, + "learning_rate": 1.7170219748354488e-06, + "loss": 0.7389, + "step": 2646 + }, + { + "epoch": 0.8165972543575505, + "grad_norm": 0.0, + "learning_rate": 1.7114257414016223e-06, + "loss": 0.7335, + "step": 2647 + }, + { + "epoch": 0.8169057535091778, + "grad_norm": 0.0, + "learning_rate": 1.7058377891202493e-06, + "loss": 0.6609, + "step": 2648 + }, + { + "epoch": 0.8172142526608052, + "grad_norm": 0.0, + "learning_rate": 1.7002581235742766e-06, + "loss": 0.6985, + "step": 2649 + }, + { + "epoch": 0.8175227518124325, + "grad_norm": 0.0, + "learning_rate": 1.6946867503383768e-06, + "loss": 0.7653, + "step": 2650 + }, + { + "epoch": 0.8178312509640598, + "grad_norm": 0.0, + "learning_rate": 1.6891236749789297e-06, + "loss": 0.7605, + "step": 2651 + }, + { + "epoch": 0.8181397501156872, + "grad_norm": 0.0, + "learning_rate": 1.6835689030540382e-06, + "loss": 0.6845, + "step": 2652 + }, + { + "epoch": 0.8184482492673145, + "grad_norm": 0.0, + "learning_rate": 1.6780224401134903e-06, + "loss": 0.687, + "step": 2653 + }, + { + "epoch": 0.8187567484189419, + "grad_norm": 0.0, + "learning_rate": 1.6724842916987882e-06, + "loss": 0.6749, + "step": 2654 + }, + { + "epoch": 0.8190652475705692, + "grad_norm": 0.0, + "learning_rate": 1.6669544633431178e-06, + "loss": 0.6974, + "step": 2655 + }, + { + "epoch": 0.8193737467221965, + "grad_norm": 0.0, + "learning_rate": 1.6614329605713597e-06, + "loss": 0.6402, + "step": 2656 + }, + { + "epoch": 0.8196822458738239, + "grad_norm": 0.0, + "learning_rate": 1.655919788900071e-06, + "loss": 0.6724, + "step": 2657 + }, + { + "epoch": 0.8199907450254512, + "grad_norm": 0.0, + "learning_rate": 1.6504149538374848e-06, + "loss": 0.7253, + "step": 2658 + }, + { + "epoch": 0.8202992441770786, + "grad_norm": 0.0, + "learning_rate": 1.6449184608835112e-06, + "loss": 0.7499, + "step": 2659 + }, + { + "epoch": 0.8206077433287059, + "grad_norm": 0.0, + "learning_rate": 1.6394303155297186e-06, + "loss": 0.6714, + "step": 2660 + }, + { + "epoch": 0.8209162424803331, + "grad_norm": 0.0, + "learning_rate": 1.6339505232593356e-06, + "loss": 0.6722, + "step": 2661 + }, + { + "epoch": 0.8212247416319605, + "grad_norm": 0.0, + "learning_rate": 1.6284790895472536e-06, + "loss": 0.7015, + "step": 2662 + }, + { + "epoch": 0.8215332407835878, + "grad_norm": 0.0, + "learning_rate": 1.623016019860002e-06, + "loss": 0.6303, + "step": 2663 + }, + { + "epoch": 0.8218417399352151, + "grad_norm": 0.0, + "learning_rate": 1.6175613196557648e-06, + "loss": 0.7468, + "step": 2664 + }, + { + "epoch": 0.8221502390868425, + "grad_norm": 0.0, + "learning_rate": 1.6121149943843494e-06, + "loss": 0.6882, + "step": 2665 + }, + { + "epoch": 0.8224587382384698, + "grad_norm": 0.0, + "learning_rate": 1.606677049487212e-06, + "loss": 0.6778, + "step": 2666 + }, + { + "epoch": 0.8227672373900972, + "grad_norm": 0.0, + "learning_rate": 1.6012474903974218e-06, + "loss": 0.6876, + "step": 2667 + }, + { + "epoch": 0.8230757365417245, + "grad_norm": 0.0, + "learning_rate": 1.5958263225396796e-06, + "loss": 0.6676, + "step": 2668 + }, + { + "epoch": 0.8233842356933518, + "grad_norm": 0.0, + "learning_rate": 1.5904135513303032e-06, + "loss": 0.6778, + "step": 2669 + }, + { + "epoch": 0.8236927348449792, + "grad_norm": 0.0, + "learning_rate": 1.5850091821772085e-06, + "loss": 0.7142, + "step": 2670 + }, + { + "epoch": 0.8240012339966065, + "grad_norm": 0.0, + "learning_rate": 1.579613220479932e-06, + "loss": 0.6722, + "step": 2671 + }, + { + "epoch": 0.8243097331482339, + "grad_norm": 0.0, + "learning_rate": 1.5742256716295978e-06, + "loss": 0.657, + "step": 2672 + }, + { + "epoch": 0.8246182322998612, + "grad_norm": 0.0, + "learning_rate": 1.5688465410089371e-06, + "loss": 0.7935, + "step": 2673 + }, + { + "epoch": 0.8249267314514885, + "grad_norm": 0.0, + "learning_rate": 1.5634758339922619e-06, + "loss": 0.7298, + "step": 2674 + }, + { + "epoch": 0.8252352306031159, + "grad_norm": 0.0, + "learning_rate": 1.5581135559454653e-06, + "loss": 0.6653, + "step": 2675 + }, + { + "epoch": 0.8255437297547432, + "grad_norm": 0.0, + "learning_rate": 1.5527597122260307e-06, + "loss": 0.6033, + "step": 2676 + }, + { + "epoch": 0.8258522289063706, + "grad_norm": 0.0, + "learning_rate": 1.547414308183005e-06, + "loss": 0.6975, + "step": 2677 + }, + { + "epoch": 0.8261607280579979, + "grad_norm": 0.0, + "learning_rate": 1.542077349157005e-06, + "loss": 0.6166, + "step": 2678 + }, + { + "epoch": 0.8264692272096251, + "grad_norm": 0.0, + "learning_rate": 1.536748840480209e-06, + "loss": 0.7719, + "step": 2679 + }, + { + "epoch": 0.8267777263612525, + "grad_norm": 0.0, + "learning_rate": 1.5314287874763578e-06, + "loss": 0.7122, + "step": 2680 + }, + { + "epoch": 0.8270862255128798, + "grad_norm": 0.0, + "learning_rate": 1.5261171954607435e-06, + "loss": 0.7992, + "step": 2681 + }, + { + "epoch": 0.8273947246645071, + "grad_norm": 0.0, + "learning_rate": 1.5208140697401942e-06, + "loss": 0.7702, + "step": 2682 + }, + { + "epoch": 0.8277032238161345, + "grad_norm": 0.0, + "learning_rate": 1.5155194156130936e-06, + "loss": 0.6515, + "step": 2683 + }, + { + "epoch": 0.8280117229677618, + "grad_norm": 0.0, + "learning_rate": 1.5102332383693496e-06, + "loss": 0.6377, + "step": 2684 + }, + { + "epoch": 0.8283202221193892, + "grad_norm": 0.0, + "learning_rate": 1.5049555432904118e-06, + "loss": 0.6163, + "step": 2685 + }, + { + "epoch": 0.8286287212710165, + "grad_norm": 0.0, + "learning_rate": 1.4996863356492464e-06, + "loss": 0.6824, + "step": 2686 + }, + { + "epoch": 0.8289372204226438, + "grad_norm": 0.0, + "learning_rate": 1.4944256207103403e-06, + "loss": 0.734, + "step": 2687 + }, + { + "epoch": 0.8292457195742712, + "grad_norm": 0.0, + "learning_rate": 1.4891734037297035e-06, + "loss": 0.7307, + "step": 2688 + }, + { + "epoch": 0.8295542187258985, + "grad_norm": 0.0, + "learning_rate": 1.4839296899548438e-06, + "loss": 0.6306, + "step": 2689 + }, + { + "epoch": 0.8298627178775259, + "grad_norm": 0.0, + "learning_rate": 1.4786944846247887e-06, + "loss": 0.7072, + "step": 2690 + }, + { + "epoch": 0.8301712170291532, + "grad_norm": 0.0, + "learning_rate": 1.4734677929700435e-06, + "loss": 0.7571, + "step": 2691 + }, + { + "epoch": 0.8304797161807805, + "grad_norm": 0.0, + "learning_rate": 1.4682496202126283e-06, + "loss": 0.723, + "step": 2692 + }, + { + "epoch": 0.8307882153324079, + "grad_norm": 0.0, + "learning_rate": 1.4630399715660425e-06, + "loss": 0.7182, + "step": 2693 + }, + { + "epoch": 0.8310967144840352, + "grad_norm": 0.0, + "learning_rate": 1.4578388522352682e-06, + "loss": 0.623, + "step": 2694 + }, + { + "epoch": 0.8314052136356626, + "grad_norm": 0.0, + "learning_rate": 1.4526462674167685e-06, + "loss": 0.6567, + "step": 2695 + }, + { + "epoch": 0.8317137127872898, + "grad_norm": 0.0, + "learning_rate": 1.447462222298477e-06, + "loss": 0.662, + "step": 2696 + }, + { + "epoch": 0.8320222119389171, + "grad_norm": 0.0, + "learning_rate": 1.442286722059799e-06, + "loss": 0.7251, + "step": 2697 + }, + { + "epoch": 0.8323307110905445, + "grad_norm": 0.0, + "learning_rate": 1.4371197718715967e-06, + "loss": 0.6979, + "step": 2698 + }, + { + "epoch": 0.8326392102421718, + "grad_norm": 0.0, + "learning_rate": 1.4319613768962003e-06, + "loss": 0.6503, + "step": 2699 + }, + { + "epoch": 0.8329477093937991, + "grad_norm": 0.0, + "learning_rate": 1.4268115422873807e-06, + "loss": 0.7083, + "step": 2700 + }, + { + "epoch": 0.8332562085454265, + "grad_norm": 0.0, + "learning_rate": 1.4216702731903608e-06, + "loss": 0.6817, + "step": 2701 + }, + { + "epoch": 0.8335647076970538, + "grad_norm": 0.0, + "learning_rate": 1.4165375747418097e-06, + "loss": 0.7009, + "step": 2702 + }, + { + "epoch": 0.8338732068486812, + "grad_norm": 0.0, + "learning_rate": 1.4114134520698286e-06, + "loss": 0.7503, + "step": 2703 + }, + { + "epoch": 0.8341817060003085, + "grad_norm": 0.0, + "learning_rate": 1.4062979102939478e-06, + "loss": 0.6682, + "step": 2704 + }, + { + "epoch": 0.8344902051519358, + "grad_norm": 0.0, + "learning_rate": 1.4011909545251357e-06, + "loss": 0.6859, + "step": 2705 + }, + { + "epoch": 0.8347987043035632, + "grad_norm": 0.0, + "learning_rate": 1.3960925898657674e-06, + "loss": 0.7062, + "step": 2706 + }, + { + "epoch": 0.8351072034551905, + "grad_norm": 0.0, + "learning_rate": 1.3910028214096517e-06, + "loss": 0.6987, + "step": 2707 + }, + { + "epoch": 0.8354157026068179, + "grad_norm": 0.0, + "learning_rate": 1.3859216542419907e-06, + "loss": 0.6976, + "step": 2708 + }, + { + "epoch": 0.8357242017584452, + "grad_norm": 0.0, + "learning_rate": 1.3808490934394058e-06, + "loss": 0.7817, + "step": 2709 + }, + { + "epoch": 0.8360327009100725, + "grad_norm": 0.0, + "learning_rate": 1.3757851440699132e-06, + "loss": 0.6481, + "step": 2710 + }, + { + "epoch": 0.8363412000616999, + "grad_norm": 0.0, + "learning_rate": 1.3707298111929335e-06, + "loss": 0.7614, + "step": 2711 + }, + { + "epoch": 0.8366496992133272, + "grad_norm": 0.0, + "learning_rate": 1.3656830998592674e-06, + "loss": 0.6758, + "step": 2712 + }, + { + "epoch": 0.8369581983649544, + "grad_norm": 0.0, + "learning_rate": 1.3606450151111083e-06, + "loss": 0.6121, + "step": 2713 + }, + { + "epoch": 0.8372666975165818, + "grad_norm": 0.0, + "learning_rate": 1.3556155619820322e-06, + "loss": 0.6688, + "step": 2714 + }, + { + "epoch": 0.8375751966682091, + "grad_norm": 0.0, + "learning_rate": 1.3505947454969849e-06, + "loss": 0.6738, + "step": 2715 + }, + { + "epoch": 0.8378836958198365, + "grad_norm": 0.0, + "learning_rate": 1.3455825706722913e-06, + "loss": 0.6668, + "step": 2716 + }, + { + "epoch": 0.8381921949714638, + "grad_norm": 0.0, + "learning_rate": 1.340579042515636e-06, + "loss": 0.7641, + "step": 2717 + }, + { + "epoch": 0.8385006941230911, + "grad_norm": 0.0, + "learning_rate": 1.3355841660260648e-06, + "loss": 0.677, + "step": 2718 + }, + { + "epoch": 0.8388091932747185, + "grad_norm": 0.0, + "learning_rate": 1.3305979461939866e-06, + "loss": 0.7054, + "step": 2719 + }, + { + "epoch": 0.8391176924263458, + "grad_norm": 0.0, + "learning_rate": 1.3256203880011531e-06, + "loss": 0.7803, + "step": 2720 + }, + { + "epoch": 0.8394261915779732, + "grad_norm": 0.0, + "learning_rate": 1.3206514964206664e-06, + "loss": 0.6283, + "step": 2721 + }, + { + "epoch": 0.8397346907296005, + "grad_norm": 0.0, + "learning_rate": 1.315691276416966e-06, + "loss": 0.6933, + "step": 2722 + }, + { + "epoch": 0.8400431898812278, + "grad_norm": 0.0, + "learning_rate": 1.3107397329458348e-06, + "loss": 0.6158, + "step": 2723 + }, + { + "epoch": 0.8403516890328552, + "grad_norm": 0.0, + "learning_rate": 1.3057968709543812e-06, + "loss": 0.6628, + "step": 2724 + }, + { + "epoch": 0.8406601881844825, + "grad_norm": 0.0, + "learning_rate": 1.3008626953810388e-06, + "loss": 0.7122, + "step": 2725 + }, + { + "epoch": 0.8409686873361099, + "grad_norm": 0.0, + "learning_rate": 1.295937211155568e-06, + "loss": 0.7095, + "step": 2726 + }, + { + "epoch": 0.8412771864877372, + "grad_norm": 0.0, + "learning_rate": 1.2910204231990397e-06, + "loss": 0.6882, + "step": 2727 + }, + { + "epoch": 0.8415856856393645, + "grad_norm": 0.0, + "learning_rate": 1.2861123364238438e-06, + "loss": 0.7383, + "step": 2728 + }, + { + "epoch": 0.8418941847909919, + "grad_norm": 0.0, + "learning_rate": 1.2812129557336684e-06, + "loss": 0.7447, + "step": 2729 + }, + { + "epoch": 0.8422026839426192, + "grad_norm": 0.0, + "learning_rate": 1.276322286023506e-06, + "loss": 0.6829, + "step": 2730 + }, + { + "epoch": 0.8425111830942464, + "grad_norm": 0.0, + "learning_rate": 1.271440332179652e-06, + "loss": 0.6144, + "step": 2731 + }, + { + "epoch": 0.8428196822458738, + "grad_norm": 0.0, + "learning_rate": 1.266567099079683e-06, + "loss": 0.7344, + "step": 2732 + }, + { + "epoch": 0.8431281813975011, + "grad_norm": 0.0, + "learning_rate": 1.2617025915924764e-06, + "loss": 0.6714, + "step": 2733 + }, + { + "epoch": 0.8434366805491285, + "grad_norm": 0.0, + "learning_rate": 1.2568468145781753e-06, + "loss": 0.6946, + "step": 2734 + }, + { + "epoch": 0.8437451797007558, + "grad_norm": 0.0, + "learning_rate": 1.2519997728882138e-06, + "loss": 0.7134, + "step": 2735 + }, + { + "epoch": 0.8440536788523831, + "grad_norm": 0.0, + "learning_rate": 1.2471614713652902e-06, + "loss": 0.7373, + "step": 2736 + }, + { + "epoch": 0.8443621780040105, + "grad_norm": 0.0, + "learning_rate": 1.2423319148433777e-06, + "loss": 0.7617, + "step": 2737 + }, + { + "epoch": 0.8446706771556378, + "grad_norm": 0.0, + "learning_rate": 1.2375111081477065e-06, + "loss": 0.6986, + "step": 2738 + }, + { + "epoch": 0.8449791763072652, + "grad_norm": 0.0, + "learning_rate": 1.2326990560947627e-06, + "loss": 0.6288, + "step": 2739 + }, + { + "epoch": 0.8452876754588925, + "grad_norm": 0.0, + "learning_rate": 1.2278957634922972e-06, + "loss": 0.6866, + "step": 2740 + }, + { + "epoch": 0.8455961746105198, + "grad_norm": 0.0, + "learning_rate": 1.2231012351392958e-06, + "loss": 0.664, + "step": 2741 + }, + { + "epoch": 0.8459046737621472, + "grad_norm": 0.0, + "learning_rate": 1.218315475825994e-06, + "loss": 0.6362, + "step": 2742 + }, + { + "epoch": 0.8462131729137745, + "grad_norm": 0.0, + "learning_rate": 1.2135384903338699e-06, + "loss": 0.7224, + "step": 2743 + }, + { + "epoch": 0.8465216720654019, + "grad_norm": 0.0, + "learning_rate": 1.2087702834356264e-06, + "loss": 0.6184, + "step": 2744 + }, + { + "epoch": 0.8468301712170292, + "grad_norm": 0.0, + "learning_rate": 1.2040108598952072e-06, + "loss": 0.7239, + "step": 2745 + }, + { + "epoch": 0.8471386703686565, + "grad_norm": 0.0, + "learning_rate": 1.1992602244677653e-06, + "loss": 0.7085, + "step": 2746 + }, + { + "epoch": 0.8474471695202839, + "grad_norm": 0.0, + "learning_rate": 1.194518381899691e-06, + "loss": 0.712, + "step": 2747 + }, + { + "epoch": 0.8477556686719111, + "grad_norm": 0.0, + "learning_rate": 1.1897853369285738e-06, + "loss": 0.6558, + "step": 2748 + }, + { + "epoch": 0.8480641678235384, + "grad_norm": 0.0, + "learning_rate": 1.185061094283224e-06, + "loss": 0.6414, + "step": 2749 + }, + { + "epoch": 0.8483726669751658, + "grad_norm": 0.0, + "learning_rate": 1.180345658683658e-06, + "loss": 0.6253, + "step": 2750 + }, + { + "epoch": 0.8486811661267931, + "grad_norm": 0.0, + "learning_rate": 1.1756390348410819e-06, + "loss": 0.699, + "step": 2751 + }, + { + "epoch": 0.8489896652784205, + "grad_norm": 0.0, + "learning_rate": 1.170941227457909e-06, + "loss": 0.6701, + "step": 2752 + }, + { + "epoch": 0.8492981644300478, + "grad_norm": 0.0, + "learning_rate": 1.1662522412277388e-06, + "loss": 0.7196, + "step": 2753 + }, + { + "epoch": 0.8496066635816751, + "grad_norm": 0.0, + "learning_rate": 1.1615720808353604e-06, + "loss": 0.7144, + "step": 2754 + }, + { + "epoch": 0.8499151627333025, + "grad_norm": 0.0, + "learning_rate": 1.156900750956742e-06, + "loss": 0.7172, + "step": 2755 + }, + { + "epoch": 0.8502236618849298, + "grad_norm": 0.0, + "learning_rate": 1.1522382562590305e-06, + "loss": 0.6855, + "step": 2756 + }, + { + "epoch": 0.8505321610365572, + "grad_norm": 0.0, + "learning_rate": 1.1475846014005477e-06, + "loss": 0.6519, + "step": 2757 + }, + { + "epoch": 0.8508406601881845, + "grad_norm": 0.0, + "learning_rate": 1.1429397910307794e-06, + "loss": 0.6767, + "step": 2758 + }, + { + "epoch": 0.8511491593398118, + "grad_norm": 0.0, + "learning_rate": 1.138303829790378e-06, + "loss": 0.7498, + "step": 2759 + }, + { + "epoch": 0.8514576584914392, + "grad_norm": 0.0, + "learning_rate": 1.1336767223111521e-06, + "loss": 0.7421, + "step": 2760 + }, + { + "epoch": 0.8517661576430665, + "grad_norm": 0.0, + "learning_rate": 1.129058473216066e-06, + "loss": 0.6301, + "step": 2761 + }, + { + "epoch": 0.8520746567946939, + "grad_norm": 0.0, + "learning_rate": 1.1244490871192382e-06, + "loss": 0.6477, + "step": 2762 + }, + { + "epoch": 0.8523831559463212, + "grad_norm": 0.0, + "learning_rate": 1.1198485686259208e-06, + "loss": 0.6272, + "step": 2763 + }, + { + "epoch": 0.8526916550979485, + "grad_norm": 0.0, + "learning_rate": 1.1152569223325171e-06, + "loss": 0.698, + "step": 2764 + }, + { + "epoch": 0.8530001542495758, + "grad_norm": 0.0, + "learning_rate": 1.1106741528265585e-06, + "loss": 0.6155, + "step": 2765 + }, + { + "epoch": 0.8533086534012031, + "grad_norm": 0.0, + "learning_rate": 1.1061002646867159e-06, + "loss": 0.6813, + "step": 2766 + }, + { + "epoch": 0.8536171525528304, + "grad_norm": 0.0, + "learning_rate": 1.101535262482778e-06, + "loss": 0.6624, + "step": 2767 + }, + { + "epoch": 0.8539256517044578, + "grad_norm": 0.0, + "learning_rate": 1.0969791507756588e-06, + "loss": 0.9815, + "step": 2768 + }, + { + "epoch": 0.8542341508560851, + "grad_norm": 0.0, + "learning_rate": 1.0924319341173938e-06, + "loss": 0.7458, + "step": 2769 + }, + { + "epoch": 0.8545426500077125, + "grad_norm": 0.0, + "learning_rate": 1.0878936170511246e-06, + "loss": 0.7608, + "step": 2770 + }, + { + "epoch": 0.8548511491593398, + "grad_norm": 0.0, + "learning_rate": 1.0833642041111102e-06, + "loss": 0.666, + "step": 2771 + }, + { + "epoch": 0.8551596483109671, + "grad_norm": 0.0, + "learning_rate": 1.0788436998226991e-06, + "loss": 0.7159, + "step": 2772 + }, + { + "epoch": 0.8554681474625945, + "grad_norm": 0.0, + "learning_rate": 1.0743321087023528e-06, + "loss": 0.6403, + "step": 2773 + }, + { + "epoch": 0.8557766466142218, + "grad_norm": 0.0, + "learning_rate": 1.0698294352576232e-06, + "loss": 0.706, + "step": 2774 + }, + { + "epoch": 0.8560851457658492, + "grad_norm": 0.0, + "learning_rate": 1.065335683987152e-06, + "loss": 0.634, + "step": 2775 + }, + { + "epoch": 0.8563936449174765, + "grad_norm": 0.0, + "learning_rate": 1.0608508593806655e-06, + "loss": 0.7068, + "step": 2776 + }, + { + "epoch": 0.8567021440691038, + "grad_norm": 0.0, + "learning_rate": 1.0563749659189703e-06, + "loss": 0.7045, + "step": 2777 + }, + { + "epoch": 0.8570106432207312, + "grad_norm": 0.0, + "learning_rate": 1.0519080080739562e-06, + "loss": 0.6228, + "step": 2778 + }, + { + "epoch": 0.8573191423723585, + "grad_norm": 0.0, + "learning_rate": 1.047449990308579e-06, + "loss": 0.767, + "step": 2779 + }, + { + "epoch": 0.8576276415239859, + "grad_norm": 0.0, + "learning_rate": 1.043000917076863e-06, + "loss": 0.714, + "step": 2780 + }, + { + "epoch": 0.8579361406756132, + "grad_norm": 0.0, + "learning_rate": 1.0385607928239017e-06, + "loss": 0.6613, + "step": 2781 + }, + { + "epoch": 0.8582446398272405, + "grad_norm": 0.0, + "learning_rate": 1.0341296219858398e-06, + "loss": 0.6747, + "step": 2782 + }, + { + "epoch": 0.8585531389788678, + "grad_norm": 0.0, + "learning_rate": 1.029707408989884e-06, + "loss": 0.7062, + "step": 2783 + }, + { + "epoch": 0.8588616381304951, + "grad_norm": 0.0, + "learning_rate": 1.0252941582542852e-06, + "loss": 0.5831, + "step": 2784 + }, + { + "epoch": 0.8591701372821224, + "grad_norm": 0.0, + "learning_rate": 1.02088987418834e-06, + "loss": 0.6918, + "step": 2785 + }, + { + "epoch": 0.8594786364337498, + "grad_norm": 0.0, + "learning_rate": 1.0164945611923948e-06, + "loss": 0.7209, + "step": 2786 + }, + { + "epoch": 0.8597871355853771, + "grad_norm": 0.0, + "learning_rate": 1.0121082236578205e-06, + "loss": 0.6809, + "step": 2787 + }, + { + "epoch": 0.8600956347370045, + "grad_norm": 0.0, + "learning_rate": 1.0077308659670348e-06, + "loss": 0.7467, + "step": 2788 + }, + { + "epoch": 0.8604041338886318, + "grad_norm": 0.0, + "learning_rate": 1.003362492493466e-06, + "loss": 0.6971, + "step": 2789 + }, + { + "epoch": 0.8607126330402591, + "grad_norm": 0.0, + "learning_rate": 9.990031076015838e-07, + "loss": 0.7136, + "step": 2790 + }, + { + "epoch": 0.8610211321918865, + "grad_norm": 0.0, + "learning_rate": 9.946527156468643e-07, + "loss": 0.7539, + "step": 2791 + }, + { + "epoch": 0.8613296313435138, + "grad_norm": 0.0, + "learning_rate": 9.903113209758098e-07, + "loss": 0.7467, + "step": 2792 + }, + { + "epoch": 0.8616381304951412, + "grad_norm": 0.0, + "learning_rate": 9.859789279259225e-07, + "loss": 0.6418, + "step": 2793 + }, + { + "epoch": 0.8619466296467685, + "grad_norm": 0.0, + "learning_rate": 9.816555408257177e-07, + "loss": 0.7238, + "step": 2794 + }, + { + "epoch": 0.8622551287983958, + "grad_norm": 0.0, + "learning_rate": 9.773411639947117e-07, + "loss": 0.6799, + "step": 2795 + }, + { + "epoch": 0.8625636279500232, + "grad_norm": 0.0, + "learning_rate": 9.730358017434194e-07, + "loss": 0.6993, + "step": 2796 + }, + { + "epoch": 0.8628721271016505, + "grad_norm": 0.0, + "learning_rate": 9.687394583733435e-07, + "loss": 0.659, + "step": 2797 + }, + { + "epoch": 0.8631806262532778, + "grad_norm": 0.0, + "learning_rate": 9.644521381769855e-07, + "loss": 0.7527, + "step": 2798 + }, + { + "epoch": 0.8634891254049052, + "grad_norm": 0.0, + "learning_rate": 9.601738454378229e-07, + "loss": 0.7055, + "step": 2799 + }, + { + "epoch": 0.8637976245565324, + "grad_norm": 0.0, + "learning_rate": 9.559045844303193e-07, + "loss": 0.9917, + "step": 2800 + }, + { + "epoch": 0.8641061237081598, + "grad_norm": 0.0, + "learning_rate": 9.516443594199143e-07, + "loss": 0.677, + "step": 2801 + }, + { + "epoch": 0.8644146228597871, + "grad_norm": 0.0, + "learning_rate": 9.47393174663016e-07, + "loss": 0.7474, + "step": 2802 + }, + { + "epoch": 0.8647231220114144, + "grad_norm": 0.0, + "learning_rate": 9.431510344070005e-07, + "loss": 0.7235, + "step": 2803 + }, + { + "epoch": 0.8650316211630418, + "grad_norm": 0.0, + "learning_rate": 9.389179428902139e-07, + "loss": 0.5918, + "step": 2804 + }, + { + "epoch": 0.8653401203146691, + "grad_norm": 0.0, + "learning_rate": 9.346939043419568e-07, + "loss": 0.6162, + "step": 2805 + }, + { + "epoch": 0.8656486194662965, + "grad_norm": 0.0, + "learning_rate": 9.304789229824806e-07, + "loss": 0.7186, + "step": 2806 + }, + { + "epoch": 0.8659571186179238, + "grad_norm": 0.0, + "learning_rate": 9.262730030229983e-07, + "loss": 0.9891, + "step": 2807 + }, + { + "epoch": 0.8662656177695511, + "grad_norm": 0.0, + "learning_rate": 9.220761486656593e-07, + "loss": 0.7034, + "step": 2808 + }, + { + "epoch": 0.8665741169211785, + "grad_norm": 0.0, + "learning_rate": 9.178883641035652e-07, + "loss": 0.7197, + "step": 2809 + }, + { + "epoch": 0.8668826160728058, + "grad_norm": 0.0, + "learning_rate": 9.137096535207457e-07, + "loss": 0.6205, + "step": 2810 + }, + { + "epoch": 0.8671911152244332, + "grad_norm": 0.0, + "learning_rate": 9.095400210921712e-07, + "loss": 0.6383, + "step": 2811 + }, + { + "epoch": 0.8674996143760605, + "grad_norm": 0.0, + "learning_rate": 9.053794709837415e-07, + "loss": 0.7297, + "step": 2812 + }, + { + "epoch": 0.8678081135276878, + "grad_norm": 0.0, + "learning_rate": 9.012280073522816e-07, + "loss": 0.6416, + "step": 2813 + }, + { + "epoch": 0.8681166126793152, + "grad_norm": 0.0, + "learning_rate": 8.97085634345537e-07, + "loss": 0.752, + "step": 2814 + }, + { + "epoch": 0.8684251118309425, + "grad_norm": 0.0, + "learning_rate": 8.929523561021691e-07, + "loss": 0.6442, + "step": 2815 + }, + { + "epoch": 0.8687336109825698, + "grad_norm": 0.0, + "learning_rate": 8.888281767517582e-07, + "loss": 0.6653, + "step": 2816 + }, + { + "epoch": 0.8690421101341971, + "grad_norm": 0.0, + "learning_rate": 8.847131004147891e-07, + "loss": 0.7447, + "step": 2817 + }, + { + "epoch": 0.8693506092858244, + "grad_norm": 0.0, + "learning_rate": 8.806071312026554e-07, + "loss": 0.7057, + "step": 2818 + }, + { + "epoch": 0.8696591084374518, + "grad_norm": 0.0, + "learning_rate": 8.765102732176489e-07, + "loss": 0.7113, + "step": 2819 + }, + { + "epoch": 0.8699676075890791, + "grad_norm": 0.0, + "learning_rate": 8.724225305529577e-07, + "loss": 0.7002, + "step": 2820 + }, + { + "epoch": 0.8702761067407064, + "grad_norm": 0.0, + "learning_rate": 8.683439072926658e-07, + "loss": 0.7051, + "step": 2821 + }, + { + "epoch": 0.8705846058923338, + "grad_norm": 0.0, + "learning_rate": 8.642744075117448e-07, + "loss": 0.6729, + "step": 2822 + }, + { + "epoch": 0.8708931050439611, + "grad_norm": 0.0, + "learning_rate": 8.602140352760469e-07, + "loss": 0.6511, + "step": 2823 + }, + { + "epoch": 0.8712016041955885, + "grad_norm": 0.0, + "learning_rate": 8.561627946423134e-07, + "loss": 0.6652, + "step": 2824 + }, + { + "epoch": 0.8715101033472158, + "grad_norm": 0.0, + "learning_rate": 8.521206896581535e-07, + "loss": 0.6922, + "step": 2825 + }, + { + "epoch": 0.8718186024988431, + "grad_norm": 0.0, + "learning_rate": 8.480877243620578e-07, + "loss": 0.7121, + "step": 2826 + }, + { + "epoch": 0.8721271016504705, + "grad_norm": 0.0, + "learning_rate": 8.44063902783373e-07, + "loss": 0.7083, + "step": 2827 + }, + { + "epoch": 0.8724356008020978, + "grad_norm": 0.0, + "learning_rate": 8.400492289423235e-07, + "loss": 0.8806, + "step": 2828 + }, + { + "epoch": 0.8727440999537251, + "grad_norm": 0.0, + "learning_rate": 8.360437068499849e-07, + "loss": 0.6684, + "step": 2829 + }, + { + "epoch": 0.8730525991053525, + "grad_norm": 0.0, + "learning_rate": 8.320473405082952e-07, + "loss": 0.7055, + "step": 2830 + }, + { + "epoch": 0.8733610982569798, + "grad_norm": 0.0, + "learning_rate": 8.280601339100436e-07, + "loss": 0.6907, + "step": 2831 + }, + { + "epoch": 0.8736695974086072, + "grad_norm": 0.0, + "learning_rate": 8.240820910388614e-07, + "loss": 0.6585, + "step": 2832 + }, + { + "epoch": 0.8739780965602345, + "grad_norm": 0.0, + "learning_rate": 8.201132158692327e-07, + "loss": 0.7069, + "step": 2833 + }, + { + "epoch": 0.8742865957118618, + "grad_norm": 0.0, + "learning_rate": 8.161535123664776e-07, + "loss": 0.6241, + "step": 2834 + }, + { + "epoch": 0.8745950948634891, + "grad_norm": 0.0, + "learning_rate": 8.122029844867562e-07, + "loss": 0.6609, + "step": 2835 + }, + { + "epoch": 0.8749035940151164, + "grad_norm": 0.0, + "learning_rate": 8.082616361770579e-07, + "loss": 0.7406, + "step": 2836 + }, + { + "epoch": 0.8752120931667438, + "grad_norm": 0.0, + "learning_rate": 8.043294713751992e-07, + "loss": 0.6594, + "step": 2837 + }, + { + "epoch": 0.8755205923183711, + "grad_norm": 0.0, + "learning_rate": 8.004064940098277e-07, + "loss": 0.6504, + "step": 2838 + }, + { + "epoch": 0.8758290914699984, + "grad_norm": 0.0, + "learning_rate": 7.964927080004059e-07, + "loss": 0.6821, + "step": 2839 + }, + { + "epoch": 0.8761375906216258, + "grad_norm": 0.0, + "learning_rate": 7.925881172572169e-07, + "loss": 0.7181, + "step": 2840 + }, + { + "epoch": 0.8764460897732531, + "grad_norm": 0.0, + "learning_rate": 7.886927256813514e-07, + "loss": 0.6449, + "step": 2841 + }, + { + "epoch": 0.8767545889248805, + "grad_norm": 0.0, + "learning_rate": 7.848065371647162e-07, + "loss": 0.6459, + "step": 2842 + }, + { + "epoch": 0.8770630880765078, + "grad_norm": 0.0, + "learning_rate": 7.809295555900231e-07, + "loss": 0.7463, + "step": 2843 + }, + { + "epoch": 0.8773715872281351, + "grad_norm": 0.0, + "learning_rate": 7.770617848307749e-07, + "loss": 0.6198, + "step": 2844 + }, + { + "epoch": 0.8776800863797625, + "grad_norm": 0.0, + "learning_rate": 7.732032287512847e-07, + "loss": 0.7149, + "step": 2845 + }, + { + "epoch": 0.8779885855313898, + "grad_norm": 0.0, + "learning_rate": 7.6935389120665e-07, + "loss": 0.6899, + "step": 2846 + }, + { + "epoch": 0.8782970846830171, + "grad_norm": 0.0, + "learning_rate": 7.655137760427645e-07, + "loss": 0.6932, + "step": 2847 + }, + { + "epoch": 0.8786055838346445, + "grad_norm": 0.0, + "learning_rate": 7.616828870963023e-07, + "loss": 0.6516, + "step": 2848 + }, + { + "epoch": 0.8789140829862718, + "grad_norm": 0.0, + "learning_rate": 7.578612281947229e-07, + "loss": 0.6304, + "step": 2849 + }, + { + "epoch": 0.8792225821378992, + "grad_norm": 0.0, + "learning_rate": 7.540488031562642e-07, + "loss": 0.6516, + "step": 2850 + }, + { + "epoch": 0.8795310812895265, + "grad_norm": 0.0, + "learning_rate": 7.502456157899351e-07, + "loss": 0.6183, + "step": 2851 + }, + { + "epoch": 0.8798395804411537, + "grad_norm": 0.0, + "learning_rate": 7.464516698955226e-07, + "loss": 0.6367, + "step": 2852 + }, + { + "epoch": 0.8801480795927811, + "grad_norm": 0.0, + "learning_rate": 7.426669692635691e-07, + "loss": 0.673, + "step": 2853 + }, + { + "epoch": 0.8804565787444084, + "grad_norm": 0.0, + "learning_rate": 7.388915176753886e-07, + "loss": 0.7608, + "step": 2854 + }, + { + "epoch": 0.8807650778960358, + "grad_norm": 0.0, + "learning_rate": 7.351253189030538e-07, + "loss": 0.7474, + "step": 2855 + }, + { + "epoch": 0.8810735770476631, + "grad_norm": 0.0, + "learning_rate": 7.313683767093915e-07, + "loss": 0.669, + "step": 2856 + }, + { + "epoch": 0.8813820761992904, + "grad_norm": 0.0, + "learning_rate": 7.276206948479769e-07, + "loss": 0.633, + "step": 2857 + }, + { + "epoch": 0.8816905753509178, + "grad_norm": 0.0, + "learning_rate": 7.23882277063136e-07, + "loss": 0.6076, + "step": 2858 + }, + { + "epoch": 0.8819990745025451, + "grad_norm": 0.0, + "learning_rate": 7.20153127089942e-07, + "loss": 0.6447, + "step": 2859 + }, + { + "epoch": 0.8823075736541725, + "grad_norm": 0.0, + "learning_rate": 7.164332486542036e-07, + "loss": 0.7517, + "step": 2860 + }, + { + "epoch": 0.8826160728057998, + "grad_norm": 0.0, + "learning_rate": 7.127226454724689e-07, + "loss": 0.7321, + "step": 2861 + }, + { + "epoch": 0.8829245719574271, + "grad_norm": 0.0, + "learning_rate": 7.090213212520191e-07, + "loss": 0.6784, + "step": 2862 + }, + { + "epoch": 0.8832330711090545, + "grad_norm": 0.0, + "learning_rate": 7.053292796908629e-07, + "loss": 0.7292, + "step": 2863 + }, + { + "epoch": 0.8835415702606818, + "grad_norm": 0.0, + "learning_rate": 7.01646524477737e-07, + "loss": 0.6657, + "step": 2864 + }, + { + "epoch": 0.8838500694123091, + "grad_norm": 0.0, + "learning_rate": 6.979730592920997e-07, + "loss": 0.7044, + "step": 2865 + }, + { + "epoch": 0.8841585685639365, + "grad_norm": 0.0, + "learning_rate": 6.943088878041238e-07, + "loss": 0.7455, + "step": 2866 + }, + { + "epoch": 0.8844670677155638, + "grad_norm": 0.0, + "learning_rate": 6.906540136747031e-07, + "loss": 0.6747, + "step": 2867 + }, + { + "epoch": 0.8847755668671912, + "grad_norm": 0.0, + "learning_rate": 6.870084405554367e-07, + "loss": 0.6485, + "step": 2868 + }, + { + "epoch": 0.8850840660188184, + "grad_norm": 0.0, + "learning_rate": 6.833721720886377e-07, + "loss": 0.7415, + "step": 2869 + }, + { + "epoch": 0.8853925651704457, + "grad_norm": 0.0, + "learning_rate": 6.797452119073111e-07, + "loss": 0.6579, + "step": 2870 + }, + { + "epoch": 0.8857010643220731, + "grad_norm": 0.0, + "learning_rate": 6.76127563635176e-07, + "loss": 0.7267, + "step": 2871 + }, + { + "epoch": 0.8860095634737004, + "grad_norm": 0.0, + "learning_rate": 6.725192308866368e-07, + "loss": 0.7453, + "step": 2872 + }, + { + "epoch": 0.8863180626253278, + "grad_norm": 0.0, + "learning_rate": 6.689202172667996e-07, + "loss": 0.6308, + "step": 2873 + }, + { + "epoch": 0.8866265617769551, + "grad_norm": 0.0, + "learning_rate": 6.653305263714526e-07, + "loss": 0.6391, + "step": 2874 + }, + { + "epoch": 0.8869350609285824, + "grad_norm": 0.0, + "learning_rate": 6.617501617870726e-07, + "loss": 0.6722, + "step": 2875 + }, + { + "epoch": 0.8872435600802098, + "grad_norm": 0.0, + "learning_rate": 6.581791270908189e-07, + "loss": 0.657, + "step": 2876 + }, + { + "epoch": 0.8875520592318371, + "grad_norm": 0.0, + "learning_rate": 6.546174258505289e-07, + "loss": 0.6685, + "step": 2877 + }, + { + "epoch": 0.8878605583834644, + "grad_norm": 0.0, + "learning_rate": 6.510650616247116e-07, + "loss": 0.6154, + "step": 2878 + }, + { + "epoch": 0.8881690575350918, + "grad_norm": 0.0, + "learning_rate": 6.475220379625546e-07, + "loss": 0.7037, + "step": 2879 + }, + { + "epoch": 0.8884775566867191, + "grad_norm": 0.0, + "learning_rate": 6.439883584039064e-07, + "loss": 0.6844, + "step": 2880 + }, + { + "epoch": 0.8887860558383465, + "grad_norm": 0.0, + "learning_rate": 6.404640264792839e-07, + "loss": 0.7061, + "step": 2881 + }, + { + "epoch": 0.8890945549899738, + "grad_norm": 0.0, + "learning_rate": 6.369490457098626e-07, + "loss": 0.7333, + "step": 2882 + }, + { + "epoch": 0.8894030541416011, + "grad_norm": 0.0, + "learning_rate": 6.334434196074757e-07, + "loss": 0.7895, + "step": 2883 + }, + { + "epoch": 0.8897115532932285, + "grad_norm": 0.0, + "learning_rate": 6.299471516746092e-07, + "loss": 0.6718, + "step": 2884 + }, + { + "epoch": 0.8900200524448558, + "grad_norm": 0.0, + "learning_rate": 6.264602454044033e-07, + "loss": 0.7163, + "step": 2885 + }, + { + "epoch": 0.8903285515964832, + "grad_norm": 0.0, + "learning_rate": 6.229827042806403e-07, + "loss": 0.7093, + "step": 2886 + }, + { + "epoch": 0.8906370507481104, + "grad_norm": 0.0, + "learning_rate": 6.195145317777452e-07, + "loss": 0.6634, + "step": 2887 + }, + { + "epoch": 0.8909455498997377, + "grad_norm": 0.0, + "learning_rate": 6.160557313607884e-07, + "loss": 0.72, + "step": 2888 + }, + { + "epoch": 0.8912540490513651, + "grad_norm": 0.0, + "learning_rate": 6.126063064854715e-07, + "loss": 0.7211, + "step": 2889 + }, + { + "epoch": 0.8915625482029924, + "grad_norm": 0.0, + "learning_rate": 6.091662605981319e-07, + "loss": 0.7169, + "step": 2890 + }, + { + "epoch": 0.8918710473546198, + "grad_norm": 0.0, + "learning_rate": 6.057355971357359e-07, + "loss": 0.6511, + "step": 2891 + }, + { + "epoch": 0.8921795465062471, + "grad_norm": 0.0, + "learning_rate": 6.023143195258708e-07, + "loss": 0.7037, + "step": 2892 + }, + { + "epoch": 0.8924880456578744, + "grad_norm": 0.0, + "learning_rate": 5.989024311867553e-07, + "loss": 0.7011, + "step": 2893 + }, + { + "epoch": 0.8927965448095018, + "grad_norm": 0.0, + "learning_rate": 5.954999355272207e-07, + "loss": 0.7156, + "step": 2894 + }, + { + "epoch": 0.8931050439611291, + "grad_norm": 0.0, + "learning_rate": 5.921068359467164e-07, + "loss": 0.7205, + "step": 2895 + }, + { + "epoch": 0.8934135431127564, + "grad_norm": 0.0, + "learning_rate": 5.887231358353018e-07, + "loss": 0.7144, + "step": 2896 + }, + { + "epoch": 0.8937220422643838, + "grad_norm": 0.0, + "learning_rate": 5.853488385736506e-07, + "loss": 0.6505, + "step": 2897 + }, + { + "epoch": 0.8940305414160111, + "grad_norm": 0.0, + "learning_rate": 5.819839475330358e-07, + "loss": 0.69, + "step": 2898 + }, + { + "epoch": 0.8943390405676385, + "grad_norm": 0.0, + "learning_rate": 5.786284660753372e-07, + "loss": 0.7309, + "step": 2899 + }, + { + "epoch": 0.8946475397192658, + "grad_norm": 0.0, + "learning_rate": 5.752823975530308e-07, + "loss": 0.6849, + "step": 2900 + }, + { + "epoch": 0.8949560388708931, + "grad_norm": 0.0, + "learning_rate": 5.719457453091881e-07, + "loss": 0.6756, + "step": 2901 + }, + { + "epoch": 0.8952645380225205, + "grad_norm": 0.0, + "learning_rate": 5.686185126774757e-07, + "loss": 0.6996, + "step": 2902 + }, + { + "epoch": 0.8955730371741478, + "grad_norm": 0.0, + "learning_rate": 5.653007029821467e-07, + "loss": 0.719, + "step": 2903 + }, + { + "epoch": 0.895881536325775, + "grad_norm": 0.0, + "learning_rate": 5.619923195380372e-07, + "loss": 0.7229, + "step": 2904 + }, + { + "epoch": 0.8961900354774024, + "grad_norm": 0.0, + "learning_rate": 5.586933656505711e-07, + "loss": 0.7041, + "step": 2905 + }, + { + "epoch": 0.8964985346290297, + "grad_norm": 0.0, + "learning_rate": 5.554038446157473e-07, + "loss": 0.6751, + "step": 2906 + }, + { + "epoch": 0.8968070337806571, + "grad_norm": 0.0, + "learning_rate": 5.521237597201423e-07, + "loss": 0.7222, + "step": 2907 + }, + { + "epoch": 0.8971155329322844, + "grad_norm": 0.0, + "learning_rate": 5.488531142409015e-07, + "loss": 0.6142, + "step": 2908 + }, + { + "epoch": 0.8974240320839117, + "grad_norm": 0.0, + "learning_rate": 5.455919114457442e-07, + "loss": 0.6486, + "step": 2909 + }, + { + "epoch": 0.8977325312355391, + "grad_norm": 0.0, + "learning_rate": 5.423401545929518e-07, + "loss": 0.6912, + "step": 2910 + }, + { + "epoch": 0.8980410303871664, + "grad_norm": 0.0, + "learning_rate": 5.390978469313701e-07, + "loss": 0.7119, + "step": 2911 + }, + { + "epoch": 0.8983495295387938, + "grad_norm": 0.0, + "learning_rate": 5.358649917004033e-07, + "loss": 0.6782, + "step": 2912 + }, + { + "epoch": 0.8986580286904211, + "grad_norm": 0.0, + "learning_rate": 5.326415921300099e-07, + "loss": 0.7136, + "step": 2913 + }, + { + "epoch": 0.8989665278420484, + "grad_norm": 0.0, + "learning_rate": 5.294276514407071e-07, + "loss": 0.6287, + "step": 2914 + }, + { + "epoch": 0.8992750269936758, + "grad_norm": 0.0, + "learning_rate": 5.262231728435541e-07, + "loss": 0.732, + "step": 2915 + }, + { + "epoch": 0.8995835261453031, + "grad_norm": 0.0, + "learning_rate": 5.230281595401621e-07, + "loss": 0.6561, + "step": 2916 + }, + { + "epoch": 0.8998920252969305, + "grad_norm": 0.0, + "learning_rate": 5.198426147226843e-07, + "loss": 0.6318, + "step": 2917 + }, + { + "epoch": 0.9002005244485578, + "grad_norm": 0.0, + "learning_rate": 5.166665415738093e-07, + "loss": 0.6749, + "step": 2918 + }, + { + "epoch": 0.9005090236001851, + "grad_norm": 0.0, + "learning_rate": 5.134999432667708e-07, + "loss": 0.6787, + "step": 2919 + }, + { + "epoch": 0.9008175227518125, + "grad_norm": 0.0, + "learning_rate": 5.103428229653296e-07, + "loss": 0.7511, + "step": 2920 + }, + { + "epoch": 0.9011260219034397, + "grad_norm": 0.0, + "learning_rate": 5.071951838237787e-07, + "loss": 0.665, + "step": 2921 + }, + { + "epoch": 0.901434521055067, + "grad_norm": 0.0, + "learning_rate": 5.040570289869384e-07, + "loss": 0.6458, + "step": 2922 + }, + { + "epoch": 0.9017430202066944, + "grad_norm": 0.0, + "learning_rate": 5.009283615901561e-07, + "loss": 0.669, + "step": 2923 + }, + { + "epoch": 0.9020515193583217, + "grad_norm": 0.0, + "learning_rate": 4.978091847592959e-07, + "loss": 0.6912, + "step": 2924 + }, + { + "epoch": 0.9023600185099491, + "grad_norm": 0.0, + "learning_rate": 4.946995016107392e-07, + "loss": 0.6912, + "step": 2925 + }, + { + "epoch": 0.9026685176615764, + "grad_norm": 0.0, + "learning_rate": 4.915993152513887e-07, + "loss": 0.7632, + "step": 2926 + }, + { + "epoch": 0.9029770168132037, + "grad_norm": 0.0, + "learning_rate": 4.885086287786522e-07, + "loss": 0.6636, + "step": 2927 + }, + { + "epoch": 0.9032855159648311, + "grad_norm": 0.0, + "learning_rate": 4.854274452804519e-07, + "loss": 0.7389, + "step": 2928 + }, + { + "epoch": 0.9035940151164584, + "grad_norm": 0.0, + "learning_rate": 4.823557678352097e-07, + "loss": 0.6548, + "step": 2929 + }, + { + "epoch": 0.9039025142680858, + "grad_norm": 0.0, + "learning_rate": 4.792935995118509e-07, + "loss": 0.7511, + "step": 2930 + }, + { + "epoch": 0.9042110134197131, + "grad_norm": 0.0, + "learning_rate": 4.7624094336980565e-07, + "loss": 0.7295, + "step": 2931 + }, + { + "epoch": 0.9045195125713404, + "grad_norm": 0.0, + "learning_rate": 4.731978024589945e-07, + "loss": 0.65, + "step": 2932 + }, + { + "epoch": 0.9048280117229678, + "grad_norm": 0.0, + "learning_rate": 4.701641798198353e-07, + "loss": 0.6513, + "step": 2933 + }, + { + "epoch": 0.9051365108745951, + "grad_norm": 0.0, + "learning_rate": 4.671400784832314e-07, + "loss": 0.7507, + "step": 2934 + }, + { + "epoch": 0.9054450100262225, + "grad_norm": 0.0, + "learning_rate": 4.641255014705781e-07, + "loss": 0.6802, + "step": 2935 + }, + { + "epoch": 0.9057535091778498, + "grad_norm": 0.0, + "learning_rate": 4.6112045179375286e-07, + "loss": 0.6565, + "step": 2936 + }, + { + "epoch": 0.9060620083294771, + "grad_norm": 0.0, + "learning_rate": 4.58124932455114e-07, + "loss": 0.7055, + "step": 2937 + }, + { + "epoch": 0.9063705074811045, + "grad_norm": 0.0, + "learning_rate": 4.5513894644750087e-07, + "loss": 0.7324, + "step": 2938 + }, + { + "epoch": 0.9066790066327317, + "grad_norm": 0.0, + "learning_rate": 4.5216249675422153e-07, + "loss": 0.7231, + "step": 2939 + }, + { + "epoch": 0.906987505784359, + "grad_norm": 0.0, + "learning_rate": 4.491955863490649e-07, + "loss": 0.69, + "step": 2940 + }, + { + "epoch": 0.9072960049359864, + "grad_norm": 0.0, + "learning_rate": 4.462382181962832e-07, + "loss": 0.6883, + "step": 2941 + }, + { + "epoch": 0.9076045040876137, + "grad_norm": 0.0, + "learning_rate": 4.43290395250594e-07, + "loss": 0.6147, + "step": 2942 + }, + { + "epoch": 0.9079130032392411, + "grad_norm": 0.0, + "learning_rate": 4.4035212045718366e-07, + "loss": 0.7843, + "step": 2943 + }, + { + "epoch": 0.9082215023908684, + "grad_norm": 0.0, + "learning_rate": 4.37423396751695e-07, + "loss": 0.7698, + "step": 2944 + }, + { + "epoch": 0.9085300015424957, + "grad_norm": 0.0, + "learning_rate": 4.3450422706022956e-07, + "loss": 0.7347, + "step": 2945 + }, + { + "epoch": 0.9088385006941231, + "grad_norm": 0.0, + "learning_rate": 4.315946142993432e-07, + "loss": 0.6458, + "step": 2946 + }, + { + "epoch": 0.9091469998457504, + "grad_norm": 0.0, + "learning_rate": 4.286945613760429e-07, + "loss": 0.7321, + "step": 2947 + }, + { + "epoch": 0.9094554989973778, + "grad_norm": 0.0, + "learning_rate": 4.258040711877842e-07, + "loss": 0.7338, + "step": 2948 + }, + { + "epoch": 0.9097639981490051, + "grad_norm": 0.0, + "learning_rate": 4.229231466224693e-07, + "loss": 0.6162, + "step": 2949 + }, + { + "epoch": 0.9100724973006324, + "grad_norm": 0.0, + "learning_rate": 4.200517905584467e-07, + "loss": 0.632, + "step": 2950 + }, + { + "epoch": 0.9103809964522598, + "grad_norm": 0.0, + "learning_rate": 4.1719000586449507e-07, + "loss": 0.662, + "step": 2951 + }, + { + "epoch": 0.9106894956038871, + "grad_norm": 0.0, + "learning_rate": 4.143377953998418e-07, + "loss": 0.7428, + "step": 2952 + }, + { + "epoch": 0.9109979947555145, + "grad_norm": 0.0, + "learning_rate": 4.1149516201413944e-07, + "loss": 0.6866, + "step": 2953 + }, + { + "epoch": 0.9113064939071418, + "grad_norm": 0.0, + "learning_rate": 4.0866210854747956e-07, + "loss": 0.6504, + "step": 2954 + }, + { + "epoch": 0.9116149930587691, + "grad_norm": 0.0, + "learning_rate": 4.058386378303769e-07, + "loss": 0.6522, + "step": 2955 + }, + { + "epoch": 0.9119234922103964, + "grad_norm": 0.0, + "learning_rate": 4.0302475268377386e-07, + "loss": 0.6448, + "step": 2956 + }, + { + "epoch": 0.9122319913620237, + "grad_norm": 0.0, + "learning_rate": 4.00220455919037e-07, + "loss": 0.6592, + "step": 2957 + }, + { + "epoch": 0.912540490513651, + "grad_norm": 0.0, + "learning_rate": 3.974257503379508e-07, + "loss": 0.6448, + "step": 2958 + }, + { + "epoch": 0.9128489896652784, + "grad_norm": 0.0, + "learning_rate": 3.9464063873272064e-07, + "loss": 0.6848, + "step": 2959 + }, + { + "epoch": 0.9131574888169057, + "grad_norm": 0.0, + "learning_rate": 3.9186512388596053e-07, + "loss": 0.7141, + "step": 2960 + }, + { + "epoch": 0.9134659879685331, + "grad_norm": 0.0, + "learning_rate": 3.8909920857070237e-07, + "loss": 0.6235, + "step": 2961 + }, + { + "epoch": 0.9137744871201604, + "grad_norm": 0.0, + "learning_rate": 3.863428955503856e-07, + "loss": 0.6826, + "step": 2962 + }, + { + "epoch": 0.9140829862717877, + "grad_norm": 0.0, + "learning_rate": 3.835961875788552e-07, + "loss": 0.6908, + "step": 2963 + }, + { + "epoch": 0.9143914854234151, + "grad_norm": 0.0, + "learning_rate": 3.8085908740035706e-07, + "loss": 0.6579, + "step": 2964 + }, + { + "epoch": 0.9146999845750424, + "grad_norm": 0.0, + "learning_rate": 3.7813159774954256e-07, + "loss": 0.7451, + "step": 2965 + }, + { + "epoch": 0.9150084837266698, + "grad_norm": 0.0, + "learning_rate": 3.754137213514586e-07, + "loss": 0.6747, + "step": 2966 + }, + { + "epoch": 0.9153169828782971, + "grad_norm": 0.0, + "learning_rate": 3.7270546092154856e-07, + "loss": 0.7102, + "step": 2967 + }, + { + "epoch": 0.9156254820299244, + "grad_norm": 0.0, + "learning_rate": 3.7000681916564583e-07, + "loss": 0.7626, + "step": 2968 + }, + { + "epoch": 0.9159339811815518, + "grad_norm": 0.0, + "learning_rate": 3.673177987799781e-07, + "loss": 0.6208, + "step": 2969 + }, + { + "epoch": 0.9162424803331791, + "grad_norm": 0.0, + "learning_rate": 3.646384024511562e-07, + "loss": 0.7493, + "step": 2970 + }, + { + "epoch": 0.9165509794848065, + "grad_norm": 0.0, + "learning_rate": 3.6196863285618e-07, + "loss": 0.7094, + "step": 2971 + }, + { + "epoch": 0.9168594786364338, + "grad_norm": 0.0, + "learning_rate": 3.5930849266242465e-07, + "loss": 0.6915, + "step": 2972 + }, + { + "epoch": 0.917167977788061, + "grad_norm": 0.0, + "learning_rate": 3.5665798452764966e-07, + "loss": 0.6739, + "step": 2973 + }, + { + "epoch": 0.9174764769396884, + "grad_norm": 0.0, + "learning_rate": 3.540171110999913e-07, + "loss": 0.6801, + "step": 2974 + }, + { + "epoch": 0.9177849760913157, + "grad_norm": 0.0, + "learning_rate": 3.5138587501795884e-07, + "loss": 0.6038, + "step": 2975 + }, + { + "epoch": 0.918093475242943, + "grad_norm": 0.0, + "learning_rate": 3.487642789104295e-07, + "loss": 0.6319, + "step": 2976 + }, + { + "epoch": 0.9184019743945704, + "grad_norm": 0.0, + "learning_rate": 3.4615232539665254e-07, + "loss": 0.7123, + "step": 2977 + }, + { + "epoch": 0.9187104735461977, + "grad_norm": 0.0, + "learning_rate": 3.4355001708624493e-07, + "loss": 0.651, + "step": 2978 + }, + { + "epoch": 0.9190189726978251, + "grad_norm": 0.0, + "learning_rate": 3.4095735657918127e-07, + "loss": 0.6915, + "step": 2979 + }, + { + "epoch": 0.9193274718494524, + "grad_norm": 0.0, + "learning_rate": 3.3837434646580514e-07, + "loss": 0.8021, + "step": 2980 + }, + { + "epoch": 0.9196359710010797, + "grad_norm": 0.0, + "learning_rate": 3.3580098932680994e-07, + "loss": 0.6535, + "step": 2981 + }, + { + "epoch": 0.9199444701527071, + "grad_norm": 0.0, + "learning_rate": 3.3323728773324904e-07, + "loss": 0.6707, + "step": 2982 + }, + { + "epoch": 0.9202529693043344, + "grad_norm": 0.0, + "learning_rate": 3.306832442465302e-07, + "loss": 0.6688, + "step": 2983 + }, + { + "epoch": 0.9205614684559618, + "grad_norm": 0.0, + "learning_rate": 3.281388614184089e-07, + "loss": 0.6877, + "step": 2984 + }, + { + "epoch": 0.9208699676075891, + "grad_norm": 0.0, + "learning_rate": 3.256041417909894e-07, + "loss": 0.6977, + "step": 2985 + }, + { + "epoch": 0.9211784667592164, + "grad_norm": 0.0, + "learning_rate": 3.230790878967216e-07, + "loss": 0.6933, + "step": 2986 + }, + { + "epoch": 0.9214869659108438, + "grad_norm": 0.0, + "learning_rate": 3.205637022583985e-07, + "loss": 0.6552, + "step": 2987 + }, + { + "epoch": 0.9217954650624711, + "grad_norm": 0.0, + "learning_rate": 3.180579873891565e-07, + "loss": 0.6738, + "step": 2988 + }, + { + "epoch": 0.9221039642140985, + "grad_norm": 0.0, + "learning_rate": 3.155619457924608e-07, + "loss": 0.6119, + "step": 2989 + }, + { + "epoch": 0.9224124633657258, + "grad_norm": 0.0, + "learning_rate": 3.130755799621221e-07, + "loss": 0.643, + "step": 2990 + }, + { + "epoch": 0.922720962517353, + "grad_norm": 0.0, + "learning_rate": 3.1059889238227893e-07, + "loss": 0.6854, + "step": 2991 + }, + { + "epoch": 0.9230294616689804, + "grad_norm": 0.0, + "learning_rate": 3.0813188552740067e-07, + "loss": 0.6532, + "step": 2992 + }, + { + "epoch": 0.9233379608206077, + "grad_norm": 0.0, + "learning_rate": 3.0567456186228694e-07, + "loss": 0.6868, + "step": 2993 + }, + { + "epoch": 0.923646459972235, + "grad_norm": 0.0, + "learning_rate": 3.032269238420582e-07, + "loss": 0.7246, + "step": 2994 + }, + { + "epoch": 0.9239549591238624, + "grad_norm": 0.0, + "learning_rate": 3.0078897391216387e-07, + "loss": 0.6767, + "step": 2995 + }, + { + "epoch": 0.9242634582754897, + "grad_norm": 0.0, + "learning_rate": 2.9836071450836776e-07, + "loss": 0.6672, + "step": 2996 + }, + { + "epoch": 0.9245719574271171, + "grad_norm": 0.0, + "learning_rate": 2.9594214805675703e-07, + "loss": 0.6284, + "step": 2997 + }, + { + "epoch": 0.9248804565787444, + "grad_norm": 0.0, + "learning_rate": 2.935332769737331e-07, + "loss": 0.703, + "step": 2998 + }, + { + "epoch": 0.9251889557303717, + "grad_norm": 0.0, + "learning_rate": 2.911341036660065e-07, + "loss": 0.7389, + "step": 2999 + }, + { + "epoch": 0.9254974548819991, + "grad_norm": 0.0, + "learning_rate": 2.887446305306074e-07, + "loss": 0.6566, + "step": 3000 + }, + { + "epoch": 0.9258059540336264, + "grad_norm": 0.0, + "learning_rate": 2.863648599548652e-07, + "loss": 0.6818, + "step": 3001 + }, + { + "epoch": 0.9261144531852538, + "grad_norm": 0.0, + "learning_rate": 2.839947943164223e-07, + "loss": 0.6706, + "step": 3002 + }, + { + "epoch": 0.9264229523368811, + "grad_norm": 0.0, + "learning_rate": 2.81634435983219e-07, + "loss": 0.6665, + "step": 3003 + }, + { + "epoch": 0.9267314514885084, + "grad_norm": 0.0, + "learning_rate": 2.792837873135035e-07, + "loss": 0.6966, + "step": 3004 + }, + { + "epoch": 0.9270399506401358, + "grad_norm": 0.0, + "learning_rate": 2.7694285065581807e-07, + "loss": 0.7047, + "step": 3005 + }, + { + "epoch": 0.9273484497917631, + "grad_norm": 0.0, + "learning_rate": 2.746116283490019e-07, + "loss": 0.7193, + "step": 3006 + }, + { + "epoch": 0.9276569489433905, + "grad_norm": 0.0, + "learning_rate": 2.722901227221919e-07, + "loss": 0.6711, + "step": 3007 + }, + { + "epoch": 0.9279654480950177, + "grad_norm": 0.0, + "learning_rate": 2.6997833609481384e-07, + "loss": 0.6848, + "step": 3008 + }, + { + "epoch": 0.928273947246645, + "grad_norm": 0.0, + "learning_rate": 2.6767627077658563e-07, + "loss": 0.6251, + "step": 3009 + }, + { + "epoch": 0.9285824463982724, + "grad_norm": 0.0, + "learning_rate": 2.653839290675109e-07, + "loss": 0.6504, + "step": 3010 + }, + { + "epoch": 0.9288909455498997, + "grad_norm": 0.0, + "learning_rate": 2.6310131325787634e-07, + "loss": 0.5879, + "step": 3011 + }, + { + "epoch": 0.929199444701527, + "grad_norm": 0.0, + "learning_rate": 2.608284256282567e-07, + "loss": 0.7294, + "step": 3012 + }, + { + "epoch": 0.9295079438531544, + "grad_norm": 0.0, + "learning_rate": 2.5856526844950324e-07, + "loss": 0.7099, + "step": 3013 + }, + { + "epoch": 0.9298164430047817, + "grad_norm": 0.0, + "learning_rate": 2.5631184398274834e-07, + "loss": 0.6724, + "step": 3014 + }, + { + "epoch": 0.9301249421564091, + "grad_norm": 0.0, + "learning_rate": 2.540681544793955e-07, + "loss": 0.7407, + "step": 3015 + }, + { + "epoch": 0.9304334413080364, + "grad_norm": 0.0, + "learning_rate": 2.518342021811271e-07, + "loss": 0.6528, + "step": 3016 + }, + { + "epoch": 0.9307419404596637, + "grad_norm": 0.0, + "learning_rate": 2.496099893198944e-07, + "loss": 0.7048, + "step": 3017 + }, + { + "epoch": 0.9310504396112911, + "grad_norm": 0.0, + "learning_rate": 2.4739551811791996e-07, + "loss": 0.7183, + "step": 3018 + }, + { + "epoch": 0.9313589387629184, + "grad_norm": 0.0, + "learning_rate": 2.451907907876916e-07, + "loss": 0.6742, + "step": 3019 + }, + { + "epoch": 0.9316674379145458, + "grad_norm": 0.0, + "learning_rate": 2.4299580953196066e-07, + "loss": 0.6479, + "step": 3020 + }, + { + "epoch": 0.9319759370661731, + "grad_norm": 0.0, + "learning_rate": 2.408105765437452e-07, + "loss": 0.6814, + "step": 3021 + }, + { + "epoch": 0.9322844362178004, + "grad_norm": 0.0, + "learning_rate": 2.386350940063209e-07, + "loss": 0.6509, + "step": 3022 + }, + { + "epoch": 0.9325929353694278, + "grad_norm": 0.0, + "learning_rate": 2.364693640932214e-07, + "loss": 0.6871, + "step": 3023 + }, + { + "epoch": 0.9329014345210551, + "grad_norm": 0.0, + "learning_rate": 2.3431338896823917e-07, + "loss": 0.6811, + "step": 3024 + }, + { + "epoch": 0.9332099336726823, + "grad_norm": 0.0, + "learning_rate": 2.321671707854156e-07, + "loss": 0.6711, + "step": 3025 + }, + { + "epoch": 0.9335184328243097, + "grad_norm": 0.0, + "learning_rate": 2.300307116890521e-07, + "loss": 0.6324, + "step": 3026 + }, + { + "epoch": 0.933826931975937, + "grad_norm": 0.0, + "learning_rate": 2.279040138136901e-07, + "loss": 0.6814, + "step": 3027 + }, + { + "epoch": 0.9341354311275644, + "grad_norm": 0.0, + "learning_rate": 2.2578707928412545e-07, + "loss": 0.6596, + "step": 3028 + }, + { + "epoch": 0.9344439302791917, + "grad_norm": 0.0, + "learning_rate": 2.2367991021539637e-07, + "loss": 0.679, + "step": 3029 + }, + { + "epoch": 0.934752429430819, + "grad_norm": 0.0, + "learning_rate": 2.215825087127843e-07, + "loss": 0.601, + "step": 3030 + }, + { + "epoch": 0.9350609285824464, + "grad_norm": 0.0, + "learning_rate": 2.1949487687181525e-07, + "loss": 0.691, + "step": 3031 + }, + { + "epoch": 0.9353694277340737, + "grad_norm": 0.0, + "learning_rate": 2.1741701677824966e-07, + "loss": 0.7404, + "step": 3032 + }, + { + "epoch": 0.9356779268857011, + "grad_norm": 0.0, + "learning_rate": 2.1534893050808802e-07, + "loss": 0.654, + "step": 3033 + }, + { + "epoch": 0.9359864260373284, + "grad_norm": 0.0, + "learning_rate": 2.1329062012756308e-07, + "loss": 0.622, + "step": 3034 + }, + { + "epoch": 0.9362949251889557, + "grad_norm": 0.0, + "learning_rate": 2.1124208769314424e-07, + "loss": 0.7046, + "step": 3035 + }, + { + "epoch": 0.9366034243405831, + "grad_norm": 0.0, + "learning_rate": 2.092033352515288e-07, + "loss": 0.6661, + "step": 3036 + }, + { + "epoch": 0.9369119234922104, + "grad_norm": 0.0, + "learning_rate": 2.0717436483964293e-07, + "loss": 0.7741, + "step": 3037 + }, + { + "epoch": 0.9372204226438378, + "grad_norm": 0.0, + "learning_rate": 2.0515517848464284e-07, + "loss": 0.6348, + "step": 3038 + }, + { + "epoch": 0.9375289217954651, + "grad_norm": 0.0, + "learning_rate": 2.031457782039037e-07, + "loss": 0.6986, + "step": 3039 + }, + { + "epoch": 0.9378374209470924, + "grad_norm": 0.0, + "learning_rate": 2.0114616600502845e-07, + "loss": 0.6558, + "step": 3040 + }, + { + "epoch": 0.9381459200987198, + "grad_norm": 0.0, + "learning_rate": 1.991563438858368e-07, + "loss": 0.6811, + "step": 3041 + }, + { + "epoch": 0.9384544192503471, + "grad_norm": 0.0, + "learning_rate": 1.9717631383437075e-07, + "loss": 0.6629, + "step": 3042 + }, + { + "epoch": 0.9387629184019743, + "grad_norm": 0.0, + "learning_rate": 1.952060778288889e-07, + "loss": 0.6583, + "step": 3043 + }, + { + "epoch": 0.9390714175536017, + "grad_norm": 0.0, + "learning_rate": 1.9324563783785888e-07, + "loss": 0.6649, + "step": 3044 + }, + { + "epoch": 0.939379916705229, + "grad_norm": 0.0, + "learning_rate": 1.9129499581996945e-07, + "loss": 0.7522, + "step": 3045 + }, + { + "epoch": 0.9396884158568564, + "grad_norm": 0.0, + "learning_rate": 1.893541537241128e-07, + "loss": 0.6718, + "step": 3046 + }, + { + "epoch": 0.9399969150084837, + "grad_norm": 0.0, + "learning_rate": 1.8742311348939558e-07, + "loss": 0.6987, + "step": 3047 + }, + { + "epoch": 0.940305414160111, + "grad_norm": 0.0, + "learning_rate": 1.855018770451278e-07, + "loss": 0.6032, + "step": 3048 + }, + { + "epoch": 0.9406139133117384, + "grad_norm": 0.0, + "learning_rate": 1.83590446310824e-07, + "loss": 0.701, + "step": 3049 + }, + { + "epoch": 0.9409224124633657, + "grad_norm": 0.0, + "learning_rate": 1.8168882319620663e-07, + "loss": 0.7369, + "step": 3050 + }, + { + "epoch": 0.9412309116149931, + "grad_norm": 0.0, + "learning_rate": 1.797970096011936e-07, + "loss": 0.7331, + "step": 3051 + }, + { + "epoch": 0.9415394107666204, + "grad_norm": 0.0, + "learning_rate": 1.779150074159064e-07, + "loss": 0.6953, + "step": 3052 + }, + { + "epoch": 0.9418479099182477, + "grad_norm": 0.0, + "learning_rate": 1.7604281852065973e-07, + "loss": 0.7046, + "step": 3053 + }, + { + "epoch": 0.9421564090698751, + "grad_norm": 0.0, + "learning_rate": 1.7418044478596742e-07, + "loss": 0.7163, + "step": 3054 + }, + { + "epoch": 0.9424649082215024, + "grad_norm": 0.0, + "learning_rate": 1.7232788807253654e-07, + "loss": 0.6754, + "step": 3055 + }, + { + "epoch": 0.9427734073731298, + "grad_norm": 0.0, + "learning_rate": 1.7048515023126323e-07, + "loss": 0.6923, + "step": 3056 + }, + { + "epoch": 0.9430819065247571, + "grad_norm": 0.0, + "learning_rate": 1.6865223310323586e-07, + "loss": 0.6807, + "step": 3057 + }, + { + "epoch": 0.9433904056763844, + "grad_norm": 0.0, + "learning_rate": 1.6682913851972959e-07, + "loss": 0.6595, + "step": 3058 + }, + { + "epoch": 0.9436989048280118, + "grad_norm": 0.0, + "learning_rate": 1.6501586830220852e-07, + "loss": 0.7242, + "step": 3059 + }, + { + "epoch": 0.944007403979639, + "grad_norm": 0.0, + "learning_rate": 1.6321242426231787e-07, + "loss": 0.9792, + "step": 3060 + }, + { + "epoch": 0.9443159031312663, + "grad_norm": 0.0, + "learning_rate": 1.6141880820188749e-07, + "loss": 0.6886, + "step": 3061 + }, + { + "epoch": 0.9446244022828937, + "grad_norm": 0.0, + "learning_rate": 1.596350219129261e-07, + "loss": 0.6266, + "step": 3062 + }, + { + "epoch": 0.944932901434521, + "grad_norm": 0.0, + "learning_rate": 1.5786106717762373e-07, + "loss": 0.6921, + "step": 3063 + }, + { + "epoch": 0.9452414005861484, + "grad_norm": 0.0, + "learning_rate": 1.5609694576834698e-07, + "loss": 0.682, + "step": 3064 + }, + { + "epoch": 0.9455498997377757, + "grad_norm": 0.0, + "learning_rate": 1.5434265944763717e-07, + "loss": 0.609, + "step": 3065 + }, + { + "epoch": 0.945858398889403, + "grad_norm": 0.0, + "learning_rate": 1.5259820996820884e-07, + "loss": 0.6423, + "step": 3066 + }, + { + "epoch": 0.9461668980410304, + "grad_norm": 0.0, + "learning_rate": 1.5086359907295124e-07, + "loss": 0.6499, + "step": 3067 + }, + { + "epoch": 0.9464753971926577, + "grad_norm": 0.0, + "learning_rate": 1.4913882849492022e-07, + "loss": 0.6977, + "step": 3068 + }, + { + "epoch": 0.9467838963442851, + "grad_norm": 0.0, + "learning_rate": 1.4742389995734519e-07, + "loss": 0.7011, + "step": 3069 + }, + { + "epoch": 0.9470923954959124, + "grad_norm": 0.0, + "learning_rate": 1.457188151736144e-07, + "loss": 0.6401, + "step": 3070 + }, + { + "epoch": 0.9474008946475397, + "grad_norm": 0.0, + "learning_rate": 1.4402357584728966e-07, + "loss": 0.7007, + "step": 3071 + }, + { + "epoch": 0.9477093937991671, + "grad_norm": 0.0, + "learning_rate": 1.423381836720894e-07, + "loss": 0.6718, + "step": 3072 + }, + { + "epoch": 0.9480178929507944, + "grad_norm": 0.0, + "learning_rate": 1.4066264033190002e-07, + "loss": 0.6217, + "step": 3073 + }, + { + "epoch": 0.9483263921024218, + "grad_norm": 0.0, + "learning_rate": 1.3899694750076241e-07, + "loss": 0.6629, + "step": 3074 + }, + { + "epoch": 0.9486348912540491, + "grad_norm": 0.0, + "learning_rate": 1.3734110684287761e-07, + "loss": 0.6666, + "step": 3075 + }, + { + "epoch": 0.9489433904056764, + "grad_norm": 0.0, + "learning_rate": 1.3569512001260444e-07, + "loss": 0.7184, + "step": 3076 + }, + { + "epoch": 0.9492518895573037, + "grad_norm": 0.0, + "learning_rate": 1.3405898865445522e-07, + "loss": 0.6494, + "step": 3077 + }, + { + "epoch": 0.949560388708931, + "grad_norm": 0.0, + "learning_rate": 1.3243271440309679e-07, + "loss": 0.7152, + "step": 3078 + }, + { + "epoch": 0.9498688878605583, + "grad_norm": 0.0, + "learning_rate": 1.3081629888334834e-07, + "loss": 0.6792, + "step": 3079 + }, + { + "epoch": 0.9501773870121857, + "grad_norm": 0.0, + "learning_rate": 1.292097437101747e-07, + "loss": 0.6981, + "step": 3080 + }, + { + "epoch": 0.950485886163813, + "grad_norm": 0.0, + "learning_rate": 1.276130504886963e-07, + "loss": 0.6675, + "step": 3081 + }, + { + "epoch": 0.9507943853154404, + "grad_norm": 0.0, + "learning_rate": 1.260262208141738e-07, + "loss": 0.6856, + "step": 3082 + }, + { + "epoch": 0.9511028844670677, + "grad_norm": 0.0, + "learning_rate": 1.2444925627201786e-07, + "loss": 0.7035, + "step": 3083 + }, + { + "epoch": 0.951411383618695, + "grad_norm": 0.0, + "learning_rate": 1.2288215843777928e-07, + "loss": 0.652, + "step": 3084 + }, + { + "epoch": 0.9517198827703224, + "grad_norm": 0.0, + "learning_rate": 1.2132492887715454e-07, + "loss": 0.7783, + "step": 3085 + }, + { + "epoch": 0.9520283819219497, + "grad_norm": 0.0, + "learning_rate": 1.1977756914597792e-07, + "loss": 0.682, + "step": 3086 + }, + { + "epoch": 0.9523368810735771, + "grad_norm": 0.0, + "learning_rate": 1.1824008079022286e-07, + "loss": 0.6684, + "step": 3087 + }, + { + "epoch": 0.9526453802252044, + "grad_norm": 0.0, + "learning_rate": 1.1671246534600167e-07, + "loss": 0.6782, + "step": 3088 + }, + { + "epoch": 0.9529538793768317, + "grad_norm": 0.0, + "learning_rate": 1.1519472433956125e-07, + "loss": 0.6979, + "step": 3089 + }, + { + "epoch": 0.9532623785284591, + "grad_norm": 0.0, + "learning_rate": 1.136868592872864e-07, + "loss": 0.6788, + "step": 3090 + }, + { + "epoch": 0.9535708776800864, + "grad_norm": 0.0, + "learning_rate": 1.1218887169568871e-07, + "loss": 0.7168, + "step": 3091 + }, + { + "epoch": 0.9538793768317138, + "grad_norm": 0.0, + "learning_rate": 1.107007630614143e-07, + "loss": 0.6758, + "step": 3092 + }, + { + "epoch": 0.9541878759833411, + "grad_norm": 0.0, + "learning_rate": 1.0922253487124279e-07, + "loss": 0.7599, + "step": 3093 + }, + { + "epoch": 0.9544963751349684, + "grad_norm": 0.0, + "learning_rate": 1.0775418860207498e-07, + "loss": 0.6001, + "step": 3094 + }, + { + "epoch": 0.9548048742865957, + "grad_norm": 0.0, + "learning_rate": 1.0629572572094515e-07, + "loss": 0.6489, + "step": 3095 + }, + { + "epoch": 0.955113373438223, + "grad_norm": 0.0, + "learning_rate": 1.048471476850077e-07, + "loss": 0.6668, + "step": 3096 + }, + { + "epoch": 0.9554218725898503, + "grad_norm": 0.0, + "learning_rate": 1.0340845594154603e-07, + "loss": 0.6543, + "step": 3097 + }, + { + "epoch": 0.9557303717414777, + "grad_norm": 0.0, + "learning_rate": 1.0197965192796145e-07, + "loss": 0.7041, + "step": 3098 + }, + { + "epoch": 0.956038870893105, + "grad_norm": 0.0, + "learning_rate": 1.0056073707177982e-07, + "loss": 0.7049, + "step": 3099 + }, + { + "epoch": 0.9563473700447324, + "grad_norm": 0.0, + "learning_rate": 9.915171279064606e-08, + "loss": 0.7802, + "step": 3100 + }, + { + "epoch": 0.9566558691963597, + "grad_norm": 0.0, + "learning_rate": 9.775258049232072e-08, + "loss": 0.6623, + "step": 3101 + }, + { + "epoch": 0.956964368347987, + "grad_norm": 0.0, + "learning_rate": 9.636334157468563e-08, + "loss": 0.7155, + "step": 3102 + }, + { + "epoch": 0.9572728674996144, + "grad_norm": 0.0, + "learning_rate": 9.498399742573495e-08, + "loss": 0.7015, + "step": 3103 + }, + { + "epoch": 0.9575813666512417, + "grad_norm": 0.0, + "learning_rate": 9.361454942357628e-08, + "loss": 0.6937, + "step": 3104 + }, + { + "epoch": 0.9578898658028691, + "grad_norm": 0.0, + "learning_rate": 9.225499893643297e-08, + "loss": 0.6619, + "step": 3105 + }, + { + "epoch": 0.9581983649544964, + "grad_norm": 0.0, + "learning_rate": 9.090534732263734e-08, + "loss": 0.6644, + "step": 3106 + }, + { + "epoch": 0.9585068641061237, + "grad_norm": 0.0, + "learning_rate": 8.956559593063408e-08, + "loss": 0.6596, + "step": 3107 + }, + { + "epoch": 0.9588153632577511, + "grad_norm": 0.0, + "learning_rate": 8.823574609897134e-08, + "loss": 0.6561, + "step": 3108 + }, + { + "epoch": 0.9591238624093784, + "grad_norm": 0.0, + "learning_rate": 8.691579915631299e-08, + "loss": 0.6519, + "step": 3109 + }, + { + "epoch": 0.9594323615610058, + "grad_norm": 0.0, + "learning_rate": 8.560575642141966e-08, + "loss": 0.7131, + "step": 3110 + }, + { + "epoch": 0.9597408607126331, + "grad_norm": 0.0, + "learning_rate": 8.430561920316438e-08, + "loss": 0.755, + "step": 3111 + }, + { + "epoch": 0.9600493598642603, + "grad_norm": 0.0, + "learning_rate": 8.301538880051808e-08, + "loss": 0.6812, + "step": 3112 + }, + { + "epoch": 0.9603578590158877, + "grad_norm": 0.0, + "learning_rate": 8.173506650255625e-08, + "loss": 0.6656, + "step": 3113 + }, + { + "epoch": 0.960666358167515, + "grad_norm": 0.0, + "learning_rate": 8.046465358845568e-08, + "loss": 0.6969, + "step": 3114 + }, + { + "epoch": 0.9609748573191423, + "grad_norm": 0.0, + "learning_rate": 7.920415132748993e-08, + "loss": 0.6985, + "step": 3115 + }, + { + "epoch": 0.9612833564707697, + "grad_norm": 0.0, + "learning_rate": 7.795356097903495e-08, + "loss": 0.654, + "step": 3116 + }, + { + "epoch": 0.961591855622397, + "grad_norm": 0.0, + "learning_rate": 7.671288379256015e-08, + "loss": 0.783, + "step": 3117 + }, + { + "epoch": 0.9619003547740244, + "grad_norm": 0.0, + "learning_rate": 7.548212100763063e-08, + "loss": 0.7332, + "step": 3118 + }, + { + "epoch": 0.9622088539256517, + "grad_norm": 0.0, + "learning_rate": 7.426127385390947e-08, + "loss": 0.6841, + "step": 3119 + }, + { + "epoch": 0.962517353077279, + "grad_norm": 0.0, + "learning_rate": 7.30503435511487e-08, + "loss": 0.6868, + "step": 3120 + }, + { + "epoch": 0.9628258522289064, + "grad_norm": 0.0, + "learning_rate": 7.184933130919614e-08, + "loss": 0.6421, + "step": 3121 + }, + { + "epoch": 0.9631343513805337, + "grad_norm": 0.0, + "learning_rate": 7.065823832798524e-08, + "loss": 0.7666, + "step": 3122 + }, + { + "epoch": 0.963442850532161, + "grad_norm": 0.0, + "learning_rate": 6.947706579754632e-08, + "loss": 0.701, + "step": 3123 + }, + { + "epoch": 0.9637513496837884, + "grad_norm": 0.0, + "learning_rate": 6.830581489799204e-08, + "loss": 1.0016, + "step": 3124 + }, + { + "epoch": 0.9640598488354157, + "grad_norm": 0.0, + "learning_rate": 6.714448679952524e-08, + "loss": 0.6931, + "step": 3125 + }, + { + "epoch": 0.9643683479870431, + "grad_norm": 0.0, + "learning_rate": 6.599308266243443e-08, + "loss": 0.6796, + "step": 3126 + }, + { + "epoch": 0.9646768471386704, + "grad_norm": 0.0, + "learning_rate": 6.485160363709053e-08, + "loss": 0.6407, + "step": 3127 + }, + { + "epoch": 0.9649853462902978, + "grad_norm": 0.0, + "learning_rate": 6.37200508639535e-08, + "loss": 0.7245, + "step": 3128 + }, + { + "epoch": 0.965293845441925, + "grad_norm": 0.0, + "learning_rate": 6.259842547356231e-08, + "loss": 0.6961, + "step": 3129 + }, + { + "epoch": 0.9656023445935523, + "grad_norm": 0.0, + "learning_rate": 6.148672858653615e-08, + "loss": 0.7058, + "step": 3130 + }, + { + "epoch": 0.9659108437451797, + "grad_norm": 0.0, + "learning_rate": 6.038496131357874e-08, + "loss": 0.6755, + "step": 3131 + }, + { + "epoch": 0.966219342896807, + "grad_norm": 0.0, + "learning_rate": 5.929312475546845e-08, + "loss": 0.7858, + "step": 3132 + }, + { + "epoch": 0.9665278420484343, + "grad_norm": 0.0, + "learning_rate": 5.8211220003067114e-08, + "loss": 0.6399, + "step": 3133 + }, + { + "epoch": 0.9668363412000617, + "grad_norm": 0.0, + "learning_rate": 5.7139248137307865e-08, + "loss": 0.6506, + "step": 3134 + }, + { + "epoch": 0.967144840351689, + "grad_norm": 0.0, + "learning_rate": 5.607721022920398e-08, + "loss": 0.6636, + "step": 3135 + }, + { + "epoch": 0.9674533395033164, + "grad_norm": 0.0, + "learning_rate": 5.5025107339842234e-08, + "loss": 0.7279, + "step": 3136 + }, + { + "epoch": 0.9677618386549437, + "grad_norm": 0.0, + "learning_rate": 5.3982940520382885e-08, + "loss": 0.659, + "step": 3137 + }, + { + "epoch": 0.968070337806571, + "grad_norm": 0.0, + "learning_rate": 5.295071081206194e-08, + "loss": 0.6571, + "step": 3138 + }, + { + "epoch": 0.9683788369581984, + "grad_norm": 0.0, + "learning_rate": 5.1928419246181085e-08, + "loss": 0.6417, + "step": 3139 + }, + { + "epoch": 0.9686873361098257, + "grad_norm": 0.0, + "learning_rate": 5.091606684411998e-08, + "loss": 0.6781, + "step": 3140 + }, + { + "epoch": 0.968995835261453, + "grad_norm": 0.0, + "learning_rate": 4.9913654617322894e-08, + "loss": 0.7228, + "step": 3141 + }, + { + "epoch": 0.9693043344130804, + "grad_norm": 0.0, + "learning_rate": 4.892118356730313e-08, + "loss": 0.9788, + "step": 3142 + }, + { + "epoch": 0.9696128335647077, + "grad_norm": 0.0, + "learning_rate": 4.79386546856464e-08, + "loss": 0.6991, + "step": 3143 + }, + { + "epoch": 0.9699213327163351, + "grad_norm": 0.0, + "learning_rate": 4.696606895399858e-08, + "loss": 0.6679, + "step": 3144 + }, + { + "epoch": 0.9702298318679624, + "grad_norm": 0.0, + "learning_rate": 4.60034273440757e-08, + "loss": 0.7438, + "step": 3145 + }, + { + "epoch": 0.9705383310195898, + "grad_norm": 0.0, + "learning_rate": 4.505073081765843e-08, + "loss": 0.6752, + "step": 3146 + }, + { + "epoch": 0.970846830171217, + "grad_norm": 0.0, + "learning_rate": 4.4107980326589806e-08, + "loss": 0.6864, + "step": 3147 + }, + { + "epoch": 0.9711553293228443, + "grad_norm": 0.0, + "learning_rate": 4.317517681277528e-08, + "loss": 0.6253, + "step": 3148 + }, + { + "epoch": 0.9714638284744717, + "grad_norm": 0.0, + "learning_rate": 4.225232120818268e-08, + "loss": 0.6613, + "step": 3149 + }, + { + "epoch": 0.971772327626099, + "grad_norm": 0.0, + "learning_rate": 4.133941443484335e-08, + "loss": 0.6816, + "step": 3150 + }, + { + "epoch": 0.9720808267777263, + "grad_norm": 0.0, + "learning_rate": 4.043645740484436e-08, + "loss": 0.6524, + "step": 3151 + }, + { + "epoch": 0.9723893259293537, + "grad_norm": 0.0, + "learning_rate": 3.95434510203363e-08, + "loss": 0.6208, + "step": 3152 + }, + { + "epoch": 0.972697825080981, + "grad_norm": 0.0, + "learning_rate": 3.866039617352324e-08, + "loss": 0.7372, + "step": 3153 + }, + { + "epoch": 0.9730063242326084, + "grad_norm": 0.0, + "learning_rate": 3.778729374667278e-08, + "loss": 0.7214, + "step": 3154 + }, + { + "epoch": 0.9733148233842357, + "grad_norm": 0.0, + "learning_rate": 3.6924144612102695e-08, + "loss": 0.6511, + "step": 3155 + }, + { + "epoch": 0.973623322535863, + "grad_norm": 0.0, + "learning_rate": 3.6070949632190934e-08, + "loss": 0.6205, + "step": 3156 + }, + { + "epoch": 0.9739318216874904, + "grad_norm": 0.0, + "learning_rate": 3.5227709659367834e-08, + "loss": 0.7322, + "step": 3157 + }, + { + "epoch": 0.9742403208391177, + "grad_norm": 0.0, + "learning_rate": 3.439442553611727e-08, + "loss": 0.9699, + "step": 3158 + }, + { + "epoch": 0.974548819990745, + "grad_norm": 0.0, + "learning_rate": 3.3571098094978825e-08, + "loss": 0.6859, + "step": 3159 + }, + { + "epoch": 0.9748573191423724, + "grad_norm": 0.0, + "learning_rate": 3.275772815854228e-08, + "loss": 0.69, + "step": 3160 + }, + { + "epoch": 0.9751658182939997, + "grad_norm": 0.0, + "learning_rate": 3.195431653944869e-08, + "loss": 0.6999, + "step": 3161 + }, + { + "epoch": 0.9754743174456271, + "grad_norm": 0.0, + "learning_rate": 3.1160864040391534e-08, + "loss": 0.6989, + "step": 3162 + }, + { + "epoch": 0.9757828165972544, + "grad_norm": 0.0, + "learning_rate": 3.0377371454112234e-08, + "loss": 0.6986, + "step": 3163 + }, + { + "epoch": 0.9760913157488816, + "grad_norm": 0.0, + "learning_rate": 2.9603839563400183e-08, + "loss": 0.6149, + "step": 3164 + }, + { + "epoch": 0.976399814900509, + "grad_norm": 0.0, + "learning_rate": 2.8840269141097165e-08, + "loss": 0.729, + "step": 3165 + }, + { + "epoch": 0.9767083140521363, + "grad_norm": 0.0, + "learning_rate": 2.8086660950088497e-08, + "loss": 0.6686, + "step": 3166 + }, + { + "epoch": 0.9770168132037637, + "grad_norm": 0.0, + "learning_rate": 2.7343015743307443e-08, + "loss": 0.6762, + "step": 3167 + }, + { + "epoch": 0.977325312355391, + "grad_norm": 0.0, + "learning_rate": 2.660933426373413e-08, + "loss": 0.7482, + "step": 3168 + }, + { + "epoch": 0.9776338115070183, + "grad_norm": 0.0, + "learning_rate": 2.5885617244392204e-08, + "loss": 0.7199, + "step": 3169 + }, + { + "epoch": 0.9779423106586457, + "grad_norm": 0.0, + "learning_rate": 2.5171865408352147e-08, + "loss": 0.7257, + "step": 3170 + }, + { + "epoch": 0.978250809810273, + "grad_norm": 0.0, + "learning_rate": 2.4468079468724647e-08, + "loss": 0.6469, + "step": 3171 + }, + { + "epoch": 0.9785593089619004, + "grad_norm": 0.0, + "learning_rate": 2.3774260128667238e-08, + "loss": 0.8081, + "step": 3172 + }, + { + "epoch": 0.9788678081135277, + "grad_norm": 0.0, + "learning_rate": 2.3090408081376525e-08, + "loss": 0.6983, + "step": 3173 + }, + { + "epoch": 0.979176307265155, + "grad_norm": 0.0, + "learning_rate": 2.2416524010092644e-08, + "loss": 0.6102, + "step": 3174 + }, + { + "epoch": 0.9794848064167824, + "grad_norm": 0.0, + "learning_rate": 2.1752608588097024e-08, + "loss": 0.6399, + "step": 3175 + }, + { + "epoch": 0.9797933055684097, + "grad_norm": 0.0, + "learning_rate": 2.1098662478710173e-08, + "loss": 0.6928, + "step": 3176 + }, + { + "epoch": 0.980101804720037, + "grad_norm": 0.0, + "learning_rate": 2.0454686335292794e-08, + "loss": 0.8059, + "step": 3177 + }, + { + "epoch": 0.9804103038716644, + "grad_norm": 0.0, + "learning_rate": 1.9820680801243554e-08, + "loss": 0.6157, + "step": 3178 + }, + { + "epoch": 0.9807188030232917, + "grad_norm": 0.0, + "learning_rate": 1.9196646510001303e-08, + "loss": 0.6005, + "step": 3179 + }, + { + "epoch": 0.9810273021749191, + "grad_norm": 0.0, + "learning_rate": 1.8582584085041765e-08, + "loss": 0.7006, + "step": 3180 + }, + { + "epoch": 0.9813358013265463, + "grad_norm": 0.0, + "learning_rate": 1.797849413987529e-08, + "loss": 0.7217, + "step": 3181 + }, + { + "epoch": 0.9816443004781736, + "grad_norm": 0.0, + "learning_rate": 1.7384377278053533e-08, + "loss": 0.7453, + "step": 3182 + }, + { + "epoch": 0.981952799629801, + "grad_norm": 0.0, + "learning_rate": 1.680023409316056e-08, + "loss": 0.7559, + "step": 3183 + }, + { + "epoch": 0.9822612987814283, + "grad_norm": 0.0, + "learning_rate": 1.6226065168816198e-08, + "loss": 0.6353, + "step": 3184 + }, + { + "epoch": 0.9825697979330557, + "grad_norm": 0.0, + "learning_rate": 1.5661871078674895e-08, + "loss": 0.7237, + "step": 3185 + }, + { + "epoch": 0.982878297084683, + "grad_norm": 0.0, + "learning_rate": 1.510765238642575e-08, + "loss": 0.7412, + "step": 3186 + }, + { + "epoch": 0.9831867962363103, + "grad_norm": 0.0, + "learning_rate": 1.4563409645792503e-08, + "loss": 0.6923, + "step": 3187 + }, + { + "epoch": 0.9834952953879377, + "grad_norm": 0.0, + "learning_rate": 1.4029143400529077e-08, + "loss": 0.662, + "step": 3188 + }, + { + "epoch": 0.983803794539565, + "grad_norm": 0.0, + "learning_rate": 1.3504854184422932e-08, + "loss": 0.5827, + "step": 3189 + }, + { + "epoch": 0.9841122936911924, + "grad_norm": 0.0, + "learning_rate": 1.2990542521295057e-08, + "loss": 0.6758, + "step": 3190 + }, + { + "epoch": 0.9844207928428197, + "grad_norm": 0.0, + "learning_rate": 1.2486208924996634e-08, + "loss": 0.7292, + "step": 3191 + }, + { + "epoch": 0.984729291994447, + "grad_norm": 0.0, + "learning_rate": 1.1991853899409044e-08, + "loss": 0.6786, + "step": 3192 + }, + { + "epoch": 0.9850377911460744, + "grad_norm": 0.0, + "learning_rate": 1.1507477938443868e-08, + "loss": 0.6944, + "step": 3193 + }, + { + "epoch": 0.9853462902977017, + "grad_norm": 0.0, + "learning_rate": 1.1033081526045098e-08, + "loss": 0.6941, + "step": 3194 + }, + { + "epoch": 0.985654789449329, + "grad_norm": 0.0, + "learning_rate": 1.0568665136183597e-08, + "loss": 0.6545, + "step": 3195 + }, + { + "epoch": 0.9859632886009564, + "grad_norm": 0.0, + "learning_rate": 1.0114229232859319e-08, + "loss": 0.7082, + "step": 3196 + }, + { + "epoch": 0.9862717877525837, + "grad_norm": 0.0, + "learning_rate": 9.669774270102406e-09, + "loss": 0.6543, + "step": 3197 + }, + { + "epoch": 0.9865802869042111, + "grad_norm": 0.0, + "learning_rate": 9.235300691969872e-09, + "loss": 0.6542, + "step": 3198 + }, + { + "epoch": 0.9868887860558383, + "grad_norm": 0.0, + "learning_rate": 8.81080893254449e-09, + "loss": 0.6826, + "step": 3199 + }, + { + "epoch": 0.9871972852074656, + "grad_norm": 0.0, + "learning_rate": 8.396299415939223e-09, + "loss": 0.6522, + "step": 3200 + }, + { + "epoch": 0.987505784359093, + "grad_norm": 0.0, + "learning_rate": 7.991772556291689e-09, + "loss": 0.6831, + "step": 3201 + }, + { + "epoch": 0.9878142835107203, + "grad_norm": 0.0, + "learning_rate": 7.597228757767472e-09, + "loss": 0.6952, + "step": 3202 + }, + { + "epoch": 0.9881227826623477, + "grad_norm": 0.0, + "learning_rate": 7.212668414556811e-09, + "loss": 0.7034, + "step": 3203 + }, + { + "epoch": 0.988431281813975, + "grad_norm": 0.0, + "learning_rate": 6.8380919108757e-09, + "loss": 0.7407, + "step": 3204 + }, + { + "epoch": 0.9887397809656023, + "grad_norm": 0.0, + "learning_rate": 6.473499620965884e-09, + "loss": 0.7395, + "step": 3205 + }, + { + "epoch": 0.9890482801172297, + "grad_norm": 0.0, + "learning_rate": 6.11889190909043e-09, + "loss": 0.7463, + "step": 3206 + }, + { + "epoch": 0.989356779268857, + "grad_norm": 0.0, + "learning_rate": 5.774269129543708e-09, + "loss": 0.7032, + "step": 3207 + }, + { + "epoch": 0.9896652784204844, + "grad_norm": 0.0, + "learning_rate": 5.439631626635855e-09, + "loss": 0.6845, + "step": 3208 + }, + { + "epoch": 0.9899737775721117, + "grad_norm": 0.0, + "learning_rate": 5.114979734707204e-09, + "loss": 0.6526, + "step": 3209 + }, + { + "epoch": 0.990282276723739, + "grad_norm": 0.0, + "learning_rate": 4.8003137781194035e-09, + "loss": 0.6862, + "step": 3210 + }, + { + "epoch": 0.9905907758753664, + "grad_norm": 0.0, + "learning_rate": 4.495634071254307e-09, + "loss": 0.6675, + "step": 3211 + }, + { + "epoch": 0.9908992750269937, + "grad_norm": 0.0, + "learning_rate": 4.200940918520635e-09, + "loss": 0.6661, + "step": 3212 + }, + { + "epoch": 0.991207774178621, + "grad_norm": 0.0, + "learning_rate": 3.916234614346204e-09, + "loss": 0.685, + "step": 3213 + }, + { + "epoch": 0.9915162733302484, + "grad_norm": 0.0, + "learning_rate": 3.6415154431845846e-09, + "loss": 0.6182, + "step": 3214 + }, + { + "epoch": 0.9918247724818757, + "grad_norm": 0.0, + "learning_rate": 3.3767836795062235e-09, + "loss": 0.7722, + "step": 3215 + }, + { + "epoch": 0.992133271633503, + "grad_norm": 0.0, + "learning_rate": 3.1220395878084343e-09, + "loss": 0.6592, + "step": 3216 + }, + { + "epoch": 0.9924417707851303, + "grad_norm": 0.0, + "learning_rate": 2.8772834226054036e-09, + "loss": 0.6126, + "step": 3217 + }, + { + "epoch": 0.9927502699367576, + "grad_norm": 0.0, + "learning_rate": 2.6425154284359655e-09, + "loss": 0.7914, + "step": 3218 + }, + { + "epoch": 0.993058769088385, + "grad_norm": 0.0, + "learning_rate": 2.4177358398558283e-09, + "loss": 0.6343, + "step": 3219 + }, + { + "epoch": 0.9933672682400123, + "grad_norm": 0.0, + "learning_rate": 2.2029448814431254e-09, + "loss": 0.6759, + "step": 3220 + }, + { + "epoch": 0.9936757673916397, + "grad_norm": 0.0, + "learning_rate": 1.9981427677995267e-09, + "loss": 0.7549, + "step": 3221 + }, + { + "epoch": 0.993984266543267, + "grad_norm": 0.0, + "learning_rate": 1.8033297035402463e-09, + "loss": 0.7574, + "step": 3222 + }, + { + "epoch": 0.9942927656948943, + "grad_norm": 0.0, + "learning_rate": 1.6185058833062538e-09, + "loss": 0.6929, + "step": 3223 + }, + { + "epoch": 0.9946012648465217, + "grad_norm": 0.0, + "learning_rate": 1.4436714917542839e-09, + "loss": 0.6079, + "step": 3224 + }, + { + "epoch": 0.994909763998149, + "grad_norm": 0.0, + "learning_rate": 1.278826703563496e-09, + "loss": 0.6822, + "step": 3225 + }, + { + "epoch": 0.9952182631497763, + "grad_norm": 0.0, + "learning_rate": 1.1239716834310354e-09, + "loss": 0.9537, + "step": 3226 + }, + { + "epoch": 0.9955267623014037, + "grad_norm": 0.0, + "learning_rate": 9.791065860720316e-10, + "loss": 0.6772, + "step": 3227 + }, + { + "epoch": 0.995835261453031, + "grad_norm": 0.0, + "learning_rate": 8.442315562229298e-10, + "loss": 0.686, + "step": 3228 + }, + { + "epoch": 0.9961437606046584, + "grad_norm": 0.0, + "learning_rate": 7.193467286370492e-10, + "loss": 0.68, + "step": 3229 + }, + { + "epoch": 0.9964522597562857, + "grad_norm": 0.0, + "learning_rate": 6.044522280890252e-10, + "loss": 0.6778, + "step": 3230 + }, + { + "epoch": 0.996760758907913, + "grad_norm": 0.0, + "learning_rate": 4.995481693681469e-10, + "loss": 0.6982, + "step": 3231 + }, + { + "epoch": 0.9970692580595404, + "grad_norm": 0.0, + "learning_rate": 4.0463465728723947e-10, + "loss": 0.6542, + "step": 3232 + }, + { + "epoch": 0.9973777572111676, + "grad_norm": 0.0, + "learning_rate": 3.1971178667267176e-10, + "loss": 0.6921, + "step": 3233 + }, + { + "epoch": 0.997686256362795, + "grad_norm": 0.0, + "learning_rate": 2.4477964237212823e-10, + "loss": 0.71, + "step": 3234 + }, + { + "epoch": 0.9979947555144223, + "grad_norm": 0.0, + "learning_rate": 1.7983829925016793e-10, + "loss": 0.7023, + "step": 3235 + }, + { + "epoch": 0.9983032546660496, + "grad_norm": 0.0, + "learning_rate": 1.248878221904448e-10, + "loss": 0.6873, + "step": 3236 + }, + { + "epoch": 0.998611753817677, + "grad_norm": 0.0, + "learning_rate": 7.992826609459769e-11, + "loss": 0.6443, + "step": 3237 + }, + { + "epoch": 0.9989202529693043, + "grad_norm": 0.0, + "learning_rate": 4.4959675881139916e-11, + "loss": 0.6832, + "step": 3238 + }, + { + "epoch": 0.9992287521209317, + "grad_norm": 0.0, + "learning_rate": 1.9982086487679853e-11, + "loss": 0.6543, + "step": 3239 + }, + { + "epoch": 0.999537251272559, + "grad_norm": 0.0, + "learning_rate": 4.995522869810643e-12, + "loss": 0.7005, + "step": 3240 + }, + { + "epoch": 0.9998457504241863, + "grad_norm": 0.0, + "learning_rate": 0.0, + "loss": 0.8583, + "step": 3241 + }, + { + "epoch": 0.9998457504241863, + "step": 3241, + "total_flos": 1.4116677844331397e+19, + "train_loss": 0.5099010296678588, + "train_runtime": 75843.298, + "train_samples_per_second": 5.47, + "train_steps_per_second": 0.043 + } + ], + "logging_steps": 1.0, + "max_steps": 3241, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4116677844331397e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}