|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.251700680272109, |
|
"eval_steps": 184, |
|
"global_step": 920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0013605442176870747, |
|
"grad_norm": 0.24701461672354935, |
|
"learning_rate": 1.36986301369863e-07, |
|
"loss": 1.6736, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0013605442176870747, |
|
"eval_loss": 1.7904456853866577, |
|
"eval_runtime": 75.582, |
|
"eval_samples_per_second": 53.888, |
|
"eval_steps_per_second": 6.748, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0027210884353741495, |
|
"grad_norm": 0.21437113339785932, |
|
"learning_rate": 2.73972602739726e-07, |
|
"loss": 1.5884, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004081632653061225, |
|
"grad_norm": 0.3228668200940542, |
|
"learning_rate": 4.1095890410958903e-07, |
|
"loss": 1.6821, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.005442176870748299, |
|
"grad_norm": 0.19408831616689562, |
|
"learning_rate": 5.47945205479452e-07, |
|
"loss": 1.8146, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.006802721088435374, |
|
"grad_norm": 0.18446566294319683, |
|
"learning_rate": 6.849315068493151e-07, |
|
"loss": 1.6316, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00816326530612245, |
|
"grad_norm": 0.26237580245842185, |
|
"learning_rate": 8.219178082191781e-07, |
|
"loss": 1.7544, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.009523809523809525, |
|
"grad_norm": 0.1659195721310037, |
|
"learning_rate": 9.589041095890411e-07, |
|
"loss": 1.8325, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.010884353741496598, |
|
"grad_norm": 0.14112003912821341, |
|
"learning_rate": 1.095890410958904e-06, |
|
"loss": 1.8533, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.012244897959183673, |
|
"grad_norm": 0.22295406766041573, |
|
"learning_rate": 1.2328767123287673e-06, |
|
"loss": 1.7309, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.013605442176870748, |
|
"grad_norm": 0.20855919407710727, |
|
"learning_rate": 1.3698630136986302e-06, |
|
"loss": 1.4983, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014965986394557823, |
|
"grad_norm": 0.39634451341504184, |
|
"learning_rate": 1.5068493150684932e-06, |
|
"loss": 1.71, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0163265306122449, |
|
"grad_norm": 0.2918296142957545, |
|
"learning_rate": 1.6438356164383561e-06, |
|
"loss": 1.6983, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.017687074829931974, |
|
"grad_norm": 0.3333249210865954, |
|
"learning_rate": 1.7808219178082193e-06, |
|
"loss": 1.6435, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01904761904761905, |
|
"grad_norm": 0.3288930419026758, |
|
"learning_rate": 1.9178082191780823e-06, |
|
"loss": 1.9445, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02040816326530612, |
|
"grad_norm": 0.3311742875918285, |
|
"learning_rate": 2.0547945205479454e-06, |
|
"loss": 1.8007, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.021768707482993196, |
|
"grad_norm": 0.24222843258421317, |
|
"learning_rate": 2.191780821917808e-06, |
|
"loss": 1.8698, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02312925170068027, |
|
"grad_norm": 0.2863215351075517, |
|
"learning_rate": 2.3287671232876713e-06, |
|
"loss": 1.8295, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.024489795918367346, |
|
"grad_norm": 0.37000991286313667, |
|
"learning_rate": 2.4657534246575345e-06, |
|
"loss": 1.7748, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02585034013605442, |
|
"grad_norm": 0.305345665951125, |
|
"learning_rate": 2.6027397260273973e-06, |
|
"loss": 1.7799, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.027210884353741496, |
|
"grad_norm": 0.276577651886119, |
|
"learning_rate": 2.7397260273972604e-06, |
|
"loss": 1.5892, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02857142857142857, |
|
"grad_norm": 0.40747672861545675, |
|
"learning_rate": 2.876712328767123e-06, |
|
"loss": 1.797, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.029931972789115645, |
|
"grad_norm": 0.1663214297242309, |
|
"learning_rate": 3.0136986301369864e-06, |
|
"loss": 1.8254, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.031292517006802724, |
|
"grad_norm": 0.34875514252556655, |
|
"learning_rate": 3.1506849315068495e-06, |
|
"loss": 1.5967, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0326530612244898, |
|
"grad_norm": 0.31488445168418, |
|
"learning_rate": 3.2876712328767123e-06, |
|
"loss": 1.8033, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.034013605442176874, |
|
"grad_norm": 0.9585107293220959, |
|
"learning_rate": 3.4246575342465754e-06, |
|
"loss": 1.9985, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03537414965986395, |
|
"grad_norm": 0.4719659909416967, |
|
"learning_rate": 3.5616438356164386e-06, |
|
"loss": 1.6673, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.036734693877551024, |
|
"grad_norm": 0.5206398105101208, |
|
"learning_rate": 3.6986301369863014e-06, |
|
"loss": 1.7832, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0380952380952381, |
|
"grad_norm": 0.5525391513084628, |
|
"learning_rate": 3.8356164383561645e-06, |
|
"loss": 1.8033, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03945578231292517, |
|
"grad_norm": 0.5864368554335787, |
|
"learning_rate": 3.972602739726027e-06, |
|
"loss": 1.637, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 0.18211390682659326, |
|
"learning_rate": 4.109589041095891e-06, |
|
"loss": 1.6996, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04217687074829932, |
|
"grad_norm": 0.26324481615027445, |
|
"learning_rate": 4.246575342465754e-06, |
|
"loss": 1.7077, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04353741496598639, |
|
"grad_norm": 0.487665052197852, |
|
"learning_rate": 4.383561643835616e-06, |
|
"loss": 1.5757, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.044897959183673466, |
|
"grad_norm": 0.5110956602957011, |
|
"learning_rate": 4.52054794520548e-06, |
|
"loss": 1.6525, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04625850340136054, |
|
"grad_norm": 0.41488349790070234, |
|
"learning_rate": 4.657534246575343e-06, |
|
"loss": 1.7469, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 0.3205549447320179, |
|
"learning_rate": 4.7945205479452054e-06, |
|
"loss": 1.6621, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04897959183673469, |
|
"grad_norm": 0.2759804237950767, |
|
"learning_rate": 4.931506849315069e-06, |
|
"loss": 1.8364, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.050340136054421766, |
|
"grad_norm": 0.4070079284746193, |
|
"learning_rate": 5.068493150684932e-06, |
|
"loss": 1.7928, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05170068027210884, |
|
"grad_norm": 0.3162452736080499, |
|
"learning_rate": 5.2054794520547945e-06, |
|
"loss": 1.8174, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.053061224489795916, |
|
"grad_norm": 0.339190852848117, |
|
"learning_rate": 5.342465753424658e-06, |
|
"loss": 1.8372, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05442176870748299, |
|
"grad_norm": 0.31599130496764827, |
|
"learning_rate": 5.479452054794521e-06, |
|
"loss": 1.7265, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.055782312925170066, |
|
"grad_norm": 0.18290357316608127, |
|
"learning_rate": 5.6164383561643845e-06, |
|
"loss": 1.7055, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05714285714285714, |
|
"grad_norm": 0.26450493295787797, |
|
"learning_rate": 5.753424657534246e-06, |
|
"loss": 1.859, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.058503401360544216, |
|
"grad_norm": 0.2105468885683211, |
|
"learning_rate": 5.89041095890411e-06, |
|
"loss": 1.7903, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.05986394557823129, |
|
"grad_norm": 0.21904274744659627, |
|
"learning_rate": 6.027397260273973e-06, |
|
"loss": 1.7112, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.061224489795918366, |
|
"grad_norm": 0.2766631664495227, |
|
"learning_rate": 6.164383561643836e-06, |
|
"loss": 1.626, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06258503401360545, |
|
"grad_norm": 0.27137304801321466, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 1.8546, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06394557823129252, |
|
"grad_norm": 0.17562873404669305, |
|
"learning_rate": 6.438356164383563e-06, |
|
"loss": 1.8687, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0653061224489796, |
|
"grad_norm": 0.23608638226381062, |
|
"learning_rate": 6.5753424657534245e-06, |
|
"loss": 1.5768, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 0.12395160133391969, |
|
"learning_rate": 6.712328767123288e-06, |
|
"loss": 1.8217, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 0.21069127406909471, |
|
"learning_rate": 6.849315068493151e-06, |
|
"loss": 1.7057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06938775510204082, |
|
"grad_norm": 0.17153884217244356, |
|
"learning_rate": 6.9863013698630145e-06, |
|
"loss": 1.9143, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0707482993197279, |
|
"grad_norm": 0.3084343242877715, |
|
"learning_rate": 7.123287671232877e-06, |
|
"loss": 1.8398, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07210884353741497, |
|
"grad_norm": 0.14644662918576262, |
|
"learning_rate": 7.260273972602741e-06, |
|
"loss": 1.6646, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07346938775510205, |
|
"grad_norm": 0.3001793602079481, |
|
"learning_rate": 7.397260273972603e-06, |
|
"loss": 1.689, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07482993197278912, |
|
"grad_norm": 0.301851334470962, |
|
"learning_rate": 7.534246575342466e-06, |
|
"loss": 1.5179, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0761904761904762, |
|
"grad_norm": 0.33200247196496224, |
|
"learning_rate": 7.671232876712329e-06, |
|
"loss": 1.8986, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07755102040816327, |
|
"grad_norm": 0.18181195505623798, |
|
"learning_rate": 7.808219178082192e-06, |
|
"loss": 1.6426, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.07891156462585033, |
|
"grad_norm": 0.12250708549849011, |
|
"learning_rate": 7.945205479452055e-06, |
|
"loss": 1.6214, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08027210884353742, |
|
"grad_norm": 0.09796847494385076, |
|
"learning_rate": 8.082191780821919e-06, |
|
"loss": 1.6547, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 0.12998919923759888, |
|
"learning_rate": 8.219178082191782e-06, |
|
"loss": 1.7818, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08299319727891157, |
|
"grad_norm": 0.2260386111575877, |
|
"learning_rate": 8.356164383561644e-06, |
|
"loss": 1.7807, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08435374149659863, |
|
"grad_norm": 0.33754760373428094, |
|
"learning_rate": 8.493150684931507e-06, |
|
"loss": 1.617, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08571428571428572, |
|
"grad_norm": 0.35962963555168737, |
|
"learning_rate": 8.63013698630137e-06, |
|
"loss": 1.6799, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08707482993197278, |
|
"grad_norm": 0.32506967541048193, |
|
"learning_rate": 8.767123287671233e-06, |
|
"loss": 1.6454, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.08843537414965986, |
|
"grad_norm": 0.21523079823600388, |
|
"learning_rate": 8.904109589041097e-06, |
|
"loss": 1.8856, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.08979591836734693, |
|
"grad_norm": 0.5363358811064897, |
|
"learning_rate": 9.04109589041096e-06, |
|
"loss": 1.6952, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09115646258503401, |
|
"grad_norm": 0.14306066721600327, |
|
"learning_rate": 9.178082191780823e-06, |
|
"loss": 1.8208, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09251700680272108, |
|
"grad_norm": 0.18646957264381078, |
|
"learning_rate": 9.315068493150685e-06, |
|
"loss": 1.7517, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09387755102040816, |
|
"grad_norm": 0.19137982075531637, |
|
"learning_rate": 9.452054794520548e-06, |
|
"loss": 1.6456, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 0.15987203027468555, |
|
"learning_rate": 9.589041095890411e-06, |
|
"loss": 1.7148, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09659863945578231, |
|
"grad_norm": 0.16311504243422864, |
|
"learning_rate": 9.726027397260275e-06, |
|
"loss": 1.6627, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09795918367346938, |
|
"grad_norm": 0.10186314299964105, |
|
"learning_rate": 9.863013698630138e-06, |
|
"loss": 1.5856, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.09931972789115646, |
|
"grad_norm": 0.13469761876363148, |
|
"learning_rate": 1e-05, |
|
"loss": 1.6557, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.10068027210884353, |
|
"grad_norm": 0.11568418682806415, |
|
"learning_rate": 9.999987357098372e-06, |
|
"loss": 1.7807, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"grad_norm": 0.11288388506482096, |
|
"learning_rate": 9.999949428457423e-06, |
|
"loss": 1.8232, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.10340136054421768, |
|
"grad_norm": 0.16329859637421754, |
|
"learning_rate": 9.999886214268967e-06, |
|
"loss": 1.7462, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.10476190476190476, |
|
"grad_norm": 0.20231664635671653, |
|
"learning_rate": 9.999797714852686e-06, |
|
"loss": 1.5938, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.10612244897959183, |
|
"grad_norm": 0.34538065180937266, |
|
"learning_rate": 9.999683930656135e-06, |
|
"loss": 1.8806, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.10748299319727891, |
|
"grad_norm": 0.13354157904043504, |
|
"learning_rate": 9.999544862254743e-06, |
|
"loss": 1.801, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.10884353741496598, |
|
"grad_norm": 0.13220305876865404, |
|
"learning_rate": 9.999380510351796e-06, |
|
"loss": 1.6805, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11020408163265306, |
|
"grad_norm": 0.13768110879863274, |
|
"learning_rate": 9.999190875778452e-06, |
|
"loss": 1.7481, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11156462585034013, |
|
"grad_norm": 0.11222690770456831, |
|
"learning_rate": 9.998975959493722e-06, |
|
"loss": 1.7894, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11292517006802721, |
|
"grad_norm": 0.11775170157819592, |
|
"learning_rate": 9.998735762584471e-06, |
|
"loss": 1.8592, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11428571428571428, |
|
"grad_norm": 0.20855277686570553, |
|
"learning_rate": 9.998470286265415e-06, |
|
"loss": 1.7145, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11564625850340136, |
|
"grad_norm": 0.10682809945125131, |
|
"learning_rate": 9.998179531879112e-06, |
|
"loss": 1.7563, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11700680272108843, |
|
"grad_norm": 0.1332681057101403, |
|
"learning_rate": 9.99786350089595e-06, |
|
"loss": 1.6698, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.11836734693877551, |
|
"grad_norm": 0.1442352006249483, |
|
"learning_rate": 9.99752219491415e-06, |
|
"loss": 1.542, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.11972789115646258, |
|
"grad_norm": 0.09723976872539679, |
|
"learning_rate": 9.997155615659753e-06, |
|
"loss": 1.5545, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12108843537414966, |
|
"grad_norm": 0.15078850009122496, |
|
"learning_rate": 9.996763764986606e-06, |
|
"loss": 1.6872, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 0.09880013032692718, |
|
"learning_rate": 9.996346644876363e-06, |
|
"loss": 1.5761, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12380952380952381, |
|
"grad_norm": 0.1797981570168221, |
|
"learning_rate": 9.995904257438467e-06, |
|
"loss": 1.5885, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1251700680272109, |
|
"grad_norm": 0.14066405347976094, |
|
"learning_rate": 9.995436604910142e-06, |
|
"loss": 1.7558, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12653061224489795, |
|
"grad_norm": 0.2804984380485241, |
|
"learning_rate": 9.994943689656381e-06, |
|
"loss": 1.5653, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12789115646258503, |
|
"grad_norm": 0.09802426112688165, |
|
"learning_rate": 9.994425514169938e-06, |
|
"loss": 1.8666, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1292517006802721, |
|
"grad_norm": 0.2640163991220947, |
|
"learning_rate": 9.993882081071307e-06, |
|
"loss": 1.8331, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1306122448979592, |
|
"grad_norm": 0.12584718580988416, |
|
"learning_rate": 9.99331339310872e-06, |
|
"loss": 1.7264, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13197278911564625, |
|
"grad_norm": 0.11723300893007116, |
|
"learning_rate": 9.99271945315812e-06, |
|
"loss": 1.774, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.11104245778454394, |
|
"learning_rate": 9.992100264223156e-06, |
|
"loss": 1.7154, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.1346938775510204, |
|
"grad_norm": 0.0915644970371204, |
|
"learning_rate": 9.99145582943517e-06, |
|
"loss": 1.6768, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 0.11971918094721708, |
|
"learning_rate": 9.990786152053169e-06, |
|
"loss": 1.895, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13741496598639455, |
|
"grad_norm": 0.13849974347702929, |
|
"learning_rate": 9.99009123546382e-06, |
|
"loss": 1.9232, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.13877551020408163, |
|
"grad_norm": 0.0832290902024341, |
|
"learning_rate": 9.98937108318143e-06, |
|
"loss": 1.419, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.1401360544217687, |
|
"grad_norm": 0.09490309244168035, |
|
"learning_rate": 9.988625698847921e-06, |
|
"loss": 1.6096, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.1414965986394558, |
|
"grad_norm": 0.08634281151584555, |
|
"learning_rate": 9.987855086232824e-06, |
|
"loss": 1.6766, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 0.12657846070776754, |
|
"learning_rate": 9.98705924923325e-06, |
|
"loss": 1.7755, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.14421768707482993, |
|
"grad_norm": 0.1730231080244019, |
|
"learning_rate": 9.986238191873874e-06, |
|
"loss": 1.671, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.145578231292517, |
|
"grad_norm": 0.11653855558191023, |
|
"learning_rate": 9.985391918306915e-06, |
|
"loss": 1.6012, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.1469387755102041, |
|
"grad_norm": 0.09868922955378823, |
|
"learning_rate": 9.984520432812117e-06, |
|
"loss": 1.8218, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.14829931972789115, |
|
"grad_norm": 0.08718149041105193, |
|
"learning_rate": 9.983623739796718e-06, |
|
"loss": 1.6361, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.14965986394557823, |
|
"grad_norm": 0.08536190731319725, |
|
"learning_rate": 9.982701843795441e-06, |
|
"loss": 1.8356, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1510204081632653, |
|
"grad_norm": 0.1778419657439268, |
|
"learning_rate": 9.981754749470463e-06, |
|
"loss": 1.6968, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.1523809523809524, |
|
"grad_norm": 0.12982223254146993, |
|
"learning_rate": 9.980782461611391e-06, |
|
"loss": 1.8005, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15374149659863945, |
|
"grad_norm": 0.08982117932691205, |
|
"learning_rate": 9.979784985135239e-06, |
|
"loss": 1.7645, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15510204081632653, |
|
"grad_norm": 0.12460716696891104, |
|
"learning_rate": 9.978762325086408e-06, |
|
"loss": 1.6455, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1564625850340136, |
|
"grad_norm": 0.09362932823935477, |
|
"learning_rate": 9.977714486636657e-06, |
|
"loss": 1.8083, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15782312925170067, |
|
"grad_norm": 0.09099536634076917, |
|
"learning_rate": 9.976641475085067e-06, |
|
"loss": 1.7776, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.15918367346938775, |
|
"grad_norm": 0.08568595730791906, |
|
"learning_rate": 9.975543295858035e-06, |
|
"loss": 1.8846, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.16054421768707483, |
|
"grad_norm": 0.1310404323604523, |
|
"learning_rate": 9.974419954509225e-06, |
|
"loss": 1.5725, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.1619047619047619, |
|
"grad_norm": 0.11863021260862251, |
|
"learning_rate": 9.97327145671956e-06, |
|
"loss": 1.6409, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 0.11864941995819639, |
|
"learning_rate": 9.972097808297174e-06, |
|
"loss": 1.7081, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16462585034013605, |
|
"grad_norm": 0.08013610894171046, |
|
"learning_rate": 9.970899015177398e-06, |
|
"loss": 1.7804, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16598639455782313, |
|
"grad_norm": 0.12399055095582327, |
|
"learning_rate": 9.969675083422719e-06, |
|
"loss": 1.6848, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.1673469387755102, |
|
"grad_norm": 0.1433779964353759, |
|
"learning_rate": 9.96842601922276e-06, |
|
"loss": 1.6888, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.16870748299319727, |
|
"grad_norm": 0.09915990901687576, |
|
"learning_rate": 9.967151828894234e-06, |
|
"loss": 1.7802, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"grad_norm": 0.10206449162778881, |
|
"learning_rate": 9.965852518880931e-06, |
|
"loss": 1.806, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.17142857142857143, |
|
"grad_norm": 0.09282143748721522, |
|
"learning_rate": 9.964528095753669e-06, |
|
"loss": 1.5987, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1727891156462585, |
|
"grad_norm": 0.16907020113729054, |
|
"learning_rate": 9.963178566210268e-06, |
|
"loss": 1.7569, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17414965986394557, |
|
"grad_norm": 0.08207848199751772, |
|
"learning_rate": 9.961803937075516e-06, |
|
"loss": 1.6724, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17551020408163265, |
|
"grad_norm": 0.07319670048822571, |
|
"learning_rate": 9.960404215301133e-06, |
|
"loss": 1.7498, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17687074829931973, |
|
"grad_norm": 0.08159880339274488, |
|
"learning_rate": 9.958979407965738e-06, |
|
"loss": 1.65, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1782312925170068, |
|
"grad_norm": 0.09730054828361595, |
|
"learning_rate": 9.95752952227481e-06, |
|
"loss": 1.7796, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.17959183673469387, |
|
"grad_norm": 0.2629732363287427, |
|
"learning_rate": 9.956054565560653e-06, |
|
"loss": 1.6904, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.18095238095238095, |
|
"grad_norm": 0.07651749890098045, |
|
"learning_rate": 9.954554545282363e-06, |
|
"loss": 1.7809, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.18231292517006803, |
|
"grad_norm": 0.09628395581138101, |
|
"learning_rate": 9.953029469025777e-06, |
|
"loss": 1.8135, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.1836734693877551, |
|
"grad_norm": 0.09612376832963275, |
|
"learning_rate": 9.951479344503459e-06, |
|
"loss": 1.6617, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.18503401360544217, |
|
"grad_norm": 0.08107993061371403, |
|
"learning_rate": 9.949904179554632e-06, |
|
"loss": 1.6634, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18639455782312925, |
|
"grad_norm": 0.07754512965459885, |
|
"learning_rate": 9.94830398214516e-06, |
|
"loss": 1.7732, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.18775510204081633, |
|
"grad_norm": 0.07265030754659244, |
|
"learning_rate": 9.946678760367498e-06, |
|
"loss": 1.7905, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1891156462585034, |
|
"grad_norm": 0.09088517967487394, |
|
"learning_rate": 9.945028522440654e-06, |
|
"loss": 1.49, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 0.21999926224724559, |
|
"learning_rate": 9.943353276710146e-06, |
|
"loss": 2.0726, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19183673469387755, |
|
"grad_norm": 0.07397509235485085, |
|
"learning_rate": 9.941653031647963e-06, |
|
"loss": 1.6069, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.19319727891156463, |
|
"grad_norm": 0.17678430730401373, |
|
"learning_rate": 9.939927795852513e-06, |
|
"loss": 1.8128, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1945578231292517, |
|
"grad_norm": 0.09311447920236875, |
|
"learning_rate": 9.938177578048593e-06, |
|
"loss": 1.682, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19591836734693877, |
|
"grad_norm": 0.08923483853542422, |
|
"learning_rate": 9.936402387087339e-06, |
|
"loss": 1.7808, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19727891156462585, |
|
"grad_norm": 0.3457260004062318, |
|
"learning_rate": 9.93460223194617e-06, |
|
"loss": 1.921, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.19863945578231293, |
|
"grad_norm": 0.11272086420065035, |
|
"learning_rate": 9.932777121728765e-06, |
|
"loss": 1.627, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0828154138118513, |
|
"learning_rate": 9.930927065664997e-06, |
|
"loss": 1.85, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.20136054421768707, |
|
"grad_norm": 0.10871781486388528, |
|
"learning_rate": 9.929052073110897e-06, |
|
"loss": 1.8526, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.20272108843537415, |
|
"grad_norm": 0.08372164326475892, |
|
"learning_rate": 9.927152153548605e-06, |
|
"loss": 1.6184, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 0.2301749352348319, |
|
"learning_rate": 9.925227316586316e-06, |
|
"loss": 1.6416, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2054421768707483, |
|
"grad_norm": 0.11534866479323268, |
|
"learning_rate": 9.923277571958245e-06, |
|
"loss": 1.6587, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.20680272108843537, |
|
"grad_norm": 0.1411655046905855, |
|
"learning_rate": 9.921302929524561e-06, |
|
"loss": 1.671, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.20816326530612245, |
|
"grad_norm": 0.07211757999248616, |
|
"learning_rate": 9.919303399271348e-06, |
|
"loss": 1.7163, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.20952380952380953, |
|
"grad_norm": 0.0873746156242924, |
|
"learning_rate": 9.917278991310553e-06, |
|
"loss": 1.6367, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2108843537414966, |
|
"grad_norm": 0.0819591281688772, |
|
"learning_rate": 9.915229715879928e-06, |
|
"loss": 1.6989, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.21224489795918366, |
|
"grad_norm": 0.08552981032847369, |
|
"learning_rate": 9.913155583342994e-06, |
|
"loss": 1.5244, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.21360544217687075, |
|
"grad_norm": 0.13550974122069206, |
|
"learning_rate": 9.91105660418897e-06, |
|
"loss": 1.7495, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.21496598639455783, |
|
"grad_norm": 0.07091163304804983, |
|
"learning_rate": 9.908932789032729e-06, |
|
"loss": 1.7387, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.2163265306122449, |
|
"grad_norm": 0.0838103140533003, |
|
"learning_rate": 9.906784148614745e-06, |
|
"loss": 1.7076, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.21768707482993196, |
|
"grad_norm": 0.11349611508198672, |
|
"learning_rate": 9.904610693801042e-06, |
|
"loss": 1.6596, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21904761904761905, |
|
"grad_norm": 0.07733122749252737, |
|
"learning_rate": 9.902412435583127e-06, |
|
"loss": 1.6503, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.22040816326530613, |
|
"grad_norm": 0.14625572340923682, |
|
"learning_rate": 9.900189385077948e-06, |
|
"loss": 1.564, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2217687074829932, |
|
"grad_norm": 0.09712144690644532, |
|
"learning_rate": 9.897941553527823e-06, |
|
"loss": 1.7217, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.22312925170068026, |
|
"grad_norm": 0.0712274015908157, |
|
"learning_rate": 9.895668952300403e-06, |
|
"loss": 1.6412, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.22448979591836735, |
|
"grad_norm": 0.08811945291100708, |
|
"learning_rate": 9.893371592888594e-06, |
|
"loss": 1.6192, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22585034013605443, |
|
"grad_norm": 0.07563751954927482, |
|
"learning_rate": 9.891049486910513e-06, |
|
"loss": 1.6283, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.2272108843537415, |
|
"grad_norm": 0.07473029887668768, |
|
"learning_rate": 9.888702646109423e-06, |
|
"loss": 1.6979, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 0.07966661835478112, |
|
"learning_rate": 9.886331082353673e-06, |
|
"loss": 1.6951, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.22993197278911565, |
|
"grad_norm": 0.08625904148958655, |
|
"learning_rate": 9.883934807636645e-06, |
|
"loss": 1.6239, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.23129251700680273, |
|
"grad_norm": 0.06615144618602906, |
|
"learning_rate": 9.881513834076683e-06, |
|
"loss": 1.7456, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.23265306122448978, |
|
"grad_norm": 0.14491038831608893, |
|
"learning_rate": 9.87906817391704e-06, |
|
"loss": 1.7035, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.23401360544217686, |
|
"grad_norm": 0.0832300629302243, |
|
"learning_rate": 9.876597839525814e-06, |
|
"loss": 1.6672, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.23537414965986395, |
|
"grad_norm": 0.0917489076009908, |
|
"learning_rate": 9.87410284339588e-06, |
|
"loss": 1.6075, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.23673469387755103, |
|
"grad_norm": 0.06866742872418008, |
|
"learning_rate": 9.871583198144836e-06, |
|
"loss": 1.7646, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 0.08465371920438951, |
|
"learning_rate": 9.869038916514932e-06, |
|
"loss": 1.6692, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.23945578231292516, |
|
"grad_norm": 0.09375415555940526, |
|
"learning_rate": 9.866470011373009e-06, |
|
"loss": 1.778, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.24081632653061225, |
|
"grad_norm": 0.07220432655814331, |
|
"learning_rate": 9.863876495710433e-06, |
|
"loss": 1.6857, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.24217687074829933, |
|
"grad_norm": 0.0797192184377915, |
|
"learning_rate": 9.86125838264303e-06, |
|
"loss": 1.7893, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.24353741496598638, |
|
"grad_norm": 0.07597718520214916, |
|
"learning_rate": 9.858615685411018e-06, |
|
"loss": 1.8848, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 0.08003681020814803, |
|
"learning_rate": 9.85594841737894e-06, |
|
"loss": 1.8, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24625850340136055, |
|
"grad_norm": 0.09696384289585193, |
|
"learning_rate": 9.853256592035602e-06, |
|
"loss": 1.7965, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.24761904761904763, |
|
"grad_norm": 0.12333580747104468, |
|
"learning_rate": 9.850540222993994e-06, |
|
"loss": 1.6365, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.24897959183673468, |
|
"grad_norm": 0.07310272321033273, |
|
"learning_rate": 9.847799323991234e-06, |
|
"loss": 1.5765, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.2503401360544218, |
|
"grad_norm": 0.12923131777808997, |
|
"learning_rate": 9.845033908888485e-06, |
|
"loss": 1.8017, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.2503401360544218, |
|
"eval_loss": 1.7241544723510742, |
|
"eval_runtime": 76.6185, |
|
"eval_samples_per_second": 53.159, |
|
"eval_steps_per_second": 6.656, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.25170068027210885, |
|
"grad_norm": 0.06809715986288661, |
|
"learning_rate": 9.842243991670899e-06, |
|
"loss": 1.79, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.2530612244897959, |
|
"grad_norm": 0.08842474286261379, |
|
"learning_rate": 9.839429586447534e-06, |
|
"loss": 1.6168, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.254421768707483, |
|
"grad_norm": 0.10935159810036835, |
|
"learning_rate": 9.836590707451287e-06, |
|
"loss": 1.8505, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.25578231292517006, |
|
"grad_norm": 0.12270237138661655, |
|
"learning_rate": 9.833727369038827e-06, |
|
"loss": 1.635, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.2571428571428571, |
|
"grad_norm": 0.10719376109260877, |
|
"learning_rate": 9.830839585690519e-06, |
|
"loss": 1.8374, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.2585034013605442, |
|
"grad_norm": 0.07990417660478581, |
|
"learning_rate": 9.827927372010343e-06, |
|
"loss": 1.5681, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2598639455782313, |
|
"grad_norm": 0.09668140137221073, |
|
"learning_rate": 9.824990742725835e-06, |
|
"loss": 1.6568, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.2612244897959184, |
|
"grad_norm": 0.07968515377548961, |
|
"learning_rate": 9.822029712687999e-06, |
|
"loss": 1.6007, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.26258503401360545, |
|
"grad_norm": 0.08837508173810749, |
|
"learning_rate": 9.81904429687124e-06, |
|
"loss": 1.6621, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.2639455782312925, |
|
"grad_norm": 0.08728025707185973, |
|
"learning_rate": 9.816034510373287e-06, |
|
"loss": 1.8335, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.2653061224489796, |
|
"grad_norm": 0.07961309839255727, |
|
"learning_rate": 9.81300036841511e-06, |
|
"loss": 1.7037, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.09980119114286523, |
|
"learning_rate": 9.809941886340854e-06, |
|
"loss": 1.5664, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.2680272108843537, |
|
"grad_norm": 0.07147953268098678, |
|
"learning_rate": 9.806859079617757e-06, |
|
"loss": 1.7601, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.2693877551020408, |
|
"grad_norm": 0.08653388534305975, |
|
"learning_rate": 9.803751963836065e-06, |
|
"loss": 1.54, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.2707482993197279, |
|
"grad_norm": 0.0776685121413518, |
|
"learning_rate": 9.800620554708962e-06, |
|
"loss": 1.5557, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 0.07711567735740478, |
|
"learning_rate": 9.797464868072489e-06, |
|
"loss": 1.7101, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27346938775510204, |
|
"grad_norm": 0.09355853262847387, |
|
"learning_rate": 9.794284919885456e-06, |
|
"loss": 1.7454, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.2748299319727891, |
|
"grad_norm": 0.0975587232648776, |
|
"learning_rate": 9.791080726229376e-06, |
|
"loss": 1.7479, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.2761904761904762, |
|
"grad_norm": 0.07709180794261607, |
|
"learning_rate": 9.78785230330837e-06, |
|
"loss": 1.8086, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.27755102040816326, |
|
"grad_norm": 0.09748041740615765, |
|
"learning_rate": 9.784599667449088e-06, |
|
"loss": 1.683, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.2789115646258503, |
|
"grad_norm": 0.09608384188874226, |
|
"learning_rate": 9.781322835100639e-06, |
|
"loss": 1.7985, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.2802721088435374, |
|
"grad_norm": 0.24417626356607502, |
|
"learning_rate": 9.778021822834484e-06, |
|
"loss": 1.721, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.2816326530612245, |
|
"grad_norm": 0.08000490559142356, |
|
"learning_rate": 9.774696647344376e-06, |
|
"loss": 1.5646, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.2829931972789116, |
|
"grad_norm": 0.08448656773098678, |
|
"learning_rate": 9.771347325446261e-06, |
|
"loss": 1.7897, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.28435374149659864, |
|
"grad_norm": 0.09777185072102372, |
|
"learning_rate": 9.767973874078196e-06, |
|
"loss": 1.8829, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.07490737419370372, |
|
"learning_rate": 9.764576310300268e-06, |
|
"loss": 1.8031, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2870748299319728, |
|
"grad_norm": 0.07567900709987288, |
|
"learning_rate": 9.761154651294505e-06, |
|
"loss": 1.752, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.28843537414965986, |
|
"grad_norm": 0.08322325294858353, |
|
"learning_rate": 9.757708914364784e-06, |
|
"loss": 1.6328, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.2897959183673469, |
|
"grad_norm": 0.19949313188011264, |
|
"learning_rate": 9.75423911693675e-06, |
|
"loss": 1.8577, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.291156462585034, |
|
"grad_norm": 0.07497191387563905, |
|
"learning_rate": 9.750745276557725e-06, |
|
"loss": 1.4911, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.2925170068027211, |
|
"grad_norm": 0.09096003185892962, |
|
"learning_rate": 9.747227410896624e-06, |
|
"loss": 1.5857, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2938775510204082, |
|
"grad_norm": 0.06790968657114778, |
|
"learning_rate": 9.743685537743856e-06, |
|
"loss": 1.6452, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.29523809523809524, |
|
"grad_norm": 0.08487437350333037, |
|
"learning_rate": 9.740119675011246e-06, |
|
"loss": 1.674, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.2965986394557823, |
|
"grad_norm": 0.07405858389177783, |
|
"learning_rate": 9.73652984073193e-06, |
|
"loss": 1.7461, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.2979591836734694, |
|
"grad_norm": 0.07067520018576251, |
|
"learning_rate": 9.73291605306028e-06, |
|
"loss": 1.7163, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.29931972789115646, |
|
"grad_norm": 0.0791420166635673, |
|
"learning_rate": 9.7292783302718e-06, |
|
"loss": 1.6668, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3006802721088435, |
|
"grad_norm": 0.07323313348575836, |
|
"learning_rate": 9.72561669076304e-06, |
|
"loss": 1.6398, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.3020408163265306, |
|
"grad_norm": 0.07634813609350796, |
|
"learning_rate": 9.721931153051497e-06, |
|
"loss": 1.6447, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3034013605442177, |
|
"grad_norm": 0.07950888254230533, |
|
"learning_rate": 9.718221735775527e-06, |
|
"loss": 1.7845, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.3047619047619048, |
|
"grad_norm": 0.1408011781580588, |
|
"learning_rate": 9.714488457694252e-06, |
|
"loss": 1.7427, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"grad_norm": 0.20352696620915875, |
|
"learning_rate": 9.710731337687457e-06, |
|
"loss": 1.7789, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3074829931972789, |
|
"grad_norm": 0.3632349628769343, |
|
"learning_rate": 9.7069503947555e-06, |
|
"loss": 1.7108, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.308843537414966, |
|
"grad_norm": 0.08426345577870951, |
|
"learning_rate": 9.70314564801922e-06, |
|
"loss": 1.5991, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.31020408163265306, |
|
"grad_norm": 0.07487391878859809, |
|
"learning_rate": 9.699317116719831e-06, |
|
"loss": 1.6637, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3115646258503401, |
|
"grad_norm": 0.11408922719163124, |
|
"learning_rate": 9.695464820218829e-06, |
|
"loss": 1.734, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3129251700680272, |
|
"grad_norm": 0.08379918487601977, |
|
"learning_rate": 9.6915887779979e-06, |
|
"loss": 1.5813, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3142857142857143, |
|
"grad_norm": 0.08052280305893883, |
|
"learning_rate": 9.68768900965881e-06, |
|
"loss": 1.7164, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.31564625850340133, |
|
"grad_norm": 0.09154693865406958, |
|
"learning_rate": 9.683765534923315e-06, |
|
"loss": 1.5906, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.31700680272108844, |
|
"grad_norm": 0.1156976576631727, |
|
"learning_rate": 9.679818373633054e-06, |
|
"loss": 1.5045, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.3183673469387755, |
|
"grad_norm": 0.08364003073959134, |
|
"learning_rate": 9.67584754574946e-06, |
|
"loss": 1.6152, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.3197278911564626, |
|
"grad_norm": 0.15089067639580192, |
|
"learning_rate": 9.671853071353645e-06, |
|
"loss": 1.7127, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.32108843537414966, |
|
"grad_norm": 0.08835383872875176, |
|
"learning_rate": 9.667834970646309e-06, |
|
"loss": 1.609, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.3224489795918367, |
|
"grad_norm": 0.08056944247122935, |
|
"learning_rate": 9.663793263947631e-06, |
|
"loss": 1.6126, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.3238095238095238, |
|
"grad_norm": 0.09989875040942321, |
|
"learning_rate": 9.659727971697173e-06, |
|
"loss": 1.8035, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.3251700680272109, |
|
"grad_norm": 0.07493814768096231, |
|
"learning_rate": 9.655639114453771e-06, |
|
"loss": 1.813, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 0.0879835131172389, |
|
"learning_rate": 9.651526712895431e-06, |
|
"loss": 1.6926, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.32789115646258504, |
|
"grad_norm": 0.1315730713741491, |
|
"learning_rate": 9.647390787819232e-06, |
|
"loss": 1.6993, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.3292517006802721, |
|
"grad_norm": 0.08144482513159658, |
|
"learning_rate": 9.643231360141205e-06, |
|
"loss": 1.5821, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.3306122448979592, |
|
"grad_norm": 0.11213233561578728, |
|
"learning_rate": 9.639048450896251e-06, |
|
"loss": 1.6491, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.33197278911564626, |
|
"grad_norm": 0.10094097099195502, |
|
"learning_rate": 9.63484208123801e-06, |
|
"loss": 1.5492, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.06549445299823263, |
|
"learning_rate": 9.630612272438771e-06, |
|
"loss": 1.6378, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3346938775510204, |
|
"grad_norm": 0.09866738594204459, |
|
"learning_rate": 9.626359045889356e-06, |
|
"loss": 1.6712, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.3360544217687075, |
|
"grad_norm": 0.08348510716160104, |
|
"learning_rate": 9.622082423099013e-06, |
|
"loss": 1.6177, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.33741496598639453, |
|
"grad_norm": 0.0912979636888496, |
|
"learning_rate": 9.617782425695314e-06, |
|
"loss": 1.7233, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.33877551020408164, |
|
"grad_norm": 0.0730054447204009, |
|
"learning_rate": 9.613459075424033e-06, |
|
"loss": 1.8876, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 0.07708564338244163, |
|
"learning_rate": 9.609112394149052e-06, |
|
"loss": 1.5562, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3414965986394558, |
|
"grad_norm": 0.09288554194206548, |
|
"learning_rate": 9.604742403852232e-06, |
|
"loss": 1.7512, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.34285714285714286, |
|
"grad_norm": 0.09029592521774728, |
|
"learning_rate": 9.600349126633317e-06, |
|
"loss": 1.4964, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.3442176870748299, |
|
"grad_norm": 0.09268667220104135, |
|
"learning_rate": 9.595932584709815e-06, |
|
"loss": 1.5166, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.345578231292517, |
|
"grad_norm": 0.09895745636981777, |
|
"learning_rate": 9.59149280041689e-06, |
|
"loss": 1.5862, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.3469387755102041, |
|
"grad_norm": 0.0918028367563306, |
|
"learning_rate": 9.587029796207246e-06, |
|
"loss": 1.6704, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.34829931972789113, |
|
"grad_norm": 0.08184214411162406, |
|
"learning_rate": 9.582543594651006e-06, |
|
"loss": 1.5767, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.34965986394557824, |
|
"grad_norm": 0.10380891219881462, |
|
"learning_rate": 9.578034218435618e-06, |
|
"loss": 1.7974, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.3510204081632653, |
|
"grad_norm": 0.07763979596015386, |
|
"learning_rate": 9.573501690365718e-06, |
|
"loss": 1.6754, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.3523809523809524, |
|
"grad_norm": 0.0974676538104556, |
|
"learning_rate": 9.568946033363032e-06, |
|
"loss": 1.7312, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.35374149659863946, |
|
"grad_norm": 0.08571433404959614, |
|
"learning_rate": 9.564367270466247e-06, |
|
"loss": 1.5805, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3551020408163265, |
|
"grad_norm": 0.0818876254257716, |
|
"learning_rate": 9.559765424830903e-06, |
|
"loss": 1.7883, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.3564625850340136, |
|
"grad_norm": 0.0803950448066755, |
|
"learning_rate": 9.555140519729273e-06, |
|
"loss": 1.7474, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3578231292517007, |
|
"grad_norm": 0.0843722550488927, |
|
"learning_rate": 9.550492578550246e-06, |
|
"loss": 1.5564, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.35918367346938773, |
|
"grad_norm": 0.07803324012301414, |
|
"learning_rate": 9.545821624799205e-06, |
|
"loss": 1.4879, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.36054421768707484, |
|
"grad_norm": 0.08256837435665124, |
|
"learning_rate": 9.541127682097916e-06, |
|
"loss": 1.7395, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.3619047619047619, |
|
"grad_norm": 0.08789940175813588, |
|
"learning_rate": 9.536410774184397e-06, |
|
"loss": 1.6602, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.363265306122449, |
|
"grad_norm": 0.10680679462820718, |
|
"learning_rate": 9.531670924912814e-06, |
|
"loss": 1.4675, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.36462585034013606, |
|
"grad_norm": 0.07750163404143413, |
|
"learning_rate": 9.526908158253345e-06, |
|
"loss": 1.7119, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.3659863945578231, |
|
"grad_norm": 0.07081517755227287, |
|
"learning_rate": 9.522122498292066e-06, |
|
"loss": 1.6457, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 0.10501830739522407, |
|
"learning_rate": 9.517313969230826e-06, |
|
"loss": 1.6398, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3687074829931973, |
|
"grad_norm": 0.08143325631533527, |
|
"learning_rate": 9.512482595387131e-06, |
|
"loss": 1.6122, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.37006802721088433, |
|
"grad_norm": 0.08749817297460229, |
|
"learning_rate": 9.507628401194015e-06, |
|
"loss": 1.7328, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.37142857142857144, |
|
"grad_norm": 0.10516442293102286, |
|
"learning_rate": 9.50275141119992e-06, |
|
"loss": 1.5773, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.3727891156462585, |
|
"grad_norm": 0.07808789214492048, |
|
"learning_rate": 9.497851650068561e-06, |
|
"loss": 1.6635, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.3741496598639456, |
|
"grad_norm": 0.17789525917278, |
|
"learning_rate": 9.492929142578823e-06, |
|
"loss": 1.9121, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.37551020408163266, |
|
"grad_norm": 0.11269638220817121, |
|
"learning_rate": 9.487983913624616e-06, |
|
"loss": 1.7355, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.3768707482993197, |
|
"grad_norm": 0.08409991854502387, |
|
"learning_rate": 9.483015988214757e-06, |
|
"loss": 1.7628, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.3782312925170068, |
|
"grad_norm": 0.07365655913118094, |
|
"learning_rate": 9.478025391472841e-06, |
|
"loss": 1.8144, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.3795918367346939, |
|
"grad_norm": 0.12778690022382183, |
|
"learning_rate": 9.473012148637121e-06, |
|
"loss": 1.4851, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.38095238095238093, |
|
"grad_norm": 0.0809734685514322, |
|
"learning_rate": 9.467976285060369e-06, |
|
"loss": 1.7698, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.38231292517006804, |
|
"grad_norm": 0.08513218534341072, |
|
"learning_rate": 9.462917826209757e-06, |
|
"loss": 1.6411, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.3836734693877551, |
|
"grad_norm": 0.09008342709764051, |
|
"learning_rate": 9.457836797666722e-06, |
|
"loss": 1.694, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.38503401360544215, |
|
"grad_norm": 0.10502121760541401, |
|
"learning_rate": 9.452733225126845e-06, |
|
"loss": 1.6999, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.38639455782312926, |
|
"grad_norm": 0.07343666145039363, |
|
"learning_rate": 9.44760713439971e-06, |
|
"loss": 1.8164, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.3877551020408163, |
|
"grad_norm": 0.08974960620233877, |
|
"learning_rate": 9.442458551408784e-06, |
|
"loss": 1.8539, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3891156462585034, |
|
"grad_norm": 0.07202421778040075, |
|
"learning_rate": 9.437287502191275e-06, |
|
"loss": 1.5453, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.3904761904761905, |
|
"grad_norm": 0.09076340883522513, |
|
"learning_rate": 9.43209401289801e-06, |
|
"loss": 1.7088, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.39183673469387753, |
|
"grad_norm": 0.08425026537963505, |
|
"learning_rate": 9.426878109793301e-06, |
|
"loss": 1.4451, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.39319727891156464, |
|
"grad_norm": 0.09510405982528822, |
|
"learning_rate": 9.421639819254806e-06, |
|
"loss": 1.7913, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.3945578231292517, |
|
"grad_norm": 0.09616833843974483, |
|
"learning_rate": 9.416379167773403e-06, |
|
"loss": 1.649, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.39591836734693875, |
|
"grad_norm": 0.08286660774561835, |
|
"learning_rate": 9.41109618195305e-06, |
|
"loss": 1.9277, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.39727891156462586, |
|
"grad_norm": 0.07769697723139755, |
|
"learning_rate": 9.405790888510655e-06, |
|
"loss": 1.7279, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.3986394557823129, |
|
"grad_norm": 0.09006259560100753, |
|
"learning_rate": 9.400463314275942e-06, |
|
"loss": 1.6039, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.09012228611455256, |
|
"learning_rate": 9.39511348619131e-06, |
|
"loss": 1.7865, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.4013605442176871, |
|
"grad_norm": 0.09086893554478499, |
|
"learning_rate": 9.389741431311694e-06, |
|
"loss": 1.6225, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.40272108843537413, |
|
"grad_norm": 0.1067223732758171, |
|
"learning_rate": 9.384347176804441e-06, |
|
"loss": 1.8657, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.40408163265306124, |
|
"grad_norm": 0.09585514004003831, |
|
"learning_rate": 9.378930749949166e-06, |
|
"loss": 1.6826, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.4054421768707483, |
|
"grad_norm": 0.08230460323355868, |
|
"learning_rate": 9.373492178137606e-06, |
|
"loss": 1.8107, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.40680272108843535, |
|
"grad_norm": 0.08574613920900533, |
|
"learning_rate": 9.368031488873492e-06, |
|
"loss": 1.5687, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 0.07911826770354155, |
|
"learning_rate": 9.36254870977241e-06, |
|
"loss": 1.8716, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4095238095238095, |
|
"grad_norm": 0.08540782320726534, |
|
"learning_rate": 9.357043868561653e-06, |
|
"loss": 1.7997, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.4108843537414966, |
|
"grad_norm": 0.08850925770828139, |
|
"learning_rate": 9.351516993080088e-06, |
|
"loss": 1.6299, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4122448979591837, |
|
"grad_norm": 0.10611011164886108, |
|
"learning_rate": 9.34596811127801e-06, |
|
"loss": 1.5621, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.41360544217687073, |
|
"grad_norm": 0.08382183266579103, |
|
"learning_rate": 9.340397251217009e-06, |
|
"loss": 1.4407, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.41496598639455784, |
|
"grad_norm": 0.09119095993947843, |
|
"learning_rate": 9.334804441069819e-06, |
|
"loss": 1.7161, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4163265306122449, |
|
"grad_norm": 0.11113995697266435, |
|
"learning_rate": 9.329189709120175e-06, |
|
"loss": 1.4126, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.41768707482993195, |
|
"grad_norm": 0.08080147586730375, |
|
"learning_rate": 9.323553083762681e-06, |
|
"loss": 1.7303, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.41904761904761906, |
|
"grad_norm": 0.10411476203370171, |
|
"learning_rate": 9.31789459350266e-06, |
|
"loss": 1.6408, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4204081632653061, |
|
"grad_norm": 0.0859822277343485, |
|
"learning_rate": 9.312214266956003e-06, |
|
"loss": 1.6534, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.4217687074829932, |
|
"grad_norm": 0.08604098070322257, |
|
"learning_rate": 9.306512132849035e-06, |
|
"loss": 1.6252, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4231292517006803, |
|
"grad_norm": 0.0876425921649466, |
|
"learning_rate": 9.300788220018363e-06, |
|
"loss": 1.5096, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.42448979591836733, |
|
"grad_norm": 0.08371274563563173, |
|
"learning_rate": 9.295042557410736e-06, |
|
"loss": 1.7352, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.42585034013605444, |
|
"grad_norm": 0.13500827487489858, |
|
"learning_rate": 9.28927517408289e-06, |
|
"loss": 1.7902, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.4272108843537415, |
|
"grad_norm": 0.08754620765852711, |
|
"learning_rate": 9.28348609920141e-06, |
|
"loss": 1.6862, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.1028946128162606, |
|
"learning_rate": 9.27767536204258e-06, |
|
"loss": 1.6907, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.42993197278911566, |
|
"grad_norm": 0.08692259223714764, |
|
"learning_rate": 9.271842991992231e-06, |
|
"loss": 1.638, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.4312925170068027, |
|
"grad_norm": 0.09542502295403, |
|
"learning_rate": 9.26598901854559e-06, |
|
"loss": 1.7157, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.4326530612244898, |
|
"grad_norm": 0.09080935991935338, |
|
"learning_rate": 9.260113471307148e-06, |
|
"loss": 1.5851, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.4340136054421769, |
|
"grad_norm": 0.09865728458566181, |
|
"learning_rate": 9.254216379990487e-06, |
|
"loss": 1.8897, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.43537414965986393, |
|
"grad_norm": 0.10060290482375228, |
|
"learning_rate": 9.248297774418147e-06, |
|
"loss": 1.5605, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.43673469387755104, |
|
"grad_norm": 0.09013488977054508, |
|
"learning_rate": 9.242357684521467e-06, |
|
"loss": 1.5582, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.4380952380952381, |
|
"grad_norm": 0.12978917401977683, |
|
"learning_rate": 9.236396140340435e-06, |
|
"loss": 1.5953, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.43945578231292515, |
|
"grad_norm": 0.09012186897343348, |
|
"learning_rate": 9.230413172023538e-06, |
|
"loss": 1.6678, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.44081632653061226, |
|
"grad_norm": 0.07610784588472097, |
|
"learning_rate": 9.224408809827609e-06, |
|
"loss": 1.6697, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.4421768707482993, |
|
"grad_norm": 0.08134414493064245, |
|
"learning_rate": 9.218383084117671e-06, |
|
"loss": 1.6543, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4435374149659864, |
|
"grad_norm": 0.07969199857220131, |
|
"learning_rate": 9.212336025366789e-06, |
|
"loss": 1.7372, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.4448979591836735, |
|
"grad_norm": 0.08833397568245774, |
|
"learning_rate": 9.206267664155906e-06, |
|
"loss": 1.5033, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.44625850340136053, |
|
"grad_norm": 0.09013321784578471, |
|
"learning_rate": 9.200178031173706e-06, |
|
"loss": 1.7467, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.44761904761904764, |
|
"grad_norm": 0.08492129873211993, |
|
"learning_rate": 9.194067157216436e-06, |
|
"loss": 1.6346, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 0.08924441822794496, |
|
"learning_rate": 9.187935073187768e-06, |
|
"loss": 1.5647, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.45034013605442175, |
|
"grad_norm": 0.2238269401441882, |
|
"learning_rate": 9.181781810098638e-06, |
|
"loss": 1.9641, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.45170068027210886, |
|
"grad_norm": 0.08505567098719835, |
|
"learning_rate": 9.175607399067086e-06, |
|
"loss": 1.723, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.4530612244897959, |
|
"grad_norm": 0.09479499636885363, |
|
"learning_rate": 9.1694118713181e-06, |
|
"loss": 1.7358, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.454421768707483, |
|
"grad_norm": 0.09532298638421347, |
|
"learning_rate": 9.163195258183457e-06, |
|
"loss": 1.652, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.4557823129251701, |
|
"grad_norm": 0.1435078453546247, |
|
"learning_rate": 9.156957591101573e-06, |
|
"loss": 1.8876, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 0.08623460392050768, |
|
"learning_rate": 9.150698901617326e-06, |
|
"loss": 1.6408, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.45850340136054424, |
|
"grad_norm": 0.08103596406817255, |
|
"learning_rate": 9.144419221381919e-06, |
|
"loss": 1.582, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.4598639455782313, |
|
"grad_norm": 0.09624169989634061, |
|
"learning_rate": 9.138118582152704e-06, |
|
"loss": 1.7272, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.46122448979591835, |
|
"grad_norm": 0.06914577882609961, |
|
"learning_rate": 9.131797015793026e-06, |
|
"loss": 1.6864, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.46258503401360546, |
|
"grad_norm": 0.09682023357095138, |
|
"learning_rate": 9.125454554272057e-06, |
|
"loss": 1.5849, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4639455782312925, |
|
"grad_norm": 0.08865054891775723, |
|
"learning_rate": 9.119091229664648e-06, |
|
"loss": 1.4716, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.46530612244897956, |
|
"grad_norm": 0.09112991799939003, |
|
"learning_rate": 9.112707074151152e-06, |
|
"loss": 1.6393, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.4666666666666667, |
|
"grad_norm": 0.09319883560181061, |
|
"learning_rate": 9.106302120017272e-06, |
|
"loss": 1.7619, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.46802721088435373, |
|
"grad_norm": 0.101061665585339, |
|
"learning_rate": 9.099876399653885e-06, |
|
"loss": 1.6286, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.46938775510204084, |
|
"grad_norm": 0.09755047037445551, |
|
"learning_rate": 9.093429945556895e-06, |
|
"loss": 1.6591, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.4707482993197279, |
|
"grad_norm": 0.0831755062746902, |
|
"learning_rate": 9.086962790327057e-06, |
|
"loss": 1.7728, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.47210884353741495, |
|
"grad_norm": 0.0981445280966388, |
|
"learning_rate": 9.08047496666981e-06, |
|
"loss": 1.6489, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.47346938775510206, |
|
"grad_norm": 0.0965447984049988, |
|
"learning_rate": 9.073966507395123e-06, |
|
"loss": 1.7807, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.4748299319727891, |
|
"grad_norm": 0.08805192634428297, |
|
"learning_rate": 9.06743744541732e-06, |
|
"loss": 1.6932, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 0.07486129688956443, |
|
"learning_rate": 9.060887813754914e-06, |
|
"loss": 1.6828, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4775510204081633, |
|
"grad_norm": 0.08844318223023526, |
|
"learning_rate": 9.054317645530449e-06, |
|
"loss": 1.5791, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.47891156462585033, |
|
"grad_norm": 0.08590901507611927, |
|
"learning_rate": 9.047726973970317e-06, |
|
"loss": 1.8916, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.48027210884353744, |
|
"grad_norm": 0.08275711528371654, |
|
"learning_rate": 9.041115832404605e-06, |
|
"loss": 1.6376, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.4816326530612245, |
|
"grad_norm": 0.13882142863507996, |
|
"learning_rate": 9.03448425426692e-06, |
|
"loss": 1.5496, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.48299319727891155, |
|
"grad_norm": 0.09033930791413211, |
|
"learning_rate": 9.027832273094213e-06, |
|
"loss": 1.8207, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.48435374149659866, |
|
"grad_norm": 0.08776380923196873, |
|
"learning_rate": 9.021159922526623e-06, |
|
"loss": 1.734, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.4857142857142857, |
|
"grad_norm": 0.0871797042611384, |
|
"learning_rate": 9.014467236307303e-06, |
|
"loss": 1.7255, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.48707482993197276, |
|
"grad_norm": 0.0832915611192237, |
|
"learning_rate": 9.007754248282236e-06, |
|
"loss": 1.6354, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.4884353741496599, |
|
"grad_norm": 0.11961492483949776, |
|
"learning_rate": 9.001020992400086e-06, |
|
"loss": 1.4193, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 0.0794404046382169, |
|
"learning_rate": 8.994267502712007e-06, |
|
"loss": 1.728, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.49115646258503404, |
|
"grad_norm": 0.13335980644840648, |
|
"learning_rate": 8.987493813371481e-06, |
|
"loss": 1.6729, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.4925170068027211, |
|
"grad_norm": 0.08449457187355984, |
|
"learning_rate": 8.980699958634147e-06, |
|
"loss": 1.6142, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.49387755102040815, |
|
"grad_norm": 0.09270436024065673, |
|
"learning_rate": 8.973885972857616e-06, |
|
"loss": 1.6753, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.49523809523809526, |
|
"grad_norm": 0.08033630058062084, |
|
"learning_rate": 8.96705189050131e-06, |
|
"loss": 1.6269, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.4965986394557823, |
|
"grad_norm": 0.4381615422407444, |
|
"learning_rate": 8.96019774612628e-06, |
|
"loss": 1.6131, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.49795918367346936, |
|
"grad_norm": 0.09462169653835782, |
|
"learning_rate": 8.953323574395038e-06, |
|
"loss": 1.5629, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.4993197278911565, |
|
"grad_norm": 0.13595506352030012, |
|
"learning_rate": 8.946429410071373e-06, |
|
"loss": 1.5593, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.5006802721088436, |
|
"grad_norm": 0.10253342646979322, |
|
"learning_rate": 8.939515288020182e-06, |
|
"loss": 1.6281, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5006802721088436, |
|
"eval_loss": 1.7046507596969604, |
|
"eval_runtime": 76.5686, |
|
"eval_samples_per_second": 53.194, |
|
"eval_steps_per_second": 6.661, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5020408163265306, |
|
"grad_norm": 0.08764026597495381, |
|
"learning_rate": 8.932581243207289e-06, |
|
"loss": 1.5909, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5034013605442177, |
|
"grad_norm": 0.07898445718193145, |
|
"learning_rate": 8.925627310699275e-06, |
|
"loss": 1.761, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5047619047619047, |
|
"grad_norm": 0.10577594540101636, |
|
"learning_rate": 8.918653525663295e-06, |
|
"loss": 1.695, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5061224489795918, |
|
"grad_norm": 0.110620128076388, |
|
"learning_rate": 8.911659923366897e-06, |
|
"loss": 1.7043, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.507482993197279, |
|
"grad_norm": 0.08992699882848652, |
|
"learning_rate": 8.904646539177852e-06, |
|
"loss": 1.674, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.508843537414966, |
|
"grad_norm": 0.0971954506200858, |
|
"learning_rate": 8.897613408563972e-06, |
|
"loss": 1.6565, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 0.14199688300843188, |
|
"learning_rate": 8.89056056709293e-06, |
|
"loss": 1.4402, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5115646258503401, |
|
"grad_norm": 0.1034458287611268, |
|
"learning_rate": 8.883488050432073e-06, |
|
"loss": 1.6606, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5129251700680272, |
|
"grad_norm": 0.09270052668783225, |
|
"learning_rate": 8.87639589434826e-06, |
|
"loss": 1.7067, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5142857142857142, |
|
"grad_norm": 0.08063860406008341, |
|
"learning_rate": 8.869284134707659e-06, |
|
"loss": 1.7683, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5156462585034014, |
|
"grad_norm": 0.08700140267251555, |
|
"learning_rate": 8.862152807475584e-06, |
|
"loss": 1.7135, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5170068027210885, |
|
"grad_norm": 0.09374652236807894, |
|
"learning_rate": 8.8550019487163e-06, |
|
"loss": 1.5927, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5183673469387755, |
|
"grad_norm": 0.09628602949564133, |
|
"learning_rate": 8.847831594592851e-06, |
|
"loss": 1.6169, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5197278911564626, |
|
"grad_norm": 0.09133344115171436, |
|
"learning_rate": 8.840641781366867e-06, |
|
"loss": 1.6077, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5210884353741496, |
|
"grad_norm": 0.0898264889709745, |
|
"learning_rate": 8.83343254539839e-06, |
|
"loss": 1.7476, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5224489795918368, |
|
"grad_norm": 0.09463509154870942, |
|
"learning_rate": 8.826203923145687e-06, |
|
"loss": 1.6178, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5238095238095238, |
|
"grad_norm": 0.086933970419064, |
|
"learning_rate": 8.818955951165059e-06, |
|
"loss": 1.6544, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.5251700680272109, |
|
"grad_norm": 0.08686966519278672, |
|
"learning_rate": 8.811688666110663e-06, |
|
"loss": 1.7268, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5265306122448979, |
|
"grad_norm": 0.10219001251324068, |
|
"learning_rate": 8.80440210473433e-06, |
|
"loss": 1.6538, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.527891156462585, |
|
"grad_norm": 0.08199431219832622, |
|
"learning_rate": 8.797096303885374e-06, |
|
"loss": 1.6524, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.5292517006802722, |
|
"grad_norm": 0.09053864014656055, |
|
"learning_rate": 8.789771300510397e-06, |
|
"loss": 1.5971, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.5306122448979592, |
|
"grad_norm": 0.08221661934582344, |
|
"learning_rate": 8.782427131653121e-06, |
|
"loss": 1.6643, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5319727891156463, |
|
"grad_norm": 0.08888879198967926, |
|
"learning_rate": 8.77506383445419e-06, |
|
"loss": 1.7855, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.08638631079426494, |
|
"learning_rate": 8.767681446150977e-06, |
|
"loss": 1.8028, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.5346938775510204, |
|
"grad_norm": 0.07669664542559732, |
|
"learning_rate": 8.76028000407741e-06, |
|
"loss": 1.5725, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.5360544217687074, |
|
"grad_norm": 0.098633841165777, |
|
"learning_rate": 8.752859545663766e-06, |
|
"loss": 1.6692, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.5374149659863946, |
|
"grad_norm": 0.09002401371176637, |
|
"learning_rate": 8.745420108436498e-06, |
|
"loss": 1.7636, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.5387755102040817, |
|
"grad_norm": 0.0952379865665774, |
|
"learning_rate": 8.737961730018034e-06, |
|
"loss": 1.5664, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.5401360544217687, |
|
"grad_norm": 0.09623958141543322, |
|
"learning_rate": 8.730484448126594e-06, |
|
"loss": 1.5345, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.5414965986394558, |
|
"grad_norm": 0.1502927654981511, |
|
"learning_rate": 8.722988300575992e-06, |
|
"loss": 1.7841, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.5428571428571428, |
|
"grad_norm": 0.09222431588896472, |
|
"learning_rate": 8.71547332527545e-06, |
|
"loss": 1.9559, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 0.0937414921537921, |
|
"learning_rate": 8.707939560229406e-06, |
|
"loss": 1.7022, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.545578231292517, |
|
"grad_norm": 0.11025083403359445, |
|
"learning_rate": 8.700387043537319e-06, |
|
"loss": 1.4365, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.5469387755102041, |
|
"grad_norm": 0.08441282686563067, |
|
"learning_rate": 8.692815813393483e-06, |
|
"loss": 1.6488, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.5482993197278911, |
|
"grad_norm": 0.3399602286969607, |
|
"learning_rate": 8.68522590808682e-06, |
|
"loss": 1.6829, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.5496598639455782, |
|
"grad_norm": 0.0924431892667502, |
|
"learning_rate": 8.677617366000705e-06, |
|
"loss": 1.7404, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5510204081632653, |
|
"grad_norm": 0.09778253267340173, |
|
"learning_rate": 8.669990225612754e-06, |
|
"loss": 1.7674, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5523809523809524, |
|
"grad_norm": 0.09471837896307483, |
|
"learning_rate": 8.662344525494643e-06, |
|
"loss": 1.6406, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.5537414965986395, |
|
"grad_norm": 0.08997298756417596, |
|
"learning_rate": 8.654680304311908e-06, |
|
"loss": 1.7875, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.5551020408163265, |
|
"grad_norm": 0.08849454584462031, |
|
"learning_rate": 8.646997600823743e-06, |
|
"loss": 1.5942, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.5564625850340136, |
|
"grad_norm": 0.10974758415682516, |
|
"learning_rate": 8.639296453882816e-06, |
|
"loss": 1.5229, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.5578231292517006, |
|
"grad_norm": 0.09669599431293863, |
|
"learning_rate": 8.631576902435063e-06, |
|
"loss": 1.7031, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5591836734693878, |
|
"grad_norm": 0.10337335565704198, |
|
"learning_rate": 8.623838985519498e-06, |
|
"loss": 1.6138, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.5605442176870749, |
|
"grad_norm": 0.08963980024729686, |
|
"learning_rate": 8.616082742268005e-06, |
|
"loss": 1.6527, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.5619047619047619, |
|
"grad_norm": 0.13730489095006465, |
|
"learning_rate": 8.608308211905159e-06, |
|
"loss": 1.5823, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.563265306122449, |
|
"grad_norm": 0.08745105418294691, |
|
"learning_rate": 8.600515433748003e-06, |
|
"loss": 1.6647, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.564625850340136, |
|
"grad_norm": 0.19321095274209943, |
|
"learning_rate": 8.592704447205872e-06, |
|
"loss": 1.5218, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.5659863945578232, |
|
"grad_norm": 0.10518406439497321, |
|
"learning_rate": 8.584875291780178e-06, |
|
"loss": 1.5199, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.5673469387755102, |
|
"grad_norm": 0.09495102640071197, |
|
"learning_rate": 8.577028007064218e-06, |
|
"loss": 1.6623, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.5687074829931973, |
|
"grad_norm": 0.13232264409241218, |
|
"learning_rate": 8.569162632742973e-06, |
|
"loss": 1.606, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.5700680272108843, |
|
"grad_norm": 0.09382708523657708, |
|
"learning_rate": 8.561279208592902e-06, |
|
"loss": 1.6563, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.10154097299186025, |
|
"learning_rate": 8.553377774481748e-06, |
|
"loss": 1.5177, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5727891156462585, |
|
"grad_norm": 0.09008372077918206, |
|
"learning_rate": 8.545458370368336e-06, |
|
"loss": 1.5358, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.5741496598639456, |
|
"grad_norm": 0.11141318964471479, |
|
"learning_rate": 8.53752103630236e-06, |
|
"loss": 1.5975, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.5755102040816327, |
|
"grad_norm": 0.11594947645844504, |
|
"learning_rate": 8.529565812424195e-06, |
|
"loss": 1.5417, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.5768707482993197, |
|
"grad_norm": 0.1115243557715104, |
|
"learning_rate": 8.521592738964689e-06, |
|
"loss": 1.6912, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.5782312925170068, |
|
"grad_norm": 0.10294673717555994, |
|
"learning_rate": 8.513601856244951e-06, |
|
"loss": 1.6883, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5795918367346938, |
|
"grad_norm": 0.12234481799540493, |
|
"learning_rate": 8.505593204676162e-06, |
|
"loss": 1.6903, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.580952380952381, |
|
"grad_norm": 0.1164390253206002, |
|
"learning_rate": 8.497566824759359e-06, |
|
"loss": 1.6433, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.582312925170068, |
|
"grad_norm": 0.10041564805248225, |
|
"learning_rate": 8.489522757085234e-06, |
|
"loss": 1.5482, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5836734693877551, |
|
"grad_norm": 0.10770970938141446, |
|
"learning_rate": 8.481461042333929e-06, |
|
"loss": 1.6092, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.5850340136054422, |
|
"grad_norm": 0.08821548122605594, |
|
"learning_rate": 8.473381721274832e-06, |
|
"loss": 1.5793, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5863945578231292, |
|
"grad_norm": 0.089232378501521, |
|
"learning_rate": 8.465284834766365e-06, |
|
"loss": 1.6233, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.5877551020408164, |
|
"grad_norm": 0.10250742081774146, |
|
"learning_rate": 8.457170423755786e-06, |
|
"loss": 1.5625, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5891156462585034, |
|
"grad_norm": 0.1202037568410257, |
|
"learning_rate": 8.449038529278976e-06, |
|
"loss": 1.5843, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.5904761904761905, |
|
"grad_norm": 0.1323622148062186, |
|
"learning_rate": 8.440889192460232e-06, |
|
"loss": 1.808, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.5918367346938775, |
|
"grad_norm": 0.12495144537173114, |
|
"learning_rate": 8.432722454512057e-06, |
|
"loss": 1.9389, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.5931972789115646, |
|
"grad_norm": 0.09699121967438322, |
|
"learning_rate": 8.424538356734957e-06, |
|
"loss": 1.7367, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5945578231292517, |
|
"grad_norm": 0.09801443283628167, |
|
"learning_rate": 8.416336940517229e-06, |
|
"loss": 1.6276, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.5959183673469388, |
|
"grad_norm": 0.10487807929341668, |
|
"learning_rate": 8.408118247334755e-06, |
|
"loss": 1.5578, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.5972789115646259, |
|
"grad_norm": 0.09364043239294455, |
|
"learning_rate": 8.399882318750785e-06, |
|
"loss": 1.5889, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.5986394557823129, |
|
"grad_norm": 0.10020651855110954, |
|
"learning_rate": 8.391629196415733e-06, |
|
"loss": 1.607, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0899206896336364, |
|
"learning_rate": 8.383358922066965e-06, |
|
"loss": 1.5508, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.601360544217687, |
|
"grad_norm": 0.09009526459055692, |
|
"learning_rate": 8.375071537528587e-06, |
|
"loss": 1.6629, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.6027210884353742, |
|
"grad_norm": 0.09413295136140958, |
|
"learning_rate": 8.366767084711232e-06, |
|
"loss": 1.6568, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6040816326530613, |
|
"grad_norm": 0.22548893033360673, |
|
"learning_rate": 8.358445605611856e-06, |
|
"loss": 1.7594, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6054421768707483, |
|
"grad_norm": 0.12757276275079013, |
|
"learning_rate": 8.350107142313513e-06, |
|
"loss": 1.4311, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6068027210884354, |
|
"grad_norm": 0.09965051028904835, |
|
"learning_rate": 8.34175173698515e-06, |
|
"loss": 1.6618, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6081632653061224, |
|
"grad_norm": 0.09666753861412604, |
|
"learning_rate": 8.333379431881398e-06, |
|
"loss": 1.6729, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.6095238095238096, |
|
"grad_norm": 0.09235423529974252, |
|
"learning_rate": 8.324990269342345e-06, |
|
"loss": 1.7872, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6108843537414966, |
|
"grad_norm": 0.0969813250335363, |
|
"learning_rate": 8.316584291793337e-06, |
|
"loss": 1.5299, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 0.08468781546097646, |
|
"learning_rate": 8.30816154174475e-06, |
|
"loss": 1.7958, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6136054421768707, |
|
"grad_norm": 0.08812390382328499, |
|
"learning_rate": 8.299722061791788e-06, |
|
"loss": 1.7292, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6149659863945578, |
|
"grad_norm": 0.08809745700239427, |
|
"learning_rate": 8.291265894614253e-06, |
|
"loss": 1.758, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6163265306122448, |
|
"grad_norm": 0.09490957390276747, |
|
"learning_rate": 8.282793082976343e-06, |
|
"loss": 1.5475, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.617687074829932, |
|
"grad_norm": 0.11060404799058053, |
|
"learning_rate": 8.274303669726427e-06, |
|
"loss": 1.6792, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6190476190476191, |
|
"grad_norm": 0.09540512150005957, |
|
"learning_rate": 8.265797697796831e-06, |
|
"loss": 1.6685, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6204081632653061, |
|
"grad_norm": 0.12873788369707956, |
|
"learning_rate": 8.257275210203621e-06, |
|
"loss": 1.5043, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6217687074829932, |
|
"grad_norm": 0.6499840705042578, |
|
"learning_rate": 8.248736250046389e-06, |
|
"loss": 1.7548, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6231292517006802, |
|
"grad_norm": 0.08484061508587025, |
|
"learning_rate": 8.240180860508027e-06, |
|
"loss": 1.8159, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6244897959183674, |
|
"grad_norm": 0.09906794908825851, |
|
"learning_rate": 8.231609084854513e-06, |
|
"loss": 1.7116, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6258503401360545, |
|
"grad_norm": 0.0939998149984381, |
|
"learning_rate": 8.223020966434695e-06, |
|
"loss": 1.7448, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6272108843537415, |
|
"grad_norm": 0.09983102806108725, |
|
"learning_rate": 8.214416548680065e-06, |
|
"loss": 1.7284, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6285714285714286, |
|
"grad_norm": 0.1276929064149538, |
|
"learning_rate": 8.205795875104549e-06, |
|
"loss": 1.5541, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.6299319727891156, |
|
"grad_norm": 0.0971007724405045, |
|
"learning_rate": 8.197158989304277e-06, |
|
"loss": 1.749, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6312925170068027, |
|
"grad_norm": 0.09931797410533018, |
|
"learning_rate": 8.188505934957368e-06, |
|
"loss": 1.7908, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.6326530612244898, |
|
"grad_norm": 0.10420469743883767, |
|
"learning_rate": 8.179836755823707e-06, |
|
"loss": 1.7156, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.6340136054421769, |
|
"grad_norm": 0.09471973183090822, |
|
"learning_rate": 8.171151495744726e-06, |
|
"loss": 1.6598, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.6353741496598639, |
|
"grad_norm": 0.08008463828970452, |
|
"learning_rate": 8.162450198643184e-06, |
|
"loss": 1.8476, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.636734693877551, |
|
"grad_norm": 0.09336869405905367, |
|
"learning_rate": 8.153732908522933e-06, |
|
"loss": 1.677, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.638095238095238, |
|
"grad_norm": 0.10567938348565759, |
|
"learning_rate": 8.144999669468714e-06, |
|
"loss": 1.6987, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.6394557823129252, |
|
"grad_norm": 0.08460548198303047, |
|
"learning_rate": 8.136250525645916e-06, |
|
"loss": 1.8206, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6408163265306123, |
|
"grad_norm": 0.0993816872028852, |
|
"learning_rate": 8.127485521300366e-06, |
|
"loss": 1.6618, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.6421768707482993, |
|
"grad_norm": 0.1043492135258874, |
|
"learning_rate": 8.118704700758103e-06, |
|
"loss": 1.641, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.6435374149659864, |
|
"grad_norm": 0.0880711093010387, |
|
"learning_rate": 8.109908108425142e-06, |
|
"loss": 1.8376, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.6448979591836734, |
|
"grad_norm": 0.09271039929379037, |
|
"learning_rate": 8.101095788787266e-06, |
|
"loss": 1.6914, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.6462585034013606, |
|
"grad_norm": 0.10199949996500506, |
|
"learning_rate": 8.092267786409788e-06, |
|
"loss": 1.6264, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.6476190476190476, |
|
"grad_norm": 0.09927764715965239, |
|
"learning_rate": 8.08342414593734e-06, |
|
"loss": 1.7495, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.6489795918367347, |
|
"grad_norm": 0.11011814904081185, |
|
"learning_rate": 8.07456491209363e-06, |
|
"loss": 1.8044, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.6503401360544218, |
|
"grad_norm": 0.10592895331787452, |
|
"learning_rate": 8.065690129681224e-06, |
|
"loss": 1.5279, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.6517006802721088, |
|
"grad_norm": 0.09546781848020944, |
|
"learning_rate": 8.056799843581326e-06, |
|
"loss": 1.5599, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 0.09305244725756194, |
|
"learning_rate": 8.04789409875354e-06, |
|
"loss": 1.7328, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.654421768707483, |
|
"grad_norm": 0.10356673246511783, |
|
"learning_rate": 8.038972940235647e-06, |
|
"loss": 1.6317, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.6557823129251701, |
|
"grad_norm": 0.08355158958046802, |
|
"learning_rate": 8.030036413143382e-06, |
|
"loss": 1.823, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.6571428571428571, |
|
"grad_norm": 0.13429081910577273, |
|
"learning_rate": 8.021084562670193e-06, |
|
"loss": 1.765, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.6585034013605442, |
|
"grad_norm": 0.10977848927929611, |
|
"learning_rate": 8.012117434087032e-06, |
|
"loss": 1.7983, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.6598639455782312, |
|
"grad_norm": 0.10127247770991282, |
|
"learning_rate": 8.003135072742106e-06, |
|
"loss": 1.7146, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.6612244897959184, |
|
"grad_norm": 0.08420649737520919, |
|
"learning_rate": 7.994137524060656e-06, |
|
"loss": 1.7273, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.6625850340136055, |
|
"grad_norm": 0.12998243453204658, |
|
"learning_rate": 7.985124833544737e-06, |
|
"loss": 1.7116, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.6639455782312925, |
|
"grad_norm": 0.12572177154996375, |
|
"learning_rate": 7.976097046772971e-06, |
|
"loss": 1.5875, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.6653061224489796, |
|
"grad_norm": 0.09257333856558611, |
|
"learning_rate": 7.967054209400325e-06, |
|
"loss": 1.6259, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.15599333098996487, |
|
"learning_rate": 7.95799636715788e-06, |
|
"loss": 1.6602, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6680272108843538, |
|
"grad_norm": 0.09770606447991517, |
|
"learning_rate": 7.948923565852597e-06, |
|
"loss": 1.6867, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.6693877551020408, |
|
"grad_norm": 0.1303849167483497, |
|
"learning_rate": 7.939835851367097e-06, |
|
"loss": 1.6583, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.6707482993197279, |
|
"grad_norm": 0.10864319676746689, |
|
"learning_rate": 7.930733269659405e-06, |
|
"loss": 1.6832, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.672108843537415, |
|
"grad_norm": 0.11083699474291044, |
|
"learning_rate": 7.921615866762743e-06, |
|
"loss": 1.7117, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.673469387755102, |
|
"grad_norm": 0.13148294366762545, |
|
"learning_rate": 7.912483688785281e-06, |
|
"loss": 1.5234, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.6748299319727891, |
|
"grad_norm": 0.11365843655270903, |
|
"learning_rate": 7.903336781909911e-06, |
|
"loss": 1.7783, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.6761904761904762, |
|
"grad_norm": 0.12277794049039219, |
|
"learning_rate": 7.89417519239401e-06, |
|
"loss": 1.6908, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.6775510204081633, |
|
"grad_norm": 0.11413949336950495, |
|
"learning_rate": 7.884998966569206e-06, |
|
"loss": 1.5654, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.6789115646258503, |
|
"grad_norm": 0.10494845825770326, |
|
"learning_rate": 7.87580815084115e-06, |
|
"loss": 1.5383, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 0.09020097927859841, |
|
"learning_rate": 7.866602791689272e-06, |
|
"loss": 1.596, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6816326530612244, |
|
"grad_norm": 0.08585696598298209, |
|
"learning_rate": 7.857382935666554e-06, |
|
"loss": 1.7307, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.6829931972789116, |
|
"grad_norm": 0.1010505486947337, |
|
"learning_rate": 7.848148629399287e-06, |
|
"loss": 1.6699, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.6843537414965987, |
|
"grad_norm": 0.11201229130009283, |
|
"learning_rate": 7.838899919586841e-06, |
|
"loss": 1.6521, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 0.10009251853964633, |
|
"learning_rate": 7.82963685300143e-06, |
|
"loss": 1.6464, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.6870748299319728, |
|
"grad_norm": 0.10676770701676057, |
|
"learning_rate": 7.820359476487866e-06, |
|
"loss": 1.4472, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.6884353741496598, |
|
"grad_norm": 0.10987902455707375, |
|
"learning_rate": 7.811067836963337e-06, |
|
"loss": 1.6637, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.689795918367347, |
|
"grad_norm": 0.10593286251613945, |
|
"learning_rate": 7.801761981417152e-06, |
|
"loss": 1.714, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.691156462585034, |
|
"grad_norm": 0.11171983777047709, |
|
"learning_rate": 7.792441956910523e-06, |
|
"loss": 1.5948, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.6925170068027211, |
|
"grad_norm": 0.10208550757531555, |
|
"learning_rate": 7.783107810576306e-06, |
|
"loss": 1.7267, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.6938775510204082, |
|
"grad_norm": 0.11315560578865806, |
|
"learning_rate": 7.773759589618782e-06, |
|
"loss": 1.5995, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6952380952380952, |
|
"grad_norm": 0.10501036944532048, |
|
"learning_rate": 7.764397341313403e-06, |
|
"loss": 1.4624, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.6965986394557823, |
|
"grad_norm": 0.08619282794989483, |
|
"learning_rate": 7.755021113006567e-06, |
|
"loss": 1.7983, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.6979591836734694, |
|
"grad_norm": 0.09109224074659802, |
|
"learning_rate": 7.745630952115365e-06, |
|
"loss": 1.6753, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.6993197278911565, |
|
"grad_norm": 0.11332860880884088, |
|
"learning_rate": 7.736226906127344e-06, |
|
"loss": 1.7472, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.7006802721088435, |
|
"grad_norm": 0.11046061973302813, |
|
"learning_rate": 7.726809022600284e-06, |
|
"loss": 1.6219, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.7020408163265306, |
|
"grad_norm": 0.09720494400811149, |
|
"learning_rate": 7.71737734916193e-06, |
|
"loss": 1.7941, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.7034013605442176, |
|
"grad_norm": 0.13391377607878455, |
|
"learning_rate": 7.70793193350977e-06, |
|
"loss": 1.7904, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.7047619047619048, |
|
"grad_norm": 0.12625091217599146, |
|
"learning_rate": 7.69847282341079e-06, |
|
"loss": 1.663, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.7061224489795919, |
|
"grad_norm": 0.12406607142823292, |
|
"learning_rate": 7.68900006670123e-06, |
|
"loss": 1.6766, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.7074829931972789, |
|
"grad_norm": 0.09475581974666519, |
|
"learning_rate": 7.679513711286338e-06, |
|
"loss": 1.7449, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.708843537414966, |
|
"grad_norm": 0.12489019591501085, |
|
"learning_rate": 7.670013805140143e-06, |
|
"loss": 1.7526, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.710204081632653, |
|
"grad_norm": 0.09091443476585943, |
|
"learning_rate": 7.660500396305194e-06, |
|
"loss": 1.66, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.7115646258503401, |
|
"grad_norm": 0.0843896278465122, |
|
"learning_rate": 7.650973532892325e-06, |
|
"loss": 1.5741, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7129251700680272, |
|
"grad_norm": 0.12842839378581072, |
|
"learning_rate": 7.641433263080418e-06, |
|
"loss": 1.5639, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.12573336181400996, |
|
"learning_rate": 7.631879635116152e-06, |
|
"loss": 1.5261, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7156462585034014, |
|
"grad_norm": 0.08357817960907306, |
|
"learning_rate": 7.622312697313754e-06, |
|
"loss": 1.624, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7170068027210884, |
|
"grad_norm": 0.08826301097458839, |
|
"learning_rate": 7.612732498054769e-06, |
|
"loss": 1.7131, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7183673469387755, |
|
"grad_norm": 0.1030553177752236, |
|
"learning_rate": 7.603139085787801e-06, |
|
"loss": 1.76, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7197278911564626, |
|
"grad_norm": 0.10248942600565085, |
|
"learning_rate": 7.5935325090282785e-06, |
|
"loss": 1.6537, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7210884353741497, |
|
"grad_norm": 0.10935378440777248, |
|
"learning_rate": 7.583912816358203e-06, |
|
"loss": 1.7441, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7224489795918367, |
|
"grad_norm": 0.10474278332332895, |
|
"learning_rate": 7.574280056425907e-06, |
|
"loss": 1.5672, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.7238095238095238, |
|
"grad_norm": 0.10337823669221702, |
|
"learning_rate": 7.564634277945803e-06, |
|
"loss": 1.7301, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.7251700680272108, |
|
"grad_norm": 0.1021232974428954, |
|
"learning_rate": 7.554975529698143e-06, |
|
"loss": 1.8401, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.726530612244898, |
|
"grad_norm": 0.10873875060914909, |
|
"learning_rate": 7.54530386052877e-06, |
|
"loss": 1.6832, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.7278911564625851, |
|
"grad_norm": 0.13142427274605614, |
|
"learning_rate": 7.5356193193488655e-06, |
|
"loss": 1.6824, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.7292517006802721, |
|
"grad_norm": 0.10578967924155344, |
|
"learning_rate": 7.525921955134714e-06, |
|
"loss": 1.6128, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.7306122448979592, |
|
"grad_norm": 0.1428684712147148, |
|
"learning_rate": 7.5162118169274424e-06, |
|
"loss": 1.6909, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.7319727891156462, |
|
"grad_norm": 0.12364317671003774, |
|
"learning_rate": 7.506488953832779e-06, |
|
"loss": 1.5894, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.7333333333333333, |
|
"grad_norm": 0.12070521447402052, |
|
"learning_rate": 7.4967534150208066e-06, |
|
"loss": 1.6316, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 0.11805699210709567, |
|
"learning_rate": 7.487005249725705e-06, |
|
"loss": 1.773, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7360544217687075, |
|
"grad_norm": 0.10518788665559951, |
|
"learning_rate": 7.477244507245517e-06, |
|
"loss": 1.5496, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.7374149659863946, |
|
"grad_norm": 0.10115679657141723, |
|
"learning_rate": 7.4674712369418815e-06, |
|
"loss": 1.6332, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.7387755102040816, |
|
"grad_norm": 0.13249620123305414, |
|
"learning_rate": 7.457685488239799e-06, |
|
"loss": 1.5464, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.7401360544217687, |
|
"grad_norm": 0.10912963076663591, |
|
"learning_rate": 7.44788731062737e-06, |
|
"loss": 1.6537, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.7414965986394558, |
|
"grad_norm": 0.0903784233977396, |
|
"learning_rate": 7.438076753655557e-06, |
|
"loss": 1.7509, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.7428571428571429, |
|
"grad_norm": 0.11534137833637581, |
|
"learning_rate": 7.4282538669379186e-06, |
|
"loss": 1.8423, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.7442176870748299, |
|
"grad_norm": 0.10673090155703877, |
|
"learning_rate": 7.418418700150373e-06, |
|
"loss": 1.5147, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.745578231292517, |
|
"grad_norm": 0.11078677610408977, |
|
"learning_rate": 7.408571303030939e-06, |
|
"loss": 1.598, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.746938775510204, |
|
"grad_norm": 0.20089268434188828, |
|
"learning_rate": 7.398711725379486e-06, |
|
"loss": 1.6854, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 0.11465528276050316, |
|
"learning_rate": 7.388840017057479e-06, |
|
"loss": 1.7166, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7496598639455783, |
|
"grad_norm": 0.17554597081264026, |
|
"learning_rate": 7.378956227987738e-06, |
|
"loss": 1.7621, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.7510204081632653, |
|
"grad_norm": 0.12001150745168689, |
|
"learning_rate": 7.369060408154166e-06, |
|
"loss": 1.6292, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.7510204081632653, |
|
"eval_loss": 1.695604681968689, |
|
"eval_runtime": 76.6065, |
|
"eval_samples_per_second": 53.168, |
|
"eval_steps_per_second": 6.657, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.7523809523809524, |
|
"grad_norm": 0.08810105735536967, |
|
"learning_rate": 7.35915260760152e-06, |
|
"loss": 1.7169, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.7537414965986394, |
|
"grad_norm": 0.1670593915051014, |
|
"learning_rate": 7.349232876435135e-06, |
|
"loss": 1.5579, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.7551020408163265, |
|
"grad_norm": 0.11454927724613997, |
|
"learning_rate": 7.3393012648206865e-06, |
|
"loss": 1.7283, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.7564625850340136, |
|
"grad_norm": 0.969049897753043, |
|
"learning_rate": 7.329357822983929e-06, |
|
"loss": 1.7205, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.7578231292517007, |
|
"grad_norm": 0.14156929561935405, |
|
"learning_rate": 7.319402601210448e-06, |
|
"loss": 1.6642, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.7591836734693878, |
|
"grad_norm": 0.0984076585901956, |
|
"learning_rate": 7.3094356498453955e-06, |
|
"loss": 1.5543, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.7605442176870748, |
|
"grad_norm": 0.1065633653807634, |
|
"learning_rate": 7.299457019293248e-06, |
|
"loss": 1.6024, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 0.10385154170413329, |
|
"learning_rate": 7.289466760017543e-06, |
|
"loss": 1.6121, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.763265306122449, |
|
"grad_norm": 0.09706098665043214, |
|
"learning_rate": 7.279464922540626e-06, |
|
"loss": 1.6291, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.7646258503401361, |
|
"grad_norm": 0.1005953850437904, |
|
"learning_rate": 7.269451557443396e-06, |
|
"loss": 1.5871, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.7659863945578231, |
|
"grad_norm": 0.12409919864299088, |
|
"learning_rate": 7.2594267153650525e-06, |
|
"loss": 1.8507, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.7673469387755102, |
|
"grad_norm": 0.17577412545797552, |
|
"learning_rate": 7.249390447002827e-06, |
|
"loss": 1.6741, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.7687074829931972, |
|
"grad_norm": 0.10625351066460387, |
|
"learning_rate": 7.239342803111744e-06, |
|
"loss": 1.6995, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.7700680272108843, |
|
"grad_norm": 0.10141675080832888, |
|
"learning_rate": 7.229283834504351e-06, |
|
"loss": 1.7018, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.7714285714285715, |
|
"grad_norm": 0.10351854356175742, |
|
"learning_rate": 7.21921359205047e-06, |
|
"loss": 1.6347, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.7727891156462585, |
|
"grad_norm": 0.097783522808633, |
|
"learning_rate": 7.209132126676934e-06, |
|
"loss": 1.63, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.7741496598639456, |
|
"grad_norm": 0.1032937811881391, |
|
"learning_rate": 7.199039489367334e-06, |
|
"loss": 1.7088, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.7755102040816326, |
|
"grad_norm": 0.12703981258728564, |
|
"learning_rate": 7.188935731161756e-06, |
|
"loss": 1.5488, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7768707482993197, |
|
"grad_norm": 0.09603678350259663, |
|
"learning_rate": 7.178820903156532e-06, |
|
"loss": 1.7006, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.7782312925170068, |
|
"grad_norm": 0.10137848978882107, |
|
"learning_rate": 7.168695056503967e-06, |
|
"loss": 1.5343, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.7795918367346939, |
|
"grad_norm": 0.09168277145084121, |
|
"learning_rate": 7.1585582424121005e-06, |
|
"loss": 1.7654, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.780952380952381, |
|
"grad_norm": 0.10323000544095898, |
|
"learning_rate": 7.148410512144425e-06, |
|
"loss": 1.6613, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.782312925170068, |
|
"grad_norm": 0.09038228211904249, |
|
"learning_rate": 7.138251917019645e-06, |
|
"loss": 1.7182, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.7836734693877551, |
|
"grad_norm": 0.12949706977298758, |
|
"learning_rate": 7.1280825084114065e-06, |
|
"loss": 1.5075, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.7850340136054422, |
|
"grad_norm": 0.10182930523451113, |
|
"learning_rate": 7.117902337748045e-06, |
|
"loss": 1.5249, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.7863945578231293, |
|
"grad_norm": 0.1014784602992118, |
|
"learning_rate": 7.107711456512316e-06, |
|
"loss": 1.5699, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.7877551020408163, |
|
"grad_norm": 0.08920528739055578, |
|
"learning_rate": 7.097509916241145e-06, |
|
"loss": 1.7604, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.7891156462585034, |
|
"grad_norm": 0.14771070265391437, |
|
"learning_rate": 7.08729776852536e-06, |
|
"loss": 1.8294, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.7904761904761904, |
|
"grad_norm": 0.10437668016493229, |
|
"learning_rate": 7.0770750650094335e-06, |
|
"loss": 1.5263, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.7918367346938775, |
|
"grad_norm": 0.09453693869246346, |
|
"learning_rate": 7.066841857391215e-06, |
|
"loss": 1.7625, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.7931972789115647, |
|
"grad_norm": 0.1024749155019567, |
|
"learning_rate": 7.056598197421686e-06, |
|
"loss": 1.6953, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.7945578231292517, |
|
"grad_norm": 0.09967417247994723, |
|
"learning_rate": 7.046344136904675e-06, |
|
"loss": 1.5067, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.7959183673469388, |
|
"grad_norm": 0.10667510272444436, |
|
"learning_rate": 7.036079727696618e-06, |
|
"loss": 1.6966, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.7972789115646258, |
|
"grad_norm": 0.13727868268941767, |
|
"learning_rate": 7.025805021706276e-06, |
|
"loss": 1.6554, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.7986394557823129, |
|
"grad_norm": 0.09241703194497365, |
|
"learning_rate": 7.0155200708944915e-06, |
|
"loss": 1.7987, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.11984006315877656, |
|
"learning_rate": 7.005224927273913e-06, |
|
"loss": 1.7059, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.8013605442176871, |
|
"grad_norm": 0.12763094516083248, |
|
"learning_rate": 6.9949196429087355e-06, |
|
"loss": 1.8147, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.8027210884353742, |
|
"grad_norm": 0.13522443685391508, |
|
"learning_rate": 6.984604269914437e-06, |
|
"loss": 1.63, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8040816326530612, |
|
"grad_norm": 0.10116021604335325, |
|
"learning_rate": 6.974278860457515e-06, |
|
"loss": 1.5963, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.8054421768707483, |
|
"grad_norm": 0.10234671686214797, |
|
"learning_rate": 6.963943466755225e-06, |
|
"loss": 1.7491, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.8068027210884354, |
|
"grad_norm": 0.09993833464748478, |
|
"learning_rate": 6.953598141075315e-06, |
|
"loss": 1.8742, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.8081632653061225, |
|
"grad_norm": 0.10194114128873782, |
|
"learning_rate": 6.943242935735757e-06, |
|
"loss": 1.8295, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.8095238095238095, |
|
"grad_norm": 0.11091877191061177, |
|
"learning_rate": 6.932877903104487e-06, |
|
"loss": 1.7282, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.8108843537414966, |
|
"grad_norm": 0.11885618706334389, |
|
"learning_rate": 6.922503095599142e-06, |
|
"loss": 1.7013, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.8122448979591836, |
|
"grad_norm": 0.128832076499377, |
|
"learning_rate": 6.912118565686789e-06, |
|
"loss": 1.6604, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.8136054421768707, |
|
"grad_norm": 0.149361446054299, |
|
"learning_rate": 6.901724365883665e-06, |
|
"loss": 1.5922, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.8149659863945579, |
|
"grad_norm": 0.10349101338251292, |
|
"learning_rate": 6.89132054875491e-06, |
|
"loss": 1.7742, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.10814162257394357, |
|
"learning_rate": 6.8809071669142946e-06, |
|
"loss": 1.6099, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.817687074829932, |
|
"grad_norm": 0.09463224320256956, |
|
"learning_rate": 6.870484273023967e-06, |
|
"loss": 1.5986, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.819047619047619, |
|
"grad_norm": 0.10081423766781461, |
|
"learning_rate": 6.8600519197941725e-06, |
|
"loss": 1.7488, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.8204081632653061, |
|
"grad_norm": 0.10845349706729722, |
|
"learning_rate": 6.849610159983003e-06, |
|
"loss": 1.6419, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.8217687074829932, |
|
"grad_norm": 0.1220236426902432, |
|
"learning_rate": 6.839159046396109e-06, |
|
"loss": 1.6193, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.8231292517006803, |
|
"grad_norm": 0.11987974076038253, |
|
"learning_rate": 6.828698631886455e-06, |
|
"loss": 1.6836, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.8244897959183674, |
|
"grad_norm": 0.12488732229202766, |
|
"learning_rate": 6.8182289693540375e-06, |
|
"loss": 1.6057, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.8258503401360544, |
|
"grad_norm": 0.10061110967809365, |
|
"learning_rate": 6.807750111745619e-06, |
|
"loss": 1.6481, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.8272108843537415, |
|
"grad_norm": 0.10585066880846151, |
|
"learning_rate": 6.797262112054469e-06, |
|
"loss": 1.6665, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.8285714285714286, |
|
"grad_norm": 0.10134141638102989, |
|
"learning_rate": 6.786765023320085e-06, |
|
"loss": 1.5092, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.8299319727891157, |
|
"grad_norm": 0.11227140982751639, |
|
"learning_rate": 6.776258898627932e-06, |
|
"loss": 1.7522, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.8312925170068027, |
|
"grad_norm": 0.08967360374579285, |
|
"learning_rate": 6.765743791109172e-06, |
|
"loss": 1.7738, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.8326530612244898, |
|
"grad_norm": 0.12577528209737293, |
|
"learning_rate": 6.755219753940389e-06, |
|
"loss": 1.6958, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.8340136054421768, |
|
"grad_norm": 0.1026220373525454, |
|
"learning_rate": 6.744686840343333e-06, |
|
"loss": 1.7081, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.8353741496598639, |
|
"grad_norm": 0.1616576311932069, |
|
"learning_rate": 6.734145103584638e-06, |
|
"loss": 1.7878, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.8367346938775511, |
|
"grad_norm": 0.11606031099035814, |
|
"learning_rate": 6.72359459697556e-06, |
|
"loss": 1.704, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.8380952380952381, |
|
"grad_norm": 0.11405134687997359, |
|
"learning_rate": 6.713035373871711e-06, |
|
"loss": 1.6157, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.8394557823129252, |
|
"grad_norm": 0.11855263544564604, |
|
"learning_rate": 6.702467487672771e-06, |
|
"loss": 1.7325, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.8408163265306122, |
|
"grad_norm": 0.10438275658978799, |
|
"learning_rate": 6.691890991822243e-06, |
|
"loss": 1.6522, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.8421768707482993, |
|
"grad_norm": 0.10374577213787556, |
|
"learning_rate": 6.681305939807165e-06, |
|
"loss": 1.6307, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.8435374149659864, |
|
"grad_norm": 0.09336351222681238, |
|
"learning_rate": 6.670712385157846e-06, |
|
"loss": 1.5821, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.8448979591836735, |
|
"grad_norm": 0.12997638800995778, |
|
"learning_rate": 6.660110381447593e-06, |
|
"loss": 1.672, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.8462585034013606, |
|
"grad_norm": 0.09931623670142671, |
|
"learning_rate": 6.649499982292441e-06, |
|
"loss": 1.6305, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.8476190476190476, |
|
"grad_norm": 0.1164641533977229, |
|
"learning_rate": 6.638881241350884e-06, |
|
"loss": 1.6891, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.8489795918367347, |
|
"grad_norm": 0.09080847058543648, |
|
"learning_rate": 6.628254212323601e-06, |
|
"loss": 1.5685, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.8503401360544217, |
|
"grad_norm": 0.09554345130938086, |
|
"learning_rate": 6.617618948953186e-06, |
|
"loss": 1.8238, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.8517006802721089, |
|
"grad_norm": 0.11850979230452785, |
|
"learning_rate": 6.606975505023874e-06, |
|
"loss": 1.5686, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.8530612244897959, |
|
"grad_norm": 0.11391698206139707, |
|
"learning_rate": 6.596323934361268e-06, |
|
"loss": 1.6122, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.854421768707483, |
|
"grad_norm": 0.1259510837772197, |
|
"learning_rate": 6.5856642908320745e-06, |
|
"loss": 1.5638, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.85578231292517, |
|
"grad_norm": 0.3467765021073813, |
|
"learning_rate": 6.574996628343824e-06, |
|
"loss": 1.7503, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.11342921408966115, |
|
"learning_rate": 6.564321000844598e-06, |
|
"loss": 1.6653, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.8585034013605443, |
|
"grad_norm": 0.10000357784567925, |
|
"learning_rate": 6.553637462322759e-06, |
|
"loss": 1.5783, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.8598639455782313, |
|
"grad_norm": 0.1067459993698176, |
|
"learning_rate": 6.5429460668066825e-06, |
|
"loss": 1.7222, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.8612244897959184, |
|
"grad_norm": 0.1669410376966875, |
|
"learning_rate": 6.5322468683644665e-06, |
|
"loss": 1.6325, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.8625850340136054, |
|
"grad_norm": 0.107409115579869, |
|
"learning_rate": 6.5215399211036815e-06, |
|
"loss": 1.5369, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.8639455782312925, |
|
"grad_norm": 0.1076970404147157, |
|
"learning_rate": 6.510825279171077e-06, |
|
"loss": 1.7722, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.8653061224489796, |
|
"grad_norm": 0.15949100738404212, |
|
"learning_rate": 6.5001029967523195e-06, |
|
"loss": 1.5295, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.8666666666666667, |
|
"grad_norm": 0.13488187893377007, |
|
"learning_rate": 6.489373128071714e-06, |
|
"loss": 1.6053, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.8680272108843538, |
|
"grad_norm": 0.1117979139478637, |
|
"learning_rate": 6.4786357273919296e-06, |
|
"loss": 1.6219, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.8693877551020408, |
|
"grad_norm": 0.11161396243370707, |
|
"learning_rate": 6.467890849013728e-06, |
|
"loss": 1.6193, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.8707482993197279, |
|
"grad_norm": 0.10619453607937132, |
|
"learning_rate": 6.4571385472756835e-06, |
|
"loss": 1.6587, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.8721088435374149, |
|
"grad_norm": 0.1092897734952719, |
|
"learning_rate": 6.446378876553914e-06, |
|
"loss": 1.5463, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.8734693877551021, |
|
"grad_norm": 0.10030800864141241, |
|
"learning_rate": 6.4356118912618025e-06, |
|
"loss": 1.6678, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.8748299319727891, |
|
"grad_norm": 0.10968908293389731, |
|
"learning_rate": 6.424837645849724e-06, |
|
"loss": 1.6558, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.8761904761904762, |
|
"grad_norm": 0.10586492564834864, |
|
"learning_rate": 6.41405619480477e-06, |
|
"loss": 1.6116, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.8775510204081632, |
|
"grad_norm": 0.10414471406609932, |
|
"learning_rate": 6.403267592650466e-06, |
|
"loss": 1.5987, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.8789115646258503, |
|
"grad_norm": 0.11102655800428667, |
|
"learning_rate": 6.39247189394651e-06, |
|
"loss": 1.5676, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.8802721088435375, |
|
"grad_norm": 0.1414944802203078, |
|
"learning_rate": 6.381669153288485e-06, |
|
"loss": 1.5632, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.8816326530612245, |
|
"grad_norm": 0.11327036672893112, |
|
"learning_rate": 6.370859425307583e-06, |
|
"loss": 1.6175, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.8829931972789116, |
|
"grad_norm": 0.10646525745122841, |
|
"learning_rate": 6.360042764670337e-06, |
|
"loss": 1.6644, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 0.13888676328597274, |
|
"learning_rate": 6.349219226078338e-06, |
|
"loss": 1.707, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8857142857142857, |
|
"grad_norm": 0.12373888508470542, |
|
"learning_rate": 6.3383888642679585e-06, |
|
"loss": 1.562, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.8870748299319728, |
|
"grad_norm": 0.1150370310262787, |
|
"learning_rate": 6.327551734010079e-06, |
|
"loss": 1.5981, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.8884353741496599, |
|
"grad_norm": 0.1333929036077325, |
|
"learning_rate": 6.3167078901098064e-06, |
|
"loss": 1.6216, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.889795918367347, |
|
"grad_norm": 0.09937346566987916, |
|
"learning_rate": 6.305857387406204e-06, |
|
"loss": 1.7385, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.891156462585034, |
|
"grad_norm": 0.10542763073830541, |
|
"learning_rate": 6.295000280772004e-06, |
|
"loss": 1.5687, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.8925170068027211, |
|
"grad_norm": 0.11666549211549145, |
|
"learning_rate": 6.2841366251133405e-06, |
|
"loss": 1.674, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.8938775510204081, |
|
"grad_norm": 0.1010611811906902, |
|
"learning_rate": 6.273266475369466e-06, |
|
"loss": 1.8506, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.8952380952380953, |
|
"grad_norm": 0.12229341089698531, |
|
"learning_rate": 6.262389886512475e-06, |
|
"loss": 1.6744, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.8965986394557823, |
|
"grad_norm": 0.1127681300190243, |
|
"learning_rate": 6.251506913547021e-06, |
|
"loss": 1.5399, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 0.12397997048973994, |
|
"learning_rate": 6.240617611510049e-06, |
|
"loss": 1.5651, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.8993197278911564, |
|
"grad_norm": 0.11295244013894742, |
|
"learning_rate": 6.229722035470509e-06, |
|
"loss": 1.6198, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.9006802721088435, |
|
"grad_norm": 0.1075660765121038, |
|
"learning_rate": 6.21882024052908e-06, |
|
"loss": 1.6066, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.9020408163265307, |
|
"grad_norm": 0.1202834632226026, |
|
"learning_rate": 6.2079122818178885e-06, |
|
"loss": 1.7857, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.9034013605442177, |
|
"grad_norm": 0.10694066024862514, |
|
"learning_rate": 6.196998214500236e-06, |
|
"loss": 1.7661, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.9047619047619048, |
|
"grad_norm": 0.10432718122763324, |
|
"learning_rate": 6.186078093770312e-06, |
|
"loss": 1.5971, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.9061224489795918, |
|
"grad_norm": 0.11861992306540457, |
|
"learning_rate": 6.1751519748529235e-06, |
|
"loss": 1.5868, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.9074829931972789, |
|
"grad_norm": 0.10675270335750815, |
|
"learning_rate": 6.164219913003208e-06, |
|
"loss": 1.7003, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.908843537414966, |
|
"grad_norm": 0.11325117184223481, |
|
"learning_rate": 6.153281963506359e-06, |
|
"loss": 1.5944, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.9102040816326531, |
|
"grad_norm": 0.10366546297175218, |
|
"learning_rate": 6.142338181677344e-06, |
|
"loss": 1.8128, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.9115646258503401, |
|
"grad_norm": 0.10444277193505327, |
|
"learning_rate": 6.131388622860627e-06, |
|
"loss": 1.8767, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9129251700680272, |
|
"grad_norm": 0.10972076945530858, |
|
"learning_rate": 6.1204333424298835e-06, |
|
"loss": 1.7049, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 0.10751143314986424, |
|
"learning_rate": 6.10947239578773e-06, |
|
"loss": 1.7047, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.9156462585034013, |
|
"grad_norm": 0.10664892548031508, |
|
"learning_rate": 6.098505838365431e-06, |
|
"loss": 1.7452, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.9170068027210885, |
|
"grad_norm": 0.10573801367962304, |
|
"learning_rate": 6.087533725622631e-06, |
|
"loss": 1.6404, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.9183673469387755, |
|
"grad_norm": 0.11119140472745474, |
|
"learning_rate": 6.076556113047066e-06, |
|
"loss": 1.7246, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9197278911564626, |
|
"grad_norm": 0.09707326009557425, |
|
"learning_rate": 6.065573056154289e-06, |
|
"loss": 1.6736, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.9210884353741496, |
|
"grad_norm": 0.14286264459704873, |
|
"learning_rate": 6.05458461048738e-06, |
|
"loss": 1.7604, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.9224489795918367, |
|
"grad_norm": 0.10186789751886853, |
|
"learning_rate": 6.043590831616677e-06, |
|
"loss": 1.52, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.9238095238095239, |
|
"grad_norm": 0.08610978660077238, |
|
"learning_rate": 6.032591775139483e-06, |
|
"loss": 1.6948, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.9251700680272109, |
|
"grad_norm": 0.14039977263434622, |
|
"learning_rate": 6.0215874966797935e-06, |
|
"loss": 1.7652, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.926530612244898, |
|
"grad_norm": 0.09777556431038441, |
|
"learning_rate": 6.0105780518880156e-06, |
|
"loss": 1.5695, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.927891156462585, |
|
"grad_norm": 0.10076506193217513, |
|
"learning_rate": 5.999563496440678e-06, |
|
"loss": 1.5797, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.9292517006802721, |
|
"grad_norm": 0.13645736281296353, |
|
"learning_rate": 5.988543886040157e-06, |
|
"loss": 1.6124, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.9306122448979591, |
|
"grad_norm": 0.18938296093195647, |
|
"learning_rate": 5.977519276414393e-06, |
|
"loss": 1.7377, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.9319727891156463, |
|
"grad_norm": 0.11671931887431021, |
|
"learning_rate": 5.966489723316609e-06, |
|
"loss": 1.57, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 0.13192750753599222, |
|
"learning_rate": 5.955455282525027e-06, |
|
"loss": 1.5089, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.9346938775510204, |
|
"grad_norm": 0.1279138939941876, |
|
"learning_rate": 5.944416009842585e-06, |
|
"loss": 1.4862, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.9360544217687075, |
|
"grad_norm": 0.1368328421716675, |
|
"learning_rate": 5.933371961096661e-06, |
|
"loss": 1.7591, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.9374149659863945, |
|
"grad_norm": 0.09459226556926907, |
|
"learning_rate": 5.92232319213878e-06, |
|
"loss": 1.7315, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.9387755102040817, |
|
"grad_norm": 0.0944762256396259, |
|
"learning_rate": 5.9112697588443456e-06, |
|
"loss": 1.6664, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.9401360544217687, |
|
"grad_norm": 0.11165381365887841, |
|
"learning_rate": 5.900211717112343e-06, |
|
"loss": 1.512, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.9414965986394558, |
|
"grad_norm": 0.10026925859750246, |
|
"learning_rate": 5.889149122865067e-06, |
|
"loss": 1.8164, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.9428571428571428, |
|
"grad_norm": 0.14168969817408006, |
|
"learning_rate": 5.8780820320478325e-06, |
|
"loss": 1.7176, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.9442176870748299, |
|
"grad_norm": 0.11535232171729691, |
|
"learning_rate": 5.867010500628698e-06, |
|
"loss": 1.6684, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.9455782312925171, |
|
"grad_norm": 0.10755021118232244, |
|
"learning_rate": 5.855934584598175e-06, |
|
"loss": 1.7584, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.9469387755102041, |
|
"grad_norm": 0.11779529743978591, |
|
"learning_rate": 5.844854339968952e-06, |
|
"loss": 1.6906, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.9482993197278912, |
|
"grad_norm": 0.11394418134579978, |
|
"learning_rate": 5.8337698227756035e-06, |
|
"loss": 1.6403, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.9496598639455782, |
|
"grad_norm": 0.1136124987259348, |
|
"learning_rate": 5.822681089074315e-06, |
|
"loss": 1.5563, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.9510204081632653, |
|
"grad_norm": 0.10296990279179531, |
|
"learning_rate": 5.811588194942593e-06, |
|
"loss": 1.7407, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.11976200431163776, |
|
"learning_rate": 5.800491196478989e-06, |
|
"loss": 1.4828, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9537414965986395, |
|
"grad_norm": 0.11272704179233839, |
|
"learning_rate": 5.789390149802802e-06, |
|
"loss": 1.602, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.9551020408163265, |
|
"grad_norm": 0.23740154863087562, |
|
"learning_rate": 5.778285111053812e-06, |
|
"loss": 1.6265, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.9564625850340136, |
|
"grad_norm": 0.11973657664057448, |
|
"learning_rate": 5.767176136391982e-06, |
|
"loss": 1.5886, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.9578231292517007, |
|
"grad_norm": 0.11604087204409168, |
|
"learning_rate": 5.756063281997183e-06, |
|
"loss": 1.6891, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.9591836734693877, |
|
"grad_norm": 0.10628214364332488, |
|
"learning_rate": 5.744946604068904e-06, |
|
"loss": 1.6309, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.9605442176870749, |
|
"grad_norm": 0.09908640691690514, |
|
"learning_rate": 5.733826158825973e-06, |
|
"loss": 1.6741, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.9619047619047619, |
|
"grad_norm": 0.10347727809388245, |
|
"learning_rate": 5.722702002506264e-06, |
|
"loss": 1.6104, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.963265306122449, |
|
"grad_norm": 0.10424856238627535, |
|
"learning_rate": 5.711574191366427e-06, |
|
"loss": 1.6592, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.964625850340136, |
|
"grad_norm": 0.09391458584535348, |
|
"learning_rate": 5.700442781681588e-06, |
|
"loss": 1.7451, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.9659863945578231, |
|
"grad_norm": 0.1114698322409562, |
|
"learning_rate": 5.689307829745074e-06, |
|
"loss": 1.5695, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.9673469387755103, |
|
"grad_norm": 0.1263756308873154, |
|
"learning_rate": 5.678169391868128e-06, |
|
"loss": 1.7918, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.9687074829931973, |
|
"grad_norm": 0.1068286657604504, |
|
"learning_rate": 5.6670275243796194e-06, |
|
"loss": 1.6695, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.9700680272108844, |
|
"grad_norm": 0.08766062542171628, |
|
"learning_rate": 5.65588228362576e-06, |
|
"loss": 1.8529, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.9714285714285714, |
|
"grad_norm": 0.10061266375776706, |
|
"learning_rate": 5.6447337259698245e-06, |
|
"loss": 1.8285, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.9727891156462585, |
|
"grad_norm": 0.12075281384282141, |
|
"learning_rate": 5.633581907791858e-06, |
|
"loss": 1.7784, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.9741496598639455, |
|
"grad_norm": 0.1332470465992645, |
|
"learning_rate": 5.6224268854884e-06, |
|
"loss": 1.675, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.9755102040816327, |
|
"grad_norm": 0.1283354676957953, |
|
"learning_rate": 5.611268715472187e-06, |
|
"loss": 1.4725, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.9768707482993197, |
|
"grad_norm": 0.16904870188773163, |
|
"learning_rate": 5.600107454171879e-06, |
|
"loss": 1.6237, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.9782312925170068, |
|
"grad_norm": 0.1259321663471572, |
|
"learning_rate": 5.5889431580317655e-06, |
|
"loss": 1.663, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.10967294130093679, |
|
"learning_rate": 5.577775883511489e-06, |
|
"loss": 1.6294, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.9809523809523809, |
|
"grad_norm": 0.17546450235130967, |
|
"learning_rate": 5.566605687085749e-06, |
|
"loss": 1.5841, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.9823129251700681, |
|
"grad_norm": 0.12265102828756684, |
|
"learning_rate": 5.555432625244024e-06, |
|
"loss": 1.4919, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.9836734693877551, |
|
"grad_norm": 0.10886604673890071, |
|
"learning_rate": 5.5442567544902805e-06, |
|
"loss": 1.6385, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.9850340136054422, |
|
"grad_norm": 0.11421831431853076, |
|
"learning_rate": 5.533078131342695e-06, |
|
"loss": 1.6341, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.9863945578231292, |
|
"grad_norm": 0.09054819655341287, |
|
"learning_rate": 5.5218968123333594e-06, |
|
"loss": 1.624, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.9877551020408163, |
|
"grad_norm": 0.10971827184516575, |
|
"learning_rate": 5.510712854008001e-06, |
|
"loss": 1.5447, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.9891156462585035, |
|
"grad_norm": 0.12112012725777507, |
|
"learning_rate": 5.499526312925693e-06, |
|
"loss": 1.7353, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.9904761904761905, |
|
"grad_norm": 0.10541432689931088, |
|
"learning_rate": 5.488337245658569e-06, |
|
"loss": 1.6583, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.9918367346938776, |
|
"grad_norm": 0.14736400599380917, |
|
"learning_rate": 5.477145708791543e-06, |
|
"loss": 1.6641, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.9931972789115646, |
|
"grad_norm": 0.10683105881710352, |
|
"learning_rate": 5.4659517589220135e-06, |
|
"loss": 1.4082, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.9945578231292517, |
|
"grad_norm": 0.1212803888010618, |
|
"learning_rate": 5.454755452659583e-06, |
|
"loss": 1.7298, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.9959183673469387, |
|
"grad_norm": 0.114898199928806, |
|
"learning_rate": 5.443556846625773e-06, |
|
"loss": 1.6922, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.9972789115646259, |
|
"grad_norm": 0.17977169651896993, |
|
"learning_rate": 5.432355997453729e-06, |
|
"loss": 1.6933, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.998639455782313, |
|
"grad_norm": 0.13478171843239625, |
|
"learning_rate": 5.42115296178795e-06, |
|
"loss": 1.758, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.10983192009638694, |
|
"learning_rate": 5.409947796283982e-06, |
|
"loss": 1.6745, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.0013605442176872, |
|
"grad_norm": 0.15728756550496747, |
|
"learning_rate": 5.398740557608151e-06, |
|
"loss": 1.5976, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.0013605442176872, |
|
"eval_loss": 1.6908553838729858, |
|
"eval_runtime": 76.8223, |
|
"eval_samples_per_second": 53.018, |
|
"eval_steps_per_second": 6.639, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.002721088435374, |
|
"grad_norm": 0.10538368349304568, |
|
"learning_rate": 5.38753130243726e-06, |
|
"loss": 1.6615, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.0040816326530613, |
|
"grad_norm": 0.11186773529092461, |
|
"learning_rate": 5.376320087458316e-06, |
|
"loss": 1.686, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.0054421768707482, |
|
"grad_norm": 0.09819212487389627, |
|
"learning_rate": 5.365106969368235e-06, |
|
"loss": 1.6144, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.0068027210884354, |
|
"grad_norm": 0.10858676538086111, |
|
"learning_rate": 5.353892004873554e-06, |
|
"loss": 1.7423, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.0081632653061225, |
|
"grad_norm": 0.11968981856525805, |
|
"learning_rate": 5.34267525069015e-06, |
|
"loss": 1.6532, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.0095238095238095, |
|
"grad_norm": 0.09936611311145167, |
|
"learning_rate": 5.331456763542954e-06, |
|
"loss": 1.8078, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.0108843537414967, |
|
"grad_norm": 0.10022311903878614, |
|
"learning_rate": 5.3202366001656535e-06, |
|
"loss": 1.5739, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.0122448979591836, |
|
"grad_norm": 0.4103772585061572, |
|
"learning_rate": 5.309014817300422e-06, |
|
"loss": 1.6617, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.0136054421768708, |
|
"grad_norm": 0.10960071410597617, |
|
"learning_rate": 5.297791471697614e-06, |
|
"loss": 1.5742, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.014965986394558, |
|
"grad_norm": 0.1310811762519516, |
|
"learning_rate": 5.286566620115493e-06, |
|
"loss": 1.7022, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.0163265306122449, |
|
"grad_norm": 0.15568202046790616, |
|
"learning_rate": 5.2753403193199374e-06, |
|
"loss": 1.592, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.017687074829932, |
|
"grad_norm": 0.11383339823280172, |
|
"learning_rate": 5.264112626084153e-06, |
|
"loss": 1.6331, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.019047619047619, |
|
"grad_norm": 0.11927282820236593, |
|
"learning_rate": 5.2528835971883876e-06, |
|
"loss": 1.7091, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 0.11606987091212463, |
|
"learning_rate": 5.241653289419647e-06, |
|
"loss": 1.8403, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.021768707482993, |
|
"grad_norm": 0.16512841788961088, |
|
"learning_rate": 5.230421759571398e-06, |
|
"loss": 1.785, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.0231292517006803, |
|
"grad_norm": 0.12135979152340173, |
|
"learning_rate": 5.219189064443296e-06, |
|
"loss": 1.5237, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.0244897959183674, |
|
"grad_norm": 0.12001388963231435, |
|
"learning_rate": 5.207955260840879e-06, |
|
"loss": 1.6265, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.0258503401360544, |
|
"grad_norm": 0.14241973320076035, |
|
"learning_rate": 5.1967204055753e-06, |
|
"loss": 1.6843, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.0272108843537415, |
|
"grad_norm": 0.3503178069734055, |
|
"learning_rate": 5.185484555463026e-06, |
|
"loss": 1.8022, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.0285714285714285, |
|
"grad_norm": 0.10584406401171531, |
|
"learning_rate": 5.17424776732556e-06, |
|
"loss": 1.6713, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.0299319727891156, |
|
"grad_norm": 0.11525477562466327, |
|
"learning_rate": 5.163010097989138e-06, |
|
"loss": 1.73, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.0312925170068028, |
|
"grad_norm": 0.13500765537475423, |
|
"learning_rate": 5.151771604284465e-06, |
|
"loss": 1.405, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.0326530612244897, |
|
"grad_norm": 0.18732245961092447, |
|
"learning_rate": 5.140532343046406e-06, |
|
"loss": 1.5587, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.034013605442177, |
|
"grad_norm": 0.10827851940146976, |
|
"learning_rate": 5.129292371113712e-06, |
|
"loss": 1.7328, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.0353741496598639, |
|
"grad_norm": 0.11027741963001209, |
|
"learning_rate": 5.118051745328725e-06, |
|
"loss": 1.6382, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.036734693877551, |
|
"grad_norm": 0.09919441836217119, |
|
"learning_rate": 5.1068105225370975e-06, |
|
"loss": 1.6855, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.0380952380952382, |
|
"grad_norm": 0.13830797432010328, |
|
"learning_rate": 5.095568759587497e-06, |
|
"loss": 1.7411, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.0394557823129251, |
|
"grad_norm": 0.0972282280148536, |
|
"learning_rate": 5.084326513331328e-06, |
|
"loss": 1.621, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.0408163265306123, |
|
"grad_norm": 0.10578341535327669, |
|
"learning_rate": 5.0730838406224324e-06, |
|
"loss": 1.6273, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.0421768707482992, |
|
"grad_norm": 0.10463652066026084, |
|
"learning_rate": 5.061840798316815e-06, |
|
"loss": 1.725, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.0435374149659864, |
|
"grad_norm": 0.11942338757679548, |
|
"learning_rate": 5.0505974432723445e-06, |
|
"loss": 1.5898, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.0448979591836736, |
|
"grad_norm": 0.13689962498964267, |
|
"learning_rate": 5.039353832348477e-06, |
|
"loss": 1.5068, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.0462585034013605, |
|
"grad_norm": 0.13678139029155267, |
|
"learning_rate": 5.028110022405955e-06, |
|
"loss": 1.7158, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.0476190476190477, |
|
"grad_norm": 0.1000363922457913, |
|
"learning_rate": 5.0168660703065354e-06, |
|
"loss": 1.741, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.0489795918367346, |
|
"grad_norm": 0.1188815368304704, |
|
"learning_rate": 5.005622032912687e-06, |
|
"loss": 1.6623, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.0503401360544218, |
|
"grad_norm": 0.12019243813821356, |
|
"learning_rate": 4.994377967087316e-06, |
|
"loss": 1.5774, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.051700680272109, |
|
"grad_norm": 0.12450888223025358, |
|
"learning_rate": 4.983133929693467e-06, |
|
"loss": 1.5663, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.0530612244897959, |
|
"grad_norm": 0.10816898093198718, |
|
"learning_rate": 4.971889977594048e-06, |
|
"loss": 1.6911, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.054421768707483, |
|
"grad_norm": 1.729544315460825, |
|
"learning_rate": 4.960646167651524e-06, |
|
"loss": 1.7524, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.05578231292517, |
|
"grad_norm": 0.1133964092151596, |
|
"learning_rate": 4.949402556727655e-06, |
|
"loss": 1.7612, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.0571428571428572, |
|
"grad_norm": 0.12041549603751739, |
|
"learning_rate": 4.9381592016831856e-06, |
|
"loss": 1.5116, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.0585034013605443, |
|
"grad_norm": 0.11576436512487191, |
|
"learning_rate": 4.9269161593775675e-06, |
|
"loss": 1.5329, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.0598639455782313, |
|
"grad_norm": 0.12679277289105279, |
|
"learning_rate": 4.915673486668673e-06, |
|
"loss": 1.5506, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.0612244897959184, |
|
"grad_norm": 0.11679477613827106, |
|
"learning_rate": 4.904431240412503e-06, |
|
"loss": 1.5008, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.0625850340136054, |
|
"grad_norm": 0.10065169200286987, |
|
"learning_rate": 4.893189477462905e-06, |
|
"loss": 1.7685, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.0639455782312925, |
|
"grad_norm": 0.10287181892423168, |
|
"learning_rate": 4.881948254671277e-06, |
|
"loss": 1.6379, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.0653061224489795, |
|
"grad_norm": 0.10896624508987876, |
|
"learning_rate": 4.870707628886291e-06, |
|
"loss": 1.5234, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.10915491188092791, |
|
"learning_rate": 4.859467656953596e-06, |
|
"loss": 1.5865, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.0680272108843538, |
|
"grad_norm": 0.1275517896298314, |
|
"learning_rate": 4.8482283957155355e-06, |
|
"loss": 1.7069, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.0693877551020408, |
|
"grad_norm": 0.10862109015230759, |
|
"learning_rate": 4.836989902010863e-06, |
|
"loss": 1.7682, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.070748299319728, |
|
"grad_norm": 0.09805361474836685, |
|
"learning_rate": 4.825752232674441e-06, |
|
"loss": 1.6228, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.0721088435374149, |
|
"grad_norm": 0.12481176224206536, |
|
"learning_rate": 4.814515444536975e-06, |
|
"loss": 1.5027, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.073469387755102, |
|
"grad_norm": 0.11390040747547601, |
|
"learning_rate": 4.8032795944247e-06, |
|
"loss": 1.5168, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.0748299319727892, |
|
"grad_norm": 0.13458744184978433, |
|
"learning_rate": 4.792044739159124e-06, |
|
"loss": 1.5188, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.0761904761904761, |
|
"grad_norm": 0.12294480568667183, |
|
"learning_rate": 4.780810935556707e-06, |
|
"loss": 1.3946, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.0775510204081633, |
|
"grad_norm": 0.13732274998725952, |
|
"learning_rate": 4.7695782404286045e-06, |
|
"loss": 1.6201, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.0789115646258503, |
|
"grad_norm": 0.09919081899157949, |
|
"learning_rate": 4.758346710580355e-06, |
|
"loss": 1.7815, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.0802721088435374, |
|
"grad_norm": 0.10000541950613748, |
|
"learning_rate": 4.747116402811612e-06, |
|
"loss": 1.8098, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.0816326530612246, |
|
"grad_norm": 0.11420683474567415, |
|
"learning_rate": 4.735887373915848e-06, |
|
"loss": 1.4835, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.0829931972789115, |
|
"grad_norm": 0.15140465279516127, |
|
"learning_rate": 4.724659680680063e-06, |
|
"loss": 1.5029, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.0843537414965987, |
|
"grad_norm": 0.11208117898027617, |
|
"learning_rate": 4.713433379884508e-06, |
|
"loss": 1.7194, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.0857142857142856, |
|
"grad_norm": 0.16855436651508154, |
|
"learning_rate": 4.7022085283023875e-06, |
|
"loss": 1.7491, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.0870748299319728, |
|
"grad_norm": 0.11595366549169164, |
|
"learning_rate": 4.690985182699581e-06, |
|
"loss": 1.6313, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.08843537414966, |
|
"grad_norm": 0.09503431401550515, |
|
"learning_rate": 4.679763399834347e-06, |
|
"loss": 1.5984, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.089795918367347, |
|
"grad_norm": 0.10896290135710643, |
|
"learning_rate": 4.668543236457049e-06, |
|
"loss": 1.7379, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.091156462585034, |
|
"grad_norm": 0.11312340154871599, |
|
"learning_rate": 4.657324749309851e-06, |
|
"loss": 1.7817, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.092517006802721, |
|
"grad_norm": 0.11398862476288807, |
|
"learning_rate": 4.646107995126447e-06, |
|
"loss": 1.6113, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.0938775510204082, |
|
"grad_norm": 0.11528891846651022, |
|
"learning_rate": 4.634893030631767e-06, |
|
"loss": 1.6745, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.0952380952380953, |
|
"grad_norm": 0.11540329784321814, |
|
"learning_rate": 4.623679912541683e-06, |
|
"loss": 1.6443, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.0965986394557823, |
|
"grad_norm": 0.11457602895195093, |
|
"learning_rate": 4.612468697562741e-06, |
|
"loss": 1.5109, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.0979591836734695, |
|
"grad_norm": 0.108466968677861, |
|
"learning_rate": 4.6012594423918505e-06, |
|
"loss": 1.6285, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.0993197278911564, |
|
"grad_norm": 0.11392368141416775, |
|
"learning_rate": 4.5900522037160205e-06, |
|
"loss": 1.524, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.1006802721088436, |
|
"grad_norm": 0.10657614384924384, |
|
"learning_rate": 4.578847038212052e-06, |
|
"loss": 1.7741, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.1020408163265305, |
|
"grad_norm": 0.11158460621214396, |
|
"learning_rate": 4.567644002546273e-06, |
|
"loss": 1.6648, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.1034013605442177, |
|
"grad_norm": 0.11249423664710494, |
|
"learning_rate": 4.556443153374229e-06, |
|
"loss": 1.5484, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.1047619047619048, |
|
"grad_norm": 0.18597431248705018, |
|
"learning_rate": 4.5452445473404175e-06, |
|
"loss": 1.4591, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.1061224489795918, |
|
"grad_norm": 0.1146701110937394, |
|
"learning_rate": 4.534048241077987e-06, |
|
"loss": 1.7267, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.107482993197279, |
|
"grad_norm": 0.10820118015434177, |
|
"learning_rate": 4.522854291208458e-06, |
|
"loss": 1.7739, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.1088435374149659, |
|
"grad_norm": 0.09878497379061389, |
|
"learning_rate": 4.511662754341433e-06, |
|
"loss": 1.6488, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.110204081632653, |
|
"grad_norm": 0.12970442140488536, |
|
"learning_rate": 4.50047368707431e-06, |
|
"loss": 1.4903, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.1115646258503402, |
|
"grad_norm": 0.10382954289821261, |
|
"learning_rate": 4.489287145992002e-06, |
|
"loss": 1.8014, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.1129251700680272, |
|
"grad_norm": 0.12667728251125698, |
|
"learning_rate": 4.478103187666642e-06, |
|
"loss": 1.6478, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.1142857142857143, |
|
"grad_norm": 0.13500564587776376, |
|
"learning_rate": 4.4669218686573065e-06, |
|
"loss": 1.6239, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.1156462585034013, |
|
"grad_norm": 0.1244506846499162, |
|
"learning_rate": 4.45574324550972e-06, |
|
"loss": 1.718, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.1170068027210884, |
|
"grad_norm": 0.11873853458664813, |
|
"learning_rate": 4.444567374755978e-06, |
|
"loss": 1.7092, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.1183673469387756, |
|
"grad_norm": 0.12208418986043022, |
|
"learning_rate": 4.433394312914253e-06, |
|
"loss": 1.7201, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.1197278911564625, |
|
"grad_norm": 0.13523682008047516, |
|
"learning_rate": 4.4222241164885114e-06, |
|
"loss": 1.6248, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.1210884353741497, |
|
"grad_norm": 0.11246329856395026, |
|
"learning_rate": 4.411056841968236e-06, |
|
"loss": 1.5008, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.1224489795918366, |
|
"grad_norm": 0.12873507179822666, |
|
"learning_rate": 4.3998925458281225e-06, |
|
"loss": 1.6518, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.1238095238095238, |
|
"grad_norm": 0.13059910987895082, |
|
"learning_rate": 4.388731284527816e-06, |
|
"loss": 1.6547, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.125170068027211, |
|
"grad_norm": 0.13485735751834116, |
|
"learning_rate": 4.377573114511602e-06, |
|
"loss": 1.5989, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.126530612244898, |
|
"grad_norm": 0.12101663226880303, |
|
"learning_rate": 4.366418092208144e-06, |
|
"loss": 1.6142, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.127891156462585, |
|
"grad_norm": 0.11607507412451191, |
|
"learning_rate": 4.355266274030177e-06, |
|
"loss": 1.6316, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.129251700680272, |
|
"grad_norm": 0.10911266136424244, |
|
"learning_rate": 4.344117716374241e-06, |
|
"loss": 1.5342, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.1306122448979592, |
|
"grad_norm": 0.12731437170330562, |
|
"learning_rate": 4.332972475620381e-06, |
|
"loss": 1.6973, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.1319727891156464, |
|
"grad_norm": 0.09762084699859862, |
|
"learning_rate": 4.321830608131872e-06, |
|
"loss": 1.6633, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.1333333333333333, |
|
"grad_norm": 0.12978891884133562, |
|
"learning_rate": 4.310692170254927e-06, |
|
"loss": 1.6098, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.1346938775510205, |
|
"grad_norm": 0.12415908074295716, |
|
"learning_rate": 4.299557218318413e-06, |
|
"loss": 1.6307, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.1360544217687074, |
|
"grad_norm": 0.2173948925361782, |
|
"learning_rate": 4.2884258086335755e-06, |
|
"loss": 1.5907, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.1374149659863946, |
|
"grad_norm": 0.11912082507711948, |
|
"learning_rate": 4.277297997493737e-06, |
|
"loss": 1.5837, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.1387755102040815, |
|
"grad_norm": 0.19450908692643373, |
|
"learning_rate": 4.266173841174031e-06, |
|
"loss": 1.7324, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.1401360544217687, |
|
"grad_norm": 0.11032655579441553, |
|
"learning_rate": 4.255053395931097e-06, |
|
"loss": 1.7134, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.1414965986394559, |
|
"grad_norm": 0.10178083734022528, |
|
"learning_rate": 4.243936718002818e-06, |
|
"loss": 1.6472, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.10102775840788815, |
|
"learning_rate": 4.23282386360802e-06, |
|
"loss": 1.6893, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.14421768707483, |
|
"grad_norm": 0.13340734842779356, |
|
"learning_rate": 4.22171488894619e-06, |
|
"loss": 1.582, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.1455782312925171, |
|
"grad_norm": 0.39827971211004304, |
|
"learning_rate": 4.2106098501972e-06, |
|
"loss": 1.5918, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.146938775510204, |
|
"grad_norm": 0.12809647826224843, |
|
"learning_rate": 4.1995088035210126e-06, |
|
"loss": 1.6786, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.1482993197278912, |
|
"grad_norm": 0.11881853753159395, |
|
"learning_rate": 4.1884118050574084e-06, |
|
"loss": 1.6218, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.1496598639455782, |
|
"grad_norm": 0.13188862048286334, |
|
"learning_rate": 4.177318910925686e-06, |
|
"loss": 1.6943, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.1510204081632653, |
|
"grad_norm": 0.11973362482121355, |
|
"learning_rate": 4.1662301772244e-06, |
|
"loss": 1.3518, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.1523809523809523, |
|
"grad_norm": 0.1265246682564999, |
|
"learning_rate": 4.15514566003105e-06, |
|
"loss": 1.717, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.1537414965986394, |
|
"grad_norm": 0.12163131262108975, |
|
"learning_rate": 4.144065415401825e-06, |
|
"loss": 1.6591, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.1551020408163266, |
|
"grad_norm": 0.11281334037077397, |
|
"learning_rate": 4.132989499371303e-06, |
|
"loss": 1.7488, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.1564625850340136, |
|
"grad_norm": 0.116733746797477, |
|
"learning_rate": 4.1219179679521675e-06, |
|
"loss": 1.4784, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.1578231292517007, |
|
"grad_norm": 0.10824656908525278, |
|
"learning_rate": 4.110850877134935e-06, |
|
"loss": 1.6377, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.1591836734693877, |
|
"grad_norm": 0.13121005190396953, |
|
"learning_rate": 4.099788282887658e-06, |
|
"loss": 1.7325, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.1605442176870748, |
|
"grad_norm": 0.10756631934140032, |
|
"learning_rate": 4.088730241155657e-06, |
|
"loss": 1.7462, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.161904761904762, |
|
"grad_norm": 0.17347249647835658, |
|
"learning_rate": 4.077676807861221e-06, |
|
"loss": 1.4988, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.163265306122449, |
|
"grad_norm": 0.10923737210751697, |
|
"learning_rate": 4.066628038903341e-06, |
|
"loss": 1.753, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.164625850340136, |
|
"grad_norm": 0.12401952453107581, |
|
"learning_rate": 4.055583990157416e-06, |
|
"loss": 1.6061, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.165986394557823, |
|
"grad_norm": 0.16613834484616688, |
|
"learning_rate": 4.044544717474974e-06, |
|
"loss": 1.6004, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.1673469387755102, |
|
"grad_norm": 0.10830889852190581, |
|
"learning_rate": 4.033510276683392e-06, |
|
"loss": 1.8508, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.1687074829931974, |
|
"grad_norm": 0.11570096470891132, |
|
"learning_rate": 4.022480723585608e-06, |
|
"loss": 1.7405, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.1700680272108843, |
|
"grad_norm": 0.13085816560126146, |
|
"learning_rate": 4.011456113959845e-06, |
|
"loss": 1.6882, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.1714285714285715, |
|
"grad_norm": 0.09457110643687838, |
|
"learning_rate": 4.000436503559324e-06, |
|
"loss": 1.7382, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.1727891156462584, |
|
"grad_norm": 0.11447572869844899, |
|
"learning_rate": 3.989421948111987e-06, |
|
"loss": 1.7447, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.1741496598639456, |
|
"grad_norm": 0.10219706136235482, |
|
"learning_rate": 3.978412503320207e-06, |
|
"loss": 1.6079, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.1755102040816325, |
|
"grad_norm": 0.09697745892488446, |
|
"learning_rate": 3.967408224860518e-06, |
|
"loss": 1.7658, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.1768707482993197, |
|
"grad_norm": 0.11935090111442251, |
|
"learning_rate": 3.956409168383325e-06, |
|
"loss": 1.642, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.1782312925170069, |
|
"grad_norm": 0.09593894653017256, |
|
"learning_rate": 3.94541538951262e-06, |
|
"loss": 1.6837, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.1795918367346938, |
|
"grad_norm": 0.17924060912455603, |
|
"learning_rate": 3.934426943845712e-06, |
|
"loss": 1.7668, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.180952380952381, |
|
"grad_norm": 0.11737602024296209, |
|
"learning_rate": 3.923443886952934e-06, |
|
"loss": 1.5348, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.1823129251700681, |
|
"grad_norm": 0.09917281103783915, |
|
"learning_rate": 3.912466274377371e-06, |
|
"loss": 1.6659, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.183673469387755, |
|
"grad_norm": 0.10815570929741564, |
|
"learning_rate": 3.901494161634571e-06, |
|
"loss": 1.5029, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.1850340136054422, |
|
"grad_norm": 0.09777863761181164, |
|
"learning_rate": 3.890527604212273e-06, |
|
"loss": 1.7809, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.1863945578231292, |
|
"grad_norm": 0.12446664048031521, |
|
"learning_rate": 3.879566657570118e-06, |
|
"loss": 1.6918, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.1877551020408164, |
|
"grad_norm": 0.11809808630659835, |
|
"learning_rate": 3.868611377139375e-06, |
|
"loss": 1.5293, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.1891156462585033, |
|
"grad_norm": 0.11626193699249888, |
|
"learning_rate": 3.857661818322657e-06, |
|
"loss": 1.5724, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 0.10675553850073687, |
|
"learning_rate": 3.846718036493642e-06, |
|
"loss": 1.7026, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.1918367346938776, |
|
"grad_norm": 0.25785039953724753, |
|
"learning_rate": 3.835780086996794e-06, |
|
"loss": 1.816, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.1931972789115646, |
|
"grad_norm": 0.09919492633891115, |
|
"learning_rate": 3.824848025147078e-06, |
|
"loss": 1.8633, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.1945578231292517, |
|
"grad_norm": 0.13021628412390385, |
|
"learning_rate": 3.81392190622969e-06, |
|
"loss": 1.6292, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.1959183673469387, |
|
"grad_norm": 0.1063338899612945, |
|
"learning_rate": 3.8030017854997654e-06, |
|
"loss": 1.7619, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.1972789115646258, |
|
"grad_norm": 0.1400157862890337, |
|
"learning_rate": 3.7920877181821136e-06, |
|
"loss": 1.5179, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.198639455782313, |
|
"grad_norm": 0.1386445404000312, |
|
"learning_rate": 3.781179759470921e-06, |
|
"loss": 1.6624, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.1069092489728771, |
|
"learning_rate": 3.7702779645294907e-06, |
|
"loss": 1.8113, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.2013605442176871, |
|
"grad_norm": 0.1190549983688599, |
|
"learning_rate": 3.759382388489952e-06, |
|
"loss": 1.6425, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.202721088435374, |
|
"grad_norm": 0.11079642382956639, |
|
"learning_rate": 3.74849308645298e-06, |
|
"loss": 1.6609, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.2040816326530612, |
|
"grad_norm": 0.14105617751955826, |
|
"learning_rate": 3.7376101134875278e-06, |
|
"loss": 1.55, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.2054421768707484, |
|
"grad_norm": 0.14448379899642935, |
|
"learning_rate": 3.7267335246305346e-06, |
|
"loss": 1.778, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.2068027210884353, |
|
"grad_norm": 0.10874354603342407, |
|
"learning_rate": 3.715863374886661e-06, |
|
"loss": 1.6611, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.2081632653061225, |
|
"grad_norm": 0.10802721342705796, |
|
"learning_rate": 3.7049997192279976e-06, |
|
"loss": 1.5966, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.2095238095238094, |
|
"grad_norm": 0.12233010145027158, |
|
"learning_rate": 3.6941426125937992e-06, |
|
"loss": 1.5311, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.2108843537414966, |
|
"grad_norm": 0.10009553518935768, |
|
"learning_rate": 3.6832921098901952e-06, |
|
"loss": 1.5145, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2122448979591836, |
|
"grad_norm": 0.11901267626986793, |
|
"learning_rate": 3.6724482659899226e-06, |
|
"loss": 1.7466, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.2136054421768707, |
|
"grad_norm": 0.12962797644138863, |
|
"learning_rate": 3.661611135732043e-06, |
|
"loss": 1.5964, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.2149659863945579, |
|
"grad_norm": 0.10733667423090783, |
|
"learning_rate": 3.6507807739216628e-06, |
|
"loss": 1.7763, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.2163265306122448, |
|
"grad_norm": 0.11144056418847463, |
|
"learning_rate": 3.6399572353296642e-06, |
|
"loss": 1.7047, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.217687074829932, |
|
"grad_norm": 0.08875006973132216, |
|
"learning_rate": 3.6291405746924186e-06, |
|
"loss": 1.7604, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.2190476190476192, |
|
"grad_norm": 0.13024552194401284, |
|
"learning_rate": 3.6183308467115174e-06, |
|
"loss": 1.6061, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.220408163265306, |
|
"grad_norm": 0.12857972357080566, |
|
"learning_rate": 3.6075281060534917e-06, |
|
"loss": 1.6149, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.2217687074829933, |
|
"grad_norm": 0.11603998479755155, |
|
"learning_rate": 3.5967324073495363e-06, |
|
"loss": 1.6111, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.2231292517006802, |
|
"grad_norm": 0.11178507529251955, |
|
"learning_rate": 3.585943805195232e-06, |
|
"loss": 1.6971, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 0.14802404367937794, |
|
"learning_rate": 3.575162354150276e-06, |
|
"loss": 1.7452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2258503401360543, |
|
"grad_norm": 0.09565639349194041, |
|
"learning_rate": 3.5643881087381983e-06, |
|
"loss": 1.7094, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.2272108843537415, |
|
"grad_norm": 0.12259182420540694, |
|
"learning_rate": 3.553621123446087e-06, |
|
"loss": 1.4945, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.2285714285714286, |
|
"grad_norm": 0.09891155424272953, |
|
"learning_rate": 3.542861452724318e-06, |
|
"loss": 1.7481, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.2299319727891156, |
|
"grad_norm": 0.12577917367601982, |
|
"learning_rate": 3.5321091509862733e-06, |
|
"loss": 1.658, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.2312925170068028, |
|
"grad_norm": 0.11110224185718393, |
|
"learning_rate": 3.521364272608071e-06, |
|
"loss": 1.7805, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.2326530612244897, |
|
"grad_norm": 1.029133559209144, |
|
"learning_rate": 3.5106268719282863e-06, |
|
"loss": 1.6974, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.2340136054421769, |
|
"grad_norm": 0.11635973055276655, |
|
"learning_rate": 3.499897003247682e-06, |
|
"loss": 1.6067, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.235374149659864, |
|
"grad_norm": 0.1374434820581917, |
|
"learning_rate": 3.489174720828924e-06, |
|
"loss": 1.4329, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.236734693877551, |
|
"grad_norm": 0.12005946386037955, |
|
"learning_rate": 3.4784600788963197e-06, |
|
"loss": 1.6061, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.2380952380952381, |
|
"grad_norm": 0.2566914570489289, |
|
"learning_rate": 3.4677531316355343e-06, |
|
"loss": 1.6285, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.239455782312925, |
|
"grad_norm": 0.12805153195567712, |
|
"learning_rate": 3.4570539331933196e-06, |
|
"loss": 1.6518, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.2408163265306122, |
|
"grad_norm": 0.12193109943671782, |
|
"learning_rate": 3.4463625376772415e-06, |
|
"loss": 1.7769, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.2421768707482994, |
|
"grad_norm": 0.11785291933334519, |
|
"learning_rate": 3.4356789991554036e-06, |
|
"loss": 1.7037, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.2435374149659864, |
|
"grad_norm": 0.13098314516857928, |
|
"learning_rate": 3.425003371656178e-06, |
|
"loss": 1.6332, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.2448979591836735, |
|
"grad_norm": 0.11058534722726451, |
|
"learning_rate": 3.4143357091679276e-06, |
|
"loss": 1.8928, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.2462585034013605, |
|
"grad_norm": 0.1304628400810422, |
|
"learning_rate": 3.403676065638735e-06, |
|
"loss": 1.5842, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.2476190476190476, |
|
"grad_norm": 0.12141593155806699, |
|
"learning_rate": 3.393024494976128e-06, |
|
"loss": 1.6872, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.2489795918367346, |
|
"grad_norm": 0.1180901086883869, |
|
"learning_rate": 3.3823810510468146e-06, |
|
"loss": 1.4999, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.2503401360544217, |
|
"grad_norm": 0.1426231115449843, |
|
"learning_rate": 3.3717457876763994e-06, |
|
"loss": 1.7262, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.251700680272109, |
|
"grad_norm": 0.11420848866165159, |
|
"learning_rate": 3.361118758649116e-06, |
|
"loss": 1.617, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.251700680272109, |
|
"eval_loss": 1.6881320476531982, |
|
"eval_runtime": 76.6095, |
|
"eval_samples_per_second": 53.166, |
|
"eval_steps_per_second": 6.657, |
|
"step": 920 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1470, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 184, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.289148227243213e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|