|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 1700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14705882352941177, |
|
"grad_norm": 0.14238034188747406, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.8446, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 0.14489340782165527, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.8425, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4411764705882353, |
|
"grad_norm": 0.1551039218902588, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.8918, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 0.15667259693145752, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.8473, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 0.14174659550189972, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.8519, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 0.15220147371292114, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 0.8201, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0294117647058822, |
|
"grad_norm": 0.14088748395442963, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.8345, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 0.1412580907344818, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.8549, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.3235294117647058, |
|
"grad_norm": 0.12632407248020172, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.8431, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 0.11696045845746994, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 0.8157, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.6176470588235294, |
|
"grad_norm": 0.1272636502981186, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.8236, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 0.10813312977552414, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.8019, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.9117647058823528, |
|
"grad_norm": 0.11896699666976929, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 0.8185, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.0588235294117645, |
|
"grad_norm": 0.10265181213617325, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 0.8044, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.2058823529411766, |
|
"grad_norm": 0.10106126219034195, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.7858, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 0.10131911188364029, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 0.8084, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.09996718913316727, |
|
"learning_rate": 5e-05, |
|
"loss": 0.814, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.6470588235294117, |
|
"grad_norm": 0.0959576964378357, |
|
"learning_rate": 4.999893574965545e-05, |
|
"loss": 0.8105, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.7941176470588234, |
|
"grad_norm": 0.11985825002193451, |
|
"learning_rate": 4.9995743099299886e-05, |
|
"loss": 0.7813, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 0.09690634161233902, |
|
"learning_rate": 4.9990422350958156e-05, |
|
"loss": 0.7917, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.088235294117647, |
|
"grad_norm": 0.11959923803806305, |
|
"learning_rate": 4.99829740079732e-05, |
|
"loss": 0.7531, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.235294117647059, |
|
"grad_norm": 0.09686623513698578, |
|
"learning_rate": 4.99733987749585e-05, |
|
"loss": 0.783, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.3823529411764706, |
|
"grad_norm": 0.0896935760974884, |
|
"learning_rate": 4.996169755773138e-05, |
|
"loss": 0.7506, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 3.5294117647058822, |
|
"grad_norm": 0.09192899614572525, |
|
"learning_rate": 4.9947871463227374e-05, |
|
"loss": 0.7597, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.6764705882352944, |
|
"grad_norm": 0.09252317994832993, |
|
"learning_rate": 4.993192179939542e-05, |
|
"loss": 0.767, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 3.8235294117647056, |
|
"grad_norm": 0.09957632422447205, |
|
"learning_rate": 4.991385007507422e-05, |
|
"loss": 0.8006, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.9705882352941178, |
|
"grad_norm": 0.12217137217521667, |
|
"learning_rate": 4.989365799984943e-05, |
|
"loss": 0.7513, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 4.117647058823529, |
|
"grad_norm": 0.09010059386491776, |
|
"learning_rate": 4.9871347483892006e-05, |
|
"loss": 0.7531, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.264705882352941, |
|
"grad_norm": 0.09149183332920074, |
|
"learning_rate": 4.984692063777743e-05, |
|
"loss": 0.7518, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.411764705882353, |
|
"grad_norm": 0.09295113384723663, |
|
"learning_rate": 4.9820379772286095e-05, |
|
"loss": 0.7665, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.5588235294117645, |
|
"grad_norm": 0.09854214638471603, |
|
"learning_rate": 4.979172739818469e-05, |
|
"loss": 0.7739, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 4.705882352941177, |
|
"grad_norm": 0.09157629311084747, |
|
"learning_rate": 4.9760966225988675e-05, |
|
"loss": 0.7522, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.852941176470588, |
|
"grad_norm": 0.10833761096000671, |
|
"learning_rate": 4.9728099165705895e-05, |
|
"loss": 0.7605, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.09924355149269104, |
|
"learning_rate": 4.9693129326561254e-05, |
|
"loss": 0.7153, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.147058823529412, |
|
"grad_norm": 0.09313185513019562, |
|
"learning_rate": 4.9656060016702606e-05, |
|
"loss": 0.7494, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 5.294117647058823, |
|
"grad_norm": 0.11171400547027588, |
|
"learning_rate": 4.961689474288779e-05, |
|
"loss": 0.733, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.4411764705882355, |
|
"grad_norm": 0.09828388690948486, |
|
"learning_rate": 4.957563721015293e-05, |
|
"loss": 0.7663, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 5.588235294117647, |
|
"grad_norm": 0.09972433745861053, |
|
"learning_rate": 4.953229132146186e-05, |
|
"loss": 0.7576, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.735294117647059, |
|
"grad_norm": 0.11432339251041412, |
|
"learning_rate": 4.948686117733699e-05, |
|
"loss": 0.7379, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"grad_norm": 0.10343588888645172, |
|
"learning_rate": 4.9439351075471346e-05, |
|
"loss": 0.7066, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.029411764705882, |
|
"grad_norm": 0.0964265912771225, |
|
"learning_rate": 4.9389765510322026e-05, |
|
"loss": 0.7322, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 6.176470588235294, |
|
"grad_norm": 0.11457476019859314, |
|
"learning_rate": 4.9338109172685006e-05, |
|
"loss": 0.742, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 6.323529411764706, |
|
"grad_norm": 0.10812544822692871, |
|
"learning_rate": 4.92843869492514e-05, |
|
"loss": 0.7572, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 6.470588235294118, |
|
"grad_norm": 0.1057206243276596, |
|
"learning_rate": 4.9228603922145206e-05, |
|
"loss": 0.7342, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.617647058823529, |
|
"grad_norm": 0.11412467062473297, |
|
"learning_rate": 4.917076536844248e-05, |
|
"loss": 0.7331, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 6.764705882352941, |
|
"grad_norm": 0.11059483885765076, |
|
"learning_rate": 4.9110876759672184e-05, |
|
"loss": 0.718, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.911764705882353, |
|
"grad_norm": 0.10819140076637268, |
|
"learning_rate": 4.9048943761298544e-05, |
|
"loss": 0.7153, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 7.0588235294117645, |
|
"grad_norm": 0.11002287268638611, |
|
"learning_rate": 4.89849722321851e-05, |
|
"loss": 0.7201, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.205882352941177, |
|
"grad_norm": 0.11289830505847931, |
|
"learning_rate": 4.891896822404046e-05, |
|
"loss": 0.7261, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 7.352941176470588, |
|
"grad_norm": 0.12590822577476501, |
|
"learning_rate": 4.885093798084583e-05, |
|
"loss": 0.7329, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 0.10964758694171906, |
|
"learning_rate": 4.878088793826428e-05, |
|
"loss": 0.7413, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 7.647058823529412, |
|
"grad_norm": 0.10680090636014938, |
|
"learning_rate": 4.8708824723031995e-05, |
|
"loss": 0.7174, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.794117647058823, |
|
"grad_norm": 0.10832036286592484, |
|
"learning_rate": 4.8634755152331355e-05, |
|
"loss": 0.7345, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 7.9411764705882355, |
|
"grad_norm": 0.10789214819669724, |
|
"learning_rate": 4.8558686233145996e-05, |
|
"loss": 0.7213, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 8.088235294117647, |
|
"grad_norm": 0.11145245283842087, |
|
"learning_rate": 4.8480625161598e-05, |
|
"loss": 0.7184, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 8.235294117647058, |
|
"grad_norm": 0.12131233513355255, |
|
"learning_rate": 4.840057932226715e-05, |
|
"loss": 0.737, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.382352941176471, |
|
"grad_norm": 0.11897268146276474, |
|
"learning_rate": 4.831855628749228e-05, |
|
"loss": 0.7254, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 8.529411764705882, |
|
"grad_norm": 0.1130763441324234, |
|
"learning_rate": 4.823456381665501e-05, |
|
"loss": 0.7213, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.676470588235293, |
|
"grad_norm": 0.11835259944200516, |
|
"learning_rate": 4.8148609855445624e-05, |
|
"loss": 0.7102, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 8.823529411764707, |
|
"grad_norm": 0.12309901416301727, |
|
"learning_rate": 4.806070253511151e-05, |
|
"loss": 0.7227, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.970588235294118, |
|
"grad_norm": 0.11361519992351532, |
|
"learning_rate": 4.797085017168787e-05, |
|
"loss": 0.7125, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 9.117647058823529, |
|
"grad_norm": 0.13154913485050201, |
|
"learning_rate": 4.7879061265211e-05, |
|
"loss": 0.7293, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 9.264705882352942, |
|
"grad_norm": 0.12245271354913712, |
|
"learning_rate": 4.778534449891428e-05, |
|
"loss": 0.7216, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 9.411764705882353, |
|
"grad_norm": 0.10899204015731812, |
|
"learning_rate": 4.768970873840669e-05, |
|
"loss": 0.706, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.558823529411764, |
|
"grad_norm": 0.1145118996500969, |
|
"learning_rate": 4.75921630308341e-05, |
|
"loss": 0.7039, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 9.705882352941176, |
|
"grad_norm": 0.12111522257328033, |
|
"learning_rate": 4.749271660402341e-05, |
|
"loss": 0.7359, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 9.852941176470589, |
|
"grad_norm": 0.11270228773355484, |
|
"learning_rate": 4.739137886560966e-05, |
|
"loss": 0.7006, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.10989291220903397, |
|
"learning_rate": 4.7288159402146e-05, |
|
"loss": 0.7123, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 10.147058823529411, |
|
"grad_norm": 0.11979430168867111, |
|
"learning_rate": 4.7183067978196855e-05, |
|
"loss": 0.7213, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 10.294117647058824, |
|
"grad_norm": 0.11735141277313232, |
|
"learning_rate": 4.707611453541412e-05, |
|
"loss": 0.7061, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 10.441176470588236, |
|
"grad_norm": 0.12181384861469269, |
|
"learning_rate": 4.696730919159677e-05, |
|
"loss": 0.6962, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 10.588235294117647, |
|
"grad_norm": 0.11275137960910797, |
|
"learning_rate": 4.6856662239733666e-05, |
|
"loss": 0.7467, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 10.735294117647058, |
|
"grad_norm": 0.13028523325920105, |
|
"learning_rate": 4.674418414702985e-05, |
|
"loss": 0.7047, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 10.882352941176471, |
|
"grad_norm": 0.12034178525209427, |
|
"learning_rate": 4.662988555391632e-05, |
|
"loss": 0.7061, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 11.029411764705882, |
|
"grad_norm": 0.11595606803894043, |
|
"learning_rate": 4.6513777273043495e-05, |
|
"loss": 0.7023, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 11.176470588235293, |
|
"grad_norm": 0.11920719593763351, |
|
"learning_rate": 4.63958702882583e-05, |
|
"loss": 0.6886, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 11.323529411764707, |
|
"grad_norm": 0.12535597383975983, |
|
"learning_rate": 4.6276175753565105e-05, |
|
"loss": 0.7209, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 11.470588235294118, |
|
"grad_norm": 0.12857039272785187, |
|
"learning_rate": 4.615470499207056e-05, |
|
"loss": 0.7018, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 11.617647058823529, |
|
"grad_norm": 0.13531994819641113, |
|
"learning_rate": 4.6031469494912416e-05, |
|
"loss": 0.7145, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 11.764705882352942, |
|
"grad_norm": 0.10658453404903412, |
|
"learning_rate": 4.59064809201725e-05, |
|
"loss": 0.723, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.911764705882353, |
|
"grad_norm": 0.12011521309614182, |
|
"learning_rate": 4.5779751091773774e-05, |
|
"loss": 0.7011, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 12.058823529411764, |
|
"grad_norm": 0.11478458344936371, |
|
"learning_rate": 4.5651291998361926e-05, |
|
"loss": 0.7117, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 12.205882352941176, |
|
"grad_norm": 0.1333089917898178, |
|
"learning_rate": 4.55211157921711e-05, |
|
"loss": 0.7148, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 12.352941176470589, |
|
"grad_norm": 0.11957768350839615, |
|
"learning_rate": 4.538923478787439e-05, |
|
"loss": 0.7049, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 0.11590797454118729, |
|
"learning_rate": 4.5255661461418854e-05, |
|
"loss": 0.6797, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 12.647058823529411, |
|
"grad_norm": 0.12927260994911194, |
|
"learning_rate": 4.5120408448845264e-05, |
|
"loss": 0.7126, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 12.794117647058824, |
|
"grad_norm": 0.13119827210903168, |
|
"learning_rate": 4.4983488545092753e-05, |
|
"loss": 0.7082, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 12.941176470588236, |
|
"grad_norm": 0.1294083297252655, |
|
"learning_rate": 4.4844914702788386e-05, |
|
"loss": 0.699, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 13.088235294117647, |
|
"grad_norm": 0.11839265376329422, |
|
"learning_rate": 4.470470003102192e-05, |
|
"loss": 0.71, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 13.235294117647058, |
|
"grad_norm": 0.12290767580270767, |
|
"learning_rate": 4.456285779410558e-05, |
|
"loss": 0.7058, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 13.382352941176471, |
|
"grad_norm": 0.12060663849115372, |
|
"learning_rate": 4.4419401410319334e-05, |
|
"loss": 0.6744, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 13.529411764705882, |
|
"grad_norm": 0.12197393923997879, |
|
"learning_rate": 4.427434445064148e-05, |
|
"loss": 0.6919, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 13.676470588235293, |
|
"grad_norm": 0.12659871578216553, |
|
"learning_rate": 4.4127700637464834e-05, |
|
"loss": 0.7102, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 13.823529411764707, |
|
"grad_norm": 0.12616273760795593, |
|
"learning_rate": 4.3979483843298624e-05, |
|
"loss": 0.6924, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 13.970588235294118, |
|
"grad_norm": 0.1336318999528885, |
|
"learning_rate": 4.382970808945612e-05, |
|
"loss": 0.7248, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 14.117647058823529, |
|
"grad_norm": 0.12975798547267914, |
|
"learning_rate": 4.367838754472821e-05, |
|
"loss": 0.7266, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 14.264705882352942, |
|
"grad_norm": 0.1267329454421997, |
|
"learning_rate": 4.3525536524043076e-05, |
|
"loss": 0.7028, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 14.411764705882353, |
|
"grad_norm": 0.12307338416576385, |
|
"learning_rate": 4.337116948711195e-05, |
|
"loss": 0.7052, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 14.558823529411764, |
|
"grad_norm": 0.14381247758865356, |
|
"learning_rate": 4.3215301037061244e-05, |
|
"loss": 0.6947, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 14.705882352941176, |
|
"grad_norm": 0.11929916590452194, |
|
"learning_rate": 4.305794591905113e-05, |
|
"loss": 0.691, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 14.852941176470589, |
|
"grad_norm": 0.12451142817735672, |
|
"learning_rate": 4.289911901888056e-05, |
|
"loss": 0.6859, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.12542015314102173, |
|
"learning_rate": 4.2738835361579175e-05, |
|
"loss": 0.7139, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 15.147058823529411, |
|
"grad_norm": 0.11630310118198395, |
|
"learning_rate": 4.257711010998586e-05, |
|
"loss": 0.705, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 15.294117647058824, |
|
"grad_norm": 0.13719907402992249, |
|
"learning_rate": 4.241395856331437e-05, |
|
"loss": 0.7001, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 15.441176470588236, |
|
"grad_norm": 0.13168473541736603, |
|
"learning_rate": 4.224939615570602e-05, |
|
"loss": 0.7047, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 15.588235294117647, |
|
"grad_norm": 0.11908990889787674, |
|
"learning_rate": 4.2083438454769606e-05, |
|
"loss": 0.7086, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 15.735294117647058, |
|
"grad_norm": 0.12613283097743988, |
|
"learning_rate": 4.1916101160108715e-05, |
|
"loss": 0.6911, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 15.882352941176471, |
|
"grad_norm": 0.12481500208377838, |
|
"learning_rate": 4.174740010183656e-05, |
|
"loss": 0.6845, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 16.029411764705884, |
|
"grad_norm": 0.1311793029308319, |
|
"learning_rate": 4.15773512390784e-05, |
|
"loss": 0.6976, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 16.176470588235293, |
|
"grad_norm": 0.11820737272500992, |
|
"learning_rate": 4.140597065846188e-05, |
|
"loss": 0.7101, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 16.323529411764707, |
|
"grad_norm": 0.12259554117918015, |
|
"learning_rate": 4.123327457259517e-05, |
|
"loss": 0.6973, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 16.470588235294116, |
|
"grad_norm": 0.12584823369979858, |
|
"learning_rate": 4.105927931853327e-05, |
|
"loss": 0.6903, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 16.61764705882353, |
|
"grad_norm": 0.13863661885261536, |
|
"learning_rate": 4.088400135623256e-05, |
|
"loss": 0.6726, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 16.764705882352942, |
|
"grad_norm": 0.13399486243724823, |
|
"learning_rate": 4.070745726699363e-05, |
|
"loss": 0.6977, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 16.91176470588235, |
|
"grad_norm": 0.1234726756811142, |
|
"learning_rate": 4.0529663751892734e-05, |
|
"loss": 0.6907, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 17.058823529411764, |
|
"grad_norm": 0.11935710906982422, |
|
"learning_rate": 4.035063763020185e-05, |
|
"loss": 0.7128, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 17.205882352941178, |
|
"grad_norm": 0.12929606437683105, |
|
"learning_rate": 4.017039583779756e-05, |
|
"loss": 0.7106, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 17.352941176470587, |
|
"grad_norm": 0.1248982772231102, |
|
"learning_rate": 3.9988955425558965e-05, |
|
"loss": 0.6897, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 0.1281740367412567, |
|
"learning_rate": 3.980633355775461e-05, |
|
"loss": 0.6871, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 17.647058823529413, |
|
"grad_norm": 0.12564094364643097, |
|
"learning_rate": 3.962254751041877e-05, |
|
"loss": 0.7008, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 17.794117647058822, |
|
"grad_norm": 0.1336313784122467, |
|
"learning_rate": 3.943761466971717e-05, |
|
"loss": 0.6851, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 17.941176470588236, |
|
"grad_norm": 0.1354963481426239, |
|
"learning_rate": 3.9251552530302206e-05, |
|
"loss": 0.6951, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 18.08823529411765, |
|
"grad_norm": 0.15230301022529602, |
|
"learning_rate": 3.906437869365795e-05, |
|
"loss": 0.693, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 18.235294117647058, |
|
"grad_norm": 0.1296202540397644, |
|
"learning_rate": 3.887611086643508e-05, |
|
"loss": 0.6874, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 18.38235294117647, |
|
"grad_norm": 0.13370412588119507, |
|
"learning_rate": 3.8686766858775843e-05, |
|
"loss": 0.7085, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 18.529411764705884, |
|
"grad_norm": 0.13132105767726898, |
|
"learning_rate": 3.849636458262913e-05, |
|
"loss": 0.7037, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 18.676470588235293, |
|
"grad_norm": 0.13370752334594727, |
|
"learning_rate": 3.830492205005612e-05, |
|
"loss": 0.679, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"grad_norm": 0.12619805335998535, |
|
"learning_rate": 3.811245737152624e-05, |
|
"loss": 0.6846, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 18.970588235294116, |
|
"grad_norm": 0.13043031096458435, |
|
"learning_rate": 3.7918988754203985e-05, |
|
"loss": 0.6729, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 19.11764705882353, |
|
"grad_norm": 0.132464200258255, |
|
"learning_rate": 3.772453450022649e-05, |
|
"loss": 0.7112, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 19.264705882352942, |
|
"grad_norm": 0.1268807351589203, |
|
"learning_rate": 3.752911300497212e-05, |
|
"loss": 0.6804, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 19.41176470588235, |
|
"grad_norm": 0.14288154244422913, |
|
"learning_rate": 3.73327427553203e-05, |
|
"loss": 0.6867, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 19.558823529411764, |
|
"grad_norm": 0.14849698543548584, |
|
"learning_rate": 3.7135442327902695e-05, |
|
"loss": 0.694, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 19.705882352941178, |
|
"grad_norm": 0.12607896327972412, |
|
"learning_rate": 3.6937230387345746e-05, |
|
"loss": 0.6873, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 19.852941176470587, |
|
"grad_norm": 0.12860074639320374, |
|
"learning_rate": 3.673812568450513e-05, |
|
"loss": 0.6942, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.12468240410089493, |
|
"learning_rate": 3.6538147054691817e-05, |
|
"loss": 0.6844, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 20.147058823529413, |
|
"grad_norm": 0.1367003470659256, |
|
"learning_rate": 3.6337313415890315e-05, |
|
"loss": 0.7005, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 20.294117647058822, |
|
"grad_norm": 0.13072577118873596, |
|
"learning_rate": 3.6135643766969e-05, |
|
"loss": 0.671, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 20.441176470588236, |
|
"grad_norm": 0.1326008439064026, |
|
"learning_rate": 3.593315718588286e-05, |
|
"loss": 0.6727, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 20.58823529411765, |
|
"grad_norm": 0.1257023960351944, |
|
"learning_rate": 3.572987282786864e-05, |
|
"loss": 0.7073, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 20.735294117647058, |
|
"grad_norm": 0.14335250854492188, |
|
"learning_rate": 3.552580992363285e-05, |
|
"loss": 0.6821, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 20.88235294117647, |
|
"grad_norm": 0.13271793723106384, |
|
"learning_rate": 3.5320987777532465e-05, |
|
"loss": 0.6959, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 21.029411764705884, |
|
"grad_norm": 0.12265238165855408, |
|
"learning_rate": 3.5115425765748793e-05, |
|
"loss": 0.6767, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 21.176470588235293, |
|
"grad_norm": 0.13558083772659302, |
|
"learning_rate": 3.4909143334454454e-05, |
|
"loss": 0.6859, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 21.323529411764707, |
|
"grad_norm": 0.1432723104953766, |
|
"learning_rate": 3.4702159997973747e-05, |
|
"loss": 0.6921, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 21.470588235294116, |
|
"grad_norm": 0.13662855327129364, |
|
"learning_rate": 3.449449533693664e-05, |
|
"loss": 0.7063, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 21.61764705882353, |
|
"grad_norm": 0.1422967165708542, |
|
"learning_rate": 3.428616899642645e-05, |
|
"loss": 0.6987, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 21.764705882352942, |
|
"grad_norm": 0.1233050599694252, |
|
"learning_rate": 3.4077200684121345e-05, |
|
"loss": 0.6831, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 21.91176470588235, |
|
"grad_norm": 0.13728494942188263, |
|
"learning_rate": 3.3867610168430084e-05, |
|
"loss": 0.6873, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 22.058823529411764, |
|
"grad_norm": 0.1322290301322937, |
|
"learning_rate": 3.365741727662187e-05, |
|
"loss": 0.651, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 22.205882352941178, |
|
"grad_norm": 0.13796144723892212, |
|
"learning_rate": 3.3446641892950696e-05, |
|
"loss": 0.671, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 22.352941176470587, |
|
"grad_norm": 0.13293515145778656, |
|
"learning_rate": 3.3235303956774324e-05, |
|
"loss": 0.7056, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"grad_norm": 0.13630028069019318, |
|
"learning_rate": 3.3023423460667985e-05, |
|
"loss": 0.6866, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 22.647058823529413, |
|
"grad_norm": 0.1360238939523697, |
|
"learning_rate": 3.281102044853309e-05, |
|
"loss": 0.6991, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 22.794117647058822, |
|
"grad_norm": 0.13794392347335815, |
|
"learning_rate": 3.2598115013701114e-05, |
|
"loss": 0.6959, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 22.941176470588236, |
|
"grad_norm": 0.1369139850139618, |
|
"learning_rate": 3.2384727297032705e-05, |
|
"loss": 0.6657, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 23.08823529411765, |
|
"grad_norm": 0.12718403339385986, |
|
"learning_rate": 3.217087748501237e-05, |
|
"loss": 0.6733, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 23.235294117647058, |
|
"grad_norm": 0.13353672623634338, |
|
"learning_rate": 3.1956585807838914e-05, |
|
"loss": 0.6774, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 23.38235294117647, |
|
"grad_norm": 0.13364404439926147, |
|
"learning_rate": 3.1741872537511535e-05, |
|
"loss": 0.6752, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 23.529411764705884, |
|
"grad_norm": 0.14464542269706726, |
|
"learning_rate": 3.152675798591219e-05, |
|
"loss": 0.6667, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 23.676470588235293, |
|
"grad_norm": 0.13043712079524994, |
|
"learning_rate": 3.131126250288405e-05, |
|
"loss": 0.6924, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 23.823529411764707, |
|
"grad_norm": 0.12341820448637009, |
|
"learning_rate": 3.109540647430641e-05, |
|
"loss": 0.6969, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 23.970588235294116, |
|
"grad_norm": 0.14009004831314087, |
|
"learning_rate": 3.087921032016619e-05, |
|
"loss": 0.6947, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 24.11764705882353, |
|
"grad_norm": 0.13528534770011902, |
|
"learning_rate": 3.066269449262618e-05, |
|
"loss": 0.6833, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 24.264705882352942, |
|
"grad_norm": 0.1405653953552246, |
|
"learning_rate": 3.04458794740903e-05, |
|
"loss": 0.6919, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 24.41176470588235, |
|
"grad_norm": 0.14450417459011078, |
|
"learning_rate": 3.0228785775265943e-05, |
|
"loss": 0.7085, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 24.558823529411764, |
|
"grad_norm": 0.1257210224866867, |
|
"learning_rate": 3.001143393322368e-05, |
|
"loss": 0.7022, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 24.705882352941178, |
|
"grad_norm": 0.14650067687034607, |
|
"learning_rate": 2.9793844509454417e-05, |
|
"loss": 0.6559, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 24.852941176470587, |
|
"grad_norm": 0.1673348844051361, |
|
"learning_rate": 2.9576038087924297e-05, |
|
"loss": 0.6628, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.13868844509124756, |
|
"learning_rate": 2.9358035273127483e-05, |
|
"loss": 0.6761, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 25.147058823529413, |
|
"grad_norm": 0.12067105621099472, |
|
"learning_rate": 2.9139856688136917e-05, |
|
"loss": 0.6735, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 25.294117647058822, |
|
"grad_norm": 0.1306021362543106, |
|
"learning_rate": 2.8921522972653437e-05, |
|
"loss": 0.6711, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 25.441176470588236, |
|
"grad_norm": 0.13525615632534027, |
|
"learning_rate": 2.8703054781053194e-05, |
|
"loss": 0.6723, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 25.58823529411765, |
|
"grad_norm": 0.1306258738040924, |
|
"learning_rate": 2.8484472780433828e-05, |
|
"loss": 0.6922, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 25.735294117647058, |
|
"grad_norm": 0.14182746410369873, |
|
"learning_rate": 2.8265797648659283e-05, |
|
"loss": 0.6911, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 25.88235294117647, |
|
"grad_norm": 0.13599254190921783, |
|
"learning_rate": 2.8047050072403713e-05, |
|
"loss": 0.6891, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 26.029411764705884, |
|
"grad_norm": 0.1291087120771408, |
|
"learning_rate": 2.7828250745194544e-05, |
|
"loss": 0.6971, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 26.176470588235293, |
|
"grad_norm": 0.11979696899652481, |
|
"learning_rate": 2.7609420365454823e-05, |
|
"loss": 0.6921, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 26.323529411764707, |
|
"grad_norm": 0.1369645744562149, |
|
"learning_rate": 2.7390579634545182e-05, |
|
"loss": 0.667, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 26.470588235294116, |
|
"grad_norm": 0.1354684978723526, |
|
"learning_rate": 2.7171749254805458e-05, |
|
"loss": 0.6918, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 26.61764705882353, |
|
"grad_norm": 0.1434841752052307, |
|
"learning_rate": 2.6952949927596295e-05, |
|
"loss": 0.6961, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 26.764705882352942, |
|
"grad_norm": 0.13030685484409332, |
|
"learning_rate": 2.6734202351340726e-05, |
|
"loss": 0.6742, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 26.91176470588235, |
|
"grad_norm": 0.1375734657049179, |
|
"learning_rate": 2.651552721956617e-05, |
|
"loss": 0.66, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 27.058823529411764, |
|
"grad_norm": 0.1508912891149521, |
|
"learning_rate": 2.6296945218946804e-05, |
|
"loss": 0.6928, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 27.205882352941178, |
|
"grad_norm": 0.13976359367370605, |
|
"learning_rate": 2.6078477027346572e-05, |
|
"loss": 0.6916, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 27.352941176470587, |
|
"grad_norm": 0.13399522006511688, |
|
"learning_rate": 2.586014331186309e-05, |
|
"loss": 0.6617, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"grad_norm": 0.1378486156463623, |
|
"learning_rate": 2.5641964726872526e-05, |
|
"loss": 0.6779, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 27.647058823529413, |
|
"grad_norm": 0.1410367488861084, |
|
"learning_rate": 2.5423961912075712e-05, |
|
"loss": 0.6951, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 27.794117647058822, |
|
"grad_norm": 0.1448415368795395, |
|
"learning_rate": 2.5206155490545585e-05, |
|
"loss": 0.6958, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 27.941176470588236, |
|
"grad_norm": 0.1381085067987442, |
|
"learning_rate": 2.4988566066776327e-05, |
|
"loss": 0.6629, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 28.08823529411765, |
|
"grad_norm": 0.14611601829528809, |
|
"learning_rate": 2.4771214224734056e-05, |
|
"loss": 0.6642, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 28.235294117647058, |
|
"grad_norm": 0.13046316802501678, |
|
"learning_rate": 2.4554120525909703e-05, |
|
"loss": 0.6554, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 28.38235294117647, |
|
"grad_norm": 0.1373993307352066, |
|
"learning_rate": 2.4337305507373832e-05, |
|
"loss": 0.6791, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 28.529411764705884, |
|
"grad_norm": 0.140591099858284, |
|
"learning_rate": 2.4120789679833815e-05, |
|
"loss": 0.6729, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 28.676470588235293, |
|
"grad_norm": 0.1307932734489441, |
|
"learning_rate": 2.3904593525693593e-05, |
|
"loss": 0.6887, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 28.823529411764707, |
|
"grad_norm": 0.13051795959472656, |
|
"learning_rate": 2.3688737497115953e-05, |
|
"loss": 0.6823, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 28.970588235294116, |
|
"grad_norm": 0.12720821797847748, |
|
"learning_rate": 2.3473242014087814e-05, |
|
"loss": 0.7063, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 29.11764705882353, |
|
"grad_norm": 0.137127086520195, |
|
"learning_rate": 2.3258127462488467e-05, |
|
"loss": 0.6744, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 29.264705882352942, |
|
"grad_norm": 0.13432453572750092, |
|
"learning_rate": 2.30434141921611e-05, |
|
"loss": 0.68, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 29.41176470588235, |
|
"grad_norm": 0.14380089938640594, |
|
"learning_rate": 2.2829122514987634e-05, |
|
"loss": 0.6808, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 29.558823529411764, |
|
"grad_norm": 0.12999729812145233, |
|
"learning_rate": 2.2615272702967304e-05, |
|
"loss": 0.6963, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 29.705882352941178, |
|
"grad_norm": 0.13407659530639648, |
|
"learning_rate": 2.2401884986298892e-05, |
|
"loss": 0.6729, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 29.852941176470587, |
|
"grad_norm": 0.13908743858337402, |
|
"learning_rate": 2.2188979551466916e-05, |
|
"loss": 0.6766, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.13133063912391663, |
|
"learning_rate": 2.1976576539332024e-05, |
|
"loss": 0.664, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 30.147058823529413, |
|
"grad_norm": 0.15663307905197144, |
|
"learning_rate": 2.1764696043225685e-05, |
|
"loss": 0.7082, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 30.294117647058822, |
|
"grad_norm": 0.13505025207996368, |
|
"learning_rate": 2.155335810704931e-05, |
|
"loss": 0.6463, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 30.441176470588236, |
|
"grad_norm": 0.1344403475522995, |
|
"learning_rate": 2.134258272337814e-05, |
|
"loss": 0.6753, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 30.58823529411765, |
|
"grad_norm": 0.14067409932613373, |
|
"learning_rate": 2.1132389831569915e-05, |
|
"loss": 0.6715, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 30.735294117647058, |
|
"grad_norm": 0.13444367051124573, |
|
"learning_rate": 2.092279931587866e-05, |
|
"loss": 0.6838, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 30.88235294117647, |
|
"grad_norm": 0.13275469839572906, |
|
"learning_rate": 2.0713831003573564e-05, |
|
"loss": 0.6842, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 31.029411764705884, |
|
"grad_norm": 0.12724100053310394, |
|
"learning_rate": 2.0505504663063364e-05, |
|
"loss": 0.6745, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 31.176470588235293, |
|
"grad_norm": 0.12783651053905487, |
|
"learning_rate": 2.029784000202627e-05, |
|
"loss": 0.6839, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 31.323529411764707, |
|
"grad_norm": 0.13505741953849792, |
|
"learning_rate": 2.0090856665545554e-05, |
|
"loss": 0.6577, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 31.470588235294116, |
|
"grad_norm": 0.14324721693992615, |
|
"learning_rate": 1.98845742342512e-05, |
|
"loss": 0.6786, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 31.61764705882353, |
|
"grad_norm": 0.14350536465644836, |
|
"learning_rate": 1.967901222246754e-05, |
|
"loss": 0.6715, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 31.764705882352942, |
|
"grad_norm": 0.12864898145198822, |
|
"learning_rate": 1.947419007636716e-05, |
|
"loss": 0.6901, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 31.91176470588235, |
|
"grad_norm": 0.13163405656814575, |
|
"learning_rate": 1.9270127172131363e-05, |
|
"loss": 0.6767, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 32.05882352941177, |
|
"grad_norm": 0.13823044300079346, |
|
"learning_rate": 1.906684281411715e-05, |
|
"loss": 0.6888, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 32.205882352941174, |
|
"grad_norm": 0.13260214030742645, |
|
"learning_rate": 1.8864356233031e-05, |
|
"loss": 0.6899, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 32.35294117647059, |
|
"grad_norm": 0.13542212545871735, |
|
"learning_rate": 1.866268658410969e-05, |
|
"loss": 0.6604, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"grad_norm": 0.14194779098033905, |
|
"learning_rate": 1.8461852945308196e-05, |
|
"loss": 0.6538, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 32.64705882352941, |
|
"grad_norm": 0.13551092147827148, |
|
"learning_rate": 1.8261874315494874e-05, |
|
"loss": 0.6851, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 32.794117647058826, |
|
"grad_norm": 0.13539521396160126, |
|
"learning_rate": 1.806276961265425e-05, |
|
"loss": 0.6731, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 32.94117647058823, |
|
"grad_norm": 0.14235951006412506, |
|
"learning_rate": 1.786455767209732e-05, |
|
"loss": 0.6798, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 33.088235294117645, |
|
"grad_norm": 0.12894190847873688, |
|
"learning_rate": 1.7667257244679702e-05, |
|
"loss": 0.6815, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 33.23529411764706, |
|
"grad_norm": 0.13332705199718475, |
|
"learning_rate": 1.747088699502789e-05, |
|
"loss": 0.6709, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 33.38235294117647, |
|
"grad_norm": 0.13527055084705353, |
|
"learning_rate": 1.727546549977352e-05, |
|
"loss": 0.689, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 33.529411764705884, |
|
"grad_norm": 0.13612490892410278, |
|
"learning_rate": 1.7081011245796013e-05, |
|
"loss": 0.6744, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 33.6764705882353, |
|
"grad_norm": 0.13099683821201324, |
|
"learning_rate": 1.6887542628473763e-05, |
|
"loss": 0.6871, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 33.8235294117647, |
|
"grad_norm": 0.13698424398899078, |
|
"learning_rate": 1.6695077949943892e-05, |
|
"loss": 0.6852, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 33.970588235294116, |
|
"grad_norm": 0.13121846318244934, |
|
"learning_rate": 1.6503635417370882e-05, |
|
"loss": 0.6529, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 34.11764705882353, |
|
"grad_norm": 0.1369757056236267, |
|
"learning_rate": 1.6313233141224165e-05, |
|
"loss": 0.6855, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 34.26470588235294, |
|
"grad_norm": 0.13654442131519318, |
|
"learning_rate": 1.612388913356493e-05, |
|
"loss": 0.6596, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 34.411764705882355, |
|
"grad_norm": 0.136439248919487, |
|
"learning_rate": 1.5935621306342057e-05, |
|
"loss": 0.6843, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 34.55882352941177, |
|
"grad_norm": 0.1410278081893921, |
|
"learning_rate": 1.5748447469697803e-05, |
|
"loss": 0.6786, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 34.705882352941174, |
|
"grad_norm": 0.16095899045467377, |
|
"learning_rate": 1.556238533028283e-05, |
|
"loss": 0.6563, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 34.85294117647059, |
|
"grad_norm": 0.13262508809566498, |
|
"learning_rate": 1.5377452489581234e-05, |
|
"loss": 0.6888, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 0.13472063839435577, |
|
"learning_rate": 1.5193666442245402e-05, |
|
"loss": 0.681, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 35.14705882352941, |
|
"grad_norm": 0.13549183309078217, |
|
"learning_rate": 1.5011044574441036e-05, |
|
"loss": 0.6755, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 35.294117647058826, |
|
"grad_norm": 0.1411600261926651, |
|
"learning_rate": 1.4829604162202442e-05, |
|
"loss": 0.7007, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 35.44117647058823, |
|
"grad_norm": 0.14127956330776215, |
|
"learning_rate": 1.4649362369798152e-05, |
|
"loss": 0.6551, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 35.588235294117645, |
|
"grad_norm": 0.13209925591945648, |
|
"learning_rate": 1.4470336248107266e-05, |
|
"loss": 0.6762, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 35.73529411764706, |
|
"grad_norm": 0.12888824939727783, |
|
"learning_rate": 1.4292542733006372e-05, |
|
"loss": 0.6775, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 35.88235294117647, |
|
"grad_norm": 0.14431186020374298, |
|
"learning_rate": 1.4115998643767447e-05, |
|
"loss": 0.6654, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 36.029411764705884, |
|
"grad_norm": 0.12955108284950256, |
|
"learning_rate": 1.3940720681466734e-05, |
|
"loss": 0.6807, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 36.1764705882353, |
|
"grad_norm": 0.13727155327796936, |
|
"learning_rate": 1.3766725427404843e-05, |
|
"loss": 0.6925, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 36.3235294117647, |
|
"grad_norm": 0.13375459611415863, |
|
"learning_rate": 1.3594029341538128e-05, |
|
"loss": 0.6884, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 36.470588235294116, |
|
"grad_norm": 0.13129761815071106, |
|
"learning_rate": 1.34226487609216e-05, |
|
"loss": 0.6868, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 36.61764705882353, |
|
"grad_norm": 0.1358431726694107, |
|
"learning_rate": 1.3252599898163454e-05, |
|
"loss": 0.6538, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 36.76470588235294, |
|
"grad_norm": 0.14378570020198822, |
|
"learning_rate": 1.3083898839891284e-05, |
|
"loss": 0.6457, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 36.911764705882355, |
|
"grad_norm": 0.14497900009155273, |
|
"learning_rate": 1.29165615452304e-05, |
|
"loss": 0.6746, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 37.05882352941177, |
|
"grad_norm": 0.13402092456817627, |
|
"learning_rate": 1.275060384429398e-05, |
|
"loss": 0.6721, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 37.205882352941174, |
|
"grad_norm": 0.13633766770362854, |
|
"learning_rate": 1.258604143668563e-05, |
|
"loss": 0.6724, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 37.35294117647059, |
|
"grad_norm": 0.13490943610668182, |
|
"learning_rate": 1.2422889890014143e-05, |
|
"loss": 0.6578, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"grad_norm": 0.1326485574245453, |
|
"learning_rate": 1.2261164638420832e-05, |
|
"loss": 0.6664, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 37.64705882352941, |
|
"grad_norm": 0.14504876732826233, |
|
"learning_rate": 1.2100880981119447e-05, |
|
"loss": 0.6856, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 37.794117647058826, |
|
"grad_norm": 0.13638907670974731, |
|
"learning_rate": 1.1942054080948878e-05, |
|
"loss": 0.6842, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 37.94117647058823, |
|
"grad_norm": 0.16528142988681793, |
|
"learning_rate": 1.1784698962938763e-05, |
|
"loss": 0.6759, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 38.088235294117645, |
|
"grad_norm": 0.15061551332473755, |
|
"learning_rate": 1.1628830512888057e-05, |
|
"loss": 0.6899, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 38.23529411764706, |
|
"grad_norm": 0.13696105778217316, |
|
"learning_rate": 1.1474463475956926e-05, |
|
"loss": 0.6624, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 38.38235294117647, |
|
"grad_norm": 0.12491544336080551, |
|
"learning_rate": 1.1321612455271793e-05, |
|
"loss": 0.6725, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 38.529411764705884, |
|
"grad_norm": 0.13985736668109894, |
|
"learning_rate": 1.117029191054389e-05, |
|
"loss": 0.6942, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 38.6764705882353, |
|
"grad_norm": 0.14015409350395203, |
|
"learning_rate": 1.1020516156701383e-05, |
|
"loss": 0.6759, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 38.8235294117647, |
|
"grad_norm": 0.14540641009807587, |
|
"learning_rate": 1.0872299362535173e-05, |
|
"loss": 0.6645, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 38.970588235294116, |
|
"grad_norm": 0.1425599455833435, |
|
"learning_rate": 1.0725655549358532e-05, |
|
"loss": 0.6711, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 39.11764705882353, |
|
"grad_norm": 0.13927870988845825, |
|
"learning_rate": 1.0580598589680664e-05, |
|
"loss": 0.6956, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 39.26470588235294, |
|
"grad_norm": 0.13224616646766663, |
|
"learning_rate": 1.0437142205894418e-05, |
|
"loss": 0.6868, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 39.411764705882355, |
|
"grad_norm": 0.13682135939598083, |
|
"learning_rate": 1.029529996897808e-05, |
|
"loss": 0.6735, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 39.55882352941177, |
|
"grad_norm": 0.1319390833377838, |
|
"learning_rate": 1.0155085297211618e-05, |
|
"loss": 0.6513, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 39.705882352941174, |
|
"grad_norm": 0.1452108919620514, |
|
"learning_rate": 1.001651145490726e-05, |
|
"loss": 0.6772, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 39.85294117647059, |
|
"grad_norm": 0.14989398419857025, |
|
"learning_rate": 9.87959155115474e-06, |
|
"loss": 0.6633, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.14220058917999268, |
|
"learning_rate": 9.744338538581147e-06, |
|
"loss": 0.6778, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 40.14705882352941, |
|
"grad_norm": 0.15334346890449524, |
|
"learning_rate": 9.610765212125607e-06, |
|
"loss": 0.6775, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 40.294117647058826, |
|
"grad_norm": 0.1356540322303772, |
|
"learning_rate": 9.478884207828912e-06, |
|
"loss": 0.6513, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 40.44117647058823, |
|
"grad_norm": 0.14519663155078888, |
|
"learning_rate": 9.34870800163808e-06, |
|
"loss": 0.6847, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 40.588235294117645, |
|
"grad_norm": 0.13579830527305603, |
|
"learning_rate": 9.220248908226224e-06, |
|
"loss": 0.6661, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 40.73529411764706, |
|
"grad_norm": 0.13304731249809265, |
|
"learning_rate": 9.09351907982751e-06, |
|
"loss": 0.6569, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 40.88235294117647, |
|
"grad_norm": 0.14108242094516754, |
|
"learning_rate": 8.968530505087582e-06, |
|
"loss": 0.6894, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 41.029411764705884, |
|
"grad_norm": 0.13457804918289185, |
|
"learning_rate": 8.845295007929446e-06, |
|
"loss": 0.6814, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 41.1764705882353, |
|
"grad_norm": 0.13951298594474792, |
|
"learning_rate": 8.7238242464349e-06, |
|
"loss": 0.6721, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 41.3235294117647, |
|
"grad_norm": 0.14416338503360748, |
|
"learning_rate": 8.604129711741706e-06, |
|
"loss": 0.6881, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 41.470588235294116, |
|
"grad_norm": 0.13295041024684906, |
|
"learning_rate": 8.486222726956508e-06, |
|
"loss": 0.6624, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 41.61764705882353, |
|
"grad_norm": 0.1342659443616867, |
|
"learning_rate": 8.370114446083686e-06, |
|
"loss": 0.6956, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 41.76470588235294, |
|
"grad_norm": 0.13162069022655487, |
|
"learning_rate": 8.255815852970153e-06, |
|
"loss": 0.6646, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 41.911764705882355, |
|
"grad_norm": 0.12931868433952332, |
|
"learning_rate": 8.143337760266331e-06, |
|
"loss": 0.6618, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 42.05882352941177, |
|
"grad_norm": 0.13857227563858032, |
|
"learning_rate": 8.032690808403232e-06, |
|
"loss": 0.6672, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 42.205882352941174, |
|
"grad_norm": 0.13812746107578278, |
|
"learning_rate": 7.923885464585884e-06, |
|
"loss": 0.6866, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 42.35294117647059, |
|
"grad_norm": 0.1503993570804596, |
|
"learning_rate": 7.816932021803154e-06, |
|
"loss": 0.6885, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"grad_norm": 0.13175919651985168, |
|
"learning_rate": 7.711840597853998e-06, |
|
"loss": 0.6686, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 42.64705882352941, |
|
"grad_norm": 0.13319700956344604, |
|
"learning_rate": 7.608621134390344e-06, |
|
"loss": 0.6561, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 42.794117647058826, |
|
"grad_norm": 0.1399184763431549, |
|
"learning_rate": 7.507283395976592e-06, |
|
"loss": 0.6537, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 42.94117647058823, |
|
"grad_norm": 0.13498006761074066, |
|
"learning_rate": 7.407836969165911e-06, |
|
"loss": 0.6886, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 43.088235294117645, |
|
"grad_norm": 0.1388946920633316, |
|
"learning_rate": 7.310291261593308e-06, |
|
"loss": 0.6797, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 43.23529411764706, |
|
"grad_norm": 0.13527587056159973, |
|
"learning_rate": 7.2146555010857155e-06, |
|
"loss": 0.6813, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 43.38235294117647, |
|
"grad_norm": 0.13819634914398193, |
|
"learning_rate": 7.120938734789012e-06, |
|
"loss": 0.6752, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 43.529411764705884, |
|
"grad_norm": 0.12921284139156342, |
|
"learning_rate": 7.029149828312145e-06, |
|
"loss": 0.6761, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 43.6764705882353, |
|
"grad_norm": 0.13316689431667328, |
|
"learning_rate": 6.93929746488849e-06, |
|
"loss": 0.6632, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 43.8235294117647, |
|
"grad_norm": 0.13092860579490662, |
|
"learning_rate": 6.851390144554372e-06, |
|
"loss": 0.6705, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 43.970588235294116, |
|
"grad_norm": 0.13236363232135773, |
|
"learning_rate": 6.765436183344996e-06, |
|
"loss": 0.6602, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 44.11764705882353, |
|
"grad_norm": 0.14111852645874023, |
|
"learning_rate": 6.6814437125077135e-06, |
|
"loss": 0.6554, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 44.26470588235294, |
|
"grad_norm": 0.13777071237564087, |
|
"learning_rate": 6.599420677732848e-06, |
|
"loss": 0.6783, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 44.411764705882355, |
|
"grad_norm": 0.15148292481899261, |
|
"learning_rate": 6.519374838401997e-06, |
|
"loss": 0.6818, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 44.55882352941177, |
|
"grad_norm": 0.14915376901626587, |
|
"learning_rate": 6.44131376685401e-06, |
|
"loss": 0.6758, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 44.705882352941174, |
|
"grad_norm": 0.13784313201904297, |
|
"learning_rate": 6.36524484766865e-06, |
|
"loss": 0.6652, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 44.85294117647059, |
|
"grad_norm": 0.13148203492164612, |
|
"learning_rate": 6.291175276968002e-06, |
|
"loss": 0.6758, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 0.14285942912101746, |
|
"learning_rate": 6.219112061735721e-06, |
|
"loss": 0.6716, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 45.14705882352941, |
|
"grad_norm": 0.14051543176174164, |
|
"learning_rate": 6.149062019154174e-06, |
|
"loss": 0.6833, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 45.294117647058826, |
|
"grad_norm": 0.12927637994289398, |
|
"learning_rate": 6.081031775959542e-06, |
|
"loss": 0.648, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 45.44117647058823, |
|
"grad_norm": 0.1433332860469818, |
|
"learning_rate": 6.0150277678149055e-06, |
|
"loss": 0.6377, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 45.588235294117645, |
|
"grad_norm": 0.1298450231552124, |
|
"learning_rate": 5.951056238701456e-06, |
|
"loss": 0.6866, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 45.73529411764706, |
|
"grad_norm": 0.13296933472156525, |
|
"learning_rate": 5.889123240327819e-06, |
|
"loss": 0.6747, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 45.88235294117647, |
|
"grad_norm": 0.1386398822069168, |
|
"learning_rate": 5.829234631557524e-06, |
|
"loss": 0.6827, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 46.029411764705884, |
|
"grad_norm": 0.12945467233657837, |
|
"learning_rate": 5.771396077854802e-06, |
|
"loss": 0.6823, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 46.1764705882353, |
|
"grad_norm": 0.1447058618068695, |
|
"learning_rate": 5.715613050748604e-06, |
|
"loss": 0.6542, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 46.3235294117647, |
|
"grad_norm": 0.13116587698459625, |
|
"learning_rate": 5.661890827315004e-06, |
|
"loss": 0.664, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 46.470588235294116, |
|
"grad_norm": 0.13510237634181976, |
|
"learning_rate": 5.61023448967798e-06, |
|
"loss": 0.6698, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 46.61764705882353, |
|
"grad_norm": 0.13816939294338226, |
|
"learning_rate": 5.560648924528657e-06, |
|
"loss": 0.7097, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 46.76470588235294, |
|
"grad_norm": 0.1419830620288849, |
|
"learning_rate": 5.513138822663016e-06, |
|
"loss": 0.6905, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 46.911764705882355, |
|
"grad_norm": 0.13188646733760834, |
|
"learning_rate": 5.467708678538148e-06, |
|
"loss": 0.6457, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 47.05882352941177, |
|
"grad_norm": 0.1413869559764862, |
|
"learning_rate": 5.424362789847082e-06, |
|
"loss": 0.6766, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 47.205882352941174, |
|
"grad_norm": 0.13414201140403748, |
|
"learning_rate": 5.38310525711221e-06, |
|
"loss": 0.6795, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 47.35294117647059, |
|
"grad_norm": 0.1363234668970108, |
|
"learning_rate": 5.343939983297398e-06, |
|
"loss": 0.6713, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"grad_norm": 0.1344790905714035, |
|
"learning_rate": 5.3068706734387484e-06, |
|
"loss": 0.6584, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 47.64705882352941, |
|
"grad_norm": 0.1452033668756485, |
|
"learning_rate": 5.271900834294105e-06, |
|
"loss": 0.667, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 47.794117647058826, |
|
"grad_norm": 0.13405530154705048, |
|
"learning_rate": 5.239033774011322e-06, |
|
"loss": 0.669, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 47.94117647058823, |
|
"grad_norm": 0.13740584254264832, |
|
"learning_rate": 5.208272601815313e-06, |
|
"loss": 0.6836, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 48.088235294117645, |
|
"grad_norm": 0.12757079303264618, |
|
"learning_rate": 5.1796202277139075e-06, |
|
"loss": 0.6909, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 48.23529411764706, |
|
"grad_norm": 0.12381689995527267, |
|
"learning_rate": 5.1530793622225725e-06, |
|
"loss": 0.6605, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 48.38235294117647, |
|
"grad_norm": 0.13950037956237793, |
|
"learning_rate": 5.128652516107996e-06, |
|
"loss": 0.6814, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 48.529411764705884, |
|
"grad_norm": 0.13073165714740753, |
|
"learning_rate": 5.10634200015057e-06, |
|
"loss": 0.6866, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 48.6764705882353, |
|
"grad_norm": 0.1424126774072647, |
|
"learning_rate": 5.086149924925788e-06, |
|
"loss": 0.6697, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 48.8235294117647, |
|
"grad_norm": 0.15078318119049072, |
|
"learning_rate": 5.068078200604584e-06, |
|
"loss": 0.6615, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 48.970588235294116, |
|
"grad_norm": 0.1373935043811798, |
|
"learning_rate": 5.052128536772629e-06, |
|
"loss": 0.6665, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 49.11764705882353, |
|
"grad_norm": 0.1401982456445694, |
|
"learning_rate": 5.038302442268617e-06, |
|
"loss": 0.6597, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 49.26470588235294, |
|
"grad_norm": 0.1401190608739853, |
|
"learning_rate": 5.026601225041503e-06, |
|
"loss": 0.6929, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 49.411764705882355, |
|
"grad_norm": 0.1368054449558258, |
|
"learning_rate": 5.0170259920268025e-06, |
|
"loss": 0.6923, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 49.55882352941177, |
|
"grad_norm": 0.13621118664741516, |
|
"learning_rate": 5.009577649041847e-06, |
|
"loss": 0.6574, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 49.705882352941174, |
|
"grad_norm": 0.13294735550880432, |
|
"learning_rate": 5.004256900700115e-06, |
|
"loss": 0.6646, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 49.85294117647059, |
|
"grad_norm": 0.14144855737686157, |
|
"learning_rate": 5.001064250344557e-06, |
|
"loss": 0.666, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.13993091881275177, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6593, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 1700, |
|
"total_flos": 2.629944131882844e+18, |
|
"train_loss": 0.6986723100437837, |
|
"train_runtime": 41334.8641, |
|
"train_samples_per_second": 0.481, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.629944131882844e+18, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|