diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6008 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0017030357752335, + "global_step": 99076, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.9999979372962823e-05, + "loss": 0.1516, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999938118888465e-05, + "loss": 0.1292, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999896864814113e-05, + "loss": 0.1529, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999855610739755e-05, + "loss": 0.128, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 2.99998143566654e-05, + "loss": 0.1422, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999773102591045e-05, + "loss": 0.1399, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999973184851669e-05, + "loss": 0.1531, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999969059444233e-05, + "loss": 0.1411, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 2.999964934036798e-05, + "loss": 0.1376, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999960808629362e-05, + "loss": 0.1655, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999566832219266e-05, + "loss": 0.1721, + "step": 1100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999525578144915e-05, + "loss": 0.1759, + "step": 1200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999484324070556e-05, + "loss": 0.1762, + "step": 1300 + }, + { + "epoch": 0.0, + "learning_rate": 2.99994430699962e-05, + "loss": 0.1584, + "step": 1400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999401815921846e-05, + "loss": 0.1661, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999936056184749e-05, + "loss": 0.1591, + "step": 1600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999319307773133e-05, + "loss": 0.1646, + "step": 1700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999927805369878e-05, + "loss": 0.1636, + "step": 1800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999236799624423e-05, + "loss": 0.1518, + "step": 1900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999195545550068e-05, + "loss": 0.1463, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999154291475713e-05, + "loss": 0.1507, + "step": 2100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999113037401357e-05, + "loss": 0.1528, + "step": 2200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999071783327e-05, + "loss": 0.1542, + "step": 2300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999030529252647e-05, + "loss": 0.1494, + "step": 2400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999898927517829e-05, + "loss": 0.154, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998948021103934e-05, + "loss": 0.1474, + "step": 2600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999890676702958e-05, + "loss": 0.1562, + "step": 2700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998865512955224e-05, + "loss": 0.1482, + "step": 2800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998824258880865e-05, + "loss": 0.166, + "step": 2900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998783004806514e-05, + "loss": 0.1187, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998741750732155e-05, + "loss": 0.1322, + "step": 3100 + }, + { + "epoch": 0.0, + "learning_rate": 2.99987004966578e-05, + "loss": 0.1612, + "step": 3200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999865924258345e-05, + "loss": 0.152, + "step": 3300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999861798850909e-05, + "loss": 0.1375, + "step": 3400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998576734434735e-05, + "loss": 0.1252, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999853548036038e-05, + "loss": 0.1582, + "step": 3600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998494226286025e-05, + "loss": 0.1371, + "step": 3700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998452972211667e-05, + "loss": 0.1264, + "step": 3800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998411718137315e-05, + "loss": 0.1409, + "step": 3900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998370876603703e-05, + "loss": 0.1247, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998329622529344e-05, + "loss": 0.1342, + "step": 4100 + }, + { + "epoch": 0.0, + "learning_rate": 2.999828836845499e-05, + "loss": 0.1528, + "step": 4200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998247114380634e-05, + "loss": 0.1532, + "step": 4300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999820586030628e-05, + "loss": 0.1239, + "step": 4400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999816460623192e-05, + "loss": 0.1489, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999812335215757e-05, + "loss": 0.1367, + "step": 4600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998082098083214e-05, + "loss": 0.1593, + "step": 4700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9998040844008856e-05, + "loss": 0.1559, + "step": 4800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997999589934504e-05, + "loss": 0.1366, + "step": 4900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997958335860146e-05, + "loss": 0.1535, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 2.999791708178579e-05, + "loss": 0.1515, + "step": 5100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997875827711436e-05, + "loss": 0.1453, + "step": 5200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999783457363708e-05, + "loss": 0.1378, + "step": 5300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997793319562722e-05, + "loss": 0.1202, + "step": 5400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999775206548837e-05, + "loss": 0.1503, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997710811414012e-05, + "loss": 0.1573, + "step": 5600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997669557339657e-05, + "loss": 0.1508, + "step": 5700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997628303265302e-05, + "loss": 0.137, + "step": 5800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997587049190947e-05, + "loss": 0.1373, + "step": 5900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999754579511659e-05, + "loss": 0.152, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997504541042237e-05, + "loss": 0.1575, + "step": 6100 + }, + { + "epoch": 0.0, + "learning_rate": 2.999746328696788e-05, + "loss": 0.1468, + "step": 6200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999742244543427e-05, + "loss": 0.1289, + "step": 6300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999738119135991e-05, + "loss": 0.1549, + "step": 6400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999733993728556e-05, + "loss": 0.1638, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 2.99972986832112e-05, + "loss": 0.1305, + "step": 6600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997257429136846e-05, + "loss": 0.1522, + "step": 6700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999721617506249e-05, + "loss": 0.1496, + "step": 6800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997174920988136e-05, + "loss": 0.1535, + "step": 6900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997133666913778e-05, + "loss": 0.1444, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997092412839426e-05, + "loss": 0.1505, + "step": 7100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997051158765068e-05, + "loss": 0.1233, + "step": 7200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9997009904690713e-05, + "loss": 0.1575, + "step": 7300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996968650616358e-05, + "loss": 0.1305, + "step": 7400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996927396542003e-05, + "loss": 0.1433, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996886142467644e-05, + "loss": 0.1621, + "step": 7600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996844888393293e-05, + "loss": 0.1501, + "step": 7700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996803634318934e-05, + "loss": 0.1601, + "step": 7800 + }, + { + "epoch": 0.0, + "learning_rate": 2.999676238024458e-05, + "loss": 0.1459, + "step": 7900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996721126170224e-05, + "loss": 0.1671, + "step": 8000 + }, + { + "epoch": 0.0, + "learning_rate": 2.999667987209587e-05, + "loss": 0.1321, + "step": 8100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996638618021514e-05, + "loss": 0.1512, + "step": 8200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999659736394716e-05, + "loss": 0.1654, + "step": 8300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996556109872804e-05, + "loss": 0.1468, + "step": 8400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996514855798446e-05, + "loss": 0.154, + "step": 8500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996473601724094e-05, + "loss": 0.1684, + "step": 8600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996432347649735e-05, + "loss": 0.1538, + "step": 8700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996391506116123e-05, + "loss": 0.1543, + "step": 8800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996350252041768e-05, + "loss": 0.1543, + "step": 8900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999630899796741e-05, + "loss": 0.1595, + "step": 9000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996267743893058e-05, + "loss": 0.1749, + "step": 9100 + }, + { + "epoch": 0.0, + "learning_rate": 2.99962264898187e-05, + "loss": 0.1828, + "step": 9200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996185235744345e-05, + "loss": 0.1355, + "step": 9300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999614398166999e-05, + "loss": 0.1533, + "step": 9400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996102727595635e-05, + "loss": 0.1432, + "step": 9500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999606147352128e-05, + "loss": 0.1227, + "step": 9600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9996020219446924e-05, + "loss": 0.1507, + "step": 9700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999597896537257e-05, + "loss": 0.1324, + "step": 9800 + }, + { + "epoch": 0.0, + "learning_rate": 2.999593771129821e-05, + "loss": 0.1549, + "step": 9900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999589645722386e-05, + "loss": 0.1554, + "step": 10000 + }, + { + "epoch": 0.0, + "learning_rate": 2.99958552031495e-05, + "loss": 0.1591, + "step": 10100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995813949075146e-05, + "loss": 0.1546, + "step": 10200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999577269500079e-05, + "loss": 0.162, + "step": 10300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995731440926436e-05, + "loss": 0.1678, + "step": 10400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995690186852077e-05, + "loss": 0.1584, + "step": 10500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995648932777726e-05, + "loss": 0.164, + "step": 10600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995607678703367e-05, + "loss": 0.1569, + "step": 10700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995566424629012e-05, + "loss": 0.1531, + "step": 10800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995525170554657e-05, + "loss": 0.1566, + "step": 10900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995483916480302e-05, + "loss": 0.1423, + "step": 11000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995442662405944e-05, + "loss": 0.1405, + "step": 11100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995401408331592e-05, + "loss": 0.1471, + "step": 11200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995360154257234e-05, + "loss": 0.1347, + "step": 11300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999531890018288e-05, + "loss": 0.1674, + "step": 11400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995277646108524e-05, + "loss": 0.1459, + "step": 11500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999523639203417e-05, + "loss": 0.1151, + "step": 11600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995195137959814e-05, + "loss": 0.1601, + "step": 11700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999515388388546e-05, + "loss": 0.1225, + "step": 11800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995112629811104e-05, + "loss": 0.1402, + "step": 11900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995071375736745e-05, + "loss": 0.1446, + "step": 12000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9995030121662394e-05, + "loss": 0.1492, + "step": 12100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994988867588035e-05, + "loss": 0.1349, + "step": 12200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999494761351368e-05, + "loss": 0.1444, + "step": 12300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994906359439325e-05, + "loss": 0.1433, + "step": 12400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999486510536497e-05, + "loss": 0.1442, + "step": 12500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999482385129061e-05, + "loss": 0.1139, + "step": 12600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999478259721626e-05, + "loss": 0.1315, + "step": 12700 + }, + { + "epoch": 0.0, + "learning_rate": 2.99947413431419e-05, + "loss": 0.1569, + "step": 12800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994700089067547e-05, + "loss": 0.1222, + "step": 12900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999465883499319e-05, + "loss": 0.1266, + "step": 13000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994617580918837e-05, + "loss": 0.1427, + "step": 13100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994576326844478e-05, + "loss": 0.1287, + "step": 13200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994535072770126e-05, + "loss": 0.1353, + "step": 13300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994493818695768e-05, + "loss": 0.1429, + "step": 13400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994452564621413e-05, + "loss": 0.1268, + "step": 13500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994411310547058e-05, + "loss": 0.1187, + "step": 13600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994370056472703e-05, + "loss": 0.1497, + "step": 13700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994328802398348e-05, + "loss": 0.1325, + "step": 13800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994288373405478e-05, + "loss": 0.1375, + "step": 13900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994247119331123e-05, + "loss": 0.1542, + "step": 14000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994205865256768e-05, + "loss": 0.147, + "step": 14100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994164611182413e-05, + "loss": 0.1269, + "step": 14200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994123357108055e-05, + "loss": 0.1141, + "step": 14300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994082103033703e-05, + "loss": 0.1304, + "step": 14400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9994040848959348e-05, + "loss": 0.1029, + "step": 14500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999399959488499e-05, + "loss": 0.0914, + "step": 14600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993958340810638e-05, + "loss": 0.1389, + "step": 14700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999391708673628e-05, + "loss": 0.1376, + "step": 14800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993876245202667e-05, + "loss": 0.1233, + "step": 14900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993834991128312e-05, + "loss": 0.1309, + "step": 15000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993793737053954e-05, + "loss": 0.133, + "step": 15100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993752482979602e-05, + "loss": 0.1391, + "step": 15200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993711228905244e-05, + "loss": 0.1182, + "step": 15300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999366997483089e-05, + "loss": 0.1414, + "step": 15400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993628720756534e-05, + "loss": 0.1214, + "step": 15500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999358746668218e-05, + "loss": 0.0809, + "step": 15600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999354621260782e-05, + "loss": 0.1307, + "step": 15700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999350495853347e-05, + "loss": 0.0915, + "step": 15800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993463704459114e-05, + "loss": 0.1278, + "step": 15900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993422450384755e-05, + "loss": 0.1266, + "step": 16000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993381196310404e-05, + "loss": 0.1135, + "step": 16100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993339942236045e-05, + "loss": 0.1251, + "step": 16200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999329868816169e-05, + "loss": 0.1367, + "step": 16300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993257434087335e-05, + "loss": 0.1205, + "step": 16400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999321618001298e-05, + "loss": 0.1462, + "step": 16500 + }, + { + "epoch": 0.0, + "learning_rate": 2.999317492593862e-05, + "loss": 0.1299, + "step": 16600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999313367186427e-05, + "loss": 0.1049, + "step": 16700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999309241778991e-05, + "loss": 0.1164, + "step": 16800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9993051163715557e-05, + "loss": 0.1475, + "step": 16900 + }, + { + "epoch": 0.0, + "learning_rate": 2.99930099096412e-05, + "loss": 0.1131, + "step": 17000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992968655566846e-05, + "loss": 0.1297, + "step": 17100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992927401492488e-05, + "loss": 0.1154, + "step": 17200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992886147418136e-05, + "loss": 0.1271, + "step": 17300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992844893343778e-05, + "loss": 0.1306, + "step": 17400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992803639269423e-05, + "loss": 0.1157, + "step": 17500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992762385195068e-05, + "loss": 0.1525, + "step": 17600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992721131120713e-05, + "loss": 0.1188, + "step": 17700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992679877046358e-05, + "loss": 0.1322, + "step": 17800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992638622972003e-05, + "loss": 0.1322, + "step": 17900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992597368897648e-05, + "loss": 0.1288, + "step": 18000 + }, + { + "epoch": 0.0, + "learning_rate": 2.999255611482329e-05, + "loss": 0.123, + "step": 18100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992514860748938e-05, + "loss": 0.1447, + "step": 18200 + }, + { + "epoch": 0.0, + "learning_rate": 2.999247360667458e-05, + "loss": 0.1312, + "step": 18300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992432352600224e-05, + "loss": 0.1187, + "step": 18400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999239109852587e-05, + "loss": 0.1371, + "step": 18500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992349844451514e-05, + "loss": 0.1273, + "step": 18600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992308590377156e-05, + "loss": 0.1524, + "step": 18700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992267336302804e-05, + "loss": 0.1354, + "step": 18800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992226082228446e-05, + "loss": 0.1204, + "step": 18900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999218482815409e-05, + "loss": 0.1483, + "step": 19000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992143574079736e-05, + "loss": 0.1246, + "step": 19100 + }, + { + "epoch": 0.0, + "learning_rate": 2.999210232000538e-05, + "loss": 0.094, + "step": 19200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9992061065931022e-05, + "loss": 0.1177, + "step": 19300 + }, + { + "epoch": 0.0, + "learning_rate": 2.999201981185667e-05, + "loss": 0.1363, + "step": 19400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991978557782312e-05, + "loss": 0.1276, + "step": 19500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991937303707957e-05, + "loss": 0.1299, + "step": 19600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991896049633602e-05, + "loss": 0.1389, + "step": 19700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991854795559247e-05, + "loss": 0.1224, + "step": 19800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991813541484892e-05, + "loss": 0.1317, + "step": 19900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991772287410537e-05, + "loss": 0.1223, + "step": 20000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991731033336182e-05, + "loss": 0.1234, + "step": 20100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991689779261824e-05, + "loss": 0.1001, + "step": 20200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991648525187472e-05, + "loss": 0.1282, + "step": 20300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991607271113114e-05, + "loss": 0.0976, + "step": 20400 + }, + { + "epoch": 0.0, + "learning_rate": 2.999156601703876e-05, + "loss": 0.1214, + "step": 20500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991524762964403e-05, + "loss": 0.1281, + "step": 20600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999148350889005e-05, + "loss": 0.1027, + "step": 20700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999144225481569e-05, + "loss": 0.0946, + "step": 20800 + }, + { + "epoch": 0.0, + "learning_rate": 2.999140100074134e-05, + "loss": 0.1501, + "step": 20900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991360159207726e-05, + "loss": 0.1294, + "step": 21000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991318905133368e-05, + "loss": 0.1237, + "step": 21100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991277651059013e-05, + "loss": 0.1613, + "step": 21200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991236396984658e-05, + "loss": 0.134, + "step": 21300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991195142910303e-05, + "loss": 0.1156, + "step": 21400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991153888835948e-05, + "loss": 0.111, + "step": 21500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991112634761592e-05, + "loss": 0.1627, + "step": 21600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9991071380687237e-05, + "loss": 0.1362, + "step": 21700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999103012661288e-05, + "loss": 0.1193, + "step": 21800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990988872538527e-05, + "loss": 0.133, + "step": 21900 + }, + { + "epoch": 0.0, + "learning_rate": 2.999094761846417e-05, + "loss": 0.1091, + "step": 22000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990906364389814e-05, + "loss": 0.1259, + "step": 22100 + }, + { + "epoch": 0.0, + "learning_rate": 2.999086511031546e-05, + "loss": 0.1234, + "step": 22200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990823856241104e-05, + "loss": 0.115, + "step": 22300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990782602166745e-05, + "loss": 0.1402, + "step": 22400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990741348092394e-05, + "loss": 0.1286, + "step": 22500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990700094018035e-05, + "loss": 0.1092, + "step": 22600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990659252484423e-05, + "loss": 0.1191, + "step": 22700 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990617998410068e-05, + "loss": 0.1197, + "step": 22800 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990576744335716e-05, + "loss": 0.1361, + "step": 22900 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990535490261358e-05, + "loss": 0.1234, + "step": 23000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990494236187003e-05, + "loss": 0.1068, + "step": 23100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990452982112648e-05, + "loss": 0.1248, + "step": 23200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990411728038293e-05, + "loss": 0.1148, + "step": 23300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990370473963934e-05, + "loss": 0.1355, + "step": 23400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990329219889583e-05, + "loss": 0.1161, + "step": 23500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990287965815224e-05, + "loss": 0.12, + "step": 23600 + }, + { + "epoch": 0.0, + "learning_rate": 2.999024671174087e-05, + "loss": 0.1173, + "step": 23700 + }, + { + "epoch": 0.0, + "learning_rate": 2.999020545766651e-05, + "loss": 0.1193, + "step": 23800 + }, + { + "epoch": 0.0, + "learning_rate": 2.999016420359216e-05, + "loss": 0.1455, + "step": 23900 + }, + { + "epoch": 0.0, + "learning_rate": 2.99901229495178e-05, + "loss": 0.1296, + "step": 24000 + }, + { + "epoch": 0.0, + "learning_rate": 2.9990081695443446e-05, + "loss": 0.1402, + "step": 24100 + }, + { + "epoch": 0.0, + "learning_rate": 2.999004044136909e-05, + "loss": 0.1232, + "step": 24200 + }, + { + "epoch": 0.0, + "learning_rate": 2.9989999187294736e-05, + "loss": 0.1263, + "step": 24300 + }, + { + "epoch": 0.0, + "learning_rate": 2.9989957933220377e-05, + "loss": 0.1339, + "step": 24400 + }, + { + "epoch": 0.0, + "learning_rate": 2.9989916679146026e-05, + "loss": 0.1242, + "step": 24500 + }, + { + "epoch": 0.0, + "learning_rate": 2.9989875425071667e-05, + "loss": 0.1557, + "step": 24600 + }, + { + "epoch": 0.0, + "learning_rate": 2.9989834170997312e-05, + "loss": 0.128, + "step": 24700 + }, + { + "epoch": 0.0, + "eval_accuracy": 0.9412505310427588, + "eval_f1": 0.9412461155109177, + "eval_loss": 0.15371489524841309, + "eval_matthews_correlation": 0.8826317470341026, + "eval_precision": 0.9413817526260211, + "eval_recall": 0.9412500042409688, + "eval_runtime": 1573.539, + "eval_samples_per_second": 2628.686, + "eval_steps_per_second": 2628.686, + "step": 24769 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989792916922957e-05, + "loss": 0.1622, + "step": 24800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989751662848602e-05, + "loss": 0.1287, + "step": 24900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989710408774247e-05, + "loss": 0.1407, + "step": 25000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989669154699892e-05, + "loss": 0.1415, + "step": 25100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989627900625537e-05, + "loss": 0.1269, + "step": 25200 + }, + { + "epoch": 1.0, + "learning_rate": 2.998958664655118e-05, + "loss": 0.1266, + "step": 25300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989545392476827e-05, + "loss": 0.1323, + "step": 25400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998950413840247e-05, + "loss": 0.1448, + "step": 25500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989462884328114e-05, + "loss": 0.1476, + "step": 25600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998942163025376e-05, + "loss": 0.1401, + "step": 25700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989380376179404e-05, + "loss": 0.1688, + "step": 25800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989339122105045e-05, + "loss": 0.1509, + "step": 25900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989297868030694e-05, + "loss": 0.1828, + "step": 26000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989256613956335e-05, + "loss": 0.1663, + "step": 26100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998921535988198e-05, + "loss": 0.1476, + "step": 26200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9989174105807625e-05, + "loss": 0.1601, + "step": 26300 + }, + { + "epoch": 1.0, + "learning_rate": 2.998913285173327e-05, + "loss": 0.1552, + "step": 26400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998909159765891e-05, + "loss": 0.1573, + "step": 26500 + }, + { + "epoch": 1.0, + "learning_rate": 2.998905034358456e-05, + "loss": 0.1707, + "step": 26600 + }, + { + "epoch": 1.0, + "learning_rate": 2.99890090895102e-05, + "loss": 0.1389, + "step": 26700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988967835435847e-05, + "loss": 0.1472, + "step": 26800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988926581361495e-05, + "loss": 0.1456, + "step": 26900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988885327287137e-05, + "loss": 0.1366, + "step": 27000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998884407321278e-05, + "loss": 0.1623, + "step": 27100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988802819138426e-05, + "loss": 0.1525, + "step": 27200 + }, + { + "epoch": 1.0, + "learning_rate": 2.998876156506407e-05, + "loss": 0.1426, + "step": 27300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988720310989713e-05, + "loss": 0.1444, + "step": 27400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998867905691536e-05, + "loss": 0.152, + "step": 27500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988637802841003e-05, + "loss": 0.1441, + "step": 27600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988596548766648e-05, + "loss": 0.1566, + "step": 27700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988555294692293e-05, + "loss": 0.1052, + "step": 27800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988514040617938e-05, + "loss": 0.1509, + "step": 27900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998847278654358e-05, + "loss": 0.1519, + "step": 28000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988431532469228e-05, + "loss": 0.1331, + "step": 28100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998839027839487e-05, + "loss": 0.1391, + "step": 28200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988349024320514e-05, + "loss": 0.1295, + "step": 28300 + }, + { + "epoch": 1.0, + "learning_rate": 2.998830777024616e-05, + "loss": 0.151, + "step": 28400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988266516171804e-05, + "loss": 0.1396, + "step": 28500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988225262097446e-05, + "loss": 0.1138, + "step": 28600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988184008023094e-05, + "loss": 0.138, + "step": 28700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988143166489482e-05, + "loss": 0.118, + "step": 28800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988101912415127e-05, + "loss": 0.1409, + "step": 28900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998806065834077e-05, + "loss": 0.1454, + "step": 29000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988019404266417e-05, + "loss": 0.1435, + "step": 29100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987978562732804e-05, + "loss": 0.1201, + "step": 29200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987937308658446e-05, + "loss": 0.1533, + "step": 29300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987896467124837e-05, + "loss": 0.1382, + "step": 29400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998785521305048e-05, + "loss": 0.1454, + "step": 29500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987813958976127e-05, + "loss": 0.1453, + "step": 29600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998777270490177e-05, + "loss": 0.1374, + "step": 29700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987731450827414e-05, + "loss": 0.1434, + "step": 29800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998769019675306e-05, + "loss": 0.1465, + "step": 29900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987648942678703e-05, + "loss": 0.1518, + "step": 30000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987607688604345e-05, + "loss": 0.1218, + "step": 30100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987566434529993e-05, + "loss": 0.1333, + "step": 30200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987525180455635e-05, + "loss": 0.1646, + "step": 30300 + }, + { + "epoch": 1.0, + "learning_rate": 2.998748392638128e-05, + "loss": 0.1376, + "step": 30400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998744267230692e-05, + "loss": 0.1376, + "step": 30500 + }, + { + "epoch": 1.0, + "learning_rate": 2.998740141823257e-05, + "loss": 0.1368, + "step": 30600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998736016415821e-05, + "loss": 0.1459, + "step": 30700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987318910083856e-05, + "loss": 0.1297, + "step": 30800 + }, + { + "epoch": 1.0, + "learning_rate": 2.99872776560095e-05, + "loss": 0.1576, + "step": 30900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987236401935146e-05, + "loss": 0.1458, + "step": 31000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998719514786079e-05, + "loss": 0.1278, + "step": 31100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987153893786436e-05, + "loss": 0.1475, + "step": 31200 + }, + { + "epoch": 1.0, + "learning_rate": 2.998711263971208e-05, + "loss": 0.1608, + "step": 31300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9987071385637723e-05, + "loss": 0.1405, + "step": 31400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998703013156337e-05, + "loss": 0.1395, + "step": 31500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986988877489013e-05, + "loss": 0.1495, + "step": 31600 + }, + { + "epoch": 1.0, + "learning_rate": 2.99869480359554e-05, + "loss": 0.1523, + "step": 31700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986906781881045e-05, + "loss": 0.1298, + "step": 31800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998686552780669e-05, + "loss": 0.1558, + "step": 31900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986824273732335e-05, + "loss": 0.1224, + "step": 32000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986783019657977e-05, + "loss": 0.1515, + "step": 32100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986741765583625e-05, + "loss": 0.1261, + "step": 32200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986700511509267e-05, + "loss": 0.1513, + "step": 32300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986659257434912e-05, + "loss": 0.1606, + "step": 32400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998661800336056e-05, + "loss": 0.142, + "step": 32500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986576749286202e-05, + "loss": 0.1448, + "step": 32600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986535495211847e-05, + "loss": 0.1492, + "step": 32700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986494241137492e-05, + "loss": 0.1591, + "step": 32800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986452987063137e-05, + "loss": 0.1365, + "step": 32900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998641173298878e-05, + "loss": 0.1585, + "step": 33000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986370478914427e-05, + "loss": 0.1526, + "step": 33100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986329224840068e-05, + "loss": 0.1301, + "step": 33200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986287970765713e-05, + "loss": 0.1698, + "step": 33300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986246716691358e-05, + "loss": 0.1513, + "step": 33400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986205462617003e-05, + "loss": 0.1554, + "step": 33500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986164208542645e-05, + "loss": 0.1619, + "step": 33600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986122954468293e-05, + "loss": 0.1456, + "step": 33700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9986081700393935e-05, + "loss": 0.1724, + "step": 33800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998604044631958e-05, + "loss": 0.1806, + "step": 33900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985999192245225e-05, + "loss": 0.1503, + "step": 34000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998595793817087e-05, + "loss": 0.1341, + "step": 34100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998591668409651e-05, + "loss": 0.1599, + "step": 34200 + }, + { + "epoch": 1.0, + "learning_rate": 2.998587543002216e-05, + "loss": 0.1327, + "step": 34300 + }, + { + "epoch": 1.0, + "learning_rate": 2.99858341759478e-05, + "loss": 0.1172, + "step": 34400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985792921873446e-05, + "loss": 0.1544, + "step": 34500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985751667799095e-05, + "loss": 0.146, + "step": 34600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985710413724736e-05, + "loss": 0.1323, + "step": 34700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998566915965038e-05, + "loss": 0.1682, + "step": 34800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985627905576026e-05, + "loss": 0.1505, + "step": 34900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998558665150167e-05, + "loss": 0.1501, + "step": 35000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985545397427313e-05, + "loss": 0.1841, + "step": 35100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998550414335296e-05, + "loss": 0.1416, + "step": 35200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985462889278603e-05, + "loss": 0.1621, + "step": 35300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985421635204247e-05, + "loss": 0.1578, + "step": 35400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985380381129892e-05, + "loss": 0.1456, + "step": 35500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985339127055537e-05, + "loss": 0.1641, + "step": 35600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998529787298118e-05, + "loss": 0.146, + "step": 35700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985256618906827e-05, + "loss": 0.1264, + "step": 35800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998521536483247e-05, + "loss": 0.149, + "step": 35900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985174110758114e-05, + "loss": 0.1421, + "step": 36000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998513285668376e-05, + "loss": 0.1442, + "step": 36100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985091602609404e-05, + "loss": 0.1563, + "step": 36200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985050348535045e-05, + "loss": 0.1368, + "step": 36300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9985009094460694e-05, + "loss": 0.1202, + "step": 36400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984967840386335e-05, + "loss": 0.156, + "step": 36500 + }, + { + "epoch": 1.0, + "learning_rate": 2.998492658631198e-05, + "loss": 0.1316, + "step": 36600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998488533223763e-05, + "loss": 0.1266, + "step": 36700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998484407816327e-05, + "loss": 0.1362, + "step": 36800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984802824088915e-05, + "loss": 0.1532, + "step": 36900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998476157001456e-05, + "loss": 0.1364, + "step": 37000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984720315940205e-05, + "loss": 0.1402, + "step": 37100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984679061865847e-05, + "loss": 0.1469, + "step": 37200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984637807791495e-05, + "loss": 0.1281, + "step": 37300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984596966257883e-05, + "loss": 0.1166, + "step": 37400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984555712183524e-05, + "loss": 0.14, + "step": 37500 + }, + { + "epoch": 1.0, + "learning_rate": 2.998451445810917e-05, + "loss": 0.1445, + "step": 37600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984473204034814e-05, + "loss": 0.1244, + "step": 37700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998443194996046e-05, + "loss": 0.1236, + "step": 37800 + }, + { + "epoch": 1.0, + "learning_rate": 2.99843906958861e-05, + "loss": 0.1377, + "step": 37900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998434944181175e-05, + "loss": 0.1247, + "step": 38000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984308187737394e-05, + "loss": 0.1328, + "step": 38100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984266933663036e-05, + "loss": 0.1409, + "step": 38200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984225679588684e-05, + "loss": 0.1145, + "step": 38300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984184425514326e-05, + "loss": 0.1356, + "step": 38400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998414317143997e-05, + "loss": 0.1317, + "step": 38500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984101917365616e-05, + "loss": 0.1297, + "step": 38600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984061075832e-05, + "loss": 0.1321, + "step": 38700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9984019821757648e-05, + "loss": 0.1552, + "step": 38800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998397856768329e-05, + "loss": 0.1352, + "step": 38900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983937313608935e-05, + "loss": 0.1217, + "step": 39000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998389605953458e-05, + "loss": 0.1159, + "step": 39100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983854805460225e-05, + "loss": 0.1141, + "step": 39200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983813551385866e-05, + "loss": 0.0878, + "step": 39300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983772297311515e-05, + "loss": 0.1115, + "step": 39400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983731455777902e-05, + "loss": 0.1444, + "step": 39500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983690201703547e-05, + "loss": 0.1187, + "step": 39600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998364894762919e-05, + "loss": 0.1168, + "step": 39700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983607693554837e-05, + "loss": 0.1417, + "step": 39800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998356643948048e-05, + "loss": 0.1224, + "step": 39900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983525185406124e-05, + "loss": 0.1378, + "step": 40000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998348393133177e-05, + "loss": 0.1148, + "step": 40100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983442677257414e-05, + "loss": 0.1312, + "step": 40200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983401423183055e-05, + "loss": 0.1075, + "step": 40300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983360169108704e-05, + "loss": 0.0949, + "step": 40400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983318915034345e-05, + "loss": 0.1106, + "step": 40500 + }, + { + "epoch": 1.0, + "learning_rate": 2.998327766095999e-05, + "loss": 0.0915, + "step": 40600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983236406885635e-05, + "loss": 0.1321, + "step": 40700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998319515281128e-05, + "loss": 0.1145, + "step": 40800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983153898736925e-05, + "loss": 0.111, + "step": 40900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998311264466257e-05, + "loss": 0.1445, + "step": 41000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983071390588215e-05, + "loss": 0.1068, + "step": 41100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9983030136513857e-05, + "loss": 0.1167, + "step": 41200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982988882439505e-05, + "loss": 0.1445, + "step": 41300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982947628365147e-05, + "loss": 0.1114, + "step": 41400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998290637429079e-05, + "loss": 0.104, + "step": 41500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982865120216437e-05, + "loss": 0.1261, + "step": 41600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998282386614208e-05, + "loss": 0.1371, + "step": 41700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982782612067723e-05, + "loss": 0.1097, + "step": 41800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998274135799337e-05, + "loss": 0.1258, + "step": 41900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982700103919013e-05, + "loss": 0.1119, + "step": 42000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982658849844658e-05, + "loss": 0.1315, + "step": 42100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982617595770303e-05, + "loss": 0.1242, + "step": 42200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982576341695948e-05, + "loss": 0.1184, + "step": 42300 + }, + { + "epoch": 1.0, + "learning_rate": 2.998253508762159e-05, + "loss": 0.1344, + "step": 42400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982493833547238e-05, + "loss": 0.1254, + "step": 42500 + }, + { + "epoch": 1.0, + "learning_rate": 2.998245257947288e-05, + "loss": 0.121, + "step": 42600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982411325398525e-05, + "loss": 0.1254, + "step": 42700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998237007132417e-05, + "loss": 0.1308, + "step": 42800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982328817249814e-05, + "loss": 0.1099, + "step": 42900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998228756317546e-05, + "loss": 0.1433, + "step": 43000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982246309101104e-05, + "loss": 0.1263, + "step": 43100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998220505502675e-05, + "loss": 0.1217, + "step": 43200 + }, + { + "epoch": 1.0, + "learning_rate": 2.998216380095239e-05, + "loss": 0.1298, + "step": 43300 + }, + { + "epoch": 1.0, + "learning_rate": 2.998212254687804e-05, + "loss": 0.1359, + "step": 43400 + }, + { + "epoch": 1.0, + "learning_rate": 2.998208129280368e-05, + "loss": 0.1514, + "step": 43500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9982040038729326e-05, + "loss": 0.1165, + "step": 43600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998199878465497e-05, + "loss": 0.1191, + "step": 43700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981957530580616e-05, + "loss": 0.1484, + "step": 43800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981916276506257e-05, + "loss": 0.1221, + "step": 43900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981875022431906e-05, + "loss": 0.0714, + "step": 44000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981833768357547e-05, + "loss": 0.1398, + "step": 44100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981792514283192e-05, + "loss": 0.13, + "step": 44200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981751260208837e-05, + "loss": 0.1309, + "step": 44300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981710006134482e-05, + "loss": 0.1177, + "step": 44400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981668752060124e-05, + "loss": 0.1376, + "step": 44500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981627497985772e-05, + "loss": 0.1107, + "step": 44600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981586243911414e-05, + "loss": 0.1182, + "step": 44700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998154498983706e-05, + "loss": 0.1288, + "step": 44800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981503735762704e-05, + "loss": 0.1198, + "step": 44900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998146248168835e-05, + "loss": 0.0918, + "step": 45000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981421227613994e-05, + "loss": 0.1233, + "step": 45100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998137997353964e-05, + "loss": 0.0905, + "step": 45200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981338719465284e-05, + "loss": 0.1315, + "step": 45300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981297465390925e-05, + "loss": 0.1153, + "step": 45400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981256211316574e-05, + "loss": 0.0926, + "step": 45500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981214957242215e-05, + "loss": 0.1079, + "step": 45600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998117370316786e-05, + "loss": 0.1597, + "step": 45700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981132449093505e-05, + "loss": 0.1067, + "step": 45800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981091607559893e-05, + "loss": 0.1353, + "step": 45900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9981050353485538e-05, + "loss": 0.1461, + "step": 46000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998100909941118e-05, + "loss": 0.123, + "step": 46100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980967845336828e-05, + "loss": 0.1224, + "step": 46200 + }, + { + "epoch": 1.0, + "learning_rate": 2.998092659126247e-05, + "loss": 0.124, + "step": 46300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980885337188114e-05, + "loss": 0.1518, + "step": 46400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980844083113763e-05, + "loss": 0.1229, + "step": 46500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980802829039404e-05, + "loss": 0.1259, + "step": 46600 + }, + { + "epoch": 1.0, + "learning_rate": 2.998076157496505e-05, + "loss": 0.112, + "step": 46700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980720320890694e-05, + "loss": 0.1137, + "step": 46800 + }, + { + "epoch": 1.0, + "learning_rate": 2.998067906681634e-05, + "loss": 0.1286, + "step": 46900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998063781274198e-05, + "loss": 0.1205, + "step": 47000 + }, + { + "epoch": 1.0, + "learning_rate": 2.998059655866763e-05, + "loss": 0.1052, + "step": 47100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998055530459327e-05, + "loss": 0.1469, + "step": 47200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980514050518916e-05, + "loss": 0.1166, + "step": 47300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980473208985303e-05, + "loss": 0.1041, + "step": 47400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980431954910945e-05, + "loss": 0.1268, + "step": 47500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980390700836593e-05, + "loss": 0.1193, + "step": 47600 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980349446762235e-05, + "loss": 0.1408, + "step": 47700 + }, + { + "epoch": 1.0, + "learning_rate": 2.998030819268788e-05, + "loss": 0.1067, + "step": 47800 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980266938613528e-05, + "loss": 0.1193, + "step": 47900 + }, + { + "epoch": 1.0, + "learning_rate": 2.998022568453917e-05, + "loss": 0.1211, + "step": 48000 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980184430464815e-05, + "loss": 0.1295, + "step": 48100 + }, + { + "epoch": 1.0, + "learning_rate": 2.998014317639046e-05, + "loss": 0.13, + "step": 48200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980101922316105e-05, + "loss": 0.1217, + "step": 48300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980060668241746e-05, + "loss": 0.1054, + "step": 48400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9980019414167394e-05, + "loss": 0.1127, + "step": 48500 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979978160093036e-05, + "loss": 0.1308, + "step": 48600 + }, + { + "epoch": 1.0, + "learning_rate": 2.997993690601868e-05, + "loss": 0.1329, + "step": 48700 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979895651944326e-05, + "loss": 0.14, + "step": 48800 + }, + { + "epoch": 1.0, + "learning_rate": 2.997985439786997e-05, + "loss": 0.1262, + "step": 48900 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979813143795613e-05, + "loss": 0.1331, + "step": 49000 + }, + { + "epoch": 1.0, + "learning_rate": 2.997977188972126e-05, + "loss": 0.1177, + "step": 49100 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979730635646902e-05, + "loss": 0.134, + "step": 49200 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979689381572547e-05, + "loss": 0.1338, + "step": 49300 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979648127498192e-05, + "loss": 0.1415, + "step": 49400 + }, + { + "epoch": 1.0, + "learning_rate": 2.9979606873423837e-05, + "loss": 0.1381, + "step": 49500 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.942031046023061, + "eval_f1": 0.9420274047220744, + "eval_loss": 0.15210944414138794, + "eval_matthews_correlation": 0.8841713498744154, + "eval_precision": 0.9421407920220504, + "eval_recall": 0.9420305647232377, + "eval_runtime": 1387.9505, + "eval_samples_per_second": 2980.178, + "eval_steps_per_second": 2980.178, + "step": 49538 + }, + { + "epoch": 2.0, + "learning_rate": 2.997956561934948e-05, + "loss": 0.1507, + "step": 49600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9979524365275127e-05, + "loss": 0.132, + "step": 49700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997948311120077e-05, + "loss": 0.1351, + "step": 49800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9979441857126414e-05, + "loss": 0.1282, + "step": 49900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9979400603052062e-05, + "loss": 0.1244, + "step": 50000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9979359348977704e-05, + "loss": 0.1386, + "step": 50100 + }, + { + "epoch": 2.0, + "learning_rate": 2.997931809490335e-05, + "loss": 0.1273, + "step": 50200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9979276840828994e-05, + "loss": 0.1344, + "step": 50300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997923558675464e-05, + "loss": 0.1375, + "step": 50400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997919433268028e-05, + "loss": 0.148, + "step": 50500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997915307860593e-05, + "loss": 0.1519, + "step": 50600 + }, + { + "epoch": 2.0, + "learning_rate": 2.997911182453157e-05, + "loss": 0.1551, + "step": 50700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9979070570457215e-05, + "loss": 0.1844, + "step": 50800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997902931638286e-05, + "loss": 0.1535, + "step": 50900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978988062308505e-05, + "loss": 0.1542, + "step": 51000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978946808234147e-05, + "loss": 0.1406, + "step": 51100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978905554159795e-05, + "loss": 0.1647, + "step": 51200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978864300085437e-05, + "loss": 0.1429, + "step": 51300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978823046011082e-05, + "loss": 0.161, + "step": 51400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978781791936727e-05, + "loss": 0.1395, + "step": 51500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997874053786237e-05, + "loss": 0.1425, + "step": 51600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978699283788013e-05, + "loss": 0.1512, + "step": 51700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997865802971366e-05, + "loss": 0.1334, + "step": 51800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978616775639303e-05, + "loss": 0.1536, + "step": 51900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978575521564948e-05, + "loss": 0.1398, + "step": 52000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978534267490596e-05, + "loss": 0.1366, + "step": 52100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978493013416238e-05, + "loss": 0.1508, + "step": 52200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978451759341883e-05, + "loss": 0.1413, + "step": 52300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978410505267528e-05, + "loss": 0.1459, + "step": 52400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978369251193173e-05, + "loss": 0.1295, + "step": 52500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978327997118815e-05, + "loss": 0.1225, + "step": 52600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978286743044463e-05, + "loss": 0.1369, + "step": 52700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978245488970105e-05, + "loss": 0.1569, + "step": 52800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997820423489575e-05, + "loss": 0.1257, + "step": 52900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978162980821394e-05, + "loss": 0.1295, + "step": 53000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997812172674704e-05, + "loss": 0.1445, + "step": 53100 + }, + { + "epoch": 2.0, + "learning_rate": 2.997808047267268e-05, + "loss": 0.1323, + "step": 53200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997803963113907e-05, + "loss": 0.1238, + "step": 53300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977998377064717e-05, + "loss": 0.1092, + "step": 53400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977957535531105e-05, + "loss": 0.1483, + "step": 53500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997791628145675e-05, + "loss": 0.1059, + "step": 53600 + }, + { + "epoch": 2.0, + "learning_rate": 2.997787502738239e-05, + "loss": 0.1509, + "step": 53700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977833773308036e-05, + "loss": 0.1375, + "step": 53800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997779251923368e-05, + "loss": 0.1362, + "step": 53900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977751265159326e-05, + "loss": 0.1153, + "step": 54000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977710011084968e-05, + "loss": 0.1498, + "step": 54100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977668757010616e-05, + "loss": 0.1421, + "step": 54200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977627502936258e-05, + "loss": 0.1391, + "step": 54300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977586248861903e-05, + "loss": 0.1347, + "step": 54400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977544994787548e-05, + "loss": 0.1515, + "step": 54500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977503740713193e-05, + "loss": 0.1356, + "step": 54600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977462486638834e-05, + "loss": 0.1271, + "step": 54700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977421232564483e-05, + "loss": 0.162, + "step": 54800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977379978490127e-05, + "loss": 0.1029, + "step": 54900 + }, + { + "epoch": 2.0, + "learning_rate": 2.997733872441577e-05, + "loss": 0.1429, + "step": 55000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977297470341417e-05, + "loss": 0.1495, + "step": 55100 + }, + { + "epoch": 2.0, + "learning_rate": 2.997725621626706e-05, + "loss": 0.1389, + "step": 55200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977214962192704e-05, + "loss": 0.1377, + "step": 55300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997717370811835e-05, + "loss": 0.1273, + "step": 55400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977132454043994e-05, + "loss": 0.14, + "step": 55500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977091199969636e-05, + "loss": 0.1491, + "step": 55600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9977049945895284e-05, + "loss": 0.1362, + "step": 55700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997700910436167e-05, + "loss": 0.1254, + "step": 55800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976967850287313e-05, + "loss": 0.1389, + "step": 55900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976926596212958e-05, + "loss": 0.154, + "step": 56000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976885342138603e-05, + "loss": 0.138, + "step": 56100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976844088064248e-05, + "loss": 0.139, + "step": 56200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976802833989893e-05, + "loss": 0.1337, + "step": 56300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976761579915538e-05, + "loss": 0.1509, + "step": 56400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976720325841183e-05, + "loss": 0.1452, + "step": 56500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976679071766824e-05, + "loss": 0.1306, + "step": 56600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976637817692473e-05, + "loss": 0.1358, + "step": 56700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976596563618114e-05, + "loss": 0.1243, + "step": 56800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997655530954376e-05, + "loss": 0.1451, + "step": 56900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976514055469404e-05, + "loss": 0.1322, + "step": 57000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997647280139505e-05, + "loss": 0.1396, + "step": 57100 + }, + { + "epoch": 2.0, + "learning_rate": 2.997643154732069e-05, + "loss": 0.1636, + "step": 57200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997639029324634e-05, + "loss": 0.1388, + "step": 57300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997634903917198e-05, + "loss": 0.14, + "step": 57400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976307785097626e-05, + "loss": 0.1556, + "step": 57500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997626653102327e-05, + "loss": 0.1406, + "step": 57600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976225276948916e-05, + "loss": 0.1241, + "step": 57700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976184022874557e-05, + "loss": 0.1656, + "step": 57800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976142768800206e-05, + "loss": 0.1459, + "step": 57900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976101514725847e-05, + "loss": 0.1462, + "step": 58000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976060260651492e-05, + "loss": 0.1437, + "step": 58100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9976019006577137e-05, + "loss": 0.1607, + "step": 58200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975978165043528e-05, + "loss": 0.146, + "step": 58300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997593691096917e-05, + "loss": 0.1542, + "step": 58400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975895656894815e-05, + "loss": 0.1431, + "step": 58500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997585440282046e-05, + "loss": 0.1782, + "step": 58600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975813148746105e-05, + "loss": 0.177, + "step": 58700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975771894671746e-05, + "loss": 0.1327, + "step": 58800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975730640597395e-05, + "loss": 0.1325, + "step": 58900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975689386523036e-05, + "loss": 0.1535, + "step": 59000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997564813244868e-05, + "loss": 0.122, + "step": 59100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975606878374326e-05, + "loss": 0.1304, + "step": 59200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997556562429997e-05, + "loss": 0.1326, + "step": 59300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975524370225613e-05, + "loss": 0.1542, + "step": 59400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997548311615126e-05, + "loss": 0.1349, + "step": 59500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975441862076903e-05, + "loss": 0.1556, + "step": 59600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975400608002548e-05, + "loss": 0.1555, + "step": 59700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975359353928196e-05, + "loss": 0.1384, + "step": 59800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975318099853838e-05, + "loss": 0.1839, + "step": 59900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975276845779483e-05, + "loss": 0.1432, + "step": 60000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975235591705128e-05, + "loss": 0.1479, + "step": 60100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975194337630773e-05, + "loss": 0.1689, + "step": 60200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975153083556414e-05, + "loss": 0.1339, + "step": 60300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975111829482063e-05, + "loss": 0.1528, + "step": 60400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9975070575407704e-05, + "loss": 0.1409, + "step": 60500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997502932133335e-05, + "loss": 0.1224, + "step": 60600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974988067258994e-05, + "loss": 0.1473, + "step": 60700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997494681318464e-05, + "loss": 0.1305, + "step": 60800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997490555911028e-05, + "loss": 0.1506, + "step": 60900 + }, + { + "epoch": 2.0, + "learning_rate": 2.997486430503593e-05, + "loss": 0.1501, + "step": 61000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997482305096157e-05, + "loss": 0.1168, + "step": 61100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974781796887216e-05, + "loss": 0.1375, + "step": 61200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997474054281286e-05, + "loss": 0.1334, + "step": 61300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974699288738505e-05, + "loss": 0.1325, + "step": 61400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974658034664147e-05, + "loss": 0.125, + "step": 61500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974616780589795e-05, + "loss": 0.1437, + "step": 61600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974575526515437e-05, + "loss": 0.1316, + "step": 61700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974534272441082e-05, + "loss": 0.143, + "step": 61800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997449301836673e-05, + "loss": 0.1341, + "step": 61900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974451764292372e-05, + "loss": 0.1405, + "step": 62000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974410510218017e-05, + "loss": 0.1164, + "step": 62100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974369256143662e-05, + "loss": 0.1148, + "step": 62200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974328002069307e-05, + "loss": 0.1521, + "step": 62300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997428674799495e-05, + "loss": 0.1292, + "step": 62400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974245493920597e-05, + "loss": 0.12, + "step": 62500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997420423984624e-05, + "loss": 0.1294, + "step": 62600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974162985771883e-05, + "loss": 0.1173, + "step": 62700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974121731697528e-05, + "loss": 0.1189, + "step": 62800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974080477623173e-05, + "loss": 0.1302, + "step": 62900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9974039223548815e-05, + "loss": 0.1299, + "step": 63000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973997969474463e-05, + "loss": 0.1051, + "step": 63100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973956715400105e-05, + "loss": 0.1283, + "step": 63200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997391546132575e-05, + "loss": 0.1244, + "step": 63300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973874207251395e-05, + "loss": 0.1226, + "step": 63400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973833365717782e-05, + "loss": 0.1194, + "step": 63500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973792111643427e-05, + "loss": 0.1556, + "step": 63600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973750857569072e-05, + "loss": 0.1274, + "step": 63700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973709603494714e-05, + "loss": 0.1127, + "step": 63800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973668349420362e-05, + "loss": 0.1167, + "step": 63900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973627095346004e-05, + "loss": 0.1089, + "step": 64000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997358584127165e-05, + "loss": 0.063, + "step": 64100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973544587197294e-05, + "loss": 0.1274, + "step": 64200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997350333312294e-05, + "loss": 0.1307, + "step": 64300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997346207904858e-05, + "loss": 0.1188, + "step": 64400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997342082497423e-05, + "loss": 0.1141, + "step": 64500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997337957089987e-05, + "loss": 0.1371, + "step": 64600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973338316825515e-05, + "loss": 0.1143, + "step": 64700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997329706275116e-05, + "loss": 0.1226, + "step": 64800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973255808676805e-05, + "loss": 0.1176, + "step": 64900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973214554602447e-05, + "loss": 0.1231, + "step": 65000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973173300528095e-05, + "loss": 0.0797, + "step": 65100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973132046453737e-05, + "loss": 0.1038, + "step": 65200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997309079237938e-05, + "loss": 0.0957, + "step": 65300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997304953830503e-05, + "loss": 0.098, + "step": 65400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997300828423067e-05, + "loss": 0.1269, + "step": 65500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997296744269706e-05, + "loss": 0.0959, + "step": 65600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972926188622704e-05, + "loss": 0.1248, + "step": 65700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972885347089092e-05, + "loss": 0.1319, + "step": 65800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972844093014737e-05, + "loss": 0.1025, + "step": 65900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972802838940382e-05, + "loss": 0.1175, + "step": 66000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972761584866027e-05, + "loss": 0.1335, + "step": 66100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972720330791672e-05, + "loss": 0.0931, + "step": 66200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972679076717317e-05, + "loss": 0.1022, + "step": 66300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997263782264296e-05, + "loss": 0.1277, + "step": 66400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972596568568607e-05, + "loss": 0.1197, + "step": 66500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972555314494248e-05, + "loss": 0.112, + "step": 66600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972514060419893e-05, + "loss": 0.1176, + "step": 66700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972472806345538e-05, + "loss": 0.1074, + "step": 66800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972431552271183e-05, + "loss": 0.1321, + "step": 66900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972390298196825e-05, + "loss": 0.1112, + "step": 67000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972349044122473e-05, + "loss": 0.1173, + "step": 67100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972307790048115e-05, + "loss": 0.1236, + "step": 67200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997226653597376e-05, + "loss": 0.1198, + "step": 67300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972225281899405e-05, + "loss": 0.1195, + "step": 67400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997218402782505e-05, + "loss": 0.1228, + "step": 67500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997214277375069e-05, + "loss": 0.1159, + "step": 67600 + }, + { + "epoch": 2.0, + "learning_rate": 2.997210151967634e-05, + "loss": 0.1124, + "step": 67700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997206026560198e-05, + "loss": 0.1325, + "step": 67800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9972019011527626e-05, + "loss": 0.1154, + "step": 67900 + }, + { + "epoch": 2.0, + "learning_rate": 2.997197775745327e-05, + "loss": 0.1182, + "step": 68000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997193691591966e-05, + "loss": 0.1123, + "step": 68100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971895661845304e-05, + "loss": 0.1388, + "step": 68200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997185440777095e-05, + "loss": 0.1451, + "step": 68300 + }, + { + "epoch": 2.0, + "learning_rate": 2.997181315369659e-05, + "loss": 0.109, + "step": 68400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997177189962224e-05, + "loss": 0.1114, + "step": 68500 + }, + { + "epoch": 2.0, + "learning_rate": 2.997173064554788e-05, + "loss": 0.1373, + "step": 68600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971689391473525e-05, + "loss": 0.114, + "step": 68700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997164813739917e-05, + "loss": 0.0818, + "step": 68800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971606883324815e-05, + "loss": 0.1278, + "step": 68900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971565629250457e-05, + "loss": 0.1206, + "step": 69000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971524375176105e-05, + "loss": 0.1209, + "step": 69100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971483121101747e-05, + "loss": 0.1178, + "step": 69200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997144186702739e-05, + "loss": 0.1235, + "step": 69300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971400612953037e-05, + "loss": 0.1157, + "step": 69400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997135935887868e-05, + "loss": 0.1075, + "step": 69500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971318104804327e-05, + "loss": 0.116, + "step": 69600 + }, + { + "epoch": 2.0, + "learning_rate": 2.997127685072997e-05, + "loss": 0.1181, + "step": 69700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971235596655616e-05, + "loss": 0.0814, + "step": 69800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971194342581258e-05, + "loss": 0.1161, + "step": 69900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971153088506906e-05, + "loss": 0.0829, + "step": 70000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971111834432548e-05, + "loss": 0.1345, + "step": 70100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971070580358193e-05, + "loss": 0.0964, + "step": 70200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9971029326283838e-05, + "loss": 0.0862, + "step": 70300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970988072209483e-05, + "loss": 0.1249, + "step": 70400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970946818135124e-05, + "loss": 0.1382, + "step": 70500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970905564060773e-05, + "loss": 0.098, + "step": 70600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970864309986414e-05, + "loss": 0.156, + "step": 70700 + }, + { + "epoch": 2.0, + "learning_rate": 2.997082305591206e-05, + "loss": 0.1253, + "step": 70800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970781801837704e-05, + "loss": 0.1206, + "step": 70900 + }, + { + "epoch": 2.0, + "learning_rate": 2.997074054776335e-05, + "loss": 0.0953, + "step": 71000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997069929368899e-05, + "loss": 0.1493, + "step": 71100 + }, + { + "epoch": 2.0, + "learning_rate": 2.997065803961464e-05, + "loss": 0.1247, + "step": 71200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997061678554028e-05, + "loss": 0.1099, + "step": 71300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970575531465926e-05, + "loss": 0.1281, + "step": 71400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997053427739157e-05, + "loss": 0.1005, + "step": 71500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970493023317216e-05, + "loss": 0.1172, + "step": 71600 + }, + { + "epoch": 2.0, + "learning_rate": 2.997045176924286e-05, + "loss": 0.1112, + "step": 71700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970410515168506e-05, + "loss": 0.1126, + "step": 71800 + }, + { + "epoch": 2.0, + "learning_rate": 2.997036926109415e-05, + "loss": 0.1204, + "step": 71900 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970328007019792e-05, + "loss": 0.1262, + "step": 72000 + }, + { + "epoch": 2.0, + "learning_rate": 2.997028675294544e-05, + "loss": 0.1157, + "step": 72100 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970245911411828e-05, + "loss": 0.1122, + "step": 72200 + }, + { + "epoch": 2.0, + "learning_rate": 2.997020465733747e-05, + "loss": 0.1112, + "step": 72300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970163403263115e-05, + "loss": 0.1196, + "step": 72400 + }, + { + "epoch": 2.0, + "learning_rate": 2.997012214918876e-05, + "loss": 0.1379, + "step": 72500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970080895114405e-05, + "loss": 0.1008, + "step": 72600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9970039641040046e-05, + "loss": 0.1172, + "step": 72700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969998386965695e-05, + "loss": 0.1098, + "step": 72800 + }, + { + "epoch": 2.0, + "learning_rate": 2.996995713289134e-05, + "loss": 0.1368, + "step": 72900 + }, + { + "epoch": 2.0, + "learning_rate": 2.996991587881698e-05, + "loss": 0.1233, + "step": 73000 + }, + { + "epoch": 2.0, + "learning_rate": 2.996987462474263e-05, + "loss": 0.1142, + "step": 73100 + }, + { + "epoch": 2.0, + "learning_rate": 2.996983337066827e-05, + "loss": 0.1051, + "step": 73200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969792116593916e-05, + "loss": 0.1079, + "step": 73300 + }, + { + "epoch": 2.0, + "learning_rate": 2.996975086251956e-05, + "loss": 0.1337, + "step": 73400 + }, + { + "epoch": 2.0, + "learning_rate": 2.996971002098595e-05, + "loss": 0.1179, + "step": 73500 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969668766911594e-05, + "loss": 0.1478, + "step": 73600 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969627512837235e-05, + "loss": 0.1154, + "step": 73700 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969586258762884e-05, + "loss": 0.1226, + "step": 73800 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969545004688525e-05, + "loss": 0.1274, + "step": 73900 + }, + { + "epoch": 2.0, + "learning_rate": 2.996950375061417e-05, + "loss": 0.1253, + "step": 74000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969462496539815e-05, + "loss": 0.1382, + "step": 74100 + }, + { + "epoch": 2.0, + "learning_rate": 2.996942124246546e-05, + "loss": 0.1317, + "step": 74200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9969379988391105e-05, + "loss": 0.1321, + "step": 74300 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9419618925892423, + "eval_f1": 0.9419576710015996, + "eval_loss": 0.1524653136730194, + "eval_matthews_correlation": 0.8840504346996007, + "eval_precision": 0.9420890695231088, + "eval_recall": 0.9419613743988416, + "eval_runtime": 1386.958, + "eval_samples_per_second": 2982.311, + "eval_steps_per_second": 2982.311, + "step": 74307 + }, + { + "epoch": 3.0, + "learning_rate": 2.996933873431675e-05, + "loss": 0.146, + "step": 74400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9969297480242395e-05, + "loss": 0.1157, + "step": 74500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9969256226168037e-05, + "loss": 0.1385, + "step": 74600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9969214972093685e-05, + "loss": 0.1157, + "step": 74700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9969173718019327e-05, + "loss": 0.1254, + "step": 74800 + }, + { + "epoch": 3.0, + "learning_rate": 2.996913246394497e-05, + "loss": 0.1329, + "step": 74900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9969091209870617e-05, + "loss": 0.1386, + "step": 75000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996904995579626e-05, + "loss": 0.1264, + "step": 75100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9969008701721903e-05, + "loss": 0.1288, + "step": 75200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996896744764755e-05, + "loss": 0.147, + "step": 75300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968926193573193e-05, + "loss": 0.1562, + "step": 75400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968884939498838e-05, + "loss": 0.1632, + "step": 75500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968843685424483e-05, + "loss": 0.1574, + "step": 75600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968802431350128e-05, + "loss": 0.1518, + "step": 75700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996876117727577e-05, + "loss": 0.152, + "step": 75800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968719923201418e-05, + "loss": 0.1457, + "step": 75900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996867866912706e-05, + "loss": 0.1518, + "step": 76000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968637415052704e-05, + "loss": 0.148, + "step": 76100 + }, + { + "epoch": 3.0, + "learning_rate": 2.996859616097835e-05, + "loss": 0.1408, + "step": 76200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968554906903994e-05, + "loss": 0.1371, + "step": 76300 + }, + { + "epoch": 3.0, + "learning_rate": 2.996851365282964e-05, + "loss": 0.1374, + "step": 76400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968472398755284e-05, + "loss": 0.1405, + "step": 76500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996843114468093e-05, + "loss": 0.1403, + "step": 76600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996838989060657e-05, + "loss": 0.1392, + "step": 76700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996834863653222e-05, + "loss": 0.1413, + "step": 76800 + }, + { + "epoch": 3.0, + "learning_rate": 2.996830738245786e-05, + "loss": 0.1371, + "step": 76900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968266128383506e-05, + "loss": 0.1412, + "step": 77000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996822487430915e-05, + "loss": 0.1406, + "step": 77100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968183620234796e-05, + "loss": 0.1447, + "step": 77200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968142366160437e-05, + "loss": 0.1151, + "step": 77300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968101112086082e-05, + "loss": 0.1255, + "step": 77400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968059858011727e-05, + "loss": 0.1464, + "step": 77500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9968018603937372e-05, + "loss": 0.1391, + "step": 77600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967977349863014e-05, + "loss": 0.124, + "step": 77700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967936095788662e-05, + "loss": 0.118, + "step": 77800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967894841714304e-05, + "loss": 0.1472, + "step": 77900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996785358763995e-05, + "loss": 0.1268, + "step": 78000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967812333565594e-05, + "loss": 0.1126, + "step": 78100 + }, + { + "epoch": 3.0, + "learning_rate": 2.996777107949124e-05, + "loss": 0.1199, + "step": 78200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967730237957626e-05, + "loss": 0.1234, + "step": 78300 + }, + { + "epoch": 3.0, + "learning_rate": 2.996768898388327e-05, + "loss": 0.1256, + "step": 78400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967647729808916e-05, + "loss": 0.1378, + "step": 78500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996760647573456e-05, + "loss": 0.1361, + "step": 78600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996756563420095e-05, + "loss": 0.1173, + "step": 78700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996752438012659e-05, + "loss": 0.1356, + "step": 78800 + }, + { + "epoch": 3.0, + "learning_rate": 2.996748312605224e-05, + "loss": 0.1194, + "step": 78900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996744187197788e-05, + "loss": 0.1528, + "step": 79000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967400617903525e-05, + "loss": 0.1384, + "step": 79100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967359363829174e-05, + "loss": 0.1279, + "step": 79200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967318109754815e-05, + "loss": 0.1436, + "step": 79300 + }, + { + "epoch": 3.0, + "learning_rate": 2.996727685568046e-05, + "loss": 0.1289, + "step": 79400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967235601606105e-05, + "loss": 0.1422, + "step": 79500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996719434753175e-05, + "loss": 0.1297, + "step": 79600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967153093457392e-05, + "loss": 0.1057, + "step": 79700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996711183938304e-05, + "loss": 0.1373, + "step": 79800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967070585308682e-05, + "loss": 0.1465, + "step": 79900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9967029331234327e-05, + "loss": 0.1409, + "step": 80000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966988077159972e-05, + "loss": 0.1224, + "step": 80100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966946823085617e-05, + "loss": 0.1252, + "step": 80200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966905569011258e-05, + "loss": 0.1413, + "step": 80300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966864314936907e-05, + "loss": 0.1481, + "step": 80400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966823060862548e-05, + "loss": 0.1335, + "step": 80500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966781806788193e-05, + "loss": 0.1177, + "step": 80600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966740552713838e-05, + "loss": 0.1428, + "step": 80700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966699298639483e-05, + "loss": 0.1483, + "step": 80800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966658044565125e-05, + "loss": 0.1266, + "step": 80900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966616790490773e-05, + "loss": 0.136, + "step": 81000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966575536416415e-05, + "loss": 0.1336, + "step": 81100 + }, + { + "epoch": 3.0, + "learning_rate": 2.996653428234206e-05, + "loss": 0.1477, + "step": 81200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966493440808447e-05, + "loss": 0.1408, + "step": 81300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966452186734096e-05, + "loss": 0.1346, + "step": 81400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966410932659737e-05, + "loss": 0.1142, + "step": 81500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966369678585382e-05, + "loss": 0.1426, + "step": 81600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966328424511027e-05, + "loss": 0.125, + "step": 81700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966287170436672e-05, + "loss": 0.1311, + "step": 81800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966245916362314e-05, + "loss": 0.1454, + "step": 81900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966204662287962e-05, + "loss": 0.1472, + "step": 82000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966163408213604e-05, + "loss": 0.1498, + "step": 82100 + }, + { + "epoch": 3.0, + "learning_rate": 2.996612215413925e-05, + "loss": 0.1362, + "step": 82200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9966080900064894e-05, + "loss": 0.1541, + "step": 82300 + }, + { + "epoch": 3.0, + "learning_rate": 2.996603964599054e-05, + "loss": 0.118, + "step": 82400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996599839191618e-05, + "loss": 0.1398, + "step": 82500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996595713784183e-05, + "loss": 0.1556, + "step": 82600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965915883767473e-05, + "loss": 0.1388, + "step": 82700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965874629693115e-05, + "loss": 0.1466, + "step": 82800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965833375618763e-05, + "loss": 0.1534, + "step": 82900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965792121544405e-05, + "loss": 0.1421, + "step": 83000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996575086747005e-05, + "loss": 0.1453, + "step": 83100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965709613395695e-05, + "loss": 0.1439, + "step": 83200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996566835932134e-05, + "loss": 0.1482, + "step": 83300 + }, + { + "epoch": 3.0, + "learning_rate": 2.996562710524698e-05, + "loss": 0.1651, + "step": 83400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996558585117263e-05, + "loss": 0.174, + "step": 83500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996554459709827e-05, + "loss": 0.1261, + "step": 83600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965503343023916e-05, + "loss": 0.1385, + "step": 83700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996546208894956e-05, + "loss": 0.1411, + "step": 83800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965420834875206e-05, + "loss": 0.1117, + "step": 83900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965379580800848e-05, + "loss": 0.1382, + "step": 84000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965338326726493e-05, + "loss": 0.1223, + "step": 84100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965297072652138e-05, + "loss": 0.148, + "step": 84200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965255818577783e-05, + "loss": 0.1443, + "step": 84300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965214564503424e-05, + "loss": 0.153, + "step": 84400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965173310429073e-05, + "loss": 0.1436, + "step": 84500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965132056354714e-05, + "loss": 0.1508, + "step": 84600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996509080228036e-05, + "loss": 0.1631, + "step": 84700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9965049548206008e-05, + "loss": 0.1436, + "step": 84800 + }, + { + "epoch": 3.0, + "learning_rate": 2.996500829413165e-05, + "loss": 0.1492, + "step": 84900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964967040057294e-05, + "loss": 0.1493, + "step": 85000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996492578598294e-05, + "loss": 0.1386, + "step": 85100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964884531908584e-05, + "loss": 0.1495, + "step": 85200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964843277834226e-05, + "loss": 0.1346, + "step": 85300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964802023759874e-05, + "loss": 0.1273, + "step": 85400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964760769685516e-05, + "loss": 0.1391, + "step": 85500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996471951561116e-05, + "loss": 0.1252, + "step": 85600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964678261536806e-05, + "loss": 0.1555, + "step": 85700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996463700746245e-05, + "loss": 0.1366, + "step": 85800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964595753388092e-05, + "loss": 0.1052, + "step": 85900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996455449931374e-05, + "loss": 0.1499, + "step": 86000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964513245239382e-05, + "loss": 0.1135, + "step": 86100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964471991165027e-05, + "loss": 0.1344, + "step": 86200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964430737090672e-05, + "loss": 0.13, + "step": 86300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964389483016317e-05, + "loss": 0.1346, + "step": 86400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996434822894196e-05, + "loss": 0.1274, + "step": 86500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964306974867607e-05, + "loss": 0.1374, + "step": 86600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996426572079325e-05, + "loss": 0.1353, + "step": 86700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964224466718894e-05, + "loss": 0.1307, + "step": 86800 + }, + { + "epoch": 3.0, + "learning_rate": 2.996418362518528e-05, + "loss": 0.1063, + "step": 86900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996414237111093e-05, + "loss": 0.1176, + "step": 87000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996410111703657e-05, + "loss": 0.1491, + "step": 87100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9964059862962216e-05, + "loss": 0.1189, + "step": 87200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996401860888786e-05, + "loss": 0.1142, + "step": 87300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963977354813506e-05, + "loss": 0.1296, + "step": 87400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963936100739148e-05, + "loss": 0.1116, + "step": 87500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963894846664796e-05, + "loss": 0.1184, + "step": 87600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963853592590438e-05, + "loss": 0.1217, + "step": 87700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963812338516083e-05, + "loss": 0.1114, + "step": 87800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963771084441728e-05, + "loss": 0.1036, + "step": 87900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963729830367373e-05, + "loss": 0.1306, + "step": 88000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963688576293014e-05, + "loss": 0.1135, + "step": 88100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963647734759405e-05, + "loss": 0.113, + "step": 88200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996360648068505e-05, + "loss": 0.1347, + "step": 88300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963565226610695e-05, + "loss": 0.1365, + "step": 88400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963523972536337e-05, + "loss": 0.1104, + "step": 88500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963482718461985e-05, + "loss": 0.1048, + "step": 88600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963441464387627e-05, + "loss": 0.1142, + "step": 88700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996340021031327e-05, + "loss": 0.0926, + "step": 88800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963358956238917e-05, + "loss": 0.0777, + "step": 88900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996331770216456e-05, + "loss": 0.1193, + "step": 89000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963276448090203e-05, + "loss": 0.1257, + "step": 89100 + }, + { + "epoch": 3.0, + "learning_rate": 2.996323519401585e-05, + "loss": 0.1059, + "step": 89200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963193939941493e-05, + "loss": 0.114, + "step": 89300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963152685867138e-05, + "loss": 0.116, + "step": 89400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963111431792783e-05, + "loss": 0.1223, + "step": 89500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963070177718428e-05, + "loss": 0.1025, + "step": 89600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9963028923644073e-05, + "loss": 0.1222, + "step": 89700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962987669569718e-05, + "loss": 0.1044, + "step": 89800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962946415495363e-05, + "loss": 0.0695, + "step": 89900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962905161421004e-05, + "loss": 0.1164, + "step": 90000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962863907346653e-05, + "loss": 0.0778, + "step": 90100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962822653272294e-05, + "loss": 0.1077, + "step": 90200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996278139919794e-05, + "loss": 0.1145, + "step": 90300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962740145123584e-05, + "loss": 0.0945, + "step": 90400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996269889104923e-05, + "loss": 0.1119, + "step": 90500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996265763697487e-05, + "loss": 0.1222, + "step": 90600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996261638290052e-05, + "loss": 0.1044, + "step": 90700 + }, + { + "epoch": 3.0, + "learning_rate": 2.996257512882616e-05, + "loss": 0.1253, + "step": 90800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962533874751806e-05, + "loss": 0.1137, + "step": 90900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996249262067745e-05, + "loss": 0.0822, + "step": 91000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962451366603096e-05, + "loss": 0.099, + "step": 91100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962410112528737e-05, + "loss": 0.1331, + "step": 91200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962368858454386e-05, + "loss": 0.097, + "step": 91300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962327604380027e-05, + "loss": 0.1154, + "step": 91400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962286350305672e-05, + "loss": 0.1002, + "step": 91500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962245096231317e-05, + "loss": 0.1111, + "step": 91600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962203842156962e-05, + "loss": 0.1211, + "step": 91700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962162588082607e-05, + "loss": 0.1011, + "step": 91800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962121334008252e-05, + "loss": 0.1321, + "step": 91900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9962080079933897e-05, + "loss": 0.103, + "step": 92000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996203882585954e-05, + "loss": 0.1161, + "step": 92100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961997571785187e-05, + "loss": 0.1111, + "step": 92200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996195631771083e-05, + "loss": 0.1105, + "step": 92300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961915063636474e-05, + "loss": 0.1121, + "step": 92400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996187380956212e-05, + "loss": 0.1265, + "step": 92500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961832968028503e-05, + "loss": 0.1109, + "step": 92600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996179171395415e-05, + "loss": 0.1032, + "step": 92700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961750459879793e-05, + "loss": 0.1122, + "step": 92800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961709618346184e-05, + "loss": 0.1165, + "step": 92900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961668364271825e-05, + "loss": 0.1329, + "step": 93000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961627110197474e-05, + "loss": 0.1272, + "step": 93100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961585856123115e-05, + "loss": 0.1092, + "step": 93200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996154460204876e-05, + "loss": 0.1252, + "step": 93300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961503347974405e-05, + "loss": 0.1195, + "step": 93400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996146209390005e-05, + "loss": 0.0835, + "step": 93500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961420839825692e-05, + "loss": 0.1005, + "step": 93600 + }, + { + "epoch": 3.0, + "learning_rate": 2.996137958575134e-05, + "loss": 0.1201, + "step": 93700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961338331676982e-05, + "loss": 0.1167, + "step": 93800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961297077602627e-05, + "loss": 0.1121, + "step": 93900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961255823528272e-05, + "loss": 0.1146, + "step": 94000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961214569453917e-05, + "loss": 0.1087, + "step": 94100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961173315379558e-05, + "loss": 0.1156, + "step": 94200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961132061305207e-05, + "loss": 0.1016, + "step": 94300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961090807230848e-05, + "loss": 0.1064, + "step": 94400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9961049553156493e-05, + "loss": 0.0906, + "step": 94500 + }, + { + "epoch": 3.0, + "learning_rate": 2.996100829908214e-05, + "loss": 0.0982, + "step": 94600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960967045007783e-05, + "loss": 0.0944, + "step": 94700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960925790933428e-05, + "loss": 0.0984, + "step": 94800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960884536859073e-05, + "loss": 0.1191, + "step": 94900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960843282784718e-05, + "loss": 0.0901, + "step": 95000 + }, + { + "epoch": 3.0, + "learning_rate": 2.996080202871036e-05, + "loss": 0.0836, + "step": 95100 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960760774636008e-05, + "loss": 0.1309, + "step": 95200 + }, + { + "epoch": 3.0, + "learning_rate": 2.996071952056165e-05, + "loss": 0.113, + "step": 95300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960678266487295e-05, + "loss": 0.1097, + "step": 95400 + }, + { + "epoch": 3.0, + "learning_rate": 2.996063701241294e-05, + "loss": 0.1448, + "step": 95500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960595758338584e-05, + "loss": 0.1221, + "step": 95600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960554504264226e-05, + "loss": 0.0998, + "step": 95700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960513250189874e-05, + "loss": 0.0946, + "step": 95800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960471996115516e-05, + "loss": 0.1486, + "step": 95900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996043074204116e-05, + "loss": 0.1168, + "step": 96000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960389487966806e-05, + "loss": 0.1024, + "step": 96100 + }, + { + "epoch": 3.0, + "learning_rate": 2.996034823389245e-05, + "loss": 0.1186, + "step": 96200 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960306979818092e-05, + "loss": 0.0986, + "step": 96300 + }, + { + "epoch": 3.0, + "learning_rate": 2.996026572574374e-05, + "loss": 0.1118, + "step": 96400 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960224471669382e-05, + "loss": 0.1072, + "step": 96500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960183217595027e-05, + "loss": 0.1039, + "step": 96600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960141963520676e-05, + "loss": 0.1214, + "step": 96700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960100709446317e-05, + "loss": 0.1225, + "step": 96800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9960059455371962e-05, + "loss": 0.1082, + "step": 96900 + }, + { + "epoch": 3.0, + "learning_rate": 2.996001861383835e-05, + "loss": 0.1089, + "step": 97000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959977359763995e-05, + "loss": 0.1109, + "step": 97100 + }, + { + "epoch": 3.0, + "learning_rate": 2.995993610568964e-05, + "loss": 0.1302, + "step": 97200 + }, + { + "epoch": 3.0, + "learning_rate": 2.995989485161528e-05, + "loss": 0.1174, + "step": 97300 + }, + { + "epoch": 3.0, + "learning_rate": 2.995985359754093e-05, + "loss": 0.1003, + "step": 97400 + }, + { + "epoch": 3.0, + "learning_rate": 2.995981234346657e-05, + "loss": 0.1141, + "step": 97500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959771089392216e-05, + "loss": 0.1078, + "step": 97600 + }, + { + "epoch": 3.0, + "learning_rate": 2.995972983531786e-05, + "loss": 0.1351, + "step": 97700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959688581243506e-05, + "loss": 0.1077, + "step": 97800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959647327169148e-05, + "loss": 0.1125, + "step": 97900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959606073094796e-05, + "loss": 0.1085, + "step": 98000 + }, + { + "epoch": 3.0, + "learning_rate": 2.995956481902044e-05, + "loss": 0.1107, + "step": 98100 + }, + { + "epoch": 3.0, + "learning_rate": 2.995952397748683e-05, + "loss": 0.1362, + "step": 98200 + }, + { + "epoch": 3.0, + "learning_rate": 2.995948272341247e-05, + "loss": 0.11, + "step": 98300 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959441469338115e-05, + "loss": 0.1443, + "step": 98400 + }, + { + "epoch": 3.0, + "learning_rate": 2.995940021526376e-05, + "loss": 0.1067, + "step": 98500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959358961189405e-05, + "loss": 0.1241, + "step": 98600 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959317707115047e-05, + "loss": 0.1221, + "step": 98700 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959276453040695e-05, + "loss": 0.1182, + "step": 98800 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959235198966337e-05, + "loss": 0.1437, + "step": 98900 + }, + { + "epoch": 3.0, + "learning_rate": 2.9959193944891982e-05, + "loss": 0.1188, + "step": 99000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9421570213343179, + "eval_f1": 0.9421534578908957, + "eval_loss": 0.15225644409656525, + "eval_matthews_correlation": 0.8844212104135638, + "eval_precision": 0.942264672299383, + "eval_recall": 0.9421565447239119, + "eval_runtime": 1388.3348, + "eval_samples_per_second": 2979.353, + "eval_steps_per_second": 2979.353, + "step": 99076 + } + ], + "max_steps": 72720140, + "num_train_epochs": 5, + "total_flos": 1.7037705236761805e+18, + "trial_name": null, + "trial_params": null +}