{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 68478,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.963491924413681e-05,
      "loss": 2.5929,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9269838488273604e-05,
      "loss": 2.2003,
      "step": 1000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.890475773241041e-05,
      "loss": 2.0692,
      "step": 1500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.853967697654722e-05,
      "loss": 1.9485,
      "step": 2000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.817459622068401e-05,
      "loss": 1.9084,
      "step": 2500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.780951546482082e-05,
      "loss": 1.8495,
      "step": 3000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.744443470895763e-05,
      "loss": 1.8025,
      "step": 3500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.707935395309443e-05,
      "loss": 1.7957,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.671427319723123e-05,
      "loss": 1.7414,
      "step": 4500
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.634919244136804e-05,
      "loss": 1.7284,
      "step": 5000
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.598411168550484e-05,
      "loss": 1.7033,
      "step": 5500
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.561903092964164e-05,
      "loss": 1.705,
      "step": 6000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.525395017377844e-05,
      "loss": 1.667,
      "step": 6500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.4888869417915246e-05,
      "loss": 1.6606,
      "step": 7000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.452378866205205e-05,
      "loss": 1.6179,
      "step": 7500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.415870790618885e-05,
      "loss": 1.5977,
      "step": 8000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.3793627150325655e-05,
      "loss": 1.5827,
      "step": 8500
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.3428546394462456e-05,
      "loss": 1.5621,
      "step": 9000
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.306346563859926e-05,
      "loss": 1.5675,
      "step": 9500
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.2698384882736064e-05,
      "loss": 1.5729,
      "step": 10000
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.2333304126872865e-05,
      "loss": 1.546,
      "step": 10500
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.1968223371009666e-05,
      "loss": 1.5546,
      "step": 11000
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.160314261514647e-05,
      "loss": 1.5163,
      "step": 11500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.123806185928328e-05,
      "loss": 1.5097,
      "step": 12000
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.0872981103420075e-05,
      "loss": 1.5074,
      "step": 12500
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.050790034755688e-05,
      "loss": 1.4856,
      "step": 13000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.014281959169369e-05,
      "loss": 1.5068,
      "step": 13500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.9777738835830483e-05,
      "loss": 1.5035,
      "step": 14000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.941265807996729e-05,
      "loss": 1.4676,
      "step": 14500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.90475773241041e-05,
      "loss": 1.4623,
      "step": 15000
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.868249656824089e-05,
      "loss": 1.4472,
      "step": 15500
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.83174158123777e-05,
      "loss": 1.447,
      "step": 16000
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.795233505651451e-05,
      "loss": 1.4557,
      "step": 16500
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.75872543006513e-05,
      "loss": 1.4439,
      "step": 17000
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.722217354478811e-05,
      "loss": 1.4332,
      "step": 17500
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.685709278892491e-05,
      "loss": 1.4365,
      "step": 18000
    },
    {
      "epoch": 0.81,
      "learning_rate": 3.649201203306172e-05,
      "loss": 1.4098,
      "step": 18500
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.612693127719852e-05,
      "loss": 1.4125,
      "step": 19000
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.576185052133532e-05,
      "loss": 1.4088,
      "step": 19500
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.5396769765472126e-05,
      "loss": 1.3968,
      "step": 20000
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.503168900960893e-05,
      "loss": 1.4125,
      "step": 20500
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.466660825374573e-05,
      "loss": 1.385,
      "step": 21000
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.4301527497882535e-05,
      "loss": 1.3985,
      "step": 21500
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.3936446742019336e-05,
      "loss": 1.3747,
      "step": 22000
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.3571365986156136e-05,
      "loss": 1.3743,
      "step": 22500
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.3206285230292944e-05,
      "loss": 1.3065,
      "step": 23000
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.2841204474429745e-05,
      "loss": 1.1883,
      "step": 23500
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.2476123718566545e-05,
      "loss": 1.2017,
      "step": 24000
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.211104296270335e-05,
      "loss": 1.1919,
      "step": 24500
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.1745962206840153e-05,
      "loss": 1.1873,
      "step": 25000
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.1380881450976954e-05,
      "loss": 1.2045,
      "step": 25500
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.101580069511376e-05,
      "loss": 1.1915,
      "step": 26000
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.065071993925057e-05,
      "loss": 1.1932,
      "step": 26500
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0285639183387367e-05,
      "loss": 1.1793,
      "step": 27000
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.992055842752417e-05,
      "loss": 1.2086,
      "step": 27500
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.9555477671660975e-05,
      "loss": 1.2023,
      "step": 28000
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.9190396915797775e-05,
      "loss": 1.1953,
      "step": 28500
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.882531615993458e-05,
      "loss": 1.2049,
      "step": 29000
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.8460235404071384e-05,
      "loss": 1.1931,
      "step": 29500
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.8095154648208184e-05,
      "loss": 1.197,
      "step": 30000
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.773007389234499e-05,
      "loss": 1.1704,
      "step": 30500
    },
    {
      "epoch": 1.36,
      "learning_rate": 2.7364993136481793e-05,
      "loss": 1.1548,
      "step": 31000
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.6999912380618593e-05,
      "loss": 1.187,
      "step": 31500
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.6634831624755397e-05,
      "loss": 1.1769,
      "step": 32000
    },
    {
      "epoch": 1.42,
      "learning_rate": 2.62697508688922e-05,
      "loss": 1.181,
      "step": 32500
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.5904670113029006e-05,
      "loss": 1.1564,
      "step": 33000
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.5539589357165806e-05,
      "loss": 1.1742,
      "step": 33500
    },
    {
      "epoch": 1.49,
      "learning_rate": 2.517450860130261e-05,
      "loss": 1.1651,
      "step": 34000
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.480942784543941e-05,
      "loss": 1.1637,
      "step": 34500
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.4444347089576215e-05,
      "loss": 1.1531,
      "step": 35000
    },
    {
      "epoch": 1.56,
      "learning_rate": 2.407926633371302e-05,
      "loss": 1.1703,
      "step": 35500
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.371418557784982e-05,
      "loss": 1.1482,
      "step": 36000
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.3349104821986624e-05,
      "loss": 1.1501,
      "step": 36500
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.2984024066123428e-05,
      "loss": 1.1525,
      "step": 37000
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.261894331026023e-05,
      "loss": 1.1576,
      "step": 37500
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.2253862554397033e-05,
      "loss": 1.1518,
      "step": 38000
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.1888781798533837e-05,
      "loss": 1.1594,
      "step": 38500
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.152370104267064e-05,
      "loss": 1.1614,
      "step": 39000
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.1158620286807442e-05,
      "loss": 1.185,
      "step": 39500
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.0793539530944246e-05,
      "loss": 1.1399,
      "step": 40000
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.042845877508105e-05,
      "loss": 1.1262,
      "step": 40500
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.006337801921785e-05,
      "loss": 1.1458,
      "step": 41000
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.9698297263354655e-05,
      "loss": 1.1379,
      "step": 41500
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.933321650749146e-05,
      "loss": 1.1437,
      "step": 42000
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.896813575162826e-05,
      "loss": 1.1161,
      "step": 42500
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8603054995765064e-05,
      "loss": 1.1205,
      "step": 43000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8237974239901868e-05,
      "loss": 1.1422,
      "step": 43500
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.787289348403867e-05,
      "loss": 1.1265,
      "step": 44000
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.7507812728175473e-05,
      "loss": 1.0999,
      "step": 44500
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.7142731972312277e-05,
      "loss": 1.11,
      "step": 45000
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.677765121644908e-05,
      "loss": 1.1267,
      "step": 45500
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6412570460585882e-05,
      "loss": 1.0115,
      "step": 46000
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.6047489704722686e-05,
      "loss": 0.9535,
      "step": 46500
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.568240894885949e-05,
      "loss": 0.9458,
      "step": 47000
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.531732819299629e-05,
      "loss": 0.9517,
      "step": 47500
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.4952247437133093e-05,
      "loss": 0.9496,
      "step": 48000
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.4587166681269897e-05,
      "loss": 0.9649,
      "step": 48500
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.42220859254067e-05,
      "loss": 0.934,
      "step": 49000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.3857005169543506e-05,
      "loss": 0.9757,
      "step": 49500
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3491924413680306e-05,
      "loss": 0.9349,
      "step": 50000
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.3126843657817109e-05,
      "loss": 0.9548,
      "step": 50500
    },
    {
      "epoch": 2.23,
      "learning_rate": 1.2761762901953914e-05,
      "loss": 0.9426,
      "step": 51000
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.2396682146090715e-05,
      "loss": 0.9543,
      "step": 51500
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.203160139022752e-05,
      "loss": 0.9438,
      "step": 52000
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.1666520634364322e-05,
      "loss": 0.963,
      "step": 52500
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.1301439878501124e-05,
      "loss": 0.9334,
      "step": 53000
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.0936359122637928e-05,
      "loss": 0.9526,
      "step": 53500
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.057127836677473e-05,
      "loss": 0.9397,
      "step": 54000
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.0206197610911535e-05,
      "loss": 0.9569,
      "step": 54500
    },
    {
      "epoch": 2.41,
      "learning_rate": 9.841116855048337e-06,
      "loss": 0.94,
      "step": 55000
    },
    {
      "epoch": 2.43,
      "learning_rate": 9.47603609918514e-06,
      "loss": 0.9345,
      "step": 55500
    },
    {
      "epoch": 2.45,
      "learning_rate": 9.110955343321944e-06,
      "loss": 0.933,
      "step": 56000
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.745874587458746e-06,
      "loss": 0.9289,
      "step": 56500
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.380793831595548e-06,
      "loss": 0.9581,
      "step": 57000
    },
    {
      "epoch": 2.52,
      "learning_rate": 8.015713075732353e-06,
      "loss": 0.9203,
      "step": 57500
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.650632319869155e-06,
      "loss": 0.9539,
      "step": 58000
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.285551564005959e-06,
      "loss": 0.9482,
      "step": 58500
    },
    {
      "epoch": 2.58,
      "learning_rate": 6.920470808142762e-06,
      "loss": 0.9206,
      "step": 59000
    },
    {
      "epoch": 2.61,
      "learning_rate": 6.555390052279564e-06,
      "loss": 0.9186,
      "step": 59500
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.190309296416367e-06,
      "loss": 0.9168,
      "step": 60000
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.825228540553171e-06,
      "loss": 0.9151,
      "step": 60500
    },
    {
      "epoch": 2.67,
      "learning_rate": 5.460147784689974e-06,
      "loss": 0.924,
      "step": 61000
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.095067028826777e-06,
      "loss": 0.916,
      "step": 61500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.729986272963579e-06,
      "loss": 0.9311,
      "step": 62000
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.364905517100383e-06,
      "loss": 0.9175,
      "step": 62500
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.999824761237186e-06,
      "loss": 0.9121,
      "step": 63000
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.634744005373989e-06,
      "loss": 0.9226,
      "step": 63500
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.269663249510792e-06,
      "loss": 0.9335,
      "step": 64000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.9045824936475953e-06,
      "loss": 0.9104,
      "step": 64500
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.539501737784398e-06,
      "loss": 0.9197,
      "step": 65000
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.1744209819212013e-06,
      "loss": 0.8976,
      "step": 65500
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.8093402260580042e-06,
      "loss": 0.9401,
      "step": 66000
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.444259470194807e-06,
      "loss": 0.9265,
      "step": 66500
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.07917871433161e-06,
      "loss": 0.9255,
      "step": 67000
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.140979584684132e-07,
      "loss": 0.9099,
      "step": 67500
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.490172026052163e-07,
      "loss": 0.9033,
      "step": 68000
    },
    {
      "epoch": 3.0,
      "step": 68478,
      "total_flos": 9.218136679415808e+16,
      "train_loss": 1.233144689885432,
      "train_runtime": 17748.5921,
      "train_samples_per_second": 38.582,
      "train_steps_per_second": 3.858
    }
  ],
  "max_steps": 68478,
  "num_train_epochs": 3,
  "total_flos": 9.218136679415808e+16,
  "trial_name": null,
  "trial_params": null
}