|
{ |
|
"best_metric": 0.49101120233535767, |
|
"best_model_checkpoint": "/home/jcanete/ft-data/all_results/xnli/bertin_roberta_base/epochs_3_bs_16_lr_1e-5/checkpoint-22000", |
|
"epoch": 3.0, |
|
"global_step": 73632, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.932366362451109e-06, |
|
"loss": 1.0444, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.864461103867885e-06, |
|
"loss": 0.8226, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.79655584528466e-06, |
|
"loss": 0.7505, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.728650586701435e-06, |
|
"loss": 0.7012, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.7076305220883534, |
|
"eval_loss": 0.715941309928894, |
|
"eval_runtime": 1.744, |
|
"eval_samples_per_second": 1427.79, |
|
"eval_steps_per_second": 89.452, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.660881138635376e-06, |
|
"loss": 0.6765, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.592975880052153e-06, |
|
"loss": 0.6657, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.525070621468926e-06, |
|
"loss": 0.6506, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.457165362885704e-06, |
|
"loss": 0.6259, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7538152610441767, |
|
"eval_loss": 0.6095600724220276, |
|
"eval_runtime": 1.7428, |
|
"eval_samples_per_second": 1428.771, |
|
"eval_steps_per_second": 89.513, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.389260104302477e-06, |
|
"loss": 0.6367, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.321354845719254e-06, |
|
"loss": 0.6355, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.253449587136028e-06, |
|
"loss": 0.6289, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.185544328552804e-06, |
|
"loss": 0.6059, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.7526104417670683, |
|
"eval_loss": 0.6048593521118164, |
|
"eval_runtime": 1.7395, |
|
"eval_samples_per_second": 1431.463, |
|
"eval_steps_per_second": 89.682, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.117639069969579e-06, |
|
"loss": 0.608, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.049869621903521e-06, |
|
"loss": 0.6132, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.981964363320296e-06, |
|
"loss": 0.6056, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.914059104737072e-06, |
|
"loss": 0.5869, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.765863453815261, |
|
"eval_loss": 0.5669301152229309, |
|
"eval_runtime": 1.7452, |
|
"eval_samples_per_second": 1426.778, |
|
"eval_steps_per_second": 89.388, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.846153846153847e-06, |
|
"loss": 0.5859, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.778384398087787e-06, |
|
"loss": 0.5757, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.710479139504565e-06, |
|
"loss": 0.5837, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.642573880921338e-06, |
|
"loss": 0.5783, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.7746987951807229, |
|
"eval_loss": 0.5589046478271484, |
|
"eval_runtime": 1.7403, |
|
"eval_samples_per_second": 1430.765, |
|
"eval_steps_per_second": 89.638, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.574668622338115e-06, |
|
"loss": 0.5792, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.506763363754889e-06, |
|
"loss": 0.5693, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.438858105171666e-06, |
|
"loss": 0.5649, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.371088657105606e-06, |
|
"loss": 0.5557, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.7819277108433735, |
|
"eval_loss": 0.5610175132751465, |
|
"eval_runtime": 1.7411, |
|
"eval_samples_per_second": 1430.157, |
|
"eval_steps_per_second": 89.6, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.303183398522382e-06, |
|
"loss": 0.5548, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.235278139939157e-06, |
|
"loss": 0.5766, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.167372881355933e-06, |
|
"loss": 0.5551, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.099467622772708e-06, |
|
"loss": 0.5614, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.7819277108433735, |
|
"eval_loss": 0.5594050884246826, |
|
"eval_runtime": 1.7412, |
|
"eval_samples_per_second": 1430.056, |
|
"eval_steps_per_second": 89.594, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.031562364189484e-06, |
|
"loss": 0.5486, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.963657105606259e-06, |
|
"loss": 0.5678, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.895751847023034e-06, |
|
"loss": 0.5458, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.827982398956976e-06, |
|
"loss": 0.5492, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.7927710843373494, |
|
"eval_loss": 0.5257371068000793, |
|
"eval_runtime": 1.7251, |
|
"eval_samples_per_second": 1443.436, |
|
"eval_steps_per_second": 90.432, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.76007714037375e-06, |
|
"loss": 0.5466, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.692171881790527e-06, |
|
"loss": 0.5364, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.624266623207302e-06, |
|
"loss": 0.5526, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.556497175141243e-06, |
|
"loss": 0.526, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.7730923694779116, |
|
"eval_loss": 0.5546149611473083, |
|
"eval_runtime": 1.7312, |
|
"eval_samples_per_second": 1438.268, |
|
"eval_steps_per_second": 90.108, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.488591916558019e-06, |
|
"loss": 0.5278, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.420686657974794e-06, |
|
"loss": 0.5469, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.35278139939157e-06, |
|
"loss": 0.5406, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.2848761408083445e-06, |
|
"loss": 0.5241, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.7975903614457831, |
|
"eval_loss": 0.5097489953041077, |
|
"eval_runtime": 1.7046, |
|
"eval_samples_per_second": 1460.741, |
|
"eval_steps_per_second": 91.516, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.216970882225121e-06, |
|
"loss": 0.5384, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.149065623641895e-06, |
|
"loss": 0.5353, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.08116036505867e-06, |
|
"loss": 0.5194, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.013255106475446e-06, |
|
"loss": 0.5358, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.8064257028112449, |
|
"eval_loss": 0.49101120233535767, |
|
"eval_runtime": 1.7316, |
|
"eval_samples_per_second": 1437.957, |
|
"eval_steps_per_second": 90.089, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.945485658409388e-06, |
|
"loss": 0.525, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.877580399826163e-06, |
|
"loss": 0.5274, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.809675141242938e-06, |
|
"loss": 0.5208, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.7417698826597135e-06, |
|
"loss": 0.5327, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.7899598393574297, |
|
"eval_loss": 0.5255107283592224, |
|
"eval_runtime": 1.7081, |
|
"eval_samples_per_second": 1457.767, |
|
"eval_steps_per_second": 91.33, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.674000434593656e-06, |
|
"loss": 0.5227, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.606095176010431e-06, |
|
"loss": 0.3802, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.538189917427205e-06, |
|
"loss": 0.3898, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.470420469361148e-06, |
|
"loss": 0.3598, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.797991967871486, |
|
"eval_loss": 0.5192009210586548, |
|
"eval_runtime": 1.7342, |
|
"eval_samples_per_second": 1435.797, |
|
"eval_steps_per_second": 89.954, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.4025152107779235e-06, |
|
"loss": 0.3638, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.334609952194698e-06, |
|
"loss": 0.3636, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.2667046936114736e-06, |
|
"loss": 0.3698, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.198799435028249e-06, |
|
"loss": 0.3635, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.8076305220883534, |
|
"eval_loss": 0.5339061617851257, |
|
"eval_runtime": 1.707, |
|
"eval_samples_per_second": 1458.725, |
|
"eval_steps_per_second": 91.39, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.13102998696219e-06, |
|
"loss": 0.3472, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.063124728378966e-06, |
|
"loss": 0.3599, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5.995219469795741e-06, |
|
"loss": 0.3521, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.927314211212517e-06, |
|
"loss": 0.3633, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.8040160642570281, |
|
"eval_loss": 0.5154483914375305, |
|
"eval_runtime": 1.7227, |
|
"eval_samples_per_second": 1445.384, |
|
"eval_steps_per_second": 90.554, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.859408952629292e-06, |
|
"loss": 0.3621, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.791503694046068e-06, |
|
"loss": 0.3604, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.723734245980009e-06, |
|
"loss": 0.355, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.6558289873967845e-06, |
|
"loss": 0.3586, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.7975903614457831, |
|
"eval_loss": 0.5583391785621643, |
|
"eval_runtime": 1.7322, |
|
"eval_samples_per_second": 1437.489, |
|
"eval_steps_per_second": 90.06, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.58792372881356e-06, |
|
"loss": 0.3527, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.520018470230335e-06, |
|
"loss": 0.3644, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.452249022164276e-06, |
|
"loss": 0.3769, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.384343763581053e-06, |
|
"loss": 0.3614, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.8056224899598393, |
|
"eval_loss": 0.5339176058769226, |
|
"eval_runtime": 1.706, |
|
"eval_samples_per_second": 1459.585, |
|
"eval_steps_per_second": 91.444, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.316438504997827e-06, |
|
"loss": 0.3577, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.2485332464146035e-06, |
|
"loss": 0.3593, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.1807637983485445e-06, |
|
"loss": 0.3475, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.11285853976532e-06, |
|
"loss": 0.3631, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.8076305220883534, |
|
"eval_loss": 0.5045333504676819, |
|
"eval_runtime": 1.7273, |
|
"eval_samples_per_second": 1441.576, |
|
"eval_steps_per_second": 90.316, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.044953281182095e-06, |
|
"loss": 0.3523, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.977048022598871e-06, |
|
"loss": 0.3507, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.909142764015646e-06, |
|
"loss": 0.3602, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.841373315949587e-06, |
|
"loss": 0.3542, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.5346028804779053, |
|
"eval_runtime": 1.7369, |
|
"eval_samples_per_second": 1433.604, |
|
"eval_steps_per_second": 89.816, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.773468057366363e-06, |
|
"loss": 0.3527, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.705562798783138e-06, |
|
"loss": 0.3753, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.6376575401999135e-06, |
|
"loss": 0.3655, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.569752281616689e-06, |
|
"loss": 0.3564, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.8008032128514057, |
|
"eval_loss": 0.5383270382881165, |
|
"eval_runtime": 1.7313, |
|
"eval_samples_per_second": 1438.187, |
|
"eval_steps_per_second": 90.103, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.501982833550631e-06, |
|
"loss": 0.3574, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.434077574967406e-06, |
|
"loss": 0.3566, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.366172316384181e-06, |
|
"loss": 0.3629, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.298267057800956e-06, |
|
"loss": 0.3508, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.7987951807228916, |
|
"eval_loss": 0.5298879742622375, |
|
"eval_runtime": 1.7287, |
|
"eval_samples_per_second": 1440.391, |
|
"eval_steps_per_second": 90.241, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.230497609734898e-06, |
|
"loss": 0.3587, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.162592351151674e-06, |
|
"loss": 0.3622, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.094687092568449e-06, |
|
"loss": 0.3599, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.0267818339852245e-06, |
|
"loss": 0.354, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.7927710843373494, |
|
"eval_loss": 0.5385558605194092, |
|
"eval_runtime": 1.733, |
|
"eval_samples_per_second": 1436.852, |
|
"eval_steps_per_second": 90.02, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.958876575401999e-06, |
|
"loss": 0.3661, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.891107127335941e-06, |
|
"loss": 0.3614, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.823201868752716e-06, |
|
"loss": 0.3561, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.755296610169492e-06, |
|
"loss": 0.3622, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.8100401606425702, |
|
"eval_loss": 0.5162017941474915, |
|
"eval_runtime": 1.7327, |
|
"eval_samples_per_second": 1437.039, |
|
"eval_steps_per_second": 90.031, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.6873913515862668e-06, |
|
"loss": 0.3472, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.619621903520209e-06, |
|
"loss": 0.3381, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5517166449369845e-06, |
|
"loss": 0.3443, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.483811386353759e-06, |
|
"loss": 0.3559, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.8060240963855422, |
|
"eval_loss": 0.513914942741394, |
|
"eval_runtime": 1.7383, |
|
"eval_samples_per_second": 1432.408, |
|
"eval_steps_per_second": 89.741, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4159061277705345e-06, |
|
"loss": 0.3518, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.34800086918731e-06, |
|
"loss": 0.3554, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.2803672316384183e-06, |
|
"loss": 0.2133, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2124619730551937e-06, |
|
"loss": 0.1896, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.7947791164658634, |
|
"eval_loss": 0.7987900972366333, |
|
"eval_runtime": 1.7334, |
|
"eval_samples_per_second": 1436.451, |
|
"eval_steps_per_second": 89.995, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.144556714471969e-06, |
|
"loss": 0.1934, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.0766514558887446e-06, |
|
"loss": 0.1986, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.008882007822686e-06, |
|
"loss": 0.1973, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.9409767492394614e-06, |
|
"loss": 0.1867, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.7987951807228916, |
|
"eval_loss": 0.8654219508171082, |
|
"eval_runtime": 1.7101, |
|
"eval_samples_per_second": 1456.025, |
|
"eval_steps_per_second": 91.221, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.873071490656237e-06, |
|
"loss": 0.2036, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.8051662320730123e-06, |
|
"loss": 0.1956, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.737260973489787e-06, |
|
"loss": 0.1934, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.669491525423729e-06, |
|
"loss": 0.1945, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.804417670682731, |
|
"eval_loss": 0.8309808373451233, |
|
"eval_runtime": 1.7106, |
|
"eval_samples_per_second": 1455.613, |
|
"eval_steps_per_second": 91.195, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.6015862668405046e-06, |
|
"loss": 0.1885, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.53368100825728e-06, |
|
"loss": 0.1949, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.465775749674055e-06, |
|
"loss": 0.2041, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.39787049109083e-06, |
|
"loss": 0.2005, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.7899598393574297, |
|
"eval_loss": 0.9045226573944092, |
|
"eval_runtime": 1.7241, |
|
"eval_samples_per_second": 1444.229, |
|
"eval_steps_per_second": 90.482, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3299652325076055e-06, |
|
"loss": 0.1869, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.262059973924381e-06, |
|
"loss": 0.1704, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.1942905258583228e-06, |
|
"loss": 0.197, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1263852672750978e-06, |
|
"loss": 0.1879, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.8036144578313253, |
|
"eval_loss": 0.8717219233512878, |
|
"eval_runtime": 1.7314, |
|
"eval_samples_per_second": 1438.131, |
|
"eval_steps_per_second": 90.1, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.058480008691873e-06, |
|
"loss": 0.1924, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.9905747501086486e-06, |
|
"loss": 0.2094, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9226694915254236e-06, |
|
"loss": 0.1904, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8549000434593655e-06, |
|
"loss": 0.2007, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.7995983935742972, |
|
"eval_loss": 0.8504888415336609, |
|
"eval_runtime": 1.7179, |
|
"eval_samples_per_second": 1449.471, |
|
"eval_steps_per_second": 90.81, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.786994784876141e-06, |
|
"loss": 0.1967, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.7190895262929164e-06, |
|
"loss": 0.1954, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6511842677096914e-06, |
|
"loss": 0.1831, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5832790091264668e-06, |
|
"loss": 0.1861, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.7983935742971887, |
|
"eval_loss": 0.8534940481185913, |
|
"eval_runtime": 1.7626, |
|
"eval_samples_per_second": 1412.7, |
|
"eval_steps_per_second": 88.507, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.5153737505432422e-06, |
|
"loss": 0.1869, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4476043024771841e-06, |
|
"loss": 0.1767, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3796990438939591e-06, |
|
"loss": 0.1902, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.3117937853107346e-06, |
|
"loss": 0.1836, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8850274085998535, |
|
"eval_runtime": 1.7307, |
|
"eval_samples_per_second": 1438.73, |
|
"eval_steps_per_second": 90.137, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.2438885267275098e-06, |
|
"loss": 0.1765, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1761190786614516e-06, |
|
"loss": 0.1877, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1082138200782269e-06, |
|
"loss": 0.189, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.0403085614950023e-06, |
|
"loss": 0.2009, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8615464568138123, |
|
"eval_runtime": 1.7274, |
|
"eval_samples_per_second": 1441.493, |
|
"eval_steps_per_second": 90.31, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.724033029117775e-07, |
|
"loss": 0.1853, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.044980443285528e-07, |
|
"loss": 0.1793, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.365927857453282e-07, |
|
"loss": 0.1938, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.686875271621035e-07, |
|
"loss": 0.1809, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.7991967871485943, |
|
"eval_loss": 0.9009087085723877, |
|
"eval_runtime": 1.7248, |
|
"eval_samples_per_second": 1443.619, |
|
"eval_steps_per_second": 90.444, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.009180790960452e-07, |
|
"loss": 0.1958, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.330128205128206e-07, |
|
"loss": 0.1866, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.651075619295959e-07, |
|
"loss": 0.189, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.972023033463712e-07, |
|
"loss": 0.1885, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.8064257028112449, |
|
"eval_loss": 0.89076167345047, |
|
"eval_runtime": 1.7155, |
|
"eval_samples_per_second": 1451.454, |
|
"eval_steps_per_second": 90.934, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.292970447631465e-07, |
|
"loss": 0.1845, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.613917861799218e-07, |
|
"loss": 0.1844, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.936223381138636e-07, |
|
"loss": 0.1919, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.2571707953063886e-07, |
|
"loss": 0.1941, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.8036144578313253, |
|
"eval_loss": 0.8937243819236755, |
|
"eval_runtime": 1.7336, |
|
"eval_samples_per_second": 1436.277, |
|
"eval_steps_per_second": 89.984, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.5781182094741418e-07, |
|
"loss": 0.1873, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.990656236418949e-08, |
|
"loss": 0.1923, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.2001303780964798e-08, |
|
"loss": 0.1831, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 73632, |
|
"total_flos": 5.651008562499955e+16, |
|
"train_loss": 0.38008860063159117, |
|
"train_runtime": 3735.6095, |
|
"train_samples_per_second": 315.372, |
|
"train_steps_per_second": 19.711 |
|
} |
|
], |
|
"max_steps": 73632, |
|
"num_train_epochs": 3, |
|
"total_flos": 5.651008562499955e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|