misrtal-lucio-7b-v0.1 / trainer_state.json
shadyy's picture
LUCIO_1000
70b9bee
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.487562189054726,
"eval_steps": 25,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 2.43993993993994e-05,
"loss": 0.8106,
"step": 25
},
{
"epoch": 0.06,
"eval_loss": 0.2529616057872772,
"eval_runtime": 9.2377,
"eval_samples_per_second": 9.634,
"eval_steps_per_second": 1.299,
"step": 25
},
{
"epoch": 0.12,
"learning_rate": 2.3773773773773775e-05,
"loss": 0.1936,
"step": 50
},
{
"epoch": 0.12,
"eval_loss": 0.1928737312555313,
"eval_runtime": 9.3374,
"eval_samples_per_second": 9.532,
"eval_steps_per_second": 1.285,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 2.314814814814815e-05,
"loss": 0.1423,
"step": 75
},
{
"epoch": 0.19,
"eval_loss": 0.18249212205410004,
"eval_runtime": 9.3738,
"eval_samples_per_second": 9.495,
"eval_steps_per_second": 1.28,
"step": 75
},
{
"epoch": 0.25,
"learning_rate": 2.2522522522522523e-05,
"loss": 0.1499,
"step": 100
},
{
"epoch": 0.25,
"eval_loss": 0.17995011806488037,
"eval_runtime": 9.4057,
"eval_samples_per_second": 9.462,
"eval_steps_per_second": 1.276,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 2.18968968968969e-05,
"loss": 0.1177,
"step": 125
},
{
"epoch": 0.31,
"eval_loss": 0.174322709441185,
"eval_runtime": 9.4044,
"eval_samples_per_second": 9.464,
"eval_steps_per_second": 1.276,
"step": 125
},
{
"epoch": 0.37,
"learning_rate": 2.1271271271271275e-05,
"loss": 0.128,
"step": 150
},
{
"epoch": 0.37,
"eval_loss": 0.1779523640871048,
"eval_runtime": 9.4208,
"eval_samples_per_second": 9.447,
"eval_steps_per_second": 1.274,
"step": 150
},
{
"epoch": 0.44,
"learning_rate": 2.0645645645645647e-05,
"loss": 0.1023,
"step": 175
},
{
"epoch": 0.44,
"eval_loss": 0.1758423000574112,
"eval_runtime": 9.3522,
"eval_samples_per_second": 9.517,
"eval_steps_per_second": 1.283,
"step": 175
},
{
"epoch": 0.5,
"learning_rate": 2.0020020020020023e-05,
"loss": 0.1145,
"step": 200
},
{
"epoch": 0.5,
"eval_loss": 0.17488867044448853,
"eval_runtime": 9.397,
"eval_samples_per_second": 9.471,
"eval_steps_per_second": 1.277,
"step": 200
},
{
"epoch": 0.56,
"learning_rate": 1.9394394394394395e-05,
"loss": 0.0945,
"step": 225
},
{
"epoch": 0.56,
"eval_loss": 0.1771743893623352,
"eval_runtime": 9.2878,
"eval_samples_per_second": 9.583,
"eval_steps_per_second": 1.292,
"step": 225
},
{
"epoch": 0.62,
"learning_rate": 1.8768768768768768e-05,
"loss": 0.0813,
"step": 250
},
{
"epoch": 0.62,
"eval_loss": 0.21932578086853027,
"eval_runtime": 9.3376,
"eval_samples_per_second": 9.531,
"eval_steps_per_second": 1.285,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.8143143143143144e-05,
"loss": 0.0898,
"step": 275
},
{
"epoch": 0.68,
"eval_loss": 0.17662294209003448,
"eval_runtime": 9.3638,
"eval_samples_per_second": 9.505,
"eval_steps_per_second": 1.282,
"step": 275
},
{
"epoch": 0.75,
"learning_rate": 1.7517517517517516e-05,
"loss": 0.0846,
"step": 300
},
{
"epoch": 0.75,
"eval_loss": 0.1968599408864975,
"eval_runtime": 9.3136,
"eval_samples_per_second": 9.556,
"eval_steps_per_second": 1.288,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 1.6891891891891892e-05,
"loss": 0.0743,
"step": 325
},
{
"epoch": 0.81,
"eval_loss": 0.19715578854084015,
"eval_runtime": 9.4533,
"eval_samples_per_second": 9.415,
"eval_steps_per_second": 1.269,
"step": 325
},
{
"epoch": 0.87,
"learning_rate": 1.6266266266266268e-05,
"loss": 0.0763,
"step": 350
},
{
"epoch": 0.87,
"eval_loss": 0.20438912510871887,
"eval_runtime": 9.3395,
"eval_samples_per_second": 9.529,
"eval_steps_per_second": 1.285,
"step": 350
},
{
"epoch": 0.93,
"learning_rate": 1.564064064064064e-05,
"loss": 0.0742,
"step": 375
},
{
"epoch": 0.93,
"eval_loss": 0.19478727877140045,
"eval_runtime": 9.3251,
"eval_samples_per_second": 9.544,
"eval_steps_per_second": 1.287,
"step": 375
},
{
"epoch": 1.0,
"learning_rate": 1.5015015015015016e-05,
"loss": 0.0642,
"step": 400
},
{
"epoch": 1.0,
"eval_loss": 0.21925467252731323,
"eval_runtime": 9.3222,
"eval_samples_per_second": 9.547,
"eval_steps_per_second": 1.287,
"step": 400
},
{
"epoch": 1.06,
"learning_rate": 1.438938938938939e-05,
"loss": 0.0595,
"step": 425
},
{
"epoch": 1.06,
"eval_loss": 0.2161593735218048,
"eval_runtime": 9.317,
"eval_samples_per_second": 9.552,
"eval_steps_per_second": 1.288,
"step": 425
},
{
"epoch": 1.12,
"learning_rate": 1.3763763763763765e-05,
"loss": 0.0549,
"step": 450
},
{
"epoch": 1.12,
"eval_loss": 0.216683030128479,
"eval_runtime": 9.247,
"eval_samples_per_second": 9.625,
"eval_steps_per_second": 1.298,
"step": 450
},
{
"epoch": 1.18,
"learning_rate": 1.3138138138138139e-05,
"loss": 0.051,
"step": 475
},
{
"epoch": 1.18,
"eval_loss": 0.2373329997062683,
"eval_runtime": 8.9608,
"eval_samples_per_second": 9.932,
"eval_steps_per_second": 1.339,
"step": 475
},
{
"epoch": 1.24,
"learning_rate": 1.2512512512512515e-05,
"loss": 0.0552,
"step": 500
},
{
"epoch": 1.24,
"eval_loss": 0.22366206347942352,
"eval_runtime": 9.3515,
"eval_samples_per_second": 9.517,
"eval_steps_per_second": 1.283,
"step": 500
},
{
"epoch": 1.31,
"learning_rate": 1.1886886886886887e-05,
"loss": 0.0551,
"step": 525
},
{
"epoch": 1.31,
"eval_loss": 0.20705640316009521,
"eval_runtime": 9.3684,
"eval_samples_per_second": 9.5,
"eval_steps_per_second": 1.281,
"step": 525
},
{
"epoch": 1.37,
"learning_rate": 1.1261261261261261e-05,
"loss": 0.0558,
"step": 550
},
{
"epoch": 1.37,
"eval_loss": 0.22326058149337769,
"eval_runtime": 9.355,
"eval_samples_per_second": 9.514,
"eval_steps_per_second": 1.283,
"step": 550
},
{
"epoch": 1.43,
"learning_rate": 1.0635635635635637e-05,
"loss": 0.0576,
"step": 575
},
{
"epoch": 1.43,
"eval_loss": 0.21783779561519623,
"eval_runtime": 9.1966,
"eval_samples_per_second": 9.677,
"eval_steps_per_second": 1.305,
"step": 575
},
{
"epoch": 1.49,
"learning_rate": 1.0010010010010011e-05,
"loss": 0.0534,
"step": 600
},
{
"epoch": 1.49,
"eval_loss": 0.2279473841190338,
"eval_runtime": 9.1641,
"eval_samples_per_second": 9.712,
"eval_steps_per_second": 1.309,
"step": 600
},
{
"epoch": 1.55,
"learning_rate": 9.384384384384384e-06,
"loss": 0.0531,
"step": 625
},
{
"epoch": 1.55,
"eval_loss": 0.230553537607193,
"eval_runtime": 9.2393,
"eval_samples_per_second": 9.633,
"eval_steps_per_second": 1.299,
"step": 625
},
{
"epoch": 1.62,
"learning_rate": 8.758758758758758e-06,
"loss": 0.0507,
"step": 650
},
{
"epoch": 1.62,
"eval_loss": 0.2528133690357208,
"eval_runtime": 9.2368,
"eval_samples_per_second": 9.635,
"eval_steps_per_second": 1.299,
"step": 650
},
{
"epoch": 1.68,
"learning_rate": 8.133133133133134e-06,
"loss": 0.0496,
"step": 675
},
{
"epoch": 1.68,
"eval_loss": 0.27711576223373413,
"eval_runtime": 9.2578,
"eval_samples_per_second": 9.614,
"eval_steps_per_second": 1.296,
"step": 675
},
{
"epoch": 1.74,
"learning_rate": 7.507507507507508e-06,
"loss": 0.0525,
"step": 700
},
{
"epoch": 1.74,
"eval_loss": 0.24864882230758667,
"eval_runtime": 9.3261,
"eval_samples_per_second": 9.543,
"eval_steps_per_second": 1.287,
"step": 700
},
{
"epoch": 1.8,
"learning_rate": 6.881881881881882e-06,
"loss": 0.0477,
"step": 725
},
{
"epoch": 1.8,
"eval_loss": 0.26756495237350464,
"eval_runtime": 9.1971,
"eval_samples_per_second": 9.677,
"eval_steps_per_second": 1.305,
"step": 725
},
{
"epoch": 1.87,
"learning_rate": 6.256256256256257e-06,
"loss": 0.0505,
"step": 750
},
{
"epoch": 1.87,
"eval_loss": 0.2570458650588989,
"eval_runtime": 9.2232,
"eval_samples_per_second": 9.65,
"eval_steps_per_second": 1.301,
"step": 750
},
{
"epoch": 1.93,
"learning_rate": 5.630630630630631e-06,
"loss": 0.0483,
"step": 775
},
{
"epoch": 1.93,
"eval_loss": 0.25569072365760803,
"eval_runtime": 9.2668,
"eval_samples_per_second": 9.604,
"eval_steps_per_second": 1.295,
"step": 775
},
{
"epoch": 1.99,
"learning_rate": 5.005005005005006e-06,
"loss": 0.0499,
"step": 800
},
{
"epoch": 1.99,
"eval_loss": 0.26184016466140747,
"eval_runtime": 9.3556,
"eval_samples_per_second": 9.513,
"eval_steps_per_second": 1.283,
"step": 800
},
{
"epoch": 2.05,
"learning_rate": 4.379379379379379e-06,
"loss": 0.0438,
"step": 825
},
{
"epoch": 2.05,
"eval_loss": 0.2792048454284668,
"eval_runtime": 9.3539,
"eval_samples_per_second": 9.515,
"eval_steps_per_second": 1.283,
"step": 825
},
{
"epoch": 2.11,
"learning_rate": 3.753753753753754e-06,
"loss": 0.0433,
"step": 850
},
{
"epoch": 2.11,
"eval_loss": 0.2763405740261078,
"eval_runtime": 9.3987,
"eval_samples_per_second": 9.469,
"eval_steps_per_second": 1.277,
"step": 850
},
{
"epoch": 2.18,
"learning_rate": 3.1281281281281287e-06,
"loss": 0.0438,
"step": 875
},
{
"epoch": 2.18,
"eval_loss": 0.28726398944854736,
"eval_runtime": 9.3415,
"eval_samples_per_second": 9.527,
"eval_steps_per_second": 1.285,
"step": 875
},
{
"epoch": 2.24,
"learning_rate": 2.502502502502503e-06,
"loss": 0.0439,
"step": 900
},
{
"epoch": 2.24,
"eval_loss": 0.29283300042152405,
"eval_runtime": 9.3228,
"eval_samples_per_second": 9.547,
"eval_steps_per_second": 1.287,
"step": 900
},
{
"epoch": 2.3,
"learning_rate": 1.876876876876877e-06,
"loss": 0.0443,
"step": 925
},
{
"epoch": 2.3,
"eval_loss": 0.28899624943733215,
"eval_runtime": 9.3231,
"eval_samples_per_second": 9.546,
"eval_steps_per_second": 1.287,
"step": 925
},
{
"epoch": 2.36,
"learning_rate": 1.2512512512512514e-06,
"loss": 0.0458,
"step": 950
},
{
"epoch": 2.36,
"eval_loss": 0.28502869606018066,
"eval_runtime": 9.3924,
"eval_samples_per_second": 9.476,
"eval_steps_per_second": 1.278,
"step": 950
},
{
"epoch": 2.43,
"learning_rate": 6.256256256256257e-07,
"loss": 0.0431,
"step": 975
},
{
"epoch": 2.43,
"eval_loss": 0.2905334532260895,
"eval_runtime": 9.3998,
"eval_samples_per_second": 9.468,
"eval_steps_per_second": 1.277,
"step": 975
},
{
"epoch": 2.49,
"learning_rate": 0.0,
"loss": 0.0427,
"step": 1000
},
{
"epoch": 2.49,
"eval_loss": 0.2926941215991974,
"eval_runtime": 9.3458,
"eval_samples_per_second": 9.523,
"eval_steps_per_second": 1.284,
"step": 1000
}
],
"logging_steps": 25,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 25,
"total_flos": 2.1565516640256e+16,
"trial_name": null,
"trial_params": null
}