lowhipa-large-thchs30 / trainer_state.json
jshrdt's picture
Upload folder using huggingface_hub
36bb876 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.015873015873016,
"eval_steps": 126,
"global_step": 630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015873015873015873,
"grad_norm": 1.802138090133667,
"learning_rate": 1e-05,
"loss": 2.7056,
"step": 1
},
{
"epoch": 1.0015873015873016,
"grad_norm": 1.3845086097717285,
"learning_rate": 0.00063,
"loss": 1.2986,
"step": 63
},
{
"epoch": 2.003174603174603,
"grad_norm": 0.29827240109443665,
"learning_rate": 0.0009509433962264152,
"loss": 0.369,
"step": 126
},
{
"epoch": 2.003174603174603,
"eval_loss": 0.2990573048591614,
"eval_runtime": 76.7071,
"eval_samples_per_second": 6.479,
"eval_steps_per_second": 0.821,
"step": 126
},
{
"epoch": 3.0047619047619047,
"grad_norm": 0.28911444544792175,
"learning_rate": 0.0008320754716981132,
"loss": 0.2589,
"step": 189
},
{
"epoch": 4.006349206349206,
"grad_norm": 0.2769618332386017,
"learning_rate": 0.0007132075471698113,
"loss": 0.2183,
"step": 252
},
{
"epoch": 4.006349206349206,
"eval_loss": 0.24794502556324005,
"eval_runtime": 75.6398,
"eval_samples_per_second": 6.571,
"eval_steps_per_second": 0.833,
"step": 252
},
{
"epoch": 5.007936507936508,
"grad_norm": 0.2538459897041321,
"learning_rate": 0.0005943396226415095,
"loss": 0.1887,
"step": 315
},
{
"epoch": 6.0095238095238095,
"grad_norm": 0.28301894664764404,
"learning_rate": 0.0004754716981132076,
"loss": 0.1622,
"step": 378
},
{
"epoch": 6.0095238095238095,
"eval_loss": 0.253131628036499,
"eval_runtime": 75.5929,
"eval_samples_per_second": 6.575,
"eval_steps_per_second": 0.833,
"step": 378
},
{
"epoch": 7.011111111111111,
"grad_norm": 0.29330751299858093,
"learning_rate": 0.00035660377358490565,
"loss": 0.138,
"step": 441
},
{
"epoch": 8.012698412698413,
"grad_norm": 0.2697054147720337,
"learning_rate": 0.0002377358490566038,
"loss": 0.1124,
"step": 504
},
{
"epoch": 8.012698412698413,
"eval_loss": 0.2732747197151184,
"eval_runtime": 75.8397,
"eval_samples_per_second": 6.553,
"eval_steps_per_second": 0.831,
"step": 504
},
{
"epoch": 9.014285714285714,
"grad_norm": 0.21484734117984772,
"learning_rate": 0.0001188679245283019,
"loss": 0.0883,
"step": 567
},
{
"epoch": 10.015873015873016,
"grad_norm": 0.20847243070602417,
"learning_rate": 0.0,
"loss": 0.0692,
"step": 630
},
{
"epoch": 10.015873015873016,
"eval_loss": 0.2962268590927124,
"eval_runtime": 75.5859,
"eval_samples_per_second": 6.575,
"eval_steps_per_second": 0.833,
"step": 630
}
],
"logging_steps": 63,
"max_steps": 630,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 126,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.1425470562304e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}