bert_tiny_lda_100_v1_stsb / trainer_state.json
gokulsrinivasagan's picture
End of training
1e1d2a8 verified
{
"best_metric": 2.363229513168335,
"best_model_checkpoint": "bert_tiny_lda_100_v1_stsb/checkpoint-138",
"epoch": 11.0,
"eval_steps": 500,
"global_step": 253,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.582594633102417,
"learning_rate": 4.9e-05,
"loss": 2.8077,
"step": 23
},
{
"epoch": 1.0,
"eval_combined_score": 0.06606349756318877,
"eval_loss": 2.3849637508392334,
"eval_pearson": 0.06667821062565503,
"eval_runtime": 0.4882,
"eval_samples_per_second": 3072.315,
"eval_spearmanr": 0.06544878450072253,
"eval_steps_per_second": 12.289,
"step": 23
},
{
"epoch": 2.0,
"grad_norm": 7.826333522796631,
"learning_rate": 4.8e-05,
"loss": 2.042,
"step": 46
},
{
"epoch": 2.0,
"eval_combined_score": 0.08412433299225583,
"eval_loss": 2.608870506286621,
"eval_pearson": 0.08343756630067128,
"eval_runtime": 0.498,
"eval_samples_per_second": 3012.31,
"eval_spearmanr": 0.08481109968384036,
"eval_steps_per_second": 12.049,
"step": 46
},
{
"epoch": 3.0,
"grad_norm": 3.146524429321289,
"learning_rate": 4.7e-05,
"loss": 1.9254,
"step": 69
},
{
"epoch": 3.0,
"eval_combined_score": 0.14664388177022744,
"eval_loss": 2.3925955295562744,
"eval_pearson": 0.15389971532281455,
"eval_runtime": 0.4854,
"eval_samples_per_second": 3089.992,
"eval_spearmanr": 0.13938804821764034,
"eval_steps_per_second": 12.36,
"step": 69
},
{
"epoch": 4.0,
"grad_norm": 8.050851821899414,
"learning_rate": 4.600000000000001e-05,
"loss": 1.8381,
"step": 92
},
{
"epoch": 4.0,
"eval_combined_score": 0.17322394096142973,
"eval_loss": 2.5560152530670166,
"eval_pearson": 0.17442871935710869,
"eval_runtime": 0.4812,
"eval_samples_per_second": 3117.34,
"eval_spearmanr": 0.17201916256575076,
"eval_steps_per_second": 12.469,
"step": 92
},
{
"epoch": 5.0,
"grad_norm": 35.69472122192383,
"learning_rate": 4.5e-05,
"loss": 1.6974,
"step": 115
},
{
"epoch": 5.0,
"eval_combined_score": 0.17843619975720487,
"eval_loss": 3.0256927013397217,
"eval_pearson": 0.18117857347398758,
"eval_runtime": 0.4745,
"eval_samples_per_second": 3161.345,
"eval_spearmanr": 0.17569382604042216,
"eval_steps_per_second": 12.645,
"step": 115
},
{
"epoch": 6.0,
"grad_norm": 33.44086837768555,
"learning_rate": 4.4000000000000006e-05,
"loss": 1.5776,
"step": 138
},
{
"epoch": 6.0,
"eval_combined_score": 0.23191784902182225,
"eval_loss": 2.363229513168335,
"eval_pearson": 0.23504600421092406,
"eval_runtime": 0.4766,
"eval_samples_per_second": 3147.58,
"eval_spearmanr": 0.22878969383272044,
"eval_steps_per_second": 12.59,
"step": 138
},
{
"epoch": 7.0,
"grad_norm": 9.113436698913574,
"learning_rate": 4.3e-05,
"loss": 1.2951,
"step": 161
},
{
"epoch": 7.0,
"eval_combined_score": 0.25835199884600446,
"eval_loss": 2.4535396099090576,
"eval_pearson": 0.25938358753958196,
"eval_runtime": 0.4759,
"eval_samples_per_second": 3151.903,
"eval_spearmanr": 0.25732041015242696,
"eval_steps_per_second": 12.608,
"step": 161
},
{
"epoch": 8.0,
"grad_norm": 27.608457565307617,
"learning_rate": 4.2e-05,
"loss": 1.0896,
"step": 184
},
{
"epoch": 8.0,
"eval_combined_score": 0.26369469335834095,
"eval_loss": 2.5246102809906006,
"eval_pearson": 0.2651694385752293,
"eval_runtime": 0.4775,
"eval_samples_per_second": 3141.474,
"eval_spearmanr": 0.26221994814145255,
"eval_steps_per_second": 12.566,
"step": 184
},
{
"epoch": 9.0,
"grad_norm": 37.94071960449219,
"learning_rate": 4.1e-05,
"loss": 0.9372,
"step": 207
},
{
"epoch": 9.0,
"eval_combined_score": 0.26428845490221176,
"eval_loss": 2.982673406600952,
"eval_pearson": 0.2715918907093777,
"eval_runtime": 0.482,
"eval_samples_per_second": 3111.778,
"eval_spearmanr": 0.25698501909504573,
"eval_steps_per_second": 12.447,
"step": 207
},
{
"epoch": 10.0,
"grad_norm": 15.401611328125,
"learning_rate": 4e-05,
"loss": 0.7915,
"step": 230
},
{
"epoch": 10.0,
"eval_combined_score": 0.2991297782809159,
"eval_loss": 2.6918020248413086,
"eval_pearson": 0.3056396011613086,
"eval_runtime": 0.4829,
"eval_samples_per_second": 3106.332,
"eval_spearmanr": 0.29261995540052316,
"eval_steps_per_second": 12.425,
"step": 230
},
{
"epoch": 11.0,
"grad_norm": 8.023918151855469,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.673,
"step": 253
},
{
"epoch": 11.0,
"eval_combined_score": 0.29500401259607845,
"eval_loss": 2.752037763595581,
"eval_pearson": 0.3012777534013385,
"eval_runtime": 0.4793,
"eval_samples_per_second": 3129.505,
"eval_spearmanr": 0.2887302717908184,
"eval_steps_per_second": 12.518,
"step": 253
},
{
"epoch": 11.0,
"step": 253,
"total_flos": 1658303025916416.0,
"train_loss": 1.5158629624739937,
"train_runtime": 49.0816,
"train_samples_per_second": 5856.575,
"train_steps_per_second": 23.43
}
],
"logging_steps": 1,
"max_steps": 1150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1658303025916416.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}