|
{ |
|
"best_metric": 2.363229513168335, |
|
"best_model_checkpoint": "bert_tiny_lda_100_v1_stsb/checkpoint-138", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 253, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.582594633102417, |
|
"learning_rate": 4.9e-05, |
|
"loss": 2.8077, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.06606349756318877, |
|
"eval_loss": 2.3849637508392334, |
|
"eval_pearson": 0.06667821062565503, |
|
"eval_runtime": 0.4882, |
|
"eval_samples_per_second": 3072.315, |
|
"eval_spearmanr": 0.06544878450072253, |
|
"eval_steps_per_second": 12.289, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 7.826333522796631, |
|
"learning_rate": 4.8e-05, |
|
"loss": 2.042, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.08412433299225583, |
|
"eval_loss": 2.608870506286621, |
|
"eval_pearson": 0.08343756630067128, |
|
"eval_runtime": 0.498, |
|
"eval_samples_per_second": 3012.31, |
|
"eval_spearmanr": 0.08481109968384036, |
|
"eval_steps_per_second": 12.049, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.146524429321289, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.9254, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.14664388177022744, |
|
"eval_loss": 2.3925955295562744, |
|
"eval_pearson": 0.15389971532281455, |
|
"eval_runtime": 0.4854, |
|
"eval_samples_per_second": 3089.992, |
|
"eval_spearmanr": 0.13938804821764034, |
|
"eval_steps_per_second": 12.36, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 8.050851821899414, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.8381, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.17322394096142973, |
|
"eval_loss": 2.5560152530670166, |
|
"eval_pearson": 0.17442871935710869, |
|
"eval_runtime": 0.4812, |
|
"eval_samples_per_second": 3117.34, |
|
"eval_spearmanr": 0.17201916256575076, |
|
"eval_steps_per_second": 12.469, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 35.69472122192383, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.6974, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.17843619975720487, |
|
"eval_loss": 3.0256927013397217, |
|
"eval_pearson": 0.18117857347398758, |
|
"eval_runtime": 0.4745, |
|
"eval_samples_per_second": 3161.345, |
|
"eval_spearmanr": 0.17569382604042216, |
|
"eval_steps_per_second": 12.645, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 33.44086837768555, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 1.5776, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.23191784902182225, |
|
"eval_loss": 2.363229513168335, |
|
"eval_pearson": 0.23504600421092406, |
|
"eval_runtime": 0.4766, |
|
"eval_samples_per_second": 3147.58, |
|
"eval_spearmanr": 0.22878969383272044, |
|
"eval_steps_per_second": 12.59, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 9.113436698913574, |
|
"learning_rate": 4.3e-05, |
|
"loss": 1.2951, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.25835199884600446, |
|
"eval_loss": 2.4535396099090576, |
|
"eval_pearson": 0.25938358753958196, |
|
"eval_runtime": 0.4759, |
|
"eval_samples_per_second": 3151.903, |
|
"eval_spearmanr": 0.25732041015242696, |
|
"eval_steps_per_second": 12.608, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 27.608457565307617, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.0896, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.26369469335834095, |
|
"eval_loss": 2.5246102809906006, |
|
"eval_pearson": 0.2651694385752293, |
|
"eval_runtime": 0.4775, |
|
"eval_samples_per_second": 3141.474, |
|
"eval_spearmanr": 0.26221994814145255, |
|
"eval_steps_per_second": 12.566, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 37.94071960449219, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.9372, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.26428845490221176, |
|
"eval_loss": 2.982673406600952, |
|
"eval_pearson": 0.2715918907093777, |
|
"eval_runtime": 0.482, |
|
"eval_samples_per_second": 3111.778, |
|
"eval_spearmanr": 0.25698501909504573, |
|
"eval_steps_per_second": 12.447, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 15.401611328125, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7915, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.2991297782809159, |
|
"eval_loss": 2.6918020248413086, |
|
"eval_pearson": 0.3056396011613086, |
|
"eval_runtime": 0.4829, |
|
"eval_samples_per_second": 3106.332, |
|
"eval_spearmanr": 0.29261995540052316, |
|
"eval_steps_per_second": 12.425, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 8.023918151855469, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.673, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.29500401259607845, |
|
"eval_loss": 2.752037763595581, |
|
"eval_pearson": 0.3012777534013385, |
|
"eval_runtime": 0.4793, |
|
"eval_samples_per_second": 3129.505, |
|
"eval_spearmanr": 0.2887302717908184, |
|
"eval_steps_per_second": 12.518, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 253, |
|
"total_flos": 1658303025916416.0, |
|
"train_loss": 1.5158629624739937, |
|
"train_runtime": 49.0816, |
|
"train_samples_per_second": 5856.575, |
|
"train_steps_per_second": 23.43 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1658303025916416.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|