|
{ |
|
"best_metric": 0.4843710611445973, |
|
"best_model_checkpoint": "./reader_model/checkpoint-8730", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 8730, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1718213058419244, |
|
"grad_norm": 75.05482482910156, |
|
"learning_rate": 4.713631156930126e-05, |
|
"loss": 2.9216, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3436426116838488, |
|
"grad_norm": 21.536182403564453, |
|
"learning_rate": 4.427262313860252e-05, |
|
"loss": 2.3951, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5154639175257731, |
|
"grad_norm": 185.8365020751953, |
|
"learning_rate": 4.140893470790378e-05, |
|
"loss": 2.1697, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6872852233676976, |
|
"grad_norm": 48.49885559082031, |
|
"learning_rate": 3.854524627720504e-05, |
|
"loss": 2.2481, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8591065292096219, |
|
"grad_norm": 56.232383728027344, |
|
"learning_rate": 3.5681557846506306e-05, |
|
"loss": 1.9251, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bertscore": 0.9379365905851414, |
|
"eval_bleu1": 0.0, |
|
"eval_f1": 0.44161785163619677, |
|
"eval_loss": 1.82194983959198, |
|
"eval_runtime": 87.956, |
|
"eval_samples_per_second": 16.44, |
|
"eval_steps_per_second": 4.116, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"grad_norm": 71.09529876708984, |
|
"learning_rate": 3.2817869415807564e-05, |
|
"loss": 1.8373, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2027491408934707, |
|
"grad_norm": 58.3028450012207, |
|
"learning_rate": 2.9954180985108822e-05, |
|
"loss": 1.5451, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3745704467353952, |
|
"grad_norm": 84.97217559814453, |
|
"learning_rate": 2.709049255441008e-05, |
|
"loss": 1.461, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5463917525773194, |
|
"grad_norm": 41.77628707885742, |
|
"learning_rate": 2.422680412371134e-05, |
|
"loss": 1.3964, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.718213058419244, |
|
"grad_norm": 4.4470438957214355, |
|
"learning_rate": 2.13631156930126e-05, |
|
"loss": 1.3665, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.8900343642611683, |
|
"grad_norm": 43.41935729980469, |
|
"learning_rate": 1.849942726231386e-05, |
|
"loss": 1.3783, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bertscore": 0.9420511256774596, |
|
"eval_bleu1": 0.0, |
|
"eval_f1": 0.47942977396358094, |
|
"eval_loss": 1.7279274463653564, |
|
"eval_runtime": 81.3708, |
|
"eval_samples_per_second": 17.77, |
|
"eval_steps_per_second": 4.449, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 29.33169937133789, |
|
"learning_rate": 1.5635738831615122e-05, |
|
"loss": 1.2364, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.2336769759450172, |
|
"grad_norm": 101.00111389160156, |
|
"learning_rate": 1.277205040091638e-05, |
|
"loss": 0.952, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.4054982817869415, |
|
"grad_norm": 163.68463134765625, |
|
"learning_rate": 9.90836197021764e-06, |
|
"loss": 0.9447, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.5773195876288657, |
|
"grad_norm": 105.01953125, |
|
"learning_rate": 7.0446735395189e-06, |
|
"loss": 0.8667, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.7491408934707904, |
|
"grad_norm": 41.25511169433594, |
|
"learning_rate": 4.18098510882016e-06, |
|
"loss": 0.8508, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.9209621993127146, |
|
"grad_norm": 22.426847457885742, |
|
"learning_rate": 1.3172966781214204e-06, |
|
"loss": 0.8592, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bertscore": 0.9449925996868113, |
|
"eval_bleu1": 0.0, |
|
"eval_f1": 0.4843710611445973, |
|
"eval_loss": 1.917205572128296, |
|
"eval_runtime": 81.3736, |
|
"eval_samples_per_second": 17.77, |
|
"eval_steps_per_second": 4.449, |
|
"step": 8730 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8730, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.242487592315699e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|