{ "best_metric": 0.4843710611445973, "best_model_checkpoint": "./reader_model/checkpoint-8730", "epoch": 3.0, "eval_steps": 500, "global_step": 8730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1718213058419244, "grad_norm": 75.05482482910156, "learning_rate": 4.713631156930126e-05, "loss": 2.9216, "step": 500 }, { "epoch": 0.3436426116838488, "grad_norm": 21.536182403564453, "learning_rate": 4.427262313860252e-05, "loss": 2.3951, "step": 1000 }, { "epoch": 0.5154639175257731, "grad_norm": 185.8365020751953, "learning_rate": 4.140893470790378e-05, "loss": 2.1697, "step": 1500 }, { "epoch": 0.6872852233676976, "grad_norm": 48.49885559082031, "learning_rate": 3.854524627720504e-05, "loss": 2.2481, "step": 2000 }, { "epoch": 0.8591065292096219, "grad_norm": 56.232383728027344, "learning_rate": 3.5681557846506306e-05, "loss": 1.9251, "step": 2500 }, { "epoch": 1.0, "eval_bertscore": 0.9379365905851414, "eval_bleu1": 0.0, "eval_f1": 0.44161785163619677, "eval_loss": 1.82194983959198, "eval_runtime": 87.956, "eval_samples_per_second": 16.44, "eval_steps_per_second": 4.116, "step": 2910 }, { "epoch": 1.0309278350515463, "grad_norm": 71.09529876708984, "learning_rate": 3.2817869415807564e-05, "loss": 1.8373, "step": 3000 }, { "epoch": 1.2027491408934707, "grad_norm": 58.3028450012207, "learning_rate": 2.9954180985108822e-05, "loss": 1.5451, "step": 3500 }, { "epoch": 1.3745704467353952, "grad_norm": 84.97217559814453, "learning_rate": 2.709049255441008e-05, "loss": 1.461, "step": 4000 }, { "epoch": 1.5463917525773194, "grad_norm": 41.77628707885742, "learning_rate": 2.422680412371134e-05, "loss": 1.3964, "step": 4500 }, { "epoch": 1.718213058419244, "grad_norm": 4.4470438957214355, "learning_rate": 2.13631156930126e-05, "loss": 1.3665, "step": 5000 }, { "epoch": 1.8900343642611683, "grad_norm": 43.41935729980469, "learning_rate": 1.849942726231386e-05, "loss": 1.3783, "step": 5500 }, { "epoch": 2.0, "eval_bertscore": 0.9420511256774596, "eval_bleu1": 0.0, "eval_f1": 0.47942977396358094, "eval_loss": 1.7279274463653564, "eval_runtime": 81.3708, "eval_samples_per_second": 17.77, "eval_steps_per_second": 4.449, "step": 5820 }, { "epoch": 2.0618556701030926, "grad_norm": 29.33169937133789, "learning_rate": 1.5635738831615122e-05, "loss": 1.2364, "step": 6000 }, { "epoch": 2.2336769759450172, "grad_norm": 101.00111389160156, "learning_rate": 1.277205040091638e-05, "loss": 0.952, "step": 6500 }, { "epoch": 2.4054982817869415, "grad_norm": 163.68463134765625, "learning_rate": 9.90836197021764e-06, "loss": 0.9447, "step": 7000 }, { "epoch": 2.5773195876288657, "grad_norm": 105.01953125, "learning_rate": 7.0446735395189e-06, "loss": 0.8667, "step": 7500 }, { "epoch": 2.7491408934707904, "grad_norm": 41.25511169433594, "learning_rate": 4.18098510882016e-06, "loss": 0.8508, "step": 8000 }, { "epoch": 2.9209621993127146, "grad_norm": 22.426847457885742, "learning_rate": 1.3172966781214204e-06, "loss": 0.8592, "step": 8500 }, { "epoch": 3.0, "eval_bertscore": 0.9449925996868113, "eval_bleu1": 0.0, "eval_f1": 0.4843710611445973, "eval_loss": 1.917205572128296, "eval_runtime": 81.3736, "eval_samples_per_second": 17.77, "eval_steps_per_second": 4.449, "step": 8730 } ], "logging_steps": 500, "max_steps": 8730, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.242487592315699e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }