|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 78.255126953125, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3942, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8419489007724301, |
|
"eval_loss": 0.3128369450569153, |
|
"eval_precision": 0.8857758620689655, |
|
"eval_recall": 0.8173758865248226, |
|
"eval_runtime": 1.6299, |
|
"eval_samples_per_second": 244.801, |
|
"eval_steps_per_second": 30.677, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 42.82415771484375, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2168, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8676337535436396, |
|
"eval_loss": 0.3043781518936157, |
|
"eval_precision": 0.8658613445378152, |
|
"eval_recall": 0.8694762684124386, |
|
"eval_runtime": 1.6375, |
|
"eval_samples_per_second": 243.661, |
|
"eval_steps_per_second": 30.534, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.2970781624317169, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1372, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8595250288055307, |
|
"eval_loss": 0.5317866802215576, |
|
"eval_precision": 0.885164197446576, |
|
"eval_recall": 0.8419712675031824, |
|
"eval_runtime": 1.6412, |
|
"eval_samples_per_second": 243.114, |
|
"eval_steps_per_second": 30.465, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.16418644785881042, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0957, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8765906680805938, |
|
"eval_loss": 0.47654101252555847, |
|
"eval_precision": 0.8675710594315245, |
|
"eval_recall": 0.888025095471904, |
|
"eval_runtime": 1.6551, |
|
"eval_samples_per_second": 241.073, |
|
"eval_steps_per_second": 30.21, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.4955180287361145, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0674, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8728804559453431, |
|
"eval_loss": 0.552257239818573, |
|
"eval_precision": 0.8576773985140519, |
|
"eval_recall": 0.9027095835606473, |
|
"eval_runtime": 1.6807, |
|
"eval_samples_per_second": 237.402, |
|
"eval_steps_per_second": 29.75, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.03946012258529663, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0535, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8878574955372402, |
|
"eval_loss": 0.5158531069755554, |
|
"eval_precision": 0.8888448885098087, |
|
"eval_recall": 0.8868885251863976, |
|
"eval_runtime": 1.6465, |
|
"eval_samples_per_second": 242.338, |
|
"eval_steps_per_second": 30.368, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.008608223870396614, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.027, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8642908431276217, |
|
"eval_loss": 0.5940884351730347, |
|
"eval_precision": 0.8633964654080464, |
|
"eval_recall": 0.8652027641389344, |
|
"eval_runtime": 1.6485, |
|
"eval_samples_per_second": 242.043, |
|
"eval_steps_per_second": 30.331, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.010127891786396503, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0223, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8548827059465357, |
|
"eval_loss": 0.7166243195533752, |
|
"eval_precision": 0.8548827059465357, |
|
"eval_recall": 0.8548827059465357, |
|
"eval_runtime": 1.6562, |
|
"eval_samples_per_second": 240.913, |
|
"eval_steps_per_second": 30.19, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.005933025386184454, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0145, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8829621606985718, |
|
"eval_loss": 0.7022837996482849, |
|
"eval_precision": 0.8802419354838709, |
|
"eval_recall": 0.8858428805237315, |
|
"eval_runtime": 1.6595, |
|
"eval_samples_per_second": 240.429, |
|
"eval_steps_per_second": 30.129, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.02505210041999817, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0106, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8839406001224739, |
|
"eval_loss": 0.699307918548584, |
|
"eval_precision": 0.8880654743486602, |
|
"eval_recall": 0.880114566284779, |
|
"eval_runtime": 1.6551, |
|
"eval_samples_per_second": 241.07, |
|
"eval_steps_per_second": 30.209, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.002501419745385647, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0093, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8703663593044124, |
|
"eval_loss": 0.8273664712905884, |
|
"eval_precision": 0.8789149003479912, |
|
"eval_recall": 0.8630205491907619, |
|
"eval_runtime": 1.6583, |
|
"eval_samples_per_second": 240.615, |
|
"eval_steps_per_second": 30.152, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.012166227214038372, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0086, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8744522298370696, |
|
"eval_loss": 0.7971612215042114, |
|
"eval_precision": 0.8795731707317074, |
|
"eval_recall": 0.8697945080923805, |
|
"eval_runtime": 1.6712, |
|
"eval_samples_per_second": 238.744, |
|
"eval_steps_per_second": 29.918, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.00197013420984149, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0106, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8787009231453675, |
|
"eval_loss": 0.7591652870178223, |
|
"eval_precision": 0.8714896214896215, |
|
"eval_recall": 0.8872976904891798, |
|
"eval_runtime": 1.6672, |
|
"eval_samples_per_second": 239.329, |
|
"eval_steps_per_second": 29.991, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.0050615849904716015, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0072, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8813841488792438, |
|
"eval_loss": 0.7834069728851318, |
|
"eval_precision": 0.8748029197080291, |
|
"eval_recall": 0.8890707401345699, |
|
"eval_runtime": 1.6555, |
|
"eval_samples_per_second": 241.019, |
|
"eval_steps_per_second": 30.203, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.002086851978674531, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0098, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8802521008403361, |
|
"eval_loss": 0.8048883676528931, |
|
"eval_precision": 0.8767168083714847, |
|
"eval_recall": 0.8840698308783415, |
|
"eval_runtime": 1.6591, |
|
"eval_samples_per_second": 240.488, |
|
"eval_steps_per_second": 30.136, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0012473827227950096, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0058, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8802521008403361, |
|
"eval_loss": 0.7670984268188477, |
|
"eval_precision": 0.8767168083714847, |
|
"eval_recall": 0.8840698308783415, |
|
"eval_runtime": 1.659, |
|
"eval_samples_per_second": 240.503, |
|
"eval_steps_per_second": 30.138, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.00188881263602525, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0035, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8856624319419237, |
|
"eval_loss": 0.8084732294082642, |
|
"eval_precision": 0.8758364312267658, |
|
"eval_recall": 0.8983451536643026, |
|
"eval_runtime": 1.6569, |
|
"eval_samples_per_second": 240.816, |
|
"eval_steps_per_second": 30.177, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0014366944087669253, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0052, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8808243727598566, |
|
"eval_loss": 0.7721081972122192, |
|
"eval_precision": 0.875706963591375, |
|
"eval_recall": 0.8865702855064557, |
|
"eval_runtime": 1.6546, |
|
"eval_samples_per_second": 241.143, |
|
"eval_steps_per_second": 30.218, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0011094665387645364, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0028, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8792560061999484, |
|
"eval_loss": 0.8358559608459473, |
|
"eval_precision": 0.8707622232472325, |
|
"eval_recall": 0.889798145117294, |
|
"eval_runtime": 1.6584, |
|
"eval_samples_per_second": 240.592, |
|
"eval_steps_per_second": 30.149, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0015741140814498067, |
|
"learning_rate": 0.0, |
|
"loss": 0.0033, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8792560061999484, |
|
"eval_loss": 0.8335620164871216, |
|
"eval_precision": 0.8707622232472325, |
|
"eval_recall": 0.889798145117294, |
|
"eval_runtime": 1.6776, |
|
"eval_samples_per_second": 237.834, |
|
"eval_steps_per_second": 29.804, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7584162436176000.0, |
|
"train_loss": 0.05526667458356404, |
|
"train_runtime": 862.9394, |
|
"train_samples_per_second": 84.316, |
|
"train_steps_per_second": 2.828 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7584162436176000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|