| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 141, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0070921985815602835, | |
| "grad_norm": 8.338533401489258, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.5469, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.03546099290780142, | |
| "grad_norm": 5.200538158416748, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.5471, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.07092198581560284, | |
| "grad_norm": 2.1772303581237793, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.413, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.10638297872340426, | |
| "grad_norm": 1.2401998043060303, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2907, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.14184397163120568, | |
| "grad_norm": 1.1364105939865112, | |
| "learning_rate": 9.961196033000862e-06, | |
| "loss": 0.196, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1773049645390071, | |
| "grad_norm": 1.2267452478408813, | |
| "learning_rate": 9.84538643114539e-06, | |
| "loss": 0.1008, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 2.399948835372925, | |
| "learning_rate": 9.654368743221022e-06, | |
| "loss": 0.0427, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.24822695035460993, | |
| "grad_norm": 0.4254029095172882, | |
| "learning_rate": 9.391107866851143e-06, | |
| "loss": 0.0169, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.28368794326241137, | |
| "grad_norm": 0.3183421492576599, | |
| "learning_rate": 9.059690028579285e-06, | |
| "loss": 0.0103, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3191489361702128, | |
| "grad_norm": 0.21898160874843597, | |
| "learning_rate": 8.665259359149132e-06, | |
| "loss": 0.0088, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.3546099290780142, | |
| "grad_norm": 5.1714396476745605, | |
| "learning_rate": 8.213938048432697e-06, | |
| "loss": 0.0072, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3900709219858156, | |
| "grad_norm": 0.2684312164783478, | |
| "learning_rate": 7.712731319328798e-06, | |
| "loss": 0.0064, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 0.7641976475715637, | |
| "learning_rate": 7.169418695587791e-06, | |
| "loss": 0.008, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.46099290780141844, | |
| "grad_norm": 0.3036028742790222, | |
| "learning_rate": 6.592433251258423e-06, | |
| "loss": 0.0071, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.49645390070921985, | |
| "grad_norm": 0.11222974210977554, | |
| "learning_rate": 5.990730715996989e-06, | |
| "loss": 0.0052, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5319148936170213, | |
| "grad_norm": 0.5315371751785278, | |
| "learning_rate": 5.373650467932122e-06, | |
| "loss": 0.0046, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "grad_norm": 0.21975472569465637, | |
| "learning_rate": 4.750770571696514e-06, | |
| "loss": 0.0038, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6028368794326241, | |
| "grad_norm": 0.16280204057693481, | |
| "learning_rate": 4.131759111665349e-06, | |
| "loss": 0.0031, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 0.1793779581785202, | |
| "learning_rate": 3.526224127945479e-06, | |
| "loss": 0.0044, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6737588652482269, | |
| "grad_norm": 0.16913849115371704, | |
| "learning_rate": 2.9435644843469434e-06, | |
| "loss": 0.0037, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "grad_norm": 0.13710705935955048, | |
| "learning_rate": 2.39282398310251e-06, | |
| "loss": 0.0037, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7446808510638298, | |
| "grad_norm": 0.1352899819612503, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 0.0035, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.7801418439716312, | |
| "grad_norm": 0.1684824377298355, | |
| "learning_rate": 1.4206657537014078e-06, | |
| "loss": 0.0031, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.8156028368794326, | |
| "grad_norm": 0.2994441092014313, | |
| "learning_rate": 1.0143374638853892e-06, | |
| "loss": 0.0035, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 0.17537814378738403, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.0039, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8865248226950354, | |
| "grad_norm": 0.14183929562568665, | |
| "learning_rate": 3.9261894064796136e-07, | |
| "loss": 0.0027, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.9219858156028369, | |
| "grad_norm": 0.21473850309848785, | |
| "learning_rate": 1.8687876524993987e-07, | |
| "loss": 0.0048, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9574468085106383, | |
| "grad_norm": 0.3028695285320282, | |
| "learning_rate": 5.584586887435739e-08, | |
| "loss": 0.003, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.9929078014184397, | |
| "grad_norm": 0.4601861238479614, | |
| "learning_rate": 1.5540899959187727e-09, | |
| "loss": 0.0025, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.010753666050732136, | |
| "eval_runtime": 1.2761, | |
| "eval_samples_per_second": 0.784, | |
| "eval_steps_per_second": 0.784, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 141, | |
| "total_flos": 2.5427771438137344e+16, | |
| "train_loss": 0.0606694411179249, | |
| "train_runtime": 598.0457, | |
| "train_samples_per_second": 0.236, | |
| "train_steps_per_second": 0.236 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 141, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.5427771438137344e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |