Training in progress, step 24, checkpoint

Files changed (7) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7afb463104162bf4e9f1d10076bf189c7d775e4575db1da1224a852baa88b767
 size 161533192

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ba439b31da40ff6545b36f10bf2088503cd9dea9d31f61fb9a3d8017beae65c
 size 161533192

last-checkpoint/optimizer.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4df9d9f2b8d46b1730c5a393376634fbf7d7edfba1f71f4c55808c0ce5a5af37
 size 323311082

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b167ec1b666e8efbcade53545b496c1fa7f52cd8119115ce70f37b4cbf323b9
 size 323311082

last-checkpoint/pytorch_model_fsdp.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:938dc7835e27c1d17ad3e4ccaa82831bf6732d830784887d63a6ca026feada4d
 size 161626662

 version https://git-lfs.github.com/spec/v1
+oid sha256:18083d90c673193948f1cfa9c2bd07fe9b613716d27d6061ff34a78ffe28be12
 size 161626662

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bebf6cc5c8c7780feadaebd275d45bcf2ecda4d47e61bbd51368454f4d1c553
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:21b50e03f15fbc12dbbda95def3adbd15c7f7294db0db88cc217672a7686e8b4
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88eb0519ba5419045287051a9023514a1ac66b2ff4faa690c6ef26d732fe7601
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:a475bf81f402c30ceed6fd5d0f731aea39f076be079d3c3684f6d4e6041d1462
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d66ac607544becbf0b1e2248ebb09e59e5572bf9bfa9d7d701b793d0945c55db
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:859214dc387eb3e62419c68b424697c31e86f60684ce969a4432863354d30aa8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01353528843055108,
   "eval_steps": 3,
-  "global_step": 21,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -219,6 +219,35 @@
       "eval_samples_per_second": 10.959,
       "eval_steps_per_second": 2.74,
       "step": 21
     }
   ],
   "logging_steps": 1,
@@ -238,7 +267,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.919921093921997e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.015468901063486949,
   "eval_steps": 3,
+  "global_step": 24,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.959,
       "eval_steps_per_second": 2.74,
       "step": 21
+    },
+    {
+      "epoch": 0.014179825974863036,
+      "grad_norm": 0.7142388224601746,
+      "learning_rate": 5.7422070843492734e-05,
+      "loss": 1.4037,
+      "step": 22
+    },
+    {
+      "epoch": 0.014824363519174991,
+      "grad_norm": 0.9205169081687927,
+      "learning_rate": 4.6417320502100316e-05,
+      "loss": 1.6486,
+      "step": 23
+    },
+    {
+      "epoch": 0.015468901063486949,
+      "grad_norm": 1.2948448657989502,
+      "learning_rate": 3.6257601025131026e-05,
+      "loss": 1.6989,
+      "step": 24
+    },
+    {
+      "epoch": 0.015468901063486949,
+      "eval_loss": 3.01545786857605,
+      "eval_runtime": 460.7675,
+      "eval_samples_per_second": 11.344,
+      "eval_steps_per_second": 2.837,
+      "step": 24
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.028467876213555e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null