Training in progress, step 200, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40afff962f83d25242ae886d301c4ec413d496314897aef7825a0fcf5eeaecfc
 size 399673504

 version https://git-lfs.github.com/spec/v1
+oid sha256:0107738a3fc62d785cac3110b92ee540cdb6876b0a2d78e0c3fb2de49de1c85d
 size 399673504

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:939d17523ab726ce2ddcb344be8b053d797c6f2780cea931524280bb3efd9753
 size 35917596

 version https://git-lfs.github.com/spec/v1
+oid sha256:96d2be8f0dbf1c12e884583a5c0138b66bb30ef6ca537a84b482ac914f357865
 size 35917596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c02148ac9ea527d4e3ffbba1429252cae43c93d94b6a876879af84d3a387d4ac
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9908c65f888932a672ea89ea1f3e3f2ba8a2341258f8e1b8d57f7777b0c1416
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f58ba35c8b651c13de09248709fe177fe61d200042e5942ab821d19bb8977c9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:684c3e2167c5a4e88fb6c84465f9d1b861ad3145efebcf9adf0596790d98dbaa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.17271157167530224,
   "eval_steps": 1000,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14,6 +14,13 @@
       "learning_rate": 1e-05,
       "loss": 1.8574,
       "step": 100
     }
   ],
   "logging_steps": 100,
@@ -33,7 +40,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5937091156377600.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3454231433506045,
   "eval_steps": 1000,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1e-05,
       "loss": 1.8574,
       "step": 100
+    },
+    {
+      "epoch": 0.3454231433506045,
+      "grad_norm": 3.3716881275177,
+      "learning_rate": 2e-05,
+      "loss": 1.8131,
+      "step": 200
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.18741823127552e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null