Training in progress, step 2132, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c8d7bce05d2c1bccb7701b6db68f853023ff91aab74c3d1cf1bd7d572d94fa9
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:26522d5ea558b7b30e432a3e8326c772da331af1345d7d2385595a8e092514f6
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9defb768ce5b55a1efe4f6d1f9b620625802708d0e54f82955b357bdd9b30a5
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:f606e7bf0d69e20856ff02233f6c65f5d5d774b35c8e19ea35f2f4123eb47bc7
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4116e11c3ebde79688e83478e3698e3c297d9a40de552daf6ce57141f242e94
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e73a6957b047016025e7645447d5032c71a249e8c0df387b0746129826f0864d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:040d16000d19870802caba86c426292142c47812c4a4f6c7622cff272ce23091
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:17cac372009eea4e7cef6a84a78c5a8084c9295f985d63cf718cd0dcfd001889
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8245452853006483,
   "eval_steps": 500,
-  "global_step": 2128,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14903,6 +14903,34 @@
       "learning_rate": 7.682691173428503e-07,
       "loss": 1.3673,
       "step": 2128
     }
   ],
   "logging_steps": 1.0,
@@ -14922,7 +14950,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1053657564724787e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.826095182453469,
   "eval_steps": 500,
+  "global_step": 2132,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.682691173428503e-07,
       "loss": 1.3673,
       "step": 2128
+    },
+    {
+      "epoch": 0.8249327595888535,
+      "grad_norm": 0.21315915882587433,
+      "learning_rate": 7.649628197340931e-07,
+      "loss": 1.3686,
+      "step": 2129
+    },
+    {
+      "epoch": 0.8253202338770587,
+      "grad_norm": 0.1892424076795578,
+      "learning_rate": 7.616630625022609e-07,
+      "loss": 1.3537,
+      "step": 2130
+    },
+    {
+      "epoch": 0.8257077081652638,
+      "grad_norm": 0.17962083220481873,
+      "learning_rate": 7.58369850743334e-07,
+      "loss": 1.3944,
+      "step": 2131
+    },
+    {
+      "epoch": 0.826095182453469,
+      "grad_norm": 0.18284156918525696,
+      "learning_rate": 7.550831895431799e-07,
+      "loss": 1.3092,
+      "step": 2132
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 2.1091234518819717e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null