Training in progress, step 2136, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26522d5ea558b7b30e432a3e8326c772da331af1345d7d2385595a8e092514f6
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c4b48605b7908c6c3cb92661a446438803c1acd9c09849363fbd525f536e574
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f606e7bf0d69e20856ff02233f6c65f5d5d774b35c8e19ea35f2f4123eb47bc7
 size 75455810

 version https://git-lfs.github.com/spec/v1
+oid sha256:fadc7ddaa7c8d0042358e2a5baa607920153a3514ff6ac69482a291d9a05ddda
 size 75455810

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e73a6957b047016025e7645447d5032c71a249e8c0df387b0746129826f0864d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d443a2e4e38986fdfbce09bb15fb1f520183aec59744ce156f4604030fd8c589
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17cac372009eea4e7cef6a84a78c5a8084c9295f985d63cf718cd0dcfd001889
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7461be29c782b65290c8ea4037877ee044396194b7fab404fd344a9cb3ecddbe
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.826095182453469,
   "eval_steps": 500,
-  "global_step": 2132,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14931,6 +14931,34 @@
       "learning_rate": 7.550831895431799e-07,
       "loss": 1.3092,
       "step": 2132
     }
   ],
   "logging_steps": 1.0,
@@ -14950,7 +14978,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1091234518819717e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8276450796062897,
   "eval_steps": 500,
+  "global_step": 2136,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.550831895431799e-07,
       "loss": 1.3092,
       "step": 2132
+    },
+    {
+      "epoch": 0.8264826567416742,
+      "grad_norm": 0.1986715942621231,
+      "learning_rate": 7.518030839775536e-07,
+      "loss": 1.3829,
+      "step": 2133
+    },
+    {
+      "epoch": 0.8268701310298794,
+      "grad_norm": 0.18789222836494446,
+      "learning_rate": 7.485295391120823e-07,
+      "loss": 1.313,
+      "step": 2134
+    },
+    {
+      "epoch": 0.8272576053180846,
+      "grad_norm": 0.18721547722816467,
+      "learning_rate": 7.452625600022629e-07,
+      "loss": 1.3803,
+      "step": 2135
+    },
+    {
+      "epoch": 0.8276450796062897,
+      "grad_norm": 0.20958904922008514,
+      "learning_rate": 7.420021516934539e-07,
+      "loss": 1.3736,
+      "step": 2136
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 2.1129038541824717e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null