Training in progress, step 112, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33a5cf1ddf5e24ebda98f15082d6444331484a209797ef29f375faf287b2faa4
 size 147770496

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a382456c446f34b195fbfd702a439be3a1e58d2a36d956d357f01865a9f79d8
 size 147770496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b2636679b94b781d00a8346afff4ec981364bfdb4a51d2a9b5522d462db1409
 size 74440308

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f98750a3e57cc2fc9efbb170ea0c2d4e6e07105ecd32dc2e6b33fd619403c2a
 size 74440308

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3004543ff75789c80835e4aaafc7cbfb049f5ef29584c27910625b7aef3b36b6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:de022fc7aa1925c72ae7474cead510ec1b0250a1879a157882455c4937721e6f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:359057d9660fe2d14af7a17c1643f1a9bad16dc0d152f01e09a837e37cbd211e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:db8ff398e296fc80d27c81c792fc561b8565bd13d3296779cf24d36d13c2df0f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04184722312616072,
   "eval_steps": 500,
-  "global_step": 108,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -763,6 +763,34 @@
       "learning_rate": 9.98789718527821e-06,
       "loss": 1.786,
       "step": 108
     }
   ],
   "logging_steps": 1.0,
@@ -782,7 +810,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0647698517059789e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.04339712027898149,
   "eval_steps": 500,
+  "global_step": 112,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.98789718527821e-06,
       "loss": 1.786,
       "step": 108
+    },
+    {
+      "epoch": 0.04223469741436591,
+      "grad_norm": 0.13741853833198547,
+      "learning_rate": 9.987461264941803e-06,
+      "loss": 1.7147,
+      "step": 109
+    },
+    {
+      "epoch": 0.042622171702571106,
+      "grad_norm": 0.11314312368631363,
+      "learning_rate": 9.987017642222821e-06,
+      "loss": 1.7257,
+      "step": 110
+    },
+    {
+      "epoch": 0.04300964599077629,
+      "grad_norm": 0.12022025883197784,
+      "learning_rate": 9.986566317806374e-06,
+      "loss": 1.7829,
+      "step": 111
+    },
+    {
+      "epoch": 0.04339712027898149,
+      "grad_norm": 0.11993694305419922,
+      "learning_rate": 9.986107292389465e-06,
+      "loss": 1.7713,
+      "step": 112
     }
   ],
   "logging_steps": 1.0,
       "attributes": {}
     }
   },
+  "total_flos": 1.1051198111020954e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null