Training in progress, step 300, checkpoint

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4853732d5c5187f500f15cbe605a09c1f694db0b89d14223ed94327a34fe118
 size 27280152

 version https://git-lfs.github.com/spec/v1
+oid sha256:eba7056fd399e1059a89eabd49a56be2ed8f342d50bb40715053752c47047dc9
 size 27280152

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1fe25557fe1cdae29d9cd56ed99d0de30b337173d1401d64e357843aba2b2be
 size 54633978

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0f52fe00bc97bd174d591edb500019cf5ee2c5aa778a33cd64699eaaba661ed
 size 54633978

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16cc509c9606ba9de627d27965415804a7297fe216a129b8cb0b83ce075eaccd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d03ccbe83e3e708cdc550a3d9c0d87bf08436a7e1a6b3f8de5d854497b9be5e6
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:393afb4cbd250bf22d5d5ace359604e7205b76612aad4125a76131e274b10710
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:92873c8c89778fe11b8eeb338a181eefdf056f2f8096c36bf259c3fd791afb34
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c27036b199a4c30a73639d9297e51f7392b3e00ab56964e4ff5bf53b55c162dc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:62db676ea589f2e897f3ed22ee3133a534ed12d0dd978bfaec8bc59572ea976b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.49777777777777776,
   "eval_steps": 20,
-  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -246,6 +246,23 @@
       "eval_samples_per_second": 3.3,
       "eval_steps_per_second": 0.413,
       "step": 280
     }
   ],
   "logging_steps": 20,
@@ -265,7 +282,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.943388413394944e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5333333333333333,
   "eval_steps": 20,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.3,
       "eval_steps_per_second": 0.413,
       "step": 280
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 1.1581426858901978,
+      "learning_rate": 0.00016438896236023375,
+      "loss": 1.6868,
+      "step": 300
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "eval_loss": 1.8817518949508667,
+      "eval_mean_token_accuracy": 0.5664287745952606,
+      "eval_num_tokens": 1429137.0,
+      "eval_runtime": 303.9322,
+      "eval_samples_per_second": 3.29,
+      "eval_steps_per_second": 0.411,
+      "step": 300
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 6.137234291392512e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null