Training in progress, step 180, checkpoint

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66012ccb2e672254b4feae075afca231c263ad6547e502796d4b21e7df42afdf
 size 27280152

 version https://git-lfs.github.com/spec/v1
+oid sha256:38ee321f80ef10ff7f70c0b916088e62e63301e960bbb19329669742c451bdf3
 size 27280152

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a359c8fcf31918c8aa7a4cfb8dee8dbb4f0d51124b5e818a003d8f20fdbb21df
 size 54633978

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7558567ed3ae2948f27aa20eb53d1e2d55c3c362483785657b14d4c1df604c7
 size 54633978

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1109adbe12059c6c8cad0cd1f504ae1fe6537b864ec466879513aafd9c107a7c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ffa9c2c105bb48c868c99c24f95c12d4c685abd40387d72157912c716838a60
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb926ea15dfbbdfcbf10b4eeb7bd912f9e27d82ce5588f4af48957c623a6a487
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:894d0e48bf1444f129e12325905662a936cdeeb9fec3a46a0155b3b08f997b67
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e0466309d6ddfb8ea316cec28e551aa3aa5b773e47de4fb79fdcd97127d3d71
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc1255db14e553e59635768b23aee538de1fba4f2cb6eff8d03df681021ffb34
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7600950118764845,
   "eval_steps": 20,
-  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,23 @@
       "eval_samples_per_second": 2.43,
       "eval_steps_per_second": 0.305,
       "step": 160
     }
   ],
   "logging_steps": 20,
@@ -163,7 +180,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.338847435948032e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8551068883610451,
   "eval_steps": 20,
+  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.43,
       "eval_steps_per_second": 0.305,
       "step": 160
+    },
+    {
+      "epoch": 0.8551068883610451,
+      "grad_norm": 0.5900708436965942,
+      "learning_rate": 0.00018858031147626325,
+      "loss": 1.6863,
+      "step": 180
+    },
+    {
+      "epoch": 0.8551068883610451,
+      "eval_loss": 1.7521910667419434,
+      "eval_mean_token_accuracy": 0.5876746786401627,
+      "eval_num_tokens": 1133445.0,
+      "eval_runtime": 151.8321,
+      "eval_samples_per_second": 2.47,
+      "eval_steps_per_second": 0.31,
+      "step": 180
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 4.864305054302208e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null