Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12b08d1715856b5ca2a529d7640909b2a5c7d028e9075cb1450868463bb32dc7
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f84e7b63436671da375a41d5bbf5e1239f4c8aec811167813a8bf31b1ced550
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2960da7bc056067209bc6af4a3d5c67f3ff6ccafacc272498cd8ae6d7f0b35b9
 size 591203178

 version https://git-lfs.github.com/spec/v1
+oid sha256:215b2efb297e43e0b0bd6527f407a3a1c19dabcbcffd428b261c614017f3386c
 size 591203178

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:887938d50f919c0bc26ff7f587b338999d3f54bebedefc7121b973f64dd1939d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d264ea2b31efc6175c9b88c292060b1e331f84753196c51fa4b1c3c0bd9a5f38
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.32786885245901637,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -163,6 +163,84 @@
       "eval_samples_per_second": 1.092,
       "eval_steps_per_second": 1.092,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -182,7 +260,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3763915769856000.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4918032786885246,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.092,
       "eval_steps_per_second": 1.092,
       "step": 200
+    },
+    {
+      "epoch": 0.3442622950819672,
+      "grad_norm": 1.2340621948242188,
+      "learning_rate": 8.290001430804025e-05,
+      "loss": 0.1302,
+      "step": 210
+    },
+    {
+      "epoch": 0.36065573770491804,
+      "grad_norm": 0.7683020234107971,
+      "learning_rate": 8.06928070911306e-05,
+      "loss": 0.1771,
+      "step": 220
+    },
+    {
+      "epoch": 0.3770491803278688,
+      "grad_norm": 1.2670581340789795,
+      "learning_rate": 7.838512137210565e-05,
+      "loss": 0.1442,
+      "step": 230
+    },
+    {
+      "epoch": 0.39344262295081966,
+      "grad_norm": 0.6768158078193665,
+      "learning_rate": 7.598451178106857e-05,
+      "loss": 0.1389,
+      "step": 240
+    },
+    {
+      "epoch": 0.4098360655737705,
+      "grad_norm": 1.129391074180603,
+      "learning_rate": 7.3498837151366e-05,
+      "loss": 0.1385,
+      "step": 250
+    },
+    {
+      "epoch": 0.4262295081967213,
+      "grad_norm": 1.101136326789856,
+      "learning_rate": 7.093623479226385e-05,
+      "loss": 0.1564,
+      "step": 260
+    },
+    {
+      "epoch": 0.4426229508196721,
+      "grad_norm": 0.2261950969696045,
+      "learning_rate": 6.830509384998114e-05,
+      "loss": 0.1431,
+      "step": 270
+    },
+    {
+      "epoch": 0.45901639344262296,
+      "grad_norm": 0.3458766043186188,
+      "learning_rate": 6.561402784428974e-05,
+      "loss": 0.1206,
+      "step": 280
+    },
+    {
+      "epoch": 0.47540983606557374,
+      "grad_norm": 1.1199126243591309,
+      "learning_rate": 6.287184647058648e-05,
+      "loss": 0.11,
+      "step": 290
+    },
+    {
+      "epoch": 0.4918032786885246,
+      "grad_norm": 0.45597025752067566,
+      "learning_rate": 6.0087526759748304e-05,
+      "loss": 0.1191,
+      "step": 300
+    },
+    {
+      "epoch": 0.4918032786885246,
+      "eval_loss": 0.1460057497024536,
+      "eval_runtime": 62.2673,
+      "eval_samples_per_second": 1.092,
+      "eval_steps_per_second": 1.092,
+      "step": 300
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 5636040300257280.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null