Training in progress, step 20, checkpoint

Browse files

Files changed (7) hide show

checkpoint-20/adapter_config.json +6 -6
checkpoint-20/adapter_model.safetensors +1 -1
checkpoint-20/optimizer.pt +1 -1
checkpoint-20/rng_state.pth +1 -1
checkpoint-20/scheduler.pt +1 -1
checkpoint-20/trainer_state.json +32 -13
checkpoint-20/training_args.bin +1 -1

checkpoint-20/adapter_config.json CHANGED Viewed

@@ -28,16 +28,16 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "k_proj",
-    "gate_proj",
     "fc1",
     "q_proj",
-    "v_proj",
     "up_proj",
     "down_proj",
-    "out_proj",
-    "fc2"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "fc1",
+    "o_proj",
+    "fc2",
+    "out_proj",
     "q_proj",
+    "gate_proj",
     "up_proj",
     "down_proj",
+    "v_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

checkpoint-20/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e3cb107c01a3c7342a51f5cc8e1090b035ba2a13268420756a95f0d6114f559
 size 2839126480

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdda4478dbd5bd2e9d6fd020c8f89c04cf43e9c7cb839ca8e27ec76a4814e478
 size 2839126480

checkpoint-20/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505d1a7a195c72587d286c26793412f1e849b8018ee6d531b15e3ed2b296593e
 size 5678690152

 version https://git-lfs.github.com/spec/v1
+oid sha256:3351616e75477416da57bccd262ac5d6967d67143486d25c6ba87cc889a3b844
 size 5678690152

checkpoint-20/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a7c19685927162de419f962184a4ce06ccfb2a3441588f0dc66e1e7594957f3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:142e5c3a83ea588f932553c5bde7b70bfd98ed130ade922e3cb2e56b069aacf3
 size 14244

checkpoint-20/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46f456ef683d11200f96f387b8eee25e5e55fcabcb7dc642edf15ecd17d4d595
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:43b8a7fc5388dd8f77109db1fef12582ae17574ccfc9b9d15825551c90221061
 size 1064

checkpoint-20/trainer_state.json CHANGED Viewed

@@ -3,36 +3,55 @@
   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 0.38461538461538464,
-  "eval_steps": 20,
   "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 2.9819071292877197,
-      "learning_rate": 0.00017,
-      "loss": 2.4045,
-      "mean_token_accuracy": 0.9366250112652779,
       "num_tokens": 98240.0,
       "step": 20
     },
     {
       "epoch": 0.38461538461538464,
-      "eval_loss": 0.04179316386580467,
-      "eval_mean_token_accuracy": 0.9780769439843985,
       "eval_num_tokens": 98240.0,
-      "eval_runtime": 17.9013,
-      "eval_samples_per_second": 2.793,
-      "eval_steps_per_second": 0.726,
       "step": 20
     }
   ],
-  "logging_steps": 20,
-  "max_steps": 104,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
   "save_steps": 20,
   "stateful_callbacks": {
     "TrainerControl": {

   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 0.38461538461538464,
+  "eval_steps": 10,
   "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.19230769230769232,
+      "grad_norm": 21.498493194580078,
+      "learning_rate": 0.00019470198675496689,
+      "loss": 14.4551,
+      "mean_token_accuracy": 0.6612499989569187,
+      "num_tokens": 49120.0,
+      "step": 10
+    },
+    {
+      "epoch": 0.19230769230769232,
+      "eval_loss": 1.0248602628707886,
+      "eval_mean_token_accuracy": 0.8969230560156015,
+      "eval_num_tokens": 49120.0,
+      "eval_runtime": 17.9265,
+      "eval_samples_per_second": 2.789,
+      "eval_steps_per_second": 0.725,
+      "step": 10
+    },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 6.777862071990967,
+      "learning_rate": 0.00018145695364238411,
+      "loss": 2.185,
+      "mean_token_accuracy": 0.9445000052452087,
       "num_tokens": 98240.0,
       "step": 20
     },
     {
       "epoch": 0.38461538461538464,
+      "eval_loss": 0.13720029592514038,
+      "eval_mean_token_accuracy": 0.9761538688953106,
       "eval_num_tokens": 98240.0,
+      "eval_runtime": 17.8731,
+      "eval_samples_per_second": 2.797,
+      "eval_steps_per_second": 0.727,
       "step": 20
     }
   ],
+  "logging_steps": 10,
+  "max_steps": 156,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 20,
   "stateful_callbacks": {
     "TrainerControl": {

checkpoint-20/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2661e9d16a322a8b69865bc4b33ea73e1b304a23fa93839c76bab58d65645833
 size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:652c8ce42c66ba23561de837fae1da08a712795400ecc170511dcf7fa6b44940
 size 5816