Training in progress, step 2144, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31a11b8093a7dc9efbf932e92cc2201b7cba0591316ca570ba6fc5cf01792fb
 size 2384234968

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d6179563befe7adfc76c6862ae789878edfc301425a31f7c05e460c293e42c
 size 2384234968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a1c8f58f9175b9d2f69570c18408756e4438131f09244dd384c4cf26f3fe8f8
 size 4768662910

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c5fb3a478c067735c5106afc5a92aafb91022f2494eb9c1a86a597f9aad06d7
 size 4768662910

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d8715e1d936b90bedf8fa9339815a5475f6dd3a74a7e90e59a4e0f3a7bbb964
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2021d3136a5f60737933a3df1beba61e770d3dfaf0f9f8fec5ae750dacb73b71
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9325096165054202,
   "eval_steps": 100,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1808,6 +1808,132 @@
       "mean_token_accuracy": 0.9734833620488643,
       "num_tokens": 8192000.0,
       "step": 2000
     }
   ],
   "logging_steps": 10,
@@ -1822,12 +1948,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.1649856397312e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9996503088938105,
   "eval_steps": 100,
+  "global_step": 2144,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.9734833620488643,
       "num_tokens": 8192000.0,
       "step": 2000
+    },
+    {
+      "epoch": 0.9371721645879473,
+      "grad_norm": 0.8202114701271057,
+      "learning_rate": 3.4992223950233285e-06,
+      "loss": 0.1578,
+      "mean_token_accuracy": 0.9717221096158027,
+      "num_tokens": 8232960.0,
+      "step": 2010
+    },
+    {
+      "epoch": 0.9418347126704744,
+      "grad_norm": 0.731518566608429,
+      "learning_rate": 3.2400207361327116e-06,
+      "loss": 0.1481,
+      "mean_token_accuracy": 0.972725048661232,
+      "num_tokens": 8273920.0,
+      "step": 2020
+    },
+    {
+      "epoch": 0.9464972607530016,
+      "grad_norm": 1.1924803256988525,
+      "learning_rate": 2.9808190772420947e-06,
+      "loss": 0.1497,
+      "mean_token_accuracy": 0.9719667322933674,
+      "num_tokens": 8314880.0,
+      "step": 2030
+    },
+    {
+      "epoch": 0.9511598088355286,
+      "grad_norm": 0.8626778721809387,
+      "learning_rate": 2.721617418351478e-06,
+      "loss": 0.1553,
+      "mean_token_accuracy": 0.9710861027240754,
+      "num_tokens": 8355840.0,
+      "step": 2040
+    },
+    {
+      "epoch": 0.9558223569180557,
+      "grad_norm": 0.7663152813911438,
+      "learning_rate": 2.462415759460861e-06,
+      "loss": 0.1342,
+      "mean_token_accuracy": 0.9749266132712364,
+      "num_tokens": 8396800.0,
+      "step": 2050
+    },
+    {
+      "epoch": 0.9604849050005828,
+      "grad_norm": 0.9175160527229309,
+      "learning_rate": 2.2032141005702436e-06,
+      "loss": 0.1494,
+      "mean_token_accuracy": 0.9725293479859829,
+      "num_tokens": 8437760.0,
+      "step": 2060
+    },
+    {
+      "epoch": 0.9651474530831099,
+      "grad_norm": 0.6397636532783508,
+      "learning_rate": 1.9440124416796267e-06,
+      "loss": 0.1353,
+      "mean_token_accuracy": 0.9759050846099854,
+      "num_tokens": 8478720.0,
+      "step": 2070
+    },
+    {
+      "epoch": 0.969810001165637,
+      "grad_norm": 0.8350099325180054,
+      "learning_rate": 1.6848107827890098e-06,
+      "loss": 0.1424,
+      "mean_token_accuracy": 0.974143834412098,
+      "num_tokens": 8519680.0,
+      "step": 2080
+    },
+    {
+      "epoch": 0.9744725492481642,
+      "grad_norm": 0.7387396097183228,
+      "learning_rate": 1.4256091238983931e-06,
+      "loss": 0.1371,
+      "mean_token_accuracy": 0.9749266110360623,
+      "num_tokens": 8560640.0,
+      "step": 2090
+    },
+    {
+      "epoch": 0.9791350973306913,
+      "grad_norm": 0.7936656475067139,
+      "learning_rate": 1.1664074650077762e-06,
+      "loss": 0.1373,
+      "mean_token_accuracy": 0.9743150658905506,
+      "num_tokens": 8601600.0,
+      "step": 2100
+    },
+    {
+      "epoch": 0.9837976454132183,
+      "grad_norm": 0.7077323198318481,
+      "learning_rate": 9.072058061171591e-07,
+      "loss": 0.1429,
+      "mean_token_accuracy": 0.9742172166705132,
+      "num_tokens": 8642560.0,
+      "step": 2110
+    },
+    {
+      "epoch": 0.9884601934957454,
+      "grad_norm": 0.7181702256202698,
+      "learning_rate": 6.480041472265423e-07,
+      "loss": 0.1482,
+      "mean_token_accuracy": 0.9726516641676426,
+      "num_tokens": 8683520.0,
+      "step": 2120
+    },
+    {
+      "epoch": 0.9931227415782725,
+      "grad_norm": 0.7022804021835327,
+      "learning_rate": 3.888024883359254e-07,
+      "loss": 0.1486,
+      "mean_token_accuracy": 0.9724804282188415,
+      "num_tokens": 8724480.0,
+      "step": 2130
+    },
+    {
+      "epoch": 0.9977852896607996,
+      "grad_norm": 0.8867694735527039,
+      "learning_rate": 1.2960082944530845e-07,
+      "loss": 0.1601,
+      "mean_token_accuracy": 0.9704256311058999,
+      "num_tokens": 8765440.0,
+      "step": 2140
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.3208646057918464e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null