Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92600951d2230c185e2f4b432ae7854191326564d2b3896429cd06dcc57d3479
 size 37762064

 version https://git-lfs.github.com/spec/v1
+oid sha256:12810d468258b5304f33fae2ca923f932d8f2f1f9d77d00d54be78b22bf0817e
 size 37762064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81f4341e9417cb9ad739edcbf3690894eb90f4a7883b39c4efaace24cedab31d
 size 19283770

 version https://git-lfs.github.com/spec/v1
+oid sha256:c41ac035eb21d1f99367307de0a21b21a31efec944e3d595df3dac603b49120a
 size 19283770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40eeb7dfe79107ab30a3df5ab797ee37d2bb792e86b7e9508ea9164419baee1f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fbfca1b4b6ded81771f66676f6dc6609585dd69c24c4161ef69050a6d989b68
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc4a786186a574bdc543ff4b4563aab7c5e0b442c74c85899bb42a25553c5d0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca62d85cf5423834480d3c20680f93b185c8e6574a8a14021d285e0b05c7449f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.1347427368164062,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.5154639175257731,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 144.344,
       "eval_steps_per_second": 36.233,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4959928577949696.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.0797932147979736,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.6872852233676976,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 144.344,
       "eval_steps_per_second": 36.233,
       "step": 150
+    },
+    {
+      "epoch": 0.5189003436426117,
+      "grad_norm": 2705.450439453125,
+      "learning_rate": 9.319397726443026e-06,
+      "loss": 7.5747,
+      "step": 151
+    },
+    {
+      "epoch": 0.5223367697594502,
+      "grad_norm": 3762.812744140625,
+      "learning_rate": 8.962896471825342e-06,
+      "loss": 7.5746,
+      "step": 152
+    },
+    {
+      "epoch": 0.5257731958762887,
+      "grad_norm": 1911.854736328125,
+      "learning_rate": 8.61214655125809e-06,
+      "loss": 7.438,
+      "step": 153
+    },
+    {
+      "epoch": 0.5292096219931272,
+      "grad_norm": 1469.72119140625,
+      "learning_rate": 8.267243856267331e-06,
+      "loss": 7.2063,
+      "step": 154
+    },
+    {
+      "epoch": 0.5326460481099656,
+      "grad_norm": 1573.3465576171875,
+      "learning_rate": 7.928282679806052e-06,
+      "loss": 7.2888,
+      "step": 155
+    },
+    {
+      "epoch": 0.5360824742268041,
+      "grad_norm": 1803.7042236328125,
+      "learning_rate": 7.595355690475393e-06,
+      "loss": 7.4952,
+      "step": 156
+    },
+    {
+      "epoch": 0.5395189003436426,
+      "grad_norm": 1138.683837890625,
+      "learning_rate": 7.268553907189964e-06,
+      "loss": 7.2513,
+      "step": 157
+    },
+    {
+      "epoch": 0.5429553264604811,
+      "grad_norm": 1677.6661376953125,
+      "learning_rate": 6.947966674294236e-06,
+      "loss": 7.5826,
+      "step": 158
+    },
+    {
+      "epoch": 0.5463917525773195,
+      "grad_norm": 1217.319091796875,
+      "learning_rate": 6.6336816371366305e-06,
+      "loss": 7.5123,
+      "step": 159
+    },
+    {
+      "epoch": 0.5498281786941581,
+      "grad_norm": 1235.499755859375,
+      "learning_rate": 6.325784718108196e-06,
+      "loss": 7.6381,
+      "step": 160
+    },
+    {
+      "epoch": 0.5532646048109966,
+      "grad_norm": 1441.3863525390625,
+      "learning_rate": 6.0243600931522595e-06,
+      "loss": 7.5427,
+      "step": 161
+    },
+    {
+      "epoch": 0.5567010309278351,
+      "grad_norm": 1629.83935546875,
+      "learning_rate": 5.72949016875158e-06,
+      "loss": 7.4876,
+      "step": 162
+    },
+    {
+      "epoch": 0.5601374570446735,
+      "grad_norm": 1260.6104736328125,
+      "learning_rate": 5.44125555939923e-06,
+      "loss": 7.7081,
+      "step": 163
+    },
+    {
+      "epoch": 0.563573883161512,
+      "grad_norm": 1765.477294921875,
+      "learning_rate": 5.159735065559399e-06,
+      "loss": 7.521,
+      "step": 164
+    },
+    {
+      "epoch": 0.5670103092783505,
+      "grad_norm": 2019.567626953125,
+      "learning_rate": 4.885005652124144e-06,
+      "loss": 7.6643,
+      "step": 165
+    },
+    {
+      "epoch": 0.570446735395189,
+      "grad_norm": 1756.0921630859375,
+      "learning_rate": 4.617142427371934e-06,
+      "loss": 7.7939,
+      "step": 166
+    },
+    {
+      "epoch": 0.5738831615120275,
+      "grad_norm": 1463.5172119140625,
+      "learning_rate": 4.3562186224338265e-06,
+      "loss": 7.6615,
+      "step": 167
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 1783.8548583984375,
+      "learning_rate": 4.102305571272783e-06,
+      "loss": 7.4967,
+      "step": 168
+    },
+    {
+      "epoch": 0.5807560137457045,
+      "grad_norm": 1875.7342529296875,
+      "learning_rate": 3.855472691181678e-06,
+      "loss": 7.4725,
+      "step": 169
+    },
+    {
+      "epoch": 0.584192439862543,
+      "grad_norm": 2582.930908203125,
+      "learning_rate": 3.615787463805331e-06,
+      "loss": 8.2118,
+      "step": 170
+    },
+    {
+      "epoch": 0.5876288659793815,
+      "grad_norm": 1897.54638671875,
+      "learning_rate": 3.383315416691646e-06,
+      "loss": 8.2408,
+      "step": 171
+    },
+    {
+      "epoch": 0.5910652920962199,
+      "grad_norm": 2314.038330078125,
+      "learning_rate": 3.158120105377096e-06,
+      "loss": 7.6892,
+      "step": 172
+    },
+    {
+      "epoch": 0.5945017182130584,
+      "grad_norm": 1786.5667724609375,
+      "learning_rate": 2.940263096011233e-06,
+      "loss": 8.3423,
+      "step": 173
+    },
+    {
+      "epoch": 0.5979381443298969,
+      "grad_norm": 2857.995849609375,
+      "learning_rate": 2.729803948525125e-06,
+      "loss": 7.616,
+      "step": 174
+    },
+    {
+      "epoch": 0.6013745704467354,
+      "grad_norm": 2287.089111328125,
+      "learning_rate": 2.526800200348275e-06,
+      "loss": 8.6003,
+      "step": 175
+    },
+    {
+      "epoch": 0.6048109965635738,
+      "grad_norm": 1499.41015625,
+      "learning_rate": 2.3313073506784575e-06,
+      "loss": 7.7733,
+      "step": 176
+    },
+    {
+      "epoch": 0.6082474226804123,
+      "grad_norm": 2719.6171875,
+      "learning_rate": 2.143378845308791e-06,
+      "loss": 8.3946,
+      "step": 177
+    },
+    {
+      "epoch": 0.6116838487972509,
+      "grad_norm": 2311.212158203125,
+      "learning_rate": 1.9630660620161777e-06,
+      "loss": 8.9659,
+      "step": 178
+    },
+    {
+      "epoch": 0.6151202749140894,
+      "grad_norm": 2324.716552734375,
+      "learning_rate": 1.790418296515165e-06,
+      "loss": 8.1793,
+      "step": 179
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 2077.054443359375,
+      "learning_rate": 1.625482748980961e-06,
+      "loss": 8.5296,
+      "step": 180
+    },
+    {
+      "epoch": 0.6219931271477663,
+      "grad_norm": 3390.5927734375,
+      "learning_rate": 1.4683045111453942e-06,
+      "loss": 8.2995,
+      "step": 181
+    },
+    {
+      "epoch": 0.6254295532646048,
+      "grad_norm": 2156.932373046875,
+      "learning_rate": 1.3189265539692707e-06,
+      "loss": 8.547,
+      "step": 182
+    },
+    {
+      "epoch": 0.6288659793814433,
+      "grad_norm": 2244.710205078125,
+      "learning_rate": 1.1773897158945557e-06,
+      "loss": 7.3688,
+      "step": 183
+    },
+    {
+      "epoch": 0.6323024054982818,
+      "grad_norm": 3128.6435546875,
+      "learning_rate": 1.0437326916795432e-06,
+      "loss": 9.2353,
+      "step": 184
+    },
+    {
+      "epoch": 0.6357388316151202,
+      "grad_norm": 3524.7509765625,
+      "learning_rate": 9.179920218200888e-07,
+      "loss": 8.7623,
+      "step": 185
+    },
+    {
+      "epoch": 0.6391752577319587,
+      "grad_norm": 4061.834716796875,
+      "learning_rate": 8.002020825598277e-07,
+      "loss": 8.2449,
+      "step": 186
+    },
+    {
+      "epoch": 0.6426116838487973,
+      "grad_norm": 4144.5556640625,
+      "learning_rate": 6.90395076492022e-07,
+      "loss": 8.4433,
+      "step": 187
+    },
+    {
+      "epoch": 0.6460481099656358,
+      "grad_norm": 3348.039306640625,
+      "learning_rate": 5.886010237557194e-07,
+      "loss": 7.4949,
+      "step": 188
+    },
+    {
+      "epoch": 0.6494845360824743,
+      "grad_norm": 2904.456298828125,
+      "learning_rate": 4.94847753828529e-07,
+      "loss": 7.7912,
+      "step": 189
+    },
+    {
+      "epoch": 0.6529209621993127,
+      "grad_norm": 3423.37353515625,
+      "learning_rate": 4.091608979183303e-07,
+      "loss": 8.2337,
+      "step": 190
+    },
+    {
+      "epoch": 0.6563573883161512,
+      "grad_norm": 2644.664794921875,
+      "learning_rate": 3.315638819559452e-07,
+      "loss": 7.876,
+      "step": 191
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 4483.470703125,
+      "learning_rate": 2.6207792019074414e-07,
+      "loss": 8.5568,
+      "step": 192
+    },
+    {
+      "epoch": 0.6632302405498282,
+      "grad_norm": 8579.669921875,
+      "learning_rate": 2.0072200939085573e-07,
+      "loss": 8.8126,
+      "step": 193
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 7409.6220703125,
+      "learning_rate": 1.475129236496575e-07,
+      "loss": 7.9544,
+      "step": 194
+    },
+    {
+      "epoch": 0.6701030927835051,
+      "grad_norm": 4703.0517578125,
+      "learning_rate": 1.0246520979990459e-07,
+      "loss": 8.26,
+      "step": 195
+    },
+    {
+      "epoch": 0.6735395189003437,
+      "grad_norm": 4779.853515625,
+      "learning_rate": 6.559118343676396e-08,
+      "loss": 8.2973,
+      "step": 196
+    },
+    {
+      "epoch": 0.6769759450171822,
+      "grad_norm": 6850.4453125,
+      "learning_rate": 3.690092555085789e-08,
+      "loss": 8.3535,
+      "step": 197
+    },
+    {
+      "epoch": 0.6804123711340206,
+      "grad_norm": 5519.74365234375,
+      "learning_rate": 1.640227977221853e-08,
+      "loss": 8.6194,
+      "step": 198
+    },
+    {
+      "epoch": 0.6838487972508591,
+      "grad_norm": 7148.4296875,
+      "learning_rate": 4.1008502259298755e-09,
+      "loss": 10.8181,
+      "step": 199
+    },
+    {
+      "epoch": 0.6872852233676976,
+      "grad_norm": 3729.072509765625,
+      "learning_rate": 0.0,
+      "loss": 13.0084,
+      "step": 200
+    },
+    {
+      "epoch": 0.6872852233676976,
+      "eval_loss": 2.0797932147979736,
+      "eval_runtime": 3.4064,
+      "eval_samples_per_second": 143.848,
+      "eval_steps_per_second": 36.109,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6628506133856256.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null