Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52688682ab160ada36c8b8985844039b3b87c62dc055461bfb1835a2ca535fe3
 size 231448

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fd36fe83d13f2f5e86d115d8efe5af8711842010cdbdfef7e816744f06b134c
 size 231448

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:552e3d04aec0817942545aabf4cb786f3da7e2fa9515ab30f987dc1f796ced31
 size 472686

 version https://git-lfs.github.com/spec/v1
+oid sha256:72d82aaac7c49e87f9b1b263662080a89333669dcaad88fd14d869e627cd00f8
 size 472686

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8de3ef895c305cfe7095cb1e41b494c9e8ef1a08aef7efc38ac2bc841e66a3aa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ece1639867b1899fad05d3e0ce39e1eb4e584d09fa186927058f1a8be4673b3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.327882766723633,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.3456221198156682,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 300.053,
       "eval_steps_per_second": 150.232,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 38962543460352.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.327311515808105,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4608294930875576,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 300.053,
       "eval_steps_per_second": 150.232,
       "step": 150
+    },
+    {
+      "epoch": 0.347926267281106,
+      "grad_norm": 0.10251141339540482,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 10.3284,
+      "step": 151
+    },
+    {
+      "epoch": 0.35023041474654376,
+      "grad_norm": 0.08700370043516159,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 10.3343,
+      "step": 152
+    },
+    {
+      "epoch": 0.35253456221198154,
+      "grad_norm": 0.12526540458202362,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 10.3316,
+      "step": 153
+    },
+    {
+      "epoch": 0.3548387096774194,
+      "grad_norm": 0.0898861438035965,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 10.3252,
+      "step": 154
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 0.11189351230859756,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 10.323,
+      "step": 155
+    },
+    {
+      "epoch": 0.35944700460829493,
+      "grad_norm": 0.11552563309669495,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 10.3247,
+      "step": 156
+    },
+    {
+      "epoch": 0.3617511520737327,
+      "grad_norm": 0.12044576555490494,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 10.3281,
+      "step": 157
+    },
+    {
+      "epoch": 0.3640552995391705,
+      "grad_norm": 0.11070229858160019,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 10.3338,
+      "step": 158
+    },
+    {
+      "epoch": 0.3663594470046083,
+      "grad_norm": 0.11015692353248596,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 10.3278,
+      "step": 159
+    },
+    {
+      "epoch": 0.3686635944700461,
+      "grad_norm": 0.10629776120185852,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 10.3247,
+      "step": 160
+    },
+    {
+      "epoch": 0.3709677419354839,
+      "grad_norm": 0.10437588393688202,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 10.3252,
+      "step": 161
+    },
+    {
+      "epoch": 0.37327188940092165,
+      "grad_norm": 0.09978479146957397,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 10.3267,
+      "step": 162
+    },
+    {
+      "epoch": 0.37557603686635943,
+      "grad_norm": 0.09733416140079498,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 10.327,
+      "step": 163
+    },
+    {
+      "epoch": 0.3778801843317972,
+      "grad_norm": 0.09752761572599411,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 10.3341,
+      "step": 164
+    },
+    {
+      "epoch": 0.38018433179723504,
+      "grad_norm": 0.12869524955749512,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 10.3326,
+      "step": 165
+    },
+    {
+      "epoch": 0.3824884792626728,
+      "grad_norm": 0.11425069719552994,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 10.3302,
+      "step": 166
+    },
+    {
+      "epoch": 0.3847926267281106,
+      "grad_norm": 0.11582373827695847,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 10.3266,
+      "step": 167
+    },
+    {
+      "epoch": 0.3870967741935484,
+      "grad_norm": 0.09680487960577011,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 10.3279,
+      "step": 168
+    },
+    {
+      "epoch": 0.38940092165898615,
+      "grad_norm": 0.09466324746608734,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 10.3284,
+      "step": 169
+    },
+    {
+      "epoch": 0.391705069124424,
+      "grad_norm": 0.0974016785621643,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 10.329,
+      "step": 170
+    },
+    {
+      "epoch": 0.39400921658986177,
+      "grad_norm": 0.10970694571733475,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 10.33,
+      "step": 171
+    },
+    {
+      "epoch": 0.39631336405529954,
+      "grad_norm": 0.09148397296667099,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 10.3251,
+      "step": 172
+    },
+    {
+      "epoch": 0.3986175115207373,
+      "grad_norm": 0.109307199716568,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 10.3254,
+      "step": 173
+    },
+    {
+      "epoch": 0.4009216589861751,
+      "grad_norm": 0.15083882212638855,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 10.3316,
+      "step": 174
+    },
+    {
+      "epoch": 0.4032258064516129,
+      "grad_norm": 0.11527399718761444,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 10.3302,
+      "step": 175
+    },
+    {
+      "epoch": 0.4055299539170507,
+      "grad_norm": 0.0940781831741333,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 10.3206,
+      "step": 176
+    },
+    {
+      "epoch": 0.4078341013824885,
+      "grad_norm": 0.11266635358333588,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 10.3351,
+      "step": 177
+    },
+    {
+      "epoch": 0.41013824884792627,
+      "grad_norm": 0.1146661788225174,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 10.321,
+      "step": 178
+    },
+    {
+      "epoch": 0.41244239631336405,
+      "grad_norm": 0.10519295185804367,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 10.316,
+      "step": 179
+    },
+    {
+      "epoch": 0.4147465437788018,
+      "grad_norm": 0.10742777585983276,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 10.326,
+      "step": 180
+    },
+    {
+      "epoch": 0.41705069124423966,
+      "grad_norm": 0.10106495022773743,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 10.3198,
+      "step": 181
+    },
+    {
+      "epoch": 0.41935483870967744,
+      "grad_norm": 0.09719803929328918,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 10.3187,
+      "step": 182
+    },
+    {
+      "epoch": 0.4216589861751152,
+      "grad_norm": 0.10572856664657593,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 10.3254,
+      "step": 183
+    },
+    {
+      "epoch": 0.423963133640553,
+      "grad_norm": 0.11672014743089676,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 10.3351,
+      "step": 184
+    },
+    {
+      "epoch": 0.42626728110599077,
+      "grad_norm": 0.10764797776937485,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 10.3289,
+      "step": 185
+    },
+    {
+      "epoch": 0.42857142857142855,
+      "grad_norm": 0.10186281055212021,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 10.3256,
+      "step": 186
+    },
+    {
+      "epoch": 0.4308755760368664,
+      "grad_norm": 0.11612407118082047,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 10.3336,
+      "step": 187
+    },
+    {
+      "epoch": 0.43317972350230416,
+      "grad_norm": 0.1148737370967865,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 10.3273,
+      "step": 188
+    },
+    {
+      "epoch": 0.43548387096774194,
+      "grad_norm": 0.10187527537345886,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 10.3245,
+      "step": 189
+    },
+    {
+      "epoch": 0.4377880184331797,
+      "grad_norm": 0.1042233407497406,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 10.3346,
+      "step": 190
+    },
+    {
+      "epoch": 0.4400921658986175,
+      "grad_norm": 0.10812435299158096,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 10.3263,
+      "step": 191
+    },
+    {
+      "epoch": 0.4423963133640553,
+      "grad_norm": 0.10974939912557602,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 10.3246,
+      "step": 192
+    },
+    {
+      "epoch": 0.4447004608294931,
+      "grad_norm": 0.11074187606573105,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 10.3176,
+      "step": 193
+    },
+    {
+      "epoch": 0.4470046082949309,
+      "grad_norm": 0.1112871989607811,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 10.3235,
+      "step": 194
+    },
+    {
+      "epoch": 0.44930875576036866,
+      "grad_norm": 0.09153088182210922,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 10.3312,
+      "step": 195
+    },
+    {
+      "epoch": 0.45161290322580644,
+      "grad_norm": 0.1320706158876419,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 10.3204,
+      "step": 196
+    },
+    {
+      "epoch": 0.4539170506912442,
+      "grad_norm": 0.105406753718853,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 10.3289,
+      "step": 197
+    },
+    {
+      "epoch": 0.45622119815668205,
+      "grad_norm": 0.08604098111391068,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 10.3316,
+      "step": 198
+    },
+    {
+      "epoch": 0.45852534562211983,
+      "grad_norm": 0.12545925378799438,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 10.3256,
+      "step": 199
+    },
+    {
+      "epoch": 0.4608294930875576,
+      "grad_norm": 0.13731050491333008,
+      "learning_rate": 0.0,
+      "loss": 10.3267,
+      "step": 200
+    },
+    {
+      "epoch": 0.4608294930875576,
+      "eval_loss": 10.327311515808105,
+      "eval_runtime": 2.4311,
+      "eval_samples_per_second": 300.689,
+      "eval_steps_per_second": 150.55,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 51769173344256.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null