Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:919873168567a3022611e09f7c783de7398b8ccfb2edcb9342d17c7d56e4fee3
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1aa4ef5ac1f98f09ef1661af5b6e8cc54abde8931a06ff3e4f9c52a3ef43c6d
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95b6827e46f47e00cf57225a0c5a705ce91217e06cc8424f6f73e7c59b9b1c86
 size 243802484

 version https://git-lfs.github.com/spec/v1
+oid sha256:741430405159cd87ce3fdcdc0ce6ab889f1063931b0bd46504db77baf1e07940
 size 243802484

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91122c57712ef968c2ddad3b58f988626959c35a95aaa6ce01a88a7926613701
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:95e164db2df0e6c7a8d53d754342c4b79b96c43d67ec9389a3353770808b2b47
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed88ff7cbe02e7c054abfce3510882248a01917e1eb8cf2c358f612dd7312357
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c09a3e270daa077e38bac4ad605b0e6c9b43938bcf49dc3d62c3671f11248c14
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.0011682260083034635,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.11614401858304298,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 20.394,
       "eval_steps_per_second": 5.098,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.906155760182886e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.0010981361847370863,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1548586914440573,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.394,
       "eval_steps_per_second": 5.098,
       "step": 150
+    },
+    {
+      "epoch": 0.11691831204026326,
+      "grad_norm": 8.589386940002441,
+      "learning_rate": 1.397909658966454e-05,
+      "loss": 0.0052,
+      "step": 151
+    },
+    {
+      "epoch": 0.11769260549748355,
+      "grad_norm": 0.01945851743221283,
+      "learning_rate": 1.3444344707738015e-05,
+      "loss": 0.0006,
+      "step": 152
+    },
+    {
+      "epoch": 0.11846689895470383,
+      "grad_norm": 0.7007508277893066,
+      "learning_rate": 1.2918219826887136e-05,
+      "loss": 0.0015,
+      "step": 153
+    },
+    {
+      "epoch": 0.11924119241192412,
+      "grad_norm": 37.36264419555664,
+      "learning_rate": 1.2400865784400998e-05,
+      "loss": 0.0022,
+      "step": 154
+    },
+    {
+      "epoch": 0.1200154858691444,
+      "grad_norm": 1.3579168319702148,
+      "learning_rate": 1.189242401970908e-05,
+      "loss": 0.002,
+      "step": 155
+    },
+    {
+      "epoch": 0.1207897793263647,
+      "grad_norm": 0.004012103192508221,
+      "learning_rate": 1.139303353571309e-05,
+      "loss": 0.0001,
+      "step": 156
+    },
+    {
+      "epoch": 0.12156407278358498,
+      "grad_norm": 0.07467560470104218,
+      "learning_rate": 1.0902830860784946e-05,
+      "loss": 0.0019,
+      "step": 157
+    },
+    {
+      "epoch": 0.12233836624080527,
+      "grad_norm": 0.0046264780685305595,
+      "learning_rate": 1.0421950011441355e-05,
+      "loss": 0.0001,
+      "step": 158
+    },
+    {
+      "epoch": 0.12311265969802555,
+      "grad_norm": 0.0015560417668893933,
+      "learning_rate": 9.950522455704946e-06,
+      "loss": 0.0,
+      "step": 159
+    },
+    {
+      "epoch": 0.12388695315524584,
+      "grad_norm": 0.0010828773956745863,
+      "learning_rate": 9.488677077162294e-06,
+      "loss": 0.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.12466124661246612,
+      "grad_norm": 0.0019125222461298108,
+      "learning_rate": 9.03654013972839e-06,
+      "loss": 0.0001,
+      "step": 161
+    },
+    {
+      "epoch": 0.1254355400696864,
+      "grad_norm": 0.11289074271917343,
+      "learning_rate": 8.59423525312737e-06,
+      "loss": 0.0034,
+      "step": 162
+    },
+    {
+      "epoch": 0.1262098335269067,
+      "grad_norm": 0.0018281425582244992,
+      "learning_rate": 8.161883339098845e-06,
+      "loss": 0.0,
+      "step": 163
+    },
+    {
+      "epoch": 0.12698412698412698,
+      "grad_norm": 0.0015775542706251144,
+      "learning_rate": 7.739602598339099e-06,
+      "loss": 0.0,
+      "step": 164
+    },
+    {
+      "epoch": 0.12775842044134728,
+      "grad_norm": 0.0017349289264529943,
+      "learning_rate": 7.327508478186216e-06,
+      "loss": 0.0,
+      "step": 165
+    },
+    {
+      "epoch": 0.12853271389856755,
+      "grad_norm": 0.04309145361185074,
+      "learning_rate": 6.925713641057902e-06,
+      "loss": 0.0008,
+      "step": 166
+    },
+    {
+      "epoch": 0.12930700735578785,
+      "grad_norm": 0.0019816451240330935,
+      "learning_rate": 6.53432793365074e-06,
+      "loss": 0.0,
+      "step": 167
+    },
+    {
+      "epoch": 0.13008130081300814,
+      "grad_norm": 0.06418117135763168,
+      "learning_rate": 6.153458356909174e-06,
+      "loss": 0.0005,
+      "step": 168
+    },
+    {
+      "epoch": 0.13085559427022841,
+      "grad_norm": 0.017173679545521736,
+      "learning_rate": 5.783209036772518e-06,
+      "loss": 0.0002,
+      "step": 169
+    },
+    {
+      "epoch": 0.1316298877274487,
+      "grad_norm": 0.11571363359689713,
+      "learning_rate": 5.423681195707997e-06,
+      "loss": 0.0005,
+      "step": 170
+    },
+    {
+      "epoch": 0.13240418118466898,
+      "grad_norm": 0.0009111211984418333,
+      "learning_rate": 5.074973125037469e-06,
+      "loss": 0.0,
+      "step": 171
+    },
+    {
+      "epoch": 0.13317847464188928,
+      "grad_norm": 0.06231530383229256,
+      "learning_rate": 4.737180158065644e-06,
+      "loss": 0.0005,
+      "step": 172
+    },
+    {
+      "epoch": 0.13395276809910955,
+      "grad_norm": 0.0028604816179722548,
+      "learning_rate": 4.41039464401685e-06,
+      "loss": 0.0001,
+      "step": 173
+    },
+    {
+      "epoch": 0.13472706155632985,
+      "grad_norm": 0.00407991511747241,
+      "learning_rate": 4.094705922787687e-06,
+      "loss": 0.0001,
+      "step": 174
+    },
+    {
+      "epoch": 0.13550135501355012,
+      "grad_norm": 0.012540704570710659,
+      "learning_rate": 3.7902003005224126e-06,
+      "loss": 0.0001,
+      "step": 175
+    },
+    {
+      "epoch": 0.13627564847077042,
+      "grad_norm": 0.040631502866744995,
+      "learning_rate": 3.4969610260176865e-06,
+      "loss": 0.0002,
+      "step": 176
+    },
+    {
+      "epoch": 0.13704994192799072,
+      "grad_norm": 0.0855015367269516,
+      "learning_rate": 3.2150682679631867e-06,
+      "loss": 0.0041,
+      "step": 177
+    },
+    {
+      "epoch": 0.137824235385211,
+      "grad_norm": 0.002300119958817959,
+      "learning_rate": 2.9445990930242668e-06,
+      "loss": 0.0001,
+      "step": 178
+    },
+    {
+      "epoch": 0.1385985288424313,
+      "grad_norm": 0.004780174233019352,
+      "learning_rate": 2.6856274447727475e-06,
+      "loss": 0.0001,
+      "step": 179
+    },
+    {
+      "epoch": 0.13937282229965156,
+      "grad_norm": 0.02303953841328621,
+      "learning_rate": 2.4382241234714413e-06,
+      "loss": 0.0003,
+      "step": 180
+    },
+    {
+      "epoch": 0.14014711575687186,
+      "grad_norm": 0.021668056026101112,
+      "learning_rate": 2.2024567667180914e-06,
+      "loss": 0.0003,
+      "step": 181
+    },
+    {
+      "epoch": 0.14092140921409213,
+      "grad_norm": 0.010320520959794521,
+      "learning_rate": 1.978389830953906e-06,
+      "loss": 0.0002,
+      "step": 182
+    },
+    {
+      "epoch": 0.14169570267131243,
+      "grad_norm": 0.00375885097309947,
+      "learning_rate": 1.7660845738418336e-06,
+      "loss": 0.0001,
+      "step": 183
+    },
+    {
+      "epoch": 0.14246999612853273,
+      "grad_norm": 0.13294894993305206,
+      "learning_rate": 1.5655990375193147e-06,
+      "loss": 0.0009,
+      "step": 184
+    },
+    {
+      "epoch": 0.143244289585753,
+      "grad_norm": 0.12707974016666412,
+      "learning_rate": 1.3769880327301332e-06,
+      "loss": 0.0093,
+      "step": 185
+    },
+    {
+      "epoch": 0.1440185830429733,
+      "grad_norm": 0.0048987469635903835,
+      "learning_rate": 1.2003031238397417e-06,
+      "loss": 0.0001,
+      "step": 186
+    },
+    {
+      "epoch": 0.14479287650019357,
+      "grad_norm": 0.0014035734347999096,
+      "learning_rate": 1.035592614738033e-06,
+      "loss": 0.0,
+      "step": 187
+    },
+    {
+      "epoch": 0.14556716995741387,
+      "grad_norm": 0.001624911092221737,
+      "learning_rate": 8.829015356335791e-07,
+      "loss": 0.0,
+      "step": 188
+    },
+    {
+      "epoch": 0.14634146341463414,
+      "grad_norm": 0.004533092956990004,
+      "learning_rate": 7.422716307427936e-07,
+      "loss": 0.0001,
+      "step": 189
+    },
+    {
+      "epoch": 0.14711575687185444,
+      "grad_norm": 0.0016615334898233414,
+      "learning_rate": 6.137413468774955e-07,
+      "loss": 0.0,
+      "step": 190
+    },
+    {
+      "epoch": 0.1478900503290747,
+      "grad_norm": 0.00272984872572124,
+      "learning_rate": 4.973458229339179e-07,
+      "loss": 0.0001,
+      "step": 191
+    },
+    {
+      "epoch": 0.148664343786295,
+      "grad_norm": 0.08710943907499313,
+      "learning_rate": 3.9311688028611627e-07,
+      "loss": 0.0005,
+      "step": 192
+    },
+    {
+      "epoch": 0.1494386372435153,
+      "grad_norm": 0.005952900741249323,
+      "learning_rate": 3.010830140862836e-07,
+      "loss": 0.0001,
+      "step": 193
+    },
+    {
+      "epoch": 0.15021293070073558,
+      "grad_norm": 0.004029720555990934,
+      "learning_rate": 2.2126938547448627e-07,
+      "loss": 0.0001,
+      "step": 194
+    },
+    {
+      "epoch": 0.15098722415795587,
+      "grad_norm": 0.0068871923722326756,
+      "learning_rate": 1.536978146998569e-07,
+      "loss": 0.0001,
+      "step": 195
+    },
+    {
+      "epoch": 0.15176151761517614,
+      "grad_norm": 0.0033995830453932285,
+      "learning_rate": 9.838677515514594e-08,
+      "loss": 0.0001,
+      "step": 196
+    },
+    {
+      "epoch": 0.15253581107239644,
+      "grad_norm": 0.004005973227322102,
+      "learning_rate": 5.5351388326286834e-08,
+      "loss": 0.0001,
+      "step": 197
+    },
+    {
+      "epoch": 0.15331010452961671,
+      "grad_norm": 0.03095901943743229,
+      "learning_rate": 2.4603419658327797e-08,
+      "loss": 0.0004,
+      "step": 198
+    },
+    {
+      "epoch": 0.154084397986837,
+      "grad_norm": 0.0032219612039625645,
+      "learning_rate": 6.151275338894813e-09,
+      "loss": 0.0001,
+      "step": 199
+    },
+    {
+      "epoch": 0.1548586914440573,
+      "grad_norm": 0.0060548316687345505,
+      "learning_rate": 0.0,
+      "loss": 0.0001,
+      "step": 200
+    },
+    {
+      "epoch": 0.1548586914440573,
+      "eval_loss": 0.0010981361847370863,
+      "eval_runtime": 106.6826,
+      "eval_samples_per_second": 20.397,
+      "eval_steps_per_second": 5.099,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1865389622991258e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null