Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +256 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ff045ac320d83e6646c6f6ab6832b60324d8abccf00a476635b8ad51c195138
 size 9450352

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab26f18e6314da78474ca938c1257abcb0d2dc24bf52ff5f89f7936a1b5b755c
 size 9450352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c9ea2cbf525af9c054129774c924e498aa89301d6a601575f3647cf19519500
 size 4906490

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a0468144609fbba619a69627d5a8d8d1502da18071b17d1a4de9a30f4be4020
 size 4906490

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa46734679de1685e01f36d430a9f749b2182c205f22d5f38bcbf4bc21f64491
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f557330525e75b8ec8859b7784404c001117d2a2896a23772c13160438b441ea
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d35a71cd96ab153cf58aa68810dc4405b5e1a7fbfef5501d233ae84e34aa51c4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:285055c203c0e51e433ff14d6fa6e0c364698ec978202e506a844a8372901f4a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.003874519933385447,
   "eval_steps": 38,
-  "global_step": 114,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -837,6 +837,258 @@
       "eval_samples_per_second": 25.008,
       "eval_steps_per_second": 12.505,
       "step": 114
     }
   ],
   "logging_steps": 1,
@@ -851,12 +1103,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 510358352560128.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00509805254392822,
   "eval_steps": 38,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.008,
       "eval_steps_per_second": 12.505,
       "step": 114
+    },
+    {
+      "epoch": 0.0039085069503449685,
+      "grad_norm": 7067.63134765625,
+      "learning_rate": 5.857864376269051e-05,
+      "loss": 8.026,
+      "step": 115
+    },
+    {
+      "epoch": 0.00394249396730449,
+      "grad_norm": 10362.53515625,
+      "learning_rate": 5.544102723452171e-05,
+      "loss": 7.5678,
+      "step": 116
+    },
+    {
+      "epoch": 0.0039764809842640116,
+      "grad_norm": 3495.585693359375,
+      "learning_rate": 5.237620050253189e-05,
+      "loss": 7.1326,
+      "step": 117
+    },
+    {
+      "epoch": 0.004010468001223532,
+      "grad_norm": 10234.3955078125,
+      "learning_rate": 4.938570679927783e-05,
+      "loss": 8.7901,
+      "step": 118
+    },
+    {
+      "epoch": 0.004044455018183054,
+      "grad_norm": 4103.04052734375,
+      "learning_rate": 4.647105192839778e-05,
+      "loss": 8.4194,
+      "step": 119
+    },
+    {
+      "epoch": 0.004078442035142575,
+      "grad_norm": 6749.89208984375,
+      "learning_rate": 4.363370350639404e-05,
+      "loss": 7.9044,
+      "step": 120
+    },
+    {
+      "epoch": 0.004112429052102097,
+      "grad_norm": 3126.854248046875,
+      "learning_rate": 4.087509022364382e-05,
+      "loss": 7.5001,
+      "step": 121
+    },
+    {
+      "epoch": 0.004146416069061618,
+      "grad_norm": 4431.7529296875,
+      "learning_rate": 3.819660112501053e-05,
+      "loss": 9.3944,
+      "step": 122
+    },
+    {
+      "epoch": 0.00418040308602114,
+      "grad_norm": 5783.607421875,
+      "learning_rate": 3.5599584910418035e-05,
+      "loss": 9.3595,
+      "step": 123
+    },
+    {
+      "epoch": 0.0042143901029806615,
+      "grad_norm": 4225.36279296875,
+      "learning_rate": 3.3085349255739474e-05,
+      "loss": 8.6062,
+      "step": 124
+    },
+    {
+      "epoch": 0.004248377119940183,
+      "grad_norm": 5815.947265625,
+      "learning_rate": 3.0655160154343174e-05,
+      "loss": 6.9025,
+      "step": 125
+    },
+    {
+      "epoch": 0.0042823641368997046,
+      "grad_norm": 4482.81884765625,
+      "learning_rate": 2.831024127962678e-05,
+      "loss": 8.1797,
+      "step": 126
+    },
+    {
+      "epoch": 0.004316351153859226,
+      "grad_norm": 6426.4365234375,
+      "learning_rate": 2.6051773368860934e-05,
+      "loss": 9.5757,
+      "step": 127
+    },
+    {
+      "epoch": 0.004350338170818748,
+      "grad_norm": 4510.48681640625,
+      "learning_rate": 2.38808936286524e-05,
+      "loss": 7.8607,
+      "step": 128
+    },
+    {
+      "epoch": 0.004384325187778268,
+      "grad_norm": 5353.71875,
+      "learning_rate": 2.1798695162326442e-05,
+      "loss": 9.2137,
+      "step": 129
+    },
+    {
+      "epoch": 0.00441831220473779,
+      "grad_norm": 14885.669921875,
+      "learning_rate": 1.9806226419516192e-05,
+      "loss": 8.0031,
+      "step": 130
+    },
+    {
+      "epoch": 0.004452299221697311,
+      "grad_norm": 3923.73486328125,
+      "learning_rate": 1.790449066823683e-05,
+      "loss": 8.4159,
+      "step": 131
+    },
+    {
+      "epoch": 0.004486286238656833,
+      "grad_norm": 2896.96337890625,
+      "learning_rate": 1.6094445489709885e-05,
+      "loss": 8.781,
+      "step": 132
+    },
+    {
+      "epoch": 0.0045202732556163545,
+      "grad_norm": 4708.7001953125,
+      "learning_rate": 1.4377002296192233e-05,
+      "loss": 7.7557,
+      "step": 133
+    },
+    {
+      "epoch": 0.004554260272575876,
+      "grad_norm": 3181.123779296875,
+      "learning_rate": 1.275302587205256e-05,
+      "loss": 8.2043,
+      "step": 134
+    },
+    {
+      "epoch": 0.0045882472895353976,
+      "grad_norm": 4846.9462890625,
+      "learning_rate": 1.1223333938326485e-05,
+      "loss": 7.4767,
+      "step": 135
+    },
+    {
+      "epoch": 0.004622234306494919,
+      "grad_norm": 6198.0390625,
+      "learning_rate": 9.788696740969295e-06,
+      "loss": 9.4116,
+      "step": 136
+    },
+    {
+      "epoch": 0.004656221323454441,
+      "grad_norm": 3074.818115234375,
+      "learning_rate": 8.44983666301391e-06,
+      "loss": 8.0024,
+      "step": 137
+    },
+    {
+      "epoch": 0.004690208340413962,
+      "grad_norm": 5073.27734375,
+      "learning_rate": 7.2074278608293525e-06,
+      "loss": 10.4057,
+      "step": 138
+    },
+    {
+      "epoch": 0.004724195357373484,
+      "grad_norm": 4744.4404296875,
+      "learning_rate": 6.062095924662625e-06,
+      "loss": 8.419,
+      "step": 139
+    },
+    {
+      "epoch": 0.004758182374333004,
+      "grad_norm": 3285.78076171875,
+      "learning_rate": 5.0144175636352765e-06,
+      "loss": 7.7747,
+      "step": 140
+    },
+    {
+      "epoch": 0.004792169391292526,
+      "grad_norm": 3387.646484375,
+      "learning_rate": 4.064920315352904e-06,
+      "loss": 9.9511,
+      "step": 141
+    },
+    {
+      "epoch": 0.0048261564082520475,
+      "grad_norm": 4019.52587890625,
+      "learning_rate": 3.2140822802740668e-06,
+      "loss": 8.495,
+      "step": 142
+    },
+    {
+      "epoch": 0.004860143425211569,
+      "grad_norm": 4878.927734375,
+      "learning_rate": 2.462331880972468e-06,
+      "loss": 7.6559,
+      "step": 143
+    },
+    {
+      "epoch": 0.0048941304421710905,
+      "grad_norm": 8631.8037109375,
+      "learning_rate": 1.81004764641306e-06,
+      "loss": 9.9675,
+      "step": 144
+    },
+    {
+      "epoch": 0.004928117459130612,
+      "grad_norm": 4712.03515625,
+      "learning_rate": 1.2575580213514792e-06,
+      "loss": 9.007,
+      "step": 145
+    },
+    {
+      "epoch": 0.004962104476090134,
+      "grad_norm": 4180.6337890625,
+      "learning_rate": 8.051412009521864e-07,
+      "loss": 7.0792,
+      "step": 146
+    },
+    {
+      "epoch": 0.004996091493049655,
+      "grad_norm": 3335.519287109375,
+      "learning_rate": 4.530249907087836e-07,
+      "loss": 7.036,
+      "step": 147
+    },
+    {
+      "epoch": 0.005030078510009177,
+      "grad_norm": 3107.85595703125,
+      "learning_rate": 2.0138669173708213e-07,
+      "loss": 8.3826,
+      "step": 148
+    },
+    {
+      "epoch": 0.005064065526968698,
+      "grad_norm": 3581.00927734375,
+      "learning_rate": 5.035301149869387e-08,
+      "loss": 7.9239,
+      "step": 149
+    },
+    {
+      "epoch": 0.00509805254392822,
+      "grad_norm": 4885.18798828125,
+      "learning_rate": 0.0,
+      "loss": 8.5003,
+      "step": 150
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 672217071353856.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null