Training in progress, step 72, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +158 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e86986d8025895edb167555ba3c7f2b2e0ee52c7f003fb8250a1d0da2f2f035e
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:32294b6c46bc43ef84c0eebf1af182adda942b1265d65e9561a3d32d7e5b5f96
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abf16c050d5e677569c332a612df9fd48a4505cd57e4708b8f3d3f4d8aceafd5
 size 403961210

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fc70ec3b653941e76e60e49b387ae9df91421956e133792dbb358df6afaf323
 size 403961210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb748a394dd8639f495e53146ece6dcc513c7d4c53f228c65155ef16fcfdea0a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:72854dbea47a181fb59801a9cd0d2506e105202835e2c1a90a85d036cdb08bf7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab312e097c7be48627b21f885022ddfe347acb5d1a8e7b7a3552aeeb5898a86d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d13b10b3d9fbe4e28626ec43572eba9a2ee3d859a1db9fbf8bf9f5510e6d555
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8469011783599854,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.1052631578947367,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,160 @@
       "eval_samples_per_second": 42.772,
       "eval_steps_per_second": 21.386,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +550,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0652407088283648e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.8469011783599854,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.031578947368421,
   "eval_steps": 50,
+  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 42.772,
       "eval_steps_per_second": 21.386,
       "step": 50
+    },
+    {
+      "epoch": 2.1473684210526316,
+      "grad_norm": 1.6968210935592651,
+      "learning_rate": 5e-05,
+      "loss": 1.1525,
+      "step": 51
+    },
+    {
+      "epoch": 2.1894736842105265,
+      "grad_norm": 2.781216621398926,
+      "learning_rate": 4.626349532067879e-05,
+      "loss": 1.622,
+      "step": 52
+    },
+    {
+      "epoch": 2.231578947368421,
+      "grad_norm": 2.010775089263916,
+      "learning_rate": 4.254788669119127e-05,
+      "loss": 1.7309,
+      "step": 53
+    },
+    {
+      "epoch": 2.2736842105263158,
+      "grad_norm": 1.5809705257415771,
+      "learning_rate": 3.887395330218429e-05,
+      "loss": 1.621,
+      "step": 54
+    },
+    {
+      "epoch": 2.3157894736842106,
+      "grad_norm": 1.6603655815124512,
+      "learning_rate": 3.5262241279454785e-05,
+      "loss": 1.3279,
+      "step": 55
+    },
+    {
+      "epoch": 2.3578947368421055,
+      "grad_norm": 1.745091199874878,
+      "learning_rate": 3.173294878168025e-05,
+      "loss": 1.2659,
+      "step": 56
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 2.30116868019104,
+      "learning_rate": 2.8305813044122097e-05,
+      "loss": 1.668,
+      "step": 57
+    },
+    {
+      "epoch": 2.442105263157895,
+      "grad_norm": 1.877693772315979,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 1.6808,
+      "step": 58
+    },
+    {
+      "epoch": 2.4842105263157896,
+      "grad_norm": 1.442994475364685,
+      "learning_rate": 2.1833997096818898e-05,
+      "loss": 1.5137,
+      "step": 59
+    },
+    {
+      "epoch": 2.526315789473684,
+      "grad_norm": 1.3358914852142334,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 1.0792,
+      "step": 60
+    },
+    {
+      "epoch": 2.568421052631579,
+      "grad_norm": 1.8833284378051758,
+      "learning_rate": 1.599136311145402e-05,
+      "loss": 1.423,
+      "step": 61
+    },
+    {
+      "epoch": 2.610526315789474,
+      "grad_norm": 2.2090237140655518,
+      "learning_rate": 1.3347406408508695e-05,
+      "loss": 1.5257,
+      "step": 62
+    },
+    {
+      "epoch": 2.6526315789473687,
+      "grad_norm": 1.901261806488037,
+      "learning_rate": 1.090842587659851e-05,
+      "loss": 1.8915,
+      "step": 63
+    },
+    {
+      "epoch": 2.694736842105263,
+      "grad_norm": 1.4518189430236816,
+      "learning_rate": 8.688061284200266e-06,
+      "loss": 1.3918,
+      "step": 64
+    },
+    {
+      "epoch": 2.736842105263158,
+      "grad_norm": 1.5766031742095947,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.3363,
+      "step": 65
+    },
+    {
+      "epoch": 2.7789473684210524,
+      "grad_norm": 1.7875163555145264,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 1.3858,
+      "step": 66
+    },
+    {
+      "epoch": 2.8210526315789473,
+      "grad_norm": 2.0564568042755127,
+      "learning_rate": 3.4563125677897932e-06,
+      "loss": 1.4302,
+      "step": 67
+    },
+    {
+      "epoch": 2.863157894736842,
+      "grad_norm": 1.6587474346160889,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 1.5845,
+      "step": 68
+    },
+    {
+      "epoch": 2.905263157894737,
+      "grad_norm": 1.4605294466018677,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 1.2857,
+      "step": 69
+    },
+    {
+      "epoch": 2.9473684210526314,
+      "grad_norm": 1.5679987668991089,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 1.1443,
+      "step": 70
+    },
+    {
+      "epoch": 2.9894736842105263,
+      "grad_norm": 2.4672374725341797,
+      "learning_rate": 1.3981014094099353e-07,
+      "loss": 1.6679,
+      "step": 71
+    },
+    {
+      "epoch": 3.031578947368421,
+      "grad_norm": 1.6800793409347534,
+      "learning_rate": 0.0,
+      "loss": 1.7372,
+      "step": 72
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5338666276683776e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null