Training in progress, step 12, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 828526568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a71600f62dd80c2cb2771c778f9fcc7687fd433d5acd8919c1667b806cf333e1
|
3 |
size 828526568
|
last-checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1657755122
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b82159ba1842f5020191443502e307172d245b5bf63a145c25271448111e3ec
|
3 |
size 1657755122
|
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 828793718
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:398dc8fa67b696a75dd1b612994af1eb7c36a2bab5fa2451729db67847b86e09
|
3 |
size 828793718
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:665f0ba302167a9bd53533aaddc229d42076af770231d3d86051da7e37633fd6
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f4bf140f1750c409018fa2d22904d6214c4bbb78a9a8f54c136f62b8c60edb7
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:913fc19bf2ad8265632a721056db64133a2c28d72c90664e19bbea762ef652b8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"best_global_step":
|
3 |
-
"best_metric": 0.
|
4 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 3,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -103,6 +103,35 @@
|
|
103 |
"eval_samples_per_second": 6.301,
|
104 |
"eval_steps_per_second": 0.789,
|
105 |
"step": 9
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
}
|
107 |
],
|
108 |
"logging_steps": 1,
|
@@ -126,12 +155,12 @@
|
|
126 |
"should_evaluate": false,
|
127 |
"should_log": false,
|
128 |
"should_save": true,
|
129 |
-
"should_training_stop":
|
130 |
},
|
131 |
"attributes": {}
|
132 |
}
|
133 |
},
|
134 |
-
"total_flos": 2.
|
135 |
"train_batch_size": 4,
|
136 |
"trial_name": null,
|
137 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_global_step": 12,
|
3 |
+
"best_metric": 0.25252172350883484,
|
4 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-12",
|
5 |
+
"epoch": 0.043272481406355645,
|
6 |
"eval_steps": 3,
|
7 |
+
"global_step": 12,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
103 |
"eval_samples_per_second": 6.301,
|
104 |
"eval_steps_per_second": 0.789,
|
105 |
"step": 9
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 0.03606040117196304,
|
109 |
+
"grad_norm": 0.1475931704044342,
|
110 |
+
"learning_rate": 4.12214747707527e-05,
|
111 |
+
"loss": 0.2489,
|
112 |
+
"step": 10
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 0.03966644128915934,
|
116 |
+
"grad_norm": 0.2981042265892029,
|
117 |
+
"learning_rate": 1.9098300562505266e-05,
|
118 |
+
"loss": 0.2705,
|
119 |
+
"step": 11
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 0.043272481406355645,
|
123 |
+
"grad_norm": 0.18590322136878967,
|
124 |
+
"learning_rate": 4.8943483704846475e-06,
|
125 |
+
"loss": 0.282,
|
126 |
+
"step": 12
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 0.043272481406355645,
|
130 |
+
"eval_loss": 0.25252172350883484,
|
131 |
+
"eval_runtime": 297.3539,
|
132 |
+
"eval_samples_per_second": 6.282,
|
133 |
+
"eval_steps_per_second": 0.787,
|
134 |
+
"step": 12
|
135 |
}
|
136 |
],
|
137 |
"logging_steps": 1,
|
|
|
155 |
"should_evaluate": false,
|
156 |
"should_log": false,
|
157 |
"should_save": true,
|
158 |
+
"should_training_stop": true
|
159 |
},
|
160 |
"attributes": {}
|
161 |
}
|
162 |
},
|
163 |
+
"total_flos": 2.762101349322588e+17,
|
164 |
"train_batch_size": 4,
|
165 |
"trial_name": null,
|
166 |
"trial_params": null
|