Training in progress, step 80, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step80/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +25 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9450448
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ca2d0789f8cfefc3ae782c9c211e30db8fcfde8211a7aa026f963759ffa596b
|
3 |
size 9450448
|
last-checkpoint/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09093f6dc5f091ecd2b49cf68f93f21db705787b0ca6a27718fc730b6f546628
|
3 |
+
size 28320208
|
last-checkpoint/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a62bf6856c03735d6594a4ccc20a1b0fec307282dcb03baa95c101b9a439cbbc
|
3 |
+
size 28320208
|
last-checkpoint/global_step80/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3176959e987ae88695bfb58dd388a6206667bec907cea4cef0eb31b822525d62
|
3 |
+
size 9523692
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step80
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cf0b22231e0aa02f53ac9a900d016562c5e7c021c34b5088c73b5a29b90d23d
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f24bb0f430f282c43aa6537fcfce58560e72b7881eb48726030f0bc77debaf2
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49b8a1dbbf2c2a0b7fde326d57c34bd6c5e5d17e0aaf8b19016c1f721c049db1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 10,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -169,6 +169,28 @@
|
|
169 |
"eval_samples_per_second": 102.941,
|
170 |
"eval_steps_per_second": 25.892,
|
171 |
"step": 70
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
}
|
173 |
],
|
174 |
"logging_steps": 5,
|
@@ -188,7 +210,7 @@
|
|
188 |
"attributes": {}
|
189 |
}
|
190 |
},
|
191 |
-
"total_flos":
|
192 |
"train_batch_size": 2,
|
193 |
"trial_name": null,
|
194 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.1368106028217187,
|
5 |
"eval_steps": 10,
|
6 |
+
"global_step": 80,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
169 |
"eval_samples_per_second": 102.941,
|
170 |
"eval_steps_per_second": 25.892,
|
171 |
"step": 70
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"epoch": 0.12825994014536127,
|
175 |
+
"grad_norm": 37.37099075317383,
|
176 |
+
"learning_rate": 3.5721239031346066e-05,
|
177 |
+
"loss": 3.4387,
|
178 |
+
"step": 75
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"epoch": 0.1368106028217187,
|
182 |
+
"grad_norm": 14.69621467590332,
|
183 |
+
"learning_rate": 2.339555568810221e-05,
|
184 |
+
"loss": 3.0516,
|
185 |
+
"step": 80
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"epoch": 0.1368106028217187,
|
189 |
+
"eval_loss": 3.148390054702759,
|
190 |
+
"eval_runtime": 4.685,
|
191 |
+
"eval_samples_per_second": 105.229,
|
192 |
+
"eval_steps_per_second": 26.467,
|
193 |
+
"step": 80
|
194 |
}
|
195 |
],
|
196 |
"logging_steps": 5,
|
|
|
210 |
"attributes": {}
|
211 |
}
|
212 |
},
|
213 |
+
"total_flos": 2044930264399872.0,
|
214 |
"train_batch_size": 2,
|
215 |
"trial_name": null,
|
216 |
"trial_params": null
|