Training in progress, step 70, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step70/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step70/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step70/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +25 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9450448
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5f1b072d203d5b76fe9560659963ef8f781250b4a92e1868c89f6c51fb112d6
|
3 |
size 9450448
|
last-checkpoint/global_step70/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a47f396fa71f1e96c9af42df609aca51b09333fd6dd0ca0ecb2d8f49a5f9b96d
|
3 |
+
size 28320208
|
last-checkpoint/global_step70/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79a33126a85874efb86475c39fa113d95974048c506496f03fa3c779530e2636
|
3 |
+
size 28320208
|
last-checkpoint/global_step70/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5af21c45ef7e37b5a61efd780540df6f6f32b3c48a8b0ecb3da9df86e8899eca
|
3 |
+
size 9523692
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step70
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:041c1299befadbb78df28d6c51995637fdc42b23b8c9a0ddd6d7b9d2b7b43095
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d39b95751aa22f7c631b63c6ae50b62913410b70130404c3fc20f638c1f28ddb
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f98a8feef34550913a6c17e6d111551876ee5198dbf8b76141d29b9c822b726
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 10,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -147,6 +147,28 @@
|
|
147 |
"eval_samples_per_second": 109.58,
|
148 |
"eval_steps_per_second": 27.562,
|
149 |
"step": 60
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
}
|
151 |
],
|
152 |
"logging_steps": 5,
|
@@ -166,7 +188,7 @@
|
|
166 |
"attributes": {}
|
167 |
}
|
168 |
},
|
169 |
-
"total_flos":
|
170 |
"train_batch_size": 2,
|
171 |
"trial_name": null,
|
172 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.11970927746900385,
|
5 |
"eval_steps": 10,
|
6 |
+
"global_step": 70,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
147 |
"eval_samples_per_second": 109.58,
|
148 |
"eval_steps_per_second": 27.562,
|
149 |
"step": 60
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.11115861479264642,
|
153 |
+
"grad_norm": 37.74872970581055,
|
154 |
+
"learning_rate": 6.579798566743314e-05,
|
155 |
+
"loss": 3.1812,
|
156 |
+
"step": 65
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.11970927746900385,
|
160 |
+
"grad_norm": 22.35642433166504,
|
161 |
+
"learning_rate": 5.000000000000002e-05,
|
162 |
+
"loss": 3.1637,
|
163 |
+
"step": 70
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.11970927746900385,
|
167 |
+
"eval_loss": 3.1739985942840576,
|
168 |
+
"eval_runtime": 4.7892,
|
169 |
+
"eval_samples_per_second": 102.941,
|
170 |
+
"eval_steps_per_second": 25.892,
|
171 |
+
"step": 70
|
172 |
}
|
173 |
],
|
174 |
"logging_steps": 5,
|
|
|
188 |
"attributes": {}
|
189 |
}
|
190 |
},
|
191 |
+
"total_flos": 1786158224769024.0,
|
192 |
"train_batch_size": 2,
|
193 |
"trial_name": null,
|
194 |
"trial_params": null
|