Training in progress, step 11400, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step11400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11400/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11400/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11400/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11400/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e1585cc4b434bda55aa62dcf220f7cf2435c298689413f9bb652f6fb53da2ee
|
3 |
size 29034840
|
last-checkpoint/global_step11400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c7add0ba3e79d77224a913476c8b8bbcf89ead2501d62b29992aec11d4d5af2
|
3 |
+
size 43429616
|
last-checkpoint/global_step11400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:488efdb1a984d45f03427912e4be5f949a6a9cf771806f8327d0c0066592c6e0
|
3 |
+
size 43429616
|
last-checkpoint/global_step11400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5e31b7bdfe59a1ba05152087f9ff51451b4af21cbf48a919eb4b5265c346ea0
|
3 |
+
size 43429616
|
last-checkpoint/global_step11400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78d87820e83f7ddeb996eec1fd36bc5f384f8b85afa4ace7019b0b1b018603af
|
3 |
+
size 43429616
|
last-checkpoint/global_step11400/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb2d8b91bacc8e70283982fc2b5ee66df73703effe37b0df072e504fb42dc19
|
3 |
+
size 637299
|
last-checkpoint/global_step11400/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f171543238da415fec1fa7b3646fb93ad6b2e8cc64f7ad80c134e28f7b15218
|
3 |
+
size 637171
|
last-checkpoint/global_step11400/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f188c3894c4731e7625183fabfed0440c1ccfe47364b416e305240b921608ea4
|
3 |
+
size 637171
|
last-checkpoint/global_step11400/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3be1dd2d1df06de5afb116bafcacc4625acaee8e468fd1870bbca7acd0528dfb
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step11400
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e27648483919bf6db61bc01bdc08d8433f1eb24e83aac7f139542c05da435ddb
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbfb347c5b75d89470fe0836fe28917c7357455657faee86c6d63bcebf4d2364
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:784373446df3f533577379aca34fdc3d78674dfd62151a3124bbcd593cc45985
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efd05d8ba8e5381dd14b88a432136176dbc3553bfe0868349d69d1ddfcb01223
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27cd5dd8b545df66df0f76dc60e9970f057069194a0bb0e6af5e90abad0f56cc
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20210,11 +20210,100 @@
|
|
20210 |
"eval_steps_per_second": 0.762,
|
20211 |
"num_input_tokens_seen": 75870800,
|
20212 |
"step": 11350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20213 |
}
|
20214 |
],
|
20215 |
"logging_steps": 5,
|
20216 |
"max_steps": 16324,
|
20217 |
-
"num_input_tokens_seen":
|
20218 |
"num_train_epochs": 2,
|
20219 |
"save_steps": 50,
|
20220 |
"stateful_callbacks": {
|
@@ -20229,7 +20318,7 @@
|
|
20229 |
"attributes": {}
|
20230 |
}
|
20231 |
},
|
20232 |
-
"total_flos":
|
20233 |
"train_batch_size": 1,
|
20234 |
"trial_name": null,
|
20235 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.396680040427552,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 11400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20210 |
"eval_steps_per_second": 0.762,
|
20211 |
"num_input_tokens_seen": 75870800,
|
20212 |
"step": 11350
|
20213 |
+
},
|
20214 |
+
{
|
20215 |
+
"epoch": 1.391167192429022,
|
20216 |
+
"grad_norm": 1.0752398721462886,
|
20217 |
+
"learning_rate": 2.3266501610390906e-05,
|
20218 |
+
"loss": 0.1958,
|
20219 |
+
"num_input_tokens_seen": 75905072,
|
20220 |
+
"step": 11355
|
20221 |
+
},
|
20222 |
+
{
|
20223 |
+
"epoch": 1.3917797310955253,
|
20224 |
+
"grad_norm": 1.4944338956696954,
|
20225 |
+
"learning_rate": 2.322371470597699e-05,
|
20226 |
+
"loss": 0.2484,
|
20227 |
+
"num_input_tokens_seen": 75938048,
|
20228 |
+
"step": 11360
|
20229 |
+
},
|
20230 |
+
{
|
20231 |
+
"epoch": 1.3923922697620288,
|
20232 |
+
"grad_norm": 1.4611829079192886,
|
20233 |
+
"learning_rate": 2.3180955276358628e-05,
|
20234 |
+
"loss": 0.2457,
|
20235 |
+
"num_input_tokens_seen": 75971152,
|
20236 |
+
"step": 11365
|
20237 |
+
},
|
20238 |
+
{
|
20239 |
+
"epoch": 1.393004808428532,
|
20240 |
+
"grad_norm": 1.3441537014783147,
|
20241 |
+
"learning_rate": 2.3138223365410743e-05,
|
20242 |
+
"loss": 0.219,
|
20243 |
+
"num_input_tokens_seen": 76004648,
|
20244 |
+
"step": 11370
|
20245 |
+
},
|
20246 |
+
{
|
20247 |
+
"epoch": 1.3936173470950353,
|
20248 |
+
"grad_norm": 1.001647162051465,
|
20249 |
+
"learning_rate": 2.3095519016979976e-05,
|
20250 |
+
"loss": 0.2099,
|
20251 |
+
"num_input_tokens_seen": 76037936,
|
20252 |
+
"step": 11375
|
20253 |
+
},
|
20254 |
+
{
|
20255 |
+
"epoch": 1.3942298857615387,
|
20256 |
+
"grad_norm": 1.218728080470328,
|
20257 |
+
"learning_rate": 2.3052842274884728e-05,
|
20258 |
+
"loss": 0.202,
|
20259 |
+
"num_input_tokens_seen": 76071568,
|
20260 |
+
"step": 11380
|
20261 |
+
},
|
20262 |
+
{
|
20263 |
+
"epoch": 1.394842424428042,
|
20264 |
+
"grad_norm": 1.482661327395792,
|
20265 |
+
"learning_rate": 2.301019318291502e-05,
|
20266 |
+
"loss": 0.2149,
|
20267 |
+
"num_input_tokens_seen": 76105328,
|
20268 |
+
"step": 11385
|
20269 |
+
},
|
20270 |
+
{
|
20271 |
+
"epoch": 1.3954549630945454,
|
20272 |
+
"grad_norm": 1.3147384066460048,
|
20273 |
+
"learning_rate": 2.296757178483251e-05,
|
20274 |
+
"loss": 0.2472,
|
20275 |
+
"num_input_tokens_seen": 76138608,
|
20276 |
+
"step": 11390
|
20277 |
+
},
|
20278 |
+
{
|
20279 |
+
"epoch": 1.3960675017610487,
|
20280 |
+
"grad_norm": 1.1511171118587837,
|
20281 |
+
"learning_rate": 2.2924978124370472e-05,
|
20282 |
+
"loss": 0.1948,
|
20283 |
+
"num_input_tokens_seen": 76171936,
|
20284 |
+
"step": 11395
|
20285 |
+
},
|
20286 |
+
{
|
20287 |
+
"epoch": 1.396680040427552,
|
20288 |
+
"grad_norm": 1.4155361943897409,
|
20289 |
+
"learning_rate": 2.2882412245233727e-05,
|
20290 |
+
"loss": 0.2228,
|
20291 |
+
"num_input_tokens_seen": 76204736,
|
20292 |
+
"step": 11400
|
20293 |
+
},
|
20294 |
+
{
|
20295 |
+
"epoch": 1.396680040427552,
|
20296 |
+
"eval_loss": 0.13607601821422577,
|
20297 |
+
"eval_runtime": 19.5448,
|
20298 |
+
"eval_samples_per_second": 3.07,
|
20299 |
+
"eval_steps_per_second": 0.767,
|
20300 |
+
"num_input_tokens_seen": 76204736,
|
20301 |
+
"step": 11400
|
20302 |
}
|
20303 |
],
|
20304 |
"logging_steps": 5,
|
20305 |
"max_steps": 16324,
|
20306 |
+
"num_input_tokens_seen": 76204736,
|
20307 |
"num_train_epochs": 2,
|
20308 |
"save_steps": 50,
|
20309 |
"stateful_callbacks": {
|
|
|
20318 |
"attributes": {}
|
20319 |
}
|
20320 |
},
|
20321 |
+
"total_flos": 4779843103358976.0,
|
20322 |
"train_batch_size": 1,
|
20323 |
"trial_name": null,
|
20324 |
"trial_params": null
|