Training in progress, step 10450, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10450/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10450/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10450/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10450/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10450/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10450/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10450/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10450/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e8edf7966575f7d3701653b2fca9b0412bc222ec3b462a6c9529e461d38d6b9
|
3 |
size 29034840
|
last-checkpoint/global_step10450/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec479ed2e52c76c518e6cfbad68cfa9d1774a794d684c0bc3fb41283b8992311
|
3 |
+
size 43429616
|
last-checkpoint/global_step10450/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8af67540c285ba5dd59ff71fc242ea799f642f68fd31d569e6f8253c2ff6a70
|
3 |
+
size 43429616
|
last-checkpoint/global_step10450/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e78e3330f62ec1dfac9df508d305865691902baad4dd2e1df5f999ce684ca1ed
|
3 |
+
size 43429616
|
last-checkpoint/global_step10450/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63ff13ba0e9054570f5857e7ae608663c24850c18c58eed547f1583b72285655
|
3 |
+
size 43429616
|
last-checkpoint/global_step10450/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb756b726bc8af6d469690f400267e9b6505b7a8359145ac29df73ded279562e
|
3 |
+
size 637299
|
last-checkpoint/global_step10450/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e47e2679e7b1bf6f5a6bac157e5d4d9eacc0fc074d9f13a3bbdb538ef8dd41e
|
3 |
+
size 637171
|
last-checkpoint/global_step10450/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b942a8e29d3d22567872a5f9bba629ec28f25b94362f94ba2561b7ece1926c63
|
3 |
+
size 637171
|
last-checkpoint/global_step10450/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caed3472b8fcb4f1b741572540c8888bc98c25dc325273109005e5f119d61f7b
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10450
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:728c4e2a8ee1d2ed42b3586a4c3cac5b8fbb9fdca53167fd48cf8caf96987518
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e818ca5561167b20b2d1eadae23ef7dfb90bfbf49ff2932c7d035446f9f3308d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c9ae8590f6a84b7b264a99934b6cb306a13089ad904020884db757e1c77945a
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6dbe1ec1980d7726aec0058d16ed92fa46fb441c65518943aecc4b0155fa42e
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97b0c1e81998443d9840edc6cac740fbae2d7dbf17c82e810da7dbdfc0ef4135
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18519,11 +18519,100 @@
|
|
18519 |
"eval_steps_per_second": 0.766,
|
18520 |
"num_input_tokens_seen": 69486944,
|
18521 |
"step": 10400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18522 |
}
|
18523 |
],
|
18524 |
"logging_steps": 5,
|
18525 |
"max_steps": 16324,
|
18526 |
-
"num_input_tokens_seen":
|
18527 |
"num_train_epochs": 2,
|
18528 |
"save_steps": 50,
|
18529 |
"stateful_callbacks": {
|
@@ -18538,7 +18627,7 @@
|
|
18538 |
"attributes": {}
|
18539 |
}
|
18540 |
},
|
18541 |
-
"total_flos":
|
18542 |
"train_batch_size": 1,
|
18543 |
"trial_name": null,
|
18544 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.2802976937919206,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10450,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18519 |
"eval_steps_per_second": 0.766,
|
18520 |
"num_input_tokens_seen": 69486944,
|
18521 |
"step": 10400
|
18522 |
+
},
|
18523 |
+
{
|
18524 |
+
"epoch": 1.2747848457933908,
|
18525 |
+
"grad_norm": 0.9713327822934117,
|
18526 |
+
"learning_rate": 3.184211020688667e-05,
|
18527 |
+
"loss": 0.2063,
|
18528 |
+
"num_input_tokens_seen": 69521024,
|
18529 |
+
"step": 10405
|
18530 |
+
},
|
18531 |
+
{
|
18532 |
+
"epoch": 1.275397384459894,
|
18533 |
+
"grad_norm": 1.089429970441031,
|
18534 |
+
"learning_rate": 3.179492941107207e-05,
|
18535 |
+
"loss": 0.1923,
|
18536 |
+
"num_input_tokens_seen": 69555328,
|
18537 |
+
"step": 10410
|
18538 |
+
},
|
18539 |
+
{
|
18540 |
+
"epoch": 1.2760099231263973,
|
18541 |
+
"grad_norm": 1.8111858959264027,
|
18542 |
+
"learning_rate": 3.174776729524196e-05,
|
18543 |
+
"loss": 0.2565,
|
18544 |
+
"num_input_tokens_seen": 69589040,
|
18545 |
+
"step": 10415
|
18546 |
+
},
|
18547 |
+
{
|
18548 |
+
"epoch": 1.2766224617929007,
|
18549 |
+
"grad_norm": 1.2755084193576243,
|
18550 |
+
"learning_rate": 3.17006239077887e-05,
|
18551 |
+
"loss": 0.2575,
|
18552 |
+
"num_input_tokens_seen": 69622376,
|
18553 |
+
"step": 10420
|
18554 |
+
},
|
18555 |
+
{
|
18556 |
+
"epoch": 1.277235000459404,
|
18557 |
+
"grad_norm": 1.2029984019706983,
|
18558 |
+
"learning_rate": 3.165349929708553e-05,
|
18559 |
+
"loss": 0.2472,
|
18560 |
+
"num_input_tokens_seen": 69654960,
|
18561 |
+
"step": 10425
|
18562 |
+
},
|
18563 |
+
{
|
18564 |
+
"epoch": 1.2778475391259074,
|
18565 |
+
"grad_norm": 1.2226650815852944,
|
18566 |
+
"learning_rate": 3.160639351148639e-05,
|
18567 |
+
"loss": 0.2499,
|
18568 |
+
"num_input_tokens_seen": 69688264,
|
18569 |
+
"step": 10430
|
18570 |
+
},
|
18571 |
+
{
|
18572 |
+
"epoch": 1.2784600777924107,
|
18573 |
+
"grad_norm": 1.1852482827746924,
|
18574 |
+
"learning_rate": 3.155930659932593e-05,
|
18575 |
+
"loss": 0.1853,
|
18576 |
+
"num_input_tokens_seen": 69722104,
|
18577 |
+
"step": 10435
|
18578 |
+
},
|
18579 |
+
{
|
18580 |
+
"epoch": 1.279072616458914,
|
18581 |
+
"grad_norm": 0.7157981569032704,
|
18582 |
+
"learning_rate": 3.15122386089194e-05,
|
18583 |
+
"loss": 0.192,
|
18584 |
+
"num_input_tokens_seen": 69755784,
|
18585 |
+
"step": 10440
|
18586 |
+
},
|
18587 |
+
{
|
18588 |
+
"epoch": 1.2796851551254174,
|
18589 |
+
"grad_norm": 1.453118087596932,
|
18590 |
+
"learning_rate": 3.146518958856264e-05,
|
18591 |
+
"loss": 0.2412,
|
18592 |
+
"num_input_tokens_seen": 69789632,
|
18593 |
+
"step": 10445
|
18594 |
+
},
|
18595 |
+
{
|
18596 |
+
"epoch": 1.2802976937919206,
|
18597 |
+
"grad_norm": 1.4403312114135585,
|
18598 |
+
"learning_rate": 3.1418159586532055e-05,
|
18599 |
+
"loss": 0.2546,
|
18600 |
+
"num_input_tokens_seen": 69822376,
|
18601 |
+
"step": 10450
|
18602 |
+
},
|
18603 |
+
{
|
18604 |
+
"epoch": 1.2802976937919206,
|
18605 |
+
"eval_loss": 0.18921419978141785,
|
18606 |
+
"eval_runtime": 19.3651,
|
18607 |
+
"eval_samples_per_second": 3.098,
|
18608 |
+
"eval_steps_per_second": 0.775,
|
18609 |
+
"num_input_tokens_seen": 69822376,
|
18610 |
+
"step": 10450
|
18611 |
}
|
18612 |
],
|
18613 |
"logging_steps": 5,
|
18614 |
"max_steps": 16324,
|
18615 |
+
"num_input_tokens_seen": 69822376,
|
18616 |
"num_train_epochs": 2,
|
18617 |
"save_steps": 50,
|
18618 |
"stateful_callbacks": {
|
|
|
18627 |
"attributes": {}
|
18628 |
}
|
18629 |
},
|
18630 |
+
"total_flos": 4379630441201664.0,
|
18631 |
"train_batch_size": 1,
|
18632 |
"trial_name": null,
|
18633 |
"trial_params": null
|