Training in progress, step 10000, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10000/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10000/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10000/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10000/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f3df1923d9fa4d4ba4210c00d4064b1217e6de0d9fae21da85801e619830098
|
3 |
size 29034840
|
last-checkpoint/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d240048f026803dbddd1724523a7ca2de2827f7f4046a28bec7612506c1f4896
|
3 |
+
size 43429616
|
last-checkpoint/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af9d739eaf611dfc542e6970b3bce4c233d874be1a835af91983ff1b48e6133d
|
3 |
+
size 43429616
|
last-checkpoint/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87b477d28fcd4876c45a421b65b72bef6e5547331943356dd7c2650f53908619
|
3 |
+
size 43429616
|
last-checkpoint/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95d791eb0fa00dd6deff37b2ce257dec0682492c7016877770930a3b66f7357d
|
3 |
+
size 43429616
|
last-checkpoint/global_step10000/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f670f971061ee5ccc2ce8949c62d7f3b28e9592fb01ed74d60690c0b63dfeab
|
3 |
+
size 637299
|
last-checkpoint/global_step10000/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2394d91162704cf4261ad592249df26cc4f13718fa706ae188428fdb800ea63f
|
3 |
+
size 637171
|
last-checkpoint/global_step10000/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b35d849abccc721fe708faa4eb82c9af45fdf8e6aa0031f673275451eba8cc8
|
3 |
+
size 637171
|
last-checkpoint/global_step10000/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4de57588d8117b8dd97d287d5f926e84cda7a969757a2ba316ac4f8aff113557
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10000
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08cce78177d96f46cb05953508580d717e9ab4cfa0af96f68f8fbf922cd3832f
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2dd5e724cd1bc66d3308b441566c50035a5b4cfe9d731ef333cb45aff2efa0a8
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db95c164a3f9f096591c54d374b04e63d0f313de87d240a7e902ed5ba07a81ca
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ac2f9e5ecbefb231e5ddf28d7401794dd06330430a1d71c9e093f1b6b293b05
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1e4f4d6952a4344fd27915c35f70bbe655fc6d5c21ec3a8fcbb8a3a75429e23
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17718,11 +17718,100 @@
|
|
17718 |
"eval_steps_per_second": 0.735,
|
17719 |
"num_input_tokens_seen": 66462256,
|
17720 |
"step": 9950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17721 |
}
|
17722 |
],
|
17723 |
"logging_steps": 5,
|
17724 |
"max_steps": 16324,
|
17725 |
-
"num_input_tokens_seen":
|
17726 |
"num_train_epochs": 2,
|
17727 |
"save_steps": 50,
|
17728 |
"stateful_callbacks": {
|
@@ -17737,7 +17826,7 @@
|
|
17737 |
"attributes": {}
|
17738 |
}
|
17739 |
},
|
17740 |
-
"total_flos":
|
17741 |
"train_batch_size": 1,
|
17742 |
"trial_name": null,
|
17743 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.2251692138066215,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17718 |
"eval_steps_per_second": 0.735,
|
17719 |
"num_input_tokens_seen": 66462256,
|
17720 |
"step": 9950
|
17721 |
+
},
|
17722 |
+
{
|
17723 |
+
"epoch": 1.2196563658080917,
|
17724 |
+
"grad_norm": 1.4544809627964557,
|
17725 |
+
"learning_rate": 3.615874587797381e-05,
|
17726 |
+
"loss": 0.2162,
|
17727 |
+
"num_input_tokens_seen": 66495928,
|
17728 |
+
"step": 9955
|
17729 |
+
},
|
17730 |
+
{
|
17731 |
+
"epoch": 1.220268904474595,
|
17732 |
+
"grad_norm": 1.314166192503101,
|
17733 |
+
"learning_rate": 3.6110084317858005e-05,
|
17734 |
+
"loss": 0.2371,
|
17735 |
+
"num_input_tokens_seen": 66529384,
|
17736 |
+
"step": 9960
|
17737 |
+
},
|
17738 |
+
{
|
17739 |
+
"epoch": 1.2208814431410984,
|
17740 |
+
"grad_norm": 1.8272913355200002,
|
17741 |
+
"learning_rate": 3.606143701000271e-05,
|
17742 |
+
"loss": 0.2425,
|
17743 |
+
"num_input_tokens_seen": 66563312,
|
17744 |
+
"step": 9965
|
17745 |
+
},
|
17746 |
+
{
|
17747 |
+
"epoch": 1.2214939818076016,
|
17748 |
+
"grad_norm": 1.1190635076196744,
|
17749 |
+
"learning_rate": 3.6012804004324326e-05,
|
17750 |
+
"loss": 0.228,
|
17751 |
+
"num_input_tokens_seen": 66596840,
|
17752 |
+
"step": 9970
|
17753 |
+
},
|
17754 |
+
{
|
17755 |
+
"epoch": 1.2221065204741048,
|
17756 |
+
"grad_norm": 1.417082438708923,
|
17757 |
+
"learning_rate": 3.596418535072451e-05,
|
17758 |
+
"loss": 0.2407,
|
17759 |
+
"num_input_tokens_seen": 66630168,
|
17760 |
+
"step": 9975
|
17761 |
+
},
|
17762 |
+
{
|
17763 |
+
"epoch": 1.2227190591406083,
|
17764 |
+
"grad_norm": 1.5615601472935723,
|
17765 |
+
"learning_rate": 3.591558109909025e-05,
|
17766 |
+
"loss": 0.2049,
|
17767 |
+
"num_input_tokens_seen": 66663848,
|
17768 |
+
"step": 9980
|
17769 |
+
},
|
17770 |
+
{
|
17771 |
+
"epoch": 1.2233315978071115,
|
17772 |
+
"grad_norm": 1.061850813247257,
|
17773 |
+
"learning_rate": 3.586699129929373e-05,
|
17774 |
+
"loss": 0.1661,
|
17775 |
+
"num_input_tokens_seen": 66698912,
|
17776 |
+
"step": 9985
|
17777 |
+
},
|
17778 |
+
{
|
17779 |
+
"epoch": 1.223944136473615,
|
17780 |
+
"grad_norm": 1.0446245994987036,
|
17781 |
+
"learning_rate": 3.5818416001192276e-05,
|
17782 |
+
"loss": 0.2329,
|
17783 |
+
"num_input_tokens_seen": 66732112,
|
17784 |
+
"step": 9990
|
17785 |
+
},
|
17786 |
+
{
|
17787 |
+
"epoch": 1.2245566751401182,
|
17788 |
+
"grad_norm": 1.0453311293261525,
|
17789 |
+
"learning_rate": 3.576985525462841e-05,
|
17790 |
+
"loss": 0.2364,
|
17791 |
+
"num_input_tokens_seen": 66765832,
|
17792 |
+
"step": 9995
|
17793 |
+
},
|
17794 |
+
{
|
17795 |
+
"epoch": 1.2251692138066215,
|
17796 |
+
"grad_norm": 1.022044445150293,
|
17797 |
+
"learning_rate": 3.572130910942964e-05,
|
17798 |
+
"loss": 0.2736,
|
17799 |
+
"num_input_tokens_seen": 66798760,
|
17800 |
+
"step": 10000
|
17801 |
+
},
|
17802 |
+
{
|
17803 |
+
"epoch": 1.2251692138066215,
|
17804 |
+
"eval_loss": 0.15618817508220673,
|
17805 |
+
"eval_runtime": 20.4016,
|
17806 |
+
"eval_samples_per_second": 2.941,
|
17807 |
+
"eval_steps_per_second": 0.735,
|
17808 |
+
"num_input_tokens_seen": 66798760,
|
17809 |
+
"step": 10000
|
17810 |
}
|
17811 |
],
|
17812 |
"logging_steps": 5,
|
17813 |
"max_steps": 16324,
|
17814 |
+
"num_input_tokens_seen": 66798760,
|
17815 |
"num_train_epochs": 2,
|
17816 |
"save_steps": 50,
|
17817 |
"stateful_callbacks": {
|
|
|
17826 |
"attributes": {}
|
17827 |
}
|
17828 |
},
|
17829 |
+
"total_flos": 4189946547535872.0,
|
17830 |
"train_batch_size": 1,
|
17831 |
"trial_name": null,
|
17832 |
"trial_params": null
|