Training in progress, step 10050, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10050/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10050/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10050/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10050/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0a6a6262afae6166e77e12add482e0b68ec745ba62898fef6f14cd815532e17
|
3 |
size 29034840
|
last-checkpoint/global_step10050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d94a2dd4e276b2697f19dbea4ce694bc412576b5b7a1b5e81c0cd456979b2ca8
|
3 |
+
size 43429616
|
last-checkpoint/global_step10050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1933ca4ed8155d2b20a9b2bd2c9172ac06103c506122885f40adb24b54be22cf
|
3 |
+
size 43429616
|
last-checkpoint/global_step10050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e89111a98363ae9587a1b1cfc00fe355428da0588f0ab50fd45546f99267180
|
3 |
+
size 43429616
|
last-checkpoint/global_step10050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3ced2c62f2f31aa9c015c5963c96063ce25c571d21b55cb5f207bc2a8e8632d
|
3 |
+
size 43429616
|
last-checkpoint/global_step10050/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bb545f3d7af85503010cd0dec10746f08ab85195f5f761d725d3e64627b6fdc
|
3 |
+
size 637299
|
last-checkpoint/global_step10050/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d9a2074cf86b0596c3c1043c255a7cecdcfd881f3b2a8c1f95f35ef9c6974d9
|
3 |
+
size 637171
|
last-checkpoint/global_step10050/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2ba27a8c4e9d538b0d1dbb587e4f61ae1108c541c6c2e18b7b487f28a95a048
|
3 |
+
size 637171
|
last-checkpoint/global_step10050/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64abd79776cfb99b8efe931a91766f8f5182dbc91b35c2aa1613d2a20ffc790b
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10050
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14f5c873d68a76dc6491d3b4a95315f1091083531e3aa4f8b2b7feb95b350da4
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23a6e3e9dc8c26044036cdc34507fcb486d4110a0c7dbfe0544e104eea4009e9
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b3f836738843d1bb9208fec3a6e760ce3c0184b7626c476ea70406afa8bae1a
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7993e4f988863604fc4b4467044a66db7ae4962d4f97c04568dfde8c6189851a
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2378b17bb22c7e6b87904c947af707f6da3c5b1f649a44e4ef948891abf0cd8e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17807,11 +17807,100 @@
|
|
17807 |
"eval_steps_per_second": 0.735,
|
17808 |
"num_input_tokens_seen": 66798760,
|
17809 |
"step": 10000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17810 |
}
|
17811 |
],
|
17812 |
"logging_steps": 5,
|
17813 |
"max_steps": 16324,
|
17814 |
-
"num_input_tokens_seen":
|
17815 |
"num_train_epochs": 2,
|
17816 |
"save_steps": 50,
|
17817 |
"stateful_callbacks": {
|
@@ -17826,7 +17915,7 @@
|
|
17826 |
"attributes": {}
|
17827 |
}
|
17828 |
},
|
17829 |
-
"total_flos":
|
17830 |
"train_batch_size": 1,
|
17831 |
"trial_name": null,
|
17832 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.2312946004716547,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10050,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17807 |
"eval_steps_per_second": 0.735,
|
17808 |
"num_input_tokens_seen": 66798760,
|
17809 |
"step": 10000
|
17810 |
+
},
|
17811 |
+
{
|
17812 |
+
"epoch": 1.225781752473125,
|
17813 |
+
"grad_norm": 1.2319192900049627,
|
17814 |
+
"learning_rate": 3.567277761540856e-05,
|
17815 |
+
"loss": 0.2139,
|
17816 |
+
"num_input_tokens_seen": 66832616,
|
17817 |
+
"step": 10005
|
17818 |
+
},
|
17819 |
+
{
|
17820 |
+
"epoch": 1.2263942911396282,
|
17821 |
+
"grad_norm": 1.1192074348789183,
|
17822 |
+
"learning_rate": 3.56242608223627e-05,
|
17823 |
+
"loss": 0.2183,
|
17824 |
+
"num_input_tokens_seen": 66866584,
|
17825 |
+
"step": 10010
|
17826 |
+
},
|
17827 |
+
{
|
17828 |
+
"epoch": 1.2270068298061316,
|
17829 |
+
"grad_norm": 0.9826408269969161,
|
17830 |
+
"learning_rate": 3.5575758780074475e-05,
|
17831 |
+
"loss": 0.2262,
|
17832 |
+
"num_input_tokens_seen": 66900296,
|
17833 |
+
"step": 10015
|
17834 |
+
},
|
17835 |
+
{
|
17836 |
+
"epoch": 1.2276193684726349,
|
17837 |
+
"grad_norm": 1.270723370422665,
|
17838 |
+
"learning_rate": 3.5527271538311205e-05,
|
17839 |
+
"loss": 0.2384,
|
17840 |
+
"num_input_tokens_seen": 66933888,
|
17841 |
+
"step": 10020
|
17842 |
+
},
|
17843 |
+
{
|
17844 |
+
"epoch": 1.228231907139138,
|
17845 |
+
"grad_norm": 1.5856866789234034,
|
17846 |
+
"learning_rate": 3.5478799146825024e-05,
|
17847 |
+
"loss": 0.257,
|
17848 |
+
"num_input_tokens_seen": 66966912,
|
17849 |
+
"step": 10025
|
17850 |
+
},
|
17851 |
+
{
|
17852 |
+
"epoch": 1.2288444458056416,
|
17853 |
+
"grad_norm": 1.0062064527623327,
|
17854 |
+
"learning_rate": 3.543034165535282e-05,
|
17855 |
+
"loss": 0.2248,
|
17856 |
+
"num_input_tokens_seen": 67000680,
|
17857 |
+
"step": 10030
|
17858 |
+
},
|
17859 |
+
{
|
17860 |
+
"epoch": 1.2294569844721448,
|
17861 |
+
"grad_norm": 1.056356299335675,
|
17862 |
+
"learning_rate": 3.538189911361618e-05,
|
17863 |
+
"loss": 0.2604,
|
17864 |
+
"num_input_tokens_seen": 67033808,
|
17865 |
+
"step": 10035
|
17866 |
+
},
|
17867 |
+
{
|
17868 |
+
"epoch": 1.230069523138648,
|
17869 |
+
"grad_norm": 1.1192489100705239,
|
17870 |
+
"learning_rate": 3.5333471571321375e-05,
|
17871 |
+
"loss": 0.1984,
|
17872 |
+
"num_input_tokens_seen": 67067608,
|
17873 |
+
"step": 10040
|
17874 |
+
},
|
17875 |
+
{
|
17876 |
+
"epoch": 1.2306820618051515,
|
17877 |
+
"grad_norm": 1.4785817376378587,
|
17878 |
+
"learning_rate": 3.528505907815925e-05,
|
17879 |
+
"loss": 0.1937,
|
17880 |
+
"num_input_tokens_seen": 67101040,
|
17881 |
+
"step": 10045
|
17882 |
+
},
|
17883 |
+
{
|
17884 |
+
"epoch": 1.2312946004716547,
|
17885 |
+
"grad_norm": 1.0996901038465798,
|
17886 |
+
"learning_rate": 3.523666168380525e-05,
|
17887 |
+
"loss": 0.2419,
|
17888 |
+
"num_input_tokens_seen": 67134040,
|
17889 |
+
"step": 10050
|
17890 |
+
},
|
17891 |
+
{
|
17892 |
+
"epoch": 1.2312946004716547,
|
17893 |
+
"eval_loss": 0.12978222966194153,
|
17894 |
+
"eval_runtime": 19.9168,
|
17895 |
+
"eval_samples_per_second": 3.013,
|
17896 |
+
"eval_steps_per_second": 0.753,
|
17897 |
+
"num_input_tokens_seen": 67134040,
|
17898 |
+
"step": 10050
|
17899 |
}
|
17900 |
],
|
17901 |
"logging_steps": 5,
|
17902 |
"max_steps": 16324,
|
17903 |
+
"num_input_tokens_seen": 67134040,
|
17904 |
"num_train_epochs": 2,
|
17905 |
"save_steps": 50,
|
17906 |
"stateful_callbacks": {
|
|
|
17915 |
"attributes": {}
|
17916 |
}
|
17917 |
},
|
17918 |
+
"total_flos": 4210985530949632.0,
|
17919 |
"train_batch_size": 1,
|
17920 |
"trial_name": null,
|
17921 |
"trial_params": null
|