Training in progress, step 10650, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10650/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10650/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10650/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10650/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10650/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10650/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10650/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10650/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d437bc70c4c04c351d9b268de8b986e9c10960030d11d9b5cc0a07a032d4e75
|
3 |
size 29034840
|
last-checkpoint/global_step10650/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c261636c3df0562dd1476438915bacee16d685655a6be339b6c2fcb39887b82b
|
3 |
+
size 43429616
|
last-checkpoint/global_step10650/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06071e93820617bbbaf8d2ac106d765d8e77239b0921a89abbab4afc88bf4f54
|
3 |
+
size 43429616
|
last-checkpoint/global_step10650/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:959532c115c4ffdb9439d0ddd4b7af71f9bc386acfc816a8011d28cb419036d0
|
3 |
+
size 43429616
|
last-checkpoint/global_step10650/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba42b8d4967ffcb9d5e350e75e85706faf9742570daa843fd8beb4d291c238fb
|
3 |
+
size 43429616
|
last-checkpoint/global_step10650/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:563087516e6fc6458e3ec0687e1eb861035d70ea480ab603abc913afb64230ae
|
3 |
+
size 637299
|
last-checkpoint/global_step10650/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a32e04c4bb15deccc3d759468c493aa07253776f77e1d1d4f931e4d0e7693c08
|
3 |
+
size 637171
|
last-checkpoint/global_step10650/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cb160edf404eef880dca232404353124e40f3b51c2a88e1049faf0c32723eaa
|
3 |
+
size 637171
|
last-checkpoint/global_step10650/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98b605490906905eb8c61c8c35c3fc5fbd32f8ba12f748993ee9098045dbcde9
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10650
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:882705638474934670faaaf11f480a9ca965116088d43b545d3c16f14930bd88
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b49e541bf1b6ee313511bcb33f9c57c56d53d3ca5fc5060ada8ae009ad408c0
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c575f73a7906ffee30747f33b3ff9d606e71678d1c70f907532bff0706c91b26
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad85b3229606911aae4c7db32cce537b0b9493ac5a6f9cb5cde256955cbe0543
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:284555ebd24f2f7c70a3cb7790c9d4c47d92d96814317f7156c53c32553482a5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18875,11 +18875,100 @@
|
|
18875 |
"eval_steps_per_second": 0.76,
|
18876 |
"num_input_tokens_seen": 70832136,
|
18877 |
"step": 10600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18878 |
}
|
18879 |
],
|
18880 |
"logging_steps": 5,
|
18881 |
"max_steps": 16324,
|
18882 |
-
"num_input_tokens_seen":
|
18883 |
"num_train_epochs": 2,
|
18884 |
"save_steps": 50,
|
18885 |
"stateful_callbacks": {
|
@@ -18894,7 +18983,7 @@
|
|
18894 |
"attributes": {}
|
18895 |
}
|
18896 |
},
|
18897 |
-
"total_flos":
|
18898 |
"train_batch_size": 1,
|
18899 |
"trial_name": null,
|
18900 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.3047992404520534,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10650,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18875 |
"eval_steps_per_second": 0.76,
|
18876 |
"num_input_tokens_seen": 70832136,
|
18877 |
"step": 10600
|
18878 |
+
},
|
18879 |
+
{
|
18880 |
+
"epoch": 1.2992863924535236,
|
18881 |
+
"grad_norm": 1.035231554256891,
|
18882 |
+
"learning_rate": 2.996992509055528e-05,
|
18883 |
+
"loss": 0.218,
|
18884 |
+
"num_input_tokens_seen": 70866120,
|
18885 |
+
"step": 10605
|
18886 |
+
},
|
18887 |
+
{
|
18888 |
+
"epoch": 1.299898931120027,
|
18889 |
+
"grad_norm": 1.501133744865775,
|
18890 |
+
"learning_rate": 2.992352904582717e-05,
|
18891 |
+
"loss": 0.215,
|
18892 |
+
"num_input_tokens_seen": 70899936,
|
18893 |
+
"step": 10610
|
18894 |
+
},
|
18895 |
+
{
|
18896 |
+
"epoch": 1.3005114697865303,
|
18897 |
+
"grad_norm": 1.50291918580223,
|
18898 |
+
"learning_rate": 2.9877153601302893e-05,
|
18899 |
+
"loss": 0.222,
|
18900 |
+
"num_input_tokens_seen": 70933288,
|
18901 |
+
"step": 10615
|
18902 |
+
},
|
18903 |
+
{
|
18904 |
+
"epoch": 1.3011240084530336,
|
18905 |
+
"grad_norm": 1.2133468261955178,
|
18906 |
+
"learning_rate": 2.9830798804567716e-05,
|
18907 |
+
"loss": 0.2343,
|
18908 |
+
"num_input_tokens_seen": 70967296,
|
18909 |
+
"step": 10620
|
18910 |
+
},
|
18911 |
+
{
|
18912 |
+
"epoch": 1.3017365471195368,
|
18913 |
+
"grad_norm": 1.1725568338460373,
|
18914 |
+
"learning_rate": 2.9784464703185666e-05,
|
18915 |
+
"loss": 0.2402,
|
18916 |
+
"num_input_tokens_seen": 71000784,
|
18917 |
+
"step": 10625
|
18918 |
+
},
|
18919 |
+
{
|
18920 |
+
"epoch": 1.3023490857860403,
|
18921 |
+
"grad_norm": 1.0014305965405448,
|
18922 |
+
"learning_rate": 2.973815134469958e-05,
|
18923 |
+
"loss": 0.2326,
|
18924 |
+
"num_input_tokens_seen": 71034616,
|
18925 |
+
"step": 10630
|
18926 |
+
},
|
18927 |
+
{
|
18928 |
+
"epoch": 1.3029616244525435,
|
18929 |
+
"grad_norm": 1.3780511441318963,
|
18930 |
+
"learning_rate": 2.9691858776630965e-05,
|
18931 |
+
"loss": 0.2387,
|
18932 |
+
"num_input_tokens_seen": 71067488,
|
18933 |
+
"step": 10635
|
18934 |
+
},
|
18935 |
+
{
|
18936 |
+
"epoch": 1.303574163119047,
|
18937 |
+
"grad_norm": 1.544176148681233,
|
18938 |
+
"learning_rate": 2.964558704648003e-05,
|
18939 |
+
"loss": 0.2606,
|
18940 |
+
"num_input_tokens_seen": 71100848,
|
18941 |
+
"step": 10640
|
18942 |
+
},
|
18943 |
+
{
|
18944 |
+
"epoch": 1.3041867017855502,
|
18945 |
+
"grad_norm": 1.5606371132464487,
|
18946 |
+
"learning_rate": 2.959933620172559e-05,
|
18947 |
+
"loss": 0.2349,
|
18948 |
+
"num_input_tokens_seen": 71134040,
|
18949 |
+
"step": 10645
|
18950 |
+
},
|
18951 |
+
{
|
18952 |
+
"epoch": 1.3047992404520534,
|
18953 |
+
"grad_norm": 1.3061543164710514,
|
18954 |
+
"learning_rate": 2.9553106289825028e-05,
|
18955 |
+
"loss": 0.2106,
|
18956 |
+
"num_input_tokens_seen": 71167464,
|
18957 |
+
"step": 10650
|
18958 |
+
},
|
18959 |
+
{
|
18960 |
+
"epoch": 1.3047992404520534,
|
18961 |
+
"eval_loss": 0.14994314312934875,
|
18962 |
+
"eval_runtime": 19.5296,
|
18963 |
+
"eval_samples_per_second": 3.072,
|
18964 |
+
"eval_steps_per_second": 0.768,
|
18965 |
+
"num_input_tokens_seen": 71167464,
|
18966 |
+
"step": 10650
|
18967 |
}
|
18968 |
],
|
18969 |
"logging_steps": 5,
|
18970 |
"max_steps": 16324,
|
18971 |
+
"num_input_tokens_seen": 71167464,
|
18972 |
"num_train_epochs": 2,
|
18973 |
"save_steps": 50,
|
18974 |
"stateful_callbacks": {
|
|
|
18983 |
"attributes": {}
|
18984 |
}
|
18985 |
},
|
18986 |
+
"total_flos": 4464001544224768.0,
|
18987 |
"train_batch_size": 1,
|
18988 |
"trial_name": null,
|
18989 |
"trial_params": null
|