Training in progress, step 10100, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10100/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10100/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10100/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10100/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e79db939395753141bfe875b738ef82cca9d45d7bcfdfddd95fe7c15504a2484
|
3 |
size 29034840
|
last-checkpoint/global_step10100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9223046293624a56244d529257e7407c6d36f8148a92b17f95f2c78c966f82a3
|
3 |
+
size 43429616
|
last-checkpoint/global_step10100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f1190c45107fc5fff28e5985b4fe42d3974a2a3b97ed76c2b6fbffb9bd464f9
|
3 |
+
size 43429616
|
last-checkpoint/global_step10100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00a6c9ae557379eadc72dae87a8b1c9f4b07b463c13c68402067b35f8f6fb723
|
3 |
+
size 43429616
|
last-checkpoint/global_step10100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45a79a6482597fa34aeb6e22a50bd2a625b05cbda1248d9d431c49ff2858a009
|
3 |
+
size 43429616
|
last-checkpoint/global_step10100/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a479968d1f02d603be5427645d2a84cd402b2e7984404dc0311f5a520ccc7afa
|
3 |
+
size 637299
|
last-checkpoint/global_step10100/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dee17f1a543d69c089e320b92706057ef8076de6b3533317c888eac6ee861a0
|
3 |
+
size 637171
|
last-checkpoint/global_step10100/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:233fd5e0cef08eb151b19ccde5ffae7cce896dfe8ac28ecad491494efb1d2d1f
|
3 |
+
size 637171
|
last-checkpoint/global_step10100/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:791be335648ac706646da7407a6c416cf1e7a04f57989605c056b3cec90b241a
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10100
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a0a5c8c7d25a319fd50aab320820c49b23d25eda1fb3bf644952fdcb35df10c
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d05960ea8920915bac2c24df21232e8d63ea0962959ab1bded77448f7c82743a
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db279daa0fca9102a6ff0177599f2c335a51d72a070de4249cc06ee79b379358
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0038ab880e16b83b70c8c8f774126e510d3953b05bcb4f1c08d33cdc03f1d99
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:605c43263e93e15eea18711fe7654abd11378f9d72f92eb1ebe17e1ac6d71f02
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17896,11 +17896,100 @@
|
|
17896 |
"eval_steps_per_second": 0.753,
|
17897 |
"num_input_tokens_seen": 67134040,
|
17898 |
"step": 10050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17899 |
}
|
17900 |
],
|
17901 |
"logging_steps": 5,
|
17902 |
"max_steps": 16324,
|
17903 |
-
"num_input_tokens_seen":
|
17904 |
"num_train_epochs": 2,
|
17905 |
"save_steps": 50,
|
17906 |
"stateful_callbacks": {
|
@@ -17915,7 +18004,7 @@
|
|
17915 |
"attributes": {}
|
17916 |
}
|
17917 |
},
|
17918 |
-
"total_flos":
|
17919 |
"train_batch_size": 1,
|
17920 |
"trial_name": null,
|
17921 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.237419987136688,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17896 |
"eval_steps_per_second": 0.753,
|
17897 |
"num_input_tokens_seen": 67134040,
|
17898 |
"step": 10050
|
17899 |
+
},
|
17900 |
+
{
|
17901 |
+
"epoch": 1.2319071391381582,
|
17902 |
+
"grad_norm": 1.2139171380445146,
|
17903 |
+
"learning_rate": 3.51882794379193e-05,
|
17904 |
+
"loss": 0.2511,
|
17905 |
+
"num_input_tokens_seen": 67167360,
|
17906 |
+
"step": 10055
|
17907 |
+
},
|
17908 |
+
{
|
17909 |
+
"epoch": 1.2325196778046614,
|
17910 |
+
"grad_norm": 1.289528857461294,
|
17911 |
+
"learning_rate": 3.513991239014579e-05,
|
17912 |
+
"loss": 0.2128,
|
17913 |
+
"num_input_tokens_seen": 67201216,
|
17914 |
+
"step": 10060
|
17915 |
+
},
|
17916 |
+
{
|
17917 |
+
"epoch": 1.2331322164711647,
|
17918 |
+
"grad_norm": 1.4892651263348418,
|
17919 |
+
"learning_rate": 3.509156059011352e-05,
|
17920 |
+
"loss": 0.2481,
|
17921 |
+
"num_input_tokens_seen": 67234824,
|
17922 |
+
"step": 10065
|
17923 |
+
},
|
17924 |
+
{
|
17925 |
+
"epoch": 1.2337447551376681,
|
17926 |
+
"grad_norm": 1.354493452944373,
|
17927 |
+
"learning_rate": 3.504322408743562e-05,
|
17928 |
+
"loss": 0.2275,
|
17929 |
+
"num_input_tokens_seen": 67268160,
|
17930 |
+
"step": 10070
|
17931 |
+
},
|
17932 |
+
{
|
17933 |
+
"epoch": 1.2343572938041714,
|
17934 |
+
"grad_norm": 1.1377962133780994,
|
17935 |
+
"learning_rate": 3.499490293170956e-05,
|
17936 |
+
"loss": 0.2403,
|
17937 |
+
"num_input_tokens_seen": 67301776,
|
17938 |
+
"step": 10075
|
17939 |
+
},
|
17940 |
+
{
|
17941 |
+
"epoch": 1.2349698324706746,
|
17942 |
+
"grad_norm": 1.3213479082546846,
|
17943 |
+
"learning_rate": 3.494659717251704e-05,
|
17944 |
+
"loss": 0.2145,
|
17945 |
+
"num_input_tokens_seen": 67336184,
|
17946 |
+
"step": 10080
|
17947 |
+
},
|
17948 |
+
{
|
17949 |
+
"epoch": 1.235582371137178,
|
17950 |
+
"grad_norm": 1.0822056854819904,
|
17951 |
+
"learning_rate": 3.489830685942397e-05,
|
17952 |
+
"loss": 0.2269,
|
17953 |
+
"num_input_tokens_seen": 67370240,
|
17954 |
+
"step": 10085
|
17955 |
+
},
|
17956 |
+
{
|
17957 |
+
"epoch": 1.2361949098036813,
|
17958 |
+
"grad_norm": 1.095493880410798,
|
17959 |
+
"learning_rate": 3.485003204198041e-05,
|
17960 |
+
"loss": 0.2157,
|
17961 |
+
"num_input_tokens_seen": 67404432,
|
17962 |
+
"step": 10090
|
17963 |
+
},
|
17964 |
+
{
|
17965 |
+
"epoch": 1.2368074484701848,
|
17966 |
+
"grad_norm": 1.4155671248879054,
|
17967 |
+
"learning_rate": 3.480177276972051e-05,
|
17968 |
+
"loss": 0.2186,
|
17969 |
+
"num_input_tokens_seen": 67437912,
|
17970 |
+
"step": 10095
|
17971 |
+
},
|
17972 |
+
{
|
17973 |
+
"epoch": 1.237419987136688,
|
17974 |
+
"grad_norm": 1.1597648700568828,
|
17975 |
+
"learning_rate": 3.475352909216246e-05,
|
17976 |
+
"loss": 0.2574,
|
17977 |
+
"num_input_tokens_seen": 67471000,
|
17978 |
+
"step": 10100
|
17979 |
+
},
|
17980 |
+
{
|
17981 |
+
"epoch": 1.237419987136688,
|
17982 |
+
"eval_loss": 0.21491877734661102,
|
17983 |
+
"eval_runtime": 19.4089,
|
17984 |
+
"eval_samples_per_second": 3.091,
|
17985 |
+
"eval_steps_per_second": 0.773,
|
17986 |
+
"num_input_tokens_seen": 67471000,
|
17987 |
+
"step": 10100
|
17988 |
}
|
17989 |
],
|
17990 |
"logging_steps": 5,
|
17991 |
"max_steps": 16324,
|
17992 |
+
"num_input_tokens_seen": 67471000,
|
17993 |
"num_train_epochs": 2,
|
17994 |
"save_steps": 50,
|
17995 |
"stateful_callbacks": {
|
|
|
18004 |
"attributes": {}
|
18005 |
}
|
18006 |
},
|
18007 |
+
"total_flos": 4232111462940672.0,
|
18008 |
"train_batch_size": 1,
|
18009 |
"trial_name": null,
|
18010 |
"trial_params": null
|