Training in progress, step 12000, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12000/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12000/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12000/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12000/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1082457bebc6ec3cbfedcbeb51773ce1a8a48f6301c6edb5b36375b688353dc
|
3 |
size 29034840
|
last-checkpoint/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcd282575201abb845e4547aa3218e69dd0c82b53701da19395500b137e88b00
|
3 |
+
size 43429616
|
last-checkpoint/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36557aa02ff68fab17b6a497fbebdca74a729931d59a7cbb7d78e0485332f24d
|
3 |
+
size 43429616
|
last-checkpoint/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a025172d4c6e03423cb2806bf3d84611cc408dec31a2c8c578ae9eeb8313e9f
|
3 |
+
size 43429616
|
last-checkpoint/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac1c7e94b722fceac222a804f4789b4797c7c139ed8b4dd16266ab1660e27748
|
3 |
+
size 43429616
|
last-checkpoint/global_step12000/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2e78ae8e5dbac3adf4beb071f4e1f3ba95f56e4301c5060b0f4993f68bc38cf
|
3 |
+
size 637299
|
last-checkpoint/global_step12000/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7d40c8d91d68c0f848ffa845291bbf02e099fddbddbbbcca3576b2f9f3eff14
|
3 |
+
size 637171
|
last-checkpoint/global_step12000/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a13e1124a23a85eeca805e121f8b25ac3b3993a36c3ef49d4f4a33c8eae3d44
|
3 |
+
size 637171
|
last-checkpoint/global_step12000/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2736c1b3e5de618714277113984c2240d97bc552e624c19701ab8bf642636cd
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step12000
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c19f1aa2e9d61d357b6204eae1219eaf78e21bc8b326b4ae45539d10d19b552e
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dddbc25c1e4accbb8ee3e8c21c61d1a8d47c2223f777e10f16a31d9fe3716bd1
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44dfda6d0cf493b706b65c41a9191234d5faf1a15068d781e2015b936fbf1040
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e52156225dccd5afb40ba3b9bf5ef7185d674a38c6e5cda245b57dc54a65ecdf
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae47c8c40a05ad370051cd694712264f349dd136bc69f50e79d1844afc8bc9b4
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -21278,11 +21278,100 @@
|
|
21278 |
"eval_steps_per_second": 0.772,
|
21279 |
"num_input_tokens_seen": 79898064,
|
21280 |
"step": 11950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21281 |
}
|
21282 |
],
|
21283 |
"logging_steps": 5,
|
21284 |
"max_steps": 16324,
|
21285 |
-
"num_input_tokens_seen":
|
21286 |
"num_train_epochs": 2,
|
21287 |
"save_steps": 50,
|
21288 |
"stateful_callbacks": {
|
@@ -21297,7 +21386,7 @@
|
|
21297 |
"attributes": {}
|
21298 |
}
|
21299 |
},
|
21300 |
-
"total_flos":
|
21301 |
"train_batch_size": 1,
|
21302 |
"trial_name": null,
|
21303 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.4701846804079508,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 12000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
21278 |
"eval_steps_per_second": 0.772,
|
21279 |
"num_input_tokens_seen": 79898064,
|
21280 |
"step": 11950
|
21281 |
+
},
|
21282 |
+
{
|
21283 |
+
"epoch": 1.4646718324094208,
|
21284 |
+
"grad_norm": 1.427037338582067,
|
21285 |
+
"learning_rate": 1.8340324528853237e-05,
|
21286 |
+
"loss": 0.2375,
|
21287 |
+
"num_input_tokens_seen": 79931680,
|
21288 |
+
"step": 11955
|
21289 |
+
},
|
21290 |
+
{
|
21291 |
+
"epoch": 1.4652843710759242,
|
21292 |
+
"grad_norm": 1.2338777567554464,
|
21293 |
+
"learning_rate": 1.830113957279535e-05,
|
21294 |
+
"loss": 0.2129,
|
21295 |
+
"num_input_tokens_seen": 79966176,
|
21296 |
+
"step": 11960
|
21297 |
+
},
|
21298 |
+
{
|
21299 |
+
"epoch": 1.4658969097424275,
|
21300 |
+
"grad_norm": 1.4924198149687233,
|
21301 |
+
"learning_rate": 1.8261987142522906e-05,
|
21302 |
+
"loss": 0.2137,
|
21303 |
+
"num_input_tokens_seen": 80000440,
|
21304 |
+
"step": 11965
|
21305 |
+
},
|
21306 |
+
{
|
21307 |
+
"epoch": 1.4665094484089307,
|
21308 |
+
"grad_norm": 1.113112713151578,
|
21309 |
+
"learning_rate": 1.8222867278209714e-05,
|
21310 |
+
"loss": 0.1713,
|
21311 |
+
"num_input_tokens_seen": 80034544,
|
21312 |
+
"step": 11970
|
21313 |
+
},
|
21314 |
+
{
|
21315 |
+
"epoch": 1.4671219870754342,
|
21316 |
+
"grad_norm": 1.2727009402893619,
|
21317 |
+
"learning_rate": 1.8183780019996123e-05,
|
21318 |
+
"loss": 0.1994,
|
21319 |
+
"num_input_tokens_seen": 80068616,
|
21320 |
+
"step": 11975
|
21321 |
+
},
|
21322 |
+
{
|
21323 |
+
"epoch": 1.4677345257419374,
|
21324 |
+
"grad_norm": 1.3941438496106238,
|
21325 |
+
"learning_rate": 1.814472540798906e-05,
|
21326 |
+
"loss": 0.2176,
|
21327 |
+
"num_input_tokens_seen": 80102032,
|
21328 |
+
"step": 11980
|
21329 |
+
},
|
21330 |
+
{
|
21331 |
+
"epoch": 1.4683470644084409,
|
21332 |
+
"grad_norm": 1.682655836854595,
|
21333 |
+
"learning_rate": 1.8105703482261983e-05,
|
21334 |
+
"loss": 0.2363,
|
21335 |
+
"num_input_tokens_seen": 80136208,
|
21336 |
+
"step": 11985
|
21337 |
+
},
|
21338 |
+
{
|
21339 |
+
"epoch": 1.468959603074944,
|
21340 |
+
"grad_norm": 1.5733409771340396,
|
21341 |
+
"learning_rate": 1.806671428285474e-05,
|
21342 |
+
"loss": 0.2082,
|
21343 |
+
"num_input_tokens_seen": 80170208,
|
21344 |
+
"step": 11990
|
21345 |
+
},
|
21346 |
+
{
|
21347 |
+
"epoch": 1.4695721417414473,
|
21348 |
+
"grad_norm": 0.9718487815537233,
|
21349 |
+
"learning_rate": 1.8027757849773657e-05,
|
21350 |
+
"loss": 0.1776,
|
21351 |
+
"num_input_tokens_seen": 80205136,
|
21352 |
+
"step": 11995
|
21353 |
+
},
|
21354 |
+
{
|
21355 |
+
"epoch": 1.4701846804079508,
|
21356 |
+
"grad_norm": 41.69567000042487,
|
21357 |
+
"learning_rate": 1.798883422299143e-05,
|
21358 |
+
"loss": 0.2189,
|
21359 |
+
"num_input_tokens_seen": 80238176,
|
21360 |
+
"step": 12000
|
21361 |
+
},
|
21362 |
+
{
|
21363 |
+
"epoch": 1.4701846804079508,
|
21364 |
+
"eval_loss": 0.07542052865028381,
|
21365 |
+
"eval_runtime": 19.2561,
|
21366 |
+
"eval_samples_per_second": 3.116,
|
21367 |
+
"eval_steps_per_second": 0.779,
|
21368 |
+
"num_input_tokens_seen": 80238176,
|
21369 |
+
"step": 12000
|
21370 |
}
|
21371 |
],
|
21372 |
"logging_steps": 5,
|
21373 |
"max_steps": 16324,
|
21374 |
+
"num_input_tokens_seen": 80238176,
|
21375 |
"num_train_epochs": 2,
|
21376 |
"save_steps": 50,
|
21377 |
"stateful_callbacks": {
|
|
|
21386 |
"attributes": {}
|
21387 |
}
|
21388 |
},
|
21389 |
+
"total_flos": 5032876634275840.0,
|
21390 |
"train_batch_size": 1,
|
21391 |
"trial_name": null,
|
21392 |
"trial_params": null
|