Training in progress, step 10900, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10900/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10900/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10900/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10900/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ff0736f0881487b8244d2133fef7cb88a6ac691a37848cb82a476493d6efeee
|
3 |
size 29034840
|
last-checkpoint/global_step10900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:332ff7240f2b8d7446bd7867e86e17e8d14f45c08f046dbe455a2347bafd8922
|
3 |
+
size 43429616
|
last-checkpoint/global_step10900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce32ae173846d823cf9feb02140f23b9acd9b4d70c4a8d4cb30d6434b1dc7398
|
3 |
+
size 43429616
|
last-checkpoint/global_step10900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74d4c9bf4c2f57a3fc2520f996a4427cfccbbc1b1adb6c9d799360e51b5215c3
|
3 |
+
size 43429616
|
last-checkpoint/global_step10900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4838b04cd027a8ae8be16d4ccfc4cc0a27397a3cf67b764ebc10f91149e8a7d
|
3 |
+
size 43429616
|
last-checkpoint/global_step10900/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:067c5d08b56bda1ed8f453d34b0eec21c29d29346967d3b105f39e2968e35ccf
|
3 |
+
size 637299
|
last-checkpoint/global_step10900/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:156b88936bb89f49e54d5cac5ce0640b90f00607787ccf1f90953cb9bb311246
|
3 |
+
size 637171
|
last-checkpoint/global_step10900/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f57c00d3d0d7f4e3be04b98b5c779e7c3595e8031cab176dbb35667d0101c2
|
3 |
+
size 637171
|
last-checkpoint/global_step10900/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac44236a809ddcff98bf0ef937d14a7257fdfe4b119c5dfd9151e17b84a84367
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10900
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9d3a6160d0905edda4f5d6565a1e909b7492e9250558ddb952664b28cf76f6f
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58e47c675ebf1b1688491bf8baefbb380531c820f51050fe07d1c2836f640595
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:126cc2b8ba359be76c7c08b8c5b404d8759c46dce0b0fe23b4ab7fa6565d3ddb
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a0801e4fd6f4317ec8793f2b6303686ab6868ed680c95a065417ff9ee1ac54a
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:151dcdc36f0c528cf744fa0ce59d21db50650332df291ef322548af3dd56274c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -19320,11 +19320,100 @@
|
|
19320 |
"eval_steps_per_second": 0.75,
|
19321 |
"num_input_tokens_seen": 72511424,
|
19322 |
"step": 10850
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19323 |
}
|
19324 |
],
|
19325 |
"logging_steps": 5,
|
19326 |
"max_steps": 16324,
|
19327 |
-
"num_input_tokens_seen":
|
19328 |
"num_train_epochs": 2,
|
19329 |
"save_steps": 50,
|
19330 |
"stateful_callbacks": {
|
@@ -19339,7 +19428,7 @@
|
|
19339 |
"attributes": {}
|
19340 |
}
|
19341 |
},
|
19342 |
-
"total_flos":
|
19343 |
"train_batch_size": 1,
|
19344 |
"trial_name": null,
|
19345 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.3354261737772197,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
19320 |
"eval_steps_per_second": 0.75,
|
19321 |
"num_input_tokens_seen": 72511424,
|
19322 |
"step": 10850
|
19323 |
+
},
|
19324 |
+
{
|
19325 |
+
"epoch": 1.3299133257786897,
|
19326 |
+
"grad_norm": 0.9400960640670121,
|
19327 |
+
"learning_rate": 2.767628579220536e-05,
|
19328 |
+
"loss": 0.2181,
|
19329 |
+
"num_input_tokens_seen": 72545080,
|
19330 |
+
"step": 10855
|
19331 |
+
},
|
19332 |
+
{
|
19333 |
+
"epoch": 1.3305258644451932,
|
19334 |
+
"grad_norm": 0.9574102034561467,
|
19335 |
+
"learning_rate": 2.7630977623137244e-05,
|
19336 |
+
"loss": 0.2648,
|
19337 |
+
"num_input_tokens_seen": 72578272,
|
19338 |
+
"step": 10860
|
19339 |
+
},
|
19340 |
+
{
|
19341 |
+
"epoch": 1.3311384031116964,
|
19342 |
+
"grad_norm": 1.105259832070157,
|
19343 |
+
"learning_rate": 2.758569240662998e-05,
|
19344 |
+
"loss": 0.1966,
|
19345 |
+
"num_input_tokens_seen": 72612536,
|
19346 |
+
"step": 10865
|
19347 |
+
},
|
19348 |
+
{
|
19349 |
+
"epoch": 1.3317509417781999,
|
19350 |
+
"grad_norm": 1.2001173134882535,
|
19351 |
+
"learning_rate": 2.7540430189150098e-05,
|
19352 |
+
"loss": 0.247,
|
19353 |
+
"num_input_tokens_seen": 72645920,
|
19354 |
+
"step": 10870
|
19355 |
+
},
|
19356 |
+
{
|
19357 |
+
"epoch": 1.332363480444703,
|
19358 |
+
"grad_norm": 1.3396654675470272,
|
19359 |
+
"learning_rate": 2.7495191017140582e-05,
|
19360 |
+
"loss": 0.1914,
|
19361 |
+
"num_input_tokens_seen": 72680416,
|
19362 |
+
"step": 10875
|
19363 |
+
},
|
19364 |
+
{
|
19365 |
+
"epoch": 1.3329760191112063,
|
19366 |
+
"grad_norm": 1.373286103807185,
|
19367 |
+
"learning_rate": 2.7449974937020778e-05,
|
19368 |
+
"loss": 0.2471,
|
19369 |
+
"num_input_tokens_seen": 72714560,
|
19370 |
+
"step": 10880
|
19371 |
+
},
|
19372 |
+
{
|
19373 |
+
"epoch": 1.3335885577777098,
|
19374 |
+
"grad_norm": 11.397219776191838,
|
19375 |
+
"learning_rate": 2.7404781995186267e-05,
|
19376 |
+
"loss": 0.231,
|
19377 |
+
"num_input_tokens_seen": 72748336,
|
19378 |
+
"step": 10885
|
19379 |
+
},
|
19380 |
+
{
|
19381 |
+
"epoch": 1.334201096444213,
|
19382 |
+
"grad_norm": 1.6874289712995845,
|
19383 |
+
"learning_rate": 2.7359612238008968e-05,
|
19384 |
+
"loss": 0.2507,
|
19385 |
+
"num_input_tokens_seen": 72782040,
|
19386 |
+
"step": 10890
|
19387 |
+
},
|
19388 |
+
{
|
19389 |
+
"epoch": 1.3348136351107165,
|
19390 |
+
"grad_norm": 1.0849549796943527,
|
19391 |
+
"learning_rate": 2.7314465711836967e-05,
|
19392 |
+
"loss": 0.2298,
|
19393 |
+
"num_input_tokens_seen": 72815992,
|
19394 |
+
"step": 10895
|
19395 |
+
},
|
19396 |
+
{
|
19397 |
+
"epoch": 1.3354261737772197,
|
19398 |
+
"grad_norm": 0.9600258760416324,
|
19399 |
+
"learning_rate": 2.7269342462994563e-05,
|
19400 |
+
"loss": 0.197,
|
19401 |
+
"num_input_tokens_seen": 72850512,
|
19402 |
+
"step": 10900
|
19403 |
+
},
|
19404 |
+
{
|
19405 |
+
"epoch": 1.3354261737772197,
|
19406 |
+
"eval_loss": 0.22737418115139008,
|
19407 |
+
"eval_runtime": 19.6079,
|
19408 |
+
"eval_samples_per_second": 3.06,
|
19409 |
+
"eval_steps_per_second": 0.765,
|
19410 |
+
"num_input_tokens_seen": 72850512,
|
19411 |
+
"step": 10900
|
19412 |
}
|
19413 |
],
|
19414 |
"logging_steps": 5,
|
19415 |
"max_steps": 16324,
|
19416 |
+
"num_input_tokens_seen": 72850512,
|
19417 |
"num_train_epochs": 2,
|
19418 |
"save_steps": 50,
|
19419 |
"stateful_callbacks": {
|
|
|
19428 |
"attributes": {}
|
19429 |
}
|
19430 |
},
|
19431 |
+
"total_flos": 4569494991798272.0,
|
19432 |
"train_batch_size": 1,
|
19433 |
"trial_name": null,
|
19434 |
"trial_params": null
|