Training in progress, step 10200, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10200/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10200/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10200/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10200/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f45d5830c2e09f370b3d63c8ba834333afe356f626320b42c74a5ee1fe9417ef
|
3 |
size 29034840
|
last-checkpoint/global_step10200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d551158e0f34a36fdc3978643e23b675795b3f5f249eb404c0253f98e2484389
|
3 |
+
size 43429616
|
last-checkpoint/global_step10200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671c8a93f9fa8edb54f07e77ecf0657d35765a0357b65e69b53eefc590bb3b45
|
3 |
+
size 43429616
|
last-checkpoint/global_step10200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa7c38e62f627acf80f1e3c5041d7f1d477a739cd07d8687e25a2b2bcb5c42a8
|
3 |
+
size 43429616
|
last-checkpoint/global_step10200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7c71e3b6ba799372447ad2fcdcd0e8071089c6f3a7ad0d054bc6998603cb99f
|
3 |
+
size 43429616
|
last-checkpoint/global_step10200/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc15bc3b9e4151e842fc8099a51ffad55774d012eb69d52b4dab5fbf44ccb439
|
3 |
+
size 637299
|
last-checkpoint/global_step10200/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b357ea23b3f31e27e36a4ee5d5f694402e55c2970c38783b007bbe395a7ed3d7
|
3 |
+
size 637171
|
last-checkpoint/global_step10200/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a925f7de099825ce5aad1b7c9a92b2816a8e59dd1c239e704eaafae0545f0143
|
3 |
+
size 637171
|
last-checkpoint/global_step10200/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f3370483c49ceb4590bfeec7265de1cfc11e7cb342c52e89492acaf462dc3cc
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10200
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81c742bef4e9bd4396f69ec57fa3bd186faf61932dc3a3e366435e42756967d6
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a68921b2f8e05186da263a9a77a4b36025ed43aca0639d90191be06bdef3b767
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c2efaab0dbf0012b03c39da2b9cc6388d50975e301c40d4015caf7f591c7d11
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea9174784682317bb501948a41f1be2d5af58713ba3a3b62566dbb9e9f137c6b
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50a3e5820ba1c22bd9113df8ca1abded61b49b208d64e1c8f74ef02411fd9798
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18074,11 +18074,100 @@
|
|
18074 |
"eval_steps_per_second": 0.784,
|
18075 |
"num_input_tokens_seen": 67807536,
|
18076 |
"step": 10150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18077 |
}
|
18078 |
],
|
18079 |
"logging_steps": 5,
|
18080 |
"max_steps": 16324,
|
18081 |
-
"num_input_tokens_seen":
|
18082 |
"num_train_epochs": 2,
|
18083 |
"save_steps": 50,
|
18084 |
"stateful_callbacks": {
|
@@ -18093,7 +18182,7 @@
|
|
18093 |
"attributes": {}
|
18094 |
}
|
18095 |
},
|
18096 |
-
"total_flos":
|
18097 |
"train_batch_size": 1,
|
18098 |
"trial_name": null,
|
18099 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.2496707604667545,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18074 |
"eval_steps_per_second": 0.784,
|
18075 |
"num_input_tokens_seen": 67807536,
|
18076 |
"step": 10150
|
18077 |
+
},
|
18078 |
+
{
|
18079 |
+
"epoch": 1.2441579124682245,
|
18080 |
+
"grad_norm": 1.380027226146845,
|
18081 |
+
"learning_rate": 3.4223892035435414e-05,
|
18082 |
+
"loss": 0.2761,
|
18083 |
+
"num_input_tokens_seen": 67840480,
|
18084 |
+
"step": 10155
|
18085 |
+
},
|
18086 |
+
{
|
18087 |
+
"epoch": 1.244770451134728,
|
18088 |
+
"grad_norm": 0.9654065219994176,
|
18089 |
+
"learning_rate": 3.4175839350844576e-05,
|
18090 |
+
"loss": 0.2171,
|
18091 |
+
"num_input_tokens_seen": 67873680,
|
18092 |
+
"step": 10160
|
18093 |
+
},
|
18094 |
+
{
|
18095 |
+
"epoch": 1.2453829898012312,
|
18096 |
+
"grad_norm": 0.9787005093120792,
|
18097 |
+
"learning_rate": 3.41278029032177e-05,
|
18098 |
+
"loss": 0.2261,
|
18099 |
+
"num_input_tokens_seen": 67907360,
|
18100 |
+
"step": 10165
|
18101 |
+
},
|
18102 |
+
{
|
18103 |
+
"epoch": 1.2459955284677346,
|
18104 |
+
"grad_norm": 1.3834394549082887,
|
18105 |
+
"learning_rate": 3.407978274184435e-05,
|
18106 |
+
"loss": 0.2116,
|
18107 |
+
"num_input_tokens_seen": 67941136,
|
18108 |
+
"step": 10170
|
18109 |
+
},
|
18110 |
+
{
|
18111 |
+
"epoch": 1.2466080671342379,
|
18112 |
+
"grad_norm": 0.8379514180119303,
|
18113 |
+
"learning_rate": 3.4031778915997375e-05,
|
18114 |
+
"loss": 0.1925,
|
18115 |
+
"num_input_tokens_seen": 67975016,
|
18116 |
+
"step": 10175
|
18117 |
+
},
|
18118 |
+
{
|
18119 |
+
"epoch": 1.2472206058007411,
|
18120 |
+
"grad_norm": 1.1872491287630014,
|
18121 |
+
"learning_rate": 3.3983791474932864e-05,
|
18122 |
+
"loss": 0.194,
|
18123 |
+
"num_input_tokens_seen": 68009336,
|
18124 |
+
"step": 10180
|
18125 |
+
},
|
18126 |
+
{
|
18127 |
+
"epoch": 1.2478331444672446,
|
18128 |
+
"grad_norm": 1.384942926349532,
|
18129 |
+
"learning_rate": 3.393582046789012e-05,
|
18130 |
+
"loss": 0.243,
|
18131 |
+
"num_input_tokens_seen": 68042592,
|
18132 |
+
"step": 10185
|
18133 |
+
},
|
18134 |
+
{
|
18135 |
+
"epoch": 1.2484456831337478,
|
18136 |
+
"grad_norm": 0.9933902622789962,
|
18137 |
+
"learning_rate": 3.388786594409155e-05,
|
18138 |
+
"loss": 0.2092,
|
18139 |
+
"num_input_tokens_seen": 68076744,
|
18140 |
+
"step": 10190
|
18141 |
+
},
|
18142 |
+
{
|
18143 |
+
"epoch": 1.249058221800251,
|
18144 |
+
"grad_norm": 1.303521595759064,
|
18145 |
+
"learning_rate": 3.383992795274268e-05,
|
18146 |
+
"loss": 0.2521,
|
18147 |
+
"num_input_tokens_seen": 68110272,
|
18148 |
+
"step": 10195
|
18149 |
+
},
|
18150 |
+
{
|
18151 |
+
"epoch": 1.2496707604667545,
|
18152 |
+
"grad_norm": 1.2843223741616177,
|
18153 |
+
"learning_rate": 3.379200654303204e-05,
|
18154 |
+
"loss": 0.2047,
|
18155 |
+
"num_input_tokens_seen": 68143912,
|
18156 |
+
"step": 10200
|
18157 |
+
},
|
18158 |
+
{
|
18159 |
+
"epoch": 1.2496707604667545,
|
18160 |
+
"eval_loss": 0.16254977881908417,
|
18161 |
+
"eval_runtime": 19.3699,
|
18162 |
+
"eval_samples_per_second": 3.098,
|
18163 |
+
"eval_steps_per_second": 0.774,
|
18164 |
+
"num_input_tokens_seen": 68143912,
|
18165 |
+
"step": 10200
|
18166 |
}
|
18167 |
],
|
18168 |
"logging_steps": 5,
|
18169 |
"max_steps": 16324,
|
18170 |
+
"num_input_tokens_seen": 68143912,
|
18171 |
"num_train_epochs": 2,
|
18172 |
"save_steps": 50,
|
18173 |
"stateful_callbacks": {
|
|
|
18182 |
"attributes": {}
|
18183 |
}
|
18184 |
},
|
18185 |
+
"total_flos": 4274338021900288.0,
|
18186 |
"train_batch_size": 1,
|
18187 |
"trial_name": null,
|
18188 |
"trial_params": null
|