Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:601088f397eeb3909ce8e91cfb116fca6b84fc18d36265a3741a8a6c06aa0205
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:972fef381446dc54bb81002f849cea881ea6b4efd9f7415f7c2c1373b9b5bd66
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf60673445bdd44111c8e6ab141a0477e485f5bd2e6214db12f6a1de7ac3366
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da479f80097fdef6726424fff54e48c3d5a490f482fdb9070c5d237a52e37e8c
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2a41fb606f45f68ffb68b8c0c9c8f10c507bf08713651dd8823b5a65e17b4f6
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b79245c1084550d87dc88caeb54be1fb2882af094eafe31cfc3c2fc3d8ef9cb
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:669c5d9f2adedf284327718bb5ef779557cb3689d3eb72257288b6ab968d6d4f
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b04fa35c89561db794f4a8fcdf7083eef3e4aef17ee8943e20c2188189708ca6
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 1.3103408813476562,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0898,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 3.532261371612549,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.535,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 3.486604928970337,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.4653,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 6.665052890777588,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.5876,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 2.4172110557556152,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.5611,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 0.8320339918136597,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.088,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 4.306198596954346,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.498,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 2.505366802215576,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.2767,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 4.672132968902588,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.3158,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 3.8136868476867676,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.8975,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 0.8911241888999939,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1162,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 6.382510185241699,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.6136,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 5.372596740722656,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.363,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 4.140794277191162,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4008,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 8.768181800842285,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.9195,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.8078761100769043,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3291,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 6.264554023742676,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.767,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 11.220328330993652,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 1.473,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 3.9702634811401367,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3766,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 2.120680332183838,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3277,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 6.803924560546875,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.4957,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 2.5803332328796387,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1319,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 9.29668140411377,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 1.9043,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 3.864070415496826,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.515,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 0.14567677676677704,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3086,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 2.552821159362793,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.693,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 7.949587345123291,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.5568,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 1.4345118999481201,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.617,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 4.910844802856445,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.1581,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.9731581211090088,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.3823,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 10.148768424987793,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 1.3181,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 4.756891250610352,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.446,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 2.3829259872436523,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.1589,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 2.3166489601135254,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.8197,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 1.1867741346359253,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.0674,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 11.073589324951172,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 2.0834,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 2.9917004108428955,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.7352,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.03345826640725136,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.1388,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.6753065586090088,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.1295,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 2.0468225479125977,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.2035,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.6748114228248596,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.074,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 2.4343173503875732,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.1473,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.3457399904727936,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.0603,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.3236846923828125,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.3552,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.8542829751968384,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.0992,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 1.0302435159683228,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.1318,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 6.573387622833252,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.1148,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 7.864552974700928,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.3684,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 0.19719119369983673,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.301,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 0.3533047139644623,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.0359,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2053446886752256.0,
|
365 |
+
"train_loss": 0.5710413241386414,
|
366 |
+
"train_runtime": 100.1311,
|
367 |
+
"train_samples_per_second": 3.995,
|
368 |
+
"train_steps_per_second": 0.999
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2053446886752256.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:515801884b96a34fbe54560143329ce06d512f7bc9f748e41058588403e77c22
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bbc94f43e777c70d6db0593944717c276f0797216f959f3afc34de5433ab17d
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f33db7dfc952f99bdfd6f5ca79752d67f0bd782aedc6f71bc47157491890465b
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:152bd53476880a0673ea49289cc36843973ad9b0b0f54cf45bf1fae800f555c6
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c61bb6099a0787cce3325f6a28e587efc290398fb146f4d2404e4e1037d5e73a
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c86fea7f6a5f529d490af7ffa0540adb01ce00513d4cf95cdbb2fd4bcfc187f
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0440ab6c1705f26b96e4fb57a69009a2be64bf2be9750ec3be57311aafa7860
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98ba3961e9695875a3f1f66ffaf6fe5b528a8f0031ab86107cf824c6c890770f
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 4.053181171417236,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3885,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 0.0796952024102211,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0052,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 3.2136785984039307,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.2577,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 0.09704623371362686,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0057,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 0.2581525146961212,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0124,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 0.7397000789642334,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0302,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 1.1551579236984253,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0662,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 3.3530216217041016,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4311,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 0.0396430566906929,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0086,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 0.048732295632362366,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0033,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 1.054931640625,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0521,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 1.3746412992477417,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0877,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 3.6224493980407715,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3346,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 0.21258927881717682,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0154,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 0.0053559038788080215,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.5832,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 2.0714592933654785,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.1197,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 0.034640390425920486,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0017,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 0.035930756479501724,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0035,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 3.9088540077209473,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1674,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 0.10309179872274399,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0052,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 0.11337645351886749,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0074,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 0.006353128235787153,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.006,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 0.11211053282022476,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0061,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 0.7047258615493774,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0202,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 0.10752872377634048,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0068,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 11.707930564880371,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 1.1686,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 1.2754460573196411,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.0411,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 0.32534557580947876,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.0346,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 0.7839933633804321,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.169,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.016284512355923653,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.0085,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 0.01285564061254263,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.0037,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 0.017023563385009766,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.0031,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 0.5894005298614502,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.0344,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 2.5769262313842773,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.2399,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 0.11012467741966248,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.0076,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.2998049855232239,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.0185,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.9940203428268433,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.0655,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.38424286246299744,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.0414,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 0.009840243496000767,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.0233,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 0.0006893413374200463,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.401,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.08065320551395416,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.0393,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.3744912147521973,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.0766,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.011814103461802006,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.0015,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.8807648420333862,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.0752,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.019948307424783707,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.0012,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 0.02643703483045101,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.0024,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 0.035402942448854446,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.0249,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 0.0117949815467,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.0014,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 0.026442598551511765,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.0019,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 8.142333030700684,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.2835,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2069576665792512.0,
|
365 |
+
"train_loss": 0.10788208454847335,
|
366 |
+
"train_runtime": 99.8712,
|
367 |
+
"train_samples_per_second": 4.005,
|
368 |
+
"train_steps_per_second": 1.001
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2069576665792512.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f78b25bab6835f62f9822e5b6287e857a1b5ec83852ec2504feee15bba61bae
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0899b6fad8fcee471b664ea79e2ee9761b6fd2ee15b275fc97bc89a39f366260
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09828ae718f740f9789b0aad26812e31e7bfbaa7d1345bbe0d6f2133273618ae
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58e699f47be1f086e80f5927ab87c970e415fc2249dacbe477de24b2d4b4f290
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98076d1a485dee413d0a3058c25ae735c0c60b05929dabc07edc8b07230a699d
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecc202d5d7a13afe84562def5972aa2b53b55494013e0c796ee940e04ac06411
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8e35590c3355bd5576bfdc1cdf900c8965358057209a964522dbc96cc45bdfd
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e88943196a7a63864f254c5584ef10c4ebe5eb4a197c6fe2c426104db1c90fa
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 6.272475242614746,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.4564,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 1.9333751201629639,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2648,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 8.752674102783203,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.0459,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 6.856117248535156,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.074,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 1.8297804594039917,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.2858,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 1.140818476676941,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.3342,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 1.5394253730773926,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.5499,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 0.8623746633529663,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5739,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 1.4835646152496338,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5788,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 5.786844253540039,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.1156,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 1.184555172920227,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.393,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.456709146499634,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5986,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 2.970411539077759,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.9743,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 3.235511064529419,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.5695,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 1.388797640800476,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.3121,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 0.7918416857719421,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.4166,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 5.021376132965088,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 1.0664,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 2.587064266204834,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.4099,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 2.4496819972991943,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.7411,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 0.2078651487827301,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0655,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 4.392916202545166,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.6521,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 1.40581476688385,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.6168,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 3.039031744003296,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9925,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 0.7090831995010376,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1715,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 1.347177267074585,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2748,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 1.3329389095306396,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.1272,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.08257925510406494,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.2341,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 4.227807998657227,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.6303,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 4.126163959503174,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.1953,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 4.300910472869873,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.3218,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 1.9219881296157837,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.1746,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 2.9596614837646484,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.4584,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 3.6123971939086914,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.208,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.38876235485076904,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.2364,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 0.2788524329662323,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.1004,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.8127436637878418,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.4335,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 1.1976258754730225,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.3541,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 2.5443003177642822,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.5296,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 7.851327419281006,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.6387,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 0.6018063426017761,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.2443,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.8378749489784241,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.3417,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.4673182964324951,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.1931,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 2.720952033996582,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.4961,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.9158133268356323,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.3774,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 3.9416582584381104,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.3777,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 4.797536849975586,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 2.2166,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 0.18337757885456085,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.0285,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 2.1316816806793213,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.2311,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 2.1029789447784424,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.2891,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 5.11422061920166,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.8276,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 4915713315700736.0,
|
365 |
+
"train_loss": 0.5959944343566894,
|
366 |
+
"train_runtime": 164.7489,
|
367 |
+
"train_samples_per_second": 2.428,
|
368 |
+
"train_steps_per_second": 0.607
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 4915713315700736.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b90a5dd0fd661e09a67c5ff5ce2e4e6b7f27bce3cc8819b4a12d07ed2e923d0
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5707c613f0ab26a54511a0e422c567178a4e9646a71cb5ea16ca021a852528f
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:128ef94d7638cf3e09c9ad1624721e707d92df02f600fe7d15049c93dfba2d44
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cd25ea69cc102dc88a95c6e544b5ccb62a87a364e695ecc06a48c0d829d2107
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8fcd31d481950ba1813d170fae25768537f117f828c5e6493d281d312ec2ff0
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:982149de6ff717c45e71c05f899d16246f64df8b457c4d0255e66955582870d6
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:856d2908d444479fef018729000b9d0427d423ef37e51aaf9c7879b8975afedd
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91202957aa3e8f9c37e874e055083b17873ffbbb16fb5e7ed3f6eb86c0a00099
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 1.8781095743179321,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.7201,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 4.64780330657959,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.7212,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 4.34021520614624,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.1729,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 3.4610328674316406,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.5734,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 6.560781478881836,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.0993,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 4.692245960235596,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.8895,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 3.2022180557250977,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.6035,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 9.959211349487305,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4861,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 4.851236343383789,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 1.23,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 2.00787353515625,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.8202,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 7.79062557220459,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 1.9206,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.094217538833618,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.26,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 7.391254901885986,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.8843,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 5.2949700355529785,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4376,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 3.2532601356506348,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.5991,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 1.2858636379241943,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.5054,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 4.041250705718994,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.9352,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 3.889521598815918,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5882,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 9.420170783996582,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 1.4432,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 16.8153133392334,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.3346,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 7.50994873046875,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.0825,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 8.1231107711792,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 1.4161,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 8.429718017578125,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.49,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 3.6227402687072754,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.9992,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 5.889152526855469,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 1.4035,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 4.411243915557861,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.8505,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 7.3300628662109375,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 1.5046,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 10.950540542602539,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 2.0556,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 6.158329963684082,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.1124,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 3.026836395263672,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.6212,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 7.47479248046875,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 1.7262,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 4.971011161804199,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 1.1455,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 3.6353936195373535,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.9557,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.9492896199226379,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.741,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 14.06840705871582,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.9984,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 2.856049060821533,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.5109,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 2.644953966140747,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.8948,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 4.469645977020264,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 1.3262,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 2.310840368270874,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 1.0936,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 2.9231162071228027,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.9224,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 7.485569477081299,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 1.5591,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 3.495986223220825,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.3848,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 8.537229537963867,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.1093,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 7.835823059082031,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 1.5268,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 7.483861446380615,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 1.4335,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 6.753383636474609,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.2968,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 5.714827537536621,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.7101,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 2.2157304286956787,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.8783,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 5.616714954376221,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.445,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 7.787622451782227,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 1.4313,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2098644891205632.0,
|
365 |
+
"train_loss": 1.0769911789894104,
|
366 |
+
"train_runtime": 99.8148,
|
367 |
+
"train_samples_per_second": 4.007,
|
368 |
+
"train_steps_per_second": 1.002
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2098644891205632.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc04153b001e017d209e57e4f7e110ceffdcf8233d0d3c3af57e0f2fe48da24a
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d713770573d500be237293e72b94a1cc8dddbc8d4c7665b4020e3ff56dacf989
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d91972bff7e58989a0015e2c3ec3b73758bc6aa261e4de87fcbb0af0d536f409
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:add14524942e24d06b6fa87becf4438589ced02a70a0bb53df810aa4ac3da857
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bf1edf6d867ba7dd0fece4032f4c607a03cd3e354e59a4e213e6d2131f6f799
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87c2b00b249d1803956e5e975f2f439efc91691d91b31331ad305f184953e805
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f39d888f201ae7da27f5342683ce24f06ab6863474eaa5a4ee43ffacd50993b2
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c616640cd02faeca67fb9ad8c43c9ff3378582a127b880526740e422052d27b
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 3.8667259216308594,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.0569,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 1.0802192687988281,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.4849,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 1.5820523500442505,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.8122,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 3.165304183959961,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4609,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 2.1040215492248535,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.3716,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 2.737826347351074,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.1401,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 0.948131263256073,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.632,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 2.155320405960083,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.0287,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 1.2282243967056274,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5867,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 2.5964159965515137,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.6555,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 1.4527921676635742,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.9789,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 1.5666605234146118,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5485,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 3.837740182876587,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.8414,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 1.679490566253662,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.6234,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.4019229412078857,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6795,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.0277788639068604,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.6551,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 3.1049275398254395,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 1.3996,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 2.1981680393218994,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6906,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 0.4658830463886261,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1708,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 0.9990465044975281,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.2357,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 1.218131184577942,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.604,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 0.8305265307426453,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.263,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 4.220275402069092,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9145,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 3.2627382278442383,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.2621,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 7.637389183044434,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.6855,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 2.2909798622131348,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.4636,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 1.6831642389297485,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.3304,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 3.669160842895508,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.3234,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 9.243528366088867,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.9173,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 7.592850685119629,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 1.0405,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 4.612028121948242,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.9296,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 1.7067667245864868,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.4996,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 2.345529079437256,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.2364,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 4.779994487762451,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.9,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 3.520698070526123,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.274,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 1.043005108833313,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.1549,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 4.629940509796143,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.5627,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 1.3093677759170532,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.5079,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 0.6198597550392151,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.9565,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 2.102959156036377,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.4706,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 1.7533611059188843,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.6437,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.9639290571212769,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.6471,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 2.027463912963867,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.4873,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 2.5160040855407715,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 1.1644,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.7003493905067444,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.2009,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 1.787532925605774,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.6372,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 3.888639211654663,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.2432,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 2.0158474445343018,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.7993,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 3.9803707599639893,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.2802,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 2.657550573348999,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.7877,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 5692822418096128.0,
|
365 |
+
"train_loss": 0.7448138999938965,
|
366 |
+
"train_runtime": 164.7682,
|
367 |
+
"train_samples_per_second": 2.428,
|
368 |
+
"train_steps_per_second": 0.607
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 5692822418096128.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e10b6891ffb0d7ee2bbc7d4c5e22e7d3914155f461e036c14c64145d94b777bf
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7284e5b745babed407499e559af061cd69c87a12563b08b6075b15e2c942b8fc
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b84518474151c39c0ae7a01ab9e58001c8b59b1b4a6e02b12d1588ff9138cfd4
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba1d8705800a9de15220cdaab6edd725444a970ab5b0c3958e08f93f6b170d4d
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8924b694f6781834a72230a07694dc5273317d364888d728ebeb1bc0830255d
|
3 |
+
size 395786922
|