Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7281b71feb874201c3510fd2532c735c41834e0ea57f60b3a19d83a172c8062d
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43f3b3c81a25994ed2450656aad07f50b0d6a686213e90ee5936f5227e75b924
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02634f446483c9401811d202c4fb26c234e200c1b5bd389dca72585c2d61d034
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b593199404c65a7fb376d970ec893e60e94daef7a5ae8eed3ca517e9275fe0c
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f312bff96288926254f6abc38cb4e1313e8d188484ebbbeadf5f59357ce0f3e3
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03270abac1135f1e20d23bcbaf393ffd4157017414b071ece5ec558a5319692a
|
3 |
+
size 184221358
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc2eaa3387065398c5c4e6cb687acf55c8c80d95eddaf5452a6456e4dd5b1d6f
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bea9fbbcea6ab83084b33a90fc70ebbcc775b9472862c5f46300a085976a036
|
3 |
+
size 184220842
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 4.327882289886475,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3049,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 4.293352127075195,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.4343,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 8.278122901916504,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.464,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 5.572937488555908,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.1184,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 1.2564692497253418,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1493,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 2.756223201751709,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.1429,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 6.3286638259887695,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 1.0986,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 3.1671125888824463,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.421,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 12.160600662231445,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6418,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 2.2388546466827393,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3821,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 4.057492733001709,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2804,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.4171645641326904,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.2768,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 4.334846019744873,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.5183,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 5.31112003326416,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3826,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 8.947898864746094,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.7833,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 4.790311336517334,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.7227,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 1.9647458791732788,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.2151,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 7.152100086212158,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6738,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 9.002793312072754,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 1.2422,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 4.538879871368408,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.7256,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 7.2817158699035645,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.4922,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 8.116897583007812,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2341,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 5.7875776290893555,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.5828,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 2.9006869792938232,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.2646,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 5.325571060180664,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.5073,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 1.7935487031936646,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.3458,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 6.689419269561768,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.9642,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 9.98328685760498,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.7798,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 10.168957710266113,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.3466,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 5.621067523956299,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.4138,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 16.461015701293945,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.963,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 3.5031635761260986,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.6482,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 1.3376978635787964,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.1724,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 1.3010202646255493,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.2854,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 8.061728477478027,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.4303,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 5.248547077178955,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.5894,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 4.7633819580078125,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.6274,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 1.664157509803772,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.3461,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 2.7871382236480713,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.5779,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 5.3619914054870605,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.7158,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 4.908105373382568,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.5618,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 4.174102306365967,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.645,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 5.976959705352783,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.5615,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 5.8181586265563965,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.6136,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 2.994356155395508,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.196,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 4.384360313415527,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.2452,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 3.3636257648468018,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.3809,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 3.677785634994507,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.3801,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 6.116004467010498,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.5769,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 5.415121555328369,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.385,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 6589214774788096.0,
|
365 |
+
"train_loss": 0.5162237358093261,
|
366 |
+
"train_runtime": 121.9606,
|
367 |
+
"train_samples_per_second": 3.28,
|
368 |
+
"train_steps_per_second": 0.82
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 6589214774788096.0,
|
389 |
+
"train_batch_size": 2,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4b7524ff1cdb9c623c9f04b1b2f456ce956f8333f742e367f0627615c523df0
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b14de6a6ff83a367693fffd74a202f81b6610ae9866eca998fee857876b57cc6
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc604f9b5b83debe041f5baec0917bf5f25da3d2400cc3642da153b1a214c1a0
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fab435c732539a8ae5d19b6f27b57b14502392076a996005e26331e5c8488f24
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45faca8a1b9116373952a8ef00712fa1e379398914a28043649b825dc9345f5b
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e371888600f40c5bfd4398e7526aab31e42df408736003ec9c070aa0a7ed0d7e
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac5f9b83f02f09f963455cb0e4dbe43fe7930e2e9b1fd8d2976ffad70b1f71ab
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7728b781894e5139abed7ddf3350ce0c8d77c0f74d4a37fb6fa9db4f9b6b07b9
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 8.241416931152344,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.8475,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 4.8631792068481445,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.6782,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 7.5998454093933105,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.5532,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 2.5101563930511475,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.5537,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 4.472541332244873,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.6426,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 4.083187103271484,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.5195,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 5.575020790100098,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.4822,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 4.933296203613281,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5742,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 2.017449378967285,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4663,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 6.998116493225098,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.582,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 2.1770007610321045,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.5249,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 5.969364166259766,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.623,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 1.237125277519226,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3623,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 6.773047924041748,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4094,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.925546169281006,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6287,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.002979278564453,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.7397,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 1.8541330099105835,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.5017,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 7.924871921539307,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6328,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 2.390252113342285,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3247,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 3.807579278945923,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4795,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 3.4766390323638916,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.4468,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 1.4840394258499146,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.5312,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 3.136503219604492,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4313,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 2.3651793003082275,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3152,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 3.1154613494873047,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.6135,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 5.85906982421875,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.9023,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 5.495807647705078,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.5264,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 2.5824015140533447,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.2805,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 6.252473831176758,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.4327,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 5.942077159881592,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.4004,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 5.180813789367676,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.864,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 3.008401870727539,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.292,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 1.5255389213562012,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.7554,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 11.456948280334473,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.7676,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 1.6395078897476196,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.564,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 2.9087085723876953,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.5242,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 2.2395269870758057,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.4575,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 7.380107879638672,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.5474,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 3.7302591800689697,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.365,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 7.54459810256958,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.8384,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 1.8007885217666626,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.354,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.5501184463500977,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.5158,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 7.850472927093506,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.5859,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 2.9437716007232666,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.5186,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 6.130828380584717,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.5325,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 3.8235013484954834,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.4294,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 10.231576919555664,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.5122,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 3.4070258140563965,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.4346,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 2.635401725769043,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.4543,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 4.57978630065918,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.8027,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 1.6576168127889408e+16,
|
365 |
+
"train_loss": 0.5424420166015625,
|
366 |
+
"train_runtime": 289.5764,
|
367 |
+
"train_samples_per_second": 1.381,
|
368 |
+
"train_steps_per_second": 0.345
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 1.6576168127889408e+16,
|
389 |
+
"train_batch_size": 2,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b16e88793c124816f1f558de280952798c45b5b80104feb95b1ea3c823f4317
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeb99f249d83641dd27f96f403f9a3896cc2c106017f1dac1743ed4462b07ee5
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1544b776c37e6184161b7dce77cfc588b766260a10846dd63415c96f9147be4
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34be3c453cd7a2af03cf351083bd8e44c0b7da498f43599dc9da884887ab8ec6
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23b15c520084743ef62975eae5db8d4352e6c8829c36aa7360dc593c57110e2c
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adc4c8842a382db73fdc998bf2ec50d04d6f160cab990be0e64eaa17ee6d697d
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c91d31a53873f602e9654cdc5d71ec452143909946552d992ae47d2e138a505
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4b6565669e5be448c7713d437c76b999e97c1f563a913e8d7b24fb23f5a026b
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 7.872583389282227,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.4408,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 3.1676270961761475,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2183,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 3.10235595703125,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3065,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 1.986329436302185,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.1555,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 6.654890060424805,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.4976,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 8.408140182495117,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6118,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 9.52741813659668,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 1.0054,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 3.1671223640441895,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.3345,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 6.954838275909424,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5977,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 3.3944637775421143,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3164,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 3.7810606956481934,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.5396,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.8664538860321045,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5024,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 5.678687572479248,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.4766,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 7.057774066925049,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4387,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.025454044342041,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.3817,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 2.349515676498413,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3013,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 3.751689910888672,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.3142,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 9.54448127746582,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6094,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 2.6551620960235596,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3418,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 7.663268089294434,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3774,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 5.838813781738281,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.5634,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 3.6920807361602783,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1825,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 2.167635202407837,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2479,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 1.2298402786254883,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.274,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 8.415931701660156,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.6021,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 5.416069984436035,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.4319,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 2.6783201694488525,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.4147,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 1.76664137840271,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.2076,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 3.5323493480682373,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.3228,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 4.204054832458496,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.3702,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 16.42522621154785,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.8672,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 1.9896955490112305,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.4229,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 6.183014869689941,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.552,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 5.789742469787598,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.3384,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 11.22754955291748,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.3374,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 1.9776259660720825,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.3358,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 4.03769588470459,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.4689,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 6.714748382568359,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.7046,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.7425217628479004,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.0582,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 2.1245808601379395,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.3812,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 8.465872764587402,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.4424,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 2.888294219970703,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.1748,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 3.158863067626953,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.2654,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 7.765163898468018,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.448,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 2.6586227416992188,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.1208,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 4.466168403625488,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.4543,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 9.527925491333008,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.0899,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 7.631684303283691,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.9442,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 9.0326566696167,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.7216,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 9.811245918273926,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.5501,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 1.9115952041885696e+16,
|
365 |
+
"train_loss": 0.44124862670898435,
|
366 |
+
"train_runtime": 324.4955,
|
367 |
+
"train_samples_per_second": 1.233,
|
368 |
+
"train_steps_per_second": 0.308
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 1.9115952041885696e+16,
|
389 |
+
"train_batch_size": 2,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b95720db68933091baf1fdecaa1719d3ae6d74f9d586153dfbdd9a8d412efd5
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9462aef5d3987e462b4eb54f5767c7a989d765da13e27c013f24ea225359ff85
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da91a450b0e42f8409cf5f1538d602d5016eb3af4a99a673858d20fac1506bf4
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2137138ebfad8ba608a66235c099e93ed835a9caccde901a0cce2619288f7d1
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a1463e43df9db510a4857be4caea7caef4417a1d68d2af7784cc2114322d425
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6986573a8d5a03d8aafbd13449ec4c1b317445289bb1d6fe3f86f5a60b2293a1
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b459889ad8c0dddbdb5b5892a3dc82a8c7aff84f714b2aa33a6d025449e86354
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4c6ae5cabef78b40781076f85a494852e0479f8117956282442c035a9696f6c
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 4.6497626304626465,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.5825,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 3.6290929317474365,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.915,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 2.3345847129821777,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.4902,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 5.3167009353637695,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4688,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 3.8224802017211914,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.5308,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 2.823826551437378,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.2563,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 2.9755983352661133,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3347,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 2.022836923599243,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.426,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 1.7052284479141235,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5396,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 1.3424859046936035,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4526,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 5.240848541259766,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.4441,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 4.661110877990723,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.6226,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 1.6846646070480347,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3237,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 5.903859615325928,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.7686,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.1405582427978516,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6973,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.7082715034484863,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.5305,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 4.9589338302612305,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.4478,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 7.48822546005249,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.4746,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 8.993917465209961,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.501,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 2.639606237411499,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3493,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 7.567841529846191,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.5112,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 3.463698625564575,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.3148,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 3.490788698196411,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 1.1011,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 3.9706830978393555,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.303,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 4.992152214050293,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.6089,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 1.5260461568832397,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.1576,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 2.76921010017395,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.4696,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 3.4018092155456543,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.5396,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 3.256915330886841,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.3384,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 2.284102439880371,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.2712,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 5.065925121307373,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.3423,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 2.2383081912994385,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.5986,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 3.604736566543579,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.594,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 2.073974609375,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.1852,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 4.072497367858887,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.3573,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 2.104954719543457,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.4517,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 20.248361587524414,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.5629,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 5.614309787750244,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.5081,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 3.6506497859954834,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.7004,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 0.7987788319587708,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.2193,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 2.1284451484680176,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.3389,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 7.366853713989258,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.748,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 4.810544490814209,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.7964,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 6.225423336029053,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.5322,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 4.1773576736450195,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.5176,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 6.231110572814941,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.4377,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 2.0640790462493896,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.3445,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 3.780561685562134,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.5642,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 4.6030449867248535,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.363,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 1.9157155752182007,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.2215,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2.386061380550656e+16,
|
365 |
+
"train_loss": 0.4830999755859375,
|
366 |
+
"train_runtime": 409.3895,
|
367 |
+
"train_samples_per_second": 0.977,
|
368 |
+
"train_steps_per_second": 0.244
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2.386061380550656e+16,
|
389 |
+
"train_batch_size": 2,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:893681346ba343e9d0fa2f36e51accfc6d218f0664ecca940e57f3d2dc374ad9
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b46411ee8d7334f558926aaf247e2004138dcd997e035c87686449bcfc1cc469
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdb196c7e4df84cc6052571a2d10021bc4e6208c3ed2b79af86450bb25dee17c
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b45484c94bb3ab612febd88336db3b78e201867c1e805bbd435f37ef49104931
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:118254270d48a1bd72d800e0b757f7eee2d6879af1e323d473ca3b25a5b156fe
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:479108ba00e734b040fe1210bcbd5d58bcbf9b9d213843eada9ace7329f09763
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cda0740f3777ae9d6bc03793b0f525db6fc40288d9de12a48092382737c4350
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3def9c46010f15f880769a9efe2296b8114ce3d0a5ad9754de24319c2fe578c
|
3 |
+
size 395786922
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 3.530467987060547,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.2157,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 1.8090039491653442,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.1584,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 2.7021498680114746,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.2725,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 1.0994998216629028,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.1925,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 4.899983882904053,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.4641,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 1.8486827611923218,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6014,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 0.669780433177948,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1162,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 4.612163066864014,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5449,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 1.251813292503357,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.3157,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 4.452538013458252,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1715,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 4.419332981109619,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2881,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.8015778064727783,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.8685,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 5.626010894775391,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.0619,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 7.333406925201416,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 1.0535,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 6.450397968292236,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.9023,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.454080820083618,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.4188,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 2.1945369243621826,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.4095,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 2.2680203914642334,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.9443,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 5.364918231964111,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2831,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 5.871020793914795,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4717,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 0.05804154649376869,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2073,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 3.274716854095459,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.595,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 2.444748878479004,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.395,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 2.9482669830322266,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3739,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 4.834102630615234,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.464,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 4.0864577293396,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 1.0002,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.2608688473701477,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.3152,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 1.6589609384536743,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.1979,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 2.3401970863342285,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.3342,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 2.3094046115875244,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.2121,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 2.6082193851470947,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.345,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 7.014457702636719,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.8164,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 4.024754047393799,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.7554,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.4044950008392334,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.0986,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 4.961355209350586,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.4958,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 7.53846549987793,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.5591,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.829863429069519,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.4915,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 3.274028778076172,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.4124,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 5.568228721618652,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.3262,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 6.694833755493164,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.6133,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 5.746543884277344,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.8642,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 11.073870658874512,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.833,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 4.428623676300049,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.333,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 7.5453033447265625,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.7605,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 7.614424228668213,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.2277,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 8.295862197875977,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.9114,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 2.497485876083374,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.2709,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 1.357002854347229,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.6364,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 6.519164562225342,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.3441,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 2.9073774814605713,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.4589,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2.410703500397773e+16,
|
365 |
+
"train_loss": 0.48806270599365237,
|
366 |
+
"train_runtime": 603.2643,
|
367 |
+
"train_samples_per_second": 0.663,
|
368 |
+
"train_steps_per_second": 0.166
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2.410703500397773e+16,
|
389 |
+
"train_batch_size": 2,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e98397f8ee641afb7450a1f3f2b7b46bc3b40c3af558ed2250763f6daf8926a1
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1182ee1ea28f8787bca578e777f053b2ee0a10bd3430a546fb68a7cdcf699d7
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63ba0332da3f84fc4e4f6fd1d7367267ccf7e78e25877de4de3f58a1c5482730
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:423c7b5ce9ea433b1ebd72c1011c1ce8ca96c7bc1ca28b81212c078e95829b52
|
3 |
+
size 395787774
|
client_states_fedMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc132_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bde583558a09eb45715c7e129609121fa1addebd7bba6f8de5847ea26e0ecadd
|
3 |
+
size 395786922
|