Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0052812b7d94644a57b6eef853c50e0f528b82ce2f493df7c3cfa87e27f64a7
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:592d18e9bddf4309c54535a49180cca95faba83a52d48adf98f7961a5e2135ef
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c8f3d10158929562c385814e21d43669b6298edaedcab9c33c03002f2b773a8
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfe38a666ac72c1f67e59e27ce4139f783188a5a685b3dbd927a3b2e1ae2f9b0
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c73b406da7ef6628fc4620b383a2eb2b8d19893053c8807f0bfd2f62622d7e25
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3f7562fa34accdd98509ecc7d12b53c7bc3b945769e2b7dd78ab458df8afe59
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d912945759350fcc28d90c00e86c414481a8808081218c9c01537808b49094d4
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ceb9cef58b85c190907da7c10858fb1441500a4b86f9e18edf2eb3a5c04d0d0c
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 5.217213153839111,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0978,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 19.47102165222168,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.3909,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 0.8035042881965637,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.2027,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 16.921295166015625,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.7616,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 2.1919631958007812,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.218,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 16.655672073364258,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.8259,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 7.701754570007324,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1924,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 18.302875518798828,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4014,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 8.74785041809082,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6177,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 11.008668899536133,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3468,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.6823620200157166,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0258,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 29.831308364868164,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.7534,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 12.918745040893555,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.156,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 15.153372764587402,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.1453,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 6.898128986358643,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.503,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 1.4652340412139893,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3035,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 7.845968246459961,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1804,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 20.450454711914062,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5681,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 9.576227188110352,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1961,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.2676473557949066,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.092,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 15.099814414978027,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.2276,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 4.5359721183776855,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2739,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 3.9087729454040527,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.645,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 10.639699935913086,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.175,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 4.5063700675964355,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0602,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2192016167403520.0,
|
190 |
+
"train_loss": 0.3744270062446594,
|
191 |
+
"train_runtime": 129.0215,
|
192 |
+
"train_samples_per_second": 1.55,
|
193 |
+
"train_steps_per_second": 0.388
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2192016167403520.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acf1efd2501377804ead17cb242cb745f20c3a39e51add10e22617f0df9544a7
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dee6e1a55e0e88f756d8b7b1049abe48e46d033dd49e38a9a98e52540fd696a
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1de4108e40b3df26a3da94a1b349647f8f322da85b91eac7fb90d02eef430b3a
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:112179718633f347a60ccf07934ee229c5a427f0a9d32d6778e77b38a29493e1
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05558873fb3aff150bd80b0274ed58798d9cedf559987aabce5e6672158bc111
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f98dfd1d683fba3d0872960f7af59a7c733fb754d0d87f75ca3b238fdc45b4b2
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:127f5df578036552d14e67be01a9faf04edd91c9ebaf433da945f21affb38603
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2d87c1864fc7de3c59671e08aac30fdc77a432de09636eff524cbe28ff618c9
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 15.88451099395752,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1708,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.0260764230042696,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0869,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 0.23019923269748688,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3902,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 0.6337835788726807,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0138,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 0.2402428835630417,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0181,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 0.05544476956129074,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0089,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 5.406139373779297,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0436,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.7542925477027893,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0138,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 0.9043628573417664,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0479,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 0.017281576991081238,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0151,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 1.3869414329528809,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0295,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 1.5793606042861938,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0195,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 7.564406394958496,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.1626,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 4.126201629638672,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0253,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 0.022082654759287834,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.0086,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 0.06628133356571198,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0091,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 0.2595360577106476,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0123,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 1.762347936630249,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.1144,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 3.6209044456481934,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0225,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 1.9108753204345703,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0234,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 0.019227411597967148,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0078,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 0.6679308414459229,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0185,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 0.04014873877167702,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0086,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 10.506927490234375,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0433,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.1354411095380783,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0089,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2207325494444032.0,
|
190 |
+
"train_loss": 0.05293830454349518,
|
191 |
+
"train_runtime": 129.0073,
|
192 |
+
"train_samples_per_second": 1.55,
|
193 |
+
"train_steps_per_second": 0.388
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2207325494444032.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c51474252c8a20490d1e29adaa895374937a7fbfc7a95ca5501f4a646acb5aca
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af02480a11b44a7c5510ffc3ea59978633bd5b817da2fea8cb265214da5f03f6
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95f82c6dd8725532069f364c941a009cbb50a8ea61c6c90b70e70fc5e1fcf05a
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82500b4ffc15a4d7a51e4ca6720458d01c877fd0c912ed5b74b17e42b9257b57
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a093f5582d82b1f8a200c425bb5cd7e1d516756ffea2f7aae33aa017e5fb4321
|
3 |
+
size 791576546
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de6da5e0deb52b2297f995582e6e724bd6d5da51d710a540ea73acde7b3fe215
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6122b8ec945987236080d2fd5b2f51301c1f83fffebe47685762f1523230d849
|
3 |
+
size 791576546
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52efe14f1720fe65ced240d3d8fd89e561976c40c6a295e71eca15df8b64251b
|
3 |
+
size 791576546
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 3.483114004135132,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0725,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.19014683365821838,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2018,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 9.548799514770508,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.8036,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 11.098209381103516,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.8353,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 6.181906223297119,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.6294,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 2.481546640396118,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.2403,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 9.917572975158691,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1437,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 6.766469478607178,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.3933,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 3.4954700469970703,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.3106,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 4.8355231285095215,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3069,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 4.6046671867370605,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.3142,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 9.980782508850098,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.4414,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 13.631030082702637,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.5174,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 7.253704071044922,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3955,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 3.715864658355713,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.2063,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 1.8238903284072876,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0721,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 9.186090469360352,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.5733,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 19.38672637939453,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5391,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 1.94209623336792,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.6341,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 1.7068160772323608,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.1208,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 8.549454689025879,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.318,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 6.2620110511779785,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2257,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 7.983525276184082,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.185,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 9.400800704956055,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.4126,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 14.556591987609863,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3437,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5205036221071360.0,
|
190 |
+
"train_loss": 0.4094633960723877,
|
191 |
+
"train_runtime": 211.8895,
|
192 |
+
"train_samples_per_second": 0.944,
|
193 |
+
"train_steps_per_second": 0.236
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5205036221071360.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e52cc2180863d978ba87425f515d98fa4204b8b8700599b5ffda5c38f4e3d51
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88caf62f25889cfd1ec0f645524a7bb3322e24576218872f976ca431c86e7599
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5244091b3de0e92e2c7f7d89f83d6c1b7a2146f4729996754fa0c834836aaf44
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e84f80cadcef593d6bfd32fa27e08da1527dece9267dee9e22da8761ce8e3c26
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:503a54e1b424e3ecbcdb640eb135cc3b29761defbddb2ce6353775a07ffb6172
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5c86ee2cca4256f0d8529a4b0dfbe5b118743542b9f55564b5dbecff660043
|
3 |
+
size 368443438
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf1437bbba8c0b2af6a74eb1dc2bbc667158600da1dcf17451e2dd249a091b10
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33211c9c9f759ba204253eef6e13965fee72732891d8b96fdcbaa1f31261d7f5
|
3 |
+
size 368442474
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 14.853601455688477,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.702,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 4.492935657501221,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5924,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 19.62257957458496,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.4307,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 15.086118698120117,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.6837,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 17.595279693603516,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.8814,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 17.479902267456055,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6408,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 6.619649887084961,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2844,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 8.41659927368164,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4303,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 14.263152122497559,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.7531,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 17.28653907775879,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3875,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 14.577817916870117,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.9195,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 16.577980041503906,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.9566,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 22.426490783691406,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.0937,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 27.654651641845703,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.688,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 16.97934341430664,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.7858,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 20.013566970825195,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.7763,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 19.297901153564453,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.7932,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 9.753912925720215,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.194,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 23.354145050048828,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.672,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 13.23054313659668,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.8113,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 21.378204345703125,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.8214,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 13.187995910644531,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.4943,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 7.820920467376709,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4863,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 16.609262466430664,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.6728,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 13.173944473266602,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.4593,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2237662652006400.0,
|
190 |
+
"train_loss": 0.656429648399353,
|
191 |
+
"train_runtime": 125.9763,
|
192 |
+
"train_samples_per_second": 1.588,
|
193 |
+
"train_steps_per_second": 0.397
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2237662652006400.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e12da33086c01924059e76e93f56bade89b4e7df69f3b9530db8c5222083c12a
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcc262405e677f974bb5955e49805974f6658519a6b8761410e068acf4c09c04
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91b61afaf370c8881bee78a729da3d33bffb2da579eab558ab6170fb2cb34b2f
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fa2a8f09aaab35b30df9704accdbc1ca8e73412d60145786ad6a37d4ff58eb7
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:debed4f224299f2e60197b210cd40e0a9482c4c33ea6dce7f12a01b7b4c0d1af
|
3 |
+
size 791576546
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b81150c64f084928ae5e3ec440232e6259a17df9f6da0b6351e3f2f9dd2d12d6
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e0b10f32c4c9070e55ee1e47d5d6764b081586e68a427063b4e2f7523efa95f
|
3 |
+
size 791576546
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6128577adfeed60f40d4b5ed83d7a38b0e0f7497f2908c8bcbec8e81077a7885
|
3 |
+
size 791576546
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 10.564441680908203,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3613,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 4.964231967926025,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.6445,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 3.1381404399871826,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3126,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 5.2721052169799805,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.3248,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 6.106097221374512,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.2732,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 6.087716102600098,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3326,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 8.19166374206543,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3898,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 7.321465969085693,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.3813,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 5.099972248077393,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4527,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 5.690826416015625,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3984,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 4.295352935791016,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.3307,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 4.787858009338379,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5855,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 5.640788555145264,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.4468,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 1.2240551710128784,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.2536,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 6.876025199890137,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.4679,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 2.8769640922546387,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2076,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 6.484592437744141,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.7747,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 7.028562545776367,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.3312,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 2.0920963287353516,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2506,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 7.528481483459473,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3327,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 6.548537254333496,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2954,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 12.54763412475586,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.5996,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 2.3413326740264893,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2644,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 3.854572296142578,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1726,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 4.879101753234863,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.4778,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 6032394863771648.0,
|
190 |
+
"train_loss": 0.3864997100830078,
|
191 |
+
"train_runtime": 213.4321,
|
192 |
+
"train_samples_per_second": 0.937,
|
193 |
+
"train_steps_per_second": 0.234
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 6032394863771648.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7f767e41ddb236c053785267711747addf8de17d516391fa73f01113552f4b5
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4bc626ba4f9e8c3eced27d13b3ceab99d506fbc75fbcfddf4cefbaf4c443a36
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b70d891232b93ec07648f7b596a83ecaeaa005c012daf2b77a00675beb28c66
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d28c2a3f386ab271594188a39467549ebc645e23547ad5186a31495fb666e118
|
3 |
+
size 791578182
|
client_states_ditto_feddualMultipqfullfreeze_homoAgg_pca_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f3fbd45af36d4a8ed90c797d64a2b25407897b9ee98f7202fa3b4c3d9d46b11
|
3 |
+
size 791576546
|