Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e97a178ad07f93c2416428537e0ebb17e76938b3af5b9317eb49a1799f96d462
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bb442cab84c4a4ce96f7b23006e86db8e87acdf17f5ac629a5fe2bb1fc72283
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ccd1e02f6c44c372315ab293a7ec71916771b5681ec6280a7a70db7e600c79f
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62683ff26556ce21097ddea2f3c9c2f9d573e198ff9949a797a908d40f5250c7
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a7f676c7427580136ab410df1abb887040e5489b7d7b7f65b5641a99a5e95c3
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:faefd5c33f7e65f88683a6938382403ede739ab52ee9960f777027320860ec1c
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5096192d60955cc6239ad742abdb0ba21b817f3291a2089cb8b281800ec669b6
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d7e16824a5fd6f09c42254ff21a13f01110dc6a2ab0d17eda950f6badb6d79c
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 1.3082879781723022,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.382,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 7.398350715637207,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.5622,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 5.8386311531066895,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.5934,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 8.876031875610352,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4576,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 0.8822630047798157,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.533,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 0.8977208733558655,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.1235,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 4.8954572677612305,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.6145,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 2.364208698272705,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.2037,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 6.215276718139648,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4045,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 1.6845049858093262,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3949,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 0.7680008411407471,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0562,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 8.639182090759277,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5587,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 5.095108509063721,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.2591,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 4.774632930755615,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3083,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 13.420112609863281,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 2.695,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 7.402116298675537,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.8668,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 6.176011085510254,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.9256,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 12.859660148620605,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 1.2148,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 6.652098178863525,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.7266,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 4.232424736022949,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3186,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 5.211861610412598,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.664,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 2.3429152965545654,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1264,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 7.41301155090332,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 2.2214,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 6.739213943481445,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.5323,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 0.16466465592384338,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2526,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 1.8708200454711914,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.587,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 2.8934383392333984,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.4451,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 0.6178646683692932,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.6075,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 2.2261078357696533,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.034,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.3027094900608063,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.474,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 3.4519269466400146,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.9553,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 2.5445189476013184,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.5833,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 1.0719250440597534,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.2296,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 2.4732906818389893,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 1.2008,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 1.3075364828109741,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.7658,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 6.077316761016846,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 1.7325,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.7028217315673828,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.6084,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.03688935935497284,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.0744,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.558180570602417,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.2631,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 1.651378870010376,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.331,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 3.096226692199707,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.2716,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.9856376647949219,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.2417,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.3018156588077545,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.1055,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.15684664249420166,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.3354,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.3770679831504822,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.0729,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 2.0283687114715576,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.0331,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 3.1098225116729736,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.1469,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 4.576979160308838,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.4126,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 0.10817147046327591,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.3318,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 0.2188994139432907,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.0426,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2190942744346624.0,
|
365 |
+
"train_loss": 0.6376361560821533,
|
366 |
+
"train_runtime": 138.1433,
|
367 |
+
"train_samples_per_second": 2.896,
|
368 |
+
"train_steps_per_second": 0.724
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2190942744346624.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2df384fb287406d01c6ac47203f430568f40f9606f69f8f125b635c282d7d092
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b551ea01e38e2a2cb95facdbfff51f74e900b1b79b0181295e7e38e2646d99db
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d542560fc66bc7b621148ba6fa51735d64ad57f335ce21cb3839515e1e67582
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82f77e71308f2b7aa18b9d6fa8133a3957aa38f39306afb84ca1bead67ca5d10
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c13f89aa25e72c44da6f78833eaffbdbc09fe7723cac5955204fce2ba777168e
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9629cc43a9b2467735fe71d84d13bf5ee18830013aa314403e366294718d505f
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b911b77a2c95d38aaecacc3ba610c224325e7cea721fa2f885465d97f0d5c61a
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27ae0a6acd5268884668a889560be909dd798ecccb5f0cdb5b4e28d0c046946d
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 5.438493728637695,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.6253,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 0.06039601191878319,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0071,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 3.3221728801727295,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.1724,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 1.3266348838806152,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0556,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 10.860568046569824,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.3214,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 0.26336294412612915,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0231,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 3.3594837188720703,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2503,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 4.755346775054932,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5165,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 0.13056811690330505,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.1325,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 0.19030329585075378,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0153,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 2.790400743484497,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2883,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 0.9987291097640991,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0559,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 3.3109381198883057,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.2969,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 0.24974896013736725,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0719,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 0.02681863121688366,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.3216,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 2.1715784072875977,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2291,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 0.15720851719379425,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0086,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 0.07507246732711792,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0222,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 3.0812454223632812,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1993,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 0.22983674705028534,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0121,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 0.11351170390844345,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0088,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 0.014975732192397118,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0141,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 0.04912685975432396,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0099,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 0.09525927156209946,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0043,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 0.053252220153808594,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0056,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 5.65633487701416,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 1.1687,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.7466307282447815,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.0578,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 0.19205646216869354,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.0363,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 1.4861888885498047,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.1323,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.2961069643497467,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.0288,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 0.012061057612299919,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.0054,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 0.010159369558095932,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.0029,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 0.262207955121994,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.0613,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 1.2027740478515625,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.2165,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 0.10255525261163712,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.0084,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.1920798122882843,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.0228,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.8249365091323853,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.0964,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.08167055249214172,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.0199,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 0.09379951655864716,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.0429,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 0.0005258667515590787,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.3772,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.055758312344551086,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.0245,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.6854066848754883,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.2178,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.005169401410967112,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.0064,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.4939119219779968,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.0627,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.015726575627923012,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.0019,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 0.03543732315301895,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.0042,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 0.027740249410271645,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.0398,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 0.012435679323971272,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.003,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 0.03141430765390396,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.0037,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 3.874453067779541,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.2269,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2208152577638400.0,
|
365 |
+
"train_loss": 0.1507337412238121,
|
366 |
+
"train_runtime": 136.9726,
|
367 |
+
"train_samples_per_second": 2.92,
|
368 |
+
"train_steps_per_second": 0.73
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2208152577638400.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5155c6d3ccd59d8a88c5fadb4d55b7c55e757d72fed6700339f2a55ba9d206b
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f62c93b1b07445065f1c327317188271a9b39f4a40b53f3acbcef69cae588b0
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e9b358fff8705a58269da9150542305dbf26ce68799e95cecc3d85c96f70b1d
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39cd8335243dbc72570d4318fe64af4a1d8e00cf2a7eb67fad0c2eca24e61cfc
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1cf1a3c9f0043493c32c78abd02daddae42e5bd2823cc8b66edc676d0791dd0
|
3 |
+
size 791576546
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9953dd762080b274427366bfd5e028435e1fd61f4d7712e2d3c7a7bf0c66713
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68c0b2ac7a9bd6d77493cc2bd7e107f7e04847c31ad38dce7ee89a1f3260cb76
|
3 |
+
size 791576546
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b97aff48462a93e7bc1cf42a7d28365d5c276dfe0e5eb85d1b94b214df2a1b30
|
3 |
+
size 791576546
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 2.7511374950408936,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.2946,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 1.288777232170105,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5413,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 2.8379461765289307,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.9373,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 2.078542947769165,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.8663,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 1.4642187356948853,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.7827,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 0.9326199293136597,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.0563,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 4.39100456237793,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.922,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 1.1711935997009277,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.8917,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 0.9163213968276978,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4478,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 4.13539457321167,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.1375,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 1.2938967943191528,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.4566,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.292201280593872,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.9561,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 1.9226994514465332,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.8209,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 2.53096079826355,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.6961,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.718522787094116,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.776,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 2.3070623874664307,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3574,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 3.3535683155059814,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.6592,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 2.309844970703125,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5191,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 2.560328483581543,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.6774,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 1.079557180404663,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.132,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 6.772784233093262,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.7784,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 1.8025261163711548,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.7205,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 3.360507011413574,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.8593,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 0.8153305053710938,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.2696,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 0.7224266529083252,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2182,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 0.5766162276268005,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.0827,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.022444158792495728,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.4786,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 2.547598123550415,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.3983,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 2.8408164978027344,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.7896,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.3001943826675415,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.063,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 0.5612508654594421,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.068,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 0.38100701570510864,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.1456,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 1.6601585149765015,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.0226,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.3090905547142029,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.1554,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 0.07504302263259888,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.9354,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.43841126561164856,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.3726,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.9700837135314941,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.3882,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 1.769338607788086,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.3361,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 3.9080419540405273,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.5156,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 0.41056567430496216,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.2299,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.3981069028377533,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.1811,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.7696741819381714,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.2616,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 1.6954944133758545,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.5364,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.0885348692536354,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.2733,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.8655412793159485,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.1542,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 0.22551484405994415,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 2.1966,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 0.18391752243041992,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.047,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 0.981302797794342,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.1401,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 1.6460645198822021,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.395,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 2.7264297008514404,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.6037,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 5207450529562624.0,
|
365 |
+
"train_loss": 0.5909026241302491,
|
366 |
+
"train_runtime": 217.8458,
|
367 |
+
"train_samples_per_second": 1.836,
|
368 |
+
"train_steps_per_second": 0.459
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 5207450529562624.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d4ec54f2264de9d5066627dd2579b6f2687909f1bd195443308183b9867ed4e
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bff21d08e8d34a7152bd63b77effe319611f317b2e0f305ff6b9f6a4e75aa861
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da3a6821e6085f2bd28d30ca6eb0d458ea5c5d3e23b3177c2345359d4a31ab85
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79646f529a49a14c6cf888826de5ccfb5275f73da3d9785eedafd1aca441bcbd
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:780361ca8e347e1b3bd2219d28d65e0826ca409e3a7e0fba6af746dbbc5616ff
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dfe770a5196824f0b6a2220e19158a1b69f0411506a84e9133389f9a4c4900d
|
3 |
+
size 368443438
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e92cf3340f68925f39d845baeb5dae745fce9ed15d7624d98f382fdef8f435da
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb745e55cb09490ca340fa48f6f6a68b2b4f7940ad5c559cd1aafd94e5be0bed
|
3 |
+
size 368442474
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 3.859758138656616,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.0478,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 3.1250510215759277,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.0184,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 4.169061660766602,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.2416,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 4.2583136558532715,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.6852,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 5.320484161376953,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.26,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 3.881507635116577,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.9806,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 2.992048978805542,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.8097,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 10.06558895111084,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.1399,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 5.604303359985352,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 1.8142,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 3.0004537105560303,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.0156,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 8.061921119689941,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 2.0607,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 13.409745216369629,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.7125,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 3.7100305557250977,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.7186,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 4.847060680389404,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.6527,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 12.18260383605957,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.7499,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 2.2415454387664795,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 1.0221,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 3.2538342475891113,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 1.1897,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 2.36144757270813,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6208,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 9.6058988571167,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 1.8176,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 5.7604498863220215,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.891,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 4.558753490447998,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.2292,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 3.545152187347412,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 1.2327,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 5.126194000244141,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.7782,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 4.15905237197876,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.4625,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 8.5696382522583,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 2.1859,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 1.0490771532058716,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.6732,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 4.333555698394775,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 1.2707,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 4.150696277618408,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 2.296,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 3.1090495586395264,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.374,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 2.279902935028076,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.7701,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 3.1808507442474365,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 1.5774,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 2.980347156524658,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 1.3503,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 1.9250891208648682,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.0087,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.43418270349502563,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.5027,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 6.883395671844482,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 2.4104,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 1.5485419034957886,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.6374,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 1.1076228618621826,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.1517,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 1.7041553258895874,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 2.0387,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.2205132246017456,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 1.1971,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 1.1478123664855957,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.715,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 2.3879141807556152,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 1.2586,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 1.8638067245483398,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.4435,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 2.9053351879119873,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.8824,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 4.362792491912842,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 1.4925,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 3.5274477005004883,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 1.3381,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 1.9348663091659546,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.3041,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 3.1137640476226807,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.1207,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 1.3478007316589355,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.8649,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 3.8190839290618896,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.6968,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 3.223320722579956,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 1.0917,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2239167178211328.0,
|
365 |
+
"train_loss": 1.1960790348052979,
|
366 |
+
"train_runtime": 130.6243,
|
367 |
+
"train_samples_per_second": 3.062,
|
368 |
+
"train_steps_per_second": 0.766
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2239167178211328.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:169ebfa5ea45498c858e2141245075d803dff199394089e935fdba4da7a3099a
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c3485ba0093a053529dff2fcd3d4cd079804e71aa2b559ec8f559f9ba71055c
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bd98ef5d90acc7a29e5fef3ce1fc5e7706f0d3239ff49fef6f382c05ba55849
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3854789b581cb0260e2d94c3d708f95a4c379713ad720e099ecb0ad2e28f63c
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:035bf44788d365a4d9eebd5d350a631cde6bbe9c766c0f0feff763b495a47f8f
|
3 |
+
size 791576546
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76d31317f83b371f254d5922e2d81721fff42d341d39690889aaf77053f678a1
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16cabb09de90f1c5e2fde2a8d0aabc8dabcd0bb09b864ca39c1e5ccf9d3be264
|
3 |
+
size 791576546
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b08678e0ccc6b5c35814c8ff802230cd341a7747cf7496a9bb58cdfe57120ac7
|
3 |
+
size 791576546
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 3.574599266052246,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.2507,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 0.12841464579105377,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.7722,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 1.6093394756317139,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.2003,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 1.608458161354065,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.3632,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 1.834401249885559,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.9866,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 2.831615924835205,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.117,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 1.3170740604400635,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.9013,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 2.3447439670562744,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.677,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 1.6816301345825195,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5952,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 4.032651901245117,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.821,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 2.3172712326049805,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 1.2874,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 2.2676846981048584,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.6908,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 5.13706111907959,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.0195,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 1.1814277172088623,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3295,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.5663914680480957,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.1604,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.956602096557617,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.565,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 3.0503950119018555,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 1.6086,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 1.34660005569458,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5632,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 0.9926305413246155,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2543,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 1.6534584760665894,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.427,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 1.608451008796692,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.7323,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 1.8026962280273438,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.389,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 4.112421035766602,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.8067,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 4.906567096710205,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.1125,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 9.803025245666504,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 1.3187,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 0.7343541383743286,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.4331,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.47376033663749695,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.33,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 1.4215737581253052,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.6756,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 1.5952926874160767,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.861,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 2.4449350833892822,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 1.0126,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 1.2033276557922363,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.8229,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 1.0484799146652222,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.4583,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 0.7214386463165283,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.1516,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 1.754631519317627,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.8129,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 1.1184951066970825,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.1875,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.5391192436218262,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.1746,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 1.6062778234481812,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.2598,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.573376476764679,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.4363,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 0.27290117740631104,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.8375,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 1.3175345659255981,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.5903,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 1.023700475692749,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.533,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 0.8385341763496399,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.666,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 1.081638216972351,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.4824,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 1.2847450971603394,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 1.0647,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.40036866068840027,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.1547,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 0.7571963667869568,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.5782,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 1.6084556579589844,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.0859,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 1.3350269794464111,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.8913,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 1.8509856462478638,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.4388,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 1.0608395338058472,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 1.0815,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 6030679393435648.0,
|
365 |
+
"train_loss": 0.7993921279907227,
|
366 |
+
"train_runtime": 219.9706,
|
367 |
+
"train_samples_per_second": 1.818,
|
368 |
+
"train_steps_per_second": 0.455
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 6030679393435648.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16c57250fcfe34d344a24dfe1172b2a64bc17bada58a2c6b17723f541b5fa08f
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af227748b467356df6cae04dd2a32abe3d124b38f08bc8390687d2806915ad2f
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f651d0bef6787aa7e3a030795fa8d154d1532d7d4689512ee09bc04a67afcb4d
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a1490aa7c713d9f5b9bdd625c4b8d7d75f50d85fd15d02563a8c6dbaa5292ee
|
3 |
+
size 791578182
|
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e88e8e3c32c5cb38055ff420fc67ed684dd8deb4a18f122f1675fe70a499944f
|
3 |
+
size 791576546
|