thkim0305 commited on
Commit
70ebc22
·
verified ·
1 Parent(s): 4849338

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
  10. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
  19. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
  28. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
  37. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
  46. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e97a178ad07f93c2416428537e0ebb17e76938b3af5b9317eb49a1799f96d462
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb442cab84c4a4ce96f7b23006e86db8e87acdf17f5ac629a5fe2bb1fc72283
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ccd1e02f6c44c372315ab293a7ec71916771b5681ec6280a7a70db7e600c79f
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62683ff26556ce21097ddea2f3c9c2f9d573e198ff9949a797a908d40f5250c7
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a7f676c7427580136ab410df1abb887040e5489b7d7b7f65b5641a99a5e95c3
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faefd5c33f7e65f88683a6938382403ede739ab52ee9960f777027320860ec1c
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5096192d60955cc6239ad742abdb0ba21b817f3291a2089cb8b281800ec669b6
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d7e16824a5fd6f09c42254ff21a13f01110dc6a2ab0d17eda950f6badb6d79c
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.3082879781723022,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.382,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 7.398350715637207,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.5622,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 5.8386311531066895,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.5934,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 8.876031875610352,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4576,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.8822630047798157,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.533,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.8977208733558655,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.1235,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.8954572677612305,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6145,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.364208698272705,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.2037,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 6.215276718139648,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4045,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 1.6845049858093262,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3949,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.7680008411407471,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0562,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 8.639182090759277,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5587,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 5.095108509063721,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.2591,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 4.774632930755615,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3083,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 13.420112609863281,
112
+ "learning_rate": 2e-05,
113
+ "loss": 2.695,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 7.402116298675537,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8668,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 6.176011085510254,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.9256,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 12.859660148620605,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.2148,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 6.652098178863525,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7266,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 4.232424736022949,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3186,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 5.211861610412598,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.664,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.3429152965545654,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1264,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 7.41301155090332,
168
+ "learning_rate": 2e-05,
169
+ "loss": 2.2214,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 6.739213943481445,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.5323,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.16466465592384338,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2526,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.8708200454711914,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.587,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 2.8934383392333984,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.4451,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.6178646683692932,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.6075,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.2261078357696533,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.034,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.3027094900608063,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.474,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 3.4519269466400146,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9553,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.5445189476013184,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.5833,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 1.0719250440597534,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.2296,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.4732906818389893,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.2008,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 1.3075364828109741,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7658,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 6.077316761016846,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.7325,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.7028217315673828,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.6084,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.03688935935497284,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0744,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.558180570602417,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.2631,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.651378870010376,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.331,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 3.096226692199707,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.2716,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.9856376647949219,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2417,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.3018156588077545,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.1055,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.15684664249420166,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.3354,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.3770679831504822,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0729,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 2.0283687114715576,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.0331,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 3.1098225116729736,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1469,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 4.576979160308838,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.4126,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.10817147046327591,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.3318,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.2188994139432907,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.0426,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2190942744346624.0,
365
+ "train_loss": 0.6376361560821533,
366
+ "train_runtime": 138.1433,
367
+ "train_samples_per_second": 2.896,
368
+ "train_steps_per_second": 0.724
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2190942744346624.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2df384fb287406d01c6ac47203f430568f40f9606f69f8f125b635c282d7d092
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b551ea01e38e2a2cb95facdbfff51f74e900b1b79b0181295e7e38e2646d99db
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d542560fc66bc7b621148ba6fa51735d64ad57f335ce21cb3839515e1e67582
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82f77e71308f2b7aa18b9d6fa8133a3957aa38f39306afb84ca1bead67ca5d10
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c13f89aa25e72c44da6f78833eaffbdbc09fe7723cac5955204fce2ba777168e
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9629cc43a9b2467735fe71d84d13bf5ee18830013aa314403e366294718d505f
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b911b77a2c95d38aaecacc3ba610c224325e7cea721fa2f885465d97f0d5c61a
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ae0a6acd5268884668a889560be909dd798ecccb5f0cdb5b4e28d0c046946d
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 5.438493728637695,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.6253,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.06039601191878319,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0071,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.3221728801727295,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.1724,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 1.3266348838806152,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0556,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 10.860568046569824,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.3214,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.26336294412612915,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0231,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.3594837188720703,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2503,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 4.755346775054932,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5165,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.13056811690330505,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.1325,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.19030329585075378,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0153,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 2.790400743484497,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.2883,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 0.9987291097640991,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0559,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.3109381198883057,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.2969,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.24974896013736725,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0719,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.02681863121688366,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3216,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.1715784072875977,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2291,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 0.15720851719379425,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0086,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.07507246732711792,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0222,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.0812454223632812,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1993,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.22983674705028534,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0121,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.11351170390844345,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0088,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 0.014975732192397118,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0141,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.04912685975432396,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0099,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.09525927156209946,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0043,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.053252220153808594,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0056,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 5.65633487701416,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.1687,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.7466307282447815,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0578,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.19205646216869354,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0363,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.4861888885498047,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1323,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.2961069643497467,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0288,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.012061057612299919,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0054,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.010159369558095932,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0029,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.262207955121994,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0613,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.2027740478515625,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.2165,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.10255525261163712,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.0084,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.1920798122882843,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0228,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.8249365091323853,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0964,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.08167055249214172,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0199,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.09379951655864716,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.0429,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.0005258667515590787,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.3772,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.055758312344551086,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0245,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.6854066848754883,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2178,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.005169401410967112,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0064,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.4939119219779968,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0627,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.015726575627923012,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0019,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.03543732315301895,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.0042,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.027740249410271645,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0398,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.012435679323971272,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.003,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.03141430765390396,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.0037,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 3.874453067779541,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.2269,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2208152577638400.0,
365
+ "train_loss": 0.1507337412238121,
366
+ "train_runtime": 136.9726,
367
+ "train_samples_per_second": 2.92,
368
+ "train_steps_per_second": 0.73
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2208152577638400.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5155c6d3ccd59d8a88c5fadb4d55b7c55e757d72fed6700339f2a55ba9d206b
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f62c93b1b07445065f1c327317188271a9b39f4a40b53f3acbcef69cae588b0
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9b358fff8705a58269da9150542305dbf26ce68799e95cecc3d85c96f70b1d
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39cd8335243dbc72570d4318fe64af4a1d8e00cf2a7eb67fad0c2eca24e61cfc
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1cf1a3c9f0043493c32c78abd02daddae42e5bd2823cc8b66edc676d0791dd0
3
+ size 791576546
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9953dd762080b274427366bfd5e028435e1fd61f4d7712e2d3c7a7bf0c66713
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c0b2ac7a9bd6d77493cc2bd7e107f7e04847c31ad38dce7ee89a1f3260cb76
3
+ size 791576546
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97aff48462a93e7bc1cf42a7d28365d5c276dfe0e5eb85d1b94b214df2a1b30
3
+ size 791576546
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 2.7511374950408936,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.2946,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.288777232170105,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5413,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 2.8379461765289307,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.9373,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 2.078542947769165,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.8663,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.4642187356948853,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7827,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.9326199293136597,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.0563,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.39100456237793,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.922,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 1.1711935997009277,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.8917,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.9163213968276978,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4478,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 4.13539457321167,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1375,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.2938967943191528,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4566,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.292201280593872,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9561,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.9226994514465332,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8209,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.53096079826355,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6961,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.718522787094116,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.776,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.3070623874664307,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3574,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 3.3535683155059814,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6592,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.309844970703125,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5191,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.560328483581543,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6774,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 1.079557180404663,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.132,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 6.772784233093262,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7784,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.8025261163711548,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7205,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.360507011413574,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.8593,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.8153305053710938,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.2696,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.7224266529083252,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2182,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 0.5766162276268005,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.0827,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.022444158792495728,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.4786,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 2.547598123550415,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.3983,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.8408164978027344,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.7896,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.3001943826675415,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.063,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.5612508654594421,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.068,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.38100701570510864,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.1456,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 1.6601585149765015,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0226,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.3090905547142029,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.1554,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.07504302263259888,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.9354,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.43841126561164856,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.3726,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.9700837135314941,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.3882,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.769338607788086,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.3361,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 3.9080419540405273,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.5156,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.41056567430496216,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2299,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.3981069028377533,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.1811,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.7696741819381714,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2616,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.6954944133758545,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.5364,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.0885348692536354,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.2733,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.8655412793159485,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.1542,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.22551484405994415,
329
+ "learning_rate": 2e-05,
330
+ "loss": 2.1966,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.18391752243041992,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.047,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.981302797794342,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.1401,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.6460645198822021,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.395,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.7264297008514404,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.6037,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5207450529562624.0,
365
+ "train_loss": 0.5909026241302491,
366
+ "train_runtime": 217.8458,
367
+ "train_samples_per_second": 1.836,
368
+ "train_steps_per_second": 0.459
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5207450529562624.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4ec54f2264de9d5066627dd2579b6f2687909f1bd195443308183b9867ed4e
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff21d08e8d34a7152bd63b77effe319611f317b2e0f305ff6b9f6a4e75aa861
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3a6821e6085f2bd28d30ca6eb0d458ea5c5d3e23b3177c2345359d4a31ab85
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79646f529a49a14c6cf888826de5ccfb5275f73da3d9785eedafd1aca441bcbd
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780361ca8e347e1b3bd2219d28d65e0826ca409e3a7e0fba6af746dbbc5616ff
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfe770a5196824f0b6a2220e19158a1b69f0411506a84e9133389f9a4c4900d
3
+ size 368443438
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92cf3340f68925f39d845baeb5dae745fce9ed15d7624d98f382fdef8f435da
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb745e55cb09490ca340fa48f6f6a68b2b4f7940ad5c559cd1aafd94e5be0bed
3
+ size 368442474
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 3.859758138656616,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0478,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 3.1250510215759277,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0184,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 4.169061660766602,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.2416,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 4.2583136558532715,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6852,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 5.320484161376953,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.26,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 3.881507635116577,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.9806,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 2.992048978805542,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.8097,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 10.06558895111084,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.1399,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 5.604303359985352,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.8142,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 3.0004537105560303,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.0156,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 8.061921119689941,
84
+ "learning_rate": 2e-05,
85
+ "loss": 2.0607,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 13.409745216369629,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.7125,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.7100305557250977,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.7186,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 4.847060680389404,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6527,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 12.18260383605957,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.7499,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.2415454387664795,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.0221,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 3.2538342475891113,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.1897,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.36144757270813,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6208,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 9.6058988571167,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.8176,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 5.7604498863220215,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.891,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.558753490447998,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.2292,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 3.545152187347412,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.2327,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 5.126194000244141,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.7782,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 4.15905237197876,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.4625,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 8.5696382522583,
182
+ "learning_rate": 2e-05,
183
+ "loss": 2.1859,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.0490771532058716,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.6732,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 4.333555698394775,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.2707,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 4.150696277618408,
203
+ "learning_rate": 2e-05,
204
+ "loss": 2.296,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 3.1090495586395264,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.374,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.279902935028076,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.7701,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 3.1808507442474365,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.5774,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.980347156524658,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.3503,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 1.9250891208648682,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0087,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.43418270349502563,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.5027,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 6.883395671844482,
252
+ "learning_rate": 2e-05,
253
+ "loss": 2.4104,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.5485419034957886,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.6374,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.1076228618621826,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.1517,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.7041553258895874,
273
+ "learning_rate": 2e-05,
274
+ "loss": 2.0387,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.2205132246017456,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.1971,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.1478123664855957,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.715,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 2.3879141807556152,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.2586,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.8638067245483398,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.4435,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.9053351879119873,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.8824,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 4.362792491912842,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.4925,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 3.5274477005004883,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.3381,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.9348663091659546,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.3041,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 3.1137640476226807,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1207,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.3478007316589355,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8649,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 3.8190839290618896,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.6968,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 3.223320722579956,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.0917,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2239167178211328.0,
365
+ "train_loss": 1.1960790348052979,
366
+ "train_runtime": 130.6243,
367
+ "train_samples_per_second": 3.062,
368
+ "train_steps_per_second": 0.766
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2239167178211328.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169ebfa5ea45498c858e2141245075d803dff199394089e935fdba4da7a3099a
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3485ba0093a053529dff2fcd3d4cd079804e71aa2b559ec8f559f9ba71055c
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bd98ef5d90acc7a29e5fef3ce1fc5e7706f0d3239ff49fef6f382c05ba55849
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3854789b581cb0260e2d94c3d708f95a4c379713ad720e099ecb0ad2e28f63c
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:035bf44788d365a4d9eebd5d350a631cde6bbe9c766c0f0feff763b495a47f8f
3
+ size 791576546
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d31317f83b371f254d5922e2d81721fff42d341d39690889aaf77053f678a1
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16cabb09de90f1c5e2fde2a8d0aabc8dabcd0bb09b864ca39c1e5ccf9d3be264
3
+ size 791576546
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b08678e0ccc6b5c35814c8ff802230cd341a7747cf7496a9bb58cdfe57120ac7
3
+ size 791576546
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 3.574599266052246,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.2507,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.12841464579105377,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.7722,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.6093394756317139,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.2003,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 1.608458161354065,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3632,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.834401249885559,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.9866,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.831615924835205,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.117,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.3170740604400635,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9013,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.3447439670562744,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.677,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.6816301345825195,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5952,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 4.032651901245117,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.821,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 2.3172712326049805,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.2874,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.2676846981048584,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.6908,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 5.13706111907959,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.0195,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 1.1814277172088623,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3295,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.5663914680480957,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.1604,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 3.956602096557617,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.565,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 3.0503950119018555,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.6086,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 1.34660005569458,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5632,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.9926305413246155,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2543,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 1.6534584760665894,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.427,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 1.608451008796692,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7323,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.8026962280273438,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.389,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 4.112421035766602,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.8067,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 4.906567096710205,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.1125,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 9.803025245666504,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.3187,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 0.7343541383743286,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4331,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.47376033663749695,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.33,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.4215737581253052,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.6756,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.5952926874160767,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.861,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.4449350833892822,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0126,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.2033276557922363,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.8229,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.0484799146652222,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4583,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.7214386463165283,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.1516,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.754631519317627,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8129,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 1.1184951066970825,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1875,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.5391192436218262,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.1746,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.6062778234481812,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.2598,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.573376476764679,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.4363,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.27290117740631104,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.8375,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.3175345659255981,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.5903,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.023700475692749,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.533,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.8385341763496399,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.666,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.081638216972351,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4824,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.2847450971603394,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.0647,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.40036866068840027,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.1547,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.7571963667869568,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5782,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.6084556579589844,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.0859,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.3350269794464111,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8913,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.8509856462478638,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.4388,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.0608395338058472,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.0815,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 6030679393435648.0,
365
+ "train_loss": 0.7993921279907227,
366
+ "train_runtime": 219.9706,
367
+ "train_samples_per_second": 1.818,
368
+ "train_steps_per_second": 0.455
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 6030679393435648.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c57250fcfe34d344a24dfe1172b2a64bc17bada58a2c6b17723f541b5fa08f
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af227748b467356df6cae04dd2a32abe3d124b38f08bc8390687d2806915ad2f
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f651d0bef6787aa7e3a030795fa8d154d1532d7d4689512ee09bc04a67afcb4d
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1490aa7c713d9f5b9bdd625c4b8d7d75f50d85fd15d02563a8c6dbaa5292ee
3
+ size 791578182
client_states_feddpa_feddualMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88e8e3c32c5cb38055ff420fc67ed684dd8deb4a18f122f1675fe70a499944f
3
+ size 791576546